/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "util/u_format.h"
#include "util/u_format_s3tc.h"
#include "pipe/p_screen.h"

#include "nv50/nv50_context.h"
#include "nv50/nv50_screen.h"

#include "nouveau_vp3_video.h"

#include "nv_object.xml.h"
#include <errno.h>

/* Fallback for headers that do not provide this getparam id; it is the
 * parameter queried in nv50_screen_create() to obtain the TP/MP unit masks.
 */
#ifndef NOUVEAU_GETPARAM_GRAPH_UNITS
# define NOUVEAU_GETPARAM_GRAPH_UNITS 13
#endif

/* Warp count used as a factor when sizing the local (TLS) buffer.
 * affected by LOCAL_WARPS_LOG_ALLOC / LOCAL_WARPS_NO_CLAMP
 */
#define LOCAL_WARPS_ALLOC 32
/* Warp count used as a factor when sizing the stack buffer.
 * affected by STACK_WARPS_LOG_ALLOC / STACK_WARPS_NO_CLAMP
 */
#define STACK_WARPS_ALLOC 32

/* Thread count used as a factor when sizing the local (TLS) buffer. */
#define THREADS_IN_WARP 32

/* Size in bytes of one shader temporary: a 4-component float vector. */
#define ONE_TEMP_SIZE (4/*vector*/ * sizeof(float))

static boolean
nv50_screen_is_format_supported(struct pipe_screen *pscreen,
50 51 52
                                enum pipe_format format,
                                enum pipe_texture_target target,
                                unsigned sample_count,
53
                                unsigned bindings)
Ben Skeggs's avatar
Ben Skeggs committed
54
{
55 56
   if (sample_count > 8)
      return FALSE;
57
   if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */
58 59
      return FALSE;
   if (sample_count == 8 && util_format_get_blocksizebits(format) >= 128)
60 61
      return FALSE;

62 63
   if (!util_format_is_supported(format, bindings))
      return FALSE;
64 65 66

   switch (format) {
   case PIPE_FORMAT_Z16_UNORM:
67
      if (nv50_screen(pscreen)->tesla->oclass < NVA0_3D_CLASS)
68 69 70 71 72 73 74 75 76 77 78 79
         return FALSE;
      break;
   default:
      break;
   }

   /* transfers & shared are always supported */
   bindings &= ~(PIPE_BIND_TRANSFER_READ |
                 PIPE_BIND_TRANSFER_WRITE |
                 PIPE_BIND_SHARED);

   return (nv50_format_table[format].usage & bindings) == bindings;
Ben Skeggs's avatar
Ben Skeggs committed
80 81 82
}

static int
83
nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
Ben Skeggs's avatar
Ben Skeggs committed
84
{
85 86
   const uint16_t class_3d = nouveau_screen(pscreen)->class_3d;

87 88
   switch (param) {
   case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
Adel Gadllah's avatar
Adel Gadllah committed
89
      return 14;
90
   case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
Adel Gadllah's avatar
Adel Gadllah committed
91
      return 12;
92
   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
Adel Gadllah's avatar
Adel Gadllah committed
93
      return 14;
94 95
   case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
      return 512;
96
   case PIPE_CAP_MIN_TEXEL_OFFSET:
97
      return -8;
98
   case PIPE_CAP_MAX_TEXEL_OFFSET:
99
      return 7;
100 101 102 103
   case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
   case PIPE_CAP_TEXTURE_SWIZZLE:
   case PIPE_CAP_TEXTURE_SHADOW_MAP:
   case PIPE_CAP_NPOT_TEXTURES:
104
   case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
105
   case PIPE_CAP_ANISOTROPIC_FILTER:
106
   case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
107
      return 1;
108 109
   case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
      return 65536;
110
   case PIPE_CAP_SEAMLESS_CUBE_MAP:
111
      return 1; /* nv50_screen(pscreen)->tesla->oclass >= NVA0_3D_CLASS; */
112 113
   case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
      return 0;
114 115 116 117 118
   case PIPE_CAP_CUBE_MAP_ARRAY:
      return 0;
      /*
      return nv50_screen(pscreen)->tesla->oclass >= NVA3_3D_CLASS;
      */
119
   case PIPE_CAP_TWO_SIDED_STENCIL:
120
   case PIPE_CAP_DEPTH_CLIP_DISABLE:
121 122 123 124
   case PIPE_CAP_POINT_SPRITE:
      return 1;
   case PIPE_CAP_SM3:
      return 1;
125
   case PIPE_CAP_GLSL_FEATURE_LEVEL:
126
      return 330;
127 128
   case PIPE_CAP_MAX_RENDER_TARGETS:
      return 8;
129 130
   case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
      return 1;
131 132 133
   case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
   case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
   case PIPE_CAP_VERTEX_COLOR_CLAMPED:
134
      return 1;
135
   case PIPE_CAP_QUERY_TIMESTAMP:
136
   case PIPE_CAP_QUERY_TIME_ELAPSED:
137 138
   case PIPE_CAP_OCCLUSION_QUERY:
      return 1;
139
   case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
140
      return 4;
141 142
   case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
   case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
143 144 145
      return 64;
   case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
      return (class_3d >= NVA0_3D_CLASS) ? 1 : 0;
146 147 148
   case PIPE_CAP_BLEND_EQUATION_SEPARATE:
   case PIPE_CAP_INDEP_BLEND_ENABLE:
      return 1;
149
   case PIPE_CAP_INDEP_BLEND_FUNC:
150
      return nv50_screen(pscreen)->tesla->oclass >= NVA3_3D_CLASS;
151 152 153 154 155 156 157 158 159
   case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
   case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
      return 1;
   case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
   case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
      return 0;
   case PIPE_CAP_SHADER_STENCIL_EXPORT:
      return 0;
   case PIPE_CAP_PRIMITIVE_RESTART:
160 161
   case PIPE_CAP_TGSI_INSTANCEID:
   case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
162
   case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
163
   case PIPE_CAP_CONDITIONAL_RENDER:
164
   case PIPE_CAP_TEXTURE_BARRIER:
165
   case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
166
   case PIPE_CAP_START_INSTANCE:
167
      return 1;
168
   case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
169
      return 0; /* state trackers will know better */
170
   case PIPE_CAP_USER_CONSTANT_BUFFERS:
171
   case PIPE_CAP_USER_INDEX_BUFFERS:
172
   case PIPE_CAP_USER_VERTEX_BUFFERS:
173
      return 1;
174 175
   case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
      return 256;
176 177
   case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
      return 1; /* 256 for binding as RT, but that's not possible in GL */
178
   case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
179
      return NOUVEAU_MIN_BUFFER_MAP_ALIGN;
180 181 182
   case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
   case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
   case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
183
   case PIPE_CAP_TGSI_TEXCOORD:
184
      return 0;
185 186
   case PIPE_CAP_TEXTURE_MULTISAMPLE:
      return 1;
187 188
   case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
      return 1;
189 190
   case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
      return 0;
191 192
   case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
      return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50;
193 194
   case PIPE_CAP_ENDIANNESS:
      return PIPE_ENDIAN_LITTLE;
195 196
   case PIPE_CAP_TGSI_VS_LAYER:
      return 0;
197 198 199 200
   default:
      NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
      return 0;
   }
201 202 203
}

static int
204
nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
205
                             enum pipe_shader_cap param)
206
{
207 208 209 210 211 212 213 214
   switch (shader) {
   case PIPE_SHADER_VERTEX:
   case PIPE_SHADER_GEOMETRY:
   case PIPE_SHADER_FRAGMENT:
      break;
   default:
      return 0;
   }
215

216 217 218 219 220 221 222 223 224 225 226
   switch (param) {
   case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
   case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
   case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
   case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
      return 16384;
   case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
      return 4;
   case PIPE_SHADER_CAP_MAX_INPUTS:
      if (shader == PIPE_SHADER_VERTEX)
         return 32;
227
      return 15;
228 229 230
   case PIPE_SHADER_CAP_MAX_CONSTS:
      return 65536 / 16;
   case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
231
      return NV50_MAX_PIPE_CONSTBUFS;
232 233 234 235 236 237 238 239 240 241 242
   case PIPE_SHADER_CAP_MAX_ADDRS:
      return 1;
   case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
   case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
      return shader != PIPE_SHADER_FRAGMENT;
   case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
   case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
      return 1;
   case PIPE_SHADER_CAP_MAX_PREDS:
      return 0;
   case PIPE_SHADER_CAP_MAX_TEMPS:
243
      return nv50_screen(pscreen)->max_tls_space / ONE_TEMP_SIZE;
244 245
   case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
      return 1;
246 247
   case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
      return 0;
248 249
   case PIPE_SHADER_CAP_SUBROUTINES:
      return 0; /* please inline, or provide function declarations */
250
   case PIPE_SHADER_CAP_INTEGERS:
251
      return 1;
252
   case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
253 254
      /* The chip could handle more sampler views than samplers */
   case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
255
      return MIN2(32, PIPE_MAX_SAMPLERS);
256 257 258 259
   default:
      NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
      return 0;
   }
Ben Skeggs's avatar
Ben Skeggs committed
260 261 262
}

static float
263
nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
Ben Skeggs's avatar
Ben Skeggs committed
264
{
265
   switch (param) {
266 267
   case PIPE_CAPF_MAX_LINE_WIDTH:
   case PIPE_CAPF_MAX_LINE_WIDTH_AA:
268
      return 10.0f;
269 270
   case PIPE_CAPF_MAX_POINT_WIDTH:
   case PIPE_CAPF_MAX_POINT_WIDTH_AA:
271
      return 64.0f;
272
   case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
273
      return 16.0f;
274
   case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
275 276 277 278 279
      return 4.0f;
   default:
      NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
      return 0.0f;
   }
Ben Skeggs's avatar
Ben Skeggs committed
280 281 282 283 284
}

static void
nv50_screen_destroy(struct pipe_screen *pscreen)
{
285 286
   struct nv50_screen *screen = nv50_screen(pscreen);

Ben Skeggs's avatar
Ben Skeggs committed
287 288 289
   if (screen->base.fence.current) {
      nouveau_fence_wait(screen->base.fence.current);
      nouveau_fence_ref (NULL, &screen->base.fence.current);
290
   }
291 292 293
   if (screen->base.pushbuf)
      screen->base.pushbuf->user_priv = NULL;

Christoph Bumiller's avatar
Christoph Bumiller committed
294 295
   if (screen->blitter)
      nv50_blitter_destroy(screen);
296 297 298 299 300 301 302 303

   nouveau_bo_ref(NULL, &screen->code);
   nouveau_bo_ref(NULL, &screen->tls_bo);
   nouveau_bo_ref(NULL, &screen->stack_bo);
   nouveau_bo_ref(NULL, &screen->txc);
   nouveau_bo_ref(NULL, &screen->uniforms);
   nouveau_bo_ref(NULL, &screen->fence.bo);

304 305 306
   nouveau_heap_destroy(&screen->vp_code_heap);
   nouveau_heap_destroy(&screen->gp_code_heap);
   nouveau_heap_destroy(&screen->fp_code_heap);
307

308
   FREE(screen->tic.entries);
309

310 311 312 313
   nouveau_object_del(&screen->tesla);
   nouveau_object_del(&screen->eng2d);
   nouveau_object_del(&screen->m2mf);
   nouveau_object_del(&screen->sync);
314 315 316 317

   nouveau_screen_fini(&screen->base);

   FREE(screen);
Ben Skeggs's avatar
Ben Skeggs committed
318 319
}

320
static void
Marcin Ślusarz's avatar
Marcin Ślusarz committed
321
nv50_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence)
322
{
Ben Skeggs's avatar
Ben Skeggs committed
323
   struct nv50_screen *screen = nv50_screen(pscreen);
324
   struct nouveau_pushbuf *push = screen->base.pushbuf;
Marcin Ślusarz's avatar
Marcin Ślusarz committed
325 326 327 328

   /* we need to do it after possible flush in MARK_RING */
   *sequence = ++screen->base.fence.sequence;

329 330 331 332 333
   PUSH_DATA (push, NV50_FIFO_PKHDR(NV50_3D(QUERY_ADDRESS_HIGH), 4));
   PUSH_DATAh(push, screen->fence.bo->offset);
   PUSH_DATA (push, screen->fence.bo->offset);
   PUSH_DATA (push, *sequence);
   PUSH_DATA (push, NV50_3D_QUERY_GET_MODE_WRITE_UNK0 |
Ben Skeggs's avatar
Ben Skeggs committed
334 335 336 337 338
                    NV50_3D_QUERY_GET_UNK4 |
                    NV50_3D_QUERY_GET_UNIT_CROP |
                    NV50_3D_QUERY_GET_TYPE_QUERY |
                    NV50_3D_QUERY_GET_QUERY_SELECT_ZERO |
                    NV50_3D_QUERY_GET_SHORT);
339 340
}

Ben Skeggs's avatar
Ben Skeggs committed
341 342
static u32
nv50_screen_fence_update(struct pipe_screen *pscreen)
343
{
344 345 346
   return nv50_screen(pscreen)->fence.map[0];
}

347
static void
348
nv50_screen_init_hwctx(struct nv50_screen *screen)
349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398
{
   struct nouveau_pushbuf *push = screen->base.pushbuf;
   struct nv04_fifo *fifo;
   unsigned i;

   fifo = (struct nv04_fifo *)screen->base.channel->data;

   BEGIN_NV04(push, SUBC_M2MF(NV01_SUBCHAN_OBJECT), 1);
   PUSH_DATA (push, screen->m2mf->handle);
   BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_DMA_NOTIFY), 3);
   PUSH_DATA (push, screen->sync->handle);
   PUSH_DATA (push, fifo->vram);
   PUSH_DATA (push, fifo->vram);

   BEGIN_NV04(push, SUBC_2D(NV01_SUBCHAN_OBJECT), 1);
   PUSH_DATA (push, screen->eng2d->handle);
   BEGIN_NV04(push, NV50_2D(DMA_NOTIFY), 4);
   PUSH_DATA (push, screen->sync->handle);
   PUSH_DATA (push, fifo->vram);
   PUSH_DATA (push, fifo->vram);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_2D(OPERATION), 1);
   PUSH_DATA (push, NV50_2D_OPERATION_SRCCOPY);
   BEGIN_NV04(push, NV50_2D(CLIP_ENABLE), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_2D(COLOR_KEY_ENABLE), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, SUBC_2D(0x0888), 1);
   PUSH_DATA (push, 1);

   BEGIN_NV04(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1);
   PUSH_DATA (push, screen->tesla->handle);

   BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
   PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);

   BEGIN_NV04(push, NV50_3D(DMA_NOTIFY), 1);
   PUSH_DATA (push, screen->sync->handle);
   BEGIN_NV04(push, NV50_3D(DMA_ZETA), 11);
   for (i = 0; i < 11; ++i)
      PUSH_DATA(push, fifo->vram);
   BEGIN_NV04(push, NV50_3D(DMA_COLOR(0)), NV50_3D_DMA_COLOR__LEN);
   for (i = 0; i < NV50_3D_DMA_COLOR__LEN; ++i)
      PUSH_DATA(push, fifo->vram);

   BEGIN_NV04(push, NV50_3D(REG_MODE), 1);
   PUSH_DATA (push, NV50_3D_REG_MODE_STRIPED);
   BEGIN_NV04(push, NV50_3D(UNK1400_LANES), 1);
   PUSH_DATA (push, 0xf);

399 400 401 402 403
   if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", TRUE)) {
      BEGIN_NV04(push, NV50_3D(WATCHDOG_TIMER), 1);
      PUSH_DATA (push, 0x18);
   }

404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447
   BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
   PUSH_DATA (push, 1);

   BEGIN_NV04(push, NV50_3D(CSAA_ENABLE), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(MULTISAMPLE_ENABLE), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1);
   PUSH_DATA (push, NV50_3D_MULTISAMPLE_MODE_MS1);
   BEGIN_NV04(push, NV50_3D(MULTISAMPLE_CTRL), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(LINE_LAST_PIXEL), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(BLEND_SEPARATE_ALPHA), 1);
   PUSH_DATA (push, 1);

   if (screen->tesla->oclass >= NVA0_3D_CLASS) {
      BEGIN_NV04(push, SUBC_3D(NVA0_3D_TEX_MISC), 1);
      PUSH_DATA (push, NVA0_3D_TEX_MISC_SEAMLESS_CUBE_MAP);
   }

   BEGIN_NV04(push, NV50_3D(SCREEN_Y_CONTROL), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(WINDOW_OFFSET_X), 2);
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(ZCULL_REGION), 1);
   PUSH_DATA (push, 0x3f);

   BEGIN_NV04(push, NV50_3D(VP_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->code->offset + (0 << NV50_CODE_BO_SIZE_LOG2));
   PUSH_DATA (push, screen->code->offset + (0 << NV50_CODE_BO_SIZE_LOG2));

   BEGIN_NV04(push, NV50_3D(FP_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->code->offset + (1 << NV50_CODE_BO_SIZE_LOG2));
   PUSH_DATA (push, screen->code->offset + (1 << NV50_CODE_BO_SIZE_LOG2));

   BEGIN_NV04(push, NV50_3D(GP_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->code->offset + (2 << NV50_CODE_BO_SIZE_LOG2));
   PUSH_DATA (push, screen->code->offset + (2 << NV50_CODE_BO_SIZE_LOG2));

   BEGIN_NV04(push, NV50_3D(LOCAL_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->tls_bo->offset);
   PUSH_DATA (push, screen->tls_bo->offset);
448
   PUSH_DATA (push, util_logbase2(screen->cur_tls_space / 8));
449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472

   BEGIN_NV04(push, NV50_3D(STACK_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->stack_bo->offset);
   PUSH_DATA (push, screen->stack_bo->offset);
   PUSH_DATA (push, 4);

   BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->uniforms->offset + (0 << 16));
   PUSH_DATA (push, screen->uniforms->offset + (0 << 16));
   PUSH_DATA (push, (NV50_CB_PVP << 16) | 0x0000);

   BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->uniforms->offset + (1 << 16));
   PUSH_DATA (push, screen->uniforms->offset + (1 << 16));
   PUSH_DATA (push, (NV50_CB_PGP << 16) | 0x0000);

   BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->uniforms->offset + (2 << 16));
   PUSH_DATA (push, screen->uniforms->offset + (2 << 16));
   PUSH_DATA (push, (NV50_CB_PFP << 16) | 0x0000);

   BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->uniforms->offset + (3 << 16));
   PUSH_DATA (push, screen->uniforms->offset + (3 << 16));
473
   PUSH_DATA (push, (NV50_CB_AUX << 16) | (NV50_CB_AUX_SIZE & 0xffff));
474

475
   BEGIN_NI04(push, NV50_3D(SET_PROGRAM_CB), 3);
476 477 478 479
   PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf01);
   PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf21);
   PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf31);

480 481
   /* return { 0.0, 0.0, 0.0, 0.0 } on out-of-bounds vtxbuf access */
   BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
482
   PUSH_DATA (push, (NV50_CB_AUX_RUNOUT_OFFSET << (8 - 2)) | NV50_CB_AUX);
483 484 485 486 487 488
   BEGIN_NI04(push, NV50_3D(CB_DATA(0)), 4);
   PUSH_DATAf(push, 0.0f);
   PUSH_DATAf(push, 0.0f);
   PUSH_DATAf(push, 0.0f);
   PUSH_DATAf(push, 0.0f);
   BEGIN_NV04(push, NV50_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
489 490
   PUSH_DATAh(push, screen->uniforms->offset + (3 << 16) + NV50_CB_AUX_RUNOUT_OFFSET);
   PUSH_DATA (push, screen->uniforms->offset + (3 << 16) + NV50_CB_AUX_RUNOUT_OFFSET);
491

492 493
   nv50_upload_ms_info(push);

494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556
   /* max TIC (bits 4:8) & TSC bindings, per program type */
   for (i = 0; i < 3; ++i) {
      BEGIN_NV04(push, NV50_3D(TEX_LIMITS(i)), 1);
      PUSH_DATA (push, 0x54);
   }

   BEGIN_NV04(push, NV50_3D(TIC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset);
   PUSH_DATA (push, screen->txc->offset);
   PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1);

   BEGIN_NV04(push, NV50_3D(TSC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset + 65536);
   PUSH_DATA (push, screen->txc->offset + 65536);
   PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1);

   BEGIN_NV04(push, NV50_3D(LINKED_TSC), 1);
   PUSH_DATA (push, 0);

   BEGIN_NV04(push, NV50_3D(CLIP_RECTS_EN), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(CLIP_RECTS_MODE), 1);
   PUSH_DATA (push, NV50_3D_CLIP_RECTS_MODE_INSIDE_ANY);
   BEGIN_NV04(push, NV50_3D(CLIP_RECT_HORIZ(0)), 8 * 2);
   for (i = 0; i < 8 * 2; ++i)
      PUSH_DATA(push, 0);
   BEGIN_NV04(push, NV50_3D(CLIPID_ENABLE), 1);
   PUSH_DATA (push, 0);

   BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSFORM_EN), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_3D(DEPTH_RANGE_NEAR(0)), 2);
   PUSH_DATAf(push, 0.0f);
   PUSH_DATAf(push, 1.0f);

   BEGIN_NV04(push, NV50_3D(VIEW_VOLUME_CLIP_CTRL), 1);
#ifdef NV50_SCISSORS_CLIPPING
   PUSH_DATA (push, 0x0000);
#else
   PUSH_DATA (push, 0x1080);
#endif

   BEGIN_NV04(push, NV50_3D(CLEAR_FLAGS), 1);
   PUSH_DATA (push, NV50_3D_CLEAR_FLAGS_CLEAR_RECT_VIEWPORT);

   /* We use scissors instead of exact view volume clipping,
    * so they're always enabled.
    */
   BEGIN_NV04(push, NV50_3D(SCISSOR_ENABLE(0)), 3);
   PUSH_DATA (push, 1);
   PUSH_DATA (push, 8192 << 16);
   PUSH_DATA (push, 8192 << 16);

   BEGIN_NV04(push, NV50_3D(RASTERIZE_ENABLE), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_3D(POINT_RASTER_RULES), 1);
   PUSH_DATA (push, NV50_3D_POINT_RASTER_RULES_OGL);
   BEGIN_NV04(push, NV50_3D(FRAG_COLOR_CLAMP_EN), 1);
   PUSH_DATA (push, 0x11111111);
   BEGIN_NV04(push, NV50_3D(EDGEFLAG), 1);
   PUSH_DATA (push, 1);

   PUSH_KICK (push);
557 558
}

559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612
/**
 * Allocate the local (TLS) buffer object for a given per-thread size.
 *
 * The per-thread size is rounded up so that it holds a power-of-two number
 * of temporaries; the rounded value is stored in screen->cur_tls_space
 * before the allocation is attempted. The new bo is stored in
 * screen->tls_bo; any previous bo must have been released by the caller.
 *
 * @param screen     screen providing TPs/MPsInTP and receiving the bo
 * @param tls_space  requested per-thread space in bytes
 * @param tls_size   out: total size in bytes of the buffer requested
 * @return 0 on success, otherwise the error returned by nouveau_bo_new()
 */
static int nv50_tls_alloc(struct nv50_screen *screen, unsigned tls_space,
      uint64_t *tls_size)
{
   struct nouveau_device *dev = screen->base.device;
   const unsigned temps = util_next_power_of_two(tls_space / ONE_TEMP_SIZE);
   int ret;

   screen->cur_tls_space = temps * ONE_TEMP_SIZE;
   if (nouveau_mesa_debug)
      debug_printf("allocating space for %u temps\n", temps);

   /* Widen first so the product is computed in 64 bits and cannot wrap in
    * a narrower type before it is stored.
    */
   *tls_size = (uint64_t)screen->cur_tls_space *
         util_next_power_of_two(screen->TPs) *
         screen->MPsInTP * LOCAL_WARPS_ALLOC * THREADS_IN_WARP;

   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,
                        *tls_size, NULL, &screen->tls_bo);
   if (ret) {
      NOUVEAU_ERR("Failed to allocate local bo: %d\n", ret);
      return ret;
   }

   return 0;
}

/**
 * Grow the local (TLS) buffer so each thread gets at least tls_space bytes.
 *
 * @param screen     screen owning the TLS buffer
 * @param tls_space  required per-thread space in bytes
 * @return 0 if the current buffer is already large enough,
 *         1 if a new buffer was allocated and its address emitted,
 *         -ENOMEM if tls_space exceeds screen->max_tls_space,
 *         or the error from nv50_tls_alloc(); in the error cases the
 *         previous buffer and size remain in place.
 */
int nv50_tls_realloc(struct nv50_screen *screen, unsigned tls_space)
{
   struct nouveau_pushbuf *push = screen->base.pushbuf;
   struct nouveau_bo *old_bo;
   uint64_t old_space;
   uint64_t tls_size;
   int ret;

   /* An equal-sized request is already satisfied; do not reallocate. */
   if (tls_space <= screen->cur_tls_space)
      return 0;
   if (tls_space > screen->max_tls_space) {
      /* fixable by limiting number of warps (LOCAL_WARPS_LOG_ALLOC /
       * LOCAL_WARPS_NO_CLAMP) */
      NOUVEAU_ERR("Unsupported number of temporaries (%u > %u). Fixable if someone cares.\n",
            (unsigned)(tls_space / ONE_TEMP_SIZE),
            (unsigned)(screen->max_tls_space / ONE_TEMP_SIZE));
      return -ENOMEM;
   }

   /* Hold on to the current buffer until the replacement exists, so a
    * failed allocation does not leave the screen without a TLS buffer.
    * nv50_tls_alloc() overwrites both tls_bo and cur_tls_space.
    */
   old_bo = screen->tls_bo;
   old_space = screen->cur_tls_space;
   screen->tls_bo = NULL;

   ret = nv50_tls_alloc(screen, tls_space, &tls_size);
   if (ret) {
      screen->tls_bo = old_bo;
      screen->cur_tls_space = old_space;
      return ret;
   }
   nouveau_bo_ref(NULL, &old_bo);

   BEGIN_NV04(push, NV50_3D(LOCAL_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->tls_bo->offset);
   PUSH_DATA (push, screen->tls_bo->offset);
   PUSH_DATA (push, util_logbase2(screen->cur_tls_space / 8));

   return 1;
}

struct pipe_screen *
614
nv50_screen_create(struct nouveau_device *dev)
615 616 617
{
   struct nv50_screen *screen;
   struct pipe_screen *pscreen;
618
   struct nouveau_object *chan;
619 620
   uint64_t value;
   uint32_t tesla_class;
621
   unsigned stack_size;
622 623 624 625 626 627 628 629
   int ret;

   screen = CALLOC_STRUCT(nv50_screen);
   if (!screen)
      return NULL;
   pscreen = &screen->base.base;

   ret = nouveau_screen_init(&screen->base, dev);
630 631 632 633
   if (ret) {
      NOUVEAU_ERR("nouveau_screen_init failed: %d\n", ret);
      goto fail;
   }
634

635 636 637 638 639 640 641 642
   /* TODO: Prevent FIFO prefetch before transfer of index buffers and
    *  admit them to VRAM.
    */
   screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER |
      PIPE_BIND_VERTEX_BUFFER;
   screen->base.sysmem_bindings |=
      PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER;

643 644 645
   screen->base.pushbuf->user_priv = screen;
   screen->base.pushbuf->rsvd_kick = 5;

646 647 648 649 650 651 652 653 654 655 656
   chan = screen->base.channel;

   pscreen->destroy = nv50_screen_destroy;
   pscreen->context_create = nv50_create;
   pscreen->is_format_supported = nv50_screen_is_format_supported;
   pscreen->get_param = nv50_screen_get_param;
   pscreen->get_shader_param = nv50_screen_get_shader_param;
   pscreen->get_paramf = nv50_screen_get_paramf;

   nv50_screen_init_resource_functions(pscreen);

657 658
   if (screen->base.device->chipset < 0x84 ||
       debug_get_bool_option("NOUVEAU_PMPEG", FALSE)) {
659 660 661 662 663 664 665 666
      /* PMPEG */
      nouveau_screen_init_vdec(&screen->base);
   } else if (screen->base.device->chipset < 0x98 ||
              screen->base.device->chipset == 0xa0) {
      /* VP2 */
      screen->base.base.get_video_param = nv84_screen_get_video_param;
      screen->base.base.is_video_format_supported = nv84_screen_video_supported;
   } else {
667 668 669
      /* VP3/4 */
      screen->base.base.get_video_param = nouveau_vp3_screen_get_video_param;
      screen->base.base.is_video_format_supported = nouveau_vp3_screen_video_supported;
670
   }
671

672
   ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096,
673
                        NULL, &screen->fence.bo);
674 675
   if (ret) {
      NOUVEAU_ERR("Failed to allocate fence bo: %d\n", ret);
676
      goto fail;
677 678
   }

679
   nouveau_bo_map(screen->fence.bo, 0, NULL);
680
   screen->fence.map = screen->fence.bo->map;
Ben Skeggs's avatar
Ben Skeggs committed
681 682
   screen->base.fence.emit = nv50_screen_fence_emit;
   screen->base.fence.update = nv50_screen_fence_update;
683

684 685 686
   ret = nouveau_object_new(chan, 0xbeef0301, NOUVEAU_NOTIFIER_CLASS,
                            &(struct nv04_notify){ .length = 32 },
                            sizeof(struct nv04_notify), &screen->sync);
687 688 689 690
   if (ret) {
      NOUVEAU_ERR("Failed to allocate notifier: %d\n", ret);
      goto fail;
   }
691 692 693

   ret = nouveau_object_new(chan, 0xbeef5039, NV50_M2MF_CLASS,
                            NULL, 0, &screen->m2mf);
694 695 696 697
   if (ret) {
      NOUVEAU_ERR("Failed to allocate PGRAPH context for M2MF: %d\n", ret);
      goto fail;
   }
698

699 700
   ret = nouveau_object_new(chan, 0xbeef502d, NV50_2D_CLASS,
                            NULL, 0, &screen->eng2d);
701 702 703 704
   if (ret) {
      NOUVEAU_ERR("Failed to allocate PGRAPH context for 2D: %d\n", ret);
      goto fail;
   }
705 706 707

   switch (dev->chipset & 0xf0) {
   case 0x50:
708
      tesla_class = NV50_3D_CLASS;
709 710 711
      break;
   case 0x80:
   case 0x90:
712
      tesla_class = NV84_3D_CLASS;
713 714 715 716 717 718
      break;
   case 0xa0:
      switch (dev->chipset) {
      case 0xa0:
      case 0xaa:
      case 0xac:
719
         tesla_class = NVA0_3D_CLASS;
720 721
         break;
      case 0xaf:
722
         tesla_class = NVAF_3D_CLASS;
723 724
         break;
      default:
725
         tesla_class = NVA3_3D_CLASS;
726 727 728 729
         break;
      }
      break;
   default:
730 731
      NOUVEAU_ERR("Not a known NV50 chipset: NV%02x\n", dev->chipset);
      goto fail;
732
   }
733
   screen->base.class_3d = tesla_class;
734

735 736
   ret = nouveau_object_new(chan, 0xbeef5097, tesla_class,
                            NULL, 0, &screen->tesla);
737 738 739 740
   if (ret) {
      NOUVEAU_ERR("Failed to allocate PGRAPH context for 3D: %d\n", ret);
      goto fail;
   }
741

742 743 744 745
   /* This over-allocates by a whole code BO. The GP, which would execute at
    * the end of the last page, would trigger faults. The going theory is that
    * it prefetches up to a certain amount. This avoids dmesg spam.
    */
746
   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,
747
                        4 << NV50_CODE_BO_SIZE_LOG2, NULL, &screen->code);
748 749
   if (ret) {
      NOUVEAU_ERR("Failed to allocate code bo: %d\n", ret);
750
      goto fail;
751
   }
752

753 754 755
   nouveau_heap_init(&screen->vp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
   nouveau_heap_init(&screen->gp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
   nouveau_heap_init(&screen->fp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
756

757
   nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
758

759 760
   screen->TPs = util_bitcount(value & 0xffff);
   screen->MPsInTP = util_bitcount((value >> 24) & 0xf);
761

762 763
   stack_size = util_next_power_of_two(screen->TPs) * screen->MPsInTP *
         STACK_WARPS_ALLOC * 64 * 8;
764

765
   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, stack_size, NULL,
766
                        &screen->stack_bo);
767 768 769 770
   if (ret) {
      NOUVEAU_ERR("Failed to allocate stack bo: %d\n", ret);
      goto fail;
   }
771

772 773 774 775 776
   uint64_t size_of_one_temp = util_next_power_of_two(screen->TPs) *
         screen->MPsInTP * LOCAL_WARPS_ALLOC *  THREADS_IN_WARP *
         ONE_TEMP_SIZE;
   screen->max_tls_space = dev->vram_size / size_of_one_temp * ONE_TEMP_SIZE;
   screen->max_tls_space /= 2; /* half of vram */
777

778 779
   /* hw can address max 64 KiB */
   screen->max_tls_space = MIN2(screen->max_tls_space, 64 << 10);
780

781 782 783 784
   uint64_t tls_size;
   unsigned tls_space = 4/*temps*/ * ONE_TEMP_SIZE;
   ret = nv50_tls_alloc(screen, tls_space, &tls_size);
   if (ret)
785
      goto fail;
786 787 788 789

   if (nouveau_mesa_debug)
      debug_printf("TPs = %u, MPsInTP = %u, VRAM = %"PRIu64" MiB, tls_size = %"PRIu64" KiB\n",
            screen->TPs, screen->MPsInTP, dev->vram_size >> 20, tls_size >> 10);
790

791
   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 4 << 16, NULL,
792
                        &screen->uniforms);
793 794
   if (ret) {
      NOUVEAU_ERR("Failed to allocate uniforms bo: %d\n", ret);
795
      goto fail;
796
   }
797

798
   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 3 << 16, NULL,
799
                        &screen->txc);
800 801 802 803
   if (ret) {
      NOUVEAU_ERR("Failed to allocate TIC/TSC bo: %d\n", ret);
      goto fail;
   }
804 805 806

   screen->tic.entries = CALLOC(4096, sizeof(void *));
   screen->tsc.entries = screen->tic.entries + 2048;
807

Christoph Bumiller's avatar
Christoph Bumiller committed
808
   if (!nv50_blitter_create(screen))
809 810
      goto fail;

811
   nv50_screen_init_hwctx(screen);
812

Ben Skeggs's avatar
Ben Skeggs committed
813
   nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE);
814

815
   return pscreen;
816

817 818 819
fail:
   nv50_screen_destroy(pscreen);
   return NULL;
820 821
}

822 823
int
nv50_screen_tic_alloc(struct nv50_screen *screen, void *entry)
Ben Skeggs's avatar
Ben Skeggs committed
824
{
825 826 827 828 829 830 831 832 833 834 835 836
   int i = screen->tic.next;

   while (screen->tic.lock[i / 32] & (1 << (i % 32)))
      i = (i + 1) & (NV50_TIC_MAX_ENTRIES - 1);

   screen->tic.next = (i + 1) & (NV50_TIC_MAX_ENTRIES - 1);

   if (screen->tic.entries[i])
      nv50_tic_entry(screen->tic.entries[i])->id = -1;

   screen->tic.entries[i] = entry;
   return i;
Ben Skeggs's avatar
Ben Skeggs committed
837 838
}

839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854
/**
 * Reserve a TSC slot for entry.
 *
 * Starting at screen->tsc.next, takes the first slot whose lock bit is
 * clear, wrapping around with the mask NV50_TSC_MAX_ENTRIES - 1. If another
 * entry occupies that slot it is evicted by setting its id to -1.
 *
 * NOTE(review): the loop does not terminate if every slot is locked, and the
 * wrap-around presumes NV50_TSC_MAX_ENTRIES is a power of two — confirm.
 *
 * @param screen  screen owning the TSC table
 * @param entry   entry to store in the chosen slot
 * @return index of the slot now holding entry
 */
int
nv50_screen_tsc_alloc(struct nv50_screen *screen, void *entry)
{
   int i = screen->tsc.next;

   /* 1u: shifting a signed 1 into bit 31 would be undefined behaviour. */
   while (screen->tsc.lock[i / 32] & (1u << (i % 32)))
      i = (i + 1) & (NV50_TSC_MAX_ENTRIES - 1);

   screen->tsc.next = (i + 1) & (NV50_TSC_MAX_ENTRIES - 1);

   if (screen->tsc.entries[i])
      nv50_tsc_entry(screen->tsc.entries[i])->id = -1;

   screen->tsc.entries[i] = entry;
   return i;
}