/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_program.c
 *
 * This file contains the driver interface for compiling shaders.
 *
 * See iris_program_cache.c for the in-memory program cache where the
 * compiled shaders are stored.
 */

#include <stdio.h>
#include <errno.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_atomic.h"
#include "util/u_upload_mgr.h"
#include "util/debug.h"

#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_serialize.h"

#include "intel/compiler/brw_compiler.h"
#include "intel/compiler/brw_nir.h"
#include "iris_context.h"
#include "nir/tgsi_to_nir.h"

#define KEY_ID(prefix) .prefix.program_string_id = ish->program_id
#define BRW_KEY_INIT(gen, prog_id)                       \
   .base.program_string_id = prog_id,                    \
   .base.subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM, \
   .base.tex.swizzles[0 ... MAX_SAMPLERS - 1] = 0x688,   \
   .base.tex.compressed_multisample_layout_mask = ~0,    \
   .base.tex.msaa_16 = (gen >= 9 ? ~0 : 0)
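/* Note: 0x688 is SWIZZLE_XYZW, i.e. the identity swizzle packed three bits
 * per component, applied here to every sampler slot.
 */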

static unsigned
get_new_program_id(struct iris_screen *screen)
{
   return p_atomic_inc_return(&screen->program_id);
}

static struct brw_vs_prog_key
iris_to_brw_vs_key(const struct gen_device_info *devinfo,
                   const struct iris_vs_prog_key *key)
{
   return (struct brw_vs_prog_key) {
      BRW_KEY_INIT(devinfo->gen, key->vue.base.program_string_id),

      /* Don't tell the backend about our clip plane constants; we've
       * already lowered them in NIR and don't want it doing that again.
       */
      .nr_userclip_plane_consts = 0,
   };
}

static struct brw_tcs_prog_key
iris_to_brw_tcs_key(const struct gen_device_info *devinfo,
                    const struct iris_tcs_prog_key *key)
{
   return (struct brw_tcs_prog_key) {
      BRW_KEY_INIT(devinfo->gen, key->vue.base.program_string_id),
      .tes_primitive_mode = key->tes_primitive_mode,
      .input_vertices = key->input_vertices,
      .patch_outputs_written = key->patch_outputs_written,
      .outputs_written = key->outputs_written,
      .quads_workaround = key->quads_workaround,
   };
}

static struct brw_tes_prog_key
iris_to_brw_tes_key(const struct gen_device_info *devinfo,
                    const struct iris_tes_prog_key *key)
{
   return (struct brw_tes_prog_key) {
      BRW_KEY_INIT(devinfo->gen, key->vue.base.program_string_id),
      .patch_inputs_read = key->patch_inputs_read,
      .inputs_read = key->inputs_read,
   };
}

static struct brw_gs_prog_key
iris_to_brw_gs_key(const struct gen_device_info *devinfo,
                   const struct iris_gs_prog_key *key)
{
   return (struct brw_gs_prog_key) {
      BRW_KEY_INIT(devinfo->gen, key->vue.base.program_string_id),
   };
}

static struct brw_wm_prog_key
iris_to_brw_fs_key(const struct gen_device_info *devinfo,
                   const struct iris_fs_prog_key *key)
{
   return (struct brw_wm_prog_key) {
      BRW_KEY_INIT(devinfo->gen, key->base.program_string_id),
      .nr_color_regions = key->nr_color_regions,
      .flat_shade = key->flat_shade,
      .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
      .alpha_to_coverage = key->alpha_to_coverage,
      .clamp_fragment_color = key->clamp_fragment_color,
      .persample_interp = key->persample_interp,
      .multisample_fbo = key->multisample_fbo,
      .force_dual_color_blend = key->force_dual_color_blend,
      .coherent_fb_fetch = key->coherent_fb_fetch,
      .color_outputs_valid = key->color_outputs_valid,
      .input_slots_valid = key->input_slots_valid,
      .ignore_sample_mask_out = !key->multisample_fbo,
   };
}

static struct brw_cs_prog_key
iris_to_brw_cs_key(const struct gen_device_info *devinfo,
                   const struct iris_cs_prog_key *key)
{
   return (struct brw_cs_prog_key) {
      BRW_KEY_INIT(devinfo->gen, key->base.program_string_id),
   };
}

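/* Allocate transient state space via u_upload_mgr, returning a CPU mapping
 * of the allocation and recording its resource and offset in @ref.
 */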
static void *
upload_state(struct u_upload_mgr *uploader,
             struct iris_state_ref *ref,
             unsigned size,
             unsigned alignment)
{
   void *p = NULL;
   u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
   return p;
}

void
iris_upload_ubo_ssbo_surf_state(struct iris_context *ice,
                                struct pipe_shader_buffer *buf,
                                struct iris_state_ref *surf_state,
                                isl_surf_usage_flags_t usage)
{
   struct pipe_context *ctx = &ice->ctx;
   struct iris_screen *screen = (struct iris_screen *) ctx->screen;
   bool ssbo = usage & ISL_SURF_USAGE_STORAGE_BIT;
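   /* SSBOs get a RAW (untyped) surface format so byte-addressed data port
    * access works, while UBOs use a vec4 RGBA32_FLOAT format, matching how
    * constant buffer fetches are performed.
    */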

   void *map =
      upload_state(ice->state.surface_uploader, surf_state,
                   screen->isl_dev.ss.size, 64);
   if (unlikely(!map)) {
      surf_state->res = NULL;
      return;
   }

   struct iris_resource *res = (void *) buf->buffer;
   struct iris_bo *surf_bo = iris_resource_bo(surf_state->res);
   surf_state->offset += iris_bo_offset_from_base_address(surf_bo);

   isl_buffer_fill_state(&screen->isl_dev, map,
                         .address = res->bo->gtt_offset + res->offset +
                                    buf->buffer_offset,
                         .size_B = buf->buffer_size - res->offset,
                         .format = ssbo ? ISL_FORMAT_RAW
                                        : ISL_FORMAT_R32G32B32A32_FLOAT,
                         .swizzle = ISL_SWIZZLE_IDENTITY,
                         .stride_B = 1,
                         .mocs = iris_mocs(res->bo, &screen->isl_dev, usage));
}

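/* Compute the flattened offset of an array-of-arrays deref, in units of
 * elem_size.  As a sketch: for a hypothetical 'image2D imgs[3][2]' variable,
 * a deref of imgs[i][j] yields (i * 2 + j) * elem_size, accumulated level by
 * level from the innermost index outward and finally clamped to the bounds
 * of the whole array.
 */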
static nir_ssa_def *
get_aoa_deref_offset(nir_builder *b,
                     nir_deref_instr *deref,
                     unsigned elem_size)
{
   unsigned array_size = elem_size;
   nir_ssa_def *offset = nir_imm_int(b, 0);

   while (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);

      /* This level's element size is the previous level's array size */
      assert(deref->arr.index.ssa);
      nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
      offset = nir_iadd(b, offset,
                           nir_imul(b, index, nir_imm_int(b, array_size)));

      deref = nir_deref_instr_parent(deref);
      assert(glsl_type_is_array(deref->type));
      array_size *= glsl_get_length(deref->type);
   }

   /* Accessing an invalid surface index with the dataport can result in a
    * hang.  According to the spec "if the index used to select an individual
    * element is negative or greater than or equal to the size of the array,
    * the results of the operation are undefined but may not lead to
    * termination" -- which is one of the possible outcomes of the hang.
    * Clamp the index to prevent access outside of the array bounds.
    */
   return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
}

static void
iris_lower_storage_image_derefs(nir_shader *nir)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_image_deref_load:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
         case nir_intrinsic_image_deref_atomic_imin:
         case nir_intrinsic_image_deref_atomic_umin:
         case nir_intrinsic_image_deref_atomic_imax:
         case nir_intrinsic_image_deref_atomic_umax:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
         case nir_intrinsic_image_deref_atomic_exchange:
         case nir_intrinsic_image_deref_atomic_comp_swap:
         case nir_intrinsic_image_deref_size:
         case nir_intrinsic_image_deref_samples:
         case nir_intrinsic_image_deref_load_raw_intel:
         case nir_intrinsic_image_deref_store_raw_intel: {
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            nir_variable *var = nir_deref_instr_get_variable(deref);

            b.cursor = nir_before_instr(&intrin->instr);
            nir_ssa_def *index =
               nir_iadd(&b, nir_imm_int(&b, var->data.driver_location),
                            get_aoa_deref_offset(&b, deref, 1));
            nir_rewrite_image_intrinsic(intrin, index, false);
            break;
         }

         default:
            break;
         }
      }
   }
}

/**
 * Undo nir_lower_passthrough_edgeflags but keep the inputs_read flag.
 */
static bool
iris_fix_edge_flags(nir_shader *nir)
{
   if (nir->info.stage != MESA_SHADER_VERTEX) {
      nir_shader_preserve_all_metadata(nir);
      return false;
   }

   nir_variable *var = nir_find_variable_with_location(nir, nir_var_shader_out,
                                                       VARYING_SLOT_EDGE);
   if (!var) {
      nir_shader_preserve_all_metadata(nir);
      return false;
   }

   var->data.mode = nir_var_shader_temp;
   nir->info.outputs_written &= ~VARYING_BIT_EDGE;
   nir->info.inputs_read &= ~VERT_BIT_EDGEFLAG;
   nir_fixup_deref_modes(nir);

   nir_foreach_function(f, nir) {
      if (f->impl) {
         nir_metadata_preserve(f->impl, nir_metadata_block_index |
                                        nir_metadata_dominance |
                                        nir_metadata_live_ssa_defs |
                                        nir_metadata_loop_analysis);
      }
   }

   return true;
}

/**
 * Fix an uncompiled shader's stream output info.
 *
 * Core Gallium stores output->register_index as a "slot" number, where
 * slots are assigned consecutively to all outputs in info->outputs_written.
 * This naive packing of outputs doesn't work for us - we too have slots,
 * but the layout is defined by the VUE map, which we won't have until we
 * compile a specific shader variant.  So, we remap these and simply store
 * VARYING_SLOT_* in our copy's output->register_index fields.
 *
 * We also fix up VARYING_SLOT_{LAYER,VIEWPORT,PSIZ} to select the Y/Z/W
 * components of our VUE header.  See brw_vue_map.c for the layout.
 */
static void
update_so_info(struct pipe_stream_output_info *so_info,
               uint64_t outputs_written)
{
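   /* Build a map from Gallium's consecutive slot numbers back to real
    * VARYING_SLOT_* values.  For example, if only VARYING_SLOT_POS and
    * VARYING_SLOT_PSIZ are written, then reverse_map[0] = VARYING_SLOT_POS
    * and reverse_map[1] = VARYING_SLOT_PSIZ.
    */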
   uint8_t reverse_map[64] = {};
   unsigned slot = 0;
   while (outputs_written) {
      reverse_map[slot++] = u_bit_scan64(&outputs_written);
   }

   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      struct pipe_stream_output *output = &so_info->output[i];

      /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
      output->register_index = reverse_map[output->register_index];

      /* The VUE header contains three scalar fields packed together:
       * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
       * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
       * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
       */
      switch (output->register_index) {
      case VARYING_SLOT_LAYER:
         assert(output->num_components == 1);
         output->register_index = VARYING_SLOT_PSIZ;
         output->start_component = 1;
         break;
      case VARYING_SLOT_VIEWPORT:
         assert(output->num_components == 1);
         output->register_index = VARYING_SLOT_PSIZ;
         output->start_component = 2;
         break;
      case VARYING_SLOT_PSIZ:
         assert(output->num_components == 1);
         output->start_component = 3;
         break;
      }

      //info->outputs_written |= 1ull << output->register_index;
   }
}

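/* Fill one vec4's worth of image-param system values: n dwords taken from
 * the given byte offset within struct brw_image_param, zero-padded out to
 * four entries.
 */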
static void
setup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx,
                        unsigned offset, unsigned n)
{
   assert(offset % sizeof(uint32_t) == 0);

   for (unsigned i = 0; i < n; ++i)
      sysvals[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);

   for (unsigned i = n; i < 4; ++i)
      sysvals[i] = BRW_PARAM_BUILTIN_ZERO;
}

/**
 * Associate NIR uniform variables with the prog_data->param[] mechanism
 * used by the backend.  Also, decide which UBOs we'd like to push in an
 * ideal situation (though the backend can reduce this).
 */
static void
iris_setup_uniforms(const struct brw_compiler *compiler,
                    void *mem_ctx,
                    nir_shader *nir,
                    struct brw_stage_prog_data *prog_data,
                    unsigned kernel_input_size,
                    enum brw_param_builtin **out_system_values,
                    unsigned *out_num_system_values,
                    unsigned *out_num_cbufs)
{
   UNUSED const struct gen_device_info *devinfo = compiler->devinfo;

   unsigned system_values_start = ALIGN(kernel_input_size, sizeof(uint32_t));

   const unsigned IRIS_MAX_SYSTEM_VALUES =
      PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE;
   enum brw_param_builtin *system_values =
      rzalloc_array(mem_ctx, enum brw_param_builtin, IRIS_MAX_SYSTEM_VALUES);
   unsigned num_system_values = 0;

   unsigned patch_vert_idx = -1;
   unsigned ucp_idx[IRIS_MAX_CLIP_PLANES];
   unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
   unsigned variable_group_size_idx = -1;
   unsigned work_dim_idx = -1;
   memset(ucp_idx, -1, sizeof(ucp_idx));
   memset(img_idx, -1, sizeof(img_idx));

   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_builder b;
   nir_builder_init(&b, impl);

   b.cursor = nir_before_block(nir_start_block(impl));
   nir_ssa_def *temp_ubo_name = nir_ssa_undef(&b, 1, 32);
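   /* temp_ubo_name is a placeholder UBO index: the system-value loads
    * created below reference it, and once the total number of user cbufs
    * is known, every load_ubo using it is rewritten to the real sysval
    * cbuf index.
    */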
   /* Turn system value intrinsics into uniforms */
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         nir_ssa_def *offset;

         switch (intrin->intrinsic) {
         case nir_intrinsic_load_constant: {
            unsigned load_size = intrin->dest.ssa.num_components *
                                 intrin->dest.ssa.bit_size / 8;
            unsigned load_align = intrin->dest.ssa.bit_size / 8;

            /* This one is special because it reads from the shader constant
             * data and not cbuf0 which gallium uploads for us.
             */
            b.cursor = nir_instr_remove(&intrin->instr);

            nir_ssa_def *offset =
               nir_iadd_imm(&b, nir_ssa_for_src(&b, intrin->src[0], 1),
                                nir_intrinsic_base(intrin));

            assert(load_size < b.shader->constant_data_size);
            unsigned max_offset = b.shader->constant_data_size - load_size;
            offset = nir_umin(&b, offset, nir_imm_int(&b, max_offset));

            nir_ssa_def *const_data_base_addr = nir_pack_64_2x32_split(&b,
               nir_load_reloc_const_intel(&b, IRIS_SHADER_RELOC_CONST_DATA_ADDR_LOW),
               nir_load_reloc_const_intel(&b, IRIS_SHADER_RELOC_CONST_DATA_ADDR_HIGH));

            nir_ssa_def *data =
               nir_load_global(&b, nir_iadd(&b, const_data_base_addr,
                                                nir_u2u64(&b, offset)),
                               load_align,
                               intrin->dest.ssa.num_components,
                               intrin->dest.ssa.bit_size);

            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                     nir_src_for_ssa(data));
            continue;
         }
         case nir_intrinsic_load_user_clip_plane: {
            unsigned ucp = nir_intrinsic_ucp_id(intrin);

            if (ucp_idx[ucp] == -1) {
               ucp_idx[ucp] = num_system_values;
               num_system_values += 4;
            }

            for (int i = 0; i < 4; i++) {
               system_values[ucp_idx[ucp] + i] =
                  BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i);
            }

            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, system_values_start +
                                     ucp_idx[ucp] * sizeof(uint32_t));
            break;
         }
         case nir_intrinsic_load_patch_vertices_in:
            if (patch_vert_idx == -1)
               patch_vert_idx = num_system_values++;

            system_values[patch_vert_idx] =
               BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;

            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, system_values_start +
                                     patch_vert_idx * sizeof(uint32_t));
            break;
         case nir_intrinsic_image_deref_load_param_intel: {
            assert(devinfo->gen < 9);
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            nir_variable *var = nir_deref_instr_get_variable(deref);

            if (img_idx[var->data.binding] == -1) {
               /* GL only allows arrays of arrays of images. */
               assert(glsl_type_is_image(glsl_without_array(var->type)));
               unsigned num_images = MAX2(1, glsl_get_aoa_size(var->type));

               for (int i = 0; i < num_images; i++) {
                  const unsigned img = var->data.binding + i;

                  img_idx[img] = num_system_values;
                  num_system_values += BRW_IMAGE_PARAM_SIZE;

                  uint32_t *img_sv = &system_values[img_idx[img]];

                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_OFFSET_OFFSET, img,
                     offsetof(struct brw_image_param, offset), 2);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_SIZE_OFFSET, img,
                     offsetof(struct brw_image_param, size), 3);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_STRIDE_OFFSET, img,
                     offsetof(struct brw_image_param, stride), 4);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_TILING_OFFSET, img,
                     offsetof(struct brw_image_param, tiling), 3);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, img,
                     offsetof(struct brw_image_param, swizzling), 2);
               }
            }

            b.cursor = nir_before_instr(instr);
            offset = nir_iadd(&b,
               get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4),
               nir_imm_int(&b, system_values_start +
                               img_idx[var->data.binding] * 4 +
                               nir_intrinsic_base(intrin) * 16));
            break;
         }
         case nir_intrinsic_load_local_group_size: {
            assert(nir->info.cs.local_size_variable);
            if (variable_group_size_idx == -1) {
               variable_group_size_idx = num_system_values;
               num_system_values += 3;
               for (int i = 0; i < 3; i++) {
                  system_values[variable_group_size_idx + i] =
                     BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X + i;
               }
            }

            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, system_values_start +
                                     variable_group_size_idx * sizeof(uint32_t));
            break;
         }
         case nir_intrinsic_load_work_dim: {
            if (work_dim_idx == -1) {
               work_dim_idx = num_system_values++;
               system_values[work_dim_idx] = BRW_PARAM_BUILTIN_WORK_DIM;
            }
            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, system_values_start +
                                     work_dim_idx * sizeof(uint32_t));
            break;
         }
         case nir_intrinsic_load_kernel_input: {
            assert(nir_intrinsic_base(intrin) +
                   nir_intrinsic_range(intrin) <= kernel_input_size);
            b.cursor = nir_before_instr(instr);
            offset = nir_iadd_imm(&b, intrin->src[0].ssa,
                                      nir_intrinsic_base(intrin));
            break;
         }
         default:
            continue;
         }

         nir_intrinsic_instr *load =
            nir_intrinsic_instr_create(nir, nir_intrinsic_load_ubo);
         load->num_components = intrin->dest.ssa.num_components;
         load->src[0] = nir_src_for_ssa(temp_ubo_name);
         load->src[1] = nir_src_for_ssa(offset);
         nir_intrinsic_set_align(load, 4, 0);
         nir_intrinsic_set_range_base(load, 0);
         nir_intrinsic_set_range(load, ~0);
         nir_ssa_dest_init(&load->instr, &load->dest,
                           intrin->dest.ssa.num_components,
                           intrin->dest.ssa.bit_size, NULL);
         nir_builder_instr_insert(&b, &load->instr);
         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                  nir_src_for_ssa(&load->dest.ssa));
         nir_instr_remove(instr);
      }
   }

   nir_validate_shader(nir, "before remapping");

   /* Uniforms are stored in constant buffer 0, so the user-facing UBOs are
    * shifted up by one index.  If any constant buffer is needed, constant
    * buffer 0 will be needed, so account for it.
    */
   unsigned num_cbufs = nir->info.num_ubos;
   if (num_cbufs || nir->num_uniforms)
      num_cbufs++;

   /* Place the new params in a new cbuf. */
   if (num_system_values > 0 || kernel_input_size > 0) {
      unsigned sysval_cbuf_index = num_cbufs;
      num_cbufs++;

      system_values = reralloc(mem_ctx, system_values, enum brw_param_builtin,
                               num_system_values);

      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);

            if (load->intrinsic != nir_intrinsic_load_ubo)
               continue;

            b.cursor = nir_before_instr(instr);

            assert(load->src[0].is_ssa);

            if (load->src[0].ssa == temp_ubo_name) {
               nir_ssa_def *imm = nir_imm_int(&b, sysval_cbuf_index);
               nir_instr_rewrite_src(instr, &load->src[0],
                                     nir_src_for_ssa(imm));
            }
         }
      }

      /* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */
      nir_opt_constant_folding(nir);
   } else {
      ralloc_free(system_values);
      system_values = NULL;
   }

   assert(num_cbufs < PIPE_MAX_CONSTANT_BUFFERS);
   nir_validate_shader(nir, "after remap");

   /* We don't use params[] but gallium leaves num_uniforms set.  We use this
    * to detect when cbuf0 exists but we don't need it anymore when we get
    * here.  Zero it out so that the back-end doesn't get confused when
    * nr_params * 4 != num_uniforms.
    */
   nir->num_uniforms = 0;

   *out_system_values = system_values;
   *out_num_system_values = num_system_values;
   *out_num_cbufs = num_cbufs;
}

static const char *surface_group_names[] = {
   [IRIS_SURFACE_GROUP_RENDER_TARGET]      = "render target",
   [IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = "non-coherent render target read",
   [IRIS_SURFACE_GROUP_CS_WORK_GROUPS]     = "CS work groups",
   [IRIS_SURFACE_GROUP_TEXTURE]            = "texture",
   [IRIS_SURFACE_GROUP_UBO]                = "ubo",
   [IRIS_SURFACE_GROUP_SSBO]               = "ssbo",
   [IRIS_SURFACE_GROUP_IMAGE]              = "image",
};

static void
iris_print_binding_table(FILE *fp, const char *name,
                         const struct iris_binding_table *bt)
{
   STATIC_ASSERT(ARRAY_SIZE(surface_group_names) == IRIS_SURFACE_GROUP_COUNT);

   uint32_t total = 0;
   uint32_t compacted = 0;

   for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
      uint32_t size = bt->sizes[i];
      total += size;
      if (size)
         compacted += util_bitcount64(bt->used_mask[i]);
   }

   if (total == 0) {
      fprintf(fp, "Binding table for %s is empty\n\n", name);
      return;
   }

   if (total != compacted) {
      fprintf(fp, "Binding table for %s "
              "(compacted to %u entries from %u entries)\n",
              name, compacted, total);
   } else {
      fprintf(fp, "Binding table for %s (%u entries)\n", name, total);
   }

   uint32_t entry = 0;
   for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
      uint64_t mask = bt->used_mask[i];
      while (mask) {
         int index = u_bit_scan64(&mask);
         fprintf(fp, "  [%u] %s #%d\n", entry++, surface_group_names[i], index);
      }
   }
   fprintf(fp, "\n");
}

enum {
   /* Max elements in a surface group. */
   SURFACE_GROUP_MAX_ELEMENTS = 64,
};

/**
 * Map a <group, index> pair to a binding table index.
 *
 * For example: <UBO, 5> => binding table index 12
 */
uint32_t
iris_group_index_to_bti(const struct iris_binding_table *bt,
                        enum iris_surface_group group, uint32_t index)
{
   assert(index < bt->sizes[group]);
   uint64_t mask = bt->used_mask[group];
   uint64_t bit = 1ull << index;
   if (bit & mask) {
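      /* Compacted BTI = group base + number of used entries below this
       * index.  E.g. with used_mask 0b1011, index 3 maps to
       * offsets[group] + popcount(0b0011) = offsets[group] + 2.
       */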
      return bt->offsets[group] + util_bitcount64((bit - 1) & mask);
   } else {
      return IRIS_SURFACE_NOT_USED;
   }
}

/**
 * Map a binding table index back to a <group, index> pair.
 *
 * For example: binding table index 12 => <UBO, 5>
 */
uint32_t
iris_bti_to_group_index(const struct iris_binding_table *bt,
                        enum iris_surface_group group, uint32_t bti)
{
   uint64_t used_mask = bt->used_mask[group];
   assert(bti >= bt->offsets[group]);

   uint32_t c = bti - bt->offsets[group];
   while (used_mask) {
      int i = u_bit_scan64(&used_mask);
      if (c == 0)
         return i;
      c--;
   }

   return IRIS_SURFACE_NOT_USED;
}

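/* Rewrite a constant or indirect surface-index source to its final binding
 * table index within the given group.
 */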
static void
rewrite_src_with_bti(nir_builder *b, struct iris_binding_table *bt,
                     nir_instr *instr, nir_src *src,
                     enum iris_surface_group group)
{
   assert(bt->sizes[group] > 0);

   b->cursor = nir_before_instr(instr);
   nir_ssa_def *bti;
   if (nir_src_is_const(*src)) {
      uint32_t index = nir_src_as_uint(*src);
      bti = nir_imm_intN_t(b, iris_group_index_to_bti(bt, group, index),
                           src->ssa->bit_size);
   } else {
      /* Indirect usage makes all of the group's surfaces available, so we
       * can just add the base.
       */
      assert(bt->used_mask[group] == BITFIELD64_MASK(bt->sizes[group]));
      bti = nir_iadd_imm(b, src->ssa, bt->offsets[group]);
   }
   nir_instr_rewrite_src(instr, src, nir_src_for_ssa(bti));
}

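/* Record that the surface selected by @src in the given group is used,
 * marking the whole group when the index is indirect.
 */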
static void
mark_used_with_src(struct iris_binding_table *bt, nir_src *src,
                   enum iris_surface_group group)
{
   assert(bt->sizes[group] > 0);

   if (nir_src_is_const(*src)) {
      uint64_t index = nir_src_as_uint(*src);
      assert(index < bt->sizes[group]);
      bt->used_mask[group] |= 1ull << index;
   } else {
      /* There's an indirect usage, we need all the surfaces. */
      bt->used_mask[group] = BITFIELD64_MASK(bt->sizes[group]);
   }
}

static bool
skip_compacting_binding_tables(void)
{
   static int skip = -1;
   if (skip < 0)
      skip = env_var_as_boolean("INTEL_DISABLE_COMPACT_BINDING_TABLE", false);
   return skip;
}

/**
 * Set up the binding table indices and apply to the shader.
 */
static void
iris_setup_binding_table(const struct gen_device_info *devinfo,
                         struct nir_shader *nir,
                         struct iris_binding_table *bt,
                         unsigned num_render_targets,
                         unsigned num_system_values,
                         unsigned num_cbufs)
{
   const struct shader_info *info = &nir->info;

   memset(bt, 0, sizeof(*bt));

   /* Set the sizes for each surface group.  For some groups, we already know
    * upfront how many will be used, so mark them.
    */
   if (info->stage == MESA_SHADER_FRAGMENT) {
      bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET] = num_render_targets;
      /* All render targets used. */
      bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET] =
         BITFIELD64_MASK(num_render_targets);

      /* Set up the render target read surface group in order to support
       * non-coherent framebuffer fetch on Gen8.
       */
      if (devinfo->gen == 8 && info->outputs_read) {
         bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = num_render_targets;
         bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] =
            BITFIELD64_MASK(num_render_targets);
      }
   } else if (info->stage == MESA_SHADER_COMPUTE) {
      bt->sizes[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
   }

   bt->sizes[IRIS_SURFACE_GROUP_TEXTURE] = util_last_bit(info->textures_used);
   bt->used_mask[IRIS_SURFACE_GROUP_TEXTURE] = info->textures_used;

   bt->sizes[IRIS_SURFACE_GROUP_IMAGE] = info->num_images;

   /* Allocate an extra slot in the UBO section for NIR constants.
    * Binding table compaction will remove it if unnecessary.
    *
    * We don't include them in iris_compiled_shader::num_cbufs because
    * they are uploaded separately from shs->constbuf[], but from a shader
    * point of view, they're another UBO (at the end of the section).
    */
   bt->sizes[IRIS_SURFACE_GROUP_UBO] = num_cbufs + 1;

   bt->sizes[IRIS_SURFACE_GROUP_SSBO] = info->num_ssbos;

   for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
      assert(bt->sizes[i] <= SURFACE_GROUP_MAX_ELEMENTS);

   /* Mark surfaces used for the cases where we don't have the information
    * available upfront.
    */
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
   nir_foreach_block (block, impl) {
      nir_foreach_instr (instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_load_num_work_groups:
            bt->used_mask[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
            break;

         case nir_intrinsic_load_output:
            if (devinfo->gen == 8) {
               mark_used_with_src(bt, &intrin->src[0],
                                  IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
            }
            break;

         case nir_intrinsic_image_size:
         case nir_intrinsic_image_load:
         case nir_intrinsic_image_store:
         case nir_intrinsic_image_atomic_add:
         case nir_intrinsic_image_atomic_imin:
         case nir_intrinsic_image_atomic_umin:
         case nir_intrinsic_image_atomic_imax:
         case nir_intrinsic_image_atomic_umax:
         case nir_intrinsic_image_atomic_and:
         case nir_intrinsic_image_atomic_or:
         case nir_intrinsic_image_atomic_xor:
         case nir_intrinsic_image_atomic_exchange:
         case nir_intrinsic_image_atomic_comp_swap:
         case nir_intrinsic_image_load_raw_intel:
         case nir_intrinsic_image_store_raw_intel:
            mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_IMAGE);
            break;

         case nir_intrinsic_load_ubo:
            mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_UBO);
            break;

         case nir_intrinsic_store_ssbo:
            mark_used_with_src(bt, &intrin->src[1], IRIS_SURFACE_GROUP_SSBO);
            break;

         case nir_intrinsic_get_ssbo_size:
         case nir_intrinsic_ssbo_atomic_add:
         case nir_intrinsic_ssbo_atomic_imin:
         case nir_intrinsic_ssbo_atomic_umin:
         case nir_intrinsic_ssbo_atomic_imax:
         case nir_intrinsic_ssbo_atomic_umax:
         case nir_intrinsic_ssbo_atomic_and:
         case nir_intrinsic_ssbo_atomic_or:
         case nir_intrinsic_ssbo_atomic_xor:
         case nir_intrinsic_ssbo_atomic_exchange:
         case nir_intrinsic_ssbo_atomic_comp_swap:
         case nir_intrinsic_ssbo_atomic_fmin:
         case nir_intrinsic_ssbo_atomic_fmax:
         case nir_intrinsic_ssbo_atomic_fcomp_swap:
         case nir_intrinsic_load_ssbo:
            mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_SSBO);
            break;

         default:
            break;
         }
      }
   }

   /* When disabled, we just mark everything as used. */
   if (unlikely(skip_compacting_binding_tables())) {
      for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
         bt->used_mask[i] = BITFIELD64_MASK(bt->sizes[i]);
   }

   /* Calculate the offsets and the binding table size based on the used
    * surfaces.  After this point, the functions to go between "group indices"
    * and binding table indices can be used.
    */
   uint32_t next = 0;
   for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
      if (bt->used_mask[i] != 0) {
         bt->offsets[i] = next;
         next += util_bitcount64(bt->used_mask[i]);
      }
   }
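   /* Each binding table entry is one 32-bit surface-state pointer, hence
    * four bytes per used surface.
    */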
   bt->size_bytes = next * 4;

   if (INTEL_DEBUG & DEBUG_BT) {
      iris_print_binding_table(stderr, gl_shader_stage_name(info->stage), bt);
   }

   /* Apply the binding table indices.  The backend compiler is not expected
    * to change those, as we haven't set any of the *_start entries in brw
    * binding_table.
    */
   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block (block, impl) {
      nir_foreach_instr (instr, block) {
         if (instr->type == nir_instr_type_tex) {
            nir_tex_instr *tex = nir_instr_as_tex(instr);
            tex->texture_index =
               iris_group_index_to_bti(bt, IRIS_SURFACE_GROUP_TEXTURE,
                                       tex->texture_index);
            continue;
         }

         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_image_size:
         case nir_intrinsic_image_load:
         case nir_intrinsic_image_store:
         case nir_intrinsic_image_atomic_add:
         case nir_intrinsic_image_atomic_imin:
         case nir_intrinsic_image_atomic_umin:
         case nir_intrinsic_image_atomic_imax:
         case nir_intrinsic_image_atomic_umax:
         case nir_intrinsic_image_atomic_and:
         case nir_intrinsic_image_atomic_or:
         case nir_intrinsic_image_atomic_xor:
         case nir_intrinsic_image_atomic_exchange:
         case nir_intrinsic_image_atomic_comp_swap:
         case nir_intrinsic_image_load_raw_intel:
         case nir_intrinsic_image_store_raw_intel:
            rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
                                 IRIS_SURFACE_GROUP_IMAGE);
            break;

         case nir_intrinsic_load_ubo:
            rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
                                 IRIS_SURFACE_GROUP_UBO);
            break;

         case nir_intrinsic_store_ssbo:
            rewrite_src_with_bti(&b, bt, instr, &intrin->src[1],
                                 IRIS_SURFACE_GROUP_SSBO);
            break;

         case nir_intrinsic_load_output:
            if (devinfo->gen == 8) {
               rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
                                    IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
            }
            break;

         case nir_intrinsic_get_ssbo_size:
         case nir_intrinsic_ssbo_atomic_add:
         case nir_intrinsic_ssbo_atomic_imin:
         case nir_intrinsic_ssbo_atomic_umin:
         case nir_intrinsic_ssbo_atomic_imax:
         case nir_intrinsic_ssbo_atomic_umax:
         case nir_intrinsic_ssbo_atomic_and:
         case nir_intrinsic_ssbo_atomic_or:
         case nir_intrinsic_ssbo_atomic_xor:
         case nir_intrinsic_ssbo_atomic_exchange:
         case nir_intrinsic_ssbo_atomic_comp_swap:
         case nir_intrinsic_ssbo_atomic_fmin:
         case nir_intrinsic_ssbo_atomic_fmax:
         case nir_intrinsic_ssbo_atomic_fcomp_swap:
         case nir_intrinsic_load_ssbo:
            rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
                                 IRIS_SURFACE_GROUP_SSBO);
            break;

         default:
            break;
         }
      }
   }
}

static void
iris_debug_recompile(struct iris_context *ice,
                     struct shader_info *info,
                     const struct brw_base_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *) ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;
   const struct brw_compiler *c = screen->compiler;

   if (!info)
      return;

   c->shader_perf_log(&ice->dbg, "Recompiling %s shader for program %s: %s\n",
                      _mesa_shader_stage_to_string(info->stage),
                      info->name ? info->name : "(no identifier)",
                      info->label ? info->label : "");

   const void *old_iris_key =
      iris_find_previous_compile(ice, info->stage, key->program_string_id);

   union brw_any_prog_key old_key;

   switch (info->stage) {
   case MESA_SHADER_VERTEX:
      old_key.vs = iris_to_brw_vs_key(devinfo, old_iris_key);
      break;
   case MESA_SHADER_TESS_CTRL:
      old_key.tcs = iris_to_brw_tcs_key(devinfo, old_iris_key);
      break;
   case MESA_SHADER_TESS_EVAL:
      old_key.tes = iris_to_brw_tes_key(devinfo, old_iris_key);
      break;
   case MESA_SHADER_GEOMETRY:
      old_key.gs = iris_to_brw_gs_key(devinfo, old_iris_key);
      break;
   case MESA_SHADER_FRAGMENT:
      old_key.wm = iris_to_brw_fs_key(devinfo, old_iris_key);
      break;
   case MESA_SHADER_COMPUTE:
      old_key.cs = iris_to_brw_cs_key(devinfo, old_iris_key);
      break;
   default:
      unreachable("invalid shader stage");
   }

   brw_debug_key_recompile(c, &ice->dbg, info->stage, &old_key.base, key);
}

/**
 * Get the shader for the last enabled geometry stage.
 *
 * This stage is the one which will feed stream output and the rasterizer.
 */
static gl_shader_stage
last_vue_stage(struct iris_context *ice)
{
   if (ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
      return MESA_SHADER_GEOMETRY;

   if (ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
      return MESA_SHADER_TESS_EVAL;

   return MESA_SHADER_VERTEX;
}

/**
 * Compile a vertex shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_vs(struct iris_context *ice,
                struct iris_uncompiled_shader *ish,
                const struct iris_vs_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_vs_prog_data *vs_prog_data =
      rzalloc(mem_ctx, struct brw_vs_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &vs_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

   if (key->vue.nr_userclip_plane_consts) {
      nir_function_impl *impl = nir_shader_get_entrypoint(nir);
      nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
                        true, false, NULL);
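      /* nir_lower_clip_vs emits new stores for gl_ClipDistance; rerun the
       * io-to-temporaries and SSA cleanup passes so the shader is back in
       * the form the rest of the compile expects.
       */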
      nir_lower_io_to_temporaries(nir, impl, true, false);
      nir_lower_global_vars_to_local(nir);
      nir_lower_vars_to_ssa(nir);
      nir_shader_gather_info(nir, impl);
   }

   prog_data->use_alt_mode = ish->use_alt_mode;

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
                       &num_system_values, &num_cbufs);

   struct iris_binding_table bt;
   iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
                            num_system_values, num_cbufs);

   brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);

   brw_compute_vue_map(devinfo,
                       &vue_prog_data->vue_map, nir->info.outputs_written,
                       nir->info.separate_shader, /* pos_slots */ 1);

   struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(devinfo, key);

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_vs(compiler, &ice->dbg, mem_ctx, &brw_key, vs_prog_data,
                     nir, -1, NULL, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile vertex shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   if (ish->compiled_once) {
      iris_debug_recompile(ice, &nir->info, &brw_key.base);
   } else {
      ish->compiled_once = true;
   }

   uint32_t *so_decls =
      screen->vtbl.create_so_decl_list(&ish->stream_output,
                                       &vue_prog_data->vue_map);

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_VS, sizeof(*key), key, program,
                         prog_data, so_decls, system_values, num_system_values,
                         0, num_cbufs, &bt);

   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));

   ralloc_free(mem_ctx);
   return shader;
}

/**
 * Update the current vertex shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_vs(struct iris_context *ice)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_VERTEX];
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_VERTEX];

   struct iris_vs_prog_key key = { KEY_ID(vue.base) };
   screen->vtbl.populate_vs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
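   /* Check the in-memory variant cache first, then the on-disk shader
    * cache, and only compile from NIR as a last resort.
    */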
   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS];
   struct iris_compiled_shader *shader =
      iris_find_cached_shader(ice, IRIS_CACHE_VS, sizeof(key), &key);

   if (!shader)
      shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));

   if (!shader)
      shader = iris_compile_vs(ice, ish, &key);

   if (old != shader) {
      ice->shaders.prog[IRIS_CACHE_VS] = shader;
      ice->state.dirty |= IRIS_DIRTY_VF_SGVS;
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_VS |
                                IRIS_STAGE_DIRTY_BINDINGS_VS |
                                IRIS_STAGE_DIRTY_CONSTANTS_VS;
      shs->sysvals_need_upload = true;

      const struct brw_vs_prog_data *vs_prog_data =
            (void *) shader->prog_data;
      const bool uses_draw_params = vs_prog_data->uses_firstvertex ||
                                    vs_prog_data->uses_baseinstance;
      const bool uses_derived_draw_params = vs_prog_data->uses_drawid ||
                                            vs_prog_data->uses_is_indexed_draw;
      const bool needs_sgvs_element = uses_draw_params ||
                                      vs_prog_data->uses_instanceid ||
                                      vs_prog_data->uses_vertexid;

      if (ice->state.vs_uses_draw_params != uses_draw_params ||
          ice->state.vs_uses_derived_draw_params != uses_derived_draw_params ||
          ice->state.vs_needs_edge_flag != ish->needs_edge_flag) {
         ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS |
                             IRIS_DIRTY_VERTEX_ELEMENTS;
      }
      ice->state.vs_uses_draw_params = uses_draw_params;
      ice->state.vs_uses_derived_draw_params = uses_derived_draw_params;
      ice->state.vs_needs_sgvs_element = needs_sgvs_element;
      ice->state.vs_needs_edge_flag = ish->needs_edge_flag;
   }
}

/**
 * Get the shader_info for a given stage, or NULL if the stage is disabled.
 */
const struct shader_info *
iris_get_shader_info(const struct iris_context *ice, gl_shader_stage stage)
{
   const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];

   if (!ish)
      return NULL;

   const nir_shader *nir = ish->nir;
   return &nir->info;
}

/**
 * Get the union of TCS output and TES input slots.
 *
 * TCS and TES need to agree on a common URB entry layout.  In particular,
 * the data for all patch vertices is stored in a single URB entry (unlike
 * GS which has one entry per input vertex).  This means that per-vertex
 * array indexing needs a stride.
 *
 * SSO requires locations to match, but doesn't require the number of
 * outputs/inputs to match (in fact, the TCS often has extra outputs).
 * So, we need to take the extra step of unifying these on the fly.
 */
static void
get_unified_tess_slots(const struct iris_context *ice,
                       uint64_t *per_vertex_slots,
                       uint32_t *per_patch_slots)
{
   const struct shader_info *tcs =
      iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
   const struct shader_info *tes =
      iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);

   *per_vertex_slots = tes->inputs_read;
   *per_patch_slots = tes->patch_inputs_read;

   if (tcs) {
      *per_vertex_slots |= tcs->outputs_written;
      *per_patch_slots |= tcs->patch_outputs_written;
   }
}

/**
 * Compile a tessellation control shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_tcs(struct iris_context *ice,
                 struct iris_uncompiled_shader *ish,
                 const struct iris_tcs_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct nir_shader_compiler_options *options =
      compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].NirOptions;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_tcs_prog_data *tcs_prog_data =
      rzalloc(mem_ctx, struct brw_tcs_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   const struct gen_device_info *devinfo = &screen->devinfo;
   enum brw_param_builtin *system_values = NULL;
   unsigned num_system_values = 0;
   unsigned num_cbufs = 0;

   nir_shader *nir;

   struct iris_binding_table bt;

   struct brw_tcs_prog_key brw_key = iris_to_brw_tcs_key(devinfo, key);

   if (ish) {
      nir = nir_shader_clone(mem_ctx, ish->nir);

      iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
                          &num_system_values, &num_cbufs);
      iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
                               num_system_values, num_cbufs);
      brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
   } else {
      nir =
         brw_nir_create_passthrough_tcs(mem_ctx, compiler, options, &brw_key);

      /* Reserve space for passing the default tess levels as constants. */
      num_cbufs = 1;
      num_system_values = 8;
      system_values =
         rzalloc_array(mem_ctx, enum brw_param_builtin, num_system_values);
      prog_data->param = rzalloc_array(mem_ctx, uint32_t, num_system_values);
      prog_data->nr_params = num_system_values;
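      /* Note the layout: the outer tess levels land in params 7..4 (with
       * outer[0] highest) and the inner levels in params 3..2, which is
       * presumably the layout the default tess level upload expects.
       */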

      if (key->tes_primitive_mode == GL_QUADS) {
         for (int i = 0; i < 4; i++)
            system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;

         system_values[3] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
         system_values[2] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y;
      } else if (key->tes_primitive_mode == GL_TRIANGLES) {
         for (int i = 0; i < 3; i++)
            system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;

         system_values[4] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
      } else {
         assert(key->tes_primitive_mode == GL_ISOLINES);
         system_values[7] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y;
         system_values[6] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;
      }

      /* Manually set up the TCS binding table. */
      memset(&bt, 0, sizeof(bt));
      bt.sizes[IRIS_SURFACE_GROUP_UBO] = 1;
      bt.used_mask[IRIS_SURFACE_GROUP_UBO] = 1;
      bt.size_bytes = 4;

      prog_data->ubo_ranges[0].length = 1;
   }

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_tcs(compiler, &ice->dbg, mem_ctx, &brw_key, tcs_prog_data,
                      nir, -1, NULL, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile tessellation control shader: %s\n",
                 error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   if (ish) {
      if (ish->compiled_once) {
         iris_debug_recompile(ice, &nir->info, &brw_key.base);
      } else {
         ish->compiled_once = true;
      }
   }

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_TCS, sizeof(*key), key, program,
                         prog_data, NULL, system_values, num_system_values,