/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_program.c
 *
 * This file contains the driver interface for compiling shaders.
 *
 * See iris_program_cache.c for the in-memory program cache where the
 * compiled shaders are stored.
 */

#include <stdio.h>
#include <errno.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_atomic.h"
#include "util/u_upload_mgr.h"
#include "util/debug.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_serialize.h"
#include "intel/compiler/brw_compiler.h"
#include "intel/compiler/brw_nir.h"
#include "iris_context.h"
#include "nir/tgsi_to_nir.h"

#define KEY_ID(prefix) .prefix.program_string_id = ish->program_id
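/* 0x688 is SWIZZLE_XYZW: components 0-3 packed into four 3-bit fields (the
 * identity swizzle), and 16x MSAA is only supported on Gen9 and later.
 */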
#define BRW_KEY_INIT(gen, prog_id)                       \
   .base.program_string_id = prog_id,                    \
   .base.subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM, \
   .base.tex.swizzles[0 ... MAX_SAMPLERS - 1] = 0x688,   \
   .base.tex.compressed_multisample_layout_mask = ~0,    \
   .base.tex.msaa_16 = (gen >= 9 ? ~0 : 0)

static unsigned
get_new_program_id(struct iris_screen *screen)
{
   return p_atomic_inc_return(&screen->program_id);
}

static struct brw_vs_prog_key
iris_to_brw_vs_key(const struct gen_device_info *devinfo,
                   const struct iris_vs_prog_key *key)
{
   return (struct brw_vs_prog_key) {
      BRW_KEY_INIT(devinfo->gen, key->vue.base.program_string_id),

      /* Don't tell the backend about our clip plane constants; we've
       * already lowered them in NIR and don't want it doing so again.
       */
      .nr_userclip_plane_consts = 0,
   };
}

static struct brw_tcs_prog_key
iris_to_brw_tcs_key(const struct gen_device_info *devinfo,
                    const struct iris_tcs_prog_key *key)
{
   return (struct brw_tcs_prog_key) {
      BRW_KEY_INIT(devinfo->gen, key->vue.base.program_string_id),
      .tes_primitive_mode = key->tes_primitive_mode,
      .input_vertices = key->input_vertices,
      .patch_outputs_written = key->patch_outputs_written,
      .outputs_written = key->outputs_written,
      .quads_workaround = key->quads_workaround,
   };
}

static struct brw_tes_prog_key
iris_to_brw_tes_key(const struct gen_device_info *devinfo,
                    const struct iris_tes_prog_key *key)
{
   return (struct brw_tes_prog_key) {
      BRW_KEY_INIT(devinfo->gen, key->vue.base.program_string_id),
      .patch_inputs_read = key->patch_inputs_read,
      .inputs_read = key->inputs_read,
   };
}

static struct brw_gs_prog_key
iris_to_brw_gs_key(const struct gen_device_info *devinfo,
                   const struct iris_gs_prog_key *key)
{
   return (struct brw_gs_prog_key) {
      BRW_KEY_INIT(devinfo->gen, key->vue.base.program_string_id),
   };
}

static struct brw_wm_prog_key
iris_to_brw_fs_key(const struct gen_device_info *devinfo,
                   const struct iris_fs_prog_key *key)
{
   return (struct brw_wm_prog_key) {
      BRW_KEY_INIT(devinfo->gen, key->base.program_string_id),
      .nr_color_regions = key->nr_color_regions,
      .flat_shade = key->flat_shade,
      .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
      .alpha_to_coverage = key->alpha_to_coverage,
      .clamp_fragment_color = key->clamp_fragment_color,
      .persample_interp = key->persample_interp,
      .multisample_fbo = key->multisample_fbo,
      .force_dual_color_blend = key->force_dual_color_blend,
      .coherent_fb_fetch = key->coherent_fb_fetch,
      .color_outputs_valid = key->color_outputs_valid,
      .input_slots_valid = key->input_slots_valid,
   };
}

static struct brw_cs_prog_key
iris_to_brw_cs_key(const struct gen_device_info *devinfo,
                   const struct iris_cs_prog_key *key)
{
   return (struct brw_cs_prog_key) {
      BRW_KEY_INIT(devinfo->gen, key->base.program_string_id),
   };
}

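/**
 * Allocate some transient upload space, returning a CPU mapping of it and
 * recording the resulting buffer and offset in the given iris_state_ref.
 */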
static void *
upload_state(struct u_upload_mgr *uploader,
             struct iris_state_ref *ref,
             unsigned size,
             unsigned alignment)
{
   void *p = NULL;
   u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
   return p;
}

void
iris_upload_ubo_ssbo_surf_state(struct iris_context *ice,
                                struct pipe_shader_buffer *buf,
                                struct iris_state_ref *surf_state,
                                bool ssbo)
{
   struct pipe_context *ctx = &ice->ctx;
   struct iris_screen *screen = (struct iris_screen *) ctx->screen;

   void *map =
      upload_state(ice->state.surface_uploader, surf_state,
                   screen->isl_dev.ss.size, 64);
   if (unlikely(!map)) {
      surf_state->res = NULL;
      return;
   }

   struct iris_resource *res = (void *) buf->buffer;
   struct iris_bo *surf_bo = iris_resource_bo(surf_state->res);
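   /* Make the stored offset relative to Surface State Base Address rather
    * than to the start of the upload buffer's BO.
    */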
   surf_state->offset += iris_bo_offset_from_base_address(surf_bo);

   isl_buffer_fill_state(&screen->isl_dev, map,
                         .address = res->bo->gtt_offset + res->offset +
                                    buf->buffer_offset,
                         .size_B = buf->buffer_size - res->offset,
                         .format = ssbo ? ISL_FORMAT_RAW
                                        : ISL_FORMAT_R32G32B32A32_FLOAT,
                         .swizzle = ISL_SWIZZLE_IDENTITY,
                         .stride_B = 1,
                         .mocs = iris_mocs(res->bo, &screen->isl_dev));
}

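/**
 * Compute a flat offset for an array-of-arrays deref chain.
 *
 * For example, given "image2D img[3][2]", an access to img[i][j] yields
 * (i * 2 + j) * elem_size, clamped so it cannot index past the array.
 */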
static nir_ssa_def *
get_aoa_deref_offset(nir_builder *b,
                     nir_deref_instr *deref,
                     unsigned elem_size)
{
   unsigned array_size = elem_size;
   nir_ssa_def *offset = nir_imm_int(b, 0);

   while (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);

      /* This level's element size is the previous level's array size */
      assert(deref->arr.index.ssa);
      nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
      offset = nir_iadd(b, offset,
                           nir_imul(b, index, nir_imm_int(b, array_size)));

      deref = nir_deref_instr_parent(deref);
      assert(glsl_type_is_array(deref->type));
      array_size *= glsl_get_length(deref->type);
   }

   /* Accessing an invalid surface index with the dataport can result in a
    * hang.  According to the spec "if the index used to select an individual
    * element is negative or greater than or equal to the size of the array,
    * the results of the operation are undefined but may not lead to
    * termination" -- which is one of the possible outcomes of the hang.
    * Clamp the index to prevent access outside of the array bounds.
    */
   return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
}

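/**
 * Rewrite deref-based image intrinsics to use flat binding indices, adding
 * the variable's driver_location to any array-of-arrays offset.
 */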
static void
iris_lower_storage_image_derefs(nir_shader *nir)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_image_deref_load:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
         case nir_intrinsic_image_deref_atomic_imin:
         case nir_intrinsic_image_deref_atomic_umin:
         case nir_intrinsic_image_deref_atomic_imax:
         case nir_intrinsic_image_deref_atomic_umax:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
         case nir_intrinsic_image_deref_atomic_exchange:
         case nir_intrinsic_image_deref_atomic_comp_swap:
         case nir_intrinsic_image_deref_size:
         case nir_intrinsic_image_deref_samples:
         case nir_intrinsic_image_deref_load_raw_intel:
         case nir_intrinsic_image_deref_store_raw_intel: {
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            nir_variable *var = nir_deref_instr_get_variable(deref);

            b.cursor = nir_before_instr(&intrin->instr);
            nir_ssa_def *index =
               nir_iadd(&b, nir_imm_int(&b, var->data.driver_location),
                            get_aoa_deref_offset(&b, deref, 1));
            nir_rewrite_image_intrinsic(intrin, index, false);
            break;
         }

         default:
            break;
         }
      }
   }
}

/**
 * Undo nir_lower_passthrough_edgeflags but keep the inputs_read flag.
 */
static bool
iris_fix_edge_flags(nir_shader *nir)
{
   if (nir->info.stage != MESA_SHADER_VERTEX)
      return false;

   nir_variable *var = NULL;
   nir_foreach_variable(v, &nir->outputs) {
      if (v->data.location == VARYING_SLOT_EDGE) {
         var = v;
         break;
      }
   }

   if (!var)
      return false;

   exec_node_remove(&var->node);
   var->data.mode = nir_var_shader_temp;
   exec_list_push_tail(&nir->globals, &var->node);
   nir->info.outputs_written &= ~VARYING_BIT_EDGE;
   nir->info.inputs_read &= ~VERT_BIT_EDGEFLAG;
   nir_fixup_deref_modes(nir);

   nir_foreach_function(f, nir) {
      if (f->impl) {
         nir_metadata_preserve(f->impl, nir_metadata_block_index |
                                        nir_metadata_dominance |
                                        nir_metadata_live_ssa_defs |
                                        nir_metadata_loop_analysis);
      }
   }

   return true;
}

/**
 * Fix an uncompiled shader's stream output info.
 *
 * Core Gallium stores output->register_index as a "slot" number, where
 * slots are assigned consecutively to all outputs in info->outputs_written.
 * This naive packing of outputs doesn't work for us - we too have slots,
 * but the layout is defined by the VUE map, which we won't have until we
 * compile a specific shader variant.  So, we remap these and simply store
 * VARYING_SLOT_* in our copy's output->register_index fields.
 *
 * We also fix up VARYING_SLOT_{LAYER,VIEWPORT,PSIZ} to select the Y/Z/W
 * components of our VUE header.  See brw_vue_map.c for the layout.
 */
static void
update_so_info(struct pipe_stream_output_info *so_info,
               uint64_t outputs_written)
{
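   /* Invert Gallium's condensed slot packing: reverse_map[slot] holds the
    * original VARYING_SLOT_* value for each consecutively-assigned slot.
    */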
   uint8_t reverse_map[64] = {};
   unsigned slot = 0;
   while (outputs_written) {
      reverse_map[slot++] = u_bit_scan64(&outputs_written);
   }

   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      struct pipe_stream_output *output = &so_info->output[i];

      /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
      output->register_index = reverse_map[output->register_index];

      /* The VUE header contains three scalar fields packed together:
       * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
       * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
       * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
       */
      switch (output->register_index) {
      case VARYING_SLOT_LAYER:
         assert(output->num_components == 1);
         output->register_index = VARYING_SLOT_PSIZ;
         output->start_component = 1;
         break;
      case VARYING_SLOT_VIEWPORT:
         assert(output->num_components == 1);
         output->register_index = VARYING_SLOT_PSIZ;
         output->start_component = 2;
         break;
      case VARYING_SLOT_PSIZ:
         assert(output->num_components == 1);
         output->start_component = 3;
         break;
      }

      //info->outputs_written |= 1ull << output->register_index;
   }
}

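/**
 * Fill a vec4's worth of image system values: n components taken from the
 * given byte offset within brw_image_param, zero-padded out to four.
 */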
static void
setup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx,
                        unsigned offset, unsigned n)
{
   assert(offset % sizeof(uint32_t) == 0);

   for (unsigned i = 0; i < n; ++i)
      sysvals[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);

   for (unsigned i = n; i < 4; ++i)
      sysvals[i] = BRW_PARAM_BUILTIN_ZERO;
}

/**
 * Associate NIR uniform variables with the prog_data->param[] mechanism
 * used by the backend.  Also, decide which UBOs we'd like to push in an
 * ideal situation (though the backend can reduce this).
 */
static void
iris_setup_uniforms(const struct brw_compiler *compiler,
                    void *mem_ctx,
                    nir_shader *nir,
                    struct brw_stage_prog_data *prog_data,
                    enum brw_param_builtin **out_system_values,
                    unsigned *out_num_system_values,
                    unsigned *out_num_cbufs)
{
   UNUSED const struct gen_device_info *devinfo = compiler->devinfo;

   const unsigned IRIS_MAX_SYSTEM_VALUES =
      PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE;
   enum brw_param_builtin *system_values =
      rzalloc_array(mem_ctx, enum brw_param_builtin, IRIS_MAX_SYSTEM_VALUES);
   unsigned num_system_values = 0;

   unsigned patch_vert_idx = -1;
   unsigned ucp_idx[IRIS_MAX_CLIP_PLANES];
   unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
   unsigned variable_group_size_idx = -1;
   memset(ucp_idx, -1, sizeof(ucp_idx));
   memset(img_idx, -1, sizeof(img_idx));

   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_builder b;
   nir_builder_init(&b, impl);

   b.cursor = nir_before_block(nir_start_block(impl));
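   /* Placeholder UBO indices; these are rewritten to real constant buffer
    * indices further down, once the final number of cbufs is known.
    */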
   nir_ssa_def *temp_ubo_name = nir_ssa_undef(&b, 1, 32);
   nir_ssa_def *temp_const_ubo_name = NULL;

   /* Turn system value intrinsics into uniforms */
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         nir_ssa_def *offset;

         switch (intrin->intrinsic) {
         case nir_intrinsic_load_constant: {
            /* This one is special because it reads from the shader constant
             * data and not from cbuf0, which gallium uploads for us.
             */
            b.cursor = nir_before_instr(instr);
            nir_ssa_def *offset =
               nir_iadd_imm(&b, nir_ssa_for_src(&b, intrin->src[0], 1),
                                nir_intrinsic_base(intrin));

            if (temp_const_ubo_name == NULL)
               temp_const_ubo_name = nir_imm_int(&b, 0);

            nir_intrinsic_instr *load_ubo =
               nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ubo);
            load_ubo->num_components = intrin->num_components;
            load_ubo->src[0] = nir_src_for_ssa(temp_const_ubo_name);
            load_ubo->src[1] = nir_src_for_ssa(offset);
            nir_intrinsic_set_align(load_ubo,
                                    nir_intrinsic_align_mul(intrin),
                                    nir_intrinsic_align_offset(intrin));
            nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest,
                              intrin->dest.ssa.num_components,
                              intrin->dest.ssa.bit_size,
                              intrin->dest.ssa.name);
            nir_builder_instr_insert(&b, &load_ubo->instr);

            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                     nir_src_for_ssa(&load_ubo->dest.ssa));
            nir_instr_remove(&intrin->instr);
            continue;
         }
         case nir_intrinsic_load_user_clip_plane: {
            unsigned ucp = nir_intrinsic_ucp_id(intrin);

            if (ucp_idx[ucp] == -1) {
               ucp_idx[ucp] = num_system_values;
               num_system_values += 4;
            }

            for (int i = 0; i < 4; i++) {
               system_values[ucp_idx[ucp] + i] =
                  BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i);
            }

            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, ucp_idx[ucp] * sizeof(uint32_t));
            break;
         }
         case nir_intrinsic_load_patch_vertices_in:
            if (patch_vert_idx == -1)
               patch_vert_idx = num_system_values++;

            system_values[patch_vert_idx] =
               BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;

            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, patch_vert_idx * sizeof(uint32_t));
            break;
         case nir_intrinsic_image_deref_load_param_intel: {
            assert(devinfo->gen < 9);
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            nir_variable *var = nir_deref_instr_get_variable(deref);

            if (img_idx[var->data.binding] == -1) {
               /* GL only allows arrays of arrays of images. */
               assert(glsl_type_is_image(glsl_without_array(var->type)));
               unsigned num_images = MAX2(1, glsl_get_aoa_size(var->type));

               for (int i = 0; i < num_images; i++) {
                  const unsigned img = var->data.binding + i;

                  img_idx[img] = num_system_values;
                  num_system_values += BRW_IMAGE_PARAM_SIZE;

                  uint32_t *img_sv = &system_values[img_idx[img]];

                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_OFFSET_OFFSET, img,
                     offsetof(struct brw_image_param, offset), 2);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_SIZE_OFFSET, img,
                     offsetof(struct brw_image_param, size), 3);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_STRIDE_OFFSET, img,
                     offsetof(struct brw_image_param, stride), 4);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_TILING_OFFSET, img,
                     offsetof(struct brw_image_param, tiling), 3);
                  setup_vec4_image_sysval(
                     img_sv + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, img,
                     offsetof(struct brw_image_param, swizzling), 2);
               }
            }

            b.cursor = nir_before_instr(instr);
            offset = nir_iadd(&b,
               get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4),
               nir_imm_int(&b, img_idx[var->data.binding] * 4 +
                               nir_intrinsic_base(intrin) * 16));
            break;
         }
         case nir_intrinsic_load_local_group_size: {
            assert(nir->info.cs.local_size_variable);
            if (variable_group_size_idx == -1) {
               variable_group_size_idx = num_system_values;
               num_system_values += 3;
               for (int i = 0; i < 3; i++) {
                  system_values[variable_group_size_idx + i] =
                     BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X + i;
               }
            }

            b.cursor = nir_before_instr(instr);
            offset = nir_imm_int(&b, variable_group_size_idx * sizeof(uint32_t));
            break;
         }
         default:
            continue;
         }

         unsigned comps = nir_intrinsic_dest_components(intrin);

         nir_intrinsic_instr *load =
            nir_intrinsic_instr_create(nir, nir_intrinsic_load_ubo);
         load->num_components = comps;
         load->src[0] = nir_src_for_ssa(temp_ubo_name);
         load->src[1] = nir_src_for_ssa(offset);
         nir_intrinsic_set_align(load, 4, 0);
         nir_ssa_dest_init(&load->instr, &load->dest, comps, 32, NULL);
         nir_builder_instr_insert(&b, &load->instr);
         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                  nir_src_for_ssa(&load->dest.ssa));
         nir_instr_remove(instr);
      }
   }

   nir_validate_shader(nir, "before remapping");

   /* Uniforms are stored in constant buffer 0; user-facing UBOs therefore
    * start at index 1.  If any constant buffer is needed at all, constant
    * buffer 0 will be needed too, so account for it.
    */
   unsigned num_cbufs = nir->info.num_ubos;
   if (num_cbufs || nir->num_uniforms)
      num_cbufs++;

   /* Place the new params in a new cbuf. */
   if (num_system_values > 0) {
      unsigned sysval_cbuf_index = num_cbufs;
      num_cbufs++;

      system_values = reralloc(mem_ctx, system_values, enum brw_param_builtin,
                               num_system_values);

      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);

            if (load->intrinsic != nir_intrinsic_load_ubo)
               continue;

            b.cursor = nir_before_instr(instr);

            assert(load->src[0].is_ssa);

            if (load->src[0].ssa == temp_ubo_name) {
               nir_ssa_def *imm = nir_imm_int(&b, sysval_cbuf_index);
               nir_instr_rewrite_src(instr, &load->src[0],
                                     nir_src_for_ssa(imm));
            }
         }
      }

      /* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */
      nir_opt_constant_folding(nir);
   } else {
      ralloc_free(system_values);
      system_values = NULL;
   }

   assert(num_cbufs < PIPE_MAX_CONSTANT_BUFFERS);
   nir_validate_shader(nir, "after remap");

   /* We don't use params[], but gallium leaves num_uniforms set.  We use it
    * to detect when cbuf0 exists but is unneeded by the time we get here.
    * Zero it out afterwards so the back-end doesn't get confused when
    * num_uniforms and nr_params * 4 disagree.
    */
   nir->num_uniforms = 0;

   /* Constant loads (if any) need to go at the end of the constant buffers so
    * we need to know num_cbufs before we can lower to them.
    */
   if (temp_const_ubo_name != NULL) {
      nir_load_const_instr *const_ubo_index =
         nir_instr_as_load_const(temp_const_ubo_name->parent_instr);
      assert(const_ubo_index->def.bit_size == 32);
      const_ubo_index->value[0].u32 = num_cbufs;
   }

   *out_system_values = system_values;
   *out_num_system_values = num_system_values;
   *out_num_cbufs = num_cbufs;
}

static const char *surface_group_names[] = {
   [IRIS_SURFACE_GROUP_RENDER_TARGET]      = "render target",
   [IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = "non-coherent render target read",
   [IRIS_SURFACE_GROUP_CS_WORK_GROUPS]     = "CS work groups",
   [IRIS_SURFACE_GROUP_TEXTURE]            = "texture",
   [IRIS_SURFACE_GROUP_UBO]                = "ubo",
   [IRIS_SURFACE_GROUP_SSBO]               = "ssbo",
   [IRIS_SURFACE_GROUP_IMAGE]              = "image",
};

static void
iris_print_binding_table(FILE *fp, const char *name,
                         const struct iris_binding_table *bt)
{
   STATIC_ASSERT(ARRAY_SIZE(surface_group_names) == IRIS_SURFACE_GROUP_COUNT);

   uint32_t total = 0;
   uint32_t compacted = 0;

   for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
      uint32_t size = bt->sizes[i];
      total += size;
      if (size)
         compacted += util_bitcount64(bt->used_mask[i]);
   }

   if (total == 0) {
      fprintf(fp, "Binding table for %s is empty\n\n", name);
      return;
   }

   if (total != compacted) {
      fprintf(fp, "Binding table for %s "
              "(compacted to %u entries from %u entries)\n",
              name, compacted, total);
   } else {
      fprintf(fp, "Binding table for %s (%u entries)\n", name, total);
   }

   uint32_t entry = 0;
   for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
      uint64_t mask = bt->used_mask[i];
      while (mask) {
         int index = u_bit_scan64(&mask);
         fprintf(fp, "  [%u] %s #%d\n", entry++, surface_group_names[i], index);
      }
   }
   fprintf(fp, "\n");
}

enum {
   /* Max elements in a surface group. */
   SURFACE_GROUP_MAX_ELEMENTS = 64,
};

/**
 * Map a <group, index> pair to a binding table index.
 *
 * For example: <UBO, 5> => binding table index 12
 */
uint32_t
iris_group_index_to_bti(const struct iris_binding_table *bt,
                        enum iris_surface_group group, uint32_t index)
{
   assert(index < bt->sizes[group]);
   uint64_t mask = bt->used_mask[group];
   uint64_t bit = 1ull << index;
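   /* The table is compacted, so the BTI is the group's base offset plus a
    * count of the used surfaces that precede this index within the group.
    */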
   if (bit & mask) {
      return bt->offsets[group] + util_bitcount64((bit - 1) & mask);
   } else {
      return IRIS_SURFACE_NOT_USED;
   }
}

/**
 * Map a binding table index back to a <group, index> pair.
 *
 * For example: binding table index 12 => <UBO, 5>
 */
uint32_t
iris_bti_to_group_index(const struct iris_binding_table *bt,
                        enum iris_surface_group group, uint32_t bti)
{
   uint64_t used_mask = bt->used_mask[group];
   assert(bti >= bt->offsets[group]);

   uint32_t c = bti - bt->offsets[group];
   while (used_mask) {
      int i = u_bit_scan64(&used_mask);
      if (c == 0)
         return i;
      c--;
   }

   return IRIS_SURFACE_NOT_USED;
}

static void
rewrite_src_with_bti(nir_builder *b, struct iris_binding_table *bt,
                     nir_instr *instr, nir_src *src,
                     enum iris_surface_group group)
{
   assert(bt->sizes[group] > 0);

   b->cursor = nir_before_instr(instr);
   nir_ssa_def *bti;
   if (nir_src_is_const(*src)) {
      uint32_t index = nir_src_as_uint(*src);
      bti = nir_imm_intN_t(b, iris_group_index_to_bti(bt, group, index),
                           src->ssa->bit_size);
   } else {
      /* Indirect usage makes all the surfaces of the group available, so we
       * can just add the base.
       */
      assert(bt->used_mask[group] == BITFIELD64_MASK(bt->sizes[group]));
      bti = nir_iadd_imm(b, src->ssa, bt->offsets[group]);
   }
   nir_instr_rewrite_src(instr, src, nir_src_for_ssa(bti));
}

static void
mark_used_with_src(struct iris_binding_table *bt, nir_src *src,
                   enum iris_surface_group group)
{
   assert(bt->sizes[group] > 0);

   if (nir_src_is_const(*src)) {
      uint64_t index = nir_src_as_uint(*src);
      assert(index < bt->sizes[group]);
      bt->used_mask[group] |= 1ull << index;
   } else {
      /* There's an indirect usage, we need all the surfaces. */
      bt->used_mask[group] = BITFIELD64_MASK(bt->sizes[group]);
   }
}

static bool
skip_compacting_binding_tables(void)
{
   static int skip = -1;
   if (skip < 0)
      skip = env_var_as_boolean("INTEL_DISABLE_COMPACT_BINDING_TABLE", false);
   return skip;
}

/**
 * Set up the binding table indices and apply to the shader.
 */
static void
iris_setup_binding_table(const struct gen_device_info *devinfo,
                         struct nir_shader *nir,
                         struct iris_binding_table *bt,
                         unsigned num_render_targets,
                         unsigned num_system_values,
                         unsigned num_cbufs)
{
   const struct shader_info *info = &nir->info;

   memset(bt, 0, sizeof(*bt));

   /* Set the sizes for each surface group.  For some groups, we already know
    * upfront how many will be used, so mark them.
    */
   if (info->stage == MESA_SHADER_FRAGMENT) {
      bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET] = num_render_targets;
      /* All render targets used. */
      bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET] =
         BITFIELD64_MASK(num_render_targets);

      /* Set up the render target read surface group in order to support
       * non-coherent framebuffer fetch on Gen8.
       */
      if (devinfo->gen == 8 && info->outputs_read) {
         bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = num_render_targets;
         bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] =
            BITFIELD64_MASK(num_render_targets);
      }
   } else if (info->stage == MESA_SHADER_COMPUTE) {
      bt->sizes[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
   }

   bt->sizes[IRIS_SURFACE_GROUP_TEXTURE] = util_last_bit(info->textures_used);
   bt->used_mask[IRIS_SURFACE_GROUP_TEXTURE] = info->textures_used;

   bt->sizes[IRIS_SURFACE_GROUP_IMAGE] = info->num_images;

   /* Allocate an extra slot in the UBO section for NIR constants.
    * Binding table compaction will remove it if unnecessary.
    *
    * We don't include them in iris_compiled_shader::num_cbufs because
    * they are uploaded separately from shs->constbuf[], but from a shader
    * point of view, they're another UBO (at the end of the section).
    */
   bt->sizes[IRIS_SURFACE_GROUP_UBO] = num_cbufs + 1;

   bt->sizes[IRIS_SURFACE_GROUP_SSBO] = info->num_ssbos;

   for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
      assert(bt->sizes[i] <= SURFACE_GROUP_MAX_ELEMENTS);

   /* Mark surfaces used for the cases we don't have the information available
    * upfront.
    */
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
   nir_foreach_block (block, impl) {
      nir_foreach_instr (instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_load_num_work_groups:
            bt->used_mask[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
            break;

         case nir_intrinsic_load_output:
            if (devinfo->gen == 8) {
               mark_used_with_src(bt, &intrin->src[0],
                                  IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
            }
            break;

         case nir_intrinsic_image_size:
         case nir_intrinsic_image_load:
         case nir_intrinsic_image_store:
         case nir_intrinsic_image_atomic_add:
         case nir_intrinsic_image_atomic_imin:
         case nir_intrinsic_image_atomic_umin:
         case nir_intrinsic_image_atomic_imax:
         case nir_intrinsic_image_atomic_umax:
         case nir_intrinsic_image_atomic_and:
         case nir_intrinsic_image_atomic_or:
         case nir_intrinsic_image_atomic_xor:
         case nir_intrinsic_image_atomic_exchange:
         case nir_intrinsic_image_atomic_comp_swap:
         case nir_intrinsic_image_load_raw_intel:
         case nir_intrinsic_image_store_raw_intel:
            mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_IMAGE);
            break;

         case nir_intrinsic_load_ubo:
            mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_UBO);
            break;

         case nir_intrinsic_store_ssbo:
            mark_used_with_src(bt, &intrin->src[1], IRIS_SURFACE_GROUP_SSBO);
            break;

         case nir_intrinsic_get_buffer_size:
         case nir_intrinsic_ssbo_atomic_add:
         case nir_intrinsic_ssbo_atomic_imin:
         case nir_intrinsic_ssbo_atomic_umin:
         case nir_intrinsic_ssbo_atomic_imax:
         case nir_intrinsic_ssbo_atomic_umax:
         case nir_intrinsic_ssbo_atomic_and:
         case nir_intrinsic_ssbo_atomic_or:
         case nir_intrinsic_ssbo_atomic_xor:
         case nir_intrinsic_ssbo_atomic_exchange:
         case nir_intrinsic_ssbo_atomic_comp_swap:
         case nir_intrinsic_ssbo_atomic_fmin:
         case nir_intrinsic_ssbo_atomic_fmax:
         case nir_intrinsic_ssbo_atomic_fcomp_swap:
         case nir_intrinsic_load_ssbo:
            mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_SSBO);
            break;

         default:
            break;
         }
      }
   }

   /* When disabled, we just mark everything as used. */
   if (unlikely(skip_compacting_binding_tables())) {
      for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
         bt->used_mask[i] = BITFIELD64_MASK(bt->sizes[i]);
   }

   /* Calculate the offsets and the binding table size based on the used
    * surfaces.  After this point, the functions to go between "group indices"
    * and binding table indices can be used.
    */
   uint32_t next = 0;
   for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
      if (bt->used_mask[i] != 0) {
         bt->offsets[i] = next;
         next += util_bitcount64(bt->used_mask[i]);
      }
   }
   bt->size_bytes = next * 4;

   if (unlikely(INTEL_DEBUG & DEBUG_BT)) {
      iris_print_binding_table(stderr, gl_shader_stage_name(info->stage), bt);
   }

   /* Apply the binding table indices.  The backend compiler is not expected
    * to change those, as we haven't set any of the *_start entries in brw
    * binding_table.
    */
   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block (block, impl) {
      nir_foreach_instr (instr, block) {
         if (instr->type == nir_instr_type_tex) {
            nir_tex_instr *tex = nir_instr_as_tex(instr);
            tex->texture_index =
               iris_group_index_to_bti(bt, IRIS_SURFACE_GROUP_TEXTURE,
                                       tex->texture_index);
            continue;
         }

         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_image_size:
         case nir_intrinsic_image_load:
         case nir_intrinsic_image_store:
         case nir_intrinsic_image_atomic_add:
         case nir_intrinsic_image_atomic_imin:
         case nir_intrinsic_image_atomic_umin:
         case nir_intrinsic_image_atomic_imax:
         case nir_intrinsic_image_atomic_umax:
         case nir_intrinsic_image_atomic_and:
         case nir_intrinsic_image_atomic_or:
         case nir_intrinsic_image_atomic_xor:
         case nir_intrinsic_image_atomic_exchange:
         case nir_intrinsic_image_atomic_comp_swap:
         case nir_intrinsic_image_load_raw_intel:
         case nir_intrinsic_image_store_raw_intel:
            rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
                                 IRIS_SURFACE_GROUP_IMAGE);
            break;

         case nir_intrinsic_load_ubo:
            rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
                                 IRIS_SURFACE_GROUP_UBO);
            break;

         case nir_intrinsic_store_ssbo:
            rewrite_src_with_bti(&b, bt, instr, &intrin->src[1],
                                 IRIS_SURFACE_GROUP_SSBO);
            break;

         case nir_intrinsic_load_output:
            if (devinfo->gen == 8) {
               rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
                                    IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
            }
            break;

         case nir_intrinsic_get_buffer_size:
         case nir_intrinsic_ssbo_atomic_add:
         case nir_intrinsic_ssbo_atomic_imin:
         case nir_intrinsic_ssbo_atomic_umin:
         case nir_intrinsic_ssbo_atomic_imax:
         case nir_intrinsic_ssbo_atomic_umax:
         case nir_intrinsic_ssbo_atomic_and:
         case nir_intrinsic_ssbo_atomic_or:
         case nir_intrinsic_ssbo_atomic_xor:
         case nir_intrinsic_ssbo_atomic_exchange:
         case nir_intrinsic_ssbo_atomic_comp_swap:
         case nir_intrinsic_ssbo_atomic_fmin:
         case nir_intrinsic_ssbo_atomic_fmax:
         case nir_intrinsic_ssbo_atomic_fcomp_swap:
         case nir_intrinsic_load_ssbo:
            rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
                                 IRIS_SURFACE_GROUP_SSBO);
            break;

         default:
            break;
         }
      }
   }
}

static void
iris_debug_recompile(struct iris_context *ice,
                     struct shader_info *info,
                     const struct brw_base_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *) ice->ctx.screen;
   const struct gen_device_info *devinfo = &screen->devinfo;
   const struct brw_compiler *c = screen->compiler;

   if (!info)
      return;

   c->shader_perf_log(&ice->dbg, "Recompiling %s shader for program %s: %s\n",
                      _mesa_shader_stage_to_string(info->stage),
                      info->name ? info->name : "(no identifier)",
                      info->label ? info->label : "");

   const void *old_iris_key =
      iris_find_previous_compile(ice, info->stage, key->program_string_id);

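   /* Translate the old iris key into a brw key so that the old and new
    * keys can be compared field by field.
    */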
   union brw_any_prog_key old_key;

   switch (info->stage) {
   case MESA_SHADER_VERTEX:
      old_key.vs = iris_to_brw_vs_key(devinfo, old_iris_key);
      break;
   case MESA_SHADER_TESS_CTRL:
      old_key.tcs = iris_to_brw_tcs_key(devinfo, old_iris_key);
      break;
   case MESA_SHADER_TESS_EVAL:
      old_key.tes = iris_to_brw_tes_key(devinfo, old_iris_key);
      break;
   case MESA_SHADER_GEOMETRY:
      old_key.gs = iris_to_brw_gs_key(devinfo, old_iris_key);
      break;
   case MESA_SHADER_FRAGMENT:
      old_key.wm = iris_to_brw_fs_key(devinfo, old_iris_key);
      break;
   case MESA_SHADER_COMPUTE:
      old_key.cs = iris_to_brw_cs_key(devinfo, old_iris_key);
      break;
   default:
      unreachable("invalid shader stage");
   }

   brw_debug_key_recompile(c, &ice->dbg, info->stage, &old_key.base, key);
}

/**
 * Get the shader for the last enabled geometry stage.
 *
 * This stage is the one which will feed stream output and the rasterizer.
 */
static gl_shader_stage
last_vue_stage(struct iris_context *ice)
{
   if (ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
      return MESA_SHADER_GEOMETRY;

   if (ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
      return MESA_SHADER_TESS_EVAL;

   return MESA_SHADER_VERTEX;
}

/**
 * Compile a vertex shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_vs(struct iris_context *ice,
                struct iris_uncompiled_shader *ish,
                const struct iris_vs_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct gen_device_info *devinfo = &screen->devinfo;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_vs_prog_data *vs_prog_data =
      rzalloc(mem_ctx, struct brw_vs_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &vs_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   enum brw_param_builtin *system_values;
   unsigned num_system_values;
   unsigned num_cbufs;

   nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);

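   /* Lower legacy user clip planes here, in NIR.  The brw key's
    * nr_userclip_plane_consts stays zero (see iris_to_brw_vs_key) so the
    * backend won't lower them a second time.
    */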
   if (key->vue.nr_userclip_plane_consts) {
      nir_function_impl *impl = nir_shader_get_entrypoint(nir);
      nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
                        true, false, NULL);
      nir_lower_io_to_temporaries(nir, impl, true, false);
      nir_lower_global_vars_to_local(nir);
      nir_lower_vars_to_ssa(nir);
      nir_shader_gather_info(nir, impl);
   }

   prog_data->use_alt_mode = ish->use_alt_mode;

   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
                       &num_system_values, &num_cbufs);

   struct iris_binding_table bt;
   iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
                            num_system_values, num_cbufs);

   brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);

   brw_compute_vue_map(devinfo,
                       &vue_prog_data->vue_map, nir->info.outputs_written,
                       nir->info.separate_shader, /* pos_slots */ 1);

   struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(devinfo, key);

   char *error_str = NULL;
   const unsigned *program =
      brw_compile_vs(compiler, &ice->dbg, mem_ctx, &brw_key, vs_prog_data,
                     nir, -1, NULL, &error_str);
   if (program == NULL) {
      dbg_printf("Failed to compile vertex shader: %s\n", error_str);
      ralloc_free(mem_ctx);
      return NULL;
   }

   if (ish->compiled_once) {
      iris_debug_recompile(ice, &nir->info, &brw_key.base);
   } else {
      ish->compiled_once = true;
   }

   uint32_t *so_decls =
      screen->vtbl.create_so_decl_list(&ish->stream_output,
                                    &vue_prog_data->vue_map);

   struct iris_compiled_shader *shader =
      iris_upload_shader(ice, IRIS_CACHE_VS, sizeof(*key), key, program,
                         prog_data, so_decls, system_values, num_system_values,
                         num_cbufs, &bt);

   iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));

   ralloc_free(mem_ctx);
   return shader;
}

/**
 * Update the current vertex shader variant.
 *
 * Fill out the key, look in the cache, compile and bind if needed.
 */
static void
iris_update_compiled_vs(struct iris_context *ice)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_VERTEX];
   struct iris_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_VERTEX];

   struct iris_vs_prog_key key = { KEY_ID(vue.base) };
   screen->vtbl.populate_vs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);

   struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS];
   struct iris_compiled_shader *shader =
      iris_find_cached_shader(ice, IRIS_CACHE_VS, sizeof(key), &key);

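   /* On an in-memory cache miss, try the disk cache; if that also misses,
    * compile a fresh variant.
    */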
   if (!shader)
      shader = iris_disk_cache_retrieve(ice, ish, &key, sizeof(key));

   if (!shader)
      shader = iris_compile_vs(ice, ish, &key);

   if (old != shader) {
      ice->shaders.prog[IRIS_CACHE_VS] = shader;
      ice->state.dirty |= IRIS_DIRTY_VS |
                          IRIS_DIRTY_BINDINGS_VS |
                          IRIS_DIRTY_CONSTANTS_VS |
                          IRIS_DIRTY_VF_SGVS;
      shs->sysvals_need_upload = true;

      const struct brw_vs_prog_data *vs_prog_data =
            (void *) shader->prog_data;
      const bool uses_draw_params = vs_prog_data->uses_firstvertex ||
                                    vs_prog_data->uses_baseinstance;
      const bool uses_derived_draw_params = vs_prog_data->uses_drawid ||
                                            vs_prog_data->uses_is_indexed_draw;
      const bool needs_sgvs_element = uses_draw_params ||
                                      vs_prog_data->uses_instanceid ||
                                      vs_prog_data->uses_vertexid;

      if (ice->state.vs_uses_draw_params != uses_draw_params ||
          ice->state.vs_uses_derived_draw_params != uses_derived_draw_params ||
          ice->state.vs_needs_edge_flag != ish->needs_edge_flag) {
         ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS |
                             IRIS_DIRTY_VERTEX_ELEMENTS;
      }
      ice->state.vs_uses_draw_params = uses_draw_params;
      ice->state.vs_uses_derived_draw_params = uses_derived_draw_params;
      ice->state.vs_needs_sgvs_element = needs_sgvs_element;
      ice->state.vs_needs_edge_flag = ish->needs_edge_flag;
   }
}

/**
 * Get the shader_info for a given stage, or NULL if the stage is disabled.
 */
const struct shader_info *
iris_get_shader_info(const struct iris_context *ice, gl_shader_stage stage)
{
   const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];

   if (!ish)
      return NULL;

   const nir_shader *nir = ish->nir;
   return &nir->info;
}

/**
 * Get the union of TCS output and TES input slots.
 *
 * TCS and TES need to agree on a common URB entry layout.  In particular,
 * the data for all patch vertices is stored in a single URB entry (unlike
 * GS which has one entry per input vertex).  This means that per-vertex
 * array indexing needs a stride.
 *
 * SSO requires locations to match, but doesn't require the number of
 * outputs/inputs to match (in fact, the TCS often has extra outputs).
 * So, we need to take the extra step of unifying these on the fly.
 */
static void
get_unified_tess_slots(const struct iris_context *ice,
                       uint64_t *per_vertex_slots,
                       uint32_t *per_patch_slots)
{
   const struct shader_info *tcs =
      iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
   const struct shader_info *tes =
      iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);

   *per_vertex_slots = tes->inputs_read;
   *per_patch_slots = tes->patch_inputs_read;

   if (tcs) {
      *per_vertex_slots |= tcs->outputs_written;
      *per_patch_slots |= tcs->patch_outputs_written;
   }
}

/**
 * Compile a tessellation control shader, and upload the assembly.
 */
static struct iris_compiled_shader *
iris_compile_tcs(struct iris_context *ice,
                 struct iris_uncompiled_shader *ish,
                 const struct iris_tcs_prog_key *key)
{
   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
   const struct brw_compiler *compiler = screen->compiler;
   const struct nir_shader_compiler_options *options =
      compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].NirOptions;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_tcs_prog_data *tcs_prog_data =
      rzalloc(mem_ctx, struct brw_tcs_prog_data);
   struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
   struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
   const struct gen_device_info *devinfo = &screen->devinfo;
   enum brw_param_builtin *system_values = NULL;
   unsigned num_system_values = 0;
   unsigned num_cbufs = 0;

   nir_shader *nir;