/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */

#include "vtn_private.h"
#include "spirv_info.h"
#include "nir_deref.h"
#include <vulkan/vulkan_core.h>

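/* Decoration callback that folds pointer-level decorations (currently only
 * NonUniformEXT) into the access flags of the vtn_pointer being processed.
 */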
static void
ptr_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member,
                  const struct vtn_decoration *dec, void *void_ptr)
{
   struct vtn_pointer *ptr = void_ptr;

   switch (dec->decoration) {
   case SpvDecorationNonUniformEXT:
      ptr->access |= ACCESS_NON_UNIFORM;
      break;

   default:
      break;
   }
}

static struct vtn_pointer*
vtn_decorate_pointer(struct vtn_builder *b, struct vtn_value *val,
                     struct vtn_pointer *ptr)
{
   struct vtn_pointer dummy = { .access = 0 };
   vtn_foreach_decoration(b, val, ptr_decoration_cb, &dummy);

   /* If we're adding access flags, make a copy of the pointer.  We could
    * probably just OR them in without doing so but this prevents us from
    * leaking them any further than actually specified in the SPIR-V.
    */
   if (dummy.access & ~ptr->access) {
      struct vtn_pointer *copy = ralloc(b, struct vtn_pointer);
      *copy = *ptr;
      copy->access |= dummy.access;
      return copy;
   }

   return ptr;
}

struct vtn_value *
vtn_push_pointer(struct vtn_builder *b, uint32_t value_id,
                 struct vtn_pointer *ptr)
{
   struct vtn_value *val = vtn_push_value(b, value_id, vtn_value_type_pointer);
   val->pointer = vtn_decorate_pointer(b, val, ptr);
   return val;
}

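/* Copies the value of one SPIR-V id into another, keeping the destination's
 * name, decorations, and type, and re-applying pointer decorations if the
 * copied value is a pointer.
 */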
void
vtn_copy_value(struct vtn_builder *b, uint32_t src_value_id,
               uint32_t dst_value_id)
{
   struct vtn_value *src = vtn_untyped_value(b, src_value_id);
   struct vtn_value *dst = vtn_untyped_value(b, dst_value_id);
   struct vtn_value src_copy = *src;

   vtn_fail_if(dst->value_type != vtn_value_type_invalid,
               "SPIR-V id %u has already been written by another instruction",
               dst_value_id);

   vtn_fail_if(dst->type->id != src->type->id,
               "Result Type must equal Operand type");

   src_copy.name = dst->name;
   src_copy.decoration = dst->decoration;
   src_copy.type = dst->type;
   *dst = src_copy;

   if (dst->value_type == vtn_value_type_pointer)
      dst->pointer = vtn_decorate_pointer(b, dst, dst->pointer);
}

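/* Allocates an access chain with storage for the requested number of links. */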
static struct vtn_access_chain *
vtn_access_chain_create(struct vtn_builder *b, unsigned length)
{
   struct vtn_access_chain *chain;

   /* Subtract 1 from the length since there's already one built in */
   size_t size = sizeof(*chain) +
                 (MAX2(length, 1) - 1) * sizeof(chain->link[0]);
   chain = rzalloc_size(b, size);
   chain->length = length;

   return chain;
}

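/* Returns true if pointers in this mode are represented as an explicit
 * block-index/byte-offset pair instead of a NIR deref chain: UBO/SSBO access
 * when the driver asks for offset lowering, and push constants always.
 */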
bool
vtn_mode_uses_ssa_offset(struct vtn_builder *b,
                         enum vtn_variable_mode mode)
{
   return ((mode == vtn_variable_mode_ubo ||
            mode == vtn_variable_mode_ssbo) &&
           b->options->lower_ubo_ssbo_access_to_offsets) ||
          mode == vtn_variable_mode_push_constant;
}

static bool
vtn_pointer_is_external_block(struct vtn_builder *b,
                              struct vtn_pointer *ptr)
{
   return ptr->mode == vtn_variable_mode_ssbo ||
          ptr->mode == vtn_variable_mode_ubo ||
          ptr->mode == vtn_variable_mode_phys_ssbo ||
          ptr->mode == vtn_variable_mode_push_constant;
}

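/* Converts one access chain link into an SSA offset scaled by the given
 * stride.  Literal links become immediates; SSA links are converted to the
 * requested bit size and multiplied by the stride.
 */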
static nir_ssa_def *
vtn_access_link_as_ssa(struct vtn_builder *b, struct vtn_access_link link,
                       unsigned stride, unsigned bit_size)
{
   vtn_assert(stride > 0);
   if (link.mode == vtn_access_mode_literal) {
      return nir_imm_intN_t(&b->nb, link.id * stride, bit_size);
   } else {
      nir_ssa_def *ssa = vtn_ssa_value(b, link.id)->def;
      if (ssa->bit_size != bit_size)
         ssa = nir_i2i(&b->nb, ssa, bit_size);
      return nir_imul_imm(&b->nb, ssa, stride);
   }
}

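/* Maps a vtn variable mode to the Vulkan descriptor type recorded on the
 * resource-index and descriptor-load intrinsics below.
 */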
static VkDescriptorType
vk_desc_type_for_mode(struct vtn_builder *b, enum vtn_variable_mode mode)
{
   switch (mode) {
   case vtn_variable_mode_ubo:
      return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
   case vtn_variable_mode_ssbo:
      return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
   default:
      vtn_fail("Invalid mode for vulkan_resource_index");
   }
}

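/* Emits a vulkan_resource_index intrinsic, turning this variable's
 * descriptor set, binding, and optional array index into an opaque
 * descriptor index.  A NULL desc_array_index means a non-arrayed block and
 * index 0 is used.
 */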
static nir_ssa_def *
vtn_variable_resource_index(struct vtn_builder *b, struct vtn_variable *var,
                            nir_ssa_def *desc_array_index)
{
   vtn_assert(b->options->environment == NIR_SPIRV_VULKAN);

   if (!desc_array_index) {
      vtn_assert(glsl_type_is_struct_or_ifc(var->type->type));
      desc_array_index = nir_imm_int(&b->nb, 0);
   }

   nir_intrinsic_instr *instr =
      nir_intrinsic_instr_create(b->nb.shader,
                                 nir_intrinsic_vulkan_resource_index);
   instr->src[0] = nir_src_for_ssa(desc_array_index);
   nir_intrinsic_set_desc_set(instr, var->descriptor_set);
   nir_intrinsic_set_binding(instr, var->binding);
   nir_intrinsic_set_desc_type(instr, vk_desc_type_for_mode(b, var->mode));

   vtn_fail_if(var->mode != vtn_variable_mode_ubo &&
               var->mode != vtn_variable_mode_ssbo,
               "Invalid mode for vulkan_resource_index");

   nir_address_format addr_format = vtn_mode_to_address_format(b, var->mode);
   const struct glsl_type *index_type =
      b->options->lower_ubo_ssbo_access_to_offsets ?
      glsl_uint_type() : nir_address_format_to_glsl_type(addr_format);

   instr->num_components = glsl_get_vector_elements(index_type);
   nir_ssa_dest_init(&instr->instr, &instr->dest, instr->num_components,
                     glsl_get_bit_size(index_type), NULL);
   nir_builder_instr_insert(&b->nb, &instr->instr);

   return &instr->dest.ssa;
}

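/* Emits a vulkan_resource_reindex intrinsic, adding a further array index to
 * an already-computed descriptor index for an array of UBOs/SSBOs.
 */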
static nir_ssa_def *
vtn_resource_reindex(struct vtn_builder *b, enum vtn_variable_mode mode,
                     nir_ssa_def *base_index, nir_ssa_def *offset_index)
{
   vtn_assert(b->options->environment == NIR_SPIRV_VULKAN);

   nir_intrinsic_instr *instr =
      nir_intrinsic_instr_create(b->nb.shader,
                                 nir_intrinsic_vulkan_resource_reindex);
   instr->src[0] = nir_src_for_ssa(base_index);
   instr->src[1] = nir_src_for_ssa(offset_index);
   nir_intrinsic_set_desc_type(instr, vk_desc_type_for_mode(b, mode));

   vtn_fail_if(mode != vtn_variable_mode_ubo && mode != vtn_variable_mode_ssbo,
               "Invalid mode for vulkan_resource_reindex");

   nir_address_format addr_format = vtn_mode_to_address_format(b, mode);
   const struct glsl_type *index_type =
      b->options->lower_ubo_ssbo_access_to_offsets ?
      glsl_uint_type() : nir_address_format_to_glsl_type(addr_format);

   instr->num_components = glsl_get_vector_elements(index_type);
   nir_ssa_dest_init(&instr->instr, &instr->dest, instr->num_components,
                     glsl_get_bit_size(index_type), NULL);
   nir_builder_instr_insert(&b->nb, &instr->instr);

   return &instr->dest.ssa;
}

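/* Emits a load_vulkan_descriptor intrinsic, converting a descriptor index
 * into an actual UBO/SSBO pointer in the driver's chosen address format.
 */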
static nir_ssa_def *
vtn_descriptor_load(struct vtn_builder *b, enum vtn_variable_mode mode,
                    nir_ssa_def *desc_index)
{
   vtn_assert(b->options->environment == NIR_SPIRV_VULKAN);

   nir_intrinsic_instr *desc_load =
      nir_intrinsic_instr_create(b->nb.shader,
                                 nir_intrinsic_load_vulkan_descriptor);
   desc_load->src[0] = nir_src_for_ssa(desc_index);
   nir_intrinsic_set_desc_type(desc_load, vk_desc_type_for_mode(b, mode));

   vtn_fail_if(mode != vtn_variable_mode_ubo && mode != vtn_variable_mode_ssbo,
               "Invalid mode for load_vulkan_descriptor");

   nir_address_format addr_format = vtn_mode_to_address_format(b, mode);
   const struct glsl_type *ptr_type =
      nir_address_format_to_glsl_type(addr_format);

   desc_load->num_components = glsl_get_vector_elements(ptr_type);
   nir_ssa_dest_init(&desc_load->instr, &desc_load->dest,
                     desc_load->num_components,
                     glsl_get_bit_size(ptr_type), NULL);
   nir_builder_instr_insert(&b->nb, &desc_load->instr);

   return &desc_load->dest.ssa;
}

/* Dereference the given base pointer by the access chain */
static struct vtn_pointer *
vtn_nir_deref_pointer_dereference(struct vtn_builder *b,
                                  struct vtn_pointer *base,
                                  struct vtn_access_chain *deref_chain)
{
   struct vtn_type *type = base->type;
   enum gl_access_qualifier access = base->access | deref_chain->access;
   unsigned idx = 0;

   nir_deref_instr *tail;
   if (base->deref) {
      tail = base->deref;
   } else if (b->options->environment == NIR_SPIRV_VULKAN &&
              vtn_pointer_is_external_block(b, base)) {
      nir_ssa_def *block_index = base->block_index;

      /* We're dereferencing an external block pointer.  Correctness of this
       * operation relies on one particular line in the SPIR-V spec, section
       * entitled "Validation Rules for Shader Capabilities":
       *
       *    "Block and BufferBlock decorations cannot decorate a structure
       *    type that is nested at any level inside another structure type
       *    decorated with Block or BufferBlock."
       *
       * This means that we can detect the point where we cross over from
       * descriptor indexing to buffer indexing by looking for the block
       * decorated struct type.  Anything before the block decorated struct
       * type is a descriptor indexing operation and anything after the block
       * decorated struct is a buffer offset operation.
       */

      /* Figure out the descriptor array index if any
       *
       * Some of the Vulkan CTS tests with hand-rolled SPIR-V have been known
       * to forget the Block or BufferBlock decoration from time to time.
       * It's more robust if we check for both !block_index and for the type
       * to contain a block.  This way there's a decent chance that arrays of
       * UBOs/SSBOs will work correctly even if variable pointers are
       * completely toast.
       */
      nir_ssa_def *desc_arr_idx = NULL;
      if (!block_index || vtn_type_contains_block(b, type)) {
         /* If our type contains a block, then we're still outside the block
          * and we need to process enough levels of dereferences to get inside
          * of it.
          */
         if (deref_chain->ptr_as_array) {
            unsigned aoa_size = glsl_get_aoa_size(type->type);
            desc_arr_idx = vtn_access_link_as_ssa(b, deref_chain->link[idx],
                                                  MAX2(aoa_size, 1), 32);
            idx++;
         }

         for (; idx < deref_chain->length; idx++) {
            if (type->base_type != vtn_base_type_array) {
               vtn_assert(type->base_type == vtn_base_type_struct);
               break;
            }

            unsigned aoa_size = glsl_get_aoa_size(type->array_element->type);
            nir_ssa_def *arr_offset =
               vtn_access_link_as_ssa(b, deref_chain->link[idx],
                                      MAX2(aoa_size, 1), 32);
            if (desc_arr_idx)
               desc_arr_idx = nir_iadd(&b->nb, desc_arr_idx, arr_offset);
            else
               desc_arr_idx = arr_offset;

            type = type->array_element;
            access |= type->access;
         }
      }

      if (!block_index) {
         vtn_assert(base->var && base->type);
         block_index = vtn_variable_resource_index(b, base->var, desc_arr_idx);
      } else if (desc_arr_idx) {
         block_index = vtn_resource_reindex(b, base->mode,
                                            block_index, desc_arr_idx);
      }

      if (idx == deref_chain->length) {
         /* The entire deref was consumed in finding the block index.  Return
          * a pointer which just has a block index and a later access chain
          * will dereference deeper.
          */
         struct vtn_pointer *ptr = rzalloc(b, struct vtn_pointer);
         ptr->mode = base->mode;
         ptr->type = type;
         ptr->block_index = block_index;
         ptr->access = access;
         return ptr;
      }

      /* If we got here, there's more access chain to handle and we have the
       * final block index.  Insert a descriptor load and cast to a deref to
       * start the deref chain.
       */
      nir_ssa_def *desc = vtn_descriptor_load(b, base->mode, block_index);

      assert(base->mode == vtn_variable_mode_ssbo ||
             base->mode == vtn_variable_mode_ubo);
      nir_variable_mode nir_mode =
         base->mode == vtn_variable_mode_ssbo ? nir_var_mem_ssbo : nir_var_mem_ubo;

      tail = nir_build_deref_cast(&b->nb, desc, nir_mode,
                                  vtn_type_get_nir_type(b, type, base->mode),
                                  base->ptr_type->stride);
   } else {
      assert(base->var && base->var->var);
      tail = nir_build_deref_var(&b->nb, base->var->var);
      if (base->ptr_type && base->ptr_type->type) {
         tail->dest.ssa.num_components =
            glsl_get_vector_elements(base->ptr_type->type);
         tail->dest.ssa.bit_size = glsl_get_bit_size(base->ptr_type->type);
      }
   }

   if (idx == 0 && deref_chain->ptr_as_array) {
      /* We start with a deref cast to get the stride.  Hopefully, we'll be
       * able to delete that cast eventually.
       */
      tail = nir_build_deref_cast(&b->nb, &tail->dest.ssa, tail->mode,
                                  tail->type, base->ptr_type->stride);

      nir_ssa_def *index = vtn_access_link_as_ssa(b, deref_chain->link[0], 1,
                                                  tail->dest.ssa.bit_size);
      tail = nir_build_deref_ptr_as_array(&b->nb, tail, index);
      idx++;
   }

   for (; idx < deref_chain->length; idx++) {
      if (glsl_type_is_struct_or_ifc(type->type)) {
         vtn_assert(deref_chain->link[idx].mode == vtn_access_mode_literal);
         unsigned field = deref_chain->link[idx].id;
         tail = nir_build_deref_struct(&b->nb, tail, field);
         type = type->members[field];
      } else {
         nir_ssa_def *arr_index =
            vtn_access_link_as_ssa(b, deref_chain->link[idx], 1,
                                   tail->dest.ssa.bit_size);
         tail = nir_build_deref_array(&b->nb, tail, arr_index);
         type = type->array_element;
      }

      access |= type->access;
   }

   struct vtn_pointer *ptr = rzalloc(b, struct vtn_pointer);
   ptr->mode = base->mode;
   ptr->type = type;
   ptr->var = base->var;
   ptr->deref = tail;
   ptr->access = access;

   return ptr;
}

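/* Dereferences the given base pointer by the access chain, producing a
 * pointer represented as an explicit block index and byte offset.  Used for
 * pointers for which vtn_pointer_uses_ssa_offset() is true.
 */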
static struct vtn_pointer *
vtn_ssa_offset_pointer_dereference(struct vtn_builder *b,
                                   struct vtn_pointer *base,
                                   struct vtn_access_chain *deref_chain)
{
   nir_ssa_def *block_index = base->block_index;
   nir_ssa_def *offset = base->offset;
   struct vtn_type *type = base->type;
   enum gl_access_qualifier access = base->access;

   unsigned idx = 0;
   if (base->mode == vtn_variable_mode_ubo ||
       base->mode == vtn_variable_mode_ssbo) {
      if (!block_index) {
         vtn_assert(base->var && base->type);
         nir_ssa_def *desc_arr_idx;
         if (glsl_type_is_array(type->type)) {
            if (deref_chain->length >= 1) {
               desc_arr_idx =
                  vtn_access_link_as_ssa(b, deref_chain->link[0], 1, 32);
               idx++;
               /* This consumes a level of type */
               type = type->array_element;
               access |= type->access;
            } else {
               /* This is annoying.  We've been asked for a pointer to the
                * array of UBOs/SSBOs and not a specific buffer.  Return a
                * pointer with a descriptor index of 0 and we'll have to do
                * a reindex later to adjust it to the right thing.
                */
               desc_arr_idx = nir_imm_int(&b->nb, 0);
            }
         } else if (deref_chain->ptr_as_array) {
            /* You can't have a zero-length OpPtrAccessChain */
            vtn_assert(deref_chain->length >= 1);
            desc_arr_idx = vtn_access_link_as_ssa(b, deref_chain->link[0], 1, 32);
         } else {
            /* We have a regular non-array SSBO. */
            desc_arr_idx = NULL;
         }
         block_index = vtn_variable_resource_index(b, base->var, desc_arr_idx);
      } else if (deref_chain->ptr_as_array &&
                 type->base_type == vtn_base_type_struct && type->block) {
         /* We are doing an OpPtrAccessChain on a pointer to a struct that is
          * decorated block.  This is an interesting corner in the SPIR-V
          * spec.  One interpretation would be that the client is clearly
          * trying to treat that block as if it's an implicit array of blocks
          * repeated in the buffer.  However, the SPIR-V spec for the
          * OpPtrAccessChain says:
          *
          *    "Base is treated as the address of the first element of an
          *    array, and the Element element’s address is computed to be the
          *    base for the Indexes, as per OpAccessChain."
          *
          * Taken literally, that would mean that your struct type is supposed
          * to be treated as an array of such a struct and, since it's
          * decorated block, that means an array of blocks which corresponds
          * to an array descriptor.  Therefore, we need to do a reindex
          * operation to add the index from the first link in the access chain
          * to the index we received.
          *
          * The downside to this interpretation (there always is one) is that
          * this might be somewhat surprising behavior to apps if they expect
          * the implicit array behavior described above.
          */
         vtn_assert(deref_chain->length >= 1);
         nir_ssa_def *offset_index =
            vtn_access_link_as_ssa(b, deref_chain->link[0], 1, 32);
         idx++;

         block_index = vtn_resource_reindex(b, base->mode,
                                            block_index, offset_index);
      }
   }

   if (!offset) {
      if (base->mode == vtn_variable_mode_workgroup) {
         /* SLM neither needs nor has a block index */
         vtn_assert(!block_index);

         /* We need the variable for the base offset */
         vtn_assert(base->var);

         /* We need ptr_type for size and alignment */
         vtn_assert(base->ptr_type);

         /* Assign location on first use so that we don't end up bloating SLM
          * address space for variables which are never statically used.
          */
         if (base->var->shared_location < 0) {
            vtn_assert(base->ptr_type->length > 0 && base->ptr_type->align > 0);
            b->shader->num_shared = vtn_align_u32(b->shader->num_shared,
                                                  base->ptr_type->align);
            base->var->shared_location = b->shader->num_shared;
            b->shader->num_shared += base->ptr_type->length;
         }

         offset = nir_imm_int(&b->nb, base->var->shared_location);
      } else if (base->mode == vtn_variable_mode_push_constant) {
         /* Push constants neither need nor have a block index */
         vtn_assert(!block_index);

         /* Start at the beginning of the push constant block. */
         offset = nir_imm_int(&b->nb, 0);
      } else {
         /* The code above should have ensured a block_index when needed. */
         vtn_assert(block_index);

         /* Start at the beginning of the buffer. */
         offset = nir_imm_int(&b->nb, 0);
      }
   }

   if (deref_chain->ptr_as_array && idx == 0) {
      /* We need ptr_type for the stride */
      vtn_assert(base->ptr_type);

      /* We need at least one element in the chain */
      vtn_assert(deref_chain->length >= 1);

      nir_ssa_def *elem_offset =
         vtn_access_link_as_ssa(b, deref_chain->link[idx],
                                base->ptr_type->stride, offset->bit_size);
      offset = nir_iadd(&b->nb, offset, elem_offset);
      idx++;
   }

   for (; idx < deref_chain->length; idx++) {
      switch (glsl_get_base_type(type->type)) {
      case GLSL_TYPE_UINT:
      case GLSL_TYPE_INT:
      case GLSL_TYPE_UINT16:
      case GLSL_TYPE_INT16:
      case GLSL_TYPE_UINT8:
      case GLSL_TYPE_INT8:
      case GLSL_TYPE_UINT64:
      case GLSL_TYPE_INT64:
      case GLSL_TYPE_FLOAT:
      case GLSL_TYPE_FLOAT16:
      case GLSL_TYPE_DOUBLE:
      case GLSL_TYPE_BOOL:
      case GLSL_TYPE_ARRAY: {
         nir_ssa_def *elem_offset =
            vtn_access_link_as_ssa(b, deref_chain->link[idx],
                                   type->stride, offset->bit_size);
         offset = nir_iadd(&b->nb, offset, elem_offset);
         type = type->array_element;
         access |= type->access;
         break;
      }

      case GLSL_TYPE_INTERFACE:
      case GLSL_TYPE_STRUCT: {
         vtn_assert(deref_chain->link[idx].mode == vtn_access_mode_literal);
         unsigned member = deref_chain->link[idx].id;
         offset = nir_iadd_imm(&b->nb, offset, type->offsets[member]);
         type = type->members[member];
         access |= type->access;
         break;
      }

      default:
         vtn_fail("Invalid type for deref");
      }
   }

   struct vtn_pointer *ptr = rzalloc(b, struct vtn_pointer);
   ptr->mode = base->mode;
   ptr->type = type;
   ptr->block_index = block_index;
   ptr->offset = offset;
   ptr->access = access;

   return ptr;
}

/* Dereference the given base pointer by the access chain */
static struct vtn_pointer *
vtn_pointer_dereference(struct vtn_builder *b,
                        struct vtn_pointer *base,
                        struct vtn_access_chain *deref_chain)
{
   if (vtn_pointer_uses_ssa_offset(b, base)) {
      return vtn_ssa_offset_pointer_dereference(b, base, deref_chain);
   } else {
      return vtn_nir_deref_pointer_dereference(b, base, deref_chain);
   }
}

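/* Converts a vtn_pointer into a NIR deref instruction, building the deref
 * with an empty access chain first if it has not been created yet.
 */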
nir_deref_instr *
vtn_pointer_to_deref(struct vtn_builder *b, struct vtn_pointer *ptr)
{
   vtn_assert(!vtn_pointer_uses_ssa_offset(b, ptr));
   if (!ptr->deref) {
      struct vtn_access_chain chain = {
         .length = 0,
      };
      ptr = vtn_nir_deref_pointer_dereference(b, ptr, &chain);
   }

   return ptr->deref;
}

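/* Recursively loads or stores an SSA value through a NIR deref, splitting
 * arrays, matrices, and structs into per-element operations so that only
 * vector or scalar derefs are actually loaded or stored.
 */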
static void
_vtn_local_load_store(struct vtn_builder *b, bool load, nir_deref_instr *deref,
                      struct vtn_ssa_value *inout,
                      enum gl_access_qualifier access)
{
   if (glsl_type_is_vector_or_scalar(deref->type)) {
      if (load) {
         inout->def = nir_load_deref_with_access(&b->nb, deref, access);
      } else {
         nir_store_deref_with_access(&b->nb, deref, inout->def, ~0, access);
      }
   } else if (glsl_type_is_array(deref->type) ||
              glsl_type_is_matrix(deref->type)) {
      unsigned elems = glsl_get_length(deref->type);
      for (unsigned i = 0; i < elems; i++) {
         nir_deref_instr *child =
            nir_build_deref_array_imm(&b->nb, deref, i);
         _vtn_local_load_store(b, load, child, inout->elems[i], access);
      }
   } else {
      vtn_assert(glsl_type_is_struct_or_ifc(deref->type));
      unsigned elems = glsl_get_length(deref->type);
      for (unsigned i = 0; i < elems; i++) {
         nir_deref_instr *child = nir_build_deref_struct(&b->nb, deref, i);
         _vtn_local_load_store(b, load, child, inout->elems[i], access);
      }
   }
}

nir_deref_instr *
vtn_nir_deref(struct vtn_builder *b, uint32_t id)
{
   struct vtn_pointer *ptr = vtn_value(b, id, vtn_value_type_pointer)->pointer;
   return vtn_pointer_to_deref(b, ptr);
}

/*
 * Gets the NIR-level deref tail, which may have as a child an array deref
 * selecting which component due to OpAccessChain supporting per-component
 * indexing in SPIR-V.
 */
static nir_deref_instr *
get_deref_tail(struct vtn_builder *b, nir_deref_instr *deref)
{
   if (deref->deref_type != nir_deref_type_array)
      return deref;

   nir_deref_instr *parent =
      nir_instr_as_deref(deref->parent.ssa->parent_instr);

   if (!glsl_type_is_vector(parent->type) ||
       deref->mode == nir_var_mem_ubo ||
       deref->mode == nir_var_mem_ssbo ||
       deref->mode == nir_var_mem_shared ||
       deref->mode == nir_var_mem_global ||
       b->nb.shader->info.stage == MESA_SHADER_KERNEL)
      return deref;

   return parent;
}

struct vtn_ssa_value *
vtn_local_load(struct vtn_builder *b, nir_deref_instr *src,
               enum gl_access_qualifier access)
{
   nir_deref_instr *src_tail = get_deref_tail(b, src);
   struct vtn_ssa_value *val = vtn_create_ssa_value(b, src_tail->type);
   _vtn_local_load_store(b, true, src_tail, val, access);

   if (src_tail != src) {
      val->type = src->type;
      val->def = nir_vector_extract(&b->nb, val->def, src->arr.index.ssa);
   }

   return val;
}

void
vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src,
                nir_deref_instr *dest, enum gl_access_qualifier access)
{
   nir_deref_instr *dest_tail = get_deref_tail(b, dest);

   if (dest_tail != dest) {
      struct vtn_ssa_value *val = vtn_create_ssa_value(b, dest_tail->type);
      _vtn_local_load_store(b, true, dest_tail, val, access);

      val->def = nir_vector_insert(&b->nb, val->def, src->def,
                                   dest->arr.index.ssa);
      _vtn_local_load_store(b, false, dest_tail, val, access);
   } else {
      _vtn_local_load_store(b, false, dest_tail, src, access);
   }
}

nir_ssa_def *
vtn_pointer_to_offset(struct vtn_builder *b, struct vtn_pointer *ptr,
                      nir_ssa_def **index_out)
{
   assert(vtn_pointer_uses_ssa_offset(b, ptr));
   if (!ptr->offset) {
      struct vtn_access_chain chain = {
         .length = 0,
      };
      ptr = vtn_ssa_offset_pointer_dereference(b, ptr, &chain);
   }
   *index_out = ptr->block_index;
   return ptr->offset;
}

/* Tries to compute the size of an interface block based on the strides and
 * offsets that are provided to us in the SPIR-V source.
 */
static unsigned
vtn_type_block_size(struct vtn_builder *b, struct vtn_type *type)
{
   enum glsl_base_type base_type = glsl_get_base_type(type->type);
   switch (base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_UINT16:
   case GLSL_TYPE_INT16:
   case GLSL_TYPE_UINT8:
   case GLSL_TYPE_INT8:
   case GLSL_TYPE_UINT64:
   case GLSL_TYPE_INT64:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_FLOAT16:
   case GLSL_TYPE_BOOL:
   case GLSL_TYPE_DOUBLE: {
      unsigned cols = type->row_major ? glsl_get_vector_elements(type->type) :
                                        glsl_get_matrix_columns(type->type);
      if (cols > 1) {
         vtn_assert(type->stride > 0);
         return type->stride * cols;
      } else {
         unsigned type_size = glsl_get_bit_size(type->type) / 8;
         return glsl_get_vector_elements(type->type) * type_size;
      }
   }

   case GLSL_TYPE_STRUCT:
   case GLSL_TYPE_INTERFACE: {
      unsigned size = 0;
      unsigned num_fields = glsl_get_length(type->type);
      for (unsigned f = 0; f < num_fields; f++) {
         unsigned field_end = type->offsets[f] +
                              vtn_type_block_size(b, type->members[f]);
         size = MAX2(size, field_end);
      }
      return size;
   }

   case GLSL_TYPE_ARRAY:
      vtn_assert(type->stride > 0);
      vtn_assert(glsl_get_length(type->type) > 0);
      return type->stride * glsl_get_length(type->type);

   default:
      vtn_fail("Invalid block type");
      return 0;
   }
}

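/* Emits a single block load/store intrinsic for one vector or scalar value
 * at the given index and offset.  Booleans are loaded as 32-bit integers and
 * converted back to NIR booleans with an i != 0 comparison.
 */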
static void
_vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load,
                     nir_ssa_def *index, nir_ssa_def *offset,
                     unsigned access_offset, unsigned access_size,
                     struct vtn_ssa_value **inout, const struct glsl_type *type,
                     enum gl_access_qualifier access)
{
   nir_intrinsic_instr *instr = nir_intrinsic_instr_create(b->nb.shader, op);
   instr->num_components = glsl_get_vector_elements(type);

   /* Booleans usually shouldn't show up in external memory in SPIR-V.
    * However, they do for certain older GLSLang versions and can for shared
    * memory when we lower access chains internally.
    */
   const unsigned data_bit_size = glsl_type_is_boolean(type) ? 32 :
                                  glsl_get_bit_size(type);

   int src = 0;
   if (!load) {
      nir_intrinsic_set_write_mask(instr, (1 << instr->num_components) - 1);
      instr->src[src++] = nir_src_for_ssa((*inout)->def);
   }

   if (op == nir_intrinsic_load_push_constant) {
      nir_intrinsic_set_base(instr, access_offset);
      nir_intrinsic_set_range(instr, access_size);
   }

   if (op == nir_intrinsic_load_ubo ||
       op == nir_intrinsic_load_ssbo ||
       op == nir_intrinsic_store_ssbo) {
      nir_intrinsic_set_access(instr, access);
   }

   /* With extensions like relaxed_block_layout, we really can't guarantee
    * much more than scalar alignment.
    */
   if (op != nir_intrinsic_load_push_constant)
      nir_intrinsic_set_align(instr, data_bit_size / 8, 0);

   if (index)
      instr->src[src++] = nir_src_for_ssa(index);

   if (op == nir_intrinsic_load_push_constant) {
      /* We need to subtract the offset from where the intrinsic will load the
       * data. */
      instr->src[src++] =
         nir_src_for_ssa(nir_isub(&b->nb, offset,
                                  nir_imm_int(&b->nb, access_offset)));
   } else {
      instr->src[src++] = nir_src_for_ssa(offset);
   }

   if (load) {
      nir_ssa_dest_init(&instr->instr, &instr->dest,
                        instr->num_components, data_bit_size, NULL);
      (*inout)->def = &instr->dest.ssa;
   }

   nir_builder_instr_insert(&b->nb, &instr->instr);

   if (load && glsl_get_base_type(type) == GLSL_TYPE_BOOL)
      (*inout)->def = nir_ine(&b->nb, (*inout)->def, nir_imm_int(&b->nb, 0));
}

static void
_vtn_block_load_store(struct vtn_builder *b, nir_intrinsic_op op, bool load,
                      nir_ssa_def *index, nir_ssa_def *offset,
                      unsigned access_offset, unsigned access_size,
                      struct vtn_type *type, enum gl_access_qualifier access,
                      struct vtn_ssa_value **inout)
{
   enum glsl_base_type base_type = glsl_get_base_type(type->type);
   switch (base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_UINT16:
   case GLSL_TYPE_INT16:
   case GLSL_TYPE_UINT8:
   case GLSL_TYPE_INT8:
   case GLSL_TYPE_UINT64:
   case GLSL_TYPE_INT64:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_FLOAT16:
   case GLSL_TYPE_DOUBLE:
   case GLSL_TYPE_BOOL:
      /* This is where things get interesting.  At this point, we've hit
       * a vector, a scalar, or a matrix.
       */
      if (glsl_type_is_matrix(type->type)) {
         /* Loading the whole matrix */
         struct vtn_ssa_value *transpose;
         unsigned num_ops, vec_width, col_stride;
         if (type->row_major) {
            num_ops = glsl_get_vector_elements(type->type);
            vec_width = glsl_get_matrix_columns(type->type);
            col_stride = type->array_element->stride;
            if (load) {
               const struct glsl_type *transpose_type =
                  glsl_matrix_type(base_type, vec_width, num_ops);
               *inout = vtn_create_ssa_value(b, transpose_type);
            } else {
               transpose = vtn_ssa_transpose(b, *inout);
               inout = &transpose;
            }
         } else {
            num_ops = glsl_get_matrix_columns(type->type);
            vec_width = glsl_get_vector_elements(type->type);
            col_stride = type->stride;
         }

         for (unsigned i = 0; i < num_ops; i++) {
            nir_ssa_def *elem_offset =
               nir_iadd_imm(&b->nb, offset, i * col_stride);
            _vtn_load_store_tail(b, op, load, index, elem_offset,
                                 access_offset, access_size,
                                 &(*inout)->elems[i],
                                 glsl_vector_type(base_type, vec_width),
                                 type->access | access);
         }

         if (load && type->row_major)
            *inout = vtn_ssa_transpose(b, *inout);
      } else {
         unsigned elems = glsl_get_vector_elements(type->type);
         unsigned type_size = glsl_get_bit_size(type->type) / 8;
         if (elems == 1 || type->stride == type_size) {
            /* This is a tightly-packed normal scalar or vector load */
            vtn_assert(glsl_type_is_vector_or_scalar(type->type));
            _vtn_load_store_tail(b, op, load, index, offset,
                                 access_offset, access_size,
                                 inout, type->type,
                                 type->access | access);
         } else {
            /* This is a strided load.  We have to load N things separately.
             * This is the single column of a row-major matrix case.
             */
            vtn_assert(type->stride > type_size);
            vtn_assert(type->stride % type_size == 0);

            nir_ssa_def *per_comp[4];
            for (unsigned i = 0; i < elems; i++) {
               nir_ssa_def *elem_offset =
                  nir_iadd_imm(&b->nb, offset, i * type->stride);
               struct vtn_ssa_value *comp, temp_val;
               if (!load) {
                  temp_val.def = nir_channel(&b->nb, (*inout)->def, i);
                  temp_val.type = glsl_scalar_type(base_type);
               }
               comp = &temp_val;
               _vtn_load_store_tail(b, op, load, index, elem_offset,
                                    access_offset, access_size,
                                    &comp, glsl_scalar_type(base_type),
                                    type->access | access);
               per_comp[i] = comp->def;
            }

            if (load) {
               if (*inout == NULL)
                  *inout = vtn_create_ssa_value(b, type->type);
               (*inout)->def = nir_vec(&b->nb, per_comp, elems);
            }
         }
      }
      return;

   case GLSL_TYPE_ARRAY: {
      unsigned elems = glsl_get_length(type->type);
      for (unsigned i = 0; i < elems; i++) {
         nir_ssa_def *elem_off =
            nir_iadd_imm(&b->nb, offset, i * type->stride);
         _vtn_block_load_store(b, op, load, index, elem_off,
                               access_offset, access_size,
                               type->array_element,
                               type->array_element->access | access,
                               &(*inout)->elems[i]);
      }
      return;
   }

   case GLSL_TYPE_INTERFACE:
   case GLSL_TYPE_STRUCT: {
      unsigned elems = glsl_get_length(type->type);
      for (unsigned i = 0; i < elems; i++) {
         nir_ssa_def *elem_off =
            nir_iadd_imm(&b->nb, offset, type->offsets[i]);
         _vtn_block_load_store(b, op, load, index, elem_off,
                               access_offset, access_size,
                               type->members[i],
                               type->members[i]->access | access,
                               &(*inout)->elems[i]);
      }
      return;
   }

   default:
      vtn_fail("Invalid block member type");
   }
}

static struct vtn_ssa_value *
vtn_block_load(struct vtn_builder *b, struct vtn_pointer *src)
{
   nir_intrinsic_op op;
   unsigned access_offset = 0, access_size = 0;
   switch (src->mode) {
   case vtn_variable_mode_ubo:
      op = nir_intrinsic_load_ubo;
      break;
   case vtn_variable_mode_ssbo:
      op = nir_intrinsic_load_ssbo;
      break;
   case vtn_variable_mode_push_constant:
      op = nir_intrinsic_load_push_constant;
      access_size = b->shader->num_uniforms;
      break;
   case vtn_variable_mode_workgroup:
      op = nir_intrinsic_load_shared;
      break;
   default:
      vtn_fail("Invalid block variable mode");
   }

   nir_ssa_def *offset, *index = NULL;
   offset = vtn_pointer_to_offset(b, src, &index);

   struct vtn_ssa_value *value = vtn_create_ssa_value(b, src->type->type);
   _vtn_block_load_store(b, op, true, index, offset,
                         access_offset, access_size,
                         src->type, src->access, &value);
   return value;
}

static void
vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src,
                struct vtn_pointer *dst)
{
   nir_intrinsic_op op;
   switch (dst->mode) {
   case vtn_variable_mode_ssbo:
      op = nir_intrinsic_store_ssbo;
      break;
   case vtn_variable_mode_workgroup:
      op = nir_intrinsic_store_shared;
      break;
   default:
      vtn_fail("Invalid block variable mode");
   }

   nir_ssa_def *offset, *index = NULL;
   offset = vtn_pointer_to_offset(b, dst, &index);

   _vtn_block_load_store(b, op, false, index, offset,
                         0, 0, dst->type, dst->access, &src);
}

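/* Loads or stores a complete variable by recursing over its type; each
 * vector or scalar leaf is handled either with a direct NIR deref
 * load/store (for external blocks) or with the local load/store helpers.
 */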
static void
_vtn_variable_load_store(struct vtn_builder *b, bool load,
                         struct vtn_pointer *ptr,
                         enum gl_access_qualifier access,
                         struct vtn_ssa_value **inout)
{
   if (ptr->mode == vtn_variable_mode_uniform) {
      if (ptr->type->base_type == vtn_base_type_image ||
          ptr->type->base_type == vtn_base_type_sampler) {
         /* See also our handling of OpTypeSampler and OpTypeImage */
         vtn_assert(load);
         (*inout)->def = vtn_pointer_to_ssa(b, ptr);
         return;
      } else if (ptr->type->base_type == vtn_base_type_sampled_image) {
         /* See also our handling of OpTypeSampledImage */
         vtn_assert(load);
         struct vtn_sampled_image si = {
            .image = vtn_pointer_to_deref(b, ptr),
            .sampler = vtn_pointer_to_deref(b, ptr),
         };
         (*inout)->def = vtn_sampled_image_to_nir_ssa(b, si);
         return;
      }
   }

   enum glsl_base_type base_type = glsl_get_base_type(ptr->type->type);
   switch (base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_UINT16:
   case GLSL_TYPE_INT16:
   case GLSL_TYPE_UINT8:
   case GLSL_TYPE_INT8:
   case GLSL_TYPE_UINT64:
   case GLSL_TYPE_INT64:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_FLOAT16:
   case GLSL_TYPE_BOOL:
   case GLSL_TYPE_DOUBLE:
      if (glsl_type_is_vector_or_scalar(ptr->type->type)) {
         /* We hit a vector or scalar; go ahead and emit the load[s] */
         nir_deref_instr *deref = vtn_pointer_to_deref(b, ptr);
         if (vtn_pointer_is_external_block(b, ptr)) {
            /* If it's external, we call nir_load/store_deref directly.  The
             * vtn_local_load/store helpers are too clever and do magic to
             * avoid array derefs of vectors.  That magic is both less
             * efficient than the direct load/store and, in the case of
             * stores, is broken because it creates a race condition if two
             * threads are writing to different components of the same vector
             * due to the load+insert+store it uses to emulate the array
             * deref.
             */
            if (load) {
               (*inout)->def = nir_load_deref_with_access(&b->nb, deref,
                                                          ptr->type->access | access);
            } else {
               nir_store_deref_with_access(&b->nb, deref, (*inout)->def, ~0,
                                           ptr->type->access | access);
            }
         } else {
            if (load) {
               *inout = vtn_local_load(b, deref, ptr->type->access | access);
            } else {
               vtn_local_store(b, *inout, deref, ptr->type->access | access);
            }
         }
         return;
      }
      /* Fall through */

   case GLSL_TYPE_INTERFACE:
   case GLSL_TYPE_ARRAY:
   case GLSL_TYPE_STRUCT: {
      unsigned elems = glsl_get_length(ptr->type->type);
      struct vtn_access_chain chain = {
         .length = 1,
         .link = {
            { .mode = vtn_access_mode_literal, },
         }
      };
      for (unsigned i = 0; i < elems; i++) {
         chain.link[0].id = i;
         struct vtn_pointer *elem = vtn_pointer_dereference(b, ptr, &chain);
         _vtn_variable_load_store(b, load, elem, ptr->type->access | access,