/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "d3d12_compiler.h"
25
#include "d3d12_context.h"
26
#include "d3d12_debug.h"
27
#include "d3d12_screen.h"
28
#include "d3d12_nir_passes.h"
29
#include "nir_to_dxil.h"
30
31
32
33

#include "pipe/p_state.h"

#include "nir.h"
34
#include "nir/nir_draw_helpers.h"
35
#include "nir/tgsi_to_nir.h"
36
#include "compiler/nir/nir_builder.h"
37
#include "tgsi/tgsi_from_mesa.h"
38
#include "tgsi/tgsi_ureg.h"
39
40

#include "util/u_memory.h"
41
#include "util/u_prim.h"
42
#include "util/u_simple_shaders.h"
43

44
45
46
47
#include <d3d12.h>
#include <dxcapi.h>
#include <wrl.h>

48
49
extern "C" {
#include "tgsi/tgsi_parse.h"
50
#include "tgsi/tgsi_point_sprite.h"
51
52
}

53
54
55
56
57
58
59
60
using Microsoft::WRL::ComPtr;

/* Bundles the DXC/DXIL helper DLL state used to validate, sign and
 * disassemble DXIL produced by nir_to_dxil().  Created/destroyed via
 * d3d12_validator_create()/d3d12_validator_destroy(); member functions
 * are defined elsewhere in this file. */
struct d3d12_validation_tools
{
   d3d12_validation_tools();

   /* Run the DXIL validator over the blob; see the implementation for the
    * exact success/replacement semantics.  Returns false on failure. */
   bool validate_and_sign(struct blob *dxil);

   /* Print a disassembly of the DXIL blob (debug aid, see D3D12_DEBUG_DISASS). */
   void disassemble(struct blob *dxil);

   void load_dxil_dll();

   /* Minimal RAII wrapper around a Win32 HMODULE handle. */
   struct HModule {
      HModule();
      ~HModule();

      bool load(LPCSTR file_name);
      operator HMODULE () const;
   private:
      HMODULE module;
   };

   HModule dxil_module;          /* dxil.dll - validator/signer */
   HModule dxc_compiler_module;  /* dxcompiler.dll - compiler/disassembler */
   ComPtr<IDxcCompiler> compiler;
   ComPtr<IDxcValidator> validator;
   ComPtr<IDxcLibrary> library;
};

/* Allocate the DXIL validation/signing helpers; pair with
 * d3d12_validator_destroy(). */
struct d3d12_validation_tools *d3d12_validator_create()
{
   auto *tools = new d3d12_validation_tools();
   return tools;
}

/* Release helpers created by d3d12_validator_create().  Deleting a null
 * pointer is a no-op, so a NULL argument is safe. */
void d3d12_validator_destroy(struct d3d12_validation_tools *validator)
{
   delete validator;
}


93
94
95
96
97
98
99
100
101
/* pipe_screen::get_compiler_options hook.  This driver consumes NIR only,
 * and the returned options do not depend on screen or stage. */
const void *
d3d12_get_compiler_options(struct pipe_screen *screen,
                           enum pipe_shader_ir ir,
                           enum pipe_shader_type shader)
{
   (void)screen;
   (void)shader;
   assert(ir == PIPE_SHADER_IR_NIR);
   return dxil_get_nir_compiler_options();
}

102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
/* Map a GLSL sampler dimensionality onto the DXIL resource dimension used
 * for SRV bindings.  Dimensions without an explicit mapping are reported
 * as RESOURCE_DIMENSION_UNKNOWN. */
static uint32_t
resource_dimension(enum glsl_sampler_dim dim)
{
   if (dim == GLSL_SAMPLER_DIM_1D)
      return RESOURCE_DIMENSION_TEXTURE1D;
   if (dim == GLSL_SAMPLER_DIM_2D)
      return RESOURCE_DIMENSION_TEXTURE2D;
   if (dim == GLSL_SAMPLER_DIM_3D)
      return RESOURCE_DIMENSION_TEXTURE3D;
   if (dim == GLSL_SAMPLER_DIM_CUBE)
      return RESOURCE_DIMENSION_TEXTURECUBE;
   return RESOURCE_DIMENSION_UNKNOWN;
}

119
static struct d3d12_shader *
120
compile_nir(struct d3d12_context *ctx, struct d3d12_shader_selector *sel,
121
            struct d3d12_shader_key *key, struct nir_shader *nir)
122
{
123
   struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
124
   struct d3d12_shader *shader = rzalloc(sel, d3d12_shader);
125
   shader->key = *key;
126
   shader->nir = nir;
127
   sel->current = shader;
128

129
130
   struct nir_lower_tex_options tex_options = { };
   tex_options.lower_txp = ~0u; /* No equivalent for textureProj */
Gert Wollny's avatar
Gert Wollny committed
131
132
   tex_options.lower_rect = true;
   tex_options.lower_rect_offset = true;
133

134
   NIR_PASS_V(nir, nir_lower_samplers);
135
   NIR_PASS_V(nir, d3d12_create_bare_samplers);
136
   NIR_PASS_V(nir, nir_lower_tex, &tex_options);
137

138
139
   if (key->samples_int_textures)
      NIR_PASS_V(nir, dxil_lower_sample_to_txf_for_integer_tex,
140
141
                 key->tex_wrap_states, key->swizzle_state,
                 screen->base.get_paramf(&screen->base, PIPE_CAPF_MAX_TEXTURE_LOD_BIAS));
142

143
144
145
   if (key->vs.needs_format_emulation)
      d3d12_nir_lower_vs_vertex_conversion(nir, key->vs.format_conversion);

146
147
148
149
150
151
   uint32_t num_ubos_before_lower_to_ubo = nir->info.num_ubos;
   uint32_t num_uniforms_before_lower_to_ubo = nir->num_uniforms;
   NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, 16);
   shader->has_default_ubo0 = num_uniforms_before_lower_to_ubo > 0 &&
                              nir->info.num_ubos > num_ubos_before_lower_to_ubo;

152
   if (key->last_vertex_processing_stage) {
153
154
      if (key->invert_depth)
         NIR_PASS_V(nir, d3d12_nir_invert_depth);
155
156
157
      NIR_PASS_V(nir, nir_lower_clip_halfz);
      NIR_PASS_V(nir, d3d12_lower_yflip);
   }
158
   NIR_PASS_V(nir, nir_lower_packed_ubo_loads);
159
   NIR_PASS_V(nir, d3d12_lower_load_first_vertex);
160
   NIR_PASS_V(nir, d3d12_lower_state_vars, shader);
161
   NIR_PASS_V(nir, d3d12_lower_bool_input);
162

163
   struct nir_to_dxil_options opts = {};
164
   opts.interpolate_at_vertex = screen->have_load_at_vertex;
165
   opts.lower_int16 = !screen->opts4.Native16BitShaderOpsSupported;
166
   opts.ubo_binding_offset = shader->has_default_ubo0 ? 0 : 1;
167
   opts.provoking_vertex = key->fs.provoking_vertex;
168

169
   struct blob tmp;
170
   if (!nir_to_dxil(nir, &opts, &tmp)) {
171
172
173
      debug_printf("D3D12: nir_to_dxil failed\n");
      return NULL;
   }
174

175
   // Non-ubo variables
176
   nir_foreach_variable(var, &nir->uniforms) {
177
      auto type = glsl_without_array(var->type);
178
      if (glsl_type_is_sampler(type) && glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
179
         unsigned count = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
180
181
182
183
184
185
         for (unsigned i = 0; i < count; ++i) {
            shader->srv_bindings[shader->num_srv_bindings].index = var->data.binding + i;
            shader->srv_bindings[shader->num_srv_bindings].binding = var->data.binding;
            shader->srv_bindings[shader->num_srv_bindings].dimension = resource_dimension(glsl_get_sampler_dim(type));
            shader->num_srv_bindings++;
         }
186
      }
187
188
   }

189
190
191
192
193
194
195
196
   // Ubo variables
   if(nir->info.num_ubos) {
      // Ignore state_vars ubo as it is bound as root constants
      unsigned num_ubo_bindings = nir->info.num_ubos - (shader->state_vars_used ? 1 : 0);
      for(unsigned i = opts.ubo_binding_offset; i < num_ubo_bindings; ++i) {
         shader->cb_bindings[shader->num_cb_bindings++].binding = i;
      }
   }
197
198
199
200
201
202
   ctx->validation_tools->validate_and_sign(&tmp);

   if (d3d12_debug & D3D12_DEBUG_DISASS) {
      ctx->validation_tools->disassemble(&tmp);
   }

203
   blob_finish_get_buffer(&tmp, &shader->bytecode, &shader->bytecode_length);
204
205
206
207
208
209

   if (d3d12_debug & D3D12_DEBUG_DXIL) {
      char buf[256];
      static int i;
      snprintf(buf, sizeof(buf), "dump%02d.dxil", i++);
      FILE *fp = fopen(buf, "wb");
210
      fwrite(shader->bytecode, sizeof(char), shader->bytecode_length, fp);
211
212
213
      fclose(fp);
      fprintf(stderr, "wrote '%s'...\n", buf);
   }
214
215
216
   return shader;
}

217
218
/* Per-draw scratch state used while selecting shader variants.  The flags
 * cache the results of the needs_*()/*_lowered() helpers below so the key
 * filling code does not recompute them. */
struct d3d12_selection_context {
   struct d3d12_context *ctx;
   const struct pipe_draw_info *dinfo;
   bool needs_point_sprite_lowering;    /* expand points via a GS variant */
   bool needs_vertex_reordering;        /* GS must re-emit verts in new order */
   unsigned provoking_vertex;           /* 0 or last vertex of the base prim */
   bool alternate_tri;                  /* tri strip with alternating winding */
   unsigned fill_mode_lowered;          /* PIPE_POLYGON_MODE_* to emulate */
   unsigned cull_mode_lowered;          /* PIPE_FACE_* the GS must cull */
   bool manual_depth_range;             /* apply depth range in the FS */
   unsigned missing_dual_src_outputs;   /* bitmask (bits 0/1) of unwritten dual-src outputs */
   unsigned frag_result_color_lowering; /* cbuf count to broadcast gl_FragColor to, 0 = off */
};

231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
/* With dual-source blending enabled, return a bitmask (bit 0 = index 0,
 * bit 1 = index 1) of the color indices the fragment shader never stores
 * to, so a variant can supply the missing outputs.  Returns 0 when dual-src
 * blending is off or both indices are written. */
static unsigned
missing_dual_src_outputs(struct d3d12_context *ctx)
{
   if (!ctx->gfx_pipeline_state.blend->is_dual_src)
      return 0;

   struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
   nir_shader *s = fs->initial;

   unsigned seen = 0;
   nir_foreach_function(function, s) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic != nir_intrinsic_store_deref)
               continue;

            nir_variable *var = nir_intrinsic_get_var(intr, 0);
            if (var->data.mode != nir_var_shader_out)
               continue;
            if (var->data.location != FRAG_RESULT_COLOR &&
                var->data.location != FRAG_RESULT_DATA0)
               continue;

            seen |= 1u << var->data.index;
            if ((seen & 3) == 3)
               return 0;   /* both indices written - nothing missing */
         }
      }
   }

   return 3 & ~seen;
}

269
270
271
272
273
274
static unsigned
frag_result_color_lowering(struct d3d12_context *ctx)
{
   struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
   assert(fs);

275
   if (fs->initial->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR))
276
277
278
279
280
      return ctx->fb.nr_cbufs > 1 ? ctx->fb.nr_cbufs : 0;

   return 0;
}

281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
/* Report whether the fragment shader must apply the depth range itself. */
static bool
manual_depth_range(struct d3d12_context *ctx)
{
   if (!d3d12_need_zero_one_depth_range(ctx))
      return false;

   /**
    * If we can't use the D3D12 zero-one depth-range, we might have to apply
    * depth-range ourselves.
    *
    * Because we only need to override the depth-range to zero-one range in
    * the case where we write frag-depth, we only need to apply manual
    * depth-range to gl_FragCoord.z.
    *
    * No extra care is needed to be taken in the case where gl_FragDepth is
    * written conditionally, because the GLSL 4.60 spec states:
    *
    *    If a shader statically assigns a value to gl_FragDepth, and there
    *    is an execution path through the shader that does not set
    *    gl_FragDepth, then the value of the fragment's depth may be
    *    undefined for executions of the shader that take that path. That
    *    is, if the set of linked fragment shaders statically contain a
    *    write to gl_FragDepth, then it is responsible for always writing
    *    it.
    */

   struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
   if (!fs)
      return false;

   return (fs->initial->info.inputs_read & VARYING_BIT_POS) != 0;
}

311
312
313
314
315
316
static bool
needs_point_sprite_lowering(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
{
   struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
   struct d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];

317
   if (gs != NULL && !gs->is_gs_variant) {
318
      /* There is an user GS; Check if it outputs points with PSIZE */
319
320
      return (gs->initial->info.gs.output_primitive == GL_POINTS &&
              gs->initial->info.outputs_written & VARYING_BIT_PSIZ);
321
322
323
   } else {
      /* No user GS; check if we are drawing wide points */
      return (dinfo->mode == PIPE_PRIM_POINTS &&
324
              (ctx->gfx_pipeline_state.rast->base.point_size > 1.0 ||
325
326
               (ctx->gfx_pipeline_state.rast->base.point_size_per_vertex &&
                vs->initial->info.outputs_written & VARYING_BIT_PSIZ)) &&
327
              (vs->initial->info.outputs_written & VARYING_BIT_POS));
328
329
330
   }
}

331
332
333
334
335
336
337
338
/* Quads, quad strips and polygons carry implicit edge flags that D3D12
 * can't express, so those primitive types need fixing up. */
static bool
needs_edge_flag_fix(enum pipe_prim_type mode)
{
   switch (mode) {
   case PIPE_PRIM_QUADS:
   case PIPE_PRIM_QUAD_STRIP:
   case PIPE_PRIM_POLYGON:
      return true;
   default:
      return false;
   }
}

339
340
341
342
343
344
345
346
347
348
349
350
/* Return the polygon fill mode (PIPE_POLYGON_MODE_*) that must be emulated
 * by a GS variant for this draw, or PIPE_POLYGON_MODE_FILL when no
 * emulation is required. */
static unsigned
fill_mode_lowered(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo)
{
   struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];

   /* A user-supplied GS takes precedence, and only triangle draws have a
    * polygon fill mode to emulate. */
   if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL &&
        !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_gs_variant) ||
       ctx->gfx_pipeline_state.rast == NULL ||
       (dinfo->mode != PIPE_PRIM_TRIANGLES &&
        dinfo->mode != PIPE_PRIM_TRIANGLE_STRIP))
      return PIPE_POLYGON_MODE_FILL;

   /* D3D12 supports line mode (wireframe) but doesn't support edge flags */
   if (((ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_LINE &&
         ctx->gfx_pipeline_state.rast->base.cull_face != PIPE_FACE_FRONT) ||
        (ctx->gfx_pipeline_state.rast->base.fill_back == PIPE_POLYGON_MODE_LINE &&
         ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_FRONT)) &&
       (vs->initial->info.outputs_written & VARYING_BIT_EDGE ||
        needs_edge_flag_fix(ctx->initial_api_prim)))
      return PIPE_POLYGON_MODE_LINE;

   /* NOTE(review): only fill_front is consulted for point mode here —
    * confirm whether fill_back should be considered like the line case
    * above. */
   if (ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_POINT)
      return PIPE_POLYGON_MODE_POINT;

   return PIPE_POLYGON_MODE_FILL;
}

366
367
368
369
370
371
372
373
374
375
376
377
static unsigned
cull_mode_lowered(struct d3d12_context *ctx, unsigned fill_mode)
{
   if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL &&
        !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_gs_variant) ||
       ctx->gfx_pipeline_state.rast == NULL ||
       ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_NONE)
      return PIPE_FACE_NONE;

   return ctx->gfx_pipeline_state.rast->base.cull_face;
}

378
/* Compute which vertex of the rasterized primitive provides flat-shaded
 * attributes: 0 when flatshade_first is set, otherwise the last vertex of
 * the base primitive.  *alternate is set when the primitive is a (possibly
 * adjacent) triangle strip whose triangles alternate winding, which the
 * vertex-reordering GS variant needs to know. */
static unsigned
get_provoking_vertex(struct d3d12_selection_context *sel_ctx, bool *alternate)
{
   struct d3d12_shader_selector *vs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_VERTEX];
   struct d3d12_shader_selector *gs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
   struct d3d12_shader_selector *last_vertex_stage = gs && !gs->is_gs_variant ? gs : vs;

   /* Make sure GL prims match Gallium prims */
   STATIC_ASSERT(GL_POINTS == PIPE_PRIM_POINTS);
   STATIC_ASSERT(GL_LINES == PIPE_PRIM_LINES);
   STATIC_ASSERT(GL_LINE_STRIP == PIPE_PRIM_LINE_STRIP);

   enum pipe_prim_type mode;
   switch (last_vertex_stage->stage) {
   case PIPE_SHADER_GEOMETRY:
      /* A user GS determines the rasterized primitive type */
      mode = (enum pipe_prim_type)last_vertex_stage->current->nir->info.gs.output_primitive;
      break;
   case PIPE_SHADER_VERTEX:
      /* dinfo may be NULL outside of a draw; assume triangles then */
      mode = sel_ctx->dinfo ? sel_ctx->dinfo->mode : PIPE_PRIM_TRIANGLES;
      break;
   default:
      unreachable("Tesselation shaders are not supported");
   }

   bool flatshade_first = sel_ctx->ctx->gfx_pipeline_state.rast &&
                          sel_ctx->ctx->gfx_pipeline_state.rast->base.flatshade_first;
   /* Strips alternate winding unless a GS re-emits single primitives */
   *alternate = (mode == GL_TRIANGLE_STRIP || mode == GL_TRIANGLE_STRIP_ADJACENCY) &&
                (!gs || gs->initial->info.gs.vertices_out > u_prim_vertex_count(mode)->min);
   return flatshade_first ? 0 : u_prim_vertex_count(mode)->min - 1;
}

409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
static bool
has_flat_varyings(struct d3d12_context *ctx)
{
   struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];

   if (!fs || !fs->current)
      return false;

   nir_foreach_variable(input, &fs->current->nir->inputs) {
      if (input->data.interpolation == INTERP_MODE_FLAT)
         return true;
   }

   return false;
}

/* Decide whether a GS variant must re-emit vertices in a different order.
 * NOTE: may reset sel_ctx->provoking_vertex to 0 as a side effect in the
 * xfb-without-flat-shading case. */
static bool
needs_vertex_reordering(struct d3d12_selection_context *sel_ctx)
{
   struct d3d12_context *ctx = sel_ctx->ctx;
   bool flat = has_flat_varyings(ctx);
   bool xfb = ctx->gfx_pipeline_state.num_so_targets > 0;

   /* Fill-mode lowering already rewrites primitives; don't also reorder */
   if (fill_mode_lowered(ctx, sel_ctx->dinfo) != PIPE_POLYGON_MODE_FILL)
      return false;

   /* TODO add support for line primitives */

   /* When flat shading a triangle and provoking vertex is not the first one, we use load_at_vertex.
      If not available for this adapter, or if it's a triangle strip, we need to reorder the vertices */
   if (flat && sel_ctx->provoking_vertex >= 2 && (!d3d12_screen(ctx->base.screen)->have_load_at_vertex ||
                                                  sel_ctx->alternate_tri))
      return true;

   /* When transform feedback is enabled and the output is alternating (triangle strip or triangle
      strip with adjacency), we need to reorder vertices to get the order expected by OpenGL. This
      only works when there is no flat shading involved. In that scenario, we don't care about
      the provoking vertex. */
   if (xfb && !flat && sel_ctx->alternate_tri) {
      sel_ctx->provoking_vertex = 0;
      return true;
   }

   return false;
}

455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
/* Instantiate a shader in/out variable for location 'slot' from the
 * varying info previously captured by fill_varyings(). */
static nir_variable *
create_varying_from_info(nir_shader *nir, struct d3d12_varying_info *info,
                         unsigned slot, nir_variable_mode mode)
{
   char name[100];
   snprintf(name, ARRAY_SIZE(name),
            mode == nir_var_shader_in ? "in_%d" : "out_%d",
            info->vars[slot].driver_location);

   nir_variable *var = nir_variable_create(nir, mode, info->vars[slot].type, name);
   var->data.location = slot;
   var->data.driver_location = info->vars[slot].driver_location;
   var->data.interpolation = info->vars[slot].interpolation;
   return var;
}

473
static void
474
fill_varyings(struct d3d12_varying_info *info, struct exec_list *vars, uint64_t mask)
475
{
476
   nir_foreach_variable(var, vars) {
477
478
479
      unsigned slot = var->data.location;
      uint64_t slot_bit = BITFIELD64_BIT(slot);

480
      if (!(mask & slot_bit))
481
         continue;
482
483
484
485
      info->vars[slot].driver_location = var->data.driver_location;
      info->vars[slot].type = var->type;
      info->vars[slot].interpolation = var->data.interpolation;
      info->mask |= slot_bit;
486
487
488
   }
}

489
490
491
492
493
494
495
496
497
498
499
500
/* Collect the locations of all flat-interpolated FS inputs into the GS
 * variant key's flat_varyings mask. */
static void
fill_flat_varyings(struct d3d12_gs_variant_key *key, d3d12_shader_selector *fs)
{
   if (!fs || !fs->current)
      return;

   nir_foreach_variable(input, &fs->current->nir->inputs) {
      if (input->data.interpolation != INTERP_MODE_FLAT)
         continue;
      key->flat_varyings |= BITFIELD64_BIT(input->data.location);
   }
}

501
502
/* Ensure the geometry-shader slot holds exactly the GS variant (or no GS)
 * required by the current draw state: point-sprite expansion, vertex
 * reordering, or fill/cull-mode emulation.  A user-supplied GS is never
 * replaced. */
static void
validate_geometry_shader_variant(struct d3d12_selection_context *sel_ctx)
{
   struct d3d12_context *ctx = sel_ctx->ctx;
   d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX];
   d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
   struct d3d12_gs_variant_key key = {0};
   bool variant_needed = false;

   d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];

   /* Nothing to do if there is a user geometry shader bound */
   if (gs != NULL && !gs->is_gs_variant)
      return;

   /* Fill the geometry shader variant key */
   if (sel_ctx->needs_point_sprite_lowering) {
      key.passthrough = true;
      variant_needed = true;
   } else if (sel_ctx->needs_vertex_reordering) {
      /* TODO support cases where flat shading (pv != 0) and xfb are enabled */
      key.provoking_vertex = sel_ctx->provoking_vertex;
      key.alternate_tri = sel_ctx->alternate_tri;
      variant_needed = true;
   } else if (sel_ctx->fill_mode_lowered != PIPE_POLYGON_MODE_FILL) {
      key.fill_mode = sel_ctx->fill_mode_lowered;
      key.cull_mode = sel_ctx->cull_mode_lowered;
      key.has_front_face = (fs->initial->info.inputs_read & VARYING_BIT_FACE) ? 1 : 0;
      /* Winding only matters if we cull or the FS reads gl_FrontFacing */
      if (key.cull_mode != PIPE_FACE_NONE || key.has_front_face)
         key.front_ccw = ctx->gfx_pipeline_state.rast->base.front_ccw ^ (ctx->flip_y < 0);
      key.edge_flag_fix = needs_edge_flag_fix(ctx->initial_api_prim);
      fill_flat_varyings(&key, fs);
      if (key.flat_varyings != 0)
         key.flatshade_first = ctx->gfx_pipeline_state.rast->base.flatshade_first;
      variant_needed = true;
   }

   /* The variant passes through everything the VS writes */
   if (variant_needed)
      fill_varyings(&key.varyings, &vs->initial->outputs, vs->initial->info.outputs_written);

   /* Check if the currently bound geometry shader variant is correct */
   if (gs && memcmp(&gs->gs_key, &key, sizeof(key)) == 0)
      return;

   /* Find/create the proper variant and bind it */
   gs = variant_needed ? d3d12_get_gs_variant(ctx, &key) : NULL;
   ctx->gfx_stages[PIPE_SHADER_GEOMETRY] = gs;
}

static bool
551
552
d3d12_compare_shader_keys(const d3d12_shader_key *expect, const d3d12_shader_key *have)
{
553
   assert(expect->stage == have->stage);
554
555
556
557
558
   assert(expect);
   assert(have);

   /* Because we only add varyings we check that a shader has at least the expected in-
    * and outputs. */
559
560
561
562
   if (memcmp(&expect->required_varying_inputs, &have->required_varying_inputs,
              sizeof(struct d3d12_varying_info)) ||
       memcmp(&expect->required_varying_outputs, &have->required_varying_outputs,
              sizeof(struct d3d12_varying_info)) ||
563
564
       (expect->next_varying_inputs != have->next_varying_inputs) ||
       (expect->prev_varying_outputs != have->prev_varying_outputs))
565
566
      return false;

567
568
569
570
571
   if (expect->stage == PIPE_SHADER_GEOMETRY) {
      if (expect->gs.writes_psize) {
         if (!have->gs.writes_psize ||
             expect->gs.point_pos_stream_out != have->gs.point_pos_stream_out ||
             expect->gs.sprite_coord_enable != have->gs.sprite_coord_enable ||
572
573
             expect->gs.sprite_origin_upper_left != have->gs.sprite_origin_upper_left ||
             expect->gs.point_size_per_vertex != have->gs.point_size_per_vertex)
574
575
576
577
            return false;
      } else if (have->gs.writes_psize) {
         return false;
      }
578
579
      if (expect->gs.primitive_id != have->gs.primitive_id ||
          expect->gs.triangle_strip != have->gs.triangle_strip)
580
         return false;
581
582
583
   } else if (expect->stage == PIPE_SHADER_FRAGMENT) {
      if (expect->fs.frag_result_color_lowering != have->fs.frag_result_color_lowering)
         return false;
584
585
      if (expect->fs.manual_depth_range != have->fs.manual_depth_range)
         return false;
586
587
      if (expect->fs.polygon_stipple != have->fs.polygon_stipple)
         return false;
588
589
   }

590
   if (expect->samples_int_textures != have->samples_int_textures)
591
592
593
594
595
596
597
      return false;

   if (expect->n_texture_states != have->n_texture_states)
      return false;

   if (memcmp(expect->tex_wrap_states, have->tex_wrap_states,
              expect->n_texture_states * sizeof(dxil_wrap_sampler_state)))
598
599
      return false;

600
601
   if (memcmp(expect->swizzle_state, have->swizzle_state,
              expect->n_texture_states * sizeof(dxil_texture_swizzle_state)))
602
603
      return false;

604
605
   if (memcmp(expect->sampler_compare_funcs, have->sampler_compare_funcs,
              expect->n_texture_states * sizeof(enum compare_func)))
606
607
      return false;

608
609
610
   if (expect->invert_depth != have->invert_depth)
      return false;

611
612
613
614
615
616
617
618
619
620
621
   if (expect->stage == PIPE_SHADER_VERTEX) {
      if (expect->vs.needs_format_emulation != have->vs.needs_format_emulation)
         return false;

      if (expect->vs.needs_format_emulation) {
         if (memcmp(expect->vs.format_conversion, have->vs.format_conversion,
                    PIPE_MAX_ATTRIBS * sizeof (enum pipe_format)))
            return false;
      }
   }

622
   if (expect->fs.provoking_vertex != have->fs.provoking_vertex)
623
624
      return false;

625
   return true;
626
627
}

628
629
/* Build the variant key for stage 'sel' from the current draw/pipeline
 * state and its pipeline neighbours ('prev' feeds it, 'next' consumes it).
 * The key is fully zero-initialized first, so unset fields compare equal
 * in d3d12_compare_shader_keys(). */
static void
d3d12_fill_shader_key(struct d3d12_selection_context *sel_ctx,
                      d3d12_shader_key *key, d3d12_shader_selector *sel,
                      d3d12_shader_selector *prev, d3d12_shader_selector *next)
{
   pipe_shader_type stage = sel->stage;

   /* Inputs the pipeline generates itself, never matched against prev */
   uint64_t system_generated_in_values =
         VARYING_BIT_FACE |
         VARYING_BIT_PNTC |
         VARYING_BIT_PRIMITIVE_ID;

   /* Outputs consumed by fixed function rather than the next stage */
   uint64_t system_out_values =
         VARYING_BIT_CLIP_DIST0 |
         VARYING_BIT_CLIP_DIST1;

   memset(key, 0, sizeof(d3d12_shader_key));
   key->stage = stage;

   if (prev) {
      /* We require as inputs what the previous stage has written,
       * except certain system values */
      if (stage == PIPE_SHADER_FRAGMENT || stage == PIPE_SHADER_GEOMETRY)
         system_out_values |= VARYING_BIT_POS;
      if (stage == PIPE_SHADER_FRAGMENT)
         system_out_values |= VARYING_BIT_PSIZ;
      uint64_t mask = prev->current->nir->info.outputs_written & ~system_out_values;
      fill_varyings(&key->required_varying_inputs, &prev->current->nir->outputs, mask);
      key->prev_varying_outputs = prev->current->nir->info.outputs_written;

      /* Set the provoking vertex based on the previous shader output. Only set the
       * key value if the driver actually supports changing the provoking vertex though */
      if (stage == PIPE_SHADER_FRAGMENT && sel_ctx->ctx->gfx_pipeline_state.rast &&
          !sel_ctx->needs_vertex_reordering &&
          d3d12_screen(sel_ctx->ctx->base.screen)->have_load_at_vertex)
         key->fs.provoking_vertex = sel_ctx->provoking_vertex;
   }

   /* We require as outputs what the next stage reads,
    * except certain system values */
   if (next) {
      if (!next->is_gs_variant) {
         if (stage == PIPE_SHADER_VERTEX)
            system_generated_in_values |= VARYING_BIT_POS;
         uint64_t mask = next->current->nir->info.inputs_read & ~system_generated_in_values;
         fill_varyings(&key->required_varying_outputs, &next->current->nir->inputs, mask);
      }
      key->next_varying_inputs = next->current->nir->info.inputs_read;
   }

   /* Last stage before rasterization: applies depth inversion / y-flip */
   if (stage == PIPE_SHADER_GEOMETRY ||
       (stage == PIPE_SHADER_VERTEX && (!next || next->stage != PIPE_SHADER_GEOMETRY))) {
      key->last_vertex_processing_stage = 1;
      key->invert_depth = sel_ctx->ctx->reverse_depth_range;
      /* Polygon stipple reads the position varying in the FS */
      if (sel_ctx->ctx->pstipple.enabled)
         key->next_varying_inputs |= VARYING_BIT_POS;
   }

   if (stage == PIPE_SHADER_GEOMETRY && sel_ctx->ctx->gfx_pipeline_state.rast) {
      struct pipe_rasterizer_state *rast = &sel_ctx->ctx->gfx_pipeline_state.rast->base;
      if (sel_ctx->needs_point_sprite_lowering) {
         key->gs.writes_psize = 1;
         key->gs.point_size_per_vertex = rast->point_size_per_vertex;
         key->gs.sprite_coord_enable = rast->sprite_coord_enable;
         key->gs.sprite_origin_upper_left = (rast->sprite_coord_mode != PIPE_SPRITE_COORD_LOWER_LEFT);
         if (sel_ctx->ctx->flip_y < 0)
            key->gs.sprite_origin_upper_left = !key->gs.sprite_origin_upper_left;
         key->gs.aa_point = rast->point_smooth;
         key->gs.stream_output_factor = 6;
      } else if (sel_ctx->fill_mode_lowered == PIPE_POLYGON_MODE_LINE) {
         key->gs.stream_output_factor = 2;
      } else if (sel_ctx->needs_vertex_reordering && !sel->is_gs_variant) {
         key->gs.triangle_strip = 1;
      }

      if (sel->is_gs_variant && next && next->initial->info.inputs_read & VARYING_BIT_PRIMITIVE_ID)
         key->gs.primitive_id = 1;
   } else if (stage == PIPE_SHADER_FRAGMENT) {
      key->fs.missing_dual_src_outputs = sel_ctx->missing_dual_src_outputs;
      key->fs.frag_result_color_lowering = sel_ctx->frag_result_color_lowering;
      key->fs.manual_depth_range = sel_ctx->manual_depth_range;
      key->fs.polygon_stipple = sel_ctx->ctx->pstipple.enabled;
   }

   if (sel->samples_int_textures) {
      key->samples_int_textures = sel->samples_int_textures;
      key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage];
      /* Copy only states with integer textures */
      for(int i = 0; i < key->n_texture_states; ++i) {
         auto& wrap_state = sel_ctx->ctx->tex_wrap_states[stage][i];
         if (wrap_state.is_int_sampler) {
            memcpy(&key->tex_wrap_states[i], &wrap_state, sizeof(wrap_state));
            key->swizzle_state[i] = sel_ctx->ctx->tex_swizzle_state[stage][i];
         }
      }
   }

   if (sel->compare_with_lod_bias_grad) {
      key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage];
      memcpy(key->sampler_compare_funcs, sel_ctx->ctx->tex_compare_func[stage],
             key->n_texture_states * sizeof(enum compare_func));
      memcpy(key->swizzle_state, sel_ctx->ctx->tex_swizzle_state[stage],
             key->n_texture_states * sizeof(dxil_texture_swizzle_state));
   }

   /* Vertex-format emulation (see d3d12_nir_lower_vs_vertex_conversion) */
   if (stage == PIPE_SHADER_VERTEX && sel_ctx->ctx->gfx_pipeline_state.ves) {
      key->vs.needs_format_emulation = sel_ctx->ctx->gfx_pipeline_state.ves->needs_format_emulation;
      if (key->vs.needs_format_emulation) {
         memcpy(key->vs.format_conversion, sel_ctx->ctx->gfx_pipeline_state.ves->format_conversion,
                sel_ctx->ctx->gfx_pipeline_state.ves->num_elements * sizeof(enum pipe_format));
      }
   }

   /* A GS variant that computes front-facing itself feeds it to the FS as
    * a varying instead of SV_IsFrontFace */
   if (stage == PIPE_SHADER_FRAGMENT &&
       sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY] &&
       sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_gs_variant &&
       sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->gs_key.has_front_face) {
      key->fs.remap_front_facing = 1;
   }
}

/* Pick (or build) the shader variant matching the current pipeline state.
 *
 * Fills a d3d12_shader_key from the selection context, searches the
 * selector's variant list for a key match, and on a miss clones the
 * selector's initial NIR, applies the state-dependent lowering passes
 * the key demands, compiles the result, and prepends it to the variant
 * chain.  On return sel->current points at the chosen variant.
 */
static void
select_shader_variant(struct d3d12_selection_context *sel_ctx, d3d12_shader_selector *sel,
                     d3d12_shader_selector *prev, d3d12_shader_selector *next)
{
   struct d3d12_context *ctx = sel_ctx->ctx;
   d3d12_shader_key key;
   nir_shader *new_nir_variant;
   /* UINT32_MAX == "no polygon-stipple texture bound for this variant" */
   unsigned pstipple_binding = UINT32_MAX;

   d3d12_fill_shader_key(sel_ctx, &key, sel, prev, next);

   /* Check for an existing variant */
   for (d3d12_shader *variant = sel->first; variant;
        variant = variant->next_variant) {

      if (d3d12_compare_shader_keys(&key, &variant->key)) {
         sel->current = variant;
         return;
      }
   }

   /* Clone the NIR shader; sel->initial is the untouched blueprint and is
    * never lowered in place, so every variant starts from the same IR. */
   new_nir_variant = nir_shader_clone(sel, sel->initial);

   /* Apply any needed lowering passes.  Each pass that changes the I/O
    * signature is followed by nir_shader_gather_info() so later passes and
    * the varying re-sort below see up-to-date inputs_read/outputs_written. */
   if (key.gs.writes_psize) {
      NIR_PASS_V(new_nir_variant, d3d12_lower_point_sprite,
                 !key.gs.sprite_origin_upper_left,
                 key.gs.point_size_per_vertex,
                 key.gs.sprite_coord_enable,
                 key.next_varying_inputs);

      nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
      nir_shader_gather_info(new_nir_variant, impl);
   }

   if (key.gs.primitive_id) {
      NIR_PASS_V(new_nir_variant, d3d12_lower_primitive_id);

      nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
      nir_shader_gather_info(new_nir_variant, impl);
   }

   if (key.gs.triangle_strip)
      NIR_PASS_V(new_nir_variant, d3d12_lower_triangle_strip);

   if (key.fs.polygon_stipple) {
      /* Records the stipple texture's binding slot so the state code can
       * upload the pattern there (stored on the variant below). */
      NIR_PASS_V(new_nir_variant, nir_lower_pstipple_fs,
                 &pstipple_binding, 0, false);

      nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
      nir_shader_gather_info(new_nir_variant, impl);
   }

   if (key.fs.remap_front_facing) {
      /* A generated GS variant passes front-facing as a regular varying
       * (VARYING_SLOT_VAR12); retarget the FS input to read it from there. */
      nir_foreach_variable(input, &new_nir_variant->inputs) {
         if (input->data.location == VARYING_SLOT_FACE)
            input->data.location = VARYING_SLOT_VAR12;
      }

      nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant);
      nir_shader_gather_info(new_nir_variant, impl);
   }

   /* The two color-output fixups are mutually exclusive by construction of
    * the key: either add the missing dual-source target or lower
    * FRAG_RESULT_COLOR broadcasts, never both. */
   if (key.fs.missing_dual_src_outputs) {
      NIR_PASS_V(new_nir_variant, d3d12_add_missing_dual_src_target,
                 key.fs.missing_dual_src_outputs);
   } else if (key.fs.frag_result_color_lowering) {
      NIR_PASS_V(new_nir_variant, d3d12_lower_frag_result,
                 key.fs.frag_result_color_lowering);
   }

   if (key.fs.manual_depth_range)
      NIR_PASS_V(new_nir_variant, d3d12_lower_depth_range);

   if (sel->compare_with_lod_bias_grad)
      NIR_PASS_V(new_nir_variant, d3d12_lower_sample_tex_compare, key.n_texture_states,
                 key.sampler_compare_funcs, key.swizzle_state);

   /* Add the needed in and outputs, and re-sort */
   uint64_t mask = key.required_varying_inputs.mask & ~new_nir_variant->info.inputs_read;

   if (prev) {
      /* Materialize inputs the previous stage writes but this shader never
       * declared, then renumber driver_locations to line up with it. */
      while (mask) {
         int slot = u_bit_scan64(&mask);
         create_varying_from_info(new_nir_variant, &key.required_varying_inputs, slot, nir_var_shader_in);
      }
      d3d12_reassign_driver_locations(&new_nir_variant->inputs, key.prev_varying_outputs);
   }

   mask = key.required_varying_outputs.mask & ~new_nir_variant->info.outputs_written;

   if (next) {
      /* Same for outputs the next stage expects to read. */
      while (mask) {
         int slot = u_bit_scan64(&mask);
         create_varying_from_info(new_nir_variant, &key.required_varying_outputs, slot, nir_var_shader_out);
      }
      d3d12_reassign_driver_locations(&new_nir_variant->outputs, key.next_varying_inputs);
   }

   d3d12_shader *new_variant = compile_nir(ctx, sel, &key, new_nir_variant);
   assert(new_variant);

   /* keep track of polygon stipple texture binding */
   new_variant->pstipple_binding = pstipple_binding;

   /* prepend the new shader in the selector chain and pick it */
   new_variant->next_variant = sel->first;
   sel->current = sel->first = new_variant;
}

860
static d3d12_shader_selector *
861
862
863
864
865
866
867
868
869
get_prev_shader(struct d3d12_context *ctx, pipe_shader_type current)
{
   /* No TESS_CTRL or TESS_EVAL yet */

   switch (current) {
   case PIPE_SHADER_VERTEX:
      return NULL;
   case PIPE_SHADER_FRAGMENT:
      if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY])
870
         return ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
871
872
      /* fallthrough */
   case PIPE_SHADER_GEOMETRY:
873
      return ctx->gfx_stages[PIPE_SHADER_VERTEX];
874
875
876
877
878
   default:
      unreachable("shader type not supported");
   }
}

879
static d3d12_shader_selector *
880
881
882
883
884
885
886
get_next_shader(struct d3d12_context *ctx, pipe_shader_type current)
{
   /* No TESS_CTRL or TESS_EVAL yet */

   switch (current) {
   case PIPE_SHADER_VERTEX:
      if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY])
887
         return ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
888
889
      /* fallthrough */
   case PIPE_SHADER_GEOMETRY:
890
      return ctx->gfx_stages[PIPE_SHADER_FRAGMENT];
891
892
893
894
895
896
897
   case PIPE_SHADER_FRAGMENT:
      return NULL;
   default:
      unreachable("shader type not supported");
   }
}

/* Bit flags returned by scan_texture_use() describing texture-instruction
 * patterns that need state-dependent lowering on D3D12. */
enum tex_scan_flags {
   /* Shader samples an int/uint texture (needs emulation lowering). */
   TEX_SAMPLE_INTEGER_TEXTURE = 1 << 0,
   /* Shadow-compare sample combined with explicit bias/LOD/gradient. */
   TEX_CMP_WITH_LOD_BIAS_GRAD = 1 << 1,
   /* All flags set — used by the scanner as an early-out condition. */
   TEX_SCAN_ALL_FLAGS         = (1 << 2) - 1
};

static unsigned
905
906
scan_texture_use(nir_shader *nir)
{
907
   unsigned result = 0;
908
909
910
911
912
913
914
915
916
   nir_foreach_function(func, nir) {
      nir_foreach_block(block, func->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type == nir_instr_type_tex) {
               auto tex = nir_instr_as_tex(instr);
               switch (tex->op) {
               case nir_texop_txb:
               case nir_texop_txl:
               case nir_texop_txd:
917
918
919
920
                  if (tex->is_shadow)
                     result |= TEX_CMP_WITH_LOD_BIAS_GRAD;
                  /* fallthrough */
               case nir_texop_tex:
921
                  if (tex->dest_type & (nir_type_int | nir_type_uint))
922
                     result |= TEX_SAMPLE_INTEGER_TEXTURE;
923
924
925
926
               default:
                  ;
               }
            }
927
928
            if (TEX_SCAN_ALL_FLAGS == result)
               return result;
929
930
931
         }
      }
   }
932
   return result;
933
934
}

935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
static uint64_t
update_so_info(struct pipe_stream_output_info *so_info,
               uint64_t outputs_written)
{
   uint64_t so_outputs = 0;
   uint8_t reverse_map[64] = {0};
   unsigned slot = 0;

   while (outputs_written)
      reverse_map[slot++] = u_bit_scan64(&outputs_written);

   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      struct pipe_stream_output *output = &so_info->output[i];

      /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
      output->register_index = reverse_map[output->register_index];

      so_outputs |= 1ull << output->register_index;
   }

   return so_outputs;
}

958
struct d3d12_shader_selector *
959
960
961
d3d12_create_shader(struct d3d12_context *ctx,
                    pipe_shader_type stage,
                    const struct pipe_shader_state *shader)
962
963
964
965
966
967
968
969
970
971
972
973
974
{
   struct d3d12_shader_selector *sel = rzalloc(nullptr, d3d12_shader_selector);
   sel->stage = stage;

   struct nir_shader *nir = NULL;

   if (shader->type == PIPE_SHADER_IR_NIR) {
      nir = (nir_shader *)shader->ir.nir;
   } else {
      assert(shader->type == PIPE_SHADER_IR_TGSI);
      nir = tgsi_to_nir(shader->tokens, ctx->base.screen);
   }

975
976
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

977
978
   unsigned tex_scan_result = scan_texture_use(nir);
   sel->samples_int_textures = (tex_scan_result & TEX_SAMPLE_INTEGER_TEXTURE) != 0;
979
   sel->compare_with_lod_bias_grad = (tex_scan_result & TEX_CMP_WITH_LOD_BIAS_GRAD) != 0;
980

981
   memcpy(&sel->so_info, &shader->stream_output, sizeof(sel->so_info));
982
   update_so_info(&sel->so_info, nir->info.outputs_written);
983

984
   assert(nir != NULL);
985
986
   d3d12_shader_selector *prev = get_prev_shader(ctx, sel->stage);
   d3d12_shader_selector *next = get_next_shader(ctx, sel->stage);
987

988
989
990
   uint64_t in_mask = nir->info.stage == MESA_SHADER_VERTEX ?
                         0 : VARYING_BIT_PRIMITIVE_ID;

991
   uint64_t out_mask = nir->info.stage == MESA_SHADER_FRAGMENT ?
992
993
                          (1ull << FRAG_RESULT_STENCIL) :
                          VARYING_BIT_PRIMITIVE_ID;
994

995
   d3d12_fix_io_uint_type(nir, in_mask, out_mask);
996

997
   if (nir->info.stage != MESA_SHADER_VERTEX)
998
999
1000
      nir->info.inputs_read =
            d3d12_reassign_driver_locations(&nir->inputs,
                                            prev ? prev->current->nir->info.outputs_written : 0);
1001
1002
1003
   else
      nir->info.inputs_read = d3d12_sort_by_driver_location(&nir->inputs);

1004
   if (nir->info.stage != MESA_SHADER_FRAGMENT) {
1005
1006
1007
      nir->info.outputs_written =
            d3d12_reassign_driver_locations(&nir->outputs,
                                            next ? next->current->nir->info.inputs_read : 0);
1008
   } else {
1009
      NIR_PASS_V(nir, nir_lower_fragcoord_wtrans);
1010
      d3d12_sort_ps_outputs(&nir->outputs);
1011
   }
1012

1013
1014
1015
1016
1017
   /* Integer cube maps are not supported in DirectX because sampling is not supported
    * on integer textures and TextureLoad is not supported for cube maps, so we have to
    * lower integer cube maps to be handled like 2D textures arrays*/
   NIR_PASS_V(nir, d3d12_lower_int_cubmap_to_array);

1018
1019
1020
   /* Keep this initial shader as the blue print for possible variants */
   sel->initial = nir;

1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
   /*
    * We must compile some shader here, because if the previous or a next shaders exists later
    * when the shaders are bound, then the key evaluation in the shader selector will access
    * the current variant of these  prev and next shader, and we can only assign
    * a current variant when it has been successfully compiled.
    *
    * For shaders that require lowering because certain instructions are not available
    * and their emulation is state depended (like sampling an integer texture that must be
    * emulated and needs handling of boundary conditions, or shadow compare sampling with LOD),
    * we must go through the shader selector here to create a compilable variant.
    * For shaders that are not depended on the state this is just compiling the original
    * shader.
1033
1034
1035
1036
    *
    * TODO: get rid of having to compiling the shader here if it can be forseen that it will
    * be thrown away (i.e. it depends on states that are likely to change before the shader is
    * used for the first time)