radv_cmd_buffer.c 344 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
28
29
30
#include "radv_cs.h"
#include "radv_debug.h"
#include "radv_meta.h"
31
32
#include "radv_private.h"
#include "radv_radeon_winsys.h"
33
#include "radv_shader.h"
34
35
#include "sid.h"
#include "vk_format.h"
36
#include "vk_util.h"
37
#include "vk_enum_defines.h"
38

39
40
#include "ac_debug.h"

41
42
#include "util/fast_idiv_by_const.h"

43
/* Prefetch flag bits. Every enumerator except RADV_PREFETCH_SHADERS is a
 * distinct single bit, so values can be OR-ed together into a mask.
 * RADV_PREFETCH_SHADERS is the union of all per-shader-stage bits; it does
 * not include RADV_PREFETCH_VBO_DESCRIPTORS.
 */
enum {
   RADV_PREFETCH_VBO_DESCRIPTORS = (1 << 0),
   RADV_PREFETCH_VS = (1 << 1),
   RADV_PREFETCH_TCS = (1 << 2),
   RADV_PREFETCH_TES = (1 << 3),
   RADV_PREFETCH_GS = (1 << 4),
   RADV_PREFETCH_PS = (1 << 5),
   RADV_PREFETCH_SHADERS = (RADV_PREFETCH_VS | RADV_PREFETCH_TCS | RADV_PREFETCH_TES |
                            RADV_PREFETCH_GS | RADV_PREFETCH_PS)
};

54
55
56
57
58
59
/* Mask of the six Vulkan ray-tracing shader stage bits (raygen, any-hit,
 * closest-hit, miss, intersection and callable) OR-ed together.
 */
enum {
   RADV_RT_STAGE_BITS = (VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
                         VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_MISS_BIT_KHR |
                         VK_SHADER_STAGE_INTERSECTION_BIT_KHR | VK_SHADER_STAGE_CALLABLE_BIT_KHR)
};

60
/* Forward declaration: the function has internal linkage, so its definition
 * lives elsewhere in this translation unit.
 */
static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer,
                                         struct radv_image *image, VkImageLayout src_layout,
                                         bool src_render_loop, VkImageLayout dst_layout,
                                         bool dst_render_loop, uint32_t src_family,
                                         uint32_t dst_family, const VkImageSubresourceRange *range,
                                         struct radv_sample_locations_state *sample_locs);
66

67
68
/* Forward declaration: the function has internal linkage, so its definition
 * lives elsewhere in this translation unit.
 */
static void radv_set_rt_stack_size(struct radv_cmd_buffer *cmd_buffer, uint32_t size);

69
const struct radv_dynamic_state default_dynamic_state = {
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
   .viewport =
      {
         .count = 0,
      },
   .scissor =
      {
         .count = 0,
      },
   .line_width = 1.0f,
   .depth_bias =
      {
         .bias = 0.0f,
         .clamp = 0.0f,
         .slope = 0.0f,
      },
   .blend_constants = {0.0f, 0.0f, 0.0f, 0.0f},
   .depth_bounds =
      {
         .min = 0.0f,
         .max = 1.0f,
      },
   .stencil_compare_mask =
      {
         .front = ~0u,
         .back = ~0u,
      },
   .stencil_write_mask =
      {
         .front = ~0u,
         .back = ~0u,
      },
   .stencil_reference =
      {
         .front = 0u,
         .back = 0u,
      },
   .line_stipple =
      {
         .factor = 0u,
         .pattern = 0u,
      },
   .cull_mode = 0u,
   .front_face = 0u,
   .primitive_topology = 0u,
   .fragment_shading_rate =
      {
         .size = {1u, 1u},
         .combiner_ops = {VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR,
                          VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR},
      },
120
   .depth_bias_enable = 0u,
121
   .primitive_restart_enable = 0u,
122
   .rasterizer_discard_enable = 0u,
123
   .logic_op = 0u,
124
   .color_write_enable = 0xffffffffu,
125
126
};

127
static void
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_dynamic_state *src)
{
   struct radv_dynamic_state *dest = &cmd_buffer->state.dynamic;
   uint64_t copy_mask = src->mask;
   uint64_t dest_mask = 0;

   dest->discard_rectangle.count = src->discard_rectangle.count;
   dest->sample_location.count = src->sample_location.count;

   if (copy_mask & RADV_DYNAMIC_VIEWPORT) {
      if (dest->viewport.count != src->viewport.count) {
         dest->viewport.count = src->viewport.count;
         dest_mask |= RADV_DYNAMIC_VIEWPORT;
      }

      if (memcmp(&dest->viewport.viewports, &src->viewport.viewports,
                 src->viewport.count * sizeof(VkViewport))) {
         typed_memcpy(dest->viewport.viewports, src->viewport.viewports, src->viewport.count);
146
         typed_memcpy(dest->viewport.xform, src->viewport.xform, src->viewport.count);
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
         dest_mask |= RADV_DYNAMIC_VIEWPORT;
      }
   }

   if (copy_mask & RADV_DYNAMIC_SCISSOR) {
      if (dest->scissor.count != src->scissor.count) {
         dest->scissor.count = src->scissor.count;
         dest_mask |= RADV_DYNAMIC_SCISSOR;
      }

      if (memcmp(&dest->scissor.scissors, &src->scissor.scissors,
                 src->scissor.count * sizeof(VkRect2D))) {
         typed_memcpy(dest->scissor.scissors, src->scissor.scissors, src->scissor.count);
         dest_mask |= RADV_DYNAMIC_SCISSOR;
      }
   }

   if (copy_mask & RADV_DYNAMIC_LINE_WIDTH) {
      if (dest->line_width != src->line_width) {
         dest->line_width = src->line_width;
         dest_mask |= RADV_DYNAMIC_LINE_WIDTH;
      }
   }

   if (copy_mask & RADV_DYNAMIC_DEPTH_BIAS) {
      if (memcmp(&dest->depth_bias, &src->depth_bias, sizeof(src->depth_bias))) {
         dest->depth_bias = src->depth_bias;
         dest_mask |= RADV_DYNAMIC_DEPTH_BIAS;
      }
   }

   if (copy_mask & RADV_DYNAMIC_BLEND_CONSTANTS) {
      if (memcmp(&dest->blend_constants, &src->blend_constants, sizeof(src->blend_constants))) {
         typed_memcpy(dest->blend_constants, src->blend_constants, 4);
         dest_mask |= RADV_DYNAMIC_BLEND_CONSTANTS;
      }
   }

   if (copy_mask & RADV_DYNAMIC_DEPTH_BOUNDS) {
      if (memcmp(&dest->depth_bounds, &src->depth_bounds, sizeof(src->depth_bounds))) {
         dest->depth_bounds = src->depth_bounds;
         dest_mask |= RADV_DYNAMIC_DEPTH_BOUNDS;
      }
   }

   if (copy_mask & RADV_DYNAMIC_STENCIL_COMPARE_MASK) {
      if (memcmp(&dest->stencil_compare_mask, &src->stencil_compare_mask,
                 sizeof(src->stencil_compare_mask))) {
         dest->stencil_compare_mask = src->stencil_compare_mask;
         dest_mask |= RADV_DYNAMIC_STENCIL_COMPARE_MASK;
      }
   }

   if (copy_mask & RADV_DYNAMIC_STENCIL_WRITE_MASK) {
      if (memcmp(&dest->stencil_write_mask, &src->stencil_write_mask,
                 sizeof(src->stencil_write_mask))) {
         dest->stencil_write_mask = src->stencil_write_mask;
         dest_mask |= RADV_DYNAMIC_STENCIL_WRITE_MASK;
      }
   }

   if (copy_mask & RADV_DYNAMIC_STENCIL_REFERENCE) {
      if (memcmp(&dest->stencil_reference, &src->stencil_reference,
                 sizeof(src->stencil_reference))) {
         dest->stencil_reference = src->stencil_reference;
         dest_mask |= RADV_DYNAMIC_STENCIL_REFERENCE;
      }
   }

   if (copy_mask & RADV_DYNAMIC_DISCARD_RECTANGLE) {
      if (memcmp(&dest->discard_rectangle.rectangles, &src->discard_rectangle.rectangles,
                 src->discard_rectangle.count * sizeof(VkRect2D))) {
         typed_memcpy(dest->discard_rectangle.rectangles, src->discard_rectangle.rectangles,
                      src->discard_rectangle.count);
         dest_mask |= RADV_DYNAMIC_DISCARD_RECTANGLE;
      }
   }

   if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
      if (dest->sample_location.per_pixel != src->sample_location.per_pixel ||
          dest->sample_location.grid_size.width != src->sample_location.grid_size.width ||
          dest->sample_location.grid_size.height != src->sample_location.grid_size.height ||
          memcmp(&dest->sample_location.locations, &src->sample_location.locations,
                 src->sample_location.count * sizeof(VkSampleLocationEXT))) {
         dest->sample_location.per_pixel = src->sample_location.per_pixel;
         dest->sample_location.grid_size = src->sample_location.grid_size;
         typed_memcpy(dest->sample_location.locations, src->sample_location.locations,
                      src->sample_location.count);
         dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS;
      }
   }

   if (copy_mask & RADV_DYNAMIC_LINE_STIPPLE) {
      if (memcmp(&dest->line_stipple, &src->line_stipple, sizeof(src->line_stipple))) {
         dest->line_stipple = src->line_stipple;
         dest_mask |= RADV_DYNAMIC_LINE_STIPPLE;
      }
   }

   if (copy_mask & RADV_DYNAMIC_CULL_MODE) {
      if (dest->cull_mode != src->cull_mode) {
         dest->cull_mode = src->cull_mode;
         dest_mask |= RADV_DYNAMIC_CULL_MODE;
      }
   }

   if (copy_mask & RADV_DYNAMIC_FRONT_FACE) {
      if (dest->front_face != src->front_face) {
         dest->front_face = src->front_face;
         dest_mask |= RADV_DYNAMIC_FRONT_FACE;
      }
   }

   if (copy_mask & RADV_DYNAMIC_PRIMITIVE_TOPOLOGY) {
      if (dest->primitive_topology != src->primitive_topology) {
         dest->primitive_topology = src->primitive_topology;
         dest_mask |= RADV_DYNAMIC_PRIMITIVE_TOPOLOGY;
      }
   }

   if (copy_mask & RADV_DYNAMIC_DEPTH_TEST_ENABLE) {
      if (dest->depth_test_enable != src->depth_test_enable) {
         dest->depth_test_enable = src->depth_test_enable;
         dest_mask |= RADV_DYNAMIC_DEPTH_TEST_ENABLE;
      }
   }

   if (copy_mask & RADV_DYNAMIC_DEPTH_WRITE_ENABLE) {
      if (dest->depth_write_enable != src->depth_write_enable) {
         dest->depth_write_enable = src->depth_write_enable;
         dest_mask |= RADV_DYNAMIC_DEPTH_WRITE_ENABLE;
      }
   }

   if (copy_mask & RADV_DYNAMIC_DEPTH_COMPARE_OP) {
      if (dest->depth_compare_op != src->depth_compare_op) {
         dest->depth_compare_op = src->depth_compare_op;
         dest_mask |= RADV_DYNAMIC_DEPTH_COMPARE_OP;
      }
   }

   if (copy_mask & RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) {
      if (dest->depth_bounds_test_enable != src->depth_bounds_test_enable) {
         dest->depth_bounds_test_enable = src->depth_bounds_test_enable;
         dest_mask |= RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE;
      }
   }

   if (copy_mask & RADV_DYNAMIC_STENCIL_TEST_ENABLE) {
      if (dest->stencil_test_enable != src->stencil_test_enable) {
         dest->stencil_test_enable = src->stencil_test_enable;
         dest_mask |= RADV_DYNAMIC_STENCIL_TEST_ENABLE;
      }
   }

   if (copy_mask & RADV_DYNAMIC_STENCIL_OP) {
      if (memcmp(&dest->stencil_op, &src->stencil_op, sizeof(src->stencil_op))) {
         dest->stencil_op = src->stencil_op;
         dest_mask |= RADV_DYNAMIC_STENCIL_OP;
      }
   }

   if (copy_mask & RADV_DYNAMIC_FRAGMENT_SHADING_RATE) {
      if (memcmp(&dest->fragment_shading_rate, &src->fragment_shading_rate,
                 sizeof(src->fragment_shading_rate))) {
         dest->fragment_shading_rate = src->fragment_shading_rate;
         dest_mask |= RADV_DYNAMIC_FRAGMENT_SHADING_RATE;
      }
   }

317
318
319
320
321
322
323
   if (copy_mask & RADV_DYNAMIC_DEPTH_BIAS_ENABLE) {
      if (dest->depth_bias_enable != src->depth_bias_enable) {
         dest->depth_bias_enable = src->depth_bias_enable;
         dest_mask |= RADV_DYNAMIC_DEPTH_BIAS_ENABLE;
      }
   }

324
325
326
327
328
329
330
   if (copy_mask & RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE) {
      if (dest->primitive_restart_enable != src->primitive_restart_enable) {
         dest->primitive_restart_enable = src->primitive_restart_enable;
         dest_mask |= RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE;
      }
   }

331
332
333
334
335
336
337
   if (copy_mask & RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE) {
      if (dest->rasterizer_discard_enable != src->rasterizer_discard_enable) {
         dest->rasterizer_discard_enable = src->rasterizer_discard_enable;
         dest_mask |= RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE;
      }
   }

338
339
340
341
342
343
344
   if (copy_mask & RADV_DYNAMIC_LOGIC_OP) {
      if (dest->logic_op != src->logic_op) {
         dest->logic_op = src->logic_op;
         dest_mask |= RADV_DYNAMIC_LOGIC_OP;
      }
   }

345
346
347
348
349
350
351
   if (copy_mask & RADV_DYNAMIC_COLOR_WRITE_ENABLE) {
      if (dest->color_write_enable != src->color_write_enable) {
         dest->color_write_enable = src->color_write_enable;
         dest_mask |= RADV_DYNAMIC_COLOR_WRITE_ENABLE;
      }
   }

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
352
   cmd_buffer->state.dirty |= dest_mask;
353
354
}

355
static void
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
356
radv_bind_streamout_state(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline)
357
{
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
358
359
   struct radv_streamout_state *so = &cmd_buffer->state.streamout;
   struct radv_shader_info *info;
360

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
361
362
   if (!pipeline->streamout_shader || cmd_buffer->device->physical_device->use_ngg_streamout)
      return;
363

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
364
365
366
   info = &pipeline->streamout_shader->info;
   for (int i = 0; i < MAX_SO_BUFFERS; i++)
      so->stride_in_dw[i] = info->so.strides[i];
367

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
368
   so->enabled_stream_buffers_mask = info->so.enabled_stream_buffers_mask;
369
370
}

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
371
372
bool
radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer)
373
{
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
374
375
   return cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE &&
          cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7;
376
377
}

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
378
379
380
381
382
383
384
385
386
387
388
389
390
/**
 * Map a RADV queue family index to its ring type:
 * general -> RING_GFX, compute -> RING_COMPUTE, transfer -> RING_DMA.
 * Any other value is treated as unreachable.
 */
enum ring_type
radv_queue_family_to_ring(int f)
{
   if (f == RADV_QUEUE_GENERAL)
      return RING_GFX;
   if (f == RADV_QUEUE_COMPUTE)
      return RING_COMPUTE;
   if (f == RADV_QUEUE_TRANSFER)
      return RING_DMA;

   unreachable("Unknown queue family");
}

393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
/**
 * Emit a PKT3_WRITE_DATA packet that writes \p count dwords from \p data to
 * the memory address \p va.
 *
 * \param engine_sel  value placed in the packet's ENGINE_SEL field.
 *
 * The packet takes 4 + count dwords of command-stream space (header, control
 * word, low/high address, payload), which is reserved up front.
 */
static void
radv_emit_write_data_packet(struct radv_cmd_buffer *cmd_buffer, unsigned engine_sel, uint64_t va,
                            unsigned count, const uint32_t *data)
{
   struct radeon_cmdbuf *cs = cmd_buffer->cs;
   const uint32_t va_lo = (uint32_t)va;
   const uint32_t va_hi = (uint32_t)(va >> 32);

   radeon_check_space(cmd_buffer->device->ws, cs, 4 + count);

   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0));
   radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(engine_sel));
   radeon_emit(cs, va_lo);
   radeon_emit(cs, va_hi);
   radeon_emit_array(cs, data, count);
}

/**
 * Emit a write-data packet that stores \p size bytes of zeros at \p va.
 *
 * The zero payload is a temporary stack buffer (alloca), and size / 4 dwords
 * are written.
 */
static void
radv_emit_clear_data(struct radv_cmd_buffer *cmd_buffer, unsigned engine_sel, uint64_t va,
                     unsigned size)
{
   const unsigned dword_count = size / 4;
   uint32_t *zero_dwords = alloca(size);

   memset(zero_dwords, 0, size);
   radv_emit_write_data_packet(cmd_buffer, engine_sel, va, dword_count, zero_dwords);
}

417
418
419
static void
radv_destroy_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
{
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
420
   list_del(&cmd_buffer->pool_link);
421

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
422
423
424
425
426
427
   list_for_each_entry_safe(struct radv_cmd_buffer_upload, up, &cmd_buffer->upload.list, list)
   {
      cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, up->upload_bo);
      list_del(&up->list);
      free(up);
   }
428

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
429
430
   if (cmd_buffer->upload.upload_bo)
      cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, cmd_buffer->upload.upload_bo);
431

432
433
434
435
436
437
   if (cmd_buffer->state.own_render_pass) {
      radv_DestroyRenderPass(radv_device_to_handle(cmd_buffer->device),
                             radv_render_pass_to_handle(cmd_buffer->state.pass), NULL);
      cmd_buffer->state.own_render_pass = false;
   }

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
438
439
   if (cmd_buffer->cs)
      cmd_buffer->device->ws->cs_destroy(cmd_buffer->cs);
440

441
   for (unsigned i = 0; i < MAX_BIND_POINTS; i++) {
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
442
      free(cmd_buffer->descriptors[i].push_set.set.mapped_ptr);
443
444
      vk_object_base_finish(&cmd_buffer->descriptors[i].push_set.set.base);
   }
445

446
447
   vk_object_base_finish(&cmd_buffer->meta_push_descriptors.base);

448
   vk_command_buffer_finish(&cmd_buffer->vk);
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
449
   vk_free(&cmd_buffer->pool->alloc, cmd_buffer);
450
451
}

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
452
453
454
/**
 * Allocate and initialise a command buffer belonging to \p pool.
 *
 * \param device          owning device.
 * \param pool            command pool; supplies the allocator and the queue
 *                        family, and the new buffer is appended to its list.
 * \param level           primary or secondary.
 * \param pCommandBuffer  receives the handle on success only.
 *
 * \return VK_SUCCESS, VK_ERROR_OUT_OF_HOST_MEMORY when the allocation or the
 *         command-stream creation fails, or the error reported by
 *         vk_command_buffer_init().
 */
static VkResult
radv_create_cmd_buffer(struct radv_device *device, struct radv_cmd_pool *pool,
                       VkCommandBufferLevel level, VkCommandBuffer *pCommandBuffer)
{
   struct radv_cmd_buffer *cmd_buffer;
   unsigned ring;

   cmd_buffer = vk_zalloc(&pool->alloc, sizeof(*cmd_buffer), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cmd_buffer == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   VkResult result = vk_command_buffer_init(&cmd_buffer->vk, &device->vk);
   if (result != VK_SUCCESS) {
      /* cmd_buffer->pool has not been assigned yet (the object was
       * zero-allocated), so free through the pool argument instead of
       * dereferencing a NULL cmd_buffer->pool.
       */
      vk_free(&pool->alloc, cmd_buffer);
      return result;
   }

   cmd_buffer->device = device;
   cmd_buffer->pool = pool;
   cmd_buffer->level = level;

   list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
   cmd_buffer->queue_family_index = pool->queue_family_index;

   /* Initialise everything radv_destroy_cmd_buffer() touches before the first
    * point where it can be called: it walks upload.list and finishes the
    * descriptor-set object bases, and a zeroed list head is not a valid empty
    * list.
    */
   list_inithead(&cmd_buffer->upload.list);

   vk_object_base_init(&device->vk, &cmd_buffer->meta_push_descriptors.base,
                       VK_OBJECT_TYPE_DESCRIPTOR_SET);

   for (unsigned i = 0; i < MAX_BIND_POINTS; i++)
      vk_object_base_init(&device->vk, &cmd_buffer->descriptors[i].push_set.set.base,
                          VK_OBJECT_TYPE_DESCRIPTOR_SET);

   ring = radv_queue_family_to_ring(cmd_buffer->queue_family_index);

   cmd_buffer->cs = device->ws->cs_create(device->ws, ring);
   if (!cmd_buffer->cs) {
      radv_destroy_cmd_buffer(cmd_buffer);
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   *pCommandBuffer = radv_cmd_buffer_to_handle(cmd_buffer);

   return VK_SUCCESS;
}

498
499
static VkResult
radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
500
{
501
502
   vk_command_buffer_reset(&cmd_buffer->vk);

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
503
504
505
506
507
508
509
510
511
   cmd_buffer->device->ws->cs_reset(cmd_buffer->cs);

   list_for_each_entry_safe(struct radv_cmd_buffer_upload, up, &cmd_buffer->upload.list, list)
   {
      cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, up->upload_bo);
      list_del(&up->list);
      free(up);
   }

512
513
514
515
516
517
   if (cmd_buffer->state.own_render_pass) {
      radv_DestroyRenderPass(radv_device_to_handle(cmd_buffer->device),
                             radv_render_pass_to_handle(cmd_buffer->state.pass), NULL);
      cmd_buffer->state.own_render_pass = false;
   }

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
   cmd_buffer->push_constant_stages = 0;
   cmd_buffer->scratch_size_per_wave_needed = 0;
   cmd_buffer->scratch_waves_wanted = 0;
   cmd_buffer->compute_scratch_size_per_wave_needed = 0;
   cmd_buffer->compute_scratch_waves_wanted = 0;
   cmd_buffer->esgs_ring_size_needed = 0;
   cmd_buffer->gsvs_ring_size_needed = 0;
   cmd_buffer->tess_rings_needed = false;
   cmd_buffer->gds_needed = false;
   cmd_buffer->gds_oa_needed = false;
   cmd_buffer->sample_positions_needed = false;

   if (cmd_buffer->upload.upload_bo)
      radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->upload.upload_bo);
   cmd_buffer->upload.offset = 0;

   cmd_buffer->record_result = VK_SUCCESS;

   memset(cmd_buffer->vertex_bindings, 0, sizeof(cmd_buffer->vertex_bindings));

   for (unsigned i = 0; i < MAX_BIND_POINTS; i++) {
      cmd_buffer->descriptors[i].dirty = 0;
      cmd_buffer->descriptors[i].valid = 0;
      cmd_buffer->descriptors[i].push_dirty = false;
   }

   if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 &&
       cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
      unsigned num_db = cmd_buffer->device->physical_device->rad_info.max_render_backends;
      unsigned fence_offset, eop_bug_offset;
      void *fence_ptr;

      radv_cmd_buffer_upload_alloc(cmd_buffer, 8, &fence_offset, &fence_ptr);
      memset(fence_ptr, 0, 8);

      cmd_buffer->gfx9_fence_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
      cmd_buffer->gfx9_fence_va += fence_offset;

556
557
      radv_emit_clear_data(cmd_buffer, V_370_PFP, cmd_buffer->gfx9_fence_va, 8);

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
558
559
560
561
562
563
      if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
         /* Allocate a buffer for the EOP bug on GFX9. */
         radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, &eop_bug_offset, &fence_ptr);
         memset(fence_ptr, 0, 16 * num_db);
         cmd_buffer->gfx9_eop_bug_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
         cmd_buffer->gfx9_eop_bug_va += eop_bug_offset;
564
565

         radv_emit_clear_data(cmd_buffer, V_370_PFP, cmd_buffer->gfx9_eop_bug_va, 16 * num_db);
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
566
567
568
569
570
571
      }
   }

   cmd_buffer->status = RADV_CMD_BUFFER_STATUS_INITIAL;

   return cmd_buffer->record_result;
572
573
}

574
static bool
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
575
576
577
radv_cmd_buffer_resize_upload_buf(struct radv_cmd_buffer *cmd_buffer, uint64_t min_needed)
{
   uint64_t new_size;
578
   struct radeon_winsys_bo *bo = NULL;
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
579
580
581
582
583
584
   struct radv_cmd_buffer_upload *upload;
   struct radv_device *device = cmd_buffer->device;

   new_size = MAX2(min_needed, 16 * 1024);
   new_size = MAX2(new_size, 2 * cmd_buffer->upload.size);

585
586
587
588
   VkResult result =
      device->ws->buffer_create(device->ws, new_size, 4096, device->ws->cs_domain(device->ws),
                                RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
                                   RADEON_FLAG_32BIT | RADEON_FLAG_GTT_WC,
589
                                RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &bo);
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
590

591
592
   if (result != VK_SUCCESS) {
      cmd_buffer->record_result = result;
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
      return false;
   }

   radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo);
   if (cmd_buffer->upload.upload_bo) {
      upload = malloc(sizeof(*upload));

      if (!upload) {
         cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
         device->ws->buffer_destroy(device->ws, bo);
         return false;
      }

      memcpy(upload, &cmd_buffer->upload, sizeof(*upload));
      list_add(&upload->list, &cmd_buffer->upload.list);
   }

   cmd_buffer->upload.upload_bo = bo;
   cmd_buffer->upload.size = new_size;
   cmd_buffer->upload.offset = 0;
   cmd_buffer->upload.map = device->ws->buffer_map(cmd_buffer->upload.upload_bo);

   if (!cmd_buffer->upload.map) {
      cmd_buffer->record_result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
      return false;
   }

   return true;
621
622
623
}

bool
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
624
625
radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size,
                             unsigned *out_offset, void **ptr)
626
{
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
627
   assert(size % 4 == 0);
628

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
629
   struct radeon_info *rad_info = &cmd_buffer->device->physical_device->rad_info;
630

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
631
632
633
634
635
636
637
638
   /* Align to the scalar cache line size if it results in this allocation
    * being placed in less of them.
    */
   unsigned offset = cmd_buffer->upload.offset;
   unsigned line_size = rad_info->chip_class >= GFX10 ? 64 : 32;
   unsigned gap = align(offset, line_size) - offset;
   if ((size & (line_size - 1)) > gap)
      offset = align(offset, line_size);
639

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
640
641
642
643
644
   if (offset + size > cmd_buffer->upload.size) {
      if (!radv_cmd_buffer_resize_upload_buf(cmd_buffer, size))
         return false;
      offset = 0;
   }
645

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
646
647
   *out_offset = offset;
   *ptr = cmd_buffer->upload.map + offset;
648

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
649
650
   cmd_buffer->upload.offset = offset + size;
   return true;
651
652
653
}

/**
 * Allocate \p size bytes in the upload buffer and copy \p data into them.
 *
 * \param out_offset  receives the byte offset of the copy in the upload buffer.
 * \return false when the allocation fails, true otherwise.
 */
bool
radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size, const void *data,
                            unsigned *out_offset)
{
   void *dst;

   if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, out_offset, &dst))
      return false;

   if (dst)
      memcpy(dst, data, size);

   return true;
}

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
668
669
void
radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer)
670
{
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
671
672
673
   struct radv_device *device = cmd_buffer->device;
   struct radeon_cmdbuf *cs = cmd_buffer->cs;
   uint64_t va;
674

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
675
676
677
   va = radv_buffer_get_va(device->trace_bo);
   if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY)
      va += 4;
678

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
679
   ++cmd_buffer->state.trace_id;
680
   radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 1, &cmd_buffer->state.trace_id);
681

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
682
   radeon_check_space(cmd_buffer->device->ws, cs, 2);
683

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
684
685
   radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
   radeon_emit(cs, AC_ENCODE_TRACE_POINT(cmd_buffer->state.trace_id));
686
687
}

688
static void
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
689
radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer, enum radv_cmd_flush_bits flags)
690
{
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
691
692
693
694
   if (unlikely(cmd_buffer->device->thread_trace.bo)) {
      radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
      radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_THREAD_TRACE_MARKER) | EVENT_INDEX(0));
   }
695

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
696
697
698
   if (cmd_buffer->device->instance->debug_flags & RADV_DEBUG_SYNC_SHADERS) {
      enum rgp_flush_bits sqtt_flush_bits = 0;
      assert(flags & (RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH));
699

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
700
      radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4);
701

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
702
703
704
705
706
707
708
      /* Force wait for graphics or compute engines to be idle. */
      si_cs_emit_cache_flush(cmd_buffer->cs,
                             cmd_buffer->device->physical_device->rad_info.chip_class,
                             &cmd_buffer->gfx9_fence_idx, cmd_buffer->gfx9_fence_va,
                             radv_cmd_buffer_uses_mec(cmd_buffer), flags, &sqtt_flush_bits,
                             cmd_buffer->gfx9_eop_bug_va);
   }
709

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
710
711
   if (unlikely(cmd_buffer->device->trace_bo))
      radv_cmd_buffer_trace_emit(cmd_buffer);
712
713
}

714
static void
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
715
radv_save_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline)
716
{
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
717
718
719
720
   struct radv_device *device = cmd_buffer->device;
   enum ring_type ring;
   uint32_t data[2];
   uint64_t va;
721

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
722
   va = radv_buffer_get_va(device->trace_bo);
723

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
724
   ring = radv_queue_family_to_ring(cmd_buffer->queue_family_index);
725

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
726
727
728
729
730
731
732
733
734
735
   switch (ring) {
   case RING_GFX:
      va += 8;
      break;
   case RING_COMPUTE:
      va += 16;
      break;
   default:
      assert(!"invalid ring type");
   }
736

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
737
738
739
   uint64_t pipeline_address = (uintptr_t)pipeline;
   data[0] = pipeline_address;
   data[1] = pipeline_address >> 32;
740

741
   radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 2, data);
742
743
}

744
static void
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
745
radv_save_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer, uint64_t vb_ptr)
746
{
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
747
748
749
   struct radv_device *device = cmd_buffer->device;
   uint32_t data[2];
   uint64_t va;
750

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
751
752
   va = radv_buffer_get_va(device->trace_bo);
   va += 24;
753

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
754
755
   data[0] = vb_ptr;
   data[1] = vb_ptr >> 32;
756

757
   radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 2, data);
758
759
}

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
760
761
762
/**
 * Bind a descriptor set to slot \p idx of the given bind point.
 *
 * The set pointer is stored in the bind point's descriptor state and the
 * slot's bit is raised in both the "valid" and "dirty" masks.
 *
 * \param cmd_buffer  command buffer owning the descriptor state
 * \param bind_point  pipeline bind point whose state is updated
 * \param set         descriptor set to bind
 * \param idx         slot index; also used as the bit position in the masks
 */
void
radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point,
                        struct radv_descriptor_set *set, unsigned idx)
{
   struct radv_descriptor_state *state = radv_get_descriptors_state(cmd_buffer, bind_point);
   const unsigned slot_bit = 1u << idx;

   state->sets[idx] = set;

   /* The slot now holds a set and must be (re-)emitted. */
   state->valid |= slot_bit;
   state->dirty |= slot_bit;
}

773
static void
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
774
radv_save_descriptors(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
775
{
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
776
777
778
779
780
781
   struct radv_descriptor_state *descriptors_state =
      radv_get_descriptors_state(cmd_buffer, bind_point);
   struct radv_device *device = cmd_buffer->device;
   uint32_t data[MAX_SETS * 2] = {0};
   uint64_t va;
   va = radv_buffer_get_va(device->trace_bo) + 32;
782

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
783
784
785
786
787
788
   u_foreach_bit(i, descriptors_state->valid)
   {
      struct radv_descriptor_set *set = descriptors_state->sets[i];
      data[i * 2] = (uint64_t)(uintptr_t)set;
      data[i * 2 + 1] = (uint64_t)(uintptr_t)set >> 32;
   }
789

790
   radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, MAX_SETS * 2, data);
791
792
}

793
struct radv_userdata_info *
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
794
radv_lookup_user_sgpr(struct radv_pipeline *pipeline, gl_shader_stage stage, int idx)
795
{
796
   struct radv_shader *shader = radv_get_shader(pipeline, stage);
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
797
   return &shader->info.user_sgprs_locs.shader_data[idx];
798
799
800
}

static void
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
801
802
radv_emit_userdata_address(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline,
                           gl_shader_stage stage, int idx, uint64_t va)
803
{
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
804
805
806
807
   struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx);
   uint32_t base_reg = pipeline->user_data_0[stage];
   if (loc->sgpr_idx == -1)
      return;
808

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
809
   assert(loc->num_sgprs == 1);
810

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
811
812
   radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, va,
                            false);
813
814
}

815
static void
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
816
817
818
radv_emit_descriptor_pointers(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline,
                              struct radv_descriptor_state *descriptors_state,
                              gl_shader_stage stage)
819
{
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
820
821
822
823
824
   struct radv_device *device = cmd_buffer->device;
   struct radeon_cmdbuf *cs = cmd_buffer->cs;
   uint32_t sh_base = pipeline->user_data_0[stage];
   struct radv_userdata_locations *locs = &pipeline->shaders[stage]->info.user_sgprs_locs;
   unsigned mask = locs->descriptor_sets_enabled;
825

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
826
   mask &= descriptors_state->dirty & descriptors_state->valid;
827

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
828
829
   while (mask) {
      int start, count;
830

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
831
      u_bit_scan_consecutive_range(&mask, &start, &count);
832

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
833
834
      struct radv_userdata_info *loc = &locs->descriptor_sets[start];
      unsigned sh_offset = sh_base + loc->sgpr_idx * 4;
835

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
836
837
838
      radv_emit_shader_pointer_head(cs, sh_offset, count, true);
      for (int i = 0; i < count; i++) {
         struct radv_descriptor_set *set = descriptors_state->sets[start + i];
839

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
840
841
842
         radv_emit_shader_pointer_body(device, cs, set->header.va, true);
      }
   }
843
844
}

845
846
847
848
849
/**
 * Convert the user sample locations to hardware sample locations (the values
 * that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*).
 */
static void
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
850
851
radv_convert_user_sample_locs(struct radv_sample_locations_state *state, uint32_t x, uint32_t y,
                              VkOffset2D *sample_locs)
852
{
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
853
854
855
856
857
   uint32_t x_offset = x % state->grid_size.width;
   uint32_t y_offset = y % state->grid_size.height;
   uint32_t num_samples = (uint32_t)state->per_pixel;
   VkSampleLocationEXT *user_locs;
   uint32_t pixel_offset;
858

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
859
   pixel_offset = (x_offset + y_offset * state->grid_size.width) * num_samples;
860

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
861
862
   assert(pixel_offset <= MAX_SAMPLE_LOCATIONS);
   user_locs = &state->locations[pixel_offset];
863

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
864
865
866
   for (uint32_t i = 0; i < num_samples; i++) {
      float shifted_pos_x = user_locs[i].x - 0.5;
      float shifted_pos_y = user_locs[i].y - 0.5;
867

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
868
869
      int32_t scaled_pos_x = floorf(shifted_pos_x * 16);
      int32_t scaled_pos_y = floorf(shifted_pos_y * 16);
870

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
871
872
873
      sample_locs[i].x = CLAMP(scaled_pos_x, -8, 7);
      sample_locs[i].y = CLAMP(scaled_pos_y, -8, 7);
   }
874
875
876
877
878
879
880
881
}

/**
 * Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask based on hardware sample
 * locations.
 *
 * Four samples are packed per 32-bit word, one byte per sample: the low
 * nibble holds X and the high nibble holds Y, each truncated to 4 bits.
 * The values are OR'ed into \p sample_locs_pixel, so the caller must have
 * zero-initialised it.
 *
 * \param num_samples        number of entries in \p sample_locs
 * \param sample_locs        hardware sample locations to pack
 * \param sample_locs_pixel  output words; word (i / 4) receives sample i
 */
static void
radv_compute_sample_locs_pixel(uint32_t num_samples, VkOffset2D *sample_locs,
                               uint32_t *sample_locs_pixel)
{
   for (uint32_t i = 0; i < num_samples; i++) {
      uint32_t sample_reg_idx = i / 4;
      uint32_t sample_loc_idx = i % 4;

      /* Do the masking and shifting in unsigned arithmetic: a negative
       * coordinate yields a nibble >= 8, and shifting that by 28 as a signed
       * int would overflow (undefined behaviour).
       */
      uint32_t nibble_x = (uint32_t)sample_locs[i].x & 0xf;
      uint32_t nibble_y = (uint32_t)sample_locs[i].y & 0xf;

      uint32_t shift_x = 8 * sample_loc_idx;
      uint32_t shift_y = shift_x + 4;

      sample_locs_pixel[sample_reg_idx] |= nibble_x << shift_x;
      sample_locs_pixel[sample_reg_idx] |= nibble_y << shift_y;
   }
}

/**
 * Compute the PA_SC_CENTROID_PRIORITY_* mask based on the top left hardware
 * sample locations.
 */
static uint64_t
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
903
904
radv_compute_centroid_priority(struct radv_cmd_buffer *cmd_buffer, VkOffset2D *sample_locs,
                               uint32_t num_samples)
905
{
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
906
907
908
909
   uint32_t *centroid_priorities = alloca(num_samples * sizeof(*centroid_priorities));
   uint32_t sample_mask = num_samples - 1;
   uint32_t *distances = alloca(num_samples * sizeof(*distances));
   uint64_t centroid_priority = 0;
910

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
911
912
913
914
   /* Compute the distances from center for each sample. */
   for (int i = 0; i < num_samples; i++) {
      distances[i] = (sample_locs[i].x * sample_locs[i].x) + (sample_locs[i].y * sample_locs[i].y);
   }
915

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
916
917
918
   /* Compute the centroid priorities by looking at the distances array. */
   for (int i = 0; i < num_samples; i++) {
      uint32_t min_idx = 0;
919

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
920
921
922
923
      for (int j = 1; j < num_samples; j++) {
         if (distances[j] < distances[min_idx])
            min_idx = j;
      }
924

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
925
926
927
      centroid_priorities[i] = min_idx;
      distances[min_idx] = 0xffffffff;
   }
928

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
929
930
931
932
   /* Compute the final centroid priority. */
   for (int i = 0; i < 8; i++) {
      centroid_priority |= centroid_priorities[i & sample_mask] << (i * 4);
   }
933

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
934
   return centroid_priority << 32 | centroid_priority;
935
936
937
938
939
940
941
942
}

/**
 * Emit the sample locations that are specified with VK_EXT_sample_locations.
 */
static void
radv_emit_sample_locations(struct radv_cmd_buffer *cmd_buffer)
{
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
   struct radv_sample_locations_state *sample_location = &cmd_buffer->state.dynamic.sample_location;
   uint32_t num_samples = (uint32_t)sample_location->per_pixel;
   struct radeon_cmdbuf *cs = cmd_buffer->cs;
   uint32_t sample_locs_pixel[4][2] = {0};
   VkOffset2D sample_locs[4][8]; /* 8 is the max. sample count supported */
   uint32_t max_sample_dist = 0;
   uint64_t centroid_priority;

   if (!cmd_buffer->state.dynamic.sample_location.count)
      return;

   /* Convert the user sample locations to hardware sample locations. */
   radv_convert_user_sample_locs(sample_location, 0, 0, sample_locs[0]);
   radv_convert_user_sample_locs(sample_location, 1, 0, sample_locs[1]);
   radv_convert_user_sample_locs(sample_location, 0, 1, sample_locs[2]);
   radv_convert_user_sample_locs(sample_location, 1, 1, sample_locs[3]);

   /* Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask. */
   for (uint32_t i = 0; i < 4; i++) {
      radv_compute_sample_locs_pixel(num_samples, sample_locs[i], sample_locs_pixel[i]);
   }

   /* Compute the PA_SC_CENTROID_PRIORITY_* mask. */
   centroid_priority = radv_compute_centroid_priority(cmd_buffer, sample_locs[0], num_samples);

   /* Compute the maximum sample distance from the specified locations. */
   for (unsigned i = 0; i < 4; ++i) {
      for (uint32_t j = 0; j < num_samples; j++) {
         VkOffset2D offset = sample_locs[i][j];
         max_sample_dist = MAX2(max_sample_dist, MAX2(abs(offset.x), abs(offset.y)));
      }
   }

   /* Emit the specified user sample locations. */
   switch (num_samples) {
   case 2:
   case 4:
      radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0,
                             sample_locs_pixel[0][0]);
      radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0,
                             sample_locs_pixel[1][0]);
      radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0,
                             sample_locs_pixel[2][0]);
      radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0,
                             sample_locs_pixel[3][0]);
      break;
   case 8:
      radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0,
                             sample_locs_pixel[0][0]);
      radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0,
                             sample_locs_pixel[1][0]);
      radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0,
                             sample_locs_pixel[2][0]);
      radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0,
                             sample_locs_pixel[3][0]);
      radeon_set_context_reg(cs, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1,
                             sample_locs_pixel[0][1]);
      radeon_set_context_reg(cs, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1,