v3dv_meta_copy.c 205 KB
Newer Older
Iago Toral's avatar
Iago Toral committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
/*
 * Copyright © 2019 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"

26
#include "compiler/nir/nir_builder.h"
27
28
#include "broadcom/cle/v3dx_pack.h"
#include "vk_format_info.h"
29
#include "util/u_pack_color.h"
30

31
32
33
34
35
36
37
38
39
40
41
42
/* Hash callback for the meta blit pipeline cache: hashes the raw key bytes. */
static uint32_t
meta_blit_key_hash(const void *key)
{
   const uint32_t hash = _mesa_hash_data(key, V3DV_META_BLIT_CACHE_KEY_SIZE);
   return hash;
}

/* Equality callback for the meta blit pipeline cache keys. */
static bool
meta_blit_key_compare(const void *key1, const void *key2)
{
   return !memcmp(key1, key2, V3DV_META_BLIT_CACHE_KEY_SIZE);
}

43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
/**
 * Creates the pipeline layout used by the meta blit path, lazily creating
 * the shared descriptor set layout (a single combined image/sampler bound
 * to the fragment stage) the first time it is needed.
 *
 * The pipeline layout must not exist yet on entry. Returns true on success.
 */
static bool
create_blit_pipeline_layout(struct v3dv_device *device,
                            VkDescriptorSetLayout *descriptor_set_layout,
                            VkPipelineLayout *pipeline_layout)
{
   VkDevice _device = v3dv_device_to_handle(device);

   if (*descriptor_set_layout == 0) {
      VkDescriptorSetLayoutBinding binding = {
         .binding = 0,
         .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
         .descriptorCount = 1,
         .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
      };
      VkDescriptorSetLayoutCreateInfo ds_info = {
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
         .bindingCount = 1,
         .pBindings = &binding,
      };
      VkResult result =
         v3dv_CreateDescriptorSetLayout(_device, &ds_info, &device->vk.alloc,
                                        descriptor_set_layout);
      if (result != VK_SUCCESS)
         return false;
   }

   assert(*pipeline_layout == 0);

   /* 20 bytes of push constants consumed by the vertex stage. */
   VkPushConstantRange push_range = { VK_SHADER_STAGE_VERTEX_BIT, 0, 20 };
   VkPipelineLayoutCreateInfo layout_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = descriptor_set_layout,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges = &push_range,
   };

   VkResult result =
      v3dv_CreatePipelineLayout(_device, &layout_info, &device->vk.alloc,
                                pipeline_layout);
   return result == VK_SUCCESS;
}

89
90
91
92
93
94
95
96
97
/**
 * One-time initialization of the meta blit state: three pipeline caches
 * plus the shared descriptor set and pipeline layouts.
 */
void
v3dv_meta_blit_init(struct v3dv_device *device)
{
   for (uint32_t i = 0; i < 3; i++) {
      device->meta.blit.cache[i] =
         _mesa_hash_table_create(NULL, meta_blit_key_hash,
                                 meta_blit_key_compare);
   }

   /* NOTE(review): the boolean result is ignored; on failure the layout
    * handles remain 0 — confirm users of these layouts tolerate that.
    */
   create_blit_pipeline_layout(device,
                               &device->meta.blit.ds_layout,
                               &device->meta.blit.p_layout);
}

void
v3dv_meta_blit_finish(struct v3dv_device *device)
{
   VkDevice _device = v3dv_device_to_handle(device);

   for (uint32_t i = 0; i < 3; i++) {
      hash_table_foreach(device->meta.blit.cache[i], entry) {
         struct v3dv_meta_blit_pipeline *item = entry->data;
112
113
114
115
         v3dv_DestroyPipeline(_device, item->pipeline, &device->vk.alloc);
         v3dv_DestroyRenderPass(_device, item->pass, &device->vk.alloc);
         v3dv_DestroyRenderPass(_device, item->pass_no_load, &device->vk.alloc);
         vk_free(&device->vk.alloc, item);
116
117
118
119
      }
      _mesa_hash_table_destroy(device->meta.blit.cache[i], NULL);
   }

120
121
   if (device->meta.blit.p_layout) {
      v3dv_DestroyPipelineLayout(_device, device->meta.blit.p_layout,
122
                                 &device->vk.alloc);
123
124
   }

125
126
   if (device->meta.blit.ds_layout) {
      v3dv_DestroyDescriptorSetLayout(_device, device->meta.blit.ds_layout,
127
                                      &device->vk.alloc);
128
129
130
   }
}

131
132
133
134
135
136
137
138
139
140
141
142
/* Hash callback for the texel buffer copy pipeline cache keys. */
static uint32_t
meta_texel_buffer_copy_key_hash(const void *key)
{
   const uint32_t hash =
      _mesa_hash_data(key, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
   return hash;
}

/* Equality callback for the texel buffer copy pipeline cache keys. */
static bool
meta_texel_buffer_copy_key_compare(const void *key1, const void *key2)
{
   return !memcmp(key1, key2, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
}

143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
/**
 * Creates the pipeline layout used by the texel buffer copy path, lazily
 * creating the shared descriptor set layout (a single uniform texel buffer
 * bound to the fragment stage) the first time it is needed.
 *
 * The pipeline layout must not exist yet on entry. Returns true on success.
 */
static bool
create_texel_buffer_copy_pipeline_layout(struct v3dv_device *device,
                                         VkDescriptorSetLayout *ds_layout,
                                         VkPipelineLayout *p_layout)
{
   VkDevice _device = v3dv_device_to_handle(device);

   if (*ds_layout == 0) {
      VkDescriptorSetLayoutBinding binding = {
         .binding = 0,
         .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
         .descriptorCount = 1,
         .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
      };
      VkDescriptorSetLayoutCreateInfo ds_info = {
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
         .bindingCount = 1,
         .pBindings = &binding,
      };
      VkResult result =
         v3dv_CreateDescriptorSetLayout(_device, &ds_info, &device->vk.alloc,
                                        ds_layout);
      if (result != VK_SUCCESS)
         return false;
   }

   assert(*p_layout == 0);

   /* 20 bytes of push constants consumed by the fragment stage. */
   VkPushConstantRange push_range = { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 20 };
   VkPipelineLayoutCreateInfo p_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = ds_layout,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges = &push_range,
   };

   VkResult result =
      v3dv_CreatePipelineLayout(_device, &p_info, &device->vk.alloc,
                                p_layout);
   return result == VK_SUCCESS;
}

189
190
191
192
193
194
195
196
197
/**
 * One-time initialization of the texel buffer copy meta state: three
 * pipeline caches plus the shared descriptor set and pipeline layouts.
 */
void
v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device)
{
   for (uint32_t i = 0; i < 3; i++) {
      device->meta.texel_buffer_copy.cache[i] =
         _mesa_hash_table_create(NULL, meta_texel_buffer_copy_key_hash,
                                 meta_texel_buffer_copy_key_compare);
   }

   /* NOTE(review): the boolean result is ignored; on failure the layout
    * handles remain 0 — confirm users of these layouts tolerate that.
    */
   create_texel_buffer_copy_pipeline_layout(
      device,
      &device->meta.texel_buffer_copy.ds_layout,
      &device->meta.texel_buffer_copy.p_layout);
}

void
v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device)
{
   VkDevice _device = v3dv_device_to_handle(device);

   for (uint32_t i = 0; i < 3; i++) {
      hash_table_foreach(device->meta.texel_buffer_copy.cache[i], entry) {
         struct v3dv_meta_texel_buffer_copy_pipeline *item = entry->data;
213
214
215
216
         v3dv_DestroyPipeline(_device, item->pipeline, &device->vk.alloc);
         v3dv_DestroyRenderPass(_device, item->pass, &device->vk.alloc);
         v3dv_DestroyRenderPass(_device, item->pass_no_load, &device->vk.alloc);
         vk_free(&device->vk.alloc, item);
217
218
219
220
      }
      _mesa_hash_table_destroy(device->meta.texel_buffer_copy.cache[i], NULL);
   }

221
222
   if (device->meta.texel_buffer_copy.p_layout) {
      v3dv_DestroyPipelineLayout(_device, device->meta.texel_buffer_copy.p_layout,
223
                                 &device->vk.alloc);
224
225
   }

226
227
   if (device->meta.texel_buffer_copy.ds_layout) {
      v3dv_DestroyDescriptorSetLayout(_device, device->meta.texel_buffer_copy.ds_layout,
228
                                      &device->vk.alloc);
229
230
231
   }
}

232
233
234
235
236
static inline bool
can_use_tlb(struct v3dv_image *image,
            const VkOffset3D *offset,
            VkFormat *compat_format);

Iago Toral's avatar
Iago Toral committed
237
238
239
240
241
242
243
244
/**
 * Copy operations implemented in this file don't operate on a framebuffer
 * object provided by the user, however, since most use the TLB for this,
 * we still need to have some representation of the framebuffer. For the most
 * part, the job's frame tiling information is enough for this, however we
 * still need additional information such us the internal type of our single
 * render target, so we use this auxiliary struct to pass that information
 * around.
245
 */
Iago Toral's avatar
Iago Toral committed
246
247
struct framebuffer_data {
   /* The internal type of the single render target */
   uint32_t internal_type;

   /* Supertile coverage */
   uint32_t min_x_supertile;
   uint32_t min_y_supertile;
   uint32_t max_x_supertile;
   uint32_t max_y_supertile;

   /* Format info */
   VkFormat vk_format;
   const struct v3dv_format *format;
   /* Internal depth type derived from vk_format when it is depth/stencil;
    * V3D_INTERNAL_TYPE_DEPTH_32F otherwise (see setup_framebuffer_data).
    */
   uint8_t internal_depth_type;
};

262
static void
Iago Toral's avatar
Iago Toral committed
263
setup_framebuffer_data(struct framebuffer_data *fb,
264
                       VkFormat vk_format,
Iago Toral's avatar
Iago Toral committed
265
266
                       uint32_t internal_type,
                       const struct v3dv_frame_tiling *tiling)
267
268
269
{
   fb->internal_type = internal_type;

Iago Toral's avatar
Iago Toral committed
270
   /* Supertile coverage always starts at 0,0  */
Iago Toral's avatar
Iago Toral committed
271
   uint32_t supertile_w_in_pixels =
Iago Toral's avatar
Iago Toral committed
272
      tiling->tile_width * tiling->supertile_width;
Iago Toral's avatar
Iago Toral committed
273
   uint32_t supertile_h_in_pixels =
Iago Toral's avatar
Iago Toral committed
274
275
      tiling->tile_height * tiling->supertile_height;

276
277
   fb->min_x_supertile = 0;
   fb->min_y_supertile = 0;
Iago Toral's avatar
Iago Toral committed
278
279
280
   fb->max_x_supertile = (tiling->width - 1) / supertile_w_in_pixels;
   fb->max_y_supertile = (tiling->height - 1) / supertile_h_in_pixels;

281
282
   fb->vk_format = vk_format;
   fb->format = v3dv_get_format(vk_format);
283
284
285
286

   fb->internal_depth_type = V3D_INTERNAL_TYPE_DEPTH_32F;
   if (vk_format_is_depth_or_stencil(vk_format))
      fb->internal_depth_type = v3dv_get_internal_depth_type(vk_format);
287
288
}

289
/* This chooses a tile buffer format that is appropriate for the copy operation.
Iago Toral's avatar
Iago Toral committed
290
 * Typically, this is the image render target type, however, if we are copying
291
292
293
 * depth/stencil to/from a buffer the hardware can't do raster loads/stores, so
 * we need to load and store to/from a tile color buffer using a compatible
 * color format.
294
295
 */
static uint32_t
296
choose_tlb_format(struct framebuffer_data *framebuffer,
297
                  VkImageAspectFlags aspect,
Iago Toral's avatar
Iago Toral committed
298
                  bool for_store,
299
300
                  bool is_copy_to_buffer,
                  bool is_copy_from_buffer)
301
{
302
   if (is_copy_to_buffer || is_copy_from_buffer) {
303
      switch (framebuffer->vk_format) {
Iago Toral's avatar
Iago Toral committed
304
305
306
307
308
      case VK_FORMAT_D16_UNORM:
         return V3D_OUTPUT_IMAGE_FORMAT_R16UI;
      case VK_FORMAT_D32_SFLOAT:
         return V3D_OUTPUT_IMAGE_FORMAT_R32F;
      case VK_FORMAT_X8_D24_UNORM_PACK32:
309
         return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
Iago Toral's avatar
Iago Toral committed
310
311
312
313
314
315
316
      case VK_FORMAT_D24_UNORM_S8_UINT:
         /* When storing the stencil aspect of a combined depth/stencil image
          * to a buffer, the Vulkan spec states that the output buffer must
          * have packed stencil values, so we choose an R8UI format for our
          * store outputs. For the load input we still want RGBA8UI since the
          * source image contains 4 channels (including the 3 channels
          * containing the 24-bit depth value).
317
318
319
320
321
322
323
324
325
326
327
          *
          * When loading the stencil aspect of a combined depth/stencil image
          * from a buffer, we read packed 8-bit stencil values from the buffer
          * that we need to put into the LSB of the 32-bit format (the R
          * channel), so we use R8UI. For the store, if we used R8UI then we
          * would write 8-bit stencil values consecutively over depth channels,
          * so we need to use RGBA8UI. This will write each stencil value in
          * its correct position, but will overwrite depth values (channels G
          * B,A) with undefined values. To fix this,  we will have to restore
          * the depth aspect from the Z tile buffer, which we should pre-load
          * from the image before the store).
Iago Toral's avatar
Iago Toral committed
328
329
330
331
332
          */
         if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) {
            return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
         } else {
            assert(aspect & VK_IMAGE_ASPECT_STENCIL_BIT);
333
334
335
336
337
338
339
340
            if (is_copy_to_buffer) {
               return for_store ? V3D_OUTPUT_IMAGE_FORMAT_R8UI :
                                  V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
            } else {
               assert(is_copy_from_buffer);
               return for_store ? V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI :
                                  V3D_OUTPUT_IMAGE_FORMAT_R8UI;
            }
Iago Toral's avatar
Iago Toral committed
341
342
         }
      default: /* Color formats */
343
         return framebuffer->format->rt_type;
Iago Toral's avatar
Iago Toral committed
344
         break;
345
      }
Iago Toral's avatar
Iago Toral committed
346
   } else {
347
      return framebuffer->format->rt_type;
Iago Toral's avatar
Iago Toral committed
348
349
350
   }
}

351
352
353
354
355
356
357
/* Returns true when the format's first swizzle channel is Z, i.e. the
 * R and B channels are swapped relative to the tile buffer order.
 */
static inline bool
format_needs_rb_swap(VkFormat format)
{
   const uint8_t *swz = v3dv_get_format_swizzle(format);
   return swz[0] == PIPE_SWIZZLE_Z;
}

Iago Toral's avatar
Iago Toral committed
358
static void
359
get_internal_type_bpp_for_image_aspects(VkFormat vk_format,
Iago Toral's avatar
Iago Toral committed
360
361
362
363
364
365
366
367
368
369
370
371
372
                                        VkImageAspectFlags aspect_mask,
                                        uint32_t *internal_type,
                                        uint32_t *internal_bpp)
{
   const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
                                         VK_IMAGE_ASPECT_STENCIL_BIT;

   /* We can't store depth/stencil pixel formats to a raster format, so
    * so instead we load our depth/stencil aspects to a compatible color
    * format.
    */
   /* FIXME: pre-compute this at image creation time? */
   if (aspect_mask & ds_aspects) {
373
      switch (vk_format) {
Iago Toral's avatar
Iago Toral committed
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
      case VK_FORMAT_D16_UNORM:
         *internal_type = V3D_INTERNAL_TYPE_16UI;
         *internal_bpp = V3D_INTERNAL_BPP_64;
         break;
      case VK_FORMAT_D32_SFLOAT:
         *internal_type = V3D_INTERNAL_TYPE_32F;
         *internal_bpp = V3D_INTERNAL_BPP_128;
         break;
      case VK_FORMAT_X8_D24_UNORM_PACK32:
      case VK_FORMAT_D24_UNORM_S8_UINT:
         /* Use RGBA8 format so we can relocate the X/S bits in the appropriate
          * place to match Vulkan expectations. See the comment on the tile
          * load command for more details.
          */
         *internal_type = V3D_INTERNAL_TYPE_8UI;
         *internal_bpp = V3D_INTERNAL_BPP_32;
         break;
      default:
         assert(!"unsupported format");
         break;
      }
   } else {
396
397
      const struct v3dv_format *format = v3dv_get_format(vk_format);
      v3dv_get_internal_type_bpp_for_output_format(format->rt_type,
Iago Toral's avatar
Iago Toral committed
398
399
                                                   internal_type,
                                                   internal_bpp);
400
401
402
   }
}

403
404
405
406
407
408
409
410
/* Parameters for an optional clear performed as part of a meta copy RCL
 * (consumed by emit_rcl_prologue).
 */
struct rcl_clear_info {
   const union v3dv_clear_value *clear_value;
   /* Image being cleared, if any; its slice tiling at 'level' is used to
    * compute UIF padding for the clear color packets.
    */
   struct v3dv_image *image;
   VkImageAspectFlags aspects;
   uint32_t layer;
   uint32_t level;
};

411
/**
 * Emits the common prologue of a meta copy job's render control list:
 * frame common config, optional clear color packets, render target config,
 * Z/S clear values and tile list initialization.
 *
 * Returns the job's RCL on success, or NULL on out-of-memory.
 */
static struct v3dv_cl *
emit_rcl_prologue(struct v3dv_job *job,
                  struct framebuffer_data *fb,
                  const struct rcl_clear_info *clear_info)
{
   const struct v3dv_frame_tiling *tiling = &job->frame_tiling;

   struct v3dv_cl *rcl = &job->rcl;
   /* Reserve CL space up front so the packet emission below does not need
    * to handle allocation failure mid-stream.
    */
   v3dv_cl_ensure_space_with_branch(rcl, 200 +
                                    tiling->layers * 256 *
                                    cl_packet_length(SUPERTILE_COORDINATES));
   if (job->cmd_buffer->state.oom)
      return NULL;

   cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
      config.early_z_disable = true;
      config.image_width_pixels = tiling->width;
      config.image_height_pixels = tiling->height;
      config.number_of_render_targets = 1;
      config.multisample_mode_4x = tiling->msaa;
      config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
      config.internal_depth_type = fb->internal_depth_type;
   }

   if (clear_info && (clear_info->aspects & VK_IMAGE_ASPECT_COLOR_BIT)) {
      /* For UIF-tiled images whose padded height exceeds the implicit
       * padding by enough, the explicit padded height must be passed in
       * the PART3 clear packet.
       */
      uint32_t clear_pad = 0;
      if (clear_info->image) {
         const struct v3dv_image *image = clear_info->image;
         const struct v3d_resource_slice *slice =
            &image->slices[clear_info->level];
         if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
             slice->tiling == V3D_TILING_UIF_XOR) {
            int uif_block_height = v3d_utile_height(image->cpp) * 2;

            uint32_t implicit_padded_height =
               align(tiling->height, uif_block_height) / uif_block_height;

            /* The '>= 15' threshold presumably matches what the hardware
             * can infer implicitly — TODO confirm against the HW docs.
             */
            if (slice->padded_height_of_output_image_in_uif_blocks -
                implicit_padded_height >= 15) {
               clear_pad = slice->padded_height_of_output_image_in_uif_blocks;
            }
         }
      }

      /* The 128-bit clear color is split across up to three packets,
       * gated by the render target's internal bpp.
       */
      const uint32_t *color = &clear_info->clear_value->color[0];
      cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
         clear.clear_color_low_32_bits = color[0];
         clear.clear_color_next_24_bits = color[1] & 0x00ffffff;
         clear.render_target_number = 0;
      };

      if (tiling->internal_bpp >= V3D_INTERNAL_BPP_64) {
         cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
            clear.clear_color_mid_low_32_bits =
              ((color[1] >> 24) | (color[2] << 8));
            clear.clear_color_mid_high_24_bits =
              ((color[2] >> 24) | ((color[3] & 0xffff) << 8));
            clear.render_target_number = 0;
         };
      }

      if (tiling->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
         cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
            clear.uif_padded_height_in_uif_blocks = clear_pad;
            clear.clear_color_high_16_bits = color[3] >> 16;
            clear.render_target_number = 0;
         };
      }
   }

   cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
      rt.render_target_0_internal_bpp = tiling->internal_bpp;
      rt.render_target_0_internal_type = fb->internal_type;
      rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
   }

   /* When no clear is requested, default to depth=1.0 / stencil=0. */
   cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
      clear.z_clear_value = clear_info ? clear_info->clear_value->z : 1.0f;
      clear.stencil_clear_value = clear_info ? clear_info->clear_value->s : 0;
   };

   cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
      init.use_auto_chained_tile_lists = true;
      init.size_of_first_block_in_chained_tile_lists =
         TILE_ALLOCATION_BLOCK_SIZE_64B;
   }

   return rcl;
}

/**
 * Emits per-layer frame setup into the job's RCL: the tile list base
 * address for this layer, the supertile configuration, and the GFXH-1742
 * workaround sequence, which also performs the tile buffer clear when a
 * clear value is provided.
 */
static void
emit_frame_setup(struct v3dv_job *job,
                 uint32_t layer,
                 const union v3dv_clear_value *clear_value)
{
   v3dv_return_if_oom(NULL, job);

   const struct v3dv_frame_tiling *tiling = &job->frame_tiling;

   struct v3dv_cl *rcl = &job->rcl;

   /* Each layer's tile lists start at a 64-bytes-per-tile offset into the
    * job's tile alloc BO.
    */
   const uint32_t tile_alloc_offset =
      64 * layer * tiling->draw_tiles_x * tiling->draw_tiles_y;
   cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
      list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset);
   }

   cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
      config.number_of_bin_tile_lists = 1;
      config.total_frame_width_in_tiles = tiling->draw_tiles_x;
      config.total_frame_height_in_tiles = tiling->draw_tiles_y;

      config.supertile_width_in_tiles = tiling->supertile_width;
      config.supertile_height_in_tiles = tiling->supertile_height;

      config.total_frame_width_in_supertiles =
         tiling->frame_width_in_supertiles;
      config.total_frame_height_in_supertiles =
         tiling->frame_height_in_supertiles;
   }

   /* Implement GFXH-1742 workaround. Also, if we are clearing we have to do
    * it here.
    */
   for (int i = 0; i < 2; i++) {
      cl_emit(rcl, TILE_COORDINATES, coords);
      cl_emit(rcl, END_OF_LOADS, end);
      /* Dummy store: the workaround only needs the tile to be processed,
       * nothing actually written.
       */
      cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) {
         store.buffer_to_store = NONE;
      }
      /* The clear is only emitted on the first of the two iterations. */
      if (clear_value && i == 0) {
         cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
            clear.clear_z_stencil_buffer = true;
            clear.clear_all_render_targets = true;
         }
      }
      cl_emit(rcl, END_OF_TILE_MARKER, end);
   }

   cl_emit(rcl, FLUSH_VCD_CACHE, flush);
}

static void
emit_supertile_coordinates(struct v3dv_job *job,
Iago Toral's avatar
Iago Toral committed
555
                           struct framebuffer_data *framebuffer)
556
{
557
558
   v3dv_return_if_oom(NULL, job);

559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
   struct v3dv_cl *rcl = &job->rcl;

   const uint32_t min_y = framebuffer->min_y_supertile;
   const uint32_t max_y = framebuffer->max_y_supertile;
   const uint32_t min_x = framebuffer->min_x_supertile;
   const uint32_t max_x = framebuffer->max_x_supertile;

   for (int y = min_y; y <= max_y; y++) {
      for (int x = min_x; x <= max_x; x++) {
         cl_emit(rcl, SUPERTILE_COORDINATES, coords) {
            coords.column_number_in_supertiles = x;
            coords.row_number_in_supertiles = y;
         }
      }
   }
}

/**
 * Emits a LOAD_TILE_BUFFER_GENERAL packet that loads raster (linear)
 * memory at bo+offset into the given tile buffer.
 *
 * 'stride' goes into the hardware's height_in_ub_or_stride field; for
 * raster memory format that field carries the stride.
 */
static void
emit_linear_load(struct v3dv_cl *cl,
                 uint32_t buffer,
                 struct v3dv_bo *bo,
                 uint32_t offset,
                 uint32_t stride,
                 uint32_t format)
{
   cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
      load.buffer_to_load = buffer;
      load.address = v3dv_cl_address(bo, offset);
      load.input_image_format = format;
      load.memory_format = V3D_TILING_RASTER;
      load.height_in_ub_or_stride = stride;
      /* Linear loads always read sample 0. */
      load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
   }
}

/**
 * Emits a STORE_TILE_BUFFER_GENERAL packet that stores render target 0 to
 * raster (linear) memory at bo+offset.
 *
 * 'stride' goes into the hardware's height_in_ub_or_stride field; when
 * 'msaa' is set all samples are stored, otherwise only sample 0.
 */
static void
emit_linear_store(struct v3dv_cl *cl,
                  uint32_t buffer,
                  struct v3dv_bo *bo,
                  uint32_t offset,
                  uint32_t stride,
                  bool msaa,
                  uint32_t format)
{
   cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
      store.buffer_to_store = RENDER_TARGET_0;
      store.address = v3dv_cl_address(bo, offset);
      store.clear_buffer_being_stored = false;
      store.output_image_format = format;
      store.memory_format = V3D_TILING_RASTER;
      store.height_in_ub_or_stride = stride;
      store.decimate_mode = msaa ? V3D_DECIMATE_MODE_ALL_SAMPLES :
                                   V3D_DECIMATE_MODE_SAMPLE_0;
   }
}

615
/**
 * Emits the tile buffer load for one layer/level of an image in a meta
 * copy job, including the D24 channel-reorder workaround for buffer copies
 * and the format R/B swizzle swap for non-raw color copies.
 */
static void
emit_image_load(struct v3dv_cl *cl,
                struct framebuffer_data *framebuffer,
                struct v3dv_image *image,
                VkImageAspectFlags aspect,
                uint32_t layer,
                uint32_t mip_level,
                bool is_copy_to_buffer,
                bool is_copy_from_buffer)
{
   uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer);

   /* For image to/from buffer copies we always load to and store from RT0,
    * even for depth/stencil aspects, because the hardware can't do raster
    * stores or loads from/to the depth/stencil tile buffers.
    */
   bool load_to_color_tlb = is_copy_to_buffer || is_copy_from_buffer ||
                            aspect == VK_IMAGE_ASPECT_COLOR_BIT;

   const struct v3d_resource_slice *slice = &image->slices[mip_level];
   cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
      load.buffer_to_load = load_to_color_tlb ?
         RENDER_TARGET_0 : v3dv_zs_buffer_from_aspect_bits(aspect);

      load.address = v3dv_cl_address(image->mem->bo, layer_offset);

      load.input_image_format = choose_tlb_format(framebuffer, aspect, false,
                                                  is_copy_to_buffer,
                                                  is_copy_from_buffer);
      load.memory_format = slice->tiling;

      /* When copying depth/stencil images to a buffer, for D24 formats Vulkan
       * expects the depth value in the LSB bits of each 32-bit pixel.
       * Unfortunately, the hardware seems to put the S8/X8 bits there and the
       * depth bits on the MSB. To work around that we can reverse the channel
       * order and then swap the R/B channels to get what we want.
       *
       * NOTE: reversing and swapping only gets us the behavior we want if the
       * operations happen in that exact order, which seems to be the case when
       * done on the tile buffer load operations. On the store, it seems the
       * order is not the same. The order on the store is probably reversed so
       * that reversing and swapping on both the load and the store preserves
       * the original order of the channels in memory.
       *
       * Notice that we only need to do this when copying to a buffer, where
       * depth and stencil aspects are copied as separate regions and
       * the spec expects them to be tightly packed.
       */
      bool needs_rb_swap = false;
      bool needs_chan_reverse = false;
      if (is_copy_to_buffer &&
         (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
          (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
           (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) {
         needs_rb_swap = true;
         needs_chan_reverse = true;
      } else if (!is_copy_from_buffer && !is_copy_to_buffer &&
                 (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) {
         /* This is not a raw data copy (i.e. we are clearing the image),
          * so we need to make sure we respect the format swizzle.
          */
         needs_rb_swap = format_needs_rb_swap(framebuffer->vk_format);
      }

      load.r_b_swap = needs_rb_swap;
      load.channel_reverse = needs_chan_reverse;

      /* UIF tilings use the field for the padded height in UIF blocks;
       * raster uses it for the stride; it is left unset otherwise.
       */
      if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
          slice->tiling == V3D_TILING_UIF_XOR) {
         load.height_in_ub_or_stride =
            slice->padded_height_of_output_image_in_uif_blocks;
      } else if (slice->tiling == V3D_TILING_RASTER) {
         load.height_in_ub_or_stride = slice->stride;
      }

      if (image->samples > VK_SAMPLE_COUNT_1_BIT)
         load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
      else
         load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
   }
}

Iago Toral's avatar
Iago Toral committed
697
698
/* Emits a STORE_TILE_BUFFER_GENERAL packet that writes the contents of the
 * TLB for the current tile into the given image layer/mip-level.
 *
 * For buffer copies (in either direction) and color aspects the store is
 * sourced from the color tile buffer (RENDER_TARGET_0); otherwise it is
 * sourced from the Z/stencil buffer matching the requested aspect.
 */
static void
emit_image_store(struct v3dv_cl *cl,
                 struct framebuffer_data *framebuffer,
                 struct v3dv_image *image,
                 VkImageAspectFlags aspect,
                 uint32_t layer,
                 uint32_t mip_level,
                 bool is_copy_to_buffer,
                 bool is_copy_from_buffer,
                 bool is_multisample_resolve)
{
   uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer);

   /* Raw buffer copies and color aspects go through the color tile buffer */
   bool store_from_color_tlb = is_copy_to_buffer || is_copy_from_buffer ||
                               aspect == VK_IMAGE_ASPECT_COLOR_BIT;

   const struct v3d_resource_slice *slice = &image->slices[mip_level];
   cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
      store.buffer_to_store = store_from_color_tlb ?
         RENDER_TARGET_0 : v3dv_zs_buffer_from_aspect_bits(aspect);

      store.address = v3dv_cl_address(image->mem->bo, layer_offset);
      store.clear_buffer_being_stored = false;

      /* See rationale in emit_image_load() */
      bool needs_rb_swap = false;
      bool needs_chan_reverse = false;
      if (is_copy_from_buffer &&
         (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
          (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
           (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) {
         needs_rb_swap = true;
         needs_chan_reverse = true;
      } else if (!is_copy_from_buffer && !is_copy_to_buffer &&
                 (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) {
         /* This is not a raw data copy, so we need to respect the format
          * swizzle.
          */
         needs_rb_swap = format_needs_rb_swap(framebuffer->vk_format);
      }

      store.r_b_swap = needs_rb_swap;
      store.channel_reverse = needs_chan_reverse;

      store.output_image_format = choose_tlb_format(framebuffer, aspect, true,
                                                    is_copy_to_buffer,
                                                    is_copy_from_buffer);
      store.memory_format = slice->tiling;
      if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
          slice->tiling == V3D_TILING_UIF_XOR) {
         store.height_in_ub_or_stride =
            slice->padded_height_of_output_image_in_uif_blocks;
      } else if (slice->tiling == V3D_TILING_RASTER) {
         store.height_in_ub_or_stride = slice->stride;
      }

      if (image->samples > VK_SAMPLE_COUNT_1_BIT)
         store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
      else if (is_multisample_resolve)
         store.decimate_mode = V3D_DECIMATE_MODE_4X;
      else
         store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
   }
}

759
static void
Iago Toral's avatar
Iago Toral committed
760
emit_copy_layer_to_buffer_per_tile_list(struct v3dv_job *job,
761
                                        struct framebuffer_data *framebuffer,
762
763
                                        struct v3dv_buffer *buffer,
                                        struct v3dv_image *image,
764
                                        uint32_t layer_offset,
765
766
                                        const VkBufferImageCopy *region)
{
Iago Toral's avatar
Iago Toral committed
767
   struct v3dv_cl *cl = &job->indirect;
768
   v3dv_cl_ensure_space(cl, 200, 1);
769
770
   v3dv_return_if_oom(NULL, job);

771
772
773
774
775
   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   /* Load image to TLB */
776
777
778
779
780
781
782
783
784
785
786
787
   assert((image->type != VK_IMAGE_TYPE_3D &&
           layer_offset < region->imageSubresource.layerCount) ||
          layer_offset < image->extent.depth);

   const uint32_t image_layer = image->type != VK_IMAGE_TYPE_3D ?
      region->imageSubresource.baseArrayLayer + layer_offset :
      region->imageOffset.z + layer_offset;

   emit_image_load(cl, framebuffer, image,
                   region->imageSubresource.aspectMask,
                   image_layer,
                   region->imageSubresource.mipLevel,
788
789
790
                   true, false);

   cl_emit(cl, END_OF_LOADS, end);
791
792
793
794
795

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   /* Store TLB to buffer */
   uint32_t width, height;
Iago Toral's avatar
Iago Toral committed
796
   if (region->bufferRowLength == 0)
797
      width = region->imageExtent.width;
Iago Toral's avatar
Iago Toral committed
798
   else
799
      width = region->bufferRowLength;
Iago Toral's avatar
Iago Toral committed
800
801

   if (region->bufferImageHeight == 0)
802
      height = region->imageExtent.height;
Iago Toral's avatar
Iago Toral committed
803
   else
804
805
      height = region->bufferImageHeight;

806
   /* Handle copy from compressed format */
807
808
   width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk_format));
   height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk_format));
809

810
811
812
813
   /* If we are storing stencil from a combined depth/stencil format the
    * Vulkan spec states that the output buffer must have packed stencil
    * values, where each stencil value is 1 byte.
    */
814
815
816
   uint32_t cpp =
      region->imageSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
         1 : image->cpp;
817
   uint32_t buffer_stride = width * cpp;
818
819
   uint32_t buffer_offset = buffer->mem_offset + region->bufferOffset +
                            height * buffer_stride * layer_offset;
820

821
822
   uint32_t format = choose_tlb_format(framebuffer,
                                       region->imageSubresource.aspectMask,
823
                                       true, true, false);
824
825
826
827
   bool msaa = image->samples > VK_SAMPLE_COUNT_1_BIT;

   emit_linear_store(cl, RENDER_TARGET_0, buffer->mem->bo,
                     buffer_offset, buffer_stride, msaa, format);
828
829
830
831
832

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

Iago Toral's avatar
Iago Toral committed
833
   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
834
835
836
837
838
839
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

static void
Iago Toral's avatar
Iago Toral committed
840
emit_copy_layer_to_buffer(struct v3dv_job *job,
841
842
                          struct v3dv_buffer *buffer,
                          struct v3dv_image *image,
Iago Toral's avatar
Iago Toral committed
843
                          struct framebuffer_data *framebuffer,
844
845
846
                          uint32_t layer,
                          const VkBufferImageCopy *region)
{
847
   emit_frame_setup(job, layer, NULL);
848
849
   emit_copy_layer_to_buffer_per_tile_list(job, framebuffer, buffer,
                                           image, layer, region);
850
   emit_supertile_coordinates(job, framebuffer);
851
852
853
}

static void
Iago Toral's avatar
Iago Toral committed
854
emit_copy_image_to_buffer_rcl(struct v3dv_job *job,
855
856
                              struct v3dv_buffer *buffer,
                              struct v3dv_image *image,
Iago Toral's avatar
Iago Toral committed
857
                              struct framebuffer_data *framebuffer,
858
859
                              const VkBufferImageCopy *region)
{
860
   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
861
862
   v3dv_return_if_oom(NULL, job);

Iago Toral's avatar
Iago Toral committed
863
   for (int layer = 0; layer < job->frame_tiling.layers; layer++)
864
      emit_copy_layer_to_buffer(job, buffer, image, framebuffer, layer, region);
865
866
867
868
869
870
871
872
   cl_emit(rcl, END_OF_RENDERING, end);
}

/* Implements a copy using the TLB.
 *
 * This only works if we are copying from offset (0,0), since a TLB store for
 * tile (x,y) will be written at the same tile offset into the destination.
 * When this requirement is not met, we need to use a blit instead.
873
874
875
876
 *
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 *
877
 */
878
static bool
879
880
881
882
883
copy_image_to_buffer_tlb(struct v3dv_cmd_buffer *cmd_buffer,
                         struct v3dv_buffer *buffer,
                         struct v3dv_image *image,
                         const VkBufferImageCopy *region)
{
884
885
886
887
   VkFormat fb_format;
   if (!can_use_tlb(image, &region->imageOffset, &fb_format))
      return false;

Iago Toral's avatar
Iago Toral committed
888
   uint32_t internal_type, internal_bpp;
889
   get_internal_type_bpp_for_image_aspects(fb_format,
Iago Toral's avatar
Iago Toral committed
890
891
                                           region->imageSubresource.aspectMask,
                                           &internal_type, &internal_bpp);
892

893
894
895
896
897
   uint32_t num_layers;
   if (image->type != VK_IMAGE_TYPE_3D)
      num_layers = region->imageSubresource.layerCount;
   else
      num_layers = region->imageExtent.depth;
Iago Toral's avatar
Iago Toral committed
898
   assert(num_layers > 0);
899

900
901
   struct v3dv_job *job =
      v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
902
   if (!job)
903
      return true;
Iago Toral's avatar
Iago Toral committed
904

905
906
907
   /* Handle copy from compressed format using a compatible format */
   const uint32_t block_w = vk_format_get_blockwidth(image->vk_format);
   const uint32_t block_h = vk_format_get_blockheight(image->vk_format);
908
909
   const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
   const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);
910

911
   v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp, false);
Iago Toral's avatar
Iago Toral committed
912
913

   struct framebuffer_data framebuffer;
914
915
   setup_framebuffer_data(&framebuffer, fb_format, internal_type,
                          &job->frame_tiling);
Iago Toral's avatar
Iago Toral committed
916

Iago Toral's avatar
Iago Toral committed
917
   v3dv_job_emit_binning_flush(job);
918
   emit_copy_image_to_buffer_rcl(job, buffer, image, &framebuffer, region);
Iago Toral's avatar
Iago Toral committed
919
920

   v3dv_cmd_buffer_finish_job(cmd_buffer);
921
922
923
924
925
926
927
928
929
930
931
932
933

   return true;
}

static bool
blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
            struct v3dv_image *dst,
            VkFormat dst_format,
            struct v3dv_image *src,
            VkFormat src_format,
            VkColorComponentFlags cmask,
            VkComponentMapping *cswizzle,
            const VkImageBlit *region,
934
935
            VkFilter filter,
            bool dst_is_padded_image);
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968

/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
                          struct v3dv_buffer *buffer,
                          struct v3dv_image *image,
                          const VkBufferImageCopy *region)
{
   bool handled = false;

   /* Generally, the bpp of the data in the buffer matches that of the
    * source image. The exception is the case where we are copying
    * stencil (8bpp) to a combined d24s8 image (32bpp).
    */
   uint32_t buffer_bpp = image->cpp;

   VkImageAspectFlags copy_aspect = region->imageSubresource.aspectMask;

   /* Because we are going to implement the copy as a blit, we need to create
    * a linear image from the destination buffer and we also want our blit
    * source and destination formats to be the same (to avoid any format
    * conversions), so we choose a canonical format that matches the
    * source image bpp.
    *
    * The exception to the above is copying from combined depth/stencil images
    * because we are copying only one aspect of the image, so we need to setup
    * our formats, color write mask and source swizzle mask to match that.
    */
   VkFormat dst_format;
   VkFormat src_format;
969
   VkColorComponentFlags cmask = 0; /* All components */
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
   VkComponentMapping cswizzle = {
      .r = VK_COMPONENT_SWIZZLE_IDENTITY,
      .g = VK_COMPONENT_SWIZZLE_IDENTITY,
      .b = VK_COMPONENT_SWIZZLE_IDENTITY,
      .a = VK_COMPONENT_SWIZZLE_IDENTITY,
   };
   switch (buffer_bpp) {
   case 16:
      assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
      dst_format = VK_FORMAT_R32G32B32A32_UINT;
      src_format = dst_format;
      break;
   case 8:
      assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
      dst_format = VK_FORMAT_R16G16B16A16_UINT;
      src_format = dst_format;
      break;
   case 4:
      switch (copy_aspect) {
      case VK_IMAGE_ASPECT_COLOR_BIT:
         src_format = VK_FORMAT_R8G8B8A8_UINT;
         dst_format = VK_FORMAT_R8G8B8A8_UINT;
         break;
      case VK_IMAGE_ASPECT_DEPTH_BIT:
         assert(image->vk_format == VK_FORMAT_D32_SFLOAT ||
                image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
                image->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32);
         if (image->vk_format == VK_FORMAT_D32_SFLOAT) {
            src_format = VK_FORMAT_R32_UINT;
            dst_format = VK_FORMAT_R32_UINT;
         } else {
            /* We want to write depth in the buffer in the first 24-bits,
             * however, the hardware has depth in bits 8-31, so swizzle the
             * the source components to match what we want. Also, we don't
             * want to write bits 24-31 in the destination.
             */
            src_format = VK_FORMAT_R8G8B8A8_UINT;
            dst_format = VK_FORMAT_R8G8B8A8_UINT;
1008
1009
1010
            cmask = VK_COLOR_COMPONENT_R_BIT |
                    VK_COLOR_COMPONENT_G_BIT |
                    VK_COLOR_COMPONENT_B_BIT;
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
            cswizzle.r = VK_COMPONENT_SWIZZLE_G;
            cswizzle.g = VK_COMPONENT_SWIZZLE_B;
            cswizzle.b = VK_COMPONENT_SWIZZLE_A;
            cswizzle.a = VK_COMPONENT_SWIZZLE_ZERO;
         }
         break;
      case VK_IMAGE_ASPECT_STENCIL_BIT:
         assert(copy_aspect == VK_IMAGE_ASPECT_STENCIL_BIT);
         assert(image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT);
         /* Copying from S8D24. We want to write 8-bit stencil values only,
          * so adjust the buffer bpp for that. Since the hardware stores stencil
          * in the LSB, we can just do a RGBA8UI to R8UI blit.
          */
         src_format = VK_FORMAT_R8G8B8A8_UINT;
         dst_format = VK_FORMAT_R8_UINT;
         buffer_bpp = 1;
         break;
      default:
         unreachable("unsupported aspect");
         return handled;
      };
      break;
   case 2:
      assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT ||
             copy_aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
      dst_format = VK_FORMAT_R16_UINT;
      src_format = dst_format;
      break;
   case 1:
      assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
      dst_format = VK_FORMAT_R8_UINT;
      src_format = dst_format;
      break;
   default:
      unreachable("unsupported bit-size");
      return handled;
   };

   /* The hardware doesn't support linear depth/stencil stores, so we
    * implement copies of depth/stencil aspect as color copies using a
    * compatible color format.
    */
   assert(vk_format_is_color(src_format));
   assert(vk_format_is_color(dst_format));
   copy_aspect = VK_IMAGE_ASPECT_COLOR_BIT;

   /* We should be able to handle the blit if we got this far */
   handled = true;

   /* Obtain the 2D buffer region spec */
   uint32_t buf_width, buf_height;
   if (region->bufferRowLength == 0)
      buf_width = region->imageExtent.width;
   else
      buf_width = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      buf_height = region->imageExtent.height;
   else
      buf_height = region->bufferImageHeight;

1072
1073
1074
1075
1076
1077
   /* If the image is compressed, the bpp refers to blocks, not pixels */
   uint32_t block_width = vk_format_get_blockwidth(image->vk_format);
   uint32_t block_height = vk_format_get_blockheight(image->vk_format);
   buf_width = buf_width / block_width;
   buf_height = buf_height / block_height;

1078
1079
1080
1081
1082
1083
1084
1085
   /* Compute layers to copy */
   uint32_t num_layers;
   if (image->type != VK_IMAGE_TYPE_3D)
      num_layers = region->imageSubresource.layerCount;
   else
      num_layers = region->imageExtent.depth;
   assert(num_layers > 0);

1086
1087
1088
1089
1090
1091
1092
1093
1094
   /* Our blit interface can see the real format of the images to detect
    * copies between compressed and uncompressed images and adapt the
    * blit region accordingly. Here we are just doing a raw copy of
    * compressed data, but we are passing an uncompressed view of the
    * buffer for the blit destination image (since compressed formats are
    * not renderable), so we also want to provide an uncompressed view of
    * the source image.
    */
   VkResult result;
1095
1096
   struct v3dv_device *device = cmd_buffer->device;
   VkDevice _device = v3dv_device_to_handle(device);
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
   if (vk_format_is_compressed(image->vk_format)) {
      VkImage uiview;
      VkImageCreateInfo uiview_info = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
         .imageType = VK_IMAGE_TYPE_3D,
         .format = dst_format,
         .extent = { buf_width, buf_height, image->extent.depth },
         .mipLevels = image->levels,
         .arrayLayers = image->array_size,
         .samples = image->samples,
         .tiling = image->tiling,
         .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
         .queueFamilyIndexCount = 0,
         .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
      };
1113
      result = v3dv_CreateImage(_device, &uiview_info, &device->vk.alloc, &uiview);
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
      if (result != VK_SUCCESS)
         return handled;

      v3dv_cmd_buffer_add_private_obj(
         cmd_buffer, (uintptr_t)uiview,
         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);

      result = v3dv_BindImageMemory(_device, uiview,
                                    v3dv_device_memory_to_handle(image->mem),
                                    image->mem_offset);
      if (result != VK_SUCCESS)
         return handled;

      image = v3dv_image_from_handle(uiview);
   }

   /* Copy requested layers */
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
   for (uint32_t i = 0; i < num_layers; i++) {
      /* Create the destination blit image from the destination buffer */
      VkImageCreateInfo image_info = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
         .imageType = VK_IMAGE_TYPE_2D,
         .format = dst_format,
         .extent = { buf_width, buf_height, 1 },
         .mipLevels = 1,
         .arrayLayers = 1,
         .samples = VK_SAMPLE_COUNT_1_BIT,
         .tiling = VK_IMAGE_TILING_LINEAR,
         .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
         .queueFamilyIndexCount = 0,
         .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
      };

      VkImage buffer_image;
1149
      result =
1150
         v3dv_CreateImage(_device, &image_info, &device->vk.alloc, &buffer_image);
1151
1152
1153
1154
      if (result != VK_SUCCESS)
         return handled;

      v3dv_cmd_buffer_add_private_obj(
1155
         cmd_buffer, (uintptr_t)buffer_image,
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);

      /* Bind the buffer memory to the image */
      VkDeviceSize buffer_offset = buffer->mem_offset + region->bufferOffset +
         i * buf_width * buf_height * buffer_bpp;
      result = v3dv_BindImageMemory(_device, buffer_image,
                                    v3dv_device_memory_to_handle(buffer->mem),
                                    buffer_offset);
      if (result != VK_SUCCESS)
         return handled;

      /* Blit-copy the requested image extent.
       *
       * Since we are copying, the blit must use the same format on the
       * destination and source images to avoid format conversions. The
       * only exception is copying stencil, which we upload to a R8UI source
       * image, but that we need to blit to a S8D24 destination (the only
       * stencil format we support).
       */
      const VkImageBlit blit_region = {
         .srcSubresource = {
            .aspectMask = copy_aspect,
            .mipLevel = region->imageSubresource.mipLevel,
1179
1180
            .baseArrayLayer = region->imageSubresource.baseArrayLayer + i,
            .layerCount = 1,
1181
1182
1183
         },
         .srcOffsets = {
            {
1184
1185
               DIV_ROUND_UP(region->imageOffset.x, block_width),
               DIV_ROUND_UP(region->imageOffset.y, block_height),
1186
1187
1188
               region->imageOffset.z + i,
            },
            {
1189
1190
1191
1192
               DIV_ROUND_UP(region->imageOffset.x + region->imageExtent.width,
                            block_width),
               DIV_ROUND_UP(region->imageOffset.y + region->imageExtent.height,
                            block_height),
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
               region->imageOffset.z + i + 1,
            },
         },
         .dstSubresource = {
            .aspectMask = copy_aspect,
            .mipLevel = 0,
            .baseArrayLayer = 0,
            .layerCount = 1,
         },
         .dstOffsets = {
            { 0, 0, 0 },
1204
1205
1206
1207
1208
            {
               DIV_ROUND_UP(region->imageExtent.width, block_width),
               DIV_ROUND_UP(region->imageExtent.height, block_height),
               1
            },
1209
1210
1211
1212
1213
1214
1215
         },
      };

      handled = blit_shader(cmd_buffer,
                            v3dv_image_from_handle(buffer_image), dst_format,
                            image, src_format,
                            cmask, &cswizzle,
1216
                            &blit_region, VK_FILTER_NEAREST, false);
1217
1218
1219
1220
1221
1222
1223
1224
1225
      if (!handled) {
         /* This is unexpected, we should have a supported blit spec */
         unreachable("Unable to blit buffer to destination image");
         return false;
      }
   }

   assert(handled);
   return true;
1226
1227
}

1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
static VkFormat
get_compatible_tlb_format(VkFormat format)
{
   switch (format) {
   case VK_FORMAT_R8G8B8A8_SNORM:
      return VK_FORMAT_R8G8B8A8_UINT;

   case VK_FORMAT_R8G8_SNORM:
      return VK_FORMAT_R8G8_UINT;

   case VK_FORMAT_R8_SNORM:
      return VK_FORMAT_R8_UINT;

   case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
      return VK_FORMAT_A8B8G8R8_UINT_PACK32;

   case VK_FORMAT_R16_UNORM:
   case VK_FORMAT_R16_SNORM:
      return VK_FORMAT_R16_UINT;

   case VK_FORMAT_R16G16_UNORM:
   case VK_FORMAT_R16G16_SNORM:
      return VK_FORMAT_R16G16_UINT;

   case VK_FORMAT_R16G16B16A16_UNORM:
   case VK_FORMAT_R16G16B16A16_SNORM:
      return VK_FORMAT_R16G16B16A16_UINT;

   case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
      return VK_FORMAT_R32_SFLOAT;

1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
   /* We can't render to compressed formats using the TLB so instead we use
    * a compatible format with the same bpp as the compressed format. Because
    * the compressed format's bpp is for a full block (i.e. 4x4 pixels in the
    * case of ETC), when we implement copies with the compatible format we
    * will have to divide offsets and dimensions on the compressed image by
    * the compressed block size.
    */
   case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
   case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
   case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
1270
1271
1272
1273
   case VK_FORMAT_BC2_UNORM_BLOCK:
   case VK_FORMAT_BC2_SRGB_BLOCK:
   case VK_FORMAT_BC3_SRGB_BLOCK:
   case VK_FORMAT_BC3_UNORM_BLOCK:
1274
1275
1276
1277
1278
1279
1280
1281
      return VK_FORMAT_R32G32B32A32_UINT;

   case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
   case VK_FORMAT_EAC_R11_UNORM_BLOCK:
   case VK_FORMAT_EAC_R11_SNORM_BLOCK:
1282
1283
1284
1285
   case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
   case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
   case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
   case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
1286
1287
      return VK_FORMAT_R16G16B16A16_UINT;

1288
1289
1290
1291
1292
   default:
      return VK_FORMAT_UNDEFINED;
   }
}

1293
static inline bool
1294
1295
1296
can_use_tlb(struct v3dv_image *image,
            const VkOffset3D *offset,
            VkFormat *compat_format)
1297
{
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
   if (offset->x != 0 || offset->y != 0)
      return false;

   if (image->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO) {
      if (compat_format)
         *compat_format = image->vk_format;
      return true;
   }

   /* If the image format is not TLB-supported, then check if we can use
    * a compatible format instead.
    */
   if (compat_format) {
      *compat_format = get_compatible_tlb_format(image->vk_format);
      if (*compat_format != VK_FORMAT_UNDEFINED)
         return true;
   }

   return false;
1317
1318
}

Iago Toral's avatar
Iago Toral committed
1319
1320
1321
1322
1323
1324
1325
1326
void
v3dv_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer,
                          VkImage srcImage,
                          VkImageLayout srcImageLayout,
                          VkBuffer destBuffer,
                          uint32_t regionCount,
                          const VkBufferImageCopy *pRegions)
{
1327
1328
1329
1330
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_image, image, srcImage);
   V3DV_FROM_HANDLE(v3dv_buffer, buffer, destBuffer);

1331
1332
   assert(image->samples == VK_SAMPLE_COUNT_1_BIT);

1333
   for (uint32_t i = 0; i < regionCount; i++) {
1334
1335
1336
1337
1338
      if (copy_image_to_buffer_tlb(cmd_buffer, buffer, image, &pRegions[i]))
         continue;
      if (copy_image_to_buffer_blit(cmd_buffer, buffer, image, &pRegions[i]))
         continue;
      unreachable("Unsupported image to buffer copy.");
1339
   }
Iago Toral's avatar
Iago Toral committed
1340
1341
}

Iago Toral's avatar
Iago Toral committed
1342
1343
static void
emit_copy_image_layer_per_tile_list(struct v3dv_job *job,
1344
                                    struct framebuffer_data *framebuffer,
Iago Toral's avatar
Iago Toral committed
1345
1346
                                    struct v3dv_image *dst,
                                    struct v3dv_image *src,
1347
                                    uint32_t layer_offset,
Iago Toral's avatar
Iago Toral committed
1348
1349
1350
1351
                                    const VkImageCopy *region)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
1352
1353
   v3dv_return_if_oom(NULL, job);

Iago Toral's avatar
Iago Toral committed
1354
1355
1356
1357
   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

1358
1359
1360
1361
1362
1363
1364
   assert((src->type != VK_IMAGE_TYPE_3D &&
           layer_offset < region-><