radv_image.c 81.7 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

28
#include "drm-uapi/drm_fourcc.h"
29
#include "radv_debug.h"
30
31
#include "radv_private.h"
#include "vk_format.h"
32
#include "vk_util.h"
33
34
35
#include "radv_radeon_winsys.h"
#include "sid.h"
#include "util/debug.h"
36
#include "util/u_atomic.h"
37
#include "vulkan/util/vk_format.h"
38
39
40

#include "gfx10_format_table.h"

41
42
43
44
45
46
47

/* Usage bits under which the GPU may write to an image's contents.
 * Presumably consulted elsewhere in this file when deciding whether
 * write-side metadata handling is needed — confirm against callers. */
static const VkImageUsageFlagBits RADV_IMAGE_USAGE_WRITE_BITS =
	VK_IMAGE_USAGE_TRANSFER_DST_BIT |
	VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
	VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT |
	VK_IMAGE_USAGE_STORAGE_BIT;

48
static unsigned
49
radv_choose_tiling(struct radv_device *device,
50
51
		   const VkImageCreateInfo *pCreateInfo,
		   VkFormat format)
52
53
54
55
56
57
{
	if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
		assert(pCreateInfo->samples <= 1);
		return RADEON_SURF_MODE_LINEAR_ALIGNED;
	}

58
59
60
61
	/* MSAA resources must be 2D tiled. */
	if (pCreateInfo->samples > 1)
		return RADEON_SURF_MODE_2D;

62
63
	if (!vk_format_is_compressed(format) &&
	    !vk_format_is_depth_or_stencil(format)
64
	    && device->physical_device->rad_info.chip_class <= GFX8) {
65
		/* this causes hangs in some VK CTS tests on GFX9. */
66
67
68
69
70
71
72
		/* Textures with a very small height are recommended to be linear. */
		if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
		    /* Only very thin and long 2D textures should benefit from
		     * linear_aligned. */
		    (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
			return RADEON_SURF_MODE_LINEAR_ALIGNED;
	}
73

74
75
	return RADEON_SURF_MODE_2D;
}
76
77

static bool
78
radv_use_tc_compat_htile_for_image(struct radv_device *device,
79
80
				   const VkImageCreateInfo *pCreateInfo,
				   VkFormat format)
81
82
{
	/* TC-compat HTILE is only available for GFX8+. */
83
	if (device->physical_device->rad_info.chip_class < GFX8)
84
85
		return false;

86
	if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
87
88
89
90
91
		return false;

	if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
		return false;

92
93
94
95
96
97
98
99
	/* Do not enable TC-compatible HTILE if the image isn't readable by a
	 * shader because no texture fetches will happen.
	 */
	if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
				    VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
				    VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
		return false;

100
	if (device->physical_device->rad_info.chip_class < GFX9) {
101
102
103
104
105
106
		/* TC-compat HTILE for MSAA depth/stencil images is broken
		 * on GFX8 because the tiling doesn't match.
		 */
		if (pCreateInfo->samples >= 2 && format == VK_FORMAT_D32_SFLOAT_S8_UINT)
			return false;

107
108
109
110
111
112
113
114
115
116
		/* GFX9+ supports compression for both 32-bit and 16-bit depth
		 * surfaces, while GFX8 only supports 32-bit natively. Though,
		 * the driver allows TC-compat HTILE for 16-bit depth surfaces
		 * with no Z planes compression.
		 */
		if (format != VK_FORMAT_D32_SFLOAT_S8_UINT &&
		    format != VK_FORMAT_D32_SFLOAT &&
		    format != VK_FORMAT_D16_UNORM)
			return false;
	}
117
118
119
120

	return true;
}

121
122
123
static bool
radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
{
124
125
126
127
128
	if (info->bo_metadata) {
		if (device->physical_device->rad_info.chip_class >= GFX9)
			return info->bo_metadata->u.gfx9.scanout;
		else
			return info->bo_metadata->u.legacy.scanout;
129
	}
130
131

	return info->scanout;
132
133
}

134
static bool
135
136
radv_image_use_fast_clear_for_image(const struct radv_device *device,
                                    const struct radv_image *image)
137
{
138
139
140
	if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
		return true;

141
142
143
144
145
146
147
148
149
150
151
	if (image->info.samples <= 1 &&
	    image->info.width * image->info.height <= 512 * 512) {
		/* Do not enable CMASK or DCC for small surfaces where the cost
		 * of the eliminate pass can be higher than the benefit of fast
		 * clear. RadeonSI does this, but the image threshold is
		 * different.
		 */
		return false;
	}

	return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
152
153
154
155
156
157
	       (image->exclusive ||
		/* Enable DCC for concurrent images if stores are
		 * supported because that means we can keep DCC compressed on
		 * all layouts/queues.
		 */
		radv_image_use_dcc_image_stores(device, image));
158
159
}

160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
/* Check whether DCC can stay enabled across all view formats of an image:
 * the base format must be a supported color-buffer format, and if the image
 * is mutable, every declared view format must be DCC-compatible with it. */
bool
radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev,
                                const void *pNext, VkFormat format,
                                VkImageCreateFlags flags)
{
	bool blendable;

	if (!radv_is_colorbuffer_format_supported(pdev, format, &blendable))
		return false;

	if (!(flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT))
		return true;

	const struct VkImageFormatListCreateInfo *format_list =
		(const struct VkImageFormatListCreateInfo *)
			vk_find_struct_const(pNext,
					     IMAGE_FORMAT_LIST_CREATE_INFO);

	/* We have to ignore the existence of the list if viewFormatCount = 0;
	 * a mutable image without a usable list cannot keep DCC. */
	if (!format_list || !format_list->viewFormatCount)
		return false;

	/* Compatibility is transitive, so we only need to check one format
	 * with everything else. */
	for (uint32_t i = 0; i < format_list->viewFormatCount; ++i) {
		VkFormat view_format = format_list->pViewFormats[i];

		if (view_format == VK_FORMAT_UNDEFINED)
			continue;

		if (!radv_dcc_formats_compatible(format, view_format))
			return false;
	}

	return true;
}

197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
/* Return true when the image format — or any of its declared mutable view
 * formats — supports shader atomic operations. */
static bool
radv_formats_is_atomic_allowed(const void *pNext, VkFormat format,
                               VkImageCreateFlags flags)
{
	if (radv_is_atomic_format_supported(format))
		return true;

	if (!(flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT))
		return false;

	const struct VkImageFormatListCreateInfo *format_list =
		(const struct VkImageFormatListCreateInfo *)
			vk_find_struct_const(pNext,
					     IMAGE_FORMAT_LIST_CREATE_INFO);

	/* We have to ignore the existence of the list if viewFormatCount = 0
	 * (an empty loop below handles that case naturally). */
	if (!format_list)
		return false;

	for (uint32_t i = 0; i < format_list->viewFormatCount; ++i) {
		if (radv_is_atomic_format_supported(format_list->pViewFormats[i]))
			return true;
	}

	return false;
}

222
223
static bool
radv_use_dcc_for_image(struct radv_device *device,
224
		       const struct radv_image *image,
225
226
		       const VkImageCreateInfo *pCreateInfo,
		       VkFormat format)
227
228
{
	/* DCC (Delta Color Compression) is only available for GFX8+. */
229
	if (device->physical_device->rad_info.chip_class < GFX8)
230
231
232
233
234
		return false;

	if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
		return false;

235
	if (image->shareable && image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
236
237
		return false;

238
239
240
241
242
243
244
245
246
247
	/*
	 * TODO: Enable DCC for storage images on GFX9 and earlier.
	 *
	 * Also disable DCC with atomics because even when DCC stores are
	 * supported atomics will always decompress. So if we are
	 * decompressing a lot anyway we might as well not have DCC.
	 */
	if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
	    (!radv_image_use_dcc_image_stores(device, image) ||
	     radv_formats_is_atomic_allowed(pCreateInfo->pNext, format, pCreateInfo->flags)))
248
249
250
251
252
		return false;

	if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
		return false;

253
254
	if (vk_format_is_subsampled(format) ||
	    vk_format_get_plane_count(format) > 1)
255
256
		return false;

257
258
	if (!radv_image_use_fast_clear_for_image(device, image) &&
	    image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
259
260
		return false;

261
262
263
264
	/* Do not enable DCC for mipmapped arrays because performance is worse. */
	if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
		return false;

265
266
267
268
269
270
271
272
273
274
275
	if (device->physical_device->rad_info.chip_class < GFX10) {
		/* TODO: Add support for DCC MSAA on GFX8-9. */
		if (pCreateInfo->samples > 1 &&
		    !device->physical_device->dcc_msaa_allowed)
			return false;

		/* TODO: Add support for DCC layers/mipmaps on GFX9. */
		if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
		     device->physical_device->rad_info.chip_class == GFX9)
			return false;
	}
276

277
278
279
	return radv_are_formats_dcc_compatible(device->physical_device,
	                                       pCreateInfo->pNext, format,
	                                       pCreateInfo->flags);
280
281
}

282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
/*
 * Whether to enable image stores with DCC compression for this image. If
 * this function returns false the image subresource should be decompressed
 * before using it with image stores.
 *
 * Note that this can have mixed performance implications, see
 * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6796#note_643299
 *
 * This function assumes the image uses DCC compression.
 */
bool radv_image_use_dcc_image_stores(const struct radv_device *device,
				     const struct radv_image *image)
{
	/*
	 * TODO: Enable on more HW. DIMGREY and VANGOGH need a workaround and
	 * we need more perf analysis.
	 * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6796#note_643853
	 */
300
	return device->physical_device->rad_info.chip_class == GFX10 ||
301
302
		(device->physical_device->rad_info.chip_class == GFX10_3 &&
		 (device->instance->perftest_flags & RADV_PERFTEST_DCC_STORES) &&
303
		 !device->physical_device->use_llvm);
304
305
306
307
308
309
310
311
312
313
314
315
316
317
}

/*
 * Whether to use a predicate to determine whether DCC is in a compressed
 * state. This can be used to avoid decompressing an image multiple times.
 *
 * This function assumes the image uses DCC compression.
 */
bool radv_image_use_dcc_predication(const struct radv_device *device,
				    const struct radv_image *image)
{
	/* Predication is only needed when DCC image stores are disabled —
	 * presumably because that is when decompress passes can occur. */
	return !radv_image_use_dcc_image_stores(device, image);
}

318
static inline bool
319
320
radv_use_fmask_for_image(const struct radv_device *device,
                         const struct radv_image *image)
321
322
{
	return image->info.samples > 1 &&
323
324
	       ((image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) ||
	        (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
325
326
}

327
static inline bool
328
329
radv_use_htile_for_image(const struct radv_device *device,
                         const struct radv_image *image)
330
{
331
332
333
334
335
336
337
338
339
	/* TODO:
	 * - Investigate about mips+layers.
	 * - Enable on other gens.
	 */
	bool use_htile_for_mips = image->info.array_size == 1 &&
				  device->physical_device->rad_info.chip_class >= GFX10;

	return (image->info.levels == 1 || use_htile_for_mips) &&
		!image->shareable &&
340
341
	       ((image->info.width * image->info.height >= 8 * 8) ||
	        (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
342
343
}

344
345
346
347
348
349
350
351
static bool
radv_use_tc_compat_cmask_for_image(struct radv_device *device,
				   struct radv_image *image)
{
	/* TC-compat CMASK is only available for GFX8+. */
	if (device->physical_device->rad_info.chip_class < GFX8)
		return false;

352
353
354
	if (device->instance->debug_flags & RADV_DEBUG_NO_TC_COMPAT_CMASK)
		return false;

355
356
357
358
359
	/* TODO: Enable TC-compat CMASK on GFX8-9. */
	if (device->physical_device->rad_info.chip_class < GFX10 &&
	    !(device->instance->perftest_flags & RADV_PERFTEST_TC_COMPAT_CMASK))
		return false;

360
361
362
	if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)
		return false;

363
364
365
366
367
368
369
370
	/* Do not enable TC-compatible if the image isn't readable by a shader
	 * because no texture fetches will happen.
	 */
	if (!(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
			      VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
			      VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
		return false;

371
372
	/* If the image doesn't have FMASK, it can't be fetchable. */
	if (!radv_image_has_fmask(image))
373
374
375
376
377
		return false;

	return true;
}

378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
/* Second opaque-metadata word: ATI PCI vendor id in the high 16 bits and
 * the device's pci_id in the low 16 bits.  Compared against imported
 * metadata in radv_is_valid_opaque_metadata(). */
static uint32_t si_get_bo_metadata_word1(const struct radv_device *device)
{
	return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
}

/* Validate imported opaque BO metadata before trusting its contents. */
static bool
radv_is_valid_opaque_metadata(const struct radv_device *device,
                              const struct radeon_bo_metadata *md)
{
	/* Word 0 appears to be a marker/version that must be 1, and word 1
	 * must identify this exact GPU (vendor/device id) — metadata written
	 * for another device is rejected. */
	if (md->metadata[0] != 1 ||
	    md->metadata[1] != si_get_bo_metadata_word1(device))
		return false;

	/* Require at least 40 bytes of metadata; presumably the minimum the
	 * consumers below read — confirm against the layout writers. */
	if (md->size_metadata < 40)
		return false;

	return true;
}

397
static void
398
399
400
radv_patch_surface_from_metadata(struct radv_device *device,
                                 struct radeon_surf *surface,
                                 const struct radeon_bo_metadata *md)
401
{
402
403
	surface->flags = RADEON_SURF_CLR(surface->flags, MODE);

404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
	if (device->physical_device->rad_info.chip_class >= GFX9) {
		if (md->u.gfx9.swizzle_mode > 0)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);

		surface->u.gfx9.surf.swizzle_mode = md->u.gfx9.swizzle_mode;
	} else {
		surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
		surface->u.legacy.bankw = md->u.legacy.bankw;
		surface->u.legacy.bankh = md->u.legacy.bankh;
		surface->u.legacy.tile_split = md->u.legacy.tile_split;
		surface->u.legacy.mtilea = md->u.legacy.mtilea;
		surface->u.legacy.num_banks = md->u.legacy.num_banks;

		if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
		else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
		else
			surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);

	}
}

429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
/* Reconcile the image's internal dimensions with the (possibly larger)
 * dimensions encoded in imported BO metadata.  Returns an error when the
 * import cannot be made to work. */
static VkResult
radv_patch_image_dimensions(struct radv_device *device,
                            struct radv_image *image,
                            const struct radv_image_create_info *create_info,
                            struct ac_surf_info *image_info)
{
	unsigned width = image->info.width;
	unsigned height = image->info.height;
	const bool gfx10plus =
		device->physical_device->rad_info.chip_class >= GFX10;

	/*
	 * minigbm sometimes allocates bigger images which is going to result in
	 * weird strides and other properties. Lets be lenient where possible and
	 * fail it on GFX10 (as we cannot cope there).
	 *
	 * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
	 */
	if (create_info->bo_metadata &&
	    radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
		const struct radeon_bo_metadata *md = create_info->bo_metadata;

		/* Dimensions are packed into descriptor words; the field
		 * layout differs between GFX10+ and older generations. */
		if (gfx10plus) {
			width = G_00A004_WIDTH_LO(md->metadata[3]) +
			        (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
			height = G_00A008_HEIGHT(md->metadata[4]) + 1;
		} else {
			width = G_008F18_WIDTH(md->metadata[4]) + 1;
			height = G_008F18_HEIGHT(md->metadata[4]) + 1;
		}
	}

	if (image->info.width == width && image->info.height == height)
		return VK_SUCCESS;

	if (width < image->info.width || height < image->info.height) {
		fprintf(stderr,
		        "The imported image has smaller dimensions than the internal\n"
		        "dimensions. Using it is going to fail badly, so we reject\n"
		        "this import.\n"
		        "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
		        image->info.width, image->info.height, width, height);
		return VK_ERROR_INVALID_EXTERNAL_HANDLE;
	} else if (gfx10plus) {
		fprintf(stderr,
		        "Tried to import an image with inconsistent width on GFX10.\n"
		        "As GFX10 has no separate stride fields we cannot cope with\n"
		        "an inconsistency in width and will fail this import.\n"
		        "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
		        image->info.width, image->info.height, width, height);
		return VK_ERROR_INVALID_EXTERNAL_HANDLE;
	} else {
		fprintf(stderr,
		        "Tried to import an image with inconsistent width on pre-GFX10.\n"
		        "As GFX10 has no separate stride fields we cannot cope with\n"
		        "an inconsistency and would fail on GFX10.\n"
		        "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
		        image->info.width, image->info.height, width, height);
	}

	/* Pre-GFX10 lenient path: adopt the external dimensions. */
	image_info->width = width;
	image_info->height = height;

	return VK_SUCCESS;
}

static VkResult
493
494
radv_patch_image_from_extra_info(struct radv_device *device,
                                 struct radv_image *image,
495
496
                                 const struct radv_image_create_info *create_info,
                                 struct ac_surf_info *image_info)
497
{
498
499
500
501
	VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
	if (result != VK_SUCCESS)
		return result;

502
503
504
505
506
507
508
509
	for (unsigned plane = 0; plane < image->plane_count; ++plane) {
		if (create_info->bo_metadata) {
			radv_patch_surface_from_metadata(device, &image->planes[plane].surface,
							 create_info->bo_metadata);
		}

		if (radv_surface_has_scanout(device, create_info)) {
			image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
510
511
			if (device->instance->debug_flags & RADV_DEBUG_NO_DISPLAY_DCC)
				image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
512
513
514
515

			image->info.surf_index = NULL;
		}
	}
516
	return VK_SUCCESS;
517
518
}

519
static uint64_t
520
521
522
523
524
radv_get_surface_flags(struct radv_device *device,
                       const struct radv_image *image,
                       unsigned plane_id,
                       const VkImageCreateInfo *pCreateInfo,
                       VkFormat image_format)
525
{
526
	uint64_t flags;
527
528
	unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
	VkFormat format = vk_format_get_plane_format(image_format, plane_id);
529
	const struct util_format_description *desc = vk_format_description(format);
530
	bool is_depth, is_stencil;
531

532
533
	is_depth = util_format_has_depth(desc);
	is_stencil = util_format_has_stencil(desc);
534

535
	flags = RADEON_SURF_SET(array_mode, MODE);
536
537
538
539

	switch (pCreateInfo->imageType){
	case VK_IMAGE_TYPE_1D:
		if (pCreateInfo->arrayLayers > 1)
540
			flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
541
		else
542
			flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
543
544
545
		break;
	case VK_IMAGE_TYPE_2D:
		if (pCreateInfo->arrayLayers > 1)
546
			flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
547
		else
548
			flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
549
550
		break;
	case VK_IMAGE_TYPE_3D:
551
		flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
552
553
554
555
556
		break;
	default:
		unreachable("unhandled image type");
	}

557
	/* Required for clearing/initializing a specific layer on GFX8. */
558
	flags |= RADEON_SURF_CONTIGUOUS_DCC_LAYERS;
559

560
	if (is_depth) {
561
		flags |= RADEON_SURF_ZBUFFER;
562
563
564
565
566
567

		if (radv_use_htile_for_image(device, image) &&
		    !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
			if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
				flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
		} else {
568
			flags |= RADEON_SURF_NO_HTILE;
569
		}
570
	}
571
572

	if (is_stencil)
573
		flags |= RADEON_SURF_SBUFFER;
574

575
576
	if (device->physical_device->rad_info.chip_class >= GFX9 &&
	    pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
577
578
	    vk_format_get_blocksizebits(image_format) == 128 &&
	    vk_format_is_compressed(image_format))
579
		flags |= RADEON_SURF_NO_RENDER_TARGET;
580

581
	if (!radv_use_dcc_for_image(device, image, pCreateInfo, image_format))
582
		flags |= RADEON_SURF_DISABLE_DCC;
583

584
	if (!radv_use_fmask_for_image(device, image))
585
		flags |= RADEON_SURF_NO_FMASK;
586

587
588
589
590
591
592
593
	if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) {
		flags |= RADEON_SURF_PRT |
		         RADEON_SURF_NO_FMASK |
		         RADEON_SURF_NO_HTILE |
		         RADEON_SURF_DISABLE_DCC;
	}

594
	return flags;
595
}
596

597
static inline unsigned
598
si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
599
600
{
	if (stencil)
601
		return plane->surface.u.legacy.stencil_tiling_index[level];
602
	else
603
		return plane->surface.u.legacy.tiling_index[level];
604
605
606
607
608
}

static unsigned radv_map_swizzle(unsigned swizzle)
{
	switch (swizzle) {
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
609
	case PIPE_SWIZZLE_Y:
610
		return V_008F0C_SQ_SEL_Y;
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
611
	case PIPE_SWIZZLE_Z:
612
		return V_008F0C_SQ_SEL_Z;
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
613
	case PIPE_SWIZZLE_W:
614
		return V_008F0C_SQ_SEL_W;
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
615
	case PIPE_SWIZZLE_0:
616
		return V_008F0C_SQ_SEL_0;
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
617
	case PIPE_SWIZZLE_1:
618
		return V_008F0C_SQ_SEL_1;
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
619
	default: /* PIPE_SWIZZLE_X */
620
621
622
623
		return V_008F0C_SQ_SEL_X;
	}
}

624
static void
625
radv_compose_swizzle(const struct util_format_description *desc,
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
626
		     const VkComponentMapping *mapping, enum pipe_swizzle swizzle[4])
627
{
628
	if (desc->format == PIPE_FORMAT_R64_UINT || desc->format == PIPE_FORMAT_R64_SINT) {
629
630
631
632
633
634
635
636
637
		/* 64-bit formats only support storage images and storage images
		 * require identity component mappings. We use 32-bit
		 * instructions to access 64-bit images, so we need a special
		 * case here.
		 *
		 * The zw components are 1,0 so that they can be easily be used
		 * by loads to create the w component, which has to be 0 for
		 * NULL descriptors.
		 */
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
638
639
640
641
		swizzle[0] = PIPE_SWIZZLE_X;
		swizzle[1] = PIPE_SWIZZLE_Y;
		swizzle[2] = PIPE_SWIZZLE_1;
		swizzle[3] = PIPE_SWIZZLE_0;
642
643
644
	} else if (!mapping) {
		for (unsigned i = 0; i < 4; i++)
			swizzle[i] = desc->swizzle[i];
645
	} else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
646
		const unsigned char swizzle_xxxx[4] = {
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
647
			PIPE_SWIZZLE_X, PIPE_SWIZZLE_0, PIPE_SWIZZLE_0, PIPE_SWIZZLE_1
648
		};
649
650
651
652
653
654
		vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
	} else {
		vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
	}
}

655
656
657
658
659
660
661
662
/* Fill a 4-dword typed buffer descriptor (V#) for the given buffer range
 * and format. */
static void
radv_make_buffer_descriptor(struct radv_device *device,
			    struct radv_buffer *buffer,
			    VkFormat vk_format,
			    unsigned offset,
			    unsigned range,
			    uint32_t *state)
{
	const struct util_format_description *desc = vk_format_description(vk_format);
	int first_non_void = vk_format_get_first_non_void_channel(vk_format);
	unsigned stride = desc->block.bits / 8;
	uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
	uint64_t va = gpu_address + buffer->offset + offset;
	enum pipe_swizzle swizzle[4];

	radv_compose_swizzle(desc, NULL, swizzle);

	state[0] = va;
	state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
		   S_008F04_STRIDE(stride);

	/* On everything but GFX8, NUM_RECORDS is in units of the stride. */
	if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
		range /= stride;
	}

	state[2] = range;
	state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
		   S_008F0C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
		   S_008F0C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
		   S_008F0C_DST_SEL_W(radv_map_swizzle(swizzle[3]));

	if (device->physical_device->rad_info.chip_class >= GFX10) {
		const struct gfx10_format *fmt =
			&gfx10_format_table[vk_format_to_pipe_format(vk_format)];

		/* OOB_SELECT chooses the out-of-bounds check:
		 *  - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
		 *  - 1: index >= NUM_RECORDS
		 *  - 2: NUM_RECORDS == 0
		 *  - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
		 *       else: swizzle_address >= NUM_RECORDS
		 */
		state[3] |= S_008F0C_FORMAT(fmt->img_format) |
			    S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
			    S_008F0C_RESOURCE_LEVEL(1);
	} else {
		unsigned num_format = radv_translate_buffer_numformat(desc, first_non_void);
		unsigned data_format = radv_translate_buffer_dataformat(desc, first_non_void);

		assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID);
		assert(num_format != ~0);

		state[3] |= S_008F0C_NUM_FORMAT(num_format) |
			    S_008F0C_DATA_FORMAT(data_format);
	}
}

static void
si_set_mutable_tex_desc_fields(struct radv_device *device,
			       struct radv_image *image,
719
			       const struct legacy_surf_level *base_level_info,
720
			       unsigned plane_id,
721
722
			       unsigned base_level, unsigned first_level,
			       unsigned block_width, bool is_stencil,
723
			       bool is_storage_image, bool disable_compression, bool enable_write_compression,
724
			       uint32_t *state)
725
{
726
	struct radv_image_plane *plane = &image->planes[plane_id];
727
	uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
728
	uint64_t va = gpu_address;
729
730
731
732
	enum chip_class chip_class = device->physical_device->rad_info.chip_class;
	uint64_t meta_va = 0;
	if (chip_class >= GFX9) {
		if (is_stencil)
733
			va += plane->surface.u.gfx9.stencil_offset;
734
		else
735
			va += plane->surface.u.gfx9.surf_offset;
736
737
	} else
		va += base_level_info->offset;
738
739

	state[0] = va >> 8;
740
741
	if (chip_class >= GFX9 ||
	    base_level_info->mode == RADEON_SURF_MODE_2D)
742
		state[0] |= plane->surface.tile_swizzle;
743
	state[1] &= C_008F14_BASE_ADDRESS_HI;
744
745
	state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);

746
	if (chip_class >= GFX8) {
747
748
		state[6] &= C_008F28_COMPRESSION_EN;
		state[7] = 0;
749
		if (!disable_compression && radv_dcc_enabled(image, first_level)) {
750
			meta_va = gpu_address + plane->surface.dcc_offset;
751
			if (chip_class <= GFX8)
752
				meta_va += base_level_info->dcc_offset;
753

754
755
756
			unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8;
			dcc_tile_swizzle &= plane->surface.dcc_alignment - 1;
			meta_va |= dcc_tile_swizzle;
757
		} else if (!disable_compression &&
758
			   radv_image_is_tc_compat_htile(image)) {
759
			meta_va = gpu_address +  plane->surface.htile_offset;
760
761
762
		}

		if (meta_va) {
763
			state[6] |= S_008F28_COMPRESSION_EN(1);
764
			if (chip_class <= GFX9)
765
				state[7] = meta_va >> 8;
766
767
768
		}
	}

769
770
771
772
773
774
775
776
777
778
779
780
781
	if (chip_class >= GFX10) {
		state[3] &= C_00A00C_SW_MODE;

		if (is_stencil) {
			state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
		} else {
			state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
		}

		state[6] &= C_00A018_META_DATA_ADDRESS_LO &
			    C_00A018_META_PIPE_ALIGNED;

		if (meta_va) {
782
783
784
785
			struct gfx9_surf_meta_flags meta = {
				.rb_aligned = 1,
				.pipe_aligned = 1,
			};
786

787
			if (plane->surface.dcc_offset)
788
789
				meta = plane->surface.u.gfx9.dcc;

790
791
			if (radv_dcc_enabled(image, first_level) &&
			    is_storage_image && enable_write_compression)
792
793
				state[6] |= S_00A018_WRITE_COMPRESS_ENABLE(1);

794
795
796
797
798
			state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
				    S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
		}

		state[7] = meta_va >> 16;
799
	} else if (chip_class == GFX9) {
800
		state[3] &= C_008F1C_SW_MODE;
801
		state[4] &= C_008F20_PITCH;
802
803

		if (is_stencil) {
804
			state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
805
			state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.stencil.epitch);
806
		} else {
807
			state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
808
			state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.surf.epitch);
809
810
811
812
813
814
		}

		state[5] &= C_008F24_META_DATA_ADDRESS &
			    C_008F24_META_PIPE_ALIGNED &
			    C_008F24_META_RB_ALIGNED;
		if (meta_va) {
815
816
817
818
			struct gfx9_surf_meta_flags meta = {
				.rb_aligned = 1,
				.pipe_aligned = 1,
			};
819

820
			if (plane->surface.dcc_offset)
821
				meta = plane->surface.u.gfx9.dcc;
822
823
824
825
826
827

			state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
				    S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
				    S_008F24_META_RB_ALIGNED(meta.rb_aligned);
		}
	} else {
828
		/* GFX6-GFX8 */
829
		unsigned pitch = base_level_info->nblk_x * block_width;
830
		unsigned index = si_tile_mode_index(plane, base_level, is_stencil);
831
832
833

		state[3] &= C_008F1C_TILING_INDEX;
		state[3] |= S_008F1C_TILING_INDEX(index);
834
835
		state[4] &= C_008F20_PITCH;
		state[4] |= S_008F20_PITCH(pitch - 1);
836
837
838
839
	}
}

/* Translate a Vulkan image/view type pair to the hw SQ_RSRC_IMG_* resource
 * type. Cube views are accessed as 2D arrays when used as storage images.
 */
static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
			     unsigned nr_layers, unsigned nr_samples, bool is_storage_image, bool gfx9)
{
	if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
		return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;

	/* GFX9 allocates 1D textures as 2D. */
	if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
		image_type = VK_IMAGE_TYPE_2D;

	const bool arrayed = nr_layers > 1;
	const bool msaa = nr_samples > 1;

	switch (image_type) {
	case VK_IMAGE_TYPE_1D:
		return arrayed ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
	case VK_IMAGE_TYPE_2D:
		if (msaa)
			return arrayed ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
		return arrayed ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
	case VK_IMAGE_TYPE_3D:
		/* Non-3D views of a 3D image are sampled as 2D arrays. */
		return view_type == VK_IMAGE_VIEW_TYPE_3D ? V_008F1C_SQ_RSRC_IMG_3D
							  : V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
	default:
		unreachable("illegal image type");
	}
}

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
866
static unsigned gfx9_border_color_swizzle(const enum pipe_swizzle swizzle[4])
867
868
869
{
	unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;

Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
870
	if (swizzle[3] == PIPE_SWIZZLE_X) {
871
872
873
874
875
876
		/* For the pre-defined border color values (white, opaque
		 * black, transparent black), the only thing that matters is
		 * that the alpha channel winds up in the correct place
		 * (because the RGB channels are all the same) so either of
		 * these enumerations will work.
		 */
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
877
		if (swizzle[2] == PIPE_SWIZZLE_Y)
878
879
880
			bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
		else
			bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
881
882
	} else if (swizzle[0] == PIPE_SWIZZLE_X) {
		if (swizzle[1] == PIPE_SWIZZLE_Y)
883
884
885
			bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
		else
			bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
886
	} else if (swizzle[1] == PIPE_SWIZZLE_X) {
887
		bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
888
	} else if (swizzle[2] == PIPE_SWIZZLE_X) {
889
890
891
892
893
894
		bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
	}

	return bc_swizzle;
}

895
bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format)
896
{
897
	const struct util_format_description *desc = vk_format_description(format);
898
899

	if (device->physical_device->rad_info.chip_class >= GFX10 && desc->nr_channels == 1)
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
900
		return desc->swizzle[3] == PIPE_SWIZZLE_X;
901
902
903

	return radv_translate_colorswap(format, false) <= 1;
}
904
/**
 * Build the sampler view descriptor for a texture (GFX10).
 *
 * Fills the 8-dword image resource descriptor in \p state. When
 * \p fmask_state is non-NULL, it also receives an 8-dword FMASK descriptor
 * (zeroed if the image has no FMASK).
 *
 * \param first_level/last_level  mip range of the view
 * \param first_layer/last_layer  layer range of the view
 * \param width/height/depth      base-level dimensions of the view
 */
static void
gfx10_make_texture_descriptor(struct radv_device *device,
			   struct radv_image *image,
			   bool is_storage_image,
			   VkImageViewType view_type,
			   VkFormat vk_format,
			   const VkComponentMapping *mapping,
			   unsigned first_level, unsigned last_level,
			   unsigned first_layer, unsigned last_layer,
			   unsigned width, unsigned height, unsigned depth,
			   uint32_t *state,
			   uint32_t *fmask_state)
{
	const struct util_format_description *desc;
	enum pipe_swizzle swizzle[4];
	unsigned img_format;
	unsigned type;

	desc = vk_format_description(vk_format);
	img_format = gfx10_format_table[vk_format_to_pipe_format(vk_format)].img_format;

	/* Combine the format swizzle with the view's component mapping. */
	radv_compose_swizzle(desc, mapping, swizzle);

	type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
			    is_storage_image, device->physical_device->rad_info.chip_class == GFX9);

	/* Adjust the dimensions the hw sees for array/cube resource types. */
	if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
	        height = 1;
		depth = image->info.array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
		   type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
		if (view_type != VK_IMAGE_VIEW_TYPE_3D)
			depth = image->info.array_size;
	} else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
		depth = image->info.array_size / 6;

	/* For MSAA resources the level fields encode the sample count
	 * (BASE_LEVEL = 0, LAST_LEVEL = log2(samples)) instead of mips.
	 */
	state[0] = 0;
	state[1] = S_00A004_FORMAT(img_format) |
		   S_00A004_WIDTH_LO(width - 1);
	state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
		   S_00A008_HEIGHT(height - 1) |
		   S_00A008_RESOURCE_LEVEL(1);
	state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
		   S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
		   S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
		   S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
		   S_00A00C_BASE_LEVEL(image->info.samples > 1 ?
					0 : first_level) |
		   S_00A00C_LAST_LEVEL(image->info.samples > 1 ?
					util_logbase2(image->info.samples) :
					last_level) |
		   S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(swizzle)) |
		   S_00A00C_TYPE(type);
	/* Depth is the last accessible layer on gfx9+. The hw doesn't need
	 * to know the total number of layers.
	 */
	state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
		   S_00A010_BASE_ARRAY(first_layer);
	state[5] = S_00A014_ARRAY_PITCH(0) |
		   S_00A014_MAX_MIP(image->info.samples > 1 ?
				    util_logbase2(image->info.samples) :
				    image->info.levels - 1) |
		   S_00A014_PERF_MOD(4);
	state[6] = 0;
	state[7] = 0;

	if (radv_dcc_enabled(image, first_level)) {
		state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
			    S_00A018_MAX_COMPRESSED_BLOCK_SIZE(image->planes[0].surface.u.gfx9.dcc.max_compressed_block_size) |
			    S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
	}

	/* Initialize the sampler view for FMASK. */
	if (fmask_state) {
		if (radv_image_has_fmask(image)) {
			uint64_t gpu_address = radv_buffer_get_va(image->bo);
			uint32_t format;
			uint64_t va;

			/* FMASK only exists for single-plane color images. */
			assert(image->plane_count == 1);

			va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;

			switch (image->info.samples) {
			case 2:
				format = V_008F0C_IMG_FORMAT_FMASK8_S2_F2;
				break;
			case 4:
				format = V_008F0C_IMG_FORMAT_FMASK8_S4_F4;
				break;
			case 8:
				format = V_008F0C_IMG_FORMAT_FMASK32_S8_F8;
				break;
			default:
				unreachable("invalid nr_samples");
			}

			fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle;
			fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) |
					S_00A004_FORMAT(format) |
					S_00A004_WIDTH_LO(width - 1);
			fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
					S_00A008_HEIGHT(height - 1) |
					S_00A008_RESOURCE_LEVEL(1);
			fmask_state[3] = S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
					S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
					S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
					S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
					S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode) |
					S_00A00C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
			fmask_state[4] = S_00A010_DEPTH(last_layer) |
					S_00A010_BASE_ARRAY(first_layer);
			fmask_state[5] = 0;
			fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1);
			fmask_state[7] = 0;

			/* Point the descriptor's metadata address at CMASK so
			 * the hw can read compressed FMASK.
			 */
			if (radv_image_is_tc_compat_cmask(image)) {
				va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;

				fmask_state[6] |= S_00A018_COMPRESSION_EN(1);
				fmask_state[6] |= S_00A018_META_DATA_ADDRESS_LO(va >> 8);
				fmask_state[7] |= va >> 16;
			}
		} else
			memset(fmask_state, 0, 8 * 4);
	}
}

/**
 * Build the sampler view descriptor for a texture (SI-GFX9)
1036
1037
1038
1039
 */
static void
si_make_texture_descriptor(struct radv_device *device,
			   struct radv_image *image,
1040
			   bool is_storage_image,
1041
1042
1043
1044
1045
1046
1047
1048
1049
			   VkImageViewType view_type,
			   VkFormat vk_format,
			   const VkComponentMapping *mapping,
			   unsigned first_level, unsigned last_level,
			   unsigned first_layer, unsigned last_layer,
			   unsigned width, unsigned height, unsigned depth,
			   uint32_t *state,
			   uint32_t *fmask_state)
{
1050
	const struct util_format_description *desc;
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
1051
	enum pipe_swizzle swizzle[4];
1052
1053
1054
1055
1056
	int first_non_void;
	unsigned num_format, data_format, type;

	desc = vk_format_description(vk_format);

1057
	radv_compose_swizzle(desc, mapping, swizzle);
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070

	first_non_void = vk_format_get_first_non_void_channel(vk_format);

	num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
	if (num_format == ~0) {
		num_format = 0;
	}

	data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
	if (data_format == ~0) {
		data_format = 0;
	}

1071
	/* S8 with either Z16 or Z32 HTILE need a special format. */
1072
	if (device->physical_device->rad_info.chip_class == GFX9 &&
1073
	    vk_format == VK_FORMAT_S8_UINT &&
1074
	    radv_image_is_tc_compat_htile(image)) {
1075
1076
1077
1078
1079
		if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
			data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
		else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
			data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
	}
1080
	type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
1081
			    is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
1082
1083
	if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
	        height = 1;
1084
		depth = image->info.array_size;
1085
1086
1087
	} else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
		   type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
		if (view_type != VK_IMAGE_VIEW_TYPE_3D)
1088
			depth = image->info.array_size;
1089
	} else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
1090
		depth = image->info.array_size / 6;
1091
1092

	state[0] = 0;
1093
1094
	state[1] = (S_008F14_DATA_FORMAT(data_format) |
		    S_008F14_NUM_FORMAT(num_format));
1095
	state[2] = (S_008F18_WIDTH(width - 1) |
1096
1097
		    S_008F18_HEIGHT(height - 1) |
		    S_008F18_PERF_MOD(4));
1098
1099
1100
1101
	state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
		    S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
		    S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
		    S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
1102
		    S_008F1C_BASE_LEVEL(image->info.samples > 1 ?
1103
					0 : first_level) |
1104
1105
		    S_008F1C_LAST_LEVEL(image->info.samples > 1 ?
					util_logbase2(image->info.samples) :
1106
1107
					last_level) |
		    S_008F1C_TYPE(type));
1108
1109
	state[4] = 0;
	state[5] = S_008F24_BASE_ARRAY(first_layer);
1110
1111
1112
	state[6] = 0;
	state[7] = 0;

1113
	if (device->physical_device->rad_info.chip_class == GFX9) {
1114
		unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);
1115

Grazvydas Ignotas's avatar
Grazvydas Ignotas committed
1116
		/* Depth is the last accessible layer on Gfx9.
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
		 * The hw doesn't need to know the total number of layers.
		 */
		if (type == V_008F1C_SQ_RSRC_IMG_3D)
			state[4] |= S_008F20_DEPTH(depth - 1);
		else
			state[4] |= S_008F20_DEPTH(last_layer);

		state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
		state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ?
					     util_logbase2(image->info.samples) :
1127
					     image->info.levels - 1);
1128
	} else {
1129
1130
1131
1132
		state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
		state[4] |= S_008F20_DEPTH(depth - 1);
		state[5] |= S_008F24_LAST_ARRAY(last_layer);
	}
1133
	if (image->planes[0].surface.dcc_offset) {
1134
		state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
1135
1136
1137
1138
	} else {
		/* The last dword is unused by hw. The shader uses it to clear
		 * bits in the first dword of sampler state.
		 */
1139
		if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
1140
1141
1142
1143
1144
1145
1146
1147
			if (first_level == last_level)
				state[7] = C_008F30_MAX_ANISO_RATIO;
			else
				state[7] = 0xffffffff;
		}
	}

	/* Initialize the sampler view for FMASK. */
1148
1149
	if (fmask_state) {
		if (radv_image_has_fmask(image)) {
Tony Wasserka's avatar
Tony Wasserka committed
1150
			uint32_t fmask_format;
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
			uint64_t gpu_address = radv_buffer_get_va(image->bo);
			uint64_t va;

			assert(image->plane_count == 1);

			va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;

			if (device->physical_device->rad_info.chip_class == GFX9) {
				fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
				switch (image->info.samples) {
				case 2:
					num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_2;
					break;
				case 4:
					num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_4;
					break;
				case 8:
					num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_8;
					break;
				default:
					unreachable("invalid nr_samples");
				}
			} else {
				switch (image->info.samples) {
				case 2:
					fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
					break;
				case 4:
					fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
					break;
				case 8:
					fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
					break;
				default:
					assert(0);
					fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
				}
				num_format = V_008F14_IMG_NUM_FORMAT_UINT;
1189
			}
1190

1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
			fmask_state[0] = va >> 8;
			fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle;
			fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
				S_008F14_DATA_FORMAT(fmask_format) |
				S_008F14_NUM_FORMAT(num_format);
			fmask_state[2] = S_008F18_WIDTH(width - 1) |
				S_008F18_HEIGHT(height - 1);
			fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
				S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
				S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
				S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
				S_008F1C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
			fmask_state[4] = 0;
			fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
			fmask_state[6] = 0;
			fmask_state[7] = 0;

			if (device->physical_device->rad_info.chip_class == GFX9) {
				fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode);
				fmask_state[4] |= S_008F20_DEPTH(last_layer) |
						S_008F20_PITCH(image->planes[0].surface.u.gfx9.fmask.epitch);
				fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) |
						S_008F24_META_RB_ALIGNED(1);

				if (radv_image_is_tc_compat_cmask(image)) {
					va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;

					fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
					fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
					fmask_state[7] |= va >> 8;
				}
			} else {
				fmask_state[3] |= S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.fmask.tiling_index);
				fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
					S_008F20_PITCH(image->planes[0].surface.u.legacy.fmask.pitch_in_pixels - 1);
				fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);

				if (radv_image_is_tc_compat_cmask(image)) {
					va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;

					fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
					fmask_state[7] |= va >> 8;
				}
1234
			}
1235
1236
1237
		} else
			memset(fmask_state, 0, 8 * 4);
	}
1238
1239
}

1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251