/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "radv_cs.h"
#include "util/disk_cache.h"
#include "util/strtod.h"
#include "vk_util.h"
#include <xf86drm.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#include "ac_llvm_util.h"
#include "vk_format.h"
#include "sid.h"
#include "gfx9d.h"

#include "util/debug.h"

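/* Build the VK_UUID_SIZE-byte cache UUID from the Mesa and LLVM build
 * timestamps plus the GPU family, so on-disk pipeline caches are invalidated
 * whenever the driver, the compiler or the target GPU changes.
 */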
static int
radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
{
	uint32_t mesa_timestamp, llvm_timestamp;
	uint16_t f = family;
	memset(uuid, 0, VK_UUID_SIZE);

	if (!disk_cache_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) ||
	    !disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp))
		return -1;

	memcpy(uuid, &mesa_timestamp, 4);
	memcpy((char*)uuid + 4, &llvm_timestamp, 4);
	memcpy((char*)uuid + 8, &f, 2);
	snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv");
	return 0;
}

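/* The driver and device UUIDs are delegated to the common ac_* helpers so
 * they stay consistent with the other AMD Mesa drivers.
 */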
static void
radv_get_driver_uuid(void *uuid)
{
	ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
}

static void
radv_get_device_uuid(struct radeon_info *info, void *uuid)
{
	ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
}

static void
radv_get_device_name(enum radeon_family family, char *name, size_t name_len)
{
	const char *chip_string;
	char llvm_string[32] = {};

	switch (family) {
	case CHIP_TAHITI: chip_string = "AMD RADV TAHITI"; break;
	case CHIP_PITCAIRN: chip_string = "AMD RADV PITCAIRN"; break;
	case CHIP_VERDE: chip_string = "AMD RADV CAPE VERDE"; break;
	case CHIP_OLAND: chip_string = "AMD RADV OLAND"; break;
	case CHIP_HAINAN: chip_string = "AMD RADV HAINAN"; break;
	case CHIP_BONAIRE: chip_string = "AMD RADV BONAIRE"; break;
	case CHIP_KAVERI: chip_string = "AMD RADV KAVERI"; break;
	case CHIP_KABINI: chip_string = "AMD RADV KABINI"; break;
	case CHIP_HAWAII: chip_string = "AMD RADV HAWAII"; break;
	case CHIP_MULLINS: chip_string = "AMD RADV MULLINS"; break;
	case CHIP_TONGA: chip_string = "AMD RADV TONGA"; break;
	case CHIP_ICELAND: chip_string = "AMD RADV ICELAND"; break;
	case CHIP_CARRIZO: chip_string = "AMD RADV CARRIZO"; break;
	case CHIP_FIJI: chip_string = "AMD RADV FIJI"; break;
	case CHIP_POLARIS10: chip_string = "AMD RADV POLARIS10"; break;
	case CHIP_POLARIS11: chip_string = "AMD RADV POLARIS11"; break;
	case CHIP_POLARIS12: chip_string = "AMD RADV POLARIS12"; break;
	case CHIP_STONEY: chip_string = "AMD RADV STONEY"; break;
	case CHIP_VEGA10: chip_string = "AMD RADV VEGA"; break;
	case CHIP_RAVEN: chip_string = "AMD RADV RAVEN"; break;
	default: chip_string = "AMD RADV unknown"; break;
	}

	if (HAVE_LLVM > 0) {
		snprintf(llvm_string, sizeof(llvm_string),
			 " (LLVM %i.%i.%i)", (HAVE_LLVM >> 8) & 0xff,
			 HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
	}

	snprintf(name, name_len, "%s%s", chip_string, llvm_string);
}

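/* Advertise the VRAM/GART memory heaps and the memory types that map onto
 * them: CPU-invisible VRAM, CPU-visible VRAM and GART, with write-combined
 * and cached GART variants. The radv-internal type of each entry is recorded
 * in mem_type_indices so allocations can later be routed to the right domain.
 */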
static void
radv_physical_device_init_mem_types(struct radv_physical_device *device)
{
	STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
	uint64_t visible_vram_size = MIN2(device->rad_info.vram_size,
	                                  device->rad_info.vram_vis_size);

	int vram_index = -1, visible_vram_index = -1, gart_index = -1;
	device->memory_properties.memoryHeapCount = 0;
	if (device->rad_info.vram_size - visible_vram_size > 0) {
		vram_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
			.size = device->rad_info.vram_size - visible_vram_size,
			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}
	if (visible_vram_size) {
		visible_vram_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
			.size = visible_vram_size,
			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}
	if (device->rad_info.gart_size > 0) {
		gart_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
			.size = device->rad_info.gart_size,
			.flags = 0,
		};
	}

	STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
	unsigned type_count = 0;
	if (vram_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
			.heapIndex = vram_index,
		};
	}
	if (gart_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
			.heapIndex = gart_index,
		};
	}
	if (visible_vram_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM_CPU_ACCESS;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
			VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
			.heapIndex = visible_vram_index,
		};
	}
	if (gart_index >= 0) {
		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
			VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
			.heapIndex = gart_index,
		};
	}
	device->memory_properties.memoryTypeCount = type_count;
}

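/* RADV_FORCE_FAMILY lets the reported GPU family (and the derived chip class)
 * be overridden from the environment, which is mainly useful for testing
 * shader compilation for other chips; only the reported family is changed,
 * the underlying hardware is not.
 */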
static void
radv_handle_env_var_force_family(struct radv_physical_device *device)
{
	const char *family = getenv("RADV_FORCE_FAMILY");
	unsigned i;

	if (!family)
		return;

	for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
		if (!strcmp(family, ac_get_llvm_processor_name(i))) {
			/* Override family and chip_class. */
			device->rad_info.family = i;

			if (i >= CHIP_VEGA10)
				device->rad_info.chip_class = GFX9;
			else if (i >= CHIP_TONGA)
				device->rad_info.chip_class = VI;
			else if (i >= CHIP_BONAIRE)
				device->rad_info.chip_class = CIK;
			else
				device->rad_info.chip_class = SI;

			return;
		}
	}

	fprintf(stderr, "radv: Unknown family: %s\n", family);
	exit(1);
}

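/* One-time setup for a physical device: open the render node, verify the
 * kernel driver is amdgpu, create the winsys, query the GPU info, then set
 * up the disk cache, UUIDs, per-chip feature flags, memory types, supported
 * extensions and WSI.
 */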
static VkResult
radv_physical_device_init(struct radv_physical_device *device,
			  struct radv_instance *instance,
			  drmDevicePtr drm_device)
{
	const char *path = drm_device->nodes[DRM_NODE_RENDER];
	VkResult result;
	drmVersionPtr version;
	int fd;

	fd = open(path, O_RDWR | O_CLOEXEC);
	if (fd < 0)
		return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);

	version = drmGetVersion(fd);
	if (!version) {
		close(fd);
		return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
				 "failed to get version %s: %m", path);
	}

	if (strcmp(version->name, "amdgpu")) {
		drmFreeVersion(version);
		close(fd);
		return VK_ERROR_INCOMPATIBLE_DRIVER;
	}
	drmFreeVersion(version);

	device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	device->instance = instance;
	assert(strlen(path) < ARRAY_SIZE(device->path));
	strncpy(device->path, path, ARRAY_SIZE(device->path));

	device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
					       instance->perftest_flags);
	if (!device->ws) {
		result = VK_ERROR_INCOMPATIBLE_DRIVER;
		goto fail;
	}

	device->local_fd = fd;
	device->ws->query_info(device->ws, &device->rad_info);

	radv_handle_env_var_force_family(device);

	radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name));

	if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
		device->ws->destroy(device->ws);
		result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
				   "cannot generate UUID");
		goto fail;
	}

	/* These flags affect shader compilation. */
	uint64_t shader_env_flags =
		(device->instance->perftest_flags & RADV_PERFTEST_SISCHED ? 0x1 : 0) |
		(device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0);

	/* The GPU id is already embedded in the uuid so we just pass "radv"
	 * when creating the cache.
	 */
	char buf[VK_UUID_SIZE * 2 + 1];
	disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
	device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);

	fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");

	radv_get_driver_uuid(&device->driver_uuid);
	radv_get_device_uuid(&device->rad_info, &device->device_uuid);

	if (device->rad_info.family == CHIP_STONEY ||
	    device->rad_info.chip_class >= GFX9) {
		device->has_rbplus = true;
		device->rbplus_allowed = device->rad_info.family == CHIP_STONEY;
	}

	/* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
	 * on SI.
	 */
	device->has_clear_state = device->rad_info.chip_class >= CIK;

	device->cpdma_prefetch_writes_memory = device->rad_info.chip_class <= VI;

	/* Vega10/Raven need a special workaround for a hardware bug. */
	device->has_scissor_bug = device->rad_info.family == CHIP_VEGA10 ||
				  device->rad_info.family == CHIP_RAVEN;

	radv_physical_device_init_mem_types(device);
	radv_fill_device_extension_table(device, &device->supported_extensions);

	result = radv_init_wsi(device);
	if (result != VK_SUCCESS) {
		device->ws->destroy(device->ws);
		goto fail;
	}

	return VK_SUCCESS;

fail:
	close(fd);
	return result;
}

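/* Tear down everything radv_physical_device_init created. */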
static void
radv_physical_device_finish(struct radv_physical_device *device)
{
	radv_finish_wsi(device);
	device->ws->destroy(device->ws);
	disk_cache_destroy(device->disk_cache);
	close(device->local_fd);
}

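/* Fallback VkAllocationCallbacks used when the application does not provide
 * its own allocator; these simply forward to malloc/realloc/free and ignore
 * the requested alignment and allocation scope.
 */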
static void *
default_alloc_func(void *pUserData, size_t size, size_t align,
                   VkSystemAllocationScope allocationScope)
{
	return malloc(size);
}

static void *
default_realloc_func(void *pUserData, void *pOriginal, size_t size,
                     size_t align, VkSystemAllocationScope allocationScope)
{
	return realloc(pOriginal, size);
}

static void
default_free_func(void *pUserData, void *pMemory)
{
	free(pMemory);
}

static const VkAllocationCallbacks default_alloc = {
	.pUserData = NULL,
	.pfnAllocation = default_alloc_func,
	.pfnReallocation = default_realloc_func,
	.pfnFree = default_free_func,
};

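/* Flags accepted in the comma-separated RADV_DEBUG environment variable
 * (parsed with parse_debug_string() at instance creation).
 */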
static const struct debug_control radv_debug_options[] = {
	{"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
	{"nodcc", RADV_DEBUG_NO_DCC},
	{"shaders", RADV_DEBUG_DUMP_SHADERS},
	{"nocache", RADV_DEBUG_NO_CACHE},
	{"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
	{"nohiz", RADV_DEBUG_NO_HIZ},
	{"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
	{"unsafemath", RADV_DEBUG_UNSAFE_MATH},
	{"allbos", RADV_DEBUG_ALL_BOS},
	{"noibs", RADV_DEBUG_NO_IBS},
	{"spirv", RADV_DEBUG_DUMP_SPIRV},
	{"vmfaults", RADV_DEBUG_VM_FAULTS},
	{"zerovram", RADV_DEBUG_ZERO_VRAM},
	{"syncshaders", RADV_DEBUG_SYNC_SHADERS},
	{"nosisched", RADV_DEBUG_NO_SISCHED},
	{"preoptir", RADV_DEBUG_PREOPTIR},
	{NULL, 0}
};

const char *
radv_get_debug_option_name(int id)
{
	assert(id < ARRAY_SIZE(radv_debug_options) - 1);
	return radv_debug_options[id].string;
}

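/* Opt-in performance features, selected via the RADV_PERFTEST environment
 * variable in the same comma-separated format as RADV_DEBUG.
 */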
static const struct debug_control radv_perftest_options[] = {
	{"nobatchchain", RADV_PERFTEST_NO_BATCHCHAIN},
	{"sisched", RADV_PERFTEST_SISCHED},
	{"localbos", RADV_PERFTEST_LOCAL_BOS},
	{"binning", RADV_PERFTEST_BINNING},
	{NULL, 0}
};

const char *
radv_get_perftest_option_name(int id)
{
	assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
	return radv_perftest_options[id].string;
}

static void
radv_handle_per_app_options(struct radv_instance *instance,
			    const VkApplicationInfo *info)
{
	const char *name = info ? info->pApplicationName : NULL;

	if (!name)
		return;

	if (!strcmp(name, "Talos - Linux - 32bit") ||
	    !strcmp(name, "Talos - Linux - 64bit")) {
		/* Force enable LLVM sisched for Talos because it looks safe
		 * and it gives a few more FPS.
		 */
		instance->perftest_flags |= RADV_PERFTEST_SISCHED;
	}
}

static int radv_get_instance_extension_index(const char *name)
{
	for (unsigned i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; ++i) {
		if (strcmp(name, radv_instance_extensions[i].extensionName) == 0)
			return i;
	}
	return -1;
}


VkResult radv_CreateInstance(
	const VkInstanceCreateInfo*                 pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkInstance*                                 pInstance)
{
	struct radv_instance *instance;
	VkResult result;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

	uint32_t client_version;
	if (pCreateInfo->pApplicationInfo &&
	    pCreateInfo->pApplicationInfo->apiVersion != 0) {
		client_version = pCreateInfo->pApplicationInfo->apiVersion;
	} else {
		client_version = VK_MAKE_VERSION(1, 0, 0);
	}

	if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
	    client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
		return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
				 "Client requested version %d.%d.%d",
				 VK_VERSION_MAJOR(client_version),
				 VK_VERSION_MINOR(client_version),
				 VK_VERSION_PATCH(client_version));
	}

	instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
			      VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
	if (!instance)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;

	if (pAllocator)
		instance->alloc = *pAllocator;
	else
		instance->alloc = default_alloc;

	instance->apiVersion = client_version;
	instance->physicalDeviceCount = -1;

	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
		const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
		int index = radv_get_instance_extension_index(ext_name);

		if (index < 0 || !radv_supported_instance_extensions.extensions[index]) {
			vk_free2(&default_alloc, pAllocator, instance);
			return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
		}

		instance->enabled_extensions.extensions[index] = true;
	}

	result = vk_debug_report_instance_init(&instance->debug_report_callbacks);
	if (result != VK_SUCCESS) {
		vk_free2(&default_alloc, pAllocator, instance);
		return vk_error(result);
	}

	_mesa_locale_init();

	VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

	instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
						   radv_debug_options);

	instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
						      radv_perftest_options);

	radv_handle_per_app_options(instance, pCreateInfo->pApplicationInfo);

	if (instance->debug_flags & RADV_DEBUG_NO_SISCHED) {
		/* Disable sisched when the user requests it; this is mostly
		 * useful when the driver force-enables sisched for the given
		 * application.
		 */
		instance->perftest_flags &= ~RADV_PERFTEST_SISCHED;
	}

	*pInstance = radv_instance_to_handle(instance);

	return VK_SUCCESS;
}

void radv_DestroyInstance(
	VkInstance                                  _instance,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);

	if (!instance)
		return;

	for (int i = 0; i < instance->physicalDeviceCount; ++i) {
		radv_physical_device_finish(instance->physicalDevices + i);
	}

	VG(VALGRIND_DESTROY_MEMPOOL(instance));

	_mesa_locale_fini();

	vk_debug_report_instance_destroy(&instance->debug_report_callbacks);

	vk_free(&instance->alloc, instance);
}

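/* Walk the DRM devices, pick the AMD PCI devices that expose a render node
 * and try to initialize a radv_physical_device for each of them.
 */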
static VkResult
radv_enumerate_devices(struct radv_instance *instance)
{
	/* TODO: Check for more devices ? */
	drmDevicePtr devices[8];
	VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
	int max_devices;

	instance->physicalDeviceCount = 0;

	max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
	if (max_devices < 1)
		return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);

	for (unsigned i = 0; i < (unsigned)max_devices; i++) {
		if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
		    devices[i]->bustype == DRM_BUS_PCI &&
		    devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {

			result = radv_physical_device_init(instance->physicalDevices +
			                                   instance->physicalDeviceCount,
			                                   instance,
			                                   devices[i]);
			if (result == VK_SUCCESS)
				++instance->physicalDeviceCount;
			else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
				break;
		}
	}
	drmFreeDevices(devices, max_devices);

	return result;
}

VkResult radv_EnumeratePhysicalDevices(
	VkInstance                                  _instance,
	uint32_t*                                   pPhysicalDeviceCount,
	VkPhysicalDevice*                           pPhysicalDevices)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	VkResult result;

	if (instance->physicalDeviceCount < 0) {
		result = radv_enumerate_devices(instance);
		if (result != VK_SUCCESS &&
		    result != VK_ERROR_INCOMPATIBLE_DRIVER)
			return result;
	}

	if (!pPhysicalDevices) {
		*pPhysicalDeviceCount = instance->physicalDeviceCount;
	} else {
		*pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount);
		for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
			pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i);
	}

	return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE
	                                                             : VK_SUCCESS;
}

void radv_GetPhysicalDeviceFeatures(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceFeatures*                   pFeatures)
{
	memset(pFeatures, 0, sizeof(*pFeatures));

	*pFeatures = (VkPhysicalDeviceFeatures) {
		.robustBufferAccess                       = true,
		.fullDrawIndexUint32                      = true,
		.imageCubeArray                           = true,
		.independentBlend                         = true,
		.geometryShader                           = true,
		.tessellationShader                       = true,
		.sampleRateShading                        = true,
		.dualSrcBlend                             = true,
		.logicOp                                  = true,
		.multiDrawIndirect                        = true,
		.drawIndirectFirstInstance                = true,
		.depthClamp                               = true,
		.depthBiasClamp                           = true,
		.fillModeNonSolid                         = true,
		.depthBounds                              = true,
		.wideLines                                = true,
		.largePoints                              = true,
		.alphaToOne                               = true,
		.multiViewport                            = true,
		.samplerAnisotropy                        = true,
		.textureCompressionETC2                   = false,
		.textureCompressionASTC_LDR               = false,
		.textureCompressionBC                     = true,
		.occlusionQueryPrecise                    = true,
		.pipelineStatisticsQuery                  = true,
		.vertexPipelineStoresAndAtomics           = true,
		.fragmentStoresAndAtomics                 = true,
		.shaderTessellationAndGeometryPointSize   = true,
		.shaderImageGatherExtended                = true,
		.shaderStorageImageExtendedFormats        = true,
		.shaderStorageImageMultisample            = false,
		.shaderUniformBufferArrayDynamicIndexing  = true,
		.shaderSampledImageArrayDynamicIndexing   = true,
		.shaderStorageBufferArrayDynamicIndexing  = true,
		.shaderStorageImageArrayDynamicIndexing   = true,
		.shaderStorageImageReadWithoutFormat      = true,
		.shaderStorageImageWriteWithoutFormat     = true,
		.shaderClipDistance                       = true,
		.shaderCullDistance                       = true,
		.shaderFloat64                            = true,
		.shaderInt64                              = true,
		.shaderInt16                              = false,
		.sparseBinding                            = true,
		.variableMultisampleRate                  = true,
		.inheritedQueries                         = true,
	};
}

void radv_GetPhysicalDeviceFeatures2KHR(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceFeatures2KHR               *pFeatures)
{
	vk_foreach_struct(ext, pFeatures->pNext) {
		switch (ext->sType) {
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR: {
			VkPhysicalDeviceVariablePointerFeaturesKHR *features = (void *)ext;
			features->variablePointersStorageBuffer = true;
			features->variablePointers = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHX: {
			VkPhysicalDeviceMultiviewFeaturesKHX *features = (VkPhysicalDeviceMultiviewFeaturesKHX*)ext;
			features->multiview = true;
			features->multiviewGeometryShader = true;
			features->multiviewTessellationShader = true;
			break;
		}
		default:
			break;
		}
	}
	return radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
}

void radv_GetPhysicalDeviceProperties(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceProperties*                 pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	VkSampleCountFlags sample_counts = 0xf;

	/* Make sure that the entire descriptor set is addressable with a signed
	 * 32-bit int. So the sum of all limits scaled by descriptor size has to
	 * be at most 2 GiB. A combined image & sampler object counts as one of
	 * both. This limit is for the pipeline layout, not for the set layout, but
	 * there is no set limit, so we just set a pipeline limit. I don't think
	 * any app is going to hit this soon. */
	size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) /
	          (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
	           32 /* storage buffer, 32 due to potential space wasted on alignment */ +
	           32 /* sampler, largest when combined with image */ +
	           64 /* sampled image */ +
	           64 /* storage image */);
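	/* With the per-descriptor sizes assumed above this divides roughly
	 * 2 GiB by 32 + 32 + 32 + 64 + 64 = 224 bytes, i.e. a bit under
	 * 9.6 million descriptors per stage.
	 */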

	VkPhysicalDeviceLimits limits = {
		.maxImageDimension1D                      = (1 << 14),
		.maxImageDimension2D                      = (1 << 14),
		.maxImageDimension3D                      = (1 << 11),
		.maxImageDimensionCube                    = (1 << 14),
		.maxImageArrayLayers                      = (1 << 11),
		.maxTexelBufferElements                   = 128 * 1024 * 1024,
		.maxUniformBufferRange                    = UINT32_MAX,
		.maxStorageBufferRange                    = UINT32_MAX,
		.maxPushConstantsSize                     = MAX_PUSH_CONSTANTS_SIZE,
		.maxMemoryAllocationCount                 = UINT32_MAX,
		.maxSamplerAllocationCount                = 64 * 1024,
		.bufferImageGranularity                   = 64, /* A cache line */
		.sparseAddressSpaceSize                   = 0xffffffffu, /* buffer max size */
		.maxBoundDescriptorSets                   = MAX_SETS,
		.maxPerStageDescriptorSamplers            = max_descriptor_set_size,
		.maxPerStageDescriptorUniformBuffers      = max_descriptor_set_size,
		.maxPerStageDescriptorStorageBuffers      = max_descriptor_set_size,
		.maxPerStageDescriptorSampledImages       = max_descriptor_set_size,
		.maxPerStageDescriptorStorageImages       = max_descriptor_set_size,
		.maxPerStageDescriptorInputAttachments    = max_descriptor_set_size,
		.maxPerStageResources                     = max_descriptor_set_size,
		.maxDescriptorSetSamplers                 = max_descriptor_set_size,
		.maxDescriptorSetUniformBuffers           = max_descriptor_set_size,
		.maxDescriptorSetUniformBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
		.maxDescriptorSetStorageBuffers           = max_descriptor_set_size,
		.maxDescriptorSetStorageBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
		.maxDescriptorSetSampledImages            = max_descriptor_set_size,
		.maxDescriptorSetStorageImages            = max_descriptor_set_size,
		.maxDescriptorSetInputAttachments         = max_descriptor_set_size,
		.maxVertexInputAttributes                 = 32,
		.maxVertexInputBindings                   = 32,
		.maxVertexInputAttributeOffset            = 2047,
		.maxVertexInputBindingStride              = 2048,
		.maxVertexOutputComponents                = 128,
		.maxTessellationGenerationLevel           = 64,
		.maxTessellationPatchSize                 = 32,
		.maxTessellationControlPerVertexInputComponents = 128,
		.maxTessellationControlPerVertexOutputComponents = 128,
		.maxTessellationControlPerPatchOutputComponents = 120,
		.maxTessellationControlTotalOutputComponents = 4096,
		.maxTessellationEvaluationInputComponents = 128,
		.maxTessellationEvaluationOutputComponents = 128,
		.maxGeometryShaderInvocations             = 127,
		.maxGeometryInputComponents               = 64,
		.maxGeometryOutputComponents              = 128,
		.maxGeometryOutputVertices                = 256,
		.maxGeometryTotalOutputComponents         = 1024,
		.maxFragmentInputComponents               = 128,
		.maxFragmentOutputAttachments             = 8,
		.maxFragmentDualSrcAttachments            = 1,
		.maxFragmentCombinedOutputResources       = 8,
		.maxComputeSharedMemorySize               = 32768,
		.maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
		.maxComputeWorkGroupInvocations           = 2048,
		.maxComputeWorkGroupSize = {
			2048,
			2048,
			2048
		},
		.subPixelPrecisionBits                    = 4 /* FIXME */,
		.subTexelPrecisionBits                    = 4 /* FIXME */,
		.mipmapPrecisionBits                      = 4 /* FIXME */,
		.maxDrawIndexedIndexValue                 = UINT32_MAX,
		.maxDrawIndirectCount                     = UINT32_MAX,
		.maxSamplerLodBias                        = 16,
		.maxSamplerAnisotropy                     = 16,
		.maxViewports                             = MAX_VIEWPORTS,
		.maxViewportDimensions                    = { (1 << 14), (1 << 14) },
		.viewportBoundsRange                      = { INT16_MIN, INT16_MAX },
		.viewportSubPixelBits                     = 13, /* We take a float? */
		.minMemoryMapAlignment                    = 4096, /* A page */
		.minTexelBufferOffsetAlignment            = 1,
		.minUniformBufferOffsetAlignment          = 4,
		.minStorageBufferOffsetAlignment          = 4,
		.minTexelOffset                           = -32,
		.maxTexelOffset                           = 31,
		.minTexelGatherOffset                     = -32,
		.maxTexelGatherOffset                     = 31,
		.minInterpolationOffset                   = -2,
		.maxInterpolationOffset                   = 2,
		.subPixelInterpolationOffsetBits          = 8,
		.maxFramebufferWidth                      = (1 << 14),
		.maxFramebufferHeight                     = (1 << 14),
		.maxFramebufferLayers                     = (1 << 10),
		.framebufferColorSampleCounts             = sample_counts,
		.framebufferDepthSampleCounts             = sample_counts,
		.framebufferStencilSampleCounts           = sample_counts,
		.framebufferNoAttachmentsSampleCounts     = sample_counts,
		.maxColorAttachments                      = MAX_RTS,
		.sampledImageColorSampleCounts            = sample_counts,
		.sampledImageIntegerSampleCounts          = VK_SAMPLE_COUNT_1_BIT,
		.sampledImageDepthSampleCounts            = sample_counts,
		.sampledImageStencilSampleCounts          = sample_counts,
		.storageImageSampleCounts                 = VK_SAMPLE_COUNT_1_BIT,
		.maxSampleMaskWords                       = 1,
		.timestampComputeAndGraphics              = true,
		.timestampPeriod                          = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
		.maxClipDistances                         = 8,
		.maxCullDistances                         = 8,
		.maxCombinedClipAndCullDistances          = 8,
		.discreteQueuePriorities                  = 1,
		.pointSizeRange                           = { 0.125, 255.875 },
		.lineWidthRange                           = { 0.0, 7.9921875 },
		.pointSizeGranularity                     = (1.0 / 8.0),
		.lineWidthGranularity                     = (1.0 / 128.0),
		.strictLines                              = false, /* FINISHME */
		.standardSampleLocations                  = true,
		.optimalBufferCopyOffsetAlignment         = 128,
		.optimalBufferCopyRowPitchAlignment       = 128,
		.nonCoherentAtomSize                      = 64,
	};

	*pProperties = (VkPhysicalDeviceProperties) {
		.apiVersion = radv_physical_device_api_version(pdevice),
		.driverVersion = vk_get_driver_version(),
		.vendorID = ATI_VENDOR_ID,
		.deviceID = pdevice->rad_info.pci_id,
		.deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
		.limits = limits,
		.sparseProperties = {0},
	};

	strcpy(pProperties->deviceName, pdevice->name);
	memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
}

void radv_GetPhysicalDeviceProperties2KHR(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceProperties2KHR             *pProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);

	vk_foreach_struct(ext, pProperties->pNext) {
		switch (ext->sType) {
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
			VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
				(VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
			properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR: {
			VkPhysicalDeviceIDPropertiesKHR *properties = (VkPhysicalDeviceIDPropertiesKHR*)ext;
			memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
			memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
			properties->deviceLUIDValid = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHX: {
			VkPhysicalDeviceMultiviewPropertiesKHX *properties = (VkPhysicalDeviceMultiviewPropertiesKHX*)ext;
			properties->maxMultiviewViewCount = MAX_VIEWS;
			properties->maxMultiviewInstanceIndex = INT_MAX;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES_KHR: {
			VkPhysicalDevicePointClippingPropertiesKHR *properties =
			    (VkPhysicalDevicePointClippingPropertiesKHR*)ext;
			properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES_KHR;
			break;
		}
		case  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
			VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
			    (VkPhysicalDeviceDiscardRectanglePropertiesEXT*)ext;
			properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
			VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =
			    (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext;
			properties->minImportedHostPointerAlignment = 4096;
			break;
		}
		default:
			break;
		}
	}
}

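/* Report one universal (graphics + compute + transfer) queue family and, if
 * the GPU has compute rings and they are not disabled with
 * RADV_DEBUG=nocompute, a second compute-only family. At most *pCount
 * entries are written.
 */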
static void radv_get_physical_device_queue_family_properties(
	struct radv_physical_device*                pdevice,
	uint32_t*                                   pCount,
	VkQueueFamilyProperties**                   pQueueFamilyProperties)
{
	int num_queue_families = 1;
	int idx;
	if (pdevice->rad_info.num_compute_rings > 0 &&
	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
		num_queue_families++;

	if (pQueueFamilyProperties == NULL) {
		*pCount = num_queue_families;
		return;
	}

	if (!*pCount)
		return;

	idx = 0;
	if (*pCount >= 1) {
		*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
			.queueFlags = VK_QUEUE_GRAPHICS_BIT |
			              VK_QUEUE_COMPUTE_BIT |
			              VK_QUEUE_TRANSFER_BIT |
			              VK_QUEUE_SPARSE_BINDING_BIT,
			.queueCount = 1,
			.timestampValidBits = 64,
			.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
		};
		idx++;
	}

	if (pdevice->rad_info.num_compute_rings > 0 &&
	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
		if (*pCount > idx) {
			*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
				.queueFlags = VK_QUEUE_COMPUTE_BIT |
				              VK_QUEUE_TRANSFER_BIT |
				              VK_QUEUE_SPARSE_BINDING_BIT,
				.queueCount = pdevice->rad_info.num_compute_rings,
				.timestampValidBits = 64,
				.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
			};
			idx++;
		}
	}
	*pCount = idx;
}

void radv_GetPhysicalDeviceQueueFamilyProperties(
	VkPhysicalDevice                            physicalDevice,
	uint32_t*                                   pCount,
	VkQueueFamilyProperties*                    pQueueFamilyProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	if (!pQueueFamilyProperties) {
		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
		return;
	}
	VkQueueFamilyProperties *properties[] = {
		pQueueFamilyProperties + 0,
		pQueueFamilyProperties + 1,
		pQueueFamilyProperties + 2,
	};
	radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
	assert(*pCount <= 3);
}

void radv_GetPhysicalDeviceQueueFamilyProperties2KHR(
	VkPhysicalDevice                            physicalDevice,
	uint32_t*                                   pCount,
	VkQueueFamilyProperties2KHR                *pQueueFamilyProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	if (!pQueueFamilyProperties) {
		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
		return;
	}
	VkQueueFamilyProperties *properties[] = {
		&pQueueFamilyProperties[0].queueFamilyProperties,
		&pQueueFamilyProperties[1].queueFamilyProperties,
		&pQueueFamilyProperties[2].queueFamilyProperties,
	};
	radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
	assert(*pCount <= 3);
}

void radv_GetPhysicalDeviceMemoryProperties(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceMemoryProperties           *pMemoryProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);

	*pMemoryProperties = physical_device->memory_properties;
}

void radv_GetPhysicalDeviceMemoryProperties2KHR(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceMemoryProperties2KHR       *pMemoryProperties)
{
	return radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
						      &pMemoryProperties->memoryProperties);
}

VkResult radv_GetMemoryHostPointerPropertiesEXT(
	VkDevice                                    _device,
	VkExternalMemoryHandleTypeFlagBitsKHR       handleType,
	const void                                 *pHostPointer,
	VkMemoryHostPointerPropertiesEXT           *pMemoryHostPointerProperties)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	switch (handleType)
	{
	case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
		const struct radv_physical_device *physical_device = device->physical_device;
		uint32_t memoryTypeBits = 0;
		for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
			if (physical_device->mem_type_indices[i] == RADV_MEM_TYPE_GTT_CACHED) {
				memoryTypeBits = (1 << i);
				break;
			}
		}
		pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
		return VK_SUCCESS;
	}
	default:
		return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
	}
}

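/* Map VK_EXT_global_priority values onto the winsys context priorities,
 * defaulting to MEDIUM when no priority struct is chained in.
 */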
static enum radeon_ctx_priority
radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
{
	/* Default to MEDIUM when a specific global priority isn't requested */
	if (!pObj)
		return RADEON_CTX_PRIORITY_MEDIUM;

	switch(pObj->globalPriority) {
	case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
		return RADEON_CTX_PRIORITY_REALTIME;
	case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
		return RADEON_CTX_PRIORITY_HIGH;
	case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
		return RADEON_CTX_PRIORITY_MEDIUM;
	case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
		return RADEON_CTX_PRIORITY_LOW;
	default:
		unreachable("Illegal global priority value");
		return RADEON_CTX_PRIORITY_INVALID;
	}
}

static int
radv_queue_init(struct radv_device *device, struct radv_queue *queue,
		uint32_t queue_family_index, int idx,
		const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
{
	queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	queue->device = device;
	queue->queue_family_index = queue_family_index;
	queue->queue_idx = idx;
	queue->priority = radv_get_queue_global_priority(global_priority);

	queue->hw_ctx = device->ws->ctx_create(device->ws, queue->priority);
	if (!queue->hw_ctx)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	return VK_SUCCESS;
}

static void
radv_queue_finish(struct radv_queue *queue)
{
	if (queue->hw_ctx)
		queue->device->ws->ctx_destroy(queue->hw_ctx);

	if (queue->initial_full_flush_preamble_cs)
		queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
	if (queue->initial_preamble_cs)
		queue->device->ws->cs_destroy(queue->initial_preamble_cs);
	if (queue->continue_preamble_cs)
		queue->device->ws->cs_destroy(queue->continue_preamble_cs);
	if (queue->descriptor_bo)
		queue->device->ws->buffer_destroy(queue->descriptor_bo);
	if (queue->scratch_bo)
		queue->device->ws->buffer_destroy(queue->scratch_bo);
	if (queue->esgs_ring_bo)
		queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
	if (queue->gsvs_ring_bo)
		queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
	if (queue->tess_rings_bo)
		queue->device->ws->buffer_destroy(queue->tess_rings_bo);
	if (queue->compute_scratch_bo)
		queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
}

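/* Pick the geometry shader table depth for this chip: the smaller parts get
 * 16 entries, everything else 32. Unknown families are a hard error so new
 * chips have to be added here explicitly.
 */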
static void
radv_device_init_gs_info(struct radv_device *device)
{
	switch (device->physical_device->rad_info.family) {
	case CHIP_OLAND:
	case CHIP_HAINAN:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
	case CHIP_ICELAND:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		device->gs_table_depth = 16;
		return;
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_VERDE:
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGA10:
	case CHIP_RAVEN:
		device->gs_table_depth = 32;
		return;
	default:
		unreachable("unknown GPU");
	}
}

static int radv_get_device_extension_index(const char *name)
{
	for (unsigned i = 0; i < RADV_DEVICE_EXTENSION_COUNT; ++i) {
		if (strcmp(name, radv_device_extensions[i].extensionName) == 0)
			return i;
	}
	return -1;
}

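/* vkCreateDevice: validate the requested features and extensions, create the
 * hardware queues with their global priorities, derive per-device tuning
 * flags (scratch waves, binning, dispatch initiator, tessellation setup),
 * then build the meta shaders and the in-memory pipeline cache.
 */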
VkResult radv_CreateDevice(
	VkPhysicalDevice                            physicalDevice,
	const VkDeviceCreateInfo*                   pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkDevice*                                   pDevice)
{
	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
	VkResult result;
	struct radv_device *device;

	bool keep_shader_info = false;

	/* Check enabled features */
	if (pCreateInfo->pEnabledFeatures) {
		VkPhysicalDeviceFeatures supported_features;
		radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
		VkBool32 *supported_feature = (VkBool32 *)&supported_features;
		VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures;
		unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
		for (uint32_t i = 0; i < num_features; i++) {
			if (enabled_feature[i] && !supported_feature[i])
				return vk_error(VK_ERROR_FEATURE_NOT_PRESENT);
		}
	}

	device = vk_zalloc2(&physical_device->instance->alloc, pAllocator,
			    sizeof(*device), 8,
			    VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
	if (!device)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	device->instance = physical_device->instance;
	device->physical_device = physical_device;

	device->ws = physical_device->ws;
	if (pAllocator)
		device->alloc = *pAllocator;
	else
		device->alloc = physical_device->instance->alloc;

	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
		const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
		int index = radv_get_device_extension_index(ext_name);
		if (index < 0 || !physical_device->supported_extensions.extensions[index]) {
			vk_free(&device->alloc, device);
			return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
		}

		device->enabled_extensions.extensions[index] = true;
	}

	keep_shader_info = device->enabled_extensions.AMD_shader_info;

	mtx_init(&device->shader_slab_mutex, mtx_plain);
	list_inithead(&device->shader_slabs);

	for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
		const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
		uint32_t qfi = queue_create->queueFamilyIndex;
		const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
			vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);

		assert(!global_priority || device->physical_device->rad_info.has_ctx_priority);

		device->queues[qfi] = vk_alloc(&device->alloc,
					       queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
		if (!device->queues[qfi]) {
			result = VK_ERROR_OUT_OF_HOST_MEMORY;
			goto fail;
		}

		memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));

		device->queue_count[qfi] = queue_create->queueCount;

		for (unsigned q = 0; q < queue_create->queueCount; q++) {
			result = radv_queue_init(device, &device->queues[qfi][q], qfi, q, global_priority);
			if (result != VK_SUCCESS)
				goto fail;
		}
	}

	device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
	                      (device->instance->perftest_flags & RADV_PERFTEST_BINNING);

	/* Disabled and not implemented for now. */
	device->dfsm_allowed = device->pbb_allowed && false;

#ifdef ANDROID
	device->always_use_syncobj = device->physical_device->rad_info.has_syncobj_wait_for_submit;
#endif

	device->llvm_supports_spill = true;

	/* The maximum number of scratch waves. Scratch space isn't divided
	 * evenly between CUs. The number is only a function of the number of CUs.
	 * We can decrease the constant to decrease the scratch buffer size.
	 *
	 * device->scratch_waves must be >= the maximum possible size of
	 * 1 threadgroup, so that the hw doesn't hang from being unable
	 * to start any.
	 *
	 * The recommended value is 4 per CU at most. Higher numbers don't
	 * bring much benefit, but they still occupy chip resources (think
	 * async compute). I've seen ~2% performance difference between 4 and 32.
	 */
	uint32_t max_threads_per_block = 2048;
	device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
				     max_threads_per_block / 64);

	device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1) |
				     S_00B800_FORCE_START_AT_000(1);

	if (device->physical_device->rad_info.chip_class >= CIK) {
		/* If the KMD allows it (there is a KMD hw register for it),
		 * allow launching waves out-of-order.
		 */
		device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
	}

	radv_device_init_gs_info(device);

	device->tess_offchip_block_dw_size =
		device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
	device->has_distributed_tess =
		device->physical_device->rad_info.chip_class >= VI &&
		device->physical_device->rad_info.max_se >= 2;

	if (getenv("RADV_TRACE_FILE")) {
		keep_shader_info = true;

		if (!radv_init_trace(device)) {
			result = VK_ERROR_INITIALIZATION_FAILED;
			goto fail;
		}
	}

	device->keep_shader_info = keep_shader_info;

	result = radv_device_init_meta(device);
	if (result != VK_SUCCESS)
		goto fail;

	radv_device_init_msaa(device);

	for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
		device->empty_cs[family] = device->ws->cs_create(device->ws, family);
		switch (family) {
		case RADV_QUEUE_GENERAL:
			radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
			radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1));
			radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1));
			break;
		case RADV_QUEUE_COMPUTE:
			radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
			radeon_emit(device->empty_cs[family], 0);
			break;
		}
		device->ws->cs_finalize(device->empty_cs[family]);
	}

	if (device->physical_device->rad_info.chip_class >= CIK)
		cik_create_gfx_config(device);

	VkPipelineCacheCreateInfo ci;
	ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
	ci.pNext = NULL;
	ci.flags = 0;
	ci.pInitialData = NULL;
	ci.initialDataSize = 0;
	VkPipelineCache pc;
	result = radv_CreatePipelineCache(radv_device_to_handle(device),
					  &ci, NULL, &pc);
	if (result != VK_SUCCESS)
		goto fail_meta;

	device->mem_cache = radv_pipeline_cache_from_handle(pc);

	*pDevice = radv_device_to_handle(device);
	return VK_SUCCESS;

fail_meta:
	radv_device_finish_meta(device);
fail:
	if (device->trace_bo)
		device->ws->buffer_destroy(device->trace_bo);

	if (device->gfx_init)
		device->ws->buffer_destroy(device->gfx_init);

	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
		for (unsigned q = 0; q < device->queue_count[i]; q++)
			radv_queue_finish(&device->queues[i][q]);
		if (device->queue_count[i])
			vk_free(&device->alloc, device->queues[i]);
	}

	vk_free(&device->alloc, device);