Commit a70a9987 authored by Daniel Schürmann

radv/aco: Setup alternate path in RADV to support the experimental ACO compiler

LLVM remains default and ACO can be enabled with RADV_PERFTEST=aco.
Co-authored-by: Daniel Schürmann <daniel@schuermann.dev>
Co-authored-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
parent 93c8ebfa
......@@ -49,6 +49,9 @@ static void ac_init_llvm_target()
/* For inline assembly. */
LLVMInitializeAMDGPUAsmParser();
/* For ACO disassembly. */
LLVMInitializeAMDGPUDisassembler();
/* Workaround for bug in llvm 4.0 that causes image intrinsics
* to disappear.
* https://reviews.llvm.org/D26348
......
......@@ -22,6 +22,7 @@ inc_amd = include_directories('.')
subdir('addrlib')
subdir('common')
subdir('compiler')
if with_amd_vk
subdir('vulkan')
endif
......@@ -156,7 +156,7 @@ libvulkan_radeon = shared_library(
],
dependencies : [
dep_llvm, dep_libdrm_amdgpu, dep_thread, dep_elf, dep_dl, dep_m,
dep_valgrind, radv_deps,
dep_valgrind, radv_deps, idep_aco,
idep_mesautil, idep_nir, idep_vulkan_util, idep_amdgfxregs_h, idep_xmlconfig,
],
c_args : [c_vis_args, no_override_init_args, radv_flags],
......
......@@ -2844,6 +2844,10 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
break;
case VK_ACCESS_SHADER_READ_BIT:
flush_bits |= RADV_CMD_FLAG_INV_VCACHE;
/* Unlike LLVM, ACO uses SMEM for SSBOs and we have to
* invalidate the scalar cache. */
if (cmd_buffer->device->physical_device->use_aco)
flush_bits |= RADV_CMD_FLAG_INV_SCACHE;
if (!image_is_coherent)
flush_bits |= RADV_CMD_FLAG_INV_L2;
......
......@@ -58,18 +58,19 @@ enum {
};
enum {
RADV_PERFTEST_NO_BATCHCHAIN = 0x1,
RADV_PERFTEST_SISCHED = 0x2,
RADV_PERFTEST_LOCAL_BOS = 0x4,
RADV_PERFTEST_OUT_OF_ORDER = 0x8,
RADV_PERFTEST_DCC_MSAA = 0x10,
RADV_PERFTEST_BO_LIST = 0x20,
RADV_PERFTEST_SHADER_BALLOT = 0x40,
RADV_PERFTEST_TC_COMPAT_CMASK = 0x80,
RADV_PERFTEST_CS_WAVE_32 = 0x100,
RADV_PERFTEST_PS_WAVE_32 = 0x200,
RADV_PERFTEST_GE_WAVE_32 = 0x400,
RADV_PERFTEST_DFSM = 0x800,
RADV_PERFTEST_NO_BATCHCHAIN = 0x1,
RADV_PERFTEST_SISCHED = 0x2,
RADV_PERFTEST_LOCAL_BOS = 0x4,
RADV_PERFTEST_OUT_OF_ORDER = 0x8,
RADV_PERFTEST_DCC_MSAA = 0x10,
RADV_PERFTEST_BO_LIST = 0x20,
RADV_PERFTEST_SHADER_BALLOT = 0x40,
RADV_PERFTEST_TC_COMPAT_CMASK = 0x80,
RADV_PERFTEST_CS_WAVE_32 = 0x100,
RADV_PERFTEST_PS_WAVE_32 = 0x200,
RADV_PERFTEST_GE_WAVE_32 = 0x400,
RADV_PERFTEST_DFSM = 0x800,
RADV_PERFTEST_ACO = 0x1000,
};
bool
......
......@@ -86,41 +86,41 @@ radv_get_device_uuid(struct radeon_info *info, void *uuid)
}
static void
radv_get_device_name(enum radeon_family family, char *name, size_t name_len)
radv_get_device_name(enum radeon_family family, char *name, size_t name_len, bool aco)
{
const char *chip_string;
switch (family) {
case CHIP_TAHITI: chip_string = "AMD RADV TAHITI"; break;
case CHIP_PITCAIRN: chip_string = "AMD RADV PITCAIRN"; break;
case CHIP_VERDE: chip_string = "AMD RADV CAPE VERDE"; break;
case CHIP_OLAND: chip_string = "AMD RADV OLAND"; break;
case CHIP_HAINAN: chip_string = "AMD RADV HAINAN"; break;
case CHIP_BONAIRE: chip_string = "AMD RADV BONAIRE"; break;
case CHIP_KAVERI: chip_string = "AMD RADV KAVERI"; break;
case CHIP_KABINI: chip_string = "AMD RADV KABINI"; break;
case CHIP_HAWAII: chip_string = "AMD RADV HAWAII"; break;
case CHIP_TONGA: chip_string = "AMD RADV TONGA"; break;
case CHIP_ICELAND: chip_string = "AMD RADV ICELAND"; break;
case CHIP_CARRIZO: chip_string = "AMD RADV CARRIZO"; break;
case CHIP_FIJI: chip_string = "AMD RADV FIJI"; break;
case CHIP_POLARIS10: chip_string = "AMD RADV POLARIS10"; break;
case CHIP_POLARIS11: chip_string = "AMD RADV POLARIS11"; break;
case CHIP_POLARIS12: chip_string = "AMD RADV POLARIS12"; break;
case CHIP_STONEY: chip_string = "AMD RADV STONEY"; break;
case CHIP_VEGAM: chip_string = "AMD RADV VEGA M"; break;
case CHIP_VEGA10: chip_string = "AMD RADV VEGA10"; break;
case CHIP_VEGA12: chip_string = "AMD RADV VEGA12"; break;
case CHIP_VEGA20: chip_string = "AMD RADV VEGA20"; break;
case CHIP_RAVEN: chip_string = "AMD RADV RAVEN"; break;
case CHIP_RAVEN2: chip_string = "AMD RADV RAVEN2"; break;
case CHIP_NAVI10: chip_string = "AMD RADV NAVI10"; break;
case CHIP_NAVI12: chip_string = "AMD RADV NAVI12"; break;
case CHIP_NAVI14: chip_string = "AMD RADV NAVI14"; break;
default: chip_string = "AMD RADV unknown"; break;
}
snprintf(name, name_len, "%s (LLVM " MESA_LLVM_VERSION_STRING ")", chip_string);
case CHIP_TAHITI: chip_string = "TAHITI"; break;
case CHIP_PITCAIRN: chip_string = "PITCAIRN"; break;
case CHIP_VERDE: chip_string = "CAPE VERDE"; break;
case CHIP_OLAND: chip_string = "OLAND"; break;
case CHIP_HAINAN: chip_string = "HAINAN"; break;
case CHIP_BONAIRE: chip_string = "BONAIRE"; break;
case CHIP_KAVERI: chip_string = "KAVERI"; break;
case CHIP_KABINI: chip_string = "KABINI"; break;
case CHIP_HAWAII: chip_string = "HAWAII"; break;
case CHIP_TONGA: chip_string = "TONGA"; break;
case CHIP_ICELAND: chip_string = "ICELAND"; break;
case CHIP_CARRIZO: chip_string = "CARRIZO"; break;
case CHIP_FIJI: chip_string = "FIJI"; break;
case CHIP_POLARIS10: chip_string = "POLARIS10"; break;
case CHIP_POLARIS11: chip_string = "POLARIS11"; break;
case CHIP_POLARIS12: chip_string = "POLARIS12"; break;
case CHIP_STONEY: chip_string = "STONEY"; break;
case CHIP_VEGAM: chip_string = "VEGA M"; break;
case CHIP_VEGA10: chip_string = "VEGA10"; break;
case CHIP_VEGA12: chip_string = "VEGA12"; break;
case CHIP_VEGA20: chip_string = "VEGA20"; break;
case CHIP_RAVEN: chip_string = "RAVEN"; break;
case CHIP_RAVEN2: chip_string = "RAVEN2"; break;
case CHIP_NAVI10: chip_string = "NAVI10"; break;
case CHIP_NAVI12: chip_string = "NAVI12"; break;
case CHIP_NAVI14: chip_string = "NAVI14"; break;
default: chip_string = "unknown"; break;
}
snprintf(name, name_len, "AMD RADV%s %s (LLVM " MESA_LLVM_VERSION_STRING ")", aco ? "/ACO" : "", chip_string);
}
static uint64_t
......@@ -327,7 +327,14 @@ radv_physical_device_init(struct radv_physical_device *device,
radv_handle_env_var_force_family(device);
radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name));
device->use_aco = instance->perftest_flags & RADV_PERFTEST_ACO;
if ((device->rad_info.chip_class < GFX8 ||
device->rad_info.chip_class > GFX9) && device->use_aco) {
fprintf(stderr, "WARNING: disabling ACO on unsupported GPUs.\n");
device->use_aco = false;
}
radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name), device->use_aco);
if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
device->ws->destroy(device->ws);
......@@ -339,7 +346,8 @@ radv_physical_device_init(struct radv_physical_device *device,
/* These flags affect shader compilation. */
uint64_t shader_env_flags =
(device->instance->perftest_flags & RADV_PERFTEST_SISCHED ? 0x1 : 0) |
(device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0);
(device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0) |
(device->use_aco ? 0x4 : 0);
/* The gpu id is already embedded in the uuid so we just pass "radv"
* when creating the cache.
......@@ -362,9 +370,10 @@ radv_physical_device_init(struct radv_physical_device *device,
(device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);
device->use_shader_ballot = device->rad_info.chip_class >= GFX8 &&
device->instance->perftest_flags & RADV_PERFTEST_SHADER_BALLOT;
(device->use_aco || device->instance->perftest_flags & RADV_PERFTEST_SHADER_BALLOT);
device->use_ngg_streamout = false;
device->use_aco = device->instance->perftest_flags & RADV_PERFTEST_ACO;
/* Determine the number of threads per wave for all stages. */
device->cs_wave_size = 64;
......@@ -500,6 +509,7 @@ static const struct debug_control radv_perftest_options[] = {
{"pswave32", RADV_PERFTEST_PS_WAVE_32},
{"gewave32", RADV_PERFTEST_GE_WAVE_32},
{"dfsm", RADV_PERFTEST_DFSM},
{"aco", RADV_PERFTEST_ACO},
{NULL, 0}
};
......@@ -622,6 +632,8 @@ VkResult radv_CreateInstance(
instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
radv_perftest_options);
if (instance->perftest_flags & RADV_PERFTEST_ACO)
fprintf(stderr, "WARNING: Experimental compiler backend enabled. Here be dragons! Incorrect rendering, GPU hangs and/or resets are likely\n");
if (instance->debug_flags & RADV_DEBUG_STARTUP)
radv_logi("Created an instance");
......@@ -832,7 +844,7 @@ void radv_GetPhysicalDeviceFeatures(
.shaderCullDistance = true,
.shaderFloat64 = true,
.shaderInt64 = true,
.shaderInt16 = pdevice->rad_info.chip_class >= GFX9,
.shaderInt16 = pdevice->rad_info.chip_class >= GFX9 && !pdevice->use_aco,
.sparseBinding = true,
.variableMultisampleRate = true,
.inheritedQueries = true,
......@@ -874,7 +886,7 @@ void radv_GetPhysicalDeviceFeatures2(
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
VkPhysicalDevice16BitStorageFeatures *features =
(VkPhysicalDevice16BitStorageFeatures*)ext;
bool enabled = pdevice->rad_info.chip_class >= GFX8;
bool enabled = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_aco;
features->storageBuffer16BitAccess = enabled;
features->uniformAndStorageBuffer16BitAccess = enabled;
features->storagePushConstant16 = enabled;
......@@ -968,7 +980,7 @@ void radv_GetPhysicalDeviceFeatures2(
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR: {
VkPhysicalDevice8BitStorageFeaturesKHR *features =
(VkPhysicalDevice8BitStorageFeaturesKHR*)ext;
bool enabled = pdevice->rad_info.chip_class >= GFX8;
bool enabled = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_aco;
features->storageBuffer8BitAccess = enabled;
features->uniformAndStorageBuffer8BitAccess = enabled;
features->storagePushConstant8 = enabled;
......@@ -977,8 +989,8 @@ void radv_GetPhysicalDeviceFeatures2(
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR: {
VkPhysicalDeviceFloat16Int8FeaturesKHR *features =
(VkPhysicalDeviceFloat16Int8FeaturesKHR*)ext;
features->shaderFloat16 = pdevice->rad_info.chip_class >= GFX8;
features->shaderInt8 = true;
features->shaderFloat16 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_aco;
features->shaderInt8 = !pdevice->use_aco;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR: {
......
......@@ -51,7 +51,7 @@ class Extension:
# and dEQP-VK.api.info.device fail due to the duplicated strings.
EXTENSIONS = [
Extension('VK_ANDROID_native_buffer', 5, 'ANDROID && device->rad_info.has_syncobj_wait_for_submit'),
Extension('VK_KHR_16bit_storage', 1, True),
Extension('VK_KHR_16bit_storage', 1, '!device->use_aco'),
Extension('VK_KHR_bind_memory2', 1, True),
Extension('VK_KHR_create_renderpass2', 1, True),
Extension('VK_KHR_dedicated_allocation', 1, True),
......@@ -87,7 +87,7 @@ EXTENSIONS = [
Extension('VK_KHR_sampler_ycbcr_conversion', 1, True),
Extension('VK_KHR_shader_atomic_int64', 1, 'LLVM_VERSION_MAJOR >= 9'),
Extension('VK_KHR_shader_draw_parameters', 1, True),
Extension('VK_KHR_shader_float16_int8', 1, True),
Extension('VK_KHR_shader_float16_int8', 1, '!device->use_aco'),
Extension('VK_KHR_storage_buffer_storage_class', 1, True),
Extension('VK_KHR_surface', 25, 'RADV_HAS_SURFACE'),
Extension('VK_KHR_surface_protected_capabilities', 1, 'RADV_HAS_SURFACE'),
......@@ -99,7 +99,7 @@ EXTENSIONS = [
Extension('VK_KHR_xlib_surface', 6, 'VK_USE_PLATFORM_XLIB_KHR'),
Extension('VK_KHR_multiview', 1, True),
Extension('VK_KHR_display', 23, 'VK_USE_PLATFORM_DISPLAY_KHR'),
Extension('VK_KHR_8bit_storage', 1, 'device->rad_info.chip_class >= GFX8'),
Extension('VK_KHR_8bit_storage', 1, 'device->rad_info.chip_class >= GFX8 && !device->use_aco'),
Extension('VK_EXT_direct_mode_display', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
Extension('VK_EXT_acquire_xlib_display', 1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
Extension('VK_EXT_buffer_device_address', 1, True),
......@@ -138,8 +138,8 @@ EXTENSIONS = [
Extension('VK_AMD_buffer_marker', 1, True),
Extension('VK_AMD_draw_indirect_count', 1, True),
Extension('VK_AMD_gcn_shader', 1, True),
Extension('VK_AMD_gpu_shader_half_float', 1, 'device->rad_info.chip_class >= GFX9'),
Extension('VK_AMD_gpu_shader_int16', 1, 'device->rad_info.chip_class >= GFX9'),
Extension('VK_AMD_gpu_shader_half_float', 1, '!device->use_aco && device->rad_info.chip_class >= GFX9'),
Extension('VK_AMD_gpu_shader_int16', 1, '!device->use_aco && device->rad_info.chip_class >= GFX9'),
Extension('VK_AMD_rasterization_order', 1, 'device->rad_info.has_out_of_order_rast'),
Extension('VK_AMD_shader_ballot', 1, 'device->use_shader_ballot'),
Extension('VK_AMD_shader_core_properties', 1, True),
......
......@@ -167,6 +167,8 @@ static uint32_t get_hash_flags(struct radv_device *device)
hash_flags |= RADV_HASH_SHADER_PS_WAVE32;
if (device->physical_device->ge_wave_size == 32)
hash_flags |= RADV_HASH_SHADER_GE_WAVE32;
if (device->physical_device->use_aco)
hash_flags |= RADV_HASH_SHADER_ACO;
return hash_flags;
}
......@@ -2551,6 +2553,14 @@ void radv_stop_feedback(VkPipelineCreationFeedbackEXT *feedback, bool cache_hit)
(cache_hit ? VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT : 0);
}
/* Tell whether the given shader stage of a pipeline may be compiled with ACO.
 *
 * stage:  the shader stage being considered.
 * has_gs: the pipeline contains a geometry shader.
 * has_ts: the pipeline contains tessellation shaders.
 *
 * Fragment and compute stages are always accepted. The vertex stage is
 * accepted only for pipelines with neither geometry nor tessellation
 * shaders. Every other stage is rejected.
 */
static
bool radv_aco_supported_stage(gl_shader_stage stage, bool has_gs, bool has_ts)
{
	switch (stage) {
	case MESA_SHADER_VERTEX:
		/* Only a vertex stage that is not paired with GS/tess qualifies. */
		return !has_gs && !has_ts;
	case MESA_SHADER_FRAGMENT:
	case MESA_SHADER_COMPUTE:
		return true;
	default:
		return false;
	}
}
static
void radv_create_shaders(struct radv_pipeline *pipeline,
struct radv_device *device,
......@@ -2613,6 +2623,10 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
modules[MESA_SHADER_FRAGMENT] = &fs_m;
}
bool has_gs = modules[MESA_SHADER_GEOMETRY];
bool has_ts = modules[MESA_SHADER_TESS_CTRL] || modules[MESA_SHADER_TESS_EVAL];
bool use_aco = device->physical_device->use_aco;
for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
const VkPipelineShaderStageCreateInfo *stage = pStages[i];
......@@ -2621,10 +2635,11 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
radv_start_feedback(stage_feedbacks[i]);
bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts);
nir[i] = radv_shader_compile_to_nir(device, modules[i],
stage ? stage->pName : "main", i,
stage ? stage->pSpecializationInfo : NULL,
flags, pipeline->layout);
flags, pipeline->layout, aco);
/* We don't want to alter meta shaders IR directly so clone it
* first.
......@@ -2651,7 +2666,10 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
nir_lower_non_uniform_ssbo_access |
nir_lower_non_uniform_texture_access |
nir_lower_non_uniform_image_access);
NIR_PASS_V(nir[i], nir_lower_bool_to_int32);
bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts);
if (!aco)
NIR_PASS_V(nir[i], nir_lower_bool_to_int32);
}
if (radv_can_dump_shader(device, modules[i], false))
......@@ -2690,11 +2708,13 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
if (!pipeline->shaders[MESA_SHADER_FRAGMENT]) {
radv_start_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT]);
bool aco = use_aco && radv_aco_supported_stage(MESA_SHADER_FRAGMENT, has_gs, has_ts);
pipeline->shaders[MESA_SHADER_FRAGMENT] =
radv_shader_variant_compile(device, modules[MESA_SHADER_FRAGMENT], &nir[MESA_SHADER_FRAGMENT], 1,
pipeline->layout, keys + MESA_SHADER_FRAGMENT,
infos + MESA_SHADER_FRAGMENT,
keep_executable_info, &binaries[MESA_SHADER_FRAGMENT]);
keep_executable_info, aco,
&binaries[MESA_SHADER_FRAGMENT]);
radv_stop_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT], false);
}
......@@ -2725,7 +2745,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
pipeline->shaders[MESA_SHADER_TESS_CTRL] = radv_shader_variant_compile(device, modules[MESA_SHADER_TESS_CTRL], combined_nir, 2,
pipeline->layout,
&key, &infos[MESA_SHADER_TESS_CTRL], keep_executable_info,
&binaries[MESA_SHADER_TESS_CTRL]);
false, &binaries[MESA_SHADER_TESS_CTRL]);
radv_stop_feedback(stage_feedbacks[MESA_SHADER_TESS_CTRL], false);
}
......@@ -2744,7 +2764,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
pipeline->shaders[MESA_SHADER_GEOMETRY] = radv_shader_variant_compile(device, modules[MESA_SHADER_GEOMETRY], combined_nir, 2,
pipeline->layout,
&keys[pre_stage], &infos[MESA_SHADER_GEOMETRY], keep_executable_info,
&binaries[MESA_SHADER_GEOMETRY]);
false, &binaries[MESA_SHADER_GEOMETRY]);
radv_stop_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY], false);
}
......@@ -2763,10 +2783,11 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
radv_start_feedback(stage_feedbacks[i]);
bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts);
pipeline->shaders[i] = radv_shader_variant_compile(device, modules[i], &nir[i], 1,
pipeline->layout,
keys + i, infos + i,keep_executable_info,
&binaries[i]);
aco, &binaries[i]);
radv_stop_feedback(stage_feedbacks[i], false);
}
......
......@@ -296,6 +296,9 @@ struct radv_physical_device {
uint8_t cs_wave_size;
uint8_t ge_wave_size;
/* Whether to use the experimental compiler backend */
bool use_aco;
/* This is the drivers on-disk cache used as a fallback as opposed to
* the pipeline cache defined by apps.
*/
......@@ -1421,6 +1424,7 @@ struct radv_shader_module;
#define RADV_HASH_SHADER_CS_WAVE32 (1 << 4)
#define RADV_HASH_SHADER_PS_WAVE32 (1 << 5)
#define RADV_HASH_SHADER_GE_WAVE32 (1 << 6)
#define RADV_HASH_SHADER_ACO (1 << 7)
void
radv_hash_shaders(unsigned char *hash,
......
......@@ -48,9 +48,11 @@
#include "util/debug.h"
#include "ac_exp_param.h"
#include "aco_interface.h"
#include "util/string_buffer.h"
static const struct nir_shader_compiler_options nir_options = {
static const struct nir_shader_compiler_options nir_options_llvm = {
.vertex_id_zero_based = true,
.lower_scmp = true,
.lower_flrp16 = true,
......@@ -80,6 +82,36 @@ static const struct nir_shader_compiler_options nir_options = {
.use_interpolated_input_intrinsics = true,
};
/* NIR compiler options used for shaders that are built with the ACO backend.
 * radv_shader_compile_to_nir() selects this table instead of
 * nir_options_llvm when its use_aco argument is true, and passes it both to
 * spirv_to_nir() and to internal (meta) NIR shaders via nir->options.
 *
 * NOTE(review): each lower_* flag presumably asks NIR to lower an operation
 * that the backend does not handle directly -- confirm against the
 * nir_shader_compiler_options documentation and the ACO instruction selector.
 */
static const struct nir_shader_compiler_options nir_options_aco = {
.vertex_id_zero_based = true,
.lower_scmp = true,
.lower_flrp16 = true,
.lower_flrp32 = true,
.lower_flrp64 = true,
.lower_device_index_to_zero = true,
.lower_fdiv = true,
.lower_bitfield_insert_to_bitfield_select = true,
.lower_bitfield_extract = true,
.lower_sub = true, /* TODO: set this to false once !1236 is merged */
.lower_pack_snorm_2x16 = true,
.lower_pack_snorm_4x8 = true,
.lower_pack_unorm_2x16 = true,
.lower_pack_unorm_4x8 = true,
.lower_unpack_snorm_2x16 = true,
.lower_unpack_snorm_4x8 = true,
.lower_unpack_unorm_2x16 = true,
.lower_unpack_unorm_4x8 = true,
.lower_unpack_half_2x16 = true,
.lower_extract_byte = true,
.lower_extract_word = true,
.lower_ffma = true,
.lower_fpow = true,
.lower_mul_2x32_64 = true,
.lower_rotate = true,
/* Upper bound handed to NIR's loop unrolling. */
.max_unroll_iterations = 32,
.use_interpolated_input_intrinsics = true,
};
bool
radv_can_dump_shader(struct radv_device *device,
struct radv_shader_module *module,
......@@ -257,15 +289,18 @@ radv_shader_compile_to_nir(struct radv_device *device,
gl_shader_stage stage,
const VkSpecializationInfo *spec_info,
const VkPipelineCreateFlags flags,
const struct radv_pipeline_layout *layout)
const struct radv_pipeline_layout *layout,
bool use_aco)
{
nir_shader *nir;
const nir_shader_compiler_options *nir_options = use_aco ? &nir_options_aco :
&nir_options_llvm;
if (module->nir) {
/* Some things such as our meta clear/blit code will give us a NIR
* shader directly. In that case, we just ignore the SPIR-V entirely
* and just use the NIR shader */
nir = module->nir;
nir->options = &nir_options;
nir->options = nir_options;
nir_validate_shader(nir, "in internal shader");
assert(exec_list_length(&nir->functions) == 1);
......@@ -305,13 +340,13 @@ radv_shader_compile_to_nir(struct radv_device *device,
.descriptor_indexing = true,
.device_group = true,
.draw_parameters = true,
.float16 = true,
.float16 = !device->physical_device->use_aco,
.float64 = true,
.geometry_streams = true,
.image_read_without_format = true,
.image_write_without_format = true,
.int8 = true,
.int16 = true,
.int8 = !device->physical_device->use_aco,
.int16 = !device->physical_device->use_aco,
.int64 = true,
.int64_atomics = true,
.multiview = true,
......@@ -320,8 +355,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
.runtime_descriptor_array = true,
.shader_viewport_index_layer = true,
.stencil_export = true,
.storage_8bit = true,
.storage_16bit = true,
.storage_8bit = !device->physical_device->use_aco,
.storage_16bit = !device->physical_device->use_aco,
.storage_image_ms = true,
.subgroup_arithmetic = true,
.subgroup_ballot = true,
......@@ -343,7 +378,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
nir = spirv_to_nir(spirv, module->size / 4,
spec_entries, num_spec_entries,
stage, entrypoint_name,
&spirv_options, &nir_options);
&spirv_options, nir_options);
assert(nir->info.stage == stage);
nir_validate_shader(nir, "after spirv_to_nir");
......@@ -383,6 +418,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
NIR_PASS_V(nir, nir_split_var_copies);
NIR_PASS_V(nir, nir_split_per_member_structs);
if (nir->info.stage == MESA_SHADER_FRAGMENT && use_aco)
NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_out);
if (nir->info.stage == MESA_SHADER_FRAGMENT)
NIR_PASS_V(nir, nir_lower_input_attachments, true);
......@@ -961,7 +998,7 @@ radv_shader_variant_create(struct radv_device *device,
assert(binary->type == RADV_BINARY_TYPE_LEGACY);
config = ((struct radv_shader_binary_legacy *)binary)->config;
variant->code_size = radv_get_shader_binary_size(((struct radv_shader_binary_legacy *)binary)->code_size);
variant->exec_size = variant->code_size;
variant->exec_size = ((struct radv_shader_binary_legacy *)binary)->exec_size;
}
variant->info = binary->info;
......@@ -1049,13 +1086,12 @@ shader_variant_compile(struct radv_device *device,
struct radv_nir_compiler_options *options,
bool gs_copy_shader,
bool keep_shader_info,
bool use_aco,
struct radv_shader_binary **binary_out)
{
enum radeon_family chip_family = device->physical_device->rad_info.family;
enum ac_target_machine_options tm_options = 0;
struct ac_llvm_compiler ac_llvm;
struct radv_shader_binary *binary = NULL;
bool thread_compiler;
bool init_llvm;
options->family = chip_family;
options->chip_class = device->physical_device->rad_info.chip_class;
......@@ -1079,32 +1115,48 @@ shader_variant_compile(struct radv_device *device,
else
options->wave_size = device->physical_device->ge_wave_size;
if (options->supports_spill)
tm_options |= AC_TM_SUPPORTS_SPILL;
if (device->instance->perftest_flags & RADV_PERFTEST_SISCHED)
tm_options |= AC_TM_SISCHED;
if (options->check_ir)
tm_options |= AC_TM_CHECK_IR;
if (device->instance->debug_flags & RADV_DEBUG_NO_LOAD_STORE_OPT)
tm_options |= AC_TM_NO_LOAD_STORE_OPT;
thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM);
ac_init_llvm_once();
radv_init_llvm_compiler(&ac_llvm,
thread_compiler,
chip_family, tm_options,
options->wave_size);
if (gs_copy_shader) {
assert(shader_count == 1);
radv_compile_gs_copy_shader(&ac_llvm, *shaders, &binary,
info, options);
init_llvm = !use_aco || options->dump_shader;
#ifndef NDEBUG
init_llvm |= options->record_llvm_ir;
#endif
if (init_llvm)
ac_init_llvm_once();
if (use_aco) {
aco_compile_shader(shader_count, shaders, &binary, info, options);
binary->info = *info;
} else {
radv_compile_nir_shader(&ac_llvm, &binary, info,
shaders, shader_count, options);
}
binary->info = *info;
enum ac_target_machine_options tm_options = 0;
struct ac_llvm_compiler ac_llvm;
bool thread_compiler;
if (options->supports_spill)
tm_options |= AC_TM_SUPPORTS_SPILL;
if (device->instance->perftest_flags & RADV_PERFTEST_SISCHED)
tm_options |= AC_TM_SISCHED;
if (options->check_ir)
tm_options |= AC_TM_CHECK_IR;
if (device->instance->debug_flags & RADV_DEBUG_NO_LOAD_STORE_OPT)
tm_options |= AC_TM_NO_LOAD_STORE_OPT;
thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM);
radv_init_llvm_compiler(&ac_llvm,
thread_compiler,
chip_family, tm_options,
options->wave_size);
if (gs_copy_shader) {
assert(shader_count == 1);
radv_compile_gs_copy_shader(&ac_llvm, *shaders, &binary,
info, options);
} else {
radv_compile_nir_shader(&ac_llvm, &binary, info,
shaders, shader_count, options);
}
radv_destroy_llvm_compiler(&ac_llvm, thread_compiler);
binary->info = *info;
radv_destroy_llvm_compiler(&ac_llvm, thread_compiler);
}
struct radv_shader_variant *variant = radv_shader_variant_create(device, binary,
keep_shader_info);
......@@ -1143,6 +1195,7 @@ radv_shader_variant_compile(struct radv_device *device,
const struct radv_shader_variant_key *key,
struct radv_shader_info *info,
bool keep_shader_info,
bool use_aco,
struct radv_shader_binary **binary_out)
{
struct radv_nir_compiler_options options = {0};
......@@ -1156,7 +1209,7 @@ radv_shader_variant_compile(struct radv_device *device,
options.robust_buffer_access = device->robust_buffer_access;
return shader_variant_compile(device, module, shaders, shader_count, shaders[shader_count - 1]->info.stage, info,
&options, false, keep_shader_info, binary_out);
&options, false, keep_shader_info, use_aco, binary_out);
}
struct radv_shader_variant *
......@@ -1172,7 +1225,7 @@ radv_create_gs_copy_shader(struct radv_device *device,
options.key.has_multiview_view_index = multiview;
return shader_variant_compile(device, NULL, &shader, 1, MESA_SHADER_VERTEX,
info, &options, true, keep_shader_info, binary_out);
info, &options, true, keep_shader_info, false, binary_out);
}
void
......
......@@ -333,6 +333,7 @@ struct radv_shader_binary_legacy {
struct radv_shader_binary base;
struct ac_shader_config config;
unsigned code_size;
unsigned exec_size;
unsigned llvm_ir_size;
unsigned disasm_size;
......@@ -390,7 +391,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
gl_shader_stage stage,
const VkSpecializationInfo *spec_info,
const VkPipelineCreateFlags flags,
const struct radv_pipeline_layout *layout);
const struct radv_pipeline_layout *layout,
bool use_aco);
void *
radv_alloc_shader_memory(struct radv_device *device,
......@@ -412,6 +414,7 @@ radv_shader_variant_compile(struct radv_device *device,
const struct radv_shader_variant_key *key,
struct radv_shader_info *info,
bool keep_shader_info,
bool use_aco,
struct radv_shader_binary **binary_out);
struct radv_shader_variant *
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment