Commit 8921e776, authored by Jesse Natalie and committed by Erik Faye-Lund

microsoft/clc: Deal with alignment on derefs instead of intrinsics

parent c8604be4
......@@ -945,9 +945,8 @@ get_cast_type(unsigned bit_size)
}
static void
split_unaligned_load(nir_builder *b, nir_intrinsic_instr *intrin)
split_unaligned_load(nir_builder *b, nir_intrinsic_instr *intrin, unsigned alignment)
{
unsigned alignment = nir_intrinsic_align(intrin);
enum gl_access_qualifier access = nir_intrinsic_access(intrin);
nir_ssa_def *srcs[NIR_MAX_VEC_COMPONENTS * NIR_MAX_VEC_COMPONENTS * sizeof(int64_t) / 8];
unsigned comp_size = intrin->dest.ssa.bit_size / 8;
......@@ -956,13 +955,14 @@ split_unaligned_load(nir_builder *b, nir_intrinsic_instr *intrin)
b->cursor = nir_before_instr(&intrin->instr);
nir_deref_instr *ptr = nir_src_as_deref(intrin->src[0]);
const struct glsl_type *cast_type = get_cast_type(alignment * 8);
nir_deref_instr *cast = nir_build_deref_cast(b, &ptr->dest.ssa, ptr->mode, cast_type, alignment);
unsigned num_loads = DIV_ROUND_UP(comp_size * num_comps, alignment);
for (unsigned i = 0; i < num_loads; ++i) {
nir_deref_instr *elem = nir_build_deref_ptr_as_array(b, cast, nir_imm_intN_t(b, i, cast->dest.ssa.bit_size));
srcs[i] = nir_load_deref_with_access_and_align(b, elem, access, alignment, 0);
srcs[i] = nir_load_deref_with_access(b, elem, access);
}
nir_ssa_def *new_dest = nir_extract_bits(b, srcs, num_loads, 0, num_comps, intrin->dest.ssa.bit_size);
......@@ -971,9 +971,8 @@ split_unaligned_load(nir_builder *b, nir_intrinsic_instr *intrin)
}
static void
split_unaligned_store(nir_builder *b, nir_intrinsic_instr *intrin)
split_unaligned_store(nir_builder *b, nir_intrinsic_instr *intrin, unsigned alignment)
{
unsigned alignment = nir_intrinsic_align(intrin);
enum gl_access_qualifier access = nir_intrinsic_access(intrin);
assert(intrin->src[1].is_ssa);
......@@ -984,6 +983,7 @@ split_unaligned_store(nir_builder *b, nir_intrinsic_instr *intrin)
b->cursor = nir_before_instr(&intrin->instr);
nir_deref_instr *ptr = nir_src_as_deref(intrin->src[0]);
const struct glsl_type *cast_type = get_cast_type(alignment * 8);
nir_deref_instr *cast = nir_build_deref_cast(b, &ptr->dest.ssa, ptr->mode, cast_type, alignment);
......@@ -991,7 +991,7 @@ split_unaligned_store(nir_builder *b, nir_intrinsic_instr *intrin)
for (unsigned i = 0; i < num_stores; ++i) {
nir_ssa_def *substore_val = nir_extract_bits(b, &value, 1, i * alignment * 8, 1, alignment * 8);
nir_deref_instr *elem = nir_build_deref_ptr_as_array(b, cast, nir_imm_intN_t(b, i, cast->dest.ssa.bit_size));
nir_store_deref_with_access_and_align(b, elem, substore_val, ~0, access, alignment, 0);
nir_store_deref_with_access(b, elem, substore_val, ~0, access);
}
nir_instr_remove(&intrin->instr);
......@@ -1017,15 +1017,21 @@ split_unaligned_loads_stores(nir_shader *shader)
if (intrin->intrinsic != nir_intrinsic_load_deref &&
intrin->intrinsic != nir_intrinsic_store_deref)
continue;
unsigned alignment = nir_intrinsic_align(intrin);
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
/* Alignment = 0 means naturally aligned. We can load anything at
* 4-byte alignment, except for UBOs (AKA CBs where the granularity
* is 16 bytes.
unsigned align_mul = 0, align_offset = 0;
nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset);
if (align_mul == 0) {
DebugBreak();
nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset);
}
unsigned alignment = align_offset ? 1 << (ffs(align_offset) - 1) : align_mul;
/* We can load anything at 4-byte alignment, except for
* UBOs (AKA CBs where the granularity is 16 bytes).
*/
if (alignment == 0 ||
alignment >= (deref->mode == nir_var_mem_ubo ? 16 : 4))
if (alignment >= (deref->mode == nir_var_mem_ubo ? 16 : 4))
continue;
nir_ssa_def *val;
......@@ -1045,9 +1051,9 @@ split_unaligned_loads_stores(nir_shader *shader)
continue;
if (intrin->intrinsic == nir_intrinsic_load_deref)
split_unaligned_load(&b, intrin);
split_unaligned_load(&b, intrin, alignment);
else
split_unaligned_store(&b, intrin);
split_unaligned_store(&b, intrin, alignment);
progress = true;
}
}
......@@ -1447,7 +1453,6 @@ clc_to_dxil(struct clc_context *ctx,
NIR_PASS_V(nir, nir_lower_vars_to_ssa);
NIR_PASS_V(nir, nir_lower_alu);
NIR_PASS_V(nir, nir_opt_dce);
NIR_PASS_V(nir, split_unaligned_loads_stores);
// Needs to come before lower_explicit_io
struct clc_image_lower_context image_lower_context = { metadata, &srv_id, &uav_id };
......@@ -1457,17 +1462,19 @@ clc_to_dxil(struct clc_context *ctx,
NIR_PASS_V(nir, dxil_lower_sample_to_txf_for_integer_tex,
int_sampler_states, NULL, 14.0f);
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_mem_shared | nir_var_function_temp);
assert(nir->scratch_size == 0);
NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
nir_var_mem_shared | nir_var_function_temp | nir_var_shader_in | nir_var_mem_global | nir_var_mem_constant,
glsl_get_cl_type_size_align);
NIR_PASS_V(nir, dxil_nir_lower_ubo_to_temp);
NIR_PASS_V(nir, clc_lower_constant_to_ssbo, dxil->kernel, &uav_id);
NIR_PASS_V(nir, clc_lower_global_to_ssbo);
NIR_PASS_V(nir, dxil_nir_lower_deref_ssbo);
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_mem_shared | nir_var_function_temp);
assert(nir->scratch_size == 0);
NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
nir_var_mem_shared | nir_var_function_temp,
glsl_get_cl_type_size_align);
NIR_PASS_V(nir, split_unaligned_loads_stores);
assert(nir->info.cs.ptr_size == 64);
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
......
......@@ -196,10 +196,11 @@ lower_load_kernel_input(nir_builder *b, nir_intrinsic_instr *intr,
nir_u2u(b, intr->src[0].ssa, 32));
nir_deref_instr *deref = nir_build_deref_cast(b, ptr, nir_var_mem_ubo, type,
bit_size / 8);
deref->cast.align_mul = nir_intrinsic_align_mul(intr);
deref->cast.align_offset = nir_intrinsic_align_offset(intr);
nir_ssa_def *result =
nir_load_deref_with_access_and_align(b, deref, (enum gl_access_qualifier)0,
nir_intrinsic_align_mul(intr),
nir_intrinsic_align_offset(intr));
nir_load_deref(b, deref);
nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(result));
nir_instr_remove(&intr->instr);
return true;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment