Commit a539b00b authored by Boris Brezillon
Browse files

nir: Set proper alignment when splitting var copies



Now that copy_deref intrinsics carry alignment information, we should take
it into account when splitting var copies. Note that the split copies are
always given an explicit alignment, even when the original copy is naturally
aligned (which is normally encoded with a 0), but that shouldn't be a problem.

We assume GL or CL size/align for natural alignment calculation, but
this can easily be extended through the addition of a new helper taking
a glsl_type_size_align_func.

The last thing that's worth mentioning is the fact that we get struct
field offsets with glsl_get_struct_field_offset() which returns the
cached offset value. That means we rely on proper offset calculation at
type creation. Another option would be to make
struct_type_get_field_offset() public which can be passed a
glsl_type_size_align_func.
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
parent 34fa01a2
......@@ -27,6 +27,7 @@
#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"
/*
* Implements "copy splitting" which is similar to structure splitting only
......@@ -62,32 +63,114 @@
* possibly a few wildcard array dereferences.
*/
/*
 * Computes an (align_mul, align_offset) pair for the memory location
 * designated by `deref`, based on the type layout reported by `size_align`.
 *
 * The deref chain is walked from its root down to `deref`:
 *  - *align_mul starts as the alignment `size_align` reports for the root
 *    type and *align_offset starts at 0;
 *  - struct derefs add the field offset returned by
 *    glsl_get_struct_field_offset();
 *  - array derefs with a constant index add index * element stride;
 *  - array derefs with a non-constant index, and wildcard array derefs,
 *    can land on any element, so *align_mul is lowered to something every
 *    element satisfies and *align_offset is reset to 0.
 *
 * The resulting *align_offset is not reduced modulo *align_mul; the caller
 * is expected to do that before emitting an instruction.
 *
 * Any other deref type in the chain is considered a programming error.
 */
static void
get_natural_align_mul_offset(nir_deref_instr *deref,
                             unsigned *align_mul, unsigned *align_offset,
                             glsl_type_size_align_func size_align)
{
   unsigned size, align;
   nir_deref_path path;

   nir_deref_path_init(&path, deref, NULL);
   size_align(path.path[0]->type, &size, align_mul);
   *align_offset = 0;

   for (nir_deref_instr **p = &path.path[1]; *p; p++) {
      nir_deref_instr *cur = *p;

      switch (cur->deref_type) {
      case nir_deref_type_struct: {
         /* p starts at path[1], so looking at p - 1 is safe. */
         nir_deref_instr *parent = *(p - 1);
         *align_offset += glsl_get_struct_field_offset(parent->type,
                                                       cur->strct.index);
         break;
      }

      case nir_deref_type_array:
      case nir_deref_type_array_wildcard: {
         /* Element stride: the element size padded to its alignment. */
         size_align(cur->type, &size, &align);
         size = ALIGN_POT(size, align);

         /* Only plain array derefs carry an index source. */
         if (cur->deref_type == nir_deref_type_array &&
             nir_src_is_const(cur->arr.index)) {
            *align_offset += nir_src_as_uint(cur->arr.index) * size;
            break;
         }

         /* The index is unknown (dynamic or wildcard), so assume the worst
          * alignment, which depends on the base align mul, the current
          * offset, and the element stride. Offset is reset to 0.
          *
          * ffs() returns 0 for a zero argument, which would turn the shift
          * below into undefined behavior; a zero stride or offset simply
          * does not constrain the alignment any further.
          */
         unsigned elem_align_mul =
            size ? 1u << (ffs(size) - 1) : *align_mul;
         unsigned base_align_mul =
            *align_offset ? 1u << (ffs(*align_offset) - 1) : *align_mul;

         *align_mul = MIN3(*align_mul, elem_align_mul, base_align_mul);
         *align_offset = 0;
         break;
      }

      default:
         unreachable("Unsupported deref type");
      }
   }

   nir_deref_path_finish(&path);
}
static void
split_deref_copy_instr(nir_builder *b,
nir_deref_instr *dst, nir_deref_instr *src,
enum gl_access_qualifier dst_access,
enum gl_access_qualifier src_access)
enum gl_access_qualifier src_access,
unsigned dst_align_mul, unsigned dst_align_offset,
unsigned src_align_mul, unsigned src_align_offset,
glsl_type_size_align_func size_align)
{
assert(glsl_get_bare_type(dst->type) ==
glsl_get_bare_type(src->type));
if (glsl_type_is_vector_or_scalar(src->type)) {
nir_copy_deref_with_access(b, dst, src, dst_access, src_access);
dst_align_offset = dst_align_mul ? dst_align_offset % dst_align_mul : 0;
src_align_offset = src_align_mul ? src_align_offset % src_align_mul : 0;
nir_copy_deref_with_access_and_align(b, dst, src, dst_access, src_access,
dst_align_mul, dst_align_offset,
src_align_mul, src_align_offset);
} else if (glsl_type_is_struct_or_ifc(src->type)) {
for (unsigned i = 0; i < glsl_get_length(src->type); i++) {
unsigned offset = glsl_get_struct_field_offset(src->type, i);
dst_align_offset += offset;
src_align_offset += offset;
split_deref_copy_instr(b, nir_build_deref_struct(b, dst, i),
nir_build_deref_struct(b, src, i),
dst_access, src_access);
dst_access, src_access,
dst_align_mul, dst_align_offset,
src_align_mul, src_align_offset,
size_align);
}
} else {
assert(glsl_type_is_matrix(src->type) || glsl_type_is_array(src->type));
const struct glsl_type *elem_type = glsl_get_array_element(src->type);
unsigned elem_size, elem_align;
size_align(elem_type, &elem_size, &elem_align);
elem_size = ALIGN_POT(elem_size, elem_align);
/* Reset the offset when crossing an array, and adjust the mul
* accordingly.
*/
unsigned elem_align_mul = 1 << (ffs(elem_size) - 1);
unsigned src_array_align_mul =
src_align_offset ? 1 << (ffs(src_align_offset) - 1) : src_align_mul;
unsigned dst_array_align_mul =
dst_align_offset ? 1 << (ffs(dst_align_offset) - 1) : dst_align_mul;
dst_align_mul = MIN3(dst_align_mul, elem_align_mul, dst_array_align_mul);
src_align_mul = MIN3(src_align_mul, elem_align_mul, src_array_align_mul);
dst_align_offset = 0;
src_align_offset = 0;
split_deref_copy_instr(b, nir_build_deref_array_wildcard(b, dst),
nir_build_deref_array_wildcard(b, src),
dst_access, src_access);
dst_access, src_access,
dst_align_mul, dst_align_offset,
src_align_mul, src_align_offset,
size_align);
}
}
static bool
split_var_copies_impl(nir_function_impl *impl)
split_var_copies_impl(nir_function_impl *impl,
glsl_type_size_align_func size_align)
{
bool progress = false;
......@@ -109,9 +192,26 @@ split_var_copies_impl(nir_function_impl *impl)
nir_instr_as_deref(copy->src[0].ssa->parent_instr);
nir_deref_instr *src =
nir_instr_as_deref(copy->src[1].ssa->parent_instr);
/* Alignment information carried by the copy intrinsic itself. */
unsigned dst_align_mul = nir_intrinsic_dst_align_mul(copy);
unsigned dst_align_offset = nir_intrinsic_dst_align_offset(copy);
unsigned src_align_mul = nir_intrinsic_src_align_mul(copy);
unsigned src_align_offset = nir_intrinsic_src_align_offset(copy);

/* An align_mul of 0 means no explicit alignment was provided: derive it
 * from the deref chain of the side it describes. The destination's
 * alignment must come from dst, not src, since the two chains can differ.
 */
if (!dst_align_mul) {
   get_natural_align_mul_offset(dst, &dst_align_mul,
                                &dst_align_offset, size_align);
}

if (!src_align_mul) {
   get_natural_align_mul_offset(src, &src_align_mul,
                                &src_align_offset, size_align);
}
split_deref_copy_instr(&b, dst, src,
nir_intrinsic_dst_access(copy),
nir_intrinsic_src_access(copy));
nir_intrinsic_src_access(copy),
dst_align_mul, dst_align_offset,
src_align_mul, src_align_offset,
size_align);
progress = true;
}
......@@ -132,11 +232,17 @@ split_var_copies_impl(nir_function_impl *impl)
bool
nir_split_var_copies(nir_shader *shader)
{
glsl_type_size_align_func size_align =
shader->info.stage == MESA_SHADER_KERNEL ?
glsl_get_cl_type_size_align :
glsl_get_natural_size_align_bytes;
bool progress = false;
nir_foreach_function(function, shader) {
if (function->impl)
progress = split_var_copies_impl(function->impl) || progress;
progress =
split_var_copies_impl(function->impl, size_align) || progress;
}
return progress;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment