Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Erik Faye-Lund
mesa
Commits
8921e776
Commit
8921e776
authored
Sep 17, 2020
by
Jesse Natalie
Committed by
Erik Faye-Lund
Oct 28, 2020
Browse files
microsoft/clc: Deal with alignment on derefs instead of intrinsics
parent
c8604be4
Changes
2
Hide whitespace changes
Inline
Side-by-side
src/microsoft/clc/clc_compiler.c
View file @
8921e776
...
...
@@ -945,9 +945,8 @@ get_cast_type(unsigned bit_size)
}
static
void
split_unaligned_load
(
nir_builder
*
b
,
nir_intrinsic_instr
*
intrin
)
split_unaligned_load
(
nir_builder
*
b
,
nir_intrinsic_instr
*
intrin
,
unsigned
alignment
)
{
unsigned
alignment
=
nir_intrinsic_align
(
intrin
);
enum
gl_access_qualifier
access
=
nir_intrinsic_access
(
intrin
);
nir_ssa_def
*
srcs
[
NIR_MAX_VEC_COMPONENTS
*
NIR_MAX_VEC_COMPONENTS
*
sizeof
(
int64_t
)
/
8
];
unsigned
comp_size
=
intrin
->
dest
.
ssa
.
bit_size
/
8
;
...
...
@@ -956,13 +955,14 @@ split_unaligned_load(nir_builder *b, nir_intrinsic_instr *intrin)
b
->
cursor
=
nir_before_instr
(
&
intrin
->
instr
);
nir_deref_instr
*
ptr
=
nir_src_as_deref
(
intrin
->
src
[
0
]);
const
struct
glsl_type
*
cast_type
=
get_cast_type
(
alignment
*
8
);
nir_deref_instr
*
cast
=
nir_build_deref_cast
(
b
,
&
ptr
->
dest
.
ssa
,
ptr
->
mode
,
cast_type
,
alignment
);
unsigned
num_loads
=
DIV_ROUND_UP
(
comp_size
*
num_comps
,
alignment
);
for
(
unsigned
i
=
0
;
i
<
num_loads
;
++
i
)
{
nir_deref_instr
*
elem
=
nir_build_deref_ptr_as_array
(
b
,
cast
,
nir_imm_intN_t
(
b
,
i
,
cast
->
dest
.
ssa
.
bit_size
));
srcs
[
i
]
=
nir_load_deref_with_access
_and_align
(
b
,
elem
,
access
,
alignment
,
0
);
srcs
[
i
]
=
nir_load_deref_with_access
(
b
,
elem
,
access
);
}
nir_ssa_def
*
new_dest
=
nir_extract_bits
(
b
,
srcs
,
num_loads
,
0
,
num_comps
,
intrin
->
dest
.
ssa
.
bit_size
);
...
...
@@ -971,9 +971,8 @@ split_unaligned_load(nir_builder *b, nir_intrinsic_instr *intrin)
}
static
void
split_unaligned_store
(
nir_builder
*
b
,
nir_intrinsic_instr
*
intrin
)
split_unaligned_store
(
nir_builder
*
b
,
nir_intrinsic_instr
*
intrin
,
unsigned
alignment
)
{
unsigned
alignment
=
nir_intrinsic_align
(
intrin
);
enum
gl_access_qualifier
access
=
nir_intrinsic_access
(
intrin
);
assert
(
intrin
->
src
[
1
].
is_ssa
);
...
...
@@ -984,6 +983,7 @@ split_unaligned_store(nir_builder *b, nir_intrinsic_instr *intrin)
b
->
cursor
=
nir_before_instr
(
&
intrin
->
instr
);
nir_deref_instr
*
ptr
=
nir_src_as_deref
(
intrin
->
src
[
0
]);
const
struct
glsl_type
*
cast_type
=
get_cast_type
(
alignment
*
8
);
nir_deref_instr
*
cast
=
nir_build_deref_cast
(
b
,
&
ptr
->
dest
.
ssa
,
ptr
->
mode
,
cast_type
,
alignment
);
...
...
@@ -991,7 +991,7 @@ split_unaligned_store(nir_builder *b, nir_intrinsic_instr *intrin)
for
(
unsigned
i
=
0
;
i
<
num_stores
;
++
i
)
{
nir_ssa_def
*
substore_val
=
nir_extract_bits
(
b
,
&
value
,
1
,
i
*
alignment
*
8
,
1
,
alignment
*
8
);
nir_deref_instr
*
elem
=
nir_build_deref_ptr_as_array
(
b
,
cast
,
nir_imm_intN_t
(
b
,
i
,
cast
->
dest
.
ssa
.
bit_size
));
nir_store_deref_with_access
_and_align
(
b
,
elem
,
substore_val
,
~
0
,
access
,
alignment
,
0
);
nir_store_deref_with_access
(
b
,
elem
,
substore_val
,
~
0
,
access
);
}
nir_instr_remove
(
&
intrin
->
instr
);
...
...
@@ -1017,15 +1017,21 @@ split_unaligned_loads_stores(nir_shader *shader)
if
(
intrin
->
intrinsic
!=
nir_intrinsic_load_deref
&&
intrin
->
intrinsic
!=
nir_intrinsic_store_deref
)
continue
;
unsigned
alignment
=
nir_intrinsic_align
(
intrin
);
nir_deref_instr
*
deref
=
nir_src_as_deref
(
intrin
->
src
[
0
]);
/* Alignment = 0 means naturally aligned. We can load anything at
* 4-byte alignment, except for UBOs (AKA CBs where the granularity
* is 16 bytes.
unsigned
align_mul
=
0
,
align_offset
=
0
;
nir_get_explicit_deref_align
(
deref
,
true
,
&
align_mul
,
&
align_offset
);
if
(
align_mul
==
0
)
{
DebugBreak
();
nir_get_explicit_deref_align
(
deref
,
true
,
&
align_mul
,
&
align_offset
);
}
unsigned
alignment
=
align_offset
?
1
<<
(
ffs
(
align_offset
)
-
1
)
:
align_mul
;
/* We can load anything at 4-byte alignment, except for
* UBOs (AKA CBs where the granularity is 16 bytes).
*/
if
(
alignment
==
0
||
alignment
>=
(
deref
->
mode
==
nir_var_mem_ubo
?
16
:
4
))
if
(
alignment
>=
(
deref
->
mode
==
nir_var_mem_ubo
?
16
:
4
))
continue
;
nir_ssa_def
*
val
;
...
...
@@ -1045,9 +1051,9 @@ split_unaligned_loads_stores(nir_shader *shader)
continue
;
if
(
intrin
->
intrinsic
==
nir_intrinsic_load_deref
)
split_unaligned_load
(
&
b
,
intrin
);
split_unaligned_load
(
&
b
,
intrin
,
alignment
);
else
split_unaligned_store
(
&
b
,
intrin
);
split_unaligned_store
(
&
b
,
intrin
,
alignment
);
progress
=
true
;
}
}
...
...
@@ -1447,7 +1453,6 @@ clc_to_dxil(struct clc_context *ctx,
NIR_PASS_V
(
nir
,
nir_lower_vars_to_ssa
);
NIR_PASS_V
(
nir
,
nir_lower_alu
);
NIR_PASS_V
(
nir
,
nir_opt_dce
);
NIR_PASS_V
(
nir
,
split_unaligned_loads_stores
);
// Needs to come before lower_explicit_io
struct
clc_image_lower_context
image_lower_context
=
{
metadata
,
&
srv_id
,
&
uav_id
};
...
...
@@ -1457,17 +1462,19 @@ clc_to_dxil(struct clc_context *ctx,
NIR_PASS_V
(
nir
,
dxil_lower_sample_to_txf_for_integer_tex
,
int_sampler_states
,
NULL
,
14
.
0
f
);
NIR_PASS_V
(
nir
,
nir_remove_dead_variables
,
nir_var_mem_shared
|
nir_var_function_temp
);
assert
(
nir
->
scratch_size
==
0
);
NIR_PASS_V
(
nir
,
nir_lower_vars_to_explicit_types
,
nir_var_mem_shared
|
nir_var_function_temp
|
nir_var_shader_in
|
nir_var_mem_global
|
nir_var_mem_constant
,
glsl_get_cl_type_size_align
);
NIR_PASS_V
(
nir
,
dxil_nir_lower_ubo_to_temp
);
NIR_PASS_V
(
nir
,
clc_lower_constant_to_ssbo
,
dxil
->
kernel
,
&
uav_id
);
NIR_PASS_V
(
nir
,
clc_lower_global_to_ssbo
);
NIR_PASS_V
(
nir
,
dxil_nir_lower_deref_ssbo
);
NIR_PASS_V
(
nir
,
nir_remove_dead_variables
,
nir_var_mem_shared
|
nir_var_function_temp
);
assert
(
nir
->
scratch_size
==
0
);
NIR_PASS_V
(
nir
,
nir_lower_vars_to_explicit_types
,
nir_var_mem_shared
|
nir_var_function_temp
,
glsl_get_cl_type_size_align
);
NIR_PASS_V
(
nir
,
split_unaligned_loads_stores
);
assert
(
nir
->
info
.
cs
.
ptr_size
==
64
);
NIR_PASS_V
(
nir
,
nir_lower_explicit_io
,
nir_var_mem_ssbo
,
...
...
src/microsoft/clc/clc_nir.c
View file @
8921e776
...
...
@@ -196,10 +196,11 @@ lower_load_kernel_input(nir_builder *b, nir_intrinsic_instr *intr,
nir_u2u
(
b
,
intr
->
src
[
0
].
ssa
,
32
));
nir_deref_instr
*
deref
=
nir_build_deref_cast
(
b
,
ptr
,
nir_var_mem_ubo
,
type
,
bit_size
/
8
);
deref
->
cast
.
align_mul
=
nir_intrinsic_align_mul
(
intr
);
deref
->
cast
.
align_offset
=
nir_intrinsic_align_offset
(
intr
);
nir_ssa_def
*
result
=
nir_load_deref_with_access_and_align
(
b
,
deref
,
(
enum
gl_access_qualifier
)
0
,
nir_intrinsic_align_mul
(
intr
),
nir_intrinsic_align_offset
(
intr
));
nir_load_deref
(
b
,
deref
);
nir_ssa_def_rewrite_uses
(
&
intr
->
dest
.
ssa
,
nir_src_for_ssa
(
result
));
nir_instr_remove
(
&
intr
->
instr
);
return
true
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment