Commit 09f1de97 authored by Jason Ekstrand

anv,i965: Lower away image derefs in the driver

Previously, the back-end compiler turned image access into magic uniform
reads and there was a complex contract between back-end compiler and
driver about setting up and filling out those params.  As of this
commit, both drivers now lower image_deref_load_param_intel intrinsics
to load_uniform intrinsics controlled by the driver and lower the other
image_deref_* intrinsics to image_* intrinsics which take an actual
binding table index.  There are still "magic" uniforms but they are now
added and controlled entirely by the driver and that contract no longer
spans components.

This also has the side-effect of making most image use compile-time
binding table indices.  Previously, all image access pulled the binding
table index from a uniform.  Part of the reason for this was that the
magic uniforms made it difficult to decouple binding table indices from
the uniforms and, since they are indexed completely differently
(especially in Vulkan), it was hard to pull them apart.  Now that the
driver is handling both, it's trivial to decouple the two and provide
actual binding table indices.

Shader-db results on Kaby Lake:

    total instructions in shared programs: 15166872 -> 15164293 (-0.02%)
    instructions in affected programs: 115834 -> 113255 (-2.23%)
    helped: 191
    HURT: 0

    total cycles in shared programs: 571311495 -> 571196465 (-0.02%)
    cycles in affected programs: 4757115 -> 4642085 (-2.42%)
    helped: 73
    HURT: 67

    total spills in shared programs: 10951 -> 10926 (-0.23%)
    spills in affected programs: 742 -> 717 (-3.37%)
    helped: 7
    HURT: 0

    total fills in shared programs: 22226 -> 22201 (-0.11%)
    fills in affected programs: 1146 -> 1121 (-2.18%)
    helped: 7
    HURT: 0
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
parent 0de003be
......@@ -331,9 +331,9 @@ image("samples", dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER])
# variable. The const index specifies which of the six parameters to load.
intrinsic("image_deref_load_param_intel", src_comp=[1], dest_comp=0,
indices=[BASE], flags=[CAN_ELIMINATE, CAN_REORDER])
intrinsic("image_deref_load_raw_intel", src_comp=[1, 1], dest_comp=0,
flags=[CAN_ELIMINATE])
intrinsic("image_deref_store_raw_intel", src_comp=[1, 1, 0])
image("load_raw_intel", src_comp=[1], dest_comp=0,
flags=[CAN_ELIMINATE])
image("store_raw_intel", src_comp=[1, 0])
# Vulkan descriptor set intrinsics
#
......
......@@ -494,16 +494,14 @@ type_size_scalar(const struct glsl_type *type)
}
return size;
case GLSL_TYPE_SAMPLER:
/* Samplers take up no register space, since they're baked in at
* link time.
*/
return 0;
case GLSL_TYPE_ATOMIC_UINT:
case GLSL_TYPE_IMAGE:
/* Samplers, atomics, and images take up no register space, since
* they're baked in at link time.
*/
return 0;
case GLSL_TYPE_SUBROUTINE:
return 1;
case GLSL_TYPE_IMAGE:
return BRW_IMAGE_PARAM_SIZE;
case GLSL_TYPE_VOID:
case GLSL_TYPE_ERROR:
case GLSL_TYPE_INTERFACE:
......
......@@ -216,6 +216,8 @@ public:
nir_intrinsic_instr *instr);
void nir_emit_cs_intrinsic(const brw::fs_builder &bld,
nir_intrinsic_instr *instr);
fs_reg get_nir_image_intrinsic_image(const brw::fs_builder &bld,
nir_intrinsic_instr *instr);
void nir_emit_intrinsic(const brw::fs_builder &bld,
nir_intrinsic_instr *instr);
void nir_emit_tes_intrinsic(const brw::fs_builder &bld,
......@@ -235,7 +237,6 @@ public:
fs_reg get_nir_src(const nir_src &src);
fs_reg get_nir_src_imm(const nir_src &src);
fs_reg get_nir_dest(const nir_dest &dest);
fs_reg get_nir_image_deref(nir_deref_instr *deref);
fs_reg get_indirect_offset(nir_intrinsic_instr *instr);
void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst,
unsigned wr_mask);
......
This diff is collapsed.
......@@ -116,6 +116,8 @@ void brw_nir_lower_fs_outputs(nir_shader *nir);
bool brw_nir_lower_image_load_store(nir_shader *nir,
const struct gen_device_info *devinfo);
void brw_nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin,
nir_ssa_def *index);
nir_shader *brw_postprocess_nir(nir_shader *nir,
const struct brw_compiler *compiler,
......@@ -147,6 +149,9 @@ void brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader,
struct gl_program *prog,
struct brw_stage_prog_data *stage_prog_data);
void brw_nir_lower_glsl_images(nir_shader *shader,
const struct gl_program *prog);
void brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler,
nir_shader *nir,
const struct brw_vs_prog_key *vs_key,
......
......@@ -811,3 +811,44 @@ brw_nir_lower_image_load_store(nir_shader *shader,
return progress;
}
/**
 * Convert an image_deref_* intrinsic into the matching image_* intrinsic.
 *
 * The opcode is swapped for its non-deref counterpart, the image dimension
 * and arrayness (taken from the deref's type) and the access qualifiers and
 * format (taken from the dereferenced variable) are recorded as constant
 * indices on the intrinsic, and source 0 is rewritten from the deref to
 * \p index.
 *
 * Any intrinsic that is not one of the image_deref_* opcodes listed in the
 * switch below hits unreachable().
 */
void
brw_nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin,
                                nir_ssa_def *index)
{
   nir_deref_instr *image_deref = nir_src_as_deref(intrin->src[0]);
   nir_variable *image_var = nir_deref_instr_get_variable(image_deref);

   /* Swap the opcode for the variant that does not take a deref. */
   switch (intrin->intrinsic) {
#define DEREF_TO_INDEXED(op) \
   case nir_intrinsic_image_deref_##op: \
      intrin->intrinsic = nir_intrinsic_image_##op; \
      break;
   DEREF_TO_INDEXED(load)
   DEREF_TO_INDEXED(store)
   DEREF_TO_INDEXED(atomic_add)
   DEREF_TO_INDEXED(atomic_min)
   DEREF_TO_INDEXED(atomic_max)
   DEREF_TO_INDEXED(atomic_and)
   DEREF_TO_INDEXED(atomic_or)
   DEREF_TO_INDEXED(atomic_xor)
   DEREF_TO_INDEXED(atomic_exchange)
   DEREF_TO_INDEXED(atomic_comp_swap)
   DEREF_TO_INDEXED(atomic_fadd)
   DEREF_TO_INDEXED(size)
   DEREF_TO_INDEXED(samples)
   DEREF_TO_INDEXED(load_raw_intel)
   DEREF_TO_INDEXED(store_raw_intel)
#undef DEREF_TO_INDEXED
   default:
      unreachable("Unhanded image intrinsic");
   }

   /* With the deref gone, the information it carried has to live on the
    * intrinsic itself.
    */
   const struct glsl_type *image_type = image_deref->type;
   nir_intrinsic_set_image_dim(intrin, glsl_get_sampler_dim(image_type));
   nir_intrinsic_set_image_array(intrin,
                                 glsl_sampler_type_is_array(image_type));
   nir_intrinsic_set_access(intrin, image_var->data.image.access);
   nir_intrinsic_set_format(intrin, image_var->data.image.format);

   /* Finally, point source 0 at the caller-provided index. */
   nir_src index_src = nir_src_for_ssa(index);
   nir_instr_rewrite_src(&intrin->instr, &intrin->src[0], index_src);
}
......@@ -24,6 +24,7 @@
#include "anv_nir.h"
#include "program/prog_parameter.h"
#include "nir/nir_builder.h"
#include "compiler/brw_nir.h"
struct apply_pipeline_layout_state {
nir_shader *shader;
......@@ -32,6 +33,8 @@ struct apply_pipeline_layout_state {
struct anv_pipeline_layout *layout;
bool add_bounds_checks;
unsigned first_image_uniform;
bool uses_constants;
uint8_t constants_offset;
struct {
......@@ -99,6 +102,9 @@ get_used_bindings_block(nir_block *block,
case nir_intrinsic_image_deref_atomic_comp_swap:
case nir_intrinsic_image_deref_size:
case nir_intrinsic_image_deref_samples:
case nir_intrinsic_image_deref_load_param_intel:
case nir_intrinsic_image_deref_load_raw_intel:
case nir_intrinsic_image_deref_store_raw_intel:
add_deref_src_binding(state, intrin->src[0]);
break;
......@@ -178,6 +184,63 @@ lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin,
nir_instr_remove(&intrin->instr);
}
/**
 * Lower a single image_deref_* intrinsic according to the pipeline layout.
 *
 * The element index within the binding is taken from the array deref (or is
 * zero for a plain variable deref) and, when bounds checks are enabled,
 * clamped to the last element of the binding.
 *
 * - image_deref_load_param_intel is removed and replaced by a load_uniform
 *   whose base is the start of this binding's image params and whose offset
 *   selects the array element and the requested parameter.
 * - Every other image_deref_* intrinsic is rewritten in place by
 *   brw_nir_rewrite_image_intrinsic() with the binding's surface offset
 *   added to the element index.
 */
static void
lower_image_intrinsic(nir_intrinsic_instr *intrin,
                      struct apply_pipeline_layout_state *state)
{
   nir_deref_instr *image_deref = nir_src_as_deref(intrin->src[0]);
   nir_variable *image_var = nir_deref_instr_get_variable(image_deref);

   const unsigned set = image_var->data.descriptor_set;
   const unsigned binding = image_var->data.binding;
   const unsigned num_elems =
      state->layout->set[set].layout->binding[binding].array_size;

   nir_builder *b = &state->builder;
   b->cursor = nir_before_instr(&intrin->instr);

   /* Which element of the binding is being accessed? */
   nir_ssa_def *elem = NULL;
   if (image_deref->deref_type == nir_deref_type_var) {
      elem = nir_imm_int(b, 0);
   } else {
      assert(image_deref->deref_type == nir_deref_type_array);
      elem = nir_ssa_for_src(b, image_deref->arr.index, 1);
      if (state->add_bounds_checks)
         elem = nir_umin(b, elem, nir_imm_int(b, num_elems - 1));
   }

   if (intrin->intrinsic != nir_intrinsic_image_deref_load_param_intel) {
      /* Regular image access: offset the element index by the binding's
       * surface offset and switch to the non-deref intrinsic.
       */
      unsigned first_surface = state->set[set].surface_offsets[binding];
      nir_ssa_def *surface =
         nir_iadd(b, elem, nir_imm_int(b, first_surface));
      brw_nir_rewrite_image_intrinsic(intrin, surface);
      return;
   }

   /* Image param load: replace the intrinsic with a load_uniform.  The
    * original instruction is unlinked here but its base index and
    * destination are still read below.
    */
   b->cursor = nir_instr_remove(&intrin->instr);

   /* Offset = element * size of one image's params + 16 per parameter. */
   const unsigned param_idx = nir_intrinsic_base(intrin);
   nir_ssa_def *byte_offset =
      nir_imul(b, elem, nir_imm_int(b, BRW_IMAGE_PARAM_SIZE * 4));
   byte_offset = nir_iadd(b, byte_offset, nir_imm_int(b, param_idx * 16));

   nir_intrinsic_instr *uniform_load =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
   nir_intrinsic_set_base(uniform_load, state->first_image_uniform +
                                        state->set[set].image_offsets[binding] *
                                        BRW_IMAGE_PARAM_SIZE * 4);
   nir_intrinsic_set_range(uniform_load,
                           num_elems * BRW_IMAGE_PARAM_SIZE * 4);
   uniform_load->src[0] = nir_src_for_ssa(byte_offset);

   /* The new load produces a value of the same shape as the old one. */
   uniform_load->num_components = intrin->dest.ssa.num_components;
   nir_ssa_dest_init(&uniform_load->instr, &uniform_load->dest,
                     intrin->dest.ssa.num_components,
                     intrin->dest.ssa.bit_size, NULL);
   nir_builder_instr_insert(b, &uniform_load->instr);

   nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                            nir_src_for_ssa(&uniform_load->dest.ssa));
}
static void
lower_load_constant(nir_intrinsic_instr *intrin,
struct apply_pipeline_layout_state *state)
......@@ -318,6 +381,23 @@ apply_pipeline_layout_block(nir_block *block,
case nir_intrinsic_vulkan_resource_reindex:
lower_res_reindex_intrinsic(intrin, state);
break;
case nir_intrinsic_image_deref_load:
case nir_intrinsic_image_deref_store:
case nir_intrinsic_image_deref_atomic_add:
case nir_intrinsic_image_deref_atomic_min:
case nir_intrinsic_image_deref_atomic_max:
case nir_intrinsic_image_deref_atomic_and:
case nir_intrinsic_image_deref_atomic_or:
case nir_intrinsic_image_deref_atomic_xor:
case nir_intrinsic_image_deref_atomic_exchange:
case nir_intrinsic_image_deref_atomic_comp_swap:
case nir_intrinsic_image_deref_size:
case nir_intrinsic_image_deref_samples:
case nir_intrinsic_image_deref_load_param_intel:
case nir_intrinsic_image_deref_load_raw_intel:
case nir_intrinsic_image_deref_store_raw_intel:
lower_image_intrinsic(intrin, state);
break;
case nir_intrinsic_load_constant:
lower_load_constant(intrin, state);
break;
......@@ -436,6 +516,39 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
}
}
unsigned image_uniform;
if (map->image_count > 0) {
assert(map->image_count <= MAX_IMAGES);
assert(shader->num_uniforms == prog_data->nr_params * 4);
state.first_image_uniform = shader->num_uniforms;
uint32_t *param = brw_stage_prog_data_add_params(prog_data,
map->image_count *
BRW_IMAGE_PARAM_SIZE);
struct anv_push_constants *null_data = NULL;
const struct brw_image_param *image_param = null_data->images;
for (uint32_t i = 0; i < map->image_count; i++) {
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
(uintptr_t)&image_param->surface_idx, 1);
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
(uintptr_t)image_param->offset, 2);
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
(uintptr_t)image_param->size, 3);
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
(uintptr_t)image_param->stride, 4);
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET,
(uintptr_t)image_param->tiling, 3);
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
(uintptr_t)image_param->swizzling, 2);
param += BRW_IMAGE_PARAM_SIZE;
image_param ++;
}
assert(param == prog_data->param + prog_data->nr_params);
shader->num_uniforms += map->image_count * BRW_IMAGE_PARAM_SIZE * 4;
assert(shader->num_uniforms == prog_data->nr_params * 4);
}
nir_foreach_variable(var, &shader->uniforms) {
const struct glsl_type *glsl_type = glsl_without_array(var->type);
......@@ -479,51 +592,5 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
nir_metadata_dominance);
}
if (map->image_count > 0) {
assert(map->image_count <= MAX_IMAGES);
nir_foreach_variable(var, &shader->uniforms) {
if (glsl_type_is_image(var->type) ||
(glsl_type_is_array(var->type) &&
glsl_type_is_image(glsl_get_array_element(var->type)))) {
/* Images are represented as uniform push constants and the actual
* information required for reading/writing to/from the image is
* stored in the uniform.
*/
unsigned set = var->data.descriptor_set;
unsigned binding = var->data.binding;
unsigned image_index = state.set[set].image_offsets[binding];
var->data.driver_location = shader->num_uniforms +
image_index * BRW_IMAGE_PARAM_SIZE * 4;
}
}
uint32_t *param = brw_stage_prog_data_add_params(prog_data,
map->image_count *
BRW_IMAGE_PARAM_SIZE);
struct anv_push_constants *null_data = NULL;
const struct brw_image_param *image_param = null_data->images;
for (uint32_t i = 0; i < map->image_count; i++) {
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
(uintptr_t)&image_param->surface_idx, 1);
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
(uintptr_t)image_param->offset, 2);
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
(uintptr_t)image_param->size, 3);
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
(uintptr_t)image_param->stride, 4);
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET,
(uintptr_t)image_param->tiling, 3);
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
(uintptr_t)image_param->swizzling, 2);
param += BRW_IMAGE_PARAM_SIZE;
image_param ++;
}
assert(param == prog_data->param + prog_data->nr_params);
shader->num_uniforms += map->image_count * BRW_IMAGE_PARAM_SIZE * 4;
}
ralloc_free(mem_ctx);
}
......@@ -523,6 +523,8 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
if (nir->info.num_ssbos > 0 || nir->info.num_images > 0)
pipeline->needs_data_cache = true;
NIR_PASS_V(nir, brw_nir_lower_image_load_store, compiler->devinfo);
/* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
if (layout) {
anv_nir_apply_pipeline_layout(pipeline, layout, nir, prog_data,
......@@ -532,8 +534,6 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
if (nir->info.stage != MESA_SHADER_COMPUTE)
brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
NIR_PASS_V(nir, brw_nir_lower_image_load_store, compiler->devinfo);
assert(nir->num_uniforms == prog_data->nr_params * 4);
stage->nir = nir;
......
......@@ -23,6 +23,7 @@
#include "compiler/brw_nir.h"
#include "compiler/glsl/ir_uniform.h"
#include "compiler/nir/nir_builder.h"
#include "brw_program.h"
static void
......@@ -267,3 +268,132 @@ brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader,
stage_prog_data->param[4 * p + i] = BRW_PARAM_BUILTIN_ZERO;
}
}
/**
 * Flatten an array-of-arrays deref chain into a single linear offset.
 *
 * Walks from \p deref up to the variable deref, accumulating
 * index * stride at every array level, where the stride starts at
 * \p elem_size and is multiplied by the parent array's length at each
 * level.  The result is clamped (unsigned min) to the offset of the last
 * element.
 *
 * \param b          builder used to emit the arithmetic
 * \param deref      innermost deref of the chain; only array and var
 *                   derefs are expected
 * \param elem_size  size of one innermost element, in whatever unit the
 *                   caller wants the offset expressed in
 * \return SSA value holding the clamped offset
 */
static nir_ssa_def *
get_aoa_deref_offset(nir_builder *b,
                     nir_deref_instr *deref,
                     unsigned elem_size)
{
   unsigned stride = elem_size;
   nir_ssa_def *total = nir_imm_int(b, 0);

   for (nir_deref_instr *level = deref;
        level->deref_type != nir_deref_type_var; ) {
      assert(level->deref_type == nir_deref_type_array);

      nir_ssa_def *idx = nir_ssa_for_src(b, level->arr.index, 1);
      assert(level->arr.index.ssa);
      nir_ssa_def *scaled = nir_imul(b, idx, nir_imm_int(b, stride));
      total = nir_iadd(b, total, scaled);

      /* The stride of the next level out is the full size of the array
       * we just indexed into.
       */
      nir_deref_instr *parent = nir_deref_instr_parent(level);
      assert(glsl_type_is_array(parent->type));
      stride *= glsl_get_length(parent->type);
      level = parent;
   }

   /* Accessing an invalid surface index with the dataport can result in a
    * hang.  According to the spec "if the index used to select an individual
    * element is negative or greater than or equal to the size of the array,
    * the results of the operation are undefined but may not lead to
    * termination" -- which is one of the possible outcomes of the hang.
    * Clamp the index to prevent access outside of the array bounds.
    *
    * At this point stride is the size of the whole variable, so
    * stride - elem_size is the offset of its last element.
    */
   nir_ssa_def *last_elem = nir_imm_int(b, stride - elem_size);
   return nir_umin(b, total, last_elem);
}
/**
 * Lower GLSL image variables and image_deref_* intrinsics for i965.
 *
 * The pass does two things:
 *
 *  1. Every uniform variable whose type contains an image gets a
 *     driver_location at the current end of the uniform space, and
 *     shader->num_uniforms grows by BRW_IMAGE_PARAM_SIZE * 4 for each image
 *     in the variable.
 *
 *  2. Every image intrinsic in the entrypoint is lowered:
 *      - image_deref_load_param_intel is replaced by a load_uniform based at
 *        the variable's driver_location, with an offset that selects the
 *        array element and the requested parameter.
 *      - all other image_deref_* intrinsics are rewritten by
 *        brw_nir_rewrite_image_intrinsic() to take an index made of the
 *        variable's opaque index for this stage (from the GL uniform
 *        storage) plus the flattened array-of-arrays element offset.
 *
 * \param shader  shader to lower; modified in place
 * \param prog    GL program whose uniform storage supplies the per-stage
 *                opaque index of each image variable
 */
void
brw_nir_lower_glsl_images(nir_shader *shader,
                          const struct gl_program *prog)
{
   /* We put image uniforms at the end */
   nir_foreach_variable(var, &shader->uniforms) {
      if (!var->type->contains_image())
         continue;

      /* GL only allows arrays of arrays of images, so stripping every array
       * level must leave an image type.
       */
      assert(var->type->without_array()->is_image());
      const unsigned num_images = MAX2(1, var->type->arrays_of_arrays_size());

      var->data.driver_location = shader->num_uniforms;
      shader->num_uniforms += num_images * BRW_IMAGE_PARAM_SIZE * 4;
   }

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      /* The _safe iterator is needed because the load_param case removes
       * the instruction it is visiting.
       */
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_image_deref_load:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
         case nir_intrinsic_image_deref_atomic_min:
         case nir_intrinsic_image_deref_atomic_max:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
         case nir_intrinsic_image_deref_atomic_exchange:
         case nir_intrinsic_image_deref_atomic_comp_swap:
         case nir_intrinsic_image_deref_size:
         case nir_intrinsic_image_deref_samples:
         case nir_intrinsic_image_deref_load_raw_intel:
         case nir_intrinsic_image_deref_store_raw_intel: {
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            nir_variable *var = nir_deref_instr_get_variable(deref);

            /* Index of this variable's first image for the current stage,
             * as recorded in the GL uniform storage.
             */
            struct gl_uniform_storage *storage =
               &prog->sh.data->UniformStorage[var->data.location];
            const unsigned image_var_idx =
               storage->opaque[shader->info.stage].index;

            /* Element size 1: the offset counts whole images. */
            b.cursor = nir_before_instr(&intrin->instr);
            nir_ssa_def *index = nir_iadd(&b, nir_imm_int(&b, image_var_idx),
                                          get_aoa_deref_offset(&b, deref, 1));
            brw_nir_rewrite_image_intrinsic(intrin, index);
            break;
         }

         case nir_intrinsic_image_deref_load_param_intel: {
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            nir_variable *var = nir_deref_instr_get_variable(deref);
            const unsigned num_images =
               MAX2(1, var->type->arrays_of_arrays_size());

            /* The intrinsic is unlinked here but its base index and
             * destination are still read below.
             */
            b.cursor = nir_instr_remove(&intrin->instr);

            /* Offset = element offset in units of one image's params, plus
             * 16 per parameter.
             */
            const unsigned param = nir_intrinsic_base(intrin);
            nir_ssa_def *offset =
               get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4);
            offset = nir_iadd(&b, offset, nir_imm_int(&b, param * 16));

            nir_intrinsic_instr *load =
               nir_intrinsic_instr_create(b.shader,
                                          nir_intrinsic_load_uniform);
            nir_intrinsic_set_base(load, var->data.driver_location);
            nir_intrinsic_set_range(load, num_images * BRW_IMAGE_PARAM_SIZE * 4);
            load->src[0] = nir_src_for_ssa(offset);
            load->num_components = intrin->dest.ssa.num_components;
            nir_ssa_dest_init(&load->instr, &load->dest,
                              intrin->dest.ssa.num_components,
                              intrin->dest.ssa.bit_size, NULL);
            nir_builder_instr_insert(&b, &load->instr);

            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                     nir_src_for_ssa(&load->dest.ssa));
            break;
         }

         default:
            break;
         }
      }
   }
}
......@@ -140,6 +140,7 @@ brw_create_nir(struct brw_context *brw,
}
NIR_PASS_V(nir, brw_nir_lower_uniforms, is_scalar);
NIR_PASS_V(nir, brw_nir_lower_glsl_images, prog);
return nir;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment