Commit 81e51b41 authored by Jason Ekstrand's avatar Jason Ekstrand

nir: Make nir_constant a vector rather than a matrix

Most places in NIR, we treat matrices like arrays.  The one annoying
exception to this has been nir_constant where a matrix is a first-class
thing.  This commit changes that so a matrix nir_constant is the same as
an array nir_constant.  This makes matrix nir_constants a tiny bit more
expensive but shrinks all others by 96B.
Reviewed-by: Karol Herbst's avatarKarol Herbst <kherbst@redhat.com>
parent b019fe8a
......@@ -121,29 +121,34 @@ copy_constant_to_storage(union gl_constant_value *storage,
unsigned dmul = glsl_base_type_is_64bit(base_type) ? 2 : 1;
int i = 0;
for (unsigned int column = 0; column < n_columns; column++) {
if (n_columns > 0) {
const struct glsl_type *column_type = glsl_get_column_type(type);
for (unsigned int column = 0; column < n_columns; column++) {
copy_constant_to_storage(&storage[i], val->elements[column],
column_type, boolean_true);
i += n_rows * dmul;
}
} else {
for (unsigned int row = 0; row < n_rows; row++) {
switch (base_type) {
case GLSL_TYPE_UINT:
storage[i].u = val->values[column][row].u32;
storage[i].u = val->values[row].u32;
break;
case GLSL_TYPE_INT:
case GLSL_TYPE_SAMPLER:
storage[i].i = val->values[column][row].i32;
storage[i].i = val->values[row].i32;
break;
case GLSL_TYPE_FLOAT:
storage[i].f = val->values[column][row].f32;
storage[i].f = val->values[row].f32;
break;
case GLSL_TYPE_DOUBLE:
case GLSL_TYPE_UINT64:
case GLSL_TYPE_INT64:
/* XXX need to check on big-endian */
memcpy(&storage[i * 2].u,
&val->values[column][row].f64,
sizeof(double));
memcpy(&storage[i * 2].u, &val->values[row].f64, sizeof(double));
break;
case GLSL_TYPE_BOOL:
storage[i].b = val->values[column][row].u32 ? boolean_true : 0;
storage[i].b = val->values[row].u32 ? boolean_true : 0;
break;
case GLSL_TYPE_ARRAY:
case GLSL_TYPE_STRUCT:
......
......@@ -307,7 +307,7 @@ nir_visitor::constant_copy(ir_constant *ir, void *mem_ctx)
assert(cols == 1);
for (unsigned r = 0; r < rows; r++)
ret->values[0][r].u32 = ir->value.u[r];
ret->values[r].u32 = ir->value.u[r];
break;
......@@ -316,21 +316,49 @@ nir_visitor::constant_copy(ir_constant *ir, void *mem_ctx)
assert(cols == 1);
for (unsigned r = 0; r < rows; r++)
ret->values[0][r].i32 = ir->value.i[r];
ret->values[r].i32 = ir->value.i[r];
break;
case GLSL_TYPE_FLOAT:
for (unsigned c = 0; c < cols; c++) {
for (unsigned r = 0; r < rows; r++)
ret->values[c][r].f32 = ir->value.f[c * rows + r];
}
break;
case GLSL_TYPE_DOUBLE:
for (unsigned c = 0; c < cols; c++) {
for (unsigned r = 0; r < rows; r++)
ret->values[c][r].f64 = ir->value.d[c * rows + r];
if (cols > 1) {
ret->elements = ralloc_array(mem_ctx, nir_constant *, cols);
ret->num_elements = cols;
for (unsigned c = 0; c < cols; c++) {
nir_constant *col_const = rzalloc(mem_ctx, nir_constant);
col_const->num_elements = 0;
switch (ir->type->base_type) {
case GLSL_TYPE_FLOAT:
for (unsigned r = 0; r < rows; r++)
col_const->values[r].f32 = ir->value.f[c * rows + r];
break;
case GLSL_TYPE_DOUBLE:
for (unsigned r = 0; r < rows; r++)
col_const->values[r].f64 = ir->value.d[c * rows + r];
break;
default:
unreachable("Cannot get here from the first level switch");
}
ret->elements[c] = col_const;
}
} else {
switch (ir->type->base_type) {
case GLSL_TYPE_FLOAT:
for (unsigned r = 0; r < rows; r++)
ret->values[r].f32 = ir->value.f[r];
break;
case GLSL_TYPE_DOUBLE:
for (unsigned r = 0; r < rows; r++)
ret->values[r].f64 = ir->value.d[r];
break;
default:
unreachable("Cannot get here from the first level switch");
}
}
break;
......@@ -339,7 +367,7 @@ nir_visitor::constant_copy(ir_constant *ir, void *mem_ctx)
assert(cols == 1);
for (unsigned r = 0; r < rows; r++)
ret->values[0][r].u64 = ir->value.u64[r];
ret->values[r].u64 = ir->value.u64[r];
break;
case GLSL_TYPE_INT64:
......@@ -347,7 +375,7 @@ nir_visitor::constant_copy(ir_constant *ir, void *mem_ctx)
assert(cols == 1);
for (unsigned r = 0; r < rows; r++)
ret->values[0][r].i64 = ir->value.i64[r];
ret->values[r].i64 = ir->value.i64[r];
break;
case GLSL_TYPE_BOOL:
......@@ -355,7 +383,7 @@ nir_visitor::constant_copy(ir_constant *ir, void *mem_ctx)
assert(cols == 1);
for (unsigned r = 0; r < rows; r++)
ret->values[0][r].b = ir->value.b[r];
ret->values[r].b = ir->value.b[r];
break;
......
......@@ -148,7 +148,7 @@ typedef struct nir_constant {
* by the type associated with the \c nir_variable. Constants may be
* scalars, vectors, or matrices.
*/
nir_const_value values[NIR_MAX_MATRIX_COLUMNS][NIR_MAX_VEC_COMPONENTS];
nir_const_value values[NIR_MAX_VEC_COMPONENTS];
/* we could get this from the var->type but makes clone *much* easier to
* not have to care about the type.
......
......@@ -32,21 +32,9 @@ build_constant_load(nir_builder *b, nir_deref_instr *deref, nir_constant *c)
nir_load_const_instr_create(b->shader,
glsl_get_vector_elements(deref->type),
glsl_get_bit_size(deref->type));
memcpy(load->value, c->values[0], sizeof(*load->value) * load->def.num_components);
memcpy(load->value, c->values, sizeof(*load->value) * load->def.num_components);
nir_builder_instr_insert(b, &load->instr);
nir_store_deref(b, deref, &load->def, ~0);
} else if (glsl_type_is_matrix(deref->type)) {
unsigned cols = glsl_get_matrix_columns(deref->type);
unsigned rows = glsl_get_vector_elements(deref->type);
unsigned bit_size = glsl_get_bit_size(deref->type);
for (unsigned i = 0; i < cols; i++) {
nir_load_const_instr *load =
nir_load_const_instr_create(b->shader, rows, bit_size);
memcpy(load->value, c->values[i], sizeof(*load->value) * load->def.num_components);
nir_builder_instr_insert(b, &load->instr);
nir_store_deref(b, nir_build_deref_array_imm(b, deref, i),
&load->def, ~0);
}
} else if (glsl_type_is_struct_or_ifc(deref->type)) {
unsigned len = glsl_get_length(deref->type);
for (unsigned i = 0; i < len; i++) {
......@@ -54,7 +42,8 @@ build_constant_load(nir_builder *b, nir_deref_instr *deref, nir_constant *c)
c->elements[i]);
}
} else {
assert(glsl_type_is_array(deref->type));
assert(glsl_type_is_array(deref->type) ||
glsl_type_is_matrix(deref->type));
unsigned len = glsl_get_length(deref->type);
for (unsigned i = 0; i < len; i++) {
build_constant_load(b,
......
......@@ -291,7 +291,7 @@ print_constant(nir_constant *c, const struct glsl_type *type, print_state *state
FILE *fp = state->fp;
const unsigned rows = glsl_get_vector_elements(type);
const unsigned cols = glsl_get_matrix_columns(type);
unsigned i, j;
unsigned i;
switch (glsl_get_base_type(type)) {
case GLSL_TYPE_BOOL:
......@@ -300,7 +300,7 @@ print_constant(nir_constant *c, const struct glsl_type *type, print_state *state
for (i = 0; i < rows; i++) {
if (i > 0) fprintf(fp, ", ");
fprintf(fp, "%s", c->values[0][i].b ? "true" : "false");
fprintf(fp, "%s", c->values[i].b ? "true" : "false");
}
break;
......@@ -311,7 +311,7 @@ print_constant(nir_constant *c, const struct glsl_type *type, print_state *state
for (i = 0; i < rows; i++) {
if (i > 0) fprintf(fp, ", ");
fprintf(fp, "0x%02x", c->values[0][i].u8);
fprintf(fp, "0x%02x", c->values[i].u8);
}
break;
......@@ -322,7 +322,7 @@ print_constant(nir_constant *c, const struct glsl_type *type, print_state *state
for (i = 0; i < rows; i++) {
if (i > 0) fprintf(fp, ", ");
fprintf(fp, "0x%04x", c->values[0][i].u16);
fprintf(fp, "0x%04x", c->values[i].u16);
}
break;
......@@ -333,33 +333,43 @@ print_constant(nir_constant *c, const struct glsl_type *type, print_state *state
for (i = 0; i < rows; i++) {
if (i > 0) fprintf(fp, ", ");
fprintf(fp, "0x%08x", c->values[0][i].u32);
fprintf(fp, "0x%08x", c->values[i].u32);
}
break;
case GLSL_TYPE_FLOAT16:
for (i = 0; i < cols; i++) {
for (j = 0; j < rows; j++) {
if (i + j > 0) fprintf(fp, ", ");
fprintf(fp, "%f", _mesa_half_to_float(c->values[i][j].u16));
}
}
break;
case GLSL_TYPE_FLOAT:
for (i = 0; i < cols; i++) {
for (j = 0; j < rows; j++) {
if (i + j > 0) fprintf(fp, ", ");
fprintf(fp, "%f", c->values[i][j].f32);
}
}
break;
case GLSL_TYPE_DOUBLE:
for (i = 0; i < cols; i++) {
for (j = 0; j < rows; j++) {
if (i + j > 0) fprintf(fp, ", ");
fprintf(fp, "%f", c->values[i][j].f64);
if (cols > 1) {
for (i = 0; i < cols; i++) {
if (i > 0) fprintf(fp, ", ");
print_constant(c->elements[i], glsl_get_column_type(type), state);
}
} else {
switch (glsl_get_base_type(type)) {
case GLSL_TYPE_FLOAT16:
for (i = 0; i < rows; i++) {
if (i > 0) fprintf(fp, ", ");
fprintf(fp, "%f", _mesa_half_to_float(c->values[i].u16));
}
break;
case GLSL_TYPE_FLOAT:
for (i = 0; i < rows; i++) {
if (i > 0) fprintf(fp, ", ");
fprintf(fp, "%f", c->values[i].f32);
}
break;
case GLSL_TYPE_DOUBLE:
for (i = 0; i < rows; i++) {
if (i > 0) fprintf(fp, ", ");
fprintf(fp, "%f", c->values[i].f64);
}
break;
default:
unreachable("Cannot get here from the first level switch");
}
}
break;
......@@ -371,7 +381,7 @@ print_constant(nir_constant *c, const struct glsl_type *type, print_state *state
for (i = 0; i < cols; i++) {
if (i > 0) fprintf(fp, ", ");
fprintf(fp, "0x%08" PRIx64, c->values[0][i].u64);
fprintf(fp, "0x%08" PRIx64, c->values[i].u64);
}
break;
......
......@@ -236,31 +236,19 @@ vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant,
nir_load_const_instr *load =
nir_load_const_instr_create(b->shader, num_components, bit_size);
memcpy(load->value, constant->values[0],
memcpy(load->value, constant->values,
sizeof(nir_const_value) * load->def.num_components);
nir_instr_insert_before_cf_list(&b->nb.impl->body, &load->instr);
val->def = &load->def;
} else {
assert(glsl_type_is_matrix(type));
unsigned rows = glsl_get_vector_elements(val->type);
unsigned columns = glsl_get_matrix_columns(val->type);
val->elems = ralloc_array(b, struct vtn_ssa_value *, columns);
for (unsigned i = 0; i < columns; i++) {
struct vtn_ssa_value *col_val = rzalloc(b, struct vtn_ssa_value);
col_val->type = glsl_get_column_type(val->type);
nir_load_const_instr *load =
nir_load_const_instr_create(b->shader, rows, bit_size);
memcpy(load->value, constant->values[i],
sizeof(nir_const_value) * load->def.num_components);
nir_instr_insert_before_cf_list(&b->nb.impl->body, &load->instr);
col_val->def = &load->def;
val->elems[i] = col_val;
}
const struct glsl_type *column_type = glsl_get_column_type(val->type);
for (unsigned i = 0; i < columns; i++)
val->elems[i] = vtn_const_ssa_value(b, constant->elements[i],
column_type);
}
break;
}
......@@ -1542,7 +1530,7 @@ vtn_null_constant(struct vtn_builder *b, struct vtn_type *type)
nir_address_format addr_format = vtn_mode_to_address_format(b, mode);
const nir_const_value *null_value = nir_address_format_null_value(addr_format);
memcpy(c->values[0], null_value,
memcpy(c->values, null_value,
sizeof(nir_const_value) * nir_address_format_num_components(addr_format));
break;
}
......@@ -1662,7 +1650,7 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
opcode == SpvOpSpecConstantFalse)
int_val = get_specialization(b, val, int_val);
val->constant->values[0][0].b = int_val != 0;
val->constant->values[0].b = int_val != 0;
break;
}
......@@ -1673,16 +1661,16 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
int bit_size = glsl_get_bit_size(val->type->type);
switch (bit_size) {
case 64:
val->constant->values[0][0].u64 = vtn_u64_literal(&w[3]);
val->constant->values[0].u64 = vtn_u64_literal(&w[3]);
break;
case 32:
val->constant->values[0][0].u32 = w[3];
val->constant->values[0].u32 = w[3];
break;
case 16:
val->constant->values[0][0].u16 = w[3];
val->constant->values[0].u16 = w[3];
break;
case 8:
val->constant->values[0][0].u8 = w[3];
val->constant->values[0].u8 = w[3];
break;
default:
vtn_fail("Unsupported SpvOpConstant bit size: %u", bit_size);
......@@ -1697,17 +1685,17 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
int bit_size = glsl_get_bit_size(val->type->type);
switch (bit_size) {
case 64:
val->constant->values[0][0].u64 =
val->constant->values[0].u64 =
get_specialization64(b, val, vtn_u64_literal(&w[3]));
break;
case 32:
val->constant->values[0][0].u32 = get_specialization(b, val, w[3]);
val->constant->values[0].u32 = get_specialization(b, val, w[3]);
break;
case 16:
val->constant->values[0][0].u16 = get_specialization(b, val, w[3]);
val->constant->values[0].u16 = get_specialization(b, val, w[3]);
break;
case 8:
val->constant->values[0][0].u8 = get_specialization(b, val, w[3]);
val->constant->values[0].u8 = get_specialization(b, val, w[3]);
break;
default:
vtn_fail("Unsupported SpvOpSpecConstant bit size");
......@@ -1741,20 +1729,11 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
case vtn_base_type_vector: {
assert(glsl_type_is_vector(val->type->type));
for (unsigned i = 0; i < elem_count; i++)
val->constant->values[0][i] = elems[i]->values[0][0];
val->constant->values[i] = elems[i]->values[0];
break;
}
case vtn_base_type_matrix:
assert(glsl_type_is_matrix(val->type->type));
for (unsigned i = 0; i < elem_count; i++) {
unsigned components =
glsl_get_components(glsl_get_column_type(val->type->type));
memcpy(val->constant->values[i], elems[i]->values,
sizeof(nir_const_value) * components);
}
break;
case vtn_base_type_struct:
case vtn_base_type_array:
ralloc_steal(val->constant, elems);
......@@ -1798,11 +1777,11 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
if (v0->value_type == vtn_value_type_constant) {
for (unsigned i = 0; i < len0; i++)
combined[i] = v0->constant->values[0][i];
combined[i] = v0->constant->values[i];
}
if (v1->value_type == vtn_value_type_constant) {
for (unsigned i = 0; i < len1; i++)
combined[len0 + i] = v1->constant->values[0][i];
combined[len0 + i] = v1->constant->values[i];
}
for (unsigned i = 0, j = 0; i < count - 6; i++, j++) {
......@@ -1811,12 +1790,12 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
/* If component is not used, set the value to a known constant
* to detect if it is wrongly used.
*/
val->constant->values[0][j] = undef;
val->constant->values[j] = undef;
} else {
vtn_fail_if(comp >= len0 + len1,
"All Component literals must either be FFFFFFFF "
"or in [0, N - 1] (inclusive).");
val->constant->values[0][j] = combined[comp];
val->constant->values[j] = combined[comp];
}
}
break;
......@@ -1840,7 +1819,6 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
}
int elem = -1;
int col = 0;
const struct vtn_type *type = comp->type;
for (unsigned i = deref_start; i < count; i++) {
vtn_fail_if(w[i] > type->length,
......@@ -1855,12 +1833,6 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
break;
case vtn_base_type_matrix:
assert(col == 0 && elem == -1);
col = w[i];
elem = 0;
type = type->array_element;
break;
case vtn_base_type_array:
c = &(*c)->elements[w[i]];
type = type->array_element;
......@@ -1883,7 +1855,7 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
} else {
unsigned num_components = type->length;
for (unsigned i = 0; i < num_components; i++)
val->constant->values[0][i] = (*c)->values[col][elem + i];
val->constant->values[i] = (*c)->values[elem + i];
}
} else {
struct vtn_value *insert =
......@@ -1894,7 +1866,7 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
} else {
unsigned num_components = type->length;
for (unsigned i = 0; i < num_components; i++)
(*c)->values[col][elem + i] = insert->constant->values[0][i];
(*c)->values[elem + i] = insert->constant->values[i];
}
}
break;
......@@ -1946,7 +1918,7 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
unsigned j = swap ? 1 - i : i;
for (unsigned c = 0; c < src_comps; c++)
src[j][c] = src_val->constant->values[0][c];
src[j][c] = src_val->constant->values[c];
}
/* fix up fixed size sources */
......@@ -1972,7 +1944,7 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
nir_const_value *srcs[3] = {
src[0], src[1], src[2],
};
nir_eval_const_opcode(op, val->constant->values[0], num_components, bit_size, srcs);
nir_eval_const_opcode(op, val->constant->values, num_components, bit_size, srcs);
break;
} /* default */
}
......@@ -2376,7 +2348,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
unsigned bit_size = glsl_get_bit_size(vec_type->type);
for (uint32_t i = 0; i < 4; i++) {
const nir_const_value *cvec =
gather_offsets->constant->elements[i]->values[0];
gather_offsets->constant->elements[i]->values;
for (uint32_t j = 0; j < 2; j++) {
switch (bit_size) {
case 8: instr->tg4_offsets[i][j] = cvec[j].i8; break;
......@@ -4746,7 +4718,7 @@ spirv_to_nir(const uint32_t *words, size_t word_count,
glsl_vector_type(GLSL_TYPE_UINT, 3));
nir_const_value *const_size =
b->workgroup_size_builtin->constant->values[0];
b->workgroup_size_builtin->constant->values;
b->shader->info.cs.local_size[0] = const_size[0].u32;
b->shader->info.cs.local_size[1] = const_size[1].u32;
......
......@@ -97,17 +97,17 @@ vtn_handle_amd_shader_ballot_instruction(struct vtn_builder *b, SpvOp ext_opcode
if (intrin->intrinsic == nir_intrinsic_quad_swizzle_amd) {
struct vtn_value *val = vtn_value(b, w[6], vtn_value_type_constant);
unsigned mask = val->constant->values[0][0].u32 |
val->constant->values[0][1].u32 << 2 |
val->constant->values[0][2].u32 << 4 |
val->constant->values[0][3].u32 << 6;
unsigned mask = val->constant->values[0].u32 |
val->constant->values[1].u32 << 2 |
val->constant->values[2].u32 << 4 |
val->constant->values[3].u32 << 6;
nir_intrinsic_set_swizzle_mask(intrin, mask);
} else if (intrin->intrinsic == nir_intrinsic_masked_swizzle_amd) {
struct vtn_value *val = vtn_value(b, w[6], vtn_value_type_constant);
unsigned mask = val->constant->values[0][0].u32 |
val->constant->values[0][1].u32 << 5 |
val->constant->values[0][2].u32 << 10;
unsigned mask = val->constant->values[0].u32 |
val->constant->values[1].u32 << 5 |
val->constant->values[2].u32 << 10;
nir_intrinsic_set_swizzle_mask(intrin, mask);
}
......
......@@ -708,10 +708,10 @@ vtn_constant_uint(struct vtn_builder *b, uint32_t value_id)
"Expected id %u to be an integer constant", value_id);
switch (glsl_get_bit_size(val->type->type)) {
case 8: return val->constant->values[0][0].u8;
case 16: return val->constant->values[0][0].u16;
case 32: return val->constant->values[0][0].u32;
case 64: return val->constant->values[0][0].u64;
case 8: return val->constant->values[0].u8;
case 16: return val->constant->values[0].u16;
case 32: return val->constant->values[0].u32;
case 64: return val->constant->values[0].u64;
default: unreachable("Invalid bit size");
}
}
......@@ -726,10 +726,10 @@ vtn_constant_int(struct vtn_builder *b, uint32_t value_id)
"Expected id %u to be an integer constant", value_id);
switch (glsl_get_bit_size(val->type->type)) {
case 8: return val->constant->values[0][0].i8;
case 16: return val->constant->values[0][0].i16;
case 32: return val->constant->values[0][0].i32;
case 64: return val->constant->values[0][0].i64;
case 8: return val->constant->values[0].i8;
case 16: return val->constant->values[0].i16;
case 32: return val->constant->values[0].i32;
case 64: return val->constant->values[0].i64;
default: unreachable("Invalid bit size");
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment