Commit 8be7de22 authored by Samuel Iglesias Gonsálvez, committed by Emil Velikov

i965/vec4: load dvec3/4 uniforms first in the push constant buffer

Reorder the uniforms to load first the dvec4-aligned variables in the
push constant buffer and then push the vec4-aligned ones. It takes
into account that the relocated uniforms should be aligned to their
channel size.

This fixes a bug where a dvec3/4 might be loaded partly in one GRF and
the rest in the next GRF, so the region parameters needed to read it
could break the HW rules.

v2:
- Fix broken logic.
- Add a comment to explain what should be needed to optimise the usage
  of the push constant buffer slots, as this patch does not pack the
  uniforms.

v3:
- Implemented the push constant buffer usage optimization.
Signed-off-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Cc: "17.1" <mesa-stable@lists.freedesktop.org>
Acked-by: Francisco Jerez <currojerez@riseup.net>
(cherry picked from commit e69e5c70)
parent b7923353
......@@ -578,16 +578,46 @@ vec4_visitor::split_uniform_registers()
}
}
/* Place uniform `src` into the first push constant register with room for it.
 *
 * Registers 0..nr_uniforms-1 are scanned in increasing order. A register
 * fits when its used-channel count, rounded up to `channel_size`, plus
 * `size` does not exceed 4. The chosen register and starting channel are
 * recorded in new_loc[src] and new_chan[src], new_chans_used[] for that
 * register is advanced past the newly placed data, and *new_uniform_count
 * is raised so that it covers the chosen register.
 *
 * Returns the register number where the uniform was placed.
 */
static int
set_push_constant_loc(const int nr_uniforms, int *new_uniform_count,
                      const int src, const int size, const int channel_size,
                      int *new_loc, int *new_chan,
                      int *new_chans_used)
{
   /* Walk upwards until a register with enough free channels turns up. */
   int slot = 0;
   while (slot < nr_uniforms &&
          ALIGN(new_chans_used[slot], channel_size) + size > 4)
      slot++;

   /* A fitting register must exist among the nr_uniforms candidates. */
   assert(slot < nr_uniforms);

   /* First channel of the slot that honours the channel-size alignment. */
   const int first_chan = ALIGN(new_chans_used[slot], channel_size);

   new_loc[src] = slot;
   new_chan[src] = first_chan;
   new_chans_used[slot] = first_chan + size;

   /* Grow the register count if this slot lies beyond the current end. */
   if (*new_uniform_count < slot + 1)
      *new_uniform_count = slot + 1;

   return slot;
}
void
vec4_visitor::pack_uniform_registers()
{
uint8_t chans_used[this->uniforms];
int new_loc[this->uniforms];
int new_chan[this->uniforms];
bool is_aligned_to_dvec4[this->uniforms];
int new_chans_used[this->uniforms];
int channel_sizes[this->uniforms];
memset(chans_used, 0, sizeof(chans_used));
memset(new_loc, 0, sizeof(new_loc));
memset(new_chan, 0, sizeof(new_chan));
memset(new_chans_used, 0, sizeof(new_chans_used));
memset(is_aligned_to_dvec4, 0, sizeof(is_aligned_to_dvec4));
memset(channel_sizes, 0, sizeof(channel_sizes));
/* Find which uniform vectors are actually used by the program. We
* expect unused vector elements when we've moved array access out
......@@ -617,7 +647,7 @@ vec4_visitor::pack_uniform_registers()
continue;
assert(type_sz(inst->src[i].type) % 4 == 0);
unsigned channel_size = type_sz(inst->src[i].type) / 4;
int channel_size = type_sz(inst->src[i].type) / 4;
int reg = inst->src[i].nr;
for (int c = 0; c < 4; c++) {
......@@ -626,10 +656,15 @@ vec4_visitor::pack_uniform_registers()
unsigned channel = BRW_GET_SWZ(inst->src[i].swizzle, c) + 1;
unsigned used = MAX2(chans_used[reg], channel * channel_size);
if (used <= 4)
if (used <= 4) {
chans_used[reg] = used;
else
channel_sizes[reg] = MAX2(channel_sizes[reg], channel_size);
} else {
is_aligned_to_dvec4[reg] = true;
is_aligned_to_dvec4[reg + 1] = true;
chans_used[reg + 1] = used - 4;
channel_sizes[reg + 1] = MAX2(channel_sizes[reg + 1], channel_size);
}
}
}
......@@ -654,42 +689,60 @@ vec4_visitor::pack_uniform_registers()
int new_uniform_count = 0;
/* As the uniforms are going to be reordered, take the data from a temporary
* copy of the original param[].
*/
gl_constant_value **param = ralloc_array(NULL, gl_constant_value*,
stage_prog_data->nr_params);
memcpy(param, stage_prog_data->param,
sizeof(gl_constant_value*) * stage_prog_data->nr_params);
/* Now, figure out a packing of the live uniform vectors into our
* push constants.
* push constants. Start with dvec{3,4} because they are aligned to
* dvec4 size (2 vec4).
*/
for (int src = 0; src < uniforms; src++) {
int size = chans_used[src];
if (size == 0)
if (size == 0 || !is_aligned_to_dvec4[src])
continue;
int dst;
/* Find the lowest place we can slot this uniform in. */
for (dst = 0; dst < src; dst++) {
if (chans_used[dst] + size <= 4)
break;
/* dvec3 are aligned to dvec4 size, apply the alignment of the size
* to 4 to avoid moving last component of a dvec3 to the available
* location at the end of a previous dvec3. These available locations
* could be filled by smaller variables in next loop.
*/
size = ALIGN(size, 4);
int dst = set_push_constant_loc(uniforms, &new_uniform_count,
src, size, channel_sizes[src],
new_loc, new_chan,
new_chans_used);
/* Move the references to the data */
for (int j = 0; j < size; j++) {
stage_prog_data->param[dst * 4 + new_chan[src] + j] =
param[src * 4 + j];
}
}
if (src == dst) {
new_loc[src] = dst;
new_chan[src] = 0;
} else {
new_loc[src] = dst;
new_chan[src] = chans_used[dst];
/* Continue with the rest of data, which is aligned to vec4. */
for (int src = 0; src < uniforms; src++) {
int size = chans_used[src];
/* Move the references to the data */
for (int j = 0; j < size; j++) {
stage_prog_data->param[dst * 4 + new_chan[src] + j] =
stage_prog_data->param[src * 4 + j];
}
if (size == 0 || is_aligned_to_dvec4[src])
continue;
chans_used[dst] += size;
chans_used[src] = 0;
int dst = set_push_constant_loc(uniforms, &new_uniform_count,
src, size, channel_sizes[src],
new_loc, new_chan,
new_chans_used);
/* Move the references to the data */
for (int j = 0; j < size; j++) {
stage_prog_data->param[dst * 4 + new_chan[src] + j] =
param[src * 4 + j];
}
new_uniform_count = MAX2(new_uniform_count, dst + 1);
}
ralloc_free(param);
this->uniforms = new_uniform_count;
/* Now, update the instructions for our repacked uniforms. */
......@@ -700,9 +753,9 @@ vec4_visitor::pack_uniform_registers()
if (inst->src[i].file != UNIFORM)
continue;
int chan = new_chan[src] / channel_sizes[src];
inst->src[i].nr = new_loc[src];
inst->src[i].swizzle += BRW_SWIZZLE4(new_chan[src], new_chan[src],
new_chan[src], new_chan[src]);
inst->src[i].swizzle += BRW_SWIZZLE4(chan, chan, chan, chan);
}
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment