Commit ff472607 authored by Jason Ekstrand

intel/fs: Set up sampler message headers in the visitor on gen7+

This gives the scheduler visibility into the headers, which should
improve scheduling.  More importantly, however, it lets the scheduler
know that the header gets written.  As-is, the scheduler thinks that a
texture instruction only reads its payload and is unaware that it may
write to the first register, so it may reorder the instruction with
respect to a read from that register.  This is causing issues in a
couple of Dota 2 vertex shaders.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104923
Cc: mesa-stable@lists.freedesktop.org
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
parent f5305c1b
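
To make the hazard described in the commit message concrete, here is a toy dependency model in plain C. This is not Mesa's scheduler; the register numbers, instruction names, and the can_swap() check are illustrative only. Before this change the header write happened implicitly in the generator, so at the IR level the texture instruction's write set was empty and a check like the one below would allow the reorder.

#include <stdbool.h>
#include <stdio.h>

/* Toy model, not Mesa's scheduler: an instruction is summarized by bitmasks
 * of the registers it reads and writes, and two adjacent instructions may be
 * swapped only if neither one writes a register the other touches.
 */
struct toy_inst {
   const char *name;
   unsigned reads;   /* bitmask of register numbers */
   unsigned writes;  /* bitmask of register numbers */
};

static bool
can_swap(const struct toy_inst *a, const struct toy_inst *b)
{
   return !(a->writes & (b->reads | b->writes)) &&
          !(b->writes & (a->reads | a->writes));
}

int
main(void)
{
   const unsigned r10 = 1u << 10;       /* first payload register */
   const unsigned payload = 0xfu << 10; /* r10..r13 */

   /* Old IR: the sampler send appears to only read its payload; the header
    * it writes into the first payload register (done later, in the
    * generator) is invisible here.
    */
   const struct toy_inst tex_old = { "tex, header write hidden", payload, 0 };
   /* New IR: the header write is explicit, so r10 is in the write set. */
   const struct toy_inst tex_new = { "tex, header write visible", payload, r10 };
   /* Another instruction in the program that reads r10 for its own purposes. */
   const struct toy_inst reader = { "mov ..., r10", r10, 0 };

   printf("old IR allows reordering past the r10 read: %s\n",
          can_swap(&tex_old, &reader) ? "yes (the bug)" : "no");
   printf("new IR allows reordering past the r10 read: %s\n",
          can_swap(&tex_new, &reader) ? "yes" : "no (correct)");
   return 0;
}
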
@@ -4192,17 +4192,15 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
        op == SHADER_OPCODE_SAMPLEINFO ||
        is_high_sampler(devinfo, sampler)) {
       /* For general texture offsets (no txf workaround), we need a header to
-       * put them in.  Note that we're only reserving space for it in the
-       * message payload as it will be initialized implicitly by the
-       * generator.
+       * put them in.
        *
        * TG4 needs to place its channel select in the header, for interaction
        * with ARB_texture_swizzle.  The sampler index is only 4-bits, so for
        * larger sampler numbers we need to offset the Sampler State Pointer in
        * the header.
        */
+      fs_reg header = retype(sources[0], BRW_REGISTER_TYPE_UD);
       header_size = 1;
-      sources[0] = fs_reg();
       length++;
 
       /* If we're requesting fewer than four channels worth of response,
@@ -4214,6 +4212,40 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
          unsigned mask = ~((1 << (regs_written(inst) / reg_width)) - 1) & 0xf;
          inst->offset |= mask << 12;
       }
+
+      /* Build the actual header */
+      const fs_builder ubld = bld.exec_all().group(8, 0);
+      const fs_builder ubld1 = ubld.group(1, 0);
+
+      ubld.MOV(header, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
+      if (inst->offset) {
+         ubld1.MOV(component(header, 2), brw_imm_ud(inst->offset));
+      } else if (bld.shader->stage != MESA_SHADER_VERTEX &&
+                 bld.shader->stage != MESA_SHADER_FRAGMENT) {
+         /* The vertex and fragment stages have g0.2 set to 0, so
+          * header0.2 is 0 when g0 is copied. Other stages may not, so we
+          * must set it to 0 to avoid setting undesirable bits in the
+          * message.
+          */
+         ubld1.MOV(component(header, 2), brw_imm_ud(0));
+      }
+
+      if (is_high_sampler(devinfo, sampler)) {
+         if (sampler.file == BRW_IMMEDIATE_VALUE) {
+            assert(sampler.ud >= 16);
+            const int sampler_state_size = 16; /* 16 bytes */
+            ubld1.ADD(component(header, 3),
+                      retype(brw_vec1_grf(0, 3), BRW_REGISTER_TYPE_UD),
+                      brw_imm_ud(16 * (sampler.ud / 16) * sampler_state_size));
+         } else {
+            fs_reg tmp = ubld1.vgrf(BRW_REGISTER_TYPE_UD);
+            ubld1.AND(tmp, sampler, brw_imm_ud(0x0f0));
+            ubld1.SHL(tmp, tmp, brw_imm_ud(4));
+            ubld1.ADD(component(header, 3),
+                      retype(brw_vec1_grf(0, 3), BRW_REGISTER_TYPE_UD),
+                      tmp);
+         }
+      }
    }
 
    if (shadow_c.file != BAD_FILE) {
...
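
The writemask that the hunk above packs into inst->offset is easy to misread because it is inverted. Below is a minimal standalone sketch of just that arithmetic, assuming regs_written(inst) / reg_width has already been reduced to a plain channel count; the helper name is hypothetical, not Mesa API.

#include <assert.h>
#include <stdio.h>

/* Hypothetical helper mirroring the arithmetic in the hunk above: the
 * response-channel writemask lands in header DWord 2, bits 12..15, and is
 * reversed -- a 1 bit means "do not write this channel back".
 */
static unsigned
response_writemask_bits(unsigned channels_written)
{
   assert(channels_written >= 1 && channels_written <= 4);
   unsigned mask = ~((1u << channels_written) - 1) & 0xf;
   return mask << 12;
}

int
main(void)
{
   /* Requesting 2 of 4 channels leaves channels 2 and 3 masked off, so the
    * field is 0b1100 << 12 == 0xc000.  The resulting DWord 2 value (texture
    * offsets in the low bits, writemask in bits 12..15) is what the visitor
    * now MOVs into component(header, 2).
    */
   unsigned channels;
   for (channels = 1; channels <= 4; channels++)
      printf("%u channel(s) written -> writemask bits 0x%04x\n",
             channels, response_writemask_bits(channels));
   return 0;
}
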
@@ -1001,19 +1001,13 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
     * we need to set it up explicitly and load the offset bitfield.
     * Otherwise, we can use an implied move from g0 to the first message reg.
     */
-   if (inst->header_size != 0) {
+   if (inst->header_size != 0 && devinfo->gen < 7) {
       if (devinfo->gen < 6 && !inst->offset) {
          /* Set up an implied move from g0 to the MRF. */
          src = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
       } else {
-         struct brw_reg header_reg;
-
-         if (devinfo->gen >= 7) {
-            header_reg = src;
-         } else {
-            assert(inst->base_mrf != -1);
-            header_reg = brw_message_reg(inst->base_mrf);
-         }
+         assert(inst->base_mrf != -1);
+         struct brw_reg header_reg = brw_message_reg(inst->base_mrf);
 
          brw_push_insn_state(p);
          brw_set_default_exec_size(p, BRW_EXECUTE_8);
@@ -1027,17 +1021,8 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
             /* Set the offset bits in DWord 2. */
             brw_MOV(p, get_element_ud(header_reg, 2),
                        brw_imm_ud(inst->offset));
-         } else if (stage != MESA_SHADER_VERTEX &&
-                    stage != MESA_SHADER_FRAGMENT) {
-            /* The vertex and fragment stages have g0.2 set to 0, so
-             * header0.2 is 0 when g0 is copied. Other stages may not, so we
-             * must set it to 0 to avoid setting undesirable bits in the
-             * message.
-             */
-            brw_MOV(p, get_element_ud(header_reg, 2), brw_imm_ud(0));
          }
 
-         brw_adjust_sampler_state_pointer(p, header_reg, sampler_index);
          brw_pop_insn_state(p);
       }
    }
...
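
The other field the visitor now fills in is the Sampler State Pointer adjustment in header DWord 3, which the generator used to apply via the brw_adjust_sampler_state_pointer() call removed above. Here is a sketch of the arithmetic from the lower_sampler_logical_send_gen7 hunk, with a made-up helper name, showing that the immediate and non-immediate paths agree for any sampler index that fits in 8 bits.

#include <assert.h>
#include <stdio.h>

/* Sketch of the DWord 3 adjustment from the lower_sampler_logical_send_gen7
 * hunk: sampler state entries are 16 bytes each and the pointer is advanced
 * in groups of 16 samplers, so indices >= 16 add 256 bytes per group.
 */
static unsigned
sampler_state_pointer_bump(unsigned sampler_index)
{
   const unsigned sampler_state_size = 16; /* bytes */
   return 16 * (sampler_index / 16) * sampler_state_size;
}

int
main(void)
{
   unsigned s;
   for (s = 0; s < 64; s++) {
      /* The non-immediate path, (sampler & 0x0f0) << 4, computes the same
       * value as the immediate path for any 8-bit sampler index.
       */
      assert(((s & 0x0f0u) << 4) == sampler_state_pointer_bump(s));
   }
   printf("sampler 20 -> +%u bytes, sampler 37 -> +%u bytes\n",
          sampler_state_pointer_bump(20), sampler_state_pointer_bump(37));
   return 0;
}
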