Commit 7dfb4b2d authored by Paul Berry
Browse files

i965/gen7: Emit workaround flush when changing GS enable state.



v2: Don't go to extra work to avoid extraneous flushes.  (Previous
experiments in the kernel have suggested that flushing the pipeline
when it is already empty is extremely cheap).

Cc: "10.0" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
parent d2222021
......@@ -764,6 +764,7 @@ brwCreateContext(gl_api api,
brw->prim_restart.in_progress = false;
brw->prim_restart.enable_cut_index = false;
brw->gs.enabled = false;
if (brw->gen < 6) {
brw->curbe.last_buf = calloc(1, 4096);
......
......@@ -1300,6 +1300,12 @@ struct brw_context
struct {
struct brw_stage_state base;
struct brw_gs_prog_data *prog_data;
/**
* True if the 3DSTATE_GS command most recently emitted to the 3D
* pipeline enabled the GS; false otherwise.
*/
bool enabled;
} gs;
struct {
......
......@@ -402,6 +402,21 @@ gen7_blorp_emit_gs_disable(struct brw_context *brw,
OUT_BATCH(0);
ADVANCE_BATCH();
/**
* From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
* Geometry > Geometry Shader > State:
*
* "Note: Because of corruption in IVB:GT2, software needs to flush the
* whole fixed function pipeline when the GS enable changes value in
* the 3DSTATE_GS."
*
* The hardware architects have clarified that in this context "flush the
* whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS
* Stall" bit set.
*/
if (!brw->is_haswell && brw->gt == 2 && brw->gs.enabled)
gen7_emit_cs_stall_flush(brw);
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
OUT_BATCH(0);
......@@ -411,6 +426,7 @@ gen7_blorp_emit_gs_disable(struct brw_context *brw,
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
brw->gs.enabled = false;
}
/* 3DSTATE_STREAMOUT
......
......@@ -80,6 +80,21 @@ upload_gs_state(struct brw_context *brw)
gen7_upload_constant_state(brw, stage_state, active, _3DSTATE_CONSTANT_GS);
/**
* From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
* Geometry > Geometry Shader > State:
*
* "Note: Because of corruption in IVB:GT2, software needs to flush the
* whole fixed function pipeline when the GS enable changes value in
* the 3DSTATE_GS."
*
* The hardware architects have clarified that in this context "flush the
* whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS
* Stall" bit set.
*/
if (!brw->is_haswell && brw->gt == 2 && brw->gs.enabled != active)
gen7_emit_cs_stall_flush(brw);
if (active) {
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
......@@ -176,6 +191,7 @@ upload_gs_state(struct brw_context *brw)
OUT_BATCH(0);
ADVANCE_BATCH();
}
brw->gs.enabled = active;
}
const struct brw_tracked_state gen7_gs_state = {
......
......@@ -122,28 +122,8 @@ gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size,
*
* No such restriction exists for Haswell.
*/
if (!brw->is_haswell) {
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
/* From p61 of the Ivy Bridge PRM (1.10.4 PIPE_CONTROL Command: DW1[20]
* CS Stall):
*
* One of the following must also be set:
* - Render Target Cache Flush Enable ([12] of DW1)
* - Depth Cache Flush Enable ([0] of DW1)
* - Stall at Pixel Scoreboard ([1] of DW1)
* - Depth Stall ([13] of DW1)
* - Post-Sync Operation ([13] of DW1)
*
* We choose to do a Post-Sync Operation (Write Immediate Data), since
* it seems like it will incur the least additional performance penalty.
*/
OUT_BATCH(PIPE_CONTROL_CS_STALL | PIPE_CONTROL_WRITE_IMMEDIATE);
OUT_RELOC(brw->batch.workaround_bo,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
if (!brw->is_haswell)
gen7_emit_cs_stall_flush(brw);
}
const struct brw_tracked_state gen7_push_constant_space = {
......
......@@ -511,6 +511,36 @@ gen7_emit_vs_workaround_flush(struct brw_context *brw)
ADVANCE_BATCH();
}
/**
 * Emit a PIPE_CONTROL command for gen7 with the CS Stall bit set.
 *
 * The command occupies four dwords: the PIPE_CONTROL header, the flags
 * dword (CS Stall plus a Write Immediate Data post-sync operation), a
 * relocation pointing at brw->batch.workaround_bo, and a trailing zero
 * dword.
 *
 * \param brw  context supplying the workaround buffer object; the batch
 *             macros presumably append to this context's batch buffer
 *             (their definitions are not visible here -- confirm).
 */
void
gen7_emit_cs_stall_flush(struct brw_context *brw)
{
/* Reserve room for the four dwords emitted below. */
BEGIN_BATCH(4);
/* Header dword; (4 - 2) looks like the usual "total dwords minus two"
 * length encoding -- confirm against the PIPE_CONTROL definition.
 */
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
/* From p61 of the Ivy Bridge PRM (1.10.4 PIPE_CONTROL Command: DW1[20]
* CS Stall):
*
* One of the following must also be set:
* - Render Target Cache Flush Enable ([12] of DW1)
* - Depth Cache Flush Enable ([0] of DW1)
* - Stall at Pixel Scoreboard ([1] of DW1)
* - Depth Stall ([13] of DW1)
* - Post-Sync Operation ([13] of DW1)
*
* We choose to do a Post-Sync Operation (Write Immediate Data), since
* it seems like it will incur the least additional performance penalty.
*
* NOTE(review): the list above gives bit 13 for both Depth Stall and
* Post-Sync Operation; one of the two is presumably a typo carried over
* from the quoted text -- confirm the Post-Sync Operation bit position
* against the PRM.
*/
OUT_BATCH(PIPE_CONTROL_CS_STALL | PIPE_CONTROL_WRITE_IMMEDIATE);
/* Destination of the immediate write: the workaround buffer object,
 * referenced with the instruction domain for both the read and write
 * domain arguments and a zero offset.
 */
OUT_RELOC(brw->batch.workaround_bo,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
/* Final dword is zero -- presumably the immediate data value; confirm. */
OUT_BATCH(0);
ADVANCE_BATCH();
}
/**
* Emits a PIPE_CONTROL with a non-zero post-sync operation, for
* implementing two workarounds on gen6. From section 1.4.7.1
......
......@@ -59,6 +59,7 @@ void intel_batchbuffer_emit_mi_flush(struct brw_context *brw);
void intel_emit_post_sync_nonzero_flush(struct brw_context *brw);
void intel_emit_depth_stall_flushes(struct brw_context *brw);
void gen7_emit_vs_workaround_flush(struct brw_context *brw);
void gen7_emit_cs_stall_flush(struct brw_context *brw);
static INLINE uint32_t float_as_int(float f)
{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment