Commit aa757f4f authored by Marek Olšák, committed by Marge Bot

ac/llvm: fix demote inside conditional branches

The big comment explains it.

v2: don't kill if subgroup ops are used
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <mesa/mesa!7586>
parent cb20d58f
......@@ -3138,6 +3138,22 @@ void ac_build_else(struct ac_llvm_context *ctx, int label_id)
current_branch->next_block = endif_block;
}
/* Hook run every time a control-flow construct has just been closed. */
static void ac_branch_exited(struct ac_llvm_context *ctx)
{
   /* Only act once we are back at the outermost level and a demote was
    * recorded while inside conditional code.
    */
   if (ctx->flow->depth != 0 || !ctx->conditional_demote_seen)
      return;

   /* A demote happened inside the conditional code that has now been left.
    * The kill is emitted here, after all conditional blocks, because
    * amdgcn.wqm.vote doesn't return usable values inside those blocks.
    *
    * This is purely an optional optimization: it only kills quads that
    * are entirely inactive.
    */
   LLVMValueRef live_mask = LLVMBuildLoad(ctx->builder, ctx->postponed_kill, "");
   LLVMValueRef quad_vote = ac_build_wqm_vote(ctx, live_mask);

   ac_build_kill_if_false(ctx, quad_vote);

   /* The pending demote has been handled; clear the flag. */
   ctx->conditional_demote_seen = false;
}
void ac_build_endif(struct ac_llvm_context *ctx, int label_id)
{
struct ac_llvm_flow *current_branch = get_current_flow(ctx);
......@@ -3149,6 +3165,7 @@ void ac_build_endif(struct ac_llvm_context *ctx, int label_id)
set_basicblock_name(current_branch->next_block, "endif", label_id);
ctx->flow->depth--;
ac_branch_exited(ctx);
}
void ac_build_endloop(struct ac_llvm_context *ctx, int label_id)
......@@ -3162,6 +3179,7 @@ void ac_build_endloop(struct ac_llvm_context *ctx, int label_id)
LLVMPositionBuilderAtEnd(ctx->builder, current_loop->next_block);
set_basicblock_name(current_loop->next_block, "endloop", label_id);
ctx->flow->depth--;
ac_branch_exited(ctx);
}
void ac_build_ifcc(struct ac_llvm_context *ctx, LLVMValueRef cond, int label_id)
......
......@@ -118,6 +118,7 @@ struct ac_llvm_context {
* False = demoted lanes
*/
LLVMValueRef postponed_kill;
bool conditional_demote_seen;
/* Since ac_nir_translate makes a local copy of ac_llvm_context, there
* are two ac_llvm_contexts. Declare a pointer here, so that the control
......
......@@ -2813,13 +2813,37 @@ static void emit_demote(struct ac_nir_context *ctx, const nir_intrinsic_instr *i
cond = ctx->ac.i1false;
}
/* Kill immediately while maintaining WQM. */
ac_build_kill_if_false(&ctx->ac, ac_build_wqm_vote(&ctx->ac, cond));
LLVMValueRef mask = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
mask = LLVMBuildAnd(ctx->ac.builder, mask, cond, "");
LLVMBuildStore(ctx->ac.builder, mask, ctx->ac.postponed_kill);
return;
if (!ctx->info->fs.needs_all_helper_invocations) {
/* This is an optional optimization that only kills whole inactive quads.
* It's not used when subgroup operations can possibly use all helper
* invocations.
*/
if (ctx->ac.flow->depth == 0) {
ac_build_kill_if_false(&ctx->ac, ac_build_wqm_vote(&ctx->ac, cond));
} else {
/* amdgcn.wqm.vote doesn't work inside conditional blocks. Here's why.
*
* The problem is that kill(wqm.vote(0)) kills all active threads within
* the block, which breaks the whole quad mode outside the block if
* the conditional block has partially active quads (2x2 pixel blocks).
* E.g. threads 0-3 are active outside the block, but only thread 0 is
* active inside the block. Thread 0 shouldn't be killed by demote,
* because threads 1-3 are still active outside the block.
*
* The fix for amdgcn.wqm.vote would be to return S_WQM((live & ~exec) | cond)
* instead of S_WQM(cond).
*
* The less efficient workaround we do here is to save the kill condition
* to a temporary (postponed_kill) and do kill(wqm.vote(cond)) after we
* exit the conditional block.
*/
ctx->ac.conditional_demote_seen = true;
}
}
}
static LLVMValueRef visit_load_local_invocation_index(struct ac_nir_context *ctx)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment