Commit bfd55711 authored by Tom Stellard's avatar Tom Stellard

radeon/llvm: Initial flow control support for SI

This adds basic flow control support for If-Then-Else blocks using
predicates (stored in the EXEC register) and a predicate stack for
nested flow control.
parent ef0d7e13
......@@ -25,6 +25,7 @@ FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
// SI Passes
FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
FunctionPass *createSILowerFlowControlPass(TargetMachine &tm);
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm);
......
......@@ -134,6 +134,7 @@ bool AMDGPUPassConfig::addPreEmitPass() {
addPass(FinalizeMachineBundlesID);
} else {
PM->add(createSILowerLiteralConstantsPass(*TM));
PM->add(createSILowerFlowControlPass(*TM));
}
return false;
......
......@@ -2892,6 +2892,8 @@ struct CFGStructTraits<AMDGPUCFGStructurizer>
switch (instr->getOpcode()) {
case AMDGPU::JUMP:
return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() == 0;
case AMDGPU::BRANCH:
return true;
default:
return false;
}
......
......@@ -211,7 +211,7 @@ include "AMDILIntrinsics.td"
// Custom Inserter for Branches and returns, this eventually will be a
// separate pass
//===---------------------------------------------------------------------===//
let isTerminator = 1, usesCustomInserter = 1 in {
let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
def BRANCH : ILFormat<(outs), (ins brtarget:$target),
"; Pseudo unconditional branch instruction",
[(br bb:$target)]>;
......
......@@ -71,6 +71,7 @@ CPP_SOURCES := \
SIInstrInfo.cpp \
SIISelLowering.cpp \
SILowerLiteralConstants.cpp \
SILowerFlowControl.cpp \
SIMachineFunctionInfo.cpp \
SIRegisterInfo.cpp \
InstPrinter/AMDGPUInstPrinter.cpp \
......
......@@ -78,7 +78,7 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
switch (MI->getOpcode()) {
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
case AMDGPU::BRANCH: return BB;
case AMDGPU::CLAMP_SI:
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
.addOperand(MI->getOperand(0))
......
//===-- SILowerFlowControl.cpp - Use predicates for flow control ----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass lowers the pseudo flow control instructions (SI_IF_NZ, ELSE, ENDIF)
// to predicated instructions.
//
// All flow control (except loops) is handled using predicated instructions and
// a predicate stack. Each Scalar ALU controls the operations of 64 Vector
// ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs
// by writing to the 64-bit EXEC register (each bit corresponds to a
// single vector ALU). Typically, for predicates, a vector ALU will write
// to its bit of the VCC register (like EXEC, VCC is 64 bits wide, one bit
// for each Vector ALU) and then the Scalar ALU will AND the VCC register
// with the EXEC register to update the predicates.
//
// For example:
// %VCC = V_CMP_GT_F32 %VGPR1, %VGPR2
// SI_IF_NZ %VCC
// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0
// ELSE
// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0
// ENDIF
//
// becomes:
//
// %SGPR0 = S_MOV_B64 %EXEC // Save the current exec mask
// %EXEC = S_AND_B64 %VCC, %EXEC // Update the exec mask
// S_CBRANCH_EXECZ label0 // This instruction is an
// // optimization which allows us to
// // branch if all the bits of
// // EXEC are zero.
// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 // Do the IF block of the branch
//
// label0:
// %EXEC = S_NOT_B64 %EXEC // Invert the exec mask for the
// // Else block.
// %EXEC = S_AND_B64 %SGPR0, %EXEC
// S_CBRANCH_EXECZ label1 // Use our branch optimization
// // instruction again.
// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0 // Do the ELSE block
// label1:
// %EXEC = S_MOV_B64 %SGPR0 // Restore the old EXEC value
//
// Note: the S_CBRANCH_EXECZ optimization shown above is not emitted by this
// pass yet; only the EXEC mask updates are generated.
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
namespace {
/// Machine function pass that replaces the SI_IF_NZ, ELSE and ENDIF pseudo
/// instructions with scalar instructions that save, update and restore the
/// EXEC register.
class SILowerFlowControlPass : public MachineFunctionPass {
public:
  /// Builds the pass, caching the instruction info of \p tm for later use
  /// when creating the replacement instructions.
  SILowerFlowControlPass(TargetMachine &tm)
      : MachineFunctionPass(ID), TII(tm.getInstrInfo()) {}

  /// Rewrites the flow control pseudo instructions found in \p MF.
  virtual bool runOnMachineFunction(MachineFunction &MF);

  /// Human readable name of this pass.
  const char *getPassName() const {
    return "SI Lower flow control instructions";
  }

private:
  /// Identifier handed to the MachineFunctionPass constructor.
  static char ID;

  /// Instruction descriptions used with BuildMI.
  const TargetInstrInfo *TII;

  /// Registers currently holding a saved EXEC value; the most recently
  /// pushed one is at the back.
  std::vector<unsigned> PredicateStack;

  /// Registers that are available for saving an EXEC value.
  std::vector<unsigned> UnusedRegisters;

  /// Emits a copy of EXEC into a free register before \p I and records that
  /// register on PredicateStack.
  void pushExecMask(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);

  /// Emits a copy of the most recently saved value back into EXEC before
  /// \p I and releases the register that held it.
  void popExecMask(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
};
} // End anonymous namespace
// Definition of the static identifier that the constructor passes on to
// MachineFunctionPass.
char SILowerFlowControlPass::ID = 0;

/// Allocates a new SILowerFlowControlPass for the target machine \p tm and
/// returns it through the generic FunctionPass interface.
FunctionPass *llvm::createSILowerFlowControlPass(TargetMachine &tm) {
  FunctionPass *Pass = new SILowerFlowControlPass(tm);
  return Pass;
}
/// Lowers the SI_IF_NZ, ELSE and ENDIF pseudo instructions of \p MF into
/// explicit updates of the EXEC register.
///
///  - SI_IF_NZ saves EXEC in a free SReg_64 register and then ANDs EXEC with
///    the condition operand.
///  - ELSE inverts EXEC and ANDs it with the innermost saved value, which
///    selects the lanes that were active before the IF but did not take it.
///  - ENDIF restores EXEC from the innermost saved value.
///
/// \returns true if at least one pseudo instruction was rewritten.
bool SILowerFlowControlPass::runOnMachineFunction(MachineFunction &MF) {
  // The vectors are members, so start every function from a clean slate:
  // registers that were free in a previously processed function are not
  // necessarily free in this one.
  UnusedRegisters.clear();
  PredicateStack.clear();

  // Find all the unused registers that can be used for the predicate stack.
  // The class is walked from its last register to its first, so the first
  // register of the class ends up at the back of the vector and is handed
  // out first by pushExecMask().
  for (TargetRegisterClass::iterator S = AMDGPU::SReg_64RegClass.begin(),
                                     I = AMDGPU::SReg_64RegClass.end();
       I != S;) {
    // Step back before dereferencing: end() is one past the last register.
    --I;
    unsigned Reg = *I;
    if (!MF.getRegInfo().isPhysRegOrOverlapUsed(Reg)) {
      UnusedRegisters.push_back(Reg);
    }
  }

  bool Changed = false;

  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
       BB != BB_E; ++BB) {
    MachineBasicBlock &MBB = *BB;
    for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) {
      // Fetch the successor only once I is known to be valid, and before the
      // current instruction is possibly erased below.
      MachineBasicBlock::iterator Next = llvm::next(I);
      MachineInstr &MI = *I;
      switch (MI.getOpcode()) {
      default: break;
      case AMDGPU::SI_IF_NZ:
        pushExecMask(MBB, I);
        BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_AND_B64),
                AMDGPU::EXEC)
                .addOperand(MI.getOperand(0)) // VCC
                .addReg(AMDGPU::EXEC);
        MI.eraseFromParent();
        Changed = true;
        break;
      case AMDGPU::ELSE:
        assert(!PredicateStack.empty() && "ELSE without a matching SI_IF_NZ");
        BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_NOT_B64),
                AMDGPU::EXEC)
                .addReg(AMDGPU::EXEC);
        BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_AND_B64),
                AMDGPU::EXEC)
                .addReg(PredicateStack.back())
                .addReg(AMDGPU::EXEC);
        MI.eraseFromParent();
        Changed = true;
        break;
      case AMDGPU::ENDIF:
        popExecMask(MBB, I);
        MI.eraseFromParent();
        Changed = true;
        break;
      }
      I = Next;
    }
  }
  return Changed;
}
/// Saves the current EXEC value: takes a register from the back of
/// UnusedRegisters, records it on PredicateStack and inserts
/// "<register> = S_MOV_B64 EXEC" before \p I in \p MBB.
void SILowerFlowControlPass::pushExecMask(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator I) {
  assert(!UnusedRegisters.empty() && "Ran out of registers for predicate stack");

  // Move the register from the free pool onto the predicate stack.
  unsigned SavedExecReg = UnusedRegisters.back();
  UnusedRegisters.pop_back();
  PredicateStack.push_back(SavedExecReg);

  // Copy EXEC into the register that was just reserved.
  BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64),
          SavedExecReg).addReg(AMDGPU::EXEC);
}
/// Restores EXEC from the innermost saved value: removes the register at the
/// back of PredicateStack, returns it to UnusedRegisters and inserts
/// "EXEC = S_MOV_B64 <register>" before \p I in \p MBB.
///
/// The predicate stack must not be empty, i.e. every call has to be paired
/// with an earlier pushExecMask().
void SILowerFlowControlPass::popExecMask(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator I) {
  // Calling back() on an empty vector is undefined behavior; catch an
  // unmatched ENDIF here, mirroring the check in pushExecMask().
  assert(!PredicateStack.empty() && "ENDIF without a matching SI_IF_NZ");
  unsigned StackReg = PredicateStack.back();
  PredicateStack.pop_back();
  // The register no longer holds a live mask, so it can be reused.
  UnusedRegisters.push_back(StackReg);
  BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64),
          AMDGPU::EXEC)
          .addReg(StackReg);
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment