Commit 6e5eb4ea authored by Alok Hota 🤖
Browse files

swr/rast: update SWR rasterizer shader stats



Primarily refactoring internal stats types
Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
parent c0a540f3
......@@ -339,40 +339,57 @@ namespace ArchRast
_mm_popcnt_u32(event.data.validMask & ~event.data.clipMask);
}
struct ShaderStats
void UpdateStats(SWR_SHADER_STATS* pStatTotals, const SWR_SHADER_STATS* pStatUpdate)
{
uint32_t numInstExecuted;
};
pStatTotals->numInstExecuted += pStatUpdate->numInstExecuted;
pStatTotals->numSampleExecuted += pStatUpdate->numSampleExecuted;
pStatTotals->numSampleLExecuted += pStatUpdate->numSampleLExecuted;
pStatTotals->numSampleBExecuted += pStatUpdate->numSampleBExecuted;
pStatTotals->numSampleCExecuted += pStatUpdate->numSampleCExecuted;
pStatTotals->numSampleCLZExecuted += pStatUpdate->numSampleCLZExecuted;
pStatTotals->numSampleCDExecuted += pStatUpdate->numSampleCDExecuted;
pStatTotals->numGather4Executed += pStatUpdate->numGather4Executed;
pStatTotals->numGather4CExecuted += pStatUpdate->numGather4CExecuted;
pStatTotals->numGather4CPOExecuted += pStatUpdate->numGather4CPOExecuted;
pStatTotals->numGather4CPOCExecuted += pStatUpdate->numGather4CPOCExecuted;
pStatTotals->numLodExecuted += pStatUpdate->numLodExecuted;
}
virtual void Handle(const VSStats& event)
{
mShaderStats[SHADER_VERTEX].numInstExecuted += event.data.numInstExecuted;
SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
UpdateStats(&mShaderStats[SHADER_VERTEX], pStats);
}
virtual void Handle(const GSStats& event)
{
mShaderStats[SHADER_GEOMETRY].numInstExecuted += event.data.numInstExecuted;
SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
UpdateStats(&mShaderStats[SHADER_GEOMETRY], pStats);
}
virtual void Handle(const DSStats& event)
{
mShaderStats[SHADER_DOMAIN].numInstExecuted += event.data.numInstExecuted;
SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
UpdateStats(&mShaderStats[SHADER_DOMAIN], pStats);
}
virtual void Handle(const HSStats& event)
{
mShaderStats[SHADER_HULL].numInstExecuted += event.data.numInstExecuted;
SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
UpdateStats(&mShaderStats[SHADER_HULL], pStats);
}
virtual void Handle(const PSStats& event)
{
mShaderStats[SHADER_PIXEL].numInstExecuted += event.data.numInstExecuted;
SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
UpdateStats(&mShaderStats[SHADER_PIXEL], pStats);
mNeedFlush = true;
}
virtual void Handle(const CSStats& event)
{
mShaderStats[SHADER_COMPUTE].numInstExecuted += event.data.numInstExecuted;
SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
UpdateStats(&mShaderStats[SHADER_COMPUTE], pStats);
mNeedFlush = true;
}
......@@ -382,8 +399,32 @@ namespace ArchRast
if (mNeedFlush == false)
return;
EventHandlerFile::Handle(PSInfo(drawId, mShaderStats[SHADER_PIXEL].numInstExecuted));
EventHandlerFile::Handle(CSInfo(drawId, mShaderStats[SHADER_COMPUTE].numInstExecuted));
EventHandlerFile::Handle(PSInfo(drawId,
mShaderStats[SHADER_PIXEL].numInstExecuted,
mShaderStats[SHADER_PIXEL].numSampleExecuted,
mShaderStats[SHADER_PIXEL].numSampleLExecuted,
mShaderStats[SHADER_PIXEL].numSampleBExecuted,
mShaderStats[SHADER_PIXEL].numSampleCExecuted,
mShaderStats[SHADER_PIXEL].numSampleCLZExecuted,
mShaderStats[SHADER_PIXEL].numSampleCDExecuted,
mShaderStats[SHADER_PIXEL].numGather4Executed,
mShaderStats[SHADER_PIXEL].numGather4CExecuted,
mShaderStats[SHADER_PIXEL].numGather4CPOExecuted,
mShaderStats[SHADER_PIXEL].numGather4CPOCExecuted,
mShaderStats[SHADER_PIXEL].numLodExecuted));
EventHandlerFile::Handle(CSInfo(drawId,
mShaderStats[SHADER_COMPUTE].numInstExecuted,
mShaderStats[SHADER_COMPUTE].numSampleExecuted,
mShaderStats[SHADER_COMPUTE].numSampleLExecuted,
mShaderStats[SHADER_COMPUTE].numSampleBExecuted,
mShaderStats[SHADER_COMPUTE].numSampleCExecuted,
mShaderStats[SHADER_COMPUTE].numSampleCLZExecuted,
mShaderStats[SHADER_COMPUTE].numSampleCDExecuted,
mShaderStats[SHADER_COMPUTE].numGather4Executed,
mShaderStats[SHADER_COMPUTE].numGather4CExecuted,
mShaderStats[SHADER_COMPUTE].numGather4CPOExecuted,
mShaderStats[SHADER_COMPUTE].numGather4CPOCExecuted,
mShaderStats[SHADER_COMPUTE].numLodExecuted));
// singleSample
EventHandlerFile::Handle(EarlyZSingleSample(
......@@ -480,14 +521,58 @@ namespace ArchRast
EventHandlerFile::Handle(GSPrimsGen(event.data.drawId, mGS.primGeneratedCount));
EventHandlerFile::Handle(GSVertsInput(event.data.drawId, mGS.vertsInput));
EventHandlerFile::Handle(
VSInfo(event.data.drawId, mShaderStats[SHADER_VERTEX].numInstExecuted));
EventHandlerFile::Handle(
HSInfo(event.data.drawId, mShaderStats[SHADER_HULL].numInstExecuted));
EventHandlerFile::Handle(
DSInfo(event.data.drawId, mShaderStats[SHADER_DOMAIN].numInstExecuted));
EventHandlerFile::Handle(
GSInfo(event.data.drawId, mShaderStats[SHADER_GEOMETRY].numInstExecuted));
EventHandlerFile::Handle(VSInfo(event.data.drawId,
mShaderStats[SHADER_VERTEX].numInstExecuted,
mShaderStats[SHADER_VERTEX].numSampleExecuted,
mShaderStats[SHADER_VERTEX].numSampleLExecuted,
mShaderStats[SHADER_VERTEX].numSampleBExecuted,
mShaderStats[SHADER_VERTEX].numSampleCExecuted,
mShaderStats[SHADER_VERTEX].numSampleCLZExecuted,
mShaderStats[SHADER_VERTEX].numSampleCDExecuted,
mShaderStats[SHADER_VERTEX].numGather4Executed,
mShaderStats[SHADER_VERTEX].numGather4CExecuted,
mShaderStats[SHADER_VERTEX].numGather4CPOExecuted,
mShaderStats[SHADER_VERTEX].numGather4CPOCExecuted,
mShaderStats[SHADER_VERTEX].numLodExecuted));
EventHandlerFile::Handle(HSInfo(event.data.drawId,
mShaderStats[SHADER_HULL].numInstExecuted,
mShaderStats[SHADER_HULL].numSampleExecuted,
mShaderStats[SHADER_HULL].numSampleLExecuted,
mShaderStats[SHADER_HULL].numSampleBExecuted,
mShaderStats[SHADER_HULL].numSampleCExecuted,
mShaderStats[SHADER_HULL].numSampleCLZExecuted,
mShaderStats[SHADER_HULL].numSampleCDExecuted,
mShaderStats[SHADER_HULL].numGather4Executed,
mShaderStats[SHADER_HULL].numGather4CExecuted,
mShaderStats[SHADER_HULL].numGather4CPOExecuted,
mShaderStats[SHADER_HULL].numGather4CPOCExecuted,
mShaderStats[SHADER_HULL].numLodExecuted));
EventHandlerFile::Handle(DSInfo(event.data.drawId,
mShaderStats[SHADER_DOMAIN].numInstExecuted,
mShaderStats[SHADER_DOMAIN].numSampleExecuted,
mShaderStats[SHADER_DOMAIN].numSampleLExecuted,
mShaderStats[SHADER_DOMAIN].numSampleBExecuted,
mShaderStats[SHADER_DOMAIN].numSampleCExecuted,
mShaderStats[SHADER_DOMAIN].numSampleCLZExecuted,
mShaderStats[SHADER_DOMAIN].numSampleCDExecuted,
mShaderStats[SHADER_DOMAIN].numGather4Executed,
mShaderStats[SHADER_DOMAIN].numGather4CExecuted,
mShaderStats[SHADER_DOMAIN].numGather4CPOExecuted,
mShaderStats[SHADER_DOMAIN].numGather4CPOCExecuted,
mShaderStats[SHADER_DOMAIN].numLodExecuted));
EventHandlerFile::Handle(GSInfo(event.data.drawId,
mShaderStats[SHADER_GEOMETRY].numInstExecuted,
mShaderStats[SHADER_GEOMETRY].numSampleExecuted,
mShaderStats[SHADER_GEOMETRY].numSampleLExecuted,
mShaderStats[SHADER_GEOMETRY].numSampleBExecuted,
mShaderStats[SHADER_GEOMETRY].numSampleCExecuted,
mShaderStats[SHADER_GEOMETRY].numSampleCLZExecuted,
mShaderStats[SHADER_GEOMETRY].numSampleCDExecuted,
mShaderStats[SHADER_GEOMETRY].numGather4Executed,
mShaderStats[SHADER_GEOMETRY].numGather4CExecuted,
mShaderStats[SHADER_GEOMETRY].numGather4CPOExecuted,
mShaderStats[SHADER_GEOMETRY].numGather4CPOCExecuted,
mShaderStats[SHADER_GEOMETRY].numLodExecuted));
mShaderStats[SHADER_VERTEX] = {};
mShaderStats[SHADER_HULL] = {};
......@@ -544,7 +629,7 @@ namespace ArchRast
CullStats mCullStats = {};
AlphaStats mAlphaStats = {};
ShaderStats mShaderStats[NUM_SHADER_TYPES];
SWR_SHADER_STATS mShaderStats[NUM_SHADER_TYPES];
};
......
......@@ -325,34 +325,101 @@ event VSInfo
{
uint32_t drawId;
uint32_t numInstExecuted;
uint32_t numSampleExecuted;
uint32_t numSampleLExecuted;
uint32_t numSampleBExecuted;
uint32_t numSampleCExecuted;
uint32_t numSampleCLZExecuted;
uint32_t numSampleCDExecuted;
uint32_t numGather4Executed;
uint32_t numGather4CExecuted;
uint32_t numGather4CPOExecuted;
uint32_t numGather4CPOCExecuted;
uint32_t numLodExecuted;
};
// HSInfo: hull-shader statistics event, tagged with a draw id.
// Carries one 32-bit counter for each field of SWR_SHADER_STATS, in that
// struct's declaration order. The ArchRast handler constructs it from
// mShaderStats[SHADER_HULL], passing the counters positionally in this order.
event HSInfo
{
uint32_t drawId; // id of the draw the counters below are reported for
uint32_t numInstExecuted; // roughly the API-level instructions executed, not x86 instructions
// The remaining counters mirror the like-named fields of SWR_SHADER_STATS.
// NOTE(review): presumably per-instruction-type counts (sample*, gather4*, lod) -- confirm
// against the code that fills SWR_SHADER_STATS.
uint32_t numSampleExecuted;
uint32_t numSampleLExecuted;
uint32_t numSampleBExecuted;
uint32_t numSampleCExecuted;
uint32_t numSampleCLZExecuted;
uint32_t numSampleCDExecuted;
uint32_t numGather4Executed;
uint32_t numGather4CExecuted;
uint32_t numGather4CPOExecuted;
uint32_t numGather4CPOCExecuted;
uint32_t numLodExecuted;
};
// DSInfo: domain-shader statistics event, tagged with a draw id.
// Carries one 32-bit counter for each field of SWR_SHADER_STATS, in that
// struct's declaration order. The ArchRast handler constructs it from
// mShaderStats[SHADER_DOMAIN], passing the counters positionally in this order.
event DSInfo
{
uint32_t drawId; // id of the draw the counters below are reported for
uint32_t numInstExecuted; // roughly the API-level instructions executed, not x86 instructions
// The remaining counters mirror the like-named fields of SWR_SHADER_STATS.
// NOTE(review): presumably per-instruction-type counts (sample*, gather4*, lod) -- confirm
// against the code that fills SWR_SHADER_STATS.
uint32_t numSampleExecuted;
uint32_t numSampleLExecuted;
uint32_t numSampleBExecuted;
uint32_t numSampleCExecuted;
uint32_t numSampleCLZExecuted;
uint32_t numSampleCDExecuted;
uint32_t numGather4Executed;
uint32_t numGather4CExecuted;
uint32_t numGather4CPOExecuted;
uint32_t numGather4CPOCExecuted;
uint32_t numLodExecuted;
};
// GSInfo: geometry-shader statistics event, tagged with a draw id.
// Carries one 32-bit counter for each field of SWR_SHADER_STATS, in that
// struct's declaration order. The ArchRast handler constructs it from
// mShaderStats[SHADER_GEOMETRY], passing the counters positionally in this order.
event GSInfo
{
uint32_t drawId; // id of the draw the counters below are reported for
uint32_t numInstExecuted; // roughly the API-level instructions executed, not x86 instructions
// The remaining counters mirror the like-named fields of SWR_SHADER_STATS.
// NOTE(review): presumably per-instruction-type counts (sample*, gather4*, lod) -- confirm
// against the code that fills SWR_SHADER_STATS.
uint32_t numSampleExecuted;
uint32_t numSampleLExecuted;
uint32_t numSampleBExecuted;
uint32_t numSampleCExecuted;
uint32_t numSampleCLZExecuted;
uint32_t numSampleCDExecuted;
uint32_t numGather4Executed;
uint32_t numGather4CExecuted;
uint32_t numGather4CPOExecuted;
uint32_t numGather4CPOCExecuted;
uint32_t numLodExecuted;
};
// PSInfo: pixel-shader statistics event, tagged with a draw id.
// Carries one 32-bit counter for each field of SWR_SHADER_STATS, in that
// struct's declaration order. The ArchRast handler constructs it from
// mShaderStats[SHADER_PIXEL], passing the counters positionally in this order.
event PSInfo
{
uint32_t drawId; // id of the draw the counters below are reported for
uint32_t numInstExecuted; // roughly the API-level instructions executed, not x86 instructions
// The remaining counters mirror the like-named fields of SWR_SHADER_STATS.
// NOTE(review): presumably per-instruction-type counts (sample*, gather4*, lod) -- confirm
// against the code that fills SWR_SHADER_STATS.
uint32_t numSampleExecuted;
uint32_t numSampleLExecuted;
uint32_t numSampleBExecuted;
uint32_t numSampleCExecuted;
uint32_t numSampleCLZExecuted;
uint32_t numSampleCDExecuted;
uint32_t numGather4Executed;
uint32_t numGather4CExecuted;
uint32_t numGather4CPOExecuted;
uint32_t numGather4CPOCExecuted;
uint32_t numLodExecuted;
};
// CSInfo: compute-shader statistics event, tagged with a draw id.
// Carries one 32-bit counter for each field of SWR_SHADER_STATS, in that
// struct's declaration order. The ArchRast handler constructs it from
// mShaderStats[SHADER_COMPUTE], passing the counters positionally in this order.
event CSInfo
{
uint32_t drawId; // id of the draw the counters below are reported for
uint32_t numInstExecuted; // roughly the API-level instructions executed, not x86 instructions
// The remaining counters mirror the like-named fields of SWR_SHADER_STATS.
// NOTE(review): presumably per-instruction-type counts (sample*, gather4*, lod) -- confirm
// against the code that fills SWR_SHADER_STATS.
uint32_t numSampleExecuted;
uint32_t numSampleLExecuted;
uint32_t numSampleBExecuted;
uint32_t numSampleCExecuted;
uint32_t numSampleCLZExecuted;
uint32_t numSampleCDExecuted;
uint32_t numGather4Executed;
uint32_t numGather4CExecuted;
uint32_t numGather4CPOExecuted;
uint32_t numGather4CPOCExecuted;
uint32_t numLodExecuted;
};
\ No newline at end of file
......@@ -168,30 +168,30 @@ event DrawIndexedInstancedEvent
event VSStats
{
uint32_t numInstExecuted;
HANDLE hStats; // SWR_SHADER_STATS
};
event HSStats
{
uint32_t numInstExecuted;
HANDLE hStats; // SWR_SHADER_STATS
};
event DSStats
{
uint32_t numInstExecuted;
HANDLE hStats; // SWR_SHADER_STATS
};
event GSStats
{
uint32_t numInstExecuted;
HANDLE hStats; // SWR_SHADER_STATS
};
event PSStats
{
uint32_t numInstExecuted;
HANDLE hStats; // SWR_SHADER_STATS
};
event CSStats
{
uint32_t numInstExecuted;
HANDLE hStats; // SWR_SHADER_STATS
};
\ No newline at end of file
......@@ -88,7 +88,7 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC,
&csContext);
UPDATE_STAT_BE(CsInvocations, state.totalThreadsInGroup);
AR_EVENT(CSStats(csContext.stats.numInstExecuted));
AR_EVENT(CSStats((HANDLE)&csContext.stats));
RDTSC_END(BEDispatch, 1);
}
......
......@@ -1212,7 +1212,7 @@ void BackendPixelRate(DRAW_CONTEXT* pDC,
// update stats
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes)));
AR_EVENT(PSStats(psContext.stats.numInstExecuted));
AR_EVENT(PSStats((HANDLE)&psContext.stats));
// update active lanes to remove any discarded or oMask'd pixels
activeLanes = _simd_castsi_ps(_simd_and_si(
......
......@@ -207,7 +207,7 @@ void BackendSampleRate(DRAW_CONTEXT* pDC,
// update stats
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
AR_EVENT(PSStats(psContext.stats.numInstExecuted));
AR_EVENT(PSStats((HANDLE)&psContext.stats));
vCoverageMask = _simd_castsi_ps(psContext.activeMask);
......
......@@ -188,7 +188,7 @@ void BackendSingleSample(DRAW_CONTEXT* pDC,
// update stats
UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
AR_EVENT(PSStats(psContext.stats.numInstExecuted));
AR_EVENT(PSStats((HANDLE)&psContext.stats));
vCoverageMask = _simd_castsi_ps(psContext.activeMask);
......
......@@ -888,7 +888,7 @@ static void GeometryShaderStage(DRAW_CONTEXT* pDC,
// execute the geometry shader
state.pfnGsFunc(GetPrivateState(pDC), pWorkerData, &gsContext);
AR_EVENT(GSStats(gsContext.stats.numInstExecuted));
AR_EVENT(GSStats((HANDLE)&gsContext.stats));
for (uint32_t i = 0; i < KNOB_SIMD_WIDTH; ++i)
{
......@@ -1375,7 +1375,7 @@ static void TessellationStages(DRAW_CONTEXT* pDC,
RDTSC_END(FEHullShader, 0);
UPDATE_STAT_FE(HsInvocations, numPrims);
AR_EVENT(HSStats(hsContext.stats.numInstExecuted));
AR_EVENT(HSStats((HANDLE)&hsContext.stats));
const uint32_t* pPrimId = (const uint32_t*)&primID;
......@@ -1443,7 +1443,7 @@ static void TessellationStages(DRAW_CONTEXT* pDC,
state.pfnDsFunc(GetPrivateState(pDC), pWorkerData, &dsContext);
RDTSC_END(FEDomainShader, 0);
AR_EVENT(DSStats(dsContext.stats.numInstExecuted));
AR_EVENT(DSStats((HANDLE)&dsContext.stats));
dsInvocations += KNOB_SIMD_WIDTH;
}
......@@ -1950,15 +1950,15 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo
RDTSC_BEGIN(FEVertexShader, pDC->drawId);
#if USE_SIMD16_VS
state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_lo);
AR_EVENT(VSStats(vsContext_lo.stats.numInstExecuted));
AR_EVENT(VSStats((HANDLE)&vsContext_lo.stats));
#else
state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_lo);
AR_EVENT(VSStats(vsContext_lo.stats.numInstExecuted));
AR_EVENT(VSStats((HANDLE)&vsContext_lo.stats));
if ((i + KNOB_SIMD_WIDTH) < endVertex) // 1/2 of KNOB_SIMD16_WIDTH
{
state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_hi);
AR_EVENT(VSStats(vsContext_hi.stats.numInstExecuted));
AR_EVENT(VSStats((HANDLE)&vsContext_hi.stats));
}
#endif
RDTSC_END(FEVertexShader, 0);
......@@ -2214,7 +2214,7 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo
RDTSC_END(FEVertexShader, 0);
UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex));
AR_EVENT(VSStats(vsContext.stats.numInstExecuted));
AR_EVENT(VSStats((HANDLE)&vsContext.stats));
}
}
......
......@@ -221,7 +221,18 @@ struct SIMDVERTEX_T
/////////////////////////////////////////////////////////////////////////
struct SWR_SHADER_STATS
{
uint32_t numInstExecuted; // This is roughly the API instructions executed and not x86.
uint32_t numInstExecuted; // This is roughly the API instructions executed and not x86.
uint32_t numSampleExecuted;
uint32_t numSampleLExecuted;
uint32_t numSampleBExecuted;
uint32_t numSampleCExecuted;
uint32_t numSampleCLZExecuted;
uint32_t numSampleCDExecuted;
uint32_t numGather4Executed;
uint32_t numGather4CExecuted;
uint32_t numGather4CPOExecuted;
uint32_t numGather4CPOCExecuted;
uint32_t numLodExecuted;
};
//////////////////////////////////////////////////////////////////////////
......
......@@ -55,6 +55,9 @@ namespace SwrJit
STATS_STORE_TGSM = 15,
STATS_DISCARD = 16,
STATS_BARRIER = 17,
// ------------------
STATS_TOTAL_COUNTERS
};
using namespace llvm;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment