Commit 9d86a5ee authored by Bruce Cherniak, committed by Tim Rowley

swr: Remove stall waiting for core query counters.

When gathering query results, swr_gather_stats was
unnecessarily stalling the entire pipeline.  Results are now
collected asynchronously, with a fence marking completion.
Reviewed-By: George Kyriazis <george.kyriazis@intel.com>
parent 76a36ac3
......@@ -105,12 +105,6 @@ swr_fence_reference(struct pipe_screen *screen,
swr_fence_destroy(old);
}
/*
 * Report whether a fence is done.
 *
 * Casts the generic fence handle to the driver's swr_fence and returns
 * true when its `read` and `write` fields are equal, false otherwise.
 * NOTE(review): presumably `read` catches up to `write` once the work
 * associated with the fence has retired -- confirm against the
 * struct swr_fence definition.
 */
static INLINE boolean
swr_is_fence_done(struct pipe_fence_handle *fence_handle)
{
struct swr_fence *fence = swr_fence(fence_handle);
return (fence->read == fence->write);
}
/*
* Wait for the fence to finish.
......
......@@ -45,6 +45,14 @@ swr_fence(struct pipe_fence_handle *fence)
return (struct swr_fence *)fence;
}
/*
 * Report whether a fence is done.
 *
 * Casts the generic fence handle to the driver's swr_fence and returns
 * true when its `read` and `write` fields are equal, false otherwise.
 * NOTE(review): presumably `read` catches up to `write` once the work
 * associated with the fence has retired -- confirm against the
 * struct swr_fence definition.
 */
static INLINE boolean
swr_is_fence_done(struct pipe_fence_handle *fence_handle)
{
struct swr_fence *fence = swr_fence(fence_handle);
return (fence->read == fence->write);
}
static INLINE boolean
swr_is_fence_pending(struct pipe_fence_handle *fence_handle)
{
......
......@@ -62,10 +62,8 @@ swr_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
struct swr_query *pq = swr_query(q);
if (pq->fence) {
if (!swr_is_fence_pending(pq->fence)) {
swr_fence_submit(swr_context(pipe), pq->fence);
if (swr_is_fence_pending(pq->fence))
swr_fence_finish(pipe->screen, pq->fence, 0);
}
swr_fence_reference(pipe->screen, &pq->fence, NULL);
}
......@@ -73,100 +71,45 @@ swr_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
}
// XXX Create a fence callback, rather than stalling SwrWaitForIdle
static void
swr_gather_stats(struct pipe_context *pipe, struct swr_query *pq)
{
struct swr_context *ctx = swr_context(pipe);
assert(pq->result);
union pipe_query_result *result = pq->result;
struct swr_query_result *result = pq->result;
boolean enable_stats = pq->enable_stats;
SWR_STATS swr_stats = {0};
if (pq->fence) {
if (!swr_is_fence_pending(pq->fence)) {
swr_fence_submit(ctx, pq->fence);
swr_fence_finish(pipe->screen, pq->fence, 0);
}
swr_fence_reference(pipe->screen, &pq->fence, NULL);
}
/*
* These queries don't need SWR Stats enabled in the core
* Set and return.
*/
/* A few results don't require the core, so don't involve it */
switch (pq->type) {
case PIPE_QUERY_TIMESTAMP:
case PIPE_QUERY_TIME_ELAPSED:
result->u64 = swr_get_timestamp(pipe->screen);
return;
result->timestamp = swr_get_timestamp(pipe->screen);
break;
case PIPE_QUERY_TIMESTAMP_DISJOINT:
/* nothing to do here */
return;
break;
case PIPE_QUERY_GPU_FINISHED:
result->b = TRUE; /* XXX TODO Add an api func to SWR to compare drawId
vs LastRetiredId? */
return;
/* nothing to do here */
break;
default:
/* Any query that needs SwrCore stats */
break;
}
/*
* All other results are collected from SwrCore counters
*/
/*
* All other results are collected from SwrCore counters via
* SwrGetStats. This returns immediately, but results are later filled
* in by the backend. Fence status is the only indication of
* completion. */
SwrGetStats(ctx->swrContext, &result->core);
if (!pq->fence) {
struct swr_screen *screen = swr_screen(pipe->screen);
swr_fence_reference(pipe->screen, &pq->fence, screen->flush_fence);
}
swr_fence_submit(ctx, pq->fence);
/* XXX, Should turn this into a fence callback and skip the stall */
SwrGetStats(ctx->swrContext, &swr_stats);
/* SwrGetStats returns immediately, wait for collection */
SwrWaitForIdle(ctx->swrContext);
/* Only change stat collection if there are no active queries */
if (ctx->active_queries == 0)
SwrEnableStats(ctx->swrContext, enable_stats);
switch (pq->type) {
case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_OCCLUSION_COUNTER:
result->u64 = swr_stats.DepthPassCount;
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
result->u64 = swr_stats.IaPrimitives;
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
result->u64 = swr_stats.SoNumPrimsWritten[pq->index];
break;
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE: {
struct pipe_query_data_so_statistics *so_stats = &result->so_statistics;
so_stats->num_primitives_written =
swr_stats.SoNumPrimsWritten[pq->index];
so_stats->primitives_storage_needed =
swr_stats.SoPrimStorageNeeded[pq->index];
} break;
case PIPE_QUERY_PIPELINE_STATISTICS: {
struct pipe_query_data_pipeline_statistics *p_stats =
&result->pipeline_statistics;
p_stats->ia_vertices = swr_stats.IaVertices;
p_stats->ia_primitives = swr_stats.IaPrimitives;
p_stats->vs_invocations = swr_stats.VsInvocations;
p_stats->gs_invocations = swr_stats.GsInvocations;
p_stats->gs_primitives = swr_stats.GsPrimitives;
p_stats->c_invocations = swr_stats.CPrimitives;
p_stats->c_primitives = swr_stats.CPrimitives;
p_stats->ps_invocations = swr_stats.PsInvocations;
p_stats->hs_invocations = swr_stats.HsInvocations;
p_stats->ds_invocations = swr_stats.DsInvocations;
p_stats->cs_invocations = swr_stats.CsInvocations;
} break;
default:
assert(0 && "Unsupported query");
break;
}
/* Only change stat collection if there are no active queries */
if (ctx->active_queries == 0)
SwrEnableStats(ctx->swrContext, enable_stats);
}
......@@ -176,16 +119,16 @@ swr_get_query_result(struct pipe_context *pipe,
boolean wait,
union pipe_query_result *result)
{
struct swr_context *ctx = swr_context(pipe);
struct swr_query *pq = swr_query(q);
struct swr_query_result *start = &pq->start;
struct swr_query_result *end = &pq->end;
unsigned index = pq->index;
if (pq->fence) {
if (!swr_is_fence_pending(pq->fence)) {
swr_fence_submit(ctx, pq->fence);
if (!wait)
return FALSE;
swr_fence_finish(pipe->screen, pq->fence, 0);
}
if (!wait && !swr_is_fence_done(pq->fence))
return FALSE;
swr_fence_finish(pipe->screen, pq->fence, 0);
swr_fence_reference(pipe->screen, &pq->fence, NULL);
}
......@@ -194,62 +137,67 @@ swr_get_query_result(struct pipe_context *pipe,
switch (pq->type) {
/* Booleans */
case PIPE_QUERY_OCCLUSION_PREDICATE:
result->b = pq->end.u64 != pq->start.u64 ? TRUE : FALSE;
result->b = end->core.DepthPassCount != start->core.DepthPassCount;
break;
case PIPE_QUERY_GPU_FINISHED:
result->b = pq->end.b;
result->b = TRUE;
break;
/* Counters */
case PIPE_QUERY_OCCLUSION_COUNTER:
result->u64 = end->core.DepthPassCount - start->core.DepthPassCount;
break;
case PIPE_QUERY_TIMESTAMP:
case PIPE_QUERY_TIME_ELAPSED:
result->u64 = end->timestamp - start->timestamp;
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
result->u64 = end->core.IaPrimitives - start->core.IaPrimitives;
case PIPE_QUERY_PRIMITIVES_EMITTED:
result->u64 = pq->end.u64 - pq->start.u64;
result->u64 = end->core.SoNumPrimsWritten[index]
- start->core.SoNumPrimsWritten[index];
break;
/* Structures */
case PIPE_QUERY_SO_STATISTICS: {
struct pipe_query_data_so_statistics *so_stats = &result->so_statistics;
struct pipe_query_data_so_statistics *start = &pq->start.so_statistics;
struct pipe_query_data_so_statistics *end = &pq->end.so_statistics;
struct SWR_STATS *start = &pq->start.core;
struct SWR_STATS *end = &pq->end.core;
so_stats->num_primitives_written =
end->num_primitives_written - start->num_primitives_written;
end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index];
so_stats->primitives_storage_needed =
end->primitives_storage_needed - start->primitives_storage_needed;
end->SoPrimStorageNeeded[index] - start->SoPrimStorageNeeded[index];
} break;
case PIPE_QUERY_TIMESTAMP_DISJOINT: {
case PIPE_QUERY_TIMESTAMP_DISJOINT:
/* os_get_time_nano returns nanoseconds */
result->timestamp_disjoint.frequency = UINT64_C(1000000000);
result->timestamp_disjoint.disjoint = FALSE;
} break;
break;
case PIPE_QUERY_PIPELINE_STATISTICS: {
struct pipe_query_data_pipeline_statistics *p_stats =
&result->pipeline_statistics;
struct pipe_query_data_pipeline_statistics *start =
&pq->start.pipeline_statistics;
struct pipe_query_data_pipeline_statistics *end =
&pq->end.pipeline_statistics;
p_stats->ia_vertices = end->ia_vertices - start->ia_vertices;
p_stats->ia_primitives = end->ia_primitives - start->ia_primitives;
p_stats->vs_invocations = end->vs_invocations - start->vs_invocations;
p_stats->gs_invocations = end->gs_invocations - start->gs_invocations;
p_stats->gs_primitives = end->gs_primitives - start->gs_primitives;
p_stats->c_invocations = end->c_invocations - start->c_invocations;
p_stats->c_primitives = end->c_primitives - start->c_primitives;
p_stats->ps_invocations = end->ps_invocations - start->ps_invocations;
p_stats->hs_invocations = end->hs_invocations - start->hs_invocations;
p_stats->ds_invocations = end->ds_invocations - start->ds_invocations;
p_stats->cs_invocations = end->cs_invocations - start->cs_invocations;
} break;
struct SWR_STATS *start = &pq->start.core;
struct SWR_STATS *end = &pq->end.core;
p_stats->ia_vertices = end->IaVertices - start->IaVertices;
p_stats->ia_primitives = end->IaPrimitives - start->IaPrimitives;
p_stats->vs_invocations = end->VsInvocations - start->VsInvocations;
p_stats->gs_invocations = end->GsInvocations - start->GsInvocations;
p_stats->gs_primitives = end->GsPrimitives - start->GsPrimitives;
p_stats->c_invocations = end->CPrimitives - start->CPrimitives;
p_stats->c_primitives = end->CPrimitives - start->CPrimitives;
p_stats->ps_invocations = end->PsInvocations - start->PsInvocations;
p_stats->hs_invocations = end->HsInvocations - start->HsInvocations;
p_stats->ds_invocations = end->DsInvocations - start->DsInvocations;
p_stats->cs_invocations = end->CsInvocations - start->CsInvocations;
} break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE: {
struct pipe_query_data_so_statistics *start = &pq->start.so_statistics;
struct pipe_query_data_so_statistics *end = &pq->end.so_statistics;
struct SWR_STATS *start = &pq->start.core;
struct SWR_STATS *end = &pq->end.core;
uint64_t num_primitives_written =
end->num_primitives_written - start->num_primitives_written;
end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index];
uint64_t primitives_storage_needed =
end->primitives_storage_needed - start->primitives_storage_needed;
end->SoPrimStorageNeeded[index] - start->SoPrimStorageNeeded[index];
result->b = num_primitives_written > primitives_storage_needed;
} break;
}
break;
default:
assert(0 && "Unsupported query");
break;
......@@ -264,6 +212,8 @@ swr_begin_query(struct pipe_context *pipe, struct pipe_query *q)
struct swr_context *ctx = swr_context(pipe);
struct swr_query *pq = swr_query(q);
assert(!pq->enable_stats && "swr_begin_query: Query is already active!");
/* Initialize Results */
memset(&pq->start, 0, sizeof(pq->start));
memset(&pq->end, 0, sizeof(pq->end));
......@@ -276,7 +226,7 @@ swr_begin_query(struct pipe_context *pipe, struct pipe_query *q)
/* override start timestamp to 0 for TIMESTAMP query */
if (pq->type == PIPE_QUERY_TIMESTAMP)
pq->start.u64 = 0;
pq->start.timestamp = 0;
return true;
}
......
......@@ -27,13 +27,18 @@
#include <limits.h>
/*
 * Raw data captured for a query at a single point in time.
 * struct swr_query holds two of these (`start` and `end`); the reported
 * result is derived from the difference between them.
 */
struct swr_query_result {
SWR_STATS core; /* SwrCore counters, filled in via SwrGetStats */
uint64_t timestamp; /* value returned by swr_get_timestamp */
};
struct swr_query {
unsigned type; /* PIPE_QUERY_* */
unsigned index;
union pipe_query_result *result;
union pipe_query_result start;
union pipe_query_result end;
struct swr_query_result *result;
struct swr_query_result start;
struct swr_query_result end;
struct pipe_fence_handle *fence;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment