Commit c988bf94 authored by Jordan Justen

i965: enable ARB_instanced_arrays extension

Set the step_rate value when drawing to implement
ARB_instanced_arrays for gen >= 4.

v2:
 * leave (total_size < 2048) check where it was to only make
   this check once rather than once for each array.
Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
parent 345ee593
...@@ -676,6 +676,7 @@ struct brw_vertex_buffer { ...@@ -676,6 +676,7 @@ struct brw_vertex_buffer {
uint32_t offset; uint32_t offset;
/** Byte stride between elements in the uploaded array */ /** Byte stride between elements in the uploaded array */
GLuint stride; GLuint stride;
GLuint step_rate;
}; };
struct brw_vertex_element { struct brw_vertex_element {
const struct gl_client_array *glarray; const struct gl_client_array *glarray;
...@@ -738,6 +739,7 @@ struct brw_context ...@@ -738,6 +739,7 @@ struct brw_context
uint32_t handle; uint32_t handle;
uint32_t offset; uint32_t offset;
uint32_t stride; uint32_t stride;
uint32_t step_rate;
} current_buffers[VERT_ATTRIB_MAX]; } current_buffers[VERT_ATTRIB_MAX];
struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
...@@ -1046,6 +1048,8 @@ struct brw_context ...@@ -1046,6 +1048,8 @@ struct brw_context
bool in_progress; bool in_progress;
bool enable_cut_index; bool enable_cut_index;
} prim_restart; } prim_restart;
uint32_t num_instances;
}; };
......
...@@ -466,6 +466,7 @@ static bool brw_try_draw_prims( struct gl_context *ctx, ...@@ -466,6 +466,7 @@ static bool brw_try_draw_prims( struct gl_context *ctx,
intel_batchbuffer_require_space(intel, estimated_max_prim_size, false); intel_batchbuffer_require_space(intel, estimated_max_prim_size, false);
intel_batchbuffer_save_state(intel); intel_batchbuffer_save_state(intel);
brw->num_instances = prim->num_instances;
if (intel->gen < 6) if (intel->gen < 6)
brw_set_prim(brw, &prim[i]); brw_set_prim(brw, &prim[i]);
else else
......
...@@ -361,6 +361,7 @@ static void brw_prepare_vertices(struct brw_context *brw) ...@@ -361,6 +361,7 @@ static void brw_prepare_vertices(struct brw_context *brw)
unsigned int min_index = brw->vb.min_index; unsigned int min_index = brw->vb.min_index;
unsigned int max_index = brw->vb.max_index; unsigned int max_index = brw->vb.max_index;
int delta, i, j; int delta, i, j;
GLboolean can_merge_uploads = GL_TRUE;
struct brw_vertex_element *upload[VERT_ATTRIB_MAX]; struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
GLuint nr_uploads = 0; GLuint nr_uploads = 0;
...@@ -403,6 +404,7 @@ static void brw_prepare_vertices(struct brw_context *brw) ...@@ -403,6 +404,7 @@ static void brw_prepare_vertices(struct brw_context *brw)
const struct gl_client_array *other = brw->vb.enabled[k]->glarray; const struct gl_client_array *other = brw->vb.enabled[k]->glarray;
if (glarray->BufferObj == other->BufferObj && if (glarray->BufferObj == other->BufferObj &&
glarray->StrideB == other->StrideB && glarray->StrideB == other->StrideB &&
glarray->InstanceDivisor == other->InstanceDivisor &&
(uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB) (uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB)
{ {
input->buffer = brw->vb.enabled[k]->buffer; input->buffer = brw->vb.enabled[k]->buffer;
...@@ -420,6 +422,7 @@ static void brw_prepare_vertices(struct brw_context *brw) ...@@ -420,6 +422,7 @@ static void brw_prepare_vertices(struct brw_context *brw)
drm_intel_bo_reference(buffer->bo); drm_intel_bo_reference(buffer->bo);
buffer->offset += (uintptr_t)glarray->Ptr; buffer->offset += (uintptr_t)glarray->Ptr;
buffer->stride = glarray->StrideB; buffer->stride = glarray->StrideB;
buffer->step_rate = glarray->InstanceDivisor;
input->buffer = j++; input->buffer = j++;
input->offset = 0; input->offset = 0;
...@@ -465,8 +468,13 @@ static void brw_prepare_vertices(struct brw_context *brw) ...@@ -465,8 +468,13 @@ static void brw_prepare_vertices(struct brw_context *brw)
} }
upload[nr_uploads++] = input; upload[nr_uploads++] = input;
total_size = ALIGN(total_size, type_size); total_size = ALIGN(total_size, type_size);
total_size += input->element_size; total_size += input->element_size;
if (glarray->InstanceDivisor != 0) {
can_merge_uploads = GL_FALSE;
}
} }
} }
...@@ -504,7 +512,7 @@ static void brw_prepare_vertices(struct brw_context *brw) ...@@ -504,7 +512,7 @@ static void brw_prepare_vertices(struct brw_context *brw)
nr_uploads = 0; nr_uploads = 0;
} }
else if (total_size < 2048) { else if ((total_size < 2048) && can_merge_uploads) {
/* Upload non-interleaved arrays into a single interleaved array */ /* Upload non-interleaved arrays into a single interleaved array */
struct brw_vertex_buffer *buffer; struct brw_vertex_buffer *buffer;
int count = MAX2(max_index - min_index + 1, 1); int count = MAX2(max_index - min_index + 1, 1);
...@@ -539,6 +547,7 @@ static void brw_prepare_vertices(struct brw_context *brw) ...@@ -539,6 +547,7 @@ static void brw_prepare_vertices(struct brw_context *brw)
intel_upload_unmap(&brw->intel, map, offset * count, offset, intel_upload_unmap(&brw->intel, map, offset * count, offset,
&buffer->bo, &buffer->offset); &buffer->bo, &buffer->offset);
buffer->stride = offset; buffer->stride = offset;
buffer->step_rate = 0;
buffer->offset -= delta * offset; buffer->offset -= delta * offset;
nr_uploads = 0; nr_uploads = 0;
...@@ -547,9 +556,21 @@ static void brw_prepare_vertices(struct brw_context *brw) ...@@ -547,9 +556,21 @@ static void brw_prepare_vertices(struct brw_context *brw)
/* Upload non-interleaved arrays */ /* Upload non-interleaved arrays */
for (i = 0; i < nr_uploads; i++) { for (i = 0; i < nr_uploads; i++) {
struct brw_vertex_buffer *buffer = &brw->vb.buffers[j]; struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
copy_array_to_vbo_array(brw, upload[i], min_index, max_index, if (upload[i]->glarray->InstanceDivisor == 0) {
buffer, upload[i]->element_size); copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
buffer, upload[i]->element_size);
} else {
/* This is an instanced attribute, since its InstanceDivisor
* is not zero. Therefore, its data will be stepped after the
* instanced draw has been run InstanceDivisor times.
*/
uint32_t instanced_attr_max_index =
(brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor;
copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index,
buffer, upload[i]->element_size);
}
buffer->offset -= delta * buffer->stride; buffer->offset -= delta * buffer->stride;
buffer->step_rate = upload[i]->glarray->InstanceDivisor;
upload[i]->buffer = j++; upload[i]->buffer = j++;
upload[i]->offset = 0; upload[i]->offset = 0;
} }
...@@ -561,7 +582,8 @@ static void brw_prepare_vertices(struct brw_context *brw) ...@@ -561,7 +582,8 @@ static void brw_prepare_vertices(struct brw_context *brw)
int d; int d;
if (brw->vb.current_buffers[i].handle != brw->vb.buffers[i].bo->handle || if (brw->vb.current_buffers[i].handle != brw->vb.buffers[i].bo->handle ||
brw->vb.current_buffers[i].stride != brw->vb.buffers[i].stride) brw->vb.current_buffers[i].stride != brw->vb.buffers[i].stride ||
brw->vb.current_buffers[i].step_rate != brw->vb.buffers[i].step_rate)
break; break;
d = brw->vb.buffers[i].offset - brw->vb.current_buffers[i].offset; d = brw->vb.buffers[i].offset - brw->vb.current_buffers[i].offset;
...@@ -643,9 +665,15 @@ static void brw_emit_vertices(struct brw_context *brw) ...@@ -643,9 +665,15 @@ static void brw_emit_vertices(struct brw_context *brw)
uint32_t dw0; uint32_t dw0;
if (intel->gen >= 6) { if (intel->gen >= 6) {
dw0 = GEN6_VB0_ACCESS_VERTEXDATA | (i << GEN6_VB0_INDEX_SHIFT); dw0 = buffer->step_rate
? GEN6_VB0_ACCESS_INSTANCEDATA
: GEN6_VB0_ACCESS_VERTEXDATA;
dw0 |= i << GEN6_VB0_INDEX_SHIFT;
} else { } else {
dw0 = BRW_VB0_ACCESS_VERTEXDATA | (i << BRW_VB0_INDEX_SHIFT); dw0 = buffer->step_rate
? BRW_VB0_ACCESS_INSTANCEDATA
: BRW_VB0_ACCESS_VERTEXDATA;
dw0 |= i << BRW_VB0_INDEX_SHIFT;
} }
if (intel->gen >= 7) if (intel->gen >= 7)
...@@ -657,11 +685,12 @@ static void brw_emit_vertices(struct brw_context *brw) ...@@ -657,11 +685,12 @@ static void brw_emit_vertices(struct brw_context *brw)
OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->bo->size - 1); OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->bo->size - 1);
} else } else
OUT_BATCH(0); OUT_BATCH(0);
OUT_BATCH(0); /* Instance data step rate */ OUT_BATCH(buffer->step_rate);
brw->vb.current_buffers[i].handle = buffer->bo->handle; brw->vb.current_buffers[i].handle = buffer->bo->handle;
brw->vb.current_buffers[i].offset = buffer->offset; brw->vb.current_buffers[i].offset = buffer->offset;
brw->vb.current_buffers[i].stride = buffer->stride; brw->vb.current_buffers[i].stride = buffer->stride;
brw->vb.current_buffers[i].step_rate = buffer->step_rate;
} }
brw->vb.nr_current_buffers = i; brw->vb.nr_current_buffers = i;
ADVANCE_BATCH(); ADVANCE_BATCH();
......
...@@ -115,6 +115,7 @@ intelInitExtensions(struct gl_context *ctx) ...@@ -115,6 +115,7 @@ intelInitExtensions(struct gl_context *ctx)
ctx->Extensions.ARB_depth_buffer_float = true; ctx->Extensions.ARB_depth_buffer_float = true;
ctx->Extensions.ARB_depth_clamp = true; ctx->Extensions.ARB_depth_clamp = true;
ctx->Extensions.ARB_draw_instanced = true; ctx->Extensions.ARB_draw_instanced = true;
ctx->Extensions.ARB_instanced_arrays = true;
ctx->Extensions.ARB_fragment_coord_conventions = true; ctx->Extensions.ARB_fragment_coord_conventions = true;
ctx->Extensions.ARB_fragment_program_shadow = true; ctx->Extensions.ARB_fragment_program_shadow = true;
ctx->Extensions.ARB_fragment_shader = true; ctx->Extensions.ARB_fragment_shader = true;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment