Commit ef83c9b7 authored by Dave Airlie's avatar Dave Airlie

st/mesa: add double input support including lowering (v3.1)

This takes a different approach to previously, we cannot index into the
inputMapping with anything but the mesa attribute index, so we can't use
the just add one to index trick, we need more info to add one to it
after we've mapped the input.

(Fixed copy propgation and cleaned up a little)

v2: drop float64 format check, just attr->Doubles.
merge enable patch.
v3: cleanup code a bit.
v3.1: minor review fixups (comment, newline) (Ilia)
Reviewed-by: Ilia Mirkin's avatarIlia Mirkin <imirkin@alum.mit.edu>
Signed-off-by: default avatarDave Airlie <airlied@redhat.com>
parent c4254ee5
......@@ -44,7 +44,6 @@
#include "cso_cache/cso_context.h"
#include "util/u_math.h"
#include "main/bufferobj.h"
#include "main/glformats.h"
......@@ -311,6 +310,18 @@ st_pipe_vertex_format(GLenum type, GLuint size, GLenum format,
return PIPE_FORMAT_NONE; /* silence compiler warning */
}
static const struct gl_client_array *
get_client_array(const struct st_vertex_program *vp,
const struct gl_client_array **arrays,
int attr)
{
const GLuint mesaAttr = vp->index_to_input[attr];
/* st_program uses 0xffffffff to denote a double placeholder attribute */
if (mesaAttr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
return NULL;
return arrays[mesaAttr];
}
/**
* Examine the active arrays to determine if we have interleaved
* vertex arrays all living in one VBO, or all living in user space.
......@@ -327,11 +338,16 @@ is_interleaved_arrays(const struct st_vertex_program *vp,
GLboolean userSpaceBuffer = GL_FALSE;
for (attr = 0; attr < vpv->num_inputs; attr++) {
const GLuint mesaAttr = vp->index_to_input[attr];
const struct gl_client_array *array = arrays[mesaAttr];
const struct gl_buffer_object *bufObj = array->BufferObj;
const GLsizei stride = array->StrideB; /* in bytes */
const struct gl_client_array *array;
const struct gl_buffer_object *bufObj;
GLsizei stride;
array = get_client_array(vp, arrays, attr);
if (!array)
continue;
stride = array->StrideB; /* in bytes */
bufObj = array->BufferObj;
if (attr == 0) {
/* save info about the first array */
firstStride = stride;
......@@ -358,6 +374,55 @@ is_interleaved_arrays(const struct st_vertex_program *vp,
return GL_TRUE;
}
static void init_velement(struct pipe_vertex_element *velement,
int src_offset, int format,
int instance_divisor, int vbo_index)
{
velement->src_offset = src_offset;
velement->src_format = format;
velement->instance_divisor = instance_divisor;
velement->vertex_buffer_index = vbo_index;
assert(velement->src_format);
}
static void init_velement_lowered(struct st_context *st,
struct pipe_vertex_element *velements,
int src_offset, int format,
int instance_divisor, int vbo_index,
int nr_components, GLboolean doubles,
GLuint *attr_idx)
{
int idx = *attr_idx;
if (doubles) {
int lower_format;
if (nr_components == 1)
lower_format = PIPE_FORMAT_R32G32_UINT;
else if (nr_components >= 2)
lower_format = PIPE_FORMAT_R32G32B32A32_UINT;
init_velement(&velements[idx], src_offset,
lower_format, instance_divisor, vbo_index);
idx++;
if (nr_components > 2) {
if (nr_components == 3)
lower_format = PIPE_FORMAT_R32G32_UINT;
else if (nr_components >= 4)
lower_format = PIPE_FORMAT_R32G32B32A32_UINT;
init_velement(&velements[idx], src_offset + 4 * sizeof(float),
lower_format, instance_divisor, vbo_index);
idx++;
}
} else {
init_velement(&velements[idx], src_offset,
format, instance_divisor, vbo_index);
idx++;
}
*attr_idx = idx;
}
/**
* Set up for drawing interleaved arrays that all live in one VBO
* or all live in user space.
......@@ -365,13 +430,15 @@ is_interleaved_arrays(const struct st_vertex_program *vp,
* \param velements returns vertex element info
*/
static boolean
setup_interleaved_attribs(const struct st_vertex_program *vp,
setup_interleaved_attribs(struct st_context *st,
const struct st_vertex_program *vp,
const struct st_vp_variant *vpv,
const struct gl_client_array **arrays,
struct pipe_vertex_buffer *vbuffer,
struct pipe_vertex_element velements[])
struct pipe_vertex_element velements[],
unsigned *num_velements)
{
GLuint attr;
GLuint attr, attr_idx;
const GLubyte *low_addr = NULL;
GLboolean usingVBO; /* all arrays in a VBO? */
struct gl_buffer_object *bufobj;
......@@ -381,8 +448,10 @@ setup_interleaved_attribs(const struct st_vertex_program *vp,
* Init bufobj and stride.
*/
if (vpv->num_inputs) {
const GLuint mesaAttr0 = vp->index_to_input[0];
const struct gl_client_array *array = arrays[mesaAttr0];
const struct gl_client_array *array;
array = get_client_array(vp, arrays, 0);
assert(array);
/* Since we're doing interleaved arrays, we know there'll be at most
* one buffer object and the stride will be the same for all arrays.
......@@ -394,7 +463,11 @@ setup_interleaved_attribs(const struct st_vertex_program *vp,
low_addr = arrays[vp->index_to_input[0]]->Ptr;
for (attr = 1; attr < vpv->num_inputs; attr++) {
const GLubyte *start = arrays[vp->index_to_input[attr]]->Ptr;
const GLubyte *start;
array = get_client_array(vp, arrays, attr);
if (!array)
continue;
start = array->Ptr;
low_addr = MIN2(low_addr, start);
}
}
......@@ -408,25 +481,33 @@ setup_interleaved_attribs(const struct st_vertex_program *vp,
/* are the arrays in user space? */
usingVBO = _mesa_is_bufferobj(bufobj);
attr_idx = 0;
for (attr = 0; attr < vpv->num_inputs; attr++) {
const GLuint mesaAttr = vp->index_to_input[attr];
const struct gl_client_array *array = arrays[mesaAttr];
unsigned src_offset = (unsigned) (array->Ptr - low_addr);
const struct gl_client_array *array;
unsigned src_offset;
unsigned src_format;
array = get_client_array(vp, arrays, attr);
if (!array)
continue;
src_offset = (unsigned) (array->Ptr - low_addr);
assert(array->_ElementSize ==
_mesa_bytes_per_vertex_attrib(array->Size, array->Type));
velements[attr].src_offset = src_offset;
velements[attr].instance_divisor = array->InstanceDivisor;
velements[attr].vertex_buffer_index = 0;
velements[attr].src_format = st_pipe_vertex_format(array->Type,
array->Size,
array->Format,
array->Normalized,
array->Integer);
assert(velements[attr].src_format);
src_format = st_pipe_vertex_format(array->Type,
array->Size,
array->Format,
array->Normalized,
array->Integer);
init_velement_lowered(st, velements, src_offset, src_format,
array->InstanceDivisor, 0,
array->Size, array->Doubles, &attr_idx);
}
*num_velements = attr_idx;
/*
* Return the vbuffer info and setup user-space attrib info, if needed.
*/
......@@ -472,17 +553,25 @@ setup_non_interleaved_attribs(struct st_context *st,
const struct st_vp_variant *vpv,
const struct gl_client_array **arrays,
struct pipe_vertex_buffer vbuffer[],
struct pipe_vertex_element velements[])
struct pipe_vertex_element velements[],
unsigned *num_velements)
{
struct gl_context *ctx = st->ctx;
GLuint attr;
GLuint attr, attr_idx = 0;
for (attr = 0; attr < vpv->num_inputs; attr++) {
const GLuint mesaAttr = vp->index_to_input[attr];
const struct gl_client_array *array = arrays[mesaAttr];
struct gl_buffer_object *bufobj = array->BufferObj;
GLsizei stride = array->StrideB;
const struct gl_client_array *array;
struct gl_buffer_object *bufobj;
GLsizei stride;
unsigned src_format;
array = get_client_array(vp, arrays, attr);
if (!array)
continue;
stride = array->StrideB;
bufobj = array->BufferObj;
assert(array->_ElementSize ==
_mesa_bytes_per_vertex_attrib(array->Size, array->Type));
......@@ -524,16 +613,19 @@ setup_non_interleaved_attribs(struct st_context *st,
/* common-case setup */
vbuffer[attr].stride = stride; /* in bytes */
velements[attr].src_offset = 0;
velements[attr].instance_divisor = array->InstanceDivisor;
velements[attr].vertex_buffer_index = attr;
velements[attr].src_format = st_pipe_vertex_format(array->Type,
array->Size,
array->Format,
array->Normalized,
array->Integer);
assert(velements[attr].src_format);
src_format = st_pipe_vertex_format(array->Type,
array->Size,
array->Format,
array->Normalized,
array->Integer);
init_velement_lowered(st, velements, 0, src_format,
array->InstanceDivisor, attr,
array->Size, array->Doubles, &attr_idx);
}
*num_velements = attr_idx;
return TRUE;
}
......@@ -563,25 +655,23 @@ static void update_array(struct st_context *st)
* Setup the vbuffer[] and velements[] arrays.
*/
if (is_interleaved_arrays(vp, vpv, arrays)) {
if (!setup_interleaved_attribs(vp, vpv, arrays, vbuffer, velements)) {
if (!setup_interleaved_attribs(st, vp, vpv, arrays, vbuffer, velements, &num_velements)) {
st->vertex_array_out_of_memory = TRUE;
return;
}
num_vbuffers = 1;
num_velements = vpv->num_inputs;
if (num_velements == 0)
num_vbuffers = 0;
}
else {
if (!setup_non_interleaved_attribs(st, vp, vpv, arrays, vbuffer,
velements)) {
velements, &num_velements)) {
st->vertex_array_out_of_memory = TRUE;
return;
}
num_vbuffers = vpv->num_inputs;
num_velements = vpv->num_inputs;
}
cso_set_vertex_buffers(st->cso_context, 0, num_vbuffers, vbuffer);
......
......@@ -909,6 +909,8 @@ void st_init_extensions(struct pipe_screen *screen,
if (screen->get_shader_param(screen, PIPE_SHADER_VERTEX,
PIPE_SHADER_CAP_DOUBLES) &&
screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
PIPE_SHADER_CAP_DOUBLES))
PIPE_SHADER_CAP_DOUBLES)) {
extensions->ARB_gpu_shader_fp64 = GL_TRUE;
extensions->ARB_vertex_attrib_64bit = GL_TRUE;
}
}
......@@ -88,6 +88,7 @@ public:
this->reladdr = NULL;
this->reladdr2 = NULL;
this->has_index2 = false;
this->double_reg2 = false;
}
st_src_reg(gl_register_file file, int index, int type)
......@@ -101,6 +102,7 @@ public:
this->reladdr = NULL;
this->reladdr2 = NULL;
this->has_index2 = false;
this->double_reg2 = false;
}
st_src_reg(gl_register_file file, int index, int type, int index2D)
......@@ -114,6 +116,7 @@ public:
this->reladdr = NULL;
this->reladdr2 = NULL;
this->has_index2 = false;
this->double_reg2 = false;
}
st_src_reg()
......@@ -127,6 +130,7 @@ public:
this->reladdr = NULL;
this->reladdr2 = NULL;
this->has_index2 = false;
this->double_reg2 = false;
}
explicit st_src_reg(st_dst_reg reg);
......@@ -141,6 +145,11 @@ public:
st_src_reg *reladdr;
st_src_reg *reladdr2;
bool has_index2;
/*
* Is this the second half of a double register pair?
* currently used for input mapping only.
*/
bool double_reg2;
};
class st_dst_reg {
......@@ -197,6 +206,7 @@ st_src_reg::st_src_reg(st_dst_reg reg)
this->index2D = 0;
this->reladdr2 = NULL;
this->has_index2 = false;
this->double_reg2 = false;
}
st_dst_reg::st_dst_reg(st_src_reg reg)
......@@ -677,8 +687,10 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
if (dinst->src[j].type == GLSL_TYPE_DOUBLE) {
dinst->src[j].index = initial_src_idx[j];
if (swz > 1)
if (swz > 1) {
dinst->src[j].double_reg2 = true;
dinst->src[j].index++;
}
if (swz & 1)
dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
......@@ -3705,6 +3717,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
} else {
if (first->src[0].file != copy_chan->src[0].file ||
first->src[0].index != copy_chan->src[0].index ||
first->src[0].double_reg2 != copy_chan->src[0].double_reg2 ||
first->src[0].index2D != copy_chan->src[0].index2D) {
good = false;
break;
......@@ -3720,6 +3733,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
inst->src[r].index = first->src[0].index;
inst->src[r].index2D = first->src[0].index2D;
inst->src[r].has_index2 = first->src[0].has_index2;
inst->src[r].double_reg2 = first->src[0].double_reg2;
int swizzle = 0;
for (int i = 0; i < 4; i++) {
......@@ -4552,6 +4566,9 @@ dst_register(struct st_translate *t,
static struct ureg_src
src_register(struct st_translate *t, const st_src_reg *reg)
{
int index = reg->index;
int double_reg2 = reg->double_reg2 ? 1 : 0;
switch(reg->file) {
case PROGRAM_UNDEFINED:
return ureg_imm4f(t->ureg, 0, 0, 0, 0);
......@@ -4577,8 +4594,12 @@ src_register(struct st_translate *t, const st_src_reg *reg)
return t->immediates[reg->index];
case PROGRAM_INPUT:
assert(t->inputMapping[reg->index] < ARRAY_SIZE(t->inputs));
return t->inputs[t->inputMapping[reg->index]];
/* GLSL inputs are 64-bit containers, so we have to
* map back to the original index and add the offset after
* mapping. */
index -= double_reg2;
assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs));
return t->inputs[t->inputMapping[index] + double_reg2];
case PROGRAM_OUTPUT:
assert(t->outputMapping[reg->index] < ARRAY_SIZE(t->outputs));
......
......@@ -194,6 +194,11 @@ st_prepare_vertex_program(struct gl_context *ctx,
stvp->input_to_index[attr] = stvp->num_inputs;
stvp->index_to_input[stvp->num_inputs] = attr;
stvp->num_inputs++;
if ((stvp->Base.Base.DoubleInputsRead & BITFIELD64_BIT(attr)) != 0) {
/* add placeholder for second part of a double attribute */
stvp->index_to_input[stvp->num_inputs] = ST_DOUBLE_ATTRIB_PLACEHOLDER;
stvp->num_inputs++;
}
}
}
/* bit of a hack, presetup potentially unused edgeflag input */
......
......@@ -45,6 +45,7 @@
extern "C" {
#endif
#define ST_DOUBLE_ATTRIB_PLACEHOLDER 0xffffffff
/** Fragment program variant key */
struct st_fp_variant_key
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment