/**************************************************************************
 *
 * Copyright 2011 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * This module uploads user buffers and translates the vertex buffers which
 * contain incompatible vertices (i.e. not supported by the driver/hardware)
 * into compatible ones, based on the Gallium CAPs.
 *
 * It does not upload index buffers.
 *
 * The module heavily uses bitmasks to represent per-buffer and
 * per-vertex-element flags to avoid looping over the list of buffers just
 * to see if there's a non-zero stride, a user buffer, an unsupported format,
 * etc.
 *
 * There are 3 categories of vertex elements, which are processed separately:
 * - per-vertex attribs (stride != 0, instance_divisor == 0)
 * - instanced attribs (stride != 0, instance_divisor > 0)
 * - constant attribs (stride == 0)
 *
 * All needed uploads and translations are performed for every draw command,
 * but only the subset of vertices needed for that draw command is uploaded
 * or translated. (The module never translates whole buffers.)
 *
 *
 * The module consists of two main parts:
 *
 *
 * 1) Translate (u_vbuf_translate_begin/end)
 *
 * This is pretty much a vertex fetch fallback. It translates vertices from
 * one vertex buffer to another in an unused vertex buffer slot. It does
 * whatever is needed to make the vertices readable by the hardware (changes
 * vertex formats and aligns offsets and strides). The translate module is
 * used here.
 *
 * Each of the 3 categories is translated to a separate buffer.
 * Only the [min_index, max_index] range is translated. For instanced attribs,
 * the range is [start_instance, start_instance+instance_count]. For constant
 * attribs, the range is [0, 1].
 *
 *
 * 2) User buffer uploading (u_vbuf_upload_buffers)
 *
 * Only the [min_index, max_index] range is uploaded (just like Translate)
 * with a single memcpy.
 *
 * This method works best for non-indexed draw operations, and for indexed
 * draw operations where the [min_index, max_index] range is not much
 * bigger than the vertex count.
 *
 * If the range is too big (e.g. one triangle with indices {0, 1, 10000}),
 * the per-vertex attribs are uploaded via the translate module, all packed
 * into one vertex buffer, and the indexed draw call is turned into
 * a non-indexed one in the process. This adds complexity
 * to the translate part, but it prevents bad apps from bringing your frame
 * rate down.
 *
 *
 * If there is nothing to do, it forwards every command to the driver.
 * The module also has its own CSO cache of vertex element states.
 */
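
/*
 * Illustrative usage sketch (an assumption, not code from this module):
 * a driver typically probes the caps once at screen creation and creates
 * the manager per context only when a fallback may be needed.  The entry
 * points and caps fields used below belong to this module; "screen",
 * "pipe", "needs64b" and "ctx->vbuf" stand in for hypothetical driver
 * state.
 *
 *    struct u_vbuf_caps caps;
 *    u_vbuf_get_caps(screen, &caps, needs64b);
 *    if (caps.fallback_always || caps.fallback_only_for_user_vbuffers)
 *       ctx->vbuf = u_vbuf_create(pipe, &caps);
 */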

#include "util/u_vbuf.h"

#include "util/u_dump.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_screen.h"
#include "util/u_upload_mgr.h"
#include "translate/translate.h"
#include "translate/translate_cache.h"
#include "cso_cache/cso_cache.h"
#include "cso_cache/cso_hash.h"

struct u_vbuf_elements {
   unsigned count;
   struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];

   unsigned src_format_size[PIPE_MAX_ATTRIBS];

   /* If (velem[i].src_format != native_format[i]), the vertex buffer
    * referenced by the vertex element cannot be used for rendering and
    * its vertex data must be translated to native_format[i]. */
   enum pipe_format native_format[PIPE_MAX_ATTRIBS];
   unsigned native_format_size[PIPE_MAX_ATTRIBS];

   /* Which buffers are used by the vertex element state. */
   uint32_t used_vb_mask;
   /* This might mean two things:
    * - src_format != native_format, as discussed above.
    * - src_offset % 4 != 0 (if the caps don't allow such an offset). */
   uint32_t incompatible_elem_mask; /* each bit describes a corresp. attrib */
   /* Which buffers have at least one incompatible vertex element
    * referencing them. */
   uint32_t incompatible_vb_mask_any;
   /* Which buffers have only incompatible vertex elements referencing
    * them. */
   uint32_t incompatible_vb_mask_all;
   /* Which buffers have at least one compatible vertex element
    * referencing them. */
   uint32_t compatible_vb_mask_any;
   /* Which buffers have only compatible vertex elements referencing them. */
   uint32_t compatible_vb_mask_all;

   /* Which buffers have at least one non-instanced vertex element
    * referencing them. */
   uint32_t noninstance_vb_mask_any;

   /* Which buffers are used by multiple vertex attribs. */
   uint32_t interleaved_vb_mask;

   void *driver_cso;
};
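
/* Worked example for the masks above: if elements 0 and 2 both read from
 * vertex buffer 3 and element 0 is incompatible while element 2 is
 * compatible, then bit 3 is set in both incompatible_vb_mask_any and
 * compatible_vb_mask_any, and in neither of the *_mask_all masks. */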

enum {
   VB_VERTEX = 0,
   VB_INSTANCE = 1,
   VB_CONST = 2,
   VB_NUM = 3
};

struct u_vbuf {
   struct u_vbuf_caps caps;
   bool has_signed_vb_offset;

   struct pipe_context *pipe;
   struct translate_cache *translate_cache;
   struct cso_cache *cso_cache;

   /* This is what was set in set_vertex_buffers.
    * May contain user buffers. */
   struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
   uint32_t enabled_vb_mask;

   /* Saved vertex buffer. */
   struct pipe_vertex_buffer vertex_buffer0_saved;

   /* Vertex buffers for the driver.
    * There are usually no user buffers. */
   struct pipe_vertex_buffer real_vertex_buffer[PIPE_MAX_ATTRIBS];
   uint32_t dirty_real_vb_mask; /* which buffers are dirty since the last
                                   call of set_vertex_buffers */

   /* Vertex elements. */
   struct u_vbuf_elements *ve, *ve_saved;

   /* Vertex elements used for the translate fallback. */
   struct cso_velems_state fallback_velems;
   /* TRUE if fallback_velems is currently bound (and therefore used for
    * rendering). */
   boolean using_translate;
   /* The vertex buffer slots where translated vertices are stored. */
   unsigned fallback_vbs[VB_NUM];
   unsigned fallback_vbs_mask;

   /* Which buffers are user buffers. */
   uint32_t user_vb_mask; /* each bit describes a corresp. buffer */
   /* Which buffers are incompatible (unaligned). */
   uint32_t incompatible_vb_mask; /* each bit describes a corresp. buffer */
   /* Which buffers have a non-zero stride. */
   uint32_t nonzero_stride_vb_mask; /* each bit describes a corresp. buffer */
   /* Which buffers are allowed (supported by hardware). */
   uint32_t allowed_vb_mask;
};

static void *
u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
                              const struct pipe_vertex_element *attribs);
static void u_vbuf_delete_vertex_elements(struct u_vbuf *mgr, void *cso);

static const struct {
   enum pipe_format from, to;
} vbuf_format_fallbacks[] = {
   { PIPE_FORMAT_R32_FIXED,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_FIXED,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_FIXED,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_FIXED,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_FLOAT,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_FLOAT,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_FLOAT,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_FLOAT,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R64_FLOAT,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R64G64_FLOAT,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R64G64B64_FLOAT,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R64G64B64A64_FLOAT,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_UNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_UNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_UNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_UNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_SNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_SNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_SNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_SNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_USCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_USCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_USCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_SSCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_SSCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_SSCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_UNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_UNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_UNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_UNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_SNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_SNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_SNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_SNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_USCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_USCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_USCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_SSCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_SSCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_SSCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_UNORM,             PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_UNORM,           PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_UNORM,         PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_UNORM,       PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_SNORM,             PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_SNORM,           PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_SNORM,         PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_SNORM,       PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_USCALED,           PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_USCALED,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_USCALED,       PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_USCALED,     PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_SSCALED,           PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_SSCALED,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_SSCALED,       PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_SSCALED,     PIPE_FORMAT_R32G32B32A32_FLOAT },
};

void u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps,
                     bool needs64b)
{
   unsigned i;

   memset(caps, 0, sizeof(*caps));

   /* I'd rather have a bitfield of which formats are supported and a static
    * table of the translations indexed by format, but since we don't have C99
    * we can't easily make a sparsely-populated table indexed by format.  So,
    * we construct the sparse table here.
    */
   for (i = 0; i < PIPE_FORMAT_COUNT; i++)
      caps->format_translation[i] = i;

   for (i = 0; i < ARRAY_SIZE(vbuf_format_fallbacks); i++) {
      enum pipe_format format = vbuf_format_fallbacks[i].from;
      unsigned comp_bits = util_format_get_component_bits(format, 0, 0);

      if ((comp_bits > 32) && !needs64b)
         continue;

      if (!screen->is_format_supported(screen, format, PIPE_BUFFER, 0, 0,
                                       PIPE_BIND_VERTEX_BUFFER)) {
         caps->format_translation[format] = vbuf_format_fallbacks[i].to;
         caps->fallback_always = true;
      }
   }

   caps->buffer_offset_unaligned =
      !screen->get_param(screen,
                         PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY);
   caps->buffer_stride_unaligned =
      !screen->get_param(screen,
                         PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY);
   caps->velem_src_offset_unaligned =
      !screen->get_param(screen,
                         PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY);
   caps->user_vertex_buffers =
      screen->get_param(screen, PIPE_CAP_USER_VERTEX_BUFFERS);
   caps->max_vertex_buffers =
      screen->get_param(screen, PIPE_CAP_MAX_VERTEX_BUFFERS);

   /* OpenGL 2.0 requires a minimum of 16 vertex buffers */
   if (caps->max_vertex_buffers < 16)
      caps->fallback_always = true;

   if (!caps->buffer_offset_unaligned ||
       !caps->buffer_stride_unaligned ||
       !caps->velem_src_offset_unaligned)
      caps->fallback_always = true;

   if (!caps->fallback_always && !caps->user_vertex_buffers)
      caps->fallback_only_for_user_vbuffers = true;
}
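
/* Example (hypothetical hardware): a screen that rejects
 * PIPE_FORMAT_R16_FLOAT as a vertex buffer format and sets
 * PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY leaves this function
 * with caps->format_translation[PIPE_FORMAT_R16_FLOAT] ==
 * PIPE_FORMAT_R32_FLOAT, caps->velem_src_offset_unaligned == false, and
 * caps->fallback_always == true. */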

struct u_vbuf *
u_vbuf_create(struct pipe_context *pipe, struct u_vbuf_caps *caps)
{
   struct u_vbuf *mgr = CALLOC_STRUCT(u_vbuf);

   mgr->caps = *caps;
   mgr->pipe = pipe;
   mgr->cso_cache = cso_cache_create();
   mgr->translate_cache = translate_cache_create();
   memset(mgr->fallback_vbs, ~0, sizeof(mgr->fallback_vbs));
   mgr->allowed_vb_mask = u_bit_consecutive(0, mgr->caps.max_vertex_buffers);

   mgr->has_signed_vb_offset =
      pipe->screen->get_param(pipe->screen,
                              PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET);

   return mgr;
}

/* u_vbuf uses its own caching for vertex elements, because it needs to keep
 * its own preprocessed state per vertex element CSO. */
static struct u_vbuf_elements *
u_vbuf_set_vertex_elements_internal(struct u_vbuf *mgr,
                                    const struct cso_velems_state *velems)
{
   struct pipe_context *pipe = mgr->pipe;
   unsigned key_size, hash_key;
   struct cso_hash_iter iter;
   struct u_vbuf_elements *ve;

   /* need to include the count into the stored state data too. */
   key_size = sizeof(struct pipe_vertex_element) * velems->count +
              sizeof(unsigned);
   hash_key = cso_construct_key((void*)velems, key_size);
   iter = cso_find_state_template(mgr->cso_cache, hash_key, CSO_VELEMENTS,
                                  (void*)velems, key_size);

   if (cso_hash_iter_is_null(iter)) {
      struct cso_velements *cso = MALLOC_STRUCT(cso_velements);
      memcpy(&cso->state, velems, key_size);
      cso->data = u_vbuf_create_vertex_elements(mgr, velems->count,
                                                velems->velems);
      cso->delete_state = (cso_state_callback)u_vbuf_delete_vertex_elements;
      cso->context = (void*)mgr;

      iter = cso_insert_state(mgr->cso_cache, hash_key, CSO_VELEMENTS, cso);
      ve = cso->data;
   } else {
      ve = ((struct cso_velements *)cso_hash_iter_data(iter))->data;
   }

   assert(ve);

   if (ve != mgr->ve)
      pipe->bind_vertex_elements_state(pipe, ve->driver_cso);

   return ve;
}

void u_vbuf_set_vertex_elements(struct u_vbuf *mgr,
                                const struct cso_velems_state *velems)
{
   mgr->ve = u_vbuf_set_vertex_elements_internal(mgr, velems);
}

void u_vbuf_unset_vertex_elements(struct u_vbuf *mgr)
{
   mgr->ve = NULL;
}

void u_vbuf_destroy(struct u_vbuf *mgr)
{
   struct pipe_screen *screen = mgr->pipe->screen;
   unsigned i;
   const unsigned num_vb = screen->get_shader_param(screen, PIPE_SHADER_VERTEX,
                                                    PIPE_SHADER_CAP_MAX_INPUTS);

   mgr->pipe->set_vertex_buffers(mgr->pipe, 0, num_vb, NULL);

   for (i = 0; i < PIPE_MAX_ATTRIBS; i++)
      pipe_vertex_buffer_unreference(&mgr->vertex_buffer[i]);
   for (i = 0; i < PIPE_MAX_ATTRIBS; i++)
      pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[i]);

   pipe_vertex_buffer_unreference(&mgr->vertex_buffer0_saved);

   translate_cache_destroy(mgr->translate_cache);
   cso_cache_delete(mgr->cso_cache);
   FREE(mgr);
}

static enum pipe_error
u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
                         const struct pipe_draw_info *info,
                         unsigned vb_mask, unsigned out_vb,
                         int start_vertex, unsigned num_vertices,
                         int min_index, boolean unroll_indices)
{
   struct translate *tr;
   struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0};
   struct pipe_resource *out_buffer = NULL;
   uint8_t *out_map;
   unsigned out_offset, mask;

   /* Get a translate object. */
   tr = translate_cache_find(mgr->translate_cache, key);

   /* Map buffers we want to translate. */
   mask = vb_mask;
   while (mask) {
      struct pipe_vertex_buffer *vb;
      unsigned offset;
      uint8_t *map;
      unsigned i = u_bit_scan(&mask);

      vb = &mgr->vertex_buffer[i];
      offset = vb->buffer_offset + vb->stride * start_vertex;

      if (vb->is_user_buffer) {
         map = (uint8_t*)vb->buffer.user + offset;
      } else {
         unsigned size = vb->stride ? num_vertices * vb->stride
                                    : sizeof(double)*4;

         if (!vb->buffer.resource)
            continue;

         if (offset + size > vb->buffer.resource->width0) {
            /* Don't try to map past the end of the buffer.  This often
             * happens when we're translating an attribute that's at offset
             * > 0 from the start of the vertex.  If we'd subtract the
             * attrib's offset from the size, this probably wouldn't happen.
             */
            size = vb->buffer.resource->width0 - offset;

            /* Also adjust num_vertices.  A common user error is to call
             * glDrawRangeElements() with an incorrect 'end' argument.  The
             * 'end' value should be the max index value, but people often
             * accidentally add one to it.  This adjustment avoids crashing
             * (by reading past the end of a hardware buffer mapping) when
             * people do that.
             */
            num_vertices = (size + vb->stride - 1) / vb->stride;
         }

         map = pipe_buffer_map_range(mgr->pipe, vb->buffer.resource, offset, size,
                                     PIPE_MAP_READ, &vb_transfer[i]);
      }

      /* Subtract min_index so that indexing with the index buffer works. */
      if (unroll_indices) {
         map -= (ptrdiff_t)vb->stride * min_index;
      }

      tr->set_buffer(tr, i, map, vb->stride, info->max_index);
   }

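   /* Translate. If unroll_indices is set, the indexed draw is converted
    * into a non-indexed one here: each index is read from the index buffer
    * and the vertex it references is written out, so the output buffer
    * receives info->count consecutive, already-dereferenced vertices. */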
   if (unroll_indices) {
      struct pipe_transfer *transfer = NULL;
      const unsigned offset = info->start * info->index_size;
      uint8_t *map;

      /* Create and map the output buffer. */
      u_upload_alloc(mgr->pipe->stream_uploader, 0,
                     key->output_stride * info->count, 4,
                     &out_offset, &out_buffer,
                     (void**)&out_map);
      if (!out_buffer)
         return PIPE_ERROR_OUT_OF_MEMORY;

      if (info->has_user_indices) {
         map = (uint8_t*)info->index.user + offset;
      } else {
         map = pipe_buffer_map_range(mgr->pipe, info->index.resource, offset,
                                     info->count * info->index_size,
                                     PIPE_MAP_READ, &transfer);
      }

      switch (info->index_size) {
      case 4:
         tr->run_elts(tr, (unsigned*)map, info->count, 0, 0, out_map);
         break;
      case 2:
         tr->run_elts16(tr, (uint16_t*)map, info->count, 0, 0, out_map);
         break;
      case 1:
         tr->run_elts8(tr, map, info->count, 0, 0, out_map);
         break;
      }

      if (transfer) {
         pipe_buffer_unmap(mgr->pipe, transfer);
      }
   } else {
      /* Create and map the output buffer. */
      u_upload_alloc(mgr->pipe->stream_uploader,
                     mgr->has_signed_vb_offset ?
                        0 : key->output_stride * start_vertex,
                     key->output_stride * num_vertices, 4,
                     &out_offset, &out_buffer,
                     (void**)&out_map);
      if (!out_buffer)
         return PIPE_ERROR_OUT_OF_MEMORY;

      out_offset -= key->output_stride * start_vertex;

      tr->run(tr, 0, num_vertices, 0, 0, out_map);
   }

   /* Unmap all buffers. */
   mask = vb_mask;
   while (mask) {
      unsigned i = u_bit_scan(&mask);

      if (vb_transfer[i]) {
         pipe_buffer_unmap(mgr->pipe, vb_transfer[i]);
      }
   }

   /* Setup the new vertex buffer. */
   mgr->real_vertex_buffer[out_vb].buffer_offset = out_offset;
   mgr->real_vertex_buffer[out_vb].stride = key->output_stride;

   /* Move the buffer reference. */
   pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[out_vb]);
   mgr->real_vertex_buffer[out_vb].buffer.resource = out_buffer;
   mgr->real_vertex_buffer[out_vb].is_user_buffer = false;

   return PIPE_OK;
}

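/* Pick an unused vertex buffer slot for each attrib category that needs
 * translation (mask[type] != 0).  Returns FALSE if no slot is free at all;
 * if there are fewer free slots than requested categories, all categories
 * are collapsed into one shared slot driven at per-vertex frequency. */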
static boolean
u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr,
                                    unsigned mask[VB_NUM])
{
   unsigned type;
   unsigned fallback_vbs[VB_NUM];
   /* Set the bit for each buffer which is incompatible or not enabled. */
   uint32_t unused_vb_mask =
      mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask |
      ~mgr->enabled_vb_mask;
   uint32_t unused_vb_mask_orig;
   boolean insufficient_buffers = false;

   /* No vertex buffers available at all. */
   if (!unused_vb_mask)
      return FALSE;

   memset(fallback_vbs, ~0, sizeof(fallback_vbs));
   mgr->fallback_vbs_mask = 0;

   /* Find free slots for each type if needed. */
   unused_vb_mask_orig = unused_vb_mask;
   for (type = 0; type < VB_NUM; type++) {
      if (mask[type]) {
         uint32_t index;

         if (!unused_vb_mask) {
            insufficient_buffers = true;
            break;
         }

         index = ffs(unused_vb_mask) - 1;
         fallback_vbs[type] = index;
         mgr->fallback_vbs_mask |= 1 << index;
         unused_vb_mask &= ~(1 << index);
         /*printf("found slot=%i for type=%i\n", index, type);*/
      }
   }

   if (insufficient_buffers) {
      /* Not enough free slots for all types, so they will have to share
       * one buffer. */
      uint32_t index = ffs(unused_vb_mask_orig) - 1;
      /* When sharing one vertex buffer, use per-vertex frequency for
       * everything. */
      fallback_vbs[VB_VERTEX] = index;
      mgr->fallback_vbs_mask = 1 << index;
      mask[VB_VERTEX] = mask[VB_VERTEX] | mask[VB_CONST] | mask[VB_INSTANCE];
      mask[VB_CONST] = 0;
      mask[VB_INSTANCE] = 0;
   }

   for (type = 0; type < VB_NUM; type++) {
      if (mask[type]) {
         mgr->dirty_real_vb_mask |= 1 << fallback_vbs[type];
      }
   }

   memcpy(mgr->fallback_vbs, fallback_vbs, sizeof(fallback_vbs));
   return TRUE;
}

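/* Set up the translate fallback for one draw call: build a translate key
 * for each attrib category, translate the affected buffers into free vertex
 * buffer slots, and bind vertex elements that read the translated data.
 * Returns FALSE if no free vertex buffer slot is available. */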
static boolean
u_vbuf_translate_begin(struct u_vbuf *mgr,
                       const struct pipe_draw_info *info,
                       int start_vertex, unsigned num_vertices,
                       int min_index, boolean unroll_indices)
{
   unsigned mask[VB_NUM] = {0};
   struct translate_key key[VB_NUM];
   unsigned elem_index[VB_NUM][PIPE_MAX_ATTRIBS]; /* ... into key.elements */
   unsigned i, type;
   const unsigned incompatible_vb_mask = mgr->incompatible_vb_mask &
                                         mgr->ve->used_vb_mask;

   const int start[VB_NUM] = {
      start_vertex,           /* VERTEX */
      info->start_instance,   /* INSTANCE */
      0                       /* CONST */
   };

   const unsigned num[VB_NUM] = {
      num_vertices,           /* VERTEX */
      info->instance_count,   /* INSTANCE */
      1                       /* CONST */
   };

   memset(key, 0, sizeof(key));
   memset(elem_index, ~0, sizeof(elem_index));

   /* See if there are vertex attribs of each type to translate and
    * which ones. */
   for (i = 0; i < mgr->ve->count; i++) {
      unsigned vb_index = mgr->ve->ve[i].vertex_buffer_index;

      if (!mgr->vertex_buffer[vb_index].stride) {
         if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
             !(incompatible_vb_mask & (1 << vb_index))) {
            continue;
         }
         mask[VB_CONST] |= 1 << vb_index;
      } else if (mgr->ve->ve[i].instance_divisor) {
         if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
             !(incompatible_vb_mask & (1 << vb_index))) {
            continue;
         }
         mask[VB_INSTANCE] |= 1 << vb_index;
      } else {
         if (!unroll_indices &&
             !(mgr->ve->incompatible_elem_mask & (1 << i)) &&
             !(incompatible_vb_mask & (1 << vb_index))) {
            continue;
         }
         mask[VB_VERTEX] |= 1 << vb_index;
      }
   }

   assert(mask[VB_VERTEX] || mask[VB_INSTANCE] || mask[VB_CONST]);

   /* Find free vertex buffer slots. */
   if (!u_vbuf_translate_find_free_vb_slots(mgr, mask)) {
      return FALSE;
   }

   /* Initialize the translate keys. */
   for (i = 0; i < mgr->ve->count; i++) {
      struct translate_key *k;
      struct translate_element *te;
      enum pipe_format output_format = mgr->ve->native_format[i];
      unsigned bit, vb_index = mgr->ve->ve[i].vertex_buffer_index;
      bit = 1 << vb_index;

      if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
          !(incompatible_vb_mask & (1 << vb_index)) &&
          (!unroll_indices || !(mask[VB_VERTEX] & bit))) {
         continue;
      }

      /* Set type to what we will translate.
       * Whether vertex, instance, or constant attribs. */
      for (type = 0; type < VB_NUM; type++) {
         if (mask[type] & bit) {
            break;
         }
      }
      assert(type < VB_NUM);
      if (mgr->ve->ve[i].src_format != output_format)
         assert(translate_is_output_format_supported(output_format));
      /*printf("velem=%i type=%i\n", i, type);*/

      /* Add the vertex element. */
      k = &key[type];
      elem_index[type][i] = k->nr_elements;

      te = &k->element[k->nr_elements];
      te->type = TRANSLATE_ELEMENT_NORMAL;
      te->instance_divisor = 0;
      te->input_buffer = vb_index;
      te->input_format = mgr->ve->ve[i].src_format;
      te->input_offset = mgr->ve->ve[i].src_offset;
      te->output_format = output_format;
      te->output_offset = k->output_stride;

      k->output_stride += mgr->ve->native_format_size[i];
      k->nr_elements++;
   }

   /* Translate buffers. */
   for (type = 0; type < VB_NUM; type++) {
      if (key[type].nr_elements) {
         enum pipe_error err;
         err = u_vbuf_translate_buffers(mgr, &key[type], info, mask[type],
                                        mgr->fallback_vbs[type],
                                        start[type], num[type], min_index,
                                        unroll_indices && type == VB_VERTEX);
         if (err != PIPE_OK)
            return FALSE;

         /* Fixup the stride for constant attribs. */
         if (type == VB_CONST) {
            mgr->real_vertex_buffer[mgr->fallback_vbs[VB_CONST]].stride = 0;
         }
      }
   }

   /* Setup new vertex elements. */
   for (i = 0; i < mgr->ve->count; i++) {
      for (type = 0; type < VB_NUM; type++) {
         if (elem_index[type][i] < key[type].nr_elements) {
            struct translate_element *te = &key[type].element[elem_index[type][i]];
            mgr->fallback_velems.velems[i].instance_divisor = mgr->ve->ve[i].instance_divisor;
            mgr->fallback_velems.velems[i].src_format = te->output_format;
            mgr->fallback_velems.velems[i].src_offset = te->output_offset;
            mgr->fallback_velems.velems[i].vertex_buffer_index = mgr->fallback_vbs[type];

            /* elem_index[type][i] can only be set for one type. */
            assert(type > VB_INSTANCE || elem_index[type+1][i] == ~0u);
            assert(type > VB_VERTEX   || elem_index[type+2][i] == ~0u);
            break;
         }
      }
      /* No translating, just copy the original vertex element over. */
      if (type == VB_NUM) {
         memcpy(&mgr->fallback_velems.velems[i], &mgr->ve->ve[i],
                sizeof(struct pipe_vertex_element));
      }
   }

   mgr->fallback_velems.count = mgr->ve->count;

   u_vbuf_set_vertex_elements_internal(mgr, &mgr->fallback_velems);
   mgr->using_translate = TRUE;
   return TRUE;
}

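/* Undo the translate fallback after a draw: rebind the original vertex
 * element state and release the temporary vertex buffers. */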
static void u_vbuf_translate_end(struct u_vbuf *mgr)
{
   unsigned i;

   /* Restore vertex elements. */
   mgr->pipe->bind_vertex_elements_state(mgr->pipe, mgr->ve->driver_cso);
   mgr->using_translate = FALSE;

   /* Unreference the now-unused VBOs. */
   for (i = 0; i < VB_NUM; i++) {
      unsigned vb = mgr->fallback_vbs[i];
      if (vb != ~0u) {
         pipe_resource_reference(&mgr->real_vertex_buffer[vb].buffer.resource, NULL);
         mgr->fallback_vbs[i] = ~0;
      }
   }
   /* This will cause the buffers to be unbound in the driver later. */
   mgr->dirty_real_vb_mask |= mgr->fallback_vbs_mask;
   mgr->fallback_vbs_mask = 0;
}

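/* Preprocess a vertex element CSO: choose a supported native format for each
 * element, precompute the per-buffer compatibility masks, and create the
 * driver CSO only if every element can be consumed by the driver as-is. */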
static void *
u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
                              const struct pipe_vertex_element *attribs)
{
   struct pipe_context *pipe = mgr->pipe;
   unsigned i;
   struct pipe_vertex_element driver_attribs[PIPE_MAX_ATTRIBS];
   struct u_vbuf_elements *ve = CALLOC_STRUCT(u_vbuf_elements);
   uint32_t used_buffers = 0;

   ve->count = count;

   memcpy(ve->ve, attribs, sizeof(struct pipe_vertex_element) * count);
   memcpy(driver_attribs, attribs, sizeof(struct pipe_vertex_element) * count);

   /* Set the best native format in case the original format is not
    * supported. */
   for (i = 0; i < count; i++) {
      enum pipe_format format = ve->ve[i].src_format;
      unsigned vb_index_bit = 1 << ve->ve[i].vertex_buffer_index;

      ve->src_format_size[i] = util_format_get_blocksize(format);

      if (used_buffers & vb_index_bit)
         ve->interleaved_vb_mask |= vb_index_bit;

      used_buffers |= vb_index_bit;

      if (!ve->ve[i].instance_divisor) {
         ve->noninstance_vb_mask_any |= vb_index_bit;
      }

      format = mgr->caps.format_translation[format];

      driver_attribs[i].src_format = format;
      ve->native_format[i] = format;
      ve->native_format_size[i] =
            util_format_get_blocksize(ve->native_format[i]);

      if (ve->ve[i].src_format != format ||
          (!mgr->caps.velem_src_offset_unaligned &&
           ve->ve[i].src_offset % 4 != 0)) {
         ve->incompatible_elem_mask |= 1 << i;
         ve->incompatible_vb_mask_any |= vb_index_bit;
      } else {
         ve->compatible_vb_mask_any |= vb_index_bit;
      }
   }

   if (used_buffers & ~mgr->allowed_vb_mask) {
      /* More vertex buffers are used than the hardware supports.  In
       * principle, we only need to make sure that fewer vertex buffers are
       * used, and mark some of the latter vertex buffers as incompatible.
       * For now, mark all vertex buffers as incompatible.
       */
      ve->incompatible_vb_mask_any = used_buffers;
      ve->compatible_vb_mask_any = 0;
      ve->incompatible_elem_mask = u_bit_consecutive(0, count);
   }

   ve->used_vb_mask = used_buffers;
   ve->compatible_vb_mask_all = ~ve->incompatible_vb_mask_any & used_buffers;
   ve->incompatible_vb_mask_all = ~ve->compatible_vb_mask_any & used_buffers;

   /* Align the formats and offsets to the size of DWORD if needed. */
   if (!mgr->caps.velem_src_offset_unaligned) {
      for (i = 0; i < count; i++) {
         ve->native_format_size[i] = align(ve->native_format_size[i], 4);
         driver_attribs[i].src_offset = align(ve->ve[i].src_offset, 4);
      }
   }

   /* Only create the driver CSO if there are no incompatible elements. */
   if (!ve->incompatible_elem_mask) {
      ve->driver_cso =
         pipe->create_vertex_elements_state(pipe, count, driver_attribs);
   }

   return ve;
}

static void u_vbuf_delete_vertex_elements(struct u_vbuf *mgr, void *cso)
{