lp_bld_swizzle.c 25.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

28
29
30
31
32
33
34
/**
 * @file
 * Helper functions for swizzling/shuffling.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */

35
#include <inttypes.h>  /* for PRIx64 macro */
36
#include "util/compiler.h"
37
38
39
40
#include "util/u_debug.h"

#include "lp_bld_type.h"
#include "lp_bld_const.h"
41
#include "lp_bld_init.h"
42
#include "lp_bld_logic.h"
43
#include "lp_bld_swizzle.h"
44
#include "lp_bld_pack.h"
45
46


47
LLVMValueRef
48
lp_build_broadcast(struct gallivm_state *gallivm,
49
50
51
52
53
                   LLVMTypeRef vec_type,
                   LLVMValueRef scalar)
{
   LLVMValueRef res;

54
   if (LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) {
55
56
57
58
59
60
61
      /* scalar */
      assert(vec_type == LLVMTypeOf(scalar));
      res = scalar;
   } else {
      LLVMBuilderRef builder = gallivm->builder;
      const unsigned length = LLVMGetVectorSize(vec_type);
      LLVMValueRef undef = LLVMGetUndef(vec_type);
62
      /* The shuffle vector is always made of int32 elements */
63
      LLVMTypeRef i32_type = LLVMInt32TypeInContext(gallivm->context);
64
      LLVMTypeRef i32_vec_type = LLVMVectorType(i32_type, length);
65
66
67

      assert(LLVMGetElementType(vec_type) == LLVMTypeOf(scalar));

68
69
      res = LLVMBuildInsertElement(builder, undef, scalar, LLVMConstNull(i32_type), "");
      res = LLVMBuildShuffleVector(builder, res, undef, LLVMConstNull(i32_vec_type), "");
70
71
72
73
74
75
   }

   return res;
}


76
77
78
/**
 * Broadcast
 */
79
80
81
82
LLVMValueRef
lp_build_broadcast_scalar(struct lp_build_context *bld,
                          LLVMValueRef scalar)
{
83
   assert(lp_check_elem_type(bld->type, LLVMTypeOf(scalar)));
84

85
   return lp_build_broadcast(bld->gallivm, bld->vec_type, scalar);
86
87
88
}


89
/**
90
 * Combined extract and broadcast (mere shuffle in most cases)
91
92
 */
LLVMValueRef
93
lp_build_extract_broadcast(struct gallivm_state *gallivm,
94
95
96
97
98
                           struct lp_type src_type,
                           struct lp_type dst_type,
                           LLVMValueRef vector,
                           LLVMValueRef index)
{
99
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
   LLVMValueRef res;

   assert(src_type.floating == dst_type.floating);
   assert(src_type.width    == dst_type.width);

   assert(lp_check_value(src_type, vector));
   assert(LLVMTypeOf(index) == i32t);

   if (src_type.length == 1) {
      if (dst_type.length == 1) {
         /*
          * Trivial scalar -> scalar.
          */

         res = vector;
      }
      else {
         /*
          * Broadcast scalar -> vector.
          */

121
122
         res = lp_build_broadcast(gallivm,
                                  lp_build_vec_type(gallivm, dst_type),
123
124
125
126
                                  vector);
      }
   }
   else {
127
      if (dst_type.length > 1) {
128
         /*
129
          * shuffle - result can be of different length.
130
131
132
          */

         LLVMValueRef shuffle;
133
         shuffle = lp_build_broadcast(gallivm,
134
135
                                      LLVMVectorType(i32t, dst_type.length),
                                      index);
136
         res = LLVMBuildShuffleVector(gallivm->builder, vector,
137
                                      LLVMGetUndef(lp_build_vec_type(gallivm, src_type)),
138
139
140
                                      shuffle, "");
      }
      else {
141
142
143
144
         /*
          * Trivial extract scalar from vector.
          */
          res = LLVMBuildExtractElement(gallivm->builder, vector, index, "");
145
146
147
148
149
150
151
      }
   }

   return res;
}


152
/**
James Benton's avatar
James Benton committed
153
 * Swizzle one channel into other channels.
154
 */
155
LLVMValueRef
156
157
lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
                            LLVMValueRef a,
James Benton's avatar
James Benton committed
158
159
                            unsigned channel,
                            unsigned num_channels)
160
{
161
   LLVMBuilderRef builder = bld->gallivm->builder;
162
   const struct lp_type type = bld->type;
163
164
165
   const unsigned n = type.length;
   unsigned i, j;

James Benton's avatar
James Benton committed
166
   if(a == bld->undef || a == bld->zero || a == bld->one || num_channels == 1)
167
168
      return a;

James Benton's avatar
James Benton committed
169
170
   assert(num_channels == 2 || num_channels == 4);

171
172
173
   /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
    * using shuffles here actually causes worst results. More investigation is
    * needed. */
174
175
   if (LLVMIsConstant(a) ||
       type.width >= 16) {
176
177
178
      /*
       * Shuffle.
       */
179
      LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
180
181
      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];

James Benton's avatar
James Benton committed
182
183
      for(j = 0; j < n; j += num_channels)
         for(i = 0; i < num_channels; ++i)
184
185
            shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0);

186
      return LLVMBuildShuffleVector(builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
187
   }
James Benton's avatar
James Benton committed
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
   else if (num_channels == 2) {
      /*
       * Bit mask and shifts
       *
       *   XY XY .... XY  <= input
       *   0Y 0Y .... 0Y
       *   YY YY .... YY
       *   YY YY .... YY  <= output
       */
      struct lp_type type2;
      LLVMValueRef tmp = NULL;
      int shift;

      a = LLVMBuildAnd(builder, a,
                       lp_build_const_mask_aos(bld->gallivm,
                                               type, 1 << channel, num_channels), "");

      type2 = type;
      type2.floating = FALSE;
      type2.width *= 2;
      type2.length /= 2;

      a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type2), "");

212
213
214
215
216
217
218
219
220
221
222
223
224
225
      /*
       * Vector element 0 is always channel X.
       *
       *                        76 54 32 10 (array numbering)
       * Little endian reg in:  YX YX YX YX
       * Little endian reg out: YY YY YY YY if shift right (shift == -1)
       *                        XX XX XX XX if shift left (shift == 1)
       *
       *                        01 23 45 67 (array numbering)
       * Big endian reg in:     XY XY XY XY
       * Big endian reg out:    YY YY YY YY if shift left (shift == 1)
       *                        XX XX XX XX if shift right (shift == -1)
       *
       */
226
#if UTIL_ARCH_LITTLE_ENDIAN
James Benton's avatar
James Benton committed
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
      shift = channel == 0 ? 1 : -1;
#else
      shift = channel == 0 ? -1 : 1;
#endif

      if (shift > 0) {
         tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type2, shift * type.width), "");
      } else if (shift < 0) {
         tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type2, -shift * type.width), "");
      }

      assert(tmp);
      if (tmp) {
         a = LLVMBuildOr(builder, a, tmp, "");
      }

      return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
   }
245
246
247
248
   else {
      /*
       * Bit mask and recursive shifts
       *
249
250
251
252
253
254
255
256
257
258
259
       * Little-endian registers:
       *
       *   7654 3210
       *   WZYX WZYX .... WZYX  <= input
       *   00Y0 00Y0 .... 00Y0  <= mask
       *   00YY 00YY .... 00YY  <= shift right 1 (shift amount -1)
       *   YYYY YYYY .... YYYY  <= shift left 2 (shift amount 2)
       *
       * Big-endian registers:
       *
       *   0123 4567
260
       *   XYZW XYZW .... XYZW  <= input
261
262
263
264
265
       *   0Y00 0Y00 .... 0Y00  <= mask
       *   YY00 YY00 .... YY00  <= shift left 1 (shift amount 1)
       *   YYYY YYYY .... YYYY  <= shift right 2 (shift amount -2)
       *
       * shifts[] gives little-endian shift amounts; we need to negate for big-endian.
266
       */
267
      struct lp_type type4;
268
      const int shifts[4][2] = {
269
270
271
272
273
274
275
         { 1,  2},
         {-1,  2},
         { 1, -2},
         {-1, -2}
      };
      unsigned i;

276
      a = LLVMBuildAnd(builder, a,
277
                       lp_build_const_mask_aos(bld->gallivm,
278
                                               type, 1 << channel, 4), "");
279

280
281
282
283
284
285
286
      /*
       * Build a type where each element is an integer that cover the four
       * channels.
       */

      type4 = type;
      type4.floating = FALSE;
287
288
289
      type4.width *= 4;
      type4.length /= 4;

290
      a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
291
292
293
294
295

      for(i = 0; i < 2; ++i) {
         LLVMValueRef tmp = NULL;
         int shift = shifts[channel][i];

296
         /* See endianness diagram above */
297
#if UTIL_ARCH_BIG_ENDIAN
298
299
300
301
         shift = -shift;
#endif

         if(shift > 0)
302
            tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
303
         if(shift < 0)
304
            tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
305
306
307

         assert(tmp);
         if(tmp)
308
            a = LLVMBuildOr(builder, a, tmp, "");
309
310
      }

311
      return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
312
313
314
315
   }
}


James Benton's avatar
James Benton committed
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
/**
 * Swizzle a vector consisting of an array of XYZW structs.
 *
 * This fills a vector of dst_len length with the swizzled channels from src.
 * Result element i reads source element swizzles[i % num_swizzles], so the
 * swizzle pattern repeats every num_swizzles elements.  An entry equal to
 * LP_BLD_SWIZZLE_DONTCARE leaves the corresponding result element undefined.
 *
 * e.g. swizzles = { 2, 1, 0 }, num_swizzles = 3 and dst_len = 8 builds the
 *      shuffle mask 2 1 0 2 1 0 2 1.
 *
 * @param gallivm         gallivm state supplying the builder and context
 * @param src             the vector to swizzle
 * @param swizzles        the swizzle array
 * @param num_swizzles    the number of elements in swizzles; must be
 *                        non-zero (asserted)
 * @param dst_len         the length of the result; at most
 *                        LP_MAX_VECTOR_WIDTH (asserted)
 *
 * @return the shuffled vector of dst_len elements.
 */
LLVMValueRef
lp_build_swizzle_aos_n(struct gallivm_state* gallivm,
                       LLVMValueRef src,
                       const unsigned char* swizzles,
                       unsigned num_swizzles,
                       unsigned dst_len)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH];
   unsigned i;

   /* i % num_swizzles below would divide by zero otherwise. */
   assert(num_swizzles > 0);
   /* shuffles[] holds LP_MAX_VECTOR_WIDTH entries, so that length is valid. */
   assert(dst_len <= LP_MAX_VECTOR_WIDTH);

   for (i = 0; i < dst_len; ++i) {
      int swizzle = swizzles[i % num_swizzles];

      if (swizzle == LP_BLD_SWIZZLE_DONTCARE) {
         shuffles[i] = LLVMGetUndef(i32t);
      } else {
         shuffles[i] = lp_build_const_int32(gallivm, swizzle);
      }
   }

   return LLVMBuildShuffleVector(builder, src, LLVMGetUndef(LLVMTypeOf(src)),
                                 LLVMConstVector(shuffles, dst_len), "");
}


355
LLVMValueRef
356
357
358
lp_build_swizzle_aos(struct lp_build_context *bld,
                     LLVMValueRef a,
                     const unsigned char swizzles[4])
359
{
360
   LLVMBuilderRef builder = bld->gallivm->builder;
361
362
   const struct lp_type type = bld->type;
   const unsigned n = type.length;
363
364
   unsigned i, j;

365
366
367
368
   if (swizzles[0] == PIPE_SWIZZLE_X &&
       swizzles[1] == PIPE_SWIZZLE_Y &&
       swizzles[2] == PIPE_SWIZZLE_Z &&
       swizzles[3] == PIPE_SWIZZLE_W) {
369
      return a;
370
   }
371

372
373
374
375
   if (swizzles[0] == swizzles[1] &&
       swizzles[1] == swizzles[2] &&
       swizzles[2] == swizzles[3]) {
      switch (swizzles[0]) {
376
377
378
379
      case PIPE_SWIZZLE_X:
      case PIPE_SWIZZLE_Y:
      case PIPE_SWIZZLE_Z:
      case PIPE_SWIZZLE_W:
James Benton's avatar
James Benton committed
380
         return lp_build_swizzle_scalar_aos(bld, a, swizzles[0], 4);
381
      case PIPE_SWIZZLE_0:
382
         return bld->zero;
383
      case PIPE_SWIZZLE_1:
384
         return bld->one;
385
386
      case LP_BLD_SWIZZLE_DONTCARE:
         return bld->undef;
387
388
389
390
391
      default:
         assert(0);
         return bld->undef;
      }
   }
392

393
394
   if (LLVMIsConstant(a) ||
       type.width >= 16) {
395
396
397
      /*
       * Shuffle.
       */
398
399
      LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(bld->gallivm, type));
      LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
400
      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
401
402
403
404
405
406
407
408
409
410
      LLVMValueRef aux[LP_MAX_VECTOR_LENGTH];

      memset(aux, 0, sizeof aux);

      for(j = 0; j < n; j += 4) {
         for(i = 0; i < 4; ++i) {
            unsigned shuffle;
            switch (swizzles[i]) {
            default:
               assert(0);
411
#if defined(NDEBUG) || defined(DEBUG)
412
               FALLTHROUGH;
413
#endif
414
415
416
417
            case PIPE_SWIZZLE_X:
            case PIPE_SWIZZLE_Y:
            case PIPE_SWIZZLE_Z:
            case PIPE_SWIZZLE_W:
418
               shuffle = j + swizzles[i];
419
               shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
420
               break;
421
            case PIPE_SWIZZLE_0:
422
               shuffle = type.length + 0;
423
               shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
424
               if (!aux[0]) {
425
                  aux[0] = lp_build_const_elem(bld->gallivm, type, 0.0);
426
427
               }
               break;
428
            case PIPE_SWIZZLE_1:
429
               shuffle = type.length + 1;
430
               shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
431
               if (!aux[1]) {
432
                  aux[1] = lp_build_const_elem(bld->gallivm, type, 1.0);
433
434
               }
               break;
435
436
437
            case LP_BLD_SWIZZLE_DONTCARE:
               shuffles[j + i] = LLVMGetUndef(i32t);
               break;
438
439
440
            }
         }
      }
441

442
443
444
445
446
      for (i = 0; i < n; ++i) {
         if (!aux[i]) {
            aux[i] = undef;
         }
      }
447

448
      return LLVMBuildShuffleVector(builder, a,
449
450
451
452
453
454
455
456
                                    LLVMConstVector(aux, n),
                                    LLVMConstVector(shuffles, n), "");
   } else {
      /*
       * Bit mask and shifts.
       *
       * For example, this will convert BGRA to RGBA by doing
       *
457
       * Little endian:
458
459
460
461
       *   rgba = (bgra & 0x00ff0000) >> 16
       *        | (bgra & 0xff00ff00)
       *        | (bgra & 0x000000ff) << 16
       *
462
463
464
465
466
       * Big endian:A
       *   rgba = (bgra & 0x0000ff00) << 16
       *        | (bgra & 0x00ff00ff)
       *        | (bgra & 0xff000000) >> 16
       *
467
468
469
470
471
       * This is necessary not only for faster cause, but because X86 backend
       * will refuse shuffles of <4 x i8> vectors
       */
      LLVMValueRef res;
      struct lp_type type4;
472
      unsigned cond = 0;
473
      int chan;
474
475
476
477
478
479
      int shift;

      /*
       * Start with a mixture of 1 and 0.
       */
      for (chan = 0; chan < 4; ++chan) {
480
         if (swizzles[chan] == PIPE_SWIZZLE_1) {
481
482
            cond |= 1 << chan;
         }
483
      }
James Benton's avatar
James Benton committed
484
      res = lp_build_select_aos(bld, cond, bld->one, bld->zero, 4);
485
486
487
488
489
490
491
492
493
494

      /*
       * Build a type where each element is an integer that cover the four
       * channels.
       */
      type4 = type;
      type4.floating = FALSE;
      type4.width *= 4;
      type4.length /= 4;

495
496
      a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
      res = LLVMBuildBitCast(builder, res, lp_build_vec_type(bld->gallivm, type4), "");
497
498
499

      /*
       * Mask and shift the channels, trying to group as many channels in the
500
501
       * same shift as possible.  The shift amount is positive for shifts left
       * and negative for shifts right.
502
503
       */
      for (shift = -3; shift <= 3; ++shift) {
504
         uint64_t mask = 0;
505
506
507

         assert(type4.width <= sizeof(mask)*8);

508
509
510
511
512
513
514
515
516
517
518
519
520
521
         /*
          * Vector element numbers follow the XYZW order, so 0 is always X, etc.
          * After widening 4 times we have:
          *
          *                                3210
          * Little-endian register layout: WZYX
          *
          *                                0123
          * Big-endian register layout:    XYZW
          *
          * For little-endian, higher-numbered channels are obtained by a shift right
          * (negative shift amount) and lower-numbered channels by a shift left
          * (positive shift amount).  The opposite is true for big-endian.
          */
522
         for (chan = 0; chan < 4; ++chan) {
523
524
            if (swizzles[chan] < 4) {
               /* We need to move channel swizzles[chan] into channel chan */
525
#if UTIL_ARCH_LITTLE_ENDIAN
526
527
528
               if (swizzles[chan] - chan == -shift) {
                  mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width);
               }
529
#else
530
531
532
               if (swizzles[chan] - chan == shift) {
                  mask |= ((1ULL << type.width) - 1) << (type4.width - type.width) >> (swizzles[chan] * type.width);
               }
533
#endif
534
535
536
537
538
539
540
            }
         }

         if (mask) {
            LLVMValueRef masked;
            LLVMValueRef shifted;
            if (0)
541
               debug_printf("shift = %i, mask = %" PRIx64 "\n", shift, mask);
542

543
            masked = LLVMBuildAnd(builder, a,
544
                                  lp_build_const_int_vec(bld->gallivm, type4, mask), "");
545
            if (shift > 0) {
546
               shifted = LLVMBuildShl(builder, masked,
547
                                      lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
548
            } else if (shift < 0) {
549
               shifted = LLVMBuildLShr(builder, masked,
550
                                       lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
551
552
553
554
            } else {
               shifted = masked;
            }

555
            res = LLVMBuildOr(builder, res, shifted, "");
556
557
558
         }
      }

559
      return LLVMBuildBitCast(builder, res,
560
                              lp_build_vec_type(bld->gallivm, type), "");
561
562
563
564
   }
}


565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
/**
 * Extended swizzle of a single channel of a SoA vector.
 *
 * @param bld         building context
 * @param unswizzled  array with the 4 unswizzled values
 * @param swizzle     one of the PIPE_SWIZZLE_*
 *
 * @return  the swizzled value.
 */
LLVMValueRef
lp_build_swizzle_soa_channel(struct lp_build_context *bld,
                             const LLVMValueRef *unswizzled,
                             unsigned swizzle)
{
   switch (swizzle) {
580
581
582
583
   case PIPE_SWIZZLE_X:
   case PIPE_SWIZZLE_Y:
   case PIPE_SWIZZLE_Z:
   case PIPE_SWIZZLE_W:
584
      return unswizzled[swizzle];
585
   case PIPE_SWIZZLE_0:
586
      return bld->zero;
587
   case PIPE_SWIZZLE_1:
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
      return bld->one;
   default:
      assert(0);
      return bld->undef;
   }
}


/**
 * Extended swizzle of a SoA vector.
 *
 * Applies lp_build_swizzle_soa_channel() to each of the four channels.
 *
 * @param bld         building context
 * @param unswizzled  array with the 4 unswizzled values
 * @param swizzles    array of PIPE_SWIZZLE_*
 * @param swizzled    output swizzled values
 */
void
lp_build_swizzle_soa(struct lp_build_context *bld,
                     const LLVMValueRef *unswizzled,
                     const unsigned char swizzles[4],
                     LLVMValueRef *swizzled)
{
   unsigned c = 0;

   while (c < 4) {
      const unsigned selector = swizzles[c];

      swizzled[c] = lp_build_swizzle_soa_channel(bld, unswizzled, selector);
      ++c;
   }
}


/**
 * Do an extended swizzle of a SoA vector inplace.
 *
 * @param bld         building context
 * @param values      input/output array with the 4 values
 * @param swizzles    array of PIPE_SWIZZLE_*
 */
void
lp_build_swizzle_soa_inplace(struct lp_build_context *bld,
                             LLVMValueRef *values,
                             const unsigned char swizzles[4])
{
   /* Snapshot the inputs first: the swizzle writes back into values[]. */
   LLVMValueRef saved[4] = { values[0], values[1], values[2], values[3] };

   lp_build_swizzle_soa(bld, saved, swizzles, values);
}
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657


/**
 * Transpose from AOS <-> SOA
 *
 * Done as two rounds of half-interleaves: first on the single-width type,
 * then on a type of twice the width and half the length.  A NULL entry in
 * src is treated as an all-zero vector.
 *
 * @param gallivm          gallivm state supplying the builder
 * @param single_type_lp   type of pixels
 * @param src              the 4 * n pixel input
 * @param dst              the 4 * n pixel output
 */
void
lp_build_transpose_aos(struct gallivm_state *gallivm,
                       struct lp_type single_type_lp,
                       const LLVMValueRef src[4],
                       LLVMValueRef dst[4])
{
   static const char *const dst_names[4] = { "dst0", "dst1", "dst2", "dst3" };
   struct lp_type double_type_lp = single_type_lp;
   LLVMTypeRef single_type;
   LLVMTypeRef double_type;
   LLVMValueRef xy_lo, zw_lo, xy_hi, zw_hi;
   unsigned k;

   double_type_lp.length >>= 1;
   double_type_lp.width  <<= 1;

   double_type = lp_build_vec_type(gallivm, double_type_lp);
   single_type = lp_build_vec_type(gallivm, single_type_lp);

   /* Any pair whose two sources are both missing stays all-zero. */
   xy_lo = zw_lo = xy_hi = zw_hi = LLVMConstNull(double_type);

   /* Interleave x, y, z, w -> xy and zw */
   if (src[0] || src[1]) {
      LLVMValueRef x = src[0] ? src[0] : LLVMConstNull(single_type);
      LLVMValueRef y = src[1] ? src[1] : LLVMConstNull(single_type);

      xy_lo = lp_build_interleave2_half(gallivm, single_type_lp, x, y, 0);
      xy_hi = lp_build_interleave2_half(gallivm, single_type_lp, x, y, 1);

      /* Cast to double width type for second interleave */
      xy_lo = LLVMBuildBitCast(gallivm->builder, xy_lo, double_type, "t0");
      xy_hi = LLVMBuildBitCast(gallivm->builder, xy_hi, double_type, "t2");
   }
   if (src[2] || src[3]) {
      LLVMValueRef z = src[2] ? src[2] : LLVMConstNull(single_type);
      LLVMValueRef w = src[3] ? src[3] : LLVMConstNull(single_type);

      zw_lo = lp_build_interleave2_half(gallivm, single_type_lp, z, w, 0);
      zw_hi = lp_build_interleave2_half(gallivm, single_type_lp, z, w, 1);

      /* Cast to double width type for second interleave */
      zw_lo = LLVMBuildBitCast(gallivm->builder, zw_lo, double_type, "t1");
      zw_hi = LLVMBuildBitCast(gallivm->builder, zw_hi, double_type, "t3");
   }

   /* Interleave xy, zw -> xyzw */
   dst[0] = lp_build_interleave2_half(gallivm, double_type_lp, xy_lo, zw_lo, 0);
   dst[1] = lp_build_interleave2_half(gallivm, double_type_lp, xy_lo, zw_lo, 1);
   dst[2] = lp_build_interleave2_half(gallivm, double_type_lp, xy_hi, zw_hi, 0);
   dst[3] = lp_build_interleave2_half(gallivm, double_type_lp, xy_hi, zw_hi, 1);

   /* Cast back to original single width type */
   for (k = 0; k < 4; ++k) {
      dst[k] = LLVMBuildBitCast(gallivm->builder, dst[k], single_type,
                                dst_names[k]);
   }
}


James Benton's avatar
James Benton committed
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
/**
 * Transpose from AOS <-> SOA for num_srcs
 *
 * @param gallivm   gallivm state supplying the builder
 * @param type      type of the source vectors
 * @param src       the num_srcs input vectors
 * @param num_srcs  number of inputs; 1, 2 or 4 (anything else asserts)
 * @param dst       receives num_srcs output vectors
 */
void
lp_build_transpose_aos_n(struct gallivm_state *gallivm,
                         struct lp_type type,
                         const LLVMValueRef* src,
                         unsigned num_srcs,
                         LLVMValueRef* dst)
{
   if (num_srcs == 1) {
      /* A single source is its own transpose. */
      dst[0] = src[0];
   }
   else if (num_srcs == 2) {
      /* Note: we must use temporaries in case src == dst */
      LLVMValueRef low_half =
         lp_build_interleave2_half(gallivm, type, src[0], src[1], 0);
      LLVMValueRef high_half =
         lp_build_interleave2_half(gallivm, type, src[0], src[1], 1);

      dst[0] = low_half;
      dst[1] = high_half;
   }
   else if (num_srcs == 4) {
      lp_build_transpose_aos(gallivm, type, src, dst);
   }
   else {
      assert(0);
   }
}


758
/**
759
 * Pack n-th element of aos values,
760
 * pad out to destination size.
761
 * i.e. x1 y1 _ _ x2 y2 _ _ will become x1 x2 _ _
762
763
764
765
766
 */
LLVMValueRef
lp_build_pack_aos_scalars(struct gallivm_state *gallivm,
                          struct lp_type src_type,
                          struct lp_type dst_type,
767
768
                          const LLVMValueRef src,
                          unsigned channel)
769
770
771
772
773
774
775
776
777
778
779
{
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   LLVMValueRef undef = LLVMGetUndef(i32t);
   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
   unsigned num_src = src_type.length / 4;
   unsigned num_dst = dst_type.length;
   unsigned i;

   assert(num_src <= num_dst);

   for (i = 0; i < num_src; i++) {
780
      shuffles[i] = LLVMConstInt(i32t, i * 4 + channel, 0);
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
   }
   for (i = num_src; i < num_dst; i++) {
      shuffles[i] = undef;
   }

   if (num_dst == 1) {
      return LLVMBuildExtractElement(gallivm->builder, src, shuffles[0], "");
   }
   else {
      return LLVMBuildShuffleVector(gallivm->builder, src, src,
                                    LLVMConstVector(shuffles, num_dst), "");
   }
}


/**
 * Unpack and broadcast packed aos values consisting of only the
 * first value, i.e. x1 x2 _ _ will become x1 x1 x1 x1 x2 x2 x2 x2
 *
 * @param gallivm   gallivm state supplying the builder and LLVM context
 * @param src_type  type of src; must have at least dst_type.length / 4
 *                  elements (asserted)
 * @param dst_type  type of the result
 * @param src       the packed source vector
 *
 * @return a vector in which each of the first dst_type.length / 4 source
 *         elements is repeated four times.
 */
LLVMValueRef
lp_build_unpack_broadcast_aos_scalars(struct gallivm_state *gallivm,
                                      struct lp_type src_type,
                                      struct lp_type dst_type,
                                      const LLVMValueRef src)
{
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
   const unsigned num_dst = dst_type.length;
   const unsigned num_groups = dst_type.length / 4;
   unsigned lane;

   assert(num_dst / 4 <= src_type.length);

   /* Every lane of group g reads source element g. */
   for (lane = 0; lane < num_groups * 4; lane++) {
      shuffles[lane] = LLVMConstInt(i32t, lane / 4, 0);
   }

   if (num_groups == 1) {
      /* A single group is a plain extract-and-broadcast of element 0. */
      return lp_build_extract_broadcast(gallivm, src_type, dst_type,
                                        src, shuffles[0]);
   }

   return LLVMBuildShuffleVector(gallivm->builder, src, src,
                                 LLVMConstVector(shuffles, num_dst), "");
}