isl.c 97.8 KB
Newer Older
Chad Versace's avatar
Chad Versace committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/*
 * Copyright 2015 Intel Corporation
 *
 *  Permission is hereby granted, free of charge, to any person obtaining a
 *  copy of this software and associated documentation files (the "Software"),
 *  to deal in the Software without restriction, including without limitation
 *  the rights to use, copy, modify, merge, publish, distribute, sublicense,
 *  and/or sell copies of the Software, and to permit persons to whom the
 *  Software is furnished to do so, subject to the following conditions:
 *
 *  The above copyright notice and this permission notice (including the next
 *  paragraph) shall be included in all copies or substantial portions of the
 *  Software.
 *
 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 *  IN THE SOFTWARE.
 */

#include <assert.h>
25
26
#include <stdarg.h>
#include <stdio.h>
Chad Versace's avatar
Chad Versace committed
27

28
29
#include "genxml/genX_bits.h"

Chad Versace's avatar
Chad Versace committed
30
#include "isl.h"
31
32
33
34
35
#include "isl_gen4.h"
#include "isl_gen6.h"
#include "isl_gen7.h"
#include "isl_gen8.h"
#include "isl_gen9.h"
36
#include "isl_gen12.h"
37
#include "isl_priv.h"
Chad Versace's avatar
Chad Versace committed
38

39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
void
isl_memcpy_linear_to_tiled(uint32_t xt1, uint32_t xt2,
                           uint32_t yt1, uint32_t yt2,
                           char *dst, const char *src,
                           uint32_t dst_pitch, int32_t src_pitch,
                           bool has_swizzling,
                           enum isl_tiling tiling,
                           isl_memcpy_type copy_type)
{
#ifdef USE_SSE41
   if (copy_type == ISL_MEMCPY_STREAMING_LOAD) {
      _isl_memcpy_linear_to_tiled_sse41(
         xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
         tiling, copy_type);
      return;
   }
#endif

   _isl_memcpy_linear_to_tiled(
      xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
      tiling, copy_type);
}

void
isl_memcpy_tiled_to_linear(uint32_t xt1, uint32_t xt2,
                           uint32_t yt1, uint32_t yt2,
                           char *dst, const char *src,
                           int32_t dst_pitch, uint32_t src_pitch,
                           bool has_swizzling,
                           enum isl_tiling tiling,
                           isl_memcpy_type copy_type)
{
#ifdef USE_SSE41
   if (copy_type == ISL_MEMCPY_STREAMING_LOAD) {
      _isl_memcpy_tiled_to_linear_sse41(
         xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
         tiling, copy_type);
      return;
   }
#endif

   _isl_memcpy_tiled_to_linear(
      xt1, xt2, yt1, yt2, dst, src, dst_pitch, src_pitch, has_swizzling,
      tiling, copy_type);
}

85
86
void PRINTFLIKE(3, 4) UNUSED
__isl_finishme(const char *file, int line, const char *fmt, ...)
87
{
88
89
90
91
92
93
94
95
   va_list ap;
   char buf[512];

   va_start(ap, fmt);
   vsnprintf(buf, sizeof(buf), fmt, ap);
   va_end(ap);

   fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buf);
96
97
}

Chad Versace's avatar
Chad Versace committed
98
void
99
isl_device_init(struct isl_device *dev,
100
                const struct gen_device_info *info,
101
                bool has_bit6_swizzling)
Chad Versace's avatar
Chad Versace committed
102
{
103
104
105
   /* Gen8+ don't have bit6 swizzling, ensure callsite is not confused. */
   assert(!(has_bit6_swizzling && info->gen >= 8));

106
   dev->info = info;
107
   dev->use_separate_stencil = ISL_DEV_GEN(dev) >= 6;
108
   dev->has_bit6_swizzling = has_bit6_swizzling;
109
110
111
112
113

   /* The ISL_DEV macros may be defined in the CFLAGS, thus hardcoding some
    * device properties at buildtime. Verify that the macros with the device
    * properties chosen during runtime.
    */
114
115
   ISL_DEV_GEN_SANITIZE(dev);
   ISL_DEV_USE_SEPARATE_STENCIL_SANITIZE(dev);
116
117
118
119
120
121

   /* Did we break hiz or stencil? */
   if (ISL_DEV_USE_SEPARATE_STENCIL(dev))
      assert(info->has_hiz_and_separate_stencil);
   if (info->must_use_separate_stencil)
      assert(ISL_DEV_USE_SEPARATE_STENCIL(dev));
122

123
124
125
   dev->ss.size = RENDER_SURFACE_STATE_length(info) * 4;
   dev->ss.align = isl_align(dev->ss.size, 32);

126
127
   dev->ss.clear_color_state_size =
      isl_align(CLEAR_COLOR_length(info) * 4, 64);
128
129
130
   dev->ss.clear_color_state_offset =
      RENDER_SURFACE_STATE_ClearValueAddress_start(info) / 32 * 4;

131
132
133
134
135
136
137
138
139
   dev->ss.clear_value_size =
      isl_align(RENDER_SURFACE_STATE_RedClearColor_bits(info) +
                RENDER_SURFACE_STATE_GreenClearColor_bits(info) +
                RENDER_SURFACE_STATE_BlueClearColor_bits(info) +
                RENDER_SURFACE_STATE_AlphaClearColor_bits(info), 32) / 8;

   dev->ss.clear_value_offset =
      RENDER_SURFACE_STATE_RedClearColor_start(info) / 32 * 4;

140
141
142
143
144
145
146
147
148
149
   assert(RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) % 8 == 0);
   dev->ss.addr_offset =
      RENDER_SURFACE_STATE_SurfaceBaseAddress_start(info) / 8;

   /* The "Auxiliary Surface Base Address" field starts a bit higher up
    * because the bottom 12 bits are used for other things.  Round down to
    * the nearest dword before.
    */
   dev->ss.aux_addr_offset =
      (RENDER_SURFACE_STATE_AuxiliarySurfaceBaseAddress_start(info) & ~31) / 8;
150

151
   dev->ds.size = _3DSTATE_DEPTH_BUFFER_length(info) * 4;
152
153
154
155
   assert(_3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
   dev->ds.depth_offset =
      _3DSTATE_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8;

156
157
158
159
160
   if (dev->use_separate_stencil) {
      dev->ds.size += _3DSTATE_STENCIL_BUFFER_length(info) * 4 +
                      _3DSTATE_HIER_DEPTH_BUFFER_length(info) * 4 +
                      _3DSTATE_CLEAR_PARAMS_length(info) * 4;

161
162
163
164
165
166
167
168
169
170
171
172
173
174
      assert(_3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
      dev->ds.stencil_offset =
         _3DSTATE_DEPTH_BUFFER_length(info) * 4 +
         _3DSTATE_STENCIL_BUFFER_SurfaceBaseAddress_start(info) / 8;

      assert(_3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) % 8 == 0);
      dev->ds.hiz_offset =
         _3DSTATE_DEPTH_BUFFER_length(info) * 4 +
         _3DSTATE_STENCIL_BUFFER_length(info) * 4 +
         _3DSTATE_HIER_DEPTH_BUFFER_SurfaceBaseAddress_start(info) / 8;
   } else {
      dev->ds.stencil_offset = 0;
      dev->ds.hiz_offset = 0;
   }
175
176
}

177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
/**
 * @brief Query the set of multisamples supported by the device.
 *
 * This function always returns non-zero, as ISL_SAMPLE_COUNT_1_BIT is always
 * supported.
 */
isl_sample_count_mask_t ATTRIBUTE_CONST
isl_device_get_sample_counts(struct isl_device *dev)
{
   if (ISL_DEV_GEN(dev) >= 9) {
      return ISL_SAMPLE_COUNT_1_BIT |
             ISL_SAMPLE_COUNT_2_BIT |
             ISL_SAMPLE_COUNT_4_BIT |
             ISL_SAMPLE_COUNT_8_BIT |
             ISL_SAMPLE_COUNT_16_BIT;
   } else if (ISL_DEV_GEN(dev) >= 8) {
      return ISL_SAMPLE_COUNT_1_BIT |
             ISL_SAMPLE_COUNT_2_BIT |
             ISL_SAMPLE_COUNT_4_BIT |
             ISL_SAMPLE_COUNT_8_BIT;
   } else if (ISL_DEV_GEN(dev) >= 7) {
      return ISL_SAMPLE_COUNT_1_BIT |
             ISL_SAMPLE_COUNT_4_BIT |
             ISL_SAMPLE_COUNT_8_BIT;
   } else if (ISL_DEV_GEN(dev) >= 6) {
      return ISL_SAMPLE_COUNT_1_BIT |
             ISL_SAMPLE_COUNT_4_BIT;
   } else {
      return ISL_SAMPLE_COUNT_1_BIT;
   }
}

209
/**
210
 * @param[out] info is written only on success
211
 */
212
213
static void
isl_tiling_get_info(enum isl_tiling tiling,
214
                    uint32_t format_bpb,
215
216
                    struct isl_tile_info *tile_info)
{
217
   const uint32_t bs = format_bpb / 8;
218
   struct isl_extent2d logical_el, phys_B;
219

220
221
222
223
224
225
226
227
   if (tiling != ISL_TILING_LINEAR && !isl_is_pow2(format_bpb)) {
      /* It is possible to have non-power-of-two formats in a tiled buffer.
       * The easiest way to handle this is to treat the tile as if it is three
       * times as wide.  This way no pixel will ever cross a tile boundary.
       * This really only works on legacy X and Y tiling formats.
       */
      assert(tiling == ISL_TILING_X || tiling == ISL_TILING_Y0);
      assert(bs % 3 == 0 && isl_is_pow2(format_bpb / 3));
228
229
      isl_tiling_get_info(tiling, format_bpb / 3, tile_info);
      return;
230
   }
231
232
233

   switch (tiling) {
   case ISL_TILING_LINEAR:
234
      assert(bs > 0);
235
236
      logical_el = isl_extent2d(1, 1);
      phys_B = isl_extent2d(bs, 1);
237
238
239
      break;

   case ISL_TILING_X:
240
      assert(bs > 0);
241
242
      logical_el = isl_extent2d(512 / bs, 8);
      phys_B = isl_extent2d(512, 8);
243
244
245
      break;

   case ISL_TILING_Y0:
246
      assert(bs > 0);
247
248
      logical_el = isl_extent2d(128 / bs, 32);
      phys_B = isl_extent2d(128, 32);
249
250
251
      break;

   case ISL_TILING_W:
252
253
      assert(bs == 1);
      logical_el = isl_extent2d(64, 64);
254
255
256
257
258
259
260
261
262
263
264
265
      /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfacePitch:
       *
       *    "If the surface is a stencil buffer (and thus has Tile Mode set
       *    to TILEMODE_WMAJOR), the pitch must be set to 2x the value
       *    computed based on width, as the stencil buffer is stored with two
       *    rows interleaved."
       *
       * This, together with the fact that stencil buffers are referred to as
       * being Y-tiled in the PRMs for older hardware implies that the
       * physical size of a W-tile is actually the same as for a Y-tile.
       */
      phys_B = isl_extent2d(128, 32);
266
267
268
269
270
271
      break;

   case ISL_TILING_Yf:
   case ISL_TILING_Ys: {
      bool is_Ys = tiling == ISL_TILING_Ys;

272
      assert(bs > 0);
273
274
275
276
277
      unsigned width = 1 << (6 + (ffs(bs) / 2) + (2 * is_Ys));
      unsigned height = 1 << (6 - (ffs(bs) / 2) + (2 * is_Ys));

      logical_el = isl_extent2d(width / bs, height);
      phys_B = isl_extent2d(width, height);
278
279
      break;
   }
280

281
282
283
284
285
286
287
288
289
290
   case ISL_TILING_HIZ:
      /* HiZ buffers are required to have ISL_FORMAT_HIZ which is an 8x4
       * 128bpb format.  The tiling has the same physical dimensions as
       * Y-tiling but actually has two HiZ columns per Y-tiled column.
       */
      assert(bs == 16);
      logical_el = isl_extent2d(16, 16);
      phys_B = isl_extent2d(128, 32);
      break;

291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
   case ISL_TILING_CCS:
      /* CCS surfaces are required to have one of the GENX_CCS_* formats which
       * have a block size of 1 or 2 bits per block and each CCS element
       * corresponds to one cache-line pair in the main surface.  From the Sky
       * Lake PRM Vol. 12 in the section on planes:
       *
       *    "The Color Control Surface (CCS) contains the compression status
       *    of the cache-line pairs. The compression state of the cache-line
       *    pair is specified by 2 bits in the CCS.  Each CCS cache-line
       *    represents an area on the main surface of 16x16 sets of 128 byte
       *    Y-tiled cache-line-pairs. CCS is always Y tiled."
       *
       * The CCS being Y-tiled implies that it's an 8x8 grid of cache-lines.
       * Since each cache line corresponds to a 16x16 set of cache-line pairs,
       * that yields total tile area of 128x128 cache-line pairs or CCS
       * elements.  On older hardware, each CCS element is 1 bit and the tile
       * is 128x256 elements.
       */
      assert(format_bpb == 1 || format_bpb == 2);
      logical_el = isl_extent2d(128, 256 / format_bpb);
      phys_B = isl_extent2d(128, 32);
      break;

314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
   case ISL_TILING_GEN12_CCS:
      /* From the Bspec, Gen Graphics > Gen12 > Memory Data Formats > Memory
       * Compression > Memory Compression - Gen12:
       *
       *    4 bits of auxiliary plane data are required for 2 cachelines of
       *    main surface data. This results in a single cacheline of auxiliary
       *    plane data mapping to 4 4K pages of main surface data for the 4K
       *    pages (tile Y ) and 1 64K Tile Ys page.
       *
       * The Y-tiled pairing bit of 9 shown in the table below that Bspec
       * section expresses that the 2 cachelines of main surface data are
       * horizontally adjacent.
       *
       * TODO: Handle Ys, Yf and their pairing bits.
       *
       * Therefore, each CCS cacheline represents a 512Bx32 row area and each
       * element represents a 32Bx4 row area.
       */
      assert(format_bpb == 4);
      logical_el = isl_extent2d(16, 8);
      phys_B = isl_extent2d(64, 1);
      break;

337
338
   default:
      unreachable("not reached");
339
340
341
342
   } /* end switch */

   *tile_info = (struct isl_tile_info) {
      .tiling = tiling,
343
      .format_bpb = format_bpb,
344
345
      .logical_extent_el = logical_el,
      .phys_extent_B = phys_B,
346
347
348
   };
}

349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
bool
isl_color_value_is_zero(union isl_color_value value,
                        enum isl_format format)
{
   const struct isl_format_layout *fmtl = isl_format_get_layout(format);

#define RETURN_FALSE_IF_NOT_0(c, i) \
   if (fmtl->channels.c.bits && value.u32[i] != 0) \
      return false

   RETURN_FALSE_IF_NOT_0(r, 0);
   RETURN_FALSE_IF_NOT_0(g, 1);
   RETURN_FALSE_IF_NOT_0(b, 2);
   RETURN_FALSE_IF_NOT_0(a, 3);

#undef RETURN_FALSE_IF_NOT_0

   return true;
}

369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
bool
isl_color_value_is_zero_one(union isl_color_value value,
                            enum isl_format format)
{
   const struct isl_format_layout *fmtl = isl_format_get_layout(format);

#define RETURN_FALSE_IF_NOT_0_1(c, i, field) \
   if (fmtl->channels.c.bits && value.field[i] != 0 && value.field[i] != 1) \
      return false

   if (isl_format_has_int_channel(format)) {
      RETURN_FALSE_IF_NOT_0_1(r, 0, u32);
      RETURN_FALSE_IF_NOT_0_1(g, 1, u32);
      RETURN_FALSE_IF_NOT_0_1(b, 2, u32);
      RETURN_FALSE_IF_NOT_0_1(a, 3, u32);
   } else {
      RETURN_FALSE_IF_NOT_0_1(r, 0, f32);
      RETURN_FALSE_IF_NOT_0_1(g, 1, f32);
      RETURN_FALSE_IF_NOT_0_1(b, 2, f32);
      RETURN_FALSE_IF_NOT_0_1(a, 3, f32);
   }

#undef RETURN_FALSE_IF_NOT_0_1

   return true;
}

396
397
398
/**
 * @param[out] tiling is set only on success
 */
399
static bool
400
401
402
403
404
405
isl_surf_choose_tiling(const struct isl_device *dev,
                       const struct isl_surf_init_info *restrict info,
                       enum isl_tiling *tiling)
{
   isl_tiling_flags_t tiling_flags = info->tiling_flags;

406
407
408
409
   /* HiZ surfaces always use the HiZ tiling */
   if (info->usage & ISL_SURF_USAGE_HIZ_BIT) {
      assert(info->format == ISL_FORMAT_HIZ);
      assert(tiling_flags == ISL_TILING_HIZ_BIT);
410
      *tiling = isl_tiling_flag_to_enum(tiling_flags);
411
412
413
414
415
416
      return true;
   }

   /* CCS surfaces always use the CCS tiling */
   if (info->usage & ISL_SURF_USAGE_CCS_BIT) {
      assert(isl_format_get_layout(info->format)->txc == ISL_TXC_CCS);
417
418
419
420
421
      UNUSED bool ivb_ccs = ISL_DEV_GEN(dev) < 12 &&
                            tiling_flags == ISL_TILING_CCS_BIT;
      UNUSED bool tgl_ccs = ISL_DEV_GEN(dev) >= 12 &&
                            tiling_flags == ISL_TILING_GEN12_CCS_BIT;
      assert(ivb_ccs != tgl_ccs);
422
      *tiling = isl_tiling_flag_to_enum(tiling_flags);
423
424
425
      return true;
   }

426
   if (ISL_DEV_GEN(dev) >= 6) {
427
      isl_gen6_filter_tiling(dev, info, &tiling_flags);
428
   } else {
429
      isl_gen4_filter_tiling(dev, info, &tiling_flags);
430
431
432
433
434
435
436
437
438
439
440
441
442
   }

   #define CHOOSE(__tiling) \
      do { \
         if (tiling_flags & (1u << (__tiling))) { \
            *tiling = (__tiling); \
            return true; \
          } \
      } while (0)

   /* Of the tiling modes remaining, choose the one that offers the best
    * performance.
    */
443
444
445
446
447
448
449
450
451
452

   if (info->dim == ISL_SURF_DIM_1D) {
      /* Prefer linear for 1D surfaces because they do not benefit from
       * tiling. To the contrary, tiling leads to wasted memory and poor
       * memory locality due to the swizzling and alignment restrictions
       * required in tiled surfaces.
       */
      CHOOSE(ISL_TILING_LINEAR);
   }

453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
   CHOOSE(ISL_TILING_Ys);
   CHOOSE(ISL_TILING_Yf);
   CHOOSE(ISL_TILING_Y0);
   CHOOSE(ISL_TILING_X);
   CHOOSE(ISL_TILING_W);
   CHOOSE(ISL_TILING_LINEAR);

   #undef CHOOSE

   /* No tiling mode accomodates the inputs. */
   return false;
}

static bool
isl_choose_msaa_layout(const struct isl_device *dev,
                 const struct isl_surf_init_info *info,
                 enum isl_tiling tiling,
                 enum isl_msaa_layout *msaa_layout)
{
   if (ISL_DEV_GEN(dev) >= 8) {
473
      return isl_gen8_choose_msaa_layout(dev, info, tiling, msaa_layout);
474
   } else if (ISL_DEV_GEN(dev) >= 7) {
475
      return isl_gen7_choose_msaa_layout(dev, info, tiling, msaa_layout);
476
   } else if (ISL_DEV_GEN(dev) >= 6) {
477
      return isl_gen6_choose_msaa_layout(dev, info, tiling, msaa_layout);
478
   } else {
479
      return isl_gen4_choose_msaa_layout(dev, info, tiling, msaa_layout);
480
481
482
   }
}

483
484
struct isl_extent2d
isl_get_interleaved_msaa_px_size_sa(uint32_t samples)
485
486
487
488
489
490
491
492
493
494
495
{
   assert(isl_is_pow2(samples));

   /* From the Broadwell PRM >> Volume 5: Memory Views >> Computing Mip Level
    * Sizes (p133):
    *
    *    If the surface is multisampled and it is a depth or stencil surface
    *    or Multisampled Surface StorageFormat in SURFACE_STATE is
    *    MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
    *    proceeding: [...]
    */
496
497
498
499
500
501
502
503
504
505
506
507
508
   return (struct isl_extent2d) {
      .width = 1 << ((ffs(samples) - 0) / 2),
      .height = 1 << ((ffs(samples) - 1) / 2),
   };
}

static void
isl_msaa_interleaved_scale_px_to_sa(uint32_t samples,
                                    uint32_t *width, uint32_t *height)
{
   const struct isl_extent2d px_size_sa =
      isl_get_interleaved_msaa_px_size_sa(samples);

509
   if (width)
510
      *width = isl_align(*width, 2) * px_size_sa.width;
511
   if (height)
512
      *height = isl_align(*height, 2) * px_size_sa.height;
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
}

static enum isl_array_pitch_span
isl_choose_array_pitch_span(const struct isl_device *dev,
                            const struct isl_surf_init_info *restrict info,
                            enum isl_dim_layout dim_layout,
                            const struct isl_extent4d *phys_level0_sa)
{
   switch (dim_layout) {
   case ISL_DIM_LAYOUT_GEN9_1D:
   case ISL_DIM_LAYOUT_GEN4_2D:
      if (ISL_DEV_GEN(dev) >= 8) {
         /* QPitch becomes programmable in Broadwell. So choose the
          * most compact QPitch possible in order to conserve memory.
          *
          * From the Broadwell PRM >> Volume 2d: Command Reference: Structures
          * >> RENDER_SURFACE_STATE Surface QPitch (p325):
          *
          *    - Software must ensure that this field is set to a value
          *      sufficiently large such that the array slices in the surface
          *      do not overlap. Refer to the Memory Data Formats section for
          *      information on how surfaces are stored in memory.
          *
          *    - This field specifies the distance in rows between array
          *      slices.  It is used only in the following cases:
          *
          *          - Surface Array is enabled OR
          *          - Number of Mulitsamples is not NUMSAMPLES_1 and
          *            Multisampled Surface Storage Format set to MSFMT_MSS OR
          *          - Surface Type is SURFTYPE_CUBE
          */
         return ISL_ARRAY_PITCH_SPAN_COMPACT;
      } else if (ISL_DEV_GEN(dev) >= 7) {
         /* Note that Ivybridge introduces
          * RENDER_SURFACE_STATE.SurfaceArraySpacing, which provides the
          * driver more control over the QPitch.
          */

         if (phys_level0_sa->array_len == 1) {
            /* The hardware will never use the QPitch. So choose the most
             * compact QPitch possible in order to conserve memory.
             */
            return ISL_ARRAY_PITCH_SPAN_COMPACT;
         }

558
559
         if (isl_surf_usage_is_depth_or_stencil(info->usage) ||
             (info->usage & ISL_SURF_USAGE_HIZ_BIT)) {
560
561
562
563
564
565
566
            /* From the Ivybridge PRM >> Volume 1 Part 1: Graphics Core >>
             * Section 6.18.4.7: Surface Arrays (p112):
             *
             *    If Surface Array Spacing is set to ARYSPC_FULL (note that
             *    the depth buffer and stencil buffer have an implied value of
             *    ARYSPC_FULL):
             */
567
            return ISL_ARRAY_PITCH_SPAN_FULL;
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
         }

         if (info->levels == 1) {
            /* We are able to set RENDER_SURFACE_STATE.SurfaceArraySpacing
             * to ARYSPC_LOD0.
             */
            return ISL_ARRAY_PITCH_SPAN_COMPACT;
         }

         return ISL_ARRAY_PITCH_SPAN_FULL;
      } else if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) &&
                 ISL_DEV_USE_SEPARATE_STENCIL(dev) &&
                 isl_surf_usage_is_stencil(info->usage)) {
         /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
          * Graphics Core >> Section 7.18.3.7: Surface Arrays:
          *
          *    The separate stencil buffer does not support mip mapping, thus
          *    the storage for LODs other than LOD 0 is not needed.
          */
         assert(info->levels == 1);
         return ISL_ARRAY_PITCH_SPAN_COMPACT;
      } else {
         if ((ISL_DEV_GEN(dev) == 5 || ISL_DEV_GEN(dev) == 6) &&
             ISL_DEV_USE_SEPARATE_STENCIL(dev) &&
             isl_surf_usage_is_stencil(info->usage)) {
            /* [ILK-SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
             * Graphics Core >> Section 7.18.3.7: Surface Arrays:
             *
             *    The separate stencil buffer does not support mip mapping,
             *    thus the storage for LODs other than LOD 0 is not needed.
             */
            assert(info->levels == 1);
            assert(phys_level0_sa->array_len == 1);
            return ISL_ARRAY_PITCH_SPAN_COMPACT;
         }

         if (phys_level0_sa->array_len == 1) {
            /* The hardware will never use the QPitch. So choose the most
             * compact QPitch possible in order to conserve memory.
             */
            return ISL_ARRAY_PITCH_SPAN_COMPACT;
         }

         return ISL_ARRAY_PITCH_SPAN_FULL;
      }

   case ISL_DIM_LAYOUT_GEN4_3D:
      /* The hardware will never use the QPitch. So choose the most
       * compact QPitch possible in order to conserve memory.
       */
      return ISL_ARRAY_PITCH_SPAN_COMPACT;
619
620
621
622
623
624

   case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
      /* Each array image in the gen6 stencil of HiZ surface is compact in the
       * sense that every LOD is a compact array of the same size as LOD0.
       */
      return ISL_ARRAY_PITCH_SPAN_COMPACT;
625
626
627
628
629
630
631
   }

   unreachable("bad isl_dim_layout");
   return ISL_ARRAY_PITCH_SPAN_FULL;
}

static void
632
633
634
isl_choose_image_alignment_el(const struct isl_device *dev,
                              const struct isl_surf_init_info *restrict info,
                              enum isl_tiling tiling,
635
                              enum isl_dim_layout dim_layout,
636
637
                              enum isl_msaa_layout msaa_layout,
                              struct isl_extent3d *image_align_el)
638
{
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
   if (fmtl->txc == ISL_TXC_MCS) {
      assert(tiling == ISL_TILING_Y0);

      /*
       * IvyBrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
       *
       * Height, width, and layout of MCS buffer in this case must match with
       * Render Target height, width, and layout. MCS buffer is tiledY.
       *
       * To avoid wasting memory, choose the smallest alignment possible:
       * HALIGN_4 and VALIGN_4.
       */
      *image_align_el = isl_extent3d(4, 4, 1);
      return;
   } else if (info->format == ISL_FORMAT_HIZ) {
655
      assert(ISL_DEV_GEN(dev) >= 6);
656
657
658
      if (ISL_DEV_GEN(dev) == 6) {
         /* HiZ surfaces on Sandy Bridge are packed tightly. */
         *image_align_el = isl_extent3d(1, 1, 1);
659
      } else if (ISL_DEV_GEN(dev) < 12) {
660
661
662
663
         /* On gen7+, HiZ surfaces are always aligned to 16x8 pixels in the
          * primary surface which works out to 2x2 HiZ elments.
          */
         *image_align_el = isl_extent3d(2, 2, 1);
664
665
666
667
668
669
      } else {
         /* On gen12+, HiZ surfaces are always aligned to 16x16 pixels in the
          * primary surface which works out to 2x4 HiZ elments.
          * TODO: Verify
          */
         *image_align_el = isl_extent3d(2, 4, 1);
670
      }
671
672
673
      return;
   }

674
675
676
677
   if (ISL_DEV_GEN(dev) >= 12) {
      isl_gen12_choose_image_alignment_el(dev, info, tiling, dim_layout,
                                          msaa_layout, image_align_el);
   } else if (ISL_DEV_GEN(dev) >= 9) {
678
679
      isl_gen9_choose_image_alignment_el(dev, info, tiling, dim_layout,
                                         msaa_layout, image_align_el);
680
   } else if (ISL_DEV_GEN(dev) >= 8) {
681
682
      isl_gen8_choose_image_alignment_el(dev, info, tiling, dim_layout,
                                         msaa_layout, image_align_el);
683
   } else if (ISL_DEV_GEN(dev) >= 7) {
684
685
      isl_gen7_choose_image_alignment_el(dev, info, tiling, dim_layout,
                                          msaa_layout, image_align_el);
686
   } else if (ISL_DEV_GEN(dev) >= 6) {
687
688
      isl_gen6_choose_image_alignment_el(dev, info, tiling, dim_layout,
                                         msaa_layout, image_align_el);
689
   } else {
690
691
      isl_gen4_choose_image_alignment_el(dev, info, tiling, dim_layout,
                                         msaa_layout, image_align_el);
692
693
694
695
696
   }
}

static enum isl_dim_layout
isl_surf_choose_dim_layout(const struct isl_device *dev,
697
                           enum isl_surf_dim logical_dim,
698
699
                           enum isl_tiling tiling,
                           isl_surf_usage_flags_t usage)
700
{
701
702
703
704
705
   /* Sandy bridge needs a special layout for HiZ and stencil. */
   if (ISL_DEV_GEN(dev) == 6 &&
       (tiling == ISL_TILING_W || tiling == ISL_TILING_HIZ))
      return ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ;

706
707
708
   if (ISL_DEV_GEN(dev) >= 9) {
      switch (logical_dim) {
      case ISL_SURF_DIM_1D:
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
         /* From the Sky Lake PRM Vol. 5, "1D Surfaces":
          *
          *    One-dimensional surfaces use a tiling mode of linear.
          *    Technically, they are not tiled resources, but the Tiled
          *    Resource Mode field in RENDER_SURFACE_STATE is still used to
          *    indicate the alignment requirements for this linear surface
          *    (See 1D Alignment requirements for how 4K and 64KB Tiled
          *    Resource Modes impact alignment). Alternatively, a 1D surface
          *    can be defined as a 2D tiled surface (e.g. TileY or TileX) with
          *    a height of 0.
          *
          * In other words, ISL_DIM_LAYOUT_GEN9_1D is only used for linear
          * surfaces and, for tiled surfaces, ISL_DIM_LAYOUT_GEN4_2D is used.
          */
         if (tiling == ISL_TILING_LINEAR)
            return ISL_DIM_LAYOUT_GEN9_1D;
         else
            return ISL_DIM_LAYOUT_GEN4_2D;
727
728
729
730
731
732
733
734
      case ISL_SURF_DIM_2D:
      case ISL_SURF_DIM_3D:
         return ISL_DIM_LAYOUT_GEN4_2D;
      }
   } else {
      switch (logical_dim) {
      case ISL_SURF_DIM_1D:
      case ISL_SURF_DIM_2D:
735
736
737
738
739
740
741
742
743
744
         /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout":
          *
          * The cube face textures are stored in the same way as 3D surfaces
          * are stored (see section 6.17.5 for details).  For cube surfaces,
          * however, the depth is equal to the number of faces (always 6) and 
          * is not reduced for each MIP.
          */
         if (ISL_DEV_GEN(dev) == 4 && (usage & ISL_SURF_USAGE_CUBE_BIT))
            return ISL_DIM_LAYOUT_GEN4_3D;

745
746
747
748
749
750
751
752
753
754
755
756
         return ISL_DIM_LAYOUT_GEN4_2D;
      case ISL_SURF_DIM_3D:
         return ISL_DIM_LAYOUT_GEN4_3D;
      }
   }

   unreachable("bad isl_surf_dim");
   return ISL_DIM_LAYOUT_GEN4_2D;
}

/**
 * Calculate the physical extent of the surface's first level, in units of
757
 * surface samples.
758
759
760
761
762
763
764
765
766
 */
static void
isl_calc_phys_level0_extent_sa(const struct isl_device *dev,
                               const struct isl_surf_init_info *restrict info,
                               enum isl_dim_layout dim_layout,
                               enum isl_tiling tiling,
                               enum isl_msaa_layout msaa_layout,
                               struct isl_extent4d *phys_level0_sa)
{
767
768
   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);

769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
   if (isl_format_is_yuv(info->format))
      isl_finishme("%s:%s: YUV format", __FILE__, __func__);

   switch (info->dim) {
   case ISL_SURF_DIM_1D:
      assert(info->height == 1);
      assert(info->depth == 1);
      assert(info->samples == 1);

      switch (dim_layout) {
      case ISL_DIM_LAYOUT_GEN4_3D:
         unreachable("bad isl_dim_layout");

      case ISL_DIM_LAYOUT_GEN9_1D:
      case ISL_DIM_LAYOUT_GEN4_2D:
784
      case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
785
         *phys_level0_sa = (struct isl_extent4d) {
786
787
            .w = info->width,
            .h = 1,
788
789
790
791
792
793
794
795
            .d = 1,
            .a = info->array_len,
         };
         break;
      }
      break;

   case ISL_SURF_DIM_2D:
796
797
798
799
800
      if (ISL_DEV_GEN(dev) == 4 && (info->usage & ISL_SURF_USAGE_CUBE_BIT))
         assert(dim_layout == ISL_DIM_LAYOUT_GEN4_3D);
      else
         assert(dim_layout == ISL_DIM_LAYOUT_GEN4_2D ||
                dim_layout == ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ);
801
802
803
804
805
806
807
808
809
810

      if (tiling == ISL_TILING_Ys && info->samples > 1)
         isl_finishme("%s:%s: multisample TileYs layout", __FILE__, __func__);

      switch (msaa_layout) {
      case ISL_MSAA_LAYOUT_NONE:
         assert(info->depth == 1);
         assert(info->samples == 1);

         *phys_level0_sa = (struct isl_extent4d) {
811
812
            .w = info->width,
            .h = info->height,
813
814
815
816
817
818
819
            .d = 1,
            .a = info->array_len,
         };
         break;

      case ISL_MSAA_LAYOUT_ARRAY:
         assert(info->depth == 1);
820
         assert(info->levels == 1);
821
822
         assert(isl_format_supports_multisampling(dev->info, info->format));
         assert(fmtl->bw == 1 && fmtl->bh == 1);
823
824
825
826
827

         *phys_level0_sa = (struct isl_extent4d) {
            .w = info->width,
            .h = info->height,
            .d = 1,
828
            .a = info->array_len * info->samples,
829
830
831
832
833
         };
         break;

      case ISL_MSAA_LAYOUT_INTERLEAVED:
         assert(info->depth == 1);
834
         assert(info->levels == 1);
835
         assert(isl_format_supports_multisampling(dev->info, info->format));
836
837
838
839
840

         *phys_level0_sa = (struct isl_extent4d) {
            .w = info->width,
            .h = info->height,
            .d = 1,
841
            .a = info->array_len,
842
843
844
845
846
847
848
849
850
851
852
853
854
         };

         isl_msaa_interleaved_scale_px_to_sa(info->samples,
                                             &phys_level0_sa->w,
                                             &phys_level0_sa->h);
         break;
      }
      break;

   case ISL_SURF_DIM_3D:
      assert(info->array_len == 1);
      assert(info->samples == 1);

855
856
857
858
859
      if (fmtl->bd > 1) {
         isl_finishme("%s:%s: compression block with depth > 1",
                      __FILE__, __func__);
      }

860
861
      switch (dim_layout) {
      case ISL_DIM_LAYOUT_GEN9_1D:
862
      case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
863
864
865
866
867
868
         unreachable("bad isl_dim_layout");

      case ISL_DIM_LAYOUT_GEN4_2D:
         assert(ISL_DEV_GEN(dev) >= 9);

         *phys_level0_sa = (struct isl_extent4d) {
869
870
            .w = info->width,
            .h = info->height,
871
872
873
874
875
876
877
878
            .d = 1,
            .a = info->depth,
         };
         break;

      case ISL_DIM_LAYOUT_GEN4_3D:
         assert(ISL_DEV_GEN(dev) < 9);
         *phys_level0_sa = (struct isl_extent4d) {
879
880
            .w = info->width,
            .h = info->height,
881
882
883
884
885
886
887
888
889
            .d = info->depth,
            .a = 1,
         };
         break;
      }
      break;
   }
}

890
891
892
893
894
/**
 * Calculate the pitch between physical array slices, in units of rows of
 * surface elements.
 */
static uint32_t
895
896
897
898
899
900
901
902
isl_calc_array_pitch_el_rows_gen4_2d(
      const struct isl_device *dev,
      const struct isl_surf_init_info *restrict info,
      const struct isl_tile_info *tile_info,
      const struct isl_extent3d *image_align_sa,
      const struct isl_extent4d *phys_level0_sa,
      enum isl_array_pitch_span array_pitch_span,
      const struct isl_extent2d *phys_slice0_sa)
903
904
905
906
{
   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
   uint32_t pitch_sa_rows = 0;

907
908
909
910
911
912
913
914
915
916
917
   switch (array_pitch_span) {
   case ISL_ARRAY_PITCH_SPAN_COMPACT:
      pitch_sa_rows = isl_align_npot(phys_slice0_sa->h, image_align_sa->h);
      break;
   case ISL_ARRAY_PITCH_SPAN_FULL: {
      /* The QPitch equation is found in the Broadwell PRM >> Volume 5:
       * Memory Views >> Common Surface Formats >> Surface Layout >> 2D
       * Surfaces >> Surface Arrays.
       */
      uint32_t H0_sa = phys_level0_sa->h;
      uint32_t H1_sa = isl_minify(H0_sa, 1);
918

919
920
      uint32_t h0_sa = isl_align_npot(H0_sa, image_align_sa->h);
      uint32_t h1_sa = isl_align_npot(H1_sa, image_align_sa->h);
921

922
923
924
925
926
927
      uint32_t m;
      if (ISL_DEV_GEN(dev) >= 7) {
         /* The QPitch equation changed slightly in Ivybridge. */
         m = 12;
      } else {
         m = 11;
928
      }
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948

      pitch_sa_rows = h0_sa + h1_sa + (m * image_align_sa->h);

      if (ISL_DEV_GEN(dev) == 6 && info->samples > 1 &&
          (info->height % 4 == 1)) {
         /* [SNB] Errata from the Sandy Bridge PRM >> Volume 4 Part 1:
          * Graphics Core >> Section 7.18.3.7: Surface Arrays:
          *
          *    [SNB] Errata: Sampler MSAA Qpitch will be 4 greater than
          *    the value calculated in the equation above , for every
          *    other odd Surface Height starting from 1 i.e. 1,5,9,13.
          *
          * XXX(chadv): Is the errata natural corollary of the physical
          * layout of interleaved samples?
          */
         pitch_sa_rows += 4;
      }

      pitch_sa_rows = isl_align_npot(pitch_sa_rows, fmtl->bh);
      } /* end case */
949
950
951
952
953
954
      break;
   }

   assert(pitch_sa_rows % fmtl->bh == 0);
   uint32_t pitch_el_rows = pitch_sa_rows / fmtl->bh;

955
956
   if (ISL_DEV_GEN(dev) >= 9 && ISL_DEV_GEN(dev) <= 11 &&
       fmtl->txc == ISL_TXC_CCS) {
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
      /*
       * From the Sky Lake PRM Vol 7, "MCS Buffer for Render Target(s)" (p. 632):
       *
       *    "Mip-mapped and arrayed surfaces are supported with MCS buffer
       *    layout with these alignments in the RT space: Horizontal
       *    Alignment = 128 and Vertical Alignment = 64."
       *
       * From the Sky Lake PRM Vol. 2d, "RENDER_SURFACE_STATE" (p. 435):
       *
       *    "For non-multisampled render target's CCS auxiliary surface,
       *    QPitch must be computed with Horizontal Alignment = 128 and
       *    Surface Vertical Alignment = 256. These alignments are only for
       *    CCS buffer and not for associated render target."
       *
       * The first restriction is already handled by isl_choose_image_alignment_el
       * but the second restriction, which is an extension of the first, only
       * applies to qpitch and must be applied here.
974
975
       *
       * The second restriction disappears on Gen12.
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
       */
      assert(fmtl->bh == 4);
      pitch_el_rows = isl_align(pitch_el_rows, 256 / 4);
   }

   if (ISL_DEV_GEN(dev) >= 9 &&
       info->dim == ISL_SURF_DIM_3D &&
       tile_info->tiling != ISL_TILING_LINEAR) {
      /* From the Skylake BSpec >> RENDER_SURFACE_STATE >> Surface QPitch:
       *
       *    Tile Mode != Linear: This field must be set to an integer multiple
       *    of the tile height
       */
      pitch_el_rows = isl_align(pitch_el_rows, tile_info->logical_extent_el.height);
   }

   return pitch_el_rows;
}

995
996
997
998
999
1000
1001
1002
1003
/**
 * A variant of isl_calc_phys_slice0_extent_sa() specific to
 * ISL_DIM_LAYOUT_GEN4_2D.
 */
static void
isl_calc_phys_slice0_extent_sa_gen4_2d(
      const struct isl_device *dev,
      const struct isl_surf_init_info *restrict info,
      enum isl_msaa_layout msaa_layout,
1004
      const struct isl_extent3d *image_align_sa,
1005
1006
1007
1008
1009
      const struct isl_extent4d *phys_level0_sa,
      struct isl_extent2d *phys_slice0_sa)
{
   assert(phys_level0_sa->depth == 1);

1010
   if (info->levels == 1) {
1011
      /* Do not pad the surface to the image alignment.
1012
       *
1013
1014
       * For tiled surfaces, using a reduced alignment here avoids wasting CPU
       * cycles on the below mipmap layout caluclations. Reducing the
1015
1016
1017
1018
1019
       * alignment here is safe because we later align the row pitch and array
       * pitch to the tile boundary. It is safe even for
       * ISL_MSAA_LAYOUT_INTERLEAVED, because phys_level0_sa is already scaled
       * to accomodate the interleaved samples.
       *
1020
       * For linear surfaces, reducing the alignment here permits us to later
1021
1022
1023
1024
       * choose an arbitrary, non-aligned row pitch. If the surface backs
       * a VkBuffer, then an arbitrary pitch may be needed to accomodate
       * VkBufferImageCopy::bufferRowLength.
       */
1025
      *phys_slice0_sa = (struct isl_extent2d) {
1026
1027
         .w = phys_level0_sa->w,
         .h = phys_level0_sa->h,
1028
      };
1029
1030
1031
      return;
   }

1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
   uint32_t slice_top_w = 0;
   uint32_t slice_bottom_w = 0;
   uint32_t slice_left_h = 0;
   uint32_t slice_right_h = 0;

   uint32_t W0 = phys_level0_sa->w;
   uint32_t H0 = phys_level0_sa->h;

   for (uint32_t l = 0; l < info->levels; ++l) {
      uint32_t W = isl_minify(W0, l);
      uint32_t H = isl_minify(H0, l);

1044
1045
      uint32_t w = isl_align_npot(W, image_align_sa->w);
      uint32_t h = isl_align_npot(H, image_align_sa->h);
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055

      if (l == 0) {
         slice_top_w = w;
         slice_left_h = h;
         slice_right_h = h;
      } else if (l == 1) {
         slice_bottom_w = w;
         slice_left_h += h;
      } else if (l == 2) {
         slice_bottom_w += w;
1056
         slice_right_h += h;
1057
1058
1059
1060
1061
1062
1063
1064
      } else {
         slice_right_h += h;
      }
   }

   *phys_slice0_sa = (struct isl_extent2d) {
      .w = MAX(slice_top_w, slice_bottom_w),
      .h = MAX(slice_left_h, slice_right_h),
1065
   };
1066
}
1067

1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
static void
isl_calc_phys_total_extent_el_gen4_2d(
      const struct isl_device *dev,
      const struct isl_surf_init_info *restrict info,
      const struct isl_tile_info *tile_info,
      enum isl_msaa_layout msaa_layout,
      const struct isl_extent3d *image_align_sa,
      const struct isl_extent4d *phys_level0_sa,
      enum isl_array_pitch_span array_pitch_span,
      uint32_t *array_pitch_el_rows,
      struct isl_extent2d *total_extent_el)
{
   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);

   struct isl_extent2d phys_slice0_sa;
   isl_calc_phys_slice0_extent_sa_gen4_2d(dev, info, msaa_layout,
                                          image_align_sa, phys_level0_sa,
                                          &phys_slice0_sa);
   *array_pitch_el_rows =
      isl_calc_array_pitch_el_rows_gen4_2d(dev, info, tile_info,
                                           image_align_sa, phys_level0_sa,
                                           array_pitch_span,
                                           &phys_slice0_sa);
   *total_extent_el = (struct isl_extent2d) {
1092
      .w = isl_align_div_npot(phys_slice0_sa.w, fmtl->bw),
1093
      .h = *array_pitch_el_rows * (phys_level0_sa->array_len - 1) +
1094
           isl_align_div_npot(phys_slice0_sa.h, fmtl->bh),
1095
1096
1097
   };
}

1098
1099
1100
1101
1102
/**
 * A variant of isl_calc_phys_slice0_extent_sa() specific to
 * ISL_DIM_LAYOUT_GEN4_3D.
 */
static void
1103
isl_calc_phys_total_extent_el_gen4_3d(
1104
1105
      const struct isl_device *dev,
      const struct isl_surf_init_info *restrict info,
1106
      const struct isl_extent3d *image_align_sa,
1107
      const struct isl_extent4d *phys_level0_sa,
1108
1109
      uint32_t *array_pitch_el_rows,
      struct isl_extent2d *phys_total_el)
1110
{
1111
1112
   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);

1113
   assert(info->samples == 1);
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128

   if (info->dim != ISL_SURF_DIM_3D) {
      /* From the G45 PRM Vol. 1a, "6.17.4.1 Hardware Cube Map Layout":
       *
       * The cube face textures are stored in the same way as 3D surfaces
       * are stored (see section 6.17.5 for details).  For cube surfaces,
       * however, the depth is equal to the number of faces (always 6) and
       * is not reduced for each MIP.
       */
      assert(ISL_DEV_GEN(dev) == 4);
      assert(info->usage & ISL_SURF_USAGE_CUBE_BIT);
      assert(phys_level0_sa->array_len == 6);
   } else {
      assert(phys_level0_sa->array_len == 1);
   }
1129

1130
1131
   uint32_t total_w = 0;
   uint32_t total_h = 0;
1132
1133
1134
1135

   uint32_t W0 = phys_level0_sa->w;
   uint32_t H0 = phys_level0_sa->h;
   uint32_t D0 = phys_level0_sa->d;
1136
   uint32_t A0 = phys_level0_sa->a;
1137
1138

   for (uint32_t l = 0; l < info->levels; ++l) {
1139
1140
      uint32_t level_w = isl_align_npot(isl_minify(W0, l), image_align_sa->w);
      uint32_t level_h = isl_align_npot(isl_minify(H0, l), image_align_sa->h);
1141
      uint32_t level_d = info->dim == ISL_SURF_DIM_3D ? isl_minify(D0, l) : A0;
1142
1143
1144
1145

      uint32_t max_layers_horiz = MIN(level_d, 1u << l);
      uint32_t max_layers_vert = isl_align(level_d, 1u << l) / (1u << l);

1146
1147
      total_w = MAX(total_w, level_w * max_layers_horiz);
      total_h += level_h * max_layers_vert;
1148
1149
   }

1150
1151
1152
1153
1154
1155
1156
1157
1158
   /* GEN4_3D layouts don't really have an array pitch since each LOD has a
    * different number of horizontal and vertical layers.  We have to set it
    * to something, so at least make it true for LOD0.
    */
   *array_pitch_el_rows =
      isl_align_npot(phys_level0_sa->h, image_align_sa->h) / fmtl->bw;
   *phys_total_el = (struct isl_extent2d) {
      .w = isl_assert_div(total_w, fmtl->bw),
      .h = isl_assert_div(total_h, fmtl->bh),
1159
   };
1160
1161
}

1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
/**
 * A variant of isl_calc_phys_slice0_extent_sa() specific to
 * ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ.
 */
static void
isl_calc_phys_total_extent_el_gen6_stencil_hiz(
      const struct isl_device *dev,
      const struct isl_surf_init_info *restrict info,
      const struct isl_tile_info *tile_info,
      const struct isl_extent3d *image_align_sa,
      const struct isl_extent4d *phys_level0_sa,
      uint32_t *array_pitch_el_rows,
      struct isl_extent2d *phys_total_el)
{
   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);

   const struct isl_extent2d tile_extent_sa = {
      .w = tile_info->logical_extent_el.w * fmtl->bw,
      .h = tile_info->logical_extent_el.h * fmtl->bh,
   };
   /* Tile size is a multiple of image alignment */
   assert(tile_extent_sa.w % image_align_sa->w == 0);
   assert(tile_extent_sa.h % image_align_sa->h == 0);

   const uint32_t W0 = phys_level0_sa->w;
   const uint32_t H0 = phys_level0_sa->h;

   /* Each image has the same height as LOD0 because the hardware thinks
    * everything is LOD0
    */
   const uint32_t H = isl_align(H0, image_align_sa->h) * phys_level0_sa->a;

   uint32_t total_top_w = 0;
   uint32_t total_bottom_w = 0;
   uint32_t total_h = 0;

   for (uint32_t l = 0; l < info->levels; ++l) {
      const uint32_t W = isl_minify(W0, l);

      const uint32_t w = isl_align(W, tile_extent_sa.w);
      const uint32_t h = isl_align(H, tile_extent_sa.h);

      if (l == 0) {
         total_top_w = w;
         total_h = h;
      } else if (l == 1) {
         total_bottom_w = w;
         total_h += h;
      } else {
         total_bottom_w += w;
      }
   }

   *array_pitch_el_rows =
      isl_assert_div(isl_align(H0, image_align_sa->h), fmtl->bh);
   *phys_total_el = (struct isl_extent2d) {
      .w = isl_assert_div(MAX(total_top_w, total_bottom_w), fmtl->bw),
      .h = isl_assert_div(total_h, fmtl->bh),
   };
}

1223
1224
1225
1226
1227
/**
 * A variant of isl_calc_phys_slice0_extent_sa() specific to
 * ISL_DIM_LAYOUT_GEN9_1D.
 */
static void
1228
isl_calc_phys_total_extent_el_gen9_1d(
1229
1230
1231
1232
      const struct isl_device *dev,
      const struct isl_surf_init_info *restrict info,
      const struct isl_extent3d *image_align_sa,
      const struct isl_extent4d *phys_level0_sa,
1233
1234
      uint32_t *array_pitch_el_rows,
      struct isl_extent2d *phys_total_el)
1235
{
1236
   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
1237

1238
   assert(phys_level0_sa->height == 1);
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
   assert(phys_level0_sa->depth == 1);
   assert(info->samples == 1);
   assert(image_align_sa->w >= fmtl->bw);

   uint32_t slice_w = 0;
   const uint32_t W0 = phys_level0_sa->w;

   for (uint32_t l = 0; l < info->levels; ++l) {
      uint32_t W = isl_minify(W0, l);
      uint32_t w = isl_align_npot(W, image_align_sa->w);

      slice_w += w;
   }

1253
1254
1255
1256
1257
   *array_pitch_el_rows = 1;
   *phys_total_el = (struct isl_extent2d) {
      .w = isl_assert_div(slice_w, fmtl->bw),
      .h = phys_level0_sa->array_len,
   };
1258
1259
}

1260
/**
1261
1262
 * Calculate the two-dimensional total physical extent of the surface, in
 * units of surface elements.
1263
1264
 */
static void
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
isl_calc_phys_total_extent_el(const struct isl_device *dev,
                              const struct isl_surf_init_info *restrict info,
                              const struct isl_tile_info *tile_info,
                              enum isl_dim_layout dim_layout,
                              enum isl_msaa_layout msaa_layout,
                              const struct isl_extent3d *image_align_sa,
                              const struct isl_extent4d *phys_level0_sa,
                              enum isl_array_pitch_span array_pitch_span,
                              uint32_t *array_pitch_el_rows,
                              struct isl_extent2d *total_extent_el)
1275
1276
1277
{
   switch (dim_layout) {
   case ISL_DIM_LAYOUT_GEN9_1D:
1278
1279
1280
1281
1282
      assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
      isl_calc_phys_total_extent_el_gen9_1d(dev, info,
                                            image_align_sa, phys_level0_sa,
                                            array_pitch_el_rows,
                                            total_extent_el);
1283
      return;
1284
   case ISL_DIM_LAYOUT_GEN4_2D:
1285
1286
1287
1288
1289
      isl_calc_phys_total_extent_el_gen4_2d(dev, info, tile_info, msaa_layout,
                                            image_align_sa, phys_level0_sa,
                                            array_pitch_span,
                                            array_pitch_el_rows,
                                            total_extent_el);
1290
      return;
1291
1292
1293
1294
1295
1296
1297
1298
   case ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ:
      assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
      isl_calc_phys_total_extent_el_gen6_stencil_hiz(dev, info, tile_info,
                                                     image_align_sa,
                                                     phys_level0_sa,
                                                     array_pitch_el_rows,
                                                     total_extent_el);
      return;
1299
   case ISL_DIM_LAYOUT_GEN4_3D:
1300
1301
1302
1303
1304
      assert(array_pitch_span == ISL_ARRAY_PITCH_SPAN_COMPACT);
      isl_calc_phys_total_extent_el_gen4_3d(dev, info,
                                            image_align_sa, phys_level0_sa,
                                            array_pitch_el_rows,
                                            total_extent_el);
1305
1306
      return;
   }
1307
1308

   unreachable("invalid value for dim_layout");
1309
1310
1311
}

static uint32_t
1312
1313
isl_calc_row_pitch_alignment(const struct isl_surf_init_info *surf_info,
                             const struct isl_tile_info *tile_info)
1314
{
1315
1316
   if (tile_info->tiling != ISL_TILING_LINEAR)
      return tile_info->phys_extent_B.width;
1317

1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
   /* From the Broadwel PRM >> Volume 2d: Command Reference: Structures >>
    * RENDER_SURFACE_STATE Surface Pitch (p349):
    *
    *    - For linear render target surfaces and surfaces accessed with the
    *      typed data port messages, the pitch must be a multiple of the
    *      element size for non-YUV surface formats.  Pitch must be
    *      a multiple of 2 * element size for YUV surface formats.
    *
    *    - [Requirements for SURFTYPE_BUFFER and SURFTYPE_STRBUF, which we
    *      ignore because isl doesn't do buffers.]
    *
    *    - For other linear surfaces, the pitch can be any multiple of
    *      bytes.
    */
1332
1333
1334
1335
1336
1337
   const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format);
   const uint32_t bs = fmtl->bpb / 8;

   if (surf_info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
      if (isl_format_is_yuv(surf_info->format)) {
         return 2 * bs;
1338
      } else  {
1339
         return bs;
1340
1341
      }
   }
1342

1343
1344
1345
1346
1347
1348
   return 1;
}

static uint32_t
isl_calc_linear_min_row_pitch(const struct isl_device *dev,
                              const struct isl_surf_init_info *info,
1349
                              const struct isl_extent2d *phys_total_el,
1350
                              uint32_t alignment_B)
1351
1352
1353
1354
{
   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
   const uint32_t bs = fmtl->bpb / 8;

1355
   return isl_align_npot(bs * phys_total_el->w, alignment_B);
1356
1357
1358
1359
1360
1361
}

static uint32_t
isl_calc_tiled_min_row_pitch(const struct isl_device *dev,
                             const struct isl_surf_init_info *surf_info,
                             const struct isl_tile_info *tile_info,
1362
                             const struct isl_extent2d *phys_total_el,
1363
                             uint32_t alignment_B)
1364
1365
1366
1367
1368
1369
1370
{
   const struct isl_format_layout *fmtl = isl_format_get_layout(surf_info->format);

   assert(fmtl->bpb % tile_info->format_bpb == 0);

   const uint32_t tile_el_scale = fmtl->bpb / tile_info->format_bpb;
   const uint32_t total_w_tl =
1371
      isl_align_div(phys_total_el->w * tile_el_scale,
1372
1373
                    tile_info->logical_extent_el.width);

1374
   assert(alignment_B == tile_info->phys_extent_B.width);
1375
   return total_w_tl * tile_info->phys_extent_B.width;
1376
1377
1378
1379
1380
1381
}

static uint32_t
isl_calc_min_row_pitch(const struct isl_device *dev,
                       const struct isl_surf_init_info *surf_info,
                       const struct isl_tile_info *tile_info,
1382
                       const struct isl_extent2d *phys_total_el,
1383
                       uint32_t alignment_B)
1384
1385
{
   if (tile_info->tiling == ISL_TILING_LINEAR) {
1386
      return isl_calc_linear_min_row_pitch(dev, surf_info, phys_total_el,
1387
                                           alignment_B);
1388
1389
   } else {
      return isl_calc_tiled_min_row_pitch(dev, surf_info, tile_info,
1390
                                          phys_total_el, alignment_B);
1391
1392
1393
   }
}

1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
/**
 * Is `pitch` in the valid range for a hardware bitfield, if the bitfield's
 * size is `bits` bits?
 *
 * Hardware pitch fields are offset by 1. For example, if the size of
 * RENDER_SURFACE_STATE::SurfacePitch is B bits, then the range of valid
 * pitches is [1, 2^b] inclusive.  If the surface pitch is N, then
 * RENDER_SURFACE_STATE::SurfacePitch must be set to N-1.
 */
static bool
pitch_in_range(uint32_t n, uint32_t bits)
{
   assert(n != 0);
   return likely(bits != 0 && 1 <= n && n <= (1 << bits));
}

static bool
1411
1412
1413
1414
isl_calc_row_pitch(const struct isl_device *dev,
                   const struct isl_surf_init_info *surf_info,
                   const struct isl_tile_info *tile_info,
                   enum isl_dim_layout dim_layout,
1415
                   const struct isl_extent2d *phys_total_el,
1416
                   uint32_t *out_row_pitch_B)
1417
{
1418
   uint32_t alignment_B =
1419
1420
      isl_calc_row_pitch_alignment(surf_info, tile_info);

1421
   const uint32_t min_row_pitch_B =
1422
      isl_calc_min_row_pitch(dev, surf_info, tile_info, phys_total_el,
1423
                             alignment_B);
1424

1425
   if (surf_info->row_pitch_B != 0) {
1426
      if (surf_info->row_pitch_B < min_row_pitch_B)
1427
1428
         return false;

1429
      if (surf_info->row_pitch_B % alignment_B != 0)
1430
1431
1432
         return false;
   }

1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
   const uint32_t row_pitch_B =
      surf_info->row_pitch_B != 0 ?
         surf_info->row_pitch_B :
      /* According to BSpec: 44930, Gen12's CCS-compressed surface pitches
       * must be 512B-aligned.
       */
      ISL_DEV_GEN(dev) >= 12 &&
      isl_format_supports_ccs_e(dev->info, surf_info->format) ?
         isl_align(min_row_pitch_B, 512) :
      /* Else */
         min_row_pitch_B;

1445
   const uint32_t row_pitch_tl = row_pitch_B / tile_info->phys_extent_B.width;
1446

1447
   if (row_pitch_B == 0)
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
      return false;

   if (dim_layout == ISL_DIM_LAYOUT_GEN9_1D) {
      /* SurfacePitch is ignored for this layout. */
      goto done;
   }

   if ((surf_info->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT |
                            ISL_SURF_USAGE_TEXTURE_BIT |
                            ISL_SURF_USAGE_STORAGE_BIT)) &&
1458
       !pitch_in_range(row_pitch_B, RENDER_SURFACE_STATE_SurfacePitch_bits(dev->info)))
1459
1460
1461
1462
      return false;

   if ((surf_info->usage & (ISL_SURF_USAGE_CCS_BIT |
                            ISL_SURF_USAGE_MCS_BIT)) &&
1463
       !pitch_in_range(row_pitch_tl, RENDER_SURFACE_STATE_AuxiliarySurfacePitch_bits(dev->info)))
1464
1465
1466
      return false;

   if ((surf_info->usage & ISL_SURF_USAGE_DEPTH_BIT) &&
1467
       !pitch_in_range(row_pitch_B, _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info)))
1468
1469
1470
      return false;

   if ((surf_info->usage & ISL_SURF_USAGE_HIZ_BIT) &&
1471
       !pitch_in_range(row_pitch_B, _3DSTATE_HIER_DEPTH_BUFFER_SurfacePitch_bits(dev->info)))
1472
1473
      return false;

1474
1475
<