/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
 /*
  * Authors:
  *   Keith Whitwell <keithw@vmware.com>
  */

#include "main/context.h"
#include "main/blend.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
#include "main/shaderimage.h"
#include "program/prog_parameter.h"
#include "program/prog_instruction.h"
#include "main/framebuffer.h"

#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
#include "intel_buffer_objects.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"

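/**
 * Translate a GL texture target enum to the BRW_SURFACE_* type used in
 * SURFACE_STATE.
 */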
GLuint
translate_tex_target(GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY_EXT:
      return BRW_SURFACE_1D;

   case GL_TEXTURE_RECTANGLE_NV:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY_EXT:
   case GL_TEXTURE_EXTERNAL_OES:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_3D:
      return BRW_SURFACE_3D;

   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      return BRW_SURFACE_CUBE;

   default:
      unreachable("not reached");
   }
}

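/**
 * Translate an i915 tiling mode to the tiling bits of SURFACE_STATE dword 3.
 */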
uint32_t
brw_get_surface_tiling_bits(uint32_t tiling)
{
   switch (tiling) {
   case I915_TILING_X:
      return BRW_SURFACE_TILED;
   case I915_TILING_Y:
      return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
   default:
      return 0;
   }
}

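/**
 * Translate a sample count to the multisample count field of SURFACE_STATE.
 * Any sample count above one selects 4x, the only multisampled mode these
 * gen4-6 surface states support.
 */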
uint32_t
brw_get_surface_num_multisamples(unsigned num_samples)
{
   if (num_samples > 1)
      return BRW_SURFACE_MULTISAMPLECOUNT_4;
   else
      return BRW_SURFACE_MULTISAMPLECOUNT_1;
}

void
brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
                      bool is_render_target,
                      unsigned *width, unsigned *height,
                      unsigned *pitch, uint32_t *tiling, unsigned *format)
{
   static const unsigned halign_stencil = 8;

   /* In Y-tiling each row is twice as wide as in W-tiling, and consequently
    * there are half as many rows.
    * In addition, mip-levels are accessed manually by the program and
    * therefore the surface is set up to cover all the mip-levels for one
    * slice.  (Hardware is still used to access individual slices).
    */
   *tiling = I915_TILING_Y;
   *pitch = mt->pitch * 2;
   *width = ALIGN(mt->total_width, halign_stencil) * 2;
   *height = (mt->total_height / mt->physical_depth0) / 2;

   if (is_render_target) {
      *format = BRW_SURFACEFORMAT_R8_UINT;
   }
}

/**
 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 * swizzling.
 */
int
brw_get_texture_swizzle(const struct gl_context *ctx,
                        const struct gl_texture_object *t)
{
   const struct gl_texture_image *img = t->Image[0][t->BaseLevel];

   int swizzles[SWIZZLE_NIL + 1] = {
      SWIZZLE_X,
      SWIZZLE_Y,
      SWIZZLE_Z,
      SWIZZLE_W,
      SWIZZLE_ZERO,
      SWIZZLE_ONE,
      SWIZZLE_NIL
   };

   if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
       img->_BaseFormat == GL_DEPTH_STENCIL) {
      GLenum depth_mode = t->DepthMode;

      /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
       * with depth component data specified with a sized internal format.
       * Otherwise, it's left at the old default, GL_LUMINANCE.
       */
      if (_mesa_is_gles3(ctx) &&
          img->InternalFormat != GL_DEPTH_COMPONENT &&
          img->InternalFormat != GL_DEPTH_STENCIL) {
         depth_mode = GL_RED;
      }

      switch (depth_mode) {
      case GL_ALPHA:
         swizzles[0] = SWIZZLE_ZERO;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_LUMINANCE:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
         break;
      case GL_INTENSITY:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_RED:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_ONE;
         break;
      }
   }

   GLenum datatype = _mesa_get_format_datatype(img->TexFormat);

   /* If the texture's format is alpha-only, force R, G, and B to
    * 0.0. Similarly, if the texture's format has no alpha channel,
    * force the alpha value read to 1.0. This allows for the
    * implementation to use an RGBA texture for any of these formats
    * without leaking any unexpected values.
    */
   switch (img->_BaseFormat) {
   case GL_ALPHA:
      swizzles[0] = SWIZZLE_ZERO;
      swizzles[1] = SWIZZLE_ZERO;
      swizzles[2] = SWIZZLE_ZERO;
      break;
   case GL_LUMINANCE:
      if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
      }
      break;
   case GL_LUMINANCE_ALPHA:
      if (datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_W;
      }
      break;
   case GL_INTENSITY:
      if (datatype == GL_SIGNED_NORMALIZED) {
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
      }
      break;
   case GL_RED:
   case GL_RG:
   case GL_RGB:
      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
         swizzles[3] = SWIZZLE_ONE;
      break;
   }

   return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
                        swizzles[GET_SWZ(t->_Swizzle, 1)],
                        swizzles[GET_SWZ(t->_Swizzle, 2)],
                        swizzles[GET_SWZ(t->_Swizzle, 3)]);
}

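/**
 * Emit a gen4-style SURFACE_STATE for a buffer: a six-dword BUFFER surface
 * with the given format, size, and pitch, plus a relocation to the backing
 * BO (if any).
 */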
static void
gen4_emit_buffer_surface_state(struct brw_context *brw,
                               uint32_t *out_offset,
                               drm_intel_bo *bo,
                               unsigned buffer_offset,
                               unsigned surface_format,
                               unsigned buffer_size,
                               unsigned pitch,
                               bool rw)
{
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                                    6 * 4, 32, out_offset);
   memset(surf, 0, 6 * 4);

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
             (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
   surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
   surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
             ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
   surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
             (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;

   /* Emit relocation to surface contents.  The 965 PRM, Volume 4, section
    * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
    * physical cache.  It is mapped in hardware to the sampler cache."
    */
   if (bo) {
      drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
                              bo, buffer_offset,
                              I915_GEM_DOMAIN_SAMPLER,
                              (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
   }
}
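
/**
 * Set up SURFACE_STATE for a GL_TEXTURE_BUFFER texture: resolve the backing
 * buffer object, if any, and emit a buffer surface in the texture's format.
 */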
void
brw_update_buffer_texture_surface(struct gl_context *ctx,
                                  unsigned unit,
                                  uint32_t *surf_offset)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_buffer_object *intel_obj =
      intel_buffer_object(tObj->BufferObject);
   uint32_t size = tObj->BufferSize;
   drm_intel_bo *bo = NULL;
   mesa_format format = tObj->_BufferObjectFormat;
   uint32_t brw_format = brw_format_for_mesa_format(format);
   int texel_size = _mesa_get_format_bytes(format);

   if (intel_obj) {
      size = MIN2(size, intel_obj->Base.Size);
      bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
   }

   if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
      _mesa_problem(NULL, "bad format %s for texture buffer\n",
                    _mesa_get_format_name(format));
   }

   brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
                                       tObj->BufferOffset,
                                       brw_format,
                                       size / texel_size,
                                       texel_size,
                                       false /* rw */);
}

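/**
 * Emit a gen4-style SURFACE_STATE for a texture: target, format, size,
 * tiling, LOD range, and a relocation to the miptree BO.  Buffer textures
 * are handed off to brw_update_buffer_texture_surface() instead.
 */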
static void
brw_update_texture_surface(struct gl_context *ctx,
                           unsigned unit,
                           uint32_t *surf_offset,
                           bool for_gather)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_texture_object *intelObj = intel_texture_object(tObj);
   struct intel_mipmap_tree *mt = intelObj->mt;
   struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
   uint32_t *surf;

   /* BRW_NEW_TEXTURE_BUFFER */
   if (tObj->Target == GL_TEXTURE_BUFFER) {
      brw_update_buffer_texture_surface(ctx, unit, surf_offset);
      return;
   }

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, surf_offset);

   uint32_t tex_format = translate_tex_format(brw, mt->format,
                                              sampler->sRGBDecode);

   if (for_gather) {
      /* Sandybridge's gather4 message is broken for integer formats.
       * To work around this, we pretend the surface is UNORM for
       * 8 or 16-bit formats, and emit shader instructions to recover
       * the real INT/UINT value.  For 32-bit formats, we pretend
       * the surface is FLOAT, and simply reinterpret the resulting
       * bits.
       */
      switch (tex_format) {
      case BRW_SURFACEFORMAT_R8_SINT:
      case BRW_SURFACEFORMAT_R8_UINT:
         tex_format = BRW_SURFACEFORMAT_R8_UNORM;
         break;

      case BRW_SURFACEFORMAT_R16_SINT:
      case BRW_SURFACEFORMAT_R16_UINT:
         tex_format = BRW_SURFACEFORMAT_R16_UNORM;
         break;

      case BRW_SURFACEFORMAT_R32_SINT:
      case BRW_SURFACEFORMAT_R32_UINT:
         tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
         break;

      default:
         break;
      }
   }

   surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
              BRW_SURFACE_CUBEFACE_ENABLES |
              tex_format << BRW_SURFACE_FORMAT_SHIFT);

   surf[1] = mt->bo->offset64 + mt->offset; /* reloc */

   surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
              (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
              (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
              (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
              SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));

   surf[5] = mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;

   /* Emit relocation to surface contents */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *surf_offset + 4,
                           mt->bo,
                           surf[1] - mt->bo->offset64,
                           I915_GEM_DOMAIN_SAMPLER, 0);
}

/**
 * Create the constant buffer surface.  Vertex/fragment shader constants will be
 * read from this buffer with Data Port Read instructions/messages.
 */
void
brw_create_constant_surface(struct brw_context *brw,
                            drm_intel_bo *bo,
                            uint32_t offset,
                            uint32_t size,
                            uint32_t *out_offset)
{
   brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
                                       BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
                                       size, 1, false);
}

/**
 * Create the buffer surface.  Shader buffer variables will be
 * read from / written to this buffer with Data Port Read/Write
 * instructions/messages.
 */
void
brw_create_buffer_surface(struct brw_context *brw,
                          drm_intel_bo *bo,
                          uint32_t offset,
                          uint32_t size,
                          uint32_t *out_offset)
{
   /* Use a raw surface so we can reuse existing untyped read/write/atomic
    * messages.  We need these specifically for the fragment shader since they
    * include a pixel mask header that we need to ensure correct behavior
    * with helper invocations, which cannot write to the buffer.
    */
   brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
                                       BRW_SURFACEFORMAT_RAW,
                                       size, 1, true);
}

/**
 * Set up a binding table entry for use by stream output logic (transform
 * feedback).
 *
 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 */
void
brw_update_sol_surface(struct brw_context *brw,
                       struct gl_buffer_object *buffer_obj,
                       uint32_t *out_offset, unsigned num_vector_components,
                       unsigned stride_dwords, unsigned offset_dwords)
{
   struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
   uint32_t offset_bytes = 4 * offset_dwords;
   drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
                                             offset_bytes,
                                             buffer_obj->Size - offset_bytes);
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                                    out_offset);
   uint32_t pitch_minus_1 = 4*stride_dwords - 1;
   size_t size_dwords = buffer_obj->Size / 4;
   uint32_t buffer_size_minus_1, width, height, depth, surface_format;

   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
    * too big to map using a single binding table entry?
    */
   assert((size_dwords - offset_dwords) / stride_dwords
          <= BRW_MAX_NUM_BUFFER_ENTRIES);

   if (size_dwords > offset_dwords + num_vector_components) {
      /* There is room for at least 1 transform feedback output in the buffer.
       * Compute the number of additional transform feedback outputs the
       * buffer has room for.
       */
      buffer_size_minus_1 =
         (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
   } else {
      /* There isn't even room for a single transform feedback output in the
       * buffer.  We can't configure the binding table entry to prevent output
       * entirely; we'll have to rely on the geometry shader to detect
       * overflow.  But to minimize the damage in case of a bug, set up the
       * binding table entry to just allow a single output.
       */
      buffer_size_minus_1 = 0;
   }
   width = buffer_size_minus_1 & 0x7f;
   height = (buffer_size_minus_1 & 0xfff80) >> 7;
   depth = (buffer_size_minus_1 & 0x7f00000) >> 20;

   switch (num_vector_components) {
   case 1:
      surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
      break;
   case 2:
      surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
      break;
   case 3:
      surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
      break;
   case 4:
      surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
      break;
   default:
      unreachable("Invalid vector size for transform feedback output");
   }

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
      BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
      surface_format << BRW_SURFACE_FORMAT_SHIFT |
      BRW_SURFACE_RC_READ_WRITE;
   surf[1] = bo->offset64 + offset_bytes; /* reloc */
   surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
              height << BRW_SURFACE_HEIGHT_SHIFT);
   surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = 0;
   surf[5] = 0;

   /* Emit relocation to surface contents. */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *out_offset + 4,
                           bo, offset_bytes,
                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}

/* Creates a new WM constant buffer reflecting the current fragment program's
 * constants, if needed by the fragment program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_wm_pull_constants(struct brw_context *brw)
{
   struct brw_stage_state *stage_state = &brw->wm.base;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct brw_fragment_program *fp =
      (struct brw_fragment_program *) brw->fragment_program;
   /* BRW_NEW_FS_PROG_DATA */
   struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;

   /* _NEW_PROGRAM_CONSTANTS */
   brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
                             stage_state, prog_data);
}

const struct brw_tracked_state brw_wm_pull_constants = {
   .dirty = {
      .mesa = _NEW_PROGRAM_CONSTANTS,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_upload_wm_pull_constants,
};

/**
 * Creates a null renderbuffer surface.
 *
 * This is used when the shader doesn't write to any color output.  An FB
 * write to target 0 will still be emitted, because that's how the thread is
 * terminated (and computed depth is returned), so we need to have the
 * hardware discard the target 0 color output.
 */
static void
brw_emit_null_surface_state(struct brw_context *brw,
                            unsigned width,
                            unsigned height,
                            unsigned samples,
                            uint32_t *out_offset)
{
   /* From the Sandy Bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
    * Notes):
    *
    *     A null surface will be used in instances where an actual surface is
    *     not bound. When a write message is generated to a null surface, no
    *     actual surface is written to. When a read message (including any
    *     sampling engine message) is generated to a null surface, the result
    *     is all zeros. Note that a null surface type is allowed to be used
    *     with all messages, even if it is not specifically indicated as
    *     supported. All of the remaining fields in surface state are ignored
    *     for null surfaces, with the following exceptions:
    *
    *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
    *       depth buffer’s corresponding state for all render target surfaces,
    *       including null.
    *
    *     - Surface Format must be R8G8B8A8_UNORM.
    */
   unsigned surface_type = BRW_SURFACE_NULL;
   drm_intel_bo *bo = NULL;
   unsigned pitch_minus_1 = 0;
   uint32_t multisampling_state = 0;
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                                    out_offset);

   if (samples > 1) {
      /* On Gen6, null render targets seem to cause GPU hangs when
       * multisampling.  So work around this problem by rendering into a
       * dummy color buffer.
       *
       * To decrease the amount of memory needed by the workaround buffer, we
       * set its pitch to 128 bytes (the width of a Y tile).  This means that
       * the amount of memory needed for the workaround buffer is
       * (width_in_tiles + height_in_tiles - 1) tiles.
       *
       * Note that since the workaround buffer will be interpreted by the
       * hardware as an interleaved multisampled buffer, we need to compute
       * width_in_tiles and height_in_tiles by dividing the width and height
       * by 16 rather than the normal Y-tile size of 32.
       */
      unsigned width_in_tiles = ALIGN(width, 16) / 16;
      unsigned height_in_tiles = ALIGN(height, 16) / 16;
      unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
      brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
                         size_needed);
      bo = brw->wm.multisampled_null_render_target_bo;
      surface_type = BRW_SURFACE_2D;
      pitch_minus_1 = 127;
      multisampling_state = brw_get_surface_num_multisamples(samples);
   }

   surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
   if (brw->gen < 6) {
      surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
   }
   surf[1] = bo ? bo->offset64 : 0;
   surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   /* From the Sandy Bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
    * Notes):
    *
    *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
    */
   surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = multisampling_state;
   surf[5] = 0;

   if (bo) {
      drm_intel_bo_emit_reloc(brw->batch.bo,
                              *out_offset + 4,
                              bo, 0,
                              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
   }
}

/**
 * Sets up a surface state structure to point at the given region.
 * While it is only used for the front/back buffer currently, it should be
 * usable for further buffers when doing ARB_draw_buffers support.
 */
static uint32_t
brw_update_renderbuffer_surface(struct brw_context *brw,
                                struct gl_renderbuffer *rb,
                                bool layered, unsigned unit,
                                uint32_t surf_index)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;
   uint32_t *surf;
   uint32_t tile_x, tile_y;
   uint32_t format = 0;
   uint32_t offset;
   /* _NEW_BUFFERS */
   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
   /* BRW_NEW_FS_PROG_DATA */

   assert(!layered);

   if (rb->TexImage && !brw->has_surface_tile_offset) {
      intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);

      if (tile_x != 0 || tile_y != 0) {
         /* Original gen4 hardware couldn't draw to a non-tile-aligned
          * destination in a miptree unless you actually set up your
          * renderbuffer as a miptree and used the fragile
          * lod/array_index/etc. controls to select the image.  So, instead,
          * we just make a new single-level miptree and render into that.
          */
         intel_renderbuffer_move_to_temp(brw, irb, false);
         mt = irb->mt;
      }
   }

   intel_miptree_used_for_rendering(irb->mt);

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);

   format = brw->render_target_format[rb_format];
   if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __func__, _mesa_get_format_name(rb_format));
   }

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              format << BRW_SURFACE_FORMAT_SHIFT);

   /* reloc */
   assert(mt->offset % mt->cpp == 0);
   surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
              mt->bo->offset64 + mt->offset);

   surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
              (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(mt->num_samples);

   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   if (brw->gen < 6) {
      /* _NEW_COLOR */
      if (!ctx->Color.ColorLogicOpEnabled &&
          (ctx->Color.BlendEnabled & (1 << unit)))
         surf[0] |= BRW_SURFACE_BLEND_ENABLED;

      if (!ctx->Color.ColorMask[unit][0])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
      if (!ctx->Color.ColorMask[unit][1])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
      if (!ctx->Color.ColorMask[unit][2])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;

      /* Disable writes to the alpha component when the renderbuffer is
       * XRGB.
       */
      if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
          !ctx->Color.ColorMask[unit][3]) {
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
      }
   }

   drm_intel_bo_emit_reloc(brw->batch.bo,
                           offset + 4,
                           mt->bo,
                           surf[1] - mt->bo->offset64,
                           I915_GEM_DOMAIN_RENDER,
                           I915_GEM_DOMAIN_RENDER);

   return offset;
}

/**
 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 */
void
brw_update_renderbuffer_surfaces(struct brw_context *brw,
                                 const struct gl_framebuffer *fb,
                                 uint32_t render_target_start,
                                 uint32_t *surf_offset)
{
   GLuint i;
   const unsigned int w = _mesa_geometric_width(fb);
   const unsigned int h = _mesa_geometric_height(fb);
   const unsigned int s = _mesa_geometric_samples(fb);

   /* Update surfaces for drawing buffers */
   if (fb->_NumColorDrawBuffers >= 1) {
      for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
         const uint32_t surf_index = render_target_start + i;

         if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
            surf_offset[surf_index] =
               brw->vtbl.update_renderbuffer_surface(
                  brw, fb->_ColorDrawBuffers[i],
                  _mesa_geometric_layers(fb) > 0, i, surf_index);
         } else {
            brw->vtbl.emit_null_surface_state(brw, w, h, s,
               &surf_offset[surf_index]);
         }
      }
   } else {
      const uint32_t surf_index = render_target_start;
      brw->vtbl.emit_null_surface_state(brw, w, h, s,
         &surf_offset[surf_index]);
   }
}

static void
update_renderbuffer_surfaces(struct brw_context *brw)
{
   const struct gl_context *ctx = &brw->ctx;

   /* _NEW_BUFFERS | _NEW_COLOR */
   const struct gl_framebuffer *fb = ctx->DrawBuffer;
   brw_update_renderbuffer_surfaces(
      brw, fb,
      brw->wm.prog_data->binding_table.render_target_start,
      brw->wm.base.surf_offset);
   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS |
              _NEW_COLOR,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = update_renderbuffer_surfaces,
};

const struct brw_tracked_state gen6_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH,
   },
   .emit = update_renderbuffer_surfaces,
};

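/**
 * Emit surface state for every texture used by the given shader stage,
 * filling in the stage's binding table entries.  With for_gather set, the
 * entries land in the gather binding table area instead.
 */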
static void
update_stage_texture_surfaces(struct brw_context *brw,
                              const struct gl_program *prog,
                              struct brw_stage_state *stage_state,
                              bool for_gather)
{
   if (!prog)
      return;

   struct gl_context *ctx = &brw->ctx;

   uint32_t *surf_offset = stage_state->surf_offset;

   /* BRW_NEW_*_PROG_DATA */
   if (for_gather)
      surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
   else
      surf_offset += stage_state->prog_data->binding_table.texture_start;

   unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
   for (unsigned s = 0; s < num_samplers; s++) {
      surf_offset[s] = 0;

      if (prog->SamplersUsed & (1 << s)) {
         const unsigned unit = prog->SamplerUnits[s];

         /* _NEW_TEXTURE */
         if (ctx->Texture.Unit[unit]._Current) {
            brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
         }
      }
   }
}

/**
 * Construct SURFACE_STATE objects for enabled textures.
 */
static void
brw_update_texture_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_VERTEX_PROGRAM */
   struct gl_program *vs = (struct gl_program *) brw->vertex_program;

   /* BRW_NEW_TESS_PROGRAMS */
   struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
   struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;

   /* BRW_NEW_GEOMETRY_PROGRAM */
   struct gl_program *gs = (struct gl_program *) brw->geometry_program;

   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct gl_program *fs = (struct gl_program *) brw->fragment_program;

   /* BRW_NEW_COMPUTE_PROGRAM */
   struct gl_program *cs = (struct gl_program *) brw->compute_program;

   /* _NEW_TEXTURE */
   update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
   update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false);
   update_stage_texture_surfaces(brw, tes, &brw->tes.base, false);
   update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
   update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
   update_stage_texture_surfaces(brw, cs, &brw->cs.base, false);

   /* Emit an alternate set of surface state for gather.  This allows the
    * surface format to be overridden for only the gather4 messages.
    */
   if (brw->gen < 8) {
      if (vs && vs->UsesGather)
         update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
      if (tcs && tcs->UsesGather)
         update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true);
      if (tes && tes->UsesGather)
         update_stage_texture_surfaces(brw, tes, &brw->tes.base, true);
      if (gs && gs->UsesGather)
         update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
      if (fs && fs->UsesGather)
         update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
      if (cs && cs->UsesGather)
         update_stage_texture_surfaces(brw, cs, &brw->cs.base, true);
   }

   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_COMPUTE_PROGRAM |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_GEOMETRY_PROGRAM |
             BRW_NEW_GS_PROG_DATA |
             BRW_NEW_TESS_PROGRAMS |
             BRW_NEW_TCS_PROG_DATA |
             BRW_NEW_TES_PROG_DATA |
             BRW_NEW_TEXTURE_BUFFER |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_VS_PROG_DATA,
   },
   .emit = brw_update_texture_surfaces,
};

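/**
 * Create surfaces for the uniform and shader storage buffers bound to the
 * given shader stage, emitting null surfaces for unbound binding points.
 */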
void
brw_upload_ubo_surfaces(struct brw_context *brw,
                        struct gl_shader *shader,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;

   if (!shader)
      return;

   uint32_t *ubo_surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ubo_start];

   for (int i = 0; i < shader->NumUniformBlocks; i++) {
      struct gl_uniform_buffer_binding *binding =
         &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];

      if (binding->BufferObject == ctx->Shared->NullBufferObj) {
         brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
      } else {
         struct intel_buffer_object *intel_bo =
            intel_buffer_object(binding->BufferObject);
         drm_intel_bo *bo =
            intel_bufferobj_buffer(brw, intel_bo,
                                   binding->Offset,
                                   binding->BufferObject->Size - binding->Offset);
         brw_create_constant_surface(brw, bo, binding->Offset,
                                     binding->BufferObject->Size - binding->Offset,
                                     &ubo_surf_offsets[i]);
      }
   }

   uint32_t *ssbo_surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ssbo_start];

   for (int i = 0; i < shader->NumShaderStorageBlocks; i++) {
      struct gl_shader_storage_buffer_binding *binding =
         &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding];

      if (binding->BufferObject == ctx->Shared->NullBufferObj) {
         brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
      } else {
         struct intel_buffer_object *intel_bo =
            intel_buffer_object(binding->BufferObject);
         drm_intel_bo *bo =
            intel_bufferobj_buffer(brw, intel_bo,
                                   binding->Offset,
                                   binding->BufferObject->Size - binding->Offset);
         brw_create_buffer_surface(brw, bo, binding->Offset,
                                   binding->BufferObject->Size - binding->Offset,
                                   &ssbo_surf_offsets[i]);
      }
   }

   if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks)
      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
}

static void
brw_upload_wm_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;

   if (!prog)
      return;

   /* BRW_NEW_FS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
                           &brw->wm.base, &brw->wm.prog_data->base);
}

const struct brw_tracked_state brw_wm_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_wm_ubo_surfaces,
};

static void
brw_upload_cs_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];

   if (!prog)
      return;

   /* BRW_NEW_CS_PROG_DATA */
   brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
                           &brw->cs.base, &brw->cs.prog_data->base);
}

const struct brw_tracked_state brw_cs_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_UNIFORM_BUFFER,
   },
   .emit = brw_upload_cs_ubo_surfaces,
};

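/**
 * Create raw surfaces for the atomic counter buffers bound to the given
 * shader stage, so they can be accessed with untyped read/write/atomic
 * messages.
 */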
void
brw_upload_abo_surfaces(struct brw_context *brw,
                        struct gl_shader *shader,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;
   uint32_t *surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.abo_start];

   if (shader && shader->NumAtomicBuffers) {
      for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
         struct gl_atomic_buffer_binding *binding =
            &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
         struct intel_buffer_object *intel_bo =
            intel_buffer_object(binding->BufferObject);
         drm_intel_bo *bo = intel_bufferobj_buffer(
            brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);

         brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
                                             binding->Offset, BRW_SURFACEFORMAT_RAW,
                                             bo->size - binding->Offset, 1, true);
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

static void
brw_upload_wm_abo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;

   if (prog) {
      /* BRW_NEW_FS_PROG_DATA */
      brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
                              &brw->wm.base, &brw->wm.prog_data->base);
   }
}

const struct brw_tracked_state brw_wm_abo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_ATOMIC_BUFFER |
             BRW_NEW_BATCH |
             BRW_NEW_FS_PROG_DATA,
   },
   .emit = brw_upload_wm_abo_surfaces,
};

static void
brw_upload_cs_abo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];

   if (prog) {
      /* BRW_NEW_CS_PROG_DATA */
      brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
                              &brw->cs.base, &brw->cs.prog_data->base);
   }
}

const struct brw_tracked_state brw_cs_abo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_ATOMIC_BUFFER |
             BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA,
   },
   .emit = brw_upload_cs_abo_surfaces,
};

static void
brw_upload_cs_image_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];

   if (prog) {
      /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
                                &brw->cs.base, &brw->cs.prog_data->base);
   }
}

const struct brw_tracked_state brw_cs_image_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_IMAGE_UNITS
   },
   .emit = brw_upload_cs_image_surfaces,
};

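/**
 * Pick the surface format for a shader image based on its declared access:
 * write-only images can use the real format, while readable images may have
 * to be lowered to a format typed surface reads support, or to raw.
 */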
static uint32_t
get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
{
   if (access == GL_WRITE_ONLY) {
      return brw_format_for_mesa_format(format);
   } else {
      /* Typed surface reads support a very limited subset of the shader
       * image formats.  Translate it into the closest format the
       * hardware supports.
       */
      if ((_mesa_get_format_bytes(format) >= 16 && brw->gen <= 8) ||
          (_mesa_get_format_bytes(format) >= 8 &&
           (brw->gen == 7 && !brw->is_haswell)))
         return BRW_SURFACEFORMAT_RAW;
      else
         return brw_format_for_mesa_format(
            brw_lower_mesa_image_format(brw->intelScreen->devinfo, format));
   }
}

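/**
 * Fill out a brw_image_param with its surface index and with swizzling
 * disabled; used as-is for invalid image units, and as the starting point
 * for buffer and texture images.
 */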
static void
update_default_image_param(struct brw_context *brw,
                           struct gl_image_unit *u,
                           unsigned surface_idx,
                           struct brw_image_param *param)
{
   memset(param, 0, sizeof(*param));
   param->surface_idx = surface_idx;
   /* Set the swizzling shifts to all-ones to effectively disable swizzling --
    * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
    * detailed explanation of these parameters.
    */
   param->swizzling[0] = 0xff;
   param->swizzling[1] = 0xff;
}

static void
update_buffer_image_param(struct brw_context *brw,
                          struct gl_image_unit *u,
                          unsigned surface_idx,
                          struct brw_image_param *param)
{
   struct gl_buffer_object *obj = u->TexObj->BufferObject;

   update_default_image_param(brw, u, surface_idx, param);

   param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
   param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
}

static void
update_texture_image_param(struct brw_context *brw,
                           struct gl_image_unit *u,
                           unsigned surface_idx,
                           struct brw_image_param *param)
{
   struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;

   update_default_image_param(brw, u, surface_idx, param);

   param->size[0] = minify(mt->logical_width0, u->Level);
   param->size[1] = minify(mt->logical_height0, u->Level);
   param->size[2] = (!u->Layered ? 1 :
                     u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
                     u->TexObj->Target == GL_TEXTURE_3D ?
                     minify(mt->logical_depth0, u->Level) :
                     mt->logical_depth0);

   intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
                                  &param->offset[0],
                                  &param->offset[1]);

   param->stride[0] = mt->cpp;
   param->stride[1] = mt->pitch / mt->cpp;
   param->stride[2] =
      brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
   param->stride[3] =
      brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);

   if (mt->tiling == I915_TILING_X) {
      /* An X tile is a rectangular block of 512x8 bytes. */
      param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
      param->tiling[1] = _mesa_logbase2(8);

      if (brw->has_swizzling) {
         /* Right shifts required to swizzle bits 9 and 10 of the memory
          * address with bit 6.
          */
         param->swizzling[0] = 3;
         param->swizzling[1] = 4;
      }
   } else if (mt->tiling == I915_TILING_Y) {
      /* The layout of a Y-tiled surface in memory isn't really fundamentally
       * different from the layout of an X-tiled surface; we simply pretend
       * that the surface is broken up into a number of smaller 16Bx32 tiles,
       * each one arranged in X-major order just as is the case for X-tiling.
       */
      param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
      param->tiling[1] = _mesa_logbase2(32);

      if (brw->has_swizzling) {
         /* Right shift required to swizzle bit 9 of the memory address with
          * bit 6.
          */
         param->swizzling[0] = 3;
      }
   }

   /* 3D textures are arranged in 2D in memory with 2^lod slices per row.  The
    * address calculation algorithm (emit_address_calculation() in
    * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
    * modulus equal to the LOD.
    */
   param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
                       0);
}

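/**
 * Emit the surface state and image parameters for one image unit, covering
 * buffer textures, raw-surface fallbacks, ordinary texture images, and
 * invalid (unbound) units.
 */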
static void
update_image_surface(struct brw_context *brw,
                     struct gl_image_unit *u,
                     GLenum access,
                     unsigned surface_idx,
                     uint32_t *surf_offset,
                     struct brw_image_param *param)
{
   if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
      struct gl_texture_object *obj = u->TexObj;
      const unsigned format = get_image_format(brw, u->_ActualFormat, access);

      if (obj->Target == GL_TEXTURE_BUFFER) {
         struct intel_buffer_object *intel_obj =
            intel_buffer_object(obj->BufferObject);
         const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
                                      _mesa_get_format_bytes(u->_ActualFormat));

         brw->vtbl.emit_buffer_surface_state(
            brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
            format, intel_obj->Base.Size / texel_size, texel_size,
            access != GL_READ_ONLY);

         update_buffer_image_param(brw, u, surface_idx, param);

      } else {
         struct intel_texture_object *intel_obj = intel_texture_object(obj);
         struct intel_mipmap_tree *mt = intel_obj->mt;

         if (format == BRW_SURFACEFORMAT_RAW) {
            brw->vtbl.emit_buffer_surface_state(
               brw, surf_offset, mt->bo, mt->offset,
               format, mt->bo->size - mt->offset, 1 /* pitch */,
               access != GL_READ_ONLY);

         } else {
            const unsigned min_layer = obj->MinLayer + u->_Layer;
            const unsigned min_level = obj->MinLevel + u->Level;
            const unsigned num_layers = (!u->Layered ? 1 :
                                         obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
                                         mt->logical_depth0);
            const GLenum target = (obj->Target == GL_TEXTURE_CUBE_MAP ||
                                   obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY ?
                                   GL_TEXTURE_2D_ARRAY : obj->Target);

            brw->vtbl.emit_texture_surface_state(
               brw, mt, target,
               min_layer, min_layer + num_layers,
               min_level, min_level + 1,
               format, SWIZZLE_XYZW,
               surf_offset, access != GL_READ_ONLY, false);
         }

         update_texture_image_param(brw, u, surface_idx, param);
      }

   } else {
      brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
      update_default_image_param(brw, u, surface_idx, param);
   }
}

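/**
 * Upload surface state and image parameters for every image uniform of the
 * given shader stage.
 */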
void
brw_upload_image_surfaces(struct brw_context *brw,
                          struct gl_shader *shader,
                          struct brw_stage_state *stage_state,
                          struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;

   if (shader && shader->NumImages) {
      for (unsigned i = 0; i < shader->NumImages; i++) {
         struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
         const unsigned surf_idx = prog_data->binding_table.image_start + i;

         update_image_surface(brw, u, shader->ImageAccess[i],
                              surf_idx,
                              &stage_state->surf_offset[surf_idx],
                              &prog_data->image_param[i]);
      }

      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

static void
brw_upload_wm_image_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;

   if (prog) {
      /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
                                &brw->wm.base, &brw->wm.prog_data->base);
   }
}

const struct brw_tracked_state brw_wm_image_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
             BRW_NEW_IMAGE_UNITS
   },
   .emit = brw_upload_wm_image_surfaces,
};

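/**
 * Plug the gen4-style surface state emitters into the context vtable.
 */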
void
gen4_init_vtable_surface_functions(struct brw_context *brw)
{
   brw->vtbl.update_texture_surface = brw_update_texture_surface;
   brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
   brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
   brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
}

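/**
 * Create a raw surface for the gl_NumWorkGroups value: either the existing
 * num_work_groups_bo, or a fresh upload of the three counts stored in
 * brw->compute.num_work_groups.
 */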
static void
brw_upload_cs_work_groups_surface(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog =
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];

   if (prog && brw->cs.prog_data->uses_num_work_groups) {
      const unsigned surf_idx =
         brw->cs.prog_data->binding_table.work_groups_start;
      uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
      drm_intel_bo *bo;
      uint32_t bo_offset;

      if (brw->compute.num_work_groups_bo == NULL) {
         bo = NULL;
         intel_upload_data(brw,
                           (void *)brw->compute.num_work_groups,
                           3 * sizeof(GLuint),
                           sizeof(GLuint),
                           &bo,
                           &bo_offset);
      } else {
         bo = brw->compute.num_work_groups_bo;
         bo_offset = brw->compute.num_work_groups_offset;
      }

      brw->vtbl.emit_buffer_surface_state(brw, surf_offset,
                                          bo, bo_offset,
                                          BRW_SURFACEFORMAT_RAW,
                                          3 * sizeof(GLuint), 1, true);
      brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
   }
}

const struct brw_tracked_state brw_cs_work_groups_surface = {
   .dirty = {
      .brw = BRW_NEW_CS_WORK_GROUPS
   },
   .emit = brw_upload_cs_work_groups_surface,
};