Commit 91e58972 authored by Damien Lespiau's avatar Damien Lespiau Committed by Ben Widawsky

rendercopy/bdw: Fix the original implementation

For posterity, I've squashed these commits against Damien's request.

rendercopy/gen8: Fix the include guards

rendercopy/gen8: Update the 3DSTATE_MULTISAMPLE opcode

The opcode has changed in BDW.

rendercopy/gen8: Add the VF_TOPOLOGY state

The primitive type has moved out of the 3DPRIMITIVE to its own state,
VF_TOPOLOGY.

rendercopy/gen8: Fixup 3DSTATE_PS

Update the state to the latest BSpec, in particular the thread count was
using a wrong shift and we were missing kernel2 offset.

rendercopy/gen8: Update 3DSTATE_BASE_ADDRESS

This state has seen its fields moved around a bit, follow the BSpec.

rendercopy/gen8: Allocate 64 VUEs

The simulator screams at us if we try to allocate less than that.

rendercopy/gen8: Surface states have to be 64-byte aligned

rendercopy/gen8: Vertical/horizontal align 2 does not exist any more

So set them to 4. This should not matter with rendercopy (which is not
using compressed textures), but it makes the simulator moan.

rendercopy/gen8: Make sure the vertex buffer is 8 bytes aligned

rendercopy/gen8: Adjust 3DSTATE_VERTEX_BUFFERS for gen8

The address of the buffer is now on 48 bits. Also the size was computed
as offset + size where the field is really the size of the buffer
itself, not the end address.

rendercopy/gen8: Update the SF/SBE states for gen8

gen8 has a few changes around those states and adds two new ones: RASTER
and SBE_SWIZ.

rendercopy/gen8: Add the PS_EXTRA and PS_BLEND states

rendercopy/gen8: Fix building with DEBUG_RENDERCOPY defined

The forward declaration was missing the final ';'. Let's move the whole
function at the top instead.

rendercopy/gen8: Update the PS and CONSTANT_PS states

rendercopy/gen8: Fix the red channel selection

Make it output red.

rendercopy/gen8: Update the write -1 shader

With the latest assembler changes from Haihao.

rendercopy/gen8: Remove blit.g8a

There is no diff between this file and blit.g7a. Remove it.

rendercopy/gen8: Fix the surface relocation offset

The surface base address is now at dwords 8/9 so the relocation has to
mirror the change.

rendercopy/gen8: Add the VF_INSTANCING state

Should work without, but doesn't hurt to add it.

rendercopy/gen8: Set the Attribute enable field in PS_EXTRA

When the SF is set up to output some attributes, the pixel shader also
has to be told there are attributes to care about.

rendercopy/gen8: Set the force bits to read URB offset/length

If we want to override the URB offset/length in the SBE state itself, we
need to set the force bits on (new in gen8)
Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
parent 3f0714a8
#ifndef GEN7_RENDER_H
#define GEN7_RENDER_H
#ifndef GEN8_RENDER_H
#define GEN8_RENDER_H
#include "gen6_render.h"
......@@ -13,7 +13,16 @@
#define GEN7_3DSTATE_DEPTH_BUFFER GEN6_3D(3, 0, 0x05)
#define GEN7_3DSTATE_STENCIL_BUFFER GEN6_3D(3, 0, 0x06)
#define GEN7_3DSTATE_HIER_DEPTH_BUFFER GEN6_3D(3, 0, 0x07)
#define GEN8_3DSTATE_MULTISAMPLE GEN6_3D(3, 0, 0x0d)
# define GEN8_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER (0 << 4)
# define GEN8_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
# define GEN8_3DSTATE_MULTISAMPLE_NUMSAMPLES_1 (0 << 1)
# define GEN8_3DSTATE_MULTISAMPLE_NUMSAMPLES_2 (1 << 1)
# define GEN8_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1)
# define GEN8_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1)
# define GEN9_3DSTATE_MULTISAMPLE_NUMSAMPLES_16 (4 << 1)
#define GEN8_3DSTATE_VF_INSTANCING GEN6_3D(3, 0, 0x49)
#define GEN7_3DSTATE_GS GEN6_3D(3, 0, 0x11)
#define GEN7_3DSTATE_CONSTANT_GS GEN6_3D(3, 0, 0x16)
#define GEN7_3DSTATE_CONSTANT_HS GEN6_3D(3, 0, 0x19)
......@@ -23,9 +32,24 @@
#define GEN7_3DSTATE_DS GEN6_3D(3, 0, 0x1d)
#define GEN7_3DSTATE_STREAMOUT GEN6_3D(3, 0, 0x1e)
#define GEN7_3DSTATE_SBE GEN6_3D(3, 0, 0x1f)
# define GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH (1 << 29)
# define GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET (1 << 28)
# define GEN7_SBE_NUM_OUTPUTS_SHIFT 22
# define GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT 11
# define GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT 5
#define GEN8_3DSTATE_SBE_SWIZ GEN6_3D(3, 0, 0x51)
#define GEN8_3DSTATE_RASTER GEN6_3D(3, 0, 0x50)
# define GEN8_RASTER_FRONT_WINDING_CCW (1 << 21)
# define GEN8_RASTER_CULL_NONE (1 << 16)
#define GEN7_3DSTATE_PS GEN6_3D(3, 0, 0x20)
#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP \
GEN6_3D(3, 0, 0x21)
#define GEN8_3DSTATE_PS_BLEND GEN6_3D(3, 0, 0x4d)
# define GEN8_PS_BLEND_HAS_WRITEABLE_RT (1 << 30)
#define GEN8_3DSTATE_PS_EXTRA GEN6_3D(3,0, 0x4f)
# define GEN8_PSX_PIXEL_SHADER_VALID (1 << 31)
# define GEN8_PSX_ATTRIBUTE_ENABLE (1 << 8)
#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC GEN6_3D(3, 0, 0x23)
#define GEN7_3DSTATE_BLEND_STATE_POINTERS GEN6_3D(3, 0, 0x24)
#define GEN7_3DSTATE_DS_STATE_POINTERS GEN6_3D(3, 0, 0x25)
......@@ -41,6 +65,8 @@
#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS GEN6_3D(3, 0, 0x2e)
#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS GEN6_3D(3, 0, 0x2f)
#define GEN8_3DSTATE_VF_TOPOLOGY GEN6_3D(3, 0, 0x4b)
#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS GEN6_3D(3, 1, 0x12)
#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS GEN6_3D(3, 1, 0x13)
#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS GEN6_3D(3, 1, 0x14)
......@@ -49,13 +75,11 @@
/* Some random bits that we care about */
#define GEN7_VB0_BUFFER_ADDR_MOD_EN (1 << 14)
#define GEN7_WM_DISPATCH_ENABLE (1 << 29)
#define GEN7_3DSTATE_PS_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 11)
#define GEN7_3DSTATE_PS_ATTRIBUTE_ENABLED (1 << 10)
/* Random shifts */
#define GEN7_3DSTATE_WM_MAX_THREADS_SHIFT 24
#define HSW_3DSTATE_WM_MAX_THREADS_SHIFT 23
#define GEN8_3DSTATE_PS_MAX_THREADS_SHIFT 23
/* Shamelessly ripped from mesa */
struct gen8_surface_state
......
This diff is collapsed.
/* Assemble with ".../intel-gen4asm/src/intel-gen4asm -g 7" */
/* Overview: this pixel shader interpolates the (u,v) texture coordinates for
* 16 pixels, samples the source surface (binding table index 1) and writes the
* sampled data to the render target (binding table index 0), ending the thread
* with the final send.
*/
/* Move pixels into g10-g13. The pixel shader does not load what you want. It
* loads the input data for a plane function to calculate what you want. The
* following is boilerplate code to move our normalized texture coordinates
* (u,v) into g10-g13. It does this 4 subspans (16 pixels) at a time.
*
* This should do the same thing, but it doesn't work for some reason.
* pln(16) g10 g6<0,1,0>F g2<8,8,1>F { align1 };
* pln(16) g12 g6.16<1>F g2<8,8,1>F { align1 };
*/
/* U */
pln (8) g10<1>F g6.0<0,1,0>F g2.0<8,8,1>F { align1 }; /* pixel 0-7 */
pln (8) g11<1>F g6.0<0,1,0>F g4.0<8,8,1>F { align1 }; /* pixel 8-15 */
/* V
* NOTE(review): unlike the U lines above, the g6.16 source operands below carry
* no F type suffix -- confirm the assembler defaults to float here.
*/
pln (8) g12<1>F g6.16<0,1,0> g2.0<8,8,1>F { align1 }; /* pixel 0-7 */
pln (8) g13<1>F g6.16<0,1,0> g4.0<8,8,1>F { align1 }; /* pixel 8-15 */
/* Next we want the sampler to fetch the src texture (i.e. the src buffer).
* This is done with a pretty simple send message. The output goes to g112,
* which is exactly what we're supposed to use in our final send message.
* In intel-gen4asm, we should end up parsed by the following rule:
* predicate SEND execsize dst sendleadreg sndopr directsrcoperand instoptions
*
* Send message descriptor:
* 28:25 = message len = 4 // our 4 registers have 16 pixels
* 24:20 = response len = 8 // Each pixel is RGBA32, so we need 8 registers
* 19:19 = header present = 0
* 18:17 = SIMD16 = 2
* 16:12 = TYPE = 0 (regular sample)
* 11:08 = Sampler index = ignored/0
* 7:0 = binding table index = src = 1
* 0x8840001
*
* Send message extra descriptor
* 5:5 = End of Thread = 0
* 3:0 = Target Function ID = SFID_SAMPLER (2)
* 0x2
*/
send(16) g112 g10 0x2 0x8840001 { align1 };
/* g112-g119 now contain the sampled source input, and all we must do is write
* it out to the destination render target. This is done with the send message
* as well. The only extra bits are to terminate the pixel shader.
*
* Send message descriptor:
* 28:25 = message len = 8 // 16 pixels RGBA32
* 24:20 = response len = 0
* 19:19 = header present = 0
* 17:14 = message type = Render Target Write (12)
* 12:12 = Last Render Target Select = 1
* 10:08 = Message Type = SIMD16 (0)
* 07:00 = Binding Table Index = dest = 0
* 0x10031000
*
* Send message extra descriptor
* 5:5 = End of Thread = 1
* 3:0 = Target Function ID = SFID_DP_RC (5)
* 0x25
*/
send(16) null g112 0x25 0x10031000 { align1, EOT };
/* vim: set ft=c ts=4 sw=2 tw=80 et: */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment