Commit 2234f87a authored by Ben Widawsky

lib: add a gen7 rendercopy

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
parent ab0460ea
......@@ -24,7 +24,9 @@ libintel_tools_la_SOURCES = \
rendercopy_i915.c \
rendercopy_i830.c \
gen6_render.h \
gen7_render.h \
rendercopy_gen6.c \
rendercopy_gen7.c \
rendercopy.h \
intel_reg_map.c \
$(NULL)
......
#ifndef GEN7_RENDER_H
#define GEN7_RENDER_H
#include "gen6_render.h"
/*
 * Gen7 command opcodes and a handful of bit/shift constants.
 *
 * Most opcodes are spelled with the GEN6_3D() helper, which is not defined in
 * this header and is presumably supplied by gen6_render.h (included above).
 * Only the URB opcodes carry a raw value shifted into the upper 16 bits.
 */

/* URB opcodes, one per stage suffix (VS/HS/DS/GS). */
#define GEN7_3DSTATE_URB_VS				(0x7830 << 16)
#define GEN7_3DSTATE_URB_HS				(0x7831 << 16)
#define GEN7_3DSTATE_URB_DS				(0x7832 << 16)
#define GEN7_3DSTATE_URB_GS				(0x7833 << 16)

/*
 * NOTE(review): GEN6_-prefixed name living in the gen7 header; check that it
 * does not clash with a definition in gen6_render.h.
 */
#define GEN6_3DSTATE_SCISSOR_STATE_POINTERS		GEN6_3D(3, 0, 0xf)

/* GEN6_3D(3, 0, subopcode) opcodes, in ascending subopcode order. */
#define GEN7_3DSTATE_CLEAR_PARAMS			GEN6_3D(3, 0, 0x04)
#define GEN7_3DSTATE_DEPTH_BUFFER			GEN6_3D(3, 0, 0x05)
#define GEN7_3DSTATE_STENCIL_BUFFER			GEN6_3D(3, 0, 0x06)
#define GEN7_3DSTATE_HIER_DEPTH_BUFFER			GEN6_3D(3, 0, 0x07)

#define GEN7_3DSTATE_GS					GEN6_3D(3, 0, 0x11)
#define GEN7_3DSTATE_CONSTANT_GS			GEN6_3D(3, 0, 0x16)
#define GEN7_3DSTATE_CONSTANT_HS			GEN6_3D(3, 0, 0x19)
#define GEN7_3DSTATE_CONSTANT_DS			GEN6_3D(3, 0, 0x1a)
#define GEN7_3DSTATE_HS					GEN6_3D(3, 0, 0x1b)
#define GEN7_3DSTATE_TE					GEN6_3D(3, 0, 0x1c)
#define GEN7_3DSTATE_DS					GEN6_3D(3, 0, 0x1d)
#define GEN7_3DSTATE_STREAMOUT				GEN6_3D(3, 0, 0x1e)
#define GEN7_3DSTATE_SBE				GEN6_3D(3, 0, 0x1f)
#define GEN7_3DSTATE_PS					GEN6_3D(3, 0, 0x20)

#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP	GEN6_3D(3, 0, 0x21)
#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC		GEN6_3D(3, 0, 0x23)
#define GEN7_3DSTATE_BLEND_STATE_POINTERS		GEN6_3D(3, 0, 0x24)
#define GEN7_3DSTATE_DS_STATE_POINTERS			GEN6_3D(3, 0, 0x25)

#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS		GEN6_3D(3, 0, 0x26)
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS		GEN6_3D(3, 0, 0x27)
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS		GEN6_3D(3, 0, 0x28)
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS		GEN6_3D(3, 0, 0x29)
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS		GEN6_3D(3, 0, 0x2a)

#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS		GEN6_3D(3, 0, 0x2b)
#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS		GEN6_3D(3, 0, 0x2c)
#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS		GEN6_3D(3, 0, 0x2d)
#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS		GEN6_3D(3, 0, 0x2e)
#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS		GEN6_3D(3, 0, 0x2f)

/* GEN6_3D(3, 1, subopcode) opcodes. */
#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS		GEN6_3D(3, 1, 0x12)
#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS		GEN6_3D(3, 1, 0x13)
#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS		GEN6_3D(3, 1, 0x14)
#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS		GEN6_3D(3, 1, 0x15)
#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS		GEN6_3D(3, 1, 0x16)

/* Single-bit flags that get OR-ed into command dwords. */
#define GEN7_VB0_BUFFER_ADDR_MOD_EN			(1 << 14)
#define GEN7_WM_DISPATCH_ENABLE				(1 << 29)
#define GEN7_3DSTATE_PS_PERSPECTIVE_PIXEL_BARYCENTRIC	(1 << 11)
#define GEN7_3DSTATE_PS_ATTRIBUTE_ENABLED		(1 << 10)

/* Shift amounts for multi-bit fields. */
#define GEN7_3DSTATE_WM_MAX_THREADS_SHIFT		24
/* Shamelessly ripped from mesa */
/*
 * Eight-dword surface state record, one anonymous struct (ss0..ss7) per dword.
 *
 * The bit ranges in the comments are derived from the declared field widths
 * and assume the compiler allocates bitfields starting at the least
 * significant bit (as GCC/Clang do on little-endian targets).
 */
struct gen7_surface_state {
	struct {
		uint32_t cube_pos_z:1;			/* bit 0 */
		uint32_t cube_neg_z:1;			/* bit 1 */
		uint32_t cube_pos_y:1;			/* bit 2 */
		uint32_t cube_neg_y:1;			/* bit 3 */
		uint32_t cube_pos_x:1;			/* bit 4 */
		uint32_t cube_neg_x:1;			/* bit 5 */
		uint32_t pad2:2;			/* bits 7:6 */
		uint32_t render_cache_read_write:1;	/* bit 8 */
		uint32_t pad1:1;			/* bit 9 */
		uint32_t surface_array_spacing:1;	/* bit 10 */
		uint32_t vert_line_stride_ofs:1;	/* bit 11 */
		uint32_t vert_line_stride:1;		/* bit 12 */
		uint32_t tile_walk:1;			/* bit 13 */
		uint32_t tiled_surface:1;		/* bit 14 */
		uint32_t horizontal_alignment:1;	/* bit 15 */
		uint32_t vertical_alignment:2;		/* bits 17:16 */
		uint32_t surface_format:9;		/**< BRW_SURFACEFORMAT_x, bits 26:18 */
		uint32_t pad0:1;			/* bit 27 */
		uint32_t is_array:1;			/* bit 28 */
		uint32_t surface_type:3;		/**< BRW_SURFACE_1D/2D/3D/CUBE, bits 31:29 */
	} ss0;

	struct {
		uint32_t base_addr;			/* whole dword */
	} ss1;

	struct {
		uint32_t width:14;			/* bits 13:0 */
		uint32_t pad1:2;			/* bits 15:14 */
		uint32_t height:14;			/* bits 29:16 */
		uint32_t pad0:2;			/* bits 31:30 */
	} ss2;

	struct {
		uint32_t pitch:18;			/* bits 17:0 */
		uint32_t pad:3;				/* bits 20:18 */
		uint32_t depth:11;			/* bits 31:21 */
	} ss3;

	struct {
		uint32_t multisample_position_palette_index:3;	/* bits 2:0 */
		uint32_t num_multisamples:3;			/* bits 5:3 */
		uint32_t multisampled_surface_storage_format:1;	/* bit 6 */
		uint32_t render_target_view_extent:11;		/* bits 17:7 */
		uint32_t min_array_elt:11;			/* bits 28:18 */
		uint32_t rotation:2;				/* bits 30:29 */
		uint32_t pad0:1;				/* bit 31 */
	} ss4;

	struct {
		uint32_t mip_count:4;			/* bits 3:0 */
		uint32_t min_lod:4;			/* bits 7:4 */
		uint32_t pad1:12;			/* bits 19:8 */
		uint32_t y_offset:4;			/* bits 23:20 */
		uint32_t pad0:1;			/* bit 24 */
		uint32_t x_offset:7;			/* bits 31:25 */
	} ss5;

	struct {
		uint32_t pad; /* Multisample Control Surface stuff */
	} ss6;

	struct {
		uint32_t resource_min_lod:12;		/* bits 11:0 */

		/* Only on Haswell */
		uint32_t pad0:4;			/* bits 15:12 */
		/* "chanel" spelling is kept: renaming would break users of these fields. */
		uint32_t shader_chanel_select_a:3;	/* bits 18:16 */
		uint32_t shader_chanel_select_b:3;	/* bits 21:19 */
		uint32_t shader_chanel_select_g:3;	/* bits 24:22 */
		uint32_t shader_chanel_select_r:3;	/* bits 27:25 */

		uint32_t alpha_clear_color:1;		/* bit 28 */
		uint32_t blue_clear_color:1;		/* bit 29 */
		uint32_t green_clear_color:1;		/* bit 30 */
		uint32_t red_clear_color:1;		/* bit 31 */
	} ss7;
};
/*
 * Four-dword sampler state record, one anonymous struct (ss0..ss3) per dword.
 *
 * The bit ranges in the comments are derived from the declared field widths
 * and assume the compiler allocates bitfields starting at the least
 * significant bit (as GCC/Clang do on little-endian targets).
 */
struct gen7_sampler_state {
	struct {
		uint32_t aniso_algorithm:1;		/* bit 0 */
		uint32_t lod_bias:13;			/* bits 13:1 */
		uint32_t min_filter:3;			/* bits 16:14 */
		uint32_t mag_filter:3;			/* bits 19:17 */
		uint32_t mip_filter:2;			/* bits 21:20 */
		uint32_t base_level:5;			/* bits 26:22 */
		uint32_t pad1:1;			/* bit 27 */
		uint32_t lod_preclamp:1;		/* bit 28 */
		uint32_t default_color_mode:1;		/* bit 29 */
		uint32_t pad0:1;			/* bit 30 */
		uint32_t disable:1;			/* bit 31 */
	} ss0;

	struct {
		uint32_t cube_control_mode:1;		/* bit 0 */
		uint32_t shadow_function:3;		/* bits 3:1 */
		uint32_t pad:4;				/* bits 7:4 */
		uint32_t max_lod:12;			/* bits 19:8 */
		uint32_t min_lod:12;			/* bits 31:20 */
	} ss1;

	struct {
		uint32_t pad:5;				/* bits 4:0 */
		uint32_t default_color_pointer:27;	/* bits 31:5 */
	} ss2;

	struct {
		uint32_t r_wrap_mode:3;			/* bits 2:0 */
		uint32_t t_wrap_mode:3;			/* bits 5:3 */
		uint32_t s_wrap_mode:3;			/* bits 8:6 */
		uint32_t pad:1;				/* bit 9 */
		uint32_t non_normalized_coord:1;	/* bit 10 */
		uint32_t trilinear_quality:2;		/* bits 12:11 */
		uint32_t address_round:6;		/* bits 18:13 */
		uint32_t max_aniso:3;			/* bits 21:19 */
		uint32_t chroma_key_mode:1;		/* bit 22 */
		uint32_t chroma_key_index:2;		/* bits 24:23 */
		uint32_t chroma_key_enable:1;		/* bit 25 */
		uint32_t pad0:6;			/* bits 31:26 */
	} ss3;
};
/*
 * Combined SF/CLIP viewport record: 16 four-byte slots in total
 * (6 viewport floats, 2 pad dwords, 4 guardband floats, 4 pad floats).
 */
struct gen7_sf_clip_viewport {
	/* Slots 0-5: the six viewport coefficients. */
	struct {
		float m00;
		float m11;
		float m22;
		float m30;
		float m31;
		float m32;
	} viewport;

	/* Slots 6-7: padding. */
	uint32_t pad0[2];

	/* Slots 8-11: guardband extents. */
	struct {
		float xmin;
		float xmax;
		float ymin;
		float ymax;
	} guardband;

	/* Slots 12-15: trailing padding (declared as float in the original). */
	float pad1[4];
};
/*
 * Scissor rectangle packed into two dwords: the minimum corner in the first,
 * the maximum corner in the second, 16 bits per coordinate.
 *
 * Bit ranges assume LSB-first bitfield allocation (GCC/Clang, little-endian).
 */
struct gen6_scissor_rect {
	/* dword 0 */
	uint32_t xmin:16;	/* bits 15:0 */
	uint32_t ymin:16;	/* bits 31:16 */

	/* dword 1 */
	uint32_t xmax:16;	/* bits 15:0 */
	uint32_t ymax:16;	/* bits 31:16 */
};
#endif
......@@ -63,6 +63,10 @@ typedef void (*render_copyfunc_t)(struct intel_batchbuffer *batch,
unsigned width, unsigned height,
struct scratch_buf *dst, unsigned dst_x, unsigned dst_y);
/*
 * Gen7 render-copy entry point.
 *
 * The parameter list starts the same way as gen6_render_copyfunc below and
 * ends the same way as the render_copyfunc_t pointer type above, so it is
 * presumably meant to be used through that type.
 *
 * batch:          batchbuffer the copy is issued through
 * src:            source buffer; src_x/src_y give the position within it
 * width, height:  extent of the copy
 * dst:            destination buffer; dst_x/dst_y give the position within it
 *
 * NOTE(review): the implementation is not visible here; units of the
 * coordinates and ownership of the buffers should be confirmed against
 * rendercopy_gen7.c.
 */
void gen7_render_copyfunc(struct intel_batchbuffer *batch,
struct scratch_buf *src, unsigned src_x, unsigned src_y,
unsigned width, unsigned height,
struct scratch_buf *dst, unsigned dst_x, unsigned dst_y);
void gen6_render_copyfunc(struct intel_batchbuffer *batch,
struct scratch_buf *src, unsigned src_x, unsigned src_y,
unsigned width, unsigned height,
......
This diff is collapsed.
These files are here for reference only.
/* Assemble with ".../intel-gen4asm/src/intel-gen4asm -g 7" */
/* Move pixels into g10-g13. The pixel shader does not load what you want. It
 * loads the input data for a plane function to calculate what you want. The
 * following is boilerplate code to move our normalized texture coordinates
 * (u,v) into g10-g13. It does this 4 subspans (16 pixels) at a time.
 *
 * This should do the same thing, but it doesn't work for some reason.
 * pln(16) g10 g6<0,1,0>F g2<8,8,1>F { align1 };
 * pln(16) g12 g6.16<1>F g2<8,8,1>F { align1 };
 */
/* U */
pln (8) g10<1>F g6.0<0,1,0>F g2.0<8,8,1>F { align1 }; /* pixel 0-7 */
pln (8) g11<1>F g6.0<0,1,0>F g4.0<8,8,1>F { align1 }; /* pixel 8-15 */
/* V */
/* NOTE(review): unlike the U instructions above, src0 here carries no
 * explicit F type suffix - confirm the assembler defaults it to float.
 */
pln (8) g12<1>F g6.16<0,1,0> g2.0<8,8,1>F { align1 }; /* pixel 0-7 */
pln (8) g13<1>F g6.16<0,1,0> g4.0<8,8,1>F { align1 }; /* pixel 8-15 */
/* Next we want the sampler to fetch the src texture (i.e. the src buffer).
 * This is done with a pretty simple send message. The output goes to g112,
 * which is exactly what we're supposed to use in our final send message.
 * In intel-gen4asm, we should end up parsed by the following rule:
 * predicate SEND execsize dst sendleadreg sndopr directsrcoperand instoptions
 *
 * Send message descriptor:
 * 28:25 = message len = 4 // our 4 registers have 16 pixels
 * 24:20 = response len = 8 // Each pixel is RGBA32, so we need 8 registers
 * 19:19 = header present = 0
 * 18:17 = SIMD16 = 2
 * 16:12 = TYPE = 0 (regular sample)
 * 11:08 = Sampler index = ignored/0
 * 7:0 = binding table index = src = 1
 * (4 << 25) | (8 << 20) | (2 << 17) | 1 = 0x8840001
 *
 * Send message extra descriptor
 * 5:5 = End of Thread = 0
 * 3:0 = Target Function ID = SFID_SAMPLER (2)
 * 0x2
 */
send(16) g112 g10 0x2 0x8840001 { align1 };
/* g112-g119 now contain the sampled source input, and all we must do is write
 * it out to the destination render target. This is done with the send message
 * as well. The only extra bits are to terminate the pixel shader.
 *
 * Send message descriptor:
 * 28:25 = message len = 8 // 16 pixels RGBA32
 * 24:20 = response len = 0
 * 19:19 = header present = 0
 * 17:14 = message type = Render Target Write (12)
 * 12:12 = Last Render Target Select = 1
 * 10:08 = Message Type = SIMD16 (0)
 * 07:00 = Binding Table Index = dest = 0
 * (8 << 25) | (12 << 14) | (1 << 12) = 0x10031000
 *
 * Send message extra descriptor
 * 5:5 = End of Thread = 1
 * 3:0 = Target Function ID = SFID_DP_RC (5)
 * (1 << 5) | 5 = 0x25
 */
send(16) null g112 0x25 0x10031000 { align1, EOT };
/* vim: set ft=c ts=4 sw=2 tw=80 et: */
/* Fill g112-g119 with the constant 0x3f800000, which is the IEEE-754
 * single-precision bit pattern for 1.0 (moved as a raw UD dword), then send
 * those eight registers out and end the thread.
 *
 * The send uses descriptor 0x10031000 and extra descriptor 0x25 with EOT,
 * i.e. message len 8 in bits 28:25 and End of Thread in bit 5 of the extra
 * descriptor. Presumably this is a debug shader that writes a constant colour
 * instead of sampling the source - confirm against its user.
 */
mov(8) g112:UD 0x3f800000:UD { align1 };
mov(8) g113:UD 0x3f800000:UD { align1 };
mov(8) g114:UD 0x3f800000:UD { align1 };
mov(8) g115:UD 0x3f800000:UD { align1 };
mov(8) g116:UD 0x3f800000:UD { align1 };
mov(8) g117:UD 0x3f800000:UD { align1 };
mov(8) g118:UD 0x3f800000:UD { align1 };
mov(8) g119:UD 0x3f800000:UD { align1 };
send(16) null g112 0x25 0x10031000 { align1, EOT };
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment