/*
 * © Copyright 2017-2018 The Panfrost Community
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU licence.
 *
 * A copy of the licence is included with the program, and can also be obtained
 * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA  02110-1301, USA.
 *
 */

#ifndef __MALI_JOB_H__
#define __MALI_JOB_H__

#include <config.h>
#include <mali-ioctl.h>

/* Size of uintptr_t in bits (assuming 8-bit bytes). Used below to size the
 * bit-field that shares a pointer-sized slot with the 4-bit flags field. */
#define MALI_SHORT_PTR_BITS (sizeof(uintptr_t)*8)

/* Length of the weights[] array in struct mali_tentative_mfbd. */
#define MALI_FBD_HIERARCHY_WEIGHTS 8

#define MALI_PAYLOAD_SIZE 256

/* Job type identifiers; this enum is the type of the 7-bit job_type field in
 * struct mali_job_descriptor_header. The value 6 does not appear in this
 * list. */
enum mali_job_type {
	JOB_NOT_STARTED      = 0,
	JOB_TYPE_NULL        = 1,
	JOB_TYPE_SET_VALUE   = 2,
	JOB_TYPE_CACHE_FLUSH = 3,
	JOB_TYPE_COMPUTE     = 4,
	JOB_TYPE_VERTEX      = 5,
	JOB_TYPE_TILER       = 7,
	JOB_TYPE_FUSED       = 8,
	JOB_TYPE_FRAGMENT    = 9,
};

/* Encodings for the OpenGL primitive types.
 * NOTE(review): presumably the value written to draw_mode in
 * struct mali_payload_vertex_tiler -- confirm. */
enum mali_gl_mode {
	MALI_GL_POINTS         = 0x1,
	MALI_GL_LINES          = 0x2,
	MALI_GL_LINE_STRIP     = 0x4,
	MALI_GL_LINE_LOOP      = 0x6,
	MALI_GL_TRIANGLES      = 0x8,
	MALI_GL_TRIANGLE_STRIP = 0xA,
	MALI_GL_TRIANGLE_FAN   = 0xC,
};

/* Face culling flags. */
#define MALI_GL_CULL_FACE_BACK  0x80
#define MALI_GL_CULL_FACE_FRONT 0x40

/* Front-face winding, placed at bit 5. v is MALI_GL_CCW or MALI_GL_CW; the
 * argument is parenthesized so that compound expressions shift as a whole. */
#define MALI_GL_FRONT_FACE(v) ((v) << 5)
#define MALI_GL_CCW (0)
#define MALI_GL_CW  (1)


/* TODO: Might this actually be a finer bitfield? */
#define MALI_DEPTH_STENCIL_ENABLE 0x6400

/* Yields a string naming the given depth/stencil enable value: the macro name
 * for MALI_DEPTH_STENCIL_ENABLE, "0" for zero, and a flagged "0" for anything
 * else. The argument and the whole expansion are parenthesized so the macro
 * is safe with compound arguments and inside larger expressions. */
#define DS_ENABLE(field) \
	(((field) == MALI_DEPTH_STENCIL_ENABLE) \
	? "MALI_DEPTH_STENCIL_ENABLE" \
	: ((field) == 0) ? "0" \
	: "0 /* XXX: Unknown, check hexdump */")

/* Comparison functions, shared by the stencil and depth tests. Every value
 * fits in three bits (see MALI_DEPTH_FUNC and struct mali_stencil_test). */

enum mali_func {
	MALI_FUNC_NEVER    = 0,
	MALI_FUNC_LESS     = 1,
	MALI_FUNC_EQUAL    = 2,
	MALI_FUNC_LEQUAL   = 3,
	MALI_FUNC_GREATER  = 4,
	MALI_FUNC_NOTEQUAL = 5,
	MALI_FUNC_GEQUAL   = 6,
	MALI_FUNC_ALWAYS   = 7,
};

/* Flags apply to unknown2_3? */

#define MALI_HAS_MSAA		(1 << 16)
#define MALI_CAN_DISCARD 	(1 << 21)
#define MALI_HAS_BLEND_SHADER 	(1 << 22)

/* Depth comparison function, a 3-bit field starting at bit 24. func is a
 * mali_func. Arguments are parenthesized so compound expressions are shifted
 * and masked as a whole. */
#define MALI_DEPTH_FUNC(func)	   ((func) << 24)
#define MALI_GET_DEPTH_FUNC(flags) (((flags) >> 24) & 0x7)
#define MALI_DEPTH_FUNC_MASK	   MALI_DEPTH_FUNC(0x7)

#define MALI_DEPTH_TEST		(1 << 27)

/* Next flags to unknown2_4 */
#define MALI_NO_MSAA		(1 << 30)
#define MALI_STENCIL_TEST       (1 << 16)


/* Stencil test state is all encoded in a single u32, just with a lot of
 * enums... */

/* Stencil operations; each value fits in the 3-bit sfail/dpfail/dppass
 * fields of struct mali_stencil_test. */
enum mali_stencil_op {
	MALI_STENCIL_KEEP      = 0,
	MALI_STENCIL_REPLACE   = 1,
	MALI_STENCIL_ZERO      = 2,
	MALI_STENCIL_INVERT    = 3,
	MALI_STENCIL_INCR_WRAP = 4,
	MALI_STENCIL_DECR_WRAP = 5,
	MALI_STENCIL_INCR      = 6,
	MALI_STENCIL_DECR      = 7,
};

struct mali_stencil_test {
	unsigned ref  			: 8;
	unsigned mask 			: 8;
112
	enum mali_func func 		: 3;
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
113 114 115 116 117 118
	enum mali_stencil_op sfail 	: 3;
	enum mali_stencil_op dpfail 	: 3;
	enum mali_stencil_op dppass 	: 3;
	unsigned zero			: 4;
} __attribute__((packed));

119
struct mali_shader_meta {
120
	mali_ptr shader;
121
	u32 zero1;
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
122

Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
123
	/* Counted as number of address slots (i.e. half-precision vec4's) */
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
124
	u16 attribute_count;
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
125
	u16 varying_count;
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
126

127 128 129 130 131
	/* 0x20000600, 0x20000001, etc... not sure */
	u16 unknown1; 

	 /* Whole number of uniform registers used, times two; whole number of
	  * work registers used (no scale). 
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
132 133
	 */

134 135 136
	unsigned work_count : 5;
	unsigned uniform_count : 5;
	unsigned unknown2 : 6;
137

138 139 140 141 142 143 144
	/* Depth factor is exactly as passed to glDepthOffset. Depth units is
	 * equal to the value passed to glDeptOhffset + 1.0f (use
	 * MALI_NEGATIVE) */

	float depth_units;
	float depth_factor;

145 146 147
	u32 unknown2_2;
	u32 unknown2_3;
	u32 unknown2_4;
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
148

Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
149 150
	struct mali_stencil_test stencil_front;
	struct mali_stencil_test stencil_back;
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
151

152 153
	u32 unknown2_7;
	u32 unknown2_8;
154 155 156 157 158

	/* Check for MALI_HAS_BLEND_SHADER to decide how to interpret */

	union {
		mali_ptr blend_shader;
159 160 161

		/* Exact format of this is not known yet */
		u64 blend_equation;
162
	};
163
} __attribute__((packed));
164

Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
165
/* This only concerns hardware jobs */
166

167 168
/* Possible values for job_descriptor_size */

Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
169
#define MALI_JOB_32 0
170 171
#define MALI_JOB_64 1

172 173 174 175 176 177 178
struct mali_job_descriptor_header {
	u32 exception_status;
	u32 first_incomplete_task;
	u64 fault_pointer;
	u8 job_descriptor_size : 1;
	enum mali_job_type job_type : 7;
	u8 job_barrier : 1;
179
	u8 unknown_flags : 7;
180 181 182
	u16 job_index;
	u16 job_dependency_index_1;
	u16 job_dependency_index_2;
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
183 184 185 186 187
	
	union {
		u64 next_job_64;
		u32 next_job_32;
	};
188 189 190 191 192 193 194 195
} __attribute__((packed));

/* NOTE(review): presumably the payload of a JOB_TYPE_SET_VALUE job, going by
 * the name -- confirm. Two packed 64-bit words. */
struct mali_payload_set_value {
	u64 out;
	u64 unknown;
} __attribute__((packed));

struct mali_attr {
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
196
	mali_ptr elements;
197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
	u32 stride;
	u32 size;
} __attribute__((packed));

/* Per-attribute metadata: an 8-bit index followed by 56 bits of flags, 64
 * bits in total. */
struct mali_attr_meta {
	u8 index;
	u64 flags :56;
} __attribute__((packed));
/* ASSERT_SIZEOF_TYPE is defined outside this file.
 * NOTE(review): the two size arguments are presumably the expected sizes on
 * 32-bit and 64-bit builds -- confirm. */
ASSERT_SIZEOF_TYPE(struct mali_attr_meta,
		   sizeof(u64), sizeof(u64));

/* Framebuffer descriptor kinds: single (SFBD) or multi (MFBD). */
enum mali_fbd_type {
	MALI_SFBD,
	MALI_MFBD,
};

/* FBD_MASK clears the low six bits of a value; FBD_TYPE is bit 0.
 * NOTE(review): presumably applied to the framebuffer pointer fields, with
 * bit 0 holding a mali_fbd_type -- confirm. */
#define FBD_TYPE (1)
#define FBD_MASK (~0x3f)

216
struct mali_payload_vertex_tiler {
217 218 219
	/* Exactly as passed to glLineWidth */
	float line_width;

220 221 222
	/* Off by one */
	u32 vertex_count; 

Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
223
	u32 unk1; // 0x28000000
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
224
	u32 draw_mode; 
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
225
	u32 zero0;
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
226
	u32 zero1;
227 228 229 230 231 232 233 234 235

	/* Like many other strictly nonzero quantities, index_count is
	 * subtracted by one. For an indexed cube, this is equal to 35 = 6
	 * faces * 2 triangles/per face * 3 vertices/per triangle - 1. For
	 * non-indexed draws, equal to vertex_count. */

	u32 index_count;

	/* No hidden structure; literally just a pointer to an array of
236
	 * uint32_t indices. Thanks, guys, for not making my life insane for
237 238 239 240
	 * once! NULL for non-indexed draws. */

	uintptr_t indices;

Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
241
	u32 zero3;
242
	u32 gl_enables; // 0x5
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
243
	u32 zero4;
244

Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
245
	u32 zero5;
246 247
	uintptr_t unknown0;
	uintptr_t unknown1; /* pointer */
248 249 250

	/* For reasons I don't quite understand this is a pointer to a pointer.
	 * That second pointer points to the actual texture descriptor. */
251
	uintptr_t texture_trampoline;
252

253 254 255 256
	/* For OpenGL, from what I've seen, this is intimately connected to
	 * texture_meta. cwabbott says this is not the case under Vulkan, hence
	 * why this field is seperate (Midgard is Vulkan capable) */
	uintptr_t sampler_descriptor;
257

258
	uintptr_t uniforms;
259
	u8 flags : 4;
260 261 262
	uintptr_t _shader_upper : MALI_SHORT_PTR_BITS - 4; /* struct shader_meta */
	uintptr_t attributes; /* struct attribute_buffer[] */
	uintptr_t attribute_meta; /* attribute_meta[] */
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
263
	uintptr_t varyings; /* struct attr */
264
	uintptr_t unknown6; /* pointer */
265
	uintptr_t viewport;
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
266
	u32 zero6;
267
	mali_ptr framebuffer;
268 269 270
} __attribute__((packed));
//ASSERT_SIZEOF_TYPE(struct mali_payload_vertex_tiler, 256, 256);

/* Pointed to from texture_trampoline, mostly unknown still, haven't
 * managed to replay successfully */

/* Purposeful off-by-one in width, height fields. For example, a (64, 64)
 * texture is stored as (63, 63) in these fields. This adjusts for that.
 * There's an identical pattern in the framebuffer descriptor. Even vertex
 * count fields work this way, hence the generic name -- integral fields that
 * are strictly positive generally need this adjustment. */

#define MALI_POSITIVE(dim) ((dim) - 1)

/* Opposite of MALI_POSITIVE, found in the depth_units field */

#define MALI_NEGATIVE(dim) ((dim) + 1)

Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
286
struct mali_texture_descriptor {
287 288 289
	uint16_t width;
	uint16_t height;

290
	uint32_t unknown1;
291

292
	/* No idea on format1 and format2 */
293 294 295

	uint32_t format1;

296 297
	uint32_t unknown3;

298 299
	uint32_t format2;

300 301 302 303
	uint32_t unknown5;
	uint32_t unknown6;
	uint32_t unknown7;

304 305
	mali_ptr swizzled_bitmap_0;
	mali_ptr swizzled_bitmap_1;
306 307
} __attribute__((packed));

308 309 310 311 312 313 314 315 316 317 318 319 320 321 322
/* Used as part of filter_mode */

#define MALI_GL_LINEAR 0
#define MALI_GL_NEAREST 1

/* Used to construct low bits of filter_mode: the magnification filter is
 * bit 0, the minification filter is bit 1. */

#define MALI_GL_TEX_MAG(mode) (((mode) & 1) << 0)
#define MALI_GL_TEX_MIN(mode) (((mode) & 1) << 1)

#define MALI_GL_TEX_MAG_MASK (1)
#define MALI_GL_TEX_MIN_MASK (2)

/* Name of a single filter value; any nonzero value reads as nearest. The
 * argument is parenthesized so compound expressions are tested as a whole. */
#define MALI_FILTER_NAME(filter) ((filter) ? "MALI_GL_NEAREST" : "MALI_GL_LINEAR")

/* Sampler descriptor: three packed 32-bit words. */
struct mali_sampler_descriptor {
	/* Low bits are built with MALI_GL_TEX_MAG / MALI_GL_TEX_MIN */
	uint32_t filter_mode;
	
	/* Apparently the same as one of the fields in texture_descriptor, so
	 * maybe it's how linkage is specified */
	uint32_t unknown1;

	uint32_t unknown2;
} __attribute__((packed));

333 334
/* TODO: What are the floats? Apparently always { -inf, -inf, inf, inf, 0.0,
 * 1.0 }
335
 *
336 337
 * viewport0/viewport1 form the arguments to glViewport. viewport1 is modified
 * by MALI_POSITIVE; viewport0 is as-is.
338
 */
339

340
struct mali_viewport {
341
	float floats[6];
342

343 344 345
	u16 viewport0[2];
	u16 viewport1[2];
} __attribute__((packed));
346

347 348
/* TODO: I have no idea what this could possibly be, whatsoever. */

349
struct mali_unknown6 {
350 351 352 353
	u64 unknown0;
	u64 unknown1;
};

/* From presentations, 16x16 tiles externally. Use shift for fast computation
 * of tile numbers. */

#define MALI_TILE_SHIFT 4
#define MALI_TILE_LENGTH (1 << MALI_TILE_SHIFT)

/* Tile coordinates are stored as a compact u32, as only 12 bits are needed for
 * each component. Notice that this provides a theoretical upper bound of (1 <<
 * 12) = 4096 tiles in each direction, addressing a maximum framebuffer of size
 * 65536x65536. Multiplying that together, times another four given that Mali
 * framebuffers are 32-bit ARGB8888, means that this upper bound would take 16
 * gigabytes of RAM just to store the uncompressed framebuffer itself, let
 * alone rendering in real-time to such a buffer.
 *
 * Nice job, guys. */

/* From mali_kbase_10969_workaround.c */
#define MALI_X_COORD_MASK 0x00000FFF
#define MALI_Y_COORD_MASK 0x0FFF0000

/* Extract parts of a tile coordinate */

#define MALI_TILE_COORD_X(coord) ((coord) & MALI_X_COORD_MASK)
#define MALI_TILE_COORD_Y(coord) (((coord) & MALI_Y_COORD_MASK) >> 16)
#define MALI_TILE_COORD_FLAGS(coord) ((coord) & ~(MALI_X_COORD_MASK | MALI_Y_COORD_MASK))

/* No known flags yet, but just in case...? */

#define MALI_TILE_NO_FLAG (0)

/* Helpers to generate tile coordinates based on the boundary coordinates in
 * screen space. So, with the bounds (0, 0) to (128, 128) for the screen, these
 * functions would convert it to the bounding tiles (0, 0) to (7, 7).
 * Intentional "off-by-one"; finding the tile number is a form of fencepost
 * problem. Macro arguments are parenthesized so that compound expressions
 * are subtracted and shifted as a whole. */

#define MALI_MAKE_TILE_COORDS(X, Y, flag) ((X) | ((Y) << 16) | (flag))
#define MALI_BOUND_TO_TILE(B, bias) (((B) - (bias)) >> MALI_TILE_SHIFT)
#define MALI_COORDINATE_TO_TILE(W, H, flag, bias) MALI_MAKE_TILE_COORDS(MALI_BOUND_TO_TILE(W, bias), MALI_BOUND_TO_TILE(H, bias), flag)
#define MALI_COORDINATE_TO_TILE_MIN(W, H, flag) MALI_COORDINATE_TO_TILE(W, H, flag, 0)
#define MALI_COORDINATE_TO_TILE_MAX(W, H, flag) MALI_COORDINATE_TO_TILE(W, H, flag, 1)

396
struct mali_payload_fragment {
397 398 399
	/* XXX: WTF? */
	u32 zero;

400 401 402 403 404
	/* XXX: we might be able to translate these into bitfields someday, but
	 * that will only be sensible if the mask of flags is limited to
	 * 0xF0000000 or 0x0000F000. If it's 0xF000F000, feel free to just
	 * remove this comment
	 */
405 406
	u32 min_tile_coord;
	u32 max_tile_coord;
407
	mali_ptr framebuffer;
408
} __attribute__((packed));
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
409
//ASSERT_SIZEOF_TYPE(struct mali_payload_fragment, 12, 16);
410

411
/* (Single?) Framebuffer Descriptor */

/* Flags apply to unknown2. With just MSAA_A and MSAA_B, the framebuffer is
 * configured for 4x. With MSAA_8, it is configured for 8x. */

#define MALI_FRAMEBUFFER_MSAA_8 (1 << 3)
#define MALI_FRAMEBUFFER_MSAA_A (1 << 4)
#define MALI_FRAMEBUFFER_MSAA_B (1 << 23)

/* Fast/slow based on whether all three buffers are cleared at once */

#define MALI_CLEAR_FAST         (1 << 18)
#define MALI_CLEAR_SLOW         (1 << 28)
/* Bit 31 needs an unsigned operand: shifting a signed 1 into the sign bit of
 * a 32-bit int is undefined behaviour. */
#define MALI_CLEAR_SLOW_STENCIL (1u << 31)

struct mali_single_framebuffer {
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
427 428
	u32 unknown1;
	u32 flags;
429
	u64 unknown_address_0;
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
430 431 432 433
	u64 zero1;
	u64 heap_free_address;

	u32 unknown2; // 0xB8..
434
	u32 clear_flags;
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
435
	u32 zero2;
436 437 438 439 440 441 442

	/* Purposeful off-by-one in these fields should be accounted for by the
	 * MALI_DIMENSION macro */

	u16 width;
	u16 height;

Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
443 444 445 446
	u32 zero3[4];

	u32 weights[8];

447 448
	/* Depth and stencil buffers are interleaved, it appears, as they are
	 * set to the same address in captures. Both fields set to zero if the
449 450 451
	 * buffer is not being cleared. Depending on GL_ENABLE magic, you might
	 * get a zero enable despite the buffer being present; that still is
	 * disabled. */
452

453
	mali_ptr depth_buffer; // not SAME_VA
454
	u64 depth_buffer_enable; 
455 456

	mali_ptr stencil_buffer; // not SAME_VA
457
	u64 stencil_buffer_enable; 
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
458 459 460 461 462 463

	u32 clear_color_1; // RGBA8888 from glClear, actually used by hardware
	u32 clear_color_2; // always equal, but unclear function?
	u32 clear_color_3; // always equal, but unclear function?
	u32 clear_color_4; // always equal, but unclear function?

464 465
	/* Set to zero if not cleared */

Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
466 467 468 469 470
	float clear_depth_1; // float32, ditto
	float clear_depth_2; // float32, ditto
	float clear_depth_3; // float32, ditto
	float clear_depth_4; // float32, ditto

Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
471 472 473
	u32 clear_stencil; // Exactly as it appears in OpenGL

	u32 zero6[7];
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
474 475 476 477 478 479 480

	u32 unknown8; // 0x02000000
	u32 unknown9; // 0x00000001

	u64 unknown_address_1; /* Pointing towards... a zero buffer? */
	u64 unknown_address_2;

Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
481 482 483
	/* Determined by symmetry with the replay soft job, documented in the kernel */
	u64 tiler_jc_list;

Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
484
	u64 unknown_address_4;
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
485 486 487 488

	/* More below this, maybe */
} __attribute__((packed));

489 490
/* Multi? Framebuffer Descriptor */

491 492 493 494
struct mali_tentative_mfbd {
	u64 blah; /* XXX: what the fuck is this? */
	/* This GPU address is unknown, except for the fact there's something
	 * executable here... */
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
495
	u64 ugaT;
496 497 498 499
	u32 block1[10];
	u32 unknown1;
	u32 flags;
	u8 block2[16];
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
500 501
	u64 heap_free_address;
	u64 unknown2;
502
	u32 weights[MALI_FBD_HIERARCHY_WEIGHTS];
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
503
	u64 unknown_gpu_addressN;
504
	u8 block3[88];
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
505
	u64 unknown_gpu_address;
506 507 508 509 510 511 512
	u64 unknown3;
	u8 block4[40];
} __attribute__((packed));

/* Originally from chai, which found it from mali_kbase_replay.c */

#endif /* __MALI_JOB_H__ */