From 3485b8dc78d8515665158c606b48e7ce21eae1f2 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 23 Mar 2022 18:12:31 -0400 Subject: [PATCH 01/31] pan/bi: Use consistent modifier lists in packing If there are modifiers only used by pseudo instructions, not the real instructions, bi_packer can get out-of-sync with bi_opcodes, causing hard-to-debug issues. Do the stupid-simple thing to ensure this doesn't happen. This may be a temporary issue, depending whether ISA.xml and the IR get split out for better Valhall support. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/bi_packer.c.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/panfrost/bifrost/bi_packer.c.py b/src/panfrost/bifrost/bi_packer.c.py index c014519f5ccf..601750e2aa3b 100644 --- a/src/panfrost/bifrost/bi_packer.c.py +++ b/src/panfrost/bifrost/bi_packer.c.py @@ -24,9 +24,14 @@ import sys from bifrost_isa import * from mako.template import Template +# Consider pseudo instructions when getting the modifier list +instructions_with_pseudo = parse_instructions(sys.argv[1], include_pseudo = True) +ir_instructions_with_pseudo = partition_mnemonics(instructions_with_pseudo) +modifier_lists = order_modifiers(ir_instructions_with_pseudo) + +# ...but strip for packing instructions = parse_instructions(sys.argv[1]) ir_instructions = partition_mnemonics(instructions) -modifier_lists = order_modifiers(ir_instructions) # Packs sources into an argument. Offset argument to work around a quirk of our # compiler IR when dealing with staging registers (TODO: reorder in the IR to -- GitLab From b70a7c97bb030f52e73ab93261b4ff09d517db44 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 23 Mar 2022 09:22:40 -0400 Subject: [PATCH 02/31] pan/bi: Gate late DCE/CSE on "optimize" Otherwise we can end up with unlowered ATOM.i32 on Valhall. 
Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/bifrost_compile.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index 6b98657dba1e..b7f1f8ad8e75 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -4125,8 +4125,10 @@ bi_compile_variant_nir(nir_shader *nir, bi_lower_fau(ctx); /* Lowering FAU can create redundant moves. Run CSE+DCE to clean up. */ - bi_opt_cse(ctx); - bi_opt_dead_code_eliminate(ctx); + if (likely(optimize)) { + bi_opt_cse(ctx); + bi_opt_dead_code_eliminate(ctx); + } /* Analyze before register allocation to avoid false dependencies. The * skip bit is a function of only the data flow graph and is invariant -- GitLab From 7983a0d0dce409d4a9f20ff25365ef236adfa2c2 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Tue, 22 Mar 2022 21:47:27 -0400 Subject: [PATCH 03/31] pan/bi: Rename PATOM_C to ATOM This is basically what's native on Valhall. Use the Valhall naming for the pseudo-instruction on Bifrost for consistency. 
Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/ISA.xml | 8 ++++---- src/panfrost/bifrost/bi_schedule.c | 4 ++-- src/panfrost/bifrost/bifrost_compile.c | 10 +++++----- src/panfrost/bifrost/bir.c | 4 ++-- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/panfrost/bifrost/ISA.xml b/src/panfrost/bifrost/ISA.xml index 4d820fd719e4..0fad891ad264 100644 --- a/src/panfrost/bifrost/ISA.xml +++ b/src/panfrost/bifrost/ISA.xml @@ -8233,11 +8233,11 @@ - - + + - + aadd @@ -8258,7 +8258,7 @@ - + diff --git a/src/panfrost/bifrost/bi_schedule.c b/src/panfrost/bifrost/bi_schedule.c index efcd96161418..01bf6299be0b 100644 --- a/src/panfrost/bifrost/bi_schedule.c +++ b/src/panfrost/bifrost/bi_schedule.c @@ -1234,9 +1234,9 @@ bi_take_instr(bi_context *ctx, struct bi_worklist st, { if (tuple->add && tuple->add->op == BI_OPCODE_CUBEFACE) return bi_lower_cubeface(ctx, clause, tuple); - else if (tuple->add && tuple->add->op == BI_OPCODE_PATOM_C_I32) + else if (tuple->add && tuple->add->op == BI_OPCODE_ATOM_RETURN_I32) return bi_lower_atom_c(ctx, clause, tuple); - else if (tuple->add && tuple->add->op == BI_OPCODE_PATOM_C1_I32) + else if (tuple->add && tuple->add->op == BI_OPCODE_ATOM1_RETURN_I32) return bi_lower_atom_c1(ctx, clause, tuple); else if (tuple->add && tuple->add->op == BI_OPCODE_SEG_ADD_I64) return bi_lower_seg_add(ctx, clause, tuple); diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index b7f1f8ad8e75..ca921e9c42ae 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -1094,14 +1094,14 @@ bi_emit_atomic_i32_to(bi_builder *b, bi_index dst, enum bi_atom_opc opc = bi_atom_opc_for_nir(intrinsic); enum bi_atom_opc post_opc = opc; - /* Generate either ATOM_C or ATOM_C1 as required */ + /* Generate either ATOM or ATOM1 as required */ if (bi_promote_atom_c1(opc, arg, &opc)) { - bi_patom_c1_i32_to(b, sr, bi_word(addr, 0), - bi_word(addr, 1), opc, 2); + 
bi_atom1_return_i32_to(b, sr, bi_word(addr, 0), + bi_word(addr, 1), opc, 2); } else { bi_mov_i32_to(b, sr, arg); - bi_patom_c_i32_to(b, sr, sr, bi_word(addr, 0), - bi_word(addr, 1), opc, 2); + bi_atom_return_i32_to(b, sr, sr, bi_word(addr, 0), + bi_word(addr, 1), opc, 2); } /* Post-process it */ diff --git a/src/panfrost/bifrost/bir.c b/src/panfrost/bifrost/bir.c index 178950d27f39..45b34badf913 100644 --- a/src/panfrost/bifrost/bir.c +++ b/src/panfrost/bifrost/bir.c @@ -86,8 +86,8 @@ bi_count_staging_registers(const bi_instr *ins) unsigned bi_count_read_registers(const bi_instr *ins, unsigned s) { - /* PATOM_C reads 1 but writes 2 */ - if (s == 0 && ins->op == BI_OPCODE_PATOM_C_I32) + /* ATOM reads 1 but writes 2 */ + if (s == 0 && ins->op == BI_OPCODE_ATOM_RETURN_I32) return 1; else if (s == 0 && bi_opcode_props[ins->op].sr_read) return bi_count_staging_registers(ins); -- GitLab From 90867e82040c35e8a94af78c1992d76b326bef17 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Tue, 22 Mar 2022 21:53:04 -0400 Subject: [PATCH 04/31] pan/bi: Add ATOM_RETURN pseudo-instruction Allows modeling Valhall's atomics better. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/ISA.xml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/panfrost/bifrost/ISA.xml b/src/panfrost/bifrost/ISA.xml index 0fad891ad264..d64b4c1d3f61 100644 --- a/src/panfrost/bifrost/ISA.xml +++ b/src/panfrost/bifrost/ISA.xml @@ -8272,6 +8272,30 @@ + + + + + + + aadd + + + + + + asmin + asmax + aumin + aumax + aand + aor + axor + + + + + -- GitLab From cfde0275e4afe7e48c4f4f9b97f1c85894e47d2f Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Tue, 22 Mar 2022 22:30:11 -0400 Subject: [PATCH 05/31] pan/bi: Model Valhall-style A(CMP)XCHG Handled consistently with computational atomics. 
Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/ISA.xml | 2 ++ src/panfrost/bifrost/bir.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/panfrost/bifrost/ISA.xml b/src/panfrost/bifrost/ISA.xml index d64b4c1d3f61..c4a8c9417401 100644 --- a/src/panfrost/bifrost/ISA.xml +++ b/src/panfrost/bifrost/ISA.xml @@ -8253,6 +8253,8 @@ aand aor axor + axchg + acmpxchg diff --git a/src/panfrost/bifrost/bir.c b/src/panfrost/bifrost/bir.c index 45b34badf913..86e42fa7a8c5 100644 --- a/src/panfrost/bifrost/bir.c +++ b/src/panfrost/bifrost/bir.c @@ -86,9 +86,9 @@ bi_count_staging_registers(const bi_instr *ins) unsigned bi_count_read_registers(const bi_instr *ins, unsigned s) { - /* ATOM reads 1 but writes 2 */ + /* ATOM reads 1 but writes 2. Exception for ACMPXCHG */ if (s == 0 && ins->op == BI_OPCODE_ATOM_RETURN_I32) - return 1; + return (ins->atom_opc == BI_ATOM_OPC_ACMPXCHG) ? 2 : 1; else if (s == 0 && bi_opcode_props[ins->op].sr_read) return bi_count_staging_registers(ins); else if (s == 4 && ins->op == BI_OPCODE_BLEND) -- GitLab From 1b7d7ebbab78063478a1b3b243cf3c56bed664f6 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 17 Mar 2022 12:15:37 -0400 Subject: [PATCH 06/31] pan/bi: Allow branch_offset on BLEND Required to model BLEND accurately on Valhall, where it encodes a special relative branch... Midgard style! 
Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/compiler.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h index 62d86bdbb394..f60a400c9a2f 100644 --- a/src/panfrost/bifrost/compiler.h +++ b/src/panfrost/bifrost/compiler.h @@ -424,8 +424,6 @@ typedef struct { uint32_t fill; uint32_t index; uint32_t attribute_index; - int32_t byte_offset; - int32_t branch_offset; struct { uint32_t varying_index; @@ -437,6 +435,14 @@ typedef struct { struct { uint32_t sr_count; uint32_t sr_count_2; + + union { + /* Atomics effectively require all three */ + int32_t byte_offset; + + /* BLEND requires all three */ + int32_t branch_offset; + }; }; }; -- GitLab From c7f6b973b210fd27ae677f7cc907cbbdde3cdbe6 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 17 Mar 2022 12:31:28 -0400 Subject: [PATCH 07/31] pan/bi: Check return addresses in blend shaders Required on Valhall, where jumping to 0x0 doesn't automatically terminate the program. Luckily the check is free there too. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/bifrost_compile.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index ca921e9c42ae..fd7d2f108798 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -674,9 +674,15 @@ bi_emit_fragment_out(bi_builder *b, nir_intrinsic_instr *instr) if (b->shader->inputs->is_blend) { /* Jump back to the fragment shader, return address is stored - * in r48 (see above). + * in r48 (see above). On Valhall, only jump if the address is + * nonzero. The check is free there and it implements the "jump + * to 0 terminates the blend shader" that's automatic on + * Bifrost. 
*/ - bi_jump(b, bi_register(48)); + if (b->shader->arch >= 8) + bi_branchzi(b, bi_register(48), bi_register(48), BI_CMPF_NE); + else + bi_jump(b, bi_register(48)); } } -- GitLab From 97a13d6424e788094fd7568c5673f2f7da4e497f Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 23 Mar 2022 17:20:33 -0400 Subject: [PATCH 08/31] pan/bi: Augment ST_TILE with register format To model its Valhall incarnation. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/ISA.xml | 6 ++++++ src/panfrost/bifrost/bifrost_compile.c | 3 ++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/panfrost/bifrost/ISA.xml b/src/panfrost/bifrost/ISA.xml index c4a8c9417401..0232347ac007 100644 --- a/src/panfrost/bifrost/ISA.xml +++ b/src/panfrost/bifrost/ISA.xml @@ -7709,6 +7709,12 @@ v3 v4 + + f32 + f16 + u32 + s32 + diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index fd7d2f108798..d168648cbb1a 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -523,7 +523,8 @@ bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T, /* Conversion descriptor comes from the compile inputs, pixel * indices derived at run time based on sample ID */ bi_st_tile(b, rgba, bi_pixel_indices(b, rt), bi_register(60), - bi_imm_u32(blend_desc >> 32), BI_VECSIZE_V4); + bi_imm_u32(blend_desc >> 32), + regfmt, BI_VECSIZE_V4); } else if (b->shader->inputs->is_blend) { /* Blend descriptor comes from the compile inputs */ /* Put the result in r0 */ -- GitLab From f5585700be6da8fa87827fe7a172d178fcbe79f1 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 23 Mar 2022 11:57:11 -0400 Subject: [PATCH 09/31] pan/bi: Model LD_VAR_BUF instructions These are indirect versions of LD_VAR_BUF_IMM, taking their index in bytes. Used for indirect varying loads (the NIR lowering is inefficient). 
Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/ISA.xml | 60 ++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/src/panfrost/bifrost/ISA.xml b/src/panfrost/bifrost/ISA.xml index 0232347ac007..0ae1fba0d34f 100644 --- a/src/panfrost/bifrost/ISA.xml +++ b/src/panfrost/bifrost/ISA.xml @@ -8598,6 +8598,36 @@ + + + + + none + v2 + v3 + v4 + + + store + retrieve + conditional + clobber + + + f32 + f16 + u32 + u16 + + + center + centroid + sample + explicit + none + + + @@ -8628,6 +8658,36 @@ + + + + + none + v2 + v3 + v4 + + + store + retrieve + conditional + clobber + + + f32 + f16 + u32 + u16 + + + center + centroid + sample + explicit + none + + + -- GitLab From fa841273d4f8902e6709fa6a8a69783512456f3c Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 24 Mar 2022 18:10:23 -0400 Subject: [PATCH 10/31] pan/bi: Rename I->action to I->flow For consistency with the Valhall ISA. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/compiler.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h index f60a400c9a2f..8009b1ad6fa2 100644 --- a/src/panfrost/bifrost/compiler.h +++ b/src/panfrost/bifrost/compiler.h @@ -390,8 +390,8 @@ typedef struct { enum bi_register_format register_format; enum bi_vecsize vecsize; - /* Action (flow control) associated with a Valhall instruction */ - uint8_t action; + /* Flow control associated with a Valhall instruction */ + uint8_t flow; /* Can we spill the value written here? Used to prevent * useless double fills */ -- GitLab From e8590e0d0438c31def4e2032fd65efb62a26ebc1 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 23 Mar 2022 16:45:35 -0400 Subject: [PATCH 11/31] pan/va: Add ST_TILE instruction Encoded like LD_TILE, required for some MSAA blend shaders. 
Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/valhall/ISA.xml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/panfrost/bifrost/valhall/ISA.xml b/src/panfrost/bifrost/valhall/ISA.xml index 84fb00fcea6c..eb68da7e25dc 100644 --- a/src/panfrost/bifrost/valhall/ISA.xml +++ b/src/panfrost/bifrost/valhall/ISA.xml @@ -1233,6 +1233,22 @@ Conversion descriptor + + + Store to given render target, specified in the pixel indices descriptor, at + a given location and sample, and convert to the format specified in the + internal conversion descriptor. Used internally in blend shaders. + + + + + + + Pixel indices descriptor + Coverage mask + Conversion descriptor + + Blends a given render target. This loads the API-specified blend state for -- GitLab From 295b802f64bc0385423360653a3f069b810666e2 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 23 Mar 2022 11:57:52 -0400 Subject: [PATCH 12/31] pan/va: Add LD_VAR_BUF instructions Like LD_VAR_BUF_IMM but indirect. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/valhall/ISA.xml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/panfrost/bifrost/valhall/ISA.xml b/src/panfrost/bifrost/valhall/ISA.xml index eb68da7e25dc..b9ec25ac2591 100644 --- a/src/panfrost/bifrost/valhall/ISA.xml +++ b/src/panfrost/bifrost/valhall/ISA.xml @@ -866,6 +866,21 @@ + + Interpolates a given varying from hardware buffer + + + + + + + + + + + + + Load `vecsize` components from the attribute descriptor at entry `index` -- GitLab From cf6d1a81f682c240fcc711ce76efbec7a07efbef Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 23 Mar 2022 20:29:08 -0400 Subject: [PATCH 13/31] pan/va: Add Bifrost-style LD_VAR instructions For use in the legacy non-MALLOC_IDVS flow. Especially useful in blit shaders. 
Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/valhall/ISA.xml | 48 ++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/src/panfrost/bifrost/valhall/ISA.xml b/src/panfrost/bifrost/valhall/ISA.xml index b9ec25ac2591..c645f7a8ea1f 100644 --- a/src/panfrost/bifrost/valhall/ISA.xml +++ b/src/panfrost/bifrost/valhall/ISA.xml @@ -881,6 +881,54 @@ + + Interpolates a given varying from a software buffer + + + + + + + + + Varying index and table + + + + Interpolates a given varying from a software buffer + + + + + + + + + + + + + + Fetches a given varying from a software buffer + + + + + + Varying index and table + + + + Fetches a given varying from a software buffer + + + + + + + + + Load `vecsize` components from the attribute descriptor at entry `index` -- GitLab From 76487c7eb48dedf973037d5285a09bd6ca9fe349 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 24 Mar 2022 18:08:27 -0400 Subject: [PATCH 14/31] pan/va: Unify flow control Group together dependency waits and flow control into a single enum. This simplifies the code, clarifies some detail, and ensures consistency moving forward. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/valhall/ISA.xml | 43 +++++++++++-------- src/panfrost/bifrost/valhall/asm.py | 30 +++---------- src/panfrost/bifrost/valhall/disasm.py | 32 +------------- .../bifrost/valhall/test/assembler-cases.txt | 34 +++++++-------- 4 files changed, 49 insertions(+), 90 deletions(-) diff --git a/src/panfrost/bifrost/valhall/ISA.xml b/src/panfrost/bifrost/valhall/ISA.xml index c645f7a8ea1f..e276d6311f58 100644 --- a/src/panfrost/bifrost/valhall/ISA.xml +++ b/src/panfrost/bifrost/valhall/ISA.xml @@ -64,34 +64,41 @@ 0x42480000 - + - Every Valhall instruction can perform an action, like wait on dependency - slots. A few special actions are available, specified in the instruction - metadata from this enum. 
The `wait0126` action is required to wait on + Every Valhall instruction can wait on dependency + slots. A few special flows are available, specified in the instruction + metadata from this enum. The `wait0126` flow is required to wait on dependency slot #6 and should be set on the instruction immediately - preceding `ATEST`. The `barrier` action may be set on any instruction for - subgroup barriers, and should particularly be set with the `BARRIER` - instruction for global barriers. The `td` action only applies to fragment - shaders and is used to terminate helper invocations, it should be set as - early as possible after helper invocations are no longer needed as - determined by data flow analysis. The `return` action is used to terminate - the shader, although it may be overloaded by the `BLEND` instruction. - - The `reconverge` action is required on any instruction immediately + preceding `ATEST`. The `wait` flow should be set for barriers. + The `discard` flow only applies to fragment shaders and is used to + terminate helper invocations, it should be set as early as possible after + helper invocations are no longer needed as determined by data flow + analysis. The `end` flow is used to terminate the shader, although it + may be overloaded by the `BLEND` instruction. + + The `reconverge` flow is required on any instruction immediately preceding a possible change to the mask of active threads in a subgroup. This includes all divergent branches, but it also includes the final instruction at the end of any basic block where the immediate successor (fallthrough) is the target of a divergent branch. - wait0126 - barrier + none + wait0 + wait1 + wait01 + wait2 + wait02 + wait12 + wait012 + wait0126 + wait reconverge - td + discard - return + end @@ -799,7 +806,7 @@ General-purpose barrier. Must use slot #7. Must be paired with a - `.barrier` action on the instruction. + `.wait` flow on the instruction. 
diff --git a/src/panfrost/bifrost/valhall/asm.py b/src/panfrost/bifrost/valhall/asm.py index 34b81331dcae..fbaedf83905d 100644 --- a/src/panfrost/bifrost/valhall/asm.py +++ b/src/panfrost/bifrost/valhall/asm.py @@ -339,35 +339,15 @@ def parse_asm(line): encoded |= (fau.page << 57) # Encode modifiers - has_action = False + has_flow = False for mod in mods: if len(mod) == 0: continue - if mod in enums['action'].bare_values: - die_if(has_action, "Multiple actions specified") - has_action = True - encoded |= (enums['action'].bare_values.index(mod) << 59) - encoded |= (1 << 62) # Action, not wait - elif mod.startswith('wait'): - die_if(has_action, "Multiple actions specified") - has_action = True - - slots = mod[len('wait'):] - try: - slots = set([int(x) for x in slots]) - except ValueError: - die(f"Expected slots in {mod}") - - known_slots = set([0, 1, 2]) - die_if(not slots.issubset(known_slots), f"Unknown slots in {mod}") - - if 0 in slots: - encoded |= (1 << 59) - if 1 in slots: - encoded |= (1 << 60) - if 2 in slots: - encoded |= (1 << 61) + if mod in enums['flow'].bare_values: + die_if(has_flow, "Multiple flow control modifiers specified") + has_flow = True + encoded |= (enums['flow'].bare_values.index(mod) << 59) else: candidates = [c for c in ins.modifiers if mod in c.bare_values] diff --git a/src/panfrost/bifrost/valhall/disasm.py b/src/panfrost/bifrost/valhall/disasm.py index 30400b305646..30c98b4a94df 100644 --- a/src/panfrost/bifrost/valhall/disasm.py +++ b/src/panfrost/bifrost/valhall/disasm.py @@ -42,34 +42,6 @@ static const uint32_t va_immediates[32] = { % endfor }; -/* Byte 7 has instruction metadata, analogous to Bifrost's clause header */ -struct va_metadata { - bool opcode_high : 1; - unsigned immediate_mode : 2; - unsigned action : 3; - bool do_action : 1; - bool unk3 : 1; -} __attribute__((packed)); - -static inline void -va_print_metadata(FILE *fp, uint8_t meta) -{ - struct va_metadata m; - memcpy(&m, &meta, 1); - - if (m.do_action) { - 
fputs(valhall_action[m.action], fp); - } else if (m.action) { - fprintf(fp, ".wait%s%s%s", - m.action & (1 << 0) ? "0" : "", - m.action & (1 << 1) ? "1" : "", - m.action & (1 << 2) ? "2" : ""); - } - - if (m.unk3) - fprintf(fp, ".unk3"); -} - static inline void va_print_src(FILE *fp, uint8_t src, unsigned fau_page) { @@ -153,8 +125,8 @@ va_disasm_instr(FILE *fp, uint64_t instr) % endif % endif % endfor - va_print_metadata(fp, instr >> 56); - fputs(" ", fp); + assert((instr & (1ull << 63)) == 0 /* reserved */); + fprintf(fp, "%s ", valhall_flow[instr >> 59]); % if len(op.dests) > 0: <% no_comma = False %> va_print_dest(fp, (instr >> 40), true); diff --git a/src/panfrost/bifrost/valhall/test/assembler-cases.txt b/src/panfrost/bifrost/valhall/test/assembler-cases.txt index 2d82e370fdc5..2aebbe2460e7 100644 --- a/src/panfrost/bifrost/valhall/test/assembler-cases.txt +++ b/src/panfrost/bifrost/valhall/test/assembler-cases.txt @@ -28,8 +28,8 @@ e6 00 00 00 00 c1 91 06 MOV.i32 r1, core_id.w0 82 3c 27 20 00 c0 a3 01 SHADDX.u64 r0, u2, r60.w0, shift:0x2 40 00 00 18 82 80 60 08 LOAD.i32.unsigned.slot0.wait0 @r0, `r0, offset:0 80 7c 47 20 00 c0 a3 01 SHADDX.u64 r0, u0, `r60.w0, shift:0x4 -40 00 00 38 08 44 61 78 STORE.i128.slot0.return @r4:r5:r6:r7, `r0, offset:0 -00 00 00 00 00 c0 00 78 NOP.return +40 00 00 38 08 44 61 78 STORE.i128.slot0.end @r4:r5:r6:r7, `r0, offset:0 +00 00 00 00 00 c0 00 78 NOP.end 40 c4 c0 9c 01 c1 f0 00 ICMP.u32.gt.m1 r1, `r0, 0x1000000.b3, 0x0 42 00 00 18 02 40 61 50 STORE.i32.slot0.reconverge @r0, `r2, offset:0 00 c9 8f 12 30 c0 a0 00 CLPER.i32.f1 r0, r0, 0x7060504.b0 @@ -46,18 +46,18 @@ e6 00 00 00 00 c1 91 06 MOV.i32 r1, core_id.w0 40 00 0b 10 00 c3 90 00 F16_TO_F32 r3, `r0.h1 00 00 00 00 00 c0 00 40 NOP.wait0126 42 43 04 00 00 c0 a5 00 V2F32_TO_V2F16 r0, `r2, `r3 -40 c0 00 28 90 c0 a5 48 FADD.v2f16.barrier r0, `r0.abs, 0x0.neg +40 c0 00 28 90 c0 a5 48 FADD.v2f16.wait r0, `r0.abs, 0x0.neg c0 00 00 00 00 f6 10 01 IADD_IMM.i32 r54, 0x0, #0x0 -3c d0 
ea 00 02 bc 7d 68 ATEST.td @r60, r60, 0x3F800000, atest_datum.w0 +3c d0 ea 00 02 bc 7d 68 ATEST.discard @r60, r60, 0x3F800000, atest_datum.w0 40 db 05 04 00 c1 a1 00 MKVEC.v2i16 r1, `r0.h00, 0x3C000000.h10 -f0 00 3c 33 04 40 7f 78 BLEND.slot0.v4.f16.return @r0:r1, blend_descriptor_0.w0, r60, target:0x0 +f0 00 3c 33 04 40 7f 78 BLEND.slot0.v4.f16.end @r0:r1, blend_descriptor_0.w0, r60, target:0x0 7b 0d 00 40 04 84 5e 08 LEA_BUF_IMM.slot1.wait0 @r4:r5, `r59, table:0xD, index:0x0 00 dd c0 08 14 c2 b2 00 FMA.f32 r2, r0, 0x44000000.neg.h1, 0x0.neg 41 88 c0 00 04 c1 b2 00 FMA.f32 r1, `r1, u8, 0x0.neg 40 88 c0 00 04 c0 b2 10 FMA.f32.wait1 r0, `r0, u8, 0x0.neg -44 00 00 32 06 40 61 78 STORE.i96.estream.slot0.return @r0:r1:r2, `r4, offset:0 -44 00 00 39 08 48 61 78 STORE.i128.istream.slot0.return @r8:r9:r10:r11, `r4, offset:0 -00 00 00 c0 01 c0 45 48 BARRIER.slot7.barrier +44 00 00 32 06 40 61 78 STORE.i96.estream.slot0.end @r0:r1:r2, `r4, offset:0 +44 00 00 39 08 48 61 78 STORE.i128.istream.slot0.end @r8:r9:r10:r11, `r4, offset:0 +00 00 00 c0 01 c0 45 48 BARRIER.slot7.wait 80 00 00 00 82 82 60 00 LOAD.i8.unsigned.slot0 @r2, u0, offset:0 80 00 00 08 82 82 60 00 LOAD.i16.unsigned.slot0 @r2, u0, offset:0 80 00 00 10 82 82 60 00 LOAD.i24.unsigned.slot0 @r2, u0, offset:0 @@ -106,9 +106,9 @@ c0 01 00 00 00 c4 10 51 IADD_IMM.i32.reconverge r4, 0x0, #0x1 42 00 00 38 08 44 61 00 STORE.i128.slot0 @r4:r5:r6:r7, `r2, offset:0 41 f8 ff ff 07 c0 1f 50 BRANCHZ.reconverge `r1, offset:-8 7d c0 00 08 10 bc a1 00 IADD.v2u16 r60.h1, `r61.h10, 0x0 -44 00 46 32 28 40 71 78 ST_CVT.slot0.istream.v4.f32.return @r0:r1:r2:r3, `r4, `r6, offset:0x0 -44 00 46 34 28 40 71 78 ST_CVT.slot0.istream.v4.s32.return @r0:r1:r2:r3, `r4, `r6, offset:0x0 -44 00 46 36 28 40 71 78 ST_CVT.slot0.istream.v4.u32.return @r0:r1:r2:r3, `r4, `r6, offset:0x0 +44 00 46 32 28 40 71 78 ST_CVT.slot0.istream.v4.f32.end @r0:r1:r2:r3, `r4, `r6, offset:0x0 +44 00 46 34 28 40 71 78 ST_CVT.slot0.istream.v4.s32.end @r0:r1:r2:r3, `r4, 
`r6, offset:0x0 +44 00 46 36 28 40 71 78 ST_CVT.slot0.istream.v4.u32.end @r0:r1:r2:r3, `r4, `r6, offset:0x0 7c c0 12 00 26 84 67 00 LEA_TEX_IMM.slot0 @r4:r5:r6, `r60, 0x0, table:0x2, index:0x1 7c c0 02 00 26 84 67 00 LEA_TEX_IMM.slot0 @r4:r5:r6, `r60, 0x0, table:0x2, index:0x0 82 81 00 28 f4 82 6a 00 LD_BUFFER.i64.unsigned.slot0 @r2:r3, u2, u1 @@ -123,7 +123,7 @@ c0 01 00 00 00 c4 10 51 IADD_IMM.i32.reconverge r4, 0x0, #0x1 40 44 80 00 01 c0 b8 00 MUX.i32 r0, `r0, `r4, u0 40 44 80 00 02 c0 b8 00 MUX.i32.fp_zero r0, `r0, `r4, u0 40 44 80 00 03 c0 b8 00 MUX.i32.bit r0, `r0, `r4, u0 -00 00 00 01 00 c1 99 68 FREXPM.f32.sqrt.td r1, r0 +00 00 00 01 00 c1 99 68 FREXPM.f32.sqrt.discard r1, r0 01 00 02 00 00 c2 9c 00 FRSQ.f32 r2, r1 40 00 02 01 00 c0 99 00 FREXPE.f32.sqrt r0, `r0 41 42 c0 40 04 c0 62 41 FMA_RSCALE_LEFT.f32.wait0126 r0, `r1, `r2, 0x0.neg, `r0 @@ -180,14 +180,14 @@ c0 77 01 0c 00 c2 a8 00 ISUB.s32 r2, 0x0, `r55.h1 00 00 03 00 20 c1 90 00 V2S8_TO_V2F16 r1, r0.b20 40 00 03 00 60 c0 90 00 V2S8_TO_V2F16 r0, `r0.b21 -3d 00 00 b2 88 80 5c 68 LD_VAR_BUF_IMM.f32.slot2.v4.src_f32.sample.store.td @r0:r1:r2:r3, r61, index:0x0 +3d 00 00 b2 88 80 5c 68 LD_VAR_BUF_IMM.f32.slot2.v4.src_f32.sample.store.discard @r0:r1:r2:r3, r61, index:0x0 3d 00 10 72 18 84 5c 00 LD_VAR_BUF_IMM.f32.slot1.v4.src_f32.center.retrieve @r4:r5:r6:r7, r61, index:0x10 c0 00 00 00 00 c8 10 01 IADD_IMM.i32 r8, 0x0, #0x0 c0 00 00 00 00 c9 10 01 IADD_IMM.i32 r9, 0x0, #0x0 3d 00 14 00 00 ca 90 00 U16_TO_U32 r10, r61.h00 3d 09 00 00 30 c0 1f 50 BRANCHZ.eq.reconverge r61.h0, offset:9 0a 00 00 00 00 cb 91 50 MOV.i32.reconverge r11, r10 -00 00 00 00 00 c0 00 48 NOP.barrier +00 00 00 00 00 c0 00 48 NOP.wait 81 0b 80 33 04 8e 78 00 LD_TILE.v4.f16.slot0 @r14:r15, u1, r11, u0 0b 00 04 00 00 cc 91 00 CLZ.u32 r12, r11 82 4c c0 52 00 cc b4 00 RSHIFT_XOR.i32.not_result r12, u2, `r12.b00, 0x0 @@ -202,15 +202,15 @@ c0 00 00 00 00 c9 10 01 IADD_IMM.i32 r9, 0x0, #0x0 49 3e c0 22 04 c9 b3 30 FMA.v2f16.wait12 r9, `r9, 
r62.h00, 0x0.neg 47 43 00 00 00 c3 a4 00 FADD.f32 r3, `r7, `r3 43 09 00 08 00 c3 a4 40 FADD.f32.wait0126 r3, `r3, r9.h1 -3c 03 ea 00 02 bc 7d 68 ATEST.td @r60, r60, r3, atest_datum.w0 +3c 03 ea 00 02 bc 7d 68 ATEST.discard @r60, r60, r3, atest_datum.w0 46 42 00 00 00 c2 a4 00 FADD.f32 r2, `r6, `r2 44 40 00 00 00 c0 a4 00 FADD.f32 r0, `r4, `r0 48 7e c0 22 04 ff b3 00 FMA.v2f16 r63, `r8, `r62.h00, 0x0.neg 45 41 00 00 00 c1 a4 00 FADD.f32 r1, `r5, `r1 41 3f 00 08 00 c1 a4 00 FADD.f32 r1, `r1, r63.h1 40 7f 00 04 00 c0 a4 00 FADD.f32 r0, `r0, `r63.h0 -42 49 00 04 00 c2 a4 48 FADD.f32.barrier r2, `r2, `r9.h0 -f0 00 3c 32 08 40 7f 78 BLEND.slot0.v4.f32.return @r0:r1:r2:r3, blend_descriptor_0.w0, r60, target:0x0 +42 49 00 04 00 c2 a4 48 FADD.f32.wait r2, `r2, `r9.h0 +f0 00 3c 32 08 40 7f 78 BLEND.slot0.v4.f32.end @r0:r1:r2:r3, blend_descriptor_0.w0, r60, target:0x0 c0 00 00 00 00 f6 10 01 IADD_IMM.i32 r54, 0x0, #0x0 c0 f1 00 00 10 c1 2f 08 BRANCHZI.eq.absolute.wait0 0x0, blend_descriptor_0.w1 80 00 c0 17 34 7c 25 01 TEX_FETCH.slot0.f.32.2d @r0:r1:r2:r3, @r60:r61, u0 -- GitLab From 40ed485e32e1d51eec2b8ee790b95f031d1fee84 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 25 Feb 2022 11:52:41 -0500 Subject: [PATCH 15/31] pan/va: Permit encoding more flags Missed the first time around. 
Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/valhall/valhall.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/panfrost/bifrost/valhall/valhall.py b/src/panfrost/bifrost/valhall/valhall.py index 6fd89cd8ce21..9ae7000850b1 100644 --- a/src/panfrost/bifrost/valhall/valhall.py +++ b/src/panfrost/bifrost/valhall/valhall.py @@ -161,6 +161,8 @@ class Staging: self.widen = False self.lanes = False self.lane = False + self.halfswizzle = False + self.combine = False self.size = 32 if not self.flags: -- GitLab From 7ad98ae96ec48c157d65c717d4a36bb9e98deec8 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 23 Jul 2021 12:04:13 -0400 Subject: [PATCH 16/31] pan/va: Build opcode info structures Filled out the new structures from XML. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/meson.build | 6 +- src/panfrost/bifrost/valhall/meson.build | 9 ++ src/panfrost/bifrost/valhall/valhall.c.py | 166 ++++++++++++++++++++++ src/panfrost/bifrost/valhall/valhall.h | 112 +++++++++++++++ 4 files changed, 291 insertions(+), 2 deletions(-) create mode 100644 src/panfrost/bifrost/valhall/valhall.c.py create mode 100644 src/panfrost/bifrost/valhall/valhall.h diff --git a/src/panfrost/bifrost/meson.build b/src/panfrost/bifrost/meson.build index 97d39575ec5f..7a1df35eacd0 100644 --- a/src/panfrost/bifrost/meson.build +++ b/src/panfrost/bifrost/meson.build @@ -21,6 +21,8 @@ subdir('valhall') +inc_valhall = include_directories(['.', 'valhall']) + libpanfrost_bifrost_files = files( 'bi_helper_invocations.c', 'bi_layout.c', @@ -134,8 +136,8 @@ libpanfrost_bifrost_disasm = static_library( libpanfrost_bifrost = static_library( 'panfrost_bifrost', - [libpanfrost_bifrost_files, bi_opcodes_c, bi_printer_c, bi_packer_c, bifrost_nir_algebraic_c], - include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, inc_panfrost_hw], + [libpanfrost_bifrost_files, bi_opcodes_c, bi_printer_c, bi_packer_c, 
bifrost_nir_algebraic_c, valhall_c], + include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, inc_panfrost_hw, inc_valhall], dependencies: [idep_nir, idep_bi_opcodes_h, idep_bi_builder_h], link_with: [libpanfrost_util, libpanfrost_bifrost_disasm, libpanfrost_valhall_disasm], c_args : [no_override_init_args], diff --git a/src/panfrost/bifrost/valhall/meson.build b/src/panfrost/bifrost/valhall/meson.build index ef8ea8cfec5d..e78a4ab4d975 100644 --- a/src/panfrost/bifrost/valhall/meson.build +++ b/src/panfrost/bifrost/valhall/meson.build @@ -19,6 +19,15 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +valhall_c = custom_target( + 'valhall_c', + input : ['valhall.c.py', 'ISA.xml'], + output : 'valhall.c', + command : [prog_python, '@INPUT@'], + capture : true, + depend_files : files('valhall.py'), +) + valhall_disasm_c = custom_target( 'valhall_disasm_c', input : ['disasm.py', 'ISA.xml'], diff --git a/src/panfrost/bifrost/valhall/valhall.c.py b/src/panfrost/bifrost/valhall/valhall.c.py new file mode 100644 index 000000000000..18ccf9e19fe6 --- /dev/null +++ b/src/panfrost/bifrost/valhall/valhall.c.py @@ -0,0 +1,166 @@ +#encoding=utf-8 + +# Copyright (C) 2021 Collabora, Ltd. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +from valhall import immediates, instructions, typesize +from mako.template import Template +from mako import exceptions + +SKIP = set([ + # Extra conversions + "S8_TO_S16", + "S8_TO_F16", + "U8_TO_U16", + "U8_TO_F16", + + # Saturating multiplies + "IMUL.s32", + "IMUL.v2s16", + "IMUL.v4s8", + + # 64-bit support + "NOT.i64", + "IADD.u64", + "IADD.s64", + "ISUB.u64", + "ISUB.s64", + "IMULD.u64", + "SHADDX.u64", + "SHADDX.s64", + "IMULD.u64", + "CLPER.s64", + "CLPER.u64", + "LSHIFT_AND.i64", + "RSHIFT_AND.i64", + "LSHIFT_OR.i64", + "RSHIFT_OR.i64", + "LSHIFT_XOR.i64", + "RSHIFT_XOR.i64", + "ATOM.i64", + "ATOM_RETURN.i64", + "ATOM1_RETURN.i64", + + # CLPER widens + "CLPER.s32", + "CLPER.v2s16", + "CLPER.v4s8", + "CLPER.v2u16", + "CLPER.v4u8", + + # Special cased + "FMA_RSCALE_N.f32", + "FMA_RSCALE_LEFT.f32", + "FMA_RSCALE_SCALE16.f32", + + # Deprecated instruction + "NOT.i32", + + # TODO + "IDP.v4s8", + "IDP.v4u8", + "TEX_DUAL", + "TODO.VAR_TEX", + ]) + +template = """ +#include "valhall.h" +#include "bi_opcodes.h" + +const uint32_t valhall_immediates[32] = { +% for imm in immediates: + ${hex(imm)}, +% endfor +}; + +<% +def ibool(x): + return '1' if x else '0' + +def hasmod(x, mod): + return ibool(any([x.name == mod for x in op.modifiers])) + +%> +const struct va_opcode_info +valhall_opcodes[BI_NUM_OPCODES] = { +% for op in instructions: +% if op.name not in skip: +<% + name = op.name + if name == 'BRANCHZ': + name = 'BRANCHZ.i16' + elif name == 'CUBEFACE2': + name = 'CUBEFACE2_V9' + + 
sr_control = 0 + + if len(op.staging) > 0: + sr_control = op.staging[0].encoded_flags >> 6 +%> + [BI_OPCODE_${name.replace('.', '_').upper()}] = { + .exact = ${hex(exact(op))}ULL, + .srcs = { +% for src in ([sr for sr in op.staging if sr.read] + op.srcs): + { + .absneg = ${ibool(src.absneg)}, + .swizzle = ${ibool(src.swizzle)}, + .notted = ${ibool(src.notted)}, + .widen = ${ibool(src.widen)}, + .lanes = ${ibool(src.lanes)}, + .halfswizzle = ${ibool(src.halfswizzle)}, + .lane = ${ibool(src.lane)}, + .combine = ${ibool(src.combine)}, +% if src.size in [8, 16, 32, 64]: + .size = VA_SIZE_${src.size}, +% endif + }, +% endfor + }, + .type_size = ${typesize(op.name)}, + .has_dest = ${ibool(len(op.dests) > 0)}, + .unit = VA_UNIT_${op.unit}, + .nr_srcs = ${len(op.srcs)}, + .nr_staging_srcs = ${sum([sr.read for sr in op.staging])}, + .nr_staging_dests = ${sum([sr.write for sr in op.staging])}, + .clamp = ${hasmod(x, 'clamp')}, + .round_mode = ${hasmod(x, 'round_mode')}, + .condition = ${hasmod(x, 'condition')}, + .result_type = ${hasmod(x, 'result_type')}, + .vecsize = ${hasmod(x, 'vector_size')}, + .register_format = ${hasmod(x, 'register_format')}, + .slot = ${hasmod(x, 'slot')}, + .sr_count = ${hasmod(x, 'staging_register_count')}, + .sr_write_count = ${hasmod(x, 'staging_register_write_count')}, + .sr_control = ${sr_control}, + }, +% endif +% endfor +}; +""" + +# Exact value to be ORed in to every opcode +def exact_op(op): + return (op.opcode << 48) | (op.opcode2 << op.secondary_shift) + +try: + print(Template(template).render(immediates = immediates, instructions = instructions, skip = SKIP, exact = exact_op, typesize = typesize)) +except: + print(exceptions.text_error_template().render()) diff --git a/src/panfrost/bifrost/valhall/valhall.h b/src/panfrost/bifrost/valhall/valhall.h new file mode 100644 index 000000000000..debf1f740e74 --- /dev/null +++ b/src/panfrost/bifrost/valhall/valhall.h @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2021 Collabora Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors (Collabora): + * Alyssa Rosenzweig + */ + +#ifndef __VALHALL_H +#define __VALHALL_H + +#include +#include "bi_opcodes.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern const uint32_t valhall_immediates[32]; + +enum va_size { + VA_SIZE_8 = 0, + VA_SIZE_16 = 1, + VA_SIZE_32 = 2, + VA_SIZE_64 = 3, +}; + +enum va_unit { + /** Fused floating-point multiply-add */ + VA_UNIT_FMA = 0, + + /** Type conversion and basic arithmetic */ + VA_UNIT_CVT = 1, + + /** Special function unit */ + VA_UNIT_SFU = 2, + + /** Varying */ + VA_UNIT_V = 3, + + /** General load/store */ + VA_UNIT_LS = 4, + + /** Texture */ + VA_UNIT_T = 5, + + /** Fused varying and texture */ + VA_UNIT_VT = 6, + + /** Produces a message for a unit not otherwise specified */ + VA_UNIT_NONE = 7 +}; + +struct va_src_info { + bool absneg : 1; + bool swizzle : 1; + bool notted : 1; + bool lane : 1; + bool lanes : 1; + bool halfswizzle : 1; + bool widen : 1; + bool combine : 1; + enum va_size size : 2; +} __attribute__((packed)); + +struct va_opcode_info { + uint64_t exact; + struct va_src_info srcs[4]; + uint8_t type_size : 8; + enum va_unit unit : 3; + unsigned nr_srcs : 3; + unsigned nr_staging_srcs : 2; + unsigned nr_staging_dests : 2; + bool has_dest : 1; + bool clamp : 1; + bool round_mode : 1; + bool condition : 1; + bool result_type : 1; + bool vecsize : 1; + bool register_format : 1; + bool slot : 1; + bool sr_count : 1; + bool sr_write_count : 1; + unsigned sr_control : 2; +}; + +extern const struct va_opcode_info +valhall_opcodes[BI_NUM_OPCODES]; + +#ifdef __cplusplus +} /* extern C */ +#endif + +#endif -- GitLab From 619566dea18e94e0c1e00effc8200df75b892941 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 24 Mar 2022 17:13:24 -0400 Subject: [PATCH 17/31] pan/va: Generate header containing enums We already collect enums in the ISA description XML. Export them for use in the compiler backend, particularly the packing code. Usually we'd use Mako for templating. 
In this case, the script is so trivial a template engine didn't seem worth it. (The obvious version with Mako was about 10 lines longer than just prints and f-strings used here.) Signed-off-by: Alyssa Rosenzweig Suggested-by: Icecream95 Part-of: --- src/panfrost/bifrost/meson.build | 2 +- src/panfrost/bifrost/valhall/meson.build | 14 ++++++++ .../bifrost/valhall/valhall_enums.h.py | 34 +++++++++++++++++++ 3 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 src/panfrost/bifrost/valhall/valhall_enums.h.py diff --git a/src/panfrost/bifrost/meson.build b/src/panfrost/bifrost/meson.build index 7a1df35eacd0..a15e781ed20c 100644 --- a/src/panfrost/bifrost/meson.build +++ b/src/panfrost/bifrost/meson.build @@ -138,7 +138,7 @@ libpanfrost_bifrost = static_library( 'panfrost_bifrost', [libpanfrost_bifrost_files, bi_opcodes_c, bi_printer_c, bi_packer_c, bifrost_nir_algebraic_c, valhall_c], include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, inc_panfrost_hw, inc_valhall], - dependencies: [idep_nir, idep_bi_opcodes_h, idep_bi_builder_h], + dependencies: [idep_nir, idep_bi_opcodes_h, idep_bi_builder_h, idep_valhall_enums_h], link_with: [libpanfrost_util, libpanfrost_bifrost_disasm, libpanfrost_valhall_disasm], c_args : [no_override_init_args], gnu_symbol_visibility : 'hidden', diff --git a/src/panfrost/bifrost/valhall/meson.build b/src/panfrost/bifrost/valhall/meson.build index e78a4ab4d975..b3e54197b057 100644 --- a/src/panfrost/bifrost/valhall/meson.build +++ b/src/panfrost/bifrost/valhall/meson.build @@ -28,6 +28,20 @@ valhall_c = custom_target( depend_files : files('valhall.py'), ) +valhall_enums_h = custom_target( + 'valhall_enums.h', + input : ['valhall_enums.h.py', 'ISA.xml'], + output : 'valhall_enums.h', + command : [prog_python, '@INPUT@'], + capture : true, + depend_files : files('valhall.py'), +) + +idep_valhall_enums_h = declare_dependency( + sources : [valhall_enums_h], + include_directories : 
include_directories('.'), +) + valhall_disasm_c = custom_target( 'valhall_disasm_c', input : ['disasm.py', 'ISA.xml'], diff --git a/src/panfrost/bifrost/valhall/valhall_enums.h.py b/src/panfrost/bifrost/valhall/valhall_enums.h.py new file mode 100644 index 000000000000..25ed606a2ca2 --- /dev/null +++ b/src/panfrost/bifrost/valhall/valhall_enums.h.py @@ -0,0 +1,34 @@ +#encoding=utf-8 + +# Copyright (C) 2021 Collabora, Ltd. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+ +from valhall import safe_name, enums + +for enum in sorted(enums): + print(f"enum va_{safe_name(enum)} {{") + + for i, value in enumerate(enums[enum].values): + if value.value != 'reserved': + key = safe_name(f"va_{enum}_{value.value}") + print(f" {key.upper()} = {i},") + + print("};\n") -- GitLab From edf284215dc013fdeb3d0b0891f0ff223298666f Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Tue, 27 Jul 2021 13:24:22 -0400 Subject: [PATCH 18/31] pan/va: Add helpers for swapping bitwise sources Annoyingly different from Bifrost. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/valhall/valhall.h | 40 ++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/src/panfrost/bifrost/valhall/valhall.h b/src/panfrost/bifrost/valhall/valhall.h index debf1f740e74..36bc606a6ab7 100644 --- a/src/panfrost/bifrost/valhall/valhall.h +++ b/src/panfrost/bifrost/valhall/valhall.h @@ -105,6 +105,46 @@ struct va_opcode_info { extern const struct va_opcode_info valhall_opcodes[BI_NUM_OPCODES]; +/* Bifrost specifies the source of bitwise operations as (A, B, shift), but + * Valhall specifies (A, shift, B). We follow Bifrost conventions in the + * compiler, so normalize. 
+ */ + +static inline bool +va_swap_12(enum bi_opcode op) +{ + switch (op) { + case BI_OPCODE_LSHIFT_AND_I32: + case BI_OPCODE_LSHIFT_AND_V2I16: + case BI_OPCODE_LSHIFT_AND_V4I8: + case BI_OPCODE_LSHIFT_OR_I32: + case BI_OPCODE_LSHIFT_OR_V2I16: + case BI_OPCODE_LSHIFT_OR_V4I8: + case BI_OPCODE_LSHIFT_XOR_I32: + case BI_OPCODE_LSHIFT_XOR_V2I16: + case BI_OPCODE_LSHIFT_XOR_V4I8: + case BI_OPCODE_RSHIFT_AND_I32: + case BI_OPCODE_RSHIFT_AND_V2I16: + case BI_OPCODE_RSHIFT_AND_V4I8: + case BI_OPCODE_RSHIFT_OR_I32: + case BI_OPCODE_RSHIFT_OR_V2I16: + case BI_OPCODE_RSHIFT_OR_V4I8: + case BI_OPCODE_RSHIFT_XOR_I32: + case BI_OPCODE_RSHIFT_XOR_V2I16: + case BI_OPCODE_RSHIFT_XOR_V4I8: + return true; + default: + return false; + } +} + +static inline struct va_src_info +va_src_info(enum bi_opcode op, unsigned src) +{ + unsigned idx = (va_swap_12(op) && (src == 1 || src == 2)) ? (3 - src) : src; + return valhall_opcodes[op].srcs[idx]; +} + #ifdef __cplusplus } /* extern C */ #endif -- GitLab From f45654af5953d86b4b82760b51a502fb25244073 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 19 Jul 2021 15:51:52 -0400 Subject: [PATCH 19/31] pan/va: Add packing routines Mostly manual since Valhall is regular. 
Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/bifrost_compile.c | 2 +- src/panfrost/bifrost/compiler.h | 1 + src/panfrost/bifrost/meson.build | 1 + src/panfrost/bifrost/valhall/va_compiler.h | 82 ++ src/panfrost/bifrost/valhall/va_pack.c | 862 +++++++++++++++++++++ 5 files changed, 947 insertions(+), 1 deletion(-) create mode 100644 src/panfrost/bifrost/valhall/va_compiler.h create mode 100644 src/panfrost/bifrost/valhall/va_pack.c diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index d168648cbb1a..663a655fae6b 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -4175,7 +4175,7 @@ bi_compile_variant_nir(nir_shader *nir, if (ctx->arch <= 8) { bi_pack_clauses(ctx, binary, offset); } else { - /* TODO: pack flat */ + bi_pack_valhall(ctx, binary); } if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal) { diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h index 8009b1ad6fa2..e074421c3dbd 100644 --- a/src/panfrost/bifrost/compiler.h +++ b/src/panfrost/bifrost/compiler.h @@ -1090,6 +1090,7 @@ bi_is_terminal_block(bi_block *block) /* Returns the size of the final clause */ unsigned bi_pack(bi_context *ctx, struct util_dynarray *emission); +void bi_pack_valhall(bi_context *ctx, struct util_dynarray *emission); struct bi_packed_tuple { uint64_t lo; diff --git a/src/panfrost/bifrost/meson.build b/src/panfrost/bifrost/meson.build index a15e781ed20c..ea9e7c751ea1 100644 --- a/src/panfrost/bifrost/meson.build +++ b/src/panfrost/bifrost/meson.build @@ -45,6 +45,7 @@ libpanfrost_bifrost_files = files( 'bi_validate.c', 'bir.c', 'bifrost_compile.c', + 'valhall/va_pack.c', ) bifrost_gen_disasm_c = custom_target( diff --git a/src/panfrost/bifrost/valhall/va_compiler.h b/src/panfrost/bifrost/valhall/va_compiler.h new file mode 100644 index 000000000000..a6409ae6fee9 --- /dev/null +++ b/src/panfrost/bifrost/valhall/va_compiler.h @@ -0,0 +1,82 
@@ +/* + * Copyright (C) 2021 Collabora Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors (Collabora): + * Alyssa Rosenzweig + */ + +#ifndef __VALHALL_COMPILER_H +#define __VALHALL_COMPILER_H + +#include "compiler.h" +#include "valhall.h" + +#ifdef __cplusplus +extern "C" { +#endif + +uint64_t va_pack_instr(const bi_instr *I, unsigned flow); + +static inline unsigned +va_fau_page(enum bir_fau value) +{ + /* Uniform slots of FAU have a 7-bit index. The top 2-bits are the page; the + * bottom 5-bits are specified in the source. 
+ */ + if (value & BIR_FAU_UNIFORM) { + unsigned slot = value & ~BIR_FAU_UNIFORM; + unsigned page = slot >> 5; + + assert(page <= 3); + return page; + } + + /* Special indices are also paginated */ + switch (value) { + case BIR_FAU_TLS_PTR: + case BIR_FAU_WLS_PTR: + return 1; + case BIR_FAU_LANE_ID: + case BIR_FAU_CORE_ID: + case BIR_FAU_PROGRAM_COUNTER: + return 3; + default: + return 0; + } +} + +static inline unsigned +va_select_fau_page(const bi_instr *I) +{ + bi_foreach_src(I, s) { + if (I->src[s].type == BI_INDEX_FAU) + return va_fau_page((enum bir_fau) I->src[s].value); + } + + return 0; +} + +#ifdef __cplusplus +} /* extern C */ +#endif + +#endif diff --git a/src/panfrost/bifrost/valhall/va_pack.c b/src/panfrost/bifrost/valhall/va_pack.c new file mode 100644 index 000000000000..95f088c79f54 --- /dev/null +++ b/src/panfrost/bifrost/valhall/va_pack.c @@ -0,0 +1,862 @@ +/* + * Copyright (C) 2021 Collabora, Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "va_compiler.h" +#include "valhall.h" +#include "valhall_enums.h" + +/* This file contains the final passes of the compiler. Running after + * scheduling and RA, the IR is now finalized, so we need to emit it to actual + * bits on the wire (as well as fixup branches) + */ + +/* + * Validate that two adjacent 32-bit sources form an aligned 64-bit register + * pair. This is a compiler invariant, required on Valhall but not on Bifrost. + */ +static void +va_validate_register_pair(const bi_instr *I, unsigned s) +{ + ASSERTED bi_index lo = I->src[s], hi = I->src[s + 1]; + + assert(lo.type == hi.type); + + if (lo.type == BI_INDEX_REGISTER) { + assert(hi.value & 1); + assert(hi.value == lo.value + 1); + } else { + assert(hi.offset & 1); + assert(hi.offset == lo.offset + 1); + } +} + +static unsigned +va_pack_reg(bi_index idx) +{ + assert(idx.type == BI_INDEX_REGISTER); + assert(idx.value < 64); + return idx.value; +} + +static unsigned +va_pack_fau_special(enum bir_fau fau) +{ + switch (fau) { + case BIR_FAU_ATEST_PARAM: return VA_FAU_SPECIAL_PAGE_0_ATEST_DATUM; + case BIR_FAU_TLS_PTR: return VA_FAU_SPECIAL_PAGE_1_THREAD_LOCAL_POINTER; + case BIR_FAU_WLS_PTR: return VA_FAU_SPECIAL_PAGE_1_WORKGROUP_LOCAL_POINTER; + case BIR_FAU_LANE_ID: return VA_FAU_SPECIAL_PAGE_3_LANE_ID; + case BIR_FAU_PROGRAM_COUNTER: return VA_FAU_SPECIAL_PAGE_3_PROGRAM_COUNTER; + + case BIR_FAU_BLEND_0...(BIR_FAU_BLEND_0 + 7): + return VA_FAU_SPECIAL_PAGE_0_BLEND_DESCRIPTOR_0 + (fau - BIR_FAU_BLEND_0); + + default: + unreachable("Unknown FAU value"); + } +} + +/* + * Encode a 64-bit FAU source. 
The offset is ignored, so this function can be + * used to encode a 32-bit FAU source by or'ing in the appropriate offset. + */ +static unsigned +va_pack_fau_64(bi_index idx) +{ + assert(idx.type == BI_INDEX_FAU); + + unsigned val = (idx.value & BITFIELD_MASK(5)); + + if (idx.value & BIR_FAU_IMMEDIATE) + return (0x3 << 6) | (val << 1); + else if (idx.value & BIR_FAU_UNIFORM) + return (0x2 << 6) | (val << 1); + else + return (0x7 << 5) | (va_pack_fau_special(idx.value) << 1); +} + +static unsigned +va_pack_src(bi_index idx) +{ + if (idx.type == BI_INDEX_REGISTER) { + unsigned value = va_pack_reg(idx); + if (idx.discard) value |= (1 << 6); + return value; + } else if (idx.type == BI_INDEX_FAU) { + assert(idx.offset <= 1); + return va_pack_fau_64(idx) | idx.offset; + } + + unreachable("Invalid type"); +} + +static unsigned +va_pack_wrmask(enum bi_swizzle swz) +{ + switch (swz) { + case BI_SWIZZLE_H00: return 0x1; + case BI_SWIZZLE_H11: return 0x2; + case BI_SWIZZLE_H01: return 0x3; + default: unreachable("Invalid write mask"); + } +} + +static enum va_atomic_operation +va_pack_atom_opc(enum bi_atom_opc opc) +{ + switch (opc) { + case BI_ATOM_OPC_AADD: return VA_ATOMIC_OPERATION_AADD; + case BI_ATOM_OPC_ASMIN: return VA_ATOMIC_OPERATION_ASMIN; + case BI_ATOM_OPC_ASMAX: return VA_ATOMIC_OPERATION_ASMAX; + case BI_ATOM_OPC_AUMIN: return VA_ATOMIC_OPERATION_AUMIN; + case BI_ATOM_OPC_AUMAX: return VA_ATOMIC_OPERATION_AUMAX; + case BI_ATOM_OPC_AAND: return VA_ATOMIC_OPERATION_AAND; + case BI_ATOM_OPC_AOR: return VA_ATOMIC_OPERATION_AOR; + case BI_ATOM_OPC_AXOR: return VA_ATOMIC_OPERATION_AXOR; + case BI_ATOM_OPC_ACMPXCHG: + case BI_ATOM_OPC_AXCHG: return VA_ATOMIC_OPERATION_AXCHG; + default: unreachable("Invalid atom_opc"); + } +} + +static enum va_atomic_operation_with_1 +va_pack_atom_opc_1(enum bi_atom_opc opc) +{ + switch (opc) { + case BI_ATOM_OPC_AINC: return VA_ATOMIC_OPERATION_WITH_1_AINC; + case BI_ATOM_OPC_ADEC: return VA_ATOMIC_OPERATION_WITH_1_ADEC; + case 
BI_ATOM_OPC_AUMAX1: return VA_ATOMIC_OPERATION_WITH_1_AUMAX1; + case BI_ATOM_OPC_ASMAX1: return VA_ATOMIC_OPERATION_WITH_1_ASMAX1; + case BI_ATOM_OPC_AOR1: return VA_ATOMIC_OPERATION_WITH_1_AOR1; + default: unreachable("Invalid atom_opc"); + } +} + +static unsigned +va_pack_dest(bi_index index) +{ + return va_pack_reg(index) | (va_pack_wrmask(index.swizzle) << 6); +} + +static enum va_widen +va_pack_widen_f32(enum bi_swizzle swz) +{ + switch (swz) { + case BI_SWIZZLE_H01: return VA_WIDEN_NONE; + case BI_SWIZZLE_H00: return VA_WIDEN_H0; + case BI_SWIZZLE_H11: return VA_WIDEN_H1; + default: unreachable("Invalid widen"); + } +} + +static enum va_swizzles_16_bit +va_pack_swizzle_f16(enum bi_swizzle swz) +{ + switch (swz) { + case BI_SWIZZLE_H00: return VA_SWIZZLES_16_BIT_H00; + case BI_SWIZZLE_H10: return VA_SWIZZLES_16_BIT_H10; + case BI_SWIZZLE_H01: return VA_SWIZZLES_16_BIT_H01; + case BI_SWIZZLE_H11: return VA_SWIZZLES_16_BIT_H11; + default: unreachable("Invalid swizzle"); + } +} + +static unsigned +va_pack_widen(enum bi_swizzle swz, enum va_size size) +{ + if (size == VA_SIZE_8) { + switch (swz) { + case BI_SWIZZLE_H01: return VA_SWIZZLES_8_BIT_B0123; + case BI_SWIZZLE_H00: return VA_SWIZZLES_8_BIT_B0101; + case BI_SWIZZLE_H11: return VA_SWIZZLES_8_BIT_B2323; + case BI_SWIZZLE_B0000: return VA_SWIZZLES_8_BIT_B0000; + case BI_SWIZZLE_B1111: return VA_SWIZZLES_8_BIT_B1111; + case BI_SWIZZLE_B2222: return VA_SWIZZLES_8_BIT_B2222; + case BI_SWIZZLE_B3333: return VA_SWIZZLES_8_BIT_B3333; + default: unreachable("Exotic swizzles not yet handled"); + } + } else if (size == VA_SIZE_16) { + switch (swz) { + case BI_SWIZZLE_H00: return VA_SWIZZLES_16_BIT_H00; + case BI_SWIZZLE_H10: return VA_SWIZZLES_16_BIT_H10; + case BI_SWIZZLE_H01: return VA_SWIZZLES_16_BIT_H01; + case BI_SWIZZLE_H11: return VA_SWIZZLES_16_BIT_H11; + case BI_SWIZZLE_B0000: return VA_SWIZZLES_16_BIT_B00; + case BI_SWIZZLE_B1111: return VA_SWIZZLES_16_BIT_B11; + case BI_SWIZZLE_B2222: return 
VA_SWIZZLES_16_BIT_B22; + case BI_SWIZZLE_B3333: return VA_SWIZZLES_16_BIT_B33; + default: unreachable("Exotic swizzles not yet handled"); + } + } else if (size == VA_SIZE_32) { + switch (swz) { + case BI_SWIZZLE_H01: return VA_SWIZZLES_32_BIT_NONE; + case BI_SWIZZLE_H00: return VA_SWIZZLES_32_BIT_H0; + case BI_SWIZZLE_H11: return VA_SWIZZLES_32_BIT_H1; + case BI_SWIZZLE_B0000: return VA_SWIZZLES_32_BIT_B0; + case BI_SWIZZLE_B1111: return VA_SWIZZLES_32_BIT_B1; + case BI_SWIZZLE_B2222: return VA_SWIZZLES_32_BIT_B2; + case BI_SWIZZLE_B3333: return VA_SWIZZLES_32_BIT_B3; + default: unreachable("Invalid swizzle"); + } + } else { + unreachable("TODO: other type sizes"); + } +} + +static enum va_half_swizzles_8_bit +va_pack_halfswizzle(enum bi_swizzle swz) +{ + switch (swz) { + case BI_SWIZZLE_B0000: return VA_HALF_SWIZZLES_8_BIT_B00; + case BI_SWIZZLE_B1111: return VA_HALF_SWIZZLES_8_BIT_B11; + case BI_SWIZZLE_B2222: return VA_HALF_SWIZZLES_8_BIT_B22; + case BI_SWIZZLE_B3333: return VA_HALF_SWIZZLES_8_BIT_B33; + case BI_SWIZZLE_B0011: return VA_HALF_SWIZZLES_8_BIT_B01; + case BI_SWIZZLE_B2233: return VA_HALF_SWIZZLES_8_BIT_B23; + case BI_SWIZZLE_B0022: return VA_HALF_SWIZZLES_8_BIT_B02; + default: unreachable("todo: more halfswizzles"); + } +} + +static enum va_lanes_8_bit +va_pack_shift_lanes(enum bi_swizzle swz) +{ + switch (swz) { + case BI_SWIZZLE_H01: return VA_LANES_8_BIT_B02; + case BI_SWIZZLE_B0000: return VA_LANES_8_BIT_B00; + default: unreachable("todo: more shifts"); + } +} + +static enum va_combine +va_pack_combine(enum bi_swizzle swz) +{ + switch (swz) { + case BI_SWIZZLE_H01: return VA_COMBINE_NONE; + case BI_SWIZZLE_H00: return VA_COMBINE_H0; + case BI_SWIZZLE_H11: return VA_COMBINE_H1; + default: unreachable("Invalid branch lane"); + } +} + +static enum va_source_format +va_pack_source_format(const bi_instr *I) +{ + switch (I->register_format) { + case BI_REGISTER_FORMAT_AUTO: + case BI_REGISTER_FORMAT_S32: + case BI_REGISTER_FORMAT_U32: return 
VA_SOURCE_FORMAT_SRC_FLAT32; + case BI_REGISTER_FORMAT_F32: return VA_SOURCE_FORMAT_SRC_F32; + case BI_REGISTER_FORMAT_F16: return VA_SOURCE_FORMAT_SRC_F16; + default: unreachable("unhandled register format"); + } +} + +static uint64_t +va_pack_alu(const bi_instr *I) +{ + struct va_opcode_info info = valhall_opcodes[I->op]; + uint64_t hex = 0; + + switch (I->op) { + /* Add FREXP flags */ + case BI_OPCODE_FREXPE_F32: + case BI_OPCODE_FREXPE_V2F16: + case BI_OPCODE_FREXPM_F32: + case BI_OPCODE_FREXPM_V2F16: + if (I->sqrt) hex |= 1ull << 24; + if (I->log) hex |= 1ull << 25; + break; + + /* Add mux type */ + case BI_OPCODE_MUX_I32: + case BI_OPCODE_MUX_V2I16: + case BI_OPCODE_MUX_V4I8: + hex |= (uint64_t) I->mux << 32; + break; + + /* Add .eq flag */ + case BI_OPCODE_BRANCHZ_I16: + case BI_OPCODE_BRANCHZI: + assert(I->cmpf == BI_CMPF_EQ || I->cmpf == BI_CMPF_NE); + + if (I->cmpf == BI_CMPF_EQ) hex |= (1ull << 36); + + if (I->op == BI_OPCODE_BRANCHZI) + hex |= (0x1ull << 40); /* Absolute */ + else + hex |= ((uint64_t) I->branch_offset & BITFIELD_MASK(27)) << 8; + + break; + + /* Add arithmetic flag */ + case BI_OPCODE_RSHIFT_AND_I32: + case BI_OPCODE_RSHIFT_AND_V2I16: + case BI_OPCODE_RSHIFT_AND_V4I8: + case BI_OPCODE_RSHIFT_OR_I32: + case BI_OPCODE_RSHIFT_OR_V2I16: + case BI_OPCODE_RSHIFT_OR_V4I8: + case BI_OPCODE_RSHIFT_XOR_I32: + case BI_OPCODE_RSHIFT_XOR_V2I16: + case BI_OPCODE_RSHIFT_XOR_V4I8: + hex |= (uint64_t) I->arithmetic << 34; + break; + + case BI_OPCODE_LEA_BUF_IMM: + /* Buffer table index */ + hex |= 0xD << 8; + break; + + case BI_OPCODE_LEA_ATTR_IMM: + hex |= ((uint64_t) I->table) << 16; + hex |= ((uint64_t) I->attribute_index) << 20; + break; + + case BI_OPCODE_IADD_IMM_I32: + case BI_OPCODE_IADD_IMM_V2I16: + case BI_OPCODE_IADD_IMM_V4I8: + case BI_OPCODE_FADD_IMM_F32: + case BI_OPCODE_FADD_IMM_V2F16: + hex |= ((uint64_t) I->index) << 8; + break; + + case BI_OPCODE_CLPER_I32: + hex |= ((uint64_t) I->inactive_result) << 22; + hex |= ((uint64_t) 
I->lane_op) << 32; + hex |= ((uint64_t) I->subgroup) << 36; + break; + + case BI_OPCODE_LD_VAR: + case BI_OPCODE_LD_VAR_FLAT: + case BI_OPCODE_LD_VAR_IMM: + case BI_OPCODE_LD_VAR_FLAT_IMM: + case BI_OPCODE_LD_VAR_BUF_F16: + case BI_OPCODE_LD_VAR_BUF_F32: + case BI_OPCODE_LD_VAR_BUF_IMM_F16: + case BI_OPCODE_LD_VAR_BUF_IMM_F32: + case BI_OPCODE_LD_VAR_SPECIAL: + if (I->op == BI_OPCODE_LD_VAR_SPECIAL) + hex |= ((uint64_t) I->varying_name) << 12; /* instead of index */ + else if (I->op == BI_OPCODE_LD_VAR_BUF_IMM_F16 || + I->op == BI_OPCODE_LD_VAR_BUF_IMM_F32) { + hex |= ((uint64_t) I->index) << 16; + } else if (I->op == BI_OPCODE_LD_VAR_IMM || + I->op == BI_OPCODE_LD_VAR_FLAT_IMM) { + hex |= ((uint64_t) I->table) << 8; + hex |= ((uint64_t) I->index) << 12; + } + + hex |= ((uint64_t) va_pack_source_format(I)) << 24; + hex |= ((uint64_t) I->update) << 36; + hex |= ((uint64_t) I->sample) << 38; + break; + + case BI_OPCODE_LD_ATTR_IMM: + hex |= ((uint64_t) I->table) << 16; + hex |= ((uint64_t) I->attribute_index) << 20; + break; + + case BI_OPCODE_ZS_EMIT: + if (I->stencil) hex |= (1 << 24); + if (I->z) hex |= (1 << 25); + break; + + default: + break; + } + + /* FMA_RSCALE.f32 special modes treated as extra opcodes */ + if (I->op == BI_OPCODE_FMA_RSCALE_F32) { + assert(I->special < 4); + hex |= ((uint64_t) I->special) << 48; + } + + /* Add the normal destination or a placeholder. Staging destinations are + * added elsewhere, as they require special handling for control fields. + */ + if (info.has_dest && info.nr_staging_dests == 0) { + hex |= (uint64_t) va_pack_dest(I->dest[0]) << 40; + } else if (info.nr_staging_dests == 0 && info.nr_staging_srcs == 0) { + assert(bi_is_null(I->dest[0])); + hex |= 0xC0ull << 40; /* Placeholder */ + } + + bool swap12 = va_swap_12(I->op); + + /* First src is staging if we read, skip it when packing sources */ + unsigned src_offset = bi_opcode_props[I->op].sr_read ? 
1 : 0; + + for (unsigned i = 0; i < info.nr_srcs; ++i) { + unsigned logical_i = (swap12 && i == 1) ? 2 : (swap12 && i == 2) ? 1 : i; + + struct va_src_info src_info = info.srcs[i]; + enum va_size size = src_info.size; + + bi_index src = I->src[logical_i + src_offset]; + hex |= (uint64_t) va_pack_src(src) << (8 * i); + + if (src_info.notted) { + if (src.neg) hex |= (1ull << 35); + } else if (src_info.absneg) { + unsigned neg_offs = 32 + 2 + ((2 - i) * 2); + unsigned abs_offs = 33 + 2 + ((2 - i) * 2); + + if (src.neg) hex |= 1ull << neg_offs; + if (src.abs) hex |= 1ull << abs_offs; + } else { + assert(!src.neg && "Unexpected negate"); + assert(!src.abs && "Unexpected absolute value"); + } + + if (src_info.swizzle) { + unsigned offs = 24 + ((2 - i) * 2); + unsigned S = src.swizzle; + assert(size == VA_SIZE_16 || size == VA_SIZE_32); + + uint64_t v = (size == VA_SIZE_32 ? va_pack_widen_f32(S) : va_pack_swizzle_f16(S)); + hex |= v << offs; + } else if (src_info.widen) { + unsigned offs = (i == 1) ? 26 : 36; + hex |= (uint64_t) va_pack_widen(src.swizzle, src_info.size) << offs; + } else if (src_info.lane) { + unsigned offs = 28; + assert(i == 0 && "todo: MKVEC"); + if (src_info.size == VA_SIZE_16) { + hex |= (src.swizzle == BI_SWIZZLE_H11 ? 
1 : 0) << offs; + } else if (I->op == BI_OPCODE_BRANCHZ_I16) { + hex |= ((uint64_t) va_pack_combine(src.swizzle) << 37); + } else { + assert(src_info.size == VA_SIZE_8); + unsigned comp = src.swizzle - BI_SWIZZLE_B0000; + assert(comp < 4); + hex |= (uint64_t) comp << offs; + } + } else if (src_info.lanes) { + assert(src_info.size == VA_SIZE_8); + assert(i == 1); + hex |= (uint64_t) va_pack_shift_lanes(src.swizzle) << 26; + } else if (src_info.combine) { + /* Treat as swizzle, subgroup ops not yet supported */ + assert(src_info.size == VA_SIZE_32); + assert(i == 0); + hex |= (uint64_t) va_pack_widen_f32(src.swizzle) << 37; + } else if (src_info.halfswizzle) { + assert(src_info.size == VA_SIZE_8); + assert(i == 0); + hex |= (uint64_t) va_pack_halfswizzle(src.swizzle) << 36; + } else { + assert(src.swizzle == BI_SWIZZLE_H01 && "Unexpected swizzle"); + } + } + + if (info.clamp) hex |= (uint64_t) I->clamp << 32; + if (info.round_mode) hex |= (uint64_t) I->round << 30; + if (info.condition) hex |= (uint64_t) I->cmpf << 32; + if (info.result_type) hex |= (uint64_t) I->result_type << 30; + + return hex; +} + +static uint64_t +va_pack_byte_offset(const bi_instr *I) +{ + int16_t offset = I->byte_offset; + assert(offset == I->byte_offset && "offset overflow"); + + uint16_t offset_as_u16 = offset; + return ((uint64_t) offset_as_u16) << 8; +} + +static uint64_t +va_pack_byte_offset_8(const bi_instr *I) +{ + uint8_t offset = I->byte_offset; + assert(offset == I->byte_offset && "offset overflow"); + + return ((uint64_t) offset) << 8; +} + +static uint64_t +va_pack_load(const bi_instr *I, bool buffer_descriptor) +{ + const uint8_t load_lane_identity[8] = { + VA_LOAD_LANE_8_BIT_B0, + VA_LOAD_LANE_16_BIT_H0, + VA_LOAD_LANE_24_BIT_IDENTITY, + VA_LOAD_LANE_32_BIT_W0, + VA_LOAD_LANE_48_BIT_IDENTITY, + VA_LOAD_LANE_64_BIT_IDENTITY, + VA_LOAD_LANE_96_BIT_IDENTITY, + VA_LOAD_LANE_128_BIT_IDENTITY, + }; + + unsigned memory_size = (valhall_opcodes[I->op].exact >> 27) & 0x7; + uint64_t hex = 
(uint64_t) load_lane_identity[memory_size] << 36; + + // unsigned + hex |= (1ull << 39); + + if (!buffer_descriptor) + hex |= va_pack_byte_offset(I); + + hex |= (uint64_t) va_pack_src(I->src[0]) << 0; + + if (buffer_descriptor) + hex |= (uint64_t) va_pack_src(I->src[1]) << 8; + + return hex; +} + +static uint64_t +va_pack_memory_access(const bi_instr *I) +{ + switch (I->seg) { + case BI_SEG_TL: return VA_MEMORY_ACCESS_FORCE; + case BI_SEG_POS: return VA_MEMORY_ACCESS_ISTREAM; + case BI_SEG_VARY: return VA_MEMORY_ACCESS_ESTREAM; + default: return VA_MEMORY_ACCESS_NONE; + } +} + +static uint64_t +va_pack_store(const bi_instr *I) +{ + uint64_t hex = va_pack_memory_access(I) << 24; + + va_validate_register_pair(I, 1); + hex |= (uint64_t) va_pack_src(I->src[1]) << 0; + + hex |= va_pack_byte_offset(I); + + return hex; +} + +static enum va_lod_mode +va_pack_lod_mode(enum bi_va_lod_mode mode) +{ + switch (mode) { + case BI_VA_LOD_MODE_ZERO_LOD: return VA_LOD_MODE_ZERO; + case BI_VA_LOD_MODE_COMPUTED_LOD: return VA_LOD_MODE_COMPUTED; + case BI_VA_LOD_MODE_EXPLICIT: return VA_LOD_MODE_EXPLICIT; + case BI_VA_LOD_MODE_COMPUTED_BIAS: return VA_LOD_MODE_COMPUTED_BIAS; + case BI_VA_LOD_MODE_GRDESC: return VA_LOD_MODE_GRDESC; + } + + unreachable("Invalid LOD mode"); +} + +static enum va_register_type +va_pack_register_type(enum bi_register_format regfmt) +{ + switch (regfmt) { + case BI_REGISTER_FORMAT_F16: + case BI_REGISTER_FORMAT_F32: + return VA_REGISTER_TYPE_F; + + case BI_REGISTER_FORMAT_U16: + case BI_REGISTER_FORMAT_U32: + return VA_REGISTER_TYPE_U; + + case BI_REGISTER_FORMAT_S16: + case BI_REGISTER_FORMAT_S32: + return VA_REGISTER_TYPE_S; + + default: + unreachable("Invalid register format"); + } +} + +static enum va_register_format +va_pack_register_format(const bi_instr *I) +{ + switch (I->register_format) { + case BI_REGISTER_FORMAT_AUTO: return VA_REGISTER_FORMAT_AUTO; + case BI_REGISTER_FORMAT_F32: return VA_REGISTER_FORMAT_F32; + case BI_REGISTER_FORMAT_F16: return 
VA_REGISTER_FORMAT_F16; + case BI_REGISTER_FORMAT_S32: return VA_REGISTER_FORMAT_S32; + case BI_REGISTER_FORMAT_S16: return VA_REGISTER_FORMAT_S16; + case BI_REGISTER_FORMAT_U32: return VA_REGISTER_FORMAT_U32; + case BI_REGISTER_FORMAT_U16: return VA_REGISTER_FORMAT_U16; + default: unreachable("unhandled register format"); + } +} + +uint64_t +va_pack_instr(const bi_instr *I, unsigned flow) +{ + struct va_opcode_info info = valhall_opcodes[I->op]; + + uint64_t hex = info.exact | (((uint64_t) flow) << 59); + hex |= ((uint64_t) va_select_fau_page(I)) << 57; + + if (info.slot) { + unsigned slot = (I->op == BI_OPCODE_BARRIER) ? 7 : 0; + hex |= (slot << 30); + } + + if (info.sr_count) { + bool read = bi_opcode_props[I->op].sr_read; + bi_index sr = read ? I->src[0] : I->dest[0]; + + unsigned count = read ? + bi_count_read_registers(I, 0) : + bi_count_write_registers(I, 0); + + hex |= ((uint64_t) count << 33); + hex |= (uint64_t) va_pack_reg(sr) << 40; + hex |= ((uint64_t) info.sr_control << 46); + } + + if (info.sr_write_count) { + hex |= ((uint64_t) bi_count_write_registers(I, 0) - 1) << 36; + hex |= ((uint64_t) va_pack_reg(I->dest[0])) << 16; + } + + if (info.vecsize) + hex |= ((uint64_t) I->vecsize << 28); + + if (info.register_format) + hex |= ((uint64_t) va_pack_register_format(I)) << 24; + + switch (I->op) { + case BI_OPCODE_LOAD_I8: + case BI_OPCODE_LOAD_I16: + case BI_OPCODE_LOAD_I24: + case BI_OPCODE_LOAD_I32: + case BI_OPCODE_LOAD_I48: + case BI_OPCODE_LOAD_I64: + case BI_OPCODE_LOAD_I96: + case BI_OPCODE_LOAD_I128: + hex |= va_pack_load(I, false); + break; + + case BI_OPCODE_LD_BUFFER_I8: + case BI_OPCODE_LD_BUFFER_I16: + case BI_OPCODE_LD_BUFFER_I24: + case BI_OPCODE_LD_BUFFER_I32: + case BI_OPCODE_LD_BUFFER_I48: + case BI_OPCODE_LD_BUFFER_I64: + case BI_OPCODE_LD_BUFFER_I96: + case BI_OPCODE_LD_BUFFER_I128: + hex |= va_pack_load(I, true); + break; + + case BI_OPCODE_STORE_I8: + case BI_OPCODE_STORE_I16: + case BI_OPCODE_STORE_I24: + case BI_OPCODE_STORE_I32: 
+ case BI_OPCODE_STORE_I48: + case BI_OPCODE_STORE_I64: + case BI_OPCODE_STORE_I96: + case BI_OPCODE_STORE_I128: + hex |= va_pack_store(I); + break; + + case BI_OPCODE_ATOM1_RETURN_I32: + /* Permit omitting the destination for plain ATOM1 */ + if (!bi_count_write_registers(I, 0)) { + hex |= (0x40ull << 40); // fake read + } + + /* 64-bit source */ + va_validate_register_pair(I, 0); + hex |= (uint64_t) va_pack_src(I->src[0]) << 0; + hex |= va_pack_byte_offset_8(I); + hex |= ((uint64_t) va_pack_atom_opc_1(I->atom_opc)) << 22; + break; + + case BI_OPCODE_ATOM_I32: + case BI_OPCODE_ATOM_RETURN_I32: + /* 64-bit source */ + va_validate_register_pair(I, 1); + hex |= (uint64_t) va_pack_src(I->src[1]) << 0; + hex |= va_pack_byte_offset_8(I); + hex |= ((uint64_t) va_pack_atom_opc(I->atom_opc)) << 22; + + if (I->op == BI_OPCODE_ATOM_RETURN_I32) + hex |= (0xc0ull << 40); // flags + + if (I->atom_opc == BI_ATOM_OPC_ACMPXCHG) + hex |= (1 << 26); /* .compare */ + + break; + + case BI_OPCODE_ST_CVT: + /* Staging read */ + hex |= va_pack_store(I); + + /* Conversion descriptor */ + hex |= (uint64_t) va_pack_src(I->src[3]) << 16; + break; + + case BI_OPCODE_BLEND: + { + /* Source 0 - Blend descriptor (64-bit) */ + hex |= ((uint64_t) va_pack_src(I->src[2])) << 0; + + /* Validate that it is a 64-bit register pair */ + assert(I->src[3].type == I->src[2].type); + + if (I->src[2].type == BI_INDEX_REGISTER) { + assert(I->src[3].value & 1); + assert(I->src[3].value == I->src[2].value + 1); + } else { + assert(I->src[3].offset & 1); + assert(I->src[3].offset == I->src[2].offset + 1); + } + + /* Target */ + assert((I->branch_offset & 0x7) == 0); + hex |= ((I->branch_offset >> 3) << 8); + + /* Source 2 - coverage mask */ + hex |= ((uint64_t) va_pack_reg(I->src[1])) << 16; + + /* Vector size */ + unsigned vecsize = 4; + hex |= ((uint64_t) (vecsize - 1) << 28); + + break; + } + + case BI_OPCODE_TEX_SINGLE: + case BI_OPCODE_TEX_FETCH: + case BI_OPCODE_TEX_GATHER: + { + /* Image to read from */ + 
hex |= ((uint64_t) va_pack_src(I->src[1])) << 0; + + assert(!(I->op == BI_OPCODE_TEX_FETCH && I->shadow)); + + if (I->array_enable) hex |= (1ull << 10); + if (I->texel_offset) hex |= (1ull << 11); + if (I->shadow) hex |= (1ull << 12); + if (I->skip) hex |= (1ull << 39); + if (!bi_is_regfmt_16(I->register_format)) hex |= (1ull << 46); + + if (I->op == BI_OPCODE_TEX_SINGLE) { + assert(I->va_lod_mode < 8); + hex |= ((uint64_t) va_pack_lod_mode(I->va_lod_mode)) << 13; + } + + if (I->op == BI_OPCODE_TEX_GATHER) { + if (I->integer_coordinates) hex |= (1 << 13); + hex |= ((uint64_t) I->fetch_component) << 14; + } + + hex |= (VA_WRITE_MASK_RGBA << 22); + hex |= ((uint64_t) va_pack_register_type(I->register_format)) << 26; + hex |= ((uint64_t) I->dimension) << 28; + + break; + } + + default: + if (!info.exact && I->op != BI_OPCODE_NOP) { + bi_print_instr(I, stderr); + fflush(stderr); + unreachable("Opcode not packable on Valhall"); + } + + hex |= va_pack_alu(I); + break; + } + + return hex; +} + +static bool +va_last_in_block(bi_block *block, bi_instr *I) +{ + return (I->link.next == &block->instructions); +} + +static bool +va_should_return(bi_block *block, bi_instr *I) +{ + /* Don't return within a block */ + if (!va_last_in_block(block, I)) + return false; + + /* Don't return if we're succeeded by instructions */ + for (unsigned i = 0; i < ARRAY_SIZE(block->successors); ++i) { + bi_block *succ = block->successors[i]; + + if (succ && !bi_is_terminal_block(succ)) + return false; + } + + return true; +} + +static enum va_flow +va_pack_flow(bi_block *block, bi_instr *I) +{ + if (va_should_return(block, I)) + return VA_FLOW_END; + + if (va_last_in_block(block, I) && bi_reconverge_branches(block)) + return VA_FLOW_RECONVERGE; + + if (I->op == BI_OPCODE_BARRIER) + return VA_FLOW_WAIT; + + if (I->flow) + return I->flow; + + /* TODO: Generalize waits */ + if (valhall_opcodes[I->op].nr_staging_dests > 0 || I->op == BI_OPCODE_BLEND) + return VA_FLOW_WAIT0; + + return VA_FLOW_NONE; 
+} + +void +bi_pack_valhall(bi_context *ctx, struct util_dynarray *emission) +{ + unsigned orig_size = emission->size; + + bi_foreach_block(ctx, block) { + bi_foreach_instr_in_block(block, I) { + unsigned flow = va_pack_flow(block, I); + uint64_t hex = va_pack_instr(I, flow); + util_dynarray_append(emission, uint64_t, hex); + } + } + + /* Pad with zeroes, but keep empty programs empty so they may be omitted + * altogether. Failing to do this would result in a program containing only + * zeroes, which is invalid and will raise an encoding fault. + * + * Pad an extra 16 byte (one instruction) to separate primary and secondary + * shader disassembles. This is not strictly necessary, but it's a good + * practice. 128 bytes is the optimal program alignment on Trym, so pad + * secondary shaders up to 128 bytes. This may help the instruction cache. + */ + if (orig_size != emission->size) { + unsigned aligned = ALIGN_POT(emission->size + 16, 128); + unsigned count = aligned - emission->size; + + memset(util_dynarray_grow(emission, uint8_t, count), 0, count); + } +} -- GitLab From 13d7ca13001064dc7114e6cc26ba2c3e7ff64e47 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 21 Jul 2021 20:29:54 -0400 Subject: [PATCH 20/31] pan/va: Optimize add with imm to ADD_IMM Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/bifrost_compile.c | 4 + src/panfrost/bifrost/compiler.h | 1 + src/panfrost/bifrost/meson.build | 1 + src/panfrost/bifrost/valhall/va_compiler.h | 1 + src/panfrost/bifrost/valhall/va_optimize.c | 113 +++++++++++++++++++++ 5 files changed, 120 insertions(+) create mode 100644 src/panfrost/bifrost/valhall/va_optimize.c diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index 663a655fae6b..88e7f1bb1294 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -4123,6 +4123,10 @@ bi_compile_variant_nir(nir_shader *nir, bi_lower_opt_instruction(I); } + if (ctx->arch 
>= 9) { + va_optimize(ctx); + } + bi_foreach_block(ctx, block) { bi_lower_branch(ctx, block); } diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h index e074421c3dbd..920e18900f05 100644 --- a/src/panfrost/bifrost/compiler.h +++ b/src/panfrost/bifrost/compiler.h @@ -1038,6 +1038,7 @@ void bi_lower_swizzle(bi_context *ctx); void bi_lower_fau(bi_context *ctx); void bi_assign_scoreboard(bi_context *ctx); void bi_register_allocate(bi_context *ctx); +void va_optimize(bi_context *ctx); void bi_lower_opt_instruction(bi_instr *I); diff --git a/src/panfrost/bifrost/meson.build b/src/panfrost/bifrost/meson.build index ea9e7c751ea1..3191d24ec651 100644 --- a/src/panfrost/bifrost/meson.build +++ b/src/panfrost/bifrost/meson.build @@ -45,6 +45,7 @@ libpanfrost_bifrost_files = files( 'bi_validate.c', 'bir.c', 'bifrost_compile.c', + 'valhall/va_optimize.c', 'valhall/va_pack.c', ) diff --git a/src/panfrost/bifrost/valhall/va_compiler.h b/src/panfrost/bifrost/valhall/va_compiler.h index a6409ae6fee9..d7207cd77b4e 100644 --- a/src/panfrost/bifrost/valhall/va_compiler.h +++ b/src/panfrost/bifrost/valhall/va_compiler.h @@ -34,6 +34,7 @@ extern "C" { #endif +void va_fuse_add_imm(bi_instr *I); uint64_t va_pack_instr(const bi_instr *I, unsigned flow); static inline unsigned diff --git a/src/panfrost/bifrost/valhall/va_optimize.c b/src/panfrost/bifrost/valhall/va_optimize.c new file mode 100644 index 000000000000..118321c9a680 --- /dev/null +++ b/src/panfrost/bifrost/valhall/va_optimize.c @@ -0,0 +1,113 @@ +/* + * Copyright (C) 2021 Collabora Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "va_compiler.h" + +/* Valhall specific instruction selection optimizations */ + +static enum bi_opcode +va_op_add_imm(enum bi_opcode op) +{ + switch (op) { + case BI_OPCODE_FADD_F32: return BI_OPCODE_FADD_IMM_F32; + case BI_OPCODE_FADD_V2F16: return BI_OPCODE_FADD_IMM_V2F16; + case BI_OPCODE_IADD_S32: + case BI_OPCODE_IADD_U32: return BI_OPCODE_IADD_IMM_I32; + case BI_OPCODE_IADD_V2S16: + case BI_OPCODE_IADD_V2U16: return BI_OPCODE_IADD_IMM_V2I16; + case BI_OPCODE_IADD_V4S8: + case BI_OPCODE_IADD_V4U8: return BI_OPCODE_IADD_IMM_V4I8; + default: return 0; + } +} + +static bool +va_is_add_imm(bi_instr *I, unsigned s) +{ + return I->src[s].swizzle == BI_SWIZZLE_H01 && + !I->src[s].abs && !I->src[s].neg && !I->clamp && !I->round; +} + +static unsigned +va_choose_imm(bi_instr *I) +{ + for (unsigned i = 0; i < 2; ++i) { + if (I->src[i].type == BI_INDEX_CONSTANT) + return i; + } + + return ~0; +} + +/* Lower MOV.i32 #constant --> IADD_IMM.i32 0x0, #constant */ +static void +va_lower_mov_imm(bi_instr *I) +{ + if (I->src[0].type == BI_INDEX_CONSTANT) { + I->op = BI_OPCODE_IADD_IMM_I32; + I->index = I->src[0].value; + I->src[0] = bi_zero(); + } +} + +void +va_fuse_add_imm(bi_instr *I) +{ + if (I->op == BI_OPCODE_MOV_I32) { + va_lower_mov_imm(I); + return; + } + + enum bi_opcode op = va_op_add_imm(I->op); + if (!op) return; + + unsigned s = va_choose_imm(I); + if (s > 1) return; + if (!va_is_add_imm(I, 1 - s)) return; + + I->op = op; + I->index = bi_apply_swizzle(I->src[s].value, I->src[s].swizzle); + + assert(!I->src[s].abs && "redundant .abs set"); + + /* If the constant is negated, flip the sign bit */ + if (I->src[s].neg) { + if (I->op == BI_OPCODE_FADD_IMM_F32) + I->index ^= (1 << 31); + else if (I->op == BI_OPCODE_FADD_IMM_V2F16) + I->index ^= (1 << 31) | (1 << 15); + else + unreachable("unexpected .neg"); + } + + I->src[0] = I->src[1 - s]; + I->src[1] = bi_null(); +} + +void +va_optimize(bi_context *ctx) +{ + bi_foreach_instr_global(ctx, I) { + 
va_fuse_add_imm(I); + } +} -- GitLab From 676d9c94412b4d753f8f8949759a6a67f43f5345 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 22 Jul 2021 11:59:09 -0400 Subject: [PATCH 21/31] pan/va: Add unit tests for ADD_IMM optimizations Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/meson.build | 3 +- .../bifrost/valhall/test/test-add-imm.cpp | 137 ++++++++++++++++++ 2 files changed, 139 insertions(+), 1 deletion(-) create mode 100644 src/panfrost/bifrost/valhall/test/test-add-imm.cpp diff --git a/src/panfrost/bifrost/meson.build b/src/panfrost/bifrost/meson.build index 3191d24ec651..914bd53f407f 100644 --- a/src/panfrost/bifrost/meson.build +++ b/src/panfrost/bifrost/meson.build @@ -160,10 +160,11 @@ if with_tests 'test/test-pack-formats.cpp', 'test/test-packing.cpp', 'test/test-scheduler-predicates.cpp', + 'valhall/test/test-add-imm.cpp', ), c_args : [c_msvc_compat_args, no_override_init_args], gnu_symbol_visibility : 'hidden', - include_directories : [inc_include, inc_src, inc_mesa], + include_directories : [inc_include, inc_src, inc_mesa, inc_valhall], dependencies: [idep_gtest, idep_nir, idep_bi_opcodes_h, idep_bi_builder_h], link_with : [libpanfrost_bifrost], ), diff --git a/src/panfrost/bifrost/valhall/test/test-add-imm.cpp b/src/panfrost/bifrost/valhall/test/test-add-imm.cpp new file mode 100644 index 000000000000..7004bbbd564c --- /dev/null +++ b/src/panfrost/bifrost/valhall/test/test-add-imm.cpp @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2021 Collabora, Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "va_compiler.h" +#include "bi_test.h" +#include "bi_builder.h" +#include "util/u_cpu_detect.h" + +#include + +static inline void +add_imm(bi_context *ctx) +{ + bi_foreach_instr_global(ctx, I) { + va_fuse_add_imm(I); + } +} + +#define CASE(instr, expected) INSTRUCTION_CASE(instr, expected, add_imm) +#define NEGCASE(instr) CASE(instr, instr) + +class AddImm : public testing::Test { +protected: + AddImm() { + mem_ctx = ralloc_context(NULL); + + /* For bi_imm_f16 */ + util_cpu_detect(); + } + + ~AddImm() { + ralloc_free(mem_ctx); + } + + void *mem_ctx; +}; + + +TEST_F(AddImm, Basic) { + CASE(bi_mov_i32_to(b, bi_register(63), bi_imm_u32(0xABAD1DEA)), + bi_iadd_imm_i32_to(b, bi_register(63), bi_zero(), 0xABAD1DEA)); + + CASE(bi_fadd_f32_to(b, bi_register(1), bi_register(2), bi_imm_f32(42.0), BI_ROUND_NONE), + bi_fadd_imm_f32_to(b, bi_register(1), bi_register(2), fui(42.0))); + + CASE(bi_fadd_f32_to(b, bi_register(1), bi_discard(bi_register(2)), bi_imm_f32(42.0), BI_ROUND_NONE), + bi_fadd_imm_f32_to(b, bi_register(1), bi_discard(bi_register(2)), fui(42.0))); + + CASE(bi_fadd_f32_to(b, bi_register(1), bi_discard(bi_register(2)), bi_neg(bi_imm_f32(42.0)), BI_ROUND_NONE), + bi_fadd_imm_f32_to(b, bi_register(1), bi_discard(bi_register(2)), fui(-42.0))); +} + +TEST_F(AddImm, Commutativty) { + CASE(bi_fadd_f32_to(b, bi_register(1), bi_imm_f32(42.0), bi_register(2), BI_ROUND_NONE), + bi_fadd_imm_f32_to(b, bi_register(1), bi_register(2), fui(42.0))); +} + +TEST_F(AddImm, NoModifiers) { + NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_register(2), bi_imm_f32(42.0), + BI_ROUND_RTP)); + + NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_abs(bi_register(2)), bi_imm_f32(42.0), + BI_ROUND_NONE)); + + NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_neg(bi_register(2)), bi_imm_f32(42.0), + BI_ROUND_NONE)); + + NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_f32(42.0), + BI_ROUND_NONE)); +} + +TEST_F(AddImm, NoClamp) { + NEGCASE({ + 
bi_instr *I = bi_fadd_f32_to(b, bi_register(1), bi_register(2), + bi_imm_f32(42.0), BI_ROUND_NONE); + I->clamp = BI_CLAMP_CLAMP_M1_1; + }); +} + +TEST_F(AddImm, OtherTypes) { + CASE(bi_fadd_v2f16_to(b, bi_register(1), bi_register(2), bi_imm_f16(42.0), BI_ROUND_NONE), + bi_fadd_imm_v2f16_to(b, bi_register(1), bi_register(2), 0x51405140)); + + CASE(bi_iadd_u32_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false), + bi_iadd_imm_i32_to(b, bi_register(1), bi_register(2), 0xDEADBEEF)); + + CASE(bi_iadd_v2u16_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false), + bi_iadd_imm_v2i16_to(b, bi_register(1), bi_register(2), 0xDEADBEEF)); + + CASE(bi_iadd_v4u8_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false), + bi_iadd_imm_v4i8_to(b, bi_register(1), bi_register(2), 0xDEADBEEF)); + + CASE(bi_iadd_s32_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false), + bi_iadd_imm_i32_to(b, bi_register(1), bi_register(2), 0xDEADBEEF)); + + CASE(bi_iadd_v2s16_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false), + bi_iadd_imm_v2i16_to(b, bi_register(1), bi_register(2), 0xDEADBEEF)); + + CASE(bi_iadd_v4s8_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false), + bi_iadd_imm_v4i8_to(b, bi_register(1), bi_register(2), 0xDEADBEEF)); + + NEGCASE(bi_fadd_v2f16_to(b, bi_register(1), bi_register(2), bi_imm_f16(42.0), BI_ROUND_RTZ)); + NEGCASE(bi_iadd_u32_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_u32(0xDEADBEEF), false)); + NEGCASE(bi_iadd_v2u16_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_u32(0xDEADBEEF), false)); + NEGCASE(bi_iadd_u32_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), true)); + NEGCASE(bi_iadd_s32_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_u32(0xDEADBEEF), false)); + NEGCASE(bi_iadd_v2s16_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_u32(0xDEADBEEF), false)); + + 
NEGCASE(bi_iadd_s32_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), true)); +} + +TEST_F(AddImm, Int8) { + bi_index idx = bi_register(2); + idx.swizzle = BI_SWIZZLE_B0000; + NEGCASE(bi_iadd_v4u8_to(b, bi_register(1), idx, bi_imm_u32(0xDEADBEEF), false)); + NEGCASE(bi_iadd_v4s8_to(b, bi_register(1), idx, bi_imm_u32(0xDEADBEEF), false)); +} -- GitLab From fd1906afea59073780939810e0e46094264677d3 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 23 Jul 2021 11:21:29 -0400 Subject: [PATCH 22/31] pan/va: Add FAU validation Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/meson.build | 2 + .../valhall/test/test-validate-fau.cpp | 124 +++++++++++++ src/panfrost/bifrost/valhall/va_compiler.h | 3 + src/panfrost/bifrost/valhall/va_validate.c | 170 ++++++++++++++++++ 4 files changed, 299 insertions(+) create mode 100644 src/panfrost/bifrost/valhall/test/test-validate-fau.cpp create mode 100644 src/panfrost/bifrost/valhall/va_validate.c diff --git a/src/panfrost/bifrost/meson.build b/src/panfrost/bifrost/meson.build index 914bd53f407f..81eb62132539 100644 --- a/src/panfrost/bifrost/meson.build +++ b/src/panfrost/bifrost/meson.build @@ -47,6 +47,7 @@ libpanfrost_bifrost_files = files( 'bifrost_compile.c', 'valhall/va_optimize.c', 'valhall/va_pack.c', + 'valhall/va_validate.c', ) bifrost_gen_disasm_c = custom_target( @@ -161,6 +162,7 @@ if with_tests 'test/test-packing.cpp', 'test/test-scheduler-predicates.cpp', 'valhall/test/test-add-imm.cpp', + 'valhall/test/test-validate-fau.cpp', ), c_args : [c_msvc_compat_args, no_override_init_args], gnu_symbol_visibility : 'hidden', diff --git a/src/panfrost/bifrost/valhall/test/test-validate-fau.cpp b/src/panfrost/bifrost/valhall/test/test-validate-fau.cpp new file mode 100644 index 000000000000..4275d0359fd1 --- /dev/null +++ b/src/panfrost/bifrost/valhall/test/test-validate-fau.cpp @@ -0,0 +1,124 @@ +/* + * Copyright (C) 2021 Collabora, Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "va_compiler.h" +#include "bi_test.h" +#include "bi_builder.h" + +#include + +#define CASE(instr, expected) do { \ + if (va_validate_fau(instr) != expected) { \ + fprintf(stderr, "Incorrect validation for:\n"); \ + bi_print_instr(instr, stderr); \ + fprintf(stderr, "\n"); \ + ADD_FAILURE(); \ + } \ +} while(0) + +#define VALID(instr) CASE(instr, true) +#define INVALID(instr) CASE(instr, false) + +class ValidateFau : public testing::Test { +protected: + ValidateFau() { + mem_ctx = ralloc_context(NULL); + b = bit_builder(mem_ctx); + + zero = bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | 0), false); + imm1 = bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | 1), false); + imm2 = bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | 2), false); + unif = bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 5), false); + unif2 = bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 6), false); + core_id = bi_fau(BIR_FAU_CORE_ID, false); + lane_id = bi_fau(BIR_FAU_LANE_ID, false); + } + + ~ValidateFau() { + ralloc_free(mem_ctx); + } + + void *mem_ctx; + bi_builder *b; + bi_index zero, imm1, imm2, unif, unif2, core_id, lane_id; +}; + +TEST_F(ValidateFau, One64BitUniformSlot) +{ + VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_register(3), + unif, BI_ROUND_NONE)); + VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_word(unif, 1), + unif, BI_ROUND_NONE)); + VALID(bi_fma_f32_to(b, bi_register(1), unif, unif, bi_word(unif, 1), + BI_ROUND_NONE)); + INVALID(bi_fma_f32_to(b, bi_register(1), unif, unif2, bi_register(1), + BI_ROUND_NONE)); + INVALID(bi_fma_f32_to(b, bi_register(1), unif, unif2, bi_word(unif, 1), + BI_ROUND_NONE)); + + /* Crafted case that appears correct at first glance and was erroneously + * marked as valid in early versions of the validator. 
+ */ + INVALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), + bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 0), false), + bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 1), true), + BI_ROUND_NONE)); +} + +TEST_F(ValidateFau, Combined64BitUniformsConstants) +{ + VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_word(unif, 1), + unif, BI_ROUND_NONE)); + VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), zero, + unif, BI_ROUND_NONE)); + VALID(bi_fma_f32_to(b, bi_register(1), zero, imm1, imm1, BI_ROUND_NONE)); + INVALID(bi_fma_f32_to(b, bi_register(1), zero, bi_word(unif, 1), + unif, BI_ROUND_NONE)); + INVALID(bi_fma_f32_to(b, bi_register(1), zero, imm1, imm2, BI_ROUND_NONE)); +} + +TEST_F(ValidateFau, UniformsOnlyInDefaultMode) +{ + INVALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_word(unif, 1), + lane_id, BI_ROUND_NONE)); + INVALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_word(unif, 1), + core_id, BI_ROUND_NONE)); +} + +TEST_F(ValidateFau, SingleSpecialImmediate) +{ + VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_register(2), + lane_id, BI_ROUND_NONE)); + VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_register(2), + core_id, BI_ROUND_NONE)); + INVALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), lane_id, + core_id, BI_ROUND_NONE)); +} + +TEST_F(ValidateFau, SmokeTests) +{ + VALID(bi_mov_i32_to(b, bi_register(1), bi_register(2))); + VALID(bi_mov_i32_to(b, bi_register(1), unif)); + VALID(bi_fma_f32_to(b, bi_register(1), bi_discard(bi_register(1)), + unif, bi_neg(zero), BI_ROUND_NONE)); +} diff --git a/src/panfrost/bifrost/valhall/va_compiler.h b/src/panfrost/bifrost/valhall/va_compiler.h index d7207cd77b4e..1d2a76a3a805 100644 --- a/src/panfrost/bifrost/valhall/va_compiler.h +++ b/src/panfrost/bifrost/valhall/va_compiler.h @@ -34,6 +34,9 @@ extern "C" { #endif +bool va_validate_fau(bi_instr *I); +void va_validate(FILE *fp, bi_context *ctx); +void va_repair_fau(bi_builder *b, bi_instr *I); void 
va_fuse_add_imm(bi_instr *I); uint64_t va_pack_instr(const bi_instr *I, unsigned flow); diff --git a/src/panfrost/bifrost/valhall/va_validate.c b/src/panfrost/bifrost/valhall/va_validate.c new file mode 100644 index 000000000000..e404d74c05f9 --- /dev/null +++ b/src/panfrost/bifrost/valhall/va_validate.c @@ -0,0 +1,170 @@ +/* + * Copyright (C) 2021 Collabora Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "va_compiler.h" +#include "valhall.h" +#include "bi_builder.h" + +/* Valhall has limits on access to fast-access uniforms: + * + * An instruction may access no more than a single 64-bit uniform slot. + * An instruction may access no more than 64-bits of combined uniforms and constants. + * An instruction may access no more than a single special immediate (e.g. lane_id). + * + * We validate these constraints. 
+ * + * An instruction may only access a single page of (special or uniform) FAU. + * This constraint does not need explicit validation: since FAU slots are + * naturally aligned, they never cross page boundaries, so this condition is + * implied by only accessing a single 64-bit slot. + */ + +struct fau_state { + signed uniform_slot; + bi_index buffer[2]; +}; + +static bool +fau_state_buffer(struct fau_state *fau, bi_index idx) +{ + for (unsigned i = 0; i < ARRAY_SIZE(fau->buffer); ++i) { + if (bi_is_word_equiv(fau->buffer[i], idx)) + return true; + else if (bi_is_null(fau->buffer[i])) { + fau->buffer[i] = idx; + return true; + } + } + + return false; +} + +static bool +fau_state_uniform(struct fau_state *fau, bi_index idx) +{ + /* Each slot is 64-bits. The low/high half is encoded as the offset of the + * bi_index, which we want to ignore. + */ + unsigned slot = (idx.value & 63); + + if (fau->uniform_slot < 0) + fau->uniform_slot = slot; + + return fau->uniform_slot == slot; +} + +static bool +fau_is_special(enum bir_fau fau) +{ + return !(fau & (BIR_FAU_UNIFORM | BIR_FAU_IMMEDIATE)); +} + +static bool +fau_state_special(struct fau_state *fau, bi_index idx) +{ + for (unsigned i = 0; i < ARRAY_SIZE(fau->buffer); ++i) { + bi_index buf = fau->buffer[i]; + bool special = !bi_is_null(buf) && fau_is_special(buf.value); + + if (special && !bi_is_equiv(buf, idx)) + return false; + } + + return true; +} + +static bool +valid_src(struct fau_state *fau, unsigned fau_page, bi_index src) +{ + if (src.type != BI_INDEX_FAU) + return true; + + bool valid = (fau_page == va_fau_page(src.value)); + valid &= fau_state_buffer(fau, src); + + if (src.value & BIR_FAU_UNIFORM) + valid &= fau_state_uniform(fau, src); + else if (fau_is_special(src.value)) + valid &= fau_state_special(fau, src); + + return valid; +} + +bool +va_validate_fau(bi_instr *I) +{ + bool valid = true; + struct fau_state fau = { .uniform_slot = -1 }; + unsigned fau_page = va_select_fau_page(I); + + bi_foreach_src(I, 
s) { + valid &= valid_src(&fau, fau_page, I->src[s]); + } + + return valid; +} + +void +va_repair_fau(bi_builder *b, bi_instr *I) +{ + struct fau_state fau = { .uniform_slot = -1 }; + unsigned fau_page = va_select_fau_page(I); + + bi_foreach_src(I, s) { + struct fau_state push = fau; + bi_index src = I->src[s]; + + if (!valid_src(&fau, fau_page, src)) { + bi_index copy = bi_mov_i32(b, bi_strip_index(src)); + I->src[s] = bi_replace_index(src, copy); + + /* Rollback update. Since the replacement move doesn't affect FAU + * state, there is no need to call valid_src again. + */ + fau = push; + } + } +} + +void +va_validate(FILE *fp, bi_context *ctx) +{ + bool errors = false; + + bi_foreach_instr_global(ctx, I) { + if (!va_validate_fau(I)) { + if (!errors) { + fprintf(fp, "Validation failed, this is a bug. Shader:\n\n"); + bi_print_shader(ctx, fp); + fprintf(fp, "Offending code:\n"); + } + + bi_print_instr(I, fp); + fprintf(fp, "\n"); + errors = true; + } + } + + if (errors) + exit(1); +} -- GitLab From b8f912e5471da69cbe4b580195f8267ef45e5bb8 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 23 Jul 2021 11:21:35 -0400 Subject: [PATCH 23/31] pan/va: Validate FAU before packing These are pre-conditions required for packing. 
Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/valhall/va_pack.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/panfrost/bifrost/valhall/va_pack.c b/src/panfrost/bifrost/valhall/va_pack.c index 95f088c79f54..fc7fea6ae0de 100644 --- a/src/panfrost/bifrost/valhall/va_pack.c +++ b/src/panfrost/bifrost/valhall/va_pack.c @@ -836,6 +836,8 @@ bi_pack_valhall(bi_context *ctx, struct util_dynarray *emission) { unsigned orig_size = emission->size; + va_validate(stderr, ctx); + bi_foreach_block(ctx, block) { bi_foreach_instr_in_block(block, I) { unsigned flow = va_pack_flow(block, I); -- GitLab From b796d32564fd5aadc3c0d95011813df237363e82 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Tue, 27 Jul 2021 14:17:14 -0400 Subject: [PATCH 24/31] pan/va: Add constant lowering pass Valhall has a lookup table for common constants. Add a pass to take advantage of it, lowering away immediate indices. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/meson.build | 1 + src/panfrost/bifrost/valhall/va_compiler.h | 10 + .../bifrost/valhall/va_lower_constants.c | 179 ++++++++++++++++++ 3 files changed, 190 insertions(+) create mode 100644 src/panfrost/bifrost/valhall/va_lower_constants.c diff --git a/src/panfrost/bifrost/meson.build b/src/panfrost/bifrost/meson.build index 81eb62132539..0bc5222c1755 100644 --- a/src/panfrost/bifrost/meson.build +++ b/src/panfrost/bifrost/meson.build @@ -45,6 +45,7 @@ libpanfrost_bifrost_files = files( 'bi_validate.c', 'bir.c', 'bifrost_compile.c', + 'valhall/va_lower_constants.c', 'valhall/va_optimize.c', 'valhall/va_pack.c', 'valhall/va_validate.c', diff --git a/src/panfrost/bifrost/valhall/va_compiler.h b/src/panfrost/bifrost/valhall/va_compiler.h index 1d2a76a3a805..59b7634e092c 100644 --- a/src/panfrost/bifrost/valhall/va_compiler.h +++ b/src/panfrost/bifrost/valhall/va_compiler.h @@ -38,6 +38,7 @@ bool va_validate_fau(bi_instr *I); void va_validate(FILE *fp, bi_context *ctx); void 
va_repair_fau(bi_builder *b, bi_instr *I); void va_fuse_add_imm(bi_instr *I); +void va_lower_constants(bi_context *ctx, bi_instr *I); uint64_t va_pack_instr(const bi_instr *I, unsigned flow); static inline unsigned @@ -79,6 +80,15 @@ va_select_fau_page(const bi_instr *I) return 0; } +/** Cycle model for Valhall. Results need to be normalized */ +struct va_stats { + /** Counts per pipe */ + unsigned fma, cvt, sfu, v, ls, t; +}; + +void +va_count_instr_stats(bi_instr *I, struct va_stats *stats); + #ifdef __cplusplus } /* extern C */ #endif diff --git a/src/panfrost/bifrost/valhall/va_lower_constants.c b/src/panfrost/bifrost/valhall/va_lower_constants.c new file mode 100644 index 000000000000..ede38e871ee8 --- /dev/null +++ b/src/panfrost/bifrost/valhall/va_lower_constants.c @@ -0,0 +1,179 @@ +/* + * Copyright (C) 2021 Collabora Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "va_compiler.h" +#include "valhall.h" +#include "bi_builder.h" + +/* Only some special immediates are available, as specified in the Table of + * Immediates in the specification. Other immediates must be lowered, either to + * uniforms or to moves. + */ + +static bi_index +va_mov_imm(bi_builder *b, uint32_t imm) +{ + bi_index zero = bi_fau(BIR_FAU_IMMEDIATE | 0, false); + return bi_iadd_imm_i32(b, zero, imm); +} + +static bi_index +va_lut_index_32(uint32_t imm) +{ + for (unsigned i = 0; i < ARRAY_SIZE(valhall_immediates); ++i) { + if (valhall_immediates[i] == imm) + return va_lut(i); + } + + return bi_null(); +} + +static bi_index +va_lut_index_16(uint16_t imm) +{ + uint16_t *arr16 = (uint16_t *) valhall_immediates; + + for (unsigned i = 0; i < (2 * ARRAY_SIZE(valhall_immediates)); ++i) { + if (arr16[i] == imm) + return bi_half(va_lut(i >> 1), i & 1); + } + + return bi_null(); +} + +UNUSED static bi_index +va_lut_index_8(uint8_t imm) +{ + uint8_t *arr8 = (uint8_t *) valhall_immediates; + + for (unsigned i = 0; i < (4 * ARRAY_SIZE(valhall_immediates)); ++i) { + if (arr8[i] == imm) + return bi_byte(va_lut(i >> 2), i & 3); + } + + return bi_null(); +} + +static bi_index +va_demote_constant_fp16(uint32_t value) +{ + uint16_t fp16 = _mesa_float_to_half(uif(value)); + + /* Only convert if it is exact */ + if (fui(_mesa_half_to_float(fp16)) == value) + return va_lut_index_16(fp16); + else + return bi_null(); +} + +static bi_index +va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info, bool staging) +{ + /* Try the constant as-is */ + if (!staging) { + bi_index lut = va_lut_index_32(value); + if (!bi_is_null(lut)) return lut; + } + + /* Try using a single half of a FP16 constant */ + bool replicated_halves = (value & 0xFFFF) == (value >> 16); + if (!staging && info.swizzle && info.size == VA_SIZE_16 && replicated_halves) { + bi_index lut = va_lut_index_16(value & 0xFFFF); + if (!bi_is_null(lut)) return lut; + } + + /* TODO: Distinguish 
sign extend from zero extend */ +#if 0 + /* Try zero-extending a single byte */ + if (!staging && info.widen && value <= UINT8_MAX) { + bi_index lut = va_lut_index_8(value); + if (!bi_is_null(lut)) return lut; + } + + /* Try zero-extending a single halfword */ + if (!staging && info.widen && value <= UINT16_MAX) { + bi_index lut = va_lut_index_16(value); + if (!bi_is_null(lut)) return lut; + } +#endif + + /* Try demoting the constant to FP16 */ + if (!staging && info.swizzle && info.size == VA_SIZE_32) { + bi_index lut = va_demote_constant_fp16(value); + if (!bi_is_null(lut)) return lut; + } + + /* TODO: Optimize to uniform */ + return va_mov_imm(b, value); +} + +void +va_lower_constants(bi_context *ctx, bi_instr *I) +{ + bi_builder b = bi_init_builder(ctx, bi_before_instr(I)); + + bi_foreach_src(I, s) { + if (I->src[s].type == BI_INDEX_CONSTANT) { + /* abs(#c) is pointless, but -#c occurs in transcendental sequences */ + assert(!I->src[s].abs && "redundant .abs modifier"); + + bool staging = (s < valhall_opcodes[I->op].nr_staging_srcs); + struct va_src_info info = va_src_info(I->op, s); + uint32_t value = I->src[s].value; + enum bi_swizzle swz = I->src[s].swizzle; + + /* Resolve any swizzle, keeping in mind the different interpretations + * swizzles in different contexts. 
+ */ + if (info.size == VA_SIZE_32) { + /* Extracting a half from the 32-bit value */ + if (swz == BI_SWIZZLE_H00) + value = (value & 0xFFFF); + else if (swz == BI_SWIZZLE_H11) + value = (value >> 16); + else + assert(swz == BI_SWIZZLE_H01); + + /* FP16 -> FP32 */ + if (info.swizzle && swz != BI_SWIZZLE_H01) + value = fui(_mesa_half_to_float(value)); + } else if (info.size == VA_SIZE_16) { + assert(swz >= BI_SWIZZLE_H00 && swz <= BI_SWIZZLE_H11); + value = bi_apply_swizzle(value, swz); + } else if (info.size == VA_SIZE_8 && info.lanes) { + /* 8-bit extract */ + unsigned chan = (swz - BI_SWIZZLE_B0000); + assert(chan < 4); + + value = (value >> (8 * chan)) & 0xFF; + } else { + /* TODO: Any other special handling? */ + value = bi_apply_swizzle(value, swz); + } + + bi_index cons = va_resolve_constant(&b, value, info, staging); + cons.neg ^= I->src[s].neg; + I->src[s] = cons; + } + } +} -- GitLab From 9a9b20e6526809fab8d3bb021c85bcec3720620e Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 23 Jul 2021 16:35:06 -0400 Subject: [PATCH 25/31] pan/va: Add instruction selection lowering pass Valhall removes certain instructions from Bifrost, requiring a canonical lowering. 
Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/meson.build | 1 + src/panfrost/bifrost/valhall/va_compiler.h | 1 + src/panfrost/bifrost/valhall/va_lower_isel.c | 116 +++++++++++++++++++ 3 files changed, 118 insertions(+) create mode 100644 src/panfrost/bifrost/valhall/va_lower_isel.c diff --git a/src/panfrost/bifrost/meson.build b/src/panfrost/bifrost/meson.build index 0bc5222c1755..c5de4927d199 100644 --- a/src/panfrost/bifrost/meson.build +++ b/src/panfrost/bifrost/meson.build @@ -46,6 +46,7 @@ libpanfrost_bifrost_files = files( 'bir.c', 'bifrost_compile.c', 'valhall/va_lower_constants.c', + 'valhall/va_lower_isel.c', 'valhall/va_optimize.c', 'valhall/va_pack.c', 'valhall/va_validate.c', diff --git a/src/panfrost/bifrost/valhall/va_compiler.h b/src/panfrost/bifrost/valhall/va_compiler.h index 59b7634e092c..b346561b875a 100644 --- a/src/panfrost/bifrost/valhall/va_compiler.h +++ b/src/panfrost/bifrost/valhall/va_compiler.h @@ -39,6 +39,7 @@ void va_validate(FILE *fp, bi_context *ctx); void va_repair_fau(bi_builder *b, bi_instr *I); void va_fuse_add_imm(bi_instr *I); void va_lower_constants(bi_context *ctx, bi_instr *I); +void va_lower_isel(bi_instr *I); uint64_t va_pack_instr(const bi_instr *I, unsigned flow); static inline unsigned diff --git a/src/panfrost/bifrost/valhall/va_lower_isel.c b/src/panfrost/bifrost/valhall/va_lower_isel.c new file mode 100644 index 000000000000..4ad22f613c4b --- /dev/null +++ b/src/panfrost/bifrost/valhall/va_lower_isel.c @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2021 Collabora Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "va_compiler.h" +#include "valhall.h" +#include "bi_builder.h" + +void +va_lower_isel(bi_instr *I) +{ + switch (I->op) { + + /* Integer addition has swizzles and addition with 0 is canonical swizzle */ + case BI_OPCODE_SWZ_V2I16: + I->op = BI_OPCODE_IADD_V2U16; + I->src[1] = bi_zero(); + break; + + case BI_OPCODE_SWZ_V4I8: + I->op = BI_OPCODE_IADD_V4U8; + I->src[1] = bi_zero(); + break; + + /* Needs to output the coverage mask */ + case BI_OPCODE_DISCARD_F32: + assert(bi_is_null(I->dest[0])); + I->dest[0] = bi_register(60); + break; + + /* Extra source in Valhall not yet modeled in the Bifrost IR */ + case BI_OPCODE_ICMP_I32: + I->op = BI_OPCODE_ICMP_U32; + I->src[2] = bi_zero(); + break; + + case BI_OPCODE_ICMP_V2I16: + I->op = BI_OPCODE_ICMP_V2U16; + I->src[2] = bi_zero(); + break; + + case BI_OPCODE_ICMP_V4I8: + I->op = BI_OPCODE_ICMP_V4U8; + I->src[2] = bi_zero(); + break; + + case BI_OPCODE_ICMP_U32: + case BI_OPCODE_ICMP_V2U16: + case BI_OPCODE_ICMP_V4U8: + case BI_OPCODE_ICMP_S32: + case BI_OPCODE_ICMP_V2S16: + case BI_OPCODE_ICMP_V4S8: + case BI_OPCODE_FCMP_F32: + case BI_OPCODE_FCMP_V2F16: + I->src[2] = bi_zero(); + break; + + /* Integer CSEL must have a signedness */ + case BI_OPCODE_CSEL_I32: + case BI_OPCODE_CSEL_V2I16: + assert(I->cmpf == BI_CMPF_EQ || I->cmpf == BI_CMPF_NE); + + I->op = (I->op == BI_OPCODE_CSEL_I32) ? BI_OPCODE_CSEL_U32 : + BI_OPCODE_CSEL_V2U16; + break; + + /* Jump -> conditional branch with condition tied to true. */ + case BI_OPCODE_JUMP: + I->op = I->branch_target ? 
BI_OPCODE_BRANCHZ_I16 : BI_OPCODE_BRANCHZI; + I->src[1] = I->src[0]; + I->src[0] = bi_zero(); + I->cmpf = BI_CMPF_EQ; + break; + + case BI_OPCODE_AXCHG_I32: + I->op = BI_OPCODE_ATOM_RETURN_I32; + I->atom_opc = BI_ATOM_OPC_AXCHG; + I->sr_count = 1; + break; + + case BI_OPCODE_ACMPXCHG_I32: + I->op = BI_OPCODE_ATOM_RETURN_I32; + I->atom_opc = BI_ATOM_OPC_ACMPXCHG; + /* Reads 2, this is special cased in bir.c */ + I->sr_count = 1; + break; + + case BI_OPCODE_ATOM_RETURN_I32: + if (bi_is_null(I->dest[0])) + I->op = BI_OPCODE_ATOM_I32; + + break; + + default: + break; + } +} -- GitLab From 1745c893124c6a8e554d2a42ad9e8ae05841b075 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 26 Jul 2021 18:05:39 -0400 Subject: [PATCH 26/31] pan/va: Lower branch offsets Logic is lifted from bi_layout.c, adapted to work on instructions (not clauses) and for Valhall's off-by-one semantic which is annoyingly different than Bifrost. (But the same as Midgard -- Bifrost was annoyingly different than Midgard!) 
Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/valhall/va_pack.c | 74 ++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/src/panfrost/bifrost/valhall/va_pack.c b/src/panfrost/bifrost/valhall/va_pack.c index fc7fea6ae0de..e7bd014fa028 100644 --- a/src/panfrost/bifrost/valhall/va_pack.c +++ b/src/panfrost/bifrost/valhall/va_pack.c @@ -831,6 +831,77 @@ va_pack_flow(bi_block *block, bi_instr *I) return VA_FLOW_NONE; } +static unsigned +va_instructions_in_block(bi_block *block) +{ + unsigned offset = 0; + + bi_foreach_instr_in_block(block, _) { + offset++; + } + + return offset; +} + +/* Calculate branch_offset from a branch_target for a direct relative branch */ + +static void +va_lower_branch_target(bi_context *ctx, bi_block *start, bi_instr *I) +{ + /* Precondition: unlowered relative branch */ + bi_block *target = I->branch_target; + assert(target != NULL); + + /* Signed since we might jump backwards */ + signed offset = 0; + + /* Determine if the target block is strictly greater in source order */ + bool forwards = target->name > start->name; + + if (forwards) { + /* We have to jump through this block */ + bi_foreach_instr_in_block_from(start, _, I) { + offset++; + } + + /* We then need to jump over every following block until the target */ + bi_foreach_block_from(ctx, start, blk) { + /* End just before the target */ + if (blk == target) + break; + + /* Count other blocks */ + if (blk != start) + offset += va_instructions_in_block(blk); + } + } else { + /* Jump through the beginning of this block */ + bi_foreach_instr_in_block_from_rev(start, ins, I) { + if (ins != I) + offset--; + } + + /* Jump over preceding blocks up to and including the target to get to + * the beginning of the target */ + bi_foreach_block_from_rev(ctx, start, blk) { + if (blk == start) + continue; + + offset -= va_instructions_in_block(blk); + + /* End just after the target */ + if (blk == target) + break; + } + } + + /* Offset is relative to the next 
instruction, so bias */ + offset--; + + /* Update the instruction */ + I->branch_offset = offset; +} + void bi_pack_valhall(bi_context *ctx, struct util_dynarray *emission) { @@ -840,6 +911,9 @@ bi_pack_valhall(bi_context *ctx, struct util_dynarray *emission) bi_foreach_block(ctx, block) { bi_foreach_instr_in_block(block, I) { + if (I->op == BI_OPCODE_BRANCHZ_I16) + va_lower_branch_target(ctx, block, I); + unsigned flow = va_pack_flow(block, I); uint64_t hex = va_pack_instr(I, flow); util_dynarray_append(emission, uint64_t, hex); -- GitLab From 8a258a685c9ef98ff4967bd7bc777f03432a5eb7 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 23 Jul 2021 16:36:04 -0400 Subject: [PATCH 27/31] pan/va: Test instruction selection lowerings Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/meson.build | 1 + .../bifrost/valhall/test/test-lower-isel.cpp | 108 ++++++++++++++++++ 2 files changed, 109 insertions(+) create mode 100644 src/panfrost/bifrost/valhall/test/test-lower-isel.cpp diff --git a/src/panfrost/bifrost/meson.build b/src/panfrost/bifrost/meson.build index c5de4927d199..ffd17f75ce1f 100644 --- a/src/panfrost/bifrost/meson.build +++ b/src/panfrost/bifrost/meson.build @@ -165,6 +165,7 @@ if with_tests 'test/test-scheduler-predicates.cpp', 'valhall/test/test-add-imm.cpp', 'valhall/test/test-validate-fau.cpp', + 'valhall/test/test-lower-isel.cpp', ), c_args : [c_msvc_compat_args, no_override_init_args], gnu_symbol_visibility : 'hidden', diff --git a/src/panfrost/bifrost/valhall/test/test-lower-isel.cpp b/src/panfrost/bifrost/valhall/test/test-lower-isel.cpp new file mode 100644 index 000000000000..f142e116d2cf --- /dev/null +++ b/src/panfrost/bifrost/valhall/test/test-lower-isel.cpp @@ -0,0 +1,108 @@ +/* + * Copyright (C) 2021 Collabora, Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "va_compiler.h" +#include "bi_test.h" +#include "bi_builder.h" + +#include + +static inline void +case_cb(bi_context *ctx) +{ + bi_foreach_instr_global(ctx, I) { + va_lower_isel(I); + } +} + +#define CASE(instr, expected) INSTRUCTION_CASE(instr, expected, case_cb) +#define NEGCASE(instr) CASE(instr, instr) + +class LowerIsel : public testing::Test { +protected: + LowerIsel() { + mem_ctx = ralloc_context(NULL); + reg = bi_register(1); + } + + ~LowerIsel() { + ralloc_free(mem_ctx); + } + + void *mem_ctx; + bi_index reg; +}; + +TEST_F(LowerIsel, 8BitSwizzles) { + for (unsigned i = 0; i < 4; ++i) { + CASE(bi_swz_v4i8_to(b, reg, bi_byte(reg, i)), + bi_iadd_v4u8_to(b, reg, bi_byte(reg, i), bi_zero(), false)); + } +} + +TEST_F(LowerIsel, 16BitSwizzles) { + for (unsigned i = 0; i < 2; ++i) { + for (unsigned j = 0; j < 2; ++j) { + CASE(bi_swz_v2i16_to(b, reg, bi_swz_16(reg, i, j)), + bi_iadd_v2u16_to(b, reg, bi_swz_16(reg, i, j), bi_zero(), false)); + } + } +} + +TEST_F(LowerIsel, DiscardImplicitR60) { + CASE(bi_discard_f32(b, reg, reg, BI_CMPF_EQ), { + bi_instr *I = bi_discard_f32(b, reg, reg, BI_CMPF_EQ); + I->dest[0] = bi_register(60); + }); +} + +TEST_F(LowerIsel, JumpsLoweredToBranches) { + bi_block block = { }; + + CASE({ + bi_instr *I = bi_jump(b, bi_imm_u32(0xDEADBEEF)); + I->branch_target = █ + }, { + bi_instr *I = bi_branchz_i16(b, bi_zero(), bi_imm_u32(0xDEADBEEF), BI_CMPF_EQ); + I->branch_target = █ + }); +} + +TEST_F(LowerIsel, IndirectJumpsLoweredToBranches) { + CASE(bi_jump(b, bi_register(17)), + bi_branchzi(b, bi_zero(), bi_register(17), BI_CMPF_EQ)); +} + +TEST_F(LowerIsel, IntegerCSEL) { + CASE(bi_csel_i32(b, reg, reg, reg, reg, BI_CMPF_EQ), + bi_csel_u32(b, reg, reg, reg, reg, BI_CMPF_EQ)); + + CASE(bi_csel_v2i16(b, reg, reg, reg, reg, BI_CMPF_EQ), + bi_csel_v2u16(b, reg, reg, reg, reg, BI_CMPF_EQ)); +} + +TEST_F(LowerIsel, Smoke) { + NEGCASE(bi_fadd_f32_to(b, reg, reg, reg, BI_ROUND_RTP)); + NEGCASE(bi_csel_s32_to(b, reg, reg, reg, reg, 
reg, BI_CMPF_LT)); + NEGCASE(bi_csel_u32_to(b, reg, reg, reg, reg, reg, BI_CMPF_LT)); +} -- GitLab From 8bc268f2d54895d16a132d4265aebaa05b646050 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 2 Aug 2021 17:11:03 -0400 Subject: [PATCH 28/31] pan/va: Implement the cycle model Will feed into shader-db reporting, and maybe other things eventually. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/meson.build | 1 + src/panfrost/bifrost/valhall/va_perf.c | 77 ++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) create mode 100644 src/panfrost/bifrost/valhall/va_perf.c diff --git a/src/panfrost/bifrost/meson.build b/src/panfrost/bifrost/meson.build index ffd17f75ce1f..4fe9909e3c52 100644 --- a/src/panfrost/bifrost/meson.build +++ b/src/panfrost/bifrost/meson.build @@ -49,6 +49,7 @@ libpanfrost_bifrost_files = files( 'valhall/va_lower_isel.c', 'valhall/va_optimize.c', 'valhall/va_pack.c', + 'valhall/va_perf.c', 'valhall/va_validate.c', ) diff --git a/src/panfrost/bifrost/valhall/va_perf.c b/src/panfrost/bifrost/valhall/va_perf.c new file mode 100644 index 000000000000..7175302bf258 --- /dev/null +++ b/src/panfrost/bifrost/valhall/va_perf.c @@ -0,0 +1,77 @@ + +/* + * Copyright (C) 2021 Collabora Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "va_compiler.h" +#include "valhall.h" +#include "bi_builder.h" + +void +va_count_instr_stats(bi_instr *I, struct va_stats *stats) +{ + /* Adjusted for 64-bit arithmetic */ + unsigned words = bi_count_write_registers(I, 0); + + switch (valhall_opcodes[I->op].unit) { + /* Arithmetic is 2x slower for 64-bit than 32-bit */ + case VA_UNIT_FMA: + stats->fma += words; + return; + + case VA_UNIT_CVT: + stats->cvt += words; + return; + + case VA_UNIT_SFU: + stats->sfu += words; + return; + + /* Varying is scaled by 16-bit components interpolated */ + case VA_UNIT_V: + stats->v += (I->vecsize + 1) * + (bi_is_regfmt_16(I->register_format) ? 1 : 2); + return; + + /* We just count load/store and texturing for now */ + case VA_UNIT_LS: + stats->ls++; + return; + + case VA_UNIT_T: + stats->t++; + return; + + /* Fused varying+texture loads 2 FP32 components of varying for texture + * coordinates and then textures */ + case VA_UNIT_VT: + stats->ls += (2 * 2); + stats->t++; + return; + + /* Nothing to do here */ + case VA_UNIT_NONE: + return; + } + + unreachable("Invalid unit"); +} -- GitLab From 18bf478f1e864412792f26b19b63057d663976e9 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 2 Aug 2021 12:30:54 -0400 Subject: [PATCH 29/31] pan/va: Add shader-db support Reports the common subset from Bifrost, as well as Mali offline compiler style normalized cycle counts. 
Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/bifrost_compile.c | 57 +++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index 88e7f1bb1294..9f6e4c85e5f9 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -30,6 +30,7 @@ #include "util/u_debug.h" #include "disassemble.h" +#include "valhall/va_compiler.h" #include "valhall/disassemble.h" #include "bifrost_compile.h" #include "compiler.h" @@ -3425,6 +3426,56 @@ bi_print_stats(bi_context *ctx, unsigned size, FILE *fp) ralloc_free(str); } +static void +va_print_stats(bi_context *ctx, unsigned size, FILE *fp) +{ + unsigned nr_ins = 0; + struct va_stats stats = { 0 }; + + /* Count instructions */ + bi_foreach_instr_global(ctx, I) { + nr_ins++; + va_count_instr_stats(I, &stats); + } + + /* Mali G78 peak performance: + * + * 64 FMA instructions per cycle + * 64 CVT instructions per cycle + * 16 SFU instructions per cycle + * 8 x 32-bit varying channels interpolated per cycle + * 4 texture instructions per cycle + * 1 load/store operation per cycle + */ + + float cycles_fma = ((float) stats.fma) / 64.0; + float cycles_cvt = ((float) stats.cvt) / 64.0; + float cycles_sfu = ((float) stats.sfu) / 16.0; + float cycles_v = ((float) stats.v) / 16.0; + float cycles_t = ((float) stats.t) / 4.0; + float cycles_ls = ((float) stats.ls) / 1.0; + + /* Calculate the bound */ + float cycles = MAX2( + MAX3(cycles_fma, cycles_cvt, cycles_sfu), + MAX3(cycles_v, cycles_t, cycles_ls)); + + + /* Thread count and register pressure are traded off */ + unsigned nr_threads = (ctx->info.work_reg_count <= 32) ? 
2 : 1; + + /* Dump stats */ + fprintf(stderr, "%s - %s shader: " + "%u inst, %f cycles, %f fma, %f cvt, %f sfu, %f v, " + "%f t, %f ls, %u quadwords, %u threads, %u loops, " + "%u:%u spills:fills\n", + ctx->nir->info.label ?: "", + bi_shader_stage_name(ctx), + nr_ins, cycles, cycles_fma, cycles_cvt, cycles_sfu, + cycles_v, cycles_t, cycles_ls, size / 16, nr_threads, + ctx->loop_count, ctx->spills, ctx->fills); +} + static int glsl_type_size(const struct glsl_type *type, bool bindless) { @@ -4198,7 +4249,11 @@ bi_compile_variant_nir(nir_shader *nir, if ((bifrost_debug & BIFROST_DBG_SHADERDB || inputs->shaderdb) && !skip_internal) { - bi_print_stats(ctx, binary->size - offset, stderr); + if (ctx->arch >= 9) { + va_print_stats(ctx, binary->size - offset, stderr); + } else { + bi_print_stats(ctx, binary->size - offset, stderr); + } } return ctx; -- GitLab From cb76cc1f1d0d93ff1e586f029827735b900964b1 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 19 Nov 2021 15:38:04 -0500 Subject: [PATCH 30/31] pan/va: Add packing unit tests Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/meson.build | 1 + .../bifrost/valhall/test/test-packing.cpp | 318 ++++++++++++++++++ 2 files changed, 319 insertions(+) create mode 100644 src/panfrost/bifrost/valhall/test/test-packing.cpp diff --git a/src/panfrost/bifrost/meson.build b/src/panfrost/bifrost/meson.build index 4fe9909e3c52..c795017c6fd6 100644 --- a/src/panfrost/bifrost/meson.build +++ b/src/panfrost/bifrost/meson.build @@ -167,6 +167,7 @@ if with_tests 'valhall/test/test-add-imm.cpp', 'valhall/test/test-validate-fau.cpp', 'valhall/test/test-lower-isel.cpp', + 'valhall/test/test-packing.cpp', ), c_args : [c_msvc_compat_args, no_override_init_args], gnu_symbol_visibility : 'hidden', diff --git a/src/panfrost/bifrost/valhall/test/test-packing.cpp b/src/panfrost/bifrost/valhall/test/test-packing.cpp new file mode 100644 index 000000000000..ba597ddf7eaa --- /dev/null +++ 
b/src/panfrost/bifrost/valhall/test/test-packing.cpp @@ -0,0 +1,318 @@ +/* + * Copyright (C) 2021 Collabora, Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "va_compiler.h" +#include "bi_test.h" +#include "bi_builder.h" + +#include + +#define CASE(instr, expected) do { \ + uint64_t _value = va_pack_instr(instr, 0); \ + if (_value != expected) { \ + fprintf(stderr, "Got %" PRIx64 ", expected %" PRIx64 "\n", _value, (uint64_t) expected); \ + bi_print_instr(instr, stderr); \ + fprintf(stderr, "\n"); \ + ADD_FAILURE(); \ + } \ +} while(0) + +class ValhallPacking : public testing::Test { +protected: + ValhallPacking() { + mem_ctx = ralloc_context(NULL); + b = bit_builder(mem_ctx); + + zero = bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | 0), false); + one = bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | 8), false); + n4567 = bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | 4), true); + } + + ~ValhallPacking() { + ralloc_free(mem_ctx); + } + + void *mem_ctx; + bi_builder *b; + bi_index zero, one, n4567; +}; + +TEST_F(ValhallPacking, Moves) { + CASE(bi_mov_i32_to(b, bi_register(1), bi_register(2)), + 0x0091c10000000002ULL); + CASE(bi_mov_i32_to(b, bi_register(1), bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 5), false)), + 0x0091c1000000008aULL); +} + +TEST_F(ValhallPacking, Fadd) { + CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_register(2), BI_ROUND_NONE), + 0x00a4c00000000201ULL); + CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_abs(bi_register(2)), BI_ROUND_NONE), + 0x00a4c02000000201ULL); + CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_neg(bi_register(2)), BI_ROUND_NONE), + 0x00a4c01000000201ULL); + + CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_swz_16(bi_register(1), false, false), + bi_swz_16(bi_register(0), true, true), BI_ROUND_NONE), + 0x00a5c0000c000001ULL); + + CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(1), bi_register(0), BI_ROUND_NONE), + 0x00a5c00028000001ULL); + + CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(1), + bi_swz_16(bi_register(0), true, false), BI_ROUND_NONE), + 0x00a5c00024000001ULL); + + CASE(bi_fadd_v2f16_to(b, bi_register(0), 
bi_discard(bi_abs(bi_register(0))), + bi_neg(zero), BI_ROUND_NONE), + 0x00a5c0902800c040ULL); + + CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), + zero, BI_ROUND_NONE), + 0x00a4c0000000c001ULL); + + CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), + bi_neg(zero), BI_ROUND_NONE), + 0x00a4c0100000c001ULL); + + CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), + bi_half(bi_register(0), true), BI_ROUND_NONE), + 0x00a4c00008000001ULL); + + CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), + bi_half(bi_register(0), false), BI_ROUND_NONE), + 0x00a4c00004000001ULL); +} + +TEST_F(ValhallPacking, Clper) { + CASE(bi_clper_i32_to(b, bi_register(0), bi_register(0), bi_byte(n4567, 0), + BI_INACTIVE_RESULT_F1, BI_LANE_OP_NONE, BI_SUBGROUP_SUBGROUP16), + 0x00a0c030128fc900); +} + +TEST_F(ValhallPacking, Clamps) { + bi_instr *I = bi_fadd_f32_to(b, bi_register(0), bi_register(1), + bi_neg(bi_abs(bi_register(2))), + BI_ROUND_NONE); + CASE(I, 0x00a4c03000000201ULL); + + I->clamp = BI_CLAMP_CLAMP_M1_1; + CASE(I, 0x00a4c03200000201ULL); +} + +TEST_F(ValhallPacking, Misc) { + CASE(bi_fma_f32_to(b, bi_register(1), bi_discard(bi_register(1)), + bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 4), false), + bi_neg(zero), BI_ROUND_NONE), + 0x00b2c10400c08841ULL); + + CASE(bi_fround_f32_to(b, bi_register(2), bi_discard(bi_neg(bi_register(2))), + BI_ROUND_RTN), + 0x0090c240800d0042ULL); + + CASE(bi_fround_v2f16_to(b, bi_half(bi_register(0), false), bi_register(0), + BI_ROUND_RTN), + 0x00904000a00f0000ULL); + + CASE(bi_fround_v2f16_to(b, bi_half(bi_register(0), false), + bi_swz_16(bi_register(1), true, false), BI_ROUND_RTN), + 0x00904000900f0001ULL); +} + +TEST_F(ValhallPacking, FaddImm) { + CASE(bi_fadd_imm_f32_to(b, bi_register(2), bi_discard(bi_register(2)), 0x4847C6C0), + 0x0114C24847C6C042ULL); + + CASE(bi_fadd_imm_v2f16_to(b, bi_register(2), bi_discard(bi_register(2)), 0x70AC6784), + 0x0115C270AC678442ULL); +} + +TEST_F(ValhallPacking, Comparions) { + bi_instr *I = + 
bi_icmp_v2s16_to(b, bi_register(2), + bi_discard(bi_swz_16(bi_register(3), true, false)), + bi_discard(bi_swz_16(bi_register(2), true, false)), + BI_CMPF_GT, + BI_RESULT_TYPE_M1); + I->src[2] = zero; // TODO: model in the IR + + CASE(I, 0x00f9c21184c04243); + + I->op = BI_OPCODE_FCMP_V2F16; + I->src[1] = bi_discard(bi_swz_16(bi_register(2), false, false)); + CASE(I, 0x00f5c20190c04243); +} + +TEST_F(ValhallPacking, Conversions) { + CASE(bi_v2s16_to_v2f16_to(b, bi_register(2), bi_discard(bi_register(2)), BI_ROUND_NONE), + 0x0090c22000070042); +} + +TEST_F(ValhallPacking, BranchzI16) { + bi_instr *I = bi_branchz_i16(b, bi_half(bi_register(2), false), bi_null(), BI_CMPF_EQ); + I->branch_offset = 1; + CASE(I, 0x001fc03000000102); +} + +TEST_F(ValhallPacking, BranchzI16Backwards) { + bi_instr *I = bi_branchz_i16(b, zero, bi_null(), BI_CMPF_EQ); + I->branch_offset = -8; + CASE(I, 0x001fc017fffff8c0); +} + +TEST_F(ValhallPacking, Blend) { + CASE(bi_blend_to(b, bi_null(), bi_register(0), bi_register(60), + bi_fau(BIR_FAU_BLEND_0, false), + bi_fau(BIR_FAU_BLEND_0, true), + bi_null(), BI_REGISTER_FORMAT_F16, 2, 0), + 0x007f4004333c00f0); +} + +TEST_F(ValhallPacking, Mux) { + CASE(bi_mux_i32_to(b, bi_register(0), bi_discard(bi_register(0)), + bi_discard(bi_register(4)), + bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 0), false), BI_MUX_BIT), + 0x00b8c00300804440ull); +} + +TEST_F(ValhallPacking, AtestFP16) { + bi_instr *I = bi_atest_to(b, bi_register(60), bi_register(60), + bi_half(bi_register(1), true)); + I->src[2] = bi_fau(BIR_FAU_ATEST_PARAM, false); + + CASE(I, 0x007dbc0208ea013c); +} + +TEST_F(ValhallPacking, AtestFP32) { + bi_instr *I = bi_atest_to(b, bi_register(60), bi_register(60), one); + I->src[2] = bi_fau(BIR_FAU_ATEST_PARAM, false); + CASE(I, 0x007dbc0200ead03c); +} + +TEST_F(ValhallPacking, Transcendentals) { + CASE(bi_frexpm_f32_to(b, bi_register(1), bi_register(0), false, true), + 0x0099c10001000000); + + CASE(bi_frexpe_f32_to(b, bi_register(0), 
bi_discard(bi_register(0)), false, true), + 0x0099c00001020040); + + CASE(bi_frsq_f32_to(b, bi_register(2), bi_register(1)), + 0x009cc20000020001); + + CASE(bi_fma_rscale_f32_to(b, bi_register(0), bi_discard(bi_register(1)), bi_discard(bi_register(2)), bi_neg(zero), bi_discard(bi_register(0)), BI_ROUND_NONE, BI_SPECIAL_LEFT), + 0x0162c00440c04241); +} + +TEST_F(ValhallPacking, Csel) { + CASE(bi_csel_u32_to(b, bi_register(1), bi_discard(bi_register(2)), + bi_discard(bi_register(3)), + bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 2), false), + bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 2), true), + BI_CMPF_EQ), + 0x0150c10085844342); + + CASE(bi_csel_u32_to(b, bi_register(1), bi_discard(bi_register(2)), + bi_discard(bi_register(3)), + bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 2), false), + bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 2), true), + BI_CMPF_LT), + 0x0150c10485844342); + + CASE(bi_csel_s32_to(b, bi_register(1), bi_discard(bi_register(2)), + bi_discard(bi_register(3)), + bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 2), false), + bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 2), true), + BI_CMPF_LT), + 0x0158c10485844342); +} + +TEST_F(ValhallPacking, LdAttrImm) { + bi_instr *I = bi_ld_attr_imm_to(b, bi_register(0), + bi_discard(bi_register(60)), + bi_discard(bi_register(61)), + BI_REGISTER_FORMAT_F16, BI_VECSIZE_V4, 1); + I->table = 1; + + CASE(I, 0x0066800433117d7c); +} + +TEST_F(ValhallPacking, LdVarBufImmF16) { + CASE(bi_ld_var_buf_imm_f16_to(b, bi_register(2), bi_register(61), + BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER, + BI_UPDATE_RETRIEVE, BI_VECSIZE_V4, 0), + 0x005d82143300003d); + + CASE(bi_ld_var_buf_imm_f16_to(b, bi_register(0), bi_register(61), + BI_REGISTER_FORMAT_F16, BI_SAMPLE_SAMPLE, + BI_UPDATE_STORE, BI_VECSIZE_V4, 0), + 0x005d80843300003d); + + CASE(bi_ld_var_buf_imm_f16_to(b, bi_register(0), bi_register(61), + BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTROID, + BI_UPDATE_STORE, BI_VECSIZE_V4, 8), + 0x005d80443308003d); +} + +TEST_F(ValhallPacking, LeaBufImm) { + 
CASE(bi_lea_buf_imm_to(b, bi_register(4), bi_discard(bi_register(59))), + 0x005e840400000d7b); +} + +TEST_F(ValhallPacking, StoreSegment) { + CASE(bi_store_i96(b, bi_register(0), bi_discard(bi_register(4)), + bi_discard(bi_register(5)), BI_SEG_VARY, 0), + 0x0061400632000044); +} + +TEST_F(ValhallPacking, Convert16To32) { + CASE(bi_u16_to_u32_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(55), false, false))), + 0x0090c20000140077); + + CASE(bi_u16_to_u32_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(55), true, false))), + 0x0090c20010140077); + + CASE(bi_u16_to_f32_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(55), false, false))), + 0x0090c20000150077); + + CASE(bi_u16_to_f32_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(55), true, false))), + 0x0090c20010150077); + + CASE(bi_s16_to_s32_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(55), false, false))), + 0x0090c20000040077); + + CASE(bi_s16_to_s32_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(55), true, false))), + 0x0090c20010040077); +} + +TEST_F(ValhallPacking, Swizzle8) { + bi_instr *I = bi_icmp_v4u8_to(b, bi_register(1), bi_byte(bi_register(0), 0), + zero, BI_CMPF_NE, BI_RESULT_TYPE_I1); + I->src[2] = zero; // TODO: model in the IR + + CASE(I, 0x00f2c14300c0c000); +} + +TEST_F(ValhallPacking, FauPage1) { + CASE(bi_mov_i32_to(b, bi_register(1), bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 32), false)), + 0x0291c10000000080ULL); +} -- GitLab From f31208f778c21b308aa5bb3f9b48915c0718b858 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 17 Mar 2022 11:47:47 -0400 Subject: [PATCH 31/31] pan/va: Lower BLEND to call blend shaders Do this as late as possible. 
Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/valhall/va_pack.c | 54 ++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/src/panfrost/bifrost/valhall/va_pack.c b/src/panfrost/bifrost/valhall/va_pack.c index e7bd014fa028..6ab225321f7f 100644 --- a/src/panfrost/bifrost/valhall/va_pack.c +++ b/src/panfrost/bifrost/valhall/va_pack.c @@ -24,6 +24,7 @@ #include "va_compiler.h" #include "valhall.h" #include "valhall_enums.h" +#include "bi_builder.h" /* This file contains the final passes of the compiler. Running after * scheduling and RA, the IR is now finalized, so we need to emit it to actual @@ -902,6 +903,55 @@ va_lower_branch_target(bi_context *ctx, bi_block *start, bi_instr *I) I->branch_offset = offset; } +/* + * Late lowering to insert blend shader calls after BLEND instructions. Required + * to support blend shaders, so this pass may be omitted if it is known that + * blend shaders are never used. + * + * This lowering runs late because it introduces control flow changes without + * modifying the control flow graph. It hardcodes registers, meaning running + * after RA makes sense. Finally, it hardcodes a manually sized instruction + * sequence, requiring it to run after scheduling. + * + * As it is Valhall specific, running it as a pre-pack lowering is sensible. 
+ */
+static void
+va_lower_blend(bi_context *ctx)
+{
+   bool last_blend = true;
+
+   /* Link register (ABI between fragment and blend shaders) */
+   bi_index lr = bi_register(48);
+
+   /* Program counter for *next* instruction */
+   bi_index pc = bi_fau(BIR_FAU_PROGRAM_COUNTER, false);
+
+   bi_foreach_instr_global_rev(ctx, I) {
+      if (I->op != BI_OPCODE_BLEND)
+         continue;
+
+      bi_builder b = bi_init_builder(ctx, bi_after_instr(I));
+
+      unsigned prolog_length = 2 * 8;
+
+      if (last_blend)
+         bi_iadd_imm_i32_to(&b, lr, va_zero_lut(), 0);
+      else
+         bi_iadd_imm_i32_to(&b, lr, pc, prolog_length - 8);
+
+      bi_branchzi(&b, va_zero_lut(), I->src[3], BI_CMPF_EQ);
+
+      /* For fixed function: skip the prologue, or return */
+      if (last_blend)
+         I->flow = 0x7 | 0x8; /* .return */
+      else
+         I->branch_offset = prolog_length;
+
+      /* Iterating backwards makes the last BLEND easy to identify */
+      last_blend = false;
+   }
+}
+
 void
 bi_pack_valhall(bi_context *ctx, struct util_dynarray *emission)
 {
@@ -909,6 +959,10 @@ bi_pack_valhall(bi_context *ctx, struct util_dynarray *emission)
 
    va_validate(stderr, ctx);
 
+   /* Late lowering */
+   if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->inputs->is_blend)
+      va_lower_blend(ctx);
+
    bi_foreach_block(ctx, block) {
       bi_foreach_instr_in_block(block, I) {
          if (I->op == BI_OPCODE_BRANCHZ_I16)
-- 
GitLab