diff --git a/src/panfrost/bifrost/bi_validate.c b/src/panfrost/bifrost/bi_validate.c new file mode 100644 index 0000000000000000000000000000000000000000..d6eab037543fa0180a3fb497f9643a9a30f0f96b --- /dev/null +++ b/src/panfrost/bifrost/bi_validate.c @@ -0,0 +1,78 @@ +/* + * Copyright (C) 2021 Collabora, Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "compiler.h" +#include "util/u_memory.h" + +/* Validation doesn't make sense in release builds */ +#ifndef NDEBUG + +/* Validate that all sources are initialized in all read components. This is + * required for correct register allocation. We check a weaker condition, that + * all sources that are read are written at some point (equivalently, the live + * set is empty at the start of the program). 
TODO: Strengthen */ + +bool +bi_validate_initialization(bi_context *ctx) +{ + bool success = true; + + /* Calculate the live set: invalidate liveness, then recompute it; presumably this is what fills in entry->live_in read below (confirm) */ + bi_block *entry = bi_entry_block(ctx); + unsigned temp_count = bi_max_temp(ctx); + bi_invalidate_liveness(ctx); + bi_compute_liveness(ctx); + + /* Validate that the live set is indeed empty: every index below temp_count that is still live-in at the entry block is reported on stderr as i >> 1, prefixed with r when the PAN_IS_REG bit is set */ + for (unsigned i = 0; i < temp_count; ++i) { + if (entry->live_in[i] == 0) continue; + + fprintf(stderr, "%s%u\n", (i & PAN_IS_REG) ? "r" : "", i >> 1); + success = false; + } + + return success; +} +/* Run the IR validation checks on ctx, unless BIFROST_DBG_NOVALIDATE is set in bifrost_debug. On failure, report the stage named by after, print the shader to stderr and exit with status 1. */ +void +bi_validate(bi_context *ctx, const char *after) +{ + bool fail = false; + + if (bifrost_debug & BIFROST_DBG_NOVALIDATE) + return; /* validation switched off through the debug flags */ + + if (!bi_validate_initialization(ctx)) { + fprintf(stderr, "Uninitialized data read after %s\n", after); + fail = true; + } + + /* TODO: Validate more invariants */ + + if (fail) { + bi_print_shader(ctx, stderr); + exit(1); /* a validation failure is fatal */ + } +} + +#endif /* NDEBUG */ diff --git a/src/panfrost/bifrost/bifrost.h b/src/panfrost/bifrost/bifrost.h index 436404f0be0b62d7ce81c94caef4b7c0b872d5ec..74b110f204ca400828f0cf4e712ef45e23efa4b3 100644 --- a/src/panfrost/bifrost/bifrost.h +++ b/src/panfrost/bifrost/bifrost.h @@ -37,6 +37,7 @@ #define BIFROST_DBG_INTERNAL 0x0010 #define BIFROST_DBG_NOSCHED 0x0020 #define BIFROST_DBG_INORDER 0x0040 +#define BIFROST_DBG_NOVALIDATE 0x0080 extern int bifrost_debug; diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index a386999c507477977d2b81bbfebfc1e366e0dcda..afacef288319765a41fbfa1df748a1ffa0a8944b 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -45,6 +45,7 @@ static const struct debug_named_value bifrost_debug_options[] = { {"internal", BIFROST_DBG_INTERNAL, "Dump even internal shaders"}, {"nosched", BIFROST_DBG_NOSCHED, "Force trivial bundling"}, {"inorder", BIFROST_DBG_INORDER, "Force in-order bundling"}, + {"novalidate",BIFROST_DBG_NOVALIDATE, "Skip IR validation"}, DEBUG_NAMED_VALUE_END }; @@ 
-3179,6 +3180,62 @@ nir_invalidate_divergence(struct nir_builder *b, nir_instr *instr, return nir_foreach_ssa_def(instr, nir_invalidate_divergence_ssa, NULL); } +/* Ensure we write exactly 4 components: select the requested channel of in when mask covers it, otherwise fall back to channel first */ +static nir_ssa_def * +bifrost_nir_valid_channel(nir_builder *b, nir_ssa_def *in, + unsigned channel, unsigned first, unsigned mask) +{ + if (!(mask & BITFIELD_BIT(channel))) + channel = first; + + return nir_channel(b, in, channel); +} + +/* Lower fragment store_output instructions to always write 4 components, + * matching the hardware semantic. This may require additional moves. Skipping + * these moves is possible in theory, but invokes undefined behaviour in the + * compiler. The DDK inserts these moves, so we will as well. Returns true only when the store was rewritten. */ + +static bool +bifrost_nir_lower_blend_components(struct nir_builder *b, + nir_instr *instr, void *data) +{ + if (instr->type != nir_instr_type_intrinsic) + return false; + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + + if (intr->intrinsic != nir_intrinsic_store_output) + return false; + + nir_ssa_def *in = intr->src[0].ssa; + unsigned first = nir_intrinsic_component(intr); + unsigned mask = nir_intrinsic_write_mask(intr); + + assert(first == 0 && "shouldn't get nonzero components"); + + /* Nothing to do: the store already writes all 4 components */ + if (mask == BITFIELD_MASK(4)) + return false; + + b->cursor = nir_before_instr(&intr->instr); + + /* Replicate the first valid component instead: channels missing from mask read channel first, asserted to be 0 above. NOTE(review): nothing here checks that mask actually covers that channel -- confirm */ + nir_ssa_def *replicated = + nir_vec4(b, bifrost_nir_valid_channel(b, in, 0, first, mask), + bifrost_nir_valid_channel(b, in, 1, first, mask), + bifrost_nir_valid_channel(b, in, 2, first, mask), + bifrost_nir_valid_channel(b, in, 3, first, mask)); + + /* Rewrite to use our replicated version, as a full 4-component store starting at component 0 */ + nir_instr_rewrite_src_ssa(instr, &intr->src[0], replicated); + nir_intrinsic_set_component(intr, 0); + nir_intrinsic_set_write_mask(intr, 0xF); + intr->num_components = 4; + + return true; +} + static void bi_optimize_nir(nir_shader *nir, unsigned gpu_id, bool is_blend) { @@ 
-3281,6 +3338,13 @@ bi_optimize_nir(nir_shader *nir, unsigned gpu_id, bool is_blend) NIR_PASS(progress, nir, bifrost_nir_lower_algebraic_late); NIR_PASS(progress, nir, nir_opt_dce); + if (nir->info.stage == MESA_SHADER_FRAGMENT) { + NIR_PASS_V(nir, nir_shader_instructions_pass, + bifrost_nir_lower_blend_components, + nir_metadata_block_index | nir_metadata_dominance, + NULL); + } + /* Backend scheduler is purely local, so do some global optimizations * to reduce register pressure. */ nir_move_options move_all = @@ -3596,6 +3660,8 @@ bifrost_compile_shader_nir(nir_shader *nir, block->name = block_source_count++; } + bi_validate(ctx, "NIR -> BIR"); + /* If the shader doesn't write any colour or depth outputs, it may * still need an ATEST at the very end! */ bool need_dummy_atest = @@ -3611,6 +3677,7 @@ bifrost_compile_shader_nir(nir_shader *nir, /* Runs before constant folding */ bi_lower_swizzle(ctx); + bi_validate(ctx, "Early lowering"); /* Runs before copy prop */ bi_opt_push_ubo(ctx); @@ -3622,6 +3689,7 @@ bifrost_compile_shader_nir(nir_shader *nir, bi_opt_dead_code_eliminate(ctx); bi_opt_cse(ctx); bi_opt_dead_code_eliminate(ctx); + bi_validate(ctx, "Optimization passes"); bi_foreach_block(ctx, block) { bi_lower_branch(block); @@ -3635,6 +3703,7 @@ bifrost_compile_shader_nir(nir_shader *nir, * skip bit is a function of only the data flow graph and is invariant * under valid scheduling. 
*/ bi_analyze_helper_requirements(ctx); + bi_validate(ctx, "Late lowering"); bi_register_allocate(ctx); bi_opt_post_ra(ctx); diff --git a/src/panfrost/bifrost/bir.c b/src/panfrost/bifrost/bir.c index fa45ee7d1f11d9369847f65e7827c9335081b084..06983363ae03b2ece679361da37a6c30af6a3bec 100644 --- a/src/panfrost/bifrost/bir.c +++ b/src/panfrost/bifrost/bir.c @@ -86,7 +86,10 @@ bi_count_staging_registers(const bi_instr *ins) unsigned bi_count_read_registers(const bi_instr *ins, unsigned s) { - if (s == 0 && bi_opcode_props[ins->op].sr_read) + /* PATOM_C reads 1 but writes 2, so source 0 is special-cased ahead of the generic sr_read staging count */ + if (s == 0 && ins->op == BI_OPCODE_PATOM_C_I32) + return 1; + else if (s == 0 && bi_opcode_props[ins->op].sr_read) return bi_count_staging_registers(ins); else return 1; @@ -102,6 +105,8 @@ bi_count_write_registers(const bi_instr *ins, unsigned d) return 4; else return bi_count_staging_registers(ins); + } else if (ins->op == BI_OPCODE_SEG_ADD_I64) { + return 2; } return 1; diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h index 02df7f8ebae20561877f33ef32266f66fb562625..311c99a27306276006960dd4a2a34d3469ce3e1c 100644 --- a/src/panfrost/bifrost/compiler.h +++ b/src/panfrost/bifrost/compiler.h @@ -862,6 +862,12 @@ bi_next_block(bi_block *block) return list_first_entry(&(block->link), bi_block, link); } +static inline bi_block * +bi_entry_block(bi_context *ctx) +{ + return list_first_entry(&ctx->blocks, bi_block, link); /* first block on the block list of ctx */ +} + /* BIR manipulation */ bool bi_has_arg(const bi_instr *ins, bi_index arg); @@ -904,6 +910,14 @@ bool bi_reads_zero(bi_instr *ins); bool bi_reads_temps(bi_instr *ins, unsigned src); bool bi_reads_t(bi_instr *ins, unsigned src); +#ifndef NDEBUG +bool bi_validate_initialization(bi_context *ctx); +void bi_validate(bi_context *ctx, const char *after_str); +#else /* NDEBUG defined: validation reduces to the no-op stubs below */ +static inline bool bi_validate_initialization(UNUSED bi_context *ctx) { return true; } +static inline void bi_validate(UNUSED bi_context *ctx, UNUSED const char *after_str) { return; } +#endif + 
uint32_t bi_fold_constant(bi_instr *I, bool *unsupported); void bi_opt_constant_fold(bi_context *ctx); diff --git a/src/panfrost/bifrost/meson.build b/src/panfrost/bifrost/meson.build index e18d5a57239193e438666a30175b8a96b83feaaa..38f15391f453207c0b6a4655ce8fd748feb4715b 100644 --- a/src/panfrost/bifrost/meson.build +++ b/src/panfrost/bifrost/meson.build @@ -38,6 +38,7 @@ libpanfrost_bifrost_files = files( 'bi_ra.c', 'bi_schedule.c', 'bi_scoreboard.c', + 'bi_validate.c', 'bir.c', 'bifrost_compile.c', )