diff --git a/src/panfrost/bifrost/ISA.xml b/src/panfrost/bifrost/ISA.xml index e758f682551a6a7fbb8cea887e808af164a80103..8be9a1e88e03f59a60e67fb1f4f23157888d367c 100644 --- a/src/panfrost/bifrost/ISA.xml +++ b/src/panfrost/bifrost/ISA.xml @@ -6396,7 +6396,7 @@ - + @@ -8593,6 +8593,12 @@ u32 u16 + + flat32 + flat16 + f32 + f16 + center centroid @@ -8623,6 +8629,12 @@ u32 u16 + + flat32 + flat16 + f32 + f16 + center centroid @@ -8653,6 +8665,12 @@ u32 u16 + + flat32 + flat16 + f32 + f16 + center centroid @@ -8683,6 +8701,12 @@ u32 u16 + + flat32 + flat16 + f32 + f16 + center centroid diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index 4ea115a15d3c6bf82123b9077777adb5f9d66001..b39e4b1eb5b3d9e30fc556dbfc150e9f91585b77 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -552,6 +552,9 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr) b->shader->info.bifrost->uses_flat_shading = true; } + enum bi_source_format source_format = + smooth ? BI_SOURCE_FORMAT_F32 : BI_SOURCE_FORMAT_FLAT32; + nir_src *offset = nir_get_io_offset_src(instr); unsigned imm_index = 0; bool immediate = bi_is_intr_immediate(instr, &imm_index, 20); @@ -559,9 +562,9 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr) if (b->shader->malloc_idvs && immediate) { /* Immediate index given in bytes. 
*/ - bi_ld_var_buf_imm_f32_to(b, dest, src0, regfmt, sample, update, - vecsize, - bi_varying_offset(b->shader, instr)); + bi_ld_var_buf_imm_to(b, sz, dest, src0, regfmt, + sample, source_format, update, vecsize, + bi_varying_offset(b->shader, instr)); } else if (immediate && smooth) { I = bi_ld_var_imm_to(b, dest, src0, regfmt, sample, update, vecsize, imm_index); @@ -582,8 +585,9 @@ bi_emit_load_vary(bi_builder *b, nir_intrinsic_instr *instr) if (vbase != 0) idx_bytes = bi_iadd_u32(b, idx, bi_imm_u32(vbase), false); - bi_ld_var_buf_f32_to(b, dest, src0, idx_bytes, regfmt, - sample, update, vecsize); + bi_ld_var_buf_to(b, sz, dest, src0, idx_bytes, regfmt, + sample, source_format, update, + vecsize); } else if (smooth) { if (base != 0) idx = bi_iadd_u32(b, idx, bi_imm_u32(base), false); @@ -4744,6 +4748,76 @@ bi_pack_clauses(bi_context *ctx, struct util_dynarray *binary, unsigned offset) } } +/* + * Build a bit mask of varyings (by location) that are flat-shaded. This + * information is needed by lower_mediump_io, as we don't yet support 16-bit + * flat varyings. + * + * Also varyings that are used as texture coordinates should be kept at fp32 so + * the texture instruction may be promoted to VAR_TEX. In general this is a good + * idea, as fp16 texture coordinates are not supported by the hardware and are + * usually inappropriate. (There are even relevant CTS bugs for both.) + * + * TODO: If we compacted the varyings with some fixup code in the vertex shader, + * we could implement 16-bit flat varyings. Consider if this case matters. + * + * TODO: The texture coordinate handling could be less heavy-handed. 
+ */ +static bool +bi_gather_texcoords(nir_builder *b, nir_instr *instr, void *data) +{ + uint64_t *mask = data; + + if (instr->type != nir_instr_type_tex) + return false; + + nir_tex_instr *tex = nir_instr_as_tex(instr); + + int coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord); + if (coord_idx < 0) + return false; + + nir_src src = tex->src[coord_idx].src; + assert(src.is_ssa); + + nir_ssa_scalar x = nir_ssa_scalar_resolved(src.ssa, 0); + nir_ssa_scalar y = nir_ssa_scalar_resolved(src.ssa, 1); + + if (x.def != y.def) + return false; + + nir_instr *parent = x.def->parent_instr; + + if (parent->type != nir_instr_type_intrinsic) + return false; + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent); + + if (intr->intrinsic != nir_intrinsic_load_interpolated_input) + return false; + + nir_io_semantics sem = nir_intrinsic_io_semantics(intr); + *mask |= BITFIELD64_BIT(sem.location); + return false; +} + +static uint64_t +bi_fp32_varying_mask(nir_shader *nir) +{ + uint64_t mask = 0; + + assert(nir->info.stage == MESA_SHADER_FRAGMENT); + + nir_foreach_shader_in_variable(var, nir) { + if (var->data.interpolation == INTERP_MODE_FLAT) + mask |= BITFIELD64_BIT(var->data.location); + } + + nir_shader_instructions_pass(nir, bi_gather_texcoords, nir_metadata_all, &mask); + + return mask; +} + static void bi_finalize_nir(nir_shader *nir, unsigned gpu_id, bool is_blend) { @@ -4786,8 +4860,9 @@ bi_finalize_nir(nir_shader *nir, unsigned gpu_id, bool is_blend) NIR_PASS_V(nir, nir_opt_constant_folding); if (nir->info.stage == MESA_SHADER_FRAGMENT) { - NIR_PASS_V(nir, nir_lower_mediump_io, nir_var_shader_out, - ~0, false); + NIR_PASS_V(nir, nir_lower_mediump_io, + nir_var_shader_in | nir_var_shader_out, + ~bi_fp32_varying_mask(nir), false); } else { if (gpu_id >= 0x9000) { NIR_PASS_V(nir, nir_lower_mediump_io, nir_var_shader_out, diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h index 
bfdd27c4d819e8342f2ce9dd600b373df88bd895..a2eb6b1b092fbd600f02205a1fae70679e6bc135 100644 --- a/src/panfrost/bifrost/compiler.h +++ b/src/panfrost/bifrost/compiler.h @@ -487,6 +487,7 @@ typedef struct { enum bi_varying_name varying_name; /* LD_VAR_SPECIAL */ bool skip; /* VAR_TEX, TEXS, TEXC */ bool lod_mode; /* VAR_TEX, TEXS, implicitly for TEXC */ + enum bi_source_format source_format; /* LD_VAR_BUF */ /* Used for valhall texturing */ bool shadow; @@ -500,7 +501,7 @@ typedef struct { }; /* Maximum size, for hashing */ - unsigned flags[11]; + unsigned flags[14]; struct { enum bi_subgroup subgroup; /* WMASK, CLPER */ diff --git a/src/panfrost/bifrost/test/test-optimizer.cpp b/src/panfrost/bifrost/test/test-optimizer.cpp index 03d3f8e34fb57d133fa3c68a0862c2fed34b93fc..e4e16a32ea08a2d52d8cb7c8866b9479613ea2ee 100644 --- a/src/panfrost/bifrost/test/test-optimizer.cpp +++ b/src/panfrost/bifrost/test/test-optimizer.cpp @@ -421,3 +421,20 @@ TEST_F(Optimizer, DoNotFuseMixedSizeResultType) bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1), BI_MUX_INT_ZERO)); } + +TEST_F(Optimizer, VarTexCoord32) +{ + CASE({ + bi_index ld = bi_ld_var_imm(b, bi_null(), BI_REGISTER_FORMAT_F32, BI_SAMPLE_CENTER, BI_UPDATE_STORE, BI_VECSIZE_V2, 0); + + bi_index x = bi_temp(b->shader); + bi_index y = bi_temp(b->shader); + bi_instr *split = bi_split_i32_to(b, x, ld); + split->nr_dests = 2; + split->dest[1] = y; + + bi_texs_2d_f32_to(b, reg, x, y, false, 0, 0); + }, { + bi_var_tex_f32_to(b, reg, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, 0); + }); +} diff --git a/src/panfrost/bifrost/valhall/ISA.xml b/src/panfrost/bifrost/valhall/ISA.xml index cc138971a6e4212268ff0a48c78ebdeab6c13a6d..71677fe6ce1bd5ce8a05e49bcf135476652500e8 100644 --- a/src/panfrost/bifrost/valhall/ISA.xml +++ b/src/panfrost/bifrost/valhall/ISA.xml @@ -735,9 +735,12 @@ In-memory format of varyings. 
+ + Note: src_flat32 is only valid with 32-bit varying instructions and + src_flat16 is only valid with 16-bit varying instructions. src_flat32 - + src_flat16 src_f32 src_f16 diff --git a/src/panfrost/bifrost/valhall/test/test-packing.cpp b/src/panfrost/bifrost/valhall/test/test-packing.cpp index ce46da36422d48d2c6fb9a113bc7a06a791efbf9..7d44bac1320bfbb21298b7130a4ba73fc3556942 100644 --- a/src/panfrost/bifrost/valhall/test/test-packing.cpp +++ b/src/panfrost/bifrost/valhall/test/test-packing.cpp @@ -258,16 +258,19 @@ TEST_F(ValhallPacking, LdAttrImm) { TEST_F(ValhallPacking, LdVarBufImmF16) { CASE(bi_ld_var_buf_imm_f16_to(b, bi_register(2), bi_register(61), BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER, + BI_SOURCE_FORMAT_F16, BI_UPDATE_RETRIEVE, BI_VECSIZE_V4, 0), 0x005d82143300003d); CASE(bi_ld_var_buf_imm_f16_to(b, bi_register(0), bi_register(61), BI_REGISTER_FORMAT_F16, BI_SAMPLE_SAMPLE, + BI_SOURCE_FORMAT_F16, BI_UPDATE_STORE, BI_VECSIZE_V4, 0), 0x005d80843300003d); CASE(bi_ld_var_buf_imm_f16_to(b, bi_register(0), bi_register(61), BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTROID, + BI_SOURCE_FORMAT_F16, BI_UPDATE_STORE, BI_VECSIZE_V4, 8), 0x005d80443308003d); } diff --git a/src/panfrost/bifrost/valhall/va_pack.c b/src/panfrost/bifrost/valhall/va_pack.c index 5f37934692d48940f3b8c3820e07b5cb6df185a8..b0df089f26f002aa09337ba6d897711ca6d65541 100644 --- a/src/panfrost/bifrost/valhall/va_pack.c +++ b/src/panfrost/bifrost/valhall/va_pack.c @@ -266,14 +266,14 @@ va_pack_combine(enum bi_swizzle swz) static enum va_source_format va_pack_source_format(const bi_instr *I) { - switch (I->register_format) { - case BI_REGISTER_FORMAT_AUTO: - case BI_REGISTER_FORMAT_S32: - case BI_REGISTER_FORMAT_U32: return VA_SOURCE_FORMAT_SRC_FLAT32; - case BI_REGISTER_FORMAT_F32: return VA_SOURCE_FORMAT_SRC_F32; - case BI_REGISTER_FORMAT_F16: return VA_SOURCE_FORMAT_SRC_F16; - default: unreachable("unhandled register format"); + switch (I->source_format) { + case BI_SOURCE_FORMAT_FLAT32: return 
VA_SOURCE_FORMAT_SRC_FLAT32; + case BI_SOURCE_FORMAT_FLAT16: return VA_SOURCE_FORMAT_SRC_FLAT16; + case BI_SOURCE_FORMAT_F32: return VA_SOURCE_FORMAT_SRC_F32; + case BI_SOURCE_FORMAT_F16: return VA_SOURCE_FORMAT_SRC_F16; } + + unreachable("unhandled source format"); } static uint64_t