Commit 8470ea09 authored by Alyssa Rosenzweig 💜
Browse files

wip

parent 5a0caff8
Pipeline #384241 waiting for manual action with stages
......@@ -388,7 +388,13 @@ panfrost_prepare_fs_state(struct panfrost_context *ctx,
bool alpha_to_coverage = ctx->blend->base.alpha_to_coverage;
bool msaa = rast->multisample;
UNUSED unsigned rt_count = ctx->pipe_framebuffer.nr_cbufs;
unsigned rt_count = ctx->pipe_framebuffer.nr_cbufs;
bool has_blend_shader = false;
for (unsigned c = 0; c < rt_count; ++c)
has_blend_shader |= (blend_shaders[c] != 0);
pan_pack(rsd, RENDERER_STATE, cfg) {
if (panfrost_fs_required(fs, so, &ctx->pipe_framebuffer, zsa)) {
#if PAN_ARCH >= 6
......@@ -408,11 +414,6 @@ panfrost_prepare_fs_state(struct panfrost_context *ctx,
fs->info.fs.can_early_z && !alpha_to_coverage &&
((enum mali_func) zsa->base.alpha_func == MALI_FUNC_ALWAYS);
bool has_blend_shader = false;
for (unsigned c = 0; c < rt_count; ++c)
has_blend_shader |= (blend_shaders[c] != 0);
/* TODO: Reduce this limit? */
if (has_blend_shader)
cfg.properties.midgard.work_register_count = MAX2(fs->info.work_reg_count, 8);
......@@ -466,7 +467,19 @@ panfrost_prepare_fs_state(struct panfrost_context *ctx,
cfg.multisample_misc.sample_mask = msaa ? ctx->sample_mask : 0xFFFF;
cfg.multisample_misc.evaluate_per_sample =
msaa && (ctx->min_samples > 1);
msaa && ctx->min_samples > 1;
#if PAN_ARCH >= 6
/* MSAA blend shaders need to pass their sample ID to
* LD_TILE/ST_TILE, so we must preload it. Additionally, we
* need per-sample shading for the blend shader, accomplished
* by forcing per-sample shading for the whole program. */
if (msaa && has_blend_shader) {
cfg.multisample_misc.evaluate_per_sample = true;
cfg.preload.fragment.sample_mask_id = true;
}
#endif
cfg.stencil_mask_misc.alpha_to_coverage = alpha_to_coverage;
cfg.depth_units = rast->offset_units * 2.0f;
......
......@@ -7653,7 +7653,7 @@
</mod>
</ins>
<ins name="+ST_TILE" staging="r=vecsize" mask="0xff800" exact="0xcb800" message="tile" dests="0">
<ins name="+ST_TILE" staging="r=format" mask="0xff800" exact="0xcb800" message="tile" dests="0">
<src start="0"/>
<src start="3"/>
<src start="6" mask="0xf7"/>
......
......@@ -28,7 +28,7 @@
* bits on the wire (as well as fixup branches) */
static uint64_t
bi_pack_header(bi_clause *clause, bi_clause *next_1, bi_clause *next_2)
bi_pack_header(bool is_blend, bi_clause *clause, bi_clause *next_1, bi_clause *next_2)
{
/* next_dependencies are the union of the dependencies of successors'
* dependencies */
......@@ -38,10 +38,14 @@ bi_pack_header(bi_clause *clause, bi_clause *next_1, bi_clause *next_2)
bool staging_barrier = next_1 ? next_1->staging_barrier : false;
staging_barrier |= next_2 ? next_2->staging_barrier : 0;
staging_barrier |= is_blend;
if (is_blend)
clause->flow_control = BIFROST_FLOW_NBTB;
struct bifrost_header header = {
.flow_control =
(next_1 == NULL && next_2 == NULL) ?
(next_1 == NULL && next_2 == NULL && !is_blend) ?
BIFROST_FLOW_END : clause->flow_control,
.terminate_discarded_threads = clause->td,
.next_clause_prefetch = clause->next_clause_prefetch && next_1,
......@@ -51,6 +55,8 @@ bi_pack_header(bi_clause *clause, bi_clause *next_1, bi_clause *next_2)
.dependency_slot = clause->scoreboard_id,
.message_type = clause->message_type,
.next_message_type = next_1 ? next_1->message_type : 0,
.suppress_inf = is_blend,
.suppress_nan = is_blend,
};
uint64_t u = 0;
......@@ -643,7 +649,7 @@ bi_pack_clause(bi_context *ctx, bi_clause *clause,
unsigned constant_quads =
DIV_ROUND_UP(clause->constant_count - (ec0_packed ? 1 : 0), 2);
uint64_t header = bi_pack_header(clause, next_1, next_2);
uint64_t header = bi_pack_header(ctx->inputs->is_blend, clause, next_1, next_2);
uint64_t ec0 = (clause->constants[0] >> 4);
unsigned m0 = (clause->pcrel_idx == 0) ? 4 : 0;
......
......@@ -1634,6 +1634,7 @@ bi_schedule_clause(bi_context *ctx, bi_block *block, struct bi_worklist st, uint
clause->dependencies |= (1 << BIFROST_SLOT_ELDEST_DEPTH);
break;
case BI_OPCODE_LD_TILE:
case BI_OPCODE_ST_TILE:
if (!ctx->inputs->is_blend)
clause->dependencies |= (1 << BIFROST_SLOT_ELDEST_COLOUR);
break;
......
......@@ -463,14 +463,61 @@ bi_emit_load_blend_input(bi_builder *b, nir_intrinsic_instr *instr)
}
static void
bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T, unsigned rt)
bi_load_sample_id_to(bi_builder *b, bi_index dst)
{
/* Reads 2 or 4 staging registers to cover the input */
unsigned sr_count = (nir_alu_type_get_type_size(T) <= 16) ? 2 : 4;
/* r61[16:23] contains the sampleID, mask it out. Upper bits
* seem to read garbage (despite being architecturally defined
* as zero), so use a 5-bit mask instead of 8-bits */
if (b->shader->inputs->is_blend) {
uint64_t blend_desc = b->shader->inputs->blend.bifrost_blend_desc;
bi_rshift_and_i32_to(b, dst, bi_register(61), bi_imm_u32(0x1f),
bi_imm_u8(16));
}
/* Convenience form of bi_load_sample_id_to(): allocates a fresh temporary
 * from the shader, has bi_load_sample_id_to() write the sample ID into it,
 * and hands that temporary back to the caller. */
static bi_index
bi_load_sample_id(bi_builder *b)
{
        bi_index tmp = bi_temp(b->shader);

        bi_load_sample_id_to(b, tmp);

        return tmp;
}
/* Builds the pixel-indices operand addressing the current pixel of render
 * target `rt`.
 *
 * The descriptor struct is bit-copied into a 32-bit word and wrapped as an
 * immediate. Its sample index field is left as zero, which is all that is
 * needed when the blend inputs report a single sample. When they report more
 * than one sample, the run-time sample ID is added to the immediate so that
 * it lands in the lower byte. */
static bi_index
bi_pixel_indices(bi_builder *b, unsigned rt)
{
        const struct bifrost_pixel_indices current = {
                .y = BIFROST_CURRENT_PIXEL,
                .rt = rt,
        };

        /* Type-pun through memcpy rather than a pointer cast */
        uint32_t packed = 0;
        memcpy(&packed, &current, sizeof(packed));

        bi_index base = bi_imm_u32(packed);

        /* Single-sampled: the constant word is already the final answer */
        if (b->shader->inputs->blend.nr_samples <= 1)
                return base;

        /* Multisampled: fill in the actual sample ID on top of the constant */
        bi_index sample_id = bi_load_sample_id(b);

        return bi_iadd_u32(b, base, sample_id, false);
}
static void
bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T, unsigned rt)
{
/* Reads 2 or 4 staging registers to cover the input */
unsigned size = nir_alu_type_get_type_size(T);
unsigned sr_count = (size <= 16) ? 2 : 4;
const struct panfrost_compile_inputs *inputs = b->shader->inputs;
uint64_t blend_desc = inputs->blend.bifrost_blend_desc;
if (inputs->is_blend && inputs->blend.nr_samples > 1) {
/* Conversion descriptor comes from the compile inputs, pixel
* indices derived at run time based on sample ID */
bi_st_tile(b, rgba, bi_pixel_indices(b, rt), bi_register(60),
bi_imm_u32(blend_desc >> 32), BI_VECSIZE_V4);
} else if (b->shader->inputs->is_blend) {
/* Blend descriptor comes from the compile inputs */
/* Put the result in r0 */
bi_blend_to(b, bi_register(0), rgba,
......@@ -1029,23 +1076,11 @@ bi_emit_ld_tile(bi_builder *b, nir_intrinsic_instr *instr)
rt = (loc - FRAG_RESULT_DATA0);
}
/* We want to load the current pixel.
* FIXME: The sample to load is currently hardcoded to 0. This should
* be addressed for multi-sample FBs.
*/
struct bifrost_pixel_indices pix = {
.y = BIFROST_CURRENT_PIXEL,
.rt = rt
};
bi_index desc = b->shader->inputs->is_blend ?
bi_imm_u32(b->shader->inputs->blend.bifrost_blend_desc >> 32) :
bi_load_sysval(b, PAN_SYSVAL(RT_CONVERSION, rt | (size << 4)), 1, 0);
uint32_t indices = 0;
memcpy(&indices, &pix, sizeof(indices));
bi_ld_tile_to(b, bi_dest_index(&instr->dest), bi_imm_u32(indices),
bi_ld_tile_to(b, bi_dest_index(&instr->dest), bi_pixel_indices(b, rt),
bi_register(60), desc, (instr->num_components - 1));
}
......@@ -1293,15 +1328,9 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
bi_u16_to_u32_to(b, dst, bi_half(bi_register(61), false));
break;
case nir_intrinsic_load_sample_id: {
/* r61[16:23] contains the sampleID, mask it out. Upper bits
* seem to read garbage (despite being architecturally defined
* as zero), so use a 5-bit mask instead of 8-bits */
bi_rshift_and_i32_to(b, dst, bi_register(61), bi_imm_u32(0x1f),
bi_imm_u8(16));
case nir_intrinsic_load_sample_id:
bi_load_sample_id_to(b, dst);
break;
}
case nir_intrinsic_load_front_face:
/* r58 == 0 means primitive is front facing */
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment