Commit 7994929e authored by Boris Brezillon's avatar Boris Brezillon Committed by Marge Bot
Browse files

panfrost: Use the blend shader cache attached to the device



Signed-off-by: Boris Brezillon's avatarBoris Brezillon <boris.brezillon@collabora.com>
Acked-by: Alyssa Rosenzweig's avatarAlyssa Rosenzweig <alyssa@collabora.com>
Part-of: <!9831>
parent bbff09b9
Pipeline #291768 waiting for manual action with stages
......@@ -29,7 +29,6 @@ files_panfrost = files(
'pan_blit.c',
'pan_job.c',
'pan_assemble.c',
'pan_blend_shaders.c',
'pan_blend_cso.c',
'pan_cmdstream.c',
'pan_compute.c',
......
......@@ -28,7 +28,8 @@
#include <stdio.h>
#include "util/u_memory.h"
#include "gallium/auxiliary/util/u_blend.h"
#include "pan_blend_shaders.h"
#include "pan_context.h"
#include "pan_blend_cso.h"
#include "pan_bo.h"
#include "panfrost-quirks.h"
......@@ -59,54 +60,6 @@
* (our subclass of pipe_blend_state).
*/
/* Given an initialized CSO and a particular framebuffer format, grab a
 * blend shader, generating and compiling it if it doesn't exist
 * (lazy-loading in a way). This routine, when the cache hits, should
 * be fast, suitable for calling every draw to avoid wacky dirty
 * tracking paths. If the cache hits, boom, done.
 *
 * ctx        - context owning the blend shader cache (ctx->blend_shaders)
 * blend      - blend CSO supplying the logic-op / per-RT equation state
 * fmt        - render target format; must be non-zero
 * nr_samples - sample count, clamped to at least 1 in the cache key
 * rt         - render target index
 * constants  - blend constants, or NULL when the shader does not use them
 *
 * Returns the cached (or freshly created) shader after handing it to
 * panfrost_compile_blend_shader() with the requested constants. */
struct panfrost_blend_shader *
panfrost_get_blend_shader(struct panfrost_context *ctx,
                          struct panfrost_blend_state *blend,
                          enum pipe_format fmt, unsigned nr_samples,
                          unsigned rt,
                          const float *constants)
{
        /* Prevent NULL collision issues.. */
        assert(fmt != 0);

        /* Check the cache. Key by the RT and format */
        struct hash_table *shaders = ctx->blend_shaders;

        /* The cache hashes and compares keys as raw bytes, so clear the
         * whole object up front instead of relying on a designated
         * initializer, which only promises to zero the named members. */
        struct panfrost_blend_shader_key key;
        memset(&key, 0, sizeof(key));

        key.rt = rt;
        key.format = fmt;
        key.nr_samples = MAX2(nr_samples, 1);
        key.has_constants = constants != NULL;
        key.logicop_enable = blend->base.logicop_enable;

        if (blend->base.logicop_enable) {
                key.logicop_func = blend->base.logicop_func;
        } else {
                /* Without independent blending every RT uses slot 0 */
                unsigned idx = blend->base.independent_blend_enable ? rt : 0;

                /* A disabled equation is left zeroed in the key */
                if (blend->base.rt[idx].blend_enable)
                        key.equation = blend->base.rt[idx];
        }

        struct hash_entry *he = _mesa_hash_table_search(shaders, &key);
        struct panfrost_blend_shader *shader = he ? he->data : NULL;

        if (!shader) {
                /* Cache miss. Build one instead, cache it, and go. The
                 * table is keyed by the copy stored inside the shader, as
                 * the local key does not outlive this call. */
                shader = panfrost_create_blend_shader(ctx, blend, &key);
                _mesa_hash_table_insert(shaders, &shader->key, shader);
        }

        panfrost_compile_blend_shader(shader, constants);
        return shader;
}
/* Create a blend CSO. Essentially, try to compile a fixed-function
* expression and initialize blend shaders */
......@@ -214,12 +167,6 @@ panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti, struc
/* Otherwise, we need to grab a shader */
unsigned constant_mask = pan_blend_constant_mask(&pan_blend, rti);
struct panfrost_blend_shader *shader =
panfrost_get_blend_shader(ctx, blend, fmt, nr_samples, rti,
constant_mask ?
ctx->blend_color.color : NULL);
/* Upload the shader, sharing a BO */
if (!(*bo)) {
*bo = panfrost_batch_create_bo(batch, 4096,
......@@ -229,22 +176,26 @@ panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti, struc
PAN_BO_ACCESS_FRAGMENT);
}
pthread_mutex_lock(&dev->blend_shaders.lock);
struct pan_blend_shader_variant *shader =
pan_blend_get_shader_locked(dev, &pan_blend, rti);
/* Size check */
assert((*shader_offset + shader->size) < 4096);
assert((*shader_offset + shader->binary.size) < 4096);
memcpy((*bo)->ptr.cpu + *shader_offset, shader->buffer, shader->size);
memcpy((*bo)->ptr.cpu + *shader_offset, shader->binary.data, shader->binary.size);
struct panfrost_blend_final final = {
.is_shader = true,
.shader = {
.work_count = shader->work_count,
.first_tag = shader->first_tag,
.gpu = (*bo)->ptr.gpu + *shader_offset,
},
.load_dest = pan_blend_reads_dest(&pan_blend, rti),
};
*shader_offset += shader->size;
*shader_offset += shader->binary.size;
pthread_mutex_unlock(&dev->blend_shaders.lock);
return final;
}
......
......@@ -54,30 +54,6 @@ struct panfrost_blend_shader_key {
struct pipe_rt_blend_state equation;
};
/* An internal blend shader descriptor, from the compiler */
struct panfrost_blend_shader {
/* Copy of the key this shader was created for; the blend shader hash
 * table is keyed by a pointer to this field */
struct panfrost_blend_shader_key key;
/* Context the shader was created on; also the ralloc parent of this
 * object */
struct panfrost_context *ctx;
/* NIR built by pan_blend_create_shader(), fed to the backend compiler by
 * panfrost_compile_blend_shader() */
nir_shader *nir;
/* Blend constants, compared against the requested constants to decide
 * whether an already compiled binary can be kept */
float constants[4];
/* The compiled shader binary; NULL until the first compile */
void *buffer;
/* Byte count of the shader */
unsigned size;
/* Number of 128-bit work registers required by the shader */
unsigned work_count;
/* First instruction tag (for tagging the pointer); set to 0 on Bifrost */
unsigned first_tag;
};
/* A blend shader descriptor ready for actual use */
struct panfrost_blend_shader_final {
......@@ -86,9 +62,6 @@ struct panfrost_blend_shader_final {
/* First instruction tag (for tagging the pointer) */
unsigned first_tag;
/* Same meaning as panfrost_blend_shader */
unsigned work_count;
};
struct panfrost_blend_equation_final {
......
/*
* © Copyright 2018 Alyssa Rosenzweig
* Copyright (C) 2019-2020 Collabora, Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#include <stdio.h>
#include "pan_blend_shaders.h"
#include "pan_shader.h"
#include "pan_util.h"
#include "panfrost-quirks.h"
#include "compiler/nir/nir_builder.h"
#include "panfrost/util/nir_lower_blend.h"
#include "panfrost/util/pan_lower_framebuffer.h"
#include "gallium/auxiliary/util/u_blend.h"
#include "util/u_memory.h"
/*
* Implements the command stream portion of programmatic blend shaders.
*
* On Midgard, common blending operations are accelerated by the fixed-function
* blending pipeline. Panfrost supports this fast path via the code in
* pan_blending.c. Nevertheless, uncommon blend modes (including some seemingly
* simple modes present in ES2) require "blend shaders", a special internal
* shader type used for programmable blending.
*
* Blend shaders operate during the normal blending time, but they bypass the
* fixed-function blending pipeline and instead go straight to the Midgard
* shader cores. The shaders themselves are essentially just fragment shaders,
* making heavy use of uint8 arithmetic to manipulate RGB values for the
* framebuffer.
*
* As is typical with Midgard, shader binaries must be accompanied by
* information about the first tag (ORed with the bottom nibble of address,
* like usual) and work registers. Work register count is assumed to be less
* than or equal to the corresponding fragment shader's work count. This
* suggests that blend shader invocation is tied to fragment shader
* execution.
*
* The shaders themselves use the standard ISA. The source pixel colour,
* including alpha, is preloaded into r0 as a vec4 of float32. The destination
* pixel colour must be loaded explicitly via load/store ops, possibly
* performing conversions in software. The blended colour must be stored with a
* fragment writeout in the correct framebuffer format, either in software or
* via conversion opcodes on the load/store pipe.
*
* Blend shaders hardcode constants. Naively, this requires recompilation each
* time the blend color changes, which is a performance risk. Accordingly, we
* 'cheat' a bit: instead of loading the constant, we compile a shader with a
* dummy constant, exporting the offset to the immediate in the shader binary,
* storing this generic binary and metadata in the CSO itself at CSO create
* time.
*
* We then hot patch in the color into this shader at attachment / color change
* time, allowing for CSO create to be the only expensive operation
* (compilation).
*/
struct panfrost_blend_shader *
panfrost_create_blend_shader(struct panfrost_context *ctx,
struct panfrost_blend_state *state,
const struct panfrost_blend_shader_key *key)
{
struct panfrost_device *dev = pan_device(ctx->base.screen);
struct panfrost_blend_shader *res = rzalloc(ctx, struct panfrost_blend_shader);
struct pan_blend_state pan_blend = state->pan;
res->ctx = ctx;
res->key = *key;
/* Build the shader */
pan_blend.rts[key->rt].format = key->format;
pan_blend.rts[key->rt].nr_samples = key->nr_samples;
res->nir = pan_blend_create_shader(dev, &pan_blend, key->rt);
return res;
}
uint64_t
bifrost_get_blend_desc(const struct panfrost_device *dev,
enum pipe_format fmt, unsigned rt, unsigned force_size)
{
const struct util_format_description *desc = util_format_description(fmt);
uint64_t res;
pan_pack(&res, BIFROST_INTERNAL_BLEND, cfg) {
cfg.mode = MALI_BIFROST_BLEND_MODE_OPAQUE;
cfg.fixed_function.num_comps = desc->nr_channels;
cfg.fixed_function.rt = rt;
nir_alu_type T = pan_unpacked_type_for_format(desc);
if (force_size)
T = nir_alu_type_get_base_type(T) | force_size;
switch (T) {
case nir_type_float16:
cfg.fixed_function.conversion.register_format =
MALI_BIFROST_REGISTER_FILE_FORMAT_F16;
break;
case nir_type_float32:
cfg.fixed_function.conversion.register_format =
MALI_BIFROST_REGISTER_FILE_FORMAT_F32;
break;
case nir_type_int8:
case nir_type_int16:
cfg.fixed_function.conversion.register_format =
MALI_BIFROST_REGISTER_FILE_FORMAT_I16;
break;
case nir_type_int32:
cfg.fixed_function.conversion.register_format =
MALI_BIFROST_REGISTER_FILE_FORMAT_I32;
break;
case nir_type_uint8:
case nir_type_uint16:
cfg.fixed_function.conversion.register_format =
MALI_BIFROST_REGISTER_FILE_FORMAT_U16;
break;
case nir_type_uint32:
cfg.fixed_function.conversion.register_format =
MALI_BIFROST_REGISTER_FILE_FORMAT_U32;
break;
default:
unreachable("Invalid format");
}
cfg.fixed_function.conversion.memory_format =
panfrost_format_to_bifrost_blend(dev, desc, true);
}
return res;
}
void
panfrost_compile_blend_shader(struct panfrost_blend_shader *shader,
const float *constants)
{
struct panfrost_device *dev = pan_device(shader->ctx->base.screen);
/* If the shader has already been compiled and the constants match
* or the shader doesn't use the blend constants, we can keep the
* compiled version.
*/
if (shader->buffer &&
(!constants ||
!memcmp(shader->constants, constants, sizeof(shader->constants))))
return;
/* Compile or recompile the NIR shader */
struct panfrost_compile_inputs inputs = {
.gpu_id = dev->gpu_id,
.is_blend = true,
.blend.rt = shader->key.rt,
.blend.nr_samples = shader->key.nr_samples,
.rt_formats = {shader->key.format},
};
if (constants)
memcpy(inputs.blend.constants, constants, sizeof(inputs.blend.constants));
if (pan_is_bifrost(dev)) {
inputs.blend.bifrost_blend_desc =
bifrost_get_blend_desc(dev, shader->key.format, shader->key.rt, 0);
}
struct pan_shader_info info;
struct util_dynarray binary;
util_dynarray_init(&binary, NULL);
pan_shader_compile(dev, shader->nir, &inputs, &binary, &info);
/* Allow us to patch later */
shader->first_tag = pan_is_bifrost(dev) ? 0 : info.midgard.first_tag;
shader->size = binary.size;
shader->buffer = reralloc_size(shader, shader->buffer, shader->size);
memcpy(shader->buffer, binary.data, shader->size);
shader->work_count = info.work_reg_count;
util_dynarray_fini(&binary);
}
/*
* © Copyright 2018 Alyssa Rosenzweig
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#ifndef __PAN_BLEND_SHADERS_H__
#define __PAN_BLEND_SHADERS_H__
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
#include <midgard_pack.h>
#include "pan_context.h"
#include "pan_blend_cso.h"
/* Allocate a blend shader for the given key on `ctx` and build its NIR from
 * the CSO's blend description, with the format and sample count of the
 * keyed render target taken from the key. The shader is not compiled. */
struct panfrost_blend_shader *
panfrost_create_blend_shader(struct panfrost_context *ctx,
struct panfrost_blend_state *state,
const struct panfrost_blend_shader_key *key);
/* Compile the shader's NIR, optionally with the given blend constants
 * (NULL when unused). An already compiled binary is kept when `constants`
 * is NULL or equal to the constants stored in the shader. */
void
panfrost_compile_blend_shader(struct panfrost_blend_shader *shader,
const float *constants);
/* Pack a BIFROST_INTERNAL_BLEND descriptor (opaque mode) for render target
 * `rt` and format `fmt`. A non-zero `force_size` overrides the bit size of
 * the register type derived from the format. */
uint64_t
bifrost_get_blend_desc(const struct panfrost_device *dev,
enum pipe_format fmt, unsigned rt, unsigned force_size);
#endif
......@@ -36,7 +36,6 @@
#include "pan_job.h"
#include "pan_shader.h"
#include "pan_texture.h"
#include "pan_blend_shaders.h"
/* If a BO is accessed for a particular shader stage, will it be in the primary
* batch (vertex/tiler) or the secondary batch (fragment)? Anything but
......@@ -989,7 +988,8 @@ panfrost_upload_rt_conversion_sysval(struct panfrost_batch *batch, unsigned rt,
if (rt < batch->key.nr_cbufs && batch->key.cbufs[rt]) {
enum pipe_format format = batch->key.cbufs[rt]->format;
uniform->u[0] = bifrost_get_blend_desc(dev, format, rt, 32) >> 32;
uniform->u[0] =
pan_blend_get_bifrost_desc(dev, format, rt, 32) >> 32;
} else {
pan_pack(&uniform->u[0], BIFROST_INTERNAL_CONVERSION, cfg)
cfg.memory_format = dev->formats[PIPE_FORMAT_NONE].hw;
......
......@@ -51,7 +51,6 @@
#include "midgard_pack.h"
#include "pan_screen.h"
#include "pan_blend_shaders.h"
#include "pan_cmdstream.h"
#include "pan_util.h"
#include "decode.h"
......@@ -1665,16 +1664,6 @@ panfrost_set_stream_output_targets(struct pipe_context *pctx,
so->num_targets = num_targets;
}
/* Hash callback for the blend shader cache: hashes the key's raw bytes, so
 * every byte of struct panfrost_blend_shader_key takes part in the hash. */
static uint32_t
panfrost_shader_key_hash(const void *key)
{
        const size_t key_size = sizeof(struct panfrost_blend_shader_key);

        return _mesa_hash_data(key, key_size);
}
/* Equality callback for the blend shader cache: two keys match only when
 * their object representations are byte-for-byte identical. */
static bool
panfrost_shader_key_equal(const void *a, const void *b)
{
        const size_t key_size = sizeof(struct panfrost_blend_shader_key);

        return memcmp(a, b, key_size) == 0;
}
struct pipe_context *
panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
{
......@@ -1780,12 +1769,6 @@ panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
panfrost_batch_init(ctx);
ctx->blit_blend = rzalloc(ctx, struct panfrost_blend_state);
ctx->blend_shaders =
_mesa_hash_table_create(ctx,
panfrost_shader_key_hash,
panfrost_shader_key_equal);
/* By default mask everything on */
ctx->sample_mask = ~0;
ctx->active_queries = true;
......
......@@ -178,9 +178,6 @@ struct panfrost_context {
unsigned sample_mask;
unsigned min_samples;
struct panfrost_blend_state *blit_blend;
struct hash_table *blend_shaders;
struct panfrost_query *cond_query;
bool cond_cond;
enum pipe_render_cond_flag cond_mode;
......
......@@ -840,23 +840,45 @@ panfrost_load_surface(struct panfrost_batch *batch, struct pipe_surface *surf, u
if (loc >= FRAG_RESULT_DATA0 &&
!panfrost_blend_format(format).internal) {
struct panfrost_blend_shader *b =
panfrost_get_blend_shader(batch->ctx, batch->ctx->blit_blend,
format,
rsrc->base.nr_samples,
loc - FRAG_RESULT_DATA0,
NULL);
struct panfrost_bo *bo = panfrost_batch_create_bo(batch, b->size,
PAN_BO_EXECUTE,
PAN_BO_ACCESS_PRIVATE |
PAN_BO_ACCESS_READ |
PAN_BO_ACCESS_FRAGMENT);
memcpy(bo->ptr.cpu, b->buffer, b->size);
assert(b->work_count <= 4);
struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
struct panfrost_bo *bo =
panfrost_batch_create_bo(batch, 4096,
PAN_BO_EXECUTE,
PAN_BO_ACCESS_PRIVATE |
PAN_BO_ACCESS_READ |
PAN_BO_ACCESS_FRAGMENT);
unsigned rt = loc - FRAG_RESULT_DATA0;
struct pan_blend_state blend_state = {
.rt_count = rt + 1,
};
blend_state.rts[rt] = (struct pan_blend_rt_state) {
.format = format,
.nr_samples = rsrc->base.nr_samples,
.equation = {
.blend_enable = true,
.rgb_src_factor = BLEND_FACTOR_ZERO,
.rgb_invert_src_factor = true,
.rgb_dst_factor = BLEND_FACTOR_ZERO,
.rgb_func = BLEND_FUNC_ADD,
.alpha_src_factor = BLEND_FACTOR_ZERO,
.alpha_invert_src_factor = true,
.alpha_dst_factor = BLEND_FACTOR_ZERO,
.alpha_func = BLEND_FUNC_ADD,
.color_mask = 0xf,
},
};
pthread_mutex_lock(&dev->blend_shaders.lock);
struct pan_blend_shader_variant *b =
pan_blend_get_shader_locked(dev, &blend_state,
loc - FRAG_RESULT_DATA0);
assert(b->work_reg_count <= 4);
memcpy(bo->ptr.cpu, b->binary.data, b->binary.size);
blend_shader = bo->ptr.gpu | b->first_tag;
pthread_mutex_unlock(&dev->blend_shaders.lock);
}
struct panfrost_ptr transfer = panfrost_pool_alloc_aligned(&batch->pool,
......
......@@ -685,6 +685,8 @@ panfrost_destroy_screen(struct pipe_screen *pscreen)
{
struct panfrost_device *dev = pan_device(pscreen);
pan_blend_shaders_cleanup(dev);
if (dev->ro)
dev->ro->destroy(dev->ro);
panfrost_close_device(dev);
......@@ -856,6 +858,7 @@ panfrost_create_screen(int fd, struct renderonly *ro)
panfrost_resource_screen_init(&screen->base);
panfrost_init_blit_shaders(dev);
pan_blend_shaders_init(dev);
return &screen->base;
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment