pan_assemble.c 10.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
/*
 * © Copyright 2018 Alyssa Rosenzweig
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
28
#include "pan_bo.h"
29
#include "pan_context.h"
30
#include "pan_util.h"
31
#include "panfrost-quirks.h"
32 33 34 35

#include "compiler/nir/nir.h"
#include "nir/tgsi_to_nir.h"
#include "midgard/midgard_compile.h"
36
#include "bifrost/bifrost_compile.h"
37 38 39 40
#include "util/u_dynarray.h"

#include "tgsi/tgsi_dump.h"

41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92
/* Translate the base of a NIR ALU type into the Mali format bits that are
 * later ORed together with the size and channel-count bits. nir_type_bool is
 * treated the same as nir_type_uint. Any other base is a programming error.
 *
 * NOTE(review): the float case yields MALI_CHANNEL_FLOAT rather than a
 * MALI_FORMAT_* value; presumably intentional given how the size helper
 * encodes floats -- confirm against the format definitions. */
static unsigned
pan_format_from_nir_base(nir_alu_type base)
{
        if (base == nir_type_int)
                return MALI_FORMAT_SINT;

        if (base == nir_type_uint || base == nir_type_bool)
                return MALI_FORMAT_UINT;

        if (base == nir_type_float)
                return MALI_CHANNEL_FLOAT;

        unreachable("Invalid base");
}

/* Translate a NIR type size (in bits) into Mali format bits, given the base
 * type it belongs to.
 *
 * Non-float bases map 1- and 8-bit values to MALI_CHANNEL_8, 16-bit to
 * MALI_CHANNEL_16 and 32-bit to MALI_CHANNEL_32. Float bases only accept 16
 * and 32 bits. Any other size is a programming error.
 *
 * NOTE(review): the float sizes return MALI_FORMAT_SINT / MALI_FORMAT_UNORM
 * instead of MALI_CHANNEL_* values; presumably this is how float widths are
 * encoded once ORed with MALI_CHANNEL_FLOAT -- confirm. */
static unsigned
pan_format_from_nir_size(nir_alu_type base, unsigned size)
{
        const bool is_float = (base == nir_type_float);

        if (!is_float) {
                if (size == 1 || size == 8)
                        return MALI_CHANNEL_8;

                if (size == 16)
                        return MALI_CHANNEL_16;

                if (size == 32)
                        return MALI_CHANNEL_32;

                unreachable("Invalid int size for format");
        }

        if (size == 16)
                return MALI_FORMAT_SINT;

        if (size == 32)
                return MALI_FORMAT_UNORM;

        unreachable("Invalid float size for format");
}

/* Derive a four-channel Mali format from a GLSL type. Array wrappers are
 * stripped first, so only the element's base type determines the result. */
static enum mali_format
pan_format_from_glsl(const struct glsl_type *type)
{
        const struct glsl_type *element = glsl_without_array(type);
        nir_alu_type alu =
                nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(element));

        unsigned base = nir_alu_type_get_base_type(alu);
        unsigned bits = nir_alu_type_get_type_size(alu);

        /* Base bits, size bits and a fixed channel count of four */
        unsigned format = pan_format_from_nir_base(base);
        format |= pan_format_from_nir_size(base, bits);
        format |= MALI_NR_CHANNELS(4);

        return format;
}

93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
/* Map the NIR ALU type of a render target onto the matching Bifrost blend
 * shader type. A value of zero means the render target is not in use and maps
 * to zero. Unsupported types are logged, trip an assertion, and fall back to
 * zero when assertions are compiled out. */
static enum bifrost_shader_type
bifrost_blend_type_from_nir(nir_alu_type nir_type)
{
        /* Render target not in use */
        if (nir_type == 0)
                return 0;

        if (nir_type == nir_type_float16)
                return BIFROST_BLEND_F16;

        if (nir_type == nir_type_float32)
                return BIFROST_BLEND_F32;

        if (nir_type == nir_type_int32)
                return BIFROST_BLEND_I32;

        if (nir_type == nir_type_uint32)
                return BIFROST_BLEND_U32;

        if (nir_type == nir_type_int16)
                return BIFROST_BLEND_I16;

        if (nir_type == nir_type_uint16)
                return BIFROST_BLEND_U16;

        DBG("Unsupported blend shader type for NIR alu type %d", nir_type);
        assert(0);
        return 0;
}

118
void
119 120 121 122 123 124
panfrost_shader_compile(struct panfrost_context *ctx,
                        enum pipe_shader_ir ir_type,
                        const void *ir,
                        gl_shader_stage stage,
                        struct panfrost_shader_state *state,
                        uint64_t *outputs_written)
125
{
126
        struct panfrost_device *dev = pan_device(ctx->base.screen);
127 128 129 130
        uint8_t *dst;

        nir_shader *s;

131 132
        if (ir_type == PIPE_SHADER_IR_NIR) {
                s = nir_shader_clone(NULL, ir);
133
        } else {
134
                assert (ir_type == PIPE_SHADER_IR_TGSI);
135
                s = tgsi_to_nir(ir, ctx->base.screen, false);
136 137
        }

138
        s->info.stage = stage;
139 140 141

        /* Call out to Midgard compiler given the above NIR */

142
        panfrost_program program = {
143 144 145
                .alpha_ref = state->alpha_state.ref_value
        };

146 147 148 149 150 151
        if (dev->quirks & IS_BIFROST) {
                bifrost_compile_shader_nir(s, &program, dev->gpu_id);
        } else {
                midgard_compile_shader_nir(s, &program, false, 0, dev->gpu_id,
                                pan_debug & PAN_DBG_PRECOMPILE);
        }
152 153 154 155 156 157 158 159 160

        /* Prepare the compiled binary for upload */
        int size = program.compiled.size;
        dst = program.compiled.data;

        /* Upload the shader. The lookahead tag is ORed on as a tagged pointer.
         * I bet someone just thought that would be a cute pun. At least,
         * that's how I'd do it. */

161
        if (size) {
162
                state->bo = pan_bo_create(dev, size, PAN_BO_EXECUTE);
163
                memcpy(state->bo->cpu, dst, size);
164 165 166 167 168 169
        }

        if (!(dev->quirks & IS_BIFROST)) {
                /* If size = 0, no shader. Use dummy tag to avoid
                 * INSTR_INVALID_ENC */
                state->first_tag = size ? program.first_tag : 1;
170
        }
171 172 173

        util_dynarray_fini(&program.compiled);

174 175 176
        state->sysval_count = program.sysval_count;
        memcpy(state->sysval, program.sysvals, sizeof(state->sysval[0]) * state->sysval_count);

177 178 179
        bool vertex_id = s->info.system_values_read & (1 << SYSTEM_VALUE_VERTEX_ID);
        bool instance_id = s->info.system_values_read & (1 << SYSTEM_VALUE_INSTANCE_ID);

180
        switch (stage) {
181
        case MESA_SHADER_VERTEX:
182 183
                state->attribute_count = util_bitcount64(s->info.inputs_read);
                state->varying_count = util_bitcount64(s->info.outputs_written);
184 185

                if (vertex_id)
186
                        state->attribute_count = MAX2(state->attribute_count, PAN_VERTEX_ID + 1);
187 188

                if (instance_id)
189
                        state->attribute_count = MAX2(state->attribute_count, PAN_INSTANCE_ID + 1);
190

191 192
                break;
        case MESA_SHADER_FRAGMENT:
193 194
                state->attribute_count = 0;
                state->varying_count = util_bitcount64(s->info.inputs_read);
195 196 197 198
                if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
                        state->writes_depth = true;
                if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL))
                        state->writes_stencil = true;
199
                break;
200 201
        case MESA_SHADER_COMPUTE:
                /* TODO: images */
202 203
                state->attribute_count = 0;
                state->varying_count = 0;
204
                state->shared_size = s->info.cs.shared_size;
205
                break;
206 207 208 209
        default:
                unreachable("Unknown shader state");
        }

210
        state->can_discard = s->info.fs.uses_discard;
211 212
        state->writes_point_size = program.writes_point_size;
        state->reads_point_coord = false;
213
        state->helper_invocations = s->info.fs.needs_helper_invocations;
214
        state->stack_size = program.tls_size;
215

216 217 218
        if (outputs_written)
                *outputs_written = s->info.outputs_written;

219 220 221
        /* Separate as primary uniform count is truncated. Sysvals are prefix
         * uniforms */
        state->uniform_count = s->num_uniforms + program.sysval_count;
222 223
        state->uniform_cutoff = program.uniform_cutoff;
        state->work_reg_count = program.work_register_count;
224

225 226 227 228
        if (dev->quirks & IS_BIFROST)
                for (unsigned i = 0; i < BIFROST_MAX_RENDER_TARGET_COUNT; i++)
                        state->blend_types[i] = bifrost_blend_type_from_nir(program.blend_types[i]);

229 230 231 232 233 234 235 236 237 238 239 240 241
        unsigned default_vec1_swizzle;
        unsigned default_vec2_swizzle;
        unsigned default_vec4_swizzle;

        if (dev->quirks & HAS_SWIZZLES) {
                default_vec1_swizzle = panfrost_get_default_swizzle(1);
                default_vec2_swizzle = panfrost_get_default_swizzle(2);
                default_vec4_swizzle = panfrost_get_default_swizzle(4);
        } else {
                default_vec1_swizzle = panfrost_bifrost_swizzle(1);
                default_vec2_swizzle = panfrost_bifrost_swizzle(2);
                default_vec4_swizzle = panfrost_bifrost_swizzle(4);
        }
242

243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260
        /* Record the varying mapping for the command stream's bookkeeping */

        unsigned p_varyings[32];
        enum mali_format p_varying_type[32];

        struct exec_list *l_varyings =
                        stage == MESA_SHADER_VERTEX ? &s->outputs : &s->inputs;

        nir_foreach_variable(var, l_varyings) {
                unsigned loc = var->data.driver_location;
                unsigned sz = glsl_count_attribute_slots(var->type, FALSE);

                for (int c = 0; c < sz; ++c) {
                        p_varyings[loc + c] = var->data.location + c;
                        p_varying_type[loc + c] = pan_format_from_glsl(var->type);
                }
        }

261
        /* Iterate the varyings and emit the corresponding descriptor */
262
        for (unsigned i = 0; i < state->varying_count; ++i) {
263
                unsigned location = p_varyings[i];
264

265 266
                /* Default to a vec4 varying */
                struct mali_attr_meta v = {
267
                        .format = p_varying_type[i],
268
                        .swizzle = default_vec4_swizzle,
269
                        .unknown1 = dev->quirks & IS_BIFROST ? 0x0 : 0x2,
270 271
                };

272 273 274
                /* Check for special cases, otherwise assume general varying */

                if (location == VARYING_SLOT_POS) {
275 276 277 278
                        if (stage == MESA_SHADER_FRAGMENT)
                                state->reads_frag_coord = true;
                        else
                                v.format = MALI_VARYING_POS;
279 280 281
                } else if (location == VARYING_SLOT_PSIZ) {
                        v.format = MALI_R16F;
                        v.swizzle = default_vec1_swizzle;
282 283

                        state->writes_point_size = true;
284 285 286
                } else if (location == VARYING_SLOT_PNTC) {
                        v.format = MALI_RG16F;
                        v.swizzle = default_vec2_swizzle;
287 288

                        state->reads_point_coord = true;
289 290 291 292 293
                } else if (location == VARYING_SLOT_FACE) {
                        v.format = MALI_R32I;
                        v.swizzle = default_vec1_swizzle;

                        state->reads_face = true;
294 295
                }

296
                state->varyings[i] = v;
297
                state->varyings_loc[i] = location;
298 299
        }
}