pan_context.c 57.1 KB
Newer Older
1
/*
2
 * Copyright (C) 2019-2020 Collabora, Ltd.
3
 * © Copyright 2018 Alyssa Rosenzweig
4
 * Copyright © 2014-2017 Broadcom
5
 * Copyright (C) 2017 Intel Corporation
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <sys/poll.h>
#include <errno.h>

31
#include "pan_bo.h"
32
#include "pan_context.h"
33
#include "pan_minmax_cache.h"
34
#include "panfrost-quirks.h"
35 36

#include "util/macros.h"
37
#include "util/format/u_format.h"
38 39 40
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "util/u_memory.h"
41
#include "util/u_vbuf.h"
42
#include "util/half_float.h"
43
#include "util/u_helpers.h"
44
#include "util/format/u_format.h"
45
#include "util/u_prim.h"
46
#include "util/u_prim_restart.h"
47 48
#include "indices/u_primconvert.h"
#include "tgsi/tgsi_parse.h"
49
#include "tgsi/tgsi_from_mesa.h"
50
#include "util/u_math.h"
51

52
#include "midgard_pack.h"
53 54 55
#include "pan_screen.h"
#include "pan_blending.h"
#include "pan_blend_shaders.h"
56
#include "pan_cmdstream.h"
57
#include "pan_util.h"
58
#include "decode.h"
59
#include "util/pan_lower_framebuffer.h"
60

61 62 63 64
/* Emit the Midgard tiler descriptor for a batch.
 *
 * Picks a hierarchy mask for the framebuffer dimensions, sizes the polygon
 * list accordingly, and points the tiler at either the real device tiler
 * heap (when there is geometry) or a dummy BO (when vertex_count == 0 and
 * the tiler must be disabled).
 *
 * batch:        batch whose framebuffer key supplies width/height
 * tp:           packed MIDGARD_TILER descriptor to fill in
 * vertex_count: number of vertices in the batch; 0 means the tiler is unused
 */
void
panfrost_emit_midg_tiler(struct panfrost_batch *batch,
                         struct mali_midgard_tiler_packed *tp,
                         unsigned vertex_count)
{
        struct panfrost_device *device = pan_device(batch->ctx->base.screen);
        /* Some Midgard parts lack hierarchical tiling (MIDGARD_NO_HIER_TILING) */
        bool hierarchy = !(device->quirks & MIDGARD_NO_HIER_TILING);
        unsigned height = batch->key.height;
        unsigned width = batch->key.width;

        pan_pack(tp, MIDGARD_TILER, t) {
                t.hierarchy_mask =
                        panfrost_choose_hierarchy_mask(width, height,
                                                       vertex_count, hierarchy);

                /* Compute the polygon header size and use that to offset the body */

                unsigned header_size =
                        panfrost_tiler_header_size(width, height,
                                                   t.hierarchy_mask, hierarchy);

                t.polygon_list_size =
                        panfrost_tiler_full_size(width, height, t.hierarchy_mask,
                                                 hierarchy);

                if (vertex_count) {
                        /* Real geometry: allocate a full polygon list and use
                         * the device-wide tiler heap */
                        t.polygon_list =
                                panfrost_batch_get_polygon_list(batch,
                                                                header_size +
                                                                t.polygon_list_size);

                        t.heap_start = device->tiler_heap->ptr.gpu;
                        t.heap_end = device->tiler_heap->ptr.gpu +
                                     device->tiler_heap->size;
                } else {
                        struct panfrost_bo *tiler_dummy;

                        tiler_dummy = panfrost_batch_get_tiler_dummy(batch);
                        header_size = MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE;

                        /* The tiler is disabled, so don't allow the tiler heap */
                        t.heap_start = tiler_dummy->ptr.gpu;

                        /* Zero-sized heap: start == end */
                        t.heap_end = t.heap_start;

                        /* Use a dummy polygon list */
                        t.polygon_list = tiler_dummy->ptr.gpu;

                        /* Disable the tiler */
                        if (hierarchy)
                                t.hierarchy_mask |= MALI_MIDGARD_TILER_DISABLED;
                        else {
                                /* No hierarchical tiling: fall back to "user"
                                 * mode with a minimal hand-written list */
                                t.hierarchy_mask = MALI_MIDGARD_TILER_USER;
                                t.polygon_list_size = MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE + 4;

                                /* We don't have a WRITE_VALUE job, so write the polygon list manually */
                                uint32_t *polygon_list_body = (uint32_t *) (tiler_dummy->ptr.cpu + header_size);
                                polygon_list_body[0] = 0xa0000000; /* TODO: Just that? */
                        }
                }

                /* The body always immediately follows the header */
                t.polygon_list_body = t.polygon_list + header_size;
        }
}

124 125 126 127
/* Gallium clear entry point: record a clear of the requested buffers into a
 * fresh batch targeting the current framebuffer. */
static void
panfrost_clear(
        struct pipe_context *pipe,
        unsigned buffers,
        const struct pipe_scissor_state *scissor_state,
        const union pipe_color_union *color,
        double depth, unsigned stencil)
{
        struct panfrost_context *ctx = pan_context(pipe);

        /* TODO: panfrost_get_fresh_batch_for_fbo() instantiates a new batch if
         * the existing batch targeting this FBO has draws. We could probably
         * avoid that by replacing plain clears by quad-draws with a specific
         * color/depth/stencil value, thus avoiding the generation of extra
         * fragment jobs.
         */
        struct panfrost_batch *clear_batch =
                panfrost_get_fresh_batch_for_fbo(ctx);

        panfrost_batch_clear(clear_batch, buffers, color, depth, stencil);
}

144
bool
145 146
panfrost_writes_point_size(struct panfrost_context *ctx)
{
147
        assert(ctx->shader[PIPE_SHADER_VERTEX]);
148
        struct panfrost_shader_state *vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
149

150
        return vs->writes_point_size && ctx->active_prim == PIPE_PRIM_POINTS;
151 152
}

153 154 155 156 157 158 159 160 161
/* The entire frame is in memory -- send it off to the kernel! */

void
panfrost_flush(
        struct pipe_context *pipe,
        struct pipe_fence_handle **fence,
        unsigned flags)
{
        struct panfrost_context *ctx = pan_context(pipe);
162
        struct panfrost_device *dev = pan_device(pipe->screen);
163
        uint32_t syncobj = 0;
164

165 166
        if (fence)
                drmSyncobjCreate(dev->fd, 0, &syncobj);
167

168
        /* Submit all pending jobs */
169
        panfrost_flush_all_batches(ctx, syncobj);
170 171

        if (fence) {
172
                struct panfrost_fence *f = panfrost_fence_create(ctx, syncobj);
173 174 175
                pipe->screen->fence_reference(pipe->screen, fence, NULL);
                *fence = (struct pipe_fence_handle *)f;
        }
Icecream95's avatar
Icecream95 committed
176

177
        if (dev->debug & PAN_DBG_TRACE)
Icecream95's avatar
Icecream95 committed
178
                pandecode_next_frame();
179 180
}

181 182 183 184
/* Texture barrier: prior rendering must become visible to subsequent
 * texturing. Flushing every in-flight batch is sufficient for that. */
static void
panfrost_texture_barrier(struct pipe_context *pipe, unsigned flags)
{
        panfrost_flush_all_batches(pan_context(pipe), 0);
}

Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
188
#define DEFINE_CASE(c) case PIPE_PRIM_##c: return MALI_DRAW_MODE_##c;
189 190

static int
191
pan_draw_mode(enum pipe_prim_type mode)
192 193 194 195 196 197 198 199 200 201 202 203 204 205
{
        switch (mode) {
                DEFINE_CASE(POINTS);
                DEFINE_CASE(LINES);
                DEFINE_CASE(LINE_LOOP);
                DEFINE_CASE(LINE_STRIP);
                DEFINE_CASE(TRIANGLES);
                DEFINE_CASE(TRIANGLE_STRIP);
                DEFINE_CASE(TRIANGLE_FAN);
                DEFINE_CASE(QUADS);
                DEFINE_CASE(QUAD_STRIP);
                DEFINE_CASE(POLYGON);

        default:
206
                unreachable("Invalid draw mode");
207 208 209 210 211
        }
}

#undef DEFINE_CASE

212 213 214 215 216 217 218
static bool
panfrost_scissor_culls_everything(struct panfrost_context *ctx)
{
        const struct pipe_scissor_state *ss = &ctx->scissor;

        /* Check if we're scissoring at all */

219
        if (!ctx->rasterizer->base.scissor)
220 221
                return false;

222
        return (ss->minx == ss->maxx) || (ss->miny == ss->maxy);
223 224
}

225 226 227 228 229 230 231 232 233 234 235 236 237 238
/* Count generated primitives (when there is no geom/tess shaders) for
 * transform feedback */

static void
panfrost_statistics_record(
                struct panfrost_context *ctx,
                const struct pipe_draw_info *info)
{
        /* Statistics only accumulate while queries are active */
        if (!ctx->active_queries)
                return;

        uint32_t generated = u_prims_for_vertices(info->mode, info->count);

        ctx->prims_generated += generated;

        /* The XFB-written counter only advances while streamout is bound */
        if (ctx->streamout.num_targets)
                ctx->tf_prims_generated += generated;
}

245 246 247 248 249 250 251 252
/* Advance each bound stream-output target's offset by the number of
 * outputs this draw produced.
 *
 * Fix: u_stream_outputs_for_vertices() depends only on the primitive type
 * and vertex count, not on the target, so it is hoisted out of the loop
 * instead of being recomputed once per target. */
static void
panfrost_update_streamout_offsets(struct panfrost_context *ctx)
{
        unsigned count =
                u_stream_outputs_for_vertices(ctx->active_prim,
                                              ctx->vertex_count);

        for (unsigned i = 0; i < ctx->streamout.num_targets; ++i)
                pan_so_target(ctx->streamout.targets[i])->offset += count;
}

257 258 259 260 261
static inline void
pan_emit_draw_descs(struct panfrost_batch *batch,
                struct MALI_DRAW *d, enum pipe_shader_type st)
{
        d->offset_start = batch->ctx->offset_start;
262 263
        d->instance_size = batch->ctx->instance_count > 1 ?
                           batch->ctx->padded_count : 1;
264 265 266 267 268 269

        d->uniform_buffers = panfrost_emit_const_buf(batch, st, &d->push_uniforms);
        d->textures = panfrost_emit_texture_descriptors(batch, st);
        d->samplers = panfrost_emit_sampler_descriptors(batch, st);
}

270 271 272 273 274 275 276 277 278 279 280
/* Translate an index size in bytes (1/2/4) to the Mali index-type enum. */
static enum mali_index_type
panfrost_translate_index_size(unsigned size)
{
        switch (size) {
        case 1:
                return MALI_INDEX_TYPE_UINT8;
        case 2:
                return MALI_INDEX_TYPE_UINT16;
        case 4:
                return MALI_INDEX_TYPE_UINT32;
        default:
                unreachable("Invalid index size");
        }
}

281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299
/* Emit the vertex-shading job (a COMPUTE_JOB) for a draw.
 *
 * invocation_template is a pre-packed invocation (workgroup) descriptor
 * shared with the tiler job; vs_vary/varyings are the vertex-stage varying
 * descriptors, shared_mem the thread-storage area, and job the CPU pointer
 * to the pool-allocated COMPUTE_JOB being filled in. */
static void
panfrost_draw_emit_vertex(struct panfrost_batch *batch,
                          const struct pipe_draw_info *info,
                          void *invocation_template,
                          mali_ptr shared_mem, mali_ptr vs_vary,
                          mali_ptr varyings, void *job)
{
        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_device *device = pan_device(ctx->base.screen);

        /* Copy in the shared invocation descriptor verbatim */
        void *section =
                pan_section_ptr(job, COMPUTE_JOB, INVOCATION);
        memcpy(section, invocation_template, MALI_INVOCATION_LENGTH);

        pan_section_pack(job, COMPUTE_JOB, PARAMETERS, cfg) {
                cfg.job_task_split = 5;
        }

        pan_section_pack(job, COMPUTE_JOB, DRAW, cfg) {
                cfg.draw_descriptor_is_64b = true;
                /* 64-bit texture descriptors are a Midgard-only flag */
                if (!(device->quirks & IS_BIFROST))
                        cfg.texture_descriptor_is_64b = true;
                cfg.state = panfrost_emit_compute_shader_meta(batch, PIPE_SHADER_VERTEX);
                cfg.attributes = panfrost_emit_vertex_data(batch, &cfg.attribute_buffers);
                cfg.varyings = vs_vary;
                cfg.varying_buffers = varyings;
                cfg.thread_storage = shared_mem;
                pan_emit_draw_descs(batch, &cfg, PIPE_SHADER_VERTEX);
        }
}

/* Pack the PRIMITIVE_SIZE section: either a pointer to the per-vertex point
 * size array (when the VS writes gl_PointSize for a point draw) or the
 * constant point size / line width from the rasterizer state. */
static void
panfrost_emit_primitive_size(struct panfrost_context *ctx,
                             bool points, mali_ptr size_array,
                             void *prim_size)
{
        struct panfrost_rasterizer *rast = ctx->rasterizer;

        pan_pack(prim_size, PRIMITIVE_SIZE, cfg) {
                if (!panfrost_writes_point_size(ctx)) {
                        cfg.constant = points ? rast->base.point_size
                                              : rast->base.line_width;
                } else {
                        cfg.size_array = size_array;
                }
        }
}

/* Emit the tiler job for a draw.
 *
 * Fills the INVOCATION, PRIMITIVE, (Bifrost-only TILER/PADDING), DRAW and
 * PRIMITIVE_SIZE sections of the job. Bifrost and Midgard tiler jobs share
 * section names but differ in layout, hence the is_bifrost selection of
 * every section pointer.
 *
 * indices: GPU address of the (possibly shadowed) index buffer, 0 if
 *          non-indexed; fs_vary/varyings/pos/psiz: varying descriptors
 *          produced by panfrost_emit_varying_descriptor(). */
static void
panfrost_draw_emit_tiler(struct panfrost_batch *batch,
                         const struct pipe_draw_info *info,
                         void *invocation_template,
                         mali_ptr shared_mem, mali_ptr indices,
                         mali_ptr fs_vary, mali_ptr varyings,
                         mali_ptr pos, mali_ptr psiz, void *job)
{
        struct panfrost_context *ctx = batch->ctx;
        struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;
        struct panfrost_device *device = pan_device(ctx->base.screen);
        bool is_bifrost = device->quirks & IS_BIFROST;

        /* Copy in the invocation descriptor shared with the vertex job */
        void *section = is_bifrost ?
                        pan_section_ptr(job, BIFROST_TILER_JOB, INVOCATION) :
                        pan_section_ptr(job, MIDGARD_TILER_JOB, INVOCATION);
        memcpy(section, invocation_template, MALI_INVOCATION_LENGTH);

        section = is_bifrost ?
                  pan_section_ptr(job, BIFROST_TILER_JOB, PRIMITIVE) :
                  pan_section_ptr(job, MIDGARD_TILER_JOB, PRIMITIVE);
        pan_pack(section, PRIMITIVE, cfg) {
                cfg.draw_mode = pan_draw_mode(info->mode);
                if (panfrost_writes_point_size(ctx))
                        cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16;
                cfg.first_provoking_vertex = rast->flatshade_first;
                /* Only the implicit (all-ones) restart index reaches here;
                 * other values were lowered away in panfrost_draw_vbo */
                if (info->primitive_restart)
                        cfg.primitive_restart = MALI_PRIMITIVE_RESTART_IMPLICIT;
                cfg.job_task_split = 6;

                if (info->index_size) {
                        cfg.index_type = panfrost_translate_index_size(info->index_size);
                        cfg.indices = indices;
                        cfg.base_vertex_offset = info->index_bias - ctx->offset_start;
                        cfg.index_count = info->count;
                } else {
                        /* Non-indexed: the count may come from stream output
                         * (draw-auto) rather than the draw info */
                        cfg.index_count = info->indirect && info->indirect->count_from_stream_output ?
                                          pan_so_target(info->indirect->count_from_stream_output)->offset :
                                          ctx->vertex_count;
                }
        }

        bool points = info->mode == PIPE_PRIM_POINTS;
        void *prim_size = is_bifrost ?
                          pan_section_ptr(job, BIFROST_TILER_JOB, PRIMITIVE_SIZE) :
                          pan_section_ptr(job, MIDGARD_TILER_JOB, PRIMITIVE_SIZE);

        if (is_bifrost) {
                panfrost_emit_primitive_size(ctx, points, psiz, prim_size);
                pan_section_pack(job, BIFROST_TILER_JOB, TILER, cfg) {
                        cfg.address = panfrost_batch_get_bifrost_tiler(batch, ~0);
                }
                pan_section_pack(job, BIFROST_TILER_JOB, PADDING, padding) {}
        }

        section = is_bifrost ?
                  pan_section_ptr(job, BIFROST_TILER_JOB, DRAW) :
                  pan_section_ptr(job, MIDGARD_TILER_JOB, DRAW);
        pan_pack(section, DRAW, cfg) {
                cfg.four_components_per_vertex = true;
                cfg.draw_descriptor_is_64b = true;
                /* 64-bit texture descriptors are a Midgard-only flag */
                if (!(device->quirks & IS_BIFROST))
                        cfg.texture_descriptor_is_64b = true;
                cfg.front_face_ccw = rast->front_ccw;
                cfg.cull_front_face = rast->cull_face & PIPE_FACE_FRONT;
                cfg.cull_back_face = rast->cull_face & PIPE_FACE_BACK;
                cfg.position = pos;
                cfg.state = panfrost_emit_frag_shader_meta(batch);
                cfg.viewport = panfrost_emit_viewport(batch);
                cfg.varyings = fs_vary;
                cfg.varying_buffers = varyings;
                cfg.thread_storage = shared_mem;

                pan_emit_draw_descs(batch, &cfg, PIPE_SHADER_FRAGMENT);

                if (ctx->occlusion_query && ctx->active_queries) {
                        if (ctx->occlusion_query->type == PIPE_QUERY_OCCLUSION_COUNTER)
                                cfg.occlusion_query = MALI_OCCLUSION_MODE_COUNTER;
                        else
                                cfg.occlusion_query = MALI_OCCLUSION_MODE_PREDICATE;
                        cfg.occlusion = ctx->occlusion_query->bo->ptr.gpu;
                        /* The fragment job writes the query result */
                        panfrost_batch_add_bo(ctx->batch, ctx->occlusion_query->bo,
                                              PAN_BO_ACCESS_SHARED |
                                              PAN_BO_ACCESS_RW |
                                              PAN_BO_ACCESS_FRAGMENT);
                }
        }

        /* On Midgard, PRIMITIVE_SIZE is packed after DRAW */
        if (!is_bifrost)
                panfrost_emit_primitive_size(ctx, points, psiz, prim_size);
}

422 423 424 425 426 427
static void
panfrost_draw_vbo(
        struct pipe_context *pipe,
        const struct pipe_draw_info *info)
{
        struct panfrost_context *ctx = pan_context(pipe);
428
        struct panfrost_device *device = pan_device(ctx->base.screen);
429

430 431 432 433 434 435 436
        /* First of all, check the scissor to see if anything is drawn at all.
         * If it's not, we drop the draw (mostly a conformance issue;
         * well-behaved apps shouldn't hit this) */

        if (panfrost_scissor_culls_everything(ctx))
                return;

437 438
        int mode = info->mode;

439 440 441 442
        /* Fallback unsupported restart index */
        unsigned primitive_index = (1 << (info->index_size * 8)) - 1;

        if (info->primitive_restart && info->index_size
443
            && info->restart_index != primitive_index) {
444 445 446 447
                util_draw_vbo_without_prim_restart(pipe, info);
                return;
        }

448
        /* Fallback for unsupported modes */
449

450 451
        assert(ctx->rasterizer != NULL);

452
        if (!(ctx->draw_modes & (1 << mode))) {
453 454
                if (info->count < 4) {
                        /* Degenerate case? */
455 456
                        return;
                }
457 458 459 460

                util_primconvert_save_rasterizer_state(ctx->primconvert, &ctx->rasterizer->base);
                util_primconvert_draw_vbo(ctx->primconvert, info);
                return;
461 462
        }

463
        /* Now that we have a guaranteed terminating path, find the job. */
464

465 466
        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
        panfrost_batch_set_requirements(batch);
467

468 469
        /* Take into account a negative bias */
        ctx->vertex_count = info->count + abs(info->index_bias);
470
        ctx->instance_count = info->instance_count;
471
        ctx->active_prim = info->mode;
472

473
        bool is_bifrost = device->quirks & IS_BIFROST;
474
        struct panfrost_ptr tiler =
475 476 477 478 479
                panfrost_pool_alloc_aligned(&batch->pool,
                                            is_bifrost ?
                                            MALI_BIFROST_TILER_JOB_LENGTH :
                                            MALI_MIDGARD_TILER_JOB_LENGTH,
                                            64);
480
        struct panfrost_ptr vertex =
481 482 483 484 485
                panfrost_pool_alloc_aligned(&batch->pool,
                                            MALI_COMPUTE_JOB_LENGTH,
                                            64);

        unsigned vertex_count = ctx->vertex_count;
486

487
        mali_ptr shared_mem = panfrost_batch_reserve_framebuffer(batch);
488

489
        unsigned min_index = 0, max_index = 0;
490 491 492 493 494 495 496 497 498 499 500 501
        mali_ptr indices = 0;

        if (info->index_size) {
                indices = panfrost_get_index_buffer_bounded(ctx, info,
                                                            &min_index,
                                                            &max_index);

                /* Use the corresponding values */
                vertex_count = max_index - min_index + 1;
                ctx->offset_start = min_index + info->index_bias;
        } else {
                ctx->offset_start = info->start;
502 503 504 505
        }

        /* Encode the padded vertex count */

506
        if (info->instance_count > 1)
507
                ctx->padded_count = panfrost_padded_vertex_count(vertex_count);
508
        else
509
                ctx->padded_count = vertex_count;
510

511 512
        panfrost_statistics_record(ctx, info);

513
        struct mali_invocation_packed invocation;
514
        panfrost_pack_work_groups_compute(&invocation,
515 516
                                          1, vertex_count, info->instance_count,
                                          1, 1, 1, true);
517

518
        /* Emit all sort of descriptors. */
519
        mali_ptr varyings = 0, vs_vary = 0, fs_vary = 0, pos = 0, psiz = 0;
520

521
        panfrost_emit_varying_descriptor(batch,
522
                                         ctx->padded_count *
523
                                         ctx->instance_count,
524 525
                                         &vs_vary, &fs_vary, &varyings,
                                         &pos, &psiz);
526

527
        /* Fire off the draw itself */
528
        panfrost_draw_emit_vertex(batch, info, &invocation, shared_mem,
529
                                  vs_vary, varyings, vertex.cpu);
530
        panfrost_draw_emit_tiler(batch, info, &invocation, shared_mem, indices,
531
                                 fs_vary, varyings, pos, psiz, tiler.cpu);
532
        panfrost_emit_vertex_tiler_jobs(batch, &vertex, &tiler);
533 534 535

        /* Adjust the batch stack size based on the new shader stack sizes. */
        panfrost_batch_adjust_stack_size(batch);
536 537

        /* Increment transform feedback offsets */
538
        panfrost_update_streamout_offsets(ctx);
539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557
}

/* CSO state */

/* Generic CSO destructor: state objects that own nothing beyond their own
 * heap allocation are released with a plain free(). */
static void
panfrost_generic_cso_delete(struct pipe_context *pctx, void *hwcso)
{
        free(hwcso);
}

/* Create a rasterizer CSO: we keep a verbatim copy of the Gallium state and
 * translate it at draw time. */
static void *
panfrost_create_rasterizer_state(
        struct pipe_context *pctx,
        const struct pipe_rasterizer_state *cso)
{
        struct panfrost_rasterizer *rast = CALLOC_STRUCT(panfrost_rasterizer);

        rast->base = *cso;

        /* Guaranteed with the core GL call, so don't expose ARB_polygon_offset */
        assert(cso->offset_clamp == 0.0);

        return rast;
}

static void
panfrost_bind_rasterizer_state(
        struct pipe_context *pctx,
        void *hwcso)
{
        struct panfrost_context *ctx = pan_context(pctx);
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
570
        ctx->rasterizer = hwcso;
571 572 573 574 575 576 577 578 579
}

static void *
panfrost_create_vertex_elements_state(
        struct pipe_context *pctx,
        unsigned num_elements,
        const struct pipe_vertex_element *elements)
{
        struct panfrost_vertex_state *so = CALLOC_STRUCT(panfrost_vertex_state);
580
        struct panfrost_device *dev = pan_device(pctx->screen);
581 582 583 584 585 586 587

        so->num_elements = num_elements;
        memcpy(so->pipe, elements, sizeof(*elements) * num_elements);

        for (int i = 0; i < num_elements; ++i) {
                enum pipe_format fmt = elements[i].src_format;
                const struct util_format_description *desc = util_format_description(fmt);
588 589
                so->formats[i] = dev->formats[desc->format].hw;
                assert(so->formats[i]);
590 591
        }

592
        /* Let's also prepare vertex builtins */
593 594
        so->formats[PAN_VERTEX_ID] = dev->formats[PIPE_FORMAT_R32_UINT].hw;
        so->formats[PAN_INSTANCE_ID] = dev->formats[PIPE_FORMAT_R32_UINT].hw;
595

596 597 598 599 600 601 602 603 604 605 606 607 608 609 610
        return so;
}

static void
panfrost_bind_vertex_elements_state(
        struct pipe_context *pctx,
        void *hwcso)
{
        struct panfrost_context *ctx = pan_context(pctx);
        ctx->vertex = hwcso;
}

static void *
panfrost_create_shader_state(
        struct pipe_context *pctx,
611 612
        const struct pipe_shader_state *cso,
        enum pipe_shader_type stage)
613 614
{
        struct panfrost_shader_variants *so = CALLOC_STRUCT(panfrost_shader_variants);
615
        struct panfrost_device *dev = pan_device(pctx->screen);
616 617 618 619 620 621 622
        so->base = *cso;

        /* Token deep copy to prevent memory corruption */

        if (cso->type == PIPE_SHADER_IR_TGSI)
                so->base.tokens = tgsi_dup_tokens(so->base.tokens);

623
        /* Precompile for shader-db if we need to */
624
        if (unlikely((dev->debug & PAN_DBG_PRECOMPILE) && cso->type == PIPE_SHADER_IR_NIR)) {
625 626
                struct panfrost_context *ctx = pan_context(pctx);

627
                struct panfrost_shader_state state = { 0 };
628 629
                uint64_t outputs_written;

630 631 632 633
                panfrost_shader_compile(ctx, PIPE_SHADER_IR_NIR,
                                        so->base.ir.nir,
                                        tgsi_processor_to_shader_stage(stage),
                                        &state, &outputs_written);
634 635
        }

636 637 638 639 640 641 642 643
        return so;
}

static void
panfrost_delete_shader_state(
        struct pipe_context *pctx,
        void *so)
{
644 645 646
        struct panfrost_shader_variants *cso = (struct panfrost_shader_variants *) so;

        if (cso->base.type == PIPE_SHADER_IR_TGSI) {
647
                /* TODO: leaks TGSI tokens! */
648 649
        }

650 651
        for (unsigned i = 0; i < cso->variant_count; ++i) {
                struct panfrost_shader_state *shader_state = &cso->variants[i];
652
                panfrost_bo_unreference(shader_state->bo);
653 654 655 656

                if (shader_state->upload.rsrc)
                        pipe_resource_reference(&shader_state->upload.rsrc, NULL);

657 658
                shader_state->bo = NULL;
        }
659
        free(cso->variants);
660

661

662 663 664 665 666 667 668 669 670
        free(so);
}

static void *
panfrost_create_sampler_state(
        struct pipe_context *pctx,
        const struct pipe_sampler_state *cso)
{
        struct panfrost_sampler_state *so = CALLOC_STRUCT(panfrost_sampler_state);
671 672
        struct panfrost_device *device = pan_device(pctx->screen);

673 674
        so->base = *cso;

675
        if (device->quirks & IS_BIFROST)
676
                panfrost_sampler_desc_init_bifrost(cso, (struct mali_bifrost_sampler_packed *) &so->hw);
677
        else
678
                panfrost_sampler_desc_init(cso, &so->hw);
679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699

        return so;
}

/* Bind a contiguous range of sampler CSOs for one shader stage. */
static void
panfrost_bind_sampler_states(
        struct pipe_context *pctx,
        enum pipe_shader_type shader,
        unsigned start_slot, unsigned num_sampler,
        void **sampler)
{
        struct panfrost_context *ctx = pan_context(pctx);

        /* We only support binding from slot 0 */
        assert(start_slot == 0);

        /* XXX: Should upload, not just copy? */
        ctx->sampler_count[shader] = num_sampler;
        memcpy(ctx->samplers[shader], sampler, num_sampler * sizeof (void *));
}

/* Check whether an already-compiled shader variant is usable with the
 * current context state.
 *
 * The only variant key visible here is the render-target format set used
 * for framebuffer-fetch lowering: for every color output the shader reads
 * back, the format the variant was compiled for must match the format the
 * framebuffer currently has (with natively-loadable formats collapsed to
 * PIPE_FORMAT_NONE on both sides). Returns true if the variant can be
 * reused. */
static bool
panfrost_variant_matches(
        struct panfrost_context *ctx,
        struct panfrost_shader_state *variant,
        enum pipe_shader_type type)
{
        struct panfrost_device *dev = pan_device(ctx->base.screen);

        if (variant->outputs_read) {
                struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer;

                /* Walk each color output the shader reads back */
                unsigned i;
                BITSET_FOREACH_SET(i, &variant->outputs_read, 8) {
                        /* Unbound attachments read back as RGBA8 UNORM */
                        enum pipe_format fmt = PIPE_FORMAT_R8G8B8A8_UNORM;

                        if ((fb->nr_cbufs > i) && fb->cbufs[i])
                                fmt = fb->cbufs[i]->format;

                        const struct util_format_description *desc =
                                util_format_description(fmt);

                        /* Natively-loadable formats don't need a specialized
                         * variant, so they all map to the same key */
                        if (pan_format_class_load(desc, dev->quirks) == PAN_FORMAT_NATIVE)
                                fmt = PIPE_FORMAT_NONE;

                        if (variant->rt_formats[i] != fmt)
                                return false;
                }
        }

        /* Otherwise, we're good to go */
        return true;
}

732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752
/**
 * Fix an uncompiled shader's stream output info, and produce a bitmask
 * of which VARYING_SLOT_* are captured for stream output.
 *
 * Core Gallium stores output->register_index as a "slot" number, where
 * slots are assigned consecutively to all outputs in info->outputs_written.
 * This naive packing of outputs doesn't work for us - we too have slots,
 * but the layout is defined by the VUE map, which we won't have until we
 * compile a specific shader variant.  So, we remap these and simply store
 * VARYING_SLOT_* in our copy's output->register_index fields.
 *
 * We then produce a bitmask of outputs which are used for SO.
 *
 * Implementation from iris.
 */

static uint64_t
update_so_info(struct pipe_stream_output_info *so_info,
               uint64_t outputs_written)
{
        uint64_t captured = 0;
        uint8_t slot_to_varying[64] = {0};
        unsigned next_slot = 0;

        /* Invert Gallium's packing: consecutive slots correspond to the set
         * bits of outputs_written, in ascending order */
        while (outputs_written)
                slot_to_varying[next_slot++] = u_bit_scan64(&outputs_written);

        for (unsigned i = 0; i < so_info->num_outputs; i++) {
                struct pipe_stream_output *output = &so_info->output[i];

                /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
                output->register_index = slot_to_varying[output->register_index];

                captured |= 1ull << output->register_index;
        }

        return captured;
}

771
static void
772
panfrost_bind_shader_state(
773
        struct pipe_context *pctx,
774 775
        void *hwcso,
        enum pipe_shader_type type)
776 777
{
        struct panfrost_context *ctx = pan_context(pctx);
778
        struct panfrost_device *dev = pan_device(ctx->base.screen);
779 780
        ctx->shader[type] = hwcso;

781
        if (!hwcso) return;
782

783
        /* Match the appropriate variant */
784

785 786
        signed variant = -1;
        struct panfrost_shader_variants *variants = (struct panfrost_shader_variants *) hwcso;
787

788
        for (unsigned i = 0; i < variants->variant_count; ++i) {
789
                if (panfrost_variant_matches(ctx, &variants->variants[i], type)) {
790 791
                        variant = i;
                        break;
792
                }
793
        }
794

795 796 797
        if (variant == -1) {
                /* No variant matched, so create a new one */
                variant = variants->variant_count++;
798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816

                if (variants->variant_count > variants->variant_space) {
                        unsigned old_space = variants->variant_space;

                        variants->variant_space *= 2;
                        if (variants->variant_space == 0)
                                variants->variant_space = 1;

                        /* Arbitrary limit to stop runaway programs from
                         * creating an unbounded number of shader variants. */
                        assert(variants->variant_space < 1024);

                        unsigned msize = sizeof(struct panfrost_shader_state);
                        variants->variants = realloc(variants->variants,
                                                     variants->variant_space * msize);

                        memset(&variants->variants[old_space], 0,
                               (variants->variant_space - old_space) * msize);
                }
817

818
                struct panfrost_shader_state *v =
819
                                &variants->variants[variant];
820 821

                if (type == PIPE_SHADER_FRAGMENT) {
822 823 824 825 826 827 828
                        struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer;
                        for (unsigned i = 0; i < fb->nr_cbufs; ++i) {
                                enum pipe_format fmt = PIPE_FORMAT_R8G8B8A8_UNORM;

                                if ((fb->nr_cbufs > i) && fb->cbufs[i])
                                        fmt = fb->cbufs[i]->format;

829 830 831 832 833 834
                                const struct util_format_description *desc =
                                        util_format_description(fmt);

                                if (pan_format_class_load(desc, dev->quirks) == PAN_FORMAT_NATIVE)
                                        fmt = PIPE_FORMAT_NONE;

835 836
                                v->rt_formats[i] = fmt;
                        }
837
                }
838
        }
839

840 841
        /* Select this variant */
        variants->active_variant = variant;
842

843
        struct panfrost_shader_state *shader_state = &variants->variants[variant];
844
        assert(panfrost_variant_matches(ctx, shader_state, type));
845

846
        /* We finally have a variant, so compile it */
847

848
        if (!shader_state->compiled) {
849 850
                uint64_t outputs_written = 0;

851 852 853 854 855 856
                panfrost_shader_compile(ctx, variants->base.type,
                                        variants->base.type == PIPE_SHADER_IR_NIR ?
                                        variants->base.ir.nir :
                                        variants->base.tokens,
                                        tgsi_processor_to_shader_stage(type),
                                        shader_state,
857
                                        &outputs_written);
858

859
                shader_state->compiled = true;
860 861 862 863 864 865 866

                /* Fixup the stream out information, since what Gallium returns
                 * normally is mildly insane */

                shader_state->stream_output = variants->base.stream_output;
                shader_state->so_mask =
                        update_so_info(&shader_state->stream_output, outputs_written);
867
        }
868 869
}

870 871 872 873 874 875 876 877 878 879 880 881
static void *
panfrost_create_vs_state(struct pipe_context *pctx, const struct pipe_shader_state *hwcso)
{
        return panfrost_create_shader_state(pctx, hwcso, PIPE_SHADER_VERTEX);
}

/* Gallium entrypoint: create a fragment shader CSO via the shared path */
static void *
panfrost_create_fs_state(struct pipe_context *pctx, const struct pipe_shader_state *hwcso)
{
        void *so = panfrost_create_shader_state(pctx, hwcso, PIPE_SHADER_FRAGMENT);
        return so;
}

882
static void
883
panfrost_bind_vs_state(struct pipe_context *pctx, void *hwcso)
884
{
885 886
        panfrost_bind_shader_state(pctx, hwcso, PIPE_SHADER_VERTEX);
}
887

888 889 890 891
static void
panfrost_bind_fs_state(struct pipe_context *pctx, void *hwcso)
{
        panfrost_bind_shader_state(pctx, hwcso, PIPE_SHADER_FRAGMENT);
892 893 894 895 896 897 898 899 900 901 902
}

static void
panfrost_set_vertex_buffers(
        struct pipe_context *pctx,
        unsigned start_slot,
        unsigned num_buffers,
        const struct pipe_vertex_buffer *buffers)
{
        struct panfrost_context *ctx = pan_context(pctx);

903
        util_set_vertex_buffers_mask(ctx->vertex_buffers, &ctx->vb_mask, buffers, start_slot, num_buffers);
904 905 906 907 908 909 910 911 912 913 914
}

/* Bind (or unbind, when buf is NULL) a constant buffer for the given shader
 * stage, keeping the per-stage enabled/dirty slot masks in sync. */
static void
panfrost_set_constant_buffer(
        struct pipe_context *pctx,
        enum pipe_shader_type shader, uint index,
        const struct pipe_constant_buffer *buf)
{
        struct panfrost_context *ctx = pan_context(pctx);
        struct panfrost_constant_buffer *pbuf = &ctx->constant_buffer[shader];
        unsigned bit = 1 << index;

        /* Copy (or release, for NULL) the buffer into our shadow state */
        util_copy_constant_buffer(&pbuf->cb[index], buf);

        if (buf) {
                pbuf->enabled_mask |= bit;
                pbuf->dirty_mask |= bit;
        } else {
                /* Unbound slots are neither enabled nor dirty */
                pbuf->enabled_mask &= ~bit;
                pbuf->dirty_mask &= ~bit;
        }
}

static void
panfrost_set_stencil_ref(
        struct pipe_context *pctx,
        const struct pipe_stencil_ref *ref)
{
        struct panfrost_context *ctx = pan_context(pctx);
        ctx->stencil_ref = *ref;
}

938 939 940 941
void
panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so,
                                struct pipe_context *pctx,
                                struct pipe_resource *texture)
942
{
943
        struct panfrost_device *device = pan_device(pctx->screen);
944
        struct panfrost_resource *prsrc = (struct panfrost_resource *)texture;
945
        enum pipe_format format = so->base.format;
946
        assert(prsrc->bo);
947

948
        /* Format to access the stencil portion of a Z32_S8 texture */
        if (format == PIPE_FORMAT_X32_S8X24_UINT) {
950 951 952 953 954 955
                assert(prsrc->separate_stencil);
                texture = &prsrc->separate_stencil->base;
                prsrc = (struct panfrost_resource *)texture;
                format = texture->format;
        }

        const struct util_format_description *desc = util_format_description(format);

        bool fake_rgtc = !panfrost_supports_compressed_format(device, MALI_BC4_UNORM);

        if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC && fake_rgtc) {
                if (desc->is_snorm)
                        format = PIPE_FORMAT_R8G8B8A8_SNORM;
                else
                        format = PIPE_FORMAT_R8G8B8A8_UNORM;
                desc = util_format_description(format);
        }

968
        so->texture_bo = prsrc->bo->ptr.gpu;
969
        so->modifier = prsrc->modifier;
970

971
        unsigned char user_swizzle[4] = {
972 973 974 975
                so->base.swizzle_r,
                so->base.swizzle_g,
                so->base.swizzle_b,
                so->base.swizzle_a
976 977
        };

978 979 980 981
        /* In the hardware, array_size refers specifically to array textures,
         * whereas in Gallium, it also covers cubemaps */

        unsigned array_size = texture->array_size;
982
        unsigned depth = texture->depth0;
983

984
        if (so->base.target == PIPE_TEXTURE_CUBE) {
985 986
                /* TODO: Cubemap arrays */
                assert(array_size == 6);
987
                array_size /= 6;
988 989
        }

990 991 992 993 994 995 996 997 998 999 1000
        /* MSAA only supported for 2D textures (and 2D texture arrays via an
         * extension currently unimplemented */

        if (so->base.target == PIPE_TEXTURE_2D) {
                assert(depth == 1);
                depth = texture->nr_samples;
        } else {
                /* MSAA only supported for 2D textures */
                assert(texture->nr_samples <= 1);
        }

1001 1002
        enum mali_texture_dimension type =
                panfrost_translate_texture_dimension(so->base.target);
1003

1004
        if (device->quirks & IS_BIFROST) {
1005
                unsigned size = panfrost_estimate_texture_payload_size(
1006 1007 1008 1009
                                so->base.u.tex.first_level,
                                so->base.u.tex.last_level,
                                so->base.u.tex.first_layer,
                                so->base.u.tex.last_layer,
1010
                                texture->nr_samples,
1011
                                type, prsrc->modifier);
1012

1013
                so->bo = panfrost_bo_create(device, size, 0);