/*
 * Copyright © 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/** @file v3d_job.c
 *
 * Functions for submitting V3D render jobs to the kernel.
 */

#include <xf86drm.h>

#include "v3d_context.h"

/* The OQ/semaphore packets are the same across V3D versions. */
#define V3D_VERSION 33
#include "broadcom/cle/v3dx_pack.h"
#include "broadcom/common/v3d_macros.h"

#include "util/hash_table.h"
#include "util/ralloc.h"
#include "util/set.h"
#include "broadcom/clif/clif_dump.h"

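/**
 * Frees a job and everything it holds: drops its BO references, removes it
 * from the context's job and write-job hash tables, releases its surfaces
 * and CLs, and finally frees the allocation itself.
 */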
void
v3d_job_free(struct v3d_context *v3d, struct v3d_job *job)
{
        set_foreach(job->bos, entry) {
                struct v3d_bo *bo = (struct v3d_bo *)entry->key;
                v3d_bo_unreference(&bo);
        }

        _mesa_hash_table_remove_key(v3d->jobs, &job->key);

        if (job->write_prscs) {
                set_foreach(job->write_prscs, entry) {
                        const struct pipe_resource *prsc = entry->key;

                        _mesa_hash_table_remove_key(v3d->write_jobs, prsc);
                }
        }

        for (int i = 0; i < job->nr_cbufs; i++) {
                if (job->cbufs[i]) {
                        _mesa_hash_table_remove_key(v3d->write_jobs,
                                                    job->cbufs[i]->texture);
                        pipe_surface_reference(&job->cbufs[i], NULL);
                }
        }
        if (job->zsbuf) {
                struct v3d_resource *rsc = v3d_resource(job->zsbuf->texture);
                if (rsc->separate_stencil)
                        _mesa_hash_table_remove_key(v3d->write_jobs,
                                                    &rsc->separate_stencil->base);

                _mesa_hash_table_remove_key(v3d->write_jobs,
                                            job->zsbuf->texture);
                pipe_surface_reference(&job->zsbuf, NULL);
        }
        if (job->bbuf)
                pipe_surface_reference(&job->bbuf, NULL);

        if (v3d->job == job)
                v3d->job = NULL;

        v3d_destroy_cl(&job->bcl);
        v3d_destroy_cl(&job->rcl);
        v3d_destroy_cl(&job->indirect);
        v3d_bo_unreference(&job->tile_alloc);
        v3d_bo_unreference(&job->tile_state);

        ralloc_free(job);
}

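/**
 * Allocates a new job, ralloc'ed off the context, and initializes its CLs
 * and draw-bounds tracking.  The job is freed with v3d_job_free().
 */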
struct v3d_job *
v3d_job_create(struct v3d_context *v3d)
{
        struct v3d_job *job = rzalloc(v3d, struct v3d_job);

        job->v3d = v3d;

        v3d_init_cl(job, &job->bcl);
        v3d_init_cl(job, &job->rcl);
        v3d_init_cl(job, &job->indirect);

        job->draw_min_x = ~0;
        job->draw_min_y = ~0;
        job->draw_max_x = 0;
        job->draw_max_y = 0;

        job->bos = _mesa_set_create(job,
                                    _mesa_hash_pointer,
                                    _mesa_key_pointer_equal);
        return job;
}

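/**
 * Adds a BO to the set of BOs referenced by the job, taking a reference on
 * it and appending its handle to the submit ioctl's BO handle array.
 */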
void
v3d_job_add_bo(struct v3d_job *job, struct v3d_bo *bo)
{
        if (!bo)
                return;

        if (_mesa_set_search(job->bos, bo))
                return;

        v3d_bo_reference(bo);
        _mesa_set_add(job->bos, bo);
        job->referenced_size += bo->size;

        uint32_t *bo_handles = (void *)(uintptr_t)job->submit.bo_handles;

        if (job->submit.bo_handle_count >= job->bo_handles_size) {
                job->bo_handles_size = MAX2(4, job->bo_handles_size * 2);
                bo_handles = reralloc(job, bo_handles,
                                      uint32_t, job->bo_handles_size);
                job->submit.bo_handles = (uintptr_t)(void *)bo_handles;
        }
        bo_handles[job->submit.bo_handle_count++] = bo->handle;
}

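/**
 * Records that the job writes to the given resource, so that later flushes
 * of jobs reading or mapping it can be ordered correctly.
 */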
void
v3d_job_add_write_resource(struct v3d_job *job, struct pipe_resource *prsc)
{
        struct v3d_context *v3d = job->v3d;

        if (!job->write_prscs) {
                job->write_prscs = _mesa_set_create(job,
                                                    _mesa_hash_pointer,
                                                    _mesa_key_pointer_equal);
        }

        _mesa_set_add(job->write_prscs, prsc);
        _mesa_hash_table_insert(v3d->write_jobs, prsc, job);
}

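/**
 * Submits every queued job that has the given BO in its referenced-BO set.
 */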
void
v3d_flush_jobs_using_bo(struct v3d_context *v3d, struct v3d_bo *bo)
{
        hash_table_foreach(v3d->jobs, entry) {
                struct v3d_job *job = entry->data;

                if (_mesa_set_search(job->bos, bo))
                        v3d_job_submit(v3d, job);
        }
}

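/**
 * Like v3d_job_add_write_resource(), but also tracks the resource in the
 * job's set of transform feedback writes, which may be synchronized with
 * the "Wait for TF" mechanism instead of a full job flush (see
 * v3d_flush_jobs_writing_resource()).
 */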
void
v3d_job_add_tf_write_resource(struct v3d_job *job, struct pipe_resource *prsc)
{
        v3d_job_add_write_resource(job, prsc);

        if (!job->tf_write_prscs)
                job->tf_write_prscs = _mesa_pointer_set_create(job);

        _mesa_set_add(job->tf_write_prscs, prsc);
}

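/**
 * Returns true if the job has transform feedback enabled and writes to the
 * given resource from the TF stage.
 */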
static bool
v3d_job_writes_resource_from_tf(struct v3d_job *job,
                                struct pipe_resource *prsc)
{
        if (!job->tf_enabled)
                return false;

        if (!job->tf_write_prscs)
                return false;

        return _mesa_set_search(job->tf_write_prscs, prsc) != NULL;
}

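/**
 * Submits the job (if any) that writes to the given resource, subject to
 * the flush condition.  Also requests a sync on the last compute job when
 * the graphics pipeline is about to read a resource written by compute.
 */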
void
v3d_flush_jobs_writing_resource(struct v3d_context *v3d,
                                struct pipe_resource *prsc,
                                enum v3d_flush_cond flush_cond,
                                bool is_compute_pipeline)
{
        struct hash_entry *entry = _mesa_hash_table_search(v3d->write_jobs,
                                                           prsc);

        struct v3d_resource *rsc = v3d_resource(prsc);

        /* We need to sync if the graphics pipeline reads a resource written
         * by the compute pipeline. The same would be needed when compute
         * reads a resource written by the graphics pipeline, but nowadays
         * all compute jobs are serialized with the previously submitted job.
         */
        if (!is_compute_pipeline && rsc->bo != NULL && rsc->compute_written) {
                v3d->sync_on_last_compute_job = true;
                rsc->compute_written = false;
        }

        if (!entry)
                return;

        struct v3d_job *job = entry->data;

        bool needs_flush;
        switch (flush_cond) {
        case V3D_FLUSH_ALWAYS:
                needs_flush = true;
                break;
        case V3D_FLUSH_NOT_CURRENT_JOB:
                needs_flush = !v3d->job || v3d->job != job;
                break;
        case V3D_FLUSH_DEFAULT:
        default:
                /* For writes from TF in the same job we use the 'Wait for TF'
                 * feature provided by the hardware so we don't want to flush.
                 * The exception to this is when the caller is about to map the
                 * resource, since in that case we don't have a 'Wait for TF'
                 * command in the command stream. In that scenario the caller
                 * is expected to use V3D_FLUSH_ALWAYS.
                 */
                needs_flush = !v3d_job_writes_resource_from_tf(job, prsc);
        }

        if (needs_flush)
                v3d_job_submit(v3d, job);
}

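/**
 * Flushes the job writing the given resource (if any), then submits any
 * other queued jobs that reference the resource's BO, so that a subsequent
 * write to the resource is ordered after those reads.
 */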
void
v3d_flush_jobs_reading_resource(struct v3d_context *v3d,
                                struct pipe_resource *prsc,
                                enum v3d_flush_cond flush_cond,
                                bool is_compute_pipeline)
{
        struct v3d_resource *rsc = v3d_resource(prsc);

        /* We only need to force the flush on TF writes, which is the only
         * case where we might skip the flush to use the 'Wait for TF'
         * command. Here we are flushing for a read, which means that the
         * caller intends to write to the resource, so we don't care if
         * there was a previous TF write to it.
         */
        v3d_flush_jobs_writing_resource(v3d, prsc, flush_cond,
                                        is_compute_pipeline);

        hash_table_foreach(v3d->jobs, entry) {
                struct v3d_job *job = entry->data;

                if (!_mesa_set_search(job->bos, rsc->bo))
                        continue;

                bool needs_flush;
                switch (flush_cond) {
                case V3D_FLUSH_NOT_CURRENT_JOB:
                        needs_flush = !v3d->job || v3d->job != job;
                        break;
                case V3D_FLUSH_ALWAYS:
                case V3D_FLUSH_DEFAULT:
                default:
                        needs_flush = true;
                }

                if (needs_flush)
                        v3d_job_submit(v3d, job);

                /* Reminder: v3d->jobs is safe to keep iterating even
                 * after deletion of an entry.
                 */
                continue;
        }
}

/**
 * Returns a v3d_job structure for tracking V3D rendering to a particular FBO.
 *
 * If we've already started rendering to this FBO, then return the same job,
 * otherwise make a new one.  If we're beginning rendering to an FBO, make
 * sure that any previous reads of the FBO (or writes to its color/Z surfaces)
 * have been flushed.
 */
struct v3d_job *
v3d_get_job(struct v3d_context *v3d,
            uint32_t nr_cbufs,
            struct pipe_surface **cbufs,
            struct pipe_surface *zsbuf,
            struct pipe_surface *bbuf)
{
        /* Return the existing job for this FBO if we have one */
        struct v3d_job_key local_key = {
                .cbufs = {
                        cbufs[0],
                        cbufs[1],
                        cbufs[2],
                        cbufs[3],
                },
                .zsbuf = zsbuf,
                .bbuf = bbuf,
        };
        struct hash_entry *entry = _mesa_hash_table_search(v3d->jobs,
                                                           &local_key);
        if (entry)
                return entry->data;

        /* Creating a new job.  Make sure that any previous jobs reading or
         * writing these buffers are flushed.
         */
        struct v3d_job *job = v3d_job_create(v3d);
        job->nr_cbufs = nr_cbufs;

        for (int i = 0; i < job->nr_cbufs; i++) {
                if (cbufs[i]) {
                        v3d_flush_jobs_reading_resource(v3d, cbufs[i]->texture,
                                                        V3D_FLUSH_DEFAULT,
                                                        false);
                        pipe_surface_reference(&job->cbufs[i], cbufs[i]);

                        if (cbufs[i]->texture->nr_samples > 1)
                                job->msaa = true;
                }
        }
        if (zsbuf) {
                v3d_flush_jobs_reading_resource(v3d, zsbuf->texture,
                                                V3D_FLUSH_DEFAULT,
                                                false);
                pipe_surface_reference(&job->zsbuf, zsbuf);
                if (zsbuf->texture->nr_samples > 1)
                        job->msaa = true;
        }
        if (bbuf) {
                pipe_surface_reference(&job->bbuf, bbuf);
                if (bbuf->texture->nr_samples > 1)
                        job->msaa = true;
        }

        for (int i = 0; i < job->nr_cbufs; i++) {
                if (cbufs[i])
                        _mesa_hash_table_insert(v3d->write_jobs,
                                                cbufs[i]->texture, job);
        }
        if (zsbuf) {
                _mesa_hash_table_insert(v3d->write_jobs, zsbuf->texture, job);

                struct v3d_resource *rsc = v3d_resource(zsbuf->texture);
                if (rsc->separate_stencil) {
                        v3d_flush_jobs_reading_resource(v3d,
                                                        &rsc->separate_stencil->base,
                                                        V3D_FLUSH_DEFAULT,
                                                        false);
                        _mesa_hash_table_insert(v3d->write_jobs,
                                                &rsc->separate_stencil->base,
                                                job);
                }
        }

        memcpy(&job->key, &local_key, sizeof(local_key));
        _mesa_hash_table_insert(v3d->jobs, &job->key, job);

        return job;
}

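/**
 * Returns the job for the context's current framebuffer state, creating it
 * (and setting up its tile buffer parameters and initial clear flags) if
 * there is no job currently bound.
 */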
struct v3d_job *
v3d_get_job_for_fbo(struct v3d_context *v3d)
{
        if (v3d->job)
                return v3d->job;

        uint32_t nr_cbufs = v3d->framebuffer.nr_cbufs;
        struct pipe_surface **cbufs = v3d->framebuffer.cbufs;
        struct pipe_surface *zsbuf = v3d->framebuffer.zsbuf;
        struct v3d_job *job = v3d_get_job(v3d, nr_cbufs, cbufs, zsbuf, NULL);

        if (v3d->framebuffer.samples >= 1)
                job->msaa = true;

        v3d_get_tile_buffer_size(job->msaa, job->nr_cbufs,
                                 job->cbufs, job->bbuf,
                                 &job->tile_width,
                                 &job->tile_height,
                                 &job->internal_bpp);

        /* The dirty flags are tracking what's been updated while v3d->job has
         * been bound, so set them all to ~0 when switching between jobs.  We
         * also need to reset all state at the start of rendering.
         */
        v3d->dirty = ~0;

        /* If we're binding to uninitialized buffers, no need to load their
         * contents before drawing.
         */
        for (int i = 0; i < nr_cbufs; i++) {
                if (cbufs[i]) {
                        struct v3d_resource *rsc = v3d_resource(cbufs[i]->texture);
                        if (!rsc->writes)
                                job->clear |= PIPE_CLEAR_COLOR0 << i;
                }
        }

        if (zsbuf) {
                struct v3d_resource *rsc = v3d_resource(zsbuf->texture);
                if (!rsc->writes)
                        job->clear |= PIPE_CLEAR_DEPTH;

                if (rsc->separate_stencil)
                        rsc = rsc->separate_stencil;

                if (!rsc->writes)
                        job->clear |= PIPE_CLEAR_STENCIL;
        }

        job->draw_tiles_x = DIV_ROUND_UP(v3d->framebuffer.width,
                                         job->tile_width);
        job->draw_tiles_y = DIV_ROUND_UP(v3d->framebuffer.height,
                                         job->tile_height);

        v3d->job = job;

        return job;
}

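/**
 * Dumps the job's BO contents and submit state in CLIF format to stderr
 * when the V3D_DEBUG_CL or V3D_DEBUG_CLIF debug flags are set.
 */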
static void
v3d_clif_dump(struct v3d_context *v3d, struct v3d_job *job)
{
        if (!(V3D_DEBUG & (V3D_DEBUG_CL | V3D_DEBUG_CLIF)))
                return;

        struct clif_dump *clif = clif_dump_init(&v3d->screen->devinfo,
                                                stderr,
                                                V3D_DEBUG & V3D_DEBUG_CL);

        set_foreach(job->bos, entry) {
                struct v3d_bo *bo = (void *)entry->key;
                char *name = ralloc_asprintf(NULL, "%s_0x%x",
                                             bo->name, bo->offset);

                v3d_bo_map(bo);
                clif_dump_add_bo(clif, name, bo->offset, bo->size, bo->map);

                ralloc_free(name);
        }

        clif_dump(clif, &job->submit);

        clif_dump_destroy(clif);
}

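/**
 * Stalls on the primitive counts BO written by the GPU and accumulates the
 * transform feedback and primitives-generated counters into the context.
 */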
static void
v3d_read_and_accumulate_primitive_counters(struct v3d_context *v3d)
{
        assert(v3d->prim_counts);

        perf_debug("stalling on TF counts readback\n");
        struct v3d_resource *rsc = v3d_resource(v3d->prim_counts);
        if (v3d_bo_wait(rsc->bo, PIPE_TIMEOUT_INFINITE, "prim-counts")) {
                uint32_t *map = v3d_bo_map(rsc->bo) + v3d->prim_counts_offset;
                v3d->tf_prims_generated += map[V3D_PRIM_COUNTS_TF_WRITTEN];
                /* When we only have a vertex shader we determine the
                 * primitive count on the CPU, so don't update it here again.
                 */
                if (v3d->prog.gs)
                        v3d->prims_generated += map[V3D_PRIM_COUNTS_WRITTEN];
        }
}

/**
 * Submits the job to the kernel and then frees it.
 */
void
v3d_job_submit(struct v3d_context *v3d, struct v3d_job *job)
{
        struct v3d_screen *screen = v3d->screen;

        if (!job->needs_flush)
                goto done;

        if (screen->devinfo.ver >= 41)
                v3d41_emit_rcl(job);
        else
                v3d33_emit_rcl(job);

        if (cl_offset(&job->bcl) > 0) {
                if (screen->devinfo.ver >= 41)
                        v3d41_bcl_epilogue(v3d, job);
                else
                        v3d33_bcl_epilogue(v3d, job);
        }

        /* While the RCL will implicitly depend on the last RCL to have
         * finished, we also need to block on any previous TFU job we may have
         * dispatched.
         */
        job->submit.in_sync_rcl = v3d->out_sync;

        /* Update the sync object for the last rendering by our context. */
        job->submit.out_sync = v3d->out_sync;

        job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl);
        job->submit.rcl_end = job->rcl.bo->offset + cl_offset(&job->rcl);

        if (v3d->active_perfmon) {
                assert(screen->has_perfmon);
                job->submit.perfmon_id = v3d->active_perfmon->kperfmon_id;
        }

        /* If we are submitting a job with a different perfmon, we need to
         * ensure the previous one fully finishes before starting this one;
         * otherwise it would wrongly mix counter results.
         */
        if (v3d->active_perfmon != v3d->last_perfmon) {
                v3d->last_perfmon = v3d->active_perfmon;
                job->submit.in_sync_bcl = v3d->out_sync;
        }

        job->submit.flags = 0;
        if (job->tmu_dirty_rcl && screen->has_cache_flush)
                job->submit.flags |= DRM_V3D_SUBMIT_CL_FLUSH_CACHE;

        /* On V3D 4.1, the tile alloc/state setup moved to register writes
         * instead of binner packets.
         */
        if (screen->devinfo.ver >= 41) {
                v3d_job_add_bo(job, job->tile_alloc);
                job->submit.qma = job->tile_alloc->offset;
                job->submit.qms = job->tile_alloc->size;

                v3d_job_add_bo(job, job->tile_state);
                job->submit.qts = job->tile_state->offset;
        }

        v3d_clif_dump(v3d, job);

        if (!(V3D_DEBUG & V3D_DEBUG_NORAST)) {
                int ret;

                ret = v3d_ioctl(v3d->fd, DRM_IOCTL_V3D_SUBMIT_CL, &job->submit);
                static bool warned = false;
                if (ret && !warned) {
                        fprintf(stderr, "Draw call returned %s.  "
                                        "Expect corruption.\n", strerror(errno));
                        warned = true;
                } else if (!ret) {
                        if (v3d->active_perfmon)
                                v3d->active_perfmon->job_submitted = true;
                }

                /* If we are submitting a job in the middle of transform
                 * feedback we need to read the primitive counts and accumulate
                 * them, otherwise they will be reset at the start of the next
                 * draw when we emit the Tile Binning Mode Configuration packet.
                 *
                 * If the job doesn't have any TF draw calls, then we know
                 * the primitive count must be zero and we can skip stalling
                 * for this. This also fixes a problem because it seems that
                 * in this scenario the counters are not reset with the Tile
                 * Binning Mode Configuration packet, which would translate
                 * to us reading an obsolete (possibly non-zero) value from
                 * the GPU counters.
                 */
                if (v3d->streamout.num_targets && job->tf_draw_calls_queued > 0)
                        v3d_read_and_accumulate_primitive_counters(v3d);
        }

done:
        v3d_job_free(v3d, job);
}

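/* Hash table callbacks keying jobs by their v3d_job_key (the FBO surfaces). */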
static bool
v3d_job_compare(const void *a, const void *b)
{
        return memcmp(a, b, sizeof(struct v3d_job_key)) == 0;
}

static uint32_t
v3d_job_hash(const void *key)
{
        return _mesa_hash_data(key, sizeof(struct v3d_job_key));
}

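/**
 * Creates the per-context hash tables that track jobs by FBO key and by the
 * resources they write.
 */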
void
v3d_job_init(struct v3d_context *v3d)
{
        v3d->jobs = _mesa_hash_table_create(v3d,
                                            v3d_job_hash,
                                            v3d_job_compare);
        v3d->write_jobs = _mesa_hash_table_create(v3d,
                                                  _mesa_hash_pointer,
                                                  _mesa_key_pointer_equal);
}