bi_pack.c 26.1 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/*
 * Copyright (C) 2020 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "compiler.h"
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
25

26
27
28
29
30
/* This file contains the final passes of the compiler. Running after
 * scheduling and RA, the IR is now finalized, so we need to emit it to actual
 * bits on the wire (as well as fixup branches) */

static uint64_t
31
bi_pack_header(bi_clause *clause, bi_clause *next_1, bi_clause *next_2, bool tdd)
32
{
33
34
35
        /* next_dependencies are the union of the dependencies of successors'
         * dependencies */

36
37
        unsigned dependency_wait = next_1 ? next_1->dependencies : 0;
        dependency_wait |= next_2 ? next_2->dependencies : 0;
38

39
        struct bifrost_header header = {
40
41
                .flow_control =
                        (next_1 == NULL) ? BIFROST_FLOW_END :
42
                        clause->flow_control,
43
                .terminate_discarded_threads = tdd,
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
44
                .next_clause_prefetch = clause->next_clause_prefetch && next_1,
45
46
                .staging_barrier = clause->staging_barrier,
                .staging_register = clause->staging_register,
47
48
                .dependency_wait = dependency_wait,
                .dependency_slot = clause->scoreboard_id,
49
50
                .message_type = clause->message_type,
                .next_message_type = next_1 ? next_1->message_type : 0,
51
52
                .suppress_inf = true,
                .suppress_nan = true,
53
54
55
56
57
58
59
        };

        uint64_t u = 0;
        memcpy(&u, &header, sizeof(header));
        return u;
}

60
61
62
63
64
65
66
/* The uniform/constant slot allows loading a contiguous 64-bit immediate or
 * pushed uniform per bundle. Figure out which one we need in the bundle (the
 * scheduler needs to ensure we only have one type per bundle), validate
 * everything, and rewrite away the register/uniform indices to use 3-bit
 * sources directly. */

static unsigned
67
bi_lookup_constant(bi_clause *clause, uint32_t cons, bool *hi)
68
69
{
        for (unsigned i = 0; i < clause->constant_count; ++i) {
70
71
                /* Try to apply to top or to bottom */
                uint64_t top = clause->constants[i];
72

73
                if (cons == ((uint32_t) top | (cons & 0xF)))
74
75
                        return i;

76
                if (cons == (top >> 32ul)) {
77
                        *hi = true;
78
                        return i;
79
                }
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
        }

        unreachable("Invalid constant accessed");
}

/* Translates the index of a clause constant (0..5) into the upper bits of
 * the FAU index that selects it. The result has its low 4 bits clear so the
 * caller can OR the low nibble in. */
static unsigned
bi_constant_field(unsigned idx)
{
        static const unsigned field_for_index[] = {
                4, 5, 6, 7, 2, 3
        };

        assert(idx < sizeof(field_for_index) / sizeof(field_for_index[0]));

        return field_for_index[idx] << 4;
}

static bool
98
99
bi_assign_fau_idx_single(bi_registers *regs,
                         bi_clause *clause,
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
100
                         bi_instr *ins,
101
102
                         bool assigned,
                         bool fast_zero)
103
104
105
106
{
        if (!ins)
                return assigned;

Boris Brezillon's avatar
Boris Brezillon committed
107
108
109
110
111
112
113
        if (ins->op == BI_OPCODE_ATEST) {
                /* ATEST FAU index must point to the ATEST parameter datum slot */
                assert(!assigned && !clause->branch_constant);
                regs->fau_idx = BIR_FAU_ATEST_PARAM;
                return true;
        }

Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
114
115
116
        if (ins->branch_target && clause->branch_constant) {
                /* By convention branch constant is last XXX: this whole thing
                 * is a hack, FIXME */
117
118
119
120
121
122
123
124
125
                unsigned idx = clause->constant_count - 1;

                /* We can only jump to clauses which are qword aligned so the
                 * bottom 4-bits of the offset are necessarily 0 */
                unsigned lo = 0;

                /* Build the constant */
                unsigned C = bi_constant_field(idx) | lo;

126
127
                if (assigned && regs->fau_idx != C)
                        unreachable("Mismatched fau_idx: branch");
128

Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
129
130
131
132
133
                bi_foreach_src(ins, s) {
                        if (ins->src[s].type == BI_INDEX_CONSTANT)
                                ins->src[s] = bi_passthrough(BIFROST_SRC_FAU_HI);
                }

134
                regs->fau_idx = C;
135
136
137
                return true;
        }

138
        bi_foreach_src(ins, s) {
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
139
                if (ins->src[s].type == BI_INDEX_CONSTANT) {
140
                        bool hi = false;
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
141
142
                        uint32_t cons = ins->src[s].value;
                        unsigned swizzle = ins->src[s].swizzle;
143
144
145

                        /* FMA can encode zero for free */
                        if (cons == 0 && fast_zero) {
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
146
147
148
                                assert(!ins->src[s].abs && !ins->src[s].neg);
                                ins->src[s] = bi_passthrough(BIFROST_SRC_STAGE);
                                ins->src[s].swizzle = swizzle;
149
150
151
                                continue;
                        }

152
                        unsigned idx = bi_lookup_constant(clause, cons, &hi);
153
154
                        unsigned lo = clause->constants[idx] & 0xF;
                        unsigned f = bi_constant_field(idx) | lo;
155

156
                        if (assigned && regs->fau_idx != f)
157
158
                                unreachable("Mismatched uniform/const field: imm");

159
                        regs->fau_idx = f;
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
160
161
                        ins->src[s] = bi_passthrough(hi ? BIFROST_SRC_FAU_HI : BIFROST_SRC_FAU_LO);
                        ins->src[s].swizzle = swizzle;
162
                        assigned = true;
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
163
164
165
166
167
168
169
170
                } else if (ins->src[s].type == BI_INDEX_FAU) {
                        bool hi = ins->src[s].offset > 0;

                        assert(!assigned || regs->fau_idx == ins->src[s].value);
                        assert(ins->src[s].swizzle == BI_SWIZZLE_H01);
                        regs->fau_idx = ins->src[s].value;
                        ins->src[s] = bi_passthrough(hi ? BIFROST_SRC_FAU_HI :
                                        BIFROST_SRC_FAU_LO);
Boris Brezillon's avatar
Boris Brezillon committed
171
                        assigned = true;
172
173
174
175
176
177
178
                }
        }

        return assigned;
}

static void
179
bi_assign_fau_idx(bi_clause *clause,
180
                  bi_bundle *bundle)
181
182
{
        bool assigned =
183
                bi_assign_fau_idx_single(&bundle->regs, clause, bundle->fma, false, true);
184

185
        bi_assign_fau_idx_single(&bundle->regs, clause, bundle->add, assigned, false);
186
187
}

188
/* Assigns a slot for reading, before anything is written */
189
190

static void
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
191
bi_assign_slot_read(bi_registers *regs, bi_index src)
192
193
{
        /* We only assign for registers */
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
194
        if (src.type != BI_INDEX_REGISTER)
195
196
                return;

197
        /* Check if we already assigned the slot */
198
        for (unsigned i = 0; i <= 1; ++i) {
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
199
                if (regs->slot[i] == src.value && regs->enabled[i])
200
201
202
                        return;
        }

Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
203
        if (regs->slot[2] == src.value && regs->slot23.slot2 == BIFROST_OP_READ)
204
205
206
207
208
209
                return;

        /* Assign it now */

        for (unsigned i = 0; i <= 1; ++i) {
                if (!regs->enabled[i]) {
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
210
                        regs->slot[i] = src.value;
211
212
213
214
215
                        regs->enabled[i] = true;
                        return;
                }
        }

216
        if (!regs->slot23.slot3) {
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
217
                regs->slot[2] = src.value;
218
                regs->slot23.slot2 = BIFROST_OP_READ;
219
                return;
220
        }
221

222
223
        bi_print_slots(regs, stderr);
        unreachable("Failed to find a free slot for src");
224
225
}

226
static bi_registers
227
bi_assign_slots(bi_bundle *now, bi_bundle *prev)
228
{
229
        /* We assign slots for the main register mechanism. Special ops
230
231
232
         * use the data registers, which has its own mechanism entirely
         * and thus gets skipped over here. */

Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
233
        bool read_dreg = now->add &&
234
                bi_opcode_props[(now->add)->op].sr_read;
235

Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
236
        bool write_dreg = now->add &&
237
                bi_opcode_props[(now->add)->op].sr_write;
238

239
240
        /* First, assign reads */

241
242
        if (now->fma)
                bi_foreach_src(now->fma, src)
243
                        bi_assign_slot_read(&now->regs, (now->fma)->src[src]);
244

245
246
        if (now->add) {
                bi_foreach_src(now->add, src) {
247
                        if (!(src == 0 && read_dreg))
248
                                bi_assign_slot_read(&now->regs, (now->add)->src[src]);
249
250
                }
        }
251

Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
252
253
254
        /* Next, assign writes. Staging writes are assigned separately, but
         * +ATEST wants its destination written to both a staging register
         * _and_ a regular write, because it may not generate a message */
255

256
257
        if (prev->add && (!write_dreg || prev->add->op == BI_OPCODE_ATEST)) {
                bi_index idx = prev->add->dest[0];
258

Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
259
260
                if (idx.type == BI_INDEX_REGISTER) {
                        now->regs.slot[3] = idx.value;
261
                        now->regs.slot23.slot3 = BIFROST_OP_WRITE;
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
262
263
264
265
                }
        }

        if (prev->fma) {
266
                bi_index idx = (prev->fma)->dest[0];
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
267
268
269
270
271
272
273
274
275
276
277
278

                if (idx.type == BI_INDEX_REGISTER) {
                        if (now->regs.slot23.slot3) {
                                /* Scheduler constraint: cannot read 3 and write 2 */
                                assert(!now->regs.slot23.slot2);
                                now->regs.slot[2] = idx.value;
                                now->regs.slot23.slot2 = BIFROST_OP_WRITE;
                        } else {
                                now->regs.slot[3] = idx.value;
                                now->regs.slot23.slot3 = BIFROST_OP_WRITE;
                                now->regs.slot23.slot3_fma = true;
                        }
279
280
281
                }
        }

282
        return now->regs;
283
284
}

285
286
/* Maps the slot 2/3 configuration of `r` to its register control mode, i.e.
 * the index of the matching entry in bifrost_reg_ctrl_lut. Prints the slots
 * and hits unreachable() if no entry matches. */
static enum bifrost_reg_mode
bi_pack_register_mode(bi_registers r)
{
        const bool slots23_idle = !(r.slot23.slot2 | r.slot23.slot3);

        /* Handle idle special case for first instructions */
        if (r.first_instruction && slots23_idle)
                return BIFROST_IDLE_1;

        /* Otherwise, search the LUT for a bytewise-equal entry */
        for (unsigned mode = 0; mode < ARRAY_SIZE(bifrost_reg_ctrl_lut); ++mode) {
                if (!memcmp(&bifrost_reg_ctrl_lut[mode], &r.slot23, sizeof(r.slot23)))
                        return mode;
        }

        bi_print_slots(&r, stderr);
        unreachable("Invalid slot assignment");
}

302
static uint64_t
303
bi_pack_registers(bi_registers regs)
304
{
305
        enum bifrost_reg_mode mode = bi_pack_register_mode(regs);
306
        struct bifrost_regs s = { 0 };
307
308
        uint64_t packed = 0;

309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
        /* Need to pack 5-bit mode as a 4-bit field. The decoder moves bit 3 to bit 4 for
         * first instruction and adds 16 when reg 2 == reg 3 */

        unsigned ctrl;
        bool r2_equals_r3 = false;

        if (regs.first_instruction) {
                /* Bit 3 implicitly must be clear for first instructions.
                 * The affected patterns all write both ADD/FMA, but that
                 * is forbidden for the first instruction, so this does
                 * not add additional encoding constraints */
                assert(!(mode & 0x8));

                /* Move bit 4 to bit 3, since bit 3 is clear */
                ctrl = (mode & 0x7) | ((mode & 0x10) >> 1);

                /* If we can let r2 equal r3, we have to or the hardware raises
                 * INSTR_INVALID_ENC (it's unclear why). */
                if (!(regs.slot23.slot2 && regs.slot23.slot3))
                        r2_equals_r3 = true;
        } else {
                /* We force r2=r3 or not for the upper bit */
                ctrl = (mode & 0xF);
                r2_equals_r3 = (mode & 0x10);
        }

335
336
        if (regs.enabled[1]) {
                /* Gotta save that bit!~ Required by the 63-x trick */
337
                assert(regs.slot[1] > regs.slot[0]);
338
339
340
                assert(regs.enabled[0]);

                /* Do the 63-x trick, see docs/disasm */
341
342
343
                if (regs.slot[0] > 31) {
                        regs.slot[0] = 63 - regs.slot[0];
                        regs.slot[1] = 63 - regs.slot[1];
344
345
                }

346
347
                assert(regs.slot[0] <= 31);
                assert(regs.slot[1] <= 63);
348
349

                s.ctrl = ctrl;
350
351
                s.reg1 = regs.slot[1];
                s.reg0 = regs.slot[0];
352
        } else {
353
                /* slot 1 disabled, so set to zero and use slot 1 for ctrl */
354
                s.ctrl = 0;
355
356
357
                s.reg1 = ctrl << 2;

                if (regs.enabled[0]) {
358
359
                        /* Bit 0 upper bit of slot 0 */
                        s.reg1 |= (regs.slot[0] >> 5);
360

361
362
                        /* Rest of slot 0 in usual spot */
                        s.reg0 = (regs.slot[0] & 0b11111);
363
                } else {
364
                        /* Bit 1 set if slot 0 also disabled */
365
366
367
368
                        s.reg1 |= (1 << 1);
                }
        }

369
370
371
        /* Force r2 =/!= r3 as needed */
        if (r2_equals_r3) {
                assert(regs.slot[3] == regs.slot[2] || !(regs.slot23.slot2 && regs.slot23.slot3));
372

373
374
375
376
377
378
379
380
                if (regs.slot23.slot2)
                        regs.slot[3] = regs.slot[2];
                else
                        regs.slot[2] = regs.slot[3];
        } else if (!regs.first_instruction) {
                /* Enforced by the encoding anyway */
                assert(regs.slot[2] != regs.slot[3]);
        }
381

382
383
        s.reg2 = regs.slot[2];
        s.reg3 = regs.slot[3];
384
        s.fau_idx = regs.fau_idx;
385
386
387

        memcpy(&packed, &s, sizeof(s));
        return packed;
388
389
390
391
392
393
394
}

/* A packed bundle, split across two 64-bit words as produced by
 * bi_pack_bundle */
struct bi_packed_bundle {
        /* Packed registers, OR'd with the FMA bits shifted left by 35 and
         * the low 6 bits of ADD shifted left by 58 */
        uint64_t lo;
        /* The ADD bits above the low 6 (ADD >> 6) */
        uint64_t hi;
};

395
/* We must ensure slot 1 > slot 0 for the 63-x trick to function, so we fix
396
397
398
 * this up at pack time. (Scheduling doesn't care.) */

static void
399
bi_flip_slots(bi_registers *regs)
400
{
401
402
403
404
        if (regs->enabled[0] && regs->enabled[1] && regs->slot[1] < regs->slot[0]) {
                unsigned temp = regs->slot[0];
                regs->slot[0] = regs->slot[1];
                regs->slot[1] = temp;
405
406
407
408
        }

}

Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
409
410
411
412
413
414
/* Lowers a +CUBEFACE2 in the ADD unit of `bundle` to a CUBEFACE1/CUBEFACE2
 * pair: a new CUBEFACE1 taking the original three sources is placed in the
 * FMA unit, and the CUBEFACE2 is rewritten to read its result by passthrough.
 * Bundles without a +CUBEFACE2 are left untouched. This is a hack so the
 * scheduler doesn't have to worry about this while we're just packing
 * singletons. */

static void
bi_lower_cubeface2(bi_context *ctx, bi_bundle *bundle)
{
        bi_instr *cubeface2 = bundle->add;

        /* Filter for +CUBEFACE2 */
        if (!cubeface2 || cubeface2->op != BI_OPCODE_CUBEFACE2)
                return;

        /* This won't be used once we emit non-singletons, for now this is just
         * a fact of our scheduler and allows us to clobber FMA */
        assert(!bundle->fma);

        /* Construct the FMA op. It gets no destination, the result is just a
         * temporary consumed by the CUBEFACE2 */
        bi_instr *cubeface1 = rzalloc(ctx, bi_instr);
        cubeface1->op = BI_OPCODE_CUBEFACE1;

        for (unsigned i = 0; i < 3; ++i)
                cubeface1->src[i] = cubeface2->src[i];

        /* Emit the instruction, linking it into the list next to the
         * CUBEFACE2 and into the bundle's FMA unit */
        list_addtail(&cubeface1->link, &cubeface2->link);
        bundle->fma = cubeface1;

        /* Now replace the sources of the CUBEFACE2 with a single passthrough
         * from the CUBEFACE1 (and a side-channel) */
        bi_index none = bi_null();

        cubeface2->src[0] = bi_passthrough(BIFROST_SRC_STAGE);
        cubeface2->src[2] = none;
        cubeface2->src[1] = none;
}

/* Returns the source port through which register `reg` can be read, given
 * the slot assignment in `regs`. Slots 0 and 1 are checked first, then slot
 * 2 if it is configured as a read. Hits unreachable() if the register was
 * not assigned to any read slot. */
static inline enum bifrost_packed_src
bi_get_src_slot(bi_registers *regs, unsigned reg)
{
        if (regs->slot[0] == reg && regs->enabled[0])
                return BIFROST_SRC_PORT0;

        if (regs->slot[1] == reg && regs->enabled[1])
                return BIFROST_SRC_PORT1;

        if (regs->slot[2] == reg && regs->slot23.slot2 == BIFROST_OP_READ)
                return BIFROST_SRC_PORT2;

        unreachable("Tried to access register with no port");
}

/* Returns the packed encoding of source `s` of `ins`, or 0 when `ins` is
 * NULL (empty unit). Registers are translated to the port they were assigned
 * in `regs`; passthrough sources already hold their encoding; everything
 * else is currently encoded as the STAGE passthrough. */
static inline enum bifrost_packed_src
bi_get_src_new(bi_instr *ins, bi_registers *regs, unsigned s)
{
        if (!ins)
                return 0;

        bi_index src = ins->src[s];

        switch (src.type) {
        case BI_INDEX_REGISTER:
                return bi_get_src_slot(regs, src.value);
        case BI_INDEX_PASS:
                return src.value;
        default:
                break;
        }

        /* Null sources in the first two slots of ZS_EMIT are encoded as
         * STAGE */
        if (bi_is_null(src) && ins->op == BI_OPCODE_ZS_EMIT && s < 2)
                return BIFROST_SRC_STAGE;

        /* TODO make safer */
        return BIFROST_SRC_STAGE;
}

476
static struct bi_packed_bundle
477
bi_pack_bundle(bi_clause *clause, bi_bundle bundle, bi_bundle prev, bool first_bundle, gl_shader_stage stage)
478
{
479
        bi_assign_slots(&bundle, &prev);
480
        bi_assign_fau_idx(clause, &bundle);
481
        bundle.regs.first_instruction = first_bundle;
482

483
        bi_flip_slots(&bundle.regs);
484

Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
485
        bool sr_read = bundle.add &&
486
                bi_opcode_props[(bundle.add)->op].sr_read;
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
487

488
        uint64_t reg = bi_pack_registers(bundle.regs);
489
490
491
492
493
494
495
496
497
498
        uint64_t fma = bi_pack_fma(bundle.fma,
                        bi_get_src_new(bundle.fma, &bundle.regs, 0),
                        bi_get_src_new(bundle.fma, &bundle.regs, 1),
                        bi_get_src_new(bundle.fma, &bundle.regs, 2),
                        bi_get_src_new(bundle.fma, &bundle.regs, 3));

        uint64_t add = bi_pack_add(bundle.add,
                        bi_get_src_new(bundle.add, &bundle.regs, sr_read + 0),
                        bi_get_src_new(bundle.add, &bundle.regs, sr_read + 1),
                        bi_get_src_new(bundle.add, &bundle.regs, sr_read + 2),
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
499
500
501
                        0);

        if (bundle.add) {
502
                bi_instr *add = bundle.add;
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
503
504
505
506
507
508
509
510
511
512
513
514
515
516

                bool sr_write = bi_opcode_props[add->op].sr_write;

                if (sr_read) {
                        assert(add->src[0].type == BI_INDEX_REGISTER);
                        clause->staging_register = add->src[0].value;

                        if (sr_write)
                                assert(bi_is_equiv(add->src[0], add->dest[0]));
                } else if (sr_write) {
                        assert(add->dest[0].type == BI_INDEX_REGISTER);
                        clause->staging_register = add->dest[0].value;
                }
        }
517
518
519
520
521
522
523
524
525

        struct bi_packed_bundle packed = {
                .lo = reg | (fma << 35) | ((add & 0b111111) << 58),
                .hi = add >> 6
        };

        return packed;
}

526
/* Packs the next two constants as a dedicated constant quadword at the end of
527
528
529
530
531
532
533
534
535
536
537
538
 * the clause, returning the number packed. There are two cases to consider:
 *
 * Case #1: Branching is not used. For a single constant copy the upper nibble
 * over, easy.
 *
 * Case #2: Branching is used. For a single constant, it suffices to set the
 * upper nibble to 4 and leave the latter constant 0, which matches what the
 * blob does.
 *
 * Extending to multiple constants is considerably more tricky and left for
 * future work.
 */
539
540
541
542
543
544
545
546

static unsigned
bi_pack_constants(bi_context *ctx, bi_clause *clause,
                unsigned index,
                struct util_dynarray *emission)
{
        /* After these two, are we done? Determines tag */
        bool done = clause->constant_count <= (index + 2);
547
        ASSERTED bool only = clause->constant_count <= (index + 1);
548

549
550
551
        /* Is the constant we're packing for a branch? */
        bool branches = clause->branch_constant && done;

552
553
        /* TODO: Pos */
        assert(index == 0 && clause->bundle_count == 1);
554
555
        assert(only);

556
557
        /* Compute branch offset instead of a dummy 0 */
        if (branches) {
558
                bi_instr *br = clause->bundles[clause->bundle_count - 1].add;
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
559
                assert(br && br->branch_target);
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575

                /* Put it in the high place */
                int32_t qwords = bi_block_offset(ctx, clause, br->branch_target);
                int32_t bytes = qwords * 16;

                /* Copy so we get proper sign behaviour */
                uint32_t raw = 0;
                memcpy(&raw, &bytes, sizeof(raw));

                /* Clear off top bits for the magic bits */
                raw &= ~0xF0000000;

                /* Put in top 32-bits */
                clause->constants[index + 0] = ((uint64_t) raw) << 32ull;
        }

576
        uint64_t hi = clause->constants[index + 0] >> 60ull;
577
578
579
580
581

        struct bifrost_fmt_constant quad = {
                .pos = 0, /* TODO */
                .tag = done ? BIFROST_FMTC_FINAL : BIFROST_FMTC_CONSTANTS,
                .imm_1 = clause->constants[index + 0] >> 4,
582
                .imm_2 = ((hi < 8) ? (hi << 60ull) : 0) >> 4,
583
584
        };

585
586
587
588
589
590
591
        if (branches) {
                /* Branch offsets are less than 60-bits so this should work at
                 * least for now */
                quad.imm_1 |= (4ull << 60ull) >> 4;
                assert (hi == 0);
        }

592
593
        /* XXX: On G71, Connor observed that the difference of the top 4 bits
         * of the second constant with the first must be less than 8, otherwise
594
595
596
597
         * we have to swap them. On G52, I'm able to reproduce a similar issue
         * but with a different workaround (modeled above with a single
         * constant, unclear how to workaround for multiple constants.) Further
         * investigation needed. Possibly an errata. XXX */
598

599
600
601
602
603
        util_dynarray_append(emission, struct bifrost_fmt_constant, quad);

        return 2;
}

604
static void
605
606
bi_pack_clause(bi_context *ctx, bi_clause *clause,
                bi_clause *next_1, bi_clause *next_2,
607
608
                struct util_dynarray *emission, gl_shader_stage stage,
                bool tdd)
609
{
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
610
        /* TODO After the deadline lowering */
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
611
612
        bi_lower_cubeface2(ctx, &clause->bundles[0]);

613
        struct bi_packed_bundle ins_1 = bi_pack_bundle(clause, clause->bundles[0], clause->bundles[0], true, stage);
614
615
        assert(clause->bundle_count == 1);

616
617
618
        /* State for packing constants throughout */
        unsigned constant_index = 0;

619
        struct bifrost_fmt1 quad_1 = {
620
                .tag = clause->constant_count ? BIFROST_FMT1_CONSTANTS : BIFROST_FMT1_FINAL,
621
                .header = bi_pack_header(clause, next_1, next_2, tdd),
622
623
624
                .ins_1 = ins_1.lo,
                .ins_2 = ins_1.hi & ((1 << 11) - 1),
                .ins_0 = (ins_1.hi >> 11) & 0b111,
625
626
627
        };

        util_dynarray_append(emission, struct bifrost_fmt1, quad_1);
628
629
630
631
632
633
634

        /* Pack the remaining constants */

        while (constant_index < clause->constant_count) {
                constant_index += bi_pack_constants(ctx, clause,
                                constant_index, emission);
        }
635
636
637
638
639
}

/* Returns the clause that follows `clause` in program order, or NULL if
 * there is none. With a NULL `clause`, the search starts at the first clause
 * of `block`. When `block` has no further clause, the first clause of the
 * next non-empty block is returned. */
static bi_clause *
bi_next_clause(bi_context *ctx, pan_block *block, bi_clause *clause)
{
        struct list_head *clauses = &((bi_block *) block)->clauses;

        if (clause) {
                /* Try the next clause in this block */
                if (clause->link.next != clauses)
                        return list_first_entry(&(clause->link), bi_clause, link);
        } else if (!list_is_empty(clauses)) {
                /* Starting from scratch: try the first clause in this block */
                return list_first_entry(clauses, bi_clause, link);
        }

        /* Try the next block, or the one after that if it's empty, etc. */
        pan_block *next_block = pan_next_block(block);

        bi_foreach_block_from(ctx, next_block, block) {
                bi_block *blk = (bi_block *) block;

                if (list_is_empty(&blk->clauses))
                        continue;

                return list_first_entry(&(blk->clauses), bi_clause, link);
        }

        return NULL;
}

661
662
663
664
665
666
667
668
669
/* We should terminate discarded threads if there may be discarded threads (a
 * fragment shader) and helper invocations are not used. Further logic may be
 * required for future discard/demote differentiation.
 */

static bool
bi_terminate_discarded_threads(bi_context *ctx)
{
        const bool is_fragment = (ctx->stage == MESA_SHADER_FRAGMENT);

        /* The fragment-only shader info is consulted for fragment shaders
         * only */
        return is_fragment &&
                !ctx->nir->info.fs.needs_quad_helper_invocations;
}

675
676
677
678
679
680
681
682
683
/* Intended to record, for a clause ending in a BLEND instruction, the offset
 * in `emission` to which the blend shader should return. Non-terminal blend
 * instructions are not supported yet, so for now this asserts if it meets
 * one; the recording logic is compiled out. */
static void
bi_collect_blend_ret_addr(bi_context *ctx, struct util_dynarray *emission,
                          const bi_clause *clause)
{
        /* No need to collect return addresses when we're in a blend shader. */
        if (ctx->is_blend)
                return;

        /* Only the ADD instruction of the last bundle is of interest */
        const bi_instr *ins = clause->bundles[clause->bundle_count - 1].add;

        if (!ins)
                return;

        if (ins->op != BI_OPCODE_BLEND)
                return;

        /* We don't support non-terminal blend instructions yet.
         * That would require fixing blend shaders to restore the registers
         * they use before jumping back to the fragment shader, which is
         * currently not supported.
         */
        assert(0);

#if 0
        assert(ins->blend_location < ARRAY_SIZE(ctx->blend_ret_offsets));
        assert(!ctx->blend_ret_offsets[ins->blend_location]);
        ctx->blend_ret_offsets[ins->blend_location] =
                util_dynarray_num_elements(emission, uint8_t);
        assert(!(ctx->blend_ret_offsets[ins->blend_location] & 0x7));
#endif
}

705
706
707
/* Entry point of the packing pass: walks every clause of every block in
 * order and appends its packed form to `emission`. */
void
bi_pack(bi_context *ctx, struct util_dynarray *emission)
{
        const bool tdd = bi_terminate_discarded_threads(ctx);

        bi_foreach_block(ctx, _block) {
                bi_block *block = (bi_block *) _block;

                /* Passthrough the first clause of where we're branching to for
                 * the last clause of the block (the clause with the branch).
                 * Only blocks with a second successor get one. */
                bi_clause *succ_clause = NULL;

                if (block->base.successors[1])
                        succ_clause = bi_next_clause(ctx, block->base.successors[0], NULL);

                bi_foreach_clause_in_block(block, clause) {
                        const bool is_last = (clause->link.next == &block->clauses);

                        /* The clause following in program order, plus the
                         * extra successor for the last clause of the block */
                        bi_clause *next = bi_next_clause(ctx, _block, clause);
                        bi_clause *next_2 = NULL;

                        if (is_last)
                                next_2 = succ_clause;

                        bi_pack_clause(ctx, clause, next, next_2, emission, ctx->stage, tdd);

                        if (is_last)
                                continue;

                        bi_collect_blend_ret_addr(ctx, emission, clause);
                }
        }
}