bifrost_compile.c 89.5 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
/*
 * Copyright (C) 2020 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors (Collabora):
 *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */

#include "main/mtypes.h"
#include "compiler/glsl/glsl_to_nir.h"
#include "compiler/nir_types.h"
#include "compiler/nir/nir_builder.h"
31
#include "util/u_debug.h"
32
33
34

#include "disassemble.h"
#include "bifrost_compile.h"
35
#include "bifrost_nir.h"
36
#include "compiler.h"
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
37
#include "bi_quirks.h"
38
39
#include "bi_print.h"

40
41
42
static const struct debug_named_value debug_options[] = {
        {"msgs",      BIFROST_DBG_MSGS,		"Print debug messages"},
        {"shaders",   BIFROST_DBG_SHADERS,	"Dump shaders in NIR and MIR"},
43
        {"shaderdb",  BIFROST_DBG_SHADERDB,	"Print statistics"},
44
        {"verbose",   BIFROST_DBG_VERBOSE,	"Disassemble verbosely"},
45
46
47
48
49
        DEBUG_NAMED_VALUE_END
};

/* Bind the "BIFROST_MESA_DEBUG" option name to the debug_options table, with
 * a default of 0. NOTE(review): the macro is defined outside this file
 * (util/u_debug.h is included above); presumably it generates a cached
 * accessor for the parsed flags -- confirm against that header. */
DEBUG_GET_ONCE_FLAGS_OPTION(bifrost_debug, "BIFROST_MESA_DEBUG", debug_options, 0)

50
51
52
/* File-scope counter, starting at zero.
 * NOTE(review): presumably numbers compiled shaders for shader-db style
 * statistics; its users are outside this view -- confirm.
 *
 * TODO: This is not thread safe!! */
static unsigned SHADER_DB_COUNT = 0;

53
54
55
56
57
58
59
/* Bitmask of active BIFROST_DBG_* flags; zero (nothing enabled) by default. */
int bifrost_debug = 0;

/* printf-style debug message to stderr, prefixed with the calling function
 * name and line number. Prints only when BIFROST_DBG_MSGS is set in
 * bifrost_debug; wrapped in do/while(0) so it behaves as one statement. */
#define DBG(fmt, ...) \
		do { if (bifrost_debug & BIFROST_DBG_MSGS) \
			fprintf(stderr, "%s:%d: "fmt, \
				__FUNCTION__, __LINE__, ##__VA_ARGS__); } while (0)

60
/* Forward declaration; the definition is later in the file.
 * NOTE(review): presumably emits a NIR control-flow list and returns a block
 * of the result -- confirm which block at the definition. */
static bi_block *emit_cf_list(bi_context *ctx, struct exec_list *list);
61
62
63
64
65
66
67
68
69
/* Forward declaration; the definition is later in the file. Callers below
 * take the returned instruction pointer and fill in its branch_target or
 * sources afterwards. */
static bi_instruction *bi_emit_branch(bi_context *ctx);

/* Lower a NIR break/continue jump: emit a branch, point it at the context's
 * current break or continue block, record the edge from the current block to
 * that target, and flag the current block as ending in an unconditional
 * jump. Any other jump type is unreachable. */
static void
emit_jump(bi_context *ctx, nir_jump_instr *instr)
{
        bi_instruction *branch = bi_emit_branch(ctx);

        if (instr->type == nir_jump_break)
                branch->branch_target = ctx->break_block;
        else if (instr->type == nir_jump_continue)
                branch->branch_target = ctx->continue_block;
        else
                unreachable("Unhandled jump type");

        pan_block_add_successor(&ctx->current_block->base,
                                &branch->branch_target->base);

        ctx->current_block->base.unconditional_jumps = true;
}

Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
83
static bi_instruction
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
84
bi_load_old(enum bi_class T, nir_intrinsic_instr *instr, unsigned offset_idx)
85
{
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
86
87
        bi_instruction load = {
                .type = T,
88
                .vector_channels = instr->num_components,
89
90
        };

Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
91
92
93
        const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];

        if (info->has_dest)
94
                load.dest = pan_dest_index(&instr->dest);
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
95

96
97
        if (info->has_dest && nir_intrinsic_has_dest_type(instr))
                load.dest_type = nir_intrinsic_dest_type(instr);
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
98

99
100
        nir_src *offset = nir_get_io_offset_src(instr);

101
102
103
104
105
106
107
108
        load.src_types[offset_idx] = nir_type_uint32;
        if (nir_src_is_const(*offset)) {
                load.src[offset_idx] = BIR_INDEX_CONSTANT | 0;
                load.constant.u64 = nir_src_as_uint(*offset) +
                                    nir_intrinsic_base(instr);
        } else {
                load.src[offset_idx] = pan_src_index(offset);
        }
109

Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
110
111
112
        return load;
}

113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
/* Emit a tile load of the current pixel as float16. Only valid when
 * compiling a blend shader (asserted). */
static void
bi_emit_ld_output(bi_context *ctx, nir_intrinsic_instr *instr)
{
        assert(ctx->is_blend);

        /* We want to load the current pixel.
         * FIXME: The sample to load is currently hardcoded to 0. This should
         * be addressed for multi-sample FBs.
         */
        struct bifrost_pixel_indices pix = {
                .y = BIFROST_CURRENT_PIXEL,
        };

        bi_instruction ins = {
                .type = BI_LOAD_TILE,
                .vector_channels = instr->num_components,
                .dest = pan_dest_index(&instr->dest),
                .dest_type = nir_type_float16,
                .src_types = { nir_type_uint32, nir_type_uint32, nir_type_uint32 },
        };

        /* PixelIndices: low half of the embedded constant */
        ins.src[0] = BIR_INDEX_CONSTANT | 0;

        /* PixelCoverage: we simply pass r60 which contains the cumulative
         * coverage bitmap */
        ins.src[1] = BIR_INDEX_REGISTER | 60;

        /* InternalConversionDescriptor (see src/panfrost/lib/midgard.xml for
         * more details): high half of the embedded constant */
        ins.src[2] = BIR_INDEX_CONSTANT | 32;

        memcpy(&ins.constant.u64, &pix, sizeof(pix));

        /* Only keep the conversion part of the blend descriptor. */
        ins.constant.u64 |= ctx->blend_desc & 0xffffffff00000000ULL;

        bi_emit(ctx, ins);
}

153
static enum bi_sample
154
155
156
157
bi_interp_for_intrinsic(nir_intrinsic_op op)
{
        switch (op) {
        case nir_intrinsic_load_barycentric_centroid:
158
                return BI_SAMPLE_CENTROID;
159
        case nir_intrinsic_load_barycentric_sample:
160
                return BI_SAMPLE_SAMPLE;
161
162
        case nir_intrinsic_load_barycentric_pixel:
        default:
163
                return BI_SAMPLE_CENTER;
164
165
166
        }
}

Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
167
168
169
/* Emit a varying load for load_input / load_interpolated_input.
 *
 * The interpolation mode defaults to center and, for interpolated inputs, is
 * refined from the barycentric intrinsic feeding source 0 when that source is
 * an intrinsic. Source 0 is zero for center interpolation and r61 otherwise.
 * A constant offset (base + offset) below 20 is encoded as an immediate
 * index; larger constants go through the embedded constant, and dynamic
 * offsets are passed as source 1.
 *
 * NOTE(review): dest_type is set to the bare destination bit size with no
 * base type OR'd in -- confirm this is intended. */
static void
bi_emit_ld_vary(bi_context *ctx, nir_intrinsic_instr *instr)
{
        const bool interpolated =
                instr->intrinsic == nir_intrinsic_load_interpolated_input;

        bi_instruction ins = {
                .type = BI_LOAD_VAR,
                .vector_channels = instr->num_components,
                .dest = pan_dest_index(&instr->dest),
                .dest_type = nir_dest_bit_size(instr->dest),
                .src_types = { nir_type_uint32, nir_type_uint32, nir_type_uint32 },
                .load_vary = {
                        .interp_mode = BI_SAMPLE_CENTER,
                        .update_mode = BI_UPDATE_STORE,
                        .reuse = false,
                        .flat = !interpolated,
                },
        };

        if (interpolated) {
                nir_intrinsic_instr *bary = nir_src_as_intrinsic(instr->src[0]);

                if (bary) {
                        ins.load_vary.interp_mode =
                                bi_interp_for_intrinsic(bary->intrinsic);
                }
        }

        /* Zero it out for center interpolation; otherwise R61 contains
         * sample mask stuff, TODO RA XXX */
        ins.src[0] = (ins.load_vary.interp_mode == BI_SAMPLE_CENTER) ?
                     BIR_INDEX_ZERO : (BIR_INDEX_REGISTER | 61);

        nir_src *offset = nir_get_io_offset_src(instr);

        if (!nir_src_is_const(*offset)) {
                ins.src[1] = pan_src_index(offset);
        } else {
                unsigned offset_val = nir_intrinsic_base(instr) +
                                      nir_src_as_uint(*offset);

                if (offset_val >= 20) {
                        ins.src[1] = BIR_INDEX_CONSTANT | 0;
                        ins.constant.u64 = offset_val;
                } else {
                        ins.load_vary.immediate = true;
                        ins.load_vary.index = offset_val;
                }
        }

        bi_emit(ctx, ins);
}

219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
/* Emit the blend shader's source colour load: a combine of registers r0-r3
 * into the intrinsic's destination. Only VARYING_SLOT_COL0 is accepted
 * (asserted). */
static void
bi_emit_ld_blend_input(bi_context *ctx, nir_intrinsic_instr *instr)
{
        ASSERTED nir_io_semantics sem = nir_intrinsic_io_semantics(instr);

        /* We don't support dual-source blending yet. */
        assert(sem.location == VARYING_SLOT_COL0);

        bi_instruction ins = {
                .type = BI_COMBINE,
                .dest_type = nir_type_uint32,
                .dest = pan_dest_index(&instr->dest),
        };

        /* Source color is passed through r0-r3.
         * TODO: We should probably find a way to avoid this
         * combine/mov and use r0-r3 directly.
         */
        for (unsigned c = 0; c < 4; ++c) {
                ins.src[c] = BIR_INDEX_REGISTER | c;
                ins.src_types[c] = nir_type_uint32;
        }

        bi_emit(ctx, ins);
}

251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
/* Emit an ATEST instruction. It reads r60 and the alpha channel (component
 * 3) of `rgba`, typed as T, and writes its result back to r60. */
static void
bi_emit_atest(bi_context *ctx, unsigned rgba, nir_alu_type T)
{
        bi_instruction ins = {
                .type = BI_ATEST,
                .dest = BIR_INDEX_REGISTER | 60 /* TODO: RA */,
                .dest_type = nir_type_uint32,
                .src_types = { nir_type_uint32, T },
        };

        ins.src[0] = BIR_INDEX_REGISTER | 60; /* TODO: RA */
        ins.src[1] = rgba;

        /* swizzle out the alpha */
        ins.swizzle[1][0] = 3;

        bi_emit(ctx, ins);
}

272
static void
273
bi_emit_blend(bi_context *ctx, unsigned rgba, nir_alu_type T, unsigned rt)
274
275
276
{
        bi_instruction blend = {
                .type = BI_BLEND,
277
                .blend_location = rt,
278
                .src = {
279
280
                        rgba,
                        BIR_INDEX_REGISTER | 60 /* TODO: RA */
281
                },
282
                .src_types = {
283
                        T,
284
285
286
                        nir_type_uint32,
                        nir_type_uint32,
                        nir_type_uint32,
287
                },
288
                .swizzle = {
289
290
                        { 0, 1, 2, 3 },
                        { 0 }
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
291
292
                },
                .dest_type = nir_type_uint32,
293
                .vector_channels = 4
294
295
        };

296
297
298
299
300
301
302
303
304
305
        if (ctx->is_blend) {
                /* Blend descriptor comes from the compile inputs */
                blend.src[2] = BIR_INDEX_CONSTANT | 0;
                blend.src[3] = BIR_INDEX_CONSTANT | 32;
                blend.constant.u64 = ctx->blend_desc;

                /* Put the result in r0 */
                blend.dest = BIR_INDEX_REGISTER | 0;
        } else {
                /* Blend descriptor comes from the FAU RAM */
306
307
                blend.src[2] = BIR_INDEX_FAU | (BIR_FAU_BLEND_0 + rt);
                blend.src[3] = blend.src[2] | BIR_FAU_HI;
308
309
310
311
312
313
314
315

                /* By convention, the return address is stored in r48 and will
                 * be used by the blend shader to jump back to the fragment
                 * shader when it's done.
                 */
                blend.dest = BIR_INDEX_REGISTER | 48;
        }

316
        assert(blend.blend_location < 8);
317
        assert(ctx->blend_types);
318
        assert(blend.src_types[0]);
319
320
        ctx->blend_types[blend.blend_location] = blend.src_types[0];

321
        bi_emit(ctx, blend);
322
323
}

324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
/* Emit a ZS_EMIT instruction taking depth `z` (float32), `stencil` (uint8)
 * and r60 as sources, with the result written back to r60. */
static void
bi_emit_zs_emit(bi_context *ctx, unsigned z, unsigned stencil)
{
        bi_instruction ins = {
                .type = BI_ZS_EMIT,
                .dest = BIR_INDEX_REGISTER | 60 /* TODO: RA */,
                .dest_type = nir_type_uint32,
                .swizzle = { { 0 }, { 0 }, { 0 } },
        };

        ins.src[0] = z;
        ins.src_types[0] = nir_type_float32;

        ins.src[1] = stencil;
        ins.src_types[1] = nir_type_uint8;

        ins.src[2] = BIR_INDEX_REGISTER | 60; /* TODO: RA */
        ins.src_types[2] = nir_type_uint32;

        bi_emit(ctx, ins);
}

347
348
349
/* Emit fragment output for store_output / store_combined_output_pan.
 *
 * The writeout mask is the intrinsic component for the combined intrinsic and
 * colour-only otherwise. In order this emits: one ATEST per shader (skipped
 * in blend shaders), a ZS_EMIT when depth and/or stencil are written, a BLEND
 * when colour is written, and -- in blend shaders only -- a branch through
 * r48. */
static void
bi_emit_frag_out(bi_context *ctx, nir_intrinsic_instr *instr)
{
        unsigned writeout = PAN_WRITEOUT_C;

        if (instr->intrinsic == nir_intrinsic_store_combined_output_pan)
                writeout = nir_intrinsic_component(instr);

        const bool writes_colour = (writeout & PAN_WRITEOUT_C) != 0;
        const bool writes_zs =
                (writeout & (PAN_WRITEOUT_Z | PAN_WRITEOUT_S)) != 0;

        const nir_variable *var =
                nir_find_variable_with_driver_location(ctx->nir,
                                                       nir_var_shader_out,
                                                       nir_intrinsic_base(instr));
        assert(var);

        if (!(ctx->emitted_atest || ctx->is_blend)) {
                bi_emit_atest(ctx,
                              pan_src_index(&instr->src[0]),
                              nir_intrinsic_src_type(instr));

                ctx->emitted_atest = true;
        }

        if (writes_zs) {
                unsigned z = 0, s = 0;

                if (writeout & PAN_WRITEOUT_Z)
                        z = pan_src_index(&instr->src[2]);

                if (writeout & PAN_WRITEOUT_S)
                        s = pan_src_index(&instr->src[3]);

                bi_emit_zs_emit(ctx, z, s);
        }

        if (writes_colour) {
                unsigned loc = var->data.location;
                assert(loc == FRAG_RESULT_COLOR || loc >= FRAG_RESULT_DATA0);

                /* FRAG_RESULT_COLOR maps to render target 0 */
                unsigned rt = 0;
                if (loc != FRAG_RESULT_COLOR)
                        rt = loc - FRAG_RESULT_DATA0;

                bi_emit_blend(ctx,
                              pan_src_index(&instr->src[0]),
                              nir_intrinsic_src_type(instr),
                              rt);
        }

        if (ctx->is_blend) {
                /* Jump back to the fragment shader, return address is stored
                 * in r48 (see bi_emit_blend).
                 */
                bi_instruction *ret = bi_emit_branch(ctx);
                ret->src[2] = BIR_INDEX_REGISTER | 48;
        }
}

Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
403
404
405
/* Build a load of class T whose sources 0 and 1 are r61 and r62 and whose
 * I/O offset occupies source 2 (as placed by bi_load_old).
 *
 * The format is the intrinsic's source type for store_output and its
 * destination type otherwise. When the offset is a constant below 16, the
 * instruction is promoted to the immediate form and the constant becomes the
 * attribute index.
 *
 * Returns the instruction by value; nothing is emitted here. */
static bi_instruction
bi_load_with_r61(enum bi_class T, nir_intrinsic_instr *instr)
{
        bi_instruction ld = bi_load_old(T, instr, 2);

        ld.src[0] = BIR_INDEX_REGISTER | 61; /* TODO: RA */
        ld.src[1] = BIR_INDEX_REGISTER | 62;
        ld.src_types[0] = nir_type_uint32;
        ld.src_types[1] = nir_type_uint32;
        ld.format = instr->intrinsic == nir_intrinsic_store_output ?
                    nir_intrinsic_src_type(instr) :
                    nir_intrinsic_dest_type(instr);

        /* Promote to immediate instruction if we can. The offset lives in
         * source 2 (see the bi_load_old call above). Sources 0 and 1 were
         * just set to registers, so testing source 0 for the constant flag
         * could never succeed and left this promotion dead. */
        if ((ld.src[2] & BIR_INDEX_CONSTANT) && ld.constant.u64 < 16) {
                ld.attribute.immediate = true;
                ld.attribute.index = ld.constant.u64;
        }

        return ld;
}

424
425
426
/* Emit a varying store: first a three-channel LOAD_VAR_ADDRESS into a fresh
 * temporary, then a STORE_VAR that consumes the value to store plus the three
 * address channels. */
static void
bi_emit_st_vary(bi_context *ctx, nir_intrinsic_instr *instr)
{
        bi_instruction address = bi_load_with_r61(BI_LOAD_VAR_ADDRESS, instr);
        address.dest = bi_make_temp(ctx);
        address.dest_type = nir_type_uint32;
        address.vector_channels = 3;

        /* Only look at the total components needed. In effect, we fill in all
         * the intermediate "holes" in the write mask, since we can't mask off
         * stores. Since nir_lower_io_to_temporaries ensures each varying is
         * written at most once, anything that's masked out is undefined, so it
         * doesn't matter what we write there. So we may as well do the
         * simplest thing possible. */
        unsigned nr = util_last_bit(nir_intrinsic_write_mask(instr));
        assert(nr > 0 && nr <= nir_intrinsic_src_components(instr, 0));

        bi_instruction st = {
                .type = BI_STORE_VAR,
                .vector_channels = nr,
                .src_types = {
                        nir_type_uint32,
                        nir_type_uint32, nir_type_uint32, nir_type_uint32,
                },
        };

        /* Source 0 is the data, swizzled as the identity over nr channels */
        st.src[0] = pan_src_index(&instr->src[0]);
        for (unsigned c = 0; c < nr; ++c)
                st.swizzle[0][c] = c;

        /* Sources 1..3 select channels 0..2 of the computed address */
        for (unsigned w = 0; w < 3; ++w) {
                st.src[1 + w] = address.dest;
                st.swizzle[1 + w][0] = w;
        }

        bi_emit(ctx, address);
        bi_emit(ctx, st);
}

465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
/* Emit a UBO load for a load_ubo intrinsic (source 0 = UBO index, source 1 =
 * byte offset).
 *
 * When the UBO index is the constant 0 and there are sysvals, the offset is
 * advanced by 16 bytes per sysval: folded into the constant when the offset
 * is constant, or added with an extra integer ADD otherwise. Constant offsets
 * and constant UBO indices are packed into the low and high 32 bits of the
 * embedded constant respectively. */
static void
bi_emit_ld_ubo(bi_context *ctx, nir_intrinsic_instr *instr)
{
        /* nir_lower_uniforms_to_ubo() should have been called, reserving
         * UBO #0 for uniforms even if the shader doesn't have uniforms.
         */
        assert(ctx->nir->info.first_ubo_is_default_ubo);

        const bool index_is_const = nir_src_is_const(instr->src[0]);
        const bool offset_is_const = nir_src_is_const(instr->src[1]);

        unsigned dyn_offset = pan_src_index(&instr->src[1]);
        uint32_t const_offset = 0;

        if (offset_is_const)
                const_offset = nir_src_as_uint(instr->src[1]);

        /* Loads from UBO #0 must skip over the sysvals */
        const bool skip_sysvals = index_is_const &&
                                  nir_src_as_uint(instr->src[0]) == 0 &&
                                  ctx->sysvals.sysval_count;

        if (skip_sysvals && offset_is_const) {
                const_offset += 16 * ctx->sysvals.sysval_count;
        } else if (skip_sysvals) {
                bi_instruction add = {
                        .type = BI_IMATH,
                        .op.imath = BI_IMATH_ADD,
                        .dest = bi_make_temp(ctx),
                        .dest_type = nir_type_uint32,
                        .src = { dyn_offset, BIR_INDEX_CONSTANT | 0, BIR_INDEX_ZERO },
                        .src_types = { nir_type_uint32, nir_type_uint32, nir_type_uint32 },
                        .constant.u64 = 16 * ctx->sysvals.sysval_count,
                };

                bi_emit(ctx, add);
                dyn_offset = add.dest;
        }

        bi_instruction ld = {
                .type = BI_LOAD_UNIFORM,
                .segment = BI_SEG_UBO,
                .vector_channels = instr->num_components,
                .src_types = { nir_type_uint32, nir_type_uint32 },
                .dest = pan_dest_index(&instr->dest),
                .dest_type = nir_type_uint | nir_dest_bit_size(instr->dest),
        };

        /* Source 0: byte offset */
        if (!offset_is_const) {
                ld.src[0] = dyn_offset;
        } else {
                ld.src[0] = BIR_INDEX_CONSTANT | 0;
                ld.constant.u64 |= const_offset;
        }

        /* Source 1: UBO index */
        if (!index_is_const) {
                ld.src[1] = pan_src_index(&instr->src[0]);
        } else {
                ld.src[1] = BIR_INDEX_CONSTANT | 32;
                ld.constant.u64 |= nir_src_as_uint(instr->src[0]) << 32;
        }

        bi_emit(ctx, ld);
}

Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
/* Emit a load of a system value.
 *
 * The sysval for `instr` is looked up in ctx->sysvals.sysval_to_id; the
 * stored value minus one is the uniform slot. The load reads `nr_components`
 * channels at byte `offset` within that 16-byte slot, through a constant
 * address, into the destination reported by panfrost_sysval_for_instr. */
static void
bi_emit_sysval(bi_context *ctx, nir_instr *instr,
                unsigned nr_components, unsigned offset)
{
        nir_dest nir_dest;

        /* Figure out which uniform this is */
        int sysval = panfrost_sysval_for_instr(instr, &nir_dest);
        void *val = _mesa_hash_table_u64_search(ctx->sysvals.sysval_to_id, sysval);

        /* A failed lookup would make the subtraction below wrap and silently
         * produce a huge uniform index, so catch it here instead. */
        assert(val != NULL);

        /* Sysvals are prefix uniforms */
        unsigned uniform = ((uintptr_t) val) - 1;

        /* Emit the read itself -- this is never indirect */

        bi_instruction load = {
                .type = BI_LOAD_UNIFORM,
                .segment = BI_SEG_UBO,
                .vector_channels = nr_components,
                .src = { BIR_INDEX_CONSTANT, BIR_INDEX_ZERO },
                .src_types = { nir_type_uint32, nir_type_uint32 },
                .constant = { (uniform * 16) + offset },
                .dest = pan_dest_index(&nir_dest),
                .dest_type = nir_type_uint32, /* TODO */
        };

        bi_emit(ctx, load);
}

Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
/* gl_FragCoord.xy = u16_to_f32(R59.xy) + 0.5
 * gl_FragCoord.z = ld_vary(fragz)
 * gl_FragCoord.w = ld_vary(fragw)
 *
 * Each of the four components is computed into its own temporary and the
 * results are gathered with a final combine into the intrinsic destination.
 */

static void
bi_emit_ld_frag_coord(bi_context *ctx, nir_intrinsic_instr *instr)
{
        /* Future proofing for mediump fragcoord at some point.. */
        const nir_alu_type T = nir_type_float32;
        const float half = 0.5;

        /* The combine is only sketched here; its sources are filled in
         * below as each component is produced */
        bi_instruction combine = {
                .type = BI_COMBINE,
                .dest_type = nir_type_uint32,
                .dest = pan_dest_index(&instr->dest),
                .src_types = { T, T, T, T },
        };

        /* x and y: convert the 16-bit halves of r59 and add one half */
        for (unsigned comp = 0; comp < 2; ++comp) {
                bi_instruction conv = {
                        .type = BI_CONVERT,
                        .dest_type = T,
                        .dest = bi_make_temp(ctx),
                        .src = {
                                /* TODO: RA XXX */
                                BIR_INDEX_REGISTER | 59
                        },
                        .src_types = { nir_type_uint16 },
                        .swizzle = { { comp } }
                };

                bi_instruction add = {
                        .type = BI_ADD,
                        .dest_type = T,
                        .dest = bi_make_temp(ctx),
                        .src = { conv.dest, BIR_INDEX_CONSTANT },
                        .src_types = { T, T },
                };

                memcpy(&add.constant.u32, &half, sizeof(float));

                bi_emit(ctx, conv);
                bi_emit(ctx, add);

                combine.src[comp] = add.dest;
        }

        /* z and w: special varying loads */
        for (unsigned comp = 0; comp < 2; ++comp) {
                bi_instruction load = {
                        .type = BI_LOAD_VAR,
                        .load_vary = {
                                .interp_mode = BI_SAMPLE_CENTER,
                                .update_mode = BI_UPDATE_CLOBBER,
                                .var_id = (comp != 0) ?
                                          BI_VARYING_NAME_FRAG_W :
                                          BI_VARYING_NAME_FRAG_Z,
                                .special = true,
                                .reuse = false,
                                .flat = true
                        },
                        .vector_channels = 1,
                        .dest_type = nir_type_float32,
                        .format = nir_type_float32,
                        .dest = bi_make_temp(ctx),
                        .src[0] = BIR_INDEX_PASS | BIFROST_SRC_FAU_LO,
                        .src_types[0] = nir_type_uint32,
                };

                bi_emit(ctx, load);

                combine.src[2 + comp] = load.dest;
        }

        /* All four sources are known now */
        bi_emit(ctx, combine);
}

637
638
639
640
641
642
643
644
645
646
647
648
649
650
/* Emit an unconditional discard, expressed as a conditional discard whose
 * comparison (zero == zero) always holds. `instr` is not consulted. */
static void
bi_emit_discard(bi_context *ctx, nir_intrinsic_instr *instr)
{
        /* Goofy lowering */
        bi_instruction discard = {
                .type = BI_DISCARD,
                .cond = BI_COND_EQ,
        };

        for (unsigned s = 0; s < 2; ++s) {
                discard.src[s] = BIR_INDEX_ZERO;
                discard.src_types[s] = nir_type_uint32;
        }

        bi_emit(ctx, discard);
}

651
652
653
654
655
/* Forward declaration; the definition is later in the file. Used below to try
 * to fold a condition into an already-sketched instruction; per the caller's
 * comment it "may or may not succeed".
 * NOTE(review): the exact meaning of constants_left / constant_shift / comps /
 * float_only is not visible here -- confirm at the definition. */
static void
bi_fuse_cond(bi_instruction *csel, nir_alu_src cond,
                unsigned *constants_left, unsigned *constant_shift,
                unsigned comps, bool float_only);

656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
/* Emit a conditional discard for discard_if: by default "discard when the
 * condition source != 0", typed as an unsigned integer of the condition's
 * bit size. bi_fuse_cond is then given a chance to rewrite the sketched
 * instruction before it is emitted. */
static void
bi_emit_discard_if(bi_context *ctx, nir_intrinsic_instr *instr)
{
        const nir_alu_type T =
                nir_type_uint | nir_src_bit_size(instr->src[0]);

        bi_instruction discard = {
                .type = BI_DISCARD,
                .cond = BI_COND_NE,
                .src_types = { T, T },
        };

        discard.src[0] = pan_src_index(&instr->src[0]);
        discard.src[1] = BIR_INDEX_ZERO;

        /* Scalar so no swizzle */
        nir_alu_src wrap = {
                .src = instr->src[0]
        };

        /* Try to fuse in the condition. May or may not succeed but we're
         * optimistic */
        unsigned constants_left = 1;
        unsigned constant_shift = 0;

        bi_fuse_cond(&discard, wrap, &constants_left, &constant_shift, 1, true);

        bi_emit(ctx, discard);
}

686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
/* Emit a move of one blend constant colour channel (r/g/b/a, chosen by the
 * intrinsic) into the destination. The float from ctx->blend_constants is
 * copied bit-for-bit into the instruction's 32-bit constant. Only valid in
 * blend shaders (asserted). */
static void
bi_emit_blend_const(bi_context *ctx, nir_intrinsic_instr *instr)
{
        assert(ctx->is_blend);

        unsigned comp;

        if (instr->intrinsic == nir_intrinsic_load_blend_const_color_r_float)
                comp = 0;
        else if (instr->intrinsic == nir_intrinsic_load_blend_const_color_g_float)
                comp = 1;
        else if (instr->intrinsic == nir_intrinsic_load_blend_const_color_b_float)
                comp = 2;
        else if (instr->intrinsic == nir_intrinsic_load_blend_const_color_a_float)
                comp = 3;
        else
                unreachable("Invalid load blend constant intrinsic");

        bi_instruction move = {
                .type = BI_MOV,
                .dest_type = nir_type_uint32,
                .dest = pan_dest_index(&instr->dest),
                .src_types = { nir_type_uint32 },
                .src = { BIR_INDEX_CONSTANT },
        };

        memcpy(&move.constant.u32, &ctx->blend_constants[comp], sizeof(float));

        bi_emit(ctx, move);
}

713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
/* Emit the sample ID load: a bitwise AND with the right-shift flag set,
 * taking r61, a mask of 0xff and a shift amount of 16 as sources. The mask
 * and shift are the low and high 32 bits of the embedded constant. */
static void
bi_emit_sample_id(bi_context *ctx, nir_intrinsic_instr *instr)
{
        const uint64_t mask = 0xffull;
        const uint64_t shift = 0x10ull;

        bi_instruction ins = {
                .type = BI_BITWISE,
                .op.bitwise = BI_BITWISE_AND,
                .bitwise.rshift = true,
                .dest = pan_dest_index(&instr->dest),
                .dest_type = nir_type_uint32,
                .constant.u64 = mask | (shift << 32ull)
        };

        /* r61[16:23] contains the sampleID */
        ins.src[0] = BIR_INDEX_REGISTER | 61;
        ins.src_types[0] = nir_type_uint32;

        /* mask */
        ins.src[1] = BIR_INDEX_CONSTANT | 0;
        ins.src_types[1] = nir_type_uint32;

        /* shift */
        ins.src[2] = BIR_INDEX_CONSTANT | 32;
        ins.src_types[2] = nir_type_uint8;

        bi_emit(ctx, ins);
}

741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
/* Emit the front-facing query as a 32-bit unsigned compare of r58 against
 * zero for equality. */
static void
bi_emit_front_face(bi_context *ctx, nir_intrinsic_instr *instr)
{
        bi_instruction ins = {
                .type = BI_CMP,
                .cond = BI_COND_EQ,
                .dest_type = nir_type_uint32,
                .dest = pan_dest_index(&instr->dest),
                .src_types = { nir_type_uint32, nir_type_uint32 },
        };

        /* r58 == 0 means primitive is front facing */
        ins.src[0] = BIR_INDEX_REGISTER | 58;
        ins.src[1] = BIR_INDEX_ZERO;

        bi_emit(ctx, ins);
}

763
764
765
766
767
/* Emit the point coordinate load: a two-channel float32 load of the special
 * varying BI_VARYING_NAME_POINT, with a zero first source. */
static void
bi_emit_point_coord(bi_context *ctx, nir_intrinsic_instr *instr)
{
        bi_instruction ins = {
                .type = BI_LOAD_VAR,
                .vector_channels = 2,
                .dest = pan_dest_index(&instr->dest),
                .dest_type = nir_type_float32,
                .format = nir_type_float32,
                .load_vary = {
                        .update_mode = BI_UPDATE_CLOBBER,
                        .var_id = BI_VARYING_NAME_POINT,
                        .special = true,
                },
        };

        ins.src[0] = BIR_INDEX_ZERO;
        ins.src_types[0] = nir_type_uint32;

        bi_emit(ctx, ins);
}

784
785
786
787
788
789
790
791
792
793
794
795
796
797
/* Emit the vertex ID load as a 32-bit signed move from r61 into the
 * intrinsic's destination. */
static void
bi_emit_vertex_id(bi_context *ctx, nir_intrinsic_instr *instr)
{
        bi_instruction mov = {
                .type = BI_MOV,
                .dest_type = nir_type_int32,
                .dest = pan_dest_index(&instr->dest),
        };

        mov.src[0] = BIR_INDEX_REGISTER | 61;
        mov.src_types[0] = nir_type_int32;

        bi_emit(ctx, mov);
}

798
799
800
801
802
803
804
805
806
807
808
809
810
811
/* Lowers load_instance_id to a 32-bit integer move from r62 into the
 * intrinsic's destination.
 * NOTE(review): presumably r62 is where the instance ID is preloaded; confirm
 * against the register ABI. */
static void
bi_emit_instance_id(bi_context *ctx, nir_intrinsic_instr *instr)
{
        bi_instruction copy = {
                .type = BI_MOV,
                .dest = pan_dest_index(&instr->dest),
                .dest_type = nir_type_int32,
                .src[0] = BIR_INDEX_REGISTER | 62,
                .src_types[0] = nir_type_int32,
        };

        bi_emit(ctx, copy);
}

/* Dispatches a NIR intrinsic to the helper that emits it. Input loads and
 * output stores additionally depend on the shader stage (and, for loads, on
 * whether a blend shader is being compiled). Any intrinsic without a case
 * here hits unreachable(). */
static void
emit_intrinsic(bi_context *ctx, nir_intrinsic_instr *instr)
{
        switch (instr->intrinsic) {
        case nir_intrinsic_load_barycentric_pixel:
        case nir_intrinsic_load_barycentric_centroid:
        case nir_intrinsic_load_barycentric_sample:
                /* stub */
                break;

        case nir_intrinsic_load_interpolated_input:
        case nir_intrinsic_load_input:
                if (ctx->is_blend)
                        bi_emit_ld_blend_input(ctx, instr);
                else if (ctx->stage == MESA_SHADER_FRAGMENT)
                        bi_emit_ld_vary(ctx, instr);
                else if (ctx->stage == MESA_SHADER_VERTEX)
                        bi_emit(ctx, bi_load_with_r61(BI_LOAD_ATTR, instr));
                else {
                        unreachable("Unsupported shader stage");
                }
                break;

        case nir_intrinsic_store_output:
                if (ctx->stage == MESA_SHADER_FRAGMENT)
                        bi_emit_frag_out(ctx, instr);
                else if (ctx->stage == MESA_SHADER_VERTEX)
                        bi_emit_st_vary(ctx, instr);
                else
                        unreachable("Unsupported shader stage");
                break;

        case nir_intrinsic_store_combined_output_pan:
                assert(ctx->stage == MESA_SHADER_FRAGMENT);
                bi_emit_frag_out(ctx, instr);
                break;

        case nir_intrinsic_load_ubo:
                bi_emit_ld_ubo(ctx, instr);
                break;

        case nir_intrinsic_load_frag_coord:
                bi_emit_ld_frag_coord(ctx, instr);
                break;

        case nir_intrinsic_discard:
                bi_emit_discard(ctx, instr);
                break;

        case nir_intrinsic_discard_if:
                bi_emit_discard_if(ctx, instr);
                break;

        /* The two SSBO queries go through the same sysval helper and differ
         * only in its last argument (0 vs 8) */
        case nir_intrinsic_load_ssbo_address:
                bi_emit_sysval(ctx, &instr->instr, 1, 0);
                break;

        case nir_intrinsic_get_ssbo_size:
                bi_emit_sysval(ctx, &instr->instr, 1, 8);
                break;

        case nir_intrinsic_load_output:
                bi_emit_ld_output(ctx, instr);
                break;

        case nir_intrinsic_load_viewport_scale:
        case nir_intrinsic_load_viewport_offset:
        case nir_intrinsic_load_num_work_groups:
        case nir_intrinsic_load_sampler_lod_parameters_pan:
                bi_emit_sysval(ctx, &instr->instr, 3, 0);
                break;

        case nir_intrinsic_load_blend_const_color_r_float:
        case nir_intrinsic_load_blend_const_color_g_float:
        case nir_intrinsic_load_blend_const_color_b_float:
        case nir_intrinsic_load_blend_const_color_a_float:
                bi_emit_blend_const(ctx, instr);
                break;

        case nir_intrinsic_load_sample_id:
                bi_emit_sample_id(ctx, instr);
                break;

        case nir_intrinsic_load_front_face:
                bi_emit_front_face(ctx, instr);
                break;

        case nir_intrinsic_load_point_coord:
                bi_emit_point_coord(ctx, instr);
                break;

        case nir_intrinsic_load_vertex_id:
                bi_emit_vertex_id(ctx, instr);
                break;

        case nir_intrinsic_load_instance_id:
                bi_emit_instance_id(ctx, instr);
                break;

        default:
                unreachable("Unknown intrinsic");
                break;
        }
}

/* Emits a NIR load_const as a single 32-bit BI_MOV from an inline constant.
 * All components of the constant are packed into one 32-bit word, component i
 * occupying bits [i * bit_size, (i + 1) * bit_size). */
static void
emit_load_const(bi_context *ctx, nir_load_const_instr *instr)
{
        /* Make sure we've been lowered: everything must fit in 32 bits */
        assert(instr->def.num_components <= (32 / instr->def.bit_size));

        /* Accumulate all the channels of the constant, as if we did an
         * implicit SEL over them */
        uint32_t acc = 0;

        for (unsigned i = 0; i < instr->def.num_components; ++i) {
                unsigned v = nir_const_value_as_uint(instr->value[i], instr->def.bit_size);
                acc |= (v << (i * instr->def.bit_size));
        }

        bi_instruction move = {
                .type = BI_MOV,
                .dest = pan_ssa_index(&instr->def),
                .dest_type = nir_type_uint32,
                .src = {
                        BIR_INDEX_CONSTANT
                },
                .src_types = {
                        nir_type_uint32,
                },
                .constant = {
                        .u32 = acc
                }
        };

        bi_emit(ctx, move);
}

/* Expands to the three case labels for the 8-, 16- and 32-bit variants of a
 * sized opcode name, e.g. BI_CASE_CMP(nir_op_flt) gives
 * "case nir_op_flt8: case nir_op_flt16: case nir_op_flt32:".
 * The last line deliberately has no trailing backslash, so the macro does not
 * depend on the following source line being blank. */
#define BI_CASE_CMP(op) \
        case op##8: \
        case op##16: \
        case op##32:

/* Maps a NIR ALU opcode to the bi_class of the instruction used to implement
 * it. Several opcodes share a class and are told apart later by op-specific
 * fields. Opcodes without a case here hit unreachable(). */
static enum bi_class
bi_class_for_nir_alu(nir_op op)
{
        switch (op) {
        case nir_op_fadd:
        case nir_op_fsub:
                return BI_ADD;

        case nir_op_iadd:
        case nir_op_isub:
                return BI_IMATH;

        case nir_op_imul:
                return BI_IMUL;

        case nir_op_iand:
        case nir_op_ior:
        case nir_op_ixor:
        case nir_op_inot:
        case nir_op_ishl:
        case nir_op_ishr:
        case nir_op_ushr:
                return BI_BITWISE;

        BI_CASE_CMP(nir_op_flt)
        BI_CASE_CMP(nir_op_fge)
        BI_CASE_CMP(nir_op_feq)
        BI_CASE_CMP(nir_op_fneu)
        BI_CASE_CMP(nir_op_ilt)
        BI_CASE_CMP(nir_op_ige)
        BI_CASE_CMP(nir_op_ieq)
        BI_CASE_CMP(nir_op_ine)
        BI_CASE_CMP(nir_op_uge)
        BI_CASE_CMP(nir_op_ult)
                return BI_CMP;

        case nir_op_b8csel:
        case nir_op_b16csel:
        case nir_op_b32csel:
                return BI_CSEL;

        case nir_op_i2i8:
        case nir_op_i2i16:
        case nir_op_i2i32:
        case nir_op_i2i64:
        case nir_op_u2u8:
        case nir_op_u2u16:
        case nir_op_u2u32:
        case nir_op_u2u64:
        case nir_op_f2i16:
        case nir_op_f2i32:
        case nir_op_f2i64:
        case nir_op_f2u16:
        case nir_op_f2u32:
        case nir_op_f2u64:
        case nir_op_i2f16:
        case nir_op_i2f32:
        case nir_op_i2f64:
        case nir_op_u2f16:
        case nir_op_u2f32:
        case nir_op_u2f64:
        case nir_op_f2f16:
        case nir_op_f2f32:
        case nir_op_f2f64:
        case nir_op_f2fmp:
                return BI_CONVERT;

        case nir_op_vec2:
        case nir_op_vec3:
        case nir_op_vec4:
                return BI_COMBINE;

        case nir_op_vec8:
        case nir_op_vec16:
                unreachable("should've been lowered");

        case nir_op_ffma:
        case nir_op_fmul:
                return BI_FMA;

        case nir_op_imin:
        case nir_op_imax:
        case nir_op_umin:
        case nir_op_umax:
        case nir_op_fmin:
        case nir_op_fmax:
                return BI_MINMAX;

        case nir_op_fsat:
        case nir_op_fneg:
        case nir_op_fabs:
                return BI_FMOV;

        case nir_op_mov:
                return BI_MOV;

        case nir_op_fround_even:
        case nir_op_fceil:
        case nir_op_ffloor:
        case nir_op_ftrunc:
                return BI_ROUND;

        case nir_op_frcp:
        case nir_op_frsq:
        case nir_op_iabs:
                return BI_SPECIAL_ADD;

        default:
                unreachable("Unknown ALU op");
        }
}

/* Gets a bi_cond for a given NIR comparison opcode. In soft mode, it will
 * return BI_COND_ALWAYS as a sentinel if it fails to do so (when used for
 * optimizations). Otherwise it will bail (when used for primary code
 * generation). */

1071
static enum bi_cond
1072
bi_cond_for_nir(nir_op op, bool soft)
1073
1074
{
        switch (op) {
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
1075
1076
        BI_CASE_CMP(nir_op_flt)
        BI_CASE_CMP(nir_op_ilt)
Boris Brezillon's avatar
Boris Brezillon committed
1077
        BI_CASE_CMP(nir_op_ult)
1078
                return BI_COND_LT;
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
1079
1080
1081

        BI_CASE_CMP(nir_op_fge)
        BI_CASE_CMP(nir_op_ige)
1082
        BI_CASE_CMP(nir_op_uge)
1083
                return BI_COND_GE;
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
1084
1085
1086

        BI_CASE_CMP(nir_op_feq)
        BI_CASE_CMP(nir_op_ieq)
1087
                return BI_COND_EQ;
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
1088

1089
        BI_CASE_CMP(nir_op_fneu)
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
1090
        BI_CASE_CMP(nir_op_ine)
1091
1092
                return BI_COND_NE;
        default:
1093
1094
1095
1096
                if (soft)
                        return BI_COND_ALWAYS;
                else
                        unreachable("Invalid compare");
1097
1098
1099
        }
}

/* Copies source `i` of the NIR ALU instruction into source slot `to` of the
 * Bifrost instruction, recording its type and either inlining it as a
 * constant or referencing it by index with a swizzle.
 *
 * Constant inlining is attempted only when the source is constant, the caller
 * still has budget (*constants_left != 0) and the source is as wide as the
 * destination. The 64-bit constant.u64 field is the pool: *constant_shift is
 * the bit offset where the next new constant goes. A value already present in
 * the pool is reused rather than appended. If nothing can be inlined, the
 * source falls back to its regular index. */
static void
bi_copy_src(bi_instruction *alu, nir_alu_instr *instr, unsigned i, unsigned to,
            unsigned *constants_left, unsigned *constant_shift)
{
        unsigned bits = nir_src_bit_size(instr->src[i].src);
        unsigned dest_bits = nir_dest_bit_size(instr->dest.dest);

        alu->src_types[to] = nir_op_infos[instr->op].input_types[i]
                | bits;

        /* Try to inline a constant */
        if (nir_src_is_const(instr->src[i].src) && *constants_left && (dest_bits == bits)) {
                /* Shifting a 64-bit value by 64 is undefined, so build the
                 * full-width mask explicitly */
                uint64_t mask = (dest_bits >= 64) ? ~0ull :
                        ((1ull << dest_bits) - 1);
                uint64_t cons = nir_src_as_uint(instr->src[i].src);

                /* Try to reuse a constant */
                for (unsigned shift = 0; shift < (*constant_shift); shift += dest_bits) {
                        if (((alu->constant.u64 >> shift) & mask) == cons) {
                                alu->src[to] = BIR_INDEX_CONSTANT | shift;
                                return;
                        }
                }

                /* Append only if the value still fits in the 64-bit pool. The
                 * shift advances by at least 32 per constant, so sub-32-bit
                 * ops can run out of room before constants_left hits zero;
                 * in that case fall through to the regular source below. */
                if ((*constant_shift) + dest_bits <= 64) {
                        alu->constant.u64 |= cons << *constant_shift;
                        alu->src[to] = BIR_INDEX_CONSTANT | (*constant_shift);
                        --(*constants_left);
                        (*constant_shift) += MAX2(dest_bits, 32); /* lo/hi */
                        return;
                }
        }

        alu->src[to] = pan_src_index(&instr->src[i].src);

        /* Copy swizzle for all vectored components, replicating last component
         * to fill undersized */

        unsigned vec = alu->type == BI_COMBINE ? 1 :
                MAX2(1, 32 / bits);

        unsigned comps = nir_ssa_alu_instr_src_components(instr, i);

        for (unsigned j = 0; j < vec; ++j)
                alu->swizzle[to][j] = instr->src[i].swizzle[MIN2(j, comps - 1)];
}

static void
1144
1145
1146
bi_fuse_cond(bi_instruction *csel, nir_alu_src cond,
                unsigned *constants_left, unsigned *constant_shift,
                unsigned comps, bool float_only)
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
{
        /* Bail for vector weirdness */
        if (cond.swizzle[0] != 0)
                return;

        if (!cond.src.is_ssa)
                return;

        nir_ssa_def *def = cond.src.ssa;
        nir_instr *parent = def->parent_instr;

        if (parent->type != nir_instr_type_alu)
                return;

        nir_alu_instr *alu = nir_instr_as_alu(parent);

        /* Try to match a condition */
        enum bi_cond bcond = bi_cond_for_nir(alu->op, true);

        if (bcond == BI_COND_ALWAYS)
                return;

1169
1170
1171
1172
1173
1174
1175
1176
1177
        /* Some instructions can't compare ints */
        if (float_only) {
                nir_alu_type T = nir_op_infos[alu->op].input_types[0];
                T = nir_alu_type_get_base_type(T);

                if (T != nir_type_float)
                        return;
        }

1178
        /* We found one, let's fuse it in */
1179
        csel->cond = bcond;
1180
1181
        bi_copy_src(csel, alu, 0, 0, constants_left, constant_shift);
        bi_copy_src(csel, alu, 1, 1, constants_left, constant_shift);
1182
1183
}

static void
emit_alu(bi_context *ctx, nir_alu_instr *instr)
{
1187
1188
1189
1190
1191
        /* Try some special functions */
        switch (instr->op) {
        case nir_op_fexp2:
                bi_emit_fexp2(ctx, instr);
                return;
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
1192
1193
1194
        case nir_op_flog2:
                bi_emit_flog2(ctx, instr);
                return;
1195
1196
1197
1198
        case nir_op_fddx:
        case nir_op_fddy:
                bi_emit_deriv(ctx, instr);
                return;
1199
1200
1201
1202
1203
        default:
                break;
        }

        /* Otherwise, assume it's something we can handle normally */
1204
1205
        bi_instruction alu = {
                .type = bi_class_for_nir_alu(instr->op),
1206
                .dest = pan_dest_index(&instr->dest.dest),
1207
1208
1209
1210
                .dest_type = nir_op_infos[instr->op].output_type
                        | nir_dest_bit_size(instr->dest.dest),
        };

1211
        /* TODO: Implement lowering of special functions for older Bifrost */
1212
        assert(alu.type != BI_SPECIAL_ADD || !(ctx->quirks & BIFROST_NO_FAST_OP));
1213

1214
        unsigned comps = nir_dest_num_components(instr->dest.dest);
1215
1216
        bool vector = comps > MAX2(1, 32 / nir_dest_bit_size(instr->dest.dest));
        assert(!vector || alu.type == BI_COMBINE || alu.type == BI_MOV);
1217

1218
1219
1220
        if (!instr->dest.dest.is_ssa) {
                for (unsigned i = 0; i < comps; ++i)
                        assert(instr->dest.write_mask);
1221
1222
        }

1223
1224
1225
1226
1227
1228
1229
1230
        /* We inline constants as we go. This tracks how many constants have
         * been inlined, since we're limited to 64-bits of constants per
         * instruction */

        unsigned dest_bits = nir_dest_bit_size(instr->dest.dest);
        unsigned constants_left = (64 / dest_bits);
        unsigned constant_shift = 0;

1231
1232
1233
        if (alu.type == BI_COMBINE)
                constants_left = 0;

1234
1235
1236
1237
1238
        /* Copy sources */

        unsigned num_inputs = nir_op_infos[instr->op].num_inputs;
        assert(num_inputs <= ARRAY_SIZE(alu.src));

1239
1240
1241
1242
1243
1244
        for (unsigned i = 0; i < num_inputs; ++i) {
                unsigned f = 0;

                if (i && alu.type == BI_CSEL)
                        f++;

1245
                bi_copy_src(&alu, instr, i, i + f, &constants_left, &constant_shift);
1246
        }
1247
1248
1249
1250
1251

        /* Op-specific fixup */
        switch (instr->op) {
        case nir_op_fmul:
                alu.src[2] = BIR_INDEX_ZERO; /* FMA */
1252
                alu.src_types[2] = alu.src_types[1];
1253
                break;
1254
        case nir_op_fsat:
1255
                alu.clamp = BI_CLAMP_CLAMP_0_1; /* FMOV */
1256
                break;
1257
        case nir_op_fneg:
1258
                alu.src_neg[0] = true; /* FMOV */
1259
1260
                break;
        case nir_op_fabs:
1261
                alu.src_abs[0] = true; /* FMOV */
1262
                break;
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
1263
        case nir_op_fsub:
1264
                alu.src_neg[1] = true; /* FADD */
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
1265
                break;
1266
1267
        case nir_op_iadd:
                alu.op.imath = BI_IMATH_ADD;
1268
1269
                /* Carry */
                alu.src[2] = BIR_INDEX_ZERO;
1270
1271
1272
                break;
        case nir_op_isub:
                alu.op.imath = BI_IMATH_SUB;
1273
1274
                /* Borrow */
                alu.src[2] = BIR_INDEX_ZERO;
1275
                break;
1276
1277
1278
        case nir_op_iabs:
                alu.op.special = BI_SPECIAL_IABS;
                break;
1279
        case nir_op_inot:
1280
                /* no dedicated bitwise not, but we can invert sources. convert to ~(a | 0) */
1281
                alu.op.bitwise = BI_BITWISE_OR;
1282
                alu.bitwise.dest_invert = true;
1283
                alu.src[1] = BIR_INDEX_ZERO;
1284
1285
                /* zero shift */
                alu.src[2] = BIR_INDEX_ZERO;
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
1286
                alu.src_types[2] = nir_type_uint8;
1287
                break;
Boris Brezillon's avatar
Boris Brezillon committed
1288
1289
1290
        case nir_op_ushr:
                alu.bitwise.rshift = true;
                /* fallthrough */
1291
1292
1293
1294
        case nir_op_ishl:
                alu.op.bitwise = BI_BITWISE_OR;
                /* move src1 to src2 and replace with zero. underlying op is (src0 << src2) | src1 */
                alu.src[2] = alu.src[1];
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
1295
                alu.src_types[2] = nir_type_uint8;
1296
                alu.src[1] = BIR_INDEX_ZERO;
1297
                break;
Boris Brezillon's avatar
Boris Brezillon committed
1298
1299
1300
1301
1302
1303
1304
1305
        case nir_op_ishr:
                alu.op.bitwise = BI_BITWISE_ARSHIFT;
                alu.bitwise.rshift = true;
                /* move src1 to src2 and replace with zero. underlying op is (src0 >> src2) */
                alu.src[2] = alu.src[1];
                alu.src_types[2] = nir_type_uint8;
                alu.src[1] = BIR_INDEX_ZERO;
                break;
1306
1307
1308
        case nir_op_imul:
                alu.op.imul = BI_IMUL_IMUL;
                break;
1309
1310
1311
1312
1313
        case nir_op_fmax:
        case nir_op_imax:
        case nir_op_umax:
                alu.op.minmax = BI_MINMAX_MAX; /* MINMAX */
                break;
1314
1315
1316
1317
1318
1319
        case nir_op_frcp:
                alu.op.special = BI_SPECIAL_FRCP;
                break;
        case nir_op_frsq:
                alu.op.special = BI_SPECIAL_FRSQ;
                break;
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
1320
1321
1322
1323
1324
1325
        BI_CASE_CMP(nir_op_flt)
        BI_CASE_CMP(nir_op_ilt)
        BI_CASE_CMP(nir_op_fge)
        BI_CASE_CMP(nir_op_ige)
        BI_CASE_CMP(nir_op_feq)
        BI_CASE_CMP(nir_op_ieq)
1326
        BI_CASE_CMP(nir_op_fneu)
Alyssa Rosenzweig's avatar
Alyssa Rosenzweig committed
1327
        BI_CASE_CMP(nir_op_ine)
1328
        BI_CASE_CMP(nir_op_uge)
Boris Brezillon's avatar
Boris Brezillon committed
1329
        BI_CASE_CMP(nir_op_ult)