/*
 * Copyright (C) 2018-2019 Alyssa Rosenzweig <alyssa@rosenzweig.io>
 * Copyright (C) 2019-2020 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <err.h>

#include "main/mtypes.h"
#include "compiler/glsl/glsl_to_nir.h"
#include "compiler/nir_types.h"
#include "compiler/nir/nir_builder.h"
#include "util/half_float.h"
#include "util/u_math.h"
#include "util/u_debug.h"
#include "util/u_dynarray.h"
#include "util/list.h"

#include "midgard.h"
#include "midgard_nir.h"
#include "midgard_compile.h"
#include "midgard_ops.h"
#include "helpers.h"
#include "compiler.h"
#include "midgard_quirks.h"

#include "panfrost-quirks.h"
#include "panfrost/util/pan_lower_framebuffer.h"

#include "disassemble.h"

static const struct debug_named_value debug_options[] = {
        {"msgs",      MIDGARD_DBG_MSGS,      "Print debug messages"},
        {"shaders",   MIDGARD_DBG_SHADERS,   "Dump shaders in NIR and MIR"},
        {"shaderdb",  MIDGARD_DBG_SHADERDB,  "Prints shader-db statistics"},
        DEBUG_NAMED_VALUE_END
};

DEBUG_GET_ONCE_FLAGS_OPTION(midgard_debug, "MIDGARD_MESA_DEBUG", debug_options, 0)

/* TODO: This is not thread safe!! */
static unsigned SHADER_DB_COUNT = 0;

int midgard_debug = 0;

#define DBG(fmt, ...) \
		do { if (midgard_debug & MIDGARD_DBG_MSGS) \
			fprintf(stderr, "%s:%d: "fmt, \
				__FUNCTION__, __LINE__, ##__VA_ARGS__); } while (0)
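
/* Rough usage sketch: with MIDGARD_MESA_DEBUG=msgs in the environment, the
 * "msgs" flag above enables MIDGARD_DBG_MSGS, so a call such as
 *
 *    DBG("unhandled op %u\n", op);
 *
 * prints to stderr prefixed with the calling function and line number. */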
static midgard_block *
create_empty_block(compiler_context *ctx)
{
        midgard_block *blk = rzalloc(ctx, midgard_block);

        blk->base.predecessors = _mesa_set_create(blk,
                        _mesa_hash_pointer,
                        _mesa_key_pointer_equal);

        blk->base.name = ctx->block_source_count++;

        return blk;
}

static void
schedule_barrier(compiler_context *ctx)
{
        midgard_block *temp = ctx->after_block;
        ctx->after_block = create_empty_block(ctx);
        ctx->block_count++;
        list_addtail(&ctx->after_block->base.link, &ctx->blocks);
        list_inithead(&ctx->after_block->base.instructions);
        pan_block_add_successor(&ctx->current_block->base, &ctx->after_block->base);
        ctx->current_block = ctx->after_block;
        ctx->after_block = temp;
}
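
/* Note that schedule_barrier ends the current block and begins a fresh
 * successor block, so whatever is emitted next lands in a new block;
 * presumably this is what keeps memory operations from being reordered
 * across the barrier, since later passes work block by block. */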

/* Helpers to generate midgard_instruction's using macro magic, since every
 * driver seems to do it that way */

#define EMIT(op, ...) emit_mir_instruction(ctx, v_##op(__VA_ARGS__));

#define M_LOAD_STORE(name, store, T) \
        static midgard_instruction m_##name(unsigned ssa, unsigned address) { \
                midgard_instruction i = { \
                        .type = TAG_LOAD_STORE_4, \
                        .mask = 0xF, \
                        .dest = ~0, \
                        .src = { ~0, ~0, ~0, ~0 }, \
                        .swizzle = SWIZZLE_IDENTITY_4, \
                        .op = midgard_op_##name, \
                        .load_store = { \
                                .address = address \
                        } \
                }; \
                \
                if (store) { \
                        i.src[0] = ssa; \
                        i.src_types[0] = T; \
                        i.dest_type = T; \
                } else { \
                        i.dest = ssa; \
                        i.dest_type = T; \
                } \
                return i; \
        }

#define M_LOAD(name, T) M_LOAD_STORE(name, false, T)
#define M_STORE(name, T) M_LOAD_STORE(name, true, T)
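
/* As an illustration of the macro above, M_LOAD(ld_attr_32, nir_type_uint32)
 * below defines
 *
 *    static midgard_instruction m_ld_attr_32(unsigned ssa, unsigned address);
 *
 * which returns a load/store-tag instruction with .op = midgard_op_ld_attr_32
 * and the SSA index wired up as the destination (with type nir_type_uint32);
 * the M_STORE variant wires the SSA index up as src[0] instead. */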
M_LOAD(ld_attr_32, nir_type_uint32);
M_LOAD(ld_vary_32, nir_type_uint32);
M_LOAD(ld_ubo_int4, nir_type_uint32);
M_LOAD(ld_int4, nir_type_uint32);
M_STORE(st_int4, nir_type_uint32);
M_LOAD(ld_color_buffer_32u, nir_type_uint32);
M_LOAD(ld_color_buffer_as_fp16, nir_type_float16);
M_LOAD(ld_color_buffer_as_fp32, nir_type_float32);
M_STORE(st_vary_32, nir_type_uint32);
M_LOAD(ld_cubemap_coords, nir_type_uint32);
M_LOAD(ld_compute_id, nir_type_uint32);

static midgard_instruction
v_branch(bool conditional, bool invert)
{
        midgard_instruction ins = {
                .type = TAG_ALU_4,
                .unit = ALU_ENAB_BRANCH,
                .compact_branch = true,
                .branch = {
                        .conditional = conditional,
                        .invert_conditional = invert
                },
                .dest = ~0,
                .src = { ~0, ~0, ~0, ~0 },
        };

        return ins;
}

static void
attach_constants(compiler_context *ctx, midgard_instruction *ins, void *constants, int name)
{
        ins->has_constants = true;
        memcpy(&ins->constants, constants, 16);
}

static int
glsl_type_size(const struct glsl_type *type, bool bindless)
{
        return glsl_count_attribute_slots(type, false);
}

/* Lower fdot2 to a vector multiplication followed by channel addition  */
static bool
midgard_nir_lower_fdot2_instr(nir_builder *b, nir_instr *instr, void *data)
{
        if (instr->type != nir_instr_type_alu)
                return false;

        nir_alu_instr *alu = nir_instr_as_alu(instr);
        if (alu->op != nir_op_fdot2)
                return false;

        b->cursor = nir_before_instr(&alu->instr);

        nir_ssa_def *src0 = nir_ssa_for_alu_src(b, alu, 0);
        nir_ssa_def *src1 = nir_ssa_for_alu_src(b, alu, 1);

        nir_ssa_def *product = nir_fmul(b, src0, src1);

        nir_ssa_def *sum = nir_fadd(b,
                                    nir_channel(b, product, 0),
                                    nir_channel(b, product, 1));

        /* Replace the fdot2 with this sum */
        nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(sum));

        return true;
}

static bool
midgard_nir_lower_fdot2(nir_shader *shader)
{
        return nir_shader_instructions_pass(shader,
                                            midgard_nir_lower_fdot2_instr,
                                            nir_metadata_block_index | nir_metadata_dominance,
                                            NULL);
}

static bool
mdg_is_64(const nir_instr *instr, const void *_unused)
{
        const nir_alu_instr *alu = nir_instr_as_alu(instr);

        if (nir_dest_bit_size(alu->dest.dest) == 64)
                return true;

        switch (alu->op) {
        case nir_op_umul_high:
        case nir_op_imul_high:
                return true;
        default:
                return false;
        }
}
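
/* Used below as the filter for nir_lower_alu_to_scalar: returning true asks
 * NIR to scalarize that instruction, so only 64-bit ALU ops and the
 * [ui]mul_high ops get split into scalars while the rest stay vectorized. */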

/* Flushes undefined values to zero */

static void
optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend)
{
        bool progress;
        unsigned lower_flrp =
                (nir->options->lower_flrp16 ? 16 : 0) |
                (nir->options->lower_flrp32 ? 32 : 0) |
                (nir->options->lower_flrp64 ? 64 : 0);

        NIR_PASS(progress, nir, nir_lower_regs_to_ssa);
        NIR_PASS(progress, nir, nir_lower_idiv, nir_lower_idiv_fast);

        nir_lower_tex_options lower_tex_options = {
                .lower_txs_lod = true,
                .lower_txp = ~0,
                .lower_tex_without_implicit_lod =
                        (quirks & MIDGARD_EXPLICIT_LOD),
                .lower_tg4_broadcom_swizzle = true,

                /* TODO: we have native gradient.. */
                .lower_txd = true,
        };

        NIR_PASS(progress, nir, nir_lower_tex, &lower_tex_options);

        /* Must lower fdot2 after tex is lowered */
        NIR_PASS(progress, nir, midgard_nir_lower_fdot2);

        /* T720 is broken. */

        if (quirks & MIDGARD_BROKEN_LOD)
                NIR_PASS_V(nir, midgard_nir_lod_errata);

        NIR_PASS(progress, nir, midgard_nir_lower_algebraic_early);

        do {
                progress = false;

                NIR_PASS(progress, nir, nir_lower_var_copies);
                NIR_PASS(progress, nir, nir_lower_vars_to_ssa);

                NIR_PASS(progress, nir, nir_copy_prop);
                NIR_PASS(progress, nir, nir_opt_remove_phis);
                NIR_PASS(progress, nir, nir_opt_dce);
                NIR_PASS(progress, nir, nir_opt_dead_cf);
                NIR_PASS(progress, nir, nir_opt_cse);
                NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
                NIR_PASS(progress, nir, nir_opt_algebraic);
                NIR_PASS(progress, nir, nir_opt_constant_folding);

                if (lower_flrp != 0) {
                        bool lower_flrp_progress = false;
                        NIR_PASS(lower_flrp_progress,
                                 nir,
                                 nir_lower_flrp,
                                 lower_flrp,
                                 false /* always_precise */);
                        if (lower_flrp_progress) {
                                NIR_PASS(progress, nir,
                                         nir_opt_constant_folding);
                                progress = true;
                        }

                        /* Nothing should rematerialize any flrps, so we only
                         * need to do this lowering once.
                         */
                        lower_flrp = 0;
                }

                NIR_PASS(progress, nir, nir_opt_undef);
                NIR_PASS(progress, nir, nir_lower_undef_to_zero);

                NIR_PASS(progress, nir, nir_opt_loop_unroll,
                         nir_var_shader_in |
                         nir_var_shader_out |
                         nir_var_function_temp);

                NIR_PASS(progress, nir, nir_opt_vectorize, NULL, NULL);
        } while (progress);

        NIR_PASS_V(nir, nir_lower_alu_to_scalar, mdg_is_64, NULL);

        /* Run after opts so it can hit more */
        if (!is_blend)
                NIR_PASS(progress, nir, nir_fuse_io_16);

        /* Must be run at the end to prevent creation of fsin/fcos ops */
        NIR_PASS(progress, nir, midgard_nir_scale_trig);

        do {
                progress = false;

                NIR_PASS(progress, nir, nir_opt_dce);
                NIR_PASS(progress, nir, nir_opt_algebraic);
                NIR_PASS(progress, nir, nir_opt_constant_folding);
                NIR_PASS(progress, nir, nir_copy_prop);
        } while (progress);

        NIR_PASS(progress, nir, nir_opt_algebraic_late);
        NIR_PASS(progress, nir, nir_opt_algebraic_distribute_src_mods);

        /* We implement booleans as 32-bit 0/~0 */
        NIR_PASS(progress, nir, nir_lower_bool_to_int32);

        /* Now that booleans are lowered, we can run our late opts */
        NIR_PASS(progress, nir, midgard_nir_lower_algebraic_late);
        NIR_PASS(progress, nir, midgard_nir_cancel_inot);

        NIR_PASS(progress, nir, nir_copy_prop);
        NIR_PASS(progress, nir, nir_opt_dce);

        /* Take us out of SSA */
        NIR_PASS(progress, nir, nir_lower_locals_to_regs);
        NIR_PASS(progress, nir, nir_convert_from_ssa, true);

        /* We are a vector architecture; write combine where possible */
        NIR_PASS(progress, nir, nir_move_vec_src_uses_to_dest);
        NIR_PASS(progress, nir, nir_lower_vec_to_movs);

        NIR_PASS(progress, nir, nir_opt_dce);
}

/* Do not actually emit a load; instead, cache the constant for inlining */

static void
emit_load_const(compiler_context *ctx, nir_load_const_instr *instr)
{
        nir_ssa_def def = instr->def;

        midgard_constants *consts = rzalloc(NULL, midgard_constants);

        assert(instr->def.num_components * instr->def.bit_size <= sizeof(*consts) * 8);

#define RAW_CONST_COPY(bits)                                         \
        nir_const_value_to_array(consts->u##bits, instr->value,      \
                                 instr->def.num_components, u##bits)

        switch (instr->def.bit_size) {
        case 64:
                RAW_CONST_COPY(64);
                break;
        case 32:
                RAW_CONST_COPY(32);
                break;
        case 16:
                RAW_CONST_COPY(16);
                break;
        case 8:
                RAW_CONST_COPY(8);
                break;
        default:
                unreachable("Invalid bit_size for load_const instruction\n");
        }

        /* Shifted for SSA, +1 for off-by-one */
        _mesa_hash_table_u64_insert(ctx->ssa_constants, (def.index << 1) + 1, consts);
}
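
/* The key appears to mirror the SSA indexing scheme used elsewhere in the
 * compiler (indices shifted left by one), with the extra +1 presumably
 * keeping index 0 away from the hash table's reserved zero key;
 * emit_explicit_constant() below searches with the same node + 1 key. */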

/* Normally constants are embedded implicitly, but for I/O and such we have to
 * explicitly emit a move with the constant source */

static void
emit_explicit_constant(compiler_context *ctx, unsigned node, unsigned to)
{
        void *constant_value = _mesa_hash_table_u64_search(ctx->ssa_constants, node + 1);

        if (constant_value) {
                midgard_instruction ins = v_mov(SSA_FIXED_REGISTER(REGISTER_CONSTANT), to);
                attach_constants(ctx, &ins, constant_value, node + 1);
                emit_mir_instruction(ctx, ins);
        }
}

static bool
nir_is_non_scalar_swizzle(nir_alu_src *src, unsigned nr_components)
{
        unsigned comp = src->swizzle[0];

        for (unsigned c = 1; c < nr_components; ++c) {
                if (src->swizzle[c] != comp)
                        return true;
        }

        return false;
}
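
/* Returns true when the swizzle mixes different components (i.e. the source
 * is not a splat of one component); emit_alu() uses this on the b32csel
 * condition to choose the vector fcsel_v/icsel_v forms over the scalar ones. */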

#define ATOMIC_CASE_IMPL(ctx, instr, nir, op, is_shared) \
        case nir_intrinsic_##nir: \
                emit_atomic(ctx, instr, is_shared, midgard_op_##op); \
                break;

#define ATOMIC_CASE(ctx, instr, nir, op) \
        ATOMIC_CASE_IMPL(ctx, instr, shared_atomic_##nir, atomic_##op, true); \
        ATOMIC_CASE_IMPL(ctx, instr, global_atomic_##nir, atomic_##op, false);
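
/* For example, ATOMIC_CASE(ctx, instr, add, iadd) expands to the two labels
 *
 *    case nir_intrinsic_shared_atomic_add:
 *            emit_atomic(ctx, instr, true,  midgard_op_atomic_iadd);
 *            break;
 *    case nir_intrinsic_global_atomic_add:
 *            emit_atomic(ctx, instr, false, midgard_op_atomic_iadd);
 *            break;
 *
 * handling the shared and global variants of one atomic in a single line. */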

#define ALU_CASE(nir, _op) \
        case nir_op_##nir: \
                op = midgard_alu_op_##_op; \
                assert(src_bitsize == dst_bitsize); \
                break;

#define ALU_CASE_RTZ(nir, _op) \
        case nir_op_##nir: \
                op = midgard_alu_op_##_op; \
                roundmode = MIDGARD_RTZ; \
                break;

#define ALU_CHECK_CMP() \
                assert(src_bitsize == 16 || src_bitsize == 32); \
                assert(dst_bitsize == 16 || dst_bitsize == 32); \

#define ALU_CASE_BCAST(nir, _op, count) \
        case nir_op_##nir: \
                op = midgard_alu_op_##_op; \
                broadcast_swizzle = count; \
                ALU_CHECK_CMP(); \
                break;

#define ALU_CASE_CMP(nir, _op) \
        case nir_op_##nir: \
                op = midgard_alu_op_##_op; \
                ALU_CHECK_CMP(); \
                break;

/* Compare mir_lower_invert */
static bool
nir_accepts_inot(nir_op op, unsigned src)
{
        switch (op) {
        case nir_op_ior:
        case nir_op_iand: /* TODO: b2f16 */
        case nir_op_ixor:
                return true;
        case nir_op_b32csel:
                /* Only the condition */
                return (src == 0);
        default:
                return false;
        }
}

static bool
mir_accept_dest_mod(compiler_context *ctx, nir_dest **dest, nir_op op)
{
        if (pan_has_dest_mod(dest, op)) {
                assert((*dest)->is_ssa);
                BITSET_SET(ctx->already_emitted, (*dest)->ssa.index);
                return true;
        }

        return false;
}
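
/* When the modifier is accepted, the folded modifier instruction (e.g. the
 * fsat) is recorded in already_emitted, so emit_alu() will skip it when that
 * instruction is visited later. */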

/* Look for floating point mods. We have the mods fsat, fsat_signed,
 * and fpos. We also have the relations (note 3 * 2 = 6 cases):
 *
 * fsat_signed(fpos(x)) = fsat(x)
 * fsat_signed(fsat(x)) = fsat(x)
 * fpos(fsat_signed(x)) = fsat(x)
 * fpos(fsat(x)) = fsat(x)
 * fsat(fsat_signed(x)) = fsat(x)
 * fsat(fpos(x)) = fsat(x)
 *
 * So by cases any composition of output modifiers is equivalent to
 * fsat alone.
 */
static unsigned
mir_determine_float_outmod(compiler_context *ctx, nir_dest **dest, unsigned prior_outmod)
{
        bool fpos = mir_accept_dest_mod(ctx, dest, nir_op_fclamp_pos);
        bool fsat = mir_accept_dest_mod(ctx, dest, nir_op_fsat);
        bool ssat = mir_accept_dest_mod(ctx, dest, nir_op_fsat_signed);
        bool prior = (prior_outmod != midgard_outmod_none);
        int count = (int) prior + (int) fpos + (int) ssat + (int) fsat;

        return ((count > 1) || fsat) ? midgard_outmod_sat :
                                fpos ? midgard_outmod_pos :
                                ssat ? midgard_outmod_sat_signed :
                                prior_outmod;
}
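
/* By the identities above, stacking two or more of these modifiers (or
 * adding one on top of a prior output modifier) always collapses to plain
 * fsat, which is why count > 1 short-circuits to midgard_outmod_sat. */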

static void
mir_copy_src(midgard_instruction *ins, nir_alu_instr *instr, unsigned i, unsigned to, bool *abs, bool *neg, bool *not, enum midgard_roundmode *roundmode, bool is_int, unsigned bcast_count)
{
        nir_alu_src src = instr->src[i];

        if (!is_int) {
                if (pan_has_source_mod(&src, nir_op_fneg))
                        *neg = !(*neg);

                if (pan_has_source_mod(&src, nir_op_fabs))
                        *abs = true;
        }

        if (nir_accepts_inot(instr->op, i) && pan_has_source_mod(&src, nir_op_inot))
                *not = true;

        if (roundmode) {
                if (pan_has_source_mod(&src, nir_op_fround_even))
                        *roundmode = MIDGARD_RTE;

                if (pan_has_source_mod(&src, nir_op_ftrunc))
                        *roundmode = MIDGARD_RTZ;

                if (pan_has_source_mod(&src, nir_op_ffloor))
                        *roundmode = MIDGARD_RTN;

                if (pan_has_source_mod(&src, nir_op_fceil))
                        *roundmode = MIDGARD_RTP;
        }

        unsigned bits = nir_src_bit_size(src.src);

        ins->src[to] = nir_src_index(NULL, &src.src);
        ins->src_types[to] = nir_op_infos[instr->op].input_types[i] | bits;

        for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; ++c) {
                ins->swizzle[to][c] = src.swizzle[
                        (!bcast_count || c < bcast_count) ? c :
                                (bcast_count - 1)];
        }
}
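
/* In short, mir_copy_src folds NIR source modifiers into the MIR source:
 * fneg/fabs for float ops, inot where the opcode accepts it, and the
 * round-mode conversions, while bcast_count repeats the last valid swizzle
 * component for the ball/bany-style reductions. */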

/* Midgard features both fcsel and icsel, depending on whether you want int or
 * float modifiers. NIR's csel is typeless, so we want a heuristic to guess if
 * we should emit an int or float csel depending on what modifiers could be
 * placed. In the absence of modifiers, this is probably arbitrary. */

static bool
mir_is_bcsel_float(nir_alu_instr *instr)
{
        nir_op intmods[] = {
                nir_op_i2i8, nir_op_i2i16,
                nir_op_i2i32, nir_op_i2i64
        };

        nir_op floatmods[] = {
                nir_op_fabs, nir_op_fneg,
                nir_op_f2f16, nir_op_f2f32,
                nir_op_f2f64
        };

        nir_op floatdestmods[] = {
                nir_op_fsat, nir_op_fsat_signed, nir_op_fclamp_pos,
                nir_op_f2f16, nir_op_f2f32
        };

        signed score = 0;

        for (unsigned i = 1; i < 3; ++i) {
                nir_alu_src s = instr->src[i];
                for (unsigned q = 0; q < ARRAY_SIZE(intmods); ++q) {
                        if (pan_has_source_mod(&s, intmods[q]))
                                score--;
                }
        }

        for (unsigned i = 1; i < 3; ++i) {
                nir_alu_src s = instr->src[i];
                for (unsigned q = 0; q < ARRAY_SIZE(floatmods); ++q) {
                        if (pan_has_source_mod(&s, floatmods[q]))
                                score++;
                }
        }

        for (unsigned q = 0; q < ARRAY_SIZE(floatdestmods); ++q) {
                nir_dest *dest = &instr->dest.dest;
                if (pan_has_dest_mod(&dest, floatdestmods[q]))
                        score++;
        }

        return (score > 0);
}

static void
emit_alu(compiler_context *ctx, nir_alu_instr *instr)
{
        nir_dest *dest = &instr->dest.dest;

        if (dest->is_ssa && BITSET_TEST(ctx->already_emitted, dest->ssa.index))
                return;

        /* Derivatives end up emitted on the texture pipe, not the ALUs. This
         * is handled elsewhere */

        if (instr->op == nir_op_fddx || instr->op == nir_op_fddy) {
                midgard_emit_derivatives(ctx, instr);
                return;
        }

        bool is_ssa = dest->is_ssa;

        unsigned nr_components = nir_dest_num_components(*dest);
        unsigned nr_inputs = nir_op_infos[instr->op].num_inputs;
        unsigned op = 0;

        /* Number of components valid to check for the instruction (the rest
         * will be forced to the last), or 0 to use as-is. Relevant as
         * ball-type instructions have a channel count in NIR but are all vec4
         * in Midgard */

        unsigned broadcast_swizzle = 0;

        /* Should we swap arguments? */
        bool flip_src12 = false;

        ASSERTED unsigned src_bitsize = nir_src_bit_size(instr->src[0].src);
        ASSERTED unsigned dst_bitsize = nir_dest_bit_size(*dest);

        enum midgard_roundmode roundmode = MIDGARD_RTE;

        switch (instr->op) {
                ALU_CASE(fadd, fadd);
                ALU_CASE(fmul, fmul);
                ALU_CASE(fmin, fmin);
                ALU_CASE(fmax, fmax);
                ALU_CASE(imin, imin);
                ALU_CASE(imax, imax);
                ALU_CASE(umin, umin);
                ALU_CASE(umax, umax);
                ALU_CASE(ffloor, ffloor);
                ALU_CASE(fround_even, froundeven);
                ALU_CASE(ftrunc, ftrunc);
                ALU_CASE(fceil, fceil);
                ALU_CASE(fdot3, fdot3);
                ALU_CASE(fdot4, fdot4);
                ALU_CASE(iadd, iadd);
                ALU_CASE(isub, isub);
                ALU_CASE(imul, imul);
                ALU_CASE(imul_high, imul);
                ALU_CASE(umul_high, imul);

                /* Zero shoved as second-arg */
                ALU_CASE(iabs, iabsdiff);

                ALU_CASE(uabs_isub, iabsdiff);
                ALU_CASE(uabs_usub, uabsdiff);

                ALU_CASE(mov, imov);

                ALU_CASE_CMP(feq32, feq);
                ALU_CASE_CMP(fneu32, fne);
                ALU_CASE_CMP(flt32, flt);
                ALU_CASE_CMP(ieq32, ieq);
                ALU_CASE_CMP(ine32, ine);
                ALU_CASE_CMP(ilt32, ilt);
                ALU_CASE_CMP(ult32, ult);

                /* We don't have a native b2f32 instruction. Instead, like many
                 * GPUs, we exploit booleans as 0/~0 for false/true, and
                 * correspondingly AND
                 * by 1.0 to do the type conversion. For the moment, prime us
                 * to emit:
                 *
                 * iand [whatever], #0
                 *
                 * At the end of emit_alu (as MIR), we'll fix-up the constant
                 */

                ALU_CASE_CMP(b2f32, iand);
                ALU_CASE_CMP(b2f16, iand);
                ALU_CASE_CMP(b2i32, iand);

                /* Likewise, we don't have a dedicated f2b32 instruction, but
                 * we can do a "not equal to 0.0" test. */

                ALU_CASE_CMP(f2b32, fne);
                ALU_CASE_CMP(i2b32, ine);

                ALU_CASE(frcp, frcp);
                ALU_CASE(frsq, frsqrt);
                ALU_CASE(fsqrt, fsqrt);
                ALU_CASE(fexp2, fexp2);
                ALU_CASE(flog2, flog2);

                ALU_CASE_RTZ(f2i64, f2i_rte);
                ALU_CASE_RTZ(f2u64, f2u_rte);
                ALU_CASE_RTZ(i2f64, i2f_rte);
                ALU_CASE_RTZ(u2f64, u2f_rte);

                ALU_CASE_RTZ(f2i32, f2i_rte);
                ALU_CASE_RTZ(f2u32, f2u_rte);
                ALU_CASE_RTZ(i2f32, i2f_rte);
                ALU_CASE_RTZ(u2f32, u2f_rte);

                ALU_CASE_RTZ(f2i8, f2i_rte);
                ALU_CASE_RTZ(f2u8, f2u_rte);

                ALU_CASE_RTZ(f2i16, f2i_rte);
                ALU_CASE_RTZ(f2u16, f2u_rte);
                ALU_CASE_RTZ(i2f16, i2f_rte);
                ALU_CASE_RTZ(u2f16, u2f_rte);

                ALU_CASE(fsin, fsin);
                ALU_CASE(fcos, fcos);

                /* We'll get 0 in the second arg, so:
                 * ~a = ~(a | 0) = nor(a, 0) */
                ALU_CASE(inot, inor);
                ALU_CASE(iand, iand);
                ALU_CASE(ior, ior);
                ALU_CASE(ixor, ixor);
                ALU_CASE(ishl, ishl);
                ALU_CASE(ishr, iasr);
                ALU_CASE(ushr, ilsr);

                ALU_CASE_BCAST(b32all_fequal2, fball_eq, 2);
                ALU_CASE_BCAST(b32all_fequal3, fball_eq, 3);
                ALU_CASE_CMP(b32all_fequal4, fball_eq);

                ALU_CASE_BCAST(b32any_fnequal2, fbany_neq, 2);
                ALU_CASE_BCAST(b32any_fnequal3, fbany_neq, 3);
                ALU_CASE_CMP(b32any_fnequal4, fbany_neq);

                ALU_CASE_BCAST(b32all_iequal2, iball_eq, 2);
                ALU_CASE_BCAST(b32all_iequal3, iball_eq, 3);
                ALU_CASE_CMP(b32all_iequal4, iball_eq);

                ALU_CASE_BCAST(b32any_inequal2, ibany_neq, 2);
                ALU_CASE_BCAST(b32any_inequal3, ibany_neq, 3);
                ALU_CASE_CMP(b32any_inequal4, ibany_neq);

                /* Source mods will be shoved in later */
                ALU_CASE(fabs, fmov);
                ALU_CASE(fneg, fmov);
                ALU_CASE(fsat, fmov);
                ALU_CASE(fsat_signed, fmov);
                ALU_CASE(fclamp_pos, fmov);

        /* For size conversion, we use a move. Ideally though we would squash
         * these ops together; maybe that has to happen after in NIR as part of
         * propagation...? An earlier algebraic pass ensured we step down by
         * only / exactly one size. If stepping down, we use a dest override to
         * reduce the size; if stepping up, we use a larger-sized move with a
         * half source and a sign/zero-extension modifier */

        case nir_op_i2i8:
        case nir_op_i2i16:
        case nir_op_i2i32:
        case nir_op_i2i64:
        case nir_op_u2u8:
        case nir_op_u2u16:
        case nir_op_u2u32:
        case nir_op_u2u64:
        case nir_op_f2f16:
        case nir_op_f2f32:
        case nir_op_f2f64: {
                if (instr->op == nir_op_f2f16 || instr->op == nir_op_f2f32 ||
                    instr->op == nir_op_f2f64)
                        op = midgard_alu_op_fmov;
                else
                        op = midgard_alu_op_imov;

                break;
        }

        /* For greater-or-equal, we lower to less-or-equal and flip the
         * arguments */

        case nir_op_fge:
        case nir_op_fge32:
        case nir_op_ige32:
        case nir_op_uge32: {
                op =
                        instr->op == nir_op_fge   ? midgard_alu_op_fle :
                        instr->op == nir_op_fge32 ? midgard_alu_op_fle :
                        instr->op == nir_op_ige32 ? midgard_alu_op_ile :
                        instr->op == nir_op_uge32 ? midgard_alu_op_ule :
                        0;

                flip_src12 = true;
                ALU_CHECK_CMP();
                break;
        }

        case nir_op_b32csel: {
                bool mixed = nir_is_non_scalar_swizzle(&instr->src[0], nr_components);
                bool is_float = mir_is_bcsel_float(instr);
                op = is_float ?
                        (mixed ? midgard_alu_op_fcsel_v : midgard_alu_op_fcsel) :
                        (mixed ? midgard_alu_op_icsel_v : midgard_alu_op_icsel);

                break;
        }

        case nir_op_unpack_32_2x16:
        case nir_op_unpack_32_4x8:
        case nir_op_pack_32_2x16:
        case nir_op_pack_32_4x8: {
                op = midgard_alu_op_imov;
                break;
        }

        default:
                DBG("Unhandled ALU op %s\n", nir_op_infos[instr->op].name);
                assert(0);
                return;
        }

        /* Promote imov to fmov if it might help inline a constant */
        if (op == midgard_alu_op_imov && nir_src_is_const(instr->src[0].src)
                        && nir_src_bit_size(instr->src[0].src) == 32
                        && nir_is_same_comp_swizzle(instr->src[0].swizzle,
                                nir_src_num_components(instr->src[0].src))) {
                op = midgard_alu_op_fmov;
        }

        /* Midgard can perform certain modifiers on output of an ALU op */

        unsigned outmod = 0;
        bool is_int = midgard_is_integer_op(op);

        if (instr->op == nir_op_umul_high || instr->op == nir_op_imul_high) {
                outmod = midgard_outmod_int_high;
        } else if (midgard_is_integer_out_op(op)) {
                outmod = midgard_outmod_int_wrap;
        } else if (instr->op == nir_op_fsat) {
                outmod = midgard_outmod_sat;
        } else if (instr->op == nir_op_fsat_signed) {
                outmod = midgard_outmod_sat_signed;
        } else if (instr->op == nir_op_fclamp_pos) {
                outmod = midgard_outmod_pos;
        }

        /* Fetch unit, quirks, etc information */
        unsigned opcode_props = alu_opcode_props[op].props;
        bool quirk_flipped_r24 = opcode_props & QUIRK_FLIPPED_R24;

        if (!midgard_is_integer_out_op(op)) {
                outmod = mir_determine_float_outmod(ctx, &dest, outmod);
        }

        midgard_instruction ins = {
                .type = TAG_ALU_4,
                .dest = nir_dest_index(dest),
                .dest_type = nir_op_infos[instr->op].output_type
                        | nir_dest_bit_size(*dest),
                .roundmode = roundmode,
        };

        enum midgard_roundmode *roundptr = (opcode_props & MIDGARD_ROUNDS) ?
                &ins.roundmode : NULL;

        for (unsigned i = nr_inputs; i < ARRAY_SIZE(ins.src); ++i)
                ins.src[i] = ~0;

        if (quirk_flipped_r24) {
                ins.src[0] = ~0;
                mir_copy_src(&ins, instr, 0, 1, &ins.src_abs[1], &ins.src_neg[1], &ins.src_invert[1], roundptr, is_int, broadcast_swizzle);
        } else {
                for (unsigned i = 0; i < nr_inputs; ++i) {
                        unsigned to = i;

                        if (instr->op == nir_op_b32csel) {
                                /* The condition is the first argument; move
                                 * the other arguments up one to be a binary
                                 * instruction for Midgard with the condition
                                 * last */

                                if (i == 0)
                                        to = 2;
                                else if (flip_src12)
                                        to = 2 - i;
                                else
                                        to = i - 1;
                        } else if (flip_src12) {
                                to = 1 - to;
                        }

                        mir_copy_src(&ins, instr, i, to, &ins.src_abs[to], &ins.src_neg[to], &ins.src_invert[to], roundptr, is_int, broadcast_swizzle);

                        /* (!c) ? a : b = c ? b : a */
                        if (instr->op == nir_op_b32csel && ins.src_invert[2]) {
                                ins.src_invert[2] = false;
                                flip_src12 ^= true;
                        }
                }
        }

        if (instr->op == nir_op_fneg || instr->op == nir_op_fabs) {
                /* Lowered to move */
                if (instr->op == nir_op_fneg)
                        ins.src_neg[1] ^= true;

                if (instr->op == nir_op_fabs)
                        ins.src_abs[1] = true;
        }

        ins.mask = mask_of(nr_components);

        /* Apply writemask if non-SSA, keeping in mind that we can't write to
         * components that don't exist. Note modifier => SSA => !reg => no
         * writemask, so we don't have to worry about writemasks here. */

        if (!is_ssa)
                ins.mask &= instr->dest.write_mask;

        ins.op = op;
        ins.outmod = outmod;
        /* Late fixup for emulated instructions */

        if (instr->op == nir_op_b2f32 || instr->op == nir_op_b2i32) {
                /* Presently, our second argument is an inline #0 constant.
                 * Switch over to an embedded 1.0 constant (that can't fit
                 * inline, since we're 32-bit, not 16-bit like the inline
                 * constants) */

                ins.has_inline_constant = false;
                ins.src[1] = SSA_FIXED_REGISTER(REGISTER_CONSTANT);
                ins.src_types[1] = nir_type_float32;
                ins.has_constants = true;

                if (instr->op == nir_op_b2f32)
                        ins.constants.f32[0] = 1.0f;
                else
                        ins.constants.i32[0] = 1;

                for (unsigned c = 0; c < 16; ++c)
                        ins.swizzle[1][c] = 0;
        } else if (instr->op == nir_op_b2f16) {
                ins.src[1] = SSA_FIXED_REGISTER(REGISTER_CONSTANT);
                ins.src_types[1] = nir_type_float16;
                ins.has_constants = true;
                ins.constants.i16[0] = _mesa_float_to_half(1.0);

                for (unsigned c = 0; c < 16; ++c)
                        ins.swizzle[1][c] = 0;
        } else if (nr_inputs == 1 && !quirk_flipped_r24) {
                /* Lots of instructions need a 0 plonked in */
                ins.has_inline_constant = false;
                ins.src[1] = SSA_FIXED_REGISTER(REGISTER_CONSTANT);
                ins.src_types[1] = ins.src_types[0];
                ins.has_constants = true;
                ins.constants.u32[0] = 0;

                for (unsigned c = 0; c < 16; ++c)
                        ins.swizzle[1][c] = 0;
        } else if (instr->op == nir_op_pack_32_2x16) {
                ins.dest_type = nir_type_uint16;
                ins.mask = mask_of(nr_components * 2);
                ins.is_pack = true;
        } else if (instr->op == nir_op_pack_32_4x8) {
                ins.dest_type = nir_type_uint8;
                ins.mask = mask_of(nr_components * 4);
                ins.is_pack = true;
        } else if (instr->op == nir_op_unpack_32_2x16) {
                ins.dest_type = nir_type_uint32;
                ins.mask = mask_of(nr_components >> 1);
                ins.is_pack = true;
        } else if (instr->op == nir_op_unpack_32_4x8) {
                ins.dest_type = nir_type_uint32;
                ins.mask = mask_of(nr_components >> 2);
                ins.is_pack = true;
        }

        if ((opcode_props & UNITS_ALL) == UNIT_VLUT) {
                /* To avoid duplicating the lookup tables (probably), true LUT
                 * instructions can only operate as if they were scalars. Lower
                 * them here by changing the component. */

                unsigned orig_mask = ins.mask;

                unsigned swizzle_back[MIR_VEC_COMPONENTS];
                memcpy(&swizzle_back, ins.swizzle[0], sizeof(swizzle_back));

                midgard_instruction ins_split[MIR_VEC_COMPONENTS];
                unsigned ins_count = 0;

                for (int i = 0; i < nr_components; ++i) {
                        /* Mask the associated component, dropping the
                         * instruction if needed */

                        ins.mask = 1 << i;
                        ins.mask &= orig_mask;

                        for (unsigned j = 0; j < ins_count; ++j) {
                                if (swizzle_back[i] == ins_split[j].swizzle[0][0]) {
                                        ins_split[j].mask |= ins.mask;
                                        ins.mask = 0;
                                        break;
                                }
                        }

                        if (!ins.mask)
                                continue;

                        for (unsigned j = 0; j < MIR_VEC_COMPONENTS; ++j)
                                ins.swizzle[0][j] = swizzle_back[i]; /* Pull from the correct component */

                        ins_split[ins_count] = ins;

                        ++ins_count;
                }

                for (unsigned i = 0; i < ins_count; ++i) {
                        emit_mir_instruction(ctx, ins_split[i]);
                }
        } else {
                emit_mir_instruction(ctx, ins);
        }
}

#undef ALU_CASE

static void
mir_set_intr_mask(nir_instr *instr, midgard_instruction *ins, bool is_read)
{
        nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
        unsigned nir_mask = 0;
        unsigned dsize = 0;

        if (is_read) {
                nir_mask = mask_of(nir_intrinsic_dest_components(intr));
                dsize = nir_dest_bit_size(intr->dest);
        } else {
                nir_mask = nir_intrinsic_write_mask(intr);
                dsize = 32;
        }

        /* Once we have the NIR mask, we need to normalize to work in 32-bit space */
        unsigned bytemask = pan_to_bytemask(dsize, nir_mask);
        ins->dest_type = nir_type_uint | dsize;
        mir_set_bytemask(ins, bytemask);
}

/* Uniforms and UBOs use a shared code path, as uniforms are just (slightly
 * optimized) versions of UBO #0 */

static midgard_instruction *
emit_ubo_read(
        compiler_context *ctx,
        nir_instr *instr,
        unsigned dest,
        unsigned offset,
        nir_src *indirect_offset,
        unsigned indirect_shift,
        unsigned index)
{
        /* TODO: half-floats */

        midgard_instruction ins = m_ld_ubo_int4(dest, 0);
        ins.constants.u32[0] = offset;

        if (instr->type == nir_instr_type_intrinsic)
                mir_set_intr_mask(instr, &ins, true);

        if (indirect_offset) {
                ins.src[2] = nir_src_index(ctx, indirect_offset);
                ins.src_types[2] = nir_type_uint32;
                ins.load_store.arg_2 = (indirect_shift << 5);

                /* X component for the whole swizzle to prevent register
                 * pressure from ballooning from the extra components */
                for (unsigned i = 0; i < ARRAY_SIZE(ins.swizzle[2]); ++i)
                        ins.swizzle[2][i] = 0;
        } else {
                ins.load_store.arg_2 = 0x1E;
        }

        ins.load_store.arg_1 = index;

        return emit_mir_instruction(ctx, ins);
}
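
/* Uniform reads come through this same helper as reads of UBO index 0 (see
 * the comment above); when an indirect offset is present it rides in src[2],
 * with the shift amount packed into load_store.arg_2 as set up above. */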

/* Globals are like UBOs if you squint. And shared memory is like globals if
 * you squint even harder */

static void
emit_global(
        compiler_context *ctx,
        nir_instr *instr,
        bool is_read,
        unsigned srcdest,
        nir_src *offset,
        bool is_shared)
{
        /* TODO: types */

        midgard_instruction ins;

        if (is_read)
                ins = m_ld_int4(srcdest, 0);
        else
                ins = m_st_int4(srcdest, 0);

        mir_set_offset(ctx, &ins, offset, is_shared);
        mir_set_intr_mask(instr, &ins, is_read);

        /* Set a valid swizzle for masked out components */
        assert(ins.mask);
        unsigned first_component = __builtin_ffs(ins.mask) - 1;

        for (unsigned i = 0; i < ARRAY_SIZE(ins.swizzle[0]); ++i) {
                if (!(ins.mask & (1 << i)))
                        ins.swizzle[0][i] = first_component;
        }

        emit_mir_instruction(ctx, ins);
}

/* If is_shared is off, the only other possible value are globals, since
 * SSBO's are being lowered to globals through a NIR pass. */
static void
emit_atomic(
        compiler_context *ctx,
        nir_intrinsic_instr *instr,
        bool is_shared,
        midgard_load_store_op op)
{
        unsigned bitsize = nir_src_bit_size(instr->src[1]);
        nir_alu_type type =
                (op == midgard_op_atomic_imin || op == midgard_op_atomic_imax) ?
                nir_type_int : nir_type_uint;

        unsigned dest = nir_dest_index(&instr->dest);
        unsigned val = nir_src_index(ctx, &instr->src[1]);
        emit_explicit_constant(ctx, val, val);

        midgard_instruction ins = {
                .type = TAG_LOAD_STORE_4,
                .mask = 0xF,
                .dest = dest,
                .src = { ~0, ~0, ~0, val },
                .src_types = { 0, 0, 0, type | bitsize },
                .op = op
        };

        nir_src *src_offset = nir_get_io_offset_src(instr);

        /* cmpxchg takes an extra value in arg_2, so we don't use it for the offset */
        if (op == midgard_op_atomic_cmpxchg) {
                unsigned addr = nir_src_index(ctx, src_offset);

                ins.src[1] = addr;
                ins.src_types[1] = nir_type_uint | nir_src_bit_size(*src_offset);

                unsigned xchg_val = nir_src_index(ctx, &instr->src[2]);
                emit_explicit_constant(ctx, xchg_val, xchg_val);

                ins.src[2] = val;
                ins.src_types[2] = type | bitsize;
                ins.src[3] = xchg_val;

                if (is_shared)
                        ins.load_store.arg_1 |= 0x6E;
        } else {
                mir_set_offset(ctx, &ins, src_offset, is_shared);
        }

        mir_set_intr_mask(&instr->instr, &ins, true);

        emit_mir_instruction(ctx, ins);
}

static void
emit_varying_read(
        compiler_context *ctx,
        unsigned dest, unsigned offset,
        unsigned nr_comp, unsigned component,
        nir_src *indirect_offset, nir_alu_type type, bool flat)
{
        /* XXX: Half-floats? */
        /* TODO: swizzle, mask */

        midgard_instruction ins = m_ld_vary_32(dest, offset);
        ins.mask = mask_of(nr_comp);
        ins.dest_type = type;

        if (type == nir_type_float16) {
                /* Ensure we are aligned so we can pack it later */
                ins.mask = mask_of(ALIGN_POT(nr_comp, 2));
        }

        for (unsigned i = 0; i < ARRAY_SIZE(ins.swizzle[0]); ++i)
                ins.swizzle[0][i] = MIN2(i + component, COMPONENT_W);