bir.c 7.97 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
/*
 * Copyright (C) 2020 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors (Collabora):
 *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */

#include "compiler.h"

29
/* Does an instruction respect clamps and source mods? Depend
30
31
32
 * on the types involved */

bool
33
bi_has_clamp(bi_instruction *ins)
34
35
36
37
38
39
40
{
        bool classy = bi_class_props[ins->type] & BI_MODS;
        bool floaty = nir_alu_type_get_base_type(ins->dest_type) == nir_type_float;

        return classy && floaty;
}

41
/* Have to check source for e.g. compares */
42
43
44
45

bool
bi_has_source_mods(bi_instruction *ins)
{
46
47
48
49
        bool classy = bi_class_props[ins->type] & BI_MODS;
        bool floaty = nir_alu_type_get_base_type(ins->src_types[0]) == nir_type_float;

        return classy && floaty;
50
51
52
53
54
55
56
57
58
59
60
61
62
63
}

/* A source counts as swizzled when all of the following hold: the
 * instruction class is BI_SWIZZLABLE, the destination type is narrower than
 * 32 bits (8-bit or 16-bit mode), and the source is the first one (s == 0).
 * TODO: multi args */

bool
bi_is_src_swizzled(bi_instruction *ins, unsigned s)
{
        /* Only the first source is considered. TODO: prop? */
        if (s != 0)
                return false;

        if (!(bi_class_props[ins->type] & BI_SWIZZLABLE))
                return false;

        return nir_alu_type_get_type_size(ins->dest_type) < 32;
}
64
65
66
67
68
69
70
71
72
73
74
75
76
77

/* Returns true if any source of `ins` equals the index `arg`. A NULL
 * instruction has no sources, so it yields false. */

bool
bi_has_arg(bi_instruction *ins, unsigned arg)
{
        if (ins == NULL)
                return false;

        bi_foreach_src(ins, idx) {
                if (ins->src[idx] != arg)
                        continue;

                return true;
        }

        return false;
}
78

79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
/* Collapses a 16-bit bytemask into a component mask. The bytemask is walked
 * in groups of `bytes` bits; output bit d is set iff group d is set. Every
 * bit within a group must agree (all set or all clear), which is asserted.
 *
 * bytemask: one bit per byte, 16 bytes total
 * bytes:    bytes per component; must be non-zero
 *
 * Returns the mask with one bit per component. */

uint16_t
bi_from_bytemask(uint16_t bytemask, unsigned bytes)
{
        unsigned value = 0;

        /* A zero stride would never advance the loop below */
        assert(bytes != 0);

        for (unsigned c = 0, d = 0; c < 16; c += bytes, ++d) {
                bool a = (bytemask & (1u << c)) != 0;

                /* End of this group, clamped so a stride that does not
                 * divide 16 never scans past the 16-bit mask */
                unsigned end = (bytes < 16 - c) ? (c + bytes) : 16;

                /* Check the whole group [c, end), not just the first one */
                for (unsigned q = c; q < end; ++q)
                        assert(((bytemask & (1u << q)) != 0) == a);

                value |= ((unsigned) a << d);
        }

        return value;
}

96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
/* Precondition: valid 16-bit or 32-bit register format. Returns whether it
 * is 16-bit. Note auto reads to 32-bit registers even if the memory format
 * is 16-bit, so AUTO is grouped with the 32-bit formats here. */

static bool
bi_is_regfmt_16(enum bi_register_format fmt)
{
        bool is_16 = false;

        switch (fmt) {
        case BI_REGISTER_FORMAT_F32:
        case BI_REGISTER_FORMAT_S32:
        case BI_REGISTER_FORMAT_U32:
        case BI_REGISTER_FORMAT_AUTO:
                break;

        case BI_REGISTER_FORMAT_F16:
        case BI_REGISTER_FORMAT_S16:
        case BI_REGISTER_FORMAT_U16:
                is_16 = true;
                break;

        default:
                unreachable("Invalid register format");
        }

        return is_16;
}

/* Computes the number of staging registers for an instruction, according to
 * the sr_count policy that bi_opcode_props records for its opcode. */

static unsigned
bi_count_staging_registers(bi_instr *ins)
{
        /* vecsize is incremented before use (XXX: off-by-one) */
        unsigned channels = ins->vecsize + 1;
        enum bi_sr_count policy = bi_opcode_props[ins->op].sr_count;

        switch (policy) {
        case BI_SR_COUNT_0 ... BI_SR_COUNT_4:
                /* Fixed policies: the enum value itself is the count */
                return policy;

        case BI_SR_COUNT_SR_COUNT:
                /* Count given explicitly on the instruction */
                return ins->sr_count;

        case BI_SR_COUNT_VECSIZE:
                return channels;

        case BI_SR_COUNT_FORMAT:
                /* 16-bit formats need half as many registers, rounded up */
                if (bi_is_regfmt_16(ins->register_format))
                        return DIV_ROUND_UP(channels, 2);

                return channels;
        }

        unreachable("Invalid sr_count");
}

139
unsigned
140
bi_get_component_count(bi_instruction *ins, signed src)
141
{
142
143
144
145
146
        /* Discards and branches are oddball since they're not BI_VECTOR but no
         * destination. So special case.. */
        if (ins->type == BI_DISCARD || ins->type == BI_BRANCH)
                return 1;

147
        if (bi_class_props[ins->type] & BI_VECTOR) {
148
149
                assert(ins->vector_channels);
                return (src <= 0) ? ins->vector_channels : 1;
150
        } else {
151
                unsigned dest_bytes = nir_alu_type_get_type_size(ins->dest_type);
152
                unsigned src_bytes = nir_alu_type_get_type_size(ins->src_types[MAX2(src, 0)]);
153
154
155
156
157
158
159
160

                /* If there's either f32 on either end, it's only a single
                 * component, etc. */

                unsigned bytes = src < 0 ? dest_bytes : src_bytes;

                if (ins->type == BI_CONVERT)
                        bytes = MAX2(dest_bytes, src_bytes);
161
162
163
164
                
                if (ins->type == BI_ATEST || ins->type == BI_SELECT)
                        return 1;

165
                return MAX2(32 / bytes, 1);
166
167
168
        }
}

169
170
171
172
173
174
175
/* Builds a mask with one bit per byte of `node` that `ins` reads. The mask
 * is accumulated over every source of `ins` that refers to `node`, using
 * that source's swizzle and type size. */

uint16_t
bi_bytemask_of_read_components(bi_instruction *ins, unsigned node)
{
        uint16_t read = 0x0;

        bi_foreach_src(ins, s) {
                if (ins->src[s] == node) {
                        unsigned nr_comps = bi_get_component_count(ins, s);

                        /* Bytes per component, and a mask covering one
                         * component's worth of bytes */
                        unsigned stride = nir_alu_type_get_type_size(ins->src_types[s]) / 8;
                        unsigned lane = (1 << stride) - 1;

                        /* Mark the bytes of each component the swizzle selects */
                        for (unsigned i = 0; i < nr_comps; ++i) {
                                unsigned sel = ins->swizzle[s][i];
                                read |= (lane << (sel * stride));
                        }
                }
        }

        return read;
}
190
191
192
193

/* Fetches the immediate value read by source `index` of an instruction.
 *
 * The source must be tagged with BIR_INDEX_CONSTANT (asserted). Its remaining
 * bits give a bit offset into ins->constant.u64. The constant is shifted
 * right by that offset and then masked down to the bit size of the source's
 * type.
 *
 * Returns 0 when the offset is at or beyond the 64-bit constant. */

uint64_t
bi_get_immediate(bi_instruction *ins, unsigned index)
{
        unsigned v = ins->src[index];
        assert(v & BIR_INDEX_CONSTANT);
        unsigned shift = v & ~BIR_INDEX_CONSTANT;

        /* Don't invoke undefined behaviour on shift: shifting a 64-bit
         * value by 64 or more is undefined in C, and no bits of the
         * constant would remain anyway */
        if (shift >= 64)
                return 0;

        uint64_t shifted = ins->constant.u64 >> shift;

        /* Mask off the accessed part */
        unsigned sz = nir_alu_type_get_type_size(ins->src_types[index]);

        /* A full-width access needs no mask; building one with a shift
         * by 64 or more would itself be undefined */
        if (sz >= 64)
                return shifted;

        return shifted & ((1ull << sz) - 1);
}
212
213
214
215

/* Tests whether `ins` writes component `comp` of its destination, i.e.
 * whether `comp` lies below the destination's component count. */

bool
bi_writes_component(bi_instruction *ins, unsigned comp)
{
        unsigned written = bi_get_component_count(ins, -1);

        return written > comp;
}
218

219
220
221
222
/* Determine effective writemask for RA/DCE, noting that we currently act
 * per-register hence aligning. TODO: when real write masks are handled in
 * packing (not for a while), update this routine, removing the align */

223
224
225
unsigned
bi_writemask(bi_instruction *ins)
{
226
227
        nir_alu_type T = ins->dest_type;
        unsigned size = nir_alu_type_get_type_size(T);
228
229
        unsigned bytes_per_comp = size / 8;
        unsigned components = bi_get_component_count(ins, -1);
230
        unsigned bytes = ALIGN_POT(bytes_per_comp * components, 4);
231
232
233
        unsigned mask = (1 << bytes) - 1;
        unsigned shift = ins->dest_offset * 4; /* 32-bit words */
        return (mask << shift);
234
}
235
236
237
238
239
240
241
242
243
244

/* Rewrites every use of index `old` across the program to index `new`,
 * bumping all 16 swizzle entries of each rewritten source by (newc - oldc).
 * Requires newc >= oldc (asserted).
 *
 * This walks every source of every instruction, so it is O(nc) to the
 * program and number of uses, and combine lowering is effectively O(n^2).
 * Better bookkeeping would bring it down to linear if that's an issue. */

void
bi_rewrite_uses(bi_context *ctx,
                unsigned old, unsigned oldc,
                unsigned new, unsigned newc)
{
        assert(newc >= oldc);

        /* Amount to add to each swizzle entry */
        unsigned delta = newc - oldc;

        bi_foreach_instr_global(ctx, ins) {
                bi_foreach_src(ins, s) {
                        if (ins->src[s] == old) {
                                for (unsigned c = 0; c < 16; ++c)
                                        ins->swizzle[s][c] += delta;

                                ins->src[s] = new;
                        }
                }
        }
}