ac_nir_to_llvm.c 176 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/*
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

24
25
#include <llvm/Config/llvm-config.h>

26
#include "ac_nir_to_llvm.h"
27
#include "ac_llvm_build.h"
28
#include "ac_llvm_util.h"
29
30
31
#include "ac_binary.h"
#include "sid.h"
#include "nir/nir.h"
32
#include "nir/nir_deref.h"
33
#include "util/bitscan.h"
34
#include "util/u_math.h"
35
#include "ac_shader_abi.h"
36
#include "ac_shader_util.h"
37

38
39
40
struct ac_nir_context {
	struct ac_llvm_context ac;
	struct ac_shader_abi *abi;
41
	const struct ac_shader_args *args;
42
43

	gl_shader_stage stage;
44
	shader_info *info;
45

46
47
	LLVMValueRef *ssa_defs;

48
	LLVMValueRef scratch;
49
	LLVMValueRef constant_data;
50

51
52
53
54
	struct hash_table *defs;
	struct hash_table *phis;
	struct hash_table *vars;

55
	LLVMValueRef main_function;
56
57
58
59
60
61
62
	LLVMBasicBlockRef continue_block;
	LLVMBasicBlockRef break_block;

	int num_locals;
	LLVMValueRef *locals;
};

63
64
65
66
67
/* Forward declaration of a file-local helper; its definition is not in this
 * part of the file.
 */
static LLVMValueRef get_sampler_desc_index(struct ac_nir_context *ctx,
					   nir_deref_instr *deref_instr,
					   const nir_instr *instr,
					   bool image);

68
static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
69
				     nir_deref_instr *deref_instr,
70
				     enum ac_descriptor_type desc_type,
71
				     const nir_instr *instr,
72
				     LLVMValueRef index,
73
				     bool image, bool write);
74

75
76
77
78
79
80
/**
 * Store the first \p value_count elements of \p vec through a strided array
 * of pointers.
 *
 * Element i of \p vec is extracted and stored to values[i * value_stride].
 */
static void
build_store_values_extended(struct ac_llvm_context *ac,
			     LLVMValueRef *values,
			     unsigned value_count,
			     unsigned value_stride,
			     LLVMValueRef vec)
{
	for (unsigned chan = 0; chan < value_count; ++chan) {
		LLVMValueRef dst = values[chan * value_stride];
		LLVMValueRef lane = LLVMConstInt(ac->i32, chan, false);
		LLVMValueRef elem = LLVMBuildExtractElement(ac->builder, vec, lane, "");

		LLVMBuildStore(ac->builder, elem, dst);
	}
}

93
94
static LLVMTypeRef get_def_type(struct ac_nir_context *ctx,
                                const nir_ssa_def *def)
95
{
96
97
98
	LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size);
	if (def->num_components > 1) {
		type = LLVMVectorType(type, def->num_components);
99
	}
100
	return type;
101
102
}

103
/* Look up the LLVM value previously recorded for an SSA source. */
static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src)
{
	assert(src.is_ssa);

	const unsigned def_index = src.ssa->index;
	return nir->ssa_defs[def_index];
}

109
static LLVMValueRef
110
get_memory_ptr(struct ac_nir_context *ctx, nir_src src, unsigned bit_size)
111
{
112
113
114
115
	LLVMValueRef ptr = get_src(ctx, src);
	ptr = LLVMBuildGEP(ctx->ac.builder, ctx->ac.lds, &ptr, 1, "");
	int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));

116
117
	LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, bit_size);

118
	return LLVMBuildBitCast(ctx->ac.builder, ptr,
119
				LLVMPointerType(type, addr_space), "");
120
121
}

122
123
static LLVMBasicBlockRef get_block(struct ac_nir_context *nir,
                                   const struct nir_block *b)
124
{
125
126
	struct hash_entry *entry = _mesa_hash_table_search(nir->defs, b);
	return (LLVMBasicBlockRef)entry->data;
127
128
}

129
130
131
/**
 * Fetch an ALU source and apply its swizzle so the result has
 * \p num_components components.
 *
 * Source modifiers (negate/abs) are not supported here and are asserted
 * to be absent.
 */
static LLVMValueRef get_alu_src(struct ac_nir_context *ctx,
                                nir_alu_src src,
                                unsigned num_components)
{
	LLVMValueRef value = get_src(ctx, src.src);
	assert(value);

	const unsigned src_components = ac_get_llvm_num_components(value);

	/* The value can be used as-is only if the component counts match and
	 * the swizzle is the identity.
	 */
	bool passthrough = num_components == src_components;
	for (unsigned chan = 0; chan < num_components; ++chan) {
		assert(src.swizzle[chan] < src_components);
		if (src.swizzle[chan] != chan)
			passthrough = false;
	}

	if (!passthrough) {
		LLVMValueRef masks[4];
		for (unsigned chan = 0; chan < 4; ++chan)
			masks[chan] = LLVMConstInt(ctx->ac.i32, src.swizzle[chan], false);

		if (src_components > 1 && num_components == 1) {
			/* Vector -> scalar: pick the single swizzled element. */
			value = LLVMBuildExtractElement(ctx->ac.builder, value,
							masks[0], "");
		} else if (src_components == 1 && num_components > 1) {
			/* Scalar -> vector: replicate the scalar. */
			LLVMValueRef splat[] = {value, value, value, value};
			value = ac_build_gather_values(&ctx->ac, splat, num_components);
		} else {
			LLVMValueRef shuffle_mask = LLVMConstVector(masks, num_components);
			value = LLVMBuildShuffleVector(ctx->ac.builder, value, value,
						       shuffle_mask, "");
		}
	}

	assert(!src.negate);
	assert(!src.abs);
	return value;
}

168
169
170
/**
 * Integer comparison producing a 32-bit boolean: 0xFFFFFFFF when the
 * predicate holds, 0 otherwise.
 */
static LLVMValueRef emit_int_cmp(struct ac_llvm_context *ctx,
                                 LLVMIntPredicate pred, LLVMValueRef src0,
                                 LLVMValueRef src1)
{
	LLVMValueRef cond = LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
	LLVMValueRef all_ones = LLVMConstInt(ctx->i32, 0xFFFFFFFF, false);

	return LLVMBuildSelect(ctx->builder, cond, all_ones, ctx->i32_0, "");
}

178
179
180
/**
 * Floating-point comparison producing a 32-bit boolean: 0xFFFFFFFF when the
 * predicate holds, 0 otherwise. Both operands are first converted with
 * ac_to_float().
 */
static LLVMValueRef emit_float_cmp(struct ac_llvm_context *ctx,
                                   LLVMRealPredicate pred, LLVMValueRef src0,
                                   LLVMValueRef src1)
{
	LLVMValueRef lhs = ac_to_float(ctx, src0);
	LLVMValueRef rhs = ac_to_float(ctx, src1);
	LLVMValueRef cond = LLVMBuildFCmp(ctx->builder, pred, lhs, rhs, "");
	LLVMValueRef all_ones = LLVMConstInt(ctx->i32, 0xFFFFFFFF, false);

	return LLVMBuildSelect(ctx->builder, cond, all_ones, ctx->i32_0, "");
}

191
192
193
194
/**
 * Call a one-operand float intrinsic.
 *
 * The intrinsic name is "<intrin>.f<bits>", where <bits> is the element
 * width of \p result_type. The operand is converted with ac_to_float().
 */
static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx,
					 const char *intrin,
					 LLVMTypeRef result_type,
					 LLVMValueRef src0)
{
	char name[64];
	LLVMValueRef arg = ac_to_float(ctx, src0);

	ASSERTED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
					     ac_get_elem_bits(ctx, result_type));
	assert(length < sizeof(name));

	return ac_build_intrinsic(ctx, name, result_type, &arg, 1,
				  AC_FUNC_ATTR_READNONE);
}

207
208
209
210
/**
 * Call a two-operand float intrinsic.
 *
 * The intrinsic name is "<intrin>.f<bits>", where <bits> is the element
 * width of \p result_type. Both operands are converted with ac_to_float().
 */
static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx,
				       const char *intrin,
				       LLVMTypeRef result_type,
				       LLVMValueRef src0, LLVMValueRef src1)
{
	char name[64];
	LLVMValueRef params[2];

	params[0] = ac_to_float(ctx, src0);
	params[1] = ac_to_float(ctx, src1);

	ASSERTED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
					     ac_get_elem_bits(ctx, result_type));
	assert(length < sizeof(name));

	return ac_build_intrinsic(ctx, name, result_type, params, 2,
				  AC_FUNC_ATTR_READNONE);
}

224
225
226
227
/**
 * Call a three-operand float intrinsic.
 *
 * The intrinsic name is "<intrin>.f<bits>", where <bits> is the element
 * width of \p result_type. All operands are converted with ac_to_float().
 */
static LLVMValueRef emit_intrin_3f_param(struct ac_llvm_context *ctx,
					 const char *intrin,
					 LLVMTypeRef result_type,
					 LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
{
	char name[64];
	LLVMValueRef params[3];

	params[0] = ac_to_float(ctx, src0);
	params[1] = ac_to_float(ctx, src1);
	params[2] = ac_to_float(ctx, src2);

	ASSERTED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
					     ac_get_elem_bits(ctx, result_type));
	assert(length < sizeof(name));

	return ac_build_intrinsic(ctx, name, result_type, params, 3,
				  AC_FUNC_ATTR_READNONE);
}

242
243
/**
 * Select between \p src1 and \p src2 based on whether the scalar \p src0 is
 * non-zero.
 *
 * If exactly one of the two candidates is a pointer, the other one is
 * converted to that pointer type first so both select operands agree.
 */
static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx,
			       LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
{
	assert(LLVMGetTypeKind(LLVMTypeOf(src0)) != LLVMVectorTypeKind);

	LLVMTypeRef src1_type = LLVMTypeOf(src1);
	LLVMTypeRef src2_type = LLVMTypeOf(src2);
	const bool src1_is_ptr = LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind;
	const bool src2_is_ptr = LLVMGetTypeKind(src2_type) == LLVMPointerTypeKind;

	if (src1_is_ptr && !src2_is_ptr)
		src2 = LLVMBuildIntToPtr(ctx->builder, src2, src1_type, "");
	else if (src2_is_ptr && !src1_is_ptr)
		src1 = LLVMBuildIntToPtr(ctx->builder, src1, src2_type, "");

	LLVMValueRef cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0,
					  ctx->i32_0, "");
	LLVMValueRef if_true = ac_to_integer_or_pointer(ctx, src1);
	LLVMValueRef if_false = ac_to_integer_or_pointer(ctx, src2);

	return LLVMBuildSelect(ctx->builder, cond, if_true, if_false, "");
}

265
266
267
268
269
270
/* Integer absolute value, computed as imax(x, -x). */
static LLVMValueRef emit_iabs(struct ac_llvm_context *ctx,
			      LLVMValueRef src0)
{
	LLVMValueRef negated = LLVMBuildNeg(ctx->builder, src0, "");

	return ac_build_imax(ctx, src0, negated);
}

271
272
273
/**
 * Call the intrinsic \p intrin, declared as returning a packed { i32, i1 }
 * struct, on two operands and return its i1 field (index 1) zero-extended
 * to i32.
 */
static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx,
				    const char *intrin,
				    LLVMValueRef src0, LLVMValueRef src1)
{
	LLVMTypeRef fields[] = { ctx->i32, ctx->i1 };
	LLVMValueRef operands[] = { src0, src1 };
	LLVMTypeRef pair_type = LLVMStructTypeInContext(ctx->context, fields,
							2, true);

	LLVMValueRef pair = ac_build_intrinsic(ctx, intrin, pair_type,
					       operands, 2, AC_FUNC_ATTR_READNONE);
	LLVMValueRef flag = LLVMBuildExtractValue(ctx->builder, pair, 1, "");

	return LLVMBuildZExt(ctx->builder, flag, ctx->i32, "");
}

290
/**
 * Convert a 32-bit boolean to a float of \p bitsize bits.
 *
 * The boolean is ANDed with the bit pattern of 1.0f, which yields 1.0f for
 * an all-ones input and 0.0f for zero. The f32 result is then truncated or
 * extended to the requested size.
 */
static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx,
			     LLVMValueRef src0,
			     unsigned bitsize)
{
	LLVMValueRef one_bits = LLVMBuildBitCast(ctx->builder,
						 LLVMConstReal(ctx->f32, 1.0),
						 ctx->i32, "");
	LLVMValueRef masked = LLVMBuildAnd(ctx->builder, src0, one_bits, "");
	LLVMValueRef as_f32 = LLVMBuildBitCast(ctx->builder, masked, ctx->f32, "");

	switch (bitsize) {
	case 32:
		return as_f32;
	case 16:
		return LLVMBuildFPTrunc(ctx->builder, as_f32, ctx->f16, "");
	case 64:
		return LLVMBuildFPExt(ctx->builder, as_f32, ctx->f64, "");
	default:
		unreachable("Unsupported bit size.");
	}
}

311
312
/**
 * Convert a float to a 32-bit boolean: the result of an unordered
 * "not equal to zero" comparison, sign-extended to i32.
 */
static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx,
			     LLVMValueRef src0)
{
	LLVMValueRef fval = ac_to_float(ctx, src0);
	LLVMValueRef fzero = LLVMConstNull(LLVMTypeOf(fval));
	LLVMValueRef nonzero = LLVMBuildFCmp(ctx->builder, LLVMRealUNE,
					     fval, fzero, "");

	return LLVMBuildSExt(ctx->builder, nonzero, ctx->i32, "");
}

321
322
323
/**
 * Convert a 32-bit boolean to an integer of \p bitsize bits holding 0 or 1.
 *
 * The low bit is isolated with an AND against 1, then the value is
 * truncated or zero-extended to the requested width.
 */
static LLVMValueRef emit_b2i(struct ac_llvm_context *ctx,
			     LLVMValueRef src0,
			     unsigned bitsize)
{
	LLVMValueRef low_bit = LLVMBuildAnd(ctx->builder, src0, ctx->i32_1, "");

	switch (bitsize) {
	case 32:
		return low_bit;
	case 8:
		return LLVMBuildTrunc(ctx->builder, low_bit, ctx->i8, "");
	case 16:
		return LLVMBuildTrunc(ctx->builder, low_bit, ctx->i16, "");
	case 64:
		return LLVMBuildZExt(ctx->builder, low_bit, ctx->i64, "");
	default:
		unreachable("Unsupported bit size.");
	}
}

341
342
/**
 * Convert an integer to a 32-bit boolean: the result of "not equal to zero",
 * sign-extended to i32.
 */
static LLVMValueRef emit_i2b(struct ac_llvm_context *ctx,
			     LLVMValueRef src0)
{
	LLVMValueRef izero = LLVMConstNull(LLVMTypeOf(src0));
	LLVMValueRef nonzero = LLVMBuildICmp(ctx->builder, LLVMIntNE,
					     src0, izero, "");

	return LLVMBuildSExt(ctx->builder, nonzero, ctx->i32, "");
}
349

350
351
352
353
354
/**
 * Round a float to half precision and return it widened back to f32, with
 * results that are denormal in f16 flushed to zero.
 *
 * GFX8 and newer classify the f16 value with llvm.amdgcn.class.f16; older
 * chips compare the magnitude of the widened value against 2^-14.
 */
static LLVMValueRef emit_f2f16(struct ac_llvm_context *ctx,
			       LLVMValueRef src0)
{
	const bool has_f16_class = ctx->chip_class >= GFX8;
	LLVMValueRef is_denorm = NULL;

	LLVMValueRef fval = ac_to_float(ctx, src0);
	LLVMValueRef half = LLVMBuildFPTrunc(ctx->builder, fval, ctx->f16, "");

	if (has_f16_class) {
		/* Test whether the f16 result is a (positive or negative)
		 * denormal; this must be done on the half value itself.
		 */
		LLVMValueRef class_args[2] = {
			half,
			LLVMConstInt(ctx->i32, N_SUBNORMAL | P_SUBNORMAL, false),
		};
		is_denorm = ac_build_intrinsic(ctx, "llvm.amdgcn.class.f16", ctx->i1,
					       class_args, 2, AC_FUNC_ATTR_READNONE);
	}

	/* Widen back to f32. */
	LLVMValueRef result = LLVMBuildFPExt(ctx->builder, half, ctx->f32, "");

	if (has_f16_class)
		return LLVMBuildSelect(ctx->builder, is_denorm, ctx->f32_0, result, "");

	/* GFX6-GFX7: 0x38800000 is the smallest normal half-float value
	 * (2^-14) expressed as a 32-bit float. Flush to zero when the
	 * magnitude is below it but not already zero.
	 */
	LLVMValueRef magnitude = emit_intrin_1f_param(ctx, "llvm.fabs", ctx->f32, result);
	LLVMValueRef min_normal = LLVMBuildBitCast(ctx->builder,
						   LLVMConstInt(ctx->i32, 0x38800000, false),
						   ctx->f32, "");
	LLVMValueRef below_min = LLVMBuildFCmp(ctx->builder, LLVMRealOGT,
					       min_normal, magnitude, "");
	LLVMValueRef not_zero = LLVMBuildFCmp(ctx->builder, LLVMRealONE,
					      magnitude, ctx->f32_0, "");
	LLVMValueRef flush = LLVMBuildAnd(ctx->builder, below_min, not_zero, "");

	return LLVMBuildSelect(ctx->builder, flush, ctx->f32_0, result, "");
}

390
391
/**
 * Unsigned multiply-high: zero-extend both operands to 64 bits, multiply,
 * and return the upper 32 bits of the product as an i32.
 */
static LLVMValueRef emit_umul_high(struct ac_llvm_context *ctx,
				   LLVMValueRef src0, LLVMValueRef src1)
{
	LLVMBuilderRef builder = ctx->builder;
	LLVMValueRef lhs64 = LLVMBuildZExt(builder, src0, ctx->i64, "");
	LLVMValueRef rhs64 = LLVMBuildZExt(builder, src1, ctx->i64, "");
	LLVMValueRef product = LLVMBuildMul(builder, lhs64, rhs64, "");
	LLVMValueRef upper = LLVMBuildLShr(builder, product,
					   LLVMConstInt(ctx->i64, 32, false), "");

	return LLVMBuildTrunc(builder, upper, ctx->i32, "");
}

403
404
/**
 * Signed multiply-high: sign-extend both operands to 64 bits, multiply,
 * and return the upper 32 bits of the product as an i32.
 */
static LLVMValueRef emit_imul_high(struct ac_llvm_context *ctx,
				   LLVMValueRef src0, LLVMValueRef src1)
{
	LLVMBuilderRef builder = ctx->builder;
	LLVMValueRef lhs64 = LLVMBuildSExt(builder, src0, ctx->i64, "");
	LLVMValueRef rhs64 = LLVMBuildSExt(builder, src1, ctx->i64, "");
	LLVMValueRef product = LLVMBuildMul(builder, lhs64, rhs64, "");
	LLVMValueRef upper = LLVMBuildAShr(builder, product,
					   LLVMConstInt(ctx->i64, 32, false), "");

	return LLVMBuildTrunc(builder, upper, ctx->i32, "");
}

416
417
/* Build a bitfield mask: ((1 << bits) - 1) << offset. */
static LLVMValueRef emit_bfm(struct ac_llvm_context *ctx,
			     LLVMValueRef bits, LLVMValueRef offset)
{
	LLVMBuilderRef builder = ctx->builder;
	LLVMValueRef top_bit = LLVMBuildShl(builder, ctx->i32_1, bits, "");
	LLVMValueRef low_mask = LLVMBuildSub(builder, top_bit, ctx->i32_1, "");

	return LLVMBuildShl(builder, low_mask, offset, "");
}
428

429
430
431
432
/**
 * Bitwise select: take bits from \p insert where \p mask is set and from
 * \p base elsewhere.
 *
 *   (mask & insert) | (~mask & base) = base ^ (mask & (insert ^ base))
 *
 * The right-hand form is emitted because the LLVM backend can convert it
 * to V_BFI.
 */
static LLVMValueRef emit_bitfield_select(struct ac_llvm_context *ctx,
					 LLVMValueRef mask, LLVMValueRef insert,
					 LLVMValueRef base)
{
	LLVMBuilderRef builder = ctx->builder;
	LLVMValueRef differing = LLVMBuildXor(builder, insert, base, "");
	LLVMValueRef selected = LLVMBuildAnd(builder, mask, differing, "");

	return LLVMBuildXor(builder, base, selected, "");
}

442
443
444
445
/**
 * Pack the first two components of a float vector with the supplied
 * \p pack callback and return the packed value bitcast to i32.
 */
static LLVMValueRef emit_pack_2x16(struct ac_llvm_context *ctx,
				   LLVMValueRef src0,
				   LLVMValueRef (*pack)(struct ac_llvm_context *ctx,
							LLVMValueRef args[2]))
{
	LLVMValueRef vec = ac_to_float(ctx, src0);
	LLVMValueRef comp[2];

	comp[0] = LLVMBuildExtractElement(ctx->builder, vec, ctx->i32_0, "");
	comp[1] = LLVMBuildExtractElement(ctx->builder, vec, ctx->i32_1, "");

	LLVMValueRef packed = pack(ctx, comp);
	return LLVMBuildBitCast(ctx->builder, packed, ctx->i32, "");
}
455

456
457
458
459
/**
 * Unpack two half floats from a 32-bit value into a 2-component f32 vector.
 * Component 0 comes from the low 16 bits, component 1 from the high 16 bits.
 */
static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx,
					  LLVMValueRef src0)
{
	LLVMValueRef shift16 = LLVMConstInt(ctx->i32, 16, false);
	LLVMValueRef halves[2];

	for (int chan = 0; chan < 2; chan++) {
		LLVMValueRef bits = src0;

		/* The second half lives in the upper 16 bits. */
		if (chan == 1)
			bits = LLVMBuildLShr(ctx->builder, src0, shift16, "");

		bits = LLVMBuildTrunc(ctx->builder, bits, ctx->i16, "");
		bits = LLVMBuildBitCast(ctx->builder, bits, ctx->f16, "");
		halves[chan] = LLVMBuildFPExt(ctx->builder, bits, ctx->f32, "");
	}

	return ac_build_gather_values(ctx, halves, 2);
}
471

472
473
474
475
476
477
478
/**
 * Emit a screen-space derivative for one of the NIR fddx / fddy opcodes
 * via ac_build_ddxy().
 *
 * The thread-ID mask depends on whether the op is a "fine" variant; the
 * index is 1 for the X derivatives and 2 for everything else.
 */
static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
			      nir_op op,
			      LLVMValueRef src0)
{
	unsigned mask;
	int idx;

	switch (op) {
	case nir_op_fddx_fine:
		mask = AC_TID_MASK_LEFT;
		idx = 1;
		break;
	case nir_op_fddy_fine:
		mask = AC_TID_MASK_TOP;
		idx = 2;
		break;
	case nir_op_fddx_coarse:
	case nir_op_fddx:
		mask = AC_TID_MASK_TOP_LEFT;
		idx = 1;
		break;
	default:
		/* All remaining (Y-derivative) ops. */
		mask = AC_TID_MASK_TOP_LEFT;
		idx = 2;
		break;
	}

	return ac_build_ddxy(&ctx->ac, mask, idx, src0);
}

499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
/* State shared between enter_waterfall() and exit_waterfall(). */
struct waterfall_context {
	/* Incoming blocks for the phis built in exit_waterfall():
	 * [0] is recorded by enter_waterfall() just before its conditional,
	 * [1] is recorded by exit_waterfall() before that conditional is closed.
	 */
	LLVMBasicBlockRef phi_bb[2];
	/* Whether enter_waterfall() actually emitted a loop. */
	bool use_waterfall;
};

/* To deal with divergent descriptors we can create a loop that handles all
 * lanes with the same descriptor on a given iteration (henceforth a
 * waterfall loop).
 *
 * These helpers create the beginning and end of the loop, leaving the caller
 * to implement the body.
 *
 * params:
 *  - ctx is the usual nir context
 *  - wctx is a temporary struct containing some loop info. Can be left uninitialized.
 *  - value is the possibly divergent value for which we built the loop
 *  - divergent is whether value is actually divergent. If false we just pass
 *     things through.
 */
static LLVMValueRef enter_waterfall(struct ac_nir_context *ctx,
				    struct waterfall_context *wctx,
				    LLVMValueRef value, bool divergent)
{
	/* If the app claims the value is divergent but it is constant we can
	 * end up with a dynamic index of NULL; treat that as uniform.
	 */
	wctx->use_waterfall = divergent && value;
	if (!wctx->use_waterfall)
		return value;

	ac_build_bgnloop(&ctx->ac, 6000);

	/* Value obtained from ac_build_readlane() with a NULL lane argument. */
	LLVMValueRef uniform_value = ac_build_readlane(&ctx->ac, value, NULL);

	/* Only lanes whose value matches run the body on this iteration. */
	LLVMValueRef is_active = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, value,
					       uniform_value, "uniform_active");

	/* Remember the block preceding the conditional for the exit phis. */
	wctx->phi_bb[0] = LLVMGetInsertBlock(ctx->ac.builder);
	ac_build_ifcc(&ctx->ac, is_active, 6001);

	return uniform_value;
}

/**
 * Close a loop opened by enter_waterfall().
 *
 * \p value is the (optional) result computed in the loop body. When no
 * loop was emitted it is returned unchanged. Otherwise the return value is
 * a phi that merges it with undef from the skipped path, or NULL when
 * \p value is NULL.
 */
static LLVMValueRef exit_waterfall(struct ac_nir_context *ctx,
				   struct waterfall_context *wctx,
				   LLVMValueRef value)
{
	/* Exit flag per incoming block: 0 when the body was skipped,
	 * all-ones when it ran.
	 */
	LLVMValueRef exit_flags[2] = {
		LLVMConstInt(ctx->ac.i32, 0, false),
		LLVMConstInt(ctx->ac.i32, 0xffffffff, false),
	};

	if (!wctx->use_waterfall)
		return value;

	wctx->phi_bb[1] = LLVMGetInsertBlock(ctx->ac.builder);

	ac_build_endif(&ctx->ac, 6001);

	LLVMValueRef merged = NULL;
	if (value) {
		LLVMValueRef incoming[2];

		incoming[0] = LLVMGetUndef(LLVMTypeOf(value));
		incoming[1] = value;
		merged = ac_build_phi(&ctx->ac, LLVMTypeOf(value), 2, incoming,
				      wctx->phi_bb);
	}

	/*
	 * By using the optimization barrier on the exit decision, we decouple
	 * the operations from the break, and hence avoid LLVM hoisting the
	 * operation into the break block.
	 */
	LLVMValueRef exit_flag = ac_build_phi(&ctx->ac, ctx->ac.i32, 2,
					      exit_flags, wctx->phi_bb);
	ac_build_optimization_barrier(&ctx->ac, &exit_flag);

	LLVMValueRef should_break = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE,
						  exit_flag, ctx->ac.i32_0,
						  "uniform_active2");
	ac_build_ifcc(&ctx->ac, should_break, 6002);
	ac_build_break(&ctx->ac);
	ac_build_endif(&ctx->ac, 6002);

	ac_build_endloop(&ctx->ac, 6000);
	return merged;
}

586
static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
587
{
588
589
590
591
	LLVMValueRef src[4], result = NULL;
	unsigned num_components = instr->dest.dest.ssa.num_components;
	unsigned src_components;
	LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);
592

593
594
595
596
597
598
599
600
	assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
	switch (instr->op) {
	case nir_op_vec2:
	case nir_op_vec3:
	case nir_op_vec4:
		src_components = 1;
		break;
	case nir_op_pack_half_2x16:
601
602
	case nir_op_pack_snorm_2x16:
	case nir_op_pack_unorm_2x16:
603
604
605
606
607
608
609
610
611
612
613
614
		src_components = 2;
		break;
	case nir_op_unpack_half_2x16:
		src_components = 1;
		break;
	case nir_op_cube_face_coord:
	case nir_op_cube_face_index:
		src_components = 3;
		break;
	default:
		src_components = num_components;
		break;
615
	}
616
617
	for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
		src[i] = get_alu_src(ctx, instr->src[i], src_components);
618

619
	switch (instr->op) {
620
	case nir_op_mov:
621
622
623
624
625
		result = src[0];
		break;
	case nir_op_fneg:
	        src[0] = ac_to_float(&ctx->ac, src[0]);
		result = LLVMBuildFNeg(ctx->ac.builder, src[0], "");
626
627
628
629
630
631
632
		if (ctx->ac.float_mode == AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
			/* fneg will be optimized by backend compiler with sign
			 * bit removed via XOR. This is probably a LLVM bug.
			 */
			result = ac_build_canonicalize(&ctx->ac, result,
						       instr->dest.dest.ssa.bit_size);
		}
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
		break;
	case nir_op_ineg:
		result = LLVMBuildNeg(ctx->ac.builder, src[0], "");
		break;
	case nir_op_inot:
		result = LLVMBuildNot(ctx->ac.builder, src[0], "");
		break;
	case nir_op_iadd:
		result = LLVMBuildAdd(ctx->ac.builder, src[0], src[1], "");
		break;
	case nir_op_fadd:
		src[0] = ac_to_float(&ctx->ac, src[0]);
		src[1] = ac_to_float(&ctx->ac, src[1]);
		result = LLVMBuildFAdd(ctx->ac.builder, src[0], src[1], "");
		break;
	case nir_op_fsub:
		src[0] = ac_to_float(&ctx->ac, src[0]);
		src[1] = ac_to_float(&ctx->ac, src[1]);
		result = LLVMBuildFSub(ctx->ac.builder, src[0], src[1], "");
		break;
	case nir_op_isub:
		result = LLVMBuildSub(ctx->ac.builder, src[0], src[1], "");
		break;
	case nir_op_imul:
		result = LLVMBuildMul(ctx->ac.builder, src[0], src[1], "");
		break;
	case nir_op_imod:
		result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
		break;
	case nir_op_umod:
		result = LLVMBuildURem(ctx->ac.builder, src[0], src[1], "");
		break;
Marek Olšák's avatar
Marek Olšák committed
665
666
667
668
669
670
671
672
673
674
675
	case nir_op_fmod:
		/* lower_fmod only lower 16-bit and 32-bit fmod */
		assert(instr->dest.dest.ssa.bit_size == 64);
		src[0] = ac_to_float(&ctx->ac, src[0]);
		src[1] = ac_to_float(&ctx->ac, src[1]);
		result = ac_build_fdiv(&ctx->ac, src[0], src[1]);
		result = emit_intrin_1f_param(&ctx->ac, "llvm.floor",
		                              ac_to_float_type(&ctx->ac, def_type), result);
		result = LLVMBuildFMul(ctx->ac.builder, src[1] , result, "");
		result = LLVMBuildFSub(ctx->ac.builder, src[0], result, "");
		break;
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
	case nir_op_irem:
		result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
		break;
	case nir_op_idiv:
		result = LLVMBuildSDiv(ctx->ac.builder, src[0], src[1], "");
		break;
	case nir_op_udiv:
		result = LLVMBuildUDiv(ctx->ac.builder, src[0], src[1], "");
		break;
	case nir_op_fmul:
		src[0] = ac_to_float(&ctx->ac, src[0]);
		src[1] = ac_to_float(&ctx->ac, src[1]);
		result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
		break;
	case nir_op_frcp:
		src[0] = ac_to_float(&ctx->ac, src[0]);
692
		result = ac_build_fdiv(&ctx->ac, LLVMConstReal(LLVMTypeOf(src[0]), 1.0), src[0]);
693
694
695
696
697
698
699
700
701
702
703
		break;
	case nir_op_iand:
		result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
		break;
	case nir_op_ior:
		result = LLVMBuildOr(ctx->ac.builder, src[0], src[1], "");
		break;
	case nir_op_ixor:
		result = LLVMBuildXor(ctx->ac.builder, src[0], src[1], "");
		break;
	case nir_op_ishl:
Samuel Pitoiset's avatar
Samuel Pitoiset committed
704
705
706
707
708
709
710
		if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) < ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
			src[1] = LLVMBuildZExt(ctx->ac.builder, src[1],
					       LLVMTypeOf(src[0]), "");
		else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) > ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
			src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1],
						LLVMTypeOf(src[0]), "");
		result = LLVMBuildShl(ctx->ac.builder, src[0], src[1], "");
711
712
		break;
	case nir_op_ishr:
Samuel Pitoiset's avatar
Samuel Pitoiset committed
713
714
715
716
717
718
719
		if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) < ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
			src[1] = LLVMBuildZExt(ctx->ac.builder, src[1],
					       LLVMTypeOf(src[0]), "");
		else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) > ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
			src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1],
						LLVMTypeOf(src[0]), "");
		result = LLVMBuildAShr(ctx->ac.builder, src[0], src[1], "");
720
721
		break;
	case nir_op_ushr:
Samuel Pitoiset's avatar
Samuel Pitoiset committed
722
723
724
725
726
727
728
		if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) < ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
			src[1] = LLVMBuildZExt(ctx->ac.builder, src[1],
					       LLVMTypeOf(src[0]), "");
		else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) > ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
			src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1],
						LLVMTypeOf(src[0]), "");
		result = LLVMBuildLShr(ctx->ac.builder, src[0], src[1], "");
729
		break;
730
	case nir_op_ilt32:
731
732
		result = emit_int_cmp(&ctx->ac, LLVMIntSLT, src[0], src[1]);
		break;
733
	case nir_op_ine32:
734
735
		result = emit_int_cmp(&ctx->ac, LLVMIntNE, src[0], src[1]);
		break;
736
	case nir_op_ieq32:
737
738
		result = emit_int_cmp(&ctx->ac, LLVMIntEQ, src[0], src[1]);
		break;
739
	case nir_op_ige32:
740
741
		result = emit_int_cmp(&ctx->ac, LLVMIntSGE, src[0], src[1]);
		break;
742
	case nir_op_ult32:
743
744
		result = emit_int_cmp(&ctx->ac, LLVMIntULT, src[0], src[1]);
		break;
745
	case nir_op_uge32:
746
747
		result = emit_int_cmp(&ctx->ac, LLVMIntUGE, src[0], src[1]);
		break;
748
	case nir_op_feq32:
749
750
		result = emit_float_cmp(&ctx->ac, LLVMRealOEQ, src[0], src[1]);
		break;
751
	case nir_op_fne32:
752
753
		result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]);
		break;
754
	case nir_op_flt32:
755
756
		result = emit_float_cmp(&ctx->ac, LLVMRealOLT, src[0], src[1]);
		break;
757
	case nir_op_fge32:
758
759
760
761
762
		result = emit_float_cmp(&ctx->ac, LLVMRealOGE, src[0], src[1]);
		break;
	case nir_op_fabs:
		result = emit_intrin_1f_param(&ctx->ac, "llvm.fabs",
		                              ac_to_float_type(&ctx->ac, def_type), src[0]);
763
764
765
766
767
768
769
		if (ctx->ac.float_mode == AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
			/* fabs will be optimized by backend compiler with sign
			 * bit removed via AND.
			 */
			result = ac_build_canonicalize(&ctx->ac, result,
						       instr->dest.dest.ssa.bit_size);
		}
770
771
772
773
774
		break;
	case nir_op_iabs:
		result = emit_iabs(&ctx->ac, src[0]);
		break;
	case nir_op_imax:
775
		result = ac_build_imax(&ctx->ac, src[0], src[1]);
776
777
		break;
	case nir_op_imin:
778
		result = ac_build_imin(&ctx->ac, src[0], src[1]);
779
780
		break;
	case nir_op_umax:
781
		result = ac_build_umax(&ctx->ac, src[0], src[1]);
782
783
		break;
	case nir_op_umin:
784
		result = ac_build_umin(&ctx->ac, src[0], src[1]);
785
786
787
788
		break;
	case nir_op_isign:
		result = ac_build_isign(&ctx->ac, src[0],
					instr->dest.dest.ssa.bit_size);
789
		break;
790
791
792
793
	case nir_op_fsign:
		src[0] = ac_to_float(&ctx->ac, src[0]);
		result = ac_build_fsign(&ctx->ac, src[0],
					instr->dest.dest.ssa.bit_size);
794
		break;
795
796
797
	case nir_op_ffloor:
		result = emit_intrin_1f_param(&ctx->ac, "llvm.floor",
		                              ac_to_float_type(&ctx->ac, def_type), src[0]);
798
		break;
799
800
801
	case nir_op_ftrunc:
		result = emit_intrin_1f_param(&ctx->ac, "llvm.trunc",
		                              ac_to_float_type(&ctx->ac, def_type), src[0]);
802
		break;
803
804
805
	case nir_op_fceil:
		result = emit_intrin_1f_param(&ctx->ac, "llvm.ceil",
		                              ac_to_float_type(&ctx->ac, def_type), src[0]);
806
		break;
807
808
809
	case nir_op_fround_even:
		result = emit_intrin_1f_param(&ctx->ac, "llvm.rint",
		                              ac_to_float_type(&ctx->ac, def_type),src[0]);
810
		break;
811
812
813
814
	case nir_op_ffract:
		src[0] = ac_to_float(&ctx->ac, src[0]);
		result = ac_build_fract(&ctx->ac, src[0],
					instr->dest.dest.ssa.bit_size);
815
		break;
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
	case nir_op_fsin:
		result = emit_intrin_1f_param(&ctx->ac, "llvm.sin",
		                              ac_to_float_type(&ctx->ac, def_type), src[0]);
		break;
	case nir_op_fcos:
		result = emit_intrin_1f_param(&ctx->ac, "llvm.cos",
		                              ac_to_float_type(&ctx->ac, def_type), src[0]);
		break;
	case nir_op_fsqrt:
		result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
		                              ac_to_float_type(&ctx->ac, def_type), src[0]);
		break;
	case nir_op_fexp2:
		result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
		                              ac_to_float_type(&ctx->ac, def_type), src[0]);
		break;
	case nir_op_flog2:
		result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
		                              ac_to_float_type(&ctx->ac, def_type), src[0]);
		break;
	case nir_op_frsq:
		result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
		                              ac_to_float_type(&ctx->ac, def_type), src[0]);
839
		result = ac_build_fdiv(&ctx->ac, LLVMConstReal(LLVMTypeOf(result), 1.0), result);
840
		break;
841
842
	case nir_op_frexp_exp:
		src[0] = ac_to_float(&ctx->ac, src[0]);
843
844
845
846
847
		result = ac_build_frexp_exp(&ctx->ac, src[0],
					    ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])));
		if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) == 16)
			result = LLVMBuildSExt(ctx->ac.builder, result,
					       ctx->ac.i32, "");
848
849
850
		break;
	case nir_op_frexp_sig:
		src[0] = ac_to_float(&ctx->ac, src[0]);
851
852
		result = ac_build_frexp_mant(&ctx->ac, src[0],
					     instr->dest.dest.ssa.bit_size);
853
		break;
854
855
856
857
	case nir_op_fpow:
		result = emit_intrin_2f_param(&ctx->ac, "llvm.pow",
		                              ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
		break;
858
859
860
861
862
863
	case nir_op_fmax:
		result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum",
		                              ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
		if (ctx->ac.chip_class < GFX9 &&
		    instr->dest.dest.ssa.bit_size == 32) {
			/* Only pre-GFX9 chips do not flush denorms. */
864
865
			result = ac_build_canonicalize(&ctx->ac, result,
						       instr->dest.dest.ssa.bit_size);
866
867
868
869
870
871
872
873
		}
		break;
	case nir_op_fmin:
		result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum",
		                              ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
		if (ctx->ac.chip_class < GFX9 &&
		    instr->dest.dest.ssa.bit_size == 32) {
			/* Only pre-GFX9 chips do not flush denorms. */
874
875
			result = ac_build_canonicalize(&ctx->ac, result,
						       instr->dest.dest.ssa.bit_size);
876
877
878
		}
		break;
	case nir_op_ffma:
Marek Olšák's avatar
Marek Olšák committed
879
880
		/* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */
		result = emit_intrin_3f_param(&ctx->ac, ctx->ac.chip_class >= GFX10 ? "llvm.fma" : "llvm.fmuladd",
881
882
883
884
		                              ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
		break;
	case nir_op_ldexp:
		src[0] = ac_to_float(&ctx->ac, src[0]);
885
		if (ac_get_elem_bits(&ctx->ac, def_type) == 32)
886
			result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f32", ctx->ac.f32, src, 2, AC_FUNC_ATTR_READNONE);
887
888
		else if (ac_get_elem_bits(&ctx->ac, def_type) == 16)
			result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f16", ctx->ac.f16, src, 2, AC_FUNC_ATTR_READNONE);
889
890
891
		else
			result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f64", ctx->ac.f64, src, 2, AC_FUNC_ATTR_READNONE);
		break;
892
893
894
895
896
897
	case nir_op_bfm:
		result = emit_bfm(&ctx->ac, src[0], src[1]);
		break;
	case nir_op_bitfield_select:
		result = emit_bitfield_select(&ctx->ac, src[0], src[1], src[2]);
		break;
898
899
	case nir_op_ubfe:
		result = ac_build_bfe(&ctx->ac, src[0], src[1], src[2], false);
900
		break;
901
902
	case nir_op_ibfe:
		result = ac_build_bfe(&ctx->ac, src[0], src[1], src[2], true);
903
904
		break;
	case nir_op_bitfield_reverse:
905
		result = ac_build_bitfield_reverse(&ctx->ac, src[0]);
906
907
		break;
	case nir_op_bit_count:
908
		result = ac_build_bit_count(&ctx->ac, src[0]);
909
910
911
912
913
914
915
916
		break;
	case nir_op_vec2:
	case nir_op_vec3:
	case nir_op_vec4:
		for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
			src[i] = ac_to_integer(&ctx->ac, src[i]);
		result = ac_build_gather_values(&ctx->ac, src, num_components);
		break;
917
	case nir_op_f2i8:
918
	case nir_op_f2i16:
919
920
921
922
923
	case nir_op_f2i32:
	case nir_op_f2i64:
		src[0] = ac_to_float(&ctx->ac, src[0]);
		result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, "");
		break;
924
	case nir_op_f2u8:
925
	case nir_op_f2u16:
926
927
928
929
930
	case nir_op_f2u32:
	case nir_op_f2u64:
		src[0] = ac_to_float(&ctx->ac, src[0]);
		result = LLVMBuildFPToUI(ctx->ac.builder, src[0], def_type, "");
		break;
931
	case nir_op_i2f16:
932
933
934
935
	case nir_op_i2f32:
	case nir_op_i2f64:
		result = LLVMBuildSIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
		break;
936
	case nir_op_u2f16:
937
938
939
940
	case nir_op_u2f32:
	case nir_op_u2f64:
		result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
		break;
941
	case nir_op_f2f16_rtz:
942
		src[0] = ac_to_float(&ctx->ac, src[0]);
943
944
		if (LLVMTypeOf(src[0]) == ctx->ac.f64)
			src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, "");
945
946
947
		LLVMValueRef param[2] = { src[0], ctx->ac.f32_0 };
		result = ac_build_cvt_pkrtz_f16(&ctx->ac, param);
		result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
948
		break;
949
	case nir_op_f2f16_rtne:
950
	case nir_op_f2f16:
951
	case nir_op_f2f32:
952
	case nir_op_f2f64:
953
		src[0] = ac_to_float(&ctx->ac, src[0]);
954
955
956
957
		if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
			result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
		else
			result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
958
		break;
959
	case nir_op_u2u8:
960
	case nir_op_u2u16:
961
962
963
964
965
966
967
	case nir_op_u2u32:
	case nir_op_u2u64:
		if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
			result = LLVMBuildZExt(ctx->ac.builder, src[0], def_type, "");
		else
			result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
		break;
968
	case nir_op_i2i8:
969
	case nir_op_i2i16:
970
971
972
973
974
975
976
	case nir_op_i2i32:
	case nir_op_i2i64:
		if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
			result = LLVMBuildSExt(ctx->ac.builder, src[0], def_type, "");
		else
			result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
		break;
977
	case nir_op_b32csel:
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
		result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]);
		break;
	case nir_op_find_lsb:
		result = ac_find_lsb(&ctx->ac, ctx->ac.i32, src[0]);
		break;
	case nir_op_ufind_msb:
		result = ac_build_umsb(&ctx->ac, src[0], ctx->ac.i32);
		break;
	case nir_op_ifind_msb:
		result = ac_build_imsb(&ctx->ac, src[0], ctx->ac.i32);
		break;
	case nir_op_uadd_carry:
		result = emit_uint_carry(&ctx->ac, "llvm.uadd.with.overflow.i32", src[0], src[1]);
		break;
	case nir_op_usub_borrow:
		result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]);
		break;
995
996
997
	case nir_op_b2f16:
	case nir_op_b2f32:
	case nir_op_b2f64:
998
		result = emit_b2f(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
999
		break;
1000
	case nir_op_f2b32:
1001
1002
		result = emit_f2b(&ctx->ac, src[0]);
		break;
1003
	case nir_op_b2i8:
1004
1005
1006
	case nir_op_b2i16:
	case nir_op_b2i32:
	case nir_op_b2i64:
1007
1008
		result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
		break;
1009
	case nir_op_i2b32:
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
		result = emit_i2b(&ctx->ac, src[0]);
		break;
	case nir_op_fquantize2f16:
		result = emit_f2f16(&ctx->ac, src[0]);
		break;
	case nir_op_umul_high:
		result = emit_umul_high(&ctx->ac, src[0], src[1]);
		break;
	case nir_op_imul_high:
		result = emit_imul_high(&ctx->ac, src[0], src[1]);
		break;
	case nir_op_pack_half_2x16:
1022
1023
1024
1025
1026
1027
1028
		result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pkrtz_f16);
		break;
	case nir_op_pack_snorm_2x16:
		result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pknorm_i16);
		break;
	case nir_op_pack_unorm_2x16:
		result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pknorm_u16);
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
		break;
	case nir_op_unpack_half_2x16:
		result = emit_unpack_half_2x16(&ctx->ac, src[0]);
		break;
	case nir_op_fddx:
	case nir_op_fddy:
	case nir_op_fddx_fine:
	case nir_op_fddy_fine:
	case nir_op_fddx_coarse:
	case nir_op_fddy_coarse:
		result = emit_ddxy(ctx, instr->op, src[0]);
1040
		break;
1041

1042
1043
1044
1045
1046
1047
1048
1049
	case nir_op_unpack_64_2x32_split_x: {
		assert(ac_get_llvm_num_components(src[0]) == 1);
		LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
						    ctx->ac.v2i32,
						    "");
		result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
						 ctx->ac.i32_0, "");
		break;
1050
1051
	}

1052
1053
1054
1055
1056
1057
1058
1059
1060
	case nir_op_unpack_64_2x32_split_y: {
		assert(ac_get_llvm_num_components(src[0]) == 1);
		LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
						    ctx->ac.v2i32,
						    "");
		result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
						 ctx->ac.i32_1, "");
		break;
	}
1061

1062
	case nir_op_pack_64_2x32_split: {
1063
		LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2);
1064
1065
1066
		result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, "");
		break;
	}
1067

1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
	case nir_op_pack_32_2x16_split: {
		LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2);
		result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i32, "");
		break;
	}

	case nir_op_unpack_32_2x16_split_x: {
		LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
						    ctx->ac.v2i16,
						    "");
		result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
						 ctx->ac.i32_0, "");
		break;
	}

	case nir_op_unpack_32_2x16_split_y: {
		LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
						    ctx->ac.v2i16,
						    "");
		result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
						 ctx->ac.i32_1, "");
		break;
	}

1092
1093
1094
1095
1096
1097
	case nir_op_cube_face_coord: {
		src[0] = ac_to_float(&ctx->ac, src[0]);
		LLVMValueRef results[2];
		LLVMValueRef in[3];
		for (unsigned chan = 0; chan < 3; chan++)
			in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
Rhys Perry's avatar
Rhys Perry committed
1098
		results[0] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubesc",
1099
						ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
Rhys Perry's avatar
Rhys Perry committed
1100
		results[1] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubetc",
1101
						ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
Rhys Perry's avatar
Rhys Perry committed