ac_llvm_build.c 135 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/* based on pieces from si_pipe.c and radeon_llvm_emit.c */
#include "ac_llvm_build.h"

#include <llvm-c/Core.h>

#include "c11/threads.h"

#include <assert.h>
#include <stdio.h>

#include "ac_llvm_util.h"
36
#include "ac_exp_param.h"
37
38
#include "util/bitscan.h"
#include "util/macros.h"
39
#include "util/u_atomic.h"
40
#include "util/u_math.h"
41
42
#include "sid.h"

43
44
#include "shader_enums.h"

Timothy Arceri's avatar
Timothy Arceri committed
45
46
47
48
49
50
51
52
53
54
#define AC_LLVM_INITIAL_CF_DEPTH 4

/* Data for if/else/endif and bgnloop/endloop control flow structures.
 *
 * One instance describes a single open control-flow construct.
 */
struct ac_llvm_flow {
	/* Loop exit or next part of if/else/endif. */
	LLVMBasicBlockRef next_block;
	/* Block at the top of a loop.
	 * NOTE(review): presumably left NULL for if/else/endif entries --
	 * confirm against the flow helpers that fill this struct.
	 */
	LLVMBasicBlockRef loop_entry_block;
};

55
56
57
58
59
/* Initialize module-independent parts of the context.
 *
 * The caller is responsible for initializing ctx::module and ctx::builder.
 */
void
60
ac_llvm_context_init(struct ac_llvm_context *ctx,
61
		     struct ac_llvm_compiler *compiler,
62
		     enum chip_class chip_class, enum radeon_family family,
63
64
		     enum ac_float_mode float_mode, unsigned wave_size,
		     unsigned ballot_mask_bits)
65
66
67
{
	LLVMValueRef args[1];

68
69
	ctx->context = LLVMContextCreate();

70
	ctx->chip_class = chip_class;
71
	ctx->family = family;
72
	ctx->wave_size = wave_size;
73
	ctx->ballot_mask_bits = ballot_mask_bits;
74
75
76
	ctx->module = ac_create_module(wave_size == 32 ? compiler->tm_wave32
						       : compiler->tm,
				       ctx->context);
77
	ctx->builder = ac_create_builder(ctx->context, float_mode);
78
79
80
81

	ctx->voidt = LLVMVoidTypeInContext(ctx->context);
	ctx->i1 = LLVMInt1TypeInContext(ctx->context);
	ctx->i8 = LLVMInt8TypeInContext(ctx->context);
82
	ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
83
	ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
84
	ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
85
	ctx->intptr = ctx->i32;
86
	ctx->f16 = LLVMHalfTypeInContext(ctx->context);
87
	ctx->f32 = LLVMFloatTypeInContext(ctx->context);
88
	ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
89
	ctx->v2i16 = LLVMVectorType(ctx->i16, 2);
90
	ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
91
	ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
92
	ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
93
	ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
94
	ctx->v3f32 = LLVMVectorType(ctx->f32, 3);
95
	ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
96
	ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
97
	ctx->iN_wavemask = LLVMIntTypeInContext(ctx->context, ctx->wave_size);
98
	ctx->iN_ballotmask = LLVMIntTypeInContext(ctx->context, ballot_mask_bits);
99

100
101
	ctx->i8_0 = LLVMConstInt(ctx->i8, 0, false);
	ctx->i8_1 = LLVMConstInt(ctx->i8, 1, false);
102
103
	ctx->i16_0 = LLVMConstInt(ctx->i16, 0, false);
	ctx->i16_1 = LLVMConstInt(ctx->i16, 1, false);
104
105
	ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false);
	ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false);
106
107
	ctx->i64_0 = LLVMConstInt(ctx->i64, 0, false);
	ctx->i64_1 = LLVMConstInt(ctx->i64, 1, false);
108
109
	ctx->f16_0 = LLVMConstReal(ctx->f16, 0.0);
	ctx->f16_1 = LLVMConstReal(ctx->f16, 1.0);
110
111
	ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0);
	ctx->f32_1 = LLVMConstReal(ctx->f32, 1.0);
112
	ctx->f64_0 = LLVMConstReal(ctx->f64, 0.0);
113
	ctx->f64_1 = LLVMConstReal(ctx->f64, 1.0);
114

115
116
117
	ctx->i1false = LLVMConstInt(ctx->i1, 0, false);
	ctx->i1true = LLVMConstInt(ctx->i1, 1, false);

118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
	ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
						     "range", 5);

	ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context,
							       "invariant.load", 14);

	ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, "fpmath", 6);

	args[0] = LLVMConstReal(ctx->f32, 2.5);
	ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);

	ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context,
							"amdgpu.uniform", 14);

	ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
133
	ctx->flow = calloc(1, sizeof(*ctx->flow));
134
135
}

Timothy Arceri's avatar
Timothy Arceri committed
136
137
138
/* Release the control-flow state owned by the context.
 *
 * Frees ctx->flow->stack and ctx->flow and resets ctx->flow to NULL.
 * Nothing else in the context is released here.
 *
 * Does nothing when ctx->flow is already NULL, so a repeated call (or a
 * call after the allocation in ac_llvm_context_init failed) does not
 * dereference a NULL pointer.
 */
void
ac_llvm_context_dispose(struct ac_llvm_context *ctx)
{
	if (!ctx->flow)
		return;

	free(ctx->flow->stack);
	free(ctx->flow);
	ctx->flow = NULL;
}

144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
/* Return the element count of \p value when it has a vector type,
 * and 1 for a value of any other type.
 */
int
ac_get_llvm_num_components(LLVMValueRef value)
{
	LLVMTypeRef value_type = LLVMTypeOf(value);

	if (LLVMGetTypeKind(value_type) != LLVMVectorTypeKind)
		return 1;

	return LLVMGetVectorSize(value_type);
}

/* Extract element \p index from \p value.
 *
 * A non-vector \p value is returned unchanged; in that case \p index must
 * be 0 (checked with assert only). For a vector, an extractelement
 * instruction with a constant i32 index is emitted.
 */
LLVMValueRef
ac_llvm_extract_elem(struct ac_llvm_context *ac,
		     LLVMValueRef value,
		     int index)
{
	if (LLVMGetTypeKind(LLVMTypeOf(value)) != LLVMVectorTypeKind) {
		assert(index == 0);
		return value;
	}

	return LLVMBuildExtractElement(ac->builder, value,
				       LLVMConstInt(ac->i32, index, false), "");
}

168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
/* Return the bit width of \p type, or of its element type when \p type is
 * a vector. Integer types report their declared width; f16/f32/f64 report
 * 16/32/64. Any other type hits unreachable().
 */
int
ac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type)
{
	LLVMTypeRef elem = type;

	if (LLVMGetTypeKind(elem) == LLVMVectorTypeKind)
		elem = LLVMGetElementType(elem);

	if (LLVMGetTypeKind(elem) == LLVMIntegerTypeKind)
		return LLVMGetIntTypeWidth(elem);
	else if (elem == ctx->f16)
		return 16;
	else if (elem == ctx->f32)
		return 32;
	else if (elem == ctx->f64)
		return 64;

	unreachable("Unhandled type kind in get_elem_bits");
}

187
188
189
190
191
192
193
194
/* Return the size of \p type in bytes.
 *
 * - integers: bit width / 8 (so widths below 8 bits yield 0)
 * - half / float / double: 2 / 4 / 8
 * - pointers: 4 in AC_ADDR_SPACE_CONST_32BIT, 8 in every other address space
 * - vectors and arrays: element count times the element size
 *
 * Any other type kind asserts and returns 0.
 */
unsigned
ac_get_type_size(LLVMTypeRef type)
{
	LLVMTypeKind kind = LLVMGetTypeKind(type);

	switch (kind) {
	case LLVMIntegerTypeKind:
		return LLVMGetIntTypeWidth(type) / 8;
	case LLVMHalfTypeKind:
		return 2;
	case LLVMFloatTypeKind:
		return 4;
	case LLVMDoubleTypeKind:
		return 8;
	case LLVMPointerTypeKind:
		if (LLVMGetPointerAddressSpace(type) == AC_ADDR_SPACE_CONST_32BIT)
			return 4;
		return 8;
	case LLVMVectorTypeKind:
		return LLVMGetVectorSize(type) *
		       ac_get_type_size(LLVMGetElementType(type));
	case LLVMArrayTypeKind:
		return LLVMGetArrayLength(type) *
		       ac_get_type_size(LLVMGetElementType(type));
	default:
		assert(0);
		return 0;
	}
}

217
218
/* Map a scalar type to the integer type of the same bit width.
 *
 * Only the context's cached i8/i16/i32/i64 and f16/f32/f64 types are
 * recognized (compared by identity); anything else hits unreachable().
 */
static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
{
	if (t == ctx->i8)
		return ctx->i8;
	else if (t == ctx->f16 || t == ctx->i16)
		return ctx->i16;
	else if (t == ctx->f32 || t == ctx->i32)
		return ctx->i32;
	else if (t == ctx->f64 || t == ctx->i64)
		return ctx->i64;
	else
		unreachable("Unhandled integer size");
}

/* Return the integer type that has the same size as \p t.
 *
 * - vectors map to a vector of the same length with integer elements
 * - pointers map to i64 for AC_ADDR_SPACE_GLOBAL and to i32 for
 *   AC_ADDR_SPACE_LDS and AC_ADDR_SPACE_CONST_32BIT (the latter matches
 *   the 4-byte size ac_get_type_size reports for that address space);
 *   other address spaces hit unreachable()
 * - scalars go through to_integer_type_scalar()
 */
LLVMTypeRef
ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
{
	if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
		LLVMTypeRef elem_type = LLVMGetElementType(t);
		return LLVMVectorType(to_integer_type_scalar(ctx, elem_type),
		                      LLVMGetVectorSize(t));
	}
	if (LLVMGetTypeKind(t) == LLVMPointerTypeKind) {
		switch (LLVMGetPointerAddressSpace(t)) {
		case AC_ADDR_SPACE_GLOBAL:
			return ctx->i64;
		case AC_ADDR_SPACE_CONST_32BIT:
		case AC_ADDR_SPACE_LDS:
			return ctx->i32;
		default:
			unreachable("unhandled address space");
		}
	}
	return to_integer_type_scalar(ctx, t);
}

/* Convert \p v to the integer type chosen by ac_to_integer_type().
 *
 * Pointers are converted with ptrtoint; every other value is bitcast.
 */
LLVMValueRef
ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v)
{
	LLVMTypeRef type = LLVMTypeOf(v);
	if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) {
		return LLVMBuildPtrToInt(ctx->builder, v, ac_to_integer_type(ctx, type), "");
	}
	return LLVMBuildBitCast(ctx->builder, v, ac_to_integer_type(ctx, type), "");
}

262
263
264
265
266
267
268
269
270
/* Like ac_to_integer(), except that pointer values are passed through
 * untouched instead of being converted.
 */
LLVMValueRef
ac_to_integer_or_pointer(struct ac_llvm_context *ctx, LLVMValueRef v)
{
	if (LLVMGetTypeKind(LLVMTypeOf(v)) != LLVMPointerTypeKind)
		return ac_to_integer(ctx, v);

	return v;
}

271
272
/* Map a scalar type to the floating-point type of the same bit width.
 *
 * i16/f16 -> f16, i32/f32 -> f32, i64/f64 -> f64. i8 is returned as i8.
 * Types are compared by identity against the context's cached types;
 * anything else hits unreachable().
 */
static LLVMTypeRef to_float_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
{
	if (t == ctx->i8)
		return ctx->i8;
	else if (t == ctx->i16 || t == ctx->f16)
		return ctx->f16;
	else if (t == ctx->i32 || t == ctx->f32)
		return ctx->f32;
	else if (t == ctx->i64 || t == ctx->f64)
		return ctx->f64;
	else
		unreachable("Unhandled float size");
}

/* Return the float type matching \p t: scalars go through
 * to_float_type_scalar(); vectors keep their length and get their element
 * type converted the same way.
 */
LLVMTypeRef
ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
{
	if (LLVMGetTypeKind(t) != LLVMVectorTypeKind)
		return to_float_type_scalar(ctx, t);

	LLVMTypeRef float_elem = to_float_type_scalar(ctx, LLVMGetElementType(t));

	return LLVMVectorType(float_elem, LLVMGetVectorSize(t));
}

/* Bitcast \p v to the float type chosen by ac_to_float_type(). */
LLVMValueRef
ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v)
{
	LLVMTypeRef float_type = ac_to_float_type(ctx, LLVMTypeOf(v));

	return LLVMBuildBitCast(ctx->builder, v, float_type, "");
}


304
LLVMValueRef
305
306
307
ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
		   LLVMTypeRef return_type, LLVMValueRef *params,
		   unsigned param_count, unsigned attrib_mask)
308
{
309
	LLVMValueRef function, call;
310
	bool set_callsite_attrs = !(attrib_mask & AC_FUNC_ATTR_LEGACY);
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329

	function = LLVMGetNamedFunction(ctx->module, name);
	if (!function) {
		LLVMTypeRef param_types[32], function_type;
		unsigned i;

		assert(param_count <= 32);

		for (i = 0; i < param_count; ++i) {
			assert(params[i]);
			param_types[i] = LLVMTypeOf(params[i]);
		}
		function_type =
		    LLVMFunctionType(return_type, param_types, param_count, 0);
		function = LLVMAddFunction(ctx->module, name, function_type);

		LLVMSetFunctionCallConv(function, LLVMCCallConv);
		LLVMSetLinkage(function, LLVMExternalLinkage);

330
331
		if (!set_callsite_attrs)
			ac_add_func_attributes(ctx->context, function, attrib_mask);
332
	}
333
334
335
336
337

	call = LLVMBuildCall(ctx->builder, function, params, param_count, "");
	if (set_callsite_attrs)
		ac_add_func_attributes(ctx->context, call, attrib_mask);
	return call;
338
339
}

340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
/**
 * Write the textual name of \p type into \p buf, in the form used as a
 * suffix of overloaded intrinsic names.
 *
 * Integer, half, float and double scalars produce "iN", "f16", "f32" and
 * "f64"; vectors of those get a "vM" prefix (e.g. "v4i32"). For any other
 * element type nothing is written after the optional vector prefix.
 *
 * \param type     scalar or vector type to describe
 * \param buf      destination buffer
 * \param bufsize  size of \p buf in bytes; must be at least 8
 *
 * If the vector prefix cannot be formatted or does not fit, an error is
 * printed to stderr and the function returns early.
 */
void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize)
{
	LLVMTypeRef elem_type = type;

	assert(bufsize >= 8);

	if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
		int ret = snprintf(buf, bufsize, "v%u",
					LLVMGetVectorSize(type));
		/* snprintf returns the untruncated length, so a value >= bufsize
		 * means the prefix was cut off; advancing by it would run past
		 * the buffer and wrap bufsize around.
		 */
		if (ret < 0 || (unsigned)ret >= bufsize) {
			char *type_name = LLVMPrintTypeToString(type);
			fprintf(stderr, "Error building type name for: %s\n",
				type_name);
			LLVMDisposeMessage(type_name);
			return;
		}
		elem_type = LLVMGetElementType(type);
		buf += ret;
		bufsize -= ret;
	}
	switch (LLVMGetTypeKind(elem_type)) {
	default: break;
	case LLVMIntegerTypeKind:
		/* LLVMGetIntTypeWidth returns unsigned. */
		snprintf(buf, bufsize, "i%u", LLVMGetIntTypeWidth(elem_type));
		break;
	case LLVMHalfTypeKind:
		snprintf(buf, bufsize, "f16");
		break;
	case LLVMFloatTypeKind:
		snprintf(buf, bufsize, "f32");
		break;
	case LLVMDoubleTypeKind:
		snprintf(buf, bufsize, "f64");
		break;
	}
}

381
382
383
384
385
386
387
388
389
390
391
392
393
394
/**
 * Create a PHI node of the given \p type at the builder's current position
 * and attach \p count_incoming (value, predecessor block) pairs to it in a
 * single step. Returns the PHI node.
 */
LLVMValueRef
ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
	     unsigned count_incoming, LLVMValueRef *values,
	     LLVMBasicBlockRef *blocks)
{
	LLVMValueRef node = LLVMBuildPhi(ctx->builder, type, "");

	LLVMAddIncoming(node, values, blocks, count_incoming);

	return node;
}

Marek Olšák's avatar
Marek Olšák committed
395
396
397
398
399
400
/* Emit a call to the argument-less, void llvm.amdgcn.s.barrier intrinsic,
 * marked convergent.
 */
void ac_build_s_barrier(struct ac_llvm_context *ctx)
{
	const unsigned attrs = AC_FUNC_ATTR_CONVERGENT;

	ac_build_intrinsic(ctx, "llvm.amdgcn.s.barrier", ctx->voidt,
			   NULL, 0, attrs);
}

401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
/* Emit an empty, side-effecting inline-assembly statement so that LLVM does
 * not move code (memory accesses at least) across this point.
 *
 * When \p pvgpr is non-NULL, the first 32 bits of *pvgpr are routed through
 * the inline assembly and *pvgpr is replaced by the result; this keeps LLVM
 * from hoisting calls to ReadNone functions that consume the value. The
 * size of *pvgpr must be a multiple of 4 bytes.
 */
void
ac_build_optimization_barrier(struct ac_llvm_context *ctx,
			      LLVMValueRef *pvgpr)
{
	static int counter = 0;

	LLVMBuilderRef b = ctx->builder;
	char asm_text[16];

	/* Each barrier gets its own asm text: a comment holding a running number. */
	snprintf(asm_text, sizeof(asm_text), "; %d", p_atomic_inc_return(&counter));

	if (!pvgpr) {
		/* Plain barrier: void(void) asm with no operands. */
		LLVMTypeRef fn_type = LLVMFunctionType(ctx->voidt, NULL, 0, false);
		LLVMValueRef barrier = LLVMConstInlineAsm(fn_type, asm_text, "", true, false);

		LLVMBuildCall(b, barrier, NULL, 0, "");
		return;
	}

	/* Value barrier: i32(i32) asm whose output is tied to its input. */
	LLVMTypeRef fn_type = LLVMFunctionType(ctx->i32, &ctx->i32, 1, false);
	LLVMValueRef barrier = LLVMConstInlineAsm(fn_type, asm_text, "=v,0", true, false);
	LLVMValueRef whole = *pvgpr;
	LLVMTypeRef orig_type = LLVMTypeOf(whole);
	unsigned byte_size = ac_get_type_size(orig_type);
	LLVMValueRef first_dword;

	assert(byte_size % 4 == 0);

	/* View the value as a vector of dwords and pass only dword 0 through. */
	whole = LLVMBuildBitCast(b, whole, LLVMVectorType(ctx->i32, byte_size / 4), "");
	first_dword = LLVMBuildExtractElement(b, whole, ctx->i32_0, "");
	first_dword = LLVMBuildCall(b, barrier, &first_dword, 1, "");
	whole = LLVMBuildInsertElement(b, whole, first_dword, ctx->i32_0, "");
	whole = LLVMBuildBitCast(b, whole, orig_type, "");

	*pvgpr = whole;
}

443
444
445
LLVMValueRef
ac_build_shader_clock(struct ac_llvm_context *ctx)
{
Marek Olšák's avatar
Marek Olšák committed
446
447
448
	const char *intr = HAVE_LLVM >= 0x0900 && ctx->chip_class >= GFX8 ?
				"llvm.amdgcn.s.memrealtime" : "llvm.readcyclecounter";
	LLVMValueRef tmp = ac_build_intrinsic(ctx, intr, ctx->i64, NULL, 0, 0);
449
450
451
	return LLVMBuildBitCast(ctx->builder, tmp, ctx->v2i32, "");
}

452
453
454
455
/* Build a wave-wide mask of type ctx->iN_wavemask from \p value by calling
 * llvm.amdgcn.icmp with the predicate "value != 0".
 *
 * The intrinsic name depends on HAVE_LLVM and, for LLVM >= 9, on
 * ctx->wave_size (i64 result for wave64, i32 otherwise). \p value is passed
 * through an optimization barrier and converted with ac_to_integer() before
 * the call.
 */
LLVMValueRef
ac_build_ballot(struct ac_llvm_context *ctx,
		LLVMValueRef value)
{
	const char *name;

	if (HAVE_LLVM >= 0x900) {
		if (ctx->wave_size == 64)
			name = "llvm.amdgcn.icmp.i64.i32";
		else
			name = "llvm.amdgcn.icmp.i32.i32";
	} else {
		name = "llvm.amdgcn.icmp.i32";
	}
	LLVMValueRef args[3] = {
		value,
		ctx->i32_0,
		LLVMConstInt(ctx->i32, LLVMIntNE, 0)
	};

	/* We currently have no other way to prevent LLVM from lifting the icmp
	 * calls to a dominating basic block.
	 */
	ac_build_optimization_barrier(ctx, &args[0]);

	args[0] = ac_to_integer(ctx, args[0]);

	return ac_build_intrinsic(ctx, name, ctx->iN_wavemask, args, 3,
				  AC_FUNC_ATTR_NOUNWIND |
				  AC_FUNC_ATTR_READNONE |
				  AC_FUNC_ATTR_CONVERGENT);
}

Marek Olšák's avatar
Marek Olšák committed
485
486
487
/* Turn the i1 \p value into an i64 mask by calling llvm.amdgcn.icmp with
 * the predicate "value != false".
 *
 * NOTE(review): unlike ac_build_ballot(), the intrinsic name and the result
 * type are fixed to 64 bits and do not follow ctx->wave_size -- confirm
 * whether callers rely on i64 before making this wave32-aware.
 */
LLVMValueRef ac_get_i1_sgpr_mask(struct ac_llvm_context *ctx,
				 LLVMValueRef value)
{
	const char *name = HAVE_LLVM >= 0x900 ? "llvm.amdgcn.icmp.i64.i1" : "llvm.amdgcn.icmp.i1";
	LLVMValueRef args[3] = {
		value,
		ctx->i1false,
		LLVMConstInt(ctx->i32, LLVMIntNE, 0),
	};

	return ac_build_intrinsic(ctx, name, ctx->i64, args, 3,
				  AC_FUNC_ATTR_NOUNWIND |
				  AC_FUNC_ATTR_READNONE |
				  AC_FUNC_ATTR_CONVERGENT);
}

501
502
503
504
505
506
507
508
509
510
511
512
513
/* Return an i1 that is true when the ballot of \p value equals the ballot
 * of the constant 1, i.e. both masks have exactly the same bits set.
 */
LLVMValueRef
ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef value)
{
	LLVMValueRef all_lanes_mask, value_mask;

	/* Keep this order: the ballot of the constant is emitted first. */
	all_lanes_mask = ac_build_ballot(ctx, ctx->i32_1);
	value_mask = ac_build_ballot(ctx, value);

	return LLVMBuildICmp(ctx->builder, LLVMIntEQ, value_mask,
			     all_lanes_mask, "");
}

/* Return an i1 that is true when the ballot of \p value is non-zero. */
LLVMValueRef
ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value)
{
	LLVMValueRef vote_set = ac_build_ballot(ctx, value);
	return LLVMBuildICmp(ctx->builder, LLVMIntNE, vote_set,
			     LLVMConstInt(ctx->iN_wavemask, 0, 0), "");
}

/* Return an i1 that is true when the ballot of \p value is either equal to
 * the ballot of the constant 1 ("all") or equal to zero ("none").
 */
LLVMValueRef
ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value)
{
	LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1);
	LLVMValueRef vote_set = ac_build_ballot(ctx, value);

	LLVMValueRef all = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
					 vote_set, active_set, "");
	LLVMValueRef none = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
					  vote_set,
					  LLVMConstInt(ctx->iN_wavemask, 0, 0), "");
	return LLVMBuildOr(ctx->builder, all, none, "");
}

531
532
533
534
535
536
537
538
539
540
541
542
543
544
/* Gather values[component .. component + value_count - 1] into a vector.
 *
 * With value_count == 1 the single value values[component] is returned as
 * a scalar. value_count == 0 is not allowed (unreachable). The vector's
 * element type is taken from the first gathered value.
 */
LLVMValueRef
ac_build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values,
			       unsigned value_count, unsigned component)
{
	LLVMValueRef vec = NULL;

	if (value_count == 1) {
		return values[component];
	} else if (!value_count)
		unreachable("value_count is 0");

	for (unsigned i = component; i < value_count + component; i++) {
		LLVMValueRef value = values[i];

		/* First iteration: create the undef vector to insert into. */
		if (i == component)
			vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count));
		LLVMValueRef index = LLVMConstInt(ctx->i32, i - component, false);
		vec = LLVMBuildInsertElement(ctx->builder, vec, value, index, "");
	}
	return vec;
}

553
554
555
556
557
/* Build a vector from \p value_count entries of \p values.
 *
 * \param values         source array; entry i is read from
 *                       values[i * value_stride]
 * \param value_count    number of elements to gather; 0 is not allowed
 *                       (unreachable)
 * \param value_stride   distance between consecutive entries in \p values
 * \param load           when true, each entry is treated as a pointer and
 *                       loaded before being inserted
 * \param always_vector  when false and value_count == 1, the single
 *                       (optionally loaded) value is returned as a scalar
 *                       instead of a 1-element vector
 *
 * The vector's element type is taken from the first gathered value.
 */
LLVMValueRef
ac_build_gather_values_extended(struct ac_llvm_context *ctx,
				LLVMValueRef *values,
				unsigned value_count,
				unsigned value_stride,
				bool load,
				bool always_vector)
{
	LLVMBuilderRef builder = ctx->builder;
	LLVMValueRef vec = NULL;
	unsigned i;

	if (value_count == 1 && !always_vector) {
		if (load)
			return LLVMBuildLoad(builder, values[0], "");
		return values[0];
	} else if (!value_count)
		unreachable("value_count is 0");

	for (i = 0; i < value_count; i++) {
		LLVMValueRef value = values[i * value_stride];
		if (load)
			value = LLVMBuildLoad(builder, value, "");

		/* First iteration: create the undef vector to insert into. */
		if (!i)
			vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count));
		LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
		vec = LLVMBuildInsertElement(builder, vec, value, index, "");
	}
	return vec;
}

/* Gather \p value_count consecutive values into a vector, without loading.
 * A single value is returned as a scalar. Shorthand for
 * ac_build_gather_values_extended() with stride 1.
 */
LLVMValueRef
ac_build_gather_values(struct ac_llvm_context *ctx,
		       LLVMValueRef *values,
		       unsigned value_count)
{
	return ac_build_gather_values_extended(ctx, values, value_count, 1, false, false);
}

593
594
/* Expand a scalar or vector to <dst_channels x type> by filling the remaining
 * channels with undef. Extract at most src_channels components from the input.
595
 */
596
597
598
599
600
static LLVMValueRef
ac_build_expand(struct ac_llvm_context *ctx,
		LLVMValueRef value,
		unsigned src_channels,
		unsigned dst_channels)
601
602
{
	LLVMTypeRef elemtype;
603
	LLVMValueRef chan[dst_channels];
604
605
606
607

	if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind) {
		unsigned vec_size = LLVMGetVectorSize(LLVMTypeOf(value));

608
		if (src_channels == dst_channels && vec_size == dst_channels)
609
610
			return value;

611
612
613
		src_channels = MIN2(src_channels, vec_size);

		for (unsigned i = 0; i < src_channels; i++)
614
615
616
617
			chan[i] = ac_llvm_extract_elem(ctx, value, i);

		elemtype = LLVMGetElementType(LLVMTypeOf(value));
	} else {
618
619
		if (src_channels) {
			assert(src_channels == 1);
620
621
622
623
624
			chan[0] = value;
		}
		elemtype = LLVMTypeOf(value);
	}

625
626
627
628
629
	for (unsigned i = src_channels; i < dst_channels; i++)
		chan[i] = LLVMGetUndef(elemtype);

	return ac_build_gather_values(ctx, chan, dst_channels);
}
630

631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
/* Return components [start, start + channels) of \p value, gathered into a
 * new vector (or a scalar when channels == 1).
 */
LLVMValueRef
ac_extract_components(struct ac_llvm_context *ctx,
		      LLVMValueRef value,
		      unsigned start,
		      unsigned channels)
{
	LLVMValueRef picked[channels];
	unsigned i = 0;

	while (i < channels) {
		picked[i] = ac_llvm_extract_elem(ctx, value, start + i);
		i++;
	}

	return ac_build_gather_values(ctx, picked, channels);
}

647
648
649
650
651
652
653
654
/* Expand a scalar or vector to <4 x type> by filling the remaining channels
 * with undef. Extract at most num_channels components from the input.
 */
LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
				     LLVMValueRef value,
				     unsigned num_channels)
{
	return ac_build_expand(ctx, value, num_channels, 4);
}

Marek Olšák's avatar
Marek Olšák committed
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
/* Emit a call to llvm.rint for \p value. The overload is picked from the
 * byte size of the value's type: 2 -> f16, 4 -> f32, anything else -> f64.
 */
LLVMValueRef ac_build_round(struct ac_llvm_context *ctx, LLVMValueRef value)
{
	LLVMTypeRef value_type = LLVMTypeOf(value);
	const char *name;

	switch (ac_get_type_size(value_type)) {
	case 2:
		name = "llvm.rint.f16";
		break;
	case 4:
		name = "llvm.rint.f32";
		break;
	default:
		name = "llvm.rint.f64";
		break;
	}

	return ac_build_intrinsic(ctx, name, LLVMTypeOf(value), &value, 1,
				  AC_FUNC_ATTR_READNONE);
}

673
LLVMValueRef
674
675
676
ac_build_fdiv(struct ac_llvm_context *ctx,
	      LLVMValueRef num,
	      LLVMValueRef den)
677
{
678
679
680
681
682
683
	/* If we do (num / den), LLVM >= 7.0 does:
	 *    return num * v_rcp_f32(den * (fabs(den) > 0x1.0p+96f ? 0x1.0p-32f : 1.0f));
	 *
	 * If we do (num * (1 / den)), LLVM does:
	 *    return num * v_rcp_f32(den);
	 */
684
	LLVMValueRef one = LLVMConstReal(LLVMTypeOf(num), 1.0);
Marek Olšák's avatar
Marek Olšák committed
685
	LLVMValueRef rcp = LLVMBuildFDiv(ctx->builder, one, den, "");
686
	LLVMValueRef ret = LLVMBuildFMul(ctx->builder, num, rcp, "");
687

688
	/* Use v_rcp_f32 instead of precise division. */
689
690
691
692
693
	if (!LLVMIsConstant(ret))
		LLVMSetMetadata(ret, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
	return ret;
}

694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
/* Unsigned division by a precomputed constant; see fast_idiv_by_const.h.
 *
 * Computes ((((num >> pre_shift) * multiplier) + increment) >> 32) >> post_shift,
 * with the multiply and add done in 64 bits.
 *
 * Set: increment = util_fast_udiv_info::increment ? multiplier : 0;
 */
LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx,
				LLVMValueRef num,
				LLVMValueRef multiplier,
				LLVMValueRef pre_shift,
				LLVMValueRef post_shift,
				LLVMValueRef increment)
{
	LLVMBuilderRef b = ctx->builder;
	LLVMValueRef shifted, wide_num, wide_mul, wide_inc, acc, high;

	shifted = LLVMBuildLShr(b, num, pre_shift, "");

	/* 32x32 -> 64-bit multiply, then add the rounding increment. */
	wide_num = LLVMBuildZExt(b, shifted, ctx->i64, "");
	wide_mul = LLVMBuildZExt(b, multiplier, ctx->i64, "");
	acc = LLVMBuildMul(b, wide_num, wide_mul, "");
	wide_inc = LLVMBuildZExt(b, increment, ctx->i64, "");
	acc = LLVMBuildAdd(b, acc, wide_inc, "");

	/* Keep the upper 32 bits. */
	high = LLVMBuildLShr(b, acc, LLVMConstInt(ctx->i64, 32, 0), "");
	high = LLVMBuildTrunc(b, high, ctx->i32, "");

	return LLVMBuildLShr(b, high, post_shift, "");
}

/* Cheaper variant of ac_build_fast_udiv(); see fast_idiv_by_const.h.
 *
 * Usable when num != UINT_MAX: the increment is added to the pre-shifted
 * numerator with a no-unsigned-wrap add before the 64-bit multiply.
 *
 * Set: increment = util_fast_udiv_info::increment;
 */
LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx,
				    LLVMValueRef num,
				    LLVMValueRef multiplier,
				    LLVMValueRef pre_shift,
				    LLVMValueRef post_shift,
				    LLVMValueRef increment)
{
	LLVMBuilderRef b = ctx->builder;
	LLVMValueRef adjusted, wide_num, wide_mul, product, high;

	adjusted = LLVMBuildLShr(b, num, pre_shift, "");
	adjusted = LLVMBuildNUWAdd(b, adjusted, increment, "");

	/* 32x32 -> 64-bit multiply. */
	wide_num = LLVMBuildZExt(b, adjusted, ctx->i64, "");
	wide_mul = LLVMBuildZExt(b, multiplier, ctx->i64, "");
	product = LLVMBuildMul(b, wide_num, wide_mul, "");

	/* Keep the upper 32 bits. */
	high = LLVMBuildLShr(b, product, LLVMConstInt(ctx->i64, 32, 0), "");
	high = LLVMBuildTrunc(b, high, ctx->i32, "");

	return LLVMBuildLShr(b, high, post_shift, "");
}

/* Simplest fast-division variant; see fast_idiv_by_const.h.
 *
 * Computes ((num * multiplier) >> 32) >> post_shift with a 64-bit multiply.
 * Both operands must fit in 31 bits and the divisor must not be 1.
 */
LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx,
					      LLVMValueRef num,
					      LLVMValueRef multiplier,
					      LLVMValueRef post_shift)
{
	LLVMBuilderRef b = ctx->builder;
	LLVMValueRef wide_num, wide_mul, product, high;

	/* 32x32 -> 64-bit multiply. */
	wide_num = LLVMBuildZExt(b, num, ctx->i64, "");
	wide_mul = LLVMBuildZExt(b, multiplier, ctx->i64, "");
	product = LLVMBuildMul(b, wide_num, wide_mul, "");

	/* Keep the upper 32 bits. */
	high = LLVMBuildLShr(b, product, LLVMConstInt(ctx->i64, 32, 0), "");
	high = LLVMBuildTrunc(b, high, ctx->i32, "");

	return LLVMBuildLShr(b, high, post_shift, "");
}

755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
/* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27
 * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is
 * already multiplied by two. id is the cube face number.
 */
struct cube_selection_coords {
	/* stc[0] holds sc and stc[1] holds tc. */
	LLVMValueRef stc[2];
	/* Major axis value, already multiplied by two. */
	LLVMValueRef ma;
	/* Cube face number; build_cube_intrinsic produces it as an f32 value. */
	LLVMValueRef id;
};

/* Fill \p out by calling the llvm.amdgcn.cube{tc,sc,ma,id} intrinsics on
 * the three values in \p in.
 *
 * Every intrinsic takes the same three arguments and returns f32:
 * cubesc -> out->stc[0], cubetc -> out->stc[1], cubema -> out->ma,
 * cubeid -> out->id.
 */
static void
build_cube_intrinsic(struct ac_llvm_context *ctx,
		     LLVMValueRef in[3],
		     struct cube_selection_coords *out)
{
	LLVMTypeRef f32 = ctx->f32;

	out->stc[1] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubetc",
					 f32, in, 3, AC_FUNC_ATTR_READNONE);
	out->stc[0] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubesc",
					 f32, in, 3, AC_FUNC_ATTR_READNONE);
	out->ma = ac_build_intrinsic(ctx, "llvm.amdgcn.cubema",
				     f32, in, 3, AC_FUNC_ATTR_READNONE);
	out->id = ac_build_intrinsic(ctx, "llvm.amdgcn.cubeid",
				     f32, in, 3, AC_FUNC_ATTR_READNONE);
}

/**
 * Build a manual selection sequence for cube face sc/tc coordinates and
 * major axis vector (multiplied by 2 for consistency) for the given
 * vec3 \p coords, for the face implied by \p selcoords.
 *
 * For the major axis, we always adjust the sign to be in the direction of
 * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards
 * the selcoords major axis.
 */
791
static void build_cube_select(struct ac_llvm_context *ctx,
792
793
794
795
796
			      const struct cube_selection_coords *selcoords,
			      const LLVMValueRef *coords,
			      LLVMValueRef *out_st,
			      LLVMValueRef *out_ma)
{
797
	LLVMBuilderRef builder = ctx->builder;
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
	LLVMTypeRef f32 = LLVMTypeOf(coords[0]);
	LLVMValueRef is_ma_positive;
	LLVMValueRef sgn_ma;
	LLVMValueRef is_ma_z, is_not_ma_z;
	LLVMValueRef is_ma_y;
	LLVMValueRef is_ma_x;
	LLVMValueRef sgn;
	LLVMValueRef tmp;

	is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE,
		selcoords->ma, LLVMConstReal(f32, 0.0), "");
	sgn_ma = LLVMBuildSelect(builder, is_ma_positive,
		LLVMConstReal(f32, 1.0), LLVMConstReal(f32, -1.0), "");

	is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 4.0), "");
	is_not_ma_z = LLVMBuildNot(builder, is_ma_z, "");
	is_ma_y = LLVMBuildAnd(builder, is_not_ma_z,
		LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 2.0), ""), "");
	is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), "");

	/* Select sc */
819
	tmp = LLVMBuildSelect(builder, is_ma_x, coords[2], coords[0], "");
820
	sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0),
821
		LLVMBuildSelect(builder, is_ma_z, sgn_ma,
822
823
824
825
826
			LLVMBuildFNeg(builder, sgn_ma, ""), ""), "");
	out_st[0] = LLVMBuildFMul(builder, tmp, sgn, "");

	/* Select tc */
	tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], "");
827
	sgn = LLVMBuildSelect(builder, is_ma_y, sgn_ma,
828
829
830
831
832
833
		LLVMConstReal(f32, -1.0), "");
	out_st[1] = LLVMBuildFMul(builder, tmp, sgn, "");

	/* Select ma */
	tmp = LLVMBuildSelect(builder, is_ma_z, coords[2],
		LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), "");
834
835
836
	tmp = ac_build_intrinsic(ctx, "llvm.fabs.f32",
				 ctx->f32, &tmp, 1, AC_FUNC_ATTR_READNONE);
	*out_ma = LLVMBuildFMul(builder, tmp, LLVMConstReal(f32, 2.0), "");
837
838
839
840
}

/**
 * Convert cube map coordinates (and optionally derivatives) in place into
 * the form produced by the cube intrinsics: 2D face coordinates plus a
 * face id (combined with the array layer for cube arrays).
 *
 * \param is_deriv    Whether \p derivs_arg should be converted as well.
 * \param is_array    Whether coords_arg[3] holds an array layer.
 * \param is_lod      When true, the layer in coords_arg[3] is not rounded
 *                    or clamped here.
 * \param coords_arg  In: cube coordinates (and layer in [3] for arrays).
 *                    Out: elements [0..2] are overwritten with the face
 *                    s, t and face id (or layer * 8 + face for arrays).
 * \param derivs_arg  In: two vec3 derivatives at [0..2] and [3..5].
 *                    Out: elements [0..3] are overwritten with two vec2
 *                    derivatives. May be NULL, in which case derivatives
 *                    are not converted.
 */
void
ac_prepare_cube_coords(struct ac_llvm_context *ctx,
		       bool is_deriv, bool is_array, bool is_lod,
		       LLVMValueRef *coords_arg,
		       LLVMValueRef *derivs_arg)
{
	LLVMBuilderRef builder = ctx->builder;
	struct cube_selection_coords selcoords;
	LLVMValueRef coords[3];
	LLVMValueRef invma;

	if (is_array && !is_lod) {
		LLVMValueRef tmp = ac_build_round(ctx, coords_arg[3]);

		/* Section 8.9 (Texture Functions) of the GLSL 4.50 spec says:
		 *
		 *    "For Array forms, the array layer used will be
		 *
		 *       max(0, min(d−1, floor(layer+0.5)))
		 *
		 *     where d is the depth of the texture array and layer
		 *     comes from the component indicated in the tables below."
		 *
		 * Workaround for an issue where the layer is taken from a
		 * helper invocation which happens to fall on a different
		 * layer due to extrapolation.
		 *
		 * GFX8 and earlier attempt to implement this in hardware by
		 * clamping the value of coords[2] = (8 * layer) + face.
		 * Unfortunately, this means that we end up with the wrong
		 * face when clamping occurs.
		 *
		 * Clamp the layer earlier to work around the issue.
		 */
		if (ctx->chip_class <= GFX8) {
			LLVMValueRef ge0;
			ge0 = LLVMBuildFCmp(builder, LLVMRealOGE, tmp, ctx->f32_0, "");
			tmp = LLVMBuildSelect(builder, ge0, tmp, ctx->f32_0, "");
		}

		coords_arg[3] = tmp;
	}

	build_cube_intrinsic(ctx, coords_arg, &selcoords);

	/* invma = 1 / |ma| */
	invma = ac_build_intrinsic(ctx, "llvm.fabs.f32",
			ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE);
	invma = ac_build_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma);

	for (int i = 0; i < 2; ++i)
		coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, "");

	coords[2] = selcoords.id;

	if (is_deriv && derivs_arg) {
		LLVMValueRef derivs[4];
		int axis;

		/* Convert cube derivatives to 2D derivatives. */
		for (axis = 0; axis < 2; axis++) {
			LLVMValueRef deriv_st[2];
			LLVMValueRef deriv_ma;

			/* Transform the derivative alongside the texture
			 * coordinate. Mathematically, the correct formula is
			 * as follows. Assume we're projecting onto the +Z face
			 * and denote by dx/dh the derivative of the (original)
			 * X texture coordinate with respect to horizontal
			 * window coordinates. The projection onto the +Z face
			 * plane is:
			 *
			 *   f(x,z) = x/z
			 *
			 * Then df/dh = df/dx * dx/dh + df/dz * dz/dh
			 *            = 1/z * dx/dh - x/z * 1/z * dz/dh.
			 *
			 * This motivates the implementation below.
			 *
			 * Whether this actually gives the expected results for
			 * apps that might feed in derivatives obtained via
			 * finite differences is anyone's guess. The OpenGL spec
			 * seems awfully quiet about how textureGrad for cube
			 * maps should be handled.
			 */
			build_cube_select(ctx, &selcoords, &derivs_arg[axis * 3],
					  deriv_st, &deriv_ma);

			deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, "");

			for (int i = 0; i < 2; ++i)
				derivs[axis * 2 + i] =
					LLVMBuildFSub(builder,
						LLVMBuildFMul(builder, deriv_st[i], invma, ""),
						LLVMBuildFMul(builder, deriv_ma, coords[i], ""), "");
		}

		memcpy(derivs_arg, derivs, sizeof(derivs));
	}

	/* Shift the texture coordinate. This must be applied after the
	 * derivative calculation.
	 */
	for (int i = 0; i < 2; ++i)
		coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(ctx->f32, 1.5), "");

	if (is_array) {
		/* for cube arrays coord.z = coord.w(array_index) * 8 + face */
		/* coords_arg.w component - array_index for cube arrays */
		coords[2] = ac_build_fmad(ctx, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), coords[2]);
	}

	memcpy(coords_arg, coords, sizeof(coords));
}


/* Emit llvm.amdgcn.interp.p1 followed by llvm.amdgcn.interp.p2 and return
 * the f32 result of the p2 call.
 *
 * p1 is called with (i, llvm_chan, attr_number, params); p2 is called with
 * (p1 result, j, llvm_chan, attr_number, params).
 */
LLVMValueRef
ac_build_fs_interp(struct ac_llvm_context *ctx,
		   LLVMValueRef llvm_chan,
		   LLVMValueRef attr_number,
		   LLVMValueRef params,
		   LLVMValueRef i,
		   LLVMValueRef j)
{
	LLVMValueRef args[5];
	LLVMValueRef p1;

	args[0] = i;
	args[1] = llvm_chan;
	args[2] = attr_number;
	args[3] = params;

	p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1",
				ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);

	args[0] = p1;
	args[1] = j;
	args[2] = llvm_chan;
	args[3] = attr_number;
	args[4] = params;

	return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2",
				  ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
}

983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
/* Two-step interpolation through the .f16 interp intrinsics.
 *
 * llvm.amdgcn.interp.p1.f16 yields an f32 intermediate from \p i, which is
 * then fed together with \p j into llvm.amdgcn.interp.p2.f16; the f16
 * result of that second call is returned. Both calls pass i1 false as the
 * operand placed just before \p params.
 */
LLVMValueRef
ac_build_fs_interp_f16(struct ac_llvm_context *ctx,
		       LLVMValueRef llvm_chan,
		       LLVMValueRef attr_number,
		       LLVMValueRef params,
		       LLVMValueRef i,
		       LLVMValueRef j)
{
	LLVMValueRef first_args[5] = {
		i,
		llvm_chan,
		attr_number,
		ctx->i1false,
		params,
	};
	LLVMValueRef partial =
		ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1.f16",
				   ctx->f32, first_args, 5,
				   AC_FUNC_ATTR_READNONE);

	LLVMValueRef second_args[6] = {
		partial,
		j,
		llvm_chan,
		attr_number,
		ctx->i1false,
		params,
	};

	return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2.f16",
				  ctx->f16, second_args, 6,
				  AC_FUNC_ATTR_READNONE);
}

1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
/* Emit llvm.amdgcn.interp.mov with (parameter, llvm_chan, attr_number,
 * params) and return its f32 result.
 */
LLVMValueRef
ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
		       LLVMValueRef parameter,
		       LLVMValueRef llvm_chan,
		       LLVMValueRef attr_number,
		       LLVMValueRef params)
{
	LLVMValueRef args[4];

	args[0] = parameter;
	args[1] = llvm_chan;
	args[2] = attr_number;
	args[3] = params;

	return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.mov",
				  ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
}

1032
1033
1034
1035
1036
1037
1038
1039
/* Single-index GEP: yields the address &base_ptr[index]. */
LLVMValueRef
ac_build_gep_ptr(struct ac_llvm_context *ctx,
	         LLVMValueRef base_ptr,
	         LLVMValueRef index)
{
	LLVMValueRef indices[1] = { index };

	return LLVMBuildGEP(ctx->builder, base_ptr, indices, 1, "");
}

1040
1041
1042
1043
1044
1045
/* Two-index GEP with a leading constant zero: the first index (i32 0)
 * steps through the pointer itself, the second selects element \p index.
 */
LLVMValueRef
ac_build_gep0(struct ac_llvm_context *ctx,
	      LLVMValueRef base_ptr,
	      LLVMValueRef index)
{
	LLVMValueRef indices[2] = {
		ctx->i32_0,
		index,
	};
	return LLVMBuildGEP(ctx->builder, base_ptr, indices, 2, "");
}

1052
1053
1054
1055
/* Advance \p ptr by \p index elements with a single-index GEP and cast the
 * result back to the type of \p ptr.
 */
LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr,
				  LLVMValueRef index)
{
	return LLVMBuildPointerCast(ctx->builder,
				    LLVMBuildGEP(ctx->builder, ptr, &index, 1, ""),
				    LLVMTypeOf(ptr), "");
}

1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
/* Store \p value into the element of \p base_ptr selected by \p index;
 * the destination address is computed with ac_build_gep0().
 */
void
ac_build_indexed_store(struct ac_llvm_context *ctx,
		       LLVMValueRef base_ptr, LLVMValueRef index,
		       LLVMValueRef value)
{
	LLVMValueRef dst = ac_build_gep0(ctx, base_ptr, index);

	LLVMBuildStore(ctx->builder, value, dst);
}

/**
 * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad.
 * It's equivalent to doing a load from &base_ptr[index].
 *
 * \param base_ptr  Where the array starts.
 * \param index     The element index into the array.
 * \param uniform   Whether the base_ptr and index can be assumed to be
1076
1077
 *                  dynamically uniform (i.e. load to an SGPR)
 * \param invariant Whether the load is invariant (no other opcodes affect it)
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
 * \param no_unsigned_wraparound
 *    For all possible re-associations and re-distributions of an expression
 *    "base_ptr + index * elemsize" into "addr + offset" (excluding GEPs
 *    without inbounds in base_ptr), this parameter is true if "addr + offset"
 *    does not result in an unsigned integer wraparound. This is used for
 *    optimal code generation of 32-bit pointer arithmetic.
 *
 *    For example, a 32-bit immediate offset that causes a 32-bit unsigned
 *    integer wraparound can't be an imm offset in s_load_dword, because
 *    the instruction performs "addr + offset" in 64 bits.
 *
 *    Expected usage for bindless textures by chaining GEPs:
 *      // possible unsigned wraparound, don't use InBounds:
 *      ptr1 = LLVMBuildGEP(base_ptr, index);
 *      image = load(ptr1); // becomes "s_load ptr1, 0"
 *
 *      ptr2 = LLVMBuildInBoundsGEP(ptr1, 32 / elemsize);
 *      sampler = load(ptr2); // becomes "s_load ptr1, 32" thanks to InBounds
1096
 */
1097
1098
static LLVMValueRef
ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
1099
1100
		     LLVMValueRef index, bool uniform, bool invariant,
		     bool no_unsigned_wraparound)
1101
{
1102
	LLVMValueRef pointer, result;
1103
1104

	if (no_unsigned_wraparound &&
1105
	    LLVMGetPointerAddressSpace(LLVMTypeOf(base_ptr)) == AC_ADDR_SPACE_CONST_32BIT)
1106
		pointer = LLVMBuildInBoundsGEP(ctx->builder, base_ptr, &index, 1, "");
1107
	else
1108
		pointer = LLVMBuildGEP(ctx->builder, base_ptr, &index, 1, "");
1109
1110
1111

	if (uniform)
		LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
1112
1113
1114
1115
	result = LLVMBuildLoad(ctx->builder, pointer, "");
	if (invariant)
		LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
	return result;
1116
1117
}

1118
1119
/* Plain indexed load from &base_ptr[index]: not uniform, not invariant, and
 * no no-wraparound assumption. See ac_build_load_custom().
 */
LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
			   LLVMValueRef index)
{
	return ac_build_load_custom(ctx, base_ptr, index, false, false, false);
}

/* Indexed load from &base_ptr[index] marked invariant; not uniform and no
 * no-wraparound assumption. See ac_build_load_custom().
 */
LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
				     LLVMValueRef base_ptr, LLVMValueRef index)
{
	return ac_build_load_custom(ctx, base_ptr, index, false, true, false);
}

1130
1131
/* This assumes that there is no unsigned integer wraparound during the address
 * computation, excluding all GEPs within base_ptr. */
1132
1133
1134
LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
				   LLVMValueRef base_ptr, LLVMValueRef index)
{
1135
1136
1137
1138
1139
1140
1141
1142
	return ac_build_load_custom(ctx, base_ptr, index, true, true, true);
}

/* Uniform, invariant indexed load from &base_ptr[index] for callers that
 * cannot rule out unsigned wraparound in the address computation.
 * See ac_build_load_custom() documentation. */
LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
				   LLVMValueRef base_ptr, LLVMValueRef index)
{
	return ac_build_load_custom(ctx, base_ptr, index, true, true, false);
}

1145
1146
static unsigned get_load_cache_policy(struct ac_llvm_context *ctx,
				      unsigned cache_policy)
1147
{
1148
1149
	return cache_policy |
	       (ctx->chip_class >= GFX10 && cache_policy & ac_glc ? ac_dlc : 0);
1150
1151
}

1152
1153
1154
1155
1156
1157
1158
1159
static void
ac_build_llvm8_buffer_store_common(struct ac_llvm_context *ctx,
				   LLVMValueRef rsrc,
				   LLVMValueRef data,
				   LLVMValueRef vindex,
				   LLVMValueRef voffset,
				   LLVMValueRef soffset,
				   unsigned num_channels,
1160
				   LLVMTypeRef return_channel_type,
1161
				   unsigned cache_policy,
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
				   bool use_format,
				   bool structurized)
{
	LLVMValueRef args[6];
	int idx = 0;
	args[idx++] = data;
	args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
	if (structurized)
		args[idx++] = vindex ? vindex : ctx->i32_0;
	args[idx++] = voffset ? voffset : ctx->i32_0;
	args[idx++] = soffset ? soffset : ctx->i32_0;
1173
	args[idx++] = LLVMConstInt(ctx->i32, cache_policy, 0);
1174
	unsigned func = !ac_has_vec3_support(ctx->chip_class, use_format) && num_channels == 3 ? 4 : num_channels;
1175
	const char *indexing_kind = structurized ? "struct" : "raw";
1176
1177
1178
1179
	char name[256], type_name[8];

	LLVMTypeRef type = func > 1 ? LLVMVectorType(return_channel_type, func) : return_channel_type;
	ac_build_type_name_for_intr(type, type_name, sizeof(type_name));
1180
1181
1182

	if (use_format) {
		snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.format.%s",
1183
			 indexing_kind, type_name);
1184
1185
	} else {
		snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.%s",
1186
			 indexing_kind, type_name);
1187
1188
1189
	}

	ac_build_intrinsic(ctx, name, ctx->voidt, args, idx,
1190
			   AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY);
1191
1192
1193
1194
1195
1196
1197
1198
1199
}

/* Structurized ".format" buffer store of f32 channels: forwards to
 * ac_build_llvm8_buffer_store_common() with no soffset (NULL),
 * use_format = true and structurized = true.
 */
void
ac_build_buffer_store_format(struct ac_llvm_context *ctx,
			     LLVMValueRef rsrc,
			     LLVMValueRef data,
			     LLVMValueRef vindex,
			     LLVMValueRef voffset,
			     unsigned num_channels,
			     unsigned cache_policy)
{
	ac_build_llvm8_buffer_store_common(ctx, rsrc, data, vindex,
					   voffset, NULL, num_channels,
					   ctx->f32, cache_policy,
					   true, true);
}

1208
1209
1210
1211
1212
/* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
 * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
 * or v4i32 (num_channels=3,4).
 */
void
1213
1214
1215
1216
ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
			    LLVMValueRef rsrc,
			    LLVMValueRef vdata,
			    unsigned num_channels,
1217
			    LLVMValueRef voffset,
1218
1219
			    LLVMValueRef soffset,
			    unsigned inst_offset,
1220
			    unsigned cache_policy,
1221
			    bool swizzle_enable_hint)
1222
{
1223
	/* Split 3 channel stores, because only LLVM 9+ support 3-channel
1224
	 * intrinsics. */
1225
	if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class, false)) {
1226
1227
1228
1229
1230
1231
1232
1233
1234
		LLVMValueRef v[3], v01;

		for (int i = 0; i < 3; i++) {
			v[i] = LLVMBuildExtractElement(ctx->builder, vdata,
					LLVMConstInt(ctx->i32, i, 0), "");
		}
		v01 = ac_build_gather_values(ctx, v, 2);

		ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset,
1235
					    soffset, inst_offset, cache_policy,
1236
					    swizzle_enable_hint);
1237
1238
		ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset,
					    soffset, inst_offset + 8,
1239
					    cache_policy,
1240
					    swizzle_enable_hint);
1241
1242
		return;
	}
1243

1244
1245
1246
1247
	/* SWIZZLE_ENABLE requires that soffset isn't folded into voffset
	 * (voffset is swizzled, but soffset isn't swizzled).
	 * llvm.amdgcn.buffer.store doesn't have a separate soffset parameter.
	 */
1248
1249
1250
1251
1252
1253
1254
	if (!swizzle_enable_hint) {
		LLVMValueRef offset = soffset;

		if (inst_offset)
			offset = LLVMBuildAdd(ctx->builder, offset,
					      LLVMConstInt(ctx->i32, inst_offset, 0), "");

1255
1256
1257
1258
1259
1260
1261
1262
		ac_build_llvm8_buffer_store_common(ctx, rsrc,
						   ac_to_float(ctx, vdata),
						   ctx->i32_0,
						   voffset, offset,
						   num_channels,
						   ctx->f32,
						   cache_policy,
						   false, false);
1263
		return;
1264
1265
	}

1266
	static const unsigned dfmts[] = {
1267
1268
1269
1270
1271
		V_008F0C_BUF_DATA_FORMAT_32,
		V_008F0C_BUF_DATA_FORMAT_32_32,
		V_008F0C_BUF_DATA_FORMAT_32_32_32,
		V_008F0C_BUF_DATA_FORMAT_32_32_32_32
	};
1272
1273
1274
	unsigned dfmt = dfmts[num_channels - 1];
	unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
	LLVMValueRef immoffset = LLVMConstInt(ctx->i32, inst_offset, 0);
1275

1276
	ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset,
1277
			           immoffset, num_channels, dfmt, nfmt, cache_policy);
1278
1279
}

1280
1281
1282
1283
1284
1285
1286
static LLVMValueRef
ac_build_llvm8_buffer_load_common(struct ac_llvm_context *ctx,
				  LLVMValueRef rsrc,
				  LLVMValueRef vindex,
				  LLVMValueRef voffset,
				  LLVMValueRef soffset,
				  unsigned num_channels,
1287
				  LLVMTypeRef channel_type,
1288
				  unsigned cache_policy,
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
				  bool can_speculate,
				  bool use_format,
				  bool structurized)
{
	LLVMValueRef args[5];
	int idx = 0;
	args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
	if (structurized)
		args[idx++] = vindex ? vindex : ctx->i32_0;
	args[idx++] = voffset ? voffset : ctx->i32_0;
	args[idx++] = soffset ? soffset : ctx->i32_0;
1300
	args[idx++] = LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0);
1301
	unsigned func = !ac_has_vec3_support(ctx->chip_class, use_format) && num_channels == 3 ? 4 : num_channels;
1302
	const char *indexing_kind = structurized ? "struct" : "raw";
1303
1304
1305
1306
	char name[256], type_name[8];

	LLVMTypeRef type = func > 1 ? LLVMVectorType(channel_type, func) : channel_type;
	ac_build_type_name_for_intr(type, type_name, sizeof(type_name));
1307
1308
1309

	if (use_format) {
		snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.format.%s",
1310
			 indexing_kind, type_name);
1311
1312
	} else {
		snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.%s",
1313
			 indexing_kind, type_name);
1314
1315
	}

1316
	return ac_build_intrinsic(ctx, name, type, args, idx,
1317
1318
1319
				  ac_get_load_intr_attribs(can_speculate));
}

1320
1321
1322
1323
1324
1325
1326
1327
LLVMValueRef
ac_build_buffer_load(struct ac_llvm_context *ctx,
		     LLVMValueRef rsrc,
		     int num_channels,
		     LLVMValueRef vindex,
		     LLVMValueRef voffset,
		     LLVMValueRef soffset,
		     unsigned inst_offset,
1328
		     unsigned cache_policy,
1329
1330
		     bool can_speculate,
		     bool allow_smem)
1331
{
1332
1333
1334
1335
1336
1337
	LLVMValueRef offset = LLVMConstInt(ctx->i32, inst_offset, 0);
	if (voffset)
		offset = LLVMBuildAdd(ctx->builder, offset, voffset, "");
	if (soffset)
		offset = LLVMBuildAdd(ctx->builder, offset, soffset, "");

1338
	if (allow_smem && !(cache_policy & ac_slc) &&
1339
	    (!(cache_policy & ac_glc) || ctx->chip_class >= GFX8)) {
1340
1341
		assert(vindex == NULL);

1342
		LLVMValueRef result[8];
1343
1344
1345
1346
1347
1348

		for (int i = 0; i < num_channels; i++) {
			if (i) {
				offset