si_pipe.c 44.8 KB
Newer Older
Tom Stellard's avatar
Tom Stellard committed
1
2
/*
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
Marek Olšák's avatar
Marek Olšák committed
3
4
 * Copyright 2018 Advanced Micro Devices, Inc.
 * All Rights Reserved.
Tom Stellard's avatar
Tom Stellard committed
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
25
26
27

#include "si_pipe.h"
#include "si_public.h"
28
#include "si_shader_internal.h"
29
#include "si_compute.h"
30
#include "sid.h"
31

32
#include "ac_llvm_util.h"
33
#include "radeon/radeon_uvd.h"
34
#include "util/disk_cache.h"
35
#include "util/u_log.h"
Tom Stellard's avatar
Tom Stellard committed
36
#include "util/u_memory.h"
Bas Nieuwenhuizen's avatar
Bas Nieuwenhuizen committed
37
#include "util/u_suballoc.h"
38
#include "util/u_tests.h"
39
#include "util/u_upload_mgr.h"
40
#include "util/xmlconfig.h"
Tom Stellard's avatar
Tom Stellard committed
41
#include "vl/vl_decoder.h"
42
#include "driver_ddebug/dd_util.h"
Tom Stellard's avatar
Tom Stellard committed
43

44
45
46
47
#include "gallium/winsys/radeon/drm/radeon_drm_public.h"
#include "gallium/winsys/amdgpu/drm/amdgpu_public.h"
#include <xf86drm.h>

48
49
#include <llvm/Config/llvm-config.h>

50
51
52
static struct pipe_context *si_create_context(struct pipe_screen *screen,
                                              unsigned flags);

53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
/* Table of R600_DEBUG/AMD_DEBUG option names parsed by debug_get_flags_option().
 * Each entry maps a user-visible option string to a DBG() flag bit stored in
 * sscreen->debug_flags.  Entries are grouped by how the flag is consumed;
 * the "shader cache" group must be part of the cache key since those options
 * change generated code.  DEBUG_NAMED_VALUE_END terminates the table. */
static const struct debug_named_value debug_options[] = {
	/* Shader logging options: */
	{ "vs", DBG(VS), "Print vertex shaders" },
	{ "ps", DBG(PS), "Print pixel shaders" },
	{ "gs", DBG(GS), "Print geometry shaders" },
	{ "tcs", DBG(TCS), "Print tessellation control shaders" },
	{ "tes", DBG(TES), "Print tessellation evaluation shaders" },
	{ "cs", DBG(CS), "Print compute shaders" },
	{ "noir", DBG(NO_IR), "Don't print the LLVM IR"},
	{ "notgsi", DBG(NO_TGSI), "Don't print the TGSI"},
	{ "noasm", DBG(NO_ASM), "Don't print disassembled shaders"},
	{ "preoptir", DBG(PREOPT_IR), "Print the LLVM IR before initial optimizations" },

	/* Shader compiler options the shader cache should be aware of: */
	{ "sisched", DBG(SI_SCHED), "Enable LLVM SI Machine Instruction Scheduler." },
	{ "gisel", DBG(GISEL), "Enable LLVM global instruction selector." },
	{ "w32ge", DBG(W32_GE), "Use Wave32 for vertex, tessellation, and geometry shaders." },
	{ "w32ps", DBG(W32_PS), "Use Wave32 for pixel shaders." },
	{ "w32cs", DBG(W32_CS), "Use Wave32 for computes shaders." },
	{ "w64ge", DBG(W64_GE), "Use Wave64 for vertex, tessellation, and geometry shaders." },
	{ "w64ps", DBG(W64_PS), "Use Wave64 for pixel shaders." },
	{ "w64cs", DBG(W64_CS), "Use Wave64 for computes shaders." },

	/* Shader compiler options (with no effect on the shader cache): */
	{ "checkir", DBG(CHECK_IR), "Enable additional sanity checks on shader IR" },
	{ "mono", DBG(MONOLITHIC_SHADERS), "Use old-style monolithic shaders compiled on demand" },
	{ "nooptvariant", DBG(NO_OPT_VARIANT), "Disable compiling optimized shader variants." },

	/* Information logging options: */
	{ "info", DBG(INFO), "Print driver information" },
	{ "tex", DBG(TEX), "Print texture info" },
	{ "compute", DBG(COMPUTE), "Print compute info" },
	{ "vm", DBG(VM), "Print virtual addresses when creating resources" },

	/* Driver options: */
	{ "forcedma", DBG(FORCE_SDMA), "Use SDMA for all operations when possible." },
	{ "nodma", DBG(NO_SDMA), "Disable SDMA" },
	{ "nodmaclear", DBG(NO_SDMA_CLEARS), "Disable SDMA clears" },
	{ "nodmacopyimage", DBG(NO_SDMA_COPY_IMAGE), "Disable SDMA image copies" },
	{ "nowc", DBG(NO_WC), "Disable GTT write combining" },
	{ "check_vm", DBG(CHECK_VM), "Check VM faults and dump debug info." },
	{ "reserve_vmid", DBG(RESERVE_VMID), "Force VMID reservation per context." },
	{ "zerovram", DBG(ZERO_VRAM), "Clear VRAM allocations." },

	/* 3D engine options: */
	{ "nogfx", DBG(NO_GFX), "Disable graphics. Only multimedia compute paths can be used." },
	{ "nongg", DBG(NO_NGG), "Disable NGG and use the legacy pipeline." },
	{ "alwayspd", DBG(ALWAYS_PD), "Always enable the primitive discard compute shader." },
	{ "pd", DBG(PD), "Enable the primitive discard compute shader for large draw calls." },
	{ "nopd", DBG(NO_PD), "Disable the primitive discard compute shader." },
	{ "switch_on_eop", DBG(SWITCH_ON_EOP), "Program WD/IA to switch on end-of-packet." },
	{ "nooutoforder", DBG(NO_OUT_OF_ORDER), "Disable out-of-order rasterization" },
	{ "nodpbb", DBG(NO_DPBB), "Disable DPBB." },
	{ "nodfsm", DBG(NO_DFSM), "Disable DFSM." },
	{ "dpbb", DBG(DPBB), "Enable DPBB." },
	{ "dfsm", DBG(DFSM), "Enable DFSM." },
	{ "nohyperz", DBG(NO_HYPERZ), "Disable Hyper-Z" },
	{ "norbplus", DBG(NO_RB_PLUS), "Disable RB+." },
	{ "no2d", DBG(NO_2D_TILING), "Disable 2D tiling" },
	{ "notiling", DBG(NO_TILING), "Disable tiling" },
	{ "nodcc", DBG(NO_DCC), "Disable DCC." },
	{ "nodccclear", DBG(NO_DCC_CLEAR), "Disable DCC fast clear." },
	{ "nodccfb", DBG(NO_DCC_FB), "Disable separate DCC on the main framebuffer" },
	{ "nodccmsaa", DBG(NO_DCC_MSAA), "Disable DCC for MSAA" },
	{ "nofmask", DBG(NO_FMASK), "Disable MSAA compression" },

	/* Tests: */
	{ "testdma", DBG(TEST_DMA), "Invoke SDMA tests and exit." },
	{ "testvmfaultcp", DBG(TEST_VMFAULT_CP), "Invoke a CP VM fault test and exit." },
	{ "testvmfaultsdma", DBG(TEST_VMFAULT_SDMA), "Invoke a SDMA VM fault test and exit." },
	{ "testvmfaultshader", DBG(TEST_VMFAULT_SHADER), "Invoke a shader VM fault test and exit." },
	{ "testdmaperf", DBG(TEST_DMA_PERF), "Test DMA performance" },
	{ "testgds", DBG(TEST_GDS), "Test GDS." },
	{ "testgdsmm", DBG(TEST_GDS_MM), "Test GDS memory management." },
	{ "testgdsoamm", DBG(TEST_GDS_OA_MM), "Test GDS OA memory management." },

	DEBUG_NAMED_VALUE_END /* must be last */
};

132
/**
 * Initialize an LLVM compiler instance for this screen.
 *
 * Builds the target-machine option mask from debug flags and chip
 * capabilities, creates the compiler, and pre-creates the LLVM pass
 * managers (default, Wave32 when available, and low-opt when requested).
 *
 * \param sscreen   screen whose chip info and debug flags select the options
 * \param compiler  output; filled with target machines and pass managers
 */
void si_init_compiler(struct si_screen *sscreen, struct ac_llvm_compiler *compiler)
{
	/* Only create the less-optimizing version of the compiler on APUs
	 * predating Ryzen (Raven). */
	bool create_low_opt_compiler = !sscreen->info.has_dedicated_vram &&
				       sscreen->info.chip_class <= GFX8;

	/* XNACK is force-enabled on GFX9+ and force-disabled below it;
	 * promote-alloca-to-scratch works around broken VGPR indexing. */
	enum ac_target_machine_options tm_options =
		(sscreen->debug_flags & DBG(SI_SCHED) ? AC_TM_SISCHED : 0) |
		(sscreen->debug_flags & DBG(GISEL) ? AC_TM_ENABLE_GLOBAL_ISEL : 0) |
		(sscreen->info.chip_class >= GFX9 ? AC_TM_FORCE_ENABLE_XNACK : 0) |
		(sscreen->info.chip_class < GFX9 ? AC_TM_FORCE_DISABLE_XNACK : 0) |
		(!sscreen->llvm_has_working_vgpr_indexing ? AC_TM_PROMOTE_ALLOCA_TO_SCRATCH : 0) |
		(sscreen->debug_flags & DBG(CHECK_IR) ? AC_TM_CHECK_IR : 0) |
		(create_low_opt_compiler ? AC_TM_CREATE_LOW_OPT : 0);

	/* Process-wide LLVM init must happen before creating any compiler. */
	ac_init_llvm_once();
	ac_init_llvm_compiler(compiler, sscreen->info.family, tm_options);
	compiler->passes = ac_create_llvm_passes(compiler->tm);

	/* Optional extra pass managers, only if the corresponding target
	 * machines were created by ac_init_llvm_compiler(). */
	if (compiler->tm_wave32)
		compiler->passes_wave32 = ac_create_llvm_passes(compiler->tm_wave32);
	if (compiler->low_opt_tm)
		compiler->low_opt_passes = ac_create_llvm_passes(compiler->low_opt_tm);
}

158
/* Free all resources owned by a compiler created with si_init_compiler(). */
static void si_destroy_compiler(struct ac_llvm_compiler *compiler)
{
	ac_destroy_llvm_compiler(compiler);
}

Tom Stellard's avatar
Tom Stellard committed
163
164
165
/*
 * pipe_context
 */
166
/**
 * Destroy a radeonsi pipe_context and free everything it owns.
 *
 * Also used as the error path of si_create_context(), so every
 * destruction below is guarded against fields that were never
 * initialized (NULL checks / reference helpers that accept NULL).
 * The teardown order matters: compiler queues are drained first so no
 * async job touches the context while it is being torn down, and the
 * winsys context/fences go away only after everything that could have
 * submitted work.
 */
static void si_destroy_context(struct pipe_context *context)
{
	struct si_context *sctx = (struct si_context *)context;
	int i;

	/* Drain async shader compilation before freeing anything it may use. */
	util_queue_finish(&sctx->screen->shader_compiler_queue);
	util_queue_finish(&sctx->screen->shader_compiler_queue_low_priority);

	/* Unreference the framebuffer normally to disable related logic
	 * properly.
	 */
	struct pipe_framebuffer_state fb = {};
	if (context->set_framebuffer_state)
		context->set_framebuffer_state(context, &fb);

	si_release_all_descriptors(sctx);

	if (sctx->chip_class >= GFX10 && sctx->has_graphics)
		gfx10_destroy_query(sctx);

	/* Drop references on internal buffers/rings. */
	pipe_resource_reference(&sctx->esgs_ring, NULL);
	pipe_resource_reference(&sctx->gsvs_ring, NULL);
	pipe_resource_reference(&sctx->tess_rings, NULL);
	pipe_resource_reference(&sctx->null_const_buf.buffer, NULL);
	pipe_resource_reference(&sctx->sample_pos_buffer, NULL);
	si_resource_reference(&sctx->border_color_buffer, NULL);
	free(sctx->border_color_table);
	si_resource_reference(&sctx->scratch_buffer, NULL);
	si_resource_reference(&sctx->compute_scratch_buffer, NULL);
	si_resource_reference(&sctx->wait_mem_scratch, NULL);

	/* PM4 state objects. */
	si_pm4_free_state(sctx, sctx->init_config, ~0);
	if (sctx->init_config_gs_rings)
		si_pm4_free_state(sctx, sctx->init_config_gs_rings, ~0);
	for (i = 0; i < ARRAY_SIZE(sctx->vgt_shader_config); i++)
		si_pm4_delete_state(sctx, vgt_shader_config, sctx->vgt_shader_config[i]);

	/* Internal CSOs created lazily for blits/clears/decompression. */
	if (sctx->fixed_func_tcs_shader.cso)
		sctx->b.delete_tcs_state(&sctx->b, sctx->fixed_func_tcs_shader.cso);
	if (sctx->custom_dsa_flush)
		sctx->b.delete_depth_stencil_alpha_state(&sctx->b, sctx->custom_dsa_flush);
	if (sctx->custom_blend_resolve)
		sctx->b.delete_blend_state(&sctx->b, sctx->custom_blend_resolve);
	if (sctx->custom_blend_fmask_decompress)
		sctx->b.delete_blend_state(&sctx->b, sctx->custom_blend_fmask_decompress);
	if (sctx->custom_blend_eliminate_fastclear)
		sctx->b.delete_blend_state(&sctx->b, sctx->custom_blend_eliminate_fastclear);
	if (sctx->custom_blend_dcc_decompress)
		sctx->b.delete_blend_state(&sctx->b, sctx->custom_blend_dcc_decompress);
	if (sctx->vs_blit_pos)
		sctx->b.delete_vs_state(&sctx->b, sctx->vs_blit_pos);
	if (sctx->vs_blit_pos_layered)
		sctx->b.delete_vs_state(&sctx->b, sctx->vs_blit_pos_layered);
	if (sctx->vs_blit_color)
		sctx->b.delete_vs_state(&sctx->b, sctx->vs_blit_color);
	if (sctx->vs_blit_color_layered)
		sctx->b.delete_vs_state(&sctx->b, sctx->vs_blit_color_layered);
	if (sctx->vs_blit_texcoord)
		sctx->b.delete_vs_state(&sctx->b, sctx->vs_blit_texcoord);
	if (sctx->cs_clear_buffer)
		sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_buffer);
	if (sctx->cs_copy_buffer)
		sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_buffer);
	if (sctx->cs_copy_image)
		sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_image);
	if (sctx->cs_copy_image_1d_array)
		sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_image_1d_array);
	if (sctx->cs_clear_render_target)
		sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_render_target);
	if (sctx->cs_clear_render_target_1d_array)
		sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_render_target_1d_array);
	if (sctx->cs_clear_12bytes_buffer)
		sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_12bytes_buffer);
	if (sctx->cs_dcc_retile)
		sctx->b.delete_compute_state(&sctx->b, sctx->cs_dcc_retile);

	/* Lazily-created FMASK expansion shader variants. */
	for (unsigned i = 0; i < ARRAY_SIZE(sctx->cs_fmask_expand); i++) {
		for (unsigned j = 0; j < ARRAY_SIZE(sctx->cs_fmask_expand[i]); j++) {
			if (sctx->cs_fmask_expand[i][j]) {
				sctx->b.delete_compute_state(&sctx->b,
							     sctx->cs_fmask_expand[i][j]);
			}
		}
	}

	if (sctx->blitter)
		util_blitter_destroy(sctx->blitter);

	/* Release DCC stats. */
	for (int i = 0; i < ARRAY_SIZE(sctx->dcc_stats); i++) {
		assert(!sctx->dcc_stats[i].query_active);

		for (int j = 0; j < ARRAY_SIZE(sctx->dcc_stats[i].ps_stats); j++)
			if (sctx->dcc_stats[i].ps_stats[j])
				sctx->b.destroy_query(&sctx->b,
							sctx->dcc_stats[i].ps_stats[j]);

		si_texture_reference(&sctx->dcc_stats[i].tex, NULL);
	}

	if (sctx->query_result_shader)
		sctx->b.delete_compute_state(&sctx->b, sctx->query_result_shader);
	if (sctx->sh_query_result_shader)
		sctx->b.delete_compute_state(&sctx->b, sctx->sh_query_result_shader);

	/* Command streams, then the winsys context that owned them. */
	if (sctx->gfx_cs)
		sctx->ws->cs_destroy(sctx->gfx_cs);
	if (sctx->sdma_cs)
		sctx->ws->cs_destroy(sctx->sdma_cs);
	if (sctx->ctx)
		sctx->ws->ctx_destroy(sctx->ctx);

	/* Uploaders and allocators. */
	if (sctx->b.stream_uploader)
		u_upload_destroy(sctx->b.stream_uploader);
	if (sctx->b.const_uploader)
		u_upload_destroy(sctx->b.const_uploader);
	if (sctx->cached_gtt_allocator)
		u_upload_destroy(sctx->cached_gtt_allocator);

	slab_destroy_child(&sctx->pool_transfers);
	slab_destroy_child(&sctx->pool_transfers_unsync);

	if (sctx->allocator_zeroed_memory)
		u_suballocator_destroy(sctx->allocator_zeroed_memory);

	/* Fences and remaining GPU buffers. */
	sctx->ws->fence_reference(&sctx->last_gfx_fence, NULL);
	sctx->ws->fence_reference(&sctx->last_sdma_fence, NULL);
	sctx->ws->fence_reference(&sctx->last_ib_barrier_fence, NULL);
	si_resource_reference(&sctx->eop_bug_scratch, NULL);
	si_resource_reference(&sctx->index_ring, NULL);
	si_resource_reference(&sctx->barrier_buf, NULL);
	si_resource_reference(&sctx->last_ib_barrier_buf, NULL);
	pb_reference(&sctx->gds, NULL);
	pb_reference(&sctx->gds_oa, NULL);

	si_destroy_compiler(&sctx->compiler);

	si_saved_cs_reference(&sctx->current_saved_cs, NULL);

	/* Bindless handle bookkeeping. */
	_mesa_hash_table_destroy(sctx->tex_handles, NULL);
	_mesa_hash_table_destroy(sctx->img_handles, NULL);

	util_dynarray_fini(&sctx->resident_tex_handles);
	util_dynarray_fini(&sctx->resident_img_handles);
	util_dynarray_fini(&sctx->resident_tex_needs_color_decompress);
	util_dynarray_fini(&sctx->resident_img_needs_color_decompress);
	util_dynarray_fini(&sctx->resident_tex_needs_depth_decompress);
	si_unref_sdma_uploads(sctx);
	free(sctx->sdma_uploads);
	FREE(sctx);
}

318
/**
 * Query the GPU reset status (pipe_context::get_device_reset_status).
 *
 * On a detected reset this notifies the state tracker via the registered
 * device-reset callback and rebuilds the screen's shared auxiliary
 * context (under aux_context_lock), transplanting its log context so
 * logging continues across the reset.
 *
 * \return PIPE_NO_RESET, or the reset status reported by the winsys.
 */
static enum pipe_reset_status si_get_reset_status(struct pipe_context *ctx)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_screen *sscreen = sctx->screen;
	enum pipe_reset_status status = sctx->ws->ctx_query_reset_status(sctx->ctx);

	if (status != PIPE_NO_RESET) {
		/* Call the state tracker to set a no-op API dispatch. */
		if (sctx->device_reset_callback.reset) {
			sctx->device_reset_callback.reset(sctx->device_reset_callback.data,
							  status);
		}

		/* Re-create the auxiliary context, because it won't submit
		 * any new IBs due to a GPU reset.
		 */
		simple_mtx_lock(&sscreen->aux_context_lock);

		/* Preserve the old aux context's logger across the re-create. */
		struct u_log_context *aux_log = ((struct si_context *)sscreen->aux_context)->log;
		sscreen->aux_context->set_log_context(sscreen->aux_context, NULL);
		sscreen->aux_context->destroy(sscreen->aux_context);

		sscreen->aux_context = si_create_context(&sscreen->b,
			(sscreen->options.aux_debug ? PIPE_CONTEXT_DEBUG : 0) |
			(sscreen->info.has_graphics ? 0 : PIPE_CONTEXT_COMPUTE_ONLY));
		sscreen->aux_context->set_log_context(sscreen->aux_context, aux_log);
		simple_mtx_unlock(&sscreen->aux_context_lock);
	}
	return status;
}

static void si_set_device_reset_callback(struct pipe_context *ctx,
					   const struct pipe_device_reset_callback *cb)
{
	struct si_context *sctx = (struct si_context *)ctx;

	if (cb)
355
		sctx->device_reset_callback = *cb;
356
	else
357
358
		memset(&sctx->device_reset_callback, 0,
		       sizeof(sctx->device_reset_callback));
359
360
}

361
362
363
364
365
366
367
368
369
370
371
372
373
374
/* Apitrace profiling:
 *   1) qapitrace : Tools -> Profile: Measure CPU & GPU times
 *   2) In the middle panel, zoom in (mouse wheel) on some bad draw call
 *      and remember its number.
 *   3) In Mesa, enable queries and performance counters around that draw
 *      call and print the results.
 *   4) glretrace --benchmark --markers ..
 */
/* pipe_context::emit_string_marker: record an API-level string marker.
 * The marker is parsed for an apitrace call number and, if a log context
 * is attached, echoed into the context log. */
static void si_emit_string_marker(struct pipe_context *ctx,
				  const char *string, int len)
{
	struct si_context *sctx = (struct si_context *)ctx;

	dd_parse_apitrace_marker(string, len, &sctx->apitrace_call_number);

	if (sctx->log)
		u_log_printf(sctx->log, "\nString marker: %*s\n", len, string);
}

380
381
382
383
/* pipe_context::set_debug_callback: install or clear the debug-message
 * callback.  Passing NULL clears it. */
static void si_set_debug_callback(struct pipe_context *ctx,
				  const struct pipe_debug_callback *cb)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_screen *screen = sctx->screen;

	/* Drain the shader compiler queues before swapping the callback —
	 * presumably so in-flight compile jobs don't race with the old
	 * callback value.  NOTE(review): confirm against the compiler-queue
	 * job implementation. */
	util_queue_finish(&screen->shader_compiler_queue);
	util_queue_finish(&screen->shader_compiler_queue_low_priority);

	if (cb)
		sctx->debug = *cb;
	else
		memset(&sctx->debug, 0, sizeof(sctx->debug));
}

395
396
397
398
static void si_set_log_context(struct pipe_context *ctx,
			       struct u_log_context *log)
{
	struct si_context *sctx = (struct si_context *)ctx;
399
	sctx->log = log;
400
401
402

	if (log)
		u_log_add_auto_logger(log, si_auto_log_cs, sctx);
403
404
}

405
406
407
408
409
410
411
412
413
414
415
416
417
418
/* pipe_context::set_context_param: forward supported tuning parameters
 * to the winsys.  Only thread-to-L3-cache pinning is handled; all other
 * parameters are silently ignored. */
static void si_set_context_param(struct pipe_context *ctx,
				 enum pipe_context_param param,
				 unsigned value)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct radeon_winsys *winsys = sctx->ws;

	if (param == PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE)
		winsys->pin_threads_to_L3_cache(winsys, value);
}

419
/**
 * Create a radeonsi pipe_context.
 *
 * Allocates the context, creates command streams, allocators/uploaders,
 * internal buffers, and installs all context vtable entries.  Graphics-only
 * and chip-specific setup is gated on sctx->has_graphics / chip_class.
 * On any failure the partially-built context is torn down via
 * si_destroy_context() (every field it frees is NULL-safe).
 *
 * \param screen  the si_screen (as pipe_screen)
 * \param flags   PIPE_CONTEXT_* flags (DEBUG, COMPUTE_ONLY,
 *                LOSE_CONTEXT_ON_RESET, ...)
 * \return the new context, or NULL on failure or unsupported configuration.
 */
static struct pipe_context *si_create_context(struct pipe_screen *screen,
                                              unsigned flags)
{
	struct si_screen* sscreen = (struct si_screen *)screen;

	/* Don't create a context if it's not compute-only and hw is compute-only. */
	if (!sscreen->info.has_graphics &&
	    !(flags & PIPE_CONTEXT_COMPUTE_ONLY))
		return NULL;

	struct si_context *sctx = CALLOC_STRUCT(si_context);
	struct radeon_winsys *ws = sscreen->ws;
	int shader, i;
	bool stop_exec_on_failure = (flags & PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET) != 0;

	if (!sctx)
		return NULL;

	/* GFX6 has no compute-only ring, so it always gets a graphics context. */
	sctx->has_graphics = sscreen->info.chip_class == GFX6 ||
			     !(flags & PIPE_CONTEXT_COMPUTE_ONLY);

	if (flags & PIPE_CONTEXT_DEBUG)
		sscreen->record_llvm_ir = true; /* racy but not critical */

	sctx->b.screen = screen; /* this must be set first */
	sctx->b.priv = NULL;
	sctx->b.destroy = si_destroy_context;
	sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
	sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0;

	slab_create_child(&sctx->pool_transfers, &sscreen->pool_transfers);
	slab_create_child(&sctx->pool_transfers_unsync, &sscreen->pool_transfers);

	sctx->ws = sscreen->ws;
	sctx->family = sscreen->info.family;
	sctx->chip_class = sscreen->info.chip_class;

	/* Scratch buffer used to work around an end-of-pipe bug on
	 * GFX7-GFX9 (16 bytes per render backend). */
	if (sctx->chip_class == GFX7 ||
	    sctx->chip_class == GFX8 ||
	    sctx->chip_class == GFX9) {
		sctx->eop_bug_scratch = si_resource(
			pipe_buffer_create(&sscreen->b, 0, PIPE_USAGE_DEFAULT,
					   16 * sscreen->info.num_render_backends));
		if (!sctx->eop_bug_scratch)
			goto fail;
	}

	/* Initialize context allocators. */
	sctx->allocator_zeroed_memory =
		u_suballocator_create(&sctx->b, 128 * 1024,
				      0, PIPE_USAGE_DEFAULT,
				      SI_RESOURCE_FLAG_UNMAPPABLE |
				      SI_RESOURCE_FLAG_CLEAR, false);
	if (!sctx->allocator_zeroed_memory)
		goto fail;

	sctx->b.stream_uploader = u_upload_create(&sctx->b, 1024 * 1024,
						    0, PIPE_USAGE_STREAM,
						    SI_RESOURCE_FLAG_READ_ONLY);
	if (!sctx->b.stream_uploader)
		goto fail;

	sctx->cached_gtt_allocator = u_upload_create(&sctx->b, 16 * 1024,
						       0, PIPE_USAGE_STAGING, 0);
	if (!sctx->cached_gtt_allocator)
		goto fail;

	sctx->ctx = sctx->ws->ctx_create(sctx->ws);
	if (!sctx->ctx)
		goto fail;

	/* Create the SDMA command stream unless SDMA is unavailable,
	 * disabled, or blacklisted for this chip (overridable with
	 * AMD_DEBUG=forcedma). */
	if (sscreen->info.num_rings[RING_DMA] &&
	    !(sscreen->debug_flags & DBG(NO_SDMA)) &&
	    /* SDMA causes corruption on RX 580:
	     *    https://gitlab.freedesktop.org/mesa/mesa/issues/1399
	     *    https://gitlab.freedesktop.org/mesa/mesa/issues/1889
	     */
	    (sctx->chip_class != GFX8 || sscreen->debug_flags & DBG(FORCE_SDMA)) &&
	    /* SDMA timeouts sometimes on gfx10 so disable it for now. See:
	     *    https://bugs.freedesktop.org/show_bug.cgi?id=111481
	     *    https://gitlab.freedesktop.org/mesa/mesa/issues/1907
	     */
	    (sctx->chip_class != GFX10 || sscreen->debug_flags & DBG(FORCE_SDMA))) {
		sctx->sdma_cs = sctx->ws->cs_create(sctx->ctx, RING_DMA,
						   (void*)si_flush_dma_cs,
						   sctx, stop_exec_on_failure);
	}

	/* Const uploads go through SDMA only on dedicated-VRAM chips
	 * that actually have an SDMA command stream. */
	bool use_sdma_upload = sscreen->info.has_dedicated_vram && sctx->sdma_cs;
	sctx->b.const_uploader = u_upload_create(&sctx->b, 256 * 1024,
						 0, PIPE_USAGE_DEFAULT,
						 SI_RESOURCE_FLAG_32BIT |
						 (use_sdma_upload ?
							  SI_RESOURCE_FLAG_UPLOAD_FLUSH_EXPLICIT_VIA_SDMA : 0));
	if (!sctx->b.const_uploader)
		goto fail;

	if (use_sdma_upload)
		u_upload_enable_flush_explicit(sctx->b.const_uploader);

	sctx->gfx_cs = ws->cs_create(sctx->ctx,
				     sctx->has_graphics ? RING_GFX : RING_COMPUTE,
				     (void*)si_flush_gfx_cs, sctx, stop_exec_on_failure);

	/* Border colors. */
	sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *
					  sizeof(*sctx->border_color_table));
	if (!sctx->border_color_table)
		goto fail;

	sctx->border_color_buffer = si_resource(
		pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT,
				   SI_MAX_BORDER_COLORS *
				   sizeof(*sctx->border_color_table)));
	if (!sctx->border_color_buffer)
		goto fail;

	sctx->border_color_map =
		ws->buffer_map(sctx->border_color_buffer->buf,
			       NULL, PIPE_TRANSFER_WRITE);
	if (!sctx->border_color_map)
		goto fail;

	sctx->ngg = sscreen->use_ngg;

	/* Initialize context functions used by graphics and compute. */
	if (sctx->chip_class >= GFX10)
		sctx->emit_cache_flush = gfx10_emit_cache_flush;
	else
		sctx->emit_cache_flush = si_emit_cache_flush;

	sctx->b.emit_string_marker = si_emit_string_marker;
	sctx->b.set_debug_callback = si_set_debug_callback;
	sctx->b.set_log_context = si_set_log_context;
	sctx->b.set_context_param = si_set_context_param;
	sctx->b.get_device_reset_status = si_get_reset_status;
	sctx->b.set_device_reset_callback = si_set_device_reset_callback;

	si_init_all_descriptors(sctx);
	si_init_buffer_functions(sctx);
	si_init_clear_functions(sctx);
	si_init_blit_functions(sctx);
	si_init_compute_functions(sctx);
	si_init_compute_blit_functions(sctx);
	si_init_debug_functions(sctx);
	si_init_fence_functions(sctx);
	si_init_query_functions(sctx);
	si_init_state_compute_functions(sctx);
	si_init_context_texture_functions(sctx);

	/* Initialize graphics-only context functions. */
	if (sctx->has_graphics) {
		if (sctx->chip_class >= GFX10)
			gfx10_init_query(sctx);
		si_init_msaa_functions(sctx);
		si_init_shader_functions(sctx);
		si_init_state_functions(sctx);
		si_init_streamout_functions(sctx);
		si_init_viewport_functions(sctx);

		sctx->blitter = util_blitter_create(&sctx->b);
		if (sctx->blitter == NULL)
			goto fail;
		sctx->blitter->skip_viewport_restore = true;

		/* Some states are expected to be always non-NULL. */
		sctx->noop_blend = util_blitter_get_noop_blend_state(sctx->blitter);
		sctx->queued.named.blend = sctx->noop_blend;

		sctx->noop_dsa = util_blitter_get_noop_dsa_state(sctx->blitter);
		sctx->queued.named.dsa = sctx->noop_dsa;

		sctx->discard_rasterizer_state =
			util_blitter_get_discard_rasterizer_state(sctx->blitter);
		sctx->queued.named.rasterizer = sctx->discard_rasterizer_state;

		si_init_draw_functions(sctx);
		si_initialize_prim_discard_tunables(sctx);
	}

	/* Initialize SDMA functions. */
	if (sctx->chip_class >= GFX7)
		cik_init_sdma_functions(sctx);
	else
		sctx->dma_copy = si_resource_copy_region;

	if (sscreen->debug_flags & DBG(FORCE_SDMA))
		sctx->b.resource_copy_region = sctx->dma_copy;

	sctx->sample_mask = 0xffff;

	/* Initialize multimedia functions. */
	if (sscreen->info.has_hw_decode) {
		sctx->b.create_video_codec = si_uvd_create_decoder;
		sctx->b.create_video_buffer = si_video_buffer_create;
	} else {
		sctx->b.create_video_codec = vl_create_decoder;
		sctx->b.create_video_buffer = vl_video_buffer_create;
	}

	/* 8-byte scratch buffer used by the CP for wait-memory packets on GFX9+. */
	if (sctx->chip_class >= GFX9) {
		sctx->wait_mem_scratch = si_resource(
			pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, 8));
		if (!sctx->wait_mem_scratch)
			goto fail;

		/* Initialize the memory. */
		si_cp_write_data(sctx, sctx->wait_mem_scratch, 0, 4,
				 V_370_MEM, V_370_ME, &sctx->wait_mem_number);
	}

	/* GFX7 cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads
	 * if NUM_RECORDS == 0). We need to use a dummy buffer instead. */
	if (sctx->chip_class == GFX7) {
		sctx->null_const_buf.buffer =
			pipe_aligned_buffer_create(screen,
						   SI_RESOURCE_FLAG_32BIT,
						   PIPE_USAGE_DEFAULT, 16,
						   sctx->screen->info.tcc_cache_line_size);
		if (!sctx->null_const_buf.buffer)
			goto fail;
		sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0;

		/* Bind the dummy buffer to every constant-buffer slot of every
		 * stage this context supports. */
		unsigned start_shader = sctx->has_graphics ? 0 :  PIPE_SHADER_COMPUTE;
		for (shader = start_shader; shader < SI_NUM_SHADERS; shader++) {
			for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) {
				sctx->b.set_constant_buffer(&sctx->b, shader, i,
							      &sctx->null_const_buf);
			}
		}

		si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS,
				 &sctx->null_const_buf);
		si_set_rw_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS,
				 &sctx->null_const_buf);
		si_set_rw_buffer(sctx, SI_VS_CONST_CLIP_PLANES,
				 &sctx->null_const_buf);
		si_set_rw_buffer(sctx, SI_PS_CONST_POLY_STIPPLE,
				 &sctx->null_const_buf);
		si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS,
				 &sctx->null_const_buf);
	}

	uint64_t max_threads_per_block;
	screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
				  PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK,
				  &max_threads_per_block);

	/* The maximum number of scratch waves. Scratch space isn't divided
	 * evenly between CUs. The number is only a function of the number of CUs.
	 * We can decrease the constant to decrease the scratch buffer size.
	 *
	 * sctx->scratch_waves must be >= the maximum posible size of
	 * 1 threadgroup, so that the hw doesn't hang from being unable
	 * to start any.
	 *
	 * The recommended value is 4 per CU at most. Higher numbers don't
	 * bring much benefit, but they still occupy chip resources (think
	 * async compute). I've seen ~2% performance difference between 4 and 32.
	 */
	sctx->scratch_waves = MAX2(32 * sscreen->info.num_good_compute_units,
				   max_threads_per_block / 64);

	/* Bindless handles. */
	sctx->tex_handles = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
						    _mesa_key_pointer_equal);
	sctx->img_handles = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
						    _mesa_key_pointer_equal);

	util_dynarray_init(&sctx->resident_tex_handles, NULL);
	util_dynarray_init(&sctx->resident_img_handles, NULL);
	util_dynarray_init(&sctx->resident_tex_needs_color_decompress, NULL);
	util_dynarray_init(&sctx->resident_img_needs_color_decompress, NULL);
	util_dynarray_init(&sctx->resident_tex_needs_depth_decompress, NULL);

	/* GPU copy of the MSAA sample positions table. */
	sctx->sample_pos_buffer =
		pipe_buffer_create(sctx->b.screen, 0, PIPE_USAGE_DEFAULT,
				   sizeof(sctx->sample_positions));
	pipe_buffer_write(&sctx->b, sctx->sample_pos_buffer, 0,
			  sizeof(sctx->sample_positions), &sctx->sample_positions);

	/* this must be last */
	si_begin_new_gfx_cs(sctx);

	if (sctx->chip_class == GFX7) {
		/* Clear the NULL constant buffer, because loads should return zeros.
		 * Note that this forces CP DMA to be used, because clover deadlocks
		 * for some reason when the compute codepath is used.
		 */
		uint32_t clear_value = 0;
		si_clear_buffer(sctx, sctx->null_const_buf.buffer, 0,
				sctx->null_const_buf.buffer->width0,
				&clear_value, 4, SI_COHERENCY_SHADER, true);
	}
	return &sctx->b;
fail:
	fprintf(stderr, "radeonsi: Failed to create a context.\n");
	si_destroy_context(&sctx->b);
	return NULL;
}

720
721
722
723
/**
 * pipe_screen::context_create entry point.
 *
 * Creates the real context via si_create_context() and, when the state
 * tracker asks for it and nothing rules it out, wraps it in a Gallium
 * threaded context.  The wrapping is skipped for compute-only (clover)
 * contexts and when shader logging is enabled (which disables async
 * compilation anyway).
 *
 * \param priv  unused by this driver.
 * \return the (possibly threaded) context, or NULL on failure.
 */
static struct pipe_context *si_pipe_create_context(struct pipe_screen *screen,
						   void *priv, unsigned flags)
{
	struct si_screen *sscreen = (struct si_screen *)screen;
	struct pipe_context *ctx;

	/* check_vm implies a debug context so VM faults can be diagnosed. */
	if (sscreen->debug_flags & DBG(CHECK_VM))
		flags |= PIPE_CONTEXT_DEBUG;

	ctx = si_create_context(screen, flags);

	if (!(flags & PIPE_CONTEXT_PREFER_THREADED))
		return ctx;

	/* Clover (compute-only) is unsupported. */
	if (flags & PIPE_CONTEXT_COMPUTE_ONLY)
		return ctx;

	/* When shaders are logged to stderr, asynchronous compilation is
	 * disabled too. */
	if (sscreen->debug_flags & DBG_ALL_SHADERS)
		return ctx;

	/* Use asynchronous flushes only on amdgpu, since the radeon
	 * implementation for fence_server_sync is incomplete. */
	return threaded_context_create(ctx, &sscreen->pool_transfers,
				       si_replace_buffer_storage,
				       sscreen->info.is_amdgpu ? si_create_fence : NULL,
				       &((struct si_context*)ctx)->tc);
}

Tom Stellard's avatar
Tom Stellard committed
751
752
753
/*
 * pipe_screen
 */
754
static void si_destroy_screen(struct pipe_screen* pscreen)
Tom Stellard's avatar
Tom Stellard committed
755
{
756
	struct si_screen *sscreen = (struct si_screen *)pscreen;
Marek Olšák's avatar
Marek Olšák committed
757
758
	struct si_shader_part *parts[] = {
		sscreen->vs_prologs,
Marek Olšák's avatar
Marek Olšák committed
759
		sscreen->tcs_epilogs,
760
		sscreen->gs_prologs,
Marek Olšák's avatar
Marek Olšák committed
761
		sscreen->ps_prologs,
Marek Olšák's avatar
Marek Olšák committed
762
		sscreen->ps_epilogs
Marek Olšák's avatar
Marek Olšák committed
763
764
	};
	unsigned i;
Tom Stellard's avatar
Tom Stellard committed
765

766
	if (!sscreen->ws->unref(sscreen->ws))
767
768
		return;

769
	simple_mtx_destroy(&sscreen->aux_context_lock);
770
771
772
773
774
775
776
777

	struct u_log_context *aux_log = ((struct si_context *)sscreen->aux_context)->log;
	if (aux_log) {
		sscreen->aux_context->set_log_context(sscreen->aux_context, NULL);
		u_log_context_destroy(aux_log);
		FREE(aux_log);
	}

778
779
	sscreen->aux_context->destroy(sscreen->aux_context);

780
	util_queue_destroy(&sscreen->shader_compiler_queue);
781
	util_queue_destroy(&sscreen->shader_compiler_queue_low_priority);
782

783
784
785
	/* Release the reference on glsl types of the compiler threads. */
	glsl_type_singleton_decref();

786
787
	for (i = 0; i < ARRAY_SIZE(sscreen->compiler); i++)
		si_destroy_compiler(&sscreen->compiler[i]);
788

789
790
	for (i = 0; i < ARRAY_SIZE(sscreen->compiler_lowp); i++)
		si_destroy_compiler(&sscreen->compiler_lowp[i]);
791

Marek Olšák's avatar
Marek Olšák committed
792
793
794
795
796
797
	/* Free shader parts. */
	for (i = 0; i < ARRAY_SIZE(parts); i++) {
		while (parts[i]) {
			struct si_shader_part *part = parts[i];

			parts[i] = part->next;
798
			si_shader_binary_clean(&part->binary);
Marek Olšák's avatar
Marek Olšák committed
799
800
801
			FREE(part);
		}
	}
802
	simple_mtx_destroy(&sscreen->shader_parts_mutex);
803
	si_destroy_shader_cache(sscreen);
804

805
	si_destroy_perfcounters(sscreen);
806
	si_gpu_load_kill_thread(sscreen);
807

808
	simple_mtx_destroy(&sscreen->gpu_load_mutex);
809

810
	slab_destroy_parent(&sscreen->pool_transfers);
811

812
813
	disk_cache_destroy(sscreen->disk_shader_cache);
	sscreen->ws->destroy(sscreen->ws);
814
	FREE(sscreen);
Tom Stellard's avatar
Tom Stellard committed
815
816
}

817
static void si_init_gs_info(struct si_screen *sscreen)
818
{
819
820
	sscreen->gs_table_depth = ac_get_gs_table_depth(sscreen->info.chip_class,
							sscreen->info.family);
821
822
}

823
824
static void si_test_vmfault(struct si_screen *sscreen)
{
825
	struct pipe_context *ctx = sscreen->aux_context;
826
827
	struct si_context *sctx = (struct si_context *)ctx;
	struct pipe_resource *buf =
828
		pipe_buffer_create_const0(&sscreen->b, 0, PIPE_USAGE_DEFAULT, 64);
829
830
831
832
833
834

	if (!buf) {
		puts("Buffer allocation failed.");
		exit(1);
	}

835
	si_resource(buf)->gpu_address = 0; /* cause a VM fault */
836

837
	if (sscreen->debug_flags & DBG(TEST_VMFAULT_CP)) {
Marek Olšák's avatar
Marek Olšák committed
838
839
		si_cp_dma_copy_buffer(sctx, buf, buf, 0, 4, 4, 0,
				      SI_COHERENCY_NONE, L2_BYPASS);
840
841
842
		ctx->flush(ctx, NULL, 0);
		puts("VM fault test: CP - done.");
	}
843
	if (sscreen->debug_flags & DBG(TEST_VMFAULT_SDMA)) {
844
		si_sdma_clear_buffer(sctx, buf, 0, 4, 0);
845
846
847
		ctx->flush(ctx, NULL, 0);
		puts("VM fault test: SDMA - done.");
	}
848
	if (sscreen->debug_flags & DBG(TEST_VMFAULT_SHADER)) {
849
850
851
852
853
854
		util_test_constant_buffer(ctx, buf);
		puts("VM fault test: Shader - done.");
	}
	exit(0);
}

/* Debug-only stress test for GDS memory management: repeatedly submits
 * compute IBs that keep the GPU busy with CP DMA clears while GDS buffers
 * allocated here stay referenced by the command streams.  Never returns —
 * exits the process when the iteration budget is exhausted.
 */
static void si_test_gds_memory_management(struct si_context *sctx,
					  unsigned alloc_size, unsigned alignment,
					  enum radeon_bo_domain domain)
{
	struct radeon_winsys *ws = sctx->ws;
	struct radeon_cmdbuf *ibs[8];
	struct pb_buffer *bos[ARRAY_SIZE(ibs)];
	unsigned slot, iter;

	/* One compute command stream and one buffer per slot. */
	for (slot = 0; slot < ARRAY_SIZE(ibs); slot++) {
		ibs[slot] = ws->cs_create(sctx->ctx, RING_COMPUTE,
					  NULL, NULL, false);
		bos[slot] = ws->buffer_create(ws, alloc_size, alignment, domain, 0);
		assert(bos[slot]);
	}

	for (iter = 0; iter < 20000; iter++) {
		for (slot = 0; slot < ARRAY_SIZE(ibs); slot++) {
			/* This clears GDS with CP DMA.
			 *
			 * We don't care if GDS is present. Just add some packet
			 * to make the GPU busy for a moment.
			 */
			si_cp_dma_clear_buffer(sctx, ibs[slot], NULL, 0, alloc_size, 0,
					       SI_CPDMA_SKIP_BO_LIST_UPDATE |
					       SI_CPDMA_SKIP_CHECK_CS_SPACE |
					       SI_CPDMA_SKIP_GFX_SYNC, 0, 0);

			ws->cs_add_buffer(ibs[slot], bos[slot], domain,
					  RADEON_USAGE_READWRITE, 0);
			ws->cs_flush(ibs[slot], PIPE_FLUSH_ASYNC, NULL);
		}
	}
	exit(0);
}

890
static void si_disk_cache_create(struct si_screen *sscreen)
891
{
892
	/* Don't use the cache if shader dumping is enabled. */
893
	if (sscreen->debug_flags & DBG_ALL_SHADERS)
894
		return;
895

896
897
898
	struct mesa_sha1 ctx;
	unsigned char sha1[20];
	char cache_id[20 * 2 + 1];
899

900
901
902
903
904
905
906
907
908
909
910
	_mesa_sha1_init(&ctx);

	if (!disk_cache_get_function_identifier(si_disk_cache_create, &ctx) ||
	    !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo,
						&ctx))
		return;

	_mesa_sha1_final(&ctx, sha1);
	disk_cache_format_hex_id(cache_id, sha1, 20 * 2);

	/* These flags affect shader compilation. */
911
	#define ALL_FLAGS (DBG(SI_SCHED) | DBG(GISEL))
912
	uint64_t shader_debug_flags = sscreen->debug_flags & ALL_FLAGS;
913
	/* Reserve left-most bit for tgsi/nir selector */
914
915
916
	assert(!(shader_debug_flags & (1u << 31)));
	shader_debug_flags |= (uint32_t)
		((sscreen->options.enable_nir & 0x1) << 31);
917

918
919
920
921
	/* Add the high bits of 32-bit addresses, which affects
	 * how 32-bit addresses are expanded to 64 bits.
	 */
	STATIC_ASSERT(ALL_FLAGS <= UINT_MAX);
922
923
924
	assert((int16_t)sscreen->info.address32_hi == (int32_t)sscreen