Compare revisions

Dave Airlie · Dave Airlie · Dave Airlie · Dave Airlie · Dave Airlie · Dave Airlie
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -204,7 +204,7 @@ x86_build-base:
    - .fdo.container-build@debian
    - .container
  variables:
-    FDO_DISTRIBUTION_TAG: &x86_build-base "2020-07-28-x86-2"
+    FDO_DISTRIBUTION_TAG: &x86_build-base "2020-08-13-gold"

 .use-x86_build-base:
  extends:
@@ -222,7 +222,7 @@ x86_build:
  extends:
    - .use-x86_build-base
  variables:
-    FDO_DISTRIBUTION_TAG: &x86_build "2020-08-08-glvnd"
+    FDO_DISTRIBUTION_TAG: &x86_build "2020-08-13-gold"

 .use-x86_build:
  variables:
@@ -236,7 +236,7 @@ i386_build:
  extends:
    - .use-x86_build-base
  variables:
-    FDO_DISTRIBUTION_TAG: &i386_build "2020-07-28-x86-2"
+    FDO_DISTRIBUTION_TAG: &i386_build "2020-08-13-gold"

 .use-i386_build:
  variables:
@@ -250,7 +250,7 @@ ppc64el_build:
  extends:
    - .use-x86_build-base
  variables:
-    FDO_DISTRIBUTION_TAG: &ppc64el_build "2020-07-28-x86-2"
+    FDO_DISTRIBUTION_TAG: &ppc64el_build "2020-08-13-gold"

 .use-ppc64el_build:
  variables:
@@ -264,7 +264,7 @@ s390x_build:
  extends:
    - .use-x86_build-base
  variables:
-    FDO_DISTRIBUTION_TAG: &s390x_build "2020-07-28-x86-2"
+    FDO_DISTRIBUTION_TAG: &s390x_build "2020-08-13-gold"

 .use-s390x_build:
  variables:
@@ -277,7 +277,7 @@ s390x_build:
 x86_test-base:
  extends: x86_build-base
  variables:
-    FDO_DISTRIBUTION_TAG: &x86_test-base "2020-07-28-x86-2"
+    FDO_DISTRIBUTION_TAG: &x86_test-base "2020-08-13-gold"

 .use-x86_test-base:
  extends:
@@ -294,19 +294,19 @@ x86_test-base:
 x86_test-gl:
  extends: .use-x86_test-base
  variables:
-    FDO_DISTRIBUTION_TAG: &x86_test-gl "2020-08-14-xcb-shm"
+    FDO_DISTRIBUTION_TAG: &x86_test-gl "2020-08-24-gold"

 # Debian 10 based x86 test image for VK
 x86_test-vk:
  extends: .use-x86_test-base
  variables:
-    FDO_DISTRIBUTION_TAG: &x86_test-vk "2020-07-28-x86-2"
+    FDO_DISTRIBUTION_TAG: &x86_test-vk "2020-08-13-gold"

 # Debian 9 based x86 build image (old LLVM)
 x86_build_old:
  extends: x86_build-base
  variables:
-    FDO_DISTRIBUTION_TAG: &x86_build_old "2020-07-28-x86-2"
+    FDO_DISTRIBUTION_TAG: &x86_build_old "2020-08-13-gold"
    FDO_DISTRIBUTION_VERSION: stretch-slim

 .use-x86_build_old:
@@ -322,7 +322,7 @@ arm_build:
    - .fdo.container-build@debian@arm64v8
    - .container
  variables:
-    FDO_DISTRIBUTION_TAG: &arm_build "2020-08-04-nfs-2"
+    FDO_DISTRIBUTION_TAG: &arm_build "2020-08-13-gold"

 .use-arm_build:
  variables:
@@ -337,7 +337,7 @@ arm_test-base:
    - .fdo.container-build@debian
    - .container
  variables:
-    FDO_DISTRIBUTION_TAG: &arm_test-base "2020-07-28-libdrm"
+    FDO_DISTRIBUTION_TAG: &arm_test-base "2020-08-13-gold"

 .use-arm_test-base:
  extends:
@@ -355,7 +355,7 @@ arm64_test:
  extends:
    - .use-arm_test-base
  variables:
-    FDO_DISTRIBUTION_TAG: &arm64_test "2020-08-18"
+    FDO_DISTRIBUTION_TAG: &arm64_test "2020-08-24-gold"

 .use-arm64_test:
  variables:

--- a/.gitlab-ci/build-apitrace.sh
+++ b/.gitlab-ci/build-apitrace.sh
@@ -23,7 +23,10 @@ APITRACE_VERSION="9.0"
 git clone https://github.com/apitrace/apitrace.git --single-branch --no-checkout /apitrace
 pushd /apitrace
 git checkout "$APITRACE_VERSION"
-cmake -G Ninja -B_build -H. -DCMAKE_BUILD_TYPE=Release -DENABLE_GUI=False -DENABLE_WAFFLE=on -DWaffle_DIR=/usr/local/lib/cmake/Waffle/ $EXTRA_CMAKE_ARGS
+# Note: The cmake stuff for waffle in apitrace fails to use waffle's library
+# directory.  Just force the issue here.
+env LDFLAGS="-L/usr/local/lib" \
+    cmake -G Ninja -B_build -H. -DCMAKE_BUILD_TYPE=Release -DENABLE_GUI=False -DENABLE_WAFFLE=on -DWaffle_DIR=/usr/local/lib/cmake/Waffle/ $EXTRA_CMAKE_ARGS
 ninja -C _build
 mkdir build
 cp _build/apitrace build

--- a/.gitlab-ci/container/container_pre_build.sh
+++ b/.gitlab-ci/container/container_pre_build.sh
@@ -12,6 +12,13 @@ export PATH=/usr/lib/ccache:$PATH
 export CC="/usr/lib/ccache/gcc"
 export CXX="/usr/lib/ccache/g++"

+# Force linkers to gold, since it's so much faster for building.  We can't use
+# lld because we're on old debian and it's buggy.  mingw fails meson builds
+# with it with "meson.build:21:0: ERROR: Unable to determine dynamic linker"
+find /usr/bin -name \*-ld -o -name ld | \
+    grep -v mingw | \
+    xargs -I '{}' ln -sf '{}.gold' '{}'
+
 ccache --show-stats

 # Make a wrapper script for ninja to always include the -j flags

--- a/.gitlab-ci/container/lava_build.sh
+++ b/.gitlab-ci/container/lava_build.sh
@@ -134,6 +134,17 @@ rm -rf /libdrm
 mkdir -p kernel
 wget -qO- ${KERNEL_URL} | tar -xz --strip-components=1 -C kernel
 pushd kernel
+
+# The kernel doesn't like the gold linker (or the old lld in our debians).
+# Sneak in some override symlinks during kernel build until we can update
+# debian (they'll get blown away by the rm of the kernel dir at the end).
+mkdir -p ld-links
+for i in /usr/bin/*-ld /usr/bin/ld; do
+    i=$(basename "$i")
+    ln -sf "/usr/bin/$i.bfd" "ld-links/$i"
+done
+export PATH="$(pwd)/ld-links:$PATH"
+
 ./scripts/kconfig/merge_config.sh ${DEFCONFIG} ../.gitlab-ci/${KERNEL_ARCH}.config
 make ${KERNEL_IMAGE_NAME}
 for image in ${KERNEL_IMAGE_NAME}; do

--- a/.gitlab-ci/container/x86_build.sh
+++ b/.gitlab-ci/container/x86_build.sh
@@ -58,7 +58,7 @@ export           WAYLAND_RELEASES=https://wayland.freedesktop.org/releases
 export         XORGMACROS_VERSION=util-macros-1.19.0
 export           XCBPROTO_VERSION=xcb-proto-1.13
 export             LIBXCB_VERSION=libxcb-1.13
-export         LIBWAYLAND_VERSION=wayland-1.15.0
+export         LIBWAYLAND_VERSION=wayland-1.17.0
 export  WAYLAND_PROTOCOLS_VERSION=wayland-protocols-1.12

 wget $XORG_RELEASES/util/$XORGMACROS_VERSION.tar.bz2

--- a/.gitlab-ci/deqp-runner.sh
+++ b/.gitlab-ci/deqp-runner.sh
@@ -238,7 +238,7 @@ check_vk_device_name() {
    $DEQP $DEQP_OPTIONS --deqp-case=dEQP-VK.info.device --deqp-log-filename=$RESULTS/deqp-info.qpa
    DEVICENAME=`grep deviceName $RESULTS/deqp-info.qpa | sed 's|deviceName: ||g'`
    echo "deviceName: $DEVICENAME"
-    if [ -n "$DEQP_EXPECTED_RENDERER" -a $DEVICENAME != "$DEQP_EXPECTED_RENDERER" ]; then
+    if [ -n "$DEQP_EXPECTED_RENDERER" -a "x$DEVICENAME" != "x$DEQP_EXPECTED_RENDERER" ]; then
        echo "Expected deviceName $DEQP_EXPECTED_RENDERER"
        exit 1
    fi

--- a/docs/features.txt
+++ b/docs/features.txt
@@ -135,8 +135,8 @@ GL 4.0, GLSL 4.00 --- all DONE: i965/gen7+, nvc0, r600, radeonsi, llvmpipe, virg
  GL_ARB_texture_buffer_object_rgb32                    DONE (freedreno, i965/gen6+, softpipe, swr)
  GL_ARB_texture_cube_map_array                         DONE (i965/gen6+, nv50, softpipe, swr, zink)
  GL_ARB_texture_gather                                 DONE (freedreno, i965/gen6+, nv50, softpipe, swr, v3d)
-  GL_ARB_texture_query_lod                              DONE (freedreno, i965, nv50, softpipe, swr, v3d)
-  GL_ARB_transform_feedback2                            DONE (i965/gen6+, nv50, softpipe, swr, v3d)
+  GL_ARB_texture_query_lod                              DONE (freedreno, i965, nv50, softpipe, swr, v3d, panfrost)
+  GL_ARB_transform_feedback2                            DONE (i965/gen6+, nv50, softpipe, swr, v3d, panfrost)
  GL_ARB_transform_feedback3                            DONE (i965/gen7+, softpipe, swr)


@@ -162,7 +162,7 @@ GL 4.2, GLSL 4.20 -- all DONE: i965/gen7+, nvc0, r600, radeonsi, llvmpipe, virgl
  GL_ARB_conservative_depth                             DONE (all drivers that support GLSL 1.30)
  GL_ARB_shading_language_420pack                       DONE (all drivers that support GLSL 1.30)
  GL_ARB_shading_language_packing                       DONE (all drivers)
-  GL_ARB_internalformat_query                           DONE (freedreno, i965, nv50, softpipe, swr, v3d, zink)
+  GL_ARB_internalformat_query                           DONE (freedreno, i965, nv50, softpipe, swr, v3d, zink, panfrost)
  GL_ARB_map_buffer_alignment                           DONE (all drivers)


@@ -171,7 +171,7 @@ GL 4.3, GLSL 4.30 -- all DONE: i965/gen8+, nvc0, r600, radeonsi, llvmpipe, virgl
  GL_ARB_arrays_of_arrays                               DONE (all drivers that support GLSL 1.30)
  GL_ARB_ES3_compatibility                              DONE (all drivers that support GLSL 3.30)
  GL_ARB_clear_buffer_object                            DONE (all drivers)
-  GL_ARB_compute_shader                                 DONE (freedreno/a5xx+, i965, softpipe, v3d)
+  GL_ARB_compute_shader                                 DONE (freedreno/a5xx+, i965, softpipe, v3d, panfrost)
  GL_ARB_copy_image                                     DONE (i965, nv50, softpipe, swr)
  GL_KHR_debug                                          DONE (all drivers)
  GL_ARB_explicit_uniform_location                      DONE (all drivers that support GLSL)
@@ -183,7 +183,7 @@ GL 4.3, GLSL 4.30 -- all DONE: i965/gen8+, nvc0, r600, radeonsi, llvmpipe, virgl
  GL_ARB_program_interface_query                        DONE (all drivers)
  GL_ARB_robust_buffer_access_behavior                  DONE (i965)
  GL_ARB_shader_image_size                              DONE (freedreno/a5xx+, i965, softpipe, v3d)
-  GL_ARB_shader_storage_buffer_object                   DONE (freedreno/a5xx+, i965, softpipe, v3d)
+  GL_ARB_shader_storage_buffer_object                   DONE (freedreno/a5xx+, i965, softpipe, v3d, panfrost)
  GL_ARB_stencil_texturing                              DONE (freedreno, i965/hsw+, nv50, softpipe, swr, v3d)
  GL_ARB_texture_buffer_range                           DONE (freedreno, nv50, i965, softpipe, swr)
  GL_ARB_texture_query_levels                           DONE (all drivers that support GLSL 1.30)
@@ -207,7 +207,7 @@ GL 4.4, GLSL 4.40 -- all DONE: i965/gen8+, nvc0, r600, radeonsi, llvmpipe
  GL_ARB_multi_bind                                     DONE (all drivers)
  GL_ARB_query_buffer_object                            DONE (i965/hsw+, virgl)
  GL_ARB_texture_mirror_clamp_to_edge                   DONE (i965, nv50, softpipe, swr, virgl, panfrost)
-  GL_ARB_texture_stencil8                               DONE (freedreno, i965/hsw+, nv50, softpipe, swr, virgl, v3d)
+  GL_ARB_texture_stencil8                               DONE (freedreno, i965/hsw+, nv50, softpipe, swr, virgl, v3d, panfrost)
  GL_ARB_vertex_type_10f_11f_11f_rev                    DONE (i965, nv50, softpipe, swr, virgl, zink, panfrost)

 GL 4.5, GLSL 4.50 -- all DONE: nvc0, r600, radeonsi, llvmpipe
@@ -256,7 +256,7 @@ GLES3.1, GLSL ES 3.1 -- all DONE: i965/hsw+, nvc0, r600, radeonsi, virgl, v3d, l
  GL_ARB_shader_storage_buffer_object                   DONE (freedreno/a5xx+, i965/gen7+, softpipe)
  GL_ARB_shading_language_packing                       DONE (all drivers)
  GL_ARB_separate_shader_objects                        DONE (all drivers)
-  GL_ARB_stencil_texturing                              DONE (freedreno, nv50, softpipe, swr)
+  GL_ARB_stencil_texturing                              DONE (freedreno, nv50, softpipe, swr, panfrost)
  GL_ARB_texture_multisample (Multisample textures)     DONE (freedreno/a5xx+, i965/gen7+, nv50, softpipe, panfrost)
  GL_ARB_texture_storage_multisample                    DONE (all drivers that support GL_ARB_texture_multisample)
  GL_ARB_vertex_attrib_binding                          DONE (all drivers)

--- a/meson.build
+++ b/meson.build
@@ -427,8 +427,10 @@ else
 endif

 # Android uses emutls for versions <= P/28. For USE_ELF_TLS we need ELF TLS.
-if not ['windows', 'freebsd'].contains(host_machine.system()) and (not with_platform_android or get_option('platform-sdk-version') >= 29)
+use_elf_tls = false
+if not ['windows', 'freebsd', 'openbsd'].contains(host_machine.system()) and (not with_platform_android or get_option('platform-sdk-version') >= 29)
  pre_args += '-DUSE_ELF_TLS'
+  use_elf_tls = true
 endif

 if with_glx != 'disabled'
@@ -949,6 +951,8 @@ elif host_machine.system() == 'windows'
  else
    pre_args += ['-D__MSVCRT_VERSION__=0x0700']
  endif
+elif host_machine.system() == 'openbsd'
+  pre_args += '-D_ISOC11_SOURCE'
 endif

 # Check for generic C arguments
@@ -1202,7 +1206,7 @@ if not ['linux'].contains(host_machine.system())
  endif
 endif

-foreach h : ['xlocale.h', 'linux/futex.h', 'endian.h', 'dlfcn.h', 'execinfo.h', 'sys/shm.h', 'cet.h']
+foreach h : ['xlocale.h', 'linux/futex.h', 'endian.h', 'dlfcn.h', 'execinfo.h', 'sys/shm.h', 'cet.h', 'pthread_np.h']
  if cc.check_header(h)
    pre_args += '-DHAVE_@0@'.format(h.to_upper().underscorify())
  endif
@@ -1849,6 +1853,12 @@ if dep_dl.found()
  gl_priv_libs += '-ldl'
 endif

+# FIXME: autotools lists this as incomplete
+gbm_priv_libs = []
+if dep_dl.found()
+  gbm_priv_libs += '-ldl'
+endif
+
 pkg = import('pkgconfig')

 if host_machine.system() == 'windows'

--- a/src/amd/vulkan/radv_debug.c
+++ b/src/amd/vulkan/radv_debug.c
@@ -444,7 +444,7 @@ radv_dump_shader(struct radv_pipeline *pipeline,
 		shader->ir_string);
 	fprintf(f, "DISASM:\n%s\n", shader->disasm_string);

-	radv_shader_dump_stats(pipeline->device, shader, stage, f);
+	radv_dump_shader_stats(pipeline->device, pipeline, stage, f);
 }

 static void

--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3104,10 +3104,9 @@ VkResult radv_create_shaders(struct radv_pipeline *pipeline,
 		if (nir[i]) {
 			ralloc_free(nir[i]);

-			if (radv_can_dump_shader_stats(device, modules[i]))
-				radv_shader_dump_stats(device,
-						       pipeline->shaders[i],
-						       i, stderr);
+			if (radv_can_dump_shader_stats(device, modules[i])) {
+				radv_dump_shader_stats(device, pipeline, i, stderr);
+			}
 		}
 	}


--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -47,8 +47,6 @@

 #include "aco_interface.h"

-#include "util/string_buffer.h"
-
 static const struct nir_shader_compiler_options nir_options_llvm = {
 	.vertex_id_zero_based = true,
 	.lower_scmp = true,
@@ -1507,66 +1505,6 @@ radv_get_max_waves(struct radv_device *device,
 	return max_simd_waves;
 }

-static void
-generate_shader_stats(struct radv_device *device,
-		      struct radv_shader_variant *variant,
-		      gl_shader_stage stage,
-		      struct _mesa_string_buffer *buf)
-{
-	struct ac_shader_config *conf = &variant->config;
-	unsigned max_simd_waves = radv_get_max_waves(device, variant, stage);
-
-	if (stage == MESA_SHADER_FRAGMENT) {
-		_mesa_string_buffer_printf(buf, "*** SHADER CONFIG ***\n"
-					   "SPI_PS_INPUT_ADDR = 0x%04x\n"
-					   "SPI_PS_INPUT_ENA  = 0x%04x\n",
-					   conf->spi_ps_input_addr, conf->spi_ps_input_ena);
-	}
-
-	_mesa_string_buffer_printf(buf, "*** SHADER STATS ***\n"
-				   "SGPRS: %d\n"
-				   "VGPRS: %d\n"
-				   "Spilled SGPRs: %d\n"
-				   "Spilled VGPRs: %d\n"
-				   "PrivMem VGPRS: %d\n"
-				   "Code Size: %d bytes\n"
-				   "LDS: %d blocks\n"
-				   "Scratch: %d bytes per wave\n"
-				   "Max Waves: %d\n",
-				   conf->num_sgprs, conf->num_vgprs,
-				   conf->spilled_sgprs, conf->spilled_vgprs,
-				   variant->info.private_mem_vgprs, variant->exec_size,
-				   conf->lds_size, conf->scratch_bytes_per_wave,
-				   max_simd_waves);
-
-	if (variant->statistics) {
-		_mesa_string_buffer_printf(buf, "*** COMPILER STATS ***\n");
-		for (unsigned i = 0; i < variant->statistics->count; i++) {
-			struct radv_compiler_statistic_info *info = &variant->statistics->infos[i];
-			uint32_t value = variant->statistics->values[i];
-			_mesa_string_buffer_printf(buf, "%s: %lu\n", info->name, value);
-		}
-	}
-
-	_mesa_string_buffer_printf(buf, "********************\n\n\n");
-}
-
-void
-radv_shader_dump_stats(struct radv_device *device,
-		       struct radv_shader_variant *variant,
-		       gl_shader_stage stage,
-		       FILE *file)
-{
-	struct _mesa_string_buffer *buf = _mesa_string_buffer_create(NULL, 256);
-
-	generate_shader_stats(device, variant, stage, buf);
-
-	fprintf(file, "\n%s:\n", radv_get_shader_name(&variant->info, stage));
-	fprintf(file, "%s", buf->buf);
-
-	_mesa_string_buffer_destroy(buf);
-}
-
 VkResult
 radv_GetShaderInfoAMD(VkDevice _device,
 		      VkPipeline _pipeline,
@@ -1579,7 +1517,6 @@ radv_GetShaderInfoAMD(VkDevice _device,
 	RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
 	gl_shader_stage stage = vk_to_mesa_shader_stage(shaderStage);
 	struct radv_shader_variant *variant = pipeline->shaders[stage];
-	struct _mesa_string_buffer *buf;
 	VkResult result = VK_SUCCESS;

 	/* Spec doesn't indicate what to do if the stage is invalid, so just
@@ -1631,16 +1568,19 @@ radv_GetShaderInfoAMD(VkDevice _device,
 		}

 		break;
-	case VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD:
-		buf = _mesa_string_buffer_create(NULL, 1024);
+	case VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD: {
+		char *out;
+	        size_t outsize;
+	        FILE *memf = open_memstream(&out, &outsize);

-		_mesa_string_buffer_printf(buf, "%s:\n", radv_get_shader_name(&variant->info, stage));
-		_mesa_string_buffer_printf(buf, "%s\n\n", variant->ir_string);
-		_mesa_string_buffer_printf(buf, "%s\n\n", variant->disasm_string);
-		generate_shader_stats(device, variant, stage, buf);
+		fprintf(memf, "%s:\n", radv_get_shader_name(&variant->info, stage));
+		fprintf(memf, "%s\n\n", variant->ir_string);
+		fprintf(memf, "%s\n\n", variant->disasm_string);
+		radv_dump_shader_stats(device, pipeline, stage, memf);
+		fclose(memf);

 		/* Need to include the null terminator. */
-		size_t length = buf->length + 1;
+		size_t length = outsize + 1;

 		if (!pInfo) {
 			*pInfoSize = length;
@@ -1648,14 +1588,15 @@ radv_GetShaderInfoAMD(VkDevice _device,
 			size_t size = *pInfoSize;
 			*pInfoSize = length;

-			memcpy(pInfo, buf->buf, MIN2(size, length));
+			memcpy(pInfo, out, MIN2(size, length));

 			if (size < length)
 				result = VK_INCOMPLETE;
 		}

-		_mesa_string_buffer_destroy(buf);
+		free(out);
 		break;
+	}
 	default:
 		/* VK_SHADER_INFO_TYPE_BINARY_AMD unimplemented for now. */
 		result = VK_ERROR_FEATURE_NOT_PRESENT;
@@ -1664,3 +1605,113 @@ radv_GetShaderInfoAMD(VkDevice _device,
 
 	return result;
 }
+
+/* Write the statistics of one shader stage of a pipeline to a stream.
+ *
+ * Enumerates the pipeline's executables with
+ * radv_GetPipelineExecutablePropertiesKHR() and, for each executable whose
+ * stage mask includes `stage`, prints every statistic returned by
+ * radv_GetPipelineExecutableStatisticsKHR() as a "name: value" line.
+ *
+ * Returns VK_SUCCESS, VK_ERROR_OUT_OF_HOST_MEMORY when a temporary array
+ * cannot be allocated, or the error reported by a failed query.
+ */
+VkResult
+radv_dump_shader_stats(struct radv_device *device,
+		       struct radv_pipeline *pipeline,
+		       gl_shader_stage stage, FILE *output)
+{
+	struct radv_shader_variant *shader = pipeline->shaders[stage];
+	VkPipelineExecutablePropertiesKHR *props = NULL;
+	uint32_t prop_count = 0;
+	VkResult result;
+
+	VkPipelineInfoKHR pipeline_info = {};
+	pipeline_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR;
+	pipeline_info.pipeline = radv_pipeline_to_handle(pipeline);
+
+	result = radv_GetPipelineExecutablePropertiesKHR(radv_device_to_handle(device),
+							 &pipeline_info,
+							 &prop_count, NULL);
+	if (result != VK_SUCCESS)
+		return result;
+
+	props = calloc(prop_count, sizeof(*props));
+	if (!props)
+		return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+	result = radv_GetPipelineExecutablePropertiesKHR(radv_device_to_handle(device),
+							 &pipeline_info,
+							 &prop_count, props);
+	if (result != VK_SUCCESS)
+		goto fail;
+
+	for (unsigned i = 0; i < prop_count; i++) {
+		if (!(props[i].stages & mesa_to_vk_shader_stage(stage)))
+			continue;
+
+		VkPipelineExecutableStatisticKHR *stats = NULL;
+		uint32_t stat_count = 0;
+
+		VkPipelineExecutableInfoKHR exec_info = {};
+		exec_info.sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR;
+		exec_info.pipeline = radv_pipeline_to_handle(pipeline);
+		exec_info.executableIndex = i;
+
+		/* Store errors in the function-level `result`: the `fail` path
+		 * returns that variable, so a loop-local copy would be lost.
+		 */
+		result = radv_GetPipelineExecutableStatisticsKHR(radv_device_to_handle(device),
+								 &exec_info,
+								 &stat_count, NULL);
+		if (result != VK_SUCCESS)
+			goto fail;
+
+		stats = calloc(stat_count, sizeof(*stats));
+		if (!stats) {
+			result = VK_ERROR_OUT_OF_HOST_MEMORY;
+			goto fail;
+		}
+
+		result = radv_GetPipelineExecutableStatisticsKHR(radv_device_to_handle(device),
+								 &exec_info,
+								 &stat_count, stats);
+		if (result != VK_SUCCESS) {
+			free(stats);
+			goto fail;
+		}
+
+		fprintf(output, "\n%s:\n",
+			radv_get_shader_name(&shader->info, stage));
+		fprintf(output, "*** SHADER STATS ***\n");
+
+		for (unsigned s = 0; s < stat_count; s++) {
+			fprintf(output, "%s: ", stats[s].name);
+			switch (stats[s].format) {
+			case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_BOOL32_KHR:
+				fprintf(output, "%s", stats[s].value.b32 == VK_TRUE ? "true" : "false");
+				break;
+			case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_INT64_KHR:
+				fprintf(output, "%"PRIi64, stats[s].value.i64);
+				break;
+			case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR:
+				fprintf(output, "%"PRIu64, stats[s].value.u64);
+				break;
+			case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_FLOAT64_KHR:
+				fprintf(output, "%f", stats[s].value.f64);
+				break;
+			default:
+				unreachable("Invalid pipeline statistic format");
+			}
+			fprintf(output, "\n");
+		}
+
+		fprintf(output, "********************\n\n\n");
+
+		free(stats);
+	}
+
+fail:
+	free(props);
+	return result;
+}
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -510,12 +510,6 @@ const char *
 radv_get_shader_name(struct radv_shader_info *info,
 		     gl_shader_stage stage);

-void
-radv_shader_dump_stats(struct radv_device *device,
-		       struct radv_shader_variant *variant,
-		       gl_shader_stage stage,
-		       FILE *file);
-
 bool
 radv_can_dump_shader(struct radv_device *device,
 		     struct radv_shader_module *module,
@@ -525,6 +519,11 @@ bool
 radv_can_dump_shader_stats(struct radv_device *device,
 			   struct radv_shader_module *module);

+VkResult
+radv_dump_shader_stats(struct radv_device *device,
+		       struct radv_pipeline *pipeline,
+		       gl_shader_stage stage, FILE *output);
+
 static inline unsigned
 shader_io_get_unique_index(gl_varying_slot slot)
 {

--- a/src/broadcom/compiler/qpu_schedule.c
+++ b/src/broadcom/compiler/qpu_schedule.c
@@ -658,8 +658,8 @@ qpu_compatible_peripheral_access(const struct v3d_device_info *devinfo,
        /* V3D 4.1 and later allow TMU read along with a VPM read or write, and
         * WRTMUC with a TMU magic register write (other than tmuc).
         */
-        if ((a->sig.ldtmu && v3d_qpu_uses_vpm(b)) ||
-            (b->sig.ldtmu && v3d_qpu_uses_vpm(a))) {
+        if ((a->sig.ldtmu && v3d_qpu_reads_or_writes_vpm(b)) ||
+            (b->sig.ldtmu && v3d_qpu_reads_or_writes_vpm(a))) {
                return true;
        }


--- a/src/broadcom/qpu/qpu_instr.c
+++ b/src/broadcom/qpu/qpu_instr.c
@@ -591,7 +591,6 @@ v3d_qpu_add_op_reads_vpm(enum  v3d_qpu_add_op op)
 {
        switch (op) {
        case V3D_QPU_A_VPMSETUP:
-        case V3D_QPU_A_VPMWT:
        case V3D_QPU_A_LDVPMV_IN:
        case V3D_QPU_A_LDVPMV_OUT:
        case V3D_QPU_A_LDVPMD_IN:
@@ -610,7 +609,6 @@ v3d_qpu_add_op_writes_vpm(enum  v3d_qpu_add_op op)
 {
        switch (op) {
        case V3D_QPU_A_VPMSETUP:
-        case V3D_QPU_A_VPMWT:
        case V3D_QPU_A_STVPMV:
        case V3D_QPU_A_STVPMD:
        case V3D_QPU_A_STVPMP:
@@ -737,12 +735,27 @@ v3d_qpu_writes_vpm(const struct v3d_qpu_instr *inst)
        return false;
 }

+static bool
+v3d_qpu_waits_vpm(const struct v3d_qpu_instr *inst)
+{
+        return inst->type == V3D_QPU_INSTR_TYPE_ALU &&
+               inst->alu.add.op == V3D_QPU_A_VPMWT;
+}
+
 bool
-v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst)
+v3d_qpu_reads_or_writes_vpm(const struct v3d_qpu_instr *inst)
 {
        return v3d_qpu_reads_vpm(inst) || v3d_qpu_writes_vpm(inst);
 }

+bool
+v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst)
+{
+        return v3d_qpu_reads_vpm(inst) ||
+               v3d_qpu_writes_vpm(inst) ||
+               v3d_qpu_waits_vpm(inst);
+}
+
 bool
 v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,
                  const struct v3d_qpu_instr *inst)

--- a/src/broadcom/qpu/qpu_instr.h
+++ b/src/broadcom/qpu/qpu_instr.h
@@ -462,6 +462,7 @@ bool v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux);
 bool v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
 bool v3d_qpu_reads_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
 bool v3d_qpu_writes_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
+bool v3d_qpu_reads_or_writes_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
 bool v3d_qpu_reads_flags(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
 bool v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
 bool v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,

--- a/src/compiler/builtin_type_macros.h
+++ b/src/compiler/builtin_type_macros.h
@@ -167,6 +167,14 @@ DECL_TYPE(uimageCubeArray, GL_UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY,       GLSL_TYPE
 DECL_TYPE(uimage2DMS,      GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE,       GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS,     0, 0, GLSL_TYPE_UINT)
 DECL_TYPE(uimage2DMSArray, GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS,     0, 1, GLSL_TYPE_UINT)

+/* OpenCL image types */
+DECL_TYPE(vbuffer, GL_IMAGE_BUFFER, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_BUF, 0, 0, GLSL_TYPE_VOID)
+DECL_TYPE(vimage1D, GL_IMAGE_1D, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D, 0, 0, GLSL_TYPE_VOID)
+DECL_TYPE(vimage2D, GL_IMAGE_2D, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D, 0, 0, GLSL_TYPE_VOID)
+DECL_TYPE(vimage3D, GL_IMAGE_3D, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_3D, 0, 0, GLSL_TYPE_VOID)
+DECL_TYPE(vimage1DArray, GL_IMAGE_1D_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D, 0, 1, GLSL_TYPE_VOID)
+DECL_TYPE(vimage2DArray, GL_IMAGE_2D_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D, 0, 1, GLSL_TYPE_VOID)
+
 DECL_TYPE(subpassInput,    0,                                          GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_SUBPASS,    0, 0, GLSL_TYPE_FLOAT)
 DECL_TYPE(subpassInputMS,  0,                                          GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_SUBPASS_MS, 0, 0, GLSL_TYPE_FLOAT)
 DECL_TYPE(isubpassInput,   0,                                          GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_SUBPASS,    0, 0, GLSL_TYPE_INT)

--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -1334,6 +1334,8 @@ nir_visitor::visit(ir_call *ir)
         } else if (op == nir_intrinsic_image_deref_load ||
                    op == nir_intrinsic_image_deref_store) {
            instr->num_components = 4;
+            nir_intrinsic_set_type(instr,
+               nir_get_nir_type_for_glsl_base_type(type->sampled_type));
         }

         if (op == nir_intrinsic_image_deref_size ||

--- a/src/compiler/glsl_types.cpp
+++ b/src/compiler/glsl_types.cpp
@@ -1018,6 +1018,19 @@ glsl_type::get_image_instance(enum glsl_sampler_dim dim,
      case GLSL_SAMPLER_DIM_EXTERNAL:
         return error_type;
      }
+   case GLSL_TYPE_VOID:
+      switch (dim) {
+      case GLSL_SAMPLER_DIM_1D:
+         return (array ? vimage1DArray_type : vimage1D_type);
+      case GLSL_SAMPLER_DIM_2D:
+         return (array ? vimage2DArray_type : vimage2D_type);
+      case GLSL_SAMPLER_DIM_3D:
+         return (array ? error_type : vimage3D_type);
+      case GLSL_SAMPLER_DIM_BUF:
+         return (array ? error_type : vbuffer_type);
+      default:
+         return error_type;
+      }
   default:
      return error_type;
   }

--- a/src/compiler/nir/nir.c
+++ b/src/compiler/nir/nir.c
@@ -801,6 +801,8 @@ add_ssa_def_cb(nir_ssa_def *def, void *state)
         nir_cf_node_get_function(&instr->block->cf_node);

      def->index = impl->ssa_alloc++;
+
+      impl->valid_metadata &= ~nir_metadata_live_ssa_defs;
   }

   return true;
@@ -1503,6 +1505,8 @@ nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
         nir_cf_node_get_function(&instr->block->cf_node);

      def->index = impl->ssa_alloc++;
+
+      impl->valid_metadata &= ~nir_metadata_live_ssa_defs;
   } else {
      def->index = UINT_MAX;
   }
@@ -1843,6 +1847,8 @@ nir_index_ssa_defs(nir_function_impl *impl)
 {
   unsigned index = 0;

+   impl->valid_metadata &= ~nir_metadata_live_ssa_defs;
+
   nir_foreach_block_unstructured(block, impl) {
      nir_foreach_instr(instr, block)
         nir_foreach_ssa_def(instr, index_ssa_def_cb, &index);
@@ -2285,6 +2291,9 @@ nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_ssa_def *src,
                            bool bindless)
 {
   enum gl_access_qualifier access = nir_intrinsic_access(intrin);
+   nir_alu_type type = nir_type_invalid;
+   if (nir_intrinsic_infos[intrin->intrinsic].index_map[NIR_INTRINSIC_TYPE])
+      type = nir_intrinsic_type(intrin);

   switch (intrin->intrinsic) {
 #define CASE(op) \
@@ -2323,6 +2332,8 @@ nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_ssa_def *src,
   nir_intrinsic_set_image_array(intrin, glsl_sampler_type_is_array(deref->type));
   nir_intrinsic_set_access(intrin, access | var->data.access);
   nir_intrinsic_set_format(intrin, var->data.image.format);
+   if (nir_intrinsic_infos[intrin->intrinsic].index_map[NIR_INTRINSIC_TYPE])
+      nir_intrinsic_set_type(intrin, type);

   nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
                         nir_src_for_ssa(src));

--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -545,6 +545,16 @@ typedef struct nir_variable {
            enum pipe_format format;
         } image;

+         struct {
+            /**
+             * For OpenCL inline samplers. See cl_sampler_addressing_mode and cl_sampler_filter_mode
+             */
+            unsigned is_inline_sampler : 1;
+            unsigned addressing_mode : 3;
+            unsigned normalized_coordinates : 1;
+            unsigned filter_mode : 1;
+         } sampler;
+
         struct {
            /**
             * Transform feedback buffer.
@@ -768,7 +778,7 @@ typedef struct nir_ssa_def {
   /** generic SSA definition index. */
   unsigned index;

-   /** Index into the live_in and live_out bitfields */
+   /** Ordered SSA definition index used by nir_liveness. */
   unsigned live_index;

   /** Instruction which produces this SSA value. */
@@ -2603,7 +2613,9 @@ typedef struct nir_block {
    */
   int16_t dom_pre_index, dom_post_index;

-   /* live in and out for this block; used for liveness analysis */
+   /* SSA def live in and out for this block; used for liveness analysis.
+    * Indexed by ssa_def->index
+    */
   BITSET_WORD *live_in;
   BITSET_WORD *live_out;
 } nir_block;
No results found