Compare revisions

Timothy Arceri · Timothy Arceri · Vinson Lee · Jonathan Marek · Benjamin Cheng · Eric Engestrom
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -558,6 +558,7 @@ meson-classic:
    EXTRA_OPTION: >
      -D osmesa=classic
      -D tools=all
+      -D werror=true

 .meson-cross:
  extends:

--- a/.gitlab-ci/deqp-freedreno-a630-fails.txt
+++ b/.gitlab-ci/deqp-freedreno-a630-fails.txt
@@ -4,7 +4,6 @@ dEQP-GLES2.functional.clipping.triangle_vertex.clip_three.clip_neg_x_neg_z_and_p
 dEQP-GLES31.functional.stencil_texturing.render.depth24_stencil8_clear
 dEQP-GLES31.functional.stencil_texturing.render.depth24_stencil8_draw
 dEQP-VK.binding_model.descriptorset_random.sets4.constant.ubolimitlow.sbolimithigh.imglimithigh.noiub.uab.frag.ialimitlow.0
-dEQP-VK.compute.basic.shared_var_single_group
 dEQP-VK.draw.output_location.array.b8g8r8a8-unorm-mediump-output-vec3
 dEQP-VK.glsl.derivate.fwidthfine.uniform_loop.vec3_mediump
 dEQP-VK.glsl.linkage.varying.struct.mat3x2

--- a/.gitlab-ci/deqp-virgl-gl-fails.txt
+++ b/.gitlab-ci/deqp-virgl-gl-fails.txt
@@ -640,6 +640,7 @@ dEQP-GLES31.functional.draw_indirect.compute_interop.separate.drawelements_compu
 dEQP-GLES31.functional.draw_indirect.compute_interop.separate.drawelements_compute_data_and_indices
 dEQP-GLES31.functional.draw_indirect.compute_interop.separate.drawelements_compute_indices
 dEQP-GLES31.functional.draw_indirect.random.20
+dEQP-GLES31.functional.fbo.no_attachments.maximums.all
 dEQP-GLES31.functional.image_load_store.2d_array.atomic.add_r32i_result
 dEQP-GLES31.functional.image_load_store.2d_array.atomic.add_r32i_return_value
 dEQP-GLES31.functional.image_load_store.2d_array.atomic.add_r32ui_result
@@ -4784,7 +4785,6 @@ KHR-GL30.transform_feedback.draw_xfb_stream_instanced_test
 KHR-GL30.transform_feedback.get_xfb_varying
 KHR-GL30.transform_feedback.query_vertex_interleaved_test
 KHR-GL30.transform_feedback.query_vertex_separate_test
-KHR-GL31.CommonBugs.CommonBug_ParenthesisInLayoutQualifierIntegerValue
 KHR-GL31.transform_feedback.capture_vertex_interleaved_test
 KHR-GL31.transform_feedback.capture_vertex_separate_test
 KHR-GL31.transform_feedback.discard_vertex_test
@@ -4793,7 +4793,6 @@ KHR-GL31.transform_feedback.draw_xfb_stream_test
 KHR-GL31.transform_feedback.draw_xfb_stream_instanced_test
 KHR-GL31.transform_feedback.query_vertex_interleaved_test
 KHR-GL31.transform_feedback.query_vertex_separate_test
-KHR-GL32.CommonBugs.CommonBug_ParenthesisInLayoutQualifierIntegerValue
 KHR-GL32.transform_feedback.capture_vertex_interleaved_test
 KHR-GL32.transform_feedback.capture_vertex_separate_test
 KHR-GL32.transform_feedback.discard_vertex_test

--- a/.gitlab-ci/piglit/glslparser.txt
+++ b/.gitlab-ci/piglit/glslparser.txt
@@ -208,7 +208,6 @@ spec/amd_vertex_shader_viewport_index/preprocessor/disabled-undefined-core.tese:
 spec/amd_vertex_shader_viewport_index/preprocessor/disabled-undefined-core.vert: skip
 spec/amd_vertex_shader_viewport_index/preprocessor/enabled-compat.frag: skip
 spec/amd_vertex_shader_viewport_index/preprocessor/enabled-compat.vert: skip
-spec/arb_arrays_of_arrays/compiler/glsl-4.20-basic-types.frag: skip
 spec/arb_arrays_of_arrays/preprocessor/disabled-defined-compat.frag: skip
 spec/arb_arrays_of_arrays/preprocessor/disabled-defined-compat.vert: skip
 spec/arb_arrays_of_arrays/preprocessor/disabled-defined-core.comp: skip
@@ -287,14 +286,6 @@ spec/arb_bindless_texture/compiler/samplers/return-struct.frag: skip
 spec/arb_bindless_texture/compiler/samplers/struct-inside-uniform-block.frag: skip
 spec/arb_bindless_texture/compiler/samplers/temporary.vert: skip
 spec/arb_bindless_texture/compiler/samplers/uniform-block-initializer.frag: skip
-spec/arb_compute_shader/compiler/barrier.comp: skip
-spec/arb_compute_shader/compiler/groupmemorybarrier.comp: skip
-spec/arb_compute_shader/compiler/memorybarrieratomiccounter.comp: skip
-spec/arb_compute_shader/compiler/memorybarrierbuffer.comp: skip
-spec/arb_compute_shader/compiler/memorybarrierimage.comp: skip
-spec/arb_compute_shader/compiler/memorybarriershared.comp: skip
-spec/arb_compute_shader/compiler/shared-atomics.comp: skip
-spec/arb_compute_shader/compiler/shared-variables.comp: skip
 spec/arb_compute_shader/preprocessor/disabled-defined-compat.frag: skip
 spec/arb_compute_shader/preprocessor/disabled-defined-compat.vert: skip
 spec/arb_compute_shader/preprocessor/disabled-defined-core.comp: skip
@@ -686,14 +677,20 @@ spec/arb_shader_image_size/preprocessor/disabled-undefined-core.geom: skip
 spec/arb_shader_image_size/preprocessor/disabled-undefined-core.tesc: skip
 spec/arb_shader_image_size/preprocessor/disabled-undefined-core.tese: skip
 spec/arb_shader_image_size/preprocessor/disabled-undefined-core.vert: skip
+spec/arb_shader_precision/preprocessor/disabled-defined-core.comp: skip
+spec/arb_shader_precision/preprocessor/disabled-defined-core.frag: skip
+spec/arb_shader_precision/preprocessor/disabled-defined-core.geom: skip
+spec/arb_shader_precision/preprocessor/disabled-defined-core.tesc: skip
+spec/arb_shader_precision/preprocessor/disabled-defined-core.tese: skip
+spec/arb_shader_precision/preprocessor/disabled-defined-core.vert: skip
+spec/arb_shader_precision/preprocessor/disabled-undefined-core.comp: skip
+spec/arb_shader_precision/preprocessor/disabled-undefined-core.frag: skip
+spec/arb_shader_precision/preprocessor/disabled-undefined-core.geom: skip
+spec/arb_shader_precision/preprocessor/disabled-undefined-core.tesc: skip
+spec/arb_shader_precision/preprocessor/disabled-undefined-core.tese: skip
+spec/arb_shader_precision/preprocessor/disabled-undefined-core.vert: skip
 spec/arb_shader_precision/preprocessor/enabled-compat.frag: skip
 spec/arb_shader_precision/preprocessor/enabled-compat.vert: skip
-spec/arb_shader_precision/preprocessor/enabled-core.comp: skip
-spec/arb_shader_precision/preprocessor/enabled-core.frag: skip
-spec/arb_shader_precision/preprocessor/enabled-core.geom: skip
-spec/arb_shader_precision/preprocessor/enabled-core.tesc: skip
-spec/arb_shader_precision/preprocessor/enabled-core.tese: skip
-spec/arb_shader_precision/preprocessor/enabled-core.vert: skip
 spec/arb_shader_stencil_export/preprocessor/disabled-defined-compat.frag: skip
 spec/arb_shader_stencil_export/preprocessor/disabled-defined-compat.vert: skip
 spec/arb_shader_stencil_export/preprocessor/disabled-defined-core.comp: skip
@@ -2667,10 +2664,10 @@ spec/oes_texture_storage_multisample_2d_array/preprocessor/disabled-undefined-es
 summary:
       name:  results
       ----  --------
-       pass:    12166
+       pass:    12169
       fail:        2
      crash:        4
-       skip:     2660
+       skip:     2657
    timeout:        0
       warn:        0
 incomplete:        0

--- a/.gitlab-ci/piglit/quick_gl.txt
+++ b/.gitlab-ci/piglit/quick_gl.txt
@@ -391,9 +391,6 @@ spec/!opengl 3.1/draw-buffers-errors: skip
 spec/!opengl 3.2/gl-3.2-adj-prims pv-first: fail
 spec/!opengl 3.2/layered-rendering/clear-color-mismatched-layer-count: fail
 spec/!opengl 4.2/gl-max-vertex-attrib-stride: skip
-spec/!opengl 4.2/required-renderbuffer-attachment-formats: skip
-spec/!opengl 4.2/required-sized-texture-formats: skip
-spec/!opengl 4.2/required-texture-attachment-formats: skip
 spec/!opengl 4.3/get_glsl_version: skip
 spec/!opengl 4.5/compare-framebuffer-parameter-with-get: skip
 spec/!opengl 4.5/named-framebuffer-draw-buffers-errors: skip
@@ -619,8 +616,10 @@ spec/arb_shader_image_load_store/invalid/imageload/address bounds test/image2d/r
 spec/arb_shader_image_load_store/invalid/imageload/address bounds test/image2d/rg8_snorm: fail
 spec/arb_shader_image_load_store/invalid/imageload/address bounds test/image2d/rg8i: fail
 spec/arb_shader_image_load_store/invalid/imageload/address bounds test/image2d/rg8ui: fail
-spec/arb_shader_image_load_store/max-size/imagecube max size test/8192x8192x6x1: skip
-spec/arb_shader_image_load_store/max-size/imagecubearray max size test/8192x8192x6x1: skip
+spec/arb_shader_image_load_store/max-size/image2dmsarray max size test/4x16384x8x8: skip
+spec/arb_shader_image_load_store/max-size/image2dmsarray max size test/4x8x16384x8: skip
+spec/arb_shader_image_load_store/max-size/imagecube max size test/16384x16384x6x1: skip
+spec/arb_shader_image_load_store/max-size/imagecubearray max size test/16384x16384x6x1: skip
 spec/arb_shader_texture_image_samples/builtin-image/r8/compute/image2dms samples test/2x8x96x1: skip
 spec/arb_shader_texture_image_samples/builtin-image/r8/compute/image2dmsarray samples test/2x8x16x6: skip
 spec/arb_shader_texture_image_samples/builtin-image/r8/fragment/image2dms samples test/2x8x96x1: skip
@@ -1686,10 +1685,10 @@ wgl/wgl-sanity: skip
 summary:
       name:  results
       ----  --------
-       pass:    21839
+       pass:    21840
       fail:      215
      crash:        0
-       skip:     1446
+       skip:     1445
    timeout:        0
       warn:        6
 incomplete:        0

--- a/.gitlab-ci/piglit/quick_shader.txt
+++ b/.gitlab-ci/piglit/quick_shader.txt
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -140,29 +140,29 @@ GL 4.0, GLSL 4.00 --- all DONE: i965/gen7+, nvc0, r600, radeonsi, llvmpipe, virg
  GL_ARB_transform_feedback3                            DONE (i965/gen7+, softpipe, swr)


-GL 4.1, GLSL 4.10 --- all DONE: i965/gen7+, nvc0, r600, radeonsi, virgl
+GL 4.1, GLSL 4.10 --- all DONE: i965/gen7+, nvc0, r600, radeonsi, llvmpipe, virgl

-  GL_ARB_ES2_compatibility                              DONE (freedreno, i965, nv50, llvmpipe, softpipe, swr, v3d, zink)
+  GL_ARB_ES2_compatibility                              DONE (freedreno, i965, nv50, softpipe, swr, v3d, zink)
  GL_ARB_get_program_binary                             DONE (0 or 1 binary formats)
  GL_ARB_separate_shader_objects                        DONE (all drivers)
  GL_ARB_shader_precision                               DONE (i965/gen7+, all drivers that support GLSL 4.10)
-  GL_ARB_vertex_attrib_64bit                            DONE (i965/gen7+, llvmpipe, softpipe, swr)
-  GL_ARB_viewport_array                                 DONE (i965, nv50, llvmpipe, softpipe, swr, zink)
+  GL_ARB_vertex_attrib_64bit                            DONE (i965/gen7+, softpipe, swr)
+  GL_ARB_viewport_array                                 DONE (i965, nv50, softpipe, swr, zink)


-GL 4.2, GLSL 4.20 -- all DONE: i965/gen7+, nvc0, r600, radeonsi, virgl
+GL 4.2, GLSL 4.20 -- all DONE: i965/gen7+, nvc0, r600, radeonsi, llvmpipe, virgl

-  GL_ARB_texture_compression_bptc                       DONE (freedreno, i965, llvmpipe, softpipe, swr, zink)
+  GL_ARB_texture_compression_bptc                       DONE (freedreno, i965, softpipe, swr, zink)
  GL_ARB_compressed_texture_pixel_storage               DONE (all drivers)
-  GL_ARB_shader_atomic_counters                         DONE (freedreno/a5xx+, i965, llvmpipe, softpipe, v3d)
+  GL_ARB_shader_atomic_counters                         DONE (freedreno/a5xx+, i965, softpipe, v3d)
  GL_ARB_texture_storage                                DONE (all drivers)
-  GL_ARB_transform_feedback_instanced                   DONE (freedreno, i965, nv50, llvmpipe, softpipe, swr, v3d)
-  GL_ARB_base_instance                                  DONE (freedreno, i965, nv50, llvmpipe, softpipe, swr, v3d)
-  GL_ARB_shader_image_load_store                        DONE (freedreno/a5xx+, i965, llvmpipe, softpipe, v3d)
+  GL_ARB_transform_feedback_instanced                   DONE (freedreno, i965, nv50, softpipe, swr, v3d)
+  GL_ARB_base_instance                                  DONE (freedreno, i965, nv50, softpipe, swr, v3d)
+  GL_ARB_shader_image_load_store                        DONE (freedreno/a5xx+, i965, softpipe, v3d)
  GL_ARB_conservative_depth                             DONE (all drivers that support GLSL 1.30)
  GL_ARB_shading_language_420pack                       DONE (all drivers that support GLSL 1.30)
  GL_ARB_shading_language_packing                       DONE (all drivers)
-  GL_ARB_internalformat_query                           DONE (freedreno, i965, nv50, llvmpipe, softpipe, swr, v3d, zink)
+  GL_ARB_internalformat_query                           DONE (freedreno, i965, nv50, softpipe, swr, v3d, zink)
  GL_ARB_map_buffer_alignment                           DONE (all drivers)



--- a/docs/relnotes/new_features.txt
+++ b/docs/relnotes/new_features.txt
 GL_ARB_compute_variable_group_size on Iris.
 GL_ARB_gpu_shader5 on llvmpipe
-GL 4.0 on llvmpipe
+GL 4.2 on llvmpipe
 GL_EXT_shader_group_vote on GLES3.
 VK_AMD_texture_gather_bias_lod on RADV.
 VK_AMD_gpu_shader_half_float on RADV/ACO.

--- a/src/amd/addrlib/src/chip/gfx10/gfx10_gb_reg.h
+++ b/src/amd/addrlib/src/chip/gfx10/gfx10_gb_reg.h
@@ -45,7 +45,7 @@
 #define BIGENDIAN_CPU
 #endif

-union GB_ADDR_CONFIG
+union GB_ADDR_CONFIG_gfx10
 {
    struct
    {

--- a/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp
+++ b/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp
@@ -755,7 +755,7 @@ BOOL_32 Gfx10Lib::HwlInitGlobalParams(
    const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
 {
    BOOL_32        valid = TRUE;
-    GB_ADDR_CONFIG gbAddrConfig;
+    GB_ADDR_CONFIG_gfx10 gbAddrConfig;

    gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;


--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -9979,15 +9979,15 @@ static bool export_fs_mrt_z(isel_context *ctx)
   return true;
 }

-static bool export_fs_mrt_color(isel_context *ctx, int slot,
-                                unsigned write_mask, Temp *outputs)
+static bool export_fs_mrt_color(isel_context *ctx, int slot)
 {
   Builder bld(ctx->program, ctx->block);
+   unsigned write_mask = ctx->outputs.mask[slot];
   Operand values[4];

   for (unsigned i = 0; i < 4; ++i) {
      if (write_mask & (1 << i)) {
-         values[i] = Operand(outputs[i]);
+         values[i] = Operand(ctx->outputs.temps[slot * 4u + i]);
      } else {
         values[i] = Operand(v1);
      }
@@ -9997,6 +9997,7 @@ static bool export_fs_mrt_color(isel_context *ctx, int slot,
   unsigned enabled_channels = 0;
   aco_opcode compr_op = (aco_opcode)0;

+   slot -= FRAG_RESULT_DATA0;
   target = V_008DFC_SQ_EXP_MRT + slot;
   col_format = (ctx->options->key.fs.col_format >> (4 * slot)) & 0xf;

@@ -10181,7 +10182,6 @@ static bool export_fs_mrt_color(isel_context *ctx, int slot,

 static void create_fs_exports(isel_context *ctx)
 {
-   unsigned compacted_mrt_index = 0;
   bool exported = false;

   /* Export depth, stencil and sample mask. */
@@ -10191,15 +10191,9 @@ static void create_fs_exports(isel_context *ctx)
      exported |= export_fs_mrt_z(ctx);

   /* Export all color render targets. */
-   for (unsigned i = FRAG_RESULT_DATA0; i < FRAG_RESULT_DATA7 + 1; ++i) {
+   for (unsigned i = FRAG_RESULT_DATA0; i < FRAG_RESULT_DATA7 + 1; ++i)
      if (ctx->outputs.mask[i])
-         if (export_fs_mrt_color(ctx, compacted_mrt_index,
-                                 ctx->outputs.mask[i],
-                                 &ctx->outputs.temps[i * 4u])) {
-            compacted_mrt_index++;
-            exported = true;
-         }
-   }
+         exported |= export_fs_mrt_color(ctx, i);

   if (!exported)
      create_null_export(ctx);

--- a/src/amd/vulkan/radv_android.c
+++ b/src/amd/vulkan/radv_android.c
@@ -398,6 +398,7 @@ radv_AcquireImageANDROID(
 		                                                 .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT,
 		                                                 .fd = semaphore_fd,
 		                                                 .semaphore = semaphore,
+		                                                 .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, /* semaphore enum, not the fence one: the SYNC_FD bits differ */
 		                                            });
 	}

@@ -409,6 +410,7 @@ radv_AcquireImageANDROID(
 		                                         .flags = VK_FENCE_IMPORT_TEMPORARY_BIT,
 		                                         .fd = fence_fd,
 		                                         .fence = fence,
+		                                         .handleType = VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT,
 		                                     });
 	}


--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1360,6 +1360,13 @@ radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer,
 		cb_color_info &= C_028C70_DCC_ENABLE;
 	}

+	if (!radv_layout_can_fast_clear(image, layout, in_render_loop,
+	                                radv_image_queue_family_mask(image,
+	                                                             cmd_buffer->queue_family_index,
+	                                                             cmd_buffer->queue_family_index))) {
+		cb_color_info &= C_028C70_COMPRESSION;
+	}
+
 	if (radv_image_is_tc_compat_cmask(image) &&
 	    (radv_is_fmask_decompress_pipeline(cmd_buffer) ||
 	     radv_is_dcc_decompress_pipeline(cmd_buffer))) {
@@ -1369,6 +1376,19 @@ radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer,
 		cb_color_info &= C_028C70_FMASK_COMPRESS_1FRAG_ONLY;
 	}

+	if (radv_image_has_fmask(image) &&
+	    (radv_is_fmask_decompress_pipeline(cmd_buffer) ||
+	     radv_is_hw_resolve_pipeline(cmd_buffer))) {
+		/* Make sure FMASK is enabled if it has been cleared because:
+		 *
+		 * 1) it's required for FMASK_DECOMPRESS operations to avoid
+		 * GPU hangs
+		 * 2) it's necessary for CB_RESOLVE which can read compressed
+		 * FMASK data anyways.
+		 */
+		cb_color_info |= S_028C70_COMPRESSION(1);
+	}
+
 	if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
 			radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
 			radeon_emit(cmd_buffer->cs, cb->cb_color_base);
@@ -2151,8 +2171,11 @@ void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer)
 		bool gfx10_perfect = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10 && has_perfect_queries;

 		if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
+			/* Always enable PERFECT_ZPASS_COUNTS due to issues with partially
+			 * covered tiles, discards, and early depth testing. For more details,
+			 * see https://gitlab.freedesktop.org/mesa/mesa/-/issues/3218 */
 			db_count_control =
-				S_028004_PERFECT_ZPASS_COUNTS(has_perfect_queries) |
+				S_028004_PERFECT_ZPASS_COUNTS(1) |
 				S_028004_DISABLE_CONSERVATIVE_ZPASS_COUNTS(gfx10_perfect) |
 				S_028004_SAMPLE_RATE(sample_rate) |
 				S_028004_ZPASS_ENABLE(1) |

--- a/src/amd/vulkan/radv_meta.h
+++ b/src/amd/vulkan/radv_meta.h
@@ -250,6 +250,25 @@ radv_is_dcc_decompress_pipeline(struct radv_cmd_buffer *cmd_buffer)
 	       meta_state->fast_clear_flush.dcc_decompress_pipeline;
 }

+/**
+ * Return true if cmd_buffer->state.pipeline is one of the meta resolve pipelines (the hardware resolve path), false otherwise.
+ */
+static inline bool
+radv_is_hw_resolve_pipeline(struct radv_cmd_buffer *cmd_buffer)
+{
+	struct radv_meta_state *meta_state = &cmd_buffer->device->meta_state;
+	struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+
+	for (uint32_t i = 0; i < NUM_META_FS_KEYS; ++i) { /* walk every exemplar format */
+		VkFormat format = radv_fs_key_format_exemplars[i];
+		unsigned fs_key = radv_format_meta_fs_key(format); /* index into resolve.pipeline[] */
+
+		if (radv_pipeline_to_handle(pipeline) == meta_state->resolve.pipeline[fs_key])
+			return true;
+	}
+	return false; /* no resolve pipeline matched */
+}
+
 /* common nir builder helpers */
 #include "nir/nir_builder.h"


--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -3586,7 +3586,8 @@ handle_fs_outputs_post(struct radv_shader_context *ctx)
 			values[j] = ac_to_float(&ctx->ac,
 						radv_load_output(ctx, i, j));

-		bool ret = si_export_mrt_color(ctx, values, index,
+		bool ret = si_export_mrt_color(ctx, values,
+					       i - FRAG_RESULT_DATA0,
 					       &color_args[index]);
 		if (ret)
 			index++;

--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -511,10 +511,8 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline,
 {
 	RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
 	struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
-	unsigned exp_fmt[MAX_RTS] = {0};
-	unsigned is_int8[MAX_RTS] = {0}, is_int10[MAX_RTS] = {0};
-	unsigned col_format = 0;
-	unsigned col_format_is_int8 = 0, col_format_is_int10 = 0;
+	unsigned col_format = 0, is_int8 = 0, is_int10 = 0;
+	unsigned num_targets;

 	for (unsigned i = 0; i < (blend->single_cb_enable ? 1 : subpass->color_count); ++i) {
 		unsigned cf;
@@ -531,45 +529,42 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline,
 			                                  blend_enable,
 							  blend->need_src_alpha & (1 << i));

-			is_int8[i] = format_is_int8(attachment->format);
-			is_int10[i] = format_is_int10(attachment->format);
+			if (format_is_int8(attachment->format))
+				is_int8 |= 1 << i;
+			if (format_is_int10(attachment->format))
+				is_int10 |= 1 << i;
 		}

-		exp_fmt[i] = cf;
+		col_format |= cf << (4 * i);
 	}

-	if (!exp_fmt[0] && blend->need_src_alpha & (1 << 0)) {
+	if (!(col_format & 0xf) && blend->need_src_alpha & (1 << 0)) {
 		/* When a subpass doesn't have any color attachments, write the
 		 * alpha channel of MRT0 when alpha coverage is enabled because
 		 * the depth attachment needs it.
 		 */
-		exp_fmt[0] = V_028714_SPI_SHADER_32_AR;
+		col_format |= V_028714_SPI_SHADER_32_AR;
 	}

-	/* The output for dual source blending should have the same format as
-	 * the first output.
+	/* If the i-th target format is set, all previous target formats must
+	 * be non-zero to avoid hangs.
 	 */
-	if (blend->mrt0_is_dual_src) {
-		col_format |= (exp_fmt[0] << 4) | exp_fmt[0];
-		col_format_is_int8 |= (is_int8[0] << 1) | is_int8[0];
-		col_format_is_int10 |= (is_int10[0] << 1) | is_int10[0];
-	} else {
-		/* Remove holes in SPI_SHADER_COL_FORMAT. */
-		unsigned num_color_targets = 0;
-		for (unsigned i = 0; i < MAX_RTS; i++) {
-			if (!exp_fmt[i])
-				continue;
-
-			col_format |= exp_fmt[i] << (4 * num_color_targets);
-			col_format_is_int8 |= is_int8[i] << num_color_targets;
-			col_format_is_int10 |= is_int10[i] << num_color_targets;
-			num_color_targets++;
+	num_targets = (util_last_bit(col_format) + 3) / 4;
+	for (unsigned i = 0; i < num_targets; i++) {
+		if (!(col_format & (0xfu << (i * 4)))) { /* unsigned literal: 0xf << 28 (i == 7) does not fit in int */
+			col_format |= V_028714_SPI_SHADER_32_R << (i * 4);
+		}
 	}

+	/* The output for dual source blending should have the same format as
+	 * the first output.
+	 */
+	if (blend->mrt0_is_dual_src)
+		col_format |= (col_format & 0xf) << 4;
+
 	blend->spi_shader_col_format = col_format;
-	blend->col_format_is_int8 = col_format_is_int8;
-	blend->col_format_is_int10 = col_format_is_int10;
+	blend->col_format_is_int8 = is_int8;
+	blend->col_format_is_int10 = is_int10;
 }

 /*

--- a/src/amd/vulkan/radv_shader_info.c
+++ b/src/amd/vulkan/radv_shader_info.c
@@ -846,6 +846,18 @@ radv_nir_shader_info_pass(const struct nir_shader *nir,
 	info->float_controls_mode = nir->info.float_controls_execution_mode;

 	if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+		/* If the i-th output is used, all previous outputs must be
+		 * non-zero to match the target format.
+		 * TODO: compact MRT to avoid holes and to remove this
+		 * workaround.
+		 */
+		unsigned num_targets = (util_last_bit(info->ps.cb_shader_mask) + 3) / 4;
+		for (unsigned i = 0; i < num_targets; i++) {
+			if (!(info->ps.cb_shader_mask & (0xfu << (i * 4)))) { /* unsigned literal: avoids signed overflow once i * 4 reaches 28 */
+				info->ps.cb_shader_mask |= 0xfu << (i * 4);
+			}
+		}
+
 		if (key->fs.is_dual_src) {
 			info->ps.cb_shader_mask |= (info->ps.cb_shader_mask & 0xf) << 4;
 		}

--- a/src/broadcom/Makefile.sources
+++ b/src/broadcom/Makefile.sources
@@ -41,6 +41,7 @@ BROADCOM_FILES = \
 	compiler/v3d_compiler.h \
 	compiler/v3d_nir_lower_image_load_store.c \
 	compiler/v3d_nir_lower_io.c \
+	compiler/v3d_nir_lower_line_smooth.c \
 	compiler/v3d_nir_lower_scratch.c \
 	compiler/v3d_nir_lower_txf_ms.c \
 	qpu/qpu_disasm.c \

--- a/src/broadcom/compiler/meson.build
+++ b/src/broadcom/compiler/meson.build
@@ -37,6 +37,7 @@ libbroadcom_compiler_files = files(
  'v3d_compiler.h',
  'v3d_nir_lower_io.c',
  'v3d_nir_lower_image_load_store.c',
+  'v3d_nir_lower_line_smooth.c',
  'v3d_nir_lower_logic_ops.c',
  'v3d_nir_lower_scratch.c',
  'v3d_nir_lower_txf_ms.c',

--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -2165,6 +2165,20 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
                               vir_uniform(c, QUNIFORM_ALPHA_REF, 0));
                break;

+        case nir_intrinsic_load_line_coord:
+                ntq_store_dest(c, &instr->dest, 0, vir_MOV(c, c->line_x));
+                break;
+
+        case nir_intrinsic_load_line_width:
+                ntq_store_dest(c, &instr->dest, 0,
+                               vir_uniform(c, QUNIFORM_LINE_WIDTH, 0));
+                break;
+
+        case nir_intrinsic_load_aa_line_width:
+                ntq_store_dest(c, &instr->dest, 0,
+                               vir_uniform(c, QUNIFORM_AA_LINE_WIDTH, 0));
+                break;
+
        case nir_intrinsic_load_sample_mask_in:
                ntq_store_dest(c, &instr->dest, 0, vir_MSF(c));
                break;
@@ -2720,7 +2734,10 @@ nir_to_vir(struct v3d_compile *c)
                        c->point_x = emit_fragment_varying(c, NULL, 0, 0);
                        c->point_y = emit_fragment_varying(c, NULL, 0, 0);
                        c->uses_implicit_point_line_varyings = true;
-                } else if (c->fs_key->is_lines && c->devinfo->ver < 40) {
+                } else if (c->fs_key->is_lines &&
+                           (c->devinfo->ver < 40 ||
+                            (c->s->info.system_values_read &
+                             BITFIELD64_BIT(SYSTEM_VALUE_LINE_COORD)))) {
                        c->line_x = emit_fragment_varying(c, NULL, 0, 0);
                        c->uses_implicit_point_line_varyings = true;
                }
No results found