Skip to content

nir2dxil: Use native helper lane intrinsic on SM >= 6.6

Pedro J. Estébanez requested to merge RandomShaper/mesa:dxil_helper_lane into main

Original GLSL:

#version 450

// Storage buffer at descriptor set 0, binding 0 (std430 layout), declared
// `restrict`, containing a single unsized array of uints.
layout(set = 0, binding = 0, std430) buffer restrict SomeData {
	uint data[];
}
some_data;

// Minimal reproducer for gl_HelperInvocation: the built-in is read as the
// branch condition, and the buffer is written only when it is true.
void main() {
	if (gl_HelperInvocation) {
		// The condition is true inside this branch, so the value stored to
		// data[0] is uint(true).
		some_data.data[0] = uint(gl_HelperInvocation);
	}
}

Resulting DXIL output before this PR (and after this PR when targeting SM < 6.6):

target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
target triple = "dxil-ms-dx"

%dx.types.Handle = type { i8* }
%struct.RWByteAddressBuffer = type { i32 }

define void @main() {
  ; Interesting part BEGIN
  ; gl_HelperInvocation is derived here without a dedicated intrinsic:
  ; build a mask for bit 0, read the coverage mask, and isolate that bit.
  %1 = shl i32 1, 0
  %2 = call i32 @dx.op.coverage.i32(i32 91)  ; Coverage()
  %3 = and i32 %2, %1
  ; Interesting part END
  ; %4 is true when coverage bit 0 is clear; that case branches to the block
  ; that performs the buffer store (%5), otherwise to the empty block (%8).
  %4 = icmp eq i32 %3, 0
  br i1 %4, label %5, label %8

; <label>:5                                       ; preds = %0
  ; Store path: create a handle, compute coord0 as (0 & -4), and store the
  ; constant 1 as value0 with mask 1 (only the first component is given).
  %6 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
  %7 = and i32 0, -4
  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %6, i32 %7, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
  br label %9

; <label>:8                                       ; preds = %0
  ; No-store path: falls straight through to the common exit.
  br label %9

; <label>:9                                       ; preds = %8, %5
  ret void
}

; Declarations of the three dx.op functions called from @main in this dump.
; Function Attrs: nounwind readnone
declare i32 @dx.op.coverage.i32(i32) #0

; Function Attrs: nounwind readonly
declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1

; Function Attrs: nounwind
declare void @dx.op.bufferStore.i32(i32, %dx.types.Handle, i32, i32, i32, i32, i32, i32, i8) #2

; Attribute groups referenced by the declarations above (#0, #1, #2).
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind }

; Named module metadata; each entry points at one of the numbered nodes below.
!llvm.ident = !{!0}
!dx.version = !{!1}
!dx.valver = !{!2}
!dx.shaderModel = !{!3}
!dx.resources = !{!4}
!dx.typeAnnotations = !{!7}
!dx.entryPoints = !{!11}

; !3 (referenced by !dx.shaderModel) holds "ps", 6, 2 -- presumably a pixel
; shader at shader model 6.2, i.e. the "SM < 6.6" case this dump illustrates.
!0 = !{!"Mesa version 22.3.0-devel (git-b02e9ef35a)"}
!1 = !{i32 1, i32 2}
!2 = !{i32 1, i32 4}
!3 = !{!"ps", i32 6, i32 2}
!4 = !{null, !5, null, null}
!5 = !{!6}
!6 = !{i32 0, [1 x %struct.RWByteAddressBuffer]* undef, !"some_data", i32 0, i32 0, i32 1, i32 11, i1 false, i1 false, i1 false, null}
!7 = !{i32 1, void ()* @main, !8}
!8 = !{!9}
!9 = !{i32 0, !10, !10}
!10 = !{}
!11 = !{void ()* @main, !"main", null, !4, !12}
!12 = !{i32 0, i64 16}

Resulting DXIL after this PR and with SM >= 6.6:

target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
target triple = "dxil-ms-dx"

%dx.types.Handle = type { i8* }
%struct.RWByteAddressBuffer = type { i32 }

define void @main() {
  ; Interesting part BEGIN
  ; A single IsHelperLane() call yields the i1 condition directly; no
  ; coverage-mask arithmetic or comparison is emitted in this dump.
  %1 = call i1 @dx.op.isHelperLane.i32(i32 221)  ; IsHelperLane()
  ; Interesting part END
  ; %1 true -> store block (%2); %1 false -> empty block (%5).
  br i1 %1, label %2, label %5

; <label>:2                                       ; preds = %0
  ; Store path: create a handle, compute coord0 as (0 & -4), and store the
  ; constant 1 as value0 with mask 1 (only the first component is given).
  %3 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
  %4 = and i32 0, -4
  call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %3, i32 %4, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i8 1)  ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
  br label %6

; <label>:5                                       ; preds = %0
  ; No-store path: falls straight through to the common exit.
  br label %6

; <label>:6                                       ; preds = %5, %2
  ret void
}

; NOTE(review): this dump declares two isHelperLane functions -- the ".i32"
; one directly below (readnone, #0) and an ".i1" one further down (readonly,
; #1). Only the ".i32" one is called from @main. Confirm which overload
; suffix and attribute set nir2dxil is meant to emit, and whether the other
; declaration is a leftover.
; Function Attrs: nounwind readnone
declare i1 @dx.op.isHelperLane.i32(i32) #0

; Function Attrs: nounwind readonly
declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1

; Function Attrs: nounwind
declare void @dx.op.bufferStore.i32(i32, %dx.types.Handle, i32, i32, i32, i32, i32, i32, i8) #2

; Function Attrs: nounwind readonly
declare i1 @dx.op.isHelperLane.i1(i32) #1

; Attribute groups referenced by the declarations above (#0, #1, #2).
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind }

; Named module metadata; each entry points at one of the numbered nodes below.
; Here !dx.version and !dx.valver both reference the same node (!1).
!llvm.ident = !{!0}
!dx.version = !{!1}
!dx.valver = !{!1}
!dx.shaderModel = !{!2}
!dx.resources = !{!3}
!dx.typeAnnotations = !{!6}
!dx.entryPoints = !{!10}

; NOTE(review): !2 (referenced by !dx.shaderModel) holds "ps", 6, 4, while
; the heading for this dump says SM >= 6.6 -- confirm whether the dump was
; produced with a 6.6 target or the recorded shader model is off.
!0 = !{!"Mesa version 22.3.0-devel (git-b02e9ef35a)"}
!1 = !{i32 1, i32 4}
!2 = !{!"ps", i32 6, i32 4}
!3 = !{null, !4, null, null}
!4 = !{!5}
!5 = !{i32 0, [1 x %struct.RWByteAddressBuffer]* undef, !"some_data", i32 0, i32 0, i32 1, i32 11, i1 false, i1 false, i1 false, null}
!6 = !{i32 1, void ()* @main, !7}
!7 = !{!8}
!8 = !{i32 0, !9, !9}
!9 = !{}
!10 = !{void ()* @main, !"main", null, !3, !11}
!11 = !{i32 0, i64 16}

A similar check is needed in microsoft/vulkan/ and maybe microsoft/clc/ as well, but it was not obvious how to pass the shader model down to the relevant code, so I decided not to do it myself.

CC @jenatali

Edited by Pedro J. Estébanez

Merge request reports