ssbo-atomiccompswap-int tests fail with llvmpipe
When LP_NUM_THREADS=1 is set, the results are as follows:
Atomic counter 0 test failed: Reference == Observed
Reference: 4294967295
Observed: 2290649225
Test failure on line 76
Atomic counter 1 test failed: Reference == Observed
Reference: 4294967295
Observed: 2290649224
Test failure on line 77
Atomic counter 2 test failed: Reference == Observed
Reference: 4294967295
Observed: 2290649224
Test failure on line 78
Atomic counter 3 test failed: Reference == Observed
Reference: 4294967295
Observed: 2290649224
Test failure on line 79
Atomic counter 4 test failed: Reference == Observed
Reference: 4294967295
Observed: 2290649224
Test failure on line 80
Atomic counter 5 test failed: Reference == Observed
Reference: 4294967295
Observed: 2290649224
Test failure on line 81
Atomic counter 6 test failed: Reference == Observed
Reference: 4294967295
Observed: 2290649224
Test failure on line 82
Atomic counter 7 test failed: Reference == Observed
Reference: 0
Observed: 167
Test failure on line 83
The relevant part of the shader code is as follows:
int f;
uint i;
int c;
/* This is an open-coded atomicAdd. */
do {
f = value;
i = uint(f) / 4u;
c = int(i / 32u);
/* Stop when values won't fit in the mask array. */
if (c >= mask.length()) {
color = vec4(0.0, 0.0, 1.0, 1.0);
return ;
}
} while (f != atomicCompSwap(value, f, f + 4));
uint bit = i % 32u;
uint m = 1u << bit;
I found that the atomicCompSwap function itself works correctly, but the values of 'value' and 'f' are wrong.
Part of the generated LLVM IR is as follows:
bgnloop: ; preds = %loop_end39, %75
%126 = load <4 x i32>, <4 x i32>* %5, align 16
%maskcb = and <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %126
%maskfull = and <4 x i32> %125, %maskcb
store <4 x i32> zeroinitializer, <4 x i32>* %4, align 16
...
if-true-block: ; preds = %loop_begin16
%134 = extractelement <4 x i32> zeroinitializer, i32 %131
%"&context.num_ssbos_ptr[]" = getelementptr [16 x i32], [16 x i32]* %context.num_ssbos_ptr, i32 0, i32 %134
%"context.num_ssbos_ptr[]" = load i32, i32* %"&context.num_ssbos_ptr[]", align 4
%"&context.ssbos_ptr[]" = getelementptr [16 x i32*], [16 x i32*]* %context.ssbos_ptr, i32 0, i32 %134
%"context.ssbos_ptr[]" = load i32*, i32** %"&context.ssbos_ptr[]", align 8
%135 = ashr i32 %"context.num_ssbos_ptr[]", 2
%136 = add i32 %133, 0
%137 = icmp ult i32 %136, %135
%138 = sext i1 %137 to i32
%139 = and i32 -1, %138
%140 = icmp ne i32 %139, 0
br i1 %140, label %if-true-block19, label %if-false-block
if-true-block19: ; preds = %if-true-block
%141 = getelementptr i32, i32* %"context.ssbos_ptr[]", i32 %136
%"context.ssbos_ptr[][]" = load i32, i32* %141, align 4
%142 = load <4 x i32>, <4 x i32>* %4, align 16
%143 = insertelement <4 x i32> %142, i32 %"context.ssbos_ptr[][]", i32 %131
store <4 x i32> %143, <4 x i32>* %4, align 16
br label %endif-block18
if-false-block: ; preds = %if-true-block
%144 = load <4 x i32>, <4 x i32>* %4, align 16
%145 = insertelement <4 x i32> %144, i32 0, i32 %131
store <4 x i32> %145, <4 x i32>* %4, align 16
br label %endif-block18
endif-block18: ; preds = %if-false-block, %if-true-block19
br label %endif-block
endif-block: ; preds = %loop_begin16, %endif-block18
%146 = add i32 %131, 1
store i32 %146, i32* %loop_counter17, align 4
%147 = icmp uge i32 %146, 4
br i1 %147, label %loop_end, label %loop_begin16
loop_end: ; preds = %endif-block
%148 = load i32, i32* %loop_counter17, align 4
%149 = load <4 x i32>, <4 x i32>* %4, align 16
%150 = lshr <4 x i32> %149, <i32 2, i32 2, i32 2, i32 2>
%151 = lshr <4 x i32> %149, <i32 7, i32 7, i32 7, i32 7>
%152 = icmp sge <4 x i32> %151, <i32 7, i32 7, i32 7, i32 7>
%153 = sext <4 x i1> %152 to <4 x i32>
%154 = and <4 x i32> %125, %153
...
In loop_end, the value loaded from %4 should not be passed directly to lshr, because %4 is initialized to 0 in bgnloop.
The LLVM IR looks wrong, so I suspect that something goes wrong in the NIR to LLVM IR translation, but I do not understand why.