Skip to content

Commit 377c859

Browse files
committed
[DAGCombine] Fix multi-use miscompile in load combine (llvm#81586)
The load combine replaces a number of original loads with one new load and also replaces the output chains of the original loads with the output chain of the new load. This is incorrect if the original load is retained (due to multi-use), as it may get incorrectly reordered. Fix this by using makeEquivalentMemoryOrdering() instead, which will create a TokenFactor with both chains. Fixes llvm#80911. (cherry picked from commit 25b9ed6)
1 parent 0d656f0 commit 377c859

File tree

2 files changed

+33
-1
lines changed

2 files changed

+33
-1
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9253,7 +9253,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
92539253

92549254
// Transfer chain users from old loads to the new load.
92559255
for (LoadSDNode *L : Loads)
9256-
DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
9256+
DAG.makeEquivalentMemoryOrdering(L, NewLoad);
92579257

92589258
if (!NeedsBswap)
92599259
return NewLoad;

llvm/test/CodeGen/X86/load-combine.ll

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1282,3 +1282,35 @@ define i32 @zext_load_i32_by_i8_bswap_shl_16(ptr %arg) {
12821282
%tmp8 = or i32 %tmp7, %tmp30
12831283
ret i32 %tmp8
12841284
}
1285+
1286+
define i32 @pr80911_vector_load_multiuse(ptr %ptr, ptr %clobber) nounwind {
1287+
; CHECK-LABEL: pr80911_vector_load_multiuse:
1288+
; CHECK: # %bb.0:
1289+
; CHECK-NEXT: pushl %esi
1290+
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
1291+
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
1292+
; CHECK-NEXT: movl (%edx), %esi
1293+
; CHECK-NEXT: movzwl (%edx), %eax
1294+
; CHECK-NEXT: movl $0, (%ecx)
1295+
; CHECK-NEXT: movl %esi, (%edx)
1296+
; CHECK-NEXT: popl %esi
1297+
; CHECK-NEXT: retl
1298+
;
1299+
; CHECK64-LABEL: pr80911_vector_load_multiuse:
1300+
; CHECK64: # %bb.0:
1301+
; CHECK64-NEXT: movl (%rdi), %ecx
1302+
; CHECK64-NEXT: movzwl (%rdi), %eax
1303+
; CHECK64-NEXT: movl $0, (%rsi)
1304+
; CHECK64-NEXT: movl %ecx, (%rdi)
1305+
; CHECK64-NEXT: retq
1306+
%load = load <4 x i8>, ptr %ptr, align 16
1307+
store i32 0, ptr %clobber
1308+
store <4 x i8> %load, ptr %ptr, align 16
1309+
%e1 = extractelement <4 x i8> %load, i64 1
1310+
%e1.ext = zext i8 %e1 to i32
1311+
%e1.ext.shift = shl nuw nsw i32 %e1.ext, 8
1312+
%e0 = extractelement <4 x i8> %load, i64 0
1313+
%e0.ext = zext i8 %e0 to i32
1314+
%res = or i32 %e1.ext.shift, %e0.ext
1315+
ret i32 %res
1316+
}

0 commit comments

Comments
 (0)