Skip to content

Commit 11512e7

Browse files
author
Diogo N. Sampaio
committed
[ARM][DAGCOMBINE][FIX] PerformVMOVRRDCombine
Summary: PerformVMOVRRDCombine omits adding an offset of 4 to the PointerInfo when converting an f64 = load[M] to {i32, i32} = {load[M], load[M + 4]}, which would allow the machine scheduler to break dependencies with the second load. - pr42638 Reviewers: eli.friedman, dmgreen, ostannard Reviewed By: ostannard Subscribers: ostannard, javed.absar, kristof.beyls, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64870 llvm-svn: 366423
1 parent 83748cc commit 11512e7

File tree

2 files changed

+38
-3
lines changed

2 files changed

+38
-3
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11748,9 +11748,11 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
1174811748

1174911749
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
1175011750
DAG.getConstant(4, DL, MVT::i32));
11751-
SDValue NewLD2 = DAG.getLoad(
11752-
MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, LD->getPointerInfo(),
11753-
std::min(4U, LD->getAlignment() / 2), LD->getMemOperand()->getFlags());
11751+
11752+
SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr,
11753+
LD->getPointerInfo().getWithOffset(4),
11754+
std::min(4U, LD->getAlignment()),
11755+
LD->getMemOperand()->getFlags());
1175411756

1175511757
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
1175611758
if (DCI.DAG.getDataLayout().isBigEndian())
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
; RUN: llc -stop-after=machine-scheduler -debug-only dagcombine,selectiondag -o - %s 2>&1 | FileCheck %s
2+
; REQUIRES: asserts
3+
; pr42638
4+
target triple = "armv8r-arm-none-eabi"
5+
%struct.__va_list = type { i8* }
6+
define double @foo(i32 %P0, ...) #0 {
7+
entry:
8+
%V1 = alloca [8 x i8], align 8
9+
%vl = alloca %struct.__va_list, align 4
10+
%0 = getelementptr inbounds [8 x i8], [8 x i8]* %V1, i32 0, i32 0
11+
call void asm sideeffect "", "r"(i8* nonnull %0)
12+
%1 = bitcast %struct.__va_list* %vl to i8*
13+
call void @llvm.va_start(i8* nonnull %1)
14+
%2 = bitcast %struct.__va_list* %vl to double**
15+
%argp.cur3 = load double*, double** %2, align 4
16+
%v.sroa.0.0.copyload = load double, double* %argp.cur3, align 4
17+
ret double %v.sroa.0.0.copyload
18+
}
19+
20+
declare void @llvm.va_start(i8*)
21+
22+
attributes #0 = { "target-cpu"="cortex-r52" "target-features"="-fp64" }
23+
24+
; Ensures that the machine scheduler does not move the access to the upper
25+
; 32 bits of the double to before the point where it is actually stored to memory
26+
27+
; CHECK: Creating new node: {{.*}} = add FrameIndex:i32<2>, Constant:i32<4>
28+
; CHECK-NEXT: Creating new node: {{.*}} i32,ch = load<(load 4 from [[MEM:%.*]] + 4)>
29+
; CHECK: INLINEASM
30+
; CHECK: (load 4 from [[MEM]] + 4)
31+
; CHECK-NOT: (store 4 into [[MEM]] + 4)
32+
33+

0 commit comments

Comments
 (0)