Skip to content

Commit c53eb93

Browse files
authored
PeepholeOpt: Immediately check if a reg_sequence compose supports a subregister (#128279)
This is a quick fix for EXPENSIVE_CHECKS bot failures. I still think we could defer looking for a compatible subregister further up the use-def chain, and should be able to check compatibility with the ultimately found source.
1 parent 01cc1d1 commit c53eb93

File tree

2 files changed

+45
-4
lines changed

2 files changed

+45
-4
lines changed

llvm/lib/CodeGen/PeepholeOptimizer.cpp

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1991,10 +1991,6 @@ ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
19911991
// If we did not find an exact match, see if we can do a composition to
19921992
// extract a sub-subregister.
19931993
for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) {
1994-
// We don't check if the resulting class supports the subregister index
1995-
// yet. This will occur before any rewrite when looking for an eligible
1996-
// source.
1997-
19981994
LaneBitmask DefMask = TRI->getSubRegIndexLaneMask(DefSubReg);
19991995
LaneBitmask ThisOpRegMask = TRI->getSubRegIndexLaneMask(RegSeqInput.SubIdx);
20001996

@@ -2012,6 +2008,17 @@ ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
20122008

20132009
unsigned ComposedDefInSrcReg1 =
20142010
TRI->composeSubRegIndices(RegSeqInput.SubReg, ReverseDefCompose);
2011+
2012+
// TODO: We should be able to defer checking if the result register class
2013+
// supports the index to continue looking for a rewritable source.
2014+
//
2015+
// TODO: Should we modify the register class to support the index?
2016+
const TargetRegisterClass *SrcRC = MRI.getRegClass(RegSeqInput.Reg);
2017+
const TargetRegisterClass *SrcWithSubRC =
2018+
TRI->getSubClassWithSubReg(SrcRC, ComposedDefInSrcReg1);
2019+
if (SrcRC != SrcWithSubRC)
2020+
return ValueTrackerResult();
2021+
20152022
return ValueTrackerResult(RegSeqInput.Reg, ComposedDefInSrcReg1);
20162023
}
20172024

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc --mattr=+mve.fp,+fp64 -verify-machineinstrs < %s | FileCheck %s
3+
4+
; Check that peephole-opt doesn't introduce an invalid subregister use
5+
6+
target triple = "thumbv8.1m.main-none-none-eabi"
7+
8+
define <4 x float> @reg_sequence_subreg_compose_failure(<4 x float> %a, <2 x float> %b) {
9+
; CHECK-LABEL: reg_sequence_subreg_compose_failure:
10+
; CHECK: @ %bb.0: @ %entry
11+
; CHECK-NEXT: vmov d0, r0, r1
12+
; CHECK-NEXT: mov r0, sp
13+
; CHECK-NEXT: vmov d1, r2, r3
14+
; CHECK-NEXT: vldrw.u32 q1, [r0]
15+
; CHECK-NEXT: vldr s0, .LCPI0_0
16+
; CHECK-NEXT: vmov.f32 s8, s1
17+
; CHECK-NEXT: vmov.f32 s9, s3
18+
; CHECK-NEXT: vmul.f32 q1, q2, q1
19+
; CHECK-NEXT: vmov.f32 s2, s0
20+
; CHECK-NEXT: vmov.f32 s1, s4
21+
; CHECK-NEXT: vmov.f32 s3, s5
22+
; CHECK-NEXT: vmov r0, r1, d0
23+
; CHECK-NEXT: vmov r2, r3, d1
24+
; CHECK-NEXT: bx lr
25+
; CHECK-NEXT: .p2align 2
26+
; CHECK-NEXT: @ %bb.1:
27+
; CHECK-NEXT: .LCPI0_0:
28+
; CHECK-NEXT: .long 0x00000000 @ float 0
29+
entry:
30+
%a.imag = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
31+
%mul = fmul <2 x float> %a.imag, %b
32+
%interleaved.vec = shufflevector <2 x float> zeroinitializer, <2 x float> %mul, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
33+
ret <4 x float> %interleaved.vec
34+
}

0 commit comments

Comments
 (0)