Skip to content

Commit ab553d6

Browse files
SwSw
Sw
authored and
Sw
committed
Merged master:f0de8d09409 into amd-gfx:200eabe9376
Local branch amd-gfx 200eabe Merged master:5aa5c943f7d into amd-gfx:81fd7fd17ec Remote branch master f0de8d0 [Arm] Do not lower vmax/vmin to Neon instructions
2 parents 200eabe + f0de8d0 commit ab553d6

29 files changed

+1686
-241
lines changed

clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -726,7 +726,8 @@ class BugReporterContext {
726726
class NoteTag : public ProgramPointTag {
727727
public:
728728
using Callback =
729-
std::function<std::string(BugReporterContext &, BugReport &)>;
729+
std::function<std::string(BugReporterContext &,
730+
PathSensitiveBugReport &)>;
730731

731732
private:
732733
static int Kind;
@@ -743,7 +744,7 @@ class NoteTag : public ProgramPointTag {
743744
}
744745

745746
Optional<std::string> generateMessage(BugReporterContext &BRC,
746-
BugReport &R) const {
747+
PathSensitiveBugReport &R) const {
747748
std::string Msg = Cb(BRC, R);
748749
if (Msg.empty())
749750
return None;

clang/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -258,10 +258,12 @@ class CheckerContext {
258258
/// @param IsPrunable Whether the note is prunable. It allows BugReporter
259259
/// to omit the note from the report if it would make the displayed
260260
/// bug path significantly shorter.
261-
const NoteTag *getNoteTag(std::function<std::string(BugReport &)> &&Cb,
262-
bool IsPrunable = false) {
261+
const NoteTag
262+
*getNoteTag(std::function<std::string(PathSensitiveBugReport &)> &&Cb,
263+
bool IsPrunable = false) {
263264
return getNoteTag(
264-
[Cb](BugReporterContext &, BugReport &BR) { return Cb(BR); },
265+
[Cb](BugReporterContext &,
266+
PathSensitiveBugReport &BR) { return Cb(BR); },
265267
IsPrunable);
266268
}
267269

@@ -274,7 +276,8 @@ class CheckerContext {
274276
/// bug path significantly shorter.
275277
const NoteTag *getNoteTag(std::function<std::string()> &&Cb,
276278
bool IsPrunable = false) {
277-
return getNoteTag([Cb](BugReporterContext &, BugReport &) { return Cb(); },
279+
return getNoteTag([Cb](BugReporterContext &,
280+
PathSensitiveBugReport &) { return Cb(); },
278281
IsPrunable);
279282
}
280283

@@ -286,7 +289,8 @@ class CheckerContext {
286289
/// bug path significantly shorter.
287290
const NoteTag *getNoteTag(StringRef Note, bool IsPrunable = false) {
288291
return getNoteTag(
289-
[Note](BugReporterContext &, BugReport &) { return std::string(Note); },
292+
[Note](BugReporterContext &,
293+
PathSensitiveBugReport &) { return std::string(Note); },
290294
IsPrunable);
291295
}
292296

clang/lib/StaticAnalyzer/Checkers/CXXSelfAssignmentChecker.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ void CXXSelfAssignmentChecker::checkBeginFunction(CheckerContext &C) const {
5353

5454
ProgramStateRef SelfAssignState = State->bindLoc(Param, ThisVal, LCtx);
5555
const NoteTag *SelfAssignTag =
56-
C.getNoteTag([MD](BugReport &BR) -> std::string {
56+
C.getNoteTag([MD](PathSensitiveBugReport &BR) -> std::string {
5757
SmallString<256> Msg;
5858
llvm::raw_svector_ostream Out(Msg);
5959
Out << "Assuming " << MD->getParamDecl(0)->getName() << " == *this";
@@ -63,7 +63,7 @@ void CXXSelfAssignmentChecker::checkBeginFunction(CheckerContext &C) const {
6363

6464
ProgramStateRef NonSelfAssignState = State->bindLoc(Param, ParamVal, LCtx);
6565
const NoteTag *NonSelfAssignTag =
66-
C.getNoteTag([MD](BugReport &BR) -> std::string {
66+
C.getNoteTag([MD](PathSensitiveBugReport &BR) -> std::string {
6767
SmallString<256> Msg;
6868
llvm::raw_svector_ostream Out(Msg);
6969
Out << "Assuming " << MD->getParamDecl(0)->getName() << " != *this";

clang/lib/StaticAnalyzer/Checkers/FuchsiaHandleChecker.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -382,8 +382,8 @@ void FuchsiaHandleChecker::checkPostCall(const CallEvent &Call,
382382
}
383383
const NoteTag *T = nullptr;
384384
if (!Notes.empty()) {
385-
T = C.getNoteTag(
386-
[this, Notes{std::move(Notes)}](BugReport &BR) -> std::string {
385+
T = C.getNoteTag([this, Notes{std::move(Notes)}](
386+
PathSensitiveBugReport &BR) -> std::string {
387387
if (&BR.getBugType() != &UseAfterReleaseBugType &&
388388
&BR.getBugType() != &LeakBugType &&
389389
&BR.getBugType() != &DoubleReleaseBugType)

clang/lib/StaticAnalyzer/Checkers/MIGChecker.cpp

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -210,15 +210,16 @@ void MIGChecker::checkPostCall(const CallEvent &Call, CheckerContext &C) const {
210210
if (!PVD || State->contains<RefCountedParameters>(PVD))
211211
return;
212212

213-
const NoteTag *T = C.getNoteTag([this, PVD](BugReport &BR) -> std::string {
214-
if (&BR.getBugType() != &BT)
215-
return "";
216-
SmallString<64> Str;
217-
llvm::raw_svector_ostream OS(Str);
218-
OS << "Value passed through parameter '" << PVD->getName()
219-
<< "\' is deallocated";
220-
return std::string(OS.str());
221-
});
213+
const NoteTag *T =
214+
C.getNoteTag([this, PVD](PathSensitiveBugReport &BR) -> std::string {
215+
if (&BR.getBugType() != &BT)
216+
return "";
217+
SmallString<64> Str;
218+
llvm::raw_svector_ostream OS(Str);
219+
OS << "Value passed through parameter '" << PVD->getName()
220+
<< "\' is deallocated";
221+
return std::string(OS.str());
222+
});
222223
C.addTransition(State->set<ReleasedParameter>(true), T);
223224
}
224225

clang/lib/StaticAnalyzer/Checkers/ReturnValueChecker.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ void ReturnValueChecker::checkPostCall(const CallEvent &Call,
9999

100100
std::string Name = getName(Call);
101101
const NoteTag *CallTag = C.getNoteTag(
102-
[Name, ExpectedValue](BugReport &) -> std::string {
102+
[Name, ExpectedValue](PathSensitiveBugReport &) -> std::string {
103103
SmallString<128> Msg;
104104
llvm::raw_svector_ostream Out(Msg);
105105

clang/lib/StaticAnalyzer/Core/CoreEngine.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ void CoreEngine::HandleBlockEdge(const BlockEdge &L, ExplodedNode *Pred) {
221221
if (L.getSrc()->getTerminator().isVirtualBaseBranch() &&
222222
L.getDst() == *L.getSrc()->succ_begin()) {
223223
ProgramPoint P = L.withTag(getNoteTags().makeNoteTag(
224-
[](BugReporterContext &, BugReport &) -> std::string {
224+
[](BugReporterContext &, PathSensitiveBugReport &) -> std::string {
225225
// TODO: Just call out the name of the most derived class
226226
// when we know it.
227227
return "Virtual base initialization skipped because "

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1420,12 +1420,16 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
14201420
}
14211421

14221422
if (Subtarget->hasNEON()) {
1423-
// vmin and vmax aren't available in a scalar form, so we use
1424-
// a NEON instruction with an undef lane instead.
1425-
setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
1426-
setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
1427-
setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
1428-
setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
1423+
// vmin and vmax aren't available in a scalar form, so we can use
1424+
// a NEON instruction with an undef lane instead. This has a performance
1425+
// penalty on some cores, so we don't do this unless we have been
1426+
// asked to by the core tuning model.
1427+
if (Subtarget->useNEONForSinglePrecisionFP()) {
1428+
setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
1429+
setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
1430+
setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
1431+
setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
1432+
}
14291433
setOperationAction(ISD::FMINIMUM, MVT::v2f32, Legal);
14301434
setOperationAction(ISD::FMAXIMUM, MVT::v2f32, Legal);
14311435
setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);

llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp

Lines changed: 102 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -508,20 +508,31 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) {
508508
return true;
509509
}
510510

511+
static bool isVectorPredicated(MachineInstr *MI) {
512+
int PIdx = llvm::findFirstVPTPredOperandIdx(*MI);
513+
return PIdx != -1 && MI->getOperand(PIdx + 1).getReg() == ARM::VPR;
514+
}
515+
516+
static bool isRegInClass(const MachineOperand &MO,
517+
const TargetRegisterClass *Class) {
518+
return MO.isReg() && MO.getReg() && Class->contains(MO.getReg());
519+
}
520+
511521
bool LowOverheadLoop::ValidateLiveOuts() const {
512522
// Collect Q-regs that are live in the exit blocks. We don't collect scalars
513523
// because they won't be affected by lane predication.
514524
const TargetRegisterClass *QPRs = TRI.getRegClass(ARM::MQPRRegClassID);
515525
SmallSet<Register, 2> LiveOuts;
516-
SmallVector<MachineBasicBlock*, 2> ExitBlocks;
526+
SmallVector<MachineBasicBlock *, 2> ExitBlocks;
517527
ML.getExitBlocks(ExitBlocks);
518528
for (auto *MBB : ExitBlocks)
519529
for (const MachineBasicBlock::RegisterMaskPair &RegMask : MBB->liveins())
520530
if (QPRs->contains(RegMask.PhysReg))
521531
LiveOuts.insert(RegMask.PhysReg);
522532

523533
// Collect the instructions in the loop body that define the live-out values.
524-
SmallPtrSet<MachineInstr*, 2> LiveMIs;
534+
SmallPtrSet<MachineInstr *, 2> LiveMIs;
535+
assert(ML.getNumBlocks() == 1 && "Expected single block loop!");
525536
MachineBasicBlock *MBB = ML.getHeader();
526537
for (auto Reg : LiveOuts)
527538
if (auto *MI = RDA.getLocalLiveOutMIDef(MBB, Reg))
@@ -534,12 +545,98 @@ bool LowOverheadLoop::ValidateLiveOuts() const {
534545
// equivalent when we perform the predication transformation; so we know that
535546
// any VPT predicated instruction is predicated upon VCTP. Any live-out
536547
// instruction needs to be predicated, so check this here.
537-
for (auto *MI : LiveMIs) {
538-
int PIdx = llvm::findFirstVPTPredOperandIdx(*MI);
539-
if (PIdx == -1 || MI->getOperand(PIdx+1).getReg() != ARM::VPR)
548+
for (auto *MI : LiveMIs)
549+
if (!isVectorPredicated(MI))
540550
return false;
551+
552+
// We want to find out if the tail-predicated version of this loop will
553+
// produce the same values as the loop in its original form. For this to
554+
// be true, the newly inserted implicit predication must not change the
555+
// (observable) results.
556+
// We're doing this because many instructions in the loop will not be
557+
// predicated and so the conversion from VPT predication to tail-predication
558+
// can result in different values being produced; due to the tail-predication
559+
// preventing many instructions from updating their falsely predicated
560+
// lanes. This analysis assumes that all the instructions perform lane-wise
561+
// operations and don't perform any exchanges.
562+
// A masked load, whether through VPT or tail predication, will write zeros
563+
// to any of the falsely predicated bytes. So, from the loads, we know that
564+
// the false lanes are zeroed and here we're trying to track that those false
565+
// lanes remain zero, or where they change, the differences are masked away
566+
// by their user(s).
567+
// All MVE loads and stores have to be predicated, so we know that any load
568+
// operands, or stored results are equivalent already. Other explicitly
569+
// predicated instructions will perform the same operation in the original
570+
// loop and the tail-predicated form too. Because of this, we can insert
571+
// loads, stores and other predicated instructions into our KnownFalseZeros
572+
// set and build from there.
573+
SetVector<MachineInstr *> UnknownFalseLanes;
574+
SmallPtrSet<MachineInstr *, 4> KnownFalseZeros;
575+
for (auto &MI : *MBB) {
576+
const MCInstrDesc &MCID = MI.getDesc();
577+
uint64_t Flags = MCID.TSFlags;
578+
if ((Flags & ARMII::DomainMask) != ARMII::DomainMVE)
579+
continue;
580+
581+
if (isVectorPredicated(&MI)) {
582+
KnownFalseZeros.insert(&MI);
583+
continue;
584+
}
585+
586+
if (MI.getNumDefs() == 0)
587+
continue;
588+
589+
// Only evaluate instructions which produce a single value.
590+
assert((MI.getNumDefs() == 1 && MI.defs().begin()->isReg()) &&
591+
"Expected no more than one register def");
592+
593+
Register DefReg = MI.defs().begin()->getReg();
594+
for (auto &MO : MI.operands()) {
595+
if (!isRegInClass(MO, QPRs) || !MO.isUse() || MO.getReg() != DefReg)
596+
continue;
597+
598+
// If this instruction overwrites one of its operands, and that register
599+
// has known lanes, then this instruction also has known predicated false
600+
// lanes.
601+
if (auto *OpDef = RDA.getMIOperand(&MI, MO)) {
602+
if (KnownFalseZeros.count(OpDef)) {
603+
KnownFalseZeros.insert(&MI);
604+
break;
605+
}
606+
}
607+
}
608+
if (!KnownFalseZeros.count(&MI))
609+
UnknownFalseLanes.insert(&MI);
541610
}
542611

612+
auto HasKnownUsers = [this](MachineInstr *MI, const MachineOperand &MO,
613+
SmallPtrSetImpl<MachineInstr *> &Knowns) {
614+
SmallPtrSet<MachineInstr *, 2> Uses;
615+
RDA.getGlobalUses(MI, MO.getReg(), Uses);
616+
for (auto *Use : Uses) {
617+
if (Use != MI && !Knowns.count(Use))
618+
return false;
619+
}
620+
return true;
621+
};
622+
623+
// Now for all the unknown values, see if they're only consumed by known
624+
// instructions. Visit in reverse so that we can start at the values being
625+
// stored and then we can work towards the leaves, hopefully adding more
626+
// instructions to KnownFalseZeros.
627+
for (auto *MI : reverse(UnknownFalseLanes)) {
628+
for (auto &MO : MI->operands()) {
629+
if (!isRegInClass(MO, QPRs) || !MO.isDef())
630+
continue;
631+
if (!HasKnownUsers(MI, MO, KnownFalseZeros)) {
632+
LLVM_DEBUG(dbgs() << "ARM Loops: Found an unknown def of : "
633+
<< TRI.getRegAsmName(MO.getReg()) << " at " << *MI);
634+
return false;
635+
}
636+
}
637+
// Any unknown false lanes have been masked away by the user(s).
638+
KnownFalseZeros.insert(MI);
639+
}
543640
return true;
544641
}
545642

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 37 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -33868,24 +33868,46 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
3386833868
// TODO - handle 128/256-bit lane shuffles of 512-bit vectors.
3386933869

3387033870
// Handle 128-bit lane shuffles of 256-bit vectors.
33871-
// If we have AVX2, prefer to use VPERMQ/VPERMPD for unary shuffles unless
33872-
// we need to use the zeroing feature.
33873-
// TODO - this should support binary shuffles.
33874-
if (UnaryShuffle && RootVT.is256BitVector() && NumBaseMaskElts == 2 &&
33875-
!(Subtarget.hasAVX2() && BaseMask[0] >= -1 && BaseMask[1] >= -1) &&
33876-
!isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0)) {
33871+
if (RootVT.is256BitVector() && NumBaseMaskElts == 2) {
3387733872
if (Depth == 0 && Root.getOpcode() == X86ISD::VPERM2X128)
3387833873
return SDValue(); // Nothing to do!
3387933874
MVT ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64);
33880-
unsigned PermMask = 0;
33881-
PermMask |= ((BaseMask[0] < 0 ? 0x8 : (BaseMask[0] & 1)) << 0);
33882-
PermMask |= ((BaseMask[1] < 0 ? 0x8 : (BaseMask[1] & 1)) << 4);
33883-
33884-
Res = DAG.getBitcast(ShuffleVT, V1);
33885-
Res = DAG.getNode(X86ISD::VPERM2X128, DL, ShuffleVT, Res,
33886-
DAG.getUNDEF(ShuffleVT),
33887-
DAG.getTargetConstant(PermMask, DL, MVT::i8));
33888-
return DAG.getBitcast(RootVT, Res);
33875+
33876+
// If we have AVX2, prefer to use VPERMQ/VPERMPD for unary shuffles unless
33877+
// we need to use the zeroing feature.
33878+
if (UnaryShuffle &&
33879+
!(Subtarget.hasAVX2() && isUndefOrInRange(BaseMask, 0, 2)) &&
33880+
!isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0)) {
33881+
unsigned PermMask = 0;
33882+
PermMask |= ((BaseMask[0] < 0 ? 0x8 : (BaseMask[0] & 1)) << 0);
33883+
PermMask |= ((BaseMask[1] < 0 ? 0x8 : (BaseMask[1] & 1)) << 4);
33884+
33885+
Res = DAG.getBitcast(ShuffleVT, V1);
33886+
Res = DAG.getNode(X86ISD::VPERM2X128, DL, ShuffleVT, Res,
33887+
DAG.getUNDEF(ShuffleVT),
33888+
DAG.getTargetConstant(PermMask, DL, MVT::i8));
33889+
return DAG.getBitcast(RootVT, Res);
33890+
}
33891+
33892+
// TODO - handle AVX512VL cases with X86ISD::SHUF128.
33893+
if (!UnaryShuffle && !IsEVEXShuffle) {
33894+
assert(llvm::all_of(BaseMask, [](int M) { return 0 <= M && M < 4; }) &&
33895+
"Unexpected shuffle sentinel value");
33896+
// Prefer blends to X86ISD::VPERM2X128.
33897+
if (!((BaseMask[0] == 0 && BaseMask[1] == 3) ||
33898+
(BaseMask[0] == 2 && BaseMask[1] == 1))) {
33899+
unsigned PermMask = 0;
33900+
PermMask |= ((BaseMask[0] & 3) << 0);
33901+
PermMask |= ((BaseMask[1] & 3) << 4);
33902+
33903+
Res = DAG.getNode(
33904+
X86ISD::VPERM2X128, DL, ShuffleVT,
33905+
DAG.getBitcast(ShuffleVT, isInRange(BaseMask[0], 0, 2) ? V1 : V2),
33906+
DAG.getBitcast(ShuffleVT, isInRange(BaseMask[1], 0, 2) ? V1 : V2),
33907+
DAG.getTargetConstant(PermMask, DL, MVT::i8));
33908+
return DAG.getBitcast(RootVT, Res);
33909+
}
33910+
}
3388933911
}
3389033912

3389133913
// For masks that have been widened to 128-bit elements or more,

llvm/test/CodeGen/ARM/fp16-fullfp16.ll

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -446,7 +446,9 @@ define void @test_minimum(half* %p) {
446446
; CHECK-LABEL: test_minimum:
447447
; CHECK: vldr.16 s2, [r0]
448448
; CHECK-NEXT: vmov.f16 s0, #1.000000e+00
449-
; CHECK-NEXT: vmin.f16 d0, d1, d0
449+
; CHECK-NEXT: vcmp.f16 s2, s0
450+
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
451+
; CHECK-NEXT: vselge.f16 s0, s0, s2
450452
; CHECK-NEXT: vstr.16 s0, [r0]
451453
; CHECK-NEXT: bx lr
452454
%a = load half, half* %p, align 2
@@ -460,7 +462,9 @@ define void @test_maximum(half* %p) {
460462
; CHECK-LABEL: test_maximum:
461463
; CHECK: vldr.16 s2, [r0]
462464
; CHECK-NEXT: vmov.f16 s0, #1.000000e+00
463-
; CHECK-NEXT: vmax.f16 d0, d1, d0
465+
; CHECK-NEXT: vcmp.f16 s0, s2
466+
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
467+
; CHECK-NEXT: vselge.f16 s0, s0, s2
464468
; CHECK-NEXT: vstr.16 s0, [r0]
465469
; CHECK-NEXT: bx lr
466470
%a = load half, half* %p, align 2

llvm/test/CodeGen/ARM/fp16-promote.ll

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -665,7 +665,9 @@ define void @test_maxnum(half* %p, half* %q) #0 {
665665
; CHECK-LIBCALL: bl __aeabi_h2f
666666
; CHECK-LIBCALL-VFP: vmov.f32 s{{[0-9]+}}, #1.000000e+00
667667
; CHECK-NOVFP: mov r{{[0-9]+}}, #1065353216
668-
; CHECK-VFP: vmin.f32
668+
; CHECK-VFP: vcmp.f32
669+
; CHECK-VFP: vmrs
670+
; CHECK-VFP: vmovlt.f32
669671
; CHECK-NOVFP: bl __aeabi_fcmpge
670672
; CHECK-FP16: vcvtb.f16.f32
671673
; CHECK-LIBCALL: bl __aeabi_f2h
@@ -683,7 +685,9 @@ define void @test_minimum(half* %p) #0 {
683685
; CHECK-LIBCALL: bl __aeabi_h2f
684686
; CHECK-LIBCALL-VFP: vmov.f32 s0, #1.000000e+00
685687
; CHECK-NOVFP: mov r{{[0-9]+}}, #1065353216
686-
; CHECK-VFP: vmax.f32
688+
; CHECK-VFP: vcmp.f32
689+
; CHECK-VFP: vmrs
690+
; CHECK-VFP: vmovhi.f32
687691
; CHECK-NOVFP: bl __aeabi_fcmple
688692
; CHECK-FP16: vcvtb.f16.f32
689693
; CHECK-LIBCALL: bl __aeabi_f2h

0 commit comments

Comments
 (0)