Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit 8e070ff

Browse files
committed
[ARM] Enable mixed types in ARM CGP
Previously, during the search, all values had to have the same 'TypeSize', which is equal to number of bits of the integer type of the icmp operand. All values in the tree had to match this size; meaning that, if we searched from i16, we wouldn't accept i8s. A change in type size requires zext and truncs to perform the casts so, to allow mixed narrow types, the handling of these instructions is now slightly different: - we allow casts if their result or operand is <= TypeSize. - zexts are sinks if their result > TypeSize. - truncs are still sinks if their operand == TypeSize. - truncs are still sources if their result == TypeSize. The transformation bails on finding an icmp that operates on data smaller than the current TypeSize. Differential Revision: https://reviews.llvm.org/D54108 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@346480 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 0a1353a commit 8e070ff

File tree

4 files changed

+298
-70
lines changed

4 files changed

+298
-70
lines changed

lib/Target/ARM/ARMCodeGenPrepare.cpp

+73-61
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ class IRPromoter {
126126
SmallPtrSetImpl<Instruction*> &SafeToPromote);
127127
void TruncateSinks(SmallPtrSetImpl<Value*> &Sources,
128128
SmallPtrSetImpl<Instruction*> &Sinks);
129-
void Cleanup(SmallPtrSetImpl<Instruction*> &Sinks);
129+
void Cleanup(SmallPtrSetImpl<Value*> &Visited);
130130

131131
public:
132132
IRPromoter(Module *M) : M(M), Ctx(M->getContext()),
@@ -180,6 +180,18 @@ static bool generateSignBits(Value *V) {
180180
Opc == Instruction::SRem;
181181
}
182182

183+
static bool EqualTypeSize(Value *V) {
184+
return V->getType()->getScalarSizeInBits() == ARMCodeGenPrepare::TypeSize;
185+
}
186+
187+
static bool LessOrEqualTypeSize(Value *V) {
188+
return V->getType()->getScalarSizeInBits() <= ARMCodeGenPrepare::TypeSize;
189+
}
190+
191+
static bool GreaterThanTypeSize(Value *V) {
192+
return V->getType()->getScalarSizeInBits() > ARMCodeGenPrepare::TypeSize;
193+
}
194+
183195
/// Some instructions can use 8- and 16-bit operands, and we don't need to
184196
/// promote anything larger. We disallow booleans to make life easier when
185197
/// dealing with icmps but allow any other integer that is <= 16 bits. Void
@@ -194,11 +206,10 @@ static bool isSupportedType(Value *V) {
194206
if (auto *Ld = dyn_cast<LoadInst>(V))
195207
Ty = cast<PointerType>(Ld->getPointerOperandType())->getElementType();
196208

197-
const IntegerType *IntTy = dyn_cast<IntegerType>(Ty);
198-
if (!IntTy)
209+
if (!isa<IntegerType>(Ty))
199210
return false;
200211

201-
return IntTy->getBitWidth() == ARMCodeGenPrepare::TypeSize;
212+
return LessOrEqualTypeSize(V);
202213
}
203214

204215
/// Return true if the given value is a source in the use-def chain, producing
@@ -221,7 +232,7 @@ static bool isSource(Value *V) {
221232
else if (auto *Call = dyn_cast<CallInst>(V))
222233
return Call->hasRetAttr(Attribute::AttrKind::ZExt);
223234
else if (auto *Trunc = dyn_cast<TruncInst>(V))
224-
return isSupportedType(Trunc);
235+
return EqualTypeSize(Trunc);
225236
return false;
226237
}
227238

@@ -232,18 +243,15 @@ static bool isSink(Value *V) {
232243
// TODO The truncate also isn't actually necessary because we would already
233244
// proved that the data value is kept within the range of the original data
234245
// type.
235-
auto UsesNarrowValue = [](Value *V) {
236-
return V->getType()->getScalarSizeInBits() == ARMCodeGenPrepare::TypeSize;
237-
};
238246

239247
if (auto *Store = dyn_cast<StoreInst>(V))
240-
return UsesNarrowValue(Store->getValueOperand());
248+
return LessOrEqualTypeSize(Store->getValueOperand());
241249
if (auto *Return = dyn_cast<ReturnInst>(V))
242-
return UsesNarrowValue(Return->getReturnValue());
250+
return LessOrEqualTypeSize(Return->getReturnValue());
243251
if (auto *Trunc = dyn_cast<TruncInst>(V))
244-
return UsesNarrowValue(Trunc->getOperand(0));
252+
return EqualTypeSize(Trunc->getOperand(0));
245253
if (auto *ZExt = dyn_cast<ZExtInst>(V))
246-
return UsesNarrowValue(ZExt->getOperand(0));
254+
return GreaterThanTypeSize(ZExt);
247255
if (auto *ICmp = dyn_cast<ICmpInst>(V))
248256
return ICmp->isSigned();
249257

@@ -649,36 +657,37 @@ void IRPromoter::TruncateSinks(SmallPtrSetImpl<Value*> &Sources,
649657
}
650658
}
651659
}
652-
653660
}
654661

655-
void IRPromoter::Cleanup(SmallPtrSetImpl<Instruction*> &Sinks) {
656-
// Some zext sinks will now have become redundant, along with their trunc
657-
// operands, so remove them.
658-
for (auto I : Sinks) {
659-
if (auto *ZExt = dyn_cast<ZExtInst>(I)) {
660-
if (ZExt->getDestTy() != ExtTy)
661-
continue;
662+
void IRPromoter::Cleanup(SmallPtrSetImpl<Value*> &Visited) {
663+
// Some zexts will now have become redundant, along with their trunc
664+
// operands, so remove them
665+
for (auto V : Visited) {
666+
if (!isa<ZExtInst>(V))
667+
continue;
662668

663-
Value *Src = ZExt->getOperand(0);
664-
if (ZExt->getSrcTy() == ZExt->getDestTy()) {
665-
LLVM_DEBUG(dbgs() << "ARM CGP: Removing unnecessary zext\n");
666-
ReplaceAllUsersOfWith(ZExt, Src);
667-
InstsToRemove.push_back(ZExt);
668-
continue;
669-
}
669+
auto ZExt = cast<ZExtInst>(V);
670+
if (ZExt->getDestTy() != ExtTy)
671+
continue;
670672

671-
// For any truncs that we insert to handle zexts, we can replace the
672-
// result of the zext with the input to the trunc.
673-
if (NewInsts.count(Src) && isa<TruncInst>(Src)) {
674-
auto *Trunc = cast<TruncInst>(Src);
675-
assert(Trunc->getOperand(0)->getType() == ExtTy &&
676-
"expected inserted trunc to be operating on i32");
677-
LLVM_DEBUG(dbgs() << "ARM CGP: Replacing zext with trunc operand: "
678-
<< *Trunc->getOperand(0));
679-
ReplaceAllUsersOfWith(ZExt, Trunc->getOperand(0));
680-
InstsToRemove.push_back(ZExt);
681-
}
673+
Value *Src = ZExt->getOperand(0);
674+
if (ZExt->getSrcTy() == ZExt->getDestTy()) {
675+
LLVM_DEBUG(dbgs() << "ARM CGP: Removing unnecessary cast.\n");
676+
ReplaceAllUsersOfWith(ZExt, Src);
677+
InstsToRemove.push_back(ZExt);
678+
continue;
679+
}
680+
681+
// For any truncs that we insert to handle zexts, we can replace the
682+
// result of the zext with the input to the trunc.
683+
if (NewInsts.count(Src) && isa<TruncInst>(Src)) {
684+
auto *Trunc = cast<TruncInst>(Src);
685+
assert(Trunc->getOperand(0)->getType() == ExtTy &&
686+
"expected inserted trunc to be operating on i32");
687+
LLVM_DEBUG(dbgs() << "ARM CGP: Replacing zext with trunc operand: "
688+
<< *Trunc->getOperand(0));
689+
ReplaceAllUsersOfWith(ZExt, Trunc->getOperand(0));
690+
InstsToRemove.push_back(ZExt);
682691
}
683692
}
684693

@@ -728,27 +737,25 @@ void IRPromoter::Mutate(Type *OrigTy,
728737

729738
// Finally, remove unecessary zexts and truncs, delete old instructions and
730739
// clear the data structures.
731-
Cleanup(Sinks);
740+
Cleanup(Visited);
732741

733-
LLVM_DEBUG(dbgs() << "ARM CGP: Mutation complete:\n");
734-
LLVM_DEBUG(dbgs();
735-
for (auto *V : Sources)
736-
V->dump();
737-
for (auto *I : NewInsts)
738-
I->dump();
739-
for (auto *V : Visited) {
740-
if (!Sources.count(V))
741-
V->dump();
742-
});
742+
LLVM_DEBUG(dbgs() << "ARM CGP: Mutation complete\n");
743743
}
744744

745745
/// We accept most instructions, as well as Arguments and ConstantInsts. We
746746
/// Disallow casts other than zext and truncs and only allow calls if their
747747
/// return value is zeroext. We don't allow opcodes that can introduce sign
748748
/// bits.
749749
bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
750-
if (isa<ICmpInst>(V))
751-
return true;
750+
if (auto *I = dyn_cast<ICmpInst>(V)) {
751+
// Now that we allow small types than TypeSize, only allow icmp of
752+
// TypeSize because they will require a trunc to be legalised.
753+
// TODO: Allow icmp of smaller types, and calculate at the end
754+
// whether the transform would be beneficial.
755+
if (isa<PointerType>(I->getOperand(0)->getType()))
756+
return true;
757+
return EqualTypeSize(I->getOperand(0));
758+
}
752759

753760
// Memory instructions
754761
if (isa<StoreInst>(V) || isa<GetElementPtrInst>(V))
@@ -766,12 +773,11 @@ bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
766773
isa<LoadInst>(V))
767774
return isSupportedType(V);
768775

769-
// Truncs can be either sources or sinks.
770-
if (auto *Trunc = dyn_cast<TruncInst>(V))
771-
return isSupportedType(Trunc) || isSupportedType(Trunc->getOperand(0));
776+
if (isa<SExtInst>(V))
777+
return false;
772778

773-
if (isa<CastInst>(V) && !isa<SExtInst>(V))
774-
return isSupportedType(cast<CastInst>(V)->getOperand(0));
779+
if (auto *Cast = dyn_cast<CastInst>(V))
780+
return isSupportedType(Cast) || isSupportedType(Cast->getOperand(0));
775781

776782
// Special cases for calls as we need to check for zeroext
777783
// TODO We should accept calls even if they don't have zeroext, as they can
@@ -901,13 +907,17 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
901907
// Calls can be both sources and sinks.
902908
if (isSink(V))
903909
Sinks.insert(cast<Instruction>(V));
910+
904911
if (isSource(V))
905912
Sources.insert(V);
906-
else if (auto *I = dyn_cast<Instruction>(V)) {
907-
// Visit operands of any instruction visited.
908-
for (auto &U : I->operands()) {
909-
if (!AddLegalInst(U))
910-
return false;
913+
914+
if (!isSink(V) && !isSource(V)) {
915+
if (auto *I = dyn_cast<Instruction>(V)) {
916+
// Visit operands of any instruction visited.
917+
for (auto &U : I->operands()) {
918+
if (!AddLegalInst(U))
919+
return false;
920+
}
911921
}
912922
}
913923

@@ -973,6 +983,8 @@ bool ARMCodeGenPrepare::runOnFunction(Function &F) {
973983
if (CI.isSigned() || !isa<IntegerType>(CI.getOperand(0)->getType()))
974984
continue;
975985

986+
LLVM_DEBUG(dbgs() << "ARM CGP: Searching from: " << CI << "\n");
987+
976988
for (auto &Op : CI.operands()) {
977989
if (auto *I = dyn_cast<Instruction>(Op))
978990
MadeChange |= TryToPromote(I);

test/CodeGen/ARM/CGP/arm-cgp-calls.ll

+1-3
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,10 @@ define i16 @test_call(i8 zeroext %arg) {
2222
ret i16 %conv
2323
}
2424

25-
; Test that the transformation bails when it finds that i16 is larger than i8.
26-
; TODO: We should be able to remove the uxtb in these cases.
2725
; CHECK-LABEL: promote_i8_sink_i16_1
2826
; CHECK: bl dummy_i8
2927
; CHECK: add{{.*}} r0, #1
30-
; CHECK: uxtb r0, r0
28+
; CHECK-NOT: uxt
3129
; CHECK: cmp r0
3230
define i16 @promote_i8_sink_i16_1(i8 zeroext %arg0, i16 zeroext %arg1, i16 zeroext %arg2) {
3331
%call = tail call zeroext i8 @dummy_i8(i8 %arg0)

0 commit comments

Comments
 (0)