Skip to content

Commit f2b15be

Browse files
SC llvm team authored and SC llvm team committed
Merged main:36dece001325bbf00129c48ddb3c83668b0ac36e into amd-gfx:22255e0e3056
Local branch amd-gfx 22255e0: Merged main:f95710c76519c611868c16f92586b6d0baedad54 into amd-gfx:b97a8ed014a8. Remote branch main 36dece0: [AMDGPU] Add missing GFX10 buffer format d16 hi instructions (llvm#84809).
2 parents 22255e0 + 36dece0 commit f2b15be

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+779
-879
lines changed

clang/lib/AST/Interp/ByteCodeExprGen.cpp

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3138,16 +3138,17 @@ bool ByteCodeExprGen<Emitter>::VisitComplexUnaryOperator(
31383138
return this->discard(SubExpr);
31393139

31403140
std::optional<PrimType> ResT = classify(E);
3141+
auto prepareResult = [=]() -> bool {
3142+
if (!ResT && !Initializing) {
3143+
std::optional<unsigned> LocalIndex =
3144+
allocateLocal(SubExpr, /*IsExtended=*/false);
3145+
if (!LocalIndex)
3146+
return false;
3147+
return this->emitGetPtrLocal(*LocalIndex, E);
3148+
}
31413149

3142-
// Prepare storage for result.
3143-
if (!ResT && !Initializing) {
3144-
std::optional<unsigned> LocalIndex =
3145-
allocateLocal(SubExpr, /*IsExtended=*/false);
3146-
if (!LocalIndex)
3147-
return false;
3148-
if (!this->emitGetPtrLocal(*LocalIndex, E))
3149-
return false;
3150-
}
3150+
return true;
3151+
};
31513152

31523153
// The offset of the temporary, if we created one.
31533154
unsigned SubExprOffset = ~0u;
@@ -3167,6 +3168,8 @@ bool ByteCodeExprGen<Emitter>::VisitComplexUnaryOperator(
31673168

31683169
switch (E->getOpcode()) {
31693170
case UO_Minus:
3171+
if (!prepareResult())
3172+
return false;
31703173
if (!createTemp())
31713174
return false;
31723175
for (unsigned I = 0; I != 2; ++I) {
@@ -3179,9 +3182,22 @@ bool ByteCodeExprGen<Emitter>::VisitComplexUnaryOperator(
31793182
}
31803183
break;
31813184

3182-
case UO_AddrOf:
3185+
case UO_Plus: // +x
3186+
case UO_AddrOf: // &x
3187+
case UO_Deref: // *x
31833188
return this->delegate(SubExpr);
31843189

3190+
case UO_LNot:
3191+
if (!this->visit(SubExpr))
3192+
return false;
3193+
if (!this->emitComplexBoolCast(SubExpr))
3194+
return false;
3195+
if (!this->emitInvBool(E))
3196+
return false;
3197+
if (PrimType ET = classifyPrim(E->getType()); ET != PT_Bool)
3198+
return this->emitCast(PT_Bool, ET, E);
3199+
return true;
3200+
31853201
case UO_Real:
31863202
return this->emitComplexReal(SubExpr);
31873203

clang/test/AST/Interp/complex.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,8 @@ void blah() {
1414
_Static_assert((0.0 + 0.0j) == (0.0 + 0.0j), "");
1515
_Static_assert((0.0 + 0.0j) != (0.0 + 0.0j), ""); // both-error {{static assertion}} \
1616
// both-note {{evaluates to}}
17+
18+
const _Complex float FC = {0.0f, 0.0f};
19+
_Static_assert(!FC, "");
20+
const _Complex float FI = {0, 0};
21+
_Static_assert(!FI, "");

flang/include/flang/Optimizer/Builder/IntrinsicCall.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,7 @@ struct IntrinsicLibrary {
338338
mlir::Value genSign(mlir::Type, llvm::ArrayRef<mlir::Value>);
339339
mlir::Value genSind(mlir::Type, llvm::ArrayRef<mlir::Value>);
340340
fir::ExtendedValue genSize(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
341+
fir::ExtendedValue genSizeOf(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
341342
mlir::Value genSpacing(mlir::Type resultType,
342343
llvm::ArrayRef<mlir::Value> args);
343344
fir::ExtendedValue genSpread(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);

flang/lib/Optimizer/Builder/IntrinsicCall.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,10 @@ static constexpr IntrinsicHandler handlers[]{
567567
{"dim", asAddr, handleDynamicOptional},
568568
{"kind", asValue}}},
569569
/*isElemental=*/false},
570+
{"sizeof",
571+
&I::genSizeOf,
572+
{{{"a", asBox}}},
573+
/*isElemental=*/false},
570574
{"sleep", &I::genSleep, {{{"seconds", asValue}}}, /*isElemental=*/false},
571575
{"spacing", &I::genSpacing},
572576
{"spread",
@@ -5946,6 +5950,20 @@ IntrinsicLibrary::genSize(mlir::Type resultType,
59465950
.getResults()[0];
59475951
}
59485952

5953+
// SIZEOF
// Lowers the SIZEOF intrinsic. The handlers table registers "sizeof" with a
// single argument "a" passed asBox, so exactly one argument is expected here.
// The result is the element size read from the box; when the argument is an
// array, that element size is multiplied by the value returned by the runtime
// size query. All values are produced in `resultType`.
5954+
fir::ExtendedValue
5955+
IntrinsicLibrary::genSizeOf(mlir::Type resultType,
5956+
llvm::ArrayRef<fir::ExtendedValue> args) {
5957+
assert(args.size() == 1);
5958+
mlir::Value box = fir::getBase(args[0]);
5959+
// Element size queried from the box (fir.box_elesize), typed as resultType.
mlir::Value eleSize = builder.create<fir::BoxEleSizeOp>(loc, resultType, box);
5960+
// Non-array argument: the element size is the whole result.
if (!fir::isArray(args[0]))
5961+
return eleSize;
5962+
// Array argument: get the size from fir::runtime::genSize and convert it to
// resultType so it can be multiplied with eleSize.
mlir::Value arraySize = builder.createConvert(
5963+
loc, resultType, fir::runtime::genSize(builder, loc, box));
5964+
return builder.create<mlir::arith::MulIOp>(loc, eleSize, arraySize);
5965+
}
5966+
59495967
// TAND
59505968
mlir::Value IntrinsicLibrary::genTand(mlir::Type resultType,
59515969
llvm::ArrayRef<mlir::Value> args) {
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
! Test SIZEOF lowering for polymorphic entities.
2+
! RUN: bbc -emit-hlfir --polymorphic-type -o - %s | FileCheck %s
3+
4+
! Scalar case: x is an unlimited polymorphic scalar; the expectations that
! follow this function look for a single fir.box_elesize whose result is
! assigned to the function result.
integer(8) function test1(x)
5+
class(*) :: x
6+
test1 = sizeof(x)
7+
end function
8+
! CHECK-LABEL: func.func @_QPtest1(
9+
! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest1Ex"} : (!fir.class<none>) -> (!fir.class<none>, !fir.class<none>)
10+
! CHECK: %[[VAL_4:.*]] = fir.box_elesize %[[VAL_3]]#1 : (!fir.class<none>) -> i64
11+
! CHECK: hlfir.assign %[[VAL_4]] to %{{.*}} : i64, !fir.ref<i64>
12+
13+
! Array case: x is an assumed-shape rank-2 unlimited polymorphic array; the
! expectations that follow this function look for fir.box_elesize multiplied
! (arith.muli) by the result of the _FortranASize runtime call.
integer(8) function test2(x)
14+
class(*) :: x(:, :)
15+
test2 = sizeof(x)
16+
end function
17+
! CHECK-LABEL: func.func @_QPtest2(
18+
! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFtest2Ex"} : (!fir.class<!fir.array<?x?xnone>>) -> (!fir.class<!fir.array<?x?xnone>>, !fir.class<!fir.array<?x?xnone>>)
19+
! CHECK: %[[VAL_4:.*]] = fir.box_elesize %[[VAL_3]]#1 : (!fir.class<!fir.array<?x?xnone>>) -> i64
20+
! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_3]]#1 : (!fir.class<!fir.array<?x?xnone>>) -> !fir.box<none>
21+
! CHECK: %[[VAL_9:.*]] = fir.call @_FortranASize(%[[VAL_7]], %{{.*}}, %{{.*}}) fastmath<contract> : (!fir.box<none>, !fir.ref<i8>, i32) -> i64
22+
! CHECK: %[[VAL_10:.*]] = arith.muli %[[VAL_4]], %[[VAL_9]] : i64
23+
! CHECK: hlfir.assign %[[VAL_10]] to %{{.*}} : i64, !fir.ref<i64>

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
/* Indicate that this is LLVM compiled from the amd-gfx branch. */
1818
#define LLVM_HAVE_BRANCH_AMD_GFX
19-
#define LLVM_MAIN_REVISION 492365
19+
#define LLVM_MAIN_REVISION 492371
2020

2121
/* Define if LLVM_ENABLE_DUMP is enabled */
2222
#cmakedefine LLVM_ENABLE_DUMP

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5934,13 +5934,16 @@ bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
59345934

59355935
// Keep track of the last MI we inserted. Later on, we might be able to save
59365936
// a copy using it.
5937-
MachineInstr *PrevMI = nullptr;
5937+
MachineInstr *PrevMI = ScalarToVec;
59385938
for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
59395939
// Note that if we don't do a subregister copy, we can end up making an
59405940
// extra register.
5941-
PrevMI = &*emitLaneInsert(std::nullopt, DstVec, I.getOperand(i).getReg(),
5942-
i - 1, RB, MIB);
5943-
DstVec = PrevMI->getOperand(0).getReg();
5941+
Register OpReg = I.getOperand(i).getReg();
5942+
// Do not emit inserts for undefs
5943+
if (!getOpcodeDef<GImplicitDef>(OpReg, MRI)) {
5944+
PrevMI = &*emitLaneInsert(std::nullopt, DstVec, OpReg, i - 1, RB, MIB);
5945+
DstVec = PrevMI->getOperand(0).getReg();
5946+
}
59445947
}
59455948

59465949
// If DstTy's size in bits is less than 128, then emit a subregister copy
@@ -5973,11 +5976,27 @@ bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
59735976
RegOp.setReg(Reg);
59745977
RBI.constrainGenericRegister(DstReg, *RC, MRI);
59755978
} else {
5976-
// We don't need a subregister copy. Save a copy by re-using the
5977-
// destination register on the final insert.
5978-
assert(PrevMI && "PrevMI was null?");
5979+
// We either have a vector with all elements (except the first one) undef or
5980+
// at least one non-undef non-first element. In the first case, we need to
5981+
// constrain the output register ourselves as we may have generated an
5982+
// INSERT_SUBREG operation which is a generic operation for which the
5983+
// output regclass cannot be automatically chosen.
5984+
//
5985+
// In the second case, there is no need to do this as it may generate an
5986+
// instruction like INSvi32gpr where the regclass can be automatically
5987+
// chosen.
5988+
//
5989+
// Also, we save a copy by re-using the destination register on the final
5990+
// insert.
59795991
PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
59805992
constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
5993+
5994+
Register DstReg = PrevMI->getOperand(0).getReg();
5995+
if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
5996+
const TargetRegisterClass *RC =
5997+
getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5998+
RBI.constrainGenericRegister(DstReg, *RC, MRI);
5999+
}
59816000
}
59826001

59836002
I.eraseFromParent();

llvm/lib/Target/AMDGPU/BUFInstructions.td

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2691,9 +2691,8 @@ defm BUFFER_LOAD_SBYTE_D16 : MUBUF_Real_AllAddr_gfx10<0x022>;
26912691
defm BUFFER_LOAD_SBYTE_D16_HI : MUBUF_Real_AllAddr_gfx10<0x023>;
26922692
defm BUFFER_LOAD_SHORT_D16 : MUBUF_Real_AllAddr_gfx10<0x024>;
26932693
defm BUFFER_LOAD_SHORT_D16_HI : MUBUF_Real_AllAddr_gfx10<0x025>;
2694-
// FIXME-GFX10: Add following instructions:
2695-
//defm BUFFER_LOAD_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_gfx10<0x026>;
2696-
//defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_gfx10<0x027>;
2694+
defm BUFFER_LOAD_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_gfx10<0x026>;
2695+
defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_gfx10<0x027>;
26972696
defm BUFFER_LOAD_FORMAT_D16_X : MUBUF_Real_AllAddr_gfx10<0x080>;
26982697
defm BUFFER_LOAD_FORMAT_D16_XY : MUBUF_Real_AllAddr_gfx10<0x081>;
26992698
defm BUFFER_LOAD_FORMAT_D16_XYZ : MUBUF_Real_AllAddr_gfx10<0x082>;

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6100,6 +6100,9 @@ NVPTXTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
61006100

61016101
if (AI->isFloatingPointOperation()) {
61026102
if (AI->getOperation() == AtomicRMWInst::BinOp::FAdd) {
6103+
if (Ty->isHalfTy() && STI.getSmVersion() >= 70 &&
6104+
STI.getPTXVersion() >= 63)
6105+
return AtomicExpansionKind::None;
61036106
if (Ty->isFloatTy())
61046107
return AtomicExpansionKind::None;
61056108
if (Ty->isDoubleTy() && STI.hasAtomAddF64())

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1630,6 +1630,13 @@ defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".add",
16301630
defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
16311631
".add", atomic_load_add_64_gen, i64imm, imm>;
16321632

1633+
defm INT_PTX_ATOM_ADD_G_F16 : F_ATOMIC_2<f16, Int16Regs, ".global", ".f16", ".add.noftz",
1634+
atomic_load_add_g, f16imm, fpimm, [hasSM<70>, hasPTX<63>]>;
1635+
defm INT_PTX_ATOM_ADD_S_F16 : F_ATOMIC_2<f16, Int16Regs, ".shared", ".f16", ".add.noftz",
1636+
atomic_load_add_s, f16imm, fpimm, [hasSM<70>, hasPTX<63>]>;
1637+
defm INT_PTX_ATOM_ADD_GEN_F16 : F_ATOMIC_2<f16, Int16Regs, "", ".f16", ".add.noftz",
1638+
atomic_load_add_gen, f16imm, fpimm, [hasSM<70>, hasPTX<63>]>;
1639+
16331640
defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<f32, Float32Regs, ".global", ".f32", ".add",
16341641
atomic_load_add_g, f32imm, fpimm>;
16351642
defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<f32, Float32Regs, ".shared", ".f32", ".add",
@@ -2007,6 +2014,9 @@ multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
20072014
SDNode Imm, ValueType ImmTy,
20082015
list<Predicate> Preds> {
20092016
let AddedComplexity = 1 in {
2017+
def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2018+
(ins Int16Regs:$src, regclass:$b),
2019+
(Intr (i16 Int16Regs:$src), (regT regclass:$b))>;
20102020
def : ATOM23_impl<AsmStr, regT, regclass, Preds,
20112021
(ins Int32Regs:$src, regclass:$b),
20122022
(Intr (i32 Int32Regs:$src), (regT regclass:$b))>;
@@ -2017,6 +2027,9 @@ multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
20172027
// tablegen can't infer argument types from Intrinsic (though it can
20182028
// from Instruction) so we have to enforce specific type on
20192029
// immediates via explicit cast to ImmTy.
2030+
def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2031+
(ins Int16Regs:$src, ImmType:$b),
2032+
(Intr (i16 Int16Regs:$src), (ImmTy Imm:$b))>;
20202033
def : ATOM23_impl<AsmStr, regT, regclass, Preds,
20212034
(ins Int32Regs:$src, ImmType:$b),
20222035
(Intr (i32 Int32Regs:$src), (ImmTy Imm:$b))>;
@@ -2136,6 +2149,8 @@ multiclass ATOM2_add_impl<string OpStr> {
21362149
defm _s32 : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs, i32imm, imm, i32, []>;
21372150
defm _u32 : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
21382151
defm _u64 : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs, i64imm, imm, i64, []>;
2152+
defm _f16 : ATOM2S_impl<OpStr, "f", "f16", f16, Int16Regs, f16imm, fpimm, f16,
2153+
[hasSM<70>, hasPTX<63>]>;
21392154
defm _f32 : ATOM2S_impl<OpStr, "f", "f32", f32, Float32Regs, f32imm, fpimm, f32,
21402155
[]>;
21412156
defm _f64 : ATOM2S_impl<OpStr, "f", "f64", f64, Float64Regs, f64imm, fpimm, f64,

llvm/test/CodeGen/AArch64/GlobalISel/select-build-vector.mir

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -266,12 +266,8 @@ body: |
266266
; CHECK-LABEL: name: undef_elts_different_regbanks
267267
; CHECK: liveins: $w0
268268
; CHECK: %val:gpr32all = COPY $w0
269-
; CHECK: %undef:gpr32 = IMPLICIT_DEF
270269
; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
271-
; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %val, %subreg.ssub
272-
; CHECK: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG]], 1, %undef
273-
; CHECK: [[INSvi32gpr1:%[0-9]+]]:fpr128 = INSvi32gpr [[INSvi32gpr]], 2, %undef
274-
; CHECK: %bv:fpr128 = INSvi32gpr [[INSvi32gpr1]], 3, %undef
270+
; CHECK: %bv:fpr128 = INSERT_SUBREG [[DEF]], %val, %subreg.ssub
275271
; CHECK: $q0 = COPY %bv
276272
; CHECK: RET_ReallyLR implicit $q0
277273
%val:gpr(s32) = COPY $w0

llvm/test/CodeGen/AArch64/GlobalISel/select-shufflevec-undef-mask-elt.mir

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,20 +19,18 @@ body: |
1919
; CHECK: liveins: $d0
2020
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
2121
; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
22-
; CHECK: [[DEF1:%[0-9]+]]:gpr32 = IMPLICIT_DEF
23-
; CHECK: [[DEF2:%[0-9]+]]:fpr128 = IMPLICIT_DEF
24-
; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF2]], [[DEF]], %subreg.ssub
25-
; CHECK: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG]], 1, [[DEF1]]
26-
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[INSvi32gpr]].dsub
22+
; CHECK: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
23+
; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[DEF]], %subreg.ssub
24+
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[INSERT_SUBREG]].dsub
2725
; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
2826
; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
27+
; CHECK: [[DEF2:%[0-9]+]]:fpr128 = IMPLICIT_DEF
28+
; CHECK: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF2]], [[COPY]], %subreg.dsub
2929
; CHECK: [[DEF3:%[0-9]+]]:fpr128 = IMPLICIT_DEF
30-
; CHECK: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF3]], [[COPY]], %subreg.dsub
31-
; CHECK: [[DEF4:%[0-9]+]]:fpr128 = IMPLICIT_DEF
32-
; CHECK: [[INSERT_SUBREG2:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF4]], [[COPY1]], %subreg.dsub
30+
; CHECK: [[INSERT_SUBREG2:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF3]], [[COPY1]], %subreg.dsub
3331
; CHECK: [[INSvi64lane:%[0-9]+]]:fpr128 = INSvi64lane [[INSERT_SUBREG1]], 1, [[INSERT_SUBREG2]], 0
34-
; CHECK: [[DEF5:%[0-9]+]]:fpr128 = IMPLICIT_DEF
35-
; CHECK: [[INSERT_SUBREG3:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF5]], [[LDRDui]], %subreg.dsub
32+
; CHECK: [[DEF4:%[0-9]+]]:fpr128 = IMPLICIT_DEF
33+
; CHECK: [[INSERT_SUBREG3:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF4]], [[LDRDui]], %subreg.dsub
3634
; CHECK: [[TBLv16i8One:%[0-9]+]]:fpr128 = TBLv16i8One [[INSvi64lane]], [[INSERT_SUBREG3]]
3735
; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY [[TBLv16i8One]].dsub
3836
; CHECK: $d0 = COPY [[COPY2]]

llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ define <1 x i32> @test_bitf_v1i32(<1 x i32> %A, <1 x i32> %B, <1 x i32> %C) {
7777
; CHECK-GI-NEXT: and w8, w8, w10
7878
; CHECK-GI-NEXT: orr w8, w9, w8
7979
; CHECK-GI-NEXT: fmov s0, w8
80-
; CHECK-GI-NEXT: mov v0.s[1], w8
8180
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
8281
; CHECK-GI-NEXT: ret
8382
%neg = xor <1 x i32> %C, <i32 -1>

llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,6 @@ define <1 x i32> @test_bit_v1i32(<1 x i32> %A, <1 x i32> %B, <1 x i32> %C) {
7979
; CHECK-GI-NEXT: bic w8, w10, w8
8080
; CHECK-GI-NEXT: orr w8, w9, w8
8181
; CHECK-GI-NEXT: fmov s0, w8
82-
; CHECK-GI-NEXT: mov v0.s[1], w8
8382
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
8483
; CHECK-GI-NEXT: ret
8584
%and = and <1 x i32> %C, %B

llvm/test/CodeGen/AArch64/abs.ll

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,6 @@ define <1 x i32> @abs_v1i32(<1 x i32> %a){
252252
; CHECK-GI-NEXT: add w8, w8, w9
253253
; CHECK-GI-NEXT: eor w8, w8, w9
254254
; CHECK-GI-NEXT: fmov s0, w8
255-
; CHECK-GI-NEXT: mov v0.s[1], w8
256255
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
257256
; CHECK-GI-NEXT: ret
258257
entry:
@@ -308,11 +307,6 @@ define <3 x i8> @abs_v3i8(<3 x i8> %a){
308307
; CHECK-GI-NEXT: mov v0.b[1], v1.b[0]
309308
; CHECK-GI-NEXT: fmov s1, w2
310309
; CHECK-GI-NEXT: mov v0.b[2], v1.b[0]
311-
; CHECK-GI-NEXT: mov v0.b[3], v0.b[0]
312-
; CHECK-GI-NEXT: mov v0.b[4], v0.b[0]
313-
; CHECK-GI-NEXT: mov v0.b[5], v0.b[0]
314-
; CHECK-GI-NEXT: mov v0.b[6], v0.b[0]
315-
; CHECK-GI-NEXT: mov v0.b[7], v0.b[0]
316310
; CHECK-GI-NEXT: abs v0.8b, v0.8b
317311
; CHECK-GI-NEXT: umov w0, v0.b[0]
318312
; CHECK-GI-NEXT: umov w1, v0.b[1]

llvm/test/CodeGen/AArch64/arm64-dup.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -373,11 +373,9 @@ define <4 x i16> @test_build_illegal(<4 x i32> %in) {
373373
;
374374
; CHECK-GI-LABEL: test_build_illegal:
375375
; CHECK-GI: // %bb.0:
376-
; CHECK-GI-NEXT: mov.h v1[1], v0[0]
377376
; CHECK-GI-NEXT: mov s0, v0[3]
378-
; CHECK-GI-NEXT: mov.h v1[2], v0[0]
379-
; CHECK-GI-NEXT: mov.h v1[3], v0[0]
380-
; CHECK-GI-NEXT: fmov d0, d1
377+
; CHECK-GI-NEXT: mov.h v0[3], v0[0]
378+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
381379
; CHECK-GI-NEXT: ret
382380
%val = extractelement <4 x i32> %in, i32 3
383381
%smallval = trunc i32 %val to i16

0 commit comments

Comments
 (0)