-
Notifications
You must be signed in to change notification settings
Fork 13.6k
[AArch64][GlobalISel] Select TBL/TBX Intrinsics #92914
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@llvm/pr-subscribers-backend-aarch64
Author: None (chuongg3)
Changes
Patch is 55.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/92914.diff
2 Files Affected:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 3b3c1fc8b27bf..b7af19ef139c5 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -227,6 +227,8 @@ class AArch64InstructionSelector : public InstructionSelector {
bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
+ void SelectTable(MachineInstr &I, MachineRegisterInfo &MRI, unsigned NumVecs,
+ unsigned Opc, bool isExt);
bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
@@ -6537,6 +6539,48 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
I.eraseFromParent();
return true;
}
+ case Intrinsic::aarch64_neon_tbl2:
+ SelectTable(I, MRI, 2,
+ MRI.getType(I.getOperand(0).getReg()) == LLT::fixed_vector(8, 8)
+ ? AArch64::TBLv8i8Two
+ : AArch64::TBLv16i8Two,
+ false);
+ return true;
+ case Intrinsic::aarch64_neon_tbl3:
+ SelectTable(I, MRI, 3,
+ MRI.getType(I.getOperand(0).getReg()) == LLT::fixed_vector(8, 8)
+ ? AArch64::TBLv8i8Three
+ : AArch64::TBLv16i8Three,
+ false);
+ return true;
+ case Intrinsic::aarch64_neon_tbl4:
+ SelectTable(I, MRI, 4,
+ MRI.getType(I.getOperand(0).getReg()) == LLT::fixed_vector(8, 8)
+ ? AArch64::TBLv8i8Four
+ : AArch64::TBLv16i8Four,
+ false);
+ return true;
+ case Intrinsic::aarch64_neon_tbx2:
+ SelectTable(I, MRI, 2,
+ MRI.getType(I.getOperand(0).getReg()) == LLT::fixed_vector(8, 8)
+ ? AArch64::TBXv8i8Two
+ : AArch64::TBXv16i8Two,
+ true);
+ return true;
+ case Intrinsic::aarch64_neon_tbx3:
+ SelectTable(I, MRI, 3,
+ MRI.getType(I.getOperand(0).getReg()) == LLT::fixed_vector(8, 8)
+ ? AArch64::TBXv8i8Three
+ : AArch64::TBXv16i8Three,
+ true);
+ return true;
+ case Intrinsic::aarch64_neon_tbx4:
+ SelectTable(I, MRI, 4,
+ MRI.getType(I.getOperand(0).getReg()) == LLT::fixed_vector(8, 8)
+ ? AArch64::TBXv8i8Four
+ : AArch64::TBXv16i8Four,
+ true);
+ return true;
case Intrinsic::swift_async_context_addr:
auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
{Register(AArch64::FP)})
@@ -6552,6 +6596,28 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
return false;
}
+void AArch64InstructionSelector::SelectTable(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ unsigned NumVec, unsigned Opc,
+ bool isExt) {
+ // Create the REG_SEQUENCE
+ SmallVector<Register, 4> Regs;
+ for (unsigned i = 0; i < NumVec; i++)
+ Regs.push_back(I.getOperand(i + 2 + isExt).getReg());
+ Register RegSeq = createQTuple(Regs, MIB);
+
+ Register DstReg = I.getOperand(0).getReg();
+ Register IdxReg = I.getOperand(2 + NumVec + isExt).getReg();
+ MachineInstrBuilder Instr;
+ if (isExt) {
+ Register Reg = I.getOperand(2).getReg();
+ Instr = MIB.buildInstr(Opc, {DstReg}, {Reg, RegSeq, IdxReg});
+ } else
+ Instr = MIB.buildInstr(Opc, {DstReg}, {RegSeq, IdxReg});
+ constrainSelectedInstRegOperands(*Instr, TII, TRI, RBI);
+ I.eraseFromParent();
+}
+
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
auto MaybeImmed = getImmedFromMO(Root);
diff --git a/llvm/test/CodeGen/AArch64/arm64-tbl.ll b/llvm/test/CodeGen/AArch64/arm64-tbl.ll
index b89232c03f136..a5675df982d65 100644
--- a/llvm/test/CodeGen/AArch64/arm64-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-tbl.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind {
; CHECK-LABEL: tbl1_8b:
@@ -20,136 +21,189 @@ define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind {
}
define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) {
-; CHECK-LABEL: tbl2_8b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: tbl.8b v0, { v0, v1 }, v2
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: tbl2_8b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: tbl.8b v0, { v0, v1 }, v2
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: tbl2_8b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: tbl.8b v0, { v0, v1 }, v2
+; CHECK-GI-NEXT: ret
%tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C)
ret <8 x i8> %tmp3
}
define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
-; CHECK-LABEL: tbl2_16b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: tbl2_16b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: tbl.16b v0, { v0, v1 }, v2
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: tbl2_16b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
+; CHECK-GI-NEXT: ret
%tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
ret <16 x i8> %tmp3
}
define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
-; CHECK-LABEL: tbl3_8b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-NEXT: tbl.8b v0, { v0, v1, v2 }, v3
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: tbl3_8b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: tbl.8b v0, { v0, v1, v2 }, v3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: tbl3_8b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: tbl.8b v0, { v0, v1, v2 }, v3
+; CHECK-GI-NEXT: ret
%tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
ret <8 x i8> %tmp3
}
define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
-; CHECK-LABEL: tbl3_16b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-NEXT: tbl.16b v0, { v0, v1, v2 }, v3
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: tbl3_16b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2 }, v3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: tbl3_16b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT: tbl.16b v0, { v0, v1, v2 }, v3
+; CHECK-GI-NEXT: ret
%tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
ret <16 x i8> %tmp3
}
define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
-; CHECK-LABEL: tbl4_8b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: tbl.8b v0, { v0, v1, v2, v3 }, v4
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: tbl4_8b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: tbl.8b v0, { v0, v1, v2, v3 }, v4
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: tbl4_8b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: tbl.8b v0, { v0, v1, v2, v3 }, v4
+; CHECK-GI-NEXT: ret
%tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
ret <8 x i8> %tmp3
}
define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
-; CHECK-LABEL: tbl4_16b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: tbl4_16b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: tbl4_16b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
+; CHECK-GI-NEXT: ret
%tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
ret <16 x i8> %tmp3
}
-; CHECK-LABEL: .LCPI8_0:
-; CHECK-NEXT: .byte 0 // 0x0
-; CHECK-NEXT: .byte 4 // 0x4
-; CHECK-NEXT: .byte 8 // 0x8
-; CHECK-NEXT: .byte 12 // 0xc
-; CHECK-NEXT: .byte 255 // 0xff
-; CHECK-NEXT: .byte 255 // 0xff
-; CHECK-NEXT: .byte 255 // 0xff
-; CHECK-NEXT: .byte 255 // 0xff
define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
-; CHECK-LABEL: shuffled_tbl2_to_tbl4_v8i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI8_0
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
-; CHECK-NEXT: ldr d4, [x8, :lo12:.LCPI8_0]
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
-; CHECK-NEXT: tbl.8b v0, { v0, v1 }, v4
-; CHECK-NEXT: tbl.8b v1, { v2, v3 }, v4
-; CHECK-NEXT: mov.s v0[1], v1[1]
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_v8i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: adrp x8, .LCPI8_0
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
+; CHECK-SD-NEXT: ldr d4, [x8, :lo12:.LCPI8_0]
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
+; CHECK-SD-NEXT: tbl.8b v0, { v0, v1 }, v4
+; CHECK-SD-NEXT: tbl.8b v1, { v2, v3 }, v4
+; CHECK-SD-NEXT: mov.s v0[1], v1[1]
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_v8i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI8_1
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
+; CHECK-GI-NEXT: ldr d4, [x8, :lo12:.LCPI8_1]
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
+; CHECK-GI-NEXT: adrp x8, .LCPI8_0
+; CHECK-GI-NEXT: tbl.8b v0, { v0, v1 }, v4
+; CHECK-GI-NEXT: tbl.8b v1, { v2, v3 }, v4
+; CHECK-GI-NEXT: mov.d v0[1], v1[0]
+; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI8_0]
+; CHECK-GI-NEXT: tbl.16b v0, { v0 }, v1
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
%t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
%t2 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %c, <16 x i8> %d, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
%s = shufflevector <8 x i8> %t1, <8 x i8> %t2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
ret <8 x i8> %s
}
-; CHECK-LABEL: .LCPI9_0:
-; CHECK-NEXT: .byte 0 // 0x0
-; CHECK-NEXT: .byte 4 // 0x4
-; CHECK-NEXT: .byte 8 // 0x8
-; CHECK-NEXT: .byte 12 // 0xc
-; CHECK-NEXT: .byte 16 // 0x10
-; CHECK-NEXT: .byte 20 // 0x14
-; CHECK-NEXT: .byte 24 // 0x18
-; CHECK-NEXT: .byte 28 // 0x1c
-; CHECK-NEXT: .byte 32 // 0x20
-; CHECK-NEXT: .byte 36 // 0x24
-; CHECK-NEXT: .byte 40 // 0x28
-; CHECK-NEXT: .byte 44 // 0x2c
-; CHECK-NEXT: .byte 48 // 0x30
-; CHECK-NEXT: .byte 52 // 0x34
-; CHECK-NEXT: .byte 56 // 0x38
-; CHECK-NEXT: .byte 60 // 0x3c
-
define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
-; CHECK-LABEL: shuffled_tbl2_to_tbl4:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: adrp x8, .LCPI9_0
-; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI9_0]
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: adrp x8, .LCPI9_0
+; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI9_0]
+; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI9_1
+; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
+; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI9_1]
+; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
+; CHECK-GI-NEXT: adrp x8, .LCPI9_0
+; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4
+; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v4
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI9_0]
+; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2
+; CHECK-GI-NEXT: ret
%t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
%t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
%s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
@@ -157,38 +211,72 @@ define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c
}
define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
-; CHECK-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fmov s4, w0
-; CHECK-NEXT: mov w8, #32 // =0x20
-; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT...
[truncated]
|
aemerson
approved these changes
May 24, 2024
davemgreen
reviewed
May 28, 2024
193957b
to
4c6666d
Compare
davemgreen
approved these changes
May 29, 2024
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like a nice one. LGTM
chuongg3
added a commit
that referenced
this pull request
May 29, 2024
4c6666d
to
72919e3
Compare
vg0204
pushed a commit
to vg0204/llvm-project
that referenced
this pull request
May 29, 2024
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
No description provided.