-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[HEXAGON] Fix hvx-isel for extract_subvector op #129672
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Fixes a crash with extract_subvector operations in the Hexagon backend, seen when the source vector is a vector pair and the result vector is not the size of a single HVX vector. LLVM Issue: llvm#128775 Change-Id: Iff32cfc37f9c8d2d72e2d9f0e86fa2547d217cc1
@llvm/pr-subscribers-backend-hexagon Author: None (aankit-ca) Changes: Fixes a crash with extract_subvector operations in the Hexagon backend, seen when the source vector is a vector pair and the result vector is not the size of a single HVX vector. LLVM Issue: #128775 Patch is 38.60 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/129672.diff 4 Files Affected:
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index deffe6369df17..b9d549c21b978 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -1265,11 +1265,15 @@ HexagonTargetLowering::extractHvxSubvectorReg(SDValue OrigOp, SDValue VecV,
// the subvector of interest. The subvector will never overlap two single
// vectors.
if (isHvxPairTy(VecTy)) {
- if (Idx * ElemWidth >= 8*HwLen)
+ unsigned SubIdx = Hexagon::vsub_lo;
+ if (Idx * ElemWidth >= 8*HwLen) {
+ SubIdx = Hexagon::vsub_hi;
Idx -= VecTy.getVectorNumElements() / 2;
+ }
- VecV = OrigOp;
- if (typeSplit(VecTy).first == ResTy)
+ VecTy = typeSplit(VecTy).first;
+ VecV = DAG.getTargetExtractSubreg(SubIdx, dl, VecTy, VecV);
+ if (VecTy == ResTy)
return VecV;
}
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/fp-to-int.ll b/llvm/test/CodeGen/Hexagon/autohvx/fp-to-int.ll
index ac51662242de8..196b37678be61 100644
--- a/llvm/test/CodeGen/Hexagon/autohvx/fp-to-int.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/fp-to-int.ll
@@ -13,13 +13,13 @@ define void @f16s8_0(ptr %a0, ptr %a1) #0 {
; CHECK-NEXT: {
; CHECK-NEXT: r3:2 = combine(##32768,#1)
; CHECK-NEXT: r4 = #14
-; CHECK-NEXT: v1 = vmem(r0+#0)
+; CHECK-NEXT: v0 = vmem(r0+#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v2.h = vsplat(r3)
; CHECK-NEXT: r6 = #5
; CHECK-NEXT: v3.h = vasl(v0.h,r2)
-; CHECK-NEXT: v0.cur = vmem(r0+#1)
+; CHECK-NEXT: v1 = vmem(r0+#1)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v4.h = vsplat(r4)
@@ -33,55 +33,55 @@ define void @f16s8_0(ptr %a0, ptr %a1) #0 {
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r3 = #16
-; CHECK-NEXT: v5.h = vasl(v1.h,r6)
-; CHECK-NEXT: q1 = vcmp.gt(v7.h,v0.h)
+; CHECK-NEXT: v5.h = vasl(v0.h,r6)
+; CHECK-NEXT: q1 = vcmp.gt(v7.h,v1.h)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v6.h = vsplat(r3)
-; CHECK-NEXT: v27.h = vasr(v3.h,r5)
+; CHECK-NEXT: v28.h = vasr(v3.h,r5)
; CHECK-NEXT: v5 = vor(v5,v2)
-; CHECK-NEXT: q0 = vcmp.gt(v7.h,v1.h)
+; CHECK-NEXT: q0 = vcmp.gt(v7.h,v0.h)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v9.h = vsplat(r4)
; CHECK-NEXT: v8.h = vasr(v8.h,r5)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v26.h = vasl(v0.h,r6)
-; CHECK-NEXT: v0.h = vsub(v4.h,v27.h)
+; CHECK-NEXT: v27.h = vasl(v1.h,r6)
+; CHECK-NEXT: v1.h = vsub(v4.h,v28.h)
; CHECK-NEXT: v4.h = vsub(v4.h,v8.h)
-; CHECK-NEXT: v28 = vmux(q0,v2,v9)
+; CHECK-NEXT: v29 = vmux(q0,v2,v9)
; CHECK-NEXT: }
; CHECK-NEXT: {
+; CHECK-NEXT: v1.h = vmin(v1.h,v6.h)
+; CHECK-NEXT: v0 = vor(v27,v2)
; CHECK-NEXT: v4.h = vmin(v4.h,v6.h)
-; CHECK-NEXT: v1 = vor(v26,v2)
-; CHECK-NEXT: v0.h = vmin(v0.h,v6.h)
; CHECK-NEXT: v2 = vmux(q1,v2,v9)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: q2 = vcmp.gt(v4.h,v7.h)
-; CHECK-NEXT: q3 = vcmp.gt(v0.h,v7.h)
+; CHECK-NEXT: q2 = vcmp.gt(v1.h,v7.h)
+; CHECK-NEXT: q3 = vcmp.gt(v4.h,v7.h)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v5.h = vlsr(v5.h,v4.h)
+; CHECK-NEXT: v5.h = vlsr(v5.h,v1.h)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v1.h = vlsr(v1.h,v0.h)
-; CHECK-NEXT: v29.h = vsub(v7.h,v5.h)
+; CHECK-NEXT: v0.h = vlsr(v0.h,v4.h)
+; CHECK-NEXT: v30.h = vsub(v7.h,v5.h)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v30.h = vsub(v7.h,v1.h)
-; CHECK-NEXT: v5 = vmux(q0,v29,v5)
+; CHECK-NEXT: v31.h = vsub(v7.h,v0.h)
+; CHECK-NEXT: v5 = vmux(q0,v30,v5)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v1 = vmux(q1,v30,v1)
-; CHECK-NEXT: v31 = vmux(q2,v5,v28)
+; CHECK-NEXT: v0 = vmux(q1,v31,v0)
+; CHECK-NEXT: v1 = vmux(q2,v5,v29)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v1 = vmux(q3,v1,v2)
+; CHECK-NEXT: v0 = vmux(q3,v0,v2)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v0.b = vpack(v1.h,v31.h):sat
+; CHECK-NEXT: v0.b = vpack(v0.h,v1.h):sat
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: vmem(r1+#0) = v0.new
; CHECK-NEXT: }
@@ -491,127 +491,127 @@ define void @f32s8_0(ptr %a0, ptr %a1) #0 {
; CHECK: .cfi_startproc
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: {
-; CHECK-NEXT: r4 = ##-2147483648
; CHECK-NEXT: r3:2 = combine(#1,#8)
-; CHECK-NEXT: v5 = vmem(r0+#0)
+; CHECK-NEXT: r4 = ##-2147483648
+; CHECK-NEXT: v5 = vmem(r0+#1)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v1 = vsplat(r4)
+; CHECK-NEXT: v0 = vsplat(r4)
; CHECK-NEXT: r7 = #30
; CHECK-NEXT: r6 = #24
-; CHECK-NEXT: v2 = vmem(r0+#2)
+; CHECK-NEXT: v4 = vmem(r0+#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v10 = vsplat(r7)
; CHECK-NEXT: r5 = #32
-; CHECK-NEXT: v8.w = vasl(v4.w,r3)
-; CHECK-NEXT: v4.cur = vmem(r0+#1)
+; CHECK-NEXT: v9.w = vasl(v5.w,r3)
+; CHECK-NEXT: v1 = vmem(r0+#3)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v7.w = vasl(v5.w,r3)
-; CHECK-NEXT: v12 = vxor(v12,v12)
-; CHECK-NEXT: v8.w = vsub(v8.w,v1.w)
-; CHECK-NEXT: v0 = vmem(r0+#3)
+; CHECK-NEXT: v8.w = vasl(v4.w,r3)
+; CHECK-NEXT: v14 = vxor(v14,v14)
+; CHECK-NEXT: v9.w = vsub(v9.w,v0.w)
+; CHECK-NEXT: v2 = vmem(r0+#2)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v13 = vsplat(r5)
-; CHECK-NEXT: v11.w = vasl(v0.w,r3)
-; CHECK-NEXT: v7.w = vsub(v7.w,v1.w)
-; CHECK-NEXT: q0 = vcmp.gt(v12.w,v5.w)
+; CHECK-NEXT: v11.w = vasl(v2.w,r3)
+; CHECK-NEXT: v8.w = vsub(v8.w,v0.w)
+; CHECK-NEXT: q1 = vcmp.gt(v14.w,v5.w)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v9.w = vasl(v2.w,r3)
-; CHECK-NEXT: q1 = vcmp.gt(v12.w,v4.w)
-; CHECK-NEXT: v11.w = vsub(v11.w,v1.w)
+; CHECK-NEXT: v12.w = vasl(v1.w,r3)
+; CHECK-NEXT: q0 = vcmp.gt(v14.w,v4.w)
+; CHECK-NEXT: v11.w = vsub(v11.w,v0.w)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r3 = ##2147483647
; CHECK-NEXT: r7 = #64
-; CHECK-NEXT: v8.w = vasr(v8.w,r6)
+; CHECK-NEXT: v9.w = vasr(v9.w,r6)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v22 = vsplat(r3)
-; CHECK-NEXT: v7.w = vasr(v7.w,r6)
-; CHECK-NEXT: v19.w = vsub(v9.w,v1.w)
-; CHECK-NEXT: v8.w = vsub(v10.w,v8.w)
+; CHECK-NEXT: v18 = vsplat(r3)
+; CHECK-NEXT: v7.w = vasl(v5.w,r2)
+; CHECK-NEXT: v19.w = vsub(v12.w,v0.w)
+; CHECK-NEXT: v9.w = vsub(v10.w,v9.w)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v20.w = vasl(v4.w,r2)
-; CHECK-NEXT: v27 = vmux(q1,v1,v22)
-; CHECK-NEXT: v25 = vmux(q0,v1,v22)
-; CHECK-NEXT: v7.w = vsub(v10.w,v7.w)
+; CHECK-NEXT: v8.w = vasr(v8.w,r6)
+; CHECK-NEXT: v25 = vmux(q1,v0,v18)
+; CHECK-NEXT: v23 = vmux(q0,v0,v18)
+; CHECK-NEXT: v9.w = vmin(v9.w,v13.w)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v6.w = vasl(v5.w,r2)
+; CHECK-NEXT: v6.w = vasl(v4.w,r2)
+; CHECK-NEXT: v7 = vor(v7,v0)
+; CHECK-NEXT: v8.w = vsub(v10.w,v8.w)
+; CHECK-NEXT: q3 = vcmp.gt(v9.w,v14.w)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: v11.w = vasr(v11.w,r6)
; CHECK-NEXT: v8.w = vmin(v8.w,v13.w)
-; CHECK-NEXT: v9 = vor(v20,v1)
-; CHECK-NEXT: v21.w = vmin(v7.w,v13.w)
+; CHECK-NEXT: v6 = vor(v6,v0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v5.w = vasr(v19.w,r6)
-; CHECK-NEXT: q3 = vcmp.gt(v8.w,v12.w)
-; CHECK-NEXT: v6 = vor(v6,v1)
-; CHECK-NEXT: q2 = vcmp.gt(v21.w,v12.w)
+; CHECK-NEXT: v11.w = vsub(v10.w,v11.w)
+; CHECK-NEXT: q2 = vcmp.gt(v8.w,v14.w)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v11.w = vasr(v11.w,r6)
+; CHECK-NEXT: v3.w = vasl(v1.w,r2)
; CHECK-NEXT: v5.w = vsub(v10.w,v5.w)
+; CHECK-NEXT: v21.w = vmin(v11.w,v13.w)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v3.w = vasl(v2.w,r2)
-; CHECK-NEXT: v10.w = vsub(v10.w,v11.w)
+; CHECK-NEXT: v20.w = vasl(v2.w,r2)
+; CHECK-NEXT: v3 = vor(v3,v0)
; CHECK-NEXT: v5.w = vmin(v5.w,v13.w)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v23.w = vasl(v0.w,r2)
-; CHECK-NEXT: v3 = vor(v3,v1)
-; CHECK-NEXT: v10.w = vmin(v10.w,v13.w)
+; CHECK-NEXT: v7.w = vlsr(v7.w,v9.w)
+; CHECK-NEXT: v12 = vor(v20,v0)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v8.w = vlsr(v9.w,v8.w)
-; CHECK-NEXT: v4 = vor(v23,v1)
+; CHECK-NEXT: v6.w = vlsr(v6.w,v8.w)
+; CHECK-NEXT: v24.w = vsub(v14.w,v7.w)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v6.w = vlsr(v6.w,v21.w)
-; CHECK-NEXT: v26.w = vsub(v12.w,v8.w)
+; CHECK-NEXT: v26.w = vlsr(v12.w,v21.w)
+; CHECK-NEXT: v22.w = vsub(v14.w,v6.w)
+; CHECK-NEXT: v7 = vmux(q1,v24,v7)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v3.w = vlsr(v3.w,v5.w)
-; CHECK-NEXT: v24.w = vsub(v12.w,v6.w)
-; CHECK-NEXT: v8 = vmux(q1,v26,v8)
+; CHECK-NEXT: v6 = vmux(q0,v22,v6)
+; CHECK-NEXT: q0 = vcmp.gt(v14.w,v2.w)
+; CHECK-NEXT: v27.w = vsub(v14.w,v26.w)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v4.w = vlsr(v4.w,v10.w)
-; CHECK-NEXT: v6 = vmux(q0,v24,v6)
-; CHECK-NEXT: q0 = vcmp.gt(v12.w,v2.w)
-; CHECK-NEXT: v28.w = vsub(v12.w,v3.w)
+; CHECK-NEXT: v2 = vmux(q3,v7,v25)
+; CHECK-NEXT: v29.w = vsub(v14.w,v3.w)
+; CHECK-NEXT: q3 = vcmp.gt(v14.w,v1.w)
+; CHECK-NEXT: v6 = vmux(q2,v6,v23)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v2 = vmux(q3,v8,v27)
-; CHECK-NEXT: v29.w = vsub(v12.w,v4.w)
-; CHECK-NEXT: q3 = vcmp.gt(v12.w,v0.w)
-; CHECK-NEXT: v6 = vmux(q2,v6,v25)
-; CHECK-NEXT: }
-; CHECK-NEXT: {
-; CHECK-NEXT: v30 = vmux(q0,v1,v22)
-; CHECK-NEXT: v3 = vmux(q0,v28,v3)
-; CHECK-NEXT: q2 = vcmp.gt(v5.w,v12.w)
-; CHECK-NEXT: v4 = vmux(q3,v29,v4)
+; CHECK-NEXT: v30 = vmux(q0,v0,v18)
+; CHECK-NEXT: v28 = vmux(q0,v27,v26)
+; CHECK-NEXT: q2 = vcmp.gt(v21.w,v14.w)
+; CHECK-NEXT: v3 = vmux(q3,v29,v3)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v2.h = vpack(v2.w,v6.w):sat
-; CHECK-NEXT: v1 = vmux(q3,v1,v22)
-; CHECK-NEXT: q3 = vcmp.gt(v10.w,v12.w)
-; CHECK-NEXT: v0 = vmux(q2,v3,v30)
+; CHECK-NEXT: v0 = vmux(q3,v0,v18)
+; CHECK-NEXT: q3 = vcmp.gt(v5.w,v14.w)
+; CHECK-NEXT: v1 = vmux(q2,v28,v30)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v1 = vmux(q3,v4,v1)
+; CHECK-NEXT: v0 = vmux(q3,v3,v0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v3.h = vpack(v1.w,v0.w):sat
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v0.h = vpack(v1.w,v0.w):sat
+; CHECK-NEXT: v0.h = vpack(v0.w,v1.w):sat
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v31.b = vpack(v3.h,v2.h):sat
@@ -638,13 +638,13 @@ define void @f32s8_1(ptr %a0, ptr %a1) #0 {
; CHECK-NEXT: {
; CHECK-NEXT: r3:2 = combine(##-2147483648,#8)
; CHECK-NEXT: r4 = #1
-; CHECK-NEXT: v1 = vmem(r0+#0)
+; CHECK-NEXT: v1 = vmem(r0+#1)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v3 = vsplat(r3)
; CHECK-NEXT: r5 = #30
; CHECK-NEXT: v4.w = vasl(v0.w,r4)
-; CHECK-NEXT: v0.cur = vmem(r0+#1)
+; CHECK-NEXT: v0.cur = vmem(r0+#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v5.w = vasl(v1.w,r4)
@@ -653,64 +653,64 @@ define void @f32s8_1(ptr %a0, ptr %a1) #0 {
; CHECK-NEXT: r4 = #32
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v6 = vsplat(r5)
-; CHECK-NEXT: v7 = vsplat(r4)
+; CHECK-NEXT: v7 = vsplat(r5)
+; CHECK-NEXT: v8 = vsplat(r4)
; CHECK-NEXT: v2.w = vasl(v1.w,r2)
; CHECK-NEXT: v5.w = vsub(v5.w,v3.w)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v4.w = vasr(v4.w,r6)
-; CHECK-NEXT: v26 = vxor(v26,v26)
+; CHECK-NEXT: v27 = vxor(v27,v27)
; CHECK-NEXT: v2 = vor(v2,v3)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r3 = ##2147483647
; CHECK-NEXT: v5.w = vasr(v5.w,r6)
-; CHECK-NEXT: q0 = vcmp.gt(v26.w,v1.w)
+; CHECK-NEXT: q0 = vcmp.gt(v27.w,v0.w)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v27 = vsplat(r3)
-; CHECK-NEXT: v4.w = vsub(v6.w,v4.w)
-; CHECK-NEXT: q2 = vcmp.gt(v26.w,v0.w)
-; CHECK-NEXT: v5.w = vsub(v6.w,v5.w)
+; CHECK-NEXT: v28 = vsplat(r3)
+; CHECK-NEXT: v6.w = vasl(v0.w,r2)
+; CHECK-NEXT: v4.w = vsub(v7.w,v4.w)
+; CHECK-NEXT: q2 = vcmp.gt(v27.w,v1.w)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v8.w = vasl(v0.w,r2)
-; CHECK-NEXT: v4.w = vmin(v4.w,v7.w)
-; CHECK-NEXT: v30 = vmux(q0,v3,v27)
-; CHECK-NEXT: v5.w = vmin(v5.w,v7.w)
+; CHECK-NEXT: v5.w = vsub(v7.w,v5.w)
+; CHECK-NEXT: v4.w = vmin(v4.w,v8.w)
+; CHECK-NEXT: v31 = vmux(q0,v3,v28)
+; CHECK-NEXT: v6 = vor(v6,v3)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v25 = vor(v8,v3)
-; CHECK-NEXT: v1 = vmux(q2,v3,v27)
-; CHECK-NEXT: q3 = vcmp.gt(v4.w,v26.w)
-; CHECK-NEXT: q1 = vcmp.gt(v5.w,v26.w)
+; CHECK-NEXT: v5.w = vmin(v5.w,v8.w)
+; CHECK-NEXT: q1 = vcmp.gt(v4.w,v27.w)
+; CHECK-NEXT: v0 = vmux(q2,v3,v28)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r2 = #64
-; CHECK-NEXT: v2.w = vlsr(v2.w,v5.w)
+; CHECK-NEXT: v6.w = vlsr(v6.w,v4.w)
+; CHECK-NEXT: q3 = vcmp.gt(v5.w,v27.w)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v28.w = vlsr(v25.w,v4.w)
-; CHECK-NEXT: v29.w = vsub(v26.w,v2.w)
+; CHECK-NEXT: v2.w = vlsr(v2.w,v5.w)
+; CHECK-NEXT: v29.w = vsub(v27.w,v6.w)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v6.w = vsub(v26.w,v28.w)
-; CHECK-NEXT: v0 = vmux(q0,v29,v2)
+; CHECK-NEXT: v30.w = vsub(v27.w,v2.w)
+; CHECK-NEXT: v1 = vmux(q0,v29,v6)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v31 = vmux(q2,v6,v28)
-; CHECK-NEXT: v0 = vmux(q1,v0,v30)
+; CHECK-NEXT: v2 = vmux(q2,v30,v2)
+; CHECK-NEXT: v1 = vmux(q1,v1,v31)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: q3 = vsetq(r2)
-; CHECK-NEXT: v1 = vmux(q3,v31,v1)
+; CHECK-NEXT: v0 = vmux(q3,v2,v0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v2.h = vpack(v1.w,v0.w):sat
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v0.h = vpack(v1.w,v0.w):sat
+; CHECK-NEXT: v0.h = vpack(v0.w,v1.w):sat
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v0.b = vpack(v2.h,v0.h):sat
@@ -808,13 +808,13 @@ define void @f32s16_0(ptr %a0, ptr %a1) #0 {
; CHECK-NEXT: {
; CHECK-NEXT: r3:2 = combine(##-2147483648,#1)
; CHECK-NEXT: r4 = #30
-; CHECK-NEXT: v1 = vmem(r0+#0)
+; CHECK-NEXT: v0 = vmem(r0+#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v2 = vsplat(r3)
; CHECK-NEXT: r6 = #8
; CHECK-NEXT: v3.w = vasl(v0.w,r2)
-; CHECK-NEXT: v0.cur = vmem(r0+#1)
+; CHECK-NEXT: v1 = vmem(r0+#1)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v4 = vsplat(r4)
@@ -828,55 +828,55 @@ define void @f32s16_0(ptr %a0, ptr %a1) #0 {
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r3 = #32
-; CHECK-NEXT: v5.w = vasl(v1.w,r6)
-; CHECK-NEXT: q1 = vcmp.gt(v7.w,v0.w)
+; CHECK-NEXT: v5.w = vasl(v0.w,r6)
+; CHECK-NEXT: q1 = vcmp.gt(v7.w,v1.w)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v6 = vsplat(r3)
-; CHECK-NEXT: v27.w = vasr(v3.w,r5)
+; CHECK-NEXT: v28.w = vasr(v3.w,r5)
; CHECK-NEXT: v5 = vor(v5,v2)
-; CHECK-NEXT: q0 = vcmp.gt(v7.w,v1.w)
+; CHECK-NEXT: q0 = vcmp.gt(v7.w,v0.w)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v9 = vsplat(r4)
; CHECK-NEXT: v8.w = vasr(v8.w,r5)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v26.w = vasl(v0.w,r6)
-; CHECK-NEXT: v0.w = vsub(v4.w,v27.w)
+; CHECK-NEXT: v27.w = vasl(v1.w,r6)
+; CHECK-NEXT: v1.w = vsub(v4.w,v28.w)
; CHECK-NEXT: v4.w = vsub(v4.w,v8.w)
-; CHECK-NEXT: v28 = vmux(q0,v2,v9)
+; CHECK-NEXT: v29 = vmux(q0,v2,v9)
; CHECK-NEXT: }
; CHECK-NEXT: {
+; CHECK-NEXT: v1.w = vmin(v1.w,v6.w)
+; CHECK-NEXT: v0 = vor(v27,v2)
; CHECK-NEXT: v4.w = vmin(v4.w,v6.w)
-; CHECK-NEXT: v1 = vor(v26,v2)
-; CHECK-NEXT: v0.w = vmin(v0.w,v6.w)
; CHECK-NEXT: v2 = vmux(q1,v2,v9)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: q2 = vcmp.gt(v4.w,v7.w)
-; CHECK-NEXT: q3 = vcmp.gt(v0.w,v7.w)
+; CHECK-NEXT: q2 = vcmp.gt(v1.w,v7.w)
+; CHECK-NEXT: q3 = vcmp.gt(v4.w,v7.w)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v5.w = vlsr(v5.w,v4.w)
+; CHECK-NEXT: v5.w = vlsr(v5.w,v1.w)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v1.w = vlsr(v1.w,v0.w)
-; CHECK-NEXT: v29.w = vsub(v7.w,v5.w)
+; CHECK-NEXT: v0.w = vlsr(v0.w,v4.w)
+; CHECK-NEXT: v30.w = vsub(v7.w,v5.w)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v30.w = vsub(v7.w,v1.w)
-; CHECK-NEXT: v5 = vmux(q0,v29,v5)
+; CHECK-NEXT: v31.w = vsub(v7.w,v0.w)
+; CHECK-NEXT: v5 = vmux(q0,v30,v5)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v1 = vmux(q1,v30,v1)
-; CHECK-NEXT: v31 = vmux(q2,v5,v28)
+; CHECK-NEXT: v0 = vmux(q1,v31,v0)
+; CHECK-NEXT: v1 = vmux(q2,v5,v29)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v1 = vmux(q3,v1,v2)
+; CHECK-NEXT: v0 = vmux(q3,v0,v2)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v0.h = vpack(v1.w,v31.w):sat
+; CHECK-NEXT: v0.h = vpack(v0.w,v1.w):sat
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: vmem(r1+#0) = v0.new
; CHECK-NEXT: }
@@ -1097,13 +1097,13 @@ define void @f16u8_0(ptr %a0, ptr %a1) #0 {
; CHECK-NEXT: {
; CHECK-NEXT: r3:2 = combine(##32768,#1)
; CHECK-NEXT: r4 = #14
-; CHECK-NEXT: v0 = vmem(r0+#1)
+; CHECK-NEXT: v0 = vmem(r0+#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v2.h = vsplat(r3)
; CHECK-NEXT: r7:6 = combine(#11,#16)
; CHECK-NEXT: v3.h = vasl(v0.h,r2)
-; CHECK-NEXT: v1 = vmem(r0+#0)
+; CHECK-NEXT: v1 = vmem(r0+#1)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v6.h = vsplat(r4)
@@ -1113,7 +1113,7 @@ define void @f16u8_0(ptr %a0, ptr %a1) #0 {
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v7.h = vsplat(r6)
-; CHECK-NEXT: v5.h = vasl(v1.h,r5)
+; CHECK-NEXT: v5.h = vasl(v0.h,r5)
; CHECK-NEXT: v4.h = vsub(v4.h,v2.h)
; CHECK-NEXT: v28 = vxor(v28,v28)
; CHECK-NEXT: }
@@ -1125,28 +1125,26 @@ define void @f16u8_0(ptr %a0, ptr %a1) #0 {
; CHECK-NEXT: {
; CHECK-NEXT: v29.h = vsplat(r2)
; CHECK-NEXT: v4.h = vasr(v4.h,r7)
-; CHECK-NEXT: q2 = vcmp.gt(v28.h,v1.h)
+; CHECK-NEXT: q2 = vcmp.gt(v28.h,v0.h)
; CHECK-NEXT: v3.h = vsub(v6.h,v3.h)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v8.h = vasl(v0.h,r5)
-; CHECK-NEXT: q3 = vcmp.gt(v28.h,v0.h)
+; CHECK-NEXT: v8.h = vasl(v1.h,r5)
+; CHECK-NEXT: q3 = vcmp.gt(v28.h,v1.h)
; CHECK-NEXT: v4.h = vsub(v6.h,v4.h)
; CHECK-NEXT: v3.h = vmin(v3.h,v7.h)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v4.h = vmin(v4.h,v7.h)
; CHECK-NEXT: v2 = vor(v8,v2)
-; CHECK-NEXT: q1 = vcmp.gt(v28.h,v3.h)
+; CHECK-NEXT: q0 = vcmp.gt(v28.h,v3.h)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: q0 = vcmp.gt(v28.h,v4.h)
+; CHECK-NEXT: v5.h = vlsr(v5.h,v3.h)
+; CHECK-NEXT: q1 = vcmp.gt(v28.h,v4.h)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v5.h = vlsr(v5.h,v4.h)
-; CHECK-NEXT: }
-; CHECK-NEXT: {
-; CHECK-NEXT: v2.h = vlsr(v2.h,v3.h)
+; CHECK-NEXT: v2.h = vlsr(v2.h,v4.h)
; CHECK-NEXT: v30 = vmux(q0,v29,v5)
; CHECK-NEXT: }
; CHECK-NEXT: {
@@ -1552,7 +1550,7 @@ define void @f32u8_0(ptr %a0, ptr %a1) #0 {
; CHECK-NEXT: v5 = vmem(r0+#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: v3 = vsplat(r4)
+; CHECK-NEXT: v4 = vsplat(r4)
; CHECK-NEXT: r5 = #30
; CHECK-NEXT: r6 = #24
; CHECK-NEXT: v2 = vmem(r0+#1)
@@ -1561,32 +1559,32 @@ define void @f32u8_0(ptr %a0, ptr %a1) #0 {
; CHECK-NEXT: v14 = vsplat(r5)
; CHECK-NEXT: r4 = #32
; CHECK-NEXT: v8.w = vasl(v5.w,r2)
-; CHECK-NEXT: v0 = vmem(r0+#3)
+; CHECK-NEXT: v0 = vmem(r0+#2)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: v9.w = va...
[truncated]
|
You can test this locally with the following command:
git-clang-format --diff 1adb00110e35c6963175ecc000e42caf858b4c07 bd6624800c440b285c74a3f78a983672111158d1 --extensions cpp -- llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
View the diff from clang-format here.
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index b9d549c21b..f77bda3138 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -1266,7 +1266,7 @@ HexagonTargetLowering::extractHvxSubvectorReg(SDValue OrigOp, SDValue VecV,
// vectors.
if (isHvxPairTy(VecTy)) {
unsigned SubIdx = Hexagon::vsub_lo;
- if (Idx * ElemWidth >= 8*HwLen) {
+ if (Idx * ElemWidth >= 8 * HwLen) {
SubIdx = Hexagon::vsub_hi;
Idx -= VecTy.getVectorNumElements() / 2;
}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can confirm that this fixed the reduced test case. Thanks!
Please fix code formatting before merging. |
BTW if it's useful @aankit-ca I use https://github.com/barisione/clang-format-hooks to automagically fix the formatting on my commits (while still allowing me to review the changes in case I need to override them). |
Change-Id: I1d7d67c833da7f5058e2552150d19abe1268b786
Thank you Brian. I'll take a look at it. |
Fixes a crash with extract_subvectors in Hexagon backend seen when the source vector is a vector-pair and result vector is not hvx vector size. LLVM Issue: llvm#128775 Fixes llvm#128775 --------- Co-authored-by: aankit-quic <[email protected]> (cherry picked from commit 29d3fc3)
Fixes a crash with extract_subvectors in Hexagon backend seen when the source vector is a vector-pair and result vector is not hvx vector size. LLVM Issue: llvm#128775 Fixes llvm#128775 --------- Co-authored-by: aankit-quic <[email protected]>
Fixes a crash with extract_subvectors in Hexagon backend seen when the source vector is a vector-pair and result vector is not hvx vector size.
LLVM Issue: #128775