llvm · phoebewang · Feb 24, 2025 · Feb 20, 2025 · Feb 21, 2025 · RKSimon
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4066,9 +4066,12 @@ static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
                                 const SDLoc &dl, unsigned vectorWidth) {
   EVT VT = Vec.getValueType();
   EVT ElVT = VT.getVectorElementType();
-  unsigned Factor = VT.getSizeInBits() / vectorWidth;
-  EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
-                                  VT.getVectorNumElements() / Factor);
+  unsigned ResultNumElts =
+      (VT.getVectorNumElements() * vectorWidth) / VT.getSizeInBits();
+  EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT, ResultNumElts);
+
+  assert(ResultVT.getSizeInBits() == vectorWidth &&
+         "Illegal subvector extraction");
 
   // Extract the relevant vectorWidth bits.  Generate an EXTRACT_SUBVECTOR
   unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();

diff --git a/llvm/test/CodeGen/X86/isel-extract-subvector-non-pow2-elems.ll b/llvm/test/CodeGen/X86/isel-extract-subvector-non-pow2-elems.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; Ensure assertion is not hit when folding concat of two contiguous extract_subvector operations
+; from a source with a non-power-of-two vector length.
+; RUN: llc -mtriple=x86_64 -mattr=+avx2 < %s | FileCheck %s
+
+define void @foo(ptr %pDst) {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovups %ymm0, 16(%rdi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+entry:
+  %0 = shufflevector <12 x float> zeroinitializer, <12 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; lanes 0-3 of the 12-element (non-power-of-two) source
+  %1 = shufflevector <12 x float> zeroinitializer, <12 x float> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; lanes 4-7, i.e. the slice contiguous with %0
+  %2 = getelementptr i8, ptr %pDst, i64 16 ; destination of the first slice: %pDst + 16 bytes
+  %3 = getelementptr i8, ptr %pDst, i64 32 ; destination of the second slice: %pDst + 32 bytes, directly after the 16-byte <4 x float> at %2
+  store <4 x float> %0, ptr %2, align 1 ; align 1: no alignment is promised for the destination
+  store <4 x float> %1, ptr %3, align 1 ; adjacent to the store above; the CHECK lines expect both to become a single ymm vmovups at 16(%rdi)
+  ret void
+}