Skip to content

[InstCombine] Copy flags of extractelement for extelt -> icmp combine #86366

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into llvm:main from instcombine/fmf-on-fcmps
Mar 24, 2024

Conversation

marcauberer
Copy link
Member

Fixes #86164

@llvmbot
Copy link
Member

llvmbot commented Mar 23, 2024

@llvm/pr-subscribers-llvm-ir

@llvm/pr-subscribers-llvm-transforms

Author: Marc Auberer (marcauberer)

Changes

Fixes #86164


Full diff: https://github.com/llvm/llvm-project/pull/86366.diff

4 Files Affected:

  • (modified) llvm/include/llvm/IR/InstrTypes.h (+10)
  • (modified) llvm/lib/IR/Instructions.cpp (+9)
  • (modified) llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp (+2-1)
  • (modified) llvm/test/Transforms/InstCombine/scalarization.ll (+14)
diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
index e8c2cba8418dc8..fb2b591b51cbc0 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -1058,6 +1058,16 @@ class CmpInst : public Instruction {
   static CmpInst *Create(OtherOps Op, Predicate predicate, Value *S1,
                          Value *S2, const Twine &Name, BasicBlock *InsertAtEnd);
 
+  /// Construct a compare instruction, given the opcode, the predicate and
+  /// the two operands, and copy the IR flags of FlagsSource onto it.
+  /// Optionally (if InsertBefore is specified) insert the instruction into
+  /// a BasicBlock right before the specified instruction. The specified
+  /// Instruction is allowed to be a dereferenced end iterator.
+  static CmpInst *CreateWithFlags(OtherOps Op,
+                                  Predicate Pred, Value *S1,
+                                  Value *S2, const Instruction *FlagsSource,
+                                  const Twine &Name = "", Instruction *InsertBefore = nullptr);
+
   /// Get the opcode casted to the right type
   OtherOps getOpcode() const {
     return static_cast<OtherOps>(Instruction::getOpcode());
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 494d50f89e374c..c33f660f47d4cb 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -4623,6 +4623,15 @@ CmpInst::Create(OtherOps Op, Predicate predicate, Value *S1, Value *S2,
                       S1, S2, Name);
 }
 
+CmpInst *
+CmpInst::CreateWithFlags(OtherOps Op, Predicate Pred, Value *S1, Value *S2,
+                const Instruction *FlagsSource, const Twine &Name,
+                Instruction *InsertBefore) {
+  CmpInst *Inst = Create(Op, Pred, S1, S2, Name, InsertBefore);
+  Inst->copyIRFlags(FlagsSource);
+  return Inst;
+}
+
 void CmpInst::swapOperands() {
   if (ICmpInst *IC = dyn_cast<ICmpInst>(this))
     IC->swapOperands();
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index c7f4fb17648c87..5bccf3d471ef69 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -487,7 +487,8 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
     // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
     Value *E0 = Builder.CreateExtractElement(X, Index);
     Value *E1 = Builder.CreateExtractElement(Y, Index);
-    return CmpInst::Create(cast<CmpInst>(SrcVec)->getOpcode(), Pred, E0, E1);
+    Instruction *SrcInst = cast<Instruction>(SrcVec);
+    return CmpInst::CreateWithFlags(cast<CmpInst>(SrcVec)->getOpcode(), Pred, E0, E1, SrcInst);
   }
 
   if (auto *I = dyn_cast<Instruction>(SrcVec)) {
diff --git a/llvm/test/Transforms/InstCombine/scalarization.ll b/llvm/test/Transforms/InstCombine/scalarization.ll
index fe6dc526bd50ee..5ab960ece54d9d 100644
--- a/llvm/test/Transforms/InstCombine/scalarization.ll
+++ b/llvm/test/Transforms/InstCombine/scalarization.ll
@@ -341,6 +341,20 @@ define i1 @extractelt_vector_fcmp_constrhs_dynidx(<2 x float> %arg, i32 %idx) {
   ret i1 %ext
 }
 
+define i1 @extractelt_vector_fcmp_copy_flags(<4 x float> %x, <4 x i1> %y) {
+; CHECK-LABEL: @extractelt_vector_fcmp_copy_flags(
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i64 2
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp nsz arcp oeq float [[TMP1]], 0.000000e+00
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[Y:%.*]], i64 2
+; CHECK-NEXT:    [[R:%.*]] = and i1 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %cmp = fcmp nsz arcp oeq <4 x float> %x, zeroinitializer
+  %and = and <4 x i1> %cmp, %y
+  %r = extractelement <4 x i1> %and, i32 2
+  ret i1 %r
+}
+
 define i1 @extractelt_vector_fcmp_not_cheap_to_scalarize_multi_use(<2 x float> %arg0, <2 x float> %arg1, <2 x float> %arg2, i32 %idx) {
 ;
 ; CHECK-LABEL: @extractelt_vector_fcmp_not_cheap_to_scalarize_multi_use(

@marcauberer marcauberer force-pushed the instcombine/fmf-on-fcmps branch from 8681c4b to 20af0fa Compare March 23, 2024 00:20
Copy link

✅ With the latest revision this PR passed the Python code formatter.

Copy link

✅ With the latest revision this PR passed the C/C++ code formatter.

@marcauberer marcauberer force-pushed the instcombine/fmf-on-fcmps branch from 20af0fa to 9c8eb1e Compare March 23, 2024 01:41
@tschuett tschuett requested review from tschuett and dtcxzyw March 23, 2024 11:02
@tschuett
Copy link

Thanks!

@marcauberer marcauberer force-pushed the instcombine/fmf-on-fcmps branch from 9c8eb1e to 9992597 Compare March 23, 2024 13:40
@marcauberer
Copy link
Member Author

@tschuett @dtcxzyw could one of you merge? I don't have permission.

@tschuett tschuett merged commit b3fe27f into llvm:main Mar 24, 2024
@marcauberer marcauberer deleted the instcombine/fmf-on-fcmps branch March 24, 2024 15:23
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

FMF on fcmps
4 participants