llvm · zahiraam · Dec 11, 2023 · Oct 23, 2023 · Oct 23, 2023 · Oct 24, 2023
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
@@ -243,6 +243,16 @@ New Compiler Flags
 
 * ``-fopenacc`` was added as a part of the effort to support OpenACC in clang.
 
+* ``-fcx-limited-range`` enables the naive mathematical formulas for complex
+  division and multiplication with no NaN checking of results. The default is
+  ``-fno-cx-limited-range``, but this option is enabled by ``-ffast-math``.
+
+* ``-fcx-fortran-rules`` enables the naive mathematical formulas for complex
+  multiplication and enables application of Smith's algorithm for complex
+  division. See SMITH, R. L. Algorithm 116: Complex division. Commun. ACM 5, 8
+  (1962). The default is ``-fno-cx-fortran-rules``.
+
+
 Deprecated Compiler Flags
 -------------------------
 
@@ -872,6 +882,9 @@ Floating Point Support in Clang
   ``__builtin_exp10f128`` builtins.
 - Add ``__builtin_iszero``, ``__builtin_issignaling`` and
   ``__builtin_issubnormal``.
+- Add support for C99's ``#pragma STDC CX_LIMITED_RANGE`` feature.  This
+  enables the naive mathematical formulas for complex multiplication and
+  division, which are faster but do not correctly handle overflow and infinities.
 
 AST Matchers
 ------------

diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
@@ -1468,6 +1468,7 @@ floating point semantic models: precise (the default), strict, and fast.
    With the exception of ``-ffp-contract=fast``, using any of the options
    below to disable any of the individual optimizations in ``-ffast-math``
    will cause ``__FAST_MATH__`` to no longer be set.
+   ``-ffast-math`` enables ``-fcx-limited-range``.
 
    This option implies:
 
@@ -1834,6 +1835,20 @@ floating point semantic models: precise (the default), strict, and fast.
    * ``16`` - Forces ``_Float16`` operations to be emitted without using excess
      precision arithmetic.
 
+.. option:: -fcx-limited-range
+
+   This option enables the naive mathematical formulas for complex division and
+   multiplication with no NaN checking of results. The default is
+   ``-fno-cx-limited-range``, but this option is enabled by the ``-ffast-math``
+   option.
+
+.. option:: -fcx-fortran-rules
+
+   This option enables the naive mathematical formulas for complex
+   multiplication and enables application of Smith's algorithm for complex
+   division. See SMITH, R. L. Algorithm 116: Complex division. Commun.
+   ACM 5, 8 (1962). The default is ``-fno-cx-fortran-rules``.
+
 .. _floating-point-environment:
 
 Accessing the floating point environment

diff --git a/clang/include/clang/Basic/FPOptions.def b/clang/include/clang/Basic/FPOptions.def
@@ -28,4 +28,5 @@ OPTION(FPEvalMethod, LangOptions::FPEvalMethodKind, 2, AllowApproxFunc)
 OPTION(Float16ExcessPrecision, LangOptions::ExcessPrecisionKind, 2, FPEvalMethod)
 OPTION(BFloat16ExcessPrecision, LangOptions::ExcessPrecisionKind, 2, Float16ExcessPrecision)
 OPTION(MathErrno, bool, 1, BFloat16ExcessPrecision)
+OPTION(ComplexRange, LangOptions::ComplexRangeKind, 2, MathErrno)
 #undef OPTION
diff --git a/clang/include/clang/Basic/Features.def b/clang/include/clang/Basic/Features.def
@@ -102,6 +102,7 @@ FEATURE(scudo, LangOpts.Sanitize.hasOneOf(SanitizerKind::Scudo))
 FEATURE(swiftasynccc,
   PP.getTargetInfo().checkCallingConvention(CC_SwiftAsync) ==
   clang::TargetInfo::CCCR_OK)
+FEATURE(pragma_stdc_cx_limited_range, true)
 // Objective-C features
 FEATURE(objc_arr, LangOpts.ObjCAutoRefCount) // FIXME: REMOVE?
 FEATURE(objc_arc, LangOpts.ObjCAutoRefCount)

diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
@@ -220,6 +220,8 @@ BENIGN_LANGOPT(NoSignedZero      , 1, 0, "Permit Floating Point optimization wit
 BENIGN_LANGOPT(AllowRecip        , 1, 0, "Permit Floating Point reciprocal")
 BENIGN_LANGOPT(ApproxFunc        , 1, 0, "Permit Floating Point approximation")
 
+ENUM_LANGOPT(ComplexRange, ComplexRangeKind, 2, CX_Full, "Enable use of range reduction for complex arithmetics.")
+
 BENIGN_LANGOPT(ObjCGCBitmapPrint , 1, 0, "printing of GC's bitmap layout for __weak/__strong ivars")
 
 BENIGN_LANGOPT(AccessControl     , 1, 1, "C++ access control")

diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h
@@ -391,6 +391,8 @@ class LangOptions : public LangOptionsBase {
     IncompleteOnly = 3,
   };
 
+  enum ComplexRangeKind { CX_Full, CX_Limited, CX_Fortran };
+
 public:
   /// The used language standard.
   LangStandard::Kind LangStd;
@@ -740,6 +742,7 @@ class FPOptions {
       setAllowFEnvAccess(true);
     else
       setAllowFEnvAccess(LangOptions::FPM_Off);
+    setComplexRange(LO.getComplexRange());
   }
 
   bool allowFPContractWithinStatement() const {

diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
@@ -908,6 +908,11 @@ PRAGMA_ANNOTATION(pragma_fenv_access_ms)
 // handles them.
 PRAGMA_ANNOTATION(pragma_fenv_round)
 
+// Annotation for #pragma STDC CX_LIMITED_RANGE
+// The lexer produces these so that they only take effect when the parser
+// handles them.
+PRAGMA_ANNOTATION(pragma_cx_limited_range)
+
 // Annotation for #pragma float_control
 // The lexer produces these so that they only take effect when the parser
 // handles them.

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
@@ -1010,6 +1010,30 @@ defm offload_uniform_block : BoolFOption<"offload-uniform-block",
   NegFlag<SetFalse, [], [ClangOption, CC1Option], "Don't assume">,
   BothFlags<[], [ClangOption], " that kernels are launched with uniform block sizes (default true for CUDA/HIP and false otherwise)">>;
 
+// -fcx-limited-range: use the naive formulas for complex multiply and divide.
+def fcx_limited_range : Joined<["-"], "fcx-limited-range">,
+  Group<f_Group>, Visibility<[ClangOption, CC1Option]>,
+  HelpText<"Basic algebraic expansions of complex arithmetic operations are enabled.">;
+
+// -fno-cx-limited-range: negative form of -fcx-limited-range (the default).
+def fno_cx_limited_range : Joined<["-"], "fno-cx-limited-range">,
+  Group<f_Group>, Visibility<[ClangOption, CC1Option]>,
+  HelpText<"Basic algebraic expansions of complex arithmetic operations are disabled.">;
+
+def fcx_fortran_rules : Joined<["-"], "fcx-fortran-rules">,
+  Group<f_Group>, Visibility<[ClangOption, CC1Option]>,
+  HelpText<"Range reduction is enabled for complex arithmetic operations.">;
+
+def fno_cx_fortran_rules : Joined<["-"], "fno-cx-fortran-rules">,
+  Group<f_Group>, Visibility<[ClangOption, CC1Option]>,
+  HelpText<"Range reduction is disabled for complex arithmetic operations.">;
+
+def complex_range_EQ : Joined<["-"], "complex-range=">, Group<f_Group>,
+  Visibility<[CC1Option]>,
+  Values<"full,limited,fortran">, NormalizedValuesScope<"LangOptions">,
+  NormalizedValues<["CX_Full", "CX_Limited", "CX_Fortran"]>,
+  MarshallingInfoEnum<LangOpts<"ComplexRange">, "CX_Full">;
+
 // OpenCL-only Options
 def cl_opt_disable : Flag<["-"], "cl-opt-disable">, Group<opencl_Group>,
   Visibility<[ClangOption, CC1Option]>,

diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
@@ -772,6 +772,10 @@ class Parser : public CodeCompletionHandler {
   /// #pragma STDC FENV_ROUND...
   void HandlePragmaFEnvRound();
 
+  /// Handle the annotation token produced for
+  /// #pragma STDC CX_LIMITED_RANGE...
+  void HandlePragmaCXLimitedRange();
+
   /// Handle the annotation token produced for
   /// #pragma float_control
   void HandlePragmaFloatControl();

diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
@@ -10997,6 +10997,11 @@ class Sema final {
   /// \#pragma STDC FENV_ACCESS
   void ActOnPragmaFEnvAccess(SourceLocation Loc, bool IsEnabled);
 
+  /// ActOnPragmaCXLimitedRange - Called on well formed
+  /// \#pragma STDC CX_LIMITED_RANGE
+  void ActOnPragmaCXLimitedRange(SourceLocation Loc,
+                                 LangOptions::ComplexRangeKind Range);
+
   /// Called on well formed '\#pragma clang fp' that has option 'exceptions'.
   void ActOnPragmaFPExceptions(SourceLocation Loc,
                                LangOptions::FPExceptionModeKind);

diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp
@@ -275,6 +275,10 @@ class ComplexExprEmitter
   ComplexPairTy EmitBinSub(const BinOpInfo &Op);
   ComplexPairTy EmitBinMul(const BinOpInfo &Op);
   ComplexPairTy EmitBinDiv(const BinOpInfo &Op);
+  ComplexPairTy EmitAlgebraicDiv(llvm::Value *A, llvm::Value *B, llvm::Value *C,
+                                 llvm::Value *D);
+  ComplexPairTy EmitRangeReductionDiv(llvm::Value *A, llvm::Value *B,
+                                      llvm::Value *C, llvm::Value *D);
 
   ComplexPairTy EmitComplexBinOpLibCall(StringRef LibCallName,
                                         const BinOpInfo &Op);
@@ -781,6 +785,10 @@ ComplexPairTy ComplexExprEmitter::EmitBinMul(const BinOpInfo &Op) {
       ResR = Builder.CreateFSub(AC, BD, "mul_r");
       ResI = Builder.CreateFAdd(AD, BC, "mul_i");
 
+      if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Limited ||
+          Op.FPFeatures.getComplexRange() == LangOptions::CX_Fortran)
+        return ComplexPairTy(ResR, ResI);
+
       // Emit the test for the real part becoming NaN and create a branch to
       // handle it. We test for NaN by comparing the number to itself.
       Value *IsRNaN = Builder.CreateFCmpUNO(ResR, ResR, "isnan_cmp");
@@ -846,23 +854,139 @@ ComplexPairTy ComplexExprEmitter::EmitBinMul(const BinOpInfo &Op) {
   return ComplexPairTy(ResR, ResI);
 }
 
+ComplexPairTy ComplexExprEmitter::EmitAlgebraicDiv(llvm::Value *LHSr,
+                                                   llvm::Value *LHSi,
+                                                   llvm::Value *RHSr,
+                                                   llvm::Value *RHSi) {
+  // Textbook complex division, emitted inline with no scaling step:
+  //   (a+ib) / (c+id) = ((ac+bd)/(cc+dd)) + i((bc-ad)/(cc+dd))
+  // Numerator of the real part.
+  llvm::Value *ProdAC = Builder.CreateFMul(LHSr, RHSr);      // a*c
+  llvm::Value *ProdBD = Builder.CreateFMul(LHSi, RHSi);      // b*d
+  llvm::Value *RealNum = Builder.CreateFAdd(ProdAC, ProdBD); // ac+bd
+  // Denominator shared by the real and imaginary parts.
+  llvm::Value *ProdCC = Builder.CreateFMul(RHSr, RHSr);    // c*c
+  llvm::Value *ProdDD = Builder.CreateFMul(RHSi, RHSi);    // d*d
+  llvm::Value *Denom = Builder.CreateFAdd(ProdCC, ProdDD); // cc+dd
+  // Numerator of the imaginary part.
+  llvm::Value *ProdBC = Builder.CreateFMul(LHSi, RHSr);      // b*c
+  llvm::Value *ProdAD = Builder.CreateFMul(LHSr, RHSi);      // a*d
+  llvm::Value *ImagNum = Builder.CreateFSub(ProdBC, ProdAD); // bc-ad
+  // Divide each numerator by the common denominator.
+  llvm::Value *QuotR = Builder.CreateFDiv(RealNum, Denom);
+  llvm::Value *QuotI = Builder.CreateFDiv(ImagNum, Denom);
+  return ComplexPairTy(QuotR, QuotI);
+}
+
+// EmitllvmFAbs - Emit a call to the @llvm.fabs intrinsic for Value's type.
+static llvm::Value *EmitllvmFAbs(CodeGenFunction &CGF, llvm::Value *Value) {
+  // The intrinsic declaration is selected by the operand's type.
+  llvm::Function *FAbsFn =
+      CGF.CGM.getIntrinsic(llvm::Intrinsic::fabs, Value->getType());
+  return CGF.Builder.CreateCall(FAbsFn, Value);
+}
+
+// EmitRangeReductionDiv - Implements Smith's algorithm for complex division.
+// SMITH, R. L. Algorithm 116: Complex division. Commun. ACM 5, 8 (1962).
+ComplexPairTy ComplexExprEmitter::EmitRangeReductionDiv(llvm::Value *LHSr,
+                                                        llvm::Value *LHSi,
+                                                        llvm::Value *RHSr,
+                                                        llvm::Value *RHSi) {
+  // Emits (a + ib) / (c + id) = (e + if) and returns the pair (e, f).
+  llvm::Value *FAbsRHSr = EmitllvmFAbs(CGF, RHSr); // |c|
+  llvm::Value *FAbsRHSi = EmitllvmFAbs(CGF, RHSi); // |d|
+  // Unordered |c| > |d|: also true when either magnitude is NaN.
+  llvm::Value *IsR = Builder.CreateFCmpUGT(FAbsRHSr, FAbsRHSi, "abs_cmp");
+  // One block per ratio choice (d/c or c/d); both branch to ContBB.
+  llvm::BasicBlock *TrueBB =
+      CGF.createBasicBlock("abs_rhsr_greater_or_equal_abs_rhsi");
+  llvm::BasicBlock *FalseBB =
+      CGF.createBasicBlock("abs_rhsr_less_than_abs_rhsi");
+  llvm::BasicBlock *ContBB = CGF.createBasicBlock("complex_div");
+  Builder.CreateCondBr(IsR, TrueBB, FalseBB);
+
+  CGF.EmitBlock(TrueBB);
+  // abs(c) > abs(d), or the compare was unordered
+  // r = d/c
+  // tmp = c + rd
+  // e = (a + br)/tmp
+  // f = (b - ar)/tmp
+  llvm::Value *DdC = Builder.CreateFDiv(RHSi, RHSr); // r=d/c
+
+  llvm::Value *RD = Builder.CreateFMul(DdC, RHSi);  // rd
+  llvm::Value *CpRD = Builder.CreateFAdd(RHSr, RD); // tmp=c+rd
+
+  llvm::Value *T3 = Builder.CreateFMul(LHSi, DdC);   // br
+  llvm::Value *T4 = Builder.CreateFAdd(LHSr, T3);    // a+br
+  llvm::Value *DSTTr = Builder.CreateFDiv(T4, CpRD); // (a+br)/tmp
+
+  llvm::Value *T5 = Builder.CreateFMul(LHSr, DdC);   // ar
+  llvm::Value *T6 = Builder.CreateFSub(LHSi, T5);    // b-ar
+  llvm::Value *DSTTi = Builder.CreateFDiv(T6, CpRD); // (b-ar)/tmp
+  Builder.CreateBr(ContBB);
+
+  CGF.EmitBlock(FalseBB);
+  // abs(c) <= abs(d)
+  // r = c/d
+  // tmp = d + rc
+  // e = (ar + b)/tmp
+  // f = (br - a)/tmp
+  llvm::Value *CdD = Builder.CreateFDiv(RHSr, RHSi); // r=c/d
+
+  llvm::Value *RC = Builder.CreateFMul(CdD, RHSr);  // rc
+  llvm::Value *DpRC = Builder.CreateFAdd(RHSi, RC); // tmp=d+rc
+  // The numerators below scale by r = c/d itself, not by the product rc.
+  llvm::Value *T7 = Builder.CreateFMul(LHSr, CdD);   // ar
+  llvm::Value *T8 = Builder.CreateFAdd(T7, LHSi);    // ar+b
+  llvm::Value *DSTFr = Builder.CreateFDiv(T8, DpRC); // (ar+b)/tmp
+
+  llvm::Value *T9 = Builder.CreateFMul(LHSi, CdD);    // br
+  llvm::Value *T10 = Builder.CreateFSub(T9, LHSr);    // br-a
+  llvm::Value *DSTFi = Builder.CreateFDiv(T10, DpRC); // (br-a)/tmp
+  Builder.CreateBr(ContBB);
+
+  // Phi together the computation paths.
+  CGF.EmitBlock(ContBB);
+  llvm::PHINode *VALr = Builder.CreatePHI(DSTTr->getType(), 2);
+  VALr->addIncoming(DSTTr, TrueBB);
+  VALr->addIncoming(DSTFr, FalseBB);
+  llvm::PHINode *VALi = Builder.CreatePHI(DSTTi->getType(), 2);
+  VALi->addIncoming(DSTTi, TrueBB);
+  VALi->addIncoming(DSTFi, FalseBB);
+  return ComplexPairTy(VALr, VALi);
+}
+
 // See C11 Annex G.5.1 for the semantics of multiplicative operators on complex
 // typed values.
 ComplexPairTy ComplexExprEmitter::EmitBinDiv(const BinOpInfo &Op) {
   llvm::Value *LHSr = Op.LHS.first, *LHSi = Op.LHS.second;
   llvm::Value *RHSr = Op.RHS.first, *RHSi = Op.RHS.second;
-
   llvm::Value *DSTr, *DSTi;
   if (LHSr->getType()->isFloatingPointTy()) {
-    // If we have a complex operand on the RHS and FastMath is not allowed, we
-    // delegate to a libcall to handle all of the complexities and minimize
-    // underflow/overflow cases. When FastMath is allowed we construct the
-    // divide inline using the same algorithm as for integer operands.
-    //
-    // FIXME: We would be able to avoid the libcall in many places if we
-    // supported imaginary types in addition to complex types.
     CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Op.FPFeatures);
-    if (RHSi && !CGF.getLangOpts().FastMath) {
+    if (!RHSi) {
+      assert(LHSi && "Can have at most one non-complex operand!");
+
+      DSTr = Builder.CreateFDiv(LHSr, RHSr);
+      DSTi = Builder.CreateFDiv(LHSi, RHSr);
+      return ComplexPairTy(DSTr, DSTi);
+    }
+    llvm::Value *OrigLHSi = LHSi;
+    if (!LHSi)
+      LHSi = llvm::Constant::getNullValue(RHSi->getType());
+    if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Fortran)
+      return EmitRangeReductionDiv(LHSr, LHSi, RHSr, RHSi);
+    else if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Limited)
+      return EmitAlgebraicDiv(LHSr, LHSi, RHSr, RHSi);
+    else if (!CGF.getLangOpts().FastMath) {
+      LHSi = OrigLHSi;
+      // If we have a complex operand on the RHS and FastMath is not allowed, we
+      // delegate to a libcall to handle all of the complexities and minimize
+      // underflow/overflow cases. When FastMath is allowed we construct the
+      // divide inline using the same algorithm as for integer operands.
+      //
+      // FIXME: We would be able to avoid the libcall in many places if we
+      // supported imaginary types in addition to complex types.
       BinOpInfo LibCallOp = Op;
       // If LHS was a real, supply a null imaginary part.
       if (!LHSi)
@@ -884,30 +1008,8 @@ ComplexPairTy ComplexExprEmitter::EmitBinDiv(const BinOpInfo &Op) {
       case llvm::Type::FP128TyID:
         return EmitComplexBinOpLibCall("__divtc3", LibCallOp);
       }
-    } else if (RHSi) {
-      if (!LHSi)
-        LHSi = llvm::Constant::getNullValue(RHSi->getType());
-
-      // (a+ib) / (c+id) = ((ac+bd)/(cc+dd)) + i((bc-ad)/(cc+dd))
-      llvm::Value *AC = Builder.CreateFMul(LHSr, RHSr); // a*c
-      llvm::Value *BD = Builder.CreateFMul(LHSi, RHSi); // b*d
-      llvm::Value *ACpBD = Builder.CreateFAdd(AC, BD); // ac+bd
-
-      llvm::Value *CC = Builder.CreateFMul(RHSr, RHSr); // c*c
-      llvm::Value *DD = Builder.CreateFMul(RHSi, RHSi); // d*d
-      llvm::Value *CCpDD = Builder.CreateFAdd(CC, DD); // cc+dd
-
-      llvm::Value *BC = Builder.CreateFMul(LHSi, RHSr); // b*c
-      llvm::Value *AD = Builder.CreateFMul(LHSr, RHSi); // a*d
-      llvm::Value *BCmAD = Builder.CreateFSub(BC, AD); // bc-ad
-
-      DSTr = Builder.CreateFDiv(ACpBD, CCpDD);
-      DSTi = Builder.CreateFDiv(BCmAD, CCpDD);
     } else {
-      assert(LHSi && "Can have at most one non-complex operand!");
-
-      DSTr = Builder.CreateFDiv(LHSr, RHSr);
-      DSTi = Builder.CreateFDiv(LHSi, RHSr);
+      return EmitAlgebraicDiv(LHSr, LHSi, RHSr, RHSi);
     }
   } else {
     assert(Op.LHS.second && Op.RHS.second &&