@@ -440,6 +440,21 @@ static bool canContractSqrtToRsq(const FPMathOperator *SqrtOp) {
440
440
SqrtOp->getType ()->isHalfTy ();
441
441
}
442
442
443
+ /// Return true if we can easily prove that use U is uniform. A false result only means "not proven", not "divergent".
444
+ static bool isTriviallyUniform (const Use &U) {
445
+ Value *V = U.get (); // The value being used; the user is only consulted for its parent block below.
446
+ if (isa<Constant>(V))
447
+ return true ; // A constant value is trivially uniform.
448
+ if (const auto *II = dyn_cast<IntrinsicInst>(V)) {
449
+ if (!AMDGPU::isIntrinsicAlwaysUniform (II->getIntrinsicID ()))
450
+ return false ; // Not an intrinsic that AMDGPU reports as always uniform.
451
+ // If II and U are in different blocks then there is a possibility of
452
+ // temporal divergence, so only accept a def and a use in the same block. The cast<> assumes U's user is an Instruction.
453
+ return II->getParent () == cast<Instruction>(U.getUser ())->getParent ();
454
+ }
455
+ return false ; // Conservative default: V is neither a constant nor an intrinsic call.
456
+ }
457
+
443
458
std::optional<Instruction *>
444
459
GCNTTIImpl::instCombineIntrinsic (InstCombiner &IC, IntrinsicInst &II) const {
445
460
Intrinsic::ID IID = II.getIntrinsicID ();
@@ -1060,46 +1075,12 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
1060
1075
return IC.replaceOperand (II, 0 , UndefValue::get (VDstIn->getType ()));
1061
1076
}
1062
1077
case Intrinsic::amdgcn_permlane64:
1063
- // A constant value is trivially uniform.
1064
- if (Constant *C = dyn_cast<Constant>(II.getArgOperand (0 ))) {
1065
- return IC.replaceInstUsesWith (II, C);
1066
- }
1067
- break ;
1068
1078
case Intrinsic::amdgcn_readfirstlane:
1069
1079
case Intrinsic::amdgcn_readlane: {
1070
- // A constant value is trivially uniform.
1071
- if (Constant *C = dyn_cast<Constant>(II.getArgOperand (0 ))) {
1072
- return IC.replaceInstUsesWith (II, C);
1073
- }
1074
-
1075
- // The rest of these may not be safe if the exec may not be the same between
1076
- // the def and use.
1077
- Value *Src = II.getArgOperand (0 );
1078
- Instruction *SrcInst = dyn_cast<Instruction>(Src);
1079
- if (SrcInst && SrcInst->getParent () != II.getParent ())
1080
- break ;
1081
-
1082
- // readfirstlane (readfirstlane x) -> readfirstlane x
1083
- // readlane (readfirstlane x), y -> readfirstlane x
1084
- if (match (Src,
1085
- PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readfirstlane>())) {
1086
- return IC.replaceInstUsesWith (II, Src);
1087
- }
1088
-
1089
- if (IID == Intrinsic::amdgcn_readfirstlane) {
1090
- // readfirstlane (readlane x, y) -> readlane x, y
1091
- if (match (Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>())) {
1092
- return IC.replaceInstUsesWith (II, Src);
1093
- }
1094
- } else {
1095
- // readlane (readlane x, y), y -> readlane x, y
1096
- if (match (Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>(
1097
- PatternMatch::m_Value (),
1098
- PatternMatch::m_Specific (II.getArgOperand (1 ))))) {
1099
- return IC.replaceInstUsesWith (II, Src);
1100
- }
1101
- }
1102
-
1080
+ // If the first argument is uniform these intrinsics return it unchanged.
1081
+ const Use &Src = II.getArgOperandUse (0 );
1082
+ if (isTriviallyUniform (Src))
1083
+ return IC.replaceInstUsesWith (II, Src.get ());
1103
1084
break ;
1104
1085
}
1105
1086
case Intrinsic::amdgcn_trig_preop: {
0 commit comments