[LoopVectorize][X86] amdlibm-calls.ll - add 2/4/8/16 vector widths test checks for fallback to llvm intrinsics

RKSimon · RKSimon · commit 81acc84997d6 · 2024-08-29T17:31:55.000+01:00
Check for cases where there isn't a amdlib call but it still vectorises the math call
diff --git a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
@@ -80,6 +80,7 @@ define void @sin_f64(ptr nocapture %varray) {
 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_sin(<2 x double> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_sin(<4 x double> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_sin(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.sin.v16f64(<16 x double> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
 entry:
@@ -102,6 +103,7 @@ for.end:
 
 define void @sin_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @sin_f32(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_sinf(<4 x float> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_sinf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_sinf(<16 x float> [[TMP4:%.*]])
@@ -130,6 +132,7 @@ define void @sin_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_sin(<2 x double> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_sin(<4 x double> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_sin(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.sin.v16f64(<16 x double> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
 entry:
@@ -152,6 +155,7 @@ for.end:
 
 define void @sin_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @sin_f32_intrinsic(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_sinf(<4 x float> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_sinf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_sinf(<16 x float> [[TMP4:%.*]])
@@ -180,6 +184,7 @@ define void @cos_f64(ptr nocapture %varray) {
 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cos(<2 x double> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_cos(<4 x double> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_cos(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.cos.v16f64(<16 x double> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
 entry:
@@ -202,6 +207,7 @@ for.end:
 
 define void @cos_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @cos_f32(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_cosf(<4 x float> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_cosf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_cosf(<16 x float> [[TMP4:%.*]])
@@ -230,6 +236,7 @@ define void @cos_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cos(<2 x double> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_cos(<4 x double> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_cos(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.cos.v16f64(<16 x double> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
 entry:
@@ -252,6 +259,7 @@ for.end:
 
 define void @cos_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @cos_f32_intrinsic(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_cosf(<4 x float> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_cosf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_cosf(<16 x float> [[TMP4:%.*]])
@@ -280,6 +288,7 @@ define void @tan_f64(ptr nocapture %varray) {
 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_tan(<2 x double> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_tan(<4 x double> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_tan(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.tan.v16f64(<16 x double> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
 entry:
@@ -302,6 +311,7 @@ for.end:
 
 define void @tan_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @tan_f32(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.tan.v2f32(<2 x float> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanf(<4 x float> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_tanf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanf(<16 x float> [[TMP4:%.*]])
@@ -330,6 +340,7 @@ define void @tan_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_tan(<2 x double> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_tan(<4 x double> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_tan(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.tan.v16f64(<16 x double> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
 entry:
@@ -352,6 +363,7 @@ for.end:
 
 define void @tan_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @tan_f32_intrinsic(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.tan.v2f32(<2 x float> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanf(<4 x float> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_tanf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanf(<16 x float> [[TMP4:%.*]])
@@ -770,6 +782,7 @@ define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) {
 ; CHECK-VF2:    [[TMP8:%.*]] = call <2 x double> @amd_vrd2_pow(<2 x double> [[TMP4:%.*]], <2 x double> [[WIDE_LOAD:%.*]])
 ; CHECK-VF4:    [[TMP8:%.*]] = call <4 x double> @amd_vrd4_pow(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
 ; CHECK-VF8:    [[TMP8:%.*]] = call <8 x double> @amd_vrd8_pow(<8 x double> [[TMP4:%.*]], <8 x double> [[WIDE_LOAD:%.*]])
+; CHECK-VF16:   [[TMP8:%.*]] = call <16 x double> @llvm.pow.v16f64(<16 x double> [[TMP4:%.*]], <16 x double> [[WIDE_LOAD:%.*]])
 ; CHECK:        ret void
 ;
 entry:
@@ -797,6 +810,7 @@ define void @pow_f64_intrinsic(ptr nocapture %varray, ptr nocapture readonly %ex
 ; CHECK-VF2:    [[TMP8:%.*]] = call <2 x double> @amd_vrd2_pow(<2 x double> [[TMP4:%.*]], <2 x double> [[WIDE_LOAD:%.*]])
 ; CHECK-VF4:    [[TMP8:%.*]] = call <4 x double> @amd_vrd4_pow(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
 ; CHECK-VF8:    [[TMP8:%.*]] = call <8 x double> @amd_vrd8_pow(<8 x double> [[TMP4:%.*]], <8 x double> [[WIDE_LOAD:%.*]])
+; CHECK-VF16:   [[TMP8:%.*]] = call <16 x double> @llvm.pow.v16f64(<16 x double> [[TMP4:%.*]], <16 x double> [[WIDE_LOAD:%.*]])
 ; CHECK:        ret void
 ;
 entry:
@@ -821,6 +835,7 @@ for.end:
 
 define void @pow_f32(ptr nocapture %varray, ptr nocapture readonly %exp) {
 ; CHECK-LABEL: @pow_f32(
+; CHECK-VF2:    [[TMP8:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[TMP4:%.*]], <2 x float> [[WIDE_LOAD:%.*]])
 ; CHECK-VF4:    [[TMP8:%.*]] = call <4 x float> @amd_vrs4_powf(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]])
 ; CHECK-VF8:    [[TMP8:%.*]] = call <8 x float> @amd_vrs8_powf(<8 x float> [[TMP4:%.*]], <8 x float> [[WIDE_LOAD:%.*]])
 ; CHECK-VF16:   [[TMP8:%.*]] = call <16 x float> @amd_vrs16_powf(<16 x float> [[TMP4:%.*]], <16 x float> [[WIDE_LOAD:%.*]])
@@ -848,6 +863,7 @@ for.end:
 
 define void @pow_f32_intrinsic(ptr nocapture %varray, ptr nocapture readonly %exp) {
 ; CHECK-LABEL: @pow_f32_intrinsic(
+; CHECK-VF2:    [[TMP8:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[TMP4:%.*]], <2 x float> [[WIDE_LOAD:%.*]])
 ; CHECK-VF4:    [[TMP8:%.*]] = call <4 x float> @amd_vrs4_powf(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]])
 ; CHECK-VF8:    [[TMP8:%.*]] = call <8 x float> @amd_vrs8_powf(<8 x float> [[TMP4:%.*]], <8 x float> [[WIDE_LOAD:%.*]])
 ; CHECK-VF16:   [[TMP8:%.*]] = call <16 x float> @amd_vrs16_powf(<16 x float> [[TMP4:%.*]], <16 x float> [[WIDE_LOAD:%.*]])
@@ -878,6 +894,7 @@ define void @exp_f64(ptr nocapture %varray) {
 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp(<2 x double> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp(<4 x double> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.exp.v16f64(<16 x double> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
 entry:
@@ -900,6 +917,7 @@ for.end:
 
 define void @exp_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp_f32(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_expf(<4 x float> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_expf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_expf(<16 x float> [[TMP4:%.*]])
@@ -928,6 +946,7 @@ define void @exp_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp(<2 x double> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp(<4 x double> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.exp.v16f64(<16 x double> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
 entry:
@@ -950,6 +969,7 @@ for.end:
 
 define void @exp_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp_f32_intrinsic(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_expf(<4 x float> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_expf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_expf(<16 x float> [[TMP4:%.*]])
@@ -978,6 +998,7 @@ define void @log_f64(ptr nocapture %varray) {
 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_log(<2 x double> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log(<4 x double> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.log.v16f64(<16 x double> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
 entry:
@@ -1000,6 +1021,7 @@ for.end:
 
 define void @log_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @log_f32(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.log.v2f32(<2 x float> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_logf(<4 x float> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_logf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_logf(<16 x float> [[TMP4:%.*]])
@@ -1028,6 +1050,7 @@ define void @log_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_log(<2 x double> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log(<4 x double> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.log.v16f64(<16 x double> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
 entry:
@@ -1050,6 +1073,7 @@ for.end:
 
 define void @log_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @log_f32_intrinsic(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.log.v2f32(<2 x float> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_logf(<4 x float> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_logf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_logf(<16 x float> [[TMP4:%.*]])
@@ -1078,6 +1102,7 @@ define void @log2_f64(ptr nocapture %varray) {
 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_log2(<2 x double> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log2(<4 x double> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log2(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.log2.v16f64(<16 x double> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
 entry:
@@ -1100,6 +1125,7 @@ for.end:
 
 define void @log2_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @log2_f32(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.log2.v2f32(<2 x float> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log2f(<4 x float> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_log2f(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log2f(<16 x float> [[TMP4:%.*]])
@@ -1128,6 +1154,7 @@ define void @log2_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_log2(<2 x double> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log2(<4 x double> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log2(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.log2.v16f64(<16 x double> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
 entry:
@@ -1150,6 +1177,7 @@ for.end:
 
 define void @log2_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @log2_f32_intrinsic(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.log2.v2f32(<2 x float> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log2f(<4 x float> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_log2f(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log2f(<16 x float> [[TMP4:%.*]])
@@ -1175,6 +1203,7 @@ for.end:
 
 define void @log10_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @log10_f32(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.log10.v2f32(<2 x float> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log10f(<4 x float> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_log10f(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log10f(<16 x float> [[TMP4:%.*]])
@@ -1200,6 +1229,7 @@ for.end:
 
 define void @log10_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @log10_f32_intrinsic(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.log10.v2f32(<2 x float> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log10f(<4 x float> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_log10f(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log10f(<16 x float> [[TMP4:%.*]])
@@ -1228,6 +1258,7 @@ define void @exp2_f64(ptr nocapture %varray) {
 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp2(<2 x double> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp2(<4 x double> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp2(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.exp2.v16f64(<16 x double> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
 entry:
@@ -1250,6 +1281,7 @@ for.end:
 
 define void @exp2_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp2_f32(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.exp2.v2f32(<2 x float> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_exp2f(<4 x float> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_exp2f(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_exp2f(<16 x float> [[TMP4:%.*]])
@@ -1278,6 +1310,7 @@ define void @exp2_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp2(<2 x double> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp2(<4 x double> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp2(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.exp2.v16f64(<16 x double> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
 entry:
@@ -1300,6 +1333,7 @@ for.end:
 
 define void @exp2_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp2_f32_intrinsic(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.exp2.v2f32(<2 x float> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_exp2f(<4 x float> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_exp2f(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_exp2f(<16 x float> [[TMP4:%.*]])