Skip to content

Commit bf2d322

Browse files
adam-yang authored and Groverkss committed
[HLSL][SPIRV] Added clamp intrinsic (llvm#113394)
Fixes llvm#88052

- Added the following intrinsics:
  - `int_spv_uclamp`
  - `int_spv_sclamp`
  - `int_spv_nclamp`
- Updated DirectX counterparts to have the same three clamp intrinsics.
- Updated the clamp.hlsl unit tests to include SPIRV.
- Added the SPIRV-specific tests.
1 parent 7bead4f commit bf2d322

File tree

12 files changed

+825
-196
lines changed

12 files changed

+825
-196
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18838,14 +18838,21 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
1883818838
Value *OpMax = EmitScalarExpr(E->getArg(2));
1883918839

1884018840
QualType Ty = E->getArg(0)->getType();
18841-
bool IsUnsigned = false;
1884218841
if (auto *VecTy = Ty->getAs<VectorType>())
1884318842
Ty = VecTy->getElementType();
18844-
IsUnsigned = Ty->isUnsignedIntegerType();
18843+
18844+
Intrinsic::ID Intr;
18845+
if (Ty->isFloatingType()) {
18846+
Intr = CGM.getHLSLRuntime().getNClampIntrinsic();
18847+
} else if (Ty->isUnsignedIntegerType()) {
18848+
Intr = CGM.getHLSLRuntime().getUClampIntrinsic();
18849+
} else {
18850+
assert(Ty->isSignedIntegerType());
18851+
Intr = CGM.getHLSLRuntime().getSClampIntrinsic();
18852+
}
1884518853
return Builder.CreateIntrinsic(
18846-
/*ReturnType=*/OpX->getType(),
18847-
IsUnsigned ? Intrinsic::dx_uclamp : Intrinsic::dx_clamp,
18848-
ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "dx.clamp");
18854+
/*ReturnType=*/OpX->getType(), Intr,
18855+
ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "hlsl.clamp");
1884918856
}
1885018857
case Builtin::BI__builtin_hlsl_cross: {
1885118858
Value *Op0 = EmitScalarExpr(E->getArg(0));

clang/lib/CodeGen/CGHLSLRuntime.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,9 @@ class CGHLSLRuntime {
9595
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveReadLaneAt, wave_readlane)
9696
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitUHigh, firstbituhigh)
9797
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitSHigh, firstbitshigh)
98+
GENERATE_HLSL_INTRINSIC_FUNCTION(NClamp, nclamp)
99+
GENERATE_HLSL_INTRINSIC_FUNCTION(SClamp, sclamp)
100+
GENERATE_HLSL_INTRINSIC_FUNCTION(UClamp, uclamp)
98101

99102
GENERATE_HLSL_INTRINSIC_FUNCTION(CreateHandleFromBinding, handle_fromBinding)
100103

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -o - | FileCheck %s
22

33
// CHECK-LABEL: builtin_test_clamp_int4
4-
// CHECK: %dx.clamp = call <4 x i32> @llvm.dx.clamp.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
5-
// CHECK: ret <4 x i32> %dx.clamp
4+
// CHECK: %hlsl.clamp = call <4 x i32> @llvm.dx.sclamp.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
5+
// CHECK: ret <4 x i32> %hlsl.clamp
66
int4 builtin_test_clamp_int4(int4 p0, int4 p1, int4 p2) {
77
return __builtin_hlsl_elementwise_clamp(p0, p1, p2);
88
}
Lines changed: 92 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -1,133 +1,143 @@
11
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \
22
// RUN: -fnative-half-type -emit-llvm -disable-llvm-passes -o - | \
3-
// RUN: FileCheck %s --check-prefixes=CHECK,NATIVE_HALF
3+
// RUN: FileCheck %s --check-prefixes=CHECK,NATIVE_HALF \
4+
// RUN: -DTARGET=dx -DFNATTRS=noundef
45
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \
56
// RUN: -emit-llvm -disable-llvm-passes -o - | \
6-
// RUN: FileCheck %s --check-prefixes=CHECK,NO_HALF
7+
// RUN: FileCheck %s --check-prefixes=CHECK,NO_HALF \
8+
// RUN: -DTARGET=dx -DFNATTRS=noundef
9+
// RUN: %clang_cc1 -finclude-default-header -triple spirv-unknown-vulkan-compute %s \
10+
// RUN: -fnative-half-type -emit-llvm -disable-llvm-passes -o - | \
11+
// RUN: FileCheck %s --check-prefixes=CHECK,NATIVE_HALF \
12+
// RUN: -DTARGET=spv -DFNATTRS="spir_func noundef"
13+
// RUN: %clang_cc1 -finclude-default-header -triple spirv-unknown-vulkan-compute %s \
14+
// RUN: -emit-llvm -disable-llvm-passes -o - | \
15+
// RUN: FileCheck %s --check-prefixes=CHECK,NO_HALF \
16+
// RUN: -DTARGET=spv -DFNATTRS="spir_func noundef"
717

818
#ifdef __HLSL_ENABLE_16_BIT
9-
// NATIVE_HALF-LABEL: define noundef i16 @_Z16test_clamp_short
10-
// NATIVE_HALF: call i16 @llvm.dx.clamp.i16(
19+
// NATIVE_HALF: define [[FNATTRS]] i16 @_Z16test_clamp_short
20+
// NATIVE_HALF: call i16 @llvm.[[TARGET]].sclamp.i16(
1121
int16_t test_clamp_short(int16_t p0, int16_t p1) { return clamp(p0, p1,p1); }
12-
// NATIVE_HALF-LABEL: define noundef <2 x i16> @_Z17test_clamp_short2
13-
// NATIVE_HALF: call <2 x i16> @llvm.dx.clamp.v2i16(
22+
// NATIVE_HALF: define [[FNATTRS]] <2 x i16> @_Z17test_clamp_short2
23+
// NATIVE_HALF: call <2 x i16> @llvm.[[TARGET]].sclamp.v2i16(
1424
int16_t2 test_clamp_short2(int16_t2 p0, int16_t2 p1) { return clamp(p0, p1,p1); }
15-
// NATIVE_HALF-LABEL: define noundef <3 x i16> @_Z17test_clamp_short3
16-
// NATIVE_HALF: call <3 x i16> @llvm.dx.clamp.v3i16
25+
// NATIVE_HALF: define [[FNATTRS]] <3 x i16> @_Z17test_clamp_short3
26+
// NATIVE_HALF: call <3 x i16> @llvm.[[TARGET]].sclamp.v3i16
1727
int16_t3 test_clamp_short3(int16_t3 p0, int16_t3 p1) { return clamp(p0, p1,p1); }
18-
// NATIVE_HALF-LABEL: define noundef <4 x i16> @_Z17test_clamp_short4
19-
// NATIVE_HALF: call <4 x i16> @llvm.dx.clamp.v4i16
28+
// NATIVE_HALF: define [[FNATTRS]] <4 x i16> @_Z17test_clamp_short4
29+
// NATIVE_HALF: call <4 x i16> @llvm.[[TARGET]].sclamp.v4i16
2030
int16_t4 test_clamp_short4(int16_t4 p0, int16_t4 p1) { return clamp(p0, p1,p1); }
2131

22-
// NATIVE_HALF-LABEL: define noundef i16 @_Z17test_clamp_ushort
23-
// NATIVE_HALF: call i16 @llvm.dx.uclamp.i16(
32+
// NATIVE_HALF: define [[FNATTRS]] i16 @_Z17test_clamp_ushort
33+
// NATIVE_HALF: call i16 @llvm.[[TARGET]].uclamp.i16(
2434
uint16_t test_clamp_ushort(uint16_t p0, uint16_t p1) { return clamp(p0, p1,p1); }
25-
// NATIVE_HALF-LABEL: define noundef <2 x i16> @_Z18test_clamp_ushort2
26-
// NATIVE_HALF: call <2 x i16> @llvm.dx.uclamp.v2i16
35+
// NATIVE_HALF: define [[FNATTRS]] <2 x i16> @_Z18test_clamp_ushort2
36+
// NATIVE_HALF: call <2 x i16> @llvm.[[TARGET]].uclamp.v2i16
2737
uint16_t2 test_clamp_ushort2(uint16_t2 p0, uint16_t2 p1) { return clamp(p0, p1,p1); }
28-
// NATIVE_HALF-LABEL: define noundef <3 x i16> @_Z18test_clamp_ushort3
29-
// NATIVE_HALF: call <3 x i16> @llvm.dx.uclamp.v3i16
38+
// NATIVE_HALF: define [[FNATTRS]] <3 x i16> @_Z18test_clamp_ushort3
39+
// NATIVE_HALF: call <3 x i16> @llvm.[[TARGET]].uclamp.v3i16
3040
uint16_t3 test_clamp_ushort3(uint16_t3 p0, uint16_t3 p1) { return clamp(p0, p1,p1); }
31-
// NATIVE_HALF-LABEL: define noundef <4 x i16> @_Z18test_clamp_ushort4
32-
// NATIVE_HALF: call <4 x i16> @llvm.dx.uclamp.v4i16
41+
// NATIVE_HALF: define [[FNATTRS]] <4 x i16> @_Z18test_clamp_ushort4
42+
// NATIVE_HALF: call <4 x i16> @llvm.[[TARGET]].uclamp.v4i16
3343
uint16_t4 test_clamp_ushort4(uint16_t4 p0, uint16_t4 p1) { return clamp(p0, p1,p1); }
3444
#endif
3545

36-
// CHECK-LABEL: define noundef i32 @_Z14test_clamp_int
37-
// CHECK: call i32 @llvm.dx.clamp.i32(
46+
// CHECK: define [[FNATTRS]] i32 @_Z14test_clamp_int
47+
// CHECK: call i32 @llvm.[[TARGET]].sclamp.i32(
3848
int test_clamp_int(int p0, int p1) { return clamp(p0, p1,p1); }
39-
// CHECK-LABEL: define noundef <2 x i32> @_Z15test_clamp_int2
40-
// CHECK: call <2 x i32> @llvm.dx.clamp.v2i32
49+
// CHECK: define [[FNATTRS]] <2 x i32> @_Z15test_clamp_int2
50+
// CHECK: call <2 x i32> @llvm.[[TARGET]].sclamp.v2i32
4151
int2 test_clamp_int2(int2 p0, int2 p1) { return clamp(p0, p1,p1); }
42-
// CHECK-LABEL: define noundef <3 x i32> @_Z15test_clamp_int3
43-
// CHECK: call <3 x i32> @llvm.dx.clamp.v3i32
52+
// CHECK: define [[FNATTRS]] <3 x i32> @_Z15test_clamp_int3
53+
// CHECK: call <3 x i32> @llvm.[[TARGET]].sclamp.v3i32
4454
int3 test_clamp_int3(int3 p0, int3 p1) { return clamp(p0, p1,p1); }
45-
// CHECK-LABEL: define noundef <4 x i32> @_Z15test_clamp_int4
46-
// CHECK: call <4 x i32> @llvm.dx.clamp.v4i32
55+
// CHECK: define [[FNATTRS]] <4 x i32> @_Z15test_clamp_int4
56+
// CHECK: call <4 x i32> @llvm.[[TARGET]].sclamp.v4i32
4757
int4 test_clamp_int4(int4 p0, int4 p1) { return clamp(p0, p1,p1); }
4858

49-
// CHECK-LABEL: define noundef i32 @_Z15test_clamp_uint
50-
// CHECK: call i32 @llvm.dx.uclamp.i32(
59+
// CHECK: define [[FNATTRS]] i32 @_Z15test_clamp_uint
60+
// CHECK: call i32 @llvm.[[TARGET]].uclamp.i32(
5161
int test_clamp_uint(uint p0, uint p1) { return clamp(p0, p1,p1); }
52-
// CHECK-LABEL: define noundef <2 x i32> @_Z16test_clamp_uint2
53-
// CHECK: call <2 x i32> @llvm.dx.uclamp.v2i32
62+
// CHECK: define [[FNATTRS]] <2 x i32> @_Z16test_clamp_uint2
63+
// CHECK: call <2 x i32> @llvm.[[TARGET]].uclamp.v2i32
5464
uint2 test_clamp_uint2(uint2 p0, uint2 p1) { return clamp(p0, p1,p1); }
55-
// CHECK-LABEL: define noundef <3 x i32> @_Z16test_clamp_uint3
56-
// CHECK: call <3 x i32> @llvm.dx.uclamp.v3i32
65+
// CHECK: define [[FNATTRS]] <3 x i32> @_Z16test_clamp_uint3
66+
// CHECK: call <3 x i32> @llvm.[[TARGET]].uclamp.v3i32
5767
uint3 test_clamp_uint3(uint3 p0, uint3 p1) { return clamp(p0, p1,p1); }
58-
// CHECK-LABEL: define noundef <4 x i32> @_Z16test_clamp_uint4
59-
// CHECK: call <4 x i32> @llvm.dx.uclamp.v4i32
68+
// CHECK: define [[FNATTRS]] <4 x i32> @_Z16test_clamp_uint4
69+
// CHECK: call <4 x i32> @llvm.[[TARGET]].uclamp.v4i32
6070
uint4 test_clamp_uint4(uint4 p0, uint4 p1) { return clamp(p0, p1,p1); }
6171

62-
// CHECK-LABEL: define noundef i64 @_Z15test_clamp_long
63-
// CHECK: call i64 @llvm.dx.clamp.i64(
72+
// CHECK: define [[FNATTRS]] i64 @_Z15test_clamp_long
73+
// CHECK: call i64 @llvm.[[TARGET]].sclamp.i64(
6474
int64_t test_clamp_long(int64_t p0, int64_t p1) { return clamp(p0, p1,p1); }
65-
// CHECK-LABEL: define noundef <2 x i64> @_Z16test_clamp_long2
66-
// CHECK: call <2 x i64> @llvm.dx.clamp.v2i64
75+
// CHECK: define [[FNATTRS]] <2 x i64> @_Z16test_clamp_long2
76+
// CHECK: call <2 x i64> @llvm.[[TARGET]].sclamp.v2i64
6777
int64_t2 test_clamp_long2(int64_t2 p0, int64_t2 p1) { return clamp(p0, p1,p1); }
68-
// CHECK-LABEL: define noundef <3 x i64> @_Z16test_clamp_long3
69-
// CHECK: call <3 x i64> @llvm.dx.clamp.v3i64
78+
// CHECK: define [[FNATTRS]] <3 x i64> @_Z16test_clamp_long3
79+
// CHECK: call <3 x i64> @llvm.[[TARGET]].sclamp.v3i64
7080
int64_t3 test_clamp_long3(int64_t3 p0, int64_t3 p1) { return clamp(p0, p1,p1); }
71-
// CHECK-LABEL: define noundef <4 x i64> @_Z16test_clamp_long4
72-
// CHECK: call <4 x i64> @llvm.dx.clamp.v4i64
81+
// CHECK: define [[FNATTRS]] <4 x i64> @_Z16test_clamp_long4
82+
// CHECK: call <4 x i64> @llvm.[[TARGET]].sclamp.v4i64
7383
int64_t4 test_clamp_long4(int64_t4 p0, int64_t4 p1) { return clamp(p0, p1,p1); }
7484

75-
// CHECK-LABEL: define noundef i64 @_Z16test_clamp_ulong
76-
// CHECK: call i64 @llvm.dx.uclamp.i64(
85+
// CHECK: define [[FNATTRS]] i64 @_Z16test_clamp_ulong
86+
// CHECK: call i64 @llvm.[[TARGET]].uclamp.i64(
7787
uint64_t test_clamp_ulong(uint64_t p0, uint64_t p1) { return clamp(p0, p1,p1); }
78-
// CHECK-LABEL: define noundef <2 x i64> @_Z17test_clamp_ulong2
79-
// CHECK: call <2 x i64> @llvm.dx.uclamp.v2i64
88+
// CHECK: define [[FNATTRS]] <2 x i64> @_Z17test_clamp_ulong2
89+
// CHECK: call <2 x i64> @llvm.[[TARGET]].uclamp.v2i64
8090
uint64_t2 test_clamp_ulong2(uint64_t2 p0, uint64_t2 p1) { return clamp(p0, p1,p1); }
81-
// CHECK-LABEL: define noundef <3 x i64> @_Z17test_clamp_ulong3
82-
// CHECK: call <3 x i64> @llvm.dx.uclamp.v3i64
91+
// CHECK: define [[FNATTRS]] <3 x i64> @_Z17test_clamp_ulong3
92+
// CHECK: call <3 x i64> @llvm.[[TARGET]].uclamp.v3i64
8393
uint64_t3 test_clamp_ulong3(uint64_t3 p0, uint64_t3 p1) { return clamp(p0, p1,p1); }
84-
// CHECK-LABEL: define noundef <4 x i64> @_Z17test_clamp_ulong4
85-
// CHECK: call <4 x i64> @llvm.dx.uclamp.v4i64
94+
// CHECK: define [[FNATTRS]] <4 x i64> @_Z17test_clamp_ulong4
95+
// CHECK: call <4 x i64> @llvm.[[TARGET]].uclamp.v4i64
8696
uint64_t4 test_clamp_ulong4(uint64_t4 p0, uint64_t4 p1) { return clamp(p0, p1,p1); }
8797

88-
// NATIVE_HALF-LABEL: define noundef half @_Z15test_clamp_half
89-
// NATIVE_HALF: call half @llvm.dx.clamp.f16(
90-
// NO_HALF-LABEL: define noundef float @_Z15test_clamp_half
91-
// NO_HALF: call float @llvm.dx.clamp.f32(
98+
// NATIVE_HALF: define [[FNATTRS]] half @_Z15test_clamp_half
99+
// NATIVE_HALF: call half @llvm.[[TARGET]].nclamp.f16(
100+
// NO_HALF: define [[FNATTRS]] float @_Z15test_clamp_half
101+
// NO_HALF: call float @llvm.[[TARGET]].nclamp.f32(
92102
half test_clamp_half(half p0, half p1) { return clamp(p0, p1,p1); }
93-
// NATIVE_HALF-LABEL: define noundef <2 x half> @_Z16test_clamp_half2
94-
// NATIVE_HALF: call <2 x half> @llvm.dx.clamp.v2f16
95-
// NO_HALF-LABEL: define noundef <2 x float> @_Z16test_clamp_half2
96-
// NO_HALF: call <2 x float> @llvm.dx.clamp.v2f32(
103+
// NATIVE_HALF: define [[FNATTRS]] <2 x half> @_Z16test_clamp_half2
104+
// NATIVE_HALF: call <2 x half> @llvm.[[TARGET]].nclamp.v2f16
105+
// NO_HALF: define [[FNATTRS]] <2 x float> @_Z16test_clamp_half2
106+
// NO_HALF: call <2 x float> @llvm.[[TARGET]].nclamp.v2f32(
97107
half2 test_clamp_half2(half2 p0, half2 p1) { return clamp(p0, p1,p1); }
98-
// NATIVE_HALF-LABEL: define noundef <3 x half> @_Z16test_clamp_half3
99-
// NATIVE_HALF: call <3 x half> @llvm.dx.clamp.v3f16
100-
// NO_HALF-LABEL: define noundef <3 x float> @_Z16test_clamp_half3
101-
// NO_HALF: call <3 x float> @llvm.dx.clamp.v3f32(
108+
// NATIVE_HALF: define [[FNATTRS]] <3 x half> @_Z16test_clamp_half3
109+
// NATIVE_HALF: call <3 x half> @llvm.[[TARGET]].nclamp.v3f16
110+
// NO_HALF: define [[FNATTRS]] <3 x float> @_Z16test_clamp_half3
111+
// NO_HALF: call <3 x float> @llvm.[[TARGET]].nclamp.v3f32(
102112
half3 test_clamp_half3(half3 p0, half3 p1) { return clamp(p0, p1,p1); }
103-
// NATIVE_HALF-LABEL: define noundef <4 x half> @_Z16test_clamp_half4
104-
// NATIVE_HALF: call <4 x half> @llvm.dx.clamp.v4f16
105-
// NO_HALF-LABEL: define noundef <4 x float> @_Z16test_clamp_half4
106-
// NO_HALF: call <4 x float> @llvm.dx.clamp.v4f32(
113+
// NATIVE_HALF: define [[FNATTRS]] <4 x half> @_Z16test_clamp_half4
114+
// NATIVE_HALF: call <4 x half> @llvm.[[TARGET]].nclamp.v4f16
115+
// NO_HALF: define [[FNATTRS]] <4 x float> @_Z16test_clamp_half4
116+
// NO_HALF: call <4 x float> @llvm.[[TARGET]].nclamp.v4f32(
107117
half4 test_clamp_half4(half4 p0, half4 p1) { return clamp(p0, p1,p1); }
108118

109-
// CHECK-LABEL: define noundef float @_Z16test_clamp_float
110-
// CHECK: call float @llvm.dx.clamp.f32(
119+
// CHECK: define [[FNATTRS]] float @_Z16test_clamp_float
120+
// CHECK: call float @llvm.[[TARGET]].nclamp.f32(
111121
float test_clamp_float(float p0, float p1) { return clamp(p0, p1,p1); }
112-
// CHECK-LABEL: define noundef <2 x float> @_Z17test_clamp_float2
113-
// CHECK: call <2 x float> @llvm.dx.clamp.v2f32
122+
// CHECK: define [[FNATTRS]] <2 x float> @_Z17test_clamp_float2
123+
// CHECK: call <2 x float> @llvm.[[TARGET]].nclamp.v2f32
114124
float2 test_clamp_float2(float2 p0, float2 p1) { return clamp(p0, p1,p1); }
115-
// CHECK-LABEL: define noundef <3 x float> @_Z17test_clamp_float3
116-
// CHECK: call <3 x float> @llvm.dx.clamp.v3f32
125+
// CHECK: define [[FNATTRS]] <3 x float> @_Z17test_clamp_float3
126+
// CHECK: call <3 x float> @llvm.[[TARGET]].nclamp.v3f32
117127
float3 test_clamp_float3(float3 p0, float3 p1) { return clamp(p0, p1,p1); }
118-
// CHECK-LABEL: define noundef <4 x float> @_Z17test_clamp_float4
119-
// CHECK: call <4 x float> @llvm.dx.clamp.v4f32
128+
// CHECK: define [[FNATTRS]] <4 x float> @_Z17test_clamp_float4
129+
// CHECK: call <4 x float> @llvm.[[TARGET]].nclamp.v4f32
120130
float4 test_clamp_float4(float4 p0, float4 p1) { return clamp(p0, p1,p1); }
121131

122-
// CHECK-LABEL: define noundef double @_Z17test_clamp_double
123-
// CHECK: call double @llvm.dx.clamp.f64(
132+
// CHECK: define [[FNATTRS]] double @_Z17test_clamp_double
133+
// CHECK: call double @llvm.[[TARGET]].nclamp.f64(
124134
double test_clamp_double(double p0, double p1) { return clamp(p0, p1,p1); }
125-
// CHECK-LABEL: define noundef <2 x double> @_Z18test_clamp_double2
126-
// CHECK: call <2 x double> @llvm.dx.clamp.v2f64
135+
// CHECK: define [[FNATTRS]] <2 x double> @_Z18test_clamp_double2
136+
// CHECK: call <2 x double> @llvm.[[TARGET]].nclamp.v2f64
127137
double2 test_clamp_double2(double2 p0, double2 p1) { return clamp(p0, p1,p1); }
128-
// CHECK-LABEL: define noundef <3 x double> @_Z18test_clamp_double3
129-
// CHECK: call <3 x double> @llvm.dx.clamp.v3f64
138+
// CHECK: define [[FNATTRS]] <3 x double> @_Z18test_clamp_double3
139+
// CHECK: call <3 x double> @llvm.[[TARGET]].nclamp.v3f64
130140
double3 test_clamp_double3(double3 p0, double3 p1) { return clamp(p0, p1,p1); }
131-
// CHECK-LABEL: define noundef <4 x double> @_Z18test_clamp_double4
132-
// CHECK: call <4 x double> @llvm.dx.clamp.v4f64
141+
// CHECK: define [[FNATTRS]] <4 x double> @_Z18test_clamp_double4
142+
// CHECK: call <4 x double> @llvm.[[TARGET]].nclamp.v4f64
133143
double4 test_clamp_double4(double4 p0, double4 p1) { return clamp(p0, p1,p1); }

llvm/include/llvm/IR/IntrinsicsDirectX.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,9 @@ def int_dx_cast_handle : Intrinsic<[llvm_any_ty], [llvm_any_ty]>;
4343

4444
def int_dx_all : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem]>;
4545
def int_dx_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem]>;
46-
def int_dx_clamp : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
4746
def int_dx_uclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
47+
def int_dx_sclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
48+
def int_dx_nclamp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
4849
def int_dx_cross : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
4950
def int_dx_saturate : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
5051

llvm/include/llvm/IR/IntrinsicsSPIRV.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,9 @@ let TargetPrefix = "spv" in {
9090
def int_spv_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty], [IntrNoMem]>;
9191
def int_spv_radians : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>;
9292
def int_spv_group_memory_barrier_with_group_sync : DefaultAttrsIntrinsic<[], [], []>;
93+
def int_spv_uclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
94+
def int_spv_sclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
95+
def int_spv_nclamp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
9396

9497
// Create resource handle given the binding information. Returns a
9598
// type appropriate for the kind of resource given the set id, binding id,

llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,10 @@ static bool isIntrinsicExpansion(Function &F) {
5353
case Intrinsic::pow:
5454
case Intrinsic::dx_all:
5555
case Intrinsic::dx_any:
56-
case Intrinsic::dx_clamp:
5756
case Intrinsic::dx_cross:
5857
case Intrinsic::dx_uclamp:
58+
case Intrinsic::dx_sclamp:
59+
case Intrinsic::dx_nclamp:
5960
case Intrinsic::dx_degrees:
6061
case Intrinsic::dx_lerp:
6162
case Intrinsic::dx_length:
@@ -452,29 +453,21 @@ static Value *expandRadiansIntrinsic(CallInst *Orig) {
452453
return Builder.CreateFMul(X, PiOver180);
453454
}
454455

455-
static Intrinsic::ID getMaxForClamp(Type *ElemTy,
456-
Intrinsic::ID ClampIntrinsic) {
456+
static Intrinsic::ID getMaxForClamp(Intrinsic::ID ClampIntrinsic) {
457457
if (ClampIntrinsic == Intrinsic::dx_uclamp)
458458
return Intrinsic::umax;
459-
assert(ClampIntrinsic == Intrinsic::dx_clamp);
460-
if (ElemTy->isVectorTy())
461-
ElemTy = ElemTy->getScalarType();
462-
if (ElemTy->isIntegerTy())
459+
if (ClampIntrinsic == Intrinsic::dx_sclamp)
463460
return Intrinsic::smax;
464-
assert(ElemTy->isFloatingPointTy());
461+
assert(ClampIntrinsic == Intrinsic::dx_nclamp);
465462
return Intrinsic::maxnum;
466463
}
467464

468-
static Intrinsic::ID getMinForClamp(Type *ElemTy,
469-
Intrinsic::ID ClampIntrinsic) {
465+
static Intrinsic::ID getMinForClamp(Intrinsic::ID ClampIntrinsic) {
470466
if (ClampIntrinsic == Intrinsic::dx_uclamp)
471467
return Intrinsic::umin;
472-
assert(ClampIntrinsic == Intrinsic::dx_clamp);
473-
if (ElemTy->isVectorTy())
474-
ElemTy = ElemTy->getScalarType();
475-
if (ElemTy->isIntegerTy())
468+
if (ClampIntrinsic == Intrinsic::dx_sclamp)
476469
return Intrinsic::smin;
477-
assert(ElemTy->isFloatingPointTy());
470+
assert(ClampIntrinsic == Intrinsic::dx_nclamp);
478471
return Intrinsic::minnum;
479472
}
480473

@@ -485,9 +478,9 @@ static Value *expandClampIntrinsic(CallInst *Orig,
485478
Value *Max = Orig->getOperand(2);
486479
Type *Ty = X->getType();
487480
IRBuilder<> Builder(Orig);
488-
auto *MaxCall = Builder.CreateIntrinsic(
489-
Ty, getMaxForClamp(Ty, ClampIntrinsic), {X, Min}, nullptr, "dx.max");
490-
return Builder.CreateIntrinsic(Ty, getMinForClamp(Ty, ClampIntrinsic),
481+
auto *MaxCall = Builder.CreateIntrinsic(Ty, getMaxForClamp(ClampIntrinsic),
482+
{X, Min}, nullptr, "dx.max");
483+
return Builder.CreateIntrinsic(Ty, getMinForClamp(ClampIntrinsic),
491484
{MaxCall, Max}, nullptr, "dx.min");
492485
}
493486

@@ -555,7 +548,8 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
555548
Result = expandCrossIntrinsic(Orig);
556549
break;
557550
case Intrinsic::dx_uclamp:
558-
case Intrinsic::dx_clamp:
551+
case Intrinsic::dx_sclamp:
552+
case Intrinsic::dx_nclamp:
559553
Result = expandClampIntrinsic(Orig, IntrinsicId);
560554
break;
561555
case Intrinsic::dx_degrees:

0 commit comments

Comments
 (0)