Skip to content

Commit c330ca8

Browse files
committed
[X86] Add intrinsics for kand/kandn/knot/kor/kxnor/kxor with 8, 32, and 64-bit mask registers.
This also adds a second intrinsic name for the 16-bit mask versions. These intrinsics match gcc and icc. They just aren't published in the Intel Intrinsics Guide so I only recently found they existed. llvm-svn: 340719
1 parent 9a02228 commit c330ca8

File tree

8 files changed

+427
-12
lines changed

8 files changed

+427
-12
lines changed

clang/include/clang/Basic/BuiltinsX86.def

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1005,7 +1005,10 @@ TARGET_BUILTIN(__builtin_ia32_scatterpfdps, "vUsV16ii*IiIi", "nV:512:", "avx512p
10051005
TARGET_BUILTIN(__builtin_ia32_scatterpfqpd, "vUcV8LLiLLi*IiIi", "nV:512:", "avx512pf")
10061006
TARGET_BUILTIN(__builtin_ia32_scatterpfqps, "vUcV8LLii*IiIi", "nV:512:", "avx512pf")
10071007

1008+
TARGET_BUILTIN(__builtin_ia32_knotqi, "UcUc", "nc", "avx512dq")
10081009
TARGET_BUILTIN(__builtin_ia32_knothi, "UsUs", "nc", "avx512f")
1010+
TARGET_BUILTIN(__builtin_ia32_knotsi, "UiUi", "nc", "avx512bw")
1011+
TARGET_BUILTIN(__builtin_ia32_knotdi, "ULLiULLi", "nc", "avx512bw")
10091012

10101013
TARGET_BUILTIN(__builtin_ia32_cmpb128_mask, "UsV16cV16cIiUs", "ncV:128:", "avx512vl,avx512bw")
10111014
TARGET_BUILTIN(__builtin_ia32_cmpd128_mask, "UcV4iV4iIiUc", "ncV:128:", "avx512vl")
@@ -1734,14 +1737,29 @@ TARGET_BUILTIN(__builtin_ia32_fpclassps512_mask, "UsV16fIiUs", "ncV:512:", "avx5
17341737
TARGET_BUILTIN(__builtin_ia32_fpclasspd512_mask, "UcV8dIiUc", "ncV:512:", "avx512dq")
17351738
TARGET_BUILTIN(__builtin_ia32_fpclasssd_mask, "UcV2dIiUc", "ncV:128:", "avx512dq")
17361739
TARGET_BUILTIN(__builtin_ia32_fpclassss_mask, "UcV4fIiUc", "ncV:128:", "avx512dq")
1740+
TARGET_BUILTIN(__builtin_ia32_kandqi, "UcUcUc", "nc", "avx512dq")
17371741
TARGET_BUILTIN(__builtin_ia32_kandhi, "UsUsUs", "nc", "avx512f")
1742+
TARGET_BUILTIN(__builtin_ia32_kandsi, "UiUiUi", "nc", "avx512bw")
1743+
TARGET_BUILTIN(__builtin_ia32_kanddi, "ULLiULLiULLi", "nc", "avx512bw")
1744+
TARGET_BUILTIN(__builtin_ia32_kandnqi, "UcUcUc", "nc", "avx512dq")
17381745
TARGET_BUILTIN(__builtin_ia32_kandnhi, "UsUsUs", "nc", "avx512f")
1746+
TARGET_BUILTIN(__builtin_ia32_kandnsi, "UiUiUi", "nc", "avx512bw")
1747+
TARGET_BUILTIN(__builtin_ia32_kandndi, "ULLiULLiULLi", "nc", "avx512bw")
1748+
TARGET_BUILTIN(__builtin_ia32_korqi, "UcUcUc", "nc", "avx512dq")
17391749
TARGET_BUILTIN(__builtin_ia32_korhi, "UsUsUs", "nc", "avx512f")
1750+
TARGET_BUILTIN(__builtin_ia32_korsi, "UiUiUi", "nc", "avx512bw")
1751+
TARGET_BUILTIN(__builtin_ia32_kordi, "ULLiULLiULLi", "nc", "avx512bw")
17401752
TARGET_BUILTIN(__builtin_ia32_kortestchi, "iUsUs", "nc", "avx512f")
17411753
TARGET_BUILTIN(__builtin_ia32_kortestzhi, "iUsUs", "nc", "avx512f")
17421754
TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs", "nc", "avx512f")
1755+
TARGET_BUILTIN(__builtin_ia32_kxnorqi, "UcUcUc", "nc", "avx512dq")
17431756
TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs", "nc", "avx512f")
1757+
TARGET_BUILTIN(__builtin_ia32_kxnorsi, "UiUiUi", "nc", "avx512bw")
1758+
TARGET_BUILTIN(__builtin_ia32_kxnordi, "ULLiULLiULLi", "nc", "avx512bw")
1759+
TARGET_BUILTIN(__builtin_ia32_kxorqi, "UcUcUc", "nc", "avx512dq")
17441760
TARGET_BUILTIN(__builtin_ia32_kxorhi, "UsUsUs", "nc", "avx512f")
1761+
TARGET_BUILTIN(__builtin_ia32_kxorsi, "UiUiUi", "nc", "avx512bw")
1762+
TARGET_BUILTIN(__builtin_ia32_kxordi, "ULLiULLiULLi", "nc", "avx512bw")
17451763
TARGET_BUILTIN(__builtin_ia32_palignr512, "V64cV64cV64cIi", "ncV:512:", "avx512bw")
17461764
TARGET_BUILTIN(__builtin_ia32_dbpsadbw128, "V8sV16cV16cIi", "ncV:128:", "avx512bw,avx512vl")
17471765
TARGET_BUILTIN(__builtin_ia32_dbpsadbw256, "V16sV32cV32cIi", "ncV:256:", "avx512bw,avx512vl")

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8603,8 +8603,9 @@ static Value *EmitX86CompressStore(CodeGenFunction &CGF,
86038603
}
86048604

86058605
static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
8606-
unsigned NumElts, ArrayRef<Value *> Ops,
8606+
ArrayRef<Value *> Ops,
86078607
bool InvertLHS = false) {
8608+
unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
86088609
Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
86098610
Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
86108611

@@ -10013,7 +10014,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1001310014

1001410015
case X86::BI__builtin_ia32_kortestchi:
1001510016
case X86::BI__builtin_ia32_kortestzhi: {
10016-
Value *Or = EmitX86MaskLogic(*this, Instruction::Or, 16, Ops);
10017+
Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
1001710018
Value *C;
1001810019
if (BuiltinID == X86::BI__builtin_ia32_kortestchi)
1001910020
C = llvm::Constant::getAllOnesValue(Builder.getInt16Ty());
@@ -10023,26 +10024,45 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1002310024
return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
1002410025
}
1002510026

10027+
case X86::BI__builtin_ia32_kandqi:
1002610028
case X86::BI__builtin_ia32_kandhi:
10027-
return EmitX86MaskLogic(*this, Instruction::And, 16, Ops);
10029+
case X86::BI__builtin_ia32_kandsi:
10030+
case X86::BI__builtin_ia32_kanddi:
10031+
return EmitX86MaskLogic(*this, Instruction::And, Ops);
10032+
case X86::BI__builtin_ia32_kandnqi:
1002810033
case X86::BI__builtin_ia32_kandnhi:
10029-
return EmitX86MaskLogic(*this, Instruction::And, 16, Ops, true);
10034+
case X86::BI__builtin_ia32_kandnsi:
10035+
case X86::BI__builtin_ia32_kandndi:
10036+
return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
10037+
case X86::BI__builtin_ia32_korqi:
1003010038
case X86::BI__builtin_ia32_korhi:
10031-
return EmitX86MaskLogic(*this, Instruction::Or, 16, Ops);
10039+
case X86::BI__builtin_ia32_korsi:
10040+
case X86::BI__builtin_ia32_kordi:
10041+
return EmitX86MaskLogic(*this, Instruction::Or, Ops);
10042+
case X86::BI__builtin_ia32_kxnorqi:
1003210043
case X86::BI__builtin_ia32_kxnorhi:
10033-
return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops, true);
10044+
case X86::BI__builtin_ia32_kxnorsi:
10045+
case X86::BI__builtin_ia32_kxnordi:
10046+
return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
10047+
case X86::BI__builtin_ia32_kxorqi:
1003410048
case X86::BI__builtin_ia32_kxorhi:
10035-
return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops);
10036-
case X86::BI__builtin_ia32_knothi: {
10037-
Ops[0] = getMaskVecValue(*this, Ops[0], 16);
10038-
return Builder.CreateBitCast(Builder.CreateNot(Ops[0]),
10039-
Builder.getInt16Ty());
10049+
case X86::BI__builtin_ia32_kxorsi:
10050+
case X86::BI__builtin_ia32_kxordi:
10051+
return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
10052+
case X86::BI__builtin_ia32_knotqi:
10053+
case X86::BI__builtin_ia32_knothi:
10054+
case X86::BI__builtin_ia32_knotsi:
10055+
case X86::BI__builtin_ia32_knotdi: {
10056+
unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
10057+
Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
10058+
return Builder.CreateBitCast(Builder.CreateNot(Res),
10059+
Ops[0]->getType());
1004010060
}
1004110061

1004210062
case X86::BI__builtin_ia32_kunpckdi:
1004310063
case X86::BI__builtin_ia32_kunpcksi:
1004410064
case X86::BI__builtin_ia32_kunpckhi: {
10045-
unsigned NumElts = Ops[0]->getType()->getScalarSizeInBits();
10065+
unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
1004610066
Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
1004710067
Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
1004810068
uint32_t Indices[64];

clang/lib/Headers/avx512bwintrin.h

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,78 @@ typedef unsigned long long __mmask64;
3535
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"), __min_vector_width__(512)))
3636
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw")))
3737

38+
static __inline __mmask32 __DEFAULT_FN_ATTRS
39+
_knot_mask32(__mmask32 __M)
40+
{
41+
return __builtin_ia32_knotsi(__M);
42+
}
43+
44+
static __inline __mmask64 __DEFAULT_FN_ATTRS
45+
_knot_mask64(__mmask64 __M)
46+
{
47+
return __builtin_ia32_knotdi(__M);
48+
}
49+
50+
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
51+
_kand_mask32(__mmask32 __A, __mmask32 __B)
52+
{
53+
return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B);
54+
}
55+
56+
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
57+
_kand_mask64(__mmask64 __A, __mmask64 __B)
58+
{
59+
return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B);
60+
}
61+
62+
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
63+
_kandn_mask32(__mmask32 __A, __mmask32 __B)
64+
{
65+
return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B);
66+
}
67+
68+
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
69+
_kandn_mask64(__mmask64 __A, __mmask64 __B)
70+
{
71+
return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B);
72+
}
73+
74+
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
75+
_kor_mask32(__mmask32 __A, __mmask32 __B)
76+
{
77+
return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B);
78+
}
79+
80+
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
81+
_kor_mask64(__mmask64 __A, __mmask64 __B)
82+
{
83+
return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B);
84+
}
85+
86+
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
87+
_kxnor_mask32(__mmask32 __A, __mmask32 __B)
88+
{
89+
return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B);
90+
}
91+
92+
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
93+
_kxnor_mask64(__mmask64 __A, __mmask64 __B)
94+
{
95+
return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B);
96+
}
97+
98+
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
99+
_kxor_mask32(__mmask32 __A, __mmask32 __B)
100+
{
101+
return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B);
102+
}
103+
104+
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
105+
_kxor_mask64(__mmask64 __A, __mmask64 __B)
106+
{
107+
return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B);
108+
}
109+
38110
/* Integer compare */
39111

40112
#define _mm512_cmp_epi8_mask(a, b, p) \

clang/lib/Headers/avx512dqintrin.h

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,43 @@
3030

3131
/* Define the default attributes for the functions in this file. */
3232
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"), __min_vector_width__(512)))
33+
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))
34+
35+
static __inline __mmask8 __DEFAULT_FN_ATTRS
36+
_knot_mask8(__mmask8 __M)
37+
{
38+
return __builtin_ia32_knotqi(__M);
39+
}
40+
41+
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
42+
_kand_mask8(__mmask8 __A, __mmask8 __B)
43+
{
44+
return (__mmask8)__builtin_ia32_kandqi((__mmask8)__A, (__mmask8)__B);
45+
}
46+
47+
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
48+
_kandn_mask8(__mmask8 __A, __mmask8 __B)
49+
{
50+
return (__mmask8)__builtin_ia32_kandnqi((__mmask8)__A, (__mmask8)__B);
51+
}
52+
53+
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
54+
_kor_mask8(__mmask8 __A, __mmask8 __B)
55+
{
56+
return (__mmask8)__builtin_ia32_korqi((__mmask8)__A, (__mmask8)__B);
57+
}
58+
59+
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
60+
_kxnor_mask8(__mmask8 __A, __mmask8 __B)
61+
{
62+
return (__mmask8)__builtin_ia32_kxnorqi((__mmask8)__A, (__mmask8)__B);
63+
}
64+
65+
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
66+
_kxor_mask8(__mmask8 __A, __mmask8 __B)
67+
{
68+
return (__mmask8)__builtin_ia32_kxorqi((__mmask8)__A, (__mmask8)__B);
69+
}
3370

3471
static __inline__ __m512i __DEFAULT_FN_ATTRS512
3572
_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
@@ -1257,5 +1294,6 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
12571294
(__mmask8)(U))
12581295

12591296
#undef __DEFAULT_FN_ATTRS512
1297+
#undef __DEFAULT_FN_ATTRS
12601298

12611299
#endif

clang/lib/Headers/avx512fintrin.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8369,6 +8369,13 @@ _mm512_kxor (__mmask16 __A, __mmask16 __B)
83698369
return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
83708370
}
83718371

8372+
#define _kand_mask16 _mm512_kand
8373+
#define _kandn_mask16 _mm512_kandn
8374+
#define _knot_mask16 _mm512_knot
8375+
#define _kor_mask16 _mm512_kor
8376+
#define _kxnor_mask16 _mm512_kxnor
8377+
#define _kxor_mask16 _mm512_kxor
8378+
83728379
static __inline__ void __DEFAULT_FN_ATTRS512
83738380
_mm512_stream_si512 (__m512i * __P, __m512i __A)
83748381
{

0 commit comments

Comments
 (0)