Skip to content

Commit 6b136b1

Browse files
committed
[Power10] Implement custom codegen for the vec_replace_elt and vec_replace_unaligned builtins.
This patch implements custom codegen for the vec_replace_elt and vec_replace_unaligned builtins. These builtins map to the @llvm.ppc.altivec.vinsw and @llvm.ppc.altivec.vinsd intrinsics depending on the arguments. The main motivation for doing custom codegen for these intrinsics is that there are float and double versions of the builtin. Normally, converting the float to an integer would be done via fptoui in the IR. This is incorrect, as fptoui truncates the value and we must ensure the value is not truncated. Therefore, we provide custom codegen that uses bitcast instead, as bitcasts do not truncate. Differential Revision: https://reviews.llvm.org/D83500
1 parent d1aa143 commit 6b136b1

File tree

6 files changed

+290
-0
lines changed

6 files changed

+290
-0
lines changed

clang/include/clang/Basic/BuiltinsPPC.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,8 @@ BUILTIN(__builtin_altivec_vinshvlx, "V8UsV8UsUiV8Us", "")
409409
BUILTIN(__builtin_altivec_vinshvrx, "V8UsV8UsUiV8Us", "")
410410
BUILTIN(__builtin_altivec_vinswvlx, "V4UiV4UiUiV4Ui", "")
411411
BUILTIN(__builtin_altivec_vinswvrx, "V4UiV4UiUiV4Ui", "")
412+
BUILTIN(__builtin_altivec_vec_replace_elt, "V4UiV4UiUiIi", "t")
413+
BUILTIN(__builtin_altivec_vec_replace_unaligned, "V4UiV4UiUiIi", "t")
412414

413415
// P10 Vector Extract built-ins.
414416
BUILTIN(__builtin_altivec_vextdubvlx, "V2ULLiV16UcV16UcUi", "")

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14224,6 +14224,63 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
1422414224
Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
1422514225
return Builder.CreateCall(F, {X, Undef});
1422614226
}
14227+
case PPC::BI__builtin_altivec_vec_replace_elt:
14228+
case PPC::BI__builtin_altivec_vec_replace_unaligned: {
14229+
// The third argument of vec_replace_elt and vec_replace_unaligned must
14230+
// be a compile time constant and will be emitted either to the vinsw
14231+
// or vinsd instruction.
14232+
ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
14233+
assert(ArgCI &&
14234+
"Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
14235+
llvm::Type *ResultType = ConvertType(E->getType());
14236+
llvm::Function *F = nullptr;
14237+
Value *Call = nullptr;
14238+
int64_t ConstArg = ArgCI->getSExtValue();
14239+
unsigned ArgWidth = Ops[1]->getType()->getPrimitiveSizeInBits();
14240+
bool Is32Bit = false;
14241+
assert((ArgWidth == 32 || ArgWidth == 64) && "Invalid argument width");
14242+
// The input to vec_replace_elt is an element index, not a byte index.
14243+
if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt)
14244+
ConstArg *= ArgWidth / 8;
14245+
if (ArgWidth == 32) {
14246+
Is32Bit = true;
14247+
// When the second argument is 32 bits, it can either be an integer or
14248+
// a float. The vinsw intrinsic is used in this case.
14249+
F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsw);
14250+
// Fix the constant according to endianness.
14251+
if (getTarget().isLittleEndian())
14252+
ConstArg = 12 - ConstArg;
14253+
} else {
14254+
// When the second argument is 64 bits, it can either be a long long or
14255+
// a double. The vinsd intrinsic is used in this case.
14256+
F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsd);
14257+
// Fix the constant for little endian.
14258+
if (getTarget().isLittleEndian())
14259+
ConstArg = 8 - ConstArg;
14260+
}
14261+
Ops[2] = ConstantInt::getSigned(Int32Ty, ConstArg);
14262+
// Depending on ArgWidth, the input vector could be a float or a double.
14263+
// If the input vector is a float type, bitcast the inputs to integers. Or,
14264+
// if the input vector is a double, bitcast the inputs to 64-bit integers.
14265+
if (!Ops[1]->getType()->isIntegerTy(ArgWidth)) {
14266+
Ops[0] = Builder.CreateBitCast(
14267+
Ops[0], Is32Bit ? llvm::FixedVectorType::get(Int32Ty, 4)
14268+
: llvm::FixedVectorType::get(Int64Ty, 2));
14269+
Ops[1] = Builder.CreateBitCast(Ops[1], Is32Bit ? Int32Ty : Int64Ty);
14270+
}
14271+
// Emit the call to vinsw or vinsd.
14272+
Call = Builder.CreateCall(F, Ops);
14273+
// Depending on the builtin, bitcast to the appropriate result type.
14274+
if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt &&
14275+
!Ops[1]->getType()->isIntegerTy())
14276+
return Builder.CreateBitCast(Call, ResultType);
14277+
else if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt &&
14278+
Ops[1]->getType()->isIntegerTy())
14279+
return Call;
14280+
else
14281+
return Builder.CreateBitCast(Call,
14282+
llvm::FixedVectorType::get(Int8Ty, 16));
14283+
}
1422714284
case PPC::BI__builtin_altivec_vpopcntb:
1422814285
case PPC::BI__builtin_altivec_vpopcnth:
1422914286
case PPC::BI__builtin_altivec_vpopcntw:

clang/lib/Headers/altivec.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17837,6 +17837,14 @@ vec_blendv(vector double __a, vector double __b,
1783717837
return __builtin_vsx_xxblendvd(__a, __b, __c);
1783817838
}
1783917839

17840+
/* vec_replace_elt */
17841+
17842+
#define vec_replace_elt __builtin_altivec_vec_replace_elt
17843+
17844+
/* vec_replace_unaligned */
17845+
17846+
#define vec_replace_unaligned __builtin_altivec_vec_replace_unaligned
17847+
1784017848
/* vec_splati */
1784117849

1784217850
#define vec_splati(__a) \

clang/lib/Sema/SemaChecking.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2570,6 +2570,17 @@ static bool isValidBPFPreserveFieldInfoArg(Expr *Arg) {
25702570
dyn_cast<ArraySubscriptExpr>(Arg->IgnoreParens()));
25712571
}
25722572

2573+
// Checks that EltTy is the same type as the element type of VectorTy.
// Returns true when the types match. Otherwise reports
// err_typecheck_call_different_arg_types at the start of Call (covering the
// call's source range, naming both types) through S, and returns false.
// NOTE(review): VectorTy is converted with castAs<VectorType>() without a
// prior check; presumably callers must guarantee it is a vector type -- confirm.
static bool isEltOfVectorTy(ASTContext &Context, CallExpr *Call, Sema &S,
2574+
QualType VectorTy, QualType EltTy) {
2575+
QualType VectorEltTy = VectorTy->castAs<VectorType>()->getElementType();
2576+
if (!Context.hasSameType(VectorEltTy, EltTy)) {
2577+
S.Diag(Call->getBeginLoc(), diag::err_typecheck_call_different_arg_types)
2578+
<< Call->getSourceRange() << VectorEltTy << EltTy;
2579+
return false;
2580+
}
2581+
return true;
2582+
}
2583+
25732584
static bool isValidBPFPreserveTypeInfoArg(Expr *Arg) {
25742585
QualType ArgType = Arg->getType();
25752586
if (ArgType->getAsPlaceholderType())
@@ -3222,6 +3233,14 @@ bool Sema::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
32223233
return SemaVSXCheck(TheCall);
32233234
case PPC::BI__builtin_altivec_vgnb:
32243235
return SemaBuiltinConstantArgRange(TheCall, 1, 2, 7);
3236+
case PPC::BI__builtin_altivec_vec_replace_elt:
3237+
case PPC::BI__builtin_altivec_vec_replace_unaligned: {
3238+
QualType VecTy = TheCall->getArg(0)->getType();
3239+
QualType EltTy = TheCall->getArg(1)->getType();
3240+
unsigned Width = Context.getIntWidth(EltTy);
3241+
return SemaBuiltinConstantArgRange(TheCall, 2, 0, Width == 32 ? 12 : 8) ||
3242+
!isEltOfVectorTy(Context, TheCall, *this, VecTy, EltTy);
3243+
}
32253244
case PPC::BI__builtin_vsx_xxeval:
32263245
return SemaBuiltinConstantArgRange(TheCall, 3, 0, 255);
32273246
case PPC::BI__builtin_altivec_vsldbi:

clang/test/CodeGen/builtins-ppc-p10vector.c

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ vector signed __int128 vsi128a, vsi128b;
2121
vector unsigned __int128 vui128a, vui128b, vui128c;
2222
vector float vfa, vfb;
2323
vector double vda, vdb;
24+
float fa;
25+
double da;
26+
signed int sia;
2427
signed int *iap;
2528
unsigned int uia, uib, *uiap;
2629
signed char *cap;
@@ -1011,6 +1014,126 @@ vector double test_vec_blend_d(void) {
10111014
return vec_blendv(vda, vdb, vullc);
10121015
}
10131016

1017+
vector signed int test_vec_replace_elt_si(void) {
1018+
// CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 0
1019+
// CHECK-BE-NEXT: ret <4 x i32>
1020+
// CHECK-LE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 12
1021+
// CHECK-LE-NEXT: ret <4 x i32>
1022+
return vec_replace_elt(vsia, sia, 0);
1023+
}
1024+
1025+
vector unsigned int test_vec_replace_elt_ui(void) {
1026+
// CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 4
1027+
// CHECK-BE-NEXT: ret <4 x i32>
1028+
// CHECK-LE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 8
1029+
// CHECK-LE-NEXT: ret <4 x i32>
1030+
return vec_replace_elt(vuia, uia, 1);
1031+
}
1032+
1033+
vector float test_vec_replace_elt_f(void) {
1034+
// CHECK-BE: bitcast float %{{.+}} to i32
1035+
// CHECK-BE-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 8
1036+
// CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <4 x float>
1037+
// CHECK-BE-NEXT: ret <4 x float>
1038+
// CHECK-LE: bitcast float %{{.+}} to i32
1039+
// CHECK-LE-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 4
1040+
// CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <4 x float>
1041+
// CHECK-LE-NEXT: ret <4 x float>
1042+
return vec_replace_elt(vfa, fa, 2);
1043+
}
1044+
1045+
vector signed long long test_vec_replace_elt_sll(void) {
1046+
// CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
1047+
// CHECK-BE-NEXT: ret <2 x i64>
1048+
// CHECK-LE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
1049+
// CHECK-LE-NEXT: ret <2 x i64>
1050+
return vec_replace_elt(vslla, llb, 0);
1051+
}
1052+
1053+
vector unsigned long long test_vec_replace_elt_ull(void) {
1054+
// CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
1055+
// CHECK-BE-NEXT: ret <2 x i64>
1056+
// CHECK-LE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
1057+
// CHECK-LE-NEXT: ret <2 x i64>
1058+
return vec_replace_elt(vulla, ulla, 0);
1059+
}
1060+
1061+
vector double test_vec_replace_elt_d(void) {
1062+
// CHECK-BE: bitcast double %{{.+}} to i64
1063+
// CHECK-BE-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
1064+
// CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <2 x double>
1065+
// CHECK-BE-NEXT: ret <2 x double>
1066+
// CHECK-LE: bitcast double %{{.+}} to i64
1067+
// CHECK-LE-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
1068+
// CHECK-LE-NEXT: bitcast <2 x i64> %{{.*}} to <2 x double>
1069+
// CHECK-LE-NEXT: ret <2 x double>
1070+
return vec_replace_elt(vda, da, 1);
1071+
}
1072+
1073+
vector unsigned char test_vec_replace_unaligned_si(void) {
1074+
// CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 6
1075+
// CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
1076+
// CHECK-BE-NEXT: ret <16 x i8>
1077+
// CHECK-LE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 6
1078+
// CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
1079+
// CHECK-LE-NEXT: ret <16 x i8>
1080+
return vec_replace_unaligned(vsia, sia, 6);
1081+
}
1082+
1083+
vector unsigned char test_vec_replace_unaligned_ui(void) {
1084+
// CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 8
1085+
// CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
1086+
// CHECK-BE-NEXT: ret <16 x i8>
1087+
// CHECK-LE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 4
1088+
// CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
1089+
// CHECK-LE-NEXT: ret <16 x i8>
1090+
return vec_replace_unaligned(vuia, uia, 8);
1091+
}
1092+
1093+
vector unsigned char test_vec_replace_unaligned_f(void) {
1094+
// CHECK-BE: bitcast float %{{.+}} to i32
1095+
// CHECK-BE-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 12
1096+
// CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
1097+
// CHECK-BE-NEXT: ret <16 x i8>
1098+
// CHECK-LE: bitcast float %{{.+}} to i32
1099+
// CHECK-LE-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 0
1100+
// CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
1101+
// CHECK-LE-NEXT: ret <16 x i8>
1102+
return vec_replace_unaligned(vfa, fa, 12);
1103+
}
1104+
1105+
vector unsigned char test_vec_replace_unaligned_sll(void) {
1106+
// CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 6
1107+
// CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
1108+
// CHECK-BE-NEXT: ret <16 x i8>
1109+
// CHECK-LE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 2
1110+
// CHECK-LE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
1111+
// CHECK-LE-NEXT: ret <16 x i8>
1112+
return vec_replace_unaligned(vslla, llb, 6);
1113+
}
1114+
1115+
vector unsigned char test_vec_replace_unaligned_ull(void) {
1116+
// CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 7
1117+
// CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
1118+
// CHECK-BE-NEXT: ret <16 x i8>
1119+
// CHECK-LE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 1
1120+
// CHECK-LE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
1121+
// CHECK-LE-NEXT: ret <16 x i8>
1122+
return vec_replace_unaligned(vulla, ulla, 7);
1123+
}
1124+
1125+
vector unsigned char test_vec_replace_unaligned_d(void) {
1126+
// CHECK-BE: bitcast double %{{.+}} to i64
1127+
// CHECK-BE-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
1128+
// CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
1129+
// CHECK-BE-NEXT: ret <16 x i8>
1130+
// CHECK-LE: bitcast double %{{.+}} to i64
1131+
// CHECK-LE-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
1132+
// CHECK-LE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
1133+
// CHECK-LE-NEXT: ret <16 x i8>
1134+
return vec_replace_unaligned(vda, da, 8);
1135+
}
1136+
10141137
vector unsigned char test_vec_insertl_uc(void) {
10151138
// CHECK-BE: @llvm.ppc.altivec.vinsblx(<16 x i8> %{{.+}}, i32 %{{.+}}, i32
10161139
// CHECK-BE-NEXT: ret <16 x i8>
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
// REQUIRES: powerpc-registered-target
2+
3+
// RUN: %clang_cc1 -target-feature +vsx -target-cpu pwr10 \
4+
// RUN: -triple powerpc64le-unknown-unknown -fsyntax-only %s -verify
5+
// RUN: %clang_cc1 -target-feature +vsx -target-cpu pwr10 \
6+
// RUN: -triple powerpc64-unknown-unknown -fsyntax-only %s -verify
7+
8+
#include <altivec.h>
9+
10+
vector signed int vsia;
11+
vector unsigned int vuia;
12+
vector signed long long vslla;
13+
vector unsigned long long vulla;
14+
vector float vfa;
15+
vector double vda;
16+
signed int sia;
17+
unsigned int uia;
18+
signed long long slla;
19+
unsigned long long ulla;
20+
float fa;
21+
double da;
22+
23+
vector signed int test_vec_replace_elt_si(void) {
24+
return vec_replace_elt(vsia, sia, 13); // expected-error {{argument value 13 is outside the valid range [0, 12]}}
25+
}
26+
27+
vector unsigned int test_vec_replace_elt_ui(void) {
28+
return vec_replace_elt(vuia, sia, 1); // expected-error {{arguments are of different types ('unsigned int' vs 'int')}}
29+
}
30+
31+
vector float test_vec_replace_elt_f(void) {
32+
return vec_replace_elt(vfa, fa, 20); // expected-error {{argument value 20 is outside the valid range [0, 12]}}
33+
}
34+
35+
vector float test_vec_replace_elt_f_2(void) {
36+
return vec_replace_elt(vfa, da, 0); // expected-error {{arguments are of different types ('float' vs 'double')}}
37+
}
38+
39+
vector signed long long test_vec_replace_elt_sll(void) {
40+
return vec_replace_elt(vslla, slla, 9); // expected-error {{argument value 9 is outside the valid range [0, 8]}}
41+
}
42+
43+
vector unsigned long long test_vec_replace_elt_ull(void) {
44+
return vec_replace_elt(vulla, vda, 0); // expected-error {{arguments are of different types ('unsigned long long' vs '__vector double' (vector of 2 'double' values))}}
45+
}
46+
47+
vector unsigned long long test_vec_replace_elt_ull_2(void) {
48+
return vec_replace_elt(vulla, vulla, vsia); // expected-error {{argument to '__builtin_altivec_vec_replace_elt' must be a constant integer}}
49+
}
50+
51+
vector double test_vec_replace_elt_d(void) {
52+
return vec_replace_elt(vda, da, 33); // expected-error {{argument value 33 is outside the valid range [0, 8]}}
53+
}
54+
55+
vector unsigned char test_vec_replace_unaligned_si(void) {
56+
return vec_replace_unaligned(vsia, da, 6); // expected-error {{arguments are of different types ('int' vs 'double')}}
57+
}
58+
59+
vector unsigned char test_vec_replace_unaligned_ui(void) {
60+
return vec_replace_unaligned(vuia, uia, 14); // expected-error {{argument value 14 is outside the valid range [0, 12]}}
61+
}
62+
63+
vector unsigned char test_vec_replace_unaligned_f(void) {
64+
return vec_replace_unaligned(vfa, fa, 19); // expected-error {{argument value 19 is outside the valid range [0, 12]}}
65+
}
66+
67+
vector unsigned char test_vec_replace_unaligned_sll(void) {
68+
return vec_replace_unaligned(vslla, fa, 0); // expected-error {{arguments are of different types ('long long' vs 'float')}}
69+
}
70+
71+
vector unsigned char test_vec_replace_unaligned_ull(void) {
72+
return vec_replace_unaligned(vulla, ulla, 12); // expected-error {{argument value 12 is outside the valid range [0, 8]}}
73+
}
74+
75+
vector unsigned char test_vec_replace_unaligned_d(void) {
76+
return vec_replace_unaligned(vda, fa, 8); // expected-error {{arguments are of different types ('double' vs 'float')}}
77+
}
78+
79+
vector unsigned char test_vec_replace_unaligned_d_2(void) {
80+
return vec_replace_unaligned(vda, vda, da); // expected-error {{argument to '__builtin_altivec_vec_replace_unaligned' must be a constant integer}}
81+
}

0 commit comments

Comments
 (0)