Skip to content

Commit 6bf2b40

Browse files
committed
[WebAssembly] Expand SIMD shifts while V8's implementation disagrees
Summary: V8 currently implements SIMD shifts as taking an immediate operand, which disagrees with the spec proposal and the toolchain implementation. As a stopgap measure to get things working, unroll all vector shifts. Since this is a temporary measure, there are no tests. Reviewers: aheejin, dschuff. Subscribers: sbc100, jgravelle-google, sunfish, dmgreen, llvm-commits. Differential Revision: https://reviews.llvm.org/D56520 llvm-svn: 351151
1 parent 33eb4d9 commit 6bf2b40

File tree

2 files changed

+61
-7
lines changed

2 files changed

+61
-7
lines changed

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1155,19 +1155,49 @@ WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
11551155
return SDValue();
11561156
}
11571157

1158+
/// Unroll the vector shift \p Op, first masking the shift amount when the
/// vector lanes are narrower than 32 bits.
///
/// Lanes of at least 32 bits are unrolled as-is. For narrower lanes the shift
/// amount (operand 1 of \p Op) is ANDed with the lane width in bits minus one,
/// and an equivalent shift node (same opcode, result type and vector operand)
/// that uses the masked amount is unrolled instead.
///
/// \param Op  The vector shift node to expand.
/// \param DAG The SelectionDAG used to build the replacement nodes.
/// \returns The result of SelectionDAG::UnrollVectorOp on the (possibly
///          rebuilt) shift node.
static SDValue UnrollVectorShift(SDValue Op, SelectionDAG &DAG) {
1159+
EVT LaneT = Op.getSimpleValueType().getVectorElementType();
1160+
// 32-bit and 64-bit unrolled shifts will have proper semantics
1161+
if (LaneT.bitsGE(MVT::i32))
1162+
return DAG.UnrollVectorOp(Op.getNode());
1163+
// Otherwise mask the shift value to get proper semantics from 32-bit shift
1164+
SDLoc DL(Op);
1165+
SDValue ShiftVal = Op.getOperand(1);
1166+
uint64_t MaskVal = LaneT.getSizeInBits() - 1; // lane width in bits, minus one
1167+
SDValue MaskedShiftVal = DAG.getNode(
1168+
ISD::AND, // mask opcode
1169+
DL, ShiftVal.getValueType(), // masked value type
1170+
ShiftVal, // original shift value operand
1171+
DAG.getConstant(MaskVal, DL, ShiftVal.getValueType()) // mask operand
1172+
);
1173+
1174+
return DAG.UnrollVectorOp(
1175+
DAG.getNode(Op.getOpcode(), // original shift opcode
1176+
DL, Op.getValueType(), // original return type
1177+
Op.getOperand(0), // original vector operand
1178+
MaskedShiftVal // new masked shift value operand
1179+
)
1180+
.getNode());
1181+
}
1182+
11581183
SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
11591184
SelectionDAG &DAG) const {
11601185
SDLoc DL(Op);
11611186

11621187
// Only manually lower vector shifts
11631188
assert(Op.getSimpleValueType().isVector());
11641189

1190+
// Expand all vector shifts until V8 fixes its implementation
1191+
// TODO: remove this once V8 is fixed
1192+
if (!Subtarget->hasUnimplementedSIMD128())
1193+
return UnrollVectorShift(Op, DAG);
1194+
11651195
// Unroll non-splat vector shifts
11661196
BuildVectorSDNode *ShiftVec;
11671197
SDValue SplatVal;
11681198
if (!(ShiftVec = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode())) ||
11691199
!(SplatVal = ShiftVec->getSplatValue()))
1170-
return DAG.UnrollVectorOp(Op.getNode());
1200+
return UnrollVectorShift(Op, DAG);
11711201

11721202
// All splats except i64x2 const splats are handled by patterns
11731203
ConstantSDNode *SplatConst = dyn_cast<ConstantSDNode>(SplatVal);

llvm/test/CodeGen/WebAssembly/simd-arith.ll

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,11 @@ define <16 x i8> @shl_const_v16i8(<16 x i8> %v) {
9090
; NO-SIMD128-NOT: i8x16
9191
; SIMD128-NEXT: .functype shl_vec_v16i8 (v128, v128) -> (v128){{$}}
9292
; SIMD128-NEXT: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
93-
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
93+
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
94+
; SIMD128-NEXT: i8x16.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
95+
; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
96+
; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
97+
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
9498
; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
9599
; SIMD128-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
96100
; Skip 14 lanes
@@ -122,7 +126,11 @@ define <16 x i8> @shr_s_v16i8(<16 x i8> %v, i8 %x) {
122126
; NO-SIMD128-NOT: i8x16
123127
; SIMD128-NEXT: .functype shr_s_vec_v16i8 (v128, v128) -> (v128){{$}}
124128
; SIMD128-NEXT: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
125-
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
129+
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
130+
; SIMD128-NEXT: i8x16.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
131+
; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
132+
; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
133+
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
126134
; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
127135
; SIMD128-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
128136
; Skip 14 lanes
@@ -154,7 +162,11 @@ define <16 x i8> @shr_u_v16i8(<16 x i8> %v, i8 %x) {
154162
; NO-SIMD128-NOT: i8x16
155163
; SIMD128-NEXT: .functype shr_u_vec_v16i8 (v128, v128) -> (v128){{$}}
156164
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
157-
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
165+
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
166+
; SIMD128-NEXT: i8x16.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
167+
; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
168+
; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
169+
; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
158170
; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
159171
; SIMD128-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
160172
; Skip 14 lanes
@@ -304,7 +316,11 @@ define <8 x i16> @shl_const_v8i16(<8 x i16> %v) {
304316
; NO-SIMD128-NOT: i16x8
305317
; SIMD128-NEXT: .functype shl_vec_v8i16 (v128, v128) -> (v128){{$}}
306318
; SIMD128-NEXT: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
307-
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
319+
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
320+
; SIMD128-NEXT: i16x8.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
321+
; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
322+
; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
323+
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
308324
; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
309325
; SIMD128-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
310326
; Skip 6 lanes
@@ -335,7 +351,11 @@ define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) {
335351
; NO-SIMD128-NOT: i16x8
336352
; SIMD128-NEXT: .functype shr_s_vec_v8i16 (v128, v128) -> (v128){{$}}
337353
; SIMD128-NEXT: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
338-
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
354+
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
355+
; SIMD128-NEXT: i16x8.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
356+
; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
357+
; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
358+
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
339359
; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
340360
; SIMD128-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
341361
; Skip 6 lanes
@@ -366,7 +386,11 @@ define <8 x i16> @shr_u_v8i16(<8 x i16> %v, i16 %x) {
366386
; NO-SIMD128-NOT: i16x8
367387
; SIMD128-NEXT: .functype shr_u_vec_v8i16 (v128, v128) -> (v128){{$}}
368388
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
369-
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
389+
; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
390+
; SIMD128-NEXT: i16x8.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
391+
; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
392+
; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
393+
; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
370394
; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
371395
; SIMD128-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
372396
; Skip 6 lanes

0 commit comments

Comments
 (0)