Skip to content

Commit cfaeee6

Browse files
authored
release/18.x: [SystemZ] Fix overflow flag for i128 USUBO (#86491)
We use the VSCBIQ/VSBIQ/VSBCBIQ family of instructions to implement USUBO/USUBO_CARRY for the i128 data type. However, these instructions use an inverted sense of the borrow indication flag (a value of 1 indicates *no* borrow, while a value of 0 indicates borrow). This does not match the semantics of the boolean "overflow" flag of the USUBO/USUBO_CARRY ISD nodes. Fix this by generating code to explicitly invert the flag. These inversions cancel out if the result of a USUBO feeds into a USUBO_CARRY. To avoid unnecessary zero-extend operations, also improve the DAGCombine handling of ZERO_EXTEND to optimize (zext (xor (trunc))) sequences where appropriate. Fixes: #83268
1 parent 767b61c commit cfaeee6

File tree

3 files changed

+58
-0
lines changed

3 files changed

+58
-0
lines changed

llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4252,6 +4252,7 @@ SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
42524252
if (N->getValueType(0) == MVT::i128) {
42534253
unsigned BaseOp = 0;
42544254
unsigned FlagOp = 0;
4255+
bool IsBorrow = false;
42554256
switch (Op.getOpcode()) {
42564257
default: llvm_unreachable("Unknown instruction!");
42574258
case ISD::UADDO:
@@ -4261,13 +4262,17 @@ SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
42614262
case ISD::USUBO:
42624263
BaseOp = ISD::SUB;
42634264
FlagOp = SystemZISD::VSCBI;
4265+
IsBorrow = true;
42644266
break;
42654267
}
42664268
SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS);
42674269
SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS);
42684270
Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
42694271
DAG.getValueType(MVT::i1));
42704272
Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4273+
if (IsBorrow)
4274+
Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4275+
Flag, DAG.getConstant(1, DL, Flag.getValueType()));
42714276
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
42724277
}
42734278

@@ -4340,6 +4345,7 @@ SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
43404345
if (VT == MVT::i128) {
43414346
unsigned BaseOp = 0;
43424347
unsigned FlagOp = 0;
4348+
bool IsBorrow = false;
43434349
switch (Op.getOpcode()) {
43444350
default: llvm_unreachable("Unknown instruction!");
43454351
case ISD::UADDO_CARRY:
@@ -4349,14 +4355,21 @@ SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
43494355
case ISD::USUBO_CARRY:
43504356
BaseOp = SystemZISD::VSBI;
43514357
FlagOp = SystemZISD::VSBCBI;
4358+
IsBorrow = true;
43524359
break;
43534360
}
4361+
if (IsBorrow)
4362+
Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(),
4363+
Carry, DAG.getConstant(1, DL, Carry.getValueType()));
43544364
Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128);
43554365
SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry);
43564366
SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry);
43574367
Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
43584368
DAG.getValueType(MVT::i1));
43594369
Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4370+
if (IsBorrow)
4371+
Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4372+
Flag, DAG.getConstant(1, DL, Flag.getValueType()));
43604373
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
43614374
}
43624375

@@ -6611,6 +6624,27 @@ SDValue SystemZTargetLowering::combineZERO_EXTEND(
66116624
return NewSelect;
66126625
}
66136626
}
6627+
// Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size
6628+
// of the result is smaller than the size of X and all the truncated bits
6629+
// of X are already zero.
6630+
if (N0.getOpcode() == ISD::XOR &&
6631+
N0.hasOneUse() && N0.getOperand(0).hasOneUse() &&
6632+
N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
6633+
N0.getOperand(1).getOpcode() == ISD::Constant) {
6634+
SDValue X = N0.getOperand(0).getOperand(0);
6635+
if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) {
6636+
KnownBits Known = DAG.computeKnownBits(X);
6637+
APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(),
6638+
N0.getValueSizeInBits(),
6639+
VT.getSizeInBits());
6640+
if (TruncatedBits.isSubsetOf(Known.Zero)) {
6641+
X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
6642+
APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
6643+
return DAG.getNode(ISD::XOR, SDLoc(N0), VT,
6644+
X, DAG.getConstant(Mask, SDLoc(N0), VT));
6645+
}
6646+
}
6647+
}
66146648
return SDValue();
66156649
}
66166650

llvm/test/CodeGen/SystemZ/int-usub-12.ll

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ define zeroext i1 @f1(i128 %a, i128 %b, ptr %res) {
1111
; CHECK-NEXT: vscbiq %v2, %v1, %v0
1212
; CHECK-NEXT: vlgvg %r2, %v2, 1
1313
; CHECK-NEXT: vsq %v0, %v1, %v0
14+
; CHECK-NEXT: xilf %r2, 1
1415
; CHECK-NEXT: vst %v0, 0(%r4), 3
1516
; CHECK-NEXT: br %r14
1617
%t = call {i128, i1} @llvm.usub.with.overflow.i128(i128 %a, i128 %b)
@@ -27,6 +28,7 @@ define zeroext i1 @f2(i128 %a, i128 %b) {
2728
; CHECK-NEXT: vl %v1, 0(%r2), 3
2829
; CHECK-NEXT: vscbiq %v0, %v1, %v0
2930
; CHECK-NEXT: vlgvg %r2, %v0, 1
31+
; CHECK-NEXT: xilf %r2, 1
3032
; CHECK-NEXT: br %r14
3133
%t = call {i128, i1} @llvm.usub.with.overflow.i128(i128 %a, i128 %b)
3234
%obit = extractvalue {i128, i1} %t, 1
@@ -46,5 +48,25 @@ define i128 @f3(i128 %a, i128 %b) {
4648
ret i128 %val
4749
}
4850

51+
define i128 @f4(i128 %a, i128 %b) {
52+
; CHECK-LABEL: f4:
53+
; CHECK: # %bb.0:
54+
; CHECK-NEXT: vl %v0, 0(%r4), 3
55+
; CHECK-NEXT: vl %v1, 0(%r3), 3
56+
; CHECK-NEXT: vscbiq %v2, %v1, %v0
57+
; CHECK-NEXT: vlgvf %r0, %v2, 3
58+
; CHECK-NEXT: vgbm %v2, 0
59+
; CHECK-NEXT: xilf %r0, 1
60+
; CHECK-NEXT: jl .LBB3_2
61+
; CHECK-NEXT: # %bb.1:
62+
; CHECK-NEXT: vsq %v2, %v1, %v0
63+
; CHECK-NEXT: .LBB3_2:
64+
; CHECK-NEXT: vst %v2, 0(%r2), 3
65+
; CHECK-NEXT: br %r14
66+
%val = call i128 @llvm.usub.sat.i128(i128 %a, i128 %b)
67+
ret i128 %val
68+
}
69+
4970
declare {i128, i1} @llvm.usub.with.overflow.i128(i128, i128) nounwind readnone
71+
declare i128 @llvm.usub.sat.i128(i128, i128) nounwind readnone
5072

llvm/test/CodeGen/SystemZ/int-usub-13.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ define zeroext i1 @f1(i256 %a, i256 %b, ptr %res) {
1515
; CHECK-NEXT: vlgvg %r2, %v5, 1
1616
; CHECK-NEXT: vsbiq %v0, %v1, %v0, %v4
1717
; CHECK-NEXT: vsq %v1, %v3, %v2
18+
; CHECK-NEXT: xilf %r2, 1
1819
; CHECK-NEXT: vst %v1, 16(%r4), 3
1920
; CHECK-NEXT: vst %v0, 0(%r4), 3
2021
; CHECK-NEXT: br %r14
@@ -35,6 +36,7 @@ define zeroext i1 @f2(i256 %a, i256 %b) {
3536
; CHECK-NEXT: vscbiq %v2, %v3, %v2
3637
; CHECK-NEXT: vsbcbiq %v0, %v1, %v0, %v2
3738
; CHECK-NEXT: vlgvg %r2, %v0, 1
39+
; CHECK-NEXT: xilf %r2, 1
3840
; CHECK-NEXT: br %r14
3941
%t = call {i256, i1} @llvm.usub.with.overflow.i256(i256 %a, i256 %b)
4042
%obit = extractvalue {i256, i1} %t, 1

0 commit comments

Comments
 (0)