Skip to content

Commit 38f9c01

Browse files
authored
SystemZ: Stop casting fp typed atomic loads in the IR (#90768)
shouldCastAtomicLoadInIR is a hack that should be removed. Simple bitcasting of operations should be in the domain of ordinary type legalization and does not need to be done in the IR. This introduces a code quality regression due to the hack currently used to avoid using 128-bit values in the case where the floating point value is ultimately used as an integer. This would be avoidable if there were always a legal 128-bit type (like v2i64). This is a pretty niche situation so I assume it's not important. I implemented about 85% of the work necessary to make v2i64 legal, but it was taking too long and I lack the necessary familiarity with systemz to complete it. I've pushed it here for someone to pick up: https://github.com/arsenm/llvm-project/pull/new/systemz-legal-v2i64 Depends #90861
1 parent 8805465 commit 38f9c01

File tree

2 files changed

+60
-22
lines changed

2 files changed

+60
-22
lines changed

llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

Lines changed: 43 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
294294
// the atomic operations in order to exploit SystemZ instructions.
295295
setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
296296
setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
297+
setOperationAction(ISD::ATOMIC_LOAD, MVT::f128, Custom);
297298

298299
// Mark sign/zero extending atomic loads as legal, which will make
299300
// DAGCombiner fold extensions into atomic loads if possible.
@@ -935,9 +936,6 @@ bool SystemZTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const
935936

936937
TargetLowering::AtomicExpansionKind
937938
SystemZTargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const {
938-
// Lower fp128 the same way as i128.
939-
if (LI->getType()->isFP128Ty())
940-
return AtomicExpansionKind::CastToInteger;
941939
return AtomicExpansionKind::None;
942940
}
943941

@@ -4550,7 +4548,9 @@ SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
45504548
SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
45514549
SelectionDAG &DAG) const {
45524550
auto *Node = cast<AtomicSDNode>(Op.getNode());
4553-
assert(Node->getMemoryVT() == MVT::i128 && "Only custom lowering i128.");
4551+
assert(
4552+
(Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) &&
4553+
"Only custom lowering i128 or f128.");
45544554
// Use same code to handle both legal and non-legal i128 types.
45554555
SmallVector<SDValue, 2> Results;
45564556
LowerOperationWrapper(Node, Results, DAG);
@@ -6249,6 +6249,26 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
62496249
}
62506250
}
62516251

6252+
// Manually lower a bitcast to avoid introducing illegal types after type
6253+
// legalization.
6254+
static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src,
6255+
SDValue Chain, const SDLoc &SL) {
6256+
SDValue Hi =
6257+
DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::i64, Src);
6258+
SDValue Lo =
6259+
DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::i64, Src);
6260+
6261+
Hi = DAG.getBitcast(MVT::f64, Hi);
6262+
Lo = DAG.getBitcast(MVT::f64, Lo);
6263+
6264+
SDNode *Pair = DAG.getMachineNode(
6265+
SystemZ::REG_SEQUENCE, SL, MVT::f128,
6266+
{DAG.getTargetConstant(SystemZ::FP128BitRegClassID, SL, MVT::i32), Lo,
6267+
DAG.getTargetConstant(SystemZ::subreg_l64, SL, MVT::i32), Hi,
6268+
DAG.getTargetConstant(SystemZ::subreg_h64, SL, MVT::i32)});
6269+
return SDValue(Pair, 0);
6270+
}
6271+
62526272
// Lower operations with invalid operand or result types (currently used
62536273
// only for 128-bit integer types).
62546274
void
@@ -6263,8 +6283,25 @@ SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
62636283
MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
62646284
SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
62656285
DL, Tys, Ops, MVT::i128, MMO);
6266-
Results.push_back(lowerGR128ToI128(DAG, Res));
6267-
Results.push_back(Res.getValue(1));
6286+
6287+
EVT VT = N->getValueType(0);
6288+
6289+
if (VT == MVT::i128 || isTypeLegal(MVT::i128)) {
6290+
SDValue Lowered = lowerGR128ToI128(DAG, Res);
6291+
Results.push_back(DAG.getBitcast(VT, Lowered));
6292+
Results.push_back(Res.getValue(1));
6293+
} else {
6294+
// For the f128 case, after type legalization, we cannot produce a bitcast
6295+
// with an illegal type (i.e. i128), so manually lower it.
6296+
//
6297+
// FIXME: Really v2i64 should be legal, and should be used in place of
6298+
// unttyped. Then we could emit the bitcast which will potentially fold
6299+
// into the use.
6300+
SDValue Cast = expandBitCastI128ToF128(DAG, Res, Res.getValue(1), DL);
6301+
Results.push_back(Cast);
6302+
Results.push_back(Res.getValue(1));
6303+
}
6304+
62686305
break;
62696306
}
62706307
case ISD::ATOMIC_STORE: {

llvm/test/CodeGen/SystemZ/atomic-load-08.ll

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,27 @@
1-
; Test long double atomic loads. These are emitted by the Clang FE as i128
2-
; loads with a bitcast, and this test case gets converted into that form as
3-
; well by the AtomicExpand pass.
1+
; Test long double atomic loads.
42
;
53
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefixes=CHECK,BASE %s
64
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck -check-prefixes=CHECK,Z13 %s
7-
85
; TODO: Is it worth testing softfp with vector?
96
; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=+soft-float | FileCheck -check-prefixes=SOFTFP %s
107

8+
; FIXME: Without vector support, v2i64 should be legal and we should
9+
; introduce a simple bitcast, which could fold into the store use
10+
; avoid the intermediate f registers.
1111
define void @f1(ptr %ret, ptr %src) {
1212
; CHECK-LABEL: f1:
1313
; CHECK: # %bb.0:
14-
; CHECK-NEXT: lpq %r0, 0(%r3)
15-
; CHECK-NEXT: stg %r1, 8(%r2)
16-
; CHECK-NEXT: stg %r0, 0(%r2)
17-
; CHECK-NEXT: br %r14
14+
; Z13-NEXT: lpq %r0, 0(%r3)
15+
; Z13-NEXT: stg %r1, 8(%r2)
16+
; Z13-NEXT: stg %r0, 0(%r2)
17+
; Z13-NEXT: br %r14
18+
19+
; BASE: lpq %r0, 0(%r3)
20+
; BASE-NEXT: ldgr %f0, %r0
21+
; BASE-NEXT: ldgr %f2, %r1
22+
; BASE-NEXT: std %f0, 0(%r2)
23+
; BASE-NEXT: std %f2, 8(%r2)
24+
; BASE-NEXT: br %r14
1825

1926
; SOFTFP-LABEL: f1:
2027
; SOFTFP: # %bb.0:
@@ -30,23 +37,17 @@ define void @f1(ptr %ret, ptr %src) {
3037
define void @f1_fpuse(ptr %ret, ptr %src) {
3138
; CHECK-LABEL: f1_fpuse:
3239
; CHECK: # %bb.0:
33-
; BASE-NEXT: aghi %r15, -176
34-
; BASE-NEXT: .cfi_def_cfa_offset 336
35-
3640
; CHECK-NEXT: lpq %r0, 0(%r3)
3741

38-
; BASE-NEXT: stg %r1, 168(%r15)
39-
; BASE-NEXT: stg %r0, 160(%r15)
40-
; BASE-NEXT: ld %f0, 160(%r15)
41-
; BASE-NEXT: ld %f2, 168(%r15)
42+
; BASE-NEXT: ldgr %f0, %r0
43+
; BASE-NEXT: ldgr %f2, %r1
4244

4345
; Z13-NEXT: vlvgp %v0, %r0, %r1
4446
; Z13-NEXT: vrepg %v2, %v0, 1
4547

4648
; CHECK-NEXT: axbr %f0, %f0
4749
; CHECK-NEXT: std %f0, 0(%r2)
4850
; CHECK-NEXT: std %f2, 8(%r2)
49-
; BASE-NEXT: aghi %r15, 176
5051
; CHECK-NEXT: br %r14
5152

5253

0 commit comments

Comments
 (0)