Commit 3a573dc

[RISCV][VLOPT] Add support for integer multiply-add instructions (#112216)
This teaches the RISC-V VL optimizer's isSupportedInstr list about the single-width integer multiply-add instructions (vmacc, vnmsac, vmadd, and vnmsub, in both .vv and .vx forms) and adds VL-optimization tests for each of them.
1 parent 74486dc commit 3a573dc
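For context, the VL optimizer shrinks a vector instruction's VL when its users only demand fewer elements than it computes, and isSupportedInstr is the list of opcodes the pass is allowed to rewrite. Every new test below follows the same shape, sketched here in the style of the added vmacc_vv test (the @vmacc_then_vadd name and the %acc/%sum value names are illustrative, not taken from the commit): the multiply-add is issued at VLMAX (AVL of -1) but only feeds a vadd that runs on %vl elements, so with the optimizer enabled the vmacc itself is emitted under VL = %vl, as the VLOPT check lines in the diff show.

; Minimal sketch of the pattern the new tests exercise, assuming the intrinsic
; signatures used in vl-opt-instrs.ll below.
define <vscale x 4 x i32> @vmacc_then_vadd(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
  ; vmacc at VLMAX (AVL of -1); the trailing iXLen 0 is the tail/mask policy operand.
  %acc = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1, iXLen 0)
  ; The accumulated result is only consumed by a vadd running on %vl elements,
  ; so the VL optimizer may lower the vmacc's VL to %vl.
  %sum = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %acc, <vscale x 4 x i32> %b, iXLen %vl)
  ret <vscale x 4 x i32> %sum
}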

2 files changed (+184, -1 lines)

llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp

Lines changed: 8 additions & 1 deletion
@@ -570,7 +570,14 @@ static bool isSupportedInstr(const MachineInstr &MI) {
   case RISCV::VWMULU_VV:
   case RISCV::VWMULU_VX:
   // Vector Single-Width Integer Multiply-Add Instructions
-  // FIXME: Add support
+  case RISCV::VMACC_VV:
+  case RISCV::VMACC_VX:
+  case RISCV::VNMSAC_VV:
+  case RISCV::VNMSAC_VX:
+  case RISCV::VMADD_VV:
+  case RISCV::VMADD_VX:
+  case RISCV::VNMSUB_VV:
+  case RISCV::VNMSUB_VX:
   // Vector Widening Integer Multiply-Add Instructions
   case RISCV::VWMACCU_VV:
   case RISCV::VWMACCU_VX:

llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll

Lines changed: 176 additions & 0 deletions
@@ -1269,6 +1269,182 @@ define <vscale x 4 x i32> @vwmacc_vv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %
   ret <vscale x 4 x i32> %2
 }
 
+define <vscale x 4 x i32> @vmacc_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vmacc_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vmacc.vv v8, v8, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v8, v10
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vmacc_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vmacc.vv v8, v8, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v8, v10
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vmacc_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vmacc_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vmv2r.v v10, v8
+; NOVLOPT-NEXT: vmacc.vx v10, a0, v8
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vmacc_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; VLOPT-NEXT: vmv2r.v v10, v8
+; VLOPT-NEXT: vmacc.vx v10, a0, v8
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v10, v8
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %a, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vmadd_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vmadd_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vmadd.vv v8, v8, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v8, v10
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vmadd_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vmadd.vv v8, v8, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v8, v10
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vmadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vmadd_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vmadd_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vmv2r.v v10, v8
+; NOVLOPT-NEXT: vmadd.vx v10, a0, v8
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vmadd_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; VLOPT-NEXT: vmv2r.v v10, v8
+; VLOPT-NEXT: vmadd.vx v10, a0, v8
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v10, v8
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vmadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %a, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vnmsac_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vnmsac_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vnmsac.vv v8, v8, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v8, v10
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vnmsac_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vnmsac.vv v8, v8, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v8, v10
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vnmsac.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vnmsac_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vnmsac_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vmv2r.v v10, v8
+; NOVLOPT-NEXT: vnmsac.vx v10, a0, v8
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vnmsac_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; VLOPT-NEXT: vmv2r.v v10, v8
+; VLOPT-NEXT: vnmsac.vx v10, a0, v8
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v10, v8
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vnmsac.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %a, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vnmsub_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vnmsub_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vnmsub.vv v8, v8, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v8, v10
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vnmsub_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vnmsub.vv v8, v8, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v8, v10
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vnmsub.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vnmsub_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vnmsub_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vmv2r.v v10, v8
+; NOVLOPT-NEXT: vnmsub.vx v10, a0, v8
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vnmsub_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; VLOPT-NEXT: vmv2r.v v10, v8
+; VLOPT-NEXT: vnmsub.vx v10, a0, v8
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v10, v8
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vnmsub.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %a, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
 define <vscale x 4 x i32> @vwmacc_vx(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen %vl) {
 ; NOVLOPT-LABEL: vwmacc_vx:
 ; NOVLOPT: # %bb.0:

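The NOVLOPT and VLOPT prefixes checked above correspond to running the same IR through llc without and with the VL optimizer. The RUN lines live at the top of vl-opt-instrs.ll and are not part of this hunk; the pair below is only a sketch of what such RUN lines typically look like (the -riscv-enable-vl-optimizer flag name and the sed substitution of the iXLen placeholder are assumptions, not something visible in this diff):

; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
; RUN:   | FileCheck %s --check-prefixes=NOVLOPT
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v -riscv-enable-vl-optimizer \
; RUN:   -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT

The CHECK lines themselves are normally regenerated with llvm/utils/update_llc_test_checks.py rather than written by hand.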