Skip to content

Commit 4c92e31

Browse files
committed
[RISCV] Add tests for __builtin_parity idiom.
We use (and (ctpop X), 1) to represent parity. The generated code for i32 parity on RV64 has more instructions than necessary, which I hope to improve in a follow-up patch. Also add a missing test for i64 ctpop.
1 parent 4ae0ab0 commit 4c92e31

File tree

1 file changed

+188
-0
lines changed

1 file changed

+188
-0
lines changed

llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ declare i32 @llvm.cttz.i32(i32, i1)
1313
declare i64 @llvm.cttz.i64(i64, i1)
1414
declare i32 @llvm.ctlz.i32(i32, i1)
1515
declare i32 @llvm.ctpop.i32(i32)
16+
declare i64 @llvm.ctpop.i64(i64)
1617

1718
define i16 @test_bswap_i16(i16 %a) nounwind {
1819
; RV32I-LABEL: test_bswap_i16:
@@ -1169,3 +1170,190 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
11691170
%1 = call i32 @llvm.ctpop.i32(i32 %a)
11701171
ret i32 %1
11711172
}
1173+
1174+
; Codegen test for the 64-bit population-count intrinsic: the IR body calls
; @llvm.ctpop.i64 on %a and returns the result unchanged.
;
; What the expected output below pins down:
;  * The RV32I checks process the two 32-bit halves of %a separately (high
;    half from a1 first, low half saved in s2), using the masks 0x55555555,
;    0x33333333 and 0x0f0f0f0f kept in callee-saved registers, and multiply
;    each partial result by 0x01010101 through a call to __mulsi3. The two
;    per-half counts are added and the upper result word (a1) is set to zero.
;  * The RV64I checks materialise the 64-bit versions of the same masks with
;    lui/addiw/slli/addi chains and perform one multiply through __muldi3,
;    taking the top byte (srli by 56) as the result.
;
; NOTE(review): the multiplies are libcalls rather than mul instructions,
; presumably because the RUN lines do not enable the M extension -- confirm
; against the RUN lines at the top of the file.
; The check lines are exact expected llc output; do not hand-edit them.
define i64 @test_ctpop_i64(i64 %a) nounwind {
1175+
; RV32I-LABEL: test_ctpop_i64:
1176+
; RV32I: # %bb.0:
1177+
; RV32I-NEXT: addi sp, sp, -32
1178+
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
1179+
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
1180+
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
1181+
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
1182+
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
1183+
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
1184+
; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
1185+
; RV32I-NEXT: mv s2, a0
1186+
; RV32I-NEXT: srli a0, a1, 1
1187+
; RV32I-NEXT: lui a2, 349525
1188+
; RV32I-NEXT: addi s3, a2, 1365
1189+
; RV32I-NEXT: and a0, a0, s3
1190+
; RV32I-NEXT: sub a0, a1, a0
1191+
; RV32I-NEXT: lui a1, 209715
1192+
; RV32I-NEXT: addi s0, a1, 819
1193+
; RV32I-NEXT: and a1, a0, s0
1194+
; RV32I-NEXT: srli a0, a0, 2
1195+
; RV32I-NEXT: and a0, a0, s0
1196+
; RV32I-NEXT: add a0, a1, a0
1197+
; RV32I-NEXT: srli a1, a0, 4
1198+
; RV32I-NEXT: add a0, a0, a1
1199+
; RV32I-NEXT: lui a1, 61681
1200+
; RV32I-NEXT: addi s4, a1, -241
1201+
; RV32I-NEXT: and a0, a0, s4
1202+
; RV32I-NEXT: lui a1, 4112
1203+
; RV32I-NEXT: addi s1, a1, 257
1204+
; RV32I-NEXT: mv a1, s1
1205+
; RV32I-NEXT: call __mulsi3@plt
1206+
; RV32I-NEXT: srli s5, a0, 24
1207+
; RV32I-NEXT: srli a0, s2, 1
1208+
; RV32I-NEXT: and a0, a0, s3
1209+
; RV32I-NEXT: sub a0, s2, a0
1210+
; RV32I-NEXT: and a1, a0, s0
1211+
; RV32I-NEXT: srli a0, a0, 2
1212+
; RV32I-NEXT: and a0, a0, s0
1213+
; RV32I-NEXT: add a0, a1, a0
1214+
; RV32I-NEXT: srli a1, a0, 4
1215+
; RV32I-NEXT: add a0, a0, a1
1216+
; RV32I-NEXT: and a0, a0, s4
1217+
; RV32I-NEXT: mv a1, s1
1218+
; RV32I-NEXT: call __mulsi3@plt
1219+
; RV32I-NEXT: srli a0, a0, 24
1220+
; RV32I-NEXT: add a0, a0, s5
1221+
; RV32I-NEXT: mv a1, zero
1222+
; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
1223+
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
1224+
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
1225+
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
1226+
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
1227+
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
1228+
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
1229+
; RV32I-NEXT: addi sp, sp, 32
1230+
; RV32I-NEXT: ret
1231+
;
1232+
; RV64I-LABEL: test_ctpop_i64:
1233+
; RV64I: # %bb.0:
1234+
; RV64I-NEXT: addi sp, sp, -16
1235+
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
1236+
; RV64I-NEXT: srli a1, a0, 1
1237+
; RV64I-NEXT: lui a2, 21845
1238+
; RV64I-NEXT: addiw a2, a2, 1365
1239+
; RV64I-NEXT: slli a2, a2, 12
1240+
; RV64I-NEXT: addi a2, a2, 1365
1241+
; RV64I-NEXT: slli a2, a2, 12
1242+
; RV64I-NEXT: addi a2, a2, 1365
1243+
; RV64I-NEXT: slli a2, a2, 12
1244+
; RV64I-NEXT: addi a2, a2, 1365
1245+
; RV64I-NEXT: and a1, a1, a2
1246+
; RV64I-NEXT: sub a0, a0, a1
1247+
; RV64I-NEXT: lui a1, 13107
1248+
; RV64I-NEXT: addiw a1, a1, 819
1249+
; RV64I-NEXT: slli a1, a1, 12
1250+
; RV64I-NEXT: addi a1, a1, 819
1251+
; RV64I-NEXT: slli a1, a1, 12
1252+
; RV64I-NEXT: addi a1, a1, 819
1253+
; RV64I-NEXT: slli a1, a1, 12
1254+
; RV64I-NEXT: addi a1, a1, 819
1255+
; RV64I-NEXT: and a2, a0, a1
1256+
; RV64I-NEXT: srli a0, a0, 2
1257+
; RV64I-NEXT: and a0, a0, a1
1258+
; RV64I-NEXT: add a0, a2, a0
1259+
; RV64I-NEXT: srli a1, a0, 4
1260+
; RV64I-NEXT: add a0, a0, a1
1261+
; RV64I-NEXT: lui a1, 3855
1262+
; RV64I-NEXT: addiw a1, a1, 241
1263+
; RV64I-NEXT: slli a1, a1, 12
1264+
; RV64I-NEXT: addi a1, a1, -241
1265+
; RV64I-NEXT: slli a1, a1, 12
1266+
; RV64I-NEXT: addi a1, a1, 241
1267+
; RV64I-NEXT: slli a1, a1, 12
1268+
; RV64I-NEXT: addi a1, a1, -241
1269+
; RV64I-NEXT: and a0, a0, a1
1270+
; RV64I-NEXT: lui a1, 4112
1271+
; RV64I-NEXT: addiw a1, a1, 257
1272+
; RV64I-NEXT: slli a1, a1, 16
1273+
; RV64I-NEXT: addi a1, a1, 257
1274+
; RV64I-NEXT: slli a1, a1, 16
1275+
; RV64I-NEXT: addi a1, a1, 257
1276+
; RV64I-NEXT: call __muldi3@plt
1277+
; RV64I-NEXT: srli a0, a0, 56
1278+
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
1279+
; RV64I-NEXT: addi sp, sp, 16
1280+
; RV64I-NEXT: ret
1281+
%1 = call i64 @llvm.ctpop.i64(i64 %a)
1282+
ret i64 %1
1283+
}
1284+
1285+
; Codegen test for the 32-bit parity idiom: the IR body computes
; (and (ctpop %a), 1), i.e. the low bit of the population count of %a.
;
; Both sets of checks expect the ctpop to be replaced by an xor-fold: the
; value is xor-ed with itself shifted right by 16, 8, 4, 2 and 1, and the
; result is masked with 1. No libcall or stack frame appears in the expected
; output.
;
; The RV64I checks additionally zero-extend the i32 argument first (slli/srli
; by 32) and use srliw for the initial shift by 16.
; NOTE(review): the RV64I sequence for this i32 case is reported to be longer
; than necessary by the commit that added the test; expect these checks to
; change if codegen is improved.
; The check lines are exact expected llc output; do not hand-edit them.
define i32 @test_parity_i32(i32 %a) {
1286+
; RV32I-LABEL: test_parity_i32:
1287+
; RV32I: # %bb.0:
1288+
; RV32I-NEXT: srli a1, a0, 16
1289+
; RV32I-NEXT: xor a0, a0, a1
1290+
; RV32I-NEXT: srli a1, a0, 8
1291+
; RV32I-NEXT: xor a0, a0, a1
1292+
; RV32I-NEXT: srli a1, a0, 4
1293+
; RV32I-NEXT: xor a0, a0, a1
1294+
; RV32I-NEXT: srli a1, a0, 2
1295+
; RV32I-NEXT: xor a0, a0, a1
1296+
; RV32I-NEXT: srli a1, a0, 1
1297+
; RV32I-NEXT: xor a0, a0, a1
1298+
; RV32I-NEXT: andi a0, a0, 1
1299+
; RV32I-NEXT: ret
1300+
;
1301+
; RV64I-LABEL: test_parity_i32:
1302+
; RV64I: # %bb.0:
1303+
; RV64I-NEXT: slli a1, a0, 32
1304+
; RV64I-NEXT: srli a1, a1, 32
1305+
; RV64I-NEXT: srliw a0, a0, 16
1306+
; RV64I-NEXT: xor a0, a1, a0
1307+
; RV64I-NEXT: srli a1, a0, 8
1308+
; RV64I-NEXT: xor a0, a0, a1
1309+
; RV64I-NEXT: srli a1, a0, 4
1310+
; RV64I-NEXT: xor a0, a0, a1
1311+
; RV64I-NEXT: srli a1, a0, 2
1312+
; RV64I-NEXT: xor a0, a0, a1
1313+
; RV64I-NEXT: srli a1, a0, 1
1314+
; RV64I-NEXT: xor a0, a0, a1
1315+
; RV64I-NEXT: andi a0, a0, 1
1316+
; RV64I-NEXT: ret
1317+
%1 = call i32 @llvm.ctpop.i32(i32 %a)
1318+
%2 = and i32 %1, 1
1319+
ret i32 %2
1320+
}
1321+
1322+
; Codegen test for the 64-bit parity idiom: the IR body computes
; (and (ctpop %a), 1) on an i64, i.e. the low bit of the population count.
;
; Expected lowering pinned by the checks below:
;  * The RV32I checks first xor the two 32-bit halves of %a (a0 and a1)
;    together, then xor-fold that word with shifts of 16, 8, 4, 2 and 1, mask
;    with 1, and set the upper result word (a1) to zero.
;  * The RV64I checks run the same xor-fold directly on the 64-bit register,
;    starting with a shift of 32, then mask with 1.
; Neither expected sequence contains a libcall or a stack frame.
; The check lines are exact expected llc output; do not hand-edit them.
define i64 @test_parity_i64(i64 %a) {
1323+
; RV32I-LABEL: test_parity_i64:
1324+
; RV32I: # %bb.0:
1325+
; RV32I-NEXT: xor a0, a0, a1
1326+
; RV32I-NEXT: srli a1, a0, 16
1327+
; RV32I-NEXT: xor a0, a0, a1
1328+
; RV32I-NEXT: srli a1, a0, 8
1329+
; RV32I-NEXT: xor a0, a0, a1
1330+
; RV32I-NEXT: srli a1, a0, 4
1331+
; RV32I-NEXT: xor a0, a0, a1
1332+
; RV32I-NEXT: srli a1, a0, 2
1333+
; RV32I-NEXT: xor a0, a0, a1
1334+
; RV32I-NEXT: srli a1, a0, 1
1335+
; RV32I-NEXT: xor a0, a0, a1
1336+
; RV32I-NEXT: andi a0, a0, 1
1337+
; RV32I-NEXT: mv a1, zero
1338+
; RV32I-NEXT: ret
1339+
;
1340+
; RV64I-LABEL: test_parity_i64:
1341+
; RV64I: # %bb.0:
1342+
; RV64I-NEXT: srli a1, a0, 32
1343+
; RV64I-NEXT: xor a0, a0, a1
1344+
; RV64I-NEXT: srli a1, a0, 16
1345+
; RV64I-NEXT: xor a0, a0, a1
1346+
; RV64I-NEXT: srli a1, a0, 8
1347+
; RV64I-NEXT: xor a0, a0, a1
1348+
; RV64I-NEXT: srli a1, a0, 4
1349+
; RV64I-NEXT: xor a0, a0, a1
1350+
; RV64I-NEXT: srli a1, a0, 2
1351+
; RV64I-NEXT: xor a0, a0, a1
1352+
; RV64I-NEXT: srli a1, a0, 1
1353+
; RV64I-NEXT: xor a0, a0, a1
1354+
; RV64I-NEXT: andi a0, a0, 1
1355+
; RV64I-NEXT: ret
1356+
%1 = call i64 @llvm.ctpop.i64(i64 %a)
1357+
%2 = and i64 %1, 1
1358+
ret i64 %2
1359+
}

0 commit comments

Comments
 (0)