@@ -13,6 +13,7 @@ declare i32 @llvm.cttz.i32(i32, i1)
13
13
declare i64 @llvm.cttz.i64 (i64 , i1 )
14
14
declare i32 @llvm.ctlz.i32 (i32 , i1 )
15
15
declare i32 @llvm.ctpop.i32 (i32 )
16
+ declare i64 @llvm.ctpop.i64 (i64 )
16
17
17
18
define i16 @test_bswap_i16 (i16 %a ) nounwind {
18
19
; RV32I-LABEL: test_bswap_i16:
@@ -1169,3 +1170,190 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
1169
1170
%1 = call i32 @llvm.ctpop.i32 (i32 %a )
1170
1171
ret i32 %1
1171
1172
}
1173
+
1174
; test_ctpop_i64: returns the result of llvm.ctpop.i64 applied to %a.
;
; Expected RV32I lowering: the same bit-counting sequence is run twice, first
; on the value arriving in a1 and then on the value arriving in a0 (saved in
; s2 across the first libcall). Each pass computes
;   x = x - ((x >> 1) & 0x55555555)
;   x = (x & 0x33333333) + ((x >> 2) & 0x33333333)
;   x = (x + (x >> 4)) & 0x0F0F0F0F
;   x = (x * 0x01010101) >> 24      (multiply done by the __mulsi3 libcall)
; The masks and multiplier are kept in s0, s1, s3 and s4 and the first
; pass's result in s5, which is why s0-s5 and ra are spilled and reloaded.
; The two partial results are added into a0 and a1 is set to zero.
;
; Expected RV64I lowering: one pass over the full 64-bit value, with the
; 64-bit masks 0x5555555555555555, 0x3333333333333333, 0x0F0F0F0F0F0F0F0F
; and the multiplier 0x0101010101010101 built by lui/addiw/slli/addi chains,
; a __muldi3 libcall, and a final shift right by 56. Only ra is spilled.
+ define i64 @test_ctpop_i64 (i64 %a ) nounwind {
1175
+ ; RV32I-LABEL: test_ctpop_i64:
1176
+ ; RV32I: # %bb.0:
1177
+ ; RV32I-NEXT: addi sp, sp, -32
1178
+ ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
1179
+ ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
1180
+ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
1181
+ ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
1182
+ ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
1183
+ ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
1184
+ ; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
1185
+ ; RV32I-NEXT: mv s2, a0
1186
+ ; RV32I-NEXT: srli a0, a1, 1
1187
+ ; RV32I-NEXT: lui a2, 349525
1188
+ ; RV32I-NEXT: addi s3, a2, 1365
1189
+ ; RV32I-NEXT: and a0, a0, s3
1190
+ ; RV32I-NEXT: sub a0, a1, a0
1191
+ ; RV32I-NEXT: lui a1, 209715
1192
+ ; RV32I-NEXT: addi s0, a1, 819
1193
+ ; RV32I-NEXT: and a1, a0, s0
1194
+ ; RV32I-NEXT: srli a0, a0, 2
1195
+ ; RV32I-NEXT: and a0, a0, s0
1196
+ ; RV32I-NEXT: add a0, a1, a0
1197
+ ; RV32I-NEXT: srli a1, a0, 4
1198
+ ; RV32I-NEXT: add a0, a0, a1
1199
+ ; RV32I-NEXT: lui a1, 61681
1200
+ ; RV32I-NEXT: addi s4, a1, -241
1201
+ ; RV32I-NEXT: and a0, a0, s4
1202
+ ; RV32I-NEXT: lui a1, 4112
1203
+ ; RV32I-NEXT: addi s1, a1, 257
1204
+ ; RV32I-NEXT: mv a1, s1
1205
+ ; RV32I-NEXT: call __mulsi3@plt
1206
+ ; RV32I-NEXT: srli s5, a0, 24
1207
+ ; RV32I-NEXT: srli a0, s2, 1
1208
+ ; RV32I-NEXT: and a0, a0, s3
1209
+ ; RV32I-NEXT: sub a0, s2, a0
1210
+ ; RV32I-NEXT: and a1, a0, s0
1211
+ ; RV32I-NEXT: srli a0, a0, 2
1212
+ ; RV32I-NEXT: and a0, a0, s0
1213
+ ; RV32I-NEXT: add a0, a1, a0
1214
+ ; RV32I-NEXT: srli a1, a0, 4
1215
+ ; RV32I-NEXT: add a0, a0, a1
1216
+ ; RV32I-NEXT: and a0, a0, s4
1217
+ ; RV32I-NEXT: mv a1, s1
1218
+ ; RV32I-NEXT: call __mulsi3@plt
1219
+ ; RV32I-NEXT: srli a0, a0, 24
1220
+ ; RV32I-NEXT: add a0, a0, s5
1221
+ ; RV32I-NEXT: mv a1, zero
1222
+ ; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
1223
+ ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
1224
+ ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
1225
+ ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
1226
+ ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
1227
+ ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
1228
+ ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
1229
+ ; RV32I-NEXT: addi sp, sp, 32
1230
+ ; RV32I-NEXT: ret
1231
+ ;
1232
+ ; RV64I-LABEL: test_ctpop_i64:
1233
+ ; RV64I: # %bb.0:
1234
+ ; RV64I-NEXT: addi sp, sp, -16
1235
+ ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
1236
+ ; RV64I-NEXT: srli a1, a0, 1
1237
+ ; RV64I-NEXT: lui a2, 21845
1238
+ ; RV64I-NEXT: addiw a2, a2, 1365
1239
+ ; RV64I-NEXT: slli a2, a2, 12
1240
+ ; RV64I-NEXT: addi a2, a2, 1365
1241
+ ; RV64I-NEXT: slli a2, a2, 12
1242
+ ; RV64I-NEXT: addi a2, a2, 1365
1243
+ ; RV64I-NEXT: slli a2, a2, 12
1244
+ ; RV64I-NEXT: addi a2, a2, 1365
1245
+ ; RV64I-NEXT: and a1, a1, a2
1246
+ ; RV64I-NEXT: sub a0, a0, a1
1247
+ ; RV64I-NEXT: lui a1, 13107
1248
+ ; RV64I-NEXT: addiw a1, a1, 819
1249
+ ; RV64I-NEXT: slli a1, a1, 12
1250
+ ; RV64I-NEXT: addi a1, a1, 819
1251
+ ; RV64I-NEXT: slli a1, a1, 12
1252
+ ; RV64I-NEXT: addi a1, a1, 819
1253
+ ; RV64I-NEXT: slli a1, a1, 12
1254
+ ; RV64I-NEXT: addi a1, a1, 819
1255
+ ; RV64I-NEXT: and a2, a0, a1
1256
+ ; RV64I-NEXT: srli a0, a0, 2
1257
+ ; RV64I-NEXT: and a0, a0, a1
1258
+ ; RV64I-NEXT: add a0, a2, a0
1259
+ ; RV64I-NEXT: srli a1, a0, 4
1260
+ ; RV64I-NEXT: add a0, a0, a1
1261
+ ; RV64I-NEXT: lui a1, 3855
1262
+ ; RV64I-NEXT: addiw a1, a1, 241
1263
+ ; RV64I-NEXT: slli a1, a1, 12
1264
+ ; RV64I-NEXT: addi a1, a1, -241
1265
+ ; RV64I-NEXT: slli a1, a1, 12
1266
+ ; RV64I-NEXT: addi a1, a1, 241
1267
+ ; RV64I-NEXT: slli a1, a1, 12
1268
+ ; RV64I-NEXT: addi a1, a1, -241
1269
+ ; RV64I-NEXT: and a0, a0, a1
1270
+ ; RV64I-NEXT: lui a1, 4112
1271
+ ; RV64I-NEXT: addiw a1, a1, 257
1272
+ ; RV64I-NEXT: slli a1, a1, 16
1273
+ ; RV64I-NEXT: addi a1, a1, 257
1274
+ ; RV64I-NEXT: slli a1, a1, 16
1275
+ ; RV64I-NEXT: addi a1, a1, 257
1276
+ ; RV64I-NEXT: call __muldi3@plt
1277
+ ; RV64I-NEXT: srli a0, a0, 56
1278
+ ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
1279
+ ; RV64I-NEXT: addi sp, sp, 16
1280
+ ; RV64I-NEXT: ret
1281
+ %1 = call i64 @llvm.ctpop.i64 (i64 %a )
1282
+ ret i64 %1
1283
+ }
1284
+
1285
; test_parity_i32: returns bit 0 of llvm.ctpop.i32(%a), i.e. (ctpop(%a) & 1).
;
; The expectations below show that no population count and no libcall is
; emitted for this pattern. Instead the value is folded onto itself with
; xor after shifts of 16, 8, 4, 2 and 1, and the result is masked with 1.
; In the RV64I expectations the input is first zero-extended from 32 bits
; (slli/srli by 32) and the first fold uses srliw, so bits above bit 31 of
; a0 do not take part in the result.
;
; NOTE(review): this definition has no nounwind attribute, unlike
; test_bswap_i16 and test_ctpop_i64 in this file; the expected code has no
; stack frame so the output looks unaffected, but confirm it is intentional.
+ define i32 @test_parity_i32 (i32 %a ) {
1286
+ ; RV32I-LABEL: test_parity_i32:
1287
+ ; RV32I: # %bb.0:
1288
+ ; RV32I-NEXT: srli a1, a0, 16
1289
+ ; RV32I-NEXT: xor a0, a0, a1
1290
+ ; RV32I-NEXT: srli a1, a0, 8
1291
+ ; RV32I-NEXT: xor a0, a0, a1
1292
+ ; RV32I-NEXT: srli a1, a0, 4
1293
+ ; RV32I-NEXT: xor a0, a0, a1
1294
+ ; RV32I-NEXT: srli a1, a0, 2
1295
+ ; RV32I-NEXT: xor a0, a0, a1
1296
+ ; RV32I-NEXT: srli a1, a0, 1
1297
+ ; RV32I-NEXT: xor a0, a0, a1
1298
+ ; RV32I-NEXT: andi a0, a0, 1
1299
+ ; RV32I-NEXT: ret
1300
+ ;
1301
+ ; RV64I-LABEL: test_parity_i32:
1302
+ ; RV64I: # %bb.0:
1303
+ ; RV64I-NEXT: slli a1, a0, 32
1304
+ ; RV64I-NEXT: srli a1, a1, 32
1305
+ ; RV64I-NEXT: srliw a0, a0, 16
1306
+ ; RV64I-NEXT: xor a0, a1, a0
1307
+ ; RV64I-NEXT: srli a1, a0, 8
1308
+ ; RV64I-NEXT: xor a0, a0, a1
1309
+ ; RV64I-NEXT: srli a1, a0, 4
1310
+ ; RV64I-NEXT: xor a0, a0, a1
1311
+ ; RV64I-NEXT: srli a1, a0, 2
1312
+ ; RV64I-NEXT: xor a0, a0, a1
1313
+ ; RV64I-NEXT: srli a1, a0, 1
1314
+ ; RV64I-NEXT: xor a0, a0, a1
1315
+ ; RV64I-NEXT: andi a0, a0, 1
1316
+ ; RV64I-NEXT: ret
1317
+ %1 = call i32 @llvm.ctpop.i32 (i32 %a )
1318
+ %2 = and i32 %1 , 1
1319
+ ret i32 %2
1320
+ }
1321
+
1322
; test_parity_i64: returns bit 0 of llvm.ctpop.i64(%a), i.e. (ctpop(%a) & 1).
;
; As with the 32-bit parity test, the expectations contain no population
; count and no libcall, only an xor-fold followed by a mask with 1.
; In the RV32I expectations the two 32-bit input registers a0 and a1 are
; xor-ed together first, the 32-bit result is folded with shifts of 16, 8,
; 4, 2 and 1, and a1 is then set to zero before returning.
; In the RV64I expectations the fold starts with a shift of 32 and then
; continues with 16, 8, 4, 2 and 1 on the single 64-bit register.
;
; NOTE(review): this definition has no nounwind attribute, unlike
; test_bswap_i16 and test_ctpop_i64 in this file; the expected code has no
; stack frame so the output looks unaffected, but confirm it is intentional.
+ define i64 @test_parity_i64 (i64 %a ) {
1323
+ ; RV32I-LABEL: test_parity_i64:
1324
+ ; RV32I: # %bb.0:
1325
+ ; RV32I-NEXT: xor a0, a0, a1
1326
+ ; RV32I-NEXT: srli a1, a0, 16
1327
+ ; RV32I-NEXT: xor a0, a0, a1
1328
+ ; RV32I-NEXT: srli a1, a0, 8
1329
+ ; RV32I-NEXT: xor a0, a0, a1
1330
+ ; RV32I-NEXT: srli a1, a0, 4
1331
+ ; RV32I-NEXT: xor a0, a0, a1
1332
+ ; RV32I-NEXT: srli a1, a0, 2
1333
+ ; RV32I-NEXT: xor a0, a0, a1
1334
+ ; RV32I-NEXT: srli a1, a0, 1
1335
+ ; RV32I-NEXT: xor a0, a0, a1
1336
+ ; RV32I-NEXT: andi a0, a0, 1
1337
+ ; RV32I-NEXT: mv a1, zero
1338
+ ; RV32I-NEXT: ret
1339
+ ;
1340
+ ; RV64I-LABEL: test_parity_i64:
1341
+ ; RV64I: # %bb.0:
1342
+ ; RV64I-NEXT: srli a1, a0, 32
1343
+ ; RV64I-NEXT: xor a0, a0, a1
1344
+ ; RV64I-NEXT: srli a1, a0, 16
1345
+ ; RV64I-NEXT: xor a0, a0, a1
1346
+ ; RV64I-NEXT: srli a1, a0, 8
1347
+ ; RV64I-NEXT: xor a0, a0, a1
1348
+ ; RV64I-NEXT: srli a1, a0, 4
1349
+ ; RV64I-NEXT: xor a0, a0, a1
1350
+ ; RV64I-NEXT: srli a1, a0, 2
1351
+ ; RV64I-NEXT: xor a0, a0, a1
1352
+ ; RV64I-NEXT: srli a1, a0, 1
1353
+ ; RV64I-NEXT: xor a0, a0, a1
1354
+ ; RV64I-NEXT: andi a0, a0, 1
1355
+ ; RV64I-NEXT: ret
1356
+ %1 = call i64 @llvm.ctpop.i64 (i64 %a )
1357
+ %2 = and i64 %1 , 1
1358
+ ret i64 %2
1359
+ }
0 commit comments