; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=vector-combine -S %s | FileCheck %s

target triple = "aarch64"

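; and(zext(load <16 x i8>), %a) feeding an add reduction: the CHECK lines
; expect the 'and' to be narrowed to <16 x i8> (truncating %a) with the zext
; sunk below it.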
define i32 @test_and(<16 x i32> %a, ptr %b) {
; CHECK-LABEL: @test_and(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[B:%.*]], align 1
; CHECK-NEXT:    [[TMP0:%.*]] = trunc <16 x i32> [[A:%.*]] to <16 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = and <16 x i8> [[WIDE_LOAD]], [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = zext <16 x i8> [[TMP1]] to <16 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP2]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
entry:
  %wide.load = load <16 x i8>, ptr %b, align 1
  %0 = zext <16 x i8> %wide.load to <16 x i32>
  %1 = and <16 x i32> %0, %a
  %2 = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %1)
  ret i32 %2
}

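; Same pattern with 'or', where %a is pre-masked to a value that fits in i8;
; the CHECK lines expect the 'or' to be narrowed to <16 x i8> as well.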
define i32 @test_mask_or(<16 x i32> %a, ptr %b) {
; CHECK-LABEL: @test_mask_or(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[B:%.*]], align 1
; CHECK-NEXT:    [[A_MASKED:%.*]] = and <16 x i32> [[A:%.*]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; CHECK-NEXT:    [[TMP0:%.*]] = trunc <16 x i32> [[A_MASKED]] to <16 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = or <16 x i8> [[WIDE_LOAD]], [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = zext <16 x i8> [[TMP1]] to <16 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP2]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
entry:
  %wide.load = load <16 x i8>, ptr %b, align 1
  %a.masked = and <16 x i32> %a, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  %0 = zext <16 x i8> %wide.load to <16 x i32>
  %1 = or <16 x i32> %0, %a.masked
  %2 = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %1)
  ret i32 %2
}

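; The zext'd load has two users (the lshr and the masking 'and'); the CHECK
; lines expect both chains to be narrowed to <16 x i8> before the final add
; and reduction.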
define i32 @multiuse(<16 x i32> %u, <16 x i32> %v, ptr %b) {
; CHECK-LABEL: @multiuse(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[U_MASKED:%.*]] = and <16 x i32> [[U:%.*]], <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
; CHECK-NEXT:    [[TMP0:%.*]] = trunc <16 x i32> [[U_MASKED]] to <16 x i8>
; CHECK-NEXT:    [[V_MASKED:%.*]] = and <16 x i32> [[V:%.*]], <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
; CHECK-NEXT:    [[TMP1:%.*]] = trunc <16 x i32> [[V_MASKED]] to <16 x i8>
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[B:%.*]], align 1
; CHECK-NEXT:    [[TMP2:%.*]] = lshr <16 x i8> [[WIDE_LOAD]], <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
; CHECK-NEXT:    [[TMP3:%.*]] = or <16 x i8> [[TMP2]], [[TMP1]]
; CHECK-NEXT:    [[TMP4:%.*]] = zext <16 x i8> [[TMP3]] to <16 x i32>
; CHECK-NEXT:    [[TMP5:%.*]] = and <16 x i8> [[WIDE_LOAD]], <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>
; CHECK-NEXT:    [[TMP6:%.*]] = or <16 x i8> [[TMP5]], [[TMP0]]
; CHECK-NEXT:    [[TMP7:%.*]] = zext <16 x i8> [[TMP6]] to <16 x i32>
; CHECK-NEXT:    [[TMP8:%.*]] = add nuw nsw <16 x i32> [[TMP4]], [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP8]])
; CHECK-NEXT:    ret i32 [[TMP9]]
;
entry:
  %u.masked = and <16 x i32> %u, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %v.masked = and <16 x i32> %v, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %wide.load = load <16 x i8>, ptr %b, align 1
  %0 = zext <16 x i8> %wide.load to <16 x i32>
  %1 = lshr <16 x i32> %0, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  %2 = or <16 x i32> %1, %v.masked
  %3 = and <16 x i32> %0, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
  %4 = or <16 x i32> %3, %u.masked
  %5 = add nuw nsw <16 x i32> %2, %4
  %6 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %5)
  ret i32 %6
}

declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)