// CHECK-A64-LABEL: @test_vcvt_f32_bf16(
// CHECK-A64-NEXT: entry:
- // CHECK-A64-NEXT: [[__REINT_836_I:%.*]] = alloca <4 x bfloat>, align 8
- // CHECK-A64-NEXT: [[__REINT1_836_I:%.*]] = alloca <4 x i32>, align 16
- // CHECK-A64-NEXT: store <4 x bfloat> [[A:%.*]], ptr [[__REINT_836_I]], align 8
- // CHECK-A64-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_836_I]], align 8
+ // CHECK-A64-NEXT: [[__REINT_808_I:%.*]] = alloca <4 x bfloat>, align 8
+ // CHECK-A64-NEXT: [[__REINT1_808_I:%.*]] = alloca <4 x i32>, align 16
+ // CHECK-A64-NEXT: store <4 x bfloat> [[A:%.*]], ptr [[__REINT_808_I]], align 8
+ // CHECK-A64-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_808_I]], align 8
// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
- // CHECK-A64-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
+ // CHECK-A64-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
// CHECK-A64-NEXT: [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
- // CHECK-A64-NEXT: store <4 x i32> [[VSHLL_N_I]], ptr [[__REINT1_836_I]], align 16
- // CHECK-A64-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_836_I]], align 16
+ // CHECK-A64-NEXT: store <4 x i32> [[VSHLL_N_I]], ptr [[__REINT1_808_I]], align 16
+ // CHECK-A64-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_808_I]], align 16
// CHECK-A64-NEXT: ret <4 x float> [[TMP3]]
//
// CHECK-A32-HARDFP-LABEL: @test_vcvt_f32_bf16(
// CHECK-A32-HARDFP-NEXT: entry:
- // CHECK-A32-HARDFP-NEXT: [[__REINT_836_I:%.*]] = alloca <4 x bfloat>, align 8
- // CHECK-A32-HARDFP-NEXT: [[__REINT1_836_I:%.*]] = alloca <4 x i32>, align 8
- // CHECK-A32-HARDFP-NEXT: store <4 x bfloat> [[A:%.*]], ptr [[__REINT_836_I]], align 8
- // CHECK-A32-HARDFP-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_836_I]], align 8
+ // CHECK-A32-HARDFP-NEXT: [[__REINT_808_I:%.*]] = alloca <4 x bfloat>, align 8
+ // CHECK-A32-HARDFP-NEXT: [[__REINT1_808_I:%.*]] = alloca <4 x i32>, align 8
+ // CHECK-A32-HARDFP-NEXT: store <4 x bfloat> [[A:%.*]], ptr [[__REINT_808_I]], align 8
+ // CHECK-A32-HARDFP-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_808_I]], align 8
// CHECK-A32-HARDFP-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
- // CHECK-A32-HARDFP-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
+ // CHECK-A32-HARDFP-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
// CHECK-A32-HARDFP-NEXT: [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
- // CHECK-A32-HARDFP-NEXT: store <4 x i32> [[VSHLL_N_I]], ptr [[__REINT1_836_I]], align 8
- // CHECK-A32-HARDFP-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_836_I]], align 8
+ // CHECK-A32-HARDFP-NEXT: store <4 x i32> [[VSHLL_N_I]], ptr [[__REINT1_808_I]], align 8
+ // CHECK-A32-HARDFP-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_808_I]], align 8
// CHECK-A32-HARDFP-NEXT: ret <4 x float> [[TMP3]]
//
// CHECK-A32-SOFTFP-LABEL: @test_vcvt_f32_bf16(
// CHECK-A32-SOFTFP-NEXT: entry:
- // CHECK-A32-SOFTFP-NEXT: [[__P0_836_I:%.*]] = alloca <4 x bfloat>, align 8
- // CHECK-A32-SOFTFP-NEXT: [[__REINT_836_I:%.*]] = alloca <4 x bfloat>, align 8
- // CHECK-A32-SOFTFP-NEXT: [[__REINT1_836_I:%.*]] = alloca <4 x i32>, align 8
+ // CHECK-A32-SOFTFP-NEXT: [[__P0_808_I:%.*]] = alloca <4 x bfloat>, align 8
+ // CHECK-A32-SOFTFP-NEXT: [[__REINT_808_I:%.*]] = alloca <4 x bfloat>, align 8
+ // CHECK-A32-SOFTFP-NEXT: [[__REINT1_808_I:%.*]] = alloca <4 x i32>, align 8
// CHECK-A32-SOFTFP-NEXT: [[A:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT: [[COERCE:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT: store <2 x i32> [[A_COERCE:%.*]], ptr [[A]], align 8
// CHECK-A32-SOFTFP-NEXT: [[A1:%.*]] = load <4 x bfloat>, ptr [[A]], align 8
// CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[A1]], ptr [[COERCE]], align 8
// CHECK-A32-SOFTFP-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[COERCE]], align 8
- // CHECK-A32-SOFTFP-NEXT: store <2 x i32> [[TMP0]], ptr [[__P0_836_I]], align 8
- // CHECK-A32-SOFTFP-NEXT: [[__P0_8361_I:%.*]] = load <4 x bfloat>, ptr [[__P0_836_I]], align 8
- // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[__P0_8361_I]], ptr [[__REINT_836_I]], align 8
- // CHECK-A32-SOFTFP-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[__REINT_836_I]], align 8
+ // CHECK-A32-SOFTFP-NEXT: store <2 x i32> [[TMP0]], ptr [[__P0_808_I]], align 8
+ // CHECK-A32-SOFTFP-NEXT: [[__P0_8081_I:%.*]] = load <4 x bfloat>, ptr [[__P0_808_I]], align 8
+ // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[__P0_8081_I]], ptr [[__REINT_808_I]], align 8
+ // CHECK-A32-SOFTFP-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[__REINT_808_I]], align 8
// CHECK-A32-SOFTFP-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
- // CHECK-A32-SOFTFP-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+ // CHECK-A32-SOFTFP-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK-A32-SOFTFP-NEXT: [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 16)
- // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[VSHLL_N_I]], ptr [[__REINT1_836_I]], align 8
- // CHECK-A32-SOFTFP-NEXT: [[TMP4:%.*]] = load <4 x float>, ptr [[__REINT1_836_I]], align 8
+ // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[VSHLL_N_I]], ptr [[__REINT1_808_I]], align 8
+ // CHECK-A32-SOFTFP-NEXT: [[TMP4:%.*]] = load <4 x float>, ptr [[__REINT1_808_I]], align 8
// CHECK-A32-SOFTFP-NEXT: ret <4 x float> [[TMP4]]
//
float32x4_t test_vcvt_f32_bf16(bfloat16x4_t a) {
@@ -76,39 +76,39 @@ float32x4_t test_vcvt_f32_bf16(bfloat16x4_t a) {
// CHECK-A64-LABEL: @test_vcvtq_low_f32_bf16(
// CHECK-A64-NEXT: entry:
- // CHECK-A64-NEXT: [[__REINT_836_I_I:%.*]] = alloca <4 x bfloat>, align 8
- // CHECK-A64-NEXT: [[__REINT1_836_I_I:%.*]] = alloca <4 x i32>, align 16
+ // CHECK-A64-NEXT: [[__REINT_808_I_I:%.*]] = alloca <4 x bfloat>, align 8
+ // CHECK-A64-NEXT: [[__REINT1_808_I_I:%.*]] = alloca <4 x i32>, align 16
// CHECK-A64-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- // CHECK-A64-NEXT: store <4 x bfloat> [[SHUFFLE_I]], ptr [[__REINT_836_I_I]], align 8
- // CHECK-A64-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_836_I_I]], align 8
+ // CHECK-A64-NEXT: store <4 x bfloat> [[SHUFFLE_I]], ptr [[__REINT_808_I_I]], align 8
+ // CHECK-A64-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_808_I_I]], align 8
// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
- // CHECK-A64-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
+ // CHECK-A64-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
// CHECK-A64-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
- // CHECK-A64-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_836_I_I]], align 16
- // CHECK-A64-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_836_I_I]], align 16
+ // CHECK-A64-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_808_I_I]], align 16
+ // CHECK-A64-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_808_I_I]], align 16
// CHECK-A64-NEXT: ret <4 x float> [[TMP3]]
//
// CHECK-A32-HARDFP-LABEL: @test_vcvtq_low_f32_bf16(
// CHECK-A32-HARDFP-NEXT: entry:
- // CHECK-A32-HARDFP-NEXT: [[__REINT_836_I_I:%.*]] = alloca <4 x bfloat>, align 8
- // CHECK-A32-HARDFP-NEXT: [[__REINT1_836_I_I:%.*]] = alloca <4 x i32>, align 8
+ // CHECK-A32-HARDFP-NEXT: [[__REINT_808_I_I:%.*]] = alloca <4 x bfloat>, align 8
+ // CHECK-A32-HARDFP-NEXT: [[__REINT1_808_I_I:%.*]] = alloca <4 x i32>, align 8
// CHECK-A32-HARDFP-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- // CHECK-A32-HARDFP-NEXT: store <4 x bfloat> [[SHUFFLE_I]], ptr [[__REINT_836_I_I]], align 8
- // CHECK-A32-HARDFP-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_836_I_I]], align 8
+ // CHECK-A32-HARDFP-NEXT: store <4 x bfloat> [[SHUFFLE_I]], ptr [[__REINT_808_I_I]], align 8
+ // CHECK-A32-HARDFP-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_808_I_I]], align 8
// CHECK-A32-HARDFP-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
- // CHECK-A32-HARDFP-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
+ // CHECK-A32-HARDFP-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
// CHECK-A32-HARDFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
- // CHECK-A32-HARDFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_836_I_I]], align 8
- // CHECK-A32-HARDFP-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_836_I_I]], align 8
+ // CHECK-A32-HARDFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_808_I_I]], align 8
+ // CHECK-A32-HARDFP-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_808_I_I]], align 8
// CHECK-A32-HARDFP-NEXT: ret <4 x float> [[TMP3]]
//
// CHECK-A32-SOFTFP-LABEL: @test_vcvtq_low_f32_bf16(
// CHECK-A32-SOFTFP-NEXT: entry:
// CHECK-A32-SOFTFP-NEXT: [[RETVAL_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT: [[__P0_I2:%.*]] = alloca <8 x bfloat>, align 8
- // CHECK-A32-SOFTFP-NEXT: [[__P0_836_I_I:%.*]] = alloca <4 x bfloat>, align 8
- // CHECK-A32-SOFTFP-NEXT: [[__REINT_836_I_I:%.*]] = alloca <4 x bfloat>, align 8
- // CHECK-A32-SOFTFP-NEXT: [[__REINT1_836_I_I:%.*]] = alloca <4 x i32>, align 8
+ // CHECK-A32-SOFTFP-NEXT: [[__P0_808_I_I:%.*]] = alloca <4 x bfloat>, align 8
+ // CHECK-A32-SOFTFP-NEXT: [[__REINT_808_I_I:%.*]] = alloca <4 x bfloat>, align 8
+ // CHECK-A32-SOFTFP-NEXT: [[__REINT1_808_I_I:%.*]] = alloca <4 x i32>, align 8
// CHECK-A32-SOFTFP-NEXT: [[__P0_I:%.*]] = alloca <8 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT: [[COERCE_I:%.*]] = alloca <8 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT: [[COERCE2_I:%.*]] = alloca <4 x bfloat>, align 8
@@ -132,15 +132,15 @@ float32x4_t test_vcvt_f32_bf16(bfloat16x4_t a) {
// CHECK-A32-SOFTFP-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr [[COERCE2_I]], align 8
// CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[TMP3]], ptr [[COERCE3_I]], align 8
// CHECK-A32-SOFTFP-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[COERCE3_I]], align 8
- // CHECK-A32-SOFTFP-NEXT: store <2 x i32> [[TMP4]], ptr [[__P0_836_I_I]], align 8
- // CHECK-A32-SOFTFP-NEXT: [[__P0_8361_I_I:%.*]] = load <4 x bfloat>, ptr [[__P0_836_I_I]], align 8
- // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[__P0_8361_I_I]], ptr [[__REINT_836_I_I]], align 8
- // CHECK-A32-SOFTFP-NEXT: [[TMP5:%.*]] = load <4 x i16>, ptr [[__REINT_836_I_I]], align 8
+ // CHECK-A32-SOFTFP-NEXT: store <2 x i32> [[TMP4]], ptr [[__P0_808_I_I]], align 8
+ // CHECK-A32-SOFTFP-NEXT: [[__P0_8081_I_I:%.*]] = load <4 x bfloat>, ptr [[__P0_808_I_I]], align 8
+ // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[__P0_8081_I_I]], ptr [[__REINT_808_I_I]], align 8
+ // CHECK-A32-SOFTFP-NEXT: [[TMP5:%.*]] = load <4 x i16>, ptr [[__REINT_808_I_I]], align 8
// CHECK-A32-SOFTFP-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
- // CHECK-A32-SOFTFP-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP5]] to <4 x i32>
+ // CHECK-A32-SOFTFP-NEXT: [[TMP7:%.*]] = zext <4 x i16> [[TMP5]] to <4 x i32>
// CHECK-A32-SOFTFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP7]], splat (i32 16)
- // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_836_I_I]], align 8
- // CHECK-A32-SOFTFP-NEXT: [[TMP8:%.*]] = load <4 x float>, ptr [[__REINT1_836_I_I]], align 8
+ // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_808_I_I]], align 8
+ // CHECK-A32-SOFTFP-NEXT: [[TMP8:%.*]] = load <4 x float>, ptr [[__REINT1_808_I_I]], align 8
// CHECK-A32-SOFTFP-NEXT: ret <4 x float> [[TMP8]]
//
float32x4_t test_vcvtq_low_f32_bf16(bfloat16x8_t a) {
@@ -149,39 +149,39 @@ float32x4_t test_vcvtq_low_f32_bf16(bfloat16x8_t a) {
// CHECK-A64-LABEL: @test_vcvtq_high_f32_bf16(
// CHECK-A64-NEXT: entry:
- // CHECK-A64-NEXT: [[__REINT_836_I_I:%.*]] = alloca <4 x bfloat>, align 8
- // CHECK-A64-NEXT: [[__REINT1_836_I_I:%.*]] = alloca <4 x i32>, align 16
+ // CHECK-A64-NEXT: [[__REINT_808_I_I:%.*]] = alloca <4 x bfloat>, align 8
+ // CHECK-A64-NEXT: [[__REINT1_808_I_I:%.*]] = alloca <4 x i32>, align 16
// CHECK-A64-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- // CHECK-A64-NEXT: store <4 x bfloat> [[SHUFFLE_I]], ptr [[__REINT_836_I_I]], align 8
- // CHECK-A64-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_836_I_I]], align 8
+ // CHECK-A64-NEXT: store <4 x bfloat> [[SHUFFLE_I]], ptr [[__REINT_808_I_I]], align 8
+ // CHECK-A64-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_808_I_I]], align 8
// CHECK-A64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
- // CHECK-A64-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
+ // CHECK-A64-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
// CHECK-A64-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
- // CHECK-A64-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_836_I_I]], align 16
- // CHECK-A64-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_836_I_I]], align 16
+ // CHECK-A64-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_808_I_I]], align 16
+ // CHECK-A64-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_808_I_I]], align 16
// CHECK-A64-NEXT: ret <4 x float> [[TMP3]]
//
// CHECK-A32-HARDFP-LABEL: @test_vcvtq_high_f32_bf16(
// CHECK-A32-HARDFP-NEXT: entry:
- // CHECK-A32-HARDFP-NEXT: [[__REINT_836_I_I:%.*]] = alloca <4 x bfloat>, align 8
- // CHECK-A32-HARDFP-NEXT: [[__REINT1_836_I_I:%.*]] = alloca <4 x i32>, align 8
+ // CHECK-A32-HARDFP-NEXT: [[__REINT_808_I_I:%.*]] = alloca <4 x bfloat>, align 8
+ // CHECK-A32-HARDFP-NEXT: [[__REINT1_808_I_I:%.*]] = alloca <4 x i32>, align 8
// CHECK-A32-HARDFP-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- // CHECK-A32-HARDFP-NEXT: store <4 x bfloat> [[SHUFFLE_I]], ptr [[__REINT_836_I_I]], align 8
- // CHECK-A32-HARDFP-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_836_I_I]], align 8
+ // CHECK-A32-HARDFP-NEXT: store <4 x bfloat> [[SHUFFLE_I]], ptr [[__REINT_808_I_I]], align 8
+ // CHECK-A32-HARDFP-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_808_I_I]], align 8
// CHECK-A32-HARDFP-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
- // CHECK-A32-HARDFP-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
+ // CHECK-A32-HARDFP-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
// CHECK-A32-HARDFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
- // CHECK-A32-HARDFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_836_I_I]], align 8
- // CHECK-A32-HARDFP-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_836_I_I]], align 8
+ // CHECK-A32-HARDFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_808_I_I]], align 8
+ // CHECK-A32-HARDFP-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_808_I_I]], align 8
// CHECK-A32-HARDFP-NEXT: ret <4 x float> [[TMP3]]
//
// CHECK-A32-SOFTFP-LABEL: @test_vcvtq_high_f32_bf16(
// CHECK-A32-SOFTFP-NEXT: entry:
// CHECK-A32-SOFTFP-NEXT: [[RETVAL_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT: [[__P0_I2:%.*]] = alloca <8 x bfloat>, align 8
- // CHECK-A32-SOFTFP-NEXT: [[__P0_836_I_I:%.*]] = alloca <4 x bfloat>, align 8
- // CHECK-A32-SOFTFP-NEXT: [[__REINT_836_I_I:%.*]] = alloca <4 x bfloat>, align 8
- // CHECK-A32-SOFTFP-NEXT: [[__REINT1_836_I_I:%.*]] = alloca <4 x i32>, align 8
+ // CHECK-A32-SOFTFP-NEXT: [[__P0_808_I_I:%.*]] = alloca <4 x bfloat>, align 8
+ // CHECK-A32-SOFTFP-NEXT: [[__REINT_808_I_I:%.*]] = alloca <4 x bfloat>, align 8
+ // CHECK-A32-SOFTFP-NEXT: [[__REINT1_808_I_I:%.*]] = alloca <4 x i32>, align 8
// CHECK-A32-SOFTFP-NEXT: [[__P0_I:%.*]] = alloca <8 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT: [[COERCE_I:%.*]] = alloca <8 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT: [[COERCE2_I:%.*]] = alloca <4 x bfloat>, align 8
@@ -205,15 +205,15 @@ float32x4_t test_vcvtq_low_f32_bf16(bfloat16x8_t a) {
// CHECK-A32-SOFTFP-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr [[COERCE2_I]], align 8
// CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[TMP3]], ptr [[COERCE3_I]], align 8
// CHECK-A32-SOFTFP-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[COERCE3_I]], align 8
- // CHECK-A32-SOFTFP-NEXT: store <2 x i32> [[TMP4]], ptr [[__P0_836_I_I]], align 8
- // CHECK-A32-SOFTFP-NEXT: [[__P0_8361_I_I:%.*]] = load <4 x bfloat>, ptr [[__P0_836_I_I]], align 8
- // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[__P0_8361_I_I]], ptr [[__REINT_836_I_I]], align 8
- // CHECK-A32-SOFTFP-NEXT: [[TMP5:%.*]] = load <4 x i16>, ptr [[__REINT_836_I_I]], align 8
+ // CHECK-A32-SOFTFP-NEXT: store <2 x i32> [[TMP4]], ptr [[__P0_808_I_I]], align 8
+ // CHECK-A32-SOFTFP-NEXT: [[__P0_8081_I_I:%.*]] = load <4 x bfloat>, ptr [[__P0_808_I_I]], align 8
+ // CHECK-A32-SOFTFP-NEXT: store <4 x bfloat> [[__P0_8081_I_I]], ptr [[__REINT_808_I_I]], align 8
+ // CHECK-A32-SOFTFP-NEXT: [[TMP5:%.*]] = load <4 x i16>, ptr [[__REINT_808_I_I]], align 8
// CHECK-A32-SOFTFP-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
- // CHECK-A32-SOFTFP-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP5]] to <4 x i32>
+ // CHECK-A32-SOFTFP-NEXT: [[TMP7:%.*]] = zext <4 x i16> [[TMP5]] to <4 x i32>
// CHECK-A32-SOFTFP-NEXT: [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP7]], splat (i32 16)
- // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_836_I_I]], align 8
- // CHECK-A32-SOFTFP-NEXT: [[TMP8:%.*]] = load <4 x float>, ptr [[__REINT1_836_I_I]], align 8
+ // CHECK-A32-SOFTFP-NEXT: store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_808_I_I]], align 8
+ // CHECK-A32-SOFTFP-NEXT: [[TMP8:%.*]] = load <4 x float>, ptr [[__REINT1_808_I_I]], align 8
// CHECK-A32-SOFTFP-NEXT: ret <4 x float> [[TMP8]]
//
float32x4_t test_vcvtq_high_f32_bf16(bfloat16x8_t a) {
@@ -427,7 +427,7 @@ bfloat16_t test_vcvth_bf16_f32(float32_t a) {
// CHECK-NEXT: [[__REINT1_I:%.*]] = alloca i32, align 4
// CHECK-NEXT: store bfloat [[A:%.*]], ptr [[__REINT_I]], align 2
// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[__REINT_I]], align 2
- // CHECK-NEXT: [[CONV_I:%.*]] = sext i16 [[TMP0]] to i32
+ // CHECK-NEXT: [[CONV_I:%.*]] = zext i16 [[TMP0]] to i32
// CHECK-NEXT: [[SHL_I:%.*]] = shl i32 [[CONV_I]], 16
// CHECK-NEXT: store i32 [[SHL_I]], ptr [[__REINT1_I]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[__REINT1_I]], align 4
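Alongside the renumbering of the auto-generated __reint/__p0 temporaries (_836 to _808), the substantive change in these CHECK lines is the switch from sext to zext on the 16-bit bfloat bit pattern before the shift left by 16. After that shift the two extensions yield the same value, since the extended bits are shifted out; what the tests verify is the overall widening: the bfloat16 pattern lands in the upper 16 bits of a 32-bit word, which is then reinterpreted as a float. Below is a minimal scalar C sketch of that widening, assuming only the standard bfloat16 layout (the upper 16 bits of an IEEE-754 binary32); the helper name and test value are illustrative and not part of arm_neon.h.

#include <stdint.h>
#include <string.h>

/* Hypothetical scalar helper (not an arm_neon.h intrinsic): widen one
 * bfloat16 bit pattern to float by placing it in the top 16 bits of a
 * 32-bit word. Zero- vs sign-extending the 16-bit value first makes no
 * difference to the result, because the shift by 16 discards the
 * extension bits; the checked IR now uses zext for this step. */
static float bf16_bits_to_f32(uint16_t bits) {
    uint32_t widened = (uint32_t)bits << 16; /* zero-extend, then shl 16 */
    float f;
    memcpy(&f, &widened, sizeof f); /* reinterpret, like the store/load through __reint1 */
    return f;
}

int main(void) {
    /* 0x3F80 is the bfloat16 pattern for 1.0f; widening it should give 1.0f. */
    return bf16_bits_to_f32(0x3F80) == 1.0f ? 0 : 1;
}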