@@ -82,3 +82,121 @@ define i64 @shared_caller_agnostic_callee(i64 %v) nounwind "aarch64_inout_za" "a
82
82
%res = call i64 @agnostic_decl (i64 %v )
83
83
ret i64 %res
84
84
}
85
+
86
+ ; agnostic-ZA + streaming -> private-ZA + non-streaming
+ ; The caller is "aarch64_za_state_agnostic" and "aarch64_pstate_sm_enabled"; it
+ ; makes two chained calls to @private_za_decl, passing the first result to the
+ ; second call. The expected code sizes a stack buffer with
+ ; __arm_sme_state_size, then brackets each call with __arm_sme_save + smstop sm
+ ; before it and smstart sm + __arm_sme_restore after it; the state is restored
+ ; and then saved again between the two calls.
+ ; NOTE(review): the CHECK lines look tool-generated -- presumably they should be
+ ; regenerated rather than hand-edited; confirm against the file header.
87
+ define i64 @streaming_agnostic_caller_nonstreaming_private_za_callee (i64 %v ) nounwind "aarch64_za_state_agnostic" "aarch64_pstate_sm_enabled" {
88
+ ; CHECK-LABEL: streaming_agnostic_caller_nonstreaming_private_za_callee:
89
+ ; CHECK: // %bb.0:
90
+ ; CHECK-NEXT: stp d15, d14, [sp, #-112]! // 16-byte Folded Spill
91
+ ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
92
+ ; CHECK-NEXT: mov x9, x0
93
+ ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
94
+ ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
95
+ ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
96
+ ; CHECK-NEXT: bl __arm_get_current_vg
97
+ ; CHECK-NEXT: str x0, [sp, #80] // 8-byte Folded Spill
98
+ ; CHECK-NEXT: mov x0, x9
99
+ ; CHECK-NEXT: add x29, sp, #64
100
+ ; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
101
+ ; CHECK-NEXT: mov x8, x0
102
+ ; CHECK-NEXT: bl __arm_sme_state_size
103
+ ; CHECK-NEXT: sub sp, sp, x0
104
+ ; CHECK-NEXT: mov x20, sp
105
+ ; CHECK-NEXT: mov x0, x20
106
+ ; CHECK-NEXT: bl __arm_sme_save
107
+ ; CHECK-NEXT: smstop sm
108
+ ; CHECK-NEXT: mov x0, x8
109
+ ; CHECK-NEXT: bl private_za_decl
110
+ ; CHECK-NEXT: mov x1, x0
111
+ ; CHECK-NEXT: smstart sm
112
+ ; CHECK-NEXT: mov x0, x20
113
+ ; CHECK-NEXT: bl __arm_sme_restore
114
+ ; CHECK-NEXT: mov x0, x20
115
+ ; CHECK-NEXT: bl __arm_sme_save
116
+ ; CHECK-NEXT: smstop sm
117
+ ; CHECK-NEXT: mov x0, x1
118
+ ; CHECK-NEXT: bl private_za_decl
119
+ ; CHECK-NEXT: mov x1, x0
120
+ ; CHECK-NEXT: smstart sm
121
+ ; CHECK-NEXT: mov x0, x20
122
+ ; CHECK-NEXT: bl __arm_sme_restore
123
+ ; CHECK-NEXT: mov x0, x1
124
+ ; CHECK-NEXT: sub sp, x29, #64
125
+ ; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
126
+ ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
127
+ ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
128
+ ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
129
+ ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
130
+ ; CHECK-NEXT: ldp d15, d14, [sp], #112 // 16-byte Folded Reload
131
+ ; CHECK-NEXT: ret
132
+ %res = call i64 @private_za_decl (i64 %v )
133
+ %res2 = call i64 @private_za_decl (i64 %res )
134
+ ret i64 %res2
135
+ }
136
+
137
+ ; agnostic-ZA + streaming-compatible -> private-ZA + non-streaming
+ ; The caller is "aarch64_za_state_agnostic" and "aarch64_pstate_sm_compatible";
+ ; it makes two chained calls to @private_za_decl, passing the first result to
+ ; the second call. The expected code sizes a stack buffer with
+ ; __arm_sme_state_size and brackets each call with __arm_sme_save before it and
+ ; __arm_sme_restore after it. Before each call it also calls __arm_sme_state,
+ ; keeps bit 0 of the result in x20, and uses tbz on that bit to skip
+ ; smstop sm / smstart sm when the bit is clear.
+ ; NOTE(review): the CHECK lines look tool-generated -- presumably they should be
+ ; regenerated rather than hand-edited; confirm against the file header.
138
+ define i64 @streaming_compatible_agnostic_caller_nonstreaming_private_za_callee (i64 %v ) nounwind "aarch64_za_state_agnostic" "aarch64_pstate_sm_compatible" {
139
+ ; CHECK-LABEL: streaming_compatible_agnostic_caller_nonstreaming_private_za_callee:
140
+ ; CHECK: // %bb.0:
141
+ ; CHECK-NEXT: stp d15, d14, [sp, #-112]! // 16-byte Folded Spill
142
+ ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
143
+ ; CHECK-NEXT: mov x9, x0
144
+ ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
145
+ ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
146
+ ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
147
+ ; CHECK-NEXT: bl __arm_get_current_vg
148
+ ; CHECK-NEXT: str x0, [sp, #80] // 8-byte Folded Spill
149
+ ; CHECK-NEXT: mov x0, x9
150
+ ; CHECK-NEXT: add x29, sp, #64
151
+ ; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
152
+ ; CHECK-NEXT: mov x8, x0
153
+ ; CHECK-NEXT: bl __arm_sme_state_size
154
+ ; CHECK-NEXT: sub sp, sp, x0
155
+ ; CHECK-NEXT: mov x19, sp
156
+ ; CHECK-NEXT: mov x0, x19
157
+ ; CHECK-NEXT: bl __arm_sme_save
158
+ ; CHECK-NEXT: bl __arm_sme_state
159
+ ; CHECK-NEXT: and x20, x0, #0x1
160
+ ; CHECK-NEXT: tbz w20, #0, .LBB5_2
161
+ ; CHECK-NEXT: // %bb.1:
162
+ ; CHECK-NEXT: smstop sm
163
+ ; CHECK-NEXT: .LBB5_2:
164
+ ; CHECK-NEXT: mov x0, x8
165
+ ; CHECK-NEXT: bl private_za_decl
166
+ ; CHECK-NEXT: mov x2, x0
167
+ ; CHECK-NEXT: tbz w20, #0, .LBB5_4
168
+ ; CHECK-NEXT: // %bb.3:
169
+ ; CHECK-NEXT: smstart sm
170
+ ; CHECK-NEXT: .LBB5_4:
171
+ ; CHECK-NEXT: mov x0, x19
172
+ ; CHECK-NEXT: bl __arm_sme_restore
173
+ ; CHECK-NEXT: mov x0, x19
174
+ ; CHECK-NEXT: bl __arm_sme_save
175
+ ; CHECK-NEXT: bl __arm_sme_state
176
+ ; CHECK-NEXT: and x20, x0, #0x1
177
+ ; CHECK-NEXT: tbz w20, #0, .LBB5_6
178
+ ; CHECK-NEXT: // %bb.5:
179
+ ; CHECK-NEXT: smstop sm
180
+ ; CHECK-NEXT: .LBB5_6:
181
+ ; CHECK-NEXT: mov x0, x2
182
+ ; CHECK-NEXT: bl private_za_decl
183
+ ; CHECK-NEXT: mov x1, x0
184
+ ; CHECK-NEXT: tbz w20, #0, .LBB5_8
185
+ ; CHECK-NEXT: // %bb.7:
186
+ ; CHECK-NEXT: smstart sm
187
+ ; CHECK-NEXT: .LBB5_8:
188
+ ; CHECK-NEXT: mov x0, x19
189
+ ; CHECK-NEXT: bl __arm_sme_restore
190
+ ; CHECK-NEXT: mov x0, x1
191
+ ; CHECK-NEXT: sub sp, x29, #64
192
+ ; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
193
+ ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
194
+ ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
195
+ ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
196
+ ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
197
+ ; CHECK-NEXT: ldp d15, d14, [sp], #112 // 16-byte Folded Reload
198
+ ; CHECK-NEXT: ret
199
+ %res = call i64 @private_za_decl (i64 %v )
200
+ %res2 = call i64 @private_za_decl (i64 %res )
201
+ ret i64 %res2
202
+ }
0 commit comments