@@ -186,98 +186,99 @@ define void @slsr1_0(i32 %b.arg, i32 %s.arg) #0 {
186
186
187
187
define void @slsr1_1 (i32 %b.arg , i32 %s.arg ) #0 {
188
188
; GFX9-LABEL: slsr1_1:
189
- ; GFX: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
190
- ; GFX-NEXT: s_or_saveexec_b64 s[16:17], -1
191
- ; GFX-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
192
- ; GFX-NEXT: s_mov_b64 exec, s[16:17]
193
- ; GFX-NEXT: v_writelane_b32 v44, s33, 15
194
- ; GFX-NEXT: v_writelane_b32 v44, s34, 0
195
- ; GFX-NEXT: v_writelane_b32 v44, s35, 1
196
- ; GFX-NEXT: v_writelane_b32 v44, s36, 2
197
- ; GFX-NEXT: v_writelane_b32 v44, s37, 3
198
- ; GFX-NEXT: v_writelane_b32 v44, s38, 4
199
- ; GFX-NEXT: v_writelane_b32 v44, s39, 5
200
- ; GFX-NEXT: v_writelane_b32 v44, s40, 6
201
- ; GFX-NEXT: v_writelane_b32 v44, s41, 7
202
- ; GFX-NEXT: v_writelane_b32 v44, s42, 8
203
- ; GFX-NEXT: v_writelane_b32 v44, s43, 9
204
- ; GFX-NEXT: v_writelane_b32 v44, s44, 10
205
- ; GFX-NEXT: s_mov_b32 s33, s32
206
- ; GFX-NEXT: s_add_u32 s32, s32, 0x800
207
- ; GFX-NEXT: s_mov_b64 s[40:41], s[4:5]
208
- ; GFX-NEXT: v_writelane_b32 v44, s46, 11
209
- ; GFX-NEXT: s_getpc_b64 s[4:5]
210
- ; GFX-NEXT: s_add_u32 s4, s4, foo@gotpcrel32@lo+4
211
- ; GFX-NEXT: s_addc_u32 s5, s5, foo@gotpcrel32@hi+12
212
- ; GFX-NEXT: v_writelane_b32 v44, s47, 12
213
- ; GFX-NEXT: s_load_dwordx2 s[46:47], s[4:5], 0x0
214
- ; GFX-NEXT: s_waitcnt lgkmcnt(0)
215
- ; GFX-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
216
- ; GFX-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
217
- ; GFX-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
218
- ; GFX-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill
219
- ; GFX-NEXT: v_mov_b32_e32 v41, v1
220
- ; GFX-NEXT: v_mov_b32_e32 v42, v0
221
- ; GFX-NEXT: v_writelane_b32 v44, s30, 13
222
- ; GFX-NEXT: v_mul_u32_u24_e32 v0, v42, v41
223
- ; GFX-NEXT: s_mov_b64 s[4:5], s[40:41]
224
- ; GFX-NEXT: v_writelane_b32 v44, s31, 14
225
- ; GFX-NEXT: v_mov_b32_e32 v40, v31
226
- ; GFX-NEXT: s_mov_b32 s42, s14
227
- ; GFX-NEXT: s_mov_b32 s43, s13
228
- ; GFX-NEXT: s_mov_b32 s44, s12
229
- ; GFX-NEXT: s_mov_b64 s[34:35], s[10:11]
230
- ; GFX-NEXT: s_mov_b64 s[36:37], s[8:9]
231
- ; GFX-NEXT: s_mov_b64 s[38:39], s[6:7]
232
- ; GFX-NEXT: v_and_b32_e32 v43, 0xffffff, v41
233
- ; GFX-NEXT: s_swappc_b64 s[30:31], s[46:47]
234
- ; GFX-NEXT: v_mad_u32_u24 v41, v42, v41, v43
235
- ; GFX-NEXT: s_mov_b64 s[4:5], s[40:41]
236
- ; GFX-NEXT: s_mov_b64 s[6:7], s[38:39]
237
- ; GFX-NEXT: s_mov_b64 s[8:9], s[36:37]
238
- ; GFX-NEXT: s_mov_b64 s[10:11], s[34:35]
239
- ; GFX-NEXT: s_mov_b32 s12, s44
240
- ; GFX-NEXT: s_mov_b32 s13, s43
241
- ; GFX-NEXT: s_mov_b32 s14, s42
242
- ; GFX-NEXT: v_mov_b32_e32 v31, v40
243
- ; GFX-NEXT: v_mov_b32_e32 v0, v41
244
- ; GFX-NEXT: s_swappc_b64 s[30:31], s[46:47]
245
- ; GFX-NEXT: v_add_u32_e32 v0, v41, v43
246
- ; GFX-NEXT: s_mov_b64 s[4:5], s[40:41]
247
- ; GFX-NEXT: s_mov_b64 s[6:7], s[38:39]
248
- ; GFX-NEXT: s_mov_b64 s[8:9], s[36:37]
249
- ; GFX-NEXT: s_mov_b64 s[10:11], s[34:35]
250
- ; GFX-NEXT: s_mov_b32 s12, s44
251
- ; GFX-NEXT: s_mov_b32 s13, s43
252
- ; GFX-NEXT: s_mov_b32 s14, s42
253
- ; GFX-NEXT: v_mov_b32_e32 v31, v40
254
- ; GFX-NEXT: s_swappc_b64 s[30:31], s[46:47]
255
- ; GFX-NEXT: buffer_load_dword v43, off, s[0:3], s33 ; 4-byte Folded Reload
256
- ; GFX-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
257
- ; GFX-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
258
- ; GFX-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
259
- ; GFX-NEXT: v_readlane_b32 s4, v44, 13
260
- ; GFX-NEXT: v_readlane_b32 s5, v44, 14
261
- ; GFX-NEXT: v_readlane_b32 s47, v44, 12
262
- ; GFX-NEXT: v_readlane_b32 s46, v44, 11
263
- ; GFX-NEXT: v_readlane_b32 s44, v44, 10
264
- ; GFX-NEXT: v_readlane_b32 s43, v44, 9
265
- ; GFX-NEXT: v_readlane_b32 s42, v44, 8
266
- ; GFX-NEXT: v_readlane_b32 s41, v44, 7
267
- ; GFX-NEXT: v_readlane_b32 s40, v44, 6
268
- ; GFX-NEXT: v_readlane_b32 s39, v44, 5
269
- ; GFX-NEXT: v_readlane_b32 s38, v44, 4
270
- ; GFX-NEXT: v_readlane_b32 s37, v44, 3
271
- ; GFX-NEXT: v_readlane_b32 s36, v44, 2
272
- ; GFX-NEXT: v_readlane_b32 s35, v44, 1
273
- ; GFX-NEXT: v_readlane_b32 s34, v44, 0
274
- ; GFX-NEXT: s_sub_u32 s32, s32, 0x800
275
- ; GFX-NEXT: v_readlane_b32 s33, v44, 15
276
- ; GFX-NEXT: s_or_saveexec_b64 s[6:7], -1
277
- ; GFX-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
278
- ; GFX-NEXT: s_mov_b64 exec, s[6:7]
279
- ; GFX-NEXT: s_waitcnt vmcnt(0)
280
- ; GFX-NEXT: s_setpc_b64 s[4:5]
189
+ ; GFX9: ; %bb.0:
190
+ ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
191
+ ; GFX9-NEXT: s_or_saveexec_b64 s[16:17], -1
192
+ ; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
193
+ ; GFX9-NEXT: s_mov_b64 exec, s[16:17]
194
+ ; GFX9-NEXT: v_writelane_b32 v44, s33, 15
195
+ ; GFX9-NEXT: v_writelane_b32 v44, s34, 0
196
+ ; GFX9-NEXT: v_writelane_b32 v44, s35, 1
197
+ ; GFX9-NEXT: v_writelane_b32 v44, s36, 2
198
+ ; GFX9-NEXT: v_writelane_b32 v44, s37, 3
199
+ ; GFX9-NEXT: v_writelane_b32 v44, s38, 4
200
+ ; GFX9-NEXT: v_writelane_b32 v44, s39, 5
201
+ ; GFX9-NEXT: v_writelane_b32 v44, s40, 6
202
+ ; GFX9-NEXT: v_writelane_b32 v44, s41, 7
203
+ ; GFX9-NEXT: v_writelane_b32 v44, s42, 8
204
+ ; GFX9-NEXT: v_writelane_b32 v44, s43, 9
205
+ ; GFX9-NEXT: v_writelane_b32 v44, s44, 10
206
+ ; GFX9-NEXT: s_mov_b32 s33, s32
207
+ ; GFX9-NEXT: s_add_u32 s32, s32, 0x800
208
+ ; GFX9-NEXT: s_mov_b64 s[40:41], s[4:5]
209
+ ; GFX9-NEXT: v_writelane_b32 v44, s46, 11
210
+ ; GFX9-NEXT: s_getpc_b64 s[4:5]
211
+ ; GFX9-NEXT: s_add_u32 s4, s4, foo@gotpcrel32@lo+4
212
+ ; GFX9-NEXT: s_addc_u32 s5, s5, foo@gotpcrel32@hi+12
213
+ ; GFX9-NEXT: v_writelane_b32 v44, s47, 12
214
+ ; GFX9-NEXT: s_load_dwordx2 s[46:47], s[4:5], 0x0
215
+ ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
216
+ ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
217
+ ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
218
+ ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
219
+ ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill
220
+ ; GFX9-NEXT: v_mov_b32_e32 v41, v1
221
+ ; GFX9-NEXT: v_mov_b32_e32 v42, v0
222
+ ; GFX9-NEXT: v_writelane_b32 v44, s30, 13
223
+ ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v42, v41
224
+ ; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41]
225
+ ; GFX9-NEXT: v_writelane_b32 v44, s31, 14
226
+ ; GFX9-NEXT: v_mov_b32_e32 v40, v31
227
+ ; GFX9-NEXT: s_mov_b32 s42, s14
228
+ ; GFX9-NEXT: s_mov_b32 s43, s13
229
+ ; GFX9-NEXT: s_mov_b32 s44, s12
230
+ ; GFX9-NEXT: s_mov_b64 s[34:35], s[10:11]
231
+ ; GFX9-NEXT: s_mov_b64 s[36:37], s[8:9]
232
+ ; GFX9-NEXT: s_mov_b64 s[38:39], s[6:7]
233
+ ; GFX9-NEXT: v_and_b32_e32 v43, 0xffffff, v41
234
+ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[46:47]
235
+ ; GFX9-NEXT: v_mad_u32_u24 v41, v42, v41, v43
236
+ ; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41]
237
+ ; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
238
+ ; GFX9-NEXT: s_mov_b64 s[8:9], s[36:37]
239
+ ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
240
+ ; GFX9-NEXT: s_mov_b32 s12, s44
241
+ ; GFX9-NEXT: s_mov_b32 s13, s43
242
+ ; GFX9-NEXT: s_mov_b32 s14, s42
243
+ ; GFX9-NEXT: v_mov_b32_e32 v31, v40
244
+ ; GFX9-NEXT: v_mov_b32_e32 v0, v41
245
+ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[46:47]
246
+ ; GFX9-NEXT: v_add_u32_e32 v0, v41, v43
247
+ ; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41]
248
+ ; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39]
249
+ ; GFX9-NEXT: s_mov_b64 s[8:9], s[36:37]
250
+ ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35]
251
+ ; GFX9-NEXT: s_mov_b32 s12, s44
252
+ ; GFX9-NEXT: s_mov_b32 s13, s43
253
+ ; GFX9-NEXT: s_mov_b32 s14, s42
254
+ ; GFX9-NEXT: v_mov_b32_e32 v31, v40
255
+ ; GFX9-NEXT: s_swappc_b64 s[30:31], s[46:47]
256
+ ; GFX9-NEXT: buffer_load_dword v43, off, s[0:3], s33 ; 4-byte Folded Reload
257
+ ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
258
+ ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
259
+ ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
260
+ ; GFX9-NEXT: v_readlane_b32 s4, v44, 13
261
+ ; GFX9-NEXT: v_readlane_b32 s5, v44, 14
262
+ ; GFX9-NEXT: v_readlane_b32 s47, v44, 12
263
+ ; GFX9-NEXT: v_readlane_b32 s46, v44, 11
264
+ ; GFX9-NEXT: v_readlane_b32 s44, v44, 10
265
+ ; GFX9-NEXT: v_readlane_b32 s43, v44, 9
266
+ ; GFX9-NEXT: v_readlane_b32 s42, v44, 8
267
+ ; GFX9-NEXT: v_readlane_b32 s41, v44, 7
268
+ ; GFX9-NEXT: v_readlane_b32 s40, v44, 6
269
+ ; GFX9-NEXT: v_readlane_b32 s39, v44, 5
270
+ ; GFX9-NEXT: v_readlane_b32 s38, v44, 4
271
+ ; GFX9-NEXT: v_readlane_b32 s37, v44, 3
272
+ ; GFX9-NEXT: v_readlane_b32 s36, v44, 2
273
+ ; GFX9-NEXT: v_readlane_b32 s35, v44, 1
274
+ ; GFX9-NEXT: v_readlane_b32 s34, v44, 0
275
+ ; GFX9-NEXT: s_sub_u32 s32, s32, 0x800
276
+ ; GFX9-NEXT: v_readlane_b32 s33, v44, 15
277
+ ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
278
+ ; GFX9-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
279
+ ; GFX9-NEXT: s_mov_b64 exec, s[6:7]
280
+ ; GFX9-NEXT: s_waitcnt vmcnt(0)
281
+ ; GFX9-NEXT: s_setpc_b64 s[4:5]
281
282
%b = and i32 %b.arg , 16777215
282
283
%s = and i32 %s.arg , 16777215
283
284
0 commit comments