Lines Matching +full:0 +full:x1b0

7 define amdgpu_kernel void @constant_load_i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
9 ; GFX6: ; %bb.0:
10 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
11 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
15 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
18 ; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
21 ; GFX6-NEXT: s_waitcnt vmcnt(0)
23 ; GFX6-NEXT: buffer_store_byte v0, off, s[4:7], 0
27 ; GFX8: ; %bb.0:
28 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
29 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
32 ; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
35 ; GFX8-NEXT: s_waitcnt vmcnt(0)
37 ; GFX8-NEXT: flat_store_byte v[0:1], v2
41 ; EG: ; %bb.0:
42 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
43 ; EG-NEXT: TEX 0 @6
44 ; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[]
49 ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
55 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
57 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
60 ; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
64 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
67 ; GFX12: ; %bb.0:
68 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
69 ; GFX12-NEXT: s_wait_kmcnt 0x0
70 ; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
71 ; GFX12-NEXT: s_wait_kmcnt 0x0
74 ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
75 ; GFX12-NEXT: global_store_b8 v0, v1, s[0:1]
82 define amdgpu_kernel void @constant_load_v2i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
84 ; GFX6: ; %bb.0:
85 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
86 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
90 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
93 ; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
96 ; GFX6-NEXT: s_waitcnt vmcnt(0)
97 ; GFX6-NEXT: buffer_store_byte v0, off, s[4:7], 0
101 ; GFX8: ; %bb.0:
102 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
103 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
106 ; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
109 ; GFX8-NEXT: s_waitcnt vmcnt(0)
110 ; GFX8-NEXT: flat_store_byte v[0:1], v2
114 ; EG: ; %bb.0:
115 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
116 ; EG-NEXT: TEX 0 @6
117 ; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[]
122 ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
128 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
130 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
133 ; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
137 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
140 ; GFX12: ; %bb.0:
141 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
142 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
143 ; GFX12-NEXT: s_wait_kmcnt 0x0
145 ; GFX12-NEXT: s_wait_loadcnt 0x0
146 ; GFX12-NEXT: global_store_b8 v0, v1, s[0:1]
153 define amdgpu_kernel void @constant_load_v3i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
155 ; GFX6: ; %bb.0:
156 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
157 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
161 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
164 ; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
167 ; GFX6-NEXT: s_waitcnt vmcnt(0)
168 ; GFX6-NEXT: buffer_store_byte v0, off, s[4:7], 0
172 ; GFX8: ; %bb.0:
173 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
174 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
177 ; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
180 ; GFX8-NEXT: s_waitcnt vmcnt(0)
181 ; GFX8-NEXT: flat_store_byte v[0:1], v2
185 ; EG: ; %bb.0:
186 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
187 ; EG-NEXT: TEX 0 @6
188 ; EG-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[]
193 ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
198 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
200 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
203 ; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
207 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
210 ; GFX12: ; %bb.0:
211 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
212 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
213 ; GFX12-NEXT: s_wait_kmcnt 0x0
215 ; GFX12-NEXT: s_wait_loadcnt 0x0
216 ; GFX12-NEXT: global_store_b8 v0, v1, s[0:1]
223 define amdgpu_kernel void @constant_load_v4i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
225 ; GFX6: ; %bb.0:
226 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
227 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
231 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
234 ; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
237 ; GFX6-NEXT: s_waitcnt vmcnt(0)
238 ; GFX6-NEXT: buffer_store_byte v0, off, s[4:7], 0
242 ; GFX8: ; %bb.0:
243 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
244 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
247 ; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
250 ; GFX8-NEXT: s_waitcnt vmcnt(0)
251 ; GFX8-NEXT: flat_store_byte v[0:1], v2
255 ; EG: ; %bb.0:
256 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
257 ; EG-NEXT: TEX 0 @6
258 ; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[]
263 ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
271 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
274 ; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
278 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
281 ; GFX12: ; %bb.0:
282 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
283 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
284 ; GFX12-NEXT: s_wait_kmcnt 0x0
286 ; GFX12-NEXT: s_wait_loadcnt 0x0
287 ; GFX12-NEXT: global_store_b8 v0, v1, s[0:1]
294 define amdgpu_kernel void @constant_load_v8i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
296 ; GFX6: ; %bb.0:
297 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
298 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
302 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
305 ; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
308 ; GFX6-NEXT: s_waitcnt vmcnt(0)
309 ; GFX6-NEXT: buffer_store_byte v0, off, s[4:7], 0
313 ; GFX8: ; %bb.0:
314 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
315 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
318 ; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
321 ; GFX8-NEXT: s_waitcnt vmcnt(0)
322 ; GFX8-NEXT: flat_store_byte v[0:1], v2
326 ; EG: ; %bb.0:
327 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
328 ; EG-NEXT: TEX 0 @6
329 ; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[]
334 ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
342 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
345 ; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
349 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
352 ; GFX12: ; %bb.0:
353 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
354 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
355 ; GFX12-NEXT: s_wait_kmcnt 0x0
357 ; GFX12-NEXT: s_wait_loadcnt 0x0
358 ; GFX12-NEXT: global_store_b8 v0, v1, s[0:1]
365 define amdgpu_kernel void @constant_load_v16i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
367 ; GFX6: ; %bb.0:
368 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
369 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
373 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
376 ; GFX6-NEXT: buffer_load_ushort v0, off, s[8:11], 0
379 ; GFX6-NEXT: s_waitcnt vmcnt(0)
380 ; GFX6-NEXT: buffer_store_short v0, off, s[4:7], 0
384 ; GFX8: ; %bb.0:
385 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
386 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
389 ; GFX8-NEXT: flat_load_ushort v2, v[0:1]
392 ; GFX8-NEXT: s_waitcnt vmcnt(0)
393 ; GFX8-NEXT: flat_store_short v[0:1], v2
397 ; EG: ; %bb.0:
398 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
399 ; EG-NEXT: TEX 0 @6
400 ; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[]
405 ; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
413 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
416 ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
420 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
423 ; GFX12: ; %bb.0:
424 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
425 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
426 ; GFX12-NEXT: s_wait_kmcnt 0x0
428 ; GFX12-NEXT: s_wait_loadcnt 0x0
429 ; GFX12-NEXT: global_store_b16 v0, v1, s[0:1]
436 define amdgpu_kernel void @constant_load_v32i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
438 ; GFX6: ; %bb.0:
439 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
440 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
441 ; GFX6-NEXT: s_load_dword s4, s[2:3], 0x0
442 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
444 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
446 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
450 ; GFX8: ; %bb.0:
451 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
452 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
453 ; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
456 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
458 ; GFX8-NEXT: flat_store_dword v[0:1], v2
462 ; EG: ; %bb.0:
463 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
464 ; EG-NEXT: TEX 0 @6
465 ; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
470 ; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1
475 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
478 ; GFX12: ; %bb.0:
479 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
480 ; GFX12-NEXT: s_wait_kmcnt 0x0
481 ; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0
482 ; GFX12-NEXT: s_wait_kmcnt 0x0
483 ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
484 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
491 define amdgpu_kernel void @constant_load_v64i1(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
493 ; GFX6: ; %bb.0:
494 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
495 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
496 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
497 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
499 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
502 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
506 ; GFX8: ; %bb.0:
507 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
508 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
509 ; GFX8-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
512 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
515 ; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
519 ; EG: ; %bb.0:
520 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
521 ; EG-NEXT: TEX 0 @6
522 ; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
527 ; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1
532 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
535 ; GFX12: ; %bb.0:
536 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
537 ; GFX12-NEXT: s_wait_kmcnt 0x0
538 ; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
539 ; GFX12-NEXT: v_mov_b32_e32 v2, 0
540 ; GFX12-NEXT: s_wait_kmcnt 0x0
542 ; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1]
549 define amdgpu_kernel void @constant_zextload_i1_to_i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
551 ; GFX6: ; %bb.0:
552 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
553 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
557 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
560 ; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
563 ; GFX6-NEXT: s_waitcnt vmcnt(0)
564 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
568 ; GFX8: ; %bb.0:
569 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
570 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
573 ; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
576 ; GFX8-NEXT: s_waitcnt vmcnt(0)
577 ; GFX8-NEXT: flat_store_dword v[0:1], v2
581 ; EG: ; %bb.0:
582 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
583 ; EG-NEXT: TEX 0 @6
584 ; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
589 ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
594 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
597 ; GFX12: ; %bb.0:
598 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
599 ; GFX12-NEXT: s_wait_kmcnt 0x0
600 ; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
601 ; GFX12-NEXT: s_wait_kmcnt 0x0
602 ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
603 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
611 define amdgpu_kernel void @constant_sextload_i1_to_i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
613 ; GFX6: ; %bb.0:
614 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
615 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
619 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
622 ; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
625 ; GFX6-NEXT: s_waitcnt vmcnt(0)
626 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1
627 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
631 ; GFX8: ; %bb.0:
632 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
633 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
636 ; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
639 ; GFX8-NEXT: s_waitcnt vmcnt(0)
640 ; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 1
641 ; GFX8-NEXT: flat_store_dword v[0:1], v2
645 ; EG: ; %bb.0:
646 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
647 ; EG-NEXT: TEX 0 @6
648 ; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[]
653 ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
659 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
662 ; GFX12: ; %bb.0:
663 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
664 ; GFX12-NEXT: s_wait_kmcnt 0x0
665 ; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
666 ; GFX12-NEXT: s_wait_kmcnt 0x0
667 ; GFX12-NEXT: s_bfe_i32 s2, s2, 0x10000
669 ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
670 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
678 define amdgpu_kernel void @constant_zextload_v1i1_to_v1i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
680 ; GFX6: ; %bb.0:
681 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
682 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
686 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
689 ; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
692 ; GFX6-NEXT: s_waitcnt vmcnt(0)
693 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
697 ; GFX8: ; %bb.0:
698 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
699 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
702 ; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
705 ; GFX8-NEXT: s_waitcnt vmcnt(0)
706 ; GFX8-NEXT: flat_store_dword v[0:1], v2
710 ; EG: ; %bb.0:
711 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
712 ; EG-NEXT: TEX 0 @6
713 ; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
718 ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
723 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
726 ; GFX12: ; %bb.0:
727 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
728 ; GFX12-NEXT: s_wait_kmcnt 0x0
729 ; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
730 ; GFX12-NEXT: s_wait_kmcnt 0x0
731 ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
732 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
740 define amdgpu_kernel void @constant_sextload_v1i1_to_v1i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
742 ; GFX6: ; %bb.0:
743 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
744 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
748 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
751 ; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
754 ; GFX6-NEXT: s_waitcnt vmcnt(0)
755 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1
756 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
760 ; GFX8: ; %bb.0:
761 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
762 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
765 ; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
768 ; GFX8-NEXT: s_waitcnt vmcnt(0)
769 ; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 1
770 ; GFX8-NEXT: flat_store_dword v[0:1], v2
774 ; EG: ; %bb.0:
775 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
776 ; EG-NEXT: TEX 0 @6
777 ; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[]
782 ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
788 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
791 ; GFX12: ; %bb.0:
792 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
793 ; GFX12-NEXT: s_wait_kmcnt 0x0
794 ; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
795 ; GFX12-NEXT: s_wait_kmcnt 0x0
796 ; GFX12-NEXT: s_bfe_i32 s2, s2, 0x10000
798 ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
799 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
807 define amdgpu_kernel void @constant_zextload_v2i1_to_v2i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
809 ; GFX6: ; %bb.0:
810 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
811 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
815 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
818 ; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
821 ; GFX6-NEXT: s_waitcnt vmcnt(0)
824 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
828 ; GFX8: ; %bb.0:
829 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
831 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
834 ; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
837 ; GFX8-NEXT: s_waitcnt vmcnt(0)
840 ; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
844 ; EG: ; %bb.0:
845 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
846 ; EG-NEXT: TEX 0 @6
847 ; EG-NEXT: ALU 3, @9, KC0[CB0:0-32], KC1[]
852 ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
859 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
862 ; GFX12: ; %bb.0:
863 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
864 ; GFX12-NEXT: v_mov_b32_e32 v2, 0
865 ; GFX12-NEXT: s_wait_kmcnt 0x0
867 ; GFX12-NEXT: s_wait_loadcnt 0x0
868 ; GFX12-NEXT: v_and_b32_e32 v1, 0xffff, v0
872 ; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1]
880 define amdgpu_kernel void @constant_sextload_v2i1_to_v2i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
882 ; GFX6: ; %bb.0:
883 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
884 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
888 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
891 ; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
894 ; GFX6-NEXT: s_waitcnt vmcnt(0)
896 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1
897 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
901 ; GFX8: ; %bb.0:
902 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
903 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
906 ; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
909 ; GFX8-NEXT: s_waitcnt vmcnt(0)
911 ; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 1
912 ; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
916 ; EG: ; %bb.0:
917 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
918 ; EG-NEXT: TEX 0 @6
919 ; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[]
924 ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
931 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
935 ; GFX12: ; %bb.0:
936 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
937 ; GFX12-NEXT: s_wait_kmcnt 0x0
938 ; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
939 ; GFX12-NEXT: s_wait_kmcnt 0x0
940 ; GFX12-NEXT: s_bfe_i32 s3, s2, 0x10000
941 ; GFX12-NEXT: s_bfe_i32 s2, s2, 0x10001
943 ; GFX12-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
945 ; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1]
953 define amdgpu_kernel void @constant_zextload_v3i1_to_v3i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
955 ; GFX6: ; %bb.0:
956 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
957 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
961 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
964 ; GFX6-NEXT: buffer_load_ubyte v2, off, s[8:11], 0
967 ; GFX6-NEXT: s_waitcnt vmcnt(0)
971 ; GFX6-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8
972 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
976 ; GFX8: ; %bb.0:
977 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
978 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
981 ; GFX8-NEXT: flat_load_ubyte v1, v[0:1]
985 ; GFX8-NEXT: s_waitcnt vmcnt(0)
989 ; GFX8-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
993 ; EG: ; %bb.0:
994 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
995 ; EG-NEXT: TEX 0 @6
996 ; EG-NEXT: ALU 8, @9, KC0[CB0:0-32], KC1[]
997 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T3.X, 0
1001 ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
1008 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1013 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1016 ; GFX12: ; %bb.0:
1017 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1018 ; GFX12-NEXT: v_mov_b32_e32 v3, 0
1019 ; GFX12-NEXT: s_wait_kmcnt 0x0
1021 ; GFX12-NEXT: s_wait_loadcnt 0x0
1022 ; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v1
1027 ; GFX12-NEXT: v_and_b32_e32 v2, 0xffff, v2
1028 ; GFX12-NEXT: global_store_b96 v3, v[0:2], s[0:1]
1036 define amdgpu_kernel void @constant_sextload_v3i1_to_v3i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
1038 ; GFX6: ; %bb.0:
1039 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
1040 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
1044 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1047 ; GFX6-NEXT: buffer_load_ubyte v2, off, s[8:11], 0
1050 ; GFX6-NEXT: s_waitcnt vmcnt(0)
1052 ; GFX6-NEXT: v_bfe_i32 v0, v2, 0, 1
1054 ; GFX6-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8
1055 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
1059 ; GFX8: ; %bb.0:
1060 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
1061 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1064 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
1067 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1070 ; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 1
1071 ; GFX8-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
1075 ; EG: ; %bb.0:
1076 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
1077 ; EG-NEXT: TEX 0 @6
1078 ; EG-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[]
1079 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T0.X, 0
1083 ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
1089 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1094 ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
1097 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1100 ; GFX12: ; %bb.0:
1101 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1102 ; GFX12-NEXT: s_wait_kmcnt 0x0
1103 ; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
1104 ; GFX12-NEXT: s_wait_kmcnt 0x0
1105 ; GFX12-NEXT: s_bfe_i32 s3, s2, 0x10002
1106 ; GFX12-NEXT: s_bfe_i32 s4, s2, 0x10000
1107 ; GFX12-NEXT: s_bfe_i32 s2, s2, 0x10001
1108 ; GFX12-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v0, s4
1110 ; GFX12-NEXT: global_store_b96 v3, v[0:2], s[0:1]
1118 define amdgpu_kernel void @constant_zextload_v4i1_to_v4i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
1120 ; GFX6: ; %bb.0:
1121 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
1122 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
1126 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1129 ; GFX6-NEXT: buffer_load_ubyte v1, off, s[8:11], 0
1132 ; GFX6-NEXT: s_waitcnt vmcnt(0)
1137 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
1141 ; GFX8: ; %bb.0:
1142 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
1143 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1146 ; GFX8-NEXT: flat_load_ubyte v1, v[0:1]
1150 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1155 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
1159 ; EG: ; %bb.0:
1160 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
1161 ; EG-NEXT: TEX 0 @6
1162 ; EG-NEXT: ALU 7, @9, KC0[CB0:0-32], KC1[]
1167 ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
1172 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
1174 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1178 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1181 ; GFX12: ; %bb.0:
1182 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1183 ; GFX12-NEXT: v_mov_b32_e32 v4, 0
1184 ; GFX12-NEXT: s_wait_kmcnt 0x0
1186 ; GFX12-NEXT: s_wait_loadcnt 0x0
1187 ; GFX12-NEXT: v_and_b32_e32 v3, 0xffff, v1
1193 ; GFX12-NEXT: global_store_b128 v4, v[0:3], s[0:1]
1201 define amdgpu_kernel void @constant_sextload_v4i1_to_v4i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
1203 ; GFX6: ; %bb.0:
1204 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
1205 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
1209 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1212 ; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
1215 ; GFX6-NEXT: s_waitcnt vmcnt(0)
1219 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1
1220 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
1224 ; GFX8: ; %bb.0:
1225 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
1226 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1229 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
1232 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1236 ; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 1
1237 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
1241 ; EG: ; %bb.0:
1242 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
1243 ; EG-NEXT: TEX 0 @6
1244 ; EG-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[]
1249 ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
1255 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
1258 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1263 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1266 ; GFX12: ; %bb.0:
1267 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1268 ; GFX12-NEXT: s_wait_kmcnt 0x0
1269 ; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
1270 ; GFX12-NEXT: s_wait_kmcnt 0x0
1271 ; GFX12-NEXT: s_bfe_i32 s3, s2, 0x10003
1272 ; GFX12-NEXT: s_bfe_i32 s4, s2, 0x10002
1273 ; GFX12-NEXT: s_bfe_i32 s5, s2, 0x10000
1274 ; GFX12-NEXT: s_bfe_i32 s2, s2, 0x10001
1276 ; GFX12-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v1, s2
1279 ; GFX12-NEXT: global_store_b128 v4, v[0:3], s[0:1]
1287 define amdgpu_kernel void @constant_zextload_v8i1_to_v8i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
1289 ; GFX6: ; %bb.0:
1290 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
1291 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
1295 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1298 ; GFX6-NEXT: buffer_load_ubyte v4, off, s[8:11], 0
1301 ; GFX6-NEXT: s_waitcnt vmcnt(0)
1310 ; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16
1311 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
1315 ; GFX8: ; %bb.0:
1316 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
1317 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1320 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
1323 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1325 ; GFX8-NEXT: s_bfe_u32 s3, s2, 0x10003
1326 ; GFX8-NEXT: s_bfe_u32 s4, s2, 0x10001
1327 ; GFX8-NEXT: s_bfe_u32 s5, s2, 0x10005
1329 ; GFX8-NEXT: s_bfe_u32 s7, s2, 0x10002
1330 ; GFX8-NEXT: s_bfe_u32 s2, s2, 0x10004
1332 ; GFX8-NEXT: s_addc_u32 s1, s1, 0
1333 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
1344 ; GFX8-NEXT: flat_store_dwordx4 v[10:11], v[0:3]
1349 ; EG: ; %bb.0:
1350 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
1351 ; EG-NEXT: TEX 0 @6
1352 ; EG-NEXT: ALU 17, @9, KC0[CB0:0-32], KC1[]
1353 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T8.X, 0
1357 ; EG-NEXT: VTX_READ_8 T5.X, T5.X, 0, #1
1362 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
1364 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1367 ; EG-NEXT: 7(9.809089e-45), 0(0.000000e+00)
1373 ; EG-NEXT: 5(7.006492e-45), 0(0.000000e+00)
1378 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1381 ; GFX12: ; %bb.0:
1382 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1383 ; GFX12-NEXT: v_mov_b32_e32 v8, 0
1384 ; GFX12-NEXT: s_wait_kmcnt 0x0
1386 ; GFX12-NEXT: s_wait_loadcnt 0x0
1389 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10001
1390 ; GFX12-NEXT: v_dual_mov_b32 v5, s4 :: v_dual_and_b32 v0, 0xffff, v0
1391 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10003
1392 ; GFX12-NEXT: s_bfe_u32 s5, s2, 0x10005
1394 ; GFX12-NEXT: s_bfe_u32 s7, s2, 0x10002
1395 ; GFX12-NEXT: s_bfe_u32 s2, s2, 0x10004
1399 ; GFX12-NEXT: s_wait_alu 0xfffe
1402 ; GFX12-NEXT: s_clause 0x1
1403 ; GFX12-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:16
1404 ; GFX12-NEXT: global_store_b128 v8, v[4:7], s[0:1]
1412 define amdgpu_kernel void @constant_sextload_v8i1_to_v8i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
1414 ; GFX6: ; %bb.0:
1415 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
1416 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
1420 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1423 ; GFX6-NEXT: buffer_load_ubyte v4, off, s[8:11], 0
1426 ; GFX6-NEXT: s_waitcnt vmcnt(0)
1430 ; GFX6-NEXT: v_bfe_i32 v0, v4, 0, 1
1435 ; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16
1436 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
1440 ; GFX8: ; %bb.0:
1441 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
1442 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1445 ; GFX8-NEXT: flat_load_ubyte v4, v[0:1]
1447 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
1452 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1456 ; GFX8-NEXT: v_bfe_i32 v0, v4, 0, 1
1462 ; GFX8-NEXT: flat_store_dwordx4 v[8:9], v[0:3]
1466 ; EG: ; %bb.0:
1467 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
1468 ; EG-NEXT: TEX 0 @6
1469 ; EG-NEXT: ALU 23, @9, KC0[CB0:0-32], KC1[]
1470 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T5.X, 0
1474 ; EG-NEXT: VTX_READ_8 T5.X, T5.X, 0, #1
1479 ; EG-NEXT: 7(9.809089e-45), 0(0.000000e+00)
1482 ; EG-NEXT: 6(8.407791e-45), 0(0.000000e+00)
1498 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
1501 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1504 ; GFX12: ; %bb.0:
1505 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1506 ; GFX12-NEXT: s_wait_kmcnt 0x0
1507 ; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
1508 ; GFX12-NEXT: s_wait_kmcnt 0x0
1509 ; GFX12-NEXT: s_bfe_i32 s3, s2, 0x10003
1510 ; GFX12-NEXT: s_bfe_i32 s4, s2, 0x10002
1511 ; GFX12-NEXT: s_bfe_i32 s5, s2, 0x10001
1512 ; GFX12-NEXT: s_bfe_i32 s6, s2, 0x10000
1513 ; GFX12-NEXT: s_bfe_i32 s7, s2, 0x10007
1514 ; GFX12-NEXT: s_bfe_i32 s8, s2, 0x10006
1515 ; GFX12-NEXT: s_bfe_i32 s9, s2, 0x10004
1516 ; GFX12-NEXT: s_bfe_i32 s2, s2, 0x10005
1518 ; GFX12-NEXT: v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v1, s2
1523 ; GFX12-NEXT: s_clause 0x1
1524 ; GFX12-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:16
1525 ; GFX12-NEXT: global_store_b128 v8, v[4:7], s[0:1]
1533 define amdgpu_kernel void @constant_zextload_v16i1_to_v16i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
1535 ; GFX6: ; %bb.0:
1536 ; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
1537 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
1541 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1544 ; GFX6-NEXT: buffer_load_ushort v12, off, s[8:11], 0
1547 ; GFX6-NEXT: s_waitcnt vmcnt(0)
1564 ; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:48
1565 ; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
1566 ; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
1567 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1571 ; GFX8: ; %bb.0:
1572 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
1573 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1576 ; GFX8-NEXT: flat_load_ushort v0, v[0:1]
1579 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1581 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s2
1582 ; GFX8-NEXT: s_bfe_u32 s3, s2, 0x10003
1583 ; GFX8-NEXT: s_bfe_u32 s4, s2, 0x10001
1584 ; GFX8-NEXT: s_bfe_u32 s5, s2, 0x10007
1585 ; GFX8-NEXT: s_bfe_u32 s7, s2, 0x10009
1586 ; GFX8-NEXT: s_bfe_u32 s8, s2, 0x1000d
1588 ; GFX8-NEXT: s_bfe_u32 s10, s2, 0x1000a
1589 ; GFX8-NEXT: s_bfe_u32 s2, s2, 0x1000c
1590 ; GFX8-NEXT: s_bfe_u32 s11, s6, 0x10005
1591 ; GFX8-NEXT: s_bfe_u32 s12, s6, 0x1000b
1593 ; GFX8-NEXT: s_bfe_u32 s14, s6, 0x10002
1594 ; GFX8-NEXT: s_bfe_u32 s15, s6, 0x10006
1595 ; GFX8-NEXT: s_bfe_u32 s16, s6, 0x10004
1596 ; GFX8-NEXT: s_bfe_u32 s17, s6, 0x10008
1597 ; GFX8-NEXT: s_bfe_u32 s6, s6, 0x1000e
1601 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
1608 ; GFX8-NEXT: flat_store_dwordx4 v[18:19], v[0:3]
1609 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
1617 ; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[4:7]
1618 ; GFX8-NEXT: s_addc_u32 s1, s1, 0
1628 ; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
1633 ; EG: ; %bb.0:
1634 ; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
1635 ; EG-NEXT: TEX 0 @8
1636 ; EG-NEXT: ALU 36, @11, KC0[CB0:0-32], KC1[]
1637 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T14.X, 0
1638 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T13.X, 0
1639 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T12.X, 0
1643 ; EG-NEXT: VTX_READ_16 T7.X, T7.X, 0, #1
1648 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
1650 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1653 ; EG-NEXT: 7(9.809089e-45), 0(0.000000e+00)
1665 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
1670 ; EG-NEXT: 15(2.101948e-44), 0(0.000000e+00)
1675 ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
1683 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1686 ; GFX12: ; %bb.0:
1687 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1688 ; GFX12-NEXT: v_mov_b32_e32 v16, 0
1689 ; GFX12-NEXT: s_wait_kmcnt 0x0
1691 ; GFX12-NEXT: s_wait_loadcnt 0x0
1694 ; GFX12-NEXT: s_and_b32 s6, 0xffff, s2
1695 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10003
1696 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10001
1697 ; GFX12-NEXT: s_bfe_u32 s5, s2, 0x10007
1698 ; GFX12-NEXT: s_bfe_u32 s7, s2, 0x10009
1699 ; GFX12-NEXT: s_bfe_u32 s8, s2, 0x1000d
1702 ; GFX12-NEXT: s_bfe_u32 s10, s2, 0x1000a
1703 ; GFX12-NEXT: s_bfe_u32 s2, s2, 0x1000c
1704 ; GFX12-NEXT: s_bfe_u32 s11, s6, 0x10005
1705 ; GFX12-NEXT: s_bfe_u32 s12, s6, 0x1000b
1707 ; GFX12-NEXT: s_bfe_u32 s14, s6, 0x10002
1708 ; GFX12-NEXT: s_bfe_u32 s15, s6, 0x10006
1709 ; GFX12-NEXT: s_bfe_u32 s16, s6, 0x10004
1710 ; GFX12-NEXT: s_bfe_u32 s17, s6, 0x10008
1711 ; GFX12-NEXT: s_bfe_u32 s6, s6, 0x1000e
1720 ; GFX12-NEXT: s_clause 0x3
1721 ; GFX12-NEXT: global_store_b128 v16, v[0:3], s[0:1] offset:48
1722 ; GFX12-NEXT: global_store_b128 v16, v[4:7], s[0:1] offset:32
1723 ; GFX12-NEXT: global_store_b128 v16, v[8:11], s[0:1] offset:16
1724 ; GFX12-NEXT: global_store_b128 v16, v[12:15], s[0:1]
1732 define amdgpu_kernel void @constant_sextload_v16i1_to_v16i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
1734 ; GFX6: ; %bb.0:
1735 ; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
1736 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
1740 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1743 ; GFX6-NEXT: buffer_load_ushort v12, off, s[8:11], 0
1746 ; GFX6-NEXT: s_waitcnt vmcnt(0)
1750 ; GFX6-NEXT: v_bfe_i32 v0, v12, 0, 1
1763 ; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:48
1764 ; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
1765 ; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
1766 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1770 ; GFX8: ; %bb.0:
1771 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
1772 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1775 ; GFX8-NEXT: flat_load_ushort v18, v[0:1]
1777 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
1782 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
1786 ; GFX8-NEXT: s_addc_u32 s1, s1, 0
1790 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1803 ; GFX8-NEXT: v_bfe_i32 v0, v18, 0, 1
1810 ; GFX8-NEXT: flat_store_dwordx4 v[12:13], v[0:3]
1814 ; EG: ; %bb.0:
1815 ; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
1816 ; EG-NEXT: TEX 0 @8
1817 ; EG-NEXT: ALU 51, @11, KC0[CB0:0-32], KC1[]
1818 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T7.X, 0
1819 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T14.X, 0
1820 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T11.X, 0
1824 ; EG-NEXT: VTX_READ_16 T7.X, T7.X, 0, #1
1829 ; EG-NEXT: 7(9.809089e-45), 0(0.000000e+00)
1832 ; EG-NEXT: 6(8.407791e-45), 0(0.000000e+00)
1843 ; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
1854 ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
1858 ; EG-NEXT: 14(1.961818e-44), 0(0.000000e+00)
1865 ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
1876 ; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
1879 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1882 ; GFX12: ; %bb.0:
1883 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1884 ; GFX12-NEXT: s_wait_kmcnt 0x0
1885 ; GFX12-NEXT: s_load_u16 s2, s[2:3], 0x0
1886 ; GFX12-NEXT: s_wait_kmcnt 0x0
1887 ; GFX12-NEXT: s_bfe_i32 s3, s2, 0x10003
1888 ; GFX12-NEXT: s_bfe_i32 s4, s2, 0x10002
1889 ; GFX12-NEXT: s_bfe_i32 s5, s2, 0x10001
1890 ; GFX12-NEXT: s_bfe_i32 s6, s2, 0x10000
1891 ; GFX12-NEXT: s_bfe_i32 s7, s2, 0x10007
1892 ; GFX12-NEXT: s_bfe_i32 s8, s2, 0x10006
1893 ; GFX12-NEXT: s_bfe_i32 s9, s2, 0x10005
1894 ; GFX12-NEXT: s_bfe_i32 s10, s2, 0x10004
1895 ; GFX12-NEXT: s_bfe_i32 s11, s2, 0x1000b
1896 ; GFX12-NEXT: s_bfe_i32 s12, s2, 0x1000a
1897 ; GFX12-NEXT: s_bfe_i32 s13, s2, 0x10009
1898 ; GFX12-NEXT: s_bfe_i32 s14, s2, 0x10008
1899 ; GFX12-NEXT: s_bfe_i32 s15, s2, 0x1000f
1900 ; GFX12-NEXT: s_bfe_i32 s16, s2, 0x1000e
1901 ; GFX12-NEXT: s_bfe_i32 s17, s2, 0x1000c
1902 ; GFX12-NEXT: s_bfe_i32 s2, s2, 0x1000d
1904 ; GFX12-NEXT: v_dual_mov_b32 v16, 0 :: v_dual_mov_b32 v1, s2
1913 ; GFX12-NEXT: s_clause 0x3
1914 ; GFX12-NEXT: global_store_b128 v16, v[0:3], s[0:1] offset:48
1915 ; GFX12-NEXT: global_store_b128 v16, v[4:7], s[0:1] offset:32
1916 ; GFX12-NEXT: global_store_b128 v16, v[8:11], s[0:1] offset:16
1917 ; GFX12-NEXT: global_store_b128 v16, v[12:15], s[0:1]
1925 define amdgpu_kernel void @constant_zextload_v32i1_to_v32i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
1927 ; GFX6: ; %bb.0:
1928 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
1929 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1930 ; GFX6-NEXT: s_load_dword s4, s[2:3], 0x0
1931 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
1933 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1934 ; GFX6-NEXT: s_bfe_u32 s5, s4, 0x10003
1935 ; GFX6-NEXT: s_bfe_u32 s6, s4, 0x10001
1936 ; GFX6-NEXT: s_bfe_u32 s7, s4, 0x10007
1937 ; GFX6-NEXT: s_bfe_u32 s8, s4, 0x10005
1938 ; GFX6-NEXT: s_bfe_u32 s9, s4, 0x1000b
1939 ; GFX6-NEXT: s_bfe_u32 s10, s4, 0x10009
1940 ; GFX6-NEXT: s_bfe_u32 s11, s4, 0x1000f
1941 ; GFX6-NEXT: s_bfe_u32 s12, s4, 0x1000d
1942 ; GFX6-NEXT: s_bfe_u32 s13, s4, 0x10013
1943 ; GFX6-NEXT: s_bfe_u32 s14, s4, 0x10011
1944 ; GFX6-NEXT: s_bfe_u32 s15, s4, 0x10017
1945 ; GFX6-NEXT: s_bfe_u32 s16, s4, 0x10015
1946 ; GFX6-NEXT: s_bfe_u32 s17, s4, 0x1001b
1947 ; GFX6-NEXT: s_bfe_u32 s18, s4, 0x10019
1949 ; GFX6-NEXT: s_bfe_u32 s20, s4, 0x1001d
1951 ; GFX6-NEXT: s_bfe_u32 s22, s4, 0x10002
1952 ; GFX6-NEXT: s_bfe_u32 s23, s4, 0x10006
1953 ; GFX6-NEXT: s_bfe_u32 s24, s4, 0x10004
1954 ; GFX6-NEXT: s_bfe_u32 s25, s4, 0x1000a
1955 ; GFX6-NEXT: s_bfe_u32 s26, s4, 0x10008
1956 ; GFX6-NEXT: s_bfe_u32 s27, s4, 0x1000e
1957 ; GFX6-NEXT: s_bfe_u32 s28, s4, 0x1000c
1958 ; GFX6-NEXT: s_bfe_u32 s29, s4, 0x10012
1959 ; GFX6-NEXT: s_bfe_u32 s30, s4, 0x10010
1960 ; GFX6-NEXT: s_bfe_u32 s31, s4, 0x10016
1961 ; GFX6-NEXT: s_bfe_u32 s33, s4, 0x10014
1962 ; GFX6-NEXT: s_bfe_u32 s34, s4, 0x1001a
1963 ; GFX6-NEXT: s_bfe_u32 s35, s4, 0x1001e
1964 ; GFX6-NEXT: s_bfe_u32 s36, s4, 0x1001c
1965 ; GFX6-NEXT: s_bfe_u32 s4, s4, 0x10018
1970 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
1971 ; GFX6-NEXT: s_waitcnt expcnt(0)
1976 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
1977 ; GFX6-NEXT: s_waitcnt expcnt(0)
1982 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
1983 ; GFX6-NEXT: s_waitcnt expcnt(0)
1988 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
1989 ; GFX6-NEXT: s_waitcnt expcnt(0)
1994 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
1995 ; GFX6-NEXT: s_waitcnt expcnt(0)
2000 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2001 ; GFX6-NEXT: s_waitcnt expcnt(0)
2006 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
2007 ; GFX6-NEXT: s_waitcnt expcnt(0)
2012 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
2016 ; GFX8: ; %bb.0:
2017 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
2018 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
2019 ; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
2020 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
2021 ; GFX8-NEXT: s_bfe_u32 s4, s2, 0x10003
2022 ; GFX8-NEXT: s_bfe_u32 s5, s2, 0x10001
2023 ; GFX8-NEXT: s_bfe_u32 s6, s2, 0x10007
2024 ; GFX8-NEXT: s_bfe_u32 s7, s2, 0x10005
2025 ; GFX8-NEXT: s_bfe_u32 s8, s2, 0x1000b
2026 ; GFX8-NEXT: s_bfe_u32 s9, s2, 0x10009
2027 ; GFX8-NEXT: s_bfe_u32 s10, s2, 0x1000f
2028 ; GFX8-NEXT: s_bfe_u32 s11, s2, 0x1000d
2029 ; GFX8-NEXT: s_bfe_u32 s12, s2, 0x10013
2030 ; GFX8-NEXT: s_bfe_u32 s13, s2, 0x10011
2031 ; GFX8-NEXT: s_bfe_u32 s14, s2, 0x10017
2032 ; GFX8-NEXT: s_bfe_u32 s15, s2, 0x1001b
2033 ; GFX8-NEXT: s_bfe_u32 s16, s2, 0x10019
2035 ; GFX8-NEXT: s_bfe_u32 s17, s2, 0x1001d
2037 ; GFX8-NEXT: s_bfe_u32 s19, s2, 0x10002
2038 ; GFX8-NEXT: s_bfe_u32 s20, s2, 0x10006
2039 ; GFX8-NEXT: s_bfe_u32 s21, s2, 0x10004
2040 ; GFX8-NEXT: s_bfe_u32 s22, s2, 0x1000a
2041 ; GFX8-NEXT: s_bfe_u32 s23, s2, 0x10008
2042 ; GFX8-NEXT: s_bfe_u32 s24, s2, 0x1000e
2043 ; GFX8-NEXT: s_bfe_u32 s25, s2, 0x1000c
2044 ; GFX8-NEXT: s_bfe_u32 s26, s2, 0x10012
2045 ; GFX8-NEXT: s_bfe_u32 s27, s2, 0x10010
2046 ; GFX8-NEXT: s_bfe_u32 s28, s2, 0x10016
2047 ; GFX8-NEXT: s_bfe_u32 s29, s2, 0x10015
2048 ; GFX8-NEXT: s_bfe_u32 s30, s2, 0x10014
2049 ; GFX8-NEXT: s_bfe_u32 s31, s2, 0x1001a
2050 ; GFX8-NEXT: s_bfe_u32 s33, s2, 0x10018
2051 ; GFX8-NEXT: s_bfe_u32 s34, s2, 0x1001e
2052 ; GFX8-NEXT: s_bfe_u32 s2, s2, 0x1001c
2054 ; GFX8-NEXT: s_add_u32 s2, s0, 0x70
2056 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
2059 ; GFX8-NEXT: s_add_u32 s2, s0, 0x60
2062 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
2063 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
2066 ; GFX8-NEXT: s_add_u32 s2, s0, 0x50
2071 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
2072 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
2080 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
2081 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
2089 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
2090 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
2098 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
2099 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
2107 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
2108 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
2115 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
2122 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
2126 ; EG: ; %bb.0:
2127 ; EG-NEXT: ALU 0, @14, KC0[CB0:0-32], KC1[]
2128 ; EG-NEXT: TEX 0 @12
2129 ; EG-NEXT: ALU 76, @15, KC0[CB0:0-32], KC1[]
2130 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T26.X, 0
2131 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T25.X, 0
2132 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T24.X, 0
2133 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T22.X, 0
2134 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T20.X, 0
2135 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T18.X, 0
2136 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T16.X, 0
2140 ; EG-NEXT: VTX_READ_32 T11.X, T11.X, 0, #1
2145 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
2147 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
2150 ; EG-NEXT: 7(9.809089e-45), 0(0.000000e+00)
2162 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
2167 ; EG-NEXT: 15(2.101948e-44), 0(0.000000e+00)
2172 ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
2177 ; EG-NEXT: 19(2.662467e-44), 0(0.000000e+00)
2182 ; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
2187 ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
2192 ; EG-NEXT: 64(8.968310e-44), 0(0.000000e+00)
2197 ; EG-NEXT: 27(3.783506e-44), 0(0.000000e+00)
2202 ; EG-NEXT: 80(1.121039e-43), 0(0.000000e+00)
2207 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
2212 ; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00)
2220 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
2223 ; GFX12: ; %bb.0:
2224 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
2225 ; GFX12-NEXT: s_wait_kmcnt 0x0
2226 ; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0
2227 ; GFX12-NEXT: s_wait_kmcnt 0x0
2228 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10003
2229 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10001
2230 ; GFX12-NEXT: s_bfe_u32 s5, s2, 0x10007
2231 ; GFX12-NEXT: s_bfe_u32 s6, s2, 0x10005
2232 ; GFX12-NEXT: s_bfe_u32 s7, s2, 0x1000b
2233 ; GFX12-NEXT: s_bfe_u32 s8, s2, 0x10009
2234 ; GFX12-NEXT: s_bfe_u32 s9, s2, 0x1000f
2235 ; GFX12-NEXT: s_bfe_u32 s10, s2, 0x1000d
2236 ; GFX12-NEXT: s_bfe_u32 s11, s2, 0x10013
2237 ; GFX12-NEXT: s_bfe_u32 s12, s2, 0x10011
2238 ; GFX12-NEXT: s_bfe_u32 s13, s2, 0x10017
2239 ; GFX12-NEXT: s_bfe_u32 s14, s2, 0x1001b
2240 ; GFX12-NEXT: s_bfe_u32 s15, s2, 0x10019
2242 ; GFX12-NEXT: s_bfe_u32 s17, s2, 0x1001d
2244 ; GFX12-NEXT: s_bfe_u32 s19, s2, 0x10002
2245 ; GFX12-NEXT: s_bfe_u32 s20, s2, 0x10006
2246 ; GFX12-NEXT: s_bfe_u32 s21, s2, 0x10004
2247 ; GFX12-NEXT: s_bfe_u32 s22, s2, 0x1000a
2248 ; GFX12-NEXT: s_bfe_u32 s23, s2, 0x10008
2249 ; GFX12-NEXT: s_bfe_u32 s24, s2, 0x1000e
2250 ; GFX12-NEXT: s_bfe_u32 s25, s2, 0x1000c
2251 ; GFX12-NEXT: s_bfe_u32 s26, s2, 0x10012
2252 ; GFX12-NEXT: s_bfe_u32 s27, s2, 0x10010
2253 ; GFX12-NEXT: s_bfe_u32 s28, s2, 0x10016
2254 ; GFX12-NEXT: s_bfe_u32 s29, s2, 0x10015
2255 ; GFX12-NEXT: s_bfe_u32 s30, s2, 0x10014
2256 ; GFX12-NEXT: s_bfe_u32 s31, s2, 0x1001a
2257 ; GFX12-NEXT: s_bfe_u32 s33, s2, 0x10018
2258 ; GFX12-NEXT: s_bfe_u32 s34, s2, 0x1001c
2259 ; GFX12-NEXT: s_bfe_u32 s2, s2, 0x1001e
2260 ; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s17
2267 ; GFX12-NEXT: s_clause 0x1
2268 ; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:112
2269 ; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:96
2280 ; GFX12-NEXT: s_clause 0x5
2281 ; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:80
2282 ; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:64
2283 ; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:48
2284 ; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:32
2285 ; GFX12-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:16
2286 ; GFX12-NEXT: global_store_b128 v24, v[20:23], s[0:1]
2294 define amdgpu_kernel void @constant_sextload_v32i1_to_v32i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
2296 ; GFX6: ; %bb.0:
2297 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
2298 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
2299 ; GFX6-NEXT: s_load_dword s4, s[2:3], 0x0
2300 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
2302 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
2303 ; GFX6-NEXT: s_bfe_i32 s5, s4, 0x10003
2304 ; GFX6-NEXT: s_bfe_i32 s6, s4, 0x10002
2305 ; GFX6-NEXT: s_bfe_i32 s7, s4, 0x10001
2306 ; GFX6-NEXT: s_bfe_i32 s8, s4, 0x10000
2307 ; GFX6-NEXT: s_bfe_i32 s9, s4, 0x10007
2308 ; GFX6-NEXT: s_bfe_i32 s10, s4, 0x10006
2309 ; GFX6-NEXT: s_bfe_i32 s11, s4, 0x10005
2310 ; GFX6-NEXT: s_bfe_i32 s12, s4, 0x10004
2311 ; GFX6-NEXT: s_bfe_i32 s13, s4, 0x1000b
2312 ; GFX6-NEXT: s_bfe_i32 s14, s4, 0x1000a
2313 ; GFX6-NEXT: s_bfe_i32 s15, s4, 0x10009
2314 ; GFX6-NEXT: s_bfe_i32 s16, s4, 0x10008
2315 ; GFX6-NEXT: s_bfe_i32 s17, s4, 0x1000f
2316 ; GFX6-NEXT: s_bfe_i32 s18, s4, 0x1000e
2317 ; GFX6-NEXT: s_bfe_i32 s19, s4, 0x1000d
2318 ; GFX6-NEXT: s_bfe_i32 s20, s4, 0x1000c
2319 ; GFX6-NEXT: s_bfe_i32 s21, s4, 0x10013
2320 ; GFX6-NEXT: s_bfe_i32 s22, s4, 0x10012
2321 ; GFX6-NEXT: s_bfe_i32 s23, s4, 0x10011
2322 ; GFX6-NEXT: s_bfe_i32 s24, s4, 0x10010
2323 ; GFX6-NEXT: s_bfe_i32 s25, s4, 0x10017
2324 ; GFX6-NEXT: s_bfe_i32 s26, s4, 0x10016
2325 ; GFX6-NEXT: s_bfe_i32 s27, s4, 0x10015
2326 ; GFX6-NEXT: s_bfe_i32 s28, s4, 0x10014
2327 ; GFX6-NEXT: s_bfe_i32 s29, s4, 0x1001b
2328 ; GFX6-NEXT: s_bfe_i32 s30, s4, 0x1001a
2329 ; GFX6-NEXT: s_bfe_i32 s31, s4, 0x10019
2331 ; GFX6-NEXT: s_bfe_i32 s34, s4, 0x1001e
2332 ; GFX6-NEXT: s_bfe_i32 s35, s4, 0x1001d
2333 ; GFX6-NEXT: s_bfe_i32 s36, s4, 0x1001c
2334 ; GFX6-NEXT: s_bfe_i32 s4, s4, 0x10018
2339 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
2340 ; GFX6-NEXT: s_waitcnt expcnt(0)
2345 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
2346 ; GFX6-NEXT: s_waitcnt expcnt(0)
2351 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
2352 ; GFX6-NEXT: s_waitcnt expcnt(0)
2357 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
2358 ; GFX6-NEXT: s_waitcnt expcnt(0)
2363 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
2364 ; GFX6-NEXT: s_waitcnt expcnt(0)
2369 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2370 ; GFX6-NEXT: s_waitcnt expcnt(0)
2375 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
2376 ; GFX6-NEXT: s_waitcnt expcnt(0)
2381 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
2385 ; GFX8: ; %bb.0:
2386 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
2387 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
2388 ; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
2389 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
2390 ; GFX8-NEXT: s_bfe_i32 s4, s2, 0x10003
2391 ; GFX8-NEXT: s_bfe_i32 s5, s2, 0x10002
2392 ; GFX8-NEXT: s_bfe_i32 s6, s2, 0x10001
2393 ; GFX8-NEXT: s_bfe_i32 s7, s2, 0x10000
2394 ; GFX8-NEXT: s_bfe_i32 s8, s2, 0x10007
2395 ; GFX8-NEXT: s_bfe_i32 s9, s2, 0x10006
2396 ; GFX8-NEXT: s_bfe_i32 s10, s2, 0x10005
2397 ; GFX8-NEXT: s_bfe_i32 s11, s2, 0x10004
2398 ; GFX8-NEXT: s_bfe_i32 s12, s2, 0x1000b
2399 ; GFX8-NEXT: s_bfe_i32 s13, s2, 0x1000a
2400 ; GFX8-NEXT: s_bfe_i32 s14, s2, 0x10009
2401 ; GFX8-NEXT: s_bfe_i32 s15, s2, 0x10008
2402 ; GFX8-NEXT: s_bfe_i32 s16, s2, 0x1000f
2403 ; GFX8-NEXT: s_bfe_i32 s17, s2, 0x1000e
2404 ; GFX8-NEXT: s_bfe_i32 s18, s2, 0x1000d
2405 ; GFX8-NEXT: s_bfe_i32 s19, s2, 0x1000c
2406 ; GFX8-NEXT: s_bfe_i32 s20, s2, 0x10013
2407 ; GFX8-NEXT: s_bfe_i32 s21, s2, 0x10012
2408 ; GFX8-NEXT: s_bfe_i32 s22, s2, 0x10011
2409 ; GFX8-NEXT: s_bfe_i32 s23, s2, 0x10010
2410 ; GFX8-NEXT: s_bfe_i32 s24, s2, 0x10017
2411 ; GFX8-NEXT: s_bfe_i32 s25, s2, 0x10016
2412 ; GFX8-NEXT: s_bfe_i32 s26, s2, 0x10015
2413 ; GFX8-NEXT: s_bfe_i32 s27, s2, 0x10014
2414 ; GFX8-NEXT: s_bfe_i32 s28, s2, 0x1001b
2415 ; GFX8-NEXT: s_bfe_i32 s29, s2, 0x1001a
2416 ; GFX8-NEXT: s_bfe_i32 s30, s2, 0x10019
2417 ; GFX8-NEXT: s_bfe_i32 s31, s2, 0x10018
2419 ; GFX8-NEXT: s_bfe_i32 s33, s2, 0x1001e
2420 ; GFX8-NEXT: s_bfe_i32 s34, s2, 0x1001d
2421 ; GFX8-NEXT: s_bfe_i32 s2, s2, 0x1001c
2423 ; GFX8-NEXT: s_add_u32 s2, s0, 0x70
2425 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
2428 ; GFX8-NEXT: s_add_u32 s2, s0, 0x60
2431 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
2432 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
2435 ; GFX8-NEXT: s_add_u32 s2, s0, 0x50
2440 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
2441 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
2449 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
2450 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
2458 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
2459 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
2467 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
2468 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
2476 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
2477 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
2484 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
2491 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
2495 ; EG: ; %bb.0:
2496 ; EG-NEXT: ALU 0, @16, KC0[CB0:0-32], KC1[]
2497 ; EG-NEXT: TEX 0 @14
2498 ; EG-NEXT: ALU 99, @17, KC0[CB0:0-32], KC1[]
2499 ; EG-NEXT: ALU 5, @117, KC0[CB0:0-32], KC1[]
2500 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T11.X, 0
2501 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T26.X, 0
2502 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T23.X, 0
2503 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T21.X, 0
2504 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T19.X, 0
2505 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T17.X, 0
2506 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T15.X, 0
2511 ; EG-NEXT: VTX_READ_32 T11.X, T11.X, 0, #1
2516 ; EG-NEXT: 7(9.809089e-45), 0(0.000000e+00)
2519 ; EG-NEXT: 6(8.407791e-45), 0(0.000000e+00)
2530 ; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
2537 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
2544 ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
2551 ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
2558 ; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00)
2565 ; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
2572 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
2579 ; EG-NEXT: 64(8.968310e-44), 0(0.000000e+00)
2586 ; EG-NEXT: 20(2.802597e-44), 0(0.000000e+00)
2606 ; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00)
2618 ; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00)
2621 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
2624 ; GFX12: ; %bb.0:
2625 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
2626 ; GFX12-NEXT: s_wait_kmcnt 0x0
2627 ; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0
2628 ; GFX12-NEXT: s_wait_kmcnt 0x0
2629 ; GFX12-NEXT: s_bfe_i32 s3, s2, 0x10003
2630 ; GFX12-NEXT: s_bfe_i32 s4, s2, 0x10002
2631 ; GFX12-NEXT: s_bfe_i32 s5, s2, 0x10001
2632 ; GFX12-NEXT: s_bfe_i32 s6, s2, 0x10000
2633 ; GFX12-NEXT: s_bfe_i32 s7, s2, 0x10007
2634 ; GFX12-NEXT: s_bfe_i32 s8, s2, 0x10006
2635 ; GFX12-NEXT: s_bfe_i32 s9, s2, 0x10005
2636 ; GFX12-NEXT: s_bfe_i32 s10, s2, 0x10004
2637 ; GFX12-NEXT: s_bfe_i32 s11, s2, 0x1000b
2638 ; GFX12-NEXT: s_bfe_i32 s12, s2, 0x1000a
2639 ; GFX12-NEXT: s_bfe_i32 s13, s2, 0x10009
2640 ; GFX12-NEXT: s_bfe_i32 s14, s2, 0x10008
2641 ; GFX12-NEXT: s_bfe_i32 s15, s2, 0x1000f
2642 ; GFX12-NEXT: s_bfe_i32 s16, s2, 0x1000e
2643 ; GFX12-NEXT: s_bfe_i32 s17, s2, 0x1000d
2644 ; GFX12-NEXT: s_bfe_i32 s18, s2, 0x1000c
2645 ; GFX12-NEXT: s_bfe_i32 s19, s2, 0x10013
2646 ; GFX12-NEXT: s_bfe_i32 s20, s2, 0x10012
2647 ; GFX12-NEXT: s_bfe_i32 s21, s2, 0x10011
2648 ; GFX12-NEXT: s_bfe_i32 s22, s2, 0x10010
2649 ; GFX12-NEXT: s_bfe_i32 s23, s2, 0x10017
2650 ; GFX12-NEXT: s_bfe_i32 s24, s2, 0x10016
2651 ; GFX12-NEXT: s_bfe_i32 s25, s2, 0x10015
2652 ; GFX12-NEXT: s_bfe_i32 s26, s2, 0x10014
2653 ; GFX12-NEXT: s_bfe_i32 s27, s2, 0x1001b
2654 ; GFX12-NEXT: s_bfe_i32 s28, s2, 0x1001a
2655 ; GFX12-NEXT: s_bfe_i32 s29, s2, 0x10019
2656 ; GFX12-NEXT: s_bfe_i32 s30, s2, 0x10018
2658 ; GFX12-NEXT: s_bfe_i32 s33, s2, 0x1001e
2659 ; GFX12-NEXT: s_bfe_i32 s34, s2, 0x1001c
2660 ; GFX12-NEXT: s_bfe_i32 s2, s2, 0x1001d
2662 ; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s2
2669 ; GFX12-NEXT: s_clause 0x1
2670 ; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:112
2671 ; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:96
2682 ; GFX12-NEXT: s_clause 0x5
2683 ; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:80
2684 ; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:64
2685 ; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:48
2686 ; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:32
2687 ; GFX12-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:16
2688 ; GFX12-NEXT: global_store_b128 v24, v[20:23], s[0:1]
2696 define amdgpu_kernel void @constant_zextload_v64i1_to_v64i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
2698 ; GFX6: ; %bb.0:
2699 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
2700 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
2701 ; GFX6-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
2702 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
2703 ; GFX6-NEXT: s_bfe_u32 s4, s2, 0x10003
2704 ; GFX6-NEXT: s_bfe_u32 s5, s2, 0x10001
2705 ; GFX6-NEXT: s_bfe_u32 s6, s2, 0x10007
2706 ; GFX6-NEXT: s_bfe_u32 s7, s2, 0x10005
2707 ; GFX6-NEXT: s_bfe_u32 s8, s2, 0x1000b
2708 ; GFX6-NEXT: s_bfe_u32 s9, s2, 0x10009
2709 ; GFX6-NEXT: s_bfe_u32 s10, s2, 0x1000f
2710 ; GFX6-NEXT: s_bfe_u32 s13, s2, 0x1000d
2711 ; GFX6-NEXT: s_bfe_u32 s14, s2, 0x10013
2712 ; GFX6-NEXT: s_bfe_u32 s15, s2, 0x10011
2713 ; GFX6-NEXT: s_bfe_u32 s16, s2, 0x10017
2714 ; GFX6-NEXT: s_bfe_u32 s17, s2, 0x10015
2715 ; GFX6-NEXT: s_bfe_u32 s18, s2, 0x1001b
2716 ; GFX6-NEXT: s_bfe_u32 s19, s2, 0x10019
2718 ; GFX6-NEXT: s_bfe_u32 s21, s2, 0x1001d
2719 ; GFX6-NEXT: s_bfe_u32 s22, s3, 0x10003
2720 ; GFX6-NEXT: s_bfe_u32 s23, s3, 0x10001
2721 ; GFX6-NEXT: s_bfe_u32 s24, s3, 0x10007
2722 ; GFX6-NEXT: s_bfe_u32 s25, s3, 0x10005
2723 ; GFX6-NEXT: s_bfe_u32 s26, s3, 0x1000b
2724 ; GFX6-NEXT: s_bfe_u32 s27, s3, 0x10009
2725 ; GFX6-NEXT: s_bfe_u32 s28, s3, 0x1000f
2726 ; GFX6-NEXT: s_bfe_u32 s29, s3, 0x1000d
2727 ; GFX6-NEXT: s_bfe_u32 s30, s3, 0x10013
2728 ; GFX6-NEXT: s_bfe_u32 s31, s3, 0x10011
2729 ; GFX6-NEXT: s_bfe_u32 s33, s3, 0x10017
2730 ; GFX6-NEXT: s_bfe_u32 s34, s3, 0x10015
2731 ; GFX6-NEXT: s_bfe_u32 s35, s3, 0x1001b
2732 ; GFX6-NEXT: s_bfe_u32 s36, s3, 0x10019
2734 ; GFX6-NEXT: s_bfe_u32 s38, s3, 0x1001d
2736 ; GFX6-NEXT: s_bfe_u32 s11, s2, 0x10002
2737 ; GFX6-NEXT: s_bfe_u32 s39, s2, 0x10006
2738 ; GFX6-NEXT: s_bfe_u32 s40, s2, 0x10004
2739 ; GFX6-NEXT: s_bfe_u32 s41, s2, 0x1000a
2740 ; GFX6-NEXT: s_bfe_u32 s42, s2, 0x10008
2741 ; GFX6-NEXT: s_bfe_u32 s43, s2, 0x1000e
2742 ; GFX6-NEXT: s_bfe_u32 s44, s2, 0x1000c
2743 ; GFX6-NEXT: s_bfe_u32 s45, s2, 0x10012
2744 ; GFX6-NEXT: s_bfe_u32 s46, s2, 0x10010
2745 ; GFX6-NEXT: s_bfe_u32 s47, s2, 0x10016
2746 ; GFX6-NEXT: s_bfe_u32 s48, s2, 0x10014
2747 ; GFX6-NEXT: s_bfe_u32 s49, s2, 0x1001a
2748 ; GFX6-NEXT: s_bfe_u32 s50, s2, 0x10018
2749 ; GFX6-NEXT: s_bfe_u32 s51, s2, 0x1001e
2750 ; GFX6-NEXT: s_bfe_u32 s52, s2, 0x1001c
2752 ; GFX6-NEXT: s_bfe_u32 s54, s3, 0x10002
2753 ; GFX6-NEXT: s_bfe_u32 s55, s3, 0x10006
2754 ; GFX6-NEXT: s_bfe_u32 s56, s3, 0x10004
2755 ; GFX6-NEXT: s_bfe_u32 s57, s3, 0x10008
2756 ; GFX6-NEXT: s_bfe_u32 s58, s3, 0x1000e
2757 ; GFX6-NEXT: s_bfe_u32 s59, s3, 0x1000c
2758 ; GFX6-NEXT: s_bfe_u32 s60, s3, 0x10012
2759 ; GFX6-NEXT: s_bfe_u32 s61, s3, 0x10010
2760 ; GFX6-NEXT: s_bfe_u32 s62, s3, 0x10016
2761 ; GFX6-NEXT: s_bfe_u32 s63, s3, 0x10014
2762 ; GFX6-NEXT: s_bfe_u32 s64, s3, 0x1001a
2763 ; GFX6-NEXT: s_bfe_u32 s65, s3, 0x10018
2764 ; GFX6-NEXT: s_bfe_u32 s66, s3, 0x1001e
2765 ; GFX6-NEXT: s_bfe_u32 s67, s3, 0x1001c
2766 ; GFX6-NEXT: s_bfe_u32 s68, s3, 0x1000a
2767 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
2788 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
2789 ; GFX6-NEXT: s_waitcnt expcnt(0)
2795 ; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224
2796 ; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
2797 ; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192
2798 ; GFX6-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176
2799 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
2800 ; GFX6-NEXT: s_waitcnt expcnt(0)
2805 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
2806 ; GFX6-NEXT: s_waitcnt expcnt(0)
2811 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
2812 ; GFX6-NEXT: s_waitcnt expcnt(0)
2817 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
2818 ; GFX6-NEXT: s_waitcnt expcnt(0)
2823 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
2824 ; GFX6-NEXT: s_waitcnt expcnt(0)
2829 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
2830 ; GFX6-NEXT: s_waitcnt expcnt(0)
2835 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
2836 ; GFX6-NEXT: s_waitcnt expcnt(0)
2841 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
2842 ; GFX6-NEXT: s_waitcnt expcnt(0)
2847 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
2848 ; GFX6-NEXT: s_waitcnt expcnt(0)
2853 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
2854 ; GFX6-NEXT: s_waitcnt expcnt(0)
2859 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
2863 ; GFX8: ; %bb.0:
2864 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
2865 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
2866 ; GFX8-NEXT: s_load_dwordx2 s[26:27], s[2:3], 0x0
2867 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
2868 ; GFX8-NEXT: s_bfe_u32 s2, s26, 0x10003
2869 ; GFX8-NEXT: s_bfe_u32 s3, s26, 0x10001
2870 ; GFX8-NEXT: s_bfe_u32 s4, s26, 0x10007
2871 ; GFX8-NEXT: s_bfe_u32 s5, s26, 0x10005
2872 ; GFX8-NEXT: s_bfe_u32 s6, s26, 0x1000b
2873 ; GFX8-NEXT: s_bfe_u32 s9, s26, 0x10009
2874 ; GFX8-NEXT: s_bfe_u32 s11, s26, 0x1000f
2875 ; GFX8-NEXT: s_bfe_u32 s13, s26, 0x1000d
2876 ; GFX8-NEXT: s_bfe_u32 s15, s26, 0x10013
2877 ; GFX8-NEXT: s_bfe_u32 s17, s26, 0x10011
2878 ; GFX8-NEXT: s_bfe_u32 s19, s26, 0x10017
2879 ; GFX8-NEXT: s_bfe_u32 s21, s26, 0x1001b
2880 ; GFX8-NEXT: s_bfe_u32 s23, s26, 0x10019
2882 ; GFX8-NEXT: s_bfe_u32 s28, s26, 0x1001d
2883 ; GFX8-NEXT: s_bfe_u32 s29, s27, 0x10003
2884 ; GFX8-NEXT: s_bfe_u32 s30, s27, 0x10001
2885 ; GFX8-NEXT: s_bfe_u32 s31, s27, 0x10007
2886 ; GFX8-NEXT: s_bfe_u32 s33, s27, 0x10005
2887 ; GFX8-NEXT: s_bfe_u32 s34, s27, 0x1000b
2888 ; GFX8-NEXT: s_bfe_u32 s35, s27, 0x10009
2889 ; GFX8-NEXT: s_bfe_u32 s36, s27, 0x1000f
2890 ; GFX8-NEXT: s_bfe_u32 s37, s27, 0x1000d
2891 ; GFX8-NEXT: s_bfe_u32 s38, s27, 0x10013
2892 ; GFX8-NEXT: s_bfe_u32 s39, s27, 0x10011
2893 ; GFX8-NEXT: s_bfe_u32 s40, s27, 0x10017
2894 ; GFX8-NEXT: s_bfe_u32 s41, s27, 0x1001b
2895 ; GFX8-NEXT: s_bfe_u32 s42, s27, 0x10019
2897 ; GFX8-NEXT: s_bfe_u32 s44, s27, 0x1001d
2899 ; GFX8-NEXT: s_bfe_u32 s7, s26, 0x10002
2900 ; GFX8-NEXT: s_bfe_u32 s10, s26, 0x10006
2901 ; GFX8-NEXT: s_bfe_u32 s12, s26, 0x10004
2902 ; GFX8-NEXT: s_bfe_u32 s14, s26, 0x1000a
2903 ; GFX8-NEXT: s_bfe_u32 s16, s26, 0x10008
2904 ; GFX8-NEXT: s_bfe_u32 s18, s26, 0x1000e
2905 ; GFX8-NEXT: s_bfe_u32 s20, s26, 0x1000c
2906 ; GFX8-NEXT: s_bfe_u32 s22, s26, 0x10012
2907 ; GFX8-NEXT: s_bfe_u32 s24, s26, 0x10010
2908 ; GFX8-NEXT: s_bfe_u32 s45, s26, 0x10016
2909 ; GFX8-NEXT: s_bfe_u32 s46, s26, 0x10015
2910 ; GFX8-NEXT: s_bfe_u32 s47, s26, 0x10014
2911 ; GFX8-NEXT: s_bfe_u32 s48, s26, 0x1001a
2912 ; GFX8-NEXT: s_bfe_u32 s49, s26, 0x10018
2913 ; GFX8-NEXT: s_bfe_u32 s50, s26, 0x1001e
2914 ; GFX8-NEXT: s_bfe_u32 s51, s26, 0x1001c
2916 ; GFX8-NEXT: s_bfe_u32 s53, s27, 0x10002
2917 ; GFX8-NEXT: s_bfe_u32 s54, s27, 0x10006
2918 ; GFX8-NEXT: s_bfe_u32 s55, s27, 0x10004
2919 ; GFX8-NEXT: s_bfe_u32 s56, s27, 0x1000a
2920 ; GFX8-NEXT: s_bfe_u32 s57, s27, 0x10008
2921 ; GFX8-NEXT: s_bfe_u32 s58, s27, 0x1000e
2922 ; GFX8-NEXT: s_bfe_u32 s59, s27, 0x1000c
2923 ; GFX8-NEXT: s_bfe_u32 s60, s27, 0x10012
2924 ; GFX8-NEXT: s_bfe_u32 s61, s27, 0x10010
2925 ; GFX8-NEXT: s_bfe_u32 s62, s27, 0x10016
2926 ; GFX8-NEXT: s_bfe_u32 s63, s27, 0x10015
2927 ; GFX8-NEXT: s_bfe_u32 s64, s27, 0x10014
2928 ; GFX8-NEXT: s_bfe_u32 s65, s27, 0x1001a
2929 ; GFX8-NEXT: s_bfe_u32 s66, s27, 0x10018
2930 ; GFX8-NEXT: s_bfe_u32 s26, s27, 0x1001e
2931 ; GFX8-NEXT: s_bfe_u32 s27, s27, 0x1001c
2933 ; GFX8-NEXT: s_add_u32 s26, s0, 0xf0
2935 ; GFX8-NEXT: s_addc_u32 s27, s1, 0
2940 ; GFX8-NEXT: s_add_u32 s26, s0, 0xe0
2941 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
2942 ; GFX8-NEXT: s_addc_u32 s27, s1, 0
2949 ; GFX8-NEXT: s_add_u32 s26, s0, 0xd0
2950 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
2951 ; GFX8-NEXT: s_addc_u32 s27, s1, 0
2958 ; GFX8-NEXT: s_add_u32 s26, s0, 0xc0
2959 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
2960 ; GFX8-NEXT: s_addc_u32 s27, s1, 0
2967 ; GFX8-NEXT: s_add_u32 s26, s0, 0xb0
2968 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
2969 ; GFX8-NEXT: s_addc_u32 s27, s1, 0
2976 ; GFX8-NEXT: s_add_u32 s26, s0, 0xa0
2977 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
2978 ; GFX8-NEXT: s_addc_u32 s27, s1, 0
2985 ; GFX8-NEXT: s_add_u32 s26, s0, 0x90
2986 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
2987 ; GFX8-NEXT: s_addc_u32 s27, s1, 0
2994 ; GFX8-NEXT: s_add_u32 s26, s0, 0x80
2995 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
2996 ; GFX8-NEXT: s_addc_u32 s27, s1, 0
3003 ; GFX8-NEXT: s_add_u32 s26, s0, 0x70
3004 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
3005 ; GFX8-NEXT: s_addc_u32 s27, s1, 0
3012 ; GFX8-NEXT: s_add_u32 s26, s0, 0x60
3013 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
3014 ; GFX8-NEXT: s_addc_u32 s27, s1, 0
3021 ; GFX8-NEXT: s_add_u32 s26, s0, 0x50
3022 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
3023 ; GFX8-NEXT: s_addc_u32 s27, s1, 0
3030 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
3031 ; GFX8-NEXT: s_nop 0
3034 ; GFX8-NEXT: s_addc_u32 s23, s1, 0
3040 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
3041 ; GFX8-NEXT: s_nop 0
3044 ; GFX8-NEXT: s_addc_u32 s19, s1, 0
3050 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
3051 ; GFX8-NEXT: s_nop 0
3054 ; GFX8-NEXT: s_addc_u32 s15, s1, 0
3060 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
3061 ; GFX8-NEXT: s_nop 0
3065 ; GFX8-NEXT: s_addc_u32 s5, s1, 0
3070 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
3077 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
3081 ; EG: ; %bb.0:
3082 ; EG-NEXT: ALU 0, @24, KC0[CB0:0-32], KC1[]
3083 ; EG-NEXT: TEX 0 @22
3084 ; EG-NEXT: ALU 96, @25, KC0[CB0:0-32], KC1[]
3085 ; EG-NEXT: ALU 57, @122, KC0[CB0:0-32], KC1[]
3086 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T50.X, 0
3087 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T49.X, 0
3088 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T47.X, 0
3089 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T45.X, 0
3090 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T43.X, 0
3091 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T41.X, 0
3092 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T39.X, 0
3093 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T37.X, 0
3094 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T21.X, 0
3095 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T34.X, 0
3096 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T32.X, 0
3097 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T30.X, 0
3098 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T28.X, 0
3099 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T26.X, 0
3100 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T24.X, 0
3105 ; EG-NEXT: VTX_READ_64 T21.XY, T19.X, 0, #1
3110 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
3112 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
3115 ; EG-NEXT: 7(9.809089e-45), 0(0.000000e+00)
3127 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
3132 ; EG-NEXT: 15(2.101948e-44), 0(0.000000e+00)
3137 ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
3142 ; EG-NEXT: 19(2.662467e-44), 0(0.000000e+00)
3147 ; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
3152 ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
3157 ; EG-NEXT: 64(8.968310e-44), 0(0.000000e+00)
3162 ; EG-NEXT: 27(3.783506e-44), 0(0.000000e+00)
3167 ; EG-NEXT: 80(1.121039e-43), 0(0.000000e+00)
3172 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
3177 ; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00)
3182 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
3187 ; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00)
3200 ; EG-NEXT: 11(1.541428e-44), 0(0.000000e+00)
3205 ; EG-NEXT: 144(2.017870e-43), 0(0.000000e+00)
3211 ; EG-NEXT: 15(2.101948e-44), 0(0.000000e+00)
3216 ; EG-NEXT: 160(2.242078e-43), 0(0.000000e+00)
3221 ; EG-NEXT: 19(2.662467e-44), 0(0.000000e+00)
3226 ; EG-NEXT: 176(2.466285e-43), 0(0.000000e+00)
3231 ; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
3236 ; EG-NEXT: 192(2.690493e-43), 0(0.000000e+00)
3241 ; EG-NEXT: 27(3.783506e-44), 0(0.000000e+00)
3246 ; EG-NEXT: 208(2.914701e-43), 0(0.000000e+00)
3251 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
3256 ; EG-NEXT: 224(3.138909e-43), 0(0.000000e+00)
3264 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
3267 ; GFX12: ; %bb.0:
3268 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
3269 ; GFX12-NEXT: s_wait_kmcnt 0x0
3270 ; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
3271 ; GFX12-NEXT: s_wait_kmcnt 0x0
3273 ; GFX12-NEXT: s_bfe_u32 s34, s3, 0x1001d
3274 ; GFX12-NEXT: s_bfe_u32 s65, s3, 0x1001c
3275 ; GFX12-NEXT: s_bfe_u32 s66, s3, 0x1001e
3276 ; GFX12-NEXT: s_bfe_u32 s30, s3, 0x1001b
3277 ; GFX12-NEXT: s_bfe_u32 s31, s3, 0x10019
3278 ; GFX12-NEXT: s_bfe_u32 s63, s3, 0x1001a
3279 ; GFX12-NEXT: s_bfe_u32 s64, s3, 0x10018
3280 ; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s34
3281 ; GFX12-NEXT: s_bfe_u32 s29, s3, 0x10017
3282 ; GFX12-NEXT: s_bfe_u32 s60, s3, 0x10016
3283 ; GFX12-NEXT: s_bfe_u32 s61, s3, 0x10015
3284 ; GFX12-NEXT: s_bfe_u32 s62, s3, 0x10014
3287 ; GFX12-NEXT: s_bfe_u32 s27, s3, 0x10013
3288 ; GFX12-NEXT: s_bfe_u32 s28, s3, 0x10011
3289 ; GFX12-NEXT: s_bfe_u32 s58, s3, 0x10012
3290 ; GFX12-NEXT: s_bfe_u32 s59, s3, 0x10010
3295 ; GFX12-NEXT: s_bfe_u32 s19, s3, 0x10003
3296 ; GFX12-NEXT: s_bfe_u32 s20, s3, 0x10001
3297 ; GFX12-NEXT: s_bfe_u32 s21, s3, 0x10007
3298 ; GFX12-NEXT: s_bfe_u32 s22, s3, 0x10005
3299 ; GFX12-NEXT: s_bfe_u32 s23, s3, 0x1000b
3300 ; GFX12-NEXT: s_bfe_u32 s24, s3, 0x10009
3301 ; GFX12-NEXT: s_bfe_u32 s25, s3, 0x1000f
3302 ; GFX12-NEXT: s_bfe_u32 s26, s3, 0x1000d
3304 ; GFX12-NEXT: s_bfe_u32 s52, s3, 0x10002
3305 ; GFX12-NEXT: s_bfe_u32 s53, s3, 0x10006
3306 ; GFX12-NEXT: s_bfe_u32 s54, s3, 0x10004
3307 ; GFX12-NEXT: s_bfe_u32 s55, s3, 0x1000a
3308 ; GFX12-NEXT: s_bfe_u32 s56, s3, 0x10008
3309 ; GFX12-NEXT: s_bfe_u32 s57, s3, 0x1000e
3312 ; GFX12-NEXT: s_bfe_u32 s3, s3, 0x1000c
3313 ; GFX12-NEXT: s_clause 0x3
3314 ; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:240
3315 ; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:224
3316 ; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:208
3317 ; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:192
3323 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10003
3324 ; GFX12-NEXT: s_bfe_u32 s5, s2, 0x10001
3325 ; GFX12-NEXT: s_bfe_u32 s6, s2, 0x10007
3326 ; GFX12-NEXT: s_bfe_u32 s7, s2, 0x10005
3327 ; GFX12-NEXT: s_bfe_u32 s8, s2, 0x1000b
3328 ; GFX12-NEXT: s_bfe_u32 s9, s2, 0x10009
3329 ; GFX12-NEXT: s_bfe_u32 s10, s2, 0x1000f
3330 ; GFX12-NEXT: s_bfe_u32 s11, s2, 0x1000d
3331 ; GFX12-NEXT: s_bfe_u32 s12, s2, 0x10013
3332 ; GFX12-NEXT: s_bfe_u32 s13, s2, 0x10011
3333 ; GFX12-NEXT: s_bfe_u32 s14, s2, 0x10017
3334 ; GFX12-NEXT: s_bfe_u32 s15, s2, 0x1001b
3335 ; GFX12-NEXT: s_bfe_u32 s16, s2, 0x10019
3337 ; GFX12-NEXT: s_bfe_u32 s18, s2, 0x1001d
3339 ; GFX12-NEXT: s_bfe_u32 s36, s2, 0x10002
3340 ; GFX12-NEXT: s_bfe_u32 s37, s2, 0x10006
3341 ; GFX12-NEXT: s_bfe_u32 s38, s2, 0x10004
3342 ; GFX12-NEXT: s_bfe_u32 s39, s2, 0x1000a
3343 ; GFX12-NEXT: s_bfe_u32 s40, s2, 0x10008
3344 ; GFX12-NEXT: s_bfe_u32 s41, s2, 0x1000e
3345 ; GFX12-NEXT: s_bfe_u32 s42, s2, 0x1000c
3346 ; GFX12-NEXT: s_bfe_u32 s43, s2, 0x10012
3347 ; GFX12-NEXT: s_bfe_u32 s44, s2, 0x10010
3348 ; GFX12-NEXT: s_bfe_u32 s45, s2, 0x10016
3349 ; GFX12-NEXT: s_bfe_u32 s46, s2, 0x10015
3350 ; GFX12-NEXT: s_bfe_u32 s47, s2, 0x10014
3351 ; GFX12-NEXT: s_bfe_u32 s48, s2, 0x1001a
3352 ; GFX12-NEXT: s_bfe_u32 s49, s2, 0x10018
3353 ; GFX12-NEXT: s_bfe_u32 s50, s2, 0x1001e
3354 ; GFX12-NEXT: s_bfe_u32 s2, s2, 0x1001c
3359 ; GFX12-NEXT: s_wait_alu 0xfffe
3364 ; GFX12-NEXT: s_clause 0x5
3365 ; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:176
3366 ; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:160
3367 ; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:144
3368 ; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:128
3369 ; GFX12-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:112
3370 ; GFX12-NEXT: global_store_b128 v24, v[20:23], s[0:1] offset:96
3383 ; GFX12-NEXT: s_clause 0x5
3384 ; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:80
3385 ; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:64
3386 ; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:48
3387 ; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:32
3388 ; GFX12-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:16
3389 ; GFX12-NEXT: global_store_b128 v24, v[20:23], s[0:1]
3397 define amdgpu_kernel void @constant_sextload_v64i1_to_v64i32(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
3399 ; GFX6: ; %bb.0:
3400 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
3401 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
3402 ; GFX6-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
3403 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
3404 ; GFX6-NEXT: s_bfe_i32 s4, s2, 0x10003
3405 ; GFX6-NEXT: s_bfe_i32 s5, s2, 0x10002
3406 ; GFX6-NEXT: s_bfe_i32 s6, s2, 0x10001
3407 ; GFX6-NEXT: s_bfe_i32 s7, s2, 0x10000
3408 ; GFX6-NEXT: s_bfe_i32 s8, s2, 0x10007
3409 ; GFX6-NEXT: s_bfe_i32 s9, s2, 0x10006
3410 ; GFX6-NEXT: s_bfe_i32 s10, s2, 0x10005
3411 ; GFX6-NEXT: s_bfe_i32 s11, s2, 0x10004
3412 ; GFX6-NEXT: s_bfe_i32 s12, s2, 0x1000b
3413 ; GFX6-NEXT: s_bfe_i32 s13, s2, 0x1000a
3414 ; GFX6-NEXT: s_bfe_i32 s14, s2, 0x10009
3415 ; GFX6-NEXT: s_bfe_i32 s15, s2, 0x10008
3416 ; GFX6-NEXT: s_bfe_i32 s16, s2, 0x1000f
3417 ; GFX6-NEXT: s_bfe_i32 s17, s2, 0x1000e
3418 ; GFX6-NEXT: s_bfe_i32 s18, s2, 0x1000d
3419 ; GFX6-NEXT: s_bfe_i32 s19, s2, 0x1000c
3420 ; GFX6-NEXT: s_bfe_i32 s20, s2, 0x10013
3421 ; GFX6-NEXT: s_bfe_i32 s21, s2, 0x10012
3422 ; GFX6-NEXT: s_bfe_i32 s22, s2, 0x10011
3423 ; GFX6-NEXT: s_bfe_i32 s23, s2, 0x10010
3424 ; GFX6-NEXT: s_bfe_i32 s24, s2, 0x10017
3425 ; GFX6-NEXT: s_bfe_i32 s25, s2, 0x10016
3426 ; GFX6-NEXT: s_bfe_i32 s26, s2, 0x10015
3427 ; GFX6-NEXT: s_bfe_i32 s27, s2, 0x10014
3428 ; GFX6-NEXT: s_bfe_i32 s28, s2, 0x1001b
3429 ; GFX6-NEXT: s_bfe_i32 s29, s2, 0x1001a
3430 ; GFX6-NEXT: s_bfe_i32 s30, s2, 0x10019
3431 ; GFX6-NEXT: s_bfe_i32 s31, s2, 0x10018
3433 ; GFX6-NEXT: s_bfe_i32 s34, s2, 0x1001e
3434 ; GFX6-NEXT: s_bfe_i32 s35, s2, 0x1001d
3435 ; GFX6-NEXT: s_bfe_i32 s36, s2, 0x1001c
3436 ; GFX6-NEXT: s_bfe_i32 s37, s3, 0x10003
3437 ; GFX6-NEXT: s_bfe_i32 s38, s3, 0x10002
3438 ; GFX6-NEXT: s_bfe_i32 s39, s3, 0x10001
3439 ; GFX6-NEXT: s_bfe_i32 s40, s3, 0x10000
3440 ; GFX6-NEXT: s_bfe_i32 s41, s3, 0x10007
3441 ; GFX6-NEXT: s_bfe_i32 s42, s3, 0x10006
3442 ; GFX6-NEXT: s_bfe_i32 s43, s3, 0x10005
3443 ; GFX6-NEXT: s_bfe_i32 s44, s3, 0x10004
3444 ; GFX6-NEXT: s_bfe_i32 s45, s3, 0x1000b
3445 ; GFX6-NEXT: s_bfe_i32 s46, s3, 0x1000a
3446 ; GFX6-NEXT: s_bfe_i32 s47, s3, 0x10009
3447 ; GFX6-NEXT: s_bfe_i32 s48, s3, 0x10008
3448 ; GFX6-NEXT: s_bfe_i32 s49, s3, 0x1000e
3449 ; GFX6-NEXT: s_bfe_i32 s50, s3, 0x1000d
3450 ; GFX6-NEXT: s_bfe_i32 s51, s3, 0x1000c
3451 ; GFX6-NEXT: s_bfe_i32 s52, s3, 0x10013
3452 ; GFX6-NEXT: s_bfe_i32 s53, s3, 0x10012
3453 ; GFX6-NEXT: s_bfe_i32 s54, s3, 0x10011
3454 ; GFX6-NEXT: s_bfe_i32 s55, s3, 0x10010
3455 ; GFX6-NEXT: s_bfe_i32 s56, s3, 0x10017
3456 ; GFX6-NEXT: s_bfe_i32 s57, s3, 0x10016
3457 ; GFX6-NEXT: s_bfe_i32 s58, s3, 0x10015
3458 ; GFX6-NEXT: s_bfe_i32 s59, s3, 0x10014
3459 ; GFX6-NEXT: s_bfe_i32 s60, s3, 0x1001b
3460 ; GFX6-NEXT: s_bfe_i32 s61, s3, 0x1001a
3461 ; GFX6-NEXT: s_bfe_i32 s62, s3, 0x10019
3462 ; GFX6-NEXT: s_bfe_i32 s63, s3, 0x10018
3464 ; GFX6-NEXT: s_bfe_i32 s65, s3, 0x1001e
3465 ; GFX6-NEXT: s_bfe_i32 s66, s3, 0x1001d
3466 ; GFX6-NEXT: s_bfe_i32 s67, s3, 0x1001c
3467 ; GFX6-NEXT: s_bfe_i32 s68, s3, 0x1000f
3468 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
3489 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
3490 ; GFX6-NEXT: s_waitcnt expcnt(0)
3496 ; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224
3497 ; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208
3498 ; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192
3499 ; GFX6-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176
3500 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
3501 ; GFX6-NEXT: s_waitcnt expcnt(0)
3506 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
3507 ; GFX6-NEXT: s_waitcnt expcnt(0)
3512 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
3513 ; GFX6-NEXT: s_waitcnt expcnt(0)
3518 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
3519 ; GFX6-NEXT: s_waitcnt expcnt(0)
3524 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
3525 ; GFX6-NEXT: s_waitcnt expcnt(0)
3530 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
3531 ; GFX6-NEXT: s_waitcnt expcnt(0)
3536 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
3537 ; GFX6-NEXT: s_waitcnt expcnt(0)
3542 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
3543 ; GFX6-NEXT: s_waitcnt expcnt(0)
3548 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
3549 ; GFX6-NEXT: s_waitcnt expcnt(0)
3554 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
3555 ; GFX6-NEXT: s_waitcnt expcnt(0)
3560 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
3564 ; GFX8: ; %bb.0:
3565 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
3566 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
3567 ; GFX8-NEXT: s_load_dwordx2 s[26:27], s[2:3], 0x0
3568 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
3569 ; GFX8-NEXT: s_bfe_i32 s2, s26, 0x10003
3570 ; GFX8-NEXT: s_bfe_i32 s3, s26, 0x10002
3571 ; GFX8-NEXT: s_bfe_i32 s4, s26, 0x10001
3572 ; GFX8-NEXT: s_bfe_i32 s5, s26, 0x10000
3573 ; GFX8-NEXT: s_bfe_i32 s6, s26, 0x10007
3574 ; GFX8-NEXT: s_bfe_i32 s7, s26, 0x10006
3575 ; GFX8-NEXT: s_bfe_i32 s8, s26, 0x10005
3576 ; GFX8-NEXT: s_bfe_i32 s9, s26, 0x10004
3577 ; GFX8-NEXT: s_bfe_i32 s10, s26, 0x1000b
3578 ; GFX8-NEXT: s_bfe_i32 s11, s26, 0x1000a
3579 ; GFX8-NEXT: s_bfe_i32 s12, s26, 0x10009
3580 ; GFX8-NEXT: s_bfe_i32 s13, s26, 0x10008
3581 ; GFX8-NEXT: s_bfe_i32 s14, s26, 0x1000f
3582 ; GFX8-NEXT: s_bfe_i32 s15, s26, 0x1000e
3583 ; GFX8-NEXT: s_bfe_i32 s16, s26, 0x1000d
3584 ; GFX8-NEXT: s_bfe_i32 s17, s26, 0x1000c
3585 ; GFX8-NEXT: s_bfe_i32 s18, s26, 0x10013
3586 ; GFX8-NEXT: s_bfe_i32 s19, s26, 0x10012
3587 ; GFX8-NEXT: s_bfe_i32 s20, s26, 0x10011
3588 ; GFX8-NEXT: s_bfe_i32 s21, s26, 0x10010
3589 ; GFX8-NEXT: s_bfe_i32 s22, s26, 0x10017
3590 ; GFX8-NEXT: s_bfe_i32 s23, s26, 0x10016
3591 ; GFX8-NEXT: s_bfe_i32 s24, s26, 0x10015
3592 ; GFX8-NEXT: s_bfe_i32 s25, s26, 0x10014
3593 ; GFX8-NEXT: s_bfe_i32 s28, s26, 0x1001b
3594 ; GFX8-NEXT: s_bfe_i32 s29, s26, 0x1001a
3595 ; GFX8-NEXT: s_bfe_i32 s30, s26, 0x10019
3596 ; GFX8-NEXT: s_bfe_i32 s31, s26, 0x10018
3598 ; GFX8-NEXT: s_bfe_i32 s34, s26, 0x1001e
3599 ; GFX8-NEXT: s_bfe_i32 s35, s26, 0x1001d
3600 ; GFX8-NEXT: s_bfe_i32 s36, s26, 0x1001c
3601 ; GFX8-NEXT: s_bfe_i32 s37, s27, 0x10003
3602 ; GFX8-NEXT: s_bfe_i32 s38, s27, 0x10002
3603 ; GFX8-NEXT: s_bfe_i32 s39, s27, 0x10001
3604 ; GFX8-NEXT: s_bfe_i32 s40, s27, 0x10000
3605 ; GFX8-NEXT: s_bfe_i32 s41, s27, 0x10007
3606 ; GFX8-NEXT: s_bfe_i32 s42, s27, 0x10006
3607 ; GFX8-NEXT: s_bfe_i32 s43, s27, 0x10005
3608 ; GFX8-NEXT: s_bfe_i32 s44, s27, 0x10004
3609 ; GFX8-NEXT: s_bfe_i32 s45, s27, 0x1000b
3610 ; GFX8-NEXT: s_bfe_i32 s46, s27, 0x1000a
3611 ; GFX8-NEXT: s_bfe_i32 s47, s27, 0x10009
3612 ; GFX8-NEXT: s_bfe_i32 s48, s27, 0x10008
3613 ; GFX8-NEXT: s_bfe_i32 s49, s27, 0x1000f
3614 ; GFX8-NEXT: s_bfe_i32 s50, s27, 0x1000e
3615 ; GFX8-NEXT: s_bfe_i32 s51, s27, 0x1000d
3616 ; GFX8-NEXT: s_bfe_i32 s52, s27, 0x1000c
3617 ; GFX8-NEXT: s_bfe_i32 s53, s27, 0x10013
3618 ; GFX8-NEXT: s_bfe_i32 s54, s27, 0x10012
3619 ; GFX8-NEXT: s_bfe_i32 s55, s27, 0x10011
3620 ; GFX8-NEXT: s_bfe_i32 s56, s27, 0x10010
3621 ; GFX8-NEXT: s_bfe_i32 s57, s27, 0x10017
3622 ; GFX8-NEXT: s_bfe_i32 s58, s27, 0x10016
3623 ; GFX8-NEXT: s_bfe_i32 s59, s27, 0x10015
3624 ; GFX8-NEXT: s_bfe_i32 s60, s27, 0x10014
3625 ; GFX8-NEXT: s_bfe_i32 s61, s27, 0x1001b
3626 ; GFX8-NEXT: s_bfe_i32 s62, s27, 0x1001a
3627 ; GFX8-NEXT: s_bfe_i32 s63, s27, 0x10019
3628 ; GFX8-NEXT: s_bfe_i32 s64, s27, 0x10018
3630 ; GFX8-NEXT: s_bfe_i32 s65, s27, 0x1001e
3631 ; GFX8-NEXT: s_bfe_i32 s66, s27, 0x1001d
3632 ; GFX8-NEXT: s_bfe_i32 s27, s27, 0x1001c
3634 ; GFX8-NEXT: s_add_u32 s26, s0, 0xf0
3636 ; GFX8-NEXT: s_addc_u32 s27, s1, 0
3641 ; GFX8-NEXT: s_add_u32 s26, s0, 0xe0
3642 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
3643 ; GFX8-NEXT: s_addc_u32 s27, s1, 0
3650 ; GFX8-NEXT: s_add_u32 s26, s0, 0xd0
3651 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
3652 ; GFX8-NEXT: s_addc_u32 s27, s1, 0
3659 ; GFX8-NEXT: s_add_u32 s26, s0, 0xc0
3660 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
3661 ; GFX8-NEXT: s_addc_u32 s27, s1, 0
3668 ; GFX8-NEXT: s_add_u32 s26, s0, 0xb0
3669 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
3670 ; GFX8-NEXT: s_addc_u32 s27, s1, 0
3677 ; GFX8-NEXT: s_add_u32 s26, s0, 0xa0
3678 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
3679 ; GFX8-NEXT: s_addc_u32 s27, s1, 0
3686 ; GFX8-NEXT: s_add_u32 s26, s0, 0x90
3687 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
3688 ; GFX8-NEXT: s_addc_u32 s27, s1, 0
3695 ; GFX8-NEXT: s_add_u32 s26, s0, 0x80
3696 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
3697 ; GFX8-NEXT: s_addc_u32 s27, s1, 0
3704 ; GFX8-NEXT: s_add_u32 s26, s0, 0x70
3705 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
3706 ; GFX8-NEXT: s_addc_u32 s27, s1, 0
3713 ; GFX8-NEXT: s_add_u32 s26, s0, 0x60
3714 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
3715 ; GFX8-NEXT: s_addc_u32 s27, s1, 0
3722 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
3723 ; GFX8-NEXT: s_nop 0
3725 ; GFX8-NEXT: s_add_u32 s22, s0, 0x50
3727 ; GFX8-NEXT: s_addc_u32 s23, s1, 0
3732 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
3733 ; GFX8-NEXT: s_nop 0
3737 ; GFX8-NEXT: s_addc_u32 s19, s1, 0
3742 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
3743 ; GFX8-NEXT: s_nop 0
3747 ; GFX8-NEXT: s_addc_u32 s15, s1, 0
3752 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
3753 ; GFX8-NEXT: s_nop 0
3757 ; GFX8-NEXT: s_addc_u32 s11, s1, 0
3762 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
3763 ; GFX8-NEXT: s_nop 0
3767 ; GFX8-NEXT: s_addc_u32 s7, s1, 0
3772 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
3779 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
3783 ; EG: ; %bb.0:
3784 ; EG-NEXT: ALU 0, @24, KC0[CB0:0-32], KC1[]
3785 ; EG-NEXT: TEX 0 @22
3786 ; EG-NEXT: ALU 99, @25, KC0[CB0:0-32], KC1[]
3787 ; EG-NEXT: ALU 98, @125, KC0[CB0:0-32], KC1[]
3788 ; EG-NEXT: ALU 13, @224, KC0[CB0:0-32], KC1[]
3789 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T50.X, 0
3790 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T49.X, 0
3791 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T46.X, 0
3792 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T44.X, 0
3793 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T42.X, 0
3794 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T40.X, 0
3795 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T38.X, 0
3796 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T36.X, 0
3797 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T35.X, 0
3798 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T33.X, 0
3799 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T31.X, 0
3800 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T29.X, 0
3801 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T27.X, 0
3802 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T25.X, 0
3803 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T23.X, 0
3807 ; EG-NEXT: VTX_READ_64 T19.XY, T19.X, 0, #1
3812 ; EG-NEXT: 7(9.809089e-45), 0(0.000000e+00)
3815 ; EG-NEXT: 6(8.407791e-45), 0(0.000000e+00)
3826 ; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
3833 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
3840 ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
3847 ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
3854 ; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00)
3861 ; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
3868 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
3875 ; EG-NEXT: 64(8.968310e-44), 0(0.000000e+00)
3882 ; EG-NEXT: 20(2.802597e-44), 0(0.000000e+00)
3905 ; EG-NEXT: 28(3.923636e-44), 0(0.000000e+00)
3925 ; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
3932 ; EG-NEXT: 144(2.017870e-43), 0(0.000000e+00)
3939 ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
3946 ; EG-NEXT: 160(2.242078e-43), 0(0.000000e+00)
3953 ; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00)
3960 ; EG-NEXT: 176(2.466285e-43), 0(0.000000e+00)
3967 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
3974 ; EG-NEXT: 192(2.690493e-43), 0(0.000000e+00)
3981 ; EG-NEXT: 20(2.802597e-44), 0(0.000000e+00)
4010 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
4016 ; EG-NEXT: 28(3.923636e-44), 0(0.000000e+00)
4022 ; EG-NEXT: 240(3.363116e-43), 0(0.000000e+00)
4025 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
4028 ; GFX12: ; %bb.0:
4029 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
4030 ; GFX12-NEXT: s_wait_kmcnt 0x0
4031 ; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
4032 ; GFX12-NEXT: s_wait_kmcnt 0x0
4034 ; GFX12-NEXT: s_bfe_i32 s64, s3, 0x1001e
4035 ; GFX12-NEXT: s_bfe_i32 s65, s3, 0x1001c
4036 ; GFX12-NEXT: s_bfe_i32 s66, s3, 0x1001d
4037 ; GFX12-NEXT: s_bfe_i32 s59, s3, 0x1001b
4038 ; GFX12-NEXT: s_bfe_i32 s60, s3, 0x1001a
4039 ; GFX12-NEXT: s_bfe_i32 s61, s3, 0x10019
4040 ; GFX12-NEXT: s_bfe_i32 s62, s3, 0x10018
4041 ; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s66
4042 ; GFX12-NEXT: s_bfe_i32 s55, s3, 0x10017
4043 ; GFX12-NEXT: s_bfe_i32 s56, s3, 0x10016
4044 ; GFX12-NEXT: s_bfe_i32 s57, s3, 0x10015
4045 ; GFX12-NEXT: s_bfe_i32 s58, s3, 0x10014
4048 ; GFX12-NEXT: s_bfe_i32 s51, s3, 0x10013
4049 ; GFX12-NEXT: s_bfe_i32 s52, s3, 0x10012
4050 ; GFX12-NEXT: s_bfe_i32 s53, s3, 0x10011
4051 ; GFX12-NEXT: s_bfe_i32 s54, s3, 0x10010
4056 ; GFX12-NEXT: s_bfe_i32 s36, s3, 0x10003
4057 ; GFX12-NEXT: s_bfe_i32 s37, s3, 0x10002
4058 ; GFX12-NEXT: s_bfe_i32 s38, s3, 0x10001
4059 ; GFX12-NEXT: s_bfe_i32 s39, s3, 0x10000
4060 ; GFX12-NEXT: s_bfe_i32 s40, s3, 0x10007
4061 ; GFX12-NEXT: s_bfe_i32 s41, s3, 0x10006
4062 ; GFX12-NEXT: s_bfe_i32 s42, s3, 0x10005
4063 ; GFX12-NEXT: s_bfe_i32 s43, s3, 0x10004
4064 ; GFX12-NEXT: s_bfe_i32 s44, s3, 0x1000b
4065 ; GFX12-NEXT: s_bfe_i32 s45, s3, 0x1000a
4066 ; GFX12-NEXT: s_bfe_i32 s46, s3, 0x10009
4067 ; GFX12-NEXT: s_bfe_i32 s47, s3, 0x10008
4068 ; GFX12-NEXT: s_bfe_i32 s48, s3, 0x1000f
4069 ; GFX12-NEXT: s_bfe_i32 s49, s3, 0x1000e
4070 ; GFX12-NEXT: s_bfe_i32 s50, s3, 0x1000d
4073 ; GFX12-NEXT: s_bfe_i32 s3, s3, 0x1000c
4074 ; GFX12-NEXT: s_clause 0x3
4075 ; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:240
4076 ; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:224
4077 ; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:208
4078 ; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:192
4084 ; GFX12-NEXT: s_bfe_i32 s4, s2, 0x10003
4085 ; GFX12-NEXT: s_bfe_i32 s5, s2, 0x10002
4086 ; GFX12-NEXT: s_bfe_i32 s6, s2, 0x10001
4087 ; GFX12-NEXT: s_bfe_i32 s7, s2, 0x10000
4088 ; GFX12-NEXT: s_bfe_i32 s8, s2, 0x10007
4089 ; GFX12-NEXT: s_bfe_i32 s9, s2, 0x10006
4090 ; GFX12-NEXT: s_bfe_i32 s10, s2, 0x10005
4091 ; GFX12-NEXT: s_bfe_i32 s11, s2, 0x10004
4092 ; GFX12-NEXT: s_bfe_i32 s12, s2, 0x1000b
4093 ; GFX12-NEXT: s_bfe_i32 s13, s2, 0x1000a
4094 ; GFX12-NEXT: s_bfe_i32 s14, s2, 0x10009
4095 ; GFX12-NEXT: s_bfe_i32 s15, s2, 0x10008
4096 ; GFX12-NEXT: s_bfe_i32 s16, s2, 0x1000f
4097 ; GFX12-NEXT: s_bfe_i32 s17, s2, 0x1000e
4098 ; GFX12-NEXT: s_bfe_i32 s18, s2, 0x1000d
4099 ; GFX12-NEXT: s_bfe_i32 s19, s2, 0x1000c
4100 ; GFX12-NEXT: s_bfe_i32 s20, s2, 0x10013
4101 ; GFX12-NEXT: s_bfe_i32 s21, s2, 0x10012
4102 ; GFX12-NEXT: s_bfe_i32 s22, s2, 0x10011
4103 ; GFX12-NEXT: s_bfe_i32 s23, s2, 0x10010
4104 ; GFX12-NEXT: s_bfe_i32 s24, s2, 0x10017
4105 ; GFX12-NEXT: s_bfe_i32 s25, s2, 0x10016
4106 ; GFX12-NEXT: s_bfe_i32 s26, s2, 0x10015
4107 ; GFX12-NEXT: s_bfe_i32 s27, s2, 0x10014
4108 ; GFX12-NEXT: s_bfe_i32 s28, s2, 0x1001b
4109 ; GFX12-NEXT: s_bfe_i32 s29, s2, 0x1001a
4110 ; GFX12-NEXT: s_bfe_i32 s30, s2, 0x10019
4111 ; GFX12-NEXT: s_bfe_i32 s31, s2, 0x10018
4113 ; GFX12-NEXT: s_bfe_i32 s34, s2, 0x1001e
4114 ; GFX12-NEXT: s_bfe_i32 s35, s2, 0x1001d
4115 ; GFX12-NEXT: s_bfe_i32 s2, s2, 0x1001c
4120 ; GFX12-NEXT: s_wait_alu 0xfffe
4125 ; GFX12-NEXT: s_clause 0x5
4126 ; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:176
4127 ; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:160
4128 ; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:144
4129 ; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:128
4130 ; GFX12-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:112
4131 ; GFX12-NEXT: global_store_b128 v24, v[20:23], s[0:1] offset:96
4144 ; GFX12-NEXT: s_clause 0x5
4145 ; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:80
4146 ; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:64
4147 ; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:48
4148 ; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:32
4149 ; GFX12-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:16
4150 ; GFX12-NEXT: global_store_b128 v24, v[20:23], s[0:1]
4158 define amdgpu_kernel void @constant_zextload_i1_to_i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
4160 ; GFX6: ; %bb.0:
4161 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
4162 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
4166 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
4169 ; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
4172 ; GFX6-NEXT: s_waitcnt vmcnt(0)
4174 ; GFX6-NEXT: v_mov_b32_e32 v1, 0
4175 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4179 ; GFX8: ; %bb.0:
4180 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
4181 ; GFX8-NEXT: v_mov_b32_e32 v3, 0
4182 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
4185 ; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
4188 ; GFX8-NEXT: s_waitcnt vmcnt(0)
4190 ; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
4194 ; EG: ; %bb.0:
4195 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
4196 ; EG-NEXT: TEX 0 @6
4197 ; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[]
4202 ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
4208 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
4211 ; GFX12: ; %bb.0:
4212 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
4213 ; GFX12-NEXT: s_wait_kmcnt 0x0
4214 ; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
4215 ; GFX12-NEXT: s_wait_kmcnt 0x0
4218 ; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
4219 ; GFX12-NEXT: global_store_b64 v1, v[0:1], s[0:1]
4227 define amdgpu_kernel void @constant_sextload_i1_to_i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
4229 ; GFX6: ; %bb.0:
4230 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
4231 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
4235 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
4238 ; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
4241 ; GFX6-NEXT: s_waitcnt vmcnt(0)
4242 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1
4244 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4248 ; GFX8: ; %bb.0:
4249 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
4250 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
4253 ; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
4256 ; GFX8-NEXT: s_waitcnt vmcnt(0)
4257 ; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 1
4259 ; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
4263 ; EG: ; %bb.0:
4264 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
4265 ; EG-NEXT: TEX 0 @6
4266 ; EG-NEXT: ALU 3, @9, KC0[CB0:0-32], KC1[]
4271 ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
4277 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
4281 ; GFX12: ; %bb.0:
4282 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
4283 ; GFX12-NEXT: s_wait_kmcnt 0x0
4284 ; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
4285 ; GFX12-NEXT: s_wait_kmcnt 0x0
4286 ; GFX12-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
4287 ; GFX12-NEXT: v_mov_b32_e32 v2, 0
4289 ; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1]
4297 define amdgpu_kernel void @constant_zextload_v1i1_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
4299 ; GFX6: ; %bb.0:
4300 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
4301 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
4305 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
4308 ; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
4311 ; GFX6-NEXT: s_waitcnt vmcnt(0)
4313 ; GFX6-NEXT: v_mov_b32_e32 v1, 0
4314 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4318 ; GFX8: ; %bb.0:
4319 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
4320 ; GFX8-NEXT: v_mov_b32_e32 v3, 0
4321 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
4324 ; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
4327 ; GFX8-NEXT: s_waitcnt vmcnt(0)
4329 ; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
4333 ; EG: ; %bb.0:
4334 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
4335 ; EG-NEXT: TEX 0 @6
4336 ; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[]
4341 ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
4347 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
4350 ; GFX12: ; %bb.0:
4351 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
4352 ; GFX12-NEXT: s_wait_kmcnt 0x0
4353 ; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
4354 ; GFX12-NEXT: s_wait_kmcnt 0x0
4357 ; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
4358 ; GFX12-NEXT: global_store_b64 v1, v[0:1], s[0:1]
4366 define amdgpu_kernel void @constant_sextload_v1i1_to_v1i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
4368 ; GFX6: ; %bb.0:
4369 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
4370 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
4374 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
4377 ; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
4380 ; GFX6-NEXT: s_waitcnt vmcnt(0)
4381 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1
4383 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4387 ; GFX8: ; %bb.0:
4388 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
4389 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
4392 ; GFX8-NEXT: flat_load_ubyte v2, v[0:1]
4395 ; GFX8-NEXT: s_waitcnt vmcnt(0)
4396 ; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 1
4398 ; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
4402 ; EG: ; %bb.0:
4403 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
4404 ; EG-NEXT: TEX 0 @6
4405 ; EG-NEXT: ALU 3, @9, KC0[CB0:0-32], KC1[]
4410 ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
4416 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
4420 ; GFX12: ; %bb.0:
4421 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
4422 ; GFX12-NEXT: s_wait_kmcnt 0x0
4423 ; GFX12-NEXT: s_load_u8 s2, s[2:3], 0x0
4424 ; GFX12-NEXT: s_wait_kmcnt 0x0
4425 ; GFX12-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
4426 ; GFX12-NEXT: v_mov_b32_e32 v2, 0
4428 ; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1]
4436 define amdgpu_kernel void @constant_zextload_v2i1_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
4438 ; GFX6: ; %bb.0:
4439 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
4440 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
4444 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
4447 ; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
4448 ; GFX6-NEXT: v_mov_b32_e32 v1, 0
4451 ; GFX6-NEXT: s_waitcnt vmcnt(0)
4455 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
4459 ; GFX8: ; %bb.0:
4460 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
4462 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
4465 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
4466 ; GFX8-NEXT: v_mov_b32_e32 v1, 0
4470 ; GFX8-NEXT: s_waitcnt vmcnt(0)
4473 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
4477 ; EG: ; %bb.0:
4478 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
4479 ; EG-NEXT: TEX 0 @6
4480 ; EG-NEXT: ALU 5, @9, KC0[CB0:0-32], KC1[]
4485 ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
4494 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
4497 ; GFX12: ; %bb.0:
4498 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
4499 ; GFX12-NEXT: v_mov_b32_e32 v1, 0
4500 ; GFX12-NEXT: s_wait_kmcnt 0x0
4502 ; GFX12-NEXT: s_wait_loadcnt 0x0
4503 ; GFX12-NEXT: v_and_b32_e32 v2, 0xffff, v0
4507 ; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0
4509 ; GFX12-NEXT: v_and_b32_e32 v2, 0xffff, v2
4510 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1]
4518 define amdgpu_kernel void @constant_sextload_v2i1_to_v2i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
4520 ; GFX6: ; %bb.0:
4521 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
4522 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
4526 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
4529 ; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
4532 ; GFX6-NEXT: s_waitcnt vmcnt(0)
4534 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1
4536 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 1
4538 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
4542 ; GFX8: ; %bb.0:
4543 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
4544 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
4547 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
4550 ; GFX8-NEXT: s_waitcnt vmcnt(0)
4552 ; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 1
4553 ; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 1
4556 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
4560 ; EG: ; %bb.0:
4561 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
4562 ; EG-NEXT: TEX 0 @6
4563 ; EG-NEXT: ALU 6, @9, KC0[CB0:0-32], KC1[]
4568 ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
4578 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
4581 ; GFX12: ; %bb.0:
4582 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
4583 ; GFX12-NEXT: v_mov_b32_e32 v4, 0
4584 ; GFX12-NEXT: s_wait_kmcnt 0x0
4586 ; GFX12-NEXT: s_wait_loadcnt 0x0
4588 ; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 1
4590 ; GFX12-NEXT: v_bfe_i32 v2, v1, 0, 1
4594 ; GFX12-NEXT: global_store_b128 v4, v[0:3], s[0:1]
4602 define amdgpu_kernel void @constant_zextload_v3i1_to_v3i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
4604 ; GFX6: ; %bb.0:
4605 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
4606 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
4610 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
4613 ; GFX6-NEXT: buffer_load_ubyte v4, off, s[8:11], 0
4614 ; GFX6-NEXT: v_mov_b32_e32 v5, 0
4619 ; GFX6-NEXT: s_waitcnt vmcnt(0)
4623 ; GFX6-NEXT: buffer_store_dwordx2 v[4:5], off, s[4:7], 0 offset:16
4624 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
4628 ; GFX8: ; %bb.0:
4629 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
4631 ; GFX8-NEXT: v_mov_b32_e32 v5, 0
4633 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
4636 ; GFX8-NEXT: flat_load_ubyte v4, v[0:1]
4638 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
4644 ; GFX8-NEXT: s_waitcnt vmcnt(0)
4649 ; GFX8-NEXT: flat_store_dwordx4 v[6:7], v[0:3]
4653 ; EG: ; %bb.0:
4654 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
4655 ; EG-NEXT: TEX 0 @6
4656 ; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[]
4657 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T3.X, 0
4661 ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
4669 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
4676 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
4679 ; GFX12: ; %bb.0:
4680 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
4681 ; GFX12-NEXT: v_mov_b32_e32 v5, 0
4682 ; GFX12-NEXT: s_wait_kmcnt 0x0
4684 ; GFX12-NEXT: s_wait_loadcnt 0x0
4685 ; GFX12-NEXT: v_and_b32_e32 v1, 0xffff, v0
4691 ; GFX12-NEXT: v_dual_mov_b32 v1, v5 :: v_dual_and_b32 v2, 0xffff, v2
4693 ; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0
4694 ; GFX12-NEXT: v_and_b32_e32 v4, 0xffff, v4
4695 ; GFX12-NEXT: s_clause 0x1
4696 ; GFX12-NEXT: global_store_b64 v5, v[4:5], s[0:1] offset:16
4697 ; GFX12-NEXT: global_store_b128 v5, v[0:3], s[0:1]
4705 define amdgpu_kernel void @constant_sextload_v3i1_to_v3i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
4707 ; GFX6: ; %bb.0:
4708 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
4709 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
4713 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
4716 ; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
4719 ; GFX6-NEXT: s_waitcnt vmcnt(0)
4722 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1
4724 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 1
4725 ; GFX6-NEXT: v_bfe_i32 v4, v3, 0, 1
4728 ; GFX6-NEXT: buffer_store_dwordx2 v[4:5], off, s[4:7], 0 offset:16
4729 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
4733 ; GFX8: ; %bb.0:
4734 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
4735 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
4738 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
4740 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
4745 ; GFX8-NEXT: s_waitcnt vmcnt(0)
4748 ; GFX8-NEXT: v_bfe_i32 v8, v3, 0, 1
4749 ; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 1
4750 ; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 1
4755 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
4759 ; EG: ; %bb.0:
4760 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
4761 ; EG-NEXT: TEX 0 @6
4762 ; EG-NEXT: ALU 14, @9, KC0[CB0:0-32], KC1[]
4763 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T3.X, 0
4767 ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
4774 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
4777 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
4781 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
4785 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
4788 ; GFX12: ; %bb.0:
4789 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
4790 ; GFX12-NEXT: v_mov_b32_e32 v6, 0
4791 ; GFX12-NEXT: s_wait_kmcnt 0x0
4793 ; GFX12-NEXT: s_wait_loadcnt 0x0
4796 ; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 1
4798 ; GFX12-NEXT: v_bfe_i32 v4, v1, 0, 1
4799 ; GFX12-NEXT: v_bfe_i32 v2, v2, 0, 1
4805 ; GFX12-NEXT: s_clause 0x1
4806 ; GFX12-NEXT: global_store_b64 v6, v[4:5], s[0:1] offset:16
4807 ; GFX12-NEXT: global_store_b128 v6, v[0:3], s[0:1]
4815 define amdgpu_kernel void @constant_zextload_v4i1_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
4817 ; GFX6: ; %bb.0:
4818 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
4819 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
4823 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
4826 ; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
4827 ; GFX6-NEXT: v_mov_b32_e32 v1, 0
4833 ; GFX6-NEXT: s_waitcnt vmcnt(0)
4838 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16
4839 ; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
4843 ; GFX8: ; %bb.0:
4844 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
4846 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
4849 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
4851 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
4852 ; GFX8-NEXT: v_mov_b32_e32 v1, 0
4860 ; GFX8-NEXT: s_waitcnt vmcnt(0)
4865 ; GFX8-NEXT: flat_store_dwordx4 v[10:11], v[0:3]
4870 ; EG: ; %bb.0:
4871 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
4872 ; EG-NEXT: TEX 0 @6
4873 ; EG-NEXT: ALU 14, @9, KC0[CB0:0-32], KC1[]
4874 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T3.X, 0
4878 ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
4883 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
4888 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
4896 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
4899 ; GFX12: ; %bb.0:
4900 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
4901 ; GFX12-NEXT: v_mov_b32_e32 v1, 0
4902 ; GFX12-NEXT: s_wait_kmcnt 0x0
4904 ; GFX12-NEXT: s_wait_loadcnt 0x0
4906 ; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0
4908 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10002
4910 ; GFX12-NEXT: s_and_b32 s3, 0xffff, s3
4913 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10001
4914 ; GFX12-NEXT: v_and_b32_e32 v2, 0xffff, v2
4916 ; GFX12-NEXT: s_wait_alu 0xfffe
4917 ; GFX12-NEXT: s_and_b32 s3, 0xffff, s3
4918 ; GFX12-NEXT: s_and_b32 s2, 0xffff, s2
4919 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:16
4920 ; GFX12-NEXT: s_wait_alu 0xfffe
4923 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1]
4931 define amdgpu_kernel void @constant_sextload_v4i1_to_v4i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
4933 ; GFX6: ; %bb.0:
4934 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
4935 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
4939 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
4942 ; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
4945 ; GFX6-NEXT: s_waitcnt vmcnt(0)
4949 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1
4951 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 1
4952 ; GFX6-NEXT: v_bfe_i32 v6, v4, 0, 1
4953 ; GFX6-NEXT: v_bfe_i32 v4, v3, 0, 1
4957 ; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16
4958 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
4962 ; GFX8: ; %bb.0:
4963 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
4964 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
4967 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
4969 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
4974 ; GFX8-NEXT: s_waitcnt vmcnt(0)
4978 ; GFX8-NEXT: v_bfe_i32 v6, v4, 0, 1
4979 ; GFX8-NEXT: v_bfe_i32 v4, v3, 0, 1
4980 ; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 1
4981 ; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 1
4987 ; GFX8-NEXT: flat_store_dwordx4 v[8:9], v[0:3]
4991 ; EG: ; %bb.0:
4992 ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
4993 ; EG-NEXT: TEX 0 @6
4994 ; EG-NEXT: ALU 17, @9, KC0[CB0:0-32], KC1[]
4995 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.XYZW, T3.X, 0
4999 ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
5004 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
5008 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
5020 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
5023 ; GFX12: ; %bb.0:
5024 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
5025 ; GFX12-NEXT: v_mov_b32_e32 v8, 0
5026 ; GFX12-NEXT: s_wait_kmcnt 0x0
5028 ; GFX12-NEXT: s_wait_loadcnt 0x0
5032 ; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 1
5034 ; GFX12-NEXT: v_bfe_i32 v6, v1, 0, 1
5035 ; GFX12-NEXT: v_bfe_i32 v4, v2, 0, 1
5037 ; GFX12-NEXT: v_bfe_i32 v2, v3, 0, 1
5044 ; GFX12-NEXT: s_clause 0x1
5045 ; GFX12-NEXT: global_store_b128 v8, v[4:7], s[0:1] offset:16
5046 ; GFX12-NEXT: global_store_b128 v8, v[0:3], s[0:1]
5054 define amdgpu_kernel void @constant_zextload_v8i1_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
5056 ; GFX6: ; %bb.0:
5057 ; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
5058 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
5062 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
5065 ; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
5066 ; GFX6-NEXT: v_mov_b32_e32 v1, 0
5076 ; GFX6-NEXT: s_waitcnt vmcnt(0)
5085 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
5086 ; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:32
5087 ; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16
5088 ; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0
5092 ; GFX8: ; %bb.0:
5093 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
5094 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
5097 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
5099 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
5101 ; GFX8-NEXT: s_addc_u32 s5, s1, 0
5102 ; GFX8-NEXT: v_mov_b32_e32 v1, 0
5112 ; GFX8-NEXT: s_waitcnt vmcnt(0)
5120 ; GFX8-NEXT: s_addc_u32 s1, s1, 0
5123 ; GFX8-NEXT: v_and_b32_e32 v6, 0xffff, v0
5132 ; GFX8-NEXT: flat_store_dwordx4 v[15:16], v[0:3]
5137 ; EG: ; %bb.0:
5138 ; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
5139 ; EG-NEXT: TEX 0 @8
5140 ; EG-NEXT: ALU 30, @11, KC0[CB0:0-32], KC1[]
5141 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T12.X, 0
5142 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T11.X, 0
5143 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T10.X, 0
5147 ; EG-NEXT: VTX_READ_8 T5.X, T5.X, 0, #1
5152 ; EG-NEXT: 7(9.809089e-45), 0(0.000000e+00)
5165 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
5181 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
5184 ; GFX12: ; %bb.0:
5185 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
5186 ; GFX12-NEXT: v_mov_b32_e32 v1, 0
5187 ; GFX12-NEXT: s_wait_kmcnt 0x0
5189 ; GFX12-NEXT: s_wait_loadcnt 0x0
5190 ; GFX12-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_and_b32 v0, 0xffff, v12
5206 ; GFX12-NEXT: s_clause 0x3
5207 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:48
5208 ; GFX12-NEXT: global_store_b128 v1, v[4:7], s[0:1] offset:32
5209 ; GFX12-NEXT: global_store_b128 v1, v[8:11], s[0:1] offset:16
5210 ; GFX12-NEXT: global_store_b128 v1, v[12:15], s[0:1]
5218 define amdgpu_kernel void @constant_sextload_v8i1_to_v8i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
5220 ; GFX6: ; %bb.0:
5221 ; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
5222 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
5226 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
5229 ; GFX6-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
5232 ; GFX6-NEXT: s_waitcnt vmcnt(0)
5240 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1
5242 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 1
5243 ; GFX6-NEXT: v_bfe_i32 v6, v6, 0, 1
5244 ; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 1
5245 ; GFX6-NEXT: v_bfe_i32 v10, v8, 0, 1
5246 ; GFX6-NEXT: v_bfe_i32 v8, v7, 0, 1
5247 ; GFX6-NEXT: v_bfe_i32 v14, v5, 0, 1
5248 ; GFX6-NEXT: v_bfe_i32 v12, v3, 0, 1
5256 ; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:48
5257 ; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
5258 ; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
5259 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
5263 ; GFX8: ; %bb.0:
5264 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
5265 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
5268 ; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
5271 ; GFX8-NEXT: s_waitcnt vmcnt(0)
5281 ; GFX8-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
5282 ; GFX8-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
5283 ; GFX8-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
5284 ; GFX8-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
5285 ; GFX8-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
5286 ; GFX8-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
5287 ; GFX8-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
5291 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
5297 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
5306 ; GFX8-NEXT: s_addc_u32 s1, s1, 0
5309 ; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 1
5319 ; GFX8-NEXT: flat_store_dwordx4 v[16:17], v[0:3]
5323 ; EG: ; %bb.0:
5324 ; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
5325 ; EG-NEXT: TEX 0 @8
5326 ; EG-NEXT: ALU 37, @11, KC0[CB0:0-32], KC1[]
5327 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T12.X, 0
5328 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T11.X, 0
5329 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
5333 ; EG-NEXT: VTX_READ_8 T5.X, T5.X, 0, #1
5349 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
5364 ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
5374 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
5377 ; GFX12: ; %bb.0:
5378 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
5379 ; GFX12-NEXT: v_mov_b32_e32 v16, 0
5380 ; GFX12-NEXT: s_wait_kmcnt 0x0
5382 ; GFX12-NEXT: s_wait_loadcnt 0x0
5393 ; GFX12-NEXT: s_wait_alu 0xfffe
5394 ; GFX12-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
5395 ; GFX12-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
5396 ; GFX12-NEXT: v_bfe_i32 v12, v9, 0, 1
5397 ; GFX12-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
5398 ; GFX12-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
5399 ; GFX12-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
5400 ; GFX12-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
5401 ; GFX12-NEXT: s_wait_alu 0xfffe
5404 ; GFX12-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
5411 ; GFX12-NEXT: s_clause 0x3
5412 ; GFX12-NEXT: global_store_b128 v16, v[0:3], s[0:1] offset:48
5413 ; GFX12-NEXT: global_store_b128 v16, v[4:7], s[0:1] offset:32
5414 ; GFX12-NEXT: global_store_b128 v16, v[8:11], s[0:1] offset:16
5415 ; GFX12-NEXT: global_store_b128 v16, v[12:15], s[0:1]
5423 define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
5425 ; GFX6: ; %bb.0:
5426 ; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
5427 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
5431 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
5434 ; GFX6-NEXT: buffer_load_ushort v29, off, s[8:11], 0
5435 ; GFX6-NEXT: v_mov_b32_e32 v1, 0
5453 ; GFX6-NEXT: s_waitcnt vmcnt(0)
5456 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
5458 ; GFX6-NEXT: s_waitcnt expcnt(0)
5460 ; GFX6-NEXT: buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:64
5462 ; GFX6-NEXT: s_waitcnt expcnt(0)
5464 ; GFX6-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:112
5474 ; GFX6-NEXT: s_waitcnt expcnt(0)
5476 ; GFX6-NEXT: buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:96
5477 ; GFX6-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:16
5478 ; GFX6-NEXT: buffer_store_dwordx4 v[17:20], off, s[0:3], 0
5479 ; GFX6-NEXT: buffer_store_dwordx4 v[21:24], off, s[0:3], 0 offset:48
5480 ; GFX6-NEXT: buffer_store_dwordx4 v[25:28], off, s[0:3], 0 offset:32
5484 ; GFX8: ; %bb.0:
5485 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
5486 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
5489 ; GFX8-NEXT: flat_load_ushort v0, v[0:1]
5490 ; GFX8-NEXT: v_mov_b32_e32 v1, 0
5496 ; GFX8-NEXT: s_waitcnt vmcnt(0)
5498 ; GFX8-NEXT: s_bfe_u32 s3, s2, 0x10009
5499 ; GFX8-NEXT: s_bfe_u32 s4, s2, 0x1000d
5500 ; GFX8-NEXT: s_bfe_u32 s5, s2, 0x10007
5501 ; GFX8-NEXT: s_bfe_u32 s6, s2, 0x10003
5502 ; GFX8-NEXT: s_bfe_u32 s7, s2, 0x10001
5504 ; GFX8-NEXT: s_bfe_u32 s9, s2, 0x10002
5505 ; GFX8-NEXT: s_bfe_u32 s10, s2, 0x10004
5506 ; GFX8-NEXT: s_bfe_u32 s11, s2, 0x10006
5507 ; GFX8-NEXT: s_bfe_u32 s12, s2, 0x1000c
5508 ; GFX8-NEXT: s_bfe_u32 s2, s2, 0x1000a
5509 ; GFX8-NEXT: v_and_b32_e32 v4, 0xffff, v0
5511 ; GFX8-NEXT: s_add_u32 s2, s0, 0x50
5513 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
5518 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
5519 ; GFX8-NEXT: flat_store_dwordx4 v[12:13], v[0:3]
5522 ; GFX8-NEXT: s_add_u32 s2, s0, 0x70
5527 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
5532 ; GFX8-NEXT: s_add_u32 s2, s0, 0x60
5533 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
5539 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
5540 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
5546 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
5547 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
5553 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
5554 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
5559 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
5564 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
5568 ; EG: ; %bb.0:
5569 ; EG-NEXT: ALU 0, @14, KC0[CB0:0-32], KC1[]
5570 ; EG-NEXT: TEX 0 @12
5571 ; EG-NEXT: ALU 62, @15, KC0[CB0:0-32], KC1[]
5572 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T22.X, 0
5573 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T21.X, 0
5574 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T20.X, 0
5575 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T19.X, 0
5576 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T18.X, 0
5577 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T17.X, 0
5578 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0
5582 ; EG-NEXT: VTX_READ_16 T7.X, T7.X, 0, #1
5587 ; EG-NEXT: 15(2.101948e-44), 0(0.000000e+00)
5616 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
5648 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
5651 ; GFX12: ; %bb.0:
5652 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
5653 ; GFX12-NEXT: v_mov_b32_e32 v1, 0
5654 ; GFX12-NEXT: s_wait_kmcnt 0x0
5656 ; GFX12-NEXT: s_wait_loadcnt 0x0
5657 ; GFX12-NEXT: v_and_b32_e32 v4, 0xffff, v0
5663 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1000a
5666 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1000d
5667 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1000c
5670 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:80
5672 ; GFX12-NEXT: s_wait_alu 0xfffe
5674 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10007
5675 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10006
5677 ; GFX12-NEXT: s_bfe_u32 s6, s2, 0x10002
5678 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:96
5679 ; GFX12-NEXT: s_wait_alu 0xfffe
5682 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10004
5683 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10009
5684 ; GFX12-NEXT: s_bfe_u32 s5, s2, 0x10001
5686 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:48
5687 ; GFX12-NEXT: s_wait_alu 0xfffe
5690 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10003
5694 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:32
5696 ; GFX12-NEXT: s_wait_alu 0xfffe
5699 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:16
5702 ; GFX12-NEXT: s_clause 0x2
5703 ; GFX12-NEXT: global_store_b128 v1, v[8:11], s[0:1] offset:112
5704 ; GFX12-NEXT: global_store_b128 v1, v[4:7], s[0:1] offset:64
5705 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1]
5713 define amdgpu_kernel void @constant_sextload_v16i1_to_v16i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
5715 ; GFX6: ; %bb.0:
5716 ; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
5717 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
5721 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
5724 ; GFX6-NEXT: buffer_load_ushort v1, off, s[8:11], 0
5727 ; GFX6-NEXT: s_waitcnt vmcnt(0)
5742 ; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 1
5743 ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1
5744 ; GFX6-NEXT: v_bfe_i32 v5, v4, 0, 1
5745 ; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 1
5748 ; GFX6-NEXT: buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:112
5749 ; GFX6-NEXT: s_waitcnt expcnt(0)
5750 ; GFX6-NEXT: v_bfe_i32 v6, v10, 0, 1
5751 ; GFX6-NEXT: v_bfe_i32 v4, v9, 0, 1
5752 ; GFX6-NEXT: v_bfe_i32 v9, v8, 0, 1
5753 ; GFX6-NEXT: v_bfe_i32 v7, v7, 0, 1
5756 ; GFX6-NEXT: buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:96
5757 ; GFX6-NEXT: s_waitcnt expcnt(0)
5758 ; GFX6-NEXT: v_bfe_i32 v9, v12, 0, 1
5759 ; GFX6-NEXT: v_bfe_i32 v7, v11, 0, 1
5760 ; GFX6-NEXT: v_bfe_i32 v13, v13, 0, 1
5761 ; GFX6-NEXT: v_bfe_i32 v11, v1, 0, 1
5765 ; GFX6-NEXT: buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:80
5766 ; GFX6-NEXT: v_bfe_i32 v17, v1, 0, 1
5767 ; GFX6-NEXT: v_bfe_i32 v15, v15, 0, 1
5768 ; GFX6-NEXT: v_bfe_i32 v21, v16, 0, 1
5769 ; GFX6-NEXT: v_bfe_i32 v19, v14, 0, 1
5774 ; GFX6-NEXT: s_waitcnt expcnt(0)
5781 ; GFX6-NEXT: buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:64
5782 ; GFX6-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:48
5783 ; GFX6-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:32
5784 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
5785 ; GFX6-NEXT: buffer_store_dwordx4 v[11:14], off, s[0:3], 0
5789 ; GFX8: ; %bb.0:
5790 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
5791 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
5794 ; GFX8-NEXT: flat_load_ushort v0, v[0:1]
5797 ; GFX8-NEXT: s_waitcnt vmcnt(0)
5815 ; GFX8-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
5816 ; GFX8-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
5817 ; GFX8-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
5818 ; GFX8-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
5819 ; GFX8-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
5820 ; GFX8-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
5821 ; GFX8-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
5822 ; GFX8-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
5823 ; GFX8-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
5824 ; GFX8-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
5825 ; GFX8-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
5826 ; GFX8-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
5827 ; GFX8-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
5828 ; GFX8-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
5829 ; GFX8-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
5831 ; GFX8-NEXT: s_add_u32 s2, s0, 0x70
5833 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
5836 ; GFX8-NEXT: s_add_u32 s2, s0, 0x60
5839 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
5843 ; GFX8-NEXT: s_add_u32 s2, s0, 0x50
5848 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
5857 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
5866 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
5875 ; GFX8-NEXT: s_addc_u32 s3, s1, 0
5884 ; GFX8-NEXT: s_addc_u32 s1, s1, 0
5887 ; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 1
5897 ; GFX8-NEXT: flat_store_dwordx4 v[18:19], v[0:3]
5901 ; EG: ; %bb.0:
5902 ; EG-NEXT: ALU 0, @14, KC0[CB0:0-32], KC1[]
5903 ; EG-NEXT: TEX 0 @12
5904 ; EG-NEXT: ALU 78, @15, KC0[CB0:0-32], KC1[]
5905 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T22.X, 0
5906 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T21.X, 0
5907 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T18.X, 0
5908 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T12.X, 0
5909 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T11.X, 0
5910 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T10.X, 0
5911 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T9.X, 0
5915 ; EG-NEXT: VTX_READ_16 T7.X, T7.X, 0, #1
5945 ; EG-NEXT: 10(1.401298e-44), 0(0.000000e+00)
5957 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
5970 ; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
5987 ; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00)
5997 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
6000 ; GFX12: ; %bb.0:
6001 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
6002 ; GFX12-NEXT: v_mov_b32_e32 v32, 0
6003 ; GFX12-NEXT: s_wait_kmcnt 0x0
6005 ; GFX12-NEXT: s_wait_loadcnt 0x0
6010 ; GFX12-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
6025 ; GFX12-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
6026 ; GFX12-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
6027 ; GFX12-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
6028 ; GFX12-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
6029 ; GFX12-NEXT: s_wait_alu 0xfffe
6030 ; GFX12-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
6031 ; GFX12-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
6032 ; GFX12-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
6033 ; GFX12-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
6034 ; GFX12-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
6035 ; GFX12-NEXT: s_wait_alu 0xfffe
6043 ; GFX12-NEXT: v_bfe_i32 v28, v28, 0, 1
6044 ; GFX12-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
6045 ; GFX12-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
6048 ; GFX12-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
6049 ; GFX12-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
6052 ; GFX12-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
6058 ; GFX12-NEXT: s_clause 0x1
6059 ; GFX12-NEXT: global_store_b128 v32, v[0:3], s[0:1] offset:112
6060 ; GFX12-NEXT: global_store_b128 v32, v[4:7], s[0:1] offset:96
6062 ; GFX12-NEXT: s_clause 0x5
6063 ; GFX12-NEXT: global_store_b128 v32, v[8:11], s[0:1] offset:80
6064 ; GFX12-NEXT: global_store_b128 v32, v[12:15], s[0:1] offset:64
6065 ; GFX12-NEXT: global_store_b128 v32, v[16:19], s[0:1] offset:48
6066 ; GFX12-NEXT: global_store_b128 v32, v[20:23], s[0:1] offset:32
6067 ; GFX12-NEXT: global_store_b128 v32, v[24:27], s[0:1] offset:16
6068 ; GFX12-NEXT: global_store_b128 v32, v[28:31], s[0:1]
6076 define amdgpu_kernel void @constant_zextload_v32i1_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
6078 ; GFX6: ; %bb.0:
6079 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
6080 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
6081 ; GFX6-NEXT: s_load_dword s4, s[2:3], 0x0
6082 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
6083 ; GFX6-NEXT: v_mov_b32_e32 v1, 0
6086 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
6087 ; GFX6-NEXT: s_bfe_u32 s5, s4, 0x10001
6088 ; GFX6-NEXT: s_bfe_u32 s6, s4, 0x10003
6089 ; GFX6-NEXT: s_bfe_u32 s7, s4, 0x10005
6090 ; GFX6-NEXT: s_bfe_u32 s8, s4, 0x10007
6091 ; GFX6-NEXT: s_bfe_u32 s9, s4, 0x10009
6092 ; GFX6-NEXT: s_bfe_u32 s10, s4, 0x1000b
6093 ; GFX6-NEXT: s_bfe_u32 s11, s4, 0x1000d
6094 ; GFX6-NEXT: s_bfe_u32 s12, s4, 0x1000f
6095 ; GFX6-NEXT: s_bfe_u32 s13, s4, 0x10011
6096 ; GFX6-NEXT: s_bfe_u32 s14, s4, 0x10013
6097 ; GFX6-NEXT: s_bfe_u32 s15, s4, 0x10015
6098 ; GFX6-NEXT: s_bfe_u32 s16, s4, 0x10017
6099 ; GFX6-NEXT: s_bfe_u32 s17, s4, 0x10019
6100 ; GFX6-NEXT: s_bfe_u32 s18, s4, 0x1001b
6101 ; GFX6-NEXT: s_bfe_u32 s19, s4, 0x1001d
6104 ; GFX6-NEXT: s_bfe_u32 s22, s4, 0x10002
6105 ; GFX6-NEXT: s_bfe_u32 s23, s4, 0x10004
6106 ; GFX6-NEXT: s_bfe_u32 s24, s4, 0x10006
6107 ; GFX6-NEXT: s_bfe_u32 s25, s4, 0x10008
6108 ; GFX6-NEXT: s_bfe_u32 s26, s4, 0x1000a
6109 ; GFX6-NEXT: s_bfe_u32 s27, s4, 0x1000c
6110 ; GFX6-NEXT: s_bfe_u32 s28, s4, 0x1000e
6111 ; GFX6-NEXT: s_bfe_u32 s29, s4, 0x10010
6112 ; GFX6-NEXT: s_bfe_u32 s30, s4, 0x10012
6113 ; GFX6-NEXT: s_bfe_u32 s31, s4, 0x10014
6114 ; GFX6-NEXT: s_bfe_u32 s33, s4, 0x10016
6115 ; GFX6-NEXT: s_bfe_u32 s34, s4, 0x10018
6116 ; GFX6-NEXT: s_bfe_u32 s35, s4, 0x1001a
6117 ; GFX6-NEXT: s_bfe_u32 s36, s4, 0x1001e
6118 ; GFX6-NEXT: s_bfe_u32 s4, s4, 0x1001c
6121 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
6122 ; GFX6-NEXT: s_waitcnt expcnt(0)
6125 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
6126 ; GFX6-NEXT: s_waitcnt expcnt(0)
6129 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208
6130 ; GFX6-NEXT: s_waitcnt expcnt(0)
6133 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
6134 ; GFX6-NEXT: s_waitcnt expcnt(0)
6137 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
6138 ; GFX6-NEXT: s_waitcnt expcnt(0)
6141 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
6142 ; GFX6-NEXT: s_waitcnt expcnt(0)
6145 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
6146 ; GFX6-NEXT: s_waitcnt expcnt(0)
6149 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
6150 ; GFX6-NEXT: s_waitcnt expcnt(0)
6153 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
6154 ; GFX6-NEXT: s_waitcnt expcnt(0)
6157 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
6158 ; GFX6-NEXT: s_waitcnt expcnt(0)
6161 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
6162 ; GFX6-NEXT: s_waitcnt expcnt(0)
6165 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
6166 ; GFX6-NEXT: s_waitcnt expcnt(0)
6169 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
6170 ; GFX6-NEXT: s_waitcnt expcnt(0)
6173 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
6174 ; GFX6-NEXT: s_waitcnt expcnt(0)
6177 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
6178 ; GFX6-NEXT: s_waitcnt expcnt(0)
6181 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
6185 ; GFX8: ; %bb.0:
6186 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
6187 ; GFX8-NEXT: v_mov_b32_e32 v1, 0
6189 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
6190 ; GFX8-NEXT: s_load_dword s6, s[2:3], 0x0
6191 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
6193 ; GFX8-NEXT: s_bfe_u32 s8, s6, 0x1001d
6194 ; GFX8-NEXT: s_bfe_u32 s9, s6, 0x1001b
6195 ; GFX8-NEXT: s_bfe_u32 s10, s6, 0x10019
6196 ; GFX8-NEXT: s_bfe_u32 s11, s6, 0x10017
6197 ; GFX8-NEXT: s_bfe_u32 s12, s6, 0x10013
6198 ; GFX8-NEXT: s_bfe_u32 s13, s6, 0x10011
6199 ; GFX8-NEXT: s_bfe_u32 s14, s6, 0x1000f
6200 ; GFX8-NEXT: s_bfe_u32 s15, s6, 0x1000d
6201 ; GFX8-NEXT: s_bfe_u32 s16, s6, 0x1000b
6202 ; GFX8-NEXT: s_bfe_u32 s17, s6, 0x10009
6203 ; GFX8-NEXT: s_bfe_u32 s18, s6, 0x10007
6204 ; GFX8-NEXT: s_bfe_u32 s19, s6, 0x10005
6205 ; GFX8-NEXT: s_bfe_u32 s4, s6, 0x10003
6206 ; GFX8-NEXT: s_bfe_u32 s2, s6, 0x10001
6208 ; GFX8-NEXT: s_bfe_u32 s5, s6, 0x10002
6209 ; GFX8-NEXT: s_bfe_u32 s20, s6, 0x10004
6210 ; GFX8-NEXT: s_bfe_u32 s21, s6, 0x10006
6211 ; GFX8-NEXT: s_bfe_u32 s22, s6, 0x10008
6212 ; GFX8-NEXT: s_bfe_u32 s23, s6, 0x1000a
6213 ; GFX8-NEXT: s_bfe_u32 s24, s6, 0x1000c
6214 ; GFX8-NEXT: s_bfe_u32 s25, s6, 0x1000e
6215 ; GFX8-NEXT: s_bfe_u32 s26, s6, 0x10010
6216 ; GFX8-NEXT: s_bfe_u32 s27, s6, 0x10012
6217 ; GFX8-NEXT: s_bfe_u32 s28, s6, 0x10014
6218 ; GFX8-NEXT: s_bfe_u32 s29, s6, 0x10015
6219 ; GFX8-NEXT: s_bfe_u32 s30, s6, 0x10016
6220 ; GFX8-NEXT: s_bfe_u32 s31, s6, 0x10018
6221 ; GFX8-NEXT: s_bfe_u32 s33, s6, 0x1001a
6222 ; GFX8-NEXT: s_bfe_u32 s34, s6, 0x1001c
6223 ; GFX8-NEXT: s_bfe_u32 s6, s6, 0x1001e
6225 ; GFX8-NEXT: s_add_u32 s6, s0, 0xf0
6227 ; GFX8-NEXT: s_addc_u32 s7, s1, 0
6230 ; GFX8-NEXT: s_add_u32 s6, s0, 0xe0
6231 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6232 ; GFX8-NEXT: s_addc_u32 s7, s1, 0
6237 ; GFX8-NEXT: s_add_u32 s6, s0, 0xd0
6238 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6239 ; GFX8-NEXT: s_addc_u32 s7, s1, 0
6244 ; GFX8-NEXT: s_add_u32 s6, s0, 0xc0
6245 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6246 ; GFX8-NEXT: s_addc_u32 s7, s1, 0
6251 ; GFX8-NEXT: s_add_u32 s6, s0, 0xb0
6252 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6253 ; GFX8-NEXT: s_addc_u32 s7, s1, 0
6258 ; GFX8-NEXT: s_add_u32 s6, s0, 0xa0
6259 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6260 ; GFX8-NEXT: s_addc_u32 s7, s1, 0
6265 ; GFX8-NEXT: s_add_u32 s6, s0, 0x90
6266 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6267 ; GFX8-NEXT: s_addc_u32 s7, s1, 0
6272 ; GFX8-NEXT: s_add_u32 s6, s0, 0x80
6273 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6274 ; GFX8-NEXT: s_addc_u32 s7, s1, 0
6279 ; GFX8-NEXT: s_add_u32 s6, s0, 0x70
6280 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6281 ; GFX8-NEXT: s_addc_u32 s7, s1, 0
6286 ; GFX8-NEXT: s_add_u32 s6, s0, 0x60
6287 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6288 ; GFX8-NEXT: s_addc_u32 s7, s1, 0
6293 ; GFX8-NEXT: s_add_u32 s6, s0, 0x50
6294 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6295 ; GFX8-NEXT: s_addc_u32 s7, s1, 0
6301 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6302 ; GFX8-NEXT: s_addc_u32 s7, s1, 0
6308 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6309 ; GFX8-NEXT: s_addc_u32 s7, s1, 0
6315 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6316 ; GFX8-NEXT: s_addc_u32 s7, s1, 0
6321 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6322 ; GFX8-NEXT: s_nop 0
6326 ; GFX8-NEXT: s_addc_u32 s5, s1, 0
6329 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6334 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6338 ; EG: ; %bb.0:
6339 ; EG-NEXT: ALU 0, @24, KC0[CB0:0-32], KC1[]
6340 ; EG-NEXT: TEX 0 @22
6341 ; EG-NEXT: ALU 96, @25, KC0[CB0:0-32], KC1[]
6342 ; EG-NEXT: ALU 30, @122, KC0[CB0:0-32], KC1[]
6343 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T42.X, 0
6344 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T41.X, 0
6345 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T40.X, 0
6346 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T39.X, 0
6347 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T38.X, 0
6348 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T37.X, 0
6349 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T36.X, 0
6350 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T35.X, 0
6351 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T34.X, 0
6352 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T33.X, 0
6353 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T32.X, 0
6354 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T31.X, 0
6355 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T30.X, 0
6356 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T29.X, 0
6357 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T28.X, 0
6362 ; EG-NEXT: VTX_READ_32 T11.X, T11.X, 0, #1
6367 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
6428 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
6462 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
6465 ; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00)
6494 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
6497 ; GFX12: ; %bb.0:
6498 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
6499 ; GFX12-NEXT: s_wait_kmcnt 0x0
6500 ; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0
6501 ; GFX12-NEXT: s_wait_kmcnt 0x0
6502 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1001e
6504 ; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s3
6506 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1001d
6508 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1001c
6509 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:240
6510 ; GFX12-NEXT: s_wait_alu 0xfffe
6513 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1001b
6514 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1001a
6515 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:224
6516 ; GFX12-NEXT: s_wait_alu 0xfffe
6519 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10019
6520 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10018
6521 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:208
6522 ; GFX12-NEXT: s_wait_alu 0xfffe
6525 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10017
6526 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10016
6527 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:192
6528 ; GFX12-NEXT: s_wait_alu 0xfffe
6531 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10014
6532 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10015
6533 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:176
6534 ; GFX12-NEXT: s_wait_alu 0xfffe
6537 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10013
6538 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10012
6539 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:160
6540 ; GFX12-NEXT: s_wait_alu 0xfffe
6543 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10011
6544 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10010
6545 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:144
6546 ; GFX12-NEXT: s_wait_alu 0xfffe
6549 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1000f
6550 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1000e
6551 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:128
6552 ; GFX12-NEXT: s_wait_alu 0xfffe
6555 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1000d
6556 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1000c
6557 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:112
6558 ; GFX12-NEXT: s_wait_alu 0xfffe
6561 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1000b
6562 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1000a
6563 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:96
6564 ; GFX12-NEXT: s_wait_alu 0xfffe
6567 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10009
6568 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10008
6569 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:80
6570 ; GFX12-NEXT: s_wait_alu 0xfffe
6573 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10007
6574 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10006
6575 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:64
6576 ; GFX12-NEXT: s_wait_alu 0xfffe
6579 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10005
6580 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10004
6581 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:48
6582 ; GFX12-NEXT: s_wait_alu 0xfffe
6585 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10003
6586 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10002
6587 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:32
6588 ; GFX12-NEXT: s_wait_alu 0xfffe
6591 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10001
6593 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:16
6594 ; GFX12-NEXT: s_wait_alu 0xfffe
6597 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1]
6605 define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
6607 ; GFX6: ; %bb.0:
6608 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
6609 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
6610 ; GFX6-NEXT: s_load_dword s4, s[2:3], 0x0
6611 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
6613 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
6631 ; GFX6-NEXT: s_bfe_i64 s[44:45], s[4:5], 0x10000
6636 ; GFX6-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
6637 ; GFX6-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
6644 ; GFX6-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
6645 ; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
6652 ; GFX6-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
6653 ; GFX6-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
6660 ; GFX6-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
6661 ; GFX6-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
6668 ; GFX6-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
6669 ; GFX6-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
6670 ; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:240
6671 ; GFX6-NEXT: s_waitcnt expcnt(0)
6678 ; GFX6-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
6679 ; GFX6-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:224
6680 ; GFX6-NEXT: s_waitcnt expcnt(0)
6685 ; GFX6-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
6686 ; GFX6-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
6687 ; GFX6-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
6688 ; GFX6-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
6689 ; GFX6-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
6690 ; GFX6-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
6691 ; GFX6-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
6692 ; GFX6-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
6693 ; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
6694 ; GFX6-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
6695 ; GFX6-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
6696 ; GFX6-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
6697 ; GFX6-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
6698 ; GFX6-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
6699 ; GFX6-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
6700 ; GFX6-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
6701 ; GFX6-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
6702 ; GFX6-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
6703 ; GFX6-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
6704 ; GFX6-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
6705 ; GFX6-NEXT: buffer_store_dwordx4 v[10:13], off, s[0:3], 0 offset:208
6706 ; GFX6-NEXT: buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:192
6707 ; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:176
6710 ; GFX6-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:160
6716 ; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:144
6717 ; GFX6-NEXT: s_waitcnt expcnt(0)
6722 ; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:128
6723 ; GFX6-NEXT: s_waitcnt expcnt(0)
6728 ; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:112
6729 ; GFX6-NEXT: s_waitcnt expcnt(0)
6734 ; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:96
6735 ; GFX6-NEXT: s_waitcnt expcnt(0)
6740 ; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:80
6741 ; GFX6-NEXT: s_waitcnt expcnt(0)
6746 ; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:64
6747 ; GFX6-NEXT: s_waitcnt expcnt(0)
6752 ; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:48
6753 ; GFX6-NEXT: s_waitcnt expcnt(0)
6758 ; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:32
6759 ; GFX6-NEXT: s_waitcnt expcnt(0)
6764 ; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:16
6765 ; GFX6-NEXT: s_waitcnt expcnt(0)
6768 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
6772 ; GFX8: ; %bb.0:
6773 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
6774 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
6775 ; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
6776 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
6808 ; GFX8-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x10000
6809 ; GFX8-NEXT: s_bfe_i64 s[2:3], s[68:69], 0x10000
6810 ; GFX8-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
6811 ; GFX8-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
6812 ; GFX8-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
6813 ; GFX8-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
6814 ; GFX8-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
6815 ; GFX8-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
6816 ; GFX8-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
6817 ; GFX8-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
6818 ; GFX8-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
6819 ; GFX8-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
6820 ; GFX8-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
6821 ; GFX8-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
6822 ; GFX8-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
6823 ; GFX8-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
6824 ; GFX8-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
6825 ; GFX8-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
6826 ; GFX8-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
6827 ; GFX8-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
6828 ; GFX8-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000
6829 ; GFX8-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000
6830 ; GFX8-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000
6831 ; GFX8-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000
6832 ; GFX8-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000
6833 ; GFX8-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000
6834 ; GFX8-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x10000
6835 ; GFX8-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x10000
6836 ; GFX8-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000
6837 ; GFX8-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x10000
6838 ; GFX8-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000
6839 ; GFX8-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
6841 ; GFX8-NEXT: s_add_u32 s44, s0, 0xf0
6843 ; GFX8-NEXT: s_addc_u32 s45, s1, 0
6848 ; GFX8-NEXT: s_add_u32 s44, s0, 0xe0
6849 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6850 ; GFX8-NEXT: s_addc_u32 s45, s1, 0
6857 ; GFX8-NEXT: s_add_u32 s44, s0, 0xd0
6858 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6859 ; GFX8-NEXT: s_addc_u32 s45, s1, 0
6866 ; GFX8-NEXT: s_add_u32 s44, s0, 0xc0
6867 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6868 ; GFX8-NEXT: s_addc_u32 s45, s1, 0
6875 ; GFX8-NEXT: s_add_u32 s44, s0, 0xb0
6876 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6877 ; GFX8-NEXT: s_addc_u32 s45, s1, 0
6884 ; GFX8-NEXT: s_add_u32 s44, s0, 0xa0
6885 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6886 ; GFX8-NEXT: s_addc_u32 s45, s1, 0
6893 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6894 ; GFX8-NEXT: s_nop 0
6896 ; GFX8-NEXT: s_add_u32 s40, s0, 0x90
6898 ; GFX8-NEXT: s_addc_u32 s41, s1, 0
6903 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6904 ; GFX8-NEXT: s_nop 0
6906 ; GFX8-NEXT: s_add_u32 s36, s0, 0x80
6908 ; GFX8-NEXT: s_addc_u32 s37, s1, 0
6913 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6914 ; GFX8-NEXT: s_nop 0
6916 ; GFX8-NEXT: s_add_u32 s30, s0, 0x70
6918 ; GFX8-NEXT: s_addc_u32 s31, s1, 0
6923 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6924 ; GFX8-NEXT: s_nop 0
6926 ; GFX8-NEXT: s_add_u32 s26, s0, 0x60
6928 ; GFX8-NEXT: s_addc_u32 s27, s1, 0
6933 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6934 ; GFX8-NEXT: s_nop 0
6936 ; GFX8-NEXT: s_add_u32 s22, s0, 0x50
6938 ; GFX8-NEXT: s_addc_u32 s23, s1, 0
6943 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6944 ; GFX8-NEXT: s_nop 0
6948 ; GFX8-NEXT: s_addc_u32 s19, s1, 0
6953 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6954 ; GFX8-NEXT: s_nop 0
6958 ; GFX8-NEXT: s_addc_u32 s15, s1, 0
6963 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6964 ; GFX8-NEXT: s_nop 0
6968 ; GFX8-NEXT: s_addc_u32 s11, s1, 0
6973 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6974 ; GFX8-NEXT: s_nop 0
6978 ; GFX8-NEXT: s_addc_u32 s7, s1, 0
6983 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6990 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
6994 ; EG: ; %bb.0:
6995 ; EG-NEXT: ALU 0, @24, KC0[CB0:0-32], KC1[]
6996 ; EG-NEXT: TEX 0 @22
6997 ; EG-NEXT: ALU 92, @25, KC0[CB0:0-32], KC1[]
6998 ; EG-NEXT: ALU 65, @118, KC0[CB0:0-32], KC1[]
6999 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T42.X, 0
7000 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T41.X, 0
7001 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T34.X, 0
7002 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T24.X, 0
7003 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T23.X, 0
7004 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T22.X, 0
7005 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T21.X, 0
7006 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T20.X, 0
7007 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T19.X, 0
7008 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T18.X, 0
7009 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T17.X, 0
7010 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T16.X, 0
7011 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T15.X, 0
7012 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T14.X, 0
7013 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T13.X, 0
7018 ; EG-NEXT: VTX_READ_32 T11.X, T11.X, 0, #1
7062 ; EG-NEXT: 192(2.690493e-43), 0(0.000000e+00)
7070 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
7084 ; EG-NEXT: 26(3.643376e-44), 0(0.000000e+00)
7121 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
7134 ; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
7171 ; EG-NEXT: 224(3.138909e-43), 0(0.000000e+00)
7181 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
7184 ; GFX12: ; %bb.0:
7185 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
7186 ; GFX12-NEXT: s_wait_kmcnt 0x0
7187 ; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0
7188 ; GFX12-NEXT: s_wait_kmcnt 0x0
7195 ; GFX12-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
7196 ; GFX12-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
7199 ; GFX12-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
7200 ; GFX12-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
7201 ; GFX12-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v1, s35
7202 ; GFX12-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
7203 ; GFX12-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
7208 ; GFX12-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x10000
7209 ; GFX12-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000
7218 ; GFX12-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000
7220 ; GFX12-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
7224 ; GFX12-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x10000
7225 ; GFX12-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x10000
7228 ; GFX12-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000
7229 ; GFX12-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000
7230 ; GFX12-NEXT: s_clause 0x3
7231 ; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:240
7232 ; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:224
7233 ; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:208
7234 ; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:192
7242 ; GFX12-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000
7243 ; GFX12-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000
7248 ; GFX12-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000
7249 ; GFX12-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000
7254 ; GFX12-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
7255 ; GFX12-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
7256 ; GFX12-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
7257 ; GFX12-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
7262 ; GFX12-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
7263 ; GFX12-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
7268 ; GFX12-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
7269 ; GFX12-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
7272 ; GFX12-NEXT: s_clause 0x5
7273 ; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:176
7274 ; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:160
7275 ; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:144
7276 ; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:128
7277 ; GFX12-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:112
7278 ; GFX12-NEXT: global_store_b128 v24, v[20:23], s[0:1] offset:96
7283 ; GFX12-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
7284 ; GFX12-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
7287 ; GFX12-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
7288 ; GFX12-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
7291 ; GFX12-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x10000
7292 ; GFX12-NEXT: s_bfe_i64 s[2:3], s[68:69], 0x10000
7299 ; GFX12-NEXT: s_clause 0x5
7300 ; GFX12-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:80
7301 ; GFX12-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:64
7302 ; GFX12-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:48
7303 ; GFX12-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:32
7304 ; GFX12-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:16
7305 ; GFX12-NEXT: global_store_b128 v24, v[20:23], s[0:1]
7313 define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
7315 ; GFX6: ; %bb.0:
7316 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
7317 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
7318 ; GFX6-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
7319 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
7320 ; GFX6-NEXT: s_bfe_u32 s4, s2, 0x10003
7321 ; GFX6-NEXT: s_bfe_u32 s5, s2, 0x10005
7322 ; GFX6-NEXT: s_bfe_u32 s8, s2, 0x10007
7323 ; GFX6-NEXT: s_bfe_u32 s11, s2, 0x10009
7324 ; GFX6-NEXT: s_bfe_u32 s13, s2, 0x1000b
7325 ; GFX6-NEXT: s_bfe_u32 s15, s2, 0x1000d
7326 ; GFX6-NEXT: s_bfe_u32 s17, s2, 0x1000f
7327 ; GFX6-NEXT: s_bfe_u32 s19, s2, 0x10011
7328 ; GFX6-NEXT: s_bfe_u32 s21, s2, 0x10013
7329 ; GFX6-NEXT: s_bfe_u32 s23, s2, 0x10015
7330 ; GFX6-NEXT: s_bfe_u32 s25, s2, 0x10017
7331 ; GFX6-NEXT: s_bfe_u32 s27, s2, 0x10019
7332 ; GFX6-NEXT: s_bfe_u32 s29, s2, 0x1001b
7333 ; GFX6-NEXT: s_bfe_u32 s31, s2, 0x1001d
7335 ; GFX6-NEXT: s_bfe_u32 s35, s3, 0x10003
7336 ; GFX6-NEXT: s_bfe_u32 s36, s3, 0x10005
7337 ; GFX6-NEXT: s_bfe_u32 s37, s3, 0x10007
7338 ; GFX6-NEXT: s_bfe_u32 s38, s3, 0x10009
7339 ; GFX6-NEXT: s_bfe_u32 s39, s3, 0x1000b
7340 ; GFX6-NEXT: s_bfe_u32 s40, s3, 0x1000d
7341 ; GFX6-NEXT: s_bfe_u32 s41, s3, 0x1000f
7342 ; GFX6-NEXT: s_bfe_u32 s42, s3, 0x10011
7343 ; GFX6-NEXT: s_bfe_u32 s43, s3, 0x10013
7344 ; GFX6-NEXT: s_bfe_u32 s44, s3, 0x10015
7345 ; GFX6-NEXT: s_bfe_u32 s45, s3, 0x10017
7346 ; GFX6-NEXT: s_bfe_u32 s46, s3, 0x10019
7347 ; GFX6-NEXT: s_bfe_u32 s47, s3, 0x1001b
7348 ; GFX6-NEXT: s_bfe_u32 s48, s3, 0x1001d
7350 ; GFX6-NEXT: s_bfe_u32 s9, s3, 0x10001
7351 ; GFX6-NEXT: s_bfe_u32 s6, s2, 0x10001
7354 ; GFX6-NEXT: s_bfe_u32 s12, s2, 0x10002
7355 ; GFX6-NEXT: s_bfe_u32 s14, s2, 0x10004
7356 ; GFX6-NEXT: s_bfe_u32 s16, s2, 0x10006
7357 ; GFX6-NEXT: s_bfe_u32 s18, s2, 0x10008
7358 ; GFX6-NEXT: s_bfe_u32 s20, s2, 0x1000a
7359 ; GFX6-NEXT: s_bfe_u32 s22, s2, 0x1000c
7360 ; GFX6-NEXT: s_bfe_u32 s24, s2, 0x1000e
7361 ; GFX6-NEXT: s_bfe_u32 s26, s2, 0x10010
7362 ; GFX6-NEXT: s_bfe_u32 s28, s2, 0x10012
7363 ; GFX6-NEXT: s_bfe_u32 s30, s2, 0x10014
7364 ; GFX6-NEXT: s_bfe_u32 s33, s2, 0x10016
7365 ; GFX6-NEXT: s_bfe_u32 s50, s2, 0x10018
7366 ; GFX6-NEXT: s_bfe_u32 s51, s2, 0x1001a
7367 ; GFX6-NEXT: s_bfe_u32 s52, s2, 0x1001c
7368 ; GFX6-NEXT: s_bfe_u32 s53, s2, 0x1001e
7369 ; GFX6-NEXT: s_bfe_u32 s54, s3, 0x10002
7370 ; GFX6-NEXT: s_bfe_u32 s55, s3, 0x10004
7371 ; GFX6-NEXT: s_bfe_u32 s56, s3, 0x10006
7372 ; GFX6-NEXT: s_bfe_u32 s57, s3, 0x10008
7373 ; GFX6-NEXT: s_bfe_u32 s58, s3, 0x1000a
7374 ; GFX6-NEXT: s_bfe_u32 s59, s3, 0x1000c
7375 ; GFX6-NEXT: s_bfe_u32 s60, s3, 0x1000e
7376 ; GFX6-NEXT: s_bfe_u32 s61, s3, 0x10010
7377 ; GFX6-NEXT: s_bfe_u32 s62, s3, 0x10012
7378 ; GFX6-NEXT: s_bfe_u32 s63, s3, 0x10014
7379 ; GFX6-NEXT: s_bfe_u32 s64, s3, 0x10016
7380 ; GFX6-NEXT: s_bfe_u32 s65, s3, 0x10018
7381 ; GFX6-NEXT: s_bfe_u32 s66, s3, 0x1001a
7382 ; GFX6-NEXT: s_bfe_u32 s67, s3, 0x1001e
7383 ; GFX6-NEXT: s_bfe_u32 s68, s3, 0x1001c
7384 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
7385 ; GFX6-NEXT: v_mov_b32_e32 v1, 0
7390 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:496
7391 ; GFX6-NEXT: s_waitcnt expcnt(0)
7394 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:480
7395 ; GFX6-NEXT: s_waitcnt expcnt(0)
7398 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:464
7399 ; GFX6-NEXT: s_waitcnt expcnt(0)
7402 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:448
7403 ; GFX6-NEXT: s_waitcnt expcnt(0)
7406 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:432
7407 ; GFX6-NEXT: s_waitcnt expcnt(0)
7410 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:416
7411 ; GFX6-NEXT: s_waitcnt expcnt(0)
7414 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:400
7415 ; GFX6-NEXT: s_waitcnt expcnt(0)
7418 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:384
7419 ; GFX6-NEXT: s_waitcnt expcnt(0)
7422 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:368
7423 ; GFX6-NEXT: s_waitcnt expcnt(0)
7426 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:352
7427 ; GFX6-NEXT: s_waitcnt expcnt(0)
7430 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:336
7431 ; GFX6-NEXT: s_waitcnt expcnt(0)
7434 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:320
7435 ; GFX6-NEXT: s_waitcnt expcnt(0)
7438 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:304
7439 ; GFX6-NEXT: s_waitcnt expcnt(0)
7442 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:288
7443 ; GFX6-NEXT: s_waitcnt expcnt(0)
7446 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:272
7447 ; GFX6-NEXT: s_waitcnt expcnt(0)
7450 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240
7451 ; GFX6-NEXT: s_waitcnt expcnt(0)
7454 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224
7455 ; GFX6-NEXT: s_waitcnt expcnt(0)
7458 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208
7459 ; GFX6-NEXT: s_waitcnt expcnt(0)
7462 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
7463 ; GFX6-NEXT: s_waitcnt expcnt(0)
7466 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176
7467 ; GFX6-NEXT: s_waitcnt expcnt(0)
7470 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160
7471 ; GFX6-NEXT: s_waitcnt expcnt(0)
7474 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
7475 ; GFX6-NEXT: s_waitcnt expcnt(0)
7478 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
7479 ; GFX6-NEXT: s_waitcnt expcnt(0)
7482 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
7483 ; GFX6-NEXT: s_waitcnt expcnt(0)
7486 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
7487 ; GFX6-NEXT: s_waitcnt expcnt(0)
7490 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
7491 ; GFX6-NEXT: s_waitcnt expcnt(0)
7494 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
7495 ; GFX6-NEXT: s_waitcnt expcnt(0)
7498 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
7499 ; GFX6-NEXT: s_waitcnt expcnt(0)
7502 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
7503 ; GFX6-NEXT: s_waitcnt expcnt(0)
7506 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
7507 ; GFX6-NEXT: s_waitcnt expcnt(0)
7510 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:256
7511 ; GFX6-NEXT: s_waitcnt expcnt(0)
7514 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
7518 ; GFX8: ; %bb.0:
7519 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
7520 ; GFX8-NEXT: v_mov_b32_e32 v1, 0
7522 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
7523 ; GFX8-NEXT: s_load_dwordx2 s[42:43], s[2:3], 0x0
7524 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
7526 ; GFX8-NEXT: s_bfe_u32 s45, s43, 0x1001d
7527 ; GFX8-NEXT: s_bfe_u32 s46, s43, 0x1001b
7528 ; GFX8-NEXT: s_bfe_u32 s47, s43, 0x10019
7529 ; GFX8-NEXT: s_bfe_u32 s48, s43, 0x10017
7530 ; GFX8-NEXT: s_bfe_u32 s49, s43, 0x10013
7531 ; GFX8-NEXT: s_bfe_u32 s50, s43, 0x10011
7532 ; GFX8-NEXT: s_bfe_u32 s51, s43, 0x1000f
7533 ; GFX8-NEXT: s_bfe_u32 s52, s43, 0x1000d
7534 ; GFX8-NEXT: s_bfe_u32 s53, s43, 0x1000b
7535 ; GFX8-NEXT: s_bfe_u32 s40, s43, 0x10009
7536 ; GFX8-NEXT: s_bfe_u32 s38, s43, 0x10007
7537 ; GFX8-NEXT: s_bfe_u32 s37, s43, 0x10005
7538 ; GFX8-NEXT: s_bfe_u32 s35, s43, 0x10003
7539 ; GFX8-NEXT: s_bfe_u32 s33, s43, 0x10001
7541 ; GFX8-NEXT: s_bfe_u32 s28, s42, 0x1001d
7542 ; GFX8-NEXT: s_bfe_u32 s26, s42, 0x1001b
7543 ; GFX8-NEXT: s_bfe_u32 s25, s42, 0x10019
7544 ; GFX8-NEXT: s_bfe_u32 s22, s42, 0x10017
7545 ; GFX8-NEXT: s_bfe_u32 s19, s42, 0x10013
7546 ; GFX8-NEXT: s_bfe_u32 s17, s42, 0x10011
7547 ; GFX8-NEXT: s_bfe_u32 s15, s42, 0x1000f
7548 ; GFX8-NEXT: s_bfe_u32 s13, s42, 0x1000d
7549 ; GFX8-NEXT: s_bfe_u32 s12, s42, 0x1000b
7550 ; GFX8-NEXT: s_bfe_u32 s10, s42, 0x10009
7551 ; GFX8-NEXT: s_bfe_u32 s8, s42, 0x10007
7552 ; GFX8-NEXT: s_bfe_u32 s6, s42, 0x10005
7553 ; GFX8-NEXT: s_bfe_u32 s4, s42, 0x10003
7554 ; GFX8-NEXT: s_bfe_u32 s2, s42, 0x10001
7556 ; GFX8-NEXT: s_bfe_u32 s5, s42, 0x10002
7557 ; GFX8-NEXT: s_bfe_u32 s7, s42, 0x10004
7558 ; GFX8-NEXT: s_bfe_u32 s9, s42, 0x10006
7559 ; GFX8-NEXT: s_bfe_u32 s11, s42, 0x10008
7560 ; GFX8-NEXT: s_bfe_u32 s14, s42, 0x1000a
7561 ; GFX8-NEXT: s_bfe_u32 s16, s42, 0x1000c
7562 ; GFX8-NEXT: s_bfe_u32 s18, s42, 0x1000e
7563 ; GFX8-NEXT: s_bfe_u32 s20, s42, 0x10010
7564 ; GFX8-NEXT: s_bfe_u32 s21, s42, 0x10012
7565 ; GFX8-NEXT: s_bfe_u32 s23, s42, 0x10014
7566 ; GFX8-NEXT: s_bfe_u32 s24, s42, 0x10015
7567 ; GFX8-NEXT: s_bfe_u32 s27, s42, 0x10016
7568 ; GFX8-NEXT: s_bfe_u32 s29, s42, 0x10018
7569 ; GFX8-NEXT: s_bfe_u32 s31, s42, 0x1001a
7570 ; GFX8-NEXT: s_bfe_u32 s34, s42, 0x1001c
7571 ; GFX8-NEXT: s_bfe_u32 s36, s42, 0x1001e
7573 ; GFX8-NEXT: s_bfe_u32 s41, s43, 0x10002
7574 ; GFX8-NEXT: s_bfe_u32 s54, s43, 0x10004
7575 ; GFX8-NEXT: s_bfe_u32 s55, s43, 0x10006
7576 ; GFX8-NEXT: s_bfe_u32 s56, s43, 0x10008
7577 ; GFX8-NEXT: s_bfe_u32 s57, s43, 0x1000a
7578 ; GFX8-NEXT: s_bfe_u32 s58, s43, 0x1000c
7579 ; GFX8-NEXT: s_bfe_u32 s59, s43, 0x1000e
7580 ; GFX8-NEXT: s_bfe_u32 s60, s43, 0x10010
7581 ; GFX8-NEXT: s_bfe_u32 s61, s43, 0x10012
7582 ; GFX8-NEXT: s_bfe_u32 s62, s43, 0x10016
7583 ; GFX8-NEXT: s_bfe_u32 s63, s43, 0x10018
7584 ; GFX8-NEXT: s_bfe_u32 s64, s43, 0x1001a
7585 ; GFX8-NEXT: s_bfe_u32 s65, s43, 0x1001c
7586 ; GFX8-NEXT: s_bfe_u32 s66, s43, 0x1001e
7587 ; GFX8-NEXT: s_bfe_u32 s42, s43, 0x10015
7588 ; GFX8-NEXT: s_bfe_u32 s43, s43, 0x10014
7590 ; GFX8-NEXT: s_add_u32 s42, s0, 0x1a0
7592 ; GFX8-NEXT: s_addc_u32 s43, s1, 0
7595 ; GFX8-NEXT: s_add_u32 s42, s0, 0x1f0
7596 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7597 ; GFX8-NEXT: s_addc_u32 s43, s1, 0
7602 ; GFX8-NEXT: s_add_u32 s42, s0, 0x1e0
7603 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7604 ; GFX8-NEXT: s_addc_u32 s43, s1, 0
7609 ; GFX8-NEXT: s_add_u32 s42, s0, 0x1d0
7610 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7611 ; GFX8-NEXT: s_addc_u32 s43, s1, 0
7616 ; GFX8-NEXT: s_add_u32 s42, s0, 0x1c0
7617 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7618 ; GFX8-NEXT: s_addc_u32 s43, s1, 0
7623 ; GFX8-NEXT: s_add_u32 s42, s0, 0x1b0
7624 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7625 ; GFX8-NEXT: s_addc_u32 s43, s1, 0
7630 ; GFX8-NEXT: s_add_u32 s42, s0, 0x190
7631 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7632 ; GFX8-NEXT: s_addc_u32 s43, s1, 0
7637 ; GFX8-NEXT: s_add_u32 s42, s0, 0x180
7638 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7639 ; GFX8-NEXT: s_addc_u32 s43, s1, 0
7644 ; GFX8-NEXT: s_add_u32 s42, s0, 0x170
7645 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7646 ; GFX8-NEXT: s_addc_u32 s43, s1, 0
7651 ; GFX8-NEXT: s_add_u32 s42, s0, 0x160
7652 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7653 ; GFX8-NEXT: s_addc_u32 s43, s1, 0
7658 ; GFX8-NEXT: s_add_u32 s42, s0, 0x150
7659 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7660 ; GFX8-NEXT: s_addc_u32 s43, s1, 0
7665 ; GFX8-NEXT: s_add_u32 s42, s0, 0x140
7666 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7667 ; GFX8-NEXT: s_addc_u32 s43, s1, 0
7672 ; GFX8-NEXT: s_add_u32 s42, s0, 0x130
7673 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7674 ; GFX8-NEXT: s_addc_u32 s43, s1, 0
7679 ; GFX8-NEXT: s_add_u32 s42, s0, 0x120
7680 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7681 ; GFX8-NEXT: s_addc_u32 s43, s1, 0
7686 ; GFX8-NEXT: s_add_u32 s40, s0, 0x110
7687 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7688 ; GFX8-NEXT: s_nop 0
7690 ; GFX8-NEXT: s_addc_u32 s41, s1, 0
7694 ; GFX8-NEXT: s_add_u32 s38, s0, 0x100
7695 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7696 ; GFX8-NEXT: s_nop 0
7698 ; GFX8-NEXT: s_addc_u32 s39, s1, 0
7702 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7703 ; GFX8-NEXT: s_nop 0
7705 ; GFX8-NEXT: s_add_u32 s36, s0, 0xf0
7706 ; GFX8-NEXT: s_addc_u32 s37, s1, 0
7710 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7711 ; GFX8-NEXT: s_nop 0
7713 ; GFX8-NEXT: s_add_u32 s34, s0, 0xe0
7714 ; GFX8-NEXT: s_addc_u32 s35, s1, 0
7718 ; GFX8-NEXT: s_add_u32 s30, s0, 0xd0
7719 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7720 ; GFX8-NEXT: s_nop 0
7722 ; GFX8-NEXT: s_addc_u32 s31, s1, 0
7726 ; GFX8-NEXT: s_add_u32 s28, s0, 0xc0
7727 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7728 ; GFX8-NEXT: s_nop 0
7730 ; GFX8-NEXT: s_addc_u32 s29, s1, 0
7734 ; GFX8-NEXT: s_add_u32 s26, s0, 0xb0
7735 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7736 ; GFX8-NEXT: s_nop 0
7738 ; GFX8-NEXT: s_addc_u32 s27, s1, 0
7742 ; GFX8-NEXT: s_add_u32 s22, s0, 0xa0
7743 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7744 ; GFX8-NEXT: s_nop 0
7746 ; GFX8-NEXT: s_addc_u32 s23, s1, 0
7750 ; GFX8-NEXT: s_add_u32 s22, s0, 0x90
7751 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7752 ; GFX8-NEXT: s_addc_u32 s23, s1, 0
7757 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7758 ; GFX8-NEXT: s_nop 0
7760 ; GFX8-NEXT: s_add_u32 s20, s0, 0x80
7761 ; GFX8-NEXT: s_addc_u32 s21, s1, 0
7765 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7766 ; GFX8-NEXT: s_nop 0
7768 ; GFX8-NEXT: s_add_u32 s18, s0, 0x70
7769 ; GFX8-NEXT: s_addc_u32 s19, s1, 0
7773 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7774 ; GFX8-NEXT: s_nop 0
7776 ; GFX8-NEXT: s_add_u32 s16, s0, 0x60
7777 ; GFX8-NEXT: s_addc_u32 s17, s1, 0
7781 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7782 ; GFX8-NEXT: s_nop 0
7784 ; GFX8-NEXT: s_add_u32 s12, s0, 0x50
7785 ; GFX8-NEXT: s_addc_u32 s13, s1, 0
7789 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7790 ; GFX8-NEXT: s_nop 0
7794 ; GFX8-NEXT: s_addc_u32 s11, s1, 0
7797 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7798 ; GFX8-NEXT: s_nop 0
7802 ; GFX8-NEXT: s_addc_u32 s9, s1, 0
7805 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7806 ; GFX8-NEXT: s_nop 0
7810 ; GFX8-NEXT: s_addc_u32 s7, s1, 0
7813 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7814 ; GFX8-NEXT: s_nop 0
7818 ; GFX8-NEXT: s_addc_u32 s5, s1, 0
7821 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7826 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
7830 ; EG: ; %bb.0:
7831 ; EG-NEXT: ALU 0, @40, KC0[CB0:0-32], KC1[]
7832 ; EG-NEXT: TEX 0 @38
7834 ; EG-NEXT: ALU 99, @137, KC0[CB0:0-32], KC1[]
7835 ; EG-NEXT: ALU 60, @237, KC0[CB0:0-32], KC1[]
7836 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T82.X, 0
7837 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T81.X, 0
7838 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T80.X, 0
7839 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T79.X, 0
7840 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T78.X, 0
7841 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T77.X, 0
7842 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T76.X, 0
7843 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T75.X, 0
7844 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T74.X, 0
7845 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T73.X, 0
7846 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T72.X, 0
7847 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T71.X, 0
7848 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T70.X, 0
7849 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T69.X, 0
7850 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T68.X, 0
7851 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T67.X, 0
7852 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T66.X, 0
7853 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T65.X, 0
7854 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T64.X, 0
7855 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T63.X, 0
7856 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T62.X, 0
7857 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T61.X, 0
7858 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T60.X, 0
7859 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T59.X, 0
7860 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T58.X, 0
7861 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T57.X, 0
7862 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T56.X, 0
7863 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T55.X, 0
7864 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T54.X, 0
7865 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T49.XYZW, T53.X, 0
7866 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T52.X, 0
7870 ; EG-NEXT: VTX_READ_64 T25.XY, T19.X, 0, #1
7875 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
7936 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
7939 ; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
7969 ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
7973 ; EG-NEXT: 15(2.101948e-44), 0(0.000000e+00)
8002 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
8070 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
8073 ; EG-NEXT: 192(2.690493e-43), 0(0.000000e+00)
8132 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
8135 ; GFX12: ; %bb.0:
8136 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
8137 ; GFX12-NEXT: s_wait_kmcnt 0x0
8138 ; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
8139 ; GFX12-NEXT: s_wait_kmcnt 0x0
8140 ; GFX12-NEXT: s_bfe_u32 s4, s3, 0x10014
8141 ; GFX12-NEXT: s_wait_alu 0xfffe
8142 ; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s4
8143 ; GFX12-NEXT: s_bfe_u32 s5, s3, 0x10015
8145 ; GFX12-NEXT: s_wait_alu 0xfffe
8148 ; GFX12-NEXT: s_bfe_u32 s5, s3, 0x1001e
8149 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:416
8150 ; GFX12-NEXT: s_wait_alu 0xfffe
8153 ; GFX12-NEXT: s_bfe_u32 s4, s3, 0x1001d
8154 ; GFX12-NEXT: s_bfe_u32 s5, s3, 0x1001c
8155 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:496
8156 ; GFX12-NEXT: s_wait_alu 0xfffe
8159 ; GFX12-NEXT: s_bfe_u32 s4, s3, 0x1001b
8160 ; GFX12-NEXT: s_bfe_u32 s5, s3, 0x1001a
8161 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:480
8162 ; GFX12-NEXT: s_wait_alu 0xfffe
8165 ; GFX12-NEXT: s_bfe_u32 s4, s3, 0x10019
8166 ; GFX12-NEXT: s_bfe_u32 s5, s3, 0x10018
8167 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:464
8168 ; GFX12-NEXT: s_wait_alu 0xfffe
8171 ; GFX12-NEXT: s_bfe_u32 s4, s3, 0x10017
8172 ; GFX12-NEXT: s_bfe_u32 s5, s3, 0x10016
8173 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:448
8174 ; GFX12-NEXT: s_wait_alu 0xfffe
8177 ; GFX12-NEXT: s_bfe_u32 s4, s3, 0x10013
8178 ; GFX12-NEXT: s_bfe_u32 s5, s3, 0x10012
8179 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:432
8180 ; GFX12-NEXT: s_wait_alu 0xfffe
8183 ; GFX12-NEXT: s_bfe_u32 s4, s3, 0x10011
8184 ; GFX12-NEXT: s_bfe_u32 s5, s3, 0x10010
8185 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:400
8186 ; GFX12-NEXT: s_wait_alu 0xfffe
8189 ; GFX12-NEXT: s_bfe_u32 s4, s3, 0x1000f
8190 ; GFX12-NEXT: s_bfe_u32 s5, s3, 0x1000e
8191 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:384
8192 ; GFX12-NEXT: s_wait_alu 0xfffe
8195 ; GFX12-NEXT: s_bfe_u32 s4, s3, 0x1000d
8196 ; GFX12-NEXT: s_bfe_u32 s5, s3, 0x1000c
8197 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:368
8198 ; GFX12-NEXT: s_wait_alu 0xfffe
8201 ; GFX12-NEXT: s_bfe_u32 s4, s3, 0x1000b
8202 ; GFX12-NEXT: s_bfe_u32 s5, s3, 0x1000a
8203 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:352
8204 ; GFX12-NEXT: s_wait_alu 0xfffe
8207 ; GFX12-NEXT: s_bfe_u32 s4, s3, 0x10009
8208 ; GFX12-NEXT: s_bfe_u32 s5, s3, 0x10008
8209 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:336
8210 ; GFX12-NEXT: s_wait_alu 0xfffe
8213 ; GFX12-NEXT: s_bfe_u32 s4, s3, 0x10007
8214 ; GFX12-NEXT: s_bfe_u32 s5, s3, 0x10006
8215 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:320
8216 ; GFX12-NEXT: s_wait_alu 0xfffe
8219 ; GFX12-NEXT: s_bfe_u32 s4, s3, 0x10005
8220 ; GFX12-NEXT: s_bfe_u32 s5, s3, 0x10004
8221 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:304
8222 ; GFX12-NEXT: s_wait_alu 0xfffe
8225 ; GFX12-NEXT: s_bfe_u32 s4, s3, 0x10003
8226 ; GFX12-NEXT: s_bfe_u32 s5, s3, 0x10002
8227 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:288
8228 ; GFX12-NEXT: s_wait_alu 0xfffe
8231 ; GFX12-NEXT: s_bfe_u32 s4, s3, 0x10001
8233 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:272
8234 ; GFX12-NEXT: s_wait_alu 0xfffe
8238 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1001e
8239 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:256
8240 ; GFX12-NEXT: s_wait_alu 0xfffe
8243 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1001d
8244 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1001c
8245 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:240
8246 ; GFX12-NEXT: s_wait_alu 0xfffe
8249 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1001b
8250 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1001a
8251 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:224
8252 ; GFX12-NEXT: s_wait_alu 0xfffe
8255 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10019
8256 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10018
8257 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:208
8258 ; GFX12-NEXT: s_wait_alu 0xfffe
8261 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10017
8262 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10016
8263 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:192
8264 ; GFX12-NEXT: s_wait_alu 0xfffe
8267 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10014
8268 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10015
8269 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:176
8270 ; GFX12-NEXT: s_wait_alu 0xfffe
8273 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10013
8274 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10012
8275 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:160
8276 ; GFX12-NEXT: s_wait_alu 0xfffe
8279 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10011
8280 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10010
8281 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:144
8282 ; GFX12-NEXT: s_wait_alu 0xfffe
8285 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1000f
8286 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1000e
8287 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:128
8288 ; GFX12-NEXT: s_wait_alu 0xfffe
8291 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1000d
8292 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1000c
8293 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:112
8294 ; GFX12-NEXT: s_wait_alu 0xfffe
8297 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1000b
8298 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1000a
8299 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:96
8300 ; GFX12-NEXT: s_wait_alu 0xfffe
8303 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10009
8304 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10008
8305 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:80
8306 ; GFX12-NEXT: s_wait_alu 0xfffe
8309 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10007
8310 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10006
8311 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:64
8312 ; GFX12-NEXT: s_wait_alu 0xfffe
8315 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10005
8316 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10004
8317 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:48
8318 ; GFX12-NEXT: s_wait_alu 0xfffe
8321 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10003
8322 ; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10002
8323 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:32
8324 ; GFX12-NEXT: s_wait_alu 0xfffe
8327 ; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10001
8329 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:16
8330 ; GFX12-NEXT: s_wait_alu 0xfffe
8333 ; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1]
8341 define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %out, ptr addrspace(4) nocapture %in) #0 {
8343 ; GFX6: ; %bb.0:
8344 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
8345 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
8346 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
8347 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
8349 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
8369 ; GFX6-NEXT: s_bfe_i64 s[44:45], s[40:41], 0x10000
8374 ; GFX6-NEXT: s_bfe_i64 s[44:45], s[4:5], 0x10000
8375 ; GFX6-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
8382 ; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
8383 ; GFX6-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
8390 ; GFX6-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
8391 ; GFX6-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
8398 ; GFX6-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
8399 ; GFX6-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
8401 ; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:496
8402 ; GFX6-NEXT: s_waitcnt expcnt(0)
8409 ; GFX6-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
8410 ; GFX6-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
8411 ; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:480
8412 ; GFX6-NEXT: s_waitcnt expcnt(0)
8419 ; GFX6-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
8420 ; GFX6-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
8421 ; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:464
8422 ; GFX6-NEXT: s_waitcnt expcnt(0)
8429 ; GFX6-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
8430 ; GFX6-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
8431 ; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:448
8432 ; GFX6-NEXT: s_waitcnt expcnt(0)
8439 ; GFX6-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
8440 ; GFX6-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
8441 ; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:432
8442 ; GFX6-NEXT: s_waitcnt expcnt(0)
8449 ; GFX6-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
8450 ; GFX6-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
8451 ; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:416
8452 ; GFX6-NEXT: s_waitcnt expcnt(0)
8459 ; GFX6-NEXT: s_bfe_i64 s[6:7], s[44:45], 0x10000
8460 ; GFX6-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
8461 ; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:400
8462 ; GFX6-NEXT: s_waitcnt expcnt(0)
8469 ; GFX6-NEXT: s_bfe_i64 s[6:7], s[36:37], 0x10000
8470 ; GFX6-NEXT: s_bfe_i64 s[36:37], s[42:43], 0x10000
8471 ; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:384
8472 ; GFX6-NEXT: s_waitcnt expcnt(0)
8479 ; GFX6-NEXT: s_bfe_i64 s[6:7], s[30:31], 0x10000
8480 ; GFX6-NEXT: s_bfe_i64 s[30:31], s[38:39], 0x10000
8481 ; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:368
8482 ; GFX6-NEXT: s_waitcnt expcnt(0)
8489 ; GFX6-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
8490 ; GFX6-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
8491 ; GFX6-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:352
8498 ; GFX6-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
8499 ; GFX6-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
8500 ; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:336
8501 ; GFX6-NEXT: s_waitcnt expcnt(0)
8508 ; GFX6-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
8509 ; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:320
8510 ; GFX6-NEXT: s_waitcnt expcnt(0)
8514 ; GFX6-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
8515 ; GFX6-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
8522 ; GFX6-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
8523 ; GFX6-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
8524 ; GFX6-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:304
8525 ; GFX6-NEXT: s_waitcnt expcnt(0)
8532 ; GFX6-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
8533 ; GFX6-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
8534 ; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:288
8535 ; GFX6-NEXT: s_waitcnt expcnt(0)
8542 ; GFX6-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
8543 ; GFX6-NEXT: s_bfe_i64 s[38:39], s[46:47], 0x10000
8544 ; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:272
8545 ; GFX6-NEXT: s_waitcnt expcnt(0)
8552 ; GFX6-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
8553 ; GFX6-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
8554 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:256
8555 ; GFX6-NEXT: s_waitcnt expcnt(0)
8562 ; GFX6-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
8563 ; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
8564 ; GFX6-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:240
8565 ; GFX6-NEXT: s_waitcnt expcnt(0)
8572 ; GFX6-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
8573 ; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:224
8574 ; GFX6-NEXT: s_waitcnt expcnt(0)
8579 ; GFX6-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
8580 ; GFX6-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
8581 ; GFX6-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
8582 ; GFX6-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
8583 ; GFX6-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
8584 ; GFX6-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
8585 ; GFX6-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
8586 ; GFX6-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
8587 ; GFX6-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
8588 ; GFX6-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
8589 ; GFX6-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
8590 ; GFX6-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
8591 ; GFX6-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
8592 ; GFX6-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
8593 ; GFX6-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
8594 ; GFX6-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
8595 ; GFX6-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
8596 ; GFX6-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
8597 ; GFX6-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
8598 ; GFX6-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
8599 ; GFX6-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:208
8600 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192
8601 ; GFX6-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176
8604 ; GFX6-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:160
8610 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144
8611 ; GFX6-NEXT: s_waitcnt expcnt(0)
8616 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128
8617 ; GFX6-NEXT: s_waitcnt expcnt(0)
8622 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
8623 ; GFX6-NEXT: s_waitcnt expcnt(0)
8628 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
8629 ; GFX6-NEXT: s_waitcnt expcnt(0)
8634 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
8635 ; GFX6-NEXT: s_waitcnt expcnt(0)
8640 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64
8641 ; GFX6-NEXT: s_waitcnt expcnt(0)
8646 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48
8647 ; GFX6-NEXT: s_waitcnt expcnt(0)
8652 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32
8653 ; GFX6-NEXT: s_waitcnt expcnt(0)
8658 ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
8661 ; GFX6-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0
8665 ; GFX8: ; %bb.0:
8666 ; GFX8-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
8668 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
8669 ; GFX8-NEXT: s_load_dwordx2 s[2:3], s[10:11], 0x0
8670 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
8673 ; GFX8-NEXT: v_writelane_b32 v62, s0, 0
8695 ; GFX8-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x10000
8698 ; GFX8-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000
8699 ; GFX8-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000
8700 ; GFX8-NEXT: s_bfe_i64 s[68:69], s[68:69], 0x10000
8701 ; GFX8-NEXT: s_bfe_i64 s[70:71], s[70:71], 0x10000
8702 ; GFX8-NEXT: s_bfe_i64 s[72:73], s[72:73], 0x10000
8703 ; GFX8-NEXT: s_bfe_i64 s[74:75], s[74:75], 0x10000
8704 ; GFX8-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000
8705 ; GFX8-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000
8706 ; GFX8-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000
8707 ; GFX8-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000
8708 ; GFX8-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x10000
8709 ; GFX8-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000
8710 ; GFX8-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
8711 ; GFX8-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
8712 ; GFX8-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
8713 ; GFX8-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
8714 ; GFX8-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
8715 ; GFX8-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
8716 ; GFX8-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
8746 ; GFX8-NEXT: s_bfe_i64 vcc, s[52:53], 0x10000
8780 ; GFX8-NEXT: s_bfe_i64 s[30:31], s[2:3], 0x10000
8781 ; GFX8-NEXT: s_bfe_i64 s[2:3], s[48:49], 0x10000
8785 ; GFX8-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000
8789 ; GFX8-NEXT: s_bfe_i64 s[50:51], s[4:5], 0x10000
8790 ; GFX8-NEXT: s_bfe_i64 s[4:5], s[6:7], 0x10000
8791 ; GFX8-NEXT: s_bfe_i64 s[6:7], s[2:3], 0x10000
8792 ; GFX8-NEXT: v_readlane_b32 s2, v62, 0
8813 ; GFX8-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
8814 ; GFX8-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
8815 ; GFX8-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
8816 ; GFX8-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
8817 ; GFX8-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
8818 ; GFX8-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
8819 ; GFX8-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000
8820 ; GFX8-NEXT: s_bfe_i64 s[48:49], s[0:1], 0x10000
8821 ; GFX8-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x10000
8822 ; GFX8-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x10000
8823 ; GFX8-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000
8824 ; GFX8-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000
8825 ; GFX8-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000
8826 ; GFX8-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000
8827 ; GFX8-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000
8828 ; GFX8-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000
8829 ; GFX8-NEXT: s_bfe_i64 s[68:69], s[68:69], 0x10000
8830 ; GFX8-NEXT: s_bfe_i64 s[70:71], s[70:71], 0x10000
8831 ; GFX8-NEXT: s_bfe_i64 s[72:73], s[72:73], 0x10000
8832 ; GFX8-NEXT: s_bfe_i64 s[74:75], s[74:75], 0x10000
8833 ; GFX8-NEXT: s_bfe_i64 s[76:77], s[76:77], 0x10000
8834 ; GFX8-NEXT: s_bfe_i64 s[78:79], s[78:79], 0x10000
8835 ; GFX8-NEXT: s_bfe_i64 s[80:81], s[80:81], 0x10000
8836 ; GFX8-NEXT: s_bfe_i64 s[82:83], s[82:83], 0x10000
8837 ; GFX8-NEXT: s_bfe_i64 s[84:85], s[84:85], 0x10000
8838 ; GFX8-NEXT: s_bfe_i64 s[86:87], s[86:87], 0x10000
8839 ; GFX8-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
8840 ; GFX8-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
8841 ; GFX8-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
8842 ; GFX8-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
8843 ; GFX8-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
8844 ; GFX8-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
8845 ; GFX8-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
8846 ; GFX8-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
8847 ; GFX8-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000
8848 ; GFX8-NEXT: s_bfe_i64 s[0:1], s[10:11], 0x10000
8849 ; GFX8-NEXT: s_bfe_i64 s[10:11], s[2:3], 0x10000
8850 ; GFX8-NEXT: s_add_u32 s2, s8, 0x1f0
8851 ; GFX8-NEXT: s_addc_u32 s3, s9, 0
8854 ; GFX8-NEXT: s_add_u32 s2, s8, 0x1e0
8855 ; GFX8-NEXT: s_addc_u32 s3, s9, 0
8858 ; GFX8-NEXT: s_add_u32 s2, s8, 0x1d0
8859 ; GFX8-NEXT: s_addc_u32 s3, s9, 0
8862 ; GFX8-NEXT: s_add_u32 s2, s8, 0x1c0
8863 ; GFX8-NEXT: s_addc_u32 s3, s9, 0
8866 ; GFX8-NEXT: s_add_u32 s2, s8, 0x1b0
8867 ; GFX8-NEXT: s_addc_u32 s3, s9, 0
8870 ; GFX8-NEXT: s_add_u32 s2, s8, 0x1a0
8871 ; GFX8-NEXT: s_addc_u32 s3, s9, 0
8874 ; GFX8-NEXT: s_add_u32 s2, s8, 0x190
8875 ; GFX8-NEXT: s_addc_u32 s3, s9, 0
8878 ; GFX8-NEXT: s_add_u32 s2, s8, 0x180
8879 ; GFX8-NEXT: s_addc_u32 s3, s9, 0
8882 ; GFX8-NEXT: s_add_u32 s2, s8, 0x170
8883 ; GFX8-NEXT: s_addc_u32 s3, s9, 0
8886 ; GFX8-NEXT: s_add_u32 s2, s8, 0x160
8887 ; GFX8-NEXT: s_addc_u32 s3, s9, 0
8890 ; GFX8-NEXT: s_add_u32 s2, s8, 0x150
8891 ; GFX8-NEXT: s_addc_u32 s3, s9, 0
8893 ; GFX8-NEXT: flat_store_dwordx4 v[46:47], v[0:3]
8896 ; GFX8-NEXT: s_add_u32 s2, s8, 0x140
8897 ; GFX8-NEXT: s_addc_u32 s3, s9, 0
8899 ; GFX8-NEXT: s_add_u32 s0, s8, 0x130
8901 ; GFX8-NEXT: s_addc_u32 s1, s9, 0
8908 ; GFX8-NEXT: s_add_u32 s0, s8, 0x120
8909 ; GFX8-NEXT: s_addc_u32 s1, s9, 0
8912 ; GFX8-NEXT: s_add_u32 s0, s8, 0x110
8915 ; GFX8-NEXT: s_addc_u32 s1, s9, 0
8934 ; GFX8-NEXT: flat_store_dwordx4 v[16:17], v[0:3]
8938 ; GFX8-NEXT: s_add_u32 s0, s8, 0x100
8943 ; GFX8-NEXT: s_addc_u32 s1, s9, 0
8944 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
8947 ; GFX8-NEXT: s_add_u32 s0, s8, 0xf0
8952 ; GFX8-NEXT: s_addc_u32 s1, s9, 0
8953 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
8956 ; GFX8-NEXT: s_add_u32 s0, s8, 0xe0
8961 ; GFX8-NEXT: s_addc_u32 s1, s9, 0
8962 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
8965 ; GFX8-NEXT: s_add_u32 s0, s8, 0xd0
8970 ; GFX8-NEXT: s_addc_u32 s1, s9, 0
8971 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
8974 ; GFX8-NEXT: s_add_u32 s0, s8, 0xc0
8979 ; GFX8-NEXT: s_addc_u32 s1, s9, 0
8980 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
8983 ; GFX8-NEXT: s_add_u32 s0, s8, 0xb0
8988 ; GFX8-NEXT: s_addc_u32 s1, s9, 0
8989 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
8992 ; GFX8-NEXT: s_add_u32 s0, s8, 0xa0
8997 ; GFX8-NEXT: s_addc_u32 s1, s9, 0
8998 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
9001 ; GFX8-NEXT: s_add_u32 s0, s8, 0x90
9006 ; GFX8-NEXT: s_addc_u32 s1, s9, 0
9007 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
9010 ; GFX8-NEXT: s_add_u32 s0, s8, 0x80
9015 ; GFX8-NEXT: s_addc_u32 s1, s9, 0
9016 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
9019 ; GFX8-NEXT: s_add_u32 s0, s8, 0x70
9024 ; GFX8-NEXT: s_addc_u32 s1, s9, 0
9025 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
9028 ; GFX8-NEXT: s_add_u32 s0, s8, 0x60
9033 ; GFX8-NEXT: s_addc_u32 s1, s9, 0
9034 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
9037 ; GFX8-NEXT: s_add_u32 s0, s8, 0x50
9042 ; GFX8-NEXT: s_addc_u32 s1, s9, 0
9043 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
9051 ; GFX8-NEXT: s_addc_u32 s1, s9, 0
9052 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
9060 ; GFX8-NEXT: s_addc_u32 s1, s9, 0
9061 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
9069 ; GFX8-NEXT: s_addc_u32 s1, s9, 0
9070 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
9078 ; GFX8-NEXT: s_addc_u32 s1, s9, 0
9079 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
9087 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
9095 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
9099 ; EG: ; %bb.0:
9100 ; EG-NEXT: ALU 22, @40, KC0[CB0:0-32], KC1[]
9101 ; EG-NEXT: TEX 0 @38
9102 ; EG-NEXT: ALU 89, @63, KC0[CB0:0-32], KC1[]
9104 ; EG-NEXT: ALU 107, @253, KC0[CB0:0-32], KC1[]
9105 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T82.X, 0
9106 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T80.XYZW, T81.X, 0
9107 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T51.XYZW, T73.X, 0
9108 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T79.XYZW, T48.X, 0
9109 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T52.XYZW, T47.X, 0
9110 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T78.XYZW, T46.X, 0
9111 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T53.XYZW, T45.X, 0
9112 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T77.XYZW, T44.X, 0
9113 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T54.XYZW, T43.X, 0
9114 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T76.XYZW, T42.X, 0
9115 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T55.XYZW, T41.X, 0
9116 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T75.XYZW, T39.X, 0
9117 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T56.XYZW, T38.X, 0
9118 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T74.XYZW, T37.X, 0
9119 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T36.X, 0
9120 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T66.XYZW, T35.X, 0
9121 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T58.XYZW, T34.X, 0
9122 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T72.XYZW, T33.X, 0
9123 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T59.XYZW, T32.X, 0
9124 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T71.XYZW, T31.X, 0
9125 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T30.X, 0
9126 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T70.XYZW, T29.X, 0
9127 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T61.XYZW, T28.X, 0
9128 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T69.XYZW, T27.X, 0
9129 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T62.XYZW, T26.X, 0
9130 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T68.XYZW, T25.X, 0
9131 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T24.X, 0
9132 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T23.X, 0
9133 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T64.XYZW, T22.X, 0
9134 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T67.XYZW, T21.X, 0
9135 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T65.XYZW, T20.X, 0
9139 ; EG-NEXT: VTX_READ_64 T40.XY, T26.X, 0, #1
9161 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
9163 ; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00)
9223 ; EG-NEXT: 384(5.380986e-43), 0(0.000000e+00)
9231 ; EG-NEXT: 400(5.605194e-43), 0(0.000000e+00)
9239 ; EG-NEXT: 416(5.829402e-43), 0(0.000000e+00)
9247 ; EG-NEXT: 432(6.053609e-43), 0(0.000000e+00)
9263 ; EG-NEXT: 20(2.802597e-44), 0(0.000000e+00)
9277 ; EG-NEXT: 26(3.643376e-44), 0(0.000000e+00)
9320 ; EG-NEXT: 30(4.203895e-44), 0(0.000000e+00)
9355 ; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
9358 ; EG-NEXT: 6(8.407791e-45), 0(0.000000e+00)
9364 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
9418 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
9454 ; EG-NEXT: 480(6.726233e-43), 0(0.000000e+00)
9464 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
9467 ; GFX12: ; %bb.0:
9468 ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
9469 ; GFX12-NEXT: s_wait_kmcnt 0x0
9470 ; GFX12-NEXT: s_load_b64 s[12:13], s[2:3], 0x0
9471 ; GFX12-NEXT: s_wait_kmcnt 0x0
9478 ; GFX12-NEXT: s_wait_alu 0xfffe
9479 ; GFX12-NEXT: s_bfe_i64 s[96:97], s[96:97], 0x10000
9480 ; GFX12-NEXT: s_bfe_i64 s[100:101], s[98:99], 0x10000
9483 ; GFX12-NEXT: s_bfe_i64 s[92:93], s[92:93], 0x10000
9484 ; GFX12-NEXT: s_bfe_i64 s[94:95], s[94:95], 0x10000
9485 ; GFX12-NEXT: s_wait_alu 0xfffe
9486 ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s96
9491 ; GFX12-NEXT: s_bfe_i64 s[78:79], s[78:79], 0x10000
9492 ; GFX12-NEXT: s_bfe_i64 s[88:89], s[88:89], 0x10000
9502 ; GFX12-NEXT: s_wait_alu 0xfffe
9504 ; GFX12-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000
9505 ; GFX12-NEXT: s_bfe_i64 s[74:75], s[74:75], 0x10000
9509 ; GFX12-NEXT: s_wait_alu 0xfffe
9511 ; GFX12-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000
9512 ; GFX12-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000
9516 ; GFX12-NEXT: s_wait_alu 0xfffe
9518 ; GFX12-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
9519 ; GFX12-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x10000
9520 ; GFX12-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
9521 ; GFX12-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000
9522 ; GFX12-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
9523 ; GFX12-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000
9524 ; GFX12-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
9525 ; GFX12-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
9529 ; GFX12-NEXT: s_wait_alu 0xfffe
9531 ; GFX12-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000
9532 ; GFX12-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000
9541 ; GFX12-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000
9542 ; GFX12-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000
9543 ; GFX12-NEXT: s_clause 0x7
9544 ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[0:1] offset:496
9545 ; GFX12-NEXT: global_store_b128 v0, v[5:8], s[0:1] offset:480
9546 ; GFX12-NEXT: global_store_b128 v0, v[9:12], s[0:1] offset:464
9547 ; GFX12-NEXT: global_store_b128 v0, v[13:16], s[0:1] offset:448
9548 ; GFX12-NEXT: global_store_b128 v0, v[17:20], s[0:1] offset:432
9549 ; GFX12-NEXT: global_store_b128 v0, v[21:24], s[0:1] offset:416
9550 ; GFX12-NEXT: global_store_b128 v0, v[25:28], s[0:1] offset:400
9551 ; GFX12-NEXT: global_store_b128 v0, v[29:32], s[0:1] offset:384
9559 ; GFX12-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000
9560 ; GFX12-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000
9565 ; GFX12-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000
9566 ; GFX12-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000
9571 ; GFX12-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000
9572 ; GFX12-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
9573 ; GFX12-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000
9574 ; GFX12-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000
9579 ; GFX12-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000
9580 ; GFX12-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000
9585 ; GFX12-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000
9586 ; GFX12-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x10000
9589 ; GFX12-NEXT: s_clause 0x5
9590 ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[0:1] offset:368
9591 ; GFX12-NEXT: global_store_b128 v0, v[5:8], s[0:1] offset:352
9592 ; GFX12-NEXT: global_store_b128 v0, v[9:12], s[0:1] offset:336
9593 ; GFX12-NEXT: global_store_b128 v0, v[13:16], s[0:1] offset:320
9594 ; GFX12-NEXT: global_store_b128 v0, v[17:20], s[0:1] offset:304
9595 ; GFX12-NEXT: global_store_b128 v0, v[21:24], s[0:1] offset:288
9603 ; GFX12-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000
9604 ; GFX12-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x10000
9609 ; GFX12-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000
9610 ; GFX12-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000
9615 ; GFX12-NEXT: s_bfe_i64 s[76:77], s[76:77], 0x10000
9616 ; GFX12-NEXT: s_bfe_i64 s[72:73], s[72:73], 0x10000
9617 ; GFX12-NEXT: s_bfe_i64 s[70:71], s[70:71], 0x10000
9618 ; GFX12-NEXT: s_bfe_i64 s[68:69], s[68:69], 0x10000
9623 ; GFX12-NEXT: s_bfe_i64 s[82:83], s[82:83], 0x10000
9624 ; GFX12-NEXT: s_bfe_i64 s[80:81], s[80:81], 0x10000
9636 ; GFX12-NEXT: s_bfe_i64 s[86:87], s[86:87], 0x10000
9637 ; GFX12-NEXT: s_bfe_i64 s[84:85], s[84:85], 0x10000
9640 ; GFX12-NEXT: s_clause 0x5
9641 ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[0:1] offset:272
9642 ; GFX12-NEXT: global_store_b128 v0, v[5:8], s[0:1] offset:256
9643 ; GFX12-NEXT: global_store_b128 v0, v[9:12], s[0:1] offset:240
9644 ; GFX12-NEXT: global_store_b128 v0, v[13:16], s[0:1] offset:224
9645 ; GFX12-NEXT: global_store_b128 v0, v[17:20], s[0:1] offset:208
9646 ; GFX12-NEXT: global_store_b128 v0, v[21:24], s[0:1] offset:192
9652 ; GFX12-NEXT: s_bfe_i64 s[98:99], s[98:99], 0x10000
9653 ; GFX12-NEXT: s_bfe_i64 s[90:91], s[90:91], 0x10000
9660 ; GFX12-NEXT: s_bfe_i64 s[18:19], s[12:13], 0x10000
9661 ; GFX12-NEXT: s_bfe_i64 s[12:13], s[26:27], 0x10000
9662 ; GFX12-NEXT: s_bfe_i64 s[26:27], s[40:41], 0x10000
9663 ; GFX12-NEXT: s_bfe_i64 s[40:41], s[52:53], 0x10000
9664 ; GFX12-NEXT: s_bfe_i64 s[52:53], s[62:63], 0x10000
9665 ; GFX12-NEXT: s_bfe_i64 s[62:63], s[74:75], 0x10000
9666 ; GFX12-NEXT: s_bfe_i64 s[74:75], s[88:89], 0x10000
9667 ; GFX12-NEXT: s_bfe_i64 s[88:89], s[94:95], 0x10000
9668 ; GFX12-NEXT: s_bfe_i64 s[94:95], s[100:101], 0x10000
9669 ; GFX12-NEXT: s_bfe_i64 s[96:97], s[96:97], 0x10000
9672 ; GFX12-NEXT: s_bfe_i64 s[78:79], s[78:79], 0x10000
9673 ; GFX12-NEXT: s_bfe_i64 s[92:93], s[92:93], 0x10000
9675 ; GFX12-NEXT: s_wait_alu 0xfffe
9677 ; GFX12-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000
9680 ; GFX12-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000
9683 ; GFX12-NEXT: s_clause 0x5
9684 ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[0:1] offset:176
9685 ; GFX12-NEXT: global_store_b128 v0, v[5:8], s[0:1] offset:160
9686 ; GFX12-NEXT: global_store_b128 v0, v[9:12], s[0:1] offset:144
9687 ; GFX12-NEXT: global_store_b128 v0, v[13:16], s[0:1] offset:128
9688 ; GFX12-NEXT: global_store_b128 v0, v[17:20], s[0:1] offset:112
9689 ; GFX12-NEXT: global_store_b128 v0, v[21:24], s[0:1] offset:96
9692 ; GFX12-NEXT: s_wait_alu 0xfffe
9694 ; GFX12-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000
9697 ; GFX12-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
9699 ; GFX12-NEXT: s_wait_alu 0xfffe
9707 ; GFX12-NEXT: s_clause 0x5
9708 ; GFX12-NEXT: global_store_b128 v0, v[1:4], s[0:1] offset:80
9709 ; GFX12-NEXT: global_store_b128 v0, v[5:8], s[0:1] offset:64
9710 ; GFX12-NEXT: global_store_b128 v0, v[9:12], s[0:1] offset:48
9711 ; GFX12-NEXT: global_store_b128 v0, v[13:16], s[0:1] offset:32
9712 ; GFX12-NEXT: global_store_b128 v0, v[17:20], s[0:1] offset:16
9713 ; GFX12-NEXT: global_store_b128 v0, v[21:24], s[0:1]
9721 attributes #0 = { nounwind }