Lines Matching +full:0 +full:x3900
8 ; SI: ; %bb.0:
9 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12 ; SI-NEXT: s_mov_b32 s6, 0
13 ; SI-NEXT: s_mov_b32 s7, 0xf000
16 ; SI-NEXT: buffer_load_ushort v4, v[2:3], s[4:7], 0 addr64 glc
17 ; SI-NEXT: s_waitcnt vmcnt(0)
18 ; SI-NEXT: buffer_load_ushort v5, v[2:3], s[4:7], 0 addr64 offset:2 glc
19 ; SI-NEXT: s_waitcnt vmcnt(0)
20 ; SI-NEXT: buffer_load_ushort v6, v[2:3], s[4:7], 0 addr64 offset:4 glc
21 ; SI-NEXT: s_waitcnt vmcnt(0)
22 ; SI-NEXT: buffer_load_ushort v7, v[2:3], s[4:7], 0 addr64 offset:6 glc
23 ; SI-NEXT: s_waitcnt vmcnt(0)
24 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:8 glc
25 ; SI-NEXT: s_waitcnt vmcnt(0)
26 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:10 glc
27 ; SI-NEXT: s_waitcnt vmcnt(0)
28 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:12 glc
29 ; SI-NEXT: s_waitcnt vmcnt(0)
30 ; SI-NEXT: buffer_load_ushort v2, v[2:3], s[4:7], 0 addr64 offset:14 glc
31 ; SI-NEXT: s_waitcnt vmcnt(0)
41 ; SI-NEXT: s_mov_b64 vcc, 0
43 ; SI-NEXT: s_mov_b32 s6, 0
44 ; SI-NEXT: s_mov_b32 s7, 0xf000
47 ; SI-NEXT: buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 glc
48 ; SI-NEXT: s_waitcnt vmcnt(0)
49 ; SI-NEXT: buffer_load_ushort v3, v[0:1], s[4:7], 0 addr64 offset:2 glc
50 ; SI-NEXT: s_waitcnt vmcnt(0)
51 ; SI-NEXT: buffer_load_ushort v4, v[0:1], s[4:7], 0 addr64 offset:4 glc
52 ; SI-NEXT: s_waitcnt vmcnt(0)
53 ; SI-NEXT: buffer_load_ushort v5, v[0:1], s[4:7], 0 addr64 offset:6 glc
54 ; SI-NEXT: s_waitcnt vmcnt(0)
55 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:8 glc
56 ; SI-NEXT: s_waitcnt vmcnt(0)
57 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:10 glc
58 ; SI-NEXT: s_waitcnt vmcnt(0)
59 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:12 glc
60 ; SI-NEXT: s_waitcnt vmcnt(0)
61 ; SI-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:14 glc
62 ; SI-NEXT: s_waitcnt vmcnt(0)
69 ; SI-NEXT: v_bfe_i32 v1, v2, 0, 16
70 ; SI-NEXT: v_bfe_i32 v2, v3, 0, 16
71 ; SI-NEXT: v_mov_b32_e32 v3, 0xffff0000
73 ; SI-NEXT: v_mov_b32_e32 v5, 0xffff
74 ; SI-NEXT: v_mov_b32_e32 v6, 0x8000
75 ; SI-NEXT: v_mov_b32_e32 v7, 0xffff8000
84 ; SI-NEXT: v_and_b32_e32 v3, 0xffff, v1
90 ; GFX9: ; %bb.0:
91 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
95 ; GFX9-NEXT: s_waitcnt vmcnt(0)
101 ; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off glc
102 ; GFX9-NEXT: s_waitcnt vmcnt(0)
104 ; GFX9-NEXT: v_pk_ashrrev_i16 v0, 15, v3 op_sel_hi:[0,0]
105 ; GFX9-NEXT: s_movk_i32 s4, 0x8000
106 ; GFX9-NEXT: v_or_b32_e32 v1, 0xffff8000, v0
108 ; GFX9-NEXT: v_pk_ashrrev_i16 v0, 15, v2 op_sel_hi:[0,1]
109 ; GFX9-NEXT: v_or_b32_e32 v2, 0xffff8000, v0
111 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
117 ; GFX11: ; %bb.0:
118 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
122 ; GFX11-NEXT: s_waitcnt vmcnt(0)
128 ; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off glc dlc
129 ; GFX11-NEXT: s_waitcnt vmcnt(0)
131 ; GFX11-NEXT: v_pk_ashrrev_i16 v0, 15, v2 op_sel_hi:[0,1]
132 ; GFX11-NEXT: v_pk_ashrrev_i16 v1, 15, v3 op_sel_hi:[0,0]
136 ; GFX11-NEXT: v_or_b32_e32 v1, 0xffff8000, v1
137 ; GFX11-NEXT: v_or_b32_e32 v0, 0xffff8000, v0
139 ; GFX11-NEXT: v_or_b32_e32 v2, 0xffff8000, v2
140 ; GFX11-NEXT: v_or_b32_e32 v3, 0xffff8000, v3
142 ; GFX11-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
143 ; GFX11-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
157 %v2 = shufflevector <8 x i16> %m, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
165 ; SI: ; %bb.0:
166 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
169 ; SI-NEXT: s_mov_b32 s6, 0
170 ; SI-NEXT: s_mov_b32 s7, 0xf000
173 ; SI-NEXT: buffer_load_ushort v4, v[2:3], s[4:7], 0 addr64 glc
174 ; SI-NEXT: s_waitcnt vmcnt(0)
175 ; SI-NEXT: buffer_load_ushort v4, v[2:3], s[4:7], 0 addr64 offset:2 glc
176 ; SI-NEXT: s_waitcnt vmcnt(0)
177 ; SI-NEXT: buffer_load_ushort v4, v[2:3], s[4:7], 0 addr64 offset:4 glc
178 ; SI-NEXT: s_waitcnt vmcnt(0)
179 ; SI-NEXT: buffer_load_ushort v4, v[2:3], s[4:7], 0 addr64 offset:6 glc
180 ; SI-NEXT: s_waitcnt vmcnt(0)
181 ; SI-NEXT: buffer_load_ushort v4, v[2:3], s[4:7], 0 addr64 offset:8 glc
182 ; SI-NEXT: s_waitcnt vmcnt(0)
183 ; SI-NEXT: buffer_load_ushort v5, v[2:3], s[4:7], 0 addr64 offset:10 glc
184 ; SI-NEXT: s_waitcnt vmcnt(0)
185 ; SI-NEXT: buffer_load_ushort v6, v[2:3], s[4:7], 0 addr64 offset:12 glc
186 ; SI-NEXT: s_waitcnt vmcnt(0)
187 ; SI-NEXT: buffer_load_ushort v2, v[2:3], s[4:7], 0 addr64 offset:14 glc
188 ; SI-NEXT: s_waitcnt vmcnt(0)
198 ; SI-NEXT: s_mov_b64 vcc, 0
200 ; SI-NEXT: s_mov_b32 s6, 0
201 ; SI-NEXT: s_mov_b32 s7, 0xf000
204 ; SI-NEXT: buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 glc
205 ; SI-NEXT: s_waitcnt vmcnt(0)
206 ; SI-NEXT: buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 offset:2 glc
207 ; SI-NEXT: s_waitcnt vmcnt(0)
208 ; SI-NEXT: buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 offset:4 glc
209 ; SI-NEXT: s_waitcnt vmcnt(0)
210 ; SI-NEXT: buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 offset:6 glc
211 ; SI-NEXT: s_waitcnt vmcnt(0)
212 ; SI-NEXT: buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 offset:8 glc
213 ; SI-NEXT: s_waitcnt vmcnt(0)
214 ; SI-NEXT: buffer_load_ushort v3, v[0:1], s[4:7], 0 addr64 offset:10 glc
215 ; SI-NEXT: s_waitcnt vmcnt(0)
216 ; SI-NEXT: buffer_load_ushort v4, v[0:1], s[4:7], 0 addr64 offset:12 glc
217 ; SI-NEXT: s_waitcnt vmcnt(0)
218 ; SI-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:14 glc
219 ; SI-NEXT: s_waitcnt vmcnt(0)
226 ; SI-NEXT: v_ashr_i64 v[0:1], v[4:5], 48
227 ; SI-NEXT: v_bfe_i32 v1, v4, 0, 16
228 ; SI-NEXT: v_bfe_i32 v3, v5, 0, 16
229 ; SI-NEXT: v_mov_b32_e32 v4, 0xffff0000
231 ; SI-NEXT: v_mov_b32_e32 v6, 0xffff
232 ; SI-NEXT: v_mov_b32_e32 v7, 0x8000
248 ; GFX9: ; %bb.0:
249 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
253 ; GFX9-NEXT: s_waitcnt vmcnt(0)
259 ; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off glc
260 ; GFX9-NEXT: s_waitcnt vmcnt(0)
262 ; GFX9-NEXT: v_pk_ashrrev_i16 v0, 15, v5 op_sel_hi:[0,1]
263 ; GFX9-NEXT: s_movk_i32 s4, 0x8000
264 ; GFX9-NEXT: v_or_b32_e32 v1, 0xffff8000, v0
266 ; GFX9-NEXT: v_pk_ashrrev_i16 v0, 15, v4 op_sel_hi:[0,1]
267 ; GFX9-NEXT: v_or_b32_e32 v3, 0xffff8000, v0
269 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
275 ; GFX11: ; %bb.0:
276 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
280 ; GFX11-NEXT: s_waitcnt vmcnt(0)
286 ; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off glc dlc
287 ; GFX11-NEXT: s_waitcnt vmcnt(0)
289 ; GFX11-NEXT: v_pk_ashrrev_i16 v0, 15, v4 op_sel_hi:[0,1]
290 ; GFX11-NEXT: v_pk_ashrrev_i16 v1, 15, v5 op_sel_hi:[0,1]
294 ; GFX11-NEXT: v_or_b32_e32 v1, 0xffff8000, v1
295 ; GFX11-NEXT: v_or_b32_e32 v0, 0xffff8000, v0
297 ; GFX11-NEXT: v_or_b32_e32 v2, 0xffff8000, v2
298 ; GFX11-NEXT: v_or_b32_e32 v3, 0xffff8000, v3
300 ; GFX11-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
301 ; GFX11-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
323 ; SI: ; %bb.0:
324 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
327 ; SI-NEXT: s_mov_b32 s6, 0
328 ; SI-NEXT: s_mov_b32 s7, 0xf000
331 ; SI-NEXT: buffer_load_ushort v4, v[2:3], s[4:7], 0 addr64 glc
332 ; SI-NEXT: s_waitcnt vmcnt(0)
333 ; SI-NEXT: buffer_load_ushort v5, v[2:3], s[4:7], 0 addr64 offset:2 glc
334 ; SI-NEXT: s_waitcnt vmcnt(0)
335 ; SI-NEXT: buffer_load_ushort v6, v[2:3], s[4:7], 0 addr64 offset:4 glc
336 ; SI-NEXT: s_waitcnt vmcnt(0)
337 ; SI-NEXT: buffer_load_ushort v7, v[2:3], s[4:7], 0 addr64 offset:6 glc
338 ; SI-NEXT: s_waitcnt vmcnt(0)
339 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:8 glc
340 ; SI-NEXT: s_waitcnt vmcnt(0)
341 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:10 glc
342 ; SI-NEXT: s_waitcnt vmcnt(0)
343 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:12 glc
344 ; SI-NEXT: s_waitcnt vmcnt(0)
345 ; SI-NEXT: buffer_load_ushort v2, v[2:3], s[4:7], 0 addr64 offset:14 glc
346 ; SI-NEXT: s_waitcnt vmcnt(0)
361 ; SI-NEXT: s_mov_b64 vcc, 0
363 ; SI-NEXT: s_mov_b32 s6, 0
364 ; SI-NEXT: s_mov_b32 s7, 0xf000
367 ; SI-NEXT: buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 glc
368 ; SI-NEXT: s_waitcnt vmcnt(0)
369 ; SI-NEXT: buffer_load_ushort v3, v[0:1], s[4:7], 0 addr64 offset:2 glc
370 ; SI-NEXT: s_waitcnt vmcnt(0)
371 ; SI-NEXT: buffer_load_ushort v4, v[0:1], s[4:7], 0 addr64 offset:4 glc
372 ; SI-NEXT: s_waitcnt vmcnt(0)
373 ; SI-NEXT: buffer_load_ushort v5, v[0:1], s[4:7], 0 addr64 offset:6 glc
374 ; SI-NEXT: s_waitcnt vmcnt(0)
375 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:8 glc
376 ; SI-NEXT: s_waitcnt vmcnt(0)
377 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:10 glc
378 ; SI-NEXT: s_waitcnt vmcnt(0)
379 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:12 glc
380 ; SI-NEXT: s_waitcnt vmcnt(0)
381 ; SI-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:14 glc
382 ; SI-NEXT: s_waitcnt vmcnt(0)
394 ; SI-NEXT: v_mov_b32_e32 v3, 0x3fa00000
395 ; SI-NEXT: v_mov_b32_e32 v4, 0x3f200000
409 ; GFX9: ; %bb.0:
410 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
414 ; GFX9-NEXT: s_waitcnt vmcnt(0)
420 ; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off glc
421 ; GFX9-NEXT: s_waitcnt vmcnt(0)
423 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x3900
424 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x3d00
426 ; GFX9-NEXT: v_mov_b32_e32 v5, 0x3800
439 ; GFX11: ; %bb.0:
440 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
444 ; GFX11-NEXT: s_waitcnt vmcnt(0)
450 ; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off glc dlc
451 ; GFX11-NEXT: s_waitcnt vmcnt(0)
453 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x3d00
457 ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x3900, v0, vcc_lo
459 ; GFX11-NEXT: v_dual_mov_b32 v4, 0x3900 :: v_dual_cndmask_b32 v1, 0x3900, v0
462 ; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x3d00, v4, vcc_lo
464 ; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x3900, v0, vcc_lo
480 %v2 = shufflevector <8 x half> %m, <8 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
481 %b2 = fcmp ugt <4 x half> %v2, <half 0xH3800, half 0xH3800, half 0xH3800, half 0xH3800>
482 …2, <4 x half> <half 0xH3900, half 0xH3900, half 0xH3900, half 0xH3900>, <4 x half> <half 0xH3D00, …
489 ; SI: ; %bb.0:
490 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
493 ; SI-NEXT: s_mov_b32 s6, 0
494 ; SI-NEXT: s_mov_b32 s7, 0xf000
497 ; SI-NEXT: buffer_load_ushort v4, v[2:3], s[4:7], 0 addr64 glc
498 ; SI-NEXT: s_waitcnt vmcnt(0)
499 ; SI-NEXT: buffer_load_ushort v5, v[2:3], s[4:7], 0 addr64 offset:2 glc
500 ; SI-NEXT: s_waitcnt vmcnt(0)
501 ; SI-NEXT: buffer_load_ushort v6, v[2:3], s[4:7], 0 addr64 offset:4 glc
502 ; SI-NEXT: s_waitcnt vmcnt(0)
503 ; SI-NEXT: buffer_load_ushort v7, v[2:3], s[4:7], 0 addr64 offset:6 glc
504 ; SI-NEXT: s_waitcnt vmcnt(0)
505 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:8 glc
506 ; SI-NEXT: s_waitcnt vmcnt(0)
507 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:10 glc
508 ; SI-NEXT: s_waitcnt vmcnt(0)
509 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:12 glc
510 ; SI-NEXT: s_waitcnt vmcnt(0)
511 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:14 glc
512 ; SI-NEXT: s_waitcnt vmcnt(0)
513 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:16 glc
514 ; SI-NEXT: s_waitcnt vmcnt(0)
515 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:18 glc
516 ; SI-NEXT: s_waitcnt vmcnt(0)
517 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:20 glc
518 ; SI-NEXT: s_waitcnt vmcnt(0)
519 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:22 glc
520 ; SI-NEXT: s_waitcnt vmcnt(0)
521 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:24 glc
522 ; SI-NEXT: s_waitcnt vmcnt(0)
523 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:26 glc
524 ; SI-NEXT: s_waitcnt vmcnt(0)
525 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:28 glc
526 ; SI-NEXT: s_waitcnt vmcnt(0)
527 ; SI-NEXT: buffer_load_ushort v2, v[2:3], s[4:7], 0 addr64 offset:30 glc
528 ; SI-NEXT: s_waitcnt vmcnt(0)
538 ; SI-NEXT: s_mov_b64 vcc, 0
540 ; SI-NEXT: s_mov_b32 s6, 0
541 ; SI-NEXT: s_mov_b32 s7, 0xf000
544 ; SI-NEXT: buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 glc
545 ; SI-NEXT: s_waitcnt vmcnt(0)
546 ; SI-NEXT: buffer_load_ushort v3, v[0:1], s[4:7], 0 addr64 offset:2 glc
547 ; SI-NEXT: s_waitcnt vmcnt(0)
548 ; SI-NEXT: buffer_load_ushort v4, v[0:1], s[4:7], 0 addr64 offset:4 glc
549 ; SI-NEXT: s_waitcnt vmcnt(0)
550 ; SI-NEXT: buffer_load_ushort v5, v[0:1], s[4:7], 0 addr64 offset:6 glc
551 ; SI-NEXT: s_waitcnt vmcnt(0)
552 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:8 glc
553 ; SI-NEXT: s_waitcnt vmcnt(0)
554 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:10 glc
555 ; SI-NEXT: s_waitcnt vmcnt(0)
556 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:12 glc
557 ; SI-NEXT: s_waitcnt vmcnt(0)
558 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:14 glc
559 ; SI-NEXT: s_waitcnt vmcnt(0)
560 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:16 glc
561 ; SI-NEXT: s_waitcnt vmcnt(0)
562 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:18 glc
563 ; SI-NEXT: s_waitcnt vmcnt(0)
564 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:20 glc
565 ; SI-NEXT: s_waitcnt vmcnt(0)
566 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:22 glc
567 ; SI-NEXT: s_waitcnt vmcnt(0)
568 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:24 glc
569 ; SI-NEXT: s_waitcnt vmcnt(0)
570 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:26 glc
571 ; SI-NEXT: s_waitcnt vmcnt(0)
572 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:28 glc
573 ; SI-NEXT: s_waitcnt vmcnt(0)
574 ; SI-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:30 glc
575 ; SI-NEXT: s_waitcnt vmcnt(0)
582 ; SI-NEXT: v_bfe_i32 v1, v2, 0, 16
583 ; SI-NEXT: v_bfe_i32 v2, v3, 0, 16
584 ; SI-NEXT: v_mov_b32_e32 v3, 0xffff0000
586 ; SI-NEXT: v_mov_b32_e32 v5, 0xffff
587 ; SI-NEXT: v_mov_b32_e32 v6, 0x8000
588 ; SI-NEXT: v_mov_b32_e32 v7, 0xffff8000
597 ; SI-NEXT: v_and_b32_e32 v3, 0xffff, v1
603 ; GFX9: ; %bb.0:
604 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
608 ; GFX9-NEXT: s_waitcnt vmcnt(0)
610 ; GFX9-NEXT: s_waitcnt vmcnt(0)
617 ; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off offset:16 glc
618 ; GFX9-NEXT: s_waitcnt vmcnt(0)
619 ; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off glc
620 ; GFX9-NEXT: s_waitcnt vmcnt(0)
623 ; GFX9-NEXT: s_waitcnt vmcnt(0)
624 ; GFX9-NEXT: v_pk_ashrrev_i16 v0, 15, v5 op_sel_hi:[0,0]
625 ; GFX9-NEXT: s_movk_i32 s4, 0x8000
626 ; GFX9-NEXT: v_or_b32_e32 v1, 0xffff8000, v0
628 ; GFX9-NEXT: v_pk_ashrrev_i16 v0, 15, v4 op_sel_hi:[0,1]
629 ; GFX9-NEXT: v_or_b32_e32 v3, 0xffff8000, v0
631 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
637 ; GFX11: ; %bb.0:
638 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
642 ; GFX11-NEXT: s_waitcnt vmcnt(0)
644 ; GFX11-NEXT: s_waitcnt vmcnt(0)
650 ; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off offset:16 glc dlc
651 ; GFX11-NEXT: s_waitcnt vmcnt(0)
652 ; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off glc dlc
653 ; GFX11-NEXT: s_waitcnt vmcnt(0)
655 ; GFX11-NEXT: v_pk_ashrrev_i16 v0, 15, v2 op_sel_hi:[0,1]
656 ; GFX11-NEXT: v_pk_ashrrev_i16 v1, 15, v3 op_sel_hi:[0,0]
660 ; GFX11-NEXT: v_or_b32_e32 v1, 0xffff8000, v1
661 ; GFX11-NEXT: v_or_b32_e32 v0, 0xffff8000, v0
663 ; GFX11-NEXT: v_or_b32_e32 v2, 0xffff8000, v2
664 ; GFX11-NEXT: v_or_b32_e32 v3, 0xffff8000, v3
666 ; GFX11-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
667 ; GFX11-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
681 %v2 = shufflevector <16 x i16> %m, <16 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
690 ; SI: ; %bb.0:
691 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
694 ; SI-NEXT: s_mov_b32 s6, 0
695 ; SI-NEXT: s_mov_b32 s7, 0xf000
698 ; SI-NEXT: buffer_load_ushort v4, v[2:3], s[4:7], 0 addr64 glc
699 ; SI-NEXT: s_waitcnt vmcnt(0)
700 ; SI-NEXT: buffer_load_ushort v4, v[2:3], s[4:7], 0 addr64 offset:2 glc
701 ; SI-NEXT: s_waitcnt vmcnt(0)
702 ; SI-NEXT: buffer_load_ushort v4, v[2:3], s[4:7], 0 addr64 offset:4 glc
703 ; SI-NEXT: s_waitcnt vmcnt(0)
704 ; SI-NEXT: buffer_load_ushort v4, v[2:3], s[4:7], 0 addr64 offset:6 glc
705 ; SI-NEXT: s_waitcnt vmcnt(0)
706 ; SI-NEXT: buffer_load_ushort v4, v[2:3], s[4:7], 0 addr64 offset:8 glc
707 ; SI-NEXT: s_waitcnt vmcnt(0)
708 ; SI-NEXT: buffer_load_ushort v5, v[2:3], s[4:7], 0 addr64 offset:10 glc
709 ; SI-NEXT: s_waitcnt vmcnt(0)
710 ; SI-NEXT: buffer_load_ushort v6, v[2:3], s[4:7], 0 addr64 offset:12 glc
711 ; SI-NEXT: s_waitcnt vmcnt(0)
712 ; SI-NEXT: buffer_load_ushort v7, v[2:3], s[4:7], 0 addr64 offset:14 glc
713 ; SI-NEXT: s_waitcnt vmcnt(0)
714 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:16 glc
715 ; SI-NEXT: s_waitcnt vmcnt(0)
716 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:18 glc
717 ; SI-NEXT: s_waitcnt vmcnt(0)
718 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:20 glc
719 ; SI-NEXT: s_waitcnt vmcnt(0)
720 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:22 glc
721 ; SI-NEXT: s_waitcnt vmcnt(0)
722 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:24 glc
723 ; SI-NEXT: s_waitcnt vmcnt(0)
724 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:26 glc
725 ; SI-NEXT: s_waitcnt vmcnt(0)
726 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:28 glc
727 ; SI-NEXT: s_waitcnt vmcnt(0)
728 ; SI-NEXT: buffer_load_ushort v2, v[2:3], s[4:7], 0 addr64 offset:30 glc
729 ; SI-NEXT: s_waitcnt vmcnt(0)
739 ; SI-NEXT: s_mov_b64 vcc, 0
741 ; SI-NEXT: s_mov_b32 s6, 0
742 ; SI-NEXT: s_mov_b32 s7, 0xf000
745 ; SI-NEXT: buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 glc
746 ; SI-NEXT: s_waitcnt vmcnt(0)
747 ; SI-NEXT: buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 offset:2 glc
748 ; SI-NEXT: s_waitcnt vmcnt(0)
749 ; SI-NEXT: buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 offset:4 glc
750 ; SI-NEXT: s_waitcnt vmcnt(0)
751 ; SI-NEXT: buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 offset:6 glc
752 ; SI-NEXT: s_waitcnt vmcnt(0)
753 ; SI-NEXT: buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 offset:8 glc
754 ; SI-NEXT: s_waitcnt vmcnt(0)
755 ; SI-NEXT: buffer_load_ushort v3, v[0:1], s[4:7], 0 addr64 offset:10 glc
756 ; SI-NEXT: s_waitcnt vmcnt(0)
757 ; SI-NEXT: buffer_load_ushort v4, v[0:1], s[4:7], 0 addr64 offset:12 glc
758 ; SI-NEXT: s_waitcnt vmcnt(0)
759 ; SI-NEXT: buffer_load_ushort v5, v[0:1], s[4:7], 0 addr64 offset:14 glc
760 ; SI-NEXT: s_waitcnt vmcnt(0)
761 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:16 glc
762 ; SI-NEXT: s_waitcnt vmcnt(0)
763 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:18 glc
764 ; SI-NEXT: s_waitcnt vmcnt(0)
765 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:20 glc
766 ; SI-NEXT: s_waitcnt vmcnt(0)
767 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:22 glc
768 ; SI-NEXT: s_waitcnt vmcnt(0)
769 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:24 glc
770 ; SI-NEXT: s_waitcnt vmcnt(0)
771 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:26 glc
772 ; SI-NEXT: s_waitcnt vmcnt(0)
773 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:28 glc
774 ; SI-NEXT: s_waitcnt vmcnt(0)
775 ; SI-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:30 glc
776 ; SI-NEXT: s_waitcnt vmcnt(0)
783 ; SI-NEXT: v_ashr_i64 v[0:1], v[4:5], 48
784 ; SI-NEXT: v_bfe_i32 v1, v4, 0, 16
785 ; SI-NEXT: v_bfe_i32 v3, v5, 0, 16
786 ; SI-NEXT: v_mov_b32_e32 v4, 0xffff0000
788 ; SI-NEXT: v_mov_b32_e32 v6, 0xffff
789 ; SI-NEXT: v_mov_b32_e32 v7, 0x8000
805 ; GFX9: ; %bb.0:
806 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
810 ; GFX9-NEXT: s_waitcnt vmcnt(0)
812 ; GFX9-NEXT: s_waitcnt vmcnt(0)
819 ; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off offset:16 glc
820 ; GFX9-NEXT: s_waitcnt vmcnt(0)
821 ; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off glc
822 ; GFX9-NEXT: s_waitcnt vmcnt(0)
825 ; GFX9-NEXT: s_waitcnt vmcnt(0)
826 ; GFX9-NEXT: v_pk_ashrrev_i16 v0, 15, v7 op_sel_hi:[0,1]
827 ; GFX9-NEXT: s_movk_i32 s4, 0x8000
828 ; GFX9-NEXT: v_or_b32_e32 v1, 0xffff8000, v0
830 ; GFX9-NEXT: v_pk_ashrrev_i16 v0, 15, v6 op_sel_hi:[0,1]
831 ; GFX9-NEXT: v_or_b32_e32 v3, 0xffff8000, v0
833 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
839 ; GFX11: ; %bb.0:
840 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
844 ; GFX11-NEXT: s_waitcnt vmcnt(0)
846 ; GFX11-NEXT: s_waitcnt vmcnt(0)
852 ; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off offset:16 glc dlc
853 ; GFX11-NEXT: s_waitcnt vmcnt(0)
854 ; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off glc dlc
855 ; GFX11-NEXT: s_waitcnt vmcnt(0)
857 ; GFX11-NEXT: v_pk_ashrrev_i16 v0, 15, v4 op_sel_hi:[0,1]
858 ; GFX11-NEXT: v_pk_ashrrev_i16 v1, 15, v5 op_sel_hi:[0,1]
862 ; GFX11-NEXT: v_or_b32_e32 v1, 0xffff8000, v1
863 ; GFX11-NEXT: v_or_b32_e32 v0, 0xffff8000, v0
865 ; GFX11-NEXT: v_or_b32_e32 v2, 0xffff8000, v2
866 ; GFX11-NEXT: v_or_b32_e32 v3, 0xffff8000, v3
868 ; GFX11-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
869 ; GFX11-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
892 ; SI: ; %bb.0:
893 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
896 ; SI-NEXT: s_mov_b32 s6, 0
897 ; SI-NEXT: s_mov_b32 s7, 0xf000
900 ; SI-NEXT: buffer_load_ushort v4, v[2:3], s[4:7], 0 addr64 glc
901 ; SI-NEXT: s_waitcnt vmcnt(0)
902 ; SI-NEXT: buffer_load_ushort v5, v[2:3], s[4:7], 0 addr64 offset:2 glc
903 ; SI-NEXT: s_waitcnt vmcnt(0)
904 ; SI-NEXT: buffer_load_ushort v6, v[2:3], s[4:7], 0 addr64 offset:4 glc
905 ; SI-NEXT: s_waitcnt vmcnt(0)
906 ; SI-NEXT: buffer_load_ushort v7, v[2:3], s[4:7], 0 addr64 offset:6 glc
907 ; SI-NEXT: s_waitcnt vmcnt(0)
908 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:8 glc
909 ; SI-NEXT: s_waitcnt vmcnt(0)
910 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:10 glc
911 ; SI-NEXT: s_waitcnt vmcnt(0)
912 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:12 glc
913 ; SI-NEXT: s_waitcnt vmcnt(0)
914 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:14 glc
915 ; SI-NEXT: s_waitcnt vmcnt(0)
916 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:16 glc
917 ; SI-NEXT: s_waitcnt vmcnt(0)
918 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:18 glc
919 ; SI-NEXT: s_waitcnt vmcnt(0)
920 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:20 glc
921 ; SI-NEXT: s_waitcnt vmcnt(0)
922 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:22 glc
923 ; SI-NEXT: s_waitcnt vmcnt(0)
924 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:24 glc
925 ; SI-NEXT: s_waitcnt vmcnt(0)
926 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:26 glc
927 ; SI-NEXT: s_waitcnt vmcnt(0)
928 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:28 glc
929 ; SI-NEXT: s_waitcnt vmcnt(0)
930 ; SI-NEXT: buffer_load_ushort v2, v[2:3], s[4:7], 0 addr64 offset:30 glc
931 ; SI-NEXT: s_waitcnt vmcnt(0)
946 ; SI-NEXT: s_mov_b64 vcc, 0
948 ; SI-NEXT: s_mov_b32 s6, 0
949 ; SI-NEXT: s_mov_b32 s7, 0xf000
952 ; SI-NEXT: buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 glc
953 ; SI-NEXT: s_waitcnt vmcnt(0)
954 ; SI-NEXT: buffer_load_ushort v3, v[0:1], s[4:7], 0 addr64 offset:2 glc
955 ; SI-NEXT: s_waitcnt vmcnt(0)
956 ; SI-NEXT: buffer_load_ushort v4, v[0:1], s[4:7], 0 addr64 offset:4 glc
957 ; SI-NEXT: s_waitcnt vmcnt(0)
958 ; SI-NEXT: buffer_load_ushort v5, v[0:1], s[4:7], 0 addr64 offset:6 glc
959 ; SI-NEXT: s_waitcnt vmcnt(0)
960 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:8 glc
961 ; SI-NEXT: s_waitcnt vmcnt(0)
962 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:10 glc
963 ; SI-NEXT: s_waitcnt vmcnt(0)
964 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:12 glc
965 ; SI-NEXT: s_waitcnt vmcnt(0)
966 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:14 glc
967 ; SI-NEXT: s_waitcnt vmcnt(0)
968 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:16 glc
969 ; SI-NEXT: s_waitcnt vmcnt(0)
970 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:18 glc
971 ; SI-NEXT: s_waitcnt vmcnt(0)
972 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:20 glc
973 ; SI-NEXT: s_waitcnt vmcnt(0)
974 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:22 glc
975 ; SI-NEXT: s_waitcnt vmcnt(0)
976 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:24 glc
977 ; SI-NEXT: s_waitcnt vmcnt(0)
978 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:26 glc
979 ; SI-NEXT: s_waitcnt vmcnt(0)
980 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:28 glc
981 ; SI-NEXT: s_waitcnt vmcnt(0)
982 ; SI-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:30 glc
983 ; SI-NEXT: s_waitcnt vmcnt(0)
995 ; SI-NEXT: v_mov_b32_e32 v3, 0x3fa00000
996 ; SI-NEXT: v_mov_b32_e32 v4, 0x3f200000
1010 ; GFX9: ; %bb.0:
1011 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1015 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1017 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1024 ; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off offset:16 glc
1025 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1026 ; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off glc
1027 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1030 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x3900
1031 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x3d00
1032 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1034 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x3800
1047 ; GFX11: ; %bb.0:
1048 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1052 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1054 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1060 ; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off offset:16 glc dlc
1061 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1062 ; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off glc dlc
1063 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1065 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x3d00
1069 ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x3900, v0, vcc_lo
1071 ; GFX11-NEXT: v_dual_mov_b32 v4, 0x3900 :: v_dual_cndmask_b32 v1, 0x3900, v0
1074 ; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x3d00, v4, vcc_lo
1076 ; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x3900, v0, vcc_lo
1092 %v2 = shufflevector <16 x half> %m, <16 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
1093 %b2 = fcmp ugt <4 x half> %v2, <half 0xH3800, half 0xH3800, half 0xH3800, half 0xH3800>
1094 …2, <4 x half> <half 0xH3900, half 0xH3900, half 0xH3900, half 0xH3900>, <4 x half> <half 0xH3D00, …
1100 ; SI: ; %bb.0:
1101 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1118 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1123 ; GFX9: ; %bb.0:
1124 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1126 ; GFX9-NEXT: ds_read2_b32 v[0:1], v2 offset1:1
1128 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1132 ; GFX11: ; %bb.0:
1133 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1135 ; GFX11-NEXT: ds_load_2addr_b32 v[0:1], v2 offset1:1
1137 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1141 %i.0 = or disjoint i32 %idx, 0
1142 %p.0 = getelementptr half, ptr addrspace(3) %p, i32 %i.0
1143 %x.0 = load i16, ptr addrspace(3) %p.0, align 4
1144 %v0p = insertelement <8 x i16> poison, i16 %x.0, i32 0
1153 %v1p = insertelement <8 x i16> poison, i16 %x.2, i32 0
1162 %v2p = insertelement <8 x i16> poison, i16 %x.4, i32 0
1171 %v3p = insertelement <8 x i16> poison, i16 %x.6, i32 0
1177 …%z.1 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 unde…
1178 …%z.2 = shufflevector <8 x i16> %z.1, <8 x i16> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, …
1179 …%z.3 = shufflevector <8 x i16> %z.2, <8 x i16> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, …
1185 ; SI: ; %bb.0:
1186 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1187 ; SI-NEXT: buffer_load_ubyte v4, off, s[0:3], s32
1188 ; SI-NEXT: s_waitcnt vmcnt(0)
1190 ; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
1192 ; SI-NEXT: s_mov_b32 s38, 0
1195 ; SI-NEXT: s_mov_b32 s39, 0xf000
1198 ; SI-NEXT: buffer_load_ushort v6, v[2:3], s[36:39], 0 addr64 glc
1199 ; SI-NEXT: s_waitcnt vmcnt(0)
1200 ; SI-NEXT: buffer_load_ushort v4, v[2:3], s[36:39], 0 addr64 offset:2 glc
1201 ; SI-NEXT: s_waitcnt vmcnt(0)
1202 ; SI-NEXT: buffer_load_ushort v7, v[2:3], s[36:39], 0 addr64 offset:4 glc
1203 ; SI-NEXT: s_waitcnt vmcnt(0)
1204 ; SI-NEXT: buffer_load_ushort v5, v[2:3], s[36:39], 0 addr64 offset:6 glc
1205 ; SI-NEXT: s_waitcnt vmcnt(0)
1206 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[36:39], 0 addr64 offset:8 glc
1207 ; SI-NEXT: s_waitcnt vmcnt(0)
1208 ; SI-NEXT: buffer_load_ushort v9, v[2:3], s[36:39], 0 addr64 offset:10 glc
1209 ; SI-NEXT: s_waitcnt vmcnt(0)
1210 ; SI-NEXT: buffer_load_ushort v10, v[2:3], s[36:39], 0 addr64 offset:12 glc
1211 ; SI-NEXT: s_waitcnt vmcnt(0)
1212 ; SI-NEXT: buffer_load_ushort v11, v[2:3], s[36:39], 0 addr64 offset:14 glc
1213 ; SI-NEXT: s_waitcnt vmcnt(0)
1214 ; SI-NEXT: buffer_load_ushort v12, v[2:3], s[36:39], 0 addr64 offset:16 glc
1215 ; SI-NEXT: s_waitcnt vmcnt(0)
1216 ; SI-NEXT: buffer_load_ushort v12, v[2:3], s[36:39], 0 addr64 offset:18 glc
1217 ; SI-NEXT: s_waitcnt vmcnt(0)
1218 ; SI-NEXT: buffer_load_ushort v12, v[2:3], s[36:39], 0 addr64 offset:20 glc
1219 ; SI-NEXT: s_waitcnt vmcnt(0)
1220 ; SI-NEXT: buffer_load_ushort v12, v[2:3], s[36:39], 0 addr64 offset:22 glc
1221 ; SI-NEXT: s_waitcnt vmcnt(0)
1222 ; SI-NEXT: buffer_load_ushort v12, v[2:3], s[36:39], 0 addr64 offset:24 glc
1223 ; SI-NEXT: s_waitcnt vmcnt(0)
1224 ; SI-NEXT: buffer_load_ushort v12, v[2:3], s[36:39], 0 addr64 offset:26 glc
1225 ; SI-NEXT: s_waitcnt vmcnt(0)
1226 ; SI-NEXT: buffer_load_ushort v12, v[2:3], s[36:39], 0 addr64 offset:28 glc
1227 ; SI-NEXT: s_waitcnt vmcnt(0)
1228 ; SI-NEXT: buffer_load_ushort v2, v[2:3], s[36:39], 0 addr64 offset:30 glc
1229 ; SI-NEXT: s_waitcnt vmcnt(0)
1243 ; SI-NEXT: s_mov_b64 vcc, 0
1245 ; SI-NEXT: s_mov_b32 s39, 0xf000
1248 ; SI-NEXT: buffer_load_ushort v2, v[0:1], s[36:39], 0 addr64 glc
1249 ; SI-NEXT: s_waitcnt vmcnt(0)
1250 ; SI-NEXT: buffer_load_ushort v3, v[0:1], s[36:39], 0 addr64 offset:2 glc
1251 ; SI-NEXT: s_waitcnt vmcnt(0)
1252 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[36:39], 0 addr64 offset:4 glc
1253 ; SI-NEXT: s_waitcnt vmcnt(0)
1254 ; SI-NEXT: buffer_load_ushort v4, v[0:1], s[36:39], 0 addr64 offset:6 glc
1255 ; SI-NEXT: s_waitcnt vmcnt(0)
1256 ; SI-NEXT: buffer_load_ushort v7, v[0:1], s[36:39], 0 addr64 offset:8 glc
1257 ; SI-NEXT: s_waitcnt vmcnt(0)
1258 ; SI-NEXT: buffer_load_ushort v5, v[0:1], s[36:39], 0 addr64 offset:10 glc
1259 ; SI-NEXT: s_waitcnt vmcnt(0)
1260 ; SI-NEXT: buffer_load_ushort v8, v[0:1], s[36:39], 0 addr64 offset:12 glc
1261 ; SI-NEXT: s_waitcnt vmcnt(0)
1262 ; SI-NEXT: buffer_load_ushort v9, v[0:1], s[36:39], 0 addr64 offset:14 glc
1263 ; SI-NEXT: s_waitcnt vmcnt(0)
1264 ; SI-NEXT: buffer_load_ushort v10, v[0:1], s[36:39], 0 addr64 offset:16 glc
1265 ; SI-NEXT: s_waitcnt vmcnt(0)
1266 ; SI-NEXT: buffer_load_ushort v10, v[0:1], s[36:39], 0 addr64 offset:18 glc
1267 ; SI-NEXT: s_waitcnt vmcnt(0)
1268 ; SI-NEXT: buffer_load_ushort v10, v[0:1], s[36:39], 0 addr64 offset:20 glc
1269 ; SI-NEXT: s_waitcnt vmcnt(0)
1270 ; SI-NEXT: buffer_load_ushort v10, v[0:1], s[36:39], 0 addr64 offset:22 glc
1271 ; SI-NEXT: s_waitcnt vmcnt(0)
1272 ; SI-NEXT: buffer_load_ushort v10, v[0:1], s[36:39], 0 addr64 offset:24 glc
1273 ; SI-NEXT: s_waitcnt vmcnt(0)
1274 ; SI-NEXT: buffer_load_ushort v10, v[0:1], s[36:39], 0 addr64 offset:26 glc
1275 ; SI-NEXT: s_waitcnt vmcnt(0)
1276 ; SI-NEXT: buffer_load_ushort v10, v[0:1], s[36:39], 0 addr64 offset:28 glc
1277 ; SI-NEXT: s_waitcnt vmcnt(0)
1278 ; SI-NEXT: buffer_load_ushort v0, v[0:1], s[36:39], 0 addr64 offset:30 glc
1279 ; SI-NEXT: s_waitcnt vmcnt(0)
1290 ; SI-NEXT: v_and_b32_e32 v1, 0xffff, v2
1292 ; SI-NEXT: v_and_b32_e32 v4, 0xffff, v4
1294 ; SI-NEXT: v_and_b32_e32 v5, 0xffff, v5
1296 ; SI-NEXT: v_and_b32_e32 v3, 0xffff, v3
1297 ; SI-NEXT: s_movk_i32 s34, 0x3800
1298 ; SI-NEXT: v_mov_b32_e32 v8, 0x3d000000
1299 ; SI-NEXT: v_mov_b32_e32 v9, 0x39000000
1300 ; SI-NEXT: v_mov_b32_e32 v10, 0x3d00
1301 ; SI-NEXT: v_mov_b32_e32 v11, 0x3900
1329 ; GFX9: ; %bb.0:
1330 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1331 ; GFX9-NEXT: buffer_load_ubyte v4, off, s[0:3], s32
1332 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1334 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
1339 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1341 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1348 ; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off offset:16 glc
1349 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1350 ; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off glc
1351 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1354 ; GFX9-NEXT: s_movk_i32 s35, 0x3801
1355 ; GFX9-NEXT: s_movk_i32 s34, 0x3800
1356 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x3900
1357 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x3d00
1358 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1375 ; GFX9-NEXT: s_mov_b32 s34, 0x5040100
1383 ; GFX11: ; %bb.0:
1384 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1386 ; GFX11-NEXT: s_mov_b32 s0, 0
1387 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1390 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v4
1395 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1397 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1404 ; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off offset:16 glc dlc
1405 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1406 ; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off glc dlc
1407 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1409 ; GFX11-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x3801, v5
1410 ; GFX11-NEXT: v_mov_b32_e32 v9, 0x3900
1411 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x3d00
1416 ; GFX11-NEXT: v_cndmask_b32_e32 v5, 0x3900, v1, vcc_lo
1417 ; GFX11-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x3801, v4
1418 ; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x3900, v1, vcc_lo
1419 ; GFX11-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x3801, v7
1420 ; GFX11-NEXT: v_cndmask_b32_e32 v7, 0x3900, v1, vcc_lo
1421 ; GFX11-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x3801, v3
1422 ; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x3900, v1, vcc_lo
1423 ; GFX11-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x3801, v2
1424 ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x3900, v1, vcc_lo
1425 ; GFX11-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x3801, v0
1426 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x3900, v1, vcc_lo
1427 ; GFX11-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x3801, v6
1429 ; GFX11-NEXT: v_perm_b32 v0, v0, v2, 0x5040100
1430 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x3900, v1, vcc_lo
1431 ; GFX11-NEXT: v_cmp_lt_u16_e32 vcc_lo, 0x3800, v8
1432 ; GFX11-NEXT: v_perm_b32 v2, v7, v4, 0x5040100
1433 ; GFX11-NEXT: v_perm_b32 v1, v1, v3, 0x5040100
1434 ; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x3d00, v9, vcc_lo
1436 ; GFX11-NEXT: v_perm_b32 v3, v6, v5, 0x5040100
1450 …%v2 = shufflevector <16 x i16> %m, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4,…
1458 ; SI: ; %bb.0:
1459 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1460 ; SI-NEXT: buffer_load_ubyte v4, off, s[0:3], s32
1461 ; SI-NEXT: s_waitcnt vmcnt(0)
1463 ; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
1465 ; SI-NEXT: s_mov_b32 s38, 0
1468 ; SI-NEXT: s_mov_b32 s39, 0xf000
1471 ; SI-NEXT: buffer_load_ushort v5, v[2:3], s[36:39], 0 addr64 glc
1472 ; SI-NEXT: s_waitcnt vmcnt(0)
1473 ; SI-NEXT: buffer_load_ushort v6, v[2:3], s[36:39], 0 addr64 offset:2 glc
1474 ; SI-NEXT: s_waitcnt vmcnt(0)
1475 ; SI-NEXT: buffer_load_ushort v7, v[2:3], s[36:39], 0 addr64 offset:4 glc
1476 ; SI-NEXT: s_waitcnt vmcnt(0)
1477 ; SI-NEXT: buffer_load_ushort v4, v[2:3], s[36:39], 0 addr64 offset:6 glc
1478 ; SI-NEXT: s_waitcnt vmcnt(0)
1479 ; SI-NEXT: buffer_load_ushort v8, v[2:3], s[36:39], 0 addr64 offset:8 glc
1480 ; SI-NEXT: s_waitcnt vmcnt(0)
1481 ; SI-NEXT: buffer_load_ushort v9, v[2:3], s[36:39], 0 addr64 offset:10 glc
1482 ; SI-NEXT: s_waitcnt vmcnt(0)
1483 ; SI-NEXT: buffer_load_ushort v10, v[2:3], s[36:39], 0 addr64 offset:12 glc
1484 ; SI-NEXT: s_waitcnt vmcnt(0)
1485 ; SI-NEXT: buffer_load_ushort v11, v[2:3], s[36:39], 0 addr64 offset:14 glc
1486 ; SI-NEXT: s_waitcnt vmcnt(0)
1487 ; SI-NEXT: buffer_load_ushort v12, v[2:3], s[36:39], 0 addr64 offset:16 glc
1488 ; SI-NEXT: s_waitcnt vmcnt(0)
1489 ; SI-NEXT: buffer_load_ushort v12, v[2:3], s[36:39], 0 addr64 offset:18 glc
1490 ; SI-NEXT: s_waitcnt vmcnt(0)
1491 ; SI-NEXT: buffer_load_ushort v12, v[2:3], s[36:39], 0 addr64 offset:20 glc
1492 ; SI-NEXT: s_waitcnt vmcnt(0)
1493 ; SI-NEXT: buffer_load_ushort v12, v[2:3], s[36:39], 0 addr64 offset:22 glc
1494 ; SI-NEXT: s_waitcnt vmcnt(0)
1495 ; SI-NEXT: buffer_load_ushort v12, v[2:3], s[36:39], 0 addr64 offset:24 glc
1496 ; SI-NEXT: s_waitcnt vmcnt(0)
1497 ; SI-NEXT: buffer_load_ushort v12, v[2:3], s[36:39], 0 addr64 offset:26 glc
1498 ; SI-NEXT: s_waitcnt vmcnt(0)
1499 ; SI-NEXT: buffer_load_ushort v12, v[2:3], s[36:39], 0 addr64 offset:28 glc
1500 ; SI-NEXT: s_waitcnt vmcnt(0)
1501 ; SI-NEXT: buffer_load_ushort v2, v[2:3], s[36:39], 0 addr64 offset:30 glc
1502 ; SI-NEXT: s_waitcnt vmcnt(0)
1531 ; SI-NEXT: s_mov_b64 vcc, 0
1533 ; SI-NEXT: s_mov_b32 s39, 0xf000
1536 ; SI-NEXT: buffer_load_ushort v5, v[0:1], s[36:39], 0 addr64 glc
1537 ; SI-NEXT: s_waitcnt vmcnt(0)
1538 ; SI-NEXT: buffer_load_ushort v6, v[0:1], s[36:39], 0 addr64 offset:2 glc
1539 ; SI-NEXT: s_waitcnt vmcnt(0)
1540 ; SI-NEXT: buffer_load_ushort v7, v[0:1], s[36:39], 0 addr64 offset:4 glc
1541 ; SI-NEXT: s_waitcnt vmcnt(0)
1542 ; SI-NEXT: buffer_load_ushort v4, v[0:1], s[36:39], 0 addr64 offset:6 glc
1543 ; SI-NEXT: s_waitcnt vmcnt(0)
1544 ; SI-NEXT: buffer_load_ushort v8, v[0:1], s[36:39], 0 addr64 offset:8 glc
1545 ; SI-NEXT: s_waitcnt vmcnt(0)
1546 ; SI-NEXT: buffer_load_ushort v3, v[0:1], s[36:39], 0 addr64 offset:10 glc
1547 ; SI-NEXT: s_waitcnt vmcnt(0)
1548 ; SI-NEXT: buffer_load_ushort v9, v[0:1], s[36:39], 0 addr64 offset:12 glc
1549 ; SI-NEXT: s_waitcnt vmcnt(0)
1550 ; SI-NEXT: buffer_load_ushort v2, v[0:1], s[36:39], 0 addr64 offset:14 glc
1551 ; SI-NEXT: s_waitcnt vmcnt(0)
1552 ; SI-NEXT: buffer_load_ushort v10, v[0:1], s[36:39], 0 addr64 offset:16 glc
1553 ; SI-NEXT: s_waitcnt vmcnt(0)
1554 ; SI-NEXT: buffer_load_ushort v10, v[0:1], s[36:39], 0 addr64 offset:18 glc
1555 ; SI-NEXT: s_waitcnt vmcnt(0)
1556 ; SI-NEXT: buffer_load_ushort v10, v[0:1], s[36:39], 0 addr64 offset:20 glc
1557 ; SI-NEXT: s_waitcnt vmcnt(0)
1558 ; SI-NEXT: buffer_load_ushort v10, v[0:1], s[36:39], 0 addr64 offset:22 glc
1559 ; SI-NEXT: s_waitcnt vmcnt(0)
1560 ; SI-NEXT: buffer_load_ushort v10, v[0:1], s[36:39], 0 addr64 offset:24 glc
1561 ; SI-NEXT: s_waitcnt vmcnt(0)
1562 ; SI-NEXT: buffer_load_ushort v10, v[0:1], s[36:39], 0 addr64 offset:26 glc
1563 ; SI-NEXT: s_waitcnt vmcnt(0)
1564 ; SI-NEXT: buffer_load_ushort v10, v[0:1], s[36:39], 0 addr64 offset:28 glc
1565 ; SI-NEXT: s_waitcnt vmcnt(0)
1566 ; SI-NEXT: buffer_load_ushort v0, v[0:1], s[36:39], 0 addr64 offset:30 glc
1567 ; SI-NEXT: s_waitcnt vmcnt(0)
1593 ; SI-NEXT: v_mov_b32_e32 v8, 0x3fa00000
1594 ; SI-NEXT: v_mov_b32_e32 v9, 0x3f200000
1622 ; GFX9: ; %bb.0:
1623 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1624 ; GFX9-NEXT: buffer_load_ubyte v4, off, s[0:3], s32
1625 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1627 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
1632 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1634 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1641 ; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off offset:16 glc
1642 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1643 ; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off glc
1644 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1647 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x3800
1648 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x3900
1649 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x3d00
1650 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1674 ; GFX11: ; %bb.0:
1675 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1677 ; GFX11-NEXT: s_mov_b32 s0, 0
1678 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1681 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v4
1686 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1688 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1695 ; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off offset:16 glc dlc
1696 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1697 ; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off glc dlc
1698 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1701 ; GFX11-NEXT: v_mov_b32_e32 v9, 0x3900
1702 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x3d00
1707 ; GFX11-NEXT: v_cndmask_b32_e32 v5, 0x3900, v1, vcc_lo
1709 ; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x3900, v1, vcc_lo
1711 ; GFX11-NEXT: v_cndmask_b32_e32 v7, 0x3900, v1, vcc_lo
1713 ; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x3900, v1, vcc_lo
1715 ; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x3900, v1, vcc_lo
1717 ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x3900, v1, vcc_lo
1721 ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x3900, v1, vcc_lo
1725 ; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x3d00, v9, vcc_lo
1741 …%v2 = shufflevector <16 x half> %m, <16 x half> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 …
1742 … ugt <8 x half> %v2, <half 0xH3800, half 0xH3800, half 0xH3800, half 0xH3800, half 0xH3800, half 0…
1743 …0xH3900, half 0xH3900, half 0xH3900, half 0xH3900, half 0xH3900, half 0xH3900, half 0xH3900, half …