xref: /llvm-project/llvm/test/CodeGen/AMDGPU/srem.ll (revision 6206f5444fc0732e6495703c75a67f1f90f5b418)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN
3; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s --check-prefixes=TAHITI
4; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s --check-prefixes=TONGA
5; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s --check-prefixes=EG
6
; Kernel under test: loads one i16 from %in, takes its signed remainder by the
; constant 7 and stores the i16 result to %out.
; The check lines inside the function are autogenerated (see the NOTE at the
; top of the file); regenerate them instead of editing by hand.
7define amdgpu_kernel void @srem_i16_7(ptr addrspace(1) %out, ptr addrspace(1) %in) {
8; GCN-LABEL: srem_i16_7:
9; GCN:       ; %bb.0:
10; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
11; GCN-NEXT:    v_mov_b32_e32 v0, 0
12; GCN-NEXT:    s_waitcnt lgkmcnt(0)
13; GCN-NEXT:    global_load_ushort v1, v0, s[2:3]
14; GCN-NEXT:    s_waitcnt vmcnt(0)
15; GCN-NEXT:    v_readfirstlane_b32 s2, v1
16; GCN-NEXT:    s_sext_i32_i16 s2, s2
17; GCN-NEXT:    s_mulk_i32 s2, 0x4925
18; GCN-NEXT:    s_lshr_b32 s3, s2, 31
19; GCN-NEXT:    s_ashr_i32 s2, s2, 17
20; GCN-NEXT:    s_add_i32 s2, s2, s3
21; GCN-NEXT:    s_mul_i32 s2, s2, 7
22; GCN-NEXT:    v_subrev_u32_e32 v1, s2, v1
23; GCN-NEXT:    global_store_short v0, v1, s[0:1]
24; GCN-NEXT:    s_endpgm
25;
26; TAHITI-LABEL: srem_i16_7:
27; TAHITI:       ; %bb.0:
28; TAHITI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
29; TAHITI-NEXT:    s_mov_b32 s7, 0xf000
30; TAHITI-NEXT:    s_mov_b32 s6, -1
31; TAHITI-NEXT:    s_mov_b32 s10, s6
32; TAHITI-NEXT:    s_mov_b32 s11, s7
33; TAHITI-NEXT:    s_waitcnt lgkmcnt(0)
34; TAHITI-NEXT:    s_mov_b32 s8, s2
35; TAHITI-NEXT:    s_mov_b32 s9, s3
36; TAHITI-NEXT:    buffer_load_sshort v0, off, s[8:11], 0
37; TAHITI-NEXT:    s_mov_b32 s4, s0
38; TAHITI-NEXT:    s_mov_b32 s5, s1
39; TAHITI-NEXT:    s_waitcnt vmcnt(0)
40; TAHITI-NEXT:    v_readfirstlane_b32 s0, v0
41; TAHITI-NEXT:    s_mulk_i32 s0, 0x4925
42; TAHITI-NEXT:    s_lshr_b32 s1, s0, 31
43; TAHITI-NEXT:    s_ashr_i32 s0, s0, 17
44; TAHITI-NEXT:    s_add_i32 s0, s0, s1
45; TAHITI-NEXT:    s_mul_i32 s0, s0, 7
46; TAHITI-NEXT:    v_subrev_i32_e32 v0, vcc, s0, v0
47; TAHITI-NEXT:    buffer_store_short v0, off, s[4:7], 0
48; TAHITI-NEXT:    s_endpgm
49;
50; TONGA-LABEL: srem_i16_7:
51; TONGA:       ; %bb.0:
52; TONGA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
53; TONGA-NEXT:    s_waitcnt lgkmcnt(0)
54; TONGA-NEXT:    v_mov_b32_e32 v0, s2
55; TONGA-NEXT:    v_mov_b32_e32 v1, s3
56; TONGA-NEXT:    flat_load_ushort v2, v[0:1]
57; TONGA-NEXT:    v_mov_b32_e32 v0, s0
58; TONGA-NEXT:    v_mov_b32_e32 v1, s1
59; TONGA-NEXT:    s_waitcnt vmcnt(0)
60; TONGA-NEXT:    v_readfirstlane_b32 s0, v2
61; TONGA-NEXT:    s_sext_i32_i16 s0, s0
62; TONGA-NEXT:    s_mulk_i32 s0, 0x4925
63; TONGA-NEXT:    s_lshr_b32 s1, s0, 31
64; TONGA-NEXT:    s_ashr_i32 s0, s0, 17
65; TONGA-NEXT:    s_add_i32 s0, s0, s1
66; TONGA-NEXT:    s_mul_i32 s0, s0, 7
67; TONGA-NEXT:    v_subrev_u32_e32 v2, vcc, s0, v2
68; TONGA-NEXT:    flat_store_short v[0:1], v2
69; TONGA-NEXT:    s_endpgm
70;
71; EG-LABEL: srem_i16_7:
72; EG:       ; %bb.0:
73; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
74; EG-NEXT:    TEX 0 @6
75; EG-NEXT:    ALU 22, @9, KC0[CB0:0-32], KC1[]
76; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
77; EG-NEXT:    CF_END
78; EG-NEXT:    PAD
79; EG-NEXT:    Fetch clause starting at 6:
80; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
81; EG-NEXT:    ALU clause starting at 8:
82; EG-NEXT:     MOV * T0.X, KC0[2].Z,
83; EG-NEXT:    ALU clause starting at 9:
84; EG-NEXT:     BFE_INT * T0.W, T0.X, 0.0, literal.x,
85; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
86; EG-NEXT:     MULLO_INT * T0.Y, PV.W, literal.x,
87; EG-NEXT:    18725(2.623931e-41), 0(0.000000e+00)
88; EG-NEXT:     ASHR T0.W, PS, literal.x,
89; EG-NEXT:     LSHR * T1.W, PS, literal.y,
90; EG-NEXT:    17(2.382207e-44), 31(4.344025e-44)
91; EG-NEXT:     ADD_INT * T0.W, PV.W, PS,
92; EG-NEXT:     MULLO_INT * T0.Y, PV.W, literal.x,
93; EG-NEXT:    7(9.809089e-45), 0(0.000000e+00)
94; EG-NEXT:     AND_INT T0.W, KC0[2].Y, literal.x,
95; EG-NEXT:     SUB_INT * T1.W, T0.X, PS,
96; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
97; EG-NEXT:     AND_INT T1.W, PS, literal.x,
98; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
99; EG-NEXT:    65535(9.183409e-41), 3(4.203895e-45)
100; EG-NEXT:     LSHL T0.X, PV.W, PS,
101; EG-NEXT:     LSHL * T0.W, literal.x, PS,
102; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
103; EG-NEXT:     MOV T0.Y, 0.0,
104; EG-NEXT:     MOV * T0.Z, 0.0,
105; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
106; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  ; Expected lowering on every target above: no divide instruction. The quotient
  ; is formed by multiplying the sign-extended input by 0x4925 (18725), shifting
  ; right by 17 and adding the sign bit; the remainder is input - quotient * 7.
107  %num = load i16, ptr addrspace(1) %in
108  %result = srem i16 %num, 7
109  store i16 %result, ptr addrspace(1) %out
110  ret void
111}
112
; Kernel under test: signed remainder of two i32 values that are both loaded
; from memory (numerator at %in, denominator in the next i32 slot), so the
; divisor is not a compile-time constant. The result is stored to %out.
; The check lines inside the function are autogenerated (see the NOTE at the
; top of the file); regenerate them instead of editing by hand.
113define amdgpu_kernel void @srem_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
114; GCN-LABEL: srem_i32:
115; GCN:       ; %bb.0:
116; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
117; GCN-NEXT:    v_mov_b32_e32 v2, 0
118; GCN-NEXT:    s_waitcnt lgkmcnt(0)
119; GCN-NEXT:    global_load_dwordx2 v[0:1], v2, s[2:3]
120; GCN-NEXT:    s_waitcnt vmcnt(0)
121; GCN-NEXT:    v_readfirstlane_b32 s2, v1
122; GCN-NEXT:    s_abs_i32 s2, s2
123; GCN-NEXT:    v_cvt_f32_u32_e32 v1, s2
124; GCN-NEXT:    v_readfirstlane_b32 s3, v0
125; GCN-NEXT:    s_sub_i32 s5, 0, s2
126; GCN-NEXT:    s_ashr_i32 s4, s3, 31
127; GCN-NEXT:    v_rcp_iflag_f32_e32 v1, v1
128; GCN-NEXT:    s_abs_i32 s3, s3
129; GCN-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v1
130; GCN-NEXT:    v_cvt_u32_f32_e32 v0, v0
131; GCN-NEXT:    v_readfirstlane_b32 s6, v0
132; GCN-NEXT:    s_mul_i32 s5, s5, s6
133; GCN-NEXT:    s_mul_hi_u32 s5, s6, s5
134; GCN-NEXT:    s_add_i32 s6, s6, s5
135; GCN-NEXT:    s_mul_hi_u32 s5, s3, s6
136; GCN-NEXT:    s_mul_i32 s5, s5, s2
137; GCN-NEXT:    s_sub_i32 s3, s3, s5
138; GCN-NEXT:    s_sub_i32 s5, s3, s2
139; GCN-NEXT:    s_cmp_ge_u32 s3, s2
140; GCN-NEXT:    s_cselect_b32 s3, s5, s3
141; GCN-NEXT:    s_sub_i32 s5, s3, s2
142; GCN-NEXT:    s_cmp_ge_u32 s3, s2
143; GCN-NEXT:    s_cselect_b32 s2, s5, s3
144; GCN-NEXT:    s_xor_b32 s2, s2, s4
145; GCN-NEXT:    s_sub_i32 s2, s2, s4
146; GCN-NEXT:    v_mov_b32_e32 v0, s2
147; GCN-NEXT:    global_store_dword v2, v0, s[0:1]
148; GCN-NEXT:    s_endpgm
149;
150; TAHITI-LABEL: srem_i32:
151; TAHITI:       ; %bb.0:
152; TAHITI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
153; TAHITI-NEXT:    s_mov_b32 s7, 0xf000
154; TAHITI-NEXT:    s_mov_b32 s6, -1
155; TAHITI-NEXT:    s_mov_b32 s10, s6
156; TAHITI-NEXT:    s_mov_b32 s11, s7
157; TAHITI-NEXT:    s_waitcnt lgkmcnt(0)
158; TAHITI-NEXT:    s_mov_b32 s8, s2
159; TAHITI-NEXT:    s_mov_b32 s9, s3
160; TAHITI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
161; TAHITI-NEXT:    s_mov_b32 s5, s1
162; TAHITI-NEXT:    s_mov_b32 s4, s0
163; TAHITI-NEXT:    s_waitcnt vmcnt(0)
164; TAHITI-NEXT:    v_readfirstlane_b32 s2, v1
165; TAHITI-NEXT:    s_abs_i32 s2, s2
166; TAHITI-NEXT:    v_cvt_f32_u32_e32 v1, s2
167; TAHITI-NEXT:    s_sub_i32 s3, 0, s2
168; TAHITI-NEXT:    v_rcp_iflag_f32_e32 v1, v1
169; TAHITI-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
170; TAHITI-NEXT:    v_cvt_u32_f32_e32 v1, v1
171; TAHITI-NEXT:    v_mul_lo_u32 v2, s3, v1
172; TAHITI-NEXT:    v_readfirstlane_b32 s3, v0
173; TAHITI-NEXT:    s_abs_i32 s8, s3
174; TAHITI-NEXT:    s_ashr_i32 s0, s3, 31
175; TAHITI-NEXT:    v_mul_hi_u32 v2, v1, v2
176; TAHITI-NEXT:    v_add_i32_e32 v0, vcc, v1, v2
177; TAHITI-NEXT:    v_mul_hi_u32 v0, s8, v0
178; TAHITI-NEXT:    v_readfirstlane_b32 s1, v0
179; TAHITI-NEXT:    s_mul_i32 s1, s1, s2
180; TAHITI-NEXT:    s_sub_i32 s1, s8, s1
181; TAHITI-NEXT:    s_sub_i32 s3, s1, s2
182; TAHITI-NEXT:    s_cmp_ge_u32 s1, s2
183; TAHITI-NEXT:    s_cselect_b32 s1, s3, s1
184; TAHITI-NEXT:    s_sub_i32 s3, s1, s2
185; TAHITI-NEXT:    s_cmp_ge_u32 s1, s2
186; TAHITI-NEXT:    s_cselect_b32 s1, s3, s1
187; TAHITI-NEXT:    s_xor_b32 s1, s1, s0
188; TAHITI-NEXT:    s_sub_i32 s0, s1, s0
189; TAHITI-NEXT:    v_mov_b32_e32 v0, s0
190; TAHITI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
191; TAHITI-NEXT:    s_endpgm
192;
193; TONGA-LABEL: srem_i32:
194; TONGA:       ; %bb.0:
195; TONGA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
196; TONGA-NEXT:    s_waitcnt lgkmcnt(0)
197; TONGA-NEXT:    v_mov_b32_e32 v0, s2
198; TONGA-NEXT:    v_mov_b32_e32 v1, s3
199; TONGA-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
200; TONGA-NEXT:    s_waitcnt vmcnt(0)
201; TONGA-NEXT:    v_readfirstlane_b32 s2, v1
202; TONGA-NEXT:    s_abs_i32 s2, s2
203; TONGA-NEXT:    v_cvt_f32_u32_e32 v1, s2
204; TONGA-NEXT:    s_sub_i32 s3, 0, s2
205; TONGA-NEXT:    v_rcp_iflag_f32_e32 v1, v1
206; TONGA-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
207; TONGA-NEXT:    v_cvt_u32_f32_e32 v1, v1
208; TONGA-NEXT:    v_mul_lo_u32 v2, s3, v1
209; TONGA-NEXT:    v_readfirstlane_b32 s3, v0
210; TONGA-NEXT:    s_abs_i32 s4, s3
211; TONGA-NEXT:    v_mul_hi_u32 v2, v1, v2
212; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v1, v2
213; TONGA-NEXT:    v_mul_hi_u32 v2, s4, v0
214; TONGA-NEXT:    v_mov_b32_e32 v1, s1
215; TONGA-NEXT:    v_mov_b32_e32 v0, s0
216; TONGA-NEXT:    s_ashr_i32 s0, s3, 31
217; TONGA-NEXT:    v_readfirstlane_b32 s1, v2
218; TONGA-NEXT:    s_mul_i32 s1, s1, s2
219; TONGA-NEXT:    s_sub_i32 s1, s4, s1
220; TONGA-NEXT:    s_sub_i32 s3, s1, s2
221; TONGA-NEXT:    s_cmp_ge_u32 s1, s2
222; TONGA-NEXT:    s_cselect_b32 s1, s3, s1
223; TONGA-NEXT:    s_sub_i32 s3, s1, s2
224; TONGA-NEXT:    s_cmp_ge_u32 s1, s2
225; TONGA-NEXT:    s_cselect_b32 s1, s3, s1
226; TONGA-NEXT:    s_xor_b32 s1, s1, s0
227; TONGA-NEXT:    s_sub_i32 s0, s1, s0
228; TONGA-NEXT:    v_mov_b32_e32 v2, s0
229; TONGA-NEXT:    flat_store_dword v[0:1], v2
230; TONGA-NEXT:    s_endpgm
231;
232; EG-LABEL: srem_i32:
233; EG:       ; %bb.0:
234; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
235; EG-NEXT:    TEX 0 @6
236; EG-NEXT:    ALU 23, @9, KC0[CB0:0-32], KC1[]
237; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
238; EG-NEXT:    CF_END
239; EG-NEXT:    PAD
240; EG-NEXT:    Fetch clause starting at 6:
241; EG-NEXT:     VTX_READ_64 T0.XY, T0.X, 0, #1
242; EG-NEXT:    ALU clause starting at 8:
243; EG-NEXT:     MOV * T0.X, KC0[2].Z,
244; EG-NEXT:    ALU clause starting at 9:
245; EG-NEXT:     SETGT_INT * T0.W, 0.0, T0.Y,
246; EG-NEXT:     ADD_INT * T1.W, T0.Y, PV.W,
247; EG-NEXT:     XOR_INT * T0.W, PV.W, T0.W,
248; EG-NEXT:     SUB_INT T1.W, 0.0, PV.W,
249; EG-NEXT:     RECIP_UINT * T0.Y, PV.W,
250; EG-NEXT:     SETGT_INT T2.W, 0.0, T0.X,
251; EG-NEXT:     MULLO_INT * T0.Z, PV.W, PS,
252; EG-NEXT:     ADD_INT T1.W, T0.X, PV.W,
253; EG-NEXT:     MULHI * T0.X, T0.Y, PS,
254; EG-NEXT:     ADD_INT T3.W, T0.Y, PS,
255; EG-NEXT:     XOR_INT * T1.W, PV.W, T2.W,
256; EG-NEXT:     MULHI * T0.X, PS, PV.W,
257; EG-NEXT:     MULLO_INT * T0.X, PS, T0.W,
258; EG-NEXT:     SUB_INT * T1.W, T1.W, PS,
259; EG-NEXT:     SETGE_UINT T3.W, PV.W, T0.W,
260; EG-NEXT:     SUB_INT * T4.W, PV.W, T0.W,
261; EG-NEXT:     CNDE_INT * T1.W, PV.W, T1.W, PS,
262; EG-NEXT:     SETGE_UINT T3.W, PV.W, T0.W,
263; EG-NEXT:     SUB_INT * T0.W, PV.W, T0.W,
264; EG-NEXT:     CNDE_INT * T0.W, PV.W, T1.W, PS,
265; EG-NEXT:     XOR_INT * T0.W, PV.W, T2.W,
266; EG-NEXT:     SUB_INT T0.X, PV.W, T2.W,
267; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
268; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  ; Expected lowering above: the remainder is computed on the absolute values
  ; using a reciprocal estimate of the denominator, followed by two
  ; compare-and-subtract correction steps; the numerator's sign is then applied
  ; with an xor/sub pair.
  ; %den_ptr is the i32 element immediately after the one %num is loaded from.
269  %den_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1
270  %num = load i32, ptr addrspace(1) %in
271  %den = load i32, ptr addrspace(1) %den_ptr
272  %result = srem i32 %num, %den
273  store i32 %result, ptr addrspace(1) %out
274  ret void
275}
276
; Kernel under test: loads one i32 from %in, takes its signed remainder by the
; power-of-two constant 4 and stores the i32 result to %out.
; The check lines inside the function are autogenerated (see the NOTE at the
; top of the file); regenerate them instead of editing by hand.
277define amdgpu_kernel void @srem_i32_4(ptr addrspace(1) %out, ptr addrspace(1) %in) {
278; GCN-LABEL: srem_i32_4:
279; GCN:       ; %bb.0:
280; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
281; GCN-NEXT:    v_mov_b32_e32 v0, 0
282; GCN-NEXT:    s_waitcnt lgkmcnt(0)
283; GCN-NEXT:    global_load_dword v1, v0, s[2:3]
284; GCN-NEXT:    s_waitcnt vmcnt(0)
285; GCN-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
286; GCN-NEXT:    v_lshrrev_b32_e32 v2, 30, v2
287; GCN-NEXT:    v_add_u32_e32 v2, v1, v2
288; GCN-NEXT:    v_and_b32_e32 v2, -4, v2
289; GCN-NEXT:    v_sub_u32_e32 v1, v1, v2
290; GCN-NEXT:    global_store_dword v0, v1, s[0:1]
291; GCN-NEXT:    s_endpgm
292;
293; TAHITI-LABEL: srem_i32_4:
294; TAHITI:       ; %bb.0:
295; TAHITI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
296; TAHITI-NEXT:    s_mov_b32 s7, 0xf000
297; TAHITI-NEXT:    s_mov_b32 s6, -1
298; TAHITI-NEXT:    s_mov_b32 s10, s6
299; TAHITI-NEXT:    s_mov_b32 s11, s7
300; TAHITI-NEXT:    s_waitcnt lgkmcnt(0)
301; TAHITI-NEXT:    s_mov_b32 s8, s2
302; TAHITI-NEXT:    s_mov_b32 s9, s3
303; TAHITI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
304; TAHITI-NEXT:    s_mov_b32 s4, s0
305; TAHITI-NEXT:    s_mov_b32 s5, s1
306; TAHITI-NEXT:    s_waitcnt vmcnt(0)
307; TAHITI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
308; TAHITI-NEXT:    v_lshrrev_b32_e32 v1, 30, v1
309; TAHITI-NEXT:    v_add_i32_e32 v1, vcc, v1, v0
310; TAHITI-NEXT:    v_and_b32_e32 v1, -4, v1
311; TAHITI-NEXT:    v_subrev_i32_e32 v0, vcc, v1, v0
312; TAHITI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
313; TAHITI-NEXT:    s_endpgm
314;
315; TONGA-LABEL: srem_i32_4:
316; TONGA:       ; %bb.0:
317; TONGA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
318; TONGA-NEXT:    s_waitcnt lgkmcnt(0)
319; TONGA-NEXT:    v_mov_b32_e32 v0, s2
320; TONGA-NEXT:    v_mov_b32_e32 v1, s3
321; TONGA-NEXT:    flat_load_dword v2, v[0:1]
322; TONGA-NEXT:    v_mov_b32_e32 v0, s0
323; TONGA-NEXT:    v_mov_b32_e32 v1, s1
324; TONGA-NEXT:    s_waitcnt vmcnt(0)
325; TONGA-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
326; TONGA-NEXT:    v_lshrrev_b32_e32 v3, 30, v3
327; TONGA-NEXT:    v_add_u32_e32 v3, vcc, v3, v2
328; TONGA-NEXT:    v_and_b32_e32 v3, -4, v3
329; TONGA-NEXT:    v_subrev_u32_e32 v2, vcc, v3, v2
330; TONGA-NEXT:    flat_store_dword v[0:1], v2
331; TONGA-NEXT:    s_endpgm
332;
333; EG-LABEL: srem_i32_4:
334; EG:       ; %bb.0:
335; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
336; EG-NEXT:    TEX 0 @6
337; EG-NEXT:    ALU 9, @9, KC0[CB0:0-32], KC1[]
338; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
339; EG-NEXT:    CF_END
340; EG-NEXT:    PAD
341; EG-NEXT:    Fetch clause starting at 6:
342; EG-NEXT:     VTX_READ_32 T0.X, T0.X, 0, #1
343; EG-NEXT:    ALU clause starting at 8:
344; EG-NEXT:     MOV * T0.X, KC0[2].Z,
345; EG-NEXT:    ALU clause starting at 9:
346; EG-NEXT:     ASHR * T0.W, T0.X, literal.x,
347; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
348; EG-NEXT:     LSHR * T0.W, PV.W, literal.x,
349; EG-NEXT:    30(4.203895e-44), 0(0.000000e+00)
350; EG-NEXT:     ADD_INT * T0.W, T0.X, PV.W,
351; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
352; EG-NEXT:    -4(nan), 0(0.000000e+00)
353; EG-NEXT:     SUB_INT T0.X, T0.X, PV.W,
354; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
355; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  ; Expected lowering above: shifts and a mask only, no multiply or divide.
  ; The sign is smeared (ashr 31) and shifted right by 30 to form a bias, the
  ; biased input is masked with -4, and that value is subtracted from the input.
356  %num = load i32, ptr addrspace(1) %in
357  %result = srem i32 %num, 4
358  store i32 %result, ptr addrspace(1) %out
359  ret void
360}
361
362; FIXME: uniform srem by a constant should not use VALU instructions (applies to srem_i32_7 below and to the i16 case in srem_i16_7).
; Kernel under test: loads one i32 from %in, takes its signed remainder by the
; non-power-of-two constant 7 and stores the i32 result to %out.
; The check lines inside the function are autogenerated (see the NOTE at the
; top of the file); regenerate them instead of editing by hand.
363define amdgpu_kernel void @srem_i32_7(ptr addrspace(1) %out, ptr addrspace(1) %in) {
364; GCN-LABEL: srem_i32_7:
365; GCN:       ; %bb.0:
366; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
367; GCN-NEXT:    v_mov_b32_e32 v0, 0
368; GCN-NEXT:    s_waitcnt lgkmcnt(0)
369; GCN-NEXT:    global_load_dword v1, v0, s[2:3]
370; GCN-NEXT:    s_mov_b32 s2, 0x92492493
371; GCN-NEXT:    s_waitcnt vmcnt(0)
372; GCN-NEXT:    v_mul_hi_i32 v2, v1, s2
373; GCN-NEXT:    v_add_u32_e32 v2, v2, v1
374; GCN-NEXT:    v_lshrrev_b32_e32 v3, 31, v2
375; GCN-NEXT:    v_ashrrev_i32_e32 v2, 2, v2
376; GCN-NEXT:    v_add_u32_e32 v2, v2, v3
377; GCN-NEXT:    v_mul_lo_u32 v2, v2, 7
378; GCN-NEXT:    v_sub_u32_e32 v1, v1, v2
379; GCN-NEXT:    global_store_dword v0, v1, s[0:1]
380; GCN-NEXT:    s_endpgm
381;
382; TAHITI-LABEL: srem_i32_7:
383; TAHITI:       ; %bb.0:
384; TAHITI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
385; TAHITI-NEXT:    s_mov_b32 s7, 0xf000
386; TAHITI-NEXT:    s_mov_b32 s6, -1
387; TAHITI-NEXT:    s_mov_b32 s10, s6
388; TAHITI-NEXT:    s_mov_b32 s11, s7
389; TAHITI-NEXT:    s_waitcnt lgkmcnt(0)
390; TAHITI-NEXT:    s_mov_b32 s8, s2
391; TAHITI-NEXT:    s_mov_b32 s9, s3
392; TAHITI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
393; TAHITI-NEXT:    s_mov_b32 s2, 0x92492493
394; TAHITI-NEXT:    s_mov_b32 s4, s0
395; TAHITI-NEXT:    s_mov_b32 s5, s1
396; TAHITI-NEXT:    s_waitcnt vmcnt(0)
397; TAHITI-NEXT:    v_mul_hi_i32 v1, v0, s2
398; TAHITI-NEXT:    v_add_i32_e32 v1, vcc, v1, v0
399; TAHITI-NEXT:    v_lshrrev_b32_e32 v2, 31, v1
400; TAHITI-NEXT:    v_ashrrev_i32_e32 v1, 2, v1
401; TAHITI-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
402; TAHITI-NEXT:    v_mul_lo_u32 v1, v1, 7
403; TAHITI-NEXT:    v_subrev_i32_e32 v0, vcc, v1, v0
404; TAHITI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
405; TAHITI-NEXT:    s_endpgm
406;
407; TONGA-LABEL: srem_i32_7:
408; TONGA:       ; %bb.0:
409; TONGA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
410; TONGA-NEXT:    s_waitcnt lgkmcnt(0)
411; TONGA-NEXT:    v_mov_b32_e32 v0, s2
412; TONGA-NEXT:    v_mov_b32_e32 v1, s3
413; TONGA-NEXT:    flat_load_dword v2, v[0:1]
414; TONGA-NEXT:    s_mov_b32 s2, 0x92492493
415; TONGA-NEXT:    s_waitcnt vmcnt(0)
416; TONGA-NEXT:    v_mul_hi_i32 v0, v2, s2
417; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
418; TONGA-NEXT:    v_lshrrev_b32_e32 v1, 31, v0
419; TONGA-NEXT:    v_ashrrev_i32_e32 v0, 2, v0
420; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
421; TONGA-NEXT:    v_mul_lo_u32 v3, v0, 7
422; TONGA-NEXT:    v_mov_b32_e32 v0, s0
423; TONGA-NEXT:    v_mov_b32_e32 v1, s1
424; TONGA-NEXT:    v_subrev_u32_e32 v2, vcc, v3, v2
425; TONGA-NEXT:    flat_store_dword v[0:1], v2
426; TONGA-NEXT:    s_endpgm
427;
428; EG-LABEL: srem_i32_7:
429; EG:       ; %bb.0:
430; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
431; EG-NEXT:    TEX 0 @6
432; EG-NEXT:    ALU 11, @9, KC0[CB0:0-32], KC1[]
433; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
434; EG-NEXT:    CF_END
435; EG-NEXT:    PAD
436; EG-NEXT:    Fetch clause starting at 6:
437; EG-NEXT:     VTX_READ_32 T0.X, T0.X, 0, #1
438; EG-NEXT:    ALU clause starting at 8:
439; EG-NEXT:     MOV * T0.X, KC0[2].Z,
440; EG-NEXT:    ALU clause starting at 9:
441; EG-NEXT:     MULHI_INT * T0.Y, T0.X, literal.x,
442; EG-NEXT:    -1840700269(-6.346950e-28), 0(0.000000e+00)
443; EG-NEXT:     ADD_INT * T0.W, PS, T0.X,
444; EG-NEXT:     ASHR T1.W, PV.W, literal.x,
445; EG-NEXT:     LSHR * T0.W, PV.W, literal.y,
446; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
447; EG-NEXT:     ADD_INT * T0.W, PV.W, PS,
448; EG-NEXT:     MULLO_INT * T0.Y, PV.W, literal.x,
449; EG-NEXT:    7(9.809089e-45), 0(0.000000e+00)
450; EG-NEXT:     SUB_INT T0.X, T0.X, PS,
451; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
452; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  ; Expected lowering above: no divide instruction. The quotient comes from a
  ; signed multiply-high by 0x92492493 (printed as -1840700269 on r600), plus
  ; the input, arithmetic shift right by 2 and the sign bit added back; the
  ; remainder is input - quotient * 7.
453  %num = load i32, ptr addrspace(1) %in
454  %result = srem i32 %num, 7
455  store i32 %result, ptr addrspace(1) %out
456  ret void
457}
458
; Kernel under test: element-wise signed remainder of two <2 x i32> vectors
; that are both loaded from memory (numerator at %in, denominator in the next
; <2 x i32> slot). The <2 x i32> result is stored to %out.
; The check lines inside the function are autogenerated (see the NOTE at the
; top of the file); regenerate them instead of editing by hand.
459define amdgpu_kernel void @srem_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
460; GCN-LABEL: srem_v2i32:
461; GCN:       ; %bb.0:
462; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
463; GCN-NEXT:    v_mov_b32_e32 v4, 0
464; GCN-NEXT:    s_waitcnt lgkmcnt(0)
465; GCN-NEXT:    global_load_dwordx4 v[0:3], v4, s[2:3]
466; GCN-NEXT:    s_waitcnt vmcnt(0)
467; GCN-NEXT:    v_readfirstlane_b32 s2, v2
468; GCN-NEXT:    s_abs_i32 s2, s2
469; GCN-NEXT:    v_cvt_f32_u32_e32 v2, s2
470; GCN-NEXT:    v_readfirstlane_b32 s3, v0
471; GCN-NEXT:    s_sub_i32 s6, 0, s2
472; GCN-NEXT:    s_ashr_i32 s5, s3, 31
473; GCN-NEXT:    v_rcp_iflag_f32_e32 v2, v2
474; GCN-NEXT:    s_abs_i32 s3, s3
475; GCN-NEXT:    v_readfirstlane_b32 s4, v3
476; GCN-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v2
477; GCN-NEXT:    v_cvt_u32_f32_e32 v0, v0
478; GCN-NEXT:    v_readfirstlane_b32 s7, v0
479; GCN-NEXT:    s_mul_i32 s6, s6, s7
480; GCN-NEXT:    s_mul_hi_u32 s6, s7, s6
481; GCN-NEXT:    s_add_i32 s7, s7, s6
482; GCN-NEXT:    s_mul_hi_u32 s6, s3, s7
483; GCN-NEXT:    s_mul_i32 s6, s6, s2
484; GCN-NEXT:    s_sub_i32 s3, s3, s6
485; GCN-NEXT:    s_sub_i32 s6, s3, s2
486; GCN-NEXT:    s_cmp_ge_u32 s3, s2
487; GCN-NEXT:    s_cselect_b32 s3, s6, s3
488; GCN-NEXT:    s_sub_i32 s6, s3, s2
489; GCN-NEXT:    s_cmp_ge_u32 s3, s2
490; GCN-NEXT:    s_cselect_b32 s2, s6, s3
491; GCN-NEXT:    s_abs_i32 s3, s4
492; GCN-NEXT:    v_cvt_f32_u32_e32 v0, s3
493; GCN-NEXT:    s_xor_b32 s2, s2, s5
494; GCN-NEXT:    s_sub_i32 s7, 0, s3
495; GCN-NEXT:    s_sub_i32 s2, s2, s5
496; GCN-NEXT:    v_rcp_iflag_f32_e32 v0, v0
497; GCN-NEXT:    v_readfirstlane_b32 s4, v1
498; GCN-NEXT:    s_ashr_i32 s6, s4, 31
499; GCN-NEXT:    s_abs_i32 s4, s4
500; GCN-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
501; GCN-NEXT:    v_cvt_u32_f32_e32 v0, v0
502; GCN-NEXT:    v_readfirstlane_b32 s5, v0
503; GCN-NEXT:    s_mul_i32 s7, s7, s5
504; GCN-NEXT:    s_mul_hi_u32 s7, s5, s7
505; GCN-NEXT:    s_add_i32 s5, s5, s7
506; GCN-NEXT:    s_mul_hi_u32 s5, s4, s5
507; GCN-NEXT:    s_mul_i32 s5, s5, s3
508; GCN-NEXT:    s_sub_i32 s4, s4, s5
509; GCN-NEXT:    s_sub_i32 s5, s4, s3
510; GCN-NEXT:    s_cmp_ge_u32 s4, s3
511; GCN-NEXT:    s_cselect_b32 s4, s5, s4
512; GCN-NEXT:    s_sub_i32 s5, s4, s3
513; GCN-NEXT:    s_cmp_ge_u32 s4, s3
514; GCN-NEXT:    s_cselect_b32 s3, s5, s4
515; GCN-NEXT:    s_xor_b32 s3, s3, s6
516; GCN-NEXT:    s_sub_i32 s3, s3, s6
517; GCN-NEXT:    v_mov_b32_e32 v0, s2
518; GCN-NEXT:    v_mov_b32_e32 v1, s3
519; GCN-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1]
520; GCN-NEXT:    s_endpgm
521;
522; TAHITI-LABEL: srem_v2i32:
523; TAHITI:       ; %bb.0:
524; TAHITI-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x9
525; TAHITI-NEXT:    s_mov_b32 s3, 0xf000
526; TAHITI-NEXT:    s_mov_b32 s2, -1
527; TAHITI-NEXT:    s_mov_b32 s10, s2
528; TAHITI-NEXT:    s_mov_b32 s11, s3
529; TAHITI-NEXT:    s_waitcnt lgkmcnt(0)
530; TAHITI-NEXT:    s_mov_b32 s8, s6
531; TAHITI-NEXT:    s_mov_b32 s9, s7
532; TAHITI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
533; TAHITI-NEXT:    s_waitcnt vmcnt(0)
534; TAHITI-NEXT:    v_readfirstlane_b32 s0, v2
535; TAHITI-NEXT:    s_abs_i32 s0, s0
536; TAHITI-NEXT:    v_cvt_f32_u32_e32 v2, s0
537; TAHITI-NEXT:    s_sub_i32 s1, 0, s0
538; TAHITI-NEXT:    v_readfirstlane_b32 s7, v3
539; TAHITI-NEXT:    v_rcp_iflag_f32_e32 v2, v2
540; TAHITI-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
541; TAHITI-NEXT:    v_cvt_u32_f32_e32 v2, v2
542; TAHITI-NEXT:    v_mul_lo_u32 v4, s1, v2
543; TAHITI-NEXT:    v_readfirstlane_b32 s1, v0
544; TAHITI-NEXT:    s_abs_i32 s6, s1
545; TAHITI-NEXT:    s_ashr_i32 s8, s1, 31
546; TAHITI-NEXT:    v_mul_hi_u32 v4, v2, v4
547; TAHITI-NEXT:    v_add_i32_e32 v0, vcc, v2, v4
548; TAHITI-NEXT:    v_mul_hi_u32 v0, s6, v0
549; TAHITI-NEXT:    v_readfirstlane_b32 s1, v0
550; TAHITI-NEXT:    s_mul_i32 s1, s1, s0
551; TAHITI-NEXT:    s_sub_i32 s1, s6, s1
552; TAHITI-NEXT:    s_sub_i32 s6, s1, s0
553; TAHITI-NEXT:    s_cmp_ge_u32 s1, s0
554; TAHITI-NEXT:    s_cselect_b32 s1, s6, s1
555; TAHITI-NEXT:    s_sub_i32 s6, s1, s0
556; TAHITI-NEXT:    s_cmp_ge_u32 s1, s0
557; TAHITI-NEXT:    s_cselect_b32 s6, s6, s1
558; TAHITI-NEXT:    s_abs_i32 s7, s7
559; TAHITI-NEXT:    v_cvt_f32_u32_e32 v0, s7
560; TAHITI-NEXT:    s_sub_i32 s0, 0, s7
561; TAHITI-NEXT:    s_mov_b32 s1, s5
562; TAHITI-NEXT:    s_xor_b32 s6, s6, s8
563; TAHITI-NEXT:    v_rcp_iflag_f32_e32 v0, v0
564; TAHITI-NEXT:    s_sub_i32 s6, s6, s8
565; TAHITI-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
566; TAHITI-NEXT:    v_cvt_u32_f32_e32 v0, v0
567; TAHITI-NEXT:    v_mul_lo_u32 v2, s0, v0
568; TAHITI-NEXT:    s_mov_b32 s0, s4
569; TAHITI-NEXT:    v_readfirstlane_b32 s4, v1
570; TAHITI-NEXT:    s_abs_i32 s5, s4
571; TAHITI-NEXT:    v_mul_hi_u32 v2, v0, v2
572; TAHITI-NEXT:    s_ashr_i32 s4, s4, 31
573; TAHITI-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
574; TAHITI-NEXT:    v_mul_hi_u32 v0, s5, v0
575; TAHITI-NEXT:    v_readfirstlane_b32 s8, v0
576; TAHITI-NEXT:    s_mul_i32 s8, s8, s7
577; TAHITI-NEXT:    s_sub_i32 s5, s5, s8
578; TAHITI-NEXT:    s_sub_i32 s8, s5, s7
579; TAHITI-NEXT:    s_cmp_ge_u32 s5, s7
580; TAHITI-NEXT:    s_cselect_b32 s5, s8, s5
581; TAHITI-NEXT:    s_sub_i32 s8, s5, s7
582; TAHITI-NEXT:    s_cmp_ge_u32 s5, s7
583; TAHITI-NEXT:    s_cselect_b32 s5, s8, s5
584; TAHITI-NEXT:    s_xor_b32 s5, s5, s4
585; TAHITI-NEXT:    s_sub_i32 s4, s5, s4
586; TAHITI-NEXT:    v_mov_b32_e32 v0, s6
587; TAHITI-NEXT:    v_mov_b32_e32 v1, s4
588; TAHITI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
589; TAHITI-NEXT:    s_endpgm
590;
591; TONGA-LABEL: srem_v2i32:
592; TONGA:       ; %bb.0:
593; TONGA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
594; TONGA-NEXT:    s_waitcnt lgkmcnt(0)
595; TONGA-NEXT:    v_mov_b32_e32 v0, s2
596; TONGA-NEXT:    v_mov_b32_e32 v1, s3
597; TONGA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
598; TONGA-NEXT:    s_waitcnt vmcnt(0)
599; TONGA-NEXT:    v_readfirstlane_b32 s2, v2
600; TONGA-NEXT:    s_abs_i32 s2, s2
601; TONGA-NEXT:    v_cvt_f32_u32_e32 v2, s2
602; TONGA-NEXT:    s_sub_i32 s3, 0, s2
603; TONGA-NEXT:    v_readfirstlane_b32 s5, v3
604; TONGA-NEXT:    v_mov_b32_e32 v3, s1
605; TONGA-NEXT:    v_rcp_iflag_f32_e32 v2, v2
606; TONGA-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
607; TONGA-NEXT:    v_cvt_u32_f32_e32 v2, v2
608; TONGA-NEXT:    v_mul_lo_u32 v4, s3, v2
609; TONGA-NEXT:    v_readfirstlane_b32 s3, v0
610; TONGA-NEXT:    s_abs_i32 s4, s3
611; TONGA-NEXT:    s_ashr_i32 s3, s3, 31
612; TONGA-NEXT:    v_mul_hi_u32 v4, v2, v4
613; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v2, v4
614; TONGA-NEXT:    v_mul_hi_u32 v0, s4, v0
615; TONGA-NEXT:    v_mov_b32_e32 v2, s0
616; TONGA-NEXT:    v_readfirstlane_b32 s0, v1
617; TONGA-NEXT:    v_readfirstlane_b32 s6, v0
618; TONGA-NEXT:    s_mul_i32 s6, s6, s2
619; TONGA-NEXT:    s_sub_i32 s4, s4, s6
620; TONGA-NEXT:    s_sub_i32 s6, s4, s2
621; TONGA-NEXT:    s_cmp_ge_u32 s4, s2
622; TONGA-NEXT:    s_cselect_b32 s4, s6, s4
623; TONGA-NEXT:    s_sub_i32 s6, s4, s2
624; TONGA-NEXT:    s_cmp_ge_u32 s4, s2
625; TONGA-NEXT:    s_cselect_b32 s2, s6, s4
626; TONGA-NEXT:    s_abs_i32 s4, s5
627; TONGA-NEXT:    v_cvt_f32_u32_e32 v0, s4
628; TONGA-NEXT:    s_sub_i32 s5, 0, s4
629; TONGA-NEXT:    s_abs_i32 s1, s0
630; TONGA-NEXT:    s_xor_b32 s2, s2, s3
631; TONGA-NEXT:    v_rcp_iflag_f32_e32 v0, v0
632; TONGA-NEXT:    s_sub_i32 s2, s2, s3
633; TONGA-NEXT:    s_ashr_i32 s0, s0, 31
634; TONGA-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
635; TONGA-NEXT:    v_cvt_u32_f32_e32 v0, v0
636; TONGA-NEXT:    v_mul_lo_u32 v4, s5, v0
637; TONGA-NEXT:    v_mul_hi_u32 v4, v0, v4
638; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v0, v4
639; TONGA-NEXT:    v_mul_hi_u32 v0, s1, v0
640; TONGA-NEXT:    v_readfirstlane_b32 s3, v0
641; TONGA-NEXT:    s_mul_i32 s3, s3, s4
642; TONGA-NEXT:    s_sub_i32 s1, s1, s3
643; TONGA-NEXT:    s_sub_i32 s3, s1, s4
644; TONGA-NEXT:    s_cmp_ge_u32 s1, s4
645; TONGA-NEXT:    s_cselect_b32 s1, s3, s1
646; TONGA-NEXT:    s_sub_i32 s3, s1, s4
647; TONGA-NEXT:    s_cmp_ge_u32 s1, s4
648; TONGA-NEXT:    s_cselect_b32 s1, s3, s1
649; TONGA-NEXT:    s_xor_b32 s1, s1, s0
650; TONGA-NEXT:    s_sub_i32 s0, s1, s0
651; TONGA-NEXT:    v_mov_b32_e32 v0, s2
652; TONGA-NEXT:    v_mov_b32_e32 v1, s0
653; TONGA-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
654; TONGA-NEXT:    s_endpgm
655;
656; EG-LABEL: srem_v2i32:
657; EG:       ; %bb.0:
658; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
659; EG-NEXT:    TEX 0 @6
660; EG-NEXT:    ALU 45, @9, KC0[CB0:0-32], KC1[]
661; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
662; EG-NEXT:    CF_END
663; EG-NEXT:    PAD
664; EG-NEXT:    Fetch clause starting at 6:
665; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
666; EG-NEXT:    ALU clause starting at 8:
667; EG-NEXT:     MOV * T0.X, KC0[2].Z,
668; EG-NEXT:    ALU clause starting at 9:
669; EG-NEXT:     SETGT_INT * T1.W, 0.0, T0.W,
670; EG-NEXT:     ADD_INT T0.W, T0.W, PV.W,
671; EG-NEXT:     SETGT_INT * T2.W, 0.0, T0.Z,
672; EG-NEXT:     XOR_INT * T0.W, PV.W, T1.W,
673; EG-NEXT:     SUB_INT T1.Z, 0.0, PV.W,
674; EG-NEXT:     ADD_INT T1.W, T0.Z, T2.W,
675; EG-NEXT:     RECIP_UINT * T0.Z, PV.W,
676; EG-NEXT:     XOR_INT T1.W, PV.W, T2.W,
677; EG-NEXT:     MULLO_INT * T1.X, PV.Z, PS,
678; EG-NEXT:     SUB_INT T2.W, 0.0, PV.W,
679; EG-NEXT:     RECIP_UINT * T1.Y, PV.W,
680; EG-NEXT:     SETGT_INT T3.W, 0.0, T0.X,
681; EG-NEXT:     MULLO_INT * T1.Z, PV.W, PS,
682; EG-NEXT:     SETGT_INT T2.Z, 0.0, T0.Y,
683; EG-NEXT:     ADD_INT T2.W, T0.X, PV.W,
684; EG-NEXT:     MULHI * T0.X, T1.Y, PS,
685; EG-NEXT:     ADD_INT T1.Y, T1.Y, PS,
686; EG-NEXT:     XOR_INT T1.Z, PV.W, T3.W,
687; EG-NEXT:     ADD_INT T2.W, T0.Y, PV.Z, BS:VEC_120/SCL_212
688; EG-NEXT:     MULHI * T0.X, T0.Z, T1.X,
689; EG-NEXT:     ADD_INT T0.Z, T0.Z, PS,
690; EG-NEXT:     XOR_INT T2.W, PV.W, T2.Z,
691; EG-NEXT:     MULHI * T0.X, PV.Z, PV.Y,
692; EG-NEXT:     MULHI * T0.Y, PV.W, PV.Z,
693; EG-NEXT:     MULLO_INT * T0.Y, PS, T0.W,
694; EG-NEXT:     SUB_INT T2.W, T2.W, PS,
695; EG-NEXT:     MULLO_INT * T0.X, T0.X, T1.W,
696; EG-NEXT:     SUB_INT T0.Z, T1.Z, PS,
697; EG-NEXT:     SETGE_UINT T4.W, PV.W, T0.W,
698; EG-NEXT:     SUB_INT * T5.W, PV.W, T0.W,
699; EG-NEXT:     CNDE_INT T1.Z, PV.W, T2.W, PS, BS:VEC_021/SCL_122
700; EG-NEXT:     SETGE_UINT T2.W, PV.Z, T1.W,
701; EG-NEXT:     SUB_INT * T4.W, PV.Z, T1.W,
702; EG-NEXT:     CNDE_INT T0.Z, PV.W, T0.Z, PS,
703; EG-NEXT:     SETGE_UINT T2.W, PV.Z, T0.W,
704; EG-NEXT:     SUB_INT * T0.W, PV.Z, T0.W,
705; EG-NEXT:     CNDE_INT T1.Z, PV.W, T1.Z, PS,
706; EG-NEXT:     SETGE_UINT T0.W, PV.Z, T1.W,
707; EG-NEXT:     SUB_INT * T1.W, PV.Z, T1.W,
708; EG-NEXT:     CNDE_INT T0.W, PV.W, T0.Z, PS, BS:VEC_021/SCL_122
709; EG-NEXT:     XOR_INT * T1.W, PV.Z, T2.Z,
710; EG-NEXT:     SUB_INT T0.Y, PS, T2.Z,
711; EG-NEXT:     XOR_INT * T0.W, PV.W, T3.W,
712; EG-NEXT:     SUB_INT T0.X, PV.W, T3.W,
713; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
714; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  ; Expected lowering above: numerator and denominator are fetched together by
  ; one 128-bit load, then the reciprocal-estimate remainder sequence (abs,
  ; reciprocal, two compare-and-subtract corrections, xor/sub sign fix-up) is
  ; emitted once per vector element.
  ; %den_ptr is the <2 x i32> element immediately after the one %num is loaded from.
715  %den_ptr = getelementptr <2 x i32>, ptr addrspace(1) %in, i32 1
716  %num = load <2 x i32>, ptr addrspace(1) %in
717  %den = load <2 x i32>, ptr addrspace(1) %den_ptr
718  %result = srem <2 x i32> %num, %den
719  store <2 x i32> %result, ptr addrspace(1) %out
720  ret void
721}
722
; Signed remainder of a <2 x i32> loaded from %in by the constant splat <4, 4>,
; with the result stored to %out.
; In the autogenerated checks below, every target handles each lane without a
; reciprocal or divide sequence: arithmetic shift right by 31, logical shift
; right by 30, add to the input, mask with -4, then subtract from the input.
; The check lines are generated by utils/update_llc_test_checks.py (see the NOTE
; on the first line of this file); regenerate them instead of editing by hand.
723define amdgpu_kernel void @srem_v2i32_4(ptr addrspace(1) %out, ptr addrspace(1) %in) {
724; GCN-LABEL: srem_v2i32_4:
725; GCN:       ; %bb.0:
726; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
727; GCN-NEXT:    v_mov_b32_e32 v2, 0
728; GCN-NEXT:    s_waitcnt lgkmcnt(0)
729; GCN-NEXT:    global_load_dwordx2 v[0:1], v2, s[2:3]
730; GCN-NEXT:    s_waitcnt vmcnt(0)
731; GCN-NEXT:    v_readfirstlane_b32 s2, v0
732; GCN-NEXT:    v_readfirstlane_b32 s3, v1
733; GCN-NEXT:    s_ashr_i32 s4, s2, 31
734; GCN-NEXT:    s_ashr_i32 s5, s3, 31
735; GCN-NEXT:    s_lshr_b32 s4, s4, 30
736; GCN-NEXT:    s_lshr_b32 s5, s5, 30
737; GCN-NEXT:    s_add_i32 s4, s2, s4
738; GCN-NEXT:    s_add_i32 s5, s3, s5
739; GCN-NEXT:    s_and_b32 s4, s4, -4
740; GCN-NEXT:    s_and_b32 s5, s5, -4
741; GCN-NEXT:    s_sub_i32 s2, s2, s4
742; GCN-NEXT:    s_sub_i32 s3, s3, s5
743; GCN-NEXT:    v_mov_b32_e32 v0, s2
744; GCN-NEXT:    v_mov_b32_e32 v1, s3
745; GCN-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
746; GCN-NEXT:    s_endpgm
747;
748; TAHITI-LABEL: srem_v2i32_4:
749; TAHITI:       ; %bb.0:
750; TAHITI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
751; TAHITI-NEXT:    s_mov_b32 s7, 0xf000
752; TAHITI-NEXT:    s_mov_b32 s6, -1
753; TAHITI-NEXT:    s_mov_b32 s10, s6
754; TAHITI-NEXT:    s_mov_b32 s11, s7
755; TAHITI-NEXT:    s_waitcnt lgkmcnt(0)
756; TAHITI-NEXT:    s_mov_b32 s8, s2
757; TAHITI-NEXT:    s_mov_b32 s9, s3
758; TAHITI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
759; TAHITI-NEXT:    s_mov_b32 s4, s0
760; TAHITI-NEXT:    s_mov_b32 s5, s1
761; TAHITI-NEXT:    s_waitcnt vmcnt(0)
762; TAHITI-NEXT:    v_readfirstlane_b32 s0, v0
763; TAHITI-NEXT:    v_readfirstlane_b32 s1, v1
764; TAHITI-NEXT:    s_ashr_i32 s2, s0, 31
765; TAHITI-NEXT:    s_ashr_i32 s3, s1, 31
766; TAHITI-NEXT:    s_lshr_b32 s2, s2, 30
767; TAHITI-NEXT:    s_lshr_b32 s3, s3, 30
768; TAHITI-NEXT:    s_add_i32 s2, s0, s2
769; TAHITI-NEXT:    s_add_i32 s3, s1, s3
770; TAHITI-NEXT:    s_and_b32 s2, s2, -4
771; TAHITI-NEXT:    s_and_b32 s3, s3, -4
772; TAHITI-NEXT:    s_sub_i32 s0, s0, s2
773; TAHITI-NEXT:    s_sub_i32 s1, s1, s3
774; TAHITI-NEXT:    v_mov_b32_e32 v0, s0
775; TAHITI-NEXT:    v_mov_b32_e32 v1, s1
776; TAHITI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
777; TAHITI-NEXT:    s_endpgm
778;
779; TONGA-LABEL: srem_v2i32_4:
780; TONGA:       ; %bb.0:
781; TONGA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
782; TONGA-NEXT:    s_waitcnt lgkmcnt(0)
783; TONGA-NEXT:    v_mov_b32_e32 v0, s2
784; TONGA-NEXT:    v_mov_b32_e32 v1, s3
785; TONGA-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
786; TONGA-NEXT:    v_mov_b32_e32 v2, s0
787; TONGA-NEXT:    v_mov_b32_e32 v3, s1
788; TONGA-NEXT:    s_waitcnt vmcnt(0)
789; TONGA-NEXT:    v_readfirstlane_b32 s0, v0
790; TONGA-NEXT:    v_readfirstlane_b32 s1, v1
791; TONGA-NEXT:    s_ashr_i32 s2, s0, 31
792; TONGA-NEXT:    s_ashr_i32 s3, s1, 31
793; TONGA-NEXT:    s_lshr_b32 s2, s2, 30
794; TONGA-NEXT:    s_lshr_b32 s3, s3, 30
795; TONGA-NEXT:    s_add_i32 s2, s0, s2
796; TONGA-NEXT:    s_add_i32 s3, s1, s3
797; TONGA-NEXT:    s_and_b32 s2, s2, -4
798; TONGA-NEXT:    s_and_b32 s3, s3, -4
799; TONGA-NEXT:    s_sub_i32 s0, s0, s2
800; TONGA-NEXT:    s_sub_i32 s1, s1, s3
801; TONGA-NEXT:    v_mov_b32_e32 v0, s0
802; TONGA-NEXT:    v_mov_b32_e32 v1, s1
803; TONGA-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
804; TONGA-NEXT:    s_endpgm
805;
806; EG-LABEL: srem_v2i32_4:
807; EG:       ; %bb.0:
808; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
809; EG-NEXT:    TEX 0 @6
810; EG-NEXT:    ALU 16, @9, KC0[CB0:0-32], KC1[]
811; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
812; EG-NEXT:    CF_END
813; EG-NEXT:    PAD
814; EG-NEXT:    Fetch clause starting at 6:
815; EG-NEXT:     VTX_READ_64 T0.XY, T0.X, 0, #1
816; EG-NEXT:    ALU clause starting at 8:
817; EG-NEXT:     MOV * T0.X, KC0[2].Z,
818; EG-NEXT:    ALU clause starting at 9:
819; EG-NEXT:     ASHR T0.W, T0.Y, literal.x,
820; EG-NEXT:     ASHR * T1.W, T0.X, literal.x,
821; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
822; EG-NEXT:     LSHR * T0.W, PV.W, literal.x,
823; EG-NEXT:    30(4.203895e-44), 0(0.000000e+00)
824; EG-NEXT:     ADD_INT T0.W, T0.Y, PV.W,
825; EG-NEXT:     LSHR * T1.W, T1.W, literal.x,
826; EG-NEXT:    30(4.203895e-44), 0(0.000000e+00)
827; EG-NEXT:     ADD_INT T1.W, T0.X, PS,
828; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
829; EG-NEXT:    -4(nan), 0(0.000000e+00)
830; EG-NEXT:     SUB_INT T0.Y, T0.Y, PS,
831; EG-NEXT:     AND_INT * T0.W, PV.W, literal.x,
832; EG-NEXT:    -4(nan), 0(0.000000e+00)
833; EG-NEXT:     SUB_INT T0.X, T0.X, PV.W,
834; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
835; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; IR under test: a single vector load, srem by the constant splat, and a store.
836  %num = load <2 x i32>, ptr addrspace(1) %in
837  %result = srem <2 x i32> %num, <i32 4, i32 4>
838  store <2 x i32> %result, ptr addrspace(1) %out
839  ret void
840}
841
; Signed remainder of two <4 x i32> vectors with a non-constant divisor. The
; numerator is loaded from %in and the denominator from element index 1 of %in
; viewed as <4 x i32> (the checks load it at byte offset 16); the result is
; stored to %out.
; In the autogenerated checks below, each lane on the amdgcn targets is expanded
; as: absolute value of both operands, a float reciprocal estimate of the
; divisor (v_cvt_f32_u32 / v_rcp_iflag_f32 / multiply by 0x4f7ffffe /
; v_cvt_u32_f32), a multiply-high quotient estimate, two compare-and-select
; remainder corrections, and finally xor/subtract with the numerator's sign
; (arithmetic shift right by 31). The r600 checks use RECIP_UINT, MULHI,
; SETGE_UINT and CNDE_INT for what appears to be the same scheme.
; The check lines are generated by utils/update_llc_test_checks.py (see the NOTE
; on the first line of this file); regenerate them instead of editing by hand.
842define amdgpu_kernel void @srem_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
843; GCN-LABEL: srem_v4i32:
844; GCN:       ; %bb.0:
845; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
846; GCN-NEXT:    v_mov_b32_e32 v0, 0
847; GCN-NEXT:    s_waitcnt lgkmcnt(0)
848; GCN-NEXT:    global_load_dwordx4 v[1:4], v0, s[2:3] offset:16
849; GCN-NEXT:    global_load_dwordx4 v[5:8], v0, s[2:3]
850; GCN-NEXT:    s_waitcnt vmcnt(1)
851; GCN-NEXT:    v_readfirstlane_b32 s2, v1
852; GCN-NEXT:    s_abs_i32 s2, s2
853; GCN-NEXT:    v_cvt_f32_u32_e32 v1, s2
854; GCN-NEXT:    s_sub_i32 s6, 0, s2
855; GCN-NEXT:    s_waitcnt vmcnt(0)
856; GCN-NEXT:    v_readfirstlane_b32 s4, v5
857; GCN-NEXT:    s_ashr_i32 s5, s4, 31
858; GCN-NEXT:    v_rcp_iflag_f32_e32 v1, v1
859; GCN-NEXT:    s_abs_i32 s4, s4
860; GCN-NEXT:    v_readfirstlane_b32 s3, v2
861; GCN-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
862; GCN-NEXT:    v_cvt_u32_f32_e32 v1, v1
863; GCN-NEXT:    v_readfirstlane_b32 s7, v1
864; GCN-NEXT:    s_mul_i32 s6, s6, s7
865; GCN-NEXT:    s_mul_hi_u32 s6, s7, s6
866; GCN-NEXT:    s_add_i32 s7, s7, s6
867; GCN-NEXT:    s_mul_hi_u32 s6, s4, s7
868; GCN-NEXT:    s_mul_i32 s6, s6, s2
869; GCN-NEXT:    s_sub_i32 s4, s4, s6
870; GCN-NEXT:    s_sub_i32 s6, s4, s2
871; GCN-NEXT:    s_cmp_ge_u32 s4, s2
872; GCN-NEXT:    s_cselect_b32 s4, s6, s4
873; GCN-NEXT:    s_sub_i32 s6, s4, s2
874; GCN-NEXT:    s_cmp_ge_u32 s4, s2
875; GCN-NEXT:    s_cselect_b32 s2, s6, s4
876; GCN-NEXT:    s_abs_i32 s3, s3
877; GCN-NEXT:    v_cvt_f32_u32_e32 v1, s3
878; GCN-NEXT:    s_xor_b32 s2, s2, s5
879; GCN-NEXT:    s_sub_i32 s8, 0, s3
880; GCN-NEXT:    s_sub_i32 s2, s2, s5
881; GCN-NEXT:    v_rcp_iflag_f32_e32 v1, v1
882; GCN-NEXT:    v_readfirstlane_b32 s6, v6
883; GCN-NEXT:    s_ashr_i32 s7, s6, 31
884; GCN-NEXT:    s_abs_i32 s6, s6
885; GCN-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
886; GCN-NEXT:    v_cvt_u32_f32_e32 v1, v1
887; GCN-NEXT:    v_readfirstlane_b32 s4, v3
888; GCN-NEXT:    v_readfirstlane_b32 s5, v1
889; GCN-NEXT:    s_mul_i32 s8, s8, s5
890; GCN-NEXT:    s_mul_hi_u32 s8, s5, s8
891; GCN-NEXT:    s_add_i32 s5, s5, s8
892; GCN-NEXT:    s_mul_hi_u32 s5, s6, s5
893; GCN-NEXT:    s_mul_i32 s5, s5, s3
894; GCN-NEXT:    s_sub_i32 s5, s6, s5
895; GCN-NEXT:    s_sub_i32 s6, s5, s3
896; GCN-NEXT:    s_cmp_ge_u32 s5, s3
897; GCN-NEXT:    s_cselect_b32 s5, s6, s5
898; GCN-NEXT:    s_sub_i32 s6, s5, s3
899; GCN-NEXT:    s_cmp_ge_u32 s5, s3
900; GCN-NEXT:    s_cselect_b32 s3, s6, s5
901; GCN-NEXT:    s_abs_i32 s4, s4
902; GCN-NEXT:    v_cvt_f32_u32_e32 v1, s4
903; GCN-NEXT:    s_xor_b32 s3, s3, s7
904; GCN-NEXT:    s_sub_i32 s9, 0, s4
905; GCN-NEXT:    s_sub_i32 s3, s3, s7
906; GCN-NEXT:    v_rcp_iflag_f32_e32 v1, v1
907; GCN-NEXT:    v_readfirstlane_b32 s6, v7
908; GCN-NEXT:    s_ashr_i32 s8, s6, 31
909; GCN-NEXT:    s_abs_i32 s6, s6
910; GCN-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
911; GCN-NEXT:    v_cvt_u32_f32_e32 v1, v1
912; GCN-NEXT:    v_readfirstlane_b32 s5, v4
913; GCN-NEXT:    v_readfirstlane_b32 s7, v1
914; GCN-NEXT:    s_mul_i32 s9, s9, s7
915; GCN-NEXT:    s_mul_hi_u32 s9, s7, s9
916; GCN-NEXT:    s_add_i32 s7, s7, s9
917; GCN-NEXT:    s_mul_hi_u32 s7, s6, s7
918; GCN-NEXT:    s_mul_i32 s7, s7, s4
919; GCN-NEXT:    s_sub_i32 s6, s6, s7
920; GCN-NEXT:    s_sub_i32 s7, s6, s4
921; GCN-NEXT:    s_cmp_ge_u32 s6, s4
922; GCN-NEXT:    s_cselect_b32 s6, s7, s6
923; GCN-NEXT:    s_sub_i32 s7, s6, s4
924; GCN-NEXT:    s_cmp_ge_u32 s6, s4
925; GCN-NEXT:    s_cselect_b32 s4, s7, s6
926; GCN-NEXT:    s_abs_i32 s5, s5
927; GCN-NEXT:    v_cvt_f32_u32_e32 v2, s5
928; GCN-NEXT:    v_readfirstlane_b32 s6, v8
929; GCN-NEXT:    v_mov_b32_e32 v1, s2
930; GCN-NEXT:    s_ashr_i32 s2, s6, 31
931; GCN-NEXT:    v_rcp_iflag_f32_e32 v3, v2
932; GCN-NEXT:    v_mov_b32_e32 v2, s3
933; GCN-NEXT:    s_abs_i32 s3, s6
934; GCN-NEXT:    s_sub_i32 s6, 0, s5
935; GCN-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
936; GCN-NEXT:    v_cvt_u32_f32_e32 v3, v3
937; GCN-NEXT:    s_xor_b32 s4, s4, s8
938; GCN-NEXT:    s_sub_i32 s4, s4, s8
939; GCN-NEXT:    v_readfirstlane_b32 s7, v3
940; GCN-NEXT:    s_mul_i32 s6, s6, s7
941; GCN-NEXT:    s_mul_hi_u32 s6, s7, s6
942; GCN-NEXT:    s_add_i32 s7, s7, s6
943; GCN-NEXT:    s_mul_hi_u32 s6, s3, s7
944; GCN-NEXT:    s_mul_i32 s6, s6, s5
945; GCN-NEXT:    s_sub_i32 s3, s3, s6
946; GCN-NEXT:    s_sub_i32 s6, s3, s5
947; GCN-NEXT:    s_cmp_ge_u32 s3, s5
948; GCN-NEXT:    s_cselect_b32 s3, s6, s3
949; GCN-NEXT:    s_sub_i32 s6, s3, s5
950; GCN-NEXT:    s_cmp_ge_u32 s3, s5
951; GCN-NEXT:    s_cselect_b32 s3, s6, s3
952; GCN-NEXT:    s_xor_b32 s3, s3, s2
953; GCN-NEXT:    s_sub_i32 s2, s3, s2
954; GCN-NEXT:    v_mov_b32_e32 v3, s4
955; GCN-NEXT:    v_mov_b32_e32 v4, s2
956; GCN-NEXT:    global_store_dwordx4 v0, v[1:4], s[0:1]
957; GCN-NEXT:    s_endpgm
958;
959; TAHITI-LABEL: srem_v4i32:
960; TAHITI:       ; %bb.0:
961; TAHITI-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x9
962; TAHITI-NEXT:    s_mov_b32 s3, 0xf000
963; TAHITI-NEXT:    s_mov_b32 s2, -1
964; TAHITI-NEXT:    s_mov_b32 s10, s2
965; TAHITI-NEXT:    s_mov_b32 s11, s3
966; TAHITI-NEXT:    s_waitcnt lgkmcnt(0)
967; TAHITI-NEXT:    s_mov_b32 s8, s6
968; TAHITI-NEXT:    s_mov_b32 s9, s7
969; TAHITI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:16
970; TAHITI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0
971; TAHITI-NEXT:    s_waitcnt vmcnt(1)
972; TAHITI-NEXT:    v_readfirstlane_b32 s0, v0
973; TAHITI-NEXT:    s_abs_i32 s0, s0
974; TAHITI-NEXT:    v_cvt_f32_u32_e32 v0, s0
975; TAHITI-NEXT:    s_sub_i32 s1, 0, s0
976; TAHITI-NEXT:    v_readfirstlane_b32 s7, v1
977; TAHITI-NEXT:    v_rcp_iflag_f32_e32 v0, v0
978; TAHITI-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
979; TAHITI-NEXT:    v_cvt_u32_f32_e32 v0, v0
980; TAHITI-NEXT:    v_mul_lo_u32 v8, s1, v0
981; TAHITI-NEXT:    s_waitcnt vmcnt(0)
982; TAHITI-NEXT:    v_readfirstlane_b32 s1, v4
983; TAHITI-NEXT:    s_abs_i32 s6, s1
984; TAHITI-NEXT:    s_ashr_i32 s1, s1, 31
985; TAHITI-NEXT:    v_mul_hi_u32 v8, v0, v8
986; TAHITI-NEXT:    v_add_i32_e32 v0, vcc, v0, v8
987; TAHITI-NEXT:    v_mul_hi_u32 v0, s6, v0
988; TAHITI-NEXT:    v_readfirstlane_b32 s8, v0
989; TAHITI-NEXT:    s_mul_i32 s8, s8, s0
990; TAHITI-NEXT:    s_sub_i32 s6, s6, s8
991; TAHITI-NEXT:    s_sub_i32 s8, s6, s0
992; TAHITI-NEXT:    s_cmp_ge_u32 s6, s0
993; TAHITI-NEXT:    s_cselect_b32 s6, s8, s6
994; TAHITI-NEXT:    s_sub_i32 s8, s6, s0
995; TAHITI-NEXT:    s_cmp_ge_u32 s6, s0
996; TAHITI-NEXT:    s_cselect_b32 s0, s8, s6
997; TAHITI-NEXT:    s_abs_i32 s6, s7
998; TAHITI-NEXT:    v_cvt_f32_u32_e32 v0, s6
999; TAHITI-NEXT:    s_sub_i32 s7, 0, s6
1000; TAHITI-NEXT:    v_readfirstlane_b32 s8, v5
1001; TAHITI-NEXT:    s_abs_i32 s9, s8
1002; TAHITI-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1003; TAHITI-NEXT:    s_xor_b32 s0, s0, s1
1004; TAHITI-NEXT:    s_sub_i32 s10, s0, s1
1005; TAHITI-NEXT:    s_ashr_i32 s8, s8, 31
1006; TAHITI-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1007; TAHITI-NEXT:    v_cvt_u32_f32_e32 v0, v0
1008; TAHITI-NEXT:    v_mul_lo_u32 v1, s7, v0
1009; TAHITI-NEXT:    v_readfirstlane_b32 s7, v2
1010; TAHITI-NEXT:    v_mul_hi_u32 v1, v0, v1
1011; TAHITI-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
1012; TAHITI-NEXT:    v_mul_hi_u32 v0, s9, v0
1013; TAHITI-NEXT:    v_readfirstlane_b32 s0, v0
1014; TAHITI-NEXT:    s_mul_i32 s0, s0, s6
1015; TAHITI-NEXT:    s_sub_i32 s0, s9, s0
1016; TAHITI-NEXT:    s_sub_i32 s1, s0, s6
1017; TAHITI-NEXT:    s_cmp_ge_u32 s0, s6
1018; TAHITI-NEXT:    s_cselect_b32 s0, s1, s0
1019; TAHITI-NEXT:    s_sub_i32 s1, s0, s6
1020; TAHITI-NEXT:    s_cmp_ge_u32 s0, s6
1021; TAHITI-NEXT:    s_cselect_b32 s0, s1, s0
1022; TAHITI-NEXT:    s_abs_i32 s1, s7
1023; TAHITI-NEXT:    v_cvt_f32_u32_e32 v0, s1
1024; TAHITI-NEXT:    s_sub_i32 s6, 0, s1
1025; TAHITI-NEXT:    v_readfirstlane_b32 s7, v6
1026; TAHITI-NEXT:    s_abs_i32 s9, s7
1027; TAHITI-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1028; TAHITI-NEXT:    s_xor_b32 s0, s0, s8
1029; TAHITI-NEXT:    s_sub_i32 s8, s0, s8
1030; TAHITI-NEXT:    s_ashr_i32 s7, s7, 31
1031; TAHITI-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1032; TAHITI-NEXT:    v_cvt_u32_f32_e32 v0, v0
1033; TAHITI-NEXT:    v_mul_lo_u32 v1, s6, v0
1034; TAHITI-NEXT:    v_readfirstlane_b32 s6, v3
1035; TAHITI-NEXT:    v_mul_hi_u32 v1, v0, v1
1036; TAHITI-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
1037; TAHITI-NEXT:    v_mul_hi_u32 v0, s9, v0
1038; TAHITI-NEXT:    v_readfirstlane_b32 s0, v0
1039; TAHITI-NEXT:    s_mul_i32 s0, s0, s1
1040; TAHITI-NEXT:    s_sub_i32 s0, s9, s0
1041; TAHITI-NEXT:    s_sub_i32 s9, s0, s1
1042; TAHITI-NEXT:    s_cmp_ge_u32 s0, s1
1043; TAHITI-NEXT:    s_cselect_b32 s0, s9, s0
1044; TAHITI-NEXT:    s_sub_i32 s9, s0, s1
1045; TAHITI-NEXT:    s_cmp_ge_u32 s0, s1
1046; TAHITI-NEXT:    s_cselect_b32 s9, s9, s0
1047; TAHITI-NEXT:    s_abs_i32 s6, s6
1048; TAHITI-NEXT:    v_cvt_f32_u32_e32 v0, s6
1049; TAHITI-NEXT:    s_sub_i32 s1, 0, s6
1050; TAHITI-NEXT:    s_mov_b32 s0, s4
1051; TAHITI-NEXT:    v_readfirstlane_b32 s4, v7
1052; TAHITI-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1053; TAHITI-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1054; TAHITI-NEXT:    v_cvt_u32_f32_e32 v2, v0
1055; TAHITI-NEXT:    v_mov_b32_e32 v0, s10
1056; TAHITI-NEXT:    v_mul_lo_u32 v1, s1, v2
1057; TAHITI-NEXT:    s_mov_b32 s1, s5
1058; TAHITI-NEXT:    s_abs_i32 s5, s4
1059; TAHITI-NEXT:    s_ashr_i32 s4, s4, 31
1060; TAHITI-NEXT:    v_mul_hi_u32 v3, v2, v1
1061; TAHITI-NEXT:    v_mov_b32_e32 v1, s8
1062; TAHITI-NEXT:    s_xor_b32 s8, s9, s7
1063; TAHITI-NEXT:    s_sub_i32 s7, s8, s7
1064; TAHITI-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
1065; TAHITI-NEXT:    v_mul_hi_u32 v2, s5, v2
1066; TAHITI-NEXT:    v_readfirstlane_b32 s8, v2
1067; TAHITI-NEXT:    s_mul_i32 s8, s8, s6
1068; TAHITI-NEXT:    s_sub_i32 s5, s5, s8
1069; TAHITI-NEXT:    s_sub_i32 s8, s5, s6
1070; TAHITI-NEXT:    s_cmp_ge_u32 s5, s6
1071; TAHITI-NEXT:    s_cselect_b32 s5, s8, s5
1072; TAHITI-NEXT:    s_sub_i32 s8, s5, s6
1073; TAHITI-NEXT:    s_cmp_ge_u32 s5, s6
1074; TAHITI-NEXT:    s_cselect_b32 s5, s8, s5
1075; TAHITI-NEXT:    s_xor_b32 s5, s5, s4
1076; TAHITI-NEXT:    s_sub_i32 s4, s5, s4
1077; TAHITI-NEXT:    v_mov_b32_e32 v2, s7
1078; TAHITI-NEXT:    v_mov_b32_e32 v3, s4
1079; TAHITI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1080; TAHITI-NEXT:    s_endpgm
1081;
1082; TONGA-LABEL: srem_v4i32:
1083; TONGA:       ; %bb.0:
1084; TONGA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1085; TONGA-NEXT:    s_waitcnt lgkmcnt(0)
1086; TONGA-NEXT:    s_add_u32 s4, s2, 16
1087; TONGA-NEXT:    s_addc_u32 s5, s3, 0
1088; TONGA-NEXT:    v_mov_b32_e32 v0, s4
1089; TONGA-NEXT:    v_mov_b32_e32 v1, s5
1090; TONGA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
1091; TONGA-NEXT:    v_mov_b32_e32 v5, s3
1092; TONGA-NEXT:    v_mov_b32_e32 v4, s2
1093; TONGA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
1094; TONGA-NEXT:    s_waitcnt vmcnt(1)
1095; TONGA-NEXT:    v_readfirstlane_b32 s2, v0
1096; TONGA-NEXT:    s_abs_i32 s2, s2
1097; TONGA-NEXT:    v_cvt_f32_u32_e32 v0, s2
1098; TONGA-NEXT:    s_sub_i32 s3, 0, s2
1099; TONGA-NEXT:    v_readfirstlane_b32 s5, v1
1100; TONGA-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1101; TONGA-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1102; TONGA-NEXT:    v_cvt_u32_f32_e32 v0, v0
1103; TONGA-NEXT:    v_mul_lo_u32 v8, s3, v0
1104; TONGA-NEXT:    s_waitcnt vmcnt(0)
1105; TONGA-NEXT:    v_readfirstlane_b32 s3, v4
1106; TONGA-NEXT:    s_abs_i32 s4, s3
1107; TONGA-NEXT:    s_ashr_i32 s3, s3, 31
1108; TONGA-NEXT:    v_mul_hi_u32 v8, v0, v8
1109; TONGA-NEXT:    v_mov_b32_e32 v4, s0
1110; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v0, v8
1111; TONGA-NEXT:    v_mul_hi_u32 v0, s4, v0
1112; TONGA-NEXT:    v_readfirstlane_b32 s6, v0
1113; TONGA-NEXT:    s_mul_i32 s6, s6, s2
1114; TONGA-NEXT:    s_sub_i32 s4, s4, s6
1115; TONGA-NEXT:    s_sub_i32 s6, s4, s2
1116; TONGA-NEXT:    s_cmp_ge_u32 s4, s2
1117; TONGA-NEXT:    s_cselect_b32 s4, s6, s4
1118; TONGA-NEXT:    s_sub_i32 s6, s4, s2
1119; TONGA-NEXT:    s_cmp_ge_u32 s4, s2
1120; TONGA-NEXT:    s_cselect_b32 s2, s6, s4
1121; TONGA-NEXT:    s_abs_i32 s4, s5
1122; TONGA-NEXT:    v_cvt_f32_u32_e32 v0, s4
1123; TONGA-NEXT:    s_sub_i32 s5, 0, s4
1124; TONGA-NEXT:    v_readfirstlane_b32 s6, v5
1125; TONGA-NEXT:    s_abs_i32 s7, s6
1126; TONGA-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1127; TONGA-NEXT:    s_xor_b32 s2, s2, s3
1128; TONGA-NEXT:    s_sub_i32 s2, s2, s3
1129; TONGA-NEXT:    s_ashr_i32 s6, s6, 31
1130; TONGA-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1131; TONGA-NEXT:    v_cvt_u32_f32_e32 v0, v0
1132; TONGA-NEXT:    v_mov_b32_e32 v5, s1
1133; TONGA-NEXT:    v_mul_lo_u32 v1, s5, v0
1134; TONGA-NEXT:    v_readfirstlane_b32 s5, v2
1135; TONGA-NEXT:    v_mul_hi_u32 v1, v0, v1
1136; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
1137; TONGA-NEXT:    v_mul_hi_u32 v0, s7, v0
1138; TONGA-NEXT:    v_readfirstlane_b32 s3, v0
1139; TONGA-NEXT:    s_mul_i32 s3, s3, s4
1140; TONGA-NEXT:    s_sub_i32 s3, s7, s3
1141; TONGA-NEXT:    s_sub_i32 s7, s3, s4
1142; TONGA-NEXT:    s_cmp_ge_u32 s3, s4
1143; TONGA-NEXT:    s_cselect_b32 s3, s7, s3
1144; TONGA-NEXT:    s_sub_i32 s7, s3, s4
1145; TONGA-NEXT:    s_cmp_ge_u32 s3, s4
1146; TONGA-NEXT:    s_cselect_b32 s3, s7, s3
1147; TONGA-NEXT:    s_abs_i32 s4, s5
1148; TONGA-NEXT:    v_cvt_f32_u32_e32 v0, s4
1149; TONGA-NEXT:    s_sub_i32 s5, 0, s4
1150; TONGA-NEXT:    v_readfirstlane_b32 s7, v6
1151; TONGA-NEXT:    s_abs_i32 s8, s7
1152; TONGA-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1153; TONGA-NEXT:    s_xor_b32 s3, s3, s6
1154; TONGA-NEXT:    s_sub_i32 s3, s3, s6
1155; TONGA-NEXT:    s_ashr_i32 s7, s7, 31
1156; TONGA-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1157; TONGA-NEXT:    v_cvt_u32_f32_e32 v0, v0
1158; TONGA-NEXT:    v_mul_lo_u32 v1, s5, v0
1159; TONGA-NEXT:    v_readfirstlane_b32 s5, v3
1160; TONGA-NEXT:    v_mul_hi_u32 v1, v0, v1
1161; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
1162; TONGA-NEXT:    v_mul_hi_u32 v0, s8, v0
1163; TONGA-NEXT:    v_readfirstlane_b32 s6, v0
1164; TONGA-NEXT:    s_mul_i32 s6, s6, s4
1165; TONGA-NEXT:    s_sub_i32 s6, s8, s6
1166; TONGA-NEXT:    s_sub_i32 s8, s6, s4
1167; TONGA-NEXT:    s_cmp_ge_u32 s6, s4
1168; TONGA-NEXT:    s_cselect_b32 s6, s8, s6
1169; TONGA-NEXT:    s_sub_i32 s8, s6, s4
1170; TONGA-NEXT:    s_cmp_ge_u32 s6, s4
1171; TONGA-NEXT:    s_cselect_b32 s4, s8, s6
1172; TONGA-NEXT:    s_abs_i32 s5, s5
1173; TONGA-NEXT:    v_cvt_f32_u32_e32 v0, s5
1174; TONGA-NEXT:    s_sub_i32 s0, 0, s5
1175; TONGA-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1176; TONGA-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1177; TONGA-NEXT:    v_cvt_u32_f32_e32 v2, v0
1178; TONGA-NEXT:    v_mov_b32_e32 v0, s2
1179; TONGA-NEXT:    s_xor_b32 s2, s4, s7
1180; TONGA-NEXT:    s_sub_i32 s2, s2, s7
1181; TONGA-NEXT:    v_mul_lo_u32 v1, s0, v2
1182; TONGA-NEXT:    v_readfirstlane_b32 s0, v7
1183; TONGA-NEXT:    s_abs_i32 s1, s0
1184; TONGA-NEXT:    s_ashr_i32 s0, s0, 31
1185; TONGA-NEXT:    v_mul_hi_u32 v3, v2, v1
1186; TONGA-NEXT:    v_mov_b32_e32 v1, s3
1187; TONGA-NEXT:    v_add_u32_e32 v2, vcc, v2, v3
1188; TONGA-NEXT:    v_mul_hi_u32 v2, s1, v2
1189; TONGA-NEXT:    v_readfirstlane_b32 s3, v2
1190; TONGA-NEXT:    s_mul_i32 s3, s3, s5
1191; TONGA-NEXT:    s_sub_i32 s1, s1, s3
1192; TONGA-NEXT:    s_sub_i32 s3, s1, s5
1193; TONGA-NEXT:    s_cmp_ge_u32 s1, s5
1194; TONGA-NEXT:    s_cselect_b32 s1, s3, s1
1195; TONGA-NEXT:    s_sub_i32 s3, s1, s5
1196; TONGA-NEXT:    s_cmp_ge_u32 s1, s5
1197; TONGA-NEXT:    s_cselect_b32 s1, s3, s1
1198; TONGA-NEXT:    s_xor_b32 s1, s1, s0
1199; TONGA-NEXT:    s_sub_i32 s0, s1, s0
1200; TONGA-NEXT:    v_mov_b32_e32 v2, s2
1201; TONGA-NEXT:    v_mov_b32_e32 v3, s0
1202; TONGA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1203; TONGA-NEXT:    s_endpgm
1204;
1205; EG-LABEL: srem_v4i32:
1206; EG:       ; %bb.0:
1207; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
1208; EG-NEXT:    TEX 1 @6
1209; EG-NEXT:    ALU 89, @11, KC0[CB0:0-32], KC1[]
1210; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T2.XYZW, T0.X, 1
1211; EG-NEXT:    CF_END
1212; EG-NEXT:    PAD
1213; EG-NEXT:    Fetch clause starting at 6:
1214; EG-NEXT:     VTX_READ_128 T1.XYZW, T0.X, 16, #1
1215; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
1216; EG-NEXT:    ALU clause starting at 10:
1217; EG-NEXT:     MOV * T0.X, KC0[2].Z,
1218; EG-NEXT:    ALU clause starting at 11:
1219; EG-NEXT:     SETGT_INT * T2.W, 0.0, T1.Z,
1220; EG-NEXT:     ADD_INT T3.W, T1.Z, PV.W,
1221; EG-NEXT:     SETGT_INT * T4.W, 0.0, T1.Y,
1222; EG-NEXT:     XOR_INT * T2.W, PV.W, T2.W,
1223; EG-NEXT:     SUB_INT T1.Z, 0.0, PV.W,
1224; EG-NEXT:     ADD_INT T3.W, T1.Y, T4.W,
1225; EG-NEXT:     RECIP_UINT * T1.Y, PV.W,
1226; EG-NEXT:     XOR_INT T3.W, PV.W, T4.W,
1227; EG-NEXT:     MULLO_INT * T1.Z, PV.Z, PS,
1228; EG-NEXT:     SUB_INT T4.W, 0.0, PV.W,
1229; EG-NEXT:     RECIP_UINT * T2.X, PV.W,
1230; EG-NEXT:     SETGT_INT T5.W, 0.0, T0.Y,
1231; EG-NEXT:     MULLO_INT * T2.Y, PV.W, PS,
1232; EG-NEXT:     SETGT_INT T2.Z, 0.0, T0.Z,
1233; EG-NEXT:     ADD_INT T4.W, T0.Y, PV.W,
1234; EG-NEXT:     MULHI * T0.Y, T2.X, PS,
1235; EG-NEXT:     ADD_INT T2.X, T2.X, PS,
1236; EG-NEXT:     XOR_INT T0.Y, PV.W, T5.W,
1237; EG-NEXT:     SETGT_INT T3.Z, 0.0, T1.W, BS:VEC_021/SCL_122
1238; EG-NEXT:     ADD_INT T4.W, T0.Z, PV.Z,
1239; EG-NEXT:     MULHI * T0.Z, T1.Y, T1.Z,
1240; EG-NEXT:     ADD_INT T1.Y, T1.Y, PS,
1241; EG-NEXT:     XOR_INT T0.Z, PV.W, T2.Z,
1242; EG-NEXT:     ADD_INT T1.W, T1.W, PV.Z,
1243; EG-NEXT:     MULHI * T1.Z, PV.Y, PV.X,
1244; EG-NEXT:     XOR_INT T1.W, PV.W, T3.Z,
1245; EG-NEXT:     MULHI * T1.Y, PV.Z, PV.Y,
1246; EG-NEXT:     SUB_INT T4.W, 0.0, PV.W,
1247; EG-NEXT:     RECIP_UINT * T2.X, PV.W,
1248; EG-NEXT:     SETGT_INT T6.W, 0.0, T0.W,
1249; EG-NEXT:     MULLO_INT * T2.Y, PV.W, PS,
1250; EG-NEXT:     ADD_INT T0.W, T0.W, PV.W,
1251; EG-NEXT:     MULHI * T2.Y, T2.X, PS,
1252; EG-NEXT:     ADD_INT T2.Y, T2.X, PS,
1253; EG-NEXT:     XOR_INT T3.Z, PV.W, T6.W, BS:VEC_021/SCL_122
1254; EG-NEXT:     SETGT_INT T0.W, 0.0, T1.X,
1255; EG-NEXT:     MULLO_INT * T1.Y, T1.Y, T2.W,
1256; EG-NEXT:     ADD_INT T4.W, T1.X, PV.W,
1257; EG-NEXT:     MULHI * T1.X, PV.Z, PV.Y,
1258; EG-NEXT:     XOR_INT T0.W, PV.W, T0.W, BS:VEC_021/SCL_122
1259; EG-NEXT:     MULLO_INT * T1.X, PS, T1.W,
1260; EG-NEXT:     SUB_INT T4.W, 0.0, PV.W,
1261; EG-NEXT:     RECIP_UINT * T2.X, PV.W,
1262; EG-NEXT:     SETGT_INT T7.W, 0.0, T0.X,
1263; EG-NEXT:     MULLO_INT * T2.Y, PV.W, PS,
1264; EG-NEXT:     ADD_INT T4.W, T0.X, PV.W,
1265; EG-NEXT:     MULHI * T0.X, T2.X, PS,
1266; EG-NEXT:     ADD_INT T0.X, T2.X, PS,
1267; EG-NEXT:     XOR_INT T2.Y, PV.W, T7.W, BS:VEC_021/SCL_122
1268; EG-NEXT:     SUB_INT T3.Z, T3.Z, T1.X,
1269; EG-NEXT:     SUB_INT T4.W, T0.Z, T1.Y, BS:VEC_120/SCL_212
1270; EG-NEXT:     MULLO_INT * T0.Z, T1.Z, T3.W,
1271; EG-NEXT:     SETGE_UINT T1.X, PV.W, T2.W,
1272; EG-NEXT:     SUB_INT T0.Y, T0.Y, PS,
1273; EG-NEXT:     SETGE_UINT T0.Z, PV.Z, T1.W, BS:VEC_021/SCL_122
1274; EG-NEXT:     SUB_INT T8.W, PV.Z, T1.W, BS:VEC_021/SCL_122
1275; EG-NEXT:     MULHI * T0.X, PV.Y, PV.X,
1276; EG-NEXT:     SUB_INT T2.X, T4.W, T2.W,
1277; EG-NEXT:     CNDE_INT T1.Y, PV.Z, T3.Z, PV.W,
1278; EG-NEXT:     SETGE_UINT T0.Z, PV.Y, T3.W, BS:VEC_021/SCL_122
1279; EG-NEXT:     SUB_INT * T8.W, PV.Y, T3.W, BS:VEC_021/SCL_122
1280; EG-NEXT:     MULLO_INT * T0.X, T0.X, T0.W,
1281; EG-NEXT:     CNDE_INT T3.X, T0.Z, T0.Y, T8.W,
1282; EG-NEXT:     SETGE_UINT T0.Y, T1.Y, T1.W,
1283; EG-NEXT:     SUB_INT T0.Z, T1.Y, T1.W,
1284; EG-NEXT:     CNDE_INT T1.W, T1.X, T4.W, T2.X, BS:VEC_102/SCL_221
1285; EG-NEXT:     SUB_INT * T4.W, T2.Y, PS,
1286; EG-NEXT:     SETGE_UINT T0.X, PS, T0.W,
1287; EG-NEXT:     SUB_INT T2.Y, PS, T0.W,
1288; EG-NEXT:     SETGE_UINT T1.Z, PV.W, T2.W, BS:VEC_021/SCL_122
1289; EG-NEXT:     SUB_INT T2.W, PV.W, T2.W, BS:VEC_021/SCL_122
1290; EG-NEXT:     CNDE_INT * T8.W, PV.Y, T1.Y, PV.Z,
1291; EG-NEXT:     XOR_INT T1.X, PS, T6.W,
1292; EG-NEXT:     CNDE_INT T0.Y, PV.Z, T1.W, PV.W, BS:VEC_021/SCL_122
1293; EG-NEXT:     CNDE_INT * T0.Z, PV.X, T4.W, PV.Y, BS:VEC_102/SCL_221
1294; EG-NEXT:     SETGE_UINT T1.W, T3.X, T3.W,
1295; EG-NEXT:     SUB_INT * T2.W, T3.X, T3.W,
1296; EG-NEXT:     CNDE_INT T0.X, PV.W, T3.X, PS,
1297; EG-NEXT:     SETGE_UINT T1.Y, T0.Z, T0.W, BS:VEC_021/SCL_122
1298; EG-NEXT:     SUB_INT T1.Z, T0.Z, T0.W, BS:VEC_021/SCL_122
1299; EG-NEXT:     XOR_INT T0.W, T0.Y, T2.Z,
1300; EG-NEXT:     SUB_INT * T2.W, T1.X, T6.W,
1301; EG-NEXT:     SUB_INT T2.Z, PV.W, T2.Z,
1302; EG-NEXT:     CNDE_INT T0.W, PV.Y, T0.Z, PV.Z, BS:VEC_021/SCL_122
1303; EG-NEXT:     XOR_INT * T1.W, PV.X, T5.W,
1304; EG-NEXT:     SUB_INT T2.Y, PS, T5.W,
1305; EG-NEXT:     XOR_INT * T0.W, PV.W, T7.W, BS:VEC_021/SCL_122
1306; EG-NEXT:     SUB_INT T2.X, PV.W, T7.W,
1307; EG-NEXT:     LSHR * T0.X, KC0[2].Y, literal.x,
1308; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; IR under test: %den is the <4 x i32> immediately after %num in memory.
1309  %den_ptr = getelementptr <4 x i32>, ptr addrspace(1) %in, i32 1
1310  %num = load <4 x i32>, ptr addrspace(1) %in
1311  %den = load <4 x i32>, ptr addrspace(1) %den_ptr
1312  %result = srem <4 x i32> %num, %den
1313  store <4 x i32> %result, ptr addrspace(1) %out
1314  ret void
1315}
1316
; Signed remainder of a <4 x i32> loaded from %in by the constant splat
; <4, 4, 4, 4>, with the result stored to %out.
; In the autogenerated checks below, every target handles each of the four
; lanes without a reciprocal or divide sequence: arithmetic shift right by 31,
; logical shift right by 30, add to the input, mask with -4, then subtract from
; the input.
; The check lines are generated by utils/update_llc_test_checks.py (see the NOTE
; on the first line of this file); regenerate them instead of editing by hand.
1317define amdgpu_kernel void @srem_v4i32_4(ptr addrspace(1) %out, ptr addrspace(1) %in) {
1318; GCN-LABEL: srem_v4i32_4:
1319; GCN:       ; %bb.0:
1320; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1321; GCN-NEXT:    v_mov_b32_e32 v4, 0
1322; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1323; GCN-NEXT:    global_load_dwordx4 v[0:3], v4, s[2:3]
1324; GCN-NEXT:    s_waitcnt vmcnt(0)
1325; GCN-NEXT:    v_readfirstlane_b32 s2, v0
1326; GCN-NEXT:    v_readfirstlane_b32 s3, v1
1327; GCN-NEXT:    v_readfirstlane_b32 s4, v2
1328; GCN-NEXT:    v_readfirstlane_b32 s5, v3
1329; GCN-NEXT:    s_ashr_i32 s6, s2, 31
1330; GCN-NEXT:    s_ashr_i32 s7, s3, 31
1331; GCN-NEXT:    s_ashr_i32 s8, s4, 31
1332; GCN-NEXT:    s_ashr_i32 s9, s5, 31
1333; GCN-NEXT:    s_lshr_b32 s6, s6, 30
1334; GCN-NEXT:    s_lshr_b32 s7, s7, 30
1335; GCN-NEXT:    s_lshr_b32 s8, s8, 30
1336; GCN-NEXT:    s_lshr_b32 s9, s9, 30
1337; GCN-NEXT:    s_add_i32 s6, s2, s6
1338; GCN-NEXT:    s_add_i32 s7, s3, s7
1339; GCN-NEXT:    s_add_i32 s8, s4, s8
1340; GCN-NEXT:    s_add_i32 s9, s5, s9
1341; GCN-NEXT:    s_and_b32 s6, s6, -4
1342; GCN-NEXT:    s_and_b32 s7, s7, -4
1343; GCN-NEXT:    s_and_b32 s8, s8, -4
1344; GCN-NEXT:    s_and_b32 s9, s9, -4
1345; GCN-NEXT:    s_sub_i32 s2, s2, s6
1346; GCN-NEXT:    s_sub_i32 s3, s3, s7
1347; GCN-NEXT:    s_sub_i32 s4, s4, s8
1348; GCN-NEXT:    s_sub_i32 s5, s5, s9
1349; GCN-NEXT:    v_mov_b32_e32 v0, s2
1350; GCN-NEXT:    v_mov_b32_e32 v1, s3
1351; GCN-NEXT:    v_mov_b32_e32 v2, s4
1352; GCN-NEXT:    v_mov_b32_e32 v3, s5
1353; GCN-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1]
1354; GCN-NEXT:    s_endpgm
1355;
1356; TAHITI-LABEL: srem_v4i32_4:
1357; TAHITI:       ; %bb.0:
1358; TAHITI-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x9
1359; TAHITI-NEXT:    s_mov_b32 s3, 0xf000
1360; TAHITI-NEXT:    s_mov_b32 s2, -1
1361; TAHITI-NEXT:    s_mov_b32 s10, s2
1362; TAHITI-NEXT:    s_mov_b32 s11, s3
1363; TAHITI-NEXT:    s_waitcnt lgkmcnt(0)
1364; TAHITI-NEXT:    s_mov_b32 s8, s6
1365; TAHITI-NEXT:    s_mov_b32 s9, s7
1366; TAHITI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
1367; TAHITI-NEXT:    s_mov_b32 s0, s4
1368; TAHITI-NEXT:    s_mov_b32 s1, s5
1369; TAHITI-NEXT:    s_waitcnt vmcnt(0)
1370; TAHITI-NEXT:    v_readfirstlane_b32 s4, v0
1371; TAHITI-NEXT:    v_readfirstlane_b32 s5, v1
1372; TAHITI-NEXT:    v_readfirstlane_b32 s6, v2
1373; TAHITI-NEXT:    v_readfirstlane_b32 s7, v3
1374; TAHITI-NEXT:    s_ashr_i32 s8, s4, 31
1375; TAHITI-NEXT:    s_ashr_i32 s9, s5, 31
1376; TAHITI-NEXT:    s_ashr_i32 s10, s6, 31
1377; TAHITI-NEXT:    s_ashr_i32 s11, s7, 31
1378; TAHITI-NEXT:    s_lshr_b32 s8, s8, 30
1379; TAHITI-NEXT:    s_lshr_b32 s9, s9, 30
1380; TAHITI-NEXT:    s_lshr_b32 s10, s10, 30
1381; TAHITI-NEXT:    s_lshr_b32 s11, s11, 30
1382; TAHITI-NEXT:    s_add_i32 s8, s4, s8
1383; TAHITI-NEXT:    s_add_i32 s9, s5, s9
1384; TAHITI-NEXT:    s_add_i32 s10, s6, s10
1385; TAHITI-NEXT:    s_add_i32 s11, s7, s11
1386; TAHITI-NEXT:    s_and_b32 s8, s8, -4
1387; TAHITI-NEXT:    s_and_b32 s9, s9, -4
1388; TAHITI-NEXT:    s_and_b32 s10, s10, -4
1389; TAHITI-NEXT:    s_and_b32 s11, s11, -4
1390; TAHITI-NEXT:    s_sub_i32 s4, s4, s8
1391; TAHITI-NEXT:    s_sub_i32 s5, s5, s9
1392; TAHITI-NEXT:    s_sub_i32 s6, s6, s10
1393; TAHITI-NEXT:    s_sub_i32 s7, s7, s11
1394; TAHITI-NEXT:    v_mov_b32_e32 v0, s4
1395; TAHITI-NEXT:    v_mov_b32_e32 v1, s5
1396; TAHITI-NEXT:    v_mov_b32_e32 v2, s6
1397; TAHITI-NEXT:    v_mov_b32_e32 v3, s7
1398; TAHITI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
1399; TAHITI-NEXT:    s_endpgm
1400;
1401; TONGA-LABEL: srem_v4i32_4:
1402; TONGA:       ; %bb.0:
1403; TONGA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1404; TONGA-NEXT:    s_waitcnt lgkmcnt(0)
1405; TONGA-NEXT:    v_mov_b32_e32 v0, s2
1406; TONGA-NEXT:    v_mov_b32_e32 v1, s3
1407; TONGA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
1408; TONGA-NEXT:    v_mov_b32_e32 v4, s0
1409; TONGA-NEXT:    v_mov_b32_e32 v5, s1
1410; TONGA-NEXT:    s_waitcnt vmcnt(0)
1411; TONGA-NEXT:    v_readfirstlane_b32 s0, v0
1412; TONGA-NEXT:    v_readfirstlane_b32 s1, v1
1413; TONGA-NEXT:    v_readfirstlane_b32 s2, v2
1414; TONGA-NEXT:    v_readfirstlane_b32 s3, v3
1415; TONGA-NEXT:    s_ashr_i32 s4, s0, 31
1416; TONGA-NEXT:    s_ashr_i32 s5, s1, 31
1417; TONGA-NEXT:    s_ashr_i32 s6, s2, 31
1418; TONGA-NEXT:    s_ashr_i32 s7, s3, 31
1419; TONGA-NEXT:    s_lshr_b32 s4, s4, 30
1420; TONGA-NEXT:    s_lshr_b32 s5, s5, 30
1421; TONGA-NEXT:    s_lshr_b32 s6, s6, 30
1422; TONGA-NEXT:    s_lshr_b32 s7, s7, 30
1423; TONGA-NEXT:    s_add_i32 s4, s0, s4
1424; TONGA-NEXT:    s_add_i32 s5, s1, s5
1425; TONGA-NEXT:    s_add_i32 s6, s2, s6
1426; TONGA-NEXT:    s_add_i32 s7, s3, s7
1427; TONGA-NEXT:    s_and_b32 s4, s4, -4
1428; TONGA-NEXT:    s_and_b32 s5, s5, -4
1429; TONGA-NEXT:    s_and_b32 s6, s6, -4
1430; TONGA-NEXT:    s_and_b32 s7, s7, -4
1431; TONGA-NEXT:    s_sub_i32 s0, s0, s4
1432; TONGA-NEXT:    s_sub_i32 s1, s1, s5
1433; TONGA-NEXT:    s_sub_i32 s2, s2, s6
1434; TONGA-NEXT:    s_sub_i32 s3, s3, s7
1435; TONGA-NEXT:    v_mov_b32_e32 v0, s0
1436; TONGA-NEXT:    v_mov_b32_e32 v1, s1
1437; TONGA-NEXT:    v_mov_b32_e32 v2, s2
1438; TONGA-NEXT:    v_mov_b32_e32 v3, s3
1439; TONGA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1440; TONGA-NEXT:    s_endpgm
1441;
1442; EG-LABEL: srem_v4i32_4:
1443; EG:       ; %bb.0:
1444; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
1445; EG-NEXT:    TEX 0 @6
1446; EG-NEXT:    ALU 29, @9, KC0[CB0:0-32], KC1[]
1447; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
1448; EG-NEXT:    CF_END
1449; EG-NEXT:    PAD
1450; EG-NEXT:    Fetch clause starting at 6:
1451; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
1452; EG-NEXT:    ALU clause starting at 8:
1453; EG-NEXT:     MOV * T0.X, KC0[2].Z,
1454; EG-NEXT:    ALU clause starting at 9:
1455; EG-NEXT:     ASHR * T1.W, T0.W, literal.x,
1456; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
1457; EG-NEXT:     LSHR T1.W, PV.W, literal.x,
1458; EG-NEXT:     ASHR * T2.W, T0.Z, literal.y,
1459; EG-NEXT:    30(4.203895e-44), 31(4.344025e-44)
1460; EG-NEXT:     ASHR T1.Z, T0.Y, literal.x,
1461; EG-NEXT:     LSHR T2.W, PS, literal.y,
1462; EG-NEXT:     ADD_INT * T1.W, T0.W, PV.W,
1463; EG-NEXT:    31(4.344025e-44), 30(4.203895e-44)
1464; EG-NEXT:     AND_INT T1.Y, PS, literal.x,
1465; EG-NEXT:     ADD_INT T2.Z, T0.Z, PV.W,
1466; EG-NEXT:     ASHR T1.W, T0.X, literal.y,
1467; EG-NEXT:     LSHR * T2.W, PV.Z, literal.z,
1468; EG-NEXT:    -4(nan), 31(4.344025e-44)
1469; EG-NEXT:    30(4.203895e-44), 0(0.000000e+00)
1470; EG-NEXT:     ADD_INT T2.Y, T0.Y, PS,
1471; EG-NEXT:     LSHR T1.Z, PV.W, literal.x,
1472; EG-NEXT:     AND_INT T1.W, PV.Z, literal.y,
1473; EG-NEXT:     SUB_INT * T0.W, T0.W, PV.Y,
1474; EG-NEXT:    30(4.203895e-44), -4(nan)
1475; EG-NEXT:     SUB_INT T0.Z, T0.Z, PV.W,
1476; EG-NEXT:     ADD_INT T1.W, T0.X, PV.Z,
1477; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.x,
1478; EG-NEXT:    -4(nan), 0(0.000000e+00)
1479; EG-NEXT:     SUB_INT T0.Y, T0.Y, PS,
1480; EG-NEXT:     AND_INT * T1.W, PV.W, literal.x,
1481; EG-NEXT:    -4(nan), 0(0.000000e+00)
1482; EG-NEXT:     SUB_INT T0.X, T0.X, PV.W,
1483; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
1484; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; IR under test: a single vector load, srem by the constant splat, and a store.
1485  %num = load <4 x i32>, ptr addrspace(1) %in
1486  %result = srem <4 x i32> %num, <i32 4, i32 4, i32 4, i32 4>
1487  store <4 x i32> %result, ptr addrspace(1) %out
1488  ret void
1489}
1490
1491define amdgpu_kernel void @srem_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
1492; GCN-LABEL: srem_i64:
1493; GCN:       ; %bb.0:
1494; GCN-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x24
1495; GCN-NEXT:    v_mov_b32_e32 v0, 0
1496; GCN-NEXT:    s_waitcnt lgkmcnt(0)
1497; GCN-NEXT:    global_load_dwordx4 v[0:3], v0, s[10:11]
1498; GCN-NEXT:    s_waitcnt vmcnt(0)
1499; GCN-NEXT:    v_readfirstlane_b32 s7, v1
1500; GCN-NEXT:    v_readfirstlane_b32 s6, v0
1501; GCN-NEXT:    v_readfirstlane_b32 s5, v3
1502; GCN-NEXT:    v_readfirstlane_b32 s4, v2
1503; GCN-NEXT:    s_or_b64 s[0:1], s[6:7], s[4:5]
1504; GCN-NEXT:    s_mov_b32 s0, 0
1505; GCN-NEXT:    s_cmp_lg_u64 s[0:1], 0
1506; GCN-NEXT:    s_cbranch_scc0 .LBB8_4
1507; GCN-NEXT:  ; %bb.1:
1508; GCN-NEXT:    s_ashr_i32 s0, s5, 31
1509; GCN-NEXT:    s_add_u32 s2, s4, s0
1510; GCN-NEXT:    s_mov_b32 s1, s0
1511; GCN-NEXT:    s_addc_u32 s3, s5, s0
1512; GCN-NEXT:    s_xor_b64 s[12:13], s[2:3], s[0:1]
1513; GCN-NEXT:    v_cvt_f32_u32_e32 v0, s12
1514; GCN-NEXT:    v_cvt_f32_u32_e32 v1, s13
1515; GCN-NEXT:    s_sub_u32 s0, 0, s12
1516; GCN-NEXT:    s_subb_u32 s1, 0, s13
1517; GCN-NEXT:    v_madmk_f32 v0, v1, 0x4f800000, v0
1518; GCN-NEXT:    v_rcp_f32_e32 v0, v0
1519; GCN-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
1520; GCN-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
1521; GCN-NEXT:    v_trunc_f32_e32 v1, v1
1522; GCN-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
1523; GCN-NEXT:    v_cvt_u32_f32_e32 v1, v1
1524; GCN-NEXT:    v_cvt_u32_f32_e32 v0, v0
1525; GCN-NEXT:    v_readfirstlane_b32 s2, v1
1526; GCN-NEXT:    v_readfirstlane_b32 s3, v0
1527; GCN-NEXT:    s_mul_i32 s5, s0, s2
1528; GCN-NEXT:    s_mul_hi_u32 s15, s0, s3
1529; GCN-NEXT:    s_mul_i32 s14, s1, s3
1530; GCN-NEXT:    s_add_i32 s5, s15, s5
1531; GCN-NEXT:    s_add_i32 s5, s5, s14
1532; GCN-NEXT:    s_mul_i32 s16, s0, s3
1533; GCN-NEXT:    s_mul_hi_u32 s14, s3, s5
1534; GCN-NEXT:    s_mul_i32 s15, s3, s5
1535; GCN-NEXT:    s_mul_hi_u32 s3, s3, s16
1536; GCN-NEXT:    s_add_u32 s3, s3, s15
1537; GCN-NEXT:    s_addc_u32 s14, 0, s14
1538; GCN-NEXT:    s_mul_hi_u32 s17, s2, s16
1539; GCN-NEXT:    s_mul_i32 s16, s2, s16
1540; GCN-NEXT:    s_add_u32 s3, s3, s16
1541; GCN-NEXT:    s_mul_hi_u32 s15, s2, s5
1542; GCN-NEXT:    s_addc_u32 s3, s14, s17
1543; GCN-NEXT:    s_addc_u32 s14, s15, 0
1544; GCN-NEXT:    s_mul_i32 s5, s2, s5
1545; GCN-NEXT:    s_add_u32 s3, s3, s5
1546; GCN-NEXT:    s_addc_u32 s5, 0, s14
1547; GCN-NEXT:    v_add_co_u32_e32 v0, vcc, s3, v0
1548; GCN-NEXT:    s_cmp_lg_u64 vcc, 0
1549; GCN-NEXT:    s_addc_u32 s2, s2, s5
1550; GCN-NEXT:    v_readfirstlane_b32 s5, v0
1551; GCN-NEXT:    s_mul_i32 s3, s0, s2
1552; GCN-NEXT:    s_mul_hi_u32 s14, s0, s5
1553; GCN-NEXT:    s_add_i32 s3, s14, s3
1554; GCN-NEXT:    s_mul_i32 s1, s1, s5
1555; GCN-NEXT:    s_add_i32 s3, s3, s1
1556; GCN-NEXT:    s_mul_i32 s0, s0, s5
1557; GCN-NEXT:    s_mul_hi_u32 s14, s2, s0
1558; GCN-NEXT:    s_mul_i32 s15, s2, s0
1559; GCN-NEXT:    s_mul_i32 s17, s5, s3
1560; GCN-NEXT:    s_mul_hi_u32 s0, s5, s0
1561; GCN-NEXT:    s_mul_hi_u32 s16, s5, s3
1562; GCN-NEXT:    s_add_u32 s0, s0, s17
1563; GCN-NEXT:    s_addc_u32 s5, 0, s16
1564; GCN-NEXT:    s_add_u32 s0, s0, s15
1565; GCN-NEXT:    s_mul_hi_u32 s1, s2, s3
1566; GCN-NEXT:    s_addc_u32 s0, s5, s14
1567; GCN-NEXT:    s_addc_u32 s1, s1, 0
1568; GCN-NEXT:    s_mul_i32 s3, s2, s3
1569; GCN-NEXT:    s_add_u32 s0, s0, s3
1570; GCN-NEXT:    s_addc_u32 s1, 0, s1
1571; GCN-NEXT:    v_add_co_u32_e32 v0, vcc, s0, v0
1572; GCN-NEXT:    s_cmp_lg_u64 vcc, 0
1573; GCN-NEXT:    s_addc_u32 s2, s2, s1
1574; GCN-NEXT:    s_ashr_i32 s14, s7, 31
1575; GCN-NEXT:    s_add_u32 s0, s6, s14
1576; GCN-NEXT:    s_mov_b32 s15, s14
1577; GCN-NEXT:    s_addc_u32 s1, s7, s14
1578; GCN-NEXT:    s_xor_b64 s[16:17], s[0:1], s[14:15]
1579; GCN-NEXT:    v_readfirstlane_b32 s3, v0
1580; GCN-NEXT:    s_mul_i32 s1, s16, s2
1581; GCN-NEXT:    s_mul_hi_u32 s5, s16, s3
1582; GCN-NEXT:    s_mul_hi_u32 s0, s16, s2
1583; GCN-NEXT:    s_add_u32 s1, s5, s1
1584; GCN-NEXT:    s_addc_u32 s0, 0, s0
1585; GCN-NEXT:    s_mul_hi_u32 s7, s17, s3
1586; GCN-NEXT:    s_mul_i32 s3, s17, s3
1587; GCN-NEXT:    s_add_u32 s1, s1, s3
1588; GCN-NEXT:    s_mul_hi_u32 s5, s17, s2
1589; GCN-NEXT:    s_addc_u32 s0, s0, s7
1590; GCN-NEXT:    s_addc_u32 s1, s5, 0
1591; GCN-NEXT:    s_mul_i32 s2, s17, s2
1592; GCN-NEXT:    s_add_u32 s0, s0, s2
1593; GCN-NEXT:    s_addc_u32 s1, 0, s1
1594; GCN-NEXT:    s_mul_i32 s1, s12, s1
1595; GCN-NEXT:    s_mul_hi_u32 s2, s12, s0
1596; GCN-NEXT:    s_add_i32 s1, s2, s1
1597; GCN-NEXT:    s_mul_i32 s2, s13, s0
1598; GCN-NEXT:    s_mul_i32 s0, s12, s0
1599; GCN-NEXT:    s_add_i32 s5, s1, s2
1600; GCN-NEXT:    v_mov_b32_e32 v0, s0
1601; GCN-NEXT:    s_sub_i32 s1, s17, s5
1602; GCN-NEXT:    v_sub_co_u32_e32 v0, vcc, s16, v0
1603; GCN-NEXT:    s_cmp_lg_u64 vcc, 0
1604; GCN-NEXT:    s_subb_u32 s7, s1, s13
1605; GCN-NEXT:    v_subrev_co_u32_e64 v1, s[0:1], s12, v0
1606; GCN-NEXT:    s_cmp_lg_u64 s[0:1], 0
1607; GCN-NEXT:    s_subb_u32 s15, s7, 0
1608; GCN-NEXT:    s_cmp_ge_u32 s15, s13
1609; GCN-NEXT:    s_cselect_b32 s16, -1, 0
1610; GCN-NEXT:    v_cmp_le_u32_e64 s[2:3], s12, v1
1611; GCN-NEXT:    s_cmp_eq_u32 s15, s13
1612; GCN-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[2:3]
1613; GCN-NEXT:    v_mov_b32_e32 v3, s16
1614; GCN-NEXT:    s_cselect_b64 s[2:3], -1, 0
1615; GCN-NEXT:    s_cmp_lg_u64 s[0:1], 0
1616; GCN-NEXT:    v_cndmask_b32_e64 v2, v3, v2, s[2:3]
1617; GCN-NEXT:    s_subb_u32 s2, s7, s13
1618; GCN-NEXT:    v_subrev_co_u32_e64 v3, s[0:1], s12, v1
1619; GCN-NEXT:    s_cmp_lg_u64 s[0:1], 0
1620; GCN-NEXT:    s_subb_u32 s2, s2, 0
1621; GCN-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v2
1622; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s[0:1]
1623; GCN-NEXT:    v_mov_b32_e32 v2, s15
1624; GCN-NEXT:    v_mov_b32_e32 v3, s2
1625; GCN-NEXT:    s_cmp_lg_u64 vcc, 0
1626; GCN-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[0:1]
1627; GCN-NEXT:    s_subb_u32 s0, s17, s5
1628; GCN-NEXT:    s_cmp_ge_u32 s0, s13
1629; GCN-NEXT:    s_cselect_b32 s1, -1, 0
1630; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s12, v0
1631; GCN-NEXT:    s_cmp_eq_u32 s0, s13
1632; GCN-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
1633; GCN-NEXT:    v_mov_b32_e32 v4, s1
1634; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
1635; GCN-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
1636; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
1637; GCN-NEXT:    v_mov_b32_e32 v4, s0
1638; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1639; GCN-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
1640; GCN-NEXT:    v_xor_b32_e32 v0, s14, v0
1641; GCN-NEXT:    v_xor_b32_e32 v1, s14, v2
1642; GCN-NEXT:    v_mov_b32_e32 v2, s14
1643; GCN-NEXT:    v_subrev_co_u32_e32 v0, vcc, s14, v0
1644; GCN-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
1645; GCN-NEXT:    s_cbranch_execnz .LBB8_3
1646; GCN-NEXT:  .LBB8_2:
1647; GCN-NEXT:    v_cvt_f32_u32_e32 v0, s4
1648; GCN-NEXT:    s_sub_i32 s0, 0, s4
1649; GCN-NEXT:    s_mov_b32 s1, 0
1650; GCN-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1651; GCN-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1652; GCN-NEXT:    v_cvt_u32_f32_e32 v0, v0
1653; GCN-NEXT:    v_readfirstlane_b32 s2, v0
1654; GCN-NEXT:    s_mul_i32 s0, s0, s2
1655; GCN-NEXT:    s_mul_hi_u32 s0, s2, s0
1656; GCN-NEXT:    s_add_i32 s2, s2, s0
1657; GCN-NEXT:    s_mul_hi_u32 s0, s6, s2
1658; GCN-NEXT:    s_mul_i32 s0, s0, s4
1659; GCN-NEXT:    s_sub_i32 s0, s6, s0
1660; GCN-NEXT:    s_sub_i32 s2, s0, s4
1661; GCN-NEXT:    s_cmp_ge_u32 s0, s4
1662; GCN-NEXT:    s_cselect_b32 s0, s2, s0
1663; GCN-NEXT:    s_sub_i32 s2, s0, s4
1664; GCN-NEXT:    s_cmp_ge_u32 s0, s4
1665; GCN-NEXT:    s_cselect_b32 s0, s2, s0
1666; GCN-NEXT:    v_mov_b32_e32 v0, s0
1667; GCN-NEXT:    v_mov_b32_e32 v1, s1
1668; GCN-NEXT:  .LBB8_3:
1669; GCN-NEXT:    v_mov_b32_e32 v2, 0
1670; GCN-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
1671; GCN-NEXT:    s_endpgm
1672; GCN-NEXT:  .LBB8_4:
1673; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
1674; GCN-NEXT:    s_branch .LBB8_2
1675;
1676; TAHITI-LABEL: srem_i64:
1677; TAHITI:       ; %bb.0:
1678; TAHITI-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x9
1679; TAHITI-NEXT:    s_mov_b32 s3, 0xf000
1680; TAHITI-NEXT:    s_mov_b32 s2, -1
1681; TAHITI-NEXT:    v_mov_b32_e32 v4, 0
1682; TAHITI-NEXT:    s_waitcnt lgkmcnt(0)
1683; TAHITI-NEXT:    s_mov_b32 s0, s6
1684; TAHITI-NEXT:    s_mov_b32 s1, s7
1685; TAHITI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
1686; TAHITI-NEXT:    s_waitcnt vmcnt(0)
1687; TAHITI-NEXT:    v_or_b32_e32 v5, v1, v3
1688; TAHITI-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
1689; TAHITI-NEXT:    s_cbranch_vccz .LBB8_4
1690; TAHITI-NEXT:  ; %bb.1:
1691; TAHITI-NEXT:    v_ashrrev_i32_e32 v5, 31, v3
1692; TAHITI-NEXT:    v_add_i32_e32 v4, vcc, v2, v5
1693; TAHITI-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
1694; TAHITI-NEXT:    v_xor_b32_e32 v4, v4, v5
1695; TAHITI-NEXT:    v_xor_b32_e32 v3, v3, v5
1696; TAHITI-NEXT:    v_cvt_f32_u32_e32 v5, v4
1697; TAHITI-NEXT:    v_cvt_f32_u32_e32 v6, v3
1698; TAHITI-NEXT:    v_sub_i32_e32 v7, vcc, 0, v4
1699; TAHITI-NEXT:    v_subb_u32_e32 v8, vcc, 0, v3, vcc
1700; TAHITI-NEXT:    v_madmk_f32 v5, v6, 0x4f800000, v5
1701; TAHITI-NEXT:    v_rcp_f32_e32 v5, v5
1702; TAHITI-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
1703; TAHITI-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v5
1704; TAHITI-NEXT:    v_trunc_f32_e32 v6, v6
1705; TAHITI-NEXT:    v_madmk_f32 v5, v6, 0xcf800000, v5
1706; TAHITI-NEXT:    v_cvt_u32_f32_e32 v6, v6
1707; TAHITI-NEXT:    v_cvt_u32_f32_e32 v5, v5
1708; TAHITI-NEXT:    v_mul_lo_u32 v10, v7, v6
1709; TAHITI-NEXT:    v_mul_hi_u32 v9, v7, v5
1710; TAHITI-NEXT:    v_mul_lo_u32 v11, v8, v5
1711; TAHITI-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
1712; TAHITI-NEXT:    v_mul_lo_u32 v10, v7, v5
1713; TAHITI-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
1714; TAHITI-NEXT:    v_mul_lo_u32 v11, v5, v9
1715; TAHITI-NEXT:    v_mul_hi_u32 v12, v5, v10
1716; TAHITI-NEXT:    v_mul_hi_u32 v13, v5, v9
1717; TAHITI-NEXT:    v_mul_hi_u32 v14, v6, v9
1718; TAHITI-NEXT:    v_mul_lo_u32 v9, v6, v9
1719; TAHITI-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
1720; TAHITI-NEXT:    v_addc_u32_e32 v12, vcc, 0, v13, vcc
1721; TAHITI-NEXT:    v_mul_lo_u32 v13, v6, v10
1722; TAHITI-NEXT:    v_mul_hi_u32 v10, v6, v10
1723; TAHITI-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
1724; TAHITI-NEXT:    v_addc_u32_e32 v10, vcc, v12, v10, vcc
1725; TAHITI-NEXT:    v_addc_u32_e32 v11, vcc, 0, v14, vcc
1726; TAHITI-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
1727; TAHITI-NEXT:    v_addc_u32_e32 v10, vcc, 0, v11, vcc
1728; TAHITI-NEXT:    v_add_i32_e32 v5, vcc, v5, v9
1729; TAHITI-NEXT:    v_addc_u32_e32 v6, vcc, v6, v10, vcc
1730; TAHITI-NEXT:    v_mul_lo_u32 v9, v7, v6
1731; TAHITI-NEXT:    v_mul_hi_u32 v10, v7, v5
1732; TAHITI-NEXT:    v_mul_lo_u32 v8, v8, v5
1733; TAHITI-NEXT:    v_mul_lo_u32 v7, v7, v5
1734; TAHITI-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
1735; TAHITI-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
1736; TAHITI-NEXT:    v_mul_lo_u32 v11, v5, v8
1737; TAHITI-NEXT:    v_mul_hi_u32 v12, v5, v7
1738; TAHITI-NEXT:    v_mul_hi_u32 v13, v5, v8
1739; TAHITI-NEXT:    v_mul_hi_u32 v10, v6, v7
1740; TAHITI-NEXT:    v_mul_lo_u32 v7, v6, v7
1741; TAHITI-NEXT:    v_mul_hi_u32 v9, v6, v8
1742; TAHITI-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
1743; TAHITI-NEXT:    v_addc_u32_e32 v12, vcc, 0, v13, vcc
1744; TAHITI-NEXT:    v_mul_lo_u32 v8, v6, v8
1745; TAHITI-NEXT:    v_add_i32_e32 v7, vcc, v11, v7
1746; TAHITI-NEXT:    v_addc_u32_e32 v7, vcc, v12, v10, vcc
1747; TAHITI-NEXT:    v_addc_u32_e32 v9, vcc, 0, v9, vcc
1748; TAHITI-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
1749; TAHITI-NEXT:    v_addc_u32_e32 v8, vcc, 0, v9, vcc
1750; TAHITI-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
1751; TAHITI-NEXT:    v_addc_u32_e32 v6, vcc, v6, v8, vcc
1752; TAHITI-NEXT:    v_ashrrev_i32_e32 v7, 31, v1
1753; TAHITI-NEXT:    v_add_i32_e32 v8, vcc, v0, v7
1754; TAHITI-NEXT:    v_xor_b32_e32 v8, v8, v7
1755; TAHITI-NEXT:    v_mul_lo_u32 v9, v8, v6
1756; TAHITI-NEXT:    v_mul_hi_u32 v10, v8, v5
1757; TAHITI-NEXT:    v_mul_hi_u32 v11, v8, v6
1758; TAHITI-NEXT:    v_addc_u32_e32 v1, vcc, v1, v7, vcc
1759; TAHITI-NEXT:    v_xor_b32_e32 v1, v1, v7
1760; TAHITI-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
1761; TAHITI-NEXT:    v_addc_u32_e32 v10, vcc, 0, v11, vcc
1762; TAHITI-NEXT:    v_mul_lo_u32 v11, v1, v5
1763; TAHITI-NEXT:    v_mul_hi_u32 v5, v1, v5
1764; TAHITI-NEXT:    v_mul_hi_u32 v12, v1, v6
1765; TAHITI-NEXT:    v_mul_lo_u32 v6, v1, v6
1766; TAHITI-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
1767; TAHITI-NEXT:    v_addc_u32_e32 v5, vcc, v10, v5, vcc
1768; TAHITI-NEXT:    v_addc_u32_e32 v9, vcc, 0, v12, vcc
1769; TAHITI-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
1770; TAHITI-NEXT:    v_addc_u32_e32 v6, vcc, 0, v9, vcc
1771; TAHITI-NEXT:    v_mul_lo_u32 v6, v4, v6
1772; TAHITI-NEXT:    v_mul_hi_u32 v9, v4, v5
1773; TAHITI-NEXT:    v_mul_lo_u32 v10, v3, v5
1774; TAHITI-NEXT:    v_mul_lo_u32 v5, v4, v5
1775; TAHITI-NEXT:    v_add_i32_e32 v6, vcc, v6, v9
1776; TAHITI-NEXT:    v_add_i32_e32 v6, vcc, v10, v6
1777; TAHITI-NEXT:    v_sub_i32_e32 v9, vcc, v1, v6
1778; TAHITI-NEXT:    v_sub_i32_e32 v5, vcc, v8, v5
1779; TAHITI-NEXT:    v_subb_u32_e64 v8, s[0:1], v9, v3, vcc
1780; TAHITI-NEXT:    v_sub_i32_e64 v9, s[0:1], v5, v4
1781; TAHITI-NEXT:    v_subbrev_u32_e64 v10, s[2:3], 0, v8, s[0:1]
1782; TAHITI-NEXT:    v_cmp_ge_u32_e64 s[2:3], v10, v3
1783; TAHITI-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[2:3]
1784; TAHITI-NEXT:    v_cmp_ge_u32_e64 s[2:3], v9, v4
1785; TAHITI-NEXT:    v_subb_u32_e32 v1, vcc, v1, v6, vcc
1786; TAHITI-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[2:3]
1787; TAHITI-NEXT:    v_cmp_eq_u32_e64 s[2:3], v10, v3
1788; TAHITI-NEXT:    v_subb_u32_e64 v8, s[0:1], v8, v3, s[0:1]
1789; TAHITI-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
1790; TAHITI-NEXT:    v_cndmask_b32_e64 v11, v11, v12, s[2:3]
1791; TAHITI-NEXT:    v_sub_i32_e64 v12, s[0:1], v9, v4
1792; TAHITI-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
1793; TAHITI-NEXT:    v_cmp_ge_u32_e32 vcc, v5, v4
1794; TAHITI-NEXT:    v_subbrev_u32_e64 v8, s[0:1], 0, v8, s[0:1]
1795; TAHITI-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
1796; TAHITI-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
1797; TAHITI-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v11
1798; TAHITI-NEXT:    v_cndmask_b32_e32 v3, v6, v4, vcc
1799; TAHITI-NEXT:    v_cndmask_b32_e64 v9, v9, v12, s[0:1]
1800; TAHITI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
1801; TAHITI-NEXT:    v_cndmask_b32_e64 v8, v10, v8, s[0:1]
1802; TAHITI-NEXT:    v_cndmask_b32_e32 v3, v5, v9, vcc
1803; TAHITI-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
1804; TAHITI-NEXT:    v_xor_b32_e32 v3, v3, v7
1805; TAHITI-NEXT:    v_xor_b32_e32 v1, v1, v7
1806; TAHITI-NEXT:    v_sub_i32_e32 v3, vcc, v3, v7
1807; TAHITI-NEXT:    v_subb_u32_e32 v4, vcc, v1, v7, vcc
1808; TAHITI-NEXT:    s_cbranch_execnz .LBB8_3
1809; TAHITI-NEXT:  .LBB8_2:
1810; TAHITI-NEXT:    v_cvt_f32_u32_e32 v1, v2
1811; TAHITI-NEXT:    v_sub_i32_e32 v3, vcc, 0, v2
1812; TAHITI-NEXT:    v_mov_b32_e32 v4, 0
1813; TAHITI-NEXT:    v_rcp_iflag_f32_e32 v1, v1
1814; TAHITI-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
1815; TAHITI-NEXT:    v_cvt_u32_f32_e32 v1, v1
1816; TAHITI-NEXT:    v_mul_lo_u32 v3, v3, v1
1817; TAHITI-NEXT:    v_mul_hi_u32 v3, v1, v3
1818; TAHITI-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
1819; TAHITI-NEXT:    v_mul_hi_u32 v1, v0, v1
1820; TAHITI-NEXT:    v_mul_lo_u32 v1, v1, v2
1821; TAHITI-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
1822; TAHITI-NEXT:    v_subrev_i32_e32 v1, vcc, v2, v0
1823; TAHITI-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
1824; TAHITI-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1825; TAHITI-NEXT:    v_sub_i32_e32 v1, vcc, v0, v2
1826; TAHITI-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
1827; TAHITI-NEXT:    v_cndmask_b32_e32 v3, v0, v1, vcc
1828; TAHITI-NEXT:  .LBB8_3:
1829; TAHITI-NEXT:    s_mov_b32 s7, 0xf000
1830; TAHITI-NEXT:    s_mov_b32 s6, -1
1831; TAHITI-NEXT:    buffer_store_dwordx2 v[3:4], off, s[4:7], 0
1832; TAHITI-NEXT:    s_endpgm
1833; TAHITI-NEXT:  .LBB8_4:
1834; TAHITI-NEXT:    ; implicit-def: $vgpr3_vgpr4
1835; TAHITI-NEXT:    s_branch .LBB8_2
1836;
1837; TONGA-LABEL: srem_i64:
1838; TONGA:       ; %bb.0:
1839; TONGA-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x24
1840; TONGA-NEXT:    v_mov_b32_e32 v4, 0
1841; TONGA-NEXT:    s_waitcnt lgkmcnt(0)
1842; TONGA-NEXT:    v_mov_b32_e32 v0, s6
1843; TONGA-NEXT:    v_mov_b32_e32 v1, s7
1844; TONGA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
1845; TONGA-NEXT:    s_waitcnt vmcnt(0)
1846; TONGA-NEXT:    v_or_b32_e32 v5, v1, v3
1847; TONGA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
1848; TONGA-NEXT:    s_cbranch_vccz .LBB8_4
1849; TONGA-NEXT:  ; %bb.1:
1850; TONGA-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
1851; TONGA-NEXT:    v_add_u32_e32 v5, vcc, v2, v4
1852; TONGA-NEXT:    v_addc_u32_e32 v3, vcc, v3, v4, vcc
1853; TONGA-NEXT:    v_xor_b32_e32 v9, v5, v4
1854; TONGA-NEXT:    v_xor_b32_e32 v10, v3, v4
1855; TONGA-NEXT:    v_cvt_f32_u32_e32 v3, v9
1856; TONGA-NEXT:    v_cvt_f32_u32_e32 v4, v10
1857; TONGA-NEXT:    v_sub_u32_e32 v11, vcc, 0, v9
1858; TONGA-NEXT:    v_subb_u32_e32 v12, vcc, 0, v10, vcc
1859; TONGA-NEXT:    v_madmk_f32 v3, v4, 0x4f800000, v3
1860; TONGA-NEXT:    v_rcp_f32_e32 v3, v3
1861; TONGA-NEXT:    v_mul_f32_e32 v3, 0x5f7ffffc, v3
1862; TONGA-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v3
1863; TONGA-NEXT:    v_trunc_f32_e32 v4, v4
1864; TONGA-NEXT:    v_madmk_f32 v3, v4, 0xcf800000, v3
1865; TONGA-NEXT:    v_cvt_u32_f32_e32 v7, v4
1866; TONGA-NEXT:    v_cvt_u32_f32_e32 v8, v3
1867; TONGA-NEXT:    v_mul_lo_u32 v5, v11, v7
1868; TONGA-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], v11, v8, 0
1869; TONGA-NEXT:    v_mul_lo_u32 v6, v12, v8
1870; TONGA-NEXT:    v_add_u32_e32 v4, vcc, v4, v5
1871; TONGA-NEXT:    v_add_u32_e32 v6, vcc, v4, v6
1872; TONGA-NEXT:    v_mad_u64_u32 v[4:5], s[0:1], v8, v6, 0
1873; TONGA-NEXT:    v_mul_hi_u32 v13, v8, v3
1874; TONGA-NEXT:    v_add_u32_e32 v13, vcc, v13, v4
1875; TONGA-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], v7, v3, 0
1876; TONGA-NEXT:    v_addc_u32_e32 v14, vcc, 0, v5, vcc
1877; TONGA-NEXT:    v_mad_u64_u32 v[5:6], s[0:1], v7, v6, 0
1878; TONGA-NEXT:    v_add_u32_e32 v3, vcc, v13, v3
1879; TONGA-NEXT:    v_addc_u32_e32 v3, vcc, v14, v4, vcc
1880; TONGA-NEXT:    v_addc_u32_e32 v4, vcc, 0, v6, vcc
1881; TONGA-NEXT:    v_add_u32_e32 v3, vcc, v3, v5
1882; TONGA-NEXT:    v_addc_u32_e32 v4, vcc, 0, v4, vcc
1883; TONGA-NEXT:    v_add_u32_e32 v13, vcc, v8, v3
1884; TONGA-NEXT:    v_addc_u32_e32 v14, vcc, v7, v4, vcc
1885; TONGA-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], v11, v13, 0
1886; TONGA-NEXT:    v_mul_lo_u32 v7, v11, v14
1887; TONGA-NEXT:    v_mul_lo_u32 v8, v12, v13
1888; TONGA-NEXT:    v_mul_hi_u32 v11, v13, v3
1889; TONGA-NEXT:    v_mad_u64_u32 v[5:6], s[0:1], v14, v3, 0
1890; TONGA-NEXT:    v_add_u32_e32 v4, vcc, v7, v4
1891; TONGA-NEXT:    v_add_u32_e32 v4, vcc, v4, v8
1892; TONGA-NEXT:    v_mad_u64_u32 v[7:8], s[0:1], v13, v4, 0
1893; TONGA-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], v14, v4, 0
1894; TONGA-NEXT:    v_add_u32_e32 v7, vcc, v11, v7
1895; TONGA-NEXT:    v_addc_u32_e32 v8, vcc, 0, v8, vcc
1896; TONGA-NEXT:    v_add_u32_e32 v5, vcc, v7, v5
1897; TONGA-NEXT:    v_addc_u32_e32 v5, vcc, v8, v6, vcc
1898; TONGA-NEXT:    v_addc_u32_e32 v4, vcc, 0, v4, vcc
1899; TONGA-NEXT:    v_add_u32_e32 v3, vcc, v5, v3
1900; TONGA-NEXT:    v_addc_u32_e32 v4, vcc, 0, v4, vcc
1901; TONGA-NEXT:    v_add_u32_e32 v5, vcc, v13, v3
1902; TONGA-NEXT:    v_addc_u32_e32 v6, vcc, v14, v4, vcc
1903; TONGA-NEXT:    v_ashrrev_i32_e32 v7, 31, v1
1904; TONGA-NEXT:    v_add_u32_e32 v3, vcc, v0, v7
1905; TONGA-NEXT:    v_xor_b32_e32 v8, v3, v7
1906; TONGA-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], v8, v6, 0
1907; TONGA-NEXT:    v_mul_hi_u32 v11, v8, v5
1908; TONGA-NEXT:    v_addc_u32_e32 v1, vcc, v1, v7, vcc
1909; TONGA-NEXT:    v_xor_b32_e32 v1, v1, v7
1910; TONGA-NEXT:    v_add_u32_e32 v11, vcc, v11, v3
1911; TONGA-NEXT:    v_addc_u32_e32 v12, vcc, 0, v4, vcc
1912; TONGA-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], v1, v5, 0
1913; TONGA-NEXT:    v_mad_u64_u32 v[5:6], s[0:1], v1, v6, 0
1914; TONGA-NEXT:    v_add_u32_e32 v3, vcc, v11, v3
1915; TONGA-NEXT:    v_addc_u32_e32 v3, vcc, v12, v4, vcc
1916; TONGA-NEXT:    v_addc_u32_e32 v4, vcc, 0, v6, vcc
1917; TONGA-NEXT:    v_add_u32_e32 v5, vcc, v3, v5
1918; TONGA-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
1919; TONGA-NEXT:    v_mul_lo_u32 v6, v9, v3
1920; TONGA-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], v9, v5, 0
1921; TONGA-NEXT:    v_mul_lo_u32 v5, v10, v5
1922; TONGA-NEXT:    v_add_u32_e32 v4, vcc, v6, v4
1923; TONGA-NEXT:    v_add_u32_e32 v4, vcc, v5, v4
1924; TONGA-NEXT:    v_sub_u32_e32 v5, vcc, v1, v4
1925; TONGA-NEXT:    v_sub_u32_e32 v3, vcc, v8, v3
1926; TONGA-NEXT:    v_subb_u32_e64 v5, s[0:1], v5, v10, vcc
1927; TONGA-NEXT:    v_sub_u32_e64 v6, s[0:1], v3, v9
1928; TONGA-NEXT:    v_subbrev_u32_e64 v8, s[2:3], 0, v5, s[0:1]
1929; TONGA-NEXT:    v_cmp_ge_u32_e64 s[2:3], v8, v10
1930; TONGA-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[2:3]
1931; TONGA-NEXT:    v_cmp_ge_u32_e64 s[2:3], v6, v9
1932; TONGA-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[2:3]
1933; TONGA-NEXT:    v_cmp_eq_u32_e64 s[2:3], v8, v10
1934; TONGA-NEXT:    v_subb_u32_e64 v5, s[0:1], v5, v10, s[0:1]
1935; TONGA-NEXT:    v_cndmask_b32_e64 v11, v11, v12, s[2:3]
1936; TONGA-NEXT:    v_sub_u32_e64 v12, s[0:1], v6, v9
1937; TONGA-NEXT:    v_subb_u32_e32 v1, vcc, v1, v4, vcc
1938; TONGA-NEXT:    v_subbrev_u32_e64 v5, s[0:1], 0, v5, s[0:1]
1939; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v10
1940; TONGA-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v11
1941; TONGA-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
1942; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v9
1943; TONGA-NEXT:    v_cndmask_b32_e64 v5, v8, v5, s[0:1]
1944; TONGA-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
1945; TONGA-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v10
1946; TONGA-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
1947; TONGA-NEXT:    v_cndmask_b32_e64 v6, v6, v12, s[0:1]
1948; TONGA-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
1949; TONGA-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
1950; TONGA-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
1951; TONGA-NEXT:    v_xor_b32_e32 v3, v3, v7
1952; TONGA-NEXT:    v_xor_b32_e32 v1, v1, v7
1953; TONGA-NEXT:    v_sub_u32_e32 v3, vcc, v3, v7
1954; TONGA-NEXT:    v_subb_u32_e32 v4, vcc, v1, v7, vcc
1955; TONGA-NEXT:    s_cbranch_execnz .LBB8_3
1956; TONGA-NEXT:  .LBB8_2:
1957; TONGA-NEXT:    v_cvt_f32_u32_e32 v1, v2
1958; TONGA-NEXT:    v_sub_u32_e32 v3, vcc, 0, v2
1959; TONGA-NEXT:    v_mov_b32_e32 v4, 0
1960; TONGA-NEXT:    v_rcp_iflag_f32_e32 v1, v1
1961; TONGA-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
1962; TONGA-NEXT:    v_cvt_u32_f32_e32 v1, v1
1963; TONGA-NEXT:    v_mul_lo_u32 v3, v3, v1
1964; TONGA-NEXT:    v_mul_hi_u32 v3, v1, v3
1965; TONGA-NEXT:    v_add_u32_e32 v1, vcc, v1, v3
1966; TONGA-NEXT:    v_mul_hi_u32 v1, v0, v1
1967; TONGA-NEXT:    v_mul_lo_u32 v1, v1, v2
1968; TONGA-NEXT:    v_sub_u32_e32 v0, vcc, v0, v1
1969; TONGA-NEXT:    v_subrev_u32_e32 v1, vcc, v2, v0
1970; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
1971; TONGA-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1972; TONGA-NEXT:    v_sub_u32_e32 v1, vcc, v0, v2
1973; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
1974; TONGA-NEXT:    v_cndmask_b32_e32 v3, v0, v1, vcc
1975; TONGA-NEXT:  .LBB8_3:
1976; TONGA-NEXT:    v_mov_b32_e32 v0, s4
1977; TONGA-NEXT:    v_mov_b32_e32 v1, s5
1978; TONGA-NEXT:    flat_store_dwordx2 v[0:1], v[3:4]
1979; TONGA-NEXT:    s_endpgm
1980; TONGA-NEXT:  .LBB8_4:
1981; TONGA-NEXT:    ; implicit-def: $vgpr3_vgpr4
1982; TONGA-NEXT:    s_branch .LBB8_2
1983;
1984; EG-LABEL: srem_i64:
1985; EG:       ; %bb.0:
1986; EG-NEXT:    ALU 0, @14, KC0[CB0:0-32], KC1[]
1987; EG-NEXT:    TEX 0 @12
1988; EG-NEXT:    ALU_PUSH_BEFORE 4, @15, KC0[], KC1[]
1989; EG-NEXT:    JUMP @9 POP:1
1990; EG-NEXT:    ALU 114, @20, KC0[], KC1[]
1991; EG-NEXT:    ALU 115, @135, KC0[], KC1[]
1992; EG-NEXT:    ALU 115, @251, KC0[], KC1[]
1993; EG-NEXT:    ALU 115, @367, KC0[], KC1[]
1994; EG-NEXT:    ALU_POP_AFTER 82, @483, KC0[], KC1[]
1995; EG-NEXT:    ALU 20, @566, KC0[CB0:0-32], KC1[]
1996; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XY, T0.X, 1
1997; EG-NEXT:    CF_END
1998; EG-NEXT:    Fetch clause starting at 12:
1999; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
2000; EG-NEXT:    ALU clause starting at 14:
2001; EG-NEXT:     MOV * T0.X, KC0[2].Z,
2002; EG-NEXT:    ALU clause starting at 15:
2003; EG-NEXT:     OR_INT T2.W, T0.Y, T0.W,
2004; EG-NEXT:     MOV * T1.W, literal.x,
2005; EG-NEXT:    1(1.401298e-45), 0(0.000000e+00)
2006; EG-NEXT:     SETNE_INT * T2.W, PV.W, 0.0,
2007; EG-NEXT:     PRED_SETNE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0,
2008; EG-NEXT:    ALU clause starting at 20:
2009; EG-NEXT:     ASHR * T3.W, T0.W, literal.x,
2010; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2011; EG-NEXT:     ADD_INT * T1.W, T0.Z, PV.W,
2012; EG-NEXT:     XOR_INT * T2.W, PV.W, T3.W,
2013; EG-NEXT:     SUB_INT T1.Z, 0.0, PV.W,
2014; EG-NEXT:     ASHR T1.W, T0.Y, literal.x,
2015; EG-NEXT:     RECIP_UINT * T1.X, PV.W,
2016; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2017; EG-NEXT:     ADD_INT T2.Z, T0.Y, PV.W,
2018; EG-NEXT:     ADDC_UINT T4.W, T0.X, PV.W,
2019; EG-NEXT:     MULLO_INT * T0.Y, PV.Z, PS,
2020; EG-NEXT:     ADD_INT T4.W, PV.Z, PV.W,
2021; EG-NEXT:     MULHI * T0.Y, T1.X, PS,
2022; EG-NEXT:     ADD_INT T5.W, T1.X, PS,
2023; EG-NEXT:     XOR_INT * T4.W, PV.W, T1.W,
2024; EG-NEXT:     MULHI * T0.Y, PS, PV.W,
2025; EG-NEXT:     MULLO_INT * T0.Y, PS, T2.W,
2026; EG-NEXT:     SUB_INT * T5.W, T4.W, PS,
2027; EG-NEXT:     SETGE_UINT T6.W, PV.W, T2.W,
2028; EG-NEXT:     SUB_INT * T7.W, PV.W, T2.W,
2029; EG-NEXT:     CNDE_INT T1.Z, PV.W, T5.W, PS, BS:VEC_021/SCL_122
2030; EG-NEXT:     ADD_INT T0.W, T0.W, T3.W,
2031; EG-NEXT:     ADDC_UINT * T5.W, T0.Z, T3.W,
2032; EG-NEXT:     ADD_INT T2.Z, PV.W, PS,
2033; EG-NEXT:     SETGE_UINT T0.W, PV.Z, T2.W,
2034; EG-NEXT:     SUB_INT * T5.W, PV.Z, T2.W,
2035; EG-NEXT:     ADD_INT T3.Z, T0.X, T1.W, BS:VEC_021/SCL_122
2036; EG-NEXT:     CNDE_INT T5.W, PV.W, T1.Z, PS,
2037; EG-NEXT:     XOR_INT * T0.W, PV.Z, T3.W,
2038; EG-NEXT:     CNDE_INT T4.W, PS, PV.W, T4.W,
2039; EG-NEXT:     XOR_INT * T3.W, PV.Z, T1.W,
2040; EG-NEXT:     BIT_ALIGN_INT T5.W, PV.W, PS, literal.x,
2041; EG-NEXT:     LSHR * T4.W, PV.W, literal.x,
2042; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2043; EG-NEXT:     SETE_INT T1.Z, PS, T0.W, BS:VEC_021/SCL_122
2044; EG-NEXT:     SETGE_UINT T6.W, PS, T0.W, BS:VEC_021/SCL_122
2045; EG-NEXT:     SETGE_UINT * T7.W, PV.W, T2.W,
2046; EG-NEXT:     CNDE_INT T0.Y, PV.Z, PV.W, PS,
2047; EG-NEXT:     SUB_INT * T1.Z, T5.W, T2.W,
2048; EG-NEXT:     SUB_INT * T6.W, T4.W, T0.W,
2049; EG-NEXT:     SUBB_UINT * T7.W, T5.W, T2.W,
2050; EG-NEXT:     SUB_INT T6.W, T6.W, PV.W,
2051; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2052; EG-NEXT:     LSHL T1.Z, PS, 1,
2053; EG-NEXT:     BFE_UINT T7.W, T3.W, literal.x, 1,
2054; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, PV.W,
2055; EG-NEXT:    30(4.203895e-44), 0(0.000000e+00)
2056; EG-NEXT:     BIT_ALIGN_INT T4.W, PS, T5.W, literal.x,
2057; EG-NEXT:     OR_INT * T5.W, PV.Z, PV.W,
2058; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2059; EG-NEXT:     SETGE_UINT T1.Z, PS, T2.W, BS:VEC_021/SCL_122
2060; EG-NEXT:     SETE_INT T6.W, PV.W, T0.W,
2061; EG-NEXT:     SETGE_UINT * T7.W, PV.W, T0.W,
2062; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
2063; EG-NEXT:     SUB_INT T1.Z, T5.W, T2.W,
2064; EG-NEXT:     SUBB_UINT * T6.W, T5.W, T2.W,
2065; EG-NEXT:     SUB_INT * T7.W, T4.W, T0.W,
2066; EG-NEXT:     SUB_INT T6.W, PV.W, T6.W, BS:VEC_021/SCL_122
2067; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2068; EG-NEXT:     LSHL T1.Z, PS, 1,
2069; EG-NEXT:     BFE_UINT T7.W, T3.W, literal.x, 1,
2070; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, PV.W,
2071; EG-NEXT:    29(4.063766e-44), 0(0.000000e+00)
2072; EG-NEXT:     BIT_ALIGN_INT T4.W, PS, T5.W, literal.x,
2073; EG-NEXT:     OR_INT * T5.W, PV.Z, PV.W,
2074; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2075; EG-NEXT:     SETGE_UINT T1.Z, PS, T2.W, BS:VEC_021/SCL_122
2076; EG-NEXT:     SETE_INT T6.W, PV.W, T0.W,
2077; EG-NEXT:     SETGE_UINT * T7.W, PV.W, T0.W,
2078; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
2079; EG-NEXT:     SUB_INT T1.Z, T5.W, T2.W,
2080; EG-NEXT:     SUBB_UINT * T6.W, T5.W, T2.W,
2081; EG-NEXT:     SUB_INT * T7.W, T4.W, T0.W,
2082; EG-NEXT:     SUB_INT T6.W, PV.W, T6.W, BS:VEC_021/SCL_122
2083; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2084; EG-NEXT:     LSHL T1.Z, PS, 1,
2085; EG-NEXT:     BFE_UINT T7.W, T3.W, literal.x, 1,
2086; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, PV.W,
2087; EG-NEXT:    28(3.923636e-44), 0(0.000000e+00)
2088; EG-NEXT:     BIT_ALIGN_INT T4.W, PS, T5.W, literal.x,
2089; EG-NEXT:     OR_INT * T5.W, PV.Z, PV.W,
2090; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2091; EG-NEXT:     SETGE_UINT T1.Z, PS, T2.W, BS:VEC_021/SCL_122
2092; EG-NEXT:     SETE_INT T6.W, PV.W, T0.W,
2093; EG-NEXT:     SETGE_UINT * T7.W, PV.W, T0.W,
2094; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
2095; EG-NEXT:     SUB_INT T1.Z, T5.W, T2.W,
2096; EG-NEXT:     SUBB_UINT * T6.W, T5.W, T2.W,
2097; EG-NEXT:     SUB_INT * T7.W, T4.W, T0.W,
2098; EG-NEXT:     SUB_INT T6.W, PV.W, T6.W, BS:VEC_021/SCL_122
2099; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2100; EG-NEXT:     LSHL T1.Z, PS, 1,
2101; EG-NEXT:     BFE_UINT T7.W, T3.W, literal.x, 1,
2102; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, PV.W,
2103; EG-NEXT:    27(3.783506e-44), 0(0.000000e+00)
2104; EG-NEXT:     BIT_ALIGN_INT T4.W, PS, T5.W, literal.x,
2105; EG-NEXT:     OR_INT * T5.W, PV.Z, PV.W,
2106; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2107; EG-NEXT:     SETGE_UINT T1.Z, PS, T2.W, BS:VEC_021/SCL_122
2108; EG-NEXT:     SETE_INT T6.W, PV.W, T0.W,
2109; EG-NEXT:     SETGE_UINT * T7.W, PV.W, T0.W,
2110; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
2111; EG-NEXT:     SUB_INT T1.Z, T5.W, T2.W,
2112; EG-NEXT:     SUBB_UINT * T6.W, T5.W, T2.W,
2113; EG-NEXT:     SUB_INT * T7.W, T4.W, T0.W,
2114; EG-NEXT:     SUB_INT T6.W, PV.W, T6.W, BS:VEC_021/SCL_122
2115; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2116; EG-NEXT:     LSHL T1.Z, PS, 1,
2117; EG-NEXT:     BFE_UINT T7.W, T3.W, literal.x, 1,
2118; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, PV.W,
2119; EG-NEXT:    26(3.643376e-44), 0(0.000000e+00)
2120; EG-NEXT:     BIT_ALIGN_INT T4.W, PS, T5.W, literal.x,
2121; EG-NEXT:     OR_INT * T5.W, PV.Z, PV.W,
2122; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2123; EG-NEXT:     SETGE_UINT * T1.Z, PS, T2.W,
2124; EG-NEXT:    ALU clause starting at 135:
2125; EG-NEXT:     SETE_INT T6.W, T4.W, T0.W,
2126; EG-NEXT:     SETGE_UINT * T7.W, T4.W, T0.W,
2127; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, T1.Z,
2128; EG-NEXT:     SUB_INT T1.Z, T5.W, T2.W,
2129; EG-NEXT:     SUBB_UINT * T6.W, T5.W, T2.W,
2130; EG-NEXT:     SUB_INT * T7.W, T4.W, T0.W,
2131; EG-NEXT:     SUB_INT T6.W, PV.W, T6.W, BS:VEC_021/SCL_122
2132; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2133; EG-NEXT:     LSHL T1.Z, PS, 1,
2134; EG-NEXT:     BFE_UINT T7.W, T3.W, literal.x, 1,
2135; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, PV.W,
2136; EG-NEXT:    25(3.503246e-44), 0(0.000000e+00)
2137; EG-NEXT:     BIT_ALIGN_INT T4.W, PS, T5.W, literal.x,
2138; EG-NEXT:     OR_INT * T5.W, PV.Z, PV.W,
2139; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2140; EG-NEXT:     SETGE_UINT T1.Z, PS, T2.W, BS:VEC_021/SCL_122
2141; EG-NEXT:     SETE_INT T6.W, PV.W, T0.W,
2142; EG-NEXT:     SETGE_UINT * T7.W, PV.W, T0.W,
2143; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
2144; EG-NEXT:     SUB_INT T1.Z, T5.W, T2.W,
2145; EG-NEXT:     SUBB_UINT * T6.W, T5.W, T2.W,
2146; EG-NEXT:     SUB_INT * T7.W, T4.W, T0.W,
2147; EG-NEXT:     SUB_INT T6.W, PV.W, T6.W, BS:VEC_021/SCL_122
2148; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2149; EG-NEXT:     LSHL T1.Z, PS, 1,
2150; EG-NEXT:     BFE_UINT T7.W, T3.W, literal.x, 1,
2151; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, PV.W,
2152; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
2153; EG-NEXT:     BIT_ALIGN_INT T4.W, PS, T5.W, literal.x,
2154; EG-NEXT:     OR_INT * T5.W, PV.Z, PV.W,
2155; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2156; EG-NEXT:     SETGE_UINT T1.Z, PS, T2.W, BS:VEC_021/SCL_122
2157; EG-NEXT:     SETE_INT T6.W, PV.W, T0.W,
2158; EG-NEXT:     SETGE_UINT * T7.W, PV.W, T0.W,
2159; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
2160; EG-NEXT:     SUB_INT T1.Z, T5.W, T2.W,
2161; EG-NEXT:     SUBB_UINT * T6.W, T5.W, T2.W,
2162; EG-NEXT:     SUB_INT * T7.W, T4.W, T0.W,
2163; EG-NEXT:     SUB_INT T6.W, PV.W, T6.W, BS:VEC_021/SCL_122
2164; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2165; EG-NEXT:     LSHL T1.Z, PS, 1,
2166; EG-NEXT:     BFE_UINT T7.W, T3.W, literal.x, 1,
2167; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, PV.W,
2168; EG-NEXT:    23(3.222986e-44), 0(0.000000e+00)
2169; EG-NEXT:     BIT_ALIGN_INT T4.W, PS, T5.W, literal.x,
2170; EG-NEXT:     OR_INT * T5.W, PV.Z, PV.W,
2171; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2172; EG-NEXT:     SETGE_UINT T1.Z, PS, T2.W, BS:VEC_021/SCL_122
2173; EG-NEXT:     SETE_INT T6.W, PV.W, T0.W,
2174; EG-NEXT:     SETGE_UINT * T7.W, PV.W, T0.W,
2175; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
2176; EG-NEXT:     SUB_INT T1.Z, T5.W, T2.W,
2177; EG-NEXT:     SUBB_UINT * T6.W, T5.W, T2.W,
2178; EG-NEXT:     SUB_INT * T7.W, T4.W, T0.W,
2179; EG-NEXT:     SUB_INT T6.W, PV.W, T6.W, BS:VEC_021/SCL_122
2180; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2181; EG-NEXT:     LSHL T1.Z, PS, 1,
2182; EG-NEXT:     BFE_UINT T7.W, T3.W, literal.x, 1,
2183; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, PV.W,
2184; EG-NEXT:    22(3.082857e-44), 0(0.000000e+00)
2185; EG-NEXT:     BIT_ALIGN_INT T4.W, PS, T5.W, literal.x,
2186; EG-NEXT:     OR_INT * T5.W, PV.Z, PV.W,
2187; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2188; EG-NEXT:     SETGE_UINT T1.Z, PS, T2.W, BS:VEC_021/SCL_122
2189; EG-NEXT:     SETE_INT T6.W, PV.W, T0.W,
2190; EG-NEXT:     SETGE_UINT * T7.W, PV.W, T0.W,
2191; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
2192; EG-NEXT:     SUB_INT T1.Z, T5.W, T2.W,
2193; EG-NEXT:     SUBB_UINT * T6.W, T5.W, T2.W,
2194; EG-NEXT:     SUB_INT * T7.W, T4.W, T0.W,
2195; EG-NEXT:     SUB_INT T6.W, PV.W, T6.W, BS:VEC_021/SCL_122
2196; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2197; EG-NEXT:     LSHL T1.Z, PS, 1,
2198; EG-NEXT:     BFE_UINT T7.W, T3.W, literal.x, 1,
2199; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, PV.W,
2200; EG-NEXT:    21(2.942727e-44), 0(0.000000e+00)
2201; EG-NEXT:     BIT_ALIGN_INT T4.W, PS, T5.W, literal.x,
2202; EG-NEXT:     OR_INT * T5.W, PV.Z, PV.W,
2203; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2204; EG-NEXT:     SETGE_UINT T1.Z, PS, T2.W, BS:VEC_021/SCL_122
2205; EG-NEXT:     SETE_INT T6.W, PV.W, T0.W,
2206; EG-NEXT:     SETGE_UINT * T7.W, PV.W, T0.W,
2207; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
2208; EG-NEXT:     SUB_INT T1.Z, T5.W, T2.W,
2209; EG-NEXT:     SUBB_UINT * T6.W, T5.W, T2.W,
2210; EG-NEXT:     SUB_INT * T7.W, T4.W, T0.W,
2211; EG-NEXT:     SUB_INT T6.W, PV.W, T6.W, BS:VEC_021/SCL_122
2212; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2213; EG-NEXT:     LSHL T1.Z, PS, 1,
2214; EG-NEXT:     BFE_UINT T7.W, T3.W, literal.x, 1,
2215; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, PV.W,
2216; EG-NEXT:    20(2.802597e-44), 0(0.000000e+00)
2217; EG-NEXT:     BIT_ALIGN_INT T4.W, PS, T5.W, literal.x,
2218; EG-NEXT:     OR_INT * T5.W, PV.Z, PV.W,
2219; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2220; EG-NEXT:     SETGE_UINT T1.Z, PS, T2.W, BS:VEC_021/SCL_122
2221; EG-NEXT:     SETE_INT T6.W, PV.W, T0.W,
2222; EG-NEXT:     SETGE_UINT * T7.W, PV.W, T0.W,
2223; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
2224; EG-NEXT:     SUB_INT T1.Z, T5.W, T2.W,
2225; EG-NEXT:     SUBB_UINT * T6.W, T5.W, T2.W,
2226; EG-NEXT:     SUB_INT * T7.W, T4.W, T0.W,
2227; EG-NEXT:     SUB_INT T6.W, PV.W, T6.W, BS:VEC_021/SCL_122
2228; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2229; EG-NEXT:     LSHL T1.Z, PS, 1,
2230; EG-NEXT:     BFE_UINT T7.W, T3.W, literal.x, 1,
2231; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, PV.W,
2232; EG-NEXT:    19(2.662467e-44), 0(0.000000e+00)
2233; EG-NEXT:     BIT_ALIGN_INT T4.W, PS, T5.W, literal.x,
2234; EG-NEXT:     OR_INT * T5.W, PV.Z, PV.W,
2235; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2236; EG-NEXT:     SETGE_UINT T1.Z, PS, T2.W, BS:VEC_021/SCL_122
2237; EG-NEXT:     SETE_INT T6.W, PV.W, T0.W,
2238; EG-NEXT:     SETGE_UINT * T7.W, PV.W, T0.W,
2239; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
2240; EG-NEXT:     SUB_INT * T1.Z, T5.W, T2.W,
2241; EG-NEXT:    ALU clause starting at 251:
2242; EG-NEXT:     SUBB_UINT * T6.W, T5.W, T2.W,
2243; EG-NEXT:     SUB_INT * T7.W, T4.W, T0.W,
2244; EG-NEXT:     SUB_INT T6.W, PV.W, T6.W, BS:VEC_021/SCL_122
2245; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2246; EG-NEXT:     LSHL T1.Z, PS, 1,
2247; EG-NEXT:     BFE_UINT T7.W, T3.W, literal.x, 1,
2248; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, PV.W,
2249; EG-NEXT:    18(2.522337e-44), 0(0.000000e+00)
2250; EG-NEXT:     BIT_ALIGN_INT T4.W, PS, T5.W, literal.x,
2251; EG-NEXT:     OR_INT * T5.W, PV.Z, PV.W,
2252; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2253; EG-NEXT:     SETGE_UINT T1.Z, PS, T2.W, BS:VEC_021/SCL_122
2254; EG-NEXT:     SETE_INT T6.W, PV.W, T0.W,
2255; EG-NEXT:     SETGE_UINT * T7.W, PV.W, T0.W,
2256; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
2257; EG-NEXT:     SUB_INT T1.Z, T5.W, T2.W,
2258; EG-NEXT:     SUBB_UINT * T6.W, T5.W, T2.W,
2259; EG-NEXT:     SUB_INT * T7.W, T4.W, T0.W,
2260; EG-NEXT:     SUB_INT T6.W, PV.W, T6.W, BS:VEC_021/SCL_122
2261; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2262; EG-NEXT:     LSHL T1.Z, PS, 1,
2263; EG-NEXT:     BFE_UINT T7.W, T3.W, literal.x, 1,
2264; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, PV.W,
2265; EG-NEXT:    17(2.382207e-44), 0(0.000000e+00)
2266; EG-NEXT:     BIT_ALIGN_INT T4.W, PS, T5.W, literal.x,
2267; EG-NEXT:     OR_INT * T5.W, PV.Z, PV.W,
2268; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2269; EG-NEXT:     SETGE_UINT T1.Z, PS, T2.W, BS:VEC_021/SCL_122
2270; EG-NEXT:     SETE_INT T6.W, PV.W, T0.W,
2271; EG-NEXT:     SETGE_UINT * T7.W, PV.W, T0.W,
2272; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
2273; EG-NEXT:     SUB_INT T1.Z, T5.W, T2.W,
2274; EG-NEXT:     SUBB_UINT * T6.W, T5.W, T2.W,
2275; EG-NEXT:     SUB_INT * T7.W, T4.W, T0.W,
2276; EG-NEXT:     SUB_INT T6.W, PV.W, T6.W, BS:VEC_021/SCL_122
2277; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2278; EG-NEXT:     LSHL T1.Z, PS, 1,
2279; EG-NEXT:     BFE_UINT T7.W, T3.W, literal.x, 1,
2280; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, PV.W,
2281; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
2282; EG-NEXT:     BIT_ALIGN_INT T4.W, PS, T5.W, literal.x,
2283; EG-NEXT:     OR_INT * T5.W, PV.Z, PV.W,
2284; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2285; EG-NEXT:     SETGE_UINT T1.Z, PS, T2.W, BS:VEC_021/SCL_122
2286; EG-NEXT:     SETE_INT T6.W, PV.W, T0.W,
2287; EG-NEXT:     SETGE_UINT * T7.W, PV.W, T0.W,
2288; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
2289; EG-NEXT:     SUB_INT T1.Z, T5.W, T2.W,
2290; EG-NEXT:     SUBB_UINT * T6.W, T5.W, T2.W,
2291; EG-NEXT:     SUB_INT * T7.W, T4.W, T0.W,
2292; EG-NEXT:     SUB_INT T6.W, PV.W, T6.W, BS:VEC_021/SCL_122
2293; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2294; EG-NEXT:     LSHL T1.Z, PS, 1,
2295; EG-NEXT:     BFE_UINT T7.W, T3.W, literal.x, 1,
2296; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, PV.W,
2297; EG-NEXT:    15(2.101948e-44), 0(0.000000e+00)
2298; EG-NEXT:     BIT_ALIGN_INT T4.W, PS, T5.W, literal.x,
2299; EG-NEXT:     OR_INT * T5.W, PV.Z, PV.W,
2300; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2301; EG-NEXT:     SETGE_UINT T1.Z, PS, T2.W, BS:VEC_021/SCL_122
2302; EG-NEXT:     SETE_INT T6.W, PV.W, T0.W,
2303; EG-NEXT:     SETGE_UINT * T7.W, PV.W, T0.W,
2304; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
2305; EG-NEXT:     SUB_INT T1.Z, T5.W, T2.W,
2306; EG-NEXT:     SUBB_UINT * T6.W, T5.W, T2.W,
2307; EG-NEXT:     SUB_INT * T7.W, T4.W, T0.W,
2308; EG-NEXT:     SUB_INT T6.W, PV.W, T6.W, BS:VEC_021/SCL_122
2309; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2310; EG-NEXT:     LSHL T1.Z, PS, 1,
2311; EG-NEXT:     BFE_UINT T7.W, T3.W, literal.x, 1,
2312; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, PV.W,
2313; EG-NEXT:    14(1.961818e-44), 0(0.000000e+00)
2314; EG-NEXT:     BIT_ALIGN_INT T4.W, PS, T5.W, literal.x,
2315; EG-NEXT:     OR_INT * T5.W, PV.Z, PV.W,
2316; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2317; EG-NEXT:     SETGE_UINT T1.Z, PS, T2.W, BS:VEC_021/SCL_122
2318; EG-NEXT:     SETE_INT T6.W, PV.W, T0.W,
2319; EG-NEXT:     SETGE_UINT * T7.W, PV.W, T0.W,
2320; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
2321; EG-NEXT:     SUB_INT T1.Z, T5.W, T2.W,
2322; EG-NEXT:     SUBB_UINT * T6.W, T5.W, T2.W,
2323; EG-NEXT:     SUB_INT * T7.W, T4.W, T0.W,
2324; EG-NEXT:     SUB_INT T6.W, PV.W, T6.W, BS:VEC_021/SCL_122
2325; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2326; EG-NEXT:     LSHL T1.Z, PS, 1,
2327; EG-NEXT:     BFE_UINT T7.W, T3.W, literal.x, 1,
2328; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, PV.W,
2329; EG-NEXT:    13(1.821688e-44), 0(0.000000e+00)
2330; EG-NEXT:     BIT_ALIGN_INT T4.W, PS, T5.W, literal.x,
2331; EG-NEXT:     OR_INT * T5.W, PV.Z, PV.W,
2332; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2333; EG-NEXT:     SETGE_UINT T1.Z, PS, T2.W, BS:VEC_021/SCL_122
2334; EG-NEXT:     SETE_INT T6.W, PV.W, T0.W,
2335; EG-NEXT:     SETGE_UINT * T7.W, PV.W, T0.W,
2336; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
2337; EG-NEXT:     SUB_INT T1.Z, T5.W, T2.W,
2338; EG-NEXT:     SUBB_UINT * T6.W, T5.W, T2.W,
2339; EG-NEXT:     SUB_INT * T7.W, T4.W, T0.W,
2340; EG-NEXT:     SUB_INT T6.W, PV.W, T6.W, BS:VEC_021/SCL_122
2341; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2342; EG-NEXT:     LSHL T1.Z, PS, 1,
2343; EG-NEXT:     BFE_UINT T7.W, T3.W, literal.x, 1,
2344; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, PV.W,
2345; EG-NEXT:    12(1.681558e-44), 0(0.000000e+00)
2346; EG-NEXT:     BIT_ALIGN_INT T4.W, PS, T5.W, literal.x,
2347; EG-NEXT:     OR_INT * T5.W, PV.Z, PV.W,
2348; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2349; EG-NEXT:     SETGE_UINT T1.Z, PS, T2.W, BS:VEC_021/SCL_122
2350; EG-NEXT:     SETE_INT T6.W, PV.W, T0.W,
2351; EG-NEXT:     SETGE_UINT * T7.W, PV.W, T0.W,
2352; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
2353; EG-NEXT:     SUB_INT T1.Z, T5.W, T2.W,
2354; EG-NEXT:     SUBB_UINT * T6.W, T5.W, T2.W,
2355; EG-NEXT:     SUB_INT * T7.W, T4.W, T0.W,
2356; EG-NEXT:     SUB_INT T6.W, PV.W, T6.W, BS:VEC_021/SCL_122
2357; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2358; EG-NEXT:    ALU clause starting at 367:
2359; EG-NEXT:     LSHL T1.Z, T5.W, 1,
2360; EG-NEXT:     BFE_UINT * T7.W, T3.W, literal.x, 1, BS:VEC_120/SCL_212
2361; EG-NEXT:    11(1.541428e-44), 0(0.000000e+00)
2362; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, T6.W,
2363; EG-NEXT:     BIT_ALIGN_INT T4.W, PV.W, T5.W, literal.x, BS:VEC_021/SCL_122
2364; EG-NEXT:     OR_INT * T5.W, T1.Z, T7.W,
2365; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2366; EG-NEXT:     SETGE_UINT T1.Z, PS, T2.W, BS:VEC_021/SCL_122
2367; EG-NEXT:     SETE_INT T6.W, PV.W, T0.W,
2368; EG-NEXT:     SETGE_UINT * T7.W, PV.W, T0.W,
2369; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
2370; EG-NEXT:     SUB_INT T1.Z, T5.W, T2.W,
2371; EG-NEXT:     SUBB_UINT * T6.W, T5.W, T2.W,
2372; EG-NEXT:     SUB_INT * T7.W, T4.W, T0.W,
2373; EG-NEXT:     SUB_INT T6.W, PV.W, T6.W, BS:VEC_021/SCL_122
2374; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2375; EG-NEXT:     LSHL T1.Z, PS, 1,
2376; EG-NEXT:     BFE_UINT T7.W, T3.W, literal.x, 1,
2377; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, PV.W,
2378; EG-NEXT:    10(1.401298e-44), 0(0.000000e+00)
2379; EG-NEXT:     BIT_ALIGN_INT T4.W, PS, T5.W, literal.x,
2380; EG-NEXT:     OR_INT * T5.W, PV.Z, PV.W,
2381; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2382; EG-NEXT:     SETGE_UINT T1.Z, PS, T2.W, BS:VEC_021/SCL_122
2383; EG-NEXT:     SETE_INT T6.W, PV.W, T0.W,
2384; EG-NEXT:     SETGE_UINT * T7.W, PV.W, T0.W,
2385; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
2386; EG-NEXT:     SUB_INT T1.Z, T5.W, T2.W,
2387; EG-NEXT:     SUBB_UINT * T6.W, T5.W, T2.W,
2388; EG-NEXT:     SUB_INT * T7.W, T4.W, T0.W,
2389; EG-NEXT:     SUB_INT T6.W, PV.W, T6.W, BS:VEC_021/SCL_122
2390; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2391; EG-NEXT:     LSHL T1.Z, PS, 1,
2392; EG-NEXT:     BFE_UINT T7.W, T3.W, literal.x, 1,
2393; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, PV.W,
2394; EG-NEXT:    9(1.261169e-44), 0(0.000000e+00)
2395; EG-NEXT:     BIT_ALIGN_INT T4.W, PS, T5.W, literal.x,
2396; EG-NEXT:     OR_INT * T5.W, PV.Z, PV.W,
2397; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2398; EG-NEXT:     SETGE_UINT T1.Z, PS, T2.W, BS:VEC_021/SCL_122
2399; EG-NEXT:     SETE_INT T6.W, PV.W, T0.W,
2400; EG-NEXT:     SETGE_UINT * T7.W, PV.W, T0.W,
2401; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
2402; EG-NEXT:     SUB_INT T1.Z, T5.W, T2.W,
2403; EG-NEXT:     SUBB_UINT * T6.W, T5.W, T2.W,
2404; EG-NEXT:     SUB_INT * T7.W, T4.W, T0.W,
2405; EG-NEXT:     SUB_INT T6.W, PV.W, T6.W, BS:VEC_021/SCL_122
2406; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2407; EG-NEXT:     LSHL T1.Z, PS, 1,
2408; EG-NEXT:     BFE_UINT T7.W, T3.W, literal.x, 1,
2409; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, PV.W,
2410; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
2411; EG-NEXT:     BIT_ALIGN_INT T4.W, PS, T5.W, literal.x,
2412; EG-NEXT:     OR_INT * T5.W, PV.Z, PV.W,
2413; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2414; EG-NEXT:     SETGE_UINT T1.Z, PS, T2.W, BS:VEC_021/SCL_122
2415; EG-NEXT:     SETE_INT T6.W, PV.W, T0.W,
2416; EG-NEXT:     SETGE_UINT * T7.W, PV.W, T0.W,
2417; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
2418; EG-NEXT:     SUB_INT T1.Z, T5.W, T2.W,
2419; EG-NEXT:     SUBB_UINT * T6.W, T5.W, T2.W,
2420; EG-NEXT:     SUB_INT * T7.W, T4.W, T0.W,
2421; EG-NEXT:     SUB_INT T6.W, PV.W, T6.W, BS:VEC_021/SCL_122
2422; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2423; EG-NEXT:     LSHL T1.Z, PS, 1,
2424; EG-NEXT:     BFE_UINT T7.W, T3.W, literal.x, 1,
2425; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, PV.W,
2426; EG-NEXT:    7(9.809089e-45), 0(0.000000e+00)
2427; EG-NEXT:     BIT_ALIGN_INT T4.W, PS, T5.W, literal.x,
2428; EG-NEXT:     OR_INT * T5.W, PV.Z, PV.W,
2429; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2430; EG-NEXT:     SETGE_UINT T1.Z, PS, T2.W, BS:VEC_021/SCL_122
2431; EG-NEXT:     SETE_INT T6.W, PV.W, T0.W,
2432; EG-NEXT:     SETGE_UINT * T7.W, PV.W, T0.W,
2433; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
2434; EG-NEXT:     SUB_INT T1.Z, T5.W, T2.W,
2435; EG-NEXT:     SUBB_UINT * T6.W, T5.W, T2.W,
2436; EG-NEXT:     SUB_INT * T7.W, T4.W, T0.W,
2437; EG-NEXT:     SUB_INT T6.W, PV.W, T6.W, BS:VEC_021/SCL_122
2438; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2439; EG-NEXT:     LSHL T1.Z, PS, 1,
2440; EG-NEXT:     BFE_UINT T7.W, T3.W, literal.x, 1,
2441; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, PV.W,
2442; EG-NEXT:    6(8.407791e-45), 0(0.000000e+00)
2443; EG-NEXT:     BIT_ALIGN_INT T4.W, PS, T5.W, literal.x,
2444; EG-NEXT:     OR_INT * T5.W, PV.Z, PV.W,
2445; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2446; EG-NEXT:     SETGE_UINT T1.Z, PS, T2.W, BS:VEC_021/SCL_122
2447; EG-NEXT:     SETE_INT T6.W, PV.W, T0.W,
2448; EG-NEXT:     SETGE_UINT * T7.W, PV.W, T0.W,
2449; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
2450; EG-NEXT:     SUB_INT T1.Z, T5.W, T2.W,
2451; EG-NEXT:     SUBB_UINT * T6.W, T5.W, T2.W,
2452; EG-NEXT:     SUB_INT * T7.W, T4.W, T0.W,
2453; EG-NEXT:     SUB_INT T6.W, PV.W, T6.W, BS:VEC_021/SCL_122
2454; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2455; EG-NEXT:     LSHL T1.Z, PS, 1,
2456; EG-NEXT:     BFE_UINT T7.W, T3.W, literal.x, 1,
2457; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, PV.W,
2458; EG-NEXT:    5(7.006492e-45), 0(0.000000e+00)
2459; EG-NEXT:     BIT_ALIGN_INT T4.W, PS, T5.W, literal.x,
2460; EG-NEXT:     OR_INT * T5.W, PV.Z, PV.W,
2461; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2462; EG-NEXT:     SETGE_UINT T1.Z, PS, T2.W, BS:VEC_021/SCL_122
2463; EG-NEXT:     SETE_INT T6.W, PV.W, T0.W,
2464; EG-NEXT:     SETGE_UINT * T7.W, PV.W, T0.W,
2465; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
2466; EG-NEXT:     SUB_INT T1.Z, T5.W, T2.W,
2467; EG-NEXT:     SUBB_UINT * T6.W, T5.W, T2.W,
2468; EG-NEXT:     SUB_INT * T7.W, T4.W, T0.W,
2469; EG-NEXT:     SUB_INT T6.W, PV.W, T6.W, BS:VEC_021/SCL_122
2470; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2471; EG-NEXT:     LSHL T1.Z, PS, 1,
2472; EG-NEXT:     BFE_UINT T7.W, T3.W, literal.x, 1,
2473; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, PV.W,
2474; EG-NEXT:    4(5.605194e-45), 0(0.000000e+00)
2475; EG-NEXT:    ALU clause starting at 483:
2476; EG-NEXT:     BIT_ALIGN_INT T4.W, T4.W, T5.W, literal.x, BS:VEC_021/SCL_122
2477; EG-NEXT:     OR_INT * T5.W, T1.Z, T7.W,
2478; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2479; EG-NEXT:     SETGE_UINT T1.Z, PS, T2.W, BS:VEC_021/SCL_122
2480; EG-NEXT:     SETE_INT T6.W, PV.W, T0.W,
2481; EG-NEXT:     SETGE_UINT * T7.W, PV.W, T0.W,
2482; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
2483; EG-NEXT:     SUB_INT T1.Z, T5.W, T2.W,
2484; EG-NEXT:     SUBB_UINT * T6.W, T5.W, T2.W,
2485; EG-NEXT:     SUB_INT * T7.W, T4.W, T0.W,
2486; EG-NEXT:     SUB_INT T6.W, PV.W, T6.W, BS:VEC_021/SCL_122
2487; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2488; EG-NEXT:     LSHL T1.Z, PS, 1,
2489; EG-NEXT:     BFE_UINT T7.W, T3.W, literal.x, 1,
2490; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, PV.W,
2491; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
2492; EG-NEXT:     BIT_ALIGN_INT T4.W, PS, T5.W, literal.x,
2493; EG-NEXT:     OR_INT * T5.W, PV.Z, PV.W,
2494; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2495; EG-NEXT:     SETGE_UINT T1.Z, PS, T2.W, BS:VEC_021/SCL_122
2496; EG-NEXT:     SETE_INT T6.W, PV.W, T0.W,
2497; EG-NEXT:     SETGE_UINT * T7.W, PV.W, T0.W,
2498; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
2499; EG-NEXT:     SUB_INT T1.Z, T5.W, T2.W,
2500; EG-NEXT:     SUBB_UINT * T6.W, T5.W, T2.W,
2501; EG-NEXT:     SUB_INT * T7.W, T4.W, T0.W,
2502; EG-NEXT:     SUB_INT T6.W, PV.W, T6.W, BS:VEC_021/SCL_122
2503; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2504; EG-NEXT:     LSHL T1.Z, PS, 1,
2505; EG-NEXT:     BFE_UINT T7.W, T3.W, literal.x, 1,
2506; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, PV.W,
2507; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
2508; EG-NEXT:     BIT_ALIGN_INT T4.W, PS, T5.W, literal.x,
2509; EG-NEXT:     OR_INT * T5.W, PV.Z, PV.W,
2510; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2511; EG-NEXT:     SETGE_UINT T1.Z, PS, T2.W, BS:VEC_021/SCL_122
2512; EG-NEXT:     SETE_INT T6.W, PV.W, T0.W,
2513; EG-NEXT:     SETGE_UINT * T7.W, PV.W, T0.W,
2514; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
2515; EG-NEXT:     SUB_INT T1.Z, T5.W, T2.W,
2516; EG-NEXT:     SUBB_UINT * T6.W, T5.W, T2.W,
2517; EG-NEXT:     SUB_INT * T7.W, T4.W, T0.W,
2518; EG-NEXT:     SUB_INT T6.W, PV.W, T6.W, BS:VEC_021/SCL_122
2519; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2520; EG-NEXT:     LSHL T1.Z, PS, 1,
2521; EG-NEXT:     BFE_UINT T7.W, T3.W, 1, 1,
2522; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, PV.W,
2523; EG-NEXT:     BIT_ALIGN_INT T4.W, PS, T5.W, literal.x,
2524; EG-NEXT:     OR_INT * T5.W, PV.Z, PV.W,
2525; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2526; EG-NEXT:     SETGE_UINT T1.Z, PS, T2.W, BS:VEC_021/SCL_122
2527; EG-NEXT:     SETE_INT T6.W, PV.W, T0.W,
2528; EG-NEXT:     SETGE_UINT * T7.W, PV.W, T0.W,
2529; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
2530; EG-NEXT:     SUB_INT T1.Z, T5.W, T2.W,
2531; EG-NEXT:     SUBB_UINT * T6.W, T5.W, T2.W,
2532; EG-NEXT:     SUB_INT * T7.W, T4.W, T0.W,
2533; EG-NEXT:     SUB_INT T6.W, PV.W, T6.W, BS:VEC_021/SCL_122
2534; EG-NEXT:     CNDE_INT * T5.W, T0.Y, T5.W, T1.Z,
2535; EG-NEXT:     LSHL T1.Z, PS, 1,
2536; EG-NEXT:     AND_INT T3.W, T3.W, 1,
2537; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, PV.W,
2538; EG-NEXT:     BIT_ALIGN_INT T4.W, PS, T5.W, literal.x,
2539; EG-NEXT:     OR_INT * T3.W, PV.Z, PV.W,
2540; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2541; EG-NEXT:     SETGE_UINT T1.X, PS, T2.W, BS:VEC_021/SCL_122
2542; EG-NEXT:     SETE_INT T0.Y, PV.W, T0.W,
2543; EG-NEXT:     SETGE_UINT T1.Z, PV.W, T0.W,
2544; EG-NEXT:     SUBB_UINT T5.W, PS, T2.W, BS:VEC_021/SCL_122
2545; EG-NEXT:     SUB_INT * T0.W, PV.W, T0.W,
2546; EG-NEXT:     SUB_INT T2.Z, T3.W, T2.W,
2547; EG-NEXT:     SUB_INT T0.W, PS, PV.W,
2548; EG-NEXT:     CNDE_INT * T2.W, PV.Y, PV.Z, PV.X,
2549; EG-NEXT:     CNDE_INT T0.W, PS, T4.W, PV.W, BS:VEC_021/SCL_122
2550; EG-NEXT:     CNDE_INT * T2.W, PS, T3.W, PV.Z,
2551; EG-NEXT:     XOR_INT T2.W, PS, T1.W,
2552; EG-NEXT:     XOR_INT * T0.W, PV.W, T1.W,
2553; EG-NEXT:     SUB_INT T0.W, PS, T1.W,
2554; EG-NEXT:     SUBB_UINT * T3.W, PV.W, T1.W,
2555; EG-NEXT:     SUB_INT * T1.Y, PV.W, PS,
2556; EG-NEXT:     SUB_INT T1.X, T2.W, T1.W,
2557; EG-NEXT:     MOV * T1.W, literal.x,
2558; EG-NEXT:    0(0.000000e+00), 0(0.000000e+00)
2559; EG-NEXT:    ALU clause starting at 566:
2560; EG-NEXT:     MOV T0.W, KC0[2].Y,
2561; EG-NEXT:     SETE_INT * T1.W, T1.W, 0.0,
2562; EG-NEXT:     PRED_SETNE_INT * Pred,PredicateBit (MASKED), PS, 0.0,
2563; EG-NEXT:     SUB_INT T1.W, 0.0, T0.Z, Pred_sel_zero
2564; EG-NEXT:     RECIP_UINT * T0.Y, T0.Z, Pred_sel_zero
2565; EG-NEXT:     MULLO_INT * T1.X, T1.W, T0.Y, Pred_sel_zero
2566; EG-NEXT:     MULHI * T1.X, T0.Y, T1.X, Pred_sel_zero
2567; EG-NEXT:     ADD_INT * T1.W, T0.Y, T1.X, Pred_sel_zero
2568; EG-NEXT:     MULHI * T0.Y, T0.X, T1.W, Pred_sel_zero
2569; EG-NEXT:     MULLO_INT * T0.Y, T0.Y, T0.Z, Pred_sel_zero
2570; EG-NEXT:     SUB_INT * T1.W, T0.X, T0.Y, Pred_sel_zero
2571; EG-NEXT:     SETGE_UINT T2.W, T1.W, T0.Z, Pred_sel_zero
2572; EG-NEXT:     SUB_INT * T3.W, T1.W, T0.Z, Pred_sel_zero
2573; EG-NEXT:     CNDE_INT * T1.W, T2.W, T1.W, T3.W, Pred_sel_zero
2574; EG-NEXT:     SETGE_UINT T2.W, T1.W, T0.Z, Pred_sel_zero
2575; EG-NEXT:     SUB_INT * T3.W, T1.W, T0.Z, Pred_sel_zero
2576; EG-NEXT:     CNDE_INT T1.X, T2.W, T1.W, T3.W, Pred_sel_zero
2577; EG-NEXT:     MOV * T1.Y, literal.x, Pred_sel_zero
2578; EG-NEXT:    0(0.000000e+00), 0(0.000000e+00)
2579; EG-NEXT:     LSHR * T0.X, T0.W, literal.x,
2580; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
2581  %den_ptr = getelementptr i64, ptr addrspace(1) %in, i64 1
2582  %num = load i64, ptr addrspace(1) %in
2583  %den = load i64, ptr addrspace(1) %den_ptr
2584  %result = srem i64 %num, %den
2585  store i64 %result, ptr addrspace(1) %out
2586  ret void
2587}
2588
; srem_i64_4: signed 64-bit remainder by the constant 4.
;
; The kernel loads one i64 from %in, computes `srem i64 %num, 4`, and stores
; the result to %out.
;
; None of the four checked outputs contains a division expansion. The
; GCN/TAHITI/TONGA checks expect this shift-and-mask sequence:
;   bias   = (hi >> 31, arithmetic) >> 30, logical  ; 3 if num < 0, else 0
;   t      = num + bias                             ; 64-bit add with carry
;   t.lo   = t.lo & -4                              ; clear the low two bits
;   result = num - t                                ; 64-bit sub with borrow
; The EG checks expect the same shape, built from ASHR/LSHR/ADD_INT/AND_INT
; with ADDC_UINT/SUBB_UINT for the carry and borrow.
;
; The assertions below are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
2589define amdgpu_kernel void @srem_i64_4(ptr addrspace(1) %out, ptr addrspace(1) %in) {
2590; GCN-LABEL: srem_i64_4:
2591; GCN:       ; %bb.0:
2592; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
2593; GCN-NEXT:    v_mov_b32_e32 v2, 0
2594; GCN-NEXT:    s_waitcnt lgkmcnt(0)
2595; GCN-NEXT:    global_load_dwordx2 v[0:1], v2, s[2:3]
2596; GCN-NEXT:    s_waitcnt vmcnt(0)
2597; GCN-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
2598; GCN-NEXT:    v_lshrrev_b32_e32 v3, 30, v3
2599; GCN-NEXT:    v_add_co_u32_e32 v3, vcc, v0, v3
2600; GCN-NEXT:    v_addc_co_u32_e32 v4, vcc, 0, v1, vcc
2601; GCN-NEXT:    v_and_b32_e32 v3, -4, v3
2602; GCN-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v3
2603; GCN-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v4, vcc
2604; GCN-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
2605; GCN-NEXT:    s_endpgm
2606;
2607; TAHITI-LABEL: srem_i64_4:
2608; TAHITI:       ; %bb.0:
2609; TAHITI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
2610; TAHITI-NEXT:    s_mov_b32 s7, 0xf000
2611; TAHITI-NEXT:    s_mov_b32 s6, -1
2612; TAHITI-NEXT:    s_mov_b32 s10, s6
2613; TAHITI-NEXT:    s_mov_b32 s11, s7
2614; TAHITI-NEXT:    s_waitcnt lgkmcnt(0)
2615; TAHITI-NEXT:    s_mov_b32 s8, s2
2616; TAHITI-NEXT:    s_mov_b32 s9, s3
2617; TAHITI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
2618; TAHITI-NEXT:    s_mov_b32 s4, s0
2619; TAHITI-NEXT:    s_mov_b32 s5, s1
2620; TAHITI-NEXT:    s_waitcnt vmcnt(0)
2621; TAHITI-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
2622; TAHITI-NEXT:    v_lshrrev_b32_e32 v2, 30, v2
2623; TAHITI-NEXT:    v_add_i32_e32 v2, vcc, v0, v2
2624; TAHITI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v1, vcc
2625; TAHITI-NEXT:    v_and_b32_e32 v2, -4, v2
2626; TAHITI-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
2627; TAHITI-NEXT:    v_subb_u32_e32 v1, vcc, v1, v3, vcc
2628; TAHITI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
2629; TAHITI-NEXT:    s_endpgm
2630;
2631; TONGA-LABEL: srem_i64_4:
2632; TONGA:       ; %bb.0:
2633; TONGA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
2634; TONGA-NEXT:    s_waitcnt lgkmcnt(0)
2635; TONGA-NEXT:    v_mov_b32_e32 v0, s2
2636; TONGA-NEXT:    v_mov_b32_e32 v1, s3
2637; TONGA-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
2638; TONGA-NEXT:    v_mov_b32_e32 v2, s0
2639; TONGA-NEXT:    v_mov_b32_e32 v3, s1
2640; TONGA-NEXT:    s_waitcnt vmcnt(0)
2641; TONGA-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
2642; TONGA-NEXT:    v_lshrrev_b32_e32 v4, 30, v4
2643; TONGA-NEXT:    v_add_u32_e32 v4, vcc, v0, v4
2644; TONGA-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
2645; TONGA-NEXT:    v_and_b32_e32 v4, -4, v4
2646; TONGA-NEXT:    v_sub_u32_e32 v0, vcc, v0, v4
2647; TONGA-NEXT:    v_subb_u32_e32 v1, vcc, v1, v5, vcc
2648; TONGA-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
2649; TONGA-NEXT:    s_endpgm
2650;
2651; EG-LABEL: srem_i64_4:
2652; EG:       ; %bb.0:
2653; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
2654; EG-NEXT:    TEX 0 @6
2655; EG-NEXT:    ALU 13, @9, KC0[CB0:0-32], KC1[]
2656; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
2657; EG-NEXT:    CF_END
2658; EG-NEXT:    PAD
2659; EG-NEXT:    Fetch clause starting at 6:
2660; EG-NEXT:     VTX_READ_64 T0.XY, T0.X, 0, #1
2661; EG-NEXT:    ALU clause starting at 8:
2662; EG-NEXT:     MOV * T0.X, KC0[2].Z,
2663; EG-NEXT:    ALU clause starting at 9:
2664; EG-NEXT:     ASHR * T0.W, T0.Y, literal.x,
2665; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
2666; EG-NEXT:     LSHR * T0.W, PV.W, literal.x,
2667; EG-NEXT:    30(4.203895e-44), 0(0.000000e+00)
2668; EG-NEXT:     ADD_INT * T1.W, T0.X, PV.W,
2669; EG-NEXT:     AND_INT T1.W, PV.W, literal.x,
2670; EG-NEXT:     ADDC_UINT * T0.W, T0.X, T0.W,
2671; EG-NEXT:    -4(nan), 0(0.000000e+00)
2672; EG-NEXT:     BFE_INT T0.W, PS, 0.0, 1,
2673; EG-NEXT:     SUBB_UINT * T2.W, T0.X, PV.W,
2674; EG-NEXT:     SUB_INT * T0.Y, PV.W, PS,
2675; EG-NEXT:     SUB_INT T0.X, T0.X, T1.W,
2676; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
2677; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
2678  %num = load i64, ptr addrspace(1) %in
2679  %result = srem i64 %num, 4
2680  store i64 %result, ptr addrspace(1) %out
2681  ret void
2682}
2683
2684define amdgpu_kernel void @srem_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
2685; GCN-LABEL: srem_v2i64:
2686; GCN:       ; %bb.0:
2687; GCN-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x24
2688; GCN-NEXT:    v_mov_b32_e32 v8, 0
2689; GCN-NEXT:    s_waitcnt lgkmcnt(0)
2690; GCN-NEXT:    global_load_dwordx4 v[0:3], v8, s[10:11] offset:16
2691; GCN-NEXT:    global_load_dwordx4 v[4:7], v8, s[10:11]
2692; GCN-NEXT:    s_waitcnt vmcnt(1)
2693; GCN-NEXT:    v_readfirstlane_b32 s11, v1
2694; GCN-NEXT:    v_readfirstlane_b32 s10, v0
2695; GCN-NEXT:    s_waitcnt vmcnt(0)
2696; GCN-NEXT:    v_readfirstlane_b32 s13, v5
2697; GCN-NEXT:    v_readfirstlane_b32 s12, v4
2698; GCN-NEXT:    s_or_b64 s[0:1], s[12:13], s[10:11]
2699; GCN-NEXT:    s_mov_b32 s0, 0
2700; GCN-NEXT:    v_readfirstlane_b32 s5, v3
2701; GCN-NEXT:    v_readfirstlane_b32 s4, v2
2702; GCN-NEXT:    v_readfirstlane_b32 s7, v7
2703; GCN-NEXT:    s_cmp_lg_u64 s[0:1], 0
2704; GCN-NEXT:    v_readfirstlane_b32 s6, v6
2705; GCN-NEXT:    s_cbranch_scc0 .LBB10_7
2706; GCN-NEXT:  ; %bb.1:
2707; GCN-NEXT:    s_ashr_i32 s0, s11, 31
2708; GCN-NEXT:    s_add_u32 s2, s10, s0
2709; GCN-NEXT:    s_mov_b32 s1, s0
2710; GCN-NEXT:    s_addc_u32 s3, s11, s0
2711; GCN-NEXT:    s_xor_b64 s[16:17], s[2:3], s[0:1]
2712; GCN-NEXT:    v_cvt_f32_u32_e32 v0, s16
2713; GCN-NEXT:    v_cvt_f32_u32_e32 v1, s17
2714; GCN-NEXT:    s_sub_u32 s0, 0, s16
2715; GCN-NEXT:    s_subb_u32 s1, 0, s17
2716; GCN-NEXT:    v_madmk_f32 v0, v1, 0x4f800000, v0
2717; GCN-NEXT:    v_rcp_f32_e32 v0, v0
2718; GCN-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
2719; GCN-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
2720; GCN-NEXT:    v_trunc_f32_e32 v1, v1
2721; GCN-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
2722; GCN-NEXT:    v_cvt_u32_f32_e32 v1, v1
2723; GCN-NEXT:    v_cvt_u32_f32_e32 v0, v0
2724; GCN-NEXT:    v_readfirstlane_b32 s2, v1
2725; GCN-NEXT:    v_readfirstlane_b32 s3, v0
2726; GCN-NEXT:    s_mul_i32 s11, s0, s2
2727; GCN-NEXT:    s_mul_hi_u32 s19, s0, s3
2728; GCN-NEXT:    s_mul_i32 s18, s1, s3
2729; GCN-NEXT:    s_add_i32 s11, s19, s11
2730; GCN-NEXT:    s_add_i32 s11, s11, s18
2731; GCN-NEXT:    s_mul_i32 s20, s0, s3
2732; GCN-NEXT:    s_mul_hi_u32 s18, s3, s11
2733; GCN-NEXT:    s_mul_i32 s19, s3, s11
2734; GCN-NEXT:    s_mul_hi_u32 s3, s3, s20
2735; GCN-NEXT:    s_add_u32 s3, s3, s19
2736; GCN-NEXT:    s_addc_u32 s18, 0, s18
2737; GCN-NEXT:    s_mul_hi_u32 s21, s2, s20
2738; GCN-NEXT:    s_mul_i32 s20, s2, s20
2739; GCN-NEXT:    s_add_u32 s3, s3, s20
2740; GCN-NEXT:    s_mul_hi_u32 s19, s2, s11
2741; GCN-NEXT:    s_addc_u32 s3, s18, s21
2742; GCN-NEXT:    s_addc_u32 s18, s19, 0
2743; GCN-NEXT:    s_mul_i32 s11, s2, s11
2744; GCN-NEXT:    s_add_u32 s3, s3, s11
2745; GCN-NEXT:    s_addc_u32 s11, 0, s18
2746; GCN-NEXT:    v_add_co_u32_e32 v0, vcc, s3, v0
2747; GCN-NEXT:    s_cmp_lg_u64 vcc, 0
2748; GCN-NEXT:    s_addc_u32 s2, s2, s11
2749; GCN-NEXT:    v_readfirstlane_b32 s11, v0
2750; GCN-NEXT:    s_mul_i32 s3, s0, s2
2751; GCN-NEXT:    s_mul_hi_u32 s18, s0, s11
2752; GCN-NEXT:    s_add_i32 s3, s18, s3
2753; GCN-NEXT:    s_mul_i32 s1, s1, s11
2754; GCN-NEXT:    s_add_i32 s3, s3, s1
2755; GCN-NEXT:    s_mul_i32 s0, s0, s11
2756; GCN-NEXT:    s_mul_hi_u32 s18, s2, s0
2757; GCN-NEXT:    s_mul_i32 s19, s2, s0
2758; GCN-NEXT:    s_mul_i32 s21, s11, s3
2759; GCN-NEXT:    s_mul_hi_u32 s0, s11, s0
2760; GCN-NEXT:    s_mul_hi_u32 s20, s11, s3
2761; GCN-NEXT:    s_add_u32 s0, s0, s21
2762; GCN-NEXT:    s_addc_u32 s11, 0, s20
2763; GCN-NEXT:    s_add_u32 s0, s0, s19
2764; GCN-NEXT:    s_mul_hi_u32 s1, s2, s3
2765; GCN-NEXT:    s_addc_u32 s0, s11, s18
2766; GCN-NEXT:    s_addc_u32 s1, s1, 0
2767; GCN-NEXT:    s_mul_i32 s3, s2, s3
2768; GCN-NEXT:    s_add_u32 s0, s0, s3
2769; GCN-NEXT:    s_addc_u32 s1, 0, s1
2770; GCN-NEXT:    v_add_co_u32_e32 v0, vcc, s0, v0
2771; GCN-NEXT:    s_cmp_lg_u64 vcc, 0
2772; GCN-NEXT:    s_addc_u32 s2, s2, s1
2773; GCN-NEXT:    s_ashr_i32 s18, s13, 31
2774; GCN-NEXT:    s_add_u32 s0, s12, s18
2775; GCN-NEXT:    s_mov_b32 s19, s18
2776; GCN-NEXT:    s_addc_u32 s1, s13, s18
2777; GCN-NEXT:    s_xor_b64 s[20:21], s[0:1], s[18:19]
2778; GCN-NEXT:    v_readfirstlane_b32 s3, v0
2779; GCN-NEXT:    s_mul_i32 s1, s20, s2
2780; GCN-NEXT:    s_mul_hi_u32 s11, s20, s3
2781; GCN-NEXT:    s_mul_hi_u32 s0, s20, s2
2782; GCN-NEXT:    s_add_u32 s1, s11, s1
2783; GCN-NEXT:    s_addc_u32 s0, 0, s0
2784; GCN-NEXT:    s_mul_hi_u32 s13, s21, s3
2785; GCN-NEXT:    s_mul_i32 s3, s21, s3
2786; GCN-NEXT:    s_add_u32 s1, s1, s3
2787; GCN-NEXT:    s_mul_hi_u32 s11, s21, s2
2788; GCN-NEXT:    s_addc_u32 s0, s0, s13
2789; GCN-NEXT:    s_addc_u32 s1, s11, 0
2790; GCN-NEXT:    s_mul_i32 s2, s21, s2
2791; GCN-NEXT:    s_add_u32 s0, s0, s2
2792; GCN-NEXT:    s_addc_u32 s1, 0, s1
2793; GCN-NEXT:    s_mul_i32 s1, s16, s1
2794; GCN-NEXT:    s_mul_hi_u32 s2, s16, s0
2795; GCN-NEXT:    s_add_i32 s1, s2, s1
2796; GCN-NEXT:    s_mul_i32 s2, s17, s0
2797; GCN-NEXT:    s_mul_i32 s0, s16, s0
2798; GCN-NEXT:    s_add_i32 s11, s1, s2
2799; GCN-NEXT:    v_mov_b32_e32 v0, s0
2800; GCN-NEXT:    s_sub_i32 s1, s21, s11
2801; GCN-NEXT:    v_sub_co_u32_e32 v0, vcc, s20, v0
2802; GCN-NEXT:    s_cmp_lg_u64 vcc, 0
2803; GCN-NEXT:    s_subb_u32 s13, s1, s17
2804; GCN-NEXT:    v_subrev_co_u32_e64 v1, s[0:1], s16, v0
2805; GCN-NEXT:    s_cmp_lg_u64 s[0:1], 0
2806; GCN-NEXT:    s_subb_u32 s19, s13, 0
2807; GCN-NEXT:    s_cmp_ge_u32 s19, s17
2808; GCN-NEXT:    s_cselect_b32 s20, -1, 0
2809; GCN-NEXT:    v_cmp_le_u32_e64 s[2:3], s16, v1
2810; GCN-NEXT:    s_cmp_eq_u32 s19, s17
2811; GCN-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[2:3]
2812; GCN-NEXT:    v_mov_b32_e32 v3, s20
2813; GCN-NEXT:    s_cselect_b64 s[2:3], -1, 0
2814; GCN-NEXT:    s_cmp_lg_u64 s[0:1], 0
2815; GCN-NEXT:    v_cndmask_b32_e64 v2, v3, v2, s[2:3]
2816; GCN-NEXT:    s_subb_u32 s2, s13, s17
2817; GCN-NEXT:    v_subrev_co_u32_e64 v3, s[0:1], s16, v1
2818; GCN-NEXT:    s_cmp_lg_u64 s[0:1], 0
2819; GCN-NEXT:    s_subb_u32 s2, s2, 0
2820; GCN-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v2
2821; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s[0:1]
2822; GCN-NEXT:    v_mov_b32_e32 v2, s19
2823; GCN-NEXT:    v_mov_b32_e32 v3, s2
2824; GCN-NEXT:    s_cmp_lg_u64 vcc, 0
2825; GCN-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[0:1]
2826; GCN-NEXT:    s_subb_u32 s0, s21, s11
2827; GCN-NEXT:    s_cmp_ge_u32 s0, s17
2828; GCN-NEXT:    s_cselect_b32 s1, -1, 0
2829; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s16, v0
2830; GCN-NEXT:    s_cmp_eq_u32 s0, s17
2831; GCN-NEXT:    v_cndmask_b32_e64 v3, 0, -1, vcc
2832; GCN-NEXT:    v_mov_b32_e32 v4, s1
2833; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
2834; GCN-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
2835; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
2836; GCN-NEXT:    v_mov_b32_e32 v4, s0
2837; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2838; GCN-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
2839; GCN-NEXT:    v_xor_b32_e32 v0, s18, v0
2840; GCN-NEXT:    v_xor_b32_e32 v1, s18, v2
2841; GCN-NEXT:    v_mov_b32_e32 v2, s18
2842; GCN-NEXT:    v_subrev_co_u32_e32 v0, vcc, s18, v0
2843; GCN-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
2844; GCN-NEXT:    s_cbranch_execnz .LBB10_3
2845; GCN-NEXT:  .LBB10_2:
2846; GCN-NEXT:    v_cvt_f32_u32_e32 v0, s10
2847; GCN-NEXT:    s_sub_i32 s0, 0, s10
2848; GCN-NEXT:    s_mov_b32 s1, 0
2849; GCN-NEXT:    v_rcp_iflag_f32_e32 v0, v0
2850; GCN-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
2851; GCN-NEXT:    v_cvt_u32_f32_e32 v0, v0
2852; GCN-NEXT:    v_readfirstlane_b32 s2, v0
2853; GCN-NEXT:    s_mul_i32 s0, s0, s2
2854; GCN-NEXT:    s_mul_hi_u32 s0, s2, s0
2855; GCN-NEXT:    s_add_i32 s2, s2, s0
2856; GCN-NEXT:    s_mul_hi_u32 s0, s12, s2
2857; GCN-NEXT:    s_mul_i32 s0, s0, s10
2858; GCN-NEXT:    s_sub_i32 s0, s12, s0
2859; GCN-NEXT:    s_sub_i32 s2, s0, s10
2860; GCN-NEXT:    s_cmp_ge_u32 s0, s10
2861; GCN-NEXT:    s_cselect_b32 s0, s2, s0
2862; GCN-NEXT:    s_sub_i32 s2, s0, s10
2863; GCN-NEXT:    s_cmp_ge_u32 s0, s10
2864; GCN-NEXT:    s_cselect_b32 s0, s2, s0
2865; GCN-NEXT:    v_mov_b32_e32 v0, s0
2866; GCN-NEXT:    v_mov_b32_e32 v1, s1
2867; GCN-NEXT:  .LBB10_3:
2868; GCN-NEXT:    s_or_b64 s[0:1], s[6:7], s[4:5]
2869; GCN-NEXT:    s_mov_b32 s0, 0
2870; GCN-NEXT:    s_cmp_lg_u64 s[0:1], 0
2871; GCN-NEXT:    s_cbranch_scc0 .LBB10_8
2872; GCN-NEXT:  ; %bb.4:
2873; GCN-NEXT:    s_ashr_i32 s0, s5, 31
2874; GCN-NEXT:    s_add_u32 s2, s4, s0
2875; GCN-NEXT:    s_mov_b32 s1, s0
2876; GCN-NEXT:    s_addc_u32 s3, s5, s0
2877; GCN-NEXT:    s_xor_b64 s[12:13], s[2:3], s[0:1]
2878; GCN-NEXT:    v_cvt_f32_u32_e32 v2, s12
2879; GCN-NEXT:    v_cvt_f32_u32_e32 v3, s13
2880; GCN-NEXT:    s_sub_u32 s0, 0, s12
2881; GCN-NEXT:    s_subb_u32 s1, 0, s13
2882; GCN-NEXT:    v_madmk_f32 v2, v3, 0x4f800000, v2
2883; GCN-NEXT:    v_rcp_f32_e32 v2, v2
2884; GCN-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
2885; GCN-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
2886; GCN-NEXT:    v_trunc_f32_e32 v3, v3
2887; GCN-NEXT:    v_madmk_f32 v2, v3, 0xcf800000, v2
2888; GCN-NEXT:    v_cvt_u32_f32_e32 v3, v3
2889; GCN-NEXT:    v_cvt_u32_f32_e32 v2, v2
2890; GCN-NEXT:    v_readfirstlane_b32 s2, v3
2891; GCN-NEXT:    v_readfirstlane_b32 s3, v2
2892; GCN-NEXT:    s_mul_i32 s5, s0, s2
2893; GCN-NEXT:    s_mul_hi_u32 s15, s0, s3
2894; GCN-NEXT:    s_mul_i32 s14, s1, s3
2895; GCN-NEXT:    s_add_i32 s5, s15, s5
2896; GCN-NEXT:    s_add_i32 s5, s5, s14
2897; GCN-NEXT:    s_mul_i32 s16, s0, s3
2898; GCN-NEXT:    s_mul_hi_u32 s14, s3, s5
2899; GCN-NEXT:    s_mul_i32 s15, s3, s5
2900; GCN-NEXT:    s_mul_hi_u32 s3, s3, s16
2901; GCN-NEXT:    s_add_u32 s3, s3, s15
2902; GCN-NEXT:    s_addc_u32 s14, 0, s14
2903; GCN-NEXT:    s_mul_hi_u32 s17, s2, s16
2904; GCN-NEXT:    s_mul_i32 s16, s2, s16
2905; GCN-NEXT:    s_add_u32 s3, s3, s16
2906; GCN-NEXT:    s_mul_hi_u32 s15, s2, s5
2907; GCN-NEXT:    s_addc_u32 s3, s14, s17
2908; GCN-NEXT:    s_addc_u32 s14, s15, 0
2909; GCN-NEXT:    s_mul_i32 s5, s2, s5
2910; GCN-NEXT:    s_add_u32 s3, s3, s5
2911; GCN-NEXT:    s_addc_u32 s5, 0, s14
2912; GCN-NEXT:    v_add_co_u32_e32 v2, vcc, s3, v2
2913; GCN-NEXT:    s_cmp_lg_u64 vcc, 0
2914; GCN-NEXT:    s_addc_u32 s2, s2, s5
2915; GCN-NEXT:    v_readfirstlane_b32 s5, v2
2916; GCN-NEXT:    s_mul_i32 s3, s0, s2
2917; GCN-NEXT:    s_mul_hi_u32 s14, s0, s5
2918; GCN-NEXT:    s_add_i32 s3, s14, s3
2919; GCN-NEXT:    s_mul_i32 s1, s1, s5
2920; GCN-NEXT:    s_add_i32 s3, s3, s1
2921; GCN-NEXT:    s_mul_i32 s0, s0, s5
2922; GCN-NEXT:    s_mul_hi_u32 s14, s2, s0
2923; GCN-NEXT:    s_mul_i32 s15, s2, s0
2924; GCN-NEXT:    s_mul_i32 s17, s5, s3
2925; GCN-NEXT:    s_mul_hi_u32 s0, s5, s0
2926; GCN-NEXT:    s_mul_hi_u32 s16, s5, s3
2927; GCN-NEXT:    s_add_u32 s0, s0, s17
2928; GCN-NEXT:    s_addc_u32 s5, 0, s16
2929; GCN-NEXT:    s_add_u32 s0, s0, s15
2930; GCN-NEXT:    s_mul_hi_u32 s1, s2, s3
2931; GCN-NEXT:    s_addc_u32 s0, s5, s14
2932; GCN-NEXT:    s_addc_u32 s1, s1, 0
2933; GCN-NEXT:    s_mul_i32 s3, s2, s3
2934; GCN-NEXT:    s_add_u32 s0, s0, s3
2935; GCN-NEXT:    s_addc_u32 s1, 0, s1
2936; GCN-NEXT:    v_add_co_u32_e32 v2, vcc, s0, v2
2937; GCN-NEXT:    s_cmp_lg_u64 vcc, 0
2938; GCN-NEXT:    s_addc_u32 s2, s2, s1
2939; GCN-NEXT:    s_ashr_i32 s14, s7, 31
2940; GCN-NEXT:    s_add_u32 s0, s6, s14
2941; GCN-NEXT:    s_mov_b32 s15, s14
2942; GCN-NEXT:    s_addc_u32 s1, s7, s14
2943; GCN-NEXT:    s_xor_b64 s[16:17], s[0:1], s[14:15]
2944; GCN-NEXT:    v_readfirstlane_b32 s3, v2
2945; GCN-NEXT:    s_mul_i32 s1, s16, s2
2946; GCN-NEXT:    s_mul_hi_u32 s5, s16, s3
2947; GCN-NEXT:    s_mul_hi_u32 s0, s16, s2
2948; GCN-NEXT:    s_add_u32 s1, s5, s1
2949; GCN-NEXT:    s_addc_u32 s0, 0, s0
2950; GCN-NEXT:    s_mul_hi_u32 s7, s17, s3
2951; GCN-NEXT:    s_mul_i32 s3, s17, s3
2952; GCN-NEXT:    s_add_u32 s1, s1, s3
2953; GCN-NEXT:    s_mul_hi_u32 s5, s17, s2
2954; GCN-NEXT:    s_addc_u32 s0, s0, s7
2955; GCN-NEXT:    s_addc_u32 s1, s5, 0
2956; GCN-NEXT:    s_mul_i32 s2, s17, s2
2957; GCN-NEXT:    s_add_u32 s0, s0, s2
2958; GCN-NEXT:    s_addc_u32 s1, 0, s1
2959; GCN-NEXT:    s_mul_i32 s1, s12, s1
2960; GCN-NEXT:    s_mul_hi_u32 s2, s12, s0
2961; GCN-NEXT:    s_add_i32 s1, s2, s1
2962; GCN-NEXT:    s_mul_i32 s2, s13, s0
2963; GCN-NEXT:    s_mul_i32 s0, s12, s0
2964; GCN-NEXT:    s_add_i32 s5, s1, s2
2965; GCN-NEXT:    v_mov_b32_e32 v2, s0
2966; GCN-NEXT:    s_sub_i32 s1, s17, s5
2967; GCN-NEXT:    v_sub_co_u32_e32 v2, vcc, s16, v2
2968; GCN-NEXT:    s_cmp_lg_u64 vcc, 0
2969; GCN-NEXT:    s_subb_u32 s7, s1, s13
2970; GCN-NEXT:    v_subrev_co_u32_e64 v3, s[0:1], s12, v2
2971; GCN-NEXT:    s_cmp_lg_u64 s[0:1], 0
2972; GCN-NEXT:    s_subb_u32 s15, s7, 0
2973; GCN-NEXT:    s_cmp_ge_u32 s15, s13
2974; GCN-NEXT:    s_cselect_b32 s16, -1, 0
2975; GCN-NEXT:    v_cmp_le_u32_e64 s[2:3], s12, v3
2976; GCN-NEXT:    s_cmp_eq_u32 s15, s13
2977; GCN-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[2:3]
2978; GCN-NEXT:    v_mov_b32_e32 v5, s16
2979; GCN-NEXT:    s_cselect_b64 s[2:3], -1, 0
2980; GCN-NEXT:    s_cmp_lg_u64 s[0:1], 0
2981; GCN-NEXT:    v_cndmask_b32_e64 v4, v5, v4, s[2:3]
2982; GCN-NEXT:    s_subb_u32 s2, s7, s13
2983; GCN-NEXT:    v_subrev_co_u32_e64 v5, s[0:1], s12, v3
2984; GCN-NEXT:    s_cmp_lg_u64 s[0:1], 0
2985; GCN-NEXT:    s_subb_u32 s2, s2, 0
2986; GCN-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v4
2987; GCN-NEXT:    v_cndmask_b32_e64 v3, v3, v5, s[0:1]
2988; GCN-NEXT:    v_mov_b32_e32 v4, s15
2989; GCN-NEXT:    v_mov_b32_e32 v5, s2
2990; GCN-NEXT:    s_cmp_lg_u64 vcc, 0
2991; GCN-NEXT:    v_cndmask_b32_e64 v4, v4, v5, s[0:1]
2992; GCN-NEXT:    s_subb_u32 s0, s17, s5
2993; GCN-NEXT:    s_cmp_ge_u32 s0, s13
2994; GCN-NEXT:    s_cselect_b32 s1, -1, 0
2995; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s12, v2
2996; GCN-NEXT:    s_cmp_eq_u32 s0, s13
2997; GCN-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
2998; GCN-NEXT:    v_mov_b32_e32 v6, s1
2999; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
3000; GCN-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc
3001; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
3002; GCN-NEXT:    v_mov_b32_e32 v6, s0
3003; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
3004; GCN-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
3005; GCN-NEXT:    v_xor_b32_e32 v2, s14, v2
3006; GCN-NEXT:    v_xor_b32_e32 v3, s14, v4
3007; GCN-NEXT:    v_mov_b32_e32 v4, s14
3008; GCN-NEXT:    v_subrev_co_u32_e32 v2, vcc, s14, v2
3009; GCN-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v4, vcc
3010; GCN-NEXT:    s_cbranch_execnz .LBB10_6
3011; GCN-NEXT:  .LBB10_5:
3012; GCN-NEXT:    v_cvt_f32_u32_e32 v2, s4
3013; GCN-NEXT:    s_sub_i32 s0, 0, s4
3014; GCN-NEXT:    v_rcp_iflag_f32_e32 v2, v2
3015; GCN-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
3016; GCN-NEXT:    v_cvt_u32_f32_e32 v2, v2
3017; GCN-NEXT:    v_mul_lo_u32 v3, s0, v2
3018; GCN-NEXT:    v_mul_hi_u32 v3, v2, v3
3019; GCN-NEXT:    v_add_u32_e32 v2, v2, v3
3020; GCN-NEXT:    v_mul_hi_u32 v2, s6, v2
3021; GCN-NEXT:    v_mul_lo_u32 v2, v2, s4
3022; GCN-NEXT:    v_sub_u32_e32 v2, s6, v2
3023; GCN-NEXT:    v_subrev_u32_e32 v3, s4, v2
3024; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s4, v2
3025; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
3026; GCN-NEXT:    v_subrev_u32_e32 v3, s4, v2
3027; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s4, v2
3028; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
3029; GCN-NEXT:    v_mov_b32_e32 v3, 0
3030; GCN-NEXT:  .LBB10_6:
3031; GCN-NEXT:    v_mov_b32_e32 v4, 0
3032; GCN-NEXT:    global_store_dwordx4 v4, v[0:3], s[8:9]
3033; GCN-NEXT:    s_endpgm
3034; GCN-NEXT:  .LBB10_7:
3035; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
3036; GCN-NEXT:    s_branch .LBB10_2
3037; GCN-NEXT:  .LBB10_8:
3038; GCN-NEXT:    s_branch .LBB10_5
3039;
3040; TAHITI-LABEL: srem_v2i64:
3041; TAHITI:       ; %bb.0:
3042; TAHITI-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x9
3043; TAHITI-NEXT:    s_mov_b32 s3, 0xf000
3044; TAHITI-NEXT:    s_mov_b32 s2, -1
3045; TAHITI-NEXT:    v_mov_b32_e32 v8, 0
3046; TAHITI-NEXT:    s_waitcnt lgkmcnt(0)
3047; TAHITI-NEXT:    s_mov_b32 s0, s6
3048; TAHITI-NEXT:    s_mov_b32 s1, s7
3049; TAHITI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[0:3], 0
3050; TAHITI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:16
3051; TAHITI-NEXT:    s_waitcnt vmcnt(0)
3052; TAHITI-NEXT:    v_or_b32_e32 v9, v5, v1
3053; TAHITI-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[8:9]
3054; TAHITI-NEXT:    s_cbranch_vccz .LBB10_7
3055; TAHITI-NEXT:  ; %bb.1:
3056; TAHITI-NEXT:    v_ashrrev_i32_e32 v9, 31, v1
3057; TAHITI-NEXT:    v_add_i32_e32 v8, vcc, v0, v9
3058; TAHITI-NEXT:    v_addc_u32_e32 v1, vcc, v1, v9, vcc
3059; TAHITI-NEXT:    v_xor_b32_e32 v8, v8, v9
3060; TAHITI-NEXT:    v_xor_b32_e32 v1, v1, v9
3061; TAHITI-NEXT:    v_cvt_f32_u32_e32 v9, v8
3062; TAHITI-NEXT:    v_cvt_f32_u32_e32 v10, v1
3063; TAHITI-NEXT:    v_sub_i32_e32 v11, vcc, 0, v8
3064; TAHITI-NEXT:    v_subb_u32_e32 v12, vcc, 0, v1, vcc
3065; TAHITI-NEXT:    v_madmk_f32 v9, v10, 0x4f800000, v9
3066; TAHITI-NEXT:    v_rcp_f32_e32 v9, v9
3067; TAHITI-NEXT:    v_mul_f32_e32 v9, 0x5f7ffffc, v9
3068; TAHITI-NEXT:    v_mul_f32_e32 v10, 0x2f800000, v9
3069; TAHITI-NEXT:    v_trunc_f32_e32 v10, v10
3070; TAHITI-NEXT:    v_madmk_f32 v9, v10, 0xcf800000, v9
3071; TAHITI-NEXT:    v_cvt_u32_f32_e32 v10, v10
3072; TAHITI-NEXT:    v_cvt_u32_f32_e32 v9, v9
3073; TAHITI-NEXT:    v_mul_lo_u32 v14, v11, v10
3074; TAHITI-NEXT:    v_mul_hi_u32 v13, v11, v9
3075; TAHITI-NEXT:    v_mul_lo_u32 v15, v12, v9
3076; TAHITI-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
3077; TAHITI-NEXT:    v_mul_lo_u32 v14, v11, v9
3078; TAHITI-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
3079; TAHITI-NEXT:    v_mul_lo_u32 v15, v9, v13
3080; TAHITI-NEXT:    v_mul_hi_u32 v16, v9, v14
3081; TAHITI-NEXT:    v_mul_hi_u32 v17, v9, v13
3082; TAHITI-NEXT:    v_mul_hi_u32 v18, v10, v13
3083; TAHITI-NEXT:    v_mul_lo_u32 v13, v10, v13
3084; TAHITI-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
3085; TAHITI-NEXT:    v_addc_u32_e32 v16, vcc, 0, v17, vcc
3086; TAHITI-NEXT:    v_mul_lo_u32 v17, v10, v14
3087; TAHITI-NEXT:    v_mul_hi_u32 v14, v10, v14
3088; TAHITI-NEXT:    v_add_i32_e32 v15, vcc, v15, v17
3089; TAHITI-NEXT:    v_addc_u32_e32 v14, vcc, v16, v14, vcc
3090; TAHITI-NEXT:    v_addc_u32_e32 v15, vcc, 0, v18, vcc
3091; TAHITI-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
3092; TAHITI-NEXT:    v_addc_u32_e32 v14, vcc, 0, v15, vcc
3093; TAHITI-NEXT:    v_add_i32_e32 v9, vcc, v9, v13
3094; TAHITI-NEXT:    v_addc_u32_e32 v10, vcc, v10, v14, vcc
3095; TAHITI-NEXT:    v_mul_lo_u32 v13, v11, v10
3096; TAHITI-NEXT:    v_mul_hi_u32 v14, v11, v9
3097; TAHITI-NEXT:    v_mul_lo_u32 v12, v12, v9
3098; TAHITI-NEXT:    v_mul_lo_u32 v11, v11, v9
3099; TAHITI-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
3100; TAHITI-NEXT:    v_add_i32_e32 v12, vcc, v13, v12
3101; TAHITI-NEXT:    v_mul_lo_u32 v15, v9, v12
3102; TAHITI-NEXT:    v_mul_hi_u32 v16, v9, v11
3103; TAHITI-NEXT:    v_mul_hi_u32 v17, v9, v12
3104; TAHITI-NEXT:    v_mul_hi_u32 v14, v10, v11
3105; TAHITI-NEXT:    v_mul_lo_u32 v11, v10, v11
3106; TAHITI-NEXT:    v_mul_hi_u32 v13, v10, v12
3107; TAHITI-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
3108; TAHITI-NEXT:    v_addc_u32_e32 v16, vcc, 0, v17, vcc
3109; TAHITI-NEXT:    v_mul_lo_u32 v12, v10, v12
3110; TAHITI-NEXT:    v_add_i32_e32 v11, vcc, v15, v11
3111; TAHITI-NEXT:    v_addc_u32_e32 v11, vcc, v16, v14, vcc
3112; TAHITI-NEXT:    v_addc_u32_e32 v13, vcc, 0, v13, vcc
3113; TAHITI-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
3114; TAHITI-NEXT:    v_addc_u32_e32 v12, vcc, 0, v13, vcc
3115; TAHITI-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
3116; TAHITI-NEXT:    v_addc_u32_e32 v10, vcc, v10, v12, vcc
3117; TAHITI-NEXT:    v_ashrrev_i32_e32 v11, 31, v5
3118; TAHITI-NEXT:    v_add_i32_e32 v12, vcc, v4, v11
3119; TAHITI-NEXT:    v_xor_b32_e32 v12, v12, v11
3120; TAHITI-NEXT:    v_mul_lo_u32 v13, v12, v10
3121; TAHITI-NEXT:    v_mul_hi_u32 v14, v12, v9
3122; TAHITI-NEXT:    v_mul_hi_u32 v15, v12, v10
3123; TAHITI-NEXT:    v_addc_u32_e32 v5, vcc, v5, v11, vcc
3124; TAHITI-NEXT:    v_xor_b32_e32 v5, v5, v11
3125; TAHITI-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
3126; TAHITI-NEXT:    v_addc_u32_e32 v14, vcc, 0, v15, vcc
3127; TAHITI-NEXT:    v_mul_lo_u32 v15, v5, v9
3128; TAHITI-NEXT:    v_mul_hi_u32 v9, v5, v9
3129; TAHITI-NEXT:    v_mul_hi_u32 v16, v5, v10
3130; TAHITI-NEXT:    v_mul_lo_u32 v10, v5, v10
3131; TAHITI-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
3132; TAHITI-NEXT:    v_addc_u32_e32 v9, vcc, v14, v9, vcc
3133; TAHITI-NEXT:    v_addc_u32_e32 v13, vcc, 0, v16, vcc
3134; TAHITI-NEXT:    v_add_i32_e32 v9, vcc, v9, v10
3135; TAHITI-NEXT:    v_addc_u32_e32 v10, vcc, 0, v13, vcc
3136; TAHITI-NEXT:    v_mul_lo_u32 v10, v8, v10
3137; TAHITI-NEXT:    v_mul_hi_u32 v13, v8, v9
3138; TAHITI-NEXT:    v_mul_lo_u32 v14, v1, v9
3139; TAHITI-NEXT:    v_mul_lo_u32 v9, v8, v9
3140; TAHITI-NEXT:    v_add_i32_e32 v10, vcc, v10, v13
3141; TAHITI-NEXT:    v_add_i32_e32 v10, vcc, v14, v10
3142; TAHITI-NEXT:    v_sub_i32_e32 v13, vcc, v5, v10
3143; TAHITI-NEXT:    v_sub_i32_e32 v9, vcc, v12, v9
3144; TAHITI-NEXT:    v_subb_u32_e64 v12, s[0:1], v13, v1, vcc
3145; TAHITI-NEXT:    v_sub_i32_e64 v13, s[0:1], v9, v8
3146; TAHITI-NEXT:    v_subbrev_u32_e64 v14, s[2:3], 0, v12, s[0:1]
3147; TAHITI-NEXT:    v_cmp_ge_u32_e64 s[2:3], v14, v1
3148; TAHITI-NEXT:    v_cndmask_b32_e64 v15, 0, -1, s[2:3]
3149; TAHITI-NEXT:    v_cmp_ge_u32_e64 s[2:3], v13, v8
3150; TAHITI-NEXT:    v_subb_u32_e32 v5, vcc, v5, v10, vcc
3151; TAHITI-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s[2:3]
3152; TAHITI-NEXT:    v_cmp_eq_u32_e64 s[2:3], v14, v1
3153; TAHITI-NEXT:    v_subb_u32_e64 v12, s[0:1], v12, v1, s[0:1]
3154; TAHITI-NEXT:    v_cmp_ge_u32_e32 vcc, v5, v1
3155; TAHITI-NEXT:    v_cndmask_b32_e64 v15, v15, v16, s[2:3]
3156; TAHITI-NEXT:    v_sub_i32_e64 v16, s[0:1], v13, v8
3157; TAHITI-NEXT:    v_cndmask_b32_e64 v10, 0, -1, vcc
3158; TAHITI-NEXT:    v_cmp_ge_u32_e32 vcc, v9, v8
3159; TAHITI-NEXT:    v_subbrev_u32_e64 v12, s[0:1], 0, v12, s[0:1]
3160; TAHITI-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
3161; TAHITI-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v1
3162; TAHITI-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v15
3163; TAHITI-NEXT:    v_cndmask_b32_e32 v1, v10, v8, vcc
3164; TAHITI-NEXT:    v_cndmask_b32_e64 v13, v13, v16, s[0:1]
3165; TAHITI-NEXT:    v_cndmask_b32_e64 v12, v14, v12, s[0:1]
3166; TAHITI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
3167; TAHITI-NEXT:    v_cndmask_b32_e32 v1, v5, v12, vcc
3168; TAHITI-NEXT:    v_cndmask_b32_e32 v5, v9, v13, vcc
3169; TAHITI-NEXT:    v_xor_b32_e32 v5, v5, v11
3170; TAHITI-NEXT:    v_xor_b32_e32 v1, v1, v11
3171; TAHITI-NEXT:    v_sub_i32_e32 v8, vcc, v5, v11
3172; TAHITI-NEXT:    v_subb_u32_e32 v9, vcc, v1, v11, vcc
3173; TAHITI-NEXT:    s_cbranch_execnz .LBB10_3
3174; TAHITI-NEXT:  .LBB10_2:
3175; TAHITI-NEXT:    v_cvt_f32_u32_e32 v1, v0
3176; TAHITI-NEXT:    v_sub_i32_e32 v5, vcc, 0, v0
3177; TAHITI-NEXT:    v_mov_b32_e32 v9, 0
3178; TAHITI-NEXT:    v_rcp_iflag_f32_e32 v1, v1
3179; TAHITI-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
3180; TAHITI-NEXT:    v_cvt_u32_f32_e32 v1, v1
3181; TAHITI-NEXT:    v_mul_lo_u32 v5, v5, v1
3182; TAHITI-NEXT:    v_mul_hi_u32 v5, v1, v5
3183; TAHITI-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
3184; TAHITI-NEXT:    v_mul_hi_u32 v1, v4, v1
3185; TAHITI-NEXT:    v_mul_lo_u32 v1, v1, v0
3186; TAHITI-NEXT:    v_sub_i32_e32 v1, vcc, v4, v1
3187; TAHITI-NEXT:    v_sub_i32_e32 v4, vcc, v1, v0
3188; TAHITI-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v0
3189; TAHITI-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
3190; TAHITI-NEXT:    v_sub_i32_e32 v4, vcc, v1, v0
3191; TAHITI-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v0
3192; TAHITI-NEXT:    v_cndmask_b32_e32 v8, v1, v4, vcc
3193; TAHITI-NEXT:  .LBB10_3:
3194; TAHITI-NEXT:    v_or_b32_e32 v1, v7, v3
3195; TAHITI-NEXT:    v_mov_b32_e32 v0, 0
3196; TAHITI-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
3197; TAHITI-NEXT:    s_cbranch_vccz .LBB10_8
3198; TAHITI-NEXT:  ; %bb.4:
3199; TAHITI-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
3200; TAHITI-NEXT:    v_add_i32_e32 v1, vcc, v2, v0
3201; TAHITI-NEXT:    v_addc_u32_e32 v3, vcc, v3, v0, vcc
3202; TAHITI-NEXT:    v_xor_b32_e32 v1, v1, v0
3203; TAHITI-NEXT:    v_xor_b32_e32 v0, v3, v0
3204; TAHITI-NEXT:    v_cvt_f32_u32_e32 v3, v1
3205; TAHITI-NEXT:    v_cvt_f32_u32_e32 v4, v0
3206; TAHITI-NEXT:    v_sub_i32_e32 v5, vcc, 0, v1
3207; TAHITI-NEXT:    v_subb_u32_e32 v10, vcc, 0, v0, vcc
3208; TAHITI-NEXT:    v_madmk_f32 v3, v4, 0x4f800000, v3
3209; TAHITI-NEXT:    v_rcp_f32_e32 v3, v3
3210; TAHITI-NEXT:    v_mul_f32_e32 v3, 0x5f7ffffc, v3
3211; TAHITI-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v3
3212; TAHITI-NEXT:    v_trunc_f32_e32 v4, v4
3213; TAHITI-NEXT:    v_madmk_f32 v3, v4, 0xcf800000, v3
3214; TAHITI-NEXT:    v_cvt_u32_f32_e32 v4, v4
3215; TAHITI-NEXT:    v_cvt_u32_f32_e32 v3, v3
3216; TAHITI-NEXT:    v_mul_lo_u32 v12, v5, v4
3217; TAHITI-NEXT:    v_mul_hi_u32 v11, v5, v3
3218; TAHITI-NEXT:    v_mul_lo_u32 v13, v10, v3
3219; TAHITI-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
3220; TAHITI-NEXT:    v_mul_lo_u32 v12, v5, v3
3221; TAHITI-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
3222; TAHITI-NEXT:    v_mul_lo_u32 v13, v3, v11
3223; TAHITI-NEXT:    v_mul_hi_u32 v14, v3, v12
3224; TAHITI-NEXT:    v_mul_hi_u32 v15, v3, v11
3225; TAHITI-NEXT:    v_mul_hi_u32 v16, v4, v11
3226; TAHITI-NEXT:    v_mul_lo_u32 v11, v4, v11
3227; TAHITI-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
3228; TAHITI-NEXT:    v_addc_u32_e32 v14, vcc, 0, v15, vcc
3229; TAHITI-NEXT:    v_mul_lo_u32 v15, v4, v12
3230; TAHITI-NEXT:    v_mul_hi_u32 v12, v4, v12
3231; TAHITI-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
3232; TAHITI-NEXT:    v_addc_u32_e32 v12, vcc, v14, v12, vcc
3233; TAHITI-NEXT:    v_addc_u32_e32 v13, vcc, 0, v16, vcc
3234; TAHITI-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
3235; TAHITI-NEXT:    v_addc_u32_e32 v12, vcc, 0, v13, vcc
3236; TAHITI-NEXT:    v_add_i32_e32 v3, vcc, v3, v11
3237; TAHITI-NEXT:    v_addc_u32_e32 v4, vcc, v4, v12, vcc
3238; TAHITI-NEXT:    v_mul_lo_u32 v11, v5, v4
3239; TAHITI-NEXT:    v_mul_hi_u32 v12, v5, v3
3240; TAHITI-NEXT:    v_mul_lo_u32 v10, v10, v3
3241; TAHITI-NEXT:    v_mul_lo_u32 v5, v5, v3
3242; TAHITI-NEXT:    v_add_i32_e32 v11, vcc, v11, v12
3243; TAHITI-NEXT:    v_add_i32_e32 v10, vcc, v11, v10
3244; TAHITI-NEXT:    v_mul_lo_u32 v13, v3, v10
3245; TAHITI-NEXT:    v_mul_hi_u32 v14, v3, v5
3246; TAHITI-NEXT:    v_mul_hi_u32 v15, v3, v10
3247; TAHITI-NEXT:    v_mul_hi_u32 v12, v4, v5
3248; TAHITI-NEXT:    v_mul_lo_u32 v5, v4, v5
3249; TAHITI-NEXT:    v_mul_hi_u32 v11, v4, v10
3250; TAHITI-NEXT:    v_add_i32_e32 v13, vcc, v14, v13
3251; TAHITI-NEXT:    v_addc_u32_e32 v14, vcc, 0, v15, vcc
3252; TAHITI-NEXT:    v_mul_lo_u32 v10, v4, v10
3253; TAHITI-NEXT:    v_add_i32_e32 v5, vcc, v13, v5
3254; TAHITI-NEXT:    v_addc_u32_e32 v5, vcc, v14, v12, vcc
3255; TAHITI-NEXT:    v_addc_u32_e32 v11, vcc, 0, v11, vcc
3256; TAHITI-NEXT:    v_add_i32_e32 v5, vcc, v5, v10
3257; TAHITI-NEXT:    v_addc_u32_e32 v10, vcc, 0, v11, vcc
3258; TAHITI-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
3259; TAHITI-NEXT:    v_addc_u32_e32 v4, vcc, v4, v10, vcc
3260; TAHITI-NEXT:    v_ashrrev_i32_e32 v5, 31, v7
3261; TAHITI-NEXT:    v_add_i32_e32 v10, vcc, v6, v5
3262; TAHITI-NEXT:    v_xor_b32_e32 v10, v10, v5
3263; TAHITI-NEXT:    v_mul_lo_u32 v11, v10, v4
3264; TAHITI-NEXT:    v_mul_hi_u32 v12, v10, v3
3265; TAHITI-NEXT:    v_mul_hi_u32 v13, v10, v4
3266; TAHITI-NEXT:    v_addc_u32_e32 v7, vcc, v7, v5, vcc
3267; TAHITI-NEXT:    v_xor_b32_e32 v7, v7, v5
3268; TAHITI-NEXT:    v_add_i32_e32 v11, vcc, v12, v11
3269; TAHITI-NEXT:    v_addc_u32_e32 v12, vcc, 0, v13, vcc
3270; TAHITI-NEXT:    v_mul_lo_u32 v13, v7, v3
3271; TAHITI-NEXT:    v_mul_hi_u32 v3, v7, v3
3272; TAHITI-NEXT:    v_mul_hi_u32 v14, v7, v4
3273; TAHITI-NEXT:    v_mul_lo_u32 v4, v7, v4
3274; TAHITI-NEXT:    v_add_i32_e32 v11, vcc, v11, v13
3275; TAHITI-NEXT:    v_addc_u32_e32 v3, vcc, v12, v3, vcc
3276; TAHITI-NEXT:    v_addc_u32_e32 v11, vcc, 0, v14, vcc
3277; TAHITI-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
3278; TAHITI-NEXT:    v_addc_u32_e32 v4, vcc, 0, v11, vcc
3279; TAHITI-NEXT:    v_mul_lo_u32 v4, v1, v4
3280; TAHITI-NEXT:    v_mul_hi_u32 v11, v1, v3
3281; TAHITI-NEXT:    v_mul_lo_u32 v12, v0, v3
3282; TAHITI-NEXT:    v_mul_lo_u32 v3, v1, v3
3283; TAHITI-NEXT:    v_add_i32_e32 v4, vcc, v4, v11
3284; TAHITI-NEXT:    v_add_i32_e32 v4, vcc, v12, v4
3285; TAHITI-NEXT:    v_sub_i32_e32 v11, vcc, v7, v4
3286; TAHITI-NEXT:    v_sub_i32_e32 v3, vcc, v10, v3
3287; TAHITI-NEXT:    v_subb_u32_e64 v10, s[0:1], v11, v0, vcc
3288; TAHITI-NEXT:    v_sub_i32_e64 v11, s[0:1], v3, v1
3289; TAHITI-NEXT:    v_subbrev_u32_e64 v12, s[2:3], 0, v10, s[0:1]
3290; TAHITI-NEXT:    v_cmp_ge_u32_e64 s[2:3], v12, v0
3291; TAHITI-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[2:3]
3292; TAHITI-NEXT:    v_cmp_ge_u32_e64 s[2:3], v11, v1
3293; TAHITI-NEXT:    v_subb_u32_e32 v4, vcc, v7, v4, vcc
3294; TAHITI-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[2:3]
3295; TAHITI-NEXT:    v_cmp_eq_u32_e64 s[2:3], v12, v0
3296; TAHITI-NEXT:    v_subb_u32_e64 v10, s[0:1], v10, v0, s[0:1]
3297; TAHITI-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v0
3298; TAHITI-NEXT:    v_cndmask_b32_e64 v13, v13, v14, s[2:3]
3299; TAHITI-NEXT:    v_sub_i32_e64 v14, s[0:1], v11, v1
3300; TAHITI-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
3301; TAHITI-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v1
3302; TAHITI-NEXT:    v_subbrev_u32_e64 v10, s[0:1], 0, v10, s[0:1]
3303; TAHITI-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
3304; TAHITI-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v0
3305; TAHITI-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v13
3306; TAHITI-NEXT:    v_cndmask_b32_e32 v0, v7, v1, vcc
3307; TAHITI-NEXT:    v_cndmask_b32_e64 v11, v11, v14, s[0:1]
3308; TAHITI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
3309; TAHITI-NEXT:    v_cndmask_b32_e64 v10, v12, v10, s[0:1]
3310; TAHITI-NEXT:    v_cndmask_b32_e32 v1, v3, v11, vcc
3311; TAHITI-NEXT:    v_cndmask_b32_e32 v0, v4, v10, vcc
3312; TAHITI-NEXT:    v_xor_b32_e32 v1, v1, v5
3313; TAHITI-NEXT:    v_xor_b32_e32 v0, v0, v5
3314; TAHITI-NEXT:    v_sub_i32_e32 v10, vcc, v1, v5
3315; TAHITI-NEXT:    v_subb_u32_e32 v11, vcc, v0, v5, vcc
3316; TAHITI-NEXT:    s_cbranch_execnz .LBB10_6
3317; TAHITI-NEXT:  .LBB10_5:
3318; TAHITI-NEXT:    v_cvt_f32_u32_e32 v0, v2
3319; TAHITI-NEXT:    v_sub_i32_e32 v1, vcc, 0, v2
3320; TAHITI-NEXT:    v_mov_b32_e32 v11, 0
3321; TAHITI-NEXT:    v_rcp_iflag_f32_e32 v0, v0
3322; TAHITI-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
3323; TAHITI-NEXT:    v_cvt_u32_f32_e32 v0, v0
3324; TAHITI-NEXT:    v_mul_lo_u32 v1, v1, v0
3325; TAHITI-NEXT:    v_mul_hi_u32 v1, v0, v1
3326; TAHITI-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
3327; TAHITI-NEXT:    v_mul_hi_u32 v0, v6, v0
3328; TAHITI-NEXT:    v_mul_lo_u32 v0, v0, v2
3329; TAHITI-NEXT:    v_sub_i32_e32 v0, vcc, v6, v0
3330; TAHITI-NEXT:    v_subrev_i32_e32 v1, vcc, v2, v0
3331; TAHITI-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
3332; TAHITI-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3333; TAHITI-NEXT:    v_subrev_i32_e32 v1, vcc, v2, v0
3334; TAHITI-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
3335; TAHITI-NEXT:    v_cndmask_b32_e32 v10, v0, v1, vcc
3336; TAHITI-NEXT:  .LBB10_6:
3337; TAHITI-NEXT:    s_mov_b32 s7, 0xf000
3338; TAHITI-NEXT:    s_mov_b32 s6, -1
3339; TAHITI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
3340; TAHITI-NEXT:    s_endpgm
3341; TAHITI-NEXT:  .LBB10_7:
3342; TAHITI-NEXT:    ; implicit-def: $vgpr8_vgpr9
3343; TAHITI-NEXT:    s_branch .LBB10_2
3344; TAHITI-NEXT:  .LBB10_8:
3345; TAHITI-NEXT:    s_branch .LBB10_5
3346;
3347; TONGA-LABEL: srem_v2i64:
3348; TONGA:       ; %bb.0:
3349; TONGA-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x24
3350; TONGA-NEXT:    v_mov_b32_e32 v8, 0
3351; TONGA-NEXT:    s_waitcnt lgkmcnt(0)
3352; TONGA-NEXT:    s_add_u32 s0, s6, 16
3353; TONGA-NEXT:    v_mov_b32_e32 v4, s6
3354; TONGA-NEXT:    s_addc_u32 s1, s7, 0
3355; TONGA-NEXT:    v_mov_b32_e32 v0, s0
3356; TONGA-NEXT:    v_mov_b32_e32 v5, s7
3357; TONGA-NEXT:    v_mov_b32_e32 v1, s1
3358; TONGA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
3359; TONGA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
3360; TONGA-NEXT:    s_waitcnt vmcnt(0)
3361; TONGA-NEXT:    v_or_b32_e32 v9, v5, v1
3362; TONGA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[8:9]
3363; TONGA-NEXT:    s_cbranch_vccz .LBB10_7
3364; TONGA-NEXT:  ; %bb.1:
3365; TONGA-NEXT:    v_ashrrev_i32_e32 v8, 31, v1
3366; TONGA-NEXT:    v_add_u32_e32 v9, vcc, v0, v8
3367; TONGA-NEXT:    v_addc_u32_e32 v1, vcc, v1, v8, vcc
3368; TONGA-NEXT:    v_xor_b32_e32 v14, v9, v8
3369; TONGA-NEXT:    v_xor_b32_e32 v1, v1, v8
3370; TONGA-NEXT:    v_cvt_f32_u32_e32 v8, v14
3371; TONGA-NEXT:    v_cvt_f32_u32_e32 v9, v1
3372; TONGA-NEXT:    v_sub_u32_e32 v15, vcc, 0, v14
3373; TONGA-NEXT:    v_subb_u32_e32 v16, vcc, 0, v1, vcc
3374; TONGA-NEXT:    v_madmk_f32 v8, v9, 0x4f800000, v8
3375; TONGA-NEXT:    v_rcp_f32_e32 v8, v8
3376; TONGA-NEXT:    v_mul_f32_e32 v8, 0x5f7ffffc, v8
3377; TONGA-NEXT:    v_mul_f32_e32 v9, 0x2f800000, v8
3378; TONGA-NEXT:    v_trunc_f32_e32 v9, v9
3379; TONGA-NEXT:    v_madmk_f32 v8, v9, 0xcf800000, v8
3380; TONGA-NEXT:    v_cvt_u32_f32_e32 v12, v9
3381; TONGA-NEXT:    v_cvt_u32_f32_e32 v13, v8
3382; TONGA-NEXT:    v_mul_lo_u32 v10, v15, v12
3383; TONGA-NEXT:    v_mad_u64_u32 v[8:9], s[0:1], v15, v13, 0
3384; TONGA-NEXT:    v_mul_lo_u32 v11, v16, v13
3385; TONGA-NEXT:    v_add_u32_e32 v9, vcc, v9, v10
3386; TONGA-NEXT:    v_add_u32_e32 v11, vcc, v9, v11
3387; TONGA-NEXT:    v_mad_u64_u32 v[9:10], s[0:1], v13, v11, 0
3388; TONGA-NEXT:    v_mul_hi_u32 v17, v13, v8
3389; TONGA-NEXT:    v_add_u32_e32 v17, vcc, v17, v9
3390; TONGA-NEXT:    v_mad_u64_u32 v[8:9], s[0:1], v12, v8, 0
3391; TONGA-NEXT:    v_addc_u32_e32 v18, vcc, 0, v10, vcc
3392; TONGA-NEXT:    v_mad_u64_u32 v[10:11], s[0:1], v12, v11, 0
3393; TONGA-NEXT:    v_add_u32_e32 v8, vcc, v17, v8
3394; TONGA-NEXT:    v_addc_u32_e32 v8, vcc, v18, v9, vcc
3395; TONGA-NEXT:    v_addc_u32_e32 v9, vcc, 0, v11, vcc
3396; TONGA-NEXT:    v_add_u32_e32 v8, vcc, v8, v10
3397; TONGA-NEXT:    v_addc_u32_e32 v9, vcc, 0, v9, vcc
3398; TONGA-NEXT:    v_add_u32_e32 v17, vcc, v13, v8
3399; TONGA-NEXT:    v_addc_u32_e32 v18, vcc, v12, v9, vcc
3400; TONGA-NEXT:    v_mad_u64_u32 v[8:9], s[0:1], v15, v17, 0
3401; TONGA-NEXT:    v_mul_lo_u32 v12, v15, v18
3402; TONGA-NEXT:    v_mul_lo_u32 v13, v16, v17
3403; TONGA-NEXT:    v_mul_hi_u32 v15, v17, v8
3404; TONGA-NEXT:    v_mad_u64_u32 v[10:11], s[0:1], v18, v8, 0
3405; TONGA-NEXT:    v_add_u32_e32 v9, vcc, v12, v9
3406; TONGA-NEXT:    v_add_u32_e32 v9, vcc, v9, v13
3407; TONGA-NEXT:    v_mad_u64_u32 v[12:13], s[0:1], v17, v9, 0
3408; TONGA-NEXT:    v_mad_u64_u32 v[8:9], s[0:1], v18, v9, 0
3409; TONGA-NEXT:    v_add_u32_e32 v12, vcc, v15, v12
3410; TONGA-NEXT:    v_addc_u32_e32 v13, vcc, 0, v13, vcc
3411; TONGA-NEXT:    v_add_u32_e32 v10, vcc, v12, v10
3412; TONGA-NEXT:    v_addc_u32_e32 v10, vcc, v13, v11, vcc
3413; TONGA-NEXT:    v_addc_u32_e32 v9, vcc, 0, v9, vcc
3414; TONGA-NEXT:    v_add_u32_e32 v8, vcc, v10, v8
3415; TONGA-NEXT:    v_addc_u32_e32 v9, vcc, 0, v9, vcc
3416; TONGA-NEXT:    v_add_u32_e32 v10, vcc, v17, v8
3417; TONGA-NEXT:    v_addc_u32_e32 v11, vcc, v18, v9, vcc
3418; TONGA-NEXT:    v_ashrrev_i32_e32 v12, 31, v5
3419; TONGA-NEXT:    v_add_u32_e32 v8, vcc, v4, v12
3420; TONGA-NEXT:    v_xor_b32_e32 v13, v8, v12
3421; TONGA-NEXT:    v_mad_u64_u32 v[8:9], s[0:1], v13, v11, 0
3422; TONGA-NEXT:    v_mul_hi_u32 v15, v13, v10
3423; TONGA-NEXT:    v_addc_u32_e32 v5, vcc, v5, v12, vcc
3424; TONGA-NEXT:    v_xor_b32_e32 v5, v5, v12
3425; TONGA-NEXT:    v_add_u32_e32 v15, vcc, v15, v8
3426; TONGA-NEXT:    v_addc_u32_e32 v16, vcc, 0, v9, vcc
3427; TONGA-NEXT:    v_mad_u64_u32 v[8:9], s[0:1], v5, v10, 0
3428; TONGA-NEXT:    v_mad_u64_u32 v[10:11], s[0:1], v5, v11, 0
3429; TONGA-NEXT:    v_add_u32_e32 v8, vcc, v15, v8
3430; TONGA-NEXT:    v_addc_u32_e32 v8, vcc, v16, v9, vcc
3431; TONGA-NEXT:    v_addc_u32_e32 v9, vcc, 0, v11, vcc
3432; TONGA-NEXT:    v_add_u32_e32 v10, vcc, v8, v10
3433; TONGA-NEXT:    v_addc_u32_e32 v8, vcc, 0, v9, vcc
3434; TONGA-NEXT:    v_mul_lo_u32 v11, v14, v8
3435; TONGA-NEXT:    v_mad_u64_u32 v[8:9], s[0:1], v14, v10, 0
3436; TONGA-NEXT:    v_mul_lo_u32 v10, v1, v10
3437; TONGA-NEXT:    v_add_u32_e32 v9, vcc, v11, v9
3438; TONGA-NEXT:    v_add_u32_e32 v9, vcc, v10, v9
3439; TONGA-NEXT:    v_sub_u32_e32 v10, vcc, v5, v9
3440; TONGA-NEXT:    v_sub_u32_e32 v8, vcc, v13, v8
3441; TONGA-NEXT:    v_subb_u32_e64 v10, s[0:1], v10, v1, vcc
3442; TONGA-NEXT:    v_sub_u32_e64 v11, s[0:1], v8, v14
3443; TONGA-NEXT:    v_subbrev_u32_e64 v13, s[2:3], 0, v10, s[0:1]
3444; TONGA-NEXT:    v_cmp_ge_u32_e64 s[2:3], v13, v1
3445; TONGA-NEXT:    v_cndmask_b32_e64 v15, 0, -1, s[2:3]
3446; TONGA-NEXT:    v_cmp_ge_u32_e64 s[2:3], v11, v14
3447; TONGA-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s[2:3]
3448; TONGA-NEXT:    v_cmp_eq_u32_e64 s[2:3], v13, v1
3449; TONGA-NEXT:    v_subb_u32_e64 v10, s[0:1], v10, v1, s[0:1]
3450; TONGA-NEXT:    v_cndmask_b32_e64 v15, v15, v16, s[2:3]
3451; TONGA-NEXT:    v_sub_u32_e64 v16, s[0:1], v11, v14
3452; TONGA-NEXT:    v_subb_u32_e32 v5, vcc, v5, v9, vcc
3453; TONGA-NEXT:    v_subbrev_u32_e64 v10, s[0:1], 0, v10, s[0:1]
3454; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v5, v1
3455; TONGA-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v15
3456; TONGA-NEXT:    v_cndmask_b32_e64 v9, 0, -1, vcc
3457; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v14
3458; TONGA-NEXT:    v_cndmask_b32_e64 v10, v13, v10, s[0:1]
3459; TONGA-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
3460; TONGA-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v1
3461; TONGA-NEXT:    v_cndmask_b32_e32 v1, v9, v13, vcc
3462; TONGA-NEXT:    v_cndmask_b32_e64 v11, v11, v16, s[0:1]
3463; TONGA-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
3464; TONGA-NEXT:    v_cndmask_b32_e32 v1, v5, v10, vcc
3465; TONGA-NEXT:    v_cndmask_b32_e32 v5, v8, v11, vcc
3466; TONGA-NEXT:    v_xor_b32_e32 v5, v5, v12
3467; TONGA-NEXT:    v_xor_b32_e32 v1, v1, v12
3468; TONGA-NEXT:    v_sub_u32_e32 v8, vcc, v5, v12
3469; TONGA-NEXT:    v_subb_u32_e32 v9, vcc, v1, v12, vcc
3470; TONGA-NEXT:    s_cbranch_execnz .LBB10_3
3471; TONGA-NEXT:  .LBB10_2:
3472; TONGA-NEXT:    v_cvt_f32_u32_e32 v1, v0
3473; TONGA-NEXT:    v_sub_u32_e32 v5, vcc, 0, v0
3474; TONGA-NEXT:    v_mov_b32_e32 v9, 0
3475; TONGA-NEXT:    v_rcp_iflag_f32_e32 v1, v1
3476; TONGA-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
3477; TONGA-NEXT:    v_cvt_u32_f32_e32 v1, v1
3478; TONGA-NEXT:    v_mul_lo_u32 v5, v5, v1
3479; TONGA-NEXT:    v_mul_hi_u32 v5, v1, v5
3480; TONGA-NEXT:    v_add_u32_e32 v1, vcc, v1, v5
3481; TONGA-NEXT:    v_mul_hi_u32 v1, v4, v1
3482; TONGA-NEXT:    v_mul_lo_u32 v1, v1, v0
3483; TONGA-NEXT:    v_sub_u32_e32 v1, vcc, v4, v1
3484; TONGA-NEXT:    v_subrev_u32_e32 v4, vcc, v0, v1
3485; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v0
3486; TONGA-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
3487; TONGA-NEXT:    v_sub_u32_e32 v4, vcc, v1, v0
3488; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v0
3489; TONGA-NEXT:    v_cndmask_b32_e32 v8, v1, v4, vcc
3490; TONGA-NEXT:  .LBB10_3:
3491; TONGA-NEXT:    v_or_b32_e32 v1, v7, v3
3492; TONGA-NEXT:    v_mov_b32_e32 v0, 0
3493; TONGA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
3494; TONGA-NEXT:    s_cbranch_vccz .LBB10_8
3495; TONGA-NEXT:  ; %bb.4:
3496; TONGA-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
3497; TONGA-NEXT:    v_add_u32_e32 v1, vcc, v2, v0
3498; TONGA-NEXT:    v_addc_u32_e32 v3, vcc, v3, v0, vcc
3499; TONGA-NEXT:    v_xor_b32_e32 v5, v1, v0
3500; TONGA-NEXT:    v_xor_b32_e32 v12, v3, v0
3501; TONGA-NEXT:    v_cvt_f32_u32_e32 v0, v5
3502; TONGA-NEXT:    v_cvt_f32_u32_e32 v1, v12
3503; TONGA-NEXT:    v_sub_u32_e32 v13, vcc, 0, v5
3504; TONGA-NEXT:    v_subb_u32_e32 v14, vcc, 0, v12, vcc
3505; TONGA-NEXT:    v_madmk_f32 v0, v1, 0x4f800000, v0
3506; TONGA-NEXT:    v_rcp_f32_e32 v0, v0
3507; TONGA-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
3508; TONGA-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
3509; TONGA-NEXT:    v_trunc_f32_e32 v1, v1
3510; TONGA-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
3511; TONGA-NEXT:    v_cvt_u32_f32_e32 v10, v1
3512; TONGA-NEXT:    v_cvt_u32_f32_e32 v11, v0
3513; TONGA-NEXT:    v_mul_lo_u32 v3, v13, v10
3514; TONGA-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], v13, v11, 0
3515; TONGA-NEXT:    v_mul_lo_u32 v4, v14, v11
3516; TONGA-NEXT:    v_add_u32_e32 v1, vcc, v1, v3
3517; TONGA-NEXT:    v_add_u32_e32 v15, vcc, v1, v4
3518; TONGA-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], v11, v15, 0
3519; TONGA-NEXT:    v_mul_hi_u32 v1, v11, v0
3520; TONGA-NEXT:    v_add_u32_e32 v16, vcc, v1, v3
3521; TONGA-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], v10, v0, 0
3522; TONGA-NEXT:    v_addc_u32_e32 v17, vcc, 0, v4, vcc
3523; TONGA-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], v10, v15, 0
3524; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v16, v0
3525; TONGA-NEXT:    v_addc_u32_e32 v0, vcc, v17, v1, vcc
3526; TONGA-NEXT:    v_addc_u32_e32 v1, vcc, 0, v4, vcc
3527; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v0, v3
3528; TONGA-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3529; TONGA-NEXT:    v_add_u32_e32 v15, vcc, v11, v0
3530; TONGA-NEXT:    v_addc_u32_e32 v16, vcc, v10, v1, vcc
3531; TONGA-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], v13, v15, 0
3532; TONGA-NEXT:    v_mul_lo_u32 v10, v13, v16
3533; TONGA-NEXT:    v_mul_lo_u32 v11, v14, v15
3534; TONGA-NEXT:    v_mul_hi_u32 v13, v15, v0
3535; TONGA-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], v16, v0, 0
3536; TONGA-NEXT:    v_add_u32_e32 v1, vcc, v10, v1
3537; TONGA-NEXT:    v_add_u32_e32 v1, vcc, v1, v11
3538; TONGA-NEXT:    v_mad_u64_u32 v[10:11], s[0:1], v15, v1, 0
3539; TONGA-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], v16, v1, 0
3540; TONGA-NEXT:    v_add_u32_e32 v10, vcc, v13, v10
3541; TONGA-NEXT:    v_addc_u32_e32 v11, vcc, 0, v11, vcc
3542; TONGA-NEXT:    v_add_u32_e32 v3, vcc, v10, v3
3543; TONGA-NEXT:    v_addc_u32_e32 v3, vcc, v11, v4, vcc
3544; TONGA-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3545; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v3, v0
3546; TONGA-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3547; TONGA-NEXT:    v_add_u32_e32 v3, vcc, v15, v0
3548; TONGA-NEXT:    v_addc_u32_e32 v4, vcc, v16, v1, vcc
3549; TONGA-NEXT:    v_ashrrev_i32_e32 v11, 31, v7
3550; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v6, v11
3551; TONGA-NEXT:    v_xor_b32_e32 v10, v0, v11
3552; TONGA-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], v10, v4, 0
3553; TONGA-NEXT:    v_mul_hi_u32 v13, v10, v3
3554; TONGA-NEXT:    v_addc_u32_e32 v7, vcc, v7, v11, vcc
3555; TONGA-NEXT:    v_xor_b32_e32 v7, v7, v11
3556; TONGA-NEXT:    v_add_u32_e32 v13, vcc, v13, v0
3557; TONGA-NEXT:    v_addc_u32_e32 v14, vcc, 0, v1, vcc
3558; TONGA-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], v7, v3, 0
3559; TONGA-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], v7, v4, 0
3560; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v13, v0
3561; TONGA-NEXT:    v_addc_u32_e32 v0, vcc, v14, v1, vcc
3562; TONGA-NEXT:    v_addc_u32_e32 v1, vcc, 0, v4, vcc
3563; TONGA-NEXT:    v_add_u32_e32 v3, vcc, v0, v3
3564; TONGA-NEXT:    v_addc_u32_e32 v0, vcc, 0, v1, vcc
3565; TONGA-NEXT:    v_mul_lo_u32 v4, v5, v0
3566; TONGA-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], v5, v3, 0
3567; TONGA-NEXT:    v_mul_lo_u32 v3, v12, v3
3568; TONGA-NEXT:    v_add_u32_e32 v1, vcc, v4, v1
3569; TONGA-NEXT:    v_add_u32_e32 v1, vcc, v3, v1
3570; TONGA-NEXT:    v_sub_u32_e32 v3, vcc, v7, v1
3571; TONGA-NEXT:    v_sub_u32_e32 v0, vcc, v10, v0
3572; TONGA-NEXT:    v_subb_u32_e64 v3, s[0:1], v3, v12, vcc
3573; TONGA-NEXT:    v_sub_u32_e64 v4, s[0:1], v0, v5
3574; TONGA-NEXT:    v_subbrev_u32_e64 v10, s[2:3], 0, v3, s[0:1]
3575; TONGA-NEXT:    v_cmp_ge_u32_e64 s[2:3], v10, v12
3576; TONGA-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[2:3]
3577; TONGA-NEXT:    v_cmp_ge_u32_e64 s[2:3], v4, v5
3578; TONGA-NEXT:    v_subb_u32_e32 v1, vcc, v7, v1, vcc
3579; TONGA-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[2:3]
3580; TONGA-NEXT:    v_cmp_eq_u32_e64 s[2:3], v10, v12
3581; TONGA-NEXT:    v_subb_u32_e64 v3, s[0:1], v3, v12, s[0:1]
3582; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v12
3583; TONGA-NEXT:    v_cndmask_b32_e64 v13, v13, v14, s[2:3]
3584; TONGA-NEXT:    v_sub_u32_e64 v14, s[0:1], v4, v5
3585; TONGA-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
3586; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v5
3587; TONGA-NEXT:    v_subbrev_u32_e64 v3, s[0:1], 0, v3, s[0:1]
3588; TONGA-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
3589; TONGA-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v12
3590; TONGA-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v13
3591; TONGA-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc
3592; TONGA-NEXT:    v_cndmask_b32_e64 v4, v4, v14, s[0:1]
3593; TONGA-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
3594; TONGA-NEXT:    v_cndmask_b32_e64 v3, v10, v3, s[0:1]
3595; TONGA-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
3596; TONGA-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
3597; TONGA-NEXT:    v_xor_b32_e32 v0, v0, v11
3598; TONGA-NEXT:    v_xor_b32_e32 v1, v1, v11
3599; TONGA-NEXT:    v_sub_u32_e32 v10, vcc, v0, v11
3600; TONGA-NEXT:    v_subb_u32_e32 v11, vcc, v1, v11, vcc
3601; TONGA-NEXT:    s_cbranch_execnz .LBB10_6
3602; TONGA-NEXT:  .LBB10_5:
3603; TONGA-NEXT:    v_cvt_f32_u32_e32 v0, v2
3604; TONGA-NEXT:    v_sub_u32_e32 v1, vcc, 0, v2
3605; TONGA-NEXT:    v_mov_b32_e32 v11, 0
3606; TONGA-NEXT:    v_rcp_iflag_f32_e32 v0, v0
3607; TONGA-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
3608; TONGA-NEXT:    v_cvt_u32_f32_e32 v0, v0
3609; TONGA-NEXT:    v_mul_lo_u32 v1, v1, v0
3610; TONGA-NEXT:    v_mul_hi_u32 v1, v0, v1
3611; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
3612; TONGA-NEXT:    v_mul_hi_u32 v0, v6, v0
3613; TONGA-NEXT:    v_mul_lo_u32 v0, v0, v2
3614; TONGA-NEXT:    v_sub_u32_e32 v0, vcc, v6, v0
3615; TONGA-NEXT:    v_subrev_u32_e32 v1, vcc, v2, v0
3616; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
3617; TONGA-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
3618; TONGA-NEXT:    v_subrev_u32_e32 v1, vcc, v2, v0
3619; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
3620; TONGA-NEXT:    v_cndmask_b32_e32 v10, v0, v1, vcc
3621; TONGA-NEXT:  .LBB10_6:
3622; TONGA-NEXT:    v_mov_b32_e32 v0, s4
3623; TONGA-NEXT:    v_mov_b32_e32 v1, s5
3624; TONGA-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
3625; TONGA-NEXT:    s_endpgm
3626; TONGA-NEXT:  .LBB10_7:
3627; TONGA-NEXT:    ; implicit-def: $vgpr8_vgpr9
3628; TONGA-NEXT:    s_branch .LBB10_2
3629; TONGA-NEXT:  .LBB10_8:
3630; TONGA-NEXT:    s_branch .LBB10_5
3631;
3632; EG-LABEL: srem_v2i64:
3633; EG:       ; %bb.0:
3634; EG-NEXT:    ALU 0, @18, KC0[CB0:0-32], KC1[]
3635; EG-NEXT:    TEX 1 @14
3636; EG-NEXT:    ALU 112, @19, KC0[], KC1[]
3637; EG-NEXT:    ALU 111, @132, KC0[], KC1[]
3638; EG-NEXT:    ALU 112, @244, KC0[], KC1[]
3639; EG-NEXT:    ALU 111, @357, KC0[], KC1[]
3640; EG-NEXT:    ALU 112, @469, KC0[], KC1[]
3641; EG-NEXT:    ALU 112, @582, KC0[], KC1[]
3642; EG-NEXT:    ALU 111, @695, KC0[], KC1[]
3643; EG-NEXT:    ALU 112, @807, KC0[], KC1[]
3644; EG-NEXT:    ALU 112, @920, KC0[], KC1[]
3645; EG-NEXT:    ALU 47, @1033, KC0[CB0:0-32], KC1[]
3646; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T0.X, 1
3647; EG-NEXT:    CF_END
3648; EG-NEXT:    Fetch clause starting at 14:
3649; EG-NEXT:     VTX_READ_128 T4.XYZW, T0.X, 16, #1
3650; EG-NEXT:     VTX_READ_128 T1.XYZW, T0.X, 0, #1
3651; EG-NEXT:    ALU clause starting at 18:
3652; EG-NEXT:     MOV * T0.X, KC0[2].Z,
3653; EG-NEXT:    ALU clause starting at 19:
3654; EG-NEXT:     ASHR * T5.W, T4.Y, literal.x,
3655; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
3656; EG-NEXT:     ADD_INT T0.W, T4.X, PV.W,
3657; EG-NEXT:     ASHR * T6.W, T4.W, literal.x,
3658; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
3659; EG-NEXT:     XOR_INT * T2.W, PV.W, T5.W,
3660; EG-NEXT:     SUB_INT T0.Z, 0.0, PV.W,
3661; EG-NEXT:     ADD_INT T0.W, T4.Z, T6.W,
3662; EG-NEXT:     RECIP_UINT * T0.X, PV.W,
3663; EG-NEXT:     XOR_INT T3.W, PV.W, T6.W,
3664; EG-NEXT:     MULLO_INT * T0.Y, PV.Z, PS,
3665; EG-NEXT:     SUB_INT T0.Z, 0.0, PV.W,
3666; EG-NEXT:     ASHR T0.W, T1.W, literal.x,
3667; EG-NEXT:     RECIP_UINT * T2.X, PV.W,
3668; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
3669; EG-NEXT:     ADD_INT T2.Y, T1.W, PV.W,
3670; EG-NEXT:     ADDC_UINT T2.Z, T1.Z, PV.W,
3671; EG-NEXT:     ASHR T1.W, T1.Y, literal.x,
3672; EG-NEXT:     MULLO_INT * T0.Z, PV.Z, PS,
3673; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
3674; EG-NEXT:     ADD_INT T1.Y, T1.Y, PV.W,
3675; EG-NEXT:     ADDC_UINT T3.Z, T1.X, PV.W,
3676; EG-NEXT:     ADD_INT T7.W, PV.Y, PV.Z,
3677; EG-NEXT:     MULHI * T0.Z, T2.X, PS,
3678; EG-NEXT:     ADD_INT T2.Y, T2.X, PS,
3679; EG-NEXT:     XOR_INT T0.Z, PV.W, T0.W,
3680; EG-NEXT:     ADD_INT T7.W, PV.Y, PV.Z,
3681; EG-NEXT:     MULHI * T0.Y, T0.X, T0.Y,
3682; EG-NEXT:     ADD_INT T2.Z, T0.X, PS,
3683; EG-NEXT:     XOR_INT T7.W, PV.W, T1.W,
3684; EG-NEXT:     MULHI * T0.X, PV.Z, PV.Y,
3685; EG-NEXT:     MULHI * T0.Y, PV.W, PV.Z,
3686; EG-NEXT:     MULLO_INT * T0.Y, PS, T2.W,
3687; EG-NEXT:     SUB_INT T8.W, T7.W, PS,
3688; EG-NEXT:     MULLO_INT * T0.X, T0.X, T3.W,
3689; EG-NEXT:     SETGE_UINT T2.X, PV.W, T2.W, BS:VEC_021/SCL_122
3690; EG-NEXT:     SUB_INT T0.Y, PV.W, T2.W, BS:VEC_021/SCL_122
3691; EG-NEXT:     SUB_INT T2.Z, T0.Z, PS,
3692; EG-NEXT:     ADD_INT T9.W, T4.Y, T5.W,
3693; EG-NEXT:     ADDC_UINT * T10.W, T4.X, T5.W,
3694; EG-NEXT:     ADD_INT T1.Y, PV.W, PS,
3695; EG-NEXT:     SETGE_UINT T3.Z, PV.Z, T3.W, BS:VEC_021/SCL_122
3696; EG-NEXT:     SUB_INT T9.W, PV.Z, T3.W, BS:VEC_021/SCL_122
3697; EG-NEXT:     CNDE_INT * T8.W, PV.X, T8.W, PV.Y,
3698; EG-NEXT:     SETGE_UINT T0.X, PS, T2.W, BS:VEC_021/SCL_122
3699; EG-NEXT:     SUB_INT T0.Y, PS, T2.W, BS:VEC_021/SCL_122
3700; EG-NEXT:     CNDE_INT T2.Z, PV.Z, T2.Z, PV.W,
3701; EG-NEXT:     ADD_INT T4.W, T4.W, T6.W,
3702; EG-NEXT:     ADDC_UINT * T9.W, T4.Z, T6.W,
3703; EG-NEXT:     ADD_INT T2.X, PV.W, PS,
3704; EG-NEXT:     SETGE_UINT T2.Y, PV.Z, T3.W, BS:VEC_021/SCL_122
3705; EG-NEXT:     SUB_INT T3.Z, PV.Z, T3.W, BS:VEC_021/SCL_122
3706; EG-NEXT:     CNDE_INT T8.W, PV.X, T8.W, PV.Y, BS:VEC_102/SCL_221
3707; EG-NEXT:     XOR_INT * T4.W, T1.Y, T5.W,
3708; EG-NEXT:     CNDE_INT T0.X, PS, PV.W, T7.W,
3709; EG-NEXT:     ADD_INT T1.Y, T1.Z, T0.W,
3710; EG-NEXT:     CNDE_INT T1.Z, PV.Y, T2.Z, PV.Z,
3711; EG-NEXT:     XOR_INT * T5.W, PV.X, T6.W, BS:VEC_102/SCL_221
3712; EG-NEXT:     ADD_INT * T6.W, T1.X, T1.W,
3713; EG-NEXT:     XOR_INT T0.Y, PV.W, T1.W,
3714; EG-NEXT:     CNDE_INT T0.Z, T5.W, T1.Z, T0.Z,
3715; EG-NEXT:     XOR_INT T6.W, T1.Y, T0.W, BS:VEC_021/SCL_122
3716; EG-NEXT:     LSHR * T7.W, T0.X, literal.x,
3717; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
3718; EG-NEXT:     SETE_INT T1.Y, PS, T4.W,
3719; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.Z, PV.W, literal.x,
3720; EG-NEXT:     BIT_ALIGN_INT T8.W, T0.X, PV.Y, literal.x,
3721; EG-NEXT:     LSHR * T9.W, PV.Z, literal.x,
3722; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
3723; EG-NEXT:     SETGE_UINT T0.X, T7.W, T4.W,
3724; EG-NEXT:     SETGE_UINT * T2.Y, PV.W, T2.W, BS:VEC_021/SCL_122
3725; EG-NEXT:     SETE_INT T0.Z, T9.W, T5.W, BS:VEC_021/SCL_122
3726; EG-NEXT:     SETGE_UINT T10.W, T9.W, T5.W, BS:VEC_021/SCL_122
3727; EG-NEXT:     SETGE_UINT * T11.W, T1.Z, T3.W,
3728; EG-NEXT:     CNDE_INT T3.Y, PV.Z, PV.W, PS,
3729; EG-NEXT:     SUB_INT T0.Z, T1.Z, T3.W, BS:VEC_102/SCL_221
3730; EG-NEXT:     CNDE_INT T10.W, T1.Y, T0.X, T2.Y,
3731; EG-NEXT:     SUB_INT * T11.W, T8.W, T2.W,
3732; EG-NEXT:     SUB_INT * T0.X, T7.W, T4.W,
3733; EG-NEXT:     SUBB_UINT * T1.Y, T8.W, T2.W,
3734; EG-NEXT:     SUB_INT T2.Z, T9.W, T5.W,
3735; EG-NEXT:     SUBB_UINT * T12.W, T1.Z, T3.W, BS:VEC_021/SCL_122
3736; EG-NEXT:     CNDE_INT * T8.W, T10.W, T8.W, T11.W,
3737; EG-NEXT:     LSHL T1.X, PV.W, 1,
3738; EG-NEXT:     BFE_UINT T2.Y, T0.Y, literal.x, 1,
3739; EG-NEXT:     SUB_INT T2.Z, T2.Z, T12.W, BS:VEC_201
3740; EG-NEXT:     SUB_INT T11.W, T0.X, T1.Y,
3741; EG-NEXT:     CNDE_INT * T12.W, T3.Y, T1.Z, T0.Z,
3742; EG-NEXT:    30(4.203895e-44), 0(0.000000e+00)
3743; EG-NEXT:     LSHL T0.X, PS, 1,
3744; EG-NEXT:     BFE_UINT T1.Y, T6.W, literal.x, 1,
3745; EG-NEXT:     CNDE_INT * T0.Z, T10.W, T7.W, PV.W, BS:VEC_120/SCL_212
3746; EG-NEXT:    30(4.203895e-44), 0(0.000000e+00)
3747; EG-NEXT:     CNDE_INT T7.W, T3.Y, T9.W, T2.Z,
3748; EG-NEXT:     OR_INT * T9.W, T1.X, T2.Y,
3749; EG-NEXT:     SETGE_UINT T2.Y, PS, T2.W,
3750; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T12.W, literal.x, BS:VEC_021/SCL_122
3751; EG-NEXT:     BIT_ALIGN_INT T7.W, T0.Z, T8.W, literal.x, BS:VEC_102/SCL_221
3752; EG-NEXT:     OR_INT * T8.W, T0.X, T1.Y,
3753; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
3754; EG-NEXT:     SETE_INT T0.X, PV.W, T4.W, BS:VEC_021/SCL_122
3755; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
3756; EG-NEXT:     SETGE_UINT T0.Z, PS, T3.W, BS:VEC_102/SCL_221
3757; EG-NEXT:     SETE_INT T10.W, PV.Z, T5.W,
3758; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T5.W,
3759; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
3760; EG-NEXT:     SUB_INT T0.Z, T8.W, T3.W,
3761; EG-NEXT:     CNDE_INT * T10.W, PV.X, PV.Y, T2.Y,
3762; EG-NEXT:     SUB_INT * T11.W, T9.W, T2.W,
3763; EG-NEXT:     SUBB_UINT * T0.X, T9.W, T2.W,
3764; EG-NEXT:     SUB_INT * T1.Y, T7.W, T4.W,
3765; EG-NEXT:     SUBB_UINT T2.Z, T8.W, T3.W,
3766; EG-NEXT:     SUB_INT * T12.W, T1.Z, T5.W, BS:VEC_021/SCL_122
3767; EG-NEXT:    ALU clause starting at 132:
3768; EG-NEXT:     CNDE_INT * T9.W, T10.W, T9.W, T11.W,
3769; EG-NEXT:     LSHL T1.X, PV.W, 1,
3770; EG-NEXT:     BFE_UINT T2.Y, T0.Y, literal.x, 1,
3771; EG-NEXT:     SUB_INT T2.Z, T12.W, T2.Z,
3772; EG-NEXT:     SUB_INT T11.W, T1.Y, T0.X, BS:VEC_120/SCL_212
3773; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
3774; EG-NEXT:    29(4.063766e-44), 0(0.000000e+00)
3775; EG-NEXT:     LSHL T0.X, PS, 1,
3776; EG-NEXT:     BFE_UINT T1.Y, T6.W, literal.x, 1,
3777; EG-NEXT:     CNDE_INT T0.Z, T10.W, T7.W, PV.W, BS:VEC_120/SCL_212
3778; EG-NEXT:     CNDE_INT T7.W, T3.Y, T1.Z, PV.Z,
3779; EG-NEXT:     OR_INT * T10.W, PV.X, PV.Y,
3780; EG-NEXT:    29(4.063766e-44), 0(0.000000e+00)
3781; EG-NEXT:     SETGE_UINT T2.Y, PS, T2.W,
3782; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
3783; EG-NEXT:     BIT_ALIGN_INT T7.W, PV.Z, T9.W, literal.x, BS:VEC_102/SCL_221
3784; EG-NEXT:     OR_INT * T8.W, PV.X, PV.Y,
3785; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
3786; EG-NEXT:     SETE_INT T0.X, PV.W, T4.W, BS:VEC_021/SCL_122
3787; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
3788; EG-NEXT:     SETGE_UINT T0.Z, PS, T3.W, BS:VEC_102/SCL_221
3789; EG-NEXT:     SETE_INT T9.W, PV.Z, T5.W,
3790; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T5.W,
3791; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
3792; EG-NEXT:     SUB_INT T0.Z, T8.W, T3.W,
3793; EG-NEXT:     CNDE_INT * T9.W, PV.X, PV.Y, T2.Y,
3794; EG-NEXT:     SUB_INT * T11.W, T10.W, T2.W,
3795; EG-NEXT:     SUBB_UINT * T0.X, T10.W, T2.W,
3796; EG-NEXT:     SUB_INT * T1.Y, T7.W, T4.W,
3797; EG-NEXT:     SUBB_UINT T2.Z, T8.W, T3.W,
3798; EG-NEXT:     SUB_INT * T12.W, T1.Z, T5.W, BS:VEC_021/SCL_122
3799; EG-NEXT:     CNDE_INT * T10.W, T9.W, T10.W, T11.W,
3800; EG-NEXT:     LSHL T1.X, PV.W, 1,
3801; EG-NEXT:     BFE_UINT T2.Y, T0.Y, literal.x, 1,
3802; EG-NEXT:     SUB_INT T2.Z, T12.W, T2.Z,
3803; EG-NEXT:     SUB_INT T11.W, T1.Y, T0.X, BS:VEC_120/SCL_212
3804; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
3805; EG-NEXT:    28(3.923636e-44), 0(0.000000e+00)
3806; EG-NEXT:     LSHL T0.X, PS, 1,
3807; EG-NEXT:     BFE_UINT T1.Y, T6.W, literal.x, 1,
3808; EG-NEXT:     CNDE_INT T0.Z, T9.W, T7.W, PV.W, BS:VEC_120/SCL_212
3809; EG-NEXT:     CNDE_INT T7.W, T3.Y, T1.Z, PV.Z,
3810; EG-NEXT:     OR_INT * T9.W, PV.X, PV.Y,
3811; EG-NEXT:    28(3.923636e-44), 0(0.000000e+00)
3812; EG-NEXT:     SETGE_UINT T2.Y, PS, T2.W,
3813; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
3814; EG-NEXT:     BIT_ALIGN_INT T7.W, PV.Z, T10.W, literal.x, BS:VEC_102/SCL_221
3815; EG-NEXT:     OR_INT * T8.W, PV.X, PV.Y,
3816; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
3817; EG-NEXT:     SETE_INT T0.X, PV.W, T4.W, BS:VEC_021/SCL_122
3818; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
3819; EG-NEXT:     SETGE_UINT T0.Z, PS, T3.W, BS:VEC_102/SCL_221
3820; EG-NEXT:     SETE_INT T10.W, PV.Z, T5.W,
3821; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T5.W,
3822; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
3823; EG-NEXT:     SUB_INT T0.Z, T8.W, T3.W,
3824; EG-NEXT:     CNDE_INT * T10.W, PV.X, PV.Y, T2.Y,
3825; EG-NEXT:     SUB_INT * T11.W, T9.W, T2.W,
3826; EG-NEXT:     SUBB_UINT * T0.X, T9.W, T2.W,
3827; EG-NEXT:     SUB_INT * T1.Y, T7.W, T4.W,
3828; EG-NEXT:     SUBB_UINT T2.Z, T8.W, T3.W,
3829; EG-NEXT:     SUB_INT * T12.W, T1.Z, T5.W, BS:VEC_021/SCL_122
3830; EG-NEXT:     CNDE_INT * T9.W, T10.W, T9.W, T11.W,
3831; EG-NEXT:     LSHL T1.X, PV.W, 1,
3832; EG-NEXT:     BFE_UINT T2.Y, T0.Y, literal.x, 1,
3833; EG-NEXT:     SUB_INT T2.Z, T12.W, T2.Z,
3834; EG-NEXT:     SUB_INT T11.W, T1.Y, T0.X, BS:VEC_120/SCL_212
3835; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
3836; EG-NEXT:    27(3.783506e-44), 0(0.000000e+00)
3837; EG-NEXT:     LSHL T0.X, PS, 1,
3838; EG-NEXT:     BFE_UINT T1.Y, T6.W, literal.x, 1,
3839; EG-NEXT:     CNDE_INT T0.Z, T10.W, T7.W, PV.W, BS:VEC_120/SCL_212
3840; EG-NEXT:     CNDE_INT T7.W, T3.Y, T1.Z, PV.Z,
3841; EG-NEXT:     OR_INT * T10.W, PV.X, PV.Y,
3842; EG-NEXT:    27(3.783506e-44), 0(0.000000e+00)
3843; EG-NEXT:     SETGE_UINT T2.Y, PS, T2.W,
3844; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
3845; EG-NEXT:     BIT_ALIGN_INT T7.W, PV.Z, T9.W, literal.x, BS:VEC_102/SCL_221
3846; EG-NEXT:     OR_INT * T8.W, PV.X, PV.Y,
3847; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
3848; EG-NEXT:     SETE_INT T0.X, PV.W, T4.W, BS:VEC_021/SCL_122
3849; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
3850; EG-NEXT:     SETGE_UINT T0.Z, PS, T3.W, BS:VEC_102/SCL_221
3851; EG-NEXT:     SETE_INT T9.W, PV.Z, T5.W,
3852; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T5.W,
3853; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
3854; EG-NEXT:     SUB_INT T0.Z, T8.W, T3.W,
3855; EG-NEXT:     CNDE_INT * T9.W, PV.X, PV.Y, T2.Y,
3856; EG-NEXT:     SUB_INT * T11.W, T10.W, T2.W,
3857; EG-NEXT:     SUBB_UINT * T0.X, T10.W, T2.W,
3858; EG-NEXT:     SUB_INT * T1.Y, T7.W, T4.W,
3859; EG-NEXT:     SUBB_UINT T2.Z, T8.W, T3.W,
3860; EG-NEXT:     SUB_INT * T12.W, T1.Z, T5.W, BS:VEC_021/SCL_122
3861; EG-NEXT:     CNDE_INT * T10.W, T9.W, T10.W, T11.W,
3862; EG-NEXT:     LSHL T1.X, PV.W, 1,
3863; EG-NEXT:     BFE_UINT T2.Y, T0.Y, literal.x, 1,
3864; EG-NEXT:     SUB_INT T2.Z, T12.W, T2.Z,
3865; EG-NEXT:     SUB_INT T11.W, T1.Y, T0.X, BS:VEC_120/SCL_212
3866; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
3867; EG-NEXT:    26(3.643376e-44), 0(0.000000e+00)
3868; EG-NEXT:     LSHL T0.X, PS, 1,
3869; EG-NEXT:     BFE_UINT T1.Y, T6.W, literal.x, 1,
3870; EG-NEXT:     CNDE_INT T0.Z, T9.W, T7.W, PV.W, BS:VEC_120/SCL_212
3871; EG-NEXT:     CNDE_INT T7.W, T3.Y, T1.Z, PV.Z,
3872; EG-NEXT:     OR_INT * T9.W, PV.X, PV.Y,
3873; EG-NEXT:    26(3.643376e-44), 0(0.000000e+00)
3874; EG-NEXT:     SETGE_UINT T2.Y, PS, T2.W,
3875; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
3876; EG-NEXT:     BIT_ALIGN_INT T7.W, PV.Z, T10.W, literal.x, BS:VEC_102/SCL_221
3877; EG-NEXT:     OR_INT * T8.W, PV.X, PV.Y,
3878; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
3879; EG-NEXT:     SETE_INT * T0.X, PV.W, T4.W,
3880; EG-NEXT:    ALU clause starting at 244:
3881; EG-NEXT:     SETGE_UINT * T1.Y, T7.W, T4.W,
3882; EG-NEXT:     SETGE_UINT T0.Z, T8.W, T3.W, BS:VEC_021/SCL_122
3883; EG-NEXT:     SETE_INT T10.W, T1.Z, T5.W,
3884; EG-NEXT:     SETGE_UINT * T11.W, T1.Z, T5.W,
3885; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
3886; EG-NEXT:     SUB_INT T0.Z, T8.W, T3.W,
3887; EG-NEXT:     CNDE_INT * T10.W, T0.X, T1.Y, T2.Y,
3888; EG-NEXT:     SUB_INT * T11.W, T9.W, T2.W,
3889; EG-NEXT:     SUBB_UINT * T0.X, T9.W, T2.W,
3890; EG-NEXT:     SUB_INT * T1.Y, T7.W, T4.W,
3891; EG-NEXT:     SUBB_UINT T2.Z, T8.W, T3.W,
3892; EG-NEXT:     SUB_INT * T12.W, T1.Z, T5.W, BS:VEC_021/SCL_122
3893; EG-NEXT:     CNDE_INT * T9.W, T10.W, T9.W, T11.W,
3894; EG-NEXT:     LSHL T1.X, PV.W, 1,
3895; EG-NEXT:     BFE_UINT T2.Y, T0.Y, literal.x, 1,
3896; EG-NEXT:     SUB_INT T2.Z, T12.W, T2.Z,
3897; EG-NEXT:     SUB_INT T11.W, T1.Y, T0.X, BS:VEC_120/SCL_212
3898; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
3899; EG-NEXT:    25(3.503246e-44), 0(0.000000e+00)
3900; EG-NEXT:     LSHL T0.X, PS, 1,
3901; EG-NEXT:     BFE_UINT T1.Y, T6.W, literal.x, 1,
3902; EG-NEXT:     CNDE_INT T0.Z, T10.W, T7.W, PV.W, BS:VEC_120/SCL_212
3903; EG-NEXT:     CNDE_INT T7.W, T3.Y, T1.Z, PV.Z,
3904; EG-NEXT:     OR_INT * T10.W, PV.X, PV.Y,
3905; EG-NEXT:    25(3.503246e-44), 0(0.000000e+00)
3906; EG-NEXT:     SETGE_UINT T2.Y, PS, T2.W,
3907; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
3908; EG-NEXT:     BIT_ALIGN_INT T7.W, PV.Z, T9.W, literal.x, BS:VEC_102/SCL_221
3909; EG-NEXT:     OR_INT * T8.W, PV.X, PV.Y,
3910; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
3911; EG-NEXT:     SETE_INT T0.X, PV.W, T4.W, BS:VEC_021/SCL_122
3912; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
3913; EG-NEXT:     SETGE_UINT T0.Z, PS, T3.W, BS:VEC_102/SCL_221
3914; EG-NEXT:     SETE_INT T9.W, PV.Z, T5.W,
3915; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T5.W,
3916; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
3917; EG-NEXT:     SUB_INT T0.Z, T8.W, T3.W,
3918; EG-NEXT:     CNDE_INT * T9.W, PV.X, PV.Y, T2.Y,
3919; EG-NEXT:     SUB_INT * T11.W, T10.W, T2.W,
3920; EG-NEXT:     SUBB_UINT * T0.X, T10.W, T2.W,
3921; EG-NEXT:     SUB_INT * T1.Y, T7.W, T4.W,
3922; EG-NEXT:     SUBB_UINT T2.Z, T8.W, T3.W,
3923; EG-NEXT:     SUB_INT * T12.W, T1.Z, T5.W, BS:VEC_021/SCL_122
3924; EG-NEXT:     CNDE_INT * T10.W, T9.W, T10.W, T11.W,
3925; EG-NEXT:     LSHL T1.X, PV.W, 1,
3926; EG-NEXT:     BFE_UINT T2.Y, T0.Y, literal.x, 1,
3927; EG-NEXT:     SUB_INT T2.Z, T12.W, T2.Z,
3928; EG-NEXT:     SUB_INT T11.W, T1.Y, T0.X, BS:VEC_120/SCL_212
3929; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
3930; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
3931; EG-NEXT:     LSHL T0.X, PS, 1,
3932; EG-NEXT:     BFE_UINT T1.Y, T6.W, literal.x, 1,
3933; EG-NEXT:     CNDE_INT T0.Z, T9.W, T7.W, PV.W, BS:VEC_120/SCL_212
3934; EG-NEXT:     CNDE_INT T7.W, T3.Y, T1.Z, PV.Z,
3935; EG-NEXT:     OR_INT * T9.W, PV.X, PV.Y,
3936; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
3937; EG-NEXT:     SETGE_UINT T2.Y, PS, T2.W,
3938; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
3939; EG-NEXT:     BIT_ALIGN_INT T7.W, PV.Z, T10.W, literal.x, BS:VEC_102/SCL_221
3940; EG-NEXT:     OR_INT * T8.W, PV.X, PV.Y,
3941; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
3942; EG-NEXT:     SETE_INT T0.X, PV.W, T4.W, BS:VEC_021/SCL_122
3943; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
3944; EG-NEXT:     SETGE_UINT T0.Z, PS, T3.W, BS:VEC_102/SCL_221
3945; EG-NEXT:     SETE_INT T10.W, PV.Z, T5.W,
3946; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T5.W,
3947; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
3948; EG-NEXT:     SUB_INT T0.Z, T8.W, T3.W,
3949; EG-NEXT:     CNDE_INT * T10.W, PV.X, PV.Y, T2.Y,
3950; EG-NEXT:     SUB_INT * T11.W, T9.W, T2.W,
3951; EG-NEXT:     SUBB_UINT * T0.X, T9.W, T2.W,
3952; EG-NEXT:     SUB_INT * T1.Y, T7.W, T4.W,
3953; EG-NEXT:     SUBB_UINT T2.Z, T8.W, T3.W,
3954; EG-NEXT:     SUB_INT * T12.W, T1.Z, T5.W, BS:VEC_021/SCL_122
3955; EG-NEXT:     CNDE_INT * T9.W, T10.W, T9.W, T11.W,
3956; EG-NEXT:     LSHL T1.X, PV.W, 1,
3957; EG-NEXT:     BFE_UINT T2.Y, T0.Y, literal.x, 1,
3958; EG-NEXT:     SUB_INT T2.Z, T12.W, T2.Z,
3959; EG-NEXT:     SUB_INT T11.W, T1.Y, T0.X, BS:VEC_120/SCL_212
3960; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
3961; EG-NEXT:    23(3.222986e-44), 0(0.000000e+00)
3962; EG-NEXT:     LSHL T0.X, PS, 1,
3963; EG-NEXT:     BFE_UINT T1.Y, T6.W, literal.x, 1,
3964; EG-NEXT:     CNDE_INT T0.Z, T10.W, T7.W, PV.W, BS:VEC_120/SCL_212
3965; EG-NEXT:     CNDE_INT T7.W, T3.Y, T1.Z, PV.Z,
3966; EG-NEXT:     OR_INT * T10.W, PV.X, PV.Y,
3967; EG-NEXT:    23(3.222986e-44), 0(0.000000e+00)
3968; EG-NEXT:     SETGE_UINT T2.Y, PS, T2.W,
3969; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
3970; EG-NEXT:     BIT_ALIGN_INT T7.W, PV.Z, T9.W, literal.x, BS:VEC_102/SCL_221
3971; EG-NEXT:     OR_INT * T8.W, PV.X, PV.Y,
3972; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
3973; EG-NEXT:     SETE_INT T0.X, PV.W, T4.W, BS:VEC_021/SCL_122
3974; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
3975; EG-NEXT:     SETGE_UINT T0.Z, PS, T3.W, BS:VEC_102/SCL_221
3976; EG-NEXT:     SETE_INT T9.W, PV.Z, T5.W,
3977; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T5.W,
3978; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
3979; EG-NEXT:     SUB_INT T0.Z, T8.W, T3.W,
3980; EG-NEXT:     CNDE_INT * T9.W, PV.X, PV.Y, T2.Y,
3981; EG-NEXT:     SUB_INT * T11.W, T10.W, T2.W,
3982; EG-NEXT:     SUBB_UINT * T0.X, T10.W, T2.W,
3983; EG-NEXT:     SUB_INT * T1.Y, T7.W, T4.W,
3984; EG-NEXT:     SUBB_UINT T2.Z, T8.W, T3.W,
3985; EG-NEXT:     SUB_INT * T12.W, T1.Z, T5.W, BS:VEC_021/SCL_122
3986; EG-NEXT:     CNDE_INT * T10.W, T9.W, T10.W, T11.W,
3987; EG-NEXT:     LSHL T1.X, PV.W, 1,
3988; EG-NEXT:     BFE_UINT T2.Y, T0.Y, literal.x, 1,
3989; EG-NEXT:     SUB_INT T2.Z, T12.W, T2.Z,
3990; EG-NEXT:     SUB_INT T11.W, T1.Y, T0.X, BS:VEC_120/SCL_212
3991; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
3992; EG-NEXT:    22(3.082857e-44), 0(0.000000e+00)
3993; EG-NEXT:     LSHL * T0.X, PS, 1,
3994; EG-NEXT:    ALU clause starting at 357:
3995; EG-NEXT:     BFE_UINT * T1.Y, T6.W, literal.x, 1,
3996; EG-NEXT:    22(3.082857e-44), 0(0.000000e+00)
3997; EG-NEXT:     CNDE_INT T0.Z, T9.W, T7.W, T11.W,
3998; EG-NEXT:     CNDE_INT T7.W, T3.Y, T1.Z, T2.Z,
3999; EG-NEXT:     OR_INT * T9.W, T1.X, T2.Y,
4000; EG-NEXT:     SETGE_UINT T2.Y, PS, T2.W,
4001; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
4002; EG-NEXT:     BIT_ALIGN_INT T7.W, PV.Z, T10.W, literal.x, BS:VEC_102/SCL_221
4003; EG-NEXT:     OR_INT * T8.W, T0.X, T1.Y,
4004; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4005; EG-NEXT:     SETE_INT T0.X, PV.W, T4.W, BS:VEC_021/SCL_122
4006; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
4007; EG-NEXT:     SETGE_UINT T0.Z, PS, T3.W, BS:VEC_102/SCL_221
4008; EG-NEXT:     SETE_INT T10.W, PV.Z, T5.W,
4009; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T5.W,
4010; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
4011; EG-NEXT:     SUB_INT T0.Z, T8.W, T3.W,
4012; EG-NEXT:     CNDE_INT * T10.W, PV.X, PV.Y, T2.Y,
4013; EG-NEXT:     SUB_INT * T11.W, T9.W, T2.W,
4014; EG-NEXT:     SUBB_UINT * T0.X, T9.W, T2.W,
4015; EG-NEXT:     SUB_INT * T1.Y, T7.W, T4.W,
4016; EG-NEXT:     SUBB_UINT T2.Z, T8.W, T3.W,
4017; EG-NEXT:     SUB_INT * T12.W, T1.Z, T5.W, BS:VEC_021/SCL_122
4018; EG-NEXT:     CNDE_INT * T9.W, T10.W, T9.W, T11.W,
4019; EG-NEXT:     LSHL T1.X, PV.W, 1,
4020; EG-NEXT:     BFE_UINT T2.Y, T0.Y, literal.x, 1,
4021; EG-NEXT:     SUB_INT T2.Z, T12.W, T2.Z,
4022; EG-NEXT:     SUB_INT T11.W, T1.Y, T0.X, BS:VEC_120/SCL_212
4023; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
4024; EG-NEXT:    21(2.942727e-44), 0(0.000000e+00)
4025; EG-NEXT:     LSHL T0.X, PS, 1,
4026; EG-NEXT:     BFE_UINT T1.Y, T6.W, literal.x, 1,
4027; EG-NEXT:     CNDE_INT T0.Z, T10.W, T7.W, PV.W, BS:VEC_120/SCL_212
4028; EG-NEXT:     CNDE_INT T7.W, T3.Y, T1.Z, PV.Z,
4029; EG-NEXT:     OR_INT * T10.W, PV.X, PV.Y,
4030; EG-NEXT:    21(2.942727e-44), 0(0.000000e+00)
4031; EG-NEXT:     SETGE_UINT T2.Y, PS, T2.W,
4032; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
4033; EG-NEXT:     BIT_ALIGN_INT T7.W, PV.Z, T9.W, literal.x, BS:VEC_102/SCL_221
4034; EG-NEXT:     OR_INT * T8.W, PV.X, PV.Y,
4035; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4036; EG-NEXT:     SETE_INT T0.X, PV.W, T4.W, BS:VEC_021/SCL_122
4037; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
4038; EG-NEXT:     SETGE_UINT T0.Z, PS, T3.W, BS:VEC_102/SCL_221
4039; EG-NEXT:     SETE_INT T9.W, PV.Z, T5.W,
4040; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T5.W,
4041; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
4042; EG-NEXT:     SUB_INT T0.Z, T8.W, T3.W,
4043; EG-NEXT:     CNDE_INT * T9.W, PV.X, PV.Y, T2.Y,
4044; EG-NEXT:     SUB_INT * T11.W, T10.W, T2.W,
4045; EG-NEXT:     SUBB_UINT * T0.X, T10.W, T2.W,
4046; EG-NEXT:     SUB_INT * T1.Y, T7.W, T4.W,
4047; EG-NEXT:     SUBB_UINT T2.Z, T8.W, T3.W,
4048; EG-NEXT:     SUB_INT * T12.W, T1.Z, T5.W, BS:VEC_021/SCL_122
4049; EG-NEXT:     CNDE_INT * T10.W, T9.W, T10.W, T11.W,
4050; EG-NEXT:     LSHL T1.X, PV.W, 1,
4051; EG-NEXT:     BFE_UINT T2.Y, T0.Y, literal.x, 1,
4052; EG-NEXT:     SUB_INT T2.Z, T12.W, T2.Z,
4053; EG-NEXT:     SUB_INT T11.W, T1.Y, T0.X, BS:VEC_120/SCL_212
4054; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
4055; EG-NEXT:    20(2.802597e-44), 0(0.000000e+00)
4056; EG-NEXT:     LSHL T0.X, PS, 1,
4057; EG-NEXT:     BFE_UINT T1.Y, T6.W, literal.x, 1,
4058; EG-NEXT:     CNDE_INT T0.Z, T9.W, T7.W, PV.W, BS:VEC_120/SCL_212
4059; EG-NEXT:     CNDE_INT T7.W, T3.Y, T1.Z, PV.Z,
4060; EG-NEXT:     OR_INT * T9.W, PV.X, PV.Y,
4061; EG-NEXT:    20(2.802597e-44), 0(0.000000e+00)
4062; EG-NEXT:     SETGE_UINT T2.Y, PS, T2.W,
4063; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
4064; EG-NEXT:     BIT_ALIGN_INT T7.W, PV.Z, T10.W, literal.x, BS:VEC_102/SCL_221
4065; EG-NEXT:     OR_INT * T8.W, PV.X, PV.Y,
4066; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4067; EG-NEXT:     SETE_INT T0.X, PV.W, T4.W, BS:VEC_021/SCL_122
4068; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
4069; EG-NEXT:     SETGE_UINT T0.Z, PS, T3.W, BS:VEC_102/SCL_221
4070; EG-NEXT:     SETE_INT T10.W, PV.Z, T5.W,
4071; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T5.W,
4072; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
4073; EG-NEXT:     SUB_INT T0.Z, T8.W, T3.W,
4074; EG-NEXT:     CNDE_INT * T10.W, PV.X, PV.Y, T2.Y,
4075; EG-NEXT:     SUB_INT * T11.W, T9.W, T2.W,
4076; EG-NEXT:     SUBB_UINT * T0.X, T9.W, T2.W,
4077; EG-NEXT:     SUB_INT * T1.Y, T7.W, T4.W,
4078; EG-NEXT:     SUBB_UINT T2.Z, T8.W, T3.W,
4079; EG-NEXT:     SUB_INT * T12.W, T1.Z, T5.W, BS:VEC_021/SCL_122
4080; EG-NEXT:     CNDE_INT * T9.W, T10.W, T9.W, T11.W,
4081; EG-NEXT:     LSHL T1.X, PV.W, 1,
4082; EG-NEXT:     BFE_UINT T2.Y, T0.Y, literal.x, 1,
4083; EG-NEXT:     SUB_INT T2.Z, T12.W, T2.Z,
4084; EG-NEXT:     SUB_INT T11.W, T1.Y, T0.X, BS:VEC_120/SCL_212
4085; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
4086; EG-NEXT:    19(2.662467e-44), 0(0.000000e+00)
4087; EG-NEXT:     LSHL T0.X, PS, 1,
4088; EG-NEXT:     BFE_UINT T1.Y, T6.W, literal.x, 1,
4089; EG-NEXT:     CNDE_INT T0.Z, T10.W, T7.W, PV.W, BS:VEC_120/SCL_212
4090; EG-NEXT:     CNDE_INT T7.W, T3.Y, T1.Z, PV.Z,
4091; EG-NEXT:     OR_INT * T10.W, PV.X, PV.Y,
4092; EG-NEXT:    19(2.662467e-44), 0(0.000000e+00)
4093; EG-NEXT:     SETGE_UINT T2.Y, PS, T2.W,
4094; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
4095; EG-NEXT:     BIT_ALIGN_INT T7.W, PV.Z, T9.W, literal.x, BS:VEC_102/SCL_221
4096; EG-NEXT:     OR_INT * T8.W, PV.X, PV.Y,
4097; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4098; EG-NEXT:     SETE_INT T0.X, PV.W, T4.W, BS:VEC_021/SCL_122
4099; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
4100; EG-NEXT:     SETGE_UINT T0.Z, PS, T3.W, BS:VEC_102/SCL_221
4101; EG-NEXT:     SETE_INT T9.W, PV.Z, T5.W,
4102; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T5.W,
4103; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
4104; EG-NEXT:     SUB_INT T0.Z, T8.W, T3.W,
4105; EG-NEXT:     CNDE_INT * T9.W, PV.X, PV.Y, T2.Y,
4106; EG-NEXT:     SUB_INT * T11.W, T10.W, T2.W,
4107; EG-NEXT:    ALU clause starting at 469:
4108; EG-NEXT:     SUBB_UINT * T0.X, T10.W, T2.W,
4109; EG-NEXT:     SUB_INT * T1.Y, T7.W, T4.W,
4110; EG-NEXT:     SUBB_UINT T2.Z, T8.W, T3.W,
4111; EG-NEXT:     SUB_INT * T12.W, T1.Z, T5.W, BS:VEC_021/SCL_122
4112; EG-NEXT:     CNDE_INT * T10.W, T9.W, T10.W, T11.W,
4113; EG-NEXT:     LSHL T1.X, PV.W, 1,
4114; EG-NEXT:     BFE_UINT T2.Y, T0.Y, literal.x, 1,
4115; EG-NEXT:     SUB_INT T2.Z, T12.W, T2.Z,
4116; EG-NEXT:     SUB_INT T11.W, T1.Y, T0.X, BS:VEC_120/SCL_212
4117; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
4118; EG-NEXT:    18(2.522337e-44), 0(0.000000e+00)
4119; EG-NEXT:     LSHL T0.X, PS, 1,
4120; EG-NEXT:     BFE_UINT T1.Y, T6.W, literal.x, 1,
4121; EG-NEXT:     CNDE_INT T0.Z, T9.W, T7.W, PV.W, BS:VEC_120/SCL_212
4122; EG-NEXT:     CNDE_INT T7.W, T3.Y, T1.Z, PV.Z,
4123; EG-NEXT:     OR_INT * T9.W, PV.X, PV.Y,
4124; EG-NEXT:    18(2.522337e-44), 0(0.000000e+00)
4125; EG-NEXT:     SETGE_UINT T2.Y, PS, T2.W,
4126; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
4127; EG-NEXT:     BIT_ALIGN_INT T7.W, PV.Z, T10.W, literal.x, BS:VEC_102/SCL_221
4128; EG-NEXT:     OR_INT * T8.W, PV.X, PV.Y,
4129; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4130; EG-NEXT:     SETE_INT T0.X, PV.W, T4.W, BS:VEC_021/SCL_122
4131; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
4132; EG-NEXT:     SETGE_UINT T0.Z, PS, T3.W, BS:VEC_102/SCL_221
4133; EG-NEXT:     SETE_INT T10.W, PV.Z, T5.W,
4134; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T5.W,
4135; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
4136; EG-NEXT:     SUB_INT T0.Z, T8.W, T3.W,
4137; EG-NEXT:     CNDE_INT * T10.W, PV.X, PV.Y, T2.Y,
4138; EG-NEXT:     SUB_INT * T11.W, T9.W, T2.W,
4139; EG-NEXT:     SUBB_UINT * T0.X, T9.W, T2.W,
4140; EG-NEXT:     SUB_INT * T1.Y, T7.W, T4.W,
4141; EG-NEXT:     SUBB_UINT T2.Z, T8.W, T3.W,
4142; EG-NEXT:     SUB_INT * T12.W, T1.Z, T5.W, BS:VEC_021/SCL_122
4143; EG-NEXT:     CNDE_INT * T9.W, T10.W, T9.W, T11.W,
4144; EG-NEXT:     LSHL T1.X, PV.W, 1,
4145; EG-NEXT:     BFE_UINT T2.Y, T0.Y, literal.x, 1,
4146; EG-NEXT:     SUB_INT T2.Z, T12.W, T2.Z,
4147; EG-NEXT:     SUB_INT T11.W, T1.Y, T0.X, BS:VEC_120/SCL_212
4148; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
4149; EG-NEXT:    17(2.382207e-44), 0(0.000000e+00)
4150; EG-NEXT:     LSHL T0.X, PS, 1,
4151; EG-NEXT:     BFE_UINT T1.Y, T6.W, literal.x, 1,
4152; EG-NEXT:     CNDE_INT T0.Z, T10.W, T7.W, PV.W, BS:VEC_120/SCL_212
4153; EG-NEXT:     CNDE_INT T7.W, T3.Y, T1.Z, PV.Z,
4154; EG-NEXT:     OR_INT * T10.W, PV.X, PV.Y,
4155; EG-NEXT:    17(2.382207e-44), 0(0.000000e+00)
4156; EG-NEXT:     SETGE_UINT T2.Y, PS, T2.W,
4157; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
4158; EG-NEXT:     BIT_ALIGN_INT T7.W, PV.Z, T9.W, literal.x, BS:VEC_102/SCL_221
4159; EG-NEXT:     OR_INT * T8.W, PV.X, PV.Y,
4160; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4161; EG-NEXT:     SETE_INT T0.X, PV.W, T4.W, BS:VEC_021/SCL_122
4162; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
4163; EG-NEXT:     SETGE_UINT T0.Z, PS, T3.W, BS:VEC_102/SCL_221
4164; EG-NEXT:     SETE_INT T9.W, PV.Z, T5.W,
4165; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T5.W,
4166; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
4167; EG-NEXT:     SUB_INT T0.Z, T8.W, T3.W,
4168; EG-NEXT:     CNDE_INT * T9.W, PV.X, PV.Y, T2.Y,
4169; EG-NEXT:     SUB_INT * T11.W, T10.W, T2.W,
4170; EG-NEXT:     SUBB_UINT * T0.X, T10.W, T2.W,
4171; EG-NEXT:     SUB_INT * T1.Y, T7.W, T4.W,
4172; EG-NEXT:     SUBB_UINT T2.Z, T8.W, T3.W,
4173; EG-NEXT:     SUB_INT * T12.W, T1.Z, T5.W, BS:VEC_021/SCL_122
4174; EG-NEXT:     CNDE_INT * T10.W, T9.W, T10.W, T11.W,
4175; EG-NEXT:     LSHL T1.X, PV.W, 1,
4176; EG-NEXT:     BFE_UINT T2.Y, T0.Y, literal.x, 1,
4177; EG-NEXT:     SUB_INT T2.Z, T12.W, T2.Z,
4178; EG-NEXT:     SUB_INT T11.W, T1.Y, T0.X, BS:VEC_120/SCL_212
4179; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
4180; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4181; EG-NEXT:     LSHL T0.X, PS, 1,
4182; EG-NEXT:     BFE_UINT T1.Y, T6.W, literal.x, 1,
4183; EG-NEXT:     CNDE_INT T0.Z, T9.W, T7.W, PV.W, BS:VEC_120/SCL_212
4184; EG-NEXT:     CNDE_INT T7.W, T3.Y, T1.Z, PV.Z,
4185; EG-NEXT:     OR_INT * T9.W, PV.X, PV.Y,
4186; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
4187; EG-NEXT:     SETGE_UINT T2.Y, PS, T2.W,
4188; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
4189; EG-NEXT:     BIT_ALIGN_INT T7.W, PV.Z, T10.W, literal.x, BS:VEC_102/SCL_221
4190; EG-NEXT:     OR_INT * T8.W, PV.X, PV.Y,
4191; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4192; EG-NEXT:     SETE_INT T0.X, PV.W, T4.W, BS:VEC_021/SCL_122
4193; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
4194; EG-NEXT:     SETGE_UINT T0.Z, PS, T3.W, BS:VEC_102/SCL_221
4195; EG-NEXT:     SETE_INT T10.W, PV.Z, T5.W,
4196; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T5.W,
4197; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
4198; EG-NEXT:     SUB_INT T0.Z, T8.W, T3.W,
4199; EG-NEXT:     CNDE_INT * T10.W, PV.X, PV.Y, T2.Y,
4200; EG-NEXT:     SUB_INT * T11.W, T9.W, T2.W,
4201; EG-NEXT:     SUBB_UINT * T0.X, T9.W, T2.W,
4202; EG-NEXT:     SUB_INT * T1.Y, T7.W, T4.W,
4203; EG-NEXT:     SUBB_UINT T2.Z, T8.W, T3.W,
4204; EG-NEXT:     SUB_INT * T12.W, T1.Z, T5.W, BS:VEC_021/SCL_122
4205; EG-NEXT:     CNDE_INT * T9.W, T10.W, T9.W, T11.W,
4206; EG-NEXT:     LSHL T1.X, PV.W, 1,
4207; EG-NEXT:     BFE_UINT T2.Y, T0.Y, literal.x, 1,
4208; EG-NEXT:     SUB_INT T2.Z, T12.W, T2.Z,
4209; EG-NEXT:     SUB_INT T11.W, T1.Y, T0.X, BS:VEC_120/SCL_212
4210; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
4211; EG-NEXT:    15(2.101948e-44), 0(0.000000e+00)
4212; EG-NEXT:     LSHL T0.X, PS, 1,
4213; EG-NEXT:     BFE_UINT T1.Y, T6.W, literal.x, 1,
4214; EG-NEXT:     CNDE_INT T0.Z, T10.W, T7.W, PV.W, BS:VEC_120/SCL_212
4215; EG-NEXT:     CNDE_INT T7.W, T3.Y, T1.Z, PV.Z,
4216; EG-NEXT:     OR_INT * T10.W, PV.X, PV.Y,
4217; EG-NEXT:    15(2.101948e-44), 0(0.000000e+00)
4218; EG-NEXT:     SETGE_UINT T2.Y, PS, T2.W,
4219; EG-NEXT:     BIT_ALIGN_INT * T1.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
4220; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4221; EG-NEXT:    ALU clause starting at 582:
4222; EG-NEXT:     BIT_ALIGN_INT T7.W, T0.Z, T9.W, literal.x,
4223; EG-NEXT:     OR_INT * T8.W, T0.X, T1.Y,
4224; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4225; EG-NEXT:     SETE_INT T0.X, PV.W, T4.W, BS:VEC_021/SCL_122
4226; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
4227; EG-NEXT:     SETGE_UINT T0.Z, PS, T3.W, BS:VEC_102/SCL_221
4228; EG-NEXT:     SETE_INT T9.W, T1.Z, T5.W,
4229; EG-NEXT:     SETGE_UINT * T11.W, T1.Z, T5.W,
4230; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
4231; EG-NEXT:     SUB_INT T0.Z, T8.W, T3.W,
4232; EG-NEXT:     CNDE_INT * T9.W, PV.X, PV.Y, T2.Y,
4233; EG-NEXT:     SUB_INT * T11.W, T10.W, T2.W,
4234; EG-NEXT:     SUBB_UINT * T0.X, T10.W, T2.W,
4235; EG-NEXT:     SUB_INT * T1.Y, T7.W, T4.W,
4236; EG-NEXT:     SUBB_UINT T2.Z, T8.W, T3.W,
4237; EG-NEXT:     SUB_INT * T12.W, T1.Z, T5.W, BS:VEC_021/SCL_122
4238; EG-NEXT:     CNDE_INT * T10.W, T9.W, T10.W, T11.W,
4239; EG-NEXT:     LSHL T1.X, PV.W, 1,
4240; EG-NEXT:     BFE_UINT T2.Y, T0.Y, literal.x, 1,
4241; EG-NEXT:     SUB_INT T2.Z, T12.W, T2.Z,
4242; EG-NEXT:     SUB_INT T11.W, T1.Y, T0.X, BS:VEC_120/SCL_212
4243; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
4244; EG-NEXT:    14(1.961818e-44), 0(0.000000e+00)
4245; EG-NEXT:     LSHL T0.X, PS, 1,
4246; EG-NEXT:     BFE_UINT T1.Y, T6.W, literal.x, 1,
4247; EG-NEXT:     CNDE_INT T0.Z, T9.W, T7.W, PV.W, BS:VEC_120/SCL_212
4248; EG-NEXT:     CNDE_INT T7.W, T3.Y, T1.Z, PV.Z,
4249; EG-NEXT:     OR_INT * T9.W, PV.X, PV.Y,
4250; EG-NEXT:    14(1.961818e-44), 0(0.000000e+00)
4251; EG-NEXT:     SETGE_UINT T2.Y, PS, T2.W,
4252; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
4253; EG-NEXT:     BIT_ALIGN_INT T7.W, PV.Z, T10.W, literal.x, BS:VEC_102/SCL_221
4254; EG-NEXT:     OR_INT * T8.W, PV.X, PV.Y,
4255; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4256; EG-NEXT:     SETE_INT T0.X, PV.W, T4.W, BS:VEC_021/SCL_122
4257; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
4258; EG-NEXT:     SETGE_UINT T0.Z, PS, T3.W, BS:VEC_102/SCL_221
4259; EG-NEXT:     SETE_INT T10.W, PV.Z, T5.W,
4260; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T5.W,
4261; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
4262; EG-NEXT:     SUB_INT T0.Z, T8.W, T3.W,
4263; EG-NEXT:     CNDE_INT * T10.W, PV.X, PV.Y, T2.Y,
4264; EG-NEXT:     SUB_INT * T11.W, T9.W, T2.W,
4265; EG-NEXT:     SUBB_UINT * T0.X, T9.W, T2.W,
4266; EG-NEXT:     SUB_INT * T1.Y, T7.W, T4.W,
4267; EG-NEXT:     SUBB_UINT T2.Z, T8.W, T3.W,
4268; EG-NEXT:     SUB_INT * T12.W, T1.Z, T5.W, BS:VEC_021/SCL_122
4269; EG-NEXT:     CNDE_INT * T9.W, T10.W, T9.W, T11.W,
4270; EG-NEXT:     LSHL T1.X, PV.W, 1,
4271; EG-NEXT:     BFE_UINT T2.Y, T0.Y, literal.x, 1,
4272; EG-NEXT:     SUB_INT T2.Z, T12.W, T2.Z,
4273; EG-NEXT:     SUB_INT T11.W, T1.Y, T0.X, BS:VEC_120/SCL_212
4274; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
4275; EG-NEXT:    13(1.821688e-44), 0(0.000000e+00)
4276; EG-NEXT:     LSHL T0.X, PS, 1,
4277; EG-NEXT:     BFE_UINT T1.Y, T6.W, literal.x, 1,
4278; EG-NEXT:     CNDE_INT T0.Z, T10.W, T7.W, PV.W, BS:VEC_120/SCL_212
4279; EG-NEXT:     CNDE_INT T7.W, T3.Y, T1.Z, PV.Z,
4280; EG-NEXT:     OR_INT * T10.W, PV.X, PV.Y,
4281; EG-NEXT:    13(1.821688e-44), 0(0.000000e+00)
4282; EG-NEXT:     SETGE_UINT T2.Y, PS, T2.W,
4283; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
4284; EG-NEXT:     BIT_ALIGN_INT T7.W, PV.Z, T9.W, literal.x, BS:VEC_102/SCL_221
4285; EG-NEXT:     OR_INT * T8.W, PV.X, PV.Y,
4286; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4287; EG-NEXT:     SETE_INT T0.X, PV.W, T4.W, BS:VEC_021/SCL_122
4288; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
4289; EG-NEXT:     SETGE_UINT T0.Z, PS, T3.W, BS:VEC_102/SCL_221
4290; EG-NEXT:     SETE_INT T9.W, PV.Z, T5.W,
4291; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T5.W,
4292; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
4293; EG-NEXT:     SUB_INT T0.Z, T8.W, T3.W,
4294; EG-NEXT:     CNDE_INT * T9.W, PV.X, PV.Y, T2.Y,
4295; EG-NEXT:     SUB_INT * T11.W, T10.W, T2.W,
4296; EG-NEXT:     SUBB_UINT * T0.X, T10.W, T2.W,
4297; EG-NEXT:     SUB_INT * T1.Y, T7.W, T4.W,
4298; EG-NEXT:     SUBB_UINT T2.Z, T8.W, T3.W,
4299; EG-NEXT:     SUB_INT * T12.W, T1.Z, T5.W, BS:VEC_021/SCL_122
4300; EG-NEXT:     CNDE_INT * T10.W, T9.W, T10.W, T11.W,
4301; EG-NEXT:     LSHL T1.X, PV.W, 1,
4302; EG-NEXT:     BFE_UINT T2.Y, T0.Y, literal.x, 1,
4303; EG-NEXT:     SUB_INT T2.Z, T12.W, T2.Z,
4304; EG-NEXT:     SUB_INT T11.W, T1.Y, T0.X, BS:VEC_120/SCL_212
4305; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
4306; EG-NEXT:    12(1.681558e-44), 0(0.000000e+00)
4307; EG-NEXT:     LSHL T0.X, PS, 1,
4308; EG-NEXT:     BFE_UINT T1.Y, T6.W, literal.x, 1,
4309; EG-NEXT:     CNDE_INT T0.Z, T9.W, T7.W, PV.W, BS:VEC_120/SCL_212
4310; EG-NEXT:     CNDE_INT T7.W, T3.Y, T1.Z, PV.Z,
4311; EG-NEXT:     OR_INT * T9.W, PV.X, PV.Y,
4312; EG-NEXT:    12(1.681558e-44), 0(0.000000e+00)
4313; EG-NEXT:     SETGE_UINT T2.Y, PS, T2.W,
4314; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
4315; EG-NEXT:     BIT_ALIGN_INT T7.W, PV.Z, T10.W, literal.x, BS:VEC_102/SCL_221
4316; EG-NEXT:     OR_INT * T8.W, PV.X, PV.Y,
4317; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4318; EG-NEXT:     SETE_INT T0.X, PV.W, T4.W, BS:VEC_021/SCL_122
4319; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
4320; EG-NEXT:     SETGE_UINT T0.Z, PS, T3.W, BS:VEC_102/SCL_221
4321; EG-NEXT:     SETE_INT T10.W, PV.Z, T5.W,
4322; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T5.W,
4323; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
4324; EG-NEXT:     SUB_INT T0.Z, T8.W, T3.W,
4325; EG-NEXT:     CNDE_INT * T10.W, PV.X, PV.Y, T2.Y,
4326; EG-NEXT:     SUB_INT * T11.W, T9.W, T2.W,
4327; EG-NEXT:     SUBB_UINT * T0.X, T9.W, T2.W,
4328; EG-NEXT:     SUB_INT * T1.Y, T7.W, T4.W,
4329; EG-NEXT:     SUBB_UINT T2.Z, T8.W, T3.W,
4330; EG-NEXT:     SUB_INT * T12.W, T1.Z, T5.W, BS:VEC_021/SCL_122
4331; EG-NEXT:     CNDE_INT * T9.W, T10.W, T9.W, T11.W,
4332; EG-NEXT:     LSHL T1.X, PV.W, 1,
4333; EG-NEXT:     BFE_UINT * T2.Y, T0.Y, literal.x, 1,
4334; EG-NEXT:    11(1.541428e-44), 0(0.000000e+00)
4335; EG-NEXT:    ALU clause starting at 695:
4336; EG-NEXT:     SUB_INT T2.Z, T12.W, T2.Z,
4337; EG-NEXT:     SUB_INT T11.W, T1.Y, T0.X,
4338; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
4339; EG-NEXT:     LSHL T0.X, PS, 1,
4340; EG-NEXT:     BFE_UINT T1.Y, T6.W, literal.x, 1,
4341; EG-NEXT:     CNDE_INT T0.Z, T10.W, T7.W, PV.W, BS:VEC_120/SCL_212
4342; EG-NEXT:     CNDE_INT T7.W, T3.Y, T1.Z, PV.Z,
4343; EG-NEXT:     OR_INT * T10.W, T1.X, T2.Y,
4344; EG-NEXT:    11(1.541428e-44), 0(0.000000e+00)
4345; EG-NEXT:     SETGE_UINT T2.Y, PS, T2.W,
4346; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
4347; EG-NEXT:     BIT_ALIGN_INT T7.W, PV.Z, T9.W, literal.x, BS:VEC_102/SCL_221
4348; EG-NEXT:     OR_INT * T8.W, PV.X, PV.Y,
4349; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4350; EG-NEXT:     SETE_INT T0.X, PV.W, T4.W, BS:VEC_021/SCL_122
4351; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
4352; EG-NEXT:     SETGE_UINT T0.Z, PS, T3.W, BS:VEC_102/SCL_221
4353; EG-NEXT:     SETE_INT T9.W, PV.Z, T5.W,
4354; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T5.W,
4355; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
4356; EG-NEXT:     SUB_INT T0.Z, T8.W, T3.W,
4357; EG-NEXT:     CNDE_INT * T9.W, PV.X, PV.Y, T2.Y,
4358; EG-NEXT:     SUB_INT * T11.W, T10.W, T2.W,
4359; EG-NEXT:     SUBB_UINT * T0.X, T10.W, T2.W,
4360; EG-NEXT:     SUB_INT * T1.Y, T7.W, T4.W,
4361; EG-NEXT:     SUBB_UINT T2.Z, T8.W, T3.W,
4362; EG-NEXT:     SUB_INT * T12.W, T1.Z, T5.W, BS:VEC_021/SCL_122
4363; EG-NEXT:     CNDE_INT * T10.W, T9.W, T10.W, T11.W,
4364; EG-NEXT:     LSHL T1.X, PV.W, 1,
4365; EG-NEXT:     BFE_UINT T2.Y, T0.Y, literal.x, 1,
4366; EG-NEXT:     SUB_INT T2.Z, T12.W, T2.Z,
4367; EG-NEXT:     SUB_INT T11.W, T1.Y, T0.X, BS:VEC_120/SCL_212
4368; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
4369; EG-NEXT:    10(1.401298e-44), 0(0.000000e+00)
4370; EG-NEXT:     LSHL T0.X, PS, 1,
4371; EG-NEXT:     BFE_UINT T1.Y, T6.W, literal.x, 1,
4372; EG-NEXT:     CNDE_INT T0.Z, T9.W, T7.W, PV.W, BS:VEC_120/SCL_212
4373; EG-NEXT:     CNDE_INT T7.W, T3.Y, T1.Z, PV.Z,
4374; EG-NEXT:     OR_INT * T9.W, PV.X, PV.Y,
4375; EG-NEXT:    10(1.401298e-44), 0(0.000000e+00)
4376; EG-NEXT:     SETGE_UINT T2.Y, PS, T2.W,
4377; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
4378; EG-NEXT:     BIT_ALIGN_INT T7.W, PV.Z, T10.W, literal.x, BS:VEC_102/SCL_221
4379; EG-NEXT:     OR_INT * T8.W, PV.X, PV.Y,
4380; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4381; EG-NEXT:     SETE_INT T0.X, PV.W, T4.W, BS:VEC_021/SCL_122
4382; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
4383; EG-NEXT:     SETGE_UINT T0.Z, PS, T3.W, BS:VEC_102/SCL_221
4384; EG-NEXT:     SETE_INT T10.W, PV.Z, T5.W,
4385; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T5.W,
4386; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
4387; EG-NEXT:     SUB_INT T0.Z, T8.W, T3.W,
4388; EG-NEXT:     CNDE_INT * T10.W, PV.X, PV.Y, T2.Y,
4389; EG-NEXT:     SUB_INT * T11.W, T9.W, T2.W,
4390; EG-NEXT:     SUBB_UINT * T0.X, T9.W, T2.W,
4391; EG-NEXT:     SUB_INT * T1.Y, T7.W, T4.W,
4392; EG-NEXT:     SUBB_UINT T2.Z, T8.W, T3.W,
4393; EG-NEXT:     SUB_INT * T12.W, T1.Z, T5.W, BS:VEC_021/SCL_122
4394; EG-NEXT:     CNDE_INT * T9.W, T10.W, T9.W, T11.W,
4395; EG-NEXT:     LSHL T1.X, PV.W, 1,
4396; EG-NEXT:     BFE_UINT T2.Y, T0.Y, literal.x, 1,
4397; EG-NEXT:     SUB_INT T2.Z, T12.W, T2.Z,
4398; EG-NEXT:     SUB_INT T11.W, T1.Y, T0.X, BS:VEC_120/SCL_212
4399; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
4400; EG-NEXT:    9(1.261169e-44), 0(0.000000e+00)
4401; EG-NEXT:     LSHL T0.X, PS, 1,
4402; EG-NEXT:     BFE_UINT T1.Y, T6.W, literal.x, 1,
4403; EG-NEXT:     CNDE_INT T0.Z, T10.W, T7.W, PV.W, BS:VEC_120/SCL_212
4404; EG-NEXT:     CNDE_INT T7.W, T3.Y, T1.Z, PV.Z,
4405; EG-NEXT:     OR_INT * T10.W, PV.X, PV.Y,
4406; EG-NEXT:    9(1.261169e-44), 0(0.000000e+00)
4407; EG-NEXT:     SETGE_UINT T2.Y, PS, T2.W,
4408; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
4409; EG-NEXT:     BIT_ALIGN_INT T7.W, PV.Z, T9.W, literal.x, BS:VEC_102/SCL_221
4410; EG-NEXT:     OR_INT * T8.W, PV.X, PV.Y,
4411; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4412; EG-NEXT:     SETE_INT T0.X, PV.W, T4.W, BS:VEC_021/SCL_122
4413; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
4414; EG-NEXT:     SETGE_UINT T0.Z, PS, T3.W, BS:VEC_102/SCL_221
4415; EG-NEXT:     SETE_INT T9.W, PV.Z, T5.W,
4416; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T5.W,
4417; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
4418; EG-NEXT:     SUB_INT T0.Z, T8.W, T3.W,
4419; EG-NEXT:     CNDE_INT * T9.W, PV.X, PV.Y, T2.Y,
4420; EG-NEXT:     SUB_INT * T11.W, T10.W, T2.W,
4421; EG-NEXT:     SUBB_UINT * T0.X, T10.W, T2.W,
4422; EG-NEXT:     SUB_INT * T1.Y, T7.W, T4.W,
4423; EG-NEXT:     SUBB_UINT T2.Z, T8.W, T3.W,
4424; EG-NEXT:     SUB_INT * T12.W, T1.Z, T5.W, BS:VEC_021/SCL_122
4425; EG-NEXT:     CNDE_INT * T10.W, T9.W, T10.W, T11.W,
4426; EG-NEXT:     LSHL T1.X, PV.W, 1,
4427; EG-NEXT:     BFE_UINT T2.Y, T0.Y, literal.x, 1,
4428; EG-NEXT:     SUB_INT T2.Z, T12.W, T2.Z,
4429; EG-NEXT:     SUB_INT T11.W, T1.Y, T0.X, BS:VEC_120/SCL_212
4430; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
4431; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
4432; EG-NEXT:     LSHL T0.X, PS, 1,
4433; EG-NEXT:     BFE_UINT T1.Y, T6.W, literal.x, 1,
4434; EG-NEXT:     CNDE_INT T0.Z, T9.W, T7.W, PV.W, BS:VEC_120/SCL_212
4435; EG-NEXT:     CNDE_INT T7.W, T3.Y, T1.Z, PV.Z,
4436; EG-NEXT:     OR_INT * T9.W, PV.X, PV.Y,
4437; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
4438; EG-NEXT:     SETGE_UINT T2.Y, PS, T2.W,
4439; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
4440; EG-NEXT:     BIT_ALIGN_INT T7.W, PV.Z, T10.W, literal.x, BS:VEC_102/SCL_221
4441; EG-NEXT:     OR_INT * T8.W, PV.X, PV.Y,
4442; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4443; EG-NEXT:     SETE_INT T0.X, PV.W, T4.W, BS:VEC_021/SCL_122
4444; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
4445; EG-NEXT:     SETGE_UINT T0.Z, PS, T3.W, BS:VEC_102/SCL_221
4446; EG-NEXT:     SETE_INT T10.W, PV.Z, T5.W,
4447; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T5.W,
4448; EG-NEXT:    ALU clause starting at 807:
4449; EG-NEXT:     CNDE_INT * T3.Y, T10.W, T11.W, T0.Z,
4450; EG-NEXT:     SUB_INT T0.Z, T8.W, T3.W,
4451; EG-NEXT:     CNDE_INT * T10.W, T0.X, T1.Y, T2.Y,
4452; EG-NEXT:     SUB_INT * T11.W, T9.W, T2.W,
4453; EG-NEXT:     SUBB_UINT * T0.X, T9.W, T2.W,
4454; EG-NEXT:     SUB_INT * T1.Y, T7.W, T4.W,
4455; EG-NEXT:     SUBB_UINT T2.Z, T8.W, T3.W,
4456; EG-NEXT:     SUB_INT * T12.W, T1.Z, T5.W, BS:VEC_021/SCL_122
4457; EG-NEXT:     CNDE_INT * T9.W, T10.W, T9.W, T11.W,
4458; EG-NEXT:     LSHL T1.X, PV.W, 1,
4459; EG-NEXT:     BFE_UINT T2.Y, T0.Y, literal.x, 1,
4460; EG-NEXT:     SUB_INT T2.Z, T12.W, T2.Z,
4461; EG-NEXT:     SUB_INT T11.W, T1.Y, T0.X, BS:VEC_120/SCL_212
4462; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
4463; EG-NEXT:    7(9.809089e-45), 0(0.000000e+00)
4464; EG-NEXT:     LSHL T0.X, PS, 1,
4465; EG-NEXT:     BFE_UINT T1.Y, T6.W, literal.x, 1,
4466; EG-NEXT:     CNDE_INT T0.Z, T10.W, T7.W, PV.W, BS:VEC_120/SCL_212
4467; EG-NEXT:     CNDE_INT T7.W, T3.Y, T1.Z, PV.Z,
4468; EG-NEXT:     OR_INT * T10.W, PV.X, PV.Y,
4469; EG-NEXT:    7(9.809089e-45), 0(0.000000e+00)
4470; EG-NEXT:     SETGE_UINT T2.Y, PS, T2.W,
4471; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
4472; EG-NEXT:     BIT_ALIGN_INT T7.W, PV.Z, T9.W, literal.x, BS:VEC_102/SCL_221
4473; EG-NEXT:     OR_INT * T8.W, PV.X, PV.Y,
4474; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4475; EG-NEXT:     SETE_INT T0.X, PV.W, T4.W, BS:VEC_021/SCL_122
4476; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
4477; EG-NEXT:     SETGE_UINT T0.Z, PS, T3.W, BS:VEC_102/SCL_221
4478; EG-NEXT:     SETE_INT T9.W, PV.Z, T5.W,
4479; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T5.W,
4480; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
4481; EG-NEXT:     SUB_INT T0.Z, T8.W, T3.W,
4482; EG-NEXT:     CNDE_INT * T9.W, PV.X, PV.Y, T2.Y,
4483; EG-NEXT:     SUB_INT * T11.W, T10.W, T2.W,
4484; EG-NEXT:     SUBB_UINT * T0.X, T10.W, T2.W,
4485; EG-NEXT:     SUB_INT * T1.Y, T7.W, T4.W,
4486; EG-NEXT:     SUBB_UINT T2.Z, T8.W, T3.W,
4487; EG-NEXT:     SUB_INT * T12.W, T1.Z, T5.W, BS:VEC_021/SCL_122
4488; EG-NEXT:     CNDE_INT * T10.W, T9.W, T10.W, T11.W,
4489; EG-NEXT:     LSHL T1.X, PV.W, 1,
4490; EG-NEXT:     BFE_UINT T2.Y, T0.Y, literal.x, 1,
4491; EG-NEXT:     SUB_INT T2.Z, T12.W, T2.Z,
4492; EG-NEXT:     SUB_INT T11.W, T1.Y, T0.X, BS:VEC_120/SCL_212
4493; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
4494; EG-NEXT:    6(8.407791e-45), 0(0.000000e+00)
4495; EG-NEXT:     LSHL T0.X, PS, 1,
4496; EG-NEXT:     BFE_UINT T1.Y, T6.W, literal.x, 1,
4497; EG-NEXT:     CNDE_INT T0.Z, T9.W, T7.W, PV.W, BS:VEC_120/SCL_212
4498; EG-NEXT:     CNDE_INT T7.W, T3.Y, T1.Z, PV.Z,
4499; EG-NEXT:     OR_INT * T9.W, PV.X, PV.Y,
4500; EG-NEXT:    6(8.407791e-45), 0(0.000000e+00)
4501; EG-NEXT:     SETGE_UINT T2.Y, PS, T2.W,
4502; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
4503; EG-NEXT:     BIT_ALIGN_INT T7.W, PV.Z, T10.W, literal.x, BS:VEC_102/SCL_221
4504; EG-NEXT:     OR_INT * T8.W, PV.X, PV.Y,
4505; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4506; EG-NEXT:     SETE_INT T0.X, PV.W, T4.W, BS:VEC_021/SCL_122
4507; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
4508; EG-NEXT:     SETGE_UINT T0.Z, PS, T3.W, BS:VEC_102/SCL_221
4509; EG-NEXT:     SETE_INT T10.W, PV.Z, T5.W,
4510; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T5.W,
4511; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
4512; EG-NEXT:     SUB_INT T0.Z, T8.W, T3.W,
4513; EG-NEXT:     CNDE_INT * T10.W, PV.X, PV.Y, T2.Y,
4514; EG-NEXT:     SUB_INT * T11.W, T9.W, T2.W,
4515; EG-NEXT:     SUBB_UINT * T0.X, T9.W, T2.W,
4516; EG-NEXT:     SUB_INT * T1.Y, T7.W, T4.W,
4517; EG-NEXT:     SUBB_UINT T2.Z, T8.W, T3.W,
4518; EG-NEXT:     SUB_INT * T12.W, T1.Z, T5.W, BS:VEC_021/SCL_122
4519; EG-NEXT:     CNDE_INT * T9.W, T10.W, T9.W, T11.W,
4520; EG-NEXT:     LSHL T1.X, PV.W, 1,
4521; EG-NEXT:     BFE_UINT T2.Y, T0.Y, literal.x, 1,
4522; EG-NEXT:     SUB_INT T2.Z, T12.W, T2.Z,
4523; EG-NEXT:     SUB_INT T11.W, T1.Y, T0.X, BS:VEC_120/SCL_212
4524; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
4525; EG-NEXT:    5(7.006492e-45), 0(0.000000e+00)
4526; EG-NEXT:     LSHL T0.X, PS, 1,
4527; EG-NEXT:     BFE_UINT T1.Y, T6.W, literal.x, 1,
4528; EG-NEXT:     CNDE_INT T0.Z, T10.W, T7.W, PV.W, BS:VEC_120/SCL_212
4529; EG-NEXT:     CNDE_INT T7.W, T3.Y, T1.Z, PV.Z,
4530; EG-NEXT:     OR_INT * T10.W, PV.X, PV.Y,
4531; EG-NEXT:    5(7.006492e-45), 0(0.000000e+00)
4532; EG-NEXT:     SETGE_UINT T2.Y, PS, T2.W,
4533; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
4534; EG-NEXT:     BIT_ALIGN_INT T7.W, PV.Z, T9.W, literal.x, BS:VEC_102/SCL_221
4535; EG-NEXT:     OR_INT * T8.W, PV.X, PV.Y,
4536; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4537; EG-NEXT:     SETE_INT T0.X, PV.W, T4.W, BS:VEC_021/SCL_122
4538; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
4539; EG-NEXT:     SETGE_UINT T0.Z, PS, T3.W, BS:VEC_102/SCL_221
4540; EG-NEXT:     SETE_INT T9.W, PV.Z, T5.W,
4541; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T5.W,
4542; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
4543; EG-NEXT:     SUB_INT T0.Z, T8.W, T3.W,
4544; EG-NEXT:     CNDE_INT * T9.W, PV.X, PV.Y, T2.Y,
4545; EG-NEXT:     SUB_INT * T11.W, T10.W, T2.W,
4546; EG-NEXT:     SUBB_UINT * T0.X, T10.W, T2.W,
4547; EG-NEXT:     SUB_INT * T1.Y, T7.W, T4.W,
4548; EG-NEXT:     SUBB_UINT T2.Z, T8.W, T3.W,
4549; EG-NEXT:     SUB_INT * T12.W, T1.Z, T5.W, BS:VEC_021/SCL_122
4550; EG-NEXT:     CNDE_INT * T10.W, T9.W, T10.W, T11.W,
4551; EG-NEXT:     LSHL T1.X, PV.W, 1,
4552; EG-NEXT:     BFE_UINT T2.Y, T0.Y, literal.x, 1,
4553; EG-NEXT:     SUB_INT T2.Z, T12.W, T2.Z,
4554; EG-NEXT:     SUB_INT T11.W, T1.Y, T0.X, BS:VEC_120/SCL_212
4555; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
4556; EG-NEXT:    4(5.605194e-45), 0(0.000000e+00)
4557; EG-NEXT:     LSHL T0.X, PS, 1,
4558; EG-NEXT:     BFE_UINT T1.Y, T6.W, literal.x, 1,
4559; EG-NEXT:     CNDE_INT T0.Z, T9.W, T7.W, PV.W, BS:VEC_120/SCL_212
4560; EG-NEXT:     CNDE_INT * T7.W, T3.Y, T1.Z, PV.Z,
4561; EG-NEXT:    4(5.605194e-45), 0(0.000000e+00)
4562; EG-NEXT:    ALU clause starting at 920:
4563; EG-NEXT:     OR_INT * T9.W, T1.X, T2.Y,
4564; EG-NEXT:     SETGE_UINT T2.Y, PV.W, T2.W,
4565; EG-NEXT:     BIT_ALIGN_INT * T1.Z, T7.W, T8.W, literal.x, BS:VEC_021/SCL_122
4566; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4567; EG-NEXT:     BIT_ALIGN_INT T7.W, T0.Z, T10.W, literal.x,
4568; EG-NEXT:     OR_INT * T8.W, T0.X, T1.Y,
4569; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4570; EG-NEXT:     SETE_INT T0.X, PV.W, T4.W, BS:VEC_021/SCL_122
4571; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
4572; EG-NEXT:     SETGE_UINT T0.Z, PS, T3.W, BS:VEC_102/SCL_221
4573; EG-NEXT:     SETE_INT T10.W, T1.Z, T5.W,
4574; EG-NEXT:     SETGE_UINT * T11.W, T1.Z, T5.W,
4575; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
4576; EG-NEXT:     SUB_INT T0.Z, T8.W, T3.W,
4577; EG-NEXT:     CNDE_INT * T10.W, PV.X, PV.Y, T2.Y,
4578; EG-NEXT:     SUB_INT * T11.W, T9.W, T2.W,
4579; EG-NEXT:     SUBB_UINT * T0.X, T9.W, T2.W,
4580; EG-NEXT:     SUB_INT * T1.Y, T7.W, T4.W,
4581; EG-NEXT:     SUBB_UINT T2.Z, T8.W, T3.W,
4582; EG-NEXT:     SUB_INT * T12.W, T1.Z, T5.W, BS:VEC_021/SCL_122
4583; EG-NEXT:     CNDE_INT * T9.W, T10.W, T9.W, T11.W,
4584; EG-NEXT:     LSHL T1.X, PV.W, 1,
4585; EG-NEXT:     BFE_UINT T2.Y, T0.Y, literal.x, 1,
4586; EG-NEXT:     SUB_INT T2.Z, T12.W, T2.Z,
4587; EG-NEXT:     SUB_INT T11.W, T1.Y, T0.X, BS:VEC_120/SCL_212
4588; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
4589; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
4590; EG-NEXT:     LSHL T0.X, PS, 1,
4591; EG-NEXT:     BFE_UINT T1.Y, T6.W, literal.x, 1,
4592; EG-NEXT:     CNDE_INT T0.Z, T10.W, T7.W, PV.W, BS:VEC_120/SCL_212
4593; EG-NEXT:     CNDE_INT T7.W, T3.Y, T1.Z, PV.Z,
4594; EG-NEXT:     OR_INT * T10.W, PV.X, PV.Y,
4595; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
4596; EG-NEXT:     SETGE_UINT T2.Y, PS, T2.W,
4597; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
4598; EG-NEXT:     BIT_ALIGN_INT T7.W, PV.Z, T9.W, literal.x, BS:VEC_102/SCL_221
4599; EG-NEXT:     OR_INT * T8.W, PV.X, PV.Y,
4600; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4601; EG-NEXT:     SETE_INT T0.X, PV.W, T4.W, BS:VEC_021/SCL_122
4602; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
4603; EG-NEXT:     SETGE_UINT T0.Z, PS, T3.W, BS:VEC_102/SCL_221
4604; EG-NEXT:     SETE_INT T9.W, PV.Z, T5.W,
4605; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T5.W,
4606; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
4607; EG-NEXT:     SUB_INT T0.Z, T8.W, T3.W,
4608; EG-NEXT:     CNDE_INT * T9.W, PV.X, PV.Y, T2.Y,
4609; EG-NEXT:     SUB_INT * T11.W, T10.W, T2.W,
4610; EG-NEXT:     SUBB_UINT * T0.X, T10.W, T2.W,
4611; EG-NEXT:     SUB_INT * T1.Y, T7.W, T4.W,
4612; EG-NEXT:     SUBB_UINT T2.Z, T8.W, T3.W,
4613; EG-NEXT:     SUB_INT * T12.W, T1.Z, T5.W, BS:VEC_021/SCL_122
4614; EG-NEXT:     CNDE_INT * T10.W, T9.W, T10.W, T11.W,
4615; EG-NEXT:     LSHL T1.X, PV.W, 1,
4616; EG-NEXT:     BFE_UINT T2.Y, T0.Y, literal.x, 1,
4617; EG-NEXT:     SUB_INT T2.Z, T12.W, T2.Z,
4618; EG-NEXT:     SUB_INT T11.W, T1.Y, T0.X, BS:VEC_120/SCL_212
4619; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
4620; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4621; EG-NEXT:     LSHL T0.X, PS, 1,
4622; EG-NEXT:     BFE_UINT T1.Y, T6.W, literal.x, 1,
4623; EG-NEXT:     CNDE_INT T0.Z, T9.W, T7.W, PV.W, BS:VEC_120/SCL_212
4624; EG-NEXT:     CNDE_INT T7.W, T3.Y, T1.Z, PV.Z,
4625; EG-NEXT:     OR_INT * T9.W, PV.X, PV.Y,
4626; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4627; EG-NEXT:     SETGE_UINT T2.Y, PS, T2.W,
4628; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
4629; EG-NEXT:     BIT_ALIGN_INT T7.W, PV.Z, T10.W, literal.x, BS:VEC_102/SCL_221
4630; EG-NEXT:     OR_INT * T8.W, PV.X, PV.Y,
4631; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4632; EG-NEXT:     SETE_INT T0.X, PV.W, T4.W, BS:VEC_021/SCL_122
4633; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
4634; EG-NEXT:     SETGE_UINT T0.Z, PS, T3.W, BS:VEC_102/SCL_221
4635; EG-NEXT:     SETE_INT T10.W, PV.Z, T5.W,
4636; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T5.W,
4637; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
4638; EG-NEXT:     SUB_INT T0.Z, T8.W, T3.W,
4639; EG-NEXT:     CNDE_INT * T10.W, PV.X, PV.Y, T2.Y,
4640; EG-NEXT:     SUB_INT * T11.W, T9.W, T2.W,
4641; EG-NEXT:     SUBB_UINT * T0.X, T9.W, T2.W,
4642; EG-NEXT:     SUB_INT * T1.Y, T7.W, T4.W,
4643; EG-NEXT:     SUBB_UINT T2.Z, T8.W, T3.W,
4644; EG-NEXT:     SUB_INT * T12.W, T1.Z, T5.W, BS:VEC_021/SCL_122
4645; EG-NEXT:     CNDE_INT * T9.W, T10.W, T9.W, T11.W,
4646; EG-NEXT:     LSHL T1.X, PV.W, 1,
4647; EG-NEXT:     BFE_UINT T2.Y, T0.Y, 1, 1,
4648; EG-NEXT:     SUB_INT T2.Z, T12.W, T2.Z,
4649; EG-NEXT:     SUB_INT T11.W, T1.Y, T0.X, BS:VEC_120/SCL_212
4650; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
4651; EG-NEXT:     LSHL T0.X, PS, 1,
4652; EG-NEXT:     BFE_UINT T1.Y, T6.W, 1, 1,
4653; EG-NEXT:     CNDE_INT T0.Z, T10.W, T7.W, PV.W, BS:VEC_120/SCL_212
4654; EG-NEXT:     CNDE_INT T7.W, T3.Y, T1.Z, PV.Z,
4655; EG-NEXT:     OR_INT * T10.W, PV.X, PV.Y,
4656; EG-NEXT:     SETGE_UINT T2.Y, PS, T2.W,
4657; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
4658; EG-NEXT:     BIT_ALIGN_INT T7.W, PV.Z, T9.W, literal.x, BS:VEC_102/SCL_221
4659; EG-NEXT:     OR_INT * T8.W, PV.X, PV.Y,
4660; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4661; EG-NEXT:     SETE_INT T0.X, PV.W, T4.W, BS:VEC_021/SCL_122
4662; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
4663; EG-NEXT:     SETGE_UINT T0.Z, PS, T3.W, BS:VEC_102/SCL_221
4664; EG-NEXT:     SETE_INT T9.W, PV.Z, T5.W,
4665; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T5.W,
4666; EG-NEXT:     CNDE_INT T1.X, PV.W, PS, PV.Z,
4667; EG-NEXT:     CNDE_INT T1.Y, PV.X, PV.Y, T2.Y,
4668; EG-NEXT:     SUB_INT T0.Z, T10.W, T2.W,
4669; EG-NEXT:     SUBB_UINT * T9.W, T10.W, T2.W,
4670; EG-NEXT:     SUB_INT * T11.W, T7.W, T4.W,
4671; EG-NEXT:     SUB_INT T0.X, T8.W, T3.W,
4672; EG-NEXT:     SUBB_UINT T2.Y, T8.W, T3.W,
4673; EG-NEXT:     SUB_INT * T2.Z, T1.Z, T5.W, BS:VEC_021/SCL_122
4674; EG-NEXT:     SUB_INT T9.W, T11.W, T9.W, BS:VEC_021/SCL_122
4675; EG-NEXT:     CNDE_INT * T10.W, T1.Y, T10.W, T0.Z,
4676; EG-NEXT:    ALU clause starting at 1033:
4677; EG-NEXT:     LSHL T2.X, T10.W, 1,
4678; EG-NEXT:     AND_INT T0.Y, T0.Y, 1, BS:VEC_120/SCL_212
4679; EG-NEXT:     CNDE_INT T0.Z, T1.Y, T7.W, T9.W,
4680; EG-NEXT:     SUB_INT * T7.W, T2.Z, T2.Y, BS:VEC_021/SCL_122
4681; EG-NEXT:     CNDE_INT * T8.W, T1.X, T8.W, T0.X,
4682; EG-NEXT:     LSHL T0.X, PV.W, 1,
4683; EG-NEXT:     AND_INT T1.Y, T6.W, 1,
4684; EG-NEXT:     CNDE_INT T1.Z, T1.X, T1.Z, T7.W,
4685; EG-NEXT:     BIT_ALIGN_INT T6.W, T0.Z, T10.W, literal.x,
4686; EG-NEXT:     OR_INT * T7.W, T2.X, T0.Y,
4687; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4688; EG-NEXT:     SETGE_UINT T1.X, PS, T2.W,
4689; EG-NEXT:     SETE_INT T0.Y, PV.W, T4.W, BS:VEC_021/SCL_122
4690; EG-NEXT:     SETGE_UINT T0.Z, PV.W, T4.W, BS:VEC_021/SCL_122
4691; EG-NEXT:     BIT_ALIGN_INT T8.W, PV.Z, T8.W, literal.x, BS:VEC_102/SCL_221
4692; EG-NEXT:     OR_INT * T9.W, PV.X, PV.Y,
4693; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4694; EG-NEXT:     SETGE_UINT T0.X, PS, T3.W, BS:VEC_021/SCL_122
4695; EG-NEXT:     SETE_INT T1.Y, PV.W, T5.W,
4696; EG-NEXT:     SETGE_UINT T1.Z, PV.W, T5.W,
4697; EG-NEXT:     SUBB_UINT T10.W, PS, T3.W, BS:VEC_021/SCL_122
4698; EG-NEXT:     SUB_INT * T5.W, PV.W, T5.W,
4699; EG-NEXT:     SUBB_UINT * T2.X, T7.W, T2.W,
4700; EG-NEXT:     SUB_INT * T2.Y, T6.W, T4.W,
4701; EG-NEXT:     SUB_INT * T2.Z, T9.W, T3.W,
4702; EG-NEXT:     SUB_INT T3.W, T5.W, T10.W,
4703; EG-NEXT:     CNDE_INT * T4.W, T1.Y, T1.Z, T0.X,
4704; EG-NEXT:     CNDE_INT T0.X, PS, T8.W, PV.W,
4705; EG-NEXT:     CNDE_INT * T1.Y, PS, T9.W, T2.Z, BS:VEC_021/SCL_122
4706; EG-NEXT:     SUB_INT T1.Z, T7.W, T2.W,
4707; EG-NEXT:     SUB_INT T2.W, T2.Y, T2.X,
4708; EG-NEXT:     CNDE_INT * T3.W, T0.Y, T0.Z, T1.X,
4709; EG-NEXT:     CNDE_INT T0.Y, PS, T6.W, PV.W, BS:VEC_021/SCL_122
4710; EG-NEXT:     CNDE_INT T0.Z, PS, T7.W, PV.Z, BS:VEC_102/SCL_221
4711; EG-NEXT:     XOR_INT T2.W, T1.Y, T0.W,
4712; EG-NEXT:     XOR_INT * T3.W, T0.X, T0.W,
4713; EG-NEXT:     SUB_INT T1.Y, PS, T0.W, BS:VEC_021/SCL_122
4714; EG-NEXT:     SUBB_UINT T1.Z, PV.W, T0.W, BS:VEC_021/SCL_122
4715; EG-NEXT:     XOR_INT T3.W, PV.Z, T1.W,
4716; EG-NEXT:     XOR_INT * T4.W, PV.Y, T1.W,
4717; EG-NEXT:     SUB_INT T0.Z, PS, T1.W,
4718; EG-NEXT:     SUBB_UINT T4.W, PV.W, T1.W,
4719; EG-NEXT:     SUB_INT * T5.W, PV.Y, PV.Z,
4720; EG-NEXT:     SUB_INT T5.Y, PV.Z, PV.W,
4721; EG-NEXT:     SUB_INT * T5.Z, T2.W, T0.W,
4722; EG-NEXT:     SUB_INT T5.X, T3.W, T1.W,
4723; EG-NEXT:     LSHR * T0.X, KC0[2].Y, literal.x,
4724; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
4725  %den_ptr = getelementptr <2 x i64>, ptr addrspace(1) %in, i64 1
4726  %num = load <2 x i64>, ptr addrspace(1) %in
4727  %den = load <2 x i64>, ptr addrspace(1) %den_ptr
4728  %result = srem <2 x i64> %num, %den
4729  store <2 x i64> %result, ptr addrspace(1) %out
4730  ret void
4731}
4732
; srem_v2i64_4: signed remainder of a <2 x i64> vector by the constant splat
; <i64 4, i64 4>. One vector is loaded from %in and the result is stored to
; %out. Unlike @srem_v2i64 above, the divisor is a compile-time power of two.
;
; What the expectations below pin down, per 64-bit lane (gfx900, tahiti and
; tonga blocks):
;   - arithmetic shift right by 31 of the high dword, then logical shift
;     right by 30, yielding a small rounding bias derived from the sign;
;   - a 64-bit add of that bias to the lane (add + add-with-carry);
;   - an AND of the low dword with -4;
;   - a 64-bit subtract of that value from the original lane.
; The redwood block expects the same shape in R600 ALU ops (ASHR, LSHR,
; ADD_INT/ADDC_UINT, AND_INT, SUB_INT/SUBB_UINT). None of the four blocks
; contains a reciprocal-based division expansion.
;
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing by
; hand.
4733define amdgpu_kernel void @srem_v2i64_4(ptr addrspace(1) %out, ptr addrspace(1) %in) {
4734; GCN-LABEL: srem_v2i64_4:
4735; GCN:       ; %bb.0:
4736; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
4737; GCN-NEXT:    v_mov_b32_e32 v4, 0
4738; GCN-NEXT:    s_waitcnt lgkmcnt(0)
4739; GCN-NEXT:    global_load_dwordx4 v[0:3], v4, s[2:3]
4740; GCN-NEXT:    s_waitcnt vmcnt(0)
4741; GCN-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
4742; GCN-NEXT:    v_lshrrev_b32_e32 v5, 30, v5
4743; GCN-NEXT:    v_ashrrev_i32_e32 v6, 31, v3
4744; GCN-NEXT:    v_add_co_u32_e32 v5, vcc, v0, v5
4745; GCN-NEXT:    v_lshrrev_b32_e32 v6, 30, v6
4746; GCN-NEXT:    v_addc_co_u32_e32 v7, vcc, 0, v1, vcc
4747; GCN-NEXT:    v_add_co_u32_e32 v6, vcc, v2, v6
4748; GCN-NEXT:    v_addc_co_u32_e32 v8, vcc, 0, v3, vcc
4749; GCN-NEXT:    v_and_b32_e32 v5, -4, v5
4750; GCN-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v5
4751; GCN-NEXT:    v_and_b32_e32 v6, -4, v6
4752; GCN-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v7, vcc
4753; GCN-NEXT:    v_sub_co_u32_e32 v2, vcc, v2, v6
4754; GCN-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v8, vcc
4755; GCN-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1]
4756; GCN-NEXT:    s_endpgm
4757;
4758; TAHITI-LABEL: srem_v2i64_4:
4759; TAHITI:       ; %bb.0:
4760; TAHITI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
4761; TAHITI-NEXT:    s_mov_b32 s7, 0xf000
4762; TAHITI-NEXT:    s_mov_b32 s6, -1
4763; TAHITI-NEXT:    s_mov_b32 s10, s6
4764; TAHITI-NEXT:    s_mov_b32 s11, s7
4765; TAHITI-NEXT:    s_waitcnt lgkmcnt(0)
4766; TAHITI-NEXT:    s_mov_b32 s8, s2
4767; TAHITI-NEXT:    s_mov_b32 s9, s3
4768; TAHITI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
4769; TAHITI-NEXT:    s_mov_b32 s4, s0
4770; TAHITI-NEXT:    s_mov_b32 s5, s1
4771; TAHITI-NEXT:    s_waitcnt vmcnt(0)
4772; TAHITI-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
4773; TAHITI-NEXT:    v_lshrrev_b32_e32 v4, 30, v4
4774; TAHITI-NEXT:    v_ashrrev_i32_e32 v5, 31, v3
4775; TAHITI-NEXT:    v_add_i32_e32 v4, vcc, v0, v4
4776; TAHITI-NEXT:    v_lshrrev_b32_e32 v5, 30, v5
4777; TAHITI-NEXT:    v_addc_u32_e32 v6, vcc, 0, v1, vcc
4778; TAHITI-NEXT:    v_add_i32_e32 v5, vcc, v2, v5
4779; TAHITI-NEXT:    v_addc_u32_e32 v7, vcc, 0, v3, vcc
4780; TAHITI-NEXT:    v_and_b32_e32 v4, -4, v4
4781; TAHITI-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
4782; TAHITI-NEXT:    v_and_b32_e32 v5, -4, v5
4783; TAHITI-NEXT:    v_subb_u32_e32 v1, vcc, v1, v6, vcc
4784; TAHITI-NEXT:    v_sub_i32_e32 v2, vcc, v2, v5
4785; TAHITI-NEXT:    v_subb_u32_e32 v3, vcc, v3, v7, vcc
4786; TAHITI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
4787; TAHITI-NEXT:    s_endpgm
4788;
4789; TONGA-LABEL: srem_v2i64_4:
4790; TONGA:       ; %bb.0:
4791; TONGA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
4792; TONGA-NEXT:    s_waitcnt lgkmcnt(0)
4793; TONGA-NEXT:    v_mov_b32_e32 v0, s2
4794; TONGA-NEXT:    v_mov_b32_e32 v1, s3
4795; TONGA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
4796; TONGA-NEXT:    v_mov_b32_e32 v4, s0
4797; TONGA-NEXT:    v_mov_b32_e32 v5, s1
4798; TONGA-NEXT:    s_waitcnt vmcnt(0)
4799; TONGA-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
4800; TONGA-NEXT:    v_lshrrev_b32_e32 v6, 30, v6
4801; TONGA-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
4802; TONGA-NEXT:    v_add_u32_e32 v6, vcc, v0, v6
4803; TONGA-NEXT:    v_lshrrev_b32_e32 v7, 30, v7
4804; TONGA-NEXT:    v_addc_u32_e32 v8, vcc, 0, v1, vcc
4805; TONGA-NEXT:    v_add_u32_e32 v7, vcc, v2, v7
4806; TONGA-NEXT:    v_addc_u32_e32 v9, vcc, 0, v3, vcc
4807; TONGA-NEXT:    v_and_b32_e32 v6, -4, v6
4808; TONGA-NEXT:    v_sub_u32_e32 v0, vcc, v0, v6
4809; TONGA-NEXT:    v_and_b32_e32 v7, -4, v7
4810; TONGA-NEXT:    v_subb_u32_e32 v1, vcc, v1, v8, vcc
4811; TONGA-NEXT:    v_sub_u32_e32 v2, vcc, v2, v7
4812; TONGA-NEXT:    v_subb_u32_e32 v3, vcc, v3, v9, vcc
4813; TONGA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
4814; TONGA-NEXT:    s_endpgm
4815;
4816; EG-LABEL: srem_v2i64_4:
4817; EG:       ; %bb.0:
4818; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
4819; EG-NEXT:    TEX 0 @6
4820; EG-NEXT:    ALU 24, @9, KC0[CB0:0-32], KC1[]
4821; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T3.XYZW, T0.X, 1
4822; EG-NEXT:    CF_END
4823; EG-NEXT:    PAD
4824; EG-NEXT:    Fetch clause starting at 6:
4825; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
4826; EG-NEXT:    ALU clause starting at 8:
4827; EG-NEXT:     MOV * T0.X, KC0[2].Z,
4828; EG-NEXT:    ALU clause starting at 9:
4829; EG-NEXT:     ASHR T0.W, T0.W, literal.x,
4830; EG-NEXT:     ASHR * T1.W, T0.Y, literal.x,
4831; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
4832; EG-NEXT:     LSHR * T0.W, PV.W, literal.x,
4833; EG-NEXT:    30(4.203895e-44), 0(0.000000e+00)
4834; EG-NEXT:     ADD_INT T2.W, T0.Z, PV.W,
4835; EG-NEXT:     LSHR * T1.W, T1.W, literal.x,
4836; EG-NEXT:    30(4.203895e-44), 0(0.000000e+00)
4837; EG-NEXT:     ADD_INT T1.Z, T0.X, PS,
4838; EG-NEXT:     ADDC_UINT T0.W, T0.Z, T0.W,
4839; EG-NEXT:     AND_INT * T2.W, PV.W, literal.x,
4840; EG-NEXT:    -4(nan), 0(0.000000e+00)
4841; EG-NEXT:     SUBB_UINT T0.Y, T0.Z, PS,
4842; EG-NEXT:     BFE_INT T2.Z, PV.W, 0.0, 1,
4843; EG-NEXT:     ADDC_UINT T0.W, T0.X, T1.W,
4844; EG-NEXT:     AND_INT * T1.W, PV.Z, literal.x,
4845; EG-NEXT:    -4(nan), 0(0.000000e+00)
4846; EG-NEXT:     SUBB_UINT T1.Z, T0.X, PS,
4847; EG-NEXT:     BFE_INT T0.W, PV.W, 0.0, 1,
4848; EG-NEXT:     SUB_INT * T3.W, PV.Z, PV.Y,
4849; EG-NEXT:     SUB_INT T3.Y, PV.W, PV.Z,
4850; EG-NEXT:     SUB_INT * T3.Z, T0.Z, T2.W,
4851; EG-NEXT:     SUB_INT T3.X, T0.X, T1.W,
4852; EG-NEXT:     LSHR * T0.X, KC0[2].Y, literal.x,
4853; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  ; Single vector load of the dividend; the divisor is an immediate.
4854  %num = load <2 x i64>, ptr addrspace(1) %in
  ; Both lanes use the same power-of-two divisor (4).
4855  %result = srem <2 x i64> %num, <i64 4, i64 4>
4856  store <2 x i64> %result, ptr addrspace(1) %out
4857  ret void
4858}
4859
4860define amdgpu_kernel void @srem_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
4861; GCN-LABEL: srem_v4i64:
4862; GCN:       ; %bb.0:
4863; GCN-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x24
4864; GCN-NEXT:    v_mov_b32_e32 v8, 0
4865; GCN-NEXT:    s_waitcnt lgkmcnt(0)
4866; GCN-NEXT:    global_load_dwordx4 v[10:13], v8, s[10:11] offset:32
4867; GCN-NEXT:    global_load_dwordx4 v[14:17], v8, s[10:11]
4868; GCN-NEXT:    global_load_dwordx4 v[0:3], v8, s[10:11] offset:48
4869; GCN-NEXT:    global_load_dwordx4 v[4:7], v8, s[10:11] offset:16
4870; GCN-NEXT:    s_waitcnt vmcnt(3)
4871; GCN-NEXT:    v_readfirstlane_b32 s5, v11
4872; GCN-NEXT:    v_readfirstlane_b32 s4, v10
4873; GCN-NEXT:    s_waitcnt vmcnt(2)
4874; GCN-NEXT:    v_readfirstlane_b32 s7, v15
4875; GCN-NEXT:    v_readfirstlane_b32 s6, v14
4876; GCN-NEXT:    s_or_b64 s[0:1], s[6:7], s[4:5]
4877; GCN-NEXT:    s_mov_b32 s0, 0
4878; GCN-NEXT:    s_cmp_lg_u64 s[0:1], 0
4879; GCN-NEXT:    s_cbranch_scc0 .LBB12_13
4880; GCN-NEXT:  ; %bb.1:
4881; GCN-NEXT:    s_ashr_i32 s0, s5, 31
4882; GCN-NEXT:    s_add_u32 s2, s4, s0
4883; GCN-NEXT:    s_mov_b32 s1, s0
4884; GCN-NEXT:    s_addc_u32 s3, s5, s0
4885; GCN-NEXT:    s_xor_b64 s[12:13], s[2:3], s[0:1]
4886; GCN-NEXT:    v_cvt_f32_u32_e32 v8, s12
4887; GCN-NEXT:    v_cvt_f32_u32_e32 v9, s13
4888; GCN-NEXT:    s_sub_u32 s0, 0, s12
4889; GCN-NEXT:    s_subb_u32 s1, 0, s13
4890; GCN-NEXT:    v_madmk_f32 v8, v9, 0x4f800000, v8
4891; GCN-NEXT:    v_rcp_f32_e32 v8, v8
4892; GCN-NEXT:    v_mul_f32_e32 v8, 0x5f7ffffc, v8
4893; GCN-NEXT:    v_mul_f32_e32 v9, 0x2f800000, v8
4894; GCN-NEXT:    v_trunc_f32_e32 v9, v9
4895; GCN-NEXT:    v_madmk_f32 v8, v9, 0xcf800000, v8
4896; GCN-NEXT:    v_cvt_u32_f32_e32 v9, v9
4897; GCN-NEXT:    v_cvt_u32_f32_e32 v8, v8
4898; GCN-NEXT:    v_readfirstlane_b32 s2, v9
4899; GCN-NEXT:    v_readfirstlane_b32 s3, v8
4900; GCN-NEXT:    s_mul_i32 s5, s0, s2
4901; GCN-NEXT:    s_mul_hi_u32 s15, s0, s3
4902; GCN-NEXT:    s_mul_i32 s14, s1, s3
4903; GCN-NEXT:    s_add_i32 s5, s15, s5
4904; GCN-NEXT:    s_add_i32 s5, s5, s14
4905; GCN-NEXT:    s_mul_i32 s16, s0, s3
4906; GCN-NEXT:    s_mul_hi_u32 s14, s3, s5
4907; GCN-NEXT:    s_mul_i32 s15, s3, s5
4908; GCN-NEXT:    s_mul_hi_u32 s3, s3, s16
4909; GCN-NEXT:    s_add_u32 s3, s3, s15
4910; GCN-NEXT:    s_addc_u32 s14, 0, s14
4911; GCN-NEXT:    s_mul_hi_u32 s17, s2, s16
4912; GCN-NEXT:    s_mul_i32 s16, s2, s16
4913; GCN-NEXT:    s_add_u32 s3, s3, s16
4914; GCN-NEXT:    s_mul_hi_u32 s15, s2, s5
4915; GCN-NEXT:    s_addc_u32 s3, s14, s17
4916; GCN-NEXT:    s_addc_u32 s14, s15, 0
4917; GCN-NEXT:    s_mul_i32 s5, s2, s5
4918; GCN-NEXT:    s_add_u32 s3, s3, s5
4919; GCN-NEXT:    s_addc_u32 s5, 0, s14
4920; GCN-NEXT:    v_add_co_u32_e32 v8, vcc, s3, v8
4921; GCN-NEXT:    s_cmp_lg_u64 vcc, 0
4922; GCN-NEXT:    s_addc_u32 s2, s2, s5
4923; GCN-NEXT:    v_readfirstlane_b32 s5, v8
4924; GCN-NEXT:    s_mul_i32 s3, s0, s2
4925; GCN-NEXT:    s_mul_hi_u32 s14, s0, s5
4926; GCN-NEXT:    s_add_i32 s3, s14, s3
4927; GCN-NEXT:    s_mul_i32 s1, s1, s5
4928; GCN-NEXT:    s_add_i32 s3, s3, s1
4929; GCN-NEXT:    s_mul_i32 s0, s0, s5
4930; GCN-NEXT:    s_mul_hi_u32 s14, s2, s0
4931; GCN-NEXT:    s_mul_i32 s15, s2, s0
4932; GCN-NEXT:    s_mul_i32 s17, s5, s3
4933; GCN-NEXT:    s_mul_hi_u32 s0, s5, s0
4934; GCN-NEXT:    s_mul_hi_u32 s16, s5, s3
4935; GCN-NEXT:    s_add_u32 s0, s0, s17
4936; GCN-NEXT:    s_addc_u32 s5, 0, s16
4937; GCN-NEXT:    s_add_u32 s0, s0, s15
4938; GCN-NEXT:    s_mul_hi_u32 s1, s2, s3
4939; GCN-NEXT:    s_addc_u32 s0, s5, s14
4940; GCN-NEXT:    s_addc_u32 s1, s1, 0
4941; GCN-NEXT:    s_mul_i32 s3, s2, s3
4942; GCN-NEXT:    s_add_u32 s0, s0, s3
4943; GCN-NEXT:    s_addc_u32 s1, 0, s1
4944; GCN-NEXT:    v_add_co_u32_e32 v8, vcc, s0, v8
4945; GCN-NEXT:    s_cmp_lg_u64 vcc, 0
4946; GCN-NEXT:    s_addc_u32 s2, s2, s1
4947; GCN-NEXT:    s_ashr_i32 s14, s7, 31
4948; GCN-NEXT:    s_add_u32 s0, s6, s14
4949; GCN-NEXT:    s_mov_b32 s15, s14
4950; GCN-NEXT:    s_addc_u32 s1, s7, s14
4951; GCN-NEXT:    s_xor_b64 s[16:17], s[0:1], s[14:15]
4952; GCN-NEXT:    v_readfirstlane_b32 s3, v8
4953; GCN-NEXT:    s_mul_i32 s1, s16, s2
4954; GCN-NEXT:    s_mul_hi_u32 s5, s16, s3
4955; GCN-NEXT:    s_mul_hi_u32 s0, s16, s2
4956; GCN-NEXT:    s_add_u32 s1, s5, s1
4957; GCN-NEXT:    s_addc_u32 s0, 0, s0
4958; GCN-NEXT:    s_mul_hi_u32 s7, s17, s3
4959; GCN-NEXT:    s_mul_i32 s3, s17, s3
4960; GCN-NEXT:    s_add_u32 s1, s1, s3
4961; GCN-NEXT:    s_mul_hi_u32 s5, s17, s2
4962; GCN-NEXT:    s_addc_u32 s0, s0, s7
4963; GCN-NEXT:    s_addc_u32 s1, s5, 0
4964; GCN-NEXT:    s_mul_i32 s2, s17, s2
4965; GCN-NEXT:    s_add_u32 s0, s0, s2
4966; GCN-NEXT:    s_addc_u32 s1, 0, s1
4967; GCN-NEXT:    s_mul_i32 s1, s12, s1
4968; GCN-NEXT:    s_mul_hi_u32 s2, s12, s0
4969; GCN-NEXT:    s_add_i32 s1, s2, s1
4970; GCN-NEXT:    s_mul_i32 s2, s13, s0
4971; GCN-NEXT:    s_mul_i32 s0, s12, s0
4972; GCN-NEXT:    s_add_i32 s5, s1, s2
4973; GCN-NEXT:    v_mov_b32_e32 v8, s0
4974; GCN-NEXT:    s_sub_i32 s1, s17, s5
4975; GCN-NEXT:    v_sub_co_u32_e32 v8, vcc, s16, v8
4976; GCN-NEXT:    s_cmp_lg_u64 vcc, 0
4977; GCN-NEXT:    s_subb_u32 s7, s1, s13
4978; GCN-NEXT:    v_subrev_co_u32_e64 v9, s[0:1], s12, v8
4979; GCN-NEXT:    s_cmp_lg_u64 s[0:1], 0
4980; GCN-NEXT:    s_subb_u32 s15, s7, 0
4981; GCN-NEXT:    s_cmp_ge_u32 s15, s13
4982; GCN-NEXT:    s_cselect_b32 s16, -1, 0
4983; GCN-NEXT:    v_cmp_le_u32_e64 s[2:3], s12, v9
4984; GCN-NEXT:    s_cmp_eq_u32 s15, s13
4985; GCN-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[2:3]
4986; GCN-NEXT:    v_mov_b32_e32 v11, s16
4987; GCN-NEXT:    s_cselect_b64 s[2:3], -1, 0
4988; GCN-NEXT:    s_cmp_lg_u64 s[0:1], 0
4989; GCN-NEXT:    v_cndmask_b32_e64 v10, v11, v10, s[2:3]
4990; GCN-NEXT:    s_subb_u32 s2, s7, s13
4991; GCN-NEXT:    v_subrev_co_u32_e64 v11, s[0:1], s12, v9
4992; GCN-NEXT:    s_cmp_lg_u64 s[0:1], 0
4993; GCN-NEXT:    s_subb_u32 s2, s2, 0
4994; GCN-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v10
4995; GCN-NEXT:    v_cndmask_b32_e64 v9, v9, v11, s[0:1]
4996; GCN-NEXT:    v_mov_b32_e32 v10, s15
4997; GCN-NEXT:    v_mov_b32_e32 v11, s2
4998; GCN-NEXT:    s_cmp_lg_u64 vcc, 0
4999; GCN-NEXT:    v_cndmask_b32_e64 v10, v10, v11, s[0:1]
5000; GCN-NEXT:    s_subb_u32 s0, s17, s5
5001; GCN-NEXT:    s_cmp_ge_u32 s0, s13
5002; GCN-NEXT:    s_cselect_b32 s1, -1, 0
5003; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s12, v8
5004; GCN-NEXT:    s_cmp_eq_u32 s0, s13
5005; GCN-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
5006; GCN-NEXT:    v_mov_b32_e32 v14, s1
5007; GCN-NEXT:    s_cselect_b64 vcc, -1, 0
5008; GCN-NEXT:    v_cndmask_b32_e32 v11, v14, v11, vcc
5009; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v11
5010; GCN-NEXT:    v_mov_b32_e32 v14, s0
5011; GCN-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
5012; GCN-NEXT:    v_cndmask_b32_e32 v10, v14, v10, vcc
5013; GCN-NEXT:    v_xor_b32_e32 v8, s14, v8
5014; GCN-NEXT:    v_xor_b32_e32 v9, s14, v10
5015; GCN-NEXT:    v_mov_b32_e32 v10, s14
5016; GCN-NEXT:    v_subrev_co_u32_e32 v8, vcc, s14, v8
5017; GCN-NEXT:    v_subb_co_u32_e32 v9, vcc, v9, v10, vcc
5018; GCN-NEXT:    s_cbranch_execnz .LBB12_3
5019; GCN-NEXT:  .LBB12_2:
5020; GCN-NEXT:    v_cvt_f32_u32_e32 v8, s4
5021; GCN-NEXT:    s_sub_i32 s0, 0, s4
5022; GCN-NEXT:    s_mov_b32 s1, 0
5023; GCN-NEXT:    v_rcp_iflag_f32_e32 v8, v8
5024; GCN-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
5025; GCN-NEXT:    v_cvt_u32_f32_e32 v8, v8
5026; GCN-NEXT:    v_readfirstlane_b32 s2, v8
5027; GCN-NEXT:    s_mul_i32 s0, s0, s2
5028; GCN-NEXT:    s_mul_hi_u32 s0, s2, s0
5029; GCN-NEXT:    s_add_i32 s2, s2, s0
5030; GCN-NEXT:    s_mul_hi_u32 s0, s6, s2
5031; GCN-NEXT:    s_mul_i32 s0, s0, s4
5032; GCN-NEXT:    s_sub_i32 s0, s6, s0
5033; GCN-NEXT:    s_sub_i32 s2, s0, s4
5034; GCN-NEXT:    s_cmp_ge_u32 s0, s4
5035; GCN-NEXT:    s_cselect_b32 s0, s2, s0
5036; GCN-NEXT:    s_sub_i32 s2, s0, s4
5037; GCN-NEXT:    s_cmp_ge_u32 s0, s4
5038; GCN-NEXT:    s_cselect_b32 s0, s2, s0
5039; GCN-NEXT:    v_mov_b32_e32 v9, s1
5040; GCN-NEXT:    v_mov_b32_e32 v8, s0
5041; GCN-NEXT:  .LBB12_3:
5042; GCN-NEXT:    v_or_b32_e32 v11, v17, v13
5043; GCN-NEXT:    v_mov_b32_e32 v10, 0
5044; GCN-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[10:11]
5045; GCN-NEXT:    s_cbranch_vccz .LBB12_14
5046; GCN-NEXT:  ; %bb.4:
5047; GCN-NEXT:    v_ashrrev_i32_e32 v10, 31, v13
5048; GCN-NEXT:    v_add_co_u32_e32 v11, vcc, v12, v10
5049; GCN-NEXT:    v_addc_co_u32_e32 v13, vcc, v13, v10, vcc
5050; GCN-NEXT:    v_xor_b32_e32 v11, v11, v10
5051; GCN-NEXT:    v_xor_b32_e32 v10, v13, v10
5052; GCN-NEXT:    v_cvt_f32_u32_e32 v13, v11
5053; GCN-NEXT:    v_cvt_f32_u32_e32 v14, v10
5054; GCN-NEXT:    v_sub_co_u32_e32 v15, vcc, 0, v11
5055; GCN-NEXT:    v_subb_co_u32_e32 v18, vcc, 0, v10, vcc
5056; GCN-NEXT:    v_madmk_f32 v13, v14, 0x4f800000, v13
5057; GCN-NEXT:    v_rcp_f32_e32 v13, v13
5058; GCN-NEXT:    v_mul_f32_e32 v13, 0x5f7ffffc, v13
5059; GCN-NEXT:    v_mul_f32_e32 v14, 0x2f800000, v13
5060; GCN-NEXT:    v_trunc_f32_e32 v14, v14
5061; GCN-NEXT:    v_madmk_f32 v13, v14, 0xcf800000, v13
5062; GCN-NEXT:    v_cvt_u32_f32_e32 v14, v14
5063; GCN-NEXT:    v_cvt_u32_f32_e32 v13, v13
5064; GCN-NEXT:    v_mul_lo_u32 v20, v15, v14
5065; GCN-NEXT:    v_mul_hi_u32 v19, v15, v13
5066; GCN-NEXT:    v_mul_lo_u32 v21, v18, v13
5067; GCN-NEXT:    v_mul_lo_u32 v22, v15, v13
5068; GCN-NEXT:    v_add_u32_e32 v19, v19, v20
5069; GCN-NEXT:    v_add_u32_e32 v19, v19, v21
5070; GCN-NEXT:    v_mul_lo_u32 v20, v13, v19
5071; GCN-NEXT:    v_mul_hi_u32 v21, v13, v22
5072; GCN-NEXT:    v_mul_hi_u32 v23, v13, v19
5073; GCN-NEXT:    v_mul_hi_u32 v24, v14, v19
5074; GCN-NEXT:    v_mul_lo_u32 v19, v14, v19
5075; GCN-NEXT:    v_add_co_u32_e32 v20, vcc, v21, v20
5076; GCN-NEXT:    v_addc_co_u32_e32 v21, vcc, 0, v23, vcc
5077; GCN-NEXT:    v_mul_lo_u32 v23, v14, v22
5078; GCN-NEXT:    v_mul_hi_u32 v22, v14, v22
5079; GCN-NEXT:    v_add_co_u32_e32 v20, vcc, v20, v23
5080; GCN-NEXT:    v_addc_co_u32_e32 v20, vcc, v21, v22, vcc
5081; GCN-NEXT:    v_addc_co_u32_e32 v21, vcc, 0, v24, vcc
5082; GCN-NEXT:    v_add_co_u32_e32 v19, vcc, v20, v19
5083; GCN-NEXT:    v_addc_co_u32_e32 v20, vcc, 0, v21, vcc
5084; GCN-NEXT:    v_add_co_u32_e32 v13, vcc, v13, v19
5085; GCN-NEXT:    v_addc_co_u32_e32 v14, vcc, v14, v20, vcc
5086; GCN-NEXT:    v_mul_lo_u32 v19, v15, v14
5087; GCN-NEXT:    v_mul_hi_u32 v20, v15, v13
5088; GCN-NEXT:    v_mul_lo_u32 v18, v18, v13
5089; GCN-NEXT:    v_mul_lo_u32 v15, v15, v13
5090; GCN-NEXT:    v_add_u32_e32 v19, v20, v19
5091; GCN-NEXT:    v_add_u32_e32 v18, v19, v18
5092; GCN-NEXT:    v_mul_lo_u32 v21, v13, v18
5093; GCN-NEXT:    v_mul_hi_u32 v22, v13, v15
5094; GCN-NEXT:    v_mul_hi_u32 v23, v13, v18
5095; GCN-NEXT:    v_mul_hi_u32 v20, v14, v15
5096; GCN-NEXT:    v_mul_lo_u32 v15, v14, v15
5097; GCN-NEXT:    v_mul_hi_u32 v19, v14, v18
5098; GCN-NEXT:    v_add_co_u32_e32 v21, vcc, v22, v21
5099; GCN-NEXT:    v_addc_co_u32_e32 v22, vcc, 0, v23, vcc
5100; GCN-NEXT:    v_mul_lo_u32 v18, v14, v18
5101; GCN-NEXT:    v_add_co_u32_e32 v15, vcc, v21, v15
5102; GCN-NEXT:    v_addc_co_u32_e32 v15, vcc, v22, v20, vcc
5103; GCN-NEXT:    v_addc_co_u32_e32 v19, vcc, 0, v19, vcc
5104; GCN-NEXT:    v_add_co_u32_e32 v15, vcc, v15, v18
5105; GCN-NEXT:    v_addc_co_u32_e32 v18, vcc, 0, v19, vcc
5106; GCN-NEXT:    v_add_co_u32_e32 v13, vcc, v13, v15
5107; GCN-NEXT:    v_addc_co_u32_e32 v14, vcc, v14, v18, vcc
5108; GCN-NEXT:    v_ashrrev_i32_e32 v15, 31, v17
5109; GCN-NEXT:    v_add_co_u32_e32 v18, vcc, v16, v15
5110; GCN-NEXT:    v_xor_b32_e32 v18, v18, v15
5111; GCN-NEXT:    v_mul_lo_u32 v19, v18, v14
5112; GCN-NEXT:    v_mul_hi_u32 v20, v18, v13
5113; GCN-NEXT:    v_mul_hi_u32 v21, v18, v14
5114; GCN-NEXT:    v_addc_co_u32_e32 v17, vcc, v17, v15, vcc
5115; GCN-NEXT:    v_xor_b32_e32 v17, v17, v15
5116; GCN-NEXT:    v_add_co_u32_e32 v19, vcc, v20, v19
5117; GCN-NEXT:    v_addc_co_u32_e32 v20, vcc, 0, v21, vcc
5118; GCN-NEXT:    v_mul_lo_u32 v21, v17, v13
5119; GCN-NEXT:    v_mul_hi_u32 v13, v17, v13
5120; GCN-NEXT:    v_mul_hi_u32 v22, v17, v14
5121; GCN-NEXT:    v_mul_lo_u32 v14, v17, v14
5122; GCN-NEXT:    v_add_co_u32_e32 v19, vcc, v19, v21
5123; GCN-NEXT:    v_addc_co_u32_e32 v13, vcc, v20, v13, vcc
5124; GCN-NEXT:    v_addc_co_u32_e32 v19, vcc, 0, v22, vcc
5125; GCN-NEXT:    v_add_co_u32_e32 v13, vcc, v13, v14
5126; GCN-NEXT:    v_addc_co_u32_e32 v14, vcc, 0, v19, vcc
5127; GCN-NEXT:    v_mul_lo_u32 v14, v11, v14
5128; GCN-NEXT:    v_mul_hi_u32 v19, v11, v13
5129; GCN-NEXT:    v_mul_lo_u32 v20, v10, v13
5130; GCN-NEXT:    v_mul_lo_u32 v13, v11, v13
5131; GCN-NEXT:    v_add_u32_e32 v14, v19, v14
5132; GCN-NEXT:    v_add_u32_e32 v14, v14, v20
5133; GCN-NEXT:    v_sub_u32_e32 v19, v17, v14
5134; GCN-NEXT:    v_sub_co_u32_e32 v13, vcc, v18, v13
5135; GCN-NEXT:    v_subb_co_u32_e64 v18, s[0:1], v19, v10, vcc
5136; GCN-NEXT:    v_sub_co_u32_e64 v19, s[0:1], v13, v11
5137; GCN-NEXT:    v_subbrev_co_u32_e64 v20, s[2:3], 0, v18, s[0:1]
5138; GCN-NEXT:    v_cmp_ge_u32_e64 s[2:3], v20, v10
5139; GCN-NEXT:    v_cndmask_b32_e64 v21, 0, -1, s[2:3]
5140; GCN-NEXT:    v_cmp_ge_u32_e64 s[2:3], v19, v11
5141; GCN-NEXT:    v_subb_co_u32_e32 v14, vcc, v17, v14, vcc
5142; GCN-NEXT:    v_cndmask_b32_e64 v22, 0, -1, s[2:3]
5143; GCN-NEXT:    v_cmp_eq_u32_e64 s[2:3], v20, v10
5144; GCN-NEXT:    v_subb_co_u32_e64 v18, s[0:1], v18, v10, s[0:1]
5145; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, v14, v10
5146; GCN-NEXT:    v_cndmask_b32_e64 v21, v21, v22, s[2:3]
5147; GCN-NEXT:    v_sub_co_u32_e64 v22, s[0:1], v19, v11
5148; GCN-NEXT:    v_cndmask_b32_e64 v17, 0, -1, vcc
5149; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, v13, v11
5150; GCN-NEXT:    v_subbrev_co_u32_e64 v18, s[0:1], 0, v18, s[0:1]
5151; GCN-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
5152; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, v14, v10
5153; GCN-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v21
5154; GCN-NEXT:    v_cndmask_b32_e32 v10, v17, v11, vcc
5155; GCN-NEXT:    v_cndmask_b32_e64 v19, v19, v22, s[0:1]
5156; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
5157; GCN-NEXT:    v_cndmask_b32_e64 v18, v20, v18, s[0:1]
5158; GCN-NEXT:    v_cndmask_b32_e32 v11, v13, v19, vcc
5159; GCN-NEXT:    v_cndmask_b32_e32 v10, v14, v18, vcc
5160; GCN-NEXT:    v_xor_b32_e32 v11, v11, v15
5161; GCN-NEXT:    v_xor_b32_e32 v13, v10, v15
5162; GCN-NEXT:    v_sub_co_u32_e32 v10, vcc, v11, v15
5163; GCN-NEXT:    v_subb_co_u32_e32 v11, vcc, v13, v15, vcc
5164; GCN-NEXT:    s_cbranch_execnz .LBB12_6
5165; GCN-NEXT:  .LBB12_5:
5166; GCN-NEXT:    v_cvt_f32_u32_e32 v10, v12
5167; GCN-NEXT:    v_sub_u32_e32 v11, 0, v12
5168; GCN-NEXT:    v_rcp_iflag_f32_e32 v10, v10
5169; GCN-NEXT:    v_mul_f32_e32 v10, 0x4f7ffffe, v10
5170; GCN-NEXT:    v_cvt_u32_f32_e32 v10, v10
5171; GCN-NEXT:    v_mul_lo_u32 v11, v11, v10
5172; GCN-NEXT:    v_mul_hi_u32 v11, v10, v11
5173; GCN-NEXT:    v_add_u32_e32 v10, v10, v11
5174; GCN-NEXT:    v_mul_hi_u32 v10, v16, v10
5175; GCN-NEXT:    v_mul_lo_u32 v10, v10, v12
5176; GCN-NEXT:    v_sub_u32_e32 v10, v16, v10
5177; GCN-NEXT:    v_sub_u32_e32 v11, v10, v12
5178; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, v10, v12
5179; GCN-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
5180; GCN-NEXT:    v_sub_u32_e32 v11, v10, v12
5181; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, v10, v12
5182; GCN-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
5183; GCN-NEXT:    v_mov_b32_e32 v11, 0
5184; GCN-NEXT:  .LBB12_6:
5185; GCN-NEXT:    s_waitcnt vmcnt(0)
5186; GCN-NEXT:    v_or_b32_e32 v13, v5, v1
5187; GCN-NEXT:    v_mov_b32_e32 v12, 0
5188; GCN-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[12:13]
5189; GCN-NEXT:    s_cbranch_vccz .LBB12_15
5190; GCN-NEXT:  ; %bb.7:
5191; GCN-NEXT:    v_ashrrev_i32_e32 v13, 31, v1
5192; GCN-NEXT:    v_add_co_u32_e32 v12, vcc, v0, v13
5193; GCN-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v13, vcc
5194; GCN-NEXT:    v_xor_b32_e32 v12, v12, v13
5195; GCN-NEXT:    v_xor_b32_e32 v1, v1, v13
5196; GCN-NEXT:    v_cvt_f32_u32_e32 v13, v12
5197; GCN-NEXT:    v_cvt_f32_u32_e32 v14, v1
5198; GCN-NEXT:    v_sub_co_u32_e32 v15, vcc, 0, v12
5199; GCN-NEXT:    v_subb_co_u32_e32 v16, vcc, 0, v1, vcc
5200; GCN-NEXT:    v_madmk_f32 v13, v14, 0x4f800000, v13
5201; GCN-NEXT:    v_rcp_f32_e32 v13, v13
5202; GCN-NEXT:    v_mul_f32_e32 v13, 0x5f7ffffc, v13
5203; GCN-NEXT:    v_mul_f32_e32 v14, 0x2f800000, v13
5204; GCN-NEXT:    v_trunc_f32_e32 v14, v14
5205; GCN-NEXT:    v_madmk_f32 v13, v14, 0xcf800000, v13
5206; GCN-NEXT:    v_cvt_u32_f32_e32 v14, v14
5207; GCN-NEXT:    v_cvt_u32_f32_e32 v13, v13
5208; GCN-NEXT:    v_mul_lo_u32 v18, v15, v14
5209; GCN-NEXT:    v_mul_hi_u32 v17, v15, v13
5210; GCN-NEXT:    v_mul_lo_u32 v19, v16, v13
5211; GCN-NEXT:    v_mul_lo_u32 v20, v15, v13
5212; GCN-NEXT:    v_add_u32_e32 v17, v17, v18
5213; GCN-NEXT:    v_add_u32_e32 v17, v17, v19
5214; GCN-NEXT:    v_mul_lo_u32 v18, v13, v17
5215; GCN-NEXT:    v_mul_hi_u32 v19, v13, v20
5216; GCN-NEXT:    v_mul_hi_u32 v21, v13, v17
5217; GCN-NEXT:    v_mul_hi_u32 v22, v14, v17
5218; GCN-NEXT:    v_mul_lo_u32 v17, v14, v17
5219; GCN-NEXT:    v_add_co_u32_e32 v18, vcc, v19, v18
5220; GCN-NEXT:    v_addc_co_u32_e32 v19, vcc, 0, v21, vcc
5221; GCN-NEXT:    v_mul_lo_u32 v21, v14, v20
5222; GCN-NEXT:    v_mul_hi_u32 v20, v14, v20
5223; GCN-NEXT:    v_add_co_u32_e32 v18, vcc, v18, v21
5224; GCN-NEXT:    v_addc_co_u32_e32 v18, vcc, v19, v20, vcc
5225; GCN-NEXT:    v_addc_co_u32_e32 v19, vcc, 0, v22, vcc
5226; GCN-NEXT:    v_add_co_u32_e32 v17, vcc, v18, v17
5227; GCN-NEXT:    v_addc_co_u32_e32 v18, vcc, 0, v19, vcc
5228; GCN-NEXT:    v_add_co_u32_e32 v13, vcc, v13, v17
5229; GCN-NEXT:    v_addc_co_u32_e32 v14, vcc, v14, v18, vcc
5230; GCN-NEXT:    v_mul_lo_u32 v17, v15, v14
5231; GCN-NEXT:    v_mul_hi_u32 v18, v15, v13
5232; GCN-NEXT:    v_mul_lo_u32 v16, v16, v13
5233; GCN-NEXT:    v_mul_lo_u32 v15, v15, v13
5234; GCN-NEXT:    v_add_u32_e32 v17, v18, v17
5235; GCN-NEXT:    v_add_u32_e32 v16, v17, v16
5236; GCN-NEXT:    v_mul_lo_u32 v19, v13, v16
5237; GCN-NEXT:    v_mul_hi_u32 v20, v13, v15
5238; GCN-NEXT:    v_mul_hi_u32 v21, v13, v16
5239; GCN-NEXT:    v_mul_hi_u32 v18, v14, v15
5240; GCN-NEXT:    v_mul_lo_u32 v15, v14, v15
5241; GCN-NEXT:    v_mul_hi_u32 v17, v14, v16
5242; GCN-NEXT:    v_add_co_u32_e32 v19, vcc, v20, v19
5243; GCN-NEXT:    v_addc_co_u32_e32 v20, vcc, 0, v21, vcc
5244; GCN-NEXT:    v_mul_lo_u32 v16, v14, v16
5245; GCN-NEXT:    v_add_co_u32_e32 v15, vcc, v19, v15
5246; GCN-NEXT:    v_addc_co_u32_e32 v15, vcc, v20, v18, vcc
5247; GCN-NEXT:    v_addc_co_u32_e32 v17, vcc, 0, v17, vcc
5248; GCN-NEXT:    v_add_co_u32_e32 v15, vcc, v15, v16
5249; GCN-NEXT:    v_addc_co_u32_e32 v16, vcc, 0, v17, vcc
5250; GCN-NEXT:    v_add_co_u32_e32 v13, vcc, v13, v15
5251; GCN-NEXT:    v_addc_co_u32_e32 v14, vcc, v14, v16, vcc
5252; GCN-NEXT:    v_ashrrev_i32_e32 v15, 31, v5
5253; GCN-NEXT:    v_add_co_u32_e32 v16, vcc, v4, v15
5254; GCN-NEXT:    v_xor_b32_e32 v16, v16, v15
5255; GCN-NEXT:    v_mul_lo_u32 v17, v16, v14
5256; GCN-NEXT:    v_mul_hi_u32 v18, v16, v13
5257; GCN-NEXT:    v_mul_hi_u32 v19, v16, v14
5258; GCN-NEXT:    v_addc_co_u32_e32 v5, vcc, v5, v15, vcc
5259; GCN-NEXT:    v_xor_b32_e32 v5, v5, v15
5260; GCN-NEXT:    v_add_co_u32_e32 v17, vcc, v18, v17
5261; GCN-NEXT:    v_addc_co_u32_e32 v18, vcc, 0, v19, vcc
5262; GCN-NEXT:    v_mul_lo_u32 v19, v5, v13
5263; GCN-NEXT:    v_mul_hi_u32 v13, v5, v13
5264; GCN-NEXT:    v_mul_hi_u32 v20, v5, v14
5265; GCN-NEXT:    v_mul_lo_u32 v14, v5, v14
5266; GCN-NEXT:    v_add_co_u32_e32 v17, vcc, v17, v19
5267; GCN-NEXT:    v_addc_co_u32_e32 v13, vcc, v18, v13, vcc
5268; GCN-NEXT:    v_addc_co_u32_e32 v17, vcc, 0, v20, vcc
5269; GCN-NEXT:    v_add_co_u32_e32 v13, vcc, v13, v14
5270; GCN-NEXT:    v_addc_co_u32_e32 v14, vcc, 0, v17, vcc
5271; GCN-NEXT:    v_mul_lo_u32 v14, v12, v14
5272; GCN-NEXT:    v_mul_hi_u32 v17, v12, v13
5273; GCN-NEXT:    v_mul_lo_u32 v18, v1, v13
5274; GCN-NEXT:    v_mul_lo_u32 v13, v12, v13
5275; GCN-NEXT:    v_add_u32_e32 v14, v17, v14
5276; GCN-NEXT:    v_add_u32_e32 v14, v14, v18
5277; GCN-NEXT:    v_sub_u32_e32 v17, v5, v14
5278; GCN-NEXT:    v_sub_co_u32_e32 v13, vcc, v16, v13
5279; GCN-NEXT:    v_subb_co_u32_e64 v16, s[0:1], v17, v1, vcc
5280; GCN-NEXT:    v_sub_co_u32_e64 v17, s[0:1], v13, v12
5281; GCN-NEXT:    v_subbrev_co_u32_e64 v18, s[2:3], 0, v16, s[0:1]
5282; GCN-NEXT:    v_cmp_ge_u32_e64 s[2:3], v18, v1
5283; GCN-NEXT:    v_cndmask_b32_e64 v19, 0, -1, s[2:3]
5284; GCN-NEXT:    v_cmp_ge_u32_e64 s[2:3], v17, v12
5285; GCN-NEXT:    v_subb_co_u32_e32 v5, vcc, v5, v14, vcc
5286; GCN-NEXT:    v_cndmask_b32_e64 v20, 0, -1, s[2:3]
5287; GCN-NEXT:    v_cmp_eq_u32_e64 s[2:3], v18, v1
5288; GCN-NEXT:    v_subb_co_u32_e64 v16, s[0:1], v16, v1, s[0:1]
5289; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, v5, v1
5290; GCN-NEXT:    v_cndmask_b32_e64 v19, v19, v20, s[2:3]
5291; GCN-NEXT:    v_sub_co_u32_e64 v20, s[0:1], v17, v12
5292; GCN-NEXT:    v_cndmask_b32_e64 v14, 0, -1, vcc
5293; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, v13, v12
5294; GCN-NEXT:    v_subbrev_co_u32_e64 v16, s[0:1], 0, v16, s[0:1]
5295; GCN-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
5296; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v1
5297; GCN-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v19
5298; GCN-NEXT:    v_cndmask_b32_e32 v1, v14, v12, vcc
5299; GCN-NEXT:    v_cndmask_b32_e64 v17, v17, v20, s[0:1]
5300; GCN-NEXT:    v_cndmask_b32_e64 v16, v18, v16, s[0:1]
5301; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
5302; GCN-NEXT:    v_cndmask_b32_e32 v1, v5, v16, vcc
5303; GCN-NEXT:    v_cndmask_b32_e32 v5, v13, v17, vcc
5304; GCN-NEXT:    v_xor_b32_e32 v5, v5, v15
5305; GCN-NEXT:    v_xor_b32_e32 v1, v1, v15
5306; GCN-NEXT:    v_sub_co_u32_e32 v12, vcc, v5, v15
5307; GCN-NEXT:    v_subb_co_u32_e32 v13, vcc, v1, v15, vcc
5308; GCN-NEXT:    s_cbranch_execnz .LBB12_9
5309; GCN-NEXT:  .LBB12_8:
5310; GCN-NEXT:    v_cvt_f32_u32_e32 v1, v0
5311; GCN-NEXT:    v_sub_u32_e32 v5, 0, v0
5312; GCN-NEXT:    v_mov_b32_e32 v13, 0
5313; GCN-NEXT:    v_rcp_iflag_f32_e32 v1, v1
5314; GCN-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
5315; GCN-NEXT:    v_cvt_u32_f32_e32 v1, v1
5316; GCN-NEXT:    v_mul_lo_u32 v5, v5, v1
5317; GCN-NEXT:    v_mul_hi_u32 v5, v1, v5
5318; GCN-NEXT:    v_add_u32_e32 v1, v1, v5
5319; GCN-NEXT:    v_mul_hi_u32 v1, v4, v1
5320; GCN-NEXT:    v_mul_lo_u32 v1, v1, v0
5321; GCN-NEXT:    v_sub_u32_e32 v1, v4, v1
5322; GCN-NEXT:    v_sub_u32_e32 v4, v1, v0
5323; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v0
5324; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
5325; GCN-NEXT:    v_sub_u32_e32 v4, v1, v0
5326; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v0
5327; GCN-NEXT:    v_cndmask_b32_e32 v12, v1, v4, vcc
5328; GCN-NEXT:  .LBB12_9:
5329; GCN-NEXT:    v_or_b32_e32 v1, v7, v3
5330; GCN-NEXT:    v_mov_b32_e32 v0, 0
5331; GCN-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
5332; GCN-NEXT:    s_cbranch_vccz .LBB12_16
5333; GCN-NEXT:  ; %bb.10:
5334; GCN-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
5335; GCN-NEXT:    v_add_co_u32_e32 v1, vcc, v2, v0
5336; GCN-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v0, vcc
5337; GCN-NEXT:    v_xor_b32_e32 v1, v1, v0
5338; GCN-NEXT:    v_xor_b32_e32 v0, v3, v0
5339; GCN-NEXT:    v_cvt_f32_u32_e32 v3, v1
5340; GCN-NEXT:    v_cvt_f32_u32_e32 v4, v0
5341; GCN-NEXT:    v_sub_co_u32_e32 v5, vcc, 0, v1
5342; GCN-NEXT:    v_subb_co_u32_e32 v14, vcc, 0, v0, vcc
5343; GCN-NEXT:    v_madmk_f32 v3, v4, 0x4f800000, v3
5344; GCN-NEXT:    v_rcp_f32_e32 v3, v3
5345; GCN-NEXT:    v_mul_f32_e32 v3, 0x5f7ffffc, v3
5346; GCN-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v3
5347; GCN-NEXT:    v_trunc_f32_e32 v4, v4
5348; GCN-NEXT:    v_madmk_f32 v3, v4, 0xcf800000, v3
5349; GCN-NEXT:    v_cvt_u32_f32_e32 v4, v4
5350; GCN-NEXT:    v_cvt_u32_f32_e32 v3, v3
5351; GCN-NEXT:    v_mul_lo_u32 v16, v5, v4
5352; GCN-NEXT:    v_mul_hi_u32 v15, v5, v3
5353; GCN-NEXT:    v_mul_lo_u32 v17, v14, v3
5354; GCN-NEXT:    v_mul_lo_u32 v18, v5, v3
5355; GCN-NEXT:    v_add_u32_e32 v15, v15, v16
5356; GCN-NEXT:    v_add_u32_e32 v15, v15, v17
5357; GCN-NEXT:    v_mul_lo_u32 v16, v3, v15
5358; GCN-NEXT:    v_mul_hi_u32 v17, v3, v18
5359; GCN-NEXT:    v_mul_hi_u32 v19, v3, v15
5360; GCN-NEXT:    v_mul_hi_u32 v20, v4, v15
5361; GCN-NEXT:    v_mul_lo_u32 v15, v4, v15
5362; GCN-NEXT:    v_add_co_u32_e32 v16, vcc, v17, v16
5363; GCN-NEXT:    v_addc_co_u32_e32 v17, vcc, 0, v19, vcc
5364; GCN-NEXT:    v_mul_lo_u32 v19, v4, v18
5365; GCN-NEXT:    v_mul_hi_u32 v18, v4, v18
5366; GCN-NEXT:    v_add_co_u32_e32 v16, vcc, v16, v19
5367; GCN-NEXT:    v_addc_co_u32_e32 v16, vcc, v17, v18, vcc
5368; GCN-NEXT:    v_addc_co_u32_e32 v17, vcc, 0, v20, vcc
5369; GCN-NEXT:    v_add_co_u32_e32 v15, vcc, v16, v15
5370; GCN-NEXT:    v_addc_co_u32_e32 v16, vcc, 0, v17, vcc
5371; GCN-NEXT:    v_add_co_u32_e32 v3, vcc, v3, v15
5372; GCN-NEXT:    v_addc_co_u32_e32 v4, vcc, v4, v16, vcc
5373; GCN-NEXT:    v_mul_lo_u32 v15, v5, v4
5374; GCN-NEXT:    v_mul_hi_u32 v16, v5, v3
5375; GCN-NEXT:    v_mul_lo_u32 v14, v14, v3
5376; GCN-NEXT:    v_mul_lo_u32 v5, v5, v3
5377; GCN-NEXT:    v_add_u32_e32 v15, v16, v15
5378; GCN-NEXT:    v_add_u32_e32 v14, v15, v14
5379; GCN-NEXT:    v_mul_lo_u32 v17, v3, v14
5380; GCN-NEXT:    v_mul_hi_u32 v18, v3, v5
5381; GCN-NEXT:    v_mul_hi_u32 v19, v3, v14
5382; GCN-NEXT:    v_mul_hi_u32 v16, v4, v5
5383; GCN-NEXT:    v_mul_lo_u32 v5, v4, v5
5384; GCN-NEXT:    v_mul_hi_u32 v15, v4, v14
5385; GCN-NEXT:    v_add_co_u32_e32 v17, vcc, v18, v17
5386; GCN-NEXT:    v_addc_co_u32_e32 v18, vcc, 0, v19, vcc
5387; GCN-NEXT:    v_mul_lo_u32 v14, v4, v14
5388; GCN-NEXT:    v_add_co_u32_e32 v5, vcc, v17, v5
5389; GCN-NEXT:    v_addc_co_u32_e32 v5, vcc, v18, v16, vcc
5390; GCN-NEXT:    v_addc_co_u32_e32 v15, vcc, 0, v15, vcc
5391; GCN-NEXT:    v_add_co_u32_e32 v5, vcc, v5, v14
5392; GCN-NEXT:    v_addc_co_u32_e32 v14, vcc, 0, v15, vcc
5393; GCN-NEXT:    v_add_co_u32_e32 v3, vcc, v3, v5
5394; GCN-NEXT:    v_addc_co_u32_e32 v4, vcc, v4, v14, vcc
5395; GCN-NEXT:    v_ashrrev_i32_e32 v5, 31, v7
5396; GCN-NEXT:    v_add_co_u32_e32 v14, vcc, v6, v5
5397; GCN-NEXT:    v_xor_b32_e32 v14, v14, v5
5398; GCN-NEXT:    v_mul_lo_u32 v15, v14, v4
5399; GCN-NEXT:    v_mul_hi_u32 v16, v14, v3
5400; GCN-NEXT:    v_mul_hi_u32 v17, v14, v4
5401; GCN-NEXT:    v_addc_co_u32_e32 v7, vcc, v7, v5, vcc
5402; GCN-NEXT:    v_xor_b32_e32 v7, v7, v5
5403; GCN-NEXT:    v_add_co_u32_e32 v15, vcc, v16, v15
5404; GCN-NEXT:    v_addc_co_u32_e32 v16, vcc, 0, v17, vcc
5405; GCN-NEXT:    v_mul_lo_u32 v17, v7, v3
5406; GCN-NEXT:    v_mul_hi_u32 v3, v7, v3
5407; GCN-NEXT:    v_mul_hi_u32 v18, v7, v4
5408; GCN-NEXT:    v_mul_lo_u32 v4, v7, v4
5409; GCN-NEXT:    v_add_co_u32_e32 v15, vcc, v15, v17
5410; GCN-NEXT:    v_addc_co_u32_e32 v3, vcc, v16, v3, vcc
5411; GCN-NEXT:    v_addc_co_u32_e32 v15, vcc, 0, v18, vcc
5412; GCN-NEXT:    v_add_co_u32_e32 v3, vcc, v3, v4
5413; GCN-NEXT:    v_addc_co_u32_e32 v4, vcc, 0, v15, vcc
5414; GCN-NEXT:    v_mul_lo_u32 v4, v1, v4
5415; GCN-NEXT:    v_mul_hi_u32 v15, v1, v3
5416; GCN-NEXT:    v_mul_lo_u32 v16, v0, v3
5417; GCN-NEXT:    v_mul_lo_u32 v3, v1, v3
5418; GCN-NEXT:    v_add_u32_e32 v4, v15, v4
5419; GCN-NEXT:    v_add_u32_e32 v4, v4, v16
5420; GCN-NEXT:    v_sub_u32_e32 v15, v7, v4
5421; GCN-NEXT:    v_sub_co_u32_e32 v3, vcc, v14, v3
5422; GCN-NEXT:    v_subb_co_u32_e64 v14, s[0:1], v15, v0, vcc
5423; GCN-NEXT:    v_sub_co_u32_e64 v15, s[0:1], v3, v1
5424; GCN-NEXT:    v_subbrev_co_u32_e64 v16, s[2:3], 0, v14, s[0:1]
5425; GCN-NEXT:    v_cmp_ge_u32_e64 s[2:3], v16, v0
5426; GCN-NEXT:    v_cndmask_b32_e64 v17, 0, -1, s[2:3]
5427; GCN-NEXT:    v_cmp_ge_u32_e64 s[2:3], v15, v1
5428; GCN-NEXT:    v_subb_co_u32_e32 v4, vcc, v7, v4, vcc
5429; GCN-NEXT:    v_cndmask_b32_e64 v18, 0, -1, s[2:3]
5430; GCN-NEXT:    v_cmp_eq_u32_e64 s[2:3], v16, v0
5431; GCN-NEXT:    v_subb_co_u32_e64 v14, s[0:1], v14, v0, s[0:1]
5432; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v0
5433; GCN-NEXT:    v_cndmask_b32_e64 v17, v17, v18, s[2:3]
5434; GCN-NEXT:    v_sub_co_u32_e64 v18, s[0:1], v15, v1
5435; GCN-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
5436; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v1
5437; GCN-NEXT:    v_subbrev_co_u32_e64 v14, s[0:1], 0, v14, s[0:1]
5438; GCN-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
5439; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v0
5440; GCN-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v17
5441; GCN-NEXT:    v_cndmask_b32_e32 v0, v7, v1, vcc
5442; GCN-NEXT:    v_cndmask_b32_e64 v15, v15, v18, s[0:1]
5443; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
5444; GCN-NEXT:    v_cndmask_b32_e64 v14, v16, v14, s[0:1]
5445; GCN-NEXT:    v_cndmask_b32_e32 v1, v3, v15, vcc
5446; GCN-NEXT:    v_cndmask_b32_e32 v0, v4, v14, vcc
5447; GCN-NEXT:    v_xor_b32_e32 v1, v1, v5
5448; GCN-NEXT:    v_xor_b32_e32 v0, v0, v5
5449; GCN-NEXT:    v_sub_co_u32_e32 v14, vcc, v1, v5
5450; GCN-NEXT:    v_subb_co_u32_e32 v15, vcc, v0, v5, vcc
5451; GCN-NEXT:    s_cbranch_execnz .LBB12_12
5452; GCN-NEXT:  .LBB12_11:
5453; GCN-NEXT:    v_cvt_f32_u32_e32 v0, v2
5454; GCN-NEXT:    v_sub_u32_e32 v1, 0, v2
5455; GCN-NEXT:    v_mov_b32_e32 v15, 0
5456; GCN-NEXT:    v_rcp_iflag_f32_e32 v0, v0
5457; GCN-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
5458; GCN-NEXT:    v_cvt_u32_f32_e32 v0, v0
5459; GCN-NEXT:    v_mul_lo_u32 v1, v1, v0
5460; GCN-NEXT:    v_mul_hi_u32 v1, v0, v1
5461; GCN-NEXT:    v_add_u32_e32 v0, v0, v1
5462; GCN-NEXT:    v_mul_hi_u32 v0, v6, v0
5463; GCN-NEXT:    v_mul_lo_u32 v0, v0, v2
5464; GCN-NEXT:    v_sub_u32_e32 v0, v6, v0
5465; GCN-NEXT:    v_sub_u32_e32 v1, v0, v2
5466; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
5467; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
5468; GCN-NEXT:    v_sub_u32_e32 v1, v0, v2
5469; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
5470; GCN-NEXT:    v_cndmask_b32_e32 v14, v0, v1, vcc
5471; GCN-NEXT:  .LBB12_12:
5472; GCN-NEXT:    v_mov_b32_e32 v0, 0
5473; GCN-NEXT:    global_store_dwordx4 v0, v[12:15], s[8:9] offset:16
5474; GCN-NEXT:    global_store_dwordx4 v0, v[8:11], s[8:9]
5475; GCN-NEXT:    s_endpgm
5476; GCN-NEXT:  .LBB12_13:
5477; GCN-NEXT:    ; implicit-def: $vgpr8_vgpr9
5478; GCN-NEXT:    s_branch .LBB12_2
5479; GCN-NEXT:  .LBB12_14:
5480; GCN-NEXT:    s_branch .LBB12_5
5481; GCN-NEXT:  .LBB12_15:
5482; GCN-NEXT:    ; implicit-def: $vgpr12_vgpr13
5483; GCN-NEXT:    s_branch .LBB12_8
5484; GCN-NEXT:  .LBB12_16:
5485; GCN-NEXT:    s_branch .LBB12_11
5486;
5487; TAHITI-LABEL: srem_v4i64:
5488; TAHITI:       ; %bb.0:
5489; TAHITI-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x9
5490; TAHITI-NEXT:    s_mov_b32 s3, 0xf000
5491; TAHITI-NEXT:    s_mov_b32 s2, -1
5492; TAHITI-NEXT:    v_mov_b32_e32 v8, 0
5493; TAHITI-NEXT:    s_waitcnt lgkmcnt(0)
5494; TAHITI-NEXT:    s_mov_b32 s0, s6
5495; TAHITI-NEXT:    s_mov_b32 s1, s7
5496; TAHITI-NEXT:    buffer_load_dwordx4 v[10:13], off, s[0:3], 0 offset:32
5497; TAHITI-NEXT:    buffer_load_dwordx4 v[14:17], off, s[0:3], 0
5498; TAHITI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
5499; TAHITI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:48
5500; TAHITI-NEXT:    s_waitcnt vmcnt(2)
5501; TAHITI-NEXT:    v_or_b32_e32 v9, v15, v11
5502; TAHITI-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[8:9]
5503; TAHITI-NEXT:    s_cbranch_vccz .LBB12_13
5504; TAHITI-NEXT:  ; %bb.1:
5505; TAHITI-NEXT:    v_ashrrev_i32_e32 v8, 31, v11
5506; TAHITI-NEXT:    v_add_i32_e32 v9, vcc, v10, v8
5507; TAHITI-NEXT:    v_addc_u32_e32 v11, vcc, v11, v8, vcc
5508; TAHITI-NEXT:    v_xor_b32_e32 v9, v9, v8
5509; TAHITI-NEXT:    v_xor_b32_e32 v8, v11, v8
5510; TAHITI-NEXT:    v_cvt_f32_u32_e32 v11, v9
5511; TAHITI-NEXT:    v_cvt_f32_u32_e32 v18, v8
5512; TAHITI-NEXT:    v_sub_i32_e32 v19, vcc, 0, v9
5513; TAHITI-NEXT:    v_subb_u32_e32 v20, vcc, 0, v8, vcc
5514; TAHITI-NEXT:    v_madmk_f32 v11, v18, 0x4f800000, v11
5515; TAHITI-NEXT:    v_rcp_f32_e32 v11, v11
5516; TAHITI-NEXT:    v_mul_f32_e32 v11, 0x5f7ffffc, v11
5517; TAHITI-NEXT:    v_mul_f32_e32 v18, 0x2f800000, v11
5518; TAHITI-NEXT:    v_trunc_f32_e32 v18, v18
5519; TAHITI-NEXT:    v_madmk_f32 v11, v18, 0xcf800000, v11
5520; TAHITI-NEXT:    v_cvt_u32_f32_e32 v18, v18
5521; TAHITI-NEXT:    v_cvt_u32_f32_e32 v11, v11
5522; TAHITI-NEXT:    v_mul_lo_u32 v22, v19, v18
5523; TAHITI-NEXT:    v_mul_hi_u32 v21, v19, v11
5524; TAHITI-NEXT:    v_mul_lo_u32 v23, v20, v11
5525; TAHITI-NEXT:    v_add_i32_e32 v21, vcc, v22, v21
5526; TAHITI-NEXT:    v_mul_lo_u32 v22, v19, v11
5527; TAHITI-NEXT:    v_add_i32_e32 v21, vcc, v21, v23
5528; TAHITI-NEXT:    v_mul_lo_u32 v23, v11, v21
5529; TAHITI-NEXT:    v_mul_hi_u32 v24, v11, v22
5530; TAHITI-NEXT:    v_mul_hi_u32 v25, v11, v21
5531; TAHITI-NEXT:    v_mul_hi_u32 v26, v18, v21
5532; TAHITI-NEXT:    v_mul_lo_u32 v21, v18, v21
5533; TAHITI-NEXT:    v_add_i32_e32 v23, vcc, v24, v23
5534; TAHITI-NEXT:    v_addc_u32_e32 v24, vcc, 0, v25, vcc
5535; TAHITI-NEXT:    v_mul_lo_u32 v25, v18, v22
5536; TAHITI-NEXT:    v_mul_hi_u32 v22, v18, v22
5537; TAHITI-NEXT:    v_add_i32_e32 v23, vcc, v23, v25
5538; TAHITI-NEXT:    v_addc_u32_e32 v22, vcc, v24, v22, vcc
5539; TAHITI-NEXT:    v_addc_u32_e32 v23, vcc, 0, v26, vcc
5540; TAHITI-NEXT:    v_add_i32_e32 v21, vcc, v22, v21
5541; TAHITI-NEXT:    v_addc_u32_e32 v22, vcc, 0, v23, vcc
5542; TAHITI-NEXT:    v_add_i32_e32 v11, vcc, v11, v21
5543; TAHITI-NEXT:    v_addc_u32_e32 v18, vcc, v18, v22, vcc
5544; TAHITI-NEXT:    v_mul_lo_u32 v21, v19, v18
5545; TAHITI-NEXT:    v_mul_hi_u32 v22, v19, v11
5546; TAHITI-NEXT:    v_mul_lo_u32 v20, v20, v11
5547; TAHITI-NEXT:    v_mul_lo_u32 v19, v19, v11
5548; TAHITI-NEXT:    v_add_i32_e32 v21, vcc, v21, v22
5549; TAHITI-NEXT:    v_add_i32_e32 v20, vcc, v21, v20
5550; TAHITI-NEXT:    v_mul_lo_u32 v23, v11, v20
5551; TAHITI-NEXT:    v_mul_hi_u32 v24, v11, v19
5552; TAHITI-NEXT:    v_mul_hi_u32 v25, v11, v20
5553; TAHITI-NEXT:    v_mul_hi_u32 v22, v18, v19
5554; TAHITI-NEXT:    v_mul_lo_u32 v19, v18, v19
5555; TAHITI-NEXT:    v_mul_hi_u32 v21, v18, v20
5556; TAHITI-NEXT:    v_add_i32_e32 v23, vcc, v24, v23
5557; TAHITI-NEXT:    v_addc_u32_e32 v24, vcc, 0, v25, vcc
5558; TAHITI-NEXT:    v_mul_lo_u32 v20, v18, v20
5559; TAHITI-NEXT:    v_add_i32_e32 v19, vcc, v23, v19
5560; TAHITI-NEXT:    v_addc_u32_e32 v19, vcc, v24, v22, vcc
5561; TAHITI-NEXT:    v_addc_u32_e32 v21, vcc, 0, v21, vcc
5562; TAHITI-NEXT:    v_add_i32_e32 v19, vcc, v19, v20
5563; TAHITI-NEXT:    v_addc_u32_e32 v20, vcc, 0, v21, vcc
5564; TAHITI-NEXT:    v_add_i32_e32 v11, vcc, v11, v19
5565; TAHITI-NEXT:    v_addc_u32_e32 v18, vcc, v18, v20, vcc
5566; TAHITI-NEXT:    v_ashrrev_i32_e32 v19, 31, v15
5567; TAHITI-NEXT:    v_add_i32_e32 v20, vcc, v14, v19
5568; TAHITI-NEXT:    v_xor_b32_e32 v20, v20, v19
5569; TAHITI-NEXT:    v_mul_lo_u32 v21, v20, v18
5570; TAHITI-NEXT:    v_mul_hi_u32 v22, v20, v11
5571; TAHITI-NEXT:    v_mul_hi_u32 v23, v20, v18
5572; TAHITI-NEXT:    v_addc_u32_e32 v15, vcc, v15, v19, vcc
5573; TAHITI-NEXT:    v_xor_b32_e32 v15, v15, v19
5574; TAHITI-NEXT:    v_add_i32_e32 v21, vcc, v22, v21
5575; TAHITI-NEXT:    v_addc_u32_e32 v22, vcc, 0, v23, vcc
5576; TAHITI-NEXT:    v_mul_lo_u32 v23, v15, v11
5577; TAHITI-NEXT:    v_mul_hi_u32 v11, v15, v11
5578; TAHITI-NEXT:    v_mul_hi_u32 v24, v15, v18
5579; TAHITI-NEXT:    v_mul_lo_u32 v18, v15, v18
5580; TAHITI-NEXT:    v_add_i32_e32 v21, vcc, v21, v23
5581; TAHITI-NEXT:    v_addc_u32_e32 v11, vcc, v22, v11, vcc
5582; TAHITI-NEXT:    v_addc_u32_e32 v21, vcc, 0, v24, vcc
5583; TAHITI-NEXT:    v_add_i32_e32 v11, vcc, v11, v18
5584; TAHITI-NEXT:    v_addc_u32_e32 v18, vcc, 0, v21, vcc
5585; TAHITI-NEXT:    v_mul_lo_u32 v18, v9, v18
5586; TAHITI-NEXT:    v_mul_hi_u32 v21, v9, v11
5587; TAHITI-NEXT:    v_mul_lo_u32 v22, v8, v11
5588; TAHITI-NEXT:    v_mul_lo_u32 v11, v9, v11
5589; TAHITI-NEXT:    v_add_i32_e32 v18, vcc, v18, v21
5590; TAHITI-NEXT:    v_add_i32_e32 v18, vcc, v22, v18
5591; TAHITI-NEXT:    v_sub_i32_e32 v21, vcc, v15, v18
5592; TAHITI-NEXT:    v_sub_i32_e32 v11, vcc, v20, v11
5593; TAHITI-NEXT:    v_subb_u32_e64 v20, s[0:1], v21, v8, vcc
5594; TAHITI-NEXT:    v_sub_i32_e64 v21, s[0:1], v11, v9
5595; TAHITI-NEXT:    v_subbrev_u32_e64 v22, s[2:3], 0, v20, s[0:1]
5596; TAHITI-NEXT:    v_cmp_ge_u32_e64 s[2:3], v22, v8
5597; TAHITI-NEXT:    v_cndmask_b32_e64 v23, 0, -1, s[2:3]
5598; TAHITI-NEXT:    v_cmp_ge_u32_e64 s[2:3], v21, v9
5599; TAHITI-NEXT:    v_subb_u32_e32 v15, vcc, v15, v18, vcc
5600; TAHITI-NEXT:    v_cndmask_b32_e64 v24, 0, -1, s[2:3]
5601; TAHITI-NEXT:    v_cmp_eq_u32_e64 s[2:3], v22, v8
5602; TAHITI-NEXT:    v_subb_u32_e64 v20, s[0:1], v20, v8, s[0:1]
5603; TAHITI-NEXT:    v_cmp_ge_u32_e32 vcc, v15, v8
5604; TAHITI-NEXT:    v_cndmask_b32_e64 v23, v23, v24, s[2:3]
5605; TAHITI-NEXT:    v_sub_i32_e64 v24, s[0:1], v21, v9
5606; TAHITI-NEXT:    v_cndmask_b32_e64 v18, 0, -1, vcc
5607; TAHITI-NEXT:    v_cmp_ge_u32_e32 vcc, v11, v9
5608; TAHITI-NEXT:    v_subbrev_u32_e64 v20, s[0:1], 0, v20, s[0:1]
5609; TAHITI-NEXT:    v_cndmask_b32_e64 v9, 0, -1, vcc
5610; TAHITI-NEXT:    v_cmp_eq_u32_e32 vcc, v15, v8
5611; TAHITI-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v23
5612; TAHITI-NEXT:    v_cndmask_b32_e32 v8, v18, v9, vcc
5613; TAHITI-NEXT:    v_cndmask_b32_e64 v21, v21, v24, s[0:1]
5614; TAHITI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
5615; TAHITI-NEXT:    v_cndmask_b32_e64 v20, v22, v20, s[0:1]
5616; TAHITI-NEXT:    v_cndmask_b32_e32 v9, v11, v21, vcc
5617; TAHITI-NEXT:    v_cndmask_b32_e32 v8, v15, v20, vcc
5618; TAHITI-NEXT:    v_xor_b32_e32 v9, v9, v19
5619; TAHITI-NEXT:    v_xor_b32_e32 v11, v8, v19
5620; TAHITI-NEXT:    v_sub_i32_e32 v8, vcc, v9, v19
5621; TAHITI-NEXT:    v_subb_u32_e32 v9, vcc, v11, v19, vcc
5622; TAHITI-NEXT:    s_cbranch_execnz .LBB12_3
5623; TAHITI-NEXT:  .LBB12_2:
5624; TAHITI-NEXT:    v_cvt_f32_u32_e32 v8, v10
5625; TAHITI-NEXT:    v_sub_i32_e32 v9, vcc, 0, v10
5626; TAHITI-NEXT:    v_rcp_iflag_f32_e32 v8, v8
5627; TAHITI-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
5628; TAHITI-NEXT:    v_cvt_u32_f32_e32 v8, v8
5629; TAHITI-NEXT:    v_mul_lo_u32 v9, v9, v8
5630; TAHITI-NEXT:    v_mul_hi_u32 v9, v8, v9
5631; TAHITI-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
5632; TAHITI-NEXT:    v_mul_hi_u32 v8, v14, v8
5633; TAHITI-NEXT:    v_mul_lo_u32 v8, v8, v10
5634; TAHITI-NEXT:    v_sub_i32_e32 v8, vcc, v14, v8
5635; TAHITI-NEXT:    v_sub_i32_e32 v9, vcc, v8, v10
5636; TAHITI-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v10
5637; TAHITI-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
5638; TAHITI-NEXT:    v_sub_i32_e32 v9, vcc, v8, v10
5639; TAHITI-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v10
5640; TAHITI-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
5641; TAHITI-NEXT:    v_mov_b32_e32 v9, 0
5642; TAHITI-NEXT:  .LBB12_3:
5643; TAHITI-NEXT:    v_or_b32_e32 v11, v17, v13
5644; TAHITI-NEXT:    v_mov_b32_e32 v10, 0
5645; TAHITI-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[10:11]
5646; TAHITI-NEXT:    s_cbranch_vccz .LBB12_14
5647; TAHITI-NEXT:  ; %bb.4:
5648; TAHITI-NEXT:    v_ashrrev_i32_e32 v10, 31, v13
5649; TAHITI-NEXT:    v_add_i32_e32 v11, vcc, v12, v10
5650; TAHITI-NEXT:    v_addc_u32_e32 v13, vcc, v13, v10, vcc
5651; TAHITI-NEXT:    v_xor_b32_e32 v11, v11, v10
5652; TAHITI-NEXT:    v_xor_b32_e32 v10, v13, v10
5653; TAHITI-NEXT:    v_cvt_f32_u32_e32 v13, v11
5654; TAHITI-NEXT:    v_cvt_f32_u32_e32 v14, v10
5655; TAHITI-NEXT:    v_sub_i32_e32 v15, vcc, 0, v11
5656; TAHITI-NEXT:    v_subb_u32_e32 v18, vcc, 0, v10, vcc
5657; TAHITI-NEXT:    v_madmk_f32 v13, v14, 0x4f800000, v13
5658; TAHITI-NEXT:    v_rcp_f32_e32 v13, v13
5659; TAHITI-NEXT:    v_mul_f32_e32 v13, 0x5f7ffffc, v13
5660; TAHITI-NEXT:    v_mul_f32_e32 v14, 0x2f800000, v13
5661; TAHITI-NEXT:    v_trunc_f32_e32 v14, v14
5662; TAHITI-NEXT:    v_madmk_f32 v13, v14, 0xcf800000, v13
5663; TAHITI-NEXT:    v_cvt_u32_f32_e32 v14, v14
5664; TAHITI-NEXT:    v_cvt_u32_f32_e32 v13, v13
5665; TAHITI-NEXT:    v_mul_lo_u32 v20, v15, v14
5666; TAHITI-NEXT:    v_mul_hi_u32 v19, v15, v13
5667; TAHITI-NEXT:    v_mul_lo_u32 v21, v18, v13
5668; TAHITI-NEXT:    v_add_i32_e32 v19, vcc, v20, v19
5669; TAHITI-NEXT:    v_mul_lo_u32 v20, v15, v13
5670; TAHITI-NEXT:    v_add_i32_e32 v19, vcc, v19, v21
5671; TAHITI-NEXT:    v_mul_lo_u32 v21, v13, v19
5672; TAHITI-NEXT:    v_mul_hi_u32 v22, v13, v20
5673; TAHITI-NEXT:    v_mul_hi_u32 v23, v13, v19
5674; TAHITI-NEXT:    v_mul_hi_u32 v24, v14, v19
5675; TAHITI-NEXT:    v_mul_lo_u32 v19, v14, v19
5676; TAHITI-NEXT:    v_add_i32_e32 v21, vcc, v22, v21
5677; TAHITI-NEXT:    v_addc_u32_e32 v22, vcc, 0, v23, vcc
5678; TAHITI-NEXT:    v_mul_lo_u32 v23, v14, v20
5679; TAHITI-NEXT:    v_mul_hi_u32 v20, v14, v20
5680; TAHITI-NEXT:    v_add_i32_e32 v21, vcc, v21, v23
5681; TAHITI-NEXT:    v_addc_u32_e32 v20, vcc, v22, v20, vcc
5682; TAHITI-NEXT:    v_addc_u32_e32 v21, vcc, 0, v24, vcc
5683; TAHITI-NEXT:    v_add_i32_e32 v19, vcc, v20, v19
5684; TAHITI-NEXT:    v_addc_u32_e32 v20, vcc, 0, v21, vcc
5685; TAHITI-NEXT:    v_add_i32_e32 v13, vcc, v13, v19
5686; TAHITI-NEXT:    v_addc_u32_e32 v14, vcc, v14, v20, vcc
5687; TAHITI-NEXT:    v_mul_lo_u32 v19, v15, v14
5688; TAHITI-NEXT:    v_mul_hi_u32 v20, v15, v13
5689; TAHITI-NEXT:    v_mul_lo_u32 v18, v18, v13
5690; TAHITI-NEXT:    v_mul_lo_u32 v15, v15, v13
5691; TAHITI-NEXT:    v_add_i32_e32 v19, vcc, v19, v20
5692; TAHITI-NEXT:    v_add_i32_e32 v18, vcc, v19, v18
5693; TAHITI-NEXT:    v_mul_lo_u32 v21, v13, v18
5694; TAHITI-NEXT:    v_mul_hi_u32 v22, v13, v15
5695; TAHITI-NEXT:    v_mul_hi_u32 v23, v13, v18
5696; TAHITI-NEXT:    v_mul_hi_u32 v20, v14, v15
5697; TAHITI-NEXT:    v_mul_lo_u32 v15, v14, v15
5698; TAHITI-NEXT:    v_mul_hi_u32 v19, v14, v18
5699; TAHITI-NEXT:    v_add_i32_e32 v21, vcc, v22, v21
5700; TAHITI-NEXT:    v_addc_u32_e32 v22, vcc, 0, v23, vcc
5701; TAHITI-NEXT:    v_mul_lo_u32 v18, v14, v18
5702; TAHITI-NEXT:    v_add_i32_e32 v15, vcc, v21, v15
5703; TAHITI-NEXT:    v_addc_u32_e32 v15, vcc, v22, v20, vcc
5704; TAHITI-NEXT:    v_addc_u32_e32 v19, vcc, 0, v19, vcc
5705; TAHITI-NEXT:    v_add_i32_e32 v15, vcc, v15, v18
5706; TAHITI-NEXT:    v_addc_u32_e32 v18, vcc, 0, v19, vcc
5707; TAHITI-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
5708; TAHITI-NEXT:    v_addc_u32_e32 v14, vcc, v14, v18, vcc
5709; TAHITI-NEXT:    v_ashrrev_i32_e32 v15, 31, v17
5710; TAHITI-NEXT:    v_add_i32_e32 v18, vcc, v16, v15
5711; TAHITI-NEXT:    v_xor_b32_e32 v18, v18, v15
5712; TAHITI-NEXT:    v_mul_lo_u32 v19, v18, v14
5713; TAHITI-NEXT:    v_mul_hi_u32 v20, v18, v13
5714; TAHITI-NEXT:    v_mul_hi_u32 v21, v18, v14
5715; TAHITI-NEXT:    v_addc_u32_e32 v17, vcc, v17, v15, vcc
5716; TAHITI-NEXT:    v_xor_b32_e32 v17, v17, v15
5717; TAHITI-NEXT:    v_add_i32_e32 v19, vcc, v20, v19
5718; TAHITI-NEXT:    v_addc_u32_e32 v20, vcc, 0, v21, vcc
5719; TAHITI-NEXT:    v_mul_lo_u32 v21, v17, v13
5720; TAHITI-NEXT:    v_mul_hi_u32 v13, v17, v13
5721; TAHITI-NEXT:    v_mul_hi_u32 v22, v17, v14
5722; TAHITI-NEXT:    v_mul_lo_u32 v14, v17, v14
5723; TAHITI-NEXT:    v_add_i32_e32 v19, vcc, v19, v21
5724; TAHITI-NEXT:    v_addc_u32_e32 v13, vcc, v20, v13, vcc
5725; TAHITI-NEXT:    v_addc_u32_e32 v19, vcc, 0, v22, vcc
5726; TAHITI-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
5727; TAHITI-NEXT:    v_addc_u32_e32 v14, vcc, 0, v19, vcc
5728; TAHITI-NEXT:    v_mul_lo_u32 v14, v11, v14
5729; TAHITI-NEXT:    v_mul_hi_u32 v19, v11, v13
5730; TAHITI-NEXT:    v_mul_lo_u32 v20, v10, v13
5731; TAHITI-NEXT:    v_mul_lo_u32 v13, v11, v13
5732; TAHITI-NEXT:    v_add_i32_e32 v14, vcc, v14, v19
5733; TAHITI-NEXT:    v_add_i32_e32 v14, vcc, v20, v14
5734; TAHITI-NEXT:    v_sub_i32_e32 v19, vcc, v17, v14
5735; TAHITI-NEXT:    v_sub_i32_e32 v13, vcc, v18, v13
5736; TAHITI-NEXT:    v_subb_u32_e64 v18, s[0:1], v19, v10, vcc
5737; TAHITI-NEXT:    v_sub_i32_e64 v19, s[0:1], v13, v11
5738; TAHITI-NEXT:    v_subbrev_u32_e64 v20, s[2:3], 0, v18, s[0:1]
5739; TAHITI-NEXT:    v_cmp_ge_u32_e64 s[2:3], v20, v10
5740; TAHITI-NEXT:    v_cndmask_b32_e64 v21, 0, -1, s[2:3]
5741; TAHITI-NEXT:    v_cmp_ge_u32_e64 s[2:3], v19, v11
5742; TAHITI-NEXT:    v_subb_u32_e32 v14, vcc, v17, v14, vcc
5743; TAHITI-NEXT:    v_cndmask_b32_e64 v22, 0, -1, s[2:3]
5744; TAHITI-NEXT:    v_cmp_eq_u32_e64 s[2:3], v20, v10
5745; TAHITI-NEXT:    v_subb_u32_e64 v18, s[0:1], v18, v10, s[0:1]
5746; TAHITI-NEXT:    v_cmp_ge_u32_e32 vcc, v14, v10
5747; TAHITI-NEXT:    v_cndmask_b32_e64 v21, v21, v22, s[2:3]
5748; TAHITI-NEXT:    v_sub_i32_e64 v22, s[0:1], v19, v11
5749; TAHITI-NEXT:    v_cndmask_b32_e64 v17, 0, -1, vcc
5750; TAHITI-NEXT:    v_cmp_ge_u32_e32 vcc, v13, v11
5751; TAHITI-NEXT:    v_subbrev_u32_e64 v18, s[0:1], 0, v18, s[0:1]
5752; TAHITI-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc
5753; TAHITI-NEXT:    v_cmp_eq_u32_e32 vcc, v14, v10
5754; TAHITI-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v21
5755; TAHITI-NEXT:    v_cndmask_b32_e32 v10, v17, v11, vcc
5756; TAHITI-NEXT:    v_cndmask_b32_e64 v19, v19, v22, s[0:1]
5757; TAHITI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v10
5758; TAHITI-NEXT:    v_cndmask_b32_e64 v18, v20, v18, s[0:1]
5759; TAHITI-NEXT:    v_cndmask_b32_e32 v11, v13, v19, vcc
5760; TAHITI-NEXT:    v_cndmask_b32_e32 v10, v14, v18, vcc
5761; TAHITI-NEXT:    v_xor_b32_e32 v11, v11, v15
5762; TAHITI-NEXT:    v_xor_b32_e32 v13, v10, v15
5763; TAHITI-NEXT:    v_sub_i32_e32 v10, vcc, v11, v15
5764; TAHITI-NEXT:    v_subb_u32_e32 v11, vcc, v13, v15, vcc
5765; TAHITI-NEXT:    s_cbranch_execnz .LBB12_6
5766; TAHITI-NEXT:  .LBB12_5:
5767; TAHITI-NEXT:    v_cvt_f32_u32_e32 v10, v12
5768; TAHITI-NEXT:    v_sub_i32_e32 v11, vcc, 0, v12
5769; TAHITI-NEXT:    v_rcp_iflag_f32_e32 v10, v10
5770; TAHITI-NEXT:    v_mul_f32_e32 v10, 0x4f7ffffe, v10
5771; TAHITI-NEXT:    v_cvt_u32_f32_e32 v10, v10
5772; TAHITI-NEXT:    v_mul_lo_u32 v11, v11, v10
5773; TAHITI-NEXT:    v_mul_hi_u32 v11, v10, v11
5774; TAHITI-NEXT:    v_add_i32_e32 v10, vcc, v10, v11
5775; TAHITI-NEXT:    v_mul_hi_u32 v10, v16, v10
5776; TAHITI-NEXT:    v_mul_lo_u32 v10, v10, v12
5777; TAHITI-NEXT:    v_sub_i32_e32 v10, vcc, v16, v10
5778; TAHITI-NEXT:    v_subrev_i32_e32 v11, vcc, v12, v10
5779; TAHITI-NEXT:    v_cmp_ge_u32_e32 vcc, v10, v12
5780; TAHITI-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
5781; TAHITI-NEXT:    v_subrev_i32_e32 v11, vcc, v12, v10
5782; TAHITI-NEXT:    v_cmp_ge_u32_e32 vcc, v10, v12
5783; TAHITI-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
5784; TAHITI-NEXT:    v_mov_b32_e32 v11, 0
5785; TAHITI-NEXT:  .LBB12_6:
5786; TAHITI-NEXT:    s_waitcnt vmcnt(0)
5787; TAHITI-NEXT:    v_or_b32_e32 v13, v5, v1
5788; TAHITI-NEXT:    v_mov_b32_e32 v12, 0
5789; TAHITI-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[12:13]
5790; TAHITI-NEXT:    s_cbranch_vccz .LBB12_15
5791; TAHITI-NEXT:  ; %bb.7:
5792; TAHITI-NEXT:    v_ashrrev_i32_e32 v13, 31, v1
5793; TAHITI-NEXT:    v_add_i32_e32 v12, vcc, v0, v13
5794; TAHITI-NEXT:    v_addc_u32_e32 v1, vcc, v1, v13, vcc
5795; TAHITI-NEXT:    v_xor_b32_e32 v12, v12, v13
5796; TAHITI-NEXT:    v_xor_b32_e32 v1, v1, v13
5797; TAHITI-NEXT:    v_cvt_f32_u32_e32 v13, v12
5798; TAHITI-NEXT:    v_cvt_f32_u32_e32 v14, v1
5799; TAHITI-NEXT:    v_sub_i32_e32 v15, vcc, 0, v12
5800; TAHITI-NEXT:    v_subb_u32_e32 v16, vcc, 0, v1, vcc
5801; TAHITI-NEXT:    v_madmk_f32 v13, v14, 0x4f800000, v13
5802; TAHITI-NEXT:    v_rcp_f32_e32 v13, v13
5803; TAHITI-NEXT:    v_mul_f32_e32 v13, 0x5f7ffffc, v13
5804; TAHITI-NEXT:    v_mul_f32_e32 v14, 0x2f800000, v13
5805; TAHITI-NEXT:    v_trunc_f32_e32 v14, v14
5806; TAHITI-NEXT:    v_madmk_f32 v13, v14, 0xcf800000, v13
5807; TAHITI-NEXT:    v_cvt_u32_f32_e32 v14, v14
5808; TAHITI-NEXT:    v_cvt_u32_f32_e32 v13, v13
5809; TAHITI-NEXT:    v_mul_lo_u32 v18, v15, v14
5810; TAHITI-NEXT:    v_mul_hi_u32 v17, v15, v13
5811; TAHITI-NEXT:    v_mul_lo_u32 v19, v16, v13
5812; TAHITI-NEXT:    v_add_i32_e32 v17, vcc, v18, v17
5813; TAHITI-NEXT:    v_mul_lo_u32 v18, v15, v13
5814; TAHITI-NEXT:    v_add_i32_e32 v17, vcc, v17, v19
5815; TAHITI-NEXT:    v_mul_lo_u32 v19, v13, v17
5816; TAHITI-NEXT:    v_mul_hi_u32 v20, v13, v18
5817; TAHITI-NEXT:    v_mul_hi_u32 v21, v13, v17
5818; TAHITI-NEXT:    v_mul_hi_u32 v22, v14, v17
5819; TAHITI-NEXT:    v_mul_lo_u32 v17, v14, v17
5820; TAHITI-NEXT:    v_add_i32_e32 v19, vcc, v20, v19
5821; TAHITI-NEXT:    v_addc_u32_e32 v20, vcc, 0, v21, vcc
5822; TAHITI-NEXT:    v_mul_lo_u32 v21, v14, v18
5823; TAHITI-NEXT:    v_mul_hi_u32 v18, v14, v18
5824; TAHITI-NEXT:    v_add_i32_e32 v19, vcc, v19, v21
5825; TAHITI-NEXT:    v_addc_u32_e32 v18, vcc, v20, v18, vcc
5826; TAHITI-NEXT:    v_addc_u32_e32 v19, vcc, 0, v22, vcc
5827; TAHITI-NEXT:    v_add_i32_e32 v17, vcc, v18, v17
5828; TAHITI-NEXT:    v_addc_u32_e32 v18, vcc, 0, v19, vcc
5829; TAHITI-NEXT:    v_add_i32_e32 v13, vcc, v13, v17
5830; TAHITI-NEXT:    v_addc_u32_e32 v14, vcc, v14, v18, vcc
5831; TAHITI-NEXT:    v_mul_lo_u32 v17, v15, v14
5832; TAHITI-NEXT:    v_mul_hi_u32 v18, v15, v13
5833; TAHITI-NEXT:    v_mul_lo_u32 v16, v16, v13
5834; TAHITI-NEXT:    v_mul_lo_u32 v15, v15, v13
5835; TAHITI-NEXT:    v_add_i32_e32 v17, vcc, v17, v18
5836; TAHITI-NEXT:    v_add_i32_e32 v16, vcc, v17, v16
5837; TAHITI-NEXT:    v_mul_lo_u32 v19, v13, v16
5838; TAHITI-NEXT:    v_mul_hi_u32 v20, v13, v15
5839; TAHITI-NEXT:    v_mul_hi_u32 v21, v13, v16
5840; TAHITI-NEXT:    v_mul_hi_u32 v18, v14, v15
5841; TAHITI-NEXT:    v_mul_lo_u32 v15, v14, v15
5842; TAHITI-NEXT:    v_mul_hi_u32 v17, v14, v16
5843; TAHITI-NEXT:    v_add_i32_e32 v19, vcc, v20, v19
5844; TAHITI-NEXT:    v_addc_u32_e32 v20, vcc, 0, v21, vcc
5845; TAHITI-NEXT:    v_mul_lo_u32 v16, v14, v16
5846; TAHITI-NEXT:    v_add_i32_e32 v15, vcc, v19, v15
5847; TAHITI-NEXT:    v_addc_u32_e32 v15, vcc, v20, v18, vcc
5848; TAHITI-NEXT:    v_addc_u32_e32 v17, vcc, 0, v17, vcc
5849; TAHITI-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
5850; TAHITI-NEXT:    v_addc_u32_e32 v16, vcc, 0, v17, vcc
5851; TAHITI-NEXT:    v_add_i32_e32 v13, vcc, v13, v15
5852; TAHITI-NEXT:    v_addc_u32_e32 v14, vcc, v14, v16, vcc
5853; TAHITI-NEXT:    v_ashrrev_i32_e32 v15, 31, v5
5854; TAHITI-NEXT:    v_add_i32_e32 v16, vcc, v4, v15
5855; TAHITI-NEXT:    v_xor_b32_e32 v16, v16, v15
5856; TAHITI-NEXT:    v_mul_lo_u32 v17, v16, v14
5857; TAHITI-NEXT:    v_mul_hi_u32 v18, v16, v13
5858; TAHITI-NEXT:    v_mul_hi_u32 v19, v16, v14
5859; TAHITI-NEXT:    v_addc_u32_e32 v5, vcc, v5, v15, vcc
5860; TAHITI-NEXT:    v_xor_b32_e32 v5, v5, v15
5861; TAHITI-NEXT:    v_add_i32_e32 v17, vcc, v18, v17
5862; TAHITI-NEXT:    v_addc_u32_e32 v18, vcc, 0, v19, vcc
5863; TAHITI-NEXT:    v_mul_lo_u32 v19, v5, v13
5864; TAHITI-NEXT:    v_mul_hi_u32 v13, v5, v13
5865; TAHITI-NEXT:    v_mul_hi_u32 v20, v5, v14
5866; TAHITI-NEXT:    v_mul_lo_u32 v14, v5, v14
5867; TAHITI-NEXT:    v_add_i32_e32 v17, vcc, v17, v19
5868; TAHITI-NEXT:    v_addc_u32_e32 v13, vcc, v18, v13, vcc
5869; TAHITI-NEXT:    v_addc_u32_e32 v17, vcc, 0, v20, vcc
5870; TAHITI-NEXT:    v_add_i32_e32 v13, vcc, v13, v14
5871; TAHITI-NEXT:    v_addc_u32_e32 v14, vcc, 0, v17, vcc
5872; TAHITI-NEXT:    v_mul_lo_u32 v14, v12, v14
5873; TAHITI-NEXT:    v_mul_hi_u32 v17, v12, v13
5874; TAHITI-NEXT:    v_mul_lo_u32 v18, v1, v13
5875; TAHITI-NEXT:    v_mul_lo_u32 v13, v12, v13
5876; TAHITI-NEXT:    v_add_i32_e32 v14, vcc, v14, v17
5877; TAHITI-NEXT:    v_add_i32_e32 v14, vcc, v18, v14
5878; TAHITI-NEXT:    v_sub_i32_e32 v17, vcc, v5, v14
5879; TAHITI-NEXT:    v_sub_i32_e32 v13, vcc, v16, v13
5880; TAHITI-NEXT:    v_subb_u32_e64 v16, s[0:1], v17, v1, vcc
5881; TAHITI-NEXT:    v_sub_i32_e64 v17, s[0:1], v13, v12
5882; TAHITI-NEXT:    v_subbrev_u32_e64 v18, s[2:3], 0, v16, s[0:1]
5883; TAHITI-NEXT:    v_cmp_ge_u32_e64 s[2:3], v18, v1
5884; TAHITI-NEXT:    v_cndmask_b32_e64 v19, 0, -1, s[2:3]
5885; TAHITI-NEXT:    v_cmp_ge_u32_e64 s[2:3], v17, v12
5886; TAHITI-NEXT:    v_subb_u32_e32 v5, vcc, v5, v14, vcc
5887; TAHITI-NEXT:    v_cndmask_b32_e64 v20, 0, -1, s[2:3]
5888; TAHITI-NEXT:    v_cmp_eq_u32_e64 s[2:3], v18, v1
5889; TAHITI-NEXT:    v_subb_u32_e64 v16, s[0:1], v16, v1, s[0:1]
5890; TAHITI-NEXT:    v_cmp_ge_u32_e32 vcc, v5, v1
5891; TAHITI-NEXT:    v_cndmask_b32_e64 v19, v19, v20, s[2:3]
5892; TAHITI-NEXT:    v_sub_i32_e64 v20, s[0:1], v17, v12
5893; TAHITI-NEXT:    v_cndmask_b32_e64 v14, 0, -1, vcc
5894; TAHITI-NEXT:    v_cmp_ge_u32_e32 vcc, v13, v12
5895; TAHITI-NEXT:    v_subbrev_u32_e64 v16, s[0:1], 0, v16, s[0:1]
5896; TAHITI-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
5897; TAHITI-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v1
5898; TAHITI-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v19
5899; TAHITI-NEXT:    v_cndmask_b32_e32 v1, v14, v12, vcc
5900; TAHITI-NEXT:    v_cndmask_b32_e64 v17, v17, v20, s[0:1]
5901; TAHITI-NEXT:    v_cndmask_b32_e64 v16, v18, v16, s[0:1]
5902; TAHITI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
5903; TAHITI-NEXT:    v_cndmask_b32_e32 v1, v5, v16, vcc
5904; TAHITI-NEXT:    v_cndmask_b32_e32 v5, v13, v17, vcc
5905; TAHITI-NEXT:    v_xor_b32_e32 v5, v5, v15
5906; TAHITI-NEXT:    v_xor_b32_e32 v1, v1, v15
5907; TAHITI-NEXT:    v_sub_i32_e32 v12, vcc, v5, v15
5908; TAHITI-NEXT:    v_subb_u32_e32 v13, vcc, v1, v15, vcc
5909; TAHITI-NEXT:    s_cbranch_execnz .LBB12_9
5910; TAHITI-NEXT:  .LBB12_8:
5911; TAHITI-NEXT:    v_cvt_f32_u32_e32 v1, v0
5912; TAHITI-NEXT:    v_sub_i32_e32 v5, vcc, 0, v0
5913; TAHITI-NEXT:    v_mov_b32_e32 v13, 0
5914; TAHITI-NEXT:    v_rcp_iflag_f32_e32 v1, v1
5915; TAHITI-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
5916; TAHITI-NEXT:    v_cvt_u32_f32_e32 v1, v1
5917; TAHITI-NEXT:    v_mul_lo_u32 v5, v5, v1
5918; TAHITI-NEXT:    v_mul_hi_u32 v5, v1, v5
5919; TAHITI-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
5920; TAHITI-NEXT:    v_mul_hi_u32 v1, v4, v1
5921; TAHITI-NEXT:    v_mul_lo_u32 v1, v1, v0
5922; TAHITI-NEXT:    v_sub_i32_e32 v1, vcc, v4, v1
5923; TAHITI-NEXT:    v_subrev_i32_e32 v4, vcc, v0, v1
5924; TAHITI-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v0
5925; TAHITI-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
5926; TAHITI-NEXT:    v_subrev_i32_e32 v4, vcc, v0, v1
5927; TAHITI-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v0
5928; TAHITI-NEXT:    v_cndmask_b32_e32 v12, v1, v4, vcc
5929; TAHITI-NEXT:  .LBB12_9:
5930; TAHITI-NEXT:    v_or_b32_e32 v1, v7, v3
5931; TAHITI-NEXT:    v_mov_b32_e32 v0, 0
5932; TAHITI-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
5933; TAHITI-NEXT:    s_cbranch_vccz .LBB12_16
5934; TAHITI-NEXT:  ; %bb.10:
5935; TAHITI-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
5936; TAHITI-NEXT:    v_add_i32_e32 v1, vcc, v2, v0
5937; TAHITI-NEXT:    v_addc_u32_e32 v3, vcc, v3, v0, vcc
5938; TAHITI-NEXT:    v_xor_b32_e32 v1, v1, v0
5939; TAHITI-NEXT:    v_xor_b32_e32 v0, v3, v0
5940; TAHITI-NEXT:    v_cvt_f32_u32_e32 v3, v1
5941; TAHITI-NEXT:    v_cvt_f32_u32_e32 v4, v0
5942; TAHITI-NEXT:    v_sub_i32_e32 v5, vcc, 0, v1
5943; TAHITI-NEXT:    v_subb_u32_e32 v14, vcc, 0, v0, vcc
5944; TAHITI-NEXT:    v_madmk_f32 v3, v4, 0x4f800000, v3
5945; TAHITI-NEXT:    v_rcp_f32_e32 v3, v3
5946; TAHITI-NEXT:    v_mul_f32_e32 v3, 0x5f7ffffc, v3
5947; TAHITI-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v3
5948; TAHITI-NEXT:    v_trunc_f32_e32 v4, v4
5949; TAHITI-NEXT:    v_madmk_f32 v3, v4, 0xcf800000, v3
5950; TAHITI-NEXT:    v_cvt_u32_f32_e32 v4, v4
5951; TAHITI-NEXT:    v_cvt_u32_f32_e32 v3, v3
5952; TAHITI-NEXT:    v_mul_lo_u32 v16, v5, v4
5953; TAHITI-NEXT:    v_mul_hi_u32 v15, v5, v3
5954; TAHITI-NEXT:    v_mul_lo_u32 v17, v14, v3
5955; TAHITI-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
5956; TAHITI-NEXT:    v_mul_lo_u32 v16, v5, v3
5957; TAHITI-NEXT:    v_add_i32_e32 v15, vcc, v15, v17
5958; TAHITI-NEXT:    v_mul_lo_u32 v17, v3, v15
5959; TAHITI-NEXT:    v_mul_hi_u32 v18, v3, v16
5960; TAHITI-NEXT:    v_mul_hi_u32 v19, v3, v15
5961; TAHITI-NEXT:    v_mul_hi_u32 v20, v4, v15
5962; TAHITI-NEXT:    v_mul_lo_u32 v15, v4, v15
5963; TAHITI-NEXT:    v_add_i32_e32 v17, vcc, v18, v17
5964; TAHITI-NEXT:    v_addc_u32_e32 v18, vcc, 0, v19, vcc
5965; TAHITI-NEXT:    v_mul_lo_u32 v19, v4, v16
5966; TAHITI-NEXT:    v_mul_hi_u32 v16, v4, v16
5967; TAHITI-NEXT:    v_add_i32_e32 v17, vcc, v17, v19
5968; TAHITI-NEXT:    v_addc_u32_e32 v16, vcc, v18, v16, vcc
5969; TAHITI-NEXT:    v_addc_u32_e32 v17, vcc, 0, v20, vcc
5970; TAHITI-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
5971; TAHITI-NEXT:    v_addc_u32_e32 v16, vcc, 0, v17, vcc
5972; TAHITI-NEXT:    v_add_i32_e32 v3, vcc, v3, v15
5973; TAHITI-NEXT:    v_addc_u32_e32 v4, vcc, v4, v16, vcc
5974; TAHITI-NEXT:    v_mul_lo_u32 v15, v5, v4
5975; TAHITI-NEXT:    v_mul_hi_u32 v16, v5, v3
5976; TAHITI-NEXT:    v_mul_lo_u32 v14, v14, v3
5977; TAHITI-NEXT:    v_mul_lo_u32 v5, v5, v3
5978; TAHITI-NEXT:    v_add_i32_e32 v15, vcc, v15, v16
5979; TAHITI-NEXT:    v_add_i32_e32 v14, vcc, v15, v14
5980; TAHITI-NEXT:    v_mul_lo_u32 v17, v3, v14
5981; TAHITI-NEXT:    v_mul_hi_u32 v18, v3, v5
5982; TAHITI-NEXT:    v_mul_hi_u32 v19, v3, v14
5983; TAHITI-NEXT:    v_mul_hi_u32 v16, v4, v5
5984; TAHITI-NEXT:    v_mul_lo_u32 v5, v4, v5
5985; TAHITI-NEXT:    v_mul_hi_u32 v15, v4, v14
5986; TAHITI-NEXT:    v_add_i32_e32 v17, vcc, v18, v17
5987; TAHITI-NEXT:    v_addc_u32_e32 v18, vcc, 0, v19, vcc
5988; TAHITI-NEXT:    v_mul_lo_u32 v14, v4, v14
5989; TAHITI-NEXT:    v_add_i32_e32 v5, vcc, v17, v5
5990; TAHITI-NEXT:    v_addc_u32_e32 v5, vcc, v18, v16, vcc
5991; TAHITI-NEXT:    v_addc_u32_e32 v15, vcc, 0, v15, vcc
5992; TAHITI-NEXT:    v_add_i32_e32 v5, vcc, v5, v14
5993; TAHITI-NEXT:    v_addc_u32_e32 v14, vcc, 0, v15, vcc
5994; TAHITI-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
5995; TAHITI-NEXT:    v_addc_u32_e32 v4, vcc, v4, v14, vcc
5996; TAHITI-NEXT:    v_ashrrev_i32_e32 v5, 31, v7
5997; TAHITI-NEXT:    v_add_i32_e32 v14, vcc, v6, v5
5998; TAHITI-NEXT:    v_xor_b32_e32 v14, v14, v5
5999; TAHITI-NEXT:    v_mul_lo_u32 v15, v14, v4
6000; TAHITI-NEXT:    v_mul_hi_u32 v16, v14, v3
6001; TAHITI-NEXT:    v_mul_hi_u32 v17, v14, v4
6002; TAHITI-NEXT:    v_addc_u32_e32 v7, vcc, v7, v5, vcc
6003; TAHITI-NEXT:    v_xor_b32_e32 v7, v7, v5
6004; TAHITI-NEXT:    v_add_i32_e32 v15, vcc, v16, v15
6005; TAHITI-NEXT:    v_addc_u32_e32 v16, vcc, 0, v17, vcc
6006; TAHITI-NEXT:    v_mul_lo_u32 v17, v7, v3
6007; TAHITI-NEXT:    v_mul_hi_u32 v3, v7, v3
6008; TAHITI-NEXT:    v_mul_hi_u32 v18, v7, v4
6009; TAHITI-NEXT:    v_mul_lo_u32 v4, v7, v4
6010; TAHITI-NEXT:    v_add_i32_e32 v15, vcc, v15, v17
6011; TAHITI-NEXT:    v_addc_u32_e32 v3, vcc, v16, v3, vcc
6012; TAHITI-NEXT:    v_addc_u32_e32 v15, vcc, 0, v18, vcc
6013; TAHITI-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
6014; TAHITI-NEXT:    v_addc_u32_e32 v4, vcc, 0, v15, vcc
6015; TAHITI-NEXT:    v_mul_lo_u32 v4, v1, v4
6016; TAHITI-NEXT:    v_mul_hi_u32 v15, v1, v3
6017; TAHITI-NEXT:    v_mul_lo_u32 v16, v0, v3
6018; TAHITI-NEXT:    v_mul_lo_u32 v3, v1, v3
6019; TAHITI-NEXT:    v_add_i32_e32 v4, vcc, v4, v15
6020; TAHITI-NEXT:    v_add_i32_e32 v4, vcc, v16, v4
6021; TAHITI-NEXT:    v_sub_i32_e32 v15, vcc, v7, v4
6022; TAHITI-NEXT:    v_sub_i32_e32 v3, vcc, v14, v3
6023; TAHITI-NEXT:    v_subb_u32_e64 v14, s[0:1], v15, v0, vcc
6024; TAHITI-NEXT:    v_sub_i32_e64 v15, s[0:1], v3, v1
6025; TAHITI-NEXT:    v_subbrev_u32_e64 v16, s[2:3], 0, v14, s[0:1]
6026; TAHITI-NEXT:    v_cmp_ge_u32_e64 s[2:3], v16, v0
6027; TAHITI-NEXT:    v_cndmask_b32_e64 v17, 0, -1, s[2:3]
6028; TAHITI-NEXT:    v_cmp_ge_u32_e64 s[2:3], v15, v1
6029; TAHITI-NEXT:    v_subb_u32_e32 v4, vcc, v7, v4, vcc
6030; TAHITI-NEXT:    v_cndmask_b32_e64 v18, 0, -1, s[2:3]
6031; TAHITI-NEXT:    v_cmp_eq_u32_e64 s[2:3], v16, v0
6032; TAHITI-NEXT:    v_subb_u32_e64 v14, s[0:1], v14, v0, s[0:1]
6033; TAHITI-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v0
6034; TAHITI-NEXT:    v_cndmask_b32_e64 v17, v17, v18, s[2:3]
6035; TAHITI-NEXT:    v_sub_i32_e64 v18, s[0:1], v15, v1
6036; TAHITI-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
6037; TAHITI-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v1
6038; TAHITI-NEXT:    v_subbrev_u32_e64 v14, s[0:1], 0, v14, s[0:1]
6039; TAHITI-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
6040; TAHITI-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v0
6041; TAHITI-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v17
6042; TAHITI-NEXT:    v_cndmask_b32_e32 v0, v7, v1, vcc
6043; TAHITI-NEXT:    v_cndmask_b32_e64 v15, v15, v18, s[0:1]
6044; TAHITI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
6045; TAHITI-NEXT:    v_cndmask_b32_e64 v14, v16, v14, s[0:1]
6046; TAHITI-NEXT:    v_cndmask_b32_e32 v1, v3, v15, vcc
6047; TAHITI-NEXT:    v_cndmask_b32_e32 v0, v4, v14, vcc
6048; TAHITI-NEXT:    v_xor_b32_e32 v1, v1, v5
6049; TAHITI-NEXT:    v_xor_b32_e32 v0, v0, v5
6050; TAHITI-NEXT:    v_sub_i32_e32 v14, vcc, v1, v5
6051; TAHITI-NEXT:    v_subb_u32_e32 v15, vcc, v0, v5, vcc
6052; TAHITI-NEXT:    s_cbranch_execnz .LBB12_12
6053; TAHITI-NEXT:  .LBB12_11:
6054; TAHITI-NEXT:    v_cvt_f32_u32_e32 v0, v2
6055; TAHITI-NEXT:    v_sub_i32_e32 v1, vcc, 0, v2
6056; TAHITI-NEXT:    v_mov_b32_e32 v15, 0
6057; TAHITI-NEXT:    v_rcp_iflag_f32_e32 v0, v0
6058; TAHITI-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
6059; TAHITI-NEXT:    v_cvt_u32_f32_e32 v0, v0
6060; TAHITI-NEXT:    v_mul_lo_u32 v1, v1, v0
6061; TAHITI-NEXT:    v_mul_hi_u32 v1, v0, v1
6062; TAHITI-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
6063; TAHITI-NEXT:    v_mul_hi_u32 v0, v6, v0
6064; TAHITI-NEXT:    v_mul_lo_u32 v0, v0, v2
6065; TAHITI-NEXT:    v_sub_i32_e32 v0, vcc, v6, v0
6066; TAHITI-NEXT:    v_subrev_i32_e32 v1, vcc, v2, v0
6067; TAHITI-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
6068; TAHITI-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
6069; TAHITI-NEXT:    v_subrev_i32_e32 v1, vcc, v2, v0
6070; TAHITI-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
6071; TAHITI-NEXT:    v_cndmask_b32_e32 v14, v0, v1, vcc
6072; TAHITI-NEXT:  .LBB12_12:
6073; TAHITI-NEXT:    s_mov_b32 s7, 0xf000
6074; TAHITI-NEXT:    s_mov_b32 s6, -1
6075; TAHITI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0 offset:16
6076; TAHITI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
6077; TAHITI-NEXT:    s_endpgm
6078; TAHITI-NEXT:  .LBB12_13:
6079; TAHITI-NEXT:    ; implicit-def: $vgpr8_vgpr9
6080; TAHITI-NEXT:    s_branch .LBB12_2
6081; TAHITI-NEXT:  .LBB12_14:
6082; TAHITI-NEXT:    s_branch .LBB12_5
6083; TAHITI-NEXT:  .LBB12_15:
6084; TAHITI-NEXT:    ; implicit-def: $vgpr12_vgpr13
6085; TAHITI-NEXT:    s_branch .LBB12_8
6086; TAHITI-NEXT:  .LBB12_16:
6087; TAHITI-NEXT:    s_branch .LBB12_11
6088;
6089; TONGA-LABEL: srem_v4i64:
6090; TONGA:       ; %bb.0:
6091; TONGA-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x24
6092; TONGA-NEXT:    v_mov_b32_e32 v8, 0
6093; TONGA-NEXT:    s_waitcnt lgkmcnt(0)
6094; TONGA-NEXT:    s_add_u32 s0, s6, 48
6095; TONGA-NEXT:    s_addc_u32 s1, s7, 0
6096; TONGA-NEXT:    s_add_u32 s2, s6, 32
6097; TONGA-NEXT:    v_mov_b32_e32 v0, s6
6098; TONGA-NEXT:    s_addc_u32 s3, s7, 0
6099; TONGA-NEXT:    v_mov_b32_e32 v2, s2
6100; TONGA-NEXT:    v_mov_b32_e32 v1, s7
6101; TONGA-NEXT:    v_mov_b32_e32 v3, s3
6102; TONGA-NEXT:    flat_load_dwordx4 v[10:13], v[2:3]
6103; TONGA-NEXT:    flat_load_dwordx4 v[14:17], v[0:1]
6104; TONGA-NEXT:    v_mov_b32_e32 v0, s0
6105; TONGA-NEXT:    v_mov_b32_e32 v1, s1
6106; TONGA-NEXT:    s_add_u32 s0, s6, 16
6107; TONGA-NEXT:    s_addc_u32 s1, s7, 0
6108; TONGA-NEXT:    v_mov_b32_e32 v5, s1
6109; TONGA-NEXT:    v_mov_b32_e32 v4, s0
6110; TONGA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
6111; TONGA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
6112; TONGA-NEXT:    s_waitcnt vmcnt(2)
6113; TONGA-NEXT:    v_or_b32_e32 v9, v15, v11
6114; TONGA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[8:9]
6115; TONGA-NEXT:    s_cbranch_vccz .LBB12_13
6116; TONGA-NEXT:  ; %bb.1:
6117; TONGA-NEXT:    v_ashrrev_i32_e32 v8, 31, v11
6118; TONGA-NEXT:    v_add_u32_e32 v9, vcc, v10, v8
6119; TONGA-NEXT:    v_addc_u32_e32 v11, vcc, v11, v8, vcc
6120; TONGA-NEXT:    v_xor_b32_e32 v9, v9, v8
6121; TONGA-NEXT:    v_xor_b32_e32 v8, v11, v8
6122; TONGA-NEXT:    v_cvt_f32_u32_e32 v11, v9
6123; TONGA-NEXT:    v_cvt_f32_u32_e32 v18, v8
6124; TONGA-NEXT:    v_sub_u32_e32 v23, vcc, 0, v9
6125; TONGA-NEXT:    v_subb_u32_e32 v24, vcc, 0, v8, vcc
6126; TONGA-NEXT:    v_madmk_f32 v11, v18, 0x4f800000, v11
6127; TONGA-NEXT:    v_rcp_f32_e32 v11, v11
6128; TONGA-NEXT:    v_mul_f32_e32 v11, 0x5f7ffffc, v11
6129; TONGA-NEXT:    v_mul_f32_e32 v18, 0x2f800000, v11
6130; TONGA-NEXT:    v_trunc_f32_e32 v18, v18
6131; TONGA-NEXT:    v_madmk_f32 v11, v18, 0xcf800000, v11
6132; TONGA-NEXT:    v_cvt_u32_f32_e32 v22, v18
6133; TONGA-NEXT:    v_cvt_u32_f32_e32 v11, v11
6134; TONGA-NEXT:    v_mul_lo_u32 v20, v23, v22
6135; TONGA-NEXT:    v_mad_u64_u32 v[18:19], s[0:1], v23, v11, 0
6136; TONGA-NEXT:    v_mul_lo_u32 v21, v24, v11
6137; TONGA-NEXT:    v_add_u32_e32 v19, vcc, v19, v20
6138; TONGA-NEXT:    v_add_u32_e32 v21, vcc, v19, v21
6139; TONGA-NEXT:    v_mad_u64_u32 v[19:20], s[0:1], v11, v21, 0
6140; TONGA-NEXT:    v_mul_hi_u32 v25, v11, v18
6141; TONGA-NEXT:    v_add_u32_e32 v25, vcc, v25, v19
6142; TONGA-NEXT:    v_mad_u64_u32 v[18:19], s[0:1], v22, v18, 0
6143; TONGA-NEXT:    v_addc_u32_e32 v26, vcc, 0, v20, vcc
6144; TONGA-NEXT:    v_mad_u64_u32 v[20:21], s[0:1], v22, v21, 0
6145; TONGA-NEXT:    v_add_u32_e32 v18, vcc, v25, v18
6146; TONGA-NEXT:    v_addc_u32_e32 v18, vcc, v26, v19, vcc
6147; TONGA-NEXT:    v_addc_u32_e32 v19, vcc, 0, v21, vcc
6148; TONGA-NEXT:    v_add_u32_e32 v18, vcc, v18, v20
6149; TONGA-NEXT:    v_addc_u32_e32 v19, vcc, 0, v19, vcc
6150; TONGA-NEXT:    v_add_u32_e32 v11, vcc, v11, v18
6151; TONGA-NEXT:    v_addc_u32_e32 v25, vcc, v22, v19, vcc
6152; TONGA-NEXT:    v_mad_u64_u32 v[18:19], s[0:1], v23, v11, 0
6153; TONGA-NEXT:    v_mul_lo_u32 v22, v23, v25
6154; TONGA-NEXT:    v_mul_lo_u32 v23, v24, v11
6155; TONGA-NEXT:    v_mul_hi_u32 v24, v11, v18
6156; TONGA-NEXT:    v_mad_u64_u32 v[20:21], s[0:1], v25, v18, 0
6157; TONGA-NEXT:    v_add_u32_e32 v19, vcc, v22, v19
6158; TONGA-NEXT:    v_add_u32_e32 v19, vcc, v19, v23
6159; TONGA-NEXT:    v_mad_u64_u32 v[22:23], s[0:1], v11, v19, 0
6160; TONGA-NEXT:    v_mad_u64_u32 v[18:19], s[0:1], v25, v19, 0
6161; TONGA-NEXT:    v_add_u32_e32 v22, vcc, v24, v22
6162; TONGA-NEXT:    v_addc_u32_e32 v23, vcc, 0, v23, vcc
6163; TONGA-NEXT:    v_add_u32_e32 v20, vcc, v22, v20
6164; TONGA-NEXT:    v_addc_u32_e32 v20, vcc, v23, v21, vcc
6165; TONGA-NEXT:    v_addc_u32_e32 v19, vcc, 0, v19, vcc
6166; TONGA-NEXT:    v_add_u32_e32 v18, vcc, v20, v18
6167; TONGA-NEXT:    v_addc_u32_e32 v19, vcc, 0, v19, vcc
6168; TONGA-NEXT:    v_add_u32_e32 v11, vcc, v11, v18
6169; TONGA-NEXT:    v_addc_u32_e32 v20, vcc, v25, v19, vcc
6170; TONGA-NEXT:    v_ashrrev_i32_e32 v22, 31, v15
6171; TONGA-NEXT:    v_add_u32_e32 v18, vcc, v14, v22
6172; TONGA-NEXT:    v_xor_b32_e32 v23, v18, v22
6173; TONGA-NEXT:    v_mad_u64_u32 v[18:19], s[0:1], v23, v20, 0
6174; TONGA-NEXT:    v_mul_hi_u32 v21, v23, v11
6175; TONGA-NEXT:    v_addc_u32_e32 v15, vcc, v15, v22, vcc
6176; TONGA-NEXT:    v_xor_b32_e32 v15, v15, v22
6177; TONGA-NEXT:    v_add_u32_e32 v24, vcc, v21, v18
6178; TONGA-NEXT:    v_addc_u32_e32 v25, vcc, 0, v19, vcc
6179; TONGA-NEXT:    v_mad_u64_u32 v[18:19], s[0:1], v15, v11, 0
6180; TONGA-NEXT:    v_mad_u64_u32 v[20:21], s[0:1], v15, v20, 0
6181; TONGA-NEXT:    v_add_u32_e32 v11, vcc, v24, v18
6182; TONGA-NEXT:    v_addc_u32_e32 v11, vcc, v25, v19, vcc
6183; TONGA-NEXT:    v_addc_u32_e32 v18, vcc, 0, v21, vcc
6184; TONGA-NEXT:    v_add_u32_e32 v11, vcc, v11, v20
6185; TONGA-NEXT:    v_addc_u32_e32 v18, vcc, 0, v18, vcc
6186; TONGA-NEXT:    v_mul_lo_u32 v20, v9, v18
6187; TONGA-NEXT:    v_mad_u64_u32 v[18:19], s[0:1], v9, v11, 0
6188; TONGA-NEXT:    v_mul_lo_u32 v11, v8, v11
6189; TONGA-NEXT:    v_add_u32_e32 v19, vcc, v20, v19
6190; TONGA-NEXT:    v_add_u32_e32 v11, vcc, v11, v19
6191; TONGA-NEXT:    v_sub_u32_e32 v19, vcc, v15, v11
6192; TONGA-NEXT:    v_sub_u32_e32 v18, vcc, v23, v18
6193; TONGA-NEXT:    v_subb_u32_e64 v19, s[0:1], v19, v8, vcc
6194; TONGA-NEXT:    v_sub_u32_e64 v20, s[0:1], v18, v9
6195; TONGA-NEXT:    v_subbrev_u32_e64 v21, s[2:3], 0, v19, s[0:1]
6196; TONGA-NEXT:    v_cmp_ge_u32_e64 s[2:3], v21, v8
6197; TONGA-NEXT:    v_cndmask_b32_e64 v23, 0, -1, s[2:3]
6198; TONGA-NEXT:    v_cmp_ge_u32_e64 s[2:3], v20, v9
6199; TONGA-NEXT:    v_subb_u32_e32 v11, vcc, v15, v11, vcc
6200; TONGA-NEXT:    v_cndmask_b32_e64 v24, 0, -1, s[2:3]
6201; TONGA-NEXT:    v_cmp_eq_u32_e64 s[2:3], v21, v8
6202; TONGA-NEXT:    v_subb_u32_e64 v19, s[0:1], v19, v8, s[0:1]
6203; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v11, v8
6204; TONGA-NEXT:    v_cndmask_b32_e64 v23, v23, v24, s[2:3]
6205; TONGA-NEXT:    v_sub_u32_e64 v24, s[0:1], v20, v9
6206; TONGA-NEXT:    v_cndmask_b32_e64 v15, 0, -1, vcc
6207; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v18, v9
6208; TONGA-NEXT:    v_subbrev_u32_e64 v19, s[0:1], 0, v19, s[0:1]
6209; TONGA-NEXT:    v_cndmask_b32_e64 v9, 0, -1, vcc
6210; TONGA-NEXT:    v_cmp_eq_u32_e32 vcc, v11, v8
6211; TONGA-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v23
6212; TONGA-NEXT:    v_cndmask_b32_e32 v8, v15, v9, vcc
6213; TONGA-NEXT:    v_cndmask_b32_e64 v20, v20, v24, s[0:1]
6214; TONGA-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v8
6215; TONGA-NEXT:    v_cndmask_b32_e64 v19, v21, v19, s[0:1]
6216; TONGA-NEXT:    v_cndmask_b32_e32 v9, v18, v20, vcc
6217; TONGA-NEXT:    v_cndmask_b32_e32 v8, v11, v19, vcc
6218; TONGA-NEXT:    v_xor_b32_e32 v9, v9, v22
6219; TONGA-NEXT:    v_xor_b32_e32 v11, v8, v22
6220; TONGA-NEXT:    v_sub_u32_e32 v8, vcc, v9, v22
6221; TONGA-NEXT:    v_subb_u32_e32 v9, vcc, v11, v22, vcc
6222; TONGA-NEXT:    s_cbranch_execnz .LBB12_3
6223; TONGA-NEXT:  .LBB12_2:
6224; TONGA-NEXT:    v_cvt_f32_u32_e32 v8, v10
6225; TONGA-NEXT:    v_sub_u32_e32 v9, vcc, 0, v10
6226; TONGA-NEXT:    v_rcp_iflag_f32_e32 v8, v8
6227; TONGA-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
6228; TONGA-NEXT:    v_cvt_u32_f32_e32 v8, v8
6229; TONGA-NEXT:    v_mul_lo_u32 v9, v9, v8
6230; TONGA-NEXT:    v_mul_hi_u32 v9, v8, v9
6231; TONGA-NEXT:    v_add_u32_e32 v8, vcc, v8, v9
6232; TONGA-NEXT:    v_mul_hi_u32 v8, v14, v8
6233; TONGA-NEXT:    v_mul_lo_u32 v8, v8, v10
6234; TONGA-NEXT:    v_sub_u32_e32 v8, vcc, v14, v8
6235; TONGA-NEXT:    v_subrev_u32_e32 v9, vcc, v10, v8
6236; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v10
6237; TONGA-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
6238; TONGA-NEXT:    v_sub_u32_e32 v9, vcc, v8, v10
6239; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v10
6240; TONGA-NEXT:    v_cndmask_b32_e32 v8, v8, v9, vcc
6241; TONGA-NEXT:    v_mov_b32_e32 v9, 0
6242; TONGA-NEXT:  .LBB12_3:
6243; TONGA-NEXT:    v_or_b32_e32 v11, v17, v13
6244; TONGA-NEXT:    v_mov_b32_e32 v10, 0
6245; TONGA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[10:11]
6246; TONGA-NEXT:    s_cbranch_vccz .LBB12_14
6247; TONGA-NEXT:  ; %bb.4:
6248; TONGA-NEXT:    v_ashrrev_i32_e32 v10, 31, v13
6249; TONGA-NEXT:    v_add_u32_e32 v11, vcc, v12, v10
6250; TONGA-NEXT:    v_addc_u32_e32 v13, vcc, v13, v10, vcc
6251; TONGA-NEXT:    v_xor_b32_e32 v15, v11, v10
6252; TONGA-NEXT:    v_xor_b32_e32 v20, v13, v10
6253; TONGA-NEXT:    v_cvt_f32_u32_e32 v10, v15
6254; TONGA-NEXT:    v_cvt_f32_u32_e32 v11, v20
6255; TONGA-NEXT:    v_sub_u32_e32 v21, vcc, 0, v15
6256; TONGA-NEXT:    v_subb_u32_e32 v22, vcc, 0, v20, vcc
6257; TONGA-NEXT:    v_madmk_f32 v10, v11, 0x4f800000, v10
6258; TONGA-NEXT:    v_rcp_f32_e32 v10, v10
6259; TONGA-NEXT:    v_mul_f32_e32 v10, 0x5f7ffffc, v10
6260; TONGA-NEXT:    v_mul_f32_e32 v11, 0x2f800000, v10
6261; TONGA-NEXT:    v_trunc_f32_e32 v11, v11
6262; TONGA-NEXT:    v_madmk_f32 v10, v11, 0xcf800000, v10
6263; TONGA-NEXT:    v_cvt_u32_f32_e32 v18, v11
6264; TONGA-NEXT:    v_cvt_u32_f32_e32 v19, v10
6265; TONGA-NEXT:    v_mul_lo_u32 v13, v21, v18
6266; TONGA-NEXT:    v_mad_u64_u32 v[10:11], s[0:1], v21, v19, 0
6267; TONGA-NEXT:    v_mul_lo_u32 v14, v22, v19
6268; TONGA-NEXT:    v_add_u32_e32 v11, vcc, v11, v13
6269; TONGA-NEXT:    v_add_u32_e32 v23, vcc, v11, v14
6270; TONGA-NEXT:    v_mad_u64_u32 v[13:14], s[0:1], v19, v23, 0
6271; TONGA-NEXT:    v_mul_hi_u32 v11, v19, v10
6272; TONGA-NEXT:    v_add_u32_e32 v24, vcc, v11, v13
6273; TONGA-NEXT:    v_mad_u64_u32 v[10:11], s[0:1], v18, v10, 0
6274; TONGA-NEXT:    v_addc_u32_e32 v25, vcc, 0, v14, vcc
6275; TONGA-NEXT:    v_mad_u64_u32 v[13:14], s[0:1], v18, v23, 0
6276; TONGA-NEXT:    v_add_u32_e32 v10, vcc, v24, v10
6277; TONGA-NEXT:    v_addc_u32_e32 v10, vcc, v25, v11, vcc
6278; TONGA-NEXT:    v_addc_u32_e32 v11, vcc, 0, v14, vcc
6279; TONGA-NEXT:    v_add_u32_e32 v10, vcc, v10, v13
6280; TONGA-NEXT:    v_addc_u32_e32 v11, vcc, 0, v11, vcc
6281; TONGA-NEXT:    v_add_u32_e32 v23, vcc, v19, v10
6282; TONGA-NEXT:    v_addc_u32_e32 v24, vcc, v18, v11, vcc
6283; TONGA-NEXT:    v_mad_u64_u32 v[10:11], s[0:1], v21, v23, 0
6284; TONGA-NEXT:    v_mul_lo_u32 v18, v21, v24
6285; TONGA-NEXT:    v_mul_lo_u32 v19, v22, v23
6286; TONGA-NEXT:    v_mul_hi_u32 v21, v23, v10
6287; TONGA-NEXT:    v_mad_u64_u32 v[13:14], s[0:1], v24, v10, 0
6288; TONGA-NEXT:    v_add_u32_e32 v11, vcc, v18, v11
6289; TONGA-NEXT:    v_add_u32_e32 v11, vcc, v11, v19
6290; TONGA-NEXT:    v_mad_u64_u32 v[18:19], s[0:1], v23, v11, 0
6291; TONGA-NEXT:    v_mad_u64_u32 v[10:11], s[0:1], v24, v11, 0
6292; TONGA-NEXT:    v_add_u32_e32 v18, vcc, v21, v18
6293; TONGA-NEXT:    v_addc_u32_e32 v19, vcc, 0, v19, vcc
6294; TONGA-NEXT:    v_add_u32_e32 v13, vcc, v18, v13
6295; TONGA-NEXT:    v_addc_u32_e32 v13, vcc, v19, v14, vcc
6296; TONGA-NEXT:    v_addc_u32_e32 v11, vcc, 0, v11, vcc
6297; TONGA-NEXT:    v_add_u32_e32 v10, vcc, v13, v10
6298; TONGA-NEXT:    v_addc_u32_e32 v11, vcc, 0, v11, vcc
6299; TONGA-NEXT:    v_add_u32_e32 v13, vcc, v23, v10
6300; TONGA-NEXT:    v_addc_u32_e32 v14, vcc, v24, v11, vcc
6301; TONGA-NEXT:    v_ashrrev_i32_e32 v18, 31, v17
6302; TONGA-NEXT:    v_add_u32_e32 v10, vcc, v16, v18
6303; TONGA-NEXT:    v_xor_b32_e32 v19, v10, v18
6304; TONGA-NEXT:    v_mad_u64_u32 v[10:11], s[0:1], v19, v14, 0
6305; TONGA-NEXT:    v_mul_hi_u32 v21, v19, v13
6306; TONGA-NEXT:    v_addc_u32_e32 v17, vcc, v17, v18, vcc
6307; TONGA-NEXT:    v_xor_b32_e32 v17, v17, v18
6308; TONGA-NEXT:    v_add_u32_e32 v21, vcc, v21, v10
6309; TONGA-NEXT:    v_addc_u32_e32 v22, vcc, 0, v11, vcc
6310; TONGA-NEXT:    v_mad_u64_u32 v[10:11], s[0:1], v17, v13, 0
6311; TONGA-NEXT:    v_mad_u64_u32 v[13:14], s[0:1], v17, v14, 0
6312; TONGA-NEXT:    v_add_u32_e32 v10, vcc, v21, v10
6313; TONGA-NEXT:    v_addc_u32_e32 v10, vcc, v22, v11, vcc
6314; TONGA-NEXT:    v_addc_u32_e32 v11, vcc, 0, v14, vcc
6315; TONGA-NEXT:    v_add_u32_e32 v13, vcc, v10, v13
6316; TONGA-NEXT:    v_addc_u32_e32 v10, vcc, 0, v11, vcc
6317; TONGA-NEXT:    v_mul_lo_u32 v14, v15, v10
6318; TONGA-NEXT:    v_mad_u64_u32 v[10:11], s[0:1], v15, v13, 0
6319; TONGA-NEXT:    v_mul_lo_u32 v13, v20, v13
6320; TONGA-NEXT:    v_add_u32_e32 v11, vcc, v14, v11
6321; TONGA-NEXT:    v_add_u32_e32 v11, vcc, v13, v11
6322; TONGA-NEXT:    v_sub_u32_e32 v13, vcc, v17, v11
6323; TONGA-NEXT:    v_sub_u32_e32 v10, vcc, v19, v10
6324; TONGA-NEXT:    v_subb_u32_e64 v13, s[0:1], v13, v20, vcc
6325; TONGA-NEXT:    v_sub_u32_e64 v14, s[0:1], v10, v15
6326; TONGA-NEXT:    v_subbrev_u32_e64 v19, s[2:3], 0, v13, s[0:1]
6327; TONGA-NEXT:    v_cmp_ge_u32_e64 s[2:3], v19, v20
6328; TONGA-NEXT:    v_cndmask_b32_e64 v21, 0, -1, s[2:3]
6329; TONGA-NEXT:    v_cmp_ge_u32_e64 s[2:3], v14, v15
6330; TONGA-NEXT:    v_subb_u32_e32 v11, vcc, v17, v11, vcc
6331; TONGA-NEXT:    v_cndmask_b32_e64 v22, 0, -1, s[2:3]
6332; TONGA-NEXT:    v_cmp_eq_u32_e64 s[2:3], v19, v20
6333; TONGA-NEXT:    v_subb_u32_e64 v13, s[0:1], v13, v20, s[0:1]
6334; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v11, v20
6335; TONGA-NEXT:    v_cndmask_b32_e64 v21, v21, v22, s[2:3]
6336; TONGA-NEXT:    v_sub_u32_e64 v22, s[0:1], v14, v15
6337; TONGA-NEXT:    v_cndmask_b32_e64 v17, 0, -1, vcc
6338; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v10, v15
6339; TONGA-NEXT:    v_subbrev_u32_e64 v13, s[0:1], 0, v13, s[0:1]
6340; TONGA-NEXT:    v_cndmask_b32_e64 v15, 0, -1, vcc
6341; TONGA-NEXT:    v_cmp_eq_u32_e32 vcc, v11, v20
6342; TONGA-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v21
6343; TONGA-NEXT:    v_cndmask_b32_e32 v15, v17, v15, vcc
6344; TONGA-NEXT:    v_cndmask_b32_e64 v14, v14, v22, s[0:1]
6345; TONGA-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v15
6346; TONGA-NEXT:    v_cndmask_b32_e64 v13, v19, v13, s[0:1]
6347; TONGA-NEXT:    v_cndmask_b32_e32 v10, v10, v14, vcc
6348; TONGA-NEXT:    v_cndmask_b32_e32 v11, v11, v13, vcc
6349; TONGA-NEXT:    v_xor_b32_e32 v10, v10, v18
6350; TONGA-NEXT:    v_xor_b32_e32 v11, v11, v18
6351; TONGA-NEXT:    v_sub_u32_e32 v10, vcc, v10, v18
6352; TONGA-NEXT:    v_subb_u32_e32 v11, vcc, v11, v18, vcc
6353; TONGA-NEXT:    s_cbranch_execnz .LBB12_6
6354; TONGA-NEXT:  .LBB12_5:
6355; TONGA-NEXT:    v_cvt_f32_u32_e32 v10, v12
6356; TONGA-NEXT:    v_sub_u32_e32 v11, vcc, 0, v12
6357; TONGA-NEXT:    v_rcp_iflag_f32_e32 v10, v10
6358; TONGA-NEXT:    v_mul_f32_e32 v10, 0x4f7ffffe, v10
6359; TONGA-NEXT:    v_cvt_u32_f32_e32 v10, v10
6360; TONGA-NEXT:    v_mul_lo_u32 v11, v11, v10
6361; TONGA-NEXT:    v_mul_hi_u32 v11, v10, v11
6362; TONGA-NEXT:    v_add_u32_e32 v10, vcc, v10, v11
6363; TONGA-NEXT:    v_mul_hi_u32 v10, v16, v10
6364; TONGA-NEXT:    v_mul_lo_u32 v10, v10, v12
6365; TONGA-NEXT:    v_sub_u32_e32 v10, vcc, v16, v10
6366; TONGA-NEXT:    v_subrev_u32_e32 v11, vcc, v12, v10
6367; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v10, v12
6368; TONGA-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
6369; TONGA-NEXT:    v_subrev_u32_e32 v11, vcc, v12, v10
6370; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v10, v12
6371; TONGA-NEXT:    v_cndmask_b32_e32 v10, v10, v11, vcc
6372; TONGA-NEXT:    v_mov_b32_e32 v11, 0
6373; TONGA-NEXT:  .LBB12_6:
6374; TONGA-NEXT:    s_waitcnt vmcnt(0)
6375; TONGA-NEXT:    v_or_b32_e32 v13, v5, v1
6376; TONGA-NEXT:    v_mov_b32_e32 v12, 0
6377; TONGA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[12:13]
6378; TONGA-NEXT:    s_cbranch_vccz .LBB12_15
6379; TONGA-NEXT:  ; %bb.7:
6380; TONGA-NEXT:    v_ashrrev_i32_e32 v12, 31, v1
6381; TONGA-NEXT:    v_add_u32_e32 v13, vcc, v0, v12
6382; TONGA-NEXT:    v_addc_u32_e32 v1, vcc, v1, v12, vcc
6383; TONGA-NEXT:    v_xor_b32_e32 v18, v13, v12
6384; TONGA-NEXT:    v_xor_b32_e32 v1, v1, v12
6385; TONGA-NEXT:    v_cvt_f32_u32_e32 v12, v18
6386; TONGA-NEXT:    v_cvt_f32_u32_e32 v13, v1
6387; TONGA-NEXT:    v_sub_u32_e32 v19, vcc, 0, v18
6388; TONGA-NEXT:    v_subb_u32_e32 v20, vcc, 0, v1, vcc
6389; TONGA-NEXT:    v_madmk_f32 v12, v13, 0x4f800000, v12
6390; TONGA-NEXT:    v_rcp_f32_e32 v12, v12
6391; TONGA-NEXT:    v_mul_f32_e32 v12, 0x5f7ffffc, v12
6392; TONGA-NEXT:    v_mul_f32_e32 v13, 0x2f800000, v12
6393; TONGA-NEXT:    v_trunc_f32_e32 v13, v13
6394; TONGA-NEXT:    v_madmk_f32 v12, v13, 0xcf800000, v12
6395; TONGA-NEXT:    v_cvt_u32_f32_e32 v16, v13
6396; TONGA-NEXT:    v_cvt_u32_f32_e32 v17, v12
6397; TONGA-NEXT:    v_mul_lo_u32 v14, v19, v16
6398; TONGA-NEXT:    v_mad_u64_u32 v[12:13], s[0:1], v19, v17, 0
6399; TONGA-NEXT:    v_mul_lo_u32 v15, v20, v17
6400; TONGA-NEXT:    v_add_u32_e32 v13, vcc, v13, v14
6401; TONGA-NEXT:    v_add_u32_e32 v15, vcc, v13, v15
6402; TONGA-NEXT:    v_mad_u64_u32 v[13:14], s[0:1], v17, v15, 0
6403; TONGA-NEXT:    v_mul_hi_u32 v21, v17, v12
6404; TONGA-NEXT:    v_add_u32_e32 v21, vcc, v21, v13
6405; TONGA-NEXT:    v_mad_u64_u32 v[12:13], s[0:1], v16, v12, 0
6406; TONGA-NEXT:    v_addc_u32_e32 v22, vcc, 0, v14, vcc
6407; TONGA-NEXT:    v_mad_u64_u32 v[14:15], s[0:1], v16, v15, 0
6408; TONGA-NEXT:    v_add_u32_e32 v12, vcc, v21, v12
6409; TONGA-NEXT:    v_addc_u32_e32 v12, vcc, v22, v13, vcc
6410; TONGA-NEXT:    v_addc_u32_e32 v13, vcc, 0, v15, vcc
6411; TONGA-NEXT:    v_add_u32_e32 v12, vcc, v12, v14
6412; TONGA-NEXT:    v_addc_u32_e32 v13, vcc, 0, v13, vcc
6413; TONGA-NEXT:    v_add_u32_e32 v21, vcc, v17, v12
6414; TONGA-NEXT:    v_addc_u32_e32 v22, vcc, v16, v13, vcc
6415; TONGA-NEXT:    v_mad_u64_u32 v[12:13], s[0:1], v19, v21, 0
6416; TONGA-NEXT:    v_mul_lo_u32 v16, v19, v22
6417; TONGA-NEXT:    v_mul_lo_u32 v17, v20, v21
6418; TONGA-NEXT:    v_mul_hi_u32 v19, v21, v12
6419; TONGA-NEXT:    v_mad_u64_u32 v[14:15], s[0:1], v22, v12, 0
6420; TONGA-NEXT:    v_add_u32_e32 v13, vcc, v16, v13
6421; TONGA-NEXT:    v_add_u32_e32 v13, vcc, v13, v17
6422; TONGA-NEXT:    v_mad_u64_u32 v[16:17], s[0:1], v21, v13, 0
6423; TONGA-NEXT:    v_mad_u64_u32 v[12:13], s[0:1], v22, v13, 0
6424; TONGA-NEXT:    v_add_u32_e32 v16, vcc, v19, v16
6425; TONGA-NEXT:    v_addc_u32_e32 v17, vcc, 0, v17, vcc
6426; TONGA-NEXT:    v_add_u32_e32 v14, vcc, v16, v14
6427; TONGA-NEXT:    v_addc_u32_e32 v14, vcc, v17, v15, vcc
6428; TONGA-NEXT:    v_addc_u32_e32 v13, vcc, 0, v13, vcc
6429; TONGA-NEXT:    v_add_u32_e32 v12, vcc, v14, v12
6430; TONGA-NEXT:    v_addc_u32_e32 v13, vcc, 0, v13, vcc
6431; TONGA-NEXT:    v_add_u32_e32 v14, vcc, v21, v12
6432; TONGA-NEXT:    v_addc_u32_e32 v15, vcc, v22, v13, vcc
6433; TONGA-NEXT:    v_ashrrev_i32_e32 v16, 31, v5
6434; TONGA-NEXT:    v_add_u32_e32 v12, vcc, v4, v16
6435; TONGA-NEXT:    v_xor_b32_e32 v17, v12, v16
6436; TONGA-NEXT:    v_mad_u64_u32 v[12:13], s[0:1], v17, v15, 0
6437; TONGA-NEXT:    v_mul_hi_u32 v19, v17, v14
6438; TONGA-NEXT:    v_addc_u32_e32 v5, vcc, v5, v16, vcc
6439; TONGA-NEXT:    v_xor_b32_e32 v5, v5, v16
6440; TONGA-NEXT:    v_add_u32_e32 v19, vcc, v19, v12
6441; TONGA-NEXT:    v_addc_u32_e32 v20, vcc, 0, v13, vcc
6442; TONGA-NEXT:    v_mad_u64_u32 v[12:13], s[0:1], v5, v14, 0
6443; TONGA-NEXT:    v_mad_u64_u32 v[14:15], s[0:1], v5, v15, 0
6444; TONGA-NEXT:    v_add_u32_e32 v12, vcc, v19, v12
6445; TONGA-NEXT:    v_addc_u32_e32 v12, vcc, v20, v13, vcc
6446; TONGA-NEXT:    v_addc_u32_e32 v13, vcc, 0, v15, vcc
6447; TONGA-NEXT:    v_add_u32_e32 v14, vcc, v12, v14
6448; TONGA-NEXT:    v_addc_u32_e32 v12, vcc, 0, v13, vcc
6449; TONGA-NEXT:    v_mul_lo_u32 v15, v18, v12
6450; TONGA-NEXT:    v_mad_u64_u32 v[12:13], s[0:1], v18, v14, 0
6451; TONGA-NEXT:    v_mul_lo_u32 v14, v1, v14
6452; TONGA-NEXT:    v_add_u32_e32 v13, vcc, v15, v13
6453; TONGA-NEXT:    v_add_u32_e32 v13, vcc, v14, v13
6454; TONGA-NEXT:    v_sub_u32_e32 v14, vcc, v5, v13
6455; TONGA-NEXT:    v_sub_u32_e32 v12, vcc, v17, v12
6456; TONGA-NEXT:    v_subb_u32_e64 v14, s[0:1], v14, v1, vcc
6457; TONGA-NEXT:    v_sub_u32_e64 v15, s[0:1], v12, v18
6458; TONGA-NEXT:    v_subbrev_u32_e64 v17, s[2:3], 0, v14, s[0:1]
6459; TONGA-NEXT:    v_cmp_ge_u32_e64 s[2:3], v17, v1
6460; TONGA-NEXT:    v_cndmask_b32_e64 v19, 0, -1, s[2:3]
6461; TONGA-NEXT:    v_cmp_ge_u32_e64 s[2:3], v15, v18
6462; TONGA-NEXT:    v_cndmask_b32_e64 v20, 0, -1, s[2:3]
6463; TONGA-NEXT:    v_cmp_eq_u32_e64 s[2:3], v17, v1
6464; TONGA-NEXT:    v_subb_u32_e64 v14, s[0:1], v14, v1, s[0:1]
6465; TONGA-NEXT:    v_cndmask_b32_e64 v19, v19, v20, s[2:3]
6466; TONGA-NEXT:    v_sub_u32_e64 v20, s[0:1], v15, v18
6467; TONGA-NEXT:    v_subb_u32_e32 v5, vcc, v5, v13, vcc
6468; TONGA-NEXT:    v_subbrev_u32_e64 v14, s[0:1], 0, v14, s[0:1]
6469; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v5, v1
6470; TONGA-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v19
6471; TONGA-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc
6472; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v12, v18
6473; TONGA-NEXT:    v_cndmask_b32_e64 v14, v17, v14, s[0:1]
6474; TONGA-NEXT:    v_cndmask_b32_e64 v17, 0, -1, vcc
6475; TONGA-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v1
6476; TONGA-NEXT:    v_cndmask_b32_e32 v1, v13, v17, vcc
6477; TONGA-NEXT:    v_cndmask_b32_e64 v15, v15, v20, s[0:1]
6478; TONGA-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
6479; TONGA-NEXT:    v_cndmask_b32_e32 v1, v5, v14, vcc
6480; TONGA-NEXT:    v_cndmask_b32_e32 v5, v12, v15, vcc
6481; TONGA-NEXT:    v_xor_b32_e32 v5, v5, v16
6482; TONGA-NEXT:    v_xor_b32_e32 v1, v1, v16
6483; TONGA-NEXT:    v_sub_u32_e32 v12, vcc, v5, v16
6484; TONGA-NEXT:    v_subb_u32_e32 v13, vcc, v1, v16, vcc
6485; TONGA-NEXT:    s_cbranch_execnz .LBB12_9
6486; TONGA-NEXT:  .LBB12_8:
6487; TONGA-NEXT:    v_cvt_f32_u32_e32 v1, v0
6488; TONGA-NEXT:    v_sub_u32_e32 v5, vcc, 0, v0
6489; TONGA-NEXT:    v_mov_b32_e32 v13, 0
6490; TONGA-NEXT:    v_rcp_iflag_f32_e32 v1, v1
6491; TONGA-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
6492; TONGA-NEXT:    v_cvt_u32_f32_e32 v1, v1
6493; TONGA-NEXT:    v_mul_lo_u32 v5, v5, v1
6494; TONGA-NEXT:    v_mul_hi_u32 v5, v1, v5
6495; TONGA-NEXT:    v_add_u32_e32 v1, vcc, v1, v5
6496; TONGA-NEXT:    v_mul_hi_u32 v1, v4, v1
6497; TONGA-NEXT:    v_mul_lo_u32 v1, v1, v0
6498; TONGA-NEXT:    v_sub_u32_e32 v1, vcc, v4, v1
6499; TONGA-NEXT:    v_subrev_u32_e32 v4, vcc, v0, v1
6500; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v0
6501; TONGA-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
6502; TONGA-NEXT:    v_subrev_u32_e32 v4, vcc, v0, v1
6503; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v0
6504; TONGA-NEXT:    v_cndmask_b32_e32 v12, v1, v4, vcc
6505; TONGA-NEXT:  .LBB12_9:
6506; TONGA-NEXT:    v_or_b32_e32 v1, v7, v3
6507; TONGA-NEXT:    v_mov_b32_e32 v0, 0
6508; TONGA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
6509; TONGA-NEXT:    s_cbranch_vccz .LBB12_16
6510; TONGA-NEXT:  ; %bb.10:
6511; TONGA-NEXT:    v_ashrrev_i32_e32 v0, 31, v3
6512; TONGA-NEXT:    v_add_u32_e32 v1, vcc, v2, v0
6513; TONGA-NEXT:    v_addc_u32_e32 v3, vcc, v3, v0, vcc
6514; TONGA-NEXT:    v_xor_b32_e32 v5, v1, v0
6515; TONGA-NEXT:    v_xor_b32_e32 v16, v3, v0
6516; TONGA-NEXT:    v_cvt_f32_u32_e32 v0, v5
6517; TONGA-NEXT:    v_cvt_f32_u32_e32 v1, v16
6518; TONGA-NEXT:    v_sub_u32_e32 v17, vcc, 0, v5
6519; TONGA-NEXT:    v_subb_u32_e32 v18, vcc, 0, v16, vcc
6520; TONGA-NEXT:    v_madmk_f32 v0, v1, 0x4f800000, v0
6521; TONGA-NEXT:    v_rcp_f32_e32 v0, v0
6522; TONGA-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
6523; TONGA-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
6524; TONGA-NEXT:    v_trunc_f32_e32 v1, v1
6525; TONGA-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
6526; TONGA-NEXT:    v_cvt_u32_f32_e32 v14, v1
6527; TONGA-NEXT:    v_cvt_u32_f32_e32 v15, v0
6528; TONGA-NEXT:    v_mul_lo_u32 v3, v17, v14
6529; TONGA-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], v17, v15, 0
6530; TONGA-NEXT:    v_mul_lo_u32 v4, v18, v15
6531; TONGA-NEXT:    v_add_u32_e32 v1, vcc, v1, v3
6532; TONGA-NEXT:    v_add_u32_e32 v19, vcc, v1, v4
6533; TONGA-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], v15, v19, 0
6534; TONGA-NEXT:    v_mul_hi_u32 v1, v15, v0
6535; TONGA-NEXT:    v_add_u32_e32 v20, vcc, v1, v3
6536; TONGA-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], v14, v0, 0
6537; TONGA-NEXT:    v_addc_u32_e32 v21, vcc, 0, v4, vcc
6538; TONGA-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], v14, v19, 0
6539; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v20, v0
6540; TONGA-NEXT:    v_addc_u32_e32 v0, vcc, v21, v1, vcc
6541; TONGA-NEXT:    v_addc_u32_e32 v1, vcc, 0, v4, vcc
6542; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v0, v3
6543; TONGA-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6544; TONGA-NEXT:    v_add_u32_e32 v19, vcc, v15, v0
6545; TONGA-NEXT:    v_addc_u32_e32 v20, vcc, v14, v1, vcc
6546; TONGA-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], v17, v19, 0
6547; TONGA-NEXT:    v_mul_lo_u32 v14, v17, v20
6548; TONGA-NEXT:    v_mul_lo_u32 v15, v18, v19
6549; TONGA-NEXT:    v_mul_hi_u32 v17, v19, v0
6550; TONGA-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], v20, v0, 0
6551; TONGA-NEXT:    v_add_u32_e32 v1, vcc, v14, v1
6552; TONGA-NEXT:    v_add_u32_e32 v1, vcc, v1, v15
6553; TONGA-NEXT:    v_mad_u64_u32 v[14:15], s[0:1], v19, v1, 0
6554; TONGA-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], v20, v1, 0
6555; TONGA-NEXT:    v_add_u32_e32 v14, vcc, v17, v14
6556; TONGA-NEXT:    v_addc_u32_e32 v15, vcc, 0, v15, vcc
6557; TONGA-NEXT:    v_add_u32_e32 v3, vcc, v14, v3
6558; TONGA-NEXT:    v_addc_u32_e32 v3, vcc, v15, v4, vcc
6559; TONGA-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6560; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v3, v0
6561; TONGA-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6562; TONGA-NEXT:    v_add_u32_e32 v3, vcc, v19, v0
6563; TONGA-NEXT:    v_addc_u32_e32 v4, vcc, v20, v1, vcc
6564; TONGA-NEXT:    v_ashrrev_i32_e32 v15, 31, v7
6565; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v6, v15
6566; TONGA-NEXT:    v_xor_b32_e32 v14, v0, v15
6567; TONGA-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], v14, v4, 0
6568; TONGA-NEXT:    v_mul_hi_u32 v17, v14, v3
6569; TONGA-NEXT:    v_addc_u32_e32 v7, vcc, v7, v15, vcc
6570; TONGA-NEXT:    v_xor_b32_e32 v7, v7, v15
6571; TONGA-NEXT:    v_add_u32_e32 v17, vcc, v17, v0
6572; TONGA-NEXT:    v_addc_u32_e32 v18, vcc, 0, v1, vcc
6573; TONGA-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], v7, v3, 0
6574; TONGA-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], v7, v4, 0
6575; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v17, v0
6576; TONGA-NEXT:    v_addc_u32_e32 v0, vcc, v18, v1, vcc
6577; TONGA-NEXT:    v_addc_u32_e32 v1, vcc, 0, v4, vcc
6578; TONGA-NEXT:    v_add_u32_e32 v3, vcc, v0, v3
6579; TONGA-NEXT:    v_addc_u32_e32 v0, vcc, 0, v1, vcc
6580; TONGA-NEXT:    v_mul_lo_u32 v4, v5, v0
6581; TONGA-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], v5, v3, 0
6582; TONGA-NEXT:    v_mul_lo_u32 v3, v16, v3
6583; TONGA-NEXT:    v_add_u32_e32 v1, vcc, v4, v1
6584; TONGA-NEXT:    v_add_u32_e32 v1, vcc, v3, v1
6585; TONGA-NEXT:    v_sub_u32_e32 v3, vcc, v7, v1
6586; TONGA-NEXT:    v_sub_u32_e32 v0, vcc, v14, v0
6587; TONGA-NEXT:    v_subb_u32_e64 v3, s[0:1], v3, v16, vcc
6588; TONGA-NEXT:    v_sub_u32_e64 v4, s[0:1], v0, v5
6589; TONGA-NEXT:    v_subbrev_u32_e64 v14, s[2:3], 0, v3, s[0:1]
6590; TONGA-NEXT:    v_cmp_ge_u32_e64 s[2:3], v14, v16
6591; TONGA-NEXT:    v_cndmask_b32_e64 v17, 0, -1, s[2:3]
6592; TONGA-NEXT:    v_cmp_ge_u32_e64 s[2:3], v4, v5
6593; TONGA-NEXT:    v_subb_u32_e32 v1, vcc, v7, v1, vcc
6594; TONGA-NEXT:    v_cndmask_b32_e64 v18, 0, -1, s[2:3]
6595; TONGA-NEXT:    v_cmp_eq_u32_e64 s[2:3], v14, v16
6596; TONGA-NEXT:    v_subb_u32_e64 v3, s[0:1], v3, v16, s[0:1]
6597; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v16
6598; TONGA-NEXT:    v_cndmask_b32_e64 v17, v17, v18, s[2:3]
6599; TONGA-NEXT:    v_sub_u32_e64 v18, s[0:1], v4, v5
6600; TONGA-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
6601; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v5
6602; TONGA-NEXT:    v_subbrev_u32_e64 v3, s[0:1], 0, v3, s[0:1]
6603; TONGA-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
6604; TONGA-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v16
6605; TONGA-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v17
6606; TONGA-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc
6607; TONGA-NEXT:    v_cndmask_b32_e64 v4, v4, v18, s[0:1]
6608; TONGA-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
6609; TONGA-NEXT:    v_cndmask_b32_e64 v3, v14, v3, s[0:1]
6610; TONGA-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
6611; TONGA-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
6612; TONGA-NEXT:    v_xor_b32_e32 v0, v0, v15
6613; TONGA-NEXT:    v_xor_b32_e32 v1, v1, v15
6614; TONGA-NEXT:    v_sub_u32_e32 v14, vcc, v0, v15
6615; TONGA-NEXT:    v_subb_u32_e32 v15, vcc, v1, v15, vcc
6616; TONGA-NEXT:    s_cbranch_execnz .LBB12_12
6617; TONGA-NEXT:  .LBB12_11:
6618; TONGA-NEXT:    v_cvt_f32_u32_e32 v0, v2
6619; TONGA-NEXT:    v_sub_u32_e32 v1, vcc, 0, v2
6620; TONGA-NEXT:    v_mov_b32_e32 v15, 0
6621; TONGA-NEXT:    v_rcp_iflag_f32_e32 v0, v0
6622; TONGA-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
6623; TONGA-NEXT:    v_cvt_u32_f32_e32 v0, v0
6624; TONGA-NEXT:    v_mul_lo_u32 v1, v1, v0
6625; TONGA-NEXT:    v_mul_hi_u32 v1, v0, v1
6626; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
6627; TONGA-NEXT:    v_mul_hi_u32 v0, v6, v0
6628; TONGA-NEXT:    v_mul_lo_u32 v0, v0, v2
6629; TONGA-NEXT:    v_sub_u32_e32 v0, vcc, v6, v0
6630; TONGA-NEXT:    v_subrev_u32_e32 v1, vcc, v2, v0
6631; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
6632; TONGA-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
6633; TONGA-NEXT:    v_subrev_u32_e32 v1, vcc, v2, v0
6634; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
6635; TONGA-NEXT:    v_cndmask_b32_e32 v14, v0, v1, vcc
6636; TONGA-NEXT:  .LBB12_12:
6637; TONGA-NEXT:    v_mov_b32_e32 v0, s4
6638; TONGA-NEXT:    v_mov_b32_e32 v1, s5
6639; TONGA-NEXT:    s_add_u32 s0, s4, 16
6640; TONGA-NEXT:    flat_store_dwordx4 v[0:1], v[8:11]
6641; TONGA-NEXT:    s_addc_u32 s1, s5, 0
6642; TONGA-NEXT:    v_mov_b32_e32 v0, s0
6643; TONGA-NEXT:    v_mov_b32_e32 v1, s1
6644; TONGA-NEXT:    flat_store_dwordx4 v[0:1], v[12:15]
6645; TONGA-NEXT:    s_endpgm
6646; TONGA-NEXT:  .LBB12_13:
6647; TONGA-NEXT:    ; implicit-def: $vgpr8_vgpr9
6648; TONGA-NEXT:    s_branch .LBB12_2
6649; TONGA-NEXT:  .LBB12_14:
6650; TONGA-NEXT:    s_branch .LBB12_5
6651; TONGA-NEXT:  .LBB12_15:
6652; TONGA-NEXT:    ; implicit-def: $vgpr12_vgpr13
6653; TONGA-NEXT:    s_branch .LBB12_8
6654; TONGA-NEXT:  .LBB12_16:
6655; TONGA-NEXT:    s_branch .LBB12_11
6656;
6657; EG-LABEL: srem_v4i64:
6658; EG:       ; %bb.0:
6659; EG-NEXT:    ALU 0, @34, KC0[CB0:0-32], KC1[]
6660; EG-NEXT:    TEX 1 @26
6661; EG-NEXT:    ALU 114, @35, KC0[], KC1[]
6662; EG-NEXT:    ALU 115, @150, KC0[], KC1[]
6663; EG-NEXT:    ALU 115, @266, KC0[], KC1[]
6664; EG-NEXT:    ALU 111, @382, KC0[], KC1[]
6665; EG-NEXT:    TEX 1 @30
6666; EG-NEXT:    ALU 114, @494, KC0[], KC1[]
6667; EG-NEXT:    ALU 113, @609, KC0[], KC1[]
6668; EG-NEXT:    ALU 114, @723, KC0[], KC1[]
6669; EG-NEXT:    ALU 113, @838, KC0[], KC1[]
6670; EG-NEXT:    ALU 114, @952, KC0[], KC1[]
6671; EG-NEXT:    ALU 113, @1067, KC0[], KC1[]
6672; EG-NEXT:    ALU 114, @1181, KC0[], KC1[]
6673; EG-NEXT:    ALU 113, @1296, KC0[], KC1[]
6674; EG-NEXT:    ALU 114, @1410, KC0[], KC1[]
6675; EG-NEXT:    ALU 114, @1525, KC0[], KC1[]
6676; EG-NEXT:    ALU 114, @1640, KC0[], KC1[]
6677; EG-NEXT:    ALU 115, @1755, KC0[], KC1[]
6678; EG-NEXT:    ALU 113, @1871, KC0[], KC1[]
6679; EG-NEXT:    ALU 112, @1985, KC0[], KC1[]
6680; EG-NEXT:    ALU 99, @2098, KC0[CB0:0-32], KC1[]
6681; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T1.X, 0
6682; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T0.X, 1
6683; EG-NEXT:    CF_END
6684; EG-NEXT:    PAD
6685; EG-NEXT:    Fetch clause starting at 26:
6686; EG-NEXT:     VTX_READ_128 T1.XYZW, T2.X, 32, #1
6687; EG-NEXT:     VTX_READ_128 T0.XYZW, T2.X, 0, #1
6688; EG-NEXT:    Fetch clause starting at 30:
6689; EG-NEXT:     VTX_READ_128 T9.XYZW, T2.X, 16, #1
6690; EG-NEXT:     VTX_READ_128 T10.XYZW, T2.X, 48, #1
6691; EG-NEXT:    ALU clause starting at 34:
6692; EG-NEXT:     MOV * T2.X, KC0[2].Z,
6693; EG-NEXT:    ALU clause starting at 35:
6694; EG-NEXT:     ASHR * T3.W, T1.Y, literal.x,
6695; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6696; EG-NEXT:     ADD_INT * T2.W, T1.X, PV.W,
6697; EG-NEXT:     XOR_INT * T7.W, PV.W, T3.W,
6698; EG-NEXT:     SUB_INT T2.Z, 0.0, PV.W,
6699; EG-NEXT:     ASHR T2.W, T0.Y, literal.x,
6700; EG-NEXT:     RECIP_UINT * T2.Y, PV.W,
6701; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6702; EG-NEXT:     ADD_INT T3.Z, T0.Y, PV.W,
6703; EG-NEXT:     ADDC_UINT T4.W, T0.X, PV.W,
6704; EG-NEXT:     MULLO_INT * T0.Y, PV.Z, PS,
6705; EG-NEXT:     ADD_INT T4.W, PV.Z, PV.W,
6706; EG-NEXT:     MULHI * T0.Y, T2.Y, PS,
6707; EG-NEXT:     ADD_INT T5.W, T2.Y, PS,
6708; EG-NEXT:     XOR_INT * T4.W, PV.W, T2.W,
6709; EG-NEXT:     MULHI * T0.Y, PS, PV.W,
6710; EG-NEXT:     MULLO_INT * T0.Y, PS, T7.W,
6711; EG-NEXT:     SUB_INT * T5.W, T4.W, PS,
6712; EG-NEXT:     SETGE_UINT T6.W, PV.W, T7.W,
6713; EG-NEXT:     SUB_INT * T8.W, PV.W, T7.W,
6714; EG-NEXT:     CNDE_INT T2.Z, PV.W, T5.W, PS, BS:VEC_021/SCL_122
6715; EG-NEXT:     ADD_INT T5.W, T1.Y, T3.W,
6716; EG-NEXT:     ADDC_UINT * T6.W, T1.X, T3.W,
6717; EG-NEXT:     ADD_INT T3.Z, PV.W, PS,
6718; EG-NEXT:     SETGE_UINT T5.W, PV.Z, T7.W,
6719; EG-NEXT:     SUB_INT * T6.W, PV.Z, T7.W,
6720; EG-NEXT:     ADD_INT T4.Z, T0.X, T2.W, BS:VEC_021/SCL_122
6721; EG-NEXT:     CNDE_INT T5.W, PV.W, T2.Z, PS,
6722; EG-NEXT:     XOR_INT * T6.W, PV.Z, T3.W,
6723; EG-NEXT:     CNDE_INT T3.W, PS, PV.W, T4.W,
6724; EG-NEXT:     XOR_INT * T8.W, PV.Z, T2.W,
6725; EG-NEXT:     BIT_ALIGN_INT T4.W, PV.W, PS, literal.x,
6726; EG-NEXT:     LSHR * T3.W, PV.W, literal.x,
6727; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6728; EG-NEXT:     SETE_INT T2.Z, PS, T6.W, BS:VEC_021/SCL_122
6729; EG-NEXT:     SETGE_UINT T5.W, PS, T6.W, BS:VEC_021/SCL_122
6730; EG-NEXT:     SETGE_UINT * T9.W, PV.W, T7.W,
6731; EG-NEXT:     CNDE_INT T0.Y, PV.Z, PV.W, PS,
6732; EG-NEXT:     SUB_INT * T2.Z, T4.W, T7.W,
6733; EG-NEXT:     SUB_INT * T5.W, T3.W, T6.W,
6734; EG-NEXT:     SUBB_UINT * T9.W, T4.W, T7.W,
6735; EG-NEXT:     SUB_INT T5.W, T5.W, PV.W,
6736; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, T2.Z,
6737; EG-NEXT:     LSHL T2.Z, PS, 1,
6738; EG-NEXT:     BFE_UINT T9.W, T8.W, literal.x, 1,
6739; EG-NEXT:     CNDE_INT * T3.W, T0.Y, T3.W, PV.W,
6740; EG-NEXT:    30(4.203895e-44), 0(0.000000e+00)
6741; EG-NEXT:     BIT_ALIGN_INT T3.W, PS, T4.W, literal.x,
6742; EG-NEXT:     OR_INT * T4.W, PV.Z, PV.W,
6743; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6744; EG-NEXT:     SETGE_UINT T2.Z, PS, T7.W, BS:VEC_021/SCL_122
6745; EG-NEXT:     SETE_INT T5.W, PV.W, T6.W,
6746; EG-NEXT:     SETGE_UINT * T9.W, PV.W, T6.W,
6747; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
6748; EG-NEXT:     SUB_INT T2.Z, T4.W, T7.W,
6749; EG-NEXT:     SUBB_UINT * T5.W, T4.W, T7.W,
6750; EG-NEXT:     SUB_INT * T9.W, T3.W, T6.W,
6751; EG-NEXT:     SUB_INT T5.W, PV.W, T5.W, BS:VEC_021/SCL_122
6752; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, T2.Z,
6753; EG-NEXT:     LSHL T2.Z, PS, 1,
6754; EG-NEXT:     BFE_UINT T9.W, T8.W, literal.x, 1,
6755; EG-NEXT:     CNDE_INT * T3.W, T0.Y, T3.W, PV.W,
6756; EG-NEXT:    29(4.063766e-44), 0(0.000000e+00)
6757; EG-NEXT:     BIT_ALIGN_INT T3.W, PS, T4.W, literal.x,
6758; EG-NEXT:     OR_INT * T4.W, PV.Z, PV.W,
6759; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6760; EG-NEXT:     SETGE_UINT T2.Z, PS, T7.W, BS:VEC_021/SCL_122
6761; EG-NEXT:     SETE_INT T5.W, PV.W, T6.W,
6762; EG-NEXT:     SETGE_UINT * T9.W, PV.W, T6.W,
6763; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
6764; EG-NEXT:     SUB_INT T2.Z, T4.W, T7.W,
6765; EG-NEXT:     SUBB_UINT * T5.W, T4.W, T7.W,
6766; EG-NEXT:     SUB_INT * T9.W, T3.W, T6.W,
6767; EG-NEXT:     SUB_INT T5.W, PV.W, T5.W, BS:VEC_021/SCL_122
6768; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, T2.Z,
6769; EG-NEXT:     LSHL T2.Z, PS, 1,
6770; EG-NEXT:     BFE_UINT T9.W, T8.W, literal.x, 1,
6771; EG-NEXT:     CNDE_INT * T3.W, T0.Y, T3.W, PV.W,
6772; EG-NEXT:    28(3.923636e-44), 0(0.000000e+00)
6773; EG-NEXT:     BIT_ALIGN_INT T3.W, PS, T4.W, literal.x,
6774; EG-NEXT:     OR_INT * T4.W, PV.Z, PV.W,
6775; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6776; EG-NEXT:     SETGE_UINT T2.Z, PS, T7.W, BS:VEC_021/SCL_122
6777; EG-NEXT:     SETE_INT T5.W, PV.W, T6.W,
6778; EG-NEXT:     SETGE_UINT * T9.W, PV.W, T6.W,
6779; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
6780; EG-NEXT:     SUB_INT T2.Z, T4.W, T7.W,
6781; EG-NEXT:     SUBB_UINT * T5.W, T4.W, T7.W,
6782; EG-NEXT:     SUB_INT * T9.W, T3.W, T6.W,
6783; EG-NEXT:     SUB_INT T5.W, PV.W, T5.W, BS:VEC_021/SCL_122
6784; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, T2.Z,
6785; EG-NEXT:     LSHL T2.Z, PS, 1,
6786; EG-NEXT:     BFE_UINT T9.W, T8.W, literal.x, 1,
6787; EG-NEXT:     CNDE_INT * T3.W, T0.Y, T3.W, PV.W,
6788; EG-NEXT:    27(3.783506e-44), 0(0.000000e+00)
6789; EG-NEXT:     BIT_ALIGN_INT T3.W, PS, T4.W, literal.x,
6790; EG-NEXT:     OR_INT * T4.W, PV.Z, PV.W,
6791; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6792; EG-NEXT:     SETGE_UINT T2.Z, PS, T7.W, BS:VEC_021/SCL_122
6793; EG-NEXT:     SETE_INT T5.W, PV.W, T6.W,
6794; EG-NEXT:     SETGE_UINT * T9.W, PV.W, T6.W,
6795; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
6796; EG-NEXT:     SUB_INT T2.Z, T4.W, T7.W,
6797; EG-NEXT:     SUBB_UINT * T5.W, T4.W, T7.W,
6798; EG-NEXT:     SUB_INT * T9.W, T3.W, T6.W,
6799; EG-NEXT:     SUB_INT T5.W, PV.W, T5.W, BS:VEC_021/SCL_122
6800; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, T2.Z,
6801; EG-NEXT:     LSHL T2.Z, PS, 1,
6802; EG-NEXT:     BFE_UINT T9.W, T8.W, literal.x, 1,
6803; EG-NEXT:     CNDE_INT * T3.W, T0.Y, T3.W, PV.W,
6804; EG-NEXT:    26(3.643376e-44), 0(0.000000e+00)
6805; EG-NEXT:     BIT_ALIGN_INT T3.W, PS, T4.W, literal.x,
6806; EG-NEXT:     OR_INT * T4.W, PV.Z, PV.W,
6807; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6808; EG-NEXT:     SETGE_UINT * T2.Z, PS, T7.W,
6809; EG-NEXT:    ALU clause starting at 150:
6810; EG-NEXT:     SETE_INT T5.W, T3.W, T6.W,
6811; EG-NEXT:     SETGE_UINT * T9.W, T3.W, T6.W,
6812; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, T2.Z,
6813; EG-NEXT:     SUB_INT T2.Z, T4.W, T7.W,
6814; EG-NEXT:     SUBB_UINT * T5.W, T4.W, T7.W,
6815; EG-NEXT:     SUB_INT * T9.W, T3.W, T6.W,
6816; EG-NEXT:     SUB_INT T5.W, PV.W, T5.W, BS:VEC_021/SCL_122
6817; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, T2.Z,
6818; EG-NEXT:     LSHL T2.Z, PS, 1,
6819; EG-NEXT:     BFE_UINT T9.W, T8.W, literal.x, 1,
6820; EG-NEXT:     CNDE_INT * T3.W, T0.Y, T3.W, PV.W,
6821; EG-NEXT:    25(3.503246e-44), 0(0.000000e+00)
6822; EG-NEXT:     BIT_ALIGN_INT T3.W, PS, T4.W, literal.x,
6823; EG-NEXT:     OR_INT * T4.W, PV.Z, PV.W,
6824; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6825; EG-NEXT:     SETGE_UINT T2.Z, PS, T7.W, BS:VEC_021/SCL_122
6826; EG-NEXT:     SETE_INT T5.W, PV.W, T6.W,
6827; EG-NEXT:     SETGE_UINT * T9.W, PV.W, T6.W,
6828; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
6829; EG-NEXT:     SUB_INT T2.Z, T4.W, T7.W,
6830; EG-NEXT:     SUBB_UINT * T5.W, T4.W, T7.W,
6831; EG-NEXT:     SUB_INT * T9.W, T3.W, T6.W,
6832; EG-NEXT:     SUB_INT T5.W, PV.W, T5.W, BS:VEC_021/SCL_122
6833; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, T2.Z,
6834; EG-NEXT:     LSHL T2.Z, PS, 1,
6835; EG-NEXT:     BFE_UINT T9.W, T8.W, literal.x, 1,
6836; EG-NEXT:     CNDE_INT * T3.W, T0.Y, T3.W, PV.W,
6837; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
6838; EG-NEXT:     BIT_ALIGN_INT T3.W, PS, T4.W, literal.x,
6839; EG-NEXT:     OR_INT * T4.W, PV.Z, PV.W,
6840; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6841; EG-NEXT:     SETGE_UINT T2.Z, PS, T7.W, BS:VEC_021/SCL_122
6842; EG-NEXT:     SETE_INT T5.W, PV.W, T6.W,
6843; EG-NEXT:     SETGE_UINT * T9.W, PV.W, T6.W,
6844; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
6845; EG-NEXT:     SUB_INT T2.Z, T4.W, T7.W,
6846; EG-NEXT:     SUBB_UINT * T5.W, T4.W, T7.W,
6847; EG-NEXT:     SUB_INT * T9.W, T3.W, T6.W,
6848; EG-NEXT:     SUB_INT T5.W, PV.W, T5.W, BS:VEC_021/SCL_122
6849; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, T2.Z,
6850; EG-NEXT:     LSHL T2.Z, PS, 1,
6851; EG-NEXT:     BFE_UINT T9.W, T8.W, literal.x, 1,
6852; EG-NEXT:     CNDE_INT * T3.W, T0.Y, T3.W, PV.W,
6853; EG-NEXT:    23(3.222986e-44), 0(0.000000e+00)
6854; EG-NEXT:     BIT_ALIGN_INT T3.W, PS, T4.W, literal.x,
6855; EG-NEXT:     OR_INT * T4.W, PV.Z, PV.W,
6856; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6857; EG-NEXT:     SETGE_UINT T2.Z, PS, T7.W, BS:VEC_021/SCL_122
6858; EG-NEXT:     SETE_INT T5.W, PV.W, T6.W,
6859; EG-NEXT:     SETGE_UINT * T9.W, PV.W, T6.W,
6860; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
6861; EG-NEXT:     SUB_INT T2.Z, T4.W, T7.W,
6862; EG-NEXT:     SUBB_UINT * T5.W, T4.W, T7.W,
6863; EG-NEXT:     SUB_INT * T9.W, T3.W, T6.W,
6864; EG-NEXT:     SUB_INT T5.W, PV.W, T5.W, BS:VEC_021/SCL_122
6865; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, T2.Z,
6866; EG-NEXT:     LSHL T2.Z, PS, 1,
6867; EG-NEXT:     BFE_UINT T9.W, T8.W, literal.x, 1,
6868; EG-NEXT:     CNDE_INT * T3.W, T0.Y, T3.W, PV.W,
6869; EG-NEXT:    22(3.082857e-44), 0(0.000000e+00)
6870; EG-NEXT:     BIT_ALIGN_INT T3.W, PS, T4.W, literal.x,
6871; EG-NEXT:     OR_INT * T4.W, PV.Z, PV.W,
6872; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6873; EG-NEXT:     SETGE_UINT T2.Z, PS, T7.W, BS:VEC_021/SCL_122
6874; EG-NEXT:     SETE_INT T5.W, PV.W, T6.W,
6875; EG-NEXT:     SETGE_UINT * T9.W, PV.W, T6.W,
6876; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
6877; EG-NEXT:     SUB_INT T2.Z, T4.W, T7.W,
6878; EG-NEXT:     SUBB_UINT * T5.W, T4.W, T7.W,
6879; EG-NEXT:     SUB_INT * T9.W, T3.W, T6.W,
6880; EG-NEXT:     SUB_INT T5.W, PV.W, T5.W, BS:VEC_021/SCL_122
6881; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, T2.Z,
6882; EG-NEXT:     LSHL T2.Z, PS, 1,
6883; EG-NEXT:     BFE_UINT T9.W, T8.W, literal.x, 1,
6884; EG-NEXT:     CNDE_INT * T3.W, T0.Y, T3.W, PV.W,
6885; EG-NEXT:    21(2.942727e-44), 0(0.000000e+00)
6886; EG-NEXT:     BIT_ALIGN_INT T3.W, PS, T4.W, literal.x,
6887; EG-NEXT:     OR_INT * T4.W, PV.Z, PV.W,
6888; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6889; EG-NEXT:     SETGE_UINT T2.Z, PS, T7.W, BS:VEC_021/SCL_122
6890; EG-NEXT:     SETE_INT T5.W, PV.W, T6.W,
6891; EG-NEXT:     SETGE_UINT * T9.W, PV.W, T6.W,
6892; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
6893; EG-NEXT:     SUB_INT T2.Z, T4.W, T7.W,
6894; EG-NEXT:     SUBB_UINT * T5.W, T4.W, T7.W,
6895; EG-NEXT:     SUB_INT * T9.W, T3.W, T6.W,
6896; EG-NEXT:     SUB_INT T5.W, PV.W, T5.W, BS:VEC_021/SCL_122
6897; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, T2.Z,
6898; EG-NEXT:     LSHL T2.Z, PS, 1,
6899; EG-NEXT:     BFE_UINT T9.W, T8.W, literal.x, 1,
6900; EG-NEXT:     CNDE_INT * T3.W, T0.Y, T3.W, PV.W,
6901; EG-NEXT:    20(2.802597e-44), 0(0.000000e+00)
6902; EG-NEXT:     BIT_ALIGN_INT T3.W, PS, T4.W, literal.x,
6903; EG-NEXT:     OR_INT * T4.W, PV.Z, PV.W,
6904; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6905; EG-NEXT:     SETGE_UINT T2.Z, PS, T7.W, BS:VEC_021/SCL_122
6906; EG-NEXT:     SETE_INT T5.W, PV.W, T6.W,
6907; EG-NEXT:     SETGE_UINT * T9.W, PV.W, T6.W,
6908; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
6909; EG-NEXT:     SUB_INT T2.Z, T4.W, T7.W,
6910; EG-NEXT:     SUBB_UINT * T5.W, T4.W, T7.W,
6911; EG-NEXT:     SUB_INT * T9.W, T3.W, T6.W,
6912; EG-NEXT:     SUB_INT T5.W, PV.W, T5.W, BS:VEC_021/SCL_122
6913; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, T2.Z,
6914; EG-NEXT:     LSHL T2.Z, PS, 1,
6915; EG-NEXT:     BFE_UINT T9.W, T8.W, literal.x, 1,
6916; EG-NEXT:     CNDE_INT * T3.W, T0.Y, T3.W, PV.W,
6917; EG-NEXT:    19(2.662467e-44), 0(0.000000e+00)
6918; EG-NEXT:     BIT_ALIGN_INT T3.W, PS, T4.W, literal.x,
6919; EG-NEXT:     OR_INT * T4.W, PV.Z, PV.W,
6920; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6921; EG-NEXT:     SETGE_UINT T2.Z, PS, T7.W, BS:VEC_021/SCL_122
6922; EG-NEXT:     SETE_INT T5.W, PV.W, T6.W,
6923; EG-NEXT:     SETGE_UINT * T9.W, PV.W, T6.W,
6924; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
6925; EG-NEXT:     SUB_INT * T2.Z, T4.W, T7.W,
6926; EG-NEXT:    ALU clause starting at 266:
6927; EG-NEXT:     SUBB_UINT * T5.W, T4.W, T7.W,
6928; EG-NEXT:     SUB_INT * T9.W, T3.W, T6.W,
6929; EG-NEXT:     SUB_INT T5.W, PV.W, T5.W, BS:VEC_021/SCL_122
6930; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, T2.Z,
6931; EG-NEXT:     LSHL T2.Z, PS, 1,
6932; EG-NEXT:     BFE_UINT T9.W, T8.W, literal.x, 1,
6933; EG-NEXT:     CNDE_INT * T3.W, T0.Y, T3.W, PV.W,
6934; EG-NEXT:    18(2.522337e-44), 0(0.000000e+00)
6935; EG-NEXT:     BIT_ALIGN_INT T3.W, PS, T4.W, literal.x,
6936; EG-NEXT:     OR_INT * T4.W, PV.Z, PV.W,
6937; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6938; EG-NEXT:     SETGE_UINT T2.Z, PS, T7.W, BS:VEC_021/SCL_122
6939; EG-NEXT:     SETE_INT T5.W, PV.W, T6.W,
6940; EG-NEXT:     SETGE_UINT * T9.W, PV.W, T6.W,
6941; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
6942; EG-NEXT:     SUB_INT T2.Z, T4.W, T7.W,
6943; EG-NEXT:     SUBB_UINT * T5.W, T4.W, T7.W,
6944; EG-NEXT:     SUB_INT * T9.W, T3.W, T6.W,
6945; EG-NEXT:     SUB_INT T5.W, PV.W, T5.W, BS:VEC_021/SCL_122
6946; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, T2.Z,
6947; EG-NEXT:     LSHL T2.Z, PS, 1,
6948; EG-NEXT:     BFE_UINT T9.W, T8.W, literal.x, 1,
6949; EG-NEXT:     CNDE_INT * T3.W, T0.Y, T3.W, PV.W,
6950; EG-NEXT:    17(2.382207e-44), 0(0.000000e+00)
6951; EG-NEXT:     BIT_ALIGN_INT T3.W, PS, T4.W, literal.x,
6952; EG-NEXT:     OR_INT * T4.W, PV.Z, PV.W,
6953; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6954; EG-NEXT:     SETGE_UINT T2.Z, PS, T7.W, BS:VEC_021/SCL_122
6955; EG-NEXT:     SETE_INT T5.W, PV.W, T6.W,
6956; EG-NEXT:     SETGE_UINT * T9.W, PV.W, T6.W,
6957; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
6958; EG-NEXT:     SUB_INT T2.Z, T4.W, T7.W,
6959; EG-NEXT:     SUBB_UINT * T5.W, T4.W, T7.W,
6960; EG-NEXT:     SUB_INT * T9.W, T3.W, T6.W,
6961; EG-NEXT:     SUB_INT T5.W, PV.W, T5.W, BS:VEC_021/SCL_122
6962; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, T2.Z,
6963; EG-NEXT:     LSHL T2.Z, PS, 1,
6964; EG-NEXT:     BFE_UINT T9.W, T8.W, literal.x, 1,
6965; EG-NEXT:     CNDE_INT * T3.W, T0.Y, T3.W, PV.W,
6966; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
6967; EG-NEXT:     BIT_ALIGN_INT T3.W, PS, T4.W, literal.x,
6968; EG-NEXT:     OR_INT * T4.W, PV.Z, PV.W,
6969; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6970; EG-NEXT:     SETGE_UINT T2.Z, PS, T7.W, BS:VEC_021/SCL_122
6971; EG-NEXT:     SETE_INT T5.W, PV.W, T6.W,
6972; EG-NEXT:     SETGE_UINT * T9.W, PV.W, T6.W,
6973; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
6974; EG-NEXT:     SUB_INT T2.Z, T4.W, T7.W,
6975; EG-NEXT:     SUBB_UINT * T5.W, T4.W, T7.W,
6976; EG-NEXT:     SUB_INT * T9.W, T3.W, T6.W,
6977; EG-NEXT:     SUB_INT T5.W, PV.W, T5.W, BS:VEC_021/SCL_122
6978; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, T2.Z,
6979; EG-NEXT:     LSHL T2.Z, PS, 1,
6980; EG-NEXT:     BFE_UINT T9.W, T8.W, literal.x, 1,
6981; EG-NEXT:     CNDE_INT * T3.W, T0.Y, T3.W, PV.W,
6982; EG-NEXT:    15(2.101948e-44), 0(0.000000e+00)
6983; EG-NEXT:     BIT_ALIGN_INT T3.W, PS, T4.W, literal.x,
6984; EG-NEXT:     OR_INT * T4.W, PV.Z, PV.W,
6985; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
6986; EG-NEXT:     SETGE_UINT T2.Z, PS, T7.W, BS:VEC_021/SCL_122
6987; EG-NEXT:     SETE_INT T5.W, PV.W, T6.W,
6988; EG-NEXT:     SETGE_UINT * T9.W, PV.W, T6.W,
6989; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
6990; EG-NEXT:     SUB_INT T2.Z, T4.W, T7.W,
6991; EG-NEXT:     SUBB_UINT * T5.W, T4.W, T7.W,
6992; EG-NEXT:     SUB_INT * T9.W, T3.W, T6.W,
6993; EG-NEXT:     SUB_INT T5.W, PV.W, T5.W, BS:VEC_021/SCL_122
6994; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, T2.Z,
6995; EG-NEXT:     LSHL T2.Z, PS, 1,
6996; EG-NEXT:     BFE_UINT T9.W, T8.W, literal.x, 1,
6997; EG-NEXT:     CNDE_INT * T3.W, T0.Y, T3.W, PV.W,
6998; EG-NEXT:    14(1.961818e-44), 0(0.000000e+00)
6999; EG-NEXT:     BIT_ALIGN_INT T3.W, PS, T4.W, literal.x,
7000; EG-NEXT:     OR_INT * T4.W, PV.Z, PV.W,
7001; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7002; EG-NEXT:     SETGE_UINT T2.Z, PS, T7.W, BS:VEC_021/SCL_122
7003; EG-NEXT:     SETE_INT T5.W, PV.W, T6.W,
7004; EG-NEXT:     SETGE_UINT * T9.W, PV.W, T6.W,
7005; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
7006; EG-NEXT:     SUB_INT T2.Z, T4.W, T7.W,
7007; EG-NEXT:     SUBB_UINT * T5.W, T4.W, T7.W,
7008; EG-NEXT:     SUB_INT * T9.W, T3.W, T6.W,
7009; EG-NEXT:     SUB_INT T5.W, PV.W, T5.W, BS:VEC_021/SCL_122
7010; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, T2.Z,
7011; EG-NEXT:     LSHL T2.Z, PS, 1,
7012; EG-NEXT:     BFE_UINT T9.W, T8.W, literal.x, 1,
7013; EG-NEXT:     CNDE_INT * T3.W, T0.Y, T3.W, PV.W,
7014; EG-NEXT:    13(1.821688e-44), 0(0.000000e+00)
7015; EG-NEXT:     BIT_ALIGN_INT T3.W, PS, T4.W, literal.x,
7016; EG-NEXT:     OR_INT * T4.W, PV.Z, PV.W,
7017; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7018; EG-NEXT:     SETGE_UINT T2.Z, PS, T7.W, BS:VEC_021/SCL_122
7019; EG-NEXT:     SETE_INT T5.W, PV.W, T6.W,
7020; EG-NEXT:     SETGE_UINT * T9.W, PV.W, T6.W,
7021; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
7022; EG-NEXT:     SUB_INT T2.Z, T4.W, T7.W,
7023; EG-NEXT:     SUBB_UINT * T5.W, T4.W, T7.W,
7024; EG-NEXT:     SUB_INT * T9.W, T3.W, T6.W,
7025; EG-NEXT:     SUB_INT T5.W, PV.W, T5.W, BS:VEC_021/SCL_122
7026; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, T2.Z,
7027; EG-NEXT:     LSHL T2.Z, PS, 1,
7028; EG-NEXT:     BFE_UINT T9.W, T8.W, literal.x, 1,
7029; EG-NEXT:     CNDE_INT * T3.W, T0.Y, T3.W, PV.W,
7030; EG-NEXT:    12(1.681558e-44), 0(0.000000e+00)
7031; EG-NEXT:     BIT_ALIGN_INT T3.W, PS, T4.W, literal.x,
7032; EG-NEXT:     OR_INT * T4.W, PV.Z, PV.W,
7033; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7034; EG-NEXT:     SETGE_UINT T2.Z, PS, T7.W, BS:VEC_021/SCL_122
7035; EG-NEXT:     SETE_INT T5.W, PV.W, T6.W,
7036; EG-NEXT:     SETGE_UINT * T9.W, PV.W, T6.W,
7037; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
7038; EG-NEXT:     SUB_INT T2.Z, T4.W, T7.W,
7039; EG-NEXT:     SUBB_UINT * T5.W, T4.W, T7.W,
7040; EG-NEXT:     SUB_INT * T9.W, T3.W, T6.W,
7041; EG-NEXT:     SUB_INT T5.W, PV.W, T5.W, BS:VEC_021/SCL_122
7042; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, T2.Z,
7043; EG-NEXT:    ALU clause starting at 382:
7044; EG-NEXT:     LSHL T2.Z, T4.W, 1,
7045; EG-NEXT:     BFE_UINT * T9.W, T8.W, literal.x, 1, BS:VEC_120/SCL_212
7046; EG-NEXT:    11(1.541428e-44), 0(0.000000e+00)
7047; EG-NEXT:     CNDE_INT * T3.W, T0.Y, T3.W, T5.W,
7048; EG-NEXT:     BIT_ALIGN_INT T3.W, PV.W, T4.W, literal.x, BS:VEC_021/SCL_122
7049; EG-NEXT:     OR_INT * T4.W, T2.Z, T9.W,
7050; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7051; EG-NEXT:     SETGE_UINT T2.Z, PS, T7.W, BS:VEC_021/SCL_122
7052; EG-NEXT:     SETE_INT T5.W, PV.W, T6.W,
7053; EG-NEXT:     SETGE_UINT * T9.W, PV.W, T6.W,
7054; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
7055; EG-NEXT:     SUB_INT T2.Z, T4.W, T7.W,
7056; EG-NEXT:     SUBB_UINT * T5.W, T4.W, T7.W,
7057; EG-NEXT:     SUB_INT * T9.W, T3.W, T6.W,
7058; EG-NEXT:     SUB_INT T5.W, PV.W, T5.W, BS:VEC_021/SCL_122
7059; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, T2.Z,
7060; EG-NEXT:     LSHL T2.Z, PS, 1,
7061; EG-NEXT:     BFE_UINT T9.W, T8.W, literal.x, 1,
7062; EG-NEXT:     CNDE_INT * T3.W, T0.Y, T3.W, PV.W,
7063; EG-NEXT:    10(1.401298e-44), 0(0.000000e+00)
7064; EG-NEXT:     BIT_ALIGN_INT T3.W, PS, T4.W, literal.x,
7065; EG-NEXT:     OR_INT * T4.W, PV.Z, PV.W,
7066; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7067; EG-NEXT:     SETGE_UINT T2.Z, PS, T7.W, BS:VEC_021/SCL_122
7068; EG-NEXT:     SETE_INT T5.W, PV.W, T6.W,
7069; EG-NEXT:     SETGE_UINT * T9.W, PV.W, T6.W,
7070; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
7071; EG-NEXT:     SUB_INT T2.Z, T4.W, T7.W,
7072; EG-NEXT:     SUBB_UINT * T5.W, T4.W, T7.W,
7073; EG-NEXT:     SUB_INT * T9.W, T3.W, T6.W,
7074; EG-NEXT:     SUB_INT T5.W, PV.W, T5.W, BS:VEC_021/SCL_122
7075; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, T2.Z,
7076; EG-NEXT:     LSHL T2.Z, PS, 1,
7077; EG-NEXT:     BFE_UINT T9.W, T8.W, literal.x, 1,
7078; EG-NEXT:     CNDE_INT * T3.W, T0.Y, T3.W, PV.W,
7079; EG-NEXT:    9(1.261169e-44), 0(0.000000e+00)
7080; EG-NEXT:     BIT_ALIGN_INT T3.W, PS, T4.W, literal.x,
7081; EG-NEXT:     OR_INT * T4.W, PV.Z, PV.W,
7082; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7083; EG-NEXT:     SETGE_UINT T2.Z, PS, T7.W, BS:VEC_021/SCL_122
7084; EG-NEXT:     SETE_INT T5.W, PV.W, T6.W,
7085; EG-NEXT:     SETGE_UINT * T9.W, PV.W, T6.W,
7086; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
7087; EG-NEXT:     SUB_INT T2.Z, T4.W, T7.W,
7088; EG-NEXT:     SUBB_UINT * T5.W, T4.W, T7.W,
7089; EG-NEXT:     SUB_INT * T9.W, T3.W, T6.W,
7090; EG-NEXT:     SUB_INT T5.W, PV.W, T5.W, BS:VEC_021/SCL_122
7091; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, T2.Z,
7092; EG-NEXT:     LSHL T2.Z, PS, 1,
7093; EG-NEXT:     BFE_UINT T9.W, T8.W, literal.x, 1,
7094; EG-NEXT:     CNDE_INT * T3.W, T0.Y, T3.W, PV.W,
7095; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
7096; EG-NEXT:     BIT_ALIGN_INT T3.W, PS, T4.W, literal.x,
7097; EG-NEXT:     OR_INT * T4.W, PV.Z, PV.W,
7098; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7099; EG-NEXT:     SETGE_UINT T2.Z, PS, T7.W, BS:VEC_021/SCL_122
7100; EG-NEXT:     SETE_INT T5.W, PV.W, T6.W,
7101; EG-NEXT:     SETGE_UINT * T9.W, PV.W, T6.W,
7102; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
7103; EG-NEXT:     SUB_INT T2.Z, T4.W, T7.W,
7104; EG-NEXT:     SUBB_UINT * T5.W, T4.W, T7.W,
7105; EG-NEXT:     SUB_INT * T9.W, T3.W, T6.W,
7106; EG-NEXT:     SUB_INT T5.W, PV.W, T5.W, BS:VEC_021/SCL_122
7107; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, T2.Z,
7108; EG-NEXT:     LSHL T2.Z, PS, 1,
7109; EG-NEXT:     BFE_UINT T9.W, T8.W, literal.x, 1,
7110; EG-NEXT:     CNDE_INT * T3.W, T0.Y, T3.W, PV.W,
7111; EG-NEXT:    7(9.809089e-45), 0(0.000000e+00)
7112; EG-NEXT:     BIT_ALIGN_INT T3.W, PS, T4.W, literal.x,
7113; EG-NEXT:     OR_INT * T4.W, PV.Z, PV.W,
7114; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7115; EG-NEXT:     SETGE_UINT T2.Z, PS, T7.W, BS:VEC_021/SCL_122
7116; EG-NEXT:     SETE_INT T5.W, PV.W, T6.W,
7117; EG-NEXT:     SETGE_UINT * T9.W, PV.W, T6.W,
7118; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
7119; EG-NEXT:     SUB_INT T2.Z, T4.W, T7.W,
7120; EG-NEXT:     SUBB_UINT * T5.W, T4.W, T7.W,
7121; EG-NEXT:     SUB_INT * T9.W, T3.W, T6.W,
7122; EG-NEXT:     SUB_INT T5.W, PV.W, T5.W, BS:VEC_021/SCL_122
7123; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, T2.Z,
7124; EG-NEXT:     LSHL T2.Z, PS, 1,
7125; EG-NEXT:     BFE_UINT T9.W, T8.W, literal.x, 1,
7126; EG-NEXT:     CNDE_INT * T3.W, T0.Y, T3.W, PV.W,
7127; EG-NEXT:    6(8.407791e-45), 0(0.000000e+00)
7128; EG-NEXT:     BIT_ALIGN_INT T3.W, PS, T4.W, literal.x,
7129; EG-NEXT:     OR_INT * T4.W, PV.Z, PV.W,
7130; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7131; EG-NEXT:     SETGE_UINT T2.Z, PS, T7.W, BS:VEC_021/SCL_122
7132; EG-NEXT:     SETE_INT T5.W, PV.W, T6.W,
7133; EG-NEXT:     SETGE_UINT * T9.W, PV.W, T6.W,
7134; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
7135; EG-NEXT:     SUB_INT T2.Z, T4.W, T7.W,
7136; EG-NEXT:     SUBB_UINT * T5.W, T4.W, T7.W,
7137; EG-NEXT:     SUB_INT * T9.W, T3.W, T6.W,
7138; EG-NEXT:     SUB_INT T5.W, PV.W, T5.W, BS:VEC_021/SCL_122
7139; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, T2.Z,
7140; EG-NEXT:     LSHL T2.Z, PS, 1,
7141; EG-NEXT:     BFE_UINT T9.W, T8.W, literal.x, 1,
7142; EG-NEXT:     CNDE_INT * T3.W, T0.Y, T3.W, PV.W,
7143; EG-NEXT:    5(7.006492e-45), 0(0.000000e+00)
7144; EG-NEXT:     BIT_ALIGN_INT T3.W, PS, T4.W, literal.x,
7145; EG-NEXT:     OR_INT * T4.W, PV.Z, PV.W,
7146; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7147; EG-NEXT:     SETGE_UINT T2.Z, PS, T7.W, BS:VEC_021/SCL_122
7148; EG-NEXT:     SETE_INT T5.W, PV.W, T6.W,
7149; EG-NEXT:     SETGE_UINT * T9.W, PV.W, T6.W,
7150; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
7151; EG-NEXT:     SUB_INT T2.Z, T4.W, T7.W,
7152; EG-NEXT:     SUBB_UINT * T5.W, T4.W, T7.W,
7153; EG-NEXT:     SUB_INT * T9.W, T3.W, T6.W,
7154; EG-NEXT:     SUB_INT T5.W, PV.W, T5.W, BS:VEC_021/SCL_122
7155; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, T2.Z,
7156; EG-NEXT:    ALU clause starting at 494:
7157; EG-NEXT:     LSHL T2.Z, T4.W, 1,
7158; EG-NEXT:     BFE_UINT * T11.W, T8.W, literal.x, 1, BS:VEC_120/SCL_212
7159; EG-NEXT:    4(5.605194e-45), 0(0.000000e+00)
7160; EG-NEXT:     CNDE_INT * T3.W, T0.Y, T3.W, T5.W,
7161; EG-NEXT:     BIT_ALIGN_INT T3.W, PV.W, T4.W, literal.x, BS:VEC_021/SCL_122
7162; EG-NEXT:     OR_INT * T4.W, T2.Z, T11.W,
7163; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7164; EG-NEXT:     SETGE_UINT T2.Z, PS, T7.W, BS:VEC_021/SCL_122
7165; EG-NEXT:     SETE_INT T5.W, PV.W, T6.W,
7166; EG-NEXT:     SETGE_UINT * T11.W, PV.W, T6.W,
7167; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
7168; EG-NEXT:     SUB_INT T2.Z, T4.W, T7.W,
7169; EG-NEXT:     SUBB_UINT * T5.W, T4.W, T7.W,
7170; EG-NEXT:     SUB_INT * T11.W, T3.W, T6.W,
7171; EG-NEXT:     SUB_INT T5.W, PV.W, T5.W, BS:VEC_021/SCL_122
7172; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, T2.Z,
7173; EG-NEXT:     LSHL T2.Z, PS, 1,
7174; EG-NEXT:     BFE_UINT T11.W, T8.W, literal.x, 1,
7175; EG-NEXT:     CNDE_INT * T3.W, T0.Y, T3.W, PV.W,
7176; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
7177; EG-NEXT:     BIT_ALIGN_INT T3.W, PS, T4.W, literal.x,
7178; EG-NEXT:     OR_INT * T4.W, PV.Z, PV.W,
7179; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7180; EG-NEXT:     SETGE_UINT T2.Z, PS, T7.W, BS:VEC_021/SCL_122
7181; EG-NEXT:     SETE_INT T5.W, PV.W, T6.W,
7182; EG-NEXT:     SETGE_UINT * T11.W, PV.W, T6.W,
7183; EG-NEXT:     CNDE_INT T0.Y, PV.W, PS, PV.Z,
7184; EG-NEXT:     SUB_INT T2.Z, T4.W, T7.W,
7185; EG-NEXT:     SUBB_UINT * T5.W, T4.W, T7.W,
7186; EG-NEXT:     SUB_INT * T11.W, T3.W, T6.W,
7187; EG-NEXT:     SUB_INT T5.W, PV.W, T5.W, BS:VEC_021/SCL_122
7188; EG-NEXT:     CNDE_INT * T4.W, T0.Y, T4.W, T2.Z,
7189; EG-NEXT:     LSHL T1.Y, PS, 1,
7190; EG-NEXT:     BFE_UINT T2.Z, T8.W, literal.x, 1,
7191; EG-NEXT:     CNDE_INT T3.W, T0.Y, T3.W, PV.W,
7192; EG-NEXT:     ASHR * T11.W, T10.Y, literal.y,
7193; EG-NEXT:    2(2.802597e-45), 31(4.344025e-44)
7194; EG-NEXT:     ADD_INT T3.Z, T10.X, PS,
7195; EG-NEXT:     BIT_ALIGN_INT T5.W, PV.W, T4.W, literal.x,
7196; EG-NEXT:     OR_INT * T12.W, PV.Y, PV.Z,
7197; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7198; EG-NEXT:     SETGE_UINT T0.Y, PS, T7.W, BS:VEC_021/SCL_122
7199; EG-NEXT:     SETE_INT T2.Z, PV.W, T6.W, BS:VEC_102/SCL_221
7200; EG-NEXT:     SETGE_UINT T3.W, PV.W, T6.W, BS:VEC_102/SCL_221
7201; EG-NEXT:     XOR_INT * T4.W, PV.Z, T11.W,
7202; EG-NEXT:     SUB_INT T13.W, 0.0, PS,
7203; EG-NEXT:     CNDE_INT * T14.W, PV.Z, PV.W, PV.Y,
7204; EG-NEXT:     SUB_INT T0.X, T12.W, T7.W,
7205; EG-NEXT:     SUBB_UINT * T0.Y, T12.W, T7.W,
7206; EG-NEXT:     SUB_INT T2.Z, T5.W, T6.W,
7207; EG-NEXT:     ASHR T3.W, T9.Y, literal.x,
7208; EG-NEXT:     RECIP_UINT * T1.X, T4.W,
7209; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7210; EG-NEXT:     ADD_INT T1.Y, T9.Y, PV.W,
7211; EG-NEXT:     SUB_INT T2.Z, PV.Z, T0.Y,
7212; EG-NEXT:     CNDE_INT T12.W, T14.W, T12.W, T0.X,
7213; EG-NEXT:     MULLO_INT * T0.X, T13.W, PS,
7214; EG-NEXT:     ADDC_UINT T2.X, T9.X, T3.W,
7215; EG-NEXT:     LSHL T0.Y, PV.W, 1,
7216; EG-NEXT:     BFE_UINT * T3.Z, T8.W, 1, 1,
7217; EG-NEXT:     CNDE_INT T5.W, T14.W, T5.W, T2.Z,
7218; EG-NEXT:     MULHI * T0.X, T1.X, T0.X,
7219; EG-NEXT:     ADD_INT T2.Y, T1.X, PS,
7220; EG-NEXT:     BIT_ALIGN_INT T4.Z, PV.W, T12.W, literal.x,
7221; EG-NEXT:     OR_INT T12.W, T0.Y, T3.Z,
7222; EG-NEXT:     ADD_INT * T5.W, T1.Y, T2.X,
7223; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7224; EG-NEXT:     XOR_INT T1.X, PS, T3.W,
7225; EG-NEXT:     ASHR T0.Y, T10.W, literal.x,
7226; EG-NEXT:     SETGE_UINT * T2.Z, PV.W, T7.W, BS:VEC_021/SCL_122
7227; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7228; EG-NEXT:     SETE_INT T5.W, T4.Z, T6.W,
7229; EG-NEXT:     SETGE_UINT * T13.W, T4.Z, T6.W,
7230; EG-NEXT:     CNDE_INT T1.Y, PV.W, PS, T2.Z,
7231; EG-NEXT:     SUB_INT T3.Z, T12.W, T7.W,
7232; EG-NEXT:     ADD_INT T5.W, T10.Z, T0.Y, BS:VEC_021/SCL_122
7233; EG-NEXT:     MULHI * T0.X, T1.X, T2.Y,
7234; EG-NEXT:     SUBB_UINT T2.X, T12.W, T7.W,
7235; EG-NEXT:     SUB_INT T2.Y, T4.Z, T6.W, BS:VEC_021/SCL_122
7236; EG-NEXT:     XOR_INT * T2.Z, PV.W, T0.Y,
7237; EG-NEXT:     ASHR T5.W, T9.W, literal.x,
7238; EG-NEXT:     MULLO_INT * T0.X, T0.X, T4.W,
7239; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7240; EG-NEXT:     ADD_INT T3.X, T9.W, PV.W,
7241; EG-NEXT:     SUB_INT T3.Y, 0.0, T2.Z,
7242; EG-NEXT:     SUB_INT T5.Z, T2.Y, T2.X,
7243; EG-NEXT:     CNDE_INT T9.W, T1.Y, T12.W, T3.Z, BS:VEC_120/SCL_212
7244; EG-NEXT:     RECIP_UINT * T2.X, T2.Z,
7245; EG-NEXT:     ADDC_UINT T4.X, T9.Z, T5.W,
7246; EG-NEXT:     LSHL T2.Y, PV.W, 1,
7247; EG-NEXT:     AND_INT T3.Z, T8.W, 1,
7248; EG-NEXT:     CNDE_INT T8.W, T1.Y, T4.Z, PV.Z,
7249; EG-NEXT:     MULLO_INT * T1.Y, PV.Y, PS,
7250; EG-NEXT:     BIT_ALIGN_INT T3.Y, PV.W, T9.W, literal.x,
7251; EG-NEXT:     OR_INT T5.Z, PV.Y, PV.Z,
7252; EG-NEXT:     ADD_INT T8.W, T3.X, PV.X,
7253; EG-NEXT:     MULHI * T1.Y, T2.X, PS,
7254; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7255; EG-NEXT:     ADD_INT T2.X, T2.X, PS,
7256; EG-NEXT:     XOR_INT T1.Y, PV.W, T5.W,
7257; EG-NEXT:     SETGE_UINT T3.Z, PV.Z, T7.W, BS:VEC_021/SCL_122
7258; EG-NEXT:     SETE_INT T8.W, PV.Y, T6.W, BS:VEC_102/SCL_221
7259; EG-NEXT:     SUB_INT * T9.W, T1.X, T0.X,
7260; EG-NEXT:     SETGE_UINT T0.X, T3.Y, T6.W, BS:VEC_021/SCL_122
7261; EG-NEXT:     SUBB_UINT T2.Y, T5.Z, T7.W, BS:VEC_102/SCL_221
7262; EG-NEXT:     SUB_INT T4.Z, T3.Y, T6.W, BS:VEC_021/SCL_122
7263; EG-NEXT:     SETGE_UINT T6.W, PS, T4.W,
7264; EG-NEXT:     SUB_INT * T12.W, PS, T4.W,
7265; EG-NEXT:     CNDE_INT T3.X, PV.W, T9.W, PS,
7266; EG-NEXT:     ADD_INT T4.Y, T10.Y, T11.W, BS:VEC_102/SCL_221
7267; EG-NEXT:     ADDC_UINT T6.Z, T10.X, T11.W, BS:VEC_102/SCL_221
7268; EG-NEXT:     SUB_INT T6.W, PV.Z, PV.Y,
7269; EG-NEXT:     CNDE_INT * T8.W, T8.W, PV.X, T3.Z,
7270; EG-NEXT:     CNDE_INT T0.X, PS, T3.Y, PV.W,
7271; EG-NEXT:     ADD_INT * T2.Y, PV.Y, PV.Z,
7272; EG-NEXT:    ALU clause starting at 609:
7273; EG-NEXT:     SETGE_UINT T3.Z, T3.X, T4.W,
7274; EG-NEXT:     SUB_INT T6.W, T3.X, T4.W,
7275; EG-NEXT:     MULHI * T2.X, T1.Y, T2.X,
7276; EG-NEXT:     ASHR T4.X, T1.W, literal.x,
7277; EG-NEXT:     CNDE_INT T3.Y, PV.Z, T3.X, PV.W,
7278; EG-NEXT:     XOR_INT T3.Z, T2.Y, T11.W,
7279; EG-NEXT:     ADD_INT T6.W, T9.Z, T5.W, BS:VEC_021/SCL_122
7280; EG-NEXT:     MULLO_INT * T2.X, PS, T2.Z,
7281; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7282; EG-NEXT:     XOR_INT T3.X, PV.W, T5.W,
7283; EG-NEXT:     CNDE_INT T2.Y, PV.Z, PV.Y, T1.X,
7284; EG-NEXT:     ADD_INT T4.Z, T1.Z, PV.X,
7285; EG-NEXT:     SUB_INT T9.W, T1.Y, PS,
7286; EG-NEXT:     ASHR * T6.W, T0.W, literal.x,
7287; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7288; EG-NEXT:     ADD_INT T1.X, T0.W, PS,
7289; EG-NEXT:     ADDC_UINT T3.Y, T0.Z, PS,
7290; EG-NEXT:     SETGE_UINT T6.Z, PV.W, T2.Z,
7291; EG-NEXT:     SUB_INT T11.W, PV.W, T2.Z,
7292; EG-NEXT:     XOR_INT * T0.W, PV.Z, T4.X,
7293; EG-NEXT:     SUB_INT T2.X, 0.0, PS,
7294; EG-NEXT:     CNDE_INT T4.Y, PV.Z, T9.W, PV.W,
7295; EG-NEXT:     ADD_INT T4.Z, T10.W, T0.Y,
7296; EG-NEXT:     ADDC_UINT T9.W, T10.Z, T0.Y,
7297; EG-NEXT:     RECIP_UINT * T5.X, PS,
7298; EG-NEXT:     ADD_INT T6.X, PV.Z, PV.W,
7299; EG-NEXT:     SETGE_UINT T5.Y, PV.Y, T2.Z,
7300; EG-NEXT:     SUB_INT T4.Z, PV.Y, T2.Z,
7301; EG-NEXT:     ADD_INT T9.W, T9.X, T3.W,
7302; EG-NEXT:     MULLO_INT * T6.Y, PV.X, PS,
7303; EG-NEXT:     XOR_INT T2.X, PV.W, T3.W,
7304; EG-NEXT:     CNDE_INT T4.Y, PV.Y, T4.Y, PV.Z,
7305; EG-NEXT:     XOR_INT T4.Z, PV.X, T0.Y, BS:VEC_021/SCL_122
7306; EG-NEXT:     ADD_INT T9.W, T1.X, T3.Y, BS:VEC_102/SCL_221
7307; EG-NEXT:     MULHI * T0.Y, T5.X, PS,
7308; EG-NEXT:     ADD_INT T1.X, T5.X, PS,
7309; EG-NEXT:     XOR_INT T0.Y, PV.W, T6.W,
7310; EG-NEXT:     CNDE_INT T6.Z, PV.Z, PV.Y, T1.Y, BS:VEC_021/SCL_122
7311; EG-NEXT:     BIT_ALIGN_INT T9.W, T2.Y, PV.X, literal.x,
7312; EG-NEXT:     LSHR * T10.W, T2.Y, literal.x,
7313; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7314; EG-NEXT:     SETE_INT T5.X, PS, T3.Z,
7315; EG-NEXT:     SETGE_UINT T1.Y, PS, T3.Z,
7316; EG-NEXT:     SETGE_UINT T7.Z, PV.W, T4.W,
7317; EG-NEXT:     LSHR T11.W, PV.Z, literal.x,
7318; EG-NEXT:     MULHI * T1.X, PV.Y, PV.X,
7319; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7320; EG-NEXT:     SETE_INT T6.X, PV.W, T4.Z,
7321; EG-NEXT:     CNDE_INT T1.Y, PV.X, PV.Y, PV.Z,
7322; EG-NEXT:     SUB_INT T7.Z, T9.W, T4.W, BS:VEC_021/SCL_122
7323; EG-NEXT:     BIT_ALIGN_INT T12.W, T6.Z, T3.X, literal.x,
7324; EG-NEXT:     MULLO_INT * T1.X, PS, T0.W,
7325; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7326; EG-NEXT:     SETGE_UINT T5.X, T11.W, T4.Z,
7327; EG-NEXT:     SETGE_UINT T2.Y, PV.W, T2.Z, BS:VEC_102/SCL_221
7328; EG-NEXT:     SUB_INT T6.Z, T0.Y, PS,
7329; EG-NEXT:     CNDE_INT T13.W, PV.Y, T9.W, PV.Z, BS:VEC_021/SCL_122
7330; EG-NEXT:     SUB_INT * T7.W, T5.Z, T7.W,
7331; EG-NEXT:     CNDE_INT T1.X, T8.W, T5.Z, PS,
7332; EG-NEXT:     LSHL T3.Y, PV.W, 1,
7333; EG-NEXT:     SETGE_UINT T5.Z, PV.Z, T0.W,
7334; EG-NEXT:     SUB_INT T7.W, PV.Z, T0.W,
7335; EG-NEXT:     CNDE_INT * T8.W, T6.X, PV.X, PV.Y,
7336; EG-NEXT:     SUB_INT T5.X, T12.W, T2.Z,
7337; EG-NEXT:     SUB_INT T2.Y, T11.W, T4.Z, BS:VEC_102/SCL_221
7338; EG-NEXT:     SUBB_UINT T7.Z, T12.W, T2.Z,
7339; EG-NEXT:     ADD_INT T1.W, T1.W, T4.X, BS:VEC_201
7340; EG-NEXT:     ADDC_UINT * T14.W, T1.Z, T4.X,
7341; EG-NEXT:     BFE_UINT T6.X, T2.X, literal.x, 1,
7342; EG-NEXT:     ADD_INT T4.Y, PV.W, PS,
7343; EG-NEXT:     SUB_INT T1.Z, PV.Y, PV.Z,
7344; EG-NEXT:     CNDE_INT T1.W, T8.W, T12.W, PV.X, BS:VEC_120/SCL_212
7345; EG-NEXT:     CNDE_INT * T7.W, T5.Z, T6.Z, T7.W,
7346; EG-NEXT:    30(4.203895e-44), 0(0.000000e+00)
7347; EG-NEXT:     SETGE_UINT T5.X, PS, T0.W, BS:VEC_102/SCL_221
7348; EG-NEXT:     SUB_INT T2.Y, PS, T0.W, BS:VEC_102/SCL_221
7349; EG-NEXT:     LSHL T5.Z, PV.W, 1,
7350; EG-NEXT:     BFE_UINT T12.W, T3.X, literal.x, 1,
7351; EG-NEXT:     CNDE_INT * T8.W, T8.W, T11.W, PV.Z,
7352; EG-NEXT:    30(4.203895e-44), 0(0.000000e+00)
7353; EG-NEXT:     BIT_ALIGN_INT T7.X, PS, T1.W, literal.x,
7354; EG-NEXT:     OR_INT T5.Y, PV.Z, PV.W,
7355; EG-NEXT:     ADD_INT T0.Z, T0.Z, T6.W, BS:VEC_021/SCL_122
7356; EG-NEXT:     CNDE_INT T7.W, PV.X, T7.W, PV.Y, BS:VEC_102/SCL_221
7357; EG-NEXT:     XOR_INT * T1.W, T4.Y, T4.X,
7358; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7359; EG-NEXT:     CNDE_INT T4.X, PS, PV.W, T0.Y,
7360; EG-NEXT:     XOR_INT T0.Y, PV.Z, T6.W,
7361; EG-NEXT:     SETGE_UINT T0.Z, PV.Y, T2.Z, BS:VEC_021/SCL_122
7362; EG-NEXT:     SETE_INT T7.W, PV.X, T4.Z,
7363; EG-NEXT:     SETGE_UINT * T8.W, PV.X, T4.Z,
7364; EG-NEXT:     SUB_INT T5.X, T10.W, T3.Z,
7365; EG-NEXT:     CNDE_INT T2.Y, PV.W, PS, PV.Z,
7366; EG-NEXT:     SUB_INT T0.Z, T5.Y, T2.Z, BS:VEC_021/SCL_122
7367; EG-NEXT:     BIT_ALIGN_INT T7.W, PV.X, PV.Y, literal.x,
7368; EG-NEXT:     LSHR * T8.W, PV.X, literal.x,
7369; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7370; EG-NEXT:     SUBB_UINT T4.X, T5.Y, T2.Z,
7371; EG-NEXT:     SUB_INT T4.Y, T7.X, T4.Z, BS:VEC_021/SCL_122
7372; EG-NEXT:     SETE_INT T1.Z, PS, T1.W, BS:VEC_021/SCL_122
7373; EG-NEXT:     SETGE_UINT T11.W, PS, T1.W, BS:VEC_021/SCL_122
7374; EG-NEXT:     SETGE_UINT * T12.W, PV.W, T0.W,
7375; EG-NEXT:     SUB_INT T8.X, T8.W, T1.W,
7376; EG-NEXT:     CNDE_INT * T6.Y, PV.Z, PV.W, PS,
7377; EG-NEXT:     SUB_INT T1.Z, T7.W, T0.W,
7378; EG-NEXT:     SUB_INT T11.W, T4.Y, T4.X,
7379; EG-NEXT:     CNDE_INT * T12.W, T2.Y, T5.Y, T0.Z,
7380; EG-NEXT:     SUBB_UINT T4.X, T7.W, T0.W, BS:VEC_021/SCL_122
7381; EG-NEXT:     LSHL T4.Y, PS, 1,
7382; EG-NEXT:     BFE_UINT T0.Z, T3.X, literal.x, 1,
7383; EG-NEXT:     CNDE_INT T11.W, T2.Y, T7.X, PV.W,
7384; EG-NEXT:     CNDE_INT * T7.W, T6.Y, T7.W, PV.Z,
7385; EG-NEXT:    29(4.063766e-44), 0(0.000000e+00)
7386; EG-NEXT:     SUBB_UINT * T7.X, T9.W, T4.W,
7387; EG-NEXT:    ALU clause starting at 723:
7388; EG-NEXT:     LSHL T2.Y, T7.W, 1,
7389; EG-NEXT:     BIT_ALIGN_INT T1.Z, T11.W, T12.W, literal.x, BS:VEC_120/SCL_212
7390; EG-NEXT:     OR_INT T9.W, T4.Y, T0.Z,
7391; EG-NEXT:     SUB_INT * T11.W, T8.X, T4.X,
7392; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7393; EG-NEXT:     BFE_UINT T4.X, T0.Y, literal.x, 1,
7394; EG-NEXT:     CNDE_INT T4.Y, T6.Y, T8.W, PS, BS:VEC_120/SCL_212
7395; EG-NEXT:     SETGE_UINT T0.Z, PV.W, T2.Z, BS:VEC_021/SCL_122
7396; EG-NEXT:     SETE_INT T8.W, PV.Z, T4.Z,
7397; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T4.Z,
7398; EG-NEXT:    30(4.203895e-44), 0(0.000000e+00)
7399; EG-NEXT:     CNDE_INT T8.X, PV.W, PS, PV.Z,
7400; EG-NEXT:     SUB_INT T5.Y, T9.W, T2.Z,
7401; EG-NEXT:     BIT_ALIGN_INT T0.Z, PV.Y, T7.W, literal.x,
7402; EG-NEXT:     OR_INT T7.W, T2.Y, PV.X,
7403; EG-NEXT:     SUB_INT * T8.W, T5.X, T7.X,
7404; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7405; EG-NEXT:     CNDE_INT T4.X, T1.Y, T10.W, PS,
7406; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T0.W, BS:VEC_021/SCL_122
7407; EG-NEXT:     SETE_INT T5.Z, PV.Z, T1.W, BS:VEC_102/SCL_221
7408; EG-NEXT:     SETGE_UINT * T8.W, PV.Z, T1.W, BS:VEC_102/SCL_221
7409; EG-NEXT:     CNDE_INT * T10.W, T8.X, T9.W, T5.Y,
7410; EG-NEXT:     LSHL T5.X, PV.W, 1,
7411; EG-NEXT:     CNDE_INT T1.Y, T5.Z, T8.W, T1.Y,
7412; EG-NEXT:     SUB_INT * T5.Z, T7.W, T0.W, BS:VEC_021/SCL_122
7413; EG-NEXT:     BIT_ALIGN_INT T8.W, T4.X, T13.W, literal.x,
7414; EG-NEXT:     OR_INT * T11.W, T3.Y, T6.X,
7415; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7416; EG-NEXT:     SUBB_UINT T4.X, T7.W, T0.W,
7417; EG-NEXT:     SUB_INT * T2.Y, T0.Z, T1.W, BS:VEC_021/SCL_122
7418; EG-NEXT:     SETGE_UINT T6.Z, T11.W, T4.W,
7419; EG-NEXT:     SETE_INT T12.W, T8.W, T3.Z, BS:VEC_201
7420; EG-NEXT:     SETGE_UINT * T13.W, T8.W, T3.Z,
7421; EG-NEXT:     SUBB_UINT T6.X, T11.W, T4.W, BS:VEC_021/SCL_122
7422; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
7423; EG-NEXT:     SUB_INT T6.Z, T11.W, T4.W, BS:VEC_021/SCL_122
7424; EG-NEXT:     SUB_INT T12.W, T2.Y, T4.X,
7425; EG-NEXT:     CNDE_INT * T7.W, T1.Y, T7.W, T5.Z,
7426; EG-NEXT:     SUB_INT T4.X, T8.W, T3.Z,
7427; EG-NEXT:     LSHL T2.Y, PS, 1,
7428; EG-NEXT:     BFE_UINT T5.Z, T0.Y, literal.x, 1,
7429; EG-NEXT:     CNDE_INT T12.W, T1.Y, T0.Z, PV.W, BS:VEC_120/SCL_212
7430; EG-NEXT:     CNDE_INT * T11.W, PV.Y, T11.W, PV.Z,
7431; EG-NEXT:    29(4.063766e-44), 0(0.000000e+00)
7432; EG-NEXT:     BFE_UINT T7.X, T3.X, literal.x, 1,
7433; EG-NEXT:     LSHL T1.Y, PS, 1,
7434; EG-NEXT:     BIT_ALIGN_INT T0.Z, PV.W, T7.W, literal.y,
7435; EG-NEXT:     OR_INT T7.W, PV.Y, PV.Z,
7436; EG-NEXT:     SUB_INT * T12.W, PV.X, T6.X,
7437; EG-NEXT:    28(3.923636e-44), 31(4.344025e-44)
7438; EG-NEXT:     BFE_UINT T4.X, T2.X, literal.x, 1,
7439; EG-NEXT:     CNDE_INT T2.Y, T3.Y, T8.W, PS, BS:VEC_021/SCL_122
7440; EG-NEXT:     SETGE_UINT T5.Z, PV.W, T0.W, BS:VEC_102/SCL_221
7441; EG-NEXT:     SETE_INT T8.W, PV.Z, T1.W,
7442; EG-NEXT:     SETGE_UINT * T12.W, PV.Z, T1.W,
7443; EG-NEXT:    29(4.063766e-44), 0(0.000000e+00)
7444; EG-NEXT:     SUBB_UINT T6.X, T9.W, T2.Z,
7445; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
7446; EG-NEXT:     SUB_INT * T5.Z, T7.W, T0.W, BS:VEC_120/SCL_212
7447; EG-NEXT:     BIT_ALIGN_INT T8.W, T2.Y, T11.W, literal.x,
7448; EG-NEXT:     OR_INT * T9.W, T1.Y, T4.X,
7449; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7450; EG-NEXT:     SUBB_UINT T4.X, T7.W, T0.W,
7451; EG-NEXT:     SUB_INT * T1.Y, T0.Z, T1.W, BS:VEC_021/SCL_122
7452; EG-NEXT:     SETGE_UINT T6.Z, T9.W, T4.W,
7453; EG-NEXT:     SETE_INT T11.W, T8.W, T3.Z, BS:VEC_201
7454; EG-NEXT:     SETGE_UINT * T12.W, T8.W, T3.Z,
7455; EG-NEXT:     SUBB_UINT T9.X, T9.W, T4.W, BS:VEC_021/SCL_122
7456; EG-NEXT:     CNDE_INT T2.Y, PV.W, PS, PV.Z,
7457; EG-NEXT:     SUB_INT T6.Z, T9.W, T4.W, BS:VEC_021/SCL_122
7458; EG-NEXT:     SUB_INT T11.W, T1.Y, T4.X,
7459; EG-NEXT:     CNDE_INT * T7.W, T3.Y, T7.W, T5.Z,
7460; EG-NEXT:     SUB_INT T4.X, T8.W, T3.Z,
7461; EG-NEXT:     LSHL T1.Y, PS, 1,
7462; EG-NEXT:     BFE_UINT T5.Z, T0.Y, literal.x, 1,
7463; EG-NEXT:     CNDE_INT T11.W, T3.Y, T0.Z, PV.W, BS:VEC_120/SCL_212
7464; EG-NEXT:     CNDE_INT * T9.W, PV.Y, T9.W, PV.Z,
7465; EG-NEXT:    28(3.923636e-44), 0(0.000000e+00)
7466; EG-NEXT:     SUB_INT T10.X, T1.Z, T4.Z,
7467; EG-NEXT:     LSHL T3.Y, PS, 1,
7468; EG-NEXT:     BIT_ALIGN_INT T0.Z, PV.W, T7.W, literal.x,
7469; EG-NEXT:     OR_INT T7.W, PV.Y, PV.Z,
7470; EG-NEXT:     SUB_INT * T11.W, PV.X, T9.X,
7471; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7472; EG-NEXT:     BFE_UINT T4.X, T2.X, literal.x, 1,
7473; EG-NEXT:     CNDE_INT T1.Y, T2.Y, T8.W, PS, BS:VEC_021/SCL_122
7474; EG-NEXT:     SETGE_UINT T5.Z, PV.W, T0.W, BS:VEC_102/SCL_221
7475; EG-NEXT:     SETE_INT T8.W, PV.Z, T1.W,
7476; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T1.W,
7477; EG-NEXT:    28(3.923636e-44), 0(0.000000e+00)
7478; EG-NEXT:     CNDE_INT T9.X, PV.W, PS, PV.Z,
7479; EG-NEXT:     SUB_INT T2.Y, T7.W, T0.W,
7480; EG-NEXT:     BIT_ALIGN_INT T5.Z, PV.Y, T9.W, literal.x, BS:VEC_021/SCL_122
7481; EG-NEXT:     OR_INT T8.W, T3.Y, PV.X,
7482; EG-NEXT:     SUB_INT * T9.W, T10.X, T6.X,
7483; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7484; EG-NEXT:     CNDE_INT T4.X, T8.X, T1.Z, PS,
7485; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
7486; EG-NEXT:     SETE_INT T1.Z, PV.Z, T3.Z, BS:VEC_021/SCL_122
7487; EG-NEXT:     SETGE_UINT T9.W, PV.Z, T3.Z, BS:VEC_021/SCL_122
7488; EG-NEXT:     CNDE_INT * T11.W, PV.X, T7.W, PV.Y,
7489; EG-NEXT:     LSHL T6.X, PS, 1,
7490; EG-NEXT:     CNDE_INT T1.Y, PV.Z, PV.W, PV.Y,
7491; EG-NEXT:     SUB_INT T1.Z, T8.W, T4.W,
7492; EG-NEXT:     BIT_ALIGN_INT T9.W, PV.X, T10.W, literal.x, BS:VEC_021/SCL_122
7493; EG-NEXT:     OR_INT * T10.W, T5.X, T7.X,
7494; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7495; EG-NEXT:     SUBB_UINT T4.X, T8.W, T4.W,
7496; EG-NEXT:     SUB_INT T2.Y, T5.Z, T3.Z,
7497; EG-NEXT:     SETGE_UINT * T6.Z, PS, T2.Z, BS:VEC_021/SCL_122
7498; EG-NEXT:     SETE_INT T12.W, T9.W, T4.Z,
7499; EG-NEXT:     SETGE_UINT * T13.W, T9.W, T4.Z,
7500; EG-NEXT:     SUBB_UINT T5.X, T10.W, T2.Z,
7501; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, T6.Z,
7502; EG-NEXT:     SUB_INT * T6.Z, T10.W, T2.Z,
7503; EG-NEXT:    ALU clause starting at 838:
7504; EG-NEXT:     SUB_INT T12.W, T2.Y, T4.X,
7505; EG-NEXT:     CNDE_INT * T8.W, T1.Y, T8.W, T1.Z,
7506; EG-NEXT:     SUB_INT T4.X, T9.W, T4.Z,
7507; EG-NEXT:     LSHL T2.Y, PS, 1,
7508; EG-NEXT:     BFE_UINT T1.Z, T2.X, literal.x, 1,
7509; EG-NEXT:     CNDE_INT T12.W, T1.Y, T5.Z, PV.W, BS:VEC_021/SCL_122
7510; EG-NEXT:     CNDE_INT * T10.W, T3.Y, T10.W, T6.Z,
7511; EG-NEXT:    27(3.783506e-44), 0(0.000000e+00)
7512; EG-NEXT:     BFE_UINT T7.X, T0.Y, literal.x, 1,
7513; EG-NEXT:     LSHL T1.Y, PS, 1,
7514; EG-NEXT:     BIT_ALIGN_INT T5.Z, PV.W, T8.W, literal.y,
7515; EG-NEXT:     OR_INT T8.W, PV.Y, PV.Z,
7516; EG-NEXT:     SUB_INT * T12.W, PV.X, T5.X,
7517; EG-NEXT:    27(3.783506e-44), 31(4.344025e-44)
7518; EG-NEXT:     BFE_UINT T4.X, T3.X, literal.x, 1,
7519; EG-NEXT:     CNDE_INT T2.Y, T3.Y, T9.W, PS,
7520; EG-NEXT:     SETGE_UINT T1.Z, PV.W, T4.W, BS:VEC_021/SCL_122
7521; EG-NEXT:     SETE_INT T9.W, PV.Z, T3.Z,
7522; EG-NEXT:     SETGE_UINT * T12.W, PV.Z, T3.Z,
7523; EG-NEXT:    27(3.783506e-44), 0(0.000000e+00)
7524; EG-NEXT:     SUBB_UINT T5.X, T7.W, T0.W,
7525; EG-NEXT:     CNDE_INT * T3.Y, PV.W, PS, PV.Z,
7526; EG-NEXT:     SUB_INT T1.Z, T8.W, T4.W,
7527; EG-NEXT:     BIT_ALIGN_INT T7.W, T2.Y, T10.W, literal.x, BS:VEC_021/SCL_122
7528; EG-NEXT:     OR_INT * T9.W, T1.Y, T4.X,
7529; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7530; EG-NEXT:     SUBB_UINT T4.X, T8.W, T4.W,
7531; EG-NEXT:     SUB_INT T1.Y, T5.Z, T3.Z,
7532; EG-NEXT:     SETGE_UINT * T6.Z, PS, T2.Z, BS:VEC_021/SCL_122
7533; EG-NEXT:     SETE_INT T10.W, T7.W, T4.Z,
7534; EG-NEXT:     SETGE_UINT * T12.W, T7.W, T4.Z,
7535; EG-NEXT:     SUBB_UINT T8.X, T9.W, T2.Z,
7536; EG-NEXT:     CNDE_INT T2.Y, PV.W, PS, T6.Z,
7537; EG-NEXT:     SUB_INT T6.Z, T9.W, T2.Z,
7538; EG-NEXT:     SUB_INT T10.W, T1.Y, T4.X,
7539; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T1.Z,
7540; EG-NEXT:     SUB_INT T4.X, T7.W, T4.Z,
7541; EG-NEXT:     LSHL T1.Y, PS, 1,
7542; EG-NEXT:     BFE_UINT T1.Z, T2.X, literal.x, 1,
7543; EG-NEXT:     CNDE_INT T10.W, T3.Y, T5.Z, PV.W, BS:VEC_021/SCL_122
7544; EG-NEXT:     CNDE_INT * T9.W, PV.Y, T9.W, PV.Z,
7545; EG-NEXT:    26(3.643376e-44), 0(0.000000e+00)
7546; EG-NEXT:     SUB_INT T10.X, T0.Z, T1.W,
7547; EG-NEXT:     LSHL T3.Y, PS, 1,
7548; EG-NEXT:     BIT_ALIGN_INT T5.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
7549; EG-NEXT:     OR_INT T8.W, PV.Y, PV.Z,
7550; EG-NEXT:     SUB_INT * T10.W, PV.X, T8.X,
7551; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7552; EG-NEXT:     BFE_UINT T4.X, T3.X, literal.x, 1,
7553; EG-NEXT:     CNDE_INT T1.Y, T2.Y, T7.W, PS,
7554; EG-NEXT:     SETGE_UINT T1.Z, PV.W, T4.W, BS:VEC_021/SCL_122
7555; EG-NEXT:     SETE_INT T7.W, PV.Z, T3.Z,
7556; EG-NEXT:     SETGE_UINT * T10.W, PV.Z, T3.Z,
7557; EG-NEXT:    26(3.643376e-44), 0(0.000000e+00)
7558; EG-NEXT:     CNDE_INT T8.X, PV.W, PS, PV.Z,
7559; EG-NEXT:     SUB_INT T2.Y, T8.W, T4.W,
7560; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.Y, T9.W, literal.x, BS:VEC_021/SCL_122
7561; EG-NEXT:     OR_INT T7.W, T3.Y, PV.X,
7562; EG-NEXT:     SUB_INT * T9.W, T10.X, T5.X,
7563; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7564; EG-NEXT:     CNDE_INT T4.X, T9.X, T0.Z, PS,
7565; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T2.Z, BS:VEC_021/SCL_122
7566; EG-NEXT:     SETE_INT T0.Z, PV.Z, T4.Z, BS:VEC_102/SCL_221
7567; EG-NEXT:     SETGE_UINT T9.W, PV.Z, T4.Z, BS:VEC_102/SCL_221
7568; EG-NEXT:     CNDE_INT * T10.W, PV.X, T8.W, PV.Y,
7569; EG-NEXT:     LSHL T5.X, PS, 1,
7570; EG-NEXT:     CNDE_INT T1.Y, PV.Z, PV.W, PV.Y,
7571; EG-NEXT:     SUB_INT T0.Z, T7.W, T2.Z,
7572; EG-NEXT:     BIT_ALIGN_INT T9.W, PV.X, T11.W, literal.x,
7573; EG-NEXT:     OR_INT * T11.W, T6.X, T7.X,
7574; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7575; EG-NEXT:     SUBB_UINT T4.X, T7.W, T2.Z,
7576; EG-NEXT:     SUB_INT T2.Y, T1.Z, T4.Z, BS:VEC_021/SCL_122
7577; EG-NEXT:     SETGE_UINT T6.Z, PS, T0.W, BS:VEC_021/SCL_122
7578; EG-NEXT:     SETE_INT T12.W, PV.W, T1.W,
7579; EG-NEXT:     SETGE_UINT * T13.W, PV.W, T1.W,
7580; EG-NEXT:     SUBB_UINT T6.X, T11.W, T0.W, BS:VEC_021/SCL_122
7581; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
7582; EG-NEXT:     SUB_INT T6.Z, T11.W, T0.W, BS:VEC_021/SCL_122
7583; EG-NEXT:     SUB_INT T12.W, PV.Y, PV.X,
7584; EG-NEXT:     CNDE_INT * T7.W, T1.Y, T7.W, T0.Z,
7585; EG-NEXT:     SUB_INT T4.X, T9.W, T1.W, BS:VEC_021/SCL_122
7586; EG-NEXT:     LSHL T2.Y, PS, 1,
7587; EG-NEXT:     BFE_UINT T0.Z, T3.X, literal.x, 1,
7588; EG-NEXT:     CNDE_INT T12.W, T1.Y, T1.Z, PV.W,
7589; EG-NEXT:     CNDE_INT * T11.W, PV.Y, T11.W, PV.Z,
7590; EG-NEXT:    25(3.503246e-44), 0(0.000000e+00)
7591; EG-NEXT:     BFE_UINT T7.X, T2.X, literal.x, 1,
7592; EG-NEXT:     LSHL T1.Y, PS, 1,
7593; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T7.W, literal.y,
7594; EG-NEXT:     OR_INT T7.W, PV.Y, PV.Z,
7595; EG-NEXT:     SUB_INT * T12.W, PV.X, T6.X,
7596; EG-NEXT:    25(3.503246e-44), 31(4.344025e-44)
7597; EG-NEXT:     BFE_UINT T4.X, T0.Y, literal.x, 1,
7598; EG-NEXT:     CNDE_INT T2.Y, T3.Y, T9.W, PS, BS:VEC_120/SCL_212
7599; EG-NEXT:     SETGE_UINT T0.Z, PV.W, T2.Z, BS:VEC_021/SCL_122
7600; EG-NEXT:     SETE_INT T9.W, PV.Z, T4.Z,
7601; EG-NEXT:     SETGE_UINT * T12.W, PV.Z, T4.Z,
7602; EG-NEXT:    26(3.643376e-44), 0(0.000000e+00)
7603; EG-NEXT:     SUBB_UINT T6.X, T8.W, T4.W,
7604; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
7605; EG-NEXT:     SUB_INT * T0.Z, T7.W, T2.Z, BS:VEC_201
7606; EG-NEXT:     BIT_ALIGN_INT T8.W, T2.Y, T11.W, literal.x,
7607; EG-NEXT:     OR_INT * T9.W, T1.Y, T4.X,
7608; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7609; EG-NEXT:     SUBB_UINT T4.X, T7.W, T2.Z,
7610; EG-NEXT:     SUB_INT T1.Y, T1.Z, T4.Z, BS:VEC_021/SCL_122
7611; EG-NEXT:     SETGE_UINT T6.Z, PS, T0.W, BS:VEC_021/SCL_122
7612; EG-NEXT:     SETE_INT T11.W, PV.W, T1.W,
7613; EG-NEXT:     SETGE_UINT * T12.W, PV.W, T1.W,
7614; EG-NEXT:     SUBB_UINT T9.X, T9.W, T0.W,
7615; EG-NEXT:     CNDE_INT T2.Y, PV.W, PS, PV.Z,
7616; EG-NEXT:     SUB_INT T6.Z, T9.W, T0.W,
7617; EG-NEXT:     SUB_INT * T11.W, PV.Y, PV.X,
7618; EG-NEXT:    ALU clause starting at 952:
7619; EG-NEXT:     CNDE_INT * T7.W, T3.Y, T7.W, T0.Z,
7620; EG-NEXT:     SUB_INT T4.X, T8.W, T1.W,
7621; EG-NEXT:     LSHL T1.Y, PV.W, 1,
7622; EG-NEXT:     BFE_UINT T0.Z, T3.X, literal.x, 1,
7623; EG-NEXT:     CNDE_INT * T11.W, T3.Y, T1.Z, T11.W,
7624; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
7625; EG-NEXT:     CNDE_INT * T9.W, T2.Y, T9.W, T6.Z,
7626; EG-NEXT:     SUB_INT T10.X, T5.Z, T3.Z,
7627; EG-NEXT:     LSHL T3.Y, PV.W, 1,
7628; EG-NEXT:     BIT_ALIGN_INT T1.Z, T11.W, T7.W, literal.x,
7629; EG-NEXT:     OR_INT T7.W, T1.Y, T0.Z, BS:VEC_021/SCL_122
7630; EG-NEXT:     SUB_INT * T11.W, T4.X, T9.X,
7631; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7632; EG-NEXT:     BFE_UINT T4.X, T0.Y, literal.x, 1,
7633; EG-NEXT:     CNDE_INT T1.Y, T2.Y, T8.W, PS, BS:VEC_120/SCL_212
7634; EG-NEXT:     SETGE_UINT T0.Z, PV.W, T2.Z, BS:VEC_021/SCL_122
7635; EG-NEXT:     SETE_INT T8.W, PV.Z, T4.Z,
7636; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T4.Z,
7637; EG-NEXT:    25(3.503246e-44), 0(0.000000e+00)
7638; EG-NEXT:     CNDE_INT T9.X, PV.W, PS, PV.Z,
7639; EG-NEXT:     SUB_INT T2.Y, T7.W, T2.Z,
7640; EG-NEXT:     BIT_ALIGN_INT T0.Z, PV.Y, T9.W, literal.x,
7641; EG-NEXT:     OR_INT T8.W, T3.Y, PV.X,
7642; EG-NEXT:     SUB_INT * T9.W, T10.X, T6.X,
7643; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7644; EG-NEXT:     CNDE_INT T4.X, T8.X, T5.Z, PS,
7645; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T0.W, BS:VEC_021/SCL_122
7646; EG-NEXT:     SETE_INT T5.Z, PV.Z, T1.W, BS:VEC_102/SCL_221
7647; EG-NEXT:     SETGE_UINT T9.W, PV.Z, T1.W, BS:VEC_102/SCL_221
7648; EG-NEXT:     CNDE_INT * T11.W, PV.X, T7.W, PV.Y,
7649; EG-NEXT:     LSHL T6.X, PS, 1,
7650; EG-NEXT:     CNDE_INT T1.Y, PV.Z, PV.W, PV.Y,
7651; EG-NEXT:     SUB_INT T5.Z, T8.W, T0.W,
7652; EG-NEXT:     BIT_ALIGN_INT T9.W, PV.X, T10.W, literal.x, BS:VEC_021/SCL_122
7653; EG-NEXT:     OR_INT * T10.W, T5.X, T7.X,
7654; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7655; EG-NEXT:     SUBB_UINT T4.X, T8.W, T0.W,
7656; EG-NEXT:     SUB_INT * T2.Y, T0.Z, T1.W, BS:VEC_021/SCL_122
7657; EG-NEXT:     SETGE_UINT T6.Z, T10.W, T4.W,
7658; EG-NEXT:     SETE_INT T12.W, T9.W, T3.Z, BS:VEC_201
7659; EG-NEXT:     SETGE_UINT * T13.W, T9.W, T3.Z,
7660; EG-NEXT:     SUBB_UINT T5.X, T10.W, T4.W, BS:VEC_021/SCL_122
7661; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
7662; EG-NEXT:     SUB_INT T6.Z, T10.W, T4.W, BS:VEC_021/SCL_122
7663; EG-NEXT:     SUB_INT T12.W, T2.Y, T4.X,
7664; EG-NEXT:     CNDE_INT * T8.W, T1.Y, T8.W, T5.Z,
7665; EG-NEXT:     SUB_INT T4.X, T9.W, T3.Z,
7666; EG-NEXT:     LSHL T2.Y, PS, 1,
7667; EG-NEXT:     BFE_UINT T5.Z, T0.Y, literal.x, 1,
7668; EG-NEXT:     CNDE_INT T12.W, T1.Y, T0.Z, PV.W, BS:VEC_120/SCL_212
7669; EG-NEXT:     CNDE_INT * T10.W, PV.Y, T10.W, PV.Z,
7670; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
7671; EG-NEXT:     BFE_UINT T7.X, T3.X, literal.x, 1,
7672; EG-NEXT:     LSHL T1.Y, PS, 1,
7673; EG-NEXT:     BIT_ALIGN_INT T0.Z, PV.W, T8.W, literal.y,
7674; EG-NEXT:     OR_INT T8.W, PV.Y, PV.Z,
7675; EG-NEXT:     SUB_INT * T12.W, PV.X, T5.X,
7676; EG-NEXT:    23(3.222986e-44), 31(4.344025e-44)
7677; EG-NEXT:     BFE_UINT T4.X, T2.X, literal.x, 1,
7678; EG-NEXT:     CNDE_INT T2.Y, T3.Y, T9.W, PS, BS:VEC_021/SCL_122
7679; EG-NEXT:     SETGE_UINT T5.Z, PV.W, T0.W, BS:VEC_102/SCL_221
7680; EG-NEXT:     SETE_INT T9.W, PV.Z, T1.W,
7681; EG-NEXT:     SETGE_UINT * T12.W, PV.Z, T1.W,
7682; EG-NEXT:    24(3.363116e-44), 0(0.000000e+00)
7683; EG-NEXT:     SUBB_UINT T5.X, T7.W, T2.Z,
7684; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
7685; EG-NEXT:     SUB_INT * T5.Z, T8.W, T0.W, BS:VEC_120/SCL_212
7686; EG-NEXT:     BIT_ALIGN_INT T7.W, T2.Y, T10.W, literal.x,
7687; EG-NEXT:     OR_INT * T9.W, T1.Y, T4.X,
7688; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7689; EG-NEXT:     SUBB_UINT T4.X, T8.W, T0.W,
7690; EG-NEXT:     SUB_INT * T1.Y, T0.Z, T1.W, BS:VEC_021/SCL_122
7691; EG-NEXT:     SETGE_UINT T6.Z, T9.W, T4.W,
7692; EG-NEXT:     SETE_INT T10.W, T7.W, T3.Z, BS:VEC_201
7693; EG-NEXT:     SETGE_UINT * T12.W, T7.W, T3.Z,
7694; EG-NEXT:     SUBB_UINT T8.X, T9.W, T4.W, BS:VEC_021/SCL_122
7695; EG-NEXT:     CNDE_INT T2.Y, PV.W, PS, PV.Z,
7696; EG-NEXT:     SUB_INT T6.Z, T9.W, T4.W, BS:VEC_021/SCL_122
7697; EG-NEXT:     SUB_INT T10.W, T1.Y, T4.X,
7698; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T5.Z,
7699; EG-NEXT:     SUB_INT T4.X, T7.W, T3.Z,
7700; EG-NEXT:     LSHL T1.Y, PS, 1,
7701; EG-NEXT:     BFE_UINT T5.Z, T0.Y, literal.x, 1,
7702; EG-NEXT:     CNDE_INT T10.W, T3.Y, T0.Z, PV.W, BS:VEC_120/SCL_212
7703; EG-NEXT:     CNDE_INT * T9.W, PV.Y, T9.W, PV.Z,
7704; EG-NEXT:    23(3.222986e-44), 0(0.000000e+00)
7705; EG-NEXT:     SUB_INT T10.X, T1.Z, T4.Z,
7706; EG-NEXT:     LSHL T3.Y, PS, 1,
7707; EG-NEXT:     BIT_ALIGN_INT T0.Z, PV.W, T8.W, literal.x,
7708; EG-NEXT:     OR_INT T8.W, PV.Y, PV.Z,
7709; EG-NEXT:     SUB_INT * T10.W, PV.X, T8.X,
7710; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7711; EG-NEXT:     BFE_UINT T4.X, T2.X, literal.x, 1,
7712; EG-NEXT:     CNDE_INT T1.Y, T2.Y, T7.W, PS, BS:VEC_021/SCL_122
7713; EG-NEXT:     SETGE_UINT T5.Z, PV.W, T0.W, BS:VEC_102/SCL_221
7714; EG-NEXT:     SETE_INT T7.W, PV.Z, T1.W,
7715; EG-NEXT:     SETGE_UINT * T10.W, PV.Z, T1.W,
7716; EG-NEXT:    23(3.222986e-44), 0(0.000000e+00)
7717; EG-NEXT:     CNDE_INT T8.X, PV.W, PS, PV.Z,
7718; EG-NEXT:     SUB_INT T2.Y, T8.W, T0.W,
7719; EG-NEXT:     BIT_ALIGN_INT T5.Z, PV.Y, T9.W, literal.x, BS:VEC_021/SCL_122
7720; EG-NEXT:     OR_INT T7.W, T3.Y, PV.X,
7721; EG-NEXT:     SUB_INT * T9.W, T10.X, T5.X,
7722; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7723; EG-NEXT:     CNDE_INT T4.X, T9.X, T1.Z, PS,
7724; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
7725; EG-NEXT:     SETE_INT T1.Z, PV.Z, T3.Z, BS:VEC_021/SCL_122
7726; EG-NEXT:     SETGE_UINT T9.W, PV.Z, T3.Z, BS:VEC_021/SCL_122
7727; EG-NEXT:     CNDE_INT * T10.W, PV.X, T8.W, PV.Y,
7728; EG-NEXT:     LSHL T5.X, PS, 1,
7729; EG-NEXT:     CNDE_INT T1.Y, PV.Z, PV.W, PV.Y,
7730; EG-NEXT:     SUB_INT T1.Z, T7.W, T4.W,
7731; EG-NEXT:     BIT_ALIGN_INT T9.W, PV.X, T11.W, literal.x, BS:VEC_021/SCL_122
7732; EG-NEXT:     OR_INT * T11.W, T6.X, T7.X,
7733; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7734; EG-NEXT:    ALU clause starting at 1067:
7735; EG-NEXT:     SUBB_UINT T4.X, T7.W, T4.W,
7736; EG-NEXT:     SUB_INT T2.Y, T5.Z, T3.Z, BS:VEC_021/SCL_122
7737; EG-NEXT:     SETGE_UINT * T6.Z, T11.W, T2.Z, BS:VEC_210
7738; EG-NEXT:     SETE_INT T12.W, T9.W, T4.Z,
7739; EG-NEXT:     SETGE_UINT * T13.W, T9.W, T4.Z,
7740; EG-NEXT:     SUBB_UINT T6.X, T11.W, T2.Z,
7741; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, T6.Z,
7742; EG-NEXT:     SUB_INT T6.Z, T11.W, T2.Z,
7743; EG-NEXT:     SUB_INT T12.W, T2.Y, T4.X,
7744; EG-NEXT:     CNDE_INT * T7.W, T1.Y, T7.W, T1.Z,
7745; EG-NEXT:     SUB_INT T4.X, T9.W, T4.Z,
7746; EG-NEXT:     LSHL T2.Y, PS, 1,
7747; EG-NEXT:     BFE_UINT T1.Z, T2.X, literal.x, 1,
7748; EG-NEXT:     CNDE_INT T12.W, T1.Y, T5.Z, PV.W, BS:VEC_021/SCL_122
7749; EG-NEXT:     CNDE_INT * T11.W, PV.Y, T11.W, PV.Z,
7750; EG-NEXT:    22(3.082857e-44), 0(0.000000e+00)
7751; EG-NEXT:     BFE_UINT T7.X, T0.Y, literal.x, 1,
7752; EG-NEXT:     LSHL T1.Y, PS, 1,
7753; EG-NEXT:     BIT_ALIGN_INT T5.Z, PV.W, T7.W, literal.y,
7754; EG-NEXT:     OR_INT T7.W, PV.Y, PV.Z,
7755; EG-NEXT:     SUB_INT * T12.W, PV.X, T6.X,
7756; EG-NEXT:    22(3.082857e-44), 31(4.344025e-44)
7757; EG-NEXT:     BFE_UINT T4.X, T3.X, literal.x, 1,
7758; EG-NEXT:     CNDE_INT T2.Y, T3.Y, T9.W, PS,
7759; EG-NEXT:     SETGE_UINT T1.Z, PV.W, T4.W, BS:VEC_021/SCL_122
7760; EG-NEXT:     SETE_INT T9.W, PV.Z, T3.Z,
7761; EG-NEXT:     SETGE_UINT * T12.W, PV.Z, T3.Z,
7762; EG-NEXT:    22(3.082857e-44), 0(0.000000e+00)
7763; EG-NEXT:     SUBB_UINT T6.X, T8.W, T0.W,
7764; EG-NEXT:     CNDE_INT * T3.Y, PV.W, PS, PV.Z,
7765; EG-NEXT:     SUB_INT T1.Z, T7.W, T4.W,
7766; EG-NEXT:     BIT_ALIGN_INT T8.W, T2.Y, T11.W, literal.x, BS:VEC_021/SCL_122
7767; EG-NEXT:     OR_INT * T9.W, T1.Y, T4.X,
7768; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7769; EG-NEXT:     SUBB_UINT T4.X, T7.W, T4.W,
7770; EG-NEXT:     SUB_INT T1.Y, T5.Z, T3.Z,
7771; EG-NEXT:     SETGE_UINT * T6.Z, PS, T2.Z, BS:VEC_021/SCL_122
7772; EG-NEXT:     SETE_INT T11.W, T8.W, T4.Z,
7773; EG-NEXT:     SETGE_UINT * T12.W, T8.W, T4.Z,
7774; EG-NEXT:     SUBB_UINT T9.X, T9.W, T2.Z,
7775; EG-NEXT:     CNDE_INT T2.Y, PV.W, PS, T6.Z,
7776; EG-NEXT:     SUB_INT T6.Z, T9.W, T2.Z,
7777; EG-NEXT:     SUB_INT T11.W, T1.Y, T4.X,
7778; EG-NEXT:     CNDE_INT * T7.W, T3.Y, T7.W, T1.Z,
7779; EG-NEXT:     SUB_INT T4.X, T8.W, T4.Z,
7780; EG-NEXT:     LSHL T1.Y, PS, 1,
7781; EG-NEXT:     BFE_UINT T1.Z, T2.X, literal.x, 1,
7782; EG-NEXT:     CNDE_INT T11.W, T3.Y, T5.Z, PV.W, BS:VEC_021/SCL_122
7783; EG-NEXT:     CNDE_INT * T9.W, PV.Y, T9.W, PV.Z,
7784; EG-NEXT:    21(2.942727e-44), 0(0.000000e+00)
7785; EG-NEXT:     SUB_INT T10.X, T0.Z, T1.W,
7786; EG-NEXT:     LSHL T3.Y, PS, 1,
7787; EG-NEXT:     BIT_ALIGN_INT T5.Z, PV.W, T7.W, literal.x, BS:VEC_021/SCL_122
7788; EG-NEXT:     OR_INT T7.W, PV.Y, PV.Z,
7789; EG-NEXT:     SUB_INT * T11.W, PV.X, T9.X,
7790; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7791; EG-NEXT:     BFE_UINT T4.X, T3.X, literal.x, 1,
7792; EG-NEXT:     CNDE_INT T1.Y, T2.Y, T8.W, PS,
7793; EG-NEXT:     SETGE_UINT T1.Z, PV.W, T4.W, BS:VEC_021/SCL_122
7794; EG-NEXT:     SETE_INT T8.W, PV.Z, T3.Z,
7795; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T3.Z,
7796; EG-NEXT:    21(2.942727e-44), 0(0.000000e+00)
7797; EG-NEXT:     CNDE_INT T9.X, PV.W, PS, PV.Z,
7798; EG-NEXT:     SUB_INT T2.Y, T7.W, T4.W,
7799; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.Y, T9.W, literal.x, BS:VEC_021/SCL_122
7800; EG-NEXT:     OR_INT T8.W, T3.Y, PV.X,
7801; EG-NEXT:     SUB_INT * T9.W, T10.X, T6.X,
7802; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7803; EG-NEXT:     CNDE_INT T4.X, T8.X, T0.Z, PS,
7804; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T2.Z, BS:VEC_021/SCL_122
7805; EG-NEXT:     SETE_INT T0.Z, PV.Z, T4.Z, BS:VEC_102/SCL_221
7806; EG-NEXT:     SETGE_UINT T9.W, PV.Z, T4.Z, BS:VEC_102/SCL_221
7807; EG-NEXT:     CNDE_INT * T11.W, PV.X, T7.W, PV.Y,
7808; EG-NEXT:     LSHL T6.X, PS, 1,
7809; EG-NEXT:     CNDE_INT T1.Y, PV.Z, PV.W, PV.Y,
7810; EG-NEXT:     SUB_INT T0.Z, T8.W, T2.Z,
7811; EG-NEXT:     BIT_ALIGN_INT T9.W, PV.X, T10.W, literal.x,
7812; EG-NEXT:     OR_INT * T10.W, T5.X, T7.X,
7813; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7814; EG-NEXT:     SUBB_UINT T4.X, T8.W, T2.Z,
7815; EG-NEXT:     SUB_INT T2.Y, T1.Z, T4.Z, BS:VEC_021/SCL_122
7816; EG-NEXT:     SETGE_UINT T6.Z, PS, T0.W, BS:VEC_021/SCL_122
7817; EG-NEXT:     SETE_INT T12.W, PV.W, T1.W,
7818; EG-NEXT:     SETGE_UINT * T13.W, PV.W, T1.W,
7819; EG-NEXT:     SUBB_UINT T5.X, T10.W, T0.W, BS:VEC_021/SCL_122
7820; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
7821; EG-NEXT:     SUB_INT T6.Z, T10.W, T0.W, BS:VEC_021/SCL_122
7822; EG-NEXT:     SUB_INT T12.W, PV.Y, PV.X,
7823; EG-NEXT:     CNDE_INT * T8.W, T1.Y, T8.W, T0.Z,
7824; EG-NEXT:     SUB_INT T4.X, T9.W, T1.W, BS:VEC_021/SCL_122
7825; EG-NEXT:     LSHL T2.Y, PS, 1,
7826; EG-NEXT:     BFE_UINT T0.Z, T3.X, literal.x, 1,
7827; EG-NEXT:     CNDE_INT T12.W, T1.Y, T1.Z, PV.W,
7828; EG-NEXT:     CNDE_INT * T10.W, PV.Y, T10.W, PV.Z,
7829; EG-NEXT:    20(2.802597e-44), 0(0.000000e+00)
7830; EG-NEXT:     BFE_UINT T7.X, T2.X, literal.x, 1,
7831; EG-NEXT:     LSHL T1.Y, PS, 1,
7832; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T8.W, literal.y,
7833; EG-NEXT:     OR_INT T8.W, PV.Y, PV.Z,
7834; EG-NEXT:     SUB_INT * T12.W, PV.X, T5.X,
7835; EG-NEXT:    20(2.802597e-44), 31(4.344025e-44)
7836; EG-NEXT:     BFE_UINT T4.X, T0.Y, literal.x, 1,
7837; EG-NEXT:     CNDE_INT T2.Y, T3.Y, T9.W, PS, BS:VEC_120/SCL_212
7838; EG-NEXT:     SETGE_UINT T0.Z, PV.W, T2.Z, BS:VEC_021/SCL_122
7839; EG-NEXT:     SETE_INT T9.W, PV.Z, T4.Z,
7840; EG-NEXT:     SETGE_UINT * T12.W, PV.Z, T4.Z,
7841; EG-NEXT:    21(2.942727e-44), 0(0.000000e+00)
7842; EG-NEXT:     SUBB_UINT T5.X, T7.W, T4.W,
7843; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
7844; EG-NEXT:     SUB_INT * T0.Z, T8.W, T2.Z, BS:VEC_201
7845; EG-NEXT:     BIT_ALIGN_INT T7.W, T2.Y, T10.W, literal.x,
7846; EG-NEXT:     OR_INT * T9.W, T1.Y, T4.X,
7847; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7848; EG-NEXT:     SUBB_UINT * T4.X, T8.W, T2.Z,
7849; EG-NEXT:    ALU clause starting at 1181:
7850; EG-NEXT:     SUB_INT T1.Y, T1.Z, T4.Z,
7851; EG-NEXT:     SETGE_UINT * T6.Z, T9.W, T0.W,
7852; EG-NEXT:     SETE_INT T10.W, T7.W, T1.W,
7853; EG-NEXT:     SETGE_UINT * T12.W, T7.W, T1.W,
7854; EG-NEXT:     SUBB_UINT T8.X, T9.W, T0.W, BS:VEC_021/SCL_122
7855; EG-NEXT:     CNDE_INT T2.Y, PV.W, PS, T6.Z,
7856; EG-NEXT:     SUB_INT T6.Z, T9.W, T0.W, BS:VEC_021/SCL_122
7857; EG-NEXT:     SUB_INT T10.W, T1.Y, T4.X,
7858; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
7859; EG-NEXT:     SUB_INT T4.X, T7.W, T1.W, BS:VEC_021/SCL_122
7860; EG-NEXT:     LSHL T1.Y, PS, 1,
7861; EG-NEXT:     BFE_UINT T0.Z, T3.X, literal.x, 1,
7862; EG-NEXT:     CNDE_INT T10.W, T3.Y, T1.Z, PV.W,
7863; EG-NEXT:     CNDE_INT * T9.W, PV.Y, T9.W, PV.Z,
7864; EG-NEXT:    19(2.662467e-44), 0(0.000000e+00)
7865; EG-NEXT:     SUB_INT T10.X, T5.Z, T3.Z,
7866; EG-NEXT:     LSHL T3.Y, PS, 1,
7867; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T8.W, literal.x,
7868; EG-NEXT:     OR_INT T8.W, PV.Y, PV.Z,
7869; EG-NEXT:     SUB_INT * T10.W, PV.X, T8.X,
7870; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7871; EG-NEXT:     BFE_UINT T4.X, T0.Y, literal.x, 1,
7872; EG-NEXT:     CNDE_INT T1.Y, T2.Y, T7.W, PS, BS:VEC_120/SCL_212
7873; EG-NEXT:     SETGE_UINT T0.Z, PV.W, T2.Z, BS:VEC_021/SCL_122
7874; EG-NEXT:     SETE_INT T7.W, PV.Z, T4.Z,
7875; EG-NEXT:     SETGE_UINT * T10.W, PV.Z, T4.Z,
7876; EG-NEXT:    20(2.802597e-44), 0(0.000000e+00)
7877; EG-NEXT:     CNDE_INT T8.X, PV.W, PS, PV.Z,
7878; EG-NEXT:     SUB_INT T2.Y, T8.W, T2.Z,
7879; EG-NEXT:     BIT_ALIGN_INT T0.Z, PV.Y, T9.W, literal.x,
7880; EG-NEXT:     OR_INT T7.W, T3.Y, PV.X,
7881; EG-NEXT:     SUB_INT * T9.W, T10.X, T5.X,
7882; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7883; EG-NEXT:     CNDE_INT T4.X, T9.X, T5.Z, PS,
7884; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T0.W, BS:VEC_021/SCL_122
7885; EG-NEXT:     SETE_INT T5.Z, PV.Z, T1.W, BS:VEC_102/SCL_221
7886; EG-NEXT:     SETGE_UINT T9.W, PV.Z, T1.W, BS:VEC_102/SCL_221
7887; EG-NEXT:     CNDE_INT * T10.W, PV.X, T8.W, PV.Y,
7888; EG-NEXT:     LSHL T5.X, PS, 1,
7889; EG-NEXT:     CNDE_INT T1.Y, PV.Z, PV.W, PV.Y,
7890; EG-NEXT:     SUB_INT T5.Z, T7.W, T0.W,
7891; EG-NEXT:     BIT_ALIGN_INT T9.W, PV.X, T11.W, literal.x, BS:VEC_021/SCL_122
7892; EG-NEXT:     OR_INT * T11.W, T6.X, T7.X,
7893; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7894; EG-NEXT:     SUBB_UINT T4.X, T7.W, T0.W,
7895; EG-NEXT:     SUB_INT * T2.Y, T0.Z, T1.W, BS:VEC_021/SCL_122
7896; EG-NEXT:     SETGE_UINT T6.Z, T11.W, T4.W,
7897; EG-NEXT:     SETE_INT T12.W, T9.W, T3.Z, BS:VEC_201
7898; EG-NEXT:     SETGE_UINT * T13.W, T9.W, T3.Z,
7899; EG-NEXT:     SUBB_UINT T6.X, T11.W, T4.W, BS:VEC_021/SCL_122
7900; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
7901; EG-NEXT:     SUB_INT T6.Z, T11.W, T4.W, BS:VEC_021/SCL_122
7902; EG-NEXT:     SUB_INT T12.W, T2.Y, T4.X,
7903; EG-NEXT:     CNDE_INT * T7.W, T1.Y, T7.W, T5.Z,
7904; EG-NEXT:     SUB_INT T4.X, T9.W, T3.Z,
7905; EG-NEXT:     LSHL T2.Y, PS, 1,
7906; EG-NEXT:     BFE_UINT T5.Z, T0.Y, literal.x, 1,
7907; EG-NEXT:     CNDE_INT T12.W, T1.Y, T0.Z, PV.W, BS:VEC_120/SCL_212
7908; EG-NEXT:     CNDE_INT * T11.W, PV.Y, T11.W, PV.Z,
7909; EG-NEXT:    19(2.662467e-44), 0(0.000000e+00)
7910; EG-NEXT:     BFE_UINT T7.X, T3.X, literal.x, 1,
7911; EG-NEXT:     LSHL T1.Y, PS, 1,
7912; EG-NEXT:     BIT_ALIGN_INT T0.Z, PV.W, T7.W, literal.y,
7913; EG-NEXT:     OR_INT T7.W, PV.Y, PV.Z,
7914; EG-NEXT:     SUB_INT * T12.W, PV.X, T6.X,
7915; EG-NEXT:    18(2.522337e-44), 31(4.344025e-44)
7916; EG-NEXT:     BFE_UINT T4.X, T2.X, literal.x, 1,
7917; EG-NEXT:     CNDE_INT T2.Y, T3.Y, T9.W, PS, BS:VEC_021/SCL_122
7918; EG-NEXT:     SETGE_UINT T5.Z, PV.W, T0.W, BS:VEC_102/SCL_221
7919; EG-NEXT:     SETE_INT T9.W, PV.Z, T1.W,
7920; EG-NEXT:     SETGE_UINT * T12.W, PV.Z, T1.W,
7921; EG-NEXT:    19(2.662467e-44), 0(0.000000e+00)
7922; EG-NEXT:     SUBB_UINT T6.X, T8.W, T2.Z,
7923; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
7924; EG-NEXT:     SUB_INT * T5.Z, T7.W, T0.W, BS:VEC_120/SCL_212
7925; EG-NEXT:     BIT_ALIGN_INT T8.W, T2.Y, T11.W, literal.x,
7926; EG-NEXT:     OR_INT * T9.W, T1.Y, T4.X,
7927; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7928; EG-NEXT:     SUBB_UINT T4.X, T7.W, T0.W,
7929; EG-NEXT:     SUB_INT * T1.Y, T0.Z, T1.W, BS:VEC_021/SCL_122
7930; EG-NEXT:     SETGE_UINT T6.Z, T9.W, T4.W,
7931; EG-NEXT:     SETE_INT T11.W, T8.W, T3.Z, BS:VEC_201
7932; EG-NEXT:     SETGE_UINT * T12.W, T8.W, T3.Z,
7933; EG-NEXT:     SUBB_UINT T9.X, T9.W, T4.W, BS:VEC_021/SCL_122
7934; EG-NEXT:     CNDE_INT T2.Y, PV.W, PS, PV.Z,
7935; EG-NEXT:     SUB_INT T6.Z, T9.W, T4.W, BS:VEC_021/SCL_122
7936; EG-NEXT:     SUB_INT T11.W, T1.Y, T4.X,
7937; EG-NEXT:     CNDE_INT * T7.W, T3.Y, T7.W, T5.Z,
7938; EG-NEXT:     SUB_INT T4.X, T8.W, T3.Z,
7939; EG-NEXT:     LSHL T1.Y, PS, 1,
7940; EG-NEXT:     BFE_UINT T5.Z, T0.Y, literal.x, 1,
7941; EG-NEXT:     CNDE_INT T11.W, T3.Y, T0.Z, PV.W, BS:VEC_120/SCL_212
7942; EG-NEXT:     CNDE_INT * T9.W, PV.Y, T9.W, PV.Z,
7943; EG-NEXT:    18(2.522337e-44), 0(0.000000e+00)
7944; EG-NEXT:     SUB_INT T10.X, T1.Z, T4.Z,
7945; EG-NEXT:     LSHL T3.Y, PS, 1,
7946; EG-NEXT:     BIT_ALIGN_INT T0.Z, PV.W, T7.W, literal.x,
7947; EG-NEXT:     OR_INT T7.W, PV.Y, PV.Z,
7948; EG-NEXT:     SUB_INT * T11.W, PV.X, T9.X,
7949; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7950; EG-NEXT:     BFE_UINT T4.X, T2.X, literal.x, 1,
7951; EG-NEXT:     CNDE_INT T1.Y, T2.Y, T8.W, PS, BS:VEC_021/SCL_122
7952; EG-NEXT:     SETGE_UINT T5.Z, PV.W, T0.W, BS:VEC_102/SCL_221
7953; EG-NEXT:     SETE_INT T8.W, PV.Z, T1.W,
7954; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T1.W,
7955; EG-NEXT:    18(2.522337e-44), 0(0.000000e+00)
7956; EG-NEXT:     CNDE_INT T9.X, PV.W, PS, PV.Z,
7957; EG-NEXT:     SUB_INT T2.Y, T7.W, T0.W,
7958; EG-NEXT:     BIT_ALIGN_INT T5.Z, PV.Y, T9.W, literal.x, BS:VEC_021/SCL_122
7959; EG-NEXT:     OR_INT T8.W, T3.Y, PV.X,
7960; EG-NEXT:     SUB_INT * T9.W, T10.X, T6.X,
7961; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7962; EG-NEXT:     CNDE_INT T4.X, T8.X, T1.Z, PS,
7963; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W,
7964; EG-NEXT:     SETE_INT * T1.Z, PV.Z, T3.Z, BS:VEC_021/SCL_122
7965; EG-NEXT:    ALU clause starting at 1296:
7966; EG-NEXT:     SETGE_UINT T9.W, T5.Z, T3.Z,
7967; EG-NEXT:     CNDE_INT * T11.W, T9.X, T7.W, T2.Y,
7968; EG-NEXT:     LSHL T6.X, PS, 1,
7969; EG-NEXT:     CNDE_INT T1.Y, T1.Z, PV.W, T1.Y,
7970; EG-NEXT:     SUB_INT T1.Z, T8.W, T4.W,
7971; EG-NEXT:     BIT_ALIGN_INT T9.W, T4.X, T10.W, literal.x, BS:VEC_021/SCL_122
7972; EG-NEXT:     OR_INT * T10.W, T5.X, T7.X,
7973; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
7974; EG-NEXT:     SUBB_UINT T4.X, T8.W, T4.W,
7975; EG-NEXT:     SUB_INT T2.Y, T5.Z, T3.Z,
7976; EG-NEXT:     SETGE_UINT * T6.Z, PS, T2.Z, BS:VEC_021/SCL_122
7977; EG-NEXT:     SETE_INT T12.W, T9.W, T4.Z,
7978; EG-NEXT:     SETGE_UINT * T13.W, T9.W, T4.Z,
7979; EG-NEXT:     SUBB_UINT T5.X, T10.W, T2.Z,
7980; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, T6.Z,
7981; EG-NEXT:     SUB_INT T6.Z, T10.W, T2.Z,
7982; EG-NEXT:     SUB_INT T12.W, T2.Y, T4.X,
7983; EG-NEXT:     CNDE_INT * T8.W, T1.Y, T8.W, T1.Z,
7984; EG-NEXT:     SUB_INT T4.X, T9.W, T4.Z,
7985; EG-NEXT:     LSHL T2.Y, PS, 1,
7986; EG-NEXT:     BFE_UINT T1.Z, T2.X, literal.x, 1,
7987; EG-NEXT:     CNDE_INT T12.W, T1.Y, T5.Z, PV.W, BS:VEC_021/SCL_122
7988; EG-NEXT:     CNDE_INT * T10.W, PV.Y, T10.W, PV.Z,
7989; EG-NEXT:    17(2.382207e-44), 0(0.000000e+00)
7990; EG-NEXT:     BFE_UINT T7.X, T0.Y, literal.x, 1,
7991; EG-NEXT:     LSHL T1.Y, PS, 1,
7992; EG-NEXT:     BIT_ALIGN_INT T5.Z, PV.W, T8.W, literal.y,
7993; EG-NEXT:     OR_INT T8.W, PV.Y, PV.Z,
7994; EG-NEXT:     SUB_INT * T12.W, PV.X, T5.X,
7995; EG-NEXT:    17(2.382207e-44), 31(4.344025e-44)
7996; EG-NEXT:     BFE_UINT T4.X, T3.X, literal.x, 1,
7997; EG-NEXT:     CNDE_INT T2.Y, T3.Y, T9.W, PS,
7998; EG-NEXT:     SETGE_UINT T1.Z, PV.W, T4.W, BS:VEC_021/SCL_122
7999; EG-NEXT:     SETE_INT T9.W, PV.Z, T3.Z,
8000; EG-NEXT:     SETGE_UINT * T12.W, PV.Z, T3.Z,
8001; EG-NEXT:    17(2.382207e-44), 0(0.000000e+00)
8002; EG-NEXT:     SUBB_UINT T5.X, T7.W, T0.W,
8003; EG-NEXT:     CNDE_INT * T3.Y, PV.W, PS, PV.Z,
8004; EG-NEXT:     SUB_INT T1.Z, T8.W, T4.W,
8005; EG-NEXT:     BIT_ALIGN_INT T7.W, T2.Y, T10.W, literal.x, BS:VEC_021/SCL_122
8006; EG-NEXT:     OR_INT * T9.W, T1.Y, T4.X,
8007; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8008; EG-NEXT:     SUBB_UINT T4.X, T8.W, T4.W,
8009; EG-NEXT:     SUB_INT T1.Y, T5.Z, T3.Z,
8010; EG-NEXT:     SETGE_UINT * T6.Z, PS, T2.Z, BS:VEC_021/SCL_122
8011; EG-NEXT:     SETE_INT T10.W, T7.W, T4.Z,
8012; EG-NEXT:     SETGE_UINT * T12.W, T7.W, T4.Z,
8013; EG-NEXT:     SUBB_UINT T8.X, T9.W, T2.Z,
8014; EG-NEXT:     CNDE_INT T2.Y, PV.W, PS, T6.Z,
8015; EG-NEXT:     SUB_INT T6.Z, T9.W, T2.Z,
8016; EG-NEXT:     SUB_INT T10.W, T1.Y, T4.X,
8017; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T1.Z,
8018; EG-NEXT:     SUB_INT T4.X, T7.W, T4.Z,
8019; EG-NEXT:     LSHL T1.Y, PS, 1,
8020; EG-NEXT:     BFE_UINT T1.Z, T2.X, literal.x, 1,
8021; EG-NEXT:     CNDE_INT T10.W, T3.Y, T5.Z, PV.W, BS:VEC_021/SCL_122
8022; EG-NEXT:     CNDE_INT * T9.W, PV.Y, T9.W, PV.Z,
8023; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
8024; EG-NEXT:     SUB_INT T10.X, T0.Z, T1.W,
8025; EG-NEXT:     LSHL T3.Y, PS, 1,
8026; EG-NEXT:     BIT_ALIGN_INT T5.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
8027; EG-NEXT:     OR_INT T8.W, PV.Y, PV.Z,
8028; EG-NEXT:     SUB_INT * T10.W, PV.X, T8.X,
8029; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8030; EG-NEXT:     BFE_UINT T4.X, T3.X, literal.x, 1,
8031; EG-NEXT:     CNDE_INT T1.Y, T2.Y, T7.W, PS,
8032; EG-NEXT:     SETGE_UINT T1.Z, PV.W, T4.W, BS:VEC_021/SCL_122
8033; EG-NEXT:     SETE_INT T7.W, PV.Z, T3.Z,
8034; EG-NEXT:     SETGE_UINT * T10.W, PV.Z, T3.Z,
8035; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
8036; EG-NEXT:     CNDE_INT T8.X, PV.W, PS, PV.Z,
8037; EG-NEXT:     SUB_INT T2.Y, T8.W, T4.W,
8038; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.Y, T9.W, literal.x, BS:VEC_021/SCL_122
8039; EG-NEXT:     OR_INT T7.W, T3.Y, PV.X,
8040; EG-NEXT:     SUB_INT * T9.W, T10.X, T5.X,
8041; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8042; EG-NEXT:     CNDE_INT T4.X, T9.X, T0.Z, PS,
8043; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T2.Z, BS:VEC_021/SCL_122
8044; EG-NEXT:     SETE_INT T0.Z, PV.Z, T4.Z, BS:VEC_102/SCL_221
8045; EG-NEXT:     SETGE_UINT T9.W, PV.Z, T4.Z, BS:VEC_102/SCL_221
8046; EG-NEXT:     CNDE_INT * T10.W, PV.X, T8.W, PV.Y,
8047; EG-NEXT:     LSHL T5.X, PS, 1,
8048; EG-NEXT:     CNDE_INT T1.Y, PV.Z, PV.W, PV.Y,
8049; EG-NEXT:     SUB_INT T0.Z, T7.W, T2.Z,
8050; EG-NEXT:     BIT_ALIGN_INT T9.W, PV.X, T11.W, literal.x,
8051; EG-NEXT:     OR_INT * T11.W, T6.X, T7.X,
8052; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8053; EG-NEXT:     SUBB_UINT T4.X, T7.W, T2.Z,
8054; EG-NEXT:     SUB_INT T2.Y, T1.Z, T4.Z, BS:VEC_021/SCL_122
8055; EG-NEXT:     SETGE_UINT T6.Z, PS, T0.W, BS:VEC_021/SCL_122
8056; EG-NEXT:     SETE_INT T12.W, PV.W, T1.W,
8057; EG-NEXT:     SETGE_UINT * T13.W, PV.W, T1.W,
8058; EG-NEXT:     SUBB_UINT T6.X, T11.W, T0.W, BS:VEC_021/SCL_122
8059; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
8060; EG-NEXT:     SUB_INT T6.Z, T11.W, T0.W, BS:VEC_021/SCL_122
8061; EG-NEXT:     SUB_INT T12.W, PV.Y, PV.X,
8062; EG-NEXT:     CNDE_INT * T7.W, T1.Y, T7.W, T0.Z,
8063; EG-NEXT:     SUB_INT T4.X, T9.W, T1.W, BS:VEC_021/SCL_122
8064; EG-NEXT:     LSHL T2.Y, PS, 1,
8065; EG-NEXT:     BFE_UINT T0.Z, T3.X, literal.x, 1,
8066; EG-NEXT:     CNDE_INT T12.W, T1.Y, T1.Z, PV.W,
8067; EG-NEXT:     CNDE_INT * T11.W, PV.Y, T11.W, PV.Z,
8068; EG-NEXT:    15(2.101948e-44), 0(0.000000e+00)
8069; EG-NEXT:     BFE_UINT T7.X, T2.X, literal.x, 1,
8070; EG-NEXT:     LSHL T1.Y, PS, 1,
8071; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T7.W, literal.y,
8072; EG-NEXT:     OR_INT T7.W, PV.Y, PV.Z,
8073; EG-NEXT:     SUB_INT * T12.W, PV.X, T6.X,
8074; EG-NEXT:    15(2.101948e-44), 31(4.344025e-44)
8075; EG-NEXT:     BFE_UINT T4.X, T0.Y, literal.x, 1,
8076; EG-NEXT:     CNDE_INT T2.Y, T3.Y, T9.W, PS, BS:VEC_120/SCL_212
8077; EG-NEXT:     SETGE_UINT T0.Z, PV.W, T2.Z,
8078; EG-NEXT:     SETE_INT * T9.W, PV.Z, T4.Z, BS:VEC_021/SCL_122
8079; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
8080; EG-NEXT:    ALU clause starting at 1410:
8081; EG-NEXT:     SETGE_UINT * T12.W, T1.Z, T4.Z,
8082; EG-NEXT:     SUBB_UINT T6.X, T8.W, T4.W,
8083; EG-NEXT:     CNDE_INT * T3.Y, T9.W, PV.W, T0.Z, BS:VEC_201
8084; EG-NEXT:     SUB_INT T0.Z, T7.W, T2.Z,
8085; EG-NEXT:     BIT_ALIGN_INT T8.W, T2.Y, T11.W, literal.x,
8086; EG-NEXT:     OR_INT * T9.W, T1.Y, T4.X,
8087; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8088; EG-NEXT:     SUBB_UINT T4.X, T7.W, T2.Z,
8089; EG-NEXT:     SUB_INT T1.Y, T1.Z, T4.Z, BS:VEC_021/SCL_122
8090; EG-NEXT:     SETGE_UINT T6.Z, PS, T0.W, BS:VEC_021/SCL_122
8091; EG-NEXT:     SETE_INT T11.W, PV.W, T1.W,
8092; EG-NEXT:     SETGE_UINT * T12.W, PV.W, T1.W,
8093; EG-NEXT:     SUBB_UINT T9.X, T9.W, T0.W, BS:VEC_021/SCL_122
8094; EG-NEXT:     CNDE_INT T2.Y, PV.W, PS, PV.Z,
8095; EG-NEXT:     SUB_INT T6.Z, T9.W, T0.W, BS:VEC_021/SCL_122
8096; EG-NEXT:     SUB_INT T11.W, PV.Y, PV.X,
8097; EG-NEXT:     CNDE_INT * T7.W, T3.Y, T7.W, T0.Z,
8098; EG-NEXT:     SUB_INT T4.X, T8.W, T1.W, BS:VEC_021/SCL_122
8099; EG-NEXT:     LSHL T1.Y, PS, 1,
8100; EG-NEXT:     BFE_UINT T0.Z, T3.X, literal.x, 1,
8101; EG-NEXT:     CNDE_INT T11.W, T3.Y, T1.Z, PV.W,
8102; EG-NEXT:     CNDE_INT * T9.W, PV.Y, T9.W, PV.Z,
8103; EG-NEXT:    14(1.961818e-44), 0(0.000000e+00)
8104; EG-NEXT:     SUB_INT T10.X, T5.Z, T3.Z,
8105; EG-NEXT:     LSHL T3.Y, PS, 1,
8106; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T7.W, literal.x,
8107; EG-NEXT:     OR_INT T7.W, PV.Y, PV.Z,
8108; EG-NEXT:     SUB_INT * T11.W, PV.X, T9.X,
8109; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8110; EG-NEXT:     BFE_UINT T4.X, T0.Y, literal.x, 1,
8111; EG-NEXT:     CNDE_INT T1.Y, T2.Y, T8.W, PS, BS:VEC_120/SCL_212
8112; EG-NEXT:     SETGE_UINT T0.Z, PV.W, T2.Z, BS:VEC_021/SCL_122
8113; EG-NEXT:     SETE_INT T8.W, PV.Z, T4.Z,
8114; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T4.Z,
8115; EG-NEXT:    15(2.101948e-44), 0(0.000000e+00)
8116; EG-NEXT:     CNDE_INT T9.X, PV.W, PS, PV.Z,
8117; EG-NEXT:     SUB_INT T2.Y, T7.W, T2.Z,
8118; EG-NEXT:     BIT_ALIGN_INT T0.Z, PV.Y, T9.W, literal.x,
8119; EG-NEXT:     OR_INT T8.W, T3.Y, PV.X,
8120; EG-NEXT:     SUB_INT * T9.W, T10.X, T6.X,
8121; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8122; EG-NEXT:     CNDE_INT T4.X, T8.X, T5.Z, PS,
8123; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T0.W, BS:VEC_021/SCL_122
8124; EG-NEXT:     SETE_INT T5.Z, PV.Z, T1.W, BS:VEC_102/SCL_221
8125; EG-NEXT:     SETGE_UINT T9.W, PV.Z, T1.W, BS:VEC_102/SCL_221
8126; EG-NEXT:     CNDE_INT * T11.W, PV.X, T7.W, PV.Y,
8127; EG-NEXT:     LSHL T6.X, PS, 1,
8128; EG-NEXT:     CNDE_INT T1.Y, PV.Z, PV.W, PV.Y,
8129; EG-NEXT:     SUB_INT T5.Z, T8.W, T0.W,
8130; EG-NEXT:     BIT_ALIGN_INT T9.W, PV.X, T10.W, literal.x, BS:VEC_021/SCL_122
8131; EG-NEXT:     OR_INT * T10.W, T5.X, T7.X,
8132; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8133; EG-NEXT:     SUBB_UINT T4.X, T8.W, T0.W,
8134; EG-NEXT:     SUB_INT * T2.Y, T0.Z, T1.W, BS:VEC_021/SCL_122
8135; EG-NEXT:     SETGE_UINT T6.Z, T10.W, T4.W,
8136; EG-NEXT:     SETE_INT T12.W, T9.W, T3.Z, BS:VEC_201
8137; EG-NEXT:     SETGE_UINT * T13.W, T9.W, T3.Z,
8138; EG-NEXT:     SUBB_UINT T5.X, T10.W, T4.W, BS:VEC_021/SCL_122
8139; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
8140; EG-NEXT:     SUB_INT T6.Z, T10.W, T4.W, BS:VEC_021/SCL_122
8141; EG-NEXT:     SUB_INT T12.W, T2.Y, T4.X,
8142; EG-NEXT:     CNDE_INT * T8.W, T1.Y, T8.W, T5.Z,
8143; EG-NEXT:     SUB_INT T4.X, T9.W, T3.Z,
8144; EG-NEXT:     LSHL T2.Y, PS, 1,
8145; EG-NEXT:     BFE_UINT T5.Z, T0.Y, literal.x, 1,
8146; EG-NEXT:     CNDE_INT T12.W, T1.Y, T0.Z, PV.W, BS:VEC_120/SCL_212
8147; EG-NEXT:     CNDE_INT * T10.W, PV.Y, T10.W, PV.Z,
8148; EG-NEXT:    14(1.961818e-44), 0(0.000000e+00)
8149; EG-NEXT:     BFE_UINT T7.X, T3.X, literal.x, 1,
8150; EG-NEXT:     LSHL T1.Y, PS, 1,
8151; EG-NEXT:     BIT_ALIGN_INT T0.Z, PV.W, T8.W, literal.y,
8152; EG-NEXT:     OR_INT T8.W, PV.Y, PV.Z,
8153; EG-NEXT:     SUB_INT * T12.W, PV.X, T5.X,
8154; EG-NEXT:    13(1.821688e-44), 31(4.344025e-44)
8155; EG-NEXT:     BFE_UINT T4.X, T2.X, literal.x, 1,
8156; EG-NEXT:     CNDE_INT T2.Y, T3.Y, T9.W, PS, BS:VEC_021/SCL_122
8157; EG-NEXT:     SETGE_UINT T5.Z, PV.W, T0.W, BS:VEC_102/SCL_221
8158; EG-NEXT:     SETE_INT T9.W, PV.Z, T1.W,
8159; EG-NEXT:     SETGE_UINT * T12.W, PV.Z, T1.W,
8160; EG-NEXT:    14(1.961818e-44), 0(0.000000e+00)
8161; EG-NEXT:     SUBB_UINT T5.X, T7.W, T2.Z,
8162; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
8163; EG-NEXT:     SUB_INT * T5.Z, T8.W, T0.W, BS:VEC_120/SCL_212
8164; EG-NEXT:     BIT_ALIGN_INT T7.W, T2.Y, T10.W, literal.x,
8165; EG-NEXT:     OR_INT * T9.W, T1.Y, T4.X,
8166; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8167; EG-NEXT:     SUBB_UINT T4.X, T8.W, T0.W,
8168; EG-NEXT:     SUB_INT * T1.Y, T0.Z, T1.W, BS:VEC_021/SCL_122
8169; EG-NEXT:     SETGE_UINT T6.Z, T9.W, T4.W,
8170; EG-NEXT:     SETE_INT T10.W, T7.W, T3.Z, BS:VEC_201
8171; EG-NEXT:     SETGE_UINT * T12.W, T7.W, T3.Z,
8172; EG-NEXT:     SUBB_UINT T8.X, T9.W, T4.W, BS:VEC_021/SCL_122
8173; EG-NEXT:     CNDE_INT T2.Y, PV.W, PS, PV.Z,
8174; EG-NEXT:     SUB_INT T6.Z, T9.W, T4.W, BS:VEC_021/SCL_122
8175; EG-NEXT:     SUB_INT T10.W, T1.Y, T4.X,
8176; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T5.Z,
8177; EG-NEXT:     SUB_INT T4.X, T7.W, T3.Z,
8178; EG-NEXT:     LSHL T1.Y, PS, 1,
8179; EG-NEXT:     BFE_UINT T5.Z, T0.Y, literal.x, 1,
8180; EG-NEXT:     CNDE_INT T10.W, T3.Y, T0.Z, PV.W, BS:VEC_120/SCL_212
8181; EG-NEXT:     CNDE_INT * T9.W, PV.Y, T9.W, PV.Z,
8182; EG-NEXT:    13(1.821688e-44), 0(0.000000e+00)
8183; EG-NEXT:     SUB_INT T10.X, T1.Z, T4.Z,
8184; EG-NEXT:     LSHL T3.Y, PS, 1,
8185; EG-NEXT:     BIT_ALIGN_INT T0.Z, PV.W, T8.W, literal.x,
8186; EG-NEXT:     OR_INT T8.W, PV.Y, PV.Z,
8187; EG-NEXT:     SUB_INT * T10.W, PV.X, T8.X,
8188; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8189; EG-NEXT:     BFE_UINT T4.X, T2.X, literal.x, 1,
8190; EG-NEXT:     CNDE_INT T1.Y, T2.Y, T7.W, PS, BS:VEC_021/SCL_122
8191; EG-NEXT:     SETGE_UINT T5.Z, PV.W, T0.W, BS:VEC_102/SCL_221
8192; EG-NEXT:     SETE_INT T7.W, PV.Z, T1.W,
8193; EG-NEXT:     SETGE_UINT * T10.W, PV.Z, T1.W,
8194; EG-NEXT:    13(1.821688e-44), 0(0.000000e+00)
8195; EG-NEXT:     CNDE_INT * T8.X, PV.W, PS, PV.Z,
8196; EG-NEXT:    ALU clause starting at 1525:
8197; EG-NEXT:     SUB_INT T2.Y, T8.W, T0.W,
8198; EG-NEXT:     BIT_ALIGN_INT T5.Z, T1.Y, T9.W, literal.x, BS:VEC_021/SCL_122
8199; EG-NEXT:     OR_INT T7.W, T3.Y, T4.X, BS:VEC_102/SCL_221
8200; EG-NEXT:     SUB_INT * T9.W, T10.X, T5.X,
8201; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8202; EG-NEXT:     CNDE_INT T4.X, T9.X, T1.Z, PS,
8203; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
8204; EG-NEXT:     SETE_INT T1.Z, PV.Z, T3.Z, BS:VEC_021/SCL_122
8205; EG-NEXT:     SETGE_UINT T9.W, PV.Z, T3.Z, BS:VEC_021/SCL_122
8206; EG-NEXT:     CNDE_INT * T10.W, T8.X, T8.W, PV.Y,
8207; EG-NEXT:     LSHL T5.X, PS, 1,
8208; EG-NEXT:     CNDE_INT T1.Y, PV.Z, PV.W, PV.Y,
8209; EG-NEXT:     SUB_INT T1.Z, T7.W, T4.W,
8210; EG-NEXT:     BIT_ALIGN_INT T9.W, PV.X, T11.W, literal.x, BS:VEC_021/SCL_122
8211; EG-NEXT:     OR_INT * T11.W, T6.X, T7.X,
8212; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8213; EG-NEXT:     SUBB_UINT T4.X, T7.W, T4.W,
8214; EG-NEXT:     SUB_INT T2.Y, T5.Z, T3.Z,
8215; EG-NEXT:     SETGE_UINT * T6.Z, PS, T2.Z, BS:VEC_021/SCL_122
8216; EG-NEXT:     SETE_INT T12.W, T9.W, T4.Z,
8217; EG-NEXT:     SETGE_UINT * T13.W, T9.W, T4.Z,
8218; EG-NEXT:     SUBB_UINT T6.X, T11.W, T2.Z,
8219; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, T6.Z,
8220; EG-NEXT:     SUB_INT T6.Z, T11.W, T2.Z,
8221; EG-NEXT:     SUB_INT T12.W, T2.Y, T4.X,
8222; EG-NEXT:     CNDE_INT * T7.W, T1.Y, T7.W, T1.Z,
8223; EG-NEXT:     SUB_INT T4.X, T9.W, T4.Z,
8224; EG-NEXT:     LSHL T2.Y, PS, 1,
8225; EG-NEXT:     BFE_UINT T1.Z, T2.X, literal.x, 1,
8226; EG-NEXT:     CNDE_INT T12.W, T1.Y, T5.Z, PV.W, BS:VEC_021/SCL_122
8227; EG-NEXT:     CNDE_INT * T11.W, PV.Y, T11.W, PV.Z,
8228; EG-NEXT:    12(1.681558e-44), 0(0.000000e+00)
8229; EG-NEXT:     BFE_UINT T7.X, T0.Y, literal.x, 1,
8230; EG-NEXT:     LSHL T1.Y, PS, 1,
8231; EG-NEXT:     BIT_ALIGN_INT T5.Z, PV.W, T7.W, literal.y,
8232; EG-NEXT:     OR_INT T7.W, PV.Y, PV.Z,
8233; EG-NEXT:     SUB_INT * T12.W, PV.X, T6.X,
8234; EG-NEXT:    12(1.681558e-44), 31(4.344025e-44)
8235; EG-NEXT:     BFE_UINT T4.X, T3.X, literal.x, 1,
8236; EG-NEXT:     CNDE_INT T2.Y, T3.Y, T9.W, PS,
8237; EG-NEXT:     SETGE_UINT T1.Z, PV.W, T4.W, BS:VEC_021/SCL_122
8238; EG-NEXT:     SETE_INT T9.W, PV.Z, T3.Z,
8239; EG-NEXT:     SETGE_UINT * T12.W, PV.Z, T3.Z,
8240; EG-NEXT:    12(1.681558e-44), 0(0.000000e+00)
8241; EG-NEXT:     SUBB_UINT T6.X, T8.W, T0.W,
8242; EG-NEXT:     CNDE_INT * T3.Y, PV.W, PS, PV.Z,
8243; EG-NEXT:     SUB_INT T1.Z, T7.W, T4.W,
8244; EG-NEXT:     BIT_ALIGN_INT T8.W, T2.Y, T11.W, literal.x, BS:VEC_021/SCL_122
8245; EG-NEXT:     OR_INT * T9.W, T1.Y, T4.X,
8246; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8247; EG-NEXT:     SUBB_UINT T4.X, T7.W, T4.W,
8248; EG-NEXT:     SUB_INT T1.Y, T5.Z, T3.Z,
8249; EG-NEXT:     SETGE_UINT * T6.Z, PS, T2.Z, BS:VEC_021/SCL_122
8250; EG-NEXT:     SETE_INT T11.W, T8.W, T4.Z,
8251; EG-NEXT:     SETGE_UINT * T12.W, T8.W, T4.Z,
8252; EG-NEXT:     SUBB_UINT T9.X, T9.W, T2.Z,
8253; EG-NEXT:     CNDE_INT T2.Y, PV.W, PS, T6.Z,
8254; EG-NEXT:     SUB_INT T6.Z, T9.W, T2.Z,
8255; EG-NEXT:     SUB_INT T11.W, T1.Y, T4.X,
8256; EG-NEXT:     CNDE_INT * T7.W, T3.Y, T7.W, T1.Z,
8257; EG-NEXT:     SUB_INT T4.X, T8.W, T4.Z,
8258; EG-NEXT:     LSHL T1.Y, PS, 1,
8259; EG-NEXT:     BFE_UINT T1.Z, T2.X, literal.x, 1,
8260; EG-NEXT:     CNDE_INT T11.W, T3.Y, T5.Z, PV.W, BS:VEC_021/SCL_122
8261; EG-NEXT:     CNDE_INT * T9.W, PV.Y, T9.W, PV.Z,
8262; EG-NEXT:    11(1.541428e-44), 0(0.000000e+00)
8263; EG-NEXT:     SUB_INT T10.X, T0.Z, T1.W,
8264; EG-NEXT:     LSHL T3.Y, PS, 1,
8265; EG-NEXT:     BIT_ALIGN_INT T5.Z, PV.W, T7.W, literal.x, BS:VEC_021/SCL_122
8266; EG-NEXT:     OR_INT T7.W, PV.Y, PV.Z,
8267; EG-NEXT:     SUB_INT * T11.W, PV.X, T9.X,
8268; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8269; EG-NEXT:     BFE_UINT T4.X, T3.X, literal.x, 1,
8270; EG-NEXT:     CNDE_INT T1.Y, T2.Y, T8.W, PS,
8271; EG-NEXT:     SETGE_UINT T1.Z, PV.W, T4.W, BS:VEC_021/SCL_122
8272; EG-NEXT:     SETE_INT T8.W, PV.Z, T3.Z,
8273; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T3.Z,
8274; EG-NEXT:    11(1.541428e-44), 0(0.000000e+00)
8275; EG-NEXT:     CNDE_INT T9.X, PV.W, PS, PV.Z,
8276; EG-NEXT:     SUB_INT T2.Y, T7.W, T4.W,
8277; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.Y, T9.W, literal.x, BS:VEC_021/SCL_122
8278; EG-NEXT:     OR_INT T8.W, T3.Y, PV.X,
8279; EG-NEXT:     SUB_INT * T9.W, T10.X, T6.X,
8280; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8281; EG-NEXT:     CNDE_INT T4.X, T8.X, T0.Z, PS,
8282; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T2.Z, BS:VEC_021/SCL_122
8283; EG-NEXT:     SETE_INT T0.Z, PV.Z, T4.Z, BS:VEC_102/SCL_221
8284; EG-NEXT:     SETGE_UINT T9.W, PV.Z, T4.Z, BS:VEC_102/SCL_221
8285; EG-NEXT:     CNDE_INT * T11.W, PV.X, T7.W, PV.Y,
8286; EG-NEXT:     LSHL T6.X, PS, 1,
8287; EG-NEXT:     CNDE_INT T1.Y, PV.Z, PV.W, PV.Y,
8288; EG-NEXT:     SUB_INT T0.Z, T8.W, T2.Z,
8289; EG-NEXT:     BIT_ALIGN_INT T9.W, PV.X, T10.W, literal.x,
8290; EG-NEXT:     OR_INT * T10.W, T5.X, T7.X,
8291; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8292; EG-NEXT:     SUBB_UINT T4.X, T8.W, T2.Z,
8293; EG-NEXT:     SUB_INT T2.Y, T1.Z, T4.Z, BS:VEC_021/SCL_122
8294; EG-NEXT:     SETGE_UINT T6.Z, PS, T0.W, BS:VEC_021/SCL_122
8295; EG-NEXT:     SETE_INT T12.W, PV.W, T1.W,
8296; EG-NEXT:     SETGE_UINT * T13.W, PV.W, T1.W,
8297; EG-NEXT:     SUBB_UINT T5.X, T10.W, T0.W, BS:VEC_021/SCL_122
8298; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
8299; EG-NEXT:     SUB_INT T6.Z, T10.W, T0.W, BS:VEC_021/SCL_122
8300; EG-NEXT:     SUB_INT T12.W, PV.Y, PV.X,
8301; EG-NEXT:     CNDE_INT * T8.W, T1.Y, T8.W, T0.Z,
8302; EG-NEXT:     SUB_INT T4.X, T9.W, T1.W, BS:VEC_021/SCL_122
8303; EG-NEXT:     LSHL T2.Y, PS, 1,
8304; EG-NEXT:     BFE_UINT T0.Z, T3.X, literal.x, 1,
8305; EG-NEXT:     CNDE_INT T12.W, T1.Y, T1.Z, PV.W,
8306; EG-NEXT:     CNDE_INT * T10.W, PV.Y, T10.W, PV.Z,
8307; EG-NEXT:    10(1.401298e-44), 0(0.000000e+00)
8308; EG-NEXT:     BFE_UINT T7.X, T2.X, literal.x, 1,
8309; EG-NEXT:     LSHL T1.Y, PS, 1,
8310; EG-NEXT:     BIT_ALIGN_INT * T1.Z, PV.W, T8.W, literal.y,
8311; EG-NEXT:    10(1.401298e-44), 31(4.344025e-44)
8312; EG-NEXT:    ALU clause starting at 1640:
8313; EG-NEXT:     OR_INT T8.W, T2.Y, T0.Z,
8314; EG-NEXT:     SUB_INT * T12.W, T4.X, T5.X,
8315; EG-NEXT:     BFE_UINT T4.X, T0.Y, literal.x, 1,
8316; EG-NEXT:     CNDE_INT T2.Y, T3.Y, T9.W, PS, BS:VEC_120/SCL_212
8317; EG-NEXT:     SETGE_UINT T0.Z, PV.W, T2.Z, BS:VEC_102/SCL_221
8318; EG-NEXT:     SETE_INT T9.W, T1.Z, T4.Z, BS:VEC_210
8319; EG-NEXT:     SETGE_UINT * T12.W, T1.Z, T4.Z,
8320; EG-NEXT:    11(1.541428e-44), 0(0.000000e+00)
8321; EG-NEXT:     SUBB_UINT T5.X, T7.W, T4.W,
8322; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
8323; EG-NEXT:     SUB_INT * T0.Z, T8.W, T2.Z, BS:VEC_201
8324; EG-NEXT:     BIT_ALIGN_INT T7.W, T2.Y, T10.W, literal.x,
8325; EG-NEXT:     OR_INT * T9.W, T1.Y, T4.X,
8326; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8327; EG-NEXT:     SUBB_UINT T4.X, T8.W, T2.Z,
8328; EG-NEXT:     SUB_INT T1.Y, T1.Z, T4.Z, BS:VEC_021/SCL_122
8329; EG-NEXT:     SETGE_UINT T6.Z, PS, T0.W, BS:VEC_021/SCL_122
8330; EG-NEXT:     SETE_INT T10.W, PV.W, T1.W,
8331; EG-NEXT:     SETGE_UINT * T12.W, PV.W, T1.W,
8332; EG-NEXT:     SUBB_UINT T8.X, T9.W, T0.W, BS:VEC_021/SCL_122
8333; EG-NEXT:     CNDE_INT T2.Y, PV.W, PS, PV.Z,
8334; EG-NEXT:     SUB_INT T6.Z, T9.W, T0.W, BS:VEC_021/SCL_122
8335; EG-NEXT:     SUB_INT T10.W, PV.Y, PV.X,
8336; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T0.Z,
8337; EG-NEXT:     SUB_INT T4.X, T7.W, T1.W, BS:VEC_021/SCL_122
8338; EG-NEXT:     LSHL T1.Y, PS, 1,
8339; EG-NEXT:     BFE_UINT T0.Z, T3.X, literal.x, 1,
8340; EG-NEXT:     CNDE_INT T10.W, T3.Y, T1.Z, PV.W,
8341; EG-NEXT:     CNDE_INT * T9.W, PV.Y, T9.W, PV.Z,
8342; EG-NEXT:    9(1.261169e-44), 0(0.000000e+00)
8343; EG-NEXT:     SUB_INT T10.X, T5.Z, T3.Z,
8344; EG-NEXT:     LSHL T3.Y, PS, 1,
8345; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T8.W, literal.x,
8346; EG-NEXT:     OR_INT T8.W, PV.Y, PV.Z,
8347; EG-NEXT:     SUB_INT * T10.W, PV.X, T8.X,
8348; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8349; EG-NEXT:     BFE_UINT T4.X, T0.Y, literal.x, 1,
8350; EG-NEXT:     CNDE_INT T1.Y, T2.Y, T7.W, PS, BS:VEC_120/SCL_212
8351; EG-NEXT:     SETGE_UINT T0.Z, PV.W, T2.Z, BS:VEC_021/SCL_122
8352; EG-NEXT:     SETE_INT T7.W, PV.Z, T4.Z,
8353; EG-NEXT:     SETGE_UINT * T10.W, PV.Z, T4.Z,
8354; EG-NEXT:    10(1.401298e-44), 0(0.000000e+00)
8355; EG-NEXT:     CNDE_INT T8.X, PV.W, PS, PV.Z,
8356; EG-NEXT:     SUB_INT T2.Y, T8.W, T2.Z,
8357; EG-NEXT:     BIT_ALIGN_INT T0.Z, PV.Y, T9.W, literal.x,
8358; EG-NEXT:     OR_INT T7.W, T3.Y, PV.X,
8359; EG-NEXT:     SUB_INT * T9.W, T10.X, T5.X,
8360; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8361; EG-NEXT:     CNDE_INT T4.X, T9.X, T5.Z, PS,
8362; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T0.W, BS:VEC_021/SCL_122
8363; EG-NEXT:     SETE_INT T5.Z, PV.Z, T1.W, BS:VEC_102/SCL_221
8364; EG-NEXT:     SETGE_UINT T9.W, PV.Z, T1.W, BS:VEC_102/SCL_221
8365; EG-NEXT:     CNDE_INT * T10.W, PV.X, T8.W, PV.Y,
8366; EG-NEXT:     LSHL T5.X, PS, 1,
8367; EG-NEXT:     CNDE_INT T1.Y, PV.Z, PV.W, PV.Y,
8368; EG-NEXT:     SUB_INT T5.Z, T7.W, T0.W,
8369; EG-NEXT:     BIT_ALIGN_INT T9.W, PV.X, T11.W, literal.x, BS:VEC_021/SCL_122
8370; EG-NEXT:     OR_INT * T11.W, T6.X, T7.X,
8371; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8372; EG-NEXT:     SUBB_UINT T4.X, T7.W, T0.W,
8373; EG-NEXT:     SUB_INT * T2.Y, T0.Z, T1.W, BS:VEC_021/SCL_122
8374; EG-NEXT:     SETGE_UINT T6.Z, T11.W, T4.W,
8375; EG-NEXT:     SETE_INT T12.W, T9.W, T3.Z, BS:VEC_201
8376; EG-NEXT:     SETGE_UINT * T13.W, T9.W, T3.Z,
8377; EG-NEXT:     SUBB_UINT T6.X, T11.W, T4.W, BS:VEC_021/SCL_122
8378; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
8379; EG-NEXT:     SUB_INT T6.Z, T11.W, T4.W, BS:VEC_021/SCL_122
8380; EG-NEXT:     SUB_INT T12.W, T2.Y, T4.X,
8381; EG-NEXT:     CNDE_INT * T7.W, T1.Y, T7.W, T5.Z,
8382; EG-NEXT:     SUB_INT T4.X, T9.W, T3.Z,
8383; EG-NEXT:     LSHL T2.Y, PS, 1,
8384; EG-NEXT:     BFE_UINT T5.Z, T0.Y, literal.x, 1,
8385; EG-NEXT:     CNDE_INT T12.W, T1.Y, T0.Z, PV.W, BS:VEC_120/SCL_212
8386; EG-NEXT:     CNDE_INT * T11.W, PV.Y, T11.W, PV.Z,
8387; EG-NEXT:    9(1.261169e-44), 0(0.000000e+00)
8388; EG-NEXT:     BFE_UINT T7.X, T3.X, literal.x, 1,
8389; EG-NEXT:     LSHL T1.Y, PS, 1,
8390; EG-NEXT:     BIT_ALIGN_INT T0.Z, PV.W, T7.W, literal.y,
8391; EG-NEXT:     OR_INT T7.W, PV.Y, PV.Z,
8392; EG-NEXT:     SUB_INT * T12.W, PV.X, T6.X,
8393; EG-NEXT:    8(1.121039e-44), 31(4.344025e-44)
8394; EG-NEXT:     BFE_UINT T4.X, T2.X, literal.x, 1,
8395; EG-NEXT:     CNDE_INT T2.Y, T3.Y, T9.W, PS, BS:VEC_021/SCL_122
8396; EG-NEXT:     SETGE_UINT T5.Z, PV.W, T0.W, BS:VEC_102/SCL_221
8397; EG-NEXT:     SETE_INT T9.W, PV.Z, T1.W,
8398; EG-NEXT:     SETGE_UINT * T12.W, PV.Z, T1.W,
8399; EG-NEXT:    9(1.261169e-44), 0(0.000000e+00)
8400; EG-NEXT:     SUBB_UINT T6.X, T8.W, T2.Z,
8401; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
8402; EG-NEXT:     SUB_INT * T5.Z, T7.W, T0.W, BS:VEC_120/SCL_212
8403; EG-NEXT:     BIT_ALIGN_INT T8.W, T2.Y, T11.W, literal.x,
8404; EG-NEXT:     OR_INT * T9.W, T1.Y, T4.X,
8405; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8406; EG-NEXT:     SUBB_UINT T4.X, T7.W, T0.W,
8407; EG-NEXT:     SUB_INT * T1.Y, T0.Z, T1.W, BS:VEC_021/SCL_122
8408; EG-NEXT:     SETGE_UINT T6.Z, T9.W, T4.W,
8409; EG-NEXT:     SETE_INT T11.W, T8.W, T3.Z, BS:VEC_201
8410; EG-NEXT:     SETGE_UINT * T12.W, T8.W, T3.Z,
8411; EG-NEXT:     SUBB_UINT T9.X, T9.W, T4.W, BS:VEC_021/SCL_122
8412; EG-NEXT:     CNDE_INT T2.Y, PV.W, PS, PV.Z,
8413; EG-NEXT:     SUB_INT T6.Z, T9.W, T4.W, BS:VEC_021/SCL_122
8414; EG-NEXT:     SUB_INT T11.W, T1.Y, T4.X,
8415; EG-NEXT:     CNDE_INT * T7.W, T3.Y, T7.W, T5.Z,
8416; EG-NEXT:     SUB_INT T4.X, T8.W, T3.Z,
8417; EG-NEXT:     LSHL T1.Y, PS, 1,
8418; EG-NEXT:     BFE_UINT T5.Z, T0.Y, literal.x, 1,
8419; EG-NEXT:     CNDE_INT T11.W, T3.Y, T0.Z, PV.W, BS:VEC_120/SCL_212
8420; EG-NEXT:     CNDE_INT * T9.W, PV.Y, T9.W, PV.Z,
8421; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
8422; EG-NEXT:     SUB_INT T10.X, T1.Z, T4.Z,
8423; EG-NEXT:     LSHL T3.Y, PS, 1,
8424; EG-NEXT:     BIT_ALIGN_INT T0.Z, PV.W, T7.W, literal.x,
8425; EG-NEXT:     OR_INT T7.W, PV.Y, PV.Z,
8426; EG-NEXT:     SUB_INT * T11.W, PV.X, T9.X,
8427; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8428; EG-NEXT:    ALU clause starting at 1755:
8429; EG-NEXT:     BFE_UINT T4.X, T2.X, literal.x, 1,
8430; EG-NEXT:     CNDE_INT * T1.Y, T2.Y, T8.W, T11.W,
8431; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
8432; EG-NEXT:     SETGE_UINT T5.Z, T7.W, T0.W, BS:VEC_021/SCL_122
8433; EG-NEXT:     SETE_INT T8.W, T0.Z, T1.W,
8434; EG-NEXT:     SETGE_UINT * T11.W, T0.Z, T1.W,
8435; EG-NEXT:     CNDE_INT T9.X, PV.W, PS, PV.Z,
8436; EG-NEXT:     SUB_INT T2.Y, T7.W, T0.W,
8437; EG-NEXT:     BIT_ALIGN_INT T5.Z, T1.Y, T9.W, literal.x, BS:VEC_021/SCL_122
8438; EG-NEXT:     OR_INT T8.W, T3.Y, T4.X, BS:VEC_102/SCL_221
8439; EG-NEXT:     SUB_INT * T9.W, T10.X, T6.X,
8440; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8441; EG-NEXT:     CNDE_INT T4.X, T8.X, T1.Z, PS,
8442; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T4.W, BS:VEC_021/SCL_122
8443; EG-NEXT:     SETE_INT T1.Z, PV.Z, T3.Z, BS:VEC_021/SCL_122
8444; EG-NEXT:     SETGE_UINT T9.W, PV.Z, T3.Z, BS:VEC_021/SCL_122
8445; EG-NEXT:     CNDE_INT * T11.W, PV.X, T7.W, PV.Y,
8446; EG-NEXT:     LSHL T6.X, PS, 1,
8447; EG-NEXT:     CNDE_INT T1.Y, PV.Z, PV.W, PV.Y,
8448; EG-NEXT:     SUB_INT T1.Z, T8.W, T4.W,
8449; EG-NEXT:     BIT_ALIGN_INT T9.W, PV.X, T10.W, literal.x, BS:VEC_021/SCL_122
8450; EG-NEXT:     OR_INT * T10.W, T5.X, T7.X,
8451; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8452; EG-NEXT:     SUBB_UINT T4.X, T8.W, T4.W,
8453; EG-NEXT:     SUB_INT T2.Y, T5.Z, T3.Z,
8454; EG-NEXT:     SETGE_UINT * T6.Z, PS, T2.Z, BS:VEC_021/SCL_122
8455; EG-NEXT:     SETE_INT T12.W, T9.W, T4.Z,
8456; EG-NEXT:     SETGE_UINT * T13.W, T9.W, T4.Z,
8457; EG-NEXT:     SUBB_UINT T5.X, T10.W, T2.Z,
8458; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, T6.Z,
8459; EG-NEXT:     SUB_INT T6.Z, T10.W, T2.Z,
8460; EG-NEXT:     SUB_INT T12.W, T2.Y, T4.X,
8461; EG-NEXT:     CNDE_INT * T8.W, T1.Y, T8.W, T1.Z,
8462; EG-NEXT:     SUB_INT T4.X, T9.W, T4.Z,
8463; EG-NEXT:     LSHL T2.Y, PS, 1,
8464; EG-NEXT:     BFE_UINT T1.Z, T2.X, literal.x, 1,
8465; EG-NEXT:     CNDE_INT T12.W, T1.Y, T5.Z, PV.W, BS:VEC_021/SCL_122
8466; EG-NEXT:     CNDE_INT * T10.W, PV.Y, T10.W, PV.Z,
8467; EG-NEXT:    7(9.809089e-45), 0(0.000000e+00)
8468; EG-NEXT:     BFE_UINT T7.X, T0.Y, literal.x, 1,
8469; EG-NEXT:     LSHL T1.Y, PS, 1,
8470; EG-NEXT:     BIT_ALIGN_INT T5.Z, PV.W, T8.W, literal.y,
8471; EG-NEXT:     OR_INT T8.W, PV.Y, PV.Z,
8472; EG-NEXT:     SUB_INT * T12.W, PV.X, T5.X,
8473; EG-NEXT:    7(9.809089e-45), 31(4.344025e-44)
8474; EG-NEXT:     BFE_UINT T4.X, T3.X, literal.x, 1,
8475; EG-NEXT:     CNDE_INT T2.Y, T3.Y, T9.W, PS,
8476; EG-NEXT:     SETGE_UINT T1.Z, PV.W, T4.W, BS:VEC_021/SCL_122
8477; EG-NEXT:     SETE_INT T9.W, PV.Z, T3.Z,
8478; EG-NEXT:     SETGE_UINT * T12.W, PV.Z, T3.Z,
8479; EG-NEXT:    7(9.809089e-45), 0(0.000000e+00)
8480; EG-NEXT:     SUBB_UINT T5.X, T7.W, T0.W,
8481; EG-NEXT:     CNDE_INT * T3.Y, PV.W, PS, PV.Z,
8482; EG-NEXT:     SUB_INT T1.Z, T8.W, T4.W,
8483; EG-NEXT:     BIT_ALIGN_INT T7.W, T2.Y, T10.W, literal.x, BS:VEC_021/SCL_122
8484; EG-NEXT:     OR_INT * T9.W, T1.Y, T4.X,
8485; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8486; EG-NEXT:     SUBB_UINT T4.X, T8.W, T4.W,
8487; EG-NEXT:     SUB_INT T1.Y, T5.Z, T3.Z,
8488; EG-NEXT:     SETGE_UINT * T6.Z, PS, T2.Z, BS:VEC_021/SCL_122
8489; EG-NEXT:     SETE_INT T10.W, T7.W, T4.Z,
8490; EG-NEXT:     SETGE_UINT * T12.W, T7.W, T4.Z,
8491; EG-NEXT:     SUBB_UINT T8.X, T9.W, T2.Z,
8492; EG-NEXT:     CNDE_INT T2.Y, PV.W, PS, T6.Z,
8493; EG-NEXT:     SUB_INT T6.Z, T9.W, T2.Z,
8494; EG-NEXT:     SUB_INT T10.W, T1.Y, T4.X,
8495; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T1.Z,
8496; EG-NEXT:     SUB_INT T4.X, T7.W, T4.Z,
8497; EG-NEXT:     LSHL T1.Y, PS, 1,
8498; EG-NEXT:     BFE_UINT T1.Z, T2.X, literal.x, 1,
8499; EG-NEXT:     CNDE_INT T10.W, T3.Y, T5.Z, PV.W, BS:VEC_021/SCL_122
8500; EG-NEXT:     CNDE_INT * T9.W, PV.Y, T9.W, PV.Z,
8501; EG-NEXT:    6(8.407791e-45), 0(0.000000e+00)
8502; EG-NEXT:     SUB_INT T10.X, T0.Z, T1.W,
8503; EG-NEXT:     LSHL T3.Y, PS, 1,
8504; EG-NEXT:     BIT_ALIGN_INT T5.Z, PV.W, T8.W, literal.x, BS:VEC_021/SCL_122
8505; EG-NEXT:     OR_INT T8.W, PV.Y, PV.Z,
8506; EG-NEXT:     SUB_INT * T10.W, PV.X, T8.X,
8507; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8508; EG-NEXT:     BFE_UINT T4.X, T3.X, literal.x, 1,
8509; EG-NEXT:     CNDE_INT T1.Y, T2.Y, T7.W, PS,
8510; EG-NEXT:     SETGE_UINT T1.Z, PV.W, T4.W, BS:VEC_021/SCL_122
8511; EG-NEXT:     SETE_INT T7.W, PV.Z, T3.Z,
8512; EG-NEXT:     SETGE_UINT * T10.W, PV.Z, T3.Z,
8513; EG-NEXT:    6(8.407791e-45), 0(0.000000e+00)
8514; EG-NEXT:     CNDE_INT T8.X, PV.W, PS, PV.Z,
8515; EG-NEXT:     SUB_INT T2.Y, T8.W, T4.W,
8516; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.Y, T9.W, literal.x, BS:VEC_021/SCL_122
8517; EG-NEXT:     OR_INT T7.W, T3.Y, PV.X,
8518; EG-NEXT:     SUB_INT * T9.W, T10.X, T5.X,
8519; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8520; EG-NEXT:     CNDE_INT T4.X, T9.X, T0.Z, PS,
8521; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T2.Z, BS:VEC_021/SCL_122
8522; EG-NEXT:     SETE_INT T0.Z, PV.Z, T4.Z, BS:VEC_102/SCL_221
8523; EG-NEXT:     SETGE_UINT T9.W, PV.Z, T4.Z, BS:VEC_102/SCL_221
8524; EG-NEXT:     CNDE_INT * T10.W, PV.X, T8.W, PV.Y,
8525; EG-NEXT:     LSHL T5.X, PS, 1,
8526; EG-NEXT:     CNDE_INT T1.Y, PV.Z, PV.W, PV.Y,
8527; EG-NEXT:     SUB_INT T0.Z, T7.W, T2.Z,
8528; EG-NEXT:     BIT_ALIGN_INT T9.W, PV.X, T11.W, literal.x,
8529; EG-NEXT:     OR_INT * T11.W, T6.X, T7.X,
8530; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8531; EG-NEXT:     SUBB_UINT T4.X, T7.W, T2.Z,
8532; EG-NEXT:     SUB_INT T2.Y, T1.Z, T4.Z, BS:VEC_021/SCL_122
8533; EG-NEXT:     SETGE_UINT T6.Z, PS, T0.W, BS:VEC_021/SCL_122
8534; EG-NEXT:     SETE_INT T12.W, PV.W, T1.W,
8535; EG-NEXT:     SETGE_UINT * T13.W, PV.W, T1.W,
8536; EG-NEXT:     SUBB_UINT T6.X, T11.W, T0.W, BS:VEC_021/SCL_122
8537; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
8538; EG-NEXT:     SUB_INT T6.Z, T11.W, T0.W, BS:VEC_021/SCL_122
8539; EG-NEXT:     SUB_INT T12.W, PV.Y, PV.X,
8540; EG-NEXT:     CNDE_INT * T7.W, T1.Y, T7.W, T0.Z,
8541; EG-NEXT:     SUB_INT T4.X, T9.W, T1.W,
8542; EG-NEXT:     LSHL T2.Y, PS, 1,
8543; EG-NEXT:     BFE_UINT * T0.Z, T3.X, literal.x, 1,
8544; EG-NEXT:    5(7.006492e-45), 0(0.000000e+00)
8545; EG-NEXT:    ALU clause starting at 1871:
8546; EG-NEXT:     CNDE_INT T12.W, T1.Y, T1.Z, T12.W,
8547; EG-NEXT:     CNDE_INT * T11.W, T3.Y, T11.W, T6.Z,
8548; EG-NEXT:     BFE_UINT T7.X, T2.X, literal.x, 1,
8549; EG-NEXT:     LSHL T1.Y, PS, 1,
8550; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T7.W, literal.y,
8551; EG-NEXT:     OR_INT T7.W, T2.Y, T0.Z,
8552; EG-NEXT:     SUB_INT * T12.W, T4.X, T6.X,
8553; EG-NEXT:    5(7.006492e-45), 31(4.344025e-44)
8554; EG-NEXT:     BFE_UINT T4.X, T0.Y, literal.x, 1,
8555; EG-NEXT:     CNDE_INT T2.Y, T3.Y, T9.W, PS, BS:VEC_120/SCL_212
8556; EG-NEXT:     SETGE_UINT T0.Z, PV.W, T2.Z, BS:VEC_021/SCL_122
8557; EG-NEXT:     SETE_INT T9.W, PV.Z, T4.Z,
8558; EG-NEXT:     SETGE_UINT * T12.W, PV.Z, T4.Z,
8559; EG-NEXT:    6(8.407791e-45), 0(0.000000e+00)
8560; EG-NEXT:     SUBB_UINT T6.X, T8.W, T4.W,
8561; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, PV.Z,
8562; EG-NEXT:     SUB_INT * T0.Z, T7.W, T2.Z, BS:VEC_201
8563; EG-NEXT:     BIT_ALIGN_INT T8.W, T2.Y, T11.W, literal.x,
8564; EG-NEXT:     OR_INT * T9.W, T1.Y, T4.X,
8565; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8566; EG-NEXT:     SUBB_UINT T4.X, T7.W, T2.Z,
8567; EG-NEXT:     SUB_INT T1.Y, T1.Z, T4.Z, BS:VEC_021/SCL_122
8568; EG-NEXT:     SETGE_UINT T6.Z, PS, T0.W, BS:VEC_021/SCL_122
8569; EG-NEXT:     SETE_INT T11.W, PV.W, T1.W,
8570; EG-NEXT:     SETGE_UINT * T12.W, PV.W, T1.W,
8571; EG-NEXT:     SUBB_UINT T9.X, T9.W, T0.W, BS:VEC_021/SCL_122
8572; EG-NEXT:     CNDE_INT T2.Y, PV.W, PS, PV.Z,
8573; EG-NEXT:     SUB_INT T6.Z, T9.W, T0.W, BS:VEC_021/SCL_122
8574; EG-NEXT:     SUB_INT T11.W, PV.Y, PV.X,
8575; EG-NEXT:     CNDE_INT * T7.W, T3.Y, T7.W, T0.Z,
8576; EG-NEXT:     SUB_INT T4.X, T8.W, T1.W, BS:VEC_021/SCL_122
8577; EG-NEXT:     LSHL T1.Y, PS, 1,
8578; EG-NEXT:     BFE_UINT T0.Z, T3.X, literal.x, 1,
8579; EG-NEXT:     CNDE_INT T11.W, T3.Y, T1.Z, PV.W,
8580; EG-NEXT:     CNDE_INT * T9.W, PV.Y, T9.W, PV.Z,
8581; EG-NEXT:    4(5.605194e-45), 0(0.000000e+00)
8582; EG-NEXT:     SUB_INT T10.X, T5.Z, T3.Z,
8583; EG-NEXT:     LSHL T3.Y, PS, 1,
8584; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.W, T7.W, literal.x,
8585; EG-NEXT:     OR_INT T7.W, PV.Y, PV.Z,
8586; EG-NEXT:     SUB_INT * T11.W, PV.X, T9.X,
8587; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8588; EG-NEXT:     BFE_UINT T4.X, T0.Y, literal.x, 1,
8589; EG-NEXT:     CNDE_INT T1.Y, T2.Y, T8.W, PS, BS:VEC_120/SCL_212
8590; EG-NEXT:     SETGE_UINT T0.Z, PV.W, T2.Z, BS:VEC_021/SCL_122
8591; EG-NEXT:     SETE_INT T8.W, PV.Z, T4.Z,
8592; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T4.Z,
8593; EG-NEXT:    5(7.006492e-45), 0(0.000000e+00)
8594; EG-NEXT:     CNDE_INT T9.X, PV.W, PS, PV.Z,
8595; EG-NEXT:     SUB_INT T2.Y, T7.W, T2.Z,
8596; EG-NEXT:     BIT_ALIGN_INT T0.Z, PV.Y, T9.W, literal.x,
8597; EG-NEXT:     OR_INT T8.W, T3.Y, PV.X,
8598; EG-NEXT:     SUB_INT * T9.W, T10.X, T6.X,
8599; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8600; EG-NEXT:     CNDE_INT T4.X, T8.X, T5.Z, PS,
8601; EG-NEXT:     SETGE_UINT T1.Y, PV.W, T0.W, BS:VEC_021/SCL_122
8602; EG-NEXT:     SETE_INT T5.Z, PV.Z, T1.W, BS:VEC_102/SCL_221
8603; EG-NEXT:     SETGE_UINT T9.W, PV.Z, T1.W, BS:VEC_102/SCL_221
8604; EG-NEXT:     CNDE_INT * T11.W, PV.X, T7.W, PV.Y,
8605; EG-NEXT:     LSHL T6.X, PS, 1,
8606; EG-NEXT:     CNDE_INT T1.Y, PV.Z, PV.W, PV.Y,
8607; EG-NEXT:     SUB_INT T5.Z, T8.W, T0.W,
8608; EG-NEXT:     BIT_ALIGN_INT T9.W, PV.X, T10.W, literal.x, BS:VEC_021/SCL_122
8609; EG-NEXT:     OR_INT * T10.W, T5.X, T7.X,
8610; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8611; EG-NEXT:     BFE_UINT T4.X, T3.X, literal.x, 1,
8612; EG-NEXT:     SETGE_UINT T2.Y, PS, T4.W, BS:VEC_021/SCL_122
8613; EG-NEXT:     SETE_INT T6.Z, PV.W, T3.Z,
8614; EG-NEXT:     SETGE_UINT T12.W, PV.W, T3.Z,
8615; EG-NEXT:     CNDE_INT * T13.W, PV.Y, T8.W, PV.Z,
8616; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
8617; EG-NEXT:     LSHL T5.X, PS, 1,
8618; EG-NEXT:     CNDE_INT T2.Y, PV.Z, PV.W, PV.Y,
8619; EG-NEXT:     SUB_INT T5.Z, T10.W, T4.W,
8620; EG-NEXT:     SUBB_UINT T12.W, T10.W, T4.W,
8621; EG-NEXT:     SUB_INT * T14.W, T9.W, T3.Z,
8622; EG-NEXT:     SUBB_UINT T7.X, T7.W, T2.Z,
8623; EG-NEXT:     SUBB_UINT * T3.Y, T8.W, T0.W, BS:VEC_120/SCL_212
8624; EG-NEXT:     SUB_INT T6.Z, T0.Z, T1.W,
8625; EG-NEXT:     SUB_INT * T7.W, T14.W, T12.W, BS:VEC_021/SCL_122
8626; EG-NEXT:     CNDE_INT * T8.W, T2.Y, T10.W, T5.Z,
8627; EG-NEXT:     SUB_INT T8.X, T1.Z, T4.Z,
8628; EG-NEXT:     LSHL T4.Y, PV.W, 1,
8629; EG-NEXT:     BFE_UINT T5.Z, T2.X, literal.x, 1,
8630; EG-NEXT:     CNDE_INT T7.W, T2.Y, T9.W, T7.W,
8631; EG-NEXT:     SUB_INT * T9.W, T6.Z, T3.Y,
8632; EG-NEXT:    4(5.605194e-45), 0(0.000000e+00)
8633; EG-NEXT:     BFE_UINT T10.X, T0.Y, literal.x, 1,
8634; EG-NEXT:     CNDE_INT T1.Y, T1.Y, T0.Z, PS, BS:VEC_120/SCL_212
8635; EG-NEXT:     BIT_ALIGN_INT T0.Z, PV.W, T8.W, literal.y,
8636; EG-NEXT:     OR_INT T7.W, PV.Y, PV.Z,
8637; EG-NEXT:     SUB_INT * T8.W, PV.X, T7.X,
8638; EG-NEXT:    4(5.605194e-45), 31(4.344025e-44)
8639; EG-NEXT:     CNDE_INT T7.X, T9.X, T1.Z, PS,
8640; EG-NEXT:     SETGE_UINT T2.Y, PV.W, T4.W,
8641; EG-NEXT:     SETE_INT T1.Z, PV.Z, T3.Z, BS:VEC_021/SCL_122
8642; EG-NEXT:     BIT_ALIGN_INT T8.W, PV.Y, T13.W, literal.x, BS:VEC_021/SCL_122
8643; EG-NEXT:     OR_INT * T9.W, T5.X, PV.X,
8644; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8645; EG-NEXT:     SETGE_UINT T5.X, T0.Z, T3.Z,
8646; EG-NEXT:     SUBB_UINT T1.Y, T7.W, T4.W,
8647; EG-NEXT:     SETGE_UINT * T5.Z, PS, T0.W, BS:VEC_021/SCL_122
8648; EG-NEXT:     SETE_INT T10.W, T8.W, T1.W,
8649; EG-NEXT:     SETGE_UINT * T12.W, T8.W, T1.W,
8650; EG-NEXT:     SUB_INT T8.X, T0.Z, T3.Z,
8651; EG-NEXT:     CNDE_INT T3.Y, PV.W, PS, T5.Z,
8652; EG-NEXT:     SUB_INT T5.Z, T9.W, T0.W,
8653; EG-NEXT:     SUBB_UINT * T10.W, T9.W, T0.W,
8654; EG-NEXT:     SUB_INT * T12.W, T8.W, T1.W,
8655; EG-NEXT:     SUB_INT T9.X, PV.W, T10.W,
8656; EG-NEXT:     CNDE_INT * T4.Y, T3.Y, T9.W, T5.Z, BS:VEC_021/SCL_122
8657; EG-NEXT:     SUB_INT T5.Z, T7.W, T4.W,
8658; EG-NEXT:     SUB_INT T9.W, T8.X, T1.Y,
8659; EG-NEXT:     CNDE_INT * T10.W, T1.Z, T5.X, T2.Y,
8660; EG-NEXT:    ALU clause starting at 1985:
8661; EG-NEXT:     CNDE_INT T5.X, T10.W, T0.Z, T9.W,
8662; EG-NEXT:     CNDE_INT T1.Y, T10.W, T7.W, T5.Z,
8663; EG-NEXT:     LSHL T0.Z, T4.Y, 1,
8664; EG-NEXT:     BFE_UINT * T7.W, T0.Y, literal.x, 1, BS:VEC_120/SCL_212
8665; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
8666; EG-NEXT:     CNDE_INT * T8.W, T3.Y, T8.W, T9.X,
8667; EG-NEXT:     BIT_ALIGN_INT T8.X, PV.W, T4.Y, literal.x,
8668; EG-NEXT:     OR_INT T2.Y, T0.Z, T7.W,
8669; EG-NEXT:     BIT_ALIGN_INT T0.Z, T5.X, T1.Y, literal.x, BS:VEC_021/SCL_122
8670; EG-NEXT:     BIT_ALIGN_INT * T7.W, T7.X, T11.W, literal.x, BS:VEC_120/SCL_212
8671; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8672; EG-NEXT:     OR_INT * T8.W, T6.X, T4.X,
8673; EG-NEXT:     SETGE_UINT T4.X, PV.W, T2.Z,
8674; EG-NEXT:     SETE_INT T3.Y, T7.W, T4.Z, BS:VEC_021/SCL_122
8675; EG-NEXT:     SETGE_UINT T1.Z, T7.W, T4.Z, BS:VEC_021/SCL_122
8676; EG-NEXT:     BFE_UINT T9.W, T2.X, literal.x, 1,
8677; EG-NEXT:     LSHL * T10.W, T1.Y, 1,
8678; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
8679; EG-NEXT:     OR_INT T5.X, PS, PV.W,
8680; EG-NEXT:     CNDE_INT T1.Y, PV.Y, PV.Z, PV.X,
8681; EG-NEXT:     SUB_INT T1.Z, T8.W, T2.Z, BS:VEC_021/SCL_122
8682; EG-NEXT:     SUBB_UINT T9.W, T8.W, T2.Z, BS:VEC_021/SCL_122
8683; EG-NEXT:     SUB_INT * T10.W, T7.W, T4.Z,
8684; EG-NEXT:     SUB_INT T4.X, PS, PV.W,
8685; EG-NEXT:     CNDE_INT T3.Y, PV.Y, T8.W, PV.Z,
8686; EG-NEXT:     SETGE_UINT T1.Z, PV.X, T4.W, BS:VEC_021/SCL_122
8687; EG-NEXT:     SETE_INT T8.W, T0.Z, T3.Z,
8688; EG-NEXT:     SETGE_UINT * T9.W, T0.Z, T3.Z,
8689; EG-NEXT:     CNDE_INT T6.X, PV.W, PS, PV.Z,
8690; EG-NEXT:     SUB_INT T4.Y, T5.X, T4.W, BS:VEC_021/SCL_122
8691; EG-NEXT:     LSHL T1.Z, PV.Y, 1,
8692; EG-NEXT:     BFE_UINT T8.W, T3.X, literal.x, 1, BS:VEC_120/SCL_212
8693; EG-NEXT:     CNDE_INT * T7.W, T1.Y, T7.W, PV.X,
8694; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
8695; EG-NEXT:     BIT_ALIGN_INT T4.X, PS, T3.Y, literal.x,
8696; EG-NEXT:     OR_INT T1.Y, PV.Z, PV.W,
8697; EG-NEXT:     CNDE_INT T1.Z, PV.X, T5.X, PV.Y,
8698; EG-NEXT:     SUBB_UINT T7.W, T2.Y, T0.W, BS:VEC_021/SCL_122
8699; EG-NEXT:     SUB_INT * T8.W, T8.X, T1.W,
8700; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8701; EG-NEXT:     SUB_INT T7.X, PS, PV.W,
8702; EG-NEXT:     LSHL T3.Y, PV.Z, 1,
8703; EG-NEXT:     SETGE_UINT T5.Z, PV.Y, T2.Z, BS:VEC_021/SCL_122
8704; EG-NEXT:     SETE_INT T7.W, PV.X, T4.Z,
8705; EG-NEXT:     SETGE_UINT * T8.W, PV.X, T4.Z,
8706; EG-NEXT:     BFE_UINT T9.X, T2.X, literal.x, 1,
8707; EG-NEXT:     SETGE_UINT T4.Y, T2.Y, T0.W,
8708; EG-NEXT:     SETE_INT T6.Z, T8.X, T1.W, BS:VEC_120/SCL_212
8709; EG-NEXT:     CNDE_INT T7.W, PV.W, PS, PV.Z,
8710; EG-NEXT:     SUB_INT * T8.W, T1.Y, T2.Z,
8711; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
8712; EG-NEXT:     SUBB_UINT T10.X, T1.Y, T2.Z,
8713; EG-NEXT:     SUB_INT T5.Y, T4.X, T4.Z, BS:VEC_021/SCL_122
8714; EG-NEXT:     SUBB_UINT * T5.Z, T5.X, T4.W, BS:VEC_120/SCL_212
8715; EG-NEXT:     SUB_INT T9.W, T0.Z, T3.Z,
8716; EG-NEXT:     CNDE_INT * T8.W, T7.W, T1.Y, T8.W,
8717; EG-NEXT:     SETGE_UINT T5.X, T8.X, T1.W,
8718; EG-NEXT:     LSHL T1.Y, PS, 1,
8719; EG-NEXT:     BFE_UINT T7.Z, T3.X, 1, 1, BS:VEC_201
8720; EG-NEXT:     SUB_INT T9.W, PV.W, T5.Z,
8721; EG-NEXT:     SUB_INT * T10.W, T5.Y, T10.X,
8722; EG-NEXT:     CNDE_INT T4.X, T7.W, T4.X, PS,
8723; EG-NEXT:     CNDE_INT T5.Y, T6.X, T0.Z, PV.W,
8724; EG-NEXT:     OR_INT T0.Z, PV.Y, PV.Z,
8725; EG-NEXT:     CNDE_INT T7.W, T6.Z, PV.X, T4.Y, BS:VEC_021/SCL_122
8726; EG-NEXT:     SUB_INT * T9.W, T2.Y, T0.W,
8727; EG-NEXT:     CNDE_INT T5.X, PV.W, T2.Y, PS,
8728; EG-NEXT:     SETGE_UINT T1.Y, PV.Z, T2.Z,
8729; EG-NEXT:     BIT_ALIGN_INT T1.Z, PV.Y, T1.Z, literal.x, BS:VEC_021/SCL_122
8730; EG-NEXT:     BIT_ALIGN_INT T8.W, PV.X, T8.W, literal.x,
8731; EG-NEXT:     OR_INT * T9.W, T3.Y, T9.X,
8732; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8733; EG-NEXT:     SETE_INT T4.X, PV.W, T4.Z, BS:VEC_021/SCL_122
8734; EG-NEXT:     SETGE_UINT T2.Y, PV.W, T4.Z, BS:VEC_021/SCL_122
8735; EG-NEXT:     SETGE_UINT T5.Z, PS, T4.W,
8736; EG-NEXT:     SETE_INT T10.W, PV.Z, T3.Z,
8737; EG-NEXT:     SETGE_UINT * T11.W, PV.Z, T3.Z,
8738; EG-NEXT:     CNDE_INT T6.X, PV.W, PS, PV.Z,
8739; EG-NEXT:     CNDE_INT T1.Y, PV.X, PV.Y, T1.Y,
8740; EG-NEXT:     LSHL T5.Z, T5.X, 1, BS:VEC_201
8741; EG-NEXT:     BFE_UINT T10.W, T0.Y, literal.x, 1,
8742; EG-NEXT:     CNDE_INT * T7.W, T7.W, T8.X, T7.X,
8743; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
8744; EG-NEXT:     SUB_INT T4.X, T0.Z, T2.Z,
8745; EG-NEXT:     SUBB_UINT T2.Y, T0.Z, T2.Z,
8746; EG-NEXT:     SUB_INT T6.Z, T8.W, T4.Z, BS:VEC_021/SCL_122
8747; EG-NEXT:     BIT_ALIGN_INT T7.W, PS, T5.X, literal.x,
8748; EG-NEXT:     OR_INT * T10.W, PV.Z, PV.W,
8749; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8750; EG-NEXT:     SETGE_UINT T5.X, PS, T0.W,
8751; EG-NEXT:     SETE_INT T3.Y, PV.W, T1.W, BS:VEC_021/SCL_122
8752; EG-NEXT:     SETGE_UINT T5.Z, PV.W, T1.W, BS:VEC_021/SCL_122
8753; EG-NEXT:     SUB_INT T11.W, PV.Z, PV.Y,
8754; EG-NEXT:     CNDE_INT * T12.W, T1.Y, T0.Z, PV.X,
8755; EG-NEXT:     SUBB_UINT * T4.X, T10.W, T0.W,
8756; EG-NEXT:     SUB_INT T2.Y, T7.W, T1.W,
8757; EG-NEXT:     LSHL T0.Z, T12.W, 1, BS:VEC_201
8758; EG-NEXT:     AND_INT * T13.W, T3.X, 1,
8759; EG-NEXT:     CNDE_INT * T8.W, T1.Y, T8.W, T11.W,
8760; EG-NEXT:     SUB_INT T3.X, T10.W, T0.W,
8761; EG-NEXT:     BIT_ALIGN_INT * T1.Y, PV.W, T12.W, literal.x, BS:VEC_021/SCL_122
8762; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8763; EG-NEXT:     OR_INT T0.Z, T0.Z, T13.W,
8764; EG-NEXT:     SUB_INT T8.W, T2.Y, T4.X,
8765; EG-NEXT:     CNDE_INT * T11.W, T3.Y, T5.Z, T5.X,
8766; EG-NEXT:     SUB_INT T4.X, T9.W, T4.W,
8767; EG-NEXT:     CNDE_INT T2.Y, PS, T7.W, PV.W, BS:VEC_021/SCL_122
8768; EG-NEXT:     SETGE_UINT T5.Z, PV.Z, T2.Z,
8769; EG-NEXT:     SETE_INT * T7.W, T1.Y, T4.Z, BS:VEC_021/SCL_122
8770; EG-NEXT:     CNDE_INT * T8.W, T11.W, T10.W, T3.X,
8771; EG-NEXT:     SETGE_UINT T3.X, T1.Y, T4.Z,
8772; EG-NEXT:     SUBB_UINT T3.Y, T0.Z, T2.Z, BS:VEC_021/SCL_122
8773; EG-NEXT:     SUB_INT * T4.Z, T1.Y, T4.Z,
8774; EG-NEXT:    ALU clause starting at 2098:
8775; EG-NEXT:     BFE_UINT T10.W, T0.Y, 1, 1,
8776; EG-NEXT:     LSHL * T11.W, T8.W, 1,
8777; EG-NEXT:     SUBB_UINT T5.X, T9.W, T4.W,
8778; EG-NEXT:     SUB_INT T4.Y, T1.Z, T3.Z,
8779; EG-NEXT:     OR_INT T6.Z, PS, PV.W,
8780; EG-NEXT:     SUB_INT * T10.W, T4.Z, T3.Y, BS:VEC_201
8781; EG-NEXT:     CNDE_INT * T7.W, T7.W, T3.X, T5.Z,
8782; EG-NEXT:     CNDE_INT T3.X, PV.W, T1.Y, T10.W,
8783; EG-NEXT:     SETGE_UINT T1.Y, T6.Z, T0.W,
8784; EG-NEXT:     SUB_INT T4.Z, T4.Y, T5.X,
8785; EG-NEXT:     BIT_ALIGN_INT * T8.W, T2.Y, T8.W, literal.x, BS:VEC_201
8786; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8787; EG-NEXT:     CNDE_INT * T9.W, T6.X, T9.W, T4.X,
8788; EG-NEXT:     SETE_INT T4.X, T8.W, T1.W,
8789; EG-NEXT:     SETGE_UINT T2.Y, T8.W, T1.W,
8790; EG-NEXT:     LSHL T5.Z, PV.W, 1,
8791; EG-NEXT:     BFE_UINT T10.W, T2.X, 1, 1,
8792; EG-NEXT:     CNDE_INT * T11.W, T6.X, T1.Z, T4.Z,
8793; EG-NEXT:     BIT_ALIGN_INT T5.X, PS, T9.W, literal.x, BS:VEC_021/SCL_122
8794; EG-NEXT:     OR_INT T3.Y, PV.Z, PV.W,
8795; EG-NEXT:     CNDE_INT T1.Z, PV.X, PV.Y, T1.Y,
8796; EG-NEXT:     SUB_INT T9.W, T6.Z, T0.W, BS:VEC_102/SCL_221
8797; EG-NEXT:     XOR_INT * T10.W, T3.X, T5.W,
8798; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8799; EG-NEXT:     SUB_INT T3.X, PS, T5.W,
8800; EG-NEXT:     CNDE_INT T1.Y, PV.Z, T6.Z, PV.W, BS:VEC_021/SCL_122
8801; EG-NEXT:     SETGE_UINT T4.Z, PV.Y, T4.W, BS:VEC_021/SCL_122
8802; EG-NEXT:     SETE_INT T9.W, PV.X, T3.Z,
8803; EG-NEXT:     SETGE_UINT * T10.W, PV.X, T3.Z,
8804; EG-NEXT:     CNDE_INT T4.X, PV.W, PS, PV.Z,
8805; EG-NEXT:     LSHL T2.Y, PV.Y, 1,
8806; EG-NEXT:     AND_INT T4.Z, T0.Y, 1,
8807; EG-NEXT:     SUBB_UINT T9.W, T6.Z, T0.W, BS:VEC_102/SCL_221
8808; EG-NEXT:     SUB_INT * T10.W, T8.W, T1.W,
8809; EG-NEXT:     SUB_INT T6.X, T3.Y, T4.W,
8810; EG-NEXT:     SUB_INT T0.Y, PS, PV.W,
8811; EG-NEXT:     SUBB_UINT T5.Z, T3.Y, T4.W,
8812; EG-NEXT:     SUB_INT T9.W, T5.X, T3.Z,
8813; EG-NEXT:     OR_INT * T10.W, PV.Y, PV.Z,
8814; EG-NEXT:     SUB_INT T7.X, PS, T0.W,
8815; EG-NEXT:     SUB_INT T2.Y, PV.W, PV.Z,
8816; EG-NEXT:     CNDE_INT T1.Z, T1.Z, T8.W, PV.Y, BS:VEC_021/SCL_122
8817; EG-NEXT:     CNDE_INT T8.W, T4.X, T3.Y, PV.X,
8818; EG-NEXT:     SUB_INT * T9.W, T0.Z, T2.Z,
8819; EG-NEXT:     CNDE_INT T6.X, T7.W, T0.Z, PS,
8820; EG-NEXT:     LSHL T0.Y, PV.W, 1,
8821; EG-NEXT:     AND_INT T0.Z, T2.X, 1,
8822; EG-NEXT:     BIT_ALIGN_INT T7.W, PV.Z, T1.Y, literal.x,
8823; EG-NEXT:     CNDE_INT * T9.W, T4.X, T5.X, PV.Y,
8824; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8825; EG-NEXT:     SETGE_UINT T2.X, T10.W, T0.W,
8826; EG-NEXT:     SETE_INT T1.Y, PV.W, T1.W, BS:VEC_021/SCL_122
8827; EG-NEXT:     SETGE_UINT * T1.Z, PV.W, T1.W, BS:VEC_021/SCL_122
8828; EG-NEXT:     BIT_ALIGN_INT T8.W, T9.W, T8.W, literal.x,
8829; EG-NEXT:     OR_INT * T9.W, T0.Y, T0.Z,
8830; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
8831; EG-NEXT:     SETGE_UINT T4.X, PS, T4.W,
8832; EG-NEXT:     SETE_INT T0.Y, PV.W, T3.Z,
8833; EG-NEXT:     SETGE_UINT T0.Z, PV.W, T3.Z,
8834; EG-NEXT:     SUBB_UINT T11.W, PS, T4.W,
8835; EG-NEXT:     SUB_INT * T12.W, PV.W, T3.Z,
8836; EG-NEXT:     SUBB_UINT * T5.X, T10.W, T0.W,
8837; EG-NEXT:     SUB_INT * T2.Y, T7.W, T1.W,
8838; EG-NEXT:     SUB_INT * T2.Z, T9.W, T4.W,
8839; EG-NEXT:     SUB_INT T0.W, T12.W, T11.W,
8840; EG-NEXT:     CNDE_INT * T1.W, T0.Y, T0.Z, T4.X,
8841; EG-NEXT:     CNDE_INT T4.X, PS, T8.W, PV.W, BS:VEC_021/SCL_122
8842; EG-NEXT:     CNDE_INT T0.Y, PS, T9.W, T2.Z, BS:VEC_102/SCL_221
8843; EG-NEXT:     SUB_INT T0.Z, T2.Y, T5.X,
8844; EG-NEXT:     CNDE_INT T0.W, T1.Y, T1.Z, T2.X, BS:VEC_210
8845; EG-NEXT:     XOR_INT * T1.W, T6.X, T5.W,
8846; EG-NEXT:     SUBB_UINT T2.X, PS, T5.W,
8847; EG-NEXT:     CNDE_INT T1.Y, PV.W, T7.W, PV.Z, BS:VEC_021/SCL_122
8848; EG-NEXT:     XOR_INT T0.Z, PV.Y, T3.W, BS:VEC_102/SCL_221
8849; EG-NEXT:     XOR_INT * T4.W, PV.X, T3.W, BS:VEC_102/SCL_221
8850; EG-NEXT:     CNDE_INT * T0.W, T0.W, T10.W, T7.X,
8851; EG-NEXT:     XOR_INT T4.X, PV.W, T6.W,
8852; EG-NEXT:     SUB_INT T0.Y, T4.W, T3.W, BS:VEC_021/SCL_122
8853; EG-NEXT:     SUBB_UINT T1.Z, T0.Z, T3.W, BS:VEC_021/SCL_122
8854; EG-NEXT:     XOR_INT T0.W, T1.Y, T6.W,
8855; EG-NEXT:     SUB_INT * T7.W, T3.X, T2.X,
8856; EG-NEXT:     SUB_INT T2.X, PV.W, T6.W, BS:VEC_021/SCL_122
8857; EG-NEXT:     SUB_INT T7.Y, PV.Y, PV.Z,
8858; EG-NEXT:     SUBB_UINT T1.Z, PV.X, T6.W, BS:VEC_021/SCL_122
8859; EG-NEXT:     XOR_INT T0.W, T1.X, T2.W,
8860; EG-NEXT:     XOR_INT * T4.W, T0.X, T2.W,
8861; EG-NEXT:     SUB_INT T0.Y, PS, T2.W,
8862; EG-NEXT:     SUB_INT T7.Z, T1.W, T5.W, BS:VEC_021/SCL_122
8863; EG-NEXT:     SUBB_UINT T1.W, PV.W, T2.W,
8864; EG-NEXT:     SUB_INT * T4.W, PV.X, PV.Z,
8865; EG-NEXT:     SUB_INT T7.X, T0.Z, T3.W,
8866; EG-NEXT:     SUB_INT T4.Y, PV.Y, PV.W,
8867; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.x,
8868; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
8869; EG-NEXT:     LSHR T0.X, PV.W, literal.x,
8870; EG-NEXT:     SUB_INT T4.Z, T4.X, T6.W, BS:VEC_102/SCL_221
8871; EG-NEXT:     SUB_INT * T4.X, T0.W, T2.W,
8872; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
8873; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
8874; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
8875  %den_ptr = getelementptr <4 x i64>, ptr addrspace(1) %in, i64 1
8876  %num = load <4 x i64>, ptr addrspace(1) %in
8877  %den = load <4 x i64>, ptr addrspace(1) %den_ptr
8878  %result = srem <4 x i64> %num, %den
8879  store <4 x i64> %result, ptr addrspace(1) %out
8880  ret void
8881}
8882
8883define amdgpu_kernel void @srem_v4i64_4(ptr addrspace(1) %out, ptr addrspace(1) %in) {
8884; GCN-LABEL: srem_v4i64_4:
8885; GCN:       ; %bb.0:
8886; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
8887; GCN-NEXT:    v_mov_b32_e32 v8, 0
8888; GCN-NEXT:    s_waitcnt lgkmcnt(0)
8889; GCN-NEXT:    global_load_dwordx4 v[0:3], v8, s[2:3]
8890; GCN-NEXT:    global_load_dwordx4 v[4:7], v8, s[2:3] offset:16
8891; GCN-NEXT:    s_waitcnt vmcnt(1)
8892; GCN-NEXT:    v_ashrrev_i32_e32 v9, 31, v1
8893; GCN-NEXT:    v_lshrrev_b32_e32 v9, 30, v9
8894; GCN-NEXT:    v_ashrrev_i32_e32 v10, 31, v3
8895; GCN-NEXT:    v_add_co_u32_e32 v9, vcc, v0, v9
8896; GCN-NEXT:    v_lshrrev_b32_e32 v10, 30, v10
8897; GCN-NEXT:    v_addc_co_u32_e32 v13, vcc, 0, v1, vcc
8898; GCN-NEXT:    s_waitcnt vmcnt(0)
8899; GCN-NEXT:    v_ashrrev_i32_e32 v11, 31, v5
8900; GCN-NEXT:    v_add_co_u32_e32 v10, vcc, v2, v10
8901; GCN-NEXT:    v_lshrrev_b32_e32 v11, 30, v11
8902; GCN-NEXT:    v_addc_co_u32_e32 v14, vcc, 0, v3, vcc
8903; GCN-NEXT:    v_ashrrev_i32_e32 v12, 31, v7
8904; GCN-NEXT:    v_add_co_u32_e32 v11, vcc, v4, v11
8905; GCN-NEXT:    v_lshrrev_b32_e32 v12, 30, v12
8906; GCN-NEXT:    v_addc_co_u32_e32 v15, vcc, 0, v5, vcc
8907; GCN-NEXT:    v_add_co_u32_e32 v12, vcc, v6, v12
8908; GCN-NEXT:    v_addc_co_u32_e32 v16, vcc, 0, v7, vcc
8909; GCN-NEXT:    v_and_b32_e32 v9, -4, v9
8910; GCN-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v9
8911; GCN-NEXT:    v_and_b32_e32 v10, -4, v10
8912; GCN-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v13, vcc
8913; GCN-NEXT:    v_sub_co_u32_e32 v2, vcc, v2, v10
8914; GCN-NEXT:    v_and_b32_e32 v11, -4, v11
8915; GCN-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v14, vcc
8916; GCN-NEXT:    v_sub_co_u32_e32 v4, vcc, v4, v11
8917; GCN-NEXT:    v_and_b32_e32 v12, -4, v12
8918; GCN-NEXT:    v_subb_co_u32_e32 v5, vcc, v5, v15, vcc
8919; GCN-NEXT:    v_sub_co_u32_e32 v6, vcc, v6, v12
8920; GCN-NEXT:    v_subb_co_u32_e32 v7, vcc, v7, v16, vcc
8921; GCN-NEXT:    global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
8922; GCN-NEXT:    global_store_dwordx4 v8, v[0:3], s[0:1]
8923; GCN-NEXT:    s_endpgm
8924;
8925; TAHITI-LABEL: srem_v4i64_4:
8926; TAHITI:       ; %bb.0:
8927; TAHITI-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x9
8928; TAHITI-NEXT:    s_mov_b32 s3, 0xf000
8929; TAHITI-NEXT:    s_mov_b32 s2, -1
8930; TAHITI-NEXT:    s_mov_b32 s10, s2
8931; TAHITI-NEXT:    s_mov_b32 s11, s3
8932; TAHITI-NEXT:    s_waitcnt lgkmcnt(0)
8933; TAHITI-NEXT:    s_mov_b32 s8, s6
8934; TAHITI-NEXT:    s_mov_b32 s9, s7
8935; TAHITI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
8936; TAHITI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
8937; TAHITI-NEXT:    s_mov_b32 s0, s4
8938; TAHITI-NEXT:    s_mov_b32 s1, s5
8939; TAHITI-NEXT:    s_waitcnt vmcnt(1)
8940; TAHITI-NEXT:    v_ashrrev_i32_e32 v8, 31, v1
8941; TAHITI-NEXT:    v_lshrrev_b32_e32 v8, 30, v8
8942; TAHITI-NEXT:    v_ashrrev_i32_e32 v9, 31, v3
8943; TAHITI-NEXT:    v_add_i32_e32 v8, vcc, v0, v8
8944; TAHITI-NEXT:    v_lshrrev_b32_e32 v9, 30, v9
8945; TAHITI-NEXT:    v_addc_u32_e32 v12, vcc, 0, v1, vcc
8946; TAHITI-NEXT:    s_waitcnt vmcnt(0)
8947; TAHITI-NEXT:    v_ashrrev_i32_e32 v10, 31, v5
8948; TAHITI-NEXT:    v_add_i32_e32 v9, vcc, v2, v9
8949; TAHITI-NEXT:    v_lshrrev_b32_e32 v10, 30, v10
8950; TAHITI-NEXT:    v_addc_u32_e32 v13, vcc, 0, v3, vcc
8951; TAHITI-NEXT:    v_ashrrev_i32_e32 v11, 31, v7
8952; TAHITI-NEXT:    v_add_i32_e32 v10, vcc, v4, v10
8953; TAHITI-NEXT:    v_lshrrev_b32_e32 v11, 30, v11
8954; TAHITI-NEXT:    v_addc_u32_e32 v14, vcc, 0, v5, vcc
8955; TAHITI-NEXT:    v_add_i32_e32 v11, vcc, v6, v11
8956; TAHITI-NEXT:    v_addc_u32_e32 v15, vcc, 0, v7, vcc
8957; TAHITI-NEXT:    v_and_b32_e32 v8, -4, v8
8958; TAHITI-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
8959; TAHITI-NEXT:    v_and_b32_e32 v9, -4, v9
8960; TAHITI-NEXT:    v_subb_u32_e32 v1, vcc, v1, v12, vcc
8961; TAHITI-NEXT:    v_sub_i32_e32 v2, vcc, v2, v9
8962; TAHITI-NEXT:    v_and_b32_e32 v10, -4, v10
8963; TAHITI-NEXT:    v_subb_u32_e32 v3, vcc, v3, v13, vcc
8964; TAHITI-NEXT:    v_sub_i32_e32 v4, vcc, v4, v10
8965; TAHITI-NEXT:    v_and_b32_e32 v11, -4, v11
8966; TAHITI-NEXT:    v_subb_u32_e32 v5, vcc, v5, v14, vcc
8967; TAHITI-NEXT:    v_sub_i32_e32 v6, vcc, v6, v11
8968; TAHITI-NEXT:    v_subb_u32_e32 v7, vcc, v7, v15, vcc
8969; TAHITI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
8970; TAHITI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
8971; TAHITI-NEXT:    s_endpgm
8972;
8973; TONGA-LABEL: srem_v4i64_4:
8974; TONGA:       ; %bb.0:
8975; TONGA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
8976; TONGA-NEXT:    s_waitcnt lgkmcnt(0)
8977; TONGA-NEXT:    v_mov_b32_e32 v0, s2
8978; TONGA-NEXT:    v_mov_b32_e32 v1, s3
8979; TONGA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
8980; TONGA-NEXT:    s_add_u32 s4, s2, 16
8981; TONGA-NEXT:    s_addc_u32 s5, s3, 0
8982; TONGA-NEXT:    v_mov_b32_e32 v4, s4
8983; TONGA-NEXT:    v_mov_b32_e32 v5, s5
8984; TONGA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
8985; TONGA-NEXT:    v_mov_b32_e32 v9, s1
8986; TONGA-NEXT:    v_mov_b32_e32 v8, s0
8987; TONGA-NEXT:    s_add_u32 s0, s0, 16
8988; TONGA-NEXT:    s_addc_u32 s1, s1, 0
8989; TONGA-NEXT:    v_mov_b32_e32 v11, s1
8990; TONGA-NEXT:    v_mov_b32_e32 v10, s0
8991; TONGA-NEXT:    s_waitcnt vmcnt(1)
8992; TONGA-NEXT:    v_ashrrev_i32_e32 v12, 31, v1
8993; TONGA-NEXT:    v_lshrrev_b32_e32 v12, 30, v12
8994; TONGA-NEXT:    v_add_u32_e32 v12, vcc, v0, v12
8995; TONGA-NEXT:    v_addc_u32_e32 v16, vcc, 0, v1, vcc
8996; TONGA-NEXT:    v_and_b32_e32 v12, -4, v12
8997; TONGA-NEXT:    v_ashrrev_i32_e32 v13, 31, v3
8998; TONGA-NEXT:    v_sub_u32_e32 v0, vcc, v0, v12
8999; TONGA-NEXT:    v_lshrrev_b32_e32 v13, 30, v13
9000; TONGA-NEXT:    v_subb_u32_e32 v1, vcc, v1, v16, vcc
9001; TONGA-NEXT:    v_add_u32_e32 v12, vcc, v2, v13
9002; TONGA-NEXT:    v_addc_u32_e32 v13, vcc, 0, v3, vcc
9003; TONGA-NEXT:    v_and_b32_e32 v12, -4, v12
9004; TONGA-NEXT:    s_waitcnt vmcnt(0)
9005; TONGA-NEXT:    v_ashrrev_i32_e32 v14, 31, v5
9006; TONGA-NEXT:    v_ashrrev_i32_e32 v15, 31, v7
9007; TONGA-NEXT:    v_sub_u32_e32 v2, vcc, v2, v12
9008; TONGA-NEXT:    v_lshrrev_b32_e32 v14, 30, v14
9009; TONGA-NEXT:    v_lshrrev_b32_e32 v15, 30, v15
9010; TONGA-NEXT:    v_subb_u32_e32 v3, vcc, v3, v13, vcc
9011; TONGA-NEXT:    v_add_u32_e64 v12, s[0:1], v4, v14
9012; TONGA-NEXT:    v_add_u32_e32 v13, vcc, v6, v15
9013; TONGA-NEXT:    v_addc_u32_e32 v15, vcc, 0, v7, vcc
9014; TONGA-NEXT:    v_and_b32_e32 v12, -4, v12
9015; TONGA-NEXT:    v_addc_u32_e64 v14, s[0:1], 0, v5, s[0:1]
9016; TONGA-NEXT:    v_sub_u32_e32 v4, vcc, v4, v12
9017; TONGA-NEXT:    v_and_b32_e32 v13, -4, v13
9018; TONGA-NEXT:    v_subb_u32_e32 v5, vcc, v5, v14, vcc
9019; TONGA-NEXT:    v_sub_u32_e32 v6, vcc, v6, v13
9020; TONGA-NEXT:    v_subb_u32_e32 v7, vcc, v7, v15, vcc
9021; TONGA-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
9022; TONGA-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
9023; TONGA-NEXT:    s_endpgm
9024;
9025; EG-LABEL: srem_v4i64_4:
9026; EG:       ; %bb.0:
9027; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
9028; EG-NEXT:    TEX 1 @6
9029; EG-NEXT:    ALU 48, @11, KC0[CB0:0-32], KC1[]
9030; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T3.XYZW, T0.X, 0
9031; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T1.X, 1
9032; EG-NEXT:    CF_END
9033; EG-NEXT:    Fetch clause starting at 6:
9034; EG-NEXT:     VTX_READ_128 T1.XYZW, T0.X, 16, #1
9035; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
9036; EG-NEXT:    ALU clause starting at 10:
9037; EG-NEXT:     MOV * T0.X, KC0[2].Z,
9038; EG-NEXT:    ALU clause starting at 11:
9039; EG-NEXT:     ASHR * T1.W, T1.W, literal.x,
9040; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
9041; EG-NEXT:     ASHR T2.Z, T0.Y, literal.x,
9042; EG-NEXT:     ASHR T2.W, T1.Y, literal.x, BS:VEC_120/SCL_212
9043; EG-NEXT:     LSHR * T1.W, PV.W, literal.y,
9044; EG-NEXT:    31(4.344025e-44), 30(4.203895e-44)
9045; EG-NEXT:     ADD_INT T0.Y, T1.Z, PS,
9046; EG-NEXT:     ASHR T3.Z, T0.W, literal.x,
9047; EG-NEXT:     LSHR T0.W, PV.W, literal.y,
9048; EG-NEXT:     LSHR * T2.W, PV.Z, literal.y,
9049; EG-NEXT:    31(4.344025e-44), 30(4.203895e-44)
9050; EG-NEXT:     ADD_INT T2.X, T0.X, PS,
9051; EG-NEXT:     ADD_INT T1.Y, T1.X, PV.W, BS:VEC_120/SCL_212
9052; EG-NEXT:     LSHR T2.Z, PV.Z, literal.x,
9053; EG-NEXT:     ADDC_UINT T1.W, T1.Z, T1.W,
9054; EG-NEXT:     AND_INT * T3.W, PV.Y, literal.y,
9055; EG-NEXT:    30(4.203895e-44), -4(nan)
9056; EG-NEXT:     SUBB_UINT T3.X, T1.Z, PS,
9057; EG-NEXT:     BFE_INT T0.Y, PV.W, 0.0, 1,
9058; EG-NEXT:     ADD_INT T3.Z, T0.Z, PV.Z, BS:VEC_120/SCL_212
9059; EG-NEXT:     ADDC_UINT T0.W, T1.X, T0.W,
9060; EG-NEXT:     AND_INT * T1.W, PV.Y, literal.x,
9061; EG-NEXT:    -4(nan), 0(0.000000e+00)
9062; EG-NEXT:     ADDC_UINT T4.X, T0.Z, T2.Z,
9063; EG-NEXT:     SUBB_UINT T1.Y, T1.X, PS,
9064; EG-NEXT:     BFE_INT T2.Z, PV.W, 0.0, 1,
9065; EG-NEXT:     AND_INT T0.W, PV.Z, literal.x,
9066; EG-NEXT:     SUB_INT * T4.W, PV.Y, PV.X,
9067; EG-NEXT:    -4(nan), 0(0.000000e+00)
9068; EG-NEXT:     SUBB_UINT T3.X, T0.Z, PV.W,
9069; EG-NEXT:     SUB_INT T4.Y, PV.Z, PV.Y,
9070; EG-NEXT:     BFE_INT T2.Z, PV.X, 0.0, 1,
9071; EG-NEXT:     ADDC_UINT T2.W, T0.X, T2.W,
9072; EG-NEXT:     AND_INT * T5.W, T2.X, literal.x,
9073; EG-NEXT:    -4(nan), 0(0.000000e+00)
9074; EG-NEXT:     SUBB_UINT T0.Y, T0.X, PS,
9075; EG-NEXT:     SUB_INT T4.Z, T1.Z, T3.W,
9076; EG-NEXT:     BFE_INT T2.W, PV.W, 0.0, 1,
9077; EG-NEXT:     SUB_INT * T3.W, PV.Z, PV.X,
9078; EG-NEXT:     SUB_INT T4.X, T1.X, T1.W,
9079; EG-NEXT:     SUB_INT T3.Y, PV.W, PV.Y,
9080; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, literal.x,
9081; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
9082; EG-NEXT:     LSHR T1.X, PV.W, literal.x,
9083; EG-NEXT:     SUB_INT T3.Z, T0.Z, T0.W, BS:VEC_021/SCL_122
9084; EG-NEXT:     SUB_INT * T3.X, T0.X, T5.W,
9085; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
9086; EG-NEXT:     LSHR * T0.X, KC0[2].Y, literal.x,
9087; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
9088  %num = load <4 x i64>, ptr addrspace(1) %in
9089  %result = srem <4 x i64> %num, <i64 4, i64 4, i64 4, i64 4>
9090  store <4 x i64> %result, ptr addrspace(1) %out
9091  ret void
9092}
9093