xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll (revision 6206f5444fc0732e6495703c75a67f1f90f5b418)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -o - %s | FileCheck -check-prefixes=GCN,GFX6 %s
3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s
4; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s
5; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - %s | FileCheck -check-prefixes=GCN,GFX10 %s
6; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GFX11 %s
7
; Funnel shift left on the non-power-of-two width i7, with all three operands
; passed inreg to an amdgpu_ps function. The checked output reduces the shift
; amount modulo 7 with a reciprocal-based unsigned remainder expansion (two
; compare/select correction steps), shifts lhs left by that amount, shifts the
; pre-halved rhs right by (6 - amount), ORs the halves and moves the result
; back to s0 with v_readfirstlane.
8define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
9; GFX6-LABEL: s_fshl_i7:
10; GFX6:       ; %bb.0:
11; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v0, 7
12; GFX6-NEXT:    v_rcp_iflag_f32_e32 v0, v0
13; GFX6-NEXT:    s_and_b32 s2, s2, 0x7f
14; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x60001
15; GFX6-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
16; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
17; GFX6-NEXT:    v_mul_lo_u32 v1, v0, -7
18; GFX6-NEXT:    v_mul_hi_u32 v1, v0, v1
19; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
20; GFX6-NEXT:    v_mul_hi_u32 v0, s2, v0
21; GFX6-NEXT:    v_mul_lo_u32 v0, v0, 7
22; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
23; GFX6-NEXT:    v_add_i32_e32 v1, vcc, -7, v0
24; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
25; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
26; GFX6-NEXT:    v_add_i32_e32 v1, vcc, -7, v0
27; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
28; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
29; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 6, v0
30; GFX6-NEXT:    v_and_b32_e32 v0, 0x7f, v0
31; GFX6-NEXT:    v_and_b32_e32 v1, 0x7f, v1
32; GFX6-NEXT:    v_lshl_b32_e32 v0, s0, v0
33; GFX6-NEXT:    v_lshr_b32_e32 v1, s1, v1
34; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
35; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
36; GFX6-NEXT:    ; return to shader part epilog
37;
38; GFX8-LABEL: s_fshl_i7:
39; GFX8:       ; %bb.0:
40; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v0, 7
41; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
42; GFX8-NEXT:    s_and_b32 s2, s2, 0x7f
43; GFX8-NEXT:    s_and_b32 s1, s1, 0x7f
44; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
45; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
46; GFX8-NEXT:    v_cvt_u32_f32_e32 v0, v0
47; GFX8-NEXT:    s_lshr_b32 s1, s1, 1
48; GFX8-NEXT:    v_mul_lo_u32 v1, v0, -7
49; GFX8-NEXT:    v_mul_hi_u32 v1, v0, v1
50; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
51; GFX8-NEXT:    v_mul_hi_u32 v0, s2, v0
52; GFX8-NEXT:    v_mul_lo_u32 v0, v0, 7
53; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s2, v0
54; GFX8-NEXT:    v_add_u32_e32 v1, vcc, -7, v0
55; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
56; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
57; GFX8-NEXT:    v_add_u32_e32 v1, vcc, -7, v0
58; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
59; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
60; GFX8-NEXT:    v_sub_u16_e32 v1, 6, v0
61; GFX8-NEXT:    v_and_b32_e32 v0, 0x7f, v0
62; GFX8-NEXT:    v_and_b32_e32 v1, 0x7f, v1
63; GFX8-NEXT:    v_lshlrev_b16_e64 v0, v0, s0
64; GFX8-NEXT:    v_lshrrev_b16_e64 v1, v1, s1
65; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
66; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
67; GFX8-NEXT:    ; return to shader part epilog
68;
69; GFX9-LABEL: s_fshl_i7:
70; GFX9:       ; %bb.0:
71; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v0, 7
72; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
73; GFX9-NEXT:    s_and_b32 s2, s2, 0x7f
74; GFX9-NEXT:    s_and_b32 s1, s1, 0x7f
75; GFX9-NEXT:    s_and_b32 s1, 0xffff, s1
76; GFX9-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
77; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
78; GFX9-NEXT:    s_lshr_b32 s1, s1, 1
79; GFX9-NEXT:    v_mul_lo_u32 v1, v0, -7
80; GFX9-NEXT:    v_mul_hi_u32 v1, v0, v1
81; GFX9-NEXT:    v_add_u32_e32 v0, v0, v1
82; GFX9-NEXT:    v_mul_hi_u32 v0, s2, v0
83; GFX9-NEXT:    v_mul_lo_u32 v0, v0, 7
84; GFX9-NEXT:    v_sub_u32_e32 v0, s2, v0
85; GFX9-NEXT:    v_add_u32_e32 v1, -7, v0
86; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
87; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
88; GFX9-NEXT:    v_add_u32_e32 v1, -7, v0
89; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
90; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
91; GFX9-NEXT:    v_sub_u16_e32 v1, 6, v0
92; GFX9-NEXT:    v_and_b32_e32 v0, 0x7f, v0
93; GFX9-NEXT:    v_and_b32_e32 v1, 0x7f, v1
94; GFX9-NEXT:    v_lshlrev_b16_e64 v0, v0, s0
95; GFX9-NEXT:    v_lshrrev_b16_e64 v1, v1, s1
96; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
97; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
98; GFX9-NEXT:    ; return to shader part epilog
99;
100; GFX10-LABEL: s_fshl_i7:
101; GFX10:       ; %bb.0:
102; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v0, 7
103; GFX10-NEXT:    s_and_b32 s2, s2, 0x7f
104; GFX10-NEXT:    s_and_b32 s1, s1, 0x7f
105; GFX10-NEXT:    s_and_b32 s1, 0xffff, s1
106; GFX10-NEXT:    v_rcp_iflag_f32_e32 v0, v0
107; GFX10-NEXT:    s_lshr_b32 s1, s1, 1
108; GFX10-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
109; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
110; GFX10-NEXT:    v_mul_lo_u32 v1, v0, -7
111; GFX10-NEXT:    v_mul_hi_u32 v1, v0, v1
112; GFX10-NEXT:    v_add_nc_u32_e32 v0, v0, v1
113; GFX10-NEXT:    v_mul_hi_u32 v0, s2, v0
114; GFX10-NEXT:    v_mul_lo_u32 v0, v0, 7
115; GFX10-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
116; GFX10-NEXT:    v_add_nc_u32_e32 v1, -7, v0
117; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
118; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
119; GFX10-NEXT:    v_add_nc_u32_e32 v1, -7, v0
120; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
121; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
122; GFX10-NEXT:    v_sub_nc_u16 v1, 6, v0
123; GFX10-NEXT:    v_and_b32_e32 v0, 0x7f, v0
124; GFX10-NEXT:    v_and_b32_e32 v1, 0x7f, v1
125; GFX10-NEXT:    v_lshlrev_b16 v0, v0, s0
126; GFX10-NEXT:    v_lshrrev_b16 v1, v1, s1
127; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
128; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
129; GFX10-NEXT:    ; return to shader part epilog
130;
131; GFX11-LABEL: s_fshl_i7:
132; GFX11:       ; %bb.0:
133; GFX11-NEXT:    v_cvt_f32_ubyte0_e32 v0, 7
134; GFX11-NEXT:    s_and_b32 s2, s2, 0x7f
135; GFX11-NEXT:    s_and_b32 s1, s1, 0x7f
136; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
137; GFX11-NEXT:    s_and_b32 s1, 0xffff, s1
138; GFX11-NEXT:    v_rcp_iflag_f32_e32 v0, v0
139; GFX11-NEXT:    s_lshr_b32 s1, s1, 1
140; GFX11-NEXT:    s_waitcnt_depctr 0xfff
141; GFX11-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
142; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
143; GFX11-NEXT:    v_cvt_u32_f32_e32 v0, v0
144; GFX11-NEXT:    v_mul_lo_u32 v1, v0, -7
145; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
146; GFX11-NEXT:    v_mul_hi_u32 v1, v0, v1
147; GFX11-NEXT:    v_add_nc_u32_e32 v0, v0, v1
148; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
149; GFX11-NEXT:    v_mul_hi_u32 v0, s2, v0
150; GFX11-NEXT:    v_mul_lo_u32 v0, v0, 7
151; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
152; GFX11-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
153; GFX11-NEXT:    v_add_nc_u32_e32 v1, -7, v0
154; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
155; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
156; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
157; GFX11-NEXT:    v_add_nc_u32_e32 v1, -7, v0
158; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
159; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
160; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
161; GFX11-NEXT:    v_sub_nc_u16 v1, 6, v0
162; GFX11-NEXT:    v_and_b32_e32 v0, 0x7f, v0
163; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
164; GFX11-NEXT:    v_and_b32_e32 v1, 0x7f, v1
165; GFX11-NEXT:    v_lshlrev_b16 v0, v0, s0
166; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
167; GFX11-NEXT:    v_lshrrev_b16 v1, v1, s1
168; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
169; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
170; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
171; GFX11-NEXT:    ; return to shader part epilog
172  %result = call i7 @llvm.fshl.i7(i7 %lhs, i7 %rhs, i7 %amt)
173  ret i7 %result
174}
175
; Funnel shift left on i7 with plain (non-inreg) arguments and no explicit
; calling convention. The checked output keeps everything in v-registers: it
; reduces the shift amount modulo 7 with the same reciprocal-based remainder
; expansion as the inreg variant, then combines (lhs << amount) with the
; pre-halved rhs shifted right by (6 - amount), returning through s_setpc_b64.
176define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
177; GFX6-LABEL: v_fshl_i7:
178; GFX6:       ; %bb.0:
179; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
180; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v3, 7
181; GFX6-NEXT:    v_rcp_iflag_f32_e32 v3, v3
182; GFX6-NEXT:    v_and_b32_e32 v2, 0x7f, v2
183; GFX6-NEXT:    v_bfe_u32 v1, v1, 1, 6
184; GFX6-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
185; GFX6-NEXT:    v_cvt_u32_f32_e32 v3, v3
186; GFX6-NEXT:    v_mul_lo_u32 v4, v3, -7
187; GFX6-NEXT:    v_mul_hi_u32 v4, v3, v4
188; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
189; GFX6-NEXT:    v_mul_hi_u32 v3, v2, v3
190; GFX6-NEXT:    v_mul_lo_u32 v3, v3, 7
191; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
192; GFX6-NEXT:    v_add_i32_e32 v3, vcc, -7, v2
193; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
194; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
195; GFX6-NEXT:    v_add_i32_e32 v3, vcc, -7, v2
196; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
197; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
198; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 6, v2
199; GFX6-NEXT:    v_and_b32_e32 v2, 0x7f, v2
200; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v2, v0
201; GFX6-NEXT:    v_and_b32_e32 v2, 0x7f, v3
202; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
203; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
204; GFX6-NEXT:    s_setpc_b64 s[30:31]
205;
206; GFX8-LABEL: v_fshl_i7:
207; GFX8:       ; %bb.0:
208; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
209; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v3, 7
210; GFX8-NEXT:    v_rcp_iflag_f32_e32 v3, v3
211; GFX8-NEXT:    v_and_b32_e32 v2, 0x7f, v2
212; GFX8-NEXT:    v_and_b32_e32 v1, 0x7f, v1
213; GFX8-NEXT:    v_lshrrev_b16_e32 v1, 1, v1
214; GFX8-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
215; GFX8-NEXT:    v_cvt_u32_f32_e32 v3, v3
216; GFX8-NEXT:    v_mul_lo_u32 v4, v3, -7
217; GFX8-NEXT:    v_mul_hi_u32 v4, v3, v4
218; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v4
219; GFX8-NEXT:    v_mul_hi_u32 v3, v2, v3
220; GFX8-NEXT:    v_mul_lo_u32 v3, v3, 7
221; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v2, v3
222; GFX8-NEXT:    v_add_u32_e32 v3, vcc, -7, v2
223; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
224; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
225; GFX8-NEXT:    v_add_u32_e32 v3, vcc, -7, v2
226; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
227; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
228; GFX8-NEXT:    v_sub_u16_e32 v3, 6, v2
229; GFX8-NEXT:    v_and_b32_e32 v2, 0x7f, v2
230; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v2, v0
231; GFX8-NEXT:    v_and_b32_e32 v2, 0x7f, v3
232; GFX8-NEXT:    v_lshrrev_b16_e32 v1, v2, v1
233; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
234; GFX8-NEXT:    s_setpc_b64 s[30:31]
235;
236; GFX9-LABEL: v_fshl_i7:
237; GFX9:       ; %bb.0:
238; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
239; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v3, 7
240; GFX9-NEXT:    v_rcp_iflag_f32_e32 v3, v3
241; GFX9-NEXT:    v_and_b32_e32 v2, 0x7f, v2
242; GFX9-NEXT:    v_and_b32_e32 v1, 0x7f, v1
243; GFX9-NEXT:    v_lshrrev_b16_e32 v1, 1, v1
244; GFX9-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
245; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v3
246; GFX9-NEXT:    v_mul_lo_u32 v4, v3, -7
247; GFX9-NEXT:    v_mul_hi_u32 v4, v3, v4
248; GFX9-NEXT:    v_add_u32_e32 v3, v3, v4
249; GFX9-NEXT:    v_mul_hi_u32 v3, v2, v3
250; GFX9-NEXT:    v_mul_lo_u32 v3, v3, 7
251; GFX9-NEXT:    v_sub_u32_e32 v2, v2, v3
252; GFX9-NEXT:    v_add_u32_e32 v3, -7, v2
253; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
254; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
255; GFX9-NEXT:    v_add_u32_e32 v3, -7, v2
256; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
257; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
258; GFX9-NEXT:    v_sub_u16_e32 v3, 6, v2
259; GFX9-NEXT:    v_and_b32_e32 v2, 0x7f, v2
260; GFX9-NEXT:    v_lshlrev_b16_e32 v0, v2, v0
261; GFX9-NEXT:    v_and_b32_e32 v2, 0x7f, v3
262; GFX9-NEXT:    v_lshrrev_b16_e32 v1, v2, v1
263; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
264; GFX9-NEXT:    s_setpc_b64 s[30:31]
265;
266; GFX10-LABEL: v_fshl_i7:
267; GFX10:       ; %bb.0:
268; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
269; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v3, 7
270; GFX10-NEXT:    v_and_b32_e32 v2, 0x7f, v2
271; GFX10-NEXT:    v_and_b32_e32 v1, 0x7f, v1
272; GFX10-NEXT:    v_rcp_iflag_f32_e32 v3, v3
273; GFX10-NEXT:    v_lshrrev_b16 v1, 1, v1
274; GFX10-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
275; GFX10-NEXT:    v_cvt_u32_f32_e32 v3, v3
276; GFX10-NEXT:    v_mul_lo_u32 v4, v3, -7
277; GFX10-NEXT:    v_mul_hi_u32 v4, v3, v4
278; GFX10-NEXT:    v_add_nc_u32_e32 v3, v3, v4
279; GFX10-NEXT:    v_mul_hi_u32 v3, v2, v3
280; GFX10-NEXT:    v_mul_lo_u32 v3, v3, 7
281; GFX10-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
282; GFX10-NEXT:    v_add_nc_u32_e32 v3, -7, v2
283; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
284; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
285; GFX10-NEXT:    v_add_nc_u32_e32 v3, -7, v2
286; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
287; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
288; GFX10-NEXT:    v_sub_nc_u16 v3, 6, v2
289; GFX10-NEXT:    v_and_b32_e32 v2, 0x7f, v2
290; GFX10-NEXT:    v_and_b32_e32 v3, 0x7f, v3
291; GFX10-NEXT:    v_lshlrev_b16 v0, v2, v0
292; GFX10-NEXT:    v_lshrrev_b16 v1, v3, v1
293; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
294; GFX10-NEXT:    s_setpc_b64 s[30:31]
295;
296; GFX11-LABEL: v_fshl_i7:
297; GFX11:       ; %bb.0:
298; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
299; GFX11-NEXT:    v_cvt_f32_ubyte0_e32 v3, 7
300; GFX11-NEXT:    v_and_b32_e32 v2, 0x7f, v2
301; GFX11-NEXT:    v_and_b32_e32 v1, 0x7f, v1
302; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
303; GFX11-NEXT:    v_rcp_iflag_f32_e32 v3, v3
304; GFX11-NEXT:    v_lshrrev_b16 v1, 1, v1
305; GFX11-NEXT:    s_waitcnt_depctr 0xfff
306; GFX11-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
307; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
308; GFX11-NEXT:    v_cvt_u32_f32_e32 v3, v3
309; GFX11-NEXT:    v_mul_lo_u32 v4, v3, -7
310; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
311; GFX11-NEXT:    v_mul_hi_u32 v4, v3, v4
312; GFX11-NEXT:    v_add_nc_u32_e32 v3, v3, v4
313; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
314; GFX11-NEXT:    v_mul_hi_u32 v3, v2, v3
315; GFX11-NEXT:    v_mul_lo_u32 v3, v3, 7
316; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
317; GFX11-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
318; GFX11-NEXT:    v_add_nc_u32_e32 v3, -7, v2
319; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
320; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
321; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
322; GFX11-NEXT:    v_add_nc_u32_e32 v3, -7, v2
323; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
324; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
325; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
326; GFX11-NEXT:    v_sub_nc_u16 v3, 6, v2
327; GFX11-NEXT:    v_and_b32_e32 v2, 0x7f, v2
328; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
329; GFX11-NEXT:    v_and_b32_e32 v3, 0x7f, v3
330; GFX11-NEXT:    v_lshlrev_b16 v0, v2, v0
331; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
332; GFX11-NEXT:    v_lshrrev_b16 v1, v3, v1
333; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
334; GFX11-NEXT:    s_setpc_b64 s[30:31]
335  %result = call i7 @llvm.fshl.i7(i7 %lhs, i7 %rhs, i7 %amt)
336  ret i7 %result
337}
338
; Funnel shift left on i8 with a variable amount, all operands inreg in an
; amdgpu_ps function. Because 8 is a power of two, the checked output needs no
; remainder expansion: the amount is masked with 7, the inverted amount is
; masked with 7 (andn2 / and_not1), rhs is pre-shifted right by one, and the
; two shifted halves are ORed entirely with scalar instructions.
339define amdgpu_ps i8 @s_fshl_i8(i8 inreg %lhs, i8 inreg %rhs, i8 inreg %amt) {
340; GFX6-LABEL: s_fshl_i8:
341; GFX6:       ; %bb.0:
342; GFX6-NEXT:    s_and_b32 s3, s2, 7
343; GFX6-NEXT:    s_andn2_b32 s2, 7, s2
344; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x70001
345; GFX6-NEXT:    s_lshl_b32 s0, s0, s3
346; GFX6-NEXT:    s_lshr_b32 s1, s1, s2
347; GFX6-NEXT:    s_or_b32 s0, s0, s1
348; GFX6-NEXT:    ; return to shader part epilog
349;
350; GFX8-LABEL: s_fshl_i8:
351; GFX8:       ; %bb.0:
352; GFX8-NEXT:    s_and_b32 s1, s1, 0xff
353; GFX8-NEXT:    s_and_b32 s3, s2, 7
354; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
355; GFX8-NEXT:    s_andn2_b32 s2, 7, s2
356; GFX8-NEXT:    s_and_b32 s3, 0xffff, s3
357; GFX8-NEXT:    s_lshr_b32 s1, s1, 1
358; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
359; GFX8-NEXT:    s_lshl_b32 s0, s0, s3
360; GFX8-NEXT:    s_lshr_b32 s1, s1, s2
361; GFX8-NEXT:    s_or_b32 s0, s0, s1
362; GFX8-NEXT:    ; return to shader part epilog
363;
364; GFX9-LABEL: s_fshl_i8:
365; GFX9:       ; %bb.0:
366; GFX9-NEXT:    s_and_b32 s1, s1, 0xff
367; GFX9-NEXT:    s_and_b32 s3, s2, 7
368; GFX9-NEXT:    s_and_b32 s1, 0xffff, s1
369; GFX9-NEXT:    s_andn2_b32 s2, 7, s2
370; GFX9-NEXT:    s_and_b32 s3, 0xffff, s3
371; GFX9-NEXT:    s_lshr_b32 s1, s1, 1
372; GFX9-NEXT:    s_and_b32 s2, 0xffff, s2
373; GFX9-NEXT:    s_lshl_b32 s0, s0, s3
374; GFX9-NEXT:    s_lshr_b32 s1, s1, s2
375; GFX9-NEXT:    s_or_b32 s0, s0, s1
376; GFX9-NEXT:    ; return to shader part epilog
377;
378; GFX10-LABEL: s_fshl_i8:
379; GFX10:       ; %bb.0:
380; GFX10-NEXT:    s_and_b32 s1, s1, 0xff
381; GFX10-NEXT:    s_and_b32 s3, s2, 7
382; GFX10-NEXT:    s_and_b32 s1, 0xffff, s1
383; GFX10-NEXT:    s_andn2_b32 s2, 7, s2
384; GFX10-NEXT:    s_and_b32 s3, 0xffff, s3
385; GFX10-NEXT:    s_lshr_b32 s1, s1, 1
386; GFX10-NEXT:    s_and_b32 s2, 0xffff, s2
387; GFX10-NEXT:    s_lshl_b32 s0, s0, s3
388; GFX10-NEXT:    s_lshr_b32 s1, s1, s2
389; GFX10-NEXT:    s_or_b32 s0, s0, s1
390; GFX10-NEXT:    ; return to shader part epilog
391;
392; GFX11-LABEL: s_fshl_i8:
393; GFX11:       ; %bb.0:
394; GFX11-NEXT:    s_and_b32 s1, s1, 0xff
395; GFX11-NEXT:    s_and_b32 s3, s2, 7
396; GFX11-NEXT:    s_and_b32 s1, 0xffff, s1
397; GFX11-NEXT:    s_and_not1_b32 s2, 7, s2
398; GFX11-NEXT:    s_and_b32 s3, 0xffff, s3
399; GFX11-NEXT:    s_lshr_b32 s1, s1, 1
400; GFX11-NEXT:    s_and_b32 s2, 0xffff, s2
401; GFX11-NEXT:    s_lshl_b32 s0, s0, s3
402; GFX11-NEXT:    s_lshr_b32 s1, s1, s2
403; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
404; GFX11-NEXT:    s_or_b32 s0, s0, s1
405; GFX11-NEXT:    ; return to shader part epilog
406  %result = call i8 @llvm.fshl.i8(i8 %lhs, i8 %rhs, i8 %amt)
407  ret i8 %result
408}
409
; Funnel shift left on i8 with a variable amount and plain (non-inreg)
; arguments. The checked output masks the amount with 7, masks the bitwise
; complement of the amount with 7, pre-shifts rhs right by one, and ORs
; (lhs << amount) with the pre-shifted rhs shifted right by the complement.
; On two of the targets the zero-extension of rhs is folded into an SDWA
; BYTE_0 source select instead of a separate 0xff mask.
410define i8 @v_fshl_i8(i8 %lhs, i8 %rhs, i8 %amt) {
411; GFX6-LABEL: v_fshl_i8:
412; GFX6:       ; %bb.0:
413; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
414; GFX6-NEXT:    v_and_b32_e32 v3, 7, v2
415; GFX6-NEXT:    v_not_b32_e32 v2, v2
416; GFX6-NEXT:    v_and_b32_e32 v2, 7, v2
417; GFX6-NEXT:    v_bfe_u32 v1, v1, 1, 7
418; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v3, v0
419; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
420; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
421; GFX6-NEXT:    s_setpc_b64 s[30:31]
422;
423; GFX8-LABEL: v_fshl_i8:
424; GFX8:       ; %bb.0:
425; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
426; GFX8-NEXT:    v_and_b32_e32 v3, 7, v2
427; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v3, v0
428; GFX8-NEXT:    v_mov_b32_e32 v3, 1
429; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v2
430; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
431; GFX8-NEXT:    v_and_b32_e32 v2, 7, v2
432; GFX8-NEXT:    v_lshrrev_b16_e32 v1, v2, v1
433; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
434; GFX8-NEXT:    s_setpc_b64 s[30:31]
435;
436; GFX9-LABEL: v_fshl_i8:
437; GFX9:       ; %bb.0:
438; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
439; GFX9-NEXT:    v_and_b32_e32 v3, 7, v2
440; GFX9-NEXT:    v_lshlrev_b16_e32 v0, v3, v0
441; GFX9-NEXT:    v_mov_b32_e32 v3, 1
442; GFX9-NEXT:    v_xor_b32_e32 v2, -1, v2
443; GFX9-NEXT:    v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
444; GFX9-NEXT:    v_and_b32_e32 v2, 7, v2
445; GFX9-NEXT:    v_lshrrev_b16_e32 v1, v2, v1
446; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
447; GFX9-NEXT:    s_setpc_b64 s[30:31]
448;
449; GFX10-LABEL: v_fshl_i8:
450; GFX10:       ; %bb.0:
451; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
452; GFX10-NEXT:    v_and_b32_e32 v1, 0xff, v1
453; GFX10-NEXT:    v_xor_b32_e32 v3, -1, v2
454; GFX10-NEXT:    v_and_b32_e32 v2, 7, v2
455; GFX10-NEXT:    v_lshrrev_b16 v1, 1, v1
456; GFX10-NEXT:    v_and_b32_e32 v3, 7, v3
457; GFX10-NEXT:    v_lshlrev_b16 v0, v2, v0
458; GFX10-NEXT:    v_lshrrev_b16 v1, v3, v1
459; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
460; GFX10-NEXT:    s_setpc_b64 s[30:31]
461;
462; GFX11-LABEL: v_fshl_i8:
463; GFX11:       ; %bb.0:
464; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
465; GFX11-NEXT:    v_and_b32_e32 v1, 0xff, v1
466; GFX11-NEXT:    v_xor_b32_e32 v3, -1, v2
467; GFX11-NEXT:    v_and_b32_e32 v2, 7, v2
468; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
469; GFX11-NEXT:    v_lshrrev_b16 v1, 1, v1
470; GFX11-NEXT:    v_and_b32_e32 v3, 7, v3
471; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
472; GFX11-NEXT:    v_lshlrev_b16 v0, v2, v0
473; GFX11-NEXT:    v_lshrrev_b16 v1, v3, v1
474; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
475; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
476; GFX11-NEXT:    s_setpc_b64 s[30:31]
477  %result = call i8 @llvm.fshl.i8(i8 %lhs, i8 %rhs, i8 %amt)
478  ret i8 %result
479}
480
; Funnel shift left on i8 by the constant 4, operands inreg in an amdgpu_ps
; function. With the amount known, the checked output folds to
; (lhs << 4) | (zero-extended rhs >> 4) using scalar instructions only; on the
; oldest target the mask and shift of rhs collapse into a single s_bfe_u32.
481define amdgpu_ps i8 @s_fshl_i8_4(i8 inreg %lhs, i8 inreg %rhs) {
482; GFX6-LABEL: s_fshl_i8_4:
483; GFX6:       ; %bb.0:
484; GFX6-NEXT:    s_lshl_b32 s0, s0, 4
485; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x40004
486; GFX6-NEXT:    s_or_b32 s0, s0, s1
487; GFX6-NEXT:    ; return to shader part epilog
488;
489; GFX8-LABEL: s_fshl_i8_4:
490; GFX8:       ; %bb.0:
491; GFX8-NEXT:    s_and_b32 s1, s1, 0xff
492; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
493; GFX8-NEXT:    s_lshl_b32 s0, s0, 4
494; GFX8-NEXT:    s_lshr_b32 s1, s1, 4
495; GFX8-NEXT:    s_or_b32 s0, s0, s1
496; GFX8-NEXT:    ; return to shader part epilog
497;
498; GFX9-LABEL: s_fshl_i8_4:
499; GFX9:       ; %bb.0:
500; GFX9-NEXT:    s_and_b32 s1, s1, 0xff
501; GFX9-NEXT:    s_and_b32 s1, 0xffff, s1
502; GFX9-NEXT:    s_lshl_b32 s0, s0, 4
503; GFX9-NEXT:    s_lshr_b32 s1, s1, 4
504; GFX9-NEXT:    s_or_b32 s0, s0, s1
505; GFX9-NEXT:    ; return to shader part epilog
506;
507; GFX10-LABEL: s_fshl_i8_4:
508; GFX10:       ; %bb.0:
509; GFX10-NEXT:    s_and_b32 s1, s1, 0xff
510; GFX10-NEXT:    s_lshl_b32 s0, s0, 4
511; GFX10-NEXT:    s_and_b32 s1, 0xffff, s1
512; GFX10-NEXT:    s_lshr_b32 s1, s1, 4
513; GFX10-NEXT:    s_or_b32 s0, s0, s1
514; GFX10-NEXT:    ; return to shader part epilog
515;
516; GFX11-LABEL: s_fshl_i8_4:
517; GFX11:       ; %bb.0:
518; GFX11-NEXT:    s_and_b32 s1, s1, 0xff
519; GFX11-NEXT:    s_lshl_b32 s0, s0, 4
520; GFX11-NEXT:    s_and_b32 s1, 0xffff, s1
521; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
522; GFX11-NEXT:    s_lshr_b32 s1, s1, 4
523; GFX11-NEXT:    s_or_b32 s0, s0, s1
524; GFX11-NEXT:    ; return to shader part epilog
525  %result = call i8 @llvm.fshl.i8(i8 %lhs, i8 %rhs, i8 4)
526  ret i8 %result
527}
528
; Funnel shift left on i8 by the constant 4 with plain (non-inreg) arguments.
; The checked output is (lhs << 4) | (zero-extended rhs >> 4) in v-registers;
; depending on the target the zero-extension of rhs appears as a bitfield
; extract, an SDWA BYTE_0 source select, or an explicit 0xff mask.
529define i8 @v_fshl_i8_4(i8 %lhs, i8 %rhs) {
530; GFX6-LABEL: v_fshl_i8_4:
531; GFX6:       ; %bb.0:
532; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
533; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
534; GFX6-NEXT:    v_bfe_u32 v1, v1, 4, 4
535; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
536; GFX6-NEXT:    s_setpc_b64 s[30:31]
537;
538; GFX8-LABEL: v_fshl_i8_4:
539; GFX8:       ; %bb.0:
540; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
541; GFX8-NEXT:    v_mov_b32_e32 v2, 4
542; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 4, v0
543; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
544; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
545; GFX8-NEXT:    s_setpc_b64 s[30:31]
546;
547; GFX9-LABEL: v_fshl_i8_4:
548; GFX9:       ; %bb.0:
549; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
550; GFX9-NEXT:    v_mov_b32_e32 v2, 4
551; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 4, v0
552; GFX9-NEXT:    v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
553; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
554; GFX9-NEXT:    s_setpc_b64 s[30:31]
555;
556; GFX10-LABEL: v_fshl_i8_4:
557; GFX10:       ; %bb.0:
558; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
559; GFX10-NEXT:    v_and_b32_e32 v1, 0xff, v1
560; GFX10-NEXT:    v_lshlrev_b16 v0, 4, v0
561; GFX10-NEXT:    v_lshrrev_b16 v1, 4, v1
562; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
563; GFX10-NEXT:    s_setpc_b64 s[30:31]
564;
565; GFX11-LABEL: v_fshl_i8_4:
566; GFX11:       ; %bb.0:
567; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
568; GFX11-NEXT:    v_and_b32_e32 v1, 0xff, v1
569; GFX11-NEXT:    v_lshlrev_b16 v0, 4, v0
570; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
571; GFX11-NEXT:    v_lshrrev_b16 v1, 4, v1
572; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
573; GFX11-NEXT:    s_setpc_b64 s[30:31]
574  %result = call i8 @llvm.fshl.i8(i8 %lhs, i8 %rhs, i8 4)
575  ret i8 %result
576}
577
; Funnel shift left on i8 by the constant 5, operands inreg in an amdgpu_ps
; function. Unlike the amount-4 case the two shift distances differ: the
; checked output is (lhs << 5) | (zero-extended rhs >> 3), i.e. the right
; shift uses 8 - 5, computed with scalar instructions only.
578define amdgpu_ps i8 @s_fshl_i8_5(i8 inreg %lhs, i8 inreg %rhs) {
579; GFX6-LABEL: s_fshl_i8_5:
580; GFX6:       ; %bb.0:
581; GFX6-NEXT:    s_lshl_b32 s0, s0, 5
582; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x50003
583; GFX6-NEXT:    s_or_b32 s0, s0, s1
584; GFX6-NEXT:    ; return to shader part epilog
585;
586; GFX8-LABEL: s_fshl_i8_5:
587; GFX8:       ; %bb.0:
588; GFX8-NEXT:    s_and_b32 s1, s1, 0xff
589; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
590; GFX8-NEXT:    s_lshl_b32 s0, s0, 5
591; GFX8-NEXT:    s_lshr_b32 s1, s1, 3
592; GFX8-NEXT:    s_or_b32 s0, s0, s1
593; GFX8-NEXT:    ; return to shader part epilog
594;
595; GFX9-LABEL: s_fshl_i8_5:
596; GFX9:       ; %bb.0:
597; GFX9-NEXT:    s_and_b32 s1, s1, 0xff
598; GFX9-NEXT:    s_and_b32 s1, 0xffff, s1
599; GFX9-NEXT:    s_lshl_b32 s0, s0, 5
600; GFX9-NEXT:    s_lshr_b32 s1, s1, 3
601; GFX9-NEXT:    s_or_b32 s0, s0, s1
602; GFX9-NEXT:    ; return to shader part epilog
603;
604; GFX10-LABEL: s_fshl_i8_5:
605; GFX10:       ; %bb.0:
606; GFX10-NEXT:    s_and_b32 s1, s1, 0xff
607; GFX10-NEXT:    s_lshl_b32 s0, s0, 5
608; GFX10-NEXT:    s_and_b32 s1, 0xffff, s1
609; GFX10-NEXT:    s_lshr_b32 s1, s1, 3
610; GFX10-NEXT:    s_or_b32 s0, s0, s1
611; GFX10-NEXT:    ; return to shader part epilog
612;
613; GFX11-LABEL: s_fshl_i8_5:
614; GFX11:       ; %bb.0:
615; GFX11-NEXT:    s_and_b32 s1, s1, 0xff
616; GFX11-NEXT:    s_lshl_b32 s0, s0, 5
617; GFX11-NEXT:    s_and_b32 s1, 0xffff, s1
618; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
619; GFX11-NEXT:    s_lshr_b32 s1, s1, 3
620; GFX11-NEXT:    s_or_b32 s0, s0, s1
621; GFX11-NEXT:    ; return to shader part epilog
622  %result = call i8 @llvm.fshl.i8(i8 %lhs, i8 %rhs, i8 5)
623  ret i8 %result
624}
625
; Funnel shift left on i8 by the constant 5 with plain (non-inreg) arguments.
; The checked output is (lhs << 5) | (zero-extended rhs >> 3) in v-registers;
; depending on the target the zero-extension of rhs appears as a bitfield
; extract, an SDWA BYTE_0 source select, or an explicit 0xff mask.
626define i8 @v_fshl_i8_5(i8 %lhs, i8 %rhs) {
627; GFX6-LABEL: v_fshl_i8_5:
628; GFX6:       ; %bb.0:
629; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
630; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
631; GFX6-NEXT:    v_bfe_u32 v1, v1, 3, 5
632; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
633; GFX6-NEXT:    s_setpc_b64 s[30:31]
634;
635; GFX8-LABEL: v_fshl_i8_5:
636; GFX8:       ; %bb.0:
637; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
638; GFX8-NEXT:    v_mov_b32_e32 v2, 3
639; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 5, v0
640; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
641; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
642; GFX8-NEXT:    s_setpc_b64 s[30:31]
643;
644; GFX9-LABEL: v_fshl_i8_5:
645; GFX9:       ; %bb.0:
646; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
647; GFX9-NEXT:    v_mov_b32_e32 v2, 3
648; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 5, v0
649; GFX9-NEXT:    v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
650; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
651; GFX9-NEXT:    s_setpc_b64 s[30:31]
652;
653; GFX10-LABEL: v_fshl_i8_5:
654; GFX10:       ; %bb.0:
655; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
656; GFX10-NEXT:    v_and_b32_e32 v1, 0xff, v1
657; GFX10-NEXT:    v_lshlrev_b16 v0, 5, v0
658; GFX10-NEXT:    v_lshrrev_b16 v1, 3, v1
659; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
660; GFX10-NEXT:    s_setpc_b64 s[30:31]
661;
662; GFX11-LABEL: v_fshl_i8_5:
663; GFX11:       ; %bb.0:
664; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
665; GFX11-NEXT:    v_and_b32_e32 v1, 0xff, v1
666; GFX11-NEXT:    v_lshlrev_b16 v0, 5, v0
667; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
668; GFX11-NEXT:    v_lshrrev_b16 v1, 3, v1
669; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
670; GFX11-NEXT:    s_setpc_b64 s[30:31]
671  %result = call i8 @llvm.fshl.i8(i8 %lhs, i8 %rhs, i8 5)
672  ret i8 %result
673}
674
675define amdgpu_ps i16 @s_fshl_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg, i16 inreg %amt.arg) {
676; GFX6-LABEL: s_fshl_v2i8:
677; GFX6:       ; %bb.0:
678; GFX6-NEXT:    s_and_b32 s5, s2, 7
679; GFX6-NEXT:    s_lshr_b32 s3, s0, 8
680; GFX6-NEXT:    s_lshr_b32 s4, s2, 8
681; GFX6-NEXT:    s_andn2_b32 s2, 7, s2
682; GFX6-NEXT:    s_lshl_b32 s0, s0, s5
683; GFX6-NEXT:    s_bfe_u32 s5, s1, 0x70001
684; GFX6-NEXT:    s_lshr_b32 s2, s5, s2
685; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x80008
686; GFX6-NEXT:    s_or_b32 s0, s0, s2
687; GFX6-NEXT:    s_and_b32 s2, s4, 7
688; GFX6-NEXT:    s_andn2_b32 s4, 7, s4
689; GFX6-NEXT:    s_lshr_b32 s1, s1, 1
690; GFX6-NEXT:    s_lshl_b32 s2, s3, s2
691; GFX6-NEXT:    s_lshr_b32 s1, s1, s4
692; GFX6-NEXT:    s_or_b32 s1, s2, s1
693; GFX6-NEXT:    s_and_b32 s1, s1, 0xff
694; GFX6-NEXT:    s_and_b32 s0, s0, 0xff
695; GFX6-NEXT:    s_lshl_b32 s1, s1, 8
696; GFX6-NEXT:    s_or_b32 s0, s0, s1
697; GFX6-NEXT:    ; return to shader part epilog
698;
699; GFX8-LABEL: s_fshl_v2i8:
700; GFX8:       ; %bb.0:
701; GFX8-NEXT:    s_lshr_b32 s4, s1, 8
702; GFX8-NEXT:    s_and_b32 s1, s1, 0xff
703; GFX8-NEXT:    s_lshr_b32 s5, s2, 8
704; GFX8-NEXT:    s_and_b32 s6, s2, 7
705; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
706; GFX8-NEXT:    s_andn2_b32 s2, 7, s2
707; GFX8-NEXT:    s_and_b32 s6, 0xffff, s6
708; GFX8-NEXT:    s_lshr_b32 s1, s1, 1
709; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
710; GFX8-NEXT:    s_lshr_b32 s3, s0, 8
711; GFX8-NEXT:    s_lshl_b32 s0, s0, s6
712; GFX8-NEXT:    s_lshr_b32 s1, s1, s2
713; GFX8-NEXT:    s_or_b32 s0, s0, s1
714; GFX8-NEXT:    s_and_b32 s1, s5, 7
715; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
716; GFX8-NEXT:    s_and_b32 s2, s4, 0xff
717; GFX8-NEXT:    s_lshl_b32 s1, s3, s1
718; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
719; GFX8-NEXT:    s_andn2_b32 s3, 7, s5
720; GFX8-NEXT:    s_lshr_b32 s2, s2, 1
721; GFX8-NEXT:    s_and_b32 s3, 0xffff, s3
722; GFX8-NEXT:    s_lshr_b32 s2, s2, s3
723; GFX8-NEXT:    s_or_b32 s1, s1, s2
724; GFX8-NEXT:    s_and_b32 s1, s1, 0xff
725; GFX8-NEXT:    s_and_b32 s0, s0, 0xff
726; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
727; GFX8-NEXT:    s_or_b32 s0, s0, s1
728; GFX8-NEXT:    ; return to shader part epilog
729;
730; GFX9-LABEL: s_fshl_v2i8:
731; GFX9:       ; %bb.0:
732; GFX9-NEXT:    s_lshr_b32 s4, s1, 8
733; GFX9-NEXT:    s_and_b32 s1, s1, 0xff
734; GFX9-NEXT:    s_lshr_b32 s5, s2, 8
735; GFX9-NEXT:    s_and_b32 s6, s2, 7
736; GFX9-NEXT:    s_and_b32 s1, 0xffff, s1
737; GFX9-NEXT:    s_andn2_b32 s2, 7, s2
738; GFX9-NEXT:    s_and_b32 s6, 0xffff, s6
739; GFX9-NEXT:    s_lshr_b32 s1, s1, 1
740; GFX9-NEXT:    s_and_b32 s2, 0xffff, s2
741; GFX9-NEXT:    s_lshr_b32 s3, s0, 8
742; GFX9-NEXT:    s_lshl_b32 s0, s0, s6
743; GFX9-NEXT:    s_lshr_b32 s1, s1, s2
744; GFX9-NEXT:    s_or_b32 s0, s0, s1
745; GFX9-NEXT:    s_and_b32 s1, s5, 7
746; GFX9-NEXT:    s_and_b32 s1, 0xffff, s1
747; GFX9-NEXT:    s_and_b32 s2, s4, 0xff
748; GFX9-NEXT:    s_lshl_b32 s1, s3, s1
749; GFX9-NEXT:    s_and_b32 s2, 0xffff, s2
750; GFX9-NEXT:    s_andn2_b32 s3, 7, s5
751; GFX9-NEXT:    s_lshr_b32 s2, s2, 1
752; GFX9-NEXT:    s_and_b32 s3, 0xffff, s3
753; GFX9-NEXT:    s_lshr_b32 s2, s2, s3
754; GFX9-NEXT:    s_or_b32 s1, s1, s2
755; GFX9-NEXT:    s_and_b32 s1, s1, 0xff
756; GFX9-NEXT:    s_and_b32 s0, s0, 0xff
757; GFX9-NEXT:    s_lshl_b32 s1, s1, 8
758; GFX9-NEXT:    s_or_b32 s0, s0, s1
759; GFX9-NEXT:    ; return to shader part epilog
760;
761; GFX10-LABEL: s_fshl_v2i8:
762; GFX10:       ; %bb.0:
763; GFX10-NEXT:    s_lshr_b32 s4, s1, 8
764; GFX10-NEXT:    s_and_b32 s5, s2, 7
765; GFX10-NEXT:    s_lshr_b32 s6, s2, 8
766; GFX10-NEXT:    s_and_b32 s5, 0xffff, s5
767; GFX10-NEXT:    s_and_b32 s4, s4, 0xff
768; GFX10-NEXT:    s_lshr_b32 s3, s0, 8
769; GFX10-NEXT:    s_and_b32 s1, s1, 0xff
770; GFX10-NEXT:    s_lshl_b32 s0, s0, s5
771; GFX10-NEXT:    s_and_b32 s5, s6, 7
772; GFX10-NEXT:    s_and_b32 s4, 0xffff, s4
773; GFX10-NEXT:    s_andn2_b32 s6, 7, s6
774; GFX10-NEXT:    s_and_b32 s1, 0xffff, s1
775; GFX10-NEXT:    s_andn2_b32 s2, 7, s2
776; GFX10-NEXT:    s_and_b32 s5, 0xffff, s5
777; GFX10-NEXT:    s_lshr_b32 s4, s4, 1
778; GFX10-NEXT:    s_and_b32 s6, 0xffff, s6
779; GFX10-NEXT:    s_lshr_b32 s1, s1, 1
780; GFX10-NEXT:    s_and_b32 s2, 0xffff, s2
781; GFX10-NEXT:    s_lshl_b32 s3, s3, s5
782; GFX10-NEXT:    s_lshr_b32 s4, s4, s6
783; GFX10-NEXT:    s_lshr_b32 s1, s1, s2
784; GFX10-NEXT:    s_or_b32 s2, s3, s4
785; GFX10-NEXT:    s_or_b32 s0, s0, s1
786; GFX10-NEXT:    s_and_b32 s1, s2, 0xff
787; GFX10-NEXT:    s_and_b32 s0, s0, 0xff
788; GFX10-NEXT:    s_lshl_b32 s1, s1, 8
789; GFX10-NEXT:    s_or_b32 s0, s0, s1
790; GFX10-NEXT:    ; return to shader part epilog
791;
792; GFX11-LABEL: s_fshl_v2i8:
793; GFX11:       ; %bb.0:
794; GFX11-NEXT:    s_lshr_b32 s4, s1, 8
795; GFX11-NEXT:    s_and_b32 s5, s2, 7
796; GFX11-NEXT:    s_lshr_b32 s6, s2, 8
797; GFX11-NEXT:    s_and_b32 s5, 0xffff, s5
798; GFX11-NEXT:    s_and_b32 s4, s4, 0xff
799; GFX11-NEXT:    s_lshr_b32 s3, s0, 8
800; GFX11-NEXT:    s_and_b32 s1, s1, 0xff
801; GFX11-NEXT:    s_lshl_b32 s0, s0, s5
802; GFX11-NEXT:    s_and_b32 s5, s6, 7
803; GFX11-NEXT:    s_and_b32 s4, 0xffff, s4
804; GFX11-NEXT:    s_and_not1_b32 s6, 7, s6
805; GFX11-NEXT:    s_and_b32 s1, 0xffff, s1
806; GFX11-NEXT:    s_and_not1_b32 s2, 7, s2
807; GFX11-NEXT:    s_and_b32 s5, 0xffff, s5
808; GFX11-NEXT:    s_lshr_b32 s4, s4, 1
809; GFX11-NEXT:    s_and_b32 s6, 0xffff, s6
810; GFX11-NEXT:    s_lshr_b32 s1, s1, 1
811; GFX11-NEXT:    s_and_b32 s2, 0xffff, s2
812; GFX11-NEXT:    s_lshl_b32 s3, s3, s5
813; GFX11-NEXT:    s_lshr_b32 s4, s4, s6
814; GFX11-NEXT:    s_lshr_b32 s1, s1, s2
815; GFX11-NEXT:    s_or_b32 s2, s3, s4
816; GFX11-NEXT:    s_or_b32 s0, s0, s1
817; GFX11-NEXT:    s_and_b32 s1, s2, 0xff
818; GFX11-NEXT:    s_and_b32 s0, s0, 0xff
819; GFX11-NEXT:    s_lshl_b32 s1, s1, 8
820; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
821; GFX11-NEXT:    s_or_b32 s0, s0, s1
822; GFX11-NEXT:    ; return to shader part epilog
823  %lhs = bitcast i16 %lhs.arg to <2 x i8>
824  %rhs = bitcast i16 %rhs.arg to <2 x i8>
825  %amt = bitcast i16 %amt.arg to <2 x i8>
826  %result = call <2 x i8> @llvm.fshl.v2i8(<2 x i8> %lhs, <2 x i8> %rhs, <2 x i8> %amt)
827  %cast.result = bitcast <2 x i8> %result to i16
828  ret i16 %cast.result
829}
830
; v_fshl_v2i8: funnel shift left (llvm.fshl) on <2 x i8> vectors that are passed
; and returned packed inside i16 values. The IR body bitcasts the three i16
; arguments to <2 x i8>, calls @llvm.fshl.v2i8, and bitcasts the result back.
; The per-target assertion lines inside the body are autogenerated (see the NOTE
; on the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
831define i16 @v_fshl_v2i8(i16 %lhs.arg, i16 %rhs.arg, i16 %amt.arg) {
832; GFX6-LABEL: v_fshl_v2i8:
833; GFX6:       ; %bb.0:
834; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
835; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 8, v2
836; GFX6-NEXT:    v_and_b32_e32 v5, 7, v2
837; GFX6-NEXT:    v_not_b32_e32 v2, v2
838; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
839; GFX6-NEXT:    v_and_b32_e32 v2, 7, v2
840; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v5, v0
841; GFX6-NEXT:    v_bfe_u32 v5, v1, 1, 7
842; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v2, v5
843; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
844; GFX6-NEXT:    v_and_b32_e32 v2, 7, v4
845; GFX6-NEXT:    v_not_b32_e32 v4, v4
846; GFX6-NEXT:    v_bfe_u32 v1, v1, 8, 8
847; GFX6-NEXT:    v_and_b32_e32 v4, 7, v4
848; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 1, v1
849; GFX6-NEXT:    v_lshlrev_b32_e32 v2, v2, v3
850; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v4, v1
851; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
852; GFX6-NEXT:    v_and_b32_e32 v1, 0xff, v1
853; GFX6-NEXT:    v_and_b32_e32 v0, 0xff, v0
854; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
855; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
856; GFX6-NEXT:    s_setpc_b64 s[30:31]
857;
858; GFX8-LABEL: v_fshl_v2i8:
859; GFX8:       ; %bb.0:
860; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
861; GFX8-NEXT:    v_and_b32_e32 v6, 7, v2
862; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
863; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 8, v2
864; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v6, v0
865; GFX8-NEXT:    v_mov_b32_e32 v6, 1
866; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v2
867; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 8, v1
868; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
869; GFX8-NEXT:    v_and_b32_e32 v2, 7, v2
870; GFX8-NEXT:    v_lshrrev_b16_e32 v1, v2, v1
871; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
872; GFX8-NEXT:    v_and_b32_e32 v1, 7, v5
873; GFX8-NEXT:    v_lshlrev_b16_e32 v1, v1, v3
874; GFX8-NEXT:    v_xor_b32_e32 v3, -1, v5
875; GFX8-NEXT:    v_lshrrev_b16_sdwa v2, v6, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
876; GFX8-NEXT:    v_and_b32_e32 v3, 7, v3
877; GFX8-NEXT:    v_lshrrev_b16_e32 v2, v3, v2
878; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
879; GFX8-NEXT:    v_and_b32_e32 v1, 0xff, v1
880; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
881; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
882; GFX8-NEXT:    s_setpc_b64 s[30:31]
883;
884; GFX9-LABEL: v_fshl_v2i8:
885; GFX9:       ; %bb.0:
886; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
887; GFX9-NEXT:    v_and_b32_e32 v6, 7, v2
888; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
889; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 8, v2
890; GFX9-NEXT:    v_lshlrev_b16_e32 v0, v6, v0
891; GFX9-NEXT:    v_mov_b32_e32 v6, 1
892; GFX9-NEXT:    v_xor_b32_e32 v2, -1, v2
893; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 8, v1
894; GFX9-NEXT:    v_lshrrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
895; GFX9-NEXT:    v_and_b32_e32 v2, 7, v2
896; GFX9-NEXT:    v_lshrrev_b16_e32 v1, v2, v1
897; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
898; GFX9-NEXT:    v_and_b32_e32 v1, 7, v5
899; GFX9-NEXT:    v_lshlrev_b16_e32 v1, v1, v3
900; GFX9-NEXT:    v_xor_b32_e32 v3, -1, v5
901; GFX9-NEXT:    v_lshrrev_b16_sdwa v2, v6, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
902; GFX9-NEXT:    v_and_b32_e32 v3, 7, v3
903; GFX9-NEXT:    v_lshrrev_b16_e32 v2, v3, v2
904; GFX9-NEXT:    v_or_b32_e32 v1, v1, v2
905; GFX9-NEXT:    v_and_b32_e32 v1, 0xff, v1
906; GFX9-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
907; GFX9-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
908; GFX9-NEXT:    s_setpc_b64 s[30:31]
909;
910; GFX10-LABEL: v_fshl_v2i8:
911; GFX10:       ; %bb.0:
912; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
913; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 8, v1
914; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 8, v2
915; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 8, v0
916; GFX10-NEXT:    v_and_b32_e32 v1, 0xff, v1
917; GFX10-NEXT:    v_xor_b32_e32 v7, -1, v2
918; GFX10-NEXT:    v_and_b32_e32 v3, 0xff, v3
919; GFX10-NEXT:    v_xor_b32_e32 v6, -1, v4
920; GFX10-NEXT:    v_and_b32_e32 v4, 7, v4
921; GFX10-NEXT:    v_and_b32_e32 v2, 7, v2
922; GFX10-NEXT:    v_lshrrev_b16 v1, 1, v1
923; GFX10-NEXT:    v_lshrrev_b16 v3, 1, v3
924; GFX10-NEXT:    v_and_b32_e32 v6, 7, v6
925; GFX10-NEXT:    v_and_b32_e32 v7, 7, v7
926; GFX10-NEXT:    v_lshlrev_b16 v4, v4, v5
927; GFX10-NEXT:    v_lshlrev_b16 v0, v2, v0
928; GFX10-NEXT:    v_lshrrev_b16 v3, v6, v3
929; GFX10-NEXT:    v_lshrrev_b16 v1, v7, v1
930; GFX10-NEXT:    v_or_b32_e32 v2, v4, v3
931; GFX10-NEXT:    v_mov_b32_e32 v3, 0xff
932; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
933; GFX10-NEXT:    v_and_b32_sdwa v1, v2, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
934; GFX10-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
935; GFX10-NEXT:    s_setpc_b64 s[30:31]
936;
937; GFX11-LABEL: v_fshl_v2i8:
938; GFX11:       ; %bb.0:
939; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
940; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 8, v1
941; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 8, v2
942; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 8, v0
943; GFX11-NEXT:    v_and_b32_e32 v1, 0xff, v1
944; GFX11-NEXT:    v_xor_b32_e32 v7, -1, v2
945; GFX11-NEXT:    v_and_b32_e32 v3, 0xff, v3
946; GFX11-NEXT:    v_xor_b32_e32 v6, -1, v4
947; GFX11-NEXT:    v_and_b32_e32 v4, 7, v4
948; GFX11-NEXT:    v_and_b32_e32 v2, 7, v2
949; GFX11-NEXT:    v_lshrrev_b16 v1, 1, v1
950; GFX11-NEXT:    v_lshrrev_b16 v3, 1, v3
951; GFX11-NEXT:    v_and_b32_e32 v6, 7, v6
952; GFX11-NEXT:    v_and_b32_e32 v7, 7, v7
953; GFX11-NEXT:    v_lshlrev_b16 v4, v4, v5
954; GFX11-NEXT:    v_lshlrev_b16 v0, v2, v0
955; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
956; GFX11-NEXT:    v_lshrrev_b16 v3, v6, v3
957; GFX11-NEXT:    v_lshrrev_b16 v1, v7, v1
958; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
959; GFX11-NEXT:    v_or_b32_e32 v2, v4, v3
960; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
961; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
962; GFX11-NEXT:    v_and_b32_e32 v1, 0xff, v2
963; GFX11-NEXT:    v_and_b32_e32 v0, 0xff, v0
964; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
965; GFX11-NEXT:    v_lshlrev_b16 v1, 8, v1
966; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
967; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ; Reinterpret each packed i16 argument as a <2 x i8> vector, apply the
  ; funnel-shift-left intrinsic, then repack the <2 x i8> result into an i16.
968  %lhs = bitcast i16 %lhs.arg to <2 x i8>
969  %rhs = bitcast i16 %rhs.arg to <2 x i8>
970  %amt = bitcast i16 %amt.arg to <2 x i8>
971  %result = call <2 x i8> @llvm.fshl.v2i8(<2 x i8> %lhs, <2 x i8> %rhs, <2 x i8> %amt)
972  %cast.result = bitcast <2 x i8> %result to i16
973  ret i16 %cast.result
974}
975
; s_fshl_v4i8: funnel shift left (llvm.fshl) on <4 x i8> vectors that are passed
; and returned packed inside i32 values. The function uses the amdgpu_ps calling
; convention and all three arguments are marked inreg; the expected output below
; consists of scalar (s_*) instructions. The IR body bitcasts the three i32
; arguments to <4 x i8>, calls @llvm.fshl.v4i8, and bitcasts the result back.
; The per-target assertion lines inside the body are autogenerated (see the NOTE
; on the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
976define amdgpu_ps i32 @s_fshl_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg, i32 inreg %amt.arg) {
977; GFX6-LABEL: s_fshl_v4i8:
978; GFX6:       ; %bb.0:
979; GFX6-NEXT:    s_and_b32 s9, s2, 7
980; GFX6-NEXT:    s_lshr_b32 s3, s0, 8
981; GFX6-NEXT:    s_lshr_b32 s4, s0, 16
982; GFX6-NEXT:    s_lshr_b32 s5, s0, 24
983; GFX6-NEXT:    s_lshr_b32 s6, s2, 8
984; GFX6-NEXT:    s_lshr_b32 s7, s2, 16
985; GFX6-NEXT:    s_lshr_b32 s8, s2, 24
986; GFX6-NEXT:    s_andn2_b32 s2, 7, s2
987; GFX6-NEXT:    s_lshl_b32 s0, s0, s9
988; GFX6-NEXT:    s_bfe_u32 s9, s1, 0x70001
989; GFX6-NEXT:    s_lshr_b32 s2, s9, s2
990; GFX6-NEXT:    s_or_b32 s0, s0, s2
991; GFX6-NEXT:    s_and_b32 s2, s6, 7
992; GFX6-NEXT:    s_lshl_b32 s2, s3, s2
993; GFX6-NEXT:    s_bfe_u32 s3, s1, 0x80008
994; GFX6-NEXT:    s_andn2_b32 s6, 7, s6
995; GFX6-NEXT:    s_lshr_b32 s3, s3, 1
996; GFX6-NEXT:    s_lshr_b32 s3, s3, s6
997; GFX6-NEXT:    s_or_b32 s2, s2, s3
998; GFX6-NEXT:    s_and_b32 s3, s7, 7
999; GFX6-NEXT:    s_lshl_b32 s3, s4, s3
1000; GFX6-NEXT:    s_bfe_u32 s4, s1, 0x80010
1001; GFX6-NEXT:    s_andn2_b32 s6, 7, s7
1002; GFX6-NEXT:    s_lshr_b32 s4, s4, 1
1003; GFX6-NEXT:    s_lshr_b32 s4, s4, s6
1004; GFX6-NEXT:    s_or_b32 s3, s3, s4
1005; GFX6-NEXT:    s_and_b32 s4, s8, 7
1006; GFX6-NEXT:    s_andn2_b32 s6, 7, s8
1007; GFX6-NEXT:    s_lshr_b32 s1, s1, 25
1008; GFX6-NEXT:    s_and_b32 s2, s2, 0xff
1009; GFX6-NEXT:    s_lshl_b32 s4, s5, s4
1010; GFX6-NEXT:    s_lshr_b32 s1, s1, s6
1011; GFX6-NEXT:    s_and_b32 s0, s0, 0xff
1012; GFX6-NEXT:    s_lshl_b32 s2, s2, 8
1013; GFX6-NEXT:    s_or_b32 s1, s4, s1
1014; GFX6-NEXT:    s_or_b32 s0, s0, s2
1015; GFX6-NEXT:    s_and_b32 s2, s3, 0xff
1016; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
1017; GFX6-NEXT:    s_and_b32 s1, s1, 0xff
1018; GFX6-NEXT:    s_or_b32 s0, s0, s2
1019; GFX6-NEXT:    s_lshl_b32 s1, s1, 24
1020; GFX6-NEXT:    s_or_b32 s0, s0, s1
1021; GFX6-NEXT:    ; return to shader part epilog
1022;
1023; GFX8-LABEL: s_fshl_v4i8:
1024; GFX8:       ; %bb.0:
1025; GFX8-NEXT:    s_lshr_b32 s6, s1, 8
1026; GFX8-NEXT:    s_lshr_b32 s7, s1, 16
1027; GFX8-NEXT:    s_lshr_b32 s8, s1, 24
1028; GFX8-NEXT:    s_and_b32 s1, s1, 0xff
1029; GFX8-NEXT:    s_lshr_b32 s9, s2, 8
1030; GFX8-NEXT:    s_lshr_b32 s10, s2, 16
1031; GFX8-NEXT:    s_lshr_b32 s11, s2, 24
1032; GFX8-NEXT:    s_and_b32 s12, s2, 7
1033; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
1034; GFX8-NEXT:    s_andn2_b32 s2, 7, s2
1035; GFX8-NEXT:    s_and_b32 s12, 0xffff, s12
1036; GFX8-NEXT:    s_lshr_b32 s1, s1, 1
1037; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
1038; GFX8-NEXT:    s_lshr_b32 s3, s0, 8
1039; GFX8-NEXT:    s_lshr_b32 s4, s0, 16
1040; GFX8-NEXT:    s_lshr_b32 s5, s0, 24
1041; GFX8-NEXT:    s_lshl_b32 s0, s0, s12
1042; GFX8-NEXT:    s_lshr_b32 s1, s1, s2
1043; GFX8-NEXT:    s_or_b32 s0, s0, s1
1044; GFX8-NEXT:    s_and_b32 s1, s9, 7
1045; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
1046; GFX8-NEXT:    s_and_b32 s2, s6, 0xff
1047; GFX8-NEXT:    s_lshl_b32 s1, s3, s1
1048; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
1049; GFX8-NEXT:    s_andn2_b32 s3, 7, s9
1050; GFX8-NEXT:    s_lshr_b32 s2, s2, 1
1051; GFX8-NEXT:    s_and_b32 s3, 0xffff, s3
1052; GFX8-NEXT:    s_lshr_b32 s2, s2, s3
1053; GFX8-NEXT:    s_or_b32 s1, s1, s2
1054; GFX8-NEXT:    s_and_b32 s2, s10, 7
1055; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
1056; GFX8-NEXT:    s_and_b32 s3, s7, 0xff
1057; GFX8-NEXT:    s_lshl_b32 s2, s4, s2
1058; GFX8-NEXT:    s_and_b32 s3, 0xffff, s3
1059; GFX8-NEXT:    s_andn2_b32 s4, 7, s10
1060; GFX8-NEXT:    s_lshr_b32 s3, s3, 1
1061; GFX8-NEXT:    s_and_b32 s4, 0xffff, s4
1062; GFX8-NEXT:    s_lshr_b32 s3, s3, s4
1063; GFX8-NEXT:    s_or_b32 s2, s2, s3
1064; GFX8-NEXT:    s_and_b32 s3, s11, 7
1065; GFX8-NEXT:    s_and_b32 s3, 0xffff, s3
1066; GFX8-NEXT:    s_lshl_b32 s3, s5, s3
1067; GFX8-NEXT:    s_andn2_b32 s5, 7, s11
1068; GFX8-NEXT:    s_and_b32 s1, s1, 0xff
1069; GFX8-NEXT:    s_lshr_b32 s4, s8, 1
1070; GFX8-NEXT:    s_and_b32 s5, 0xffff, s5
1071; GFX8-NEXT:    s_and_b32 s0, s0, 0xff
1072; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
1073; GFX8-NEXT:    s_lshr_b32 s4, s4, s5
1074; GFX8-NEXT:    s_or_b32 s0, s0, s1
1075; GFX8-NEXT:    s_and_b32 s1, s2, 0xff
1076; GFX8-NEXT:    s_or_b32 s3, s3, s4
1077; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
1078; GFX8-NEXT:    s_or_b32 s0, s0, s1
1079; GFX8-NEXT:    s_and_b32 s1, s3, 0xff
1080; GFX8-NEXT:    s_lshl_b32 s1, s1, 24
1081; GFX8-NEXT:    s_or_b32 s0, s0, s1
1082; GFX8-NEXT:    ; return to shader part epilog
1083;
1084; GFX9-LABEL: s_fshl_v4i8:
1085; GFX9:       ; %bb.0:
1086; GFX9-NEXT:    s_lshr_b32 s6, s1, 8
1087; GFX9-NEXT:    s_lshr_b32 s7, s1, 16
1088; GFX9-NEXT:    s_lshr_b32 s8, s1, 24
1089; GFX9-NEXT:    s_and_b32 s1, s1, 0xff
1090; GFX9-NEXT:    s_lshr_b32 s9, s2, 8
1091; GFX9-NEXT:    s_lshr_b32 s10, s2, 16
1092; GFX9-NEXT:    s_lshr_b32 s11, s2, 24
1093; GFX9-NEXT:    s_and_b32 s12, s2, 7
1094; GFX9-NEXT:    s_and_b32 s1, 0xffff, s1
1095; GFX9-NEXT:    s_andn2_b32 s2, 7, s2
1096; GFX9-NEXT:    s_and_b32 s12, 0xffff, s12
1097; GFX9-NEXT:    s_lshr_b32 s1, s1, 1
1098; GFX9-NEXT:    s_and_b32 s2, 0xffff, s2
1099; GFX9-NEXT:    s_lshr_b32 s3, s0, 8
1100; GFX9-NEXT:    s_lshr_b32 s4, s0, 16
1101; GFX9-NEXT:    s_lshr_b32 s5, s0, 24
1102; GFX9-NEXT:    s_lshl_b32 s0, s0, s12
1103; GFX9-NEXT:    s_lshr_b32 s1, s1, s2
1104; GFX9-NEXT:    s_or_b32 s0, s0, s1
1105; GFX9-NEXT:    s_and_b32 s1, s9, 7
1106; GFX9-NEXT:    s_and_b32 s1, 0xffff, s1
1107; GFX9-NEXT:    s_and_b32 s2, s6, 0xff
1108; GFX9-NEXT:    s_lshl_b32 s1, s3, s1
1109; GFX9-NEXT:    s_and_b32 s2, 0xffff, s2
1110; GFX9-NEXT:    s_andn2_b32 s3, 7, s9
1111; GFX9-NEXT:    s_lshr_b32 s2, s2, 1
1112; GFX9-NEXT:    s_and_b32 s3, 0xffff, s3
1113; GFX9-NEXT:    s_lshr_b32 s2, s2, s3
1114; GFX9-NEXT:    s_or_b32 s1, s1, s2
1115; GFX9-NEXT:    s_and_b32 s2, s10, 7
1116; GFX9-NEXT:    s_and_b32 s2, 0xffff, s2
1117; GFX9-NEXT:    s_and_b32 s3, s7, 0xff
1118; GFX9-NEXT:    s_lshl_b32 s2, s4, s2
1119; GFX9-NEXT:    s_and_b32 s3, 0xffff, s3
1120; GFX9-NEXT:    s_andn2_b32 s4, 7, s10
1121; GFX9-NEXT:    s_lshr_b32 s3, s3, 1
1122; GFX9-NEXT:    s_and_b32 s4, 0xffff, s4
1123; GFX9-NEXT:    s_lshr_b32 s3, s3, s4
1124; GFX9-NEXT:    s_or_b32 s2, s2, s3
1125; GFX9-NEXT:    s_and_b32 s3, s11, 7
1126; GFX9-NEXT:    s_and_b32 s3, 0xffff, s3
1127; GFX9-NEXT:    s_lshl_b32 s3, s5, s3
1128; GFX9-NEXT:    s_andn2_b32 s5, 7, s11
1129; GFX9-NEXT:    s_and_b32 s1, s1, 0xff
1130; GFX9-NEXT:    s_lshr_b32 s4, s8, 1
1131; GFX9-NEXT:    s_and_b32 s5, 0xffff, s5
1132; GFX9-NEXT:    s_and_b32 s0, s0, 0xff
1133; GFX9-NEXT:    s_lshl_b32 s1, s1, 8
1134; GFX9-NEXT:    s_lshr_b32 s4, s4, s5
1135; GFX9-NEXT:    s_or_b32 s0, s0, s1
1136; GFX9-NEXT:    s_and_b32 s1, s2, 0xff
1137; GFX9-NEXT:    s_or_b32 s3, s3, s4
1138; GFX9-NEXT:    s_lshl_b32 s1, s1, 16
1139; GFX9-NEXT:    s_or_b32 s0, s0, s1
1140; GFX9-NEXT:    s_and_b32 s1, s3, 0xff
1141; GFX9-NEXT:    s_lshl_b32 s1, s1, 24
1142; GFX9-NEXT:    s_or_b32 s0, s0, s1
1143; GFX9-NEXT:    ; return to shader part epilog
1144;
1145; GFX10-LABEL: s_fshl_v4i8:
1146; GFX10:       ; %bb.0:
1147; GFX10-NEXT:    s_lshr_b32 s6, s1, 8
1148; GFX10-NEXT:    s_lshr_b32 s7, s1, 16
1149; GFX10-NEXT:    s_lshr_b32 s8, s1, 24
1150; GFX10-NEXT:    s_and_b32 s1, s1, 0xff
1151; GFX10-NEXT:    s_and_b32 s11, s2, 7
1152; GFX10-NEXT:    s_and_b32 s1, 0xffff, s1
1153; GFX10-NEXT:    s_andn2_b32 s12, 7, s2
1154; GFX10-NEXT:    s_and_b32 s11, 0xffff, s11
1155; GFX10-NEXT:    s_lshr_b32 s1, s1, 1
1156; GFX10-NEXT:    s_and_b32 s12, 0xffff, s12
1157; GFX10-NEXT:    s_lshr_b32 s3, s0, 8
1158; GFX10-NEXT:    s_lshr_b32 s4, s0, 16
1159; GFX10-NEXT:    s_lshr_b32 s5, s0, 24
1160; GFX10-NEXT:    s_lshr_b32 s9, s2, 8
1161; GFX10-NEXT:    s_lshl_b32 s0, s0, s11
1162; GFX10-NEXT:    s_lshr_b32 s1, s1, s12
1163; GFX10-NEXT:    s_and_b32 s6, s6, 0xff
1164; GFX10-NEXT:    s_or_b32 s0, s0, s1
1165; GFX10-NEXT:    s_and_b32 s1, s9, 7
1166; GFX10-NEXT:    s_and_b32 s6, 0xffff, s6
1167; GFX10-NEXT:    s_andn2_b32 s9, 7, s9
1168; GFX10-NEXT:    s_lshr_b32 s10, s2, 16
1169; GFX10-NEXT:    s_and_b32 s1, 0xffff, s1
1170; GFX10-NEXT:    s_lshr_b32 s6, s6, 1
1171; GFX10-NEXT:    s_and_b32 s9, 0xffff, s9
1172; GFX10-NEXT:    s_lshl_b32 s1, s3, s1
1173; GFX10-NEXT:    s_lshr_b32 s3, s6, s9
1174; GFX10-NEXT:    s_and_b32 s6, s10, 7
1175; GFX10-NEXT:    s_or_b32 s1, s1, s3
1176; GFX10-NEXT:    s_and_b32 s3, 0xffff, s6
1177; GFX10-NEXT:    s_and_b32 s6, s7, 0xff
1178; GFX10-NEXT:    s_lshr_b32 s2, s2, 24
1179; GFX10-NEXT:    s_lshl_b32 s3, s4, s3
1180; GFX10-NEXT:    s_and_b32 s4, 0xffff, s6
1181; GFX10-NEXT:    s_andn2_b32 s6, 7, s10
1182; GFX10-NEXT:    s_lshr_b32 s4, s4, 1
1183; GFX10-NEXT:    s_and_b32 s6, 0xffff, s6
1184; GFX10-NEXT:    s_and_b32 s7, s2, 7
1185; GFX10-NEXT:    s_andn2_b32 s2, 7, s2
1186; GFX10-NEXT:    s_lshr_b32 s4, s4, s6
1187; GFX10-NEXT:    s_and_b32 s6, 0xffff, s7
1188; GFX10-NEXT:    s_lshr_b32 s7, s8, 1
1189; GFX10-NEXT:    s_and_b32 s2, 0xffff, s2
1190; GFX10-NEXT:    s_lshl_b32 s5, s5, s6
1191; GFX10-NEXT:    s_lshr_b32 s2, s7, s2
1192; GFX10-NEXT:    s_or_b32 s3, s3, s4
1193; GFX10-NEXT:    s_and_b32 s1, s1, 0xff
1194; GFX10-NEXT:    s_or_b32 s2, s5, s2
1195; GFX10-NEXT:    s_and_b32 s0, s0, 0xff
1196; GFX10-NEXT:    s_lshl_b32 s1, s1, 8
1197; GFX10-NEXT:    s_and_b32 s3, s3, 0xff
1198; GFX10-NEXT:    s_or_b32 s0, s0, s1
1199; GFX10-NEXT:    s_lshl_b32 s1, s3, 16
1200; GFX10-NEXT:    s_and_b32 s2, s2, 0xff
1201; GFX10-NEXT:    s_or_b32 s0, s0, s1
1202; GFX10-NEXT:    s_lshl_b32 s1, s2, 24
1203; GFX10-NEXT:    s_or_b32 s0, s0, s1
1204; GFX10-NEXT:    ; return to shader part epilog
1205;
1206; GFX11-LABEL: s_fshl_v4i8:
1207; GFX11:       ; %bb.0:
1208; GFX11-NEXT:    s_lshr_b32 s6, s1, 8
1209; GFX11-NEXT:    s_lshr_b32 s7, s1, 16
1210; GFX11-NEXT:    s_lshr_b32 s8, s1, 24
1211; GFX11-NEXT:    s_and_b32 s1, s1, 0xff
1212; GFX11-NEXT:    s_and_b32 s11, s2, 7
1213; GFX11-NEXT:    s_and_b32 s1, 0xffff, s1
1214; GFX11-NEXT:    s_and_not1_b32 s12, 7, s2
1215; GFX11-NEXT:    s_and_b32 s11, 0xffff, s11
1216; GFX11-NEXT:    s_lshr_b32 s1, s1, 1
1217; GFX11-NEXT:    s_and_b32 s12, 0xffff, s12
1218; GFX11-NEXT:    s_lshr_b32 s3, s0, 8
1219; GFX11-NEXT:    s_lshr_b32 s4, s0, 16
1220; GFX11-NEXT:    s_lshr_b32 s5, s0, 24
1221; GFX11-NEXT:    s_lshr_b32 s9, s2, 8
1222; GFX11-NEXT:    s_lshl_b32 s0, s0, s11
1223; GFX11-NEXT:    s_lshr_b32 s1, s1, s12
1224; GFX11-NEXT:    s_and_b32 s6, s6, 0xff
1225; GFX11-NEXT:    s_or_b32 s0, s0, s1
1226; GFX11-NEXT:    s_and_b32 s1, s9, 7
1227; GFX11-NEXT:    s_and_b32 s6, 0xffff, s6
1228; GFX11-NEXT:    s_and_not1_b32 s9, 7, s9
1229; GFX11-NEXT:    s_lshr_b32 s10, s2, 16
1230; GFX11-NEXT:    s_and_b32 s1, 0xffff, s1
1231; GFX11-NEXT:    s_lshr_b32 s6, s6, 1
1232; GFX11-NEXT:    s_and_b32 s9, 0xffff, s9
1233; GFX11-NEXT:    s_lshl_b32 s1, s3, s1
1234; GFX11-NEXT:    s_lshr_b32 s3, s6, s9
1235; GFX11-NEXT:    s_and_b32 s6, s10, 7
1236; GFX11-NEXT:    s_or_b32 s1, s1, s3
1237; GFX11-NEXT:    s_and_b32 s3, 0xffff, s6
1238; GFX11-NEXT:    s_and_b32 s6, s7, 0xff
1239; GFX11-NEXT:    s_lshr_b32 s2, s2, 24
1240; GFX11-NEXT:    s_lshl_b32 s3, s4, s3
1241; GFX11-NEXT:    s_and_b32 s4, 0xffff, s6
1242; GFX11-NEXT:    s_and_not1_b32 s6, 7, s10
1243; GFX11-NEXT:    s_lshr_b32 s4, s4, 1
1244; GFX11-NEXT:    s_and_b32 s6, 0xffff, s6
1245; GFX11-NEXT:    s_and_b32 s7, s2, 7
1246; GFX11-NEXT:    s_and_not1_b32 s2, 7, s2
1247; GFX11-NEXT:    s_lshr_b32 s4, s4, s6
1248; GFX11-NEXT:    s_and_b32 s6, 0xffff, s7
1249; GFX11-NEXT:    s_lshr_b32 s7, s8, 1
1250; GFX11-NEXT:    s_and_b32 s2, 0xffff, s2
1251; GFX11-NEXT:    s_lshl_b32 s5, s5, s6
1252; GFX11-NEXT:    s_lshr_b32 s2, s7, s2
1253; GFX11-NEXT:    s_or_b32 s3, s3, s4
1254; GFX11-NEXT:    s_and_b32 s1, s1, 0xff
1255; GFX11-NEXT:    s_or_b32 s2, s5, s2
1256; GFX11-NEXT:    s_and_b32 s0, s0, 0xff
1257; GFX11-NEXT:    s_lshl_b32 s1, s1, 8
1258; GFX11-NEXT:    s_and_b32 s3, s3, 0xff
1259; GFX11-NEXT:    s_or_b32 s0, s0, s1
1260; GFX11-NEXT:    s_lshl_b32 s1, s3, 16
1261; GFX11-NEXT:    s_and_b32 s2, s2, 0xff
1262; GFX11-NEXT:    s_or_b32 s0, s0, s1
1263; GFX11-NEXT:    s_lshl_b32 s1, s2, 24
1264; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1265; GFX11-NEXT:    s_or_b32 s0, s0, s1
1266; GFX11-NEXT:    ; return to shader part epilog
  ; Reinterpret each packed i32 argument as a <4 x i8> vector, apply the
  ; funnel-shift-left intrinsic, then repack the <4 x i8> result into an i32.
1267  %lhs = bitcast i32 %lhs.arg to <4 x i8>
1268  %rhs = bitcast i32 %rhs.arg to <4 x i8>
1269  %amt = bitcast i32 %amt.arg to <4 x i8>
1270  %result = call <4 x i8> @llvm.fshl.v4i8(<4 x i8> %lhs, <4 x i8> %rhs, <4 x i8> %amt)
1271  %cast.result = bitcast <4 x i8> %result to i32
1272  ret i32 %cast.result
1273}
1274
1275define i32 @v_fshl_v4i8(i32 %lhs.arg, i32 %rhs.arg, i32 %amt.arg) {
1276; GFX6-LABEL: v_fshl_v4i8:
1277; GFX6:       ; %bb.0:
1278; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1279; GFX6-NEXT:    v_lshrrev_b32_e32 v6, 8, v2
1280; GFX6-NEXT:    v_lshrrev_b32_e32 v7, 16, v2
1281; GFX6-NEXT:    v_lshrrev_b32_e32 v8, 24, v2
1282; GFX6-NEXT:    v_and_b32_e32 v9, 7, v2
1283; GFX6-NEXT:    v_not_b32_e32 v2, v2
1284; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
1285; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
1286; GFX6-NEXT:    v_lshrrev_b32_e32 v5, 24, v0
1287; GFX6-NEXT:    v_and_b32_e32 v2, 7, v2
1288; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v9, v0
1289; GFX6-NEXT:    v_bfe_u32 v9, v1, 1, 7
1290; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v2, v9
1291; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
1292; GFX6-NEXT:    v_and_b32_e32 v2, 7, v6
1293; GFX6-NEXT:    v_not_b32_e32 v6, v6
1294; GFX6-NEXT:    v_lshlrev_b32_e32 v2, v2, v3
1295; GFX6-NEXT:    v_bfe_u32 v3, v1, 8, 8
1296; GFX6-NEXT:    v_and_b32_e32 v6, 7, v6
1297; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 1, v3
1298; GFX6-NEXT:    v_lshrrev_b32_e32 v3, v6, v3
1299; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
1300; GFX6-NEXT:    v_and_b32_e32 v3, 7, v7
1301; GFX6-NEXT:    v_not_b32_e32 v6, v7
1302; GFX6-NEXT:    v_lshlrev_b32_e32 v3, v3, v4
1303; GFX6-NEXT:    v_bfe_u32 v4, v1, 16, 8
1304; GFX6-NEXT:    v_and_b32_e32 v6, 7, v6
1305; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 1, v4
1306; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v6, v4
1307; GFX6-NEXT:    v_not_b32_e32 v6, v8
1308; GFX6-NEXT:    v_or_b32_e32 v3, v3, v4
1309; GFX6-NEXT:    v_and_b32_e32 v4, 7, v8
1310; GFX6-NEXT:    v_and_b32_e32 v6, 7, v6
1311; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 25, v1
1312; GFX6-NEXT:    v_and_b32_e32 v2, 0xff, v2
1313; GFX6-NEXT:    v_lshlrev_b32_e32 v4, v4, v5
1314; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v6, v1
1315; GFX6-NEXT:    v_and_b32_e32 v0, 0xff, v0
1316; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
1317; GFX6-NEXT:    v_or_b32_e32 v1, v4, v1
1318; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
1319; GFX6-NEXT:    v_and_b32_e32 v2, 0xff, v3
1320; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
1321; GFX6-NEXT:    v_and_b32_e32 v1, 0xff, v1
1322; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
1323; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
1324; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
1325; GFX6-NEXT:    s_setpc_b64 s[30:31]
1326;
1327; GFX8-LABEL: v_fshl_v4i8:
1328; GFX8:       ; %bb.0:
1329; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1330; GFX8-NEXT:    v_mov_b32_e32 v8, 1
1331; GFX8-NEXT:    v_xor_b32_e32 v10, -1, v2
1332; GFX8-NEXT:    v_and_b32_e32 v6, 7, v2
1333; GFX8-NEXT:    v_lshrrev_b16_sdwa v9, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1334; GFX8-NEXT:    v_and_b32_e32 v10, 7, v10
1335; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 8, v2
1336; GFX8-NEXT:    v_lshlrev_b16_e32 v6, v6, v0
1337; GFX8-NEXT:    v_lshrrev_b16_e32 v9, v10, v9
1338; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 8, v1
1339; GFX8-NEXT:    v_or_b32_e32 v6, v6, v9
1340; GFX8-NEXT:    v_and_b32_e32 v9, 7, v5
1341; GFX8-NEXT:    v_xor_b32_e32 v5, -1, v5
1342; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
1343; GFX8-NEXT:    v_lshrrev_b16_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1344; GFX8-NEXT:    v_and_b32_e32 v5, 7, v5
1345; GFX8-NEXT:    v_lshlrev_b16_e32 v3, v9, v3
1346; GFX8-NEXT:    v_lshrrev_b16_e32 v4, v5, v4
1347; GFX8-NEXT:    v_mov_b32_e32 v7, 0xff
1348; GFX8-NEXT:    v_or_b32_e32 v3, v3, v4
1349; GFX8-NEXT:    v_mov_b32_e32 v4, 7
1350; GFX8-NEXT:    v_mov_b32_e32 v9, -1
1351; GFX8-NEXT:    v_and_b32_sdwa v5, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1352; GFX8-NEXT:    v_and_b32_sdwa v7, v1, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1353; GFX8-NEXT:    v_xor_b32_sdwa v10, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1354; GFX8-NEXT:    v_and_b32_sdwa v4, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
1355; GFX8-NEXT:    v_xor_b32_sdwa v2, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
1356; GFX8-NEXT:    v_lshrrev_b16_e32 v7, 1, v7
1357; GFX8-NEXT:    v_and_b32_e32 v10, 7, v10
1358; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1359; GFX8-NEXT:    v_and_b32_e32 v2, 7, v2
1360; GFX8-NEXT:    v_lshlrev_b16_sdwa v5, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1361; GFX8-NEXT:    v_lshrrev_b16_e32 v7, v10, v7
1362; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1363; GFX8-NEXT:    v_lshrrev_b16_e32 v1, v2, v1
1364; GFX8-NEXT:    v_or_b32_e32 v5, v5, v7
1365; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
1366; GFX8-NEXT:    v_mov_b32_e32 v1, 8
1367; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1368; GFX8-NEXT:    v_and_b32_e32 v2, 0xff, v5
1369; GFX8-NEXT:    v_or_b32_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1370; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
1371; GFX8-NEXT:    v_and_b32_e32 v0, 0xff, v0
1372; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
1373; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
1374; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
1375; GFX8-NEXT:    s_setpc_b64 s[30:31]
1376;
1377; GFX9-LABEL: v_fshl_v4i8:
1378; GFX9:       ; %bb.0:
1379; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1380; GFX9-NEXT:    v_mov_b32_e32 v8, 1
1381; GFX9-NEXT:    v_xor_b32_e32 v10, -1, v2
1382; GFX9-NEXT:    v_and_b32_e32 v6, 7, v2
1383; GFX9-NEXT:    v_lshrrev_b16_sdwa v9, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1384; GFX9-NEXT:    v_and_b32_e32 v10, 7, v10
1385; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 8, v2
1386; GFX9-NEXT:    v_lshlrev_b16_e32 v6, v6, v0
1387; GFX9-NEXT:    v_lshrrev_b16_e32 v9, v10, v9
1388; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 8, v1
1389; GFX9-NEXT:    v_or_b32_e32 v6, v6, v9
1390; GFX9-NEXT:    v_and_b32_e32 v9, 7, v5
1391; GFX9-NEXT:    v_xor_b32_e32 v5, -1, v5
1392; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
1393; GFX9-NEXT:    v_lshrrev_b16_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1394; GFX9-NEXT:    v_and_b32_e32 v5, 7, v5
1395; GFX9-NEXT:    v_lshlrev_b16_e32 v3, v9, v3
1396; GFX9-NEXT:    v_lshrrev_b16_e32 v4, v5, v4
1397; GFX9-NEXT:    v_mov_b32_e32 v7, 0xff
1398; GFX9-NEXT:    v_or_b32_e32 v3, v3, v4
1399; GFX9-NEXT:    v_mov_b32_e32 v4, 7
1400; GFX9-NEXT:    v_mov_b32_e32 v10, -1
1401; GFX9-NEXT:    v_and_b32_sdwa v5, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1402; GFX9-NEXT:    v_and_b32_sdwa v9, v1, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1403; GFX9-NEXT:    v_xor_b32_sdwa v11, v2, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1404; GFX9-NEXT:    v_and_b32_sdwa v4, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
1405; GFX9-NEXT:    v_xor_b32_sdwa v2, v2, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
1406; GFX9-NEXT:    v_lshrrev_b16_e32 v9, 1, v9
1407; GFX9-NEXT:    v_and_b32_e32 v11, 7, v11
1408; GFX9-NEXT:    v_lshrrev_b16_sdwa v1, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1409; GFX9-NEXT:    v_and_b32_e32 v2, 7, v2
1410; GFX9-NEXT:    v_lshlrev_b16_sdwa v5, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1411; GFX9-NEXT:    v_lshrrev_b16_e32 v9, v11, v9
1412; GFX9-NEXT:    v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1413; GFX9-NEXT:    v_lshrrev_b16_e32 v1, v2, v1
1414; GFX9-NEXT:    v_or_b32_e32 v5, v5, v9
1415; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
1416; GFX9-NEXT:    v_mov_b32_e32 v1, 8
1417; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1418; GFX9-NEXT:    v_and_b32_e32 v2, 0xff, v5
1419; GFX9-NEXT:    v_and_b32_e32 v0, 0xff, v0
1420; GFX9-NEXT:    v_and_or_b32 v1, v6, v7, v1
1421; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
1422; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
1423; GFX9-NEXT:    v_or3_b32 v0, v1, v2, v0
1424; GFX9-NEXT:    s_setpc_b64 s[30:31]
1425;
1426; GFX10-LABEL: v_fshl_v4i8:
1427; GFX10:       ; %bb.0:
1428; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1429; GFX10-NEXT:    v_lshrrev_b32_e32 v6, 8, v2
1430; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
1431; GFX10-NEXT:    v_and_b32_e32 v8, 7, v2
1432; GFX10-NEXT:    v_and_b32_e32 v9, 0xff, v1
1433; GFX10-NEXT:    v_xor_b32_e32 v10, -1, v2
1434; GFX10-NEXT:    v_and_b32_e32 v11, 7, v6
1435; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
1436; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 24, v0
1437; GFX10-NEXT:    v_lshrrev_b32_e32 v7, 8, v1
1438; GFX10-NEXT:    v_lshlrev_b16 v0, v8, v0
1439; GFX10-NEXT:    v_lshrrev_b16 v8, 1, v9
1440; GFX10-NEXT:    v_and_b32_e32 v9, 7, v10
1441; GFX10-NEXT:    v_lshlrev_b16 v3, v11, v3
1442; GFX10-NEXT:    v_mov_b32_e32 v10, 0xff
1443; GFX10-NEXT:    v_mov_b32_e32 v11, -1
1444; GFX10-NEXT:    v_lshrrev_b32_e32 v12, 24, v1
1445; GFX10-NEXT:    v_and_b32_e32 v7, 0xff, v7
1446; GFX10-NEXT:    v_xor_b32_e32 v6, -1, v6
1447; GFX10-NEXT:    v_mov_b32_e32 v13, 7
1448; GFX10-NEXT:    v_and_b32_sdwa v1, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1449; GFX10-NEXT:    v_xor_b32_sdwa v10, v2, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1450; GFX10-NEXT:    v_xor_b32_sdwa v11, v2, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
1451; GFX10-NEXT:    v_lshrrev_b16 v7, 1, v7
1452; GFX10-NEXT:    v_and_b32_e32 v6, 7, v6
1453; GFX10-NEXT:    v_and_b32_sdwa v14, v2, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1454; GFX10-NEXT:    v_lshrrev_b16 v1, 1, v1
1455; GFX10-NEXT:    v_and_b32_e32 v10, 7, v10
1456; GFX10-NEXT:    v_and_b32_sdwa v2, v2, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
1457; GFX10-NEXT:    v_lshrrev_b16 v12, 1, v12
1458; GFX10-NEXT:    v_and_b32_e32 v11, 7, v11
1459; GFX10-NEXT:    v_lshrrev_b16 v6, v6, v7
1460; GFX10-NEXT:    v_lshlrev_b16 v4, v14, v4
1461; GFX10-NEXT:    v_lshrrev_b16 v1, v10, v1
1462; GFX10-NEXT:    v_lshlrev_b16 v2, v2, v5
1463; GFX10-NEXT:    v_lshrrev_b16 v5, v11, v12
1464; GFX10-NEXT:    v_lshrrev_b16 v7, v9, v8
1465; GFX10-NEXT:    v_or_b32_e32 v3, v3, v6
1466; GFX10-NEXT:    v_mov_b32_e32 v6, 8
1467; GFX10-NEXT:    v_or_b32_e32 v1, v4, v1
1468; GFX10-NEXT:    v_or_b32_e32 v2, v2, v5
1469; GFX10-NEXT:    v_or_b32_e32 v0, v0, v7
1470; GFX10-NEXT:    v_lshlrev_b32_sdwa v3, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1471; GFX10-NEXT:    v_and_b32_e32 v1, 0xff, v1
1472; GFX10-NEXT:    v_and_b32_e32 v2, 0xff, v2
1473; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v3
1474; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1475; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
1476; GFX10-NEXT:    v_or3_b32 v0, v0, v1, v2
1477; GFX10-NEXT:    s_setpc_b64 s[30:31]
1478;
1479; GFX11-LABEL: v_fshl_v4i8:
1480; GFX11:       ; %bb.0:
1481; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1482; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 8, v1
1483; GFX11-NEXT:    v_lshrrev_b32_e32 v9, 8, v2
1484; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
1485; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
1486; GFX11-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
1487; GFX11-NEXT:    v_and_b32_e32 v6, 0xff, v6
1488; GFX11-NEXT:    v_xor_b32_e32 v13, -1, v9
1489; GFX11-NEXT:    v_lshrrev_b32_e32 v11, 24, v2
1490; GFX11-NEXT:    v_and_b32_e32 v9, 7, v9
1491; GFX11-NEXT:    v_lshrrev_b32_e32 v8, 24, v1
1492; GFX11-NEXT:    v_lshrrev_b16 v6, 1, v6
1493; GFX11-NEXT:    v_and_b32_e32 v13, 7, v13
1494; GFX11-NEXT:    v_and_b32_e32 v7, 0xff, v7
1495; GFX11-NEXT:    v_lshlrev_b16 v3, v9, v3
1496; GFX11-NEXT:    v_xor_b32_e32 v9, -1, v10
1497; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
1498; GFX11-NEXT:    v_lshrrev_b16 v6, v13, v6
1499; GFX11-NEXT:    v_xor_b32_e32 v13, -1, v11
1500; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 24, v0
1501; GFX11-NEXT:    v_and_b32_e32 v12, 7, v2
1502; GFX11-NEXT:    v_and_b32_e32 v1, 0xff, v1
1503; GFX11-NEXT:    v_xor_b32_e32 v2, -1, v2
1504; GFX11-NEXT:    v_and_b32_e32 v10, 7, v10
1505; GFX11-NEXT:    v_lshrrev_b16 v7, 1, v7
1506; GFX11-NEXT:    v_and_b32_e32 v9, 7, v9
1507; GFX11-NEXT:    v_and_b32_e32 v11, 7, v11
1508; GFX11-NEXT:    v_lshrrev_b16 v8, 1, v8
1509; GFX11-NEXT:    v_and_b32_e32 v13, 7, v13
1510; GFX11-NEXT:    v_lshrrev_b16 v1, 1, v1
1511; GFX11-NEXT:    v_and_b32_e32 v2, 7, v2
1512; GFX11-NEXT:    v_or_b32_e32 v3, v3, v6
1513; GFX11-NEXT:    v_lshlrev_b16 v4, v10, v4
1514; GFX11-NEXT:    v_lshrrev_b16 v6, v9, v7
1515; GFX11-NEXT:    v_lshlrev_b16 v5, v11, v5
1516; GFX11-NEXT:    v_lshrrev_b16 v7, v13, v8
1517; GFX11-NEXT:    v_lshlrev_b16 v0, v12, v0
1518; GFX11-NEXT:    v_lshrrev_b16 v1, v2, v1
1519; GFX11-NEXT:    v_and_b32_e32 v2, 0xff, v3
1520; GFX11-NEXT:    v_or_b32_e32 v3, v4, v6
1521; GFX11-NEXT:    v_or_b32_e32 v4, v5, v7
1522; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1523; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
1524; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 8, v2
1525; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1526; GFX11-NEXT:    v_and_b32_e32 v2, 0xff, v3
1527; GFX11-NEXT:    v_and_b32_e32 v3, 0xff, v4
1528; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
1529; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
1530; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
1531; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
1532; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
1533; GFX11-NEXT:    v_or3_b32 v0, v0, v1, v2
1534; GFX11-NEXT:    s_setpc_b64 s[30:31]
1535  %lhs = bitcast i32 %lhs.arg to <4 x i8>
1536  %rhs = bitcast i32 %rhs.arg to <4 x i8>
1537  %amt = bitcast i32 %amt.arg to <4 x i8>
1538  %result = call <4 x i8> @llvm.fshl.v4i8(<4 x i8> %lhs, <4 x i8> %rhs, <4 x i8> %amt)
1539  %cast.result = bitcast <4 x i8> %result to i32
1540  ret i32 %cast.result
1541}
1542
; Test: funnel shift left on the non-power-of-two width i24 with all three
; operands passed `inreg` under the amdgpu_ps calling convention.
;
; What the expected output below shows for every checked target:
;   * %amt is masked to 24 bits and then reduced modulo 24 with a
;     reciprocal-based unsigned remainder sequence (v_rcp_iflag_f32, scale by
;     0x4f7ffffe, v_mul_hi_u32, followed by two compare/select corrections
;     that each subtract 24), not with a simple AND mask.
;   * %rhs goes through s_bfe_u32 with 0x170001 -- presumably width 23 at
;     offset 1, i.e. a pre-shift right by one (the VGPR variant of this test
;     spells the same step as "v_bfe_u32 ..., 1, 23").
;   * the result is (lhs << r) | (rhs_pre >> (23 - r)) where r is the reduced
;     amount, and it is moved back to s0 with v_readfirstlane_b32.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
1543define amdgpu_ps i24 @s_fshl_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt) {
1544; GFX6-LABEL: s_fshl_i24:
1545; GFX6:       ; %bb.0:
1546; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1547; GFX6-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1548; GFX6-NEXT:    v_not_b32_e32 v1, 23
1549; GFX6-NEXT:    s_and_b32 s2, s2, 0xffffff
1550; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x170001
1551; GFX6-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1552; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
1553; GFX6-NEXT:    v_mul_lo_u32 v2, v0, v1
1554; GFX6-NEXT:    v_mul_hi_u32 v2, v0, v2
1555; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
1556; GFX6-NEXT:    v_mul_hi_u32 v0, s2, v0
1557; GFX6-NEXT:    v_mul_lo_u32 v0, v0, 24
1558; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
1559; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v0, v1
1560; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1561; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1562; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v0, v1
1563; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1564; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1565; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 23, v0
1566; GFX6-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
1567; GFX6-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
1568; GFX6-NEXT:    v_lshl_b32_e32 v0, s0, v0
1569; GFX6-NEXT:    v_lshr_b32_e32 v1, s1, v1
1570; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
1571; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
1572; GFX6-NEXT:    ; return to shader part epilog
1573;
1574; GFX8-LABEL: s_fshl_i24:
1575; GFX8:       ; %bb.0:
1576; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1577; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1578; GFX8-NEXT:    v_not_b32_e32 v1, 23
1579; GFX8-NEXT:    s_and_b32 s2, s2, 0xffffff
1580; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x170001
1581; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1582; GFX8-NEXT:    v_cvt_u32_f32_e32 v0, v0
1583; GFX8-NEXT:    v_mul_lo_u32 v2, v0, v1
1584; GFX8-NEXT:    v_mul_hi_u32 v2, v0, v2
1585; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
1586; GFX8-NEXT:    v_mul_hi_u32 v0, s2, v0
1587; GFX8-NEXT:    v_mul_lo_u32 v0, v0, 24
1588; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s2, v0
1589; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v0, v1
1590; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1591; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1592; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v0, v1
1593; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1594; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1595; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, 23, v0
1596; GFX8-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
1597; GFX8-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
1598; GFX8-NEXT:    v_lshlrev_b32_e64 v0, v0, s0
1599; GFX8-NEXT:    v_lshrrev_b32_e64 v1, v1, s1
1600; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
1601; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
1602; GFX8-NEXT:    ; return to shader part epilog
1603;
1604; GFX9-LABEL: s_fshl_i24:
1605; GFX9:       ; %bb.0:
1606; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1607; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1608; GFX9-NEXT:    v_not_b32_e32 v1, 23
1609; GFX9-NEXT:    s_and_b32 s2, s2, 0xffffff
1610; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x170001
1611; GFX9-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1612; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
1613; GFX9-NEXT:    v_mul_lo_u32 v1, v0, v1
1614; GFX9-NEXT:    v_mul_hi_u32 v1, v0, v1
1615; GFX9-NEXT:    v_add_u32_e32 v0, v0, v1
1616; GFX9-NEXT:    v_mul_hi_u32 v0, s2, v0
1617; GFX9-NEXT:    v_mul_lo_u32 v0, v0, 24
1618; GFX9-NEXT:    v_sub_u32_e32 v0, s2, v0
1619; GFX9-NEXT:    v_add_u32_e32 v1, 0xffffffe8, v0
1620; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1621; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1622; GFX9-NEXT:    v_add_u32_e32 v1, 0xffffffe8, v0
1623; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1624; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1625; GFX9-NEXT:    v_sub_u32_e32 v1, 23, v0
1626; GFX9-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
1627; GFX9-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
1628; GFX9-NEXT:    v_lshrrev_b32_e64 v1, v1, s1
1629; GFX9-NEXT:    v_lshl_or_b32 v0, s0, v0, v1
1630; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
1631; GFX9-NEXT:    ; return to shader part epilog
1632;
1633; GFX10-LABEL: s_fshl_i24:
1634; GFX10:       ; %bb.0:
1635; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1636; GFX10-NEXT:    s_and_b32 s2, s2, 0xffffff
1637; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x170001
1638; GFX10-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1639; GFX10-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1640; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
1641; GFX10-NEXT:    v_mul_lo_u32 v1, 0xffffffe8, v0
1642; GFX10-NEXT:    v_mul_hi_u32 v1, v0, v1
1643; GFX10-NEXT:    v_add_nc_u32_e32 v0, v0, v1
1644; GFX10-NEXT:    v_mul_hi_u32 v0, s2, v0
1645; GFX10-NEXT:    v_mul_lo_u32 v0, v0, 24
1646; GFX10-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
1647; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0xffffffe8, v0
1648; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
1649; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1650; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0xffffffe8, v0
1651; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
1652; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1653; GFX10-NEXT:    v_sub_nc_u32_e32 v1, 23, v0
1654; GFX10-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
1655; GFX10-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
1656; GFX10-NEXT:    v_lshrrev_b32_e64 v1, v1, s1
1657; GFX10-NEXT:    v_lshl_or_b32 v0, s0, v0, v1
1658; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
1659; GFX10-NEXT:    ; return to shader part epilog
1660;
1661; GFX11-LABEL: s_fshl_i24:
1662; GFX11:       ; %bb.0:
1663; GFX11-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1664; GFX11-NEXT:    s_and_b32 s2, s2, 0xffffff
1665; GFX11-NEXT:    s_bfe_u32 s1, s1, 0x170001
1666; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
1667; GFX11-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1668; GFX11-NEXT:    s_waitcnt_depctr 0xfff
1669; GFX11-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1670; GFX11-NEXT:    v_cvt_u32_f32_e32 v0, v0
1671; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1672; GFX11-NEXT:    v_mul_lo_u32 v1, 0xffffffe8, v0
1673; GFX11-NEXT:    v_mul_hi_u32 v1, v0, v1
1674; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1675; GFX11-NEXT:    v_add_nc_u32_e32 v0, v0, v1
1676; GFX11-NEXT:    v_mul_hi_u32 v0, s2, v0
1677; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1678; GFX11-NEXT:    v_mul_lo_u32 v0, v0, 24
1679; GFX11-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
1680; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1681; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0xffffffe8, v0
1682; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
1683; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1684; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1685; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0xffffffe8, v0
1686; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
1687; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1688; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1689; GFX11-NEXT:    v_sub_nc_u32_e32 v1, 23, v0
1690; GFX11-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
1691; GFX11-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
1692; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1693; GFX11-NEXT:    v_lshrrev_b32_e64 v1, v1, s1
1694; GFX11-NEXT:    v_lshl_or_b32 v0, s0, v0, v1
1695; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1696; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
1697; GFX11-NEXT:    ; return to shader part epilog
; IR under test: a single call to the i24 funnel-shift-left intrinsic whose
; result is returned directly.
1698  %result = call i24 @llvm.fshl.i24(i24 %lhs, i24 %rhs, i24 %amt)
1699  ret i24 %result
1700}
1701
; Test: funnel shift left on the non-power-of-two width i24 with plain
; (non-inreg) arguments and the default calling convention; the expected
; output reads the operands from v0/v1/v2 and returns via s_setpc_b64.
;
; What the expected output below shows for every checked target:
;   * %amt (v2) is masked to 24 bits and reduced modulo 24 with a
;     reciprocal-based unsigned remainder sequence (v_rcp_iflag_f32, scale by
;     0x4f7ffffe, v_mul_hi_u32, then two compare/select corrections that each
;     add -24), not with a simple AND mask.
;   * %rhs (v1) is pre-shifted with "v_bfe_u32 v1, v1, 1, 23".
;   * the result in v0 is (lhs << r) | (rhs_pre >> (23 - r)) where r is the
;     reduced amount.
; On the two oldest targets checked here the first correction step adds the
; -24 held in v4 while the second uses the literal 0xffffffe8; that asymmetry
; is part of the recorded output, not a typo.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
1702define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
1703; GFX6-LABEL: v_fshl_i24:
1704; GFX6:       ; %bb.0:
1705; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1706; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v3, 24
1707; GFX6-NEXT:    v_rcp_iflag_f32_e32 v3, v3
1708; GFX6-NEXT:    v_not_b32_e32 v4, 23
1709; GFX6-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1710; GFX6-NEXT:    v_bfe_u32 v1, v1, 1, 23
1711; GFX6-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
1712; GFX6-NEXT:    v_cvt_u32_f32_e32 v3, v3
1713; GFX6-NEXT:    v_mul_lo_u32 v5, v3, v4
1714; GFX6-NEXT:    v_mul_hi_u32 v5, v3, v5
1715; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
1716; GFX6-NEXT:    v_mul_hi_u32 v3, v2, v3
1717; GFX6-NEXT:    v_mul_lo_u32 v3, v3, 24
1718; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
1719; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v2, v4
1720; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1721; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1722; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 0xffffffe8, v2
1723; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1724; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1725; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 23, v2
1726; GFX6-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1727; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v2, v0
1728; GFX6-NEXT:    v_and_b32_e32 v2, 0xffffff, v3
1729; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
1730; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
1731; GFX6-NEXT:    s_setpc_b64 s[30:31]
1732;
1733; GFX8-LABEL: v_fshl_i24:
1734; GFX8:       ; %bb.0:
1735; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1736; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v3, 24
1737; GFX8-NEXT:    v_rcp_iflag_f32_e32 v3, v3
1738; GFX8-NEXT:    v_not_b32_e32 v4, 23
1739; GFX8-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1740; GFX8-NEXT:    v_bfe_u32 v1, v1, 1, 23
1741; GFX8-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
1742; GFX8-NEXT:    v_cvt_u32_f32_e32 v3, v3
1743; GFX8-NEXT:    v_mul_lo_u32 v5, v3, v4
1744; GFX8-NEXT:    v_mul_hi_u32 v5, v3, v5
1745; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v5
1746; GFX8-NEXT:    v_mul_hi_u32 v3, v2, v3
1747; GFX8-NEXT:    v_mul_lo_u32 v3, v3, 24
1748; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v2, v3
1749; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v2, v4
1750; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1751; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1752; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0xffffffe8, v2
1753; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1754; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1755; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, 23, v2
1756; GFX8-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1757; GFX8-NEXT:    v_lshlrev_b32_e32 v0, v2, v0
1758; GFX8-NEXT:    v_and_b32_e32 v2, 0xffffff, v3
1759; GFX8-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
1760; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
1761; GFX8-NEXT:    s_setpc_b64 s[30:31]
1762;
1763; GFX9-LABEL: v_fshl_i24:
1764; GFX9:       ; %bb.0:
1765; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1766; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v3, 24
1767; GFX9-NEXT:    v_rcp_iflag_f32_e32 v3, v3
1768; GFX9-NEXT:    v_not_b32_e32 v4, 23
1769; GFX9-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1770; GFX9-NEXT:    v_bfe_u32 v1, v1, 1, 23
1771; GFX9-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
1772; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v3
1773; GFX9-NEXT:    v_mul_lo_u32 v4, v3, v4
1774; GFX9-NEXT:    v_mul_hi_u32 v4, v3, v4
1775; GFX9-NEXT:    v_add_u32_e32 v3, v3, v4
1776; GFX9-NEXT:    v_mul_hi_u32 v3, v2, v3
1777; GFX9-NEXT:    v_mul_lo_u32 v3, v3, 24
1778; GFX9-NEXT:    v_sub_u32_e32 v2, v2, v3
1779; GFX9-NEXT:    v_add_u32_e32 v3, 0xffffffe8, v2
1780; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1781; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1782; GFX9-NEXT:    v_add_u32_e32 v3, 0xffffffe8, v2
1783; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1784; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1785; GFX9-NEXT:    v_sub_u32_e32 v3, 23, v2
1786; GFX9-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
1787; GFX9-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1788; GFX9-NEXT:    v_lshrrev_b32_e32 v1, v3, v1
1789; GFX9-NEXT:    v_lshl_or_b32 v0, v0, v2, v1
1790; GFX9-NEXT:    s_setpc_b64 s[30:31]
1791;
1792; GFX10-LABEL: v_fshl_i24:
1793; GFX10:       ; %bb.0:
1794; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1795; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v3, 24
1796; GFX10-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1797; GFX10-NEXT:    v_bfe_u32 v1, v1, 1, 23
1798; GFX10-NEXT:    v_rcp_iflag_f32_e32 v3, v3
1799; GFX10-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
1800; GFX10-NEXT:    v_cvt_u32_f32_e32 v3, v3
1801; GFX10-NEXT:    v_mul_lo_u32 v4, 0xffffffe8, v3
1802; GFX10-NEXT:    v_mul_hi_u32 v4, v3, v4
1803; GFX10-NEXT:    v_add_nc_u32_e32 v3, v3, v4
1804; GFX10-NEXT:    v_mul_hi_u32 v3, v2, v3
1805; GFX10-NEXT:    v_mul_lo_u32 v3, v3, 24
1806; GFX10-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
1807; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v2
1808; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
1809; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1810; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v2
1811; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
1812; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1813; GFX10-NEXT:    v_sub_nc_u32_e32 v3, 23, v2
1814; GFX10-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1815; GFX10-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
1816; GFX10-NEXT:    v_lshrrev_b32_e32 v1, v3, v1
1817; GFX10-NEXT:    v_lshl_or_b32 v0, v0, v2, v1
1818; GFX10-NEXT:    s_setpc_b64 s[30:31]
1819;
1820; GFX11-LABEL: v_fshl_i24:
1821; GFX11:       ; %bb.0:
1822; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1823; GFX11-NEXT:    v_cvt_f32_ubyte0_e32 v3, 24
1824; GFX11-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1825; GFX11-NEXT:    v_bfe_u32 v1, v1, 1, 23
1826; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_1)
1827; GFX11-NEXT:    v_rcp_iflag_f32_e32 v3, v3
1828; GFX11-NEXT:    s_waitcnt_depctr 0xfff
1829; GFX11-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
1830; GFX11-NEXT:    v_cvt_u32_f32_e32 v3, v3
1831; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1832; GFX11-NEXT:    v_mul_lo_u32 v4, 0xffffffe8, v3
1833; GFX11-NEXT:    v_mul_hi_u32 v4, v3, v4
1834; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1835; GFX11-NEXT:    v_add_nc_u32_e32 v3, v3, v4
1836; GFX11-NEXT:    v_mul_hi_u32 v3, v2, v3
1837; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1838; GFX11-NEXT:    v_mul_lo_u32 v3, v3, 24
1839; GFX11-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
1840; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1841; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v2
1842; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
1843; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1844; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1845; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v2
1846; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
1847; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1848; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1849; GFX11-NEXT:    v_sub_nc_u32_e32 v3, 23, v2
1850; GFX11-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1851; GFX11-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
1852; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1853; GFX11-NEXT:    v_lshrrev_b32_e32 v1, v3, v1
1854; GFX11-NEXT:    v_lshl_or_b32 v0, v0, v2, v1
1855; GFX11-NEXT:    s_setpc_b64 s[30:31]
; IR under test: a single call to the i24 funnel-shift-left intrinsic whose
; result is returned directly.
1856  %result = call i24 @llvm.fshl.i24(i24 %lhs, i24 %rhs, i24 %amt)
1857  ret i24 %result
1858}
1859
1860define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 inreg %amt.arg) {
1861; GFX6-LABEL: s_fshl_v2i24:
1862; GFX6:       ; %bb.0:
1863; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v2, 24
1864; GFX6-NEXT:    v_rcp_iflag_f32_e32 v2, v2
1865; GFX6-NEXT:    s_lshr_b32 s6, s0, 16
1866; GFX6-NEXT:    s_lshr_b32 s7, s1, 8
1867; GFX6-NEXT:    s_bfe_u32 s9, s0, 0x80008
1868; GFX6-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
1869; GFX6-NEXT:    v_cvt_u32_f32_e32 v2, v2
1870; GFX6-NEXT:    s_and_b32 s8, s0, 0xff
1871; GFX6-NEXT:    s_lshl_b32 s9, s9, 8
1872; GFX6-NEXT:    s_and_b32 s6, s6, 0xff
1873; GFX6-NEXT:    s_and_b32 s1, s1, 0xff
1874; GFX6-NEXT:    v_mov_b32_e32 v0, s0
1875; GFX6-NEXT:    s_and_b32 s0, s7, 0xff
1876; GFX6-NEXT:    s_or_b32 s8, s8, s9
1877; GFX6-NEXT:    s_and_b32 s6, 0xffff, s6
1878; GFX6-NEXT:    v_alignbit_b32 v0, s1, v0, 24
1879; GFX6-NEXT:    s_and_b32 s0, 0xffff, s0
1880; GFX6-NEXT:    s_and_b32 s8, 0xffff, s8
1881; GFX6-NEXT:    s_lshl_b32 s6, s6, 16
1882; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1883; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
1884; GFX6-NEXT:    v_not_b32_e32 v3, 23
1885; GFX6-NEXT:    s_or_b32 s6, s8, s6
1886; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
1887; GFX6-NEXT:    s_lshr_b32 s0, s2, 16
1888; GFX6-NEXT:    s_lshr_b32 s1, s3, 8
1889; GFX6-NEXT:    s_bfe_u32 s8, s2, 0x80008
1890; GFX6-NEXT:    v_mul_lo_u32 v4, v2, v3
1891; GFX6-NEXT:    s_and_b32 s7, s2, 0xff
1892; GFX6-NEXT:    s_lshl_b32 s8, s8, 8
1893; GFX6-NEXT:    s_and_b32 s0, s0, 0xff
1894; GFX6-NEXT:    s_and_b32 s3, s3, 0xff
1895; GFX6-NEXT:    v_mov_b32_e32 v1, s2
1896; GFX6-NEXT:    s_and_b32 s1, s1, 0xff
1897; GFX6-NEXT:    s_or_b32 s7, s7, s8
1898; GFX6-NEXT:    s_and_b32 s0, 0xffff, s0
1899; GFX6-NEXT:    v_alignbit_b32 v1, s3, v1, 24
1900; GFX6-NEXT:    s_and_b32 s1, 0xffff, s1
1901; GFX6-NEXT:    s_and_b32 s7, 0xffff, s7
1902; GFX6-NEXT:    s_lshl_b32 s0, s0, 16
1903; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
1904; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
1905; GFX6-NEXT:    s_or_b32 s0, s7, s0
1906; GFX6-NEXT:    v_or_b32_e32 v1, s1, v1
1907; GFX6-NEXT:    s_lshr_b32 s1, s4, 16
1908; GFX6-NEXT:    s_bfe_u32 s7, s4, 0x80008
1909; GFX6-NEXT:    v_mul_hi_u32 v4, v2, v4
1910; GFX6-NEXT:    s_and_b32 s3, s4, 0xff
1911; GFX6-NEXT:    s_lshl_b32 s7, s7, 8
1912; GFX6-NEXT:    s_and_b32 s1, s1, 0xff
1913; GFX6-NEXT:    s_or_b32 s3, s3, s7
1914; GFX6-NEXT:    s_and_b32 s1, 0xffff, s1
1915; GFX6-NEXT:    s_and_b32 s3, 0xffff, s3
1916; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
1917; GFX6-NEXT:    s_or_b32 s1, s3, s1
1918; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
1919; GFX6-NEXT:    v_mul_hi_u32 v4, s1, v2
1920; GFX6-NEXT:    s_lshr_b32 s2, s5, 8
1921; GFX6-NEXT:    s_and_b32 s3, s5, 0xff
1922; GFX6-NEXT:    v_mov_b32_e32 v5, s4
1923; GFX6-NEXT:    s_and_b32 s2, s2, 0xff
1924; GFX6-NEXT:    v_alignbit_b32 v5, s3, v5, 24
1925; GFX6-NEXT:    s_and_b32 s2, 0xffff, s2
1926; GFX6-NEXT:    v_and_b32_e32 v5, 0xffff, v5
1927; GFX6-NEXT:    v_mul_lo_u32 v4, v4, 24
1928; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
1929; GFX6-NEXT:    v_or_b32_e32 v5, s2, v5
1930; GFX6-NEXT:    v_mul_hi_u32 v2, v5, v2
1931; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, s1, v4
1932; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v4, v3
1933; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
1934; GFX6-NEXT:    v_mul_lo_u32 v2, v2, 24
1935; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
1936; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v4, v3
1937; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
1938; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
1939; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v5, v2
1940; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, 23, v4
1941; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v2, v3
1942; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1943; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
1944; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v2, v3
1945; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1946; GFX6-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
1947; GFX6-NEXT:    s_lshr_b32 s0, s0, 1
1948; GFX6-NEXT:    v_and_b32_e32 v6, 0xffffff, v6
1949; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1950; GFX6-NEXT:    v_lshl_b32_e32 v4, s6, v4
1951; GFX6-NEXT:    v_lshr_b32_e32 v6, s0, v6
1952; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 23, v2
1953; GFX6-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1954; GFX6-NEXT:    v_or_b32_e32 v4, v4, v6
1955; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v2, v0
1956; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 1, v1
1957; GFX6-NEXT:    v_and_b32_e32 v2, 0xffffff, v3
1958; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
1959; GFX6-NEXT:    v_bfe_u32 v2, v4, 8, 8
1960; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
1961; GFX6-NEXT:    v_and_b32_e32 v1, 0xff, v4
1962; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
1963; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
1964; GFX6-NEXT:    v_bfe_u32 v2, v4, 16, 8
1965; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
1966; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
1967; GFX6-NEXT:    v_and_b32_e32 v2, 0xff, v0
1968; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
1969; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
1970; GFX6-NEXT:    v_bfe_u32 v2, v0, 8, 8
1971; GFX6-NEXT:    v_bfe_u32 v0, v0, 16, 8
1972; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
1973; GFX6-NEXT:    v_or_b32_e32 v0, v2, v0
1974; GFX6-NEXT:    v_readfirstlane_b32 s0, v1
1975; GFX6-NEXT:    v_readfirstlane_b32 s1, v0
1976; GFX6-NEXT:    ; return to shader part epilog
1977;
1978; GFX8-LABEL: s_fshl_v2i24:
1979; GFX8:       ; %bb.0:
1980; GFX8-NEXT:    s_lshr_b32 s6, s0, 8
1981; GFX8-NEXT:    s_and_b32 s6, s6, 0xff
1982; GFX8-NEXT:    s_lshr_b32 s7, s0, 16
1983; GFX8-NEXT:    s_lshr_b32 s8, s0, 24
1984; GFX8-NEXT:    s_and_b32 s0, s0, 0xff
1985; GFX8-NEXT:    s_lshl_b32 s6, s6, 8
1986; GFX8-NEXT:    s_or_b32 s0, s0, s6
1987; GFX8-NEXT:    s_and_b32 s6, s7, 0xff
1988; GFX8-NEXT:    s_and_b32 s6, 0xffff, s6
1989; GFX8-NEXT:    s_lshr_b32 s9, s1, 8
1990; GFX8-NEXT:    s_and_b32 s0, 0xffff, s0
1991; GFX8-NEXT:    s_lshl_b32 s6, s6, 16
1992; GFX8-NEXT:    s_and_b32 s1, s1, 0xff
1993; GFX8-NEXT:    s_or_b32 s0, s0, s6
1994; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
1995; GFX8-NEXT:    s_and_b32 s6, s9, 0xff
1996; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1997; GFX8-NEXT:    s_or_b32 s1, s8, s1
1998; GFX8-NEXT:    s_and_b32 s6, 0xffff, s6
1999; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
2000; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
2001; GFX8-NEXT:    s_lshl_b32 s6, s6, 16
2002; GFX8-NEXT:    s_or_b32 s1, s1, s6
2003; GFX8-NEXT:    s_lshr_b32 s6, s2, 8
2004; GFX8-NEXT:    s_and_b32 s6, s6, 0xff
2005; GFX8-NEXT:    s_lshr_b32 s7, s2, 16
2006; GFX8-NEXT:    s_lshr_b32 s8, s2, 24
2007; GFX8-NEXT:    s_and_b32 s2, s2, 0xff
2008; GFX8-NEXT:    s_lshl_b32 s6, s6, 8
2009; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
2010; GFX8-NEXT:    s_or_b32 s2, s2, s6
2011; GFX8-NEXT:    s_and_b32 s6, s7, 0xff
2012; GFX8-NEXT:    v_cvt_u32_f32_e32 v0, v0
2013; GFX8-NEXT:    s_and_b32 s6, 0xffff, s6
2014; GFX8-NEXT:    s_lshr_b32 s9, s3, 8
2015; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
2016; GFX8-NEXT:    s_lshl_b32 s6, s6, 16
2017; GFX8-NEXT:    s_and_b32 s3, s3, 0xff
2018; GFX8-NEXT:    s_or_b32 s2, s2, s6
2019; GFX8-NEXT:    s_lshl_b32 s3, s3, 8
2020; GFX8-NEXT:    s_and_b32 s6, s9, 0xff
2021; GFX8-NEXT:    v_not_b32_e32 v1, 23
2022; GFX8-NEXT:    s_or_b32 s3, s8, s3
2023; GFX8-NEXT:    s_and_b32 s6, 0xffff, s6
2024; GFX8-NEXT:    v_mul_lo_u32 v2, v0, v1
2025; GFX8-NEXT:    s_and_b32 s3, 0xffff, s3
2026; GFX8-NEXT:    s_lshl_b32 s6, s6, 16
2027; GFX8-NEXT:    s_or_b32 s3, s3, s6
2028; GFX8-NEXT:    s_lshr_b32 s6, s4, 8
2029; GFX8-NEXT:    s_and_b32 s6, s6, 0xff
2030; GFX8-NEXT:    s_lshr_b32 s7, s4, 16
2031; GFX8-NEXT:    s_lshr_b32 s8, s4, 24
2032; GFX8-NEXT:    s_and_b32 s4, s4, 0xff
2033; GFX8-NEXT:    s_lshl_b32 s6, s6, 8
2034; GFX8-NEXT:    v_mul_hi_u32 v2, v0, v2
2035; GFX8-NEXT:    s_or_b32 s4, s4, s6
2036; GFX8-NEXT:    s_and_b32 s6, s7, 0xff
2037; GFX8-NEXT:    s_and_b32 s6, 0xffff, s6
2038; GFX8-NEXT:    s_and_b32 s4, 0xffff, s4
2039; GFX8-NEXT:    s_lshl_b32 s6, s6, 16
2040; GFX8-NEXT:    s_or_b32 s4, s4, s6
2041; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
2042; GFX8-NEXT:    v_mul_hi_u32 v2, s4, v0
2043; GFX8-NEXT:    s_lshr_b32 s9, s5, 8
2044; GFX8-NEXT:    s_and_b32 s5, s5, 0xff
2045; GFX8-NEXT:    s_lshl_b32 s5, s5, 8
2046; GFX8-NEXT:    v_mul_lo_u32 v2, v2, 24
2047; GFX8-NEXT:    s_and_b32 s6, s9, 0xff
2048; GFX8-NEXT:    s_or_b32 s5, s8, s5
2049; GFX8-NEXT:    s_and_b32 s6, 0xffff, s6
2050; GFX8-NEXT:    s_and_b32 s5, 0xffff, s5
2051; GFX8-NEXT:    s_lshl_b32 s6, s6, 16
2052; GFX8-NEXT:    s_or_b32 s5, s5, s6
2053; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s4, v2
2054; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v2, v1
2055; GFX8-NEXT:    v_mul_hi_u32 v0, s5, v0
2056; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2057; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
2058; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v2, v1
2059; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2060; GFX8-NEXT:    v_mul_lo_u32 v0, v0, 24
2061; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
2062; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, 23, v2
2063; GFX8-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2064; GFX8-NEXT:    v_lshlrev_b32_e64 v2, v2, s0
2065; GFX8-NEXT:    s_lshr_b32 s0, s2, 1
2066; GFX8-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
2067; GFX8-NEXT:    v_lshrrev_b32_e64 v3, v3, s0
2068; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s5, v0
2069; GFX8-NEXT:    v_or_b32_e32 v2, v2, v3
2070; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v0, v1
2071; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
2072; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
2073; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v0, v1
2074; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
2075; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2076; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, 23, v0
2077; GFX8-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
2078; GFX8-NEXT:    s_lshr_b32 s0, s3, 1
2079; GFX8-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
2080; GFX8-NEXT:    v_lshlrev_b32_e64 v0, v0, s1
2081; GFX8-NEXT:    v_lshrrev_b32_e64 v1, v1, s0
2082; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2083; GFX8-NEXT:    v_mov_b32_e32 v1, 8
2084; GFX8-NEXT:    v_lshlrev_b32_sdwa v3, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2085; GFX8-NEXT:    v_mov_b32_e32 v4, 16
2086; GFX8-NEXT:    v_or_b32_sdwa v3, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2087; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2088; GFX8-NEXT:    v_or_b32_e32 v2, v3, v2
2089; GFX8-NEXT:    v_and_b32_e32 v3, 0xff, v0
2090; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
2091; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2092; GFX8-NEXT:    v_or_b32_e32 v2, v2, v3
2093; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
2094; GFX8-NEXT:    v_readfirstlane_b32 s0, v2
2095; GFX8-NEXT:    v_readfirstlane_b32 s1, v0
2096; GFX8-NEXT:    ; return to shader part epilog
2097;
2098; GFX9-LABEL: s_fshl_v2i24:
2099; GFX9:       ; %bb.0:
2100; GFX9-NEXT:    s_lshr_b32 s6, s0, 8
2101; GFX9-NEXT:    s_and_b32 s6, s6, 0xff
2102; GFX9-NEXT:    s_lshr_b32 s7, s0, 16
2103; GFX9-NEXT:    s_lshr_b32 s8, s0, 24
2104; GFX9-NEXT:    s_and_b32 s0, s0, 0xff
2105; GFX9-NEXT:    s_lshl_b32 s6, s6, 8
2106; GFX9-NEXT:    s_or_b32 s0, s0, s6
2107; GFX9-NEXT:    s_and_b32 s6, s7, 0xff
2108; GFX9-NEXT:    s_and_b32 s6, 0xffff, s6
2109; GFX9-NEXT:    s_lshr_b32 s9, s1, 8
2110; GFX9-NEXT:    s_and_b32 s0, 0xffff, s0
2111; GFX9-NEXT:    s_lshl_b32 s6, s6, 16
2112; GFX9-NEXT:    s_and_b32 s1, s1, 0xff
2113; GFX9-NEXT:    s_or_b32 s0, s0, s6
2114; GFX9-NEXT:    s_lshl_b32 s1, s1, 8
2115; GFX9-NEXT:    s_and_b32 s6, s9, 0xff
2116; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
2117; GFX9-NEXT:    s_or_b32 s1, s8, s1
2118; GFX9-NEXT:    s_and_b32 s6, 0xffff, s6
2119; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
2120; GFX9-NEXT:    s_and_b32 s1, 0xffff, s1
2121; GFX9-NEXT:    s_lshl_b32 s6, s6, 16
2122; GFX9-NEXT:    s_or_b32 s1, s1, s6
2123; GFX9-NEXT:    s_lshr_b32 s6, s2, 8
2124; GFX9-NEXT:    s_and_b32 s6, s6, 0xff
2125; GFX9-NEXT:    s_lshr_b32 s7, s2, 16
2126; GFX9-NEXT:    s_lshr_b32 s8, s2, 24
2127; GFX9-NEXT:    s_and_b32 s2, s2, 0xff
2128; GFX9-NEXT:    s_lshl_b32 s6, s6, 8
2129; GFX9-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
2130; GFX9-NEXT:    s_or_b32 s2, s2, s6
2131; GFX9-NEXT:    s_and_b32 s6, s7, 0xff
2132; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
2133; GFX9-NEXT:    s_and_b32 s6, 0xffff, s6
2134; GFX9-NEXT:    s_lshr_b32 s9, s3, 8
2135; GFX9-NEXT:    s_and_b32 s2, 0xffff, s2
2136; GFX9-NEXT:    s_lshl_b32 s6, s6, 16
2137; GFX9-NEXT:    s_and_b32 s3, s3, 0xff
2138; GFX9-NEXT:    s_or_b32 s2, s2, s6
2139; GFX9-NEXT:    s_lshl_b32 s3, s3, 8
2140; GFX9-NEXT:    s_and_b32 s6, s9, 0xff
2141; GFX9-NEXT:    v_not_b32_e32 v1, 23
2142; GFX9-NEXT:    s_or_b32 s3, s8, s3
2143; GFX9-NEXT:    s_and_b32 s6, 0xffff, s6
2144; GFX9-NEXT:    v_mul_lo_u32 v1, v0, v1
2145; GFX9-NEXT:    s_and_b32 s3, 0xffff, s3
2146; GFX9-NEXT:    s_lshl_b32 s6, s6, 16
2147; GFX9-NEXT:    s_or_b32 s3, s3, s6
2148; GFX9-NEXT:    s_lshr_b32 s6, s4, 8
2149; GFX9-NEXT:    s_and_b32 s6, s6, 0xff
2150; GFX9-NEXT:    s_lshr_b32 s7, s4, 16
2151; GFX9-NEXT:    s_lshr_b32 s8, s4, 24
2152; GFX9-NEXT:    s_and_b32 s4, s4, 0xff
2153; GFX9-NEXT:    s_lshl_b32 s6, s6, 8
2154; GFX9-NEXT:    v_mul_hi_u32 v1, v0, v1
2155; GFX9-NEXT:    s_or_b32 s4, s4, s6
2156; GFX9-NEXT:    s_and_b32 s6, s7, 0xff
2157; GFX9-NEXT:    s_and_b32 s6, 0xffff, s6
2158; GFX9-NEXT:    s_and_b32 s4, 0xffff, s4
2159; GFX9-NEXT:    s_lshl_b32 s6, s6, 16
2160; GFX9-NEXT:    s_or_b32 s4, s4, s6
2161; GFX9-NEXT:    v_add_u32_e32 v0, v0, v1
2162; GFX9-NEXT:    v_mul_hi_u32 v1, s4, v0
2163; GFX9-NEXT:    s_lshr_b32 s9, s5, 8
2164; GFX9-NEXT:    s_and_b32 s5, s5, 0xff
2165; GFX9-NEXT:    s_lshl_b32 s5, s5, 8
2166; GFX9-NEXT:    s_and_b32 s6, s9, 0xff
2167; GFX9-NEXT:    s_or_b32 s5, s8, s5
2168; GFX9-NEXT:    v_mul_lo_u32 v1, v1, 24
2169; GFX9-NEXT:    s_and_b32 s6, 0xffff, s6
2170; GFX9-NEXT:    s_and_b32 s5, 0xffff, s5
2171; GFX9-NEXT:    s_lshl_b32 s6, s6, 16
2172; GFX9-NEXT:    s_or_b32 s5, s5, s6
2173; GFX9-NEXT:    v_mul_hi_u32 v0, s5, v0
2174; GFX9-NEXT:    v_sub_u32_e32 v1, s4, v1
2175; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffe8, v1
2176; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
2177; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
2178; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffe8, v1
2179; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
2180; GFX9-NEXT:    v_mul_lo_u32 v0, v0, 24
2181; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
2182; GFX9-NEXT:    v_sub_u32_e32 v2, 23, v1
2183; GFX9-NEXT:    s_lshr_b32 s2, s2, 1
2184; GFX9-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2185; GFX9-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
2186; GFX9-NEXT:    v_lshrrev_b32_e64 v2, v2, s2
2187; GFX9-NEXT:    v_sub_u32_e32 v0, s5, v0
2188; GFX9-NEXT:    v_lshl_or_b32 v1, s0, v1, v2
2189; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffe8, v0
2190; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
2191; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2192; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffe8, v0
2193; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
2194; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2195; GFX9-NEXT:    v_sub_u32_e32 v2, 23, v0
2196; GFX9-NEXT:    s_lshr_b32 s0, s3, 1
2197; GFX9-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2198; GFX9-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
2199; GFX9-NEXT:    v_lshrrev_b32_e64 v2, v2, s0
2200; GFX9-NEXT:    v_mov_b32_e32 v3, 8
2201; GFX9-NEXT:    v_lshl_or_b32 v0, s1, v0, v2
2202; GFX9-NEXT:    v_mov_b32_e32 v2, 0xff
2203; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2204; GFX9-NEXT:    v_and_or_b32 v2, v1, v2, v3
2205; GFX9-NEXT:    v_mov_b32_e32 v3, 16
2206; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2207; GFX9-NEXT:    v_and_b32_e32 v3, 0xff, v0
2208; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
2209; GFX9-NEXT:    v_or3_b32 v1, v2, v1, v3
2210; GFX9-NEXT:    v_bfe_u32 v2, v0, 8, 8
2211; GFX9-NEXT:    v_bfe_u32 v0, v0, 16, 8
2212; GFX9-NEXT:    v_lshl_or_b32 v0, v0, 8, v2
2213; GFX9-NEXT:    v_readfirstlane_b32 s0, v1
2214; GFX9-NEXT:    v_readfirstlane_b32 s1, v0
2215; GFX9-NEXT:    ; return to shader part epilog
2216;
2217; GFX10-LABEL: s_fshl_v2i24:
2218; GFX10:       ; %bb.0:
2219; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
2220; GFX10-NEXT:    s_lshr_b32 s6, s0, 8
2221; GFX10-NEXT:    s_lshr_b32 s7, s0, 16
2222; GFX10-NEXT:    s_and_b32 s6, s6, 0xff
2223; GFX10-NEXT:    s_lshr_b32 s8, s0, 24
2224; GFX10-NEXT:    v_rcp_iflag_f32_e32 v0, v0
2225; GFX10-NEXT:    s_and_b32 s0, s0, 0xff
2226; GFX10-NEXT:    s_lshl_b32 s6, s6, 8
2227; GFX10-NEXT:    s_lshr_b32 s10, s4, 16
2228; GFX10-NEXT:    s_or_b32 s0, s0, s6
2229; GFX10-NEXT:    s_and_b32 s6, s7, 0xff
2230; GFX10-NEXT:    s_lshr_b32 s7, s4, 8
2231; GFX10-NEXT:    s_lshr_b32 s11, s4, 24
2232; GFX10-NEXT:    s_and_b32 s7, s7, 0xff
2233; GFX10-NEXT:    s_and_b32 s4, s4, 0xff
2234; GFX10-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
2235; GFX10-NEXT:    s_lshl_b32 s7, s7, 8
2236; GFX10-NEXT:    s_lshr_b32 s12, s5, 8
2237; GFX10-NEXT:    s_or_b32 s4, s4, s7
2238; GFX10-NEXT:    s_and_b32 s7, s10, 0xff
2239; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
2240; GFX10-NEXT:    s_and_b32 s7, 0xffff, s7
2241; GFX10-NEXT:    s_and_b32 s4, 0xffff, s4
2242; GFX10-NEXT:    s_lshl_b32 s7, s7, 16
2243; GFX10-NEXT:    s_and_b32 s5, s5, 0xff
2244; GFX10-NEXT:    v_mul_lo_u32 v1, 0xffffffe8, v0
2245; GFX10-NEXT:    s_or_b32 s4, s4, s7
2246; GFX10-NEXT:    s_lshl_b32 s5, s5, 8
2247; GFX10-NEXT:    s_and_b32 s7, s12, 0xff
2248; GFX10-NEXT:    s_or_b32 s5, s11, s5
2249; GFX10-NEXT:    s_and_b32 s7, 0xffff, s7
2250; GFX10-NEXT:    s_and_b32 s5, 0xffff, s5
2251; GFX10-NEXT:    s_lshl_b32 s7, s7, 16
2252; GFX10-NEXT:    v_mul_hi_u32 v1, v0, v1
2253; GFX10-NEXT:    s_or_b32 s5, s5, s7
2254; GFX10-NEXT:    s_lshr_b32 s9, s1, 8
2255; GFX10-NEXT:    s_and_b32 s1, s1, 0xff
2256; GFX10-NEXT:    s_and_b32 s7, s9, 0xff
2257; GFX10-NEXT:    s_lshl_b32 s1, s1, 8
2258; GFX10-NEXT:    s_lshr_b32 s9, s2, 16
2259; GFX10-NEXT:    s_or_b32 s1, s8, s1
2260; GFX10-NEXT:    v_add_nc_u32_e32 v0, v0, v1
2261; GFX10-NEXT:    s_lshr_b32 s8, s2, 8
2262; GFX10-NEXT:    s_lshr_b32 s10, s2, 24
2263; GFX10-NEXT:    s_and_b32 s8, s8, 0xff
2264; GFX10-NEXT:    s_and_b32 s2, s2, 0xff
2265; GFX10-NEXT:    v_mul_hi_u32 v1, s4, v0
2266; GFX10-NEXT:    v_mul_hi_u32 v0, s5, v0
2267; GFX10-NEXT:    s_lshl_b32 s8, s8, 8
2268; GFX10-NEXT:    s_and_b32 s6, 0xffff, s6
2269; GFX10-NEXT:    s_or_b32 s2, s2, s8
2270; GFX10-NEXT:    s_and_b32 s7, 0xffff, s7
2271; GFX10-NEXT:    s_and_b32 s2, 0xffff, s2
2272; GFX10-NEXT:    s_and_b32 s0, 0xffff, s0
2273; GFX10-NEXT:    v_mul_lo_u32 v1, v1, 24
2274; GFX10-NEXT:    v_mul_lo_u32 v0, v0, 24
2275; GFX10-NEXT:    s_lshl_b32 s6, s6, 16
2276; GFX10-NEXT:    s_and_b32 s1, 0xffff, s1
2277; GFX10-NEXT:    s_lshl_b32 s7, s7, 16
2278; GFX10-NEXT:    s_or_b32 s0, s0, s6
2279; GFX10-NEXT:    s_or_b32 s1, s1, s7
2280; GFX10-NEXT:    v_sub_nc_u32_e32 v1, s4, v1
2281; GFX10-NEXT:    v_sub_nc_u32_e32 v0, s5, v0
2282; GFX10-NEXT:    s_lshr_b32 s4, s3, 8
2283; GFX10-NEXT:    s_and_b32 s5, s9, 0xff
2284; GFX10-NEXT:    s_and_b32 s3, s3, 0xff
2285; GFX10-NEXT:    v_add_nc_u32_e32 v2, 0xffffffe8, v1
2286; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v1
2287; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v0
2288; GFX10-NEXT:    s_and_b32 s5, 0xffff, s5
2289; GFX10-NEXT:    s_lshl_b32 s3, s3, 8
2290; GFX10-NEXT:    s_and_b32 s4, s4, 0xff
2291; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc_lo
2292; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
2293; GFX10-NEXT:    s_lshl_b32 s5, s5, 16
2294; GFX10-NEXT:    s_or_b32 s3, s10, s3
2295; GFX10-NEXT:    s_and_b32 s4, 0xffff, s4
2296; GFX10-NEXT:    v_add_nc_u32_e32 v2, 0xffffffe8, v1
2297; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2298; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v1
2299; GFX10-NEXT:    s_or_b32 s2, s2, s5
2300; GFX10-NEXT:    s_and_b32 s3, 0xffff, s3
2301; GFX10-NEXT:    s_lshl_b32 s4, s4, 16
2302; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v0
2303; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc_lo
2304; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
2305; GFX10-NEXT:    s_or_b32 s3, s3, s4
2306; GFX10-NEXT:    s_lshr_b32 s2, s2, 1
2307; GFX10-NEXT:    v_sub_nc_u32_e32 v2, 23, v1
2308; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2309; GFX10-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
2310; GFX10-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2311; GFX10-NEXT:    v_sub_nc_u32_e32 v3, 23, v0
2312; GFX10-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
2313; GFX10-NEXT:    v_lshrrev_b32_e64 v2, v2, s2
2314; GFX10-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
2315; GFX10-NEXT:    s_lshr_b32 s2, s3, 1
2316; GFX10-NEXT:    v_lshl_or_b32 v1, s0, v1, v2
2317; GFX10-NEXT:    v_lshrrev_b32_e64 v3, v3, s2
2318; GFX10-NEXT:    v_mov_b32_e32 v2, 8
2319; GFX10-NEXT:    v_lshl_or_b32 v0, s1, v0, v3
2320; GFX10-NEXT:    v_mov_b32_e32 v3, 16
2321; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2322; GFX10-NEXT:    v_and_b32_e32 v4, 0xff, v0
2323; GFX10-NEXT:    v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2324; GFX10-NEXT:    v_and_or_b32 v1, 0xff, v1, v2
2325; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v4
2326; GFX10-NEXT:    v_bfe_u32 v4, v0, 8, 8
2327; GFX10-NEXT:    v_bfe_u32 v0, v0, 16, 8
2328; GFX10-NEXT:    v_or3_b32 v1, v1, v3, v2
2329; GFX10-NEXT:    v_lshl_or_b32 v0, v0, 8, v4
2330; GFX10-NEXT:    v_readfirstlane_b32 s0, v1
2331; GFX10-NEXT:    v_readfirstlane_b32 s1, v0
2332; GFX10-NEXT:    ; return to shader part epilog
2333;
2334; GFX11-LABEL: s_fshl_v2i24:
2335; GFX11:       ; %bb.0:
2336; GFX11-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
2337; GFX11-NEXT:    s_lshr_b32 s6, s0, 8
2338; GFX11-NEXT:    s_lshr_b32 s7, s0, 16
2339; GFX11-NEXT:    s_and_b32 s6, s6, 0xff
2340; GFX11-NEXT:    s_lshr_b32 s8, s0, 24
2341; GFX11-NEXT:    v_rcp_iflag_f32_e32 v0, v0
2342; GFX11-NEXT:    s_and_b32 s0, s0, 0xff
2343; GFX11-NEXT:    s_lshl_b32 s6, s6, 8
2344; GFX11-NEXT:    s_and_b32 s7, s7, 0xff
2345; GFX11-NEXT:    s_or_b32 s0, s0, s6
2346; GFX11-NEXT:    s_and_b32 s6, 0xffff, s7
2347; GFX11-NEXT:    s_and_b32 s0, 0xffff, s0
2348; GFX11-NEXT:    s_lshl_b32 s6, s6, 16
2349; GFX11-NEXT:    s_lshr_b32 s7, s4, 16
2350; GFX11-NEXT:    s_or_b32 s0, s0, s6
2351; GFX11-NEXT:    s_waitcnt_depctr 0xfff
2352; GFX11-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
2353; GFX11-NEXT:    s_lshr_b32 s6, s4, 8
2354; GFX11-NEXT:    s_lshr_b32 s10, s4, 24
2355; GFX11-NEXT:    s_and_b32 s6, s6, 0xff
2356; GFX11-NEXT:    s_and_b32 s4, s4, 0xff
2357; GFX11-NEXT:    v_cvt_u32_f32_e32 v0, v0
2358; GFX11-NEXT:    s_lshl_b32 s6, s6, 8
2359; GFX11-NEXT:    s_lshr_b32 s11, s5, 8
2360; GFX11-NEXT:    s_or_b32 s4, s4, s6
2361; GFX11-NEXT:    s_and_b32 s6, s7, 0xff
2362; GFX11-NEXT:    v_mul_lo_u32 v1, 0xffffffe8, v0
2363; GFX11-NEXT:    s_and_b32 s6, 0xffff, s6
2364; GFX11-NEXT:    s_and_b32 s4, 0xffff, s4
2365; GFX11-NEXT:    s_lshl_b32 s6, s6, 16
2366; GFX11-NEXT:    s_and_b32 s5, s5, 0xff
2367; GFX11-NEXT:    s_or_b32 s4, s4, s6
2368; GFX11-NEXT:    s_lshl_b32 s5, s5, 8
2369; GFX11-NEXT:    s_and_b32 s6, s11, 0xff
2370; GFX11-NEXT:    v_mul_hi_u32 v1, v0, v1
2371; GFX11-NEXT:    s_or_b32 s5, s10, s5
2372; GFX11-NEXT:    s_and_b32 s6, 0xffff, s6
2373; GFX11-NEXT:    s_and_b32 s5, 0xffff, s5
2374; GFX11-NEXT:    s_lshl_b32 s6, s6, 16
2375; GFX11-NEXT:    s_lshr_b32 s9, s1, 8
2376; GFX11-NEXT:    s_or_b32 s5, s5, s6
2377; GFX11-NEXT:    s_and_b32 s1, s1, 0xff
2378; GFX11-NEXT:    v_add_nc_u32_e32 v0, v0, v1
2379; GFX11-NEXT:    s_lshl_b32 s1, s1, 8
2380; GFX11-NEXT:    s_lshr_b32 s7, s2, 8
2381; GFX11-NEXT:    s_or_b32 s1, s8, s1
2382; GFX11-NEXT:    s_lshr_b32 s8, s2, 16
2383; GFX11-NEXT:    v_mul_hi_u32 v1, s4, v0
2384; GFX11-NEXT:    v_mul_hi_u32 v0, s5, v0
2385; GFX11-NEXT:    s_and_b32 s7, s7, 0xff
2386; GFX11-NEXT:    s_and_b32 s6, s9, 0xff
2387; GFX11-NEXT:    s_lshr_b32 s9, s2, 24
2388; GFX11-NEXT:    s_and_b32 s2, s2, 0xff
2389; GFX11-NEXT:    s_lshl_b32 s7, s7, 8
2390; GFX11-NEXT:    s_and_b32 s6, 0xffff, s6
2391; GFX11-NEXT:    v_mul_lo_u32 v1, v1, 24
2392; GFX11-NEXT:    v_mul_lo_u32 v0, v0, 24
2393; GFX11-NEXT:    s_or_b32 s2, s2, s7
2394; GFX11-NEXT:    s_and_b32 s1, 0xffff, s1
2395; GFX11-NEXT:    s_and_b32 s2, 0xffff, s2
2396; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2397; GFX11-NEXT:    v_sub_nc_u32_e32 v1, s4, v1
2398; GFX11-NEXT:    v_sub_nc_u32_e32 v0, s5, v0
2399; GFX11-NEXT:    s_and_b32 s5, s8, 0xff
2400; GFX11-NEXT:    s_lshr_b32 s4, s3, 8
2401; GFX11-NEXT:    s_and_b32 s5, 0xffff, s5
2402; GFX11-NEXT:    v_add_nc_u32_e32 v2, 0xffffffe8, v1
2403; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v1
2404; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v0
2405; GFX11-NEXT:    s_and_b32 s3, s3, 0xff
2406; GFX11-NEXT:    s_lshl_b32 s5, s5, 16
2407; GFX11-NEXT:    s_lshl_b32 s3, s3, 8
2408; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc_lo
2409; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
2410; GFX11-NEXT:    s_and_b32 s4, s4, 0xff
2411; GFX11-NEXT:    s_or_b32 s2, s2, s5
2412; GFX11-NEXT:    s_or_b32 s3, s9, s3
2413; GFX11-NEXT:    v_add_nc_u32_e32 v2, 0xffffffe8, v1
2414; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2415; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v1
2416; GFX11-NEXT:    s_and_b32 s4, 0xffff, s4
2417; GFX11-NEXT:    s_lshr_b32 s2, s2, 1
2418; GFX11-NEXT:    s_and_b32 s3, 0xffff, s3
2419; GFX11-NEXT:    s_lshl_b32 s4, s4, 16
2420; GFX11-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc_lo
2421; GFX11-NEXT:    s_or_b32 s3, s3, s4
2422; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2423; GFX11-NEXT:    s_lshr_b32 s3, s3, 1
2424; GFX11-NEXT:    v_sub_nc_u32_e32 v2, 23, v1
2425; GFX11-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
2426; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v0
2427; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
2428; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
2429; GFX11-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2430; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2431; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2432; GFX11-NEXT:    v_lshrrev_b32_e64 v2, v2, s2
2433; GFX11-NEXT:    s_lshl_b32 s2, s6, 16
2434; GFX11-NEXT:    v_sub_nc_u32_e32 v3, 23, v0
2435; GFX11-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
2436; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
2437; GFX11-NEXT:    v_lshl_or_b32 v1, s0, v1, v2
2438; GFX11-NEXT:    s_or_b32 s0, s1, s2
2439; GFX11-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
2440; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2441; GFX11-NEXT:    v_bfe_u32 v2, v1, 8, 8
2442; GFX11-NEXT:    v_lshrrev_b32_e64 v3, v3, s3
2443; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2444; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
2445; GFX11-NEXT:    v_lshl_or_b32 v0, s0, v0, v3
2446; GFX11-NEXT:    v_bfe_u32 v3, v1, 16, 8
2447; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
2448; GFX11-NEXT:    v_and_or_b32 v1, 0xff, v1, v2
2449; GFX11-NEXT:    v_and_b32_e32 v4, 0xff, v0
2450; GFX11-NEXT:    v_bfe_u32 v5, v0, 8, 8
2451; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
2452; GFX11-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
2453; GFX11-NEXT:    v_bfe_u32 v0, v0, 16, 8
2454; GFX11-NEXT:    v_lshlrev_b32_e32 v4, 24, v4
2455; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2456; GFX11-NEXT:    v_lshl_or_b32 v0, v0, 8, v5
2457; GFX11-NEXT:    v_or3_b32 v1, v1, v3, v4
2458; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2459; GFX11-NEXT:    v_readfirstlane_b32 s1, v0
2460; GFX11-NEXT:    v_readfirstlane_b32 s0, v1
2461; GFX11-NEXT:    ; return to shader part epilog
2462  %lhs = bitcast i48 %lhs.arg to <2 x i24>
2463  %rhs = bitcast i48 %rhs.arg to <2 x i24>
2464  %amt = bitcast i48 %amt.arg to <2 x i24>
2465  %result = call <2 x i24> @llvm.fshl.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt)
2466  %cast.result = bitcast <2 x i24> %result to i48
2467  ret i48 %cast.result
2468}
2469
; Funnel-shift-left of <2 x i24> in a plain function (default calling
; convention, no inreg arguments): the three <2 x i24> operands are passed
; straight to @llvm.fshl.v2i24 and the intrinsic's result is returned.
; In the expected output the operands are read from v0-v5, the results are
; left in v0/v1, and the function returns through s_setpc_b64 s[30:31].
; The per-target assertions below are autogenerated (see the note on the
; first line of this file); regenerate them with the update script instead
; of editing them by hand.
; NOTE(review): the rcp / mul_hi / mul_lo-by-24 / compare-and-select
; sequence in the expected output looks like an unsigned remainder of each
; shift amount by 24 -- confirm against the legalizer before relying on it.
2470define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
2471; GFX6-LABEL: v_fshl_v2i24:
2472; GFX6:       ; %bb.0:
2473; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2474; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v6, 24
2475; GFX6-NEXT:    v_rcp_iflag_f32_e32 v6, v6
2476; GFX6-NEXT:    v_not_b32_e32 v7, 23
2477; GFX6-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2478; GFX6-NEXT:    v_and_b32_e32 v5, 0xffffff, v5
2479; GFX6-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
2480; GFX6-NEXT:    v_cvt_u32_f32_e32 v6, v6
2481; GFX6-NEXT:    v_bfe_u32 v2, v2, 1, 23
2482; GFX6-NEXT:    v_mul_lo_u32 v8, v6, v7
2483; GFX6-NEXT:    v_mul_hi_u32 v8, v6, v8
2484; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
2485; GFX6-NEXT:    v_mul_hi_u32 v8, v4, v6
2486; GFX6-NEXT:    v_mul_hi_u32 v6, v5, v6
2487; GFX6-NEXT:    v_mul_lo_u32 v8, v8, 24
2488; GFX6-NEXT:    v_mul_lo_u32 v6, v6, 24
2489; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, v4, v8
2490; GFX6-NEXT:    v_add_i32_e32 v8, vcc, v4, v7
2491; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
2492; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
2493; GFX6-NEXT:    v_add_i32_e32 v8, vcc, v4, v7
2494; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
2495; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
2496; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, 23, v4
2497; GFX6-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2498; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v4, v0
2499; GFX6-NEXT:    v_and_b32_e32 v4, 0xffffff, v8
2500; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
2501; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
2502; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v5, v6
2503; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v2, v7
2504; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2505; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2506; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 0xffffffe8, v2
2507; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2508; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2509; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 23, v2
2510; GFX6-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2511; GFX6-NEXT:    v_lshlrev_b32_e32 v1, v2, v1
2512; GFX6-NEXT:    v_bfe_u32 v2, v3, 1, 23
2513; GFX6-NEXT:    v_and_b32_e32 v3, 0xffffff, v4
2514; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v3, v2
2515; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
2516; GFX6-NEXT:    s_setpc_b64 s[30:31]
2517;
2518; GFX8-LABEL: v_fshl_v2i24:
2519; GFX8:       ; %bb.0:
2520; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2521; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v6, 24
2522; GFX8-NEXT:    v_rcp_iflag_f32_e32 v6, v6
2523; GFX8-NEXT:    v_not_b32_e32 v7, 23
2524; GFX8-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2525; GFX8-NEXT:    v_and_b32_e32 v5, 0xffffff, v5
2526; GFX8-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
2527; GFX8-NEXT:    v_cvt_u32_f32_e32 v6, v6
2528; GFX8-NEXT:    v_bfe_u32 v2, v2, 1, 23
2529; GFX8-NEXT:    v_mul_lo_u32 v8, v6, v7
2530; GFX8-NEXT:    v_mul_hi_u32 v8, v6, v8
2531; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v8
2532; GFX8-NEXT:    v_mul_hi_u32 v8, v4, v6
2533; GFX8-NEXT:    v_mul_hi_u32 v6, v5, v6
2534; GFX8-NEXT:    v_mul_lo_u32 v8, v8, 24
2535; GFX8-NEXT:    v_mul_lo_u32 v6, v6, 24
2536; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, v4, v8
2537; GFX8-NEXT:    v_add_u32_e32 v8, vcc, v4, v7
2538; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
2539; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
2540; GFX8-NEXT:    v_add_u32_e32 v8, vcc, v4, v7
2541; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
2542; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
2543; GFX8-NEXT:    v_sub_u32_e32 v8, vcc, 23, v4
2544; GFX8-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2545; GFX8-NEXT:    v_lshlrev_b32_e32 v0, v4, v0
2546; GFX8-NEXT:    v_and_b32_e32 v4, 0xffffff, v8
2547; GFX8-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
2548; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
2549; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v5, v6
2550; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v2, v7
2551; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2552; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2553; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 0xffffffe8, v2
2554; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2555; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2556; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 23, v2
2557; GFX8-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2558; GFX8-NEXT:    v_lshlrev_b32_e32 v1, v2, v1
2559; GFX8-NEXT:    v_bfe_u32 v2, v3, 1, 23
2560; GFX8-NEXT:    v_and_b32_e32 v3, 0xffffff, v4
2561; GFX8-NEXT:    v_lshrrev_b32_e32 v2, v3, v2
2562; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
2563; GFX8-NEXT:    s_setpc_b64 s[30:31]
2564;
2565; GFX9-LABEL: v_fshl_v2i24:
2566; GFX9:       ; %bb.0:
2567; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2568; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v6, 24
2569; GFX9-NEXT:    v_rcp_iflag_f32_e32 v6, v6
2570; GFX9-NEXT:    v_not_b32_e32 v7, 23
2571; GFX9-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2572; GFX9-NEXT:    v_and_b32_e32 v5, 0xffffff, v5
2573; GFX9-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
2574; GFX9-NEXT:    v_cvt_u32_f32_e32 v6, v6
2575; GFX9-NEXT:    v_bfe_u32 v2, v2, 1, 23
2576; GFX9-NEXT:    v_bfe_u32 v3, v3, 1, 23
2577; GFX9-NEXT:    v_mul_lo_u32 v7, v6, v7
2578; GFX9-NEXT:    v_mul_hi_u32 v7, v6, v7
2579; GFX9-NEXT:    v_add_u32_e32 v6, v6, v7
2580; GFX9-NEXT:    v_mul_hi_u32 v7, v4, v6
2581; GFX9-NEXT:    v_mul_hi_u32 v6, v5, v6
2582; GFX9-NEXT:    v_mul_lo_u32 v7, v7, 24
2583; GFX9-NEXT:    v_mul_lo_u32 v6, v6, 24
2584; GFX9-NEXT:    v_sub_u32_e32 v4, v4, v7
2585; GFX9-NEXT:    v_sub_u32_e32 v5, v5, v6
2586; GFX9-NEXT:    v_add_u32_e32 v6, 0xffffffe8, v4
2587; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
2588; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
2589; GFX9-NEXT:    v_add_u32_e32 v6, 0xffffffe8, v4
2590; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
2591; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
2592; GFX9-NEXT:    v_sub_u32_e32 v6, 23, v4
2593; GFX9-NEXT:    v_and_b32_e32 v6, 0xffffff, v6
2594; GFX9-NEXT:    v_add_u32_e32 v7, 0xffffffe8, v5
2595; GFX9-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2596; GFX9-NEXT:    v_lshrrev_b32_e32 v2, v6, v2
2597; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v5
2598; GFX9-NEXT:    v_lshl_or_b32 v0, v0, v4, v2
2599; GFX9-NEXT:    v_cndmask_b32_e32 v2, v5, v7, vcc
2600; GFX9-NEXT:    v_add_u32_e32 v4, 0xffffffe8, v2
2601; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2602; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2603; GFX9-NEXT:    v_sub_u32_e32 v4, 23, v2
2604; GFX9-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2605; GFX9-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2606; GFX9-NEXT:    v_lshrrev_b32_e32 v3, v4, v3
2607; GFX9-NEXT:    v_lshl_or_b32 v1, v1, v2, v3
2608; GFX9-NEXT:    s_setpc_b64 s[30:31]
2609;
2610; GFX10-LABEL: v_fshl_v2i24:
2611; GFX10:       ; %bb.0:
2612; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2613; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v6, 24
2614; GFX10-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2615; GFX10-NEXT:    v_and_b32_e32 v5, 0xffffff, v5
2616; GFX10-NEXT:    v_bfe_u32 v2, v2, 1, 23
2617; GFX10-NEXT:    v_bfe_u32 v3, v3, 1, 23
2618; GFX10-NEXT:    v_rcp_iflag_f32_e32 v6, v6
2619; GFX10-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
2620; GFX10-NEXT:    v_cvt_u32_f32_e32 v6, v6
2621; GFX10-NEXT:    v_mul_lo_u32 v7, 0xffffffe8, v6
2622; GFX10-NEXT:    v_mul_hi_u32 v7, v6, v7
2623; GFX10-NEXT:    v_add_nc_u32_e32 v6, v6, v7
2624; GFX10-NEXT:    v_mul_hi_u32 v7, v4, v6
2625; GFX10-NEXT:    v_mul_hi_u32 v6, v5, v6
2626; GFX10-NEXT:    v_mul_lo_u32 v7, v7, 24
2627; GFX10-NEXT:    v_mul_lo_u32 v6, v6, 24
2628; GFX10-NEXT:    v_sub_nc_u32_e32 v4, v4, v7
2629; GFX10-NEXT:    v_sub_nc_u32_e32 v5, v5, v6
2630; GFX10-NEXT:    v_add_nc_u32_e32 v6, 0xffffffe8, v4
2631; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v4
2632; GFX10-NEXT:    v_add_nc_u32_e32 v7, 0xffffffe8, v5
2633; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc_lo
2634; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
2635; GFX10-NEXT:    v_add_nc_u32_e32 v6, 0xffffffe8, v4
2636; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc_lo
2637; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v4
2638; GFX10-NEXT:    v_add_nc_u32_e32 v7, 0xffffffe8, v5
2639; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc_lo
2640; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
2641; GFX10-NEXT:    v_sub_nc_u32_e32 v6, 23, v4
2642; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc_lo
2643; GFX10-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2644; GFX10-NEXT:    v_and_b32_e32 v6, 0xffffff, v6
2645; GFX10-NEXT:    v_sub_nc_u32_e32 v7, 23, v5
2646; GFX10-NEXT:    v_and_b32_e32 v5, 0xffffff, v5
2647; GFX10-NEXT:    v_lshrrev_b32_e32 v2, v6, v2
2648; GFX10-NEXT:    v_and_b32_e32 v7, 0xffffff, v7
2649; GFX10-NEXT:    v_lshl_or_b32 v0, v0, v4, v2
2650; GFX10-NEXT:    v_lshrrev_b32_e32 v3, v7, v3
2651; GFX10-NEXT:    v_lshl_or_b32 v1, v1, v5, v3
2652; GFX10-NEXT:    s_setpc_b64 s[30:31]
2653;
2654; GFX11-LABEL: v_fshl_v2i24:
2655; GFX11:       ; %bb.0:
2656; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2657; GFX11-NEXT:    v_cvt_f32_ubyte0_e32 v6, 24
2658; GFX11-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2659; GFX11-NEXT:    v_bfe_u32 v2, v2, 1, 23
2660; GFX11-NEXT:    v_bfe_u32 v3, v3, 1, 23
2661; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1)
2662; GFX11-NEXT:    v_rcp_iflag_f32_e32 v6, v6
2663; GFX11-NEXT:    s_waitcnt_depctr 0xfff
2664; GFX11-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
2665; GFX11-NEXT:    v_cvt_u32_f32_e32 v6, v6
2666; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2667; GFX11-NEXT:    v_mul_lo_u32 v7, 0xffffffe8, v6
2668; GFX11-NEXT:    v_mul_hi_u32 v7, v6, v7
2669; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2670; GFX11-NEXT:    v_add_nc_u32_e32 v6, v6, v7
2671; GFX11-NEXT:    v_mul_hi_u32 v7, v4, v6
2672; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2673; GFX11-NEXT:    v_mul_lo_u32 v7, v7, 24
2674; GFX11-NEXT:    v_sub_nc_u32_e32 v4, v4, v7
2675; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
2676; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v4
2677; GFX11-NEXT:    v_and_b32_e32 v5, 0xffffff, v5
2678; GFX11-NEXT:    v_mul_hi_u32 v6, v5, v6
2679; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2680; GFX11-NEXT:    v_mul_lo_u32 v6, v6, 24
2681; GFX11-NEXT:    v_sub_nc_u32_e32 v5, v5, v6
2682; GFX11-NEXT:    v_add_nc_u32_e32 v6, 0xffffffe8, v4
2683; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
2684; GFX11-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc_lo
2685; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
2686; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
2687; GFX11-NEXT:    v_add_nc_u32_e32 v6, 0xffffffe8, v4
2688; GFX11-NEXT:    v_add_nc_u32_e32 v7, 0xffffffe8, v5
2689; GFX11-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc_lo
2690; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v4
2691; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2692; GFX11-NEXT:    v_dual_cndmask_b32 v4, v4, v6 :: v_dual_add_nc_u32 v7, 0xffffffe8, v5
2693; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
2694; GFX11-NEXT:    v_sub_nc_u32_e32 v6, 23, v4
2695; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
2696; GFX11-NEXT:    v_dual_cndmask_b32 v5, v5, v7 :: v_dual_and_b32 v4, 0xffffff, v4
2697; GFX11-NEXT:    v_and_b32_e32 v6, 0xffffff, v6
2698; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
2699; GFX11-NEXT:    v_sub_nc_u32_e32 v7, 23, v5
2700; GFX11-NEXT:    v_and_b32_e32 v5, 0xffffff, v5
2701; GFX11-NEXT:    v_lshrrev_b32_e32 v2, v6, v2
2702; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
2703; GFX11-NEXT:    v_and_b32_e32 v7, 0xffffff, v7
2704; GFX11-NEXT:    v_lshl_or_b32 v0, v0, v4, v2
2705; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
2706; GFX11-NEXT:    v_lshrrev_b32_e32 v3, v7, v3
2707; GFX11-NEXT:    v_lshl_or_b32 v1, v1, v5, v3
2708; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ; A single vector intrinsic call handles both i24 elements.
2709  %result = call <2 x i24> @llvm.fshl.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt)
2710  ret <2 x i24> %result
2711}
2712
; i32 funnel-shift-left with a variable shift amount, written as an
; amdgpu_ps function whose three operands are all marked inreg; the body is
; a single call to @llvm.fshl.i32.
; In the expected output every target uses two v_alignbit_b32 instructions
; plus an s_not_b32 of s2, and finishes with v_readfirstlane_b32 into s0.
; GFX10 and GFX11 pass s1 to the first v_alignbit_b32 directly, whereas
; GFX6, GFX8 and GFX9 first copy it into v0 with v_mov_b32.
; The assertions are autogenerated (see the note on the first line of this
; file); regenerate them with the update script instead of hand-editing.
2713define amdgpu_ps i32 @s_fshl_i32(i32 inreg %lhs, i32 inreg %rhs, i32 inreg %amt) {
2714; GFX6-LABEL: s_fshl_i32:
2715; GFX6:       ; %bb.0:
2716; GFX6-NEXT:    v_mov_b32_e32 v0, s1
2717; GFX6-NEXT:    s_not_b32 s1, s2
2718; GFX6-NEXT:    v_alignbit_b32 v0, s0, v0, 1
2719; GFX6-NEXT:    s_lshr_b32 s0, s0, 1
2720; GFX6-NEXT:    v_mov_b32_e32 v1, s1
2721; GFX6-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2722; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
2723; GFX6-NEXT:    ; return to shader part epilog
2724;
2725; GFX8-LABEL: s_fshl_i32:
2726; GFX8:       ; %bb.0:
2727; GFX8-NEXT:    v_mov_b32_e32 v0, s1
2728; GFX8-NEXT:    s_not_b32 s1, s2
2729; GFX8-NEXT:    v_alignbit_b32 v0, s0, v0, 1
2730; GFX8-NEXT:    s_lshr_b32 s0, s0, 1
2731; GFX8-NEXT:    v_mov_b32_e32 v1, s1
2732; GFX8-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2733; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
2734; GFX8-NEXT:    ; return to shader part epilog
2735;
2736; GFX9-LABEL: s_fshl_i32:
2737; GFX9:       ; %bb.0:
2738; GFX9-NEXT:    v_mov_b32_e32 v0, s1
2739; GFX9-NEXT:    s_not_b32 s1, s2
2740; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, 1
2741; GFX9-NEXT:    s_lshr_b32 s0, s0, 1
2742; GFX9-NEXT:    v_mov_b32_e32 v1, s1
2743; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2744; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
2745; GFX9-NEXT:    ; return to shader part epilog
2746;
2747; GFX10-LABEL: s_fshl_i32:
2748; GFX10:       ; %bb.0:
2749; GFX10-NEXT:    v_alignbit_b32 v0, s0, s1, 1
2750; GFX10-NEXT:    s_lshr_b32 s0, s0, 1
2751; GFX10-NEXT:    s_not_b32 s1, s2
2752; GFX10-NEXT:    v_alignbit_b32 v0, s0, v0, s1
2753; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
2754; GFX10-NEXT:    ; return to shader part epilog
2755;
2756; GFX11-LABEL: s_fshl_i32:
2757; GFX11:       ; %bb.0:
2758; GFX11-NEXT:    v_alignbit_b32 v0, s0, s1, 1
2759; GFX11-NEXT:    s_lshr_b32 s0, s0, 1
2760; GFX11-NEXT:    s_not_b32 s1, s2
2761; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
2762; GFX11-NEXT:    v_alignbit_b32 v0, s0, v0, s1
2763; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2764; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
2765; GFX11-NEXT:    ; return to shader part epilog
2766  %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 %amt)
2767  ret i32 %result
2768}
2769
; i32 funnel-shift-left by the constant 5, written as an amdgpu_ps function
; with both data operands marked inreg; the body is a single call to
; @llvm.fshl.i32 with a literal shift amount.
; The expected output on every target is one v_alignbit_b32 whose immediate
; is 27 (= 32 - 5), followed by v_readfirstlane_b32 into s0. GFX6, GFX8 and
; GFX9 additionally copy s1 into v0 first; GFX10 and GFX11 use s1 directly.
; The assertions are autogenerated (see the note on the first line of this
; file); regenerate them with the update script instead of hand-editing.
2770define amdgpu_ps i32 @s_fshl_i32_5(i32 inreg %lhs, i32 inreg %rhs) {
2771; GFX6-LABEL: s_fshl_i32_5:
2772; GFX6:       ; %bb.0:
2773; GFX6-NEXT:    v_mov_b32_e32 v0, s1
2774; GFX6-NEXT:    v_alignbit_b32 v0, s0, v0, 27
2775; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
2776; GFX6-NEXT:    ; return to shader part epilog
2777;
2778; GFX8-LABEL: s_fshl_i32_5:
2779; GFX8:       ; %bb.0:
2780; GFX8-NEXT:    v_mov_b32_e32 v0, s1
2781; GFX8-NEXT:    v_alignbit_b32 v0, s0, v0, 27
2782; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
2783; GFX8-NEXT:    ; return to shader part epilog
2784;
2785; GFX9-LABEL: s_fshl_i32_5:
2786; GFX9:       ; %bb.0:
2787; GFX9-NEXT:    v_mov_b32_e32 v0, s1
2788; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, 27
2789; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
2790; GFX9-NEXT:    ; return to shader part epilog
2791;
2792; GFX10-LABEL: s_fshl_i32_5:
2793; GFX10:       ; %bb.0:
2794; GFX10-NEXT:    v_alignbit_b32 v0, s0, s1, 27
2795; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
2796; GFX10-NEXT:    ; return to shader part epilog
2797;
2798; GFX11-LABEL: s_fshl_i32_5:
2799; GFX11:       ; %bb.0:
2800; GFX11-NEXT:    v_alignbit_b32 v0, s0, s1, 27
2801; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2802; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
2803; GFX11-NEXT:    ; return to shader part epilog
2804  %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 5)
2805  ret i32 %result
2806}
2807
; i32 funnel shift left by the constant 8 in an amdgpu_ps function whose two
; operands are both inreg (they appear as s0 and s1 in the checks).
; Same shape as the shift-by-5 test: one v_alignbit_b32, here with shift
; amount 24 (32 - 8), then v_readfirstlane_b32 into s0. Only the
; GFX6/GFX8/GFX9 checks copy s1 into a VGPR first.
2808define amdgpu_ps i32 @s_fshl_i32_8(i32 inreg %lhs, i32 inreg %rhs) {
2809; GFX6-LABEL: s_fshl_i32_8:
2810; GFX6:       ; %bb.0:
2811; GFX6-NEXT:    v_mov_b32_e32 v0, s1
2812; GFX6-NEXT:    v_alignbit_b32 v0, s0, v0, 24
2813; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
2814; GFX6-NEXT:    ; return to shader part epilog
2815;
2816; GFX8-LABEL: s_fshl_i32_8:
2817; GFX8:       ; %bb.0:
2818; GFX8-NEXT:    v_mov_b32_e32 v0, s1
2819; GFX8-NEXT:    v_alignbit_b32 v0, s0, v0, 24
2820; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
2821; GFX8-NEXT:    ; return to shader part epilog
2822;
2823; GFX9-LABEL: s_fshl_i32_8:
2824; GFX9:       ; %bb.0:
2825; GFX9-NEXT:    v_mov_b32_e32 v0, s1
2826; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, 24
2827; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
2828; GFX9-NEXT:    ; return to shader part epilog
2829;
2830; GFX10-LABEL: s_fshl_i32_8:
2831; GFX10:       ; %bb.0:
2832; GFX10-NEXT:    v_alignbit_b32 v0, s0, s1, 24
2833; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
2834; GFX10-NEXT:    ; return to shader part epilog
2835;
2836; GFX11-LABEL: s_fshl_i32_8:
2837; GFX11:       ; %bb.0:
2838; GFX11-NEXT:    v_alignbit_b32 v0, s0, s1, 24
2839; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2840; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
2841; GFX11-NEXT:    ; return to shader part epilog
2842  %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 8)
2843  ret i32 %result
2844}
2845
; i32 funnel shift left by a variable amount with all three operands passed as
; ordinary (non-inreg) arguments, i.e. v0, v1 and v2 in the checks.
; The expected sequence is: v_alignbit_b32 of lhs/rhs by 1, lhs shifted right
; by 1, the amount inverted with v_not_b32, then a final v_alignbit_b32 using
; the inverted amount. The tahiti, fiji, gfx900 and gfx1010 RUN lines share the
; GCN check block; the GFX11 block differs only by the extra s_delay_alu line.
2846define i32 @v_fshl_i32(i32 %lhs, i32 %rhs, i32 %amt) {
2847; GCN-LABEL: v_fshl_i32:
2848; GCN:       ; %bb.0:
2849; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2850; GCN-NEXT:    v_alignbit_b32 v1, v0, v1, 1
2851; GCN-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
2852; GCN-NEXT:    v_not_b32_e32 v2, v2
2853; GCN-NEXT:    v_alignbit_b32 v0, v0, v1, v2
2854; GCN-NEXT:    s_setpc_b64 s[30:31]
2855;
2856; GFX11-LABEL: v_fshl_i32:
2857; GFX11:       ; %bb.0:
2858; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2859; GFX11-NEXT:    v_alignbit_b32 v1, v0, v1, 1
2860; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
2861; GFX11-NEXT:    v_not_b32_e32 v2, v2
2862; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2863; GFX11-NEXT:    v_alignbit_b32 v0, v0, v1, v2
2864; GFX11-NEXT:    s_setpc_b64 s[30:31]
2865  %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 %amt)
2866  ret i32 %result
2867}
2868
; i32 funnel shift left by the constant 5 with both operands passed as ordinary
; (non-inreg) arguments. Both check blocks expect a single v_alignbit_b32 with
; shift amount 27 (32 - 5) between the entry s_waitcnt and the s_setpc_b64
; return.
2869define i32 @v_fshl_i32_5(i32 %lhs, i32 %rhs) {
2870; GCN-LABEL: v_fshl_i32_5:
2871; GCN:       ; %bb.0:
2872; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2873; GCN-NEXT:    v_alignbit_b32 v0, v0, v1, 27
2874; GCN-NEXT:    s_setpc_b64 s[30:31]
2875;
2876; GFX11-LABEL: v_fshl_i32_5:
2877; GFX11:       ; %bb.0:
2878; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2879; GFX11-NEXT:    v_alignbit_b32 v0, v0, v1, 27
2880; GFX11-NEXT:    s_setpc_b64 s[30:31]
2881  %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 5)
2882  ret i32 %result
2883}
2884
; i32 funnel shift left by the constant 8 with both operands passed as ordinary
; (non-inreg) arguments. Both check blocks expect a single v_alignbit_b32 with
; shift amount 24 (32 - 8) between the entry s_waitcnt and the s_setpc_b64
; return.
2885define i32 @v_fshl_i32_8(i32 %lhs, i32 %rhs) {
2886; GCN-LABEL: v_fshl_i32_8:
2887; GCN:       ; %bb.0:
2888; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2889; GCN-NEXT:    v_alignbit_b32 v0, v0, v1, 24
2890; GCN-NEXT:    s_setpc_b64 s[30:31]
2891;
2892; GFX11-LABEL: v_fshl_i32_8:
2893; GFX11:       ; %bb.0:
2894; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2895; GFX11-NEXT:    v_alignbit_b32 v0, v0, v1, 24
2896; GFX11-NEXT:    s_setpc_b64 s[30:31]
2897  %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 8)
2898  ret i32 %result
2899}
2900
; Mixed-operand i32 funnel shift left: %lhs and %rhs are inreg (s0, s1 in the
; checks) and %amt is a plain argument (v0). The result is bitcast to float so
; the amdgpu_ps function returns it in v0 without a readfirstlane.
; The GFX6/GFX8/GFX9 checks copy s1 into v1 before the first v_alignbit_b32;
; the GFX10/GFX11 checks use s0 and s1 in that instruction directly. The lhs
; pre-shift stays scalar (s_lshr_b32) and the amount is inverted with
; v_not_b32 on every target.
2901define amdgpu_ps float @v_fshl_i32_ssv(i32 inreg %lhs, i32 inreg %rhs, i32 %amt) {
2902; GFX6-LABEL: v_fshl_i32_ssv:
2903; GFX6:       ; %bb.0:
2904; GFX6-NEXT:    v_mov_b32_e32 v1, s1
2905; GFX6-NEXT:    v_alignbit_b32 v1, s0, v1, 1
2906; GFX6-NEXT:    s_lshr_b32 s0, s0, 1
2907; GFX6-NEXT:    v_not_b32_e32 v0, v0
2908; GFX6-NEXT:    v_alignbit_b32 v0, s0, v1, v0
2909; GFX6-NEXT:    ; return to shader part epilog
2910;
2911; GFX8-LABEL: v_fshl_i32_ssv:
2912; GFX8:       ; %bb.0:
2913; GFX8-NEXT:    v_mov_b32_e32 v1, s1
2914; GFX8-NEXT:    v_alignbit_b32 v1, s0, v1, 1
2915; GFX8-NEXT:    s_lshr_b32 s0, s0, 1
2916; GFX8-NEXT:    v_not_b32_e32 v0, v0
2917; GFX8-NEXT:    v_alignbit_b32 v0, s0, v1, v0
2918; GFX8-NEXT:    ; return to shader part epilog
2919;
2920; GFX9-LABEL: v_fshl_i32_ssv:
2921; GFX9:       ; %bb.0:
2922; GFX9-NEXT:    v_mov_b32_e32 v1, s1
2923; GFX9-NEXT:    v_alignbit_b32 v1, s0, v1, 1
2924; GFX9-NEXT:    s_lshr_b32 s0, s0, 1
2925; GFX9-NEXT:    v_not_b32_e32 v0, v0
2926; GFX9-NEXT:    v_alignbit_b32 v0, s0, v1, v0
2927; GFX9-NEXT:    ; return to shader part epilog
2928;
2929; GFX10-LABEL: v_fshl_i32_ssv:
2930; GFX10:       ; %bb.0:
2931; GFX10-NEXT:    v_alignbit_b32 v1, s0, s1, 1
2932; GFX10-NEXT:    v_not_b32_e32 v0, v0
2933; GFX10-NEXT:    s_lshr_b32 s0, s0, 1
2934; GFX10-NEXT:    v_alignbit_b32 v0, s0, v1, v0
2935; GFX10-NEXT:    ; return to shader part epilog
2936;
2937; GFX11-LABEL: v_fshl_i32_ssv:
2938; GFX11:       ; %bb.0:
2939; GFX11-NEXT:    v_alignbit_b32 v1, s0, s1, 1
2940; GFX11-NEXT:    v_not_b32_e32 v0, v0
2941; GFX11-NEXT:    s_lshr_b32 s0, s0, 1
2942; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
2943; GFX11-NEXT:    v_alignbit_b32 v0, s0, v1, v0
2944; GFX11-NEXT:    ; return to shader part epilog
2945  %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 %amt)
2946  %cast.result = bitcast i32 %result to float
2947  ret float %cast.result
2948}
2949
; Mixed-operand i32 funnel shift left: %lhs and %amt are inreg (s0, s1 in the
; checks) and %rhs is a plain argument (v0). The result is bitcast to float so
; the amdgpu_ps function returns it in v0.
; The amount is inverted on the scalar side (s_not_b32). The GFX6/GFX8/GFX9
; checks then copy the inverted amount into v1 for the final v_alignbit_b32,
; whereas the GFX10/GFX11 checks pass s1 to it directly alongside s0.
2950define amdgpu_ps float @v_fshl_i32_svs(i32 inreg %lhs, i32 %rhs, i32 inreg %amt) {
2951; GFX6-LABEL: v_fshl_i32_svs:
2952; GFX6:       ; %bb.0:
2953; GFX6-NEXT:    s_not_b32 s1, s1
2954; GFX6-NEXT:    v_alignbit_b32 v0, s0, v0, 1
2955; GFX6-NEXT:    s_lshr_b32 s0, s0, 1
2956; GFX6-NEXT:    v_mov_b32_e32 v1, s1
2957; GFX6-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2958; GFX6-NEXT:    ; return to shader part epilog
2959;
2960; GFX8-LABEL: v_fshl_i32_svs:
2961; GFX8:       ; %bb.0:
2962; GFX8-NEXT:    s_not_b32 s1, s1
2963; GFX8-NEXT:    v_alignbit_b32 v0, s0, v0, 1
2964; GFX8-NEXT:    s_lshr_b32 s0, s0, 1
2965; GFX8-NEXT:    v_mov_b32_e32 v1, s1
2966; GFX8-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2967; GFX8-NEXT:    ; return to shader part epilog
2968;
2969; GFX9-LABEL: v_fshl_i32_svs:
2970; GFX9:       ; %bb.0:
2971; GFX9-NEXT:    s_not_b32 s1, s1
2972; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, 1
2973; GFX9-NEXT:    s_lshr_b32 s0, s0, 1
2974; GFX9-NEXT:    v_mov_b32_e32 v1, s1
2975; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2976; GFX9-NEXT:    ; return to shader part epilog
2977;
2978; GFX10-LABEL: v_fshl_i32_svs:
2979; GFX10:       ; %bb.0:
2980; GFX10-NEXT:    v_alignbit_b32 v0, s0, v0, 1
2981; GFX10-NEXT:    s_lshr_b32 s0, s0, 1
2982; GFX10-NEXT:    s_not_b32 s1, s1
2983; GFX10-NEXT:    v_alignbit_b32 v0, s0, v0, s1
2984; GFX10-NEXT:    ; return to shader part epilog
2985;
2986; GFX11-LABEL: v_fshl_i32_svs:
2987; GFX11:       ; %bb.0:
2988; GFX11-NEXT:    v_alignbit_b32 v0, s0, v0, 1
2989; GFX11-NEXT:    s_lshr_b32 s0, s0, 1
2990; GFX11-NEXT:    s_not_b32 s1, s1
2991; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
2992; GFX11-NEXT:    v_alignbit_b32 v0, s0, v0, s1
2993; GFX11-NEXT:    ; return to shader part epilog
2994  %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 %amt)
2995  %cast.result = bitcast i32 %result to float
2996  ret float %cast.result
2997}
2998
; i32 funnel shift left by a variable amount in an amdgpu_ps function that
; returns the result bitcast to float (so it stays in v0).
; NOTE(review): the "_vss" suffix suggests %lhs was meant to be a VGPR with
; %rhs and %amt in SGPRs (compare v_fshl_i32_ssv and v_fshl_i32_svs, where the
; non-inreg argument matches the "v" position in the name). Here all three
; arguments are inreg and the checks use s0, s1 and s2, so as written this
; repeats the all-SGPR operand case. Confirm whether %lhs should drop inreg,
; and if so regenerate the checks with utils/update_llc_test_checks.py.
2999define amdgpu_ps float @v_fshl_i32_vss(i32 inreg %lhs, i32 inreg %rhs, i32 inreg %amt) {
3000; GFX6-LABEL: v_fshl_i32_vss:
3001; GFX6:       ; %bb.0:
3002; GFX6-NEXT:    v_mov_b32_e32 v0, s1
3003; GFX6-NEXT:    s_not_b32 s1, s2
3004; GFX6-NEXT:    v_alignbit_b32 v0, s0, v0, 1
3005; GFX6-NEXT:    s_lshr_b32 s0, s0, 1
3006; GFX6-NEXT:    v_mov_b32_e32 v1, s1
3007; GFX6-NEXT:    v_alignbit_b32 v0, s0, v0, v1
3008; GFX6-NEXT:    ; return to shader part epilog
3009;
3010; GFX8-LABEL: v_fshl_i32_vss:
3011; GFX8:       ; %bb.0:
3012; GFX8-NEXT:    v_mov_b32_e32 v0, s1
3013; GFX8-NEXT:    s_not_b32 s1, s2
3014; GFX8-NEXT:    v_alignbit_b32 v0, s0, v0, 1
3015; GFX8-NEXT:    s_lshr_b32 s0, s0, 1
3016; GFX8-NEXT:    v_mov_b32_e32 v1, s1
3017; GFX8-NEXT:    v_alignbit_b32 v0, s0, v0, v1
3018; GFX8-NEXT:    ; return to shader part epilog
3019;
3020; GFX9-LABEL: v_fshl_i32_vss:
3021; GFX9:       ; %bb.0:
3022; GFX9-NEXT:    v_mov_b32_e32 v0, s1
3023; GFX9-NEXT:    s_not_b32 s1, s2
3024; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, 1
3025; GFX9-NEXT:    s_lshr_b32 s0, s0, 1
3026; GFX9-NEXT:    v_mov_b32_e32 v1, s1
3027; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, v1
3028; GFX9-NEXT:    ; return to shader part epilog
3029;
3030; GFX10-LABEL: v_fshl_i32_vss:
3031; GFX10:       ; %bb.0:
3032; GFX10-NEXT:    v_alignbit_b32 v0, s0, s1, 1
3033; GFX10-NEXT:    s_lshr_b32 s0, s0, 1
3034; GFX10-NEXT:    s_not_b32 s1, s2
3035; GFX10-NEXT:    v_alignbit_b32 v0, s0, v0, s1
3036; GFX10-NEXT:    ; return to shader part epilog
3037;
3038; GFX11-LABEL: v_fshl_i32_vss:
3039; GFX11:       ; %bb.0:
3040; GFX11-NEXT:    v_alignbit_b32 v0, s0, s1, 1
3041; GFX11-NEXT:    s_lshr_b32 s0, s0, 1
3042; GFX11-NEXT:    s_not_b32 s1, s2
3043; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
3044; GFX11-NEXT:    v_alignbit_b32 v0, s0, v0, s1
3045; GFX11-NEXT:    ; return to shader part epilog
3046  %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 %amt)
3047  %cast.result = bitcast i32 %result to float
3048  ret float %cast.result
3049}
3050
; <2 x i32> funnel shift left by per-lane variable amounts, all operands passed
; as ordinary (non-inreg) arguments: lhs in v0-v1, rhs in v2-v3, amt in v4-v5.
; Each lane gets the same four-instruction sequence as the scalar i32 VGPR
; case (v_alignbit_b32 by 1, v_lshrrev_b32 by 1, v_not_b32, v_alignbit_b32).
; The GFX6/GFX8/GFX9 checks finish lane 0 before starting lane 1 and reuse
; v2/v3 as temporaries; the GFX10/GFX11 checks do the preparatory steps for
; both lanes first and emit the two final v_alignbit_b32 at the end.
3051define <2 x i32> @v_fshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt) {
3052; GFX6-LABEL: v_fshl_v2i32:
3053; GFX6:       ; %bb.0:
3054; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3055; GFX6-NEXT:    v_alignbit_b32 v2, v0, v2, 1
3056; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
3057; GFX6-NEXT:    v_not_b32_e32 v4, v4
3058; GFX6-NEXT:    v_alignbit_b32 v0, v0, v2, v4
3059; GFX6-NEXT:    v_alignbit_b32 v2, v1, v3, 1
3060; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 1, v1
3061; GFX6-NEXT:    v_not_b32_e32 v3, v5
3062; GFX6-NEXT:    v_alignbit_b32 v1, v1, v2, v3
3063; GFX6-NEXT:    s_setpc_b64 s[30:31]
3064;
3065; GFX8-LABEL: v_fshl_v2i32:
3066; GFX8:       ; %bb.0:
3067; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3068; GFX8-NEXT:    v_alignbit_b32 v2, v0, v2, 1
3069; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
3070; GFX8-NEXT:    v_not_b32_e32 v4, v4
3071; GFX8-NEXT:    v_alignbit_b32 v0, v0, v2, v4
3072; GFX8-NEXT:    v_alignbit_b32 v2, v1, v3, 1
3073; GFX8-NEXT:    v_lshrrev_b32_e32 v1, 1, v1
3074; GFX8-NEXT:    v_not_b32_e32 v3, v5
3075; GFX8-NEXT:    v_alignbit_b32 v1, v1, v2, v3
3076; GFX8-NEXT:    s_setpc_b64 s[30:31]
3077;
3078; GFX9-LABEL: v_fshl_v2i32:
3079; GFX9:       ; %bb.0:
3080; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3081; GFX9-NEXT:    v_alignbit_b32 v2, v0, v2, 1
3082; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
3083; GFX9-NEXT:    v_not_b32_e32 v4, v4
3084; GFX9-NEXT:    v_alignbit_b32 v0, v0, v2, v4
3085; GFX9-NEXT:    v_alignbit_b32 v2, v1, v3, 1
3086; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 1, v1
3087; GFX9-NEXT:    v_not_b32_e32 v3, v5
3088; GFX9-NEXT:    v_alignbit_b32 v1, v1, v2, v3
3089; GFX9-NEXT:    s_setpc_b64 s[30:31]
3090;
3091; GFX10-LABEL: v_fshl_v2i32:
3092; GFX10:       ; %bb.0:
3093; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3094; GFX10-NEXT:    v_alignbit_b32 v2, v0, v2, 1
3095; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
3096; GFX10-NEXT:    v_not_b32_e32 v4, v4
3097; GFX10-NEXT:    v_alignbit_b32 v3, v1, v3, 1
3098; GFX10-NEXT:    v_lshrrev_b32_e32 v1, 1, v1
3099; GFX10-NEXT:    v_not_b32_e32 v5, v5
3100; GFX10-NEXT:    v_alignbit_b32 v0, v0, v2, v4
3101; GFX10-NEXT:    v_alignbit_b32 v1, v1, v3, v5
3102; GFX10-NEXT:    s_setpc_b64 s[30:31]
3103;
3104; GFX11-LABEL: v_fshl_v2i32:
3105; GFX11:       ; %bb.0:
3106; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3107; GFX11-NEXT:    v_alignbit_b32 v2, v0, v2, 1
3108; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
3109; GFX11-NEXT:    v_not_b32_e32 v4, v4
3110; GFX11-NEXT:    v_alignbit_b32 v3, v1, v3, 1
3111; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 1, v1
3112; GFX11-NEXT:    v_not_b32_e32 v5, v5
3113; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
3114; GFX11-NEXT:    v_alignbit_b32 v0, v0, v2, v4
3115; GFX11-NEXT:    v_alignbit_b32 v1, v1, v3, v5
3116; GFX11-NEXT:    s_setpc_b64 s[30:31]
3117  %result = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt)
3118  ret <2 x i32> %result
3119}
3120
; <3 x i32> funnel shift left by per-lane variable amounts, all operands passed
; as ordinary (non-inreg) arguments: lhs in v0-v2, rhs in v3-v5, amt in v6-v8.
; Each lane gets the four-instruction i32 sequence (v_alignbit_b32 by 1,
; v_lshrrev_b32 by 1, v_not_b32, v_alignbit_b32). The GFX6/GFX8/GFX9 checks
; complete one lane at a time, reusing v3/v4 as temporaries; the GFX10/GFX11
; checks do all three lanes' preparatory steps first and finish with the three
; final v_alignbit_b32.
3121define <3 x i32> @v_fshl_v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt) {
3122; GFX6-LABEL: v_fshl_v3i32:
3123; GFX6:       ; %bb.0:
3124; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3125; GFX6-NEXT:    v_alignbit_b32 v3, v0, v3, 1
3126; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
3127; GFX6-NEXT:    v_not_b32_e32 v6, v6
3128; GFX6-NEXT:    v_alignbit_b32 v0, v0, v3, v6
3129; GFX6-NEXT:    v_alignbit_b32 v3, v1, v4, 1
3130; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 1, v1
3131; GFX6-NEXT:    v_not_b32_e32 v4, v7
3132; GFX6-NEXT:    v_alignbit_b32 v1, v1, v3, v4
3133; GFX6-NEXT:    v_alignbit_b32 v3, v2, v5, 1
3134; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 1, v2
3135; GFX6-NEXT:    v_not_b32_e32 v4, v8
3136; GFX6-NEXT:    v_alignbit_b32 v2, v2, v3, v4
3137; GFX6-NEXT:    s_setpc_b64 s[30:31]
3138;
3139; GFX8-LABEL: v_fshl_v3i32:
3140; GFX8:       ; %bb.0:
3141; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3142; GFX8-NEXT:    v_alignbit_b32 v3, v0, v3, 1
3143; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
3144; GFX8-NEXT:    v_not_b32_e32 v6, v6
3145; GFX8-NEXT:    v_alignbit_b32 v0, v0, v3, v6
3146; GFX8-NEXT:    v_alignbit_b32 v3, v1, v4, 1
3147; GFX8-NEXT:    v_lshrrev_b32_e32 v1, 1, v1
3148; GFX8-NEXT:    v_not_b32_e32 v4, v7
3149; GFX8-NEXT:    v_alignbit_b32 v1, v1, v3, v4
3150; GFX8-NEXT:    v_alignbit_b32 v3, v2, v5, 1
3151; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 1, v2
3152; GFX8-NEXT:    v_not_b32_e32 v4, v8
3153; GFX8-NEXT:    v_alignbit_b32 v2, v2, v3, v4
3154; GFX8-NEXT:    s_setpc_b64 s[30:31]
3155;
3156; GFX9-LABEL: v_fshl_v3i32:
3157; GFX9:       ; %bb.0:
3158; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3159; GFX9-NEXT:    v_alignbit_b32 v3, v0, v3, 1
3160; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
3161; GFX9-NEXT:    v_not_b32_e32 v6, v6
3162; GFX9-NEXT:    v_alignbit_b32 v0, v0, v3, v6
3163; GFX9-NEXT:    v_alignbit_b32 v3, v1, v4, 1
3164; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 1, v1
3165; GFX9-NEXT:    v_not_b32_e32 v4, v7
3166; GFX9-NEXT:    v_alignbit_b32 v1, v1, v3, v4
3167; GFX9-NEXT:    v_alignbit_b32 v3, v2, v5, 1
3168; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 1, v2
3169; GFX9-NEXT:    v_not_b32_e32 v4, v8
3170; GFX9-NEXT:    v_alignbit_b32 v2, v2, v3, v4
3171; GFX9-NEXT:    s_setpc_b64 s[30:31]
3172;
3173; GFX10-LABEL: v_fshl_v3i32:
3174; GFX10:       ; %bb.0:
3175; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3176; GFX10-NEXT:    v_alignbit_b32 v3, v0, v3, 1
3177; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
3178; GFX10-NEXT:    v_not_b32_e32 v6, v6
3179; GFX10-NEXT:    v_alignbit_b32 v4, v1, v4, 1
3180; GFX10-NEXT:    v_lshrrev_b32_e32 v1, 1, v1
3181; GFX10-NEXT:    v_not_b32_e32 v7, v7
3182; GFX10-NEXT:    v_alignbit_b32 v5, v2, v5, 1
3183; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 1, v2
3184; GFX10-NEXT:    v_not_b32_e32 v8, v8
3185; GFX10-NEXT:    v_alignbit_b32 v0, v0, v3, v6
3186; GFX10-NEXT:    v_alignbit_b32 v1, v1, v4, v7
3187; GFX10-NEXT:    v_alignbit_b32 v2, v2, v5, v8
3188; GFX10-NEXT:    s_setpc_b64 s[30:31]
3189;
3190; GFX11-LABEL: v_fshl_v3i32:
3191; GFX11:       ; %bb.0:
3192; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3193; GFX11-NEXT:    v_alignbit_b32 v3, v0, v3, 1
3194; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
3195; GFX11-NEXT:    v_not_b32_e32 v6, v6
3196; GFX11-NEXT:    v_alignbit_b32 v4, v1, v4, 1
3197; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 1, v1
3198; GFX11-NEXT:    v_not_b32_e32 v7, v7
3199; GFX11-NEXT:    v_alignbit_b32 v5, v2, v5, 1
3200; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 1, v2
3201; GFX11-NEXT:    v_not_b32_e32 v8, v8
3202; GFX11-NEXT:    v_alignbit_b32 v0, v0, v3, v6
3203; GFX11-NEXT:    v_alignbit_b32 v1, v1, v4, v7
3204; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
3205; GFX11-NEXT:    v_alignbit_b32 v2, v2, v5, v8
3206; GFX11-NEXT:    s_setpc_b64 s[30:31]
3207  %result = call <3 x i32> @llvm.fshl.v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt)
3208  ret <3 x i32> %result
3209}
3210
; <4 x i32> funnel shift left by per-lane variable amounts, all operands passed
; as ordinary (non-inreg) arguments: lhs in v0-v3, rhs in v4-v7, amt in v8-v11.
; Each lane gets the four-instruction i32 sequence (v_alignbit_b32 by 1,
; v_lshrrev_b32 by 1, v_not_b32, v_alignbit_b32). The GFX6/GFX8/GFX9 checks
; complete one lane at a time, reusing v4/v5 as temporaries; the GFX10/GFX11
; checks do all four lanes' preparatory steps first and finish with the four
; final v_alignbit_b32.
3211define <4 x i32> @v_fshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt) {
3212; GFX6-LABEL: v_fshl_v4i32:
3213; GFX6:       ; %bb.0:
3214; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3215; GFX6-NEXT:    v_alignbit_b32 v4, v0, v4, 1
3216; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
3217; GFX6-NEXT:    v_not_b32_e32 v8, v8
3218; GFX6-NEXT:    v_alignbit_b32 v0, v0, v4, v8
3219; GFX6-NEXT:    v_alignbit_b32 v4, v1, v5, 1
3220; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 1, v1
3221; GFX6-NEXT:    v_not_b32_e32 v5, v9
3222; GFX6-NEXT:    v_alignbit_b32 v1, v1, v4, v5
3223; GFX6-NEXT:    v_alignbit_b32 v4, v2, v6, 1
3224; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 1, v2
3225; GFX6-NEXT:    v_not_b32_e32 v5, v10
3226; GFX6-NEXT:    v_alignbit_b32 v2, v2, v4, v5
3227; GFX6-NEXT:    v_alignbit_b32 v4, v3, v7, 1
3228; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 1, v3
3229; GFX6-NEXT:    v_not_b32_e32 v5, v11
3230; GFX6-NEXT:    v_alignbit_b32 v3, v3, v4, v5
3231; GFX6-NEXT:    s_setpc_b64 s[30:31]
3232;
3233; GFX8-LABEL: v_fshl_v4i32:
3234; GFX8:       ; %bb.0:
3235; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3236; GFX8-NEXT:    v_alignbit_b32 v4, v0, v4, 1
3237; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
3238; GFX8-NEXT:    v_not_b32_e32 v8, v8
3239; GFX8-NEXT:    v_alignbit_b32 v0, v0, v4, v8
3240; GFX8-NEXT:    v_alignbit_b32 v4, v1, v5, 1
3241; GFX8-NEXT:    v_lshrrev_b32_e32 v1, 1, v1
3242; GFX8-NEXT:    v_not_b32_e32 v5, v9
3243; GFX8-NEXT:    v_alignbit_b32 v1, v1, v4, v5
3244; GFX8-NEXT:    v_alignbit_b32 v4, v2, v6, 1
3245; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 1, v2
3246; GFX8-NEXT:    v_not_b32_e32 v5, v10
3247; GFX8-NEXT:    v_alignbit_b32 v2, v2, v4, v5
3248; GFX8-NEXT:    v_alignbit_b32 v4, v3, v7, 1
3249; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 1, v3
3250; GFX8-NEXT:    v_not_b32_e32 v5, v11
3251; GFX8-NEXT:    v_alignbit_b32 v3, v3, v4, v5
3252; GFX8-NEXT:    s_setpc_b64 s[30:31]
3253;
3254; GFX9-LABEL: v_fshl_v4i32:
3255; GFX9:       ; %bb.0:
3256; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3257; GFX9-NEXT:    v_alignbit_b32 v4, v0, v4, 1
3258; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
3259; GFX9-NEXT:    v_not_b32_e32 v8, v8
3260; GFX9-NEXT:    v_alignbit_b32 v0, v0, v4, v8
3261; GFX9-NEXT:    v_alignbit_b32 v4, v1, v5, 1
3262; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 1, v1
3263; GFX9-NEXT:    v_not_b32_e32 v5, v9
3264; GFX9-NEXT:    v_alignbit_b32 v1, v1, v4, v5
3265; GFX9-NEXT:    v_alignbit_b32 v4, v2, v6, 1
3266; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 1, v2
3267; GFX9-NEXT:    v_not_b32_e32 v5, v10
3268; GFX9-NEXT:    v_alignbit_b32 v2, v2, v4, v5
3269; GFX9-NEXT:    v_alignbit_b32 v4, v3, v7, 1
3270; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 1, v3
3271; GFX9-NEXT:    v_not_b32_e32 v5, v11
3272; GFX9-NEXT:    v_alignbit_b32 v3, v3, v4, v5
3273; GFX9-NEXT:    s_setpc_b64 s[30:31]
3274;
3275; GFX10-LABEL: v_fshl_v4i32:
3276; GFX10:       ; %bb.0:
3277; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3278; GFX10-NEXT:    v_alignbit_b32 v4, v0, v4, 1
3279; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
3280; GFX10-NEXT:    v_not_b32_e32 v8, v8
3281; GFX10-NEXT:    v_alignbit_b32 v5, v1, v5, 1
3282; GFX10-NEXT:    v_lshrrev_b32_e32 v1, 1, v1
3283; GFX10-NEXT:    v_not_b32_e32 v9, v9
3284; GFX10-NEXT:    v_alignbit_b32 v6, v2, v6, 1
3285; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 1, v2
3286; GFX10-NEXT:    v_not_b32_e32 v10, v10
3287; GFX10-NEXT:    v_alignbit_b32 v7, v3, v7, 1
3288; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 1, v3
3289; GFX10-NEXT:    v_not_b32_e32 v11, v11
3290; GFX10-NEXT:    v_alignbit_b32 v0, v0, v4, v8
3291; GFX10-NEXT:    v_alignbit_b32 v1, v1, v5, v9
3292; GFX10-NEXT:    v_alignbit_b32 v2, v2, v6, v10
3293; GFX10-NEXT:    v_alignbit_b32 v3, v3, v7, v11
3294; GFX10-NEXT:    s_setpc_b64 s[30:31]
3295;
3296; GFX11-LABEL: v_fshl_v4i32:
3297; GFX11:       ; %bb.0:
3298; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3299; GFX11-NEXT:    v_alignbit_b32 v4, v0, v4, 1
3300; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
3301; GFX11-NEXT:    v_not_b32_e32 v8, v8
3302; GFX11-NEXT:    v_alignbit_b32 v5, v1, v5, 1
3303; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 1, v1
3304; GFX11-NEXT:    v_not_b32_e32 v9, v9
3305; GFX11-NEXT:    v_alignbit_b32 v6, v2, v6, 1
3306; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 1, v2
3307; GFX11-NEXT:    v_not_b32_e32 v10, v10
3308; GFX11-NEXT:    v_alignbit_b32 v7, v3, v7, 1
3309; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 1, v3
3310; GFX11-NEXT:    v_not_b32_e32 v11, v11
3311; GFX11-NEXT:    v_alignbit_b32 v0, v0, v4, v8
3312; GFX11-NEXT:    v_alignbit_b32 v1, v1, v5, v9
3313; GFX11-NEXT:    v_alignbit_b32 v2, v2, v6, v10
3314; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
3315; GFX11-NEXT:    v_alignbit_b32 v3, v3, v7, v11
3316; GFX11-NEXT:    s_setpc_b64 s[30:31]
3317  %result = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt)
3318  ret <4 x i32> %result
3319}
3320
; i16 funnel shift left by a variable amount in an amdgpu_ps function with all
; three operands inreg (s0, s1, s2 in the checks). The checks stay entirely on
; scalar instructions.
; Expected shape on every target: the left-shift amount is amt & 15, the
; right-shift amount is 15 & ~amt (s_andn2_b32, spelled s_and_not1_b32 in the
; GFX11 checks), rhs is pre-shifted right by one, and the two shifted halves
; are combined with s_or_b32. The GFX6 checks do the rhs pre-shift with a
; single s_bfe_u32; the other targets mask rhs with 0xffff and then use
; s_lshr_b32 by 1.
3321define amdgpu_ps i16 @s_fshl_i16(i16 inreg %lhs, i16 inreg %rhs, i16 inreg %amt) {
3322; GFX6-LABEL: s_fshl_i16:
3323; GFX6:       ; %bb.0:
3324; GFX6-NEXT:    s_and_b32 s3, s2, 15
3325; GFX6-NEXT:    s_andn2_b32 s2, 15, s2
3326; GFX6-NEXT:    s_and_b32 s3, 0xffff, s3
3327; GFX6-NEXT:    s_bfe_u32 s1, s1, 0xf0001
3328; GFX6-NEXT:    s_and_b32 s2, 0xffff, s2
3329; GFX6-NEXT:    s_lshl_b32 s0, s0, s3
3330; GFX6-NEXT:    s_lshr_b32 s1, s1, s2
3331; GFX6-NEXT:    s_or_b32 s0, s0, s1
3332; GFX6-NEXT:    ; return to shader part epilog
3333;
3334; GFX8-LABEL: s_fshl_i16:
3335; GFX8:       ; %bb.0:
3336; GFX8-NEXT:    s_and_b32 s3, s2, 15
3337; GFX8-NEXT:    s_andn2_b32 s2, 15, s2
3338; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
3339; GFX8-NEXT:    s_and_b32 s3, 0xffff, s3
3340; GFX8-NEXT:    s_lshr_b32 s1, s1, 1
3341; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
3342; GFX8-NEXT:    s_lshl_b32 s0, s0, s3
3343; GFX8-NEXT:    s_lshr_b32 s1, s1, s2
3344; GFX8-NEXT:    s_or_b32 s0, s0, s1
3345; GFX8-NEXT:    ; return to shader part epilog
3346;
3347; GFX9-LABEL: s_fshl_i16:
3348; GFX9:       ; %bb.0:
3349; GFX9-NEXT:    s_and_b32 s3, s2, 15
3350; GFX9-NEXT:    s_andn2_b32 s2, 15, s2
3351; GFX9-NEXT:    s_and_b32 s1, 0xffff, s1
3352; GFX9-NEXT:    s_and_b32 s3, 0xffff, s3
3353; GFX9-NEXT:    s_lshr_b32 s1, s1, 1
3354; GFX9-NEXT:    s_and_b32 s2, 0xffff, s2
3355; GFX9-NEXT:    s_lshl_b32 s0, s0, s3
3356; GFX9-NEXT:    s_lshr_b32 s1, s1, s2
3357; GFX9-NEXT:    s_or_b32 s0, s0, s1
3358; GFX9-NEXT:    ; return to shader part epilog
3359;
3360; GFX10-LABEL: s_fshl_i16:
3361; GFX10:       ; %bb.0:
3362; GFX10-NEXT:    s_and_b32 s3, s2, 15
3363; GFX10-NEXT:    s_andn2_b32 s2, 15, s2
3364; GFX10-NEXT:    s_and_b32 s1, 0xffff, s1
3365; GFX10-NEXT:    s_and_b32 s3, 0xffff, s3
3366; GFX10-NEXT:    s_lshr_b32 s1, s1, 1
3367; GFX10-NEXT:    s_and_b32 s2, 0xffff, s2
3368; GFX10-NEXT:    s_lshl_b32 s0, s0, s3
3369; GFX10-NEXT:    s_lshr_b32 s1, s1, s2
3370; GFX10-NEXT:    s_or_b32 s0, s0, s1
3371; GFX10-NEXT:    ; return to shader part epilog
3372;
3373; GFX11-LABEL: s_fshl_i16:
3374; GFX11:       ; %bb.0:
3375; GFX11-NEXT:    s_and_b32 s3, s2, 15
3376; GFX11-NEXT:    s_and_not1_b32 s2, 15, s2
3377; GFX11-NEXT:    s_and_b32 s1, 0xffff, s1
3378; GFX11-NEXT:    s_and_b32 s3, 0xffff, s3
3379; GFX11-NEXT:    s_lshr_b32 s1, s1, 1
3380; GFX11-NEXT:    s_and_b32 s2, 0xffff, s2
3381; GFX11-NEXT:    s_lshl_b32 s0, s0, s3
3382; GFX11-NEXT:    s_lshr_b32 s1, s1, s2
3383; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3384; GFX11-NEXT:    s_or_b32 s0, s0, s1
3385; GFX11-NEXT:    ; return to shader part epilog
3386  %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 %amt)
3387  ret i16 %result
3388}
3389
; i16 funnel shift left by the constant 4 in an amdgpu_ps function with both
; operands inreg (s0, s1 in the checks); all expected instructions are scalar.
; Expected shape: lhs shifted left by 4, OR'd with bits taken from rhs. The
; GFX6 checks extract the rhs bits with one s_bfe_u32 (operand 0x4000c); the
; other targets mask rhs with 0xffff and shift it right by 12 (16 - 4).
3390define amdgpu_ps i16 @s_fshl_i16_4(i16 inreg %lhs, i16 inreg %rhs) {
3391; GFX6-LABEL: s_fshl_i16_4:
3392; GFX6:       ; %bb.0:
3393; GFX6-NEXT:    s_lshl_b32 s0, s0, 4
3394; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x4000c
3395; GFX6-NEXT:    s_or_b32 s0, s0, s1
3396; GFX6-NEXT:    ; return to shader part epilog
3397;
3398; GFX8-LABEL: s_fshl_i16_4:
3399; GFX8:       ; %bb.0:
3400; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
3401; GFX8-NEXT:    s_lshl_b32 s0, s0, 4
3402; GFX8-NEXT:    s_lshr_b32 s1, s1, 12
3403; GFX8-NEXT:    s_or_b32 s0, s0, s1
3404; GFX8-NEXT:    ; return to shader part epilog
3405;
3406; GFX9-LABEL: s_fshl_i16_4:
3407; GFX9:       ; %bb.0:
3408; GFX9-NEXT:    s_and_b32 s1, 0xffff, s1
3409; GFX9-NEXT:    s_lshl_b32 s0, s0, 4
3410; GFX9-NEXT:    s_lshr_b32 s1, s1, 12
3411; GFX9-NEXT:    s_or_b32 s0, s0, s1
3412; GFX9-NEXT:    ; return to shader part epilog
3413;
3414; GFX10-LABEL: s_fshl_i16_4:
3415; GFX10:       ; %bb.0:
3416; GFX10-NEXT:    s_and_b32 s1, 0xffff, s1
3417; GFX10-NEXT:    s_lshl_b32 s0, s0, 4
3418; GFX10-NEXT:    s_lshr_b32 s1, s1, 12
3419; GFX10-NEXT:    s_or_b32 s0, s0, s1
3420; GFX10-NEXT:    ; return to shader part epilog
3421;
3422; GFX11-LABEL: s_fshl_i16_4:
3423; GFX11:       ; %bb.0:
3424; GFX11-NEXT:    s_and_b32 s1, 0xffff, s1
3425; GFX11-NEXT:    s_lshl_b32 s0, s0, 4
3426; GFX11-NEXT:    s_lshr_b32 s1, s1, 12
3427; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3428; GFX11-NEXT:    s_or_b32 s0, s0, s1
3429; GFX11-NEXT:    ; return to shader part epilog
3430  %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 4)
3431  ret i16 %result
3432}
3433
; i16 funnel shift left by the constant 5 in an amdgpu_ps function with both
; operands inreg (s0, s1 in the checks); all expected instructions are scalar.
; Same shape as the shift-by-4 test with a non-power-of-two amount: lhs shifted
; left by 5, OR'd with bits taken from rhs. The GFX6 checks extract the rhs
; bits with one s_bfe_u32 (operand 0x5000b); the other targets mask rhs with
; 0xffff and shift it right by 11 (16 - 5).
3434define amdgpu_ps i16 @s_fshl_i16_5(i16 inreg %lhs, i16 inreg %rhs) {
3435; GFX6-LABEL: s_fshl_i16_5:
3436; GFX6:       ; %bb.0:
3437; GFX6-NEXT:    s_lshl_b32 s0, s0, 5
3438; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x5000b
3439; GFX6-NEXT:    s_or_b32 s0, s0, s1
3440; GFX6-NEXT:    ; return to shader part epilog
3441;
3442; GFX8-LABEL: s_fshl_i16_5:
3443; GFX8:       ; %bb.0:
3444; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
3445; GFX8-NEXT:    s_lshl_b32 s0, s0, 5
3446; GFX8-NEXT:    s_lshr_b32 s1, s1, 11
3447; GFX8-NEXT:    s_or_b32 s0, s0, s1
3448; GFX8-NEXT:    ; return to shader part epilog
3449;
3450; GFX9-LABEL: s_fshl_i16_5:
3451; GFX9:       ; %bb.0:
3452; GFX9-NEXT:    s_and_b32 s1, 0xffff, s1
3453; GFX9-NEXT:    s_lshl_b32 s0, s0, 5
3454; GFX9-NEXT:    s_lshr_b32 s1, s1, 11
3455; GFX9-NEXT:    s_or_b32 s0, s0, s1
3456; GFX9-NEXT:    ; return to shader part epilog
3457;
3458; GFX10-LABEL: s_fshl_i16_5:
3459; GFX10:       ; %bb.0:
3460; GFX10-NEXT:    s_and_b32 s1, 0xffff, s1
3461; GFX10-NEXT:    s_lshl_b32 s0, s0, 5
3462; GFX10-NEXT:    s_lshr_b32 s1, s1, 11
3463; GFX10-NEXT:    s_or_b32 s0, s0, s1
3464; GFX10-NEXT:    ; return to shader part epilog
3465;
3466; GFX11-LABEL: s_fshl_i16_5:
3467; GFX11:       ; %bb.0:
3468; GFX11-NEXT:    s_and_b32 s1, 0xffff, s1
3469; GFX11-NEXT:    s_lshl_b32 s0, s0, 5
3470; GFX11-NEXT:    s_lshr_b32 s1, s1, 11
3471; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3472; GFX11-NEXT:    s_or_b32 s0, s0, s1
3473; GFX11-NEXT:    ; return to shader part epilog
3474  %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 5)
3475  ret i16 %result
3476}
3477
; i16 funnel shift left by a variable amount with all operands passed as
; ordinary (non-inreg) arguments (v0, v1, v2 in the checks).
; Every target computes amt & 15 for the left shift and (amt ^ -1) & 15 for the
; right shift, pre-shifts rhs right by one, and ORs the two shifted values.
; The GFX6 checks use only 32-bit instructions (v_bfe_u32 for the rhs pre-shift
; plus extra 0xffff masks on the shift amounts); the other targets use the
; 16-bit shifts v_lshlrev_b16 / v_lshrrev_b16 instead.
3478define i16 @v_fshl_i16(i16 %lhs, i16 %rhs, i16 %amt) {
3479; GFX6-LABEL: v_fshl_i16:
3480; GFX6:       ; %bb.0:
3481; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3482; GFX6-NEXT:    v_and_b32_e32 v3, 15, v2
3483; GFX6-NEXT:    v_xor_b32_e32 v2, -1, v2
3484; GFX6-NEXT:    v_and_b32_e32 v2, 15, v2
3485; GFX6-NEXT:    v_and_b32_e32 v3, 0xffff, v3
3486; GFX6-NEXT:    v_bfe_u32 v1, v1, 1, 15
3487; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v2
3488; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v3, v0
3489; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
3490; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
3491; GFX6-NEXT:    s_setpc_b64 s[30:31]
3492;
3493; GFX8-LABEL: v_fshl_i16:
3494; GFX8:       ; %bb.0:
3495; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3496; GFX8-NEXT:    v_and_b32_e32 v3, 15, v2
3497; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v2
3498; GFX8-NEXT:    v_and_b32_e32 v2, 15, v2
3499; GFX8-NEXT:    v_lshrrev_b16_e32 v1, 1, v1
3500; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v3, v0
3501; GFX8-NEXT:    v_lshrrev_b16_e32 v1, v2, v1
3502; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3503; GFX8-NEXT:    s_setpc_b64 s[30:31]
3504;
3505; GFX9-LABEL: v_fshl_i16:
3506; GFX9:       ; %bb.0:
3507; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3508; GFX9-NEXT:    v_and_b32_e32 v3, 15, v2
3509; GFX9-NEXT:    v_xor_b32_e32 v2, -1, v2
3510; GFX9-NEXT:    v_and_b32_e32 v2, 15, v2
3511; GFX9-NEXT:    v_lshrrev_b16_e32 v1, 1, v1
3512; GFX9-NEXT:    v_lshlrev_b16_e32 v0, v3, v0
3513; GFX9-NEXT:    v_lshrrev_b16_e32 v1, v2, v1
3514; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
3515; GFX9-NEXT:    s_setpc_b64 s[30:31]
3516;
3517; GFX10-LABEL: v_fshl_i16:
3518; GFX10:       ; %bb.0:
3519; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3520; GFX10-NEXT:    v_xor_b32_e32 v3, -1, v2
3521; GFX10-NEXT:    v_and_b32_e32 v2, 15, v2
3522; GFX10-NEXT:    v_lshrrev_b16 v1, 1, v1
3523; GFX10-NEXT:    v_and_b32_e32 v3, 15, v3
3524; GFX10-NEXT:    v_lshlrev_b16 v0, v2, v0
3525; GFX10-NEXT:    v_lshrrev_b16 v1, v3, v1
3526; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
3527; GFX10-NEXT:    s_setpc_b64 s[30:31]
3528;
3529; GFX11-LABEL: v_fshl_i16:
3530; GFX11:       ; %bb.0:
3531; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3532; GFX11-NEXT:    v_xor_b32_e32 v3, -1, v2
3533; GFX11-NEXT:    v_and_b32_e32 v2, 15, v2
3534; GFX11-NEXT:    v_lshrrev_b16 v1, 1, v1
3535; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3536; GFX11-NEXT:    v_and_b32_e32 v3, 15, v3
3537; GFX11-NEXT:    v_lshlrev_b16 v0, v2, v0
3538; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3539; GFX11-NEXT:    v_lshrrev_b16 v1, v3, v1
3540; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
3541; GFX11-NEXT:    s_setpc_b64 s[30:31]
3542  %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 %amt)
3543  ret i16 %result
3544}
3545
; i16 funnel shift left by the constant 4 with both operands passed as ordinary
; (non-inreg) arguments (v0, v1 in the checks).
; Expected shape: lhs shifted left by 4, OR'd with bits taken from rhs. The
; GFX6 checks use 32-bit instructions (v_lshlrev_b32 and a v_bfe_u32 with
; operands 12, 4); the other targets use v_lshlrev_b16 by 4 and v_lshrrev_b16
; by 12 (16 - 4).
3546define i16 @v_fshl_i16_4(i16 %lhs, i16 %rhs) {
3547; GFX6-LABEL: v_fshl_i16_4:
3548; GFX6:       ; %bb.0:
3549; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3550; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
3551; GFX6-NEXT:    v_bfe_u32 v1, v1, 12, 4
3552; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
3553; GFX6-NEXT:    s_setpc_b64 s[30:31]
3554;
3555; GFX8-LABEL: v_fshl_i16_4:
3556; GFX8:       ; %bb.0:
3557; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3558; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 4, v0
3559; GFX8-NEXT:    v_lshrrev_b16_e32 v1, 12, v1
3560; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3561; GFX8-NEXT:    s_setpc_b64 s[30:31]
3562;
3563; GFX9-LABEL: v_fshl_i16_4:
3564; GFX9:       ; %bb.0:
3565; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3566; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 4, v0
3567; GFX9-NEXT:    v_lshrrev_b16_e32 v1, 12, v1
3568; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
3569; GFX9-NEXT:    s_setpc_b64 s[30:31]
3570;
3571; GFX10-LABEL: v_fshl_i16_4:
3572; GFX10:       ; %bb.0:
3573; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3574; GFX10-NEXT:    v_lshlrev_b16 v0, 4, v0
3575; GFX10-NEXT:    v_lshrrev_b16 v1, 12, v1
3576; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
3577; GFX10-NEXT:    s_setpc_b64 s[30:31]
3578;
3579; GFX11-LABEL: v_fshl_i16_4:
3580; GFX11:       ; %bb.0:
3581; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3582; GFX11-NEXT:    v_lshlrev_b16 v0, 4, v0
3583; GFX11-NEXT:    v_lshrrev_b16 v1, 12, v1
3584; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3585; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
3586; GFX11-NEXT:    s_setpc_b64 s[30:31]
3587  %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 4)
3588  ret i16 %result
3589}
3590
; Funnel shift left of two i16 values by the constant amount 5, with plain
; (non-inreg) arguments. From GFX8 onward the expected code is a 16-bit left
; shift by 5 OR'd with a 16-bit right shift by 11; the GFX6 expectation uses a
; 32-bit left shift and a v_bfe_u32 instead.
; The assertions below are autogenerated (see the NOTE at the top of the file).
3591define i16 @v_fshl_i16_5(i16 %lhs, i16 %rhs) {
3592; GFX6-LABEL: v_fshl_i16_5:
3593; GFX6:       ; %bb.0:
3594; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3595; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 5, v0
3596; GFX6-NEXT:    v_bfe_u32 v1, v1, 11, 5
3597; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
3598; GFX6-NEXT:    s_setpc_b64 s[30:31]
3599;
3600; GFX8-LABEL: v_fshl_i16_5:
3601; GFX8:       ; %bb.0:
3602; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3603; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 5, v0
3604; GFX8-NEXT:    v_lshrrev_b16_e32 v1, 11, v1
3605; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3606; GFX8-NEXT:    s_setpc_b64 s[30:31]
3607;
3608; GFX9-LABEL: v_fshl_i16_5:
3609; GFX9:       ; %bb.0:
3610; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3611; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 5, v0
3612; GFX9-NEXT:    v_lshrrev_b16_e32 v1, 11, v1
3613; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
3614; GFX9-NEXT:    s_setpc_b64 s[30:31]
3615;
3616; GFX10-LABEL: v_fshl_i16_5:
3617; GFX10:       ; %bb.0:
3618; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3619; GFX10-NEXT:    v_lshlrev_b16 v0, 5, v0
3620; GFX10-NEXT:    v_lshrrev_b16 v1, 11, v1
3621; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
3622; GFX10-NEXT:    s_setpc_b64 s[30:31]
3623;
3624; GFX11-LABEL: v_fshl_i16_5:
3625; GFX11:       ; %bb.0:
3626; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3627; GFX11-NEXT:    v_lshlrev_b16 v0, 5, v0
3628; GFX11-NEXT:    v_lshrrev_b16 v1, 11, v1
3629; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3630; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
3631; GFX11-NEXT:    s_setpc_b64 s[30:31]
3632  %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 5)
3633  ret i16 %result
3634}
3635
; i16 funnel shift left in an amdgpu_ps function where %lhs and %rhs are inreg
; and %amt is not; the i16 result is bitcast to half for the return.
; In the expected code the two data operands are read from s0/s1 and the
; amount from v0. The amount is masked with 15 for the left shift, and its
; bitwise complement (also masked with 15) is the right-shift amount.
; The assertions below are autogenerated (see the NOTE at the top of the file).
3636define amdgpu_ps half @v_fshl_i16_ssv(i16 inreg %lhs, i16 inreg %rhs, i16 %amt) {
3637; GFX6-LABEL: v_fshl_i16_ssv:
3638; GFX6:       ; %bb.0:
3639; GFX6-NEXT:    v_and_b32_e32 v1, 15, v0
3640; GFX6-NEXT:    v_xor_b32_e32 v0, -1, v0
3641; GFX6-NEXT:    v_and_b32_e32 v0, 15, v0
3642; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
3643; GFX6-NEXT:    v_lshl_b32_e32 v1, s0, v1
3644; GFX6-NEXT:    s_bfe_u32 s0, s1, 0xf0001
3645; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
3646; GFX6-NEXT:    v_lshr_b32_e32 v0, s0, v0
3647; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
3648; GFX6-NEXT:    ; return to shader part epilog
3649;
3650; GFX8-LABEL: v_fshl_i16_ssv:
3651; GFX8:       ; %bb.0:
3652; GFX8-NEXT:    v_and_b32_e32 v1, 15, v0
3653; GFX8-NEXT:    v_xor_b32_e32 v0, -1, v0
3654; GFX8-NEXT:    v_lshlrev_b16_e64 v1, v1, s0
3655; GFX8-NEXT:    s_and_b32 s0, 0xffff, s1
3656; GFX8-NEXT:    v_and_b32_e32 v0, 15, v0
3657; GFX8-NEXT:    s_lshr_b32 s0, s0, 1
3658; GFX8-NEXT:    v_lshrrev_b16_e64 v0, v0, s0
3659; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
3660; GFX8-NEXT:    ; return to shader part epilog
3661;
3662; GFX9-LABEL: v_fshl_i16_ssv:
3663; GFX9:       ; %bb.0:
3664; GFX9-NEXT:    v_and_b32_e32 v1, 15, v0
3665; GFX9-NEXT:    v_xor_b32_e32 v0, -1, v0
3666; GFX9-NEXT:    v_lshlrev_b16_e64 v1, v1, s0
3667; GFX9-NEXT:    s_and_b32 s0, 0xffff, s1
3668; GFX9-NEXT:    v_and_b32_e32 v0, 15, v0
3669; GFX9-NEXT:    s_lshr_b32 s0, s0, 1
3670; GFX9-NEXT:    v_lshrrev_b16_e64 v0, v0, s0
3671; GFX9-NEXT:    v_or_b32_e32 v0, v1, v0
3672; GFX9-NEXT:    ; return to shader part epilog
3673;
3674; GFX10-LABEL: v_fshl_i16_ssv:
3675; GFX10:       ; %bb.0:
3676; GFX10-NEXT:    v_xor_b32_e32 v1, -1, v0
3677; GFX10-NEXT:    v_and_b32_e32 v0, 15, v0
3678; GFX10-NEXT:    s_and_b32 s1, 0xffff, s1
3679; GFX10-NEXT:    s_lshr_b32 s1, s1, 1
3680; GFX10-NEXT:    v_and_b32_e32 v1, 15, v1
3681; GFX10-NEXT:    v_lshlrev_b16 v0, v0, s0
3682; GFX10-NEXT:    v_lshrrev_b16 v1, v1, s1
3683; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
3684; GFX10-NEXT:    ; return to shader part epilog
3685;
3686; GFX11-LABEL: v_fshl_i16_ssv:
3687; GFX11:       ; %bb.0:
3688; GFX11-NEXT:    v_xor_b32_e32 v1, -1, v0
3689; GFX11-NEXT:    v_and_b32_e32 v0, 15, v0
3690; GFX11-NEXT:    s_and_b32 s1, 0xffff, s1
3691; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
3692; GFX11-NEXT:    s_lshr_b32 s1, s1, 1
3693; GFX11-NEXT:    v_and_b32_e32 v1, 15, v1
3694; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
3695; GFX11-NEXT:    v_lshlrev_b16 v0, v0, s0
3696; GFX11-NEXT:    v_lshrrev_b16 v1, v1, s1
3697; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3698; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
3699; GFX11-NEXT:    ; return to shader part epilog
3700  %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 %amt)
3701  %cast.result = bitcast i16 %result to half
3702  ret half %cast.result
3703}
3704
; i16 funnel shift left in an amdgpu_ps function where %lhs and %amt are inreg
; and %rhs is not; the i16 result is bitcast to half for the return.
; In the expected code the amount masks (s_and / s_andn2 with 15) and the left
; shift of %lhs are scalar instructions, while the right shifts of %rhs operate
; on v0 with vector instructions.
; The assertions below are autogenerated (see the NOTE at the top of the file).
3705define amdgpu_ps half @v_fshl_i16_svs(i16 inreg %lhs, i16 %rhs, i16 inreg %amt) {
3706; GFX6-LABEL: v_fshl_i16_svs:
3707; GFX6:       ; %bb.0:
3708; GFX6-NEXT:    s_and_b32 s2, s1, 15
3709; GFX6-NEXT:    s_andn2_b32 s1, 15, s1
3710; GFX6-NEXT:    s_and_b32 s2, 0xffff, s2
3711; GFX6-NEXT:    v_bfe_u32 v0, v0, 1, 15
3712; GFX6-NEXT:    s_and_b32 s1, 0xffff, s1
3713; GFX6-NEXT:    s_lshl_b32 s0, s0, s2
3714; GFX6-NEXT:    v_lshrrev_b32_e32 v0, s1, v0
3715; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
3716; GFX6-NEXT:    ; return to shader part epilog
3717;
3718; GFX8-LABEL: v_fshl_i16_svs:
3719; GFX8:       ; %bb.0:
3720; GFX8-NEXT:    s_and_b32 s2, s1, 15
3721; GFX8-NEXT:    s_andn2_b32 s1, 15, s1
3722; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
3723; GFX8-NEXT:    v_lshrrev_b16_e32 v0, 1, v0
3724; GFX8-NEXT:    s_lshl_b32 s0, s0, s2
3725; GFX8-NEXT:    v_lshrrev_b16_e32 v0, s1, v0
3726; GFX8-NEXT:    v_or_b32_e32 v0, s0, v0
3727; GFX8-NEXT:    ; return to shader part epilog
3728;
3729; GFX9-LABEL: v_fshl_i16_svs:
3730; GFX9:       ; %bb.0:
3731; GFX9-NEXT:    s_and_b32 s2, s1, 15
3732; GFX9-NEXT:    s_andn2_b32 s1, 15, s1
3733; GFX9-NEXT:    s_and_b32 s2, 0xffff, s2
3734; GFX9-NEXT:    v_lshrrev_b16_e32 v0, 1, v0
3735; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
3736; GFX9-NEXT:    v_lshrrev_b16_e32 v0, s1, v0
3737; GFX9-NEXT:    v_or_b32_e32 v0, s0, v0
3738; GFX9-NEXT:    ; return to shader part epilog
3739;
3740; GFX10-LABEL: v_fshl_i16_svs:
3741; GFX10:       ; %bb.0:
3742; GFX10-NEXT:    v_lshrrev_b16 v0, 1, v0
3743; GFX10-NEXT:    s_andn2_b32 s2, 15, s1
3744; GFX10-NEXT:    s_and_b32 s1, s1, 15
3745; GFX10-NEXT:    s_and_b32 s1, 0xffff, s1
3746; GFX10-NEXT:    v_lshrrev_b16 v0, s2, v0
3747; GFX10-NEXT:    s_lshl_b32 s0, s0, s1
3748; GFX10-NEXT:    v_or_b32_e32 v0, s0, v0
3749; GFX10-NEXT:    ; return to shader part epilog
3750;
3751; GFX11-LABEL: v_fshl_i16_svs:
3752; GFX11:       ; %bb.0:
3753; GFX11-NEXT:    v_lshrrev_b16 v0, 1, v0
3754; GFX11-NEXT:    s_and_not1_b32 s2, 15, s1
3755; GFX11-NEXT:    s_and_b32 s1, s1, 15
3756; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3757; GFX11-NEXT:    s_and_b32 s1, 0xffff, s1
3758; GFX11-NEXT:    v_lshrrev_b16 v0, s2, v0
3759; GFX11-NEXT:    s_lshl_b32 s0, s0, s1
3760; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
3761; GFX11-NEXT:    v_or_b32_e32 v0, s0, v0
3762; GFX11-NEXT:    ; return to shader part epilog
3763  %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 %amt)
3764  %cast.result = bitcast i16 %result to half
3765  ret half %cast.result
3766}
3767
; i16 funnel shift left in an amdgpu_ps function where %rhs and %amt are inreg
; and %lhs is not; the i16 result is bitcast to half for the return.
; In the expected code the left shift of %lhs is a vector instruction on v0
; with a scalar shift amount, while the masking of the amount and both right
; shifts of %rhs are scalar instructions.
; The assertions below are autogenerated (see the NOTE at the top of the file).
3768define amdgpu_ps half @v_fshl_i16_vss(i16 %lhs, i16 inreg %rhs, i16 inreg %amt) {
3769; GFX6-LABEL: v_fshl_i16_vss:
3770; GFX6:       ; %bb.0:
3771; GFX6-NEXT:    s_and_b32 s2, s1, 15
3772; GFX6-NEXT:    s_andn2_b32 s1, 15, s1
3773; GFX6-NEXT:    s_and_b32 s2, 0xffff, s2
3774; GFX6-NEXT:    s_bfe_u32 s0, s0, 0xf0001
3775; GFX6-NEXT:    s_and_b32 s1, 0xffff, s1
3776; GFX6-NEXT:    v_lshlrev_b32_e32 v0, s2, v0
3777; GFX6-NEXT:    s_lshr_b32 s0, s0, s1
3778; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
3779; GFX6-NEXT:    ; return to shader part epilog
3780;
3781; GFX8-LABEL: v_fshl_i16_vss:
3782; GFX8:       ; %bb.0:
3783; GFX8-NEXT:    s_and_b32 s2, s1, 15
3784; GFX8-NEXT:    s_andn2_b32 s1, 15, s1
3785; GFX8-NEXT:    s_and_b32 s0, 0xffff, s0
3786; GFX8-NEXT:    s_lshr_b32 s0, s0, 1
3787; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
3788; GFX8-NEXT:    v_lshlrev_b16_e32 v0, s2, v0
3789; GFX8-NEXT:    s_lshr_b32 s0, s0, s1
3790; GFX8-NEXT:    v_or_b32_e32 v0, s0, v0
3791; GFX8-NEXT:    ; return to shader part epilog
3792;
3793; GFX9-LABEL: v_fshl_i16_vss:
3794; GFX9:       ; %bb.0:
3795; GFX9-NEXT:    s_and_b32 s2, s1, 15
3796; GFX9-NEXT:    s_andn2_b32 s1, 15, s1
3797; GFX9-NEXT:    s_and_b32 s0, 0xffff, s0
3798; GFX9-NEXT:    s_lshr_b32 s0, s0, 1
3799; GFX9-NEXT:    s_and_b32 s1, 0xffff, s1
3800; GFX9-NEXT:    v_lshlrev_b16_e32 v0, s2, v0
3801; GFX9-NEXT:    s_lshr_b32 s0, s0, s1
3802; GFX9-NEXT:    v_or_b32_e32 v0, s0, v0
3803; GFX9-NEXT:    ; return to shader part epilog
3804;
3805; GFX10-LABEL: v_fshl_i16_vss:
3806; GFX10:       ; %bb.0:
3807; GFX10-NEXT:    s_and_b32 s2, s1, 15
3808; GFX10-NEXT:    s_andn2_b32 s1, 15, s1
3809; GFX10-NEXT:    s_and_b32 s0, 0xffff, s0
3810; GFX10-NEXT:    v_lshlrev_b16 v0, s2, v0
3811; GFX10-NEXT:    s_lshr_b32 s0, s0, 1
3812; GFX10-NEXT:    s_and_b32 s1, 0xffff, s1
3813; GFX10-NEXT:    s_lshr_b32 s0, s0, s1
3814; GFX10-NEXT:    v_or_b32_e32 v0, s0, v0
3815; GFX10-NEXT:    ; return to shader part epilog
3816;
3817; GFX11-LABEL: v_fshl_i16_vss:
3818; GFX11:       ; %bb.0:
3819; GFX11-NEXT:    s_and_b32 s2, s1, 15
3820; GFX11-NEXT:    s_and_not1_b32 s1, 15, s1
3821; GFX11-NEXT:    s_and_b32 s0, 0xffff, s0
3822; GFX11-NEXT:    v_lshlrev_b16 v0, s2, v0
3823; GFX11-NEXT:    s_lshr_b32 s0, s0, 1
3824; GFX11-NEXT:    s_and_b32 s1, 0xffff, s1
3825; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3826; GFX11-NEXT:    s_lshr_b32 s0, s0, s1
3827; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
3828; GFX11-NEXT:    v_or_b32_e32 v0, s0, v0
3829; GFX11-NEXT:    ; return to shader part epilog
3830  %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 %amt)
3831  %cast.result = bitcast i16 %result to half
3832  ret half %cast.result
3833}
3834
; <2 x i16> funnel shift left in an amdgpu_ps function with all three operands
; marked inreg; the vector result is bitcast to i32 for the return.
; Every expected instruction here is a scalar (s_*) instruction. On GFX6 and
; GFX8 the two halves are shifted separately and recombined with a shift by 16
; and an OR. From GFX9 onward the amounts are masked with 0xf000f and the
; halves are repacked with s_pack_ll_b32_b16.
; The assertions below are autogenerated (see the NOTE at the top of the file).
3835define amdgpu_ps i32 @s_fshl_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) {
3836; GFX6-LABEL: s_fshl_v2i16:
3837; GFX6:       ; %bb.0:
3838; GFX6-NEXT:    s_and_b32 s6, s4, 15
3839; GFX6-NEXT:    s_andn2_b32 s4, 15, s4
3840; GFX6-NEXT:    s_and_b32 s6, 0xffff, s6
3841; GFX6-NEXT:    s_bfe_u32 s2, s2, 0xf0001
3842; GFX6-NEXT:    s_and_b32 s4, 0xffff, s4
3843; GFX6-NEXT:    s_lshl_b32 s0, s0, s6
3844; GFX6-NEXT:    s_lshr_b32 s2, s2, s4
3845; GFX6-NEXT:    s_or_b32 s0, s0, s2
3846; GFX6-NEXT:    s_and_b32 s2, s5, 15
3847; GFX6-NEXT:    s_andn2_b32 s4, 15, s5
3848; GFX6-NEXT:    s_and_b32 s2, 0xffff, s2
3849; GFX6-NEXT:    s_lshl_b32 s1, s1, s2
3850; GFX6-NEXT:    s_bfe_u32 s2, s3, 0xf0001
3851; GFX6-NEXT:    s_and_b32 s3, 0xffff, s4
3852; GFX6-NEXT:    s_lshr_b32 s2, s2, s3
3853; GFX6-NEXT:    s_or_b32 s1, s1, s2
3854; GFX6-NEXT:    s_and_b32 s1, 0xffff, s1
3855; GFX6-NEXT:    s_and_b32 s0, 0xffff, s0
3856; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
3857; GFX6-NEXT:    s_or_b32 s0, s0, s1
3858; GFX6-NEXT:    ; return to shader part epilog
3859;
3860; GFX8-LABEL: s_fshl_v2i16:
3861; GFX8:       ; %bb.0:
3862; GFX8-NEXT:    s_lshr_b32 s4, s1, 16
3863; GFX8-NEXT:    s_lshr_b32 s5, s2, 16
3864; GFX8-NEXT:    s_and_b32 s6, s2, 15
3865; GFX8-NEXT:    s_andn2_b32 s2, 15, s2
3866; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
3867; GFX8-NEXT:    s_and_b32 s6, 0xffff, s6
3868; GFX8-NEXT:    s_lshr_b32 s1, s1, 1
3869; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
3870; GFX8-NEXT:    s_lshr_b32 s3, s0, 16
3871; GFX8-NEXT:    s_lshl_b32 s0, s0, s6
3872; GFX8-NEXT:    s_lshr_b32 s1, s1, s2
3873; GFX8-NEXT:    s_or_b32 s0, s0, s1
3874; GFX8-NEXT:    s_and_b32 s1, s5, 15
3875; GFX8-NEXT:    s_andn2_b32 s2, 15, s5
3876; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
3877; GFX8-NEXT:    s_lshl_b32 s1, s3, s1
3878; GFX8-NEXT:    s_lshr_b32 s3, s4, 1
3879; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
3880; GFX8-NEXT:    s_lshr_b32 s2, s3, s2
3881; GFX8-NEXT:    s_or_b32 s1, s1, s2
3882; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
3883; GFX8-NEXT:    s_and_b32 s0, 0xffff, s0
3884; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
3885; GFX8-NEXT:    s_or_b32 s0, s0, s1
3886; GFX8-NEXT:    ; return to shader part epilog
3887;
3888; GFX9-LABEL: s_fshl_v2i16:
3889; GFX9:       ; %bb.0:
3890; GFX9-NEXT:    s_and_b32 s3, s2, 0xf000f
3891; GFX9-NEXT:    s_lshr_b32 s4, s0, 16
3892; GFX9-NEXT:    s_lshr_b32 s5, s3, 16
3893; GFX9-NEXT:    s_lshl_b32 s0, s0, s3
3894; GFX9-NEXT:    s_lshl_b32 s3, s4, s5
3895; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s3
3896; GFX9-NEXT:    s_lshr_b32 s3, s1, 16
3897; GFX9-NEXT:    s_and_b32 s1, s1, 0xffff
3898; GFX9-NEXT:    s_lshr_b32 s1, s1, 0x10001
3899; GFX9-NEXT:    s_lshr_b32 s3, s3, 1
3900; GFX9-NEXT:    s_andn2_b32 s2, 0xf000f, s2
3901; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s1, s3
3902; GFX9-NEXT:    s_lshr_b32 s3, s1, 16
3903; GFX9-NEXT:    s_and_b32 s1, s1, 0xffff
3904; GFX9-NEXT:    s_lshr_b32 s4, s2, 16
3905; GFX9-NEXT:    s_lshr_b32 s1, s1, s2
3906; GFX9-NEXT:    s_lshr_b32 s2, s3, s4
3907; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s1, s2
3908; GFX9-NEXT:    s_or_b32 s0, s0, s1
3909; GFX9-NEXT:    ; return to shader part epilog
3910;
3911; GFX10-LABEL: s_fshl_v2i16:
3912; GFX10:       ; %bb.0:
3913; GFX10-NEXT:    s_and_b32 s6, s1, 0xffff
3914; GFX10-NEXT:    s_lshr_b32 s1, s1, 16
3915; GFX10-NEXT:    s_and_b32 s3, s2, 0xf000f
3916; GFX10-NEXT:    s_lshr_b32 s6, s6, 0x10001
3917; GFX10-NEXT:    s_lshr_b32 s1, s1, 1
3918; GFX10-NEXT:    s_andn2_b32 s2, 0xf000f, s2
3919; GFX10-NEXT:    s_lshr_b32 s4, s0, 16
3920; GFX10-NEXT:    s_lshr_b32 s5, s3, 16
3921; GFX10-NEXT:    s_pack_ll_b32_b16 s1, s6, s1
3922; GFX10-NEXT:    s_lshl_b32 s0, s0, s3
3923; GFX10-NEXT:    s_lshl_b32 s3, s4, s5
3924; GFX10-NEXT:    s_lshr_b32 s4, s1, 16
3925; GFX10-NEXT:    s_and_b32 s1, s1, 0xffff
3926; GFX10-NEXT:    s_lshr_b32 s5, s2, 16
3927; GFX10-NEXT:    s_lshr_b32 s1, s1, s2
3928; GFX10-NEXT:    s_lshr_b32 s2, s4, s5
3929; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s3
3930; GFX10-NEXT:    s_pack_ll_b32_b16 s1, s1, s2
3931; GFX10-NEXT:    s_or_b32 s0, s0, s1
3932; GFX10-NEXT:    ; return to shader part epilog
3933;
3934; GFX11-LABEL: s_fshl_v2i16:
3935; GFX11:       ; %bb.0:
3936; GFX11-NEXT:    s_and_b32 s6, s1, 0xffff
3937; GFX11-NEXT:    s_lshr_b32 s1, s1, 16
3938; GFX11-NEXT:    s_and_b32 s3, s2, 0xf000f
3939; GFX11-NEXT:    s_lshr_b32 s6, s6, 0x10001
3940; GFX11-NEXT:    s_lshr_b32 s1, s1, 1
3941; GFX11-NEXT:    s_and_not1_b32 s2, 0xf000f, s2
3942; GFX11-NEXT:    s_lshr_b32 s4, s0, 16
3943; GFX11-NEXT:    s_lshr_b32 s5, s3, 16
3944; GFX11-NEXT:    s_pack_ll_b32_b16 s1, s6, s1
3945; GFX11-NEXT:    s_lshl_b32 s0, s0, s3
3946; GFX11-NEXT:    s_lshl_b32 s3, s4, s5
3947; GFX11-NEXT:    s_lshr_b32 s4, s1, 16
3948; GFX11-NEXT:    s_and_b32 s1, s1, 0xffff
3949; GFX11-NEXT:    s_lshr_b32 s5, s2, 16
3950; GFX11-NEXT:    s_lshr_b32 s1, s1, s2
3951; GFX11-NEXT:    s_lshr_b32 s2, s4, s5
3952; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s3
3953; GFX11-NEXT:    s_pack_ll_b32_b16 s1, s1, s2
3954; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3955; GFX11-NEXT:    s_or_b32 s0, s0, s1
3956; GFX11-NEXT:    ; return to shader part epilog
3957  %result = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
3958  %cast = bitcast <2 x i16> %result to i32
3959  ret i32 %cast
3960}
3961
; <2 x i16> funnel shift left with plain (non-inreg) arguments and a variable
; per-element shift amount.
; From GFX9 onward the expected code masks the amount and its complement with
; 0xf000f and uses packed v_pk_* 16-bit shifts. The GFX8 expectation handles
; the high half with SDWA word selects and repacks the result. The GFX6
; expectation shifts the two elements separately.
; The assertions below are autogenerated (see the NOTE at the top of the file).
3962define <2 x i16> @v_fshl_v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) {
3963; GFX6-LABEL: v_fshl_v2i16:
3964; GFX6:       ; %bb.0:
3965; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3966; GFX6-NEXT:    v_and_b32_e32 v6, 15, v4
3967; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v4
3968; GFX6-NEXT:    v_and_b32_e32 v4, 15, v4
3969; GFX6-NEXT:    v_and_b32_e32 v6, 0xffff, v6
3970; GFX6-NEXT:    v_bfe_u32 v2, v2, 1, 15
3971; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v4
3972; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v6, v0
3973; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
3974; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
3975; GFX6-NEXT:    v_and_b32_e32 v2, 15, v5
3976; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v5
3977; GFX6-NEXT:    v_and_b32_e32 v4, 15, v4
3978; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v2
3979; GFX6-NEXT:    v_lshlrev_b32_e32 v1, v2, v1
3980; GFX6-NEXT:    v_bfe_u32 v2, v3, 1, 15
3981; GFX6-NEXT:    v_and_b32_e32 v3, 0xffff, v4
3982; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v3, v2
3983; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
3984; GFX6-NEXT:    s_setpc_b64 s[30:31]
3985;
3986; GFX8-LABEL: v_fshl_v2i16:
3987; GFX8:       ; %bb.0:
3988; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3989; GFX8-NEXT:    v_xor_b32_e32 v4, -1, v2
3990; GFX8-NEXT:    v_and_b32_e32 v3, 15, v2
3991; GFX8-NEXT:    v_and_b32_e32 v4, 15, v4
3992; GFX8-NEXT:    v_lshrrev_b16_e32 v5, 1, v1
3993; GFX8-NEXT:    v_lshlrev_b16_e32 v3, v3, v0
3994; GFX8-NEXT:    v_lshrrev_b16_e32 v4, v4, v5
3995; GFX8-NEXT:    v_or_b32_e32 v3, v3, v4
3996; GFX8-NEXT:    v_mov_b32_e32 v4, 15
3997; GFX8-NEXT:    v_and_b32_sdwa v4, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
3998; GFX8-NEXT:    v_mov_b32_e32 v5, -1
3999; GFX8-NEXT:    v_xor_b32_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
4000; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4001; GFX8-NEXT:    v_mov_b32_e32 v4, 1
4002; GFX8-NEXT:    v_and_b32_e32 v2, 15, v2
4003; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4004; GFX8-NEXT:    v_lshrrev_b16_e32 v1, v2, v1
4005; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
4006; GFX8-NEXT:    v_and_b32_e32 v0, 0xffff, v0
4007; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
4008; GFX8-NEXT:    v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4009; GFX8-NEXT:    s_setpc_b64 s[30:31]
4010;
4011; GFX9-LABEL: v_fshl_v2i16:
4012; GFX9:       ; %bb.0:
4013; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4014; GFX9-NEXT:    v_and_b32_e32 v3, 0xf000f, v2
4015; GFX9-NEXT:    v_xor_b32_e32 v2, -1, v2
4016; GFX9-NEXT:    v_and_b32_e32 v2, 0xf000f, v2
4017; GFX9-NEXT:    v_pk_lshrrev_b16 v1, 1, v1 op_sel_hi:[0,1]
4018; GFX9-NEXT:    v_pk_lshlrev_b16 v0, v3, v0
4019; GFX9-NEXT:    v_pk_lshrrev_b16 v1, v2, v1
4020; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
4021; GFX9-NEXT:    s_setpc_b64 s[30:31]
4022;
4023; GFX10-LABEL: v_fshl_v2i16:
4024; GFX10:       ; %bb.0:
4025; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4026; GFX10-NEXT:    v_xor_b32_e32 v3, -1, v2
4027; GFX10-NEXT:    v_and_b32_e32 v2, 0xf000f, v2
4028; GFX10-NEXT:    v_pk_lshrrev_b16 v1, 1, v1 op_sel_hi:[0,1]
4029; GFX10-NEXT:    v_and_b32_e32 v3, 0xf000f, v3
4030; GFX10-NEXT:    v_pk_lshlrev_b16 v0, v2, v0
4031; GFX10-NEXT:    v_pk_lshrrev_b16 v1, v3, v1
4032; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
4033; GFX10-NEXT:    s_setpc_b64 s[30:31]
4034;
4035; GFX11-LABEL: v_fshl_v2i16:
4036; GFX11:       ; %bb.0:
4037; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4038; GFX11-NEXT:    v_xor_b32_e32 v3, -1, v2
4039; GFX11-NEXT:    v_and_b32_e32 v2, 0xf000f, v2
4040; GFX11-NEXT:    v_pk_lshrrev_b16 v1, 1, v1 op_sel_hi:[0,1]
4041; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
4042; GFX11-NEXT:    v_and_b32_e32 v3, 0xf000f, v3
4043; GFX11-NEXT:    v_pk_lshlrev_b16 v0, v2, v0
4044; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
4045; GFX11-NEXT:    v_pk_lshrrev_b16 v1, v3, v1
4046; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
4047; GFX11-NEXT:    s_setpc_b64 s[30:31]
4048  %result = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
4049  ret <2 x i16> %result
4050}
4051
; <2 x i16> funnel shift left by the constant vector <4, 8>, with plain
; (non-inreg) arguments.
; From GFX9 onward the expected code uses packed shifts whose per-half amounts
; appear as the literals 0x80004 (left shift) and 0x8000c (right shift); GFX9
; first moves each literal into a VGPR. The GFX6 and GFX8 expectations shift
; the two elements separately.
; The assertions below are autogenerated (see the NOTE at the top of the file).
4052define <2 x i16> @v_fshl_v2i16_4_8(<2 x i16> %lhs, <2 x i16> %rhs) {
4053; GFX6-LABEL: v_fshl_v2i16_4_8:
4054; GFX6:       ; %bb.0:
4055; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4056; GFX6-NEXT:    v_bfe_u32 v2, v2, 1, 15
4057; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
4058; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 11, v2
4059; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
4060; GFX6-NEXT:    v_bfe_u32 v2, v3, 1, 15
4061; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
4062; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 7, v2
4063; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
4064; GFX6-NEXT:    s_setpc_b64 s[30:31]
4065;
4066; GFX8-LABEL: v_fshl_v2i16_4_8:
4067; GFX8:       ; %bb.0:
4068; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4069; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
4070; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 4, v0
4071; GFX8-NEXT:    v_lshrrev_b16_e32 v3, 12, v1
4072; GFX8-NEXT:    v_or_b32_e32 v0, v0, v3
4073; GFX8-NEXT:    v_mov_b32_e32 v3, 8
4074; GFX8-NEXT:    v_lshlrev_b16_e32 v2, 8, v2
4075; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4076; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
4077; GFX8-NEXT:    v_and_b32_e32 v1, 0xffff, v1
4078; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
4079; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4080; GFX8-NEXT:    s_setpc_b64 s[30:31]
4081;
4082; GFX9-LABEL: v_fshl_v2i16_4_8:
4083; GFX9:       ; %bb.0:
4084; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4085; GFX9-NEXT:    v_mov_b32_e32 v2, 0x80004
4086; GFX9-NEXT:    v_pk_lshlrev_b16 v0, v2, v0
4087; GFX9-NEXT:    v_mov_b32_e32 v2, 0x8000c
4088; GFX9-NEXT:    v_pk_lshrrev_b16 v1, v2, v1
4089; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
4090; GFX9-NEXT:    s_setpc_b64 s[30:31]
4091;
4092; GFX10-LABEL: v_fshl_v2i16_4_8:
4093; GFX10:       ; %bb.0:
4094; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4095; GFX10-NEXT:    v_pk_lshlrev_b16 v0, 0x80004, v0
4096; GFX10-NEXT:    v_pk_lshrrev_b16 v1, 0x8000c, v1
4097; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
4098; GFX10-NEXT:    s_setpc_b64 s[30:31]
4099;
4100; GFX11-LABEL: v_fshl_v2i16_4_8:
4101; GFX11:       ; %bb.0:
4102; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4103; GFX11-NEXT:    v_pk_lshlrev_b16 v0, 0x80004, v0
4104; GFX11-NEXT:    v_pk_lshrrev_b16 v1, 0x8000c, v1
4105; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4106; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
4107; GFX11-NEXT:    s_setpc_b64 s[30:31]
4108  %result = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> <i16 4, i16 8>)
4109  ret <2 x i16> %result
4110}
4111
; <2 x i16> funnel shift left in an amdgpu_ps function where %lhs and %rhs are
; inreg and %amt is not; the vector result is bitcast to float for the return.
; In the expected code the shift amounts are masked with vector instructions
; and the data operands are read from scalar registers. From GFX9 onward %rhs
; is pre-shifted and repacked with scalar instructions before the packed
; v_pk_lshrrev_b16.
; The assertions below are autogenerated (see the NOTE at the top of the file).
4112define amdgpu_ps float @v_fshl_v2i16_ssv(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> %amt) {
4113; GFX6-LABEL: v_fshl_v2i16_ssv:
4114; GFX6:       ; %bb.0:
4115; GFX6-NEXT:    v_and_b32_e32 v2, 15, v0
4116; GFX6-NEXT:    v_xor_b32_e32 v0, -1, v0
4117; GFX6-NEXT:    v_and_b32_e32 v0, 15, v0
4118; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v2
4119; GFX6-NEXT:    v_lshl_b32_e32 v2, s0, v2
4120; GFX6-NEXT:    s_bfe_u32 s0, s2, 0xf0001
4121; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
4122; GFX6-NEXT:    v_lshr_b32_e32 v0, s0, v0
4123; GFX6-NEXT:    v_or_b32_e32 v0, v2, v0
4124; GFX6-NEXT:    v_and_b32_e32 v2, 15, v1
4125; GFX6-NEXT:    v_xor_b32_e32 v1, -1, v1
4126; GFX6-NEXT:    v_and_b32_e32 v1, 15, v1
4127; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v2
4128; GFX6-NEXT:    s_bfe_u32 s0, s3, 0xf0001
4129; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
4130; GFX6-NEXT:    v_lshl_b32_e32 v2, s1, v2
4131; GFX6-NEXT:    v_lshr_b32_e32 v1, s0, v1
4132; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
4133; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
4134; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
4135; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
4136; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
4137; GFX6-NEXT:    ; return to shader part epilog
4138;
4139; GFX8-LABEL: v_fshl_v2i16_ssv:
4140; GFX8:       ; %bb.0:
4141; GFX8-NEXT:    v_and_b32_e32 v1, 15, v0
4142; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
4143; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v0
4144; GFX8-NEXT:    v_lshlrev_b16_e64 v1, v1, s0
4145; GFX8-NEXT:    s_and_b32 s0, 0xffff, s1
4146; GFX8-NEXT:    v_and_b32_e32 v2, 15, v2
4147; GFX8-NEXT:    s_lshr_b32 s0, s0, 1
4148; GFX8-NEXT:    v_lshrrev_b16_e64 v2, v2, s0
4149; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
4150; GFX8-NEXT:    v_mov_b32_e32 v2, 15
4151; GFX8-NEXT:    v_mov_b32_e32 v3, -1
4152; GFX8-NEXT:    s_lshr_b32 s3, s1, 16
4153; GFX8-NEXT:    v_and_b32_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
4154; GFX8-NEXT:    v_xor_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
4155; GFX8-NEXT:    v_and_b32_e32 v0, 15, v0
4156; GFX8-NEXT:    s_lshr_b32 s0, s3, 1
4157; GFX8-NEXT:    v_lshlrev_b16_e64 v2, v2, s2
4158; GFX8-NEXT:    v_lshrrev_b16_e64 v0, v0, s0
4159; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
4160; GFX8-NEXT:    v_and_b32_e32 v0, 0xffff, v0
4161; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
4162; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4163; GFX8-NEXT:    ; return to shader part epilog
4164;
4165; GFX9-LABEL: v_fshl_v2i16_ssv:
4166; GFX9:       ; %bb.0:
4167; GFX9-NEXT:    v_and_b32_e32 v1, 0xf000f, v0
4168; GFX9-NEXT:    v_pk_lshlrev_b16 v1, v1, s0
4169; GFX9-NEXT:    s_lshr_b32 s0, s1, 16
4170; GFX9-NEXT:    s_and_b32 s1, s1, 0xffff
4171; GFX9-NEXT:    v_xor_b32_e32 v0, -1, v0
4172; GFX9-NEXT:    s_lshr_b32 s1, s1, 0x10001
4173; GFX9-NEXT:    s_lshr_b32 s0, s0, 1
4174; GFX9-NEXT:    v_and_b32_e32 v0, 0xf000f, v0
4175; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s1, s0
4176; GFX9-NEXT:    v_pk_lshrrev_b16 v0, v0, s0
4177; GFX9-NEXT:    v_or_b32_e32 v0, v1, v0
4178; GFX9-NEXT:    ; return to shader part epilog
4179;
4180; GFX10-LABEL: v_fshl_v2i16_ssv:
4181; GFX10:       ; %bb.0:
4182; GFX10-NEXT:    v_xor_b32_e32 v1, -1, v0
4183; GFX10-NEXT:    s_lshr_b32 s2, s1, 16
4184; GFX10-NEXT:    s_and_b32 s1, s1, 0xffff
4185; GFX10-NEXT:    v_and_b32_e32 v0, 0xf000f, v0
4186; GFX10-NEXT:    s_lshr_b32 s1, s1, 0x10001
4187; GFX10-NEXT:    v_and_b32_e32 v1, 0xf000f, v1
4188; GFX10-NEXT:    s_lshr_b32 s2, s2, 1
4189; GFX10-NEXT:    s_pack_ll_b32_b16 s1, s1, s2
4190; GFX10-NEXT:    v_pk_lshlrev_b16 v0, v0, s0
4191; GFX10-NEXT:    v_pk_lshrrev_b16 v1, v1, s1
4192; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
4193; GFX10-NEXT:    ; return to shader part epilog
4194;
4195; GFX11-LABEL: v_fshl_v2i16_ssv:
4196; GFX11:       ; %bb.0:
4197; GFX11-NEXT:    v_xor_b32_e32 v1, -1, v0
4198; GFX11-NEXT:    s_lshr_b32 s2, s1, 16
4199; GFX11-NEXT:    s_and_b32 s1, s1, 0xffff
4200; GFX11-NEXT:    v_and_b32_e32 v0, 0xf000f, v0
4201; GFX11-NEXT:    s_lshr_b32 s1, s1, 0x10001
4202; GFX11-NEXT:    v_and_b32_e32 v1, 0xf000f, v1
4203; GFX11-NEXT:    s_lshr_b32 s2, s2, 1
4204; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
4205; GFX11-NEXT:    s_pack_ll_b32_b16 s1, s1, s2
4206; GFX11-NEXT:    v_pk_lshlrev_b16 v0, v0, s0
4207; GFX11-NEXT:    v_pk_lshrrev_b16 v1, v1, s1
4208; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4209; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
4210; GFX11-NEXT:    ; return to shader part epilog
4211  %result = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
4212  %cast = bitcast <2 x i16> %result to float
4213  ret float %cast
4214}
4215
; <2 x i16> funnel shift left in an amdgpu_ps function where %lhs and %amt are
; inreg and %rhs is not; the vector result is bitcast to float for the return.
; In the expected code the amount masking and the left shifts of %lhs are
; scalar instructions, and the right shifts of %rhs are vector instructions.
; From GFX9 onward those right shifts are packed v_pk_lshrrev_b16 operations
; and the scalar left-shift halves are repacked with s_pack_ll_b32_b16.
; The assertions below are autogenerated (see the NOTE at the top of the file).
4216define amdgpu_ps float @v_fshl_v2i16_svs(<2 x i16> inreg %lhs, <2 x i16> %rhs, <2 x i16> inreg %amt) {
4217; GFX6-LABEL: v_fshl_v2i16_svs:
4218; GFX6:       ; %bb.0:
4219; GFX6-NEXT:    s_and_b32 s4, s2, 15
4220; GFX6-NEXT:    s_andn2_b32 s2, 15, s2
4221; GFX6-NEXT:    s_and_b32 s4, 0xffff, s4
4222; GFX6-NEXT:    v_bfe_u32 v0, v0, 1, 15
4223; GFX6-NEXT:    s_and_b32 s2, 0xffff, s2
4224; GFX6-NEXT:    s_lshl_b32 s0, s0, s4
4225; GFX6-NEXT:    v_lshrrev_b32_e32 v0, s2, v0
4226; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
4227; GFX6-NEXT:    s_and_b32 s0, s3, 15
4228; GFX6-NEXT:    s_andn2_b32 s2, 15, s3
4229; GFX6-NEXT:    s_and_b32 s0, 0xffff, s0
4230; GFX6-NEXT:    s_lshl_b32 s0, s1, s0
4231; GFX6-NEXT:    v_bfe_u32 v1, v1, 1, 15
4232; GFX6-NEXT:    s_and_b32 s1, 0xffff, s2
4233; GFX6-NEXT:    v_lshrrev_b32_e32 v1, s1, v1
4234; GFX6-NEXT:    v_or_b32_e32 v1, s0, v1
4235; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
4236; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
4237; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
4238; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
4239; GFX6-NEXT:    ; return to shader part epilog
4240;
4241; GFX8-LABEL: v_fshl_v2i16_svs:
4242; GFX8:       ; %bb.0:
4243; GFX8-NEXT:    s_and_b32 s4, s1, 15
4244; GFX8-NEXT:    s_lshr_b32 s3, s1, 16
4245; GFX8-NEXT:    s_andn2_b32 s1, 15, s1
4246; GFX8-NEXT:    s_and_b32 s4, 0xffff, s4
4247; GFX8-NEXT:    v_lshrrev_b16_e32 v1, 1, v0
4248; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
4249; GFX8-NEXT:    s_lshl_b32 s0, s0, s4
4250; GFX8-NEXT:    v_lshrrev_b16_e32 v1, s1, v1
4251; GFX8-NEXT:    v_or_b32_e32 v1, s0, v1
4252; GFX8-NEXT:    s_and_b32 s0, s3, 15
4253; GFX8-NEXT:    v_mov_b32_e32 v2, 1
4254; GFX8-NEXT:    s_andn2_b32 s1, 15, s3
4255; GFX8-NEXT:    s_and_b32 s0, 0xffff, s0
4256; GFX8-NEXT:    v_lshrrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4257; GFX8-NEXT:    s_lshl_b32 s0, s2, s0
4258; GFX8-NEXT:    v_lshrrev_b16_e32 v0, s1, v0
4259; GFX8-NEXT:    v_or_b32_e32 v0, s0, v0
4260; GFX8-NEXT:    v_and_b32_e32 v0, 0xffff, v0
4261; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
4262; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4263; GFX8-NEXT:    ; return to shader part epilog
4264;
4265; GFX9-LABEL: v_fshl_v2i16_svs:
4266; GFX9:       ; %bb.0:
4267; GFX9-NEXT:    s_and_b32 s2, s1, 0xf000f
4268; GFX9-NEXT:    s_lshr_b32 s3, s0, 16
4269; GFX9-NEXT:    s_lshr_b32 s4, s2, 16
4270; GFX9-NEXT:    s_andn2_b32 s1, 0xf000f, s1
4271; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
4272; GFX9-NEXT:    s_lshl_b32 s2, s3, s4
4273; GFX9-NEXT:    v_pk_lshrrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4274; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
4275; GFX9-NEXT:    v_pk_lshrrev_b16 v0, s1, v0
4276; GFX9-NEXT:    v_or_b32_e32 v0, s0, v0
4277; GFX9-NEXT:    ; return to shader part epilog
4278;
4279; GFX10-LABEL: v_fshl_v2i16_svs:
4280; GFX10:       ; %bb.0:
4281; GFX10-NEXT:    v_pk_lshrrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4282; GFX10-NEXT:    s_and_b32 s2, s1, 0xf000f
4283; GFX10-NEXT:    s_andn2_b32 s1, 0xf000f, s1
4284; GFX10-NEXT:    s_lshr_b32 s3, s0, 16
4285; GFX10-NEXT:    s_lshr_b32 s4, s2, 16
4286; GFX10-NEXT:    v_pk_lshrrev_b16 v0, s1, v0
4287; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
4288; GFX10-NEXT:    s_lshl_b32 s1, s3, s4
4289; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
4290; GFX10-NEXT:    v_or_b32_e32 v0, s0, v0
4291; GFX10-NEXT:    ; return to shader part epilog
4292;
4293; GFX11-LABEL: v_fshl_v2i16_svs:
4294; GFX11:       ; %bb.0:
4295; GFX11-NEXT:    v_pk_lshrrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4296; GFX11-NEXT:    s_and_b32 s2, s1, 0xf000f
4297; GFX11-NEXT:    s_and_not1_b32 s1, 0xf000f, s1
4298; GFX11-NEXT:    s_lshr_b32 s3, s0, 16
4299; GFX11-NEXT:    s_lshr_b32 s4, s2, 16
4300; GFX11-NEXT:    v_pk_lshrrev_b16 v0, s1, v0
4301; GFX11-NEXT:    s_lshl_b32 s0, s0, s2
4302; GFX11-NEXT:    s_lshl_b32 s1, s3, s4
4303; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
4304; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
4305; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
4306; GFX11-NEXT:    v_or_b32_e32 v0, s0, v0
4307; GFX11-NEXT:    ; return to shader part epilog
4308  %result = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
4309  %cast = bitcast <2 x i16> %result to float
4310  ret float %cast
4311}
4312
; Funnel shift left of <2 x i16> (llvm.fshl.v2i16) in an amdgpu_ps function
; where %lhs is a plain argument and %rhs / %amt are marked inreg. In the
; expected output below %lhs is read from v-registers while %rhs and %amt are
; read from s-registers. The <2 x i16> result is bitcast to float for the return.
; The assertion lines are autogenerated (see the note on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
4313define amdgpu_ps float @v_fshl_v2i16_vss(<2 x i16> %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) {
4314; GFX6-LABEL: v_fshl_v2i16_vss:
4315; GFX6:       ; %bb.0:
4316; GFX6-NEXT:    s_and_b32 s4, s2, 15
4317; GFX6-NEXT:    s_andn2_b32 s2, 15, s2
4318; GFX6-NEXT:    s_and_b32 s4, 0xffff, s4
4319; GFX6-NEXT:    s_bfe_u32 s0, s0, 0xf0001
4320; GFX6-NEXT:    s_and_b32 s2, 0xffff, s2
4321; GFX6-NEXT:    v_lshlrev_b32_e32 v0, s4, v0
4322; GFX6-NEXT:    s_lshr_b32 s0, s0, s2
4323; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
4324; GFX6-NEXT:    s_and_b32 s0, s3, 15
4325; GFX6-NEXT:    s_andn2_b32 s2, 15, s3
4326; GFX6-NEXT:    s_and_b32 s0, 0xffff, s0
4327; GFX6-NEXT:    v_lshlrev_b32_e32 v1, s0, v1
4328; GFX6-NEXT:    s_bfe_u32 s0, s1, 0xf0001
4329; GFX6-NEXT:    s_and_b32 s1, 0xffff, s2
4330; GFX6-NEXT:    s_lshr_b32 s0, s0, s1
4331; GFX6-NEXT:    v_or_b32_e32 v1, s0, v1
4332; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
4333; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
4334; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
4335; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
4336; GFX6-NEXT:    ; return to shader part epilog
4337;
4338; GFX8-LABEL: v_fshl_v2i16_vss:
4339; GFX8:       ; %bb.0:
4340; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
4341; GFX8-NEXT:    s_lshr_b32 s3, s1, 16
4342; GFX8-NEXT:    s_and_b32 s4, s1, 15
4343; GFX8-NEXT:    s_andn2_b32 s1, 15, s1
4344; GFX8-NEXT:    s_and_b32 s0, 0xffff, s0
4345; GFX8-NEXT:    s_lshr_b32 s0, s0, 1
4346; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
4347; GFX8-NEXT:    v_lshlrev_b16_e32 v1, s4, v0
4348; GFX8-NEXT:    s_lshr_b32 s0, s0, s1
4349; GFX8-NEXT:    v_or_b32_e32 v1, s0, v1
4350; GFX8-NEXT:    s_and_b32 s0, s3, 15
4351; GFX8-NEXT:    s_andn2_b32 s1, 15, s3
4352; GFX8-NEXT:    v_mov_b32_e32 v2, s0
4353; GFX8-NEXT:    s_lshr_b32 s0, s2, 1
4354; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
4355; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4356; GFX8-NEXT:    s_lshr_b32 s0, s0, s1
4357; GFX8-NEXT:    v_or_b32_e32 v0, s0, v0
4358; GFX8-NEXT:    v_and_b32_e32 v0, 0xffff, v0
4359; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
4360; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4361; GFX8-NEXT:    ; return to shader part epilog
4362;
4363; GFX9-LABEL: v_fshl_v2i16_vss:
4364; GFX9:       ; %bb.0:
4365; GFX9-NEXT:    s_and_b32 s2, s1, 0xf000f
4366; GFX9-NEXT:    v_pk_lshlrev_b16 v0, s2, v0
4367; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
4368; GFX9-NEXT:    s_and_b32 s0, s0, 0xffff
4369; GFX9-NEXT:    s_lshr_b32 s0, s0, 0x10001
4370; GFX9-NEXT:    s_lshr_b32 s2, s2, 1
4371; GFX9-NEXT:    s_andn2_b32 s1, 0xf000f, s1
4372; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
4373; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
4374; GFX9-NEXT:    s_and_b32 s0, s0, 0xffff
4375; GFX9-NEXT:    s_lshr_b32 s3, s1, 16
4376; GFX9-NEXT:    s_lshr_b32 s0, s0, s1
4377; GFX9-NEXT:    s_lshr_b32 s1, s2, s3
4378; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
4379; GFX9-NEXT:    v_or_b32_e32 v0, s0, v0
4380; GFX9-NEXT:    ; return to shader part epilog
4381;
4382; GFX10-LABEL: v_fshl_v2i16_vss:
4383; GFX10:       ; %bb.0:
4384; GFX10-NEXT:    s_and_b32 s3, s0, 0xffff
4385; GFX10-NEXT:    s_lshr_b32 s0, s0, 16
4386; GFX10-NEXT:    s_lshr_b32 s3, s3, 0x10001
4387; GFX10-NEXT:    s_lshr_b32 s0, s0, 1
4388; GFX10-NEXT:    s_and_b32 s2, s1, 0xf000f
4389; GFX10-NEXT:    s_andn2_b32 s1, 0xf000f, s1
4390; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s3, s0
4391; GFX10-NEXT:    v_pk_lshlrev_b16 v0, s2, v0
4392; GFX10-NEXT:    s_lshr_b32 s2, s0, 16
4393; GFX10-NEXT:    s_and_b32 s0, s0, 0xffff
4394; GFX10-NEXT:    s_lshr_b32 s3, s1, 16
4395; GFX10-NEXT:    s_lshr_b32 s0, s0, s1
4396; GFX10-NEXT:    s_lshr_b32 s1, s2, s3
4397; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
4398; GFX10-NEXT:    v_or_b32_e32 v0, s0, v0
4399; GFX10-NEXT:    ; return to shader part epilog
4400;
4401; GFX11-LABEL: v_fshl_v2i16_vss:
4402; GFX11:       ; %bb.0:
4403; GFX11-NEXT:    s_and_b32 s3, s0, 0xffff
4404; GFX11-NEXT:    s_lshr_b32 s0, s0, 16
4405; GFX11-NEXT:    s_lshr_b32 s3, s3, 0x10001
4406; GFX11-NEXT:    s_lshr_b32 s0, s0, 1
4407; GFX11-NEXT:    s_and_b32 s2, s1, 0xf000f
4408; GFX11-NEXT:    s_and_not1_b32 s1, 0xf000f, s1
4409; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s3, s0
4410; GFX11-NEXT:    v_pk_lshlrev_b16 v0, s2, v0
4411; GFX11-NEXT:    s_lshr_b32 s2, s0, 16
4412; GFX11-NEXT:    s_and_b32 s0, s0, 0xffff
4413; GFX11-NEXT:    s_lshr_b32 s3, s1, 16
4414; GFX11-NEXT:    s_lshr_b32 s0, s0, s1
4415; GFX11-NEXT:    s_lshr_b32 s1, s2, s3
4416; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
4417; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
4418; GFX11-NEXT:    v_or_b32_e32 v0, s0, v0
4419; GFX11-NEXT:    ; return to shader part epilog
4420  %result = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
4421  %cast = bitcast <2 x i16> %result to float
4422  ret float %cast
4423}
4424
4425
; Funnel shift left of <3 x i16> (llvm.fshl.v3i16) in an amdgpu_ps function
; with all three operands marked inreg. The expected output below consists
; solely of scalar (s_*) instructions for every checked target. The <3 x i16>
; result is bitcast to i48 for the return.
; The assertion lines are autogenerated (see the note on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
4426define amdgpu_ps i48 @s_fshl_v3i16(<3 x i16> inreg %lhs, <3 x i16> inreg %rhs, <3 x i16> inreg %amt) {
4427; GFX6-LABEL: s_fshl_v3i16:
4428; GFX6:       ; %bb.0:
4429; GFX6-NEXT:    s_and_b32 s9, s6, 15
4430; GFX6-NEXT:    s_andn2_b32 s6, 15, s6
4431; GFX6-NEXT:    s_and_b32 s9, 0xffff, s9
4432; GFX6-NEXT:    s_bfe_u32 s3, s3, 0xf0001
4433; GFX6-NEXT:    s_and_b32 s6, 0xffff, s6
4434; GFX6-NEXT:    s_lshl_b32 s0, s0, s9
4435; GFX6-NEXT:    s_lshr_b32 s3, s3, s6
4436; GFX6-NEXT:    s_or_b32 s0, s0, s3
4437; GFX6-NEXT:    s_and_b32 s3, s7, 15
4438; GFX6-NEXT:    s_andn2_b32 s6, 15, s7
4439; GFX6-NEXT:    s_and_b32 s3, 0xffff, s3
4440; GFX6-NEXT:    s_lshl_b32 s1, s1, s3
4441; GFX6-NEXT:    s_bfe_u32 s3, s4, 0xf0001
4442; GFX6-NEXT:    s_and_b32 s4, 0xffff, s6
4443; GFX6-NEXT:    s_lshr_b32 s3, s3, s4
4444; GFX6-NEXT:    s_or_b32 s1, s1, s3
4445; GFX6-NEXT:    s_and_b32 s3, s8, 15
4446; GFX6-NEXT:    s_andn2_b32 s4, 15, s8
4447; GFX6-NEXT:    s_and_b32 s3, 0xffff, s3
4448; GFX6-NEXT:    s_lshl_b32 s2, s2, s3
4449; GFX6-NEXT:    s_bfe_u32 s3, s5, 0xf0001
4450; GFX6-NEXT:    s_and_b32 s4, 0xffff, s4
4451; GFX6-NEXT:    s_lshr_b32 s3, s3, s4
4452; GFX6-NEXT:    s_and_b32 s1, 0xffff, s1
4453; GFX6-NEXT:    s_or_b32 s2, s2, s3
4454; GFX6-NEXT:    s_and_b32 s0, 0xffff, s0
4455; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
4456; GFX6-NEXT:    s_or_b32 s0, s0, s1
4457; GFX6-NEXT:    s_and_b32 s1, 0xffff, s2
4458; GFX6-NEXT:    ; return to shader part epilog
4459;
4460; GFX8-LABEL: s_fshl_v3i16:
4461; GFX8:       ; %bb.0:
4462; GFX8-NEXT:    s_lshr_b32 s7, s2, 16
4463; GFX8-NEXT:    s_lshr_b32 s8, s4, 16
4464; GFX8-NEXT:    s_and_b32 s9, s4, 15
4465; GFX8-NEXT:    s_andn2_b32 s4, 15, s4
4466; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
4467; GFX8-NEXT:    s_and_b32 s9, 0xffff, s9
4468; GFX8-NEXT:    s_lshr_b32 s2, s2, 1
4469; GFX8-NEXT:    s_and_b32 s4, 0xffff, s4
4470; GFX8-NEXT:    s_lshr_b32 s6, s0, 16
4471; GFX8-NEXT:    s_lshl_b32 s0, s0, s9
4472; GFX8-NEXT:    s_lshr_b32 s2, s2, s4
4473; GFX8-NEXT:    s_or_b32 s0, s0, s2
4474; GFX8-NEXT:    s_and_b32 s2, s8, 15
4475; GFX8-NEXT:    s_andn2_b32 s4, 15, s8
4476; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
4477; GFX8-NEXT:    s_lshl_b32 s2, s6, s2
4478; GFX8-NEXT:    s_lshr_b32 s6, s7, 1
4479; GFX8-NEXT:    s_and_b32 s4, 0xffff, s4
4480; GFX8-NEXT:    s_lshr_b32 s4, s6, s4
4481; GFX8-NEXT:    s_or_b32 s2, s2, s4
4482; GFX8-NEXT:    s_and_b32 s4, s5, 15
4483; GFX8-NEXT:    s_andn2_b32 s5, 15, s5
4484; GFX8-NEXT:    s_and_b32 s4, 0xffff, s4
4485; GFX8-NEXT:    s_and_b32 s3, 0xffff, s3
4486; GFX8-NEXT:    s_lshl_b32 s1, s1, s4
4487; GFX8-NEXT:    s_lshr_b32 s3, s3, 1
4488; GFX8-NEXT:    s_and_b32 s4, 0xffff, s5
4489; GFX8-NEXT:    s_lshr_b32 s3, s3, s4
4490; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
4491; GFX8-NEXT:    s_or_b32 s1, s1, s3
4492; GFX8-NEXT:    s_and_b32 s0, 0xffff, s0
4493; GFX8-NEXT:    s_lshl_b32 s2, s2, 16
4494; GFX8-NEXT:    s_or_b32 s0, s0, s2
4495; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
4496; GFX8-NEXT:    ; return to shader part epilog
4497;
4498; GFX9-LABEL: s_fshl_v3i16:
4499; GFX9:       ; %bb.0:
4500; GFX9-NEXT:    s_and_b32 s6, s4, 0xf000f
4501; GFX9-NEXT:    s_lshr_b32 s7, s0, 16
4502; GFX9-NEXT:    s_lshr_b32 s8, s6, 16
4503; GFX9-NEXT:    s_lshl_b32 s0, s0, s6
4504; GFX9-NEXT:    s_lshl_b32 s6, s7, s8
4505; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s6
4506; GFX9-NEXT:    s_lshr_b32 s6, s2, 16
4507; GFX9-NEXT:    s_and_b32 s2, s2, 0xffff
4508; GFX9-NEXT:    s_lshr_b32 s2, s2, 0x10001
4509; GFX9-NEXT:    s_lshr_b32 s6, s6, 1
4510; GFX9-NEXT:    s_andn2_b32 s4, 0xf000f, s4
4511; GFX9-NEXT:    s_pack_ll_b32_b16 s2, s2, s6
4512; GFX9-NEXT:    s_lshr_b32 s6, s2, 16
4513; GFX9-NEXT:    s_and_b32 s2, s2, 0xffff
4514; GFX9-NEXT:    s_lshr_b32 s7, s4, 16
4515; GFX9-NEXT:    s_lshr_b32 s2, s2, s4
4516; GFX9-NEXT:    s_lshr_b32 s4, s6, s7
4517; GFX9-NEXT:    s_pack_ll_b32_b16 s2, s2, s4
4518; GFX9-NEXT:    s_or_b32 s0, s0, s2
4519; GFX9-NEXT:    s_and_b32 s2, s5, 0xf000f
4520; GFX9-NEXT:    s_andn2_b32 s4, 0xf000f, s5
4521; GFX9-NEXT:    s_lshr_b32 s5, s1, 16
4522; GFX9-NEXT:    s_lshr_b32 s6, s2, 16
4523; GFX9-NEXT:    s_lshl_b32 s1, s1, s2
4524; GFX9-NEXT:    s_lshl_b32 s2, s5, s6
4525; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s1, s2
4526; GFX9-NEXT:    s_lshr_b32 s2, s3, 16
4527; GFX9-NEXT:    s_and_b32 s3, s3, 0xffff
4528; GFX9-NEXT:    s_lshr_b32 s3, s3, 0x10001
4529; GFX9-NEXT:    s_lshr_b32 s2, s2, 1
4530; GFX9-NEXT:    s_pack_ll_b32_b16 s2, s3, s2
4531; GFX9-NEXT:    s_lshr_b32 s3, s2, 16
4532; GFX9-NEXT:    s_and_b32 s2, s2, 0xffff
4533; GFX9-NEXT:    s_lshr_b32 s5, s4, 16
4534; GFX9-NEXT:    s_lshr_b32 s2, s2, s4
4535; GFX9-NEXT:    s_lshr_b32 s3, s3, s5
4536; GFX9-NEXT:    s_pack_ll_b32_b16 s2, s2, s3
4537; GFX9-NEXT:    s_or_b32 s1, s1, s2
4538; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
4539; GFX9-NEXT:    s_and_b32 s0, s0, 0xffff
4540; GFX9-NEXT:    s_lshl_b32 s2, s2, 16
4541; GFX9-NEXT:    s_or_b32 s0, s0, s2
4542; GFX9-NEXT:    s_and_b32 s1, s1, 0xffff
4543; GFX9-NEXT:    ; return to shader part epilog
4544;
4545; GFX10-LABEL: s_fshl_v3i16:
4546; GFX10:       ; %bb.0:
4547; GFX10-NEXT:    s_and_b32 s9, s2, 0xffff
4548; GFX10-NEXT:    s_lshr_b32 s2, s2, 16
4549; GFX10-NEXT:    s_and_b32 s6, s4, 0xf000f
4550; GFX10-NEXT:    s_lshr_b32 s9, s9, 0x10001
4551; GFX10-NEXT:    s_lshr_b32 s2, s2, 1
4552; GFX10-NEXT:    s_andn2_b32 s4, 0xf000f, s4
4553; GFX10-NEXT:    s_lshr_b32 s7, s0, 16
4554; GFX10-NEXT:    s_lshr_b32 s8, s6, 16
4555; GFX10-NEXT:    s_pack_ll_b32_b16 s2, s9, s2
4556; GFX10-NEXT:    s_lshl_b32 s0, s0, s6
4557; GFX10-NEXT:    s_lshl_b32 s6, s7, s8
4558; GFX10-NEXT:    s_lshr_b32 s7, s2, 16
4559; GFX10-NEXT:    s_and_b32 s2, s2, 0xffff
4560; GFX10-NEXT:    s_lshr_b32 s8, s4, 16
4561; GFX10-NEXT:    s_lshr_b32 s2, s2, s4
4562; GFX10-NEXT:    s_lshr_b32 s4, s7, s8
4563; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s6
4564; GFX10-NEXT:    s_pack_ll_b32_b16 s2, s2, s4
4565; GFX10-NEXT:    s_and_b32 s7, s3, 0xffff
4566; GFX10-NEXT:    s_lshr_b32 s3, s3, 16
4567; GFX10-NEXT:    s_or_b32 s0, s0, s2
4568; GFX10-NEXT:    s_and_b32 s2, s5, 0xf000f
4569; GFX10-NEXT:    s_lshr_b32 s7, s7, 0x10001
4570; GFX10-NEXT:    s_lshr_b32 s3, s3, 1
4571; GFX10-NEXT:    s_andn2_b32 s4, 0xf000f, s5
4572; GFX10-NEXT:    s_lshr_b32 s5, s1, 16
4573; GFX10-NEXT:    s_lshr_b32 s6, s2, 16
4574; GFX10-NEXT:    s_lshl_b32 s1, s1, s2
4575; GFX10-NEXT:    s_pack_ll_b32_b16 s2, s7, s3
4576; GFX10-NEXT:    s_lshl_b32 s3, s5, s6
4577; GFX10-NEXT:    s_lshr_b32 s5, s2, 16
4578; GFX10-NEXT:    s_and_b32 s2, s2, 0xffff
4579; GFX10-NEXT:    s_lshr_b32 s6, s4, 16
4580; GFX10-NEXT:    s_lshr_b32 s2, s2, s4
4581; GFX10-NEXT:    s_lshr_b32 s4, s5, s6
4582; GFX10-NEXT:    s_pack_ll_b32_b16 s1, s1, s3
4583; GFX10-NEXT:    s_pack_ll_b32_b16 s2, s2, s4
4584; GFX10-NEXT:    s_lshr_b32 s3, s0, 16
4585; GFX10-NEXT:    s_and_b32 s0, s0, 0xffff
4586; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
4587; GFX10-NEXT:    s_or_b32 s1, s1, s2
4588; GFX10-NEXT:    s_or_b32 s0, s0, s3
4589; GFX10-NEXT:    s_and_b32 s1, s1, 0xffff
4590; GFX10-NEXT:    ; return to shader part epilog
4591;
4592; GFX11-LABEL: s_fshl_v3i16:
4593; GFX11:       ; %bb.0:
4594; GFX11-NEXT:    s_and_b32 s9, s2, 0xffff
4595; GFX11-NEXT:    s_lshr_b32 s2, s2, 16
4596; GFX11-NEXT:    s_and_b32 s6, s4, 0xf000f
4597; GFX11-NEXT:    s_lshr_b32 s9, s9, 0x10001
4598; GFX11-NEXT:    s_lshr_b32 s2, s2, 1
4599; GFX11-NEXT:    s_and_not1_b32 s4, 0xf000f, s4
4600; GFX11-NEXT:    s_lshr_b32 s7, s0, 16
4601; GFX11-NEXT:    s_lshr_b32 s8, s6, 16
4602; GFX11-NEXT:    s_pack_ll_b32_b16 s2, s9, s2
4603; GFX11-NEXT:    s_lshl_b32 s0, s0, s6
4604; GFX11-NEXT:    s_lshl_b32 s6, s7, s8
4605; GFX11-NEXT:    s_lshr_b32 s7, s2, 16
4606; GFX11-NEXT:    s_and_b32 s2, s2, 0xffff
4607; GFX11-NEXT:    s_lshr_b32 s8, s4, 16
4608; GFX11-NEXT:    s_lshr_b32 s2, s2, s4
4609; GFX11-NEXT:    s_lshr_b32 s4, s7, s8
4610; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s6
4611; GFX11-NEXT:    s_pack_ll_b32_b16 s2, s2, s4
4612; GFX11-NEXT:    s_and_b32 s7, s3, 0xffff
4613; GFX11-NEXT:    s_lshr_b32 s3, s3, 16
4614; GFX11-NEXT:    s_or_b32 s0, s0, s2
4615; GFX11-NEXT:    s_and_b32 s2, s5, 0xf000f
4616; GFX11-NEXT:    s_lshr_b32 s7, s7, 0x10001
4617; GFX11-NEXT:    s_lshr_b32 s3, s3, 1
4618; GFX11-NEXT:    s_and_not1_b32 s4, 0xf000f, s5
4619; GFX11-NEXT:    s_lshr_b32 s5, s1, 16
4620; GFX11-NEXT:    s_lshr_b32 s6, s2, 16
4621; GFX11-NEXT:    s_lshl_b32 s1, s1, s2
4622; GFX11-NEXT:    s_pack_ll_b32_b16 s2, s7, s3
4623; GFX11-NEXT:    s_lshl_b32 s3, s5, s6
4624; GFX11-NEXT:    s_lshr_b32 s5, s2, 16
4625; GFX11-NEXT:    s_and_b32 s2, s2, 0xffff
4626; GFX11-NEXT:    s_lshr_b32 s6, s4, 16
4627; GFX11-NEXT:    s_lshr_b32 s2, s2, s4
4628; GFX11-NEXT:    s_lshr_b32 s4, s5, s6
4629; GFX11-NEXT:    s_pack_ll_b32_b16 s1, s1, s3
4630; GFX11-NEXT:    s_pack_ll_b32_b16 s2, s2, s4
4631; GFX11-NEXT:    s_lshr_b32 s3, s0, 16
4632; GFX11-NEXT:    s_and_b32 s0, s0, 0xffff
4633; GFX11-NEXT:    s_lshl_b32 s3, s3, 16
4634; GFX11-NEXT:    s_or_b32 s1, s1, s2
4635; GFX11-NEXT:    s_or_b32 s0, s0, s3
4636; GFX11-NEXT:    s_and_b32 s1, s1, 0xffff
4637; GFX11-NEXT:    ; return to shader part epilog
4638  %result = call <3 x i16> @llvm.fshl.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt)
4639  %cast = bitcast <3 x i16> %result to i48
4640  ret i48 %cast
4641}
4642
; Funnel shift left of <3 x i16> (llvm.fshl.v3i16) in a function with no
; explicit calling convention and no inreg operands. In the expected output
; below all operands are read from v-registers and every variant ends with
; s_setpc_b64 s[30:31]. The <3 x i16> result is bitcast to <3 x half> for the
; return.
; The assertion lines are autogenerated (see the note on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
4643define <3 x half> @v_fshl_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) {
4644; GFX6-LABEL: v_fshl_v3i16:
4645; GFX6:       ; %bb.0:
4646; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4647; GFX6-NEXT:    v_and_b32_e32 v9, 15, v6
4648; GFX6-NEXT:    v_xor_b32_e32 v6, -1, v6
4649; GFX6-NEXT:    v_and_b32_e32 v6, 15, v6
4650; GFX6-NEXT:    v_and_b32_e32 v9, 0xffff, v9
4651; GFX6-NEXT:    v_bfe_u32 v3, v3, 1, 15
4652; GFX6-NEXT:    v_and_b32_e32 v6, 0xffff, v6
4653; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v9, v0
4654; GFX6-NEXT:    v_lshrrev_b32_e32 v3, v6, v3
4655; GFX6-NEXT:    v_or_b32_e32 v0, v0, v3
4656; GFX6-NEXT:    v_and_b32_e32 v3, 15, v7
4657; GFX6-NEXT:    v_xor_b32_e32 v6, -1, v7
4658; GFX6-NEXT:    v_and_b32_e32 v6, 15, v6
4659; GFX6-NEXT:    v_and_b32_e32 v3, 0xffff, v3
4660; GFX6-NEXT:    v_lshlrev_b32_e32 v1, v3, v1
4661; GFX6-NEXT:    v_bfe_u32 v3, v4, 1, 15
4662; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v6
4663; GFX6-NEXT:    v_lshrrev_b32_e32 v3, v4, v3
4664; GFX6-NEXT:    v_or_b32_e32 v1, v1, v3
4665; GFX6-NEXT:    v_and_b32_e32 v3, 15, v8
4666; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v8
4667; GFX6-NEXT:    v_and_b32_e32 v4, 15, v4
4668; GFX6-NEXT:    v_and_b32_e32 v3, 0xffff, v3
4669; GFX6-NEXT:    v_lshlrev_b32_e32 v2, v3, v2
4670; GFX6-NEXT:    v_bfe_u32 v3, v5, 1, 15
4671; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v4
4672; GFX6-NEXT:    v_lshrrev_b32_e32 v3, v4, v3
4673; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
4674; GFX6-NEXT:    s_setpc_b64 s[30:31]
4675;
4676; GFX8-LABEL: v_fshl_v3i16:
4677; GFX8:       ; %bb.0:
4678; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4679; GFX8-NEXT:    v_xor_b32_e32 v7, -1, v4
4680; GFX8-NEXT:    v_and_b32_e32 v6, 15, v4
4681; GFX8-NEXT:    v_and_b32_e32 v7, 15, v7
4682; GFX8-NEXT:    v_lshrrev_b16_e32 v8, 1, v2
4683; GFX8-NEXT:    v_lshlrev_b16_e32 v6, v6, v0
4684; GFX8-NEXT:    v_lshrrev_b16_e32 v7, v7, v8
4685; GFX8-NEXT:    v_or_b32_e32 v6, v6, v7
4686; GFX8-NEXT:    v_mov_b32_e32 v7, 15
4687; GFX8-NEXT:    v_and_b32_sdwa v7, v4, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
4688; GFX8-NEXT:    v_mov_b32_e32 v8, -1
4689; GFX8-NEXT:    v_xor_b32_sdwa v4, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
4690; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4691; GFX8-NEXT:    v_mov_b32_e32 v7, 1
4692; GFX8-NEXT:    v_and_b32_e32 v4, 15, v4
4693; GFX8-NEXT:    v_lshrrev_b16_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4694; GFX8-NEXT:    v_lshrrev_b16_e32 v2, v4, v2
4695; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
4696; GFX8-NEXT:    v_and_b32_e32 v2, 15, v5
4697; GFX8-NEXT:    v_xor_b32_e32 v4, -1, v5
4698; GFX8-NEXT:    v_and_b32_e32 v4, 15, v4
4699; GFX8-NEXT:    v_lshlrev_b16_e32 v1, v2, v1
4700; GFX8-NEXT:    v_lshrrev_b16_e32 v2, 1, v3
4701; GFX8-NEXT:    v_lshrrev_b16_e32 v2, v4, v2
4702; GFX8-NEXT:    v_and_b32_e32 v0, 0xffff, v0
4703; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
4704; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
4705; GFX8-NEXT:    v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4706; GFX8-NEXT:    v_and_b32_e32 v1, 0xffff, v1
4707; GFX8-NEXT:    s_setpc_b64 s[30:31]
4708;
4709; GFX9-LABEL: v_fshl_v3i16:
4710; GFX9:       ; %bb.0:
4711; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4712; GFX9-NEXT:    v_and_b32_e32 v6, 0xf000f, v4
4713; GFX9-NEXT:    v_xor_b32_e32 v4, -1, v4
4714; GFX9-NEXT:    v_and_b32_e32 v4, 0xf000f, v4
4715; GFX9-NEXT:    v_pk_lshrrev_b16 v2, 1, v2 op_sel_hi:[0,1]
4716; GFX9-NEXT:    v_pk_lshlrev_b16 v0, v6, v0
4717; GFX9-NEXT:    v_pk_lshrrev_b16 v2, v4, v2
4718; GFX9-NEXT:    v_or_b32_e32 v0, v0, v2
4719; GFX9-NEXT:    v_and_b32_e32 v2, 0xf000f, v5
4720; GFX9-NEXT:    v_xor_b32_e32 v4, -1, v5
4721; GFX9-NEXT:    v_and_b32_e32 v4, 0xf000f, v4
4722; GFX9-NEXT:    v_pk_lshlrev_b16 v1, v2, v1
4723; GFX9-NEXT:    v_pk_lshrrev_b16 v2, 1, v3 op_sel_hi:[0,1]
4724; GFX9-NEXT:    v_pk_lshrrev_b16 v2, v4, v2
4725; GFX9-NEXT:    v_or_b32_e32 v1, v1, v2
4726; GFX9-NEXT:    s_setpc_b64 s[30:31]
4727;
4728; GFX10-LABEL: v_fshl_v3i16:
4729; GFX10:       ; %bb.0:
4730; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4731; GFX10-NEXT:    v_xor_b32_e32 v6, -1, v4
4732; GFX10-NEXT:    v_xor_b32_e32 v7, -1, v5
4733; GFX10-NEXT:    v_and_b32_e32 v4, 0xf000f, v4
4734; GFX10-NEXT:    v_pk_lshrrev_b16 v2, 1, v2 op_sel_hi:[0,1]
4735; GFX10-NEXT:    v_and_b32_e32 v5, 0xf000f, v5
4736; GFX10-NEXT:    v_and_b32_e32 v6, 0xf000f, v6
4737; GFX10-NEXT:    v_pk_lshrrev_b16 v3, 1, v3 op_sel_hi:[0,1]
4738; GFX10-NEXT:    v_and_b32_e32 v7, 0xf000f, v7
4739; GFX10-NEXT:    v_pk_lshlrev_b16 v0, v4, v0
4740; GFX10-NEXT:    v_pk_lshlrev_b16 v1, v5, v1
4741; GFX10-NEXT:    v_pk_lshrrev_b16 v2, v6, v2
4742; GFX10-NEXT:    v_pk_lshrrev_b16 v3, v7, v3
4743; GFX10-NEXT:    v_or_b32_e32 v0, v0, v2
4744; GFX10-NEXT:    v_or_b32_e32 v1, v1, v3
4745; GFX10-NEXT:    s_setpc_b64 s[30:31]
4746;
4747; GFX11-LABEL: v_fshl_v3i16:
4748; GFX11:       ; %bb.0:
4749; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4750; GFX11-NEXT:    v_xor_b32_e32 v6, -1, v4
4751; GFX11-NEXT:    v_xor_b32_e32 v7, -1, v5
4752; GFX11-NEXT:    v_and_b32_e32 v4, 0xf000f, v4
4753; GFX11-NEXT:    v_pk_lshrrev_b16 v2, 1, v2 op_sel_hi:[0,1]
4754; GFX11-NEXT:    v_and_b32_e32 v5, 0xf000f, v5
4755; GFX11-NEXT:    v_and_b32_e32 v6, 0xf000f, v6
4756; GFX11-NEXT:    v_pk_lshrrev_b16 v3, 1, v3 op_sel_hi:[0,1]
4757; GFX11-NEXT:    v_and_b32_e32 v7, 0xf000f, v7
4758; GFX11-NEXT:    v_pk_lshlrev_b16 v0, v4, v0
4759; GFX11-NEXT:    v_pk_lshlrev_b16 v1, v5, v1
4760; GFX11-NEXT:    v_pk_lshrrev_b16 v2, v6, v2
4761; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
4762; GFX11-NEXT:    v_pk_lshrrev_b16 v3, v7, v3
4763; GFX11-NEXT:    v_or_b32_e32 v0, v0, v2
4764; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
4765; GFX11-NEXT:    v_or_b32_e32 v1, v1, v3
4766; GFX11-NEXT:    s_setpc_b64 s[30:31]
4767  %result = call <3 x i16> @llvm.fshl.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt)
4768  %cast.result = bitcast <3 x i16> %result to <3 x half>
4769  ret <3 x half> %cast.result
4770}
4771
; Funnel shift left of <4 x i16> (llvm.fshl.v4i16) in an amdgpu_ps function
; with all three operands marked inreg. The expected output below consists
; solely of scalar (s_*) instructions for every checked target. The <4 x i16>
; result is bitcast to <2 x i32> for the return.
; The assertion lines are autogenerated (see the note on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
4772define amdgpu_ps <2 x i32> @s_fshl_v4i16(<4 x i16> inreg %lhs, <4 x i16> inreg %rhs, <4 x i16> inreg %amt) {
4773; GFX6-LABEL: s_fshl_v4i16:
4774; GFX6:       ; %bb.0:
4775; GFX6-NEXT:    s_and_b32 s12, s8, 15
4776; GFX6-NEXT:    s_andn2_b32 s8, 15, s8
4777; GFX6-NEXT:    s_and_b32 s12, 0xffff, s12
4778; GFX6-NEXT:    s_bfe_u32 s4, s4, 0xf0001
4779; GFX6-NEXT:    s_and_b32 s8, 0xffff, s8
4780; GFX6-NEXT:    s_lshl_b32 s0, s0, s12
4781; GFX6-NEXT:    s_lshr_b32 s4, s4, s8
4782; GFX6-NEXT:    s_or_b32 s0, s0, s4
4783; GFX6-NEXT:    s_and_b32 s4, s9, 15
4784; GFX6-NEXT:    s_andn2_b32 s8, 15, s9
4785; GFX6-NEXT:    s_and_b32 s4, 0xffff, s4
4786; GFX6-NEXT:    s_lshl_b32 s1, s1, s4
4787; GFX6-NEXT:    s_bfe_u32 s4, s5, 0xf0001
4788; GFX6-NEXT:    s_and_b32 s5, 0xffff, s8
4789; GFX6-NEXT:    s_lshr_b32 s4, s4, s5
4790; GFX6-NEXT:    s_or_b32 s1, s1, s4
4791; GFX6-NEXT:    s_and_b32 s4, s10, 15
4792; GFX6-NEXT:    s_andn2_b32 s5, 15, s10
4793; GFX6-NEXT:    s_and_b32 s4, 0xffff, s4
4794; GFX6-NEXT:    s_lshl_b32 s2, s2, s4
4795; GFX6-NEXT:    s_bfe_u32 s4, s6, 0xf0001
4796; GFX6-NEXT:    s_and_b32 s5, 0xffff, s5
4797; GFX6-NEXT:    s_lshr_b32 s4, s4, s5
4798; GFX6-NEXT:    s_or_b32 s2, s2, s4
4799; GFX6-NEXT:    s_and_b32 s4, s11, 15
4800; GFX6-NEXT:    s_andn2_b32 s5, 15, s11
4801; GFX6-NEXT:    s_and_b32 s4, 0xffff, s4
4802; GFX6-NEXT:    s_lshl_b32 s3, s3, s4
4803; GFX6-NEXT:    s_bfe_u32 s4, s7, 0xf0001
4804; GFX6-NEXT:    s_and_b32 s5, 0xffff, s5
4805; GFX6-NEXT:    s_lshr_b32 s4, s4, s5
4806; GFX6-NEXT:    s_and_b32 s1, 0xffff, s1
4807; GFX6-NEXT:    s_or_b32 s3, s3, s4
4808; GFX6-NEXT:    s_and_b32 s0, 0xffff, s0
4809; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
4810; GFX6-NEXT:    s_or_b32 s0, s0, s1
4811; GFX6-NEXT:    s_and_b32 s1, 0xffff, s2
4812; GFX6-NEXT:    s_and_b32 s2, 0xffff, s3
4813; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
4814; GFX6-NEXT:    s_or_b32 s1, s1, s2
4815; GFX6-NEXT:    ; return to shader part epilog
4816;
4817; GFX8-LABEL: s_fshl_v4i16:
4818; GFX8:       ; %bb.0:
4819; GFX8-NEXT:    s_lshr_b32 s8, s2, 16
4820; GFX8-NEXT:    s_lshr_b32 s10, s4, 16
4821; GFX8-NEXT:    s_and_b32 s12, s4, 15
4822; GFX8-NEXT:    s_andn2_b32 s4, 15, s4
4823; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
4824; GFX8-NEXT:    s_and_b32 s12, 0xffff, s12
4825; GFX8-NEXT:    s_lshr_b32 s2, s2, 1
4826; GFX8-NEXT:    s_and_b32 s4, 0xffff, s4
4827; GFX8-NEXT:    s_lshr_b32 s6, s0, 16
4828; GFX8-NEXT:    s_lshl_b32 s0, s0, s12
4829; GFX8-NEXT:    s_lshr_b32 s2, s2, s4
4830; GFX8-NEXT:    s_or_b32 s0, s0, s2
4831; GFX8-NEXT:    s_and_b32 s2, s10, 15
4832; GFX8-NEXT:    s_andn2_b32 s4, 15, s10
4833; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
4834; GFX8-NEXT:    s_lshl_b32 s2, s6, s2
4835; GFX8-NEXT:    s_lshr_b32 s6, s8, 1
4836; GFX8-NEXT:    s_and_b32 s4, 0xffff, s4
4837; GFX8-NEXT:    s_lshr_b32 s4, s6, s4
4838; GFX8-NEXT:    s_or_b32 s2, s2, s4
4839; GFX8-NEXT:    s_and_b32 s4, s5, 15
4840; GFX8-NEXT:    s_lshr_b32 s9, s3, 16
4841; GFX8-NEXT:    s_lshr_b32 s11, s5, 16
4842; GFX8-NEXT:    s_andn2_b32 s5, 15, s5
4843; GFX8-NEXT:    s_and_b32 s4, 0xffff, s4
4844; GFX8-NEXT:    s_and_b32 s3, 0xffff, s3
4845; GFX8-NEXT:    s_lshr_b32 s7, s1, 16
4846; GFX8-NEXT:    s_lshl_b32 s1, s1, s4
4847; GFX8-NEXT:    s_lshr_b32 s3, s3, 1
4848; GFX8-NEXT:    s_and_b32 s4, 0xffff, s5
4849; GFX8-NEXT:    s_lshr_b32 s3, s3, s4
4850; GFX8-NEXT:    s_or_b32 s1, s1, s3
4851; GFX8-NEXT:    s_and_b32 s3, s11, 15
4852; GFX8-NEXT:    s_andn2_b32 s4, 15, s11
4853; GFX8-NEXT:    s_and_b32 s3, 0xffff, s3
4854; GFX8-NEXT:    s_lshr_b32 s5, s9, 1
4855; GFX8-NEXT:    s_and_b32 s4, 0xffff, s4
4856; GFX8-NEXT:    s_lshl_b32 s3, s7, s3
4857; GFX8-NEXT:    s_lshr_b32 s4, s5, s4
4858; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
4859; GFX8-NEXT:    s_or_b32 s3, s3, s4
4860; GFX8-NEXT:    s_and_b32 s0, 0xffff, s0
4861; GFX8-NEXT:    s_lshl_b32 s2, s2, 16
4862; GFX8-NEXT:    s_or_b32 s0, s0, s2
4863; GFX8-NEXT:    s_and_b32 s2, 0xffff, s3
4864; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
4865; GFX8-NEXT:    s_lshl_b32 s2, s2, 16
4866; GFX8-NEXT:    s_or_b32 s1, s1, s2
4867; GFX8-NEXT:    ; return to shader part epilog
4868;
4869; GFX9-LABEL: s_fshl_v4i16:
4870; GFX9:       ; %bb.0:
4871; GFX9-NEXT:    s_and_b32 s6, s4, 0xf000f
4872; GFX9-NEXT:    s_lshr_b32 s7, s0, 16
4873; GFX9-NEXT:    s_lshr_b32 s8, s6, 16
4874; GFX9-NEXT:    s_lshl_b32 s0, s0, s6
4875; GFX9-NEXT:    s_lshl_b32 s6, s7, s8
4876; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s6
4877; GFX9-NEXT:    s_lshr_b32 s6, s2, 16
4878; GFX9-NEXT:    s_and_b32 s2, s2, 0xffff
4879; GFX9-NEXT:    s_lshr_b32 s2, s2, 0x10001
4880; GFX9-NEXT:    s_lshr_b32 s6, s6, 1
4881; GFX9-NEXT:    s_andn2_b32 s4, 0xf000f, s4
4882; GFX9-NEXT:    s_pack_ll_b32_b16 s2, s2, s6
4883; GFX9-NEXT:    s_lshr_b32 s6, s2, 16
4884; GFX9-NEXT:    s_and_b32 s2, s2, 0xffff
4885; GFX9-NEXT:    s_lshr_b32 s7, s4, 16
4886; GFX9-NEXT:    s_lshr_b32 s2, s2, s4
4887; GFX9-NEXT:    s_lshr_b32 s4, s6, s7
4888; GFX9-NEXT:    s_pack_ll_b32_b16 s2, s2, s4
4889; GFX9-NEXT:    s_or_b32 s0, s0, s2
4890; GFX9-NEXT:    s_and_b32 s2, s5, 0xf000f
4891; GFX9-NEXT:    s_andn2_b32 s4, 0xf000f, s5
4892; GFX9-NEXT:    s_lshr_b32 s5, s1, 16
4893; GFX9-NEXT:    s_lshr_b32 s6, s2, 16
4894; GFX9-NEXT:    s_lshl_b32 s1, s1, s2
4895; GFX9-NEXT:    s_lshl_b32 s2, s5, s6
4896; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s1, s2
4897; GFX9-NEXT:    s_lshr_b32 s2, s3, 16
4898; GFX9-NEXT:    s_and_b32 s3, s3, 0xffff
4899; GFX9-NEXT:    s_lshr_b32 s3, s3, 0x10001
4900; GFX9-NEXT:    s_lshr_b32 s2, s2, 1
4901; GFX9-NEXT:    s_pack_ll_b32_b16 s2, s3, s2
4902; GFX9-NEXT:    s_lshr_b32 s3, s2, 16
4903; GFX9-NEXT:    s_and_b32 s2, s2, 0xffff
4904; GFX9-NEXT:    s_lshr_b32 s5, s4, 16
4905; GFX9-NEXT:    s_lshr_b32 s2, s2, s4
4906; GFX9-NEXT:    s_lshr_b32 s3, s3, s5
4907; GFX9-NEXT:    s_pack_ll_b32_b16 s2, s2, s3
4908; GFX9-NEXT:    s_or_b32 s1, s1, s2
4909; GFX9-NEXT:    ; return to shader part epilog
4910;
4911; GFX10-LABEL: s_fshl_v4i16:
4912; GFX10:       ; %bb.0:
4913; GFX10-NEXT:    s_and_b32 s9, s2, 0xffff
4914; GFX10-NEXT:    s_lshr_b32 s2, s2, 16
4915; GFX10-NEXT:    s_and_b32 s6, s4, 0xf000f
4916; GFX10-NEXT:    s_lshr_b32 s9, s9, 0x10001
4917; GFX10-NEXT:    s_lshr_b32 s2, s2, 1
4918; GFX10-NEXT:    s_andn2_b32 s4, 0xf000f, s4
4919; GFX10-NEXT:    s_lshr_b32 s7, s0, 16
4920; GFX10-NEXT:    s_lshr_b32 s8, s6, 16
4921; GFX10-NEXT:    s_pack_ll_b32_b16 s2, s9, s2
4922; GFX10-NEXT:    s_lshl_b32 s0, s0, s6
4923; GFX10-NEXT:    s_lshl_b32 s6, s7, s8
4924; GFX10-NEXT:    s_lshr_b32 s7, s2, 16
4925; GFX10-NEXT:    s_and_b32 s2, s2, 0xffff
4926; GFX10-NEXT:    s_lshr_b32 s8, s4, 16
4927; GFX10-NEXT:    s_lshr_b32 s2, s2, s4
4928; GFX10-NEXT:    s_lshr_b32 s4, s7, s8
4929; GFX10-NEXT:    s_and_b32 s8, s3, 0xffff
4930; GFX10-NEXT:    s_lshr_b32 s3, s3, 16
4931; GFX10-NEXT:    s_pack_ll_b32_b16 s2, s2, s4
4932; GFX10-NEXT:    s_and_b32 s4, s5, 0xf000f
4933; GFX10-NEXT:    s_lshr_b32 s8, s8, 0x10001
4934; GFX10-NEXT:    s_lshr_b32 s3, s3, 1
4935; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s6
4936; GFX10-NEXT:    s_andn2_b32 s5, 0xf000f, s5
4937; GFX10-NEXT:    s_lshr_b32 s6, s1, 16
4938; GFX10-NEXT:    s_lshr_b32 s7, s4, 16
4939; GFX10-NEXT:    s_pack_ll_b32_b16 s3, s8, s3
4940; GFX10-NEXT:    s_lshl_b32 s1, s1, s4
4941; GFX10-NEXT:    s_lshl_b32 s4, s6, s7
4942; GFX10-NEXT:    s_lshr_b32 s6, s3, 16
4943; GFX10-NEXT:    s_and_b32 s3, s3, 0xffff
4944; GFX10-NEXT:    s_lshr_b32 s7, s5, 16
4945; GFX10-NEXT:    s_lshr_b32 s3, s3, s5
4946; GFX10-NEXT:    s_lshr_b32 s5, s6, s7
4947; GFX10-NEXT:    s_pack_ll_b32_b16 s1, s1, s4
4948; GFX10-NEXT:    s_pack_ll_b32_b16 s3, s3, s5
4949; GFX10-NEXT:    s_or_b32 s0, s0, s2
4950; GFX10-NEXT:    s_or_b32 s1, s1, s3
4951; GFX10-NEXT:    ; return to shader part epilog
4952;
4953; GFX11-LABEL: s_fshl_v4i16:
4954; GFX11:       ; %bb.0:
4955; GFX11-NEXT:    s_and_b32 s9, s2, 0xffff
4956; GFX11-NEXT:    s_lshr_b32 s2, s2, 16
4957; GFX11-NEXT:    s_and_b32 s6, s4, 0xf000f
4958; GFX11-NEXT:    s_lshr_b32 s9, s9, 0x10001
4959; GFX11-NEXT:    s_lshr_b32 s2, s2, 1
4960; GFX11-NEXT:    s_and_not1_b32 s4, 0xf000f, s4
4961; GFX11-NEXT:    s_lshr_b32 s7, s0, 16
4962; GFX11-NEXT:    s_lshr_b32 s8, s6, 16
4963; GFX11-NEXT:    s_pack_ll_b32_b16 s2, s9, s2
4964; GFX11-NEXT:    s_lshl_b32 s0, s0, s6
4965; GFX11-NEXT:    s_lshl_b32 s6, s7, s8
4966; GFX11-NEXT:    s_lshr_b32 s7, s2, 16
4967; GFX11-NEXT:    s_and_b32 s2, s2, 0xffff
4968; GFX11-NEXT:    s_lshr_b32 s8, s4, 16
4969; GFX11-NEXT:    s_lshr_b32 s2, s2, s4
4970; GFX11-NEXT:    s_lshr_b32 s4, s7, s8
4971; GFX11-NEXT:    s_and_b32 s8, s3, 0xffff
4972; GFX11-NEXT:    s_lshr_b32 s3, s3, 16
4973; GFX11-NEXT:    s_pack_ll_b32_b16 s2, s2, s4
4974; GFX11-NEXT:    s_and_b32 s4, s5, 0xf000f
4975; GFX11-NEXT:    s_lshr_b32 s8, s8, 0x10001
4976; GFX11-NEXT:    s_lshr_b32 s3, s3, 1
4977; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s6
4978; GFX11-NEXT:    s_and_not1_b32 s5, 0xf000f, s5
4979; GFX11-NEXT:    s_lshr_b32 s6, s1, 16
4980; GFX11-NEXT:    s_lshr_b32 s7, s4, 16
4981; GFX11-NEXT:    s_pack_ll_b32_b16 s3, s8, s3
4982; GFX11-NEXT:    s_lshl_b32 s1, s1, s4
4983; GFX11-NEXT:    s_lshl_b32 s4, s6, s7
4984; GFX11-NEXT:    s_lshr_b32 s6, s3, 16
4985; GFX11-NEXT:    s_and_b32 s3, s3, 0xffff
4986; GFX11-NEXT:    s_lshr_b32 s7, s5, 16
4987; GFX11-NEXT:    s_lshr_b32 s3, s3, s5
4988; GFX11-NEXT:    s_lshr_b32 s5, s6, s7
4989; GFX11-NEXT:    s_pack_ll_b32_b16 s1, s1, s4
4990; GFX11-NEXT:    s_pack_ll_b32_b16 s3, s3, s5
4991; GFX11-NEXT:    s_or_b32 s0, s0, s2
4992; GFX11-NEXT:    s_or_b32 s1, s1, s3
4993; GFX11-NEXT:    ; return to shader part epilog
4994  %result = call <4 x i16> @llvm.fshl.v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt)
4995  %cast.result = bitcast <4 x i16> %result to <2 x i32>
4996  ret <2 x i32> %cast.result
4997}
4998
; Funnel shift left on <4 x i16> where lhs, rhs and amt are all plain (non-inreg)
; arguments; the result is bitcast to <4 x half> for the return. In the checked
; output each 16-bit shift amount is masked with 15, the inverted amount is
; formed with an xor against -1, and rhs is shifted right by 1 before the
; variable right shift.
4999define <4 x half> @v_fshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) {
5000; GFX6-LABEL: v_fshl_v4i16:
5001; GFX6:       ; %bb.0:
5002; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5003; GFX6-NEXT:    v_and_b32_e32 v12, 15, v8
5004; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v8
5005; GFX6-NEXT:    v_and_b32_e32 v8, 15, v8
5006; GFX6-NEXT:    v_and_b32_e32 v12, 0xffff, v12
5007; GFX6-NEXT:    v_bfe_u32 v4, v4, 1, 15
5008; GFX6-NEXT:    v_and_b32_e32 v8, 0xffff, v8
5009; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v12, v0
5010; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v8, v4
5011; GFX6-NEXT:    v_or_b32_e32 v0, v0, v4
5012; GFX6-NEXT:    v_and_b32_e32 v4, 15, v9
5013; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v9
5014; GFX6-NEXT:    v_and_b32_e32 v8, 15, v8
5015; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v4
5016; GFX6-NEXT:    v_lshlrev_b32_e32 v1, v4, v1
5017; GFX6-NEXT:    v_bfe_u32 v4, v5, 1, 15
5018; GFX6-NEXT:    v_and_b32_e32 v5, 0xffff, v8
5019; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v5, v4
5020; GFX6-NEXT:    v_or_b32_e32 v1, v1, v4
5021; GFX6-NEXT:    v_and_b32_e32 v4, 15, v10
5022; GFX6-NEXT:    v_xor_b32_e32 v5, -1, v10
5023; GFX6-NEXT:    v_and_b32_e32 v5, 15, v5
5024; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v4
5025; GFX6-NEXT:    v_lshlrev_b32_e32 v2, v4, v2
5026; GFX6-NEXT:    v_bfe_u32 v4, v6, 1, 15
5027; GFX6-NEXT:    v_and_b32_e32 v5, 0xffff, v5
5028; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v5, v4
5029; GFX6-NEXT:    v_or_b32_e32 v2, v2, v4
5030; GFX6-NEXT:    v_and_b32_e32 v4, 15, v11
5031; GFX6-NEXT:    v_xor_b32_e32 v5, -1, v11
5032; GFX6-NEXT:    v_and_b32_e32 v5, 15, v5
5033; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v4
5034; GFX6-NEXT:    v_lshlrev_b32_e32 v3, v4, v3
5035; GFX6-NEXT:    v_bfe_u32 v4, v7, 1, 15
5036; GFX6-NEXT:    v_and_b32_e32 v5, 0xffff, v5
5037; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v5, v4
5038; GFX6-NEXT:    v_or_b32_e32 v3, v3, v4
5039; GFX6-NEXT:    s_setpc_b64 s[30:31]
5040;
5041; GFX8-LABEL: v_fshl_v4i16:
5042; GFX8:       ; %bb.0:
5043; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5044; GFX8-NEXT:    v_xor_b32_e32 v7, -1, v4
5045; GFX8-NEXT:    v_and_b32_e32 v6, 15, v4
5046; GFX8-NEXT:    v_and_b32_e32 v7, 15, v7
5047; GFX8-NEXT:    v_lshrrev_b16_e32 v8, 1, v2
5048; GFX8-NEXT:    v_lshlrev_b16_e32 v6, v6, v0
5049; GFX8-NEXT:    v_lshrrev_b16_e32 v7, v7, v8
5050; GFX8-NEXT:    v_or_b32_e32 v6, v6, v7
5051; GFX8-NEXT:    v_mov_b32_e32 v7, 15
5052; GFX8-NEXT:    v_and_b32_sdwa v8, v4, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
5053; GFX8-NEXT:    v_mov_b32_e32 v9, -1
5054; GFX8-NEXT:    v_xor_b32_sdwa v4, v4, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
5055; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5056; GFX8-NEXT:    v_mov_b32_e32 v8, 1
5057; GFX8-NEXT:    v_and_b32_e32 v4, 15, v4
5058; GFX8-NEXT:    v_lshrrev_b16_sdwa v2, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5059; GFX8-NEXT:    v_lshrrev_b16_e32 v2, v4, v2
5060; GFX8-NEXT:    v_xor_b32_e32 v4, -1, v5
5061; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
5062; GFX8-NEXT:    v_and_b32_e32 v2, 15, v5
5063; GFX8-NEXT:    v_and_b32_e32 v4, 15, v4
5064; GFX8-NEXT:    v_lshrrev_b16_e32 v10, 1, v3
5065; GFX8-NEXT:    v_lshlrev_b16_e32 v2, v2, v1
5066; GFX8-NEXT:    v_lshrrev_b16_e32 v4, v4, v10
5067; GFX8-NEXT:    v_or_b32_e32 v2, v2, v4
5068; GFX8-NEXT:    v_and_b32_sdwa v4, v5, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
5069; GFX8-NEXT:    v_xor_b32_sdwa v5, v5, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
5070; GFX8-NEXT:    v_and_b32_e32 v5, 15, v5
5071; GFX8-NEXT:    v_lshrrev_b16_sdwa v3, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5072; GFX8-NEXT:    v_lshlrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5073; GFX8-NEXT:    v_lshrrev_b16_e32 v3, v5, v3
5074; GFX8-NEXT:    v_or_b32_e32 v1, v1, v3
5075; GFX8-NEXT:    v_and_b32_e32 v0, 0xffff, v0
5076; GFX8-NEXT:    v_and_b32_e32 v1, 0xffff, v1
5077; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
5078; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
5079; GFX8-NEXT:    v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5080; GFX8-NEXT:    v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5081; GFX8-NEXT:    s_setpc_b64 s[30:31]
5082;
5083; GFX9-LABEL: v_fshl_v4i16:
5084; GFX9:       ; %bb.0:
5085; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5086; GFX9-NEXT:    v_and_b32_e32 v6, 0xf000f, v4
5087; GFX9-NEXT:    v_xor_b32_e32 v4, -1, v4
5088; GFX9-NEXT:    v_and_b32_e32 v4, 0xf000f, v4
5089; GFX9-NEXT:    v_pk_lshrrev_b16 v2, 1, v2 op_sel_hi:[0,1]
5090; GFX9-NEXT:    v_pk_lshlrev_b16 v0, v6, v0
5091; GFX9-NEXT:    v_pk_lshrrev_b16 v2, v4, v2
5092; GFX9-NEXT:    v_or_b32_e32 v0, v0, v2
5093; GFX9-NEXT:    v_and_b32_e32 v2, 0xf000f, v5
5094; GFX9-NEXT:    v_xor_b32_e32 v4, -1, v5
5095; GFX9-NEXT:    v_and_b32_e32 v4, 0xf000f, v4
5096; GFX9-NEXT:    v_pk_lshlrev_b16 v1, v2, v1
5097; GFX9-NEXT:    v_pk_lshrrev_b16 v2, 1, v3 op_sel_hi:[0,1]
5098; GFX9-NEXT:    v_pk_lshrrev_b16 v2, v4, v2
5099; GFX9-NEXT:    v_or_b32_e32 v1, v1, v2
5100; GFX9-NEXT:    s_setpc_b64 s[30:31]
5101;
5102; GFX10-LABEL: v_fshl_v4i16:
5103; GFX10:       ; %bb.0:
5104; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5105; GFX10-NEXT:    v_xor_b32_e32 v6, -1, v4
5106; GFX10-NEXT:    v_xor_b32_e32 v7, -1, v5
5107; GFX10-NEXT:    v_and_b32_e32 v4, 0xf000f, v4
5108; GFX10-NEXT:    v_pk_lshrrev_b16 v2, 1, v2 op_sel_hi:[0,1]
5109; GFX10-NEXT:    v_and_b32_e32 v5, 0xf000f, v5
5110; GFX10-NEXT:    v_and_b32_e32 v6, 0xf000f, v6
5111; GFX10-NEXT:    v_pk_lshrrev_b16 v3, 1, v3 op_sel_hi:[0,1]
5112; GFX10-NEXT:    v_and_b32_e32 v7, 0xf000f, v7
5113; GFX10-NEXT:    v_pk_lshlrev_b16 v0, v4, v0
5114; GFX10-NEXT:    v_pk_lshlrev_b16 v1, v5, v1
5115; GFX10-NEXT:    v_pk_lshrrev_b16 v2, v6, v2
5116; GFX10-NEXT:    v_pk_lshrrev_b16 v3, v7, v3
5117; GFX10-NEXT:    v_or_b32_e32 v0, v0, v2
5118; GFX10-NEXT:    v_or_b32_e32 v1, v1, v3
5119; GFX10-NEXT:    s_setpc_b64 s[30:31]
5120;
5121; GFX11-LABEL: v_fshl_v4i16:
5122; GFX11:       ; %bb.0:
5123; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5124; GFX11-NEXT:    v_xor_b32_e32 v6, -1, v4
5125; GFX11-NEXT:    v_xor_b32_e32 v7, -1, v5
5126; GFX11-NEXT:    v_and_b32_e32 v4, 0xf000f, v4
5127; GFX11-NEXT:    v_pk_lshrrev_b16 v2, 1, v2 op_sel_hi:[0,1]
5128; GFX11-NEXT:    v_and_b32_e32 v5, 0xf000f, v5
5129; GFX11-NEXT:    v_and_b32_e32 v6, 0xf000f, v6
5130; GFX11-NEXT:    v_pk_lshrrev_b16 v3, 1, v3 op_sel_hi:[0,1]
5131; GFX11-NEXT:    v_and_b32_e32 v7, 0xf000f, v7
5132; GFX11-NEXT:    v_pk_lshlrev_b16 v0, v4, v0
5133; GFX11-NEXT:    v_pk_lshlrev_b16 v1, v5, v1
5134; GFX11-NEXT:    v_pk_lshrrev_b16 v2, v6, v2
5135; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
5136; GFX11-NEXT:    v_pk_lshrrev_b16 v3, v7, v3
5137; GFX11-NEXT:    v_or_b32_e32 v0, v0, v2
5138; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
5139; GFX11-NEXT:    v_or_b32_e32 v1, v1, v3
5140; GFX11-NEXT:    s_setpc_b64 s[30:31]
5141  %result = call <4 x i16> @llvm.fshl.v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt)
5142  %cast.result = bitcast <4 x i16> %result to <4 x half>
5143  ret <4 x half> %cast.result
5144}
5145
; i64 funnel shift left in an amdgpu_ps function with lhs, rhs and amt all
; passed inreg. The checked output is purely scalar: s[0:1] << s4, OR'd with
; (s[2:3] >> 1) shifted right again by the bitwise NOT of s4.
5146define amdgpu_ps i64 @s_fshl_i64(i64 inreg %lhs, i64 inreg %rhs, i64 inreg %amt) {
5147; GFX6-LABEL: s_fshl_i64:
5148; GFX6:       ; %bb.0:
5149; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], s4
5150; GFX6-NEXT:    s_lshr_b64 s[2:3], s[2:3], 1
5151; GFX6-NEXT:    s_not_b32 s4, s4
5152; GFX6-NEXT:    s_lshr_b64 s[2:3], s[2:3], s4
5153; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5154; GFX6-NEXT:    ; return to shader part epilog
5155;
5156; GFX8-LABEL: s_fshl_i64:
5157; GFX8:       ; %bb.0:
5158; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], s4
5159; GFX8-NEXT:    s_lshr_b64 s[2:3], s[2:3], 1
5160; GFX8-NEXT:    s_not_b32 s4, s4
5161; GFX8-NEXT:    s_lshr_b64 s[2:3], s[2:3], s4
5162; GFX8-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5163; GFX8-NEXT:    ; return to shader part epilog
5164;
5165; GFX9-LABEL: s_fshl_i64:
5166; GFX9:       ; %bb.0:
5167; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], s4
5168; GFX9-NEXT:    s_lshr_b64 s[2:3], s[2:3], 1
5169; GFX9-NEXT:    s_not_b32 s4, s4
5170; GFX9-NEXT:    s_lshr_b64 s[2:3], s[2:3], s4
5171; GFX9-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5172; GFX9-NEXT:    ; return to shader part epilog
5173;
5174; GFX10-LABEL: s_fshl_i64:
5175; GFX10:       ; %bb.0:
5176; GFX10-NEXT:    s_lshr_b64 s[2:3], s[2:3], 1
5177; GFX10-NEXT:    s_not_b32 s5, s4
5178; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], s4
5179; GFX10-NEXT:    s_lshr_b64 s[2:3], s[2:3], s5
5180; GFX10-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5181; GFX10-NEXT:    ; return to shader part epilog
5182;
5183; GFX11-LABEL: s_fshl_i64:
5184; GFX11:       ; %bb.0:
5185; GFX11-NEXT:    s_lshr_b64 s[2:3], s[2:3], 1
5186; GFX11-NEXT:    s_not_b32 s5, s4
5187; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], s4
5188; GFX11-NEXT:    s_lshr_b64 s[2:3], s[2:3], s5
5189; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
5190; GFX11-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5191; GFX11-NEXT:    ; return to shader part epilog
5192  %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 %amt)
5193  ret i64 %result
5194}
5195
; Scalar (all-inreg) i64 funnel shift left by the constant 5. The checked
; output shifts s[0:1] left by 5 and ORs in s3 >> 27, with the upper dword of
; the right-hand OR operand set to 0.
5196define amdgpu_ps i64 @s_fshl_i64_5(i64 inreg %lhs, i64 inreg %rhs) {
5197; GCN-LABEL: s_fshl_i64_5:
5198; GCN:       ; %bb.0:
5199; GCN-NEXT:    s_lshl_b64 s[0:1], s[0:1], 5
5200; GCN-NEXT:    s_lshr_b32 s2, s3, 27
5201; GCN-NEXT:    s_mov_b32 s3, 0
5202; GCN-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5203; GCN-NEXT:    ; return to shader part epilog
5204;
5205; GFX11-LABEL: s_fshl_i64_5:
5206; GFX11:       ; %bb.0:
5207; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], 5
5208; GFX11-NEXT:    s_lshr_b32 s2, s3, 27
5209; GFX11-NEXT:    s_mov_b32 s3, 0
5210; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
5211; GFX11-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5212; GFX11-NEXT:    ; return to shader part epilog
5213  %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 5)
5214  ret i64 %result
5215}
5216
; Scalar (all-inreg) i64 funnel shift left by the constant 32. The checked
; output contains no shift instructions: s0 is moved into s1, s0 is zeroed,
; s3 is moved into s2, s3 is zeroed (copied from s0), and the two 64-bit
; pairs are OR'd together.
5217define amdgpu_ps i64 @s_fshl_i64_32(i64 inreg %lhs, i64 inreg %rhs) {
5218; GCN-LABEL: s_fshl_i64_32:
5219; GCN:       ; %bb.0:
5220; GCN-NEXT:    s_mov_b32 s1, s0
5221; GCN-NEXT:    s_mov_b32 s0, 0
5222; GCN-NEXT:    s_mov_b32 s2, s3
5223; GCN-NEXT:    s_mov_b32 s3, s0
5224; GCN-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5225; GCN-NEXT:    ; return to shader part epilog
5226;
5227; GFX11-LABEL: s_fshl_i64_32:
5228; GFX11:       ; %bb.0:
5229; GFX11-NEXT:    s_mov_b32 s1, s0
5230; GFX11-NEXT:    s_mov_b32 s0, 0
5231; GFX11-NEXT:    s_mov_b32 s2, s3
5232; GFX11-NEXT:    s_mov_b32 s3, s0
5233; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
5234; GFX11-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5235; GFX11-NEXT:    ; return to shader part epilog
5236  %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 32)
5237  ret i64 %result
5238}
5239
; Scalar (all-inreg) i64 funnel shift left by the constant 48. The checked
; output forms the left part as s1 = s0 << 16 with s0 cleared, and ORs it
; with s[2:3] >> 16.
5240define amdgpu_ps i64 @s_fshl_i64_48(i64 inreg %lhs, i64 inreg %rhs) {
5241; GCN-LABEL: s_fshl_i64_48:
5242; GCN:       ; %bb.0:
5243; GCN-NEXT:    s_lshl_b32 s1, s0, 16
5244; GCN-NEXT:    s_mov_b32 s0, 0
5245; GCN-NEXT:    s_lshr_b64 s[2:3], s[2:3], 16
5246; GCN-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5247; GCN-NEXT:    ; return to shader part epilog
5248;
5249; GFX11-LABEL: s_fshl_i64_48:
5250; GFX11:       ; %bb.0:
5251; GFX11-NEXT:    s_lshl_b32 s1, s0, 16
5252; GFX11-NEXT:    s_mov_b32 s0, 0
5253; GFX11-NEXT:    s_lshr_b64 s[2:3], s[2:3], 16
5254; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
5255; GFX11-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5256; GFX11-NEXT:    ; return to shader part epilog
5257  %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 48)
5258  ret i64 %result
5259}
5260
; i64 funnel shift left with lhs, rhs and amt all plain (non-inreg) arguments.
; The checked output masks the amount with 63 for the left shift of v[0:1],
; and uses (NOT amt) & 63 for the right shift of v[2:3] after v[2:3] has
; already been shifted right by 1; the two halves are OR'd per dword.
5261define i64 @v_fshl_i64(i64 %lhs, i64 %rhs, i64 %amt) {
5262; GFX6-LABEL: v_fshl_i64:
5263; GFX6:       ; %bb.0:
5264; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5265; GFX6-NEXT:    v_and_b32_e32 v5, 63, v4
5266; GFX6-NEXT:    v_lshr_b64 v[2:3], v[2:3], 1
5267; GFX6-NEXT:    v_not_b32_e32 v4, v4
5268; GFX6-NEXT:    v_and_b32_e32 v4, 63, v4
5269; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], v5
5270; GFX6-NEXT:    v_lshr_b64 v[2:3], v[2:3], v4
5271; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
5272; GFX6-NEXT:    v_or_b32_e32 v1, v1, v3
5273; GFX6-NEXT:    s_setpc_b64 s[30:31]
5274;
5275; GFX8-LABEL: v_fshl_i64:
5276; GFX8:       ; %bb.0:
5277; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5278; GFX8-NEXT:    v_and_b32_e32 v5, 63, v4
5279; GFX8-NEXT:    v_lshrrev_b64 v[2:3], 1, v[2:3]
5280; GFX8-NEXT:    v_not_b32_e32 v4, v4
5281; GFX8-NEXT:    v_and_b32_e32 v4, 63, v4
5282; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v5, v[0:1]
5283; GFX8-NEXT:    v_lshrrev_b64 v[2:3], v4, v[2:3]
5284; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
5285; GFX8-NEXT:    v_or_b32_e32 v1, v1, v3
5286; GFX8-NEXT:    s_setpc_b64 s[30:31]
5287;
5288; GFX9-LABEL: v_fshl_i64:
5289; GFX9:       ; %bb.0:
5290; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5291; GFX9-NEXT:    v_and_b32_e32 v5, 63, v4
5292; GFX9-NEXT:    v_lshrrev_b64 v[2:3], 1, v[2:3]
5293; GFX9-NEXT:    v_not_b32_e32 v4, v4
5294; GFX9-NEXT:    v_and_b32_e32 v4, 63, v4
5295; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v5, v[0:1]
5296; GFX9-NEXT:    v_lshrrev_b64 v[2:3], v4, v[2:3]
5297; GFX9-NEXT:    v_or_b32_e32 v0, v0, v2
5298; GFX9-NEXT:    v_or_b32_e32 v1, v1, v3
5299; GFX9-NEXT:    s_setpc_b64 s[30:31]
5300;
5301; GFX10-LABEL: v_fshl_i64:
5302; GFX10:       ; %bb.0:
5303; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5304; GFX10-NEXT:    v_not_b32_e32 v5, v4
5305; GFX10-NEXT:    v_lshrrev_b64 v[2:3], 1, v[2:3]
5306; GFX10-NEXT:    v_and_b32_e32 v4, 63, v4
5307; GFX10-NEXT:    v_and_b32_e32 v5, 63, v5
5308; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v4, v[0:1]
5309; GFX10-NEXT:    v_lshrrev_b64 v[2:3], v5, v[2:3]
5310; GFX10-NEXT:    v_or_b32_e32 v0, v0, v2
5311; GFX10-NEXT:    v_or_b32_e32 v1, v1, v3
5312; GFX10-NEXT:    s_setpc_b64 s[30:31]
5313;
5314; GFX11-LABEL: v_fshl_i64:
5315; GFX11:       ; %bb.0:
5316; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5317; GFX11-NEXT:    v_not_b32_e32 v5, v4
5318; GFX11-NEXT:    v_lshrrev_b64 v[2:3], 1, v[2:3]
5319; GFX11-NEXT:    v_and_b32_e32 v4, 63, v4
5320; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
5321; GFX11-NEXT:    v_and_b32_e32 v5, 63, v5
5322; GFX11-NEXT:    v_lshlrev_b64 v[0:1], v4, v[0:1]
5323; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
5324; GFX11-NEXT:    v_lshrrev_b64 v[2:3], v5, v[2:3]
5325; GFX11-NEXT:    v_or_b32_e32 v0, v0, v2
5326; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
5327; GFX11-NEXT:    v_or_b32_e32 v1, v1, v3
5328; GFX11-NEXT:    s_setpc_b64 s[30:31]
5329  %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 %amt)
5330  ret i64 %result
5331}
5332
; i64 funnel shift left by the constant 5 with plain (non-inreg) arguments.
; The checked output shifts v[0:1] left by 5 and ORs v3 >> 27 into the low
; dword only (v0); no OR is emitted for v1.
5333define i64 @v_fshl_i64_5(i64 %lhs, i64 %rhs) {
5334; GFX6-LABEL: v_fshl_i64_5:
5335; GFX6:       ; %bb.0:
5336; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5337; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 5
5338; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 27, v3
5339; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
5340; GFX6-NEXT:    s_setpc_b64 s[30:31]
5341;
5342; GFX8-LABEL: v_fshl_i64_5:
5343; GFX8:       ; %bb.0:
5344; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5345; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 5, v[0:1]
5346; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 27, v3
5347; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
5348; GFX8-NEXT:    s_setpc_b64 s[30:31]
5349;
5350; GFX9-LABEL: v_fshl_i64_5:
5351; GFX9:       ; %bb.0:
5352; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5353; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 5, v[0:1]
5354; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 27, v3
5355; GFX9-NEXT:    v_or_b32_e32 v0, v0, v2
5356; GFX9-NEXT:    s_setpc_b64 s[30:31]
5357;
5358; GFX10-LABEL: v_fshl_i64_5:
5359; GFX10:       ; %bb.0:
5360; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5361; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 5, v[0:1]
5362; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 27, v3
5363; GFX10-NEXT:    v_or_b32_e32 v0, v0, v2
5364; GFX10-NEXT:    s_setpc_b64 s[30:31]
5365;
5366; GFX11-LABEL: v_fshl_i64_5:
5367; GFX11:       ; %bb.0:
5368; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5369; GFX11-NEXT:    v_lshlrev_b64 v[0:1], 5, v[0:1]
5370; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 27, v3
5371; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
5372; GFX11-NEXT:    v_or_b32_e32 v0, v0, v2
5373; GFX11-NEXT:    s_setpc_b64 s[30:31]
5374  %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 5)
5375  ret i64 %result
5376}
5377
; i64 funnel shift left by the constant 32 with plain (non-inreg) arguments.
; The checked output is just two register moves (v1 = v0, then v0 = v3); the
; gfx1100 checks expect them fused into one v_dual_mov_b32.
5377define i64 @v_fshl_i64_32(i64 %lhs, i64 %rhs) {
5378; GCN-LABEL: v_fshl_i64_32:
5379; GCN:       ; %bb.0:
5380; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5381; GCN-NEXT:    v_mov_b32_e32 v1, v0
5382; GCN-NEXT:    v_mov_b32_e32 v0, v3
5383; GCN-NEXT:    s_setpc_b64 s[30:31]
5384;
5385; GFX11-LABEL: v_fshl_i64_32:
5386; GFX11:       ; %bb.0:
5387; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5388; GFX11-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, v3
5389; GFX11-NEXT:    s_setpc_b64 s[30:31]
5390  %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 32)
5391  ret i64 %result
5392}
5394
; i64 funnel shift left by the constant 48 with plain (non-inreg) arguments.
; The checked output saves v0, computes v[0:1] = v[2:3] >> 16, then ORs
; (saved v0 << 16) into v1. The gfx900, gfx1010 and gfx1100 checks expect the
; shift and OR combined into a single v_lshl_or_b32.
5394define i64 @v_fshl_i64_48(i64 %lhs, i64 %rhs) {
5395; GFX6-LABEL: v_fshl_i64_48:
5396; GFX6:       ; %bb.0:
5397; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5398; GFX6-NEXT:    v_mov_b32_e32 v4, v0
5399; GFX6-NEXT:    v_lshr_b64 v[0:1], v[2:3], 16
5400; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
5401; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
5402; GFX6-NEXT:    s_setpc_b64 s[30:31]
5403;
5404; GFX8-LABEL: v_fshl_i64_48:
5405; GFX8:       ; %bb.0:
5406; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5407; GFX8-NEXT:    v_mov_b32_e32 v4, v0
5408; GFX8-NEXT:    v_lshrrev_b64 v[0:1], 16, v[2:3]
5409; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
5410; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
5411; GFX8-NEXT:    s_setpc_b64 s[30:31]
5412;
5413; GFX9-LABEL: v_fshl_i64_48:
5414; GFX9:       ; %bb.0:
5415; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5416; GFX9-NEXT:    v_mov_b32_e32 v4, v0
5417; GFX9-NEXT:    v_lshrrev_b64 v[0:1], 16, v[2:3]
5418; GFX9-NEXT:    v_lshl_or_b32 v1, v4, 16, v1
5419; GFX9-NEXT:    s_setpc_b64 s[30:31]
5420;
5421; GFX10-LABEL: v_fshl_i64_48:
5422; GFX10:       ; %bb.0:
5423; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5424; GFX10-NEXT:    v_mov_b32_e32 v4, v0
5425; GFX10-NEXT:    v_lshrrev_b64 v[0:1], 16, v[2:3]
5426; GFX10-NEXT:    v_lshl_or_b32 v1, v4, 16, v1
5427; GFX10-NEXT:    s_setpc_b64 s[30:31]
5428;
5429; GFX11-LABEL: v_fshl_i64_48:
5430; GFX11:       ; %bb.0:
5431; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5432; GFX11-NEXT:    v_mov_b32_e32 v4, v0
5433; GFX11-NEXT:    v_lshrrev_b64 v[0:1], 16, v[2:3]
5434; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
5435; GFX11-NEXT:    v_lshl_or_b32 v1, v4, 16, v1
5436; GFX11-NEXT:    s_setpc_b64 s[30:31]
5437  %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 48)
5438  ret i64 %result
5439}
5441
; Mixed-operand i64 funnel shift left: lhs and rhs are inreg, amt is a plain
; argument; the result is bitcast to <2 x float> for the return. In the
; checked output the constant ">> 1" of rhs is a scalar s_lshr_b64, while
; both variable shifts are vector instructions taking the 64-bit value from
; an s-register pair.
5442define amdgpu_ps <2 x float> @v_fshl_i64_ssv(i64 inreg %lhs, i64 inreg %rhs, i64 %amt) {
5443; GFX6-LABEL: v_fshl_i64_ssv:
5444; GFX6:       ; %bb.0:
5445; GFX6-NEXT:    v_and_b32_e32 v1, 63, v0
5446; GFX6-NEXT:    v_not_b32_e32 v0, v0
5447; GFX6-NEXT:    v_lshl_b64 v[1:2], s[0:1], v1
5448; GFX6-NEXT:    s_lshr_b64 s[0:1], s[2:3], 1
5449; GFX6-NEXT:    v_and_b32_e32 v0, 63, v0
5450; GFX6-NEXT:    v_lshr_b64 v[3:4], s[0:1], v0
5451; GFX6-NEXT:    v_or_b32_e32 v0, v1, v3
5452; GFX6-NEXT:    v_or_b32_e32 v1, v2, v4
5453; GFX6-NEXT:    ; return to shader part epilog
5454;
5455; GFX8-LABEL: v_fshl_i64_ssv:
5456; GFX8:       ; %bb.0:
5457; GFX8-NEXT:    v_and_b32_e32 v1, 63, v0
5458; GFX8-NEXT:    v_not_b32_e32 v0, v0
5459; GFX8-NEXT:    v_lshlrev_b64 v[1:2], v1, s[0:1]
5460; GFX8-NEXT:    s_lshr_b64 s[0:1], s[2:3], 1
5461; GFX8-NEXT:    v_and_b32_e32 v0, 63, v0
5462; GFX8-NEXT:    v_lshrrev_b64 v[3:4], v0, s[0:1]
5463; GFX8-NEXT:    v_or_b32_e32 v0, v1, v3
5464; GFX8-NEXT:    v_or_b32_e32 v1, v2, v4
5465; GFX8-NEXT:    ; return to shader part epilog
5466;
5467; GFX9-LABEL: v_fshl_i64_ssv:
5468; GFX9:       ; %bb.0:
5469; GFX9-NEXT:    v_and_b32_e32 v1, 63, v0
5470; GFX9-NEXT:    v_not_b32_e32 v0, v0
5471; GFX9-NEXT:    v_lshlrev_b64 v[1:2], v1, s[0:1]
5472; GFX9-NEXT:    s_lshr_b64 s[0:1], s[2:3], 1
5473; GFX9-NEXT:    v_and_b32_e32 v0, 63, v0
5474; GFX9-NEXT:    v_lshrrev_b64 v[3:4], v0, s[0:1]
5475; GFX9-NEXT:    v_or_b32_e32 v0, v1, v3
5476; GFX9-NEXT:    v_or_b32_e32 v1, v2, v4
5477; GFX9-NEXT:    ; return to shader part epilog
5478;
5479; GFX10-LABEL: v_fshl_i64_ssv:
5480; GFX10:       ; %bb.0:
5481; GFX10-NEXT:    v_not_b32_e32 v1, v0
5482; GFX10-NEXT:    v_and_b32_e32 v0, 63, v0
5483; GFX10-NEXT:    s_lshr_b64 s[2:3], s[2:3], 1
5484; GFX10-NEXT:    v_and_b32_e32 v2, 63, v1
5485; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v0, s[0:1]
5486; GFX10-NEXT:    v_lshrrev_b64 v[2:3], v2, s[2:3]
5487; GFX10-NEXT:    v_or_b32_e32 v0, v0, v2
5488; GFX10-NEXT:    v_or_b32_e32 v1, v1, v3
5489; GFX10-NEXT:    ; return to shader part epilog
5490;
5491; GFX11-LABEL: v_fshl_i64_ssv:
5492; GFX11:       ; %bb.0:
5493; GFX11-NEXT:    v_not_b32_e32 v1, v0
5494; GFX11-NEXT:    v_and_b32_e32 v0, 63, v0
5495; GFX11-NEXT:    s_lshr_b64 s[2:3], s[2:3], 1
5496; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
5497; GFX11-NEXT:    v_and_b32_e32 v2, 63, v1
5498; GFX11-NEXT:    v_lshlrev_b64 v[0:1], v0, s[0:1]
5499; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
5500; GFX11-NEXT:    v_lshrrev_b64 v[2:3], v2, s[2:3]
5501; GFX11-NEXT:    v_or_b32_e32 v0, v0, v2
5502; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
5503; GFX11-NEXT:    v_or_b32_e32 v1, v1, v3
5504; GFX11-NEXT:    ; return to shader part epilog
5505  %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 %amt)
5506  %cast = bitcast i64 %result to <2 x float>
5507  ret <2 x float> %cast
5508}
5509
; Mixed-operand i64 funnel shift left: lhs and amt are inreg, rhs is a plain
; argument; the result is bitcast to <2 x float> for the return. In the
; checked output the left shift of lhs is a scalar s_lshl_b64 by s2, the
; inverted amount is formed as 63 AND-NOT s2 in one scalar instruction, and
; the rhs shifts are vector instructions on v[0:1].
5510define amdgpu_ps <2 x float> @v_fshl_i64_svs(i64 inreg %lhs, i64 %rhs, i64 inreg %amt) {
5511; GFX6-LABEL: v_fshl_i64_svs:
5512; GFX6:       ; %bb.0:
5513; GFX6-NEXT:    v_lshr_b64 v[0:1], v[0:1], 1
5514; GFX6-NEXT:    s_andn2_b32 s3, 63, s2
5515; GFX6-NEXT:    v_lshr_b64 v[0:1], v[0:1], s3
5516; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], s2
5517; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
5518; GFX6-NEXT:    v_or_b32_e32 v1, s1, v1
5519; GFX6-NEXT:    ; return to shader part epilog
5520;
5521; GFX8-LABEL: v_fshl_i64_svs:
5522; GFX8:       ; %bb.0:
5523; GFX8-NEXT:    v_lshrrev_b64 v[0:1], 1, v[0:1]
5524; GFX8-NEXT:    s_andn2_b32 s3, 63, s2
5525; GFX8-NEXT:    v_lshrrev_b64 v[0:1], s3, v[0:1]
5526; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], s2
5527; GFX8-NEXT:    v_or_b32_e32 v0, s0, v0
5528; GFX8-NEXT:    v_or_b32_e32 v1, s1, v1
5529; GFX8-NEXT:    ; return to shader part epilog
5530;
5531; GFX9-LABEL: v_fshl_i64_svs:
5532; GFX9:       ; %bb.0:
5533; GFX9-NEXT:    v_lshrrev_b64 v[0:1], 1, v[0:1]
5534; GFX9-NEXT:    s_andn2_b32 s3, 63, s2
5535; GFX9-NEXT:    v_lshrrev_b64 v[0:1], s3, v[0:1]
5536; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], s2
5537; GFX9-NEXT:    v_or_b32_e32 v0, s0, v0
5538; GFX9-NEXT:    v_or_b32_e32 v1, s1, v1
5539; GFX9-NEXT:    ; return to shader part epilog
5540;
5541; GFX10-LABEL: v_fshl_i64_svs:
5542; GFX10:       ; %bb.0:
5543; GFX10-NEXT:    v_lshrrev_b64 v[0:1], 1, v[0:1]
5544; GFX10-NEXT:    s_andn2_b32 s3, 63, s2
5545; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], s2
5546; GFX10-NEXT:    v_lshrrev_b64 v[0:1], s3, v[0:1]
5547; GFX10-NEXT:    v_or_b32_e32 v0, s0, v0
5548; GFX10-NEXT:    v_or_b32_e32 v1, s1, v1
5549; GFX10-NEXT:    ; return to shader part epilog
5550;
5551; GFX11-LABEL: v_fshl_i64_svs:
5552; GFX11:       ; %bb.0:
5553; GFX11-NEXT:    v_lshrrev_b64 v[0:1], 1, v[0:1]
5554; GFX11-NEXT:    s_and_not1_b32 s3, 63, s2
5555; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], s2
5556; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5557; GFX11-NEXT:    v_lshrrev_b64 v[0:1], s3, v[0:1]
5558; GFX11-NEXT:    v_or_b32_e32 v0, s0, v0
5559; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
5560; GFX11-NEXT:    v_or_b32_e32 v1, s1, v1
5561; GFX11-NEXT:    ; return to shader part epilog
5562  %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 %amt)
5563  %cast = bitcast i64 %result to <2 x float>
5564  ret <2 x float> %cast
5565}
5566
; Mixed-operand i64 funnel shift left: lhs is a plain argument, rhs and amt
; are inreg; the result is bitcast to <2 x float> for the return. In the
; checked output lhs (v[0:1]) is shifted left by a vector instruction using
; s2 & 63, while both right shifts of rhs (s[0:1]) stay scalar and use the
; bitwise NOT of s2 as the variable amount.
5567define amdgpu_ps <2 x float> @v_fshl_i64_vss(i64 %lhs, i64 inreg %rhs, i64 inreg %amt) {
5568; GFX6-LABEL: v_fshl_i64_vss:
5569; GFX6:       ; %bb.0:
5570; GFX6-NEXT:    s_and_b32 s3, s2, 63
5571; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], s3
5572; GFX6-NEXT:    s_lshr_b64 s[0:1], s[0:1], 1
5573; GFX6-NEXT:    s_not_b32 s2, s2
5574; GFX6-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
5575; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
5576; GFX6-NEXT:    v_or_b32_e32 v1, s1, v1
5577; GFX6-NEXT:    ; return to shader part epilog
5578;
5579; GFX8-LABEL: v_fshl_i64_vss:
5580; GFX8:       ; %bb.0:
5581; GFX8-NEXT:    s_and_b32 s3, s2, 63
5582; GFX8-NEXT:    v_lshlrev_b64 v[0:1], s3, v[0:1]
5583; GFX8-NEXT:    s_lshr_b64 s[0:1], s[0:1], 1
5584; GFX8-NEXT:    s_not_b32 s2, s2
5585; GFX8-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
5586; GFX8-NEXT:    v_or_b32_e32 v0, s0, v0
5587; GFX8-NEXT:    v_or_b32_e32 v1, s1, v1
5588; GFX8-NEXT:    ; return to shader part epilog
5589;
5590; GFX9-LABEL: v_fshl_i64_vss:
5591; GFX9:       ; %bb.0:
5592; GFX9-NEXT:    s_and_b32 s3, s2, 63
5593; GFX9-NEXT:    v_lshlrev_b64 v[0:1], s3, v[0:1]
5594; GFX9-NEXT:    s_lshr_b64 s[0:1], s[0:1], 1
5595; GFX9-NEXT:    s_not_b32 s2, s2
5596; GFX9-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
5597; GFX9-NEXT:    v_or_b32_e32 v0, s0, v0
5598; GFX9-NEXT:    v_or_b32_e32 v1, s1, v1
5599; GFX9-NEXT:    ; return to shader part epilog
5600;
5601; GFX10-LABEL: v_fshl_i64_vss:
5602; GFX10:       ; %bb.0:
5603; GFX10-NEXT:    s_and_b32 s3, s2, 63
5604; GFX10-NEXT:    s_lshr_b64 s[0:1], s[0:1], 1
5605; GFX10-NEXT:    v_lshlrev_b64 v[0:1], s3, v[0:1]
5606; GFX10-NEXT:    s_not_b32 s2, s2
5607; GFX10-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
5608; GFX10-NEXT:    v_or_b32_e32 v0, s0, v0
5609; GFX10-NEXT:    v_or_b32_e32 v1, s1, v1
5610; GFX10-NEXT:    ; return to shader part epilog
5611;
5612; GFX11-LABEL: v_fshl_i64_vss:
5613; GFX11:       ; %bb.0:
5614; GFX11-NEXT:    s_and_b32 s3, s2, 63
5615; GFX11-NEXT:    s_lshr_b64 s[0:1], s[0:1], 1
5616; GFX11-NEXT:    v_lshlrev_b64 v[0:1], s3, v[0:1]
5617; GFX11-NEXT:    s_not_b32 s2, s2
5618; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
5619; GFX11-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
5620; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
5621; GFX11-NEXT:    v_or_b32_e32 v0, s0, v0
5622; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
5623; GFX11-NEXT:    v_or_b32_e32 v1, s1, v1
5624; GFX11-NEXT:    ; return to shader part epilog
5625  %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 %amt)
5626  %cast = bitcast i64 %result to <2 x float>
5627  ret <2 x float> %cast
5628}
5629
; <2 x i64> funnel shift left in an amdgpu_ps function with all three vector
; operands passed inreg. The checked output is two independent scalar
; sequences, one per element: s[0:1] with s[4:5] using s8 as the amount, and
; s[2:3] with s[6:7] using s10. Each follows the same shape: left shift,
; ">> 1", right shift by the NOT of the amount, then s_or_b64.
5630define amdgpu_ps <2 x i64> @s_fshl_v2i64(<2 x i64> inreg %lhs, <2 x i64> inreg %rhs, <2 x i64> inreg %amt) {
5631; GFX6-LABEL: s_fshl_v2i64:
5632; GFX6:       ; %bb.0:
5633; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], s8
5634; GFX6-NEXT:    s_lshr_b64 s[4:5], s[4:5], 1
5635; GFX6-NEXT:    s_not_b32 s8, s8
5636; GFX6-NEXT:    s_lshr_b64 s[4:5], s[4:5], s8
5637; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
5638; GFX6-NEXT:    s_lshr_b64 s[4:5], s[6:7], 1
5639; GFX6-NEXT:    s_not_b32 s6, s10
5640; GFX6-NEXT:    s_lshl_b64 s[2:3], s[2:3], s10
5641; GFX6-NEXT:    s_lshr_b64 s[4:5], s[4:5], s6
5642; GFX6-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
5643; GFX6-NEXT:    ; return to shader part epilog
5644;
5645; GFX8-LABEL: s_fshl_v2i64:
5646; GFX8:       ; %bb.0:
5647; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], s8
5648; GFX8-NEXT:    s_lshr_b64 s[4:5], s[4:5], 1
5649; GFX8-NEXT:    s_not_b32 s8, s8
5650; GFX8-NEXT:    s_lshr_b64 s[4:5], s[4:5], s8
5651; GFX8-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
5652; GFX8-NEXT:    s_lshr_b64 s[4:5], s[6:7], 1
5653; GFX8-NEXT:    s_not_b32 s6, s10
5654; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], s10
5655; GFX8-NEXT:    s_lshr_b64 s[4:5], s[4:5], s6
5656; GFX8-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
5657; GFX8-NEXT:    ; return to shader part epilog
5658;
5659; GFX9-LABEL: s_fshl_v2i64:
5660; GFX9:       ; %bb.0:
5661; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], s8
5662; GFX9-NEXT:    s_lshr_b64 s[4:5], s[4:5], 1
5663; GFX9-NEXT:    s_not_b32 s8, s8
5664; GFX9-NEXT:    s_lshr_b64 s[4:5], s[4:5], s8
5665; GFX9-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
5666; GFX9-NEXT:    s_lshr_b64 s[4:5], s[6:7], 1
5667; GFX9-NEXT:    s_not_b32 s6, s10
5668; GFX9-NEXT:    s_lshl_b64 s[2:3], s[2:3], s10
5669; GFX9-NEXT:    s_lshr_b64 s[4:5], s[4:5], s6
5670; GFX9-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
5671; GFX9-NEXT:    ; return to shader part epilog
5672;
5673; GFX10-LABEL: s_fshl_v2i64:
5674; GFX10:       ; %bb.0:
5675; GFX10-NEXT:    s_lshr_b64 s[4:5], s[4:5], 1
5676; GFX10-NEXT:    s_not_b32 s9, s8
5677; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], s8
5678; GFX10-NEXT:    s_lshr_b64 s[6:7], s[6:7], 1
5679; GFX10-NEXT:    s_not_b32 s8, s10
5680; GFX10-NEXT:    s_lshr_b64 s[4:5], s[4:5], s9
5681; GFX10-NEXT:    s_lshl_b64 s[2:3], s[2:3], s10
5682; GFX10-NEXT:    s_lshr_b64 s[6:7], s[6:7], s8
5683; GFX10-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
5684; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[6:7]
5685; GFX10-NEXT:    ; return to shader part epilog
5686;
5687; GFX11-LABEL: s_fshl_v2i64:
5688; GFX11:       ; %bb.0:
5689; GFX11-NEXT:    s_lshr_b64 s[4:5], s[4:5], 1
5690; GFX11-NEXT:    s_not_b32 s9, s8
5691; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], s8
5692; GFX11-NEXT:    s_lshr_b64 s[6:7], s[6:7], 1
5693; GFX11-NEXT:    s_not_b32 s8, s10
5694; GFX11-NEXT:    s_lshr_b64 s[4:5], s[4:5], s9
5695; GFX11-NEXT:    s_lshl_b64 s[2:3], s[2:3], s10
5696; GFX11-NEXT:    s_lshr_b64 s[6:7], s[6:7], s8
5697; GFX11-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
5698; GFX11-NEXT:    s_or_b64 s[2:3], s[2:3], s[6:7]
5699; GFX11-NEXT:    ; return to shader part epilog
5700  %result = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt)
5701  ret <2 x i64> %result
5702}
5703
5704define <2 x i64> @v_fshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt) {
5705; GFX6-LABEL: v_fshl_v2i64:
5706; GFX6:       ; %bb.0:
5707; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5708; GFX6-NEXT:    v_and_b32_e32 v9, 63, v8
5709; GFX6-NEXT:    v_lshr_b64 v[4:5], v[4:5], 1
5710; GFX6-NEXT:    v_not_b32_e32 v8, v8
5711; GFX6-NEXT:    v_and_b32_e32 v8, 63, v8
5712; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], v9
5713; GFX6-NEXT:    v_lshr_b64 v[4:5], v[4:5], v8
5714; GFX6-NEXT:    v_lshr_b64 v[6:7], v[6:7], 1
5715; GFX6-NEXT:    v_or_b32_e32 v0, v0, v4
5716; GFX6-NEXT:    v_and_b32_e32 v4, 63, v10
5717; GFX6-NEXT:    v_lshl_b64 v[2:3], v[2:3], v4
5718; GFX6-NEXT:    v_not_b32_e32 v4, v10
5719; GFX6-NEXT:    v_and_b32_e32 v4, 63, v4
5720; GFX6-NEXT:    v_lshr_b64 v[6:7], v[6:7], v4
5721; GFX6-NEXT:    v_or_b32_e32 v1, v1, v5
5722; GFX6-NEXT:    v_or_b32_e32 v2, v2, v6
5723; GFX6-NEXT:    v_or_b32_e32 v3, v3, v7
5724; GFX6-NEXT:    s_setpc_b64 s[30:31]
5725;
5726; GFX8-LABEL: v_fshl_v2i64:
5727; GFX8:       ; %bb.0:
5728; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5729; GFX8-NEXT:    v_and_b32_e32 v9, 63, v8
5730; GFX8-NEXT:    v_lshrrev_b64 v[4:5], 1, v[4:5]
5731; GFX8-NEXT:    v_not_b32_e32 v8, v8
5732; GFX8-NEXT:    v_and_b32_e32 v8, 63, v8
5733; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v9, v[0:1]
5734; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v8, v[4:5]
5735; GFX8-NEXT:    v_lshrrev_b64 v[6:7], 1, v[6:7]
5736; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
5737; GFX8-NEXT:    v_and_b32_e32 v4, 63, v10
5738; GFX8-NEXT:    v_lshlrev_b64 v[2:3], v4, v[2:3]
5739; GFX8-NEXT:    v_not_b32_e32 v4, v10
5740; GFX8-NEXT:    v_and_b32_e32 v4, 63, v4
5741; GFX8-NEXT:    v_lshrrev_b64 v[6:7], v4, v[6:7]
5742; GFX8-NEXT:    v_or_b32_e32 v1, v1, v5
5743; GFX8-NEXT:    v_or_b32_e32 v2, v2, v6
5744; GFX8-NEXT:    v_or_b32_e32 v3, v3, v7
5745; GFX8-NEXT:    s_setpc_b64 s[30:31]
5746;
5747; GFX9-LABEL: v_fshl_v2i64:
5748; GFX9:       ; %bb.0:
5749; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5750; GFX9-NEXT:    v_and_b32_e32 v9, 63, v8
5751; GFX9-NEXT:    v_lshrrev_b64 v[4:5], 1, v[4:5]
5752; GFX9-NEXT:    v_not_b32_e32 v8, v8
5753; GFX9-NEXT:    v_and_b32_e32 v8, 63, v8
5754; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v9, v[0:1]
5755; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v8, v[4:5]
5756; GFX9-NEXT:    v_lshrrev_b64 v[6:7], 1, v[6:7]
5757; GFX9-NEXT:    v_or_b32_e32 v0, v0, v4
5758; GFX9-NEXT:    v_and_b32_e32 v4, 63, v10
5759; GFX9-NEXT:    v_lshlrev_b64 v[2:3], v4, v[2:3]
5760; GFX9-NEXT:    v_not_b32_e32 v4, v10
5761; GFX9-NEXT:    v_and_b32_e32 v4, 63, v4
5762; GFX9-NEXT:    v_lshrrev_b64 v[6:7], v4, v[6:7]
5763; GFX9-NEXT:    v_or_b32_e32 v1, v1, v5
5764; GFX9-NEXT:    v_or_b32_e32 v2, v2, v6
5765; GFX9-NEXT:    v_or_b32_e32 v3, v3, v7
5766; GFX9-NEXT:    s_setpc_b64 s[30:31]
5767;
5768; GFX10-LABEL: v_fshl_v2i64:
5769; GFX10:       ; %bb.0:
5770; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5771; GFX10-NEXT:    v_not_b32_e32 v9, v8
5772; GFX10-NEXT:    v_not_b32_e32 v11, v10
5773; GFX10-NEXT:    v_lshrrev_b64 v[4:5], 1, v[4:5]
5774; GFX10-NEXT:    v_lshrrev_b64 v[6:7], 1, v[6:7]
5775; GFX10-NEXT:    v_and_b32_e32 v8, 63, v8
5776; GFX10-NEXT:    v_and_b32_e32 v9, 63, v9
5777; GFX10-NEXT:    v_and_b32_e32 v10, 63, v10
5778; GFX10-NEXT:    v_and_b32_e32 v11, 63, v11
5779; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v8, v[0:1]
5780; GFX10-NEXT:    v_lshrrev_b64 v[4:5], v9, v[4:5]
5781; GFX10-NEXT:    v_lshlrev_b64 v[2:3], v10, v[2:3]
5782; GFX10-NEXT:    v_lshrrev_b64 v[6:7], v11, v[6:7]
5783; GFX10-NEXT:    v_or_b32_e32 v0, v0, v4
5784; GFX10-NEXT:    v_or_b32_e32 v1, v1, v5
5785; GFX10-NEXT:    v_or_b32_e32 v2, v2, v6
5786; GFX10-NEXT:    v_or_b32_e32 v3, v3, v7
5787; GFX10-NEXT:    s_setpc_b64 s[30:31]
5788;
5789; GFX11-LABEL: v_fshl_v2i64:
5790; GFX11:       ; %bb.0:
5791; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5792; GFX11-NEXT:    v_not_b32_e32 v9, v8
5793; GFX11-NEXT:    v_not_b32_e32 v11, v10
5794; GFX11-NEXT:    v_lshrrev_b64 v[4:5], 1, v[4:5]
5795; GFX11-NEXT:    v_lshrrev_b64 v[6:7], 1, v[6:7]
5796; GFX11-NEXT:    v_and_b32_e32 v8, 63, v8
5797; GFX11-NEXT:    v_and_b32_e32 v9, 63, v9
5798; GFX11-NEXT:    v_and_b32_e32 v10, 63, v10
5799; GFX11-NEXT:    v_and_b32_e32 v11, 63, v11
5800; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
5801; GFX11-NEXT:    v_lshlrev_b64 v[0:1], v8, v[0:1]
5802; GFX11-NEXT:    v_lshrrev_b64 v[4:5], v9, v[4:5]
5803; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
5804; GFX11-NEXT:    v_lshlrev_b64 v[2:3], v10, v[2:3]
5805; GFX11-NEXT:    v_lshrrev_b64 v[6:7], v11, v[6:7]
5806; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
5807; GFX11-NEXT:    v_or_b32_e32 v0, v0, v4
5808; GFX11-NEXT:    v_or_b32_e32 v1, v1, v5
5809; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
5810; GFX11-NEXT:    v_or_b32_e32 v2, v2, v6
5811; GFX11-NEXT:    v_or_b32_e32 v3, v3, v7
5812; GFX11-NEXT:    s_setpc_b64 s[30:31]
5813  %result = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt)
5814  ret <2 x i64> %result
5815}
5816
; Scalar (SGPR) i128 funnel shift left: every operand is passed `inreg` to an
; amdgpu_ps function, and the expected code below consists solely of s_*
; instructions for all five subtargets named in the RUN lines.
; In the expected output the amount is reduced with a 0x7f mask, %rhs is
; pre-shifted right by one bit, and the 64-bit halves are chosen with
; s_cmp/s_cselect against 64 and 0 before the final pair of s_or_b64.
; NOTE(review): the operand-to-register mapping (s[0:3] = %lhs, s[4:7] = %rhs,
; s8 = low dword of %amt) is inferred from how the registers are used - confirm.
; The check lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
5817define amdgpu_ps i128 @s_fshl_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg %amt) {
5818; GFX6-LABEL: s_fshl_i128:
5819; GFX6:       ; %bb.0:
5820; GFX6-NEXT:    s_and_b32 s9, s8, 0x7f
5821; GFX6-NEXT:    s_sub_i32 s11, s9, 64
5822; GFX6-NEXT:    s_sub_i32 s14, 64, s9
5823; GFX6-NEXT:    s_cmp_lt_u32 s9, 64
5824; GFX6-NEXT:    s_cselect_b32 s18, 1, 0
5825; GFX6-NEXT:    s_cmp_eq_u32 s9, 0
5826; GFX6-NEXT:    s_cselect_b32 s9, 1, 0
5827; GFX6-NEXT:    s_lshr_b64 s[14:15], s[0:1], s14
5828; GFX6-NEXT:    s_lshl_b64 s[16:17], s[2:3], s8
5829; GFX6-NEXT:    s_lshl_b64 s[12:13], s[0:1], s8
5830; GFX6-NEXT:    s_or_b64 s[14:15], s[14:15], s[16:17]
5831; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], s11
5832; GFX6-NEXT:    s_cmp_lg_u32 s18, 0
5833; GFX6-NEXT:    s_cselect_b64 s[12:13], s[12:13], 0
5834; GFX6-NEXT:    s_cselect_b64 s[0:1], s[14:15], s[0:1]
5835; GFX6-NEXT:    s_cmp_lg_u32 s9, 0
5836; GFX6-NEXT:    s_mov_b32 s10, 0
5837; GFX6-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
5838; GFX6-NEXT:    s_lshr_b64 s[0:1], s[4:5], 1
5839; GFX6-NEXT:    s_lshl_b32 s11, s6, 31
5840; GFX6-NEXT:    s_lshr_b64 s[4:5], s[6:7], 1
5841; GFX6-NEXT:    s_andn2_b32 s6, 0x7f, s8
5842; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[10:11]
5843; GFX6-NEXT:    s_not_b32 s9, s8
5844; GFX6-NEXT:    s_sub_i32 s14, s6, 64
5845; GFX6-NEXT:    s_sub_i32 s10, 64, s6
5846; GFX6-NEXT:    s_cmp_lt_u32 s6, 64
5847; GFX6-NEXT:    s_cselect_b32 s15, 1, 0
5848; GFX6-NEXT:    s_cmp_eq_u32 s6, 0
5849; GFX6-NEXT:    s_cselect_b32 s16, 1, 0
5850; GFX6-NEXT:    s_lshr_b64 s[6:7], s[4:5], s9
5851; GFX6-NEXT:    s_lshr_b64 s[8:9], s[0:1], s9
5852; GFX6-NEXT:    s_lshl_b64 s[10:11], s[4:5], s10
5853; GFX6-NEXT:    s_or_b64 s[8:9], s[8:9], s[10:11]
5854; GFX6-NEXT:    s_lshr_b64 s[4:5], s[4:5], s14
5855; GFX6-NEXT:    s_cmp_lg_u32 s15, 0
5856; GFX6-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
5857; GFX6-NEXT:    s_cmp_lg_u32 s16, 0
5858; GFX6-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
5859; GFX6-NEXT:    s_cmp_lg_u32 s15, 0
5860; GFX6-NEXT:    s_cselect_b64 s[4:5], s[6:7], 0
5861; GFX6-NEXT:    s_or_b64 s[0:1], s[12:13], s[0:1]
5862; GFX6-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
5863; GFX6-NEXT:    ; return to shader part epilog
5864;
5865; GFX8-LABEL: s_fshl_i128:
5866; GFX8:       ; %bb.0:
5867; GFX8-NEXT:    s_and_b32 s9, s8, 0x7f
5868; GFX8-NEXT:    s_sub_i32 s11, s9, 64
5869; GFX8-NEXT:    s_sub_i32 s14, 64, s9
5870; GFX8-NEXT:    s_cmp_lt_u32 s9, 64
5871; GFX8-NEXT:    s_cselect_b32 s18, 1, 0
5872; GFX8-NEXT:    s_cmp_eq_u32 s9, 0
5873; GFX8-NEXT:    s_cselect_b32 s9, 1, 0
5874; GFX8-NEXT:    s_lshr_b64 s[14:15], s[0:1], s14
5875; GFX8-NEXT:    s_lshl_b64 s[16:17], s[2:3], s8
5876; GFX8-NEXT:    s_lshl_b64 s[12:13], s[0:1], s8
5877; GFX8-NEXT:    s_or_b64 s[14:15], s[14:15], s[16:17]
5878; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], s11
5879; GFX8-NEXT:    s_cmp_lg_u32 s18, 0
5880; GFX8-NEXT:    s_cselect_b64 s[12:13], s[12:13], 0
5881; GFX8-NEXT:    s_cselect_b64 s[0:1], s[14:15], s[0:1]
5882; GFX8-NEXT:    s_cmp_lg_u32 s9, 0
5883; GFX8-NEXT:    s_mov_b32 s10, 0
5884; GFX8-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
5885; GFX8-NEXT:    s_lshr_b64 s[0:1], s[4:5], 1
5886; GFX8-NEXT:    s_lshl_b32 s11, s6, 31
5887; GFX8-NEXT:    s_lshr_b64 s[4:5], s[6:7], 1
5888; GFX8-NEXT:    s_andn2_b32 s6, 0x7f, s8
5889; GFX8-NEXT:    s_or_b64 s[0:1], s[0:1], s[10:11]
5890; GFX8-NEXT:    s_not_b32 s9, s8
5891; GFX8-NEXT:    s_sub_i32 s14, s6, 64
5892; GFX8-NEXT:    s_sub_i32 s10, 64, s6
5893; GFX8-NEXT:    s_cmp_lt_u32 s6, 64
5894; GFX8-NEXT:    s_cselect_b32 s15, 1, 0
5895; GFX8-NEXT:    s_cmp_eq_u32 s6, 0
5896; GFX8-NEXT:    s_cselect_b32 s16, 1, 0
5897; GFX8-NEXT:    s_lshr_b64 s[6:7], s[4:5], s9
5898; GFX8-NEXT:    s_lshr_b64 s[8:9], s[0:1], s9
5899; GFX8-NEXT:    s_lshl_b64 s[10:11], s[4:5], s10
5900; GFX8-NEXT:    s_or_b64 s[8:9], s[8:9], s[10:11]
5901; GFX8-NEXT:    s_lshr_b64 s[4:5], s[4:5], s14
5902; GFX8-NEXT:    s_cmp_lg_u32 s15, 0
5903; GFX8-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
5904; GFX8-NEXT:    s_cmp_lg_u32 s16, 0
5905; GFX8-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
5906; GFX8-NEXT:    s_cmp_lg_u32 s15, 0
5907; GFX8-NEXT:    s_cselect_b64 s[4:5], s[6:7], 0
5908; GFX8-NEXT:    s_or_b64 s[0:1], s[12:13], s[0:1]
5909; GFX8-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
5910; GFX8-NEXT:    ; return to shader part epilog
5911;
5912; GFX9-LABEL: s_fshl_i128:
5913; GFX9:       ; %bb.0:
5914; GFX9-NEXT:    s_and_b32 s9, s8, 0x7f
5915; GFX9-NEXT:    s_sub_i32 s11, s9, 64
5916; GFX9-NEXT:    s_sub_i32 s14, 64, s9
5917; GFX9-NEXT:    s_cmp_lt_u32 s9, 64
5918; GFX9-NEXT:    s_cselect_b32 s18, 1, 0
5919; GFX9-NEXT:    s_cmp_eq_u32 s9, 0
5920; GFX9-NEXT:    s_cselect_b32 s9, 1, 0
5921; GFX9-NEXT:    s_lshr_b64 s[14:15], s[0:1], s14
5922; GFX9-NEXT:    s_lshl_b64 s[16:17], s[2:3], s8
5923; GFX9-NEXT:    s_lshl_b64 s[12:13], s[0:1], s8
5924; GFX9-NEXT:    s_or_b64 s[14:15], s[14:15], s[16:17]
5925; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], s11
5926; GFX9-NEXT:    s_cmp_lg_u32 s18, 0
5927; GFX9-NEXT:    s_cselect_b64 s[12:13], s[12:13], 0
5928; GFX9-NEXT:    s_cselect_b64 s[0:1], s[14:15], s[0:1]
5929; GFX9-NEXT:    s_cmp_lg_u32 s9, 0
5930; GFX9-NEXT:    s_mov_b32 s10, 0
5931; GFX9-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
5932; GFX9-NEXT:    s_lshr_b64 s[0:1], s[4:5], 1
5933; GFX9-NEXT:    s_lshl_b32 s11, s6, 31
5934; GFX9-NEXT:    s_lshr_b64 s[4:5], s[6:7], 1
5935; GFX9-NEXT:    s_andn2_b32 s6, 0x7f, s8
5936; GFX9-NEXT:    s_or_b64 s[0:1], s[0:1], s[10:11]
5937; GFX9-NEXT:    s_not_b32 s9, s8
5938; GFX9-NEXT:    s_sub_i32 s14, s6, 64
5939; GFX9-NEXT:    s_sub_i32 s10, 64, s6
5940; GFX9-NEXT:    s_cmp_lt_u32 s6, 64
5941; GFX9-NEXT:    s_cselect_b32 s15, 1, 0
5942; GFX9-NEXT:    s_cmp_eq_u32 s6, 0
5943; GFX9-NEXT:    s_cselect_b32 s16, 1, 0
5944; GFX9-NEXT:    s_lshr_b64 s[6:7], s[4:5], s9
5945; GFX9-NEXT:    s_lshr_b64 s[8:9], s[0:1], s9
5946; GFX9-NEXT:    s_lshl_b64 s[10:11], s[4:5], s10
5947; GFX9-NEXT:    s_or_b64 s[8:9], s[8:9], s[10:11]
5948; GFX9-NEXT:    s_lshr_b64 s[4:5], s[4:5], s14
5949; GFX9-NEXT:    s_cmp_lg_u32 s15, 0
5950; GFX9-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
5951; GFX9-NEXT:    s_cmp_lg_u32 s16, 0
5952; GFX9-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
5953; GFX9-NEXT:    s_cmp_lg_u32 s15, 0
5954; GFX9-NEXT:    s_cselect_b64 s[4:5], s[6:7], 0
5955; GFX9-NEXT:    s_or_b64 s[0:1], s[12:13], s[0:1]
5956; GFX9-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
5957; GFX9-NEXT:    ; return to shader part epilog
5958;
5959; GFX10-LABEL: s_fshl_i128:
5960; GFX10:       ; %bb.0:
5961; GFX10-NEXT:    s_and_b32 s9, s8, 0x7f
5962; GFX10-NEXT:    s_mov_b32 s10, 0
5963; GFX10-NEXT:    s_sub_i32 s11, s9, 64
5964; GFX10-NEXT:    s_sub_i32 s12, 64, s9
5965; GFX10-NEXT:    s_cmp_lt_u32 s9, 64
5966; GFX10-NEXT:    s_cselect_b32 s18, 1, 0
5967; GFX10-NEXT:    s_cmp_eq_u32 s9, 0
5968; GFX10-NEXT:    s_cselect_b32 s9, 1, 0
5969; GFX10-NEXT:    s_lshr_b64 s[12:13], s[0:1], s12
5970; GFX10-NEXT:    s_lshl_b64 s[14:15], s[2:3], s8
5971; GFX10-NEXT:    s_lshl_b64 s[16:17], s[0:1], s8
5972; GFX10-NEXT:    s_or_b64 s[12:13], s[12:13], s[14:15]
5973; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], s11
5974; GFX10-NEXT:    s_cmp_lg_u32 s18, 0
5975; GFX10-NEXT:    s_cselect_b64 s[14:15], s[16:17], 0
5976; GFX10-NEXT:    s_cselect_b64 s[0:1], s[12:13], s[0:1]
5977; GFX10-NEXT:    s_cmp_lg_u32 s9, 0
5978; GFX10-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
5979; GFX10-NEXT:    s_lshr_b64 s[0:1], s[4:5], 1
5980; GFX10-NEXT:    s_lshl_b32 s11, s6, 31
5981; GFX10-NEXT:    s_lshr_b64 s[4:5], s[6:7], 1
5982; GFX10-NEXT:    s_andn2_b32 s6, 0x7f, s8
5983; GFX10-NEXT:    s_or_b64 s[0:1], s[0:1], s[10:11]
5984; GFX10-NEXT:    s_not_b32 s10, s8
5985; GFX10-NEXT:    s_sub_i32 s12, s6, 64
5986; GFX10-NEXT:    s_sub_i32 s8, 64, s6
5987; GFX10-NEXT:    s_cmp_lt_u32 s6, 64
5988; GFX10-NEXT:    s_cselect_b32 s13, 1, 0
5989; GFX10-NEXT:    s_cmp_eq_u32 s6, 0
5990; GFX10-NEXT:    s_cselect_b32 s16, 1, 0
5991; GFX10-NEXT:    s_lshr_b64 s[6:7], s[0:1], s10
5992; GFX10-NEXT:    s_lshl_b64 s[8:9], s[4:5], s8
5993; GFX10-NEXT:    s_lshr_b64 s[10:11], s[4:5], s10
5994; GFX10-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
5995; GFX10-NEXT:    s_lshr_b64 s[4:5], s[4:5], s12
5996; GFX10-NEXT:    s_cmp_lg_u32 s13, 0
5997; GFX10-NEXT:    s_cselect_b64 s[4:5], s[6:7], s[4:5]
5998; GFX10-NEXT:    s_cmp_lg_u32 s16, 0
5999; GFX10-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
6000; GFX10-NEXT:    s_cmp_lg_u32 s13, 0
6001; GFX10-NEXT:    s_cselect_b64 s[4:5], s[10:11], 0
6002; GFX10-NEXT:    s_or_b64 s[0:1], s[14:15], s[0:1]
6003; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
6004; GFX10-NEXT:    ; return to shader part epilog
6005;
6006; GFX11-LABEL: s_fshl_i128:
6007; GFX11:       ; %bb.0:
6008; GFX11-NEXT:    s_and_b32 s9, s8, 0x7f
6009; GFX11-NEXT:    s_mov_b32 s10, 0
6010; GFX11-NEXT:    s_sub_i32 s11, s9, 64
6011; GFX11-NEXT:    s_sub_i32 s12, 64, s9
6012; GFX11-NEXT:    s_cmp_lt_u32 s9, 64
6013; GFX11-NEXT:    s_cselect_b32 s18, 1, 0
6014; GFX11-NEXT:    s_cmp_eq_u32 s9, 0
6015; GFX11-NEXT:    s_cselect_b32 s9, 1, 0
6016; GFX11-NEXT:    s_lshr_b64 s[12:13], s[0:1], s12
6017; GFX11-NEXT:    s_lshl_b64 s[14:15], s[2:3], s8
6018; GFX11-NEXT:    s_lshl_b64 s[16:17], s[0:1], s8
6019; GFX11-NEXT:    s_or_b64 s[12:13], s[12:13], s[14:15]
6020; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], s11
6021; GFX11-NEXT:    s_cmp_lg_u32 s18, 0
6022; GFX11-NEXT:    s_cselect_b64 s[14:15], s[16:17], 0
6023; GFX11-NEXT:    s_cselect_b64 s[0:1], s[12:13], s[0:1]
6024; GFX11-NEXT:    s_cmp_lg_u32 s9, 0
6025; GFX11-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
6026; GFX11-NEXT:    s_lshr_b64 s[0:1], s[4:5], 1
6027; GFX11-NEXT:    s_lshl_b32 s11, s6, 31
6028; GFX11-NEXT:    s_lshr_b64 s[4:5], s[6:7], 1
6029; GFX11-NEXT:    s_and_not1_b32 s6, 0x7f, s8
6030; GFX11-NEXT:    s_or_b64 s[0:1], s[0:1], s[10:11]
6031; GFX11-NEXT:    s_not_b32 s10, s8
6032; GFX11-NEXT:    s_sub_i32 s12, s6, 64
6033; GFX11-NEXT:    s_sub_i32 s8, 64, s6
6034; GFX11-NEXT:    s_cmp_lt_u32 s6, 64
6035; GFX11-NEXT:    s_cselect_b32 s13, 1, 0
6036; GFX11-NEXT:    s_cmp_eq_u32 s6, 0
6037; GFX11-NEXT:    s_cselect_b32 s16, 1, 0
6038; GFX11-NEXT:    s_lshr_b64 s[6:7], s[0:1], s10
6039; GFX11-NEXT:    s_lshl_b64 s[8:9], s[4:5], s8
6040; GFX11-NEXT:    s_lshr_b64 s[10:11], s[4:5], s10
6041; GFX11-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
6042; GFX11-NEXT:    s_lshr_b64 s[4:5], s[4:5], s12
6043; GFX11-NEXT:    s_cmp_lg_u32 s13, 0
6044; GFX11-NEXT:    s_cselect_b64 s[4:5], s[6:7], s[4:5]
6045; GFX11-NEXT:    s_cmp_lg_u32 s16, 0
6046; GFX11-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
6047; GFX11-NEXT:    s_cmp_lg_u32 s13, 0
6048; GFX11-NEXT:    s_cselect_b64 s[4:5], s[10:11], 0
6049; GFX11-NEXT:    s_or_b64 s[0:1], s[14:15], s[0:1]
6050; GFX11-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
6051; GFX11-NEXT:    ; return to shader part epilog
6052  %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 %amt)
6053  ret i128 %result
6054}
6055
; Vector (VGPR) i128 funnel shift left: a default-calling-convention function
; with no `inreg` operands. The expected code does the shifts and selects with
; v_* instructions, begins with an s_waitcnt and returns through
; s_setpc_b64 s[30:31].
; In the expected output the amount is reduced with a 0x7f mask, %rhs is
; pre-shifted right by one bit, and each 64-bit half is picked with
; v_cndmask_b32 on compares against 64 and 0 before the final v_or_b32 group.
; The tahiti and fiji outputs build the second "minus 64" adjustment from
; `v_not_b32 v16, 63` plus an add, while the newer targets add the 0xffffffc0
; literal directly; the gfx1100 output additionally contains s_delay_alu
; lines and a v_dual_cndmask_b32.
; NOTE(review): the operand-to-register mapping (v[0:3] = %lhs, v[4:7] = %rhs,
; v8 = low dword of %amt) is inferred from how the registers are used - confirm.
; The check lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
6056define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
6057; GFX6-LABEL: v_fshl_i128:
6058; GFX6:       ; %bb.0:
6059; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6060; GFX6-NEXT:    v_and_b32_e32 v15, 0x7f, v8
6061; GFX6-NEXT:    v_sub_i32_e32 v9, vcc, 64, v15
6062; GFX6-NEXT:    v_add_i32_e32 v17, vcc, 0xffffffc0, v15
6063; GFX6-NEXT:    v_lshr_b64 v[9:10], v[0:1], v9
6064; GFX6-NEXT:    v_lshl_b64 v[11:12], v[2:3], v15
6065; GFX6-NEXT:    v_lshl_b64 v[13:14], v[0:1], v15
6066; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], v17
6067; GFX6-NEXT:    v_or_b32_e32 v9, v9, v11
6068; GFX6-NEXT:    v_or_b32_e32 v10, v10, v12
6069; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
6070; GFX6-NEXT:    v_cndmask_b32_e32 v11, 0, v13, vcc
6071; GFX6-NEXT:    v_cndmask_b32_e32 v12, 0, v14, vcc
6072; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
6073; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v10, vcc
6074; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v15
6075; GFX6-NEXT:    v_cndmask_b32_e32 v10, v0, v2, vcc
6076; GFX6-NEXT:    v_cndmask_b32_e32 v13, v1, v3, vcc
6077; GFX6-NEXT:    v_lshr_b64 v[0:1], v[4:5], 1
6078; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 31, v6
6079; GFX6-NEXT:    v_not_b32_e32 v4, v8
6080; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
6081; GFX6-NEXT:    v_lshr_b64 v[2:3], v[6:7], 1
6082; GFX6-NEXT:    v_and_b32_e32 v14, 0x7f, v4
6083; GFX6-NEXT:    v_not_b32_e32 v16, 63
6084; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, 64, v14
6085; GFX6-NEXT:    v_add_i32_e32 v15, vcc, v14, v16
6086; GFX6-NEXT:    v_lshr_b64 v[4:5], v[0:1], v14
6087; GFX6-NEXT:    v_lshl_b64 v[6:7], v[2:3], v6
6088; GFX6-NEXT:    v_lshr_b64 v[8:9], v[2:3], v14
6089; GFX6-NEXT:    v_lshr_b64 v[2:3], v[2:3], v15
6090; GFX6-NEXT:    v_or_b32_e32 v4, v4, v6
6091; GFX6-NEXT:    v_or_b32_e32 v5, v5, v7
6092; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
6093; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
6094; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
6095; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
6096; GFX6-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s[4:5]
6097; GFX6-NEXT:    v_cndmask_b32_e64 v1, v3, v1, s[4:5]
6098; GFX6-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
6099; GFX6-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
6100; GFX6-NEXT:    v_or_b32_e32 v0, v11, v0
6101; GFX6-NEXT:    v_or_b32_e32 v1, v12, v1
6102; GFX6-NEXT:    v_or_b32_e32 v2, v10, v2
6103; GFX6-NEXT:    v_or_b32_e32 v3, v13, v3
6104; GFX6-NEXT:    s_setpc_b64 s[30:31]
6105;
6106; GFX8-LABEL: v_fshl_i128:
6107; GFX8:       ; %bb.0:
6108; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6109; GFX8-NEXT:    v_and_b32_e32 v15, 0x7f, v8
6110; GFX8-NEXT:    v_sub_u32_e32 v9, vcc, 64, v15
6111; GFX8-NEXT:    v_add_u32_e32 v17, vcc, 0xffffffc0, v15
6112; GFX8-NEXT:    v_lshrrev_b64 v[9:10], v9, v[0:1]
6113; GFX8-NEXT:    v_lshlrev_b64 v[11:12], v15, v[2:3]
6114; GFX8-NEXT:    v_lshlrev_b64 v[13:14], v15, v[0:1]
6115; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v17, v[0:1]
6116; GFX8-NEXT:    v_or_b32_e32 v9, v9, v11
6117; GFX8-NEXT:    v_or_b32_e32 v10, v10, v12
6118; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
6119; GFX8-NEXT:    v_cndmask_b32_e32 v11, 0, v13, vcc
6120; GFX8-NEXT:    v_cndmask_b32_e32 v12, 0, v14, vcc
6121; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
6122; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v10, vcc
6123; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v15
6124; GFX8-NEXT:    v_cndmask_b32_e32 v10, v0, v2, vcc
6125; GFX8-NEXT:    v_cndmask_b32_e32 v13, v1, v3, vcc
6126; GFX8-NEXT:    v_lshrrev_b64 v[0:1], 1, v[4:5]
6127; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 31, v6
6128; GFX8-NEXT:    v_not_b32_e32 v4, v8
6129; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
6130; GFX8-NEXT:    v_lshrrev_b64 v[2:3], 1, v[6:7]
6131; GFX8-NEXT:    v_and_b32_e32 v14, 0x7f, v4
6132; GFX8-NEXT:    v_not_b32_e32 v16, 63
6133; GFX8-NEXT:    v_sub_u32_e32 v6, vcc, 64, v14
6134; GFX8-NEXT:    v_add_u32_e32 v15, vcc, v14, v16
6135; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v14, v[0:1]
6136; GFX8-NEXT:    v_lshlrev_b64 v[6:7], v6, v[2:3]
6137; GFX8-NEXT:    v_lshrrev_b64 v[8:9], v14, v[2:3]
6138; GFX8-NEXT:    v_lshrrev_b64 v[2:3], v15, v[2:3]
6139; GFX8-NEXT:    v_or_b32_e32 v4, v4, v6
6140; GFX8-NEXT:    v_or_b32_e32 v5, v5, v7
6141; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
6142; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
6143; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
6144; GFX8-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
6145; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s[4:5]
6146; GFX8-NEXT:    v_cndmask_b32_e64 v1, v3, v1, s[4:5]
6147; GFX8-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
6148; GFX8-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
6149; GFX8-NEXT:    v_or_b32_e32 v0, v11, v0
6150; GFX8-NEXT:    v_or_b32_e32 v1, v12, v1
6151; GFX8-NEXT:    v_or_b32_e32 v2, v10, v2
6152; GFX8-NEXT:    v_or_b32_e32 v3, v13, v3
6153; GFX8-NEXT:    s_setpc_b64 s[30:31]
6154;
6155; GFX9-LABEL: v_fshl_i128:
6156; GFX9:       ; %bb.0:
6157; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6158; GFX9-NEXT:    v_and_b32_e32 v15, 0x7f, v8
6159; GFX9-NEXT:    v_sub_u32_e32 v9, 64, v15
6160; GFX9-NEXT:    v_add_u32_e32 v16, 0xffffffc0, v15
6161; GFX9-NEXT:    v_lshrrev_b64 v[9:10], v9, v[0:1]
6162; GFX9-NEXT:    v_lshlrev_b64 v[11:12], v15, v[2:3]
6163; GFX9-NEXT:    v_lshlrev_b64 v[13:14], v15, v[0:1]
6164; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v16, v[0:1]
6165; GFX9-NEXT:    v_or_b32_e32 v9, v9, v11
6166; GFX9-NEXT:    v_or_b32_e32 v10, v10, v12
6167; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
6168; GFX9-NEXT:    v_cndmask_b32_e32 v11, 0, v13, vcc
6169; GFX9-NEXT:    v_cndmask_b32_e32 v12, 0, v14, vcc
6170; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
6171; GFX9-NEXT:    v_cndmask_b32_e32 v9, v1, v10, vcc
6172; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v15
6173; GFX9-NEXT:    v_cndmask_b32_e32 v10, v0, v2, vcc
6174; GFX9-NEXT:    v_lshrrev_b64 v[0:1], 1, v[4:5]
6175; GFX9-NEXT:    v_not_b32_e32 v4, v8
6176; GFX9-NEXT:    v_cndmask_b32_e32 v13, v9, v3, vcc
6177; GFX9-NEXT:    v_lshrrev_b64 v[2:3], 1, v[6:7]
6178; GFX9-NEXT:    v_and_b32_e32 v14, 0x7f, v4
6179; GFX9-NEXT:    v_lshl_or_b32 v1, v6, 31, v1
6180; GFX9-NEXT:    v_sub_u32_e32 v6, 64, v14
6181; GFX9-NEXT:    v_add_u32_e32 v15, 0xffffffc0, v14
6182; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v14, v[0:1]
6183; GFX9-NEXT:    v_lshlrev_b64 v[6:7], v6, v[2:3]
6184; GFX9-NEXT:    v_lshrrev_b64 v[8:9], v14, v[2:3]
6185; GFX9-NEXT:    v_lshrrev_b64 v[2:3], v15, v[2:3]
6186; GFX9-NEXT:    v_or_b32_e32 v4, v4, v6
6187; GFX9-NEXT:    v_or_b32_e32 v5, v5, v7
6188; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
6189; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
6190; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
6191; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
6192; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s[4:5]
6193; GFX9-NEXT:    v_cndmask_b32_e64 v1, v3, v1, s[4:5]
6194; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
6195; GFX9-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
6196; GFX9-NEXT:    v_or_b32_e32 v0, v11, v0
6197; GFX9-NEXT:    v_or_b32_e32 v1, v12, v1
6198; GFX9-NEXT:    v_or_b32_e32 v2, v10, v2
6199; GFX9-NEXT:    v_or_b32_e32 v3, v13, v3
6200; GFX9-NEXT:    s_setpc_b64 s[30:31]
6201;
6202; GFX10-LABEL: v_fshl_i128:
6203; GFX10:       ; %bb.0:
6204; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6205; GFX10-NEXT:    v_and_b32_e32 v18, 0x7f, v8
6206; GFX10-NEXT:    v_not_b32_e32 v10, v8
6207; GFX10-NEXT:    v_lshrrev_b64 v[4:5], 1, v[4:5]
6208; GFX10-NEXT:    v_lshrrev_b64 v[12:13], 1, v[6:7]
6209; GFX10-NEXT:    v_sub_nc_u32_e32 v11, 64, v18
6210; GFX10-NEXT:    v_and_b32_e32 v19, 0x7f, v10
6211; GFX10-NEXT:    v_lshlrev_b64 v[8:9], v18, v[2:3]
6212; GFX10-NEXT:    v_lshl_or_b32 v5, v6, 31, v5
6213; GFX10-NEXT:    v_add_nc_u32_e32 v20, 0xffffffc0, v18
6214; GFX10-NEXT:    v_lshrrev_b64 v[10:11], v11, v[0:1]
6215; GFX10-NEXT:    v_sub_nc_u32_e32 v16, 64, v19
6216; GFX10-NEXT:    v_lshlrev_b64 v[6:7], v18, v[0:1]
6217; GFX10-NEXT:    v_lshrrev_b64 v[14:15], v19, v[4:5]
6218; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v20, v[0:1]
6219; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v18
6220; GFX10-NEXT:    v_or_b32_e32 v10, v10, v8
6221; GFX10-NEXT:    v_add_nc_u32_e32 v8, 0xffffffc0, v19
6222; GFX10-NEXT:    v_lshlrev_b64 v[16:17], v16, v[12:13]
6223; GFX10-NEXT:    v_or_b32_e32 v11, v11, v9
6224; GFX10-NEXT:    v_cmp_gt_u32_e64 s4, 64, v19
6225; GFX10-NEXT:    v_cndmask_b32_e32 v10, v0, v10, vcc_lo
6226; GFX10-NEXT:    v_lshrrev_b64 v[8:9], v8, v[12:13]
6227; GFX10-NEXT:    v_cmp_eq_u32_e64 s5, 0, v19
6228; GFX10-NEXT:    v_or_b32_e32 v14, v14, v16
6229; GFX10-NEXT:    v_or_b32_e32 v15, v15, v17
6230; GFX10-NEXT:    v_cndmask_b32_e32 v11, v1, v11, vcc_lo
6231; GFX10-NEXT:    v_lshrrev_b64 v[0:1], v19, v[12:13]
6232; GFX10-NEXT:    v_cmp_eq_u32_e64 s6, 0, v18
6233; GFX10-NEXT:    v_cndmask_b32_e64 v8, v8, v14, s4
6234; GFX10-NEXT:    v_cndmask_b32_e64 v9, v9, v15, s4
6235; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0, v6, vcc_lo
6236; GFX10-NEXT:    v_cndmask_b32_e32 v7, 0, v7, vcc_lo
6237; GFX10-NEXT:    v_cndmask_b32_e64 v2, v10, v2, s6
6238; GFX10-NEXT:    v_cndmask_b32_e64 v3, v11, v3, s6
6239; GFX10-NEXT:    v_cndmask_b32_e64 v4, v8, v4, s5
6240; GFX10-NEXT:    v_cndmask_b32_e64 v5, v9, v5, s5
6241; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, v0, s4
6242; GFX10-NEXT:    v_cndmask_b32_e64 v9, 0, v1, s4
6243; GFX10-NEXT:    v_or_b32_e32 v0, v6, v4
6244; GFX10-NEXT:    v_or_b32_e32 v1, v7, v5
6245; GFX10-NEXT:    v_or_b32_e32 v2, v2, v8
6246; GFX10-NEXT:    v_or_b32_e32 v3, v3, v9
6247; GFX10-NEXT:    s_setpc_b64 s[30:31]
6248;
6249; GFX11-LABEL: v_fshl_i128:
6250; GFX11:       ; %bb.0:
6251; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6252; GFX11-NEXT:    v_and_b32_e32 v18, 0x7f, v8
6253; GFX11-NEXT:    v_not_b32_e32 v10, v8
6254; GFX11-NEXT:    v_lshrrev_b64 v[4:5], 1, v[4:5]
6255; GFX11-NEXT:    v_lshrrev_b64 v[12:13], 1, v[6:7]
6256; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
6257; GFX11-NEXT:    v_sub_nc_u32_e32 v11, 64, v18
6258; GFX11-NEXT:    v_and_b32_e32 v19, 0x7f, v10
6259; GFX11-NEXT:    v_lshlrev_b64 v[8:9], v18, v[2:3]
6260; GFX11-NEXT:    v_lshl_or_b32 v5, v6, 31, v5
6261; GFX11-NEXT:    v_lshlrev_b64 v[6:7], v18, v[0:1]
6262; GFX11-NEXT:    v_lshrrev_b64 v[10:11], v11, v[0:1]
6263; GFX11-NEXT:    v_sub_nc_u32_e32 v16, 64, v19
6264; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v18
6265; GFX11-NEXT:    v_add_nc_u32_e32 v20, 0xffffffc0, v18
6266; GFX11-NEXT:    v_lshrrev_b64 v[14:15], v19, v[4:5]
6267; GFX11-NEXT:    v_cmp_gt_u32_e64 s0, 64, v19
6268; GFX11-NEXT:    v_or_b32_e32 v10, v10, v8
6269; GFX11-NEXT:    v_cndmask_b32_e32 v7, 0, v7, vcc_lo
6270; GFX11-NEXT:    v_add_nc_u32_e32 v8, 0xffffffc0, v19
6271; GFX11-NEXT:    v_lshlrev_b64 v[16:17], v16, v[12:13]
6272; GFX11-NEXT:    v_lshlrev_b64 v[0:1], v20, v[0:1]
6273; GFX11-NEXT:    v_or_b32_e32 v11, v11, v9
6274; GFX11-NEXT:    v_cmp_eq_u32_e64 s1, 0, v19
6275; GFX11-NEXT:    v_lshrrev_b64 v[8:9], v8, v[12:13]
6276; GFX11-NEXT:    v_cndmask_b32_e32 v6, 0, v6, vcc_lo
6277; GFX11-NEXT:    v_or_b32_e32 v14, v14, v16
6278; GFX11-NEXT:    v_or_b32_e32 v15, v15, v17
6279; GFX11-NEXT:    v_dual_cndmask_b32 v10, v0, v10 :: v_dual_cndmask_b32 v11, v1, v11
6280; GFX11-NEXT:    v_lshrrev_b64 v[0:1], v19, v[12:13]
6281; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_3)
6282; GFX11-NEXT:    v_cndmask_b32_e64 v8, v8, v14, s0
6283; GFX11-NEXT:    v_cmp_eq_u32_e64 s2, 0, v18
6284; GFX11-NEXT:    v_cndmask_b32_e64 v9, v9, v15, s0
6285; GFX11-NEXT:    v_cndmask_b32_e64 v4, v8, v4, s1
6286; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
6287; GFX11-NEXT:    v_cndmask_b32_e64 v2, v10, v2, s2
6288; GFX11-NEXT:    v_cndmask_b32_e64 v3, v11, v3, s2
6289; GFX11-NEXT:    v_cndmask_b32_e64 v5, v9, v5, s1
6290; GFX11-NEXT:    v_cndmask_b32_e64 v8, 0, v0, s0
6291; GFX11-NEXT:    v_cndmask_b32_e64 v9, 0, v1, s0
6292; GFX11-NEXT:    v_or_b32_e32 v0, v6, v4
6293; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
6294; GFX11-NEXT:    v_or_b32_e32 v1, v7, v5
6295; GFX11-NEXT:    v_or_b32_e32 v2, v2, v8
6296; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
6297; GFX11-NEXT:    v_or_b32_e32 v3, v3, v9
6298; GFX11-NEXT:    s_setpc_b64 s[30:31]
6299  %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 %amt)
6300  ret i128 %result
6301}
6302
6303define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs, i128 %amt) {
6304; GFX6-LABEL: v_fshl_i128_ssv:
6305; GFX6:       ; %bb.0:
6306; GFX6-NEXT:    v_and_b32_e32 v7, 0x7f, v0
6307; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 64, v7
6308; GFX6-NEXT:    v_lshr_b64 v[1:2], s[0:1], v1
6309; GFX6-NEXT:    v_lshl_b64 v[3:4], s[2:3], v7
6310; GFX6-NEXT:    v_add_i32_e32 v9, vcc, 0xffffffc0, v7
6311; GFX6-NEXT:    v_lshl_b64 v[5:6], s[0:1], v7
6312; GFX6-NEXT:    v_or_b32_e32 v3, v1, v3
6313; GFX6-NEXT:    v_or_b32_e32 v4, v2, v4
6314; GFX6-NEXT:    v_lshl_b64 v[1:2], s[0:1], v9
6315; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
6316; GFX6-NEXT:    v_not_b32_e32 v0, v0
6317; GFX6-NEXT:    s_mov_b32 s8, 0
6318; GFX6-NEXT:    v_cndmask_b32_e32 v9, 0, v5, vcc
6319; GFX6-NEXT:    v_cndmask_b32_e32 v6, 0, v6, vcc
6320; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
6321; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
6322; GFX6-NEXT:    v_mov_b32_e32 v3, s2
6323; GFX6-NEXT:    v_mov_b32_e32 v4, s3
6324; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
6325; GFX6-NEXT:    s_lshr_b64 s[0:1], s[4:5], 1
6326; GFX6-NEXT:    s_lshl_b32 s9, s6, 31
6327; GFX6-NEXT:    v_and_b32_e32 v11, 0x7f, v0
6328; GFX6-NEXT:    v_cndmask_b32_e32 v7, v1, v3, vcc
6329; GFX6-NEXT:    v_cndmask_b32_e32 v10, v2, v4, vcc
6330; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[8:9]
6331; GFX6-NEXT:    s_lshr_b64 s[2:3], s[6:7], 1
6332; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 64, v11
6333; GFX6-NEXT:    v_not_b32_e32 v8, 63
6334; GFX6-NEXT:    v_lshr_b64 v[0:1], s[0:1], v11
6335; GFX6-NEXT:    v_lshl_b64 v[2:3], s[2:3], v2
6336; GFX6-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
6337; GFX6-NEXT:    v_or_b32_e32 v2, v0, v2
6338; GFX6-NEXT:    v_or_b32_e32 v3, v1, v3
6339; GFX6-NEXT:    v_lshr_b64 v[0:1], s[2:3], v8
6340; GFX6-NEXT:    v_lshr_b64 v[4:5], s[2:3], v11
6341; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v11
6342; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
6343; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
6344; GFX6-NEXT:    v_mov_b32_e32 v2, s0
6345; GFX6-NEXT:    v_mov_b32_e32 v3, s1
6346; GFX6-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v11
6347; GFX6-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[0:1]
6348; GFX6-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s[0:1]
6349; GFX6-NEXT:    v_cndmask_b32_e32 v2, 0, v4, vcc
6350; GFX6-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
6351; GFX6-NEXT:    v_or_b32_e32 v0, v9, v0
6352; GFX6-NEXT:    v_or_b32_e32 v1, v6, v1
6353; GFX6-NEXT:    v_or_b32_e32 v2, v7, v2
6354; GFX6-NEXT:    v_or_b32_e32 v3, v10, v3
6355; GFX6-NEXT:    ; return to shader part epilog
6356;
6357; GFX8-LABEL: v_fshl_i128_ssv:
6358; GFX8:       ; %bb.0:
6359; GFX8-NEXT:    v_and_b32_e32 v7, 0x7f, v0
6360; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, 64, v7
6361; GFX8-NEXT:    v_lshrrev_b64 v[1:2], v1, s[0:1]
6362; GFX8-NEXT:    v_lshlrev_b64 v[3:4], v7, s[2:3]
6363; GFX8-NEXT:    v_add_u32_e32 v9, vcc, 0xffffffc0, v7
6364; GFX8-NEXT:    v_lshlrev_b64 v[5:6], v7, s[0:1]
6365; GFX8-NEXT:    v_or_b32_e32 v3, v1, v3
6366; GFX8-NEXT:    v_or_b32_e32 v4, v2, v4
6367; GFX8-NEXT:    v_lshlrev_b64 v[1:2], v9, s[0:1]
6368; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
6369; GFX8-NEXT:    v_not_b32_e32 v0, v0
6370; GFX8-NEXT:    s_mov_b32 s8, 0
6371; GFX8-NEXT:    v_cndmask_b32_e32 v9, 0, v5, vcc
6372; GFX8-NEXT:    v_cndmask_b32_e32 v6, 0, v6, vcc
6373; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
6374; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
6375; GFX8-NEXT:    v_mov_b32_e32 v3, s2
6376; GFX8-NEXT:    v_mov_b32_e32 v4, s3
6377; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
6378; GFX8-NEXT:    s_lshr_b64 s[0:1], s[4:5], 1
6379; GFX8-NEXT:    s_lshl_b32 s9, s6, 31
6380; GFX8-NEXT:    v_and_b32_e32 v11, 0x7f, v0
6381; GFX8-NEXT:    v_cndmask_b32_e32 v7, v1, v3, vcc
6382; GFX8-NEXT:    v_cndmask_b32_e32 v10, v2, v4, vcc
6383; GFX8-NEXT:    s_or_b64 s[0:1], s[0:1], s[8:9]
6384; GFX8-NEXT:    s_lshr_b64 s[2:3], s[6:7], 1
6385; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 64, v11
6386; GFX8-NEXT:    v_not_b32_e32 v8, 63
6387; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v11, s[0:1]
6388; GFX8-NEXT:    v_lshlrev_b64 v[2:3], v2, s[2:3]
6389; GFX8-NEXT:    v_add_u32_e32 v8, vcc, v11, v8
6390; GFX8-NEXT:    v_or_b32_e32 v2, v0, v2
6391; GFX8-NEXT:    v_or_b32_e32 v3, v1, v3
6392; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v8, s[2:3]
6393; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v11, s[2:3]
6394; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v11
6395; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
6396; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
6397; GFX8-NEXT:    v_mov_b32_e32 v2, s0
6398; GFX8-NEXT:    v_mov_b32_e32 v3, s1
6399; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v11
6400; GFX8-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[0:1]
6401; GFX8-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s[0:1]
6402; GFX8-NEXT:    v_cndmask_b32_e32 v2, 0, v4, vcc
6403; GFX8-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
6404; GFX8-NEXT:    v_or_b32_e32 v0, v9, v0
6405; GFX8-NEXT:    v_or_b32_e32 v1, v6, v1
6406; GFX8-NEXT:    v_or_b32_e32 v2, v7, v2
6407; GFX8-NEXT:    v_or_b32_e32 v3, v10, v3
6408; GFX8-NEXT:    ; return to shader part epilog
6409;
6410; GFX9-LABEL: v_fshl_i128_ssv:
6411; GFX9:       ; %bb.0:
6412; GFX9-NEXT:    v_and_b32_e32 v7, 0x7f, v0
6413; GFX9-NEXT:    v_sub_u32_e32 v1, 64, v7
6414; GFX9-NEXT:    v_lshrrev_b64 v[1:2], v1, s[0:1]
6415; GFX9-NEXT:    v_lshlrev_b64 v[3:4], v7, s[2:3]
6416; GFX9-NEXT:    v_add_u32_e32 v8, 0xffffffc0, v7
6417; GFX9-NEXT:    v_lshlrev_b64 v[5:6], v7, s[0:1]
6418; GFX9-NEXT:    v_or_b32_e32 v3, v1, v3
6419; GFX9-NEXT:    v_or_b32_e32 v4, v2, v4
6420; GFX9-NEXT:    v_lshlrev_b64 v[1:2], v8, s[0:1]
6421; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
6422; GFX9-NEXT:    v_not_b32_e32 v0, v0
6423; GFX9-NEXT:    s_mov_b32 s8, 0
6424; GFX9-NEXT:    v_cndmask_b32_e32 v8, 0, v5, vcc
6425; GFX9-NEXT:    v_cndmask_b32_e32 v6, 0, v6, vcc
6426; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
6427; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
6428; GFX9-NEXT:    v_mov_b32_e32 v4, s3
6429; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
6430; GFX9-NEXT:    s_lshr_b64 s[0:1], s[4:5], 1
6431; GFX9-NEXT:    s_lshl_b32 s9, s6, 31
6432; GFX9-NEXT:    v_and_b32_e32 v10, 0x7f, v0
6433; GFX9-NEXT:    v_mov_b32_e32 v3, s2
6434; GFX9-NEXT:    v_cndmask_b32_e32 v9, v2, v4, vcc
6435; GFX9-NEXT:    s_or_b64 s[0:1], s[0:1], s[8:9]
6436; GFX9-NEXT:    s_lshr_b64 s[2:3], s[6:7], 1
6437; GFX9-NEXT:    v_sub_u32_e32 v2, 64, v10
6438; GFX9-NEXT:    v_cndmask_b32_e32 v7, v1, v3, vcc
6439; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v10, s[0:1]
6440; GFX9-NEXT:    v_lshlrev_b64 v[2:3], v2, s[2:3]
6441; GFX9-NEXT:    v_add_u32_e32 v11, 0xffffffc0, v10
6442; GFX9-NEXT:    v_or_b32_e32 v2, v0, v2
6443; GFX9-NEXT:    v_or_b32_e32 v3, v1, v3
6444; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v11, s[2:3]
6445; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v10, s[2:3]
6446; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v10
6447; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
6448; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
6449; GFX9-NEXT:    v_mov_b32_e32 v2, s0
6450; GFX9-NEXT:    v_mov_b32_e32 v3, s1
6451; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v10
6452; GFX9-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[0:1]
6453; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s[0:1]
6454; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v4, vcc
6455; GFX9-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
6456; GFX9-NEXT:    v_or_b32_e32 v0, v8, v0
6457; GFX9-NEXT:    v_or_b32_e32 v1, v6, v1
6458; GFX9-NEXT:    v_or_b32_e32 v2, v7, v2
6459; GFX9-NEXT:    v_or_b32_e32 v3, v9, v3
6460; GFX9-NEXT:    ; return to shader part epilog
6461;
6462; GFX10-LABEL: v_fshl_i128_ssv:
6463; GFX10:       ; %bb.0:
6464; GFX10-NEXT:    v_and_b32_e32 v12, 0x7f, v0
6465; GFX10-NEXT:    v_not_b32_e32 v2, v0
6466; GFX10-NEXT:    s_mov_b32 s8, 0
6467; GFX10-NEXT:    s_lshr_b64 s[4:5], s[4:5], 1
6468; GFX10-NEXT:    s_lshl_b32 s9, s6, 31
6469; GFX10-NEXT:    v_sub_nc_u32_e32 v3, 64, v12
6470; GFX10-NEXT:    v_and_b32_e32 v13, 0x7f, v2
6471; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v12, s[2:3]
6472; GFX10-NEXT:    s_or_b64 s[8:9], s[4:5], s[8:9]
6473; GFX10-NEXT:    s_lshr_b64 s[6:7], s[6:7], 1
6474; GFX10-NEXT:    v_lshrrev_b64 v[2:3], v3, s[0:1]
6475; GFX10-NEXT:    v_sub_nc_u32_e32 v8, 64, v13
6476; GFX10-NEXT:    v_add_nc_u32_e32 v10, 0xffffffc0, v12
6477; GFX10-NEXT:    v_lshrrev_b64 v[6:7], v13, s[8:9]
6478; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v12
6479; GFX10-NEXT:    v_lshlrev_b64 v[4:5], v12, s[0:1]
6480; GFX10-NEXT:    v_or_b32_e32 v2, v2, v0
6481; GFX10-NEXT:    v_add_nc_u32_e32 v0, 0xffffffc0, v13
6482; GFX10-NEXT:    v_lshlrev_b64 v[8:9], v8, s[6:7]
6483; GFX10-NEXT:    v_lshlrev_b64 v[10:11], v10, s[0:1]
6484; GFX10-NEXT:    v_or_b32_e32 v3, v3, v1
6485; GFX10-NEXT:    v_cmp_gt_u32_e64 s0, 64, v13
6486; GFX10-NEXT:    v_lshrrev_b64 v[0:1], v0, s[6:7]
6487; GFX10-NEXT:    v_cmp_eq_u32_e64 s1, 0, v13
6488; GFX10-NEXT:    v_or_b32_e32 v6, v6, v8
6489; GFX10-NEXT:    v_or_b32_e32 v7, v7, v9
6490; GFX10-NEXT:    v_cndmask_b32_e32 v8, v10, v2, vcc_lo
6491; GFX10-NEXT:    v_cndmask_b32_e32 v10, v11, v3, vcc_lo
6492; GFX10-NEXT:    v_lshrrev_b64 v[2:3], v13, s[6:7]
6493; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, v6, s0
6494; GFX10-NEXT:    v_cmp_eq_u32_e64 s4, 0, v12
6495; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s0
6496; GFX10-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc_lo
6497; GFX10-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc_lo
6498; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, s8, s1
6499; GFX10-NEXT:    v_cndmask_b32_e64 v6, v8, s2, s4
6500; GFX10-NEXT:    v_cndmask_b32_e64 v7, v10, s3, s4
6501; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s9, s1
6502; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s0
6503; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0, v3, s0
6504; GFX10-NEXT:    v_or_b32_e32 v0, v4, v0
6505; GFX10-NEXT:    v_or_b32_e32 v1, v5, v1
6506; GFX10-NEXT:    v_or_b32_e32 v2, v6, v2
6507; GFX10-NEXT:    v_or_b32_e32 v3, v7, v3
6508; GFX10-NEXT:    ; return to shader part epilog
6509;
6510; GFX11-LABEL: v_fshl_i128_ssv:
6511; GFX11:       ; %bb.0:
6512; GFX11-NEXT:    v_and_b32_e32 v12, 0x7f, v0
6513; GFX11-NEXT:    v_not_b32_e32 v2, v0
6514; GFX11-NEXT:    s_mov_b32 s8, 0
6515; GFX11-NEXT:    s_lshr_b64 s[4:5], s[4:5], 1
6516; GFX11-NEXT:    s_lshl_b32 s9, s6, 31
6517; GFX11-NEXT:    v_lshlrev_b64 v[4:5], v12, s[0:1]
6518; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v12
6519; GFX11-NEXT:    v_and_b32_e32 v13, 0x7f, v2
6520; GFX11-NEXT:    s_or_b64 s[8:9], s[4:5], s[8:9]
6521; GFX11-NEXT:    s_lshr_b64 s[6:7], s[6:7], 1
6522; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
6523; GFX11-NEXT:    v_dual_cndmask_b32 v4, 0, v4 :: v_dual_cndmask_b32 v5, 0, v5
6524; GFX11-NEXT:    v_sub_nc_u32_e32 v3, 64, v12
6525; GFX11-NEXT:    v_lshlrev_b64 v[0:1], v12, s[2:3]
6526; GFX11-NEXT:    v_sub_nc_u32_e32 v8, 64, v13
6527; GFX11-NEXT:    v_add_nc_u32_e32 v10, 0xffffffc0, v12
6528; GFX11-NEXT:    v_lshrrev_b64 v[6:7], v13, s[8:9]
6529; GFX11-NEXT:    v_lshrrev_b64 v[2:3], v3, s[0:1]
6530; GFX11-NEXT:    v_cmp_eq_u32_e64 s4, 0, v12
6531; GFX11-NEXT:    v_lshlrev_b64 v[8:9], v8, s[6:7]
6532; GFX11-NEXT:    v_lshlrev_b64 v[10:11], v10, s[0:1]
6533; GFX11-NEXT:    v_cmp_gt_u32_e64 s0, 64, v13
6534; GFX11-NEXT:    v_cmp_eq_u32_e64 s1, 0, v13
6535; GFX11-NEXT:    v_or_b32_e32 v2, v2, v0
6536; GFX11-NEXT:    v_add_nc_u32_e32 v0, 0xffffffc0, v13
6537; GFX11-NEXT:    v_or_b32_e32 v3, v3, v1
6538; GFX11-NEXT:    v_or_b32_e32 v6, v6, v8
6539; GFX11-NEXT:    v_or_b32_e32 v7, v7, v9
6540; GFX11-NEXT:    v_cndmask_b32_e32 v8, v10, v2, vcc_lo
6541; GFX11-NEXT:    v_lshrrev_b64 v[0:1], v0, s[6:7]
6542; GFX11-NEXT:    v_cndmask_b32_e32 v10, v11, v3, vcc_lo
6543; GFX11-NEXT:    v_lshrrev_b64 v[2:3], v13, s[6:7]
6544; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
6545; GFX11-NEXT:    v_cndmask_b32_e64 v0, v0, v6, s0
6546; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s0
6547; GFX11-NEXT:    v_cndmask_b32_e64 v6, v8, s2, s4
6548; GFX11-NEXT:    v_cndmask_b32_e64 v7, v10, s3, s4
6549; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s0
6550; GFX11-NEXT:    v_cndmask_b32_e64 v0, v0, s8, s1
6551; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s9, s1
6552; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0, v3, s0
6553; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
6554; GFX11-NEXT:    v_or_b32_e32 v2, v6, v2
6555; GFX11-NEXT:    v_or_b32_e32 v0, v4, v0
6556; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
6557; GFX11-NEXT:    v_or_b32_e32 v1, v5, v1
6558; GFX11-NEXT:    v_or_b32_e32 v3, v7, v3
6559; GFX11-NEXT:    ; return to shader part epilog
6560  %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 %amt)
6561  %cast.result = bitcast i128 %result to <4 x float>
6562  ret <4 x float> %cast.result
6563}
6564
; 128-bit funnel shift left with mixed operand placement: %lhs and %amt are
; marked inreg, %rhs is not. In the checks below %lhs is read from s[0:3],
; %amt from s4 and %rhs from v[0:3]; the result is left in v[0:3].
; The checks mask the amount with 0x7f and also form its complement masked
; with 0x7f, then combine a left shift of %lhs with a right shift of %rhs
; (which is first shifted right by one).
; The check lines in this function are produced by
; utils/update_llc_test_checks.py (see the note at the top of the file);
; regenerate them with that script instead of editing them by hand.
6565define amdgpu_ps <4 x float> @v_fshl_i128_svs(i128 inreg %lhs, i128 %rhs, i128 inreg %amt) {
6566; GFX6-LABEL: v_fshl_i128_svs:
6567; GFX6:       ; %bb.0:
6568; GFX6-NEXT:    s_and_b32 s5, s4, 0x7f
6569; GFX6-NEXT:    s_sub_i32 s12, s5, 64
6570; GFX6-NEXT:    s_sub_i32 s8, 64, s5
6571; GFX6-NEXT:    s_cmp_lt_u32 s5, 64
6572; GFX6-NEXT:    s_cselect_b32 s13, 1, 0
6573; GFX6-NEXT:    s_cmp_eq_u32 s5, 0
6574; GFX6-NEXT:    s_cselect_b32 s5, 1, 0
6575; GFX6-NEXT:    s_lshr_b64 s[8:9], s[0:1], s8
6576; GFX6-NEXT:    s_lshl_b64 s[10:11], s[2:3], s4
6577; GFX6-NEXT:    s_lshl_b64 s[6:7], s[0:1], s4
6578; GFX6-NEXT:    s_or_b64 s[8:9], s[8:9], s[10:11]
6579; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], s12
6580; GFX6-NEXT:    s_cmp_lg_u32 s13, 0
6581; GFX6-NEXT:    s_cselect_b64 s[6:7], s[6:7], 0
6582; GFX6-NEXT:    s_cselect_b64 s[0:1], s[8:9], s[0:1]
6583; GFX6-NEXT:    s_cmp_lg_u32 s5, 0
6584; GFX6-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
6585; GFX6-NEXT:    v_lshr_b64 v[0:1], v[0:1], 1
6586; GFX6-NEXT:    s_andn2_b32 s0, 0x7f, s4
6587; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 31, v2
6588; GFX6-NEXT:    v_lshr_b64 v[2:3], v[2:3], 1
6589; GFX6-NEXT:    s_sub_i32 s1, s0, 64
6590; GFX6-NEXT:    s_sub_i32 s4, 64, s0
6591; GFX6-NEXT:    v_or_b32_e32 v1, v1, v4
6592; GFX6-NEXT:    s_cmp_lt_u32 s0, 64
6593; GFX6-NEXT:    s_cselect_b32 s5, 1, 0
6594; GFX6-NEXT:    s_cmp_eq_u32 s0, 0
6595; GFX6-NEXT:    v_lshr_b64 v[4:5], v[0:1], s0
6596; GFX6-NEXT:    v_lshl_b64 v[6:7], v[2:3], s4
6597; GFX6-NEXT:    s_cselect_b32 s8, 1, 0
6598; GFX6-NEXT:    v_lshr_b64 v[8:9], v[2:3], s0
6599; GFX6-NEXT:    v_lshr_b64 v[2:3], v[2:3], s1
6600; GFX6-NEXT:    s_and_b32 s0, 1, s5
6601; GFX6-NEXT:    v_or_b32_e32 v4, v4, v6
6602; GFX6-NEXT:    v_or_b32_e32 v5, v5, v7
6603; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s0
6604; GFX6-NEXT:    s_and_b32 s0, 1, s8
6605; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
6606; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
6607; GFX6-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0
6608; GFX6-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s[0:1]
6609; GFX6-NEXT:    v_cndmask_b32_e64 v1, v3, v1, s[0:1]
6610; GFX6-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
6611; GFX6-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
6612; GFX6-NEXT:    v_or_b32_e32 v0, s6, v0
6613; GFX6-NEXT:    v_or_b32_e32 v1, s7, v1
6614; GFX6-NEXT:    v_or_b32_e32 v2, s2, v2
6615; GFX6-NEXT:    v_or_b32_e32 v3, s3, v3
6616; GFX6-NEXT:    ; return to shader part epilog
6617;
6618; GFX8-LABEL: v_fshl_i128_svs:
6619; GFX8:       ; %bb.0:
6620; GFX8-NEXT:    s_and_b32 s5, s4, 0x7f
6621; GFX8-NEXT:    s_sub_i32 s12, s5, 64
6622; GFX8-NEXT:    s_sub_i32 s8, 64, s5
6623; GFX8-NEXT:    s_cmp_lt_u32 s5, 64
6624; GFX8-NEXT:    s_cselect_b32 s13, 1, 0
6625; GFX8-NEXT:    s_cmp_eq_u32 s5, 0
6626; GFX8-NEXT:    s_cselect_b32 s5, 1, 0
6627; GFX8-NEXT:    s_lshr_b64 s[8:9], s[0:1], s8
6628; GFX8-NEXT:    s_lshl_b64 s[10:11], s[2:3], s4
6629; GFX8-NEXT:    s_lshl_b64 s[6:7], s[0:1], s4
6630; GFX8-NEXT:    s_or_b64 s[8:9], s[8:9], s[10:11]
6631; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], s12
6632; GFX8-NEXT:    s_cmp_lg_u32 s13, 0
6633; GFX8-NEXT:    s_cselect_b64 s[6:7], s[6:7], 0
6634; GFX8-NEXT:    s_cselect_b64 s[0:1], s[8:9], s[0:1]
6635; GFX8-NEXT:    s_cmp_lg_u32 s5, 0
6636; GFX8-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
6637; GFX8-NEXT:    v_lshrrev_b64 v[0:1], 1, v[0:1]
6638; GFX8-NEXT:    s_andn2_b32 s0, 0x7f, s4
6639; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 31, v2
6640; GFX8-NEXT:    v_lshrrev_b64 v[2:3], 1, v[2:3]
6641; GFX8-NEXT:    s_sub_i32 s1, s0, 64
6642; GFX8-NEXT:    s_sub_i32 s4, 64, s0
6643; GFX8-NEXT:    v_or_b32_e32 v1, v1, v4
6644; GFX8-NEXT:    s_cmp_lt_u32 s0, 64
6645; GFX8-NEXT:    s_cselect_b32 s5, 1, 0
6646; GFX8-NEXT:    s_cmp_eq_u32 s0, 0
6647; GFX8-NEXT:    v_lshrrev_b64 v[4:5], s0, v[0:1]
6648; GFX8-NEXT:    v_lshlrev_b64 v[6:7], s4, v[2:3]
6649; GFX8-NEXT:    s_cselect_b32 s8, 1, 0
6650; GFX8-NEXT:    v_lshrrev_b64 v[8:9], s0, v[2:3]
6651; GFX8-NEXT:    v_lshrrev_b64 v[2:3], s1, v[2:3]
6652; GFX8-NEXT:    s_and_b32 s0, 1, s5
6653; GFX8-NEXT:    v_or_b32_e32 v4, v4, v6
6654; GFX8-NEXT:    v_or_b32_e32 v5, v5, v7
6655; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s0
6656; GFX8-NEXT:    s_and_b32 s0, 1, s8
6657; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
6658; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
6659; GFX8-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0
6660; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s[0:1]
6661; GFX8-NEXT:    v_cndmask_b32_e64 v1, v3, v1, s[0:1]
6662; GFX8-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
6663; GFX8-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
6664; GFX8-NEXT:    v_or_b32_e32 v0, s6, v0
6665; GFX8-NEXT:    v_or_b32_e32 v1, s7, v1
6666; GFX8-NEXT:    v_or_b32_e32 v2, s2, v2
6667; GFX8-NEXT:    v_or_b32_e32 v3, s3, v3
6668; GFX8-NEXT:    ; return to shader part epilog
6669;
6670; GFX9-LABEL: v_fshl_i128_svs:
6671; GFX9:       ; %bb.0:
6672; GFX9-NEXT:    s_and_b32 s5, s4, 0x7f
6673; GFX9-NEXT:    s_sub_i32 s12, s5, 64
6674; GFX9-NEXT:    s_sub_i32 s8, 64, s5
6675; GFX9-NEXT:    s_cmp_lt_u32 s5, 64
6676; GFX9-NEXT:    s_cselect_b32 s13, 1, 0
6677; GFX9-NEXT:    s_cmp_eq_u32 s5, 0
6678; GFX9-NEXT:    s_cselect_b32 s5, 1, 0
6679; GFX9-NEXT:    s_lshr_b64 s[8:9], s[0:1], s8
6680; GFX9-NEXT:    s_lshl_b64 s[10:11], s[2:3], s4
6681; GFX9-NEXT:    s_lshl_b64 s[6:7], s[0:1], s4
6682; GFX9-NEXT:    s_or_b64 s[8:9], s[8:9], s[10:11]
6683; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], s12
6684; GFX9-NEXT:    s_cmp_lg_u32 s13, 0
6685; GFX9-NEXT:    s_cselect_b64 s[6:7], s[6:7], 0
6686; GFX9-NEXT:    s_cselect_b64 s[0:1], s[8:9], s[0:1]
6687; GFX9-NEXT:    v_lshrrev_b64 v[0:1], 1, v[0:1]
6688; GFX9-NEXT:    s_cmp_lg_u32 s5, 0
6689; GFX9-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
6690; GFX9-NEXT:    s_andn2_b32 s0, 0x7f, s4
6691; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 31, v1
6692; GFX9-NEXT:    v_lshrrev_b64 v[2:3], 1, v[2:3]
6693; GFX9-NEXT:    s_sub_i32 s1, s0, 64
6694; GFX9-NEXT:    s_sub_i32 s4, 64, s0
6695; GFX9-NEXT:    s_cmp_lt_u32 s0, 64
6696; GFX9-NEXT:    s_cselect_b32 s5, 1, 0
6697; GFX9-NEXT:    s_cmp_eq_u32 s0, 0
6698; GFX9-NEXT:    v_lshrrev_b64 v[4:5], s0, v[0:1]
6699; GFX9-NEXT:    v_lshlrev_b64 v[6:7], s4, v[2:3]
6700; GFX9-NEXT:    s_cselect_b32 s8, 1, 0
6701; GFX9-NEXT:    v_lshrrev_b64 v[8:9], s0, v[2:3]
6702; GFX9-NEXT:    v_lshrrev_b64 v[2:3], s1, v[2:3]
6703; GFX9-NEXT:    s_and_b32 s0, 1, s5
6704; GFX9-NEXT:    v_or_b32_e32 v4, v4, v6
6705; GFX9-NEXT:    v_or_b32_e32 v5, v5, v7
6706; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s0
6707; GFX9-NEXT:    s_and_b32 s0, 1, s8
6708; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
6709; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
6710; GFX9-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0
6711; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s[0:1]
6712; GFX9-NEXT:    v_cndmask_b32_e64 v1, v3, v1, s[0:1]
6713; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
6714; GFX9-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
6715; GFX9-NEXT:    v_or_b32_e32 v0, s6, v0
6716; GFX9-NEXT:    v_or_b32_e32 v1, s7, v1
6717; GFX9-NEXT:    v_or_b32_e32 v2, s2, v2
6718; GFX9-NEXT:    v_or_b32_e32 v3, s3, v3
6719; GFX9-NEXT:    ; return to shader part epilog
6720;
6721; GFX10-LABEL: v_fshl_i128_svs:
6722; GFX10:       ; %bb.0:
6723; GFX10-NEXT:    s_and_b32 s5, s4, 0x7f
6724; GFX10-NEXT:    v_lshrrev_b64 v[0:1], 1, v[0:1]
6725; GFX10-NEXT:    s_sub_i32 s12, s5, 64
6726; GFX10-NEXT:    s_sub_i32 s6, 64, s5
6727; GFX10-NEXT:    s_cmp_lt_u32 s5, 64
6728; GFX10-NEXT:    s_cselect_b32 s13, 1, 0
6729; GFX10-NEXT:    s_cmp_eq_u32 s5, 0
6730; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 31, v1
6731; GFX10-NEXT:    s_cselect_b32 s5, 1, 0
6732; GFX10-NEXT:    s_lshr_b64 s[6:7], s[0:1], s6
6733; GFX10-NEXT:    s_lshl_b64 s[8:9], s[2:3], s4
6734; GFX10-NEXT:    s_lshl_b64 s[10:11], s[0:1], s4
6735; GFX10-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
6736; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], s12
6737; GFX10-NEXT:    s_cmp_lg_u32 s13, 0
6738; GFX10-NEXT:    v_lshrrev_b64 v[2:3], 1, v[2:3]
6739; GFX10-NEXT:    s_cselect_b64 s[8:9], s[10:11], 0
6740; GFX10-NEXT:    s_cselect_b64 s[0:1], s[6:7], s[0:1]
6741; GFX10-NEXT:    s_cmp_lg_u32 s5, 0
6742; GFX10-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
6743; GFX10-NEXT:    s_andn2_b32 s0, 0x7f, s4
6744; GFX10-NEXT:    s_sub_i32 s1, 64, s0
6745; GFX10-NEXT:    v_lshrrev_b64 v[4:5], s0, v[0:1]
6746; GFX10-NEXT:    v_lshlrev_b64 v[6:7], s1, v[2:3]
6747; GFX10-NEXT:    s_sub_i32 s1, s0, 64
6748; GFX10-NEXT:    s_cmp_lt_u32 s0, 64
6749; GFX10-NEXT:    v_lshrrev_b64 v[8:9], s1, v[2:3]
6750; GFX10-NEXT:    s_cselect_b32 s4, 1, 0
6751; GFX10-NEXT:    s_cmp_eq_u32 s0, 0
6752; GFX10-NEXT:    v_or_b32_e32 v4, v4, v6
6753; GFX10-NEXT:    s_cselect_b32 s5, 1, 0
6754; GFX10-NEXT:    s_and_b32 s1, 1, s4
6755; GFX10-NEXT:    v_or_b32_e32 v5, v5, v7
6756; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s1
6757; GFX10-NEXT:    v_lshrrev_b64 v[2:3], s0, v[2:3]
6758; GFX10-NEXT:    s_and_b32 s0, 1, s5
6759; GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0, s0
6760; GFX10-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc_lo
6761; GFX10-NEXT:    v_cndmask_b32_e32 v5, v9, v5, vcc_lo
6762; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc_lo
6763; GFX10-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc_lo
6764; GFX10-NEXT:    v_cndmask_b32_e64 v0, v4, v0, s0
6765; GFX10-NEXT:    v_cndmask_b32_e64 v1, v5, v1, s0
6766; GFX10-NEXT:    v_or_b32_e32 v2, s2, v2
6767; GFX10-NEXT:    v_or_b32_e32 v3, s3, v3
6768; GFX10-NEXT:    v_or_b32_e32 v0, s8, v0
6769; GFX10-NEXT:    v_or_b32_e32 v1, s9, v1
6770; GFX10-NEXT:    ; return to shader part epilog
6771;
6772; GFX11-LABEL: v_fshl_i128_svs:
6773; GFX11:       ; %bb.0:
6774; GFX11-NEXT:    s_and_b32 s5, s4, 0x7f
6775; GFX11-NEXT:    v_lshrrev_b64 v[0:1], 1, v[0:1]
6776; GFX11-NEXT:    s_sub_i32 s12, s5, 64
6777; GFX11-NEXT:    s_sub_i32 s6, 64, s5
6778; GFX11-NEXT:    s_cmp_lt_u32 s5, 64
6779; GFX11-NEXT:    s_cselect_b32 s13, 1, 0
6780; GFX11-NEXT:    s_cmp_eq_u32 s5, 0
6781; GFX11-NEXT:    v_lshl_or_b32 v1, v2, 31, v1
6782; GFX11-NEXT:    s_cselect_b32 s5, 1, 0
6783; GFX11-NEXT:    s_lshr_b64 s[6:7], s[0:1], s6
6784; GFX11-NEXT:    s_lshl_b64 s[8:9], s[2:3], s4
6785; GFX11-NEXT:    s_lshl_b64 s[10:11], s[0:1], s4
6786; GFX11-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
6787; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], s12
6788; GFX11-NEXT:    s_cmp_lg_u32 s13, 0
6789; GFX11-NEXT:    v_lshrrev_b64 v[2:3], 1, v[2:3]
6790; GFX11-NEXT:    s_cselect_b64 s[8:9], s[10:11], 0
6791; GFX11-NEXT:    s_cselect_b64 s[0:1], s[6:7], s[0:1]
6792; GFX11-NEXT:    s_cmp_lg_u32 s5, 0
6793; GFX11-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
6794; GFX11-NEXT:    s_and_not1_b32 s0, 0x7f, s4
6795; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
6796; GFX11-NEXT:    s_sub_i32 s1, 64, s0
6797; GFX11-NEXT:    v_lshrrev_b64 v[4:5], s0, v[0:1]
6798; GFX11-NEXT:    v_lshlrev_b64 v[6:7], s1, v[2:3]
6799; GFX11-NEXT:    s_sub_i32 s1, s0, 64
6800; GFX11-NEXT:    s_cmp_lt_u32 s0, 64
6801; GFX11-NEXT:    v_lshrrev_b64 v[8:9], s1, v[2:3]
6802; GFX11-NEXT:    s_cselect_b32 s4, 1, 0
6803; GFX11-NEXT:    s_cmp_eq_u32 s0, 0
6804; GFX11-NEXT:    v_or_b32_e32 v4, v4, v6
6805; GFX11-NEXT:    s_cselect_b32 s5, 1, 0
6806; GFX11-NEXT:    s_and_b32 s1, 1, s4
6807; GFX11-NEXT:    v_or_b32_e32 v5, v5, v7
6808; GFX11-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s1
6809; GFX11-NEXT:    v_lshrrev_b64 v[2:3], s0, v[2:3]
6810; GFX11-NEXT:    s_and_b32 s0, 1, s5
6811; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
6812; GFX11-NEXT:    v_cmp_ne_u32_e64 s0, 0, s0
6813; GFX11-NEXT:    v_dual_cndmask_b32 v4, v8, v4 :: v_dual_cndmask_b32 v5, v9, v5
6814; GFX11-NEXT:    v_dual_cndmask_b32 v2, 0, v2 :: v_dual_cndmask_b32 v3, 0, v3
6815; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
6816; GFX11-NEXT:    v_cndmask_b32_e64 v0, v4, v0, s0
6817; GFX11-NEXT:    v_cndmask_b32_e64 v1, v5, v1, s0
6818; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
6819; GFX11-NEXT:    v_or_b32_e32 v2, s2, v2
6820; GFX11-NEXT:    v_or_b32_e32 v3, s3, v3
6821; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
6822; GFX11-NEXT:    v_or_b32_e32 v0, s8, v0
6823; GFX11-NEXT:    v_or_b32_e32 v1, s9, v1
6824; GFX11-NEXT:    ; return to shader part epilog
  ; The operation under test: a single call to the i128 funnel-shift-left
  ; intrinsic with the three arguments passed through unchanged.
6825  %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 %amt)
  ; The i128 result is returned as <4 x float>; presumably this is what keeps
  ; the return value in v[0:3] rather than s-registers - TODO confirm against
  ; the amdgpu_ps return convention.
6826  %cast.result = bitcast i128 %result to <4 x float>
6827  ret <4 x float> %cast.result
6828}
6829
; 128-bit funnel shift left with mixed operand placement: %rhs and %amt are
; marked inreg, %lhs is not. In the checks below %lhs is read from v[0:3],
; %rhs from s[0:3] and %amt from s4; the result is left in v[0:3].
; The checks mask the amount with 0x7f and also form its complement masked
; with 0x7f, then combine a left shift of %lhs (vector shifts) with a right
; shift of %rhs (scalar shifts, after an initial shift right by one).
; The check lines in this function are produced by
; utils/update_llc_test_checks.py (see the note at the top of the file);
; regenerate them with that script instead of editing them by hand.
6830define amdgpu_ps <4 x float> @v_fshl_i128_vss(i128 %lhs, i128 inreg %rhs, i128 inreg %amt) {
6831; GFX6-LABEL: v_fshl_i128_vss:
6832; GFX6:       ; %bb.0:
6833; GFX6-NEXT:    s_and_b32 s5, s4, 0x7f
6834; GFX6-NEXT:    s_sub_i32 s7, s5, 64
6835; GFX6-NEXT:    s_sub_i32 s8, 64, s5
6836; GFX6-NEXT:    s_cmp_lt_u32 s5, 64
6837; GFX6-NEXT:    s_cselect_b32 s9, 1, 0
6838; GFX6-NEXT:    s_cmp_eq_u32 s5, 0
6839; GFX6-NEXT:    s_mov_b32 s6, 0
6840; GFX6-NEXT:    s_cselect_b32 s10, 1, 0
6841; GFX6-NEXT:    v_lshr_b64 v[4:5], v[0:1], s8
6842; GFX6-NEXT:    v_lshl_b64 v[8:9], v[0:1], s5
6843; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], s7
6844; GFX6-NEXT:    s_lshr_b64 s[0:1], s[0:1], 1
6845; GFX6-NEXT:    s_lshl_b32 s7, s2, 31
6846; GFX6-NEXT:    v_lshl_b64 v[6:7], v[2:3], s5
6847; GFX6-NEXT:    s_and_b32 s5, 1, s9
6848; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[6:7]
6849; GFX6-NEXT:    s_not_b32 s6, s4
6850; GFX6-NEXT:    s_andn2_b32 s4, 0x7f, s4
6851; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s5
6852; GFX6-NEXT:    s_and_b32 s5, 1, s10
6853; GFX6-NEXT:    s_lshr_b64 s[2:3], s[2:3], 1
6854; GFX6-NEXT:    s_sub_i32 s10, s4, 64
6855; GFX6-NEXT:    s_sub_i32 s8, 64, s4
6856; GFX6-NEXT:    s_cmp_lt_u32 s4, 64
6857; GFX6-NEXT:    v_or_b32_e32 v4, v4, v6
6858; GFX6-NEXT:    v_or_b32_e32 v5, v5, v7
6859; GFX6-NEXT:    s_cselect_b32 s11, 1, 0
6860; GFX6-NEXT:    s_cmp_eq_u32 s4, 0
6861; GFX6-NEXT:    v_cndmask_b32_e32 v6, 0, v8, vcc
6862; GFX6-NEXT:    v_cndmask_b32_e32 v7, 0, v9, vcc
6863; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
6864; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
6865; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s5
6866; GFX6-NEXT:    s_cselect_b32 s12, 1, 0
6867; GFX6-NEXT:    s_lshr_b64 s[4:5], s[2:3], s6
6868; GFX6-NEXT:    s_lshr_b64 s[6:7], s[0:1], s6
6869; GFX6-NEXT:    s_lshl_b64 s[8:9], s[2:3], s8
6870; GFX6-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
6871; GFX6-NEXT:    s_lshr_b64 s[2:3], s[2:3], s10
6872; GFX6-NEXT:    s_cmp_lg_u32 s11, 0
6873; GFX6-NEXT:    s_cselect_b64 s[2:3], s[6:7], s[2:3]
6874; GFX6-NEXT:    s_cmp_lg_u32 s12, 0
6875; GFX6-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[2:3]
6876; GFX6-NEXT:    s_cmp_lg_u32 s11, 0
6877; GFX6-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
6878; GFX6-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc
6879; GFX6-NEXT:    s_cselect_b64 s[2:3], s[4:5], 0
6880; GFX6-NEXT:    v_or_b32_e32 v0, s0, v6
6881; GFX6-NEXT:    v_or_b32_e32 v1, s1, v7
6882; GFX6-NEXT:    v_or_b32_e32 v2, s2, v2
6883; GFX6-NEXT:    v_or_b32_e32 v3, s3, v3
6884; GFX6-NEXT:    ; return to shader part epilog
6885;
6886; GFX8-LABEL: v_fshl_i128_vss:
6887; GFX8:       ; %bb.0:
6888; GFX8-NEXT:    s_and_b32 s5, s4, 0x7f
6889; GFX8-NEXT:    s_sub_i32 s7, s5, 64
6890; GFX8-NEXT:    s_sub_i32 s8, 64, s5
6891; GFX8-NEXT:    s_cmp_lt_u32 s5, 64
6892; GFX8-NEXT:    s_cselect_b32 s9, 1, 0
6893; GFX8-NEXT:    s_cmp_eq_u32 s5, 0
6894; GFX8-NEXT:    s_mov_b32 s6, 0
6895; GFX8-NEXT:    s_cselect_b32 s10, 1, 0
6896; GFX8-NEXT:    v_lshrrev_b64 v[4:5], s8, v[0:1]
6897; GFX8-NEXT:    v_lshlrev_b64 v[8:9], s5, v[0:1]
6898; GFX8-NEXT:    v_lshlrev_b64 v[0:1], s7, v[0:1]
6899; GFX8-NEXT:    s_lshr_b64 s[0:1], s[0:1], 1
6900; GFX8-NEXT:    s_lshl_b32 s7, s2, 31
6901; GFX8-NEXT:    v_lshlrev_b64 v[6:7], s5, v[2:3]
6902; GFX8-NEXT:    s_and_b32 s5, 1, s9
6903; GFX8-NEXT:    s_or_b64 s[0:1], s[0:1], s[6:7]
6904; GFX8-NEXT:    s_not_b32 s6, s4
6905; GFX8-NEXT:    s_andn2_b32 s4, 0x7f, s4
6906; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s5
6907; GFX8-NEXT:    s_and_b32 s5, 1, s10
6908; GFX8-NEXT:    s_lshr_b64 s[2:3], s[2:3], 1
6909; GFX8-NEXT:    s_sub_i32 s10, s4, 64
6910; GFX8-NEXT:    s_sub_i32 s8, 64, s4
6911; GFX8-NEXT:    s_cmp_lt_u32 s4, 64
6912; GFX8-NEXT:    v_or_b32_e32 v4, v4, v6
6913; GFX8-NEXT:    v_or_b32_e32 v5, v5, v7
6914; GFX8-NEXT:    s_cselect_b32 s11, 1, 0
6915; GFX8-NEXT:    s_cmp_eq_u32 s4, 0
6916; GFX8-NEXT:    v_cndmask_b32_e32 v6, 0, v8, vcc
6917; GFX8-NEXT:    v_cndmask_b32_e32 v7, 0, v9, vcc
6918; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
6919; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
6920; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s5
6921; GFX8-NEXT:    s_cselect_b32 s12, 1, 0
6922; GFX8-NEXT:    s_lshr_b64 s[4:5], s[2:3], s6
6923; GFX8-NEXT:    s_lshr_b64 s[6:7], s[0:1], s6
6924; GFX8-NEXT:    s_lshl_b64 s[8:9], s[2:3], s8
6925; GFX8-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
6926; GFX8-NEXT:    s_lshr_b64 s[2:3], s[2:3], s10
6927; GFX8-NEXT:    s_cmp_lg_u32 s11, 0
6928; GFX8-NEXT:    s_cselect_b64 s[2:3], s[6:7], s[2:3]
6929; GFX8-NEXT:    s_cmp_lg_u32 s12, 0
6930; GFX8-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[2:3]
6931; GFX8-NEXT:    s_cmp_lg_u32 s11, 0
6932; GFX8-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
6933; GFX8-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc
6934; GFX8-NEXT:    s_cselect_b64 s[2:3], s[4:5], 0
6935; GFX8-NEXT:    v_or_b32_e32 v0, s0, v6
6936; GFX8-NEXT:    v_or_b32_e32 v1, s1, v7
6937; GFX8-NEXT:    v_or_b32_e32 v2, s2, v2
6938; GFX8-NEXT:    v_or_b32_e32 v3, s3, v3
6939; GFX8-NEXT:    ; return to shader part epilog
6940;
6941; GFX9-LABEL: v_fshl_i128_vss:
6942; GFX9:       ; %bb.0:
6943; GFX9-NEXT:    s_and_b32 s5, s4, 0x7f
6944; GFX9-NEXT:    s_sub_i32 s7, s5, 64
6945; GFX9-NEXT:    s_sub_i32 s8, 64, s5
6946; GFX9-NEXT:    s_cmp_lt_u32 s5, 64
6947; GFX9-NEXT:    s_cselect_b32 s9, 1, 0
6948; GFX9-NEXT:    s_cmp_eq_u32 s5, 0
6949; GFX9-NEXT:    s_mov_b32 s6, 0
6950; GFX9-NEXT:    s_cselect_b32 s10, 1, 0
6951; GFX9-NEXT:    v_lshrrev_b64 v[4:5], s8, v[0:1]
6952; GFX9-NEXT:    v_lshlrev_b64 v[8:9], s5, v[0:1]
6953; GFX9-NEXT:    v_lshlrev_b64 v[0:1], s7, v[0:1]
6954; GFX9-NEXT:    s_lshr_b64 s[0:1], s[0:1], 1
6955; GFX9-NEXT:    s_lshl_b32 s7, s2, 31
6956; GFX9-NEXT:    v_lshlrev_b64 v[6:7], s5, v[2:3]
6957; GFX9-NEXT:    s_and_b32 s5, 1, s9
6958; GFX9-NEXT:    s_or_b64 s[0:1], s[0:1], s[6:7]
6959; GFX9-NEXT:    s_not_b32 s6, s4
6960; GFX9-NEXT:    s_andn2_b32 s4, 0x7f, s4
6961; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s5
6962; GFX9-NEXT:    s_and_b32 s5, 1, s10
6963; GFX9-NEXT:    s_lshr_b64 s[2:3], s[2:3], 1
6964; GFX9-NEXT:    s_sub_i32 s10, s4, 64
6965; GFX9-NEXT:    s_sub_i32 s8, 64, s4
6966; GFX9-NEXT:    s_cmp_lt_u32 s4, 64
6967; GFX9-NEXT:    v_or_b32_e32 v4, v4, v6
6968; GFX9-NEXT:    v_or_b32_e32 v5, v5, v7
6969; GFX9-NEXT:    s_cselect_b32 s11, 1, 0
6970; GFX9-NEXT:    s_cmp_eq_u32 s4, 0
6971; GFX9-NEXT:    v_cndmask_b32_e32 v6, 0, v8, vcc
6972; GFX9-NEXT:    v_cndmask_b32_e32 v7, 0, v9, vcc
6973; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
6974; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
6975; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s5
6976; GFX9-NEXT:    s_cselect_b32 s12, 1, 0
6977; GFX9-NEXT:    s_lshr_b64 s[4:5], s[2:3], s6
6978; GFX9-NEXT:    s_lshr_b64 s[6:7], s[0:1], s6
6979; GFX9-NEXT:    s_lshl_b64 s[8:9], s[2:3], s8
6980; GFX9-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
6981; GFX9-NEXT:    s_lshr_b64 s[2:3], s[2:3], s10
6982; GFX9-NEXT:    s_cmp_lg_u32 s11, 0
6983; GFX9-NEXT:    s_cselect_b64 s[2:3], s[6:7], s[2:3]
6984; GFX9-NEXT:    s_cmp_lg_u32 s12, 0
6985; GFX9-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[2:3]
6986; GFX9-NEXT:    s_cmp_lg_u32 s11, 0
6987; GFX9-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
6988; GFX9-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc
6989; GFX9-NEXT:    s_cselect_b64 s[2:3], s[4:5], 0
6990; GFX9-NEXT:    v_or_b32_e32 v0, s0, v6
6991; GFX9-NEXT:    v_or_b32_e32 v1, s1, v7
6992; GFX9-NEXT:    v_or_b32_e32 v2, s2, v2
6993; GFX9-NEXT:    v_or_b32_e32 v3, s3, v3
6994; GFX9-NEXT:    ; return to shader part epilog
6995;
6996; GFX10-LABEL: v_fshl_i128_vss:
6997; GFX10:       ; %bb.0:
6998; GFX10-NEXT:    s_and_b32 s5, s4, 0x7f
6999; GFX10-NEXT:    s_sub_i32 s6, s5, 64
7000; GFX10-NEXT:    s_sub_i32 s7, 64, s5
7001; GFX10-NEXT:    s_cmp_lt_u32 s5, 64
7002; GFX10-NEXT:    v_lshrrev_b64 v[4:5], s7, v[0:1]
7003; GFX10-NEXT:    s_cselect_b32 s8, 1, 0
7004; GFX10-NEXT:    s_cmp_eq_u32 s5, 0
7005; GFX10-NEXT:    v_lshlrev_b64 v[6:7], s5, v[2:3]
7006; GFX10-NEXT:    s_cselect_b32 s9, 1, 0
7007; GFX10-NEXT:    v_lshlrev_b64 v[8:9], s5, v[0:1]
7008; GFX10-NEXT:    v_lshlrev_b64 v[0:1], s6, v[0:1]
7009; GFX10-NEXT:    s_mov_b32 s6, 0
7010; GFX10-NEXT:    s_lshr_b64 s[0:1], s[0:1], 1
7011; GFX10-NEXT:    s_lshl_b32 s7, s2, 31
7012; GFX10-NEXT:    s_and_b32 s5, 1, s8
7013; GFX10-NEXT:    s_or_b64 s[0:1], s[0:1], s[6:7]
7014; GFX10-NEXT:    s_andn2_b32 s6, 0x7f, s4
7015; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s5
7016; GFX10-NEXT:    v_or_b32_e32 v4, v4, v6
7017; GFX10-NEXT:    v_or_b32_e32 v5, v5, v7
7018; GFX10-NEXT:    s_and_b32 s5, 1, s9
7019; GFX10-NEXT:    s_lshr_b64 s[2:3], s[2:3], 1
7020; GFX10-NEXT:    s_not_b32 s8, s4
7021; GFX10-NEXT:    s_sub_i32 s10, s6, 64
7022; GFX10-NEXT:    s_sub_i32 s7, 64, s6
7023; GFX10-NEXT:    s_cmp_lt_u32 s6, 64
7024; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0, v8, vcc_lo
7025; GFX10-NEXT:    s_cselect_b32 s11, 1, 0
7026; GFX10-NEXT:    s_cmp_eq_u32 s6, 0
7027; GFX10-NEXT:    v_cndmask_b32_e32 v7, 0, v9, vcc_lo
7028; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
7029; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
7030; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s5
7031; GFX10-NEXT:    s_cselect_b32 s12, 1, 0
7032; GFX10-NEXT:    s_lshr_b64 s[4:5], s[0:1], s8
7033; GFX10-NEXT:    s_lshl_b64 s[6:7], s[2:3], s7
7034; GFX10-NEXT:    s_lshr_b64 s[8:9], s[2:3], s8
7035; GFX10-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
7036; GFX10-NEXT:    s_lshr_b64 s[2:3], s[2:3], s10
7037; GFX10-NEXT:    s_cmp_lg_u32 s11, 0
7038; GFX10-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc_lo
7039; GFX10-NEXT:    s_cselect_b64 s[2:3], s[4:5], s[2:3]
7040; GFX10-NEXT:    s_cmp_lg_u32 s12, 0
7041; GFX10-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc_lo
7042; GFX10-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[2:3]
7043; GFX10-NEXT:    s_cmp_lg_u32 s11, 0
7044; GFX10-NEXT:    v_or_b32_e32 v0, s0, v6
7045; GFX10-NEXT:    s_cselect_b64 s[2:3], s[8:9], 0
7046; GFX10-NEXT:    v_or_b32_e32 v1, s1, v7
7047; GFX10-NEXT:    v_or_b32_e32 v2, s2, v2
7048; GFX10-NEXT:    v_or_b32_e32 v3, s3, v3
7049; GFX10-NEXT:    ; return to shader part epilog
7050;
7051; GFX11-LABEL: v_fshl_i128_vss:
7052; GFX11:       ; %bb.0:
7053; GFX11-NEXT:    s_and_b32 s5, s4, 0x7f
7054; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
7055; GFX11-NEXT:    s_sub_i32 s6, s5, 64
7056; GFX11-NEXT:    s_sub_i32 s7, 64, s5
7057; GFX11-NEXT:    s_cmp_lt_u32 s5, 64
7058; GFX11-NEXT:    v_lshrrev_b64 v[4:5], s7, v[0:1]
7059; GFX11-NEXT:    s_cselect_b32 s8, 1, 0
7060; GFX11-NEXT:    s_cmp_eq_u32 s5, 0
7061; GFX11-NEXT:    v_lshlrev_b64 v[6:7], s5, v[2:3]
7062; GFX11-NEXT:    s_cselect_b32 s9, 1, 0
7063; GFX11-NEXT:    v_lshlrev_b64 v[8:9], s5, v[0:1]
7064; GFX11-NEXT:    v_lshlrev_b64 v[0:1], s6, v[0:1]
7065; GFX11-NEXT:    s_mov_b32 s6, 0
7066; GFX11-NEXT:    s_lshr_b64 s[0:1], s[0:1], 1
7067; GFX11-NEXT:    s_lshl_b32 s7, s2, 31
7068; GFX11-NEXT:    s_and_b32 s5, 1, s8
7069; GFX11-NEXT:    s_or_b64 s[0:1], s[0:1], s[6:7]
7070; GFX11-NEXT:    s_and_not1_b32 s6, 0x7f, s4
7071; GFX11-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s5
7072; GFX11-NEXT:    v_or_b32_e32 v4, v4, v6
7073; GFX11-NEXT:    v_or_b32_e32 v5, v5, v7
7074; GFX11-NEXT:    s_and_b32 s5, 1, s9
7075; GFX11-NEXT:    s_lshr_b64 s[2:3], s[2:3], 1
7076; GFX11-NEXT:    s_not_b32 s8, s4
7077; GFX11-NEXT:    s_sub_i32 s10, s6, 64
7078; GFX11-NEXT:    s_sub_i32 s7, 64, s6
7079; GFX11-NEXT:    s_cmp_lt_u32 s6, 64
7080; GFX11-NEXT:    v_dual_cndmask_b32 v6, 0, v8 :: v_dual_cndmask_b32 v7, 0, v9
7081; GFX11-NEXT:    s_cselect_b32 s11, 1, 0
7082; GFX11-NEXT:    s_cmp_eq_u32 s6, 0
7083; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5
7084; GFX11-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s5
7085; GFX11-NEXT:    s_cselect_b32 s12, 1, 0
7086; GFX11-NEXT:    s_lshr_b64 s[4:5], s[0:1], s8
7087; GFX11-NEXT:    s_lshl_b64 s[6:7], s[2:3], s7
7088; GFX11-NEXT:    s_lshr_b64 s[8:9], s[2:3], s8
7089; GFX11-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
7090; GFX11-NEXT:    s_lshr_b64 s[2:3], s[2:3], s10
7091; GFX11-NEXT:    s_cmp_lg_u32 s11, 0
7092; GFX11-NEXT:    v_dual_cndmask_b32 v2, v0, v2 :: v_dual_cndmask_b32 v3, v1, v3
7093; GFX11-NEXT:    s_cselect_b64 s[2:3], s[4:5], s[2:3]
7094; GFX11-NEXT:    s_cmp_lg_u32 s12, 0
7095; GFX11-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[2:3]
7096; GFX11-NEXT:    s_cmp_lg_u32 s11, 0
7097; GFX11-NEXT:    v_or_b32_e32 v0, s0, v6
7098; GFX11-NEXT:    s_cselect_b64 s[2:3], s[8:9], 0
7099; GFX11-NEXT:    v_or_b32_e32 v1, s1, v7
7100; GFX11-NEXT:    v_or_b32_e32 v2, s2, v2
7101; GFX11-NEXT:    v_or_b32_e32 v3, s3, v3
7102; GFX11-NEXT:    ; return to shader part epilog
  ; The operation under test: a single call to the i128 funnel-shift-left
  ; intrinsic with the three arguments passed through unchanged.
7103  %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 %amt)
  ; The i128 result is returned as <4 x float>; presumably this is what keeps
  ; the return value in v[0:3] rather than s-registers - TODO confirm against
  ; the amdgpu_ps return convention.
7104  %cast.result = bitcast i128 %result to <4 x float>
7105  ret <4 x float> %cast.result
7106}
7107
; s_fshl_i128_65 - i128 funnel shift left by the constant amount 65, with both
; operands passed inreg to an amdgpu_ps entry point.
; Per the LLVM LangRef, fshl concatenates lhs (high half) with rhs (low half),
; shifts left by the amount modulo the bit width and returns the upper half, so
; the value under test is (lhs << 65) | (rhs >> 63).
; Every target's expected code below is a fixed sequence of 64-bit left shifts
; by 1, 32-bit right shifts by 31 and 64-bit ORs, with no compare or select.
; The GFX6, GFX8 and GFX9 sequences are identical, GFX10 differs in register
; choice and instruction order, and GFX11 additionally expects an s_delay_alu.
; The assertions are autogenerated (see the NOTE on the first line of the file),
; so regenerate them with utils/update_llc_test_checks.py rather than editing
; them by hand.
7108define amdgpu_ps i128 @s_fshl_i128_65(i128 inreg %lhs, i128 inreg %rhs) {
7109; GFX6-LABEL: s_fshl_i128_65:
7110; GFX6:       ; %bb.0:
7111; GFX6-NEXT:    s_lshl_b64 s[2:3], s[0:1], 1
7112; GFX6-NEXT:    s_lshr_b32 s4, s5, 31
7113; GFX6-NEXT:    s_mov_b32 s5, 0
7114; GFX6-NEXT:    s_lshl_b64 s[0:1], s[6:7], 1
7115; GFX6-NEXT:    s_or_b64 s[0:1], s[4:5], s[0:1]
7116; GFX6-NEXT:    s_lshr_b32 s4, s7, 31
7117; GFX6-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
7118; GFX6-NEXT:    ; return to shader part epilog
7119;
7120; GFX8-LABEL: s_fshl_i128_65:
7121; GFX8:       ; %bb.0:
7122; GFX8-NEXT:    s_lshl_b64 s[2:3], s[0:1], 1
7123; GFX8-NEXT:    s_lshr_b32 s4, s5, 31
7124; GFX8-NEXT:    s_mov_b32 s5, 0
7125; GFX8-NEXT:    s_lshl_b64 s[0:1], s[6:7], 1
7126; GFX8-NEXT:    s_or_b64 s[0:1], s[4:5], s[0:1]
7127; GFX8-NEXT:    s_lshr_b32 s4, s7, 31
7128; GFX8-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
7129; GFX8-NEXT:    ; return to shader part epilog
7130;
7131; GFX9-LABEL: s_fshl_i128_65:
7132; GFX9:       ; %bb.0:
7133; GFX9-NEXT:    s_lshl_b64 s[2:3], s[0:1], 1
7134; GFX9-NEXT:    s_lshr_b32 s4, s5, 31
7135; GFX9-NEXT:    s_mov_b32 s5, 0
7136; GFX9-NEXT:    s_lshl_b64 s[0:1], s[6:7], 1
7137; GFX9-NEXT:    s_or_b64 s[0:1], s[4:5], s[0:1]
7138; GFX9-NEXT:    s_lshr_b32 s4, s7, 31
7139; GFX9-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
7140; GFX9-NEXT:    ; return to shader part epilog
7141;
7142; GFX10-LABEL: s_fshl_i128_65:
7143; GFX10:       ; %bb.0:
7144; GFX10-NEXT:    s_lshr_b32 s2, s5, 31
7145; GFX10-NEXT:    s_mov_b32 s3, 0
7146; GFX10-NEXT:    s_lshl_b64 s[4:5], s[6:7], 1
7147; GFX10-NEXT:    s_lshl_b64 s[8:9], s[0:1], 1
7148; GFX10-NEXT:    s_or_b64 s[0:1], s[2:3], s[4:5]
7149; GFX10-NEXT:    s_lshr_b32 s2, s7, 31
7150; GFX10-NEXT:    s_or_b64 s[2:3], s[8:9], s[2:3]
7151; GFX10-NEXT:    ; return to shader part epilog
7152;
7153; GFX11-LABEL: s_fshl_i128_65:
7154; GFX11:       ; %bb.0:
7155; GFX11-NEXT:    s_lshr_b32 s2, s5, 31
7156; GFX11-NEXT:    s_mov_b32 s3, 0
7157; GFX11-NEXT:    s_lshl_b64 s[4:5], s[6:7], 1
7158; GFX11-NEXT:    s_lshl_b64 s[8:9], s[0:1], 1
7159; GFX11-NEXT:    s_or_b64 s[0:1], s[2:3], s[4:5]
7160; GFX11-NEXT:    s_lshr_b32 s2, s7, 31
7161; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
7162; GFX11-NEXT:    s_or_b64 s[2:3], s[8:9], s[2:3]
7163; GFX11-NEXT:    ; return to shader part epilog
7164  %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 65)
7165  ret i128 %result
7166}
7167
; v_fshl_i128_65 - i128 funnel shift left by the constant amount 65 in a
; function with the default calling convention and no inreg arguments.
; The expected code on every target is VALU only, namely two 64-bit left shifts
; by 1, two 32-bit right shifts by 31 and two 32-bit ORs, followed by a return
; through s_setpc_b64.
; GFX6 expects v_lshl_b64 with the shift amount as the last operand, while the
; later targets expect v_lshlrev_b64 with the shift amount as the first source.
; GFX10 and GFX11 keep the two shifted-out bits in separate registers (v4 and
; v5) and issue both ORs last, and GFX11 additionally expects an s_delay_alu.
; The assertions are autogenerated (see the NOTE on the first line of the file),
; so regenerate them with utils/update_llc_test_checks.py rather than editing
; them by hand.
7168define i128 @v_fshl_i128_65(i128 %lhs, i128 %rhs) {
7169; GFX6-LABEL: v_fshl_i128_65:
7170; GFX6:       ; %bb.0:
7171; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7172; GFX6-NEXT:    v_lshl_b64 v[2:3], v[0:1], 1
7173; GFX6-NEXT:    v_lshl_b64 v[0:1], v[6:7], 1
7174; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 31, v5
7175; GFX6-NEXT:    v_or_b32_e32 v0, v4, v0
7176; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 31, v7
7177; GFX6-NEXT:    v_or_b32_e32 v2, v2, v4
7178; GFX6-NEXT:    s_setpc_b64 s[30:31]
7179;
7180; GFX8-LABEL: v_fshl_i128_65:
7181; GFX8:       ; %bb.0:
7182; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7183; GFX8-NEXT:    v_lshlrev_b64 v[2:3], 1, v[0:1]
7184; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 1, v[6:7]
7185; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 31, v5
7186; GFX8-NEXT:    v_or_b32_e32 v0, v4, v0
7187; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 31, v7
7188; GFX8-NEXT:    v_or_b32_e32 v2, v2, v4
7189; GFX8-NEXT:    s_setpc_b64 s[30:31]
7190;
7191; GFX9-LABEL: v_fshl_i128_65:
7192; GFX9:       ; %bb.0:
7193; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7194; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 1, v[0:1]
7195; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 1, v[6:7]
7196; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 31, v5
7197; GFX9-NEXT:    v_or_b32_e32 v0, v4, v0
7198; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 31, v7
7199; GFX9-NEXT:    v_or_b32_e32 v2, v2, v4
7200; GFX9-NEXT:    s_setpc_b64 s[30:31]
7201;
7202; GFX10-LABEL: v_fshl_i128_65:
7203; GFX10:       ; %bb.0:
7204; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7205; GFX10-NEXT:    v_lshlrev_b64 v[2:3], 1, v[0:1]
7206; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 1, v[6:7]
7207; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 31, v5
7208; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 31, v7
7209; GFX10-NEXT:    v_or_b32_e32 v0, v4, v0
7210; GFX10-NEXT:    v_or_b32_e32 v2, v2, v5
7211; GFX10-NEXT:    s_setpc_b64 s[30:31]
7212;
7213; GFX11-LABEL: v_fshl_i128_65:
7214; GFX11:       ; %bb.0:
7215; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7216; GFX11-NEXT:    v_lshlrev_b64 v[2:3], 1, v[0:1]
7217; GFX11-NEXT:    v_lshlrev_b64 v[0:1], 1, v[6:7]
7218; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 31, v5
7219; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 31, v7
7220; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7221; GFX11-NEXT:    v_or_b32_e32 v0, v4, v0
7222; GFX11-NEXT:    v_or_b32_e32 v2, v2, v5
7223; GFX11-NEXT:    s_setpc_b64 s[30:31]
7224  %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 65)
7225  ret i128 %result
7226}
7227
; s_fshl_v2i128 - funnel shift left of <2 x i128> by a variable amount vector,
; with all three operands passed inreg to an amdgpu_ps entry point.
; The expected code on every target is SALU only and contains the same pattern
; twice, presumably once per vector element (TODO confirm against the argument
; register assignment, which the IR does not show).
; Each repetition masks a shift amount with 0x7f (s_and_b32), forms 0x7f and-not
; that amount (s_andn2_b32, spelled s_and_not1_b32 on GFX11), compares each of
; the two masked values against 64 and against 0, and uses s_cselect_b64 to
; choose between the candidate 64-bit shift results before ORing the left
; shifted and right shifted parts together.
; The GFX6, GFX8 and GFX9 sequences are identical, while GFX10 and GFX11 differ
; from them in register choice and instruction order.
; The assertions are autogenerated (see the NOTE on the first line of the file),
; so regenerate them with utils/update_llc_test_checks.py rather than editing
; them by hand.
7228define amdgpu_ps <2 x i128> @s_fshl_v2i128(<2 x i128> inreg %lhs, <2 x i128> inreg %rhs, <2 x i128> inreg %amt) {
7229; GFX6-LABEL: s_fshl_v2i128:
7230; GFX6:       ; %bb.0:
7231; GFX6-NEXT:    s_and_b32 s17, s16, 0x7f
7232; GFX6-NEXT:    s_sub_i32 s19, s17, 64
7233; GFX6-NEXT:    s_sub_i32 s21, 64, s17
7234; GFX6-NEXT:    s_cmp_lt_u32 s17, 64
7235; GFX6-NEXT:    s_cselect_b32 s28, 1, 0
7236; GFX6-NEXT:    s_cmp_eq_u32 s17, 0
7237; GFX6-NEXT:    s_cselect_b32 s17, 1, 0
7238; GFX6-NEXT:    s_lshr_b64 s[24:25], s[0:1], s21
7239; GFX6-NEXT:    s_lshl_b64 s[26:27], s[2:3], s16
7240; GFX6-NEXT:    s_lshl_b64 s[22:23], s[0:1], s16
7241; GFX6-NEXT:    s_or_b64 s[24:25], s[24:25], s[26:27]
7242; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], s19
7243; GFX6-NEXT:    s_cmp_lg_u32 s28, 0
7244; GFX6-NEXT:    s_cselect_b64 s[22:23], s[22:23], 0
7245; GFX6-NEXT:    s_cselect_b64 s[0:1], s[24:25], s[0:1]
7246; GFX6-NEXT:    s_cmp_lg_u32 s17, 0
7247; GFX6-NEXT:    s_mov_b32 s18, 0
7248; GFX6-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
7249; GFX6-NEXT:    s_lshr_b64 s[0:1], s[8:9], 1
7250; GFX6-NEXT:    s_lshl_b32 s19, s10, 31
7251; GFX6-NEXT:    s_lshr_b64 s[8:9], s[10:11], 1
7252; GFX6-NEXT:    s_andn2_b32 s10, 0x7f, s16
7253; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[18:19]
7254; GFX6-NEXT:    s_not_b32 s17, s16
7255; GFX6-NEXT:    s_sub_i32 s19, s10, 64
7256; GFX6-NEXT:    s_sub_i32 s21, 64, s10
7257; GFX6-NEXT:    s_cmp_lt_u32 s10, 64
7258; GFX6-NEXT:    s_cselect_b32 s26, 1, 0
7259; GFX6-NEXT:    s_cmp_eq_u32 s10, 0
7260; GFX6-NEXT:    s_cselect_b32 s27, 1, 0
7261; GFX6-NEXT:    s_lshr_b64 s[10:11], s[8:9], s17
7262; GFX6-NEXT:    s_lshr_b64 s[16:17], s[0:1], s17
7263; GFX6-NEXT:    s_lshl_b64 s[24:25], s[8:9], s21
7264; GFX6-NEXT:    s_or_b64 s[16:17], s[16:17], s[24:25]
7265; GFX6-NEXT:    s_lshr_b64 s[8:9], s[8:9], s19
7266; GFX6-NEXT:    s_cmp_lg_u32 s26, 0
7267; GFX6-NEXT:    s_cselect_b64 s[8:9], s[16:17], s[8:9]
7268; GFX6-NEXT:    s_cmp_lg_u32 s27, 0
7269; GFX6-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[8:9]
7270; GFX6-NEXT:    s_cmp_lg_u32 s26, 0
7271; GFX6-NEXT:    s_cselect_b64 s[8:9], s[10:11], 0
7272; GFX6-NEXT:    s_or_b64 s[2:3], s[2:3], s[8:9]
7273; GFX6-NEXT:    s_and_b32 s8, s20, 0x7f
7274; GFX6-NEXT:    s_or_b64 s[0:1], s[22:23], s[0:1]
7275; GFX6-NEXT:    s_sub_i32 s19, s8, 64
7276; GFX6-NEXT:    s_sub_i32 s10, 64, s8
7277; GFX6-NEXT:    s_cmp_lt_u32 s8, 64
7278; GFX6-NEXT:    s_cselect_b32 s21, 1, 0
7279; GFX6-NEXT:    s_cmp_eq_u32 s8, 0
7280; GFX6-NEXT:    s_cselect_b32 s22, 1, 0
7281; GFX6-NEXT:    s_lshr_b64 s[10:11], s[4:5], s10
7282; GFX6-NEXT:    s_lshl_b64 s[16:17], s[6:7], s20
7283; GFX6-NEXT:    s_lshl_b64 s[8:9], s[4:5], s20
7284; GFX6-NEXT:    s_or_b64 s[10:11], s[10:11], s[16:17]
7285; GFX6-NEXT:    s_lshl_b64 s[4:5], s[4:5], s19
7286; GFX6-NEXT:    s_cmp_lg_u32 s21, 0
7287; GFX6-NEXT:    s_cselect_b64 s[8:9], s[8:9], 0
7288; GFX6-NEXT:    s_cselect_b64 s[4:5], s[10:11], s[4:5]
7289; GFX6-NEXT:    s_cmp_lg_u32 s22, 0
7290; GFX6-NEXT:    s_cselect_b64 s[6:7], s[6:7], s[4:5]
7291; GFX6-NEXT:    s_lshr_b64 s[4:5], s[12:13], 1
7292; GFX6-NEXT:    s_lshl_b32 s19, s14, 31
7293; GFX6-NEXT:    s_andn2_b32 s12, 0x7f, s20
7294; GFX6-NEXT:    s_or_b64 s[4:5], s[4:5], s[18:19]
7295; GFX6-NEXT:    s_lshr_b64 s[10:11], s[14:15], 1
7296; GFX6-NEXT:    s_not_b32 s14, s20
7297; GFX6-NEXT:    s_sub_i32 s18, s12, 64
7298; GFX6-NEXT:    s_sub_i32 s16, 64, s12
7299; GFX6-NEXT:    s_cmp_lt_u32 s12, 64
7300; GFX6-NEXT:    s_cselect_b32 s19, 1, 0
7301; GFX6-NEXT:    s_cmp_eq_u32 s12, 0
7302; GFX6-NEXT:    s_cselect_b32 s20, 1, 0
7303; GFX6-NEXT:    s_lshr_b64 s[12:13], s[10:11], s14
7304; GFX6-NEXT:    s_lshr_b64 s[14:15], s[4:5], s14
7305; GFX6-NEXT:    s_lshl_b64 s[16:17], s[10:11], s16
7306; GFX6-NEXT:    s_or_b64 s[14:15], s[14:15], s[16:17]
7307; GFX6-NEXT:    s_lshr_b64 s[10:11], s[10:11], s18
7308; GFX6-NEXT:    s_cmp_lg_u32 s19, 0
7309; GFX6-NEXT:    s_cselect_b64 s[10:11], s[14:15], s[10:11]
7310; GFX6-NEXT:    s_cmp_lg_u32 s20, 0
7311; GFX6-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[10:11]
7312; GFX6-NEXT:    s_cmp_lg_u32 s19, 0
7313; GFX6-NEXT:    s_cselect_b64 s[10:11], s[12:13], 0
7314; GFX6-NEXT:    s_or_b64 s[4:5], s[8:9], s[4:5]
7315; GFX6-NEXT:    s_or_b64 s[6:7], s[6:7], s[10:11]
7316; GFX6-NEXT:    ; return to shader part epilog
7317;
7318; GFX8-LABEL: s_fshl_v2i128:
7319; GFX8:       ; %bb.0:
7320; GFX8-NEXT:    s_and_b32 s17, s16, 0x7f
7321; GFX8-NEXT:    s_sub_i32 s19, s17, 64
7322; GFX8-NEXT:    s_sub_i32 s21, 64, s17
7323; GFX8-NEXT:    s_cmp_lt_u32 s17, 64
7324; GFX8-NEXT:    s_cselect_b32 s28, 1, 0
7325; GFX8-NEXT:    s_cmp_eq_u32 s17, 0
7326; GFX8-NEXT:    s_cselect_b32 s17, 1, 0
7327; GFX8-NEXT:    s_lshr_b64 s[24:25], s[0:1], s21
7328; GFX8-NEXT:    s_lshl_b64 s[26:27], s[2:3], s16
7329; GFX8-NEXT:    s_lshl_b64 s[22:23], s[0:1], s16
7330; GFX8-NEXT:    s_or_b64 s[24:25], s[24:25], s[26:27]
7331; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], s19
7332; GFX8-NEXT:    s_cmp_lg_u32 s28, 0
7333; GFX8-NEXT:    s_cselect_b64 s[22:23], s[22:23], 0
7334; GFX8-NEXT:    s_cselect_b64 s[0:1], s[24:25], s[0:1]
7335; GFX8-NEXT:    s_cmp_lg_u32 s17, 0
7336; GFX8-NEXT:    s_mov_b32 s18, 0
7337; GFX8-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
7338; GFX8-NEXT:    s_lshr_b64 s[0:1], s[8:9], 1
7339; GFX8-NEXT:    s_lshl_b32 s19, s10, 31
7340; GFX8-NEXT:    s_lshr_b64 s[8:9], s[10:11], 1
7341; GFX8-NEXT:    s_andn2_b32 s10, 0x7f, s16
7342; GFX8-NEXT:    s_or_b64 s[0:1], s[0:1], s[18:19]
7343; GFX8-NEXT:    s_not_b32 s17, s16
7344; GFX8-NEXT:    s_sub_i32 s19, s10, 64
7345; GFX8-NEXT:    s_sub_i32 s21, 64, s10
7346; GFX8-NEXT:    s_cmp_lt_u32 s10, 64
7347; GFX8-NEXT:    s_cselect_b32 s26, 1, 0
7348; GFX8-NEXT:    s_cmp_eq_u32 s10, 0
7349; GFX8-NEXT:    s_cselect_b32 s27, 1, 0
7350; GFX8-NEXT:    s_lshr_b64 s[10:11], s[8:9], s17
7351; GFX8-NEXT:    s_lshr_b64 s[16:17], s[0:1], s17
7352; GFX8-NEXT:    s_lshl_b64 s[24:25], s[8:9], s21
7353; GFX8-NEXT:    s_or_b64 s[16:17], s[16:17], s[24:25]
7354; GFX8-NEXT:    s_lshr_b64 s[8:9], s[8:9], s19
7355; GFX8-NEXT:    s_cmp_lg_u32 s26, 0
7356; GFX8-NEXT:    s_cselect_b64 s[8:9], s[16:17], s[8:9]
7357; GFX8-NEXT:    s_cmp_lg_u32 s27, 0
7358; GFX8-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[8:9]
7359; GFX8-NEXT:    s_cmp_lg_u32 s26, 0
7360; GFX8-NEXT:    s_cselect_b64 s[8:9], s[10:11], 0
7361; GFX8-NEXT:    s_or_b64 s[2:3], s[2:3], s[8:9]
7362; GFX8-NEXT:    s_and_b32 s8, s20, 0x7f
7363; GFX8-NEXT:    s_or_b64 s[0:1], s[22:23], s[0:1]
7364; GFX8-NEXT:    s_sub_i32 s19, s8, 64
7365; GFX8-NEXT:    s_sub_i32 s10, 64, s8
7366; GFX8-NEXT:    s_cmp_lt_u32 s8, 64
7367; GFX8-NEXT:    s_cselect_b32 s21, 1, 0
7368; GFX8-NEXT:    s_cmp_eq_u32 s8, 0
7369; GFX8-NEXT:    s_cselect_b32 s22, 1, 0
7370; GFX8-NEXT:    s_lshr_b64 s[10:11], s[4:5], s10
7371; GFX8-NEXT:    s_lshl_b64 s[16:17], s[6:7], s20
7372; GFX8-NEXT:    s_lshl_b64 s[8:9], s[4:5], s20
7373; GFX8-NEXT:    s_or_b64 s[10:11], s[10:11], s[16:17]
7374; GFX8-NEXT:    s_lshl_b64 s[4:5], s[4:5], s19
7375; GFX8-NEXT:    s_cmp_lg_u32 s21, 0
7376; GFX8-NEXT:    s_cselect_b64 s[8:9], s[8:9], 0
7377; GFX8-NEXT:    s_cselect_b64 s[4:5], s[10:11], s[4:5]
7378; GFX8-NEXT:    s_cmp_lg_u32 s22, 0
7379; GFX8-NEXT:    s_cselect_b64 s[6:7], s[6:7], s[4:5]
7380; GFX8-NEXT:    s_lshr_b64 s[4:5], s[12:13], 1
7381; GFX8-NEXT:    s_lshl_b32 s19, s14, 31
7382; GFX8-NEXT:    s_andn2_b32 s12, 0x7f, s20
7383; GFX8-NEXT:    s_or_b64 s[4:5], s[4:5], s[18:19]
7384; GFX8-NEXT:    s_lshr_b64 s[10:11], s[14:15], 1
7385; GFX8-NEXT:    s_not_b32 s14, s20
7386; GFX8-NEXT:    s_sub_i32 s18, s12, 64
7387; GFX8-NEXT:    s_sub_i32 s16, 64, s12
7388; GFX8-NEXT:    s_cmp_lt_u32 s12, 64
7389; GFX8-NEXT:    s_cselect_b32 s19, 1, 0
7390; GFX8-NEXT:    s_cmp_eq_u32 s12, 0
7391; GFX8-NEXT:    s_cselect_b32 s20, 1, 0
7392; GFX8-NEXT:    s_lshr_b64 s[12:13], s[10:11], s14
7393; GFX8-NEXT:    s_lshr_b64 s[14:15], s[4:5], s14
7394; GFX8-NEXT:    s_lshl_b64 s[16:17], s[10:11], s16
7395; GFX8-NEXT:    s_or_b64 s[14:15], s[14:15], s[16:17]
7396; GFX8-NEXT:    s_lshr_b64 s[10:11], s[10:11], s18
7397; GFX8-NEXT:    s_cmp_lg_u32 s19, 0
7398; GFX8-NEXT:    s_cselect_b64 s[10:11], s[14:15], s[10:11]
7399; GFX8-NEXT:    s_cmp_lg_u32 s20, 0
7400; GFX8-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[10:11]
7401; GFX8-NEXT:    s_cmp_lg_u32 s19, 0
7402; GFX8-NEXT:    s_cselect_b64 s[10:11], s[12:13], 0
7403; GFX8-NEXT:    s_or_b64 s[4:5], s[8:9], s[4:5]
7404; GFX8-NEXT:    s_or_b64 s[6:7], s[6:7], s[10:11]
7405; GFX8-NEXT:    ; return to shader part epilog
7406;
7407; GFX9-LABEL: s_fshl_v2i128:
7408; GFX9:       ; %bb.0:
7409; GFX9-NEXT:    s_and_b32 s17, s16, 0x7f
7410; GFX9-NEXT:    s_sub_i32 s19, s17, 64
7411; GFX9-NEXT:    s_sub_i32 s21, 64, s17
7412; GFX9-NEXT:    s_cmp_lt_u32 s17, 64
7413; GFX9-NEXT:    s_cselect_b32 s28, 1, 0
7414; GFX9-NEXT:    s_cmp_eq_u32 s17, 0
7415; GFX9-NEXT:    s_cselect_b32 s17, 1, 0
7416; GFX9-NEXT:    s_lshr_b64 s[24:25], s[0:1], s21
7417; GFX9-NEXT:    s_lshl_b64 s[26:27], s[2:3], s16
7418; GFX9-NEXT:    s_lshl_b64 s[22:23], s[0:1], s16
7419; GFX9-NEXT:    s_or_b64 s[24:25], s[24:25], s[26:27]
7420; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], s19
7421; GFX9-NEXT:    s_cmp_lg_u32 s28, 0
7422; GFX9-NEXT:    s_cselect_b64 s[22:23], s[22:23], 0
7423; GFX9-NEXT:    s_cselect_b64 s[0:1], s[24:25], s[0:1]
7424; GFX9-NEXT:    s_cmp_lg_u32 s17, 0
7425; GFX9-NEXT:    s_mov_b32 s18, 0
7426; GFX9-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
7427; GFX9-NEXT:    s_lshr_b64 s[0:1], s[8:9], 1
7428; GFX9-NEXT:    s_lshl_b32 s19, s10, 31
7429; GFX9-NEXT:    s_lshr_b64 s[8:9], s[10:11], 1
7430; GFX9-NEXT:    s_andn2_b32 s10, 0x7f, s16
7431; GFX9-NEXT:    s_or_b64 s[0:1], s[0:1], s[18:19]
7432; GFX9-NEXT:    s_not_b32 s17, s16
7433; GFX9-NEXT:    s_sub_i32 s19, s10, 64
7434; GFX9-NEXT:    s_sub_i32 s21, 64, s10
7435; GFX9-NEXT:    s_cmp_lt_u32 s10, 64
7436; GFX9-NEXT:    s_cselect_b32 s26, 1, 0
7437; GFX9-NEXT:    s_cmp_eq_u32 s10, 0
7438; GFX9-NEXT:    s_cselect_b32 s27, 1, 0
7439; GFX9-NEXT:    s_lshr_b64 s[10:11], s[8:9], s17
7440; GFX9-NEXT:    s_lshr_b64 s[16:17], s[0:1], s17
7441; GFX9-NEXT:    s_lshl_b64 s[24:25], s[8:9], s21
7442; GFX9-NEXT:    s_or_b64 s[16:17], s[16:17], s[24:25]
7443; GFX9-NEXT:    s_lshr_b64 s[8:9], s[8:9], s19
7444; GFX9-NEXT:    s_cmp_lg_u32 s26, 0
7445; GFX9-NEXT:    s_cselect_b64 s[8:9], s[16:17], s[8:9]
7446; GFX9-NEXT:    s_cmp_lg_u32 s27, 0
7447; GFX9-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[8:9]
7448; GFX9-NEXT:    s_cmp_lg_u32 s26, 0
7449; GFX9-NEXT:    s_cselect_b64 s[8:9], s[10:11], 0
7450; GFX9-NEXT:    s_or_b64 s[2:3], s[2:3], s[8:9]
7451; GFX9-NEXT:    s_and_b32 s8, s20, 0x7f
7452; GFX9-NEXT:    s_or_b64 s[0:1], s[22:23], s[0:1]
7453; GFX9-NEXT:    s_sub_i32 s19, s8, 64
7454; GFX9-NEXT:    s_sub_i32 s10, 64, s8
7455; GFX9-NEXT:    s_cmp_lt_u32 s8, 64
7456; GFX9-NEXT:    s_cselect_b32 s21, 1, 0
7457; GFX9-NEXT:    s_cmp_eq_u32 s8, 0
7458; GFX9-NEXT:    s_cselect_b32 s22, 1, 0
7459; GFX9-NEXT:    s_lshr_b64 s[10:11], s[4:5], s10
7460; GFX9-NEXT:    s_lshl_b64 s[16:17], s[6:7], s20
7461; GFX9-NEXT:    s_lshl_b64 s[8:9], s[4:5], s20
7462; GFX9-NEXT:    s_or_b64 s[10:11], s[10:11], s[16:17]
7463; GFX9-NEXT:    s_lshl_b64 s[4:5], s[4:5], s19
7464; GFX9-NEXT:    s_cmp_lg_u32 s21, 0
7465; GFX9-NEXT:    s_cselect_b64 s[8:9], s[8:9], 0
7466; GFX9-NEXT:    s_cselect_b64 s[4:5], s[10:11], s[4:5]
7467; GFX9-NEXT:    s_cmp_lg_u32 s22, 0
7468; GFX9-NEXT:    s_cselect_b64 s[6:7], s[6:7], s[4:5]
7469; GFX9-NEXT:    s_lshr_b64 s[4:5], s[12:13], 1
7470; GFX9-NEXT:    s_lshl_b32 s19, s14, 31
7471; GFX9-NEXT:    s_andn2_b32 s12, 0x7f, s20
7472; GFX9-NEXT:    s_or_b64 s[4:5], s[4:5], s[18:19]
7473; GFX9-NEXT:    s_lshr_b64 s[10:11], s[14:15], 1
7474; GFX9-NEXT:    s_not_b32 s14, s20
7475; GFX9-NEXT:    s_sub_i32 s18, s12, 64
7476; GFX9-NEXT:    s_sub_i32 s16, 64, s12
7477; GFX9-NEXT:    s_cmp_lt_u32 s12, 64
7478; GFX9-NEXT:    s_cselect_b32 s19, 1, 0
7479; GFX9-NEXT:    s_cmp_eq_u32 s12, 0
7480; GFX9-NEXT:    s_cselect_b32 s20, 1, 0
7481; GFX9-NEXT:    s_lshr_b64 s[12:13], s[10:11], s14
7482; GFX9-NEXT:    s_lshr_b64 s[14:15], s[4:5], s14
7483; GFX9-NEXT:    s_lshl_b64 s[16:17], s[10:11], s16
7484; GFX9-NEXT:    s_or_b64 s[14:15], s[14:15], s[16:17]
7485; GFX9-NEXT:    s_lshr_b64 s[10:11], s[10:11], s18
7486; GFX9-NEXT:    s_cmp_lg_u32 s19, 0
7487; GFX9-NEXT:    s_cselect_b64 s[10:11], s[14:15], s[10:11]
7488; GFX9-NEXT:    s_cmp_lg_u32 s20, 0
7489; GFX9-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[10:11]
7490; GFX9-NEXT:    s_cmp_lg_u32 s19, 0
7491; GFX9-NEXT:    s_cselect_b64 s[10:11], s[12:13], 0
7492; GFX9-NEXT:    s_or_b64 s[4:5], s[8:9], s[4:5]
7493; GFX9-NEXT:    s_or_b64 s[6:7], s[6:7], s[10:11]
7494; GFX9-NEXT:    ; return to shader part epilog
7495;
7496; GFX10-LABEL: s_fshl_v2i128:
7497; GFX10:       ; %bb.0:
7498; GFX10-NEXT:    s_and_b32 s17, s16, 0x7f
7499; GFX10-NEXT:    s_mov_b32 s18, 0
7500; GFX10-NEXT:    s_sub_i32 s19, s17, 64
7501; GFX10-NEXT:    s_sub_i32 s21, 64, s17
7502; GFX10-NEXT:    s_cmp_lt_u32 s17, 64
7503; GFX10-NEXT:    s_cselect_b32 s28, 1, 0
7504; GFX10-NEXT:    s_cmp_eq_u32 s17, 0
7505; GFX10-NEXT:    s_cselect_b32 s17, 1, 0
7506; GFX10-NEXT:    s_lshr_b64 s[22:23], s[0:1], s21
7507; GFX10-NEXT:    s_lshl_b64 s[24:25], s[2:3], s16
7508; GFX10-NEXT:    s_lshl_b64 s[26:27], s[0:1], s16
7509; GFX10-NEXT:    s_or_b64 s[22:23], s[22:23], s[24:25]
7510; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], s19
7511; GFX10-NEXT:    s_cmp_lg_u32 s28, 0
7512; GFX10-NEXT:    s_cselect_b64 s[24:25], s[26:27], 0
7513; GFX10-NEXT:    s_cselect_b64 s[0:1], s[22:23], s[0:1]
7514; GFX10-NEXT:    s_cmp_lg_u32 s17, 0
7515; GFX10-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
7516; GFX10-NEXT:    s_lshr_b64 s[0:1], s[8:9], 1
7517; GFX10-NEXT:    s_lshl_b32 s19, s10, 31
7518; GFX10-NEXT:    s_lshr_b64 s[8:9], s[10:11], 1
7519; GFX10-NEXT:    s_andn2_b32 s10, 0x7f, s16
7520; GFX10-NEXT:    s_or_b64 s[0:1], s[0:1], s[18:19]
7521; GFX10-NEXT:    s_not_b32 s19, s16
7522; GFX10-NEXT:    s_sub_i32 s21, s10, 64
7523; GFX10-NEXT:    s_sub_i32 s16, 64, s10
7524; GFX10-NEXT:    s_cmp_lt_u32 s10, 64
7525; GFX10-NEXT:    s_cselect_b32 s26, 1, 0
7526; GFX10-NEXT:    s_cmp_eq_u32 s10, 0
7527; GFX10-NEXT:    s_cselect_b32 s27, 1, 0
7528; GFX10-NEXT:    s_lshr_b64 s[10:11], s[0:1], s19
7529; GFX10-NEXT:    s_lshl_b64 s[16:17], s[8:9], s16
7530; GFX10-NEXT:    s_lshr_b64 s[22:23], s[8:9], s19
7531; GFX10-NEXT:    s_or_b64 s[10:11], s[10:11], s[16:17]
7532; GFX10-NEXT:    s_lshr_b64 s[8:9], s[8:9], s21
7533; GFX10-NEXT:    s_cmp_lg_u32 s26, 0
7534; GFX10-NEXT:    s_cselect_b64 s[8:9], s[10:11], s[8:9]
7535; GFX10-NEXT:    s_cmp_lg_u32 s27, 0
7536; GFX10-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[8:9]
7537; GFX10-NEXT:    s_cmp_lg_u32 s26, 0
7538; GFX10-NEXT:    s_cselect_b64 s[8:9], s[22:23], 0
7539; GFX10-NEXT:    s_and_b32 s10, s20, 0x7f
7540; GFX10-NEXT:    s_or_b64 s[0:1], s[24:25], s[0:1]
7541; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[8:9]
7542; GFX10-NEXT:    s_sub_i32 s19, s10, 64
7543; GFX10-NEXT:    s_sub_i32 s8, 64, s10
7544; GFX10-NEXT:    s_cmp_lt_u32 s10, 64
7545; GFX10-NEXT:    s_cselect_b32 s21, 1, 0
7546; GFX10-NEXT:    s_cmp_eq_u32 s10, 0
7547; GFX10-NEXT:    s_cselect_b32 s22, 1, 0
7548; GFX10-NEXT:    s_lshr_b64 s[8:9], s[4:5], s8
7549; GFX10-NEXT:    s_lshl_b64 s[10:11], s[6:7], s20
7550; GFX10-NEXT:    s_lshl_b64 s[16:17], s[4:5], s20
7551; GFX10-NEXT:    s_or_b64 s[8:9], s[8:9], s[10:11]
7552; GFX10-NEXT:    s_lshl_b64 s[4:5], s[4:5], s19
7553; GFX10-NEXT:    s_cmp_lg_u32 s21, 0
7554; GFX10-NEXT:    s_cselect_b64 s[10:11], s[16:17], 0
7555; GFX10-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
7556; GFX10-NEXT:    s_cmp_lg_u32 s22, 0
7557; GFX10-NEXT:    s_cselect_b64 s[6:7], s[6:7], s[4:5]
7558; GFX10-NEXT:    s_lshr_b64 s[4:5], s[12:13], 1
7559; GFX10-NEXT:    s_lshl_b32 s19, s14, 31
7560; GFX10-NEXT:    s_andn2_b32 s12, 0x7f, s20
7561; GFX10-NEXT:    s_or_b64 s[4:5], s[4:5], s[18:19]
7562; GFX10-NEXT:    s_lshr_b64 s[8:9], s[14:15], 1
7563; GFX10-NEXT:    s_not_b32 s16, s20
7564; GFX10-NEXT:    s_sub_i32 s18, s12, 64
7565; GFX10-NEXT:    s_sub_i32 s14, 64, s12
7566; GFX10-NEXT:    s_cmp_lt_u32 s12, 64
7567; GFX10-NEXT:    s_cselect_b32 s19, 1, 0
7568; GFX10-NEXT:    s_cmp_eq_u32 s12, 0
7569; GFX10-NEXT:    s_cselect_b32 s20, 1, 0
7570; GFX10-NEXT:    s_lshr_b64 s[12:13], s[4:5], s16
7571; GFX10-NEXT:    s_lshl_b64 s[14:15], s[8:9], s14
7572; GFX10-NEXT:    s_lshr_b64 s[16:17], s[8:9], s16
7573; GFX10-NEXT:    s_or_b64 s[12:13], s[12:13], s[14:15]
7574; GFX10-NEXT:    s_lshr_b64 s[8:9], s[8:9], s18
7575; GFX10-NEXT:    s_cmp_lg_u32 s19, 0
7576; GFX10-NEXT:    s_cselect_b64 s[8:9], s[12:13], s[8:9]
7577; GFX10-NEXT:    s_cmp_lg_u32 s20, 0
7578; GFX10-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[8:9]
7579; GFX10-NEXT:    s_cmp_lg_u32 s19, 0
7580; GFX10-NEXT:    s_cselect_b64 s[8:9], s[16:17], 0
7581; GFX10-NEXT:    s_or_b64 s[4:5], s[10:11], s[4:5]
7582; GFX10-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
7583; GFX10-NEXT:    ; return to shader part epilog
7584;
7585; GFX11-LABEL: s_fshl_v2i128:
7586; GFX11:       ; %bb.0:
7587; GFX11-NEXT:    s_and_b32 s17, s16, 0x7f
7588; GFX11-NEXT:    s_mov_b32 s18, 0
7589; GFX11-NEXT:    s_sub_i32 s19, s17, 64
7590; GFX11-NEXT:    s_sub_i32 s21, 64, s17
7591; GFX11-NEXT:    s_cmp_lt_u32 s17, 64
7592; GFX11-NEXT:    s_cselect_b32 s28, 1, 0
7593; GFX11-NEXT:    s_cmp_eq_u32 s17, 0
7594; GFX11-NEXT:    s_cselect_b32 s17, 1, 0
7595; GFX11-NEXT:    s_lshr_b64 s[22:23], s[0:1], s21
7596; GFX11-NEXT:    s_lshl_b64 s[24:25], s[2:3], s16
7597; GFX11-NEXT:    s_lshl_b64 s[26:27], s[0:1], s16
7598; GFX11-NEXT:    s_or_b64 s[22:23], s[22:23], s[24:25]
7599; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], s19
7600; GFX11-NEXT:    s_cmp_lg_u32 s28, 0
7601; GFX11-NEXT:    s_cselect_b64 s[24:25], s[26:27], 0
7602; GFX11-NEXT:    s_cselect_b64 s[0:1], s[22:23], s[0:1]
7603; GFX11-NEXT:    s_cmp_lg_u32 s17, 0
7604; GFX11-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
7605; GFX11-NEXT:    s_lshr_b64 s[0:1], s[8:9], 1
7606; GFX11-NEXT:    s_lshl_b32 s19, s10, 31
7607; GFX11-NEXT:    s_lshr_b64 s[8:9], s[10:11], 1
7608; GFX11-NEXT:    s_and_not1_b32 s10, 0x7f, s16
7609; GFX11-NEXT:    s_or_b64 s[0:1], s[0:1], s[18:19]
7610; GFX11-NEXT:    s_not_b32 s19, s16
7611; GFX11-NEXT:    s_sub_i32 s21, s10, 64
7612; GFX11-NEXT:    s_sub_i32 s16, 64, s10
7613; GFX11-NEXT:    s_cmp_lt_u32 s10, 64
7614; GFX11-NEXT:    s_cselect_b32 s26, 1, 0
7615; GFX11-NEXT:    s_cmp_eq_u32 s10, 0
7616; GFX11-NEXT:    s_cselect_b32 s27, 1, 0
7617; GFX11-NEXT:    s_lshr_b64 s[10:11], s[0:1], s19
7618; GFX11-NEXT:    s_lshl_b64 s[16:17], s[8:9], s16
7619; GFX11-NEXT:    s_lshr_b64 s[22:23], s[8:9], s19
7620; GFX11-NEXT:    s_or_b64 s[10:11], s[10:11], s[16:17]
7621; GFX11-NEXT:    s_lshr_b64 s[8:9], s[8:9], s21
7622; GFX11-NEXT:    s_cmp_lg_u32 s26, 0
7623; GFX11-NEXT:    s_cselect_b64 s[8:9], s[10:11], s[8:9]
7624; GFX11-NEXT:    s_cmp_lg_u32 s27, 0
7625; GFX11-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[8:9]
7626; GFX11-NEXT:    s_cmp_lg_u32 s26, 0
7627; GFX11-NEXT:    s_cselect_b64 s[8:9], s[22:23], 0
7628; GFX11-NEXT:    s_and_b32 s10, s20, 0x7f
7629; GFX11-NEXT:    s_or_b64 s[0:1], s[24:25], s[0:1]
7630; GFX11-NEXT:    s_or_b64 s[2:3], s[2:3], s[8:9]
7631; GFX11-NEXT:    s_sub_i32 s19, s10, 64
7632; GFX11-NEXT:    s_sub_i32 s8, 64, s10
7633; GFX11-NEXT:    s_cmp_lt_u32 s10, 64
7634; GFX11-NEXT:    s_cselect_b32 s21, 1, 0
7635; GFX11-NEXT:    s_cmp_eq_u32 s10, 0
7636; GFX11-NEXT:    s_cselect_b32 s22, 1, 0
7637; GFX11-NEXT:    s_lshr_b64 s[8:9], s[4:5], s8
7638; GFX11-NEXT:    s_lshl_b64 s[10:11], s[6:7], s20
7639; GFX11-NEXT:    s_lshl_b64 s[16:17], s[4:5], s20
7640; GFX11-NEXT:    s_or_b64 s[8:9], s[8:9], s[10:11]
7641; GFX11-NEXT:    s_lshl_b64 s[4:5], s[4:5], s19
7642; GFX11-NEXT:    s_cmp_lg_u32 s21, 0
7643; GFX11-NEXT:    s_cselect_b64 s[10:11], s[16:17], 0
7644; GFX11-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
7645; GFX11-NEXT:    s_cmp_lg_u32 s22, 0
7646; GFX11-NEXT:    s_cselect_b64 s[6:7], s[6:7], s[4:5]
7647; GFX11-NEXT:    s_lshr_b64 s[4:5], s[12:13], 1
7648; GFX11-NEXT:    s_lshl_b32 s19, s14, 31
7649; GFX11-NEXT:    s_and_not1_b32 s12, 0x7f, s20
7650; GFX11-NEXT:    s_or_b64 s[4:5], s[4:5], s[18:19]
7651; GFX11-NEXT:    s_lshr_b64 s[8:9], s[14:15], 1
7652; GFX11-NEXT:    s_not_b32 s16, s20
7653; GFX11-NEXT:    s_sub_i32 s18, s12, 64
7654; GFX11-NEXT:    s_sub_i32 s14, 64, s12
7655; GFX11-NEXT:    s_cmp_lt_u32 s12, 64
7656; GFX11-NEXT:    s_cselect_b32 s19, 1, 0
7657; GFX11-NEXT:    s_cmp_eq_u32 s12, 0
7658; GFX11-NEXT:    s_cselect_b32 s20, 1, 0
7659; GFX11-NEXT:    s_lshr_b64 s[12:13], s[4:5], s16
7660; GFX11-NEXT:    s_lshl_b64 s[14:15], s[8:9], s14
7661; GFX11-NEXT:    s_lshr_b64 s[16:17], s[8:9], s16
7662; GFX11-NEXT:    s_or_b64 s[12:13], s[12:13], s[14:15]
7663; GFX11-NEXT:    s_lshr_b64 s[8:9], s[8:9], s18
7664; GFX11-NEXT:    s_cmp_lg_u32 s19, 0
7665; GFX11-NEXT:    s_cselect_b64 s[8:9], s[12:13], s[8:9]
7666; GFX11-NEXT:    s_cmp_lg_u32 s20, 0
7667; GFX11-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[8:9]
7668; GFX11-NEXT:    s_cmp_lg_u32 s19, 0
7669; GFX11-NEXT:    s_cselect_b64 s[8:9], s[16:17], 0
7670; GFX11-NEXT:    s_or_b64 s[4:5], s[10:11], s[4:5]
7671; GFX11-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
7672; GFX11-NEXT:    ; return to shader part epilog
7673  %result = call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt)
7674  ret <2 x i128> %result
7675}
7676
7677define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt) {
7678; GFX6-LABEL: v_fshl_v2i128:
7679; GFX6:       ; %bb.0:
7680; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7681; GFX6-NEXT:    v_and_b32_e32 v23, 0x7f, v16
7682; GFX6-NEXT:    v_sub_i32_e32 v17, vcc, 64, v23
7683; GFX6-NEXT:    v_lshr_b64 v[17:18], v[0:1], v17
7684; GFX6-NEXT:    v_lshl_b64 v[21:22], v[2:3], v23
7685; GFX6-NEXT:    v_lshr_b64 v[8:9], v[8:9], 1
7686; GFX6-NEXT:    v_not_b32_e32 v16, v16
7687; GFX6-NEXT:    v_or_b32_e32 v21, v17, v21
7688; GFX6-NEXT:    v_lshlrev_b32_e32 v17, 31, v10
7689; GFX6-NEXT:    v_lshr_b64 v[10:11], v[10:11], 1
7690; GFX6-NEXT:    v_and_b32_e32 v24, 0x7f, v16
7691; GFX6-NEXT:    v_or_b32_e32 v9, v9, v17
7692; GFX6-NEXT:    v_sub_i32_e32 v16, vcc, 64, v24
7693; GFX6-NEXT:    v_or_b32_e32 v22, v18, v22
7694; GFX6-NEXT:    v_lshl_b64 v[16:17], v[10:11], v16
7695; GFX6-NEXT:    v_lshr_b64 v[18:19], v[8:9], v24
7696; GFX6-NEXT:    v_not_b32_e32 v25, 63
7697; GFX6-NEXT:    v_or_b32_e32 v18, v18, v16
7698; GFX6-NEXT:    v_add_i32_e32 v16, vcc, v23, v25
7699; GFX6-NEXT:    v_or_b32_e32 v19, v19, v17
7700; GFX6-NEXT:    v_lshl_b64 v[16:17], v[0:1], v16
7701; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], v23
7702; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v23
7703; GFX6-NEXT:    v_cndmask_b32_e32 v26, 0, v0, vcc
7704; GFX6-NEXT:    v_cndmask_b32_e32 v0, v16, v21, vcc
7705; GFX6-NEXT:    v_cndmask_b32_e32 v16, v17, v22, vcc
7706; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v23
7707; GFX6-NEXT:    v_cndmask_b32_e64 v17, v0, v2, s[4:5]
7708; GFX6-NEXT:    v_cndmask_b32_e64 v16, v16, v3, s[4:5]
7709; GFX6-NEXT:    v_add_i32_e64 v0, s[4:5], v24, v25
7710; GFX6-NEXT:    v_lshr_b64 v[2:3], v[10:11], v0
7711; GFX6-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v24
7712; GFX6-NEXT:    v_cndmask_b32_e64 v2, v2, v18, s[4:5]
7713; GFX6-NEXT:    v_cndmask_b32_e32 v18, 0, v1, vcc
7714; GFX6-NEXT:    v_lshr_b64 v[0:1], v[10:11], v24
7715; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v24
7716; GFX6-NEXT:    v_cndmask_b32_e64 v3, v3, v19, s[4:5]
7717; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
7718; GFX6-NEXT:    v_cndmask_b32_e64 v8, 0, v0, s[4:5]
7719; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v9, vcc
7720; GFX6-NEXT:    v_or_b32_e32 v0, v26, v2
7721; GFX6-NEXT:    v_or_b32_e32 v2, v17, v8
7722; GFX6-NEXT:    v_and_b32_e32 v17, 0x7f, v20
7723; GFX6-NEXT:    v_cndmask_b32_e64 v19, 0, v1, s[4:5]
7724; GFX6-NEXT:    v_or_b32_e32 v1, v18, v3
7725; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 64, v17
7726; GFX6-NEXT:    v_lshr_b64 v[8:9], v[4:5], v3
7727; GFX6-NEXT:    v_lshl_b64 v[10:11], v[6:7], v17
7728; GFX6-NEXT:    v_or_b32_e32 v3, v16, v19
7729; GFX6-NEXT:    v_add_i32_e32 v16, vcc, v17, v25
7730; GFX6-NEXT:    v_or_b32_e32 v10, v8, v10
7731; GFX6-NEXT:    v_or_b32_e32 v11, v9, v11
7732; GFX6-NEXT:    v_lshl_b64 v[8:9], v[4:5], v17
7733; GFX6-NEXT:    v_lshl_b64 v[4:5], v[4:5], v16
7734; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v17
7735; GFX6-NEXT:    v_cndmask_b32_e32 v16, 0, v8, vcc
7736; GFX6-NEXT:    v_cndmask_b32_e32 v18, 0, v9, vcc
7737; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc
7738; GFX6-NEXT:    v_cndmask_b32_e32 v5, v5, v11, vcc
7739; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v17
7740; GFX6-NEXT:    v_cndmask_b32_e32 v17, v4, v6, vcc
7741; GFX6-NEXT:    v_cndmask_b32_e32 v19, v5, v7, vcc
7742; GFX6-NEXT:    v_lshr_b64 v[4:5], v[12:13], 1
7743; GFX6-NEXT:    v_lshlrev_b32_e32 v6, 31, v14
7744; GFX6-NEXT:    v_not_b32_e32 v8, v20
7745; GFX6-NEXT:    v_or_b32_e32 v5, v5, v6
7746; GFX6-NEXT:    v_lshr_b64 v[6:7], v[14:15], 1
7747; GFX6-NEXT:    v_and_b32_e32 v12, 0x7f, v8
7748; GFX6-NEXT:    v_sub_i32_e32 v10, vcc, 64, v12
7749; GFX6-NEXT:    v_lshr_b64 v[8:9], v[4:5], v12
7750; GFX6-NEXT:    v_lshl_b64 v[10:11], v[6:7], v10
7751; GFX6-NEXT:    v_add_i32_e32 v13, vcc, v12, v25
7752; GFX6-NEXT:    v_or_b32_e32 v10, v8, v10
7753; GFX6-NEXT:    v_or_b32_e32 v11, v9, v11
7754; GFX6-NEXT:    v_lshr_b64 v[8:9], v[6:7], v12
7755; GFX6-NEXT:    v_lshr_b64 v[6:7], v[6:7], v13
7756; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v12
7757; GFX6-NEXT:    v_cndmask_b32_e32 v6, v6, v10, vcc
7758; GFX6-NEXT:    v_cndmask_b32_e32 v7, v7, v11, vcc
7759; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v12
7760; GFX6-NEXT:    v_cndmask_b32_e64 v4, v6, v4, s[4:5]
7761; GFX6-NEXT:    v_cndmask_b32_e64 v5, v7, v5, s[4:5]
7762; GFX6-NEXT:    v_cndmask_b32_e32 v6, 0, v8, vcc
7763; GFX6-NEXT:    v_cndmask_b32_e32 v7, 0, v9, vcc
7764; GFX6-NEXT:    v_or_b32_e32 v4, v16, v4
7765; GFX6-NEXT:    v_or_b32_e32 v5, v18, v5
7766; GFX6-NEXT:    v_or_b32_e32 v6, v17, v6
7767; GFX6-NEXT:    v_or_b32_e32 v7, v19, v7
7768; GFX6-NEXT:    s_setpc_b64 s[30:31]
7769;
7770; GFX8-LABEL: v_fshl_v2i128:
7771; GFX8:       ; %bb.0:
7772; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7773; GFX8-NEXT:    v_and_b32_e32 v23, 0x7f, v16
7774; GFX8-NEXT:    v_sub_u32_e32 v17, vcc, 64, v23
7775; GFX8-NEXT:    v_lshrrev_b64 v[17:18], v17, v[0:1]
7776; GFX8-NEXT:    v_lshlrev_b64 v[21:22], v23, v[2:3]
7777; GFX8-NEXT:    v_lshrrev_b64 v[8:9], 1, v[8:9]
7778; GFX8-NEXT:    v_not_b32_e32 v16, v16
7779; GFX8-NEXT:    v_or_b32_e32 v21, v17, v21
7780; GFX8-NEXT:    v_lshlrev_b32_e32 v17, 31, v10
7781; GFX8-NEXT:    v_lshrrev_b64 v[10:11], 1, v[10:11]
7782; GFX8-NEXT:    v_and_b32_e32 v24, 0x7f, v16
7783; GFX8-NEXT:    v_or_b32_e32 v9, v9, v17
7784; GFX8-NEXT:    v_sub_u32_e32 v16, vcc, 64, v24
7785; GFX8-NEXT:    v_or_b32_e32 v22, v18, v22
7786; GFX8-NEXT:    v_lshlrev_b64 v[16:17], v16, v[10:11]
7787; GFX8-NEXT:    v_lshrrev_b64 v[18:19], v24, v[8:9]
7788; GFX8-NEXT:    v_not_b32_e32 v25, 63
7789; GFX8-NEXT:    v_or_b32_e32 v18, v18, v16
7790; GFX8-NEXT:    v_add_u32_e32 v16, vcc, v23, v25
7791; GFX8-NEXT:    v_or_b32_e32 v19, v19, v17
7792; GFX8-NEXT:    v_lshlrev_b64 v[16:17], v16, v[0:1]
7793; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v23, v[0:1]
7794; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v23
7795; GFX8-NEXT:    v_cndmask_b32_e32 v26, 0, v0, vcc
7796; GFX8-NEXT:    v_cndmask_b32_e32 v0, v16, v21, vcc
7797; GFX8-NEXT:    v_cndmask_b32_e32 v16, v17, v22, vcc
7798; GFX8-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v23
7799; GFX8-NEXT:    v_cndmask_b32_e64 v17, v0, v2, s[4:5]
7800; GFX8-NEXT:    v_cndmask_b32_e64 v16, v16, v3, s[4:5]
7801; GFX8-NEXT:    v_add_u32_e64 v0, s[4:5], v24, v25
7802; GFX8-NEXT:    v_lshrrev_b64 v[2:3], v0, v[10:11]
7803; GFX8-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v24
7804; GFX8-NEXT:    v_cndmask_b32_e64 v2, v2, v18, s[4:5]
7805; GFX8-NEXT:    v_cndmask_b32_e32 v18, 0, v1, vcc
7806; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v24, v[10:11]
7807; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v24
7808; GFX8-NEXT:    v_cndmask_b32_e64 v3, v3, v19, s[4:5]
7809; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
7810; GFX8-NEXT:    v_cndmask_b32_e64 v8, 0, v0, s[4:5]
7811; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v9, vcc
7812; GFX8-NEXT:    v_or_b32_e32 v0, v26, v2
7813; GFX8-NEXT:    v_or_b32_e32 v2, v17, v8
7814; GFX8-NEXT:    v_and_b32_e32 v17, 0x7f, v20
7815; GFX8-NEXT:    v_cndmask_b32_e64 v19, 0, v1, s[4:5]
7816; GFX8-NEXT:    v_or_b32_e32 v1, v18, v3
7817; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, 64, v17
7818; GFX8-NEXT:    v_lshrrev_b64 v[8:9], v3, v[4:5]
7819; GFX8-NEXT:    v_lshlrev_b64 v[10:11], v17, v[6:7]
7820; GFX8-NEXT:    v_or_b32_e32 v3, v16, v19
7821; GFX8-NEXT:    v_add_u32_e32 v16, vcc, v17, v25
7822; GFX8-NEXT:    v_or_b32_e32 v10, v8, v10
7823; GFX8-NEXT:    v_or_b32_e32 v11, v9, v11
7824; GFX8-NEXT:    v_lshlrev_b64 v[8:9], v17, v[4:5]
7825; GFX8-NEXT:    v_lshlrev_b64 v[4:5], v16, v[4:5]
7826; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v17
7827; GFX8-NEXT:    v_cndmask_b32_e32 v16, 0, v8, vcc
7828; GFX8-NEXT:    v_cndmask_b32_e32 v18, 0, v9, vcc
7829; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc
7830; GFX8-NEXT:    v_cndmask_b32_e32 v5, v5, v11, vcc
7831; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v17
7832; GFX8-NEXT:    v_cndmask_b32_e32 v17, v4, v6, vcc
7833; GFX8-NEXT:    v_cndmask_b32_e32 v19, v5, v7, vcc
7834; GFX8-NEXT:    v_lshrrev_b64 v[4:5], 1, v[12:13]
7835; GFX8-NEXT:    v_lshlrev_b32_e32 v6, 31, v14
7836; GFX8-NEXT:    v_not_b32_e32 v8, v20
7837; GFX8-NEXT:    v_or_b32_e32 v5, v5, v6
7838; GFX8-NEXT:    v_lshrrev_b64 v[6:7], 1, v[14:15]
7839; GFX8-NEXT:    v_and_b32_e32 v12, 0x7f, v8
7840; GFX8-NEXT:    v_sub_u32_e32 v10, vcc, 64, v12
7841; GFX8-NEXT:    v_lshrrev_b64 v[8:9], v12, v[4:5]
7842; GFX8-NEXT:    v_lshlrev_b64 v[10:11], v10, v[6:7]
7843; GFX8-NEXT:    v_add_u32_e32 v13, vcc, v12, v25
7844; GFX8-NEXT:    v_or_b32_e32 v10, v8, v10
7845; GFX8-NEXT:    v_or_b32_e32 v11, v9, v11
7846; GFX8-NEXT:    v_lshrrev_b64 v[8:9], v12, v[6:7]
7847; GFX8-NEXT:    v_lshrrev_b64 v[6:7], v13, v[6:7]
7848; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v12
7849; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v10, vcc
7850; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v11, vcc
7851; GFX8-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v12
7852; GFX8-NEXT:    v_cndmask_b32_e64 v4, v6, v4, s[4:5]
7853; GFX8-NEXT:    v_cndmask_b32_e64 v5, v7, v5, s[4:5]
7854; GFX8-NEXT:    v_cndmask_b32_e32 v6, 0, v8, vcc
7855; GFX8-NEXT:    v_cndmask_b32_e32 v7, 0, v9, vcc
7856; GFX8-NEXT:    v_or_b32_e32 v4, v16, v4
7857; GFX8-NEXT:    v_or_b32_e32 v5, v18, v5
7858; GFX8-NEXT:    v_or_b32_e32 v6, v17, v6
7859; GFX8-NEXT:    v_or_b32_e32 v7, v19, v7
7860; GFX8-NEXT:    s_setpc_b64 s[30:31]
7861;
7862; GFX9-LABEL: v_fshl_v2i128:
7863; GFX9:       ; %bb.0:
7864; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7865; GFX9-NEXT:    v_and_b32_e32 v23, 0x7f, v16
7866; GFX9-NEXT:    v_lshrrev_b64 v[8:9], 1, v[8:9]
7867; GFX9-NEXT:    v_sub_u32_e32 v17, 64, v23
7868; GFX9-NEXT:    v_not_b32_e32 v16, v16
7869; GFX9-NEXT:    v_lshrrev_b64 v[17:18], v17, v[0:1]
7870; GFX9-NEXT:    v_lshlrev_b64 v[21:22], v23, v[2:3]
7871; GFX9-NEXT:    v_lshl_or_b32 v9, v10, 31, v9
7872; GFX9-NEXT:    v_lshrrev_b64 v[10:11], 1, v[10:11]
7873; GFX9-NEXT:    v_and_b32_e32 v24, 0x7f, v16
7874; GFX9-NEXT:    v_sub_u32_e32 v16, 64, v24
7875; GFX9-NEXT:    v_or_b32_e32 v21, v17, v21
7876; GFX9-NEXT:    v_or_b32_e32 v22, v18, v22
7877; GFX9-NEXT:    v_lshlrev_b64 v[16:17], v16, v[10:11]
7878; GFX9-NEXT:    v_lshrrev_b64 v[18:19], v24, v[8:9]
7879; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v23
7880; GFX9-NEXT:    v_or_b32_e32 v18, v18, v16
7881; GFX9-NEXT:    v_add_u32_e32 v16, 0xffffffc0, v23
7882; GFX9-NEXT:    v_or_b32_e32 v19, v19, v17
7883; GFX9-NEXT:    v_lshlrev_b64 v[16:17], v16, v[0:1]
7884; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v23, v[0:1]
7885; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v23
7886; GFX9-NEXT:    v_cndmask_b32_e32 v25, 0, v0, vcc
7887; GFX9-NEXT:    v_cndmask_b32_e32 v0, v16, v21, vcc
7888; GFX9-NEXT:    v_cndmask_b32_e32 v16, v17, v22, vcc
7889; GFX9-NEXT:    v_cndmask_b32_e64 v17, v0, v2, s[4:5]
7890; GFX9-NEXT:    v_add_u32_e32 v0, 0xffffffc0, v24
7891; GFX9-NEXT:    v_cndmask_b32_e64 v16, v16, v3, s[4:5]
7892; GFX9-NEXT:    v_lshrrev_b64 v[2:3], v0, v[10:11]
7893; GFX9-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v24
7894; GFX9-NEXT:    v_cndmask_b32_e64 v2, v2, v18, s[4:5]
7895; GFX9-NEXT:    v_cndmask_b32_e32 v18, 0, v1, vcc
7896; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v24, v[10:11]
7897; GFX9-NEXT:    v_cndmask_b32_e64 v3, v3, v19, s[4:5]
7898; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v24
7899; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v9, vcc
7900; GFX9-NEXT:    v_cndmask_b32_e64 v9, 0, v1, s[4:5]
7901; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
7902; GFX9-NEXT:    v_cndmask_b32_e64 v8, 0, v0, s[4:5]
7903; GFX9-NEXT:    v_or_b32_e32 v1, v18, v3
7904; GFX9-NEXT:    v_or_b32_e32 v3, v16, v9
7905; GFX9-NEXT:    v_and_b32_e32 v16, 0x7f, v20
7906; GFX9-NEXT:    v_or_b32_e32 v0, v25, v2
7907; GFX9-NEXT:    v_or_b32_e32 v2, v17, v8
7908; GFX9-NEXT:    v_sub_u32_e32 v8, 64, v16
7909; GFX9-NEXT:    v_lshrrev_b64 v[8:9], v8, v[4:5]
7910; GFX9-NEXT:    v_lshlrev_b64 v[10:11], v16, v[6:7]
7911; GFX9-NEXT:    v_add_u32_e32 v17, 0xffffffc0, v16
7912; GFX9-NEXT:    v_or_b32_e32 v10, v8, v10
7913; GFX9-NEXT:    v_or_b32_e32 v11, v9, v11
7914; GFX9-NEXT:    v_lshlrev_b64 v[8:9], v16, v[4:5]
7915; GFX9-NEXT:    v_lshlrev_b64 v[4:5], v17, v[4:5]
7916; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v16
7917; GFX9-NEXT:    v_cndmask_b32_e32 v17, 0, v8, vcc
7918; GFX9-NEXT:    v_cndmask_b32_e32 v18, 0, v9, vcc
7919; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc
7920; GFX9-NEXT:    v_cndmask_b32_e32 v8, v5, v11, vcc
7921; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v16
7922; GFX9-NEXT:    v_cndmask_b32_e32 v16, v4, v6, vcc
7923; GFX9-NEXT:    v_lshrrev_b64 v[4:5], 1, v[12:13]
7924; GFX9-NEXT:    v_cndmask_b32_e32 v12, v8, v7, vcc
7925; GFX9-NEXT:    v_not_b32_e32 v8, v20
7926; GFX9-NEXT:    v_lshrrev_b64 v[6:7], 1, v[14:15]
7927; GFX9-NEXT:    v_and_b32_e32 v13, 0x7f, v8
7928; GFX9-NEXT:    v_lshl_or_b32 v5, v14, 31, v5
7929; GFX9-NEXT:    v_sub_u32_e32 v10, 64, v13
7930; GFX9-NEXT:    v_lshrrev_b64 v[8:9], v13, v[4:5]
7931; GFX9-NEXT:    v_lshlrev_b64 v[10:11], v10, v[6:7]
7932; GFX9-NEXT:    v_add_u32_e32 v14, 0xffffffc0, v13
7933; GFX9-NEXT:    v_or_b32_e32 v10, v8, v10
7934; GFX9-NEXT:    v_or_b32_e32 v11, v9, v11
7935; GFX9-NEXT:    v_lshrrev_b64 v[8:9], v13, v[6:7]
7936; GFX9-NEXT:    v_lshrrev_b64 v[6:7], v14, v[6:7]
7937; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v13
7938; GFX9-NEXT:    v_cndmask_b32_e32 v6, v6, v10, vcc
7939; GFX9-NEXT:    v_cndmask_b32_e32 v7, v7, v11, vcc
7940; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v13
7941; GFX9-NEXT:    v_cndmask_b32_e64 v4, v6, v4, s[4:5]
7942; GFX9-NEXT:    v_cndmask_b32_e64 v5, v7, v5, s[4:5]
7943; GFX9-NEXT:    v_cndmask_b32_e32 v6, 0, v8, vcc
7944; GFX9-NEXT:    v_cndmask_b32_e32 v7, 0, v9, vcc
7945; GFX9-NEXT:    v_or_b32_e32 v4, v17, v4
7946; GFX9-NEXT:    v_or_b32_e32 v5, v18, v5
7947; GFX9-NEXT:    v_or_b32_e32 v6, v16, v6
7948; GFX9-NEXT:    v_or_b32_e32 v7, v12, v7
7949; GFX9-NEXT:    s_setpc_b64 s[30:31]
7950;
7951; GFX10-LABEL: v_fshl_v2i128:
7952; GFX10:       ; %bb.0:
7953; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7954; GFX10-NEXT:    v_and_b32_e32 v27, 0x7f, v16
7955; GFX10-NEXT:    v_not_b32_e32 v21, v16
7956; GFX10-NEXT:    v_lshrrev_b64 v[8:9], 1, v[8:9]
7957; GFX10-NEXT:    v_sub_nc_u32_e32 v17, 64, v27
7958; GFX10-NEXT:    v_and_b32_e32 v28, 0x7f, v21
7959; GFX10-NEXT:    v_lshlrev_b64 v[18:19], v27, v[2:3]
7960; GFX10-NEXT:    v_lshl_or_b32 v9, v10, 31, v9
7961; GFX10-NEXT:    v_lshrrev_b64 v[10:11], 1, v[10:11]
7962; GFX10-NEXT:    v_lshrrev_b64 v[16:17], v17, v[0:1]
7963; GFX10-NEXT:    v_add_nc_u32_e32 v29, 0xffffffc0, v27
7964; GFX10-NEXT:    v_sub_nc_u32_e32 v25, 64, v28
7965; GFX10-NEXT:    v_lshlrev_b64 v[21:22], v27, v[0:1]
7966; GFX10-NEXT:    v_lshrrev_b64 v[23:24], v28, v[8:9]
7967; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v27
7968; GFX10-NEXT:    v_or_b32_e32 v18, v16, v18
7969; GFX10-NEXT:    v_add_nc_u32_e32 v16, 0xffffffc0, v28
7970; GFX10-NEXT:    v_lshlrev_b64 v[25:26], v25, v[10:11]
7971; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v29, v[0:1]
7972; GFX10-NEXT:    v_or_b32_e32 v19, v17, v19
7973; GFX10-NEXT:    v_cndmask_b32_e32 v21, 0, v21, vcc_lo
7974; GFX10-NEXT:    v_lshrrev_b64 v[16:17], v16, v[10:11]
7975; GFX10-NEXT:    v_cndmask_b32_e32 v22, 0, v22, vcc_lo
7976; GFX10-NEXT:    v_or_b32_e32 v23, v23, v25
7977; GFX10-NEXT:    v_cndmask_b32_e32 v18, v0, v18, vcc_lo
7978; GFX10-NEXT:    v_cndmask_b32_e32 v19, v1, v19, vcc_lo
7979; GFX10-NEXT:    v_or_b32_e32 v24, v24, v26
7980; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v28
7981; GFX10-NEXT:    v_cmp_eq_u32_e64 s4, 0, v27
7982; GFX10-NEXT:    v_cmp_eq_u32_e64 s5, 0, v28
7983; GFX10-NEXT:    v_lshrrev_b64 v[0:1], v28, v[10:11]
7984; GFX10-NEXT:    v_cndmask_b32_e32 v16, v16, v23, vcc_lo
7985; GFX10-NEXT:    v_cndmask_b32_e32 v10, v17, v24, vcc_lo
7986; GFX10-NEXT:    v_cndmask_b32_e64 v23, v19, v3, s4
7987; GFX10-NEXT:    v_and_b32_e32 v24, 0x7f, v20
7988; GFX10-NEXT:    v_cndmask_b32_e32 v25, 0, v1, vcc_lo
7989; GFX10-NEXT:    v_cndmask_b32_e64 v3, v16, v8, s5
7990; GFX10-NEXT:    v_cndmask_b32_e64 v8, v10, v9, s5
7991; GFX10-NEXT:    v_not_b32_e32 v16, v20
7992; GFX10-NEXT:    v_cndmask_b32_e64 v2, v18, v2, s4
7993; GFX10-NEXT:    v_cndmask_b32_e32 v10, 0, v0, vcc_lo
7994; GFX10-NEXT:    v_or_b32_e32 v0, v21, v3
7995; GFX10-NEXT:    v_or_b32_e32 v1, v22, v8
7996; GFX10-NEXT:    v_lshrrev_b64 v[8:9], 1, v[12:13]
7997; GFX10-NEXT:    v_sub_nc_u32_e32 v3, 64, v24
7998; GFX10-NEXT:    v_and_b32_e32 v22, 0x7f, v16
7999; GFX10-NEXT:    v_or_b32_e32 v2, v2, v10
8000; GFX10-NEXT:    v_lshlrev_b64 v[12:13], v24, v[6:7]
8001; GFX10-NEXT:    v_lshlrev_b64 v[16:17], v24, v[4:5]
8002; GFX10-NEXT:    v_lshrrev_b64 v[10:11], v3, v[4:5]
8003; GFX10-NEXT:    v_lshl_or_b32 v9, v14, 31, v9
8004; GFX10-NEXT:    v_lshrrev_b64 v[14:15], 1, v[14:15]
8005; GFX10-NEXT:    v_sub_nc_u32_e32 v20, 64, v22
8006; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0xffffffc0, v24
8007; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v24
8008; GFX10-NEXT:    v_or_b32_e32 v12, v10, v12
8009; GFX10-NEXT:    v_add_nc_u32_e32 v10, 0xffffffc0, v22
8010; GFX10-NEXT:    v_lshrrev_b64 v[18:19], v22, v[8:9]
8011; GFX10-NEXT:    v_lshlrev_b64 v[20:21], v20, v[14:15]
8012; GFX10-NEXT:    v_lshlrev_b64 v[3:4], v3, v[4:5]
8013; GFX10-NEXT:    v_or_b32_e32 v5, v11, v13
8014; GFX10-NEXT:    v_lshrrev_b64 v[10:11], v10, v[14:15]
8015; GFX10-NEXT:    v_cndmask_b32_e32 v13, 0, v16, vcc_lo
8016; GFX10-NEXT:    v_cmp_gt_u32_e64 s4, 64, v22
8017; GFX10-NEXT:    v_or_b32_e32 v16, v18, v20
8018; GFX10-NEXT:    v_or_b32_e32 v18, v19, v21
8019; GFX10-NEXT:    v_cndmask_b32_e32 v12, v3, v12, vcc_lo
8020; GFX10-NEXT:    v_cndmask_b32_e32 v5, v4, v5, vcc_lo
8021; GFX10-NEXT:    v_lshrrev_b64 v[3:4], v22, v[14:15]
8022; GFX10-NEXT:    v_cndmask_b32_e64 v10, v10, v16, s4
8023; GFX10-NEXT:    v_cmp_eq_u32_e64 s5, 0, v22
8024; GFX10-NEXT:    v_cmp_eq_u32_e64 s6, 0, v24
8025; GFX10-NEXT:    v_cndmask_b32_e64 v11, v11, v18, s4
8026; GFX10-NEXT:    v_cndmask_b32_e32 v14, 0, v17, vcc_lo
8027; GFX10-NEXT:    v_cndmask_b32_e64 v6, v12, v6, s6
8028; GFX10-NEXT:    v_cndmask_b32_e64 v7, v5, v7, s6
8029; GFX10-NEXT:    v_cndmask_b32_e64 v5, v10, v8, s5
8030; GFX10-NEXT:    v_cndmask_b32_e64 v8, v11, v9, s5
8031; GFX10-NEXT:    v_cndmask_b32_e64 v9, 0, v3, s4
8032; GFX10-NEXT:    v_cndmask_b32_e64 v10, 0, v4, s4
8033; GFX10-NEXT:    v_or_b32_e32 v3, v23, v25
8034; GFX10-NEXT:    v_or_b32_e32 v4, v13, v5
8035; GFX10-NEXT:    v_or_b32_e32 v5, v14, v8
8036; GFX10-NEXT:    v_or_b32_e32 v6, v6, v9
8037; GFX10-NEXT:    v_or_b32_e32 v7, v7, v10
8038; GFX10-NEXT:    s_setpc_b64 s[30:31]
8039;
8040; GFX11-LABEL: v_fshl_v2i128:
8041; GFX11:       ; %bb.0:
8042; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8043; GFX11-NEXT:    v_and_b32_e32 v27, 0x7f, v16
8044; GFX11-NEXT:    v_not_b32_e32 v21, v16
8045; GFX11-NEXT:    v_lshrrev_b64 v[8:9], 1, v[8:9]
8046; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
8047; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v27
8048; GFX11-NEXT:    v_and_b32_e32 v28, 0x7f, v21
8049; GFX11-NEXT:    v_lshlrev_b64 v[21:22], v27, v[0:1]
8050; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
8051; GFX11-NEXT:    v_lshl_or_b32 v9, v10, 31, v9
8052; GFX11-NEXT:    v_lshrrev_b64 v[10:11], 1, v[10:11]
8053; GFX11-NEXT:    v_cndmask_b32_e32 v22, 0, v22, vcc_lo
8054; GFX11-NEXT:    v_sub_nc_u32_e32 v17, 64, v27
8055; GFX11-NEXT:    v_lshlrev_b64 v[18:19], v27, v[2:3]
8056; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 0, v27
8057; GFX11-NEXT:    v_cndmask_b32_e32 v21, 0, v21, vcc_lo
8058; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
8059; GFX11-NEXT:    v_lshrrev_b64 v[16:17], v17, v[0:1]
8060; GFX11-NEXT:    v_or_b32_e32 v18, v16, v18
8061; GFX11-NEXT:    v_add_nc_u32_e32 v29, 0xffffffc0, v27
8062; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
8063; GFX11-NEXT:    v_or_b32_e32 v19, v17, v19
8064; GFX11-NEXT:    v_lshlrev_b64 v[0:1], v29, v[0:1]
8065; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
8066; GFX11-NEXT:    v_dual_cndmask_b32 v18, v0, v18 :: v_dual_cndmask_b32 v19, v1, v19
8067; GFX11-NEXT:    v_sub_nc_u32_e32 v25, 64, v28
8068; GFX11-NEXT:    v_add_nc_u32_e32 v16, 0xffffffc0, v28
8069; GFX11-NEXT:    v_lshrrev_b64 v[23:24], v28, v[8:9]
8070; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v28
8071; GFX11-NEXT:    v_lshrrev_b64 v[0:1], v28, v[10:11]
8072; GFX11-NEXT:    v_lshlrev_b64 v[25:26], v25, v[10:11]
8073; GFX11-NEXT:    v_lshrrev_b64 v[16:17], v16, v[10:11]
8074; GFX11-NEXT:    v_cmp_eq_u32_e64 s1, 0, v28
8075; GFX11-NEXT:    v_cndmask_b32_e64 v2, v18, v2, s0
8076; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
8077; GFX11-NEXT:    v_or_b32_e32 v23, v23, v25
8078; GFX11-NEXT:    v_or_b32_e32 v24, v24, v26
8079; GFX11-NEXT:    v_dual_cndmask_b32 v25, 0, v1 :: v_dual_cndmask_b32 v16, v16, v23
8080; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
8081; GFX11-NEXT:    v_cndmask_b32_e32 v10, v17, v24, vcc_lo
8082; GFX11-NEXT:    v_cndmask_b32_e64 v23, v19, v3, s0
8083; GFX11-NEXT:    v_and_b32_e32 v24, 0x7f, v20
8084; GFX11-NEXT:    v_cndmask_b32_e64 v3, v16, v8, s1
8085; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
8086; GFX11-NEXT:    v_cndmask_b32_e64 v8, v10, v9, s1
8087; GFX11-NEXT:    v_not_b32_e32 v16, v20
8088; GFX11-NEXT:    v_cndmask_b32_e32 v10, 0, v0, vcc_lo
8089; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v24
8090; GFX11-NEXT:    v_or_b32_e32 v0, v21, v3
8091; GFX11-NEXT:    v_or_b32_e32 v1, v22, v8
8092; GFX11-NEXT:    v_lshrrev_b64 v[8:9], 1, v[12:13]
8093; GFX11-NEXT:    v_sub_nc_u32_e32 v3, 64, v24
8094; GFX11-NEXT:    v_and_b32_e32 v22, 0x7f, v16
8095; GFX11-NEXT:    v_or_b32_e32 v2, v2, v10
8096; GFX11-NEXT:    v_lshlrev_b64 v[12:13], v24, v[6:7]
8097; GFX11-NEXT:    v_lshlrev_b64 v[16:17], v24, v[4:5]
8098; GFX11-NEXT:    v_lshrrev_b64 v[10:11], v3, v[4:5]
8099; GFX11-NEXT:    v_lshl_or_b32 v9, v14, 31, v9
8100; GFX11-NEXT:    v_lshrrev_b64 v[14:15], 1, v[14:15]
8101; GFX11-NEXT:    v_sub_nc_u32_e32 v20, 64, v22
8102; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xffffffc0, v24
8103; GFX11-NEXT:    v_cmp_gt_u32_e64 s0, 64, v22
8104; GFX11-NEXT:    v_or_b32_e32 v12, v10, v12
8105; GFX11-NEXT:    v_add_nc_u32_e32 v10, 0xffffffc0, v22
8106; GFX11-NEXT:    v_lshrrev_b64 v[18:19], v22, v[8:9]
8107; GFX11-NEXT:    v_lshlrev_b64 v[20:21], v20, v[14:15]
8108; GFX11-NEXT:    v_lshlrev_b64 v[3:4], v3, v[4:5]
8109; GFX11-NEXT:    v_or_b32_e32 v5, v11, v13
8110; GFX11-NEXT:    v_cndmask_b32_e32 v13, 0, v16, vcc_lo
8111; GFX11-NEXT:    v_lshrrev_b64 v[10:11], v10, v[14:15]
8112; GFX11-NEXT:    v_cmp_eq_u32_e64 s1, 0, v22
8113; GFX11-NEXT:    v_or_b32_e32 v16, v18, v20
8114; GFX11-NEXT:    v_or_b32_e32 v18, v19, v21
8115; GFX11-NEXT:    v_dual_cndmask_b32 v12, v3, v12 :: v_dual_cndmask_b32 v5, v4, v5
8116; GFX11-NEXT:    v_lshrrev_b64 v[3:4], v22, v[14:15]
8117; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_3)
8118; GFX11-NEXT:    v_cndmask_b32_e64 v10, v10, v16, s0
8119; GFX11-NEXT:    v_cmp_eq_u32_e64 s2, 0, v24
8120; GFX11-NEXT:    v_cndmask_b32_e64 v11, v11, v18, s0
8121; GFX11-NEXT:    v_cndmask_b32_e32 v14, 0, v17, vcc_lo
8122; GFX11-NEXT:    v_cndmask_b32_e64 v6, v12, v6, s2
8123; GFX11-NEXT:    v_cndmask_b32_e64 v7, v5, v7, s2
8124; GFX11-NEXT:    v_cndmask_b32_e64 v5, v10, v8, s1
8125; GFX11-NEXT:    v_cndmask_b32_e64 v8, v11, v9, s1
8126; GFX11-NEXT:    v_cndmask_b32_e64 v9, 0, v3, s0
8127; GFX11-NEXT:    v_cndmask_b32_e64 v10, 0, v4, s0
8128; GFX11-NEXT:    v_or_b32_e32 v3, v23, v25
8129; GFX11-NEXT:    v_or_b32_e32 v4, v13, v5
8130; GFX11-NEXT:    v_or_b32_e32 v5, v14, v8
8131; GFX11-NEXT:    v_or_b32_e32 v6, v6, v9
8132; GFX11-NEXT:    v_or_b32_e32 v7, v7, v10
8133; GFX11-NEXT:    s_setpc_b64 s[30:31]
8134  %result = call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt)
8135  ret <2 x i128> %result
8136}
8137
; Declarations of the llvm.fshl intrinsic overloads referenced by this test.
; Every overload takes three operands of one type and returns that same type;
; the test functions in this file name those operands %lhs, %rhs and %amt.
; The declarations are grouped by element width, narrowest first, with scalar
; and vector forms of the same element type kept together.
; (See the LLVM Language Reference for the intrinsic's exact semantics.)

; Sub-byte and byte-sized elements: i7, i8 and i8 vectors.
8138declare i7 @llvm.fshl.i7(i7, i7, i7) #0
8139declare i8 @llvm.fshl.i8(i8, i8, i8) #0
8140declare <2 x i8> @llvm.fshl.v2i8(<2 x i8>, <2 x i8>, <2 x i8>) #0
8141declare <4 x i8> @llvm.fshl.v4i8(<4 x i8>, <4 x i8>, <4 x i8>) #0
8142
; 16-bit elements: scalar and 2/3/4/5/6/8-element vectors.
8143declare i16 @llvm.fshl.i16(i16, i16, i16) #0
8144declare <2 x i16> @llvm.fshl.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) #0
8145declare <3 x i16> @llvm.fshl.v3i16(<3 x i16>, <3 x i16>, <3 x i16>) #0
8146declare <4 x i16> @llvm.fshl.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) #0
8147declare <5 x i16> @llvm.fshl.v5i16(<5 x i16>, <5 x i16>, <5 x i16>) #0
8148declare <6 x i16> @llvm.fshl.v6i16(<6 x i16>, <6 x i16>, <6 x i16>) #0
8149declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) #0
8150
; 24-bit elements: scalar and 2-element vector.
8151declare i24 @llvm.fshl.i24(i24, i24, i24) #0
8152declare <2 x i24> @llvm.fshl.v2i24(<2 x i24>, <2 x i24>, <2 x i24>) #0
8153
; 32-bit elements: scalar and 2/3/4/5/16-element vectors.
8154declare i32 @llvm.fshl.i32(i32, i32, i32) #0
8155declare <2 x i32> @llvm.fshl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) #0
8156declare <3 x i32> @llvm.fshl.v3i32(<3 x i32>, <3 x i32>, <3 x i32>) #0
8157declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #0
8158declare <5 x i32> @llvm.fshl.v5i32(<5 x i32>, <5 x i32>, <5 x i32>) #0
8159declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>) #0
8160
; 48-bit scalar.
8161declare i48 @llvm.fshl.i48(i48, i48, i48) #0
8162
; 64-bit elements: scalar and 2-element vector.
8163declare i64 @llvm.fshl.i64(i64, i64, i64) #0
8164declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) #0
8165
; 128-bit elements: scalar and 2-element vector (the latter is called by
; v_fshl_v2i128 above).
8166declare i128 @llvm.fshl.i128(i128, i128, i128) #0
8167declare <2 x i128> @llvm.fshl.v2i128(<2 x i128>, <2 x i128>, <2 x i128>) #0
8168
; Attribute group shared by every intrinsic declaration above (each one is
; tagged with #0).
8169attributes #0 = { nounwind readnone speculatable willreturn }
8170