xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll (revision 6206f5444fc0732e6495703c75a67f1f90f5b418)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -o - %s | FileCheck -check-prefixes=GCN,GFX6 %s
3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s
4; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s
5; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - %s | FileCheck -check-prefixes=GCN,GFX10 %s
6; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GFX11 %s
7
; Funnel shift right on a non-power-of-two width (i7), amdgpu_ps calling
; convention with every operand marked inreg. The checks below read s0, s1 and
; s2 (presumably lhs, rhs and amt in argument order) and move the result back
; into s0 with v_readfirstlane_b32.
; NOTE(review): the reciprocal / multiply-high / two "subtract 7 when at least
; 7" steps look like an expansion of amt urem 7 (llvm.fshr takes the shift
; amount modulo the bit width) - confirm against the legalizer before relying
; on this description.
8define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
9; GFX6-LABEL: s_fshr_i7:
10; GFX6:       ; %bb.0:
11; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v0, 7
12; GFX6-NEXT:    v_rcp_iflag_f32_e32 v0, v0
13; GFX6-NEXT:    s_and_b32 s2, s2, 0x7f
14; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
15; GFX6-NEXT:    s_and_b32 s1, s1, 0x7f
16; GFX6-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
17; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
18; GFX6-NEXT:    v_mul_lo_u32 v1, v0, -7
19; GFX6-NEXT:    v_mul_hi_u32 v1, v0, v1
20; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
21; GFX6-NEXT:    v_mul_hi_u32 v0, s2, v0
22; GFX6-NEXT:    v_mul_lo_u32 v0, v0, 7
23; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
24; GFX6-NEXT:    v_add_i32_e32 v1, vcc, -7, v0
25; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
26; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
27; GFX6-NEXT:    v_add_i32_e32 v1, vcc, -7, v0
28; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
29; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
30; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 6, v0
31; GFX6-NEXT:    v_and_b32_e32 v0, 0x7f, v0
32; GFX6-NEXT:    v_and_b32_e32 v1, 0x7f, v1
33; GFX6-NEXT:    v_lshl_b32_e32 v1, s0, v1
34; GFX6-NEXT:    v_lshr_b32_e32 v0, s1, v0
35; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
36; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
37; GFX6-NEXT:    ; return to shader part epilog
38;
39; GFX8-LABEL: s_fshr_i7:
40; GFX8:       ; %bb.0:
41; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v0, 7
42; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
43; GFX8-NEXT:    s_and_b32 s2, s2, 0x7f
44; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
45; GFX8-NEXT:    s_and_b32 s1, s1, 0x7f
46; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
47; GFX8-NEXT:    v_cvt_u32_f32_e32 v0, v0
48; GFX8-NEXT:    v_mul_lo_u32 v1, v0, -7
49; GFX8-NEXT:    v_mul_hi_u32 v1, v0, v1
50; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
51; GFX8-NEXT:    v_mul_hi_u32 v0, s2, v0
52; GFX8-NEXT:    v_mul_lo_u32 v0, v0, 7
53; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s2, v0
54; GFX8-NEXT:    v_add_u32_e32 v1, vcc, -7, v0
55; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
56; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
57; GFX8-NEXT:    v_add_u32_e32 v1, vcc, -7, v0
58; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
59; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
60; GFX8-NEXT:    v_sub_u16_e32 v1, 6, v0
61; GFX8-NEXT:    v_and_b32_e32 v0, 0x7f, v0
62; GFX8-NEXT:    v_and_b32_e32 v1, 0x7f, v1
63; GFX8-NEXT:    v_lshlrev_b16_e64 v1, v1, s0
64; GFX8-NEXT:    v_lshrrev_b16_e64 v0, v0, s1
65; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
66; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
67; GFX8-NEXT:    ; return to shader part epilog
68;
69; GFX9-LABEL: s_fshr_i7:
70; GFX9:       ; %bb.0:
71; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v0, 7
72; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
73; GFX9-NEXT:    s_and_b32 s2, s2, 0x7f
74; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
75; GFX9-NEXT:    s_and_b32 s1, s1, 0x7f
76; GFX9-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
77; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
78; GFX9-NEXT:    v_mul_lo_u32 v1, v0, -7
79; GFX9-NEXT:    v_mul_hi_u32 v1, v0, v1
80; GFX9-NEXT:    v_add_u32_e32 v0, v0, v1
81; GFX9-NEXT:    v_mul_hi_u32 v0, s2, v0
82; GFX9-NEXT:    v_mul_lo_u32 v0, v0, 7
83; GFX9-NEXT:    v_sub_u32_e32 v0, s2, v0
84; GFX9-NEXT:    v_add_u32_e32 v1, -7, v0
85; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
86; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
87; GFX9-NEXT:    v_add_u32_e32 v1, -7, v0
88; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
89; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
90; GFX9-NEXT:    v_sub_u16_e32 v1, 6, v0
91; GFX9-NEXT:    v_and_b32_e32 v0, 0x7f, v0
92; GFX9-NEXT:    v_and_b32_e32 v1, 0x7f, v1
93; GFX9-NEXT:    v_lshlrev_b16_e64 v1, v1, s0
94; GFX9-NEXT:    v_lshrrev_b16_e64 v0, v0, s1
95; GFX9-NEXT:    v_or_b32_e32 v0, v1, v0
96; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
97; GFX9-NEXT:    ; return to shader part epilog
98;
99; GFX10-LABEL: s_fshr_i7:
100; GFX10:       ; %bb.0:
101; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v0, 7
102; GFX10-NEXT:    s_and_b32 s2, s2, 0x7f
103; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
104; GFX10-NEXT:    s_and_b32 s1, s1, 0x7f
105; GFX10-NEXT:    v_rcp_iflag_f32_e32 v0, v0
106; GFX10-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
107; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
108; GFX10-NEXT:    v_mul_lo_u32 v1, v0, -7
109; GFX10-NEXT:    v_mul_hi_u32 v1, v0, v1
110; GFX10-NEXT:    v_add_nc_u32_e32 v0, v0, v1
111; GFX10-NEXT:    v_mul_hi_u32 v0, s2, v0
112; GFX10-NEXT:    v_mul_lo_u32 v0, v0, 7
113; GFX10-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
114; GFX10-NEXT:    v_add_nc_u32_e32 v1, -7, v0
115; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
116; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
117; GFX10-NEXT:    v_add_nc_u32_e32 v1, -7, v0
118; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
119; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
120; GFX10-NEXT:    v_sub_nc_u16 v1, 6, v0
121; GFX10-NEXT:    v_and_b32_e32 v0, 0x7f, v0
122; GFX10-NEXT:    v_and_b32_e32 v1, 0x7f, v1
123; GFX10-NEXT:    v_lshrrev_b16 v0, v0, s1
124; GFX10-NEXT:    v_lshlrev_b16 v1, v1, s0
125; GFX10-NEXT:    v_or_b32_e32 v0, v1, v0
126; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
127; GFX10-NEXT:    ; return to shader part epilog
128;
129; GFX11-LABEL: s_fshr_i7:
130; GFX11:       ; %bb.0:
131; GFX11-NEXT:    v_cvt_f32_ubyte0_e32 v0, 7
132; GFX11-NEXT:    s_and_b32 s2, s2, 0x7f
133; GFX11-NEXT:    s_lshl_b32 s0, s0, 1
134; GFX11-NEXT:    s_and_b32 s1, s1, 0x7f
135; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
136; GFX11-NEXT:    v_rcp_iflag_f32_e32 v0, v0
137; GFX11-NEXT:    s_waitcnt_depctr 0xfff
138; GFX11-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
139; GFX11-NEXT:    v_cvt_u32_f32_e32 v0, v0
140; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
141; GFX11-NEXT:    v_mul_lo_u32 v1, v0, -7
142; GFX11-NEXT:    v_mul_hi_u32 v1, v0, v1
143; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
144; GFX11-NEXT:    v_add_nc_u32_e32 v0, v0, v1
145; GFX11-NEXT:    v_mul_hi_u32 v0, s2, v0
146; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
147; GFX11-NEXT:    v_mul_lo_u32 v0, v0, 7
148; GFX11-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
149; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
150; GFX11-NEXT:    v_add_nc_u32_e32 v1, -7, v0
151; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
152; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
153; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
154; GFX11-NEXT:    v_add_nc_u32_e32 v1, -7, v0
155; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
156; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
157; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
158; GFX11-NEXT:    v_sub_nc_u16 v1, 6, v0
159; GFX11-NEXT:    v_and_b32_e32 v0, 0x7f, v0
160; GFX11-NEXT:    v_and_b32_e32 v1, 0x7f, v1
161; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
162; GFX11-NEXT:    v_lshrrev_b16 v0, v0, s1
163; GFX11-NEXT:    v_lshlrev_b16 v1, v1, s0
164; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
165; GFX11-NEXT:    v_or_b32_e32 v0, v1, v0
166; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
167; GFX11-NEXT:    ; return to shader part epilog
168  %result = call i7 @llvm.fshr.i7(i7 %lhs, i7 %rhs, i7 %amt)
169  ret i7 %result
170}
171
; Same i7 funnel shift right as s_fshr_i7, but with the default calling
; convention and no inreg operands. The checks below read v0, v1 and v2
; (presumably lhs, rhs and amt in argument order), leave the result in v0 and
; return with s_setpc_b64 s[30:31].
; NOTE(review): the constant 7 and the two conditional "add -7" steps look like
; an expansion of amt urem 7 - confirm before relying on this description.
172define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) {
173; GFX6-LABEL: v_fshr_i7:
174; GFX6:       ; %bb.0:
175; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
176; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v3, 7
177; GFX6-NEXT:    v_rcp_iflag_f32_e32 v3, v3
178; GFX6-NEXT:    v_and_b32_e32 v2, 0x7f, v2
179; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
180; GFX6-NEXT:    v_and_b32_e32 v1, 0x7f, v1
181; GFX6-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
182; GFX6-NEXT:    v_cvt_u32_f32_e32 v3, v3
183; GFX6-NEXT:    v_mul_lo_u32 v4, v3, -7
184; GFX6-NEXT:    v_mul_hi_u32 v4, v3, v4
185; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
186; GFX6-NEXT:    v_mul_hi_u32 v3, v2, v3
187; GFX6-NEXT:    v_mul_lo_u32 v3, v3, 7
188; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
189; GFX6-NEXT:    v_add_i32_e32 v3, vcc, -7, v2
190; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
191; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
192; GFX6-NEXT:    v_add_i32_e32 v3, vcc, -7, v2
193; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
194; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
195; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 6, v2
196; GFX6-NEXT:    v_and_b32_e32 v2, 0x7f, v2
197; GFX6-NEXT:    v_and_b32_e32 v3, 0x7f, v3
198; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v3, v0
199; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
200; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
201; GFX6-NEXT:    s_setpc_b64 s[30:31]
202;
203; GFX8-LABEL: v_fshr_i7:
204; GFX8:       ; %bb.0:
205; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v3, 7
207; GFX8-NEXT:    v_rcp_iflag_f32_e32 v3, v3
208; GFX8-NEXT:    v_and_b32_e32 v2, 0x7f, v2
209; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
210; GFX8-NEXT:    v_and_b32_e32 v1, 0x7f, v1
211; GFX8-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
212; GFX8-NEXT:    v_cvt_u32_f32_e32 v3, v3
213; GFX8-NEXT:    v_mul_lo_u32 v4, v3, -7
214; GFX8-NEXT:    v_mul_hi_u32 v4, v3, v4
215; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v4
216; GFX8-NEXT:    v_mul_hi_u32 v3, v2, v3
217; GFX8-NEXT:    v_mul_lo_u32 v3, v3, 7
218; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v2, v3
219; GFX8-NEXT:    v_add_u32_e32 v3, vcc, -7, v2
220; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
221; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
222; GFX8-NEXT:    v_add_u32_e32 v3, vcc, -7, v2
223; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
224; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
225; GFX8-NEXT:    v_sub_u16_e32 v3, 6, v2
226; GFX8-NEXT:    v_and_b32_e32 v2, 0x7f, v2
227; GFX8-NEXT:    v_and_b32_e32 v3, 0x7f, v3
228; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v3, v0
229; GFX8-NEXT:    v_lshrrev_b16_e32 v1, v2, v1
230; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
231; GFX8-NEXT:    s_setpc_b64 s[30:31]
232;
233; GFX9-LABEL: v_fshr_i7:
234; GFX9:       ; %bb.0:
235; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
236; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v3, 7
237; GFX9-NEXT:    v_rcp_iflag_f32_e32 v3, v3
238; GFX9-NEXT:    v_and_b32_e32 v2, 0x7f, v2
239; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
240; GFX9-NEXT:    v_and_b32_e32 v1, 0x7f, v1
241; GFX9-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
242; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v3
243; GFX9-NEXT:    v_mul_lo_u32 v4, v3, -7
244; GFX9-NEXT:    v_mul_hi_u32 v4, v3, v4
245; GFX9-NEXT:    v_add_u32_e32 v3, v3, v4
246; GFX9-NEXT:    v_mul_hi_u32 v3, v2, v3
247; GFX9-NEXT:    v_mul_lo_u32 v3, v3, 7
248; GFX9-NEXT:    v_sub_u32_e32 v2, v2, v3
249; GFX9-NEXT:    v_add_u32_e32 v3, -7, v2
250; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
251; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
252; GFX9-NEXT:    v_add_u32_e32 v3, -7, v2
253; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
254; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
255; GFX9-NEXT:    v_sub_u16_e32 v3, 6, v2
256; GFX9-NEXT:    v_and_b32_e32 v2, 0x7f, v2
257; GFX9-NEXT:    v_and_b32_e32 v3, 0x7f, v3
258; GFX9-NEXT:    v_lshlrev_b16_e32 v0, v3, v0
259; GFX9-NEXT:    v_lshrrev_b16_e32 v1, v2, v1
260; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
261; GFX9-NEXT:    s_setpc_b64 s[30:31]
262;
263; GFX10-LABEL: v_fshr_i7:
264; GFX10:       ; %bb.0:
265; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
266; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v3, 7
267; GFX10-NEXT:    v_and_b32_e32 v2, 0x7f, v2
268; GFX10-NEXT:    v_lshlrev_b16 v0, 1, v0
269; GFX10-NEXT:    v_and_b32_e32 v1, 0x7f, v1
270; GFX10-NEXT:    v_rcp_iflag_f32_e32 v3, v3
271; GFX10-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
272; GFX10-NEXT:    v_cvt_u32_f32_e32 v3, v3
273; GFX10-NEXT:    v_mul_lo_u32 v4, v3, -7
274; GFX10-NEXT:    v_mul_hi_u32 v4, v3, v4
275; GFX10-NEXT:    v_add_nc_u32_e32 v3, v3, v4
276; GFX10-NEXT:    v_mul_hi_u32 v3, v2, v3
277; GFX10-NEXT:    v_mul_lo_u32 v3, v3, 7
278; GFX10-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
279; GFX10-NEXT:    v_add_nc_u32_e32 v3, -7, v2
280; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
281; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
282; GFX10-NEXT:    v_add_nc_u32_e32 v3, -7, v2
283; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
284; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
285; GFX10-NEXT:    v_sub_nc_u16 v3, 6, v2
286; GFX10-NEXT:    v_and_b32_e32 v2, 0x7f, v2
287; GFX10-NEXT:    v_and_b32_e32 v3, 0x7f, v3
288; GFX10-NEXT:    v_lshrrev_b16 v1, v2, v1
289; GFX10-NEXT:    v_lshlrev_b16 v0, v3, v0
290; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
291; GFX10-NEXT:    s_setpc_b64 s[30:31]
292;
293; GFX11-LABEL: v_fshr_i7:
294; GFX11:       ; %bb.0:
295; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
296; GFX11-NEXT:    v_cvt_f32_ubyte0_e32 v3, 7
297; GFX11-NEXT:    v_and_b32_e32 v2, 0x7f, v2
298; GFX11-NEXT:    v_lshlrev_b16 v0, 1, v0
299; GFX11-NEXT:    v_and_b32_e32 v1, 0x7f, v1
300; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1)
301; GFX11-NEXT:    v_rcp_iflag_f32_e32 v3, v3
302; GFX11-NEXT:    s_waitcnt_depctr 0xfff
303; GFX11-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
304; GFX11-NEXT:    v_cvt_u32_f32_e32 v3, v3
305; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
306; GFX11-NEXT:    v_mul_lo_u32 v4, v3, -7
307; GFX11-NEXT:    v_mul_hi_u32 v4, v3, v4
308; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
309; GFX11-NEXT:    v_add_nc_u32_e32 v3, v3, v4
310; GFX11-NEXT:    v_mul_hi_u32 v3, v2, v3
311; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
312; GFX11-NEXT:    v_mul_lo_u32 v3, v3, 7
313; GFX11-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
314; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
315; GFX11-NEXT:    v_add_nc_u32_e32 v3, -7, v2
316; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
317; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
318; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
319; GFX11-NEXT:    v_add_nc_u32_e32 v3, -7, v2
320; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
321; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
322; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
323; GFX11-NEXT:    v_sub_nc_u16 v3, 6, v2
324; GFX11-NEXT:    v_and_b32_e32 v2, 0x7f, v2
325; GFX11-NEXT:    v_and_b32_e32 v3, 0x7f, v3
326; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
327; GFX11-NEXT:    v_lshrrev_b16 v1, v2, v1
328; GFX11-NEXT:    v_lshlrev_b16 v0, v3, v0
329; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
330; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
331; GFX11-NEXT:    s_setpc_b64 s[30:31]
332  %result = call i7 @llvm.fshr.i7(i7 %lhs, i7 %rhs, i7 %amt)
333  ret i7 %result
334}
335
; Funnel shift right on i8 with a variable amount, amdgpu_ps calling
; convention with every operand marked inreg. Every expected sequence below is
; purely scalar: the amount in s2 is masked with 7 for the right shift, the
; left-shift amount comes from an and-not of s2 against 7, and s0 is shifted
; left by 1 before the variable left shift.
; The GFX8 and later sequences additionally mask the shift operands with
; 0xffff before the 32-bit shifts.
336define amdgpu_ps i8 @s_fshr_i8(i8 inreg %lhs, i8 inreg %rhs, i8 inreg %amt) {
337; GFX6-LABEL: s_fshr_i8:
338; GFX6:       ; %bb.0:
339; GFX6-NEXT:    s_and_b32 s3, s2, 7
340; GFX6-NEXT:    s_andn2_b32 s2, 7, s2
341; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
342; GFX6-NEXT:    s_and_b32 s1, s1, 0xff
343; GFX6-NEXT:    s_lshl_b32 s0, s0, s2
344; GFX6-NEXT:    s_lshr_b32 s1, s1, s3
345; GFX6-NEXT:    s_or_b32 s0, s0, s1
346; GFX6-NEXT:    ; return to shader part epilog
347;
348; GFX8-LABEL: s_fshr_i8:
349; GFX8:       ; %bb.0:
350; GFX8-NEXT:    s_andn2_b32 s3, 7, s2
351; GFX8-NEXT:    s_and_b32 s2, s2, 7
352; GFX8-NEXT:    s_and_b32 s1, s1, 0xff
353; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
354; GFX8-NEXT:    s_and_b32 s3, 0xffff, s3
355; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
356; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
357; GFX8-NEXT:    s_lshl_b32 s0, s0, s3
358; GFX8-NEXT:    s_lshr_b32 s1, s1, s2
359; GFX8-NEXT:    s_or_b32 s0, s0, s1
360; GFX8-NEXT:    ; return to shader part epilog
361;
362; GFX9-LABEL: s_fshr_i8:
363; GFX9:       ; %bb.0:
364; GFX9-NEXT:    s_andn2_b32 s3, 7, s2
365; GFX9-NEXT:    s_and_b32 s2, s2, 7
366; GFX9-NEXT:    s_and_b32 s1, s1, 0xff
367; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
368; GFX9-NEXT:    s_and_b32 s3, 0xffff, s3
369; GFX9-NEXT:    s_and_b32 s1, 0xffff, s1
370; GFX9-NEXT:    s_and_b32 s2, 0xffff, s2
371; GFX9-NEXT:    s_lshl_b32 s0, s0, s3
372; GFX9-NEXT:    s_lshr_b32 s1, s1, s2
373; GFX9-NEXT:    s_or_b32 s0, s0, s1
374; GFX9-NEXT:    ; return to shader part epilog
375;
376; GFX10-LABEL: s_fshr_i8:
377; GFX10:       ; %bb.0:
378; GFX10-NEXT:    s_andn2_b32 s3, 7, s2
379; GFX10-NEXT:    s_and_b32 s1, s1, 0xff
380; GFX10-NEXT:    s_and_b32 s2, s2, 7
381; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
382; GFX10-NEXT:    s_and_b32 s3, 0xffff, s3
383; GFX10-NEXT:    s_and_b32 s1, 0xffff, s1
384; GFX10-NEXT:    s_and_b32 s2, 0xffff, s2
385; GFX10-NEXT:    s_lshl_b32 s0, s0, s3
386; GFX10-NEXT:    s_lshr_b32 s1, s1, s2
387; GFX10-NEXT:    s_or_b32 s0, s0, s1
388; GFX10-NEXT:    ; return to shader part epilog
389;
390; GFX11-LABEL: s_fshr_i8:
391; GFX11:       ; %bb.0:
392; GFX11-NEXT:    s_and_not1_b32 s3, 7, s2
393; GFX11-NEXT:    s_and_b32 s1, s1, 0xff
394; GFX11-NEXT:    s_and_b32 s2, s2, 7
395; GFX11-NEXT:    s_lshl_b32 s0, s0, 1
396; GFX11-NEXT:    s_and_b32 s3, 0xffff, s3
397; GFX11-NEXT:    s_and_b32 s1, 0xffff, s1
398; GFX11-NEXT:    s_and_b32 s2, 0xffff, s2
399; GFX11-NEXT:    s_lshl_b32 s0, s0, s3
400; GFX11-NEXT:    s_lshr_b32 s1, s1, s2
401; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
402; GFX11-NEXT:    s_or_b32 s0, s0, s1
403; GFX11-NEXT:    ; return to shader part epilog
404  %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 %amt)
405  ret i8 %result
406}
407
; Funnel shift right on i8 with a variable amount, default calling convention
; and no inreg operands; the checks operate on v0, v1 and v2 and return with
; s_setpc_b64 s[30:31].
; The amount is masked with 7 and its bitwise complement (v_not or xor with -1)
; is masked with 7 for the left shift. The GFX8 and GFX9 sequences use an SDWA
; right shift with src1_sel:BYTE_0, while the others mask v1 with 0xff first.
408define i8 @v_fshr_i8(i8 %lhs, i8 %rhs, i8 %amt) {
409; GFX6-LABEL: v_fshr_i8:
410; GFX6:       ; %bb.0:
411; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
412; GFX6-NEXT:    v_and_b32_e32 v3, 7, v2
413; GFX6-NEXT:    v_not_b32_e32 v2, v2
414; GFX6-NEXT:    v_and_b32_e32 v2, 7, v2
415; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
416; GFX6-NEXT:    v_and_b32_e32 v1, 0xff, v1
417; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v2, v0
418; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v3, v1
419; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
420; GFX6-NEXT:    s_setpc_b64 s[30:31]
421;
422; GFX8-LABEL: v_fshr_i8:
423; GFX8:       ; %bb.0:
424; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
425; GFX8-NEXT:    v_xor_b32_e32 v3, -1, v2
426; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
427; GFX8-NEXT:    v_and_b32_e32 v3, 7, v3
428; GFX8-NEXT:    v_and_b32_e32 v2, 7, v2
429; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v3, v0
430; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
431; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
432; GFX8-NEXT:    s_setpc_b64 s[30:31]
433;
434; GFX9-LABEL: v_fshr_i8:
435; GFX9:       ; %bb.0:
436; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
437; GFX9-NEXT:    v_xor_b32_e32 v3, -1, v2
438; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
439; GFX9-NEXT:    v_and_b32_e32 v3, 7, v3
440; GFX9-NEXT:    v_and_b32_e32 v2, 7, v2
441; GFX9-NEXT:    v_lshlrev_b16_e32 v0, v3, v0
442; GFX9-NEXT:    v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
443; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
444; GFX9-NEXT:    s_setpc_b64 s[30:31]
445;
446; GFX10-LABEL: v_fshr_i8:
447; GFX10:       ; %bb.0:
448; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
449; GFX10-NEXT:    v_xor_b32_e32 v3, -1, v2
450; GFX10-NEXT:    v_lshlrev_b16 v0, 1, v0
451; GFX10-NEXT:    v_and_b32_e32 v2, 7, v2
452; GFX10-NEXT:    v_and_b32_e32 v1, 0xff, v1
453; GFX10-NEXT:    v_and_b32_e32 v3, 7, v3
454; GFX10-NEXT:    v_lshrrev_b16 v1, v2, v1
455; GFX10-NEXT:    v_lshlrev_b16 v0, v3, v0
456; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
457; GFX10-NEXT:    s_setpc_b64 s[30:31]
458;
459; GFX11-LABEL: v_fshr_i8:
460; GFX11:       ; %bb.0:
461; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
462; GFX11-NEXT:    v_xor_b32_e32 v3, -1, v2
463; GFX11-NEXT:    v_lshlrev_b16 v0, 1, v0
464; GFX11-NEXT:    v_and_b32_e32 v2, 7, v2
465; GFX11-NEXT:    v_and_b32_e32 v1, 0xff, v1
466; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
467; GFX11-NEXT:    v_and_b32_e32 v3, 7, v3
468; GFX11-NEXT:    v_lshrrev_b16 v1, v2, v1
469; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
470; GFX11-NEXT:    v_lshlrev_b16 v0, v3, v0
471; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
472; GFX11-NEXT:    s_setpc_b64 s[30:31]
473  %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 %amt)
474  ret i8 %result
475}
476
; Funnel shift right on i8 by the constant 4, amdgpu_ps calling convention
; with both operands marked inreg. With the amount known, every expected
; sequence shifts s0 left by 4 and ORs in a value derived from s1: GFX6 uses a
; single s_bfe_u32, the later targets mask s1 and shift it right by 4.
477define amdgpu_ps i8 @s_fshr_i8_4(i8 inreg %lhs, i8 inreg %rhs) {
478; GFX6-LABEL: s_fshr_i8_4:
479; GFX6:       ; %bb.0:
480; GFX6-NEXT:    s_lshl_b32 s0, s0, 4
481; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x40004
482; GFX6-NEXT:    s_or_b32 s0, s0, s1
483; GFX6-NEXT:    ; return to shader part epilog
484;
485; GFX8-LABEL: s_fshr_i8_4:
486; GFX8:       ; %bb.0:
487; GFX8-NEXT:    s_and_b32 s1, s1, 0xff
488; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
489; GFX8-NEXT:    s_lshl_b32 s0, s0, 4
490; GFX8-NEXT:    s_lshr_b32 s1, s1, 4
491; GFX8-NEXT:    s_or_b32 s0, s0, s1
492; GFX8-NEXT:    ; return to shader part epilog
493;
494; GFX9-LABEL: s_fshr_i8_4:
495; GFX9:       ; %bb.0:
496; GFX9-NEXT:    s_and_b32 s1, s1, 0xff
497; GFX9-NEXT:    s_and_b32 s1, 0xffff, s1
498; GFX9-NEXT:    s_lshl_b32 s0, s0, 4
499; GFX9-NEXT:    s_lshr_b32 s1, s1, 4
500; GFX9-NEXT:    s_or_b32 s0, s0, s1
501; GFX9-NEXT:    ; return to shader part epilog
502;
503; GFX10-LABEL: s_fshr_i8_4:
504; GFX10:       ; %bb.0:
505; GFX10-NEXT:    s_and_b32 s1, s1, 0xff
506; GFX10-NEXT:    s_lshl_b32 s0, s0, 4
507; GFX10-NEXT:    s_and_b32 s1, 0xffff, s1
508; GFX10-NEXT:    s_lshr_b32 s1, s1, 4
509; GFX10-NEXT:    s_or_b32 s0, s0, s1
510; GFX10-NEXT:    ; return to shader part epilog
511;
512; GFX11-LABEL: s_fshr_i8_4:
513; GFX11:       ; %bb.0:
514; GFX11-NEXT:    s_and_b32 s1, s1, 0xff
515; GFX11-NEXT:    s_lshl_b32 s0, s0, 4
516; GFX11-NEXT:    s_and_b32 s1, 0xffff, s1
517; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
518; GFX11-NEXT:    s_lshr_b32 s1, s1, 4
519; GFX11-NEXT:    s_or_b32 s0, s0, s1
520; GFX11-NEXT:    ; return to shader part epilog
521  %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 4)
522  ret i8 %result
523}
524
; Funnel shift right on i8 by the constant 4, default calling convention and
; no inreg operands. Every expected sequence shifts v0 left by 4 and ORs in a
; value derived from v1: GFX6 extracts it with v_bfe_u32 (operands 4, 4), GFX8
; and GFX9 materialize 4 in v2 for an SDWA right shift with src1_sel:BYTE_0,
; and GFX10 and GFX11 mask v1 with 0xff before shifting right by 4.
525define i8 @v_fshr_i8_4(i8 %lhs, i8 %rhs) {
526; GFX6-LABEL: v_fshr_i8_4:
527; GFX6:       ; %bb.0:
528; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
529; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
530; GFX6-NEXT:    v_bfe_u32 v1, v1, 4, 4
531; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
532; GFX6-NEXT:    s_setpc_b64 s[30:31]
533;
534; GFX8-LABEL: v_fshr_i8_4:
535; GFX8:       ; %bb.0:
536; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
537; GFX8-NEXT:    v_mov_b32_e32 v2, 4
538; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 4, v0
539; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
540; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
541; GFX8-NEXT:    s_setpc_b64 s[30:31]
542;
543; GFX9-LABEL: v_fshr_i8_4:
544; GFX9:       ; %bb.0:
545; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
546; GFX9-NEXT:    v_mov_b32_e32 v2, 4
547; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 4, v0
548; GFX9-NEXT:    v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
549; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
550; GFX9-NEXT:    s_setpc_b64 s[30:31]
551;
552; GFX10-LABEL: v_fshr_i8_4:
553; GFX10:       ; %bb.0:
554; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
555; GFX10-NEXT:    v_and_b32_e32 v1, 0xff, v1
556; GFX10-NEXT:    v_lshlrev_b16 v0, 4, v0
557; GFX10-NEXT:    v_lshrrev_b16 v1, 4, v1
558; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
559; GFX10-NEXT:    s_setpc_b64 s[30:31]
560;
561; GFX11-LABEL: v_fshr_i8_4:
562; GFX11:       ; %bb.0:
563; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
564; GFX11-NEXT:    v_and_b32_e32 v1, 0xff, v1
565; GFX11-NEXT:    v_lshlrev_b16 v0, 4, v0
566; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
567; GFX11-NEXT:    v_lshrrev_b16 v1, 4, v1
568; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
569; GFX11-NEXT:    s_setpc_b64 s[30:31]
570  %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 4)
571  ret i8 %result
572}
573
; Funnel shift right on i8 by the constant 5, amdgpu_ps calling convention
; with both operands marked inreg. Every expected sequence shifts s0 left by 3
; (8 minus 5) and ORs in a value derived from s1: GFX6 uses a single
; s_bfe_u32, the later targets mask s1 and shift it right by 5.
574define amdgpu_ps i8 @s_fshr_i8_5(i8 inreg %lhs, i8 inreg %rhs) {
575; GFX6-LABEL: s_fshr_i8_5:
576; GFX6:       ; %bb.0:
577; GFX6-NEXT:    s_lshl_b32 s0, s0, 3
578; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x30005
579; GFX6-NEXT:    s_or_b32 s0, s0, s1
580; GFX6-NEXT:    ; return to shader part epilog
581;
582; GFX8-LABEL: s_fshr_i8_5:
583; GFX8:       ; %bb.0:
584; GFX8-NEXT:    s_and_b32 s1, s1, 0xff
585; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
586; GFX8-NEXT:    s_lshl_b32 s0, s0, 3
587; GFX8-NEXT:    s_lshr_b32 s1, s1, 5
588; GFX8-NEXT:    s_or_b32 s0, s0, s1
589; GFX8-NEXT:    ; return to shader part epilog
590;
591; GFX9-LABEL: s_fshr_i8_5:
592; GFX9:       ; %bb.0:
593; GFX9-NEXT:    s_and_b32 s1, s1, 0xff
594; GFX9-NEXT:    s_and_b32 s1, 0xffff, s1
595; GFX9-NEXT:    s_lshl_b32 s0, s0, 3
596; GFX9-NEXT:    s_lshr_b32 s1, s1, 5
597; GFX9-NEXT:    s_or_b32 s0, s0, s1
598; GFX9-NEXT:    ; return to shader part epilog
599;
600; GFX10-LABEL: s_fshr_i8_5:
601; GFX10:       ; %bb.0:
602; GFX10-NEXT:    s_and_b32 s1, s1, 0xff
603; GFX10-NEXT:    s_lshl_b32 s0, s0, 3
604; GFX10-NEXT:    s_and_b32 s1, 0xffff, s1
605; GFX10-NEXT:    s_lshr_b32 s1, s1, 5
606; GFX10-NEXT:    s_or_b32 s0, s0, s1
607; GFX10-NEXT:    ; return to shader part epilog
608;
609; GFX11-LABEL: s_fshr_i8_5:
610; GFX11:       ; %bb.0:
611; GFX11-NEXT:    s_and_b32 s1, s1, 0xff
612; GFX11-NEXT:    s_lshl_b32 s0, s0, 3
613; GFX11-NEXT:    s_and_b32 s1, 0xffff, s1
614; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
615; GFX11-NEXT:    s_lshr_b32 s1, s1, 5
616; GFX11-NEXT:    s_or_b32 s0, s0, s1
617; GFX11-NEXT:    ; return to shader part epilog
618  %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 5)
619  ret i8 %result
620}
621
; Funnel shift right on i8 by the constant 5, default calling convention and
; no inreg operands. Every expected sequence shifts v0 left by 3 (8 minus 5)
; and ORs in a value derived from v1: GFX6 extracts it with v_bfe_u32
; (operands 5, 3), GFX8 and GFX9 materialize 5 in v2 for an SDWA right shift
; with src1_sel:BYTE_0, and GFX10 and GFX11 mask v1 with 0xff before shifting
; right by 5.
622define i8 @v_fshr_i8_5(i8 %lhs, i8 %rhs) {
623; GFX6-LABEL: v_fshr_i8_5:
624; GFX6:       ; %bb.0:
625; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
626; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
627; GFX6-NEXT:    v_bfe_u32 v1, v1, 5, 3
628; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
629; GFX6-NEXT:    s_setpc_b64 s[30:31]
630;
631; GFX8-LABEL: v_fshr_i8_5:
632; GFX8:       ; %bb.0:
633; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
634; GFX8-NEXT:    v_mov_b32_e32 v2, 5
635; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 3, v0
636; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
637; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
638; GFX8-NEXT:    s_setpc_b64 s[30:31]
639;
640; GFX9-LABEL: v_fshr_i8_5:
641; GFX9:       ; %bb.0:
642; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
643; GFX9-NEXT:    v_mov_b32_e32 v2, 5
644; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 3, v0
645; GFX9-NEXT:    v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
646; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
647; GFX9-NEXT:    s_setpc_b64 s[30:31]
648;
649; GFX10-LABEL: v_fshr_i8_5:
650; GFX10:       ; %bb.0:
651; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
652; GFX10-NEXT:    v_and_b32_e32 v1, 0xff, v1
653; GFX10-NEXT:    v_lshlrev_b16 v0, 3, v0
654; GFX10-NEXT:    v_lshrrev_b16 v1, 5, v1
655; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
656; GFX10-NEXT:    s_setpc_b64 s[30:31]
657;
658; GFX11-LABEL: v_fshr_i8_5:
659; GFX11:       ; %bb.0:
660; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
661; GFX11-NEXT:    v_and_b32_e32 v1, 0xff, v1
662; GFX11-NEXT:    v_lshlrev_b16 v0, 3, v0
663; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
664; GFX11-NEXT:    v_lshrrev_b16 v1, 5, v1
665; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
666; GFX11-NEXT:    s_setpc_b64 s[30:31]
667  %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 5)
668  ret i8 %result
669}
670
671define amdgpu_ps i16 @s_fshr_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg, i16 inreg %amt.arg) {
672; GFX6-LABEL: s_fshr_v2i8:
673; GFX6:       ; %bb.0:
674; GFX6-NEXT:    s_lshr_b32 s3, s0, 8
675; GFX6-NEXT:    s_lshr_b32 s4, s2, 8
676; GFX6-NEXT:    s_and_b32 s5, s2, 7
677; GFX6-NEXT:    s_andn2_b32 s2, 7, s2
678; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
679; GFX6-NEXT:    s_lshl_b32 s0, s0, s2
680; GFX6-NEXT:    s_and_b32 s2, s1, 0xff
681; GFX6-NEXT:    s_lshr_b32 s2, s2, s5
682; GFX6-NEXT:    s_or_b32 s0, s0, s2
683; GFX6-NEXT:    s_and_b32 s2, s4, 7
684; GFX6-NEXT:    s_andn2_b32 s4, 7, s4
685; GFX6-NEXT:    s_lshl_b32 s3, s3, 1
686; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x80008
687; GFX6-NEXT:    s_lshl_b32 s3, s3, s4
688; GFX6-NEXT:    s_lshr_b32 s1, s1, s2
689; GFX6-NEXT:    s_or_b32 s1, s3, s1
690; GFX6-NEXT:    s_and_b32 s1, s1, 0xff
691; GFX6-NEXT:    s_and_b32 s0, s0, 0xff
692; GFX6-NEXT:    s_lshl_b32 s1, s1, 8
693; GFX6-NEXT:    s_or_b32 s0, s0, s1
694; GFX6-NEXT:    ; return to shader part epilog
695;
696; GFX8-LABEL: s_fshr_v2i8:
697; GFX8:       ; %bb.0:
698; GFX8-NEXT:    s_lshr_b32 s4, s1, 8
699; GFX8-NEXT:    s_lshr_b32 s5, s2, 8
700; GFX8-NEXT:    s_andn2_b32 s6, 7, s2
701; GFX8-NEXT:    s_and_b32 s2, s2, 7
702; GFX8-NEXT:    s_and_b32 s1, s1, 0xff
703; GFX8-NEXT:    s_lshr_b32 s3, s0, 8
704; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
705; GFX8-NEXT:    s_and_b32 s6, 0xffff, s6
706; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
707; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
708; GFX8-NEXT:    s_lshl_b32 s0, s0, s6
709; GFX8-NEXT:    s_lshr_b32 s1, s1, s2
710; GFX8-NEXT:    s_andn2_b32 s2, 7, s5
711; GFX8-NEXT:    s_or_b32 s0, s0, s1
712; GFX8-NEXT:    s_lshl_b32 s1, s3, 1
713; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
714; GFX8-NEXT:    s_lshl_b32 s1, s1, s2
715; GFX8-NEXT:    s_and_b32 s2, s5, 7
716; GFX8-NEXT:    s_and_b32 s3, s4, 0xff
717; GFX8-NEXT:    s_and_b32 s3, 0xffff, s3
718; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
719; GFX8-NEXT:    s_lshr_b32 s2, s3, s2
720; GFX8-NEXT:    s_or_b32 s1, s1, s2
721; GFX8-NEXT:    s_and_b32 s1, s1, 0xff
722; GFX8-NEXT:    s_and_b32 s0, s0, 0xff
723; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
724; GFX8-NEXT:    s_or_b32 s0, s0, s1
725; GFX8-NEXT:    ; return to shader part epilog
726;
727; GFX9-LABEL: s_fshr_v2i8:
728; GFX9:       ; %bb.0:
729; GFX9-NEXT:    s_lshr_b32 s4, s1, 8
730; GFX9-NEXT:    s_lshr_b32 s5, s2, 8
731; GFX9-NEXT:    s_andn2_b32 s6, 7, s2
732; GFX9-NEXT:    s_and_b32 s2, s2, 7
733; GFX9-NEXT:    s_and_b32 s1, s1, 0xff
734; GFX9-NEXT:    s_lshr_b32 s3, s0, 8
735; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
736; GFX9-NEXT:    s_and_b32 s6, 0xffff, s6
737; GFX9-NEXT:    s_and_b32 s1, 0xffff, s1
738; GFX9-NEXT:    s_and_b32 s2, 0xffff, s2
739; GFX9-NEXT:    s_lshl_b32 s0, s0, s6
740; GFX9-NEXT:    s_lshr_b32 s1, s1, s2
741; GFX9-NEXT:    s_andn2_b32 s2, 7, s5
742; GFX9-NEXT:    s_or_b32 s0, s0, s1
743; GFX9-NEXT:    s_lshl_b32 s1, s3, 1
744; GFX9-NEXT:    s_and_b32 s2, 0xffff, s2
745; GFX9-NEXT:    s_lshl_b32 s1, s1, s2
746; GFX9-NEXT:    s_and_b32 s2, s5, 7
747; GFX9-NEXT:    s_and_b32 s3, s4, 0xff
748; GFX9-NEXT:    s_and_b32 s3, 0xffff, s3
749; GFX9-NEXT:    s_and_b32 s2, 0xffff, s2
750; GFX9-NEXT:    s_lshr_b32 s2, s3, s2
751; GFX9-NEXT:    s_or_b32 s1, s1, s2
752; GFX9-NEXT:    s_and_b32 s1, s1, 0xff
753; GFX9-NEXT:    s_and_b32 s0, s0, 0xff
754; GFX9-NEXT:    s_lshl_b32 s1, s1, 8
755; GFX9-NEXT:    s_or_b32 s0, s0, s1
756; GFX9-NEXT:    ; return to shader part epilog
757;
758; GFX10-LABEL: s_fshr_v2i8:
759; GFX10:       ; %bb.0:
760; GFX10-NEXT:    s_andn2_b32 s5, 7, s2
761; GFX10-NEXT:    s_lshr_b32 s3, s0, 8
762; GFX10-NEXT:    s_lshr_b32 s4, s1, 8
763; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
764; GFX10-NEXT:    s_and_b32 s5, 0xffff, s5
765; GFX10-NEXT:    s_lshr_b32 s6, s2, 8
766; GFX10-NEXT:    s_lshl_b32 s0, s0, s5
767; GFX10-NEXT:    s_andn2_b32 s5, 7, s6
768; GFX10-NEXT:    s_and_b32 s4, s4, 0xff
769; GFX10-NEXT:    s_and_b32 s6, s6, 7
770; GFX10-NEXT:    s_and_b32 s1, s1, 0xff
771; GFX10-NEXT:    s_and_b32 s2, s2, 7
772; GFX10-NEXT:    s_lshl_b32 s3, s3, 1
773; GFX10-NEXT:    s_and_b32 s5, 0xffff, s5
774; GFX10-NEXT:    s_and_b32 s4, 0xffff, s4
775; GFX10-NEXT:    s_and_b32 s6, 0xffff, s6
776; GFX10-NEXT:    s_and_b32 s1, 0xffff, s1
777; GFX10-NEXT:    s_and_b32 s2, 0xffff, s2
778; GFX10-NEXT:    s_lshl_b32 s3, s3, s5
779; GFX10-NEXT:    s_lshr_b32 s4, s4, s6
780; GFX10-NEXT:    s_lshr_b32 s1, s1, s2
781; GFX10-NEXT:    s_or_b32 s2, s3, s4
782; GFX10-NEXT:    s_or_b32 s0, s0, s1
783; GFX10-NEXT:    s_and_b32 s1, s2, 0xff
784; GFX10-NEXT:    s_and_b32 s0, s0, 0xff
785; GFX10-NEXT:    s_lshl_b32 s1, s1, 8
786; GFX10-NEXT:    s_or_b32 s0, s0, s1
787; GFX10-NEXT:    ; return to shader part epilog
788;
789; GFX11-LABEL: s_fshr_v2i8:
790; GFX11:       ; %bb.0:
791; GFX11-NEXT:    s_and_not1_b32 s5, 7, s2
792; GFX11-NEXT:    s_lshr_b32 s3, s0, 8
793; GFX11-NEXT:    s_lshr_b32 s4, s1, 8
794; GFX11-NEXT:    s_lshl_b32 s0, s0, 1
795; GFX11-NEXT:    s_and_b32 s5, 0xffff, s5
796; GFX11-NEXT:    s_lshr_b32 s6, s2, 8
797; GFX11-NEXT:    s_lshl_b32 s0, s0, s5
798; GFX11-NEXT:    s_and_not1_b32 s5, 7, s6
799; GFX11-NEXT:    s_and_b32 s4, s4, 0xff
800; GFX11-NEXT:    s_and_b32 s6, s6, 7
801; GFX11-NEXT:    s_and_b32 s1, s1, 0xff
802; GFX11-NEXT:    s_and_b32 s2, s2, 7
803; GFX11-NEXT:    s_lshl_b32 s3, s3, 1
804; GFX11-NEXT:    s_and_b32 s5, 0xffff, s5
805; GFX11-NEXT:    s_and_b32 s4, 0xffff, s4
806; GFX11-NEXT:    s_and_b32 s6, 0xffff, s6
807; GFX11-NEXT:    s_and_b32 s1, 0xffff, s1
808; GFX11-NEXT:    s_and_b32 s2, 0xffff, s2
809; GFX11-NEXT:    s_lshl_b32 s3, s3, s5
810; GFX11-NEXT:    s_lshr_b32 s4, s4, s6
811; GFX11-NEXT:    s_lshr_b32 s1, s1, s2
812; GFX11-NEXT:    s_or_b32 s2, s3, s4
813; GFX11-NEXT:    s_or_b32 s0, s0, s1
814; GFX11-NEXT:    s_and_b32 s1, s2, 0xff
815; GFX11-NEXT:    s_and_b32 s0, s0, 0xff
816; GFX11-NEXT:    s_lshl_b32 s1, s1, 8
817; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
818; GFX11-NEXT:    s_or_b32 s0, s0, s1
819; GFX11-NEXT:    ; return to shader part epilog
820  %lhs = bitcast i16 %lhs.arg to <2 x i8>
821  %rhs = bitcast i16 %rhs.arg to <2 x i8>
822  %amt = bitcast i16 %amt.arg to <2 x i8>
823  %result = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> %lhs, <2 x i8> %rhs, <2 x i8> %amt)
824  %cast.result = bitcast <2 x i8> %result to i16
825  ret i16 %cast.result
826}
827
; Funnel shift right on <2 x i8>, default (non-shader) calling convention.
; The three operands arrive packed in i16 values; the expected code below reads
; them from v0 (lhs), v1 (rhs) and v2 (amt), assembles the packed result in v0,
; and returns with s_setpc_b64 s[30:31].
; The per-target assertion blocks are autogenerated (see the note at the top of
; the file) and should be regenerated with the script rather than edited by hand.
828define i16 @v_fshr_v2i8(i16 %lhs.arg, i16 %rhs.arg, i16 %amt.arg) {
829; GFX6-LABEL: v_fshr_v2i8:
830; GFX6:       ; %bb.0:
831; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
832; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 8, v2
833; GFX6-NEXT:    v_and_b32_e32 v5, 7, v2
834; GFX6-NEXT:    v_not_b32_e32 v2, v2
835; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
836; GFX6-NEXT:    v_and_b32_e32 v2, 7, v2
837; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
838; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v2, v0
839; GFX6-NEXT:    v_and_b32_e32 v2, 0xff, v1
840; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v5, v2
841; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
842; GFX6-NEXT:    v_and_b32_e32 v2, 7, v4
843; GFX6-NEXT:    v_not_b32_e32 v4, v4
844; GFX6-NEXT:    v_and_b32_e32 v4, 7, v4
845; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 1, v3
846; GFX6-NEXT:    v_bfe_u32 v1, v1, 8, 8
847; GFX6-NEXT:    v_lshlrev_b32_e32 v3, v4, v3
848; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
849; GFX6-NEXT:    v_or_b32_e32 v1, v3, v1
850; GFX6-NEXT:    v_and_b32_e32 v1, 0xff, v1
851; GFX6-NEXT:    v_and_b32_e32 v0, 0xff, v0
852; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
853; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
854; GFX6-NEXT:    s_setpc_b64 s[30:31]
855;
856; GFX8-LABEL: v_fshr_v2i8:
857; GFX8:       ; %bb.0:
858; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
859; GFX8-NEXT:    v_xor_b32_e32 v6, -1, v2
860; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
861; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 8, v2
862; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
863; GFX8-NEXT:    v_and_b32_e32 v6, 7, v6
864; GFX8-NEXT:    v_and_b32_e32 v2, 7, v2
865; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 8, v1
866; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v6, v0
867; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
868; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v5
869; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
870; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 1, v3
871; GFX8-NEXT:    v_and_b32_e32 v2, 7, v2
872; GFX8-NEXT:    v_lshlrev_b16_e32 v1, v2, v1
873; GFX8-NEXT:    v_and_b32_e32 v2, 7, v5
874; GFX8-NEXT:    v_lshrrev_b16_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
875; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
876; GFX8-NEXT:    v_and_b32_e32 v1, 0xff, v1
877; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
878; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
879; GFX8-NEXT:    s_setpc_b64 s[30:31]
880;
881; GFX9-LABEL: v_fshr_v2i8:
882; GFX9:       ; %bb.0:
883; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
884; GFX9-NEXT:    v_xor_b32_e32 v6, -1, v2
885; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
886; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 8, v2
887; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
888; GFX9-NEXT:    v_and_b32_e32 v6, 7, v6
889; GFX9-NEXT:    v_and_b32_e32 v2, 7, v2
890; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 8, v1
891; GFX9-NEXT:    v_lshlrev_b16_e32 v0, v6, v0
892; GFX9-NEXT:    v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
893; GFX9-NEXT:    v_xor_b32_e32 v2, -1, v5
894; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
895; GFX9-NEXT:    v_lshlrev_b16_e32 v1, 1, v3
896; GFX9-NEXT:    v_and_b32_e32 v2, 7, v2
897; GFX9-NEXT:    v_lshlrev_b16_e32 v1, v2, v1
898; GFX9-NEXT:    v_and_b32_e32 v2, 7, v5
899; GFX9-NEXT:    v_lshrrev_b16_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
900; GFX9-NEXT:    v_or_b32_e32 v1, v1, v2
901; GFX9-NEXT:    v_and_b32_e32 v1, 0xff, v1
902; GFX9-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
903; GFX9-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
904; GFX9-NEXT:    s_setpc_b64 s[30:31]
905;
906; GFX10-LABEL: v_fshr_v2i8:
907; GFX10:       ; %bb.0:
908; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
909; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 8, v2
910; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 8, v0
911; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
912; GFX10-NEXT:    v_xor_b32_e32 v7, -1, v2
913; GFX10-NEXT:    v_lshlrev_b16 v0, 1, v0
914; GFX10-NEXT:    v_xor_b32_e32 v6, -1, v3
915; GFX10-NEXT:    v_lshlrev_b16 v4, 1, v4
916; GFX10-NEXT:    v_and_b32_e32 v3, 7, v3
917; GFX10-NEXT:    v_and_b32_e32 v5, 0xff, v5
918; GFX10-NEXT:    v_and_b32_e32 v2, 7, v2
919; GFX10-NEXT:    v_and_b32_e32 v6, 7, v6
920; GFX10-NEXT:    v_and_b32_e32 v1, 0xff, v1
921; GFX10-NEXT:    v_and_b32_e32 v7, 7, v7
922; GFX10-NEXT:    v_lshrrev_b16 v3, v3, v5
923; GFX10-NEXT:    v_lshlrev_b16 v4, v6, v4
924; GFX10-NEXT:    v_lshrrev_b16 v1, v2, v1
925; GFX10-NEXT:    v_lshlrev_b16 v0, v7, v0
926; GFX10-NEXT:    v_or_b32_e32 v2, v4, v3
927; GFX10-NEXT:    v_mov_b32_e32 v3, 0xff
928; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
929; GFX10-NEXT:    v_and_b32_sdwa v1, v2, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
930; GFX10-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
931; GFX10-NEXT:    s_setpc_b64 s[30:31]
932;
933; GFX11-LABEL: v_fshr_v2i8:
934; GFX11:       ; %bb.0:
935; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
936; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 8, v2
937; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 8, v0
938; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
939; GFX11-NEXT:    v_xor_b32_e32 v7, -1, v2
940; GFX11-NEXT:    v_lshlrev_b16 v0, 1, v0
941; GFX11-NEXT:    v_xor_b32_e32 v6, -1, v3
942; GFX11-NEXT:    v_lshlrev_b16 v4, 1, v4
943; GFX11-NEXT:    v_and_b32_e32 v3, 7, v3
944; GFX11-NEXT:    v_and_b32_e32 v5, 0xff, v5
945; GFX11-NEXT:    v_and_b32_e32 v2, 7, v2
946; GFX11-NEXT:    v_and_b32_e32 v6, 7, v6
947; GFX11-NEXT:    v_and_b32_e32 v1, 0xff, v1
948; GFX11-NEXT:    v_and_b32_e32 v7, 7, v7
949; GFX11-NEXT:    v_lshrrev_b16 v3, v3, v5
950; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
951; GFX11-NEXT:    v_lshlrev_b16 v4, v6, v4
952; GFX11-NEXT:    v_lshrrev_b16 v1, v2, v1
953; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
954; GFX11-NEXT:    v_lshlrev_b16 v0, v7, v0
955; GFX11-NEXT:    v_or_b32_e32 v2, v4, v3
956; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
957; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
958; GFX11-NEXT:    v_and_b32_e32 v1, 0xff, v2
959; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
960; GFX11-NEXT:    v_and_b32_e32 v0, 0xff, v0
961; GFX11-NEXT:    v_lshlrev_b16 v1, 8, v1
962; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
963; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
964; GFX11-NEXT:    s_setpc_b64 s[30:31]
; Test body. Bitcast each packed i16 operand to <2 x i8>, apply the fshr
; intrinsic lane-wise, then bitcast the <2 x i8> result back to i16.
965  %lhs = bitcast i16 %lhs.arg to <2 x i8>
966  %rhs = bitcast i16 %rhs.arg to <2 x i8>
967  %amt = bitcast i16 %amt.arg to <2 x i8>
968  %result = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> %lhs, <2 x i8> %rhs, <2 x i8> %amt)
969  %cast.result = bitcast <2 x i8> %result to i16
970  ret i16 %cast.result
971}
972
; Funnel shift right on <4 x i8> with the amdgpu_ps calling convention and
; inreg operands. The three operands arrive packed in i32 values; the expected
; code below reads them from s0 (lhs), s1 (rhs) and s2 (amt), uses only scalar
; (s_*) instructions, and assembles the packed result in s0.
; Each byte lane follows the same visible pattern: (lhs_byte << 1) shifted left
; by (~amt & 7), OR'd with rhs_byte shifted right by (amt & 7), masked to 0xff.
; The per-target assertion blocks are autogenerated (see the note at the top of
; the file) and should be regenerated with the script rather than edited by hand.
973define amdgpu_ps i32 @s_fshr_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg, i32 inreg %amt.arg) {
974; GFX6-LABEL: s_fshr_v4i8:
975; GFX6:       ; %bb.0:
976; GFX6-NEXT:    s_lshr_b32 s3, s0, 8
977; GFX6-NEXT:    s_lshr_b32 s4, s0, 16
978; GFX6-NEXT:    s_lshr_b32 s5, s0, 24
979; GFX6-NEXT:    s_lshr_b32 s7, s2, 8
980; GFX6-NEXT:    s_lshr_b32 s8, s2, 16
981; GFX6-NEXT:    s_lshr_b32 s9, s2, 24
982; GFX6-NEXT:    s_and_b32 s10, s2, 7
983; GFX6-NEXT:    s_andn2_b32 s2, 7, s2
984; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
985; GFX6-NEXT:    s_lshl_b32 s0, s0, s2
986; GFX6-NEXT:    s_and_b32 s2, s1, 0xff
987; GFX6-NEXT:    s_lshr_b32 s2, s2, s10
988; GFX6-NEXT:    s_or_b32 s0, s0, s2
989; GFX6-NEXT:    s_and_b32 s2, s7, 7
990; GFX6-NEXT:    s_andn2_b32 s7, 7, s7
991; GFX6-NEXT:    s_lshl_b32 s3, s3, 1
992; GFX6-NEXT:    s_lshl_b32 s3, s3, s7
993; GFX6-NEXT:    s_bfe_u32 s7, s1, 0x80008
994; GFX6-NEXT:    s_lshr_b32 s2, s7, s2
995; GFX6-NEXT:    s_lshr_b32 s6, s1, 24
996; GFX6-NEXT:    s_or_b32 s2, s3, s2
997; GFX6-NEXT:    s_and_b32 s3, s8, 7
998; GFX6-NEXT:    s_andn2_b32 s7, 7, s8
999; GFX6-NEXT:    s_lshl_b32 s4, s4, 1
1000; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x80010
1001; GFX6-NEXT:    s_lshl_b32 s4, s4, s7
1002; GFX6-NEXT:    s_lshr_b32 s1, s1, s3
1003; GFX6-NEXT:    s_or_b32 s1, s4, s1
1004; GFX6-NEXT:    s_and_b32 s3, s9, 7
1005; GFX6-NEXT:    s_andn2_b32 s4, 7, s9
1006; GFX6-NEXT:    s_lshl_b32 s5, s5, 1
1007; GFX6-NEXT:    s_and_b32 s2, s2, 0xff
1008; GFX6-NEXT:    s_lshl_b32 s4, s5, s4
1009; GFX6-NEXT:    s_lshr_b32 s3, s6, s3
1010; GFX6-NEXT:    s_and_b32 s0, s0, 0xff
1011; GFX6-NEXT:    s_lshl_b32 s2, s2, 8
1012; GFX6-NEXT:    s_and_b32 s1, s1, 0xff
1013; GFX6-NEXT:    s_or_b32 s3, s4, s3
1014; GFX6-NEXT:    s_or_b32 s0, s0, s2
1015; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
1016; GFX6-NEXT:    s_or_b32 s0, s0, s1
1017; GFX6-NEXT:    s_and_b32 s1, s3, 0xff
1018; GFX6-NEXT:    s_lshl_b32 s1, s1, 24
1019; GFX6-NEXT:    s_or_b32 s0, s0, s1
1020; GFX6-NEXT:    ; return to shader part epilog
1021;
1022; GFX8-LABEL: s_fshr_v4i8:
1023; GFX8:       ; %bb.0:
1024; GFX8-NEXT:    s_lshr_b32 s6, s1, 8
1025; GFX8-NEXT:    s_lshr_b32 s7, s1, 16
1026; GFX8-NEXT:    s_lshr_b32 s8, s1, 24
1027; GFX8-NEXT:    s_lshr_b32 s9, s2, 8
1028; GFX8-NEXT:    s_lshr_b32 s10, s2, 16
1029; GFX8-NEXT:    s_lshr_b32 s11, s2, 24
1030; GFX8-NEXT:    s_andn2_b32 s12, 7, s2
1031; GFX8-NEXT:    s_and_b32 s2, s2, 7
1032; GFX8-NEXT:    s_and_b32 s1, s1, 0xff
1033; GFX8-NEXT:    s_lshr_b32 s3, s0, 8
1034; GFX8-NEXT:    s_lshr_b32 s4, s0, 16
1035; GFX8-NEXT:    s_lshr_b32 s5, s0, 24
1036; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
1037; GFX8-NEXT:    s_and_b32 s12, 0xffff, s12
1038; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
1039; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
1040; GFX8-NEXT:    s_lshl_b32 s0, s0, s12
1041; GFX8-NEXT:    s_lshr_b32 s1, s1, s2
1042; GFX8-NEXT:    s_andn2_b32 s2, 7, s9
1043; GFX8-NEXT:    s_or_b32 s0, s0, s1
1044; GFX8-NEXT:    s_lshl_b32 s1, s3, 1
1045; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
1046; GFX8-NEXT:    s_lshl_b32 s1, s1, s2
1047; GFX8-NEXT:    s_and_b32 s2, s9, 7
1048; GFX8-NEXT:    s_and_b32 s3, s6, 0xff
1049; GFX8-NEXT:    s_and_b32 s3, 0xffff, s3
1050; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
1051; GFX8-NEXT:    s_lshr_b32 s2, s3, s2
1052; GFX8-NEXT:    s_andn2_b32 s3, 7, s10
1053; GFX8-NEXT:    s_or_b32 s1, s1, s2
1054; GFX8-NEXT:    s_lshl_b32 s2, s4, 1
1055; GFX8-NEXT:    s_and_b32 s3, 0xffff, s3
1056; GFX8-NEXT:    s_lshl_b32 s2, s2, s3
1057; GFX8-NEXT:    s_and_b32 s3, s10, 7
1058; GFX8-NEXT:    s_and_b32 s4, s7, 0xff
1059; GFX8-NEXT:    s_and_b32 s4, 0xffff, s4
1060; GFX8-NEXT:    s_and_b32 s3, 0xffff, s3
1061; GFX8-NEXT:    s_lshr_b32 s3, s4, s3
1062; GFX8-NEXT:    s_andn2_b32 s4, 7, s11
1063; GFX8-NEXT:    s_or_b32 s2, s2, s3
1064; GFX8-NEXT:    s_lshl_b32 s3, s5, 1
1065; GFX8-NEXT:    s_and_b32 s4, 0xffff, s4
1066; GFX8-NEXT:    s_lshl_b32 s3, s3, s4
1067; GFX8-NEXT:    s_and_b32 s4, s11, 7
1068; GFX8-NEXT:    s_and_b32 s1, s1, 0xff
1069; GFX8-NEXT:    s_and_b32 s4, 0xffff, s4
1070; GFX8-NEXT:    s_and_b32 s0, s0, 0xff
1071; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
1072; GFX8-NEXT:    s_lshr_b32 s4, s8, s4
1073; GFX8-NEXT:    s_or_b32 s0, s0, s1
1074; GFX8-NEXT:    s_and_b32 s1, s2, 0xff
1075; GFX8-NEXT:    s_or_b32 s3, s3, s4
1076; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
1077; GFX8-NEXT:    s_or_b32 s0, s0, s1
1078; GFX8-NEXT:    s_and_b32 s1, s3, 0xff
1079; GFX8-NEXT:    s_lshl_b32 s1, s1, 24
1080; GFX8-NEXT:    s_or_b32 s0, s0, s1
1081; GFX8-NEXT:    ; return to shader part epilog
1082;
1083; GFX9-LABEL: s_fshr_v4i8:
1084; GFX9:       ; %bb.0:
1085; GFX9-NEXT:    s_lshr_b32 s6, s1, 8
1086; GFX9-NEXT:    s_lshr_b32 s7, s1, 16
1087; GFX9-NEXT:    s_lshr_b32 s8, s1, 24
1088; GFX9-NEXT:    s_lshr_b32 s9, s2, 8
1089; GFX9-NEXT:    s_lshr_b32 s10, s2, 16
1090; GFX9-NEXT:    s_lshr_b32 s11, s2, 24
1091; GFX9-NEXT:    s_andn2_b32 s12, 7, s2
1092; GFX9-NEXT:    s_and_b32 s2, s2, 7
1093; GFX9-NEXT:    s_and_b32 s1, s1, 0xff
1094; GFX9-NEXT:    s_lshr_b32 s3, s0, 8
1095; GFX9-NEXT:    s_lshr_b32 s4, s0, 16
1096; GFX9-NEXT:    s_lshr_b32 s5, s0, 24
1097; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
1098; GFX9-NEXT:    s_and_b32 s12, 0xffff, s12
1099; GFX9-NEXT:    s_and_b32 s1, 0xffff, s1
1100; GFX9-NEXT:    s_and_b32 s2, 0xffff, s2
1101; GFX9-NEXT:    s_lshl_b32 s0, s0, s12
1102; GFX9-NEXT:    s_lshr_b32 s1, s1, s2
1103; GFX9-NEXT:    s_andn2_b32 s2, 7, s9
1104; GFX9-NEXT:    s_or_b32 s0, s0, s1
1105; GFX9-NEXT:    s_lshl_b32 s1, s3, 1
1106; GFX9-NEXT:    s_and_b32 s2, 0xffff, s2
1107; GFX9-NEXT:    s_lshl_b32 s1, s1, s2
1108; GFX9-NEXT:    s_and_b32 s2, s9, 7
1109; GFX9-NEXT:    s_and_b32 s3, s6, 0xff
1110; GFX9-NEXT:    s_and_b32 s3, 0xffff, s3
1111; GFX9-NEXT:    s_and_b32 s2, 0xffff, s2
1112; GFX9-NEXT:    s_lshr_b32 s2, s3, s2
1113; GFX9-NEXT:    s_andn2_b32 s3, 7, s10
1114; GFX9-NEXT:    s_or_b32 s1, s1, s2
1115; GFX9-NEXT:    s_lshl_b32 s2, s4, 1
1116; GFX9-NEXT:    s_and_b32 s3, 0xffff, s3
1117; GFX9-NEXT:    s_lshl_b32 s2, s2, s3
1118; GFX9-NEXT:    s_and_b32 s3, s10, 7
1119; GFX9-NEXT:    s_and_b32 s4, s7, 0xff
1120; GFX9-NEXT:    s_and_b32 s4, 0xffff, s4
1121; GFX9-NEXT:    s_and_b32 s3, 0xffff, s3
1122; GFX9-NEXT:    s_lshr_b32 s3, s4, s3
1123; GFX9-NEXT:    s_andn2_b32 s4, 7, s11
1124; GFX9-NEXT:    s_or_b32 s2, s2, s3
1125; GFX9-NEXT:    s_lshl_b32 s3, s5, 1
1126; GFX9-NEXT:    s_and_b32 s4, 0xffff, s4
1127; GFX9-NEXT:    s_lshl_b32 s3, s3, s4
1128; GFX9-NEXT:    s_and_b32 s4, s11, 7
1129; GFX9-NEXT:    s_and_b32 s1, s1, 0xff
1130; GFX9-NEXT:    s_and_b32 s4, 0xffff, s4
1131; GFX9-NEXT:    s_and_b32 s0, s0, 0xff
1132; GFX9-NEXT:    s_lshl_b32 s1, s1, 8
1133; GFX9-NEXT:    s_lshr_b32 s4, s8, s4
1134; GFX9-NEXT:    s_or_b32 s0, s0, s1
1135; GFX9-NEXT:    s_and_b32 s1, s2, 0xff
1136; GFX9-NEXT:    s_or_b32 s3, s3, s4
1137; GFX9-NEXT:    s_lshl_b32 s1, s1, 16
1138; GFX9-NEXT:    s_or_b32 s0, s0, s1
1139; GFX9-NEXT:    s_and_b32 s1, s3, 0xff
1140; GFX9-NEXT:    s_lshl_b32 s1, s1, 24
1141; GFX9-NEXT:    s_or_b32 s0, s0, s1
1142; GFX9-NEXT:    ; return to shader part epilog
1143;
1144; GFX10-LABEL: s_fshr_v4i8:
1145; GFX10:       ; %bb.0:
1146; GFX10-NEXT:    s_lshr_b32 s6, s1, 8
1147; GFX10-NEXT:    s_lshr_b32 s7, s1, 16
1148; GFX10-NEXT:    s_lshr_b32 s8, s1, 24
1149; GFX10-NEXT:    s_lshr_b32 s9, s2, 8
1150; GFX10-NEXT:    s_lshr_b32 s10, s2, 16
1151; GFX10-NEXT:    s_lshr_b32 s11, s2, 24
1152; GFX10-NEXT:    s_andn2_b32 s12, 7, s2
1153; GFX10-NEXT:    s_and_b32 s1, s1, 0xff
1154; GFX10-NEXT:    s_and_b32 s2, s2, 7
1155; GFX10-NEXT:    s_and_b32 s1, 0xffff, s1
1156; GFX10-NEXT:    s_and_b32 s2, 0xffff, s2
1157; GFX10-NEXT:    s_lshr_b32 s3, s0, 8
1158; GFX10-NEXT:    s_lshr_b32 s1, s1, s2
1159; GFX10-NEXT:    s_andn2_b32 s2, 7, s9
1160; GFX10-NEXT:    s_and_b32 s6, s6, 0xff
1161; GFX10-NEXT:    s_and_b32 s9, s9, 7
1162; GFX10-NEXT:    s_lshr_b32 s4, s0, 16
1163; GFX10-NEXT:    s_lshr_b32 s5, s0, 24
1164; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
1165; GFX10-NEXT:    s_and_b32 s12, 0xffff, s12
1166; GFX10-NEXT:    s_lshl_b32 s3, s3, 1
1167; GFX10-NEXT:    s_and_b32 s2, 0xffff, s2
1168; GFX10-NEXT:    s_and_b32 s6, 0xffff, s6
1169; GFX10-NEXT:    s_and_b32 s9, 0xffff, s9
1170; GFX10-NEXT:    s_lshl_b32 s0, s0, s12
1171; GFX10-NEXT:    s_lshl_b32 s2, s3, s2
1172; GFX10-NEXT:    s_lshr_b32 s3, s6, s9
1173; GFX10-NEXT:    s_or_b32 s0, s0, s1
1174; GFX10-NEXT:    s_or_b32 s1, s2, s3
1175; GFX10-NEXT:    s_andn2_b32 s2, 7, s10
1176; GFX10-NEXT:    s_lshl_b32 s3, s4, 1
1177; GFX10-NEXT:    s_and_b32 s4, s7, 0xff
1178; GFX10-NEXT:    s_and_b32 s6, s10, 7
1179; GFX10-NEXT:    s_and_b32 s2, 0xffff, s2
1180; GFX10-NEXT:    s_and_b32 s4, 0xffff, s4
1181; GFX10-NEXT:    s_and_b32 s6, 0xffff, s6
1182; GFX10-NEXT:    s_lshl_b32 s2, s3, s2
1183; GFX10-NEXT:    s_lshr_b32 s3, s4, s6
1184; GFX10-NEXT:    s_lshl_b32 s4, s5, 1
1185; GFX10-NEXT:    s_andn2_b32 s5, 7, s11
1186; GFX10-NEXT:    s_and_b32 s6, s11, 7
1187; GFX10-NEXT:    s_and_b32 s5, 0xffff, s5
1188; GFX10-NEXT:    s_and_b32 s6, 0xffff, s6
1189; GFX10-NEXT:    s_lshl_b32 s4, s4, s5
1190; GFX10-NEXT:    s_lshr_b32 s5, s8, s6
1191; GFX10-NEXT:    s_or_b32 s2, s2, s3
1192; GFX10-NEXT:    s_and_b32 s1, s1, 0xff
1193; GFX10-NEXT:    s_or_b32 s3, s4, s5
1194; GFX10-NEXT:    s_and_b32 s0, s0, 0xff
1195; GFX10-NEXT:    s_lshl_b32 s1, s1, 8
1196; GFX10-NEXT:    s_and_b32 s2, s2, 0xff
1197; GFX10-NEXT:    s_or_b32 s0, s0, s1
1198; GFX10-NEXT:    s_lshl_b32 s1, s2, 16
1199; GFX10-NEXT:    s_and_b32 s2, s3, 0xff
1200; GFX10-NEXT:    s_or_b32 s0, s0, s1
1201; GFX10-NEXT:    s_lshl_b32 s1, s2, 24
1202; GFX10-NEXT:    s_or_b32 s0, s0, s1
1203; GFX10-NEXT:    ; return to shader part epilog
1204;
1205; GFX11-LABEL: s_fshr_v4i8:
1206; GFX11:       ; %bb.0:
1207; GFX11-NEXT:    s_lshr_b32 s6, s1, 8
1208; GFX11-NEXT:    s_lshr_b32 s7, s1, 16
1209; GFX11-NEXT:    s_lshr_b32 s8, s1, 24
1210; GFX11-NEXT:    s_lshr_b32 s9, s2, 8
1211; GFX11-NEXT:    s_lshr_b32 s10, s2, 16
1212; GFX11-NEXT:    s_lshr_b32 s11, s2, 24
1213; GFX11-NEXT:    s_and_not1_b32 s12, 7, s2
1214; GFX11-NEXT:    s_and_b32 s1, s1, 0xff
1215; GFX11-NEXT:    s_and_b32 s2, s2, 7
1216; GFX11-NEXT:    s_and_b32 s1, 0xffff, s1
1217; GFX11-NEXT:    s_and_b32 s2, 0xffff, s2
1218; GFX11-NEXT:    s_lshr_b32 s3, s0, 8
1219; GFX11-NEXT:    s_lshr_b32 s1, s1, s2
1220; GFX11-NEXT:    s_and_not1_b32 s2, 7, s9
1221; GFX11-NEXT:    s_and_b32 s6, s6, 0xff
1222; GFX11-NEXT:    s_and_b32 s9, s9, 7
1223; GFX11-NEXT:    s_lshr_b32 s4, s0, 16
1224; GFX11-NEXT:    s_lshr_b32 s5, s0, 24
1225; GFX11-NEXT:    s_lshl_b32 s0, s0, 1
1226; GFX11-NEXT:    s_and_b32 s12, 0xffff, s12
1227; GFX11-NEXT:    s_lshl_b32 s3, s3, 1
1228; GFX11-NEXT:    s_and_b32 s2, 0xffff, s2
1229; GFX11-NEXT:    s_and_b32 s6, 0xffff, s6
1230; GFX11-NEXT:    s_and_b32 s9, 0xffff, s9
1231; GFX11-NEXT:    s_lshl_b32 s0, s0, s12
1232; GFX11-NEXT:    s_lshl_b32 s2, s3, s2
1233; GFX11-NEXT:    s_lshr_b32 s3, s6, s9
1234; GFX11-NEXT:    s_or_b32 s0, s0, s1
1235; GFX11-NEXT:    s_or_b32 s1, s2, s3
1236; GFX11-NEXT:    s_and_not1_b32 s2, 7, s10
1237; GFX11-NEXT:    s_lshl_b32 s3, s4, 1
1238; GFX11-NEXT:    s_and_b32 s4, s7, 0xff
1239; GFX11-NEXT:    s_and_b32 s6, s10, 7
1240; GFX11-NEXT:    s_and_b32 s2, 0xffff, s2
1241; GFX11-NEXT:    s_and_b32 s4, 0xffff, s4
1242; GFX11-NEXT:    s_and_b32 s6, 0xffff, s6
1243; GFX11-NEXT:    s_lshl_b32 s2, s3, s2
1244; GFX11-NEXT:    s_lshr_b32 s3, s4, s6
1245; GFX11-NEXT:    s_lshl_b32 s4, s5, 1
1246; GFX11-NEXT:    s_and_not1_b32 s5, 7, s11
1247; GFX11-NEXT:    s_and_b32 s6, s11, 7
1248; GFX11-NEXT:    s_and_b32 s5, 0xffff, s5
1249; GFX11-NEXT:    s_and_b32 s6, 0xffff, s6
1250; GFX11-NEXT:    s_lshl_b32 s4, s4, s5
1251; GFX11-NEXT:    s_lshr_b32 s5, s8, s6
1252; GFX11-NEXT:    s_or_b32 s2, s2, s3
1253; GFX11-NEXT:    s_and_b32 s1, s1, 0xff
1254; GFX11-NEXT:    s_or_b32 s3, s4, s5
1255; GFX11-NEXT:    s_and_b32 s0, s0, 0xff
1256; GFX11-NEXT:    s_lshl_b32 s1, s1, 8
1257; GFX11-NEXT:    s_and_b32 s2, s2, 0xff
1258; GFX11-NEXT:    s_or_b32 s0, s0, s1
1259; GFX11-NEXT:    s_lshl_b32 s1, s2, 16
1260; GFX11-NEXT:    s_and_b32 s2, s3, 0xff
1261; GFX11-NEXT:    s_or_b32 s0, s0, s1
1262; GFX11-NEXT:    s_lshl_b32 s1, s2, 24
1263; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1264; GFX11-NEXT:    s_or_b32 s0, s0, s1
1265; GFX11-NEXT:    ; return to shader part epilog
; Test body. Bitcast each packed i32 operand to <4 x i8>, apply the fshr
; intrinsic lane-wise, then bitcast the <4 x i8> result back to i32.
1266  %lhs = bitcast i32 %lhs.arg to <4 x i8>
1267  %rhs = bitcast i32 %rhs.arg to <4 x i8>
1268  %amt = bitcast i32 %amt.arg to <4 x i8>
1269  %result = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %lhs, <4 x i8> %rhs, <4 x i8> %amt)
1270  %cast.result = bitcast <4 x i8> %result to i32
1271  ret i32 %cast.result
1272}
1273
1274define i32 @v_fshr_v4i8(i32 %lhs.arg, i32 %rhs.arg, i32 %amt.arg) {
1275; GFX6-LABEL: v_fshr_v4i8:
1276; GFX6:       ; %bb.0:
1277; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1278; GFX6-NEXT:    v_lshrrev_b32_e32 v7, 8, v2
1279; GFX6-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
1280; GFX6-NEXT:    v_lshrrev_b32_e32 v9, 24, v2
1281; GFX6-NEXT:    v_and_b32_e32 v10, 7, v2
1282; GFX6-NEXT:    v_not_b32_e32 v2, v2
1283; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
1284; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
1285; GFX6-NEXT:    v_lshrrev_b32_e32 v5, 24, v0
1286; GFX6-NEXT:    v_and_b32_e32 v2, 7, v2
1287; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
1288; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v2, v0
1289; GFX6-NEXT:    v_and_b32_e32 v2, 0xff, v1
1290; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v10, v2
1291; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
1292; GFX6-NEXT:    v_and_b32_e32 v2, 7, v7
1293; GFX6-NEXT:    v_not_b32_e32 v7, v7
1294; GFX6-NEXT:    v_and_b32_e32 v7, 7, v7
1295; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 1, v3
1296; GFX6-NEXT:    v_lshlrev_b32_e32 v3, v7, v3
1297; GFX6-NEXT:    v_bfe_u32 v7, v1, 8, 8
1298; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v2, v7
1299; GFX6-NEXT:    v_not_b32_e32 v7, v8
1300; GFX6-NEXT:    v_lshrrev_b32_e32 v6, 24, v1
1301; GFX6-NEXT:    v_or_b32_e32 v2, v3, v2
1302; GFX6-NEXT:    v_and_b32_e32 v3, 7, v8
1303; GFX6-NEXT:    v_and_b32_e32 v7, 7, v7
1304; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 1, v4
1305; GFX6-NEXT:    v_bfe_u32 v1, v1, 16, 8
1306; GFX6-NEXT:    v_lshlrev_b32_e32 v4, v7, v4
1307; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v3, v1
1308; GFX6-NEXT:    v_or_b32_e32 v1, v4, v1
1309; GFX6-NEXT:    v_not_b32_e32 v4, v9
1310; GFX6-NEXT:    v_and_b32_e32 v3, 7, v9
1311; GFX6-NEXT:    v_and_b32_e32 v4, 7, v4
1312; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 1, v5
1313; GFX6-NEXT:    v_and_b32_e32 v2, 0xff, v2
1314; GFX6-NEXT:    v_lshlrev_b32_e32 v4, v4, v5
1315; GFX6-NEXT:    v_lshrrev_b32_e32 v3, v3, v6
1316; GFX6-NEXT:    v_and_b32_e32 v0, 0xff, v0
1317; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
1318; GFX6-NEXT:    v_and_b32_e32 v1, 0xff, v1
1319; GFX6-NEXT:    v_or_b32_e32 v3, v4, v3
1320; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
1321; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1322; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
1323; GFX6-NEXT:    v_and_b32_e32 v1, 0xff, v3
1324; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
1325; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
1326; GFX6-NEXT:    s_setpc_b64 s[30:31]
1327;
1328; GFX8-LABEL: v_fshr_v4i8:
1329; GFX8:       ; %bb.0:
1330; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1331; GFX8-NEXT:    v_xor_b32_e32 v7, -1, v2
1332; GFX8-NEXT:    v_lshlrev_b16_e32 v6, 1, v0
1333; GFX8-NEXT:    v_and_b32_e32 v7, 7, v7
1334; GFX8-NEXT:    v_lshlrev_b16_e32 v6, v7, v6
1335; GFX8-NEXT:    v_and_b32_e32 v7, 7, v2
1336; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 8, v2
1337; GFX8-NEXT:    v_lshrrev_b16_sdwa v7, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1338; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
1339; GFX8-NEXT:    v_or_b32_e32 v6, v6, v7
1340; GFX8-NEXT:    v_xor_b32_e32 v7, -1, v5
1341; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 8, v1
1342; GFX8-NEXT:    v_lshlrev_b16_e32 v3, 1, v3
1343; GFX8-NEXT:    v_and_b32_e32 v7, 7, v7
1344; GFX8-NEXT:    v_and_b32_e32 v5, 7, v5
1345; GFX8-NEXT:    v_lshlrev_b16_e32 v3, v7, v3
1346; GFX8-NEXT:    v_lshrrev_b16_sdwa v4, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1347; GFX8-NEXT:    v_mov_b32_e32 v7, -1
1348; GFX8-NEXT:    v_or_b32_e32 v3, v3, v4
1349; GFX8-NEXT:    v_mov_b32_e32 v4, 1
1350; GFX8-NEXT:    v_xor_b32_sdwa v9, v2, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1351; GFX8-NEXT:    v_lshlrev_b16_sdwa v5, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1352; GFX8-NEXT:    v_and_b32_e32 v9, 7, v9
1353; GFX8-NEXT:    v_mov_b32_e32 v8, 0xff
1354; GFX8-NEXT:    v_lshlrev_b16_e32 v5, v9, v5
1355; GFX8-NEXT:    v_mov_b32_e32 v9, 7
1356; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1357; GFX8-NEXT:    v_xor_b32_sdwa v4, v2, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
1358; GFX8-NEXT:    v_and_b32_sdwa v10, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1359; GFX8-NEXT:    v_and_b32_sdwa v8, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1360; GFX8-NEXT:    v_and_b32_e32 v4, 7, v4
1361; GFX8-NEXT:    v_and_b32_sdwa v2, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
1362; GFX8-NEXT:    v_lshrrev_b16_e32 v8, v10, v8
1363; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v4, v0
1364; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1365; GFX8-NEXT:    v_or_b32_e32 v5, v5, v8
1366; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
1367; GFX8-NEXT:    v_mov_b32_e32 v1, 8
1368; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1369; GFX8-NEXT:    v_and_b32_e32 v2, 0xff, v5
1370; GFX8-NEXT:    v_or_b32_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1371; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
1372; GFX8-NEXT:    v_and_b32_e32 v0, 0xff, v0
1373; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
1374; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
1375; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
1376; GFX8-NEXT:    s_setpc_b64 s[30:31]
1377;
1378; GFX9-LABEL: v_fshr_v4i8:
1379; GFX9:       ; %bb.0:
1380; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1381; GFX9-NEXT:    v_xor_b32_e32 v7, -1, v2
1382; GFX9-NEXT:    v_lshlrev_b16_e32 v6, 1, v0
1383; GFX9-NEXT:    v_and_b32_e32 v7, 7, v7
1384; GFX9-NEXT:    v_lshlrev_b16_e32 v6, v7, v6
1385; GFX9-NEXT:    v_and_b32_e32 v7, 7, v2
1386; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 8, v2
1387; GFX9-NEXT:    v_lshrrev_b16_sdwa v7, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1388; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
1389; GFX9-NEXT:    v_or_b32_e32 v6, v6, v7
1390; GFX9-NEXT:    v_xor_b32_e32 v7, -1, v5
1391; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 8, v1
1392; GFX9-NEXT:    v_lshlrev_b16_e32 v3, 1, v3
1393; GFX9-NEXT:    v_and_b32_e32 v7, 7, v7
1394; GFX9-NEXT:    v_and_b32_e32 v5, 7, v5
1395; GFX9-NEXT:    v_lshlrev_b16_e32 v3, v7, v3
1396; GFX9-NEXT:    v_lshrrev_b16_sdwa v4, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1397; GFX9-NEXT:    v_mov_b32_e32 v7, -1
1398; GFX9-NEXT:    v_or_b32_e32 v3, v3, v4
1399; GFX9-NEXT:    v_mov_b32_e32 v4, 1
1400; GFX9-NEXT:    v_xor_b32_sdwa v9, v2, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1401; GFX9-NEXT:    v_lshlrev_b16_sdwa v5, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1402; GFX9-NEXT:    v_and_b32_e32 v9, 7, v9
1403; GFX9-NEXT:    v_mov_b32_e32 v8, 0xff
1404; GFX9-NEXT:    v_lshlrev_b16_e32 v5, v9, v5
1405; GFX9-NEXT:    v_mov_b32_e32 v9, 7
1406; GFX9-NEXT:    v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1407; GFX9-NEXT:    v_xor_b32_sdwa v4, v2, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
1408; GFX9-NEXT:    v_and_b32_sdwa v10, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1409; GFX9-NEXT:    v_and_b32_sdwa v11, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1410; GFX9-NEXT:    v_and_b32_e32 v4, 7, v4
1411; GFX9-NEXT:    v_and_b32_sdwa v2, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
1412; GFX9-NEXT:    v_lshrrev_b16_e32 v10, v10, v11
1413; GFX9-NEXT:    v_lshlrev_b16_e32 v0, v4, v0
1414; GFX9-NEXT:    v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1415; GFX9-NEXT:    v_or_b32_e32 v5, v5, v10
1416; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
1417; GFX9-NEXT:    v_mov_b32_e32 v1, 8
1418; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1419; GFX9-NEXT:    v_and_b32_e32 v2, 0xff, v5
1420; GFX9-NEXT:    v_and_b32_e32 v0, 0xff, v0
1421; GFX9-NEXT:    v_and_or_b32 v1, v6, v8, v1
1422; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
1423; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
1424; GFX9-NEXT:    v_or3_b32 v0, v1, v2, v0
1425; GFX9-NEXT:    s_setpc_b64 s[30:31]
1426;
1427; GFX10-LABEL: v_fshr_v4i8:
1428; GFX10:       ; %bb.0:
1429; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1430; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 8, v2
1431; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 8, v0
1432; GFX10-NEXT:    v_xor_b32_e32 v8, -1, v2
1433; GFX10-NEXT:    v_mov_b32_e32 v3, -1
1434; GFX10-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
1435; GFX10-NEXT:    v_xor_b32_e32 v10, -1, v5
1436; GFX10-NEXT:    v_lshrrev_b32_e32 v7, 24, v0
1437; GFX10-NEXT:    v_lshrrev_b32_e32 v9, 8, v1
1438; GFX10-NEXT:    v_lshlrev_b16 v0, 1, v0
1439; GFX10-NEXT:    v_and_b32_e32 v8, 7, v8
1440; GFX10-NEXT:    v_lshlrev_b16 v4, 1, v4
1441; GFX10-NEXT:    v_and_b32_e32 v10, 7, v10
1442; GFX10-NEXT:    v_mov_b32_e32 v14, 0xff
1443; GFX10-NEXT:    v_lshrrev_b32_e32 v11, 24, v1
1444; GFX10-NEXT:    v_lshlrev_b16 v0, v8, v0
1445; GFX10-NEXT:    v_and_b32_e32 v8, 0xff, v9
1446; GFX10-NEXT:    v_lshlrev_b16 v4, v10, v4
1447; GFX10-NEXT:    v_xor_b32_sdwa v9, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1448; GFX10-NEXT:    v_mov_b32_e32 v10, 7
1449; GFX10-NEXT:    v_xor_b32_sdwa v3, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
1450; GFX10-NEXT:    v_and_b32_e32 v12, 7, v2
1451; GFX10-NEXT:    v_and_b32_e32 v13, 0xff, v1
1452; GFX10-NEXT:    v_and_b32_e32 v5, 7, v5
1453; GFX10-NEXT:    v_lshlrev_b16 v6, 1, v6
1454; GFX10-NEXT:    v_and_b32_e32 v9, 7, v9
1455; GFX10-NEXT:    v_and_b32_sdwa v15, v2, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1456; GFX10-NEXT:    v_and_b32_sdwa v1, v1, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1457; GFX10-NEXT:    v_lshlrev_b16 v7, 1, v7
1458; GFX10-NEXT:    v_and_b32_e32 v3, 7, v3
1459; GFX10-NEXT:    v_and_b32_sdwa v2, v2, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
1460; GFX10-NEXT:    v_lshrrev_b16 v5, v5, v8
1461; GFX10-NEXT:    v_lshlrev_b16 v6, v9, v6
1462; GFX10-NEXT:    v_lshrrev_b16 v1, v15, v1
1463; GFX10-NEXT:    v_lshlrev_b16 v3, v3, v7
1464; GFX10-NEXT:    v_lshrrev_b16 v2, v2, v11
1465; GFX10-NEXT:    v_lshrrev_b16 v7, v12, v13
1466; GFX10-NEXT:    v_or_b32_e32 v4, v4, v5
1467; GFX10-NEXT:    v_mov_b32_e32 v5, 8
1468; GFX10-NEXT:    v_or_b32_e32 v1, v6, v1
1469; GFX10-NEXT:    v_or_b32_e32 v2, v3, v2
1470; GFX10-NEXT:    v_or_b32_e32 v0, v0, v7
1471; GFX10-NEXT:    v_lshlrev_b32_sdwa v3, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1472; GFX10-NEXT:    v_and_b32_e32 v1, 0xff, v1
1473; GFX10-NEXT:    v_and_b32_e32 v2, 0xff, v2
1474; GFX10-NEXT:    v_and_or_b32 v0, 0xff, v0, v3
1475; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1476; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
1477; GFX10-NEXT:    v_or3_b32 v0, v0, v1, v2
1478; GFX10-NEXT:    s_setpc_b64 s[30:31]
1479;
1480; GFX11-LABEL: v_fshr_v4i8:
1481; GFX11:       ; %bb.0:
1482; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1483; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 8, v1
1484; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 8, v2
1485; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
1486; GFX11-NEXT:    v_lshrrev_b32_e32 v11, 16, v2
1487; GFX11-NEXT:    v_lshrrev_b32_e32 v13, 24, v2
1488; GFX11-NEXT:    v_and_b32_e32 v6, 0xff, v6
1489; GFX11-NEXT:    v_xor_b32_e32 v12, -1, v7
1490; GFX11-NEXT:    v_and_b32_e32 v7, 7, v7
1491; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
1492; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 24, v0
1493; GFX11-NEXT:    v_lshrrev_b32_e32 v8, 16, v1
1494; GFX11-NEXT:    v_lshlrev_b16 v3, 1, v3
1495; GFX11-NEXT:    v_and_b32_e32 v12, 7, v12
1496; GFX11-NEXT:    v_xor_b32_e32 v14, -1, v11
1497; GFX11-NEXT:    v_lshrrev_b16 v6, v7, v6
1498; GFX11-NEXT:    v_xor_b32_e32 v7, -1, v13
1499; GFX11-NEXT:    v_lshrrev_b32_e32 v9, 24, v1
1500; GFX11-NEXT:    v_xor_b32_e32 v10, -1, v2
1501; GFX11-NEXT:    v_lshlrev_b16 v3, v12, v3
1502; GFX11-NEXT:    v_lshlrev_b16 v4, 1, v4
1503; GFX11-NEXT:    v_and_b32_e32 v12, 7, v14
1504; GFX11-NEXT:    v_and_b32_e32 v11, 7, v11
1505; GFX11-NEXT:    v_and_b32_e32 v8, 0xff, v8
1506; GFX11-NEXT:    v_lshlrev_b16 v5, 1, v5
1507; GFX11-NEXT:    v_and_b32_e32 v7, 7, v7
1508; GFX11-NEXT:    v_and_b32_e32 v13, 7, v13
1509; GFX11-NEXT:    v_lshlrev_b16 v0, 1, v0
1510; GFX11-NEXT:    v_and_b32_e32 v10, 7, v10
1511; GFX11-NEXT:    v_and_b32_e32 v2, 7, v2
1512; GFX11-NEXT:    v_and_b32_e32 v1, 0xff, v1
1513; GFX11-NEXT:    v_or_b32_e32 v3, v3, v6
1514; GFX11-NEXT:    v_lshlrev_b16 v4, v12, v4
1515; GFX11-NEXT:    v_lshrrev_b16 v6, v11, v8
1516; GFX11-NEXT:    v_lshlrev_b16 v5, v7, v5
1517; GFX11-NEXT:    v_lshrrev_b16 v7, v13, v9
1518; GFX11-NEXT:    v_lshlrev_b16 v0, v10, v0
1519; GFX11-NEXT:    v_lshrrev_b16 v1, v2, v1
1520; GFX11-NEXT:    v_and_b32_e32 v2, 0xff, v3
1521; GFX11-NEXT:    v_or_b32_e32 v3, v4, v6
1522; GFX11-NEXT:    v_or_b32_e32 v4, v5, v7
1523; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1524; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
1525; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 8, v2
1526; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1527; GFX11-NEXT:    v_and_b32_e32 v2, 0xff, v3
1528; GFX11-NEXT:    v_and_b32_e32 v3, 0xff, v4
1529; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
1530; GFX11-NEXT:    v_and_or_b32 v0, 0xff, v0, v1
1531; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
1532; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
1533; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
1534; GFX11-NEXT:    v_or3_b32 v0, v0, v1, v2
1535; GFX11-NEXT:    s_setpc_b64 s[30:31]
1536  %lhs = bitcast i32 %lhs.arg to <4 x i8>
1537  %rhs = bitcast i32 %rhs.arg to <4 x i8>
1538  %amt = bitcast i32 %amt.arg to <4 x i8>
1539  %result = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %lhs, <4 x i8> %rhs, <4 x i8> %amt)
1540  %cast.result = bitcast <4 x i8> %result to i32
1541  ret i32 %cast.result
1542}
1543
; Scalar-input funnel shift right on i24. All three operands are marked inreg
; under the amdgpu_ps calling convention; the assertions below use s0, s1 and
; s2 as the incoming values and finish by copying the result into s0 with
; v_readfirstlane_b32.
;
; What the checked sequences show, for every RUN configuration:
;   * s2 and s1 are masked to 24 bits (0xffffff) and s0 is shifted left by 1.
;   * s2 is reduced against the constant 24 using a reciprocal estimate
;     (v_cvt_f32_ubyte0 / v_rcp_iflag_f32 / v_mul_hi_u32), followed by two
;     "add -24, compare >= 24, select" correction steps.
;   * The reduced amount shifts s1 right, 23 minus the reduced amount shifts
;     the pre-shifted s0 left, and the two halves are OR'd together.
; The tahiti and fiji sequences use separate shift and v_or_b32 instructions;
; the gfx900, gfx1010 and gfx1100 sequences end in a fused v_lshl_or_b32.
;
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
1544define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt) {
1545; GFX6-LABEL: s_fshr_i24:
1546; GFX6:       ; %bb.0:
1547; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1548; GFX6-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1549; GFX6-NEXT:    v_not_b32_e32 v1, 23
1550; GFX6-NEXT:    s_and_b32 s2, s2, 0xffffff
1551; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
1552; GFX6-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1553; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
1554; GFX6-NEXT:    s_and_b32 s1, s1, 0xffffff
1555; GFX6-NEXT:    v_mul_lo_u32 v2, v0, v1
1556; GFX6-NEXT:    v_mul_hi_u32 v2, v0, v2
1557; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
1558; GFX6-NEXT:    v_mul_hi_u32 v0, s2, v0
1559; GFX6-NEXT:    v_mul_lo_u32 v0, v0, 24
1560; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
1561; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v0, v1
1562; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1563; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1564; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v0, v1
1565; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1566; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1567; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 23, v0
1568; GFX6-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
1569; GFX6-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
1570; GFX6-NEXT:    v_lshl_b32_e32 v1, s0, v1
1571; GFX6-NEXT:    v_lshr_b32_e32 v0, s1, v0
1572; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
1573; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
1574; GFX6-NEXT:    ; return to shader part epilog
1575;
1576; GFX8-LABEL: s_fshr_i24:
1577; GFX8:       ; %bb.0:
1578; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1579; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1580; GFX8-NEXT:    v_not_b32_e32 v1, 23
1581; GFX8-NEXT:    s_and_b32 s2, s2, 0xffffff
1582; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
1583; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1584; GFX8-NEXT:    v_cvt_u32_f32_e32 v0, v0
1585; GFX8-NEXT:    s_and_b32 s1, s1, 0xffffff
1586; GFX8-NEXT:    v_mul_lo_u32 v2, v0, v1
1587; GFX8-NEXT:    v_mul_hi_u32 v2, v0, v2
1588; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
1589; GFX8-NEXT:    v_mul_hi_u32 v0, s2, v0
1590; GFX8-NEXT:    v_mul_lo_u32 v0, v0, 24
1591; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s2, v0
1592; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v0, v1
1593; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1594; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1595; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v0, v1
1596; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1597; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1598; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, 23, v0
1599; GFX8-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
1600; GFX8-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
1601; GFX8-NEXT:    v_lshlrev_b32_e64 v1, v1, s0
1602; GFX8-NEXT:    v_lshrrev_b32_e64 v0, v0, s1
1603; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
1604; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
1605; GFX8-NEXT:    ; return to shader part epilog
1606;
1607; GFX9-LABEL: s_fshr_i24:
1608; GFX9:       ; %bb.0:
1609; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1610; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1611; GFX9-NEXT:    v_not_b32_e32 v1, 23
1612; GFX9-NEXT:    s_and_b32 s2, s2, 0xffffff
1613; GFX9-NEXT:    s_and_b32 s1, s1, 0xffffff
1614; GFX9-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1615; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
1616; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
1617; GFX9-NEXT:    v_mul_lo_u32 v1, v0, v1
1618; GFX9-NEXT:    v_mul_hi_u32 v1, v0, v1
1619; GFX9-NEXT:    v_add_u32_e32 v0, v0, v1
1620; GFX9-NEXT:    v_mul_hi_u32 v0, s2, v0
1621; GFX9-NEXT:    v_mul_lo_u32 v0, v0, 24
1622; GFX9-NEXT:    v_sub_u32_e32 v0, s2, v0
1623; GFX9-NEXT:    v_add_u32_e32 v1, 0xffffffe8, v0
1624; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1625; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1626; GFX9-NEXT:    v_add_u32_e32 v1, 0xffffffe8, v0
1627; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1628; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1629; GFX9-NEXT:    v_sub_u32_e32 v1, 23, v0
1630; GFX9-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
1631; GFX9-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
1632; GFX9-NEXT:    v_lshrrev_b32_e64 v0, v0, s1
1633; GFX9-NEXT:    v_lshl_or_b32 v0, s0, v1, v0
1634; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
1635; GFX9-NEXT:    ; return to shader part epilog
1636;
1637; GFX10-LABEL: s_fshr_i24:
1638; GFX10:       ; %bb.0:
1639; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1640; GFX10-NEXT:    s_and_b32 s2, s2, 0xffffff
1641; GFX10-NEXT:    s_and_b32 s1, s1, 0xffffff
1642; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
1643; GFX10-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1644; GFX10-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1645; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
1646; GFX10-NEXT:    v_mul_lo_u32 v1, 0xffffffe8, v0
1647; GFX10-NEXT:    v_mul_hi_u32 v1, v0, v1
1648; GFX10-NEXT:    v_add_nc_u32_e32 v0, v0, v1
1649; GFX10-NEXT:    v_mul_hi_u32 v0, s2, v0
1650; GFX10-NEXT:    v_mul_lo_u32 v0, v0, 24
1651; GFX10-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
1652; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0xffffffe8, v0
1653; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
1654; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1655; GFX10-NEXT:    v_add_nc_u32_e32 v1, 0xffffffe8, v0
1656; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
1657; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1658; GFX10-NEXT:    v_sub_nc_u32_e32 v1, 23, v0
1659; GFX10-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
1660; GFX10-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
1661; GFX10-NEXT:    v_lshrrev_b32_e64 v0, v0, s1
1662; GFX10-NEXT:    v_lshl_or_b32 v0, s0, v1, v0
1663; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
1664; GFX10-NEXT:    ; return to shader part epilog
1665;
1666; GFX11-LABEL: s_fshr_i24:
1667; GFX11:       ; %bb.0:
1668; GFX11-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1669; GFX11-NEXT:    s_and_b32 s2, s2, 0xffffff
1670; GFX11-NEXT:    s_and_b32 s1, s1, 0xffffff
1671; GFX11-NEXT:    s_lshl_b32 s0, s0, 1
1672; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
1673; GFX11-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1674; GFX11-NEXT:    s_waitcnt_depctr 0xfff
1675; GFX11-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1676; GFX11-NEXT:    v_cvt_u32_f32_e32 v0, v0
1677; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1678; GFX11-NEXT:    v_mul_lo_u32 v1, 0xffffffe8, v0
1679; GFX11-NEXT:    v_mul_hi_u32 v1, v0, v1
1680; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1681; GFX11-NEXT:    v_add_nc_u32_e32 v0, v0, v1
1682; GFX11-NEXT:    v_mul_hi_u32 v0, s2, v0
1683; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1684; GFX11-NEXT:    v_mul_lo_u32 v0, v0, 24
1685; GFX11-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
1686; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1687; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0xffffffe8, v0
1688; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
1689; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1690; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1691; GFX11-NEXT:    v_add_nc_u32_e32 v1, 0xffffffe8, v0
1692; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
1693; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1694; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1695; GFX11-NEXT:    v_sub_nc_u32_e32 v1, 23, v0
1696; GFX11-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
1697; GFX11-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
1698; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1699; GFX11-NEXT:    v_lshrrev_b32_e64 v0, v0, s1
1700; GFX11-NEXT:    v_lshl_or_b32 v0, s0, v1, v0
1701; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1702; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
1703; GFX11-NEXT:    ; return to shader part epilog
; IR under test: a single llvm.fshr.i24 call whose result is returned directly.
1704  %result = call i24 @llvm.fshr.i24(i24 %lhs, i24 %rhs, i24 %amt)
1705  ret i24 %result
1706}
1707
; Funnel shift right on i24 with the default calling convention and no inreg
; operands. The assertions below use v0, v1 and v2 as the incoming values,
; leave the result in v0, open with a full s_waitcnt and return through
; s_setpc_b64 s[30:31].
;
; What the checked sequences show, for every RUN configuration:
;   * v2 and v1 are masked to 24 bits (0xffffff) and v0 is shifted left by 1.
;   * v2 is reduced against the constant 24 using a reciprocal estimate
;     (v_cvt_f32_ubyte0 / v_rcp_iflag_f32 / v_mul_hi_u32), followed by two
;     "add -24, compare >= 24, select" correction steps.
;   * The reduced amount shifts v1 right, 23 minus the reduced amount shifts
;     the pre-shifted v0 left, and the two halves are OR'd together.
; The tahiti and fiji sequences use separate shift and v_or_b32 instructions;
; the gfx900, gfx1010 and gfx1100 sequences end in a fused v_lshl_or_b32.
;
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
1708define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) {
1709; GFX6-LABEL: v_fshr_i24:
1710; GFX6:       ; %bb.0:
1711; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1712; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v3, 24
1713; GFX6-NEXT:    v_rcp_iflag_f32_e32 v3, v3
1714; GFX6-NEXT:    v_not_b32_e32 v4, 23
1715; GFX6-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1716; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
1717; GFX6-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
1718; GFX6-NEXT:    v_cvt_u32_f32_e32 v3, v3
1719; GFX6-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
1720; GFX6-NEXT:    v_mul_lo_u32 v5, v3, v4
1721; GFX6-NEXT:    v_mul_hi_u32 v5, v3, v5
1722; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
1723; GFX6-NEXT:    v_mul_hi_u32 v3, v2, v3
1724; GFX6-NEXT:    v_mul_lo_u32 v3, v3, 24
1725; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
1726; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v2, v4
1727; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1728; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1729; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 0xffffffe8, v2
1730; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1731; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1732; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 23, v2
1733; GFX6-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1734; GFX6-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
1735; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v3, v0
1736; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
1737; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
1738; GFX6-NEXT:    s_setpc_b64 s[30:31]
1739;
1740; GFX8-LABEL: v_fshr_i24:
1741; GFX8:       ; %bb.0:
1742; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1743; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v3, 24
1744; GFX8-NEXT:    v_rcp_iflag_f32_e32 v3, v3
1745; GFX8-NEXT:    v_not_b32_e32 v4, 23
1746; GFX8-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1747; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
1748; GFX8-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
1749; GFX8-NEXT:    v_cvt_u32_f32_e32 v3, v3
1750; GFX8-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
1751; GFX8-NEXT:    v_mul_lo_u32 v5, v3, v4
1752; GFX8-NEXT:    v_mul_hi_u32 v5, v3, v5
1753; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v5
1754; GFX8-NEXT:    v_mul_hi_u32 v3, v2, v3
1755; GFX8-NEXT:    v_mul_lo_u32 v3, v3, 24
1756; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v2, v3
1757; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v2, v4
1758; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1759; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1760; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0xffffffe8, v2
1761; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1762; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1763; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, 23, v2
1764; GFX8-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1765; GFX8-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
1766; GFX8-NEXT:    v_lshlrev_b32_e32 v0, v3, v0
1767; GFX8-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
1768; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
1769; GFX8-NEXT:    s_setpc_b64 s[30:31]
1770;
1771; GFX9-LABEL: v_fshr_i24:
1772; GFX9:       ; %bb.0:
1773; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1774; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v3, 24
1775; GFX9-NEXT:    v_rcp_iflag_f32_e32 v3, v3
1776; GFX9-NEXT:    v_not_b32_e32 v4, 23
1777; GFX9-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1778; GFX9-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
1779; GFX9-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
1780; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v3
1781; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
1782; GFX9-NEXT:    v_mul_lo_u32 v4, v3, v4
1783; GFX9-NEXT:    v_mul_hi_u32 v4, v3, v4
1784; GFX9-NEXT:    v_add_u32_e32 v3, v3, v4
1785; GFX9-NEXT:    v_mul_hi_u32 v3, v2, v3
1786; GFX9-NEXT:    v_mul_lo_u32 v3, v3, 24
1787; GFX9-NEXT:    v_sub_u32_e32 v2, v2, v3
1788; GFX9-NEXT:    v_add_u32_e32 v3, 0xffffffe8, v2
1789; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1790; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1791; GFX9-NEXT:    v_add_u32_e32 v3, 0xffffffe8, v2
1792; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1793; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1794; GFX9-NEXT:    v_sub_u32_e32 v3, 23, v2
1795; GFX9-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1796; GFX9-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
1797; GFX9-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
1798; GFX9-NEXT:    v_lshl_or_b32 v0, v0, v3, v1
1799; GFX9-NEXT:    s_setpc_b64 s[30:31]
1800;
1801; GFX10-LABEL: v_fshr_i24:
1802; GFX10:       ; %bb.0:
1803; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1804; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v3, 24
1805; GFX10-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1806; GFX10-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
1807; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
1808; GFX10-NEXT:    v_rcp_iflag_f32_e32 v3, v3
1809; GFX10-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
1810; GFX10-NEXT:    v_cvt_u32_f32_e32 v3, v3
1811; GFX10-NEXT:    v_mul_lo_u32 v4, 0xffffffe8, v3
1812; GFX10-NEXT:    v_mul_hi_u32 v4, v3, v4
1813; GFX10-NEXT:    v_add_nc_u32_e32 v3, v3, v4
1814; GFX10-NEXT:    v_mul_hi_u32 v3, v2, v3
1815; GFX10-NEXT:    v_mul_lo_u32 v3, v3, 24
1816; GFX10-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
1817; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v2
1818; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
1819; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1820; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v2
1821; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
1822; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1823; GFX10-NEXT:    v_sub_nc_u32_e32 v3, 23, v2
1824; GFX10-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1825; GFX10-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
1826; GFX10-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
1827; GFX10-NEXT:    v_lshl_or_b32 v0, v0, v3, v1
1828; GFX10-NEXT:    s_setpc_b64 s[30:31]
1829;
1830; GFX11-LABEL: v_fshr_i24:
1831; GFX11:       ; %bb.0:
1832; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1833; GFX11-NEXT:    v_cvt_f32_ubyte0_e32 v3, 24
1834; GFX11-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1835; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
1836; GFX11-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
1837; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1)
1838; GFX11-NEXT:    v_rcp_iflag_f32_e32 v3, v3
1839; GFX11-NEXT:    s_waitcnt_depctr 0xfff
1840; GFX11-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
1841; GFX11-NEXT:    v_cvt_u32_f32_e32 v3, v3
1842; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1843; GFX11-NEXT:    v_mul_lo_u32 v4, 0xffffffe8, v3
1844; GFX11-NEXT:    v_mul_hi_u32 v4, v3, v4
1845; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1846; GFX11-NEXT:    v_add_nc_u32_e32 v3, v3, v4
1847; GFX11-NEXT:    v_mul_hi_u32 v3, v2, v3
1848; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1849; GFX11-NEXT:    v_mul_lo_u32 v3, v3, 24
1850; GFX11-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
1851; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1852; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v2
1853; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
1854; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1855; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1856; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v2
1857; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
1858; GFX11-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1859; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1860; GFX11-NEXT:    v_sub_nc_u32_e32 v3, 23, v2
1861; GFX11-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1862; GFX11-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
1863; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1864; GFX11-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
1865; GFX11-NEXT:    v_lshl_or_b32 v0, v0, v3, v1
1866; GFX11-NEXT:    s_setpc_b64 s[30:31]
; IR under test: a single llvm.fshr.i24 call whose result is returned directly.
1867  %result = call i24 @llvm.fshr.i24(i24 %lhs, i24 %rhs, i24 %amt)
1868  ret i24 %result
1869}
1870
1871define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 inreg %amt.arg) {
1872; GFX6-LABEL: s_fshr_v2i24:
1873; GFX6:       ; %bb.0:
1874; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v2, 24
1875; GFX6-NEXT:    v_rcp_iflag_f32_e32 v2, v2
1876; GFX6-NEXT:    s_lshr_b32 s7, s1, 8
1877; GFX6-NEXT:    s_bfe_u32 s9, s0, 0x80008
1878; GFX6-NEXT:    s_and_b32 s1, s1, 0xff
1879; GFX6-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
1880; GFX6-NEXT:    v_cvt_u32_f32_e32 v2, v2
1881; GFX6-NEXT:    v_mov_b32_e32 v0, s0
1882; GFX6-NEXT:    v_not_b32_e32 v3, 23
1883; GFX6-NEXT:    s_lshr_b32 s6, s0, 16
1884; GFX6-NEXT:    s_and_b32 s8, s0, 0xff
1885; GFX6-NEXT:    s_lshl_b32 s9, s9, 8
1886; GFX6-NEXT:    v_alignbit_b32 v0, s1, v0, 24
1887; GFX6-NEXT:    s_and_b32 s0, s7, 0xff
1888; GFX6-NEXT:    s_lshr_b32 s1, s2, 16
1889; GFX6-NEXT:    s_lshr_b32 s7, s3, 8
1890; GFX6-NEXT:    s_bfe_u32 s10, s2, 0x80008
1891; GFX6-NEXT:    v_mul_lo_u32 v4, v2, v3
1892; GFX6-NEXT:    s_or_b32 s8, s8, s9
1893; GFX6-NEXT:    s_and_b32 s9, s2, 0xff
1894; GFX6-NEXT:    s_lshl_b32 s10, s10, 8
1895; GFX6-NEXT:    s_and_b32 s1, s1, 0xff
1896; GFX6-NEXT:    s_and_b32 s3, s3, 0xff
1897; GFX6-NEXT:    v_mov_b32_e32 v1, s2
1898; GFX6-NEXT:    s_and_b32 s2, s7, 0xff
1899; GFX6-NEXT:    s_or_b32 s9, s9, s10
1900; GFX6-NEXT:    s_and_b32 s1, 0xffff, s1
1901; GFX6-NEXT:    v_alignbit_b32 v1, s3, v1, 24
1902; GFX6-NEXT:    s_and_b32 s2, 0xffff, s2
1903; GFX6-NEXT:    s_and_b32 s9, 0xffff, s9
1904; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
1905; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
1906; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
1907; GFX6-NEXT:    s_or_b32 s1, s9, s1
1908; GFX6-NEXT:    v_or_b32_e32 v1, s2, v1
1909; GFX6-NEXT:    s_lshr_b32 s2, s4, 16
1910; GFX6-NEXT:    s_bfe_u32 s9, s4, 0x80008
1911; GFX6-NEXT:    v_mul_hi_u32 v4, v2, v4
1912; GFX6-NEXT:    s_and_b32 s7, s4, 0xff
1913; GFX6-NEXT:    s_lshl_b32 s9, s9, 8
1914; GFX6-NEXT:    s_and_b32 s2, s2, 0xff
1915; GFX6-NEXT:    s_or_b32 s7, s7, s9
1916; GFX6-NEXT:    s_and_b32 s2, 0xffff, s2
1917; GFX6-NEXT:    s_and_b32 s7, 0xffff, s7
1918; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
1919; GFX6-NEXT:    s_or_b32 s2, s7, s2
1920; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
1921; GFX6-NEXT:    v_mul_hi_u32 v4, s2, v2
1922; GFX6-NEXT:    s_lshr_b32 s3, s5, 8
1923; GFX6-NEXT:    s_and_b32 s5, s5, 0xff
1924; GFX6-NEXT:    v_mov_b32_e32 v5, s4
1925; GFX6-NEXT:    s_and_b32 s3, s3, 0xff
1926; GFX6-NEXT:    v_alignbit_b32 v5, s5, v5, 24
1927; GFX6-NEXT:    s_and_b32 s3, 0xffff, s3
1928; GFX6-NEXT:    v_and_b32_e32 v5, 0xffff, v5
1929; GFX6-NEXT:    v_mul_lo_u32 v4, v4, 24
1930; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
1931; GFX6-NEXT:    v_or_b32_e32 v5, s3, v5
1932; GFX6-NEXT:    v_mul_hi_u32 v2, v5, v2
1933; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, s2, v4
1934; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v4, v3
1935; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
1936; GFX6-NEXT:    v_mul_lo_u32 v2, v2, 24
1937; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
1938; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v4, v3
1939; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
1940; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
1941; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v5, v2
1942; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, 23, v4
1943; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v2, v3
1944; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1945; GFX6-NEXT:    s_and_b32 s6, s6, 0xff
1946; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
1947; GFX6-NEXT:    s_and_b32 s8, 0xffff, s8
1948; GFX6-NEXT:    s_and_b32 s6, 0xffff, s6
1949; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v2, v3
1950; GFX6-NEXT:    s_lshl_b32 s2, s6, 17
1951; GFX6-NEXT:    s_lshl_b32 s3, s8, 1
1952; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1953; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1954; GFX6-NEXT:    s_and_b32 s0, 0xffff, s0
1955; GFX6-NEXT:    s_or_b32 s2, s2, s3
1956; GFX6-NEXT:    v_and_b32_e32 v6, 0xffffff, v6
1957; GFX6-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
1958; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1959; GFX6-NEXT:    v_lshl_b32_e32 v6, s2, v6
1960; GFX6-NEXT:    v_lshr_b32_e32 v4, s1, v4
1961; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 23, v2
1962; GFX6-NEXT:    s_lshl_b32 s0, s0, 17
1963; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
1964; GFX6-NEXT:    v_or_b32_e32 v4, v6, v4
1965; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
1966; GFX6-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
1967; GFX6-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1968; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v3, v0
1969; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
1970; GFX6-NEXT:    v_bfe_u32 v2, v4, 8, 8
1971; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
1972; GFX6-NEXT:    v_and_b32_e32 v1, 0xff, v4
1973; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
1974; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
1975; GFX6-NEXT:    v_bfe_u32 v2, v4, 16, 8
1976; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
1977; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
1978; GFX6-NEXT:    v_and_b32_e32 v2, 0xff, v0
1979; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
1980; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
1981; GFX6-NEXT:    v_bfe_u32 v2, v0, 8, 8
1982; GFX6-NEXT:    v_bfe_u32 v0, v0, 16, 8
1983; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
1984; GFX6-NEXT:    v_or_b32_e32 v0, v2, v0
1985; GFX6-NEXT:    v_readfirstlane_b32 s0, v1
1986; GFX6-NEXT:    v_readfirstlane_b32 s1, v0
1987; GFX6-NEXT:    ; return to shader part epilog
1988;
1989; GFX8-LABEL: s_fshr_v2i24:
1990; GFX8:       ; %bb.0:
1991; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1992; GFX8-NEXT:    s_lshr_b32 s9, s1, 8
1993; GFX8-NEXT:    s_and_b32 s1, s1, 0xff
1994; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1995; GFX8-NEXT:    s_lshr_b32 s6, s0, 8
1996; GFX8-NEXT:    s_lshr_b32 s8, s0, 24
1997; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
1998; GFX8-NEXT:    s_and_b32 s6, s6, 0xff
1999; GFX8-NEXT:    s_or_b32 s1, s8, s1
2000; GFX8-NEXT:    s_lshr_b32 s8, s2, 8
2001; GFX8-NEXT:    s_lshr_b32 s7, s0, 16
2002; GFX8-NEXT:    s_and_b32 s0, s0, 0xff
2003; GFX8-NEXT:    s_lshl_b32 s6, s6, 8
2004; GFX8-NEXT:    s_and_b32 s8, s8, 0xff
2005; GFX8-NEXT:    s_or_b32 s0, s0, s6
2006; GFX8-NEXT:    s_and_b32 s6, s7, 0xff
2007; GFX8-NEXT:    s_and_b32 s7, s9, 0xff
2008; GFX8-NEXT:    s_lshr_b32 s9, s2, 16
2009; GFX8-NEXT:    s_lshr_b32 s10, s2, 24
2010; GFX8-NEXT:    s_and_b32 s2, s2, 0xff
2011; GFX8-NEXT:    s_lshl_b32 s8, s8, 8
2012; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
2013; GFX8-NEXT:    s_or_b32 s2, s2, s8
2014; GFX8-NEXT:    s_and_b32 s8, s9, 0xff
2015; GFX8-NEXT:    v_cvt_u32_f32_e32 v0, v0
2016; GFX8-NEXT:    s_and_b32 s8, 0xffff, s8
2017; GFX8-NEXT:    s_lshr_b32 s11, s3, 8
2018; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
2019; GFX8-NEXT:    s_lshl_b32 s8, s8, 16
2020; GFX8-NEXT:    s_and_b32 s3, s3, 0xff
2021; GFX8-NEXT:    s_or_b32 s2, s2, s8
2022; GFX8-NEXT:    s_lshl_b32 s3, s3, 8
2023; GFX8-NEXT:    s_and_b32 s8, s11, 0xff
2024; GFX8-NEXT:    v_not_b32_e32 v1, 23
2025; GFX8-NEXT:    s_or_b32 s3, s10, s3
2026; GFX8-NEXT:    s_and_b32 s8, 0xffff, s8
2027; GFX8-NEXT:    v_mul_lo_u32 v2, v0, v1
2028; GFX8-NEXT:    s_and_b32 s3, 0xffff, s3
2029; GFX8-NEXT:    s_lshl_b32 s8, s8, 16
2030; GFX8-NEXT:    s_or_b32 s3, s3, s8
2031; GFX8-NEXT:    s_lshr_b32 s8, s4, 8
2032; GFX8-NEXT:    s_and_b32 s8, s8, 0xff
2033; GFX8-NEXT:    s_lshr_b32 s9, s4, 16
2034; GFX8-NEXT:    s_lshr_b32 s10, s4, 24
2035; GFX8-NEXT:    s_and_b32 s4, s4, 0xff
2036; GFX8-NEXT:    s_lshl_b32 s8, s8, 8
2037; GFX8-NEXT:    v_mul_hi_u32 v2, v0, v2
2038; GFX8-NEXT:    s_or_b32 s4, s4, s8
2039; GFX8-NEXT:    s_and_b32 s8, s9, 0xff
2040; GFX8-NEXT:    s_and_b32 s8, 0xffff, s8
2041; GFX8-NEXT:    s_and_b32 s4, 0xffff, s4
2042; GFX8-NEXT:    s_lshl_b32 s8, s8, 16
2043; GFX8-NEXT:    s_or_b32 s4, s4, s8
2044; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
2045; GFX8-NEXT:    v_mul_hi_u32 v2, s4, v0
2046; GFX8-NEXT:    s_lshr_b32 s11, s5, 8
2047; GFX8-NEXT:    s_and_b32 s5, s5, 0xff
2048; GFX8-NEXT:    s_lshl_b32 s5, s5, 8
2049; GFX8-NEXT:    v_mul_lo_u32 v2, v2, 24
2050; GFX8-NEXT:    s_and_b32 s8, s11, 0xff
2051; GFX8-NEXT:    s_or_b32 s5, s10, s5
2052; GFX8-NEXT:    s_and_b32 s8, 0xffff, s8
2053; GFX8-NEXT:    s_and_b32 s5, 0xffff, s5
2054; GFX8-NEXT:    s_lshl_b32 s8, s8, 16
2055; GFX8-NEXT:    s_or_b32 s5, s5, s8
2056; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s4, v2
2057; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v2, v1
2058; GFX8-NEXT:    v_mul_hi_u32 v0, s5, v0
2059; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2060; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
2061; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v2, v1
2062; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2063; GFX8-NEXT:    v_mul_lo_u32 v0, v0, 24
2064; GFX8-NEXT:    s_and_b32 s0, 0xffff, s0
2065; GFX8-NEXT:    s_and_b32 s6, 0xffff, s6
2066; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
2067; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, 23, v2
2068; GFX8-NEXT:    s_lshl_b32 s4, s6, 17
2069; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
2070; GFX8-NEXT:    s_or_b32 s0, s4, s0
2071; GFX8-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
2072; GFX8-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2073; GFX8-NEXT:    v_lshlrev_b32_e64 v3, v3, s0
2074; GFX8-NEXT:    v_lshrrev_b32_e64 v2, v2, s2
2075; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s5, v0
2076; GFX8-NEXT:    v_or_b32_e32 v2, v3, v2
2077; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v0, v1
2078; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
2079; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
2080; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v0, v1
2081; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
2082; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
2083; GFX8-NEXT:    s_and_b32 s7, 0xffff, s7
2084; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2085; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, 23, v0
2086; GFX8-NEXT:    s_lshl_b32 s0, s7, 17
2087; GFX8-NEXT:    s_lshl_b32 s1, s1, 1
2088; GFX8-NEXT:    s_or_b32 s0, s0, s1
2089; GFX8-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
2090; GFX8-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
2091; GFX8-NEXT:    v_lshlrev_b32_e64 v1, v1, s0
2092; GFX8-NEXT:    v_lshrrev_b32_e64 v0, v0, s3
2093; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
2094; GFX8-NEXT:    v_mov_b32_e32 v1, 8
2095; GFX8-NEXT:    v_lshlrev_b32_sdwa v3, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2096; GFX8-NEXT:    v_mov_b32_e32 v4, 16
2097; GFX8-NEXT:    v_or_b32_sdwa v3, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2098; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2099; GFX8-NEXT:    v_or_b32_e32 v2, v3, v2
2100; GFX8-NEXT:    v_and_b32_e32 v3, 0xff, v0
2101; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
2102; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2103; GFX8-NEXT:    v_or_b32_e32 v2, v2, v3
2104; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
2105; GFX8-NEXT:    v_readfirstlane_b32 s0, v2
2106; GFX8-NEXT:    v_readfirstlane_b32 s1, v0
2107; GFX8-NEXT:    ; return to shader part epilog
2108;
2109; GFX9-LABEL: s_fshr_v2i24:
2110; GFX9:       ; %bb.0:
2111; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
2112; GFX9-NEXT:    s_lshr_b32 s9, s1, 8
2113; GFX9-NEXT:    s_and_b32 s1, s1, 0xff
2114; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
2115; GFX9-NEXT:    s_lshr_b32 s6, s0, 8
2116; GFX9-NEXT:    s_lshr_b32 s8, s0, 24
2117; GFX9-NEXT:    s_lshl_b32 s1, s1, 8
2118; GFX9-NEXT:    s_and_b32 s6, s6, 0xff
2119; GFX9-NEXT:    s_or_b32 s1, s8, s1
2120; GFX9-NEXT:    s_lshr_b32 s8, s2, 8
2121; GFX9-NEXT:    s_lshr_b32 s7, s0, 16
2122; GFX9-NEXT:    s_and_b32 s0, s0, 0xff
2123; GFX9-NEXT:    s_lshl_b32 s6, s6, 8
2124; GFX9-NEXT:    s_and_b32 s8, s8, 0xff
2125; GFX9-NEXT:    s_or_b32 s0, s0, s6
2126; GFX9-NEXT:    s_and_b32 s6, s7, 0xff
2127; GFX9-NEXT:    s_and_b32 s7, s9, 0xff
2128; GFX9-NEXT:    s_lshr_b32 s9, s2, 16
2129; GFX9-NEXT:    s_lshr_b32 s10, s2, 24
2130; GFX9-NEXT:    s_and_b32 s2, s2, 0xff
2131; GFX9-NEXT:    s_lshl_b32 s8, s8, 8
2132; GFX9-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
2133; GFX9-NEXT:    s_or_b32 s2, s2, s8
2134; GFX9-NEXT:    s_and_b32 s8, s9, 0xff
2135; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
2136; GFX9-NEXT:    s_and_b32 s8, 0xffff, s8
2137; GFX9-NEXT:    s_lshr_b32 s11, s3, 8
2138; GFX9-NEXT:    s_and_b32 s2, 0xffff, s2
2139; GFX9-NEXT:    s_lshl_b32 s8, s8, 16
2140; GFX9-NEXT:    s_and_b32 s3, s3, 0xff
2141; GFX9-NEXT:    s_or_b32 s2, s2, s8
2142; GFX9-NEXT:    s_lshl_b32 s3, s3, 8
2143; GFX9-NEXT:    s_and_b32 s8, s11, 0xff
2144; GFX9-NEXT:    v_not_b32_e32 v1, 23
2145; GFX9-NEXT:    s_or_b32 s3, s10, s3
2146; GFX9-NEXT:    s_and_b32 s8, 0xffff, s8
2147; GFX9-NEXT:    v_mul_lo_u32 v1, v0, v1
2148; GFX9-NEXT:    s_and_b32 s3, 0xffff, s3
2149; GFX9-NEXT:    s_lshl_b32 s8, s8, 16
2150; GFX9-NEXT:    s_or_b32 s3, s3, s8
2151; GFX9-NEXT:    s_lshr_b32 s8, s4, 8
2152; GFX9-NEXT:    s_and_b32 s8, s8, 0xff
2153; GFX9-NEXT:    s_lshr_b32 s9, s4, 16
2154; GFX9-NEXT:    s_lshr_b32 s10, s4, 24
2155; GFX9-NEXT:    s_and_b32 s4, s4, 0xff
2156; GFX9-NEXT:    s_lshl_b32 s8, s8, 8
2157; GFX9-NEXT:    v_mul_hi_u32 v1, v0, v1
2158; GFX9-NEXT:    s_or_b32 s4, s4, s8
2159; GFX9-NEXT:    s_and_b32 s8, s9, 0xff
2160; GFX9-NEXT:    s_and_b32 s8, 0xffff, s8
2161; GFX9-NEXT:    s_and_b32 s4, 0xffff, s4
2162; GFX9-NEXT:    s_lshl_b32 s8, s8, 16
2163; GFX9-NEXT:    s_or_b32 s4, s4, s8
2164; GFX9-NEXT:    v_add_u32_e32 v0, v0, v1
2165; GFX9-NEXT:    v_mul_hi_u32 v1, s4, v0
2166; GFX9-NEXT:    s_lshr_b32 s11, s5, 8
2167; GFX9-NEXT:    s_and_b32 s5, s5, 0xff
2168; GFX9-NEXT:    s_lshl_b32 s5, s5, 8
2169; GFX9-NEXT:    s_and_b32 s8, s11, 0xff
2170; GFX9-NEXT:    s_or_b32 s5, s10, s5
2171; GFX9-NEXT:    s_and_b32 s8, 0xffff, s8
2172; GFX9-NEXT:    s_and_b32 s5, 0xffff, s5
2173; GFX9-NEXT:    v_mul_lo_u32 v1, v1, 24
2174; GFX9-NEXT:    s_lshl_b32 s8, s8, 16
2175; GFX9-NEXT:    s_or_b32 s5, s5, s8
2176; GFX9-NEXT:    v_mul_hi_u32 v0, s5, v0
2177; GFX9-NEXT:    v_sub_u32_e32 v1, s4, v1
2178; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffe8, v1
2179; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
2180; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
2181; GFX9-NEXT:    v_mul_lo_u32 v0, v0, 24
2182; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffe8, v1
2183; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
2184; GFX9-NEXT:    s_and_b32 s0, 0xffff, s0
2185; GFX9-NEXT:    s_and_b32 s6, 0xffff, s6
2186; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
2187; GFX9-NEXT:    v_sub_u32_e32 v2, 23, v1
2188; GFX9-NEXT:    s_lshl_b32 s4, s6, 17
2189; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
2190; GFX9-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
2191; GFX9-NEXT:    s_or_b32 s0, s4, s0
2192; GFX9-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2193; GFX9-NEXT:    v_lshrrev_b32_e64 v1, v1, s2
2194; GFX9-NEXT:    v_sub_u32_e32 v0, s5, v0
2195; GFX9-NEXT:    v_lshl_or_b32 v1, s0, v2, v1
2196; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffe8, v0
2197; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
2198; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2199; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffe8, v0
2200; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
2201; GFX9-NEXT:    s_and_b32 s1, 0xffff, s1
2202; GFX9-NEXT:    s_and_b32 s7, 0xffff, s7
2203; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
2204; GFX9-NEXT:    v_sub_u32_e32 v2, 23, v0
2205; GFX9-NEXT:    s_lshl_b32 s0, s7, 17
2206; GFX9-NEXT:    s_lshl_b32 s1, s1, 1
2207; GFX9-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
2208; GFX9-NEXT:    s_or_b32 s0, s0, s1
2209; GFX9-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2210; GFX9-NEXT:    v_lshrrev_b32_e64 v0, v0, s3
2211; GFX9-NEXT:    v_mov_b32_e32 v3, 8
2212; GFX9-NEXT:    v_lshl_or_b32 v0, s0, v2, v0
2213; GFX9-NEXT:    v_mov_b32_e32 v2, 0xff
2214; GFX9-NEXT:    v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2215; GFX9-NEXT:    v_and_or_b32 v2, v1, v2, v3
2216; GFX9-NEXT:    v_mov_b32_e32 v3, 16
2217; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2218; GFX9-NEXT:    v_and_b32_e32 v3, 0xff, v0
2219; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
2220; GFX9-NEXT:    v_or3_b32 v1, v2, v1, v3
2221; GFX9-NEXT:    v_bfe_u32 v2, v0, 8, 8
2222; GFX9-NEXT:    v_bfe_u32 v0, v0, 16, 8
2223; GFX9-NEXT:    v_lshl_or_b32 v0, v0, 8, v2
2224; GFX9-NEXT:    v_readfirstlane_b32 s0, v1
2225; GFX9-NEXT:    v_readfirstlane_b32 s1, v0
2226; GFX9-NEXT:    ; return to shader part epilog
2227;
2228; GFX10-LABEL: s_fshr_v2i24:
2229; GFX10:       ; %bb.0:
2230; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
2231; GFX10-NEXT:    s_lshr_b32 s14, s4, 8
2232; GFX10-NEXT:    s_lshr_b32 s15, s4, 16
2233; GFX10-NEXT:    s_and_b32 s14, s14, 0xff
2234; GFX10-NEXT:    s_lshr_b32 s16, s4, 24
2235; GFX10-NEXT:    v_rcp_iflag_f32_e32 v0, v0
2236; GFX10-NEXT:    s_and_b32 s4, s4, 0xff
2237; GFX10-NEXT:    s_and_b32 s15, s15, 0xff
2238; GFX10-NEXT:    s_lshl_b32 s14, s14, 8
2239; GFX10-NEXT:    s_and_b32 s15, 0xffff, s15
2240; GFX10-NEXT:    s_or_b32 s4, s4, s14
2241; GFX10-NEXT:    s_lshr_b32 s17, s5, 8
2242; GFX10-NEXT:    s_and_b32 s5, s5, 0xff
2243; GFX10-NEXT:    s_lshl_b32 s14, s15, 16
2244; GFX10-NEXT:    s_and_b32 s4, 0xffff, s4
2245; GFX10-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
2246; GFX10-NEXT:    s_lshl_b32 s5, s5, 8
2247; GFX10-NEXT:    s_and_b32 s15, s17, 0xff
2248; GFX10-NEXT:    s_or_b32 s4, s4, s14
2249; GFX10-NEXT:    s_or_b32 s5, s16, s5
2250; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
2251; GFX10-NEXT:    s_and_b32 s14, 0xffff, s15
2252; GFX10-NEXT:    s_and_b32 s5, 0xffff, s5
2253; GFX10-NEXT:    s_lshl_b32 s14, s14, 16
2254; GFX10-NEXT:    s_lshr_b32 s9, s1, 8
2255; GFX10-NEXT:    v_mul_lo_u32 v1, 0xffffffe8, v0
2256; GFX10-NEXT:    s_or_b32 s5, s5, s14
2257; GFX10-NEXT:    s_and_b32 s1, s1, 0xff
2258; GFX10-NEXT:    s_lshr_b32 s10, s2, 8
2259; GFX10-NEXT:    s_lshr_b32 s8, s0, 24
2260; GFX10-NEXT:    s_lshr_b32 s11, s2, 16
2261; GFX10-NEXT:    s_lshl_b32 s1, s1, 8
2262; GFX10-NEXT:    s_and_b32 s9, s9, 0xff
2263; GFX10-NEXT:    v_mul_hi_u32 v1, v0, v1
2264; GFX10-NEXT:    s_and_b32 s10, s10, 0xff
2265; GFX10-NEXT:    s_lshr_b32 s12, s2, 24
2266; GFX10-NEXT:    s_and_b32 s2, s2, 0xff
2267; GFX10-NEXT:    s_or_b32 s1, s8, s1
2268; GFX10-NEXT:    s_and_b32 s8, 0xffff, s9
2269; GFX10-NEXT:    s_lshl_b32 s9, s10, 8
2270; GFX10-NEXT:    s_lshr_b32 s6, s0, 8
2271; GFX10-NEXT:    v_add_nc_u32_e32 v0, v0, v1
2272; GFX10-NEXT:    s_or_b32 s2, s2, s9
2273; GFX10-NEXT:    s_lshr_b32 s13, s3, 8
2274; GFX10-NEXT:    s_and_b32 s2, 0xffff, s2
2275; GFX10-NEXT:    s_and_b32 s3, s3, 0xff
2276; GFX10-NEXT:    v_mul_hi_u32 v1, s4, v0
2277; GFX10-NEXT:    v_mul_hi_u32 v0, s5, v0
2278; GFX10-NEXT:    s_and_b32 s6, s6, 0xff
2279; GFX10-NEXT:    s_lshl_b32 s3, s3, 8
2280; GFX10-NEXT:    s_lshr_b32 s7, s0, 16
2281; GFX10-NEXT:    s_and_b32 s0, s0, 0xff
2282; GFX10-NEXT:    s_lshl_b32 s6, s6, 8
2283; GFX10-NEXT:    s_or_b32 s3, s12, s3
2284; GFX10-NEXT:    v_mul_lo_u32 v1, v1, 24
2285; GFX10-NEXT:    v_mul_lo_u32 v0, v0, 24
2286; GFX10-NEXT:    s_and_b32 s7, s7, 0xff
2287; GFX10-NEXT:    s_or_b32 s0, s0, s6
2288; GFX10-NEXT:    s_and_b32 s3, 0xffff, s3
2289; GFX10-NEXT:    s_and_b32 s7, 0xffff, s7
2290; GFX10-NEXT:    s_and_b32 s0, 0xffff, s0
2291; GFX10-NEXT:    s_and_b32 s1, 0xffff, s1
2292; GFX10-NEXT:    v_sub_nc_u32_e32 v1, s4, v1
2293; GFX10-NEXT:    v_sub_nc_u32_e32 v0, s5, v0
2294; GFX10-NEXT:    s_and_b32 s4, s11, 0xff
2295; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
2296; GFX10-NEXT:    s_and_b32 s4, 0xffff, s4
2297; GFX10-NEXT:    v_add_nc_u32_e32 v2, 0xffffffe8, v1
2298; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v1
2299; GFX10-NEXT:    s_lshl_b32 s4, s4, 16
2300; GFX10-NEXT:    s_lshl_b32 s1, s1, 1
2301; GFX10-NEXT:    s_or_b32 s2, s2, s4
2302; GFX10-NEXT:    s_and_b32 s4, s13, 0xff
2303; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc_lo
2304; GFX10-NEXT:    v_add_nc_u32_e32 v2, 0xffffffe8, v0
2305; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
2306; GFX10-NEXT:    s_and_b32 s4, 0xffff, s4
2307; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v1
2308; GFX10-NEXT:    s_lshl_b32 s4, s4, 16
2309; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2310; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v1
2311; GFX10-NEXT:    s_or_b32 s3, s3, s4
2312; GFX10-NEXT:    s_lshl_b32 s4, s7, 17
2313; GFX10-NEXT:    v_add_nc_u32_e32 v2, 0xffffffe8, v0
2314; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc_lo
2315; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
2316; GFX10-NEXT:    s_or_b32 s0, s4, s0
2317; GFX10-NEXT:    v_sub_nc_u32_e32 v3, 23, v1
2318; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2319; GFX10-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
2320; GFX10-NEXT:    v_and_b32_e32 v2, 0xffffff, v3
2321; GFX10-NEXT:    v_sub_nc_u32_e32 v3, 23, v0
2322; GFX10-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
2323; GFX10-NEXT:    v_lshrrev_b32_e64 v1, v1, s2
2324; GFX10-NEXT:    s_lshl_b32 s2, s8, 17
2325; GFX10-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
2326; GFX10-NEXT:    v_lshrrev_b32_e64 v0, v0, s3
2327; GFX10-NEXT:    v_lshl_or_b32 v1, s0, v2, v1
2328; GFX10-NEXT:    s_or_b32 s0, s2, s1
2329; GFX10-NEXT:    v_mov_b32_e32 v2, 8
2330; GFX10-NEXT:    v_lshl_or_b32 v0, s0, v3, v0
2331; GFX10-NEXT:    v_mov_b32_e32 v3, 16
2332; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2333; GFX10-NEXT:    v_and_b32_e32 v4, 0xff, v0
2334; GFX10-NEXT:    v_and_or_b32 v2, 0xff, v1, v2
2335; GFX10-NEXT:    v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2336; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 24, v4
2337; GFX10-NEXT:    v_bfe_u32 v4, v0, 8, 8
2338; GFX10-NEXT:    v_bfe_u32 v0, v0, 16, 8
2339; GFX10-NEXT:    v_or3_b32 v1, v2, v1, v3
2340; GFX10-NEXT:    v_lshl_or_b32 v0, v0, 8, v4
2341; GFX10-NEXT:    v_readfirstlane_b32 s0, v1
2342; GFX10-NEXT:    v_readfirstlane_b32 s1, v0
2343; GFX10-NEXT:    ; return to shader part epilog
2344;
2345; GFX11-LABEL: s_fshr_v2i24:
2346; GFX11:       ; %bb.0:
2347; GFX11-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
2348; GFX11-NEXT:    s_lshr_b32 s14, s4, 8
2349; GFX11-NEXT:    s_lshr_b32 s15, s4, 16
2350; GFX11-NEXT:    s_and_b32 s14, s14, 0xff
2351; GFX11-NEXT:    s_lshr_b32 s16, s4, 24
2352; GFX11-NEXT:    v_rcp_iflag_f32_e32 v0, v0
2353; GFX11-NEXT:    s_and_b32 s4, s4, 0xff
2354; GFX11-NEXT:    s_and_b32 s15, s15, 0xff
2355; GFX11-NEXT:    s_lshl_b32 s14, s14, 8
2356; GFX11-NEXT:    s_and_b32 s15, 0xffff, s15
2357; GFX11-NEXT:    s_or_b32 s4, s4, s14
2358; GFX11-NEXT:    s_lshr_b32 s17, s5, 8
2359; GFX11-NEXT:    s_and_b32 s5, s5, 0xff
2360; GFX11-NEXT:    s_lshl_b32 s14, s15, 16
2361; GFX11-NEXT:    s_and_b32 s4, 0xffff, s4
2362; GFX11-NEXT:    s_waitcnt_depctr 0xfff
2363; GFX11-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
2364; GFX11-NEXT:    s_lshl_b32 s5, s5, 8
2365; GFX11-NEXT:    s_and_b32 s15, s17, 0xff
2366; GFX11-NEXT:    s_or_b32 s4, s4, s14
2367; GFX11-NEXT:    s_or_b32 s5, s16, s5
2368; GFX11-NEXT:    v_cvt_u32_f32_e32 v0, v0
2369; GFX11-NEXT:    s_and_b32 s14, 0xffff, s15
2370; GFX11-NEXT:    s_and_b32 s5, 0xffff, s5
2371; GFX11-NEXT:    s_lshl_b32 s14, s14, 16
2372; GFX11-NEXT:    s_lshr_b32 s10, s2, 8
2373; GFX11-NEXT:    v_mul_lo_u32 v1, 0xffffffe8, v0
2374; GFX11-NEXT:    s_or_b32 s5, s5, s14
2375; GFX11-NEXT:    s_lshr_b32 s9, s1, 8
2376; GFX11-NEXT:    s_and_b32 s1, s1, 0xff
2377; GFX11-NEXT:    s_lshr_b32 s11, s2, 16
2378; GFX11-NEXT:    s_and_b32 s10, s10, 0xff
2379; GFX11-NEXT:    s_lshr_b32 s6, s0, 8
2380; GFX11-NEXT:    s_lshr_b32 s8, s0, 24
2381; GFX11-NEXT:    v_mul_hi_u32 v1, v0, v1
2382; GFX11-NEXT:    s_lshr_b32 s12, s2, 24
2383; GFX11-NEXT:    s_and_b32 s2, s2, 0xff
2384; GFX11-NEXT:    s_lshl_b32 s1, s1, 8
2385; GFX11-NEXT:    s_and_b32 s9, s9, 0xff
2386; GFX11-NEXT:    s_and_b32 s11, s11, 0xff
2387; GFX11-NEXT:    s_and_b32 s6, s6, 0xff
2388; GFX11-NEXT:    s_or_b32 s1, s8, s1
2389; GFX11-NEXT:    v_add_nc_u32_e32 v0, v0, v1
2390; GFX11-NEXT:    s_and_b32 s8, 0xffff, s9
2391; GFX11-NEXT:    s_and_b32 s9, 0xffff, s11
2392; GFX11-NEXT:    s_lshr_b32 s7, s0, 16
2393; GFX11-NEXT:    s_and_b32 s0, s0, 0xff
2394; GFX11-NEXT:    v_mul_hi_u32 v1, s4, v0
2395; GFX11-NEXT:    v_mul_hi_u32 v0, s5, v0
2396; GFX11-NEXT:    s_lshr_b32 s13, s3, 8
2397; GFX11-NEXT:    s_and_b32 s3, s3, 0xff
2398; GFX11-NEXT:    s_lshl_b32 s6, s6, 8
2399; GFX11-NEXT:    s_and_b32 s7, s7, 0xff
2400; GFX11-NEXT:    s_lshl_b32 s3, s3, 8
2401; GFX11-NEXT:    s_and_b32 s13, s13, 0xff
2402; GFX11-NEXT:    v_mul_lo_u32 v1, v1, 24
2403; GFX11-NEXT:    v_mul_lo_u32 v0, v0, 24
2404; GFX11-NEXT:    s_or_b32 s0, s0, s6
2405; GFX11-NEXT:    s_and_b32 s7, 0xffff, s7
2406; GFX11-NEXT:    s_or_b32 s3, s12, s3
2407; GFX11-NEXT:    s_and_b32 s0, 0xffff, s0
2408; GFX11-NEXT:    s_and_b32 s3, 0xffff, s3
2409; GFX11-NEXT:    s_lshl_b32 s0, s0, 1
2410; GFX11-NEXT:    v_sub_nc_u32_e32 v1, s4, v1
2411; GFX11-NEXT:    v_sub_nc_u32_e32 v0, s5, v0
2412; GFX11-NEXT:    s_lshl_b32 s4, s10, 8
2413; GFX11-NEXT:    s_and_b32 s10, 0xffff, s13
2414; GFX11-NEXT:    s_or_b32 s2, s2, s4
2415; GFX11-NEXT:    v_add_nc_u32_e32 v2, 0xffffffe8, v1
2416; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v1
2417; GFX11-NEXT:    s_lshl_b32 s4, s9, 16
2418; GFX11-NEXT:    s_and_b32 s2, 0xffff, s2
2419; GFX11-NEXT:    s_lshl_b32 s5, s10, 16
2420; GFX11-NEXT:    s_or_b32 s2, s2, s4
2421; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v2 :: v_dual_add_nc_u32 v2, 0xffffffe8, v0
2422; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
2423; GFX11-NEXT:    s_lshl_b32 s4, s7, 17
2424; GFX11-NEXT:    s_and_b32 s1, 0xffff, s1
2425; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
2426; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xffffffe8, v1
2427; GFX11-NEXT:    s_or_b32 s0, s4, s0
2428; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2429; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v1
2430; GFX11-NEXT:    s_lshl_b32 s1, s1, 1
2431; GFX11-NEXT:    v_dual_cndmask_b32 v1, v1, v3 :: v_dual_add_nc_u32 v2, 0xffffffe8, v0
2432; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
2433; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
2434; GFX11-NEXT:    v_sub_nc_u32_e32 v3, 23, v1
2435; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v2 :: v_dual_and_b32 v1, 0xffffff, v1
2436; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2437; GFX11-NEXT:    v_and_b32_e32 v2, 0xffffff, v3
2438; GFX11-NEXT:    v_lshrrev_b32_e64 v1, v1, s2
2439; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
2440; GFX11-NEXT:    v_sub_nc_u32_e32 v3, 23, v0
2441; GFX11-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
2442; GFX11-NEXT:    s_or_b32 s2, s3, s5
2443; GFX11-NEXT:    v_lshl_or_b32 v1, s0, v2, v1
2444; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
2445; GFX11-NEXT:    v_and_b32_e32 v2, 0xffffff, v3
2446; GFX11-NEXT:    v_lshrrev_b32_e64 v0, v0, s2
2447; GFX11-NEXT:    s_lshl_b32 s0, s8, 17
2448; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2449; GFX11-NEXT:    s_or_b32 s0, s0, s1
2450; GFX11-NEXT:    v_bfe_u32 v3, v1, 16, 8
2451; GFX11-NEXT:    v_lshl_or_b32 v0, s0, v2, v0
2452; GFX11-NEXT:    v_bfe_u32 v2, v1, 8, 8
2453; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
2454; GFX11-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
2455; GFX11-NEXT:    v_and_b32_e32 v4, 0xff, v0
2456; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
2457; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
2458; GFX11-NEXT:    v_lshlrev_b32_e32 v4, 24, v4
2459; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
2460; GFX11-NEXT:    v_and_or_b32 v1, 0xff, v1, v2
2461; GFX11-NEXT:    v_bfe_u32 v2, v0, 8, 8
2462; GFX11-NEXT:    v_bfe_u32 v0, v0, 16, 8
2463; GFX11-NEXT:    v_or3_b32 v1, v1, v3, v4
2464; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2465; GFX11-NEXT:    v_lshl_or_b32 v0, v0, 8, v2
2466; GFX11-NEXT:    v_readfirstlane_b32 s0, v1
2467; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
2468; GFX11-NEXT:    v_readfirstlane_b32 s1, v0
2469; GFX11-NEXT:    ; return to shader part epilog
2470  %lhs = bitcast i48 %lhs.arg to <2 x i24>
2471  %rhs = bitcast i48 %rhs.arg to <2 x i24>
2472  %amt = bitcast i48 %amt.arg to <2 x i24>
2473  %result = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt)
2474  %cast.result = bitcast <2 x i24> %result to i48
2475  ret i48 %cast.result
2476}
2477
; Funnel shift right on <2 x i24> with all operands passed as ordinary
; (non-inreg) arguments; the checks below read them from v0-v5 and return
; through s_setpc_b64.
; For every run the expected code reduces each shift amount modulo 24 using a
; float reciprocal of 24 (v_rcp_iflag_f32, multiply by 0x4f7ffffe, convert to
; u32, multiply back by 24 and subtract) followed by two compare/select
; correction steps. It then shifts lhs left by 1 and by (23 - amt), shifts
; (rhs & 0xffffff) right by amt, and ORs the two halves together.
; The gfx900, gfx1010 and gfx1100 runs expect the last shift and the OR to be
; emitted as one v_lshl_or_b32.
; NOTE: the assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
2478define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
2479; GFX6-LABEL: v_fshr_v2i24:
2480; GFX6:       ; %bb.0:
2481; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2482; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v6, 24
2483; GFX6-NEXT:    v_rcp_iflag_f32_e32 v6, v6
2484; GFX6-NEXT:    v_not_b32_e32 v7, 23
2485; GFX6-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2486; GFX6-NEXT:    v_and_b32_e32 v5, 0xffffff, v5
2487; GFX6-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
2488; GFX6-NEXT:    v_cvt_u32_f32_e32 v6, v6
2489; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
2490; GFX6-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2491; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
2492; GFX6-NEXT:    v_mul_lo_u32 v8, v6, v7
2493; GFX6-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
2494; GFX6-NEXT:    v_mul_hi_u32 v8, v6, v8
2495; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
2496; GFX6-NEXT:    v_mul_hi_u32 v8, v4, v6
2497; GFX6-NEXT:    v_mul_hi_u32 v6, v5, v6
2498; GFX6-NEXT:    v_mul_lo_u32 v8, v8, 24
2499; GFX6-NEXT:    v_mul_lo_u32 v6, v6, 24
2500; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, v4, v8
2501; GFX6-NEXT:    v_add_i32_e32 v8, vcc, v4, v7
2502; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
2503; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
2504; GFX6-NEXT:    v_add_i32_e32 v8, vcc, v4, v7
2505; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
2506; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
2507; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, 23, v4
2508; GFX6-NEXT:    v_and_b32_e32 v8, 0xffffff, v8
2509; GFX6-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2510; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v8, v0
2511; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
2512; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
2513; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v5, v6
2514; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v2, v7
2515; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2516; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2517; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 0xffffffe8, v2
2518; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2519; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2520; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 23, v2
2521; GFX6-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2522; GFX6-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2523; GFX6-NEXT:    v_lshlrev_b32_e32 v1, v4, v1
2524; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v2, v3
2525; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
2526; GFX6-NEXT:    s_setpc_b64 s[30:31]
2527;
2528; GFX8-LABEL: v_fshr_v2i24:
2529; GFX8:       ; %bb.0:
2530; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2531; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v6, 24
2532; GFX8-NEXT:    v_rcp_iflag_f32_e32 v6, v6
2533; GFX8-NEXT:    v_not_b32_e32 v7, 23
2534; GFX8-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2535; GFX8-NEXT:    v_and_b32_e32 v5, 0xffffff, v5
2536; GFX8-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
2537; GFX8-NEXT:    v_cvt_u32_f32_e32 v6, v6
2538; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
2539; GFX8-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2540; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
2541; GFX8-NEXT:    v_mul_lo_u32 v8, v6, v7
2542; GFX8-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
2543; GFX8-NEXT:    v_mul_hi_u32 v8, v6, v8
2544; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v8
2545; GFX8-NEXT:    v_mul_hi_u32 v8, v4, v6
2546; GFX8-NEXT:    v_mul_hi_u32 v6, v5, v6
2547; GFX8-NEXT:    v_mul_lo_u32 v8, v8, 24
2548; GFX8-NEXT:    v_mul_lo_u32 v6, v6, 24
2549; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, v4, v8
2550; GFX8-NEXT:    v_add_u32_e32 v8, vcc, v4, v7
2551; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
2552; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
2553; GFX8-NEXT:    v_add_u32_e32 v8, vcc, v4, v7
2554; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
2555; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc
2556; GFX8-NEXT:    v_sub_u32_e32 v8, vcc, 23, v4
2557; GFX8-NEXT:    v_and_b32_e32 v8, 0xffffff, v8
2558; GFX8-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2559; GFX8-NEXT:    v_lshlrev_b32_e32 v0, v8, v0
2560; GFX8-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
2561; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
2562; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v5, v6
2563; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v2, v7
2564; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2565; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2566; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 0xffffffe8, v2
2567; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2568; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2569; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 23, v2
2570; GFX8-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2571; GFX8-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2572; GFX8-NEXT:    v_lshlrev_b32_e32 v1, v4, v1
2573; GFX8-NEXT:    v_lshrrev_b32_e32 v2, v2, v3
2574; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
2575; GFX8-NEXT:    s_setpc_b64 s[30:31]
2576;
2577; GFX9-LABEL: v_fshr_v2i24:
2578; GFX9:       ; %bb.0:
2579; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2580; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v6, 24
2581; GFX9-NEXT:    v_rcp_iflag_f32_e32 v6, v6
2582; GFX9-NEXT:    v_not_b32_e32 v7, 23
2583; GFX9-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2584; GFX9-NEXT:    v_and_b32_e32 v5, 0xffffff, v5
2585; GFX9-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
2586; GFX9-NEXT:    v_cvt_u32_f32_e32 v6, v6
2587; GFX9-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2588; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
2589; GFX9-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
2590; GFX9-NEXT:    v_mul_lo_u32 v7, v6, v7
2591; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
2592; GFX9-NEXT:    v_mul_hi_u32 v7, v6, v7
2593; GFX9-NEXT:    v_add_u32_e32 v6, v6, v7
2594; GFX9-NEXT:    v_mul_hi_u32 v7, v4, v6
2595; GFX9-NEXT:    v_mul_hi_u32 v6, v5, v6
2596; GFX9-NEXT:    v_mul_lo_u32 v7, v7, 24
2597; GFX9-NEXT:    v_mul_lo_u32 v6, v6, 24
2598; GFX9-NEXT:    v_sub_u32_e32 v4, v4, v7
2599; GFX9-NEXT:    v_sub_u32_e32 v5, v5, v6
2600; GFX9-NEXT:    v_add_u32_e32 v6, 0xffffffe8, v4
2601; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
2602; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
2603; GFX9-NEXT:    v_add_u32_e32 v6, 0xffffffe8, v4
2604; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
2605; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
2606; GFX9-NEXT:    v_sub_u32_e32 v6, 23, v4
2607; GFX9-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2608; GFX9-NEXT:    v_and_b32_e32 v6, 0xffffff, v6
2609; GFX9-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
2610; GFX9-NEXT:    v_lshl_or_b32 v0, v0, v6, v2
2611; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffe8, v5
2612; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v5
2613; GFX9-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
2614; GFX9-NEXT:    v_add_u32_e32 v4, 0xffffffe8, v2
2615; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2616; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2617; GFX9-NEXT:    v_sub_u32_e32 v4, 23, v2
2618; GFX9-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2619; GFX9-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2620; GFX9-NEXT:    v_lshrrev_b32_e32 v2, v2, v3
2621; GFX9-NEXT:    v_lshl_or_b32 v1, v1, v4, v2
2622; GFX9-NEXT:    s_setpc_b64 s[30:31]
2623;
2624; GFX10-LABEL: v_fshr_v2i24:
2625; GFX10:       ; %bb.0:
2626; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2627; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v6, 24
2628; GFX10-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2629; GFX10-NEXT:    v_and_b32_e32 v5, 0xffffff, v5
2630; GFX10-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2631; GFX10-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
2632; GFX10-NEXT:    v_rcp_iflag_f32_e32 v6, v6
2633; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
2634; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
2635; GFX10-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
2636; GFX10-NEXT:    v_cvt_u32_f32_e32 v6, v6
2637; GFX10-NEXT:    v_mul_lo_u32 v7, 0xffffffe8, v6
2638; GFX10-NEXT:    v_mul_hi_u32 v7, v6, v7
2639; GFX10-NEXT:    v_add_nc_u32_e32 v6, v6, v7
2640; GFX10-NEXT:    v_mul_hi_u32 v7, v4, v6
2641; GFX10-NEXT:    v_mul_hi_u32 v6, v5, v6
2642; GFX10-NEXT:    v_mul_lo_u32 v7, v7, 24
2643; GFX10-NEXT:    v_mul_lo_u32 v6, v6, 24
2644; GFX10-NEXT:    v_sub_nc_u32_e32 v4, v4, v7
2645; GFX10-NEXT:    v_sub_nc_u32_e32 v5, v5, v6
2646; GFX10-NEXT:    v_add_nc_u32_e32 v6, 0xffffffe8, v4
2647; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v4
2648; GFX10-NEXT:    v_add_nc_u32_e32 v7, 0xffffffe8, v5
2649; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc_lo
2650; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
2651; GFX10-NEXT:    v_add_nc_u32_e32 v6, 0xffffffe8, v4
2652; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc_lo
2653; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v4
2654; GFX10-NEXT:    v_add_nc_u32_e32 v7, 0xffffffe8, v5
2655; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc_lo
2656; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
2657; GFX10-NEXT:    v_sub_nc_u32_e32 v6, 23, v4
2658; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc_lo
2659; GFX10-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2660; GFX10-NEXT:    v_and_b32_e32 v6, 0xffffff, v6
2661; GFX10-NEXT:    v_sub_nc_u32_e32 v7, 23, v5
2662; GFX10-NEXT:    v_and_b32_e32 v5, 0xffffff, v5
2663; GFX10-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
2664; GFX10-NEXT:    v_and_b32_e32 v4, 0xffffff, v7
2665; GFX10-NEXT:    v_lshrrev_b32_e32 v3, v5, v3
2666; GFX10-NEXT:    v_lshl_or_b32 v0, v0, v6, v2
2667; GFX10-NEXT:    v_lshl_or_b32 v1, v1, v4, v3
2668; GFX10-NEXT:    s_setpc_b64 s[30:31]
2669;
2670; GFX11-LABEL: v_fshr_v2i24:
2671; GFX11:       ; %bb.0:
2672; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2673; GFX11-NEXT:    v_cvt_f32_ubyte0_e32 v6, 24
2674; GFX11-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2675; GFX11-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
2676; GFX11-NEXT:    v_and_b32_e32 v3, 0xffffff, v3
2677; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
2678; GFX11-NEXT:    v_rcp_iflag_f32_e32 v6, v6
2679; GFX11-NEXT:    s_waitcnt_depctr 0xfff
2680; GFX11-NEXT:    v_dual_mul_f32 v6, 0x4f7ffffe, v6 :: v_dual_lshlrev_b32 v1, 1, v1
2681; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2682; GFX11-NEXT:    v_cvt_u32_f32_e32 v6, v6
2683; GFX11-NEXT:    v_mul_lo_u32 v7, 0xffffffe8, v6
2684; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2685; GFX11-NEXT:    v_mul_hi_u32 v7, v6, v7
2686; GFX11-NEXT:    v_add_nc_u32_e32 v6, v6, v7
2687; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2688; GFX11-NEXT:    v_mul_hi_u32 v7, v4, v6
2689; GFX11-NEXT:    v_mul_lo_u32 v7, v7, 24
2690; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2691; GFX11-NEXT:    v_sub_nc_u32_e32 v4, v4, v7
2692; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v4
2693; GFX11-NEXT:    v_and_b32_e32 v5, 0xffffff, v5
2694; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2695; GFX11-NEXT:    v_mul_hi_u32 v6, v5, v6
2696; GFX11-NEXT:    v_mul_lo_u32 v6, v6, 24
2697; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
2698; GFX11-NEXT:    v_sub_nc_u32_e32 v5, v5, v6
2699; GFX11-NEXT:    v_add_nc_u32_e32 v6, 0xffffffe8, v4
2700; GFX11-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc_lo
2701; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
2702; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
2703; GFX11-NEXT:    v_add_nc_u32_e32 v6, 0xffffffe8, v4
2704; GFX11-NEXT:    v_add_nc_u32_e32 v7, 0xffffffe8, v5
2705; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2706; GFX11-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc_lo
2707; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v4
2708; GFX11-NEXT:    v_dual_cndmask_b32 v4, v4, v6 :: v_dual_add_nc_u32 v7, 0xffffffe8, v5
2709; GFX11-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
2710; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
2711; GFX11-NEXT:    v_sub_nc_u32_e32 v6, 23, v4
2712; GFX11-NEXT:    v_dual_cndmask_b32 v5, v5, v7 :: v_dual_and_b32 v4, 0xffffff, v4
2713; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2714; GFX11-NEXT:    v_and_b32_e32 v6, 0xffffff, v6
2715; GFX11-NEXT:    v_sub_nc_u32_e32 v7, 23, v5
2716; GFX11-NEXT:    v_and_b32_e32 v5, 0xffffff, v5
2717; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
2718; GFX11-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
2719; GFX11-NEXT:    v_and_b32_e32 v4, 0xffffff, v7
2720; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
2721; GFX11-NEXT:    v_lshrrev_b32_e32 v3, v5, v3
2722; GFX11-NEXT:    v_lshl_or_b32 v0, v0, v6, v2
2723; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
2724; GFX11-NEXT:    v_lshl_or_b32 v1, v1, v4, v3
2725; GFX11-NEXT:    s_setpc_b64 s[30:31]
; IR under test: one call to the intrinsic, whose result is returned unchanged.
2726  %result = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt)
2727  ret <2 x i24> %result
2728}
2729
; Scalar i32 funnel shift right with a variable amount. All three operands are
; inreg arguments of an amdgpu_ps function; the checks read them from s0-s2.
; Every run expects a single v_alignbit_b32, with the result moved back into
; s0 by v_readfirstlane_b32. The tahiti, fiji and gfx900 runs first copy both
; s1 and s2 into VGPRs; the gfx1010 and gfx1100 runs copy only the amount (s2)
; and pass s1 to the instruction directly.
2730define amdgpu_ps i32 @s_fshr_i32(i32 inreg %lhs, i32 inreg %rhs, i32 inreg %amt) {
2731; GFX6-LABEL: s_fshr_i32:
2732; GFX6:       ; %bb.0:
2733; GFX6-NEXT:    v_mov_b32_e32 v0, s1
2734; GFX6-NEXT:    v_mov_b32_e32 v1, s2
2735; GFX6-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2736; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
2737; GFX6-NEXT:    ; return to shader part epilog
2738;
2739; GFX8-LABEL: s_fshr_i32:
2740; GFX8:       ; %bb.0:
2741; GFX8-NEXT:    v_mov_b32_e32 v0, s1
2742; GFX8-NEXT:    v_mov_b32_e32 v1, s2
2743; GFX8-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2744; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
2745; GFX8-NEXT:    ; return to shader part epilog
2746;
2747; GFX9-LABEL: s_fshr_i32:
2748; GFX9:       ; %bb.0:
2749; GFX9-NEXT:    v_mov_b32_e32 v0, s1
2750; GFX9-NEXT:    v_mov_b32_e32 v1, s2
2751; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2752; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
2753; GFX9-NEXT:    ; return to shader part epilog
2754;
2755; GFX10-LABEL: s_fshr_i32:
2756; GFX10:       ; %bb.0:
2757; GFX10-NEXT:    v_mov_b32_e32 v0, s2
2758; GFX10-NEXT:    v_alignbit_b32 v0, s0, s1, v0
2759; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
2760; GFX10-NEXT:    ; return to shader part epilog
2761;
2762; GFX11-LABEL: s_fshr_i32:
2763; GFX11:       ; %bb.0:
2764; GFX11-NEXT:    v_mov_b32_e32 v0, s2
2765; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2766; GFX11-NEXT:    v_alignbit_b32 v0, s0, s1, v0
2767; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
2768; GFX11-NEXT:    ; return to shader part epilog
; IR under test: one call to the intrinsic, whose result is returned unchanged.
2769  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
2770  ret i32 %result
2771}
2772
; i32 funnel shift right of two inreg operands by the constant 5. The shift
; amount appears as an inline operand of v_alignbit_b32 and the result is
; copied to s0 with v_readfirstlane_b32. The GFX6, GFX8 and GFX9 checks copy s1
; into v0 first; the GFX10 and GFX11 checks use s1 directly.
2773define amdgpu_ps i32 @s_fshr_i32_5(i32 inreg %lhs, i32 inreg %rhs) {
2774; GFX6-LABEL: s_fshr_i32_5:
2775; GFX6:       ; %bb.0:
2776; GFX6-NEXT:    v_mov_b32_e32 v0, s1
2777; GFX6-NEXT:    v_alignbit_b32 v0, s0, v0, 5
2778; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
2779; GFX6-NEXT:    ; return to shader part epilog
2780;
2781; GFX8-LABEL: s_fshr_i32_5:
2782; GFX8:       ; %bb.0:
2783; GFX8-NEXT:    v_mov_b32_e32 v0, s1
2784; GFX8-NEXT:    v_alignbit_b32 v0, s0, v0, 5
2785; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
2786; GFX8-NEXT:    ; return to shader part epilog
2787;
2788; GFX9-LABEL: s_fshr_i32_5:
2789; GFX9:       ; %bb.0:
2790; GFX9-NEXT:    v_mov_b32_e32 v0, s1
2791; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, 5
2792; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
2793; GFX9-NEXT:    ; return to shader part epilog
2794;
2795; GFX10-LABEL: s_fshr_i32_5:
2796; GFX10:       ; %bb.0:
2797; GFX10-NEXT:    v_alignbit_b32 v0, s0, s1, 5
2798; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
2799; GFX10-NEXT:    ; return to shader part epilog
2800;
2801; GFX11-LABEL: s_fshr_i32_5:
2802; GFX11:       ; %bb.0:
2803; GFX11-NEXT:    v_alignbit_b32 v0, s0, s1, 5
2804; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2805; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
2806; GFX11-NEXT:    ; return to shader part epilog
2807  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 5)
2808  ret i32 %result
2809}
2810
; i32 funnel shift right of two inreg operands by the constant 8. The expected
; code has the same shape as the shift-by-5 variant of this test: one
; v_alignbit_b32 with the amount as an inline operand, then v_readfirstlane_b32
; into s0, with an extra copy of s1 into v0 in the GFX6, GFX8 and GFX9 checks.
2811define amdgpu_ps i32 @s_fshr_i32_8(i32 inreg %lhs, i32 inreg %rhs) {
2812; GFX6-LABEL: s_fshr_i32_8:
2813; GFX6:       ; %bb.0:
2814; GFX6-NEXT:    v_mov_b32_e32 v0, s1
2815; GFX6-NEXT:    v_alignbit_b32 v0, s0, v0, 8
2816; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
2817; GFX6-NEXT:    ; return to shader part epilog
2818;
2819; GFX8-LABEL: s_fshr_i32_8:
2820; GFX8:       ; %bb.0:
2821; GFX8-NEXT:    v_mov_b32_e32 v0, s1
2822; GFX8-NEXT:    v_alignbit_b32 v0, s0, v0, 8
2823; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
2824; GFX8-NEXT:    ; return to shader part epilog
2825;
2826; GFX9-LABEL: s_fshr_i32_8:
2827; GFX9:       ; %bb.0:
2828; GFX9-NEXT:    v_mov_b32_e32 v0, s1
2829; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, 8
2830; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
2831; GFX9-NEXT:    ; return to shader part epilog
2832;
2833; GFX10-LABEL: s_fshr_i32_8:
2834; GFX10:       ; %bb.0:
2835; GFX10-NEXT:    v_alignbit_b32 v0, s0, s1, 8
2836; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
2837; GFX10-NEXT:    ; return to shader part epilog
2838;
2839; GFX11-LABEL: s_fshr_i32_8:
2840; GFX11:       ; %bb.0:
2841; GFX11-NEXT:    v_alignbit_b32 v0, s0, s1, 8
2842; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2843; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
2844; GFX11-NEXT:    ; return to shader part epilog
2845  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 8)
2846  ret i32 %result
2847}
2848
; i32 funnel shift right on ordinary (non-inreg) arguments with a variable
; shift amount. All prefixes expect exactly one v_alignbit_b32 on v0, v1, v2.
; The shared GCN prefix is used by the tahiti, fiji, gfx900 and gfx1010 llc
; invocations; gfx1100 is checked separately under the GFX11 prefix.
2849define i32 @v_fshr_i32(i32 %lhs, i32 %rhs, i32 %amt) {
2850; GCN-LABEL: v_fshr_i32:
2851; GCN:       ; %bb.0:
2852; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2853; GCN-NEXT:    v_alignbit_b32 v0, v0, v1, v2
2854; GCN-NEXT:    s_setpc_b64 s[30:31]
2855;
2856; GFX11-LABEL: v_fshr_i32:
2857; GFX11:       ; %bb.0:
2858; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2859; GFX11-NEXT:    v_alignbit_b32 v0, v0, v1, v2
2860; GFX11-NEXT:    s_setpc_b64 s[30:31]
2861  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
2862  ret i32 %result
2863}
2864
; i32 funnel shift right on non-inreg arguments by the constant 5. All prefixes
; expect one v_alignbit_b32 with 5 as an inline third source operand.
2865define i32 @v_fshr_i32_5(i32 %lhs, i32 %rhs) {
2866; GCN-LABEL: v_fshr_i32_5:
2867; GCN:       ; %bb.0:
2868; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2869; GCN-NEXT:    v_alignbit_b32 v0, v0, v1, 5
2870; GCN-NEXT:    s_setpc_b64 s[30:31]
2871;
2872; GFX11-LABEL: v_fshr_i32_5:
2873; GFX11:       ; %bb.0:
2874; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2875; GFX11-NEXT:    v_alignbit_b32 v0, v0, v1, 5
2876; GFX11-NEXT:    s_setpc_b64 s[30:31]
2877  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 5)
2878  ret i32 %result
2879}
2880
; i32 funnel shift right on non-inreg arguments by the constant 8. All prefixes
; expect one v_alignbit_b32 with 8 as an inline third source operand.
2881define i32 @v_fshr_i32_8(i32 %lhs, i32 %rhs) {
2882; GCN-LABEL: v_fshr_i32_8:
2883; GCN:       ; %bb.0:
2884; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2885; GCN-NEXT:    v_alignbit_b32 v0, v0, v1, 8
2886; GCN-NEXT:    s_setpc_b64 s[30:31]
2887;
2888; GFX11-LABEL: v_fshr_i32_8:
2889; GFX11:       ; %bb.0:
2890; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2891; GFX11-NEXT:    v_alignbit_b32 v0, v0, v1, 8
2892; GFX11-NEXT:    s_setpc_b64 s[30:31]
2893  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 8)
2894  ret i32 %result
2895}
2896
; i32 funnel shift right with lhs and rhs marked inreg and a non-inreg shift
; amount (v0 in the checks). The result is bitcast to float and the checks show
; it left in v0 with no v_readfirstlane_b32. The GFX6, GFX8 and GFX9 checks copy
; s1 into v1 before v_alignbit_b32; the GFX10 and GFX11 checks pass both s0 and
; s1 to v_alignbit_b32 directly.
2897define amdgpu_ps float @v_fshr_i32_ssv(i32 inreg %lhs, i32 inreg %rhs, i32 %amt) {
2898; GFX6-LABEL: v_fshr_i32_ssv:
2899; GFX6:       ; %bb.0:
2900; GFX6-NEXT:    v_mov_b32_e32 v1, s1
2901; GFX6-NEXT:    v_alignbit_b32 v0, s0, v1, v0
2902; GFX6-NEXT:    ; return to shader part epilog
2903;
2904; GFX8-LABEL: v_fshr_i32_ssv:
2905; GFX8:       ; %bb.0:
2906; GFX8-NEXT:    v_mov_b32_e32 v1, s1
2907; GFX8-NEXT:    v_alignbit_b32 v0, s0, v1, v0
2908; GFX8-NEXT:    ; return to shader part epilog
2909;
2910; GFX9-LABEL: v_fshr_i32_ssv:
2911; GFX9:       ; %bb.0:
2912; GFX9-NEXT:    v_mov_b32_e32 v1, s1
2913; GFX9-NEXT:    v_alignbit_b32 v0, s0, v1, v0
2914; GFX9-NEXT:    ; return to shader part epilog
2915;
2916; GFX10-LABEL: v_fshr_i32_ssv:
2917; GFX10:       ; %bb.0:
2918; GFX10-NEXT:    v_alignbit_b32 v0, s0, s1, v0
2919; GFX10-NEXT:    ; return to shader part epilog
2920;
2921; GFX11-LABEL: v_fshr_i32_ssv:
2922; GFX11:       ; %bb.0:
2923; GFX11-NEXT:    v_alignbit_b32 v0, s0, s1, v0
2924; GFX11-NEXT:    ; return to shader part epilog
2925  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
2926  %cast.result = bitcast i32 %result to float
2927  ret float %cast.result
2928}
2929
; i32 funnel shift right with lhs and the shift amount marked inreg and a
; non-inreg rhs (v0 in the checks). The result is bitcast to float and left in
; v0. The GFX6, GFX8 and GFX9 checks copy the amount from s1 into v1 before
; v_alignbit_b32; the GFX10 and GFX11 checks use s0 and s1 as direct operands.
2930define amdgpu_ps float @v_fshr_i32_svs(i32 inreg %lhs, i32 %rhs, i32 inreg %amt) {
2931; GFX6-LABEL: v_fshr_i32_svs:
2932; GFX6:       ; %bb.0:
2933; GFX6-NEXT:    v_mov_b32_e32 v1, s1
2934; GFX6-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2935; GFX6-NEXT:    ; return to shader part epilog
2936;
2937; GFX8-LABEL: v_fshr_i32_svs:
2938; GFX8:       ; %bb.0:
2939; GFX8-NEXT:    v_mov_b32_e32 v1, s1
2940; GFX8-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2941; GFX8-NEXT:    ; return to shader part epilog
2942;
2943; GFX9-LABEL: v_fshr_i32_svs:
2944; GFX9:       ; %bb.0:
2945; GFX9-NEXT:    v_mov_b32_e32 v1, s1
2946; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2947; GFX9-NEXT:    ; return to shader part epilog
2948;
2949; GFX10-LABEL: v_fshr_i32_svs:
2950; GFX10:       ; %bb.0:
2951; GFX10-NEXT:    v_alignbit_b32 v0, s0, v0, s1
2952; GFX10-NEXT:    ; return to shader part epilog
2953;
2954; GFX11-LABEL: v_fshr_i32_svs:
2955; GFX11:       ; %bb.0:
2956; GFX11-NEXT:    v_alignbit_b32 v0, s0, v0, s1
2957; GFX11-NEXT:    ; return to shader part epilog
2958  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
2959  %cast.result = bitcast i32 %result to float
2960  ret float %cast.result
2961}
2962
; i32 funnel shift right whose result is bitcast to float and left in v0.
; NOTE(review): the _vss suffix suggests lhs was meant to be a non-inreg
; argument (compare v_fshr_i16_vss, which declares "i16 %lhs"), but here all
; three parameters are marked inreg. As written, the checks use s0, s1 and s2
; as inputs and match the all-inreg s_fshr_i32 test minus the final
; v_readfirstlane_b32. Confirm the intent; if the signature is changed, the
; assertions must be regenerated with utils/update_llc_test_checks.py.
2963define amdgpu_ps float @v_fshr_i32_vss(i32 inreg %lhs, i32 inreg %rhs, i32 inreg %amt) {
2964; GFX6-LABEL: v_fshr_i32_vss:
2965; GFX6:       ; %bb.0:
2966; GFX6-NEXT:    v_mov_b32_e32 v0, s1
2967; GFX6-NEXT:    v_mov_b32_e32 v1, s2
2968; GFX6-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2969; GFX6-NEXT:    ; return to shader part epilog
2970;
2971; GFX8-LABEL: v_fshr_i32_vss:
2972; GFX8:       ; %bb.0:
2973; GFX8-NEXT:    v_mov_b32_e32 v0, s1
2974; GFX8-NEXT:    v_mov_b32_e32 v1, s2
2975; GFX8-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2976; GFX8-NEXT:    ; return to shader part epilog
2977;
2978; GFX9-LABEL: v_fshr_i32_vss:
2979; GFX9:       ; %bb.0:
2980; GFX9-NEXT:    v_mov_b32_e32 v0, s1
2981; GFX9-NEXT:    v_mov_b32_e32 v1, s2
2982; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2983; GFX9-NEXT:    ; return to shader part epilog
2984;
2985; GFX10-LABEL: v_fshr_i32_vss:
2986; GFX10:       ; %bb.0:
2987; GFX10-NEXT:    v_mov_b32_e32 v0, s2
2988; GFX10-NEXT:    v_alignbit_b32 v0, s0, s1, v0
2989; GFX10-NEXT:    ; return to shader part epilog
2990;
2991; GFX11-LABEL: v_fshr_i32_vss:
2992; GFX11:       ; %bb.0:
2993; GFX11-NEXT:    v_mov_b32_e32 v0, s2
2994; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2995; GFX11-NEXT:    v_alignbit_b32 v0, s0, s1, v0
2996; GFX11-NEXT:    ; return to shader part epilog
2997  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
2998  %cast.result = bitcast i32 %result to float
2999  ret float %cast.result
3000}
3001
; <2 x i32> funnel shift right on non-inreg arguments with per-element variable
; amounts. All prefixes expect one v_alignbit_b32 per element, pairing
; (v0, v2, v4) and (v1, v3, v5) as (lhs, rhs, amt).
3002define <2 x i32> @v_fshr_v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt) {
3003; GCN-LABEL: v_fshr_v2i32:
3004; GCN:       ; %bb.0:
3005; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3006; GCN-NEXT:    v_alignbit_b32 v0, v0, v2, v4
3007; GCN-NEXT:    v_alignbit_b32 v1, v1, v3, v5
3008; GCN-NEXT:    s_setpc_b64 s[30:31]
3009;
3010; GFX11-LABEL: v_fshr_v2i32:
3011; GFX11:       ; %bb.0:
3012; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3013; GFX11-NEXT:    v_alignbit_b32 v0, v0, v2, v4
3014; GFX11-NEXT:    v_alignbit_b32 v1, v1, v3, v5
3015; GFX11-NEXT:    s_setpc_b64 s[30:31]
3016  %result = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt)
3017  ret <2 x i32> %result
3018}
3019
; <3 x i32> funnel shift right on non-inreg arguments with per-element variable
; amounts. All prefixes expect three v_alignbit_b32 instructions, one per
; element, reading lhs from v0-v2, rhs from v3-v5 and amt from v6-v8.
3020define <3 x i32> @v_fshr_v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt) {
3021; GCN-LABEL: v_fshr_v3i32:
3022; GCN:       ; %bb.0:
3023; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3024; GCN-NEXT:    v_alignbit_b32 v0, v0, v3, v6
3025; GCN-NEXT:    v_alignbit_b32 v1, v1, v4, v7
3026; GCN-NEXT:    v_alignbit_b32 v2, v2, v5, v8
3027; GCN-NEXT:    s_setpc_b64 s[30:31]
3028;
3029; GFX11-LABEL: v_fshr_v3i32:
3030; GFX11:       ; %bb.0:
3031; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3032; GFX11-NEXT:    v_alignbit_b32 v0, v0, v3, v6
3033; GFX11-NEXT:    v_alignbit_b32 v1, v1, v4, v7
3034; GFX11-NEXT:    v_alignbit_b32 v2, v2, v5, v8
3035; GFX11-NEXT:    s_setpc_b64 s[30:31]
3036  %result = call <3 x i32> @llvm.fshr.v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt)
3037  ret <3 x i32> %result
3038}
3039
; <4 x i32> funnel shift right on non-inreg arguments with per-element variable
; amounts. All prefixes expect four v_alignbit_b32 instructions, one per
; element, reading lhs from v0-v3, rhs from v4-v7 and amt from v8-v11.
3040define <4 x i32> @v_fshr_v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt) {
3041; GCN-LABEL: v_fshr_v4i32:
3042; GCN:       ; %bb.0:
3043; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3044; GCN-NEXT:    v_alignbit_b32 v0, v0, v4, v8
3045; GCN-NEXT:    v_alignbit_b32 v1, v1, v5, v9
3046; GCN-NEXT:    v_alignbit_b32 v2, v2, v6, v10
3047; GCN-NEXT:    v_alignbit_b32 v3, v3, v7, v11
3048; GCN-NEXT:    s_setpc_b64 s[30:31]
3049;
3050; GFX11-LABEL: v_fshr_v4i32:
3051; GFX11:       ; %bb.0:
3052; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3053; GFX11-NEXT:    v_alignbit_b32 v0, v0, v4, v8
3054; GFX11-NEXT:    v_alignbit_b32 v1, v1, v5, v9
3055; GFX11-NEXT:    v_alignbit_b32 v2, v2, v6, v10
3056; GFX11-NEXT:    v_alignbit_b32 v3, v3, v7, v11
3057; GFX11-NEXT:    s_setpc_b64 s[30:31]
3058  %result = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt)
3059  ret <4 x i32> %result
3060}
3061
; i16 funnel shift right with all operands marked inreg and a variable amount.
; No v_alignbit_b32 is expected; every prefix checks an all-scalar expansion:
; the amount is masked with 15 (s_and_b32) and its complement is masked with 15
; (s_andn2_b32, spelled s_and_not1_b32 in the GFX11 checks), lhs is shifted left
; by 1 and then by the complemented amount, rhs is masked to 0xffff and shifted
; right by the masked amount, and the two halves are combined with s_or_b32.
3062define amdgpu_ps i16 @s_fshr_i16(i16 inreg %lhs, i16 inreg %rhs, i16 inreg %amt) {
3063; GFX6-LABEL: s_fshr_i16:
3064; GFX6:       ; %bb.0:
3065; GFX6-NEXT:    s_and_b32 s3, s2, 15
3066; GFX6-NEXT:    s_andn2_b32 s2, 15, s2
3067; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
3068; GFX6-NEXT:    s_and_b32 s2, 0xffff, s2
3069; GFX6-NEXT:    s_lshl_b32 s0, s0, s2
3070; GFX6-NEXT:    s_and_b32 s2, 0xffff, s3
3071; GFX6-NEXT:    s_and_b32 s1, s1, 0xffff
3072; GFX6-NEXT:    s_lshr_b32 s1, s1, s2
3073; GFX6-NEXT:    s_or_b32 s0, s0, s1
3074; GFX6-NEXT:    ; return to shader part epilog
3075;
3076; GFX8-LABEL: s_fshr_i16:
3077; GFX8:       ; %bb.0:
3078; GFX8-NEXT:    s_and_b32 s3, s2, 15
3079; GFX8-NEXT:    s_andn2_b32 s2, 15, s2
3080; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
3081; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
3082; GFX8-NEXT:    s_lshl_b32 s0, s0, s2
3083; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
3084; GFX8-NEXT:    s_and_b32 s2, 0xffff, s3
3085; GFX8-NEXT:    s_lshr_b32 s1, s1, s2
3086; GFX8-NEXT:    s_or_b32 s0, s0, s1
3087; GFX8-NEXT:    ; return to shader part epilog
3088;
3089; GFX9-LABEL: s_fshr_i16:
3090; GFX9:       ; %bb.0:
3091; GFX9-NEXT:    s_and_b32 s3, s2, 15
3092; GFX9-NEXT:    s_andn2_b32 s2, 15, s2
3093; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
3094; GFX9-NEXT:    s_and_b32 s2, 0xffff, s2
3095; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
3096; GFX9-NEXT:    s_and_b32 s1, 0xffff, s1
3097; GFX9-NEXT:    s_and_b32 s2, 0xffff, s3
3098; GFX9-NEXT:    s_lshr_b32 s1, s1, s2
3099; GFX9-NEXT:    s_or_b32 s0, s0, s1
3100; GFX9-NEXT:    ; return to shader part epilog
3101;
3102; GFX10-LABEL: s_fshr_i16:
3103; GFX10:       ; %bb.0:
3104; GFX10-NEXT:    s_and_b32 s3, s2, 15
3105; GFX10-NEXT:    s_andn2_b32 s2, 15, s2
3106; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
3107; GFX10-NEXT:    s_and_b32 s2, 0xffff, s2
3108; GFX10-NEXT:    s_and_b32 s1, 0xffff, s1
3109; GFX10-NEXT:    s_and_b32 s3, 0xffff, s3
3110; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
3111; GFX10-NEXT:    s_lshr_b32 s1, s1, s3
3112; GFX10-NEXT:    s_or_b32 s0, s0, s1
3113; GFX10-NEXT:    ; return to shader part epilog
3114;
3115; GFX11-LABEL: s_fshr_i16:
3116; GFX11:       ; %bb.0:
3117; GFX11-NEXT:    s_and_b32 s3, s2, 15
3118; GFX11-NEXT:    s_and_not1_b32 s2, 15, s2
3119; GFX11-NEXT:    s_lshl_b32 s0, s0, 1
3120; GFX11-NEXT:    s_and_b32 s2, 0xffff, s2
3121; GFX11-NEXT:    s_and_b32 s1, 0xffff, s1
3122; GFX11-NEXT:    s_and_b32 s3, 0xffff, s3
3123; GFX11-NEXT:    s_lshl_b32 s0, s0, s2
3124; GFX11-NEXT:    s_lshr_b32 s1, s1, s3
3125; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3126; GFX11-NEXT:    s_or_b32 s0, s0, s1
3127; GFX11-NEXT:    ; return to shader part epilog
3128  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
3129  ret i16 %result
3130}
3131
; i16 funnel shift right of two inreg operands by the constant 4. Every prefix
; expects lhs shifted left by 12 and ORed with a right-shifted rhs. The GFX6
; checks extract the rhs bits with a single s_bfe_u32 (operand 0xc0004); the
; other prefixes mask rhs with 0xffff and then use s_lshr_b32 by 4.
3132define amdgpu_ps i16 @s_fshr_i16_4(i16 inreg %lhs, i16 inreg %rhs) {
3133; GFX6-LABEL: s_fshr_i16_4:
3134; GFX6:       ; %bb.0:
3135; GFX6-NEXT:    s_lshl_b32 s0, s0, 12
3136; GFX6-NEXT:    s_bfe_u32 s1, s1, 0xc0004
3137; GFX6-NEXT:    s_or_b32 s0, s0, s1
3138; GFX6-NEXT:    ; return to shader part epilog
3139;
3140; GFX8-LABEL: s_fshr_i16_4:
3141; GFX8:       ; %bb.0:
3142; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
3143; GFX8-NEXT:    s_lshl_b32 s0, s0, 12
3144; GFX8-NEXT:    s_lshr_b32 s1, s1, 4
3145; GFX8-NEXT:    s_or_b32 s0, s0, s1
3146; GFX8-NEXT:    ; return to shader part epilog
3147;
3148; GFX9-LABEL: s_fshr_i16_4:
3149; GFX9:       ; %bb.0:
3150; GFX9-NEXT:    s_and_b32 s1, 0xffff, s1
3151; GFX9-NEXT:    s_lshl_b32 s0, s0, 12
3152; GFX9-NEXT:    s_lshr_b32 s1, s1, 4
3153; GFX9-NEXT:    s_or_b32 s0, s0, s1
3154; GFX9-NEXT:    ; return to shader part epilog
3155;
3156; GFX10-LABEL: s_fshr_i16_4:
3157; GFX10:       ; %bb.0:
3158; GFX10-NEXT:    s_and_b32 s1, 0xffff, s1
3159; GFX10-NEXT:    s_lshl_b32 s0, s0, 12
3160; GFX10-NEXT:    s_lshr_b32 s1, s1, 4
3161; GFX10-NEXT:    s_or_b32 s0, s0, s1
3162; GFX10-NEXT:    ; return to shader part epilog
3163;
3164; GFX11-LABEL: s_fshr_i16_4:
3165; GFX11:       ; %bb.0:
3166; GFX11-NEXT:    s_and_b32 s1, 0xffff, s1
3167; GFX11-NEXT:    s_lshl_b32 s0, s0, 12
3168; GFX11-NEXT:    s_lshr_b32 s1, s1, 4
3169; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3170; GFX11-NEXT:    s_or_b32 s0, s0, s1
3171; GFX11-NEXT:    ; return to shader part epilog
3172  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 4)
3173  ret i16 %result
3174}
3175
; i16 funnel shift right of two inreg operands by the constant 5. Every prefix
; expects lhs shifted left by 11 and ORed with a right-shifted rhs. The GFX6
; checks extract the rhs bits with a single s_bfe_u32 (operand 0xb0005); the
; other prefixes mask rhs with 0xffff and then use s_lshr_b32 by 5.
3176define amdgpu_ps i16 @s_fshr_i16_5(i16 inreg %lhs, i16 inreg %rhs) {
3177; GFX6-LABEL: s_fshr_i16_5:
3178; GFX6:       ; %bb.0:
3179; GFX6-NEXT:    s_lshl_b32 s0, s0, 11
3180; GFX6-NEXT:    s_bfe_u32 s1, s1, 0xb0005
3181; GFX6-NEXT:    s_or_b32 s0, s0, s1
3182; GFX6-NEXT:    ; return to shader part epilog
3183;
3184; GFX8-LABEL: s_fshr_i16_5:
3185; GFX8:       ; %bb.0:
3186; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
3187; GFX8-NEXT:    s_lshl_b32 s0, s0, 11
3188; GFX8-NEXT:    s_lshr_b32 s1, s1, 5
3189; GFX8-NEXT:    s_or_b32 s0, s0, s1
3190; GFX8-NEXT:    ; return to shader part epilog
3191;
3192; GFX9-LABEL: s_fshr_i16_5:
3193; GFX9:       ; %bb.0:
3194; GFX9-NEXT:    s_and_b32 s1, 0xffff, s1
3195; GFX9-NEXT:    s_lshl_b32 s0, s0, 11
3196; GFX9-NEXT:    s_lshr_b32 s1, s1, 5
3197; GFX9-NEXT:    s_or_b32 s0, s0, s1
3198; GFX9-NEXT:    ; return to shader part epilog
3199;
3200; GFX10-LABEL: s_fshr_i16_5:
3201; GFX10:       ; %bb.0:
3202; GFX10-NEXT:    s_and_b32 s1, 0xffff, s1
3203; GFX10-NEXT:    s_lshl_b32 s0, s0, 11
3204; GFX10-NEXT:    s_lshr_b32 s1, s1, 5
3205; GFX10-NEXT:    s_or_b32 s0, s0, s1
3206; GFX10-NEXT:    ; return to shader part epilog
3207;
3208; GFX11-LABEL: s_fshr_i16_5:
3209; GFX11:       ; %bb.0:
3210; GFX11-NEXT:    s_and_b32 s1, 0xffff, s1
3211; GFX11-NEXT:    s_lshl_b32 s0, s0, 11
3212; GFX11-NEXT:    s_lshr_b32 s1, s1, 5
3213; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3214; GFX11-NEXT:    s_or_b32 s0, s0, s1
3215; GFX11-NEXT:    ; return to shader part epilog
3216  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 5)
3217  ret i16 %result
3218}
3219
; i16 funnel shift right on non-inreg arguments with a variable amount. All
; prefixes expect the amount and its bitwise complement (v_xor_b32 with -1) to
; be masked with 15, lhs to be shifted left by 1 and then by the complemented
; amount, rhs to be shifted right by the masked amount, and a final v_or_b32.
; The GFX6 checks use 32-bit shifts with extra 0xffff masks; the other prefixes
; use the 16-bit v_lshlrev_b16 / v_lshrrev_b16 instructions.
3220define i16 @v_fshr_i16(i16 %lhs, i16 %rhs, i16 %amt) {
3221; GFX6-LABEL: v_fshr_i16:
3222; GFX6:       ; %bb.0:
3223; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3224; GFX6-NEXT:    v_and_b32_e32 v3, 15, v2
3225; GFX6-NEXT:    v_xor_b32_e32 v2, -1, v2
3226; GFX6-NEXT:    v_and_b32_e32 v2, 15, v2
3227; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
3228; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v2
3229; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v2, v0
3230; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v3
3231; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
3232; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
3233; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
3234; GFX6-NEXT:    s_setpc_b64 s[30:31]
3235;
3236; GFX8-LABEL: v_fshr_i16:
3237; GFX8:       ; %bb.0:
3238; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3239; GFX8-NEXT:    v_and_b32_e32 v3, 15, v2
3240; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v2
3241; GFX8-NEXT:    v_and_b32_e32 v2, 15, v2
3242; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
3243; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v2, v0
3244; GFX8-NEXT:    v_lshrrev_b16_e32 v1, v3, v1
3245; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3246; GFX8-NEXT:    s_setpc_b64 s[30:31]
3247;
3248; GFX9-LABEL: v_fshr_i16:
3249; GFX9:       ; %bb.0:
3250; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3251; GFX9-NEXT:    v_and_b32_e32 v3, 15, v2
3252; GFX9-NEXT:    v_xor_b32_e32 v2, -1, v2
3253; GFX9-NEXT:    v_and_b32_e32 v2, 15, v2
3254; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
3255; GFX9-NEXT:    v_lshlrev_b16_e32 v0, v2, v0
3256; GFX9-NEXT:    v_lshrrev_b16_e32 v1, v3, v1
3257; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
3258; GFX9-NEXT:    s_setpc_b64 s[30:31]
3259;
3260; GFX10-LABEL: v_fshr_i16:
3261; GFX10:       ; %bb.0:
3262; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3263; GFX10-NEXT:    v_xor_b32_e32 v3, -1, v2
3264; GFX10-NEXT:    v_lshlrev_b16 v0, 1, v0
3265; GFX10-NEXT:    v_and_b32_e32 v2, 15, v2
3266; GFX10-NEXT:    v_and_b32_e32 v3, 15, v3
3267; GFX10-NEXT:    v_lshrrev_b16 v1, v2, v1
3268; GFX10-NEXT:    v_lshlrev_b16 v0, v3, v0
3269; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
3270; GFX10-NEXT:    s_setpc_b64 s[30:31]
3271;
3272; GFX11-LABEL: v_fshr_i16:
3273; GFX11:       ; %bb.0:
3274; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3275; GFX11-NEXT:    v_xor_b32_e32 v3, -1, v2
3276; GFX11-NEXT:    v_lshlrev_b16 v0, 1, v0
3277; GFX11-NEXT:    v_and_b32_e32 v2, 15, v2
3278; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
3279; GFX11-NEXT:    v_and_b32_e32 v3, 15, v3
3280; GFX11-NEXT:    v_lshrrev_b16 v1, v2, v1
3281; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3282; GFX11-NEXT:    v_lshlrev_b16 v0, v3, v0
3283; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
3284; GFX11-NEXT:    s_setpc_b64 s[30:31]
3285  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
3286  ret i16 %result
3287}
3288
; i16 funnel shift right on non-inreg arguments by the constant 4. Every prefix
; expects lhs shifted left by 12 ORed with rhs shifted right by 4. The GFX6
; checks use a 32-bit left shift plus v_bfe_u32 (offset 4, width 12); the other
; prefixes use the 16-bit shift instructions.
3289define i16 @v_fshr_i16_4(i16 %lhs, i16 %rhs) {
3290; GFX6-LABEL: v_fshr_i16_4:
3291; GFX6:       ; %bb.0:
3292; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3293; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 12, v0
3294; GFX6-NEXT:    v_bfe_u32 v1, v1, 4, 12
3295; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
3296; GFX6-NEXT:    s_setpc_b64 s[30:31]
3297;
3298; GFX8-LABEL: v_fshr_i16_4:
3299; GFX8:       ; %bb.0:
3300; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3301; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 12, v0
3302; GFX8-NEXT:    v_lshrrev_b16_e32 v1, 4, v1
3303; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3304; GFX8-NEXT:    s_setpc_b64 s[30:31]
3305;
3306; GFX9-LABEL: v_fshr_i16_4:
3307; GFX9:       ; %bb.0:
3308; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3309; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 12, v0
3310; GFX9-NEXT:    v_lshrrev_b16_e32 v1, 4, v1
3311; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
3312; GFX9-NEXT:    s_setpc_b64 s[30:31]
3313;
3314; GFX10-LABEL: v_fshr_i16_4:
3315; GFX10:       ; %bb.0:
3316; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3317; GFX10-NEXT:    v_lshlrev_b16 v0, 12, v0
3318; GFX10-NEXT:    v_lshrrev_b16 v1, 4, v1
3319; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
3320; GFX10-NEXT:    s_setpc_b64 s[30:31]
3321;
3322; GFX11-LABEL: v_fshr_i16_4:
3323; GFX11:       ; %bb.0:
3324; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3325; GFX11-NEXT:    v_lshlrev_b16 v0, 12, v0
3326; GFX11-NEXT:    v_lshrrev_b16 v1, 4, v1
3327; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3328; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
3329; GFX11-NEXT:    s_setpc_b64 s[30:31]
3330  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 4)
3331  ret i16 %result
3332}
3333
; i16 funnel shift right on non-inreg arguments by the constant 5. Every prefix
; expects lhs shifted left by 11 ORed with rhs shifted right by 5. The GFX6
; checks use a 32-bit left shift plus v_bfe_u32 (offset 5, width 11); the other
; prefixes use the 16-bit shift instructions.
3334define i16 @v_fshr_i16_5(i16 %lhs, i16 %rhs) {
3335; GFX6-LABEL: v_fshr_i16_5:
3336; GFX6:       ; %bb.0:
3337; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3338; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 11, v0
3339; GFX6-NEXT:    v_bfe_u32 v1, v1, 5, 11
3340; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
3341; GFX6-NEXT:    s_setpc_b64 s[30:31]
3342;
3343; GFX8-LABEL: v_fshr_i16_5:
3344; GFX8:       ; %bb.0:
3345; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3346; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 11, v0
3347; GFX8-NEXT:    v_lshrrev_b16_e32 v1, 5, v1
3348; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3349; GFX8-NEXT:    s_setpc_b64 s[30:31]
3350;
3351; GFX9-LABEL: v_fshr_i16_5:
3352; GFX9:       ; %bb.0:
3353; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3354; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 11, v0
3355; GFX9-NEXT:    v_lshrrev_b16_e32 v1, 5, v1
3356; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
3357; GFX9-NEXT:    s_setpc_b64 s[30:31]
3358;
3359; GFX10-LABEL: v_fshr_i16_5:
3360; GFX10:       ; %bb.0:
3361; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3362; GFX10-NEXT:    v_lshlrev_b16 v0, 11, v0
3363; GFX10-NEXT:    v_lshrrev_b16 v1, 5, v1
3364; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
3365; GFX10-NEXT:    s_setpc_b64 s[30:31]
3366;
3367; GFX11-LABEL: v_fshr_i16_5:
3368; GFX11:       ; %bb.0:
3369; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3370; GFX11-NEXT:    v_lshlrev_b16 v0, 11, v0
3371; GFX11-NEXT:    v_lshrrev_b16 v1, 5, v1
3372; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3373; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
3374; GFX11-NEXT:    s_setpc_b64 s[30:31]
3375  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 5)
3376  ret i16 %result
3377}
3378
; i16 funnel shift right with lhs and rhs marked inreg and a non-inreg shift
; amount (v0 in the checks); the result is bitcast to half and left in v0.
; The amount and its complement are masked with vector instructions, the
; initial lhs shift-by-1 is a scalar s_lshl_b32, and the variable shifts are
; vector shifts taking s0 / s1 as the shifted value. The GFX6 checks use 32-bit
; v_lshl_b32 / v_lshr_b32 with extra 0xffff masks; the other prefixes use
; v_lshlrev_b16 / v_lshrrev_b16.
3379define amdgpu_ps half @v_fshr_i16_ssv(i16 inreg %lhs, i16 inreg %rhs, i16 %amt) {
3380; GFX6-LABEL: v_fshr_i16_ssv:
3381; GFX6:       ; %bb.0:
3382; GFX6-NEXT:    v_and_b32_e32 v1, 15, v0
3383; GFX6-NEXT:    v_xor_b32_e32 v0, -1, v0
3384; GFX6-NEXT:    v_and_b32_e32 v0, 15, v0
3385; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
3386; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
3387; GFX6-NEXT:    v_lshl_b32_e32 v0, s0, v0
3388; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
3389; GFX6-NEXT:    s_and_b32 s0, s1, 0xffff
3390; GFX6-NEXT:    v_lshr_b32_e32 v1, s0, v1
3391; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
3392; GFX6-NEXT:    ; return to shader part epilog
3393;
3394; GFX8-LABEL: v_fshr_i16_ssv:
3395; GFX8:       ; %bb.0:
3396; GFX8-NEXT:    v_and_b32_e32 v1, 15, v0
3397; GFX8-NEXT:    v_xor_b32_e32 v0, -1, v0
3398; GFX8-NEXT:    v_and_b32_e32 v0, 15, v0
3399; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
3400; GFX8-NEXT:    v_lshlrev_b16_e64 v0, v0, s0
3401; GFX8-NEXT:    v_lshrrev_b16_e64 v1, v1, s1
3402; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3403; GFX8-NEXT:    ; return to shader part epilog
3404;
3405; GFX9-LABEL: v_fshr_i16_ssv:
3406; GFX9:       ; %bb.0:
3407; GFX9-NEXT:    v_and_b32_e32 v1, 15, v0
3408; GFX9-NEXT:    v_xor_b32_e32 v0, -1, v0
3409; GFX9-NEXT:    v_and_b32_e32 v0, 15, v0
3410; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
3411; GFX9-NEXT:    v_lshlrev_b16_e64 v0, v0, s0
3412; GFX9-NEXT:    v_lshrrev_b16_e64 v1, v1, s1
3413; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
3414; GFX9-NEXT:    ; return to shader part epilog
3415;
3416; GFX10-LABEL: v_fshr_i16_ssv:
3417; GFX10:       ; %bb.0:
3418; GFX10-NEXT:    v_xor_b32_e32 v1, -1, v0
3419; GFX10-NEXT:    v_and_b32_e32 v0, 15, v0
3420; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
3421; GFX10-NEXT:    v_and_b32_e32 v1, 15, v1
3422; GFX10-NEXT:    v_lshrrev_b16 v0, v0, s1
3423; GFX10-NEXT:    v_lshlrev_b16 v1, v1, s0
3424; GFX10-NEXT:    v_or_b32_e32 v0, v1, v0
3425; GFX10-NEXT:    ; return to shader part epilog
3426;
3427; GFX11-LABEL: v_fshr_i16_ssv:
3428; GFX11:       ; %bb.0:
3429; GFX11-NEXT:    v_xor_b32_e32 v1, -1, v0
3430; GFX11-NEXT:    v_and_b32_e32 v0, 15, v0
3431; GFX11-NEXT:    s_lshl_b32 s0, s0, 1
3432; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
3433; GFX11-NEXT:    v_and_b32_e32 v1, 15, v1
3434; GFX11-NEXT:    v_lshrrev_b16 v0, v0, s1
3435; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3436; GFX11-NEXT:    v_lshlrev_b16 v1, v1, s0
3437; GFX11-NEXT:    v_or_b32_e32 v0, v1, v0
3438; GFX11-NEXT:    ; return to shader part epilog
3439  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
3440  %cast.result = bitcast i16 %result to half
3441  ret half %cast.result
3442}
3443
; i16 funnel shift right with lhs and the shift amount marked inreg and a
; non-inreg rhs (v0 in the checks); the result is bitcast to half and left in
; v0. The lhs half of the result is computed entirely with scalar instructions
; (mask/complement of s1, s_lshl_b32 by 1 and by the complemented amount); only
; the right shift of rhs and the final v_or_b32 are vector instructions. The
; GFX6 checks mask rhs with 0xffff and use a 32-bit v_lshrrev_b32; the other
; prefixes use v_lshrrev_b16.
3444define amdgpu_ps half @v_fshr_i16_svs(i16 inreg %lhs, i16 %rhs, i16 inreg %amt) {
3445; GFX6-LABEL: v_fshr_i16_svs:
3446; GFX6:       ; %bb.0:
3447; GFX6-NEXT:    s_and_b32 s2, s1, 15
3448; GFX6-NEXT:    s_andn2_b32 s1, 15, s1
3449; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
3450; GFX6-NEXT:    s_and_b32 s1, 0xffff, s1
3451; GFX6-NEXT:    s_lshl_b32 s0, s0, s1
3452; GFX6-NEXT:    s_and_b32 s1, 0xffff, s2
3453; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
3454; GFX6-NEXT:    v_lshrrev_b32_e32 v0, s1, v0
3455; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
3456; GFX6-NEXT:    ; return to shader part epilog
3457;
3458; GFX8-LABEL: v_fshr_i16_svs:
3459; GFX8:       ; %bb.0:
3460; GFX8-NEXT:    s_and_b32 s2, s1, 15
3461; GFX8-NEXT:    s_andn2_b32 s1, 15, s1
3462; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
3463; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
3464; GFX8-NEXT:    s_lshl_b32 s0, s0, s1
3465; GFX8-NEXT:    v_lshrrev_b16_e32 v0, s2, v0
3466; GFX8-NEXT:    v_or_b32_e32 v0, s0, v0
3467; GFX8-NEXT:    ; return to shader part epilog
3468;
3469; GFX9-LABEL: v_fshr_i16_svs:
3470; GFX9:       ; %bb.0:
3471; GFX9-NEXT:    s_and_b32 s2, s1, 15
3472; GFX9-NEXT:    s_andn2_b32 s1, 15, s1
3473; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
3474; GFX9-NEXT:    s_and_b32 s1, 0xffff, s1
3475; GFX9-NEXT:    s_lshl_b32 s0, s0, s1
3476; GFX9-NEXT:    v_lshrrev_b16_e32 v0, s2, v0
3477; GFX9-NEXT:    v_or_b32_e32 v0, s0, v0
3478; GFX9-NEXT:    ; return to shader part epilog
3479;
3480; GFX10-LABEL: v_fshr_i16_svs:
3481; GFX10:       ; %bb.0:
3482; GFX10-NEXT:    s_and_b32 s2, s1, 15
3483; GFX10-NEXT:    s_andn2_b32 s1, 15, s1
3484; GFX10-NEXT:    v_lshrrev_b16 v0, s2, v0
3485; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
3486; GFX10-NEXT:    s_and_b32 s1, 0xffff, s1
3487; GFX10-NEXT:    s_lshl_b32 s0, s0, s1
3488; GFX10-NEXT:    v_or_b32_e32 v0, s0, v0
3489; GFX10-NEXT:    ; return to shader part epilog
3490;
3491; GFX11-LABEL: v_fshr_i16_svs:
3492; GFX11:       ; %bb.0:
3493; GFX11-NEXT:    s_and_b32 s2, s1, 15
3494; GFX11-NEXT:    s_and_not1_b32 s1, 15, s1
3495; GFX11-NEXT:    v_lshrrev_b16 v0, s2, v0
3496; GFX11-NEXT:    s_lshl_b32 s0, s0, 1
3497; GFX11-NEXT:    s_and_b32 s1, 0xffff, s1
3498; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3499; GFX11-NEXT:    s_lshl_b32 s0, s0, s1
3500; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
3501; GFX11-NEXT:    v_or_b32_e32 v0, s0, v0
3502; GFX11-NEXT:    ; return to shader part epilog
3503  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
3504  %cast.result = bitcast i16 %result to half
3505  ret half %cast.result
3506}
3507
; i16 funnel shift right with a non-inreg lhs (v0 in the checks) and rhs and
; the shift amount marked inreg; the result is bitcast to half and left in v0.
; The rhs half of the result is computed with scalar instructions (0xffff mask
; and s_lshr_b32), while lhs is shifted with vector instructions and the halves
; are combined with v_or_b32. The GFX6 checks use 32-bit v_lshlrev_b32 with an
; extra 0xffff mask on the amount; the other prefixes use v_lshlrev_b16.
3508define amdgpu_ps half @v_fshr_i16_vss(i16 %lhs, i16 inreg %rhs, i16 inreg %amt) {
3509; GFX6-LABEL: v_fshr_i16_vss:
3510; GFX6:       ; %bb.0:
3511; GFX6-NEXT:    s_and_b32 s2, s1, 15
3512; GFX6-NEXT:    s_andn2_b32 s1, 15, s1
3513; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
3514; GFX6-NEXT:    s_and_b32 s1, 0xffff, s1
3515; GFX6-NEXT:    v_lshlrev_b32_e32 v0, s1, v0
3516; GFX6-NEXT:    s_and_b32 s1, 0xffff, s2
3517; GFX6-NEXT:    s_and_b32 s0, s0, 0xffff
3518; GFX6-NEXT:    s_lshr_b32 s0, s0, s1
3519; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
3520; GFX6-NEXT:    ; return to shader part epilog
3521;
3522; GFX8-LABEL: v_fshr_i16_vss:
3523; GFX8:       ; %bb.0:
3524; GFX8-NEXT:    s_and_b32 s2, s1, 15
3525; GFX8-NEXT:    s_andn2_b32 s1, 15, s1
3526; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
3527; GFX8-NEXT:    v_lshlrev_b16_e32 v0, s1, v0
3528; GFX8-NEXT:    s_and_b32 s0, 0xffff, s0
3529; GFX8-NEXT:    s_and_b32 s1, 0xffff, s2
3530; GFX8-NEXT:    s_lshr_b32 s0, s0, s1
3531; GFX8-NEXT:    v_or_b32_e32 v0, s0, v0
3532; GFX8-NEXT:    ; return to shader part epilog
3533;
3534; GFX9-LABEL: v_fshr_i16_vss:
3535; GFX9:       ; %bb.0:
3536; GFX9-NEXT:    s_and_b32 s2, s1, 15
3537; GFX9-NEXT:    s_andn2_b32 s1, 15, s1
3538; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
3539; GFX9-NEXT:    v_lshlrev_b16_e32 v0, s1, v0
3540; GFX9-NEXT:    s_and_b32 s0, 0xffff, s0
3541; GFX9-NEXT:    s_and_b32 s1, 0xffff, s2
3542; GFX9-NEXT:    s_lshr_b32 s0, s0, s1
3543; GFX9-NEXT:    v_or_b32_e32 v0, s0, v0
3544; GFX9-NEXT:    ; return to shader part epilog
3545;
3546; GFX10-LABEL: v_fshr_i16_vss:
3547; GFX10:       ; %bb.0:
3548; GFX10-NEXT:    v_lshlrev_b16 v0, 1, v0
3549; GFX10-NEXT:    s_andn2_b32 s2, 15, s1
3550; GFX10-NEXT:    s_and_b32 s1, s1, 15
3551; GFX10-NEXT:    s_and_b32 s0, 0xffff, s0
3552; GFX10-NEXT:    s_and_b32 s1, 0xffff, s1
3553; GFX10-NEXT:    v_lshlrev_b16 v0, s2, v0
3554; GFX10-NEXT:    s_lshr_b32 s0, s0, s1
3555; GFX10-NEXT:    v_or_b32_e32 v0, s0, v0
3556; GFX10-NEXT:    ; return to shader part epilog
3557;
3558; GFX11-LABEL: v_fshr_i16_vss:
3559; GFX11:       ; %bb.0:
3560; GFX11-NEXT:    v_lshlrev_b16 v0, 1, v0
3561; GFX11-NEXT:    s_and_not1_b32 s2, 15, s1
3562; GFX11-NEXT:    s_and_b32 s1, s1, 15
3563; GFX11-NEXT:    s_and_b32 s0, 0xffff, s0
3564; GFX11-NEXT:    s_and_b32 s1, 0xffff, s1
3565; GFX11-NEXT:    v_lshlrev_b16 v0, s2, v0
3566; GFX11-NEXT:    s_lshr_b32 s0, s0, s1
3567; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
3568; GFX11-NEXT:    v_or_b32_e32 v0, s0, v0
3569; GFX11-NEXT:    ; return to shader part epilog
3570  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
3571  %cast.result = bitcast i16 %result to half
3572  ret half %cast.result
3573}
3574
; s_fshr_v2i16: funnel shift right (llvm.fshr) on <2 x i16> in an amdgpu_ps
; function where %lhs, %rhs and %amt are all marked inreg. The result is
; bitcast to i32 and returned. Every expected instruction in the per-target
; blocks below is a scalar (s_*) instruction.
; The GFX6/GFX8/GFX9/GFX10/GFX11 check lines are autogenerated (see the NOTE on
; the first line of this file); regenerate them instead of editing by hand.
3575define amdgpu_ps i32 @s_fshr_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) {
3576; GFX6-LABEL: s_fshr_v2i16:
3577; GFX6:       ; %bb.0:
3578; GFX6-NEXT:    s_lshl_b32 s5, s5, 16
3579; GFX6-NEXT:    s_and_b32 s4, s4, 0xffff
3580; GFX6-NEXT:    s_or_b32 s4, s5, s4
3581; GFX6-NEXT:    s_bfe_u32 s5, s2, 0xf0001
3582; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
3583; GFX6-NEXT:    s_lshr_b32 s5, s5, 14
3584; GFX6-NEXT:    s_or_b32 s0, s0, s5
3585; GFX6-NEXT:    s_bfe_u32 s5, s3, 0xf0001
3586; GFX6-NEXT:    s_lshl_b32 s1, s1, 1
3587; GFX6-NEXT:    s_lshr_b32 s5, s5, 14
3588; GFX6-NEXT:    s_xor_b32 s4, s4, -1
3589; GFX6-NEXT:    s_or_b32 s1, s1, s5
3590; GFX6-NEXT:    s_lshl_b32 s2, s2, 1
3591; GFX6-NEXT:    s_lshr_b32 s5, s4, 16
3592; GFX6-NEXT:    s_and_b32 s6, s4, 15
3593; GFX6-NEXT:    s_andn2_b32 s4, 15, s4
3594; GFX6-NEXT:    s_and_b32 s6, 0xffff, s6
3595; GFX6-NEXT:    s_bfe_u32 s2, s2, 0xf0001
3596; GFX6-NEXT:    s_and_b32 s4, 0xffff, s4
3597; GFX6-NEXT:    s_lshl_b32 s0, s0, s6
3598; GFX6-NEXT:    s_lshr_b32 s2, s2, s4
3599; GFX6-NEXT:    s_or_b32 s0, s0, s2
3600; GFX6-NEXT:    s_and_b32 s2, s5, 15
3601; GFX6-NEXT:    s_lshl_b32 s3, s3, 1
3602; GFX6-NEXT:    s_andn2_b32 s4, 15, s5
3603; GFX6-NEXT:    s_and_b32 s2, 0xffff, s2
3604; GFX6-NEXT:    s_lshl_b32 s1, s1, s2
3605; GFX6-NEXT:    s_bfe_u32 s2, s3, 0xf0001
3606; GFX6-NEXT:    s_and_b32 s3, 0xffff, s4
3607; GFX6-NEXT:    s_lshr_b32 s2, s2, s3
3608; GFX6-NEXT:    s_or_b32 s1, s1, s2
3609; GFX6-NEXT:    s_and_b32 s1, 0xffff, s1
3610; GFX6-NEXT:    s_and_b32 s0, 0xffff, s0
3611; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
3612; GFX6-NEXT:    s_or_b32 s0, s0, s1
3613; GFX6-NEXT:    ; return to shader part epilog
3614;
3615; GFX8-LABEL: s_fshr_v2i16:
3616; GFX8:       ; %bb.0:
3617; GFX8-NEXT:    s_and_b32 s5, 0xffff, s1
3618; GFX8-NEXT:    s_lshr_b32 s3, s0, 16
3619; GFX8-NEXT:    s_lshr_b32 s4, s1, 16
3620; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
3621; GFX8-NEXT:    s_lshr_b32 s5, s5, 15
3622; GFX8-NEXT:    s_or_b32 s0, s0, s5
3623; GFX8-NEXT:    s_lshl_b32 s3, s3, 1
3624; GFX8-NEXT:    s_lshr_b32 s5, s4, 15
3625; GFX8-NEXT:    s_lshl_b32 s1, s1, 1
3626; GFX8-NEXT:    s_xor_b32 s2, s2, -1
3627; GFX8-NEXT:    s_or_b32 s3, s3, s5
3628; GFX8-NEXT:    s_lshr_b32 s5, s2, 16
3629; GFX8-NEXT:    s_and_b32 s6, s2, 15
3630; GFX8-NEXT:    s_andn2_b32 s2, 15, s2
3631; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
3632; GFX8-NEXT:    s_and_b32 s6, 0xffff, s6
3633; GFX8-NEXT:    s_lshr_b32 s1, s1, 1
3634; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
3635; GFX8-NEXT:    s_lshl_b32 s0, s0, s6
3636; GFX8-NEXT:    s_lshr_b32 s1, s1, s2
3637; GFX8-NEXT:    s_or_b32 s0, s0, s1
3638; GFX8-NEXT:    s_and_b32 s1, s5, 15
3639; GFX8-NEXT:    s_lshl_b32 s4, s4, 1
3640; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
3641; GFX8-NEXT:    s_andn2_b32 s2, 15, s5
3642; GFX8-NEXT:    s_lshl_b32 s1, s3, s1
3643; GFX8-NEXT:    s_and_b32 s3, 0xffff, s4
3644; GFX8-NEXT:    s_lshr_b32 s3, s3, 1
3645; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
3646; GFX8-NEXT:    s_lshr_b32 s2, s3, s2
3647; GFX8-NEXT:    s_or_b32 s1, s1, s2
3648; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
3649; GFX8-NEXT:    s_and_b32 s0, 0xffff, s0
3650; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
3651; GFX8-NEXT:    s_or_b32 s0, s0, s1
3652; GFX8-NEXT:    ; return to shader part epilog
3653;
3654; GFX9-LABEL: s_fshr_v2i16:
3655; GFX9:       ; %bb.0:
3656; GFX9-NEXT:    s_lshr_b32 s4, s0, 16
3657; GFX9-NEXT:    s_lshl_b32 s0, s0, 0x10001
3658; GFX9-NEXT:    s_lshl_b32 s4, s4, 1
3659; GFX9-NEXT:    s_and_b32 s3, s2, 0xf000f
3660; GFX9-NEXT:    s_andn2_b32 s2, 0xf000f, s2
3661; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s4
3662; GFX9-NEXT:    s_lshr_b32 s4, s0, 16
3663; GFX9-NEXT:    s_lshr_b32 s5, s2, 16
3664; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
3665; GFX9-NEXT:    s_lshl_b32 s2, s4, s5
3666; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
3667; GFX9-NEXT:    s_lshr_b32 s2, s1, 16
3668; GFX9-NEXT:    s_and_b32 s1, s1, 0xffff
3669; GFX9-NEXT:    s_lshr_b32 s4, s3, 16
3670; GFX9-NEXT:    s_lshr_b32 s1, s1, s3
3671; GFX9-NEXT:    s_lshr_b32 s2, s2, s4
3672; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s1, s2
3673; GFX9-NEXT:    s_or_b32 s0, s0, s1
3674; GFX9-NEXT:    ; return to shader part epilog
3675;
3676; GFX10-LABEL: s_fshr_v2i16:
3677; GFX10:       ; %bb.0:
3678; GFX10-NEXT:    s_lshr_b32 s3, s0, 16
3679; GFX10-NEXT:    s_lshl_b32 s0, s0, 0x10001
3680; GFX10-NEXT:    s_lshl_b32 s3, s3, 1
3681; GFX10-NEXT:    s_and_b32 s4, s2, 0xf000f
3682; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s3
3683; GFX10-NEXT:    s_andn2_b32 s2, 0xf000f, s2
3684; GFX10-NEXT:    s_lshr_b32 s3, s0, 16
3685; GFX10-NEXT:    s_lshr_b32 s5, s2, 16
3686; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
3687; GFX10-NEXT:    s_lshl_b32 s2, s3, s5
3688; GFX10-NEXT:    s_lshr_b32 s3, s1, 16
3689; GFX10-NEXT:    s_and_b32 s1, s1, 0xffff
3690; GFX10-NEXT:    s_lshr_b32 s5, s4, 16
3691; GFX10-NEXT:    s_lshr_b32 s1, s1, s4
3692; GFX10-NEXT:    s_lshr_b32 s3, s3, s5
3693; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
3694; GFX10-NEXT:    s_pack_ll_b32_b16 s1, s1, s3
3695; GFX10-NEXT:    s_or_b32 s0, s0, s1
3696; GFX10-NEXT:    ; return to shader part epilog
3697;
3698; GFX11-LABEL: s_fshr_v2i16:
3699; GFX11:       ; %bb.0:
3700; GFX11-NEXT:    s_lshr_b32 s3, s0, 16
3701; GFX11-NEXT:    s_lshl_b32 s0, s0, 0x10001
3702; GFX11-NEXT:    s_lshl_b32 s3, s3, 1
3703; GFX11-NEXT:    s_and_b32 s4, s2, 0xf000f
3704; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s3
3705; GFX11-NEXT:    s_and_not1_b32 s2, 0xf000f, s2
3706; GFX11-NEXT:    s_lshr_b32 s3, s0, 16
3707; GFX11-NEXT:    s_lshr_b32 s5, s2, 16
3708; GFX11-NEXT:    s_lshl_b32 s0, s0, s2
3709; GFX11-NEXT:    s_lshl_b32 s2, s3, s5
3710; GFX11-NEXT:    s_lshr_b32 s3, s1, 16
3711; GFX11-NEXT:    s_and_b32 s1, s1, 0xffff
3712; GFX11-NEXT:    s_lshr_b32 s5, s4, 16
3713; GFX11-NEXT:    s_lshr_b32 s1, s1, s4
3714; GFX11-NEXT:    s_lshr_b32 s3, s3, s5
3715; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
3716; GFX11-NEXT:    s_pack_ll_b32_b16 s1, s1, s3
3717; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3718; GFX11-NEXT:    s_or_b32 s0, s0, s1
3719; GFX11-NEXT:    ; return to shader part epilog
3720  %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
  ; The function returns i32, so the two i16 lanes are reinterpreted as one i32.
3721  %cast = bitcast <2 x i16> %result to i32
3722  ret i32 %cast
3723}
3724
; v_fshr_v2i16: funnel shift right (llvm.fshr) on <2 x i16> with a variable
; per-lane shift amount. No calling convention is specified and no argument is
; marked inreg; the <2 x i16> result is returned directly.
; In the expected output each target block starts with s_waitcnt and ends with
; s_setpc_b64 s[30:31]. The GFX9/GFX10/GFX11 blocks use the packed
; v_pk_lshlrev_b16 / v_pk_lshrrev_b16 instructions with a 0xf000f mask on the
; amount, while the GFX8 block uses *_sdwa forms with src*_sel:WORD_1 operands.
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them instead of editing by hand.
3725define <2 x i16> @v_fshr_v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) {
3726; GFX6-LABEL: v_fshr_v2i16:
3727; GFX6:       ; %bb.0:
3728; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3729; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
3730; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v4
3731; GFX6-NEXT:    v_or_b32_e32 v4, v5, v4
3732; GFX6-NEXT:    v_bfe_u32 v5, v2, 1, 15
3733; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
3734; GFX6-NEXT:    v_lshrrev_b32_e32 v5, 14, v5
3735; GFX6-NEXT:    v_or_b32_e32 v0, v0, v5
3736; GFX6-NEXT:    v_bfe_u32 v5, v3, 1, 15
3737; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
3738; GFX6-NEXT:    v_lshrrev_b32_e32 v5, 14, v5
3739; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v4
3740; GFX6-NEXT:    v_or_b32_e32 v1, v1, v5
3741; GFX6-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
3742; GFX6-NEXT:    v_and_b32_e32 v6, 15, v4
3743; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v4
3744; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
3745; GFX6-NEXT:    v_and_b32_e32 v4, 15, v4
3746; GFX6-NEXT:    v_and_b32_e32 v6, 0xffff, v6
3747; GFX6-NEXT:    v_bfe_u32 v2, v2, 1, 15
3748; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v4
3749; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v6, v0
3750; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
3751; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
3752; GFX6-NEXT:    v_and_b32_e32 v2, 15, v5
3753; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v5
3754; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 1, v3
3755; GFX6-NEXT:    v_and_b32_e32 v4, 15, v4
3756; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v2
3757; GFX6-NEXT:    v_lshlrev_b32_e32 v1, v2, v1
3758; GFX6-NEXT:    v_bfe_u32 v2, v3, 1, 15
3759; GFX6-NEXT:    v_and_b32_e32 v3, 0xffff, v4
3760; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v3, v2
3761; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
3762; GFX6-NEXT:    s_setpc_b64 s[30:31]
3763;
3764; GFX8-LABEL: v_fshr_v2i16:
3765; GFX8:       ; %bb.0:
3766; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3767; GFX8-NEXT:    v_lshlrev_b16_e32 v3, 1, v0
3768; GFX8-NEXT:    v_lshrrev_b16_e32 v4, 15, v1
3769; GFX8-NEXT:    v_or_b32_e32 v3, v3, v4
3770; GFX8-NEXT:    v_mov_b32_e32 v4, 1
3771; GFX8-NEXT:    v_mov_b32_e32 v5, 15
3772; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3773; GFX8-NEXT:    v_lshrrev_b16_sdwa v6, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3774; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v2
3775; GFX8-NEXT:    v_or_b32_e32 v0, v0, v6
3776; GFX8-NEXT:    v_lshlrev_b16_e32 v6, 1, v1
3777; GFX8-NEXT:    v_lshlrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3778; GFX8-NEXT:    v_and_b32_e32 v4, 15, v2
3779; GFX8-NEXT:    v_xor_b32_e32 v7, -1, v2
3780; GFX8-NEXT:    v_and_b32_e32 v7, 15, v7
3781; GFX8-NEXT:    v_lshlrev_b16_e32 v3, v4, v3
3782; GFX8-NEXT:    v_lshrrev_b16_e32 v4, 1, v6
3783; GFX8-NEXT:    v_lshrrev_b16_e32 v4, v7, v4
3784; GFX8-NEXT:    v_or_b32_e32 v3, v3, v4
3785; GFX8-NEXT:    v_and_b32_sdwa v4, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
3786; GFX8-NEXT:    v_mov_b32_e32 v5, -1
3787; GFX8-NEXT:    v_xor_b32_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
3788; GFX8-NEXT:    v_and_b32_e32 v2, 15, v2
3789; GFX8-NEXT:    v_lshrrev_b16_e32 v1, 1, v1
3790; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v4, v0
3791; GFX8-NEXT:    v_lshrrev_b16_e32 v1, v2, v1
3792; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3793; GFX8-NEXT:    v_and_b32_e32 v0, 0xffff, v0
3794; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
3795; GFX8-NEXT:    v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3796; GFX8-NEXT:    s_setpc_b64 s[30:31]
3797;
3798; GFX9-LABEL: v_fshr_v2i16:
3799; GFX9:       ; %bb.0:
3800; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3801; GFX9-NEXT:    v_and_b32_e32 v3, 0xf000f, v2
3802; GFX9-NEXT:    v_xor_b32_e32 v2, -1, v2
3803; GFX9-NEXT:    v_and_b32_e32 v2, 0xf000f, v2
3804; GFX9-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
3805; GFX9-NEXT:    v_pk_lshlrev_b16 v0, v2, v0
3806; GFX9-NEXT:    v_pk_lshrrev_b16 v1, v3, v1
3807; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
3808; GFX9-NEXT:    s_setpc_b64 s[30:31]
3809;
3810; GFX10-LABEL: v_fshr_v2i16:
3811; GFX10:       ; %bb.0:
3812; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3813; GFX10-NEXT:    v_xor_b32_e32 v3, -1, v2
3814; GFX10-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
3815; GFX10-NEXT:    v_and_b32_e32 v2, 0xf000f, v2
3816; GFX10-NEXT:    v_and_b32_e32 v3, 0xf000f, v3
3817; GFX10-NEXT:    v_pk_lshrrev_b16 v1, v2, v1
3818; GFX10-NEXT:    v_pk_lshlrev_b16 v0, v3, v0
3819; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
3820; GFX10-NEXT:    s_setpc_b64 s[30:31]
3821;
3822; GFX11-LABEL: v_fshr_v2i16:
3823; GFX11:       ; %bb.0:
3824; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3825; GFX11-NEXT:    v_xor_b32_e32 v3, -1, v2
3826; GFX11-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
3827; GFX11-NEXT:    v_and_b32_e32 v2, 0xf000f, v2
3828; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
3829; GFX11-NEXT:    v_and_b32_e32 v3, 0xf000f, v3
3830; GFX11-NEXT:    v_pk_lshrrev_b16 v1, v2, v1
3831; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3832; GFX11-NEXT:    v_pk_lshlrev_b16 v0, v3, v0
3833; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
3834; GFX11-NEXT:    s_setpc_b64 s[30:31]
3835  %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
3836  ret <2 x i16> %result
3837}
3838
; v_fshr_v2i16_4_8: funnel shift right (llvm.fshr) on <2 x i16> with the
; constant shift amounts <i16 4, i16 8>. No calling convention is specified and
; no argument is marked inreg.
; In the GFX9/GFX10/GFX11 expected output the amounts appear as packed
; constants: 0x8000c feeds v_pk_lshlrev_b16 (left shifts of 12 and 8, i.e.
; 16 - 4 and 16 - 8) and 0x80004 feeds v_pk_lshrrev_b16 (right shifts of 4 and
; 8). GFX9 first moves each constant into v2; GFX10/GFX11 use it as a literal.
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them instead of editing by hand.
3839define <2 x i16> @v_fshr_v2i16_4_8(<2 x i16> %lhs, <2 x i16> %rhs) {
3840; GFX6-LABEL: v_fshr_v2i16_4_8:
3841; GFX6:       ; %bb.0:
3842; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3843; GFX6-NEXT:    v_bfe_u32 v2, v2, 1, 15
3844; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 12, v0
3845; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 3, v2
3846; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
3847; GFX6-NEXT:    v_bfe_u32 v2, v3, 1, 15
3848; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
3849; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 7, v2
3850; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
3851; GFX6-NEXT:    s_setpc_b64 s[30:31]
3852;
3853; GFX8-LABEL: v_fshr_v2i16_4_8:
3854; GFX8:       ; %bb.0:
3855; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3856; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
3857; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 12, v0
3858; GFX8-NEXT:    v_lshrrev_b16_e32 v3, 4, v1
3859; GFX8-NEXT:    v_or_b32_e32 v0, v0, v3
3860; GFX8-NEXT:    v_mov_b32_e32 v3, 8
3861; GFX8-NEXT:    v_lshlrev_b16_e32 v2, 8, v2
3862; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3863; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
3864; GFX8-NEXT:    v_and_b32_e32 v1, 0xffff, v1
3865; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
3866; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3867; GFX8-NEXT:    s_setpc_b64 s[30:31]
3868;
3869; GFX9-LABEL: v_fshr_v2i16_4_8:
3870; GFX9:       ; %bb.0:
3871; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3872; GFX9-NEXT:    v_mov_b32_e32 v2, 0x8000c
3873; GFX9-NEXT:    v_pk_lshlrev_b16 v0, v2, v0
3874; GFX9-NEXT:    v_mov_b32_e32 v2, 0x80004
3875; GFX9-NEXT:    v_pk_lshrrev_b16 v1, v2, v1
3876; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
3877; GFX9-NEXT:    s_setpc_b64 s[30:31]
3878;
3879; GFX10-LABEL: v_fshr_v2i16_4_8:
3880; GFX10:       ; %bb.0:
3881; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3882; GFX10-NEXT:    v_pk_lshlrev_b16 v0, 0x8000c, v0
3883; GFX10-NEXT:    v_pk_lshrrev_b16 v1, 0x80004, v1
3884; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
3885; GFX10-NEXT:    s_setpc_b64 s[30:31]
3886;
3887; GFX11-LABEL: v_fshr_v2i16_4_8:
3888; GFX11:       ; %bb.0:
3889; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3890; GFX11-NEXT:    v_pk_lshlrev_b16 v0, 0x8000c, v0
3891; GFX11-NEXT:    v_pk_lshrrev_b16 v1, 0x80004, v1
3892; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3893; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
3894; GFX11-NEXT:    s_setpc_b64 s[30:31]
3895  %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> <i16 4, i16 8>)
3896  ret <2 x i16> %result
3897}
3898
; v_fshr_v2i16_ssv: funnel shift right (llvm.fshr) on <2 x i16> in an amdgpu_ps
; function where %lhs and %rhs are marked inreg and %amt is not. The result is
; bitcast to float and returned.
; In the expected output the value operands are read from s0..s3 and the shift
; amount from VGPRs, and the final shifts and OR are v_* instructions.
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them instead of editing by hand.
3899define amdgpu_ps float @v_fshr_v2i16_ssv(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> %amt) {
3900; GFX6-LABEL: v_fshr_v2i16_ssv:
3901; GFX6:       ; %bb.0:
3902; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
3903; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
3904; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
3905; GFX6-NEXT:    s_bfe_u32 s4, s2, 0xf0001
3906; GFX6-NEXT:    v_xor_b32_e32 v0, -1, v0
3907; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
3908; GFX6-NEXT:    s_lshr_b32 s4, s4, 14
3909; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
3910; GFX6-NEXT:    v_and_b32_e32 v2, 15, v0
3911; GFX6-NEXT:    v_xor_b32_e32 v0, -1, v0
3912; GFX6-NEXT:    s_or_b32 s0, s0, s4
3913; GFX6-NEXT:    s_lshl_b32 s2, s2, 1
3914; GFX6-NEXT:    v_and_b32_e32 v0, 15, v0
3915; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v2
3916; GFX6-NEXT:    v_lshl_b32_e32 v2, s0, v2
3917; GFX6-NEXT:    s_bfe_u32 s0, s2, 0xf0001
3918; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
3919; GFX6-NEXT:    v_lshr_b32_e32 v0, s0, v0
3920; GFX6-NEXT:    s_bfe_u32 s4, s3, 0xf0001
3921; GFX6-NEXT:    v_or_b32_e32 v0, v2, v0
3922; GFX6-NEXT:    v_and_b32_e32 v2, 15, v1
3923; GFX6-NEXT:    v_xor_b32_e32 v1, -1, v1
3924; GFX6-NEXT:    s_lshl_b32 s1, s1, 1
3925; GFX6-NEXT:    s_lshr_b32 s4, s4, 14
3926; GFX6-NEXT:    s_lshl_b32 s3, s3, 1
3927; GFX6-NEXT:    v_and_b32_e32 v1, 15, v1
3928; GFX6-NEXT:    s_or_b32 s1, s1, s4
3929; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v2
3930; GFX6-NEXT:    s_bfe_u32 s0, s3, 0xf0001
3931; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
3932; GFX6-NEXT:    v_lshl_b32_e32 v2, s1, v2
3933; GFX6-NEXT:    v_lshr_b32_e32 v1, s0, v1
3934; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
3935; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
3936; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
3937; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
3938; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
3939; GFX6-NEXT:    ; return to shader part epilog
3940;
3941; GFX8-LABEL: v_fshr_v2i16_ssv:
3942; GFX8:       ; %bb.0:
3943; GFX8-NEXT:    s_and_b32 s4, 0xffff, s1
3944; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
3945; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
3946; GFX8-NEXT:    s_lshr_b32 s4, s4, 15
3947; GFX8-NEXT:    v_xor_b32_e32 v0, -1, v0
3948; GFX8-NEXT:    s_lshr_b32 s3, s1, 16
3949; GFX8-NEXT:    s_or_b32 s0, s0, s4
3950; GFX8-NEXT:    s_lshl_b32 s1, s1, 1
3951; GFX8-NEXT:    v_and_b32_e32 v1, 15, v0
3952; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v0
3953; GFX8-NEXT:    v_lshlrev_b16_e64 v1, v1, s0
3954; GFX8-NEXT:    s_and_b32 s0, 0xffff, s1
3955; GFX8-NEXT:    v_and_b32_e32 v2, 15, v2
3956; GFX8-NEXT:    s_lshr_b32 s0, s0, 1
3957; GFX8-NEXT:    v_lshrrev_b16_e64 v2, v2, s0
3958; GFX8-NEXT:    s_lshr_b32 s4, s3, 15
3959; GFX8-NEXT:    s_lshl_b32 s3, s3, 1
3960; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
3961; GFX8-NEXT:    v_mov_b32_e32 v2, 15
3962; GFX8-NEXT:    v_mov_b32_e32 v3, -1
3963; GFX8-NEXT:    s_lshl_b32 s2, s2, 1
3964; GFX8-NEXT:    v_and_b32_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
3965; GFX8-NEXT:    v_xor_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
3966; GFX8-NEXT:    s_and_b32 s0, 0xffff, s3
3967; GFX8-NEXT:    s_or_b32 s2, s2, s4
3968; GFX8-NEXT:    v_and_b32_e32 v0, 15, v0
3969; GFX8-NEXT:    s_lshr_b32 s0, s0, 1
3970; GFX8-NEXT:    v_lshlrev_b16_e64 v2, v2, s2
3971; GFX8-NEXT:    v_lshrrev_b16_e64 v0, v0, s0
3972; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
3973; GFX8-NEXT:    v_and_b32_e32 v0, 0xffff, v0
3974; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
3975; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3976; GFX8-NEXT:    ; return to shader part epilog
3977;
3978; GFX9-LABEL: v_fshr_v2i16_ssv:
3979; GFX9:       ; %bb.0:
3980; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
3981; GFX9-NEXT:    v_and_b32_e32 v1, 0xf000f, v0
3982; GFX9-NEXT:    v_xor_b32_e32 v0, -1, v0
3983; GFX9-NEXT:    s_lshl_b32 s0, s0, 0x10001
3984; GFX9-NEXT:    s_lshl_b32 s2, s2, 1
3985; GFX9-NEXT:    v_and_b32_e32 v0, 0xf000f, v0
3986; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
3987; GFX9-NEXT:    v_pk_lshlrev_b16 v0, v0, s0
3988; GFX9-NEXT:    v_pk_lshrrev_b16 v1, v1, s1
3989; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
3990; GFX9-NEXT:    ; return to shader part epilog
3991;
3992; GFX10-LABEL: v_fshr_v2i16_ssv:
3993; GFX10:       ; %bb.0:
3994; GFX10-NEXT:    v_xor_b32_e32 v1, -1, v0
3995; GFX10-NEXT:    s_lshr_b32 s2, s0, 16
3996; GFX10-NEXT:    v_and_b32_e32 v0, 0xf000f, v0
3997; GFX10-NEXT:    s_lshl_b32 s0, s0, 0x10001
3998; GFX10-NEXT:    s_lshl_b32 s2, s2, 1
3999; GFX10-NEXT:    v_and_b32_e32 v1, 0xf000f, v1
4000; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
4001; GFX10-NEXT:    v_pk_lshrrev_b16 v0, v0, s1
4002; GFX10-NEXT:    v_pk_lshlrev_b16 v1, v1, s0
4003; GFX10-NEXT:    v_or_b32_e32 v0, v1, v0
4004; GFX10-NEXT:    ; return to shader part epilog
4005;
4006; GFX11-LABEL: v_fshr_v2i16_ssv:
4007; GFX11:       ; %bb.0:
4008; GFX11-NEXT:    v_xor_b32_e32 v1, -1, v0
4009; GFX11-NEXT:    s_lshr_b32 s2, s0, 16
4010; GFX11-NEXT:    v_and_b32_e32 v0, 0xf000f, v0
4011; GFX11-NEXT:    s_lshl_b32 s0, s0, 0x10001
4012; GFX11-NEXT:    s_lshl_b32 s2, s2, 1
4013; GFX11-NEXT:    v_and_b32_e32 v1, 0xf000f, v1
4014; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
4015; GFX11-NEXT:    v_pk_lshrrev_b16 v0, v0, s1
4016; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
4017; GFX11-NEXT:    v_pk_lshlrev_b16 v1, v1, s0
4018; GFX11-NEXT:    v_or_b32_e32 v0, v1, v0
4019; GFX11-NEXT:    ; return to shader part epilog
4020  %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
  ; The function returns float, so the two i16 lanes are reinterpreted as one
  ; 32-bit float.
4021  %cast = bitcast <2 x i16> %result to float
4022  ret float %cast
4023}
4024
; v_fshr_v2i16_svs: funnel shift right (llvm.fshr) on <2 x i16> in an amdgpu_ps
; function where %lhs and %amt are marked inreg and %rhs is not. The result is
; bitcast to float and returned.
; In the GFX9/GFX10/GFX11 expected output the %lhs half is shifted entirely
; with s_* instructions and repacked with s_pack_ll_b32_b16, the %rhs half uses
; a single v_pk_lshrrev_b16, and a final v_or_b32 combines the two.
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them instead of editing by hand.
4025define amdgpu_ps float @v_fshr_v2i16_svs(<2 x i16> inreg %lhs, <2 x i16> %rhs, <2 x i16> inreg %amt) {
4026; GFX6-LABEL: v_fshr_v2i16_svs:
4027; GFX6:       ; %bb.0:
4028; GFX6-NEXT:    v_bfe_u32 v2, v0, 1, 15
4029; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
4030; GFX6-NEXT:    s_and_b32 s2, s2, 0xffff
4031; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
4032; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 14, v2
4033; GFX6-NEXT:    v_bfe_u32 v3, v1, 1, 15
4034; GFX6-NEXT:    s_or_b32 s2, s3, s2
4035; GFX6-NEXT:    v_or_b32_e32 v2, s0, v2
4036; GFX6-NEXT:    s_lshl_b32 s0, s1, 1
4037; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 14, v3
4038; GFX6-NEXT:    v_or_b32_e32 v3, s0, v3
4039; GFX6-NEXT:    s_xor_b32 s0, s2, -1
4040; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
4041; GFX6-NEXT:    s_lshr_b32 s1, s0, 16
4042; GFX6-NEXT:    s_and_b32 s2, s0, 15
4043; GFX6-NEXT:    s_andn2_b32 s0, 15, s0
4044; GFX6-NEXT:    v_bfe_u32 v0, v0, 1, 15
4045; GFX6-NEXT:    s_and_b32 s0, 0xffff, s0
4046; GFX6-NEXT:    s_and_b32 s2, 0xffff, s2
4047; GFX6-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
4048; GFX6-NEXT:    s_and_b32 s0, s1, 15
4049; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
4050; GFX6-NEXT:    v_lshlrev_b32_e32 v2, s2, v2
4051; GFX6-NEXT:    s_andn2_b32 s1, 15, s1
4052; GFX6-NEXT:    s_and_b32 s0, 0xffff, s0
4053; GFX6-NEXT:    v_or_b32_e32 v0, v2, v0
4054; GFX6-NEXT:    v_lshlrev_b32_e32 v2, s0, v3
4055; GFX6-NEXT:    v_bfe_u32 v1, v1, 1, 15
4056; GFX6-NEXT:    s_and_b32 s0, 0xffff, s1
4057; GFX6-NEXT:    v_lshrrev_b32_e32 v1, s0, v1
4058; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
4059; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
4060; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
4061; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
4062; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
4063; GFX6-NEXT:    ; return to shader part epilog
4064;
4065; GFX8-LABEL: v_fshr_v2i16_svs:
4066; GFX8:       ; %bb.0:
4067; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
4068; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
4069; GFX8-NEXT:    v_lshrrev_b16_e32 v1, 15, v0
4070; GFX8-NEXT:    v_mov_b32_e32 v2, 15
4071; GFX8-NEXT:    v_or_b32_e32 v1, s0, v1
4072; GFX8-NEXT:    s_lshl_b32 s0, s2, 1
4073; GFX8-NEXT:    v_lshrrev_b16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4074; GFX8-NEXT:    v_or_b32_e32 v2, s0, v2
4075; GFX8-NEXT:    v_lshlrev_b16_e32 v3, 1, v0
4076; GFX8-NEXT:    v_mov_b32_e32 v4, 1
4077; GFX8-NEXT:    s_xor_b32 s0, s1, -1
4078; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4079; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
4080; GFX8-NEXT:    s_and_b32 s2, s0, 15
4081; GFX8-NEXT:    s_andn2_b32 s0, 15, s0
4082; GFX8-NEXT:    v_lshrrev_b16_e32 v3, 1, v3
4083; GFX8-NEXT:    v_lshrrev_b16_e32 v3, s0, v3
4084; GFX8-NEXT:    s_and_b32 s0, s1, 15
4085; GFX8-NEXT:    s_andn2_b32 s1, 15, s1
4086; GFX8-NEXT:    v_lshrrev_b16_e32 v0, 1, v0
4087; GFX8-NEXT:    v_lshlrev_b16_e32 v2, s0, v2
4088; GFX8-NEXT:    v_lshrrev_b16_e32 v0, s1, v0
4089; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
4090; GFX8-NEXT:    v_lshlrev_b16_e32 v1, s2, v1
4091; GFX8-NEXT:    v_and_b32_e32 v0, 0xffff, v0
4092; GFX8-NEXT:    v_or_b32_e32 v1, v1, v3
4093; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
4094; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4095; GFX8-NEXT:    ; return to shader part epilog
4096;
4097; GFX9-LABEL: v_fshr_v2i16_svs:
4098; GFX9:       ; %bb.0:
4099; GFX9-NEXT:    s_lshr_b32 s3, s0, 16
4100; GFX9-NEXT:    s_lshl_b32 s0, s0, 0x10001
4101; GFX9-NEXT:    s_lshl_b32 s3, s3, 1
4102; GFX9-NEXT:    s_and_b32 s2, s1, 0xf000f
4103; GFX9-NEXT:    s_andn2_b32 s1, 0xf000f, s1
4104; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s3
4105; GFX9-NEXT:    s_lshr_b32 s3, s0, 16
4106; GFX9-NEXT:    s_lshr_b32 s4, s1, 16
4107; GFX9-NEXT:    s_lshl_b32 s0, s0, s1
4108; GFX9-NEXT:    s_lshl_b32 s1, s3, s4
4109; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
4110; GFX9-NEXT:    v_pk_lshrrev_b16 v0, s2, v0
4111; GFX9-NEXT:    v_or_b32_e32 v0, s0, v0
4112; GFX9-NEXT:    ; return to shader part epilog
4113;
4114; GFX10-LABEL: v_fshr_v2i16_svs:
4115; GFX10:       ; %bb.0:
4116; GFX10-NEXT:    s_lshr_b32 s2, s0, 16
4117; GFX10-NEXT:    s_lshl_b32 s0, s0, 0x10001
4118; GFX10-NEXT:    s_lshl_b32 s2, s2, 1
4119; GFX10-NEXT:    s_and_b32 s3, s1, 0xf000f
4120; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
4121; GFX10-NEXT:    s_andn2_b32 s1, 0xf000f, s1
4122; GFX10-NEXT:    s_lshr_b32 s2, s0, 16
4123; GFX10-NEXT:    s_lshr_b32 s4, s1, 16
4124; GFX10-NEXT:    v_pk_lshrrev_b16 v0, s3, v0
4125; GFX10-NEXT:    s_lshl_b32 s0, s0, s1
4126; GFX10-NEXT:    s_lshl_b32 s1, s2, s4
4127; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
4128; GFX10-NEXT:    v_or_b32_e32 v0, s0, v0
4129; GFX10-NEXT:    ; return to shader part epilog
4130;
4131; GFX11-LABEL: v_fshr_v2i16_svs:
4132; GFX11:       ; %bb.0:
4133; GFX11-NEXT:    s_lshr_b32 s2, s0, 16
4134; GFX11-NEXT:    s_lshl_b32 s0, s0, 0x10001
4135; GFX11-NEXT:    s_lshl_b32 s2, s2, 1
4136; GFX11-NEXT:    s_and_b32 s3, s1, 0xf000f
4137; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
4138; GFX11-NEXT:    s_and_not1_b32 s1, 0xf000f, s1
4139; GFX11-NEXT:    s_lshr_b32 s2, s0, 16
4140; GFX11-NEXT:    s_lshr_b32 s4, s1, 16
4141; GFX11-NEXT:    v_pk_lshrrev_b16 v0, s3, v0
4142; GFX11-NEXT:    s_lshl_b32 s0, s0, s1
4143; GFX11-NEXT:    s_lshl_b32 s1, s2, s4
4144; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
4145; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
4146; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
4147; GFX11-NEXT:    v_or_b32_e32 v0, s0, v0
4148; GFX11-NEXT:    ; return to shader part epilog
4149  %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
  ; The function returns float, so the two i16 lanes are reinterpreted as one
  ; 32-bit float.
4150  %cast = bitcast <2 x i16> %result to float
4151  ret float %cast
4152}
4153
; v_fshr_v2i16_vss: funnel shift right (llvm.fshr) on <2 x i16> in an amdgpu_ps
; function where %rhs and %amt are marked inreg and %lhs is not. The result is
; bitcast to float and returned.
; In the GFX9/GFX10/GFX11 expected output the %lhs half uses v_pk_lshlrev_b16,
; the %rhs half is shifted per 16-bit lane with s_lshr_b32 and repacked with
; s_pack_ll_b32_b16, and a final v_or_b32 combines the two.
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them instead of editing by hand.
4154define amdgpu_ps float @v_fshr_v2i16_vss(<2 x i16> %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) {
4155; GFX6-LABEL: v_fshr_v2i16_vss:
4156; GFX6:       ; %bb.0:
4157; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
4158; GFX6-NEXT:    s_and_b32 s2, s2, 0xffff
4159; GFX6-NEXT:    s_or_b32 s2, s3, s2
4160; GFX6-NEXT:    s_bfe_u32 s3, s0, 0xf0001
4161; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
4162; GFX6-NEXT:    s_lshr_b32 s3, s3, 14
4163; GFX6-NEXT:    v_or_b32_e32 v0, s3, v0
4164; GFX6-NEXT:    s_bfe_u32 s3, s1, 0xf0001
4165; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
4166; GFX6-NEXT:    s_lshr_b32 s3, s3, 14
4167; GFX6-NEXT:    s_xor_b32 s2, s2, -1
4168; GFX6-NEXT:    v_or_b32_e32 v1, s3, v1
4169; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
4170; GFX6-NEXT:    s_lshr_b32 s3, s2, 16
4171; GFX6-NEXT:    s_and_b32 s4, s2, 15
4172; GFX6-NEXT:    s_andn2_b32 s2, 15, s2
4173; GFX6-NEXT:    s_and_b32 s4, 0xffff, s4
4174; GFX6-NEXT:    s_bfe_u32 s0, s0, 0xf0001
4175; GFX6-NEXT:    s_and_b32 s2, 0xffff, s2
4176; GFX6-NEXT:    v_lshlrev_b32_e32 v0, s4, v0
4177; GFX6-NEXT:    s_lshr_b32 s0, s0, s2
4178; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
4179; GFX6-NEXT:    s_and_b32 s0, s3, 15
4180; GFX6-NEXT:    s_lshl_b32 s1, s1, 1
4181; GFX6-NEXT:    s_andn2_b32 s2, 15, s3
4182; GFX6-NEXT:    s_and_b32 s0, 0xffff, s0
4183; GFX6-NEXT:    v_lshlrev_b32_e32 v1, s0, v1
4184; GFX6-NEXT:    s_bfe_u32 s0, s1, 0xf0001
4185; GFX6-NEXT:    s_and_b32 s1, 0xffff, s2
4186; GFX6-NEXT:    s_lshr_b32 s0, s0, s1
4187; GFX6-NEXT:    v_or_b32_e32 v1, s0, v1
4188; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
4189; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
4190; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
4191; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
4192; GFX6-NEXT:    ; return to shader part epilog
4193;
4194; GFX8-LABEL: v_fshr_v2i16_vss:
4195; GFX8:       ; %bb.0:
4196; GFX8-NEXT:    s_and_b32 s3, 0xffff, s0
4197; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
4198; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 1, v0
4199; GFX8-NEXT:    s_lshr_b32 s3, s3, 15
4200; GFX8-NEXT:    v_mov_b32_e32 v2, 1
4201; GFX8-NEXT:    v_or_b32_e32 v1, s3, v1
4202; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4203; GFX8-NEXT:    s_lshr_b32 s3, s2, 15
4204; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
4205; GFX8-NEXT:    s_xor_b32 s1, s1, -1
4206; GFX8-NEXT:    v_or_b32_e32 v0, s3, v0
4207; GFX8-NEXT:    s_lshr_b32 s3, s1, 16
4208; GFX8-NEXT:    s_and_b32 s4, s1, 15
4209; GFX8-NEXT:    s_andn2_b32 s1, 15, s1
4210; GFX8-NEXT:    s_and_b32 s0, 0xffff, s0
4211; GFX8-NEXT:    s_lshr_b32 s0, s0, 1
4212; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
4213; GFX8-NEXT:    v_lshlrev_b16_e32 v1, s4, v1
4214; GFX8-NEXT:    s_lshr_b32 s0, s0, s1
4215; GFX8-NEXT:    s_lshl_b32 s2, s2, 1
4216; GFX8-NEXT:    v_or_b32_e32 v1, s0, v1
4217; GFX8-NEXT:    s_and_b32 s0, s3, 15
4218; GFX8-NEXT:    s_andn2_b32 s1, 15, s3
4219; GFX8-NEXT:    v_lshlrev_b16_e32 v0, s0, v0
4220; GFX8-NEXT:    s_and_b32 s0, 0xffff, s2
4221; GFX8-NEXT:    s_lshr_b32 s0, s0, 1
4222; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
4223; GFX8-NEXT:    s_lshr_b32 s0, s0, s1
4224; GFX8-NEXT:    v_or_b32_e32 v0, s0, v0
4225; GFX8-NEXT:    v_and_b32_e32 v0, 0xffff, v0
4226; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
4227; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4228; GFX8-NEXT:    ; return to shader part epilog
4229;
4230; GFX9-LABEL: v_fshr_v2i16_vss:
4231; GFX9:       ; %bb.0:
4232; GFX9-NEXT:    s_and_b32 s2, s1, 0xf000f
4233; GFX9-NEXT:    s_andn2_b32 s1, 0xf000f, s1
4234; GFX9-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4235; GFX9-NEXT:    v_pk_lshlrev_b16 v0, s1, v0
4236; GFX9-NEXT:    s_lshr_b32 s1, s0, 16
4237; GFX9-NEXT:    s_and_b32 s0, s0, 0xffff
4238; GFX9-NEXT:    s_lshr_b32 s3, s2, 16
4239; GFX9-NEXT:    s_lshr_b32 s0, s0, s2
4240; GFX9-NEXT:    s_lshr_b32 s1, s1, s3
4241; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
4242; GFX9-NEXT:    v_or_b32_e32 v0, s0, v0
4243; GFX9-NEXT:    ; return to shader part epilog
4244;
4245; GFX10-LABEL: v_fshr_v2i16_vss:
4246; GFX10:       ; %bb.0:
4247; GFX10-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4248; GFX10-NEXT:    s_and_b32 s2, s1, 0xf000f
4249; GFX10-NEXT:    s_andn2_b32 s1, 0xf000f, s1
4250; GFX10-NEXT:    s_lshr_b32 s3, s0, 16
4251; GFX10-NEXT:    s_and_b32 s0, s0, 0xffff
4252; GFX10-NEXT:    s_lshr_b32 s4, s2, 16
4253; GFX10-NEXT:    v_pk_lshlrev_b16 v0, s1, v0
4254; GFX10-NEXT:    s_lshr_b32 s0, s0, s2
4255; GFX10-NEXT:    s_lshr_b32 s1, s3, s4
4256; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
4257; GFX10-NEXT:    v_or_b32_e32 v0, s0, v0
4258; GFX10-NEXT:    ; return to shader part epilog
4259;
4260; GFX11-LABEL: v_fshr_v2i16_vss:
4261; GFX11:       ; %bb.0:
4262; GFX11-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4263; GFX11-NEXT:    s_and_b32 s2, s1, 0xf000f
4264; GFX11-NEXT:    s_and_not1_b32 s1, 0xf000f, s1
4265; GFX11-NEXT:    s_lshr_b32 s3, s0, 16
4266; GFX11-NEXT:    s_and_b32 s0, s0, 0xffff
4267; GFX11-NEXT:    s_lshr_b32 s4, s2, 16
4268; GFX11-NEXT:    v_pk_lshlrev_b16 v0, s1, v0
4269; GFX11-NEXT:    s_lshr_b32 s0, s0, s2
4270; GFX11-NEXT:    s_lshr_b32 s1, s3, s4
4271; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
4272; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
4273; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
4274; GFX11-NEXT:    v_or_b32_e32 v0, s0, v0
4275; GFX11-NEXT:    ; return to shader part epilog
4276  %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
  ; The function returns float, so the two i16 lanes are reinterpreted as one
  ; 32-bit float.
4277  %cast = bitcast <2 x i16> %result to float
4278  ret float %cast
4279}
4280
; s_fshr_v3i16 - funnel shift right (llvm.fshr) on <3 x i16> in an amdgpu_ps
; function whose three operands are all marked inreg. The result is bitcast to
; i48 and returned. For every target checked below, the expected code consists
; solely of scalar (s_*) instructions.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py (see
; the NOTE at the top of this file); regenerate them with that script instead of
; editing them by hand.
4281define amdgpu_ps i48 @s_fshr_v3i16(<3 x i16> inreg %lhs, <3 x i16> inreg %rhs, <3 x i16> inreg %amt) {
4282; GFX6-LABEL: s_fshr_v3i16:
4283; GFX6:       ; %bb.0:
4284; GFX6-NEXT:    s_and_b32 s7, s7, 0xffff
4285; GFX6-NEXT:    s_and_b32 s6, s6, 0xffff
4286; GFX6-NEXT:    s_lshl_b32 s7, s7, 16
4287; GFX6-NEXT:    s_or_b32 s6, s6, s7
4288; GFX6-NEXT:    s_and_b32 s7, s8, 0xffff
4289; GFX6-NEXT:    s_bfe_u32 s8, s3, 0xf0001
4290; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
4291; GFX6-NEXT:    s_lshr_b32 s8, s8, 14
4292; GFX6-NEXT:    s_or_b32 s0, s0, s8
4293; GFX6-NEXT:    s_bfe_u32 s8, s4, 0xf0001
4294; GFX6-NEXT:    s_lshl_b32 s1, s1, 1
4295; GFX6-NEXT:    s_lshr_b32 s8, s8, 14
4296; GFX6-NEXT:    s_xor_b32 s6, s6, -1
4297; GFX6-NEXT:    s_or_b32 s1, s1, s8
4298; GFX6-NEXT:    s_lshl_b32 s3, s3, 1
4299; GFX6-NEXT:    s_lshr_b32 s8, s6, 16
4300; GFX6-NEXT:    s_and_b32 s9, s6, 15
4301; GFX6-NEXT:    s_andn2_b32 s6, 15, s6
4302; GFX6-NEXT:    s_and_b32 s9, 0xffff, s9
4303; GFX6-NEXT:    s_bfe_u32 s3, s3, 0xf0001
4304; GFX6-NEXT:    s_and_b32 s6, 0xffff, s6
4305; GFX6-NEXT:    s_lshl_b32 s0, s0, s9
4306; GFX6-NEXT:    s_lshr_b32 s3, s3, s6
4307; GFX6-NEXT:    s_or_b32 s0, s0, s3
4308; GFX6-NEXT:    s_and_b32 s3, s8, 15
4309; GFX6-NEXT:    s_lshl_b32 s4, s4, 1
4310; GFX6-NEXT:    s_andn2_b32 s6, 15, s8
4311; GFX6-NEXT:    s_and_b32 s3, 0xffff, s3
4312; GFX6-NEXT:    s_lshl_b32 s1, s1, s3
4313; GFX6-NEXT:    s_bfe_u32 s3, s4, 0xf0001
4314; GFX6-NEXT:    s_and_b32 s4, 0xffff, s6
4315; GFX6-NEXT:    s_lshr_b32 s3, s3, s4
4316; GFX6-NEXT:    s_or_b32 s1, s1, s3
4317; GFX6-NEXT:    s_bfe_u32 s3, s5, 0xf0001
4318; GFX6-NEXT:    s_lshl_b32 s2, s2, 1
4319; GFX6-NEXT:    s_lshr_b32 s3, s3, 14
4320; GFX6-NEXT:    s_xor_b32 s4, s7, -1
4321; GFX6-NEXT:    s_or_b32 s2, s2, s3
4322; GFX6-NEXT:    s_lshl_b32 s3, s5, 1
4323; GFX6-NEXT:    s_and_b32 s5, s4, 15
4324; GFX6-NEXT:    s_andn2_b32 s4, 15, s4
4325; GFX6-NEXT:    s_and_b32 s5, 0xffff, s5
4326; GFX6-NEXT:    s_bfe_u32 s3, s3, 0xf0001
4327; GFX6-NEXT:    s_and_b32 s4, 0xffff, s4
4328; GFX6-NEXT:    s_lshl_b32 s2, s2, s5
4329; GFX6-NEXT:    s_lshr_b32 s3, s3, s4
4330; GFX6-NEXT:    s_and_b32 s1, 0xffff, s1
4331; GFX6-NEXT:    s_or_b32 s2, s2, s3
4332; GFX6-NEXT:    s_and_b32 s0, 0xffff, s0
4333; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
4334; GFX6-NEXT:    s_or_b32 s0, s0, s1
4335; GFX6-NEXT:    s_and_b32 s1, 0xffff, s2
4336; GFX6-NEXT:    ; return to shader part epilog
4337;
4338; GFX8-LABEL: s_fshr_v3i16:
4339; GFX8:       ; %bb.0:
4340; GFX8-NEXT:    s_and_b32 s8, 0xffff, s2
4341; GFX8-NEXT:    s_lshr_b32 s6, s0, 16
4342; GFX8-NEXT:    s_lshr_b32 s7, s2, 16
4343; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
4344; GFX8-NEXT:    s_lshr_b32 s8, s8, 15
4345; GFX8-NEXT:    s_or_b32 s0, s0, s8
4346; GFX8-NEXT:    s_lshl_b32 s6, s6, 1
4347; GFX8-NEXT:    s_lshr_b32 s8, s7, 15
4348; GFX8-NEXT:    s_lshl_b32 s2, s2, 1
4349; GFX8-NEXT:    s_xor_b32 s4, s4, -1
4350; GFX8-NEXT:    s_or_b32 s6, s6, s8
4351; GFX8-NEXT:    s_lshr_b32 s8, s4, 16
4352; GFX8-NEXT:    s_and_b32 s9, s4, 15
4353; GFX8-NEXT:    s_andn2_b32 s4, 15, s4
4354; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
4355; GFX8-NEXT:    s_and_b32 s9, 0xffff, s9
4356; GFX8-NEXT:    s_lshr_b32 s2, s2, 1
4357; GFX8-NEXT:    s_and_b32 s4, 0xffff, s4
4358; GFX8-NEXT:    s_lshl_b32 s0, s0, s9
4359; GFX8-NEXT:    s_lshr_b32 s2, s2, s4
4360; GFX8-NEXT:    s_or_b32 s0, s0, s2
4361; GFX8-NEXT:    s_and_b32 s2, s8, 15
4362; GFX8-NEXT:    s_lshl_b32 s7, s7, 1
4363; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
4364; GFX8-NEXT:    s_andn2_b32 s4, 15, s8
4365; GFX8-NEXT:    s_lshl_b32 s2, s6, s2
4366; GFX8-NEXT:    s_and_b32 s6, 0xffff, s7
4367; GFX8-NEXT:    s_lshr_b32 s6, s6, 1
4368; GFX8-NEXT:    s_and_b32 s4, 0xffff, s4
4369; GFX8-NEXT:    s_lshr_b32 s4, s6, s4
4370; GFX8-NEXT:    s_or_b32 s2, s2, s4
4371; GFX8-NEXT:    s_and_b32 s4, 0xffff, s3
4372; GFX8-NEXT:    s_lshl_b32 s1, s1, 1
4373; GFX8-NEXT:    s_lshr_b32 s4, s4, 15
4374; GFX8-NEXT:    s_or_b32 s1, s1, s4
4375; GFX8-NEXT:    s_lshl_b32 s3, s3, 1
4376; GFX8-NEXT:    s_xor_b32 s4, s5, -1
4377; GFX8-NEXT:    s_and_b32 s5, s4, 15
4378; GFX8-NEXT:    s_andn2_b32 s4, 15, s4
4379; GFX8-NEXT:    s_and_b32 s3, 0xffff, s3
4380; GFX8-NEXT:    s_and_b32 s5, 0xffff, s5
4381; GFX8-NEXT:    s_lshr_b32 s3, s3, 1
4382; GFX8-NEXT:    s_and_b32 s4, 0xffff, s4
4383; GFX8-NEXT:    s_lshl_b32 s1, s1, s5
4384; GFX8-NEXT:    s_lshr_b32 s3, s3, s4
4385; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
4386; GFX8-NEXT:    s_or_b32 s1, s1, s3
4387; GFX8-NEXT:    s_and_b32 s0, 0xffff, s0
4388; GFX8-NEXT:    s_lshl_b32 s2, s2, 16
4389; GFX8-NEXT:    s_or_b32 s0, s0, s2
4390; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
4391; GFX8-NEXT:    ; return to shader part epilog
4392;
4393; GFX9-LABEL: s_fshr_v3i16:
4394; GFX9:       ; %bb.0:
4395; GFX9-NEXT:    s_lshr_b32 s7, s0, 16
4396; GFX9-NEXT:    s_lshl_b32 s0, s0, 0x10001
4397; GFX9-NEXT:    s_lshl_b32 s7, s7, 1
4398; GFX9-NEXT:    s_and_b32 s6, s4, 0xf000f
4399; GFX9-NEXT:    s_andn2_b32 s4, 0xf000f, s4
4400; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s7
4401; GFX9-NEXT:    s_lshr_b32 s7, s0, 16
4402; GFX9-NEXT:    s_lshr_b32 s8, s4, 16
4403; GFX9-NEXT:    s_lshl_b32 s0, s0, s4
4404; GFX9-NEXT:    s_lshl_b32 s4, s7, s8
4405; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s4
4406; GFX9-NEXT:    s_lshr_b32 s4, s2, 16
4407; GFX9-NEXT:    s_and_b32 s2, s2, 0xffff
4408; GFX9-NEXT:    s_lshr_b32 s7, s6, 16
4409; GFX9-NEXT:    s_lshr_b32 s2, s2, s6
4410; GFX9-NEXT:    s_lshr_b32 s4, s4, s7
4411; GFX9-NEXT:    s_pack_ll_b32_b16 s2, s2, s4
4412; GFX9-NEXT:    s_or_b32 s0, s0, s2
4413; GFX9-NEXT:    s_and_b32 s2, s5, 0xf000f
4414; GFX9-NEXT:    s_andn2_b32 s4, 0xf000f, s5
4415; GFX9-NEXT:    s_lshr_b32 s5, s1, 16
4416; GFX9-NEXT:    s_lshl_b32 s1, s1, 0x10001
4417; GFX9-NEXT:    s_lshl_b32 s5, s5, 1
4418; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s1, s5
4419; GFX9-NEXT:    s_lshr_b32 s5, s1, 16
4420; GFX9-NEXT:    s_lshr_b32 s6, s4, 16
4421; GFX9-NEXT:    s_lshl_b32 s1, s1, s4
4422; GFX9-NEXT:    s_lshl_b32 s4, s5, s6
4423; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s1, s4
4424; GFX9-NEXT:    s_lshr_b32 s4, s3, 16
4425; GFX9-NEXT:    s_and_b32 s3, s3, 0xffff
4426; GFX9-NEXT:    s_lshr_b32 s5, s2, 16
4427; GFX9-NEXT:    s_lshr_b32 s2, s3, s2
4428; GFX9-NEXT:    s_lshr_b32 s3, s4, s5
4429; GFX9-NEXT:    s_pack_ll_b32_b16 s2, s2, s3
4430; GFX9-NEXT:    s_or_b32 s1, s1, s2
4431; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
4432; GFX9-NEXT:    s_and_b32 s0, s0, 0xffff
4433; GFX9-NEXT:    s_lshl_b32 s2, s2, 16
4434; GFX9-NEXT:    s_or_b32 s0, s0, s2
4435; GFX9-NEXT:    s_and_b32 s1, s1, 0xffff
4436; GFX9-NEXT:    ; return to shader part epilog
4437;
4438; GFX10-LABEL: s_fshr_v3i16:
4439; GFX10:       ; %bb.0:
4440; GFX10-NEXT:    s_lshr_b32 s6, s0, 16
4441; GFX10-NEXT:    s_lshl_b32 s0, s0, 0x10001
4442; GFX10-NEXT:    s_lshl_b32 s6, s6, 1
4443; GFX10-NEXT:    s_and_b32 s7, s4, 0xf000f
4444; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s6
4445; GFX10-NEXT:    s_andn2_b32 s4, 0xf000f, s4
4446; GFX10-NEXT:    s_lshr_b32 s6, s0, 16
4447; GFX10-NEXT:    s_lshr_b32 s8, s4, 16
4448; GFX10-NEXT:    s_lshl_b32 s0, s0, s4
4449; GFX10-NEXT:    s_lshl_b32 s4, s6, s8
4450; GFX10-NEXT:    s_lshr_b32 s6, s2, 16
4451; GFX10-NEXT:    s_and_b32 s2, s2, 0xffff
4452; GFX10-NEXT:    s_lshr_b32 s8, s7, 16
4453; GFX10-NEXT:    s_lshr_b32 s2, s2, s7
4454; GFX10-NEXT:    s_lshr_b32 s6, s6, s8
4455; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s4
4456; GFX10-NEXT:    s_pack_ll_b32_b16 s2, s2, s6
4457; GFX10-NEXT:    s_and_b32 s4, s5, 0xf000f
4458; GFX10-NEXT:    s_or_b32 s0, s0, s2
4459; GFX10-NEXT:    s_lshr_b32 s2, s1, 16
4460; GFX10-NEXT:    s_lshl_b32 s1, s1, 0x10001
4461; GFX10-NEXT:    s_lshl_b32 s2, s2, 1
4462; GFX10-NEXT:    s_pack_ll_b32_b16 s1, s1, s2
4463; GFX10-NEXT:    s_andn2_b32 s2, 0xf000f, s5
4464; GFX10-NEXT:    s_lshr_b32 s5, s1, 16
4465; GFX10-NEXT:    s_lshr_b32 s6, s2, 16
4466; GFX10-NEXT:    s_lshl_b32 s1, s1, s2
4467; GFX10-NEXT:    s_lshl_b32 s2, s5, s6
4468; GFX10-NEXT:    s_lshr_b32 s5, s3, 16
4469; GFX10-NEXT:    s_and_b32 s3, s3, 0xffff
4470; GFX10-NEXT:    s_lshr_b32 s6, s4, 16
4471; GFX10-NEXT:    s_lshr_b32 s3, s3, s4
4472; GFX10-NEXT:    s_lshr_b32 s4, s5, s6
4473; GFX10-NEXT:    s_pack_ll_b32_b16 s1, s1, s2
4474; GFX10-NEXT:    s_pack_ll_b32_b16 s2, s3, s4
4475; GFX10-NEXT:    s_lshr_b32 s3, s0, 16
4476; GFX10-NEXT:    s_and_b32 s0, s0, 0xffff
4477; GFX10-NEXT:    s_lshl_b32 s3, s3, 16
4478; GFX10-NEXT:    s_or_b32 s1, s1, s2
4479; GFX10-NEXT:    s_or_b32 s0, s0, s3
4480; GFX10-NEXT:    s_and_b32 s1, s1, 0xffff
4481; GFX10-NEXT:    ; return to shader part epilog
4482;
4483; GFX11-LABEL: s_fshr_v3i16:
4484; GFX11:       ; %bb.0:
4485; GFX11-NEXT:    s_lshr_b32 s6, s0, 16
4486; GFX11-NEXT:    s_lshl_b32 s0, s0, 0x10001
4487; GFX11-NEXT:    s_lshl_b32 s6, s6, 1
4488; GFX11-NEXT:    s_and_b32 s7, s4, 0xf000f
4489; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s6
4490; GFX11-NEXT:    s_and_not1_b32 s4, 0xf000f, s4
4491; GFX11-NEXT:    s_lshr_b32 s6, s0, 16
4492; GFX11-NEXT:    s_lshr_b32 s8, s4, 16
4493; GFX11-NEXT:    s_lshl_b32 s0, s0, s4
4494; GFX11-NEXT:    s_lshl_b32 s4, s6, s8
4495; GFX11-NEXT:    s_lshr_b32 s6, s2, 16
4496; GFX11-NEXT:    s_and_b32 s2, s2, 0xffff
4497; GFX11-NEXT:    s_lshr_b32 s8, s7, 16
4498; GFX11-NEXT:    s_lshr_b32 s2, s2, s7
4499; GFX11-NEXT:    s_lshr_b32 s6, s6, s8
4500; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s4
4501; GFX11-NEXT:    s_pack_ll_b32_b16 s2, s2, s6
4502; GFX11-NEXT:    s_and_b32 s4, s5, 0xf000f
4503; GFX11-NEXT:    s_or_b32 s0, s0, s2
4504; GFX11-NEXT:    s_lshr_b32 s2, s1, 16
4505; GFX11-NEXT:    s_lshl_b32 s1, s1, 0x10001
4506; GFX11-NEXT:    s_lshl_b32 s2, s2, 1
4507; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
4508; GFX11-NEXT:    s_pack_ll_b32_b16 s1, s1, s2
4509; GFX11-NEXT:    s_and_not1_b32 s2, 0xf000f, s5
4510; GFX11-NEXT:    s_lshr_b32 s5, s1, 16
4511; GFX11-NEXT:    s_lshr_b32 s6, s2, 16
4512; GFX11-NEXT:    s_lshl_b32 s1, s1, s2
4513; GFX11-NEXT:    s_lshl_b32 s2, s5, s6
4514; GFX11-NEXT:    s_lshr_b32 s5, s3, 16
4515; GFX11-NEXT:    s_and_b32 s3, s3, 0xffff
4516; GFX11-NEXT:    s_lshr_b32 s6, s4, 16
4517; GFX11-NEXT:    s_lshr_b32 s3, s3, s4
4518; GFX11-NEXT:    s_lshr_b32 s4, s5, s6
4519; GFX11-NEXT:    s_pack_ll_b32_b16 s1, s1, s2
4520; GFX11-NEXT:    s_pack_ll_b32_b16 s2, s3, s4
4521; GFX11-NEXT:    s_lshr_b32 s3, s0, 16
4522; GFX11-NEXT:    s_and_b32 s0, s0, 0xffff
4523; GFX11-NEXT:    s_lshl_b32 s3, s3, 16
4524; GFX11-NEXT:    s_or_b32 s1, s1, s2
4525; GFX11-NEXT:    s_or_b32 s0, s0, s3
4526; GFX11-NEXT:    s_and_b32 s1, s1, 0xffff
4527; GFX11-NEXT:    ; return to shader part epilog
; IR under test - a single fshr call; the <3 x i16> result is reinterpreted as
; i48 for the return value.
4528  %result = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt)
4529  %cast = bitcast <3 x i16> %result to i48
4530  ret i48 %cast
4531}
4532
; v_fshr_v3i16 - funnel shift right (llvm.fshr) on <3 x i16> in a function with
; no calling-convention keyword and no inreg operands. The result is bitcast to
; <3 x half> and returned. The expected code does the shifting with vector (v_*)
; instructions and ends in s_setpc_b64 s[30:31]; the gfx900, gfx1010 and gfx1100
; expectations use packed v_pk_* 16-bit shifts.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py (see
; the NOTE at the top of this file); regenerate them with that script instead of
; editing them by hand.
4533define <3 x half> @v_fshr_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) {
4534; GFX6-LABEL: v_fshr_v3i16:
4535; GFX6:       ; %bb.0:
4536; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4537; GFX6-NEXT:    v_and_b32_e32 v7, 0xffff, v7
4538; GFX6-NEXT:    v_and_b32_e32 v6, 0xffff, v6
4539; GFX6-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
4540; GFX6-NEXT:    v_or_b32_e32 v6, v6, v7
4541; GFX6-NEXT:    v_and_b32_e32 v7, 0xffff, v8
4542; GFX6-NEXT:    v_bfe_u32 v8, v3, 1, 15
4543; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
4544; GFX6-NEXT:    v_lshrrev_b32_e32 v8, 14, v8
4545; GFX6-NEXT:    v_or_b32_e32 v0, v0, v8
4546; GFX6-NEXT:    v_bfe_u32 v8, v4, 1, 15
4547; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
4548; GFX6-NEXT:    v_lshrrev_b32_e32 v8, 14, v8
4549; GFX6-NEXT:    v_xor_b32_e32 v6, -1, v6
4550; GFX6-NEXT:    v_or_b32_e32 v1, v1, v8
4551; GFX6-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
4552; GFX6-NEXT:    v_and_b32_e32 v9, 15, v6
4553; GFX6-NEXT:    v_xor_b32_e32 v6, -1, v6
4554; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 1, v3
4555; GFX6-NEXT:    v_and_b32_e32 v6, 15, v6
4556; GFX6-NEXT:    v_and_b32_e32 v9, 0xffff, v9
4557; GFX6-NEXT:    v_bfe_u32 v3, v3, 1, 15
4558; GFX6-NEXT:    v_and_b32_e32 v6, 0xffff, v6
4559; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v9, v0
4560; GFX6-NEXT:    v_lshrrev_b32_e32 v3, v6, v3
4561; GFX6-NEXT:    v_or_b32_e32 v0, v0, v3
4562; GFX6-NEXT:    v_and_b32_e32 v3, 15, v8
4563; GFX6-NEXT:    v_xor_b32_e32 v6, -1, v8
4564; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 1, v4
4565; GFX6-NEXT:    v_and_b32_e32 v6, 15, v6
4566; GFX6-NEXT:    v_and_b32_e32 v3, 0xffff, v3
4567; GFX6-NEXT:    v_lshlrev_b32_e32 v1, v3, v1
4568; GFX6-NEXT:    v_bfe_u32 v3, v4, 1, 15
4569; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v6
4570; GFX6-NEXT:    v_lshrrev_b32_e32 v3, v4, v3
4571; GFX6-NEXT:    v_or_b32_e32 v1, v1, v3
4572; GFX6-NEXT:    v_bfe_u32 v3, v5, 1, 15
4573; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
4574; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 14, v3
4575; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v7
4576; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
4577; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 1, v5
4578; GFX6-NEXT:    v_and_b32_e32 v5, 15, v4
4579; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v4
4580; GFX6-NEXT:    v_and_b32_e32 v4, 15, v4
4581; GFX6-NEXT:    v_and_b32_e32 v5, 0xffff, v5
4582; GFX6-NEXT:    v_bfe_u32 v3, v3, 1, 15
4583; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v4
4584; GFX6-NEXT:    v_lshlrev_b32_e32 v2, v5, v2
4585; GFX6-NEXT:    v_lshrrev_b32_e32 v3, v4, v3
4586; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
4587; GFX6-NEXT:    s_setpc_b64 s[30:31]
4588;
4589; GFX8-LABEL: v_fshr_v3i16:
4590; GFX8:       ; %bb.0:
4591; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4592; GFX8-NEXT:    v_lshlrev_b16_e32 v6, 1, v0
4593; GFX8-NEXT:    v_lshrrev_b16_e32 v7, 15, v2
4594; GFX8-NEXT:    v_or_b32_e32 v6, v6, v7
4595; GFX8-NEXT:    v_mov_b32_e32 v7, 1
4596; GFX8-NEXT:    v_mov_b32_e32 v8, 15
4597; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4598; GFX8-NEXT:    v_lshrrev_b16_sdwa v9, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4599; GFX8-NEXT:    v_xor_b32_e32 v4, -1, v4
4600; GFX8-NEXT:    v_or_b32_e32 v0, v0, v9
4601; GFX8-NEXT:    v_lshlrev_b16_e32 v9, 1, v2
4602; GFX8-NEXT:    v_lshlrev_b16_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4603; GFX8-NEXT:    v_and_b32_e32 v7, 15, v4
4604; GFX8-NEXT:    v_xor_b32_e32 v10, -1, v4
4605; GFX8-NEXT:    v_and_b32_e32 v10, 15, v10
4606; GFX8-NEXT:    v_lshlrev_b16_e32 v6, v7, v6
4607; GFX8-NEXT:    v_lshrrev_b16_e32 v7, 1, v9
4608; GFX8-NEXT:    v_lshrrev_b16_e32 v7, v10, v7
4609; GFX8-NEXT:    v_or_b32_e32 v6, v6, v7
4610; GFX8-NEXT:    v_and_b32_sdwa v7, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
4611; GFX8-NEXT:    v_mov_b32_e32 v8, -1
4612; GFX8-NEXT:    v_xor_b32_sdwa v4, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
4613; GFX8-NEXT:    v_and_b32_e32 v4, 15, v4
4614; GFX8-NEXT:    v_lshrrev_b16_e32 v2, 1, v2
4615; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v7, v0
4616; GFX8-NEXT:    v_lshrrev_b16_e32 v2, v4, v2
4617; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
4618; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 1, v1
4619; GFX8-NEXT:    v_lshrrev_b16_e32 v2, 15, v3
4620; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
4621; GFX8-NEXT:    v_lshlrev_b16_e32 v2, 1, v3
4622; GFX8-NEXT:    v_xor_b32_e32 v3, -1, v5
4623; GFX8-NEXT:    v_and_b32_e32 v4, 15, v3
4624; GFX8-NEXT:    v_xor_b32_e32 v3, -1, v3
4625; GFX8-NEXT:    v_and_b32_e32 v3, 15, v3
4626; GFX8-NEXT:    v_lshrrev_b16_e32 v2, 1, v2
4627; GFX8-NEXT:    v_lshlrev_b16_e32 v1, v4, v1
4628; GFX8-NEXT:    v_lshrrev_b16_e32 v2, v3, v2
4629; GFX8-NEXT:    v_and_b32_e32 v0, 0xffff, v0
4630; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
4631; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
4632; GFX8-NEXT:    v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4633; GFX8-NEXT:    v_and_b32_e32 v1, 0xffff, v1
4634; GFX8-NEXT:    s_setpc_b64 s[30:31]
4635;
4636; GFX9-LABEL: v_fshr_v3i16:
4637; GFX9:       ; %bb.0:
4638; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4639; GFX9-NEXT:    v_and_b32_e32 v6, 0xf000f, v4
4640; GFX9-NEXT:    v_xor_b32_e32 v4, -1, v4
4641; GFX9-NEXT:    v_and_b32_e32 v4, 0xf000f, v4
4642; GFX9-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4643; GFX9-NEXT:    v_pk_lshlrev_b16 v0, v4, v0
4644; GFX9-NEXT:    v_pk_lshrrev_b16 v2, v6, v2
4645; GFX9-NEXT:    v_xor_b32_e32 v4, -1, v5
4646; GFX9-NEXT:    v_or_b32_e32 v0, v0, v2
4647; GFX9-NEXT:    v_and_b32_e32 v2, 0xf000f, v5
4648; GFX9-NEXT:    v_and_b32_e32 v4, 0xf000f, v4
4649; GFX9-NEXT:    v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1]
4650; GFX9-NEXT:    v_pk_lshlrev_b16 v1, v4, v1
4651; GFX9-NEXT:    v_pk_lshrrev_b16 v2, v2, v3
4652; GFX9-NEXT:    v_or_b32_e32 v1, v1, v2
4653; GFX9-NEXT:    s_setpc_b64 s[30:31]
4654;
4655; GFX10-LABEL: v_fshr_v3i16:
4656; GFX10:       ; %bb.0:
4657; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4658; GFX10-NEXT:    v_xor_b32_e32 v6, -1, v4
4659; GFX10-NEXT:    v_xor_b32_e32 v7, -1, v5
4660; GFX10-NEXT:    v_and_b32_e32 v4, 0xf000f, v4
4661; GFX10-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4662; GFX10-NEXT:    v_and_b32_e32 v5, 0xf000f, v5
4663; GFX10-NEXT:    v_and_b32_e32 v6, 0xf000f, v6
4664; GFX10-NEXT:    v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1]
4665; GFX10-NEXT:    v_and_b32_e32 v7, 0xf000f, v7
4666; GFX10-NEXT:    v_pk_lshrrev_b16 v2, v4, v2
4667; GFX10-NEXT:    v_pk_lshrrev_b16 v3, v5, v3
4668; GFX10-NEXT:    v_pk_lshlrev_b16 v0, v6, v0
4669; GFX10-NEXT:    v_pk_lshlrev_b16 v1, v7, v1
4670; GFX10-NEXT:    v_or_b32_e32 v0, v0, v2
4671; GFX10-NEXT:    v_or_b32_e32 v1, v1, v3
4672; GFX10-NEXT:    s_setpc_b64 s[30:31]
4673;
4674; GFX11-LABEL: v_fshr_v3i16:
4675; GFX11:       ; %bb.0:
4676; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4677; GFX11-NEXT:    v_xor_b32_e32 v6, -1, v4
4678; GFX11-NEXT:    v_xor_b32_e32 v7, -1, v5
4679; GFX11-NEXT:    v_and_b32_e32 v4, 0xf000f, v4
4680; GFX11-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4681; GFX11-NEXT:    v_and_b32_e32 v5, 0xf000f, v5
4682; GFX11-NEXT:    v_and_b32_e32 v6, 0xf000f, v6
4683; GFX11-NEXT:    v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1]
4684; GFX11-NEXT:    v_and_b32_e32 v7, 0xf000f, v7
4685; GFX11-NEXT:    v_pk_lshrrev_b16 v2, v4, v2
4686; GFX11-NEXT:    v_pk_lshrrev_b16 v3, v5, v3
4687; GFX11-NEXT:    v_pk_lshlrev_b16 v0, v6, v0
4688; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
4689; GFX11-NEXT:    v_pk_lshlrev_b16 v1, v7, v1
4690; GFX11-NEXT:    v_or_b32_e32 v0, v0, v2
4691; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
4692; GFX11-NEXT:    v_or_b32_e32 v1, v1, v3
4693; GFX11-NEXT:    s_setpc_b64 s[30:31]
; IR under test - a single fshr call; the <3 x i16> result is reinterpreted as
; <3 x half> for the return value.
4694  %result = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt)
4695  %cast.result = bitcast <3 x i16> %result to <3 x half>
4696  ret <3 x half> %cast.result
4697}
4698
; s_fshr_v4i16 - funnel shift right (llvm.fshr) on <4 x i16> in an amdgpu_ps
; function whose three operands are all marked inreg. The result is bitcast to
; <2 x i32> and returned. For every target checked below, the expected code
; consists solely of scalar (s_*) instructions.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py (see
; the NOTE at the top of this file); regenerate them with that script instead of
; editing them by hand.
4699define amdgpu_ps <2 x i32> @s_fshr_v4i16(<4 x i16> inreg %lhs, <4 x i16> inreg %rhs, <4 x i16> inreg %amt) {
4700; GFX6-LABEL: s_fshr_v4i16:
4701; GFX6:       ; %bb.0:
4702; GFX6-NEXT:    s_lshl_b32 s9, s9, 16
4703; GFX6-NEXT:    s_and_b32 s8, s8, 0xffff
4704; GFX6-NEXT:    s_or_b32 s8, s9, s8
4705; GFX6-NEXT:    s_lshl_b32 s9, s11, 16
4706; GFX6-NEXT:    s_and_b32 s10, s10, 0xffff
4707; GFX6-NEXT:    s_or_b32 s9, s9, s10
4708; GFX6-NEXT:    s_bfe_u32 s10, s4, 0xf0001
4709; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
4710; GFX6-NEXT:    s_lshr_b32 s10, s10, 14
4711; GFX6-NEXT:    s_or_b32 s0, s0, s10
4712; GFX6-NEXT:    s_bfe_u32 s10, s5, 0xf0001
4713; GFX6-NEXT:    s_lshl_b32 s1, s1, 1
4714; GFX6-NEXT:    s_lshr_b32 s10, s10, 14
4715; GFX6-NEXT:    s_xor_b32 s8, s8, -1
4716; GFX6-NEXT:    s_or_b32 s1, s1, s10
4717; GFX6-NEXT:    s_lshl_b32 s4, s4, 1
4718; GFX6-NEXT:    s_lshr_b32 s10, s8, 16
4719; GFX6-NEXT:    s_and_b32 s11, s8, 15
4720; GFX6-NEXT:    s_andn2_b32 s8, 15, s8
4721; GFX6-NEXT:    s_and_b32 s11, 0xffff, s11
4722; GFX6-NEXT:    s_bfe_u32 s4, s4, 0xf0001
4723; GFX6-NEXT:    s_and_b32 s8, 0xffff, s8
4724; GFX6-NEXT:    s_lshl_b32 s0, s0, s11
4725; GFX6-NEXT:    s_lshr_b32 s4, s4, s8
4726; GFX6-NEXT:    s_or_b32 s0, s0, s4
4727; GFX6-NEXT:    s_and_b32 s4, s10, 15
4728; GFX6-NEXT:    s_lshl_b32 s5, s5, 1
4729; GFX6-NEXT:    s_andn2_b32 s8, 15, s10
4730; GFX6-NEXT:    s_and_b32 s4, 0xffff, s4
4731; GFX6-NEXT:    s_lshl_b32 s1, s1, s4
4732; GFX6-NEXT:    s_bfe_u32 s4, s5, 0xf0001
4733; GFX6-NEXT:    s_and_b32 s5, 0xffff, s8
4734; GFX6-NEXT:    s_lshr_b32 s4, s4, s5
4735; GFX6-NEXT:    s_or_b32 s1, s1, s4
4736; GFX6-NEXT:    s_and_b32 s1, 0xffff, s1
4737; GFX6-NEXT:    s_and_b32 s0, 0xffff, s0
4738; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
4739; GFX6-NEXT:    s_or_b32 s0, s0, s1
4740; GFX6-NEXT:    s_lshl_b32 s1, s2, 1
4741; GFX6-NEXT:    s_bfe_u32 s2, s6, 0xf0001
4742; GFX6-NEXT:    s_lshr_b32 s2, s2, 14
4743; GFX6-NEXT:    s_or_b32 s1, s1, s2
4744; GFX6-NEXT:    s_lshl_b32 s2, s3, 1
4745; GFX6-NEXT:    s_bfe_u32 s3, s7, 0xf0001
4746; GFX6-NEXT:    s_lshr_b32 s3, s3, 14
4747; GFX6-NEXT:    s_xor_b32 s5, s9, -1
4748; GFX6-NEXT:    s_or_b32 s2, s2, s3
4749; GFX6-NEXT:    s_lshl_b32 s3, s6, 1
4750; GFX6-NEXT:    s_lshl_b32 s4, s7, 1
4751; GFX6-NEXT:    s_lshr_b32 s6, s5, 16
4752; GFX6-NEXT:    s_and_b32 s7, s5, 15
4753; GFX6-NEXT:    s_andn2_b32 s5, 15, s5
4754; GFX6-NEXT:    s_and_b32 s7, 0xffff, s7
4755; GFX6-NEXT:    s_bfe_u32 s3, s3, 0xf0001
4756; GFX6-NEXT:    s_and_b32 s5, 0xffff, s5
4757; GFX6-NEXT:    s_lshl_b32 s1, s1, s7
4758; GFX6-NEXT:    s_lshr_b32 s3, s3, s5
4759; GFX6-NEXT:    s_or_b32 s1, s1, s3
4760; GFX6-NEXT:    s_and_b32 s3, s6, 15
4761; GFX6-NEXT:    s_andn2_b32 s5, 15, s6
4762; GFX6-NEXT:    s_and_b32 s3, 0xffff, s3
4763; GFX6-NEXT:    s_lshl_b32 s2, s2, s3
4764; GFX6-NEXT:    s_bfe_u32 s3, s4, 0xf0001
4765; GFX6-NEXT:    s_and_b32 s4, 0xffff, s5
4766; GFX6-NEXT:    s_lshr_b32 s3, s3, s4
4767; GFX6-NEXT:    s_or_b32 s2, s2, s3
4768; GFX6-NEXT:    s_and_b32 s2, 0xffff, s2
4769; GFX6-NEXT:    s_and_b32 s1, 0xffff, s1
4770; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
4771; GFX6-NEXT:    s_or_b32 s1, s1, s2
4772; GFX6-NEXT:    ; return to shader part epilog
4773;
4774; GFX8-LABEL: s_fshr_v4i16:
4775; GFX8:       ; %bb.0:
4776; GFX8-NEXT:    s_and_b32 s8, 0xffff, s2
4777; GFX8-NEXT:    s_lshr_b32 s6, s0, 16
4778; GFX8-NEXT:    s_lshr_b32 s7, s2, 16
4779; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
4780; GFX8-NEXT:    s_lshr_b32 s8, s8, 15
4781; GFX8-NEXT:    s_or_b32 s0, s0, s8
4782; GFX8-NEXT:    s_lshl_b32 s6, s6, 1
4783; GFX8-NEXT:    s_lshr_b32 s8, s7, 15
4784; GFX8-NEXT:    s_lshl_b32 s2, s2, 1
4785; GFX8-NEXT:    s_xor_b32 s4, s4, -1
4786; GFX8-NEXT:    s_or_b32 s6, s6, s8
4787; GFX8-NEXT:    s_lshr_b32 s8, s4, 16
4788; GFX8-NEXT:    s_and_b32 s9, s4, 15
4789; GFX8-NEXT:    s_andn2_b32 s4, 15, s4
4790; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
4791; GFX8-NEXT:    s_and_b32 s9, 0xffff, s9
4792; GFX8-NEXT:    s_lshr_b32 s2, s2, 1
4793; GFX8-NEXT:    s_and_b32 s4, 0xffff, s4
4794; GFX8-NEXT:    s_lshl_b32 s0, s0, s9
4795; GFX8-NEXT:    s_lshr_b32 s2, s2, s4
4796; GFX8-NEXT:    s_or_b32 s0, s0, s2
4797; GFX8-NEXT:    s_and_b32 s2, s8, 15
4798; GFX8-NEXT:    s_lshl_b32 s7, s7, 1
4799; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
4800; GFX8-NEXT:    s_andn2_b32 s4, 15, s8
4801; GFX8-NEXT:    s_lshl_b32 s2, s6, s2
4802; GFX8-NEXT:    s_and_b32 s6, 0xffff, s7
4803; GFX8-NEXT:    s_lshr_b32 s6, s6, 1
4804; GFX8-NEXT:    s_and_b32 s4, 0xffff, s4
4805; GFX8-NEXT:    s_lshr_b32 s4, s6, s4
4806; GFX8-NEXT:    s_or_b32 s2, s2, s4
4807; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
4808; GFX8-NEXT:    s_and_b32 s0, 0xffff, s0
4809; GFX8-NEXT:    s_lshl_b32 s2, s2, 16
4810; GFX8-NEXT:    s_and_b32 s6, 0xffff, s3
4811; GFX8-NEXT:    s_or_b32 s0, s0, s2
4812; GFX8-NEXT:    s_lshr_b32 s2, s1, 16
4813; GFX8-NEXT:    s_lshr_b32 s4, s3, 16
4814; GFX8-NEXT:    s_lshl_b32 s1, s1, 1
4815; GFX8-NEXT:    s_lshr_b32 s6, s6, 15
4816; GFX8-NEXT:    s_or_b32 s1, s1, s6
4817; GFX8-NEXT:    s_lshl_b32 s2, s2, 1
4818; GFX8-NEXT:    s_lshr_b32 s6, s4, 15
4819; GFX8-NEXT:    s_lshl_b32 s3, s3, 1
4820; GFX8-NEXT:    s_xor_b32 s5, s5, -1
4821; GFX8-NEXT:    s_or_b32 s2, s2, s6
4822; GFX8-NEXT:    s_lshr_b32 s6, s5, 16
4823; GFX8-NEXT:    s_and_b32 s7, s5, 15
4824; GFX8-NEXT:    s_andn2_b32 s5, 15, s5
4825; GFX8-NEXT:    s_and_b32 s3, 0xffff, s3
4826; GFX8-NEXT:    s_and_b32 s7, 0xffff, s7
4827; GFX8-NEXT:    s_lshr_b32 s3, s3, 1
4828; GFX8-NEXT:    s_and_b32 s5, 0xffff, s5
4829; GFX8-NEXT:    s_lshl_b32 s1, s1, s7
4830; GFX8-NEXT:    s_lshr_b32 s3, s3, s5
4831; GFX8-NEXT:    s_or_b32 s1, s1, s3
4832; GFX8-NEXT:    s_and_b32 s3, s6, 15
4833; GFX8-NEXT:    s_lshl_b32 s4, s4, 1
4834; GFX8-NEXT:    s_and_b32 s3, 0xffff, s3
4835; GFX8-NEXT:    s_andn2_b32 s5, 15, s6
4836; GFX8-NEXT:    s_lshl_b32 s2, s2, s3
4837; GFX8-NEXT:    s_and_b32 s3, 0xffff, s4
4838; GFX8-NEXT:    s_lshr_b32 s3, s3, 1
4839; GFX8-NEXT:    s_and_b32 s4, 0xffff, s5
4840; GFX8-NEXT:    s_lshr_b32 s3, s3, s4
4841; GFX8-NEXT:    s_or_b32 s2, s2, s3
4842; GFX8-NEXT:    s_and_b32 s2, 0xffff, s2
4843; GFX8-NEXT:    s_and_b32 s1, 0xffff, s1
4844; GFX8-NEXT:    s_lshl_b32 s2, s2, 16
4845; GFX8-NEXT:    s_or_b32 s1, s1, s2
4846; GFX8-NEXT:    ; return to shader part epilog
4847;
4848; GFX9-LABEL: s_fshr_v4i16:
4849; GFX9:       ; %bb.0:
4850; GFX9-NEXT:    s_lshr_b32 s7, s0, 16
4851; GFX9-NEXT:    s_lshl_b32 s0, s0, 0x10001
4852; GFX9-NEXT:    s_lshl_b32 s7, s7, 1
4853; GFX9-NEXT:    s_and_b32 s6, s4, 0xf000f
4854; GFX9-NEXT:    s_andn2_b32 s4, 0xf000f, s4
4855; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s7
4856; GFX9-NEXT:    s_lshr_b32 s7, s0, 16
4857; GFX9-NEXT:    s_lshr_b32 s8, s4, 16
4858; GFX9-NEXT:    s_lshl_b32 s0, s0, s4
4859; GFX9-NEXT:    s_lshl_b32 s4, s7, s8
4860; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s4
4861; GFX9-NEXT:    s_lshr_b32 s4, s2, 16
4862; GFX9-NEXT:    s_and_b32 s2, s2, 0xffff
4863; GFX9-NEXT:    s_lshr_b32 s7, s6, 16
4864; GFX9-NEXT:    s_lshr_b32 s2, s2, s6
4865; GFX9-NEXT:    s_lshr_b32 s4, s4, s7
4866; GFX9-NEXT:    s_pack_ll_b32_b16 s2, s2, s4
4867; GFX9-NEXT:    s_or_b32 s0, s0, s2
4868; GFX9-NEXT:    s_and_b32 s2, s5, 0xf000f
4869; GFX9-NEXT:    s_andn2_b32 s4, 0xf000f, s5
4870; GFX9-NEXT:    s_lshr_b32 s5, s1, 16
4871; GFX9-NEXT:    s_lshl_b32 s1, s1, 0x10001
4872; GFX9-NEXT:    s_lshl_b32 s5, s5, 1
4873; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s1, s5
4874; GFX9-NEXT:    s_lshr_b32 s5, s1, 16
4875; GFX9-NEXT:    s_lshr_b32 s6, s4, 16
4876; GFX9-NEXT:    s_lshl_b32 s1, s1, s4
4877; GFX9-NEXT:    s_lshl_b32 s4, s5, s6
4878; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s1, s4
4879; GFX9-NEXT:    s_lshr_b32 s4, s3, 16
4880; GFX9-NEXT:    s_and_b32 s3, s3, 0xffff
4881; GFX9-NEXT:    s_lshr_b32 s5, s2, 16
4882; GFX9-NEXT:    s_lshr_b32 s2, s3, s2
4883; GFX9-NEXT:    s_lshr_b32 s3, s4, s5
4884; GFX9-NEXT:    s_pack_ll_b32_b16 s2, s2, s3
4885; GFX9-NEXT:    s_or_b32 s1, s1, s2
4886; GFX9-NEXT:    ; return to shader part epilog
4887;
4888; GFX10-LABEL: s_fshr_v4i16:
4889; GFX10:       ; %bb.0:
4890; GFX10-NEXT:    s_lshr_b32 s6, s0, 16
4891; GFX10-NEXT:    s_lshl_b32 s0, s0, 0x10001
4892; GFX10-NEXT:    s_lshl_b32 s6, s6, 1
4893; GFX10-NEXT:    s_and_b32 s7, s4, 0xf000f
4894; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s6
4895; GFX10-NEXT:    s_andn2_b32 s4, 0xf000f, s4
4896; GFX10-NEXT:    s_lshr_b32 s6, s0, 16
4897; GFX10-NEXT:    s_lshr_b32 s8, s4, 16
4898; GFX10-NEXT:    s_lshl_b32 s0, s0, s4
4899; GFX10-NEXT:    s_lshl_b32 s4, s6, s8
4900; GFX10-NEXT:    s_lshr_b32 s6, s2, 16
4901; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s4
4902; GFX10-NEXT:    s_lshr_b32 s4, s1, 16
4903; GFX10-NEXT:    s_and_b32 s2, s2, 0xffff
4904; GFX10-NEXT:    s_lshr_b32 s8, s7, 16
4905; GFX10-NEXT:    s_lshl_b32 s1, s1, 0x10001
4906; GFX10-NEXT:    s_lshl_b32 s4, s4, 1
4907; GFX10-NEXT:    s_lshr_b32 s2, s2, s7
4908; GFX10-NEXT:    s_lshr_b32 s6, s6, s8
4909; GFX10-NEXT:    s_pack_ll_b32_b16 s1, s1, s4
4910; GFX10-NEXT:    s_andn2_b32 s4, 0xf000f, s5
4911; GFX10-NEXT:    s_pack_ll_b32_b16 s2, s2, s6
4912; GFX10-NEXT:    s_and_b32 s6, s5, 0xf000f
4913; GFX10-NEXT:    s_lshr_b32 s5, s1, 16
4914; GFX10-NEXT:    s_lshr_b32 s7, s4, 16
4915; GFX10-NEXT:    s_lshl_b32 s1, s1, s4
4916; GFX10-NEXT:    s_lshl_b32 s4, s5, s7
4917; GFX10-NEXT:    s_lshr_b32 s5, s3, 16
4918; GFX10-NEXT:    s_and_b32 s3, s3, 0xffff
4919; GFX10-NEXT:    s_lshr_b32 s7, s6, 16
4920; GFX10-NEXT:    s_lshr_b32 s3, s3, s6
4921; GFX10-NEXT:    s_lshr_b32 s5, s5, s7
4922; GFX10-NEXT:    s_pack_ll_b32_b16 s1, s1, s4
4923; GFX10-NEXT:    s_pack_ll_b32_b16 s3, s3, s5
4924; GFX10-NEXT:    s_or_b32 s0, s0, s2
4925; GFX10-NEXT:    s_or_b32 s1, s1, s3
4926; GFX10-NEXT:    ; return to shader part epilog
4927;
4928; GFX11-LABEL: s_fshr_v4i16:
4929; GFX11:       ; %bb.0:
4930; GFX11-NEXT:    s_lshr_b32 s6, s0, 16
4931; GFX11-NEXT:    s_lshl_b32 s0, s0, 0x10001
4932; GFX11-NEXT:    s_lshl_b32 s6, s6, 1
4933; GFX11-NEXT:    s_and_b32 s7, s4, 0xf000f
4934; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s6
4935; GFX11-NEXT:    s_and_not1_b32 s4, 0xf000f, s4
4936; GFX11-NEXT:    s_lshr_b32 s6, s0, 16
4937; GFX11-NEXT:    s_lshr_b32 s8, s4, 16
4938; GFX11-NEXT:    s_lshl_b32 s0, s0, s4
4939; GFX11-NEXT:    s_lshl_b32 s4, s6, s8
4940; GFX11-NEXT:    s_lshr_b32 s6, s2, 16
4941; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s4
4942; GFX11-NEXT:    s_lshr_b32 s4, s1, 16
4943; GFX11-NEXT:    s_and_b32 s2, s2, 0xffff
4944; GFX11-NEXT:    s_lshr_b32 s8, s7, 16
4945; GFX11-NEXT:    s_lshl_b32 s1, s1, 0x10001
4946; GFX11-NEXT:    s_lshl_b32 s4, s4, 1
4947; GFX11-NEXT:    s_lshr_b32 s2, s2, s7
4948; GFX11-NEXT:    s_lshr_b32 s6, s6, s8
4949; GFX11-NEXT:    s_pack_ll_b32_b16 s1, s1, s4
4950; GFX11-NEXT:    s_and_not1_b32 s4, 0xf000f, s5
4951; GFX11-NEXT:    s_pack_ll_b32_b16 s2, s2, s6
4952; GFX11-NEXT:    s_and_b32 s6, s5, 0xf000f
4953; GFX11-NEXT:    s_lshr_b32 s5, s1, 16
4954; GFX11-NEXT:    s_lshr_b32 s7, s4, 16
4955; GFX11-NEXT:    s_lshl_b32 s1, s1, s4
4956; GFX11-NEXT:    s_lshl_b32 s4, s5, s7
4957; GFX11-NEXT:    s_lshr_b32 s5, s3, 16
4958; GFX11-NEXT:    s_and_b32 s3, s3, 0xffff
4959; GFX11-NEXT:    s_lshr_b32 s7, s6, 16
4960; GFX11-NEXT:    s_lshr_b32 s3, s3, s6
4961; GFX11-NEXT:    s_lshr_b32 s5, s5, s7
4962; GFX11-NEXT:    s_pack_ll_b32_b16 s1, s1, s4
4963; GFX11-NEXT:    s_pack_ll_b32_b16 s3, s3, s5
4964; GFX11-NEXT:    s_or_b32 s0, s0, s2
4965; GFX11-NEXT:    s_or_b32 s1, s1, s3
4966; GFX11-NEXT:    ; return to shader part epilog
; IR under test - a single fshr call; the <4 x i16> result is reinterpreted as
; <2 x i32> for the return value.
4967  %result = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt)
4968  %cast.result = bitcast <4 x i16> %result to <2 x i32>
4969  ret <2 x i32> %cast.result
4970}
4971
; Funnel shift right on <4 x i16> with a variable per-element shift amount.
; None of the three arguments is marked inreg. The <4 x i16> result of the
; intrinsic is bitcast to <4 x half>, which is the declared return type.
4972define <4 x half> @v_fshr_v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) {
4973; GFX6-LABEL: v_fshr_v4i16:
4974; GFX6:       ; %bb.0:
4975; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4976; GFX6-NEXT:    v_lshlrev_b32_e32 v9, 16, v9
4977; GFX6-NEXT:    v_and_b32_e32 v8, 0xffff, v8
4978; GFX6-NEXT:    v_or_b32_e32 v8, v9, v8
4979; GFX6-NEXT:    v_lshlrev_b32_e32 v9, 16, v11
4980; GFX6-NEXT:    v_and_b32_e32 v10, 0xffff, v10
4981; GFX6-NEXT:    v_or_b32_e32 v9, v9, v10
4982; GFX6-NEXT:    v_bfe_u32 v10, v4, 1, 15
4983; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
4984; GFX6-NEXT:    v_lshrrev_b32_e32 v10, 14, v10
4985; GFX6-NEXT:    v_or_b32_e32 v0, v0, v10
4986; GFX6-NEXT:    v_bfe_u32 v10, v5, 1, 15
4987; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
4988; GFX6-NEXT:    v_lshrrev_b32_e32 v10, 14, v10
4989; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v8
4990; GFX6-NEXT:    v_or_b32_e32 v1, v1, v10
4991; GFX6-NEXT:    v_lshrrev_b32_e32 v10, 16, v8
4992; GFX6-NEXT:    v_and_b32_e32 v11, 15, v8
4993; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v8
4994; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 1, v4
4995; GFX6-NEXT:    v_and_b32_e32 v8, 15, v8
4996; GFX6-NEXT:    v_and_b32_e32 v11, 0xffff, v11
4997; GFX6-NEXT:    v_bfe_u32 v4, v4, 1, 15
4998; GFX6-NEXT:    v_and_b32_e32 v8, 0xffff, v8
4999; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v11, v0
5000; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v8, v4
5001; GFX6-NEXT:    v_or_b32_e32 v0, v0, v4
5002; GFX6-NEXT:    v_and_b32_e32 v4, 15, v10
5003; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v10
5004; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 1, v5
5005; GFX6-NEXT:    v_and_b32_e32 v8, 15, v8
5006; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v4
5007; GFX6-NEXT:    v_lshlrev_b32_e32 v1, v4, v1
5008; GFX6-NEXT:    v_bfe_u32 v4, v5, 1, 15
5009; GFX6-NEXT:    v_and_b32_e32 v5, 0xffff, v8
5010; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v5, v4
5011; GFX6-NEXT:    v_or_b32_e32 v1, v1, v4
5012; GFX6-NEXT:    v_bfe_u32 v4, v6, 1, 15
5013; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
5014; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 14, v4
5015; GFX6-NEXT:    v_or_b32_e32 v2, v2, v4
5016; GFX6-NEXT:    v_bfe_u32 v4, v7, 1, 15
5017; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 1, v3
5018; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 14, v4
5019; GFX6-NEXT:    v_or_b32_e32 v3, v3, v4
5020; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 1, v6
5021; GFX6-NEXT:    v_xor_b32_e32 v6, -1, v9
5022; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 1, v7
5023; GFX6-NEXT:    v_lshrrev_b32_e32 v7, 16, v6
5024; GFX6-NEXT:    v_and_b32_e32 v8, 15, v6
5025; GFX6-NEXT:    v_xor_b32_e32 v6, -1, v6
5026; GFX6-NEXT:    v_and_b32_e32 v6, 15, v6
5027; GFX6-NEXT:    v_and_b32_e32 v8, 0xffff, v8
5028; GFX6-NEXT:    v_bfe_u32 v4, v4, 1, 15
5029; GFX6-NEXT:    v_and_b32_e32 v6, 0xffff, v6
5030; GFX6-NEXT:    v_lshlrev_b32_e32 v2, v8, v2
5031; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v6, v4
5032; GFX6-NEXT:    v_or_b32_e32 v2, v2, v4
5033; GFX6-NEXT:    v_and_b32_e32 v4, 15, v7
5034; GFX6-NEXT:    v_xor_b32_e32 v6, -1, v7
5035; GFX6-NEXT:    v_and_b32_e32 v6, 15, v6
5036; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v4
5037; GFX6-NEXT:    v_lshlrev_b32_e32 v3, v4, v3
5038; GFX6-NEXT:    v_bfe_u32 v4, v5, 1, 15
5039; GFX6-NEXT:    v_and_b32_e32 v5, 0xffff, v6
5040; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v5, v4
5041; GFX6-NEXT:    v_or_b32_e32 v3, v3, v4
5042; GFX6-NEXT:    s_setpc_b64 s[30:31]
5043;
5044; GFX8-LABEL: v_fshr_v4i16:
5045; GFX8:       ; %bb.0:
5046; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5047; GFX8-NEXT:    v_lshlrev_b16_e32 v6, 1, v0
5048; GFX8-NEXT:    v_lshrrev_b16_e32 v7, 15, v2
5049; GFX8-NEXT:    v_or_b32_e32 v6, v6, v7
5050; GFX8-NEXT:    v_mov_b32_e32 v7, 1
5051; GFX8-NEXT:    v_mov_b32_e32 v8, 15
5052; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5053; GFX8-NEXT:    v_lshrrev_b16_sdwa v9, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5054; GFX8-NEXT:    v_xor_b32_e32 v4, -1, v4
5055; GFX8-NEXT:    v_or_b32_e32 v0, v0, v9
5056; GFX8-NEXT:    v_lshlrev_b16_e32 v9, 1, v2
5057; GFX8-NEXT:    v_xor_b32_e32 v11, -1, v4
5058; GFX8-NEXT:    v_and_b32_e32 v10, 15, v4
5059; GFX8-NEXT:    v_and_b32_e32 v11, 15, v11
5060; GFX8-NEXT:    v_lshrrev_b16_e32 v9, 1, v9
5061; GFX8-NEXT:    v_lshlrev_b16_e32 v6, v10, v6
5062; GFX8-NEXT:    v_lshrrev_b16_e32 v9, v11, v9
5063; GFX8-NEXT:    v_mov_b32_e32 v10, -1
5064; GFX8-NEXT:    v_lshlrev_b16_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5065; GFX8-NEXT:    v_or_b32_e32 v6, v6, v9
5066; GFX8-NEXT:    v_and_b32_sdwa v9, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
5067; GFX8-NEXT:    v_xor_b32_sdwa v4, v4, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
5068; GFX8-NEXT:    v_and_b32_e32 v4, 15, v4
5069; GFX8-NEXT:    v_lshrrev_b16_e32 v2, 1, v2
5070; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v9, v0
5071; GFX8-NEXT:    v_lshrrev_b16_e32 v2, v4, v2
5072; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
5073; GFX8-NEXT:    v_lshlrev_b16_e32 v2, 1, v1
5074; GFX8-NEXT:    v_lshrrev_b16_e32 v4, 15, v3
5075; GFX8-NEXT:    v_and_b32_e32 v0, 0xffff, v0
5076; GFX8-NEXT:    v_or_b32_e32 v2, v2, v4
5077; GFX8-NEXT:    v_lshlrev_b16_sdwa v1, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5078; GFX8-NEXT:    v_lshrrev_b16_sdwa v4, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5079; GFX8-NEXT:    v_xor_b32_e32 v5, -1, v5
5080; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
5081; GFX8-NEXT:    v_or_b32_e32 v1, v1, v4
5082; GFX8-NEXT:    v_lshlrev_b16_e32 v4, 1, v3
5083; GFX8-NEXT:    v_lshlrev_b16_sdwa v3, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5084; GFX8-NEXT:    v_xor_b32_e32 v7, -1, v5
5085; GFX8-NEXT:    v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5086; GFX8-NEXT:    v_and_b32_e32 v6, 15, v5
5087; GFX8-NEXT:    v_and_b32_e32 v7, 15, v7
5088; GFX8-NEXT:    v_lshrrev_b16_e32 v4, 1, v4
5089; GFX8-NEXT:    v_lshlrev_b16_e32 v2, v6, v2
5090; GFX8-NEXT:    v_lshrrev_b16_e32 v4, v7, v4
5091; GFX8-NEXT:    v_or_b32_e32 v2, v2, v4
5092; GFX8-NEXT:    v_and_b32_sdwa v4, v5, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
5093; GFX8-NEXT:    v_xor_b32_sdwa v5, v5, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
5094; GFX8-NEXT:    v_and_b32_e32 v5, 15, v5
5095; GFX8-NEXT:    v_lshrrev_b16_e32 v3, 1, v3
5096; GFX8-NEXT:    v_lshlrev_b16_e32 v1, v4, v1
5097; GFX8-NEXT:    v_lshrrev_b16_e32 v3, v5, v3
5098; GFX8-NEXT:    v_or_b32_e32 v1, v1, v3
5099; GFX8-NEXT:    v_and_b32_e32 v1, 0xffff, v1
5100; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
5101; GFX8-NEXT:    v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5102; GFX8-NEXT:    s_setpc_b64 s[30:31]
5103;
5104; GFX9-LABEL: v_fshr_v4i16:
5105; GFX9:       ; %bb.0:
5106; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5107; GFX9-NEXT:    v_and_b32_e32 v6, 0xf000f, v4
5108; GFX9-NEXT:    v_xor_b32_e32 v4, -1, v4
5109; GFX9-NEXT:    v_and_b32_e32 v4, 0xf000f, v4
5110; GFX9-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
5111; GFX9-NEXT:    v_pk_lshlrev_b16 v0, v4, v0
5112; GFX9-NEXT:    v_pk_lshrrev_b16 v2, v6, v2
5113; GFX9-NEXT:    v_xor_b32_e32 v4, -1, v5
5114; GFX9-NEXT:    v_or_b32_e32 v0, v0, v2
5115; GFX9-NEXT:    v_and_b32_e32 v2, 0xf000f, v5
5116; GFX9-NEXT:    v_and_b32_e32 v4, 0xf000f, v4
5117; GFX9-NEXT:    v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1]
5118; GFX9-NEXT:    v_pk_lshlrev_b16 v1, v4, v1
5119; GFX9-NEXT:    v_pk_lshrrev_b16 v2, v2, v3
5120; GFX9-NEXT:    v_or_b32_e32 v1, v1, v2
5121; GFX9-NEXT:    s_setpc_b64 s[30:31]
5122;
5123; GFX10-LABEL: v_fshr_v4i16:
5124; GFX10:       ; %bb.0:
5125; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5126; GFX10-NEXT:    v_xor_b32_e32 v6, -1, v4
5127; GFX10-NEXT:    v_xor_b32_e32 v7, -1, v5
5128; GFX10-NEXT:    v_and_b32_e32 v4, 0xf000f, v4
5129; GFX10-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
5130; GFX10-NEXT:    v_and_b32_e32 v5, 0xf000f, v5
5131; GFX10-NEXT:    v_and_b32_e32 v6, 0xf000f, v6
5132; GFX10-NEXT:    v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1]
5133; GFX10-NEXT:    v_and_b32_e32 v7, 0xf000f, v7
5134; GFX10-NEXT:    v_pk_lshrrev_b16 v2, v4, v2
5135; GFX10-NEXT:    v_pk_lshrrev_b16 v3, v5, v3
5136; GFX10-NEXT:    v_pk_lshlrev_b16 v0, v6, v0
5137; GFX10-NEXT:    v_pk_lshlrev_b16 v1, v7, v1
5138; GFX10-NEXT:    v_or_b32_e32 v0, v0, v2
5139; GFX10-NEXT:    v_or_b32_e32 v1, v1, v3
5140; GFX10-NEXT:    s_setpc_b64 s[30:31]
5141;
5142; GFX11-LABEL: v_fshr_v4i16:
5143; GFX11:       ; %bb.0:
5144; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5145; GFX11-NEXT:    v_xor_b32_e32 v6, -1, v4
5146; GFX11-NEXT:    v_xor_b32_e32 v7, -1, v5
5147; GFX11-NEXT:    v_and_b32_e32 v4, 0xf000f, v4
5148; GFX11-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
5149; GFX11-NEXT:    v_and_b32_e32 v5, 0xf000f, v5
5150; GFX11-NEXT:    v_and_b32_e32 v6, 0xf000f, v6
5151; GFX11-NEXT:    v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1]
5152; GFX11-NEXT:    v_and_b32_e32 v7, 0xf000f, v7
5153; GFX11-NEXT:    v_pk_lshrrev_b16 v2, v4, v2
5154; GFX11-NEXT:    v_pk_lshrrev_b16 v3, v5, v3
5155; GFX11-NEXT:    v_pk_lshlrev_b16 v0, v6, v0
5156; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
5157; GFX11-NEXT:    v_pk_lshlrev_b16 v1, v7, v1
5158; GFX11-NEXT:    v_or_b32_e32 v0, v0, v2
5159; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
5160; GFX11-NEXT:    v_or_b32_e32 v1, v1, v3
5161; GFX11-NEXT:    s_setpc_b64 s[30:31]
5162  %result = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt)
; Reinterpret the integer vector as <4 x half> to match the return type.
5163  %cast.result = bitcast <4 x i16> %result to <4 x half>
5164  ret <4 x half> %cast.result
5165}
5166
; Funnel shift right on i64 with a variable shift amount in an amdgpu_ps
; function. All three operands (lhs, rhs, amt) are inreg arguments.
5167define amdgpu_ps i64 @s_fshr_i64(i64 inreg %lhs, i64 inreg %rhs, i64 inreg %amt) {
5168; GFX6-LABEL: s_fshr_i64:
5169; GFX6:       ; %bb.0:
5170; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5171; GFX6-NEXT:    s_not_b32 s5, s4
5172; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], s5
5173; GFX6-NEXT:    s_lshr_b64 s[2:3], s[2:3], s4
5174; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5175; GFX6-NEXT:    ; return to shader part epilog
5176;
5177; GFX8-LABEL: s_fshr_i64:
5178; GFX8:       ; %bb.0:
5179; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5180; GFX8-NEXT:    s_not_b32 s5, s4
5181; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], s5
5182; GFX8-NEXT:    s_lshr_b64 s[2:3], s[2:3], s4
5183; GFX8-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5184; GFX8-NEXT:    ; return to shader part epilog
5185;
5186; GFX9-LABEL: s_fshr_i64:
5187; GFX9:       ; %bb.0:
5188; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5189; GFX9-NEXT:    s_not_b32 s5, s4
5190; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], s5
5191; GFX9-NEXT:    s_lshr_b64 s[2:3], s[2:3], s4
5192; GFX9-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5193; GFX9-NEXT:    ; return to shader part epilog
5194;
5195; GFX10-LABEL: s_fshr_i64:
5196; GFX10:       ; %bb.0:
5197; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5198; GFX10-NEXT:    s_not_b32 s5, s4
5199; GFX10-NEXT:    s_lshr_b64 s[2:3], s[2:3], s4
5200; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], s5
5201; GFX10-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5202; GFX10-NEXT:    ; return to shader part epilog
5203;
5204; GFX11-LABEL: s_fshr_i64:
5205; GFX11:       ; %bb.0:
5206; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5207; GFX11-NEXT:    s_not_b32 s5, s4
5208; GFX11-NEXT:    s_lshr_b64 s[2:3], s[2:3], s4
5209; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], s5
5210; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
5211; GFX11-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5212; GFX11-NEXT:    ; return to shader part epilog
5213  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt)
5214  ret i64 %result
5215}
5216
; Funnel shift right on i64 by the constant 5, with both data operands inreg.
; The tahiti, fiji, gfx900 and gfx1010 RUN lines share the common GCN check
; prefix for this function; gfx1100 has its own block of checks.
5217define amdgpu_ps i64 @s_fshr_i64_5(i64 inreg %lhs, i64 inreg %rhs) {
5218; GCN-LABEL: s_fshr_i64_5:
5219; GCN:       ; %bb.0:
5220; GCN-NEXT:    s_lshl_b32 s1, s0, 27
5221; GCN-NEXT:    s_mov_b32 s0, 0
5222; GCN-NEXT:    s_lshr_b64 s[2:3], s[2:3], 5
5223; GCN-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5224; GCN-NEXT:    ; return to shader part epilog
5225;
5226; GFX11-LABEL: s_fshr_i64_5:
5227; GFX11:       ; %bb.0:
5228; GFX11-NEXT:    s_lshl_b32 s1, s0, 27
5229; GFX11-NEXT:    s_mov_b32 s0, 0
5230; GFX11-NEXT:    s_lshr_b64 s[2:3], s[2:3], 5
5231; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
5232; GFX11-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5233; GFX11-NEXT:    ; return to shader part epilog
5234  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 5)
5235  ret i64 %result
5236}
5237
; Funnel shift right on i64 by the constant 32 (half the bit width), with both
; data operands inreg. The tahiti, fiji, gfx900 and gfx1010 RUN lines share the
; common GCN check prefix here; gfx1100 is checked separately.
5238define amdgpu_ps i64 @s_fshr_i64_32(i64 inreg %lhs, i64 inreg %rhs) {
5239; GCN-LABEL: s_fshr_i64_32:
5240; GCN:       ; %bb.0:
5241; GCN-NEXT:    s_mov_b32 s1, s0
5242; GCN-NEXT:    s_mov_b32 s0, 0
5243; GCN-NEXT:    s_mov_b32 s2, s3
5244; GCN-NEXT:    s_mov_b32 s3, s0
5245; GCN-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5246; GCN-NEXT:    ; return to shader part epilog
5247;
5248; GFX11-LABEL: s_fshr_i64_32:
5249; GFX11:       ; %bb.0:
5250; GFX11-NEXT:    s_mov_b32 s1, s0
5251; GFX11-NEXT:    s_mov_b32 s0, 0
5252; GFX11-NEXT:    s_mov_b32 s2, s3
5253; GFX11-NEXT:    s_mov_b32 s3, s0
5254; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
5255; GFX11-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5256; GFX11-NEXT:    ; return to shader part epilog
5257  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 32)
5258  ret i64 %result
5259}
5260
; Funnel shift right on i64 by the constant 48, with both data operands inreg.
; The tahiti, fiji, gfx900 and gfx1010 RUN lines share the common GCN check
; prefix here; gfx1100 is checked separately.
5261define amdgpu_ps i64 @s_fshr_i64_48(i64 inreg %lhs, i64 inreg %rhs) {
5262; GCN-LABEL: s_fshr_i64_48:
5263; GCN:       ; %bb.0:
5264; GCN-NEXT:    s_lshl_b64 s[0:1], s[0:1], 16
5265; GCN-NEXT:    s_lshr_b32 s2, s3, 16
5266; GCN-NEXT:    s_mov_b32 s3, 0
5267; GCN-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5268; GCN-NEXT:    ; return to shader part epilog
5269;
5270; GFX11-LABEL: s_fshr_i64_48:
5271; GFX11:       ; %bb.0:
5272; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], 16
5273; GFX11-NEXT:    s_lshr_b32 s2, s3, 16
5274; GFX11-NEXT:    s_mov_b32 s3, 0
5275; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
5276; GFX11-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
5277; GFX11-NEXT:    ; return to shader part epilog
5278  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 48)
5279  ret i64 %result
5280}
5281
; Funnel shift right on i64 with a variable shift amount. None of the
; arguments is marked inreg and the function uses the default calling
; convention.
5282define i64 @v_fshr_i64(i64 %lhs, i64 %rhs, i64 %amt) {
5283; GFX6-LABEL: v_fshr_i64:
5284; GFX6:       ; %bb.0:
5285; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5286; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 1
5287; GFX6-NEXT:    v_not_b32_e32 v5, v4
5288; GFX6-NEXT:    v_and_b32_e32 v5, 63, v5
5289; GFX6-NEXT:    v_and_b32_e32 v4, 63, v4
5290; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], v5
5291; GFX6-NEXT:    v_lshr_b64 v[2:3], v[2:3], v4
5292; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
5293; GFX6-NEXT:    v_or_b32_e32 v1, v1, v3
5294; GFX6-NEXT:    s_setpc_b64 s[30:31]
5295;
5296; GFX8-LABEL: v_fshr_i64:
5297; GFX8:       ; %bb.0:
5298; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5299; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
5300; GFX8-NEXT:    v_not_b32_e32 v5, v4
5301; GFX8-NEXT:    v_and_b32_e32 v5, 63, v5
5302; GFX8-NEXT:    v_and_b32_e32 v4, 63, v4
5303; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v5, v[0:1]
5304; GFX8-NEXT:    v_lshrrev_b64 v[2:3], v4, v[2:3]
5305; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
5306; GFX8-NEXT:    v_or_b32_e32 v1, v1, v3
5307; GFX8-NEXT:    s_setpc_b64 s[30:31]
5308;
5309; GFX9-LABEL: v_fshr_i64:
5310; GFX9:       ; %bb.0:
5311; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5312; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
5313; GFX9-NEXT:    v_not_b32_e32 v5, v4
5314; GFX9-NEXT:    v_and_b32_e32 v5, 63, v5
5315; GFX9-NEXT:    v_and_b32_e32 v4, 63, v4
5316; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v5, v[0:1]
5317; GFX9-NEXT:    v_lshrrev_b64 v[2:3], v4, v[2:3]
5318; GFX9-NEXT:    v_or_b32_e32 v0, v0, v2
5319; GFX9-NEXT:    v_or_b32_e32 v1, v1, v3
5320; GFX9-NEXT:    s_setpc_b64 s[30:31]
5321;
5322; GFX10-LABEL: v_fshr_i64:
5323; GFX10:       ; %bb.0:
5324; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5325; GFX10-NEXT:    v_not_b32_e32 v5, v4
5326; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
5327; GFX10-NEXT:    v_and_b32_e32 v4, 63, v4
5328; GFX10-NEXT:    v_and_b32_e32 v5, 63, v5
5329; GFX10-NEXT:    v_lshrrev_b64 v[2:3], v4, v[2:3]
5330; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v5, v[0:1]
5331; GFX10-NEXT:    v_or_b32_e32 v0, v0, v2
5332; GFX10-NEXT:    v_or_b32_e32 v1, v1, v3
5333; GFX10-NEXT:    s_setpc_b64 s[30:31]
5334;
5335; GFX11-LABEL: v_fshr_i64:
5336; GFX11:       ; %bb.0:
5337; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5338; GFX11-NEXT:    v_not_b32_e32 v5, v4
5339; GFX11-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
5340; GFX11-NEXT:    v_and_b32_e32 v4, 63, v4
5341; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
5342; GFX11-NEXT:    v_and_b32_e32 v5, 63, v5
5343; GFX11-NEXT:    v_lshrrev_b64 v[2:3], v4, v[2:3]
5344; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
5345; GFX11-NEXT:    v_lshlrev_b64 v[0:1], v5, v[0:1]
5346; GFX11-NEXT:    v_or_b32_e32 v0, v0, v2
5347; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
5348; GFX11-NEXT:    v_or_b32_e32 v1, v1, v3
5349; GFX11-NEXT:    s_setpc_b64 s[30:31]
5350  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt)
5351  ret i64 %result
5352}
5353
; Funnel shift right on i64 by the constant 5. Neither data operand is marked
; inreg and the function uses the default calling convention.
5354define i64 @v_fshr_i64_5(i64 %lhs, i64 %rhs) {
5355; GFX6-LABEL: v_fshr_i64_5:
5356; GFX6:       ; %bb.0:
5357; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5358; GFX6-NEXT:    v_mov_b32_e32 v4, v0
5359; GFX6-NEXT:    v_lshr_b64 v[0:1], v[2:3], 5
5360; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 27, v4
5361; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
5362; GFX6-NEXT:    s_setpc_b64 s[30:31]
5363;
5364; GFX8-LABEL: v_fshr_i64_5:
5365; GFX8:       ; %bb.0:
5366; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5367; GFX8-NEXT:    v_mov_b32_e32 v4, v0
5368; GFX8-NEXT:    v_lshrrev_b64 v[0:1], 5, v[2:3]
5369; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 27, v4
5370; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
5371; GFX8-NEXT:    s_setpc_b64 s[30:31]
5372;
5373; GFX9-LABEL: v_fshr_i64_5:
5374; GFX9:       ; %bb.0:
5375; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5376; GFX9-NEXT:    v_mov_b32_e32 v4, v0
5377; GFX9-NEXT:    v_lshrrev_b64 v[0:1], 5, v[2:3]
5378; GFX9-NEXT:    v_lshl_or_b32 v1, v4, 27, v1
5379; GFX9-NEXT:    s_setpc_b64 s[30:31]
5380;
5381; GFX10-LABEL: v_fshr_i64_5:
5382; GFX10:       ; %bb.0:
5383; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5384; GFX10-NEXT:    v_mov_b32_e32 v4, v0
5385; GFX10-NEXT:    v_lshrrev_b64 v[0:1], 5, v[2:3]
5386; GFX10-NEXT:    v_lshl_or_b32 v1, v4, 27, v1
5387; GFX10-NEXT:    s_setpc_b64 s[30:31]
5388;
5389; GFX11-LABEL: v_fshr_i64_5:
5390; GFX11:       ; %bb.0:
5391; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5392; GFX11-NEXT:    v_mov_b32_e32 v4, v0
5393; GFX11-NEXT:    v_lshrrev_b64 v[0:1], 5, v[2:3]
5394; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
5395; GFX11-NEXT:    v_lshl_or_b32 v1, v4, 27, v1
5396; GFX11-NEXT:    s_setpc_b64 s[30:31]
5397  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 5)
5398  ret i64 %result
5399}
5400
; Funnel shift right on i64 by the constant 32 (half the bit width). Neither
; data operand is marked inreg. The tahiti, fiji, gfx900 and gfx1010 RUN lines
; share the common GCN check prefix here; gfx1100 is checked separately.
5401define i64 @v_fshr_i64_32(i64 %lhs, i64 %rhs) {
5402; GCN-LABEL: v_fshr_i64_32:
5403; GCN:       ; %bb.0:
5404; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5405; GCN-NEXT:    v_mov_b32_e32 v1, v0
5406; GCN-NEXT:    v_mov_b32_e32 v0, v3
5407; GCN-NEXT:    s_setpc_b64 s[30:31]
5408;
5409; GFX11-LABEL: v_fshr_i64_32:
5410; GFX11:       ; %bb.0:
5411; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5412; GFX11-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, v3
5413; GFX11-NEXT:    s_setpc_b64 s[30:31]
5414  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 32)
5415  ret i64 %result
5416}
5417
; Funnel shift right on i64 by the constant 48. Neither data operand is marked
; inreg and the function uses the default calling convention.
5418define i64 @v_fshr_i64_48(i64 %lhs, i64 %rhs) {
5419; GFX6-LABEL: v_fshr_i64_48:
5420; GFX6:       ; %bb.0:
5421; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5422; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 16
5423; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 16, v3
5424; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
5425; GFX6-NEXT:    s_setpc_b64 s[30:31]
5426;
5427; GFX8-LABEL: v_fshr_i64_48:
5428; GFX8:       ; %bb.0:
5429; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5430; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 16, v[0:1]
5431; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5432; GFX8-NEXT:    s_setpc_b64 s[30:31]
5433;
5434; GFX9-LABEL: v_fshr_i64_48:
5435; GFX9:       ; %bb.0:
5436; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5437; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 16, v[0:1]
5438; GFX9-NEXT:    v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5439; GFX9-NEXT:    s_setpc_b64 s[30:31]
5440;
5441; GFX10-LABEL: v_fshr_i64_48:
5442; GFX10:       ; %bb.0:
5443; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5444; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 16, v[0:1]
5445; GFX10-NEXT:    v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5446; GFX10-NEXT:    s_setpc_b64 s[30:31]
5447;
5448; GFX11-LABEL: v_fshr_i64_48:
5449; GFX11:       ; %bb.0:
5450; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5451; GFX11-NEXT:    v_lshlrev_b64 v[0:1], 16, v[0:1]
5452; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v3
5453; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
5454; GFX11-NEXT:    v_or_b32_e32 v0, v0, v2
5455; GFX11-NEXT:    s_setpc_b64 s[30:31]
5456  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 48)
5457  ret i64 %result
5458}
5459
; Funnel shift right on i64 with mixed operand kinds: lhs and rhs are inreg,
; the shift amount is not. The i64 result is bitcast to <2 x float>, which is
; the declared return type of this amdgpu_ps function.
5460define amdgpu_ps <2 x float> @v_fshr_i64_ssv(i64 inreg %lhs, i64 inreg %rhs, i64 %amt) {
5461; GFX6-LABEL: v_fshr_i64_ssv:
5462; GFX6:       ; %bb.0:
5463; GFX6-NEXT:    v_not_b32_e32 v1, v0
5464; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5465; GFX6-NEXT:    v_and_b32_e32 v1, 63, v1
5466; GFX6-NEXT:    v_and_b32_e32 v0, 63, v0
5467; GFX6-NEXT:    v_lshl_b64 v[1:2], s[0:1], v1
5468; GFX6-NEXT:    v_lshr_b64 v[3:4], s[2:3], v0
5469; GFX6-NEXT:    v_or_b32_e32 v0, v1, v3
5470; GFX6-NEXT:    v_or_b32_e32 v1, v2, v4
5471; GFX6-NEXT:    ; return to shader part epilog
5472;
5473; GFX8-LABEL: v_fshr_i64_ssv:
5474; GFX8:       ; %bb.0:
5475; GFX8-NEXT:    v_not_b32_e32 v1, v0
5476; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5477; GFX8-NEXT:    v_and_b32_e32 v1, 63, v1
5478; GFX8-NEXT:    v_and_b32_e32 v0, 63, v0
5479; GFX8-NEXT:    v_lshlrev_b64 v[1:2], v1, s[0:1]
5480; GFX8-NEXT:    v_lshrrev_b64 v[3:4], v0, s[2:3]
5481; GFX8-NEXT:    v_or_b32_e32 v0, v1, v3
5482; GFX8-NEXT:    v_or_b32_e32 v1, v2, v4
5483; GFX8-NEXT:    ; return to shader part epilog
5484;
5485; GFX9-LABEL: v_fshr_i64_ssv:
5486; GFX9:       ; %bb.0:
5487; GFX9-NEXT:    v_not_b32_e32 v1, v0
5488; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5489; GFX9-NEXT:    v_and_b32_e32 v1, 63, v1
5490; GFX9-NEXT:    v_and_b32_e32 v0, 63, v0
5491; GFX9-NEXT:    v_lshlrev_b64 v[1:2], v1, s[0:1]
5492; GFX9-NEXT:    v_lshrrev_b64 v[3:4], v0, s[2:3]
5493; GFX9-NEXT:    v_or_b32_e32 v0, v1, v3
5494; GFX9-NEXT:    v_or_b32_e32 v1, v2, v4
5495; GFX9-NEXT:    ; return to shader part epilog
5496;
5497; GFX10-LABEL: v_fshr_i64_ssv:
5498; GFX10:       ; %bb.0:
5499; GFX10-NEXT:    v_not_b32_e32 v1, v0
5500; GFX10-NEXT:    v_and_b32_e32 v0, 63, v0
5501; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5502; GFX10-NEXT:    v_and_b32_e32 v2, 63, v1
5503; GFX10-NEXT:    v_lshrrev_b64 v[0:1], v0, s[2:3]
5504; GFX10-NEXT:    v_lshlrev_b64 v[2:3], v2, s[0:1]
5505; GFX10-NEXT:    v_or_b32_e32 v0, v2, v0
5506; GFX10-NEXT:    v_or_b32_e32 v1, v3, v1
5507; GFX10-NEXT:    ; return to shader part epilog
5508;
5509; GFX11-LABEL: v_fshr_i64_ssv:
5510; GFX11:       ; %bb.0:
5511; GFX11-NEXT:    v_not_b32_e32 v1, v0
5512; GFX11-NEXT:    v_and_b32_e32 v0, 63, v0
5513; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5514; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
5515; GFX11-NEXT:    v_and_b32_e32 v2, 63, v1
5516; GFX11-NEXT:    v_lshrrev_b64 v[0:1], v0, s[2:3]
5517; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
5518; GFX11-NEXT:    v_lshlrev_b64 v[2:3], v2, s[0:1]
5519; GFX11-NEXT:    v_or_b32_e32 v0, v2, v0
5520; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
5521; GFX11-NEXT:    v_or_b32_e32 v1, v3, v1
5522; GFX11-NEXT:    ; return to shader part epilog
5523  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt)
; Reinterpret the 64-bit integer as <2 x float> to match the return type.
5524  %cast = bitcast i64 %result to <2 x float>
5525  ret <2 x float> %cast
5526}
5527
; Funnel shift right on i64 with mixed operand kinds: lhs and the shift amount
; are inreg, rhs is not. The i64 result is bitcast to <2 x float>, which is the
; declared return type of this amdgpu_ps function.
5528define amdgpu_ps <2 x float> @v_fshr_i64_svs(i64 inreg %lhs, i64 %rhs, i64 inreg %amt) {
5529; GFX6-LABEL: v_fshr_i64_svs:
5530; GFX6:       ; %bb.0:
5531; GFX6-NEXT:    s_not_b32 s3, s2
5532; GFX6-NEXT:    s_and_b32 s2, s2, 63
5533; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5534; GFX6-NEXT:    v_lshr_b64 v[0:1], v[0:1], s2
5535; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], s3
5536; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
5537; GFX6-NEXT:    v_or_b32_e32 v1, s1, v1
5538; GFX6-NEXT:    ; return to shader part epilog
5539;
5540; GFX8-LABEL: v_fshr_i64_svs:
5541; GFX8:       ; %bb.0:
5542; GFX8-NEXT:    s_not_b32 s3, s2
5543; GFX8-NEXT:    s_and_b32 s2, s2, 63
5544; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5545; GFX8-NEXT:    v_lshrrev_b64 v[0:1], s2, v[0:1]
5546; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], s3
5547; GFX8-NEXT:    v_or_b32_e32 v0, s0, v0
5548; GFX8-NEXT:    v_or_b32_e32 v1, s1, v1
5549; GFX8-NEXT:    ; return to shader part epilog
5550;
5551; GFX9-LABEL: v_fshr_i64_svs:
5552; GFX9:       ; %bb.0:
5553; GFX9-NEXT:    s_not_b32 s3, s2
5554; GFX9-NEXT:    s_and_b32 s2, s2, 63
5555; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5556; GFX9-NEXT:    v_lshrrev_b64 v[0:1], s2, v[0:1]
5557; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], s3
5558; GFX9-NEXT:    v_or_b32_e32 v0, s0, v0
5559; GFX9-NEXT:    v_or_b32_e32 v1, s1, v1
5560; GFX9-NEXT:    ; return to shader part epilog
5561;
5562; GFX10-LABEL: v_fshr_i64_svs:
5563; GFX10:       ; %bb.0:
5564; GFX10-NEXT:    s_and_b32 s3, s2, 63
5565; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5566; GFX10-NEXT:    v_lshrrev_b64 v[0:1], s3, v[0:1]
5567; GFX10-NEXT:    s_not_b32 s2, s2
5568; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], s2
5569; GFX10-NEXT:    v_or_b32_e32 v0, s0, v0
5570; GFX10-NEXT:    v_or_b32_e32 v1, s1, v1
5571; GFX10-NEXT:    ; return to shader part epilog
5572;
5573; GFX11-LABEL: v_fshr_i64_svs:
5574; GFX11:       ; %bb.0:
5575; GFX11-NEXT:    s_and_b32 s3, s2, 63
5576; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5577; GFX11-NEXT:    v_lshrrev_b64 v[0:1], s3, v[0:1]
5578; GFX11-NEXT:    s_not_b32 s2, s2
5579; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
5580; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], s2
5581; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
5582; GFX11-NEXT:    v_or_b32_e32 v0, s0, v0
5583; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
5584; GFX11-NEXT:    v_or_b32_e32 v1, s1, v1
5585; GFX11-NEXT:    ; return to shader part epilog
5586  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt)
; Reinterpret the 64-bit integer as <2 x float> to match the return type.
5587  %cast = bitcast i64 %result to <2 x float>
5588  ret <2 x float> %cast
5589}
5590
; Funnel shift right on i64 with mixed operand kinds: rhs and the shift amount
; are inreg, lhs is not. The i64 result is bitcast to <2 x float>, which is the
; declared return type of this amdgpu_ps function.
5591define amdgpu_ps <2 x float> @v_fshr_i64_vss(i64 %lhs, i64 inreg %rhs, i64 inreg %amt) {
5592; GFX6-LABEL: v_fshr_i64_vss:
5593; GFX6:       ; %bb.0:
5594; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 1
5595; GFX6-NEXT:    s_andn2_b32 s3, 63, s2
5596; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], s3
5597; GFX6-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
5598; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
5599; GFX6-NEXT:    v_or_b32_e32 v1, s1, v1
5600; GFX6-NEXT:    ; return to shader part epilog
5601;
5602; GFX8-LABEL: v_fshr_i64_vss:
5603; GFX8:       ; %bb.0:
5604; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
5605; GFX8-NEXT:    s_andn2_b32 s3, 63, s2
5606; GFX8-NEXT:    v_lshlrev_b64 v[0:1], s3, v[0:1]
5607; GFX8-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
5608; GFX8-NEXT:    v_or_b32_e32 v0, s0, v0
5609; GFX8-NEXT:    v_or_b32_e32 v1, s1, v1
5610; GFX8-NEXT:    ; return to shader part epilog
5611;
5612; GFX9-LABEL: v_fshr_i64_vss:
5613; GFX9:       ; %bb.0:
5614; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
5615; GFX9-NEXT:    s_andn2_b32 s3, 63, s2
5616; GFX9-NEXT:    v_lshlrev_b64 v[0:1], s3, v[0:1]
5617; GFX9-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
5618; GFX9-NEXT:    v_or_b32_e32 v0, s0, v0
5619; GFX9-NEXT:    v_or_b32_e32 v1, s1, v1
5620; GFX9-NEXT:    ; return to shader part epilog
5621;
5622; GFX10-LABEL: v_fshr_i64_vss:
5623; GFX10:       ; %bb.0:
5624; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
5625; GFX10-NEXT:    s_andn2_b32 s3, 63, s2
5626; GFX10-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
5627; GFX10-NEXT:    v_lshlrev_b64 v[0:1], s3, v[0:1]
5628; GFX10-NEXT:    v_or_b32_e32 v0, s0, v0
5629; GFX10-NEXT:    v_or_b32_e32 v1, s1, v1
5630; GFX10-NEXT:    ; return to shader part epilog
5631;
5632; GFX11-LABEL: v_fshr_i64_vss:
5633; GFX11:       ; %bb.0:
5634; GFX11-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
5635; GFX11-NEXT:    s_and_not1_b32 s3, 63, s2
5636; GFX11-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
5637; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5638; GFX11-NEXT:    v_lshlrev_b64 v[0:1], s3, v[0:1]
5639; GFX11-NEXT:    v_or_b32_e32 v0, s0, v0
5640; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
5641; GFX11-NEXT:    v_or_b32_e32 v1, s1, v1
5642; GFX11-NEXT:    ; return to shader part epilog
5643  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt)
; Reinterpret the 64-bit integer as <2 x float> to match the return type.
5644  %cast = bitcast i64 %result to <2 x float>
5645  ret <2 x float> %cast
5646}
5647
; Funnel shift right on <2 x i64> with a variable per-element shift amount in
; an amdgpu_ps function. All three vector operands are inreg arguments.
5648define amdgpu_ps <2 x i64> @s_fshr_v2i64(<2 x i64> inreg %lhs, <2 x i64> inreg %rhs, <2 x i64> inreg %amt) {
5649; GFX6-LABEL: s_fshr_v2i64:
5650; GFX6:       ; %bb.0:
5651; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5652; GFX6-NEXT:    s_not_b32 s9, s8
5653; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], s9
5654; GFX6-NEXT:    s_lshr_b64 s[4:5], s[4:5], s8
5655; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
5656; GFX6-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5657; GFX6-NEXT:    s_not_b32 s4, s10
5658; GFX6-NEXT:    s_lshl_b64 s[2:3], s[2:3], s4
5659; GFX6-NEXT:    s_lshr_b64 s[4:5], s[6:7], s10
5660; GFX6-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
5661; GFX6-NEXT:    ; return to shader part epilog
5662;
5663; GFX8-LABEL: s_fshr_v2i64:
5664; GFX8:       ; %bb.0:
5665; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5666; GFX8-NEXT:    s_not_b32 s9, s8
5667; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], s9
5668; GFX8-NEXT:    s_lshr_b64 s[4:5], s[4:5], s8
5669; GFX8-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
5670; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5671; GFX8-NEXT:    s_not_b32 s4, s10
5672; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], s4
5673; GFX8-NEXT:    s_lshr_b64 s[4:5], s[6:7], s10
5674; GFX8-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
5675; GFX8-NEXT:    ; return to shader part epilog
5676;
5677; GFX9-LABEL: s_fshr_v2i64:
5678; GFX9:       ; %bb.0:
5679; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5680; GFX9-NEXT:    s_not_b32 s9, s8
5681; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], s9
5682; GFX9-NEXT:    s_lshr_b64 s[4:5], s[4:5], s8
5683; GFX9-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
5684; GFX9-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5685; GFX9-NEXT:    s_not_b32 s4, s10
5686; GFX9-NEXT:    s_lshl_b64 s[2:3], s[2:3], s4
5687; GFX9-NEXT:    s_lshr_b64 s[4:5], s[6:7], s10
5688; GFX9-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
5689; GFX9-NEXT:    ; return to shader part epilog
5690;
5691; GFX10-LABEL: s_fshr_v2i64:
5692; GFX10:       ; %bb.0:
5693; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5694; GFX10-NEXT:    s_not_b32 s9, s8
5695; GFX10-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5696; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], s9
5697; GFX10-NEXT:    s_not_b32 s9, s10
5698; GFX10-NEXT:    s_lshr_b64 s[4:5], s[4:5], s8
5699; GFX10-NEXT:    s_lshl_b64 s[2:3], s[2:3], s9
5700; GFX10-NEXT:    s_lshr_b64 s[6:7], s[6:7], s10
5701; GFX10-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
5702; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[6:7]
5703; GFX10-NEXT:    ; return to shader part epilog
5704;
5705; GFX11-LABEL: s_fshr_v2i64:
5706; GFX11:       ; %bb.0:
5707; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5708; GFX11-NEXT:    s_not_b32 s9, s8
5709; GFX11-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5710; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], s9
5711; GFX11-NEXT:    s_not_b32 s9, s10
5712; GFX11-NEXT:    s_lshr_b64 s[4:5], s[4:5], s8
5713; GFX11-NEXT:    s_lshl_b64 s[2:3], s[2:3], s9
5714; GFX11-NEXT:    s_lshr_b64 s[6:7], s[6:7], s10
5715; GFX11-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
5716; GFX11-NEXT:    s_or_b64 s[2:3], s[2:3], s[6:7]
5717; GFX11-NEXT:    ; return to shader part epilog
5718  %result = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt)
5719  ret <2 x i64> %result
5720}
5721
; v_fshr_v2i64 - funnel shift right of <2 x i64> through @llvm.fshr.v2i64.
; %lhs, %rhs and %amt are plain (non-inreg) arguments and the function has no
; calling-convention keyword; every expected sequence ends in s_setpc_b64.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file): regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
; In each expected sequence, per vector element, one 64-bit register pair is
; shifted left by 1 and then by (~amt & 63), a second pair is shifted right by
; (amt & 63), and the two results are ORed together 32 bits at a time.
; The GFX10 and GFX11 expectations use the same instruction order; GFX11
; additionally expects s_delay_alu hints between dependent VALU instructions.
5722define <2 x i64> @v_fshr_v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt) {
5723; GFX6-LABEL: v_fshr_v2i64:
5724; GFX6:       ; %bb.0:
5725; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5726; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 1
5727; GFX6-NEXT:    v_not_b32_e32 v9, v8
5728; GFX6-NEXT:    v_and_b32_e32 v9, 63, v9
5729; GFX6-NEXT:    v_and_b32_e32 v8, 63, v8
5730; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], v9
5731; GFX6-NEXT:    v_lshr_b64 v[4:5], v[4:5], v8
5732; GFX6-NEXT:    v_lshl_b64 v[2:3], v[2:3], 1
5733; GFX6-NEXT:    v_or_b32_e32 v0, v0, v4
5734; GFX6-NEXT:    v_not_b32_e32 v4, v10
5735; GFX6-NEXT:    v_and_b32_e32 v4, 63, v4
5736; GFX6-NEXT:    v_lshl_b64 v[2:3], v[2:3], v4
5737; GFX6-NEXT:    v_and_b32_e32 v4, 63, v10
5738; GFX6-NEXT:    v_lshr_b64 v[6:7], v[6:7], v4
5739; GFX6-NEXT:    v_or_b32_e32 v1, v1, v5
5740; GFX6-NEXT:    v_or_b32_e32 v2, v2, v6
5741; GFX6-NEXT:    v_or_b32_e32 v3, v3, v7
5742; GFX6-NEXT:    s_setpc_b64 s[30:31]
5743;
5744; GFX8-LABEL: v_fshr_v2i64:
5745; GFX8:       ; %bb.0:
5746; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5747; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
5748; GFX8-NEXT:    v_not_b32_e32 v9, v8
5749; GFX8-NEXT:    v_and_b32_e32 v9, 63, v9
5750; GFX8-NEXT:    v_and_b32_e32 v8, 63, v8
5751; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v9, v[0:1]
5752; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v8, v[4:5]
5753; GFX8-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
5754; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
5755; GFX8-NEXT:    v_not_b32_e32 v4, v10
5756; GFX8-NEXT:    v_and_b32_e32 v4, 63, v4
5757; GFX8-NEXT:    v_lshlrev_b64 v[2:3], v4, v[2:3]
5758; GFX8-NEXT:    v_and_b32_e32 v4, 63, v10
5759; GFX8-NEXT:    v_lshrrev_b64 v[6:7], v4, v[6:7]
5760; GFX8-NEXT:    v_or_b32_e32 v1, v1, v5
5761; GFX8-NEXT:    v_or_b32_e32 v2, v2, v6
5762; GFX8-NEXT:    v_or_b32_e32 v3, v3, v7
5763; GFX8-NEXT:    s_setpc_b64 s[30:31]
5764;
5765; GFX9-LABEL: v_fshr_v2i64:
5766; GFX9:       ; %bb.0:
5767; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5768; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
5769; GFX9-NEXT:    v_not_b32_e32 v9, v8
5770; GFX9-NEXT:    v_and_b32_e32 v9, 63, v9
5771; GFX9-NEXT:    v_and_b32_e32 v8, 63, v8
5772; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v9, v[0:1]
5773; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v8, v[4:5]
5774; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
5775; GFX9-NEXT:    v_or_b32_e32 v0, v0, v4
5776; GFX9-NEXT:    v_not_b32_e32 v4, v10
5777; GFX9-NEXT:    v_and_b32_e32 v4, 63, v4
5778; GFX9-NEXT:    v_lshlrev_b64 v[2:3], v4, v[2:3]
5779; GFX9-NEXT:    v_and_b32_e32 v4, 63, v10
5780; GFX9-NEXT:    v_lshrrev_b64 v[6:7], v4, v[6:7]
5781; GFX9-NEXT:    v_or_b32_e32 v1, v1, v5
5782; GFX9-NEXT:    v_or_b32_e32 v2, v2, v6
5783; GFX9-NEXT:    v_or_b32_e32 v3, v3, v7
5784; GFX9-NEXT:    s_setpc_b64 s[30:31]
5785;
5786; GFX10-LABEL: v_fshr_v2i64:
5787; GFX10:       ; %bb.0:
5788; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5789; GFX10-NEXT:    v_not_b32_e32 v9, v8
5790; GFX10-NEXT:    v_not_b32_e32 v11, v10
5791; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
5792; GFX10-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
5793; GFX10-NEXT:    v_and_b32_e32 v8, 63, v8
5794; GFX10-NEXT:    v_and_b32_e32 v9, 63, v9
5795; GFX10-NEXT:    v_and_b32_e32 v11, 63, v11
5796; GFX10-NEXT:    v_and_b32_e32 v10, 63, v10
5797; GFX10-NEXT:    v_lshrrev_b64 v[4:5], v8, v[4:5]
5798; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v9, v[0:1]
5799; GFX10-NEXT:    v_lshlrev_b64 v[2:3], v11, v[2:3]
5800; GFX10-NEXT:    v_lshrrev_b64 v[6:7], v10, v[6:7]
5801; GFX10-NEXT:    v_or_b32_e32 v0, v0, v4
5802; GFX10-NEXT:    v_or_b32_e32 v1, v1, v5
5803; GFX10-NEXT:    v_or_b32_e32 v2, v2, v6
5804; GFX10-NEXT:    v_or_b32_e32 v3, v3, v7
5805; GFX10-NEXT:    s_setpc_b64 s[30:31]
5806;
5807; GFX11-LABEL: v_fshr_v2i64:
5808; GFX11:       ; %bb.0:
5809; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5810; GFX11-NEXT:    v_not_b32_e32 v9, v8
5811; GFX11-NEXT:    v_not_b32_e32 v11, v10
5812; GFX11-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
5813; GFX11-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
5814; GFX11-NEXT:    v_and_b32_e32 v8, 63, v8
5815; GFX11-NEXT:    v_and_b32_e32 v9, 63, v9
5816; GFX11-NEXT:    v_and_b32_e32 v11, 63, v11
5817; GFX11-NEXT:    v_and_b32_e32 v10, 63, v10
5818; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
5819; GFX11-NEXT:    v_lshrrev_b64 v[4:5], v8, v[4:5]
5820; GFX11-NEXT:    v_lshlrev_b64 v[0:1], v9, v[0:1]
5821; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
5822; GFX11-NEXT:    v_lshlrev_b64 v[2:3], v11, v[2:3]
5823; GFX11-NEXT:    v_lshrrev_b64 v[6:7], v10, v[6:7]
5824; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
5825; GFX11-NEXT:    v_or_b32_e32 v0, v0, v4
5826; GFX11-NEXT:    v_or_b32_e32 v1, v1, v5
5827; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
5828; GFX11-NEXT:    v_or_b32_e32 v2, v2, v6
5829; GFX11-NEXT:    v_or_b32_e32 v3, v3, v7
5830; GFX11-NEXT:    s_setpc_b64 s[30:31]
5831  %result = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt)
5832  ret <2 x i64> %result
5833}
5834
; s_fshr_i128 - funnel shift right of i128 through @llvm.fshr.i128 in an
; amdgpu_ps function whose three operands (%lhs, %rhs, %amt) are all inreg.
; Every expected sequence below consists of scalar (s_*) instructions only.
; The assertions are autogenerated (see the NOTE on the first line of this
; file): regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
; The expected code masks the shift amount and its complement to 7 bits
; (0x7f), assembles each 128-bit shift from 64-bit shifts, and uses compares
; against 64 and 0 followed by s_cselect to pick the right halves before the
; two final s_or_b64 instructions. The GFX6, GFX8 and GFX9 expectations are
; the same instruction sequence; GFX10 and GFX11 use a different order and
; register assignment, and GFX11 spells the and-not as s_and_not1_b32.
5835define amdgpu_ps i128 @s_fshr_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg %amt) {
5836; GFX6-LABEL: s_fshr_i128:
5837; GFX6:       ; %bb.0:
5838; GFX6-NEXT:    s_lshl_b64 s[10:11], s[0:1], 1
5839; GFX6-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5840; GFX6-NEXT:    s_lshr_b32 s0, s1, 31
5841; GFX6-NEXT:    s_mov_b32 s1, 0
5842; GFX6-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
5843; GFX6-NEXT:    s_andn2_b32 s2, 0x7f, s8
5844; GFX6-NEXT:    s_not_b32 s9, s8
5845; GFX6-NEXT:    s_sub_i32 s16, s2, 64
5846; GFX6-NEXT:    s_sub_i32 s12, 64, s2
5847; GFX6-NEXT:    s_cmp_lt_u32 s2, 64
5848; GFX6-NEXT:    s_cselect_b32 s17, 1, 0
5849; GFX6-NEXT:    s_cmp_eq_u32 s2, 0
5850; GFX6-NEXT:    s_cselect_b32 s18, 1, 0
5851; GFX6-NEXT:    s_lshr_b64 s[12:13], s[10:11], s12
5852; GFX6-NEXT:    s_lshl_b64 s[14:15], s[0:1], s9
5853; GFX6-NEXT:    s_lshl_b64 s[2:3], s[10:11], s9
5854; GFX6-NEXT:    s_or_b64 s[12:13], s[12:13], s[14:15]
5855; GFX6-NEXT:    s_lshl_b64 s[10:11], s[10:11], s16
5856; GFX6-NEXT:    s_cmp_lg_u32 s17, 0
5857; GFX6-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
5858; GFX6-NEXT:    s_cselect_b64 s[10:11], s[12:13], s[10:11]
5859; GFX6-NEXT:    s_cmp_lg_u32 s18, 0
5860; GFX6-NEXT:    s_cselect_b64 s[10:11], s[0:1], s[10:11]
5861; GFX6-NEXT:    s_and_b32 s0, s8, 0x7f
5862; GFX6-NEXT:    s_sub_i32 s14, s0, 64
5863; GFX6-NEXT:    s_sub_i32 s12, 64, s0
5864; GFX6-NEXT:    s_cmp_lt_u32 s0, 64
5865; GFX6-NEXT:    s_cselect_b32 s15, 1, 0
5866; GFX6-NEXT:    s_cmp_eq_u32 s0, 0
5867; GFX6-NEXT:    s_cselect_b32 s16, 1, 0
5868; GFX6-NEXT:    s_lshr_b64 s[0:1], s[6:7], s8
5869; GFX6-NEXT:    s_lshr_b64 s[8:9], s[4:5], s8
5870; GFX6-NEXT:    s_lshl_b64 s[12:13], s[6:7], s12
5871; GFX6-NEXT:    s_or_b64 s[8:9], s[8:9], s[12:13]
5872; GFX6-NEXT:    s_lshr_b64 s[6:7], s[6:7], s14
5873; GFX6-NEXT:    s_cmp_lg_u32 s15, 0
5874; GFX6-NEXT:    s_cselect_b64 s[6:7], s[8:9], s[6:7]
5875; GFX6-NEXT:    s_cmp_lg_u32 s16, 0
5876; GFX6-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
5877; GFX6-NEXT:    s_cmp_lg_u32 s15, 0
5878; GFX6-NEXT:    s_cselect_b64 s[6:7], s[0:1], 0
5879; GFX6-NEXT:    s_or_b64 s[0:1], s[2:3], s[4:5]
5880; GFX6-NEXT:    s_or_b64 s[2:3], s[10:11], s[6:7]
5881; GFX6-NEXT:    ; return to shader part epilog
5882;
5883; GFX8-LABEL: s_fshr_i128:
5884; GFX8:       ; %bb.0:
5885; GFX8-NEXT:    s_lshl_b64 s[10:11], s[0:1], 1
5886; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5887; GFX8-NEXT:    s_lshr_b32 s0, s1, 31
5888; GFX8-NEXT:    s_mov_b32 s1, 0
5889; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
5890; GFX8-NEXT:    s_andn2_b32 s2, 0x7f, s8
5891; GFX8-NEXT:    s_not_b32 s9, s8
5892; GFX8-NEXT:    s_sub_i32 s16, s2, 64
5893; GFX8-NEXT:    s_sub_i32 s12, 64, s2
5894; GFX8-NEXT:    s_cmp_lt_u32 s2, 64
5895; GFX8-NEXT:    s_cselect_b32 s17, 1, 0
5896; GFX8-NEXT:    s_cmp_eq_u32 s2, 0
5897; GFX8-NEXT:    s_cselect_b32 s18, 1, 0
5898; GFX8-NEXT:    s_lshr_b64 s[12:13], s[10:11], s12
5899; GFX8-NEXT:    s_lshl_b64 s[14:15], s[0:1], s9
5900; GFX8-NEXT:    s_lshl_b64 s[2:3], s[10:11], s9
5901; GFX8-NEXT:    s_or_b64 s[12:13], s[12:13], s[14:15]
5902; GFX8-NEXT:    s_lshl_b64 s[10:11], s[10:11], s16
5903; GFX8-NEXT:    s_cmp_lg_u32 s17, 0
5904; GFX8-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
5905; GFX8-NEXT:    s_cselect_b64 s[10:11], s[12:13], s[10:11]
5906; GFX8-NEXT:    s_cmp_lg_u32 s18, 0
5907; GFX8-NEXT:    s_cselect_b64 s[10:11], s[0:1], s[10:11]
5908; GFX8-NEXT:    s_and_b32 s0, s8, 0x7f
5909; GFX8-NEXT:    s_sub_i32 s14, s0, 64
5910; GFX8-NEXT:    s_sub_i32 s12, 64, s0
5911; GFX8-NEXT:    s_cmp_lt_u32 s0, 64
5912; GFX8-NEXT:    s_cselect_b32 s15, 1, 0
5913; GFX8-NEXT:    s_cmp_eq_u32 s0, 0
5914; GFX8-NEXT:    s_cselect_b32 s16, 1, 0
5915; GFX8-NEXT:    s_lshr_b64 s[0:1], s[6:7], s8
5916; GFX8-NEXT:    s_lshr_b64 s[8:9], s[4:5], s8
5917; GFX8-NEXT:    s_lshl_b64 s[12:13], s[6:7], s12
5918; GFX8-NEXT:    s_or_b64 s[8:9], s[8:9], s[12:13]
5919; GFX8-NEXT:    s_lshr_b64 s[6:7], s[6:7], s14
5920; GFX8-NEXT:    s_cmp_lg_u32 s15, 0
5921; GFX8-NEXT:    s_cselect_b64 s[6:7], s[8:9], s[6:7]
5922; GFX8-NEXT:    s_cmp_lg_u32 s16, 0
5923; GFX8-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
5924; GFX8-NEXT:    s_cmp_lg_u32 s15, 0
5925; GFX8-NEXT:    s_cselect_b64 s[6:7], s[0:1], 0
5926; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[4:5]
5927; GFX8-NEXT:    s_or_b64 s[2:3], s[10:11], s[6:7]
5928; GFX8-NEXT:    ; return to shader part epilog
5929;
5930; GFX9-LABEL: s_fshr_i128:
5931; GFX9:       ; %bb.0:
5932; GFX9-NEXT:    s_lshl_b64 s[10:11], s[0:1], 1
5933; GFX9-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5934; GFX9-NEXT:    s_lshr_b32 s0, s1, 31
5935; GFX9-NEXT:    s_mov_b32 s1, 0
5936; GFX9-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
5937; GFX9-NEXT:    s_andn2_b32 s2, 0x7f, s8
5938; GFX9-NEXT:    s_not_b32 s9, s8
5939; GFX9-NEXT:    s_sub_i32 s16, s2, 64
5940; GFX9-NEXT:    s_sub_i32 s12, 64, s2
5941; GFX9-NEXT:    s_cmp_lt_u32 s2, 64
5942; GFX9-NEXT:    s_cselect_b32 s17, 1, 0
5943; GFX9-NEXT:    s_cmp_eq_u32 s2, 0
5944; GFX9-NEXT:    s_cselect_b32 s18, 1, 0
5945; GFX9-NEXT:    s_lshr_b64 s[12:13], s[10:11], s12
5946; GFX9-NEXT:    s_lshl_b64 s[14:15], s[0:1], s9
5947; GFX9-NEXT:    s_lshl_b64 s[2:3], s[10:11], s9
5948; GFX9-NEXT:    s_or_b64 s[12:13], s[12:13], s[14:15]
5949; GFX9-NEXT:    s_lshl_b64 s[10:11], s[10:11], s16
5950; GFX9-NEXT:    s_cmp_lg_u32 s17, 0
5951; GFX9-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
5952; GFX9-NEXT:    s_cselect_b64 s[10:11], s[12:13], s[10:11]
5953; GFX9-NEXT:    s_cmp_lg_u32 s18, 0
5954; GFX9-NEXT:    s_cselect_b64 s[10:11], s[0:1], s[10:11]
5955; GFX9-NEXT:    s_and_b32 s0, s8, 0x7f
5956; GFX9-NEXT:    s_sub_i32 s14, s0, 64
5957; GFX9-NEXT:    s_sub_i32 s12, 64, s0
5958; GFX9-NEXT:    s_cmp_lt_u32 s0, 64
5959; GFX9-NEXT:    s_cselect_b32 s15, 1, 0
5960; GFX9-NEXT:    s_cmp_eq_u32 s0, 0
5961; GFX9-NEXT:    s_cselect_b32 s16, 1, 0
5962; GFX9-NEXT:    s_lshr_b64 s[0:1], s[6:7], s8
5963; GFX9-NEXT:    s_lshr_b64 s[8:9], s[4:5], s8
5964; GFX9-NEXT:    s_lshl_b64 s[12:13], s[6:7], s12
5965; GFX9-NEXT:    s_or_b64 s[8:9], s[8:9], s[12:13]
5966; GFX9-NEXT:    s_lshr_b64 s[6:7], s[6:7], s14
5967; GFX9-NEXT:    s_cmp_lg_u32 s15, 0
5968; GFX9-NEXT:    s_cselect_b64 s[6:7], s[8:9], s[6:7]
5969; GFX9-NEXT:    s_cmp_lg_u32 s16, 0
5970; GFX9-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
5971; GFX9-NEXT:    s_cmp_lg_u32 s15, 0
5972; GFX9-NEXT:    s_cselect_b64 s[6:7], s[0:1], 0
5973; GFX9-NEXT:    s_or_b64 s[0:1], s[2:3], s[4:5]
5974; GFX9-NEXT:    s_or_b64 s[2:3], s[10:11], s[6:7]
5975; GFX9-NEXT:    ; return to shader part epilog
5976;
5977; GFX10-LABEL: s_fshr_i128:
5978; GFX10:       ; %bb.0:
5979; GFX10-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5980; GFX10-NEXT:    s_lshr_b32 s10, s1, 31
5981; GFX10-NEXT:    s_mov_b32 s11, 0
5982; GFX10-NEXT:    s_andn2_b32 s9, 0x7f, s8
5983; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5984; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[10:11]
5985; GFX10-NEXT:    s_not_b32 s14, s8
5986; GFX10-NEXT:    s_sub_i32 s16, s9, 64
5987; GFX10-NEXT:    s_sub_i32 s10, 64, s9
5988; GFX10-NEXT:    s_cmp_lt_u32 s9, 64
5989; GFX10-NEXT:    s_cselect_b32 s17, 1, 0
5990; GFX10-NEXT:    s_cmp_eq_u32 s9, 0
5991; GFX10-NEXT:    s_cselect_b32 s9, 1, 0
5992; GFX10-NEXT:    s_lshr_b64 s[10:11], s[0:1], s10
5993; GFX10-NEXT:    s_lshl_b64 s[12:13], s[2:3], s14
5994; GFX10-NEXT:    s_lshl_b64 s[14:15], s[0:1], s14
5995; GFX10-NEXT:    s_or_b64 s[10:11], s[10:11], s[12:13]
5996; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], s16
5997; GFX10-NEXT:    s_cmp_lg_u32 s17, 0
5998; GFX10-NEXT:    s_cselect_b64 s[12:13], s[14:15], 0
5999; GFX10-NEXT:    s_cselect_b64 s[0:1], s[10:11], s[0:1]
6000; GFX10-NEXT:    s_cmp_lg_u32 s9, 0
6001; GFX10-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
6002; GFX10-NEXT:    s_and_b32 s0, s8, 0x7f
6003; GFX10-NEXT:    s_sub_i32 s14, s0, 64
6004; GFX10-NEXT:    s_sub_i32 s9, 64, s0
6005; GFX10-NEXT:    s_cmp_lt_u32 s0, 64
6006; GFX10-NEXT:    s_cselect_b32 s15, 1, 0
6007; GFX10-NEXT:    s_cmp_eq_u32 s0, 0
6008; GFX10-NEXT:    s_cselect_b32 s16, 1, 0
6009; GFX10-NEXT:    s_lshr_b64 s[0:1], s[4:5], s8
6010; GFX10-NEXT:    s_lshl_b64 s[10:11], s[6:7], s9
6011; GFX10-NEXT:    s_lshr_b64 s[8:9], s[6:7], s8
6012; GFX10-NEXT:    s_or_b64 s[0:1], s[0:1], s[10:11]
6013; GFX10-NEXT:    s_lshr_b64 s[6:7], s[6:7], s14
6014; GFX10-NEXT:    s_cmp_lg_u32 s15, 0
6015; GFX10-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[6:7]
6016; GFX10-NEXT:    s_cmp_lg_u32 s16, 0
6017; GFX10-NEXT:    s_cselect_b64 s[0:1], s[4:5], s[0:1]
6018; GFX10-NEXT:    s_cmp_lg_u32 s15, 0
6019; GFX10-NEXT:    s_cselect_b64 s[4:5], s[8:9], 0
6020; GFX10-NEXT:    s_or_b64 s[0:1], s[12:13], s[0:1]
6021; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
6022; GFX10-NEXT:    ; return to shader part epilog
6023;
6024; GFX11-LABEL: s_fshr_i128:
6025; GFX11:       ; %bb.0:
6026; GFX11-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
6027; GFX11-NEXT:    s_lshr_b32 s10, s1, 31
6028; GFX11-NEXT:    s_mov_b32 s11, 0
6029; GFX11-NEXT:    s_and_not1_b32 s9, 0x7f, s8
6030; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
6031; GFX11-NEXT:    s_or_b64 s[2:3], s[2:3], s[10:11]
6032; GFX11-NEXT:    s_not_b32 s14, s8
6033; GFX11-NEXT:    s_sub_i32 s16, s9, 64
6034; GFX11-NEXT:    s_sub_i32 s10, 64, s9
6035; GFX11-NEXT:    s_cmp_lt_u32 s9, 64
6036; GFX11-NEXT:    s_cselect_b32 s17, 1, 0
6037; GFX11-NEXT:    s_cmp_eq_u32 s9, 0
6038; GFX11-NEXT:    s_cselect_b32 s9, 1, 0
6039; GFX11-NEXT:    s_lshr_b64 s[10:11], s[0:1], s10
6040; GFX11-NEXT:    s_lshl_b64 s[12:13], s[2:3], s14
6041; GFX11-NEXT:    s_lshl_b64 s[14:15], s[0:1], s14
6042; GFX11-NEXT:    s_or_b64 s[10:11], s[10:11], s[12:13]
6043; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], s16
6044; GFX11-NEXT:    s_cmp_lg_u32 s17, 0
6045; GFX11-NEXT:    s_cselect_b64 s[12:13], s[14:15], 0
6046; GFX11-NEXT:    s_cselect_b64 s[0:1], s[10:11], s[0:1]
6047; GFX11-NEXT:    s_cmp_lg_u32 s9, 0
6048; GFX11-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
6049; GFX11-NEXT:    s_and_b32 s0, s8, 0x7f
6050; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
6051; GFX11-NEXT:    s_sub_i32 s14, s0, 64
6052; GFX11-NEXT:    s_sub_i32 s9, 64, s0
6053; GFX11-NEXT:    s_cmp_lt_u32 s0, 64
6054; GFX11-NEXT:    s_cselect_b32 s15, 1, 0
6055; GFX11-NEXT:    s_cmp_eq_u32 s0, 0
6056; GFX11-NEXT:    s_cselect_b32 s16, 1, 0
6057; GFX11-NEXT:    s_lshr_b64 s[0:1], s[4:5], s8
6058; GFX11-NEXT:    s_lshl_b64 s[10:11], s[6:7], s9
6059; GFX11-NEXT:    s_lshr_b64 s[8:9], s[6:7], s8
6060; GFX11-NEXT:    s_or_b64 s[0:1], s[0:1], s[10:11]
6061; GFX11-NEXT:    s_lshr_b64 s[6:7], s[6:7], s14
6062; GFX11-NEXT:    s_cmp_lg_u32 s15, 0
6063; GFX11-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[6:7]
6064; GFX11-NEXT:    s_cmp_lg_u32 s16, 0
6065; GFX11-NEXT:    s_cselect_b64 s[0:1], s[4:5], s[0:1]
6066; GFX11-NEXT:    s_cmp_lg_u32 s15, 0
6067; GFX11-NEXT:    s_cselect_b64 s[4:5], s[8:9], 0
6068; GFX11-NEXT:    s_or_b64 s[0:1], s[12:13], s[0:1]
6069; GFX11-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
6070; GFX11-NEXT:    ; return to shader part epilog
6071  %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
6072  ret i128 %result
6073}
6074
; v_fshr_i128 - funnel shift right of i128 through @llvm.fshr.i128.
; %lhs, %rhs and %amt are plain (non-inreg) arguments and the function has no
; calling-convention keyword; every expected sequence ends in s_setpc_b64.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file): regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
; The expected code masks the shift amount and its bitwise complement to
; 7 bits (0x7f), assembles each 128-bit shift from 64-bit shifts, and selects
; between the candidates with v_cndmask driven by compares of the masked
; amount against 64 and 0.
; The GFX6 and GFX8 expectations form the "amount - 64" value by adding a
; register loaded with v_not_b32 of 63; GFX9, GFX10 and GFX11 add the literal
; 0xffffffc0 instead.
6075define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
6076; GFX6-LABEL: v_fshr_i128:
6077; GFX6:       ; %bb.0:
6078; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6079; GFX6-NEXT:    v_lshl_b64 v[2:3], v[2:3], 1
6080; GFX6-NEXT:    v_lshl_b64 v[9:10], v[0:1], 1
6081; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
6082; GFX6-NEXT:    v_or_b32_e32 v2, v2, v0
6083; GFX6-NEXT:    v_not_b32_e32 v0, v8
6084; GFX6-NEXT:    v_and_b32_e32 v15, 0x7f, v0
6085; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, 64, v15
6086; GFX6-NEXT:    v_not_b32_e32 v16, 63
6087; GFX6-NEXT:    v_lshr_b64 v[0:1], v[9:10], v0
6088; GFX6-NEXT:    v_lshl_b64 v[11:12], v[2:3], v15
6089; GFX6-NEXT:    v_add_i32_e32 v17, vcc, v15, v16
6090; GFX6-NEXT:    v_lshl_b64 v[13:14], v[9:10], v15
6091; GFX6-NEXT:    v_or_b32_e32 v11, v0, v11
6092; GFX6-NEXT:    v_or_b32_e32 v12, v1, v12
6093; GFX6-NEXT:    v_lshl_b64 v[0:1], v[9:10], v17
6094; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
6095; GFX6-NEXT:    v_cndmask_b32_e32 v10, 0, v13, vcc
6096; GFX6-NEXT:    v_cndmask_b32_e32 v13, 0, v14, vcc
6097; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
6098; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v12, vcc
6099; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v15
6100; GFX6-NEXT:    v_and_b32_e32 v14, 0x7f, v8
6101; GFX6-NEXT:    v_cndmask_b32_e32 v11, v0, v2, vcc
6102; GFX6-NEXT:    v_cndmask_b32_e32 v12, v1, v3, vcc
6103; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 64, v14
6104; GFX6-NEXT:    v_lshr_b64 v[0:1], v[4:5], v14
6105; GFX6-NEXT:    v_lshl_b64 v[2:3], v[6:7], v2
6106; GFX6-NEXT:    v_add_i32_e32 v15, vcc, v14, v16
6107; GFX6-NEXT:    v_or_b32_e32 v2, v0, v2
6108; GFX6-NEXT:    v_or_b32_e32 v3, v1, v3
6109; GFX6-NEXT:    v_lshr_b64 v[0:1], v[6:7], v15
6110; GFX6-NEXT:    v_lshr_b64 v[8:9], v[6:7], v14
6111; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
6112; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
6113; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
6114; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
6115; GFX6-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[4:5]
6116; GFX6-NEXT:    v_cndmask_b32_e64 v1, v1, v5, s[4:5]
6117; GFX6-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
6118; GFX6-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
6119; GFX6-NEXT:    v_or_b32_e32 v0, v10, v0
6120; GFX6-NEXT:    v_or_b32_e32 v1, v13, v1
6121; GFX6-NEXT:    v_or_b32_e32 v2, v11, v2
6122; GFX6-NEXT:    v_or_b32_e32 v3, v12, v3
6123; GFX6-NEXT:    s_setpc_b64 s[30:31]
6124;
6125; GFX8-LABEL: v_fshr_i128:
6126; GFX8:       ; %bb.0:
6127; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6128; GFX8-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
6129; GFX8-NEXT:    v_lshlrev_b64 v[9:10], 1, v[0:1]
6130; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
6131; GFX8-NEXT:    v_or_b32_e32 v2, v2, v0
6132; GFX8-NEXT:    v_not_b32_e32 v0, v8
6133; GFX8-NEXT:    v_and_b32_e32 v15, 0x7f, v0
6134; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, 64, v15
6135; GFX8-NEXT:    v_not_b32_e32 v16, 63
6136; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v0, v[9:10]
6137; GFX8-NEXT:    v_lshlrev_b64 v[11:12], v15, v[2:3]
6138; GFX8-NEXT:    v_add_u32_e32 v17, vcc, v15, v16
6139; GFX8-NEXT:    v_lshlrev_b64 v[13:14], v15, v[9:10]
6140; GFX8-NEXT:    v_or_b32_e32 v11, v0, v11
6141; GFX8-NEXT:    v_or_b32_e32 v12, v1, v12
6142; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v17, v[9:10]
6143; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
6144; GFX8-NEXT:    v_cndmask_b32_e32 v10, 0, v13, vcc
6145; GFX8-NEXT:    v_cndmask_b32_e32 v13, 0, v14, vcc
6146; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
6147; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v12, vcc
6148; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v15
6149; GFX8-NEXT:    v_and_b32_e32 v14, 0x7f, v8
6150; GFX8-NEXT:    v_cndmask_b32_e32 v11, v0, v2, vcc
6151; GFX8-NEXT:    v_cndmask_b32_e32 v12, v1, v3, vcc
6152; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 64, v14
6153; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v14, v[4:5]
6154; GFX8-NEXT:    v_lshlrev_b64 v[2:3], v2, v[6:7]
6155; GFX8-NEXT:    v_add_u32_e32 v15, vcc, v14, v16
6156; GFX8-NEXT:    v_or_b32_e32 v2, v0, v2
6157; GFX8-NEXT:    v_or_b32_e32 v3, v1, v3
6158; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v15, v[6:7]
6159; GFX8-NEXT:    v_lshrrev_b64 v[8:9], v14, v[6:7]
6160; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
6161; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
6162; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
6163; GFX8-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
6164; GFX8-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[4:5]
6165; GFX8-NEXT:    v_cndmask_b32_e64 v1, v1, v5, s[4:5]
6166; GFX8-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
6167; GFX8-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
6168; GFX8-NEXT:    v_or_b32_e32 v0, v10, v0
6169; GFX8-NEXT:    v_or_b32_e32 v1, v13, v1
6170; GFX8-NEXT:    v_or_b32_e32 v2, v11, v2
6171; GFX8-NEXT:    v_or_b32_e32 v3, v12, v3
6172; GFX8-NEXT:    s_setpc_b64 s[30:31]
6173;
6174; GFX9-LABEL: v_fshr_i128:
6175; GFX9:       ; %bb.0:
6176; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6177; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
6178; GFX9-NEXT:    v_lshlrev_b64 v[9:10], 1, v[0:1]
6179; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
6180; GFX9-NEXT:    v_or_b32_e32 v2, v2, v0
6181; GFX9-NEXT:    v_not_b32_e32 v0, v8
6182; GFX9-NEXT:    v_and_b32_e32 v15, 0x7f, v0
6183; GFX9-NEXT:    v_sub_u32_e32 v0, 64, v15
6184; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v0, v[9:10]
6185; GFX9-NEXT:    v_lshlrev_b64 v[11:12], v15, v[2:3]
6186; GFX9-NEXT:    v_add_u32_e32 v16, 0xffffffc0, v15
6187; GFX9-NEXT:    v_lshlrev_b64 v[13:14], v15, v[9:10]
6188; GFX9-NEXT:    v_or_b32_e32 v11, v0, v11
6189; GFX9-NEXT:    v_or_b32_e32 v12, v1, v12
6190; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v16, v[9:10]
6191; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
6192; GFX9-NEXT:    v_cndmask_b32_e32 v10, 0, v13, vcc
6193; GFX9-NEXT:    v_cndmask_b32_e32 v13, 0, v14, vcc
6194; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v11, vcc
6195; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v12, vcc
6196; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v15
6197; GFX9-NEXT:    v_and_b32_e32 v14, 0x7f, v8
6198; GFX9-NEXT:    v_cndmask_b32_e32 v11, v0, v2, vcc
6199; GFX9-NEXT:    v_sub_u32_e32 v2, 64, v14
6200; GFX9-NEXT:    v_cndmask_b32_e32 v12, v1, v3, vcc
6201; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v14, v[4:5]
6202; GFX9-NEXT:    v_lshlrev_b64 v[2:3], v2, v[6:7]
6203; GFX9-NEXT:    v_add_u32_e32 v15, 0xffffffc0, v14
6204; GFX9-NEXT:    v_or_b32_e32 v2, v0, v2
6205; GFX9-NEXT:    v_or_b32_e32 v3, v1, v3
6206; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v15, v[6:7]
6207; GFX9-NEXT:    v_lshrrev_b64 v[8:9], v14, v[6:7]
6208; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
6209; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
6210; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
6211; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
6212; GFX9-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[4:5]
6213; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, v5, s[4:5]
6214; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
6215; GFX9-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
6216; GFX9-NEXT:    v_or_b32_e32 v0, v10, v0
6217; GFX9-NEXT:    v_or_b32_e32 v1, v13, v1
6218; GFX9-NEXT:    v_or_b32_e32 v2, v11, v2
6219; GFX9-NEXT:    v_or_b32_e32 v3, v12, v3
6220; GFX9-NEXT:    s_setpc_b64 s[30:31]
6221;
6222; GFX10-LABEL: v_fshr_i128:
6223; GFX10:       ; %bb.0:
6224; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6225; GFX10-NEXT:    v_not_b32_e32 v9, v8
6226; GFX10-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
6227; GFX10-NEXT:    v_lshrrev_b32_e32 v10, 31, v1
6228; GFX10-NEXT:    v_and_b32_e32 v19, 0x7f, v8
6229; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
6230; GFX10-NEXT:    v_and_b32_e32 v18, 0x7f, v9
6231; GFX10-NEXT:    v_or_b32_e32 v2, v2, v10
6232; GFX10-NEXT:    v_sub_nc_u32_e32 v16, 64, v19
6233; GFX10-NEXT:    v_add_nc_u32_e32 v21, 0xffffffc0, v19
6234; GFX10-NEXT:    v_sub_nc_u32_e32 v10, 64, v18
6235; GFX10-NEXT:    v_add_nc_u32_e32 v20, 0xffffffc0, v18
6236; GFX10-NEXT:    v_lshlrev_b64 v[8:9], v18, v[2:3]
6237; GFX10-NEXT:    v_lshrrev_b64 v[12:13], v19, v[4:5]
6238; GFX10-NEXT:    v_lshlrev_b64 v[16:17], v16, v[6:7]
6239; GFX10-NEXT:    v_lshrrev_b64 v[10:11], v10, v[0:1]
6240; GFX10-NEXT:    v_lshlrev_b64 v[14:15], v18, v[0:1]
6241; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v20, v[0:1]
6242; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v18
6243; GFX10-NEXT:    v_cmp_gt_u32_e64 s4, 64, v19
6244; GFX10-NEXT:    v_or_b32_e32 v12, v12, v16
6245; GFX10-NEXT:    v_or_b32_e32 v10, v10, v8
6246; GFX10-NEXT:    v_or_b32_e32 v11, v11, v9
6247; GFX10-NEXT:    v_lshrrev_b64 v[8:9], v21, v[6:7]
6248; GFX10-NEXT:    v_or_b32_e32 v13, v13, v17
6249; GFX10-NEXT:    v_cmp_eq_u32_e64 s5, 0, v19
6250; GFX10-NEXT:    v_cndmask_b32_e32 v10, v0, v10, vcc_lo
6251; GFX10-NEXT:    v_cndmask_b32_e32 v11, v1, v11, vcc_lo
6252; GFX10-NEXT:    v_lshrrev_b64 v[0:1], v19, v[6:7]
6253; GFX10-NEXT:    v_cndmask_b32_e64 v8, v8, v12, s4
6254; GFX10-NEXT:    v_cmp_eq_u32_e64 s6, 0, v18
6255; GFX10-NEXT:    v_cndmask_b32_e64 v6, v9, v13, s4
6256; GFX10-NEXT:    v_cndmask_b32_e32 v14, 0, v14, vcc_lo
6257; GFX10-NEXT:    v_cndmask_b32_e32 v7, 0, v15, vcc_lo
6258; GFX10-NEXT:    v_cndmask_b32_e64 v4, v8, v4, s5
6259; GFX10-NEXT:    v_cndmask_b32_e64 v2, v10, v2, s6
6260; GFX10-NEXT:    v_cndmask_b32_e64 v3, v11, v3, s6
6261; GFX10-NEXT:    v_cndmask_b32_e64 v5, v6, v5, s5
6262; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, v0, s4
6263; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, v1, s4
6264; GFX10-NEXT:    v_or_b32_e32 v0, v14, v4
6265; GFX10-NEXT:    v_or_b32_e32 v1, v7, v5
6266; GFX10-NEXT:    v_or_b32_e32 v2, v2, v6
6267; GFX10-NEXT:    v_or_b32_e32 v3, v3, v8
6268; GFX10-NEXT:    s_setpc_b64 s[30:31]
6269;
6270; GFX11-LABEL: v_fshr_i128:
6271; GFX11:       ; %bb.0:
6272; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6273; GFX11-NEXT:    v_not_b32_e32 v9, v8
6274; GFX11-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
6275; GFX11-NEXT:    v_lshrrev_b32_e32 v10, 31, v1
6276; GFX11-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
6277; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
6278; GFX11-NEXT:    v_and_b32_e32 v18, 0x7f, v9
6279; GFX11-NEXT:    v_or_b32_e32 v2, v2, v10
6280; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
6281; GFX11-NEXT:    v_sub_nc_u32_e32 v10, 64, v18
6282; GFX11-NEXT:    v_lshlrev_b64 v[14:15], v18, v[0:1]
6283; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v18
6284; GFX11-NEXT:    v_and_b32_e32 v19, 0x7f, v8
6285; GFX11-NEXT:    v_add_nc_u32_e32 v20, 0xffffffc0, v18
6286; GFX11-NEXT:    v_lshlrev_b64 v[8:9], v18, v[2:3]
6287; GFX11-NEXT:    v_lshrrev_b64 v[10:11], v10, v[0:1]
6288; GFX11-NEXT:    v_cndmask_b32_e32 v14, 0, v14, vcc_lo
6289; GFX11-NEXT:    v_sub_nc_u32_e32 v16, 64, v19
6290; GFX11-NEXT:    v_lshlrev_b64 v[0:1], v20, v[0:1]
6291; GFX11-NEXT:    v_lshrrev_b64 v[12:13], v19, v[4:5]
6292; GFX11-NEXT:    v_cmp_gt_u32_e64 s0, 64, v19
6293; GFX11-NEXT:    v_or_b32_e32 v10, v10, v8
6294; GFX11-NEXT:    v_add_nc_u32_e32 v21, 0xffffffc0, v19
6295; GFX11-NEXT:    v_lshlrev_b64 v[16:17], v16, v[6:7]
6296; GFX11-NEXT:    v_or_b32_e32 v11, v11, v9
6297; GFX11-NEXT:    v_cmp_eq_u32_e64 s1, 0, v19
6298; GFX11-NEXT:    v_cndmask_b32_e32 v10, v0, v10, vcc_lo
6299; GFX11-NEXT:    v_lshrrev_b64 v[8:9], v21, v[6:7]
6300; GFX11-NEXT:    v_or_b32_e32 v12, v12, v16
6301; GFX11-NEXT:    v_or_b32_e32 v13, v13, v17
6302; GFX11-NEXT:    v_cndmask_b32_e32 v11, v1, v11, vcc_lo
6303; GFX11-NEXT:    v_lshrrev_b64 v[0:1], v19, v[6:7]
6304; GFX11-NEXT:    v_cmp_eq_u32_e64 s2, 0, v18
6305; GFX11-NEXT:    v_cndmask_b32_e64 v8, v8, v12, s0
6306; GFX11-NEXT:    v_cndmask_b32_e64 v6, v9, v13, s0
6307; GFX11-NEXT:    v_cndmask_b32_e32 v7, 0, v15, vcc_lo
6308; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
6309; GFX11-NEXT:    v_cndmask_b32_e64 v2, v10, v2, s2
6310; GFX11-NEXT:    v_cndmask_b32_e64 v3, v11, v3, s2
6311; GFX11-NEXT:    v_cndmask_b32_e64 v4, v8, v4, s1
6312; GFX11-NEXT:    v_cndmask_b32_e64 v5, v6, v5, s1
6313; GFX11-NEXT:    v_cndmask_b32_e64 v6, 0, v0, s0
6314; GFX11-NEXT:    v_cndmask_b32_e64 v8, 0, v1, s0
6315; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
6316; GFX11-NEXT:    v_or_b32_e32 v0, v14, v4
6317; GFX11-NEXT:    v_or_b32_e32 v1, v7, v5
6318; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
6319; GFX11-NEXT:    v_or_b32_e32 v2, v2, v6
6320; GFX11-NEXT:    v_or_b32_e32 v3, v3, v8
6321; GFX11-NEXT:    s_setpc_b64 s[30:31]
6322  %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
6323  ret i128 %result
6324}
6325
6326define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs, i128 %amt) {
6327; GFX6-LABEL: v_fshr_i128_ssv:
6328; GFX6:       ; %bb.0:
6329; GFX6-NEXT:    v_not_b32_e32 v1, v0
6330; GFX6-NEXT:    s_lshl_b64 s[8:9], s[0:1], 1
6331; GFX6-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
6332; GFX6-NEXT:    s_lshr_b32 s0, s1, 31
6333; GFX6-NEXT:    s_mov_b32 s1, 0
6334; GFX6-NEXT:    v_and_b32_e32 v7, 0x7f, v1
6335; GFX6-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
6336; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 64, v7
6337; GFX6-NEXT:    v_not_b32_e32 v8, 63
6338; GFX6-NEXT:    v_lshr_b64 v[1:2], s[8:9], v1
6339; GFX6-NEXT:    v_lshl_b64 v[3:4], s[0:1], v7
6340; GFX6-NEXT:    v_add_i32_e32 v9, vcc, v7, v8
6341; GFX6-NEXT:    v_lshl_b64 v[5:6], s[8:9], v7
6342; GFX6-NEXT:    v_or_b32_e32 v3, v1, v3
6343; GFX6-NEXT:    v_or_b32_e32 v4, v2, v4
6344; GFX6-NEXT:    v_lshl_b64 v[1:2], s[8:9], v9
6345; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
6346; GFX6-NEXT:    v_cndmask_b32_e32 v9, 0, v5, vcc
6347; GFX6-NEXT:    v_cndmask_b32_e32 v6, 0, v6, vcc
6348; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
6349; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
6350; GFX6-NEXT:    v_mov_b32_e32 v3, s0
6351; GFX6-NEXT:    v_mov_b32_e32 v4, s1
6352; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
6353; GFX6-NEXT:    v_and_b32_e32 v11, 0x7f, v0
6354; GFX6-NEXT:    v_cndmask_b32_e32 v7, v1, v3, vcc
6355; GFX6-NEXT:    v_cndmask_b32_e32 v10, v2, v4, vcc
6356; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 64, v11
6357; GFX6-NEXT:    v_lshr_b64 v[0:1], s[4:5], v11
6358; GFX6-NEXT:    v_lshl_b64 v[2:3], s[6:7], v2
6359; GFX6-NEXT:    v_add_i32_e32 v8, vcc, v11, v8
6360; GFX6-NEXT:    v_or_b32_e32 v2, v0, v2
6361; GFX6-NEXT:    v_or_b32_e32 v3, v1, v3
6362; GFX6-NEXT:    v_lshr_b64 v[0:1], s[6:7], v8
6363; GFX6-NEXT:    v_lshr_b64 v[4:5], s[6:7], v11
6364; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v11
6365; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
6366; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
6367; GFX6-NEXT:    v_mov_b32_e32 v2, s4
6368; GFX6-NEXT:    v_mov_b32_e32 v3, s5
6369; GFX6-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v11
6370; GFX6-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[0:1]
6371; GFX6-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s[0:1]
6372; GFX6-NEXT:    v_cndmask_b32_e32 v2, 0, v4, vcc
6373; GFX6-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
6374; GFX6-NEXT:    v_or_b32_e32 v0, v9, v0
6375; GFX6-NEXT:    v_or_b32_e32 v1, v6, v1
6376; GFX6-NEXT:    v_or_b32_e32 v2, v7, v2
6377; GFX6-NEXT:    v_or_b32_e32 v3, v10, v3
6378; GFX6-NEXT:    ; return to shader part epilog
6379;
6380; GFX8-LABEL: v_fshr_i128_ssv:
6381; GFX8:       ; %bb.0:
6382; GFX8-NEXT:    v_not_b32_e32 v1, v0
6383; GFX8-NEXT:    s_lshl_b64 s[8:9], s[0:1], 1
6384; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
6385; GFX8-NEXT:    s_lshr_b32 s0, s1, 31
6386; GFX8-NEXT:    s_mov_b32 s1, 0
6387; GFX8-NEXT:    v_and_b32_e32 v7, 0x7f, v1
6388; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
6389; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, 64, v7
6390; GFX8-NEXT:    v_not_b32_e32 v8, 63
6391; GFX8-NEXT:    v_lshrrev_b64 v[1:2], v1, s[8:9]
6392; GFX8-NEXT:    v_lshlrev_b64 v[3:4], v7, s[0:1]
6393; GFX8-NEXT:    v_add_u32_e32 v9, vcc, v7, v8
6394; GFX8-NEXT:    v_lshlrev_b64 v[5:6], v7, s[8:9]
6395; GFX8-NEXT:    v_or_b32_e32 v3, v1, v3
6396; GFX8-NEXT:    v_or_b32_e32 v4, v2, v4
6397; GFX8-NEXT:    v_lshlrev_b64 v[1:2], v9, s[8:9]
6398; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
6399; GFX8-NEXT:    v_cndmask_b32_e32 v9, 0, v5, vcc
6400; GFX8-NEXT:    v_cndmask_b32_e32 v6, 0, v6, vcc
6401; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
6402; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
6403; GFX8-NEXT:    v_mov_b32_e32 v3, s0
6404; GFX8-NEXT:    v_mov_b32_e32 v4, s1
6405; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
6406; GFX8-NEXT:    v_and_b32_e32 v11, 0x7f, v0
6407; GFX8-NEXT:    v_cndmask_b32_e32 v7, v1, v3, vcc
6408; GFX8-NEXT:    v_cndmask_b32_e32 v10, v2, v4, vcc
6409; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 64, v11
6410; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v11, s[4:5]
6411; GFX8-NEXT:    v_lshlrev_b64 v[2:3], v2, s[6:7]
6412; GFX8-NEXT:    v_add_u32_e32 v8, vcc, v11, v8
6413; GFX8-NEXT:    v_or_b32_e32 v2, v0, v2
6414; GFX8-NEXT:    v_or_b32_e32 v3, v1, v3
6415; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v8, s[6:7]
6416; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v11, s[6:7]
6417; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v11
6418; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
6419; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
6420; GFX8-NEXT:    v_mov_b32_e32 v2, s4
6421; GFX8-NEXT:    v_mov_b32_e32 v3, s5
6422; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v11
6423; GFX8-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[0:1]
6424; GFX8-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s[0:1]
6425; GFX8-NEXT:    v_cndmask_b32_e32 v2, 0, v4, vcc
6426; GFX8-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
6427; GFX8-NEXT:    v_or_b32_e32 v0, v9, v0
6428; GFX8-NEXT:    v_or_b32_e32 v1, v6, v1
6429; GFX8-NEXT:    v_or_b32_e32 v2, v7, v2
6430; GFX8-NEXT:    v_or_b32_e32 v3, v10, v3
6431; GFX8-NEXT:    ; return to shader part epilog
6432;
6433; GFX9-LABEL: v_fshr_i128_ssv:
6434; GFX9:       ; %bb.0:
6435; GFX9-NEXT:    v_not_b32_e32 v1, v0
6436; GFX9-NEXT:    s_lshl_b64 s[8:9], s[0:1], 1
6437; GFX9-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
6438; GFX9-NEXT:    s_lshr_b32 s0, s1, 31
6439; GFX9-NEXT:    s_mov_b32 s1, 0
6440; GFX9-NEXT:    v_and_b32_e32 v7, 0x7f, v1
6441; GFX9-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
6442; GFX9-NEXT:    v_sub_u32_e32 v1, 64, v7
6443; GFX9-NEXT:    v_lshrrev_b64 v[1:2], v1, s[8:9]
6444; GFX9-NEXT:    v_lshlrev_b64 v[3:4], v7, s[0:1]
6445; GFX9-NEXT:    v_add_u32_e32 v8, 0xffffffc0, v7
6446; GFX9-NEXT:    v_lshlrev_b64 v[5:6], v7, s[8:9]
6447; GFX9-NEXT:    v_or_b32_e32 v3, v1, v3
6448; GFX9-NEXT:    v_or_b32_e32 v4, v2, v4
6449; GFX9-NEXT:    v_lshlrev_b64 v[1:2], v8, s[8:9]
6450; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
6451; GFX9-NEXT:    v_cndmask_b32_e32 v8, 0, v5, vcc
6452; GFX9-NEXT:    v_cndmask_b32_e32 v6, 0, v6, vcc
6453; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
6454; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
6455; GFX9-NEXT:    v_mov_b32_e32 v4, s1
6456; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
6457; GFX9-NEXT:    v_and_b32_e32 v10, 0x7f, v0
6458; GFX9-NEXT:    v_mov_b32_e32 v3, s0
6459; GFX9-NEXT:    v_cndmask_b32_e32 v9, v2, v4, vcc
6460; GFX9-NEXT:    v_sub_u32_e32 v2, 64, v10
6461; GFX9-NEXT:    v_cndmask_b32_e32 v7, v1, v3, vcc
6462; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v10, s[4:5]
6463; GFX9-NEXT:    v_lshlrev_b64 v[2:3], v2, s[6:7]
6464; GFX9-NEXT:    v_add_u32_e32 v11, 0xffffffc0, v10
6465; GFX9-NEXT:    v_or_b32_e32 v2, v0, v2
6466; GFX9-NEXT:    v_or_b32_e32 v3, v1, v3
6467; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v11, s[6:7]
6468; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v10, s[6:7]
6469; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v10
6470; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
6471; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
6472; GFX9-NEXT:    v_mov_b32_e32 v2, s4
6473; GFX9-NEXT:    v_mov_b32_e32 v3, s5
6474; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v10
6475; GFX9-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[0:1]
6476; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s[0:1]
6477; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v4, vcc
6478; GFX9-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
6479; GFX9-NEXT:    v_or_b32_e32 v0, v8, v0
6480; GFX9-NEXT:    v_or_b32_e32 v1, v6, v1
6481; GFX9-NEXT:    v_or_b32_e32 v2, v7, v2
6482; GFX9-NEXT:    v_or_b32_e32 v3, v9, v3
6483; GFX9-NEXT:    ; return to shader part epilog
6484;
6485; GFX10-LABEL: v_fshr_i128_ssv:
6486; GFX10:       ; %bb.0:
6487; GFX10-NEXT:    v_not_b32_e32 v1, v0
6488; GFX10-NEXT:    v_and_b32_e32 v13, 0x7f, v0
6489; GFX10-NEXT:    s_mov_b32 s9, 0
6490; GFX10-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
6491; GFX10-NEXT:    s_lshr_b32 s8, s1, 31
6492; GFX10-NEXT:    v_and_b32_e32 v12, 0x7f, v1
6493; GFX10-NEXT:    v_sub_nc_u32_e32 v8, 64, v13
6494; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
6495; GFX10-NEXT:    s_or_b64 s[8:9], s[2:3], s[8:9]
6496; GFX10-NEXT:    v_add_nc_u32_e32 v14, 0xffffffc0, v13
6497; GFX10-NEXT:    v_sub_nc_u32_e32 v2, 64, v12
6498; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v12, s[8:9]
6499; GFX10-NEXT:    v_add_nc_u32_e32 v10, 0xffffffc0, v12
6500; GFX10-NEXT:    v_lshrrev_b64 v[4:5], v13, s[4:5]
6501; GFX10-NEXT:    v_lshlrev_b64 v[8:9], v8, s[6:7]
6502; GFX10-NEXT:    v_lshrrev_b64 v[2:3], v2, s[0:1]
6503; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v12
6504; GFX10-NEXT:    v_lshlrev_b64 v[10:11], v10, s[0:1]
6505; GFX10-NEXT:    v_lshlrev_b64 v[6:7], v12, s[0:1]
6506; GFX10-NEXT:    v_cmp_gt_u32_e64 s0, 64, v13
6507; GFX10-NEXT:    v_or_b32_e32 v4, v4, v8
6508; GFX10-NEXT:    v_or_b32_e32 v2, v2, v0
6509; GFX10-NEXT:    v_or_b32_e32 v3, v3, v1
6510; GFX10-NEXT:    v_lshrrev_b64 v[0:1], v14, s[6:7]
6511; GFX10-NEXT:    v_or_b32_e32 v5, v5, v9
6512; GFX10-NEXT:    v_cmp_eq_u32_e64 s1, 0, v13
6513; GFX10-NEXT:    v_cndmask_b32_e32 v8, v10, v2, vcc_lo
6514; GFX10-NEXT:    v_cndmask_b32_e32 v10, v11, v3, vcc_lo
6515; GFX10-NEXT:    v_lshrrev_b64 v[2:3], v13, s[6:7]
6516; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s0
6517; GFX10-NEXT:    v_cmp_eq_u32_e64 s2, 0, v12
6518; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v5, s0
6519; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0, v6, vcc_lo
6520; GFX10-NEXT:    v_cndmask_b32_e32 v4, 0, v7, vcc_lo
6521; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, s4, s1
6522; GFX10-NEXT:    v_cndmask_b32_e64 v5, v8, s8, s2
6523; GFX10-NEXT:    v_cndmask_b32_e64 v7, v10, s9, s2
6524; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s5, s1
6525; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s0
6526; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0, v3, s0
6527; GFX10-NEXT:    v_or_b32_e32 v0, v6, v0
6528; GFX10-NEXT:    v_or_b32_e32 v1, v4, v1
6529; GFX10-NEXT:    v_or_b32_e32 v2, v5, v2
6530; GFX10-NEXT:    v_or_b32_e32 v3, v7, v3
6531; GFX10-NEXT:    ; return to shader part epilog
6532;
6533; GFX11-LABEL: v_fshr_i128_ssv:
6534; GFX11:       ; %bb.0:
6535; GFX11-NEXT:    v_not_b32_e32 v1, v0
6536; GFX11-NEXT:    s_lshr_b32 s8, s1, 31
6537; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
6538; GFX11-NEXT:    s_mov_b32 s9, 0
6539; GFX11-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
6540; GFX11-NEXT:    v_and_b32_e32 v12, 0x7f, v1
6541; GFX11-NEXT:    s_or_b64 s[8:9], s[2:3], s[8:9]
6542; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
6543; GFX11-NEXT:    v_lshlrev_b64 v[6:7], v12, s[0:1]
6544; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v12
6545; GFX11-NEXT:    v_and_b32_e32 v13, 0x7f, v0
6546; GFX11-NEXT:    v_sub_nc_u32_e32 v2, 64, v12
6547; GFX11-NEXT:    v_lshlrev_b64 v[0:1], v12, s[8:9]
6548; GFX11-NEXT:    v_add_nc_u32_e32 v10, 0xffffffc0, v12
6549; GFX11-NEXT:    v_cndmask_b32_e32 v6, 0, v6, vcc_lo
6550; GFX11-NEXT:    v_sub_nc_u32_e32 v8, 64, v13
6551; GFX11-NEXT:    v_lshrrev_b64 v[2:3], v2, s[0:1]
6552; GFX11-NEXT:    v_add_nc_u32_e32 v14, 0xffffffc0, v13
6553; GFX11-NEXT:    v_lshrrev_b64 v[4:5], v13, s[4:5]
6554; GFX11-NEXT:    v_lshlrev_b64 v[10:11], v10, s[0:1]
6555; GFX11-NEXT:    v_lshlrev_b64 v[8:9], v8, s[6:7]
6556; GFX11-NEXT:    v_cmp_gt_u32_e64 s0, 64, v13
6557; GFX11-NEXT:    v_or_b32_e32 v2, v2, v0
6558; GFX11-NEXT:    v_or_b32_e32 v3, v3, v1
6559; GFX11-NEXT:    v_lshrrev_b64 v[0:1], v14, s[6:7]
6560; GFX11-NEXT:    v_cmp_eq_u32_e64 s1, 0, v13
6561; GFX11-NEXT:    v_or_b32_e32 v4, v4, v8
6562; GFX11-NEXT:    v_or_b32_e32 v5, v5, v9
6563; GFX11-NEXT:    v_cndmask_b32_e32 v8, v10, v2, vcc_lo
6564; GFX11-NEXT:    v_cndmask_b32_e32 v10, v11, v3, vcc_lo
6565; GFX11-NEXT:    v_lshrrev_b64 v[2:3], v13, s[6:7]
6566; GFX11-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s0
6567; GFX11-NEXT:    v_cmp_eq_u32_e64 s2, 0, v12
6568; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, v5, s0
6569; GFX11-NEXT:    v_cndmask_b32_e32 v4, 0, v7, vcc_lo
6570; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
6571; GFX11-NEXT:    v_cndmask_b32_e64 v0, v0, s4, s1
6572; GFX11-NEXT:    v_cndmask_b32_e64 v5, v8, s8, s2
6573; GFX11-NEXT:    v_cndmask_b32_e64 v7, v10, s9, s2
6574; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, s5, s1
6575; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s0
6576; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0, v3, s0
6577; GFX11-NEXT:    v_or_b32_e32 v0, v6, v0
6578; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
6579; GFX11-NEXT:    v_or_b32_e32 v1, v4, v1
6580; GFX11-NEXT:    v_or_b32_e32 v2, v5, v2
6581; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
6582; GFX11-NEXT:    v_or_b32_e32 v3, v7, v3
6583; GFX11-NEXT:    ; return to shader part epilog
6584  %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
6585  %cast.result = bitcast i128 %result to <4 x float>
6586  ret <4 x float> %cast.result
6587}
6588
; v_fshr_i128_svs: llvm.fshr.i128 with mixed operand placement. Only %lhs and
; %amt are marked inreg; in the expected output below %lhs is read from s[0:3],
; %rhs from v[0:3], and the shift amount from s4. The i128 result is bitcast to
; <4 x float>, and every expected sequence ends by writing v0-v3.
; The assertions in this function are autogenerated (see the note on the first
; line of this file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
6589define amdgpu_ps <4 x float> @v_fshr_i128_svs(i128 inreg %lhs, i128 %rhs, i128 inreg %amt) {
6590; GFX6-LABEL: v_fshr_i128_svs:
6591; GFX6:       ; %bb.0:
6592; GFX6-NEXT:    s_lshl_b64 s[6:7], s[0:1], 1
6593; GFX6-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
6594; GFX6-NEXT:    s_lshr_b32 s0, s1, 31
6595; GFX6-NEXT:    s_mov_b32 s1, 0
6596; GFX6-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
6597; GFX6-NEXT:    s_andn2_b32 s2, 0x7f, s4
6598; GFX6-NEXT:    s_not_b32 s5, s4
6599; GFX6-NEXT:    s_sub_i32 s12, s2, 64
6600; GFX6-NEXT:    s_sub_i32 s8, 64, s2
6601; GFX6-NEXT:    s_cmp_lt_u32 s2, 64
6602; GFX6-NEXT:    s_cselect_b32 s13, 1, 0
6603; GFX6-NEXT:    s_cmp_eq_u32 s2, 0
6604; GFX6-NEXT:    s_cselect_b32 s14, 1, 0
6605; GFX6-NEXT:    s_lshr_b64 s[8:9], s[6:7], s8
6606; GFX6-NEXT:    s_lshl_b64 s[10:11], s[0:1], s5
6607; GFX6-NEXT:    s_lshl_b64 s[2:3], s[6:7], s5
6608; GFX6-NEXT:    s_or_b64 s[8:9], s[8:9], s[10:11]
6609; GFX6-NEXT:    s_lshl_b64 s[6:7], s[6:7], s12
6610; GFX6-NEXT:    s_cmp_lg_u32 s13, 0
6611; GFX6-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
6612; GFX6-NEXT:    s_cselect_b64 s[6:7], s[8:9], s[6:7]
6613; GFX6-NEXT:    s_cmp_lg_u32 s14, 0
6614; GFX6-NEXT:    s_cselect_b64 s[6:7], s[0:1], s[6:7]
6615; GFX6-NEXT:    s_and_b32 s0, s4, 0x7f
6616; GFX6-NEXT:    s_sub_i32 s1, s0, 64
6617; GFX6-NEXT:    s_sub_i32 s4, 64, s0
6618; GFX6-NEXT:    s_cmp_lt_u32 s0, 64
6619; GFX6-NEXT:    s_cselect_b32 s5, 1, 0
6620; GFX6-NEXT:    s_cmp_eq_u32 s0, 0
6621; GFX6-NEXT:    v_lshr_b64 v[4:5], v[0:1], s0
6622; GFX6-NEXT:    v_lshl_b64 v[6:7], v[2:3], s4
6623; GFX6-NEXT:    s_cselect_b32 s8, 1, 0
6624; GFX6-NEXT:    v_lshr_b64 v[8:9], v[2:3], s0
6625; GFX6-NEXT:    v_lshr_b64 v[2:3], v[2:3], s1
6626; GFX6-NEXT:    s_and_b32 s0, 1, s5
6627; GFX6-NEXT:    v_or_b32_e32 v4, v4, v6
6628; GFX6-NEXT:    v_or_b32_e32 v5, v5, v7
6629; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s0
6630; GFX6-NEXT:    s_and_b32 s0, 1, s8
6631; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
6632; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
6633; GFX6-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0
6634; GFX6-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s[0:1]
6635; GFX6-NEXT:    v_cndmask_b32_e64 v1, v3, v1, s[0:1]
6636; GFX6-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
6637; GFX6-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
6638; GFX6-NEXT:    v_or_b32_e32 v0, s2, v0
6639; GFX6-NEXT:    v_or_b32_e32 v1, s3, v1
6640; GFX6-NEXT:    v_or_b32_e32 v2, s6, v2
6641; GFX6-NEXT:    v_or_b32_e32 v3, s7, v3
6642; GFX6-NEXT:    ; return to shader part epilog
6643;
6644; GFX8-LABEL: v_fshr_i128_svs:
6645; GFX8:       ; %bb.0:
6646; GFX8-NEXT:    s_lshl_b64 s[6:7], s[0:1], 1
6647; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
6648; GFX8-NEXT:    s_lshr_b32 s0, s1, 31
6649; GFX8-NEXT:    s_mov_b32 s1, 0
6650; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
6651; GFX8-NEXT:    s_andn2_b32 s2, 0x7f, s4
6652; GFX8-NEXT:    s_not_b32 s5, s4
6653; GFX8-NEXT:    s_sub_i32 s12, s2, 64
6654; GFX8-NEXT:    s_sub_i32 s8, 64, s2
6655; GFX8-NEXT:    s_cmp_lt_u32 s2, 64
6656; GFX8-NEXT:    s_cselect_b32 s13, 1, 0
6657; GFX8-NEXT:    s_cmp_eq_u32 s2, 0
6658; GFX8-NEXT:    s_cselect_b32 s14, 1, 0
6659; GFX8-NEXT:    s_lshr_b64 s[8:9], s[6:7], s8
6660; GFX8-NEXT:    s_lshl_b64 s[10:11], s[0:1], s5
6661; GFX8-NEXT:    s_lshl_b64 s[2:3], s[6:7], s5
6662; GFX8-NEXT:    s_or_b64 s[8:9], s[8:9], s[10:11]
6663; GFX8-NEXT:    s_lshl_b64 s[6:7], s[6:7], s12
6664; GFX8-NEXT:    s_cmp_lg_u32 s13, 0
6665; GFX8-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
6666; GFX8-NEXT:    s_cselect_b64 s[6:7], s[8:9], s[6:7]
6667; GFX8-NEXT:    s_cmp_lg_u32 s14, 0
6668; GFX8-NEXT:    s_cselect_b64 s[6:7], s[0:1], s[6:7]
6669; GFX8-NEXT:    s_and_b32 s0, s4, 0x7f
6670; GFX8-NEXT:    s_sub_i32 s1, s0, 64
6671; GFX8-NEXT:    s_sub_i32 s4, 64, s0
6672; GFX8-NEXT:    s_cmp_lt_u32 s0, 64
6673; GFX8-NEXT:    s_cselect_b32 s5, 1, 0
6674; GFX8-NEXT:    s_cmp_eq_u32 s0, 0
6675; GFX8-NEXT:    v_lshrrev_b64 v[4:5], s0, v[0:1]
6676; GFX8-NEXT:    v_lshlrev_b64 v[6:7], s4, v[2:3]
6677; GFX8-NEXT:    s_cselect_b32 s8, 1, 0
6678; GFX8-NEXT:    v_lshrrev_b64 v[8:9], s0, v[2:3]
6679; GFX8-NEXT:    v_lshrrev_b64 v[2:3], s1, v[2:3]
6680; GFX8-NEXT:    s_and_b32 s0, 1, s5
6681; GFX8-NEXT:    v_or_b32_e32 v4, v4, v6
6682; GFX8-NEXT:    v_or_b32_e32 v5, v5, v7
6683; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s0
6684; GFX8-NEXT:    s_and_b32 s0, 1, s8
6685; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
6686; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
6687; GFX8-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0
6688; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s[0:1]
6689; GFX8-NEXT:    v_cndmask_b32_e64 v1, v3, v1, s[0:1]
6690; GFX8-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
6691; GFX8-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
6692; GFX8-NEXT:    v_or_b32_e32 v0, s2, v0
6693; GFX8-NEXT:    v_or_b32_e32 v1, s3, v1
6694; GFX8-NEXT:    v_or_b32_e32 v2, s6, v2
6695; GFX8-NEXT:    v_or_b32_e32 v3, s7, v3
6696; GFX8-NEXT:    ; return to shader part epilog
6697;
6698; GFX9-LABEL: v_fshr_i128_svs:
6699; GFX9:       ; %bb.0:
6700; GFX9-NEXT:    s_lshl_b64 s[6:7], s[0:1], 1
6701; GFX9-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
6702; GFX9-NEXT:    s_lshr_b32 s0, s1, 31
6703; GFX9-NEXT:    s_mov_b32 s1, 0
6704; GFX9-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
6705; GFX9-NEXT:    s_andn2_b32 s2, 0x7f, s4
6706; GFX9-NEXT:    s_not_b32 s5, s4
6707; GFX9-NEXT:    s_sub_i32 s12, s2, 64
6708; GFX9-NEXT:    s_sub_i32 s8, 64, s2
6709; GFX9-NEXT:    s_cmp_lt_u32 s2, 64
6710; GFX9-NEXT:    s_cselect_b32 s13, 1, 0
6711; GFX9-NEXT:    s_cmp_eq_u32 s2, 0
6712; GFX9-NEXT:    s_cselect_b32 s14, 1, 0
6713; GFX9-NEXT:    s_lshr_b64 s[8:9], s[6:7], s8
6714; GFX9-NEXT:    s_lshl_b64 s[10:11], s[0:1], s5
6715; GFX9-NEXT:    s_lshl_b64 s[2:3], s[6:7], s5
6716; GFX9-NEXT:    s_or_b64 s[8:9], s[8:9], s[10:11]
6717; GFX9-NEXT:    s_lshl_b64 s[6:7], s[6:7], s12
6718; GFX9-NEXT:    s_cmp_lg_u32 s13, 0
6719; GFX9-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
6720; GFX9-NEXT:    s_cselect_b64 s[6:7], s[8:9], s[6:7]
6721; GFX9-NEXT:    s_cmp_lg_u32 s14, 0
6722; GFX9-NEXT:    s_cselect_b64 s[6:7], s[0:1], s[6:7]
6723; GFX9-NEXT:    s_and_b32 s0, s4, 0x7f
6724; GFX9-NEXT:    s_sub_i32 s1, s0, 64
6725; GFX9-NEXT:    s_sub_i32 s4, 64, s0
6726; GFX9-NEXT:    s_cmp_lt_u32 s0, 64
6727; GFX9-NEXT:    s_cselect_b32 s5, 1, 0
6728; GFX9-NEXT:    s_cmp_eq_u32 s0, 0
6729; GFX9-NEXT:    v_lshrrev_b64 v[4:5], s0, v[0:1]
6730; GFX9-NEXT:    v_lshlrev_b64 v[6:7], s4, v[2:3]
6731; GFX9-NEXT:    s_cselect_b32 s8, 1, 0
6732; GFX9-NEXT:    v_lshrrev_b64 v[8:9], s0, v[2:3]
6733; GFX9-NEXT:    v_lshrrev_b64 v[2:3], s1, v[2:3]
6734; GFX9-NEXT:    s_and_b32 s0, 1, s5
6735; GFX9-NEXT:    v_or_b32_e32 v4, v4, v6
6736; GFX9-NEXT:    v_or_b32_e32 v5, v5, v7
6737; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s0
6738; GFX9-NEXT:    s_and_b32 s0, 1, s8
6739; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
6740; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
6741; GFX9-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0
6742; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s[0:1]
6743; GFX9-NEXT:    v_cndmask_b32_e64 v1, v3, v1, s[0:1]
6744; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
6745; GFX9-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
6746; GFX9-NEXT:    v_or_b32_e32 v0, s2, v0
6747; GFX9-NEXT:    v_or_b32_e32 v1, s3, v1
6748; GFX9-NEXT:    v_or_b32_e32 v2, s6, v2
6749; GFX9-NEXT:    v_or_b32_e32 v3, s7, v3
6750; GFX9-NEXT:    ; return to shader part epilog
6751;
6752; GFX10-LABEL: v_fshr_i128_svs:
6753; GFX10:       ; %bb.0:
6754; GFX10-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
6755; GFX10-NEXT:    s_lshr_b32 s6, s1, 31
6756; GFX10-NEXT:    s_mov_b32 s7, 0
6757; GFX10-NEXT:    s_andn2_b32 s5, 0x7f, s4
6758; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
6759; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[6:7]
6760; GFX10-NEXT:    s_not_b32 s10, s4
6761; GFX10-NEXT:    s_sub_i32 s12, s5, 64
6762; GFX10-NEXT:    s_sub_i32 s6, 64, s5
6763; GFX10-NEXT:    s_cmp_lt_u32 s5, 64
6764; GFX10-NEXT:    s_cselect_b32 s13, 1, 0
6765; GFX10-NEXT:    s_cmp_eq_u32 s5, 0
6766; GFX10-NEXT:    s_cselect_b32 s5, 1, 0
6767; GFX10-NEXT:    s_lshr_b64 s[6:7], s[0:1], s6
6768; GFX10-NEXT:    s_lshl_b64 s[8:9], s[2:3], s10
6769; GFX10-NEXT:    s_lshl_b64 s[10:11], s[0:1], s10
6770; GFX10-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
6771; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], s12
6772; GFX10-NEXT:    s_cmp_lg_u32 s13, 0
6773; GFX10-NEXT:    s_cselect_b64 s[8:9], s[10:11], 0
6774; GFX10-NEXT:    s_cselect_b64 s[0:1], s[6:7], s[0:1]
6775; GFX10-NEXT:    s_cmp_lg_u32 s5, 0
6776; GFX10-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
6777; GFX10-NEXT:    s_and_b32 s0, s4, 0x7f
6778; GFX10-NEXT:    s_sub_i32 s1, 64, s0
6779; GFX10-NEXT:    v_lshrrev_b64 v[4:5], s0, v[0:1]
6780; GFX10-NEXT:    v_lshlrev_b64 v[6:7], s1, v[2:3]
6781; GFX10-NEXT:    s_sub_i32 s1, s0, 64
6782; GFX10-NEXT:    s_cmp_lt_u32 s0, 64
6783; GFX10-NEXT:    v_lshrrev_b64 v[8:9], s1, v[2:3]
6784; GFX10-NEXT:    s_cselect_b32 s4, 1, 0
6785; GFX10-NEXT:    s_cmp_eq_u32 s0, 0
6786; GFX10-NEXT:    v_or_b32_e32 v4, v4, v6
6787; GFX10-NEXT:    s_cselect_b32 s5, 1, 0
6788; GFX10-NEXT:    s_and_b32 s1, 1, s4
6789; GFX10-NEXT:    v_or_b32_e32 v5, v5, v7
6790; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s1
6791; GFX10-NEXT:    v_lshrrev_b64 v[2:3], s0, v[2:3]
6792; GFX10-NEXT:    s_and_b32 s0, 1, s5
6793; GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0, s0
6794; GFX10-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc_lo
6795; GFX10-NEXT:    v_cndmask_b32_e32 v5, v9, v5, vcc_lo
6796; GFX10-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc_lo
6797; GFX10-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc_lo
6798; GFX10-NEXT:    v_cndmask_b32_e64 v0, v4, v0, s0
6799; GFX10-NEXT:    v_cndmask_b32_e64 v1, v5, v1, s0
6800; GFX10-NEXT:    v_or_b32_e32 v2, s2, v2
6801; GFX10-NEXT:    v_or_b32_e32 v3, s3, v3
6802; GFX10-NEXT:    v_or_b32_e32 v0, s8, v0
6803; GFX10-NEXT:    v_or_b32_e32 v1, s9, v1
6804; GFX10-NEXT:    ; return to shader part epilog
6805;
6806; GFX11-LABEL: v_fshr_i128_svs:
6807; GFX11:       ; %bb.0:
6808; GFX11-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
6809; GFX11-NEXT:    s_lshr_b32 s6, s1, 31
6810; GFX11-NEXT:    s_mov_b32 s7, 0
6811; GFX11-NEXT:    s_and_not1_b32 s5, 0x7f, s4
6812; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
6813; GFX11-NEXT:    s_or_b64 s[2:3], s[2:3], s[6:7]
6814; GFX11-NEXT:    s_not_b32 s10, s4
6815; GFX11-NEXT:    s_sub_i32 s12, s5, 64
6816; GFX11-NEXT:    s_sub_i32 s6, 64, s5
6817; GFX11-NEXT:    s_cmp_lt_u32 s5, 64
6818; GFX11-NEXT:    s_cselect_b32 s13, 1, 0
6819; GFX11-NEXT:    s_cmp_eq_u32 s5, 0
6820; GFX11-NEXT:    s_cselect_b32 s5, 1, 0
6821; GFX11-NEXT:    s_lshr_b64 s[6:7], s[0:1], s6
6822; GFX11-NEXT:    s_lshl_b64 s[8:9], s[2:3], s10
6823; GFX11-NEXT:    s_lshl_b64 s[10:11], s[0:1], s10
6824; GFX11-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
6825; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], s12
6826; GFX11-NEXT:    s_cmp_lg_u32 s13, 0
6827; GFX11-NEXT:    s_cselect_b64 s[8:9], s[10:11], 0
6828; GFX11-NEXT:    s_cselect_b64 s[0:1], s[6:7], s[0:1]
6829; GFX11-NEXT:    s_cmp_lg_u32 s5, 0
6830; GFX11-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
6831; GFX11-NEXT:    s_and_b32 s0, s4, 0x7f
6832; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
6833; GFX11-NEXT:    s_sub_i32 s1, 64, s0
6834; GFX11-NEXT:    v_lshrrev_b64 v[4:5], s0, v[0:1]
6835; GFX11-NEXT:    v_lshlrev_b64 v[6:7], s1, v[2:3]
6836; GFX11-NEXT:    s_sub_i32 s1, s0, 64
6837; GFX11-NEXT:    s_cmp_lt_u32 s0, 64
6838; GFX11-NEXT:    v_lshrrev_b64 v[8:9], s1, v[2:3]
6839; GFX11-NEXT:    s_cselect_b32 s4, 1, 0
6840; GFX11-NEXT:    s_cmp_eq_u32 s0, 0
6841; GFX11-NEXT:    v_or_b32_e32 v4, v4, v6
6842; GFX11-NEXT:    s_cselect_b32 s5, 1, 0
6843; GFX11-NEXT:    s_and_b32 s1, 1, s4
6844; GFX11-NEXT:    v_or_b32_e32 v5, v5, v7
6845; GFX11-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s1
6846; GFX11-NEXT:    v_lshrrev_b64 v[2:3], s0, v[2:3]
6847; GFX11-NEXT:    s_and_b32 s0, 1, s5
6848; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
6849; GFX11-NEXT:    v_cmp_ne_u32_e64 s0, 0, s0
6850; GFX11-NEXT:    v_dual_cndmask_b32 v4, v8, v4 :: v_dual_cndmask_b32 v5, v9, v5
6851; GFX11-NEXT:    v_dual_cndmask_b32 v2, 0, v2 :: v_dual_cndmask_b32 v3, 0, v3
6852; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
6853; GFX11-NEXT:    v_cndmask_b32_e64 v0, v4, v0, s0
6854; GFX11-NEXT:    v_cndmask_b32_e64 v1, v5, v1, s0
6855; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
6856; GFX11-NEXT:    v_or_b32_e32 v2, s2, v2
6857; GFX11-NEXT:    v_or_b32_e32 v3, s3, v3
6858; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
6859; GFX11-NEXT:    v_or_b32_e32 v0, s8, v0
6860; GFX11-NEXT:    v_or_b32_e32 v1, s9, v1
6861; GFX11-NEXT:    ; return to shader part epilog
6862  %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
6863  %cast.result = bitcast i128 %result to <4 x float>
6864  ret <4 x float> %cast.result
6865}
6866
6867define amdgpu_ps <4 x float> @v_fshr_i128_vss(i128 %lhs, i128 inreg %rhs, i128 inreg %amt) {
6868; GFX6-LABEL: v_fshr_i128_vss:
6869; GFX6:       ; %bb.0:
6870; GFX6-NEXT:    v_lshl_b64 v[2:3], v[2:3], 1
6871; GFX6-NEXT:    v_lshl_b64 v[4:5], v[0:1], 1
6872; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
6873; GFX6-NEXT:    s_andn2_b32 s5, 0x7f, s4
6874; GFX6-NEXT:    v_or_b32_e32 v2, v2, v0
6875; GFX6-NEXT:    s_sub_i32 s6, s5, 64
6876; GFX6-NEXT:    s_sub_i32 s7, 64, s5
6877; GFX6-NEXT:    s_cmp_lt_u32 s5, 64
6878; GFX6-NEXT:    v_lshr_b64 v[0:1], v[4:5], s7
6879; GFX6-NEXT:    v_lshl_b64 v[6:7], v[2:3], s5
6880; GFX6-NEXT:    s_cselect_b32 s8, 1, 0
6881; GFX6-NEXT:    s_cmp_eq_u32 s5, 0
6882; GFX6-NEXT:    s_cselect_b32 s9, 1, 0
6883; GFX6-NEXT:    v_lshl_b64 v[8:9], v[4:5], s5
6884; GFX6-NEXT:    v_or_b32_e32 v6, v0, v6
6885; GFX6-NEXT:    v_or_b32_e32 v7, v1, v7
6886; GFX6-NEXT:    v_lshl_b64 v[0:1], v[4:5], s6
6887; GFX6-NEXT:    s_and_b32 s5, 1, s8
6888; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s5
6889; GFX6-NEXT:    s_and_b32 s5, 1, s9
6890; GFX6-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
6891; GFX6-NEXT:    v_cndmask_b32_e32 v5, 0, v9, vcc
6892; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
6893; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
6894; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s5
6895; GFX6-NEXT:    s_and_b32 s5, s4, 0x7f
6896; GFX6-NEXT:    s_sub_i32 s10, s5, 64
6897; GFX6-NEXT:    s_sub_i32 s8, 64, s5
6898; GFX6-NEXT:    s_cmp_lt_u32 s5, 64
6899; GFX6-NEXT:    s_cselect_b32 s11, 1, 0
6900; GFX6-NEXT:    s_cmp_eq_u32 s5, 0
6901; GFX6-NEXT:    s_cselect_b32 s12, 1, 0
6902; GFX6-NEXT:    s_lshr_b64 s[6:7], s[2:3], s4
6903; GFX6-NEXT:    s_lshr_b64 s[4:5], s[0:1], s4
6904; GFX6-NEXT:    s_lshl_b64 s[8:9], s[2:3], s8
6905; GFX6-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
6906; GFX6-NEXT:    s_lshr_b64 s[2:3], s[2:3], s10
6907; GFX6-NEXT:    s_cmp_lg_u32 s11, 0
6908; GFX6-NEXT:    s_cselect_b64 s[2:3], s[4:5], s[2:3]
6909; GFX6-NEXT:    s_cmp_lg_u32 s12, 0
6910; GFX6-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[2:3]
6911; GFX6-NEXT:    s_cmp_lg_u32 s11, 0
6912; GFX6-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
6913; GFX6-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc
6914; GFX6-NEXT:    s_cselect_b64 s[2:3], s[6:7], 0
6915; GFX6-NEXT:    v_or_b32_e32 v0, s0, v4
6916; GFX6-NEXT:    v_or_b32_e32 v1, s1, v5
6917; GFX6-NEXT:    v_or_b32_e32 v2, s2, v2
6918; GFX6-NEXT:    v_or_b32_e32 v3, s3, v3
6919; GFX6-NEXT:    ; return to shader part epilog
6920;
6921; GFX8-LABEL: v_fshr_i128_vss:
6922; GFX8:       ; %bb.0:
6923; GFX8-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
6924; GFX8-NEXT:    v_lshlrev_b64 v[4:5], 1, v[0:1]
6925; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
6926; GFX8-NEXT:    s_andn2_b32 s5, 0x7f, s4
6927; GFX8-NEXT:    v_or_b32_e32 v2, v2, v0
6928; GFX8-NEXT:    s_sub_i32 s6, s5, 64
6929; GFX8-NEXT:    s_sub_i32 s7, 64, s5
6930; GFX8-NEXT:    s_cmp_lt_u32 s5, 64
6931; GFX8-NEXT:    v_lshrrev_b64 v[0:1], s7, v[4:5]
6932; GFX8-NEXT:    v_lshlrev_b64 v[6:7], s5, v[2:3]
6933; GFX8-NEXT:    s_cselect_b32 s8, 1, 0
6934; GFX8-NEXT:    s_cmp_eq_u32 s5, 0
6935; GFX8-NEXT:    s_cselect_b32 s9, 1, 0
6936; GFX8-NEXT:    v_lshlrev_b64 v[8:9], s5, v[4:5]
6937; GFX8-NEXT:    v_or_b32_e32 v6, v0, v6
6938; GFX8-NEXT:    v_or_b32_e32 v7, v1, v7
6939; GFX8-NEXT:    v_lshlrev_b64 v[0:1], s6, v[4:5]
6940; GFX8-NEXT:    s_and_b32 s5, 1, s8
6941; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s5
6942; GFX8-NEXT:    s_and_b32 s5, 1, s9
6943; GFX8-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
6944; GFX8-NEXT:    v_cndmask_b32_e32 v5, 0, v9, vcc
6945; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
6946; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
6947; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s5
6948; GFX8-NEXT:    s_and_b32 s5, s4, 0x7f
6949; GFX8-NEXT:    s_sub_i32 s10, s5, 64
6950; GFX8-NEXT:    s_sub_i32 s8, 64, s5
6951; GFX8-NEXT:    s_cmp_lt_u32 s5, 64
6952; GFX8-NEXT:    s_cselect_b32 s11, 1, 0
6953; GFX8-NEXT:    s_cmp_eq_u32 s5, 0
6954; GFX8-NEXT:    s_cselect_b32 s12, 1, 0
6955; GFX8-NEXT:    s_lshr_b64 s[6:7], s[2:3], s4
6956; GFX8-NEXT:    s_lshr_b64 s[4:5], s[0:1], s4
6957; GFX8-NEXT:    s_lshl_b64 s[8:9], s[2:3], s8
6958; GFX8-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
6959; GFX8-NEXT:    s_lshr_b64 s[2:3], s[2:3], s10
6960; GFX8-NEXT:    s_cmp_lg_u32 s11, 0
6961; GFX8-NEXT:    s_cselect_b64 s[2:3], s[4:5], s[2:3]
6962; GFX8-NEXT:    s_cmp_lg_u32 s12, 0
6963; GFX8-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[2:3]
6964; GFX8-NEXT:    s_cmp_lg_u32 s11, 0
6965; GFX8-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
6966; GFX8-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc
6967; GFX8-NEXT:    s_cselect_b64 s[2:3], s[6:7], 0
6968; GFX8-NEXT:    v_or_b32_e32 v0, s0, v4
6969; GFX8-NEXT:    v_or_b32_e32 v1, s1, v5
6970; GFX8-NEXT:    v_or_b32_e32 v2, s2, v2
6971; GFX8-NEXT:    v_or_b32_e32 v3, s3, v3
6972; GFX8-NEXT:    ; return to shader part epilog
6973;
6974; GFX9-LABEL: v_fshr_i128_vss:
6975; GFX9:       ; %bb.0:
6976; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
6977; GFX9-NEXT:    v_lshlrev_b64 v[4:5], 1, v[0:1]
6978; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
6979; GFX9-NEXT:    s_andn2_b32 s5, 0x7f, s4
6980; GFX9-NEXT:    v_or_b32_e32 v2, v2, v0
6981; GFX9-NEXT:    s_sub_i32 s6, s5, 64
6982; GFX9-NEXT:    s_sub_i32 s7, 64, s5
6983; GFX9-NEXT:    s_cmp_lt_u32 s5, 64
6984; GFX9-NEXT:    v_lshrrev_b64 v[0:1], s7, v[4:5]
6985; GFX9-NEXT:    v_lshlrev_b64 v[6:7], s5, v[2:3]
6986; GFX9-NEXT:    s_cselect_b32 s8, 1, 0
6987; GFX9-NEXT:    s_cmp_eq_u32 s5, 0
6988; GFX9-NEXT:    s_cselect_b32 s9, 1, 0
6989; GFX9-NEXT:    v_lshlrev_b64 v[8:9], s5, v[4:5]
6990; GFX9-NEXT:    v_or_b32_e32 v6, v0, v6
6991; GFX9-NEXT:    v_or_b32_e32 v7, v1, v7
6992; GFX9-NEXT:    v_lshlrev_b64 v[0:1], s6, v[4:5]
6993; GFX9-NEXT:    s_and_b32 s5, 1, s8
6994; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s5
6995; GFX9-NEXT:    s_and_b32 s5, 1, s9
6996; GFX9-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
6997; GFX9-NEXT:    v_cndmask_b32_e32 v5, 0, v9, vcc
6998; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
6999; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
7000; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s5
7001; GFX9-NEXT:    s_and_b32 s5, s4, 0x7f
7002; GFX9-NEXT:    s_sub_i32 s10, s5, 64
7003; GFX9-NEXT:    s_sub_i32 s8, 64, s5
7004; GFX9-NEXT:    s_cmp_lt_u32 s5, 64
7005; GFX9-NEXT:    s_cselect_b32 s11, 1, 0
7006; GFX9-NEXT:    s_cmp_eq_u32 s5, 0
7007; GFX9-NEXT:    s_cselect_b32 s12, 1, 0
7008; GFX9-NEXT:    s_lshr_b64 s[6:7], s[2:3], s4
7009; GFX9-NEXT:    s_lshr_b64 s[4:5], s[0:1], s4
7010; GFX9-NEXT:    s_lshl_b64 s[8:9], s[2:3], s8
7011; GFX9-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
7012; GFX9-NEXT:    s_lshr_b64 s[2:3], s[2:3], s10
7013; GFX9-NEXT:    s_cmp_lg_u32 s11, 0
7014; GFX9-NEXT:    s_cselect_b64 s[2:3], s[4:5], s[2:3]
7015; GFX9-NEXT:    s_cmp_lg_u32 s12, 0
7016; GFX9-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[2:3]
7017; GFX9-NEXT:    s_cmp_lg_u32 s11, 0
7018; GFX9-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
7019; GFX9-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc
7020; GFX9-NEXT:    s_cselect_b64 s[2:3], s[6:7], 0
7021; GFX9-NEXT:    v_or_b32_e32 v0, s0, v4
7022; GFX9-NEXT:    v_or_b32_e32 v1, s1, v5
7023; GFX9-NEXT:    v_or_b32_e32 v2, s2, v2
7024; GFX9-NEXT:    v_or_b32_e32 v3, s3, v3
7025; GFX9-NEXT:    ; return to shader part epilog
7026;
7027; GFX10-LABEL: v_fshr_i128_vss:
7028; GFX10:       ; %bb.0:
7029; GFX10-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
7030; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 31, v1
7031; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
7032; GFX10-NEXT:    s_andn2_b32 s5, 0x7f, s4
7033; GFX10-NEXT:    s_sub_i32 s6, s5, 64
7034; GFX10-NEXT:    v_or_b32_e32 v2, v2, v4
7035; GFX10-NEXT:    s_sub_i32 s7, 64, s5
7036; GFX10-NEXT:    s_cmp_lt_u32 s5, 64
7037; GFX10-NEXT:    v_lshrrev_b64 v[4:5], s7, v[0:1]
7038; GFX10-NEXT:    s_cselect_b32 s8, 1, 0
7039; GFX10-NEXT:    s_cmp_eq_u32 s5, 0
7040; GFX10-NEXT:    v_lshlrev_b64 v[6:7], s5, v[2:3]
7041; GFX10-NEXT:    s_cselect_b32 s9, 1, 0
7042; GFX10-NEXT:    v_lshlrev_b64 v[8:9], s5, v[0:1]
7043; GFX10-NEXT:    s_and_b32 s5, 1, s8
7044; GFX10-NEXT:    v_lshlrev_b64 v[0:1], s6, v[0:1]
7045; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s5
7046; GFX10-NEXT:    s_and_b32 s5, s4, 0x7f
7047; GFX10-NEXT:    v_or_b32_e32 v4, v4, v6
7048; GFX10-NEXT:    v_or_b32_e32 v5, v5, v7
7049; GFX10-NEXT:    s_and_b32 s6, 1, s9
7050; GFX10-NEXT:    s_sub_i32 s10, s5, 64
7051; GFX10-NEXT:    s_sub_i32 s8, 64, s5
7052; GFX10-NEXT:    s_cmp_lt_u32 s5, 64
7053; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0, v8, vcc_lo
7054; GFX10-NEXT:    s_cselect_b32 s11, 1, 0
7055; GFX10-NEXT:    s_cmp_eq_u32 s5, 0
7056; GFX10-NEXT:    v_cndmask_b32_e32 v7, 0, v9, vcc_lo
7057; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
7058; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
7059; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s6
7060; GFX10-NEXT:    s_cselect_b32 s12, 1, 0
7061; GFX10-NEXT:    s_lshr_b64 s[6:7], s[0:1], s4
7062; GFX10-NEXT:    s_lshl_b64 s[8:9], s[2:3], s8
7063; GFX10-NEXT:    s_lshr_b64 s[4:5], s[2:3], s4
7064; GFX10-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
7065; GFX10-NEXT:    s_lshr_b64 s[2:3], s[2:3], s10
7066; GFX10-NEXT:    s_cmp_lg_u32 s11, 0
7067; GFX10-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc_lo
7068; GFX10-NEXT:    s_cselect_b64 s[2:3], s[6:7], s[2:3]
7069; GFX10-NEXT:    s_cmp_lg_u32 s12, 0
7070; GFX10-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc_lo
7071; GFX10-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[2:3]
7072; GFX10-NEXT:    s_cmp_lg_u32 s11, 0
7073; GFX10-NEXT:    v_or_b32_e32 v0, s0, v6
7074; GFX10-NEXT:    s_cselect_b64 s[2:3], s[4:5], 0
7075; GFX10-NEXT:    v_or_b32_e32 v1, s1, v7
7076; GFX10-NEXT:    v_or_b32_e32 v2, s2, v2
7077; GFX10-NEXT:    v_or_b32_e32 v3, s3, v3
7078; GFX10-NEXT:    ; return to shader part epilog
7079;
7080; GFX11-LABEL: v_fshr_i128_vss:
7081; GFX11:       ; %bb.0:
7082; GFX11-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
7083; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 31, v1
7084; GFX11-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
7085; GFX11-NEXT:    s_and_not1_b32 s5, 0x7f, s4
7086; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
7087; GFX11-NEXT:    s_sub_i32 s6, s5, 64
7088; GFX11-NEXT:    v_or_b32_e32 v2, v2, v4
7089; GFX11-NEXT:    s_sub_i32 s7, 64, s5
7090; GFX11-NEXT:    s_cmp_lt_u32 s5, 64
7091; GFX11-NEXT:    v_lshrrev_b64 v[4:5], s7, v[0:1]
7092; GFX11-NEXT:    s_cselect_b32 s8, 1, 0
7093; GFX11-NEXT:    s_cmp_eq_u32 s5, 0
7094; GFX11-NEXT:    v_lshlrev_b64 v[6:7], s5, v[2:3]
7095; GFX11-NEXT:    s_cselect_b32 s9, 1, 0
7096; GFX11-NEXT:    v_lshlrev_b64 v[8:9], s5, v[0:1]
7097; GFX11-NEXT:    s_and_b32 s5, 1, s8
7098; GFX11-NEXT:    v_lshlrev_b64 v[0:1], s6, v[0:1]
7099; GFX11-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s5
7100; GFX11-NEXT:    s_and_b32 s5, s4, 0x7f
7101; GFX11-NEXT:    v_or_b32_e32 v4, v4, v6
7102; GFX11-NEXT:    v_or_b32_e32 v5, v5, v7
7103; GFX11-NEXT:    s_and_b32 s6, 1, s9
7104; GFX11-NEXT:    s_sub_i32 s10, s5, 64
7105; GFX11-NEXT:    s_sub_i32 s8, 64, s5
7106; GFX11-NEXT:    s_cmp_lt_u32 s5, 64
7107; GFX11-NEXT:    v_dual_cndmask_b32 v6, 0, v8 :: v_dual_cndmask_b32 v7, 0, v9
7108; GFX11-NEXT:    s_cselect_b32 s11, 1, 0
7109; GFX11-NEXT:    s_cmp_eq_u32 s5, 0
7110; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5
7111; GFX11-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s6
7112; GFX11-NEXT:    s_cselect_b32 s12, 1, 0
7113; GFX11-NEXT:    s_lshr_b64 s[6:7], s[0:1], s4
7114; GFX11-NEXT:    s_lshl_b64 s[8:9], s[2:3], s8
7115; GFX11-NEXT:    s_lshr_b64 s[4:5], s[2:3], s4
7116; GFX11-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
7117; GFX11-NEXT:    s_lshr_b64 s[2:3], s[2:3], s10
7118; GFX11-NEXT:    s_cmp_lg_u32 s11, 0
7119; GFX11-NEXT:    v_dual_cndmask_b32 v2, v0, v2 :: v_dual_cndmask_b32 v3, v1, v3
7120; GFX11-NEXT:    s_cselect_b64 s[2:3], s[6:7], s[2:3]
7121; GFX11-NEXT:    s_cmp_lg_u32 s12, 0
7122; GFX11-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[2:3]
7123; GFX11-NEXT:    s_cmp_lg_u32 s11, 0
7124; GFX11-NEXT:    v_or_b32_e32 v0, s0, v6
7125; GFX11-NEXT:    s_cselect_b64 s[2:3], s[4:5], 0
7126; GFX11-NEXT:    v_or_b32_e32 v1, s1, v7
7127; GFX11-NEXT:    v_or_b32_e32 v2, s2, v2
7128; GFX11-NEXT:    v_or_b32_e32 v3, s3, v3
7129; GFX11-NEXT:    ; return to shader part epilog
7130  %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
7131  %cast.result = bitcast i128 %result to <4 x float>
7132  ret <4 x float> %cast.result
7133}
7134
; fshr of two i128 values by the constant amount 65, with both operands passed
; inreg to an amdgpu_ps function. For every checked target the expected output
; is purely scalar: two 64-bit right shifts by 1, each OR'ed with a 64-bit pair
; whose low dword is zero and whose high dword is a 32-bit left shift by 31.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing them.
7135define amdgpu_ps i128 @s_fshr_i128_65(i128 inreg %lhs, i128 inreg %rhs) {
7136; GFX6-LABEL: s_fshr_i128_65:
7137; GFX6:       ; %bb.0:
7138; GFX6-NEXT:    s_mov_b32 s4, 0
7139; GFX6-NEXT:    s_lshl_b32 s5, s0, 31
7140; GFX6-NEXT:    s_lshl_b32 s3, s2, 31
7141; GFX6-NEXT:    s_mov_b32 s2, s4
7142; GFX6-NEXT:    s_lshr_b64 s[0:1], s[0:1], 1
7143; GFX6-NEXT:    s_or_b64 s[2:3], s[2:3], s[0:1]
7144; GFX6-NEXT:    s_lshr_b64 s[0:1], s[6:7], 1
7145; GFX6-NEXT:    s_or_b64 s[0:1], s[4:5], s[0:1]
7146; GFX6-NEXT:    ; return to shader part epilog
7147;
7148; GFX8-LABEL: s_fshr_i128_65:
7149; GFX8:       ; %bb.0:
7150; GFX8-NEXT:    s_mov_b32 s4, 0
7151; GFX8-NEXT:    s_lshl_b32 s5, s0, 31
7152; GFX8-NEXT:    s_lshl_b32 s3, s2, 31
7153; GFX8-NEXT:    s_mov_b32 s2, s4
7154; GFX8-NEXT:    s_lshr_b64 s[0:1], s[0:1], 1
7155; GFX8-NEXT:    s_or_b64 s[2:3], s[2:3], s[0:1]
7156; GFX8-NEXT:    s_lshr_b64 s[0:1], s[6:7], 1
7157; GFX8-NEXT:    s_or_b64 s[0:1], s[4:5], s[0:1]
7158; GFX8-NEXT:    ; return to shader part epilog
7159;
7160; GFX9-LABEL: s_fshr_i128_65:
7161; GFX9:       ; %bb.0:
7162; GFX9-NEXT:    s_mov_b32 s4, 0
7163; GFX9-NEXT:    s_lshl_b32 s5, s0, 31
7164; GFX9-NEXT:    s_lshl_b32 s3, s2, 31
7165; GFX9-NEXT:    s_mov_b32 s2, s4
7166; GFX9-NEXT:    s_lshr_b64 s[0:1], s[0:1], 1
7167; GFX9-NEXT:    s_or_b64 s[2:3], s[2:3], s[0:1]
7168; GFX9-NEXT:    s_lshr_b64 s[0:1], s[6:7], 1
7169; GFX9-NEXT:    s_or_b64 s[0:1], s[4:5], s[0:1]
7170; GFX9-NEXT:    ; return to shader part epilog
7171;
7172; GFX10-LABEL: s_fshr_i128_65:
7173; GFX10:       ; %bb.0:
7174; GFX10-NEXT:    s_mov_b32 s4, 0
7175; GFX10-NEXT:    s_lshl_b32 s5, s0, 31
7176; GFX10-NEXT:    s_lshl_b32 s3, s2, 31
7177; GFX10-NEXT:    s_mov_b32 s2, s4
7178; GFX10-NEXT:    s_lshr_b64 s[6:7], s[6:7], 1
7179; GFX10-NEXT:    s_lshr_b64 s[8:9], s[0:1], 1
7180; GFX10-NEXT:    s_or_b64 s[0:1], s[4:5], s[6:7]
7181; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[8:9]
7182; GFX10-NEXT:    ; return to shader part epilog
7183;
7184; GFX11-LABEL: s_fshr_i128_65:
7185; GFX11:       ; %bb.0:
7186; GFX11-NEXT:    s_mov_b32 s4, 0
7187; GFX11-NEXT:    s_lshl_b32 s5, s0, 31
7188; GFX11-NEXT:    s_lshl_b32 s3, s2, 31
7189; GFX11-NEXT:    s_mov_b32 s2, s4
7190; GFX11-NEXT:    s_lshr_b64 s[6:7], s[6:7], 1
7191; GFX11-NEXT:    s_lshr_b64 s[8:9], s[0:1], 1
7192; GFX11-NEXT:    s_or_b64 s[0:1], s[4:5], s[6:7]
7193; GFX11-NEXT:    s_or_b64 s[2:3], s[2:3], s[8:9]
7194; GFX11-NEXT:    ; return to shader part epilog
7195  %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 65)
7196  ret i128 %result
7197}
7198
; fshr of two i128 values by the constant amount 65 in a function with the
; default calling convention and no inreg operands. The expected output uses
; only vector (v_*) instructions for the shift and returns via s_setpc_b64.
; The GFX6 and GFX8 checks use a separate v_lshlrev_b32 by 31 and v_or_b32,
; while the GFX9, GFX10 and GFX11 checks combine them into v_lshl_or_b32.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing them.
7199define i128 @v_fshr_i128_65(i128 %lhs, i128 %rhs) {
7200; GFX6-LABEL: v_fshr_i128_65:
7201; GFX6:       ; %bb.0:
7202; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7203; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 31, v0
7204; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 31, v2
7205; GFX6-NEXT:    v_lshr_b64 v[2:3], v[0:1], 1
7206; GFX6-NEXT:    v_lshr_b64 v[0:1], v[6:7], 1
7207; GFX6-NEXT:    v_or_b32_e32 v3, v5, v3
7208; GFX6-NEXT:    v_or_b32_e32 v1, v4, v1
7209; GFX6-NEXT:    s_setpc_b64 s[30:31]
7210;
7211; GFX8-LABEL: v_fshr_i128_65:
7212; GFX8:       ; %bb.0:
7213; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7214; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 31, v0
7215; GFX8-NEXT:    v_lshlrev_b32_e32 v5, 31, v2
7216; GFX8-NEXT:    v_lshrrev_b64 v[2:3], 1, v[0:1]
7217; GFX8-NEXT:    v_lshrrev_b64 v[0:1], 1, v[6:7]
7218; GFX8-NEXT:    v_or_b32_e32 v3, v5, v3
7219; GFX8-NEXT:    v_or_b32_e32 v1, v4, v1
7220; GFX8-NEXT:    s_setpc_b64 s[30:31]
7221;
7222; GFX9-LABEL: v_fshr_i128_65:
7223; GFX9:       ; %bb.0:
7224; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7225; GFX9-NEXT:    v_mov_b32_e32 v8, v2
7226; GFX9-NEXT:    v_lshrrev_b64 v[2:3], 1, v[0:1]
7227; GFX9-NEXT:    v_lshrrev_b64 v[4:5], 1, v[6:7]
7228; GFX9-NEXT:    v_lshl_or_b32 v3, v8, 31, v3
7229; GFX9-NEXT:    v_lshl_or_b32 v1, v0, 31, v5
7230; GFX9-NEXT:    v_mov_b32_e32 v0, v4
7231; GFX9-NEXT:    s_setpc_b64 s[30:31]
7232;
7233; GFX10-LABEL: v_fshr_i128_65:
7234; GFX10:       ; %bb.0:
7235; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7236; GFX10-NEXT:    v_mov_b32_e32 v8, v2
7237; GFX10-NEXT:    v_lshrrev_b64 v[4:5], 1, v[6:7]
7238; GFX10-NEXT:    v_lshrrev_b64 v[2:3], 1, v[0:1]
7239; GFX10-NEXT:    v_lshl_or_b32 v1, v0, 31, v5
7240; GFX10-NEXT:    v_lshl_or_b32 v3, v8, 31, v3
7241; GFX10-NEXT:    v_mov_b32_e32 v0, v4
7242; GFX10-NEXT:    s_setpc_b64 s[30:31]
7243;
7244; GFX11-LABEL: v_fshr_i128_65:
7245; GFX11:       ; %bb.0:
7246; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7247; GFX11-NEXT:    v_mov_b32_e32 v8, v2
7248; GFX11-NEXT:    v_lshrrev_b64 v[4:5], 1, v[6:7]
7249; GFX11-NEXT:    v_lshrrev_b64 v[2:3], 1, v[0:1]
7250; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7251; GFX11-NEXT:    v_lshl_or_b32 v1, v0, 31, v5
7252; GFX11-NEXT:    v_lshl_or_b32 v3, v8, 31, v3
7253; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
7254; GFX11-NEXT:    v_mov_b32_e32 v0, v4
7255; GFX11-NEXT:    s_setpc_b64 s[30:31]
7256  %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 65)
7257  ret i128 %result
7258}
7259
; fshr on <2 x i128> with a variable per-element shift amount; all three
; operands are inreg in an amdgpu_ps function and the expected output is purely
; scalar. For each element the checks show the 128-bit operation expanded into
; 64-bit pieces: one input is pre-shifted left by 1, the amount is masked with
; 0x7f (plus its complement via s_andn2_b32 / s_and_not1_b32), and s_cmp against
; 64 and 0 drives s_cselect_b64 between the partial shifts before the two
; halves are OR'ed together.
; NOTE(review): only s16 and s20 are read as shift amounts in the expected
; output -- presumably the low dword of each amount element; confirm against
; the calling convention.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing them.
7260define amdgpu_ps <2 x i128> @s_fshr_v2i128(<2 x i128> inreg %lhs, <2 x i128> inreg %rhs, <2 x i128> inreg %amt) {
7261; GFX6-LABEL: s_fshr_v2i128:
7262; GFX6:       ; %bb.0:
7263; GFX6-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
7264; GFX6-NEXT:    s_lshr_b32 s22, s1, 31
7265; GFX6-NEXT:    s_mov_b32 s23, 0
7266; GFX6-NEXT:    s_lshl_b64 s[18:19], s[0:1], 1
7267; GFX6-NEXT:    s_or_b64 s[0:1], s[2:3], s[22:23]
7268; GFX6-NEXT:    s_andn2_b32 s2, 0x7f, s16
7269; GFX6-NEXT:    s_not_b32 s17, s16
7270; GFX6-NEXT:    s_sub_i32 s21, s2, 64
7271; GFX6-NEXT:    s_sub_i32 s22, 64, s2
7272; GFX6-NEXT:    s_cmp_lt_u32 s2, 64
7273; GFX6-NEXT:    s_cselect_b32 s28, 1, 0
7274; GFX6-NEXT:    s_cmp_eq_u32 s2, 0
7275; GFX6-NEXT:    s_cselect_b32 s29, 1, 0
7276; GFX6-NEXT:    s_lshr_b64 s[24:25], s[18:19], s22
7277; GFX6-NEXT:    s_lshl_b64 s[26:27], s[0:1], s17
7278; GFX6-NEXT:    s_lshl_b64 s[2:3], s[18:19], s17
7279; GFX6-NEXT:    s_or_b64 s[24:25], s[24:25], s[26:27]
7280; GFX6-NEXT:    s_lshl_b64 s[18:19], s[18:19], s21
7281; GFX6-NEXT:    s_cmp_lg_u32 s28, 0
7282; GFX6-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
7283; GFX6-NEXT:    s_cselect_b64 s[18:19], s[24:25], s[18:19]
7284; GFX6-NEXT:    s_cmp_lg_u32 s29, 0
7285; GFX6-NEXT:    s_cselect_b64 s[18:19], s[0:1], s[18:19]
7286; GFX6-NEXT:    s_and_b32 s0, s16, 0x7f
7287; GFX6-NEXT:    s_sub_i32 s21, s0, 64
7288; GFX6-NEXT:    s_sub_i32 s22, 64, s0
7289; GFX6-NEXT:    s_cmp_lt_u32 s0, 64
7290; GFX6-NEXT:    s_cselect_b32 s26, 1, 0
7291; GFX6-NEXT:    s_cmp_eq_u32 s0, 0
7292; GFX6-NEXT:    s_cselect_b32 s27, 1, 0
7293; GFX6-NEXT:    s_lshr_b64 s[0:1], s[10:11], s16
7294; GFX6-NEXT:    s_lshr_b64 s[16:17], s[8:9], s16
7295; GFX6-NEXT:    s_lshl_b64 s[24:25], s[10:11], s22
7296; GFX6-NEXT:    s_or_b64 s[16:17], s[16:17], s[24:25]
7297; GFX6-NEXT:    s_lshr_b64 s[10:11], s[10:11], s21
7298; GFX6-NEXT:    s_cmp_lg_u32 s26, 0
7299; GFX6-NEXT:    s_cselect_b64 s[10:11], s[16:17], s[10:11]
7300; GFX6-NEXT:    s_cmp_lg_u32 s27, 0
7301; GFX6-NEXT:    s_cselect_b64 s[8:9], s[8:9], s[10:11]
7302; GFX6-NEXT:    s_cmp_lg_u32 s26, 0
7303; GFX6-NEXT:    s_cselect_b64 s[10:11], s[0:1], 0
7304; GFX6-NEXT:    s_lshl_b64 s[6:7], s[6:7], 1
7305; GFX6-NEXT:    s_lshr_b32 s22, s5, 31
7306; GFX6-NEXT:    s_or_b64 s[0:1], s[2:3], s[8:9]
7307; GFX6-NEXT:    s_lshl_b64 s[8:9], s[4:5], 1
7308; GFX6-NEXT:    s_or_b64 s[4:5], s[6:7], s[22:23]
7309; GFX6-NEXT:    s_andn2_b32 s6, 0x7f, s20
7310; GFX6-NEXT:    s_or_b64 s[2:3], s[18:19], s[10:11]
7311; GFX6-NEXT:    s_not_b32 s16, s20
7312; GFX6-NEXT:    s_sub_i32 s18, s6, 64
7313; GFX6-NEXT:    s_sub_i32 s10, 64, s6
7314; GFX6-NEXT:    s_cmp_lt_u32 s6, 64
7315; GFX6-NEXT:    s_cselect_b32 s19, 1, 0
7316; GFX6-NEXT:    s_cmp_eq_u32 s6, 0
7317; GFX6-NEXT:    s_cselect_b32 s21, 1, 0
7318; GFX6-NEXT:    s_lshl_b64 s[6:7], s[8:9], s16
7319; GFX6-NEXT:    s_lshr_b64 s[10:11], s[8:9], s10
7320; GFX6-NEXT:    s_lshl_b64 s[16:17], s[4:5], s16
7321; GFX6-NEXT:    s_or_b64 s[10:11], s[10:11], s[16:17]
7322; GFX6-NEXT:    s_lshl_b64 s[8:9], s[8:9], s18
7323; GFX6-NEXT:    s_cmp_lg_u32 s19, 0
7324; GFX6-NEXT:    s_cselect_b64 s[6:7], s[6:7], 0
7325; GFX6-NEXT:    s_cselect_b64 s[8:9], s[10:11], s[8:9]
7326; GFX6-NEXT:    s_cmp_lg_u32 s21, 0
7327; GFX6-NEXT:    s_cselect_b64 s[8:9], s[4:5], s[8:9]
7328; GFX6-NEXT:    s_and_b32 s4, s20, 0x7f
7329; GFX6-NEXT:    s_sub_i32 s18, s4, 64
7330; GFX6-NEXT:    s_sub_i32 s16, 64, s4
7331; GFX6-NEXT:    s_cmp_lt_u32 s4, 64
7332; GFX6-NEXT:    s_cselect_b32 s19, 1, 0
7333; GFX6-NEXT:    s_cmp_eq_u32 s4, 0
7334; GFX6-NEXT:    s_cselect_b32 s21, 1, 0
7335; GFX6-NEXT:    s_lshr_b64 s[10:11], s[12:13], s20
7336; GFX6-NEXT:    s_lshl_b64 s[16:17], s[14:15], s16
7337; GFX6-NEXT:    s_lshr_b64 s[4:5], s[14:15], s20
7338; GFX6-NEXT:    s_or_b64 s[10:11], s[10:11], s[16:17]
7339; GFX6-NEXT:    s_lshr_b64 s[14:15], s[14:15], s18
7340; GFX6-NEXT:    s_cmp_lg_u32 s19, 0
7341; GFX6-NEXT:    s_cselect_b64 s[10:11], s[10:11], s[14:15]
7342; GFX6-NEXT:    s_cmp_lg_u32 s21, 0
7343; GFX6-NEXT:    s_cselect_b64 s[10:11], s[12:13], s[10:11]
7344; GFX6-NEXT:    s_cmp_lg_u32 s19, 0
7345; GFX6-NEXT:    s_cselect_b64 s[12:13], s[4:5], 0
7346; GFX6-NEXT:    s_or_b64 s[4:5], s[6:7], s[10:11]
7347; GFX6-NEXT:    s_or_b64 s[6:7], s[8:9], s[12:13]
7348; GFX6-NEXT:    ; return to shader part epilog
7349;
7350; GFX8-LABEL: s_fshr_v2i128:
7351; GFX8:       ; %bb.0:
7352; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
7353; GFX8-NEXT:    s_lshr_b32 s22, s1, 31
7354; GFX8-NEXT:    s_mov_b32 s23, 0
7355; GFX8-NEXT:    s_lshl_b64 s[18:19], s[0:1], 1
7356; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[22:23]
7357; GFX8-NEXT:    s_andn2_b32 s2, 0x7f, s16
7358; GFX8-NEXT:    s_not_b32 s17, s16
7359; GFX8-NEXT:    s_sub_i32 s21, s2, 64
7360; GFX8-NEXT:    s_sub_i32 s22, 64, s2
7361; GFX8-NEXT:    s_cmp_lt_u32 s2, 64
7362; GFX8-NEXT:    s_cselect_b32 s28, 1, 0
7363; GFX8-NEXT:    s_cmp_eq_u32 s2, 0
7364; GFX8-NEXT:    s_cselect_b32 s29, 1, 0
7365; GFX8-NEXT:    s_lshr_b64 s[24:25], s[18:19], s22
7366; GFX8-NEXT:    s_lshl_b64 s[26:27], s[0:1], s17
7367; GFX8-NEXT:    s_lshl_b64 s[2:3], s[18:19], s17
7368; GFX8-NEXT:    s_or_b64 s[24:25], s[24:25], s[26:27]
7369; GFX8-NEXT:    s_lshl_b64 s[18:19], s[18:19], s21
7370; GFX8-NEXT:    s_cmp_lg_u32 s28, 0
7371; GFX8-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
7372; GFX8-NEXT:    s_cselect_b64 s[18:19], s[24:25], s[18:19]
7373; GFX8-NEXT:    s_cmp_lg_u32 s29, 0
7374; GFX8-NEXT:    s_cselect_b64 s[18:19], s[0:1], s[18:19]
7375; GFX8-NEXT:    s_and_b32 s0, s16, 0x7f
7376; GFX8-NEXT:    s_sub_i32 s21, s0, 64
7377; GFX8-NEXT:    s_sub_i32 s22, 64, s0
7378; GFX8-NEXT:    s_cmp_lt_u32 s0, 64
7379; GFX8-NEXT:    s_cselect_b32 s26, 1, 0
7380; GFX8-NEXT:    s_cmp_eq_u32 s0, 0
7381; GFX8-NEXT:    s_cselect_b32 s27, 1, 0
7382; GFX8-NEXT:    s_lshr_b64 s[0:1], s[10:11], s16
7383; GFX8-NEXT:    s_lshr_b64 s[16:17], s[8:9], s16
7384; GFX8-NEXT:    s_lshl_b64 s[24:25], s[10:11], s22
7385; GFX8-NEXT:    s_or_b64 s[16:17], s[16:17], s[24:25]
7386; GFX8-NEXT:    s_lshr_b64 s[10:11], s[10:11], s21
7387; GFX8-NEXT:    s_cmp_lg_u32 s26, 0
7388; GFX8-NEXT:    s_cselect_b64 s[10:11], s[16:17], s[10:11]
7389; GFX8-NEXT:    s_cmp_lg_u32 s27, 0
7390; GFX8-NEXT:    s_cselect_b64 s[8:9], s[8:9], s[10:11]
7391; GFX8-NEXT:    s_cmp_lg_u32 s26, 0
7392; GFX8-NEXT:    s_cselect_b64 s[10:11], s[0:1], 0
7393; GFX8-NEXT:    s_lshl_b64 s[6:7], s[6:7], 1
7394; GFX8-NEXT:    s_lshr_b32 s22, s5, 31
7395; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[8:9]
7396; GFX8-NEXT:    s_lshl_b64 s[8:9], s[4:5], 1
7397; GFX8-NEXT:    s_or_b64 s[4:5], s[6:7], s[22:23]
7398; GFX8-NEXT:    s_andn2_b32 s6, 0x7f, s20
7399; GFX8-NEXT:    s_or_b64 s[2:3], s[18:19], s[10:11]
7400; GFX8-NEXT:    s_not_b32 s16, s20
7401; GFX8-NEXT:    s_sub_i32 s18, s6, 64
7402; GFX8-NEXT:    s_sub_i32 s10, 64, s6
7403; GFX8-NEXT:    s_cmp_lt_u32 s6, 64
7404; GFX8-NEXT:    s_cselect_b32 s19, 1, 0
7405; GFX8-NEXT:    s_cmp_eq_u32 s6, 0
7406; GFX8-NEXT:    s_cselect_b32 s21, 1, 0
7407; GFX8-NEXT:    s_lshl_b64 s[6:7], s[8:9], s16
7408; GFX8-NEXT:    s_lshr_b64 s[10:11], s[8:9], s10
7409; GFX8-NEXT:    s_lshl_b64 s[16:17], s[4:5], s16
7410; GFX8-NEXT:    s_or_b64 s[10:11], s[10:11], s[16:17]
7411; GFX8-NEXT:    s_lshl_b64 s[8:9], s[8:9], s18
7412; GFX8-NEXT:    s_cmp_lg_u32 s19, 0
7413; GFX8-NEXT:    s_cselect_b64 s[6:7], s[6:7], 0
7414; GFX8-NEXT:    s_cselect_b64 s[8:9], s[10:11], s[8:9]
7415; GFX8-NEXT:    s_cmp_lg_u32 s21, 0
7416; GFX8-NEXT:    s_cselect_b64 s[8:9], s[4:5], s[8:9]
7417; GFX8-NEXT:    s_and_b32 s4, s20, 0x7f
7418; GFX8-NEXT:    s_sub_i32 s18, s4, 64
7419; GFX8-NEXT:    s_sub_i32 s16, 64, s4
7420; GFX8-NEXT:    s_cmp_lt_u32 s4, 64
7421; GFX8-NEXT:    s_cselect_b32 s19, 1, 0
7422; GFX8-NEXT:    s_cmp_eq_u32 s4, 0
7423; GFX8-NEXT:    s_cselect_b32 s21, 1, 0
7424; GFX8-NEXT:    s_lshr_b64 s[10:11], s[12:13], s20
7425; GFX8-NEXT:    s_lshl_b64 s[16:17], s[14:15], s16
7426; GFX8-NEXT:    s_lshr_b64 s[4:5], s[14:15], s20
7427; GFX8-NEXT:    s_or_b64 s[10:11], s[10:11], s[16:17]
7428; GFX8-NEXT:    s_lshr_b64 s[14:15], s[14:15], s18
7429; GFX8-NEXT:    s_cmp_lg_u32 s19, 0
7430; GFX8-NEXT:    s_cselect_b64 s[10:11], s[10:11], s[14:15]
7431; GFX8-NEXT:    s_cmp_lg_u32 s21, 0
7432; GFX8-NEXT:    s_cselect_b64 s[10:11], s[12:13], s[10:11]
7433; GFX8-NEXT:    s_cmp_lg_u32 s19, 0
7434; GFX8-NEXT:    s_cselect_b64 s[12:13], s[4:5], 0
7435; GFX8-NEXT:    s_or_b64 s[4:5], s[6:7], s[10:11]
7436; GFX8-NEXT:    s_or_b64 s[6:7], s[8:9], s[12:13]
7437; GFX8-NEXT:    ; return to shader part epilog
7438;
7439; GFX9-LABEL: s_fshr_v2i128:
7440; GFX9:       ; %bb.0:
7441; GFX9-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
7442; GFX9-NEXT:    s_lshr_b32 s22, s1, 31
7443; GFX9-NEXT:    s_mov_b32 s23, 0
7444; GFX9-NEXT:    s_lshl_b64 s[18:19], s[0:1], 1
7445; GFX9-NEXT:    s_or_b64 s[0:1], s[2:3], s[22:23]
7446; GFX9-NEXT:    s_andn2_b32 s2, 0x7f, s16
7447; GFX9-NEXT:    s_not_b32 s17, s16
7448; GFX9-NEXT:    s_sub_i32 s21, s2, 64
7449; GFX9-NEXT:    s_sub_i32 s22, 64, s2
7450; GFX9-NEXT:    s_cmp_lt_u32 s2, 64
7451; GFX9-NEXT:    s_cselect_b32 s28, 1, 0
7452; GFX9-NEXT:    s_cmp_eq_u32 s2, 0
7453; GFX9-NEXT:    s_cselect_b32 s29, 1, 0
7454; GFX9-NEXT:    s_lshr_b64 s[24:25], s[18:19], s22
7455; GFX9-NEXT:    s_lshl_b64 s[26:27], s[0:1], s17
7456; GFX9-NEXT:    s_lshl_b64 s[2:3], s[18:19], s17
7457; GFX9-NEXT:    s_or_b64 s[24:25], s[24:25], s[26:27]
7458; GFX9-NEXT:    s_lshl_b64 s[18:19], s[18:19], s21
7459; GFX9-NEXT:    s_cmp_lg_u32 s28, 0
7460; GFX9-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
7461; GFX9-NEXT:    s_cselect_b64 s[18:19], s[24:25], s[18:19]
7462; GFX9-NEXT:    s_cmp_lg_u32 s29, 0
7463; GFX9-NEXT:    s_cselect_b64 s[18:19], s[0:1], s[18:19]
7464; GFX9-NEXT:    s_and_b32 s0, s16, 0x7f
7465; GFX9-NEXT:    s_sub_i32 s21, s0, 64
7466; GFX9-NEXT:    s_sub_i32 s22, 64, s0
7467; GFX9-NEXT:    s_cmp_lt_u32 s0, 64
7468; GFX9-NEXT:    s_cselect_b32 s26, 1, 0
7469; GFX9-NEXT:    s_cmp_eq_u32 s0, 0
7470; GFX9-NEXT:    s_cselect_b32 s27, 1, 0
7471; GFX9-NEXT:    s_lshr_b64 s[0:1], s[10:11], s16
7472; GFX9-NEXT:    s_lshr_b64 s[16:17], s[8:9], s16
7473; GFX9-NEXT:    s_lshl_b64 s[24:25], s[10:11], s22
7474; GFX9-NEXT:    s_or_b64 s[16:17], s[16:17], s[24:25]
7475; GFX9-NEXT:    s_lshr_b64 s[10:11], s[10:11], s21
7476; GFX9-NEXT:    s_cmp_lg_u32 s26, 0
7477; GFX9-NEXT:    s_cselect_b64 s[10:11], s[16:17], s[10:11]
7478; GFX9-NEXT:    s_cmp_lg_u32 s27, 0
7479; GFX9-NEXT:    s_cselect_b64 s[8:9], s[8:9], s[10:11]
7480; GFX9-NEXT:    s_cmp_lg_u32 s26, 0
7481; GFX9-NEXT:    s_cselect_b64 s[10:11], s[0:1], 0
7482; GFX9-NEXT:    s_lshl_b64 s[6:7], s[6:7], 1
7483; GFX9-NEXT:    s_lshr_b32 s22, s5, 31
7484; GFX9-NEXT:    s_or_b64 s[0:1], s[2:3], s[8:9]
7485; GFX9-NEXT:    s_lshl_b64 s[8:9], s[4:5], 1
7486; GFX9-NEXT:    s_or_b64 s[4:5], s[6:7], s[22:23]
7487; GFX9-NEXT:    s_andn2_b32 s6, 0x7f, s20
7488; GFX9-NEXT:    s_or_b64 s[2:3], s[18:19], s[10:11]
7489; GFX9-NEXT:    s_not_b32 s16, s20
7490; GFX9-NEXT:    s_sub_i32 s18, s6, 64
7491; GFX9-NEXT:    s_sub_i32 s10, 64, s6
7492; GFX9-NEXT:    s_cmp_lt_u32 s6, 64
7493; GFX9-NEXT:    s_cselect_b32 s19, 1, 0
7494; GFX9-NEXT:    s_cmp_eq_u32 s6, 0
7495; GFX9-NEXT:    s_cselect_b32 s21, 1, 0
7496; GFX9-NEXT:    s_lshl_b64 s[6:7], s[8:9], s16
7497; GFX9-NEXT:    s_lshr_b64 s[10:11], s[8:9], s10
7498; GFX9-NEXT:    s_lshl_b64 s[16:17], s[4:5], s16
7499; GFX9-NEXT:    s_or_b64 s[10:11], s[10:11], s[16:17]
7500; GFX9-NEXT:    s_lshl_b64 s[8:9], s[8:9], s18
7501; GFX9-NEXT:    s_cmp_lg_u32 s19, 0
7502; GFX9-NEXT:    s_cselect_b64 s[6:7], s[6:7], 0
7503; GFX9-NEXT:    s_cselect_b64 s[8:9], s[10:11], s[8:9]
7504; GFX9-NEXT:    s_cmp_lg_u32 s21, 0
7505; GFX9-NEXT:    s_cselect_b64 s[8:9], s[4:5], s[8:9]
7506; GFX9-NEXT:    s_and_b32 s4, s20, 0x7f
7507; GFX9-NEXT:    s_sub_i32 s18, s4, 64
7508; GFX9-NEXT:    s_sub_i32 s16, 64, s4
7509; GFX9-NEXT:    s_cmp_lt_u32 s4, 64
7510; GFX9-NEXT:    s_cselect_b32 s19, 1, 0
7511; GFX9-NEXT:    s_cmp_eq_u32 s4, 0
7512; GFX9-NEXT:    s_cselect_b32 s21, 1, 0
7513; GFX9-NEXT:    s_lshr_b64 s[10:11], s[12:13], s20
7514; GFX9-NEXT:    s_lshl_b64 s[16:17], s[14:15], s16
7515; GFX9-NEXT:    s_lshr_b64 s[4:5], s[14:15], s20
7516; GFX9-NEXT:    s_or_b64 s[10:11], s[10:11], s[16:17]
7517; GFX9-NEXT:    s_lshr_b64 s[14:15], s[14:15], s18
7518; GFX9-NEXT:    s_cmp_lg_u32 s19, 0
7519; GFX9-NEXT:    s_cselect_b64 s[10:11], s[10:11], s[14:15]
7520; GFX9-NEXT:    s_cmp_lg_u32 s21, 0
7521; GFX9-NEXT:    s_cselect_b64 s[10:11], s[12:13], s[10:11]
7522; GFX9-NEXT:    s_cmp_lg_u32 s19, 0
7523; GFX9-NEXT:    s_cselect_b64 s[12:13], s[4:5], 0
7524; GFX9-NEXT:    s_or_b64 s[4:5], s[6:7], s[10:11]
7525; GFX9-NEXT:    s_or_b64 s[6:7], s[8:9], s[12:13]
7526; GFX9-NEXT:    ; return to shader part epilog
7527;
7528; GFX10-LABEL: s_fshr_v2i128:
7529; GFX10:       ; %bb.0:
7530; GFX10-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
7531; GFX10-NEXT:    s_lshr_b32 s18, s1, 31
7532; GFX10-NEXT:    s_mov_b32 s19, 0
7533; GFX10-NEXT:    s_andn2_b32 s17, 0x7f, s16
7534; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
7535; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[18:19]
7536; GFX10-NEXT:    s_not_b32 s18, s16
7537; GFX10-NEXT:    s_sub_i32 s21, s17, 64
7538; GFX10-NEXT:    s_sub_i32 s22, 64, s17
7539; GFX10-NEXT:    s_cmp_lt_u32 s17, 64
7540; GFX10-NEXT:    s_cselect_b32 s28, 1, 0
7541; GFX10-NEXT:    s_cmp_eq_u32 s17, 0
7542; GFX10-NEXT:    s_cselect_b32 s17, 1, 0
7543; GFX10-NEXT:    s_lshr_b64 s[22:23], s[0:1], s22
7544; GFX10-NEXT:    s_lshl_b64 s[24:25], s[2:3], s18
7545; GFX10-NEXT:    s_lshl_b64 s[26:27], s[0:1], s18
7546; GFX10-NEXT:    s_or_b64 s[22:23], s[22:23], s[24:25]
7547; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], s21
7548; GFX10-NEXT:    s_cmp_lg_u32 s28, 0
7549; GFX10-NEXT:    s_cselect_b64 s[24:25], s[26:27], 0
7550; GFX10-NEXT:    s_cselect_b64 s[0:1], s[22:23], s[0:1]
7551; GFX10-NEXT:    s_cmp_lg_u32 s17, 0
7552; GFX10-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
7553; GFX10-NEXT:    s_and_b32 s0, s16, 0x7f
7554; GFX10-NEXT:    s_sub_i32 s18, s0, 64
7555; GFX10-NEXT:    s_sub_i32 s17, 64, s0
7556; GFX10-NEXT:    s_cmp_lt_u32 s0, 64
7557; GFX10-NEXT:    s_cselect_b32 s21, 1, 0
7558; GFX10-NEXT:    s_cmp_eq_u32 s0, 0
7559; GFX10-NEXT:    s_cselect_b32 s26, 1, 0
7560; GFX10-NEXT:    s_lshr_b64 s[0:1], s[8:9], s16
7561; GFX10-NEXT:    s_lshl_b64 s[22:23], s[10:11], s17
7562; GFX10-NEXT:    s_lshr_b64 s[16:17], s[10:11], s16
7563; GFX10-NEXT:    s_or_b64 s[0:1], s[0:1], s[22:23]
7564; GFX10-NEXT:    s_lshr_b64 s[10:11], s[10:11], s18
7565; GFX10-NEXT:    s_cmp_lg_u32 s21, 0
7566; GFX10-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[10:11]
7567; GFX10-NEXT:    s_cmp_lg_u32 s26, 0
7568; GFX10-NEXT:    s_cselect_b64 s[0:1], s[8:9], s[0:1]
7569; GFX10-NEXT:    s_cmp_lg_u32 s21, 0
7570; GFX10-NEXT:    s_cselect_b64 s[8:9], s[16:17], 0
7571; GFX10-NEXT:    s_lshl_b64 s[6:7], s[6:7], 1
7572; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[8:9]
7573; GFX10-NEXT:    s_lshr_b32 s18, s5, 31
7574; GFX10-NEXT:    s_andn2_b32 s8, 0x7f, s20
7575; GFX10-NEXT:    s_or_b64 s[0:1], s[24:25], s[0:1]
7576; GFX10-NEXT:    s_lshl_b64 s[4:5], s[4:5], 1
7577; GFX10-NEXT:    s_or_b64 s[6:7], s[6:7], s[18:19]
7578; GFX10-NEXT:    s_not_b32 s16, s20
7579; GFX10-NEXT:    s_sub_i32 s18, s8, 64
7580; GFX10-NEXT:    s_sub_i32 s9, 64, s8
7581; GFX10-NEXT:    s_cmp_lt_u32 s8, 64
7582; GFX10-NEXT:    s_cselect_b32 s19, 1, 0
7583; GFX10-NEXT:    s_cmp_eq_u32 s8, 0
7584; GFX10-NEXT:    s_cselect_b32 s21, 1, 0
7585; GFX10-NEXT:    s_lshr_b64 s[8:9], s[4:5], s9
7586; GFX10-NEXT:    s_lshl_b64 s[10:11], s[6:7], s16
7587; GFX10-NEXT:    s_lshl_b64 s[16:17], s[4:5], s16
7588; GFX10-NEXT:    s_or_b64 s[8:9], s[8:9], s[10:11]
7589; GFX10-NEXT:    s_lshl_b64 s[4:5], s[4:5], s18
7590; GFX10-NEXT:    s_cmp_lg_u32 s19, 0
7591; GFX10-NEXT:    s_cselect_b64 s[10:11], s[16:17], 0
7592; GFX10-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
7593; GFX10-NEXT:    s_cmp_lg_u32 s21, 0
7594; GFX10-NEXT:    s_cselect_b64 s[6:7], s[6:7], s[4:5]
7595; GFX10-NEXT:    s_and_b32 s4, s20, 0x7f
7596; GFX10-NEXT:    s_sub_i32 s18, s4, 64
7597; GFX10-NEXT:    s_sub_i32 s8, 64, s4
7598; GFX10-NEXT:    s_cmp_lt_u32 s4, 64
7599; GFX10-NEXT:    s_cselect_b32 s19, 1, 0
7600; GFX10-NEXT:    s_cmp_eq_u32 s4, 0
7601; GFX10-NEXT:    s_cselect_b32 s21, 1, 0
7602; GFX10-NEXT:    s_lshr_b64 s[4:5], s[12:13], s20
7603; GFX10-NEXT:    s_lshl_b64 s[8:9], s[14:15], s8
7604; GFX10-NEXT:    s_lshr_b64 s[16:17], s[14:15], s20
7605; GFX10-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
7606; GFX10-NEXT:    s_lshr_b64 s[8:9], s[14:15], s18
7607; GFX10-NEXT:    s_cmp_lg_u32 s19, 0
7608; GFX10-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[8:9]
7609; GFX10-NEXT:    s_cmp_lg_u32 s21, 0
7610; GFX10-NEXT:    s_cselect_b64 s[4:5], s[12:13], s[4:5]
7611; GFX10-NEXT:    s_cmp_lg_u32 s19, 0
7612; GFX10-NEXT:    s_cselect_b64 s[8:9], s[16:17], 0
7613; GFX10-NEXT:    s_or_b64 s[4:5], s[10:11], s[4:5]
7614; GFX10-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
7615; GFX10-NEXT:    ; return to shader part epilog
7616;
7617; GFX11-LABEL: s_fshr_v2i128:
7618; GFX11:       ; %bb.0:
7619; GFX11-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
7620; GFX11-NEXT:    s_lshr_b32 s18, s1, 31
7621; GFX11-NEXT:    s_mov_b32 s19, 0
7622; GFX11-NEXT:    s_and_not1_b32 s17, 0x7f, s16
7623; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
7624; GFX11-NEXT:    s_or_b64 s[2:3], s[2:3], s[18:19]
7625; GFX11-NEXT:    s_not_b32 s18, s16
7626; GFX11-NEXT:    s_sub_i32 s21, s17, 64
7627; GFX11-NEXT:    s_sub_i32 s22, 64, s17
7628; GFX11-NEXT:    s_cmp_lt_u32 s17, 64
7629; GFX11-NEXT:    s_cselect_b32 s28, 1, 0
7630; GFX11-NEXT:    s_cmp_eq_u32 s17, 0
7631; GFX11-NEXT:    s_cselect_b32 s17, 1, 0
7632; GFX11-NEXT:    s_lshr_b64 s[22:23], s[0:1], s22
7633; GFX11-NEXT:    s_lshl_b64 s[24:25], s[2:3], s18
7634; GFX11-NEXT:    s_lshl_b64 s[26:27], s[0:1], s18
7635; GFX11-NEXT:    s_or_b64 s[22:23], s[22:23], s[24:25]
7636; GFX11-NEXT:    s_lshl_b64 s[0:1], s[0:1], s21
7637; GFX11-NEXT:    s_cmp_lg_u32 s28, 0
7638; GFX11-NEXT:    s_cselect_b64 s[24:25], s[26:27], 0
7639; GFX11-NEXT:    s_cselect_b64 s[0:1], s[22:23], s[0:1]
7640; GFX11-NEXT:    s_cmp_lg_u32 s17, 0
7641; GFX11-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
7642; GFX11-NEXT:    s_and_b32 s0, s16, 0x7f
7643; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
7644; GFX11-NEXT:    s_sub_i32 s18, s0, 64
7645; GFX11-NEXT:    s_sub_i32 s17, 64, s0
7646; GFX11-NEXT:    s_cmp_lt_u32 s0, 64
7647; GFX11-NEXT:    s_cselect_b32 s21, 1, 0
7648; GFX11-NEXT:    s_cmp_eq_u32 s0, 0
7649; GFX11-NEXT:    s_cselect_b32 s26, 1, 0
7650; GFX11-NEXT:    s_lshr_b64 s[0:1], s[8:9], s16
7651; GFX11-NEXT:    s_lshl_b64 s[22:23], s[10:11], s17
7652; GFX11-NEXT:    s_lshr_b64 s[16:17], s[10:11], s16
7653; GFX11-NEXT:    s_or_b64 s[0:1], s[0:1], s[22:23]
7654; GFX11-NEXT:    s_lshr_b64 s[10:11], s[10:11], s18
7655; GFX11-NEXT:    s_cmp_lg_u32 s21, 0
7656; GFX11-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[10:11]
7657; GFX11-NEXT:    s_cmp_lg_u32 s26, 0
7658; GFX11-NEXT:    s_cselect_b64 s[0:1], s[8:9], s[0:1]
7659; GFX11-NEXT:    s_cmp_lg_u32 s21, 0
7660; GFX11-NEXT:    s_cselect_b64 s[8:9], s[16:17], 0
7661; GFX11-NEXT:    s_lshl_b64 s[6:7], s[6:7], 1
7662; GFX11-NEXT:    s_or_b64 s[2:3], s[2:3], s[8:9]
7663; GFX11-NEXT:    s_lshr_b32 s18, s5, 31
7664; GFX11-NEXT:    s_and_not1_b32 s8, 0x7f, s20
7665; GFX11-NEXT:    s_or_b64 s[0:1], s[24:25], s[0:1]
7666; GFX11-NEXT:    s_lshl_b64 s[4:5], s[4:5], 1
7667; GFX11-NEXT:    s_or_b64 s[6:7], s[6:7], s[18:19]
7668; GFX11-NEXT:    s_not_b32 s16, s20
7669; GFX11-NEXT:    s_sub_i32 s18, s8, 64
7670; GFX11-NEXT:    s_sub_i32 s9, 64, s8
7671; GFX11-NEXT:    s_cmp_lt_u32 s8, 64
7672; GFX11-NEXT:    s_cselect_b32 s19, 1, 0
7673; GFX11-NEXT:    s_cmp_eq_u32 s8, 0
7674; GFX11-NEXT:    s_cselect_b32 s21, 1, 0
7675; GFX11-NEXT:    s_lshr_b64 s[8:9], s[4:5], s9
7676; GFX11-NEXT:    s_lshl_b64 s[10:11], s[6:7], s16
7677; GFX11-NEXT:    s_lshl_b64 s[16:17], s[4:5], s16
7678; GFX11-NEXT:    s_or_b64 s[8:9], s[8:9], s[10:11]
7679; GFX11-NEXT:    s_lshl_b64 s[4:5], s[4:5], s18
7680; GFX11-NEXT:    s_cmp_lg_u32 s19, 0
7681; GFX11-NEXT:    s_cselect_b64 s[10:11], s[16:17], 0
7682; GFX11-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
7683; GFX11-NEXT:    s_cmp_lg_u32 s21, 0
7684; GFX11-NEXT:    s_cselect_b64 s[6:7], s[6:7], s[4:5]
7685; GFX11-NEXT:    s_and_b32 s4, s20, 0x7f
7686; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
7687; GFX11-NEXT:    s_sub_i32 s18, s4, 64
7688; GFX11-NEXT:    s_sub_i32 s8, 64, s4
7689; GFX11-NEXT:    s_cmp_lt_u32 s4, 64
7690; GFX11-NEXT:    s_cselect_b32 s19, 1, 0
7691; GFX11-NEXT:    s_cmp_eq_u32 s4, 0
7692; GFX11-NEXT:    s_cselect_b32 s21, 1, 0
7693; GFX11-NEXT:    s_lshr_b64 s[4:5], s[12:13], s20
7694; GFX11-NEXT:    s_lshl_b64 s[8:9], s[14:15], s8
7695; GFX11-NEXT:    s_lshr_b64 s[16:17], s[14:15], s20
7696; GFX11-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
7697; GFX11-NEXT:    s_lshr_b64 s[8:9], s[14:15], s18
7698; GFX11-NEXT:    s_cmp_lg_u32 s19, 0
7699; GFX11-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[8:9]
7700; GFX11-NEXT:    s_cmp_lg_u32 s21, 0
7701; GFX11-NEXT:    s_cselect_b64 s[4:5], s[12:13], s[4:5]
7702; GFX11-NEXT:    s_cmp_lg_u32 s19, 0
7703; GFX11-NEXT:    s_cselect_b64 s[8:9], s[16:17], 0
7704; GFX11-NEXT:    s_or_b64 s[4:5], s[10:11], s[4:5]
7705; GFX11-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
7706; GFX11-NEXT:    ; return to shader part epilog
7707  %result = call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt)
7708  ret <2 x i128> %result
7709}
7710
7711define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt) {
7712; GFX6-LABEL: v_fshr_v2i128:
7713; GFX6:       ; %bb.0:
7714; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7715; GFX6-NEXT:    v_lshl_b64 v[2:3], v[2:3], 1
7716; GFX6-NEXT:    v_lshl_b64 v[17:18], v[0:1], 1
7717; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
7718; GFX6-NEXT:    v_or_b32_e32 v2, v2, v0
7719; GFX6-NEXT:    v_not_b32_e32 v0, v16
7720; GFX6-NEXT:    v_and_b32_e32 v19, 0x7f, v0
7721; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, 64, v19
7722; GFX6-NEXT:    v_lshr_b64 v[0:1], v[17:18], v0
7723; GFX6-NEXT:    v_lshl_b64 v[21:22], v[2:3], v19
7724; GFX6-NEXT:    v_and_b32_e32 v25, 0x7f, v16
7725; GFX6-NEXT:    v_or_b32_e32 v23, v0, v21
7726; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, 64, v25
7727; GFX6-NEXT:    v_or_b32_e32 v24, v1, v22
7728; GFX6-NEXT:    v_lshl_b64 v[0:1], v[10:11], v0
7729; GFX6-NEXT:    v_lshr_b64 v[21:22], v[8:9], v25
7730; GFX6-NEXT:    v_not_b32_e32 v26, 63
7731; GFX6-NEXT:    v_or_b32_e32 v21, v21, v0
7732; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v19, v26
7733; GFX6-NEXT:    v_or_b32_e32 v22, v22, v1
7734; GFX6-NEXT:    v_lshl_b64 v[0:1], v[17:18], v0
7735; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v19
7736; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v23, vcc
7737; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v24, vcc
7738; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v19
7739; GFX6-NEXT:    v_cndmask_b32_e64 v2, v0, v2, s[4:5]
7740; GFX6-NEXT:    v_cndmask_b32_e64 v3, v1, v3, s[4:5]
7741; GFX6-NEXT:    v_add_i32_e64 v0, s[4:5], v25, v26
7742; GFX6-NEXT:    v_lshl_b64 v[16:17], v[17:18], v19
7743; GFX6-NEXT:    v_lshr_b64 v[0:1], v[10:11], v0
7744; GFX6-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v25
7745; GFX6-NEXT:    v_cndmask_b32_e32 v16, 0, v16, vcc
7746; GFX6-NEXT:    v_cndmask_b32_e64 v18, v0, v21, s[4:5]
7747; GFX6-NEXT:    v_cndmask_b32_e64 v19, v1, v22, s[4:5]
7748; GFX6-NEXT:    v_cndmask_b32_e32 v17, 0, v17, vcc
7749; GFX6-NEXT:    v_lshr_b64 v[0:1], v[10:11], v25
7750; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v25
7751; GFX6-NEXT:    v_cndmask_b32_e32 v8, v18, v8, vcc
7752; GFX6-NEXT:    v_cndmask_b32_e32 v9, v19, v9, vcc
7753; GFX6-NEXT:    v_lshl_b64 v[6:7], v[6:7], 1
7754; GFX6-NEXT:    v_cndmask_b32_e64 v10, 0, v0, s[4:5]
7755; GFX6-NEXT:    v_cndmask_b32_e64 v18, 0, v1, s[4:5]
7756; GFX6-NEXT:    v_or_b32_e32 v0, v16, v8
7757; GFX6-NEXT:    v_or_b32_e32 v1, v17, v9
7758; GFX6-NEXT:    v_lshl_b64 v[8:9], v[4:5], 1
7759; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 31, v5
7760; GFX6-NEXT:    v_or_b32_e32 v6, v6, v4
7761; GFX6-NEXT:    v_not_b32_e32 v4, v20
7762; GFX6-NEXT:    v_and_b32_e32 v16, 0x7f, v4
7763; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 64, v16
7764; GFX6-NEXT:    v_or_b32_e32 v2, v2, v10
7765; GFX6-NEXT:    v_lshr_b64 v[4:5], v[8:9], v4
7766; GFX6-NEXT:    v_lshl_b64 v[10:11], v[6:7], v16
7767; GFX6-NEXT:    v_add_i32_e32 v17, vcc, v16, v26
7768; GFX6-NEXT:    v_or_b32_e32 v10, v4, v10
7769; GFX6-NEXT:    v_or_b32_e32 v11, v5, v11
7770; GFX6-NEXT:    v_lshl_b64 v[4:5], v[8:9], v16
7771; GFX6-NEXT:    v_lshl_b64 v[8:9], v[8:9], v17
7772; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v16
7773; GFX6-NEXT:    v_or_b32_e32 v3, v3, v18
7774; GFX6-NEXT:    v_cndmask_b32_e32 v17, 0, v4, vcc
7775; GFX6-NEXT:    v_cndmask_b32_e32 v18, 0, v5, vcc
7776; GFX6-NEXT:    v_cndmask_b32_e32 v4, v8, v10, vcc
7777; GFX6-NEXT:    v_cndmask_b32_e32 v5, v9, v11, vcc
7778; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v16
7779; GFX6-NEXT:    v_and_b32_e32 v10, 0x7f, v20
7780; GFX6-NEXT:    v_cndmask_b32_e32 v8, v4, v6, vcc
7781; GFX6-NEXT:    v_cndmask_b32_e32 v9, v5, v7, vcc
7782; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, 64, v10
7783; GFX6-NEXT:    v_lshr_b64 v[4:5], v[12:13], v10
7784; GFX6-NEXT:    v_lshl_b64 v[6:7], v[14:15], v6
7785; GFX6-NEXT:    v_add_i32_e32 v11, vcc, v10, v26
7786; GFX6-NEXT:    v_or_b32_e32 v16, v4, v6
7787; GFX6-NEXT:    v_or_b32_e32 v19, v5, v7
7788; GFX6-NEXT:    v_lshr_b64 v[6:7], v[14:15], v11
7789; GFX6-NEXT:    v_lshr_b64 v[4:5], v[14:15], v10
7790; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v10
7791; GFX6-NEXT:    v_cndmask_b32_e32 v6, v6, v16, vcc
7792; GFX6-NEXT:    v_cndmask_b32_e32 v7, v7, v19, vcc
7793; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v10
7794; GFX6-NEXT:    v_cndmask_b32_e64 v6, v6, v12, s[4:5]
7795; GFX6-NEXT:    v_cndmask_b32_e64 v7, v7, v13, s[4:5]
7796; GFX6-NEXT:    v_cndmask_b32_e32 v10, 0, v4, vcc
7797; GFX6-NEXT:    v_cndmask_b32_e32 v11, 0, v5, vcc
7798; GFX6-NEXT:    v_or_b32_e32 v4, v17, v6
7799; GFX6-NEXT:    v_or_b32_e32 v5, v18, v7
7800; GFX6-NEXT:    v_or_b32_e32 v6, v8, v10
7801; GFX6-NEXT:    v_or_b32_e32 v7, v9, v11
7802; GFX6-NEXT:    s_setpc_b64 s[30:31]
7803;
7804; GFX8-LABEL: v_fshr_v2i128:
7805; GFX8:       ; %bb.0:
7806; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7807; GFX8-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
7808; GFX8-NEXT:    v_lshlrev_b64 v[17:18], 1, v[0:1]
7809; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
7810; GFX8-NEXT:    v_or_b32_e32 v2, v2, v0
7811; GFX8-NEXT:    v_not_b32_e32 v0, v16
7812; GFX8-NEXT:    v_and_b32_e32 v19, 0x7f, v0
7813; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, 64, v19
7814; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v0, v[17:18]
7815; GFX8-NEXT:    v_lshlrev_b64 v[21:22], v19, v[2:3]
7816; GFX8-NEXT:    v_and_b32_e32 v25, 0x7f, v16
7817; GFX8-NEXT:    v_or_b32_e32 v23, v0, v21
7818; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, 64, v25
7819; GFX8-NEXT:    v_or_b32_e32 v24, v1, v22
7820; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v0, v[10:11]
7821; GFX8-NEXT:    v_lshrrev_b64 v[21:22], v25, v[8:9]
7822; GFX8-NEXT:    v_not_b32_e32 v26, 63
7823; GFX8-NEXT:    v_or_b32_e32 v21, v21, v0
7824; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v19, v26
7825; GFX8-NEXT:    v_or_b32_e32 v22, v22, v1
7826; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v0, v[17:18]
7827; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v19
7828; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v23, vcc
7829; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v24, vcc
7830; GFX8-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v19
7831; GFX8-NEXT:    v_cndmask_b32_e64 v2, v0, v2, s[4:5]
7832; GFX8-NEXT:    v_cndmask_b32_e64 v3, v1, v3, s[4:5]
7833; GFX8-NEXT:    v_add_u32_e64 v0, s[4:5], v25, v26
7834; GFX8-NEXT:    v_lshlrev_b64 v[16:17], v19, v[17:18]
7835; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v0, v[10:11]
7836; GFX8-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v25
7837; GFX8-NEXT:    v_cndmask_b32_e32 v16, 0, v16, vcc
7838; GFX8-NEXT:    v_cndmask_b32_e64 v18, v0, v21, s[4:5]
7839; GFX8-NEXT:    v_cndmask_b32_e64 v19, v1, v22, s[4:5]
7840; GFX8-NEXT:    v_cndmask_b32_e32 v17, 0, v17, vcc
7841; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v25, v[10:11]
7842; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v25
7843; GFX8-NEXT:    v_cndmask_b32_e32 v8, v18, v8, vcc
7844; GFX8-NEXT:    v_cndmask_b32_e32 v9, v19, v9, vcc
7845; GFX8-NEXT:    v_lshlrev_b64 v[6:7], 1, v[6:7]
7846; GFX8-NEXT:    v_cndmask_b32_e64 v10, 0, v0, s[4:5]
7847; GFX8-NEXT:    v_cndmask_b32_e64 v18, 0, v1, s[4:5]
7848; GFX8-NEXT:    v_or_b32_e32 v0, v16, v8
7849; GFX8-NEXT:    v_or_b32_e32 v1, v17, v9
7850; GFX8-NEXT:    v_lshlrev_b64 v[8:9], 1, v[4:5]
7851; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 31, v5
7852; GFX8-NEXT:    v_or_b32_e32 v6, v6, v4
7853; GFX8-NEXT:    v_not_b32_e32 v4, v20
7854; GFX8-NEXT:    v_and_b32_e32 v16, 0x7f, v4
7855; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 64, v16
7856; GFX8-NEXT:    v_or_b32_e32 v2, v2, v10
7857; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v4, v[8:9]
7858; GFX8-NEXT:    v_lshlrev_b64 v[10:11], v16, v[6:7]
7859; GFX8-NEXT:    v_add_u32_e32 v17, vcc, v16, v26
7860; GFX8-NEXT:    v_or_b32_e32 v10, v4, v10
7861; GFX8-NEXT:    v_or_b32_e32 v11, v5, v11
7862; GFX8-NEXT:    v_lshlrev_b64 v[4:5], v16, v[8:9]
7863; GFX8-NEXT:    v_lshlrev_b64 v[8:9], v17, v[8:9]
7864; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v16
7865; GFX8-NEXT:    v_or_b32_e32 v3, v3, v18
7866; GFX8-NEXT:    v_cndmask_b32_e32 v17, 0, v4, vcc
7867; GFX8-NEXT:    v_cndmask_b32_e32 v18, 0, v5, vcc
7868; GFX8-NEXT:    v_cndmask_b32_e32 v4, v8, v10, vcc
7869; GFX8-NEXT:    v_cndmask_b32_e32 v5, v9, v11, vcc
7870; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v16
7871; GFX8-NEXT:    v_and_b32_e32 v10, 0x7f, v20
7872; GFX8-NEXT:    v_cndmask_b32_e32 v8, v4, v6, vcc
7873; GFX8-NEXT:    v_cndmask_b32_e32 v9, v5, v7, vcc
7874; GFX8-NEXT:    v_sub_u32_e32 v6, vcc, 64, v10
7875; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v10, v[12:13]
7876; GFX8-NEXT:    v_lshlrev_b64 v[6:7], v6, v[14:15]
7877; GFX8-NEXT:    v_add_u32_e32 v11, vcc, v10, v26
7878; GFX8-NEXT:    v_or_b32_e32 v16, v4, v6
7879; GFX8-NEXT:    v_or_b32_e32 v19, v5, v7
7880; GFX8-NEXT:    v_lshrrev_b64 v[6:7], v11, v[14:15]
7881; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v10, v[14:15]
7882; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v10
7883; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v16, vcc
7884; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v19, vcc
7885; GFX8-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v10
7886; GFX8-NEXT:    v_cndmask_b32_e64 v6, v6, v12, s[4:5]
7887; GFX8-NEXT:    v_cndmask_b32_e64 v7, v7, v13, s[4:5]
7888; GFX8-NEXT:    v_cndmask_b32_e32 v10, 0, v4, vcc
7889; GFX8-NEXT:    v_cndmask_b32_e32 v11, 0, v5, vcc
7890; GFX8-NEXT:    v_or_b32_e32 v4, v17, v6
7891; GFX8-NEXT:    v_or_b32_e32 v5, v18, v7
7892; GFX8-NEXT:    v_or_b32_e32 v6, v8, v10
7893; GFX8-NEXT:    v_or_b32_e32 v7, v9, v11
7894; GFX8-NEXT:    s_setpc_b64 s[30:31]
7895;
7896; GFX9-LABEL: v_fshr_v2i128:
7897; GFX9:       ; %bb.0:
7898; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7899; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
7900; GFX9-NEXT:    v_lshlrev_b64 v[17:18], 1, v[0:1]
7901; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
7902; GFX9-NEXT:    v_or_b32_e32 v2, v2, v0
7903; GFX9-NEXT:    v_not_b32_e32 v0, v16
7904; GFX9-NEXT:    v_and_b32_e32 v19, 0x7f, v0
7905; GFX9-NEXT:    v_sub_u32_e32 v0, 64, v19
7906; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v0, v[17:18]
7907; GFX9-NEXT:    v_lshlrev_b64 v[21:22], v19, v[2:3]
7908; GFX9-NEXT:    v_and_b32_e32 v25, 0x7f, v16
7909; GFX9-NEXT:    v_or_b32_e32 v23, v0, v21
7910; GFX9-NEXT:    v_sub_u32_e32 v0, 64, v25
7911; GFX9-NEXT:    v_or_b32_e32 v24, v1, v22
7912; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v0, v[10:11]
7913; GFX9-NEXT:    v_lshrrev_b64 v[21:22], v25, v[8:9]
7914; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v19
7915; GFX9-NEXT:    v_or_b32_e32 v21, v21, v0
7916; GFX9-NEXT:    v_add_u32_e32 v0, 0xffffffc0, v19
7917; GFX9-NEXT:    v_or_b32_e32 v22, v22, v1
7918; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v0, v[17:18]
7919; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v19
7920; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v23, vcc
7921; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v24, vcc
7922; GFX9-NEXT:    v_cndmask_b32_e64 v2, v0, v2, s[4:5]
7923; GFX9-NEXT:    v_add_u32_e32 v0, 0xffffffc0, v25
7924; GFX9-NEXT:    v_lshlrev_b64 v[16:17], v19, v[17:18]
7925; GFX9-NEXT:    v_cndmask_b32_e64 v3, v1, v3, s[4:5]
7926; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v0, v[10:11]
7927; GFX9-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v25
7928; GFX9-NEXT:    v_cndmask_b32_e32 v16, 0, v16, vcc
7929; GFX9-NEXT:    v_cndmask_b32_e64 v18, v0, v21, s[4:5]
7930; GFX9-NEXT:    v_cndmask_b32_e64 v19, v1, v22, s[4:5]
7931; GFX9-NEXT:    v_cndmask_b32_e32 v17, 0, v17, vcc
7932; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v25, v[10:11]
7933; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v25
7934; GFX9-NEXT:    v_cndmask_b32_e32 v8, v18, v8, vcc
7935; GFX9-NEXT:    v_cndmask_b32_e32 v9, v19, v9, vcc
7936; GFX9-NEXT:    v_lshlrev_b64 v[6:7], 1, v[6:7]
7937; GFX9-NEXT:    v_cndmask_b32_e64 v10, 0, v0, s[4:5]
7938; GFX9-NEXT:    v_cndmask_b32_e64 v11, 0, v1, s[4:5]
7939; GFX9-NEXT:    v_or_b32_e32 v0, v16, v8
7940; GFX9-NEXT:    v_or_b32_e32 v1, v17, v9
7941; GFX9-NEXT:    v_lshlrev_b64 v[8:9], 1, v[4:5]
7942; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 31, v5
7943; GFX9-NEXT:    v_or_b32_e32 v6, v6, v4
7944; GFX9-NEXT:    v_not_b32_e32 v4, v20
7945; GFX9-NEXT:    v_and_b32_e32 v16, 0x7f, v4
7946; GFX9-NEXT:    v_sub_u32_e32 v4, 64, v16
7947; GFX9-NEXT:    v_or_b32_e32 v2, v2, v10
7948; GFX9-NEXT:    v_or_b32_e32 v3, v3, v11
7949; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v4, v[8:9]
7950; GFX9-NEXT:    v_lshlrev_b64 v[10:11], v16, v[6:7]
7951; GFX9-NEXT:    v_add_u32_e32 v17, 0xffffffc0, v16
7952; GFX9-NEXT:    v_or_b32_e32 v10, v4, v10
7953; GFX9-NEXT:    v_or_b32_e32 v11, v5, v11
7954; GFX9-NEXT:    v_lshlrev_b64 v[4:5], v16, v[8:9]
7955; GFX9-NEXT:    v_lshlrev_b64 v[8:9], v17, v[8:9]
7956; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v16
7957; GFX9-NEXT:    v_cndmask_b32_e32 v17, 0, v4, vcc
7958; GFX9-NEXT:    v_cndmask_b32_e32 v18, 0, v5, vcc
7959; GFX9-NEXT:    v_cndmask_b32_e32 v4, v8, v10, vcc
7960; GFX9-NEXT:    v_cndmask_b32_e32 v5, v9, v11, vcc
7961; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v16
7962; GFX9-NEXT:    v_and_b32_e32 v10, 0x7f, v20
7963; GFX9-NEXT:    v_cndmask_b32_e32 v8, v4, v6, vcc
7964; GFX9-NEXT:    v_sub_u32_e32 v6, 64, v10
7965; GFX9-NEXT:    v_cndmask_b32_e32 v9, v5, v7, vcc
7966; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v10, v[12:13]
7967; GFX9-NEXT:    v_lshlrev_b64 v[6:7], v6, v[14:15]
7968; GFX9-NEXT:    v_add_u32_e32 v11, 0xffffffc0, v10
7969; GFX9-NEXT:    v_or_b32_e32 v16, v4, v6
7970; GFX9-NEXT:    v_or_b32_e32 v19, v5, v7
7971; GFX9-NEXT:    v_lshrrev_b64 v[6:7], v11, v[14:15]
7972; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v10, v[14:15]
7973; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v10
7974; GFX9-NEXT:    v_cndmask_b32_e32 v6, v6, v16, vcc
7975; GFX9-NEXT:    v_cndmask_b32_e32 v7, v7, v19, vcc
7976; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v10
7977; GFX9-NEXT:    v_cndmask_b32_e64 v6, v6, v12, s[4:5]
7978; GFX9-NEXT:    v_cndmask_b32_e64 v7, v7, v13, s[4:5]
7979; GFX9-NEXT:    v_cndmask_b32_e32 v10, 0, v4, vcc
7980; GFX9-NEXT:    v_cndmask_b32_e32 v11, 0, v5, vcc
7981; GFX9-NEXT:    v_or_b32_e32 v4, v17, v6
7982; GFX9-NEXT:    v_or_b32_e32 v5, v18, v7
7983; GFX9-NEXT:    v_or_b32_e32 v6, v8, v10
7984; GFX9-NEXT:    v_or_b32_e32 v7, v9, v11
7985; GFX9-NEXT:    s_setpc_b64 s[30:31]
7986;
7987; GFX10-LABEL: v_fshr_v2i128:
7988; GFX10:       ; %bb.0:
7989; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7990; GFX10-NEXT:    v_not_b32_e32 v17, v16
7991; GFX10-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
7992; GFX10-NEXT:    v_and_b32_e32 v26, 0x7f, v16
7993; GFX10-NEXT:    v_lshlrev_b64 v[6:7], 1, v[6:7]
7994; GFX10-NEXT:    v_and_b32_e32 v25, 0x7f, v17
7995; GFX10-NEXT:    v_lshrrev_b32_e32 v17, 31, v1
7996; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
7997; GFX10-NEXT:    v_add_nc_u32_e32 v27, 0xffffffc0, v26
7998; GFX10-NEXT:    v_cmp_gt_u32_e64 s4, 64, v26
7999; GFX10-NEXT:    v_sub_nc_u32_e32 v18, 64, v25
8000; GFX10-NEXT:    v_or_b32_e32 v2, v2, v17
8001; GFX10-NEXT:    v_add_nc_u32_e32 v19, 0xffffffc0, v25
8002; GFX10-NEXT:    v_lshlrev_b64 v[23:24], v25, v[0:1]
8003; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v25
8004; GFX10-NEXT:    v_lshrrev_b64 v[17:18], v18, v[0:1]
8005; GFX10-NEXT:    v_lshlrev_b64 v[21:22], v25, v[2:3]
8006; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v19, v[0:1]
8007; GFX10-NEXT:    v_cndmask_b32_e32 v23, 0, v23, vcc_lo
8008; GFX10-NEXT:    v_cndmask_b32_e32 v24, 0, v24, vcc_lo
8009; GFX10-NEXT:    v_or_b32_e32 v22, v18, v22
8010; GFX10-NEXT:    v_sub_nc_u32_e32 v18, 64, v26
8011; GFX10-NEXT:    v_or_b32_e32 v21, v17, v21
8012; GFX10-NEXT:    v_lshrrev_b64 v[16:17], v26, v[8:9]
8013; GFX10-NEXT:    v_cndmask_b32_e32 v22, v1, v22, vcc_lo
8014; GFX10-NEXT:    v_lshlrev_b64 v[18:19], v18, v[10:11]
8015; GFX10-NEXT:    v_cndmask_b32_e32 v21, v0, v21, vcc_lo
8016; GFX10-NEXT:    v_lshrrev_b64 v[0:1], v27, v[10:11]
8017; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v25
8018; GFX10-NEXT:    v_or_b32_e32 v16, v16, v18
8019; GFX10-NEXT:    v_or_b32_e32 v17, v17, v19
8020; GFX10-NEXT:    v_cndmask_b32_e32 v18, v21, v2, vcc_lo
8021; GFX10-NEXT:    v_cndmask_b32_e32 v22, v22, v3, vcc_lo
8022; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v26
8023; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, v16, s4
8024; GFX10-NEXT:    v_not_b32_e32 v16, v20
8025; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s4
8026; GFX10-NEXT:    v_lshrrev_b64 v[2:3], v26, v[10:11]
8027; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
8028; GFX10-NEXT:    v_and_b32_e32 v25, 0x7f, v16
8029; GFX10-NEXT:    v_lshrrev_b32_e32 v8, 31, v5
8030; GFX10-NEXT:    v_lshlrev_b64 v[4:5], 1, v[4:5]
8031; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
8032; GFX10-NEXT:    v_or_b32_e32 v0, v23, v0
8033; GFX10-NEXT:    v_sub_nc_u32_e32 v9, 64, v25
8034; GFX10-NEXT:    v_or_b32_e32 v6, v6, v8
8035; GFX10-NEXT:    v_and_b32_e32 v23, 0x7f, v20
8036; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s4
8037; GFX10-NEXT:    v_cndmask_b32_e64 v26, 0, v3, s4
8038; GFX10-NEXT:    v_lshrrev_b64 v[8:9], v9, v[4:5]
8039; GFX10-NEXT:    v_lshlrev_b64 v[10:11], v25, v[6:7]
8040; GFX10-NEXT:    v_sub_nc_u32_e32 v20, 64, v23
8041; GFX10-NEXT:    v_add_nc_u32_e32 v3, 0xffffffc0, v25
8042; GFX10-NEXT:    v_or_b32_e32 v2, v18, v2
8043; GFX10-NEXT:    v_lshlrev_b64 v[16:17], v25, v[4:5]
8044; GFX10-NEXT:    v_lshrrev_b64 v[18:19], v23, v[12:13]
8045; GFX10-NEXT:    v_or_b32_e32 v10, v8, v10
8046; GFX10-NEXT:    v_add_nc_u32_e32 v8, 0xffffffc0, v23
8047; GFX10-NEXT:    v_lshlrev_b64 v[20:21], v20, v[14:15]
8048; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v25
8049; GFX10-NEXT:    v_lshlrev_b64 v[3:4], v3, v[4:5]
8050; GFX10-NEXT:    v_or_b32_e32 v5, v9, v11
8051; GFX10-NEXT:    v_lshrrev_b64 v[8:9], v8, v[14:15]
8052; GFX10-NEXT:    v_cmp_gt_u32_e64 s4, 64, v23
8053; GFX10-NEXT:    v_cndmask_b32_e32 v11, 0, v16, vcc_lo
8054; GFX10-NEXT:    v_or_b32_e32 v16, v18, v20
8055; GFX10-NEXT:    v_or_b32_e32 v18, v19, v21
8056; GFX10-NEXT:    v_cndmask_b32_e32 v10, v3, v10, vcc_lo
8057; GFX10-NEXT:    v_cndmask_b32_e32 v5, v4, v5, vcc_lo
8058; GFX10-NEXT:    v_lshrrev_b64 v[3:4], v23, v[14:15]
8059; GFX10-NEXT:    v_cndmask_b32_e64 v8, v8, v16, s4
8060; GFX10-NEXT:    v_cmp_eq_u32_e64 s5, 0, v23
8061; GFX10-NEXT:    v_cmp_eq_u32_e64 s6, 0, v25
8062; GFX10-NEXT:    v_cndmask_b32_e64 v9, v9, v18, s4
8063; GFX10-NEXT:    v_cndmask_b32_e32 v14, 0, v17, vcc_lo
8064; GFX10-NEXT:    v_or_b32_e32 v1, v24, v1
8065; GFX10-NEXT:    v_cndmask_b32_e64 v6, v10, v6, s6
8066; GFX10-NEXT:    v_cndmask_b32_e64 v7, v5, v7, s6
8067; GFX10-NEXT:    v_cndmask_b32_e64 v5, v8, v12, s5
8068; GFX10-NEXT:    v_cndmask_b32_e64 v8, v9, v13, s5
8069; GFX10-NEXT:    v_cndmask_b32_e64 v9, 0, v3, s4
8070; GFX10-NEXT:    v_cndmask_b32_e64 v10, 0, v4, s4
8071; GFX10-NEXT:    v_or_b32_e32 v3, v22, v26
8072; GFX10-NEXT:    v_or_b32_e32 v4, v11, v5
8073; GFX10-NEXT:    v_or_b32_e32 v5, v14, v8
8074; GFX10-NEXT:    v_or_b32_e32 v6, v6, v9
8075; GFX10-NEXT:    v_or_b32_e32 v7, v7, v10
8076; GFX10-NEXT:    s_setpc_b64 s[30:31]
8077;
8078; GFX11-LABEL: v_fshr_v2i128:
8079; GFX11:       ; %bb.0:
8080; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8081; GFX11-NEXT:    v_not_b32_e32 v17, v16
8082; GFX11-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
8083; GFX11-NEXT:    v_lshlrev_b64 v[6:7], 1, v[6:7]
8084; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
8085; GFX11-NEXT:    v_and_b32_e32 v25, 0x7f, v17
8086; GFX11-NEXT:    v_lshrrev_b32_e32 v17, 31, v1
8087; GFX11-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
8088; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v25
8089; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
8090; GFX11-NEXT:    v_or_b32_e32 v2, v2, v17
8091; GFX11-NEXT:    v_lshlrev_b64 v[23:24], v25, v[0:1]
8092; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
8093; GFX11-NEXT:    v_dual_cndmask_b32 v23, 0, v23 :: v_dual_and_b32 v26, 0x7f, v16
8094; GFX11-NEXT:    v_cndmask_b32_e32 v24, 0, v24, vcc_lo
8095; GFX11-NEXT:    v_sub_nc_u32_e32 v18, 64, v25
8096; GFX11-NEXT:    v_lshlrev_b64 v[21:22], v25, v[2:3]
8097; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
8098; GFX11-NEXT:    v_cmp_gt_u32_e64 s0, 64, v26
8099; GFX11-NEXT:    v_lshrrev_b64 v[17:18], v18, v[0:1]
8100; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
8101; GFX11-NEXT:    v_or_b32_e32 v22, v18, v22
8102; GFX11-NEXT:    v_add_nc_u32_e32 v19, 0xffffffc0, v25
8103; GFX11-NEXT:    v_or_b32_e32 v21, v17, v21
8104; GFX11-NEXT:    v_sub_nc_u32_e32 v18, 64, v26
8105; GFX11-NEXT:    v_lshrrev_b64 v[16:17], v26, v[8:9]
8106; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
8107; GFX11-NEXT:    v_lshlrev_b64 v[0:1], v19, v[0:1]
8108; GFX11-NEXT:    v_lshlrev_b64 v[18:19], v18, v[10:11]
8109; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
8110; GFX11-NEXT:    v_dual_cndmask_b32 v22, v1, v22 :: v_dual_cndmask_b32 v21, v0, v21
8111; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v25
8112; GFX11-NEXT:    v_add_nc_u32_e32 v27, 0xffffffc0, v26
8113; GFX11-NEXT:    v_or_b32_e32 v16, v16, v18
8114; GFX11-NEXT:    v_or_b32_e32 v17, v17, v19
8115; GFX11-NEXT:    v_cndmask_b32_e32 v22, v22, v3, vcc_lo
8116; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
8117; GFX11-NEXT:    v_lshrrev_b64 v[0:1], v27, v[10:11]
8118; GFX11-NEXT:    v_cndmask_b32_e64 v0, v0, v16, s0
8119; GFX11-NEXT:    v_not_b32_e32 v16, v20
8120; GFX11-NEXT:    v_cndmask_b32_e32 v18, v21, v2, vcc_lo
8121; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v26
8122; GFX11-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s0
8123; GFX11-NEXT:    v_lshrrev_b64 v[2:3], v26, v[10:11]
8124; GFX11-NEXT:    v_and_b32_e32 v25, 0x7f, v16
8125; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
8126; GFX11-NEXT:    v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9
8127; GFX11-NEXT:    v_lshrrev_b32_e32 v8, 31, v5
8128; GFX11-NEXT:    v_lshlrev_b64 v[4:5], 1, v[4:5]
8129; GFX11-NEXT:    v_sub_nc_u32_e32 v9, 64, v25
8130; GFX11-NEXT:    v_cndmask_b32_e64 v26, 0, v3, s0
8131; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xffffffc0, v25
8132; GFX11-NEXT:    v_or_b32_e32 v6, v6, v8
8133; GFX11-NEXT:    v_or_b32_e32 v0, v23, v0
8134; GFX11-NEXT:    v_lshrrev_b64 v[8:9], v9, v[4:5]
8135; GFX11-NEXT:    v_lshlrev_b64 v[16:17], v25, v[4:5]
8136; GFX11-NEXT:    v_lshlrev_b64 v[3:4], v3, v[4:5]
8137; GFX11-NEXT:    v_lshlrev_b64 v[10:11], v25, v[6:7]
8138; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v25
8139; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s0
8140; GFX11-NEXT:    v_cmp_eq_u32_e64 s2, 0, v25
8141; GFX11-NEXT:    v_or_b32_e32 v1, v24, v1
8142; GFX11-NEXT:    v_or_b32_e32 v10, v8, v10
8143; GFX11-NEXT:    v_and_b32_e32 v23, 0x7f, v20
8144; GFX11-NEXT:    v_or_b32_e32 v2, v18, v2
8145; GFX11-NEXT:    v_or_b32_e32 v5, v9, v11
8146; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
8147; GFX11-NEXT:    v_dual_cndmask_b32 v11, 0, v16 :: v_dual_cndmask_b32 v10, v3, v10
8148; GFX11-NEXT:    v_sub_nc_u32_e32 v20, 64, v23
8149; GFX11-NEXT:    v_add_nc_u32_e32 v8, 0xffffffc0, v23
8150; GFX11-NEXT:    v_lshrrev_b64 v[18:19], v23, v[12:13]
8151; GFX11-NEXT:    v_cmp_gt_u32_e64 s0, 64, v23
8152; GFX11-NEXT:    v_cndmask_b32_e32 v5, v4, v5, vcc_lo
8153; GFX11-NEXT:    v_lshlrev_b64 v[20:21], v20, v[14:15]
8154; GFX11-NEXT:    v_lshrrev_b64 v[8:9], v8, v[14:15]
8155; GFX11-NEXT:    v_lshrrev_b64 v[3:4], v23, v[14:15]
8156; GFX11-NEXT:    v_cndmask_b32_e32 v14, 0, v17, vcc_lo
8157; GFX11-NEXT:    v_cmp_eq_u32_e64 s1, 0, v23
8158; GFX11-NEXT:    v_cndmask_b32_e64 v6, v10, v6, s2
8159; GFX11-NEXT:    v_or_b32_e32 v16, v18, v20
8160; GFX11-NEXT:    v_or_b32_e32 v18, v19, v21
8161; GFX11-NEXT:    v_cndmask_b32_e64 v7, v5, v7, s2
8162; GFX11-NEXT:    v_cndmask_b32_e64 v10, 0, v4, s0
8163; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
8164; GFX11-NEXT:    v_cndmask_b32_e64 v8, v8, v16, s0
8165; GFX11-NEXT:    v_cndmask_b32_e64 v9, v9, v18, s0
8166; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
8167; GFX11-NEXT:    v_or_b32_e32 v7, v7, v10
8168; GFX11-NEXT:    v_cndmask_b32_e64 v5, v8, v12, s1
8169; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
8170; GFX11-NEXT:    v_cndmask_b32_e64 v8, v9, v13, s1
8171; GFX11-NEXT:    v_cndmask_b32_e64 v9, 0, v3, s0
8172; GFX11-NEXT:    v_or_b32_e32 v3, v22, v26
8173; GFX11-NEXT:    v_or_b32_e32 v4, v11, v5
8174; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
8175; GFX11-NEXT:    v_or_b32_e32 v5, v14, v8
8176; GFX11-NEXT:    v_or_b32_e32 v6, v6, v9
8177; GFX11-NEXT:    s_setpc_b64 s[30:31]
8178  %result = call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt)
8179  ret <2 x i128> %result
8180}
8181
; Declarations of the llvm.fshr (funnel shift right) intrinsic overloads.
; Every overload takes three operands of one type and returns that same type;
; the visible tests pass them in the order (lhs, rhs, amt). Presumably each
; overload below is exercised by a test function earlier in this file.
;
; 7-bit and 8-bit scalars, plus 8-bit element vectors.
8182declare i7 @llvm.fshr.i7(i7, i7, i7) #0
8183declare i8 @llvm.fshr.i8(i8, i8, i8) #0
8184declare <2 x i8> @llvm.fshr.v2i8(<2 x i8>, <2 x i8>, <2 x i8>) #0
8185declare <4 x i8> @llvm.fshr.v4i8(<4 x i8>, <4 x i8>, <4 x i8>) #0
8186
; 16-bit scalar and vectors, including the odd element counts 3 and 5.
8187declare i16 @llvm.fshr.i16(i16, i16, i16) #0
8188declare <2 x i16> @llvm.fshr.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) #0
8189declare <3 x i16> @llvm.fshr.v3i16(<3 x i16>, <3 x i16>, <3 x i16>) #0
8190declare <4 x i16> @llvm.fshr.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) #0
8191declare <5 x i16> @llvm.fshr.v5i16(<5 x i16>, <5 x i16>, <5 x i16>) #0
8192declare <6 x i16> @llvm.fshr.v6i16(<6 x i16>, <6 x i16>, <6 x i16>) #0
8193declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) #0
8194
; 24-bit scalar and two-element vector (a width that is not a power of two).
8195declare i24 @llvm.fshr.i24(i24, i24, i24) #0
8196declare <2 x i24> @llvm.fshr.v2i24(<2 x i24>, <2 x i24>, <2 x i24>) #0
8197
; 32-bit scalar and vectors, from 2 up to 16 elements.
8198declare i32 @llvm.fshr.i32(i32, i32, i32) #0
8199declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) #0
8200declare <3 x i32> @llvm.fshr.v3i32(<3 x i32>, <3 x i32>, <3 x i32>) #0
8201declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #0
8202declare <5 x i32> @llvm.fshr.v5i32(<5 x i32>, <5 x i32>, <5 x i32>) #0
8203declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>) #0
8204
; 48-bit scalar (wider than 32 bits, narrower than 64 bits).
8205declare i48 @llvm.fshr.i48(i48, i48, i48) #0
8206
; 64-bit scalar and two-element vector.
8207declare i64 @llvm.fshr.i64(i64, i64, i64) #0
8208declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) #0
8209
; 128-bit scalar and two-element vector.
8210declare i128 @llvm.fshr.i128(i128, i128, i128) #0
8211declare <2 x i128> @llvm.fshr.v2i128(<2 x i128>, <2 x i128>, <2 x i128>) #0
8212
; Attribute group shared by every declaration above.
8213attributes #0 = { nounwind readnone speculatable willreturn }
8214