xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll (revision 17f3e00911b860d535f41185e605c47babcc2039)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
5; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
6; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
7
; ashr of an i8 value by a variable i8 amount; both operands are ordinary (non-inreg) arguments.
; Every check block below expects vector-register code on v0/v1 that returns with s_setpc_b64.
8define i8 @v_ashr_i8(i8 %value, i8 %amount) {
9; GFX6-LABEL: v_ashr_i8:
10; GFX6:       ; %bb.0:
11; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12; GFX6-NEXT:    v_and_b32_e32 v1, 0xff, v1
13; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 8
14; GFX6-NEXT:    v_ashrrev_i32_e32 v0, v1, v0
15; GFX6-NEXT:    s_setpc_b64 s[30:31]
16;
17; GFX8-LABEL: v_ashr_i8:
18; GFX8:       ; %bb.0:
19; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
21; GFX8-NEXT:    v_ashrrev_i16_sdwa v0, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_1
22; GFX8-NEXT:    s_setpc_b64 s[30:31]
23;
24; GFX9-LABEL: v_ashr_i8:
25; GFX9:       ; %bb.0:
26; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
27; GFX9-NEXT:    v_ashrrev_i16_sdwa v0, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
28; GFX9-NEXT:    s_setpc_b64 s[30:31]
29;
30; GFX10PLUS-LABEL: v_ashr_i8:
31; GFX10PLUS:       ; %bb.0:
32; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33; GFX10PLUS-NEXT:    v_and_b32_e32 v1, 0xff, v1
34; GFX10PLUS-NEXT:    v_bfe_i32 v0, v0, 0, 8
35; GFX10PLUS-NEXT:    v_ashrrev_i16 v0, v1, v0
36; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
37  %result = ashr i8 %value, %amount
38  ret i8 %result
39}
40
; ashr of an i8 value by the constant 7 (bit width minus one); %value is an ordinary argument.
; The check blocks differ per target in how the 8-bit value is widened before the shift.
41define i8 @v_ashr_i8_7(i8 %value) {
42; GFX6-LABEL: v_ashr_i8_7:
43; GFX6:       ; %bb.0:
44; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
45; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 8
46; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 7, v0
47; GFX6-NEXT:    s_setpc_b64 s[30:31]
48;
49; GFX8-LABEL: v_ashr_i8_7:
50; GFX8:       ; %bb.0:
51; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
52; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
53; GFX8-NEXT:    v_ashrrev_i16_e32 v0, 15, v0
54; GFX8-NEXT:    s_setpc_b64 s[30:31]
55;
56; GFX9-LABEL: v_ashr_i8_7:
57; GFX9:       ; %bb.0:
58; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
59; GFX9-NEXT:    v_mov_b32_e32 v1, 7
60; GFX9-NEXT:    v_ashrrev_i16_sdwa v0, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
61; GFX9-NEXT:    s_setpc_b64 s[30:31]
62;
63; GFX10PLUS-LABEL: v_ashr_i8_7:
64; GFX10PLUS:       ; %bb.0:
65; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
66; GFX10PLUS-NEXT:    v_bfe_i32 v0, v0, 0, 8
67; GFX10PLUS-NEXT:    v_ashrrev_i16 v0, 7, v0
68; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
69  %result = ashr i8 %value, 7
70  ret i8 %result
71}
72
; amdgpu_ps form of the variable i8 shift: both operands are inreg.
; Every check block expects scalar code built from s_sext_i32_i8 and s_ashr_i32.
73define amdgpu_ps i8 @s_ashr_i8(i8 inreg %value, i8 inreg %amount) {
74; GFX6-LABEL: s_ashr_i8:
75; GFX6:       ; %bb.0:
76; GFX6-NEXT:    s_sext_i32_i8 s0, s0
77; GFX6-NEXT:    s_ashr_i32 s0, s0, s1
78; GFX6-NEXT:    ; return to shader part epilog
79;
80; GFX8-LABEL: s_ashr_i8:
81; GFX8:       ; %bb.0:
82; GFX8-NEXT:    s_sext_i32_i8 s0, s0
83; GFX8-NEXT:    s_sext_i32_i8 s1, s1
84; GFX8-NEXT:    s_ashr_i32 s0, s0, s1
85; GFX8-NEXT:    ; return to shader part epilog
86;
87; GFX9-LABEL: s_ashr_i8:
88; GFX9:       ; %bb.0:
89; GFX9-NEXT:    s_sext_i32_i8 s0, s0
90; GFX9-NEXT:    s_sext_i32_i8 s1, s1
91; GFX9-NEXT:    s_ashr_i32 s0, s0, s1
92; GFX9-NEXT:    ; return to shader part epilog
93;
94; GFX10PLUS-LABEL: s_ashr_i8:
95; GFX10PLUS:       ; %bb.0:
96; GFX10PLUS-NEXT:    s_sext_i32_i8 s0, s0
97; GFX10PLUS-NEXT:    s_sext_i32_i8 s1, s1
98; GFX10PLUS-NEXT:    s_ashr_i32 s0, s0, s1
99; GFX10PLUS-NEXT:    ; return to shader part epilog
100  %result = ashr i8 %value, %amount
101  ret i8 %result
102}
103
; amdgpu_ps form with an inreg i8 value shifted by the constant 7.
; Both check blocks expect s_sext_i32_i8 followed by s_ashr_i32 with the immediate 7.
104define amdgpu_ps i8 @s_ashr_i8_7(i8 inreg %value) {
105; GCN-LABEL: s_ashr_i8_7:
106; GCN:       ; %bb.0:
107; GCN-NEXT:    s_sext_i32_i8 s0, s0
108; GCN-NEXT:    s_ashr_i32 s0, s0, 7
109; GCN-NEXT:    ; return to shader part epilog
110;
111; GFX10PLUS-LABEL: s_ashr_i8_7:
112; GFX10PLUS:       ; %bb.0:
113; GFX10PLUS-NEXT:    s_sext_i32_i8 s0, s0
114; GFX10PLUS-NEXT:    s_ashr_i32 s0, s0, 7
115; GFX10PLUS-NEXT:    ; return to shader part epilog
116  %result = ashr i8 %value, 7
117  ret i8 %result
118}
119
120
; ashr on the non-power-of-two type i24 with a variable amount; ordinary arguments.
; The checks expect an and with 0xffffff and a 24-bit v_bfe_i32 ahead of a 32-bit v_ashrrev_i32.
121define i24 @v_ashr_i24(i24 %value, i24 %amount) {
122; GCN-LABEL: v_ashr_i24:
123; GCN:       ; %bb.0:
124; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
125; GCN-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
126; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 24
127; GCN-NEXT:    v_ashrrev_i32_e32 v0, v1, v0
128; GCN-NEXT:    s_setpc_b64 s[30:31]
129;
130; GFX10PLUS-LABEL: v_ashr_i24:
131; GFX10PLUS:       ; %bb.0:
132; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
133; GFX10PLUS-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
134; GFX10PLUS-NEXT:    v_bfe_i32 v0, v0, 0, 24
135; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v0, v1, v0
136; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
137  %result = ashr i24 %value, %amount
138  ret i24 %result
139}
140
; ashr of an i24 value by the constant 7; %value is an ordinary argument.
; The checks expect a 24-bit v_bfe_i32 followed by v_ashrrev_i32 with the immediate 7.
141define i24 @v_ashr_i24_7(i24 %value) {
142; GCN-LABEL: v_ashr_i24_7:
143; GCN:       ; %bb.0:
144; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
145; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 24
146; GCN-NEXT:    v_ashrrev_i32_e32 v0, 7, v0
147; GCN-NEXT:    s_setpc_b64 s[30:31]
148;
149; GFX10PLUS-LABEL: v_ashr_i24_7:
150; GFX10PLUS:       ; %bb.0:
151; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
152; GFX10PLUS-NEXT:    v_bfe_i32 v0, v0, 0, 24
153; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v0, 7, v0
154; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
155  %result = ashr i24 %value, 7
156  ret i24 %result
157}
158
; amdgpu_ps form of the variable i24 shift: both operands are inreg.
; The checks expect s_bfe_i32 with the operand 0x180000 followed by s_ashr_i32 using s1 directly.
159define amdgpu_ps i24 @s_ashr_i24(i24 inreg %value, i24 inreg %amount) {
160; GCN-LABEL: s_ashr_i24:
161; GCN:       ; %bb.0:
162; GCN-NEXT:    s_bfe_i32 s0, s0, 0x180000
163; GCN-NEXT:    s_ashr_i32 s0, s0, s1
164; GCN-NEXT:    ; return to shader part epilog
165;
166; GFX10PLUS-LABEL: s_ashr_i24:
167; GFX10PLUS:       ; %bb.0:
168; GFX10PLUS-NEXT:    s_bfe_i32 s0, s0, 0x180000
169; GFX10PLUS-NEXT:    s_ashr_i32 s0, s0, s1
170; GFX10PLUS-NEXT:    ; return to shader part epilog
171  %result = ashr i24 %value, %amount
172  ret i24 %result
173}
174
; amdgpu_ps form with an inreg i24 value shifted by the constant 7.
; The checks expect s_bfe_i32 with the operand 0x180000 followed by s_ashr_i32 with the immediate 7.
175define amdgpu_ps i24 @s_ashr_i24_7(i24 inreg %value) {
176; GCN-LABEL: s_ashr_i24_7:
177; GCN:       ; %bb.0:
178; GCN-NEXT:    s_bfe_i32 s0, s0, 0x180000
179; GCN-NEXT:    s_ashr_i32 s0, s0, 7
180; GCN-NEXT:    ; return to shader part epilog
181;
182; GFX10PLUS-LABEL: s_ashr_i24_7:
183; GFX10PLUS:       ; %bb.0:
184; GFX10PLUS-NEXT:    s_bfe_i32 s0, s0, 0x180000
185; GFX10PLUS-NEXT:    s_ashr_i32 s0, s0, 7
186; GFX10PLUS-NEXT:    ; return to shader part epilog
187  %result = ashr i24 %value, 7
188  ret i24 %result
189}
190
; ashr of an i32 value by a variable i32 amount; ordinary arguments.
; The checks expect a single v_ashrrev_i32_e32 between the entry s_waitcnt and the return.
191define i32 @v_ashr_i32(i32 %value, i32 %amount) {
192; GCN-LABEL: v_ashr_i32:
193; GCN:       ; %bb.0:
194; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
195; GCN-NEXT:    v_ashrrev_i32_e32 v0, v1, v0
196; GCN-NEXT:    s_setpc_b64 s[30:31]
197;
198; GFX10PLUS-LABEL: v_ashr_i32:
199; GFX10PLUS:       ; %bb.0:
200; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
201; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v0, v1, v0
202; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
203  %result = ashr i32 %value, %amount
204  ret i32 %result
205}
206
; ashr of an i32 value by the constant 31 (bit width minus one); %value is an ordinary argument.
; The checks expect a single v_ashrrev_i32_e32 with the immediate 31.
207define i32 @v_ashr_i32_31(i32 %value) {
208; GCN-LABEL: v_ashr_i32_31:
209; GCN:       ; %bb.0:
210; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
211; GCN-NEXT:    v_ashrrev_i32_e32 v0, 31, v0
212; GCN-NEXT:    s_setpc_b64 s[30:31]
213;
214; GFX10PLUS-LABEL: v_ashr_i32_31:
215; GFX10PLUS:       ; %bb.0:
216; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
217; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v0, 31, v0
218; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
219  %result = ashr i32 %value, 31
220  ret i32 %result
221}
222
; amdgpu_ps form of the variable i32 shift: both operands are inreg.
; The checks expect a single s_ashr_i32 on s0 and s1.
223define amdgpu_ps i32 @s_ashr_i32(i32 inreg %value, i32 inreg %amount) {
224; GCN-LABEL: s_ashr_i32:
225; GCN:       ; %bb.0:
226; GCN-NEXT:    s_ashr_i32 s0, s0, s1
227; GCN-NEXT:    ; return to shader part epilog
228;
229; GFX10PLUS-LABEL: s_ashr_i32:
230; GFX10PLUS:       ; %bb.0:
231; GFX10PLUS-NEXT:    s_ashr_i32 s0, s0, s1
232; GFX10PLUS-NEXT:    ; return to shader part epilog
233  %result = ashr i32 %value, %amount
234  ret i32 %result
235}
236
; amdgpu_ps form with an inreg i32 value shifted by the constant 31.
; The checks expect a single s_ashr_i32 with the immediate 31.
237define amdgpu_ps i32 @s_ashr_i32_31(i32 inreg %value) {
238; GCN-LABEL: s_ashr_i32_31:
239; GCN:       ; %bb.0:
240; GCN-NEXT:    s_ashr_i32 s0, s0, 31
241; GCN-NEXT:    ; return to shader part epilog
242;
243; GFX10PLUS-LABEL: s_ashr_i32_31:
244; GFX10PLUS:       ; %bb.0:
245; GFX10PLUS-NEXT:    s_ashr_i32 s0, s0, 31
246; GFX10PLUS-NEXT:    ; return to shader part epilog
247  %result = ashr i32 %value, 31
248  ret i32 %result
249}
250
; Mixed operands: %value is inreg while %amount is an ordinary argument; the i32 result is bitcast to float for the return.
; The first check block expects v_ashr_i32_e32 with s0 as a source; the remaining blocks expect v_ashrrev_i32_e64.
251define amdgpu_ps float @ashr_i32_sv(i32 inreg %value, i32 %amount) {
252; GFX6-LABEL: ashr_i32_sv:
253; GFX6:       ; %bb.0:
254; GFX6-NEXT:    v_ashr_i32_e32 v0, s0, v0
255; GFX6-NEXT:    ; return to shader part epilog
256;
257; GFX8-LABEL: ashr_i32_sv:
258; GFX8:       ; %bb.0:
259; GFX8-NEXT:    v_ashrrev_i32_e64 v0, v0, s0
260; GFX8-NEXT:    ; return to shader part epilog
261;
262; GFX9-LABEL: ashr_i32_sv:
263; GFX9:       ; %bb.0:
264; GFX9-NEXT:    v_ashrrev_i32_e64 v0, v0, s0
265; GFX9-NEXT:    ; return to shader part epilog
266;
267; GFX10PLUS-LABEL: ashr_i32_sv:
268; GFX10PLUS:       ; %bb.0:
269; GFX10PLUS-NEXT:    v_ashrrev_i32_e64 v0, v0, s0
270; GFX10PLUS-NEXT:    ; return to shader part epilog
271  %result = ashr i32 %value, %amount
272  %cast = bitcast i32 %result to float
273  ret float %cast
274}
275
; Mixed operands: %value is an ordinary argument while %amount is inreg; the i32 result is bitcast to float for the return.
; The checks expect a single v_ashrrev_i32_e32 that takes s0 and v0.
276define amdgpu_ps float @ashr_i32_vs(i32 %value, i32 inreg %amount) {
277; GCN-LABEL: ashr_i32_vs:
278; GCN:       ; %bb.0:
279; GCN-NEXT:    v_ashrrev_i32_e32 v0, s0, v0
280; GCN-NEXT:    ; return to shader part epilog
281;
282; GFX10PLUS-LABEL: ashr_i32_vs:
283; GFX10PLUS:       ; %bb.0:
284; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v0, s0, v0
285; GFX10PLUS-NEXT:    ; return to shader part epilog
286  %result = ashr i32 %value, %amount
287  %cast = bitcast i32 %result to float
288  ret float %cast
289}
290
; Element-wise ashr on <2 x i32> with a variable vector amount; ordinary arguments.
; The checks expect one v_ashrrev_i32_e32 per element (v0 with v2, v1 with v3).
291define <2 x i32> @v_ashr_v2i32(<2 x i32> %value, <2 x i32> %amount) {
292; GCN-LABEL: v_ashr_v2i32:
293; GCN:       ; %bb.0:
294; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
295; GCN-NEXT:    v_ashrrev_i32_e32 v0, v2, v0
296; GCN-NEXT:    v_ashrrev_i32_e32 v1, v3, v1
297; GCN-NEXT:    s_setpc_b64 s[30:31]
298;
299; GFX10PLUS-LABEL: v_ashr_v2i32:
300; GFX10PLUS:       ; %bb.0:
301; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
302; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v0, v2, v0
303; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v1, v3, v1
304; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
305  %result = ashr <2 x i32> %value, %amount
306  ret <2 x i32> %result
307}
308
; Element-wise ashr on <2 x i32> where both lanes are shifted by the constant 31; ordinary argument.
; The checks expect one v_ashrrev_i32_e32 with the immediate 31 per element.
309define <2 x i32> @v_ashr_v2i32_31(<2 x i32> %value) {
310; GCN-LABEL: v_ashr_v2i32_31:
311; GCN:       ; %bb.0:
312; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
313; GCN-NEXT:    v_ashrrev_i32_e32 v0, 31, v0
314; GCN-NEXT:    v_ashrrev_i32_e32 v1, 31, v1
315; GCN-NEXT:    s_setpc_b64 s[30:31]
316;
317; GFX10PLUS-LABEL: v_ashr_v2i32_31:
318; GFX10PLUS:       ; %bb.0:
319; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
320; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v0, 31, v0
321; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v1, 31, v1
322; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
323  %result = ashr <2 x i32> %value, <i32 31, i32 31>
324  ret <2 x i32> %result
325}
326
; amdgpu_ps form of the <2 x i32> shift: both vector operands are inreg.
; The checks expect one s_ashr_i32 per element (s0 with s2, s1 with s3).
327define amdgpu_ps <2 x i32> @s_ashr_v2i32(<2 x i32> inreg %value, <2 x i32> inreg %amount) {
328; GCN-LABEL: s_ashr_v2i32:
329; GCN:       ; %bb.0:
330; GCN-NEXT:    s_ashr_i32 s0, s0, s2
331; GCN-NEXT:    s_ashr_i32 s1, s1, s3
332; GCN-NEXT:    ; return to shader part epilog
333;
334; GFX10PLUS-LABEL: s_ashr_v2i32:
335; GFX10PLUS:       ; %bb.0:
336; GFX10PLUS-NEXT:    s_ashr_i32 s0, s0, s2
337; GFX10PLUS-NEXT:    s_ashr_i32 s1, s1, s3
338; GFX10PLUS-NEXT:    ; return to shader part epilog
339  %result = ashr <2 x i32> %value, %amount
340  ret <2 x i32> %result
341}
342
; Element-wise ashr on the odd-sized vector <3 x i32> with a variable vector amount; ordinary arguments.
; The checks expect three v_ashrrev_i32_e32 instructions, pairing v0..v2 with v3..v5.
343define <3 x i32> @v_ashr_v3i32(<3 x i32> %value, <3 x i32> %amount) {
344; GCN-LABEL: v_ashr_v3i32:
345; GCN:       ; %bb.0:
346; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
347; GCN-NEXT:    v_ashrrev_i32_e32 v0, v3, v0
348; GCN-NEXT:    v_ashrrev_i32_e32 v1, v4, v1
349; GCN-NEXT:    v_ashrrev_i32_e32 v2, v5, v2
350; GCN-NEXT:    s_setpc_b64 s[30:31]
351;
352; GFX10PLUS-LABEL: v_ashr_v3i32:
353; GFX10PLUS:       ; %bb.0:
354; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
355; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v0, v3, v0
356; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v1, v4, v1
357; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v2, v5, v2
358; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
359  %result = ashr <3 x i32> %value, %amount
360  ret <3 x i32> %result
361}
362
; amdgpu_ps form of the <3 x i32> shift: both vector operands are inreg.
; The checks expect three s_ashr_i32 instructions, pairing s0..s2 with s3..s5.
363define amdgpu_ps <3 x i32> @s_ashr_v3i32(<3 x i32> inreg %value, <3 x i32> inreg %amount) {
364; GCN-LABEL: s_ashr_v3i32:
365; GCN:       ; %bb.0:
366; GCN-NEXT:    s_ashr_i32 s0, s0, s3
367; GCN-NEXT:    s_ashr_i32 s1, s1, s4
368; GCN-NEXT:    s_ashr_i32 s2, s2, s5
369; GCN-NEXT:    ; return to shader part epilog
370;
371; GFX10PLUS-LABEL: s_ashr_v3i32:
372; GFX10PLUS:       ; %bb.0:
373; GFX10PLUS-NEXT:    s_ashr_i32 s0, s0, s3
374; GFX10PLUS-NEXT:    s_ashr_i32 s1, s1, s4
375; GFX10PLUS-NEXT:    s_ashr_i32 s2, s2, s5
376; GFX10PLUS-NEXT:    ; return to shader part epilog
377  %result = ashr <3 x i32> %value, %amount
378  ret <3 x i32> %result
379}
380
; Element-wise ashr on <4 x i32> with a variable vector amount; ordinary arguments.
; The checks expect four v_ashrrev_i32_e32 instructions, pairing v0..v3 with v4..v7.
381define <4 x i32> @v_ashr_v4i32(<4 x i32> %value, <4 x i32> %amount) {
382; GCN-LABEL: v_ashr_v4i32:
383; GCN:       ; %bb.0:
384; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
385; GCN-NEXT:    v_ashrrev_i32_e32 v0, v4, v0
386; GCN-NEXT:    v_ashrrev_i32_e32 v1, v5, v1
387; GCN-NEXT:    v_ashrrev_i32_e32 v2, v6, v2
388; GCN-NEXT:    v_ashrrev_i32_e32 v3, v7, v3
389; GCN-NEXT:    s_setpc_b64 s[30:31]
390;
391; GFX10PLUS-LABEL: v_ashr_v4i32:
392; GFX10PLUS:       ; %bb.0:
393; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
394; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v0, v4, v0
395; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v1, v5, v1
396; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v2, v6, v2
397; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v3, v7, v3
398; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
399  %result = ashr <4 x i32> %value, %amount
400  ret <4 x i32> %result
401}
402
; amdgpu_ps form of the <4 x i32> shift: both vector operands are inreg.
; The checks expect four s_ashr_i32 instructions, pairing s0..s3 with s4..s7.
403define amdgpu_ps <4 x i32> @s_ashr_v4i32(<4 x i32> inreg %value, <4 x i32> inreg %amount) {
404; GCN-LABEL: s_ashr_v4i32:
405; GCN:       ; %bb.0:
406; GCN-NEXT:    s_ashr_i32 s0, s0, s4
407; GCN-NEXT:    s_ashr_i32 s1, s1, s5
408; GCN-NEXT:    s_ashr_i32 s2, s2, s6
409; GCN-NEXT:    s_ashr_i32 s3, s3, s7
410; GCN-NEXT:    ; return to shader part epilog
411;
412; GFX10PLUS-LABEL: s_ashr_v4i32:
413; GFX10PLUS:       ; %bb.0:
414; GFX10PLUS-NEXT:    s_ashr_i32 s0, s0, s4
415; GFX10PLUS-NEXT:    s_ashr_i32 s1, s1, s5
416; GFX10PLUS-NEXT:    s_ashr_i32 s2, s2, s6
417; GFX10PLUS-NEXT:    s_ashr_i32 s3, s3, s7
418; GFX10PLUS-NEXT:    ; return to shader part epilog
419  %result = ashr <4 x i32> %value, %amount
420  ret <4 x i32> %result
421}
422
; Element-wise ashr on the odd-sized vector <5 x i32> with a variable vector amount; ordinary arguments.
; The checks expect five v_ashrrev_i32_e32 instructions, pairing v0..v4 with v5..v9.
423define <5 x i32> @v_ashr_v5i32(<5 x i32> %value, <5 x i32> %amount) {
424; GCN-LABEL: v_ashr_v5i32:
425; GCN:       ; %bb.0:
426; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
427; GCN-NEXT:    v_ashrrev_i32_e32 v0, v5, v0
428; GCN-NEXT:    v_ashrrev_i32_e32 v1, v6, v1
429; GCN-NEXT:    v_ashrrev_i32_e32 v2, v7, v2
430; GCN-NEXT:    v_ashrrev_i32_e32 v3, v8, v3
431; GCN-NEXT:    v_ashrrev_i32_e32 v4, v9, v4
432; GCN-NEXT:    s_setpc_b64 s[30:31]
433;
434; GFX10PLUS-LABEL: v_ashr_v5i32:
435; GFX10PLUS:       ; %bb.0:
436; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
437; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v0, v5, v0
438; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v1, v6, v1
439; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v2, v7, v2
440; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v3, v8, v3
441; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v4, v9, v4
442; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
443  %result = ashr <5 x i32> %value, %amount
444  ret <5 x i32> %result
445}
446
; amdgpu_ps form of the <5 x i32> shift: both vector operands are inreg.
; The checks expect five s_ashr_i32 instructions, pairing s0..s4 with s5..s9.
447define amdgpu_ps <5 x i32> @s_ashr_v5i32(<5 x i32> inreg %value, <5 x i32> inreg %amount) {
448; GCN-LABEL: s_ashr_v5i32:
449; GCN:       ; %bb.0:
450; GCN-NEXT:    s_ashr_i32 s0, s0, s5
451; GCN-NEXT:    s_ashr_i32 s1, s1, s6
452; GCN-NEXT:    s_ashr_i32 s2, s2, s7
453; GCN-NEXT:    s_ashr_i32 s3, s3, s8
454; GCN-NEXT:    s_ashr_i32 s4, s4, s9
455; GCN-NEXT:    ; return to shader part epilog
456;
457; GFX10PLUS-LABEL: s_ashr_v5i32:
458; GFX10PLUS:       ; %bb.0:
459; GFX10PLUS-NEXT:    s_ashr_i32 s0, s0, s5
460; GFX10PLUS-NEXT:    s_ashr_i32 s1, s1, s6
461; GFX10PLUS-NEXT:    s_ashr_i32 s2, s2, s7
462; GFX10PLUS-NEXT:    s_ashr_i32 s3, s3, s8
463; GFX10PLUS-NEXT:    s_ashr_i32 s4, s4, s9
464; GFX10PLUS-NEXT:    ; return to shader part epilog
465  %result = ashr <5 x i32> %value, %amount
466  ret <5 x i32> %result
467}
468
; Element-wise ashr on <16 x i32> with a variable vector amount; ordinary arguments.
; Each check block expects one load addressed through s32 (buffer_load_dword or scratch_load_b32) whose
; result is consumed, after s_waitcnt vmcnt(0), by the final shift that writes v15. The destination
; register and position of that load differ between the check blocks.
469define <16 x i32> @v_ashr_v16i32(<16 x i32> %value, <16 x i32> %amount) {
470; GCN-LABEL: v_ashr_v16i32:
471; GCN:       ; %bb.0:
472; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
473; GCN-NEXT:    v_ashrrev_i32_e32 v0, v16, v0
474; GCN-NEXT:    buffer_load_dword v16, off, s[0:3], s32
475; GCN-NEXT:    v_ashrrev_i32_e32 v1, v17, v1
476; GCN-NEXT:    v_ashrrev_i32_e32 v2, v18, v2
477; GCN-NEXT:    v_ashrrev_i32_e32 v3, v19, v3
478; GCN-NEXT:    v_ashrrev_i32_e32 v4, v20, v4
479; GCN-NEXT:    v_ashrrev_i32_e32 v5, v21, v5
480; GCN-NEXT:    v_ashrrev_i32_e32 v6, v22, v6
481; GCN-NEXT:    v_ashrrev_i32_e32 v7, v23, v7
482; GCN-NEXT:    v_ashrrev_i32_e32 v8, v24, v8
483; GCN-NEXT:    v_ashrrev_i32_e32 v9, v25, v9
484; GCN-NEXT:    v_ashrrev_i32_e32 v10, v26, v10
485; GCN-NEXT:    v_ashrrev_i32_e32 v11, v27, v11
486; GCN-NEXT:    v_ashrrev_i32_e32 v12, v28, v12
487; GCN-NEXT:    v_ashrrev_i32_e32 v13, v29, v13
488; GCN-NEXT:    v_ashrrev_i32_e32 v14, v30, v14
489; GCN-NEXT:    s_waitcnt vmcnt(0)
490; GCN-NEXT:    v_ashrrev_i32_e32 v15, v16, v15
491; GCN-NEXT:    s_setpc_b64 s[30:31]
492;
493; GFX10-LABEL: v_ashr_v16i32:
494; GFX10:       ; %bb.0:
495; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
496; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32
497; GFX10-NEXT:    v_ashrrev_i32_e32 v0, v16, v0
498; GFX10-NEXT:    v_ashrrev_i32_e32 v1, v17, v1
499; GFX10-NEXT:    v_ashrrev_i32_e32 v2, v18, v2
500; GFX10-NEXT:    v_ashrrev_i32_e32 v3, v19, v3
501; GFX10-NEXT:    v_ashrrev_i32_e32 v4, v20, v4
502; GFX10-NEXT:    v_ashrrev_i32_e32 v5, v21, v5
503; GFX10-NEXT:    v_ashrrev_i32_e32 v6, v22, v6
504; GFX10-NEXT:    v_ashrrev_i32_e32 v7, v23, v7
505; GFX10-NEXT:    v_ashrrev_i32_e32 v8, v24, v8
506; GFX10-NEXT:    v_ashrrev_i32_e32 v9, v25, v9
507; GFX10-NEXT:    v_ashrrev_i32_e32 v10, v26, v10
508; GFX10-NEXT:    v_ashrrev_i32_e32 v11, v27, v11
509; GFX10-NEXT:    v_ashrrev_i32_e32 v12, v28, v12
510; GFX10-NEXT:    v_ashrrev_i32_e32 v13, v29, v13
511; GFX10-NEXT:    v_ashrrev_i32_e32 v14, v30, v14
512; GFX10-NEXT:    s_waitcnt vmcnt(0)
513; GFX10-NEXT:    v_ashrrev_i32_e32 v15, v31, v15
514; GFX10-NEXT:    s_setpc_b64 s[30:31]
515;
516; GFX11-LABEL: v_ashr_v16i32:
517; GFX11:       ; %bb.0:
518; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
519; GFX11-NEXT:    scratch_load_b32 v31, off, s32
520; GFX11-NEXT:    v_ashrrev_i32_e32 v0, v16, v0
521; GFX11-NEXT:    v_ashrrev_i32_e32 v1, v17, v1
522; GFX11-NEXT:    v_ashrrev_i32_e32 v2, v18, v2
523; GFX11-NEXT:    v_ashrrev_i32_e32 v3, v19, v3
524; GFX11-NEXT:    v_ashrrev_i32_e32 v4, v20, v4
525; GFX11-NEXT:    v_ashrrev_i32_e32 v5, v21, v5
526; GFX11-NEXT:    v_ashrrev_i32_e32 v6, v22, v6
527; GFX11-NEXT:    v_ashrrev_i32_e32 v7, v23, v7
528; GFX11-NEXT:    v_ashrrev_i32_e32 v8, v24, v8
529; GFX11-NEXT:    v_ashrrev_i32_e32 v9, v25, v9
530; GFX11-NEXT:    v_ashrrev_i32_e32 v10, v26, v10
531; GFX11-NEXT:    v_ashrrev_i32_e32 v11, v27, v11
532; GFX11-NEXT:    v_ashrrev_i32_e32 v12, v28, v12
533; GFX11-NEXT:    v_ashrrev_i32_e32 v13, v29, v13
534; GFX11-NEXT:    v_ashrrev_i32_e32 v14, v30, v14
535; GFX11-NEXT:    s_waitcnt vmcnt(0)
536; GFX11-NEXT:    v_ashrrev_i32_e32 v15, v31, v15
537; GFX11-NEXT:    s_setpc_b64 s[30:31]
538  %result = ashr <16 x i32> %value, %amount
539  ret <16 x i32> %result
540}
541
; amdgpu_ps form of the <16 x i32> shift: both vector operands are inreg.
; The checks expect sixteen s_ashr_i32 instructions, pairing s0..s15 with s16..s31, and no memory access.
542define amdgpu_ps <16 x i32> @s_ashr_v16i32(<16 x i32> inreg %value, <16 x i32> inreg %amount) {
543; GCN-LABEL: s_ashr_v16i32:
544; GCN:       ; %bb.0:
545; GCN-NEXT:    s_ashr_i32 s0, s0, s16
546; GCN-NEXT:    s_ashr_i32 s1, s1, s17
547; GCN-NEXT:    s_ashr_i32 s2, s2, s18
548; GCN-NEXT:    s_ashr_i32 s3, s3, s19
549; GCN-NEXT:    s_ashr_i32 s4, s4, s20
550; GCN-NEXT:    s_ashr_i32 s5, s5, s21
551; GCN-NEXT:    s_ashr_i32 s6, s6, s22
552; GCN-NEXT:    s_ashr_i32 s7, s7, s23
553; GCN-NEXT:    s_ashr_i32 s8, s8, s24
554; GCN-NEXT:    s_ashr_i32 s9, s9, s25
555; GCN-NEXT:    s_ashr_i32 s10, s10, s26
556; GCN-NEXT:    s_ashr_i32 s11, s11, s27
557; GCN-NEXT:    s_ashr_i32 s12, s12, s28
558; GCN-NEXT:    s_ashr_i32 s13, s13, s29
559; GCN-NEXT:    s_ashr_i32 s14, s14, s30
560; GCN-NEXT:    s_ashr_i32 s15, s15, s31
561; GCN-NEXT:    ; return to shader part epilog
562;
563; GFX10PLUS-LABEL: s_ashr_v16i32:
564; GFX10PLUS:       ; %bb.0:
565; GFX10PLUS-NEXT:    s_ashr_i32 s0, s0, s16
566; GFX10PLUS-NEXT:    s_ashr_i32 s1, s1, s17
567; GFX10PLUS-NEXT:    s_ashr_i32 s2, s2, s18
568; GFX10PLUS-NEXT:    s_ashr_i32 s3, s3, s19
569; GFX10PLUS-NEXT:    s_ashr_i32 s4, s4, s20
570; GFX10PLUS-NEXT:    s_ashr_i32 s5, s5, s21
571; GFX10PLUS-NEXT:    s_ashr_i32 s6, s6, s22
572; GFX10PLUS-NEXT:    s_ashr_i32 s7, s7, s23
573; GFX10PLUS-NEXT:    s_ashr_i32 s8, s8, s24
574; GFX10PLUS-NEXT:    s_ashr_i32 s9, s9, s25
575; GFX10PLUS-NEXT:    s_ashr_i32 s10, s10, s26
576; GFX10PLUS-NEXT:    s_ashr_i32 s11, s11, s27
577; GFX10PLUS-NEXT:    s_ashr_i32 s12, s12, s28
578; GFX10PLUS-NEXT:    s_ashr_i32 s13, s13, s29
579; GFX10PLUS-NEXT:    s_ashr_i32 s14, s14, s30
580; GFX10PLUS-NEXT:    s_ashr_i32 s15, s15, s31
581; GFX10PLUS-NEXT:    ; return to shader part epilog
582  %result = ashr <16 x i32> %value, %amount
583  ret <16 x i32> %result
584}
585
; ashr of an i16 value by a variable i16 amount; ordinary arguments.
; The first check block expects a mask, a 16-bit v_bfe_i32 and a 32-bit shift; the others expect a single 16-bit v_ashrrev_i16.
586define i16 @v_ashr_i16(i16 %value, i16 %amount) {
587; GFX6-LABEL: v_ashr_i16:
588; GFX6:       ; %bb.0:
589; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
590; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
591; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
592; GFX6-NEXT:    v_ashrrev_i32_e32 v0, v1, v0
593; GFX6-NEXT:    s_setpc_b64 s[30:31]
594;
595; GFX8-LABEL: v_ashr_i16:
596; GFX8:       ; %bb.0:
597; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
598; GFX8-NEXT:    v_ashrrev_i16_e32 v0, v1, v0
599; GFX8-NEXT:    s_setpc_b64 s[30:31]
600;
601; GFX9-LABEL: v_ashr_i16:
602; GFX9:       ; %bb.0:
603; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
604; GFX9-NEXT:    v_ashrrev_i16_e32 v0, v1, v0
605; GFX9-NEXT:    s_setpc_b64 s[30:31]
606;
607; GFX10PLUS-LABEL: v_ashr_i16:
608; GFX10PLUS:       ; %bb.0:
609; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
610; GFX10PLUS-NEXT:    v_ashrrev_i16 v0, v1, v0
611; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
612  %result = ashr i16 %value, %amount
613  ret i16 %result
614}
615
; ashr of an i16 value by the constant 15 (bit width minus one); %value is an ordinary argument.
; The first check block expects a 16-bit v_bfe_i32 plus a 32-bit shift; the others expect a single v_ashrrev_i16 with the immediate 15.
616define i16 @v_ashr_i16_15(i16 %value) {
617; GFX6-LABEL: v_ashr_i16_15:
618; GFX6:       ; %bb.0:
619; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
620; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
621; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 15, v0
622; GFX6-NEXT:    s_setpc_b64 s[30:31]
623;
624; GFX8-LABEL: v_ashr_i16_15:
625; GFX8:       ; %bb.0:
626; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
627; GFX8-NEXT:    v_ashrrev_i16_e32 v0, 15, v0
628; GFX8-NEXT:    s_setpc_b64 s[30:31]
629;
630; GFX9-LABEL: v_ashr_i16_15:
631; GFX9:       ; %bb.0:
632; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
633; GFX9-NEXT:    v_ashrrev_i16_e32 v0, 15, v0
634; GFX9-NEXT:    s_setpc_b64 s[30:31]
635;
636; GFX10PLUS-LABEL: v_ashr_i16_15:
637; GFX10PLUS:       ; %bb.0:
638; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
639; GFX10PLUS-NEXT:    v_ashrrev_i16 v0, 15, v0
640; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
641  %result = ashr i16 %value, 15
642  ret i16 %result
643}
644
; amdgpu_ps form of the variable i16 shift: both operands are inreg.
; Every check block expects scalar code built from s_sext_i32_i16 and s_ashr_i32; only the first block leaves s1 unextended.
645define amdgpu_ps i16 @s_ashr_i16(i16 inreg %value, i16 inreg %amount) {
646; GFX6-LABEL: s_ashr_i16:
647; GFX6:       ; %bb.0:
648; GFX6-NEXT:    s_sext_i32_i16 s0, s0
649; GFX6-NEXT:    s_ashr_i32 s0, s0, s1
650; GFX6-NEXT:    ; return to shader part epilog
651;
652; GFX8-LABEL: s_ashr_i16:
653; GFX8:       ; %bb.0:
654; GFX8-NEXT:    s_sext_i32_i16 s0, s0
655; GFX8-NEXT:    s_sext_i32_i16 s1, s1
656; GFX8-NEXT:    s_ashr_i32 s0, s0, s1
657; GFX8-NEXT:    ; return to shader part epilog
658;
659; GFX9-LABEL: s_ashr_i16:
660; GFX9:       ; %bb.0:
661; GFX9-NEXT:    s_sext_i32_i16 s0, s0
662; GFX9-NEXT:    s_sext_i32_i16 s1, s1
663; GFX9-NEXT:    s_ashr_i32 s0, s0, s1
664; GFX9-NEXT:    ; return to shader part epilog
665;
666; GFX10PLUS-LABEL: s_ashr_i16:
667; GFX10PLUS:       ; %bb.0:
668; GFX10PLUS-NEXT:    s_sext_i32_i16 s0, s0
669; GFX10PLUS-NEXT:    s_sext_i32_i16 s1, s1
670; GFX10PLUS-NEXT:    s_ashr_i32 s0, s0, s1
671; GFX10PLUS-NEXT:    ; return to shader part epilog
672  %result = ashr i16 %value, %amount
673  ret i16 %result
674}
675
; amdgpu_ps form with an inreg i16 value shifted by the constant 15.
; Both check blocks expect s_sext_i32_i16 followed by s_ashr_i32 with the immediate 15.
676define amdgpu_ps i16 @s_ashr_i16_15(i16 inreg %value) {
677; GCN-LABEL: s_ashr_i16_15:
678; GCN:       ; %bb.0:
679; GCN-NEXT:    s_sext_i32_i16 s0, s0
680; GCN-NEXT:    s_ashr_i32 s0, s0, 15
681; GCN-NEXT:    ; return to shader part epilog
682;
683; GFX10PLUS-LABEL: s_ashr_i16_15:
684; GFX10PLUS:       ; %bb.0:
685; GFX10PLUS-NEXT:    s_sext_i32_i16 s0, s0
686; GFX10PLUS-NEXT:    s_ashr_i32 s0, s0, 15
687; GFX10PLUS-NEXT:    ; return to shader part epilog
688  %result = ashr i16 %value, 15
689  ret i16 %result
690}
691
; Mixed operands: %value is inreg while %amount is an ordinary argument; the i16 result is bitcast to half for the return.
; The first check block expects a mask, s_sext_i32_i16 and v_ashr_i32_e32; the others expect a single 16-bit shift that takes v0 and s0.
692define amdgpu_ps half @ashr_i16_sv(i16 inreg %value, i16 %amount) {
693; GFX6-LABEL: ashr_i16_sv:
694; GFX6:       ; %bb.0:
695; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
696; GFX6-NEXT:    s_sext_i32_i16 s0, s0
697; GFX6-NEXT:    v_ashr_i32_e32 v0, s0, v0
698; GFX6-NEXT:    ; return to shader part epilog
699;
700; GFX8-LABEL: ashr_i16_sv:
701; GFX8:       ; %bb.0:
702; GFX8-NEXT:    v_ashrrev_i16_e64 v0, v0, s0
703; GFX8-NEXT:    ; return to shader part epilog
704;
705; GFX9-LABEL: ashr_i16_sv:
706; GFX9:       ; %bb.0:
707; GFX9-NEXT:    v_ashrrev_i16_e64 v0, v0, s0
708; GFX9-NEXT:    ; return to shader part epilog
709;
710; GFX10PLUS-LABEL: ashr_i16_sv:
711; GFX10PLUS:       ; %bb.0:
712; GFX10PLUS-NEXT:    v_ashrrev_i16 v0, v0, s0
713; GFX10PLUS-NEXT:    ; return to shader part epilog
714  %result = ashr i16 %value, %amount
715  %cast = bitcast i16 %result to half
716  ret half %cast
717}
718
; Mixed operands: %value is an ordinary argument while %amount is inreg; the i16 result is bitcast to half for the return.
; The first check block expects s_and_b32 with 0xffff, a 16-bit v_bfe_i32 and a 32-bit shift; the others expect a single 16-bit shift that takes s0 and v0.
719define amdgpu_ps half @ashr_i16_vs(i16 %value, i16 inreg %amount) {
720; GFX6-LABEL: ashr_i16_vs:
721; GFX6:       ; %bb.0:
722; GFX6-NEXT:    s_and_b32 s0, s0, 0xffff
723; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
724; GFX6-NEXT:    v_ashrrev_i32_e32 v0, s0, v0
725; GFX6-NEXT:    ; return to shader part epilog
726;
727; GFX8-LABEL: ashr_i16_vs:
728; GFX8:       ; %bb.0:
729; GFX8-NEXT:    v_ashrrev_i16_e32 v0, s0, v0
730; GFX8-NEXT:    ; return to shader part epilog
731;
732; GFX9-LABEL: ashr_i16_vs:
733; GFX9:       ; %bb.0:
734; GFX9-NEXT:    v_ashrrev_i16_e32 v0, s0, v0
735; GFX9-NEXT:    ; return to shader part epilog
736;
737; GFX10PLUS-LABEL: ashr_i16_vs:
738; GFX10PLUS:       ; %bb.0:
739; GFX10PLUS-NEXT:    v_ashrrev_i16 v0, s0, v0
740; GFX10PLUS-NEXT:    ; return to shader part epilog
741  %result = ashr i16 %value, %amount
742  %cast = bitcast i16 %result to half
743  ret half %cast
744}
745
; Element-wise ashr on <2 x i16> with a variable vector amount; ordinary arguments.
; Three expected shapes appear below: two separate 32-bit shifts on v0..v3, a 16-bit shift plus an
; SDWA shift merged with v_or_b32, and a single v_pk_ashrrev_i16.
746define <2 x i16> @v_ashr_v2i16(<2 x i16> %value, <2 x i16> %amount) {
747; GFX6-LABEL: v_ashr_v2i16:
748; GFX6:       ; %bb.0:
749; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
750; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v2
751; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
752; GFX6-NEXT:    v_ashrrev_i32_e32 v0, v2, v0
753; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v3
754; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
755; GFX6-NEXT:    v_ashrrev_i32_e32 v1, v2, v1
756; GFX6-NEXT:    s_setpc_b64 s[30:31]
757;
758; GFX8-LABEL: v_ashr_v2i16:
759; GFX8:       ; %bb.0:
760; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
761; GFX8-NEXT:    v_ashrrev_i16_e32 v2, v1, v0
762; GFX8-NEXT:    v_ashrrev_i16_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
763; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
764; GFX8-NEXT:    s_setpc_b64 s[30:31]
765;
766; GFX9-LABEL: v_ashr_v2i16:
767; GFX9:       ; %bb.0:
768; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
769; GFX9-NEXT:    v_pk_ashrrev_i16 v0, v1, v0
770; GFX9-NEXT:    s_setpc_b64 s[30:31]
771;
772; GFX10PLUS-LABEL: v_ashr_v2i16:
773; GFX10PLUS:       ; %bb.0:
774; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
775; GFX10PLUS-NEXT:    v_pk_ashrrev_i16 v0, v1, v0
776; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
777  %result = ashr <2 x i16> %value, %amount
778  ret <2 x i16> %result
779}
780
; Element-wise ashr on <2 x i16> where both lanes are shifted by the constant 15; ordinary argument.
; Three expected shapes appear below: two separate 32-bit shifts, a 16-bit shift plus an SDWA shift
; merged with v_or_b32, and a single v_pk_ashrrev_i16 with op_sel_hi:[0,1].
781define <2 x i16> @v_ashr_v2i16_15(<2 x i16> %value) {
782; GFX6-LABEL: v_ashr_v2i16_15:
783; GFX6:       ; %bb.0:
784; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
785; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
786; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
787; GFX6-NEXT:    v_ashrrev_i32_e32 v0, 15, v0
788; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 15, v1
789; GFX6-NEXT:    s_setpc_b64 s[30:31]
790;
791; GFX8-LABEL: v_ashr_v2i16_15:
792; GFX8:       ; %bb.0:
793; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
794; GFX8-NEXT:    v_mov_b32_e32 v2, 15
795; GFX8-NEXT:    v_ashrrev_i16_e32 v1, 15, v0
796; GFX8-NEXT:    v_ashrrev_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
797; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
798; GFX8-NEXT:    s_setpc_b64 s[30:31]
799;
800; GFX9-LABEL: v_ashr_v2i16_15:
801; GFX9:       ; %bb.0:
802; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
803; GFX9-NEXT:    v_pk_ashrrev_i16 v0, 15, v0 op_sel_hi:[0,1]
804; GFX9-NEXT:    s_setpc_b64 s[30:31]
805;
806; GFX10PLUS-LABEL: v_ashr_v2i16_15:
807; GFX10PLUS:       ; %bb.0:
808; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
809; GFX10PLUS-NEXT:    v_pk_ashrrev_i16 v0, 15, v0 op_sel_hi:[0,1]
810; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
811  %result = ashr <2 x i16> %value, <i16 15, i16 15>
812  ret <2 x i16> %result
813}
814
; amdgpu_ps form of the <2 x i16> shift: both vector operands are inreg and the result is bitcast to i32 for the return.
; Every check block expects two scalar s_ashr_i32 shifts whose results are recombined into s0, either with
; s_and_b32 / s_lshl_b32 / s_or_b32 or with s_pack_ll_b32_b16.
815define amdgpu_ps i32 @s_ashr_v2i16(<2 x i16> inreg %value, <2 x i16> inreg %amount) {
816; GFX6-LABEL: s_ashr_v2i16:
817; GFX6:       ; %bb.0:
818; GFX6-NEXT:    s_sext_i32_i16 s1, s1
819; GFX6-NEXT:    s_sext_i32_i16 s0, s0
820; GFX6-NEXT:    s_ashr_i32 s1, s1, s3
821; GFX6-NEXT:    s_ashr_i32 s0, s0, s2
822; GFX6-NEXT:    s_and_b32 s1, s1, 0xffff
823; GFX6-NEXT:    s_and_b32 s0, s0, 0xffff
824; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
825; GFX6-NEXT:    s_or_b32 s0, s0, s1
826; GFX6-NEXT:    ; return to shader part epilog
827;
828; GFX8-LABEL: s_ashr_v2i16:
829; GFX8:       ; %bb.0:
830; GFX8-NEXT:    s_sext_i32_i16 s2, s0
831; GFX8-NEXT:    s_bfe_i32 s0, s0, 0x100010
832; GFX8-NEXT:    s_sext_i32_i16 s3, s1
833; GFX8-NEXT:    s_bfe_i32 s1, s1, 0x100010
834; GFX8-NEXT:    s_ashr_i32 s2, s2, s3
835; GFX8-NEXT:    s_ashr_i32 s0, s0, s1
836; GFX8-NEXT:    s_lshl_b32 s0, s0, 16
837; GFX8-NEXT:    s_and_b32 s1, s2, 0xffff
838; GFX8-NEXT:    s_or_b32 s0, s0, s1
839; GFX8-NEXT:    ; return to shader part epilog
840;
841; GFX9-LABEL: s_ashr_v2i16:
842; GFX9:       ; %bb.0:
843; GFX9-NEXT:    s_sext_i32_i16 s2, s0
844; GFX9-NEXT:    s_ashr_i32 s0, s0, 16
845; GFX9-NEXT:    s_sext_i32_i16 s3, s1
846; GFX9-NEXT:    s_ashr_i32 s1, s1, 16
847; GFX9-NEXT:    s_ashr_i32 s2, s2, s3
848; GFX9-NEXT:    s_ashr_i32 s0, s0, s1
849; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s2, s0
850; GFX9-NEXT:    ; return to shader part epilog
851;
852; GFX10PLUS-LABEL: s_ashr_v2i16:
853; GFX10PLUS:       ; %bb.0:
854; GFX10PLUS-NEXT:    s_sext_i32_i16 s2, s0
855; GFX10PLUS-NEXT:    s_ashr_i32 s0, s0, 16
856; GFX10PLUS-NEXT:    s_sext_i32_i16 s3, s1
857; GFX10PLUS-NEXT:    s_ashr_i32 s1, s1, 16
858; GFX10PLUS-NEXT:    s_ashr_i32 s2, s2, s3
859; GFX10PLUS-NEXT:    s_ashr_i32 s0, s0, s1
860; GFX10PLUS-NEXT:    s_pack_ll_b32_b16 s0, s2, s0
861; GFX10PLUS-NEXT:    ; return to shader part epilog
862  %result = ashr <2 x i16> %value, %amount
863  %cast = bitcast <2 x i16> %result to i32
864  ret i32 %cast
865}
866
; <2 x i16> ashr with an inreg value shifted by a non-inreg amount vector; the
; result is returned as a float-typed bit pattern.
define amdgpu_ps float @ashr_v2i16_sv(<2 x i16> inreg %value, <2 x i16> %amount) {
; GFX6-LABEL: ashr_v2i16_sv:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT:    s_sext_i32_i16 s0, s0
; GFX6-NEXT:    v_ashr_i32_e32 v0, s0, v0
; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT:    s_sext_i32_i16 s0, s1
; GFX6-NEXT:    v_ashr_i32_e32 v1, s0, v1
; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: ashr_v2i16_sv:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
; GFX8-NEXT:    v_mov_b32_e32 v2, s1
; GFX8-NEXT:    v_ashrrev_i16_e64 v1, v0, s0
; GFX8-NEXT:    v_ashrrev_i16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: ashr_v2i16_sv:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_pk_ashrrev_i16 v0, v0, s0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: ashr_v2i16_sv:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_pk_ashrrev_i16 v0, v0, s0
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %shifted = ashr <2 x i16> %value, %amount
  %as.float = bitcast <2 x i16> %shifted to float
  ret float %as.float
}
904
; <2 x i16> ashr with a non-inreg value shifted by an inreg amount vector; the
; result is returned as a float-typed bit pattern.
define amdgpu_ps float @ashr_v2i16_vs(<2 x i16> %value, <2 x i16> inreg %amount) {
; GFX6-LABEL: ashr_v2i16_vs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
; GFX6-NEXT:    v_ashrrev_i32_e32 v0, s0, v0
; GFX6-NEXT:    s_and_b32 s0, s1, 0xffff
; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
; GFX6-NEXT:    v_ashrrev_i32_e32 v1, s0, v1
; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: ashr_v2i16_vs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
; GFX8-NEXT:    v_mov_b32_e32 v2, s1
; GFX8-NEXT:    v_ashrrev_i16_e32 v1, s0, v0
; GFX8-NEXT:    v_ashrrev_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: ashr_v2i16_vs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_pk_ashrrev_i16 v0, s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: ashr_v2i16_vs:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_pk_ashrrev_i16 v0, s0, v0
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %shifted = ashr <2 x i16> %value, %amount
  %as.float = bitcast <2 x i16> %shifted to float
  ret float %as.float
}
942
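; FIXME: The <3 x i16> tests below are disabled (reason not recorded here); re-enable them and regenerate the assertions.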
944; define <3 x i16> @v_ashr_v3i16(<3 x i16> %value, <3 x i16> %amount) {
945;   %result = ashr <3 x i16> %value, %amount
946;   ret <3 x i16> %result
947; }
948
949; define amdgpu_ps <3 x i16> @s_ashr_v3i16(<3 x i16> inreg %value, <3 x i16> inreg %amount) {
950;   %result = ashr <3 x i16> %value, %amount
951;   ret <3 x i16> %result
952; }
953
; <4 x i16> ashr by a variable per-lane amount in a callable function (no
; inreg arguments); the result is returned as a <2 x float> bit pattern.
define <2 x float> @v_ashr_v4i16(<4 x i16> %value, <4 x i16> %amount) {
; GFX6-LABEL: v_ashr_v4i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v4
; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
; GFX6-NEXT:    v_ashrrev_i32_e32 v0, v4, v0
; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v5
; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
; GFX6-NEXT:    v_ashrrev_i32_e32 v1, v4, v1
; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v6
; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 16
; GFX6-NEXT:    v_ashrrev_i32_e32 v2, v4, v2
; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v7
; GFX6-NEXT:    v_bfe_i32 v3, v3, 0, 16
; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT:    v_ashrrev_i32_e32 v3, v4, v3
; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v2
; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v3
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_ashr_v4i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_ashrrev_i16_e32 v4, v2, v0
; GFX8-NEXT:    v_ashrrev_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_ashrrev_i16_e32 v2, v3, v1
; GFX8-NEXT:    v_ashrrev_i16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v4, v0
; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_ashr_v4i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_ashrrev_i16 v0, v2, v0
; GFX9-NEXT:    v_pk_ashrrev_i16 v1, v3, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ashr_v4i16:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_pk_ashrrev_i16 v0, v2, v0
; GFX10PLUS-NEXT:    v_pk_ashrrev_i16 v1, v3, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %shifted = ashr <4 x i16> %value, %amount
  %as.v2f32 = bitcast <4 x i16> %shifted to <2 x float>
  ret <2 x float> %as.v2f32
}
1008
; <4 x i16> ashr where the value and the per-lane shift amounts are both inreg
; arguments; the result is returned as a <2 x i32> bit pattern.
define amdgpu_ps <2 x i32> @s_ashr_v4i16(<4 x i16> inreg %value, <4 x i16> inreg %amount) {
; GFX6-LABEL: s_ashr_v4i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_sext_i32_i16 s1, s1
; GFX6-NEXT:    s_sext_i32_i16 s0, s0
; GFX6-NEXT:    s_ashr_i32 s1, s1, s5
; GFX6-NEXT:    s_ashr_i32 s0, s0, s4
; GFX6-NEXT:    s_sext_i32_i16 s2, s2
; GFX6-NEXT:    s_sext_i32_i16 s3, s3
; GFX6-NEXT:    s_and_b32 s1, s1, 0xffff
; GFX6-NEXT:    s_ashr_i32 s2, s2, s6
; GFX6-NEXT:    s_ashr_i32 s3, s3, s7
; GFX6-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
; GFX6-NEXT:    s_or_b32 s0, s0, s1
; GFX6-NEXT:    s_and_b32 s1, s2, 0xffff
; GFX6-NEXT:    s_and_b32 s2, s3, 0xffff
; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
; GFX6-NEXT:    s_or_b32 s1, s1, s2
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_ashr_v4i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_sext_i32_i16 s4, s0
; GFX8-NEXT:    s_bfe_i32 s0, s0, 0x100010
; GFX8-NEXT:    s_sext_i32_i16 s5, s1
; GFX8-NEXT:    s_bfe_i32 s1, s1, 0x100010
; GFX8-NEXT:    s_sext_i32_i16 s6, s2
; GFX8-NEXT:    s_bfe_i32 s2, s2, 0x100010
; GFX8-NEXT:    s_sext_i32_i16 s7, s3
; GFX8-NEXT:    s_bfe_i32 s3, s3, 0x100010
; GFX8-NEXT:    s_ashr_i32 s4, s4, s6
; GFX8-NEXT:    s_ashr_i32 s0, s0, s2
; GFX8-NEXT:    s_ashr_i32 s2, s5, s7
; GFX8-NEXT:    s_ashr_i32 s1, s1, s3
; GFX8-NEXT:    s_lshl_b32 s0, s0, 16
; GFX8-NEXT:    s_and_b32 s3, s4, 0xffff
; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
; GFX8-NEXT:    s_and_b32 s2, s2, 0xffff
; GFX8-NEXT:    s_or_b32 s0, s0, s3
; GFX8-NEXT:    s_or_b32 s1, s1, s2
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_ashr_v4i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_sext_i32_i16 s4, s0
; GFX9-NEXT:    s_ashr_i32 s0, s0, 16
; GFX9-NEXT:    s_sext_i32_i16 s5, s2
; GFX9-NEXT:    s_ashr_i32 s2, s2, 16
; GFX9-NEXT:    s_ashr_i32 s4, s4, s5
; GFX9-NEXT:    s_ashr_i32 s0, s0, s2
; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s4, s0
; GFX9-NEXT:    s_sext_i32_i16 s2, s1
; GFX9-NEXT:    s_ashr_i32 s1, s1, 16
; GFX9-NEXT:    s_sext_i32_i16 s4, s3
; GFX9-NEXT:    s_ashr_i32 s3, s3, 16
; GFX9-NEXT:    s_ashr_i32 s2, s2, s4
; GFX9-NEXT:    s_ashr_i32 s1, s1, s3
; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s2, s1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ashr_v4i16:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_sext_i32_i16 s4, s0
; GFX10PLUS-NEXT:    s_ashr_i32 s0, s0, 16
; GFX10PLUS-NEXT:    s_sext_i32_i16 s5, s2
; GFX10PLUS-NEXT:    s_ashr_i32 s2, s2, 16
; GFX10PLUS-NEXT:    s_ashr_i32 s4, s4, s5
; GFX10PLUS-NEXT:    s_ashr_i32 s0, s0, s2
; GFX10PLUS-NEXT:    s_sext_i32_i16 s2, s1
; GFX10PLUS-NEXT:    s_ashr_i32 s1, s1, 16
; GFX10PLUS-NEXT:    s_sext_i32_i16 s5, s3
; GFX10PLUS-NEXT:    s_ashr_i32 s3, s3, 16
; GFX10PLUS-NEXT:    s_ashr_i32 s2, s2, s5
; GFX10PLUS-NEXT:    s_ashr_i32 s1, s1, s3
; GFX10PLUS-NEXT:    s_pack_ll_b32_b16 s0, s4, s0
; GFX10PLUS-NEXT:    s_pack_ll_b32_b16 s1, s2, s1
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %shifted = ashr <4 x i16> %value, %amount
  %as.v2i32 = bitcast <4 x i16> %shifted to <2 x i32>
  ret <2 x i32> %as.v2i32
}
1091
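; FIXME: The <5 x i16> and <6 x i16> tests below are disabled (reason not recorded here); re-enable them and regenerate the assertions.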
1093; define <5 x i16> @v_ashr_v5i16(<5 x i16> %value, <5 x i16> %amount) {
1094;   %result = ashr <5 x i16> %value, %amount
1095;   ret <5 x i16> %result
1096; }
1097
1098; define amdgpu_ps <5 x i16> @s_ashr_v5i16(<5 x i16> inreg %value, <5 x i16> inreg %amount) {
1099;   %result = ashr <5 x i16> %value, %amount
1100;   ret <5 x i16> %result
1101; }
1102
1103; define <3 x float> @v_ashr_v6i16(<6 x i16> %value, <6 x i16> %amount) {
1104;   %result = ashr <6 x i16> %value, %amount
1105;   %cast = bitcast <6 x i16> %result to <3 x float>
1106;   ret <3 x float> %cast
1107; }
1108
1109; define amdgpu_ps <3 x i32> @s_ashr_v6i16(<6 x i16> inreg %value, <6 x i16> inreg %amount) {
1110;   %result = ashr <6 x i16> %value, %amount
1111;   %cast = bitcast <6 x i16> %result to <3 x i32>
1112;   ret <3 x i32> %cast
1113; }
1114
; <8 x i16> ashr by a variable per-lane amount in a callable function (no
; inreg arguments); the result is returned as a <4 x float> bit pattern.
define <4 x float> @v_ashr_v8i16(<8 x i16> %value, <8 x i16> %amount) {
; GFX6-LABEL: v_ashr_v8i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_and_b32_e32 v8, 0xffff, v8
; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
; GFX6-NEXT:    v_ashrrev_i32_e32 v0, v8, v0
; GFX6-NEXT:    v_and_b32_e32 v8, 0xffff, v9
; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
; GFX6-NEXT:    v_ashrrev_i32_e32 v1, v8, v1
; GFX6-NEXT:    v_and_b32_e32 v8, 0xffff, v10
; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 16
; GFX6-NEXT:    v_ashrrev_i32_e32 v2, v8, v2
; GFX6-NEXT:    v_and_b32_e32 v8, 0xffff, v11
; GFX6-NEXT:    v_bfe_i32 v3, v3, 0, 16
; GFX6-NEXT:    v_ashrrev_i32_e32 v3, v8, v3
; GFX6-NEXT:    v_and_b32_e32 v8, 0xffff, v12
; GFX6-NEXT:    v_bfe_i32 v4, v4, 0, 16
; GFX6-NEXT:    v_ashrrev_i32_e32 v4, v8, v4
; GFX6-NEXT:    v_and_b32_e32 v8, 0xffff, v13
; GFX6-NEXT:    v_bfe_i32 v5, v5, 0, 16
; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT:    v_ashrrev_i32_e32 v5, v8, v5
; GFX6-NEXT:    v_and_b32_e32 v8, 0xffff, v14
; GFX6-NEXT:    v_bfe_i32 v6, v6, 0, 16
; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_ashrrev_i32_e32 v6, v8, v6
; GFX6-NEXT:    v_and_b32_e32 v8, 0xffff, v15
; GFX6-NEXT:    v_bfe_i32 v7, v7, 0, 16
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v2
; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v3
; GFX6-NEXT:    v_ashrrev_i32_e32 v7, v8, v7
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT:    v_and_b32_e32 v3, 0xffff, v5
; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v4
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v7
; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX6-NEXT:    v_and_b32_e32 v3, 0xffff, v6
; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; GFX6-NEXT:    v_or_b32_e32 v3, v3, v4
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_ashr_v8i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_ashrrev_i16_e32 v8, v4, v0
; GFX8-NEXT:    v_ashrrev_i16_sdwa v0, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_ashrrev_i16_e32 v4, v5, v1
; GFX8-NEXT:    v_ashrrev_i16_sdwa v1, v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v1, v4, v1
; GFX8-NEXT:    v_ashrrev_i16_e32 v4, v6, v2
; GFX8-NEXT:    v_ashrrev_i16_sdwa v2, v6, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v2, v4, v2
; GFX8-NEXT:    v_ashrrev_i16_e32 v4, v7, v3
; GFX8-NEXT:    v_ashrrev_i16_sdwa v3, v7, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v8, v0
; GFX8-NEXT:    v_or_b32_e32 v3, v4, v3
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_ashr_v8i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_ashrrev_i16 v0, v4, v0
; GFX9-NEXT:    v_pk_ashrrev_i16 v1, v5, v1
; GFX9-NEXT:    v_pk_ashrrev_i16 v2, v6, v2
; GFX9-NEXT:    v_pk_ashrrev_i16 v3, v7, v3
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ashr_v8i16:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_pk_ashrrev_i16 v0, v4, v0
; GFX10PLUS-NEXT:    v_pk_ashrrev_i16 v1, v5, v1
; GFX10PLUS-NEXT:    v_pk_ashrrev_i16 v2, v6, v2
; GFX10PLUS-NEXT:    v_pk_ashrrev_i16 v3, v7, v3
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %shifted = ashr <8 x i16> %value, %amount
  %as.v4f32 = bitcast <8 x i16> %shifted to <4 x float>
  ret <4 x float> %as.v4f32
}
1199
; <8 x i16> ashr where the value and the per-lane shift amounts are both inreg
; arguments; the result is returned as a <4 x i32> bit pattern.
define amdgpu_ps <4 x i32> @s_ashr_v8i16(<8 x i16> inreg %value, <8 x i16> inreg %amount) {
; GFX6-LABEL: s_ashr_v8i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_sext_i32_i16 s1, s1
; GFX6-NEXT:    s_sext_i32_i16 s0, s0
; GFX6-NEXT:    s_ashr_i32 s1, s1, s9
; GFX6-NEXT:    s_ashr_i32 s0, s0, s8
; GFX6-NEXT:    s_sext_i32_i16 s2, s2
; GFX6-NEXT:    s_sext_i32_i16 s3, s3
; GFX6-NEXT:    s_and_b32 s1, s1, 0xffff
; GFX6-NEXT:    s_ashr_i32 s2, s2, s10
; GFX6-NEXT:    s_ashr_i32 s3, s3, s11
; GFX6-NEXT:    s_sext_i32_i16 s5, s5
; GFX6-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
; GFX6-NEXT:    s_sext_i32_i16 s4, s4
; GFX6-NEXT:    s_ashr_i32 s5, s5, s13
; GFX6-NEXT:    s_sext_i32_i16 s7, s7
; GFX6-NEXT:    s_or_b32 s0, s0, s1
; GFX6-NEXT:    s_and_b32 s1, s2, 0xffff
; GFX6-NEXT:    s_and_b32 s2, s3, 0xffff
; GFX6-NEXT:    s_ashr_i32 s4, s4, s12
; GFX6-NEXT:    s_sext_i32_i16 s6, s6
; GFX6-NEXT:    s_ashr_i32 s7, s7, s15
; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
; GFX6-NEXT:    s_and_b32 s3, s5, 0xffff
; GFX6-NEXT:    s_ashr_i32 s6, s6, s14
; GFX6-NEXT:    s_or_b32 s1, s1, s2
; GFX6-NEXT:    s_and_b32 s2, s4, 0xffff
; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
; GFX6-NEXT:    s_and_b32 s4, s7, 0xffff
; GFX6-NEXT:    s_or_b32 s2, s2, s3
; GFX6-NEXT:    s_and_b32 s3, s6, 0xffff
; GFX6-NEXT:    s_lshl_b32 s4, s4, 16
; GFX6-NEXT:    s_or_b32 s3, s3, s4
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_ashr_v8i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_sext_i32_i16 s8, s0
; GFX8-NEXT:    s_bfe_i32 s0, s0, 0x100010
; GFX8-NEXT:    s_sext_i32_i16 s9, s1
; GFX8-NEXT:    s_bfe_i32 s1, s1, 0x100010
; GFX8-NEXT:    s_sext_i32_i16 s12, s4
; GFX8-NEXT:    s_bfe_i32 s4, s4, 0x100010
; GFX8-NEXT:    s_sext_i32_i16 s13, s5
; GFX8-NEXT:    s_bfe_i32 s5, s5, 0x100010
; GFX8-NEXT:    s_sext_i32_i16 s10, s2
; GFX8-NEXT:    s_bfe_i32 s2, s2, 0x100010
; GFX8-NEXT:    s_sext_i32_i16 s14, s6
; GFX8-NEXT:    s_bfe_i32 s6, s6, 0x100010
; GFX8-NEXT:    s_ashr_i32 s0, s0, s4
; GFX8-NEXT:    s_ashr_i32 s4, s9, s13
; GFX8-NEXT:    s_ashr_i32 s1, s1, s5
; GFX8-NEXT:    s_sext_i32_i16 s11, s3
; GFX8-NEXT:    s_bfe_i32 s3, s3, 0x100010
; GFX8-NEXT:    s_sext_i32_i16 s15, s7
; GFX8-NEXT:    s_bfe_i32 s7, s7, 0x100010
; GFX8-NEXT:    s_ashr_i32 s5, s10, s14
; GFX8-NEXT:    s_ashr_i32 s2, s2, s6
; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
; GFX8-NEXT:    s_and_b32 s4, s4, 0xffff
; GFX8-NEXT:    s_ashr_i32 s8, s8, s12
; GFX8-NEXT:    s_ashr_i32 s6, s11, s15
; GFX8-NEXT:    s_ashr_i32 s3, s3, s7
; GFX8-NEXT:    s_or_b32 s1, s1, s4
; GFX8-NEXT:    s_lshl_b32 s2, s2, 16
; GFX8-NEXT:    s_and_b32 s4, s5, 0xffff
; GFX8-NEXT:    s_lshl_b32 s0, s0, 16
; GFX8-NEXT:    s_and_b32 s7, s8, 0xffff
; GFX8-NEXT:    s_or_b32 s2, s2, s4
; GFX8-NEXT:    s_lshl_b32 s3, s3, 16
; GFX8-NEXT:    s_and_b32 s4, s6, 0xffff
; GFX8-NEXT:    s_or_b32 s0, s0, s7
; GFX8-NEXT:    s_or_b32 s3, s3, s4
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_ashr_v8i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_sext_i32_i16 s8, s0
; GFX9-NEXT:    s_ashr_i32 s0, s0, 16
; GFX9-NEXT:    s_sext_i32_i16 s9, s4
; GFX9-NEXT:    s_ashr_i32 s4, s4, 16
; GFX9-NEXT:    s_ashr_i32 s8, s8, s9
; GFX9-NEXT:    s_ashr_i32 s0, s0, s4
; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s8, s0
; GFX9-NEXT:    s_sext_i32_i16 s4, s1
; GFX9-NEXT:    s_ashr_i32 s1, s1, 16
; GFX9-NEXT:    s_sext_i32_i16 s8, s5
; GFX9-NEXT:    s_ashr_i32 s5, s5, 16
; GFX9-NEXT:    s_ashr_i32 s4, s4, s8
; GFX9-NEXT:    s_ashr_i32 s1, s1, s5
; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s4, s1
; GFX9-NEXT:    s_sext_i32_i16 s4, s2
; GFX9-NEXT:    s_ashr_i32 s2, s2, 16
; GFX9-NEXT:    s_sext_i32_i16 s5, s6
; GFX9-NEXT:    s_ashr_i32 s6, s6, 16
; GFX9-NEXT:    s_ashr_i32 s4, s4, s5
; GFX9-NEXT:    s_ashr_i32 s2, s2, s6
; GFX9-NEXT:    s_pack_ll_b32_b16 s2, s4, s2
; GFX9-NEXT:    s_sext_i32_i16 s4, s3
; GFX9-NEXT:    s_ashr_i32 s3, s3, 16
; GFX9-NEXT:    s_sext_i32_i16 s5, s7
; GFX9-NEXT:    s_ashr_i32 s6, s7, 16
; GFX9-NEXT:    s_ashr_i32 s4, s4, s5
; GFX9-NEXT:    s_ashr_i32 s3, s3, s6
; GFX9-NEXT:    s_pack_ll_b32_b16 s3, s4, s3
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ashr_v8i16:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_sext_i32_i16 s8, s0
; GFX10PLUS-NEXT:    s_ashr_i32 s0, s0, 16
; GFX10PLUS-NEXT:    s_sext_i32_i16 s9, s4
; GFX10PLUS-NEXT:    s_ashr_i32 s4, s4, 16
; GFX10PLUS-NEXT:    s_ashr_i32 s8, s8, s9
; GFX10PLUS-NEXT:    s_ashr_i32 s0, s0, s4
; GFX10PLUS-NEXT:    s_sext_i32_i16 s4, s1
; GFX10PLUS-NEXT:    s_ashr_i32 s1, s1, 16
; GFX10PLUS-NEXT:    s_sext_i32_i16 s9, s5
; GFX10PLUS-NEXT:    s_ashr_i32 s5, s5, 16
; GFX10PLUS-NEXT:    s_ashr_i32 s4, s4, s9
; GFX10PLUS-NEXT:    s_ashr_i32 s1, s1, s5
; GFX10PLUS-NEXT:    s_sext_i32_i16 s5, s6
; GFX10PLUS-NEXT:    s_pack_ll_b32_b16 s1, s4, s1
; GFX10PLUS-NEXT:    s_sext_i32_i16 s4, s2
; GFX10PLUS-NEXT:    s_ashr_i32 s2, s2, 16
; GFX10PLUS-NEXT:    s_ashr_i32 s6, s6, 16
; GFX10PLUS-NEXT:    s_ashr_i32 s4, s4, s5
; GFX10PLUS-NEXT:    s_ashr_i32 s2, s2, s6
; GFX10PLUS-NEXT:    s_sext_i32_i16 s5, s3
; GFX10PLUS-NEXT:    s_ashr_i32 s3, s3, 16
; GFX10PLUS-NEXT:    s_sext_i32_i16 s6, s7
; GFX10PLUS-NEXT:    s_ashr_i32 s7, s7, 16
; GFX10PLUS-NEXT:    s_ashr_i32 s5, s5, s6
; GFX10PLUS-NEXT:    s_ashr_i32 s3, s3, s7
; GFX10PLUS-NEXT:    s_pack_ll_b32_b16 s0, s8, s0
; GFX10PLUS-NEXT:    s_pack_ll_b32_b16 s2, s4, s2
; GFX10PLUS-NEXT:    s_pack_ll_b32_b16 s3, s5, s3
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %shifted = ashr <8 x i16> %value, %amount
  %as.v4i32 = bitcast <8 x i16> %shifted to <4 x i32>
  ret <4 x i32> %as.v4i32
}
1344
; i64 ashr by a variable i64 amount in a callable function (no inreg
; arguments).
define i64 @v_ashr_i64(i64 %value, i64 %amount) {
; GFX6-LABEL: v_ashr_i64:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_ashr_i64 v[0:1], v[0:1], v2
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_ashr_i64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_ashrrev_i64 v[0:1], v2, v[0:1]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_ashr_i64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_ashrrev_i64 v[0:1], v2, v[0:1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ashr_i64:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_ashrrev_i64 v[0:1], v2, v[0:1]
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %shifted = ashr i64 %value, %amount
  ret i64 %shifted
}
1372
; i64 ashr by the constant 63. The expected code shifts only the high dword by
; 31 and copies that result into both halves.
define i64 @v_ashr_i64_63(i64 %value) {
; GCN-LABEL: v_ashr_i64_63:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_ashrrev_i32_e32 v0, 31, v1
; GCN-NEXT:    v_mov_b32_e32 v1, v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ashr_i64_63:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v0, 31, v1
; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, v0
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %shifted = ashr i64 %value, 63
  ret i64 %shifted
}
1390
; i64 ashr by the constant 33. The expected code builds the low half from the
; high dword shifted by 1 and the high half from the high dword shifted by 31.
define i64 @v_ashr_i64_33(i64 %value) {
; GCN-LABEL: v_ashr_i64_33:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
; GCN-NEXT:    v_ashrrev_i32_e32 v0, 1, v1
; GCN-NEXT:    v_mov_b32_e32 v1, v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ashr_i64_33:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v0, 1, v1
; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v1, 31, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %shifted = ashr i64 %value, 33
  ret i64 %shifted
}
1409
; i64 ashr by the constant 32. The expected code moves the high dword into the
; low half and fills the high half with a shift by 31.
define i64 @v_ashr_i64_32(i64 %value) {
; GCN-LABEL: v_ashr_i64_32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v0, v1
; GCN-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ashr_i64_32:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, v1
; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %shifted = ashr i64 %value, 32
  ret i64 %shifted
}
1427
; i64 ashr by the constant 31. The expected code keeps a single 64-bit shift
; with an inline immediate amount.
define i64 @v_ashr_i64_31(i64 %value) {
; GFX6-LABEL: v_ashr_i64_31:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_ashr_i64 v[0:1], v[0:1], 31
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_ashr_i64_31:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_ashrrev_i64 v[0:1], 31, v[0:1]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_ashr_i64_31:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_ashrrev_i64 v[0:1], 31, v[0:1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ashr_i64_31:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_ashrrev_i64 v[0:1], 31, v[0:1]
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %shifted = ashr i64 %value, 31
  ret i64 %shifted
}
1455
; i64 ashr where the value and the shift amount are both inreg arguments.
define amdgpu_ps i64 @s_ashr_i64(i64 inreg %value, i64 inreg %amount) {
; GCN-LABEL: s_ashr_i64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_ashr_i64 s[0:1], s[0:1], s2
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ashr_i64:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_ashr_i64 s[0:1], s[0:1], s2
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %shifted = ashr i64 %value, %amount
  ret i64 %shifted
}
1469
; inreg i64 ashr by the constant 63. The expected code shifts only the high
; dword by 31 and copies that result into both halves.
define amdgpu_ps i64 @s_ashr_i64_63(i64 inreg %value) {
; GCN-LABEL: s_ashr_i64_63:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_ashr_i32 s0, s1, 31
; GCN-NEXT:    s_mov_b32 s1, s0
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ashr_i64_63:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_ashr_i32 s0, s1, 31
; GFX10PLUS-NEXT:    s_mov_b32 s1, s0
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %shifted = ashr i64 %value, 63
  ret i64 %shifted
}
1485
; inreg i64 ashr by the constant 33. The expected code builds the low half
; from the high dword shifted by 1 and the high half from it shifted by 31.
define amdgpu_ps i64 @s_ashr_i64_33(i64 inreg %value) {
; GCN-LABEL: s_ashr_i64_33:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_ashr_i32 s2, s1, 31
; GCN-NEXT:    s_ashr_i32 s0, s1, 1
; GCN-NEXT:    s_mov_b32 s1, s2
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ashr_i64_33:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_ashr_i32 s0, s1, 1
; GFX10PLUS-NEXT:    s_ashr_i32 s1, s1, 31
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %shifted = ashr i64 %value, 33
  ret i64 %shifted
}
1502
; inreg i64 ashr by the constant 32. The expected code moves the high dword
; into the low half and fills the high half with a shift by 31.
define amdgpu_ps i64 @s_ashr_i64_32(i64 inreg %value) {
; GCN-LABEL: s_ashr_i64_32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_mov_b32 s0, s1
; GCN-NEXT:    s_ashr_i32 s1, s1, 31
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ashr_i64_32:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_mov_b32 s0, s1
; GFX10PLUS-NEXT:    s_ashr_i32 s1, s1, 31
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %shifted = ashr i64 %value, 32
  ret i64 %shifted
}
1518
; inreg i64 ashr by the constant 31. The expected code keeps a single 64-bit
; shift with an immediate amount.
define amdgpu_ps i64 @s_ashr_i64_31(i64 inreg %value) {
; GCN-LABEL: s_ashr_i64_31:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_ashr_i64 s[0:1], s[0:1], 31
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ashr_i64_31:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_ashr_i64 s[0:1], s[0:1], 31
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %shifted = ashr i64 %value, 31
  ret i64 %shifted
}
1532
; i64 ashr with an inreg value shifted by a non-inreg amount; the result is
; returned as a <2 x float> bit pattern.
define amdgpu_ps <2 x float> @ashr_i64_sv(i64 inreg %value, i64 %amount) {
; GFX6-LABEL: ashr_i64_sv:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_ashr_i64 v[0:1], s[0:1], v0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: ashr_i64_sv:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_ashrrev_i64 v[0:1], v0, s[0:1]
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: ashr_i64_sv:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_ashrrev_i64 v[0:1], v0, s[0:1]
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: ashr_i64_sv:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_ashrrev_i64 v[0:1], v0, s[0:1]
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %shifted = ashr i64 %value, %amount
  %as.v2f32 = bitcast i64 %shifted to <2 x float>
  ret <2 x float> %as.v2f32
}
1557
; i64 ashr with a non-inreg value shifted by an inreg amount; the result is
; returned as a <2 x float> bit pattern.
define amdgpu_ps <2 x float> @ashr_i64_vs(i64 %value, i64 inreg %amount) {
; GFX6-LABEL: ashr_i64_vs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_ashr_i64 v[0:1], v[0:1], s0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: ashr_i64_vs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_ashrrev_i64 v[0:1], s0, v[0:1]
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: ashr_i64_vs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_ashrrev_i64 v[0:1], s0, v[0:1]
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: ashr_i64_vs:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_ashrrev_i64 v[0:1], s0, v[0:1]
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %shifted = ashr i64 %value, %amount
  %as.v2f32 = bitcast i64 %shifted to <2 x float>
  ret <2 x float> %as.v2f32
}
1582
; <2 x i64> ashr by a variable per-lane amount in a callable function (no
; inreg arguments). The expected code issues one 64-bit shift per lane.
define <2 x i64> @v_ashr_v2i64(<2 x i64> %value, <2 x i64> %amount) {
; GFX6-LABEL: v_ashr_v2i64:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_ashr_i64 v[0:1], v[0:1], v4
; GFX6-NEXT:    v_ashr_i64 v[2:3], v[2:3], v6
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_ashr_v2i64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_ashrrev_i64 v[0:1], v4, v[0:1]
; GFX8-NEXT:    v_ashrrev_i64 v[2:3], v6, v[2:3]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_ashr_v2i64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_ashrrev_i64 v[0:1], v4, v[0:1]
; GFX9-NEXT:    v_ashrrev_i64 v[2:3], v6, v[2:3]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ashr_v2i64:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_ashrrev_i64 v[0:1], v4, v[0:1]
; GFX10PLUS-NEXT:    v_ashrrev_i64 v[2:3], v6, v[2:3]
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %shifted = ashr <2 x i64> %value, %amount
  ret <2 x i64> %shifted
}
1614
; <2 x i64> ashr with both lanes shifted by the constant 31. The expected code
; issues one 64-bit shift with an immediate amount per lane.
define <2 x i64> @v_ashr_v2i64_31(<2 x i64> %value) {
; GFX6-LABEL: v_ashr_v2i64_31:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_ashr_i64 v[0:1], v[0:1], 31
; GFX6-NEXT:    v_ashr_i64 v[2:3], v[2:3], 31
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_ashr_v2i64_31:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_ashrrev_i64 v[0:1], 31, v[0:1]
; GFX8-NEXT:    v_ashrrev_i64 v[2:3], 31, v[2:3]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_ashr_v2i64_31:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_ashrrev_i64 v[0:1], 31, v[0:1]
; GFX9-NEXT:    v_ashrrev_i64 v[2:3], 31, v[2:3]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ashr_v2i64_31:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_ashrrev_i64 v[0:1], 31, v[0:1]
; GFX10PLUS-NEXT:    v_ashrrev_i64 v[2:3], 31, v[2:3]
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %shifted = ashr <2 x i64> %value, <i64 31, i64 31>
  ret <2 x i64> %shifted
}
1646
; Test: arithmetic shift right of a <2 x i64> value by a <2 x i64> amount
; where both arguments are marked inreg on an amdgpu_ps function.
; Both check groups (GCN and GFX10PLUS) expect the same two scalar
; instructions: one s_ashr_i64 per lane, taking s4 and s6 as the shift
; amount operands, followed by the shader-part epilog marker.
1647define amdgpu_ps <2 x i64> @s_ashr_v2i64(<2 x i64> inreg %value, <2 x i64> inreg %amount) {
1648; GCN-LABEL: s_ashr_v2i64:
1649; GCN:       ; %bb.0:
1650; GCN-NEXT:    s_ashr_i64 s[0:1], s[0:1], s4
1651; GCN-NEXT:    s_ashr_i64 s[2:3], s[2:3], s6
1652; GCN-NEXT:    ; return to shader part epilog
1653;
1654; GFX10PLUS-LABEL: s_ashr_v2i64:
1655; GFX10PLUS:       ; %bb.0:
1656; GFX10PLUS-NEXT:    s_ashr_i64 s[0:1], s[0:1], s4
1657; GFX10PLUS-NEXT:    s_ashr_i64 s[2:3], s[2:3], s6
1658; GFX10PLUS-NEXT:    ; return to shader part epilog
1659  %result = ashr <2 x i64> %value, %amount
1660  ret <2 x i64> %result
1661}
1662
; Test: arithmetic shift right of an i65 value by a variable i65 amount
; (VGPR arguments, default calling convention).
; The expected code for every target has the same shape:
;   * v_bfe_i32 with offset 0 / width 1 on v2, then an arithmetic shift by
;     31, to build a 64-bit pair in v[4:5] (presumably the sign-extended
;     top bit of the i65 value - confirm against the argument layout);
;   * 64-bit logical right / left shifts and ORs combining v[0:1] with
;     v[4:5], using the amount in v3, 64 - v3, and v3 + 0xffffffc0;
;   * an unsigned compare of v3 against 64 and an equality compare of v3
;     against 0, feeding v_cndmask selects that produce v0, v1 and v2.
; The GFX6 checks use the non-reversed 64-bit shift mnemonics; GFX8 and
; later use the *rev forms. GFX10 and GFX11 have separate check groups
; (instead of the shared GFX10PLUS one) because the equality-compare result
; is expected in s4 on GFX10 but in s0 on GFX11.
1663define i65 @v_ashr_i65(i65 %value, i65 %amount) {
1664; GFX6-LABEL: v_ashr_i65:
1665; GFX6:       ; %bb.0:
1666; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1667; GFX6-NEXT:    v_bfe_i32 v4, v2, 0, 1
1668; GFX6-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
1669; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, 64, v3
1670; GFX6-NEXT:    v_lshr_b64 v[6:7], v[0:1], v3
1671; GFX6-NEXT:    v_lshl_b64 v[8:9], v[4:5], v8
1672; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 0xffffffc0, v3
1673; GFX6-NEXT:    v_ashr_i64 v[10:11], v[4:5], v3
1674; GFX6-NEXT:    v_or_b32_e32 v6, v6, v8
1675; GFX6-NEXT:    v_ashrrev_i32_e32 v8, 31, v5
1676; GFX6-NEXT:    v_ashr_i64 v[4:5], v[4:5], v2
1677; GFX6-NEXT:    v_or_b32_e32 v7, v7, v9
1678; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v3
1679; GFX6-NEXT:    v_cndmask_b32_e32 v2, v4, v6, vcc
1680; GFX6-NEXT:    v_cndmask_b32_e32 v4, v5, v7, vcc
1681; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
1682; GFX6-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s[4:5]
1683; GFX6-NEXT:    v_cndmask_b32_e64 v1, v4, v1, s[4:5]
1684; GFX6-NEXT:    v_cndmask_b32_e32 v2, v8, v10, vcc
1685; GFX6-NEXT:    s_setpc_b64 s[30:31]
1686;
1687; GFX8-LABEL: v_ashr_i65:
1688; GFX8:       ; %bb.0:
1689; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1690; GFX8-NEXT:    v_bfe_i32 v4, v2, 0, 1
1691; GFX8-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
1692; GFX8-NEXT:    v_sub_u32_e32 v8, vcc, 64, v3
1693; GFX8-NEXT:    v_lshrrev_b64 v[6:7], v3, v[0:1]
1694; GFX8-NEXT:    v_lshlrev_b64 v[8:9], v8, v[4:5]
1695; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0xffffffc0, v3
1696; GFX8-NEXT:    v_ashrrev_i64 v[10:11], v3, v[4:5]
1697; GFX8-NEXT:    v_or_b32_e32 v6, v6, v8
1698; GFX8-NEXT:    v_ashrrev_i32_e32 v8, 31, v5
1699; GFX8-NEXT:    v_ashrrev_i64 v[4:5], v2, v[4:5]
1700; GFX8-NEXT:    v_or_b32_e32 v7, v7, v9
1701; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v3
1702; GFX8-NEXT:    v_cndmask_b32_e32 v2, v4, v6, vcc
1703; GFX8-NEXT:    v_cndmask_b32_e32 v4, v5, v7, vcc
1704; GFX8-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
1705; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s[4:5]
1706; GFX8-NEXT:    v_cndmask_b32_e64 v1, v4, v1, s[4:5]
1707; GFX8-NEXT:    v_cndmask_b32_e32 v2, v8, v10, vcc
1708; GFX8-NEXT:    s_setpc_b64 s[30:31]
1709;
1710; GFX9-LABEL: v_ashr_i65:
1711; GFX9:       ; %bb.0:
1712; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1713; GFX9-NEXT:    v_bfe_i32 v4, v2, 0, 1
1714; GFX9-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
1715; GFX9-NEXT:    v_sub_u32_e32 v8, 64, v3
1716; GFX9-NEXT:    v_lshrrev_b64 v[6:7], v3, v[0:1]
1717; GFX9-NEXT:    v_lshlrev_b64 v[8:9], v8, v[4:5]
1718; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffc0, v3
1719; GFX9-NEXT:    v_ashrrev_i64 v[10:11], v3, v[4:5]
1720; GFX9-NEXT:    v_or_b32_e32 v6, v6, v8
1721; GFX9-NEXT:    v_ashrrev_i32_e32 v8, 31, v5
1722; GFX9-NEXT:    v_ashrrev_i64 v[4:5], v2, v[4:5]
1723; GFX9-NEXT:    v_or_b32_e32 v7, v7, v9
1724; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v3
1725; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v6, vcc
1726; GFX9-NEXT:    v_cndmask_b32_e32 v4, v5, v7, vcc
1727; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
1728; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s[4:5]
1729; GFX9-NEXT:    v_cndmask_b32_e64 v1, v4, v1, s[4:5]
1730; GFX9-NEXT:    v_cndmask_b32_e32 v2, v8, v10, vcc
1731; GFX9-NEXT:    s_setpc_b64 s[30:31]
1732;
1733; GFX10-LABEL: v_ashr_i65:
1734; GFX10:       ; %bb.0:
1735; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1736; GFX10-NEXT:    v_bfe_i32 v4, v2, 0, 1
1737; GFX10-NEXT:    v_sub_nc_u32_e32 v2, 64, v3
1738; GFX10-NEXT:    v_add_nc_u32_e32 v10, 0xffffffc0, v3
1739; GFX10-NEXT:    v_lshrrev_b64 v[6:7], v3, v[0:1]
1740; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v3
1741; GFX10-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
1742; GFX10-NEXT:    v_cmp_eq_u32_e64 s4, 0, v3
1743; GFX10-NEXT:    v_lshlrev_b64 v[8:9], v2, v[4:5]
1744; GFX10-NEXT:    v_ashrrev_i64 v[10:11], v10, v[4:5]
1745; GFX10-NEXT:    v_or_b32_e32 v2, v6, v8
1746; GFX10-NEXT:    v_or_b32_e32 v8, v7, v9
1747; GFX10-NEXT:    v_ashrrev_i64 v[6:7], v3, v[4:5]
1748; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v5
1749; GFX10-NEXT:    v_cndmask_b32_e32 v2, v10, v2, vcc_lo
1750; GFX10-NEXT:    v_cndmask_b32_e32 v4, v11, v8, vcc_lo
1751; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s4
1752; GFX10-NEXT:    v_cndmask_b32_e64 v1, v4, v1, s4
1753; GFX10-NEXT:    v_cndmask_b32_e32 v2, v3, v6, vcc_lo
1754; GFX10-NEXT:    s_setpc_b64 s[30:31]
1755;
1756; GFX11-LABEL: v_ashr_i65:
1757; GFX11:       ; %bb.0:
1758; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1759; GFX11-NEXT:    v_bfe_i32 v4, v2, 0, 1
1760; GFX11-NEXT:    v_sub_nc_u32_e32 v2, 64, v3
1761; GFX11-NEXT:    v_add_nc_u32_e32 v10, 0xffffffc0, v3
1762; GFX11-NEXT:    v_lshrrev_b64 v[6:7], v3, v[0:1]
1763; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v3
1764; GFX11-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
1765; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 0, v3
1766; GFX11-NEXT:    v_lshlrev_b64 v[8:9], v2, v[4:5]
1767; GFX11-NEXT:    v_ashrrev_i64 v[10:11], v10, v[4:5]
1768; GFX11-NEXT:    v_or_b32_e32 v2, v6, v8
1769; GFX11-NEXT:    v_or_b32_e32 v8, v7, v9
1770; GFX11-NEXT:    v_ashrrev_i64 v[6:7], v3, v[4:5]
1771; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v5
1772; GFX11-NEXT:    v_cndmask_b32_e32 v2, v10, v2, vcc_lo
1773; GFX11-NEXT:    v_cndmask_b32_e32 v4, v11, v8, vcc_lo
1774; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s0
1775; GFX11-NEXT:    v_cndmask_b32_e64 v1, v4, v1, s0
1776; GFX11-NEXT:    v_cndmask_b32_e32 v2, v3, v6, vcc_lo
1777; GFX11-NEXT:    s_setpc_b64 s[30:31]
1778  %result = ashr i65 %value, %amount
1779  ret i65 %result
1780}
1781
; Test: arithmetic shift right of an i65 value by the constant 33
; (VGPR argument, default calling convention).
; With a constant amount the checks expect a short, branch-free sequence
; with no compares or selects: a 1-bit v_bfe_i32 on v2 plus a shift by 31 to
; form the pair v[1:2], a 64-bit left shift of that pair by 31, a 32-bit
; logical right shift of the original v1 by 1, an OR into v0, and a final
; arithmetic shift of v2 by 1.
; GFX6 uses v_lshl_b64 where the later targets use v_lshlrev_b64. The
; GFX10PLUS group expects the same instructions in a different order.
1782define i65 @v_ashr_i65_33(i65 %value) {
1783; GFX6-LABEL: v_ashr_i65_33:
1784; GFX6:       ; %bb.0:
1785; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1786; GFX6-NEXT:    v_mov_b32_e32 v3, v1
1787; GFX6-NEXT:    v_bfe_i32 v1, v2, 0, 1
1788; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
1789; GFX6-NEXT:    v_lshl_b64 v[0:1], v[1:2], 31
1790; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 1, v3
1791; GFX6-NEXT:    v_or_b32_e32 v0, v3, v0
1792; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 1, v2
1793; GFX6-NEXT:    s_setpc_b64 s[30:31]
1794;
1795; GFX8-LABEL: v_ashr_i65_33:
1796; GFX8:       ; %bb.0:
1797; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1798; GFX8-NEXT:    v_mov_b32_e32 v3, v1
1799; GFX8-NEXT:    v_bfe_i32 v1, v2, 0, 1
1800; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
1801; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 31, v[1:2]
1802; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 1, v3
1803; GFX8-NEXT:    v_or_b32_e32 v0, v3, v0
1804; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 1, v2
1805; GFX8-NEXT:    s_setpc_b64 s[30:31]
1806;
1807; GFX9-LABEL: v_ashr_i65_33:
1808; GFX9:       ; %bb.0:
1809; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1810; GFX9-NEXT:    v_mov_b32_e32 v3, v1
1811; GFX9-NEXT:    v_bfe_i32 v1, v2, 0, 1
1812; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
1813; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 31, v[1:2]
1814; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 1, v3
1815; GFX9-NEXT:    v_or_b32_e32 v0, v3, v0
1816; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 1, v2
1817; GFX9-NEXT:    s_setpc_b64 s[30:31]
1818;
1819; GFX10PLUS-LABEL: v_ashr_i65_33:
1820; GFX10PLUS:       ; %bb.0:
1821; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1822; GFX10PLUS-NEXT:    v_mov_b32_e32 v3, v1
1823; GFX10PLUS-NEXT:    v_bfe_i32 v1, v2, 0, 1
1824; GFX10PLUS-NEXT:    v_lshrrev_b32_e32 v3, 1, v3
1825; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
1826; GFX10PLUS-NEXT:    v_lshlrev_b64 v[0:1], 31, v[1:2]
1827; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v2, 1, v2
1828; GFX10PLUS-NEXT:    v_or_b32_e32 v0, v3, v0
1829; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
1830  %result = ashr i65 %value, 33
1831  ret i65 %result
1832}
1833
; Test: arithmetic shift right of an i65 value by a variable i65 amount
; where both arguments are marked inreg on an amdgpu_ps function.
; The expected scalar sequence starts with s_bfe_i64 (operand 0x10000,
; presumably encoding width 1 / offset 0 - confirm against the ISA docs) to
; build s[4:5], then compares s3 against 64 and against 0, materializing
; each result as 0/1 in s11 and s12. It then computes 64-bit logical and
; arithmetic shifts using s3, 64 - s3 and s3 - 64, and picks the final
; s[0:1] and s2 with s_cselect after re-testing s11 / s12 via s_cmp_lg_u32.
; The GCN and GFX10PLUS groups differ only in register assignment and
; instruction order.
1834define amdgpu_ps i65 @s_ashr_i65(i65 inreg %value, i65 inreg %amount) {
1835; GCN-LABEL: s_ashr_i65:
1836; GCN:       ; %bb.0:
1837; GCN-NEXT:    s_bfe_i64 s[4:5], s[2:3], 0x10000
1838; GCN-NEXT:    s_sub_i32 s10, s3, 64
1839; GCN-NEXT:    s_sub_i32 s8, 64, s3
1840; GCN-NEXT:    s_cmp_lt_u32 s3, 64
1841; GCN-NEXT:    s_cselect_b32 s11, 1, 0
1842; GCN-NEXT:    s_cmp_eq_u32 s3, 0
1843; GCN-NEXT:    s_cselect_b32 s12, 1, 0
1844; GCN-NEXT:    s_ashr_i64 s[6:7], s[4:5], s3
1845; GCN-NEXT:    s_lshr_b64 s[2:3], s[0:1], s3
1846; GCN-NEXT:    s_lshl_b64 s[8:9], s[4:5], s8
1847; GCN-NEXT:    s_or_b64 s[2:3], s[2:3], s[8:9]
1848; GCN-NEXT:    s_ashr_i32 s7, s5, 31
1849; GCN-NEXT:    s_ashr_i64 s[4:5], s[4:5], s10
1850; GCN-NEXT:    s_cmp_lg_u32 s11, 0
1851; GCN-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
1852; GCN-NEXT:    s_cmp_lg_u32 s12, 0
1853; GCN-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[2:3]
1854; GCN-NEXT:    s_cmp_lg_u32 s11, 0
1855; GCN-NEXT:    s_cselect_b32 s2, s6, s7
1856; GCN-NEXT:    ; return to shader part epilog
1857;
1858; GFX10PLUS-LABEL: s_ashr_i65:
1859; GFX10PLUS:       ; %bb.0:
1860; GFX10PLUS-NEXT:    s_bfe_i64 s[4:5], s[2:3], 0x10000
1861; GFX10PLUS-NEXT:    s_sub_i32 s10, s3, 64
1862; GFX10PLUS-NEXT:    s_sub_i32 s2, 64, s3
1863; GFX10PLUS-NEXT:    s_cmp_lt_u32 s3, 64
1864; GFX10PLUS-NEXT:    s_cselect_b32 s11, 1, 0
1865; GFX10PLUS-NEXT:    s_cmp_eq_u32 s3, 0
1866; GFX10PLUS-NEXT:    s_cselect_b32 s12, 1, 0
1867; GFX10PLUS-NEXT:    s_lshr_b64 s[6:7], s[0:1], s3
1868; GFX10PLUS-NEXT:    s_lshl_b64 s[8:9], s[4:5], s2
1869; GFX10PLUS-NEXT:    s_ashr_i64 s[2:3], s[4:5], s3
1870; GFX10PLUS-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
1871; GFX10PLUS-NEXT:    s_ashr_i32 s3, s5, 31
1872; GFX10PLUS-NEXT:    s_ashr_i64 s[4:5], s[4:5], s10
1873; GFX10PLUS-NEXT:    s_cmp_lg_u32 s11, 0
1874; GFX10PLUS-NEXT:    s_cselect_b64 s[4:5], s[6:7], s[4:5]
1875; GFX10PLUS-NEXT:    s_cmp_lg_u32 s12, 0
1876; GFX10PLUS-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
1877; GFX10PLUS-NEXT:    s_cmp_lg_u32 s11, 0
1878; GFX10PLUS-NEXT:    s_cselect_b32 s2, s2, s3
1879; GFX10PLUS-NEXT:    ; return to shader part epilog
1880  %result = ashr i65 %value, %amount
1881  ret i65 %result
1882}
1883
; Test: arithmetic shift right of an inreg i65 value by the constant 33 on
; an amdgpu_ps function.
; With a constant amount the checks expect six scalar instructions and no
; compares or selects: s_bfe_i64 on s[2:3], a 32-bit logical right shift of
; s1 by 1 into s0, zeroing of s1, a 64-bit left shift of s[2:3] by 31, an
; OR into s[0:1], and an arithmetic shift of s3 by 1 into s2.
; The GFX10PLUS group expects the last two instructions in the opposite
; order from the GCN group.
1884define amdgpu_ps i65 @s_ashr_i65_33(i65 inreg %value) {
1885; GCN-LABEL: s_ashr_i65_33:
1886; GCN:       ; %bb.0:
1887; GCN-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x10000
1888; GCN-NEXT:    s_lshr_b32 s0, s1, 1
1889; GCN-NEXT:    s_mov_b32 s1, 0
1890; GCN-NEXT:    s_lshl_b64 s[4:5], s[2:3], 31
1891; GCN-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
1892; GCN-NEXT:    s_ashr_i32 s2, s3, 1
1893; GCN-NEXT:    ; return to shader part epilog
1894;
1895; GFX10PLUS-LABEL: s_ashr_i65_33:
1896; GFX10PLUS:       ; %bb.0:
1897; GFX10PLUS-NEXT:    s_bfe_i64 s[2:3], s[2:3], 0x10000
1898; GFX10PLUS-NEXT:    s_lshr_b32 s0, s1, 1
1899; GFX10PLUS-NEXT:    s_mov_b32 s1, 0
1900; GFX10PLUS-NEXT:    s_lshl_b64 s[4:5], s[2:3], 31
1901; GFX10PLUS-NEXT:    s_ashr_i32 s2, s3, 1
1902; GFX10PLUS-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
1903; GFX10PLUS-NEXT:    ; return to shader part epilog
1904  %result = ashr i65 %value, 33
1905  ret i65 %result
1906}
1907
1908; FIXME: Argument lowering asserts
1909; define <2 x i65> @v_ashr_v2i65(<2 x i65> %value, <2 x i65> %amount) {
1910;   %result = ashr <2 x i65> %value, %amount
1911;   ret <2 x i65> %result
1912; }
1913
1914; define amdgpu_ps <2 x i65> @s_ashr_v2i65(<2 x i65> inreg %value, <2 x i65> inreg %amount) {
1915;   %result = ashr <2 x i65> %value, %amount
1916;   ret <2 x i65> %result
1917; }
1918