xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll (revision 17f3e00911b860d535f41185e605c47babcc2039)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
5; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
6; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
7
; Variable-amount shl of i8 with the default calling convention. The GFX6 and
; GFX10+ checks mask the amount with 0xff before shifting; the GFX8 and GFX9
; checks expect a single SDWA 16-bit shift instead.
8define i8 @v_shl_i8(i8 %value, i8 %amount) {
9; GFX6-LABEL: v_shl_i8:
10; GFX6:       ; %bb.0:
11; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12; GFX6-NEXT:    v_and_b32_e32 v1, 0xff, v1
13; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v1, v0
14; GFX6-NEXT:    s_setpc_b64 s[30:31]
15;
16; GFX8-LABEL: v_shl_i8:
17; GFX8:       ; %bb.0:
18; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
20; GFX8-NEXT:    s_setpc_b64 s[30:31]
21;
22; GFX9-LABEL: v_shl_i8:
23; GFX9:       ; %bb.0:
24; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25; GFX9-NEXT:    v_lshlrev_b16_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
26; GFX9-NEXT:    s_setpc_b64 s[30:31]
27;
28; GFX10PLUS-LABEL: v_shl_i8:
29; GFX10PLUS:       ; %bb.0:
30; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
31; GFX10PLUS-NEXT:    v_and_b32_e32 v1, 0xff, v1
32; GFX10PLUS-NEXT:    v_lshlrev_b16 v0, v1, v0
33; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
34  %result = shl i8 %value, %amount
35  ret i8 %result
36}
37
; shl of i8 by the constant 7. The checks expect the amount as an immediate
; operand of one shift: a 32-bit shift on GFX6, a 16-bit shift elsewhere.
38define i8 @v_shl_i8_7(i8 %value) {
39; GFX6-LABEL: v_shl_i8_7:
40; GFX6:       ; %bb.0:
41; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
42; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 7, v0
43; GFX6-NEXT:    s_setpc_b64 s[30:31]
44;
45; GFX8-LABEL: v_shl_i8_7:
46; GFX8:       ; %bb.0:
47; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
48; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 7, v0
49; GFX8-NEXT:    s_setpc_b64 s[30:31]
50;
51; GFX9-LABEL: v_shl_i8_7:
52; GFX9:       ; %bb.0:
53; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
54; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 7, v0
55; GFX9-NEXT:    s_setpc_b64 s[30:31]
56;
57; GFX10PLUS-LABEL: v_shl_i8_7:
58; GFX10PLUS:       ; %bb.0:
59; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
60; GFX10PLUS-NEXT:    v_lshlrev_b16 v0, 7, v0
61; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
62  %result = shl i8 %value, 7
63  ret i8 %result
64}
65
; Variable-amount shl of i8 in an amdgpu_ps function with both operands inreg.
; The checks expect s_lshl_b32; on GFX8 and newer it is preceded by an
; s_and_b32 of s0 with 0xff.
66define amdgpu_ps i8 @s_shl_i8(i8 inreg %value, i8 inreg %amount) {
67; GFX6-LABEL: s_shl_i8:
68; GFX6:       ; %bb.0:
69; GFX6-NEXT:    s_lshl_b32 s0, s0, s1
70; GFX6-NEXT:    ; return to shader part epilog
71;
72; GFX8-LABEL: s_shl_i8:
73; GFX8:       ; %bb.0:
74; GFX8-NEXT:    s_and_b32 s0, s0, 0xff
75; GFX8-NEXT:    s_lshl_b32 s0, s0, s1
76; GFX8-NEXT:    ; return to shader part epilog
77;
78; GFX9-LABEL: s_shl_i8:
79; GFX9:       ; %bb.0:
80; GFX9-NEXT:    s_and_b32 s0, s0, 0xff
81; GFX9-NEXT:    s_lshl_b32 s0, s0, s1
82; GFX9-NEXT:    ; return to shader part epilog
83;
84; GFX10PLUS-LABEL: s_shl_i8:
85; GFX10PLUS:       ; %bb.0:
86; GFX10PLUS-NEXT:    s_and_b32 s0, s0, 0xff
87; GFX10PLUS-NEXT:    s_lshl_b32 s0, s0, s1
88; GFX10PLUS-NEXT:    ; return to shader part epilog
89  %result = shl i8 %value, %amount
90  ret i8 %result
91}
92
; shl of an inreg i8 by the constant 7 in an amdgpu_ps function. Every target
; is expected to emit a single s_lshl_b32 with the immediate 7.
93define amdgpu_ps i8 @s_shl_i8_7(i8 inreg %value) {
94; GCN-LABEL: s_shl_i8_7:
95; GCN:       ; %bb.0:
96; GCN-NEXT:    s_lshl_b32 s0, s0, 7
97; GCN-NEXT:    ; return to shader part epilog
98;
99; GFX10PLUS-LABEL: s_shl_i8_7:
100; GFX10PLUS:       ; %bb.0:
101; GFX10PLUS-NEXT:    s_lshl_b32 s0, s0, 7
102; GFX10PLUS-NEXT:    ; return to shader part epilog
103  %result = shl i8 %value, 7
104  ret i8 %result
105}
106
107
; Variable-amount shl of i24 with the default calling convention. The checks
; expect the amount in v1 to be masked with 0xffffff before a 32-bit shift.
108define i24 @v_shl_i24(i24 %value, i24 %amount) {
109; GCN-LABEL: v_shl_i24:
110; GCN:       ; %bb.0:
111; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
112; GCN-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
113; GCN-NEXT:    v_lshlrev_b32_e32 v0, v1, v0
114; GCN-NEXT:    s_setpc_b64 s[30:31]
115;
116; GFX10PLUS-LABEL: v_shl_i24:
117; GFX10PLUS:       ; %bb.0:
118; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
119; GFX10PLUS-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
120; GFX10PLUS-NEXT:    v_lshlrev_b32_e32 v0, v1, v0
121; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
122  %result = shl i24 %value, %amount
123  ret i24 %result
124}
125
; shl of i24 by the constant 7. The checks expect a single 32-bit
; v_lshlrev_b32_e32 with the immediate 7 on every target.
126define i24 @v_shl_i24_7(i24 %value) {
127; GCN-LABEL: v_shl_i24_7:
128; GCN:       ; %bb.0:
129; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
130; GCN-NEXT:    v_lshlrev_b32_e32 v0, 7, v0
131; GCN-NEXT:    s_setpc_b64 s[30:31]
132;
133; GFX10PLUS-LABEL: v_shl_i24_7:
134; GFX10PLUS:       ; %bb.0:
135; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
136; GFX10PLUS-NEXT:    v_lshlrev_b32_e32 v0, 7, v0
137; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
138  %result = shl i24 %value, 7
139  ret i24 %result
140}
141
; Variable-amount shl of i24 in an amdgpu_ps function with both operands inreg.
; The expected output is a single s_lshl_b32 of s0 by s1, with no masking.
142define amdgpu_ps i24 @s_shl_i24(i24 inreg %value, i24 inreg %amount) {
143; GCN-LABEL: s_shl_i24:
144; GCN:       ; %bb.0:
145; GCN-NEXT:    s_lshl_b32 s0, s0, s1
146; GCN-NEXT:    ; return to shader part epilog
147;
148; GFX10PLUS-LABEL: s_shl_i24:
149; GFX10PLUS:       ; %bb.0:
150; GFX10PLUS-NEXT:    s_lshl_b32 s0, s0, s1
151; GFX10PLUS-NEXT:    ; return to shader part epilog
152  %result = shl i24 %value, %amount
153  ret i24 %result
154}
155
; shl of an inreg i24 by the constant 7 in an amdgpu_ps function. The checks
; expect a single s_lshl_b32 with the immediate 7.
156define amdgpu_ps i24 @s_shl_i24_7(i24 inreg %value) {
157; GCN-LABEL: s_shl_i24_7:
158; GCN:       ; %bb.0:
159; GCN-NEXT:    s_lshl_b32 s0, s0, 7
160; GCN-NEXT:    ; return to shader part epilog
161;
162; GFX10PLUS-LABEL: s_shl_i24_7:
163; GFX10PLUS:       ; %bb.0:
164; GFX10PLUS-NEXT:    s_lshl_b32 s0, s0, 7
165; GFX10PLUS-NEXT:    ; return to shader part epilog
166  %result = shl i24 %value, 7
167  ret i24 %result
168}
169
; Variable-amount shl of i32 with the default calling convention. The checks
; expect a single v_lshlrev_b32_e32 of v0 by v1 on every target.
170define i32 @v_shl_i32(i32 %value, i32 %amount) {
171; GCN-LABEL: v_shl_i32:
172; GCN:       ; %bb.0:
173; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
174; GCN-NEXT:    v_lshlrev_b32_e32 v0, v1, v0
175; GCN-NEXT:    s_setpc_b64 s[30:31]
176;
177; GFX10PLUS-LABEL: v_shl_i32:
178; GFX10PLUS:       ; %bb.0:
179; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
180; GFX10PLUS-NEXT:    v_lshlrev_b32_e32 v0, v1, v0
181; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
182  %result = shl i32 %value, %amount
183  ret i32 %result
184}
185
; shl of i32 by the constant 31. The checks expect a single v_lshlrev_b32_e32
; with the immediate 31.
186define i32 @v_shl_i32_31(i32 %value) {
187; GCN-LABEL: v_shl_i32_31:
188; GCN:       ; %bb.0:
189; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
190; GCN-NEXT:    v_lshlrev_b32_e32 v0, 31, v0
191; GCN-NEXT:    s_setpc_b64 s[30:31]
192;
193; GFX10PLUS-LABEL: v_shl_i32_31:
194; GFX10PLUS:       ; %bb.0:
195; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
196; GFX10PLUS-NEXT:    v_lshlrev_b32_e32 v0, 31, v0
197; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
198  %result = shl i32 %value, 31
199  ret i32 %result
200}
201
; Variable-amount shl of i32 in an amdgpu_ps function with both operands inreg.
; The checks expect a single s_lshl_b32 of s0 by s1.
202define amdgpu_ps i32 @s_shl_i32(i32 inreg %value, i32 inreg %amount) {
203; GCN-LABEL: s_shl_i32:
204; GCN:       ; %bb.0:
205; GCN-NEXT:    s_lshl_b32 s0, s0, s1
206; GCN-NEXT:    ; return to shader part epilog
207;
208; GFX10PLUS-LABEL: s_shl_i32:
209; GFX10PLUS:       ; %bb.0:
210; GFX10PLUS-NEXT:    s_lshl_b32 s0, s0, s1
211; GFX10PLUS-NEXT:    ; return to shader part epilog
212  %result = shl i32 %value, %amount
213  ret i32 %result
214}
215
; shl of an inreg i32 by the constant 31 in an amdgpu_ps function. The checks
; expect a single s_lshl_b32 with the immediate 31.
216define amdgpu_ps i32 @s_shl_i32_31(i32 inreg %value) {
217; GCN-LABEL: s_shl_i32_31:
218; GCN:       ; %bb.0:
219; GCN-NEXT:    s_lshl_b32 s0, s0, 31
220; GCN-NEXT:    ; return to shader part epilog
221;
222; GFX10PLUS-LABEL: s_shl_i32_31:
223; GFX10PLUS:       ; %bb.0:
224; GFX10PLUS-NEXT:    s_lshl_b32 s0, s0, 31
225; GFX10PLUS-NEXT:    ; return to shader part epilog
226  %result = shl i32 %value, 31
227  ret i32 %result
228}
229
; shl of i32 with an inreg value and a non-inreg amount; the result is bitcast
; to float for the return. The GFX6 checks expect the non-reversed
; v_lshl_b32_e32 with s0 as the first source, while the other targets expect
; v_lshlrev_b32_e64 with s0 as the last source.
230define amdgpu_ps float @shl_i32_sv(i32 inreg %value, i32 %amount) {
231; GFX6-LABEL: shl_i32_sv:
232; GFX6:       ; %bb.0:
233; GFX6-NEXT:    v_lshl_b32_e32 v0, s0, v0
234; GFX6-NEXT:    ; return to shader part epilog
235;
236; GFX8-LABEL: shl_i32_sv:
237; GFX8:       ; %bb.0:
238; GFX8-NEXT:    v_lshlrev_b32_e64 v0, v0, s0
239; GFX8-NEXT:    ; return to shader part epilog
240;
241; GFX9-LABEL: shl_i32_sv:
242; GFX9:       ; %bb.0:
243; GFX9-NEXT:    v_lshlrev_b32_e64 v0, v0, s0
244; GFX9-NEXT:    ; return to shader part epilog
245;
246; GFX10PLUS-LABEL: shl_i32_sv:
247; GFX10PLUS:       ; %bb.0:
248; GFX10PLUS-NEXT:    v_lshlrev_b32_e64 v0, v0, s0
249; GFX10PLUS-NEXT:    ; return to shader part epilog
250  %result = shl i32 %value, %amount
251  %cast = bitcast i32 %result to float
252  ret float %cast
253}
254
; shl of i32 with a non-inreg value and an inreg amount; the result is bitcast
; to float for the return. The checks expect a single v_lshlrev_b32_e32 that
; takes the amount directly from s0.
255define amdgpu_ps float @shl_i32_vs(i32 %value, i32 inreg %amount) {
256; GCN-LABEL: shl_i32_vs:
257; GCN:       ; %bb.0:
258; GCN-NEXT:    v_lshlrev_b32_e32 v0, s0, v0
259; GCN-NEXT:    ; return to shader part epilog
260;
261; GFX10PLUS-LABEL: shl_i32_vs:
262; GFX10PLUS:       ; %bb.0:
263; GFX10PLUS-NEXT:    v_lshlrev_b32_e32 v0, s0, v0
264; GFX10PLUS-NEXT:    ; return to shader part epilog
265  %result = shl i32 %value, %amount
266  %cast = bitcast i32 %result to float
267  ret float %cast
268}
269
; Element-wise variable shl of <2 x i32>. The checks expect one
; v_lshlrev_b32_e32 per element (v0 by v2, v1 by v3).
270define <2 x i32> @v_shl_v2i32(<2 x i32> %value, <2 x i32> %amount) {
271; GCN-LABEL: v_shl_v2i32:
272; GCN:       ; %bb.0:
273; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
274; GCN-NEXT:    v_lshlrev_b32_e32 v0, v2, v0
275; GCN-NEXT:    v_lshlrev_b32_e32 v1, v3, v1
276; GCN-NEXT:    s_setpc_b64 s[30:31]
277;
278; GFX10PLUS-LABEL: v_shl_v2i32:
279; GFX10PLUS:       ; %bb.0:
280; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
281; GFX10PLUS-NEXT:    v_lshlrev_b32_e32 v0, v2, v0
282; GFX10PLUS-NEXT:    v_lshlrev_b32_e32 v1, v3, v1
283; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
284  %result = shl <2 x i32> %value, %amount
285  ret <2 x i32> %result
286}
287
; shl of <2 x i32> by the constant vector <31, 31>. The checks expect one
; v_lshlrev_b32_e32 with the immediate 31 per element.
288define <2 x i32> @v_shl_v2i32_31(<2 x i32> %value) {
289; GCN-LABEL: v_shl_v2i32_31:
290; GCN:       ; %bb.0:
291; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
292; GCN-NEXT:    v_lshlrev_b32_e32 v0, 31, v0
293; GCN-NEXT:    v_lshlrev_b32_e32 v1, 31, v1
294; GCN-NEXT:    s_setpc_b64 s[30:31]
295;
296; GFX10PLUS-LABEL: v_shl_v2i32_31:
297; GFX10PLUS:       ; %bb.0:
298; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
299; GFX10PLUS-NEXT:    v_lshlrev_b32_e32 v0, 31, v0
300; GFX10PLUS-NEXT:    v_lshlrev_b32_e32 v1, 31, v1
301; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
302  %result = shl <2 x i32> %value, <i32 31, i32 31>
303  ret <2 x i32> %result
304}
305
; Element-wise variable shl of <2 x i32> with both operands inreg. The checks
; expect one s_lshl_b32 per element (s0 by s2, s1 by s3).
306define amdgpu_ps <2 x i32> @s_shl_v2i32(<2 x i32> inreg %value, <2 x i32> inreg %amount) {
307; GCN-LABEL: s_shl_v2i32:
308; GCN:       ; %bb.0:
309; GCN-NEXT:    s_lshl_b32 s0, s0, s2
310; GCN-NEXT:    s_lshl_b32 s1, s1, s3
311; GCN-NEXT:    ; return to shader part epilog
312;
313; GFX10PLUS-LABEL: s_shl_v2i32:
314; GFX10PLUS:       ; %bb.0:
315; GFX10PLUS-NEXT:    s_lshl_b32 s0, s0, s2
316; GFX10PLUS-NEXT:    s_lshl_b32 s1, s1, s3
317; GFX10PLUS-NEXT:    ; return to shader part epilog
318  %result = shl <2 x i32> %value, %amount
319  ret <2 x i32> %result
320}
321
; Element-wise variable shl of <3 x i32>. The checks expect three
; v_lshlrev_b32_e32 instructions, values in v0-v2 and amounts in v3-v5.
322define <3 x i32> @v_shl_v3i32(<3 x i32> %value, <3 x i32> %amount) {
323; GCN-LABEL: v_shl_v3i32:
324; GCN:       ; %bb.0:
325; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
326; GCN-NEXT:    v_lshlrev_b32_e32 v0, v3, v0
327; GCN-NEXT:    v_lshlrev_b32_e32 v1, v4, v1
328; GCN-NEXT:    v_lshlrev_b32_e32 v2, v5, v2
329; GCN-NEXT:    s_setpc_b64 s[30:31]
330;
331; GFX10PLUS-LABEL: v_shl_v3i32:
332; GFX10PLUS:       ; %bb.0:
333; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
334; GFX10PLUS-NEXT:    v_lshlrev_b32_e32 v0, v3, v0
335; GFX10PLUS-NEXT:    v_lshlrev_b32_e32 v1, v4, v1
336; GFX10PLUS-NEXT:    v_lshlrev_b32_e32 v2, v5, v2
337; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
338  %result = shl <3 x i32> %value, %amount
339  ret <3 x i32> %result
340}
341
; Element-wise variable shl of <3 x i32> with both operands inreg. The checks
; expect three s_lshl_b32 instructions, values in s0-s2 and amounts in s3-s5.
342define amdgpu_ps <3 x i32> @s_shl_v3i32(<3 x i32> inreg %value, <3 x i32> inreg %amount) {
343; GCN-LABEL: s_shl_v3i32:
344; GCN:       ; %bb.0:
345; GCN-NEXT:    s_lshl_b32 s0, s0, s3
346; GCN-NEXT:    s_lshl_b32 s1, s1, s4
347; GCN-NEXT:    s_lshl_b32 s2, s2, s5
348; GCN-NEXT:    ; return to shader part epilog
349;
350; GFX10PLUS-LABEL: s_shl_v3i32:
351; GFX10PLUS:       ; %bb.0:
352; GFX10PLUS-NEXT:    s_lshl_b32 s0, s0, s3
353; GFX10PLUS-NEXT:    s_lshl_b32 s1, s1, s4
354; GFX10PLUS-NEXT:    s_lshl_b32 s2, s2, s5
355; GFX10PLUS-NEXT:    ; return to shader part epilog
356  %result = shl <3 x i32> %value, %amount
357  ret <3 x i32> %result
358}
359
; Element-wise variable shl of <4 x i32>. The checks expect four
; v_lshlrev_b32_e32 instructions, values in v0-v3 and amounts in v4-v7.
360define <4 x i32> @v_shl_v4i32(<4 x i32> %value, <4 x i32> %amount) {
361; GCN-LABEL: v_shl_v4i32:
362; GCN:       ; %bb.0:
363; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
364; GCN-NEXT:    v_lshlrev_b32_e32 v0, v4, v0
365; GCN-NEXT:    v_lshlrev_b32_e32 v1, v5, v1
366; GCN-NEXT:    v_lshlrev_b32_e32 v2, v6, v2
367; GCN-NEXT:    v_lshlrev_b32_e32 v3, v7, v3
368; GCN-NEXT:    s_setpc_b64 s[30:31]
369;
370; GFX10PLUS-LABEL: v_shl_v4i32:
371; GFX10PLUS:       ; %bb.0:
372; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
373; GFX10PLUS-NEXT:    v_lshlrev_b32_e32 v0, v4, v0
374; GFX10PLUS-NEXT:    v_lshlrev_b32_e32 v1, v5, v1
375; GFX10PLUS-NEXT:    v_lshlrev_b32_e32 v2, v6, v2
376; GFX10PLUS-NEXT:    v_lshlrev_b32_e32 v3, v7, v3
377; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
378  %result = shl <4 x i32> %value, %amount
379  ret <4 x i32> %result
380}
381
; Element-wise variable shl of <4 x i32> with both operands inreg. The checks
; expect four s_lshl_b32 instructions, values in s0-s3 and amounts in s4-s7.
382define amdgpu_ps <4 x i32> @s_shl_v4i32(<4 x i32> inreg %value, <4 x i32> inreg %amount) {
383; GCN-LABEL: s_shl_v4i32:
384; GCN:       ; %bb.0:
385; GCN-NEXT:    s_lshl_b32 s0, s0, s4
386; GCN-NEXT:    s_lshl_b32 s1, s1, s5
387; GCN-NEXT:    s_lshl_b32 s2, s2, s6
388; GCN-NEXT:    s_lshl_b32 s3, s3, s7
389; GCN-NEXT:    ; return to shader part epilog
390;
391; GFX10PLUS-LABEL: s_shl_v4i32:
392; GFX10PLUS:       ; %bb.0:
393; GFX10PLUS-NEXT:    s_lshl_b32 s0, s0, s4
394; GFX10PLUS-NEXT:    s_lshl_b32 s1, s1, s5
395; GFX10PLUS-NEXT:    s_lshl_b32 s2, s2, s6
396; GFX10PLUS-NEXT:    s_lshl_b32 s3, s3, s7
397; GFX10PLUS-NEXT:    ; return to shader part epilog
398  %result = shl <4 x i32> %value, %amount
399  ret <4 x i32> %result
400}
401
; Element-wise variable shl of <5 x i32>. The checks expect five
; v_lshlrev_b32_e32 instructions, values in v0-v4 and amounts in v5-v9.
402define <5 x i32> @v_shl_v5i32(<5 x i32> %value, <5 x i32> %amount) {
403; GCN-LABEL: v_shl_v5i32:
404; GCN:       ; %bb.0:
405; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
406; GCN-NEXT:    v_lshlrev_b32_e32 v0, v5, v0
407; GCN-NEXT:    v_lshlrev_b32_e32 v1, v6, v1
408; GCN-NEXT:    v_lshlrev_b32_e32 v2, v7, v2
409; GCN-NEXT:    v_lshlrev_b32_e32 v3, v8, v3
410; GCN-NEXT:    v_lshlrev_b32_e32 v4, v9, v4
411; GCN-NEXT:    s_setpc_b64 s[30:31]
412;
413; GFX10PLUS-LABEL: v_shl_v5i32:
414; GFX10PLUS:       ; %bb.0:
415; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
416; GFX10PLUS-NEXT:    v_lshlrev_b32_e32 v0, v5, v0
417; GFX10PLUS-NEXT:    v_lshlrev_b32_e32 v1, v6, v1
418; GFX10PLUS-NEXT:    v_lshlrev_b32_e32 v2, v7, v2
419; GFX10PLUS-NEXT:    v_lshlrev_b32_e32 v3, v8, v3
420; GFX10PLUS-NEXT:    v_lshlrev_b32_e32 v4, v9, v4
421; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
422  %result = shl <5 x i32> %value, %amount
423  ret <5 x i32> %result
424}
425
; Element-wise variable shl of <5 x i32> with both operands inreg. The checks
; expect five s_lshl_b32 instructions, values in s0-s4 and amounts in s5-s9.
426define amdgpu_ps <5 x i32> @s_shl_v5i32(<5 x i32> inreg %value, <5 x i32> inreg %amount) {
427; GCN-LABEL: s_shl_v5i32:
428; GCN:       ; %bb.0:
429; GCN-NEXT:    s_lshl_b32 s0, s0, s5
430; GCN-NEXT:    s_lshl_b32 s1, s1, s6
431; GCN-NEXT:    s_lshl_b32 s2, s2, s7
432; GCN-NEXT:    s_lshl_b32 s3, s3, s8
433; GCN-NEXT:    s_lshl_b32 s4, s4, s9
434; GCN-NEXT:    ; return to shader part epilog
435;
436; GFX10PLUS-LABEL: s_shl_v5i32:
437; GFX10PLUS:       ; %bb.0:
438; GFX10PLUS-NEXT:    s_lshl_b32 s0, s0, s5
439; GFX10PLUS-NEXT:    s_lshl_b32 s1, s1, s6
440; GFX10PLUS-NEXT:    s_lshl_b32 s2, s2, s7
441; GFX10PLUS-NEXT:    s_lshl_b32 s3, s3, s8
442; GFX10PLUS-NEXT:    s_lshl_b32 s4, s4, s9
443; GFX10PLUS-NEXT:    ; return to shader part epilog
444  %result = shl <5 x i32> %value, %amount
445  ret <5 x i32> %result
446}
447
; Element-wise variable shl of <16 x i32>. The checks expect fifteen shifts
; whose amounts come from v16-v30, plus one load from the address in s32
; (buffer_load_dword before GFX11, scratch_load_b32 on GFX11) that supplies the
; amount for element 15 after an s_waitcnt vmcnt(0). The pre-GFX10 checks reuse
; v16 for the loaded amount; GFX10 and GFX11 load into v31.
448define <16 x i32> @v_shl_v16i32(<16 x i32> %value, <16 x i32> %amount) {
449; GCN-LABEL: v_shl_v16i32:
450; GCN:       ; %bb.0:
451; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
452; GCN-NEXT:    v_lshlrev_b32_e32 v0, v16, v0
453; GCN-NEXT:    buffer_load_dword v16, off, s[0:3], s32
454; GCN-NEXT:    v_lshlrev_b32_e32 v1, v17, v1
455; GCN-NEXT:    v_lshlrev_b32_e32 v2, v18, v2
456; GCN-NEXT:    v_lshlrev_b32_e32 v3, v19, v3
457; GCN-NEXT:    v_lshlrev_b32_e32 v4, v20, v4
458; GCN-NEXT:    v_lshlrev_b32_e32 v5, v21, v5
459; GCN-NEXT:    v_lshlrev_b32_e32 v6, v22, v6
460; GCN-NEXT:    v_lshlrev_b32_e32 v7, v23, v7
461; GCN-NEXT:    v_lshlrev_b32_e32 v8, v24, v8
462; GCN-NEXT:    v_lshlrev_b32_e32 v9, v25, v9
463; GCN-NEXT:    v_lshlrev_b32_e32 v10, v26, v10
464; GCN-NEXT:    v_lshlrev_b32_e32 v11, v27, v11
465; GCN-NEXT:    v_lshlrev_b32_e32 v12, v28, v12
466; GCN-NEXT:    v_lshlrev_b32_e32 v13, v29, v13
467; GCN-NEXT:    v_lshlrev_b32_e32 v14, v30, v14
468; GCN-NEXT:    s_waitcnt vmcnt(0)
469; GCN-NEXT:    v_lshlrev_b32_e32 v15, v16, v15
470; GCN-NEXT:    s_setpc_b64 s[30:31]
471;
472; GFX10-LABEL: v_shl_v16i32:
473; GFX10:       ; %bb.0:
474; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
475; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32
476; GFX10-NEXT:    v_lshlrev_b32_e32 v0, v16, v0
477; GFX10-NEXT:    v_lshlrev_b32_e32 v1, v17, v1
478; GFX10-NEXT:    v_lshlrev_b32_e32 v2, v18, v2
479; GFX10-NEXT:    v_lshlrev_b32_e32 v3, v19, v3
480; GFX10-NEXT:    v_lshlrev_b32_e32 v4, v20, v4
481; GFX10-NEXT:    v_lshlrev_b32_e32 v5, v21, v5
482; GFX10-NEXT:    v_lshlrev_b32_e32 v6, v22, v6
483; GFX10-NEXT:    v_lshlrev_b32_e32 v7, v23, v7
484; GFX10-NEXT:    v_lshlrev_b32_e32 v8, v24, v8
485; GFX10-NEXT:    v_lshlrev_b32_e32 v9, v25, v9
486; GFX10-NEXT:    v_lshlrev_b32_e32 v10, v26, v10
487; GFX10-NEXT:    v_lshlrev_b32_e32 v11, v27, v11
488; GFX10-NEXT:    v_lshlrev_b32_e32 v12, v28, v12
489; GFX10-NEXT:    v_lshlrev_b32_e32 v13, v29, v13
490; GFX10-NEXT:    v_lshlrev_b32_e32 v14, v30, v14
491; GFX10-NEXT:    s_waitcnt vmcnt(0)
492; GFX10-NEXT:    v_lshlrev_b32_e32 v15, v31, v15
493; GFX10-NEXT:    s_setpc_b64 s[30:31]
494;
495; GFX11-LABEL: v_shl_v16i32:
496; GFX11:       ; %bb.0:
497; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
498; GFX11-NEXT:    scratch_load_b32 v31, off, s32
499; GFX11-NEXT:    v_lshlrev_b32_e32 v0, v16, v0
500; GFX11-NEXT:    v_lshlrev_b32_e32 v1, v17, v1
501; GFX11-NEXT:    v_lshlrev_b32_e32 v2, v18, v2
502; GFX11-NEXT:    v_lshlrev_b32_e32 v3, v19, v3
503; GFX11-NEXT:    v_lshlrev_b32_e32 v4, v20, v4
504; GFX11-NEXT:    v_lshlrev_b32_e32 v5, v21, v5
505; GFX11-NEXT:    v_lshlrev_b32_e32 v6, v22, v6
506; GFX11-NEXT:    v_lshlrev_b32_e32 v7, v23, v7
507; GFX11-NEXT:    v_lshlrev_b32_e32 v8, v24, v8
508; GFX11-NEXT:    v_lshlrev_b32_e32 v9, v25, v9
509; GFX11-NEXT:    v_lshlrev_b32_e32 v10, v26, v10
510; GFX11-NEXT:    v_lshlrev_b32_e32 v11, v27, v11
511; GFX11-NEXT:    v_lshlrev_b32_e32 v12, v28, v12
512; GFX11-NEXT:    v_lshlrev_b32_e32 v13, v29, v13
513; GFX11-NEXT:    v_lshlrev_b32_e32 v14, v30, v14
514; GFX11-NEXT:    s_waitcnt vmcnt(0)
515; GFX11-NEXT:    v_lshlrev_b32_e32 v15, v31, v15
516; GFX11-NEXT:    s_setpc_b64 s[30:31]
517  %result = shl <16 x i32> %value, %amount
518  ret <16 x i32> %result
519}
520
; Element-wise variable shl of <16 x i32> with both operands inreg. The checks
; expect sixteen s_lshl_b32 instructions, values in s0-s15 and amounts in
; s16-s31.
521define amdgpu_ps <16 x i32> @s_shl_v16i32(<16 x i32> inreg %value, <16 x i32> inreg %amount) {
522; GCN-LABEL: s_shl_v16i32:
523; GCN:       ; %bb.0:
524; GCN-NEXT:    s_lshl_b32 s0, s0, s16
525; GCN-NEXT:    s_lshl_b32 s1, s1, s17
526; GCN-NEXT:    s_lshl_b32 s2, s2, s18
527; GCN-NEXT:    s_lshl_b32 s3, s3, s19
528; GCN-NEXT:    s_lshl_b32 s4, s4, s20
529; GCN-NEXT:    s_lshl_b32 s5, s5, s21
530; GCN-NEXT:    s_lshl_b32 s6, s6, s22
531; GCN-NEXT:    s_lshl_b32 s7, s7, s23
532; GCN-NEXT:    s_lshl_b32 s8, s8, s24
533; GCN-NEXT:    s_lshl_b32 s9, s9, s25
534; GCN-NEXT:    s_lshl_b32 s10, s10, s26
535; GCN-NEXT:    s_lshl_b32 s11, s11, s27
536; GCN-NEXT:    s_lshl_b32 s12, s12, s28
537; GCN-NEXT:    s_lshl_b32 s13, s13, s29
538; GCN-NEXT:    s_lshl_b32 s14, s14, s30
539; GCN-NEXT:    s_lshl_b32 s15, s15, s31
540; GCN-NEXT:    ; return to shader part epilog
541;
542; GFX10PLUS-LABEL: s_shl_v16i32:
543; GFX10PLUS:       ; %bb.0:
544; GFX10PLUS-NEXT:    s_lshl_b32 s0, s0, s16
545; GFX10PLUS-NEXT:    s_lshl_b32 s1, s1, s17
546; GFX10PLUS-NEXT:    s_lshl_b32 s2, s2, s18
547; GFX10PLUS-NEXT:    s_lshl_b32 s3, s3, s19
548; GFX10PLUS-NEXT:    s_lshl_b32 s4, s4, s20
549; GFX10PLUS-NEXT:    s_lshl_b32 s5, s5, s21
550; GFX10PLUS-NEXT:    s_lshl_b32 s6, s6, s22
551; GFX10PLUS-NEXT:    s_lshl_b32 s7, s7, s23
552; GFX10PLUS-NEXT:    s_lshl_b32 s8, s8, s24
553; GFX10PLUS-NEXT:    s_lshl_b32 s9, s9, s25
554; GFX10PLUS-NEXT:    s_lshl_b32 s10, s10, s26
555; GFX10PLUS-NEXT:    s_lshl_b32 s11, s11, s27
556; GFX10PLUS-NEXT:    s_lshl_b32 s12, s12, s28
557; GFX10PLUS-NEXT:    s_lshl_b32 s13, s13, s29
558; GFX10PLUS-NEXT:    s_lshl_b32 s14, s14, s30
559; GFX10PLUS-NEXT:    s_lshl_b32 s15, s15, s31
560; GFX10PLUS-NEXT:    ; return to shader part epilog
561  %result = shl <16 x i32> %value, %amount
562  ret <16 x i32> %result
563}
564
; Variable-amount shl of i16 with the default calling convention. The GFX6
; checks mask the amount with 0xffff and use a 32-bit shift; the other targets
; are expected to emit a single 16-bit v_lshlrev_b16.
565define i16 @v_shl_i16(i16 %value, i16 %amount) {
566; GFX6-LABEL: v_shl_i16:
567; GFX6:       ; %bb.0:
568; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
569; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
570; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v1, v0
571; GFX6-NEXT:    s_setpc_b64 s[30:31]
572;
573; GFX8-LABEL: v_shl_i16:
574; GFX8:       ; %bb.0:
575; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
576; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v1, v0
577; GFX8-NEXT:    s_setpc_b64 s[30:31]
578;
579; GFX9-LABEL: v_shl_i16:
580; GFX9:       ; %bb.0:
581; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
582; GFX9-NEXT:    v_lshlrev_b16_e32 v0, v1, v0
583; GFX9-NEXT:    s_setpc_b64 s[30:31]
584;
585; GFX10PLUS-LABEL: v_shl_i16:
586; GFX10PLUS:       ; %bb.0:
587; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
588; GFX10PLUS-NEXT:    v_lshlrev_b16 v0, v1, v0
589; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
590  %result = shl i16 %value, %amount
591  ret i16 %result
592}
593
; shl of i16 by the constant 15. The checks expect one shift with the immediate
; 15: a 32-bit shift on GFX6, a 16-bit shift elsewhere.
594define i16 @v_shl_i16_15(i16 %value) {
595; GFX6-LABEL: v_shl_i16_15:
596; GFX6:       ; %bb.0:
597; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
598; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 15, v0
599; GFX6-NEXT:    s_setpc_b64 s[30:31]
600;
601; GFX8-LABEL: v_shl_i16_15:
602; GFX8:       ; %bb.0:
603; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
604; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 15, v0
605; GFX8-NEXT:    s_setpc_b64 s[30:31]
606;
607; GFX9-LABEL: v_shl_i16_15:
608; GFX9:       ; %bb.0:
609; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
610; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 15, v0
611; GFX9-NEXT:    s_setpc_b64 s[30:31]
612;
613; GFX10PLUS-LABEL: v_shl_i16_15:
614; GFX10PLUS:       ; %bb.0:
615; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
616; GFX10PLUS-NEXT:    v_lshlrev_b16 v0, 15, v0
617; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
618  %result = shl i16 %value, 15
619  ret i16 %result
620}
621
; Variable-amount shl of i16 in an amdgpu_ps function with both operands inreg.
; The checks expect s_lshl_b32; on GFX8 and newer it is preceded by an
; s_and_b32 of s0 with 0xffff.
622define amdgpu_ps i16 @s_shl_i16(i16 inreg %value, i16 inreg %amount) {
623; GFX6-LABEL: s_shl_i16:
624; GFX6:       ; %bb.0:
625; GFX6-NEXT:    s_lshl_b32 s0, s0, s1
626; GFX6-NEXT:    ; return to shader part epilog
627;
628; GFX8-LABEL: s_shl_i16:
629; GFX8:       ; %bb.0:
630; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
631; GFX8-NEXT:    s_lshl_b32 s0, s0, s1
632; GFX8-NEXT:    ; return to shader part epilog
633;
634; GFX9-LABEL: s_shl_i16:
635; GFX9:       ; %bb.0:
636; GFX9-NEXT:    s_and_b32 s0, s0, 0xffff
637; GFX9-NEXT:    s_lshl_b32 s0, s0, s1
638; GFX9-NEXT:    ; return to shader part epilog
639;
640; GFX10PLUS-LABEL: s_shl_i16:
641; GFX10PLUS:       ; %bb.0:
642; GFX10PLUS-NEXT:    s_and_b32 s0, s0, 0xffff
643; GFX10PLUS-NEXT:    s_lshl_b32 s0, s0, s1
644; GFX10PLUS-NEXT:    ; return to shader part epilog
645  %result = shl i16 %value, %amount
646  ret i16 %result
647}
648
; shl of an inreg i16 by the constant 15 in an amdgpu_ps function. The checks
; expect a single s_lshl_b32 with the immediate 15.
649define amdgpu_ps i16 @s_shl_i16_15(i16 inreg %value) {
650; GCN-LABEL: s_shl_i16_15:
651; GCN:       ; %bb.0:
652; GCN-NEXT:    s_lshl_b32 s0, s0, 15
653; GCN-NEXT:    ; return to shader part epilog
654;
655; GFX10PLUS-LABEL: s_shl_i16_15:
656; GFX10PLUS:       ; %bb.0:
657; GFX10PLUS-NEXT:    s_lshl_b32 s0, s0, 15
658; GFX10PLUS-NEXT:    ; return to shader part epilog
659  %result = shl i16 %value, 15
660  ret i16 %result
661}
662
; shl of i16 with an inreg value and a non-inreg amount; the result is bitcast
; to half for the return. The GFX6 checks mask the amount in v0 with 0xffff and
; use the 32-bit v_lshl_b32_e32; the other targets are expected to emit one
; 16-bit shift that reads the value from s0.
663define amdgpu_ps half @shl_i16_sv(i16 inreg %value, i16 %amount) {
664; GFX6-LABEL: shl_i16_sv:
665; GFX6:       ; %bb.0:
666; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
667; GFX6-NEXT:    v_lshl_b32_e32 v0, s0, v0
668; GFX6-NEXT:    ; return to shader part epilog
669;
670; GFX8-LABEL: shl_i16_sv:
671; GFX8:       ; %bb.0:
672; GFX8-NEXT:    v_lshlrev_b16_e64 v0, v0, s0
673; GFX8-NEXT:    ; return to shader part epilog
674;
675; GFX9-LABEL: shl_i16_sv:
676; GFX9:       ; %bb.0:
677; GFX9-NEXT:    v_lshlrev_b16_e64 v0, v0, s0
678; GFX9-NEXT:    ; return to shader part epilog
679;
680; GFX10PLUS-LABEL: shl_i16_sv:
681; GFX10PLUS:       ; %bb.0:
682; GFX10PLUS-NEXT:    v_lshlrev_b16 v0, v0, s0
683; GFX10PLUS-NEXT:    ; return to shader part epilog
684  %result = shl i16 %value, %amount
685  %cast = bitcast i16 %result to half
686  ret half %cast
687}
688
; shl of i16 with a non-inreg value and an inreg amount; the result is bitcast
; to half for the return. The GFX6 checks mask the amount in s0 with 0xffff and
; use a 32-bit shift; the other targets are expected to emit one 16-bit shift
; that reads the amount from s0.
689define amdgpu_ps half @shl_i16_vs(i16 %value, i16 inreg %amount) {
690; GFX6-LABEL: shl_i16_vs:
691; GFX6:       ; %bb.0:
692; GFX6-NEXT:    s_and_b32 s0, s0, 0xffff
693; GFX6-NEXT:    v_lshlrev_b32_e32 v0, s0, v0
694; GFX6-NEXT:    ; return to shader part epilog
695;
696; GFX8-LABEL: shl_i16_vs:
697; GFX8:       ; %bb.0:
698; GFX8-NEXT:    v_lshlrev_b16_e32 v0, s0, v0
699; GFX8-NEXT:    ; return to shader part epilog
700;
701; GFX9-LABEL: shl_i16_vs:
702; GFX9:       ; %bb.0:
703; GFX9-NEXT:    v_lshlrev_b16_e32 v0, s0, v0
704; GFX9-NEXT:    ; return to shader part epilog
705;
706; GFX10PLUS-LABEL: shl_i16_vs:
707; GFX10PLUS:       ; %bb.0:
708; GFX10PLUS-NEXT:    v_lshlrev_b16 v0, s0, v0
709; GFX10PLUS-NEXT:    ; return to shader part epilog
710  %result = shl i16 %value, %amount
711  %cast = bitcast i16 %result to half
712  ret half %cast
713}
714
; Element-wise variable shl of <2 x i16> with the default calling convention.
; Expected lowering differs per target: GFX6 masks each amount with 0xffff and
; does two 32-bit shifts (v0 by v2, v1 by v3); GFX8 does a 16-bit shift plus an
; SDWA shift on WORD_1 and combines them with v_or_b32; GFX9 and GFX10+ emit a
; single v_pk_lshlrev_b16.
715define <2 x i16> @v_shl_v2i16(<2 x i16> %value, <2 x i16> %amount) {
716; GFX6-LABEL: v_shl_v2i16:
717; GFX6:       ; %bb.0:
718; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
719; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v2
720; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v2, v0
721; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v3
722; GFX6-NEXT:    v_lshlrev_b32_e32 v1, v2, v1
723; GFX6-NEXT:    s_setpc_b64 s[30:31]
724;
725; GFX8-LABEL: v_shl_v2i16:
726; GFX8:       ; %bb.0:
727; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
728; GFX8-NEXT:    v_lshlrev_b16_e32 v2, v1, v0
729; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
730; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
731; GFX8-NEXT:    s_setpc_b64 s[30:31]
732;
733; GFX9-LABEL: v_shl_v2i16:
734; GFX9:       ; %bb.0:
735; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
736; GFX9-NEXT:    v_pk_lshlrev_b16 v0, v1, v0
737; GFX9-NEXT:    s_setpc_b64 s[30:31]
738;
739; GFX10PLUS-LABEL: v_shl_v2i16:
740; GFX10PLUS:       ; %bb.0:
741; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
742; GFX10PLUS-NEXT:    v_pk_lshlrev_b16 v0, v1, v0
743; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
744  %result = shl <2 x i16> %value, %amount
745  ret <2 x i16> %result
746}
747
; shl of <2 x i16> by the constant vector <15, 15>. GFX6 is expected to shift
; v0 and v1 separately by the immediate 15; GFX8 materializes 15 in v2 for the
; SDWA shift of the upper half and ORs the halves; GFX9 and GFX10+ emit one
; v_pk_lshlrev_b16 with the immediate 15 and op_sel_hi:[0,1].
748define <2 x i16> @v_shl_v2i16_15(<2 x i16> %value) {
749; GFX6-LABEL: v_shl_v2i16_15:
750; GFX6:       ; %bb.0:
751; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
752; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 15, v0
753; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 15, v1
754; GFX6-NEXT:    s_setpc_b64 s[30:31]
755;
756; GFX8-LABEL: v_shl_v2i16_15:
757; GFX8:       ; %bb.0:
758; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
759; GFX8-NEXT:    v_mov_b32_e32 v2, 15
760; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 15, v0
761; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
762; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
763; GFX8-NEXT:    s_setpc_b64 s[30:31]
764;
765; GFX9-LABEL: v_shl_v2i16_15:
766; GFX9:       ; %bb.0:
767; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
768; GFX9-NEXT:    v_pk_lshlrev_b16 v0, 15, v0 op_sel_hi:[0,1]
769; GFX9-NEXT:    s_setpc_b64 s[30:31]
770;
771; GFX10PLUS-LABEL: v_shl_v2i16_15:
772; GFX10PLUS:       ; %bb.0:
773; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
774; GFX10PLUS-NEXT:    v_pk_lshlrev_b16 v0, 15, v0 op_sel_hi:[0,1]
775; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
776  %result = shl <2 x i16> %value, <i16 15, i16 15>
777  ret <2 x i16> %result
778}
779
; Element-wise variable shl of <2 x i16> with both operands inreg; the result
; is bitcast to i32 for the return. GFX6 is expected to shift s0/s1 by s2/s3
; and then pack the two results with and/shift/or. GFX8 and newer split each
; input with s_lshr_b32 by 16, shift both halves, and repack: GFX8 with
; shift/and/or, GFX9 and GFX10+ with s_pack_ll_b32_b16.
780define amdgpu_ps i32 @s_shl_v2i16(<2 x i16> inreg %value, <2 x i16> inreg %amount) {
781; GFX6-LABEL: s_shl_v2i16:
782; GFX6:       ; %bb.0:
783; GFX6-NEXT:    s_lshl_b32 s1, s1, s3
784; GFX6-NEXT:    s_lshl_b32 s0, s0, s2
785; GFX6-NEXT:    s_and_b32 s1, s1, 0xffff
786; GFX6-NEXT:    s_and_b32 s0, s0, 0xffff
787; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
788; GFX6-NEXT:    s_or_b32 s0, s0, s1
789; GFX6-NEXT:    ; return to shader part epilog
790;
791; GFX8-LABEL: s_shl_v2i16:
792; GFX8:       ; %bb.0:
793; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
794; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
795; GFX8-NEXT:    s_lshr_b32 s3, s1, 16
796; GFX8-NEXT:    s_lshl_b32 s0, s0, s1
797; GFX8-NEXT:    s_lshl_b32 s1, s2, s3
798; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
799; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
800; GFX8-NEXT:    s_or_b32 s0, s1, s0
801; GFX8-NEXT:    ; return to shader part epilog
802;
803; GFX9-LABEL: s_shl_v2i16:
804; GFX9:       ; %bb.0:
805; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
806; GFX9-NEXT:    s_lshr_b32 s3, s1, 16
807; GFX9-NEXT:    s_lshl_b32 s0, s0, s1
808; GFX9-NEXT:    s_lshl_b32 s1, s2, s3
809; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
810; GFX9-NEXT:    ; return to shader part epilog
811;
812; GFX10PLUS-LABEL: s_shl_v2i16:
813; GFX10PLUS:       ; %bb.0:
814; GFX10PLUS-NEXT:    s_lshr_b32 s2, s0, 16
815; GFX10PLUS-NEXT:    s_lshr_b32 s3, s1, 16
816; GFX10PLUS-NEXT:    s_lshl_b32 s0, s0, s1
817; GFX10PLUS-NEXT:    s_lshl_b32 s1, s2, s3
818; GFX10PLUS-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
819; GFX10PLUS-NEXT:    ; return to shader part epilog
820  %result = shl <2 x i16> %value, %amount
821  %cast = bitcast <2 x i16> %result to i32
822  ret i32 %cast
823}
824
; shl of <2 x i16> with an inreg value and a non-inreg amount; the result is
; bitcast to float for the return. GFX6 is expected to mask both amounts, shift
; s0/s1 with v_lshl_b32_e32, and pack the halves with and/shift/or. GFX8 moves
; the upper half of s0 into v2 for an SDWA shift and ORs the halves. GFX9 and
; GFX10+ emit a single v_pk_lshlrev_b16 reading the value from s0.
825define amdgpu_ps float @shl_v2i16_sv(<2 x i16> inreg %value, <2 x i16> %amount) {
826; GFX6-LABEL: shl_v2i16_sv:
827; GFX6:       ; %bb.0:
828; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
829; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
830; GFX6-NEXT:    v_lshl_b32_e32 v1, s1, v1
831; GFX6-NEXT:    v_lshl_b32_e32 v0, s0, v0
832; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
833; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
834; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
835; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
836; GFX6-NEXT:    ; return to shader part epilog
837;
838; GFX8-LABEL: shl_v2i16_sv:
839; GFX8:       ; %bb.0:
840; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
841; GFX8-NEXT:    v_mov_b32_e32 v2, s1
842; GFX8-NEXT:    v_lshlrev_b16_e64 v1, v0, s0
843; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
844; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
845; GFX8-NEXT:    ; return to shader part epilog
846;
847; GFX9-LABEL: shl_v2i16_sv:
848; GFX9:       ; %bb.0:
849; GFX9-NEXT:    v_pk_lshlrev_b16 v0, v0, s0
850; GFX9-NEXT:    ; return to shader part epilog
851;
852; GFX10PLUS-LABEL: shl_v2i16_sv:
853; GFX10PLUS:       ; %bb.0:
854; GFX10PLUS-NEXT:    v_pk_lshlrev_b16 v0, v0, s0
855; GFX10PLUS-NEXT:    ; return to shader part epilog
856  %result = shl <2 x i16> %value, %amount
857  %cast = bitcast <2 x i16> %result to float
858  ret float %cast
859}
860
; <2 x i16> shl in an amdgpu_ps function with the value as an ordinary
; argument and the amount passed inreg; the checks below expect the value in
; VGPRs and the amount in SGPRs. The result is bitcast to float for the return.
861define amdgpu_ps float @shl_v2i16_vs(<2 x i16> %value, <2 x i16> inreg %amount) {
862; GFX6-LABEL: shl_v2i16_vs:
863; GFX6:       ; %bb.0:
864; GFX6-NEXT:    s_and_b32 s0, s0, 0xffff
865; GFX6-NEXT:    v_lshlrev_b32_e32 v0, s0, v0
866; GFX6-NEXT:    s_and_b32 s0, s1, 0xffff
867; GFX6-NEXT:    v_lshlrev_b32_e32 v1, s0, v1
868; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
869; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
870; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
871; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
872; GFX6-NEXT:    ; return to shader part epilog
873;
874; GFX8-LABEL: shl_v2i16_vs:
875; GFX8:       ; %bb.0:
876; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
877; GFX8-NEXT:    v_mov_b32_e32 v2, s1
878; GFX8-NEXT:    v_lshlrev_b16_e32 v1, s0, v0
879; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
880; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
881; GFX8-NEXT:    ; return to shader part epilog
882;
883; GFX9-LABEL: shl_v2i16_vs:
884; GFX9:       ; %bb.0:
885; GFX9-NEXT:    v_pk_lshlrev_b16 v0, s0, v0
886; GFX9-NEXT:    ; return to shader part epilog
887;
888; GFX10PLUS-LABEL: shl_v2i16_vs:
889; GFX10PLUS:       ; %bb.0:
890; GFX10PLUS-NEXT:    v_pk_lshlrev_b16 v0, s0, v0
891; GFX10PLUS-NEXT:    ; return to shader part epilog
892  %result = shl <2 x i16> %value, %amount
893  %cast = bitcast <2 x i16> %result to float
894  ret float %cast
895}
896
897; FIXME: the <3 x i16> tests below are commented out (disabled).
898; define <3 x i16> @v_shl_v3i16(<3 x i16> %value, <3 x i16> %amount) {
899;   %result = shl <3 x i16> %value, %amount
900;   ret <3 x i16> %result
901; }
902
903; define amdgpu_ps <3 x i16> @s_shl_v3i16(<3 x i16> inreg %value, <3 x i16> inreg %amount) {
904;   %result = shl <3 x i16> %value, %amount
905;   ret <3 x i16> %result
906; }
907
; <4 x i16> shl with both operands as ordinary (non-inreg) arguments; the
; result is bitcast to <2 x float>. The GFX9 and GFX10PLUS checks expect one
; v_pk_lshlrev_b16 per 32-bit result register, while the GFX6 and GFX8 checks
; expect per-element shifts that are recombined with v_or_b32.
908define <2 x float> @v_shl_v4i16(<4 x i16> %value, <4 x i16> %amount) {
909; GFX6-LABEL: v_shl_v4i16:
910; GFX6:       ; %bb.0:
911; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
912; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v4
913; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v4, v0
914; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v5
915; GFX6-NEXT:    v_lshlrev_b32_e32 v1, v4, v1
916; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v6
917; GFX6-NEXT:    v_lshlrev_b32_e32 v2, v4, v2
918; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v7
919; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
920; GFX6-NEXT:    v_lshlrev_b32_e32 v3, v4, v3
921; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
922; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
923; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
924; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v2
925; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v3
926; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
927; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
928; GFX6-NEXT:    s_setpc_b64 s[30:31]
929;
930; GFX8-LABEL: v_shl_v4i16:
931; GFX8:       ; %bb.0:
932; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
933; GFX8-NEXT:    v_lshlrev_b16_e32 v4, v2, v0
934; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
935; GFX8-NEXT:    v_lshlrev_b16_e32 v2, v3, v1
936; GFX8-NEXT:    v_lshlrev_b16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
937; GFX8-NEXT:    v_or_b32_e32 v0, v4, v0
938; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
939; GFX8-NEXT:    s_setpc_b64 s[30:31]
940;
941; GFX9-LABEL: v_shl_v4i16:
942; GFX9:       ; %bb.0:
943; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
944; GFX9-NEXT:    v_pk_lshlrev_b16 v0, v2, v0
945; GFX9-NEXT:    v_pk_lshlrev_b16 v1, v3, v1
946; GFX9-NEXT:    s_setpc_b64 s[30:31]
947;
948; GFX10PLUS-LABEL: v_shl_v4i16:
949; GFX10PLUS:       ; %bb.0:
950; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
951; GFX10PLUS-NEXT:    v_pk_lshlrev_b16 v0, v2, v0
952; GFX10PLUS-NEXT:    v_pk_lshlrev_b16 v1, v3, v1
953; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
954  %result = shl <4 x i16> %value, %amount
955  %cast = bitcast <4 x i16> %result to <2 x float>
956  ret <2 x float> %cast
957}
958
; <4 x i16> shl in an amdgpu_ps function with both operands passed inreg; the
; result is bitcast to <2 x i32>. Every check below is a scalar (s_*)
; instruction, one s_lshl_b32 per element followed by repacking into 32 bits.
959define amdgpu_ps <2 x i32> @s_shl_v4i16(<4 x i16> inreg %value, <4 x i16> inreg %amount) {
960; GFX6-LABEL: s_shl_v4i16:
961; GFX6:       ; %bb.0:
962; GFX6-NEXT:    s_lshl_b32 s1, s1, s5
963; GFX6-NEXT:    s_lshl_b32 s0, s0, s4
964; GFX6-NEXT:    s_and_b32 s1, s1, 0xffff
965; GFX6-NEXT:    s_lshl_b32 s2, s2, s6
966; GFX6-NEXT:    s_lshl_b32 s3, s3, s7
967; GFX6-NEXT:    s_and_b32 s0, s0, 0xffff
968; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
969; GFX6-NEXT:    s_or_b32 s0, s0, s1
970; GFX6-NEXT:    s_and_b32 s1, s2, 0xffff
971; GFX6-NEXT:    s_and_b32 s2, s3, 0xffff
972; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
973; GFX6-NEXT:    s_or_b32 s1, s1, s2
974; GFX6-NEXT:    ; return to shader part epilog
975;
976; GFX8-LABEL: s_shl_v4i16:
977; GFX8:       ; %bb.0:
978; GFX8-NEXT:    s_lshr_b32 s4, s0, 16
979; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
980; GFX8-NEXT:    s_lshr_b32 s6, s2, 16
981; GFX8-NEXT:    s_lshr_b32 s5, s1, 16
982; GFX8-NEXT:    s_and_b32 s1, s1, 0xffff
983; GFX8-NEXT:    s_lshr_b32 s7, s3, 16
984; GFX8-NEXT:    s_lshl_b32 s0, s0, s2
985; GFX8-NEXT:    s_lshl_b32 s2, s4, s6
986; GFX8-NEXT:    s_lshl_b32 s1, s1, s3
987; GFX8-NEXT:    s_lshl_b32 s3, s5, s7
988; GFX8-NEXT:    s_lshl_b32 s2, s2, 16
989; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
990; GFX8-NEXT:    s_or_b32 s0, s2, s0
991; GFX8-NEXT:    s_lshl_b32 s2, s3, 16
992; GFX8-NEXT:    s_and_b32 s1, s1, 0xffff
993; GFX8-NEXT:    s_or_b32 s1, s2, s1
994; GFX8-NEXT:    ; return to shader part epilog
995;
996; GFX9-LABEL: s_shl_v4i16:
997; GFX9:       ; %bb.0:
998; GFX9-NEXT:    s_lshr_b32 s4, s0, 16
999; GFX9-NEXT:    s_lshr_b32 s5, s2, 16
1000; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
1001; GFX9-NEXT:    s_lshl_b32 s2, s4, s5
1002; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
1003; GFX9-NEXT:    s_lshr_b32 s2, s1, 16
1004; GFX9-NEXT:    s_lshr_b32 s4, s3, 16
1005; GFX9-NEXT:    s_lshl_b32 s1, s1, s3
1006; GFX9-NEXT:    s_lshl_b32 s2, s2, s4
1007; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s1, s2
1008; GFX9-NEXT:    ; return to shader part epilog
1009;
1010; GFX10PLUS-LABEL: s_shl_v4i16:
1011; GFX10PLUS:       ; %bb.0:
1012; GFX10PLUS-NEXT:    s_lshr_b32 s4, s0, 16
1013; GFX10PLUS-NEXT:    s_lshr_b32 s5, s2, 16
1014; GFX10PLUS-NEXT:    s_lshl_b32 s0, s0, s2
1015; GFX10PLUS-NEXT:    s_lshl_b32 s2, s4, s5
1016; GFX10PLUS-NEXT:    s_lshr_b32 s4, s1, 16
1017; GFX10PLUS-NEXT:    s_lshr_b32 s5, s3, 16
1018; GFX10PLUS-NEXT:    s_lshl_b32 s1, s1, s3
1019; GFX10PLUS-NEXT:    s_lshl_b32 s3, s4, s5
1020; GFX10PLUS-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
1021; GFX10PLUS-NEXT:    s_pack_ll_b32_b16 s1, s1, s3
1022; GFX10PLUS-NEXT:    ; return to shader part epilog
1023  %result = shl <4 x i16> %value, %amount
1024  %cast = bitcast <4 x i16> %result to <2 x i32>
1025  ret <2 x i32> %cast
1026}
1027
1028; FIXME: the <5 x i16> and <6 x i16> tests below are commented out (disabled).
1029; define <5 x i16> @v_shl_v5i16(<5 x i16> %value, <5 x i16> %amount) {
1030;   %result = shl <5 x i16> %value, %amount
1031;   ret <5 x i16> %result
1032; }
1033
1034; define amdgpu_ps <5 x i16> @s_shl_v5i16(<5 x i16> inreg %value, <5 x i16> inreg %amount) {
1035;   %result = shl <5 x i16> %value, %amount
1036;   ret <5 x i16> %result
1037; }
1038
1039; define <3 x float> @v_shl_v6i16(<6 x i16> %value, <6 x i16> %amount) {
1040;   %result = shl <6 x i16> %value, %amount
1041;   %cast = bitcast <6 x i16> %result to <3 x float>
1042;   ret <3 x float> %cast
1043; }
1044
1045; define amdgpu_ps <3 x i32> @s_shl_v6i16(<6 x i16> inreg %value, <6 x i16> inreg %amount) {
1046;   %result = shl <6 x i16> %value, %amount
1047;   %cast = bitcast <6 x i16> %result to <3 x i32>
1048;   ret <3 x i32> %cast
1049; }
1050
; <8 x i16> shl with both operands as ordinary (non-inreg) arguments; the
; result is bitcast to <4 x float>. The GFX9 and GFX10PLUS checks expect four
; v_pk_lshlrev_b16 instructions, while the GFX6 and GFX8 checks expect
; per-element shifts that are recombined with v_or_b32.
1051define <4 x float> @v_shl_v8i16(<8 x i16> %value, <8 x i16> %amount) {
1052; GFX6-LABEL: v_shl_v8i16:
1053; GFX6:       ; %bb.0:
1054; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1055; GFX6-NEXT:    v_and_b32_e32 v8, 0xffff, v8
1056; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v8, v0
1057; GFX6-NEXT:    v_and_b32_e32 v8, 0xffff, v9
1058; GFX6-NEXT:    v_lshlrev_b32_e32 v1, v8, v1
1059; GFX6-NEXT:    v_and_b32_e32 v8, 0xffff, v10
1060; GFX6-NEXT:    v_lshlrev_b32_e32 v2, v8, v2
1061; GFX6-NEXT:    v_and_b32_e32 v8, 0xffff, v11
1062; GFX6-NEXT:    v_lshlrev_b32_e32 v3, v8, v3
1063; GFX6-NEXT:    v_and_b32_e32 v8, 0xffff, v12
1064; GFX6-NEXT:    v_lshlrev_b32_e32 v4, v8, v4
1065; GFX6-NEXT:    v_and_b32_e32 v8, 0xffff, v13
1066; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
1067; GFX6-NEXT:    v_lshlrev_b32_e32 v5, v8, v5
1068; GFX6-NEXT:    v_and_b32_e32 v8, 0xffff, v14
1069; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1070; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1071; GFX6-NEXT:    v_lshlrev_b32_e32 v6, v8, v6
1072; GFX6-NEXT:    v_and_b32_e32 v8, 0xffff, v15
1073; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
1074; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v2
1075; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v3
1076; GFX6-NEXT:    v_lshlrev_b32_e32 v7, v8, v7
1077; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
1078; GFX6-NEXT:    v_and_b32_e32 v3, 0xffff, v5
1079; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
1080; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v4
1081; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1082; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v7
1083; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
1084; GFX6-NEXT:    v_and_b32_e32 v3, 0xffff, v6
1085; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
1086; GFX6-NEXT:    v_or_b32_e32 v3, v3, v4
1087; GFX6-NEXT:    s_setpc_b64 s[30:31]
1088;
1089; GFX8-LABEL: v_shl_v8i16:
1090; GFX8:       ; %bb.0:
1091; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1092; GFX8-NEXT:    v_lshlrev_b16_e32 v8, v4, v0
1093; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1094; GFX8-NEXT:    v_lshlrev_b16_e32 v4, v5, v1
1095; GFX8-NEXT:    v_lshlrev_b16_sdwa v1, v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1096; GFX8-NEXT:    v_or_b32_e32 v1, v4, v1
1097; GFX8-NEXT:    v_lshlrev_b16_e32 v4, v6, v2
1098; GFX8-NEXT:    v_lshlrev_b16_sdwa v2, v6, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1099; GFX8-NEXT:    v_or_b32_e32 v2, v4, v2
1100; GFX8-NEXT:    v_lshlrev_b16_e32 v4, v7, v3
1101; GFX8-NEXT:    v_lshlrev_b16_sdwa v3, v7, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
1102; GFX8-NEXT:    v_or_b32_e32 v0, v8, v0
1103; GFX8-NEXT:    v_or_b32_e32 v3, v4, v3
1104; GFX8-NEXT:    s_setpc_b64 s[30:31]
1105;
1106; GFX9-LABEL: v_shl_v8i16:
1107; GFX9:       ; %bb.0:
1108; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1109; GFX9-NEXT:    v_pk_lshlrev_b16 v0, v4, v0
1110; GFX9-NEXT:    v_pk_lshlrev_b16 v1, v5, v1
1111; GFX9-NEXT:    v_pk_lshlrev_b16 v2, v6, v2
1112; GFX9-NEXT:    v_pk_lshlrev_b16 v3, v7, v3
1113; GFX9-NEXT:    s_setpc_b64 s[30:31]
1114;
1115; GFX10PLUS-LABEL: v_shl_v8i16:
1116; GFX10PLUS:       ; %bb.0:
1117; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1118; GFX10PLUS-NEXT:    v_pk_lshlrev_b16 v0, v4, v0
1119; GFX10PLUS-NEXT:    v_pk_lshlrev_b16 v1, v5, v1
1120; GFX10PLUS-NEXT:    v_pk_lshlrev_b16 v2, v6, v2
1121; GFX10PLUS-NEXT:    v_pk_lshlrev_b16 v3, v7, v3
1122; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
1123  %result = shl <8 x i16> %value, %amount
1124  %cast = bitcast <8 x i16> %result to <4 x float>
1125  ret <4 x float> %cast
1126}
1127
; <8 x i16> shl in an amdgpu_ps function with both operands passed inreg; the
; result is bitcast to <4 x i32>. Every check below is a scalar (s_*)
; instruction, one s_lshl_b32 per element followed by repacking into 32 bits.
1128define amdgpu_ps <4 x i32> @s_shl_v8i16(<8 x i16> inreg %value, <8 x i16> inreg %amount) {
1129; GFX6-LABEL: s_shl_v8i16:
1130; GFX6:       ; %bb.0:
1131; GFX6-NEXT:    s_lshl_b32 s1, s1, s9
1132; GFX6-NEXT:    s_lshl_b32 s0, s0, s8
1133; GFX6-NEXT:    s_and_b32 s1, s1, 0xffff
1134; GFX6-NEXT:    s_lshl_b32 s2, s2, s10
1135; GFX6-NEXT:    s_lshl_b32 s3, s3, s11
1136; GFX6-NEXT:    s_and_b32 s0, s0, 0xffff
1137; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
1138; GFX6-NEXT:    s_lshl_b32 s5, s5, s13
1139; GFX6-NEXT:    s_or_b32 s0, s0, s1
1140; GFX6-NEXT:    s_and_b32 s1, s2, 0xffff
1141; GFX6-NEXT:    s_and_b32 s2, s3, 0xffff
1142; GFX6-NEXT:    s_lshl_b32 s4, s4, s12
1143; GFX6-NEXT:    s_lshl_b32 s7, s7, s15
1144; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
1145; GFX6-NEXT:    s_and_b32 s3, s5, 0xffff
1146; GFX6-NEXT:    s_lshl_b32 s6, s6, s14
1147; GFX6-NEXT:    s_or_b32 s1, s1, s2
1148; GFX6-NEXT:    s_and_b32 s2, s4, 0xffff
1149; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
1150; GFX6-NEXT:    s_and_b32 s4, s7, 0xffff
1151; GFX6-NEXT:    s_or_b32 s2, s2, s3
1152; GFX6-NEXT:    s_and_b32 s3, s6, 0xffff
1153; GFX6-NEXT:    s_lshl_b32 s4, s4, 16
1154; GFX6-NEXT:    s_or_b32 s3, s3, s4
1155; GFX6-NEXT:    ; return to shader part epilog
1156;
1157; GFX8-LABEL: s_shl_v8i16:
1158; GFX8:       ; %bb.0:
1159; GFX8-NEXT:    s_lshr_b32 s8, s0, 16
1160; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
1161; GFX8-NEXT:    s_lshr_b32 s12, s4, 16
1162; GFX8-NEXT:    s_lshr_b32 s9, s1, 16
1163; GFX8-NEXT:    s_and_b32 s1, s1, 0xffff
1164; GFX8-NEXT:    s_lshr_b32 s13, s5, 16
1165; GFX8-NEXT:    s_lshl_b32 s0, s0, s4
1166; GFX8-NEXT:    s_lshl_b32 s4, s8, s12
1167; GFX8-NEXT:    s_lshr_b32 s10, s2, 16
1168; GFX8-NEXT:    s_and_b32 s2, s2, 0xffff
1169; GFX8-NEXT:    s_lshr_b32 s14, s6, 16
1170; GFX8-NEXT:    s_lshl_b32 s1, s1, s5
1171; GFX8-NEXT:    s_lshl_b32 s5, s9, s13
1172; GFX8-NEXT:    s_lshl_b32 s4, s4, 16
1173; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
1174; GFX8-NEXT:    s_lshr_b32 s11, s3, 16
1175; GFX8-NEXT:    s_and_b32 s3, s3, 0xffff
1176; GFX8-NEXT:    s_lshr_b32 s15, s7, 16
1177; GFX8-NEXT:    s_lshl_b32 s2, s2, s6
1178; GFX8-NEXT:    s_lshl_b32 s6, s10, s14
1179; GFX8-NEXT:    s_or_b32 s0, s4, s0
1180; GFX8-NEXT:    s_lshl_b32 s4, s5, 16
1181; GFX8-NEXT:    s_and_b32 s1, s1, 0xffff
1182; GFX8-NEXT:    s_lshl_b32 s3, s3, s7
1183; GFX8-NEXT:    s_lshl_b32 s7, s11, s15
1184; GFX8-NEXT:    s_or_b32 s1, s4, s1
1185; GFX8-NEXT:    s_lshl_b32 s4, s6, 16
1186; GFX8-NEXT:    s_and_b32 s2, s2, 0xffff
1187; GFX8-NEXT:    s_or_b32 s2, s4, s2
1188; GFX8-NEXT:    s_lshl_b32 s4, s7, 16
1189; GFX8-NEXT:    s_and_b32 s3, s3, 0xffff
1190; GFX8-NEXT:    s_or_b32 s3, s4, s3
1191; GFX8-NEXT:    ; return to shader part epilog
1192;
1193; GFX9-LABEL: s_shl_v8i16:
1194; GFX9:       ; %bb.0:
1195; GFX9-NEXT:    s_lshr_b32 s8, s0, 16
1196; GFX9-NEXT:    s_lshr_b32 s9, s4, 16
1197; GFX9-NEXT:    s_lshl_b32 s0, s0, s4
1198; GFX9-NEXT:    s_lshl_b32 s4, s8, s9
1199; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s4
1200; GFX9-NEXT:    s_lshr_b32 s4, s1, 16
1201; GFX9-NEXT:    s_lshr_b32 s8, s5, 16
1202; GFX9-NEXT:    s_lshl_b32 s1, s1, s5
1203; GFX9-NEXT:    s_lshl_b32 s4, s4, s8
1204; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s1, s4
1205; GFX9-NEXT:    s_lshr_b32 s4, s2, 16
1206; GFX9-NEXT:    s_lshr_b32 s5, s6, 16
1207; GFX9-NEXT:    s_lshl_b32 s2, s2, s6
1208; GFX9-NEXT:    s_lshl_b32 s4, s4, s5
1209; GFX9-NEXT:    s_pack_ll_b32_b16 s2, s2, s4
1210; GFX9-NEXT:    s_lshr_b32 s4, s3, 16
1211; GFX9-NEXT:    s_lshr_b32 s5, s7, 16
1212; GFX9-NEXT:    s_lshl_b32 s3, s3, s7
1213; GFX9-NEXT:    s_lshl_b32 s4, s4, s5
1214; GFX9-NEXT:    s_pack_ll_b32_b16 s3, s3, s4
1215; GFX9-NEXT:    ; return to shader part epilog
1216;
1217; GFX10PLUS-LABEL: s_shl_v8i16:
1218; GFX10PLUS:       ; %bb.0:
1219; GFX10PLUS-NEXT:    s_lshr_b32 s8, s0, 16
1220; GFX10PLUS-NEXT:    s_lshr_b32 s9, s4, 16
1221; GFX10PLUS-NEXT:    s_lshl_b32 s0, s0, s4
1222; GFX10PLUS-NEXT:    s_lshl_b32 s4, s8, s9
1223; GFX10PLUS-NEXT:    s_lshr_b32 s8, s1, 16
1224; GFX10PLUS-NEXT:    s_lshr_b32 s9, s5, 16
1225; GFX10PLUS-NEXT:    s_lshl_b32 s1, s1, s5
1226; GFX10PLUS-NEXT:    s_lshl_b32 s5, s8, s9
1227; GFX10PLUS-NEXT:    s_pack_ll_b32_b16 s0, s0, s4
1228; GFX10PLUS-NEXT:    s_pack_ll_b32_b16 s1, s1, s5
1229; GFX10PLUS-NEXT:    s_lshr_b32 s4, s2, 16
1230; GFX10PLUS-NEXT:    s_lshr_b32 s5, s6, 16
1231; GFX10PLUS-NEXT:    s_lshl_b32 s2, s2, s6
1232; GFX10PLUS-NEXT:    s_lshl_b32 s4, s4, s5
1233; GFX10PLUS-NEXT:    s_lshr_b32 s5, s3, 16
1234; GFX10PLUS-NEXT:    s_lshr_b32 s6, s7, 16
1235; GFX10PLUS-NEXT:    s_lshl_b32 s3, s3, s7
1236; GFX10PLUS-NEXT:    s_lshl_b32 s5, s5, s6
1237; GFX10PLUS-NEXT:    s_pack_ll_b32_b16 s2, s2, s4
1238; GFX10PLUS-NEXT:    s_pack_ll_b32_b16 s3, s3, s5
1239; GFX10PLUS-NEXT:    ; return to shader part epilog
1240  %result = shl <8 x i16> %value, %amount
1241  %cast = bitcast <8 x i16> %result to <4 x i32>
1242  ret <4 x i32> %cast
1243}
1244
; i64 shl by a variable amount with ordinary (non-inreg) arguments. The checks
; expect a single 64-bit vector shift that reads the amount from v2
; (v_lshl_b64 on GFX6, v_lshlrev_b64 on the other targets).
1245define i64 @v_shl_i64(i64 %value, i64 %amount) {
1246; GFX6-LABEL: v_shl_i64:
1247; GFX6:       ; %bb.0:
1248; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1249; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], v2
1250; GFX6-NEXT:    s_setpc_b64 s[30:31]
1251;
1252; GFX8-LABEL: v_shl_i64:
1253; GFX8:       ; %bb.0:
1254; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1255; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
1256; GFX8-NEXT:    s_setpc_b64 s[30:31]
1257;
1258; GFX9-LABEL: v_shl_i64:
1259; GFX9:       ; %bb.0:
1260; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1261; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
1262; GFX9-NEXT:    s_setpc_b64 s[30:31]
1263;
1264; GFX10PLUS-LABEL: v_shl_i64:
1265; GFX10PLUS:       ; %bb.0:
1266; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1267; GFX10PLUS-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
1268; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
1269  %result = shl i64 %value, %amount
1270  ret i64 %result
1271}
1272
; i64 shl by the constant 63 with an ordinary argument. The checks expect no
; 64-bit shift; instead v0 is shifted left by 31 into v1 and v0 is set to 0
; (GFX11 expects both operations fused into one v_dual instruction).
1273define i64 @v_shl_i64_63(i64 %value) {
1274; GCN-LABEL: v_shl_i64_63:
1275; GCN:       ; %bb.0:
1276; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1277; GCN-NEXT:    v_lshlrev_b32_e32 v1, 31, v0
1278; GCN-NEXT:    v_mov_b32_e32 v0, 0
1279; GCN-NEXT:    s_setpc_b64 s[30:31]
1280;
1281; GFX10-LABEL: v_shl_i64_63:
1282; GFX10:       ; %bb.0:
1283; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1284; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 31, v0
1285; GFX10-NEXT:    v_mov_b32_e32 v0, 0
1286; GFX10-NEXT:    s_setpc_b64 s[30:31]
1287;
1288; GFX11-LABEL: v_shl_i64_63:
1289; GFX11:       ; %bb.0:
1290; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1291; GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_lshlrev_b32 v1, 31, v0
1292; GFX11-NEXT:    s_setpc_b64 s[30:31]
1293  %result = shl i64 %value, 63
1294  ret i64 %result
1295}
1296
; i64 shl by the constant 33 with an ordinary argument. The checks expect no
; 64-bit shift; instead v0 is shifted left by 1 into v1 and v0 is set to 0
; (GFX11 expects both operations fused into one v_dual instruction).
1297define i64 @v_shl_i64_33(i64 %value) {
1298; GCN-LABEL: v_shl_i64_33:
1299; GCN:       ; %bb.0:
1300; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1301; GCN-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
1302; GCN-NEXT:    v_mov_b32_e32 v0, 0
1303; GCN-NEXT:    s_setpc_b64 s[30:31]
1304;
1305; GFX10-LABEL: v_shl_i64_33:
1306; GFX10:       ; %bb.0:
1307; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1308; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 1, v0
1309; GFX10-NEXT:    v_mov_b32_e32 v0, 0
1310; GFX10-NEXT:    s_setpc_b64 s[30:31]
1311;
1312; GFX11-LABEL: v_shl_i64_33:
1313; GFX11:       ; %bb.0:
1314; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1315; GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_lshlrev_b32 v1, 1, v0
1316; GFX11-NEXT:    s_setpc_b64 s[30:31]
1317  %result = shl i64 %value, 33
1318  ret i64 %result
1319}
1320
; i64 shl by the constant 32 with an ordinary argument. The checks expect no
; shift instruction at all; v0 is copied to v1 and v0 is then set to 0
; (GFX11 expects both moves fused into one v_dual instruction).
1321define i64 @v_shl_i64_32(i64 %value) {
1322; GCN-LABEL: v_shl_i64_32:
1323; GCN:       ; %bb.0:
1324; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1325; GCN-NEXT:    v_mov_b32_e32 v1, v0
1326; GCN-NEXT:    v_mov_b32_e32 v0, 0
1327; GCN-NEXT:    s_setpc_b64 s[30:31]
1328;
1329; GFX10-LABEL: v_shl_i64_32:
1330; GFX10:       ; %bb.0:
1331; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1332; GFX10-NEXT:    v_mov_b32_e32 v1, v0
1333; GFX10-NEXT:    v_mov_b32_e32 v0, 0
1334; GFX10-NEXT:    s_setpc_b64 s[30:31]
1335;
1336; GFX11-LABEL: v_shl_i64_32:
1337; GFX11:       ; %bb.0:
1338; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1339; GFX11-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, 0
1340; GFX11-NEXT:    s_setpc_b64 s[30:31]
1341  %result = shl i64 %value, 32
1342  ret i64 %result
1343}
1344
; i64 shl by the constant 31 with an ordinary argument. Unlike the 32, 33 and
; 63 cases, the checks here expect a full 64-bit vector shift with the
; immediate 31 (v_lshl_b64 on GFX6, v_lshlrev_b64 on the other targets).
1345define i64 @v_shl_i64_31(i64 %value) {
1346; GFX6-LABEL: v_shl_i64_31:
1347; GFX6:       ; %bb.0:
1348; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1349; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 31
1350; GFX6-NEXT:    s_setpc_b64 s[30:31]
1351;
1352; GFX8-LABEL: v_shl_i64_31:
1353; GFX8:       ; %bb.0:
1354; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1355; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 31, v[0:1]
1356; GFX8-NEXT:    s_setpc_b64 s[30:31]
1357;
1358; GFX9-LABEL: v_shl_i64_31:
1359; GFX9:       ; %bb.0:
1360; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1361; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 31, v[0:1]
1362; GFX9-NEXT:    s_setpc_b64 s[30:31]
1363;
1364; GFX10PLUS-LABEL: v_shl_i64_31:
1365; GFX10PLUS:       ; %bb.0:
1366; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1367; GFX10PLUS-NEXT:    v_lshlrev_b64 v[0:1], 31, v[0:1]
1368; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
1369  %result = shl i64 %value, 31
1370  ret i64 %result
1371}
1372
; i64 shl by a variable amount in an amdgpu_ps function with both operands
; passed inreg. The checks expect a single s_lshl_b64 on s[0:1] that reads the
; amount from s2.
1373define amdgpu_ps i64 @s_shl_i64(i64 inreg %value, i64 inreg %amount) {
1374; GCN-LABEL: s_shl_i64:
1375; GCN:       ; %bb.0:
1376; GCN-NEXT:    s_lshl_b64 s[0:1], s[0:1], s2
1377; GCN-NEXT:    ; return to shader part epilog
1378;
1379; GFX10PLUS-LABEL: s_shl_i64:
1380; GFX10PLUS:       ; %bb.0:
1381; GFX10PLUS-NEXT:    s_lshl_b64 s[0:1], s[0:1], s2
1382; GFX10PLUS-NEXT:    ; return to shader part epilog
1383  %result = shl i64 %value, %amount
1384  ret i64 %result
1385}
1386
; i64 shl by the constant 63 in an amdgpu_ps function with the value passed
; inreg. The checks expect no 64-bit shift; instead s0 is shifted left by 31
; into s1 and s0 is set to 0.
1387define amdgpu_ps i64 @s_shl_i64_63(i64 inreg %value) {
1388; GCN-LABEL: s_shl_i64_63:
1389; GCN:       ; %bb.0:
1390; GCN-NEXT:    s_lshl_b32 s1, s0, 31
1391; GCN-NEXT:    s_mov_b32 s0, 0
1392; GCN-NEXT:    ; return to shader part epilog
1393;
1394; GFX10PLUS-LABEL: s_shl_i64_63:
1395; GFX10PLUS:       ; %bb.0:
1396; GFX10PLUS-NEXT:    s_lshl_b32 s1, s0, 31
1397; GFX10PLUS-NEXT:    s_mov_b32 s0, 0
1398; GFX10PLUS-NEXT:    ; return to shader part epilog
1399  %result = shl i64 %value, 63
1400  ret i64 %result
1401}
1402
; i64 shl by the constant 33 in an amdgpu_ps function with the value passed
; inreg. The checks expect no 64-bit shift; instead s0 is shifted left by 1
; into s1 and s0 is set to 0.
1403define amdgpu_ps i64 @s_shl_i64_33(i64 inreg %value) {
1404; GCN-LABEL: s_shl_i64_33:
1405; GCN:       ; %bb.0:
1406; GCN-NEXT:    s_lshl_b32 s1, s0, 1
1407; GCN-NEXT:    s_mov_b32 s0, 0
1408; GCN-NEXT:    ; return to shader part epilog
1409;
1410; GFX10PLUS-LABEL: s_shl_i64_33:
1411; GFX10PLUS:       ; %bb.0:
1412; GFX10PLUS-NEXT:    s_lshl_b32 s1, s0, 1
1413; GFX10PLUS-NEXT:    s_mov_b32 s0, 0
1414; GFX10PLUS-NEXT:    ; return to shader part epilog
1415  %result = shl i64 %value, 33
1416  ret i64 %result
1417}
1418
; i64 shl by the constant 32 in an amdgpu_ps function with the value passed
; inreg. The checks expect no shift instruction at all; s0 is copied to s1 and
; s0 is then set to 0.
1419define amdgpu_ps i64 @s_shl_i64_32(i64 inreg %value) {
1420; GCN-LABEL: s_shl_i64_32:
1421; GCN:       ; %bb.0:
1422; GCN-NEXT:    s_mov_b32 s1, s0
1423; GCN-NEXT:    s_mov_b32 s0, 0
1424; GCN-NEXT:    ; return to shader part epilog
1425;
1426; GFX10PLUS-LABEL: s_shl_i64_32:
1427; GFX10PLUS:       ; %bb.0:
1428; GFX10PLUS-NEXT:    s_mov_b32 s1, s0
1429; GFX10PLUS-NEXT:    s_mov_b32 s0, 0
1430; GFX10PLUS-NEXT:    ; return to shader part epilog
1431  %result = shl i64 %value, 32
1432  ret i64 %result
1433}
1434
; i64 shl by the constant 31 in an amdgpu_ps function with the value passed
; inreg. Unlike the 32, 33 and 63 cases, the checks here expect a full
; s_lshl_b64 with the immediate 31.
1435define amdgpu_ps i64 @s_shl_i64_31(i64 inreg %value) {
1436; GCN-LABEL: s_shl_i64_31:
1437; GCN:       ; %bb.0:
1438; GCN-NEXT:    s_lshl_b64 s[0:1], s[0:1], 31
1439; GCN-NEXT:    ; return to shader part epilog
1440;
1441; GFX10PLUS-LABEL: s_shl_i64_31:
1442; GFX10PLUS:       ; %bb.0:
1443; GFX10PLUS-NEXT:    s_lshl_b64 s[0:1], s[0:1], 31
1444; GFX10PLUS-NEXT:    ; return to shader part epilog
1445  %result = shl i64 %value, 31
1446  ret i64 %result
1447}
1448
; i64 shl in an amdgpu_ps function with the value passed inreg and the amount
; as an ordinary argument; the result is bitcast to <2 x float>. The checks
; expect one 64-bit vector shift taking the value from s[0:1] and the amount
; from v0.
1449define amdgpu_ps <2 x float> @shl_i64_sv(i64 inreg %value, i64 %amount) {
1450; GFX6-LABEL: shl_i64_sv:
1451; GFX6:       ; %bb.0:
1452; GFX6-NEXT:    v_lshl_b64 v[0:1], s[0:1], v0
1453; GFX6-NEXT:    ; return to shader part epilog
1454;
1455; GFX8-LABEL: shl_i64_sv:
1456; GFX8:       ; %bb.0:
1457; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v0, s[0:1]
1458; GFX8-NEXT:    ; return to shader part epilog
1459;
1460; GFX9-LABEL: shl_i64_sv:
1461; GFX9:       ; %bb.0:
1462; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v0, s[0:1]
1463; GFX9-NEXT:    ; return to shader part epilog
1464;
1465; GFX10PLUS-LABEL: shl_i64_sv:
1466; GFX10PLUS:       ; %bb.0:
1467; GFX10PLUS-NEXT:    v_lshlrev_b64 v[0:1], v0, s[0:1]
1468; GFX10PLUS-NEXT:    ; return to shader part epilog
1469  %result = shl i64 %value, %amount
1470  %cast = bitcast i64 %result to <2 x float>
1471  ret <2 x float> %cast
1472}
1473
; i64 shl in an amdgpu_ps function with the value as an ordinary argument and
; the amount passed inreg; the result is bitcast to <2 x float>. The checks
; expect one 64-bit vector shift taking the value from v[0:1] and the amount
; from s0.
1474define amdgpu_ps <2 x float> @shl_i64_vs(i64 %value, i64 inreg %amount) {
1475; GFX6-LABEL: shl_i64_vs:
1476; GFX6:       ; %bb.0:
1477; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], s0
1478; GFX6-NEXT:    ; return to shader part epilog
1479;
1480; GFX8-LABEL: shl_i64_vs:
1481; GFX8:       ; %bb.0:
1482; GFX8-NEXT:    v_lshlrev_b64 v[0:1], s0, v[0:1]
1483; GFX8-NEXT:    ; return to shader part epilog
1484;
1485; GFX9-LABEL: shl_i64_vs:
1486; GFX9:       ; %bb.0:
1487; GFX9-NEXT:    v_lshlrev_b64 v[0:1], s0, v[0:1]
1488; GFX9-NEXT:    ; return to shader part epilog
1489;
1490; GFX10PLUS-LABEL: shl_i64_vs:
1491; GFX10PLUS:       ; %bb.0:
1492; GFX10PLUS-NEXT:    v_lshlrev_b64 v[0:1], s0, v[0:1]
1493; GFX10PLUS-NEXT:    ; return to shader part epilog
1494  %result = shl i64 %value, %amount
1495  %cast = bitcast i64 %result to <2 x float>
1496  ret <2 x float> %cast
1497}
1498
; <2 x i64> shl by a variable vector amount with ordinary (non-inreg)
; arguments. The checks expect two independent 64-bit vector shifts, on v[0:1]
; and v[2:3], reading their amounts from v4 and v6 respectively.
1499define <2 x i64> @v_shl_v2i64(<2 x i64> %value, <2 x i64> %amount) {
1500; GFX6-LABEL: v_shl_v2i64:
1501; GFX6:       ; %bb.0:
1502; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1503; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], v4
1504; GFX6-NEXT:    v_lshl_b64 v[2:3], v[2:3], v6
1505; GFX6-NEXT:    s_setpc_b64 s[30:31]
1506;
1507; GFX8-LABEL: v_shl_v2i64:
1508; GFX8:       ; %bb.0:
1509; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1510; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v4, v[0:1]
1511; GFX8-NEXT:    v_lshlrev_b64 v[2:3], v6, v[2:3]
1512; GFX8-NEXT:    s_setpc_b64 s[30:31]
1513;
1514; GFX9-LABEL: v_shl_v2i64:
1515; GFX9:       ; %bb.0:
1516; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1517; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v4, v[0:1]
1518; GFX9-NEXT:    v_lshlrev_b64 v[2:3], v6, v[2:3]
1519; GFX9-NEXT:    s_setpc_b64 s[30:31]
1520;
1521; GFX10PLUS-LABEL: v_shl_v2i64:
1522; GFX10PLUS:       ; %bb.0:
1523; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1524; GFX10PLUS-NEXT:    v_lshlrev_b64 v[0:1], v4, v[0:1]
1525; GFX10PLUS-NEXT:    v_lshlrev_b64 v[2:3], v6, v[2:3]
1526; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
1527  %result = shl <2 x i64> %value, %amount
1528  ret <2 x i64> %result
1529}
1530
; <2 x i64> shl by the constant vector <31, 31> with an ordinary argument. The
; checks expect two 64-bit vector shifts, on v[0:1] and v[2:3], each with the
; immediate 31.
1531define <2 x i64> @v_shl_v2i64_31(<2 x i64> %value) {
1532; GFX6-LABEL: v_shl_v2i64_31:
1533; GFX6:       ; %bb.0:
1534; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1535; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 31
1536; GFX6-NEXT:    v_lshl_b64 v[2:3], v[2:3], 31
1537; GFX6-NEXT:    s_setpc_b64 s[30:31]
1538;
1539; GFX8-LABEL: v_shl_v2i64_31:
1540; GFX8:       ; %bb.0:
1541; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1542; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 31, v[0:1]
1543; GFX8-NEXT:    v_lshlrev_b64 v[2:3], 31, v[2:3]
1544; GFX8-NEXT:    s_setpc_b64 s[30:31]
1545;
1546; GFX9-LABEL: v_shl_v2i64_31:
1547; GFX9:       ; %bb.0:
1548; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1549; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 31, v[0:1]
1550; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 31, v[2:3]
1551; GFX9-NEXT:    s_setpc_b64 s[30:31]
1552;
1553; GFX10PLUS-LABEL: v_shl_v2i64_31:
1554; GFX10PLUS:       ; %bb.0:
1555; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1556; GFX10PLUS-NEXT:    v_lshlrev_b64 v[0:1], 31, v[0:1]
1557; GFX10PLUS-NEXT:    v_lshlrev_b64 v[2:3], 31, v[2:3]
1558; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
1559  %result = shl <2 x i64> %value, <i64 31, i64 31>
1560  ret <2 x i64> %result
1561}
1562
; <2 x i64> shl by a variable vector amount in an amdgpu_ps function with both
; operands passed inreg. The checks expect two s_lshl_b64 instructions, on
; s[0:1] and s[2:3], reading their amounts from s4 and s6 respectively.
1563define amdgpu_ps <2 x i64> @s_shl_v2i64(<2 x i64> inreg %value, <2 x i64> inreg %amount) {
1564; GCN-LABEL: s_shl_v2i64:
1565; GCN:       ; %bb.0:
1566; GCN-NEXT:    s_lshl_b64 s[0:1], s[0:1], s4
1567; GCN-NEXT:    s_lshl_b64 s[2:3], s[2:3], s6
1568; GCN-NEXT:    ; return to shader part epilog
1569;
1570; GFX10PLUS-LABEL: s_shl_v2i64:
1571; GFX10PLUS:       ; %bb.0:
1572; GFX10PLUS-NEXT:    s_lshl_b64 s[0:1], s[0:1], s4
1573; GFX10PLUS-NEXT:    s_lshl_b64 s[2:3], s[2:3], s6
1574; GFX10PLUS-NEXT:    ; return to shader part epilog
1575  %result = shl <2 x i64> %value, %amount
1576  ret <2 x i64> %result
1577}
1578
; i65 shl by a variable amount with ordinary (non-inreg) arguments. The checks
; expect an expansion into 64-bit shifts (left by the amount, left by
; amount + 0xffffffc0, right by 64 - amount) whose results are combined with
; v_or_b32 and selected by v_cndmask_b32 on comparisons of v3 against 64 and 0.
1579define i65 @v_shl_i65(i65 %value, i65 %amount) {
1580; GFX6-LABEL: v_shl_i65:
1581; GFX6:       ; %bb.0:
1582; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1583; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 64, v3
1584; GFX6-NEXT:    v_lshr_b64 v[4:5], v[0:1], v4
1585; GFX6-NEXT:    v_lshl_b64 v[5:6], v[2:3], v3
1586; GFX6-NEXT:    v_add_i32_e32 v8, vcc, 0xffffffc0, v3
1587; GFX6-NEXT:    v_lshl_b64 v[6:7], v[0:1], v3
1588; GFX6-NEXT:    v_or_b32_e32 v9, v4, v5
1589; GFX6-NEXT:    v_lshl_b64 v[4:5], v[0:1], v8
1590; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v3
1591; GFX6-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
1592; GFX6-NEXT:    v_cndmask_b32_e32 v1, 0, v7, vcc
1593; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v9, vcc
1594; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
1595; GFX6-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
1596; GFX6-NEXT:    s_setpc_b64 s[30:31]
1597;
1598; GFX8-LABEL: v_shl_i65:
1599; GFX8:       ; %bb.0:
1600; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1601; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 64, v3
1602; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v4, v[0:1]
1603; GFX8-NEXT:    v_lshlrev_b64 v[5:6], v3, v[2:3]
1604; GFX8-NEXT:    v_add_u32_e32 v8, vcc, 0xffffffc0, v3
1605; GFX8-NEXT:    v_lshlrev_b64 v[6:7], v3, v[0:1]
1606; GFX8-NEXT:    v_or_b32_e32 v9, v4, v5
1607; GFX8-NEXT:    v_lshlrev_b64 v[4:5], v8, v[0:1]
1608; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v3
1609; GFX8-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
1610; GFX8-NEXT:    v_cndmask_b32_e32 v1, 0, v7, vcc
1611; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v9, vcc
1612; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
1613; GFX8-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
1614; GFX8-NEXT:    s_setpc_b64 s[30:31]
1615;
1616; GFX9-LABEL: v_shl_i65:
1617; GFX9:       ; %bb.0:
1618; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1619; GFX9-NEXT:    v_sub_u32_e32 v4, 64, v3
1620; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v4, v[0:1]
1621; GFX9-NEXT:    v_lshlrev_b64 v[5:6], v3, v[2:3]
1622; GFX9-NEXT:    v_add_u32_e32 v8, 0xffffffc0, v3
1623; GFX9-NEXT:    v_lshlrev_b64 v[6:7], v3, v[0:1]
1624; GFX9-NEXT:    v_or_b32_e32 v9, v4, v5
1625; GFX9-NEXT:    v_lshlrev_b64 v[4:5], v8, v[0:1]
1626; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v3
1627; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc
1628; GFX9-NEXT:    v_cndmask_b32_e32 v1, 0, v7, vcc
1629; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v9, vcc
1630; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
1631; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
1632; GFX9-NEXT:    s_setpc_b64 s[30:31]
1633;
1634; GFX10-LABEL: v_shl_i65:
1635; GFX10:       ; %bb.0:
1636; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1637; GFX10-NEXT:    v_sub_nc_u32_e32 v6, 64, v3
1638; GFX10-NEXT:    v_lshlrev_b64 v[4:5], v3, v[2:3]
1639; GFX10-NEXT:    v_add_nc_u32_e32 v8, 0xffffffc0, v3
1640; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v3
1641; GFX10-NEXT:    v_lshrrev_b64 v[5:6], v6, v[0:1]
1642; GFX10-NEXT:    v_lshlrev_b64 v[6:7], v3, v[0:1]
1643; GFX10-NEXT:    v_lshlrev_b64 v[8:9], v8, v[0:1]
1644; GFX10-NEXT:    v_or_b32_e32 v1, v5, v4
1645; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc_lo
1646; GFX10-NEXT:    v_cndmask_b32_e32 v4, v8, v1, vcc_lo
1647; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0, v7, vcc_lo
1648; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v3
1649; GFX10-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc_lo
1650; GFX10-NEXT:    s_setpc_b64 s[30:31]
1651;
1652; GFX11-LABEL: v_shl_i65:
1653; GFX11:       ; %bb.0:
1654; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1655; GFX11-NEXT:    v_sub_nc_u32_e32 v6, 64, v3
1656; GFX11-NEXT:    v_lshlrev_b64 v[4:5], v3, v[2:3]
1657; GFX11-NEXT:    v_add_nc_u32_e32 v8, 0xffffffc0, v3
1658; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v3
1659; GFX11-NEXT:    v_lshrrev_b64 v[5:6], v6, v[0:1]
1660; GFX11-NEXT:    v_lshlrev_b64 v[6:7], v3, v[0:1]
1661; GFX11-NEXT:    v_lshlrev_b64 v[8:9], v8, v[0:1]
1662; GFX11-NEXT:    v_or_b32_e32 v1, v5, v4
1663; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0, v6, vcc_lo
1664; GFX11-NEXT:    v_dual_cndmask_b32 v4, v8, v1 :: v_dual_cndmask_b32 v1, 0, v7
1665; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v3
1666; GFX11-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc_lo
1667; GFX11-NEXT:    s_setpc_b64 s[30:31]
1668  %result = shl i65 %value, %amount
1669  ret i65 %result
1670}
1671
; Shift an i65 value left by the constant 33 in a function using the default
; calling convention; the checks below expect vector-register (v_*) code.
; On every checked target the expected result has v0 set to zero, v1 holding
; the input v0 shifted left by 1, and v[2:3] holding the 64-bit right shift of
; the input v[0:1] by 31. GFX6 expects v_lshr_b64 where the later targets
; expect v_lshrrev_b64, and GFX11 expects the two moves fused into a single
; v_dual_mov_b32.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing them by hand.
1672define i65 @v_shl_i65_33(i65 %value) {
1673; GFX6-LABEL: v_shl_i65_33:
1674; GFX6:       ; %bb.0:
1675; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1676; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 1, v0
1677; GFX6-NEXT:    v_lshr_b64 v[2:3], v[0:1], 31
1678; GFX6-NEXT:    v_mov_b32_e32 v0, 0
1679; GFX6-NEXT:    v_mov_b32_e32 v1, v4
1680; GFX6-NEXT:    s_setpc_b64 s[30:31]
1681;
1682; GFX8-LABEL: v_shl_i65_33:
1683; GFX8:       ; %bb.0:
1684; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1685; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 1, v0
1686; GFX8-NEXT:    v_lshrrev_b64 v[2:3], 31, v[0:1]
1687; GFX8-NEXT:    v_mov_b32_e32 v0, 0
1688; GFX8-NEXT:    v_mov_b32_e32 v1, v4
1689; GFX8-NEXT:    s_setpc_b64 s[30:31]
1690;
1691; GFX9-LABEL: v_shl_i65_33:
1692; GFX9:       ; %bb.0:
1693; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1694; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 1, v0
1695; GFX9-NEXT:    v_lshrrev_b64 v[2:3], 31, v[0:1]
1696; GFX9-NEXT:    v_mov_b32_e32 v0, 0
1697; GFX9-NEXT:    v_mov_b32_e32 v1, v4
1698; GFX9-NEXT:    s_setpc_b64 s[30:31]
1699;
1700; GFX10-LABEL: v_shl_i65_33:
1701; GFX10:       ; %bb.0:
1702; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1703; GFX10-NEXT:    v_lshlrev_b32_e32 v4, 1, v0
1704; GFX10-NEXT:    v_lshrrev_b64 v[2:3], 31, v[0:1]
1705; GFX10-NEXT:    v_mov_b32_e32 v0, 0
1706; GFX10-NEXT:    v_mov_b32_e32 v1, v4
1707; GFX10-NEXT:    s_setpc_b64 s[30:31]
1708;
1709; GFX11-LABEL: v_shl_i65_33:
1710; GFX11:       ; %bb.0:
1711; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1712; GFX11-NEXT:    v_lshlrev_b32_e32 v4, 1, v0
1713; GFX11-NEXT:    v_lshrrev_b64 v[2:3], 31, v[0:1]
1714; GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, v4
1715; GFX11-NEXT:    s_setpc_b64 s[30:31]
1716  %result = shl i65 %value, 33
1717  ret i65 %result
1718}
1719
; Shift an i65 value left by a variable i65 amount in an amdgpu_ps function
; whose arguments are marked inreg; the checks below expect scalar (s_*) code.
; The expected sequence reads the shift amount from s3, compares it against 64
; and against 0, and uses s_cselect to pick between the candidate shift
; results; when the amount equals 0 the original s2 is selected for the high
; part of the result.
; The GCN and GFX10PLUS check groups expect the same instruction sequence and
; differ only in which temporary registers are used.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing them by hand.
1720define amdgpu_ps i65 @s_shl_i65(i65 inreg %value, i65 inreg %amount) {
1721; GCN-LABEL: s_shl_i65:
1722; GCN:       ; %bb.0:
1723; GCN-NEXT:    s_sub_i32 s10, s3, 64
1724; GCN-NEXT:    s_sub_i32 s6, 64, s3
1725; GCN-NEXT:    s_cmp_lt_u32 s3, 64
1726; GCN-NEXT:    s_cselect_b32 s11, 1, 0
1727; GCN-NEXT:    s_cmp_eq_u32 s3, 0
1728; GCN-NEXT:    s_cselect_b32 s12, 1, 0
1729; GCN-NEXT:    s_lshr_b64 s[6:7], s[0:1], s6
1730; GCN-NEXT:    s_lshl_b64 s[8:9], s[2:3], s3
1731; GCN-NEXT:    s_lshl_b64 s[4:5], s[0:1], s3
1732; GCN-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
1733; GCN-NEXT:    s_lshl_b64 s[8:9], s[0:1], s10
1734; GCN-NEXT:    s_cmp_lg_u32 s11, 0
1735; GCN-NEXT:    s_cselect_b64 s[0:1], s[4:5], 0
1736; GCN-NEXT:    s_cselect_b32 s3, s6, s8
1737; GCN-NEXT:    s_cmp_lg_u32 s12, 0
1738; GCN-NEXT:    s_cselect_b32 s2, s2, s3
1739; GCN-NEXT:    ; return to shader part epilog
1740;
1741; GFX10PLUS-LABEL: s_shl_i65:
1742; GFX10PLUS:       ; %bb.0:
1743; GFX10PLUS-NEXT:    s_sub_i32 s10, s3, 64
1744; GFX10PLUS-NEXT:    s_sub_i32 s4, 64, s3
1745; GFX10PLUS-NEXT:    s_cmp_lt_u32 s3, 64
1746; GFX10PLUS-NEXT:    s_cselect_b32 s11, 1, 0
1747; GFX10PLUS-NEXT:    s_cmp_eq_u32 s3, 0
1748; GFX10PLUS-NEXT:    s_cselect_b32 s12, 1, 0
1749; GFX10PLUS-NEXT:    s_lshr_b64 s[4:5], s[0:1], s4
1750; GFX10PLUS-NEXT:    s_lshl_b64 s[6:7], s[2:3], s3
1751; GFX10PLUS-NEXT:    s_lshl_b64 s[8:9], s[0:1], s3
1752; GFX10PLUS-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
1753; GFX10PLUS-NEXT:    s_lshl_b64 s[6:7], s[0:1], s10
1754; GFX10PLUS-NEXT:    s_cmp_lg_u32 s11, 0
1755; GFX10PLUS-NEXT:    s_cselect_b64 s[0:1], s[8:9], 0
1756; GFX10PLUS-NEXT:    s_cselect_b32 s3, s4, s6
1757; GFX10PLUS-NEXT:    s_cmp_lg_u32 s12, 0
1758; GFX10PLUS-NEXT:    s_cselect_b32 s2, s2, s3
1759; GFX10PLUS-NEXT:    ; return to shader part epilog
1760  %result = shl i65 %value, %amount
1761  ret i65 %result
1762}
1763
; Shift an i65 value left by the constant 33 in an amdgpu_ps function whose
; argument is marked inreg; the checks below expect scalar (s_*) code.
; Expected result layout in both check groups: s0 is zero, s1 is the input s0
; shifted left by 1, and s[2:3] is the OR of the input s[0:1] shifted right by
; 31 with a register pair whose low half is 0 and whose high half is the input
; s2 shifted left by 1.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing them by hand.
1764define amdgpu_ps i65 @s_shl_i65_33(i65 inreg %value) {
1765; GCN-LABEL: s_shl_i65_33:
1766; GCN:       ; %bb.0:
1767; GCN-NEXT:    s_lshl_b32 s4, s0, 1
1768; GCN-NEXT:    s_mov_b32 s6, 0
1769; GCN-NEXT:    s_lshl_b32 s7, s2, 1
1770; GCN-NEXT:    s_lshr_b64 s[0:1], s[0:1], 31
1771; GCN-NEXT:    s_or_b64 s[2:3], s[6:7], s[0:1]
1772; GCN-NEXT:    s_mov_b32 s0, 0
1773; GCN-NEXT:    s_mov_b32 s1, s4
1774; GCN-NEXT:    ; return to shader part epilog
1775;
1776; GFX10PLUS-LABEL: s_shl_i65_33:
1777; GFX10PLUS:       ; %bb.0:
1778; GFX10PLUS-NEXT:    s_mov_b32 s4, 0
1779; GFX10PLUS-NEXT:    s_lshl_b32 s5, s2, 1
1780; GFX10PLUS-NEXT:    s_lshr_b64 s[2:3], s[0:1], 31
1781; GFX10PLUS-NEXT:    s_lshl_b32 s1, s0, 1
1782; GFX10PLUS-NEXT:    s_or_b64 s[2:3], s[4:5], s[2:3]
1783; GFX10PLUS-NEXT:    s_mov_b32 s0, 0
1784; GFX10PLUS-NEXT:    ; return to shader part epilog
1785  %result = shl i65 %value, 33
1786  ret i65 %result
1787}
1788
1789; FIXME: Argument lowering asserts for the <2 x i65> tests below, so they are left commented out.
1790; define <2 x i65> @v_shl_v2i65(<2 x i65> %value, <2 x i65> %amount) {
1791;   %result = shl <2 x i65> %value, %amount
1792;   ret <2 x i65> %result
1793; }
1794
1795; define amdgpu_ps <2 x i65> @s_shl_v2i65(<2 x i65> inreg %value, <2 x i65> inreg %amount) {
1796;   %result = shl <2 x i65> %value, %amount
1797;   ret <2 x i65> %result
1798; }
1799