; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -global-isel < %s | FileCheck -check-prefix=GISEL %s

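; These tests check that an "and" masking a shift amount to the bits the
; hardware shift instructions implicitly read (4 bits for 16-bit shifts, 5 for
; 32-bit, 6 for 64-bit) is folded away. The SelectionDAG path (CHECK) drops the
; mask in every case; the GlobalISel path (GISEL) still emits it in several of
; the VGPR cases below.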
define i16 @csh_16(i16 %a, i16 %b) {
; CHECK-LABEL: csh_16:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_lshlrev_b16_e32 v2, v1, v0
; CHECK-NEXT:    v_lshrrev_b16_e32 v3, v1, v0
; CHECK-NEXT:    v_ashrrev_i16_e32 v0, v1, v0
; CHECK-NEXT:    v_add_u16_e32 v1, v2, v3
; CHECK-NEXT:    v_add_u16_e32 v0, v1, v0
; CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: csh_16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v1, 15, v1
; GISEL-NEXT:    v_lshlrev_b16_e32 v2, v1, v0
; GISEL-NEXT:    v_lshrrev_b16_e32 v3, v1, v0
; GISEL-NEXT:    v_ashrrev_i16_e32 v0, v1, v0
; GISEL-NEXT:    v_add_u16_e32 v1, v2, v3
; GISEL-NEXT:    v_add_u16_e32 v0, v1, v0
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %and = and i16 %b, 15
  %shl = shl i16 %a, %and
  %lshr = lshr i16 %a, %and
  %ashr = ashr i16 %a, %and
  %ret.0 = add i16 %shl, %lshr
  %ret = add i16 %ret.0, %ashr
  ret i16 %ret
}

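; Same pattern at 32 bits: "and i32 %b, 31" matches the 5-bit shift amount,
; so only GlobalISel keeps the v_and.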
define i32 @csh_32(i32 %a, i32 %b) {
; CHECK-LABEL: csh_32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_lshlrev_b32_e32 v2, v1, v0
; CHECK-NEXT:    v_lshrrev_b32_e32 v3, v1, v0
; CHECK-NEXT:    v_ashrrev_i32_e32 v0, v1, v0
; CHECK-NEXT:    v_add3_u32 v0, v2, v3, v0
; CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: csh_32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v1, 31, v1
; GISEL-NEXT:    v_lshlrev_b32_e32 v2, v1, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v3, v1, v0
; GISEL-NEXT:    v_ashrrev_i32_e32 v0, v1, v0
; GISEL-NEXT:    v_add3_u32 v0, v2, v3, v0
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %and = and i32 %b, 31
  %shl = shl i32 %a, %and
  %lshr = lshr i32 %a, %and
  %ashr = ashr i32 %a, %and
  %ret.0 = add i32 %shl, %lshr
  %ret = add i32 %ret.0, %ashr
  ret i32 %ret
}

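; Scalar (SGPR) version: both selectors fold the redundant mask.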
define amdgpu_ps i32 @s_csh_32_0(i32 inreg %a, i32 inreg %b) {
; CHECK-LABEL: s_csh_32_0:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_lshl_b32 s2, s0, s1
; CHECK-NEXT:    s_lshr_b32 s3, s0, s1
; CHECK-NEXT:    s_ashr_i32 s0, s0, s1
; CHECK-NEXT:    s_add_i32 s1, s2, s3
; CHECK-NEXT:    s_add_i32 s0, s1, s0
; CHECK-NEXT:    ; return to shader part epilog
;
; GISEL-LABEL: s_csh_32_0:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_lshl_b32 s2, s0, s1
; GISEL-NEXT:    s_lshr_b32 s3, s0, s1
; GISEL-NEXT:    s_ashr_i32 s0, s0, s1
; GISEL-NEXT:    s_add_i32 s1, s2, s3
; GISEL-NEXT:    s_add_i32 s0, s1, s0
; GISEL-NEXT:    ; return to shader part epilog
  %and = and i32 %b, 31
  %shl = shl i32 %a, %and
  %lshr = lshr i32 %a, %and
  %ashr = ashr i32 %a, %and
  %ret.0 = add i32 %shl, %lshr
  %ret = add i32 %ret.0, %ashr
  ret i32 %ret
}

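; The mask need not be exactly 31: shifts by 32 or more are poison in IR, so a
; wider "and i32 %b, 127" folds away on both paths too.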
define amdgpu_ps i32 @s_csh_32_1(i32 inreg %a, i32 inreg %b) {
; CHECK-LABEL: s_csh_32_1:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_lshl_b32 s2, s0, s1
; CHECK-NEXT:    s_lshr_b32 s3, s0, s1
; CHECK-NEXT:    s_ashr_i32 s0, s0, s1
; CHECK-NEXT:    s_add_i32 s1, s2, s3
; CHECK-NEXT:    s_add_i32 s0, s1, s0
; CHECK-NEXT:    ; return to shader part epilog
;
; GISEL-LABEL: s_csh_32_1:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_lshl_b32 s2, s0, s1
; GISEL-NEXT:    s_lshr_b32 s3, s0, s1
; GISEL-NEXT:    s_ashr_i32 s0, s0, s1
; GISEL-NEXT:    s_add_i32 s1, s2, s3
; GISEL-NEXT:    s_add_i32 s0, s1, s0
; GISEL-NEXT:    ; return to shader part epilog
  %and = and i32 %b, 127
  %shl = shl i32 %a, %and
  %lshr = lshr i32 %a, %and
  %ashr = ashr i32 %a, %and
  %ret.0 = add i32 %shl, %lshr
  %ret = add i32 %ret.0, %ashr
  ret i32 %ret
}

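; Vector version with a splat mask of 31: both selectors fold it element-wise.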
define <4 x i32> @csh_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: csh_v4i32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_lshlrev_b32_e32 v8, v7, v3
; CHECK-NEXT:    v_lshlrev_b32_e32 v9, v6, v2
; CHECK-NEXT:    v_lshlrev_b32_e32 v10, v5, v1
; CHECK-NEXT:    v_lshlrev_b32_e32 v11, v4, v0
; CHECK-NEXT:    v_lshrrev_b32_e32 v12, v7, v3
; CHECK-NEXT:    v_lshrrev_b32_e32 v13, v6, v2
; CHECK-NEXT:    v_lshrrev_b32_e32 v14, v5, v1
; CHECK-NEXT:    v_lshrrev_b32_e32 v15, v4, v0
; CHECK-NEXT:    v_ashrrev_i32_e32 v3, v7, v3
; CHECK-NEXT:    v_ashrrev_i32_e32 v2, v6, v2
; CHECK-NEXT:    v_ashrrev_i32_e32 v1, v5, v1
; CHECK-NEXT:    v_ashrrev_i32_e32 v0, v4, v0
; CHECK-NEXT:    v_add3_u32 v0, v11, v15, v0
; CHECK-NEXT:    v_add3_u32 v1, v10, v14, v1
; CHECK-NEXT:    v_add3_u32 v2, v9, v13, v2
; CHECK-NEXT:    v_add3_u32 v3, v8, v12, v3
; CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: csh_v4i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_lshlrev_b32_e32 v8, v4, v0
; GISEL-NEXT:    v_lshlrev_b32_e32 v9, v5, v1
; GISEL-NEXT:    v_lshlrev_b32_e32 v10, v6, v2
; GISEL-NEXT:    v_lshlrev_b32_e32 v11, v7, v3
; GISEL-NEXT:    v_lshrrev_b32_e32 v12, v4, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v13, v5, v1
; GISEL-NEXT:    v_lshrrev_b32_e32 v14, v6, v2
; GISEL-NEXT:    v_lshrrev_b32_e32 v15, v7, v3
; GISEL-NEXT:    v_ashrrev_i32_e32 v0, v4, v0
; GISEL-NEXT:    v_ashrrev_i32_e32 v1, v5, v1
; GISEL-NEXT:    v_ashrrev_i32_e32 v2, v6, v2
; GISEL-NEXT:    v_ashrrev_i32_e32 v3, v7, v3
; GISEL-NEXT:    v_add3_u32 v0, v8, v12, v0
; GISEL-NEXT:    v_add3_u32 v1, v9, v13, v1
; GISEL-NEXT:    v_add3_u32 v2, v10, v14, v2
; GISEL-NEXT:    v_add3_u32 v3, v11, v15, v3
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %and = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
  %shl = shl <4 x i32> %a, %and
  %lshr = lshr <4 x i32> %a, %and
  %ashr = ashr <4 x i32> %a, %and
  %ret.0 = add <4 x i32> %shl, %lshr
  %ret = add <4 x i32> %ret.0, %ashr
  ret <4 x i32> %ret
}

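; SGPR vector version: GlobalISel still masks the shift amounts with s_and_b64.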
define amdgpu_ps <4 x i32> @s_csh_v4i32(<4 x i32> inreg %a, <4 x i32> inreg %b) {
; CHECK-LABEL: s_csh_v4i32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_lshl_b32 s8, s0, s4
; CHECK-NEXT:    s_lshl_b32 s9, s1, s5
; CHECK-NEXT:    s_lshl_b32 s10, s2, s6
; CHECK-NEXT:    s_lshl_b32 s11, s3, s7
; CHECK-NEXT:    s_lshr_b32 s12, s0, s4
; CHECK-NEXT:    s_lshr_b32 s13, s1, s5
; CHECK-NEXT:    s_lshr_b32 s14, s2, s6
; CHECK-NEXT:    s_lshr_b32 s15, s3, s7
; CHECK-NEXT:    s_ashr_i32 s3, s3, s7
; CHECK-NEXT:    s_ashr_i32 s2, s2, s6
; CHECK-NEXT:    s_ashr_i32 s1, s1, s5
; CHECK-NEXT:    s_ashr_i32 s0, s0, s4
; CHECK-NEXT:    s_add_i32 s4, s11, s15
; CHECK-NEXT:    s_add_i32 s5, s10, s14
; CHECK-NEXT:    s_add_i32 s6, s9, s13
; CHECK-NEXT:    s_add_i32 s7, s8, s12
; CHECK-NEXT:    s_add_i32 s0, s7, s0
; CHECK-NEXT:    s_add_i32 s1, s6, s1
; CHECK-NEXT:    s_add_i32 s2, s5, s2
; CHECK-NEXT:    s_add_i32 s3, s4, s3
; CHECK-NEXT:    ; return to shader part epilog
;
; GISEL-LABEL: s_csh_v4i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_mov_b32 s8, 31
; GISEL-NEXT:    s_mov_b32 s9, s8
; GISEL-NEXT:    s_and_b64 s[4:5], s[4:5], s[8:9]
; GISEL-NEXT:    s_and_b64 s[6:7], s[6:7], s[8:9]
; GISEL-NEXT:    s_lshl_b32 s8, s0, s4
; GISEL-NEXT:    s_lshl_b32 s9, s1, s5
; GISEL-NEXT:    s_lshl_b32 s10, s2, s6
; GISEL-NEXT:    s_lshl_b32 s11, s3, s7
; GISEL-NEXT:    s_lshr_b32 s12, s0, s4
; GISEL-NEXT:    s_lshr_b32 s13, s1, s5
; GISEL-NEXT:    s_lshr_b32 s14, s2, s6
; GISEL-NEXT:    s_lshr_b32 s15, s3, s7
; GISEL-NEXT:    s_ashr_i32 s0, s0, s4
; GISEL-NEXT:    s_ashr_i32 s1, s1, s5
; GISEL-NEXT:    s_ashr_i32 s2, s2, s6
; GISEL-NEXT:    s_ashr_i32 s3, s3, s7
; GISEL-NEXT:    s_add_i32 s4, s8, s12
; GISEL-NEXT:    s_add_i32 s5, s9, s13
; GISEL-NEXT:    s_add_i32 s6, s10, s14
; GISEL-NEXT:    s_add_i32 s7, s11, s15
; GISEL-NEXT:    s_add_i32 s0, s4, s0
; GISEL-NEXT:    s_add_i32 s1, s5, s1
; GISEL-NEXT:    s_add_i32 s2, s6, s2
; GISEL-NEXT:    s_add_i32 s3, s7, s3
; GISEL-NEXT:    ; return to shader part epilog
  %and = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
  %shl = shl <4 x i32> %a, %and
  %lshr = lshr <4 x i32> %a, %and
  %ashr = ashr <4 x i32> %a, %and
  %ret.0 = add <4 x i32> %shl, %lshr
  %ret = add <4 x i32> %ret.0, %ashr
  ret <4 x i32> %ret
}

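; 64-bit shifts consume a 6-bit shift amount; GlobalISel keeps the
; "and i64 %b, 63" as a v_and on the low half of %b.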
define i64 @csh_64(i64 %a, i64 %b) {
; CHECK-LABEL: csh_64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_lshlrev_b64 v[3:4], v2, v[0:1]
; CHECK-NEXT:    v_lshrrev_b64 v[5:6], v2, v[0:1]
; CHECK-NEXT:    v_ashrrev_i64 v[0:1], v2, v[0:1]
; CHECK-NEXT:    v_add_co_u32_e32 v2, vcc, v3, v5
; CHECK-NEXT:    v_addc_co_u32_e32 v3, vcc, v4, v6, vcc
; CHECK-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
; CHECK-NEXT:    v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
; CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: csh_64:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v6, 63, v2
; GISEL-NEXT:    v_lshlrev_b64 v[2:3], v6, v[0:1]
; GISEL-NEXT:    v_lshrrev_b64 v[4:5], v6, v[0:1]
; GISEL-NEXT:    v_ashrrev_i64 v[0:1], v6, v[0:1]
; GISEL-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v4
; GISEL-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
; GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
; GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %and = and i64 %b, 63
  %shl = shl i64 %a, %and
  %lshr = lshr i64 %a, %and
  %ashr = ashr i64 %a, %and
  %ret.0 = add i64 %shl, %lshr
  %ret = add i64 %ret.0, %ashr
  ret i64 %ret
}

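; Scalar 64-bit version: both selectors fold the 6-bit mask.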
define amdgpu_ps i64 @s_csh_64_0(i64 inreg %a, i64 inreg %b) {
; CHECK-LABEL: s_csh_64_0:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_lshl_b64 s[4:5], s[0:1], s2
; CHECK-NEXT:    s_lshr_b64 s[6:7], s[0:1], s2
; CHECK-NEXT:    s_ashr_i64 s[0:1], s[0:1], s2
; CHECK-NEXT:    s_add_u32 s2, s4, s6
; CHECK-NEXT:    s_addc_u32 s3, s5, s7
; CHECK-NEXT:    s_add_u32 s0, s2, s0
; CHECK-NEXT:    s_addc_u32 s1, s3, s1
; CHECK-NEXT:    ; return to shader part epilog
;
; GISEL-LABEL: s_csh_64_0:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_lshl_b64 s[4:5], s[0:1], s2
; GISEL-NEXT:    s_lshr_b64 s[6:7], s[0:1], s2
; GISEL-NEXT:    s_ashr_i64 s[0:1], s[0:1], s2
; GISEL-NEXT:    s_add_u32 s2, s4, s6
; GISEL-NEXT:    s_addc_u32 s3, s5, s7
; GISEL-NEXT:    s_add_u32 s0, s2, s0
; GISEL-NEXT:    s_addc_u32 s1, s3, s1
; GISEL-NEXT:    ; return to shader part epilog
  %and = and i64 %b, 63
  %shl = shl i64 %a, %and
  %lshr = lshr i64 %a, %and
  %ashr = ashr i64 %a, %and
  %ret.0 = add i64 %shl, %lshr
  %ret = add i64 %ret.0, %ashr
  ret i64 %ret
}

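; As in s_csh_32_1, a wider mask (255 here) also folds on both paths.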
define amdgpu_ps i64 @s_csh_64_1(i64 inreg %a, i64 inreg %b) {
; CHECK-LABEL: s_csh_64_1:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_lshl_b64 s[4:5], s[0:1], s2
; CHECK-NEXT:    s_lshr_b64 s[6:7], s[0:1], s2
; CHECK-NEXT:    s_ashr_i64 s[0:1], s[0:1], s2
; CHECK-NEXT:    s_add_u32 s2, s4, s6
; CHECK-NEXT:    s_addc_u32 s3, s5, s7
; CHECK-NEXT:    s_add_u32 s0, s2, s0
; CHECK-NEXT:    s_addc_u32 s1, s3, s1
; CHECK-NEXT:    ; return to shader part epilog
;
; GISEL-LABEL: s_csh_64_1:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_lshl_b64 s[4:5], s[0:1], s2
; GISEL-NEXT:    s_lshr_b64 s[6:7], s[0:1], s2
; GISEL-NEXT:    s_ashr_i64 s[0:1], s[0:1], s2
; GISEL-NEXT:    s_add_u32 s2, s4, s6
; GISEL-NEXT:    s_addc_u32 s3, s5, s7
; GISEL-NEXT:    s_add_u32 s0, s2, s0
; GISEL-NEXT:    s_addc_u32 s1, s3, s1
; GISEL-NEXT:    ; return to shader part epilog
  %and = and i64 %b, 255
  %shl = shl i64 %a, %and
  %lshr = lshr i64 %a, %and
  %ashr = ashr i64 %a, %and
  %ret.0 = add i64 %shl, %lshr
  %ret = add i64 %ret.0, %ashr
  ret i64 %ret
}

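; The folded shift still participates in combines: shl+or selects v_lshl_or_b32.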
define i32 @cshl_or(i32 %a, i32 %b) {
; CHECK-LABEL: cshl_or:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_lshl_or_b32 v0, v0, v1, v0
; CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: cshl_or:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v1, 31, v1
; GISEL-NEXT:    v_lshl_or_b32 v0, v0, v1, v0
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %and = and i32 %b, 31
  %shl = shl i32 %a, %and
  %or = or i32 %shl, %a
  ret i32 %or
}

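; shl+add selects v_lshl_add_u32; GlobalISel again masks the shift amount first.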
define i32 @cshl_add(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: cshl_add:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_lshl_add_u32 v0, v0, v1, v2
; CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: cshl_add:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v1, 31, v1
; GISEL-NEXT:    v_lshl_add_u32 v0, v0, v1, v2
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %and = and i32 %b, 31
  %shl = shl i32 %a, %and
  %add = add i32 %shl, %c
  ret i32 %add
}

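; add+shl selects v_add_lshl_u32, with the unmasked %b reused as the shift
; amount on the DAG path.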
define i32 @add_cshl(i32 %a, i32 %b) {
; CHECK-LABEL: add_cshl:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_add_lshl_u32 v0, v0, v1, v1
; CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: add_cshl:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v2, 31, v1
; GISEL-NEXT:    v_add_lshl_u32 v0, v0, v1, v2
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %add = add i32 %a, %b
  %and = and i32 %b, 31
  %shl = shl i32 %add, %and
  ret i32 %shl
}