xref: /llvm-project/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll (revision e3e7c756fb439f4e92691c6f8c891fecd2c918ed)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
3; RUN: llc -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
4
5; Loosely based on test/CodeGen/{X86,AArch64}/extract-lowbits.ll,
6; but with all 64-bit tests and all tests involving loads dropped.
7
8; Patterns:
9;   a) x &  (1 << nbits) - 1
10;   b) x & ~(-1 << nbits)
11;   c) x &  (-1 >> (32 - nbits))
12;   d) x << (32 - nbits) >> (32 - nbits)
13; are equivalent.
14
15; ---------------------------------------------------------------------------- ;
16; Pattern a. 32-bit
17; ---------------------------------------------------------------------------- ;
18
; Pattern a with an i32 bit count: build (1 << %numlowbits) - 1 and AND it
; with %val (mask is the first operand of the 'and').
; The checks are shared by both run lines (GCN prefix) and expect a single
; v_bfe_u32 between the waitcnt and the return.
19define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind {
20; GCN-LABEL: bzhi32_a0:
21; GCN:       ; %bb.0:
22; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
24; GCN-NEXT:    s_setpc_b64 s[30:31]
25  %onebit = shl i32 1, %numlowbits ; 1 << numlowbits
26  %mask = add nsw i32 %onebit, -1 ; (1 << numlowbits) - 1
27  %masked = and i32 %mask, %val
28  ret i32 %masked
29}
30
; Pattern a where the bit count arrives as an 'i8 zeroext' argument and is
; zero-extended to i32 inside the body before being used as the shift amount.
; The shared GCN checks expect the same single v_bfe_u32 as the i32-count
; variant, with no separate extension instruction.
31define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
32; GCN-LABEL: bzhi32_a1_indexzext:
33; GCN:       ; %bb.0:
34; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
36; GCN-NEXT:    s_setpc_b64 s[30:31]
37  %conv = zext i8 %numlowbits to i32 ; widen the i8 count to the shift width
38  %onebit = shl i32 1, %conv
39  %mask = add nsw i32 %onebit, -1 ; (1 << conv) - 1
40  %masked = and i32 %mask, %val
41  ret i32 %masked
42}
43
; Pattern a with the operands of the final 'and' swapped (%val first, mask
; second). The shared GCN checks expect the same single v_bfe_u32 as
; bzhi32_a0, i.e. the operand order of the 'and' must not matter.
44define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind {
45; GCN-LABEL: bzhi32_a4_commutative:
46; GCN:       ; %bb.0:
47; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
48; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
49; GCN-NEXT:    s_setpc_b64 s[30:31]
50  %onebit = shl i32 1, %numlowbits
51  %mask = add nsw i32 %onebit, -1 ; (1 << numlowbits) - 1
52  %masked = and i32 %val, %mask ; swapped order
53  ret i32 %masked
54}
55
56; ---------------------------------------------------------------------------- ;
57; Pattern b. 32-bit
58; ---------------------------------------------------------------------------- ;
59
; Pattern b with an i32 bit count: build ~(-1 << %numlowbits) (the NOT is
; spelled as xor with -1) and AND it with %val, mask as the first operand.
; The shared GCN checks expect a single v_bfe_u32 between the waitcnt and
; the return.
60define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind {
61; GCN-LABEL: bzhi32_b0:
62; GCN:       ; %bb.0:
63; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
65; GCN-NEXT:    s_setpc_b64 s[30:31]
66  %notmask = shl i32 -1, %numlowbits ; -1 << numlowbits
67  %mask = xor i32 %notmask, -1 ; bitwise NOT of notmask
68  %masked = and i32 %mask, %val
69  ret i32 %masked
70}
71
; Pattern b where the bit count arrives as an 'i8 zeroext' argument and is
; zero-extended to i32 inside the body before the shift.
; The shared GCN checks expect the same single v_bfe_u32 as bzhi32_b0, with
; no separate extension instruction.
72define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
73; GCN-LABEL: bzhi32_b1_indexzext:
74; GCN:       ; %bb.0:
75; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
76; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
77; GCN-NEXT:    s_setpc_b64 s[30:31]
78  %conv = zext i8 %numlowbits to i32 ; widen the i8 count to the shift width
79  %notmask = shl i32 -1, %conv
80  %mask = xor i32 %notmask, -1 ; bitwise NOT of notmask
81  %masked = and i32 %mask, %val
82  ret i32 %masked
83}
84
; Pattern b with the operands of the final 'and' swapped (%val first, mask
; second). The shared GCN checks expect the same single v_bfe_u32 as
; bzhi32_b0.
85define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
86; GCN-LABEL: bzhi32_b4_commutative:
87; GCN:       ; %bb.0:
88; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
89; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
90; GCN-NEXT:    s_setpc_b64 s[30:31]
91  %notmask = shl i32 -1, %numlowbits
92  %mask = xor i32 %notmask, -1 ; bitwise NOT of notmask
93  %masked = and i32 %val, %mask ; swapped order
94  ret i32 %masked
95}
96
97; ---------------------------------------------------------------------------- ;
98; Pattern c. 32-bit
99; ---------------------------------------------------------------------------- ;
100
; Pattern c with an i32 bit count: build -1 >> (32 - %numlowbits) with a
; logical shift right and AND it with %val, mask as the first operand.
; Unlike patterns a and b, the checks here do not expect v_bfe_u32: they
; record a subtract, a logical shift right of -1, and an AND. The SI and VI
; prefixes have separate check blocks because the recorded mnemonics differ
; (v_sub_i32 / v_lshr_b32_e32 versus v_sub_u32 / v_lshrrev_b32_e64).
101define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
102; SI-LABEL: bzhi32_c0:
103; SI:       ; %bb.0:
104; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
105; SI-NEXT:    v_sub_i32_e32 v1, vcc, 32, v1
106; SI-NEXT:    v_lshr_b32_e32 v1, -1, v1
107; SI-NEXT:    v_and_b32_e32 v0, v1, v0
108; SI-NEXT:    s_setpc_b64 s[30:31]
109;
110; VI-LABEL: bzhi32_c0:
111; VI:       ; %bb.0:
112; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
113; VI-NEXT:    v_sub_u32_e32 v1, vcc, 32, v1
114; VI-NEXT:    v_lshrrev_b32_e64 v1, v1, -1
115; VI-NEXT:    v_and_b32_e32 v0, v1, v0
116; VI-NEXT:    s_setpc_b64 s[30:31]
117  %numhighbits = sub i32 32, %numlowbits ; number of high bits to clear
118  %mask = lshr i32 -1, %numhighbits ; -1 >> numhighbits
119  %masked = and i32 %mask, %val
120  ret i32 %masked
121}
122
; Pattern c where the bit count is first ANDed with 31 before being
; subtracted from 32. With the count restricted this way the shared GCN
; checks expect a v_and_b32 with 31 followed by a single v_bfe_u32, instead
; of the subtract / shift / and sequence recorded for bzhi32_c0.
123define i32 @bzhi32_c0_clamp(i32 %val, i32 %numlowbits) nounwind {
124; GCN-LABEL: bzhi32_c0_clamp:
125; GCN:       ; %bb.0:
126; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127; GCN-NEXT:    v_and_b32_e32 v1, 31, v1
128; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
129; GCN-NEXT:    s_setpc_b64 s[30:31]
130  %low5bits = and i32 %numlowbits, 31 ; keep only the low 5 bits of the count
131  %numhighbits = sub i32 32, %low5bits
132  %mask = lshr i32 -1, %numhighbits ; -1 >> numhighbits
133  %masked = and i32 %mask, %val
134  ret i32 %masked
135}
136
; Pattern c with an i8 bit count (no zeroext attribute on the argument).
; The subtraction from 32 is performed in i8 and only its result is
; zero-extended to i32 for use as the shift amount.
; The SI and VI check blocks differ in the subtract: SI records a 32-bit
; v_sub_i32 that writes vcc, VI records a 16-bit v_sub_u16. The shift and
; the AND follow the same shape as in bzhi32_c0.
137define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
138; SI-LABEL: bzhi32_c1_indexzext:
139; SI:       ; %bb.0:
140; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
141; SI-NEXT:    v_sub_i32_e32 v1, vcc, 32, v1
142; SI-NEXT:    v_lshr_b32_e32 v1, -1, v1
143; SI-NEXT:    v_and_b32_e32 v0, v1, v0
144; SI-NEXT:    s_setpc_b64 s[30:31]
145;
146; VI-LABEL: bzhi32_c1_indexzext:
147; VI:       ; %bb.0:
148; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
149; VI-NEXT:    v_sub_u16_e32 v1, 32, v1
150; VI-NEXT:    v_lshrrev_b32_e64 v1, v1, -1
151; VI-NEXT:    v_and_b32_e32 v0, v1, v0
152; VI-NEXT:    s_setpc_b64 s[30:31]
153  %numhighbits = sub i8 32, %numlowbits ; computed in i8, before the extension
154  %sh_prom = zext i8 %numhighbits to i32 ; widen the shift amount to i32
155  %mask = lshr i32 -1, %sh_prom
156  %masked = and i32 %mask, %val
157  ret i32 %masked
158}
159
; Pattern c with the operands of the final 'and' swapped (%val first, mask
; second). The recorded checks match those of bzhi32_c0 except for the
; source operand order of v_and_b32 (v0, v1 here instead of v1, v0).
160define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
161; SI-LABEL: bzhi32_c4_commutative:
162; SI:       ; %bb.0:
163; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
164; SI-NEXT:    v_sub_i32_e32 v1, vcc, 32, v1
165; SI-NEXT:    v_lshr_b32_e32 v1, -1, v1
166; SI-NEXT:    v_and_b32_e32 v0, v0, v1
167; SI-NEXT:    s_setpc_b64 s[30:31]
168;
169; VI-LABEL: bzhi32_c4_commutative:
170; VI:       ; %bb.0:
171; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
172; VI-NEXT:    v_sub_u32_e32 v1, vcc, 32, v1
173; VI-NEXT:    v_lshrrev_b32_e64 v1, v1, -1
174; VI-NEXT:    v_and_b32_e32 v0, v0, v1
175; VI-NEXT:    s_setpc_b64 s[30:31]
176  %numhighbits = sub i32 32, %numlowbits ; number of high bits to clear
177  %mask = lshr i32 -1, %numhighbits ; -1 >> numhighbits
178  %masked = and i32 %val, %mask ; swapped order
179  ret i32 %masked
180}
181
182; ---------------------------------------------------------------------------- ;
183; Pattern d. 32-bit
184; ---------------------------------------------------------------------------- ;
185
; Pattern d with an i32 bit count: shift %val left by (32 - %numlowbits) and
; then logically right by the same amount; there is no explicit mask or
; 'and' in the IR.
; The checks expect a subtract followed by a v_lshlrev_b32 / v_lshrrev_b32
; pair rather than v_bfe_u32. The SI and VI blocks differ only in the
; subtract mnemonic (v_sub_i32 versus v_sub_u32).
186define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind {
187; SI-LABEL: bzhi32_d0:
188; SI:       ; %bb.0:
189; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
190; SI-NEXT:    v_sub_i32_e32 v1, vcc, 32, v1
191; SI-NEXT:    v_lshlrev_b32_e32 v0, v1, v0
192; SI-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
193; SI-NEXT:    s_setpc_b64 s[30:31]
194;
195; VI-LABEL: bzhi32_d0:
196; VI:       ; %bb.0:
197; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
198; VI-NEXT:    v_sub_u32_e32 v1, vcc, 32, v1
199; VI-NEXT:    v_lshlrev_b32_e32 v0, v1, v0
200; VI-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
201; VI-NEXT:    s_setpc_b64 s[30:31]
202  %numhighbits = sub i32 32, %numlowbits ; number of high bits to clear
203  %highbitscleared = shl i32 %val, %numhighbits ; push the unwanted high bits out
204  %masked = lshr i32 %highbitscleared, %numhighbits ; shift back, filling with zeros
205  ret i32 %masked
206}
207
; Pattern d where the bit count is first ANDed with 31 before being
; subtracted from 32.
; Unlike bzhi32_c0_clamp, the recorded checks here do not contain
; v_bfe_u32: they expect a v_and_b32 with 31, then the same subtract and
; v_lshlrev_b32 / v_lshrrev_b32 pair as bzhi32_d0. The SI and VI blocks
; differ only in the subtract mnemonic.
208define i32 @bzhi32_d0_5bits(i32 %val, i32 %numlowbits) nounwind {
209; SI-LABEL: bzhi32_d0_5bits:
210; SI:       ; %bb.0:
211; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
212; SI-NEXT:    v_and_b32_e32 v1, 31, v1
213; SI-NEXT:    v_sub_i32_e32 v1, vcc, 32, v1
214; SI-NEXT:    v_lshlrev_b32_e32 v0, v1, v0
215; SI-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
216; SI-NEXT:    s_setpc_b64 s[30:31]
217;
218; VI-LABEL: bzhi32_d0_5bits:
219; VI:       ; %bb.0:
220; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
221; VI-NEXT:    v_and_b32_e32 v1, 31, v1
222; VI-NEXT:    v_sub_u32_e32 v1, vcc, 32, v1
223; VI-NEXT:    v_lshlrev_b32_e32 v0, v1, v0
224; VI-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
225; VI-NEXT:    s_setpc_b64 s[30:31]
226  %numlow5bits = and i32 %numlowbits, 31 ; keep only the low 5 bits of the count
227  %numhighbits = sub i32 32, %numlow5bits
228  %highbitscleared = shl i32 %val, %numhighbits ; push the unwanted high bits out
229  %masked = lshr i32 %highbitscleared, %numhighbits ; shift back, filling with zeros
230  ret i32 %masked
231}
232
; Pattern d with an i8 bit count (no zeroext attribute on the argument).
; The subtraction from 32 is performed in i8 and only its result is
; zero-extended to i32; that one extended value feeds both shifts.
; The SI and VI check blocks differ in the subtract: SI records a 32-bit
; v_sub_i32 that writes vcc, VI records a 16-bit v_sub_u16. Both then expect
; the v_lshlrev_b32 / v_lshrrev_b32 pair.
233define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind {
234; SI-LABEL: bzhi32_d1_indexzext:
235; SI:       ; %bb.0:
236; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
237; SI-NEXT:    v_sub_i32_e32 v1, vcc, 32, v1
238; SI-NEXT:    v_lshlrev_b32_e32 v0, v1, v0
239; SI-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
240; SI-NEXT:    s_setpc_b64 s[30:31]
241;
242; VI-LABEL: bzhi32_d1_indexzext:
243; VI:       ; %bb.0:
244; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
245; VI-NEXT:    v_sub_u16_e32 v1, 32, v1
246; VI-NEXT:    v_lshlrev_b32_e32 v0, v1, v0
247; VI-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
248; VI-NEXT:    s_setpc_b64 s[30:31]
249  %numhighbits = sub i8 32, %numlowbits ; computed in i8, before the extension
250  %sh_prom = zext i8 %numhighbits to i32 ; widen the shift amount to i32
251  %highbitscleared = shl i32 %val, %sh_prom ; push the unwanted high bits out
252  %masked = lshr i32 %highbitscleared, %sh_prom ; shift back, filling with zeros
253  ret i32 %masked
254}
255