xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -amdgpu-load-store-vectorizer=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s
3
; VGPR form: source, offset and width are plain (non-inreg) arguments and the
; checks expect a single v_bfe_u32 reading v0, v1 and v2.
4define i32 @v_bfe_i32_arg_arg_arg(i32 %src0, i32 %src1, i32 %src2) #0 {
5; GFX6-LABEL: v_bfe_i32_arg_arg_arg:
6; GFX6:       ; %bb.0:
7; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8; GFX6-NEXT:    v_bfe_u32 v0, v0, v1, v2
9; GFX6-NEXT:    s_setpc_b64 s[30:31]
10  %bfe_i32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src2)
11  ret i32 %bfe_i32
12}
13
; SGPR form: all operands are inreg. The checks expect the offset (s1) to be
; masked with 63, the width (s2) shifted left by 16, both OR'd into one
; operand, and that operand fed to s_bfe_u32.
14define amdgpu_ps i32 @s_bfe_i32_arg_arg_arg(i32 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
15; GFX6-LABEL: s_bfe_i32_arg_arg_arg:
16; GFX6:       ; %bb.0:
17; GFX6-NEXT:    s_and_b32 s1, s1, 63
18; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
19; GFX6-NEXT:    s_or_b32 s1, s1, s2
20; GFX6-NEXT:    s_bfe_u32 s0, s0, s1
21; GFX6-NEXT:    ; return to shader part epilog
22  %bfe_i32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src2)
23  ret i32 %bfe_i32
24}
25
26; TODO: Need to expand this.
27; define i64 @v_bfe_i64_arg_arg_arg(i64 %src0, i32 %src1, i32 %src2) #0 {
28;   %bfe_i64 = call i64 @llvm.amdgcn.ubfe.i64(i64 %src0, i32 %src1, i32 %src2)
29;   ret i64 %bfe_i64
30; }
31
; 64-bit SGPR form: same offset/width packing as the 32-bit scalar case
; (mask with 63, shift width left by 16, OR), but the extract is s_bfe_u64 on
; the register pair s[0:1].
32define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i32 inreg %src2) #0 {
33; GFX6-LABEL: s_bfe_i64_arg_arg_arg:
34; GFX6:       ; %bb.0:
35; GFX6-NEXT:    s_and_b32 s2, s2, 63
36; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
37; GFX6-NEXT:    s_or_b32 s2, s2, s3
38; GFX6-NEXT:    s_bfe_u64 s[0:1], s[0:1], s2
39; GFX6-NEXT:    ; return to shader part epilog
  ; Note: the result is an i64 even though the value is named %bfe_i32.
40  %bfe_i32 = call i64 @llvm.amdgcn.ubfe.i64(i64 %src0, i32 %src1, i32 %src2)
41  ret i64 %bfe_i32
42}
43
; Kernel form with all ubfe operands taken from kernel arguments; the result
; is stored to %out. The checks expect the packed offset/width operand to be
; built with s_and_b32 / s_lshl_b32 / s_or_b32 before s_bfe_u32.
44define amdgpu_kernel void @bfe_u32_arg_arg_arg(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
45; GFX6-LABEL: bfe_u32_arg_arg_arg:
46; GFX6:       ; %bb.0:
47; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
48; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
49; GFX6-NEXT:    s_and_b32 s4, s3, 63
50; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
51; GFX6-NEXT:    s_or_b32 s3, s4, s3
52; GFX6-NEXT:    s_bfe_u32 s3, s2, s3
53; GFX6-NEXT:    s_mov_b32 s2, -1
54; GFX6-NEXT:    v_mov_b32_e32 v0, s3
55; GFX6-NEXT:    s_mov_b32 s3, 0xf000
56; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
57; GFX6-NEXT:    s_endpgm
  ; NOTE(review): %src1 is passed as both the offset and the width, so %src2
  ; is never used; the checks above agree (the mask and the shift both read
  ; s3). Confirm whether the last operand was meant to be %src2 - changing it
  ; requires regenerating the checks.
58  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src1)
59  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
60  ret void
61}
62
; Register source and offset, immediate width 123. The checks expect the
; width to show up pre-shifted as the literal 0x7b0000 (123 << 16), OR'd with
; the offset masked to 6 bits.
63define amdgpu_kernel void @bfe_u32_arg_arg_imm(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
64; GFX6-LABEL: bfe_u32_arg_arg_imm:
65; GFX6:       ; %bb.0:
66; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
67; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
68; GFX6-NEXT:    s_and_b32 s3, s3, 63
69; GFX6-NEXT:    s_or_b32 s3, s3, 0x7b0000
70; GFX6-NEXT:    s_bfe_u32 s3, s2, s3
71; GFX6-NEXT:    s_mov_b32 s2, -1
72; GFX6-NEXT:    v_mov_b32_e32 v0, s3
73; GFX6-NEXT:    s_mov_b32 s3, 0xf000
74; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
75; GFX6-NEXT:    s_endpgm
76  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 123)
77  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
78  ret void
79}
80
; Register source and width, immediate offset 123. The checks expect the
; offset to appear already masked as 59 (123 & 63) in the s_or_b32 with the
; shifted width.
81define amdgpu_kernel void @bfe_u32_arg_imm_arg(ptr addrspace(1) %out, i32 %src0, i32 %src2) #0 {
82; GFX6-LABEL: bfe_u32_arg_imm_arg:
83; GFX6:       ; %bb.0:
84; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
85; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
86; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
87; GFX6-NEXT:    s_or_b32 s3, 59, s3
88; GFX6-NEXT:    s_bfe_u32 s3, s2, s3
89; GFX6-NEXT:    s_mov_b32 s2, -1
90; GFX6-NEXT:    v_mov_b32_e32 v0, s3
91; GFX6-NEXT:    s_mov_b32 s3, 0xf000
92; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
93; GFX6-NEXT:    s_endpgm
94  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 123, i32 %src2)
95  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
96  ret void
97}
98
; Immediate source 123 with register offset and width. The checks expect the
; usual mask/shift/or packing and the source to appear as the literal 0x7b in
; s_bfe_u32.
99define amdgpu_kernel void @bfe_u32_imm_arg_arg(ptr addrspace(1) %out, i32 %src1, i32 %src2) #0 {
100; GFX6-LABEL: bfe_u32_imm_arg_arg:
101; GFX6:       ; %bb.0:
102; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
103; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
104; GFX6-NEXT:    s_and_b32 s4, s2, 63
105; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
106; GFX6-NEXT:    s_or_b32 s3, s4, s3
107; GFX6-NEXT:    s_bfe_u32 s3, 0x7b, s3
108; GFX6-NEXT:    s_mov_b32 s2, -1
109; GFX6-NEXT:    v_mov_b32_e32 v0, s3
110; GFX6-NEXT:    s_mov_b32 s3, 0xf000
111; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
112; GFX6-NEXT:    s_endpgm
113  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 123, i32 %src1, i32 %src2)
114  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
115  ret void
116}
117
; Width is the constant 0 and the offset comes from a register. The checks
; expect only the 6-bit mask of the offset (no shift/or for the width) and an
; s_bfe_u32 that is still emitted rather than folded away.
118define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
119; GFX6-LABEL: bfe_u32_arg_0_width_reg_offset:
120; GFX6:       ; %bb.0:
121; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
122; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
123; GFX6-NEXT:    s_and_b32 s3, s3, 63
124; GFX6-NEXT:    s_bfe_u32 s3, s2, s3
125; GFX6-NEXT:    s_mov_b32 s2, -1
126; GFX6-NEXT:    v_mov_b32_e32 v0, s3
127; GFX6-NEXT:    s_mov_b32 s3, 0xf000
128; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
129; GFX6-NEXT:    s_endpgm
130  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 0)
131  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
132  ret void
133}
134
; Width 0 with the immediate offset 8: the packed operand is just the literal
; 8 and the checks still expect an s_bfe_u32. %src1 is declared but not used
; by the call.
135define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
136; GFX6-LABEL: bfe_u32_arg_0_width_imm_offset:
137; GFX6:       ; %bb.0:
138; GFX6-NEXT:    s_load_dword s3, s[4:5], 0x2
139; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
140; GFX6-NEXT:    s_mov_b32 s2, -1
141; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
142; GFX6-NEXT:    s_bfe_u32 s3, s3, 8
143; GFX6-NEXT:    v_mov_b32_e32 v0, s3
144; GFX6-NEXT:    s_mov_b32 s3, 0xf000
145; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
146; GFX6-NEXT:    s_endpgm
147  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 8, i32 0)
148  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
149  ret void
150}
151
; ubfe(offset 0, width 8) applied to a zero-extended i8 load. The checks
; expect buffer_load_ubyte followed by a v_bfe_u32 with offset 0 and width 8,
; i.e. the extract is kept after the byte load.
152define amdgpu_kernel void @bfe_u32_zextload_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
153; GFX6-LABEL: bfe_u32_zextload_i8:
154; GFX6:       ; %bb.0:
155; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
156; GFX6-NEXT:    s_mov_b32 s6, -1
157; GFX6-NEXT:    s_mov_b32 s7, 0xf000
158; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
159; GFX6-NEXT:    s_mov_b64 s[4:5], s[2:3]
160; GFX6-NEXT:    buffer_load_ubyte v0, off, s[4:7], 0
161; GFX6-NEXT:    s_mov_b64 s[2:3], s[6:7]
162; GFX6-NEXT:    s_waitcnt vmcnt(0)
163; GFX6-NEXT:    v_bfe_u32 v0, v0, 0, 8
164; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
165; GFX6-NEXT:    s_endpgm
166  %load = load i8, ptr addrspace(1) %in
167  %ext = zext i8 %load to i32
168  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
169  store i32 %bfe, ptr addrspace(1) %out, align 4
170  ret void
171}
172
173; FIXME: Should be using s_add_i32 (possibly stale: the checks for the next function already contain s_add_i32)
; ubfe(offset 0, width 8) of (load + 1) & 255. The checks expect the scalar
; add, the 0xff mask and an s_bfe_u32 with packed operand 0x80000 (8 << 16)
; to all remain in the output.
174define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
175; GFX6-LABEL: bfe_u32_zext_in_reg_i8:
176; GFX6:       ; %bb.0:
177; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
178; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
179; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
180; GFX6-NEXT:    s_mov_b32 s2, -1
181; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
182; GFX6-NEXT:    s_add_i32 s3, s3, 1
183; GFX6-NEXT:    s_and_b32 s3, s3, 0xff
184; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x80000
185; GFX6-NEXT:    v_mov_b32_e32 v0, s3
186; GFX6-NEXT:    s_mov_b32 s3, 0xf000
187; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
188; GFX6-NEXT:    s_endpgm
189  %load = load i32, ptr addrspace(1) %in, align 4
190  %add = add i32 %load, 1
191  %ext = and i32 %add, 255
192  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
193  store i32 %bfe, ptr addrspace(1) %out, align 4
194  ret void
195}
196
; 16-bit counterpart of the zext-in-reg case: ubfe(offset 0, width 16) of
; (load + 1) & 65535. The checks expect the add, the 0xffff mask and an
; s_bfe_u32 with packed operand 0x100000 (16 << 16).
197define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
198; GFX6-LABEL: bfe_u32_zext_in_reg_i16:
199; GFX6:       ; %bb.0:
200; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
201; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
202; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
203; GFX6-NEXT:    s_mov_b32 s2, -1
204; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
205; GFX6-NEXT:    s_add_i32 s3, s3, 1
206; GFX6-NEXT:    s_and_b32 s3, s3, 0xffff
207; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x100000
208; GFX6-NEXT:    v_mov_b32_e32 v0, s3
209; GFX6-NEXT:    s_mov_b32 s3, 0xf000
210; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
211; GFX6-NEXT:    s_endpgm
212  %load = load i32, ptr addrspace(1) %in, align 4
213  %add = add i32 %load, 1
214  %ext = and i32 %add, 65535
215  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 16)
216  store i32 %bfe, ptr addrspace(1) %out, align 4
217  ret void
218}
219
; Same (load + 1) & 255 input, but extracting 8 bits starting at bit 1. The
; checks expect the packed operand 0x80001 (8 << 16 | 1).
220define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
221; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_1:
222; GFX6:       ; %bb.0:
223; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
224; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
225; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
226; GFX6-NEXT:    s_mov_b32 s2, -1
227; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
228; GFX6-NEXT:    s_add_i32 s3, s3, 1
229; GFX6-NEXT:    s_and_b32 s3, s3, 0xff
230; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x80001
231; GFX6-NEXT:    v_mov_b32_e32 v0, s3
232; GFX6-NEXT:    s_mov_b32 s3, 0xf000
233; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
234; GFX6-NEXT:    s_endpgm
235  %load = load i32, ptr addrspace(1) %in, align 4
236  %add = add i32 %load, 1
237  %ext = and i32 %add, 255
238  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 1, i32 8)
239  store i32 %bfe, ptr addrspace(1) %out, align 4
240  ret void
241}
242
; Same (load + 1) & 255 input, extracting 8 bits starting at bit 3. The
; checks expect the packed operand 0x80003 (8 << 16 | 3).
243define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
244; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_3:
245; GFX6:       ; %bb.0:
246; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
247; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
248; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
249; GFX6-NEXT:    s_mov_b32 s2, -1
250; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
251; GFX6-NEXT:    s_add_i32 s3, s3, 1
252; GFX6-NEXT:    s_and_b32 s3, s3, 0xff
253; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x80003
254; GFX6-NEXT:    v_mov_b32_e32 v0, s3
255; GFX6-NEXT:    s_mov_b32 s3, 0xf000
256; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
257; GFX6-NEXT:    s_endpgm
258  %load = load i32, ptr addrspace(1) %in, align 4
259  %add = add i32 %load, 1
260  %ext = and i32 %add, 255
261  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 3, i32 8)
262  store i32 %bfe, ptr addrspace(1) %out, align 4
263  ret void
264}
265
; Same (load + 1) & 255 input, extracting 8 bits starting at bit 7. The
; checks expect the packed operand 0x80007 (8 << 16 | 7).
266define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
267; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_7:
268; GFX6:       ; %bb.0:
269; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
270; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
271; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
272; GFX6-NEXT:    s_mov_b32 s2, -1
273; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
274; GFX6-NEXT:    s_add_i32 s3, s3, 1
275; GFX6-NEXT:    s_and_b32 s3, s3, 0xff
276; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x80007
277; GFX6-NEXT:    v_mov_b32_e32 v0, s3
278; GFX6-NEXT:    s_mov_b32 s3, 0xf000
279; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
280; GFX6-NEXT:    s_endpgm
281  %load = load i32, ptr addrspace(1) %in, align 4
282  %add = add i32 %load, 1
283  %ext = and i32 %add, 255
284  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 7, i32 8)
285  store i32 %bfe, ptr addrspace(1) %out, align 4
286  ret void
287}
288
; (load + 1) & 65535, then 8 bits extracted starting at bit 8 (the upper byte
; of the masked half-word). The checks expect the 0xffff mask and the packed
; operand 0x80008 (8 << 16 | 8).
289define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
290; GFX6-LABEL: bfe_u32_zext_in_reg_i16_offset_8:
291; GFX6:       ; %bb.0:
292; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
293; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
294; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
295; GFX6-NEXT:    s_mov_b32 s2, -1
296; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
297; GFX6-NEXT:    s_add_i32 s3, s3, 1
298; GFX6-NEXT:    s_and_b32 s3, s3, 0xffff
299; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x80008
300; GFX6-NEXT:    v_mov_b32_e32 v0, s3
301; GFX6-NEXT:    s_mov_b32 s3, 0xf000
302; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
303; GFX6-NEXT:    s_endpgm
304  %load = load i32, ptr addrspace(1) %in, align 4
305  %add = add i32 %load, 1
306  %ext = and i32 %add, 65535
307  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 8, i32 8)
308  store i32 %bfe, ptr addrspace(1) %out, align 4
309  ret void
310}
311
; Extract bit 0 (offset 0, width 1) of a loaded i32. The checks expect one
; s_bfe_u32 with packed operand 0x10000 (1 << 16).
312define amdgpu_kernel void @bfe_u32_test_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
313; GFX6-LABEL: bfe_u32_test_1:
314; GFX6:       ; %bb.0:
315; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
316; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
317; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
318; GFX6-NEXT:    s_mov_b32 s2, -1
319; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
320; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x10000
321; GFX6-NEXT:    v_mov_b32_e32 v0, s3
322; GFX6-NEXT:    s_mov_b32 s3, 0xf000
323; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
324; GFX6-NEXT:    s_endpgm
325  %x = load i32, ptr addrspace(1) %in, align 4
326  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 0, i32 1)
327  store i32 %bfe, ptr addrspace(1) %out, align 4
328  ret void
329}
330
; Low 8 bits (offset 0, width 8) of a value shifted left by 31. The checks
; expect both the s_lshl_b32 and an s_bfe_u32 with operand 0x80000 to remain.
331define amdgpu_kernel void @bfe_u32_test_2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
332; GFX6-LABEL: bfe_u32_test_2:
333; GFX6:       ; %bb.0:
334; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
335; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
336; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
337; GFX6-NEXT:    s_mov_b32 s2, -1
338; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
339; GFX6-NEXT:    s_lshl_b32 s3, s3, 31
340; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x80000
341; GFX6-NEXT:    v_mov_b32_e32 v0, s3
342; GFX6-NEXT:    s_mov_b32 s3, 0xf000
343; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
344; GFX6-NEXT:    s_endpgm
345  %x = load i32, ptr addrspace(1) %in, align 4
346  %shl = shl i32 %x, 31
347  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 8)
348  store i32 %bfe, ptr addrspace(1) %out, align 4
349  ret void
350}
351
; Bit 0 (offset 0, width 1) of a value shifted left by 31. The checks expect
; the s_lshl_b32 followed by s_bfe_u32 with operand 0x10000.
352define amdgpu_kernel void @bfe_u32_test_3(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
353; GFX6-LABEL: bfe_u32_test_3:
354; GFX6:       ; %bb.0:
355; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
356; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
357; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
358; GFX6-NEXT:    s_mov_b32 s2, -1
359; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
360; GFX6-NEXT:    s_lshl_b32 s3, s3, 31
361; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x10000
362; GFX6-NEXT:    v_mov_b32_e32 v0, s3
363; GFX6-NEXT:    s_mov_b32 s3, 0xf000
364; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
365; GFX6-NEXT:    s_endpgm
366  %x = load i32, ptr addrspace(1) %in, align 4
367  %shl = shl i32 %x, 31
368  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 1)
369  store i32 %bfe, ptr addrspace(1) %out, align 4
370  ret void
371}
372
; Bit 31 (offset 31, width 1) of (x << 31) >> 31 with a logical shift right.
; In the checks the shl/lshr pair appears as one s_bfe_u32 with operand
; 0x10000, followed by the intrinsic's own s_bfe_u32 with operand 0x1001f
; (1 << 16 | 31).
373define amdgpu_kernel void @bfe_u32_test_4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
374; GFX6-LABEL: bfe_u32_test_4:
375; GFX6:       ; %bb.0:
376; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
377; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
378; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
379; GFX6-NEXT:    s_mov_b32 s2, -1
380; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
381; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x10000
382; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x1001f
383; GFX6-NEXT:    v_mov_b32_e32 v0, s3
384; GFX6-NEXT:    s_mov_b32 s3, 0xf000
385; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
386; GFX6-NEXT:    s_endpgm
387  %x = load i32, ptr addrspace(1) %in, align 4
388  %shl = shl i32 %x, 31
389  %shr = lshr i32 %shl, 31
390  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 31, i32 1)
391  store i32 %bfe, ptr addrspace(1) %out, align 4
392  ret void
393}
394
; Bit 0 (offset 0, width 1) of (x << 31) >> 31 with an arithmetic shift
; right. In the checks the shl/ashr pair appears as a signed s_bfe_i32 with
; operand 0x10000, followed by the intrinsic's unsigned s_bfe_u32 with the
; same operand.
395define amdgpu_kernel void @bfe_u32_test_5(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
396; GFX6-LABEL: bfe_u32_test_5:
397; GFX6:       ; %bb.0:
398; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
399; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
400; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
401; GFX6-NEXT:    s_mov_b32 s2, -1
402; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
403; GFX6-NEXT:    s_bfe_i32 s3, s3, 0x10000
404; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x10000
405; GFX6-NEXT:    v_mov_b32_e32 v0, s3
406; GFX6-NEXT:    s_mov_b32 s3, 0xf000
407; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
408; GFX6-NEXT:    s_endpgm
409  %x = load i32, ptr addrspace(1) %in, align 4
410  %shl = shl i32 %x, 31
411  %shr = ashr i32 %shl, 31
412  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 0, i32 1)
413  store i32 %bfe, ptr addrspace(1) %out, align 4
414  ret void
415}
416
; 31 bits starting at bit 1 of a value shifted left by 31. The checks expect
; the s_lshl_b32 followed by s_bfe_u32 with operand 0x1f0001 (31 << 16 | 1).
417define amdgpu_kernel void @bfe_u32_test_6(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
418; GFX6-LABEL: bfe_u32_test_6:
419; GFX6:       ; %bb.0:
420; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
421; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
422; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
423; GFX6-NEXT:    s_mov_b32 s2, -1
424; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
425; GFX6-NEXT:    s_lshl_b32 s3, s3, 31
426; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x1f0001
427; GFX6-NEXT:    v_mov_b32_e32 v0, s3
428; GFX6-NEXT:    s_mov_b32 s3, 0xf000
429; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
430; GFX6-NEXT:    s_endpgm
431  %x = load i32, ptr addrspace(1) %in, align 4
432  %shl = shl i32 %x, 31
433  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 1, i32 31)
434  store i32 %bfe, ptr addrspace(1) %out, align 4
435  ret void
436}
437
; Low 31 bits (offset 0, width 31) of a value shifted left by 31. The checks
; expect the s_lshl_b32 followed by s_bfe_u32 with operand 0x1f0000.
438define amdgpu_kernel void @bfe_u32_test_7(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
439; GFX6-LABEL: bfe_u32_test_7:
440; GFX6:       ; %bb.0:
441; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
442; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
443; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
444; GFX6-NEXT:    s_mov_b32 s2, -1
445; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
446; GFX6-NEXT:    s_lshl_b32 s3, s3, 31
447; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x1f0000
448; GFX6-NEXT:    v_mov_b32_e32 v0, s3
449; GFX6-NEXT:    s_mov_b32 s3, 0xf000
450; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
451; GFX6-NEXT:    s_endpgm
452  %x = load i32, ptr addrspace(1) %in, align 4
453  %shl = shl i32 %x, 31
454  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 31)
455  store i32 %bfe, ptr addrspace(1) %out, align 4
456  ret void
457}
458
; Bit 31 (offset 31, width 1) of a value shifted left by 31. The checks
; expect the s_lshl_b32 followed by s_bfe_u32 with operand 0x1001f.
459define amdgpu_kernel void @bfe_u32_test_8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
460; GFX6-LABEL: bfe_u32_test_8:
461; GFX6:       ; %bb.0:
462; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
463; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
464; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
465; GFX6-NEXT:    s_mov_b32 s2, -1
466; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
467; GFX6-NEXT:    s_lshl_b32 s3, s3, 31
468; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x1001f
469; GFX6-NEXT:    v_mov_b32_e32 v0, s3
470; GFX6-NEXT:    s_mov_b32 s3, 0xf000
471; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
472; GFX6-NEXT:    s_endpgm
473  %x = load i32, ptr addrspace(1) %in, align 4
474  %shl = shl i32 %x, 31
475  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
476  store i32 %bfe, ptr addrspace(1) %out, align 4
477  ret void
478}
479
; Bit 31 (offset 31, width 1) taken directly from the loaded value. The
; checks expect a single s_bfe_u32 with operand 0x1001f.
480define amdgpu_kernel void @bfe_u32_test_9(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
481; GFX6-LABEL: bfe_u32_test_9:
482; GFX6:       ; %bb.0:
483; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
484; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
485; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
486; GFX6-NEXT:    s_mov_b32 s2, -1
487; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
488; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x1001f
489; GFX6-NEXT:    v_mov_b32_e32 v0, s3
490; GFX6-NEXT:    s_mov_b32 s3, 0xf000
491; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
492; GFX6-NEXT:    s_endpgm
493  %x = load i32, ptr addrspace(1) %in, align 4
494  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 31, i32 1)
495  store i32 %bfe, ptr addrspace(1) %out, align 4
496  ret void
497}
498
; Upper 31 bits (offset 1, width 31) of the loaded value. The checks expect a
; single s_bfe_u32 with operand 0x1f0001.
499define amdgpu_kernel void @bfe_u32_test_10(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
500; GFX6-LABEL: bfe_u32_test_10:
501; GFX6:       ; %bb.0:
502; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
503; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
504; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
505; GFX6-NEXT:    s_mov_b32 s2, -1
506; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
507; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x1f0001
508; GFX6-NEXT:    v_mov_b32_e32 v0, s3
509; GFX6-NEXT:    s_mov_b32 s3, 0xf000
510; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
511; GFX6-NEXT:    s_endpgm
512  %x = load i32, ptr addrspace(1) %in, align 4
513  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 1, i32 31)
514  store i32 %bfe, ptr addrspace(1) %out, align 4
515  ret void
516}
517
; Upper 24 bits (offset 8, width 24) of the loaded value. The checks expect a
; single s_bfe_u32 with operand 0x180008 (24 << 16 | 8).
518define amdgpu_kernel void @bfe_u32_test_11(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
519; GFX6-LABEL: bfe_u32_test_11:
520; GFX6:       ; %bb.0:
521; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
522; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
523; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
524; GFX6-NEXT:    s_mov_b32 s2, -1
525; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
526; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x180008
527; GFX6-NEXT:    v_mov_b32_e32 v0, s3
528; GFX6-NEXT:    s_mov_b32 s3, 0xf000
529; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
530; GFX6-NEXT:    s_endpgm
531  %x = load i32, ptr addrspace(1) %in, align 4
532  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 8, i32 24)
533  store i32 %bfe, ptr addrspace(1) %out, align 4
534  ret void
535}
536
; Top byte (offset 24, width 8) of the loaded value. The checks expect a
; single s_bfe_u32 with operand 0x80018 (8 << 16 | 24).
537define amdgpu_kernel void @bfe_u32_test_12(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
538; GFX6-LABEL: bfe_u32_test_12:
539; GFX6:       ; %bb.0:
540; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
541; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
542; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
543; GFX6-NEXT:    s_mov_b32 s2, -1
544; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
545; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x80018
546; GFX6-NEXT:    v_mov_b32_e32 v0, s3
547; GFX6-NEXT:    s_mov_b32 s3, 0xf000
548; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
549; GFX6-NEXT:    s_endpgm
550  %x = load i32, ptr addrspace(1) %in, align 4
551  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 24, i32 8)
552  store i32 %bfe, ptr addrspace(1) %out, align 4
553  ret void
554}
555
556; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
; Bit 31 (offset 31, width 1) of a value arithmetically shifted right by 31.
; The checks expect s_ashr_i32 followed by s_bfe_u32 with operand 0x1001f
; (1 << 16 | 31). Value names do not appear in the checked output.
557define amdgpu_kernel void @bfe_u32_test_13(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
558; GFX6-LABEL: bfe_u32_test_13:
559; GFX6:       ; %bb.0:
560; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
561; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
562; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
563; GFX6-NEXT:    s_mov_b32 s2, -1
564; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
565; GFX6-NEXT:    s_ashr_i32 s3, s3, 31
566; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x1001f
567; GFX6-NEXT:    v_mov_b32_e32 v0, s3
568; GFX6-NEXT:    s_mov_b32 s3, 0xf000
569; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
570; GFX6-NEXT:    s_endpgm
571  %x = load i32, ptr addrspace(1) %in, align 4
  ; This is an arithmetic shift right, so the value is named %shr.
572  %shr = ashr i32 %x, 31
573  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 31, i32 1)
574  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
575}
576
; Bit 31 (offset 31, width 1) of a value logically shifted right by 31. The
; checks expect s_lshr_b32 followed by s_bfe_u32 with operand 0x1001f
; (1 << 16 | 31). Value names do not appear in the checked output.
577define amdgpu_kernel void @bfe_u32_test_14(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
578; GFX6-LABEL: bfe_u32_test_14:
579; GFX6:       ; %bb.0:
580; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
581; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
582; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
583; GFX6-NEXT:    s_mov_b32 s2, -1
584; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
585; GFX6-NEXT:    s_lshr_b32 s3, s3, 31
586; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x1001f
587; GFX6-NEXT:    v_mov_b32_e32 v0, s3
588; GFX6-NEXT:    s_mov_b32 s3, 0xf000
589; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
590; GFX6-NEXT:    s_endpgm
591  %x = load i32, ptr addrspace(1) %in, align 4
  ; This is a logical shift right, so the value is named %shr.
592  %shr = lshr i32 %x, 31
593  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 31, i32 1)
594  store i32 %bfe, ptr addrspace(1) %out, align 4
  ret void
595}
596
; All-constant ubfe(0, 0, 0). Despite the function name, the checks still
; expect an s_bfe_u32 on immediates (s_bfe_u32 s2, 0, 0) rather than a
; pre-computed constant.
597define amdgpu_kernel void @bfe_u32_constant_fold_test_0(ptr addrspace(1) %out) #0 {
598; GFX6-LABEL: bfe_u32_constant_fold_test_0:
599; GFX6:       ; %bb.0:
600; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
601; GFX6-NEXT:    s_bfe_u32 s2, 0, 0
602; GFX6-NEXT:    v_mov_b32_e32 v0, s2
603; GFX6-NEXT:    s_mov_b32 s2, -1
604; GFX6-NEXT:    s_mov_b32 s3, 0xf000
605; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
606; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
607; GFX6-NEXT:    s_endpgm
608  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0)
609  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
610  ret void
611}
612
; All-constant ubfe(12334, 0, 0): zero width on a non-zero source. The checks
; expect s_bfe_u32 with the source as the literal 0x302e (12334) and a zero
; packed operand; no folded constant is checked.
613define amdgpu_kernel void @bfe_u32_constant_fold_test_1(ptr addrspace(1) %out) #0 {
614; GFX6-LABEL: bfe_u32_constant_fold_test_1:
615; GFX6:       ; %bb.0:
616; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
617; GFX6-NEXT:    s_bfe_u32 s2, 0x302e, 0
618; GFX6-NEXT:    v_mov_b32_e32 v0, s2
619; GFX6-NEXT:    s_mov_b32 s2, -1
620; GFX6-NEXT:    s_mov_b32 s3, 0xf000
621; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
622; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
623; GFX6-NEXT:    s_endpgm
624  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 12334, i32 0, i32 0)
625  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
626  ret void
627}
628
; All-constant ubfe(0, 0, 1): bit 0 of zero. The checks expect
; s_bfe_u32 s2, 0, 0x10000 (width 1 packed as 1 << 16); no folded constant is
; checked.
629define amdgpu_kernel void @bfe_u32_constant_fold_test_2(ptr addrspace(1) %out) #0 {
630; GFX6-LABEL: bfe_u32_constant_fold_test_2:
631; GFX6:       ; %bb.0:
632; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
633; GFX6-NEXT:    s_bfe_u32 s2, 0, 0x10000
634; GFX6-NEXT:    v_mov_b32_e32 v0, s2
635; GFX6-NEXT:    s_mov_b32 s2, -1
636; GFX6-NEXT:    s_mov_b32 s3, 0xf000
637; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
638; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
639; GFX6-NEXT:    s_endpgm
640  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 1)
641  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
642  ret void
643}
644
; All-constant ubfe(1, 0, 1): bit 0 of one. The checks expect
; s_bfe_u32 s2, 1, 0x10000; no folded constant is checked.
645define amdgpu_kernel void @bfe_u32_constant_fold_test_3(ptr addrspace(1) %out) #0 {
646; GFX6-LABEL: bfe_u32_constant_fold_test_3:
647; GFX6:       ; %bb.0:
648; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
649; GFX6-NEXT:    s_bfe_u32 s2, 1, 0x10000
650; GFX6-NEXT:    v_mov_b32_e32 v0, s2
651; GFX6-NEXT:    s_mov_b32 s2, -1
652; GFX6-NEXT:    s_mov_b32 s3, 0xf000
653; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
654; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
655; GFX6-NEXT:    s_endpgm
656  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 1, i32 0, i32 1)
657  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
658  ret void
659}
660
; All-constant ubfe(0xffffffff, 0, 1): bit 0 of an all-ones source. The
; source 4294967295 is printed as -1 in the expected s_bfe_u32; no folded
; constant is checked.
661define amdgpu_kernel void @bfe_u32_constant_fold_test_4(ptr addrspace(1) %out) #0 {
662; GFX6-LABEL: bfe_u32_constant_fold_test_4:
663; GFX6:       ; %bb.0:
664; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
665; GFX6-NEXT:    s_bfe_u32 s2, -1, 0x10000
666; GFX6-NEXT:    v_mov_b32_e32 v0, s2
667; GFX6-NEXT:    s_mov_b32 s2, -1
668; GFX6-NEXT:    s_mov_b32 s3, 0xf000
669; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
670; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
671; GFX6-NEXT:    s_endpgm
672  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 0, i32 1)
673  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
674  ret void
675}
676
; All-constant ubfe(128, 7, 1): the single set bit of 0x80. In the checks the
; packed operand 0x10007 (1 << 16 | 7) is first moved into s2 and the source
; stays the literal 0x80 (presumably because both operands cannot be literals
; at once - confirm). No folded constant is checked.
677define amdgpu_kernel void @bfe_u32_constant_fold_test_5(ptr addrspace(1) %out) #0 {
678; GFX6-LABEL: bfe_u32_constant_fold_test_5:
679; GFX6:       ; %bb.0:
680; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
681; GFX6-NEXT:    s_mov_b32 s2, 0x10007
682; GFX6-NEXT:    s_bfe_u32 s2, 0x80, s2
683; GFX6-NEXT:    v_mov_b32_e32 v0, s2
684; GFX6-NEXT:    s_mov_b32 s2, -1
685; GFX6-NEXT:    s_mov_b32 s3, 0xf000
686; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
687; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
688; GFX6-NEXT:    s_endpgm
689  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 7, i32 1)
690  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
691  ret void
692}
693
; All-constant ubfe(128, 0, 8): low byte of 0x80. The checks expect the
; packed operand 0x80000 (8 << 16) in s2 and the literal source 0x80 in
; s_bfe_u32; no folded constant is checked.
694define amdgpu_kernel void @bfe_u32_constant_fold_test_6(ptr addrspace(1) %out) #0 {
695; GFX6-LABEL: bfe_u32_constant_fold_test_6:
696; GFX6:       ; %bb.0:
697; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
698; GFX6-NEXT:    s_mov_b32 s2, 0x80000
699; GFX6-NEXT:    s_bfe_u32 s2, 0x80, s2
700; GFX6-NEXT:    v_mov_b32_e32 v0, s2
701; GFX6-NEXT:    s_mov_b32 s2, -1
702; GFX6-NEXT:    s_mov_b32 s3, 0xf000
703; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
704; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
705; GFX6-NEXT:    s_endpgm
706  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 0, i32 8)
707  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
708  ret void
709}
710
; All-constant ubfe(127, 0, 8): low byte of 0x7f. The checks expect the
; packed operand 0x80000 in s2 and the literal source 0x7f in s_bfe_u32; no
; folded constant is checked.
711define amdgpu_kernel void @bfe_u32_constant_fold_test_7(ptr addrspace(1) %out) #0 {
712; GFX6-LABEL: bfe_u32_constant_fold_test_7:
713; GFX6:       ; %bb.0:
714; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
715; GFX6-NEXT:    s_mov_b32 s2, 0x80000
716; GFX6-NEXT:    s_bfe_u32 s2, 0x7f, s2
717; GFX6-NEXT:    v_mov_b32_e32 v0, s2
718; GFX6-NEXT:    s_mov_b32 s2, -1
719; GFX6-NEXT:    s_mov_b32 s3, 0xf000
720; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
721; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
722; GFX6-NEXT:    s_endpgm
723  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 0, i32 8)
724  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
725  ret void
726}
727
; All-constant ubfe(127, 6, 8): 8 bits of 0x7f starting at bit 6. The checks
; expect the packed operand 0x80006 (8 << 16 | 6) in s2 and the literal
; source 0x7f; no folded constant is checked.
728define amdgpu_kernel void @bfe_u32_constant_fold_test_8(ptr addrspace(1) %out) #0 {
729; GFX6-LABEL: bfe_u32_constant_fold_test_8:
730; GFX6:       ; %bb.0:
731; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
732; GFX6-NEXT:    s_mov_b32 s2, 0x80006
733; GFX6-NEXT:    s_bfe_u32 s2, 0x7f, s2
734; GFX6-NEXT:    v_mov_b32_e32 v0, s2
735; GFX6-NEXT:    s_mov_b32 s2, -1
736; GFX6-NEXT:    s_mov_b32 s3, 0xf000
737; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
738; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
739; GFX6-NEXT:    s_endpgm
740  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 6, i32 8)
741  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
742  ret void
743}
744
; All-constant ubfe(65536, 16, 8): 8 bits of 0x10000 starting at bit 16. The
; checks expect the packed operand 0x80010 (8 << 16 | 16) in s2 and the
; literal source 0x10000; no folded constant is checked.
745define amdgpu_kernel void @bfe_u32_constant_fold_test_9(ptr addrspace(1) %out) #0 {
746; GFX6-LABEL: bfe_u32_constant_fold_test_9:
747; GFX6:       ; %bb.0:
748; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
749; GFX6-NEXT:    s_mov_b32 s2, 0x80010
750; GFX6-NEXT:    s_bfe_u32 s2, 0x10000, s2
751; GFX6-NEXT:    v_mov_b32_e32 v0, s2
752; GFX6-NEXT:    s_mov_b32 s2, -1
753; GFX6-NEXT:    s_mov_b32 s3, 0xf000
754; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
755; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
756; GFX6-NEXT:    s_endpgm
757  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65536, i32 16, i32 8)
758  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
759  ret void
760}
761
; All-constant ubfe(65535, 16, 16): upper half-word of 0xffff. The checks
; expect the packed operand 0x100010 (16 << 16 | 16) in s2 and the literal
; source 0xffff; no folded constant is checked.
762define amdgpu_kernel void @bfe_u32_constant_fold_test_10(ptr addrspace(1) %out) #0 {
763; GFX6-LABEL: bfe_u32_constant_fold_test_10:
764; GFX6:       ; %bb.0:
765; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
766; GFX6-NEXT:    s_mov_b32 s2, 0x100010
767; GFX6-NEXT:    s_bfe_u32 s2, 0xffff, s2
768; GFX6-NEXT:    v_mov_b32_e32 v0, s2
769; GFX6-NEXT:    s_mov_b32 s2, -1
770; GFX6-NEXT:    s_mov_b32 s3, 0xf000
771; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
772; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
773; GFX6-NEXT:    s_endpgm
774  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65535, i32 16, i32 16)
775  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
776  ret void
777}
778
; All-constant ubfe(160, 4, 4): upper nibble of 0xa0. The checks expect the
; packed operand 0x40004 (4 << 16 | 4) in s2 and the literal source 0xa0; no
; folded constant is checked.
779define amdgpu_kernel void @bfe_u32_constant_fold_test_11(ptr addrspace(1) %out) #0 {
780; GFX6-LABEL: bfe_u32_constant_fold_test_11:
781; GFX6:       ; %bb.0:
782; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
783; GFX6-NEXT:    s_mov_b32 s2, 0x40004
784; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
785; GFX6-NEXT:    v_mov_b32_e32 v0, s2
786; GFX6-NEXT:    s_mov_b32 s2, -1
787; GFX6-NEXT:    s_mov_b32 s3, 0xf000
788; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
789; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
790; GFX6-NEXT:    s_endpgm
791  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 4)
792  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
793  ret void
794}
795
; Calls ubfe on the constant 160 (0xa0) with offset 31 and width 1 (the top bit
; of the 32-bit source) and stores the result to %out. The checked output keeps
; the s_bfe_u32, with the control word 0x1001f (width << 16 | offset) loaded
; via s_mov_b32 rather than a folded constant.
796define amdgpu_kernel void @bfe_u32_constant_fold_test_12(ptr addrspace(1) %out) #0 {
797; GFX6-LABEL: bfe_u32_constant_fold_test_12:
798; GFX6:       ; %bb.0:
799; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
800; GFX6-NEXT:    s_mov_b32 s2, 0x1001f
801; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
802; GFX6-NEXT:    v_mov_b32_e32 v0, s2
803; GFX6-NEXT:    s_mov_b32 s2, -1
804; GFX6-NEXT:    s_mov_b32 s3, 0xf000
805; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
806; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
807; GFX6-NEXT:    s_endpgm
808  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 31, i32 1)
809  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
810  ret void
811}
812
; Calls ubfe on the constant 131070 (0x1fffe) with offset 16 and width 16 (the
; upper half of the 32-bit source) and stores the result to %out. The checked
; output keeps the s_bfe_u32, with the control word 0x100010
; (width << 16 | offset) loaded via s_mov_b32 rather than a folded constant.
813define amdgpu_kernel void @bfe_u32_constant_fold_test_13(ptr addrspace(1) %out) #0 {
814; GFX6-LABEL: bfe_u32_constant_fold_test_13:
815; GFX6:       ; %bb.0:
816; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
817; GFX6-NEXT:    s_mov_b32 s2, 0x100010
818; GFX6-NEXT:    s_bfe_u32 s2, 0x1fffe, s2
819; GFX6-NEXT:    v_mov_b32_e32 v0, s2
820; GFX6-NEXT:    s_mov_b32 s2, -1
821; GFX6-NEXT:    s_mov_b32 s3, 0xf000
822; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
823; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
824; GFX6-NEXT:    s_endpgm
825  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 131070, i32 16, i32 16)
826  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
827  ret void
828}
829
; Calls ubfe on the constant 160 (0xa0) with offset 2 and width 30 (offset +
; width = 32, so the field runs up to the top bit) and stores the result to
; %out. The checked output keeps the s_bfe_u32, with the control word 0x1e0002
; (width << 16 | offset) loaded via s_mov_b32 rather than a folded constant.
830define amdgpu_kernel void @bfe_u32_constant_fold_test_14(ptr addrspace(1) %out) #0 {
831; GFX6-LABEL: bfe_u32_constant_fold_test_14:
832; GFX6:       ; %bb.0:
833; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
834; GFX6-NEXT:    s_mov_b32 s2, 0x1e0002
835; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
836; GFX6-NEXT:    v_mov_b32_e32 v0, s2
837; GFX6-NEXT:    s_mov_b32 s2, -1
838; GFX6-NEXT:    s_mov_b32 s3, 0xf000
839; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
840; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
841; GFX6-NEXT:    s_endpgm
842  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 2, i32 30)
843  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
844  ret void
845}
846
; Calls ubfe on the constant 160 (0xa0) with offset 4 and width 28 (offset +
; width = 32, so the field runs up to the top bit) and stores the result to
; %out. The checked output keeps the s_bfe_u32, with the control word 0x1c0004
; (width << 16 | offset) loaded via s_mov_b32 rather than a folded constant.
847define amdgpu_kernel void @bfe_u32_constant_fold_test_15(ptr addrspace(1) %out) #0 {
848; GFX6-LABEL: bfe_u32_constant_fold_test_15:
849; GFX6:       ; %bb.0:
850; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
851; GFX6-NEXT:    s_mov_b32 s2, 0x1c0004
852; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
853; GFX6-NEXT:    v_mov_b32_e32 v0, s2
854; GFX6-NEXT:    s_mov_b32 s2, -1
855; GFX6-NEXT:    s_mov_b32 s3, 0xf000
856; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
857; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
858; GFX6-NEXT:    s_endpgm
859  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 28)
860  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
861  ret void
862}
863
; Calls ubfe on the all-ones constant 4294967295 (printed as -1 in the checked
; output) with offset 1 and width 7 and stores the result to %out. Unlike the
; neighbouring constant tests, the control word 0x70001 (width << 16 | offset)
; appears directly as an operand of s_bfe_u32 instead of being loaded with a
; separate s_mov_b32. The instruction itself is still not folded away.
; NOTE(review): presumably possible because -1 is encodable as an inline
; constant, leaving room for one literal operand -- confirm against the ISA.
864define amdgpu_kernel void @bfe_u32_constant_fold_test_16(ptr addrspace(1) %out) #0 {
865; GFX6-LABEL: bfe_u32_constant_fold_test_16:
866; GFX6:       ; %bb.0:
867; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
868; GFX6-NEXT:    s_bfe_u32 s2, -1, 0x70001
869; GFX6-NEXT:    v_mov_b32_e32 v0, s2
870; GFX6-NEXT:    s_mov_b32 s2, -1
871; GFX6-NEXT:    s_mov_b32 s3, 0xf000
872; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
873; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
874; GFX6-NEXT:    s_endpgm
875  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 1, i32 7)
876  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
877  ret void
878}
879
; Calls ubfe on the constant 255 (0xff) with offset 1 and width 31 (offset +
; width = 32, so the field runs up to the top bit) and stores the result to
; %out. The checked output keeps the s_bfe_u32, with the control word 0x1f0001
; (width << 16 | offset) loaded via s_mov_b32 rather than a folded constant.
880define amdgpu_kernel void @bfe_u32_constant_fold_test_17(ptr addrspace(1) %out) #0 {
881; GFX6-LABEL: bfe_u32_constant_fold_test_17:
882; GFX6:       ; %bb.0:
883; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
884; GFX6-NEXT:    s_mov_b32 s2, 0x1f0001
885; GFX6-NEXT:    s_bfe_u32 s2, 0xff, s2
886; GFX6-NEXT:    v_mov_b32_e32 v0, s2
887; GFX6-NEXT:    s_mov_b32 s2, -1
888; GFX6-NEXT:    s_mov_b32 s3, 0xf000
889; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
890; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
891; GFX6-NEXT:    s_endpgm
892  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 1, i32 31)
893  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
894  ret void
895}
896
; Calls ubfe on the constant 255 (0xff) with offset 31 and width 1 (the top bit
; of the 32-bit source) and stores the result to %out. The checked output keeps
; the s_bfe_u32, with the control word 0x1001f (width << 16 | offset) loaded
; via s_mov_b32 rather than a folded constant.
897define amdgpu_kernel void @bfe_u32_constant_fold_test_18(ptr addrspace(1) %out) #0 {
898; GFX6-LABEL: bfe_u32_constant_fold_test_18:
899; GFX6:       ; %bb.0:
900; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
901; GFX6-NEXT:    s_mov_b32 s2, 0x1001f
902; GFX6-NEXT:    s_bfe_u32 s2, 0xff, s2
903; GFX6-NEXT:    v_mov_b32_e32 v0, s2
904; GFX6-NEXT:    s_mov_b32 s2, -1
905; GFX6-NEXT:    s_mov_b32 s3, 0xf000
906; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
907; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
908; GFX6-NEXT:    s_endpgm
909  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 31, i32 1)
910  store i32 %bfe_u32, ptr addrspace(1) %out, align 4
911  ret void
912}
913
914; Make sure that SimplifyDemandedBits doesn't cause the 'and' mask to be
915; reduced to only the bits demanded by the bfe.
916
917; XXX: The operand to the bfe (s_bfe_u32 in the checks below) could also just directly be the load register.
; Loads an i32 from %in, masks it with 63, and uses the masked value twice:
; as the source of a ubfe (offset 2, width 2) stored to %out0, and directly as
; the value stored to %out1. The checked output keeps the full s_and_b32 with
; 63 and feeds its result to s_bfe_u32 with control word 0x20002.
; The signature is split: the autogenerated checks sit between the first
; parameter and the remaining two.
918define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(ptr addrspace(1) %out0,
919; GFX6-LABEL: simplify_bfe_u32_multi_use_arg:
920; GFX6:       ; %bb.0:
921; GFX6-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x4
922; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
923; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
924; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x0
925; GFX6-NEXT:    s_mov_b64 s[4:5], s[0:1]
926; GFX6-NEXT:    s_mov_b32 s6, -1
927; GFX6-NEXT:    s_mov_b32 s7, 0xf000
928; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
929; GFX6-NEXT:    s_and_b32 s0, s8, 63
930; GFX6-NEXT:    s_bfe_u32 s1, s0, 0x20002
931; GFX6-NEXT:    v_mov_b32_e32 v1, s1
932; GFX6-NEXT:    v_mov_b32_e32 v0, s0
933; GFX6-NEXT:    buffer_store_dword v1, off, s[4:7], 0
934; GFX6-NEXT:    s_mov_b64 s[4:5], s[2:3]
935; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
936; GFX6-NEXT:    s_endpgm
937                                            ptr addrspace(1) %out1,
938                                            ptr addrspace(1) %in) #0 {
939  %src = load i32, ptr addrspace(1) %in, align 4
940  %and = and i32 %src, 63
941  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %and, i32 2, i32 2)
942  store i32 %bfe_u32, ptr addrspace(1) %out0, align 4
943  store i32 %and, ptr addrspace(1) %out1, align 4
944  ret void
945}
946
; Plain IR pattern with no intrinsic call: shift %a right by the constant 6,
; then keep the low three bits (mask 7), and store the result to %out. The
; checked output expects the shift/mask pair to be selected as one s_bfe_u32
; with control word 0x30006 (width 3, offset 6).
947define amdgpu_kernel void @lshr_and(ptr addrspace(1) %out, i32 %a) #0 {
948; GFX6-LABEL: lshr_and:
949; GFX6:       ; %bb.0:
950; GFX6-NEXT:    s_load_dword s3, s[4:5], 0x2
951; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
952; GFX6-NEXT:    s_mov_b32 s2, -1
953; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
954; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x30006
955; GFX6-NEXT:    v_mov_b32_e32 v0, s3
956; GFX6-NEXT:    s_mov_b32 s3, 0xf000
957; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
958; GFX6-NEXT:    s_endpgm
959  %b = lshr i32 %a, 6
960  %c = and i32 %b, 7
961  store i32 %c, ptr addrspace(1) %out, align 8
962  ret void
963}
964
; Same shift-then-mask shape as lshr_and, but the shift amount is the kernel
; argument %b instead of a constant. The checked output expects no bfe here:
; a separate s_lshr_b32 and s_and_b32 with 7 are emitted. Despite the v_ prefix
; in the name, both checked ALU instructions are scalar.
965define amdgpu_kernel void @v_lshr_and(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
966; GFX6-LABEL: v_lshr_and:
967; GFX6:       ; %bb.0:
968; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
969; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
970; GFX6-NEXT:    s_lshr_b32 s3, s2, s3
971; GFX6-NEXT:    s_and_b32 s3, s3, 7
972; GFX6-NEXT:    s_mov_b32 s2, -1
973; GFX6-NEXT:    v_mov_b32_e32 v0, s3
974; GFX6-NEXT:    s_mov_b32 s3, 0xf000
975; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
976; GFX6-NEXT:    s_endpgm
977  %c = lshr i32 %a, %b
978  %d = and i32 %c, 7
979  store i32 %d, ptr addrspace(1) %out, align 8
980  ret void
981}
982
; Mask-then-shift form of the bitfield extract: keep bits 6..8 of %a (mask 448
; = 0x1c0), then shift right by 6, and store the result to %out. The checked
; output expects the pair to be selected as one s_bfe_u32 with control word
; 0x30006 (width 3, offset 6), the same instruction as for lshr_and.
983define amdgpu_kernel void @and_lshr(ptr addrspace(1) %out, i32 %a) #0 {
984; GFX6-LABEL: and_lshr:
985; GFX6:       ; %bb.0:
986; GFX6-NEXT:    s_load_dword s3, s[4:5], 0x2
987; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
988; GFX6-NEXT:    s_mov_b32 s2, -1
989; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
990; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x30006
991; GFX6-NEXT:    v_mov_b32_e32 v0, s3
992; GFX6-NEXT:    s_mov_b32 s3, 0xf000
993; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
994; GFX6-NEXT:    s_endpgm
995  %b = and i32 %a, 448
996  %c = lshr i32 %b, 6
997  store i32 %c, ptr addrspace(1) %out, align 8
998  ret void
999}
1000
; Variant of and_lshr whose mask also covers the bits that the shift discards:
; keep bits 0..8 of %a (mask 511 = 0x1ff), then shift right by 6, and store the
; result to %out. Only bits 6..8 survive the shift, and the checked output
; expects the same s_bfe_u32 with control word 0x30006 (width 3, offset 6).
1001define amdgpu_kernel void @and_lshr2(ptr addrspace(1) %out, i32 %a) #0 {
1002; GFX6-LABEL: and_lshr2:
1003; GFX6:       ; %bb.0:
1004; GFX6-NEXT:    s_load_dword s3, s[4:5], 0x2
1005; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
1006; GFX6-NEXT:    s_mov_b32 s2, -1
1007; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1008; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x30006
1009; GFX6-NEXT:    v_mov_b32_e32 v0, s3
1010; GFX6-NEXT:    s_mov_b32 s3, 0xf000
1011; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1012; GFX6-NEXT:    s_endpgm
1013  %b = and i32 %a, 511
1014  %c = lshr i32 %b, 6
1015  store i32 %c, ptr addrspace(1) %out, align 8
1016  ret void
1017}
1018
; Shift-pair form of the bitfield extract: shift %a left by 9 (discarding the
; top 9 bits), then logically right by 11, and store the result to %out. That
; leaves 32 - 11 = 21 bits taken from bit 11 - 9 = 2 of %a upwards, and the
; checked output expects one s_bfe_u32 with control word 0x150002 (width 21,
; offset 2).
1019define amdgpu_kernel void @shl_lshr(ptr addrspace(1) %out, i32 %a) #0 {
1020; GFX6-LABEL: shl_lshr:
1021; GFX6:       ; %bb.0:
1022; GFX6-NEXT:    s_load_dword s3, s[4:5], 0x2
1023; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
1024; GFX6-NEXT:    s_mov_b32 s2, -1
1025; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1026; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x150002
1027; GFX6-NEXT:    v_mov_b32_e32 v0, s3
1028; GFX6-NEXT:    s_mov_b32 s3, 0xf000
1029; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1030; GFX6-NEXT:    s_endpgm
1031  %b = shl i32 %a, 9
1032  %c = lshr i32 %b, 11
1033  store i32 %c, ptr addrspace(1) %out, align 8
1034  ret void
1035}
1036
; Declarations of the ubfe intrinsics called by the tests in this file, in
; 32-bit and 64-bit source/result variants. At the call sites the operands are,
; in order, the source value, the bit offset and the field width.
1037declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) #1
1038declare i64 @llvm.amdgcn.ubfe.i64(i64, i32, i32) #1
1039
; Attribute group #0 is attached to the test functions, #1 to the intrinsic
; declarations.
1040attributes #0 = { nounwind }
1041attributes #1 = { nounwind readnone }
1042