; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn < %s | FileCheck --check-prefixes=SI,GCN,FUNC %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga  < %s | FileCheck --check-prefixes=VI,GCN,FUNC %s
; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck --check-prefixes=R600,FUNC %s

; Checks codegen of llvm.r600.read.local.size.x: a scalar load on SI/VI
; and a constant-buffer read (KC0[1].Z) on R600, stored to %out.
define amdgpu_kernel void @local_size_x(ptr addrspace(1) %out) {
; SI-LABEL: local_size_x:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dword s6, s[4:5], 0x6
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s6
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: local_size_x:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_load_dword s2, s[4:5], 0x18
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    v_mov_b32_e32 v2, s2
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; R600-LABEL: local_size_x:
; R600:       ; %bb.0: ; %entry
; R600-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     LSHR T0.X, KC0[2].Y, literal.x,
; R600-NEXT:     MOV * T1.X, KC0[1].Z,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
entry:
  %0 = call i32 @llvm.r600.read.local.size.x() #0
  store i32 %0, ptr addrspace(1) %out
  ret void
}
44
; Checks codegen of llvm.r600.read.local.size.y: same pattern as the x
; variant but reading the next dword (0x7 / 0x1c / KC0[1].W).
define amdgpu_kernel void @local_size_y(ptr addrspace(1) %out) {
; SI-LABEL: local_size_y:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dword s6, s[4:5], 0x7
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s6
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: local_size_y:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_load_dword s2, s[4:5], 0x1c
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    v_mov_b32_e32 v2, s2
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; R600-LABEL: local_size_y:
; R600:       ; %bb.0: ; %entry
; R600-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     LSHR T0.X, KC0[2].Y, literal.x,
; R600-NEXT:     MOV * T1.X, KC0[1].W,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
entry:
  %0 = call i32 @llvm.r600.read.local.size.y() #0
  store i32 %0, ptr addrspace(1) %out
  ret void
}
83
; Checks codegen of llvm.r600.read.local.size.z: same pattern as the x/y
; variants, reading the third dword (0x8 / 0x20 / KC0[2].X).
define amdgpu_kernel void @local_size_z(ptr addrspace(1) %out) {
; SI-LABEL: local_size_z:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dword s6, s[4:5], 0x8
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s6
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: local_size_z:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_load_dword s2, s[4:5], 0x20
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    v_mov_b32_e32 v2, s2
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; R600-LABEL: local_size_z:
; R600:       ; %bb.0: ; %entry
; R600-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     LSHR T0.X, KC0[2].Y, literal.x,
; R600-NEXT:     MOV * T1.X, KC0[2].X,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
entry:
  %0 = call i32 @llvm.r600.read.local.size.z() #0
  store i32 %0, ptr addrspace(1) %out
  ret void
}
122
; Checks that the product local_size_x * local_size_y lowers to a scalar
; multiply (s_mul_i32 / MULLO_INT) of the two loaded size values.
define amdgpu_kernel void @local_size_xy(ptr addrspace(1) %out) {
; SI-LABEL: local_size_xy:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x6
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mul_i32 s4, s6, s7
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: local_size_xy:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x18
; VI-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mul_i32 s0, s0, s1
; VI-NEXT:    v_mov_b32_e32 v0, s2
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; R600-LABEL: local_size_xy:
; R600:       ; %bb.0: ; %entry
; R600-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     LSHR T0.X, KC0[2].Y, literal.x,
; R600-NEXT:     MULLO_INT * T1.X, KC0[1].Z, KC0[1].W,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
entry:
  %x = call i32 @llvm.r600.read.local.size.x() #0
  %y = call i32 @llvm.r600.read.local.size.y() #0
  %val = mul i32 %x, %y
  store i32 %val, ptr addrspace(1) %out
  ret void
}
165
; Checks local_size_x * local_size_z: two non-adjacent dwords are loaded
; separately and multiplied with a scalar multiply.
define amdgpu_kernel void @local_size_xz(ptr addrspace(1) %out) {
; SI-LABEL: local_size_xz:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dword s2, s[4:5], 0x6
; SI-NEXT:    s_load_dword s6, s[4:5], 0x8
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mul_i32 s4, s2, s6
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: local_size_xz:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dword s2, s[4:5], 0x18
; VI-NEXT:    s_load_dword s3, s[4:5], 0x20
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mul_i32 s2, s2, s3
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    v_mov_b32_e32 v2, s2
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; R600-LABEL: local_size_xz:
; R600:       ; %bb.0: ; %entry
; R600-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     LSHR T0.X, KC0[2].Y, literal.x,
; R600-NEXT:     MULLO_INT * T1.X, KC0[1].Z, KC0[2].X,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
entry:
  %x = call i32 @llvm.r600.read.local.size.x() #0
  %z = call i32 @llvm.r600.read.local.size.z() #0
  %val = mul i32 %x, %z
  store i32 %val, ptr addrspace(1) %out
  ret void
}
210
; Checks local_size_y * local_size_z: on GCN the adjacent y/z dwords plus
; the out pointer are fetched with a single dwordx4 load before multiplying.
define amdgpu_kernel void @local_size_yz(ptr addrspace(1) %out) {
; SI-LABEL: local_size_yz:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x7
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mul_i32 s0, s0, s1
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s4, s2
; SI-NEXT:    s_mov_b32 s5, s3
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: local_size_yz:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x1c
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mul_i32 s0, s0, s1
; VI-NEXT:    v_mov_b32_e32 v0, s2
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; R600-LABEL: local_size_yz:
; R600:       ; %bb.0: ; %entry
; R600-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     LSHR T0.X, KC0[2].Y, literal.x,
; R600-NEXT:     MULLO_INT * T1.X, KC0[1].W, KC0[2].X,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
entry:
  %y = call i32 @llvm.r600.read.local.size.y() #0
  %z = call i32 @llvm.r600.read.local.size.z() #0
  %val = mul i32 %y, %z
  store i32 %val, ptr addrspace(1) %out
  ret void
}
253
; Checks the combined expression x*y + z: a scalar multiply followed by a
; scalar add (s_mul_i32 + s_add_i32 / MULLO_INT + ADD_INT).
define amdgpu_kernel void @local_size_xyz(ptr addrspace(1) %out) {
; SI-LABEL: local_size_xyz:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x6
; SI-NEXT:    s_load_dword s2, s[4:5], 0x8
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mul_i32 s4, s6, s7
; SI-NEXT:    s_add_i32 s4, s4, s2
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: local_size_xyz:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x18
; VI-NEXT:    s_load_dword s6, s[4:5], 0x20
; VI-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mul_i32 s0, s0, s1
; VI-NEXT:    s_add_i32 s0, s0, s6
; VI-NEXT:    v_mov_b32_e32 v0, s2
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; R600-LABEL: local_size_xyz:
; R600:       ; %bb.0: ; %entry
; R600-NEXT:    ALU 3, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     MULLO_INT * T0.X, KC0[1].Z, KC0[1].W,
; R600-NEXT:     ADD_INT T0.X, PS, KC0[2].X,
; R600-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
entry:
  %x = call i32 @llvm.r600.read.local.size.x() #0
  %y = call i32 @llvm.r600.read.local.size.y() #0
  %z = call i32 @llvm.r600.read.local.size.z() #0
  %xy = mul i32 %x, %y
  %xyz = add i32 %xy, %z
  store i32 %xyz, ptr addrspace(1) %out
  ret void
}
303
; Checks known-bits reasoning: the shl/lshr pair masks to the low 16 bits,
; and on SI/VI it folds away entirely (no mask instructions in the output);
; R600 still emits an AND_INT with 0xffff.
define amdgpu_kernel void @local_size_x_known_bits(ptr addrspace(1) %out) {
; SI-LABEL: local_size_x_known_bits:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dword s6, s[4:5], 0x6
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s6
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: local_size_x_known_bits:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_load_dword s2, s[4:5], 0x18
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    v_mov_b32_e32 v2, s2
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; R600-LABEL: local_size_x_known_bits:
; R600:       ; %bb.0: ; %entry
; R600-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     LSHR T0.X, KC0[2].Y, literal.x,
; R600-NEXT:     AND_INT * T1.X, KC0[1].Z, literal.y,
; R600-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
entry:
  %size = call i32 @llvm.r600.read.local.size.x() #0
  %shl = shl i32 %size, 16
  %shr = lshr i32 %shl, 16
  store i32 %shr, ptr addrspace(1) %out
  ret void
}
344
; Same known-bits check as local_size_x_known_bits, for the y component:
; SI/VI fold the 16-bit mask away; R600 keeps an AND_INT with 0xffff.
define amdgpu_kernel void @local_size_y_known_bits(ptr addrspace(1) %out) {
; SI-LABEL: local_size_y_known_bits:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dword s6, s[4:5], 0x7
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s6
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: local_size_y_known_bits:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_load_dword s2, s[4:5], 0x1c
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    v_mov_b32_e32 v2, s2
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; R600-LABEL: local_size_y_known_bits:
; R600:       ; %bb.0: ; %entry
; R600-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     LSHR T0.X, KC0[2].Y, literal.x,
; R600-NEXT:     AND_INT * T1.X, KC0[1].W, literal.y,
; R600-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
entry:
  %size = call i32 @llvm.r600.read.local.size.y() #0
  %shl = shl i32 %size, 16
  %shr = lshr i32 %shl, 16
  store i32 %shr, ptr addrspace(1) %out
  ret void
}
385
; Same known-bits check as local_size_x_known_bits, for the z component:
; SI/VI fold the 16-bit mask away; R600 keeps an AND_INT with 0xffff.
define amdgpu_kernel void @local_size_z_known_bits(ptr addrspace(1) %out) {
; SI-LABEL: local_size_z_known_bits:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dword s6, s[4:5], 0x8
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s6
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: local_size_z_known_bits:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_load_dword s2, s[4:5], 0x20
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    v_mov_b32_e32 v2, s2
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; R600-LABEL: local_size_z_known_bits:
; R600:       ; %bb.0: ; %entry
; R600-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     LSHR T0.X, KC0[2].Y, literal.x,
; R600-NEXT:     AND_INT * T1.X, KC0[2].X, literal.y,
; R600-NEXT:    2(2.802597e-45), 65535(9.183409e-41)
entry:
  %size = call i32 @llvm.r600.read.local.size.z() #0
  %shl = shl i32 %size, 16
  %shr = lshr i32 %shl, 16
  store i32 %shr, ptr addrspace(1) %out
  ret void
}
426
; Intrinsic declarations used by all tests above; #0 marks them readnone
; so results can be CSE'd/folded.
declare i32 @llvm.r600.read.local.size.x() #0
declare i32 @llvm.r600.read.local.size.y() #0
declare i32 @llvm.r600.read.local.size.z() #0

attributes #0 = { nounwind readnone }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; FUNC: {{.*}}
; GCN: {{.*}}
