xref: /llvm-project/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
4
; Intrinsic that yields the workitem id in the x dimension. In this file it is
; called by v_sint_to_fp_i64_to_f64 to index its input buffer.
5declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
6
; Signed i32 -> f64 conversion of a kernel argument.
; Both targets are expected to convert straight from the loaded SGPR with a
; single v_cvt_f64_i32 and then store the 64-bit result through a flat store.
; The only visible difference between the two check sets is the immediate
; offset on the s_load_dword of %in (0x2 for hawaii, 0x8 for fiji).
7define amdgpu_kernel void @sint_to_fp_i32_to_f64(ptr addrspace(1) %out, i32 %in) {
8; CI-LABEL: sint_to_fp_i32_to_f64:
9; CI:       ; %bb.0:
10; CI-NEXT:    s_load_dword s2, s[8:9], 0x2
11; CI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
12; CI-NEXT:    s_waitcnt lgkmcnt(0)
13; CI-NEXT:    v_cvt_f64_i32_e32 v[0:1], s2
14; CI-NEXT:    v_mov_b32_e32 v3, s1
15; CI-NEXT:    v_mov_b32_e32 v2, s0
16; CI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
17; CI-NEXT:    s_endpgm
18;
19; VI-LABEL: sint_to_fp_i32_to_f64:
20; VI:       ; %bb.0:
21; VI-NEXT:    s_load_dword s2, s[8:9], 0x8
22; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
23; VI-NEXT:    s_waitcnt lgkmcnt(0)
24; VI-NEXT:    v_cvt_f64_i32_e32 v[0:1], s2
25; VI-NEXT:    v_mov_b32_e32 v3, s1
26; VI-NEXT:    v_mov_b32_e32 v2, s0
27; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
28; VI-NEXT:    s_endpgm
29  %result = sitofp i32 %in to double
30  store double %result, ptr addrspace(1) %out
31  ret void
32}
33
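34; We can't fold the SGPRs into v_cndmask_b32_e64, because it already
35; uses an SGPR (implicit vcc).
; NOTE(review) - the checks for the function below now expect
; s_cmp_eq_u32 + s_cselect_b32 and contain no v_cndmask_b32_e64, so the
; remark above looks stale; confirm before relying on it.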
; sitofp of an i1 that is produced by an icmp eq against zero.
; A true i1 converts to -1.0 and a false one to 0.0, so no conversion
; instruction is expected. The checks want the scalar compare to pick the
; high dword of the result with s_cselect_b32 (0xbff00000, the high half of
; the f64 bit pattern of -1.0, or 0), while the low dword is a constant 0.
36define amdgpu_kernel void @sint_to_fp_i1_f64(ptr addrspace(1) %out, i32 %in) {
37; CI-LABEL: sint_to_fp_i1_f64:
38; CI:       ; %bb.0:
39; CI-NEXT:    s_load_dword s2, s[8:9], 0x2
40; CI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
41; CI-NEXT:    v_mov_b32_e32 v0, 0
42; CI-NEXT:    s_waitcnt lgkmcnt(0)
43; CI-NEXT:    s_cmp_eq_u32 s2, 0
44; CI-NEXT:    s_cselect_b32 s2, 0xbff00000, 0
45; CI-NEXT:    v_mov_b32_e32 v3, s1
46; CI-NEXT:    v_mov_b32_e32 v1, s2
47; CI-NEXT:    v_mov_b32_e32 v2, s0
48; CI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
49; CI-NEXT:    s_endpgm
50;
51; VI-LABEL: sint_to_fp_i1_f64:
52; VI:       ; %bb.0:
53; VI-NEXT:    s_load_dword s2, s[8:9], 0x8
54; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
55; VI-NEXT:    v_mov_b32_e32 v0, 0
56; VI-NEXT:    s_waitcnt lgkmcnt(0)
57; VI-NEXT:    s_cmp_eq_u32 s2, 0
58; VI-NEXT:    s_cselect_b32 s2, 0xbff00000, 0
59; VI-NEXT:    v_mov_b32_e32 v3, s1
60; VI-NEXT:    v_mov_b32_e32 v1, s2
61; VI-NEXT:    v_mov_b32_e32 v2, s0
62; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
63; VI-NEXT:    s_endpgm
64  %cmp = icmp eq i32 %in, 0
65  %fp = sitofp i1 %cmp to double
  ; This store is only 4-byte aligned (presumably on purpose - TODO confirm);
  ; the checks still expect a single flat_store_dwordx2.
66  store double %fp, ptr addrspace(1) %out, align 4
67  ret void
68}
69
; sitofp of an i1 that arrives directly as a kernel argument.
; The checks expect bit 0 of the loaded dword to be tested with
; s_bitcmp1_b32, turned into a 0 / -1 integer through s_cselect_b64 and
; v_cndmask_b32_e64, and only then converted with v_cvt_f64_i32.
70define amdgpu_kernel void @sint_to_fp_i1_f64_load(ptr addrspace(1) %out, i1 %in) {
71; CI-LABEL: sint_to_fp_i1_f64_load:
72; CI:       ; %bb.0:
73; CI-NEXT:    s_load_dword s2, s[8:9], 0x2
74; CI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
75; CI-NEXT:    s_waitcnt lgkmcnt(0)
76; CI-NEXT:    s_bitcmp1_b32 s2, 0
77; CI-NEXT:    s_cselect_b64 s[2:3], -1, 0
78; CI-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[2:3]
79; CI-NEXT:    v_cvt_f64_i32_e32 v[0:1], v0
80; CI-NEXT:    v_mov_b32_e32 v3, s1
81; CI-NEXT:    v_mov_b32_e32 v2, s0
82; CI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
83; CI-NEXT:    s_endpgm
84;
85; VI-LABEL: sint_to_fp_i1_f64_load:
86; VI:       ; %bb.0:
87; VI-NEXT:    s_load_dword s2, s[8:9], 0x8
88; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
89; VI-NEXT:    s_waitcnt lgkmcnt(0)
90; VI-NEXT:    s_bitcmp1_b32 s2, 0
91; VI-NEXT:    s_cselect_b64 s[2:3], -1, 0
92; VI-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[2:3]
93; VI-NEXT:    v_cvt_f64_i32_e32 v[0:1], v0
94; VI-NEXT:    v_mov_b32_e32 v3, s1
95; VI-NEXT:    v_mov_b32_e32 v2, s0
96; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
97; VI-NEXT:    s_endpgm
98  %fp = sitofp i1 %in to double
99  store double %fp, ptr addrspace(1) %out, align 8
100  ret void
101}
102
; Signed i64 -> f64 conversion of a kernel argument (SGPR input).
; The checks expect the value to be split into two dwords - the high half is
; converted as signed (v_cvt_f64_i32), the low half as unsigned
; (v_cvt_f64_u32), the high result is scaled with v_ldexp_f64 by 32, and the
; two are summed with v_add_f64. The two targets differ only in where the
; address moves are scheduled.
103define amdgpu_kernel void @s_sint_to_fp_i64_to_f64(ptr addrspace(1) %out, i64 %in) {
104; CI-LABEL: s_sint_to_fp_i64_to_f64:
105; CI:       ; %bb.0:
106; CI-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
107; CI-NEXT:    s_waitcnt lgkmcnt(0)
108; CI-NEXT:    v_cvt_f64_i32_e32 v[0:1], s3
109; CI-NEXT:    v_cvt_f64_u32_e32 v[2:3], s2
110; CI-NEXT:    v_mov_b32_e32 v4, s0
111; CI-NEXT:    v_mov_b32_e32 v5, s1
112; CI-NEXT:    v_ldexp_f64 v[0:1], v[0:1], 32
113; CI-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
114; CI-NEXT:    flat_store_dwordx2 v[4:5], v[0:1]
115; CI-NEXT:    s_endpgm
116;
117; VI-LABEL: s_sint_to_fp_i64_to_f64:
118; VI:       ; %bb.0:
119; VI-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
120; VI-NEXT:    s_waitcnt lgkmcnt(0)
121; VI-NEXT:    v_cvt_f64_i32_e32 v[0:1], s3
122; VI-NEXT:    v_cvt_f64_u32_e32 v[2:3], s2
123; VI-NEXT:    v_ldexp_f64 v[0:1], v[0:1], 32
124; VI-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
125; VI-NEXT:    v_mov_b32_e32 v2, s0
126; VI-NEXT:    v_mov_b32_e32 v3, s1
127; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
128; VI-NEXT:    s_endpgm
129  %result = sitofp i64 %in to double
130  store double %result, ptr addrspace(1) %out
131  ret void
132}
133
; Signed i64 -> f64 conversion of a value loaded from memory (VGPR input).
; Each workitem loads the i64 element of %in selected by its x workitem id;
; the checks show the index scaled by 8 (shift left by 3) and added to the
; base pointer with a carry chain. The conversion itself is expected to use
; the same split as the SGPR variant - signed convert of the high dword,
; unsigned convert of the low dword, v_ldexp_f64 by 32, then v_add_f64.
134define amdgpu_kernel void @v_sint_to_fp_i64_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
135; CI-LABEL: v_sint_to_fp_i64_to_f64:
136; CI:       ; %bb.0:
137; CI-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
138; CI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
139; CI-NEXT:    s_waitcnt lgkmcnt(0)
140; CI-NEXT:    v_mov_b32_e32 v1, s3
141; CI-NEXT:    v_add_i32_e32 v0, vcc, s2, v0
142; CI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
143; CI-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
144; CI-NEXT:    s_waitcnt vmcnt(0)
145; CI-NEXT:    v_cvt_f64_i32_e32 v[1:2], v1
146; CI-NEXT:    v_cvt_f64_u32_e32 v[3:4], v0
147; CI-NEXT:    v_ldexp_f64 v[0:1], v[1:2], 32
148; CI-NEXT:    v_mov_b32_e32 v2, s0
149; CI-NEXT:    v_add_f64 v[0:1], v[0:1], v[3:4]
150; CI-NEXT:    v_mov_b32_e32 v3, s1
151; CI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
152; CI-NEXT:    s_endpgm
153;
154; VI-LABEL: v_sint_to_fp_i64_to_f64:
155; VI:       ; %bb.0:
156; VI-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
157; VI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
158; VI-NEXT:    s_waitcnt lgkmcnt(0)
159; VI-NEXT:    v_mov_b32_e32 v1, s3
160; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
161; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
162; VI-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
163; VI-NEXT:    s_waitcnt vmcnt(0)
164; VI-NEXT:    v_cvt_f64_i32_e32 v[1:2], v1
165; VI-NEXT:    v_cvt_f64_u32_e32 v[3:4], v0
166; VI-NEXT:    v_ldexp_f64 v[1:2], v[1:2], 32
167; VI-NEXT:    v_add_f64 v[0:1], v[1:2], v[3:4]
168; VI-NEXT:    v_mov_b32_e32 v2, s0
169; VI-NEXT:    v_mov_b32_e32 v3, s1
170; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
171; VI-NEXT:    s_endpgm
  ; Index the input by workitem id so the loaded value is not uniform.
172  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
173  %gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid
174  %val = load i64, ptr addrspace(1) %gep, align 8
175  %result = sitofp i64 %val to double
176  store double %result, ptr addrspace(1) %out
177  ret void
178}
179
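180; FIXME: VI+ emits both a bfe and a sext (or two bfe in the VGPR case) to sign-extend the i8; a single sign extension should be enough.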
; Signed i8 -> f64 conversion of a kernel argument (SGPR input).
; The hawaii checks expect one s_sext_i32_i8 before v_cvt_f64_i32. The fiji
; checks currently expect two steps before the convert - s_bfe_i32 with
; 0x80000 (presumably an 8-bit field at offset 0, mirroring the
; "v_bfe_i32 v0, v0, 0, 8" of the VGPR variant - TODO confirm the encoding)
; followed by s_sext_i32_i16.
181define amdgpu_kernel void @s_sint_to_fp_i8_to_f64(ptr addrspace(1) %out, i8 %in) {
182; CI-LABEL: s_sint_to_fp_i8_to_f64:
183; CI:       ; %bb.0:
184; CI-NEXT:    s_load_dword s2, s[8:9], 0x2
185; CI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
186; CI-NEXT:    s_waitcnt lgkmcnt(0)
187; CI-NEXT:    s_sext_i32_i8 s2, s2
188; CI-NEXT:    v_cvt_f64_i32_e32 v[0:1], s2
189; CI-NEXT:    v_mov_b32_e32 v3, s1
190; CI-NEXT:    v_mov_b32_e32 v2, s0
191; CI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
192; CI-NEXT:    s_endpgm
193;
194; VI-LABEL: s_sint_to_fp_i8_to_f64:
195; VI:       ; %bb.0:
196; VI-NEXT:    s_load_dword s2, s[8:9], 0x8
197; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
198; VI-NEXT:    s_waitcnt lgkmcnt(0)
199; VI-NEXT:    s_bfe_i32 s2, s2, 0x80000
200; VI-NEXT:    s_sext_i32_i16 s2, s2
201; VI-NEXT:    v_cvt_f64_i32_e32 v[0:1], s2
202; VI-NEXT:    v_mov_b32_e32 v3, s1
203; VI-NEXT:    v_mov_b32_e32 v2, s0
204; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
205; VI-NEXT:    s_endpgm
206  %fp = sitofp i8 %in to double
207  store double %fp, ptr addrspace(1) %out
208  ret void
209}
210
; Signed i8 -> f64 conversion in a regular (non-kernel) function that takes
; the i8 in a VGPR and returns the double.
; The hawaii checks expect one "v_bfe_i32 v0, v0, 0, 8" before the convert;
; the fiji checks currently expect a second v_bfe_i32 over 16 bits as well.
211define double @v_sint_to_fp_i8_to_f64(i8 %in) {
212; CI-LABEL: v_sint_to_fp_i8_to_f64:
213; CI:       ; %bb.0:
214; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
215; CI-NEXT:    v_bfe_i32 v0, v0, 0, 8
216; CI-NEXT:    v_cvt_f64_i32_e32 v[0:1], v0
217; CI-NEXT:    s_setpc_b64 s[30:31]
218;
219; VI-LABEL: v_sint_to_fp_i8_to_f64:
220; VI:       ; %bb.0:
221; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
222; VI-NEXT:    v_bfe_i32 v0, v0, 0, 8
223; VI-NEXT:    v_bfe_i32 v0, v0, 0, 16
224; VI-NEXT:    v_cvt_f64_i32_e32 v[0:1], v0
225; VI-NEXT:    s_setpc_b64 s[30:31]
226  %fp = sitofp i8 %in to double
227  ret double %fp
228  }
229
; Select between the two values a sitofp of i1 can produce (-1.0 when the
; compare is true, 0.0 otherwise), written as an explicit select on doubles.
; The expected code matches sint_to_fp_i1_f64 - a scalar compare feeding
; s_cselect_b32 for the high dword (0xbff00000 or 0) and a constant 0 for the
; low dword.
230define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
231; CI-LABEL: s_select_sint_to_fp_i1_vals_f64:
232; CI:       ; %bb.0:
233; CI-NEXT:    s_load_dword s2, s[8:9], 0x2
234; CI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
235; CI-NEXT:    v_mov_b32_e32 v0, 0
236; CI-NEXT:    s_waitcnt lgkmcnt(0)
237; CI-NEXT:    s_cmp_eq_u32 s2, 0
238; CI-NEXT:    s_cselect_b32 s2, 0xbff00000, 0
239; CI-NEXT:    v_mov_b32_e32 v3, s1
240; CI-NEXT:    v_mov_b32_e32 v1, s2
241; CI-NEXT:    v_mov_b32_e32 v2, s0
242; CI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
243; CI-NEXT:    s_endpgm
244;
245; VI-LABEL: s_select_sint_to_fp_i1_vals_f64:
246; VI:       ; %bb.0:
247; VI-NEXT:    s_load_dword s2, s[8:9], 0x8
248; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
249; VI-NEXT:    v_mov_b32_e32 v0, 0
250; VI-NEXT:    s_waitcnt lgkmcnt(0)
251; VI-NEXT:    s_cmp_eq_u32 s2, 0
252; VI-NEXT:    s_cselect_b32 s2, 0xbff00000, 0
253; VI-NEXT:    v_mov_b32_e32 v3, s1
254; VI-NEXT:    v_mov_b32_e32 v1, s2
255; VI-NEXT:    v_mov_b32_e32 v2, s0
256; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
257; VI-NEXT:    s_endpgm
258  %cmp = icmp eq i32 %in, 0
259  %select = select i1 %cmp, double -1.0, double 0.0
260  store double %select, ptr addrspace(1) %out, align 8
261  ret void
262}
263
; VGPR form of the -1.0 / 0.0 select - a regular function whose arguments
; arrive in v[0:1] (pointer) and v2 (%in).
; Both targets share one check set (the common GCN prefix). The expected code
; compares in the vector unit and uses v_cndmask_b32_e32 to choose the high
; dword between 0 and 0xbff00000, with the low dword a constant 0.
264define void @v_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
265; GCN-LABEL: v_select_sint_to_fp_i1_vals_f64:
266; GCN:       ; %bb.0:
267; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
268; GCN-NEXT:    v_mov_b32_e32 v4, 0xbff00000
269; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
270; GCN-NEXT:    v_mov_b32_e32 v3, 0
271; GCN-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
272; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[3:4]
273; GCN-NEXT:    s_waitcnt vmcnt(0)
274; GCN-NEXT:    s_setpc_b64 s[30:31]
275  %cmp = icmp eq i32 %in, 0
276  %select = select i1 %cmp, double -1.0, double 0.0
277  store double %select, ptr addrspace(1) %out, align 8
278  ret void
279}
280
; Integer twin of s_select_sint_to_fp_i1_vals_f64. The select chooses between
; the i64 bit pattern 0xbff0000000000000 (the encoding of double -1.0) and 0,
; and stores an i64. The expected code is the same as for the double version.
281define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_i64(ptr addrspace(1) %out, i32 %in) {
282; CI-LABEL: s_select_sint_to_fp_i1_vals_i64:
283; CI:       ; %bb.0:
284; CI-NEXT:    s_load_dword s2, s[8:9], 0x2
285; CI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
286; CI-NEXT:    v_mov_b32_e32 v0, 0
287; CI-NEXT:    s_waitcnt lgkmcnt(0)
288; CI-NEXT:    s_cmp_eq_u32 s2, 0
289; CI-NEXT:    s_cselect_b32 s2, 0xbff00000, 0
290; CI-NEXT:    v_mov_b32_e32 v3, s1
291; CI-NEXT:    v_mov_b32_e32 v1, s2
292; CI-NEXT:    v_mov_b32_e32 v2, s0
293; CI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
294; CI-NEXT:    s_endpgm
295;
296; VI-LABEL: s_select_sint_to_fp_i1_vals_i64:
297; VI:       ; %bb.0:
298; VI-NEXT:    s_load_dword s2, s[8:9], 0x8
299; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
300; VI-NEXT:    v_mov_b32_e32 v0, 0
301; VI-NEXT:    s_waitcnt lgkmcnt(0)
302; VI-NEXT:    s_cmp_eq_u32 s2, 0
303; VI-NEXT:    s_cselect_b32 s2, 0xbff00000, 0
304; VI-NEXT:    v_mov_b32_e32 v3, s1
305; VI-NEXT:    v_mov_b32_e32 v1, s2
306; VI-NEXT:    v_mov_b32_e32 v2, s0
307; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
308; VI-NEXT:    s_endpgm
309  %cmp = icmp eq i32 %in, 0
310  %select = select i1 %cmp, i64 u0xbff0000000000000, i64 0
311  store i64 %select, ptr addrspace(1) %out, align 8
312  ret void
313}
314
; Integer twin of v_select_sint_to_fp_i1_vals_f64 - a regular function that
; selects between the i64 bit pattern 0xbff0000000000000 and 0.
; The shared check set expects the same v_cmp_eq_u32 + v_cndmask_b32_e32
; sequence as the double version.
315define void @v_select_sint_to_fp_i1_vals_i64(ptr addrspace(1) %out, i32 %in) {
316; GCN-LABEL: v_select_sint_to_fp_i1_vals_i64:
317; GCN:       ; %bb.0:
318; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
319; GCN-NEXT:    v_mov_b32_e32 v4, 0xbff00000
320; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
321; GCN-NEXT:    v_mov_b32_e32 v3, 0
322; GCN-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
323; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[3:4]
324; GCN-NEXT:    s_waitcnt vmcnt(0)
325; GCN-NEXT:    s_setpc_b64 s[30:31]
326  %cmp = icmp eq i32 %in, 0
327  %select = select i1 %cmp, i64 u0xbff0000000000000, i64 0
328  store i64 %select, ptr addrspace(1) %out, align 8
329  ret void
330}
331
332; TODO: This should swap the selected order / invert the compare and do it.
; Same compare as v_select_sint_to_fp_i1_vals_f64 but with the select operands
; swapped - 0.0 when %in is zero, -1.0 otherwise.
; The shared check set currently expects the compare to stay as eq and the
; select to become "v_cndmask_b32_e64 v4, v4, 0, vcc", i.e. the e64 encoding
; with the constant 0 as the second source.
333define void @v_swap_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
334; GCN-LABEL: v_swap_select_sint_to_fp_i1_vals_f64:
335; GCN:       ; %bb.0:
336; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
337; GCN-NEXT:    v_mov_b32_e32 v4, 0xbff00000
338; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
339; GCN-NEXT:    v_mov_b32_e32 v3, 0
340; GCN-NEXT:    v_cndmask_b32_e64 v4, v4, 0, vcc
341; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[3:4]
342; GCN-NEXT:    s_waitcnt vmcnt(0)
343; GCN-NEXT:    s_setpc_b64 s[30:31]
344  %cmp = icmp eq i32 %in, 0
345  %select = select i1 %cmp, double 0.0, double -1.0
346  store double %select, ptr addrspace(1) %out, align 8
347  ret void
348}
349
350; TODO: This should swap the selected order / invert the compare and do it.
; Kernel (SGPR) form of the swapped select - 0.0 when %in is zero, -1.0
; otherwise.
; The checks expect the same s_cmp_eq_u32 as the non-swapped kernel, with the
; s_cselect_b32 operands reversed (0 first, 0xbff00000 second) to form the
; high dword; the low dword is a constant 0.
351define amdgpu_kernel void @s_swap_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
352; CI-LABEL: s_swap_select_sint_to_fp_i1_vals_f64:
353; CI:       ; %bb.0:
354; CI-NEXT:    s_load_dword s2, s[8:9], 0x2
355; CI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
356; CI-NEXT:    v_mov_b32_e32 v0, 0
357; CI-NEXT:    s_waitcnt lgkmcnt(0)
358; CI-NEXT:    s_cmp_eq_u32 s2, 0
359; CI-NEXT:    s_cselect_b32 s2, 0, 0xbff00000
360; CI-NEXT:    v_mov_b32_e32 v3, s1
361; CI-NEXT:    v_mov_b32_e32 v1, s2
362; CI-NEXT:    v_mov_b32_e32 v2, s0
363; CI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
364; CI-NEXT:    s_endpgm
365;
366; VI-LABEL: s_swap_select_sint_to_fp_i1_vals_f64:
367; VI:       ; %bb.0:
368; VI-NEXT:    s_load_dword s2, s[8:9], 0x8
369; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
370; VI-NEXT:    v_mov_b32_e32 v0, 0
371; VI-NEXT:    s_waitcnt lgkmcnt(0)
372; VI-NEXT:    s_cmp_eq_u32 s2, 0
373; VI-NEXT:    s_cselect_b32 s2, 0, 0xbff00000
374; VI-NEXT:    v_mov_b32_e32 v3, s1
375; VI-NEXT:    v_mov_b32_e32 v1, s2
376; VI-NEXT:    v_mov_b32_e32 v2, s0
377; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
378; VI-NEXT:    s_endpgm
379  %cmp = icmp eq i32 %in, 0
380  %select = select i1 %cmp, double 0.0, double -1.0
381  store double %select, ptr addrspace(1) %out, align 8
382  ret void
383}
384