xref: /llvm-project/llvm/test/CodeGen/WebAssembly/tailcall.ll (revision dd0bbae5efa4d23322eda905b2f9e11dfd3c5d36)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc < %s -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mcpu=mvp -mattr=+tail-call | FileCheck --check-prefixes=CHECK,SLOW %s
3; RUN: llc < %s -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -fast-isel -mcpu=mvp -mattr=+tail-call | FileCheck --check-prefixes=CHECK,FAST %s
4; RUN: llc < %s --filetype=obj -mattr=+tail-call | obj2yaml | FileCheck --check-prefix=YAML %s
5
6; Test that tail calls are lowered correctly
7
; Module-level setup shared by the tests below: the wasm32 target triple, a
; packed single-pointer struct type %fn (passed by value to the indirect_*
; tests), and two external i1 -> i1 functions used as call targets.
8target triple = "wasm32-unknown-unknown"
9
10%fn = type <{ptr}>
11declare i1 @foo(i1)
12declare i1 @bar(i1)
13
; A zero-argument self-call marked 'notail'. Both llc RUN configurations share
; the same expectations: an ordinary call followed by an explicit return.
14define void @recursive_notail_nullary() {
15; CHECK-LABEL: recursive_notail_nullary:
16; CHECK:         .functype recursive_notail_nullary () -> ()
17; CHECK-NEXT:  # %bb.0:
18; CHECK-NEXT:    call recursive_notail_nullary
19; CHECK-NEXT:    return
20  notail call void @recursive_notail_nullary()
21  ret void
22}
23
; A zero-argument self-call marked 'musttail'. Both llc RUN configurations are
; expected to emit a single return_call with no separate return.
24define void @recursive_musttail_nullary() {
25; CHECK-LABEL: recursive_musttail_nullary:
26; CHECK:         .functype recursive_musttail_nullary () -> ()
27; CHECK-NEXT:  # %bb.0:
28; CHECK-NEXT:    return_call recursive_musttail_nullary
29  musttail call void @recursive_musttail_nullary()
30  ret void
31}
; A zero-argument self-call marked with plain 'tail'. The two RUN
; configurations diverge here: the run without -fast-isel (SLOW prefix) expects
; return_call, while the -fast-isel run (FAST prefix) expects an ordinary call
; followed by return.
32define void @recursive_tail_nullary() {
33; SLOW-LABEL: recursive_tail_nullary:
34; SLOW:         .functype recursive_tail_nullary () -> ()
35; SLOW-NEXT:  # %bb.0:
36; SLOW-NEXT:    return_call recursive_tail_nullary
37;
38; FAST-LABEL: recursive_tail_nullary:
39; FAST:         .functype recursive_tail_nullary () -> ()
40; FAST-NEXT:  # %bb.0:
41; FAST-NEXT:    call recursive_tail_nullary
42; FAST-NEXT:    return
43  tail call void @recursive_tail_nullary()
44  ret void
45}
46
; 'notail' self-call that forwards both i32 parameters and returns the result.
; Both RUN configurations expect an ordinary call whose result is then returned.
47define i32 @recursive_notail(i32 %x, i32 %y) {
48; CHECK-LABEL: recursive_notail:
49; CHECK:         .functype recursive_notail (i32, i32) -> (i32)
50; CHECK-NEXT:  # %bb.0:
51; CHECK-NEXT:    call $push0=, recursive_notail, $0, $1
52; CHECK-NEXT:    return $pop0
53  %v = notail call i32 @recursive_notail(i32 %x, i32 %y)
54  ret i32 %v
55}
56
; 'musttail' self-call that forwards both i32 parameters. Both RUN
; configurations expect return_call with the two parameters as operands.
57define i32 @recursive_musttail(i32 %x, i32 %y) {
58; CHECK-LABEL: recursive_musttail:
59; CHECK:         .functype recursive_musttail (i32, i32) -> (i32)
60; CHECK-NEXT:  # %bb.0:
61; CHECK-NEXT:    return_call recursive_musttail, $0, $1
62  %v = musttail call i32 @recursive_musttail(i32 %x, i32 %y)
63  ret i32 %v
64}
65
; Plain 'tail' self-call that forwards both i32 parameters. The run without
; -fast-isel (SLOW prefix) expects return_call; the -fast-isel run (FAST
; prefix) expects an ordinary call whose result is returned.
66define i32 @recursive_tail(i32 %x, i32 %y) {
67; SLOW-LABEL: recursive_tail:
68; SLOW:         .functype recursive_tail (i32, i32) -> (i32)
69; SLOW-NEXT:  # %bb.0:
70; SLOW-NEXT:    return_call recursive_tail, $0, $1
71;
72; FAST-LABEL: recursive_tail:
73; FAST:         .functype recursive_tail (i32, i32) -> (i32)
74; FAST-NEXT:  # %bb.0:
75; FAST-NEXT:    call $push0=, recursive_tail, $0, $1
76; FAST-NEXT:    return $pop0
77  %v = tail call i32 @recursive_tail(i32 %x, i32 %y)
78  ret i32 %v
79}
80
; Indirect 'notail' call through the pointer extracted from %f, passing %f, %x
; and %y. Both RUN configurations expect call_indirect followed by return. The
; trailing "# Invalid depth argument!" text is part of the llc output that the
; assertion matches.
81define i32 @indirect_notail(%fn %f, i32 %x, i32 %y) {
82; CHECK-LABEL: indirect_notail:
83; CHECK:         .functype indirect_notail (i32, i32, i32) -> (i32)
84; CHECK-NEXT:  # %bb.0:
85; CHECK-NEXT:    call_indirect $push0=, $0, $1, $2, $0 # Invalid depth argument!
86; CHECK-NEXT:    return $pop0
87  %p = extractvalue %fn %f, 0
88  %v = notail call i32 %p(%fn %f, i32 %x, i32 %y)
89  ret i32 %v
90}
91
; Indirect 'musttail' call through the pointer extracted from %f. Both RUN
; configurations expect return_call_indirect; $0 appears twice in the operand
; list because %f is both the first argument and the source of the callee.
92define i32 @indirect_musttail(%fn %f, i32 %x, i32 %y) {
93; CHECK-LABEL: indirect_musttail:
94; CHECK:         .functype indirect_musttail (i32, i32, i32) -> (i32)
95; CHECK-NEXT:  # %bb.0:
96; CHECK-NEXT:    return_call_indirect , $0, $1, $2, $0
97  %p = extractvalue %fn %f, 0
98  %v = musttail call i32 %p(%fn %f, i32 %x, i32 %y)
99  ret i32 %v
100}
101
; Indirect plain 'tail' call through the pointer extracted from %f. Unlike the
; direct recursive_tail case, both RUN configurations share one expectation
; here: return_call_indirect.
102define i32 @indirect_tail(%fn %f, i32 %x, i32 %y) {
103; CHECK-LABEL: indirect_tail:
104; CHECK:         .functype indirect_tail (i32, i32, i32) -> (i32)
105; CHECK-NEXT:  # %bb.0:
106; CHECK-NEXT:    return_call_indirect , $0, $1, $2, $0
107  %p = extractvalue %fn %f, 0
108  %v = tail call i32 %p(%fn %f, i32 %x, i32 %y)
109  ret i32 %v
110}
111
; 'notail' call through a callee chosen at run time by a select between @foo
; and @bar. Both configurations expect the same instruction sequence (select the
; callee, call_indirect, return); the SLOW and FAST expectations differ only in
; how the stackified registers are numbered.
112define i1 @choice_notail(i1 %x) {
113; SLOW-LABEL: choice_notail:
114; SLOW:         .functype choice_notail (i32) -> (i32)
115; SLOW-NEXT:  # %bb.0:
116; SLOW-NEXT:    i32.const $push3=, foo
117; SLOW-NEXT:    i32.const $push2=, bar
118; SLOW-NEXT:    i32.const $push0=, 1
119; SLOW-NEXT:    i32.and $push1=, $0, $pop0
120; SLOW-NEXT:    i32.select $push4=, $pop3, $pop2, $pop1
121; SLOW-NEXT:    call_indirect $push5=, $0, $pop4 # Invalid depth argument!
122; SLOW-NEXT:    return $pop5
123;
124; FAST-LABEL: choice_notail:
125; FAST:         .functype choice_notail (i32) -> (i32)
126; FAST-NEXT:  # %bb.0:
127; FAST-NEXT:    i32.const $push3=, foo
128; FAST-NEXT:    i32.const $push4=, bar
129; FAST-NEXT:    i32.const $push1=, 1
130; FAST-NEXT:    i32.and $push2=, $0, $pop1
131; FAST-NEXT:    i32.select $push5=, $pop3, $pop4, $pop2
132; FAST-NEXT:    call_indirect $push0=, $0, $pop5 # Invalid depth argument!
133; FAST-NEXT:    return $pop0
134  %p = select i1 %x, ptr @foo, ptr @bar
135  %v = notail call i1 %p(i1 %x)
136  ret i1 %v
137}
138
; 'musttail' call through a callee chosen by a select between @foo and @bar.
; Both configurations expect the selected pointer to be used as the last
; operand of return_call_indirect; the SLOW and FAST expectations differ only
; in register numbering.
139define i1 @choice_musttail(i1 %x) {
140; SLOW-LABEL: choice_musttail:
141; SLOW:         .functype choice_musttail (i32) -> (i32)
142; SLOW-NEXT:  # %bb.0:
143; SLOW-NEXT:    i32.const $push3=, foo
144; SLOW-NEXT:    i32.const $push2=, bar
145; SLOW-NEXT:    i32.const $push0=, 1
146; SLOW-NEXT:    i32.and $push1=, $0, $pop0
147; SLOW-NEXT:    i32.select $push4=, $pop3, $pop2, $pop1
148; SLOW-NEXT:    return_call_indirect , $0, $pop4
149;
150; FAST-LABEL: choice_musttail:
151; FAST:         .functype choice_musttail (i32) -> (i32)
152; FAST-NEXT:  # %bb.0:
153; FAST-NEXT:    i32.const $push4=, foo
154; FAST-NEXT:    i32.const $push3=, bar
155; FAST-NEXT:    i32.const $push1=, 1
156; FAST-NEXT:    i32.and $push2=, $0, $pop1
157; FAST-NEXT:    i32.select $push0=, $pop4, $pop3, $pop2
158; FAST-NEXT:    return_call_indirect , $0, $pop0
159  %p = select i1 %x, ptr @foo, ptr @bar
160  %v = musttail call i1 %p(i1 %x)
161  ret i1 %v
162}
163
; Plain 'tail' call through a callee chosen by a select between @foo and @bar.
; The run without -fast-isel (SLOW prefix) expects return_call_indirect; the
; -fast-isel run (FAST prefix) expects call_indirect followed by return.
164define i1 @choice_tail(i1 %x) {
165; SLOW-LABEL: choice_tail:
166; SLOW:         .functype choice_tail (i32) -> (i32)
167; SLOW-NEXT:  # %bb.0:
168; SLOW-NEXT:    i32.const $push3=, foo
169; SLOW-NEXT:    i32.const $push2=, bar
170; SLOW-NEXT:    i32.const $push0=, 1
171; SLOW-NEXT:    i32.and $push1=, $0, $pop0
172; SLOW-NEXT:    i32.select $push4=, $pop3, $pop2, $pop1
173; SLOW-NEXT:    return_call_indirect , $0, $pop4
174;
175; FAST-LABEL: choice_tail:
176; FAST:         .functype choice_tail (i32) -> (i32)
177; FAST-NEXT:  # %bb.0:
178; FAST-NEXT:    i32.const $push3=, foo
179; FAST-NEXT:    i32.const $push4=, bar
180; FAST-NEXT:    i32.const $push1=, 1
181; FAST-NEXT:    i32.and $push2=, $0, $pop1
182; FAST-NEXT:    i32.select $push5=, $pop3, $pop4, $pop2
183; FAST-NEXT:    call_indirect $push0=, $0, $pop5 # Invalid depth argument!
184; FAST-NEXT:    return $pop0
185  %p = select i1 %x, ptr @foo, ptr @bar
186  %v = tail call i1 %p(i1 %x)
187  ret i1 %v
188}
189
190; It is an LLVM validation error for a 'musttail' callee to have a different
191; prototype than its caller, so the following tests can only be done with
192; 'tail'.
193
; The caller takes no parameters while the callee @baz takes three i32s; the
; return types (i32) agree. The run without -fast-isel (SLOW prefix) still
; expects return_call with the three constants as operands; the -fast-isel run
; (FAST prefix) expects an ordinary call followed by return.
194declare i32 @baz(i32, i32, i32)
195define i32 @mismatched_prototypes() {
196; SLOW-LABEL: mismatched_prototypes:
197; SLOW:         .functype mismatched_prototypes () -> (i32)
198; SLOW-NEXT:  # %bb.0:
199; SLOW-NEXT:    i32.const $push2=, 0
200; SLOW-NEXT:    i32.const $push1=, 42
201; SLOW-NEXT:    i32.const $push0=, 6
202; SLOW-NEXT:    return_call baz, $pop2, $pop1, $pop0
203;
204; FAST-LABEL: mismatched_prototypes:
205; FAST:         .functype mismatched_prototypes () -> (i32)
206; FAST-NEXT:  # %bb.0:
207; FAST-NEXT:    i32.const $push1=, 0
208; FAST-NEXT:    i32.const $push2=, 42
209; FAST-NEXT:    i32.const $push3=, 6
210; FAST-NEXT:    call $push0=, baz, $pop1, $pop2, $pop3
211; FAST-NEXT:    return $pop0
212  %v = tail call i32 @baz(i32 0, i32 42, i32 6)
213  ret i32 %v
214}
215
; The caller returns void but the 'tail' callee @baz returns i32, and the
; result is unused. Both configurations expect an ordinary call whose result
; goes to $drop, followed by a plain return; neither expects return_call.
216define void @mismatched_return_void() {
217; SLOW-LABEL: mismatched_return_void:
218; SLOW:         .functype mismatched_return_void () -> ()
219; SLOW-NEXT:  # %bb.0:
220; SLOW-NEXT:    i32.const $push2=, 0
221; SLOW-NEXT:    i32.const $push1=, 42
222; SLOW-NEXT:    i32.const $push0=, 6
223; SLOW-NEXT:    call $drop=, baz, $pop2, $pop1, $pop0
224; SLOW-NEXT:    return
225;
226; FAST-LABEL: mismatched_return_void:
227; FAST:         .functype mismatched_return_void () -> ()
228; FAST-NEXT:  # %bb.0:
229; FAST-NEXT:    i32.const $push0=, 0
230; FAST-NEXT:    i32.const $push1=, 42
231; FAST-NEXT:    i32.const $push2=, 6
232; FAST-NEXT:    call $drop=, baz, $pop0, $pop1, $pop2
233; FAST-NEXT:    return
234  %v = tail call i32 @baz(i32 0, i32 42, i32 6)
235  ret void
236}
237
; The i32 result of the 'tail' call to @baz is bitcast to float before being
; returned. Both configurations expect an ordinary call, then
; f32.reinterpret_i32, then return; neither expects return_call.
238define float @mismatched_return_f32() {
239; SLOW-LABEL: mismatched_return_f32:
240; SLOW:         .functype mismatched_return_f32 () -> (f32)
241; SLOW-NEXT:  # %bb.0:
242; SLOW-NEXT:    i32.const $push2=, 0
243; SLOW-NEXT:    i32.const $push1=, 42
244; SLOW-NEXT:    i32.const $push0=, 6
245; SLOW-NEXT:    call $push3=, baz, $pop2, $pop1, $pop0
246; SLOW-NEXT:    f32.reinterpret_i32 $push4=, $pop3
247; SLOW-NEXT:    return $pop4
248;
249; FAST-LABEL: mismatched_return_f32:
250; FAST:         .functype mismatched_return_f32 () -> (f32)
251; FAST-NEXT:  # %bb.0:
252; FAST-NEXT:    i32.const $push2=, 0
253; FAST-NEXT:    i32.const $push3=, 42
254; FAST-NEXT:    i32.const $push4=, 6
255; FAST-NEXT:    call $push1=, baz, $pop2, $pop3, $pop4
256; FAST-NEXT:    f32.reinterpret_i32 $push0=, $pop1
257; FAST-NEXT:    return $pop0
258  %v = tail call i32 @baz(i32 0, i32 42, i32 6)
259  %u = bitcast i32 %v to float
260  ret float %u
261}
262
; Indirect variant of the void-return mismatch: the callee returns i32 but the
; caller returns void. Both configurations expect call_indirect with the result
; sent to $drop, followed by a plain return.
263define void @mismatched_indirect_void(%fn %f, i32 %x, i32 %y) {
264; CHECK-LABEL: mismatched_indirect_void:
265; CHECK:         .functype mismatched_indirect_void (i32, i32, i32) -> ()
266; CHECK-NEXT:  # %bb.0:
267; CHECK-NEXT:    call_indirect $drop=, $0, $1, $2, $0 # Invalid depth argument!
268; CHECK-NEXT:    return
269  %p = extractvalue %fn %f, 0
270  %v = tail call i32 %p(%fn %f, i32 %x, i32 %y)
271  ret void
272}
273
; Indirect variant of the f32-return mismatch: the i32 call result is bitcast
; to float before the return. Both configurations expect call_indirect, then
; f32.reinterpret_i32, then return.
274define float @mismatched_indirect_f32(%fn %f, i32 %x, i32 %y) {
275; CHECK-LABEL: mismatched_indirect_f32:
276; CHECK:         .functype mismatched_indirect_f32 (i32, i32, i32) -> (f32)
277; CHECK-NEXT:  # %bb.0:
278; CHECK-NEXT:    call_indirect $push0=, $0, $1, $2, $0 # Invalid depth argument!
279; CHECK-NEXT:    f32.reinterpret_i32 $push1=, $pop0
280; CHECK-NEXT:    return $pop1
281  %p = extractvalue %fn %f, 0
282  %v = tail call i32 %p(%fn %f, i32 %x, i32 %y)
283  %u = bitcast i32 %v to float
284  ret float %u
285}
286
; 'tail' call to @quux, whose pointer parameter is byval(i32), from a caller
; that takes a plain pointer. The expected output (same for both RUN
; configurations) reserves 16 bytes of stack, copies the i32 at 0($0) into the
; slot at 12($1), restores __stack_pointer, and then emits return_call with
; the slot address ($1 + 12) as the argument.
; NOTE(review): the pointer handed to quux refers to a slot in the frame whose
; stack pointer was restored just before the return_call; confirm this is the
; intended lowering.
287declare i32 @quux(ptr byval(i32))
288define i32 @mismatched_byval(ptr %x) {
289; CHECK-LABEL: mismatched_byval:
290; CHECK:         .functype mismatched_byval (i32) -> (i32)
291; CHECK-NEXT:  # %bb.0:
292; CHECK-NEXT:    global.get $push1=, __stack_pointer
293; CHECK-NEXT:    i32.const $push2=, 16
294; CHECK-NEXT:    i32.sub $push8=, $pop1, $pop2
295; CHECK-NEXT:    local.tee $push7=, $1=, $pop8
296; CHECK-NEXT:    global.set __stack_pointer, $pop7
297; CHECK-NEXT:    i32.load $push0=, 0($0)
298; CHECK-NEXT:    i32.store 12($1), $pop0
299; CHECK-NEXT:    i32.const $push3=, 16
300; CHECK-NEXT:    i32.add $push4=, $1, $pop3
301; CHECK-NEXT:    global.set __stack_pointer, $pop4
302; CHECK-NEXT:    i32.const $push5=, 12
303; CHECK-NEXT:    i32.add $push6=, $1, $pop5
304; CHECK-NEXT:    return_call quux, $pop6
305  %v = tail call i32 @quux(ptr byval(i32) %x)
306  ret i32 %v
307}
308
; 'tail' call to the variadic function @var. Both RUN configurations expect the
; argument to be stored into a 16-byte stack buffer at 0($1), the buffer address
; to be passed through an ordinary call, and __stack_pointer to be restored
; before the return; neither expects return_call.
309declare i32 @var(...)
310define i32 @varargs(i32 %x) {
311; CHECK-LABEL: varargs:
312; CHECK:         .functype varargs (i32) -> (i32)
313; CHECK-NEXT:  # %bb.0:
314; CHECK-NEXT:    global.get $push0=, __stack_pointer
315; CHECK-NEXT:    i32.const $push1=, 16
316; CHECK-NEXT:    i32.sub $push5=, $pop0, $pop1
317; CHECK-NEXT:    local.tee $push4=, $1=, $pop5
318; CHECK-NEXT:    global.set __stack_pointer, $pop4
319; CHECK-NEXT:    i32.store 0($1), $0
320; CHECK-NEXT:    call $0=, var, $1
321; CHECK-NEXT:    i32.const $push2=, 16
322; CHECK-NEXT:    i32.add $push3=, $1, $pop2
323; CHECK-NEXT:    global.set __stack_pointer, $pop3
324; CHECK-NEXT:    return $0
325  %v = tail call i32 (...) @var(i32 %x)
326  ret i32 %v
327}
328
329; Type transformations inhibit tail calls, even when they are nops
330
; The i1 result of the 'tail' call to @foo is zero-extended to i32 before the
; return. Both configurations expect an ordinary call followed by an i32.and
; with 1 and a return; the SLOW and FAST expectations differ only in register
; numbering.
331define i32 @mismatched_return_zext() {
332; SLOW-LABEL: mismatched_return_zext:
333; SLOW:         .functype mismatched_return_zext () -> (i32)
334; SLOW-NEXT:  # %bb.0:
335; SLOW-NEXT:    i32.const $push0=, 1
336; SLOW-NEXT:    call $push1=, foo, $pop0
337; SLOW-NEXT:    i32.const $push3=, 1
338; SLOW-NEXT:    i32.and $push2=, $pop1, $pop3
339; SLOW-NEXT:    return $pop2
340;
341; FAST-LABEL: mismatched_return_zext:
342; FAST:         .functype mismatched_return_zext () -> (i32)
343; FAST-NEXT:  # %bb.0:
344; FAST-NEXT:    i32.const $push2=, 1
345; FAST-NEXT:    call $push1=, foo, $pop2
346; FAST-NEXT:    i32.const $push3=, 1
347; FAST-NEXT:    i32.and $push0=, $pop1, $pop3
348; FAST-NEXT:    return $pop0
349  %v = tail call i1 @foo(i1 1)
350  %u = zext i1 %v to i32
351  ret i32 %u
352}
353
; The i1 result of the 'tail' call to @foo is sign-extended to i32 before the
; return. Both configurations expect an ordinary call, but extend differently:
; the SLOW expectation computes 0 - (result & 1), while the FAST expectation
; shifts left by 31 and then arithmetic-shifts right by 31.
354define i32 @mismatched_return_sext() {
355; SLOW-LABEL: mismatched_return_sext:
356; SLOW:         .functype mismatched_return_sext () -> (i32)
357; SLOW-NEXT:  # %bb.0:
358; SLOW-NEXT:    i32.const $push3=, 0
359; SLOW-NEXT:    i32.const $push0=, 1
360; SLOW-NEXT:    call $push1=, foo, $pop0
361; SLOW-NEXT:    i32.const $push5=, 1
362; SLOW-NEXT:    i32.and $push2=, $pop1, $pop5
363; SLOW-NEXT:    i32.sub $push4=, $pop3, $pop2
364; SLOW-NEXT:    return $pop4
365;
366; FAST-LABEL: mismatched_return_sext:
367; FAST:         .functype mismatched_return_sext () -> (i32)
368; FAST-NEXT:  # %bb.0:
369; FAST-NEXT:    i32.const $push4=, 1
370; FAST-NEXT:    call $push3=, foo, $pop4
371; FAST-NEXT:    i32.const $push0=, 31
372; FAST-NEXT:    i32.shl $push1=, $pop3, $pop0
373; FAST-NEXT:    i32.const $push5=, 31
374; FAST-NEXT:    i32.shr_s $push2=, $pop1, $pop5
375; FAST-NEXT:    return $pop2
376  %v = tail call i1 @foo(i1 1)
377  %u = sext i1 %v to i32
378  ret i32 %u
379}
380
; The i32 result of the 'tail' call to @int is truncated to i1 before the
; return. The expected output (same for both RUN configurations) contains no
; instruction for the truncation, yet it is still an ordinary call followed by
; return rather than return_call.
381declare i32 @int()
382define i1 @mismatched_return_trunc() {
383; CHECK-LABEL: mismatched_return_trunc:
384; CHECK:         .functype mismatched_return_trunc () -> (i32)
385; CHECK-NEXT:  # %bb.0:
386; CHECK-NEXT:    call $push0=, int
387; CHECK-NEXT:    return $pop0
388  %v = tail call i32 @int()
389  %u = trunc i32 %v to i1
390  ret i1 %u
391}
392
393; Stack-allocated arguments inhibit tail calls
394
; 'tail' self-call whose argument is the address of a local alloca. Both
; configurations expect an ordinary call bracketed by the __stack_pointer
; adjustment (16 bytes) and its restore, with the slot address computed as
; $2 + 12; the FAST expectation additionally routes the address through a
; local.copy.
395define i32 @stack_arg(ptr %x) {
396; SLOW-LABEL: stack_arg:
397; SLOW:         .functype stack_arg (i32) -> (i32)
398; SLOW-NEXT:  # %bb.0:
399; SLOW-NEXT:    global.get $push0=, __stack_pointer
400; SLOW-NEXT:    i32.const $push1=, 16
401; SLOW-NEXT:    i32.sub $push7=, $pop0, $pop1
402; SLOW-NEXT:    local.tee $push6=, $2=, $pop7
403; SLOW-NEXT:    global.set __stack_pointer, $pop6
404; SLOW-NEXT:    i32.const $push4=, 12
405; SLOW-NEXT:    i32.add $push5=, $2, $pop4
406; SLOW-NEXT:    call $1=, stack_arg, $pop5
407; SLOW-NEXT:    i32.const $push2=, 16
408; SLOW-NEXT:    i32.add $push3=, $2, $pop2
409; SLOW-NEXT:    global.set __stack_pointer, $pop3
410; SLOW-NEXT:    return $1
411;
412; FAST-LABEL: stack_arg:
413; FAST:         .functype stack_arg (i32) -> (i32)
414; FAST-NEXT:  # %bb.0:
415; FAST-NEXT:    global.get $push1=, __stack_pointer
416; FAST-NEXT:    i32.const $push2=, 16
417; FAST-NEXT:    i32.sub $push8=, $pop1, $pop2
418; FAST-NEXT:    local.tee $push7=, $2=, $pop8
419; FAST-NEXT:    global.set __stack_pointer, $pop7
420; FAST-NEXT:    i32.const $push5=, 12
421; FAST-NEXT:    i32.add $push6=, $2, $pop5
422; FAST-NEXT:    local.copy $push0=, $pop6
423; FAST-NEXT:    call $1=, stack_arg, $pop0
424; FAST-NEXT:    i32.const $push3=, 16
425; FAST-NEXT:    i32.add $push4=, $2, $pop3
426; FAST-NEXT:    global.set __stack_pointer, $pop4
427; FAST-NEXT:    return $1
428  %a = alloca i32
429  %v = tail call i32 @stack_arg(ptr %a)
430  ret i32 %v
431}
432
; 'tail' self-call whose argument is a getelementptr to the second field of a
; local { i32, i32 } alloca. Both configurations expect an ordinary call with
; __stack_pointer restored afterwards. The field address is formed from $2 + 8
; with an i32.or of 4 in the SLOW expectation, and with a local.copy followed
; by an i32.add of 4 in the FAST expectation.
433define i32 @stack_arg_gep(ptr %x) {
434; SLOW-LABEL: stack_arg_gep:
435; SLOW:         .functype stack_arg_gep (i32) -> (i32)
436; SLOW-NEXT:  # %bb.0:
437; SLOW-NEXT:    global.get $push2=, __stack_pointer
438; SLOW-NEXT:    i32.const $push3=, 16
439; SLOW-NEXT:    i32.sub $push9=, $pop2, $pop3
440; SLOW-NEXT:    local.tee $push8=, $2=, $pop9
441; SLOW-NEXT:    global.set __stack_pointer, $pop8
442; SLOW-NEXT:    i32.const $push6=, 8
443; SLOW-NEXT:    i32.add $push7=, $2, $pop6
444; SLOW-NEXT:    i32.const $push0=, 4
445; SLOW-NEXT:    i32.or $push1=, $pop7, $pop0
446; SLOW-NEXT:    call $1=, stack_arg_gep, $pop1
447; SLOW-NEXT:    i32.const $push4=, 16
448; SLOW-NEXT:    i32.add $push5=, $2, $pop4
449; SLOW-NEXT:    global.set __stack_pointer, $pop5
450; SLOW-NEXT:    return $1
451;
452; FAST-LABEL: stack_arg_gep:
453; FAST:         .functype stack_arg_gep (i32) -> (i32)
454; FAST-NEXT:  # %bb.0:
455; FAST-NEXT:    global.get $push3=, __stack_pointer
456; FAST-NEXT:    i32.const $push4=, 16
457; FAST-NEXT:    i32.sub $push10=, $pop3, $pop4
458; FAST-NEXT:    local.tee $push9=, $2=, $pop10
459; FAST-NEXT:    global.set __stack_pointer, $pop9
460; FAST-NEXT:    i32.const $push7=, 8
461; FAST-NEXT:    i32.add $push8=, $2, $pop7
462; FAST-NEXT:    local.copy $push0=, $pop8
463; FAST-NEXT:    i32.const $push1=, 4
464; FAST-NEXT:    i32.add $push2=, $pop0, $pop1
465; FAST-NEXT:    call $1=, stack_arg_gep, $pop2
466; FAST-NEXT:    i32.const $push5=, 16
467; FAST-NEXT:    i32.add $push6=, $2, $pop5
468; FAST-NEXT:    global.set __stack_pointer, $pop6
469; FAST-NEXT:    return $1
470  %a = alloca { i32, i32 }
471  %p = getelementptr { i32, i32 }, ptr %a, i32 0, i32 1
472  %v = tail call i32 @stack_arg_gep(ptr %p)
473  ret i32 %v
474}
475
; 'tail' self-call whose i32 argument is a ptrtoint of a local [64 x i32]
; alloca. The two configurations diverge: the SLOW expectation restores
; __stack_pointer and then emits return_call with the alloca address ($1),
; whereas the FAST expectation emits an ordinary call and restores
; __stack_pointer afterwards.
476define i32 @stack_arg_cast(i32 %x) {
477; SLOW-LABEL: stack_arg_cast:
478; SLOW:         .functype stack_arg_cast (i32) -> (i32)
479; SLOW-NEXT:  # %bb.0:
480; SLOW-NEXT:    global.get $push0=, __stack_pointer
481; SLOW-NEXT:    i32.const $push1=, 256
482; SLOW-NEXT:    i32.sub $push5=, $pop0, $pop1
483; SLOW-NEXT:    local.tee $push4=, $1=, $pop5
484; SLOW-NEXT:    global.set __stack_pointer, $pop4
485; SLOW-NEXT:    i32.const $push2=, 256
486; SLOW-NEXT:    i32.add $push3=, $1, $pop2
487; SLOW-NEXT:    global.set __stack_pointer, $pop3
488; SLOW-NEXT:    return_call stack_arg_cast, $1
489;
490; FAST-LABEL: stack_arg_cast:
491; FAST:         .functype stack_arg_cast (i32) -> (i32)
492; FAST-NEXT:  # %bb.0:
493; FAST-NEXT:    global.get $push1=, __stack_pointer
494; FAST-NEXT:    i32.const $push2=, 256
495; FAST-NEXT:    i32.sub $push6=, $pop1, $pop2
496; FAST-NEXT:    local.tee $push5=, $2=, $pop6
497; FAST-NEXT:    global.set __stack_pointer, $pop5
498; FAST-NEXT:    local.copy $push0=, $2
499; FAST-NEXT:    call $1=, stack_arg_cast, $pop0
500; FAST-NEXT:    i32.const $push3=, 256
501; FAST-NEXT:    i32.add $push4=, $2, $pop3
502; FAST-NEXT:    global.set __stack_pointer, $pop4
503; FAST-NEXT:    return $1
504  %a = alloca [64 x i32]
505  %i = ptrtoint ptr %a to i32
506  %v = tail call i32 @stack_arg_cast(i32 %i)
507  ret i32 %v
508}
509
510; Check that the stack-pointer restore (epilogue) is emitted ahead of the return_call / return_call_indirect instruction.
; 'musttail' self-call from a function that owns a 256-byte ([64 x i32])
; alloca. Both RUN configurations expect __stack_pointer to be lowered by 256,
; raised back by 256, and only then the return_call to be emitted.
511define i32 @direct_epilogue() {
512; CHECK-LABEL: direct_epilogue:
513; CHECK:         .functype direct_epilogue () -> (i32)
514; CHECK-NEXT:  # %bb.0:
515; CHECK-NEXT:    global.get $push0=, __stack_pointer
516; CHECK-NEXT:    i32.const $push1=, 256
517; CHECK-NEXT:    i32.sub $push5=, $pop0, $pop1
518; CHECK-NEXT:    local.tee $push4=, $0=, $pop5
519; CHECK-NEXT:    global.set __stack_pointer, $pop4
520; CHECK-NEXT:    i32.const $push2=, 256
521; CHECK-NEXT:    i32.add $push3=, $0, $pop2
522; CHECK-NEXT:    global.set __stack_pointer, $pop3
523; CHECK-NEXT:    return_call direct_epilogue
524  %a = alloca [64 x i32]
525  %v = musttail call i32 @direct_epilogue()
526  ret i32 %v
527}
528
; Indirect counterpart of direct_epilogue: a 'musttail' call through %p, which
; is also passed as the sole argument (hence $0 twice in the operand list).
; Both RUN configurations expect the 256-byte __stack_pointer adjustment to be
; undone before return_call_indirect.
529define i32 @indirect_epilogue(ptr %p) {
530; CHECK-LABEL: indirect_epilogue:
531; CHECK:         .functype indirect_epilogue (i32) -> (i32)
532; CHECK-NEXT:  # %bb.0:
533; CHECK-NEXT:    global.get $push0=, __stack_pointer
534; CHECK-NEXT:    i32.const $push1=, 256
535; CHECK-NEXT:    i32.sub $push5=, $pop0, $pop1
536; CHECK-NEXT:    local.tee $push4=, $1=, $pop5
537; CHECK-NEXT:    global.set __stack_pointer, $pop4
538; CHECK-NEXT:    i32.const $push2=, 256
539; CHECK-NEXT:    i32.add $push3=, $1, $pop2
540; CHECK-NEXT:    global.set __stack_pointer, $pop3
541; CHECK-NEXT:    return_call_indirect , $0, $0
542  %a = alloca [64 x i32]
543  %v = musttail call i32 %p(ptr %p)
544  ret i32 %v
545}
546
547; Check that the signatures generated for external indirectly
548; return-called functions include the proper return types
549
; The YAML-prefixed lines apply to the obj2yaml output of the third RUN line:
; the signature at index 8 must take (I32, F32, I64, F64) and return I32, which
; is the type of the indirect call made by unique_caller below.
; unique_caller loads a function pointer from %p and 'tail'-calls it with four
; zero constants. The run without -fast-isel (SLOW prefix) expects
; return_call_indirect; the -fast-isel run (FAST prefix) expects call_indirect
; followed by return, with the float and double zeros produced by converting
; an i32 zero.
550; YAML-LABEL: - Index:           8
551; YAML-NEXT:    ParamTypes:
552; YAML-NEXT:      - I32
553; YAML-NEXT:      - F32
554; YAML-NEXT:      - I64
555; YAML-NEXT:      - F64
556; YAML-NEXT:    ReturnTypes:
557; YAML-NEXT:      - I32
558define i32 @unique_caller(ptr %p) {
559; SLOW-LABEL: unique_caller:
560; SLOW:         .functype unique_caller (i32) -> (i32)
561; SLOW-NEXT:  # %bb.0:
562; SLOW-NEXT:    i32.const $push4=, 0
563; SLOW-NEXT:    f32.const $push3=, 0x0p0
564; SLOW-NEXT:    i64.const $push2=, 0
565; SLOW-NEXT:    f64.const $push1=, 0x0p0
566; SLOW-NEXT:    i32.load $push0=, 0($0)
567; SLOW-NEXT:    return_call_indirect , $pop4, $pop3, $pop2, $pop1, $pop0
568;
569; FAST-LABEL: unique_caller:
570; FAST:         .functype unique_caller (i32) -> (i32)
571; FAST-NEXT:  # %bb.0:
572; FAST-NEXT:    i32.const $push1=, 0
573; FAST-NEXT:    i32.const $push7=, 0
574; FAST-NEXT:    f32.convert_i32_s $push2=, $pop7
575; FAST-NEXT:    i64.const $push3=, 0
576; FAST-NEXT:    i32.const $push6=, 0
577; FAST-NEXT:    f64.convert_i32_s $push4=, $pop6
578; FAST-NEXT:    i32.load $push5=, 0($0)
579; FAST-NEXT:    call_indirect $push0=, $pop1, $pop2, $pop3, $pop4, $pop5 # Invalid depth argument!
580; FAST-NEXT:    return $pop0
581  %f = load ptr, ptr %p
582  %v = tail call i32 %f(i32 0, float 0., i64 0, double 0.)
583  ret i32 %v
584}
585
; Both assembly RUN configurations must end with a target_features custom
; section containing the bytes 1, 43 (ASCII '+') and 9 (the length of the name
; that follows), then the string "tail-call". The leading 1 is presumably the
; number of feature entries -- TODO confirm against the section format.
586; CHECK-LABEL: .section .custom_section.target_features
587; CHECK-NEXT: .int8 1
588; CHECK-NEXT: .int8 43
589; CHECK-NEXT: .int8 9
590; CHECK-NEXT: .ascii "tail-call"
591