xref: /llvm-project/llvm/test/CodeGen/WebAssembly/reg-stackify.ll (revision fb6e024f49ddbf1a018eccab7ccfa7c1f41964d0)
1; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -verify-machineinstrs | FileCheck %s
2; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -verify-machineinstrs | FileCheck %s --check-prefix=NOREGS
3
4; Test the register stackifier pass.
5
6; We have two sets of tests, one with registers and implicit locals, and
7; a stack / explicit locals based version (NOREGS).
8
9target triple = "wasm32-unknown-unknown"  ; the llc invocations above pass no -mtriple, so this selects the target
10
11; No because of pointer aliasing.
12
13; CHECK-LABEL: no0:
14; CHECK: return $1{{$}}
15; NOREGS-LABEL: no0:
16; NOREGS: return{{$}}
17define i32 @no0(ptr %p, ptr %q) {
18  %loaded = load i32, ptr %q     ; plain load; %p and %q carry no noalias, so the store below may alias it
19  store i32 0, ptr %p            ; intervening store between the load and its use
20  ret i32 %loaded
21}
22
23; No because of side effects.
24
25; CHECK-LABEL: no1:
26; CHECK: return $1{{$}}
27; NOREGS-LABEL: no1:
28; NOREGS: return{{$}}
29define i32 @no1(ptr %p, ptr dereferenceable(4) align(4) %q) {
30  %vol = load volatile i32, ptr %q, !invariant.load !0   ; volatile, despite the invariant.load tag
31  store volatile i32 0, ptr %p                           ; volatile store sits between the load and its use
32  ret i32 %vol
33}
34
35; Yes because of invariant load and no side effects.
36
37; CHECK-LABEL: yes0:
38; CHECK: return $pop{{[0-9]+}}{{$}}
39; NOREGS-LABEL: yes0:
40; NOREGS: return{{$}}
41define i32 @yes0(ptr %p, ptr dereferenceable(4) align(4) %q) {
42  %inv = load i32, ptr %q, !invariant.load !0   ; non-volatile invariant load from a dereferenceable, aligned pointer
43  store i32 0, ptr %p                           ; ordinary store between the load and its use
44  ret i32 %inv
45}
46
47; Yes because of no intervening side effects.
48
49; CHECK-LABEL: yes1:
50; CHECK: return $pop0{{$}}
51; NOREGS-LABEL: yes1:
52; NOREGS: return{{$}}
53define i32 @yes1(ptr %q) {
54  %vol = load volatile i32, ptr %q   ; volatile, but nothing sits between it and the return
55  ret i32 %vol
56}
57
58; Yes because undefined behavior can be sunk past a store.
59
60; CHECK-LABEL: sink_trap:
61; CHECK: return $pop{{[0-9]+}}{{$}}
62; NOREGS-LABEL: sink_trap:
63; NOREGS: return{{$}}
64define i32 @sink_trap(i32 %x, i32 %y, ptr %p) {
65  %quot = sdiv i32 %x, %y          ; division whose only use is the return
66  store volatile i32 0, ptr %p     ; volatile store between the division and its use
67  ret i32 %quot
68}
69
70; Yes because the call is readnone.
71
72; CHECK-LABEL: sink_readnone_call:
73; CHECK: return $pop1{{$}}
74; NOREGS-LABEL: sink_readnone_call:
75; NOREGS: return{{$}}
76declare i32 @readnone_callee() readnone nounwind
77define i32 @sink_readnone_call(i32 %x, i32 %y, ptr %p) {
78  %res = call i32 @readnone_callee()   ; callee is declared readnone and nounwind
79  store volatile i32 0, ptr %p         ; volatile store between the call and its use
80  ret i32 %res
81}
82
83; No because the call is readonly and there's an intervening store.
84
85; CHECK-LABEL: no_sink_readonly_call:
86; CHECK: return ${{[0-9]+}}{{$}}
87; NOREGS-LABEL: no_sink_readonly_call:
88; NOREGS: return{{$}}
89declare i32 @readonly_callee() readonly nounwind
90define i32 @no_sink_readonly_call(i32 %x, i32 %y, ptr %p) {
91  %res = call i32 @readonly_callee()   ; callee is declared readonly, so it may read memory
92  store i32 0, ptr %p                  ; store between the call and its use
93  ret i32 %res
94}
95
96; Don't schedule stack uses into the stack. To reduce register pressure, the
97; scheduler might be tempted to move the definition of $2 down. However, this
98; would risk getting incorrect liveness if the instructions are later
99; rearranged to make the stack contiguous.
100
101; CHECK-LABEL: stack_uses:
102; CHECK: .functype stack_uses (i32, i32, i32, i32) -> (i32){{$}}
103; CHECK-NEXT: block   {{$}}
104; CHECK-NEXT: i32.const   $push[[L13:[0-9]+]]=, 1{{$}}
105; CHECK-NEXT: i32.lt_s    $push[[L0:[0-9]+]]=, $0, $pop[[L13]]{{$}}
106; CHECK-NEXT: i32.const   $push[[L1:[0-9]+]]=, 2{{$}}
107; CHECK-NEXT: i32.lt_s    $push[[L2:[0-9]+]]=, $1, $pop[[L1]]{{$}}
108; CHECK-NEXT: i32.xor     $push[[L5:[0-9]+]]=, $pop[[L0]], $pop[[L2]]{{$}}
109; CHECK-NEXT: i32.const   $push[[L12:[0-9]+]]=, 1{{$}}
110; CHECK-NEXT: i32.lt_s    $push[[L3:[0-9]+]]=, $2, $pop[[L12]]{{$}}
111; CHECK-NEXT: i32.const   $push[[L11:[0-9]+]]=, 2{{$}}
112; CHECK-NEXT: i32.lt_s    $push[[L4:[0-9]+]]=, $3, $pop[[L11]]{{$}}
113; CHECK-NEXT: i32.xor     $push[[L6:[0-9]+]]=, $pop[[L3]], $pop[[L4]]{{$}}
114; CHECK-NEXT: i32.eq      $push7=, $pop[[L5]], $pop[[L6]]{{$}}
115; CHECK-NEXT: br_if       0, $pop7{{$}}
116; CHECK-NEXT: i32.const   $push8=, 0{{$}}
117; CHECK-NEXT: return      $pop8{{$}}
118; CHECK-NEXT: .LBB{{[0-9]+}}_2:
119; CHECK-NEXT: end_block{{$}}
120; CHECK-NEXT: i32.const   $push12=, 1{{$}}
121; CHECK-NEXT: return      $pop12{{$}}
122; NOREGS-LABEL: stack_uses:
123; NOREGS: .functype stack_uses (i32, i32, i32, i32) -> (i32){{$}}
124; NOREGS-NEXT: block {{$}}
125; NOREGS-NEXT: local.get 0{{$}}
126; NOREGS-NEXT: i32.const   1{{$}}
127; NOREGS-NEXT: i32.lt_s
128; NOREGS-NEXT: local.get 1{{$}}
129; NOREGS-NEXT: i32.const   2{{$}}
130; NOREGS-NEXT: i32.lt_s
131; NOREGS-NEXT: i32.xor {{$}}
132; NOREGS-NEXT: local.get 2{{$}}
133; NOREGS-NEXT: i32.const   1{{$}}
134; NOREGS-NEXT: i32.lt_s
135; NOREGS-NEXT: local.get 3{{$}}
136; NOREGS-NEXT: i32.const   2{{$}}
137; NOREGS-NEXT: i32.lt_s
138; NOREGS-NEXT: i32.xor {{$}}
139; NOREGS-NEXT: i32.eq {{$}}
140; NOREGS-NEXT: br_if       0{{$}}
141; NOREGS-NEXT: i32.const   0{{$}}
142; NOREGS-NEXT: return{{$}}
143; NOREGS-NEXT: .LBB{{[0-9]+}}_2:
144; NOREGS-NEXT: end_block{{$}}
145; NOREGS-NEXT: i32.const   1{{$}}
146; NOREGS-NEXT: return{{$}}
147define i32 @stack_uses(i32 %x, i32 %y, i32 %z, i32 %w) {
148entry:
149  %c = icmp sle i32 %x, 0          ; x <= 0
150  %d = icmp sle i32 %y, 1          ; y <= 1
151  %e = icmp sle i32 %z, 0          ; z <= 0
152  %f = icmp sle i32 %w, 1          ; w <= 1
153  %g = xor i1 %c, %d               ; combine the x/y comparisons
154  %h = xor i1 %e, %f               ; combine the z/w comparisons
155  %i = xor i1 %g, %h               ; true when the two pair results differ
156  br i1 %i, label %true, label %false
157true:
158  ret i32 0
159false:
160  ret i32 1
161}
162
163; Test an interesting case where the load has multiple uses and cannot
164; be trivially stackified. However, it can be stackified with a local.tee.
165
166; CHECK-LABEL: multiple_uses:
167; CHECK: .functype multiple_uses (i32, i32, i32) -> (){{$}}
168; CHECK-NEXT: block   {{$}}
169; CHECK-NEXT: i32.load    $push[[NUM0:[0-9]+]]=, 0($2){{$}}
170; CHECK-NEXT: local.tee   $push[[NUM1:[0-9]+]]=, $3=, $pop[[NUM0]]{{$}}
171; CHECK-NEXT: i32.ge_u    $push[[NUM2:[0-9]+]]=, $pop[[NUM1]], $1{{$}}
172; CHECK-NEXT: br_if       0, $pop[[NUM2]]{{$}}
173; CHECK-NEXT: i32.lt_u    $push[[NUM3:[0-9]+]]=, $3, $0{{$}}
174; CHECK-NEXT: br_if       0, $pop[[NUM3]]{{$}}
175; CHECK-NEXT: i32.store   0($2), $3{{$}}
176; CHECK-NEXT: .LBB{{[0-9]+}}_3:
177; CHECK-NEXT: end_block{{$}}
178; CHECK-NEXT: return{{$}}
179; NOREGS-LABEL: multiple_uses:
180; NOREGS: .functype multiple_uses (i32, i32, i32) -> (){{$}}
181; NOREGS: .local i32{{$}}
182; NOREGS-NEXT: block {{$}}
183; NOREGS-NEXT: local.get   2{{$}}
184; NOREGS-NEXT: i32.load    0{{$}}
185; NOREGS-NEXT: local.tee   3{{$}}
186; NOREGS-NEXT: local.get   1{{$}}
187; NOREGS-NEXT: i32.ge_u
188; NOREGS-NEXT: br_if       0{{$}}
189; NOREGS-NEXT: local.get   3{{$}}
190; NOREGS-NEXT: local.get   0{{$}}
191; NOREGS-NEXT: i32.lt_u
192; NOREGS-NEXT: br_if       0{{$}}
193; NOREGS-NEXT: local.get   2{{$}}
194; NOREGS-NEXT: local.get   3{{$}}
195; NOREGS-NEXT: i32.store   0{{$}}
196; NOREGS-NEXT: .LBB{{[0-9]+}}_3:
197; NOREGS-NEXT: end_block{{$}}
198; NOREGS-NEXT: return{{$}}
199define void @multiple_uses(ptr %arg0, ptr %arg1, ptr %arg2) nounwind {
200bb:
201  br label %loop
202
203loop:
204  %tmp7 = load i32, ptr %arg2               ; the multi-use value: feeds both compares (via %tmp8) and the store
205  %tmp8 = inttoptr i32 %tmp7 to ptr
206  %tmp9 = icmp uge ptr %tmp8, %arg1         ; loaded pointer >= %arg1 (unsigned)
207  %tmp10 = icmp ult ptr %tmp8, %arg0        ; loaded pointer < %arg0 (unsigned)
208  %tmp11 = or i1 %tmp9, %tmp10              ; either condition skips the store
209  br i1 %tmp11, label %back, label %then
210
211then:
212  store i32 %tmp7, ptr %arg2                ; writes the loaded value back to the same address
213  br label %back
214
215back:
216  br i1 undef, label %return, label %loop   ; loop exit condition is undef
217
218return:
219  ret void
220}
221
222; Don't stackify stores across other instructions with side effects.
223
224; CHECK-LABEL: stackify_store_across_side_effects:
225; CHECK:      store
226; CHECK:      call
227; CHECK:      store
228; CHECK-NEXT: call
229; NOREGS-LABEL: stackify_store_across_side_effects:
230; NOREGS:      store
231; NOREGS:      call
232; NOREGS:      store
233; NOREGS-NEXT: call
234declare void @evoke_side_effects()
235define hidden void @stackify_store_across_side_effects(ptr nocapture %d) {
236entry:
237  store double 2.0, ptr %d            ; first store, must stay ahead of the first call
238  call void @evoke_side_effects()     ; callee has no memory attributes, so it may have any side effect
239  store double 2.0, ptr %d            ; second store, expected directly before the second call
240  call void @evoke_side_effects()
241  ret void
242}
243
244; Div instructions have side effects and can't be reordered, but this entire
245; function should still be able to be stackified because it's already in
246; tree order.
247
248; CHECK-LABEL: div_tree:
249; CHECK: .functype div_tree (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (i32){{$}}
250; CHECK-NEXT: i32.div_s   $push[[L0:[0-9]+]]=, $0, $1{{$}}
251; CHECK-NEXT: i32.div_s   $push[[L1:[0-9]+]]=, $2, $3{{$}}
252; CHECK-NEXT: i32.div_s   $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
253; CHECK-NEXT: i32.div_s   $push[[L3:[0-9]+]]=, $4, $5{{$}}
254; CHECK-NEXT: i32.div_s   $push[[L4:[0-9]+]]=, $6, $7{{$}}
255; CHECK-NEXT: i32.div_s   $push[[L5:[0-9]+]]=, $pop[[L3]], $pop[[L4]]{{$}}
256; CHECK-NEXT: i32.div_s   $push[[L6:[0-9]+]]=, $pop[[L2]], $pop[[L5]]{{$}}
257; CHECK-NEXT: i32.div_s   $push[[L7:[0-9]+]]=, $8, $9{{$}}
258; CHECK-NEXT: i32.div_s   $push[[L8:[0-9]+]]=, $10, $11{{$}}
259; CHECK-NEXT: i32.div_s   $push[[L9:[0-9]+]]=, $pop[[L7]], $pop[[L8]]{{$}}
260; CHECK-NEXT: i32.div_s   $push[[L10:[0-9]+]]=, $12, $13{{$}}
261; CHECK-NEXT: i32.div_s   $push[[L11:[0-9]+]]=, $14, $15{{$}}
262; CHECK-NEXT: i32.div_s   $push[[L12:[0-9]+]]=, $pop[[L10]], $pop[[L11]]{{$}}
263; CHECK-NEXT: i32.div_s   $push[[L13:[0-9]+]]=, $pop[[L9]], $pop[[L12]]{{$}}
264; CHECK-NEXT: i32.div_s   $push[[L14:[0-9]+]]=, $pop[[L6]], $pop[[L13]]{{$}}
265; CHECK-NEXT: return      $pop[[L14]]{{$}}
266; NOREGS-LABEL: div_tree:
267; NOREGS: .functype div_tree (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (i32){{$}}
268; NOREGS-NEXT: local.get 0{{$}}
269; NOREGS-NEXT: local.get 1{{$}}
270; NOREGS-NEXT: i32.div_s{{$}}
271; NOREGS-NEXT: local.get 2{{$}}
272; NOREGS-NEXT: local.get 3{{$}}
273; NOREGS-NEXT: i32.div_s{{$}}
274; NOREGS-NEXT: i32.div_s{{$}}
275; NOREGS-NEXT: local.get 4{{$}}
276; NOREGS-NEXT: local.get 5{{$}}
277; NOREGS-NEXT: i32.div_s{{$}}
278; NOREGS-NEXT: local.get 6{{$}}
279; NOREGS-NEXT: local.get 7{{$}}
280; NOREGS-NEXT: i32.div_s{{$}}
281; NOREGS-NEXT: i32.div_s{{$}}
282; NOREGS-NEXT: i32.div_s{{$}}
283; NOREGS-NEXT: local.get 8{{$}}
284; NOREGS-NEXT: local.get 9{{$}}
285; NOREGS-NEXT: i32.div_s{{$}}
286; NOREGS-NEXT: local.get 10{{$}}
287; NOREGS-NEXT: local.get 11{{$}}
288; NOREGS-NEXT: i32.div_s{{$}}
289; NOREGS-NEXT: i32.div_s{{$}}
290; NOREGS-NEXT: local.get 12{{$}}
291; NOREGS-NEXT: local.get 13{{$}}
292; NOREGS-NEXT: i32.div_s{{$}}
293; NOREGS-NEXT: local.get 14{{$}}
294; NOREGS-NEXT: local.get 15{{$}}
295; NOREGS-NEXT: i32.div_s{{$}}
296; NOREGS-NEXT: i32.div_s{{$}}
297; NOREGS-NEXT: i32.div_s{{$}}
298; NOREGS-NEXT: i32.div_s{{$}}
299; NOREGS-NEXT: return{{$}}
300define i32 @div_tree(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) {
301entry:
302  %q0 = sdiv i32 %a, %b        ; leaf: a / b
303  %q1 = sdiv i32 %c, %d        ; leaf: c / d
304  %q2 = sdiv i32 %q0, %q1
305  %q3 = sdiv i32 %e, %f        ; leaf: e / f
306  %q4 = sdiv i32 %g, %h        ; leaf: g / h
307  %q5 = sdiv i32 %q3, %q4
308  %q6 = sdiv i32 %q2, %q5      ; quotient over arguments a..h
309  %q7 = sdiv i32 %i, %j        ; leaf: i / j
310  %q8 = sdiv i32 %k, %l        ; leaf: k / l
311  %q9 = sdiv i32 %q7, %q8
312  %q10 = sdiv i32 %m, %n       ; leaf: m / n
313  %q11 = sdiv i32 %o, %p       ; leaf: o / p
314  %q12 = sdiv i32 %q10, %q11
315  %q13 = sdiv i32 %q9, %q12    ; quotient over arguments i..p
316  %q14 = sdiv i32 %q6, %q13    ; root of the tree
317  ret i32 %q14
318}
319
320; A simple multiple-use case.
321
322; CHECK-LABEL: simple_multiple_use:
323; CHECK:       .functype simple_multiple_use (i32, i32) -> (){{$}}
324; CHECK-NEXT:  i32.mul     $push[[NUM0:[0-9]+]]=, $1, $0{{$}}
325; CHECK-NEXT:  local.tee   $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}}
326; CHECK-NEXT:  call        use_a, $pop[[NUM1]]{{$}}
327; CHECK-NEXT:  call        use_b, $[[NUM2]]{{$}}
328; CHECK-NEXT:  return{{$}}
329; NOREGS-LABEL: simple_multiple_use:
330; NOREGS:       .functype simple_multiple_use (i32, i32) -> (){{$}}
331; NOREGS-NEXT:  local.get 1{{$}}
332; NOREGS-NEXT:  local.get 0{{$}}
333; NOREGS-NEXT:  i32.mul
334; NOREGS-NEXT:  local.tee   0{{$}}
335; NOREGS-NEXT:  call        use_a{{$}}
336; NOREGS-NEXT:  local.get   0{{$}}
337; NOREGS-NEXT:  call        use_b{{$}}
338; NOREGS-NEXT:  return{{$}}
339declare void @use_a(i32)
340declare void @use_b(i32)
341define void @simple_multiple_use(i32 %x, i32 %y) {
342  %product = mul i32 %y, %x          ; single definition with two uses
343  call void @use_a(i32 %product)     ; first use
344  call void @use_b(i32 %product)     ; second use
345  ret void
346}
347
348; Multiple uses of the same value in one instruction.
349
350; CHECK-LABEL: multiple_uses_in_same_insn:
351; CHECK:       .functype multiple_uses_in_same_insn (i32, i32) -> (){{$}}
352; CHECK-NEXT:  i32.mul     $push[[NUM0:[0-9]+]]=, $1, $0{{$}}
353; CHECK-NEXT:  local.tee   $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}}
354; CHECK-NEXT:  call        use_2, $pop[[NUM1]], $[[NUM2]]{{$}}
355; CHECK-NEXT:  return{{$}}
356; NOREGS-LABEL: multiple_uses_in_same_insn:
357; NOREGS:       .functype multiple_uses_in_same_insn (i32, i32) -> (){{$}}
358; NOREGS-NEXT:  local.get 1{{$}}
359; NOREGS-NEXT:  local.get 0{{$}}
360; NOREGS-NEXT:  i32.mul
361; NOREGS-NEXT:  local.tee   0{{$}}
362; NOREGS-NEXT:  local.get   0{{$}}
363; NOREGS-NEXT:  call        use_2{{$}}
364; NOREGS-NEXT:  return{{$}}
365declare void @use_2(i32, i32)
366define void @multiple_uses_in_same_insn(i32 %x, i32 %y) {
367  %product = mul i32 %y, %x                        ; single definition
368  call void @use_2(i32 %product, i32 %product)     ; both call operands are the same value
369  ret void
370}
371
372; Commute operands to achieve better stackifying.
373
374; CHECK-LABEL: commute:
375; CHECK:  .functype commute () -> (i32){{$}}
376; CHECK-NEXT:  call        $push0=, red{{$}}
377; CHECK-NEXT:  call        $push1=, green{{$}}
378; CHECK-NEXT:  i32.add     $push2=, $pop0, $pop1{{$}}
379; CHECK-NEXT:  call        $push3=, blue{{$}}
380; CHECK-NEXT:  i32.add     $push4=, $pop2, $pop3{{$}}
381; CHECK-NEXT:  return      $pop4{{$}}
382; NOREGS-LABEL: commute:
383; NOREGS:  .functype commute () -> (i32){{$}}
384; NOREGS-NEXT:  call        red{{$}}
385; NOREGS-NEXT:  call        green{{$}}
386; NOREGS-NEXT:  i32.add {{$}}
387; NOREGS-NEXT:  call        blue{{$}}
388; NOREGS-NEXT:  i32.add {{$}}
389; NOREGS-NEXT:  return{{$}}
390declare i32 @red()
391declare i32 @green()
392declare i32 @blue()
393define i32 @commute() {
394  %r = call i32 @red()
395  %g = call i32 @green()
396  %rg = add i32 %g, %r       ; operands are listed in the reverse of call order (green, then red)
397  %b = call i32 @blue()
398  %rgb = add i32 %rg, %b
399  ret i32 %rgb
400}
401
402; Don't stackify a register when it would move the def of the register past
403; an implicit local.get for the register.
404
405; CHECK-LABEL: no_stackify_past_use:
406; CHECK:      call            $1=, callee, $0
407; CHECK-NEXT: i32.const       $push0=, 1
408; CHECK-NEXT: i32.add         $push1=, $0, $pop0
409; CHECK-NEXT: call            $push2=, callee, $pop1
410; CHECK-NEXT: i32.sub         $push3=, $pop2, $1
411; CHECK-NEXT: i32.div_s       $push4=, $pop3, $1
412; CHECK-NEXT: return          $pop4
413; NOREGS-LABEL: no_stackify_past_use:
414; NOREGS:      local.get       0{{$}}
415; NOREGS-NEXT: call            callee
416; NOREGS-NEXT: local.set       1{{$}}
417; NOREGS-NEXT: local.get       0{{$}}
418; NOREGS-NEXT: i32.const       1
419; NOREGS-NEXT: i32.add
420; NOREGS-NEXT: call            callee
421; NOREGS-NEXT: local.get       1{{$}}
422; NOREGS-NEXT: i32.sub
423; NOREGS-NEXT: local.get       1{{$}}
424; NOREGS-NEXT: i32.div_s
425; NOREGS-NEXT: return
426declare i32 @callee(i32)
427define i32 @no_stackify_past_use(i32 %arg) {
428  %first = call i32 @callee(i32 %arg)
429  %next_arg = add i32 %arg, 1
430  %second = call i32 @callee(i32 %next_arg)
431  %diff = sub i32 %second, %first     ; sub is not commutative; %first is the right-hand operand
432  %quot = sdiv i32 %diff, %first      ; %first is used a second time here
433  ret i32 %quot
434}
435
436; This is the same as no_stackify_past_use, except using a commutative operator,
437; so we can reorder the operands and stackify.
438
439; CHECK-LABEL: commute_to_fix_ordering:
440; CHECK: call            $push[[L0:.+]]=, callee, $0
441; CHECK: local.tee       $push[[L1:.+]]=, $1=, $pop[[L0]]
442; CHECK: i32.const       $push0=, 1
443; CHECK: i32.add         $push1=, $0, $pop0
444; CHECK: call            $push2=, callee, $pop1
445; CHECK: i32.add         $push3=, $1, $pop2
446; CHECK: i32.mul         $push4=, $pop[[L1]], $pop3
447; CHECK: return          $pop4
448; NOREGS-LABEL: commute_to_fix_ordering:
449; NOREGS: local.get       0{{$}}
450; NOREGS: call            callee
451; NOREGS: local.tee       1
452; NOREGS: local.get       1{{$}}
453; NOREGS: local.get       0{{$}}
454; NOREGS: i32.const       1
455; NOREGS: i32.add
456; NOREGS: call            callee
457; NOREGS: i32.add
458; NOREGS: i32.mul
459; NOREGS: return
460define i32 @commute_to_fix_ordering(i32 %arg) {
461  %first = call i32 @callee(i32 %arg)
462  %next_arg = add i32 %arg, 1
463  %second = call i32 @callee(i32 %next_arg)
464  %sum = add i32 %second, %first      ; commutative, so the operands may be swapped
465  %prod = mul i32 %sum, %first        ; commutative as well; %first is used a second time
466  ret i32 %prod
467}
468
469; Stackify individual defs of virtual registers with multiple defs.
470
471; CHECK-LABEL: multiple_defs:
472; CHECK:        f64.add         $push[[NUM0:[0-9]+]]=, ${{[0-9]+}}, $pop{{[0-9]+}}{{$}}
473; CHECK-NEXT:   local.tee       $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}}
474; CHECK-NEXT:   f64.select      ${{[0-9]+}}=, $pop{{[0-9]+}}, $pop[[NUM1]], ${{[0-9]+}}{{$}}
475; NOREGS-LABEL: multiple_defs:
476; NOREGS:        f64.add
477; NOREGS:        local.tee
478; NOREGS:        f64.select
479define void @multiple_defs(i32 %arg, i32 %arg1, i1 %arg2, i1 %arg3, i1 %arg4) {  ; bb5 heads the outer loop; bb8/bb17 form the inner loop
480bb:
481  br label %bb5
482
483bb5:                                              ; preds = %bb21, %bb
484  %tmp = phi double [ 0.000000e+00, %bb ], [ %tmp22, %bb21 ]
485  %tmp6 = phi double [ 0.000000e+00, %bb ], [ %tmp23, %bb21 ]
486  %tmp7 = fcmp olt double %tmp6, 2.323450e+01
487  br i1 %tmp7, label %bb8, label %bb21
488
489bb8:                                              ; preds = %bb17, %bb5
490  %tmp9 = phi double [ %tmp19, %bb17 ], [ %tmp, %bb5 ]
491  %tmp10 = fadd double %tmp6, -1.000000e+00
492  %tmp11 = select i1 %arg2, double -1.135357e+04, double %tmp10
493  %tmp12 = fadd double %tmp11, %tmp9
494  br i1 %arg3, label %bb17, label %bb13
495
496bb13:                                             ; preds = %bb8
497  %tmp14 = or i32 %arg1, 2
498  %tmp15 = icmp eq i32 %tmp14, 14
499  %tmp16 = select i1 %tmp15, double -1.135357e+04, double 0xBFCE147AE147B000
500  br label %bb17
501
502bb17:                                             ; preds = %bb13, %bb8
503  %tmp18 = phi double [ %tmp16, %bb13 ], [ %tmp10, %bb8 ]
504  %tmp19 = fadd double %tmp18, %tmp12
505  %tmp20 = fcmp olt double %tmp6, 2.323450e+01
506  br i1 %tmp20, label %bb8, label %bb21
507
508bb21:                                             ; preds = %bb17, %bb5
509  %tmp22 = phi double [ %tmp, %bb5 ], [ %tmp9, %bb17 ]
510  %tmp23 = fadd double %tmp6, 1.000000e+00
511  br i1 %arg4, label %exit, label %bb5
512exit:
513  ret void
514}
515
516; Don't move calls past loads
517; CHECK-LABEL: no_stackify_call_past_load:
518; CHECK: call $0=, red
519; CHECK: i32.const $push0=, 0
520; CHECK: i32.load $1=, count($pop0)
521; NOREGS-LABEL: no_stackify_call_past_load:
522; NOREGS: call red
523; NOREGS: i32.const 0
524; NOREGS: i32.load count
525@count = hidden global i32 0, align 4
526define i32 @no_stackify_call_past_load() {
527  %red_val = call i32 @red()                     ; call placed before the load of @count
528  %count_val = load i32, ptr @count, align 4
529  call i32 @callee(i32 %red_val)
530  ret i32 %count_val
531  ; %red_val is consumed only by the @callee call above.
532}
533
534; Don't move stores past loads if there may be aliasing
535; CHECK-LABEL: no_stackify_store_past_load
536; CHECK: i32.store 0($1), $0
537; CHECK: i32.load {{.*}}, 0($2)
538; CHECK: call {{.*}}, callee, $0{{$}}
539; NOREGS-LABEL: no_stackify_store_past_load
540; NOREGS: i32.store 0
541; NOREGS: i32.load 0
542; NOREGS: call callee{{$}}
543define i32 @no_stackify_store_past_load(i32 %a, ptr %p1, ptr %p2) {
544  store i32 %a, ptr %p1                    ; %p1 and %p2 carry no noalias, so this may alias the load
545  %loaded = load i32, ptr %p2, align 4
546  call i32 @callee(i32 %a)
547  ret i32 %loaded
548}
549
550; Can still stackify past invariant loads.
551; CHECK-LABEL: store_past_invar_load
552; CHECK: i32.store 0($1), $0
553; CHECK: call {{.*}}, callee, $0
554; CHECK: i32.load $push{{.*}}, 0($2)
555; CHECK: return $pop
556; NOREGS-LABEL: store_past_invar_load
557; NOREGS: i32.store 0
558; NOREGS: call callee
559; NOREGS: i32.load 0
560; NOREGS: return
561define i32 @store_past_invar_load(i32 %a, ptr %p1, ptr dereferenceable(4) align(4) %p2) {
562  store i32 %a, ptr %p1
563  %loaded = load i32, ptr %p2, !invariant.load !0   ; invariant load from a dereferenceable, aligned pointer
564  call i32 @callee(i32 %a)
565  ret i32 %loaded
566}
567
568; CHECK-LABEL: ignore_dbg_value:
569; CHECK:      .Lfunc_begin
570; CHECK:       unreachable
571; NOREGS-LABEL: ignore_dbg_value:
572; NOREGS:      .Lfunc_begin
573; NOREGS:       unreachable
574declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
575define void @ignore_dbg_value() {
576  call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !7, metadata !9), !dbg !10  ; debug-info intrinsic is the only instruction before the terminator
577  unreachable
578}
579
580; Don't stackify an expression that might use the stack into a return, since we
581; might insert an epilogue before the return.
582
583; CHECK-LABEL: no_stackify_past_epilogue:
584; CHECK: return ${{[0-9]+}}{{$}}
585; NOREGS-LABEL: no_stackify_past_epilogue:
586; NOREGS: return{{$}}
587declare i32 @use_memory(ptr)
588define i32 @no_stackify_past_epilogue() {
589  %slot = alloca i32                           ; stack allocation for the callee's argument
590  %result = call i32 @use_memory(ptr %slot)
591  ret i32 %result
592}
593
594; Stackify a loop induction variable into a loop comparison.
595
596; CHECK-LABEL: stackify_indvar:
597; CHECK:             i32.const   $push[[L5:.+]]=, 1{{$}}
598; CHECK-NEXT:        i32.add     $push[[L4:.+]]=, $[[R0:.+]], $pop[[L5]]{{$}}
599; CHECK-NEXT:        local.tee   $push[[L3:.+]]=, $[[R0]]=, $pop[[L4]]{{$}}
600; CHECK-NEXT:        i32.ne      $push[[L2:.+]]=, $0, $pop[[L3]]{{$}}
601; NOREGS-LABEL: stackify_indvar:
602; NOREGS:             i32.const   1{{$}}
603; NOREGS-NEXT:        i32.add
604; NOREGS-NEXT:        local.tee   2{{$}}
605; NOREGS-NEXT:        i32.ne
606define void @stackify_indvar(i32 %tmp, ptr %v) #0 {
607bb:
608  br label %bb3
609
610bb3:                                              ; preds = %bb3, %bb
611  %tmp4 = phi i32 [ %tmp7, %bb3 ], [ 0, %bb ]     ; induction variable, starts at 0
612  %tmp5 = load volatile i32, ptr %v, align 4
613  %tmp6 = add nsw i32 %tmp5, %tmp4
614  store volatile i32 %tmp6, ptr %v, align 4       ; adds the counter into *%v each iteration
615  %tmp7 = add nuw nsw i32 %tmp4, 1                ; incremented counter, fed back into the phi
616  %tmp8 = icmp eq i32 %tmp7, %tmp                 ; exit once the incremented counter equals %tmp
617  br i1 %tmp8, label %bb10, label %bb3
618
619bb10:                                             ; preds = %bb3
620  ret void
621}
622
623; Don't stackify a call past a __stack_pointer store.
624
625; CHECK-LABEL: stackpointer_dependency:
626; CHECK:      call {{.+}}, stackpointer_callee,
627; CHECK-NEXT: global.set __stack_pointer,
628; NOREGS-LABEL: stackpointer_dependency:
629; NOREGS:      call stackpointer_callee
630; NOREGS:      global.set __stack_pointer
631declare i32 @stackpointer_callee(ptr readnone, ptr readnone) nounwind readnone
632declare ptr @llvm.frameaddress(i32)
633define i32 @stackpointer_dependency(ptr readnone) {
634  %2 = tail call ptr @llvm.frameaddress(i32 0)                 ; frame address at level 0
635  %3 = tail call i32 @stackpointer_callee(ptr %0, ptr %2)      ; %0 is the unnamed parameter; the callee is declared readnone
636  ret i32 %3
637}
638
639; Stackify a call_indirect with respect to its ordering
640
641; CHECK-LABEL: call_indirect_stackify:
642; CHECK: i32.load  $push[[L4:.+]]=, 0($0)
643; CHECK-NEXT: local.tee $push[[L3:.+]]=, $0=, $pop[[L4]]
644; CHECK-NEXT: i32.load  $push[[L0:.+]]=, 0($0)
645; CHECK-NEXT: i32.load  $push[[L1:.+]]=, 0($pop[[L0]])
646; CHECK-NEXT: call_indirect $push{{.+}}=, $pop[[L3]], $1, $pop[[L1]]
647; NOREGS-LABEL: call_indirect_stackify:
648; NOREGS: i32.load  0
649; NOREGS-NEXT: local.tee 0
650; NOREGS:      i32.load  0
651; NOREGS-NEXT: i32.load  0
652; NOREGS-NEXT: call_indirect __indirect_function_table, (i32, i32) -> (i32)
653%class.call_indirect = type { ptr }
654define i32 @call_indirect_stackify(ptr %objptr, i32 %arg) {
655  %this = load ptr, ptr %objptr                       ; object pointer; used again as the first call argument
656  %vtbl = load ptr, ptr %this                         ; first word of the object
657  %method = load ptr, ptr %vtbl                       ; first entry of that table, used as the callee
658  %result = call i32 %method(ptr %this, i32 %arg)     ; indirect call through the loaded pointer
659  ret i32 %result
660}
661
662!llvm.module.flags = !{!0}
663!llvm.dbg.cu = !{!1}
664
665!0 = !{i32 2, !"Debug Info Version", i32 3}  ; module flag; also reused as the operand of the !invariant.load attachments in this file
666!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "clang version 3.9.0 (trunk 266005) (llvm/trunk 266105)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !3)
667!2 = !DIFile(filename: "test.c", directory: "/")
668!3 = !{}
669!5 = distinct !DISubprogram(name: "test", scope: !2, file: !2, line: 10, type: !6, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: true, unit: !1, retainedNodes: !3)
670!6 = !DISubroutineType(types: !3)
671!7 = !DILocalVariable(name: "nzcnt", scope: !5, file: !2, line: 15, type: !8)  ; variable operand of the llvm.dbg.value call
672!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
673!9 = !DIExpression()  ; empty expression operand of the llvm.dbg.value call
674!10 = !DILocation(line: 15, column: 6, scope: !5)  ; !dbg location attached to the llvm.dbg.value call
675