xref: /llvm-project/llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll (revision 9a28814f59e8f52cc63ae3d17023cee8348d9b53)
1; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 %s -o - -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,ENABLE,CHECK-64,ENABLE-64
2; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu %s -o - -enable-shrink-wrap=false -verify-machineinstrs |  FileCheck %s --check-prefixes=CHECK,DISABLE,CHECK-64,DISABLE-64
3; RUN: llc -mtriple=powerpc-ibm-aix-xcoff -mattr=-altivec -mcpu=pwr8 %s -o - -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,ENABLE,CHECK-32,ENABLE-32
4; RUN: llc -mtriple=powerpc-ibm-aix-xcoff %s -o - -enable-shrink-wrap=false -verify-machineinstrs |  FileCheck %s --check-prefixes=CHECK,DISABLE,CHECK-32,DISABLE-32
5; RUN: llc -mtriple=powerpc64-ibm-aix-xcoff -mattr=-altivec -mcpu=pwr8 %s -o - -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,ENABLE,CHECK-64,ENABLE-64
6; RUN: llc -mtriple=powerpc64-ibm-aix-xcoff %s -o - -enable-shrink-wrap=false -verify-machineinstrs |  FileCheck %s --check-prefixes=CHECK,DISABLE,CHECK-64,DISABLE-64,DISABLE-64-AIX
7;
8;
9; Note: Many of these tests use inline asm instead of regular calls.
10; This gives better control over what register allocation will do.
11; Otherwise, we may get a spill right in the entry block, defeating
12; shrink-wrapping. Moreover, some of the inline asm statements (nop)
13; are here to ensure that the related paths do not end up as critical
14; edges.
15
16
17; Initial motivating example: Simple diamond with a call just on one side.
18; CHECK-LABEL: {{.*}}foo:
19;
20; Compare the arguments and return
21; No prologue needed.
22; ENABLE: cmpw 3, 4
23; ENABLE-NEXT: bgelr 0
24;
25; Prologue code.
26;  At a minimum, we save/restore the link register. Other registers may be saved
27;  as well.
28; CHECK: mflr
29;
30; Compare the arguments and jump to exit.
31; After the prologue is set.
32; DISABLE: cmpw 3, 4
33; DISABLE-32: stw 0,
34; DISABLE-64-AIX: std 0,
35; DISABLE-NEXT: bge 0, {{.*}}[[EXIT_LABEL:BB[0-9_]+]]
36;
37; Store %a on the stack
38; CHECK: stw 3, {{[0-9]+\([0-9]+\)}}
39; Set the alloca address in the second argument.
40; CHECK-NEXT: addi 4, 1, {{[0-9]+}}
41; Set the first argument to zero.
42; CHECK-NEXT: li 3, 0
43; CHECK-NEXT: bl {{.*}}doSomething
44;
45; With shrink-wrapping, epilogue is just after the call.
46; Restore the link register and return.
47; Note that there could be other epilog code before the link register is
48; restored but we will not check for it here.
49; ENABLE: mtlr
50; ENABLE-NEXT: blr
51;
52; DISABLE: [[EXIT_LABEL]]:
53;
54; Without shrink-wrapping, epilogue is in the exit block.
55; Epilogue code. (What we pop does not matter.)
56; DISABLE: mtlr {{[0-9]+}}
57; DISABLE-NEXT: blr
58;
59
; Simple diamond: @doSomething is called only on the %true path, which is
; taken when %a < %b (signed). The %false block merges the call result with
; %a and returns it.
60define i32 @foo(i32 %a, i32 %b) {
; Stack slot whose address is passed as the second argument of @doSomething.
61  %tmp = alloca i32, align 4
62  %tmp2 = icmp slt i32 %a, %b
63  br i1 %tmp2, label %true, label %false
64
65true:
; Spill %a into the slot, then call with (0, &slot).
66  store i32 %a, ptr %tmp, align 4
67  %tmp4 = call i32 @doSomething(i32 0, ptr %tmp)
68  br label %false
69
70false:
; %0 names the implicit (unlabelled) entry block.
71  %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
72  ret i32 %tmp.0
73}
74
75; Function Attrs: optsize
; NOTE(review): the declaration below carries no attribute list, so the
; "optsize" remark above is not reflected in the IR - confirm whether it is stale.
; External callee of @foo: takes an i32 and a pointer, returns an i32.
76declare i32 @doSomething(i32, ptr)
77
78
79; Check that we do not place the restore inside the loop while the save
80; is outside it.
81; CHECK-LABEL: {{.*}}freqSaveAndRestoreOutsideLoop:
82;
83; Shrink-wrapping allows us to skip the prologue in the else case.
84; ENABLE: cmplwi 3, 0
85; ENABLE: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
86;
87; Prologue code.
88; Make sure we save the link register
89; CHECK: mflr {{[0-9]+}}
90;
91; DISABLE: cmplwi 3, 0
92; DISABLE: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
93;
94; Loop preheader
95; CHECK-DAG: li [[SUM:[0-9]+]], 0
96; CHECK-DAG: li [[IV:[0-9]+]], 10
97;
98; Loop body
99; CHECK: {{.*}}[[LOOP:BB[0-9_]+]]: # %for.body
100; CHECK: bl {{.*}}something
101;
102; CHECK-64-DAG: addi [[IV]], [[IV]], -1
103; CHECK-64-DAG: add [[SUM]], 3, [[SUM]]
104; CHECK-64-DAG: cmpldi [[IV]], 0
105; CHECK-32-DAG: addi [[IV]], [[IV]], -1
106; CHECK-32-DAG: add [[SUM]], 3, [[SUM]]
107; CHECK-32-DAG: cmplwi [[IV]], 0
108; CHECK-NEXT: bc 12, 1, {{.*}}[[LOOP]]
109;
110; Next BB.
111; CHECK: slwi 3, [[SUM]], 3
112;
113; Jump to epilogue.
114; DISABLE: b {{.*}}[[EPILOG_BB:BB[0-9_]+]]
115;
116; DISABLE: {{.*}}[[ELSE_LABEL]]: # %if.else
117; Shift second argument by one and store into returned register.
118; DISABLE: slwi 3, 4, 1
119; DISABLE: {{.*}}[[EPILOG_BB]]: # %if.end
120;
121; Epilogue code.
122; CHECK: mtlr {{[0-9]+}}
123; CHECK: blr
124;
125; ENABLE: {{.*}}[[ELSE_LABEL]]: # %if.else
126; Shift second argument by one and store into returned register.
127; ENABLE: slwi 3, 4, 1
128; ENABLE-NEXT: blr
; If %cond is zero, returns %N << 1 without making any call (%if.else).
; Otherwise runs a 10-iteration loop that calls @something and sums the
; results, then returns that sum shifted left by 3.
129define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) {
130entry:
131  %tobool = icmp eq i32 %cond, 0
132  br i1 %tobool, label %if.else, label %for.preheader
133
134for.preheader:
; Inline asm nop; see the note at the top of the file about nops and
; critical edges.
135  tail call void asm "nop", ""()
136  br label %for.body
137
138for.body:                                         ; preds = %for.preheader, %for.body
; %i.05 is the induction variable (0..9), %sum.04 the running sum.
139  %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
140  %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
141  %call = tail call i32 @something()
142  %add = add nsw i32 %call, %sum.04
143  %inc = add nuw nsw i32 %i.05, 1
144  %exitcond = icmp eq i32 %inc, 10
145  br i1 %exitcond, label %for.end, label %for.body
146
147for.end:                                          ; preds = %for.body
148  %shl = shl i32 %add, 3
149  br label %if.end
150
151if.else:                                          ; preds = %entry
; Call-free path.
152  %mul = shl nsw i32 %N, 1
153  br label %if.end
154
155if.end:                                           ; preds = %if.else, %for.end
156  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
157  ret i32 %sum.1
158}
159
; External callee with a (...) prototype; the loop bodies in this file call it
; with no arguments and use its i32 result.
160declare i32 @something(...)
161
162; Check that we do not perform the shrink-wrapping inside the loop even
163; though that would be legal. The cost model must prevent that.
164; CHECK-LABEL: {{.*}}freqSaveAndRestoreOutsideLoop2:
165; Prologue code.
166; Make sure we save the link register before the call
167; CHECK: mflr {{[0-9]+}}
168;
169; Loop preheader
170; CHECK-DAG: li [[SUM:[0-9]+]], 0
171; CHECK-DAG: li [[IV:[0-9]+]], 10
172;
173; Loop body
174; CHECK: {{.*}}[[LOOP:BB[0-9_]+]]: # %for.body
175; CHECK: bl {{.*}}something
176;
177; CHECK-64-DAG: addi [[IV]], [[IV]], -1
178; CHECK-64-DAG: add [[SUM]], 3, [[SUM]]
179; CHECK-64-DAG: cmpldi [[IV]], 0
180; CHECK-32-DAG: addi [[IV]], [[IV]], -1
181; CHECK-32-DAG: add [[SUM]], 3, [[SUM]]
182; CHECK-32-DAG: cmplwi [[IV]], 0
183;
184; CHECK-NEXT: bc 12, 1, {{.*}}[[LOOP]]
185;
186; Next BB
187; CHECK: %for.exit
188; CHECK: mtlr {{[0-9]+}}
189; CHECK: blr
; Unconditionally runs a 10-iteration loop that calls @something and sums the
; results, then returns the sum. %cond is not used in the body.
190define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) {
191entry:
192  br label %for.preheader
193
194for.preheader:
; Inline asm nop before the loop.
195  tail call void asm "nop", ""()
196  br label %for.body
197
198for.body:                                         ; preds = %for.body, %for.preheader
; %i.04 is the induction variable (0..9), %sum.03 the running sum.
199  %i.04 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]
200  %sum.03 = phi i32 [ 0, %for.preheader ], [ %add, %for.body ]
201  %call = tail call i32 @something()
202  %add = add nsw i32 %call, %sum.03
203  %inc = add nuw nsw i32 %i.04, 1
204  %exitcond = icmp eq i32 %inc, 10
205  br i1 %exitcond, label %for.exit, label %for.body
206
207for.exit:
; Inline asm nop after the loop.
208  tail call void asm "nop", ""()
209  br label %for.end
210
211for.end:                                          ; preds = %for.exit
212  ret i32 %add
213}
214
215
216; Check with a more complex case that we do not have save within the loop and
217; restore outside.
218; CHECK-LABEL: {{.*}}loopInfoSaveOutsideLoop:
219;
220; ENABLE: cmplwi 3, 0
221; ENABLE-NEXT: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
222;
223; Prologue code.
224; Make sure we save the link register
225; CHECK: mflr {{[0-9]+}}
226;
227; DISABLE-64-DAG: std {{[0-9]+}}
228; DISABLE-64-DAG: std {{[0-9]+}}
229; DISABLE-64-DAG: std {{[0-9]+}}
230; DISABLE-64-DAG: stdu 1,
231; DISABLE-64-DAG: cmplwi 3, 0
232;
233; DISABLE-32-DAG: stw {{[0-9]+}}
234; DISABLE-32-DAG: stw {{[0-9]+}}
235; DISABLE-32-DAG: stw {{[0-9]+}}
236; DISABLE-32-DAG: stwu 1,
237; DISABLE-32-DAG: cmplwi 3, 0
238;
239; DISABLE-NEXT: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
240;
241; Loop preheader
242; CHECK-DAG: li [[SUM:[0-9]+]], 0
243; CHECK-DAG: li [[IV:[0-9]+]], 10
244;
245; Loop body
246; CHECK: {{.*}}[[LOOP:BB[0-9_]+]]: # %for.body
247; CHECK: bl {{.*}}something
248;
249; CHECK-64-DAG: addi [[IV]], [[IV]], -1
250; CHECK-64-DAG: add [[SUM]], 3, [[SUM]]
251; CHECK-64-DAG: cmpldi [[IV]], 0
252; CHECK-32-DAG: addi [[IV]], [[IV]], -1
253; CHECK-32-DAG: add [[SUM]], 3, [[SUM]]
254; CHECK-32-DAG: cmplwi [[IV]], 0
255;
256; CHECK-NEXT: bc 12, 1, {{.*}}[[LOOP]]
257;
258; Next BB
259; CHECK: bl {{.*}}somethingElse
260; CHECK: slwi 3, [[SUM]], 3
261;
262; Jump to epilogue
263; DISABLE: b {{.*}}[[EPILOG_BB:BB[0-9_]+]]
264;
265; DISABLE: {{.*}}[[ELSE_LABEL]]: # %if.else
266; Shift second argument by one and store into returned register.
267; DISABLE: slwi 3, 4, 1
268;
269; DISABLE: {{.*}}[[EPILOG_BB]]: # %if.end
270;
271; Epilog code
272; CHECK: mtlr {{[0-9]+}}
273; CHECK: blr
274;
275; ENABLE: {{.*}}[[ELSE_LABEL]]: # %if.else
276; Shift second argument by one and store into returned register.
277; ENABLE: slwi 3, 4, 1
278; ENABLE-NEXT: blr
; If %cond is zero, returns %N << 1 without making any call (%if.else).
; Otherwise runs a 10-iteration loop summing the results of @something, then
; calls @somethingElse after the loop and returns the sum shifted left by 3.
279define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) {
280entry:
281  %tobool = icmp eq i32 %cond, 0
282  br i1 %tobool, label %if.else, label %for.preheader
283
284for.preheader:
; Inline asm nop before the loop.
285  tail call void asm "nop", ""()
286  br label %for.body
287
288for.body:                                         ; preds = %for.preheader, %for.body
; %i.05 is the induction variable (0..9), %sum.04 the running sum.
289  %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
290  %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
291  %call = tail call i32 @something()
292  %add = add nsw i32 %call, %sum.04
293  %inc = add nuw nsw i32 %i.05, 1
294  %exitcond = icmp eq i32 %inc, 10
295  br i1 %exitcond, label %for.end, label %for.body
296
297for.end:                                          ; preds = %for.body
; Second call site, located after the loop.
298  tail call void @somethingElse()
299  %shl = shl i32 %add, 3
300  br label %if.end
301
302if.else:                                          ; preds = %entry
; Call-free path.
303  %mul = shl nsw i32 %N, 1
304  br label %if.end
305
306if.end:                                           ; preds = %if.else, %for.end
307  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
308  ret i32 %sum.1
309}
310
; External void callee with a (...) prototype; called with no arguments from
; @loopInfoSaveOutsideLoop and @loopInfoRestoreOutsideLoop.
311declare void @somethingElse(...)
312
313; Check with a more complex case that we do not have restore within the loop and
314; save outside.
315; CHECK-LABEL: {{.*}}loopInfoRestoreOutsideLoop:
316;
317; ENABLE: cmplwi 3, 0
318; ENABLE-NEXT: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
319;
320; Prologue code.
321; Make sure we save the link register
322; CHECK: mflr {{[0-9]+}}
323;
324; DISABLE-64-DAG: std {{[0-9]+}}
325; DISABLE-64-DAG: std {{[0-9]+}}
326; DISABLE-64-DAG: std {{[0-9]+}}
327; DISABLE-64-DAG: stdu 1,
328; DISABLE-64-DAG: cmplwi 3, 0
329;
330; DISABLE-32-DAG: stw {{[0-9]+}}
331; DISABLE-32-DAG: stw {{[0-9]+}}
332; DISABLE-32-DAG: stw {{[0-9]+}}
333; DISABLE-32-DAG: stwu 1,
334; DISABLE-32-DAG: cmplwi 3, 0
335;
336; DISABLE-NEXT: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
337;
338; CHECK: bl {{.*}}somethingElse
339;
340; Loop preheader
341; CHECK-DAG: li [[SUM:[0-9]+]], 0
342; CHECK-DAG: li [[IV:[0-9]+]], 10
343;
344; Loop body
345; CHECK: {{.*}}[[LOOP:BB[0-9_]+]]: # %for.body
346; CHECK: bl {{.*}}something
347;
348; CHECK-64-DAG: addi [[IV]], [[IV]], -1
349; CHECK-64-DAG: add [[SUM]], 3, [[SUM]]
350; CHECK-64-DAG: cmpldi [[IV]], 0
351; CHECK-32-DAG: addi [[IV]], [[IV]], -1
352; CHECK-32-DAG: add [[SUM]], 3, [[SUM]]
353; CHECK-32-DAG: cmplwi [[IV]], 0
354;
355; CHECK-NEXT: bc 12, 1, {{.*}}[[LOOP]]
356;
357; Next BB.
358; CHECK: slwi 3, [[SUM]], 3
359;
360; DISABLE: b {{.*}}[[EPILOG_BB:BB[0-9_]+]]
361;
362; DISABLE: {{.*}}[[ELSE_LABEL]]: # %if.else
363; Shift second argument by one and store into returned register.
364; DISABLE: slwi 3, 4, 1
365; DISABLE: {{.*}}[[EPILOG_BB]]: # %if.end
366;
367; Epilogue code.
368; CHECK: mtlr {{[0-9]+}}
369; CHECK: blr
370;
371; ENABLE: {{.*}}[[ELSE_LABEL]]: # %if.else
372; Shift second argument by one and store into returned register.
373; ENABLE: slwi 3, 4, 1
374; ENABLE-NEXT: blr
; If %cond is zero, returns %N << 1 without making any call (%if.else).
; Otherwise calls @somethingElse before the loop, runs a 10-iteration loop
; summing the results of @something, and returns the sum shifted left by 3.
375define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) nounwind {
376entry:
377  %tobool = icmp eq i32 %cond, 0
378  br i1 %tobool, label %if.else, label %if.then
379
380if.then:                                          ; preds = %entry
; First call site, located before the loop.
381  tail call void @somethingElse()
382  br label %for.body
383
384for.body:                                         ; preds = %for.body, %if.then
; %i.05 is the induction variable (0..9), %sum.04 the running sum.
385  %i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ]
386  %sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
387  %call = tail call i32 @something()
388  %add = add nsw i32 %call, %sum.04
389  %inc = add nuw nsw i32 %i.05, 1
390  %exitcond = icmp eq i32 %inc, 10
391  br i1 %exitcond, label %for.end, label %for.body
392
393for.end:                                          ; preds = %for.body
394  %shl = shl i32 %add, 3
395  br label %if.end
396
397if.else:                                          ; preds = %entry
; Call-free path.
398  %mul = shl nsw i32 %N, 1
399  br label %if.end
400
401if.end:                                           ; preds = %if.else, %for.end
402  %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
403  ret i32 %sum.1
404}
405
406; Check that we handle function with no frame information correctly.
407; CHECK-LABEL: {{.*}}emptyFrame:
408; CHECK: # %entry
409; CHECK-NEXT: li 3, 0
410; CHECK-NEXT: blr
; Returns the constant 0; contains no calls, allocas or other instructions.
411define i32 @emptyFrame() {
412entry:
413  ret i32 0
414}
415
416
417; Check that we handle inline asm correctly.
418; CHECK-LABEL: {{.*}}inlineAsm:
419;
420; ENABLE: cmplwi 3, 0
421; ENABLE-NEXT: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
422;
423; Prologue code.
424; Make sure we save the CSR used in the inline asm: r31
425; ENABLE-DAG: li [[IV:[0-9]+]], 10
426; ENABLE-64-DAG: std 31, -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill
427; ENABLE-32-DAG: stw 31, -[[STACK_OFFSET:[0-9]+]](1) # 4-byte Folded Spill
428;
429; DISABLE: cmplwi 3, 0
430; DISABLE-64-NEXT: std 31, -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill
431; DISABLE-32-NEXT: stw 31, -[[STACK_OFFSET:[0-9]+]](1) # 4-byte Folded Spill
432; DISABLE-NEXT: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
433; DISABLE: li [[IV:[0-9]+]], 10
434;
435; CHECK: nop
436; CHECK: mtctr [[IV]]
437;
438; CHECK: {{.*}}[[LOOP_LABEL:BB[0-9_]+]]: # %for.body
439; Inline asm statement.
440; CHECK: addi 31, 14, 1
441; CHECK: bdnz {{.*}}[[LOOP_LABEL]]
442;
443; Epilogue code.
444; CHECK: li 3, 0
445; CHECK-64-DAG: ld 31, -[[STACK_OFFSET]](1) # 8-byte Folded Reload
446; CHECK-32-DAG: lwz 31, -[[STACK_OFFSET]](1) # 4-byte Folded Reload
447; CHECK-DAG: nop
448; CHECK: blr
449;
450; CHECK: [[ELSE_LABEL]]
451; CHECK-NEXT: slwi 3, 4, 1
452; DISABLE-64-NEXT: ld 31, -[[STACK_OFFSET]](1) # 8-byte Folded Reload
453; DISABLE-32-NEXT: lwz 31, -[[STACK_OFFSET]](1) # 4-byte Folded Reload
454; CHECK-NEXT: blr
; If %cond is zero, returns %N << 1 (%if.else). Otherwise runs a 10-iteration
; loop whose body is an inline asm statement that clobbers r31, then returns 0.
455define i32 @inlineAsm(i32 %cond, i32 %N) {
456entry:
457  %tobool = icmp eq i32 %cond, 0
458  br i1 %tobool, label %if.else, label %for.preheader
459
460for.preheader:
; Inline asm nop before the loop.
461  tail call void asm "nop", ""()
462  br label %for.body
463
464for.body:                                         ; preds = %for.preheader, %for.body
465  %i.03 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
; The "~{r31}" constraint marks r31 as clobbered; the CHECK lines above look
; for its spill and reload.
466  tail call void asm "addi 31, 14, 1", "~{r31}"()
467  %inc = add nuw nsw i32 %i.03, 1
468  %exitcond = icmp eq i32 %inc, 10
469  br i1 %exitcond, label %for.exit, label %for.body
470
471for.exit:
; Inline asm nop after the loop.
472  tail call void asm "nop", ""()
473  br label %if.end
474
475if.else:                                          ; preds = %entry
476  %mul = shl nsw i32 %N, 1
477  br label %if.end
478
479if.end:                                           ; preds = %for.exit, %if.else
480  %sum.0 = phi i32 [ %mul, %if.else ], [ 0, %for.exit ]
481  ret i32 %sum.0
482}
483
484
485; Check that we handle calls to variadic functions correctly.
486; CHECK-LABEL: {{.*}}callVariadicFunc:
487;
488; ENABLE: cmplwi 3, 0
489; ENABLE-NEXT: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
490;
491; Prologue code.
492; CHECK: mflr {{[0-9]+}}
493;
494; DISABLE: cmplwi 3, 0
495; DISABLE-32: stw 0, 72(1)
496; DISABLE-64-AIX: std 0,
497; DISABLE-NEXT: beq 0, {{.*}}[[ELSE_LABEL:BB[0-9_]+]]
498;
499; Setup of the varargs.
500; CHECK-64: mr 4, 3
501; CHECK-64-NEXT: mr 5, 3
502; CHECK-64-NEXT: mr 6, 3
503; CHECK-64-NEXT: mr 7, 3
504; CHECK-64-NEXT: mr 8, 3
505; CHECK-64-NEXT: mr 9, 3
506;
507; CHECK-32: mr 3, 4
508; CHECK-32-NEXT: mr 5, 4
509; ENABLE-32-NEXT: stw 0, 72(1)
510; CHECK-32: mr 6, 4
511; CHECK-32-NEXT: mr 7, 4
512; CHECK-32-NEXT: mr 8, 4
513; CHECK-32-NEXT: mr 9, 4
514;
515; CHECK-NEXT: bl {{.*}}someVariadicFunc
516; CHECK: slwi 3, 3, 3
517; DISABLE: b {{.*}}[[EPILOGUE_BB:BB[0-9_]+]]
518;
519; ENABLE: mtlr {{[0-9]+}}
520; ENABLE-NEXT: blr
521;
522; CHECK: {{.*}}[[ELSE_LABEL]]: # %if.else
523; CHECK-NEXT: slwi 3, 4, 1
524;
525; DISABLE: {{.*}}[[EPILOGUE_BB]]: # %if.end
526; DISABLE: mtlr
527; CHECK: blr
; If %cond is zero, returns %N << 1 (%if.else). Otherwise calls the variadic
; @someVariadicFunc with %N passed seven times and returns the result shifted
; left by 3.
528define i32 @callVariadicFunc(i32 %cond, i32 %N) {
529entry:
530  %tobool = icmp eq i32 %cond, 0
531  br i1 %tobool, label %if.else, label %if.then
532
533if.then:                                          ; preds = %entry
; One fixed i32 argument followed by six variadic i32 arguments, all %N.
534  %call = tail call i32 (i32, ...) @someVariadicFunc(i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N)
535  %shl = shl i32 %call, 3
536  br label %if.end
537
538if.else:                                          ; preds = %entry
539  %mul = shl nsw i32 %N, 1
540  br label %if.end
541
542if.end:                                           ; preds = %if.else, %if.then
543  %sum.0 = phi i32 [ %shl, %if.then ], [ %mul, %if.else ]
544  ret i32 %sum.0
545}
546
; External variadic callee of @callVariadicFunc: one fixed i32 parameter
; followed by variable arguments, returning i32.
547declare i32 @someVariadicFunc(i32, ...)
548
549
550
551; Make sure we do not insert unreachable code after noreturn function.
552; Although this is not incorrect to insert such code, it is useless
553; and it hurts the binary size.
554;
555; CHECK-LABEL: {{.*}}noreturn:
556; DISABLE: mflr {{[0-9]+}}
557;
558; CHECK: cmplwi 3, 0
559; DISABLE-32: stw 0, 72(1)
560; DISABLE-64-AIX: std 0,
561; CHECK-NEXT: bne{{[-]?}} 0, {{.*}}[[ABORT:BB[0-9_]+]]
562;
563; CHECK: li 3, 42
564;
565; DISABLE: mtlr {{[0-9]+}}
566;
567; CHECK-NEXT: blr
568;
569; CHECK: {{.*}}[[ABORT]]: # %if.abort
570; ENABLE: mflr {{[0-9]+}}
571; CHECK: bl {{.*}}abort
572; ENABLE-NOT: mtlr {{[0-9]+}}
; Returns 42 when %bad_thing is zero; otherwise calls @abort (attribute group
; #0) and ends the block with unreachable.
573define i32 @noreturn(i8 signext %bad_thing) {
574entry:
575  %tobool = icmp eq i8 %bad_thing, 0
576  br i1 %tobool, label %if.end, label %if.abort
577
578if.abort:
; No return path from this block: the call is followed by unreachable.
579  tail call void @abort() #0
580  unreachable
581
582if.end:
583  ret i32 42
584}
585
; External callee of @noreturn, declared with attribute group #0.
586declare void @abort() #0
587
; Attribute group #0: the function does not return and does not unwind.
588attributes #0 = { noreturn nounwind }
589
590
591; Make sure that we handle infinite loops properly. When checking that the Save
592; and Restore blocks are control flow equivalent, the loop searches for the
593; immediate (post) dominator for the (restore) save blocks. When either the Save
594; or Restore block is located in an infinite loop, the only immediate (post)
595; dominator is itself. In this case, we cannot perform shrink wrapping, but we
596; should return gracefully and continue compilation.
597; The only condition for this test is that the compilation finishes correctly.
598;
599; CHECK-LABEL: {{.*}}infiniteloop
600; CHECK: blr
; Branches on an undef condition either to %if.end (plain return) or to
; %if.then, which falls into a single-block loop with no exit edge. The loop
; calls @something, accumulates the result and stores it on each iteration.
601define void @infiniteloop() {
602entry:
603  br i1 undef, label %if.then, label %if.end
604
605if.then:
; Alloca of four i32 elements, placed outside the entry block.
606  %ptr = alloca i32, i32 4
607  br label %for.body
608
609for.body:                                         ; preds = %for.body, %if.then
610  %sum.03 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
611  %call = tail call i32 @something()
612  %add = add nsw i32 %call, %sum.03
613  store i32 %add, ptr %ptr
; Unconditional back edge: this block never leaves the loop.
614  br label %for.body
615
616if.end:
617  ret void
618}
619
620; Another infinite loop test, this time with a body bigger than just one block.
621; CHECK-LABEL: {{.*}}infiniteloop2
622; CHECK: blr
; Like @infiniteloop, but the loop spans three blocks: %for.body branches on an
; undef condition to %body1 or %body2, and both branch back to %for.body. None
; of the loop blocks has an edge out of the loop.
623define void @infiniteloop2() {
624entry:
625  br i1 undef, label %if.then, label %if.end
626
627if.then:
; Alloca of four i32 elements, placed outside the entry block.
628  %ptr = alloca i32, i32 4
629  br label %for.body
630
631for.body:                                         ; preds = %if.then, %body1, %body2
632  %sum.03 = phi i32 [ 0, %if.then ], [ %add, %body1 ], [ 1, %body2]
; Inline asm producing an i32 result and declaring r14 as clobbered.
633  %call = tail call i32 asm "mftb $0, 268", "=r,~{r14}"()
634  %add = add nsw i32 %call, %sum.03
635  store i32 %add, ptr %ptr
636  br i1 undef, label %body1, label %body2
637
638body1:
; Side-effecting nop that also declares r14 as clobbered.
639  tail call void asm sideeffect "nop", "~{r14}"()
640  br label %for.body
641
642body2:
; Side-effecting nop that also declares r14 as clobbered.
643  tail call void asm sideeffect "nop", "~{r14}"()
644  br label %for.body
645
646if.end:
647  ret void
648}
649
650; Another infinite loop test, this time with two nested infinite loops.
651; CHECK-LABEL: {{.*}}infiniteloop3
652; CHECK: bclr
; %entry and %body branch on undef conditions; control either reaches %end
; (plain return) or enters %loop2a. From there %loop1 branches to %loop2a or
; %loop2b, and both branch back to %loop1; none of these three blocks has an
; edge to %end.
653define void @infiniteloop3() {
654entry:
655  br i1 undef, label %loop2a, label %body
656
657body:                                             ; preds = %entry
658  br i1 undef, label %loop2a, label %end
659
660loop1:                                            ; preds = %loop2a, %loop2b
661  %var.phi = phi ptr [ %next.phi, %loop2b ], [ %var, %loop2a ]
662  %next.phi = phi ptr [ %next.load, %loop2b ], [ %next.var, %loop2a ]
; %var is defined by the phi in %loop2a.
663  %0 = icmp eq ptr %var, null
664  %next.load = load ptr, ptr undef
665  br i1 %0, label %loop2a, label %loop2b
666
667loop2a:                                           ; preds = %loop1, %body, %entry
668  %var = phi ptr [ null, %body ], [ null, %entry ], [ %next.phi, %loop1 ]
669  %next.var = phi ptr [ undef, %body ], [ null, %entry ], [ %next.load, %loop1 ]
670  br label %loop1
671
672loop2b:                                           ; preds = %loop1
673  store ptr %next.phi, ptr %var.phi
674  br label %loop1
675
676end:
677  ret void
678}
679
; Globals referenced by @transpose below.
; @columns: externally defined i32 array of unspecified length, read at indices 1-7.
680@columns = external global [0 x i32], align 4
; @lock, @htindex, @stride: zero-initialised i32 values that @transpose stores to.
681@lock = common global i32 0, align 4
682@htindex = common global i32 0, align 4
683@stride = common global i32 0, align 4
; @ht, @he: null-initialised pointers that @transpose loads and indexes through.
684@ht = common global ptr null, align 8
685@he = common global ptr null, align 8
686
687; Test for a bug that was caused when save point was equal to restore point.
688; Function Attrs: nounwind
689; CHECK-LABEL: {{.*}}transpose
690;
691; Store of callee-save register saved by shrink wrapping
692; FIXME: Test disabled: Improved scheduling needs no spills/reloads any longer!
693; CHECKXX: std [[CSR:[0-9]+]], -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill
694;
695; Reload of callee-save register
696; CHECKXX: ld [[CSR]], -[[STACK_OFFSET]](1) # 8-byte Folded Reload
697;
698; Ensure no subsequent uses of callee-save register before end of function
699; CHECKXX-NOT: {{[a-z]+}} [[CSR]]
700; CHECK: blr
; Packs @columns[1..3] and @columns[7..5] into two i32 values (shift left by 7,
; then or), combines the larger-first pair with @columns[4] into an i64, and
; stores derived values to @lock, @htindex and @stride. It then probes the i32
; table at @ht up to eight times (the initial index plus seven strided steps,
; each wrapped by subtracting 1050011 when it exceeds 1050010). On the first
; entry equal to the value stored in @lock it returns the sign-extended byte
; from @he at that index; if no probe matches it returns -128.
701define signext i32 @transpose() {
702entry:
; %or2.i packs columns 1, 2, 3; %or6.i packs columns 7, 6, 5.
703  %0 = load i32, ptr getelementptr inbounds ([0 x i32], ptr @columns, i64 0, i64 1), align 4
704  %shl.i = shl i32 %0, 7
705  %1 = load i32, ptr getelementptr inbounds ([0 x i32], ptr @columns, i64 0, i64 2), align 4
706  %or.i = or i32 %shl.i, %1
707  %shl1.i = shl i32 %or.i, 7
708  %2 = load i32, ptr getelementptr inbounds ([0 x i32], ptr @columns, i64 0, i64 3), align 4
709  %or2.i = or i32 %shl1.i, %2
710  %3 = load i32, ptr getelementptr inbounds ([0 x i32], ptr @columns, i64 0, i64 7), align 4
711  %shl3.i = shl i32 %3, 7
712  %4 = load i32, ptr getelementptr inbounds ([0 x i32], ptr @columns, i64 0, i64 6), align 4
713  %or4.i = or i32 %shl3.i, %4
714  %shl5.i = shl i32 %or4.i, 7
715  %5 = load i32, ptr getelementptr inbounds ([0 x i32], ptr @columns, i64 0, i64 5), align 4
716  %or6.i = or i32 %shl5.i, %5
717  %cmp.i = icmp ugt i32 %or2.i, %or6.i
718  br i1 %cmp.i, label %cond.true.i, label %cond.false.i
719
720cond.true.i:
; %or2.i is the larger value: it goes (with column 4) into the high part.
721  %shl7.i = shl i32 %or2.i, 7
722  %6 = load i32, ptr getelementptr inbounds ([0 x i32], ptr @columns, i64 0, i64 4), align 4
723  %or8.i = or i32 %6, %shl7.i
724  %conv.i = zext i32 %or8.i to i64
725  %shl9.i = shl nuw nsw i64 %conv.i, 21
726  %conv10.i = zext i32 %or6.i to i64
727  %or11.i = or i64 %shl9.i, %conv10.i
728  br label %hash.exit
729
730cond.false.i:
; Mirror of %cond.true.i with the roles of %or2.i and %or6.i swapped.
731  %shl12.i = shl i32 %or6.i, 7
732  %7 = load i32, ptr getelementptr inbounds ([0 x i32], ptr @columns, i64 0, i64 4), align 4
733  %or13.i = or i32 %7, %shl12.i
734  %conv14.i = zext i32 %or13.i to i64
735  %shl15.i = shl nuw nsw i64 %conv14.i, 21
736  %conv16.i = zext i32 %or2.i to i64
737  %or17.i = or i64 %shl15.i, %conv16.i
738  br label %hash.exit
739
740hash.exit:
; Derive and publish @lock, @htindex and @stride, then do the first probe of
; @ht at index %rem.i.
741  %cond.i = phi i64 [ %or11.i, %cond.true.i ], [ %or17.i, %cond.false.i ]
742  %shr.29.i = lshr i64 %cond.i, 17
743  %conv18.i = trunc i64 %shr.29.i to i32
744  store i32 %conv18.i, ptr @lock, align 4
745  %rem.i = srem i64 %cond.i, 1050011
746  %conv19.i = trunc i64 %rem.i to i32
747  store i32 %conv19.i, ptr @htindex, align 4
748  %rem20.i = urem i32 %conv18.i, 179
749  %add.i = or i32 %rem20.i, 131072
750  store i32 %add.i, ptr @stride, align 4
751  %8 = load ptr, ptr @ht, align 8
752  %arrayidx = getelementptr inbounds i32, ptr %8, i64 %rem.i
753  %9 = load i32, ptr %arrayidx, align 4
754  %cmp1 = icmp eq i32 %9, %conv18.i
755  br i1 %cmp1, label %if.then, label %if.end
756
757if.then:
; Match found by one of the eight probes: the phi selects the matching index.
758  %idxprom.lcssa = phi i64 [ %rem.i, %hash.exit ], [ %idxprom.1, %if.end ], [ %idxprom.2, %if.end.1 ], [ %idxprom.3, %if.end.2 ], [ %idxprom.4, %if.end.3 ], [ %idxprom.5, %if.end.4 ], [ %idxprom.6, %if.end.5 ], [ %idxprom.7, %if.end.6 ]
759  %10 = load ptr, ptr @he, align 8
760  %arrayidx3 = getelementptr inbounds i8, ptr %10, i64 %idxprom.lcssa
761  %11 = load i8, ptr %arrayidx3, align 1
762  %conv = sext i8 %11 to i32
763  br label %cleanup
764
765if.end:
; Probe 2: advance the index by the stride, wrapping past 1050010.
766  %add = add nsw i32 %add.i, %conv19.i
767  %cmp4 = icmp sgt i32 %add, 1050010
768  %sub = add nsw i32 %add, -1050011
769  %sub.add = select i1 %cmp4, i32 %sub, i32 %add
770  %idxprom.1 = sext i32 %sub.add to i64
771  %arrayidx.1 = getelementptr inbounds i32, ptr %8, i64 %idxprom.1
772  %12 = load i32, ptr %arrayidx.1, align 4
773  %cmp1.1 = icmp eq i32 %12, %conv18.i
774  br i1 %cmp1.1, label %if.then, label %if.end.1
775
776cleanup:
; Single return point: the byte found in %if.then, or -128 when the last
; probe (%if.end.6) also missed.
777  %retval.0 = phi i32 [ %conv, %if.then ], [ -128, %if.end.6 ]
778  ret i32 %retval.0
779
780if.end.1:
; Probe 3 (same step-and-wrap pattern as %if.end).
781  %add.1 = add nsw i32 %add.i, %sub.add
782  %cmp4.1 = icmp sgt i32 %add.1, 1050010
783  %sub.1 = add nsw i32 %add.1, -1050011
784  %sub.add.1 = select i1 %cmp4.1, i32 %sub.1, i32 %add.1
785  %idxprom.2 = sext i32 %sub.add.1 to i64
786  %arrayidx.2 = getelementptr inbounds i32, ptr %8, i64 %idxprom.2
787  %13 = load i32, ptr %arrayidx.2, align 4
788  %cmp1.2 = icmp eq i32 %13, %conv18.i
789  br i1 %cmp1.2, label %if.then, label %if.end.2
790
791if.end.2:
; Probe 4.
792  %add.2 = add nsw i32 %add.i, %sub.add.1
793  %cmp4.2 = icmp sgt i32 %add.2, 1050010
794  %sub.2 = add nsw i32 %add.2, -1050011
795  %sub.add.2 = select i1 %cmp4.2, i32 %sub.2, i32 %add.2
796  %idxprom.3 = sext i32 %sub.add.2 to i64
797  %arrayidx.3 = getelementptr inbounds i32, ptr %8, i64 %idxprom.3
798  %14 = load i32, ptr %arrayidx.3, align 4
799  %cmp1.3 = icmp eq i32 %14, %conv18.i
800  br i1 %cmp1.3, label %if.then, label %if.end.3
801
802if.end.3:
; Probe 5.
803  %add.3 = add nsw i32 %add.i, %sub.add.2
804  %cmp4.3 = icmp sgt i32 %add.3, 1050010
805  %sub.3 = add nsw i32 %add.3, -1050011
806  %sub.add.3 = select i1 %cmp4.3, i32 %sub.3, i32 %add.3
807  %idxprom.4 = sext i32 %sub.add.3 to i64
808  %arrayidx.4 = getelementptr inbounds i32, ptr %8, i64 %idxprom.4
809  %15 = load i32, ptr %arrayidx.4, align 4
810  %cmp1.4 = icmp eq i32 %15, %conv18.i
811  br i1 %cmp1.4, label %if.then, label %if.end.4
812
813if.end.4:
; Probe 6.
814  %add.4 = add nsw i32 %add.i, %sub.add.3
815  %cmp4.4 = icmp sgt i32 %add.4, 1050010
816  %sub.4 = add nsw i32 %add.4, -1050011
817  %sub.add.4 = select i1 %cmp4.4, i32 %sub.4, i32 %add.4
818  %idxprom.5 = sext i32 %sub.add.4 to i64
819  %arrayidx.5 = getelementptr inbounds i32, ptr %8, i64 %idxprom.5
820  %16 = load i32, ptr %arrayidx.5, align 4
821  %cmp1.5 = icmp eq i32 %16, %conv18.i
822  br i1 %cmp1.5, label %if.then, label %if.end.5
823
824if.end.5:
; Probe 7.
825  %add.5 = add nsw i32 %add.i, %sub.add.4
826  %cmp4.5 = icmp sgt i32 %add.5, 1050010
827  %sub.5 = add nsw i32 %add.5, -1050011
828  %sub.add.5 = select i1 %cmp4.5, i32 %sub.5, i32 %add.5
829  %idxprom.6 = sext i32 %sub.add.5 to i64
830  %arrayidx.6 = getelementptr inbounds i32, ptr %8, i64 %idxprom.6
831  %17 = load i32, ptr %arrayidx.6, align 4
832  %cmp1.6 = icmp eq i32 %17, %conv18.i
833  br i1 %cmp1.6, label %if.then, label %if.end.6
834
835if.end.6:
; Probe 8 (last): a miss here goes straight to %cleanup with -128.
836  %add.6 = add nsw i32 %add.i, %sub.add.5
837  %cmp4.6 = icmp sgt i32 %add.6, 1050010
838  %sub.6 = add nsw i32 %add.6, -1050011
839  %sub.add.6 = select i1 %cmp4.6, i32 %sub.6, i32 %add.6
840  %idxprom.7 = sext i32 %sub.add.6 to i64
841  %arrayidx.7 = getelementptr inbounds i32, ptr %8, i64 %idxprom.7
842  %18 = load i32, ptr %arrayidx.7, align 4
843  %cmp1.7 = icmp eq i32 %18, %conv18.i
844  br i1 %cmp1.7, label %if.then, label %cleanup
845}
846