; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes="default<O3>" -rotation-max-header-size=0 -S < %s | FileCheck %s --check-prefix=HOIST
; RUN: opt -passes="default<O3>" -rotation-max-header-size=1 -S < %s | FileCheck %s --check-prefix=HOIST
; RUN: opt -passes="default<O3>" -rotation-max-header-size=2 -S < %s | FileCheck %s --check-prefix=ROTATE
; RUN: opt -passes="default<O3>" -rotation-max-header-size=3 -S < %s | FileCheck %s --check-prefix=ROTATE

; This example is produced from very basic C code:
;
;   void f0();
;   void f1();
;   void f2();
;
;   void loop(int width) {
;       if(width < 1)
;           return;
;       for(int i = 0; i < width - 1; ++i) {
;           f0();
;           f1();
;       }
;       f0();
;       f2();
;   }

; We have a choice here. We can either
; * hoist the f0() call into the loop header, in which case
;   * either loop rotation becomes unprofitable, since the loop header might
;     have grown above a certain threshold, and such unrotated loops will be
;     ignored by LoopVectorizer, preventing vectorization,
;   * or loop rotation will succeed, resulting in some weird PHIs that will also
;     harm vectorization;
; * or not hoist the f0() call before performing loop rotation,
;   at the cost of potential code bloat and/or potentially successfully rotating
;   the loops, vectorizing them at the cost of compile time.
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"

; The three callees of the C example. They are declared without bodies, so the
; calls to them cannot be inlined or removed in this module.
declare void @f0()
declare void @f1()
declare void @f2()

declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)

; IR for the C function loop(int width) from the example at the top of this
; file, with its locals still held in allocas. Two outcomes are checked:
;  * under the "HOIST" prefix (-rotation-max-header-size=0 and =1), the f0()
;    call sits in the loop header for.cond and the exit test stays at the top
;    of the loop;
;  * under the "ROTATE" prefix (-rotation-max-header-size=2 and =3), the exit
;    test is at the end of for.body and f0() is called both in for.body and in
;    for.cond.cleanup.
define void @_Z4loopi(i32 %width) {
; HOIST-LABEL: @_Z4loopi(
; HOIST-NEXT:  entry:
; HOIST-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
; HOIST-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
; HOIST:       for.cond.preheader:
; HOIST-NEXT:    [[SUB:%.*]] = add nsw i32 [[WIDTH]], -1
; HOIST-NEXT:    br label [[FOR_COND:%.*]]
; HOIST:       for.cond:
; HOIST-NEXT:    [[I_0:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[FOR_COND_PREHEADER]] ]
; HOIST-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[I_0]], [[SUB]]
; HOIST-NEXT:    tail call void @f0()
; HOIST-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
; HOIST:       for.cond.cleanup:
; HOIST-NEXT:    tail call void @f2()
; HOIST-NEXT:    br label [[RETURN]]
; HOIST:       for.body:
; HOIST-NEXT:    tail call void @f1()
; HOIST-NEXT:    [[INC]] = add nuw i32 [[I_0]], 1
; HOIST-NEXT:    br label [[FOR_COND]]
; HOIST:       return:
; HOIST-NEXT:    ret void
;
; ROTATE-LABEL: @_Z4loopi(
; ROTATE-NEXT:  entry:
; ROTATE-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
; ROTATE-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
; ROTATE:       for.cond.preheader:
; ROTATE-NEXT:    [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1
; ROTATE-NEXT:    br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; ROTATE:       for.body.preheader:
; ROTATE-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -2
; ROTATE-NEXT:    br label [[FOR_BODY:%.*]]
; ROTATE:       for.cond.cleanup:
; ROTATE-NEXT:    tail call void @f0()
; ROTATE-NEXT:    tail call void @f2()
; ROTATE-NEXT:    br label [[RETURN]]
; ROTATE:       for.body:
; ROTATE-NEXT:    [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; ROTATE-NEXT:    tail call void @f0()
; ROTATE-NEXT:    tail call void @f1()
; ROTATE-NEXT:    [[INC]] = add nuw nsw i32 [[I_04]], 1
; ROTATE-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[I_04]], [[TMP0]]
; ROTATE-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
; ROTATE:       return:
; ROTATE-NEXT:    ret void
;
entry:
  ; Stack slots for the parameter and for the induction variable i.
  %width.addr = alloca i32, align 4
  %i = alloca i32, align 4
  store i32 %width, ptr %width.addr, align 4
  %i1 = load i32, ptr %width.addr, align 4
  ; Early exit: if (width < 1) return;
  %cmp = icmp slt i32 %i1, 1
  br i1 %cmp, label %if.then, label %if.end

if.then:
  br label %return

if.end:
  ; Loop setup: the lifetime of %i starts here and i is initialised to 0.
  call void @llvm.lifetime.start.p0(i64 4, ptr %i)
  store i32 0, ptr %i, align 4
  br label %for.cond

for.cond:
  ; Loop header: stay in the loop while i < width - 1.
  %i3 = load i32, ptr %i, align 4
  %i4 = load i32, ptr %width.addr, align 4
  %sub = sub nsw i32 %i4, 1
  %cmp1 = icmp slt i32 %i3, %sub
  br i1 %cmp1, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
  ; Loop exit: the lifetime of %i ends before falling through to for.end.
  call void @llvm.lifetime.end.p0(i64 4, ptr %i)
  br label %for.end

for.body:
  ; Loop body: f0(); f1();
  call void @f0()
  call void @f1()
  br label %for.inc

for.inc:
  ; ++i
  %i6 = load i32, ptr %i, align 4
  %inc = add nsw i32 %i6, 1
  store i32 %inc, ptr %i, align 4
  br label %for.cond

for.end:
  ; After the loop: f0(); f2(); -- this f0() call is the same call that opens
  ; the loop body, which is what makes it a candidate for hoisting.
  call void @f0()
  call void @f2()
  br label %return

return:
  ret void
}