; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --prefix-filecheck-ir-name unnamed --version 5
; RUN: opt -S -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=2 < %s 2>&1 | FileCheck %s

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"

@f = common global i32 0, align 4
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
@c = common global i32 0, align 4
@a = common global i32 0, align 4
@b = common global i32 0, align 4
@e = common global i32 0, align 4

; The loop has a value, %tmp17, that is used outside of the loop and is not a
; recognized reduction variable. However, %tmp17 is a non-header phi, which is
; an allowed exit.


define i32 @test1() {
; CHECK-LABEL: define i32 @test1() {
; CHECK-NEXT:  [[BB:.*]]:
; CHECK-NEXT:    [[B_PROMOTED:%.*]] = load i32, ptr @b, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[B_PROMOTED]], 1
; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4)
; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]]
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]]
; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0
; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10)
; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK:       [[SCALAR_PH]]:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ]
; CHECK-NEXT:    br label %[[DOTLR_PH_I:.*]]
; CHECK:       [[_LR_PH_I:.*:]]
; CHECK-NEXT:    [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-NEXT:    [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10
; CHECK-NEXT:    br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]]
; CHECK:       [[UNNAMEDBB10]]:
; CHECK-NEXT:    br label %[[BB16]]
; CHECK:       [[BB16]]:
; CHECK-NEXT:    [[UNNAMEDTMP17:%.*]] = phi i32 [ 0, %[[UNNAMEDBB10]] ], [ 1, %[[DOTLR_PH_I]] ]
; CHECK-NEXT:    [[UNNAMEDTMP18]] = add nsw i32 [[UNNAMEDTMP8]], 1
; CHECK-NEXT:    [[UNNAMEDTMP19:%.*]] = icmp slt i32 [[UNNAMEDTMP18]], 4
; CHECK-NEXT:    br i1 [[UNNAMEDTMP19]], label %[[DOTLR_PH_I]], label %[[F1_EXIT_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK:       [[F1_EXIT_LOOPEXIT]]:
; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi i32 [ [[UNNAMEDTMP17]], %[[BB16]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[DOTLCSSA]]
;
bb:
  %b.promoted = load i32, ptr @b, align 4
  br label %.lr.ph.i

.lr.ph.i:
  %tmp8 = phi i32 [ %tmp18, %bb16 ], [ %b.promoted, %bb ]
  %tmp2 = icmp sgt i32 %tmp8, 10
  br i1 %tmp2, label %bb16, label %bb10

bb10:
  br label %bb16

bb16:
  %tmp17 = phi i32 [ 0, %bb10 ], [ 1, %.lr.ph.i ]
  %tmp18 = add nsw i32 %tmp8, 1
  %tmp19 = icmp slt i32 %tmp18, 4
  br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit

f1.exit.loopexit:
  %.lcssa = phi i32 [ %tmp17, %bb16 ]
  ret i32 %.lcssa
}

; The non-header phi %tmp17 depends on the header phi %tmp8.

define i32 @test2() {
; CHECK-LABEL: define i32 @test2() {
; CHECK-NEXT:  [[BB:.*]]:
; CHECK-NEXT:    [[B_PROMOTED:%.*]] = load i32, ptr @b, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[B_PROMOTED]], 1
; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4)
; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]]
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]]
; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0
; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10)
; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> splat (i32 1), <2 x i32> [[VEC_IND]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK:       [[SCALAR_PH]]:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ]
; CHECK-NEXT:    br label %[[DOTLR_PH_I:.*]]
; CHECK:       [[_LR_PH_I:.*:]]
; CHECK-NEXT:    [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-NEXT:    [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10
; CHECK-NEXT:    br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]]
; CHECK:       [[UNNAMEDBB10]]:
; CHECK-NEXT:    br label %[[BB16]]
; CHECK:       [[BB16]]:
; CHECK-NEXT:    [[UNNAMEDTMP17:%.*]] = phi i32 [ [[UNNAMEDTMP8]], %[[UNNAMEDBB10]] ], [ 1, %[[DOTLR_PH_I]] ]
; CHECK-NEXT:    [[UNNAMEDTMP18]] = add nsw i32 [[UNNAMEDTMP8]], 1
; CHECK-NEXT:    [[UNNAMEDTMP19:%.*]] = icmp slt i32 [[UNNAMEDTMP18]], 4
; CHECK-NEXT:    br i1 [[UNNAMEDTMP19]], label %[[DOTLR_PH_I]], label %[[F1_EXIT_LOOPEXIT]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       [[F1_EXIT_LOOPEXIT]]:
; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi i32 [ [[UNNAMEDTMP17]], %[[BB16]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[DOTLCSSA]]
;
bb:
  %b.promoted = load i32, ptr @b, align 4
  br label %.lr.ph.i

.lr.ph.i:
  %tmp8 = phi i32 [ %tmp18, %bb16 ], [ %b.promoted, %bb ]
  %tmp2 = icmp sgt i32 %tmp8, 10
  br i1 %tmp2, label %bb16, label %bb10

bb10:
  br label %bb16

bb16:
  %tmp17 = phi i32 [ %tmp8, %bb10 ], [ 1, %.lr.ph.i ]
  %tmp18 = add nsw i32 %tmp8, 1
  %tmp19 = icmp slt i32 %tmp18, 4
  br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit

f1.exit.loopexit:
  %.lcssa = phi i32 [ %tmp17, %bb16 ]
  ret i32 %.lcssa
}

; More than 2 incoming values for the %tmp17 phi that is used outside the loop.
define i32 @test3(i32 %N) {
; CHECK-LABEL: define i32 @test3(
; CHECK-SAME: i32 [[N:%.*]]) {
; CHECK-NEXT:  [[BB:.*]]:
; CHECK-NEXT:    [[B_PROMOTED:%.*]] = load i32, ptr @b, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[B_PROMOTED]], 1
; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4)
; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]]
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]]
; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0
; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[N]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10)
; CHECK-NEXT:    [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    [[TMP6:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP5]], <2 x i1> zeroinitializer
; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer
; CHECK-NEXT:    [[PREDPHI1:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> splat (i32 2), <2 x i32> [[PREDPHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i32> [[PREDPHI1]], i32 1
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK:       [[SCALAR_PH]]:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ]
; CHECK-NEXT:    br label %[[DOTLR_PH_I:.*]]
; CHECK:       [[_LR_PH_I:.*:]]
; CHECK-NEXT:    [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-NEXT:    [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10
; CHECK-NEXT:    br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]]
; CHECK:       [[UNNAMEDBB10]]:
; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], [[N]]
; CHECK-NEXT:    br i1 [[CMP]], label %[[UNNAMEDBB12:.*]], label %[[BB16]]
; CHECK:       [[UNNAMEDBB12]]:
; CHECK-NEXT:    br label %[[BB16]]
; CHECK:       [[BB16]]:
; CHECK-NEXT:    [[UNNAMEDTMP17:%.*]] = phi i32 [ 0, %[[UNNAMEDBB10]] ], [ 1, %[[DOTLR_PH_I]] ], [ 2, %[[UNNAMEDBB12]] ]
; CHECK-NEXT:    [[UNNAMEDTMP18]] = add nsw i32 [[UNNAMEDTMP8]], 1
; CHECK-NEXT:    [[UNNAMEDTMP19:%.*]] = icmp slt i32 [[UNNAMEDTMP18]], 4
; CHECK-NEXT:    br i1 [[UNNAMEDTMP19]], label %[[DOTLR_PH_I]], label %[[F1_EXIT_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK:       [[F1_EXIT_LOOPEXIT]]:
; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi i32 [ [[UNNAMEDTMP17]], %[[BB16]] ], [ [[TMP8]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[DOTLCSSA]]
;
bb:
  %b.promoted = load i32, ptr @b, align 4
  br label %.lr.ph.i

.lr.ph.i:
  %tmp8 = phi i32 [ %tmp18, %bb16 ], [ %b.promoted, %bb ]
  %tmp2 = icmp sgt i32 %tmp8, 10
  br i1 %tmp2, label %bb16, label %bb10

bb10:
  %cmp = icmp sgt i32 %tmp8, %N
  br i1 %cmp, label %bb12, label %bb16

bb12:
  br label %bb16

bb16:
  %tmp17 = phi i32 [ 0, %bb10 ], [ 1, %.lr.ph.i ], [ 2, %bb12 ]
  %tmp18 = add nsw i32 %tmp8, 1
  %tmp19 = icmp slt i32 %tmp18, 4
  br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit

f1.exit.loopexit:
  %.lcssa = phi i32 [ %tmp17, %bb16 ]
  ret i32 %.lcssa
}

; More than one incoming value for the outside user %.lcssa.
define i32 @test4(i32 %N) {
; CHECK-LABEL: define i32 @test4(
; CHECK-SAME: i32 [[N:%.*]]) {
; CHECK-NEXT:  [[BB:.*]]:
; CHECK-NEXT:    [[B_PROMOTED:%.*]] = load i32, ptr @b, align 4
; CHECK-NEXT:    [[ICMP:%.*]] = icmp slt i32 [[B_PROMOTED]], [[N]]
; CHECK-NEXT:    br i1 [[ICMP]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[DOTLR_PH_I_PREHEADER:.*]]
; CHECK:       [[_LR_PH_I_PREHEADER:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[B_PROMOTED]], 1
; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4)
; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]]
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]]
; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0
; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10)
; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK:       [[SCALAR_PH]]:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[DOTLR_PH_I_PREHEADER]] ]
; CHECK-NEXT:    br label %[[DOTLR_PH_I:.*]]
; CHECK:       [[_LR_PH_I:.*:]]
; CHECK-NEXT:    [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-NEXT:    [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10
; CHECK-NEXT:    br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]]
; CHECK:       [[UNNAMEDBB10]]:
; CHECK-NEXT:    br label %[[BB16]]
; CHECK:       [[BB16]]:
; CHECK-NEXT:    [[UNNAMEDTMP17:%.*]] = phi i32 [ 0, %[[UNNAMEDBB10]] ], [ 1, %[[DOTLR_PH_I]] ]
; CHECK-NEXT:    [[UNNAMEDTMP18]] = add nsw i32 [[UNNAMEDTMP8]], 1
; CHECK-NEXT:    [[UNNAMEDTMP19:%.*]] = icmp slt i32 [[UNNAMEDTMP18]], 4
; CHECK-NEXT:    br i1 [[UNNAMEDTMP19]], label %[[DOTLR_PH_I]], label %[[F1_EXIT_LOOPEXIT_LOOPEXIT]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK:       [[F1_EXIT_LOOPEXIT_LOOPEXIT]]:
; CHECK-NEXT:    [[TMP17_LCSSA:%.*]] = phi i32 [ [[UNNAMEDTMP17]], %[[BB16]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label %[[F1_EXIT_LOOPEXIT]]
; CHECK:       [[F1_EXIT_LOOPEXIT]]:
; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi i32 [ 2, %[[BB]] ], [ [[TMP17_LCSSA]], %[[F1_EXIT_LOOPEXIT_LOOPEXIT]] ]
; CHECK-NEXT:    ret i32 [[DOTLCSSA]]
;
bb:
  %b.promoted = load i32, ptr @b, align 4
  %icmp = icmp slt i32 %b.promoted, %N
  br i1 %icmp, label %f1.exit.loopexit, label %.lr.ph.i

.lr.ph.i:
  %tmp8 = phi i32 [ %tmp18, %bb16 ], [ %b.promoted, %bb ]
  %tmp2 = icmp sgt i32 %tmp8, 10
  br i1 %tmp2, label %bb16, label %bb10

bb10:
  br label %bb16

bb16:
  %tmp17 = phi i32 [ 0, %bb10 ], [ 1, %.lr.ph.i ]
  %tmp18 = add nsw i32 %tmp8, 1
  %tmp19 = icmp slt i32 %tmp18, 4
  br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit

f1.exit.loopexit:
  %.lcssa = phi i32 [ %tmp17, %bb16 ], [ 2, %bb ]
  ret i32 %.lcssa
}

; Non-header phi that depends on a reduction and is used outside the loop.
; Reduction phis are only allowed to have bump or reduction operations as the
; inside user, so we should not vectorize this.
define i32 @reduction_sum(i32 %n, ptr noalias nocapture %A, ptr noalias nocapture %B) nounwind uwtable readonly noinline ssp {
; CHECK-LABEL: define i32 @reduction_sum(
; CHECK-SAME: i32 [[N:%.*]], ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[C1:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[C1]], label %[[HEADER_PREHEADER:.*]], [[DOT_CRIT_EDGE:label %.*]]
; CHECK:       [[HEADER_PREHEADER]]:
; CHECK-NEXT:    br label %[[HEADER:.*]]
; CHECK:       [[HEADER]]:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[BB16:.*]] ], [ 0, %[[HEADER_PREHEADER]] ]
; CHECK-NEXT:    [[SUM_02:%.*]] = phi i32 [ [[C9:%.*]], %[[BB16]] ], [ 0, %[[HEADER_PREHEADER]] ]
; CHECK-NEXT:    [[C2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[C3:%.*]] = load i32, ptr [[C2]], align 4
; CHECK-NEXT:    [[C4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[C5:%.*]] = load i32, ptr [[C4]], align 4
; CHECK-NEXT:    [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[SUM_02]], 10
; CHECK-NEXT:    br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]]
; CHECK:       [[UNNAMEDBB10]]:
; CHECK-NEXT:    br label %[[BB16]]
; CHECK:       [[BB16]]:
; CHECK-NEXT:    [[UNNAMEDTMP17:%.*]] = phi i32 [ [[SUM_02]], %[[UNNAMEDBB10]] ], [ 1, %[[HEADER]] ]
; CHECK-NEXT:    [[C6:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT:    [[C7:%.*]] = add i32 [[SUM_02]], [[C6]]
; CHECK-NEXT:    [[C8:%.*]] = add i32 [[C7]], [[C3]]
; CHECK-NEXT:    [[C9]] = add i32 [[C8]], [[C5]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[DOT_CRIT_EDGE_LOOPEXIT:.*]], label %[[HEADER]]
; CHECK:       [[__CRIT_EDGE_LOOPEXIT:.*:]]
; CHECK-NEXT:    [[TMP17_LCSSA:%.*]] = phi i32 [ [[UNNAMEDTMP17]], %[[BB16]] ]
; CHECK-NEXT:    [[C9_LCSSA:%.*]] = phi i32 [ [[C9]], %[[BB16]] ]
; CHECK-NEXT:    br [[DOT_CRIT_EDGE]]
; CHECK:       [[__CRIT_EDGE:.*:]]
; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[C9_LCSSA]], %[[DOT_CRIT_EDGE_LOOPEXIT]] ]
; CHECK-NEXT:    [[NONHDR_LCSSA:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ [[TMP17_LCSSA]], %[[DOT_CRIT_EDGE_LOOPEXIT]] ]
; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
;
entry:
  %c1 = icmp sgt i32 %n, 0
  br i1 %c1, label %header, label %._crit_edge

header:                                           ; preds = %bb16, %entry
  %indvars.iv = phi i64 [ %indvars.iv.next, %bb16 ], [ 0, %entry ]
  %sum.02 = phi i32 [ %c9, %bb16 ], [ 0, %entry ]
  %c2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %c3 = load i32, ptr %c2, align 4
  %c4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
  %c5 = load i32, ptr %c4, align 4
  %tmp2 = icmp sgt i32 %sum.02, 10
  br i1 %tmp2, label %bb16, label %bb10

bb10:
  br label %bb16

bb16:
  %tmp17 = phi i32 [ %sum.02, %bb10 ], [ 1, %header ]
  %c6 = trunc i64 %indvars.iv to i32
  %c7 = add i32 %sum.02, %c6
  %c8 = add i32 %c7, %c3
  %c9 = add i32 %c8, %c5
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %._crit_edge, label %header

._crit_edge:                                      ; preds = %bb16, %entry
  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %c9, %bb16 ]
  %nonhdr.lcssa = phi i32 [ 1, %entry], [ %tmp17, %bb16 ]
  ret i32 %sum.0.lcssa
}

; Invalid cyclic dependency with the header phi %iv, which prevents %iv from
; being recognized as an induction variable.
; Cannot vectorize.
define i32 @cyclic_dep_with_indvar() {
; CHECK-LABEL: define i32 @cyclic_dep_with_indvar() {
; CHECK-NEXT:  [[BB:.*]]:
; CHECK-NEXT:    [[B_PROMOTED:%.*]] = load i32, ptr @b, align 4
; CHECK-NEXT:    br label %[[DOTLR_PH_I:.*]]
; CHECK:       [[_LR_PH_I:.*:]]
; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IVNEXT:%.*]], %[[BB16:.*]] ], [ [[B_PROMOTED]], %[[BB]] ]
; CHECK-NEXT:    [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[IV]], 10
; CHECK-NEXT:    br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]]
; CHECK:       [[UNNAMEDBB10]]:
; CHECK-NEXT:    br label %[[BB16]]
; CHECK:       [[BB16]]:
; CHECK-NEXT:    [[UNNAMEDTMP17:%.*]] = phi i32 [ 0, %[[UNNAMEDBB10]] ], [ [[IV]], %[[DOTLR_PH_I]] ]
; CHECK-NEXT:    [[IVNEXT]] = add nsw i32 [[UNNAMEDTMP17]], 1
; CHECK-NEXT:    [[UNNAMEDTMP19:%.*]] = icmp slt i32 [[IVNEXT]], 4
; CHECK-NEXT:    br i1 [[UNNAMEDTMP19]], label %[[DOTLR_PH_I]], label %[[F1_EXIT_LOOPEXIT:.*]]
; CHECK:       [[F1_EXIT_LOOPEXIT]]:
; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi i32 [ [[UNNAMEDTMP17]], %[[BB16]] ]
; CHECK-NEXT:    ret i32 [[DOTLCSSA]]
;
bb:
  %b.promoted = load i32, ptr @b, align 4
  br label %.lr.ph.i

.lr.ph.i:
  %iv = phi i32 [ %ivnext, %bb16 ], [ %b.promoted, %bb ]
  %tmp2 = icmp sgt i32 %iv, 10
  br i1 %tmp2, label %bb16, label %bb10

bb10:
  br label %bb16

bb16:
  %tmp17 = phi i32 [ 0, %bb10 ], [ %iv, %.lr.ph.i ]
  %ivnext = add nsw i32 %tmp17, 1
  %tmp19 = icmp slt i32 %ivnext, 4
  br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit

f1.exit.loopexit:
  %.lcssa = phi i32 [ %tmp17, %bb16 ]
  ret i32 %.lcssa
}

; The non-reduction phi %tmp17, which is used outside the loop, has a cyclic
; dependence with the %x.05 phi.
; Cannot vectorize.
define i32 @not_valid_reduction(i32 %n, ptr noalias nocapture %A) nounwind uwtable readonly {
; CHECK-LABEL: define i32 @not_valid_reduction(
; CHECK-SAME: i32 [[N:%.*]], ptr noalias captures(none) [[A:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP4]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
; CHECK:       [[FOR_BODY_PREHEADER]]:
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[LATCH:.*]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[X_05:%.*]] = phi i32 [ [[UNNAMEDTMP17:%.*]], %[[LATCH]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[UNNAMEDTMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[UNNAMEDTMP2:%.*]] = icmp sgt i64 [[INDVARS_IV]], 10
; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 [[X_05]], [[UNNAMEDTMP0]]
; CHECK-NEXT:    br i1 [[UNNAMEDTMP2]], label %[[UNNAMEDBB16:.*]], label %[[UNNAMEDBB10:.*]]
; CHECK:       [[UNNAMEDBB10]]:
; CHECK-NEXT:    br label %[[UNNAMEDBB16]]
; CHECK:       [[UNNAMEDBB16]]:
; CHECK-NEXT:    [[UNNAMEDTMP17]] = phi i32 [ 1, %[[UNNAMEDBB10]] ], [ [[SUB]], %[[FOR_BODY]] ]
; CHECK-NEXT:    br label %[[LATCH]]
; CHECK:       [[LATCH]]:
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
; CHECK:       [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT:    [[TMP17_LCSSA:%.*]] = phi i32 [ [[UNNAMEDTMP17]], %[[LATCH]] ]
; CHECK-NEXT:    br label %[[FOR_END]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    [[X_0_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[TMP17_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    ret i32 [[X_0_LCSSA]]
;
entry:
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %latch, %entry
  %indvars.iv = phi i64 [ %indvars.iv.next, %latch ], [ 0, %entry ]
  %x.05 = phi i32 [ %tmp17, %latch ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %tmp0 = load i32, ptr %arrayidx, align 4
  %tmp2 = icmp sgt i64 %indvars.iv, 10
  %sub = sub nsw i32 %x.05, %tmp0
  br i1 %tmp2, label %bb16, label %bb10

bb10:
  br label %bb16

bb16:
  %tmp17 = phi i32 [ 1, %bb10 ], [ %sub, %for.body ]
  br label %latch

latch:
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %latch, %entry
  %x.0.lcssa = phi i32 [ 0, %entry ], [ %tmp17 , %latch ]
  ret i32 %x.0.lcssa
}

; The value used outside the loop is %tmp17.trunc, a trunc of the non-header
; phi %tmp17, rather than the phi itself. The assertions expect the loop to be
; vectorized, with the live-out extracted from the truncated vector.
define i8 @outside_user_non_phi() {
; CHECK-LABEL: define i8 @outside_user_non_phi() {
; CHECK-NEXT:  [[BB:.*]]:
; CHECK-NEXT:    [[B_PROMOTED:%.*]] = load i32, ptr @b, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[B_PROMOTED]], 1
; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4)
; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]]
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]]
; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[B_PROMOTED]], i64 0
; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10)
; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = trunc <2 x i32> [[PREDPHI]] to <2 x i8>
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i8> [[TMP4]], i32 1
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK:       [[SCALAR_PH]]:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ]
; CHECK-NEXT:    br label %[[DOTLR_PH_I:.*]]
; CHECK:       [[_LR_PH_I:.*:]]
; CHECK-NEXT:    [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-NEXT:    [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10
; CHECK-NEXT:    br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]]
; CHECK:       [[UNNAMEDBB10]]:
; CHECK-NEXT:    br label %[[BB16]]
; CHECK:       [[BB16]]:
; CHECK-NEXT:    [[UNNAMEDTMP17:%.*]] = phi i32 [ 0, %[[UNNAMEDBB10]] ], [ 1, %[[DOTLR_PH_I]] ]
; CHECK-NEXT:    [[TMP17_TRUNC:%.*]] = trunc i32 [[UNNAMEDTMP17]] to i8
; CHECK-NEXT:    [[UNNAMEDTMP18]] = add nsw i32 [[UNNAMEDTMP8]], 1
; CHECK-NEXT:    [[UNNAMEDTMP19:%.*]] = icmp slt i32 [[UNNAMEDTMP18]], 4
; CHECK-NEXT:    br i1 [[UNNAMEDTMP19]], label %[[DOTLR_PH_I]], label %[[F1_EXIT_LOOPEXIT]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK:       [[F1_EXIT_LOOPEXIT]]:
; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi i8 [ [[TMP17_TRUNC]], %[[BB16]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i8 [[DOTLCSSA]]
;
bb:
  %b.promoted = load i32, ptr @b, align 4
  br label %.lr.ph.i

.lr.ph.i:
  %tmp8 = phi i32 [ %tmp18, %bb16 ], [ %b.promoted, %bb ]
  %tmp2 = icmp sgt i32 %tmp8, 10
  br i1 %tmp2, label %bb16, label %bb10

bb10:
  br label %bb16

bb16:
  %tmp17 = phi i32 [ 0, %bb10 ], [ 1, %.lr.ph.i ]
  %tmp17.trunc = trunc i32 %tmp17 to i8
  %tmp18 = add nsw i32 %tmp8, 1
  %tmp19 = icmp slt i32 %tmp18, 4
  br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit

f1.exit.loopexit:
  %.lcssa = phi i8 [ %tmp17.trunc, %bb16 ]
  ret i8 %.lcssa
}

; The only value used outside the loop is the load %1; the loop-carried or
; chain (%result.08 / %or) has no user outside the loop. The assertions expect
; the loop to remain scalar.
define i32 @no_vectorize_reduction_with_outside_use(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwind uwtable readonly {
; CHECK-LABEL: define i32 @no_vectorize_reduction_with_outside_use(
; CHECK-SAME: i32 [[N:%.*]], ptr captures(none) [[A:%.*]], ptr captures(none) [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[CMP7:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP7]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
; CHECK:       [[FOR_BODY_PREHEADER]]:
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[RESULT_08:%.*]] = phi i32 [ [[OR:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
; CHECK-NEXT:    [[OR]] = or i32 [[ADD]], [[RESULT_08]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
; CHECK:       [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi i32 [ [[TMP1]], %[[FOR_BODY]] ]
; CHECK-NEXT:    br label %[[FOR_END]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[DOTLCSSA]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
;
entry:
  %cmp7 = icmp sgt i32 %n, 0
  br i1 %cmp7, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
  %1 = load i32, ptr %arrayidx2, align 4
  %add = add nsw i32 %1, %0
  %or = or i32 %add, %result.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %result.0.lcssa = phi i32 [ 0, %entry ], [ %1, %for.body ]
  ret i32 %result.0.lcssa
}

; vectorize c[i] = a[i] + b[i] loop where result of c[i] is used outside the
; loop
define i32 @sum_arrays_outside_use(ptr %B, ptr %A, ptr %C, i32 %N) {
; CHECK-LABEL: define i32 
@sum_arrays_outside_use( 644; CHECK-SAME: ptr [[B:%.*]], ptr [[A:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) { 645; CHECK-NEXT: [[BB:.*]]: 646; CHECK-NEXT: [[A3:%.*]] = ptrtoint ptr [[A]] to i32 647; CHECK-NEXT: [[B2:%.*]] = ptrtoint ptr [[B]] to i32 648; CHECK-NEXT: [[C1:%.*]] = ptrtoint ptr [[C]] to i32 649; CHECK-NEXT: [[B_PROMOTED:%.*]] = load i32, ptr @b, align 4 650; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[B_PROMOTED]], 1 651; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N]], i32 [[TMP0]]) 652; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] 653; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 654; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] 655; CHECK: [[VECTOR_MEMCHECK]]: 656; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[C1]], [[B2]] 657; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP2]], 8 658; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[C1]], [[A3]] 659; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i32 [[TMP3]], 8 660; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] 661; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] 662; CHECK: [[VECTOR_PH]]: 663; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 664; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] 665; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[B_PROMOTED]], [[N_VEC]] 666; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] 667; CHECK: [[VECTOR_BODY]]: 668; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] 669; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[B_PROMOTED]], [[INDEX]] 670; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], 0 671; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 672; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP6]] 673; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 674; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4 675; 
CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]] 676; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0 677; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x i32>, ptr [[TMP10]], align 4 678; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i32> [[WIDE_LOAD]], [[WIDE_LOAD5]] 679; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP6]] 680; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 681; CHECK-NEXT: store <2 x i32> [[TMP11]], ptr [[TMP13]], align 4 682; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 683; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 684; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] 685; CHECK: [[MIDDLE_BLOCK]]: 686; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[TMP11]], i32 1 687; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] 688; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] 689; CHECK: [[SCALAR_PH]]: 690; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[VECTOR_MEMCHECK]] ], [ [[B_PROMOTED]], %[[BB]] ] 691; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] 692; CHECK: [[_LR_PH_I:.*:]] 693; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IVNEXT:%.*]], %[[DOTLR_PH_I]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] 694; CHECK-NEXT: [[INDVARS_IV:%.*]] = sext i32 [[IV]] to i64 695; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] 696; CHECK-NEXT: [[BLOAD:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 697; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] 698; CHECK-NEXT: [[ALOAD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 699; CHECK-NEXT: [[SUM:%.*]] = add nsw i32 [[BLOAD]], [[ALOAD]] 700; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]] 701; CHECK-NEXT: store i32 
[[SUM]], ptr [[ARRAYIDX3]], align 4 702; CHECK-NEXT: [[IVNEXT]] = add nsw i32 [[IV]], 1 703; CHECK-NEXT: [[UNNAMEDTMP19:%.*]] = icmp slt i32 [[IVNEXT]], [[N]] 704; CHECK-NEXT: br i1 [[UNNAMEDTMP19]], label %[[DOTLR_PH_I]], label %[[F1_EXIT_LOOPEXIT]], !llvm.loop [[LOOP13:![0-9]+]] 705; CHECK: [[F1_EXIT_LOOPEXIT]]: 706; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[SUM]], %[[DOTLR_PH_I]] ], [ [[TMP15]], %[[MIDDLE_BLOCK]] ] 707; CHECK-NEXT: ret i32 [[DOTLCSSA]] 708; 709bb: 710 %b.promoted = load i32, ptr @b, align 4 711 br label %.lr.ph.i 712 713.lr.ph.i: 714 %iv = phi i32 [ %ivnext, %.lr.ph.i ], [ %b.promoted, %bb ] 715 %indvars.iv = sext i32 %iv to i64 716 %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv 717 %Bload = load i32, ptr %arrayidx2, align 4 718 %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv 719 %Aload = load i32, ptr %arrayidx, align 4 720 %sum = add nsw i32 %Bload, %Aload 721 %arrayidx3 = getelementptr inbounds i32, ptr %C, i64 %indvars.iv 722 store i32 %sum, ptr %arrayidx3, align 4 723 %ivnext = add nsw i32 %iv, 1 724 %tmp19 = icmp slt i32 %ivnext, %N 725 br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit 726 727f1.exit.loopexit: 728 %.lcssa = phi i32 [ %sum, %.lr.ph.i ] 729 ret i32 %.lcssa 730} 731 732@tab = common global [32 x i8] zeroinitializer, align 1 733 734define i32 @non_uniform_live_out() { 735; CHECK-LABEL: define i32 @non_uniform_live_out() { 736; CHECK-NEXT: [[ENTRY:.*]]: 737; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] 738; CHECK: [[VECTOR_PH]]: 739; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] 740; CHECK: [[VECTOR_BODY]]: 741; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] 742; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] 743; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 7) 744; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> 
[[TMP0]], i32 0 745; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[TMP1]] 746; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 747; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP3]], align 1 748; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i8> [[WIDE_LOAD]], splat (i8 1) 749; CHECK-NEXT: store <2 x i8> [[TMP4]], ptr [[TMP3]], align 1 750; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 751; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) 752; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20000 753; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] 754; CHECK: [[MIDDLE_BLOCK]]: 755; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1 756; CHECK-NEXT: br i1 false, label %[[FOR_END:.*]], label %[[SCALAR_PH]] 757; CHECK: [[SCALAR_PH]]: 758; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 20000, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] 759; CHECK-NEXT: br label %[[FOR_BODY:.*]] 760; CHECK: [[FOR_BODY]]: 761; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] 762; CHECK-NEXT: [[I_09:%.*]] = add i32 [[I_08]], 7 763; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_09]] 764; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 765; CHECK-NEXT: [[BUMP:%.*]] = add i8 [[TMP7]], 1 766; CHECK-NEXT: store i8 [[BUMP]], ptr [[ARRAYIDX]], align 1 767; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1 768; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 20000 769; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] 770; CHECK: [[FOR_END]]: 771; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ [[I_09]], %[[FOR_BODY]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ] 772; CHECK-NEXT: [[ARRAYIDX_OUT:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[LCSSA]] 773; 
CHECK-NEXT: store i8 42, ptr [[ARRAYIDX_OUT]], align 1 774; CHECK-NEXT: ret i32 0 775; 776entry: 777 br label %for.body 778 779for.body: ; preds = %for.body, %entry 780 %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ] 781 %i.09 = add i32 %i.08, 7 782 %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.09 783 %0 = load i8, ptr %arrayidx, align 1 784 %bump = add i8 %0, 1 785 store i8 %bump, ptr %arrayidx, align 1 786 %inc = add nsw i32 %i.08, 1 787 %exitcond = icmp eq i32 %i.08, 20000 788 br i1 %exitcond, label %for.end, label %for.body 789 790for.end: ; preds = %for.body 791 %lcssa = phi i32 [%i.09, %for.body] 792 %arrayidx.out = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %lcssa 793 store i8 42, ptr %arrayidx.out, align 1 794 ret i32 0 795} 796;. 797; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} 798; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} 799; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} 800; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} 801; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} 802; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} 803; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} 804; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} 805; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} 806; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} 807; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} 808; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]} 809; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} 810; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]]} 811; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]} 812; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META1]]} 813;. 814