; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S %s | FileCheck --check-prefix=VF4 %s
; RUN: opt -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=1 -S %s | FileCheck --check-prefix=IC2 %s

; Checks how the loop vectorizer handles casts (trunc/zext/sext) applied to
; induction variables. Each function is checked twice: once with a forced
; vector width of 4 and interleave count of 1 (prefix VF4), and once with a
; forced width of 1 and interleave count of 2 (prefix IC2).

; rdar://problem/12848162

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

@a = common global [2048 x i32] zeroinitializer, align 16

; Stores the i64 induction variable, truncated to i32, into @a at index %iv.
; The loop exits once the truncated incremented induction variable equals 1024.
define void @example12() {
; VF4-LABEL: @example12(
; VF4-LABEL: vector.body:
; VF4: [[VEC_IND:%.+]] = phi <4 x i32>
; VF4: store <4 x i32> [[VEC_IND]]
; VF4: middle.block:
;
; IC2-LABEL: @example12(
; IC2-LABEL: vector.body:
; IC2-NEXT: [[INDEX:%.+]] = phi i64 [ 0, %vector.ph ]
; IC2: [[TRUNC:%.+]] = trunc i64 [[INDEX]] to i32
; IC2-NEXT: [[TRUNC0:%.+]] = add i32 [[TRUNC]], 0
; IC2-NEXT: [[TRUNC1:%.+]] = add i32 [[TRUNC]], 1
; IC2: store i32 [[TRUNC0]],
; IC2-NEXT: store i32 [[TRUNC1]],
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 %iv
  %iv.trunc = trunc i64 %iv to i32
  store i32 %iv.trunc, ptr %gep, align 4
  %iv.next = add i64 %iv, 1
  %iv.next.trunc = trunc i64 %iv.next to i32
  %exitcond = icmp eq i32 %iv.next.trunc, 1024
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

; The i16 induction variable is zero-extended to i32 and immediately truncated
; back to i16 before being stored to %dst. The checks expect the stored value
; to be the i16 induction itself, with no extend/truncate pair in between.
define void @redundant_iv_cast(ptr %dst) {
; VF4-LABEL: @redundant_iv_cast
; VF4: vector.body:
; VF4: [[VEC_IND:%.+]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, %vector.ph ], [ [[VEC_IND_NEXT:%.+]], %vector.body ]
; VF4: store <4 x i16> [[VEC_IND]]
; VF4: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
;
; IC2-LABEL: @redundant_iv_cast
; IC2: vector.body:
; IC2-NEXT: [[CAN_IV:%.+]] = phi i32 [ 0, %vector.ph ], [ [[CAN_IV_NEXT:%.+]], %vector.body ]
; IC2-NEXT: [[OFFSET_IDX:%.+]] = trunc i32 [[CAN_IV]] to i16
; IC2-NEXT: [[P0:%.+]] = add i16 [[OFFSET_IDX]], 0
; IC2-NEXT: [[P1:%.+]] = add i16 [[OFFSET_IDX]], 1
; IC2: store i16 [[P0]]
; IC2-NEXT: store i16 [[P1]]
;
entry:
  br label %loop

loop:
  %j.0 = phi i16 [ 0, %entry ], [ %inc, %loop ]
  %ext = zext i16 %j.0 to i32
  %trunc = trunc i32 %ext to i16
  %gep = getelementptr inbounds i16, ptr %dst, i16 %j.0
  store i16 %trunc, ptr %gep
  %0 = icmp eq i16 10000, %j.0
  %inc = add i16 %j.0, 1
  br i1 %0, label %exit, label %loop

exit:
  ret void
}

; A second induction variable starts at 10 and advances by the loop-invariant
; argument %step. Its value truncated to i32 is stored into @a at index %iv.
; The IC2 checks expect the step to be truncated once, before the vector loop.
define void @cast_variable_step(i64 %step) {
; VF4-LABEL: @cast_variable_step(
; VF4-LABEL: vector.body:
; VF4: [[VEC_IND:%.+]] = phi <4 x i32>
; VF4: store <4 x i32> [[VEC_IND]]
; VF4: middle.block:
;
; IC2-LABEL: @cast_variable_step(
; IC2: [[TRUNC_STEP:%.+]] = trunc i64 %step to i32
; IC2: br label %vector.body
;
; IC2-LABEL: vector.body:
; IC2-NEXT: [[INDEX:%.+]] = phi i64 [ 0, %vector.ph ]
; IC2: [[MUL:%.+]] = mul i64 [[INDEX]], %step
; IC2-NEXT: [[OFFSET_IDX:%.+]] = add i64 10, [[MUL]]
; IC2-NEXT: [[TRUNC_OFF:%.+]] = trunc i64 [[OFFSET_IDX]] to i32
; IC2-NEXT: [[STEP0:%.+]] = mul i32 0, [[TRUNC_STEP]]
; IC2-NEXT: [[T0:%.+]] = add i32 [[TRUNC_OFF]], [[STEP0]]
; IC2-NEXT: [[STEP1:%.+]] = mul i32 1, [[TRUNC_STEP]]
; IC2-NEXT: [[T1:%.+]] = add i32 [[TRUNC_OFF]], [[STEP1]]
; IC2: store i32 [[T0]],
; IC2-NEXT: store i32 [[T1]],
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %iv.2 = phi i64 [ 10, %entry ], [ %iv.2.next, %loop ]
  %gep = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 %iv
  %iv.2.trunc = trunc i64 %iv.2 to i32
  store i32 %iv.2.trunc, ptr %gep, align 4
  %iv.next = add i64 %iv, 1
  %iv.2.next = add i64 %iv.2, %step
  %exitcond = icmp eq i64 %iv.next, 1024
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

; The i32 induction variable is sign-extended to i64 for addressing and then
; truncated back to i32 for the stored value. The loop continues while the
; incremented induction variable is less than 3; the checks expect the lanes
; of the widened loop to be compared against 2 (unsigned less-or-equal).
define void @cast_induction_tail_folding(ptr %A) {
; VF4-LABEL: @cast_induction_tail_folding(
; VF4: [[INDEX:%.+]] = phi i32 [ 0, %vector.ph ]
; VF4-NEXT: [[VEC_IND:%.+]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %vector.ph ]
; VF4-NEXT: = icmp ule <4 x i32> [[VEC_IND]], splat (i32 2)
; VF4-NEXT: = sext <4 x i32> [[VEC_IND]] to <4 x i64>
;
; IC2-LABEL: @cast_induction_tail_folding(
; IC2: [[INDEX:%.+]] = phi i32 [ 0, %vector.ph ]
; IC2-NEXT: [[INDEX0:%.+]] = add i32 [[INDEX]], 0
; IC2-NEXT: [[INDEX1:%.+]] = add i32 [[INDEX]], 1
; IC2-NEXT: = icmp ule i32 [[INDEX0]], 2
; IC2-NEXT: = icmp ule i32 [[INDEX1]], 2
;
entry:
  br label %loop

loop:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  %iv.ext = sext i32 %iv to i64
  %iv.trunc = trunc i64 %iv.ext to i32
  %gep = getelementptr inbounds i32, ptr %A, i64 %iv.ext
  store i32 %iv.trunc, ptr %gep
  %iv.next = add i32 %iv, 1
  %c = icmp slt i32 %iv.next, 3
  br i1 %c, label %loop, label %exit

exit:
  ret void
}