xref: /llvm-project/llvm/test/Transforms/LoopVectorize/cast-induction.ll (revision 38fffa630ee80163dc65e759392ad29798905679)
; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S %s | FileCheck --check-prefix=VF4 %s
; RUN: opt -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=1 -S %s | FileCheck --check-prefix=IC2 %s

; The loop vectorizer is run twice over this file, and each run is verified
; with its own FileCheck prefix:
;   - prefix VF4 covers the run with vector width 4 and interleave count 1;
;   - prefix IC2 covers the run with vector width 1 and interleave count 2.
; Every function below stores a cast (trunc / zext / sext) of an induction
; variable.

; Bug-tracker reference kept from the original test (presumably the report
; that motivated it; the tracker entry is not visible from this file).
; rdar://problem/12848162

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

; Destination array for the stores in @example12 and @cast_variable_step.
@a = common global [2048 x i32] zeroinitializer, align 16
9
; Stores the i32 truncation of a 64-bit induction variable into @a. The exit
; test compares the truncated increment against 1024.
;
; Vector width 4: the checks expect the stored value to be a <4 x i32>
; induction phi.
;
; Interleave count 2 at scalar width: the checks expect the i64 index to be
; truncated once, with the two parts formed by adding 0 and 1 to the truncated
; value before each store.
;
; The vector.body lines are plain checks rather than label checks: that block
; name is emitted for every function in this file, and a label pattern is
; meant to match one unique line of the output.
define void @example12() {
; VF4-LABEL: @example12(
; VF4: vector.body:
; VF4: [[VEC_IND:%.+]] = phi <4 x i32>
; VF4: store <4 x i32> [[VEC_IND]]
; VF4: middle.block:
;
; IC2-LABEL: @example12(
; IC2:        vector.body:
; IC2-NEXT:   [[INDEX:%.+]] = phi i64 [ 0, %vector.ph ]
; IC2:        [[TRUNC:%.+]] = trunc i64 [[INDEX]] to i32
; IC2-NEXT:   [[TRUNC0:%.+]] = add i32 [[TRUNC]], 0
; IC2-NEXT:   [[TRUNC1:%.+]] = add i32 [[TRUNC]], 1
; IC2:        store i32 [[TRUNC0]],
; IC2-NEXT:   store i32 [[TRUNC1]],
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 %iv
  %iv.trunc = trunc i64 %iv to i32
  store i32 %iv.trunc, ptr %gep, align 4
  %iv.next = add i64 %iv, 1
  %iv.next.trunc = trunc i64 %iv.next to i32
  %exitcond = icmp eq i32 %iv.next.trunc, 1024
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}
42
; Stores an i16 induction variable after a zext-to-i32 / trunc-to-i16 round
; trip, so the stored value equals the induction variable itself.
;
; Vector width 4: the checks expect a <4 x i16> induction phi starting at
; <0, 1, 2, 3> to be stored directly and then advanced by a splat of 4.
;
; Interleave count 2 at scalar width: the checks expect the i32 canonical
; induction to be truncated to i16 once, with the two stored parts formed by
; adding 0 and 1.
;
; The label patterns end in '(' so that they match only this function's
; definition line and not any symbol whose name merely starts the same way.
define void @redundant_iv_cast(ptr %dst) {
; VF4-LABEL: @redundant_iv_cast(
; VF4: vector.body:
; VF4:   [[VEC_IND:%.+]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, %vector.ph ], [ [[VEC_IND_NEXT:%.+]], %vector.body ]
; VF4:  store <4 x i16> [[VEC_IND]]
; VF4:  [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
;
; IC2-LABEL: @redundant_iv_cast(
; IC2:      vector.body:
; IC2-NEXT:  [[CAN_IV:%.+]] = phi i32 [ 0, %vector.ph ], [ [[CAN_IV_NEXT:%.+]], %vector.body ]
; IC2-NEXT:  [[OFFSET_IDX:%.+]] = trunc i32 [[CAN_IV]] to i16
; IC2-NEXT:  [[P0:%.+]] = add i16 [[OFFSET_IDX]], 0
; IC2-NEXT:  [[P1:%.+]] = add i16 [[OFFSET_IDX]], 1
; IC2:       store i16 [[P0]]
; IC2-NEXT:  store i16 [[P1]]
;
entry:
  br label %loop

loop:
  %j.0 = phi i16 [ 0, %entry ], [ %inc, %loop ]
  %ext = zext i16 %j.0 to i32
  %trunc = trunc i32 %ext to i16
  %gep = getelementptr inbounds i16, ptr %dst, i16 %j.0
  store i16 %trunc, ptr %gep
  ; The exit test uses the pre-increment value, so the iteration with
  ; %j.0 == 10000 still performs its store before leaving the loop.
  %0 = icmp eq i16 10000, %j.0
  %inc = add i16 %j.0, 1
  br i1 %0, label %exit, label %loop

exit:
  ret void
}
76
77
; Stores the i32 truncation of a secondary i64 induction variable that starts
; at 10 and advances by the loop-invariant argument %step. The primary
; induction variable indexes @a and bounds the loop at 1024 iterations.
;
; Vector width 4: the checks expect the stored value to be a <4 x i32>
; induction phi.
;
; Interleave count 2 at scalar width: the checks expect %step to be truncated
; to i32 before the vector body is entered. Inside the body the secondary
; value is rebuilt as 10 + index * %step, truncated, and each part adds
; part-number * truncated-step to it.
;
; The vector.body lines are plain checks rather than label checks: that block
; name is emitted for every function in this file, and a label pattern is
; meant to match one unique line of the output. The multiply is matched
; through the captured INDEX variable rather than a hard-coded value name.
define void @cast_variable_step(i64 %step) {
; VF4-LABEL: @cast_variable_step(
; VF4: vector.body:
; VF4: [[VEC_IND:%.+]] = phi <4 x i32>
; VF4: store <4 x i32> [[VEC_IND]]
; VF4: middle.block:
;
; IC2-LABEL: @cast_variable_step(
; IC2:   [[TRUNC_STEP:%.+]] = trunc i64 %step to i32
; IC2:   br label %vector.body
;
; IC2:        vector.body:
; IC2-NEXT:   [[INDEX:%.+]] = phi i64 [ 0, %vector.ph ]
; IC2:        [[MUL:%.+]] = mul i64 [[INDEX]], %step
; IC2-NEXT:   [[OFFSET_IDX:%.+]] = add i64 10, [[MUL]]
; IC2-NEXT:   [[TRUNC_OFF:%.+]] = trunc i64 [[OFFSET_IDX]] to i32
; IC2-NEXT:   [[STEP0:%.+]] = mul i32 0, [[TRUNC_STEP]]
; IC2-NEXT:   [[T0:%.+]] = add i32 [[TRUNC_OFF]], [[STEP0]]
; IC2-NEXT:   [[STEP1:%.+]] = mul i32 1, [[TRUNC_STEP]]
; IC2-NEXT:   [[T1:%.+]] = add i32 [[TRUNC_OFF]], [[STEP1]]
; IC2:        store i32 [[T0]],
; IC2-NEXT:   store i32 [[T1]],
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %iv.2 = phi i64 [ 10, %entry ], [ %iv.2.next, %loop ]
  %gep = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 %iv
  %iv.2.trunc = trunc i64 %iv.2 to i32
  store i32 %iv.2.trunc, ptr %gep, align 4
  %iv.next = add i64 %iv, 1
  %iv.2.next = add i64 %iv.2, %step
  %exitcond = icmp eq i64 %iv.next, 1024
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}
118
; An i32 induction variable is sign-extended to i64 for addressing, and the
; value stored is that extension truncated back to i32. The loop body runs for
; induction values 0, 1 and 2.
;
; Both configurations are expected to keep an i32 index phi starting at 0 and
; to compare the induction values against 2 with an unsigned <= test: per lane
; at vector width 4, per part at interleave count 2.
define void @cast_induction_tail_folding(ptr %A) {
; VF4-LABEL: @cast_induction_tail_folding(
; VF4:       [[INDEX:%.+]] = phi i32 [ 0, %vector.ph ]
; VF4-NEXT:  [[VEC_IND:%.+]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %vector.ph ]
; VF4-NEXT:  = icmp ule <4 x i32> [[VEC_IND]], splat (i32 2)
; VF4-NEXT:  = sext <4 x i32> [[VEC_IND]] to <4 x i64>
;
; IC2-LABEL: @cast_induction_tail_folding(
; IC2:      [[INDEX:%.+]] = phi i32 [ 0, %vector.ph ]
; IC2-NEXT: [[INDEX0:%.+]] = add i32 [[INDEX]], 0
; IC2-NEXT: [[INDEX1:%.+]] = add i32 [[INDEX]], 1
; IC2-NEXT: = icmp ule i32 [[INDEX0]], 2
; IC2-NEXT: = icmp ule i32 [[INDEX1]], 2
;
entry:
  br label %body

body:
  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
  ; Widen for addressing, then narrow again for the stored value.
  %i.wide = sext i32 %i to i64
  %i.narrow = trunc i64 %i.wide to i32
  %slot = getelementptr inbounds i32, ptr %A, i64 %i.wide
  store i32 %i.narrow, ptr %slot
  %i.next = add i32 %i, 1
  ; Stay in the loop while the incremented value is below 3.
  %again = icmp slt i32 %i.next, 3
  br i1 %again, label %body, label %done

done:
  ret void
}
149