xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/revec.ll (revision f71ea4bc1b01fd7e29048db82b3e21fba74e8dab)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-100 %s | FileCheck %s
3
; test1: four <4 x i32> loads from %a and four from %b, at i32 element offsets
; 0, 4, 8 and 12, are added pairwise and stored to the same offsets of %c.
; The expected output replaces each group of four with a single <16 x i32>
; load / add / store.
4define void @test1(ptr %a, ptr %b, ptr %c) {
5; CHECK-LABEL: @test1(
6; CHECK-NEXT:  entry:
7; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A:%.*]], align 4
8; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr [[B:%.*]], align 4
9; CHECK-NEXT:    [[TMP2:%.*]] = add <16 x i32> [[TMP1]], [[TMP0]]
10; CHECK-NEXT:    store <16 x i32> [[TMP2]], ptr [[C:%.*]], align 4
11; CHECK-NEXT:    ret void
12;
13entry:
14  %arrayidx3 = getelementptr inbounds i32, ptr %a, i64 4
15  %arrayidx7 = getelementptr inbounds i32, ptr %a, i64 8
16  %arrayidx11 = getelementptr inbounds i32, ptr %a, i64 12
17  %0 = load <4 x i32>, ptr %a, align 4
18  %1 = load <4 x i32>, ptr %arrayidx3, align 4
19  %2 = load <4 x i32>, ptr %arrayidx7, align 4
20  %3 = load <4 x i32>, ptr %arrayidx11, align 4
21  %arrayidx19 = getelementptr inbounds i32, ptr %b, i64 4
22  %arrayidx23 = getelementptr inbounds i32, ptr %b, i64 8
23  %arrayidx27 = getelementptr inbounds i32, ptr %b, i64 12
24  %4 = load <4 x i32>, ptr %b, align 4
25  %5 = load <4 x i32>, ptr %arrayidx19, align 4
26  %6 = load <4 x i32>, ptr %arrayidx23, align 4
27  %7 = load <4 x i32>, ptr %arrayidx27, align 4
28  %add.i = add <4 x i32> %4, %0
29  %add.i63 = add <4 x i32> %5, %1
30  %add.i64 = add <4 x i32> %6, %2
31  %add.i65 = add <4 x i32> %7, %3
32  %arrayidx36 = getelementptr inbounds i32, ptr %c, i64 4
33  %arrayidx39 = getelementptr inbounds i32, ptr %c, i64 8
34  %arrayidx42 = getelementptr inbounds i32, ptr %c, i64 12
35  store <4 x i32> %add.i, ptr %c, align 4
36  store <4 x i32> %add.i63, ptr %arrayidx36, align 4
37  store <4 x i32> %add.i64, ptr %arrayidx39, align 4
38  store <4 x i32> %add.i65, ptr %arrayidx42, align 4
39  ret void
40}
41
; test2: two <8 x i16> loads from %in (i16 element offsets 0 and 8) are each
; passed twice to llvm.sadd.sat.v8i16 (value added to itself) and stored to the
; matching offsets of %out. The expected output is one <16 x i16> load, one
; llvm.sadd.sat.v16i16 call and one <16 x i16> store.
42define void @test2(ptr %in, ptr %out) {
43; CHECK-LABEL: @test2(
44; CHECK-NEXT:  entry:
45; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i16>, ptr [[IN:%.*]], align 2
46; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> [[TMP0]], <16 x i16> [[TMP0]])
47; CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[OUT:%.*]], align 2
48; CHECK-NEXT:    ret void
49;
50entry:
51  %0 = getelementptr i16, ptr %in, i64 8
52  %1 = load <8 x i16>, ptr %in, align 2
53  %2 = load <8 x i16>, ptr %0, align 2
54  %3 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %1, <8 x i16> %1)
55  %4 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %2, <8 x i16> %2)
56  %5 = getelementptr i16, ptr %out, i64 8
57  store <8 x i16> %3, ptr %out, align 2
58  store <8 x i16> %4, ptr %5, align 2
59  ret void
60}
61
; test3: two selects with *scalar* i1 conditions (%x == null, %y == null) pick
; between <4 x i32> values loaded from %x and %y; results go to adjacent halves
; of %z. In the expected output the two pointer compares become one <2 x ptr>
; icmp, a shufflevector repeats each condition bit four times to form an
; <8 x i1> mask, and a single <8 x i32> select / store remains.
62define void @test3(ptr %x, ptr %y, ptr %z) {
63; CHECK-LABEL: @test3(
64; CHECK-NEXT:  entry:
65; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x ptr> poison, ptr [[X:%.*]], i32 0
66; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> [[TMP0]], ptr [[Y:%.*]], i32 1
67; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x ptr> [[TMP1]], zeroinitializer
68; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[X]], align 4
69; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[Y]], align 4
70; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i1> [[TMP2]], <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
71; CHECK-NEXT:    [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[TMP3]], <8 x i32> [[TMP4]]
72; CHECK-NEXT:    store <8 x i32> [[TMP6]], ptr [[Z:%.*]], align 4
73; CHECK-NEXT:    ret void
74;
75entry:
76  %0 = getelementptr inbounds i32, ptr %x, i64 4
77  %1 = getelementptr inbounds i32, ptr %y, i64 4
78  %2 = load <4 x i32>, ptr %x, align 4
79  %3 = load <4 x i32>, ptr %0, align 4
80  %4 = load <4 x i32>, ptr %y, align 4
81  %5 = load <4 x i32>, ptr %1, align 4
82  %6 = icmp eq ptr %x, null
83  %7 = icmp eq ptr %y, null
84  %8 = select i1 %6, <4 x i32> %2, <4 x i32> %4
85  %9 = select i1 %7, <4 x i32> %3, <4 x i32> %5
86  %10 = getelementptr inbounds i32, ptr %z, i64 4
87  store <4 x i32> %8, ptr %z, align 4
88  store <4 x i32> %9, ptr %10, align 4
89  ret void
90}
91
; test4: two fmul / fadd / fcmp chains start from the same <8 x float> load.
; The fmuls are identical, but the fadds differ in their first operand
; (zeroinitializer vs. the loaded value). The <8 x i1> compare results are
; stored at %out and %out + 8 (i1 elements).
; The expected output performs the arithmetic on <16 x float>, with the chain
; ending in %6 occupying lanes 0..7 and the chain ending in %5 occupying lanes
; 8..15, so the store to %out takes the llvm.vector.extract at index 8 and the
; store to %out + 8 takes the extract at index 0.
92define void @test4(ptr %in, ptr %out) {
93; CHECK-LABEL: @test4(
94; CHECK-NEXT:  entry:
95; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x float>, ptr [[IN:%.*]], align 4
96; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> [[TMP0]], i64 0)
97; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
98; CHECK-NEXT:    [[TMP4:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> zeroinitializer, i64 0)
99; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP4]], <8 x float> zeroinitializer, i64 8)
100; CHECK-NEXT:    [[TMP6:%.*]] = fmul <16 x float> [[TMP3]], [[TMP5]]
101; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> zeroinitializer, i64 8)
102; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> [[TMP8]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
103; CHECK-NEXT:    [[TMP10:%.*]] = fadd <16 x float> [[TMP9]], [[TMP6]]
104; CHECK-NEXT:    [[TMP11:%.*]] = fcmp ogt <16 x float> [[TMP10]], [[TMP5]]
105; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i1, ptr [[OUT:%.*]], i64 8
106; CHECK-NEXT:    [[TMP13:%.*]] = call <8 x i1> @llvm.vector.extract.v8i1.v16i1(<16 x i1> [[TMP11]], i64 8)
107; CHECK-NEXT:    store <8 x i1> [[TMP13]], ptr [[OUT]], align 1
108; CHECK-NEXT:    [[TMP14:%.*]] = call <8 x i1> @llvm.vector.extract.v8i1.v16i1(<16 x i1> [[TMP11]], i64 0)
109; CHECK-NEXT:    store <8 x i1> [[TMP14]], ptr [[TMP12]], align 1
110; CHECK-NEXT:    ret void
111;
112entry:
113  %0 = load <8 x float>, ptr %in, align 4
114  %1 = fmul <8 x float> %0, zeroinitializer
115  %2 = fmul <8 x float> %0, zeroinitializer
116  %3 = fadd <8 x float> zeroinitializer, %1
117  %4 = fadd <8 x float> %0, %2
118  %5 = fcmp ogt <8 x float> %3, zeroinitializer
119  %6 = fcmp ogt <8 x float> %4, zeroinitializer
120  %7 = getelementptr i1, ptr %out, i64 8
121  store <8 x i1> %5, ptr %out, align 1
122  store <8 x i1> %6, ptr %7, align 1
123  ret void
124}
125
; test5: two icmp ult instructions on <4 x ptr> operands built with
; insertelement (one chain feeds the other via %1); the compare results are
; unused. The expected output is instruction-for-instruction the same as the
; input, i.e. the function is expected to come through the pass unchanged.
126define void @test5(ptr %ptr0, ptr %ptr1) {
127; CHECK-LABEL: @test5(
128; CHECK-NEXT:  entry:
129; CHECK-NEXT:    [[GETELEMENTPTR0:%.*]] = getelementptr i8, ptr null, i64 0
130; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x ptr> <ptr null, ptr null, ptr undef, ptr undef>, ptr [[GETELEMENTPTR0]], i32 2
131; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x ptr> [[TMP0]], ptr null, i32 3
132; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <4 x ptr> zeroinitializer, [[TMP1]]
133; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x ptr> <ptr poison, ptr null, ptr null, ptr null>, ptr [[PTR0:%.*]], i32 0
134; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x ptr> [[TMP1]], ptr [[PTR1:%.*]], i32 3
135; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult <4 x ptr> [[TMP3]], [[TMP4]]
136; CHECK-NEXT:    ret void
137;
138entry:
139  %getelementptr0 = getelementptr i8, ptr null, i64 0
140  %0 = insertelement <4 x ptr> <ptr null, ptr null, ptr undef, ptr undef>, ptr %getelementptr0, i32 2
141  %1 = insertelement <4 x ptr> %0, ptr null, i32 3
142  %2 = icmp ult <4 x ptr> zeroinitializer, %1
143  %3 = insertelement <4 x ptr> <ptr poison, ptr null, ptr null, ptr null>, ptr %ptr0, i32 0
144  %4 = insertelement <4 x ptr> %1, ptr %ptr1, i32 3
145  %5 = icmp ult <4 x ptr> %3, %4
146  ret void
147}
148
; test6: an `or` chain over four <4 x i1> values. Each value is
; (%0 ugt 0) and (%1 eq 0) and (%0 ugt 0), where %0 is a <4 x i32> load and %1
; is a <4 x i16> load; all four copies are textually identical computations.
; The expected output widens the compares and ands to 16 lanes, then computes
; the `or` chain per result lane: for each of the four lanes a shufflevector
; gathers lanes {k, k+4, k+8, k+12}, llvm.vector.reduce.or.v4i1 reduces them,
; and insertelement rebuilds the <4 x i1>. The trailing select and icmp stay
; at <4 x i32>.
149define <4 x i1> @test6(ptr %in1, ptr %in2) {
150; CHECK-LABEL: @test6(
151; CHECK-NEXT:  entry:
152; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[IN1:%.*]], align 4
153; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[IN2:%.*]], align 2
154; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> poison, <4 x i32> [[TMP0]], i64 0)
155; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
156; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> poison, <4 x i32> zeroinitializer, i64 0)
157; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> [[TMP7]], <4 x i32> zeroinitializer, i64 4)
158; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> [[TMP8]], <4 x i32> zeroinitializer, i64 8)
159; CHECK-NEXT:    [[TMP10:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> [[TMP9]], <4 x i32> zeroinitializer, i64 12)
160; CHECK-NEXT:    [[TMP11:%.*]] = icmp ugt <16 x i32> [[TMP6]], [[TMP10]]
161; CHECK-NEXT:    [[TMP15:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> poison, <4 x i16> [[TMP1]], i64 0)
162; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <16 x i16> [[TMP15]], <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
163; CHECK-NEXT:    [[TMP17:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> poison, <4 x i16> zeroinitializer, i64 0)
164; CHECK-NEXT:    [[TMP18:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP17]], <4 x i16> zeroinitializer, i64 4)
165; CHECK-NEXT:    [[TMP19:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP18]], <4 x i16> zeroinitializer, i64 8)
166; CHECK-NEXT:    [[TMP20:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP19]], <4 x i16> zeroinitializer, i64 12)
167; CHECK-NEXT:    [[TMP21:%.*]] = icmp eq <16 x i16> [[TMP16]], [[TMP20]]
168; CHECK-NEXT:    [[TMP22:%.*]] = and <16 x i1> [[TMP11]], [[TMP21]]
169; CHECK-NEXT:    [[TMP23:%.*]] = icmp ugt <16 x i32> [[TMP6]], [[TMP10]]
170; CHECK-NEXT:    [[TMP24:%.*]] = and <16 x i1> [[TMP22]], [[TMP23]]
171; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <16 x i1> [[TMP24]], <16 x i1> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
172; CHECK-NEXT:    [[TMP26:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP25]])
173; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i1> poison, i1 [[TMP26]], i64 0
174; CHECK-NEXT:    [[TMP28:%.*]] = shufflevector <16 x i1> [[TMP24]], <16 x i1> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
175; CHECK-NEXT:    [[TMP29:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP28]])
176; CHECK-NEXT:    [[TMP30:%.*]] = insertelement <4 x i1> [[TMP27]], i1 [[TMP29]], i64 1
177; CHECK-NEXT:    [[TMP31:%.*]] = shufflevector <16 x i1> [[TMP24]], <16 x i1> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
178; CHECK-NEXT:    [[TMP32:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP31]])
179; CHECK-NEXT:    [[TMP33:%.*]] = insertelement <4 x i1> [[TMP30]], i1 [[TMP32]], i64 2
180; CHECK-NEXT:    [[TMP34:%.*]] = shufflevector <16 x i1> [[TMP24]], <16 x i1> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
181; CHECK-NEXT:    [[TMP35:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP34]])
182; CHECK-NEXT:    [[TMP36:%.*]] = insertelement <4 x i1> [[TMP33]], i1 [[TMP35]], i64 3
183; CHECK-NEXT:    [[VBSL:%.*]] = select <4 x i1> [[TMP36]], <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>
184; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <4 x i32> [[VBSL]], <i32 2, i32 3, i32 4, i32 5>
185; CHECK-NEXT:    ret <4 x i1> [[CMP]]
186;
187entry:
188  %0 = load <4 x i32>, ptr %in1, align 4
189  %1 = load <4 x i16>, ptr %in2, align 2
190  %cmp000 = icmp ugt <4 x i32> %0, zeroinitializer
191  %cmp001 = icmp ugt <4 x i32> %0, zeroinitializer
192  %cmp002 = icmp ugt <4 x i32> %0, zeroinitializer
193  %cmp003 = icmp ugt <4 x i32> %0, zeroinitializer
194  %cmp100 = icmp eq <4 x i16> %1, zeroinitializer
195  %cmp101 = icmp eq <4 x i16> %1, zeroinitializer
196  %cmp102 = icmp eq <4 x i16> %1, zeroinitializer
197  %cmp103 = icmp eq <4 x i16> %1, zeroinitializer
198  %and.cmp0 = and <4 x i1> %cmp000, %cmp100
199  %and.cmp1 = and <4 x i1> %cmp001, %cmp101
200  %and.cmp2 = and <4 x i1> %cmp002, %cmp102
201  %and.cmp3 = and <4 x i1> %cmp003, %cmp103
202  %cmp004 = icmp ugt <4 x i32> %0, zeroinitializer
203  %cmp005 = icmp ugt <4 x i32> %0, zeroinitializer
204  %cmp006 = icmp ugt <4 x i32> %0, zeroinitializer
205  %cmp007 = icmp ugt <4 x i32> %0, zeroinitializer
206  %and.cmp4 = and <4 x i1> %and.cmp0, %cmp004
207  %and.cmp5 = and <4 x i1> %and.cmp1, %cmp005
208  %and.cmp6 = and <4 x i1> %and.cmp2, %cmp006
209  %and.cmp7 = and <4 x i1> %and.cmp3, %cmp007
210  %or0 = or <4 x i1> %and.cmp5, %and.cmp4
211  %or1 = or <4 x i1> %or0, %and.cmp6
212  %or2 = or <4 x i1> %or1, %and.cmp7
213  %vbsl = select <4 x i1> %or2, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>
214  %cmp = icmp ugt <4 x i32> %vbsl, <i32 2, i32 3, i32 4, i32 5>
215  ret <4 x i1> %cmp
216}
217
; test7: two `trunc <8 x i64> zeroinitializer to <8 x i16>` results are stored
; at null + 16 bytes and at null (in that program order). The function has no
; explicit entry label, so values are numbered from %1.
; The expected output assembles a <16 x i64> from the two constant operands via
; llvm.vector.insert, performs one <16 x i64> -> <16 x i16> trunc, and emits a
; single <16 x i16> store to null.
218define void @test7() {
219; CHECK-LABEL: @test7(
220; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i64> @llvm.vector.insert.v16i64.v8i64(<16 x i64> poison, <8 x i64> zeroinitializer, i64 0)
221; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i64> @llvm.vector.insert.v16i64.v8i64(<16 x i64> [[TMP1]], <8 x i64> zeroinitializer, i64 8)
222; CHECK-NEXT:    [[TMP3:%.*]] = trunc <16 x i64> [[TMP2]] to <16 x i16>
223; CHECK-NEXT:    store <16 x i16> [[TMP3]], ptr null, align 2
224; CHECK-NEXT:    ret void
225;
226  %1 = getelementptr i8, ptr null, i64 16
227  %2 = trunc <8 x i64> zeroinitializer to <8 x i16>
228  store <8 x i16> %2, ptr %1, align 2
229  %3 = trunc <8 x i64> zeroinitializer to <8 x i16>
230  store <8 x i16> %3, ptr null, align 2
231  ret void
232}
233
; test8: phi nodes of <2 x float> type. Block for0 has four phis that take
; zeroinitializer from entry and alternate between %4 and %5 from for.body;
; for.body has two phis that each reference themselves on the back edge.
; The expected output merges the for0 phis into one <8 x float> phi and the
; for.body phis into one <4 x float> phi; a shufflevector with mask
; <0,1,2,3,0,1,2,3> widens the 4-lane value for the 8-lane phi's for.body edge.
234define void @test8() {
235; CHECK-LABEL: @test8(
236; CHECK-NEXT:  entry:
237; CHECK-NEXT:    [[TMP0:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v2f32(<8 x float> poison, <2 x float> zeroinitializer, i64 0)
238; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v2f32(<8 x float> [[TMP0]], <2 x float> zeroinitializer, i64 2)
239; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v2f32(<8 x float> [[TMP1]], <2 x float> zeroinitializer, i64 4)
240; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v2f32(<8 x float> [[TMP2]], <2 x float> zeroinitializer, i64 6)
241; CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> zeroinitializer, i64 0)
242; CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP4]], <2 x float> zeroinitializer, i64 2)
243; CHECK-NEXT:    br i1 false, label [[FOR0:%.*]], label [[FOR_BODY:%.*]]
244; CHECK:       for0:
245; CHECK-NEXT:    [[TMP6:%.*]] = phi <8 x float> [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TMP8:%.*]], [[FOR_BODY]] ]
246; CHECK-NEXT:    ret void
247; CHECK:       for.body:
248; CHECK-NEXT:    [[TMP7:%.*]] = phi <4 x float> [ [[TMP7]], [[FOR_BODY]] ], [ [[TMP5]], [[ENTRY]] ]
249; CHECK-NEXT:    [[TMP8]] = shufflevector <4 x float> [[TMP7]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
250; CHECK-NEXT:    br i1 false, label [[FOR0]], label [[FOR_BODY]]
251;
252entry:
253  br i1 false, label %for0, label %for.body
254
255for0:
256  %0 = phi <2 x float> [ zeroinitializer, %entry ], [ %4, %for.body ]
257  %1 = phi <2 x float> [ zeroinitializer, %entry ], [ %5, %for.body ]
258  %2 = phi <2 x float> [ zeroinitializer, %entry ], [ %4, %for.body ]
259  %3 = phi <2 x float> [ zeroinitializer, %entry ], [ %5, %for.body ]
260  ret void
261
262for.body:
263  %4 = phi <2 x float> [ %4, %for.body ], [ zeroinitializer, %entry ]
264  %5 = phi <2 x float> [ %5, %for.body ], [ zeroinitializer, %entry ]
265  br i1 false, label %for0, label %for.body
266}
267
; test9: inside a block that branches to itself, two
; `sext <4 x i16> zeroinitializer to <4 x i32>` results are stored at null and
; null + 16 bytes.
; The expected output builds the <8 x i16> operand in the entry block, and in
; the loop emits the cast as `trunc` to <8 x i1> followed by `zext` to
; <8 x i32> before a single <8 x i32> store.
; NOTE(review): the trunc+zext form presumably comes from bit-width
; minimization on the all-zero operand -- confirm against the pass.
268define void @test9() {
269; CHECK-LABEL: @test9(
270; CHECK-NEXT:  entry:
271; CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> poison, <4 x i16> zeroinitializer, i64 0)
272; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> [[TMP0]], <4 x i16> zeroinitializer, i64 4)
273; CHECK-NEXT:    br label [[FOR_BODY13:%.*]]
274; CHECK:       for.body13:
275; CHECK-NEXT:    [[TMP2:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i1>
276; CHECK-NEXT:    [[TMP3:%.*]] = zext <8 x i1> [[TMP2]] to <8 x i32>
277; CHECK-NEXT:    store <8 x i32> [[TMP3]], ptr null, align 4
278; CHECK-NEXT:    br label [[FOR_BODY13]]
279;
280entry:
281  br label %for.body13
282
283for.body13:                                       ; preds = %for.body13, %entry
284  %vmovl.i111 = sext <4 x i16> zeroinitializer to <4 x i32>
285  %vmovl.i110 = sext <4 x i16> zeroinitializer to <4 x i32>
286  store <4 x i32> %vmovl.i111, ptr null, align 4
287  %add.ptr29 = getelementptr i8, ptr null, i64 16
288  store <4 x i32> %vmovl.i110, ptr %add.ptr29, align 4
289  br label %for.body13
290}
291
; test10: a <16 x i8> load is split into low/high <8 x i8> halves by
; shufflevector, each half is sign-extended to <8 x i16>, split again into
; <4 x i16> halves, sign-extended to <4 x i32>, and the four results are stored
; at byte offsets 0, 16, 32 and 48 from null.
; The expected output ends in a single <16 x i32> store whose value is produced
; by sext <16 x i8> -> <16 x i16>, trunc back to <16 x i8>, then sext to
; <16 x i32>. It also contains two 32-lane shufflevectors ([[TMP3]], [[TMP6]])
; that no later expected line references.
292define void @test10() {
293; CHECK-LABEL: @test10(
294; CHECK-NEXT:  entry:
295; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr null, align 1
296; CHECK-NEXT:    [[TMP2:%.*]] = call <32 x i8> @llvm.vector.insert.v32i8.v16i8(<32 x i8> poison, <16 x i8> [[TMP0]], i64 0)
297; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i8> [[TMP2]], <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
298; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <32 x i8> [[TMP2]], <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
299; CHECK-NEXT:    [[TMP5:%.*]] = sext <16 x i8> [[TMP4]] to <16 x i16>
300; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <16 x i16> [[TMP5]], <16 x i16> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
301; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <16 x i16> [[TMP5]], <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
302; CHECK-NEXT:    [[TMP8:%.*]] = trunc <16 x i16> [[TMP7]] to <16 x i8>
303; CHECK-NEXT:    [[TMP9:%.*]] = sext <16 x i8> [[TMP8]] to <16 x i32>
304; CHECK-NEXT:    store <16 x i32> [[TMP9]], ptr null, align 4
305; CHECK-NEXT:    ret void
306;
307entry:
308  %0 = load <16 x i8>, ptr null, align 1
309  %shuffle.i = shufflevector <16 x i8> %0, <16 x i8> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
310  %shuffle.i107 = shufflevector <16 x i8> %0, <16 x i8> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
311  %vmovl.i106 = sext <8 x i8> %shuffle.i to <8 x i16>
312  %vmovl.i = sext <8 x i8> %shuffle.i107 to <8 x i16>
313  %shuffle.i113 = shufflevector <8 x i16> %vmovl.i106, <8 x i16> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
314  %shuffle.i115 = shufflevector <8 x i16> %vmovl.i106, <8 x i16> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
315  %shuffle.i112 = shufflevector <8 x i16> %vmovl.i, <8 x i16> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
316  %shuffle.i114 = shufflevector <8 x i16> %vmovl.i, <8 x i16> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
317  %vmovl.i111 = sext <4 x i16> %shuffle.i113 to <4 x i32>
318  %vmovl.i110 = sext <4 x i16> %shuffle.i115 to <4 x i32>
319  %vmovl.i109 = sext <4 x i16> %shuffle.i112 to <4 x i32>
320  %vmovl.i108 = sext <4 x i16> %shuffle.i114 to <4 x i32>
321  %add.ptr29 = getelementptr i8, ptr null, i64 16
322  %add.ptr32 = getelementptr i8, ptr null, i64 32
323  %add.ptr35 = getelementptr i8, ptr null, i64 48
324  store <4 x i32> %vmovl.i111, ptr null, align 4
325  store <4 x i32> %vmovl.i110, ptr %add.ptr29, align 4
326  store <4 x i32> %vmovl.i109, ptr %add.ptr32, align 4
327  store <4 x i32> %vmovl.i108, ptr %add.ptr35, align 4
328  ret void
329}
330
; test11: two <2 x i64> values (an insertelement result and an add result) are
; truncated to <2 x i8>, divided with urem by zeroinitializer, and compared
; `ne` against zeroinitializer; the compare results are unused.
; The expected output truncates each source to <2 x i16> first, packs them into
; a <4 x i16> (the add result in lanes 0..1, the insertelement result in lanes
; 2..3), truncates that to <4 x i8>, and then emits one <4 x i8> urem and one
; <4 x i8> icmp.
331define void @test11(<2 x i64> %0, i64 %1, <2 x i64> %2) {
332; CHECK-LABEL: @test11(
333; CHECK-NEXT:  entry:
334; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 1
335; CHECK-NEXT:    [[TMP4:%.*]] = add <2 x i64> <i64 5, i64 0>, [[TMP2:%.*]]
336; CHECK-NEXT:    [[TMP5:%.*]] = trunc <2 x i64> [[TMP4]] to <2 x i16>
337; CHECK-NEXT:    [[TMP6:%.*]] = call <4 x i16> @llvm.vector.insert.v4i16.v2i16(<4 x i16> poison, <2 x i16> [[TMP5]], i64 0)
338; CHECK-NEXT:    [[TMP7:%.*]] = trunc <2 x i64> [[TMP3]] to <2 x i16>
339; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i16> @llvm.vector.insert.v4i16.v2i16(<4 x i16> [[TMP6]], <2 x i16> [[TMP7]], i64 2)
340; CHECK-NEXT:    [[TMP9:%.*]] = trunc <4 x i16> [[TMP8]] to <4 x i8>
341; CHECK-NEXT:    [[TMP10:%.*]] = call <4 x i8> @llvm.vector.insert.v4i8.v2i8(<4 x i8> poison, <2 x i8> zeroinitializer, i64 0)
342; CHECK-NEXT:    [[TMP11:%.*]] = call <4 x i8> @llvm.vector.insert.v4i8.v2i8(<4 x i8> [[TMP10]], <2 x i8> zeroinitializer, i64 2)
343; CHECK-NEXT:    [[TMP12:%.*]] = urem <4 x i8> [[TMP9]], [[TMP11]]
344; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne <4 x i8> [[TMP12]], [[TMP11]]
345; CHECK-NEXT:    ret void
346;
347entry:
348  %3 = insertelement <2 x i64> %0, i64 %1, i32 1
349  %4 = add <2 x i64> <i64 5, i64 0>, %2
350  %5 = trunc <2 x i64> %3 to <2 x i8>
351  %6 = trunc <2 x i64> %4 to <2 x i8>
352  %7 = urem <2 x i8> %5, zeroinitializer
353  %8 = urem <2 x i8> %6, zeroinitializer
354  %9 = icmp ne <2 x i8> %7, zeroinitializer
355  %10 = icmp ne <2 x i8> %8, zeroinitializer
356  ret void
357}
358
; test12: four <8 x float> loads at float offsets 33, 41, 50 and 75 from null
; each go through fpext -> fadd with zero -> fptrunc -> fcmp ogt; the compare
; results are unused. Only offsets 33 and 41 are exactly 8 floats apart.
; The expected output loads offsets 33..48 as one <16 x float>, loads offsets
; 50 and 75 as separate <8 x float> values, assembles a <32 x float> with
; llvm.vector.insert (75 at lane 0, 50 at lane 8, the 16-wide load at lane 16),
; and runs the fpext / fadd / fptrunc / fcmp chain once at 32 lanes.
359define void @test12() {
360; CHECK-LABEL: @test12(
361; CHECK-NEXT:  entry:
362; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr float, ptr null, i64 33
363; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, ptr null, i64 50
364; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, ptr null, i64 75
365; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x float>, ptr [[TMP1]], align 4
366; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x float>, ptr [[TMP2]], align 4
367; CHECK-NEXT:    [[TMP5:%.*]] = load <16 x float>, ptr [[TMP0]], align 4
368; CHECK-NEXT:    [[TMP6:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> poison, <8 x float> [[TMP4]], i64 0)
369; CHECK-NEXT:    [[TMP7:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP6]], <8 x float> [[TMP3]], i64 8)
370; CHECK-NEXT:    [[TMP8:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v16f32(<32 x float> [[TMP7]], <16 x float> [[TMP5]], i64 16)
371; CHECK-NEXT:    [[TMP9:%.*]] = fpext <32 x float> [[TMP8]] to <32 x double>
372; CHECK-NEXT:    [[TMP10:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> poison, <8 x double> zeroinitializer, i64 0)
373; CHECK-NEXT:    [[TMP11:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP10]], <8 x double> zeroinitializer, i64 8)
374; CHECK-NEXT:    [[TMP12:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP11]], <8 x double> zeroinitializer, i64 16)
375; CHECK-NEXT:    [[TMP13:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP12]], <8 x double> zeroinitializer, i64 24)
376; CHECK-NEXT:    [[TMP14:%.*]] = fadd <32 x double> [[TMP13]], [[TMP9]]
377; CHECK-NEXT:    [[TMP15:%.*]] = fptrunc <32 x double> [[TMP14]] to <32 x float>
378; CHECK-NEXT:    [[TMP16:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> poison, <8 x float> zeroinitializer, i64 0)
379; CHECK-NEXT:    [[TMP17:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP16]], <8 x float> zeroinitializer, i64 8)
380; CHECK-NEXT:    [[TMP18:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP17]], <8 x float> zeroinitializer, i64 16)
381; CHECK-NEXT:    [[TMP19:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP18]], <8 x float> zeroinitializer, i64 24)
382; CHECK-NEXT:    [[TMP20:%.*]] = fcmp ogt <32 x float> [[TMP19]], [[TMP15]]
383; CHECK-NEXT:    ret void
384;
385entry:
386  %0 = getelementptr float, ptr null, i64 33
387  %1 = getelementptr float, ptr null, i64 41
388  %2 = getelementptr float, ptr null, i64 50
389  %3 = getelementptr float, ptr null, i64 75
390  %4 = load <8 x float>, ptr %0, align 4
391  %5 = load <8 x float>, ptr %1, align 4
392  %6 = load <8 x float>, ptr %2, align 4
393  %7 = load <8 x float>, ptr %3, align 4
394  %8 = fpext <8 x float> %4 to <8 x double>
395  %9 = fpext <8 x float> %5 to <8 x double>
396  %10 = fpext <8 x float> %6 to <8 x double>
397  %11 = fpext <8 x float> %7 to <8 x double>
398  %12 = fadd <8 x double> zeroinitializer, %8
399  %13 = fadd <8 x double> zeroinitializer, %9
400  %14 = fadd <8 x double> zeroinitializer, %10
401  %15 = fadd <8 x double> zeroinitializer, %11
402  %16 = fptrunc <8 x double> %12 to <8 x float>
403  %17 = fptrunc <8 x double> %13 to <8 x float>
404  %18 = fptrunc <8 x double> %14 to <8 x float>
405  %19 = fptrunc <8 x double> %15 to <8 x float>
406  %20 = fcmp ogt <8 x float> zeroinitializer, %16
407  %21 = fcmp ogt <8 x float> zeroinitializer, %17
408  %22 = fcmp ogt <8 x float> zeroinitializer, %18
409  %23 = fcmp ogt <8 x float> zeroinitializer, %19
410  ret void
411}
412
; test13: the low and high <4 x i32> halves of the <8 x i32> argument are each
; extracted twice with shufflevector and fed to four single-entry phis in
; for.end.loopexit. %phi1 feeds an `or` that is stored to %out0, while %1 and
; %4 are also stored directly (to %out1 and %out2), bypassing the phis.
; The expected output has one <16 x i32> phi fed by a shufflevector with lane
; groups ordered low, low, high, high, and recovers each stored <4 x i32> with
; llvm.vector.extract: index 12 of the phi for the `or`, and indices 0 and 8 of
; the pre-phi vector for the direct stores.
413define void @test13(<8 x i32> %0, ptr %out0, ptr %out1, ptr %out2) {
414; CHECK-LABEL: @test13(
415; CHECK-NEXT:  entry:
416; CHECK-NEXT:    [[TMP1:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> poison, <8 x i32> [[TMP0:%.*]], i64 0)
417; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <32 x i32> [[TMP1]], <32 x i32> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
418; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i32> [[TMP1]], <32 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
419; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <32 x i32> [[TMP1]], <32 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
420; CHECK-NEXT:    br label [[FOR_END_LOOPEXIT:%.*]]
421; CHECK:       for.end.loopexit:
422; CHECK-NEXT:    [[TMP5:%.*]] = phi <16 x i32> [ [[TMP4]], [[ENTRY:%.*]] ]
423; CHECK-NEXT:    [[TMP6:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v16i32(<16 x i32> [[TMP5]], i64 12)
424; CHECK-NEXT:    [[OR0:%.*]] = or <4 x i32> [[TMP6]], zeroinitializer
425; CHECK-NEXT:    store <4 x i32> [[OR0]], ptr [[OUT0:%.*]], align 4
426; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v16i32(<16 x i32> [[TMP4]], i64 0)
427; CHECK-NEXT:    store <4 x i32> [[TMP7]], ptr [[OUT1:%.*]], align 4
428; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v16i32(<16 x i32> [[TMP4]], i64 8)
429; CHECK-NEXT:    store <4 x i32> [[TMP8]], ptr [[OUT2:%.*]], align 4
430; CHECK-NEXT:    ret void
431;
432entry:
433  %1 = shufflevector <8 x i32> %0, <8 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
434  %2 = shufflevector <8 x i32> %0, <8 x i32> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
435  %3 = shufflevector <8 x i32> %0, <8 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
436  %4 = shufflevector <8 x i32> %0, <8 x i32> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
437  br label %for.end.loopexit
438
439for.end.loopexit:
440  %phi0 = phi <4 x i32> [ %1, %entry ]
441  %phi1 = phi <4 x i32> [ %2, %entry ]
442  %phi2 = phi <4 x i32> [ %3, %entry ]
443  %phi3 = phi <4 x i32> [ %4, %entry ]
444  %or0 = or <4 x i32> %phi1, zeroinitializer
445  store <4 x i32> %or0, ptr %out0, align 4
446  store <4 x i32> %1, ptr %out1, align 4
447  store <4 x i32> %4, ptr %out2, align 4
448  ret void
449}
450
; test14: same shape as a phi-of-half-vector test, but the halves come from two
; identical `sext <8 x i1> to <8 x i16>` casts of the argument, and nothing is
; stored; only %phi1 is used (by an `or` whose result is unused).
; The expected output widens the argument to <16 x i1>, performs one sext to
; <16 x i16>, feeds a shuffled <16 x i16> into a single phi, and extracts the
; `or` operand with llvm.vector.extract at index 12.
451define void @test14(<8 x i1> %0) {
452; CHECK-LABEL: @test14(
453; CHECK-NEXT:  entry:
454; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.vector.insert.v16i1.v8i1(<16 x i1> poison, <8 x i1> [[TMP0:%.*]], i64 0)
455; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i1> [[TMP1]], <16 x i1> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
456; CHECK-NEXT:    [[TMP3:%.*]] = sext <16 x i1> [[TMP2]] to <16 x i16>
457; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
458; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
459; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 4, i32 5, i32 6, i32 7>
460; CHECK-NEXT:    br label [[FOR_END_LOOPEXIT:%.*]]
461; CHECK:       for.end.loopexit:
462; CHECK-NEXT:    [[TMP7:%.*]] = phi <16 x i16> [ [[TMP6]], [[ENTRY:%.*]] ]
463; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i16> @llvm.vector.extract.v4i16.v16i16(<16 x i16> [[TMP7]], i64 12)
464; CHECK-NEXT:    [[OR0:%.*]] = or <4 x i16> [[TMP8]], zeroinitializer
465; CHECK-NEXT:    ret void
466;
467entry:
468  %sext0 = sext <8 x i1> %0 to <8 x i16>
469  %sext1 = sext <8 x i1> %0 to <8 x i16>
470  %1 = shufflevector <8 x i16> %sext0, <8 x i16> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
471  %2 = shufflevector <8 x i16> %sext0, <8 x i16> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
472  %3 = shufflevector <8 x i16> %sext1, <8 x i16> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
473  %4 = shufflevector <8 x i16> %sext1, <8 x i16> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
474  br label %for.end.loopexit
475
476for.end.loopexit:
477  %phi0 = phi <4 x i16> [ %1, %entry ]
478  %phi1 = phi <4 x i16> [ %2, %entry ]
479  %phi2 = phi <4 x i16> [ %3, %entry ]
480  %phi3 = phi <4 x i16> [ %4, %entry ]
481  %or0 = or <4 x i16> %phi1, zeroinitializer
482  ret void
483}
484