xref: /llvm-project/llvm/test/Transforms/Scalarizer/basic.ll (revision 1bc9b67bd87d7ec51045951c93fd5db953faa745)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt %s -passes='function(scalarizer<load-store>,dce)' -S | FileCheck %s
3target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
4
5declare <4 x float> @ext(<4 x float>)
6@g = global <4 x float> zeroinitializer
7
8define void @f1(<4 x float> %init, ptr %base, i32 %count) {
9; CHECK-LABEL: @f1(
10; CHECK-NEXT:  entry:
11; CHECK-NEXT:    [[INIT_I0:%.*]] = extractelement <4 x float> [[INIT:%.*]], i64 0
12; CHECK-NEXT:    [[INIT_I1:%.*]] = extractelement <4 x float> [[INIT]], i64 1
13; CHECK-NEXT:    [[INIT_I2:%.*]] = extractelement <4 x float> [[INIT]], i64 2
14; CHECK-NEXT:    [[INIT_I3:%.*]] = extractelement <4 x float> [[INIT]], i64 3
15; CHECK-NEXT:    br label [[LOOP:%.*]]
16; CHECK:       loop:
17; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
18; CHECK-NEXT:    [[ACC_I0:%.*]] = phi float [ [[INIT_I0]], [[ENTRY]] ], [ [[SEL_I0:%.*]], [[LOOP]] ]
19; CHECK-NEXT:    [[ACC_I1:%.*]] = phi float [ [[INIT_I1]], [[ENTRY]] ], [ [[SEL_I1:%.*]], [[LOOP]] ]
20; CHECK-NEXT:    [[ACC_I2:%.*]] = phi float [ [[INIT_I2]], [[ENTRY]] ], [ [[SEL_I2:%.*]], [[LOOP]] ]
21; CHECK-NEXT:    [[ACC_I3:%.*]] = phi float [ [[INIT_I3]], [[ENTRY]] ], [ [[SEL_I3:%.*]], [[LOOP]] ]
22; CHECK-NEXT:    [[NEXTI]] = sub i32 [[I]], 1
23; CHECK-NEXT:    [[PTR:%.*]] = getelementptr <4 x float>, ptr [[BASE:%.*]], i32 [[I]]
24; CHECK-NEXT:    [[VAL_I0:%.*]] = load float, ptr [[PTR]], align 16
25; CHECK-NEXT:    [[PTR_I1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
26; CHECK-NEXT:    [[VAL_I1:%.*]] = load float, ptr [[PTR_I1]], align 4
27; CHECK-NEXT:    [[PTR_I2:%.*]] = getelementptr float, ptr [[PTR]], i32 2
28; CHECK-NEXT:    [[VAL_I2:%.*]] = load float, ptr [[PTR_I2]], align 8
29; CHECK-NEXT:    [[PTR_I3:%.*]] = getelementptr float, ptr [[PTR]], i32 3
30; CHECK-NEXT:    [[VAL_I3:%.*]] = load float, ptr [[PTR_I3]], align 4
31; CHECK-NEXT:    [[ADD_I0:%.*]] = fadd float [[VAL_I0]], [[VAL_I2]]
32; CHECK-NEXT:    [[ADD_I1:%.*]] = fadd float [[VAL_I1]], [[VAL_I3]]
33; CHECK-NEXT:    [[ADD_I2:%.*]] = fadd float [[ACC_I0]], [[ACC_I2]]
34; CHECK-NEXT:    [[ADD_I3:%.*]] = fadd float [[ACC_I1]], [[ACC_I3]]
35; CHECK-NEXT:    [[ADD_UPTO0:%.*]] = insertelement <4 x float> poison, float [[ADD_I0]], i64 0
36; CHECK-NEXT:    [[ADD_UPTO1:%.*]] = insertelement <4 x float> [[ADD_UPTO0]], float [[ADD_I1]], i64 1
37; CHECK-NEXT:    [[ADD_UPTO2:%.*]] = insertelement <4 x float> [[ADD_UPTO1]], float [[ADD_I2]], i64 2
38; CHECK-NEXT:    [[ADD:%.*]] = insertelement <4 x float> [[ADD_UPTO2]], float [[ADD_I3]], i64 3
39; CHECK-NEXT:    [[CALL:%.*]] = call <4 x float> @ext(<4 x float> [[ADD]])
40; CHECK-NEXT:    [[CALL_I0:%.*]] = extractelement <4 x float> [[CALL]], i64 0
41; CHECK-NEXT:    [[CMP_I0:%.*]] = fcmp ogt float [[CALL_I0]], 1.000000e+00
42; CHECK-NEXT:    [[CALL_I1:%.*]] = extractelement <4 x float> [[CALL]], i64 1
43; CHECK-NEXT:    [[CMP_I1:%.*]] = fcmp ogt float [[CALL_I1]], 2.000000e+00
44; CHECK-NEXT:    [[CALL_I2:%.*]] = extractelement <4 x float> [[CALL]], i64 2
45; CHECK-NEXT:    [[CMP_I2:%.*]] = fcmp ogt float [[CALL_I2]], 3.000000e+00
46; CHECK-NEXT:    [[CALL_I3:%.*]] = extractelement <4 x float> [[CALL]], i64 3
47; CHECK-NEXT:    [[CMP_I3:%.*]] = fcmp ogt float [[CALL_I3]], 4.000000e+00
48; CHECK-NEXT:    [[SEL_I0]] = select i1 [[CMP_I0]], float [[CALL_I0]], float 5.000000e+00
49; CHECK-NEXT:    [[SEL_I1]] = select i1 [[CMP_I1]], float [[CALL_I1]], float 6.000000e+00
50; CHECK-NEXT:    [[SEL_I2]] = select i1 [[CMP_I2]], float [[CALL_I2]], float 7.000000e+00
51; CHECK-NEXT:    [[SEL_I3]] = select i1 [[CMP_I3]], float [[CALL_I3]], float 8.000000e+00
52; CHECK-NEXT:    store float [[SEL_I0]], ptr [[PTR]], align 16
53; CHECK-NEXT:    store float [[SEL_I1]], ptr [[PTR_I1]], align 4
54; CHECK-NEXT:    store float [[SEL_I2]], ptr [[PTR_I2]], align 8
55; CHECK-NEXT:    store float [[SEL_I3]], ptr [[PTR_I3]], align 4
56; CHECK-NEXT:    [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
57; CHECK-NEXT:    br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
58; CHECK:       exit:
59; CHECK-NEXT:    ret void
60;
61entry:
62  br label %loop
63
64loop:
65  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
66  %acc = phi <4 x float> [ %init, %entry ], [ %sel, %loop ]
67  %nexti = sub i32 %i, 1
68
69  %ptr = getelementptr <4 x float>, ptr %base, i32 %i
70  %val = load <4 x float> , ptr %ptr
71  %dval = bitcast <4 x float> %val to <2 x double>
72  %dacc = bitcast <4 x float> %acc to <2 x double>
73  %shuffle1 = shufflevector <2 x double> %dval, <2 x double> %dacc,
74  <2 x i32> <i32 0, i32 2>
75  %shuffle2 = shufflevector <2 x double> %dval, <2 x double> %dacc,
76  <2 x i32> <i32 1, i32 3>
77  %f1 = bitcast <2 x double> %shuffle1 to <4 x float>
78  %f2 = bitcast <2 x double> %shuffle2 to <4 x float>
79  %add = fadd <4 x float> %f1, %f2
80  %call = call <4 x float> @ext(<4 x float> %add)
81  %cmp = fcmp ogt <4 x float> %call,
82  <float 1.0, float 2.0, float 3.0, float 4.0>
83  %sel = select <4 x i1> %cmp, <4 x float> %call,
84  <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
85  store <4 x float> %sel, ptr %ptr
86
87  %test = icmp eq i32 %nexti, 0
88  br i1 %test, label %loop, label %exit
89
90exit:
91  ret void
92}
93
94define void @f2(<4 x i32> %init, ptr %base, i32 %count) {
95; CHECK-LABEL: @f2(
96; CHECK-NEXT:  entry:
97; CHECK-NEXT:    [[INIT_I0:%.*]] = extractelement <4 x i32> [[INIT:%.*]], i64 0
98; CHECK-NEXT:    [[INIT_I1:%.*]] = extractelement <4 x i32> [[INIT]], i64 1
99; CHECK-NEXT:    [[INIT_I2:%.*]] = extractelement <4 x i32> [[INIT]], i64 2
100; CHECK-NEXT:    [[INIT_I3:%.*]] = extractelement <4 x i32> [[INIT]], i64 3
101; CHECK-NEXT:    br label [[LOOP:%.*]]
102; CHECK:       loop:
103; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
104; CHECK-NEXT:    [[ACC_I0:%.*]] = phi i32 [ [[INIT_I0]], [[ENTRY]] ], [ [[SEL_I0:%.*]], [[LOOP]] ]
105; CHECK-NEXT:    [[ACC_I1:%.*]] = phi i32 [ [[INIT_I1]], [[ENTRY]] ], [ [[SEL_I1:%.*]], [[LOOP]] ]
106; CHECK-NEXT:    [[ACC_I2:%.*]] = phi i32 [ [[INIT_I2]], [[ENTRY]] ], [ [[SEL_I2:%.*]], [[LOOP]] ]
107; CHECK-NEXT:    [[ACC_I3:%.*]] = phi i32 [ [[INIT_I3]], [[ENTRY]] ], [ [[SEL_I3:%.*]], [[LOOP]] ]
108; CHECK-NEXT:    [[NEXTI]] = sub i32 [[I]], 1
109; CHECK-NEXT:    [[PTR:%.*]] = getelementptr <4 x i8>, ptr [[BASE:%.*]], i32 [[I]]
110; CHECK-NEXT:    [[VAL_I0:%.*]] = load i8, ptr [[PTR]], align 4
111; CHECK-NEXT:    [[PTR_I1:%.*]] = getelementptr i8, ptr [[PTR]], i32 1
112; CHECK-NEXT:    [[VAL_I1:%.*]] = load i8, ptr [[PTR_I1]], align 1
113; CHECK-NEXT:    [[PTR_I2:%.*]] = getelementptr i8, ptr [[PTR]], i32 2
114; CHECK-NEXT:    [[VAL_I2:%.*]] = load i8, ptr [[PTR_I2]], align 2
115; CHECK-NEXT:    [[PTR_I3:%.*]] = getelementptr i8, ptr [[PTR]], i32 3
116; CHECK-NEXT:    [[VAL_I3:%.*]] = load i8, ptr [[PTR_I3]], align 1
117; CHECK-NEXT:    [[EXT_I0:%.*]] = sext i8 [[VAL_I0]] to i32
118; CHECK-NEXT:    [[EXT_I1:%.*]] = sext i8 [[VAL_I1]] to i32
119; CHECK-NEXT:    [[EXT_I2:%.*]] = sext i8 [[VAL_I2]] to i32
120; CHECK-NEXT:    [[EXT_I3:%.*]] = sext i8 [[VAL_I3]] to i32
121; CHECK-NEXT:    [[ADD_I0:%.*]] = add i32 [[EXT_I0]], [[ACC_I0]]
122; CHECK-NEXT:    [[ADD_I1:%.*]] = add i32 [[EXT_I1]], [[ACC_I1]]
123; CHECK-NEXT:    [[ADD_I2:%.*]] = add i32 [[EXT_I2]], [[ACC_I2]]
124; CHECK-NEXT:    [[ADD_I3:%.*]] = add i32 [[EXT_I3]], [[ACC_I3]]
125; CHECK-NEXT:    [[CMP_I0:%.*]] = icmp slt i32 [[ADD_I0]], -10
126; CHECK-NEXT:    [[CMP_I1:%.*]] = icmp slt i32 [[ADD_I1]], -11
127; CHECK-NEXT:    [[CMP_I2:%.*]] = icmp slt i32 [[ADD_I2]], -12
128; CHECK-NEXT:    [[CMP_I3:%.*]] = icmp slt i32 [[ADD_I3]], -13
129; CHECK-NEXT:    [[SEL_I0]] = select i1 [[CMP_I0]], i32 [[ADD_I0]], i32 [[I]]
130; CHECK-NEXT:    [[SEL_I1]] = select i1 [[CMP_I1]], i32 [[ADD_I1]], i32 [[I]]
131; CHECK-NEXT:    [[SEL_I2]] = select i1 [[CMP_I2]], i32 [[ADD_I2]], i32 [[I]]
132; CHECK-NEXT:    [[SEL_I3]] = select i1 [[CMP_I3]], i32 [[ADD_I3]], i32 [[I]]
133; CHECK-NEXT:    [[TRUNC_I0:%.*]] = trunc i32 [[SEL_I0]] to i8
134; CHECK-NEXT:    [[TRUNC_I1:%.*]] = trunc i32 [[SEL_I1]] to i8
135; CHECK-NEXT:    [[TRUNC_I2:%.*]] = trunc i32 [[SEL_I2]] to i8
136; CHECK-NEXT:    [[TRUNC_I3:%.*]] = trunc i32 [[SEL_I3]] to i8
137; CHECK-NEXT:    store i8 [[TRUNC_I0]], ptr [[PTR]], align 4
138; CHECK-NEXT:    store i8 [[TRUNC_I1]], ptr [[PTR_I1]], align 1
139; CHECK-NEXT:    store i8 [[TRUNC_I2]], ptr [[PTR_I2]], align 2
140; CHECK-NEXT:    store i8 [[TRUNC_I3]], ptr [[PTR_I3]], align 1
141; CHECK-NEXT:    [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
142; CHECK-NEXT:    br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
143; CHECK:       exit:
144; CHECK-NEXT:    ret void
145;
146entry:
147  br label %loop
148
149loop:
150  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
151  %acc = phi <4 x i32> [ %init, %entry ], [ %sel, %loop ]
152  %nexti = sub i32 %i, 1
153
154  %ptr = getelementptr <4 x i8>, ptr %base, i32 %i
155  %val = load <4 x i8> , ptr %ptr
156  %ext = sext <4 x i8> %val to <4 x i32>
157  %add = add <4 x i32> %ext, %acc
158  %cmp = icmp slt <4 x i32> %add, <i32 -10, i32 -11, i32 -12, i32 -13>
159  %single = insertelement <4 x i32> undef, i32 %i, i32 0
160  %limit = shufflevector <4 x i32> %single, <4 x i32> undef,
161  <4 x i32> zeroinitializer
162  %sel = select <4 x i1> %cmp, <4 x i32> %add, <4 x i32> %limit
163  %trunc = trunc <4 x i32> %sel to <4 x i8>
164  store <4 x i8> %trunc, ptr %ptr
165
166  %test = icmp eq i32 %nexti, 0
167  br i1 %test, label %loop, label %exit
168
169exit:
170  ret void
171}
172
173; Check that !tbaa information is preserved.
174define void @f3(ptr %src, ptr %dst) {
175; CHECK-LABEL: @f3(
176; CHECK-NEXT:    [[DST_I1:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 1
177; CHECK-NEXT:    [[DST_I2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
178; CHECK-NEXT:    [[DST_I3:%.*]] = getelementptr i32, ptr [[DST]], i32 3
179; CHECK-NEXT:    [[VAL_I0:%.*]] = load i32, ptr [[SRC:%.*]], align 16, !tbaa [[TBAA0:![0-9]+]]
180; CHECK-NEXT:    [[SRC_I1:%.*]] = getelementptr i32, ptr [[SRC]], i32 1
181; CHECK-NEXT:    [[VAL_I1:%.*]] = load i32, ptr [[SRC_I1]], align 4, !tbaa [[TBAA0]]
182; CHECK-NEXT:    [[SRC_I2:%.*]] = getelementptr i32, ptr [[SRC]], i32 2
183; CHECK-NEXT:    [[VAL_I2:%.*]] = load i32, ptr [[SRC_I2]], align 8, !tbaa [[TBAA0]]
184; CHECK-NEXT:    [[SRC_I3:%.*]] = getelementptr i32, ptr [[SRC]], i32 3
185; CHECK-NEXT:    [[VAL_I3:%.*]] = load i32, ptr [[SRC_I3]], align 4, !tbaa [[TBAA0]]
186; CHECK-NEXT:    [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
187; CHECK-NEXT:    [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
188; CHECK-NEXT:    [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
189; CHECK-NEXT:    [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
190; CHECK-NEXT:    store i32 [[ADD_I0]], ptr [[DST]], align 16, !tbaa [[TBAA3:![0-9]+]]
191; CHECK-NEXT:    store i32 [[ADD_I1]], ptr [[DST_I1]], align 4, !tbaa [[TBAA3]]
192; CHECK-NEXT:    store i32 [[ADD_I2]], ptr [[DST_I2]], align 8, !tbaa [[TBAA3]]
193; CHECK-NEXT:    store i32 [[ADD_I3]], ptr [[DST_I3]], align 4, !tbaa [[TBAA3]]
194; CHECK-NEXT:    ret void
195;
196  %val = load <4 x i32> , ptr %src, !tbaa !1
197  %add = add <4 x i32> %val, %val
198  store <4 x i32> %add, ptr %dst, !tbaa !2
199  ret void
200}
201
202; Check that !tbaa.struct information is preserved.
203define void @f4(ptr %src, ptr %dst) {
204; CHECK-LABEL: @f4(
205; CHECK-NEXT:    [[DST_I1:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 1
206; CHECK-NEXT:    [[DST_I2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
207; CHECK-NEXT:    [[DST_I3:%.*]] = getelementptr i32, ptr [[DST]], i32 3
208; CHECK-NEXT:    [[VAL_I0:%.*]] = load i32, ptr [[SRC:%.*]], align 16, !tbaa.struct [[TBAA_STRUCT5:![0-9]+]]
209; CHECK-NEXT:    [[SRC_I1:%.*]] = getelementptr i32, ptr [[SRC]], i32 1
210; CHECK-NEXT:    [[VAL_I1:%.*]] = load i32, ptr [[SRC_I1]], align 4, !tbaa.struct [[TBAA_STRUCT5]]
211; CHECK-NEXT:    [[SRC_I2:%.*]] = getelementptr i32, ptr [[SRC]], i32 2
212; CHECK-NEXT:    [[VAL_I2:%.*]] = load i32, ptr [[SRC_I2]], align 8, !tbaa.struct [[TBAA_STRUCT5]]
213; CHECK-NEXT:    [[SRC_I3:%.*]] = getelementptr i32, ptr [[SRC]], i32 3
214; CHECK-NEXT:    [[VAL_I3:%.*]] = load i32, ptr [[SRC_I3]], align 4, !tbaa.struct [[TBAA_STRUCT5]]
215; CHECK-NEXT:    [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
216; CHECK-NEXT:    [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
217; CHECK-NEXT:    [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
218; CHECK-NEXT:    [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
219; CHECK-NEXT:    store i32 [[ADD_I0]], ptr [[DST]], align 16, !tbaa.struct [[TBAA_STRUCT5]]
220; CHECK-NEXT:    store i32 [[ADD_I1]], ptr [[DST_I1]], align 4, !tbaa.struct [[TBAA_STRUCT5]]
221; CHECK-NEXT:    store i32 [[ADD_I2]], ptr [[DST_I2]], align 8, !tbaa.struct [[TBAA_STRUCT5]]
222; CHECK-NEXT:    store i32 [[ADD_I3]], ptr [[DST_I3]], align 4, !tbaa.struct [[TBAA_STRUCT5]]
223; CHECK-NEXT:    ret void
224;
225  %val = load <4 x i32> , ptr %src, !tbaa.struct !5
226  %add = add <4 x i32> %val, %val
227  store <4 x i32> %add, ptr %dst, !tbaa.struct !5
228  ret void
229}
230
231; Check that llvm.access.group information is preserved.
232define void @f5(i32 %count, ptr %src, ptr %dst) {
233; CHECK-LABEL: @f5(
234; CHECK-NEXT:  entry:
235; CHECK-NEXT:    br label [[LOOP:%.*]]
236; CHECK:       loop:
237; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[NEXT_INDEX:%.*]], [[LOOP]] ]
238; CHECK-NEXT:    [[THIS_SRC:%.*]] = getelementptr <4 x i32>, ptr [[SRC:%.*]], i32 [[INDEX]]
239; CHECK-NEXT:    [[THIS_SRC_I1:%.*]] = getelementptr i32, ptr [[THIS_SRC]], i32 1
240; CHECK-NEXT:    [[THIS_SRC_I2:%.*]] = getelementptr i32, ptr [[THIS_SRC]], i32 2
241; CHECK-NEXT:    [[THIS_SRC_I3:%.*]] = getelementptr i32, ptr [[THIS_SRC]], i32 3
242; CHECK-NEXT:    [[THIS_DST:%.*]] = getelementptr <4 x i32>, ptr [[DST:%.*]], i32 [[INDEX]]
243; CHECK-NEXT:    [[THIS_DST_I1:%.*]] = getelementptr i32, ptr [[THIS_DST]], i32 1
244; CHECK-NEXT:    [[THIS_DST_I2:%.*]] = getelementptr i32, ptr [[THIS_DST]], i32 2
245; CHECK-NEXT:    [[THIS_DST_I3:%.*]] = getelementptr i32, ptr [[THIS_DST]], i32 3
246; CHECK-NEXT:    [[VAL_I0:%.*]] = load i32, ptr [[THIS_SRC]], align 16, !llvm.access.group [[ACC_GRP6:![0-9]+]]
247; CHECK-NEXT:    [[VAL_I1:%.*]] = load i32, ptr [[THIS_SRC_I1]], align 4, !llvm.access.group [[ACC_GRP6]]
248; CHECK-NEXT:    [[VAL_I2:%.*]] = load i32, ptr [[THIS_SRC_I2]], align 8, !llvm.access.group [[ACC_GRP6]]
249; CHECK-NEXT:    [[VAL_I3:%.*]] = load i32, ptr [[THIS_SRC_I3]], align 4, !llvm.access.group [[ACC_GRP6]]
250; CHECK-NEXT:    [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
251; CHECK-NEXT:    [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
252; CHECK-NEXT:    [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
253; CHECK-NEXT:    [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
254; CHECK-NEXT:    store i32 [[ADD_I0]], ptr [[THIS_DST]], align 16, !llvm.access.group [[ACC_GRP6]]
255; CHECK-NEXT:    store i32 [[ADD_I1]], ptr [[THIS_DST_I1]], align 4, !llvm.access.group [[ACC_GRP6]]
256; CHECK-NEXT:    store i32 [[ADD_I2]], ptr [[THIS_DST_I2]], align 8, !llvm.access.group [[ACC_GRP6]]
257; CHECK-NEXT:    store i32 [[ADD_I3]], ptr [[THIS_DST_I3]], align 4, !llvm.access.group [[ACC_GRP6]]
258; CHECK-NEXT:    [[NEXT_INDEX]] = add i32 [[INDEX]], -1
259; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[NEXT_INDEX]], [[COUNT:%.*]]
260; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[END:%.*]], !llvm.loop [[LOOP7:![0-9]+]]
261; CHECK:       end:
262; CHECK-NEXT:    ret void
263;
264entry:
265  br label %loop
266
267loop:
268  %index = phi i32 [ 0, %entry ], [ %next_index, %loop ]
269  %this_src = getelementptr <4 x i32>, ptr %src, i32 %index
270  %this_dst = getelementptr <4 x i32>, ptr %dst, i32 %index
271  %val = load <4 x i32> , ptr %this_src, !llvm.access.group !13
272  %add = add <4 x i32> %val, %val
273  store <4 x i32> %add, ptr %this_dst, !llvm.access.group !13
274  %next_index = add i32 %index, -1
275  %continue = icmp ne i32 %next_index, %count
276  br i1 %continue, label %loop, label %end, !llvm.loop !3
277
278end:
279  ret void
280}
281
282; Check that fpmath information is preserved.
283define <4 x float> @f6(<4 x float> %x) {
284; CHECK-LABEL: @f6(
285; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <4 x float> [[X:%.*]], i64 0
286; CHECK-NEXT:    [[RES_I0:%.*]] = fadd float [[X_I0]], 1.000000e+00, !fpmath [[META9:![0-9]+]]
287; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <4 x float> [[X]], i64 1
288; CHECK-NEXT:    [[RES_I1:%.*]] = fadd float [[X_I1]], 2.000000e+00, !fpmath [[META9]]
289; CHECK-NEXT:    [[X_I2:%.*]] = extractelement <4 x float> [[X]], i64 2
290; CHECK-NEXT:    [[RES_I2:%.*]] = fadd float [[X_I2]], 3.000000e+00, !fpmath [[META9]]
291; CHECK-NEXT:    [[X_I3:%.*]] = extractelement <4 x float> [[X]], i64 3
292; CHECK-NEXT:    [[RES_I3:%.*]] = fadd float [[X_I3]], 4.000000e+00, !fpmath [[META9]]
293; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <4 x float> poison, float [[RES_I0]], i64 0
294; CHECK-NEXT:    [[RES_UPTO1:%.*]] = insertelement <4 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1
295; CHECK-NEXT:    [[RES_UPTO2:%.*]] = insertelement <4 x float> [[RES_UPTO1]], float [[RES_I2]], i64 2
296; CHECK-NEXT:    [[RES:%.*]] = insertelement <4 x float> [[RES_UPTO2]], float [[RES_I3]], i64 3
297; CHECK-NEXT:    ret <4 x float> [[RES]]
298;
299  %res = fadd <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>,
300  !fpmath !4
301  ret <4 x float> %res
302}
303
304; Check that random metadata isn't kept.
305define void @f7(ptr %src, ptr %dst) {
306; CHECK-LABEL: @f7(
307; CHECK-NEXT:    [[DST_I1:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 1
308; CHECK-NEXT:    [[DST_I2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
309; CHECK-NEXT:    [[DST_I3:%.*]] = getelementptr i32, ptr [[DST]], i32 3
310; CHECK-NEXT:    [[VAL_I0:%.*]] = load i32, ptr [[SRC:%.*]], align 16
311; CHECK-NEXT:    [[SRC_I1:%.*]] = getelementptr i32, ptr [[SRC]], i32 1
312; CHECK-NEXT:    [[VAL_I1:%.*]] = load i32, ptr [[SRC_I1]], align 4
313; CHECK-NEXT:    [[SRC_I2:%.*]] = getelementptr i32, ptr [[SRC]], i32 2
314; CHECK-NEXT:    [[VAL_I2:%.*]] = load i32, ptr [[SRC_I2]], align 8
315; CHECK-NEXT:    [[SRC_I3:%.*]] = getelementptr i32, ptr [[SRC]], i32 3
316; CHECK-NEXT:    [[VAL_I3:%.*]] = load i32, ptr [[SRC_I3]], align 4
317; CHECK-NEXT:    [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
318; CHECK-NEXT:    [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
319; CHECK-NEXT:    [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
320; CHECK-NEXT:    [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
321; CHECK-NEXT:    store i32 [[ADD_I0]], ptr [[DST]], align 16
322; CHECK-NEXT:    store i32 [[ADD_I1]], ptr [[DST_I1]], align 4
323; CHECK-NEXT:    store i32 [[ADD_I2]], ptr [[DST_I2]], align 8
324; CHECK-NEXT:    store i32 [[ADD_I3]], ptr [[DST_I3]], align 4
325; CHECK-NEXT:    ret void
326;
327  %val = load <4 x i32> , ptr %src, !foo !5
328  %add = add <4 x i32> %val, %val
329  store <4 x i32> %add, ptr %dst, !foo !5
330  ret void
331}
332
333; Test GEP with vectors.
334define void @f8(ptr %dest, <4 x ptr> %ptr0, <4 x i32> %i0,
335; CHECK-LABEL: @f8(
336; CHECK-NEXT:    [[DEST_I1:%.*]] = getelementptr ptr, ptr [[DEST:%.*]], i32 1
337; CHECK-NEXT:    [[DEST_I2:%.*]] = getelementptr ptr, ptr [[DEST]], i32 2
338; CHECK-NEXT:    [[DEST_I3:%.*]] = getelementptr ptr, ptr [[DEST]], i32 3
339; CHECK-NEXT:    [[PTR0_I0:%.*]] = extractelement <4 x ptr> [[PTR0:%.*]], i64 0
340; CHECK-NEXT:    [[PTR0_I2:%.*]] = extractelement <4 x ptr> [[PTR0]], i64 2
341; CHECK-NEXT:    [[PTR0_I3:%.*]] = extractelement <4 x ptr> [[PTR0]], i64 3
342; CHECK-NEXT:    [[I0_I1:%.*]] = extractelement <4 x i32> [[I0:%.*]], i64 1
343; CHECK-NEXT:    [[I0_I3:%.*]] = extractelement <4 x i32> [[I0]], i64 3
344; CHECK-NEXT:    [[VAL_I0:%.*]] = getelementptr float, ptr [[PTR0_I0]], i32 100
345; CHECK-NEXT:    [[VAL_I1:%.*]] = getelementptr float, ptr [[OTHER:%.*]], i32 [[I0_I1]]
346; CHECK-NEXT:    [[VAL_I2:%.*]] = getelementptr float, ptr [[PTR0_I2]], i32 100
347; CHECK-NEXT:    [[VAL_I3:%.*]] = getelementptr float, ptr [[PTR0_I3]], i32 [[I0_I3]]
348; CHECK-NEXT:    store ptr [[VAL_I0]], ptr [[DEST]], align 32
349; CHECK-NEXT:    store ptr [[VAL_I1]], ptr [[DEST_I1]], align 8
350; CHECK-NEXT:    store ptr [[VAL_I2]], ptr [[DEST_I2]], align 16
351; CHECK-NEXT:    store ptr [[VAL_I3]], ptr [[DEST_I3]], align 8
352; CHECK-NEXT:    ret void
353;
354  ptr %other) {
355  %i1 = insertelement <4 x i32> %i0, i32 100, i32 0
356  %i2 = insertelement <4 x i32> %i1, i32 100, i32 2
357  %ptr1 = insertelement <4 x ptr> %ptr0, ptr %other, i32 1
358  %val = getelementptr float, <4 x ptr> %ptr1, <4 x i32> %i2
359  store <4 x ptr> %val, ptr %dest
360  ret void
361}
362
363; Test the handling of unaligned loads.
364define void @f9(ptr %dest, ptr %src) {
365; CHECK-LABEL: @f9(
366; CHECK-NEXT:    [[DEST_I1:%.*]] = getelementptr float, ptr [[DEST:%.*]], i32 1
367; CHECK-NEXT:    [[DEST_I2:%.*]] = getelementptr float, ptr [[DEST]], i32 2
368; CHECK-NEXT:    [[DEST_I3:%.*]] = getelementptr float, ptr [[DEST]], i32 3
369; CHECK-NEXT:    [[VAL_I0:%.*]] = load float, ptr [[SRC:%.*]], align 4
370; CHECK-NEXT:    [[SRC_I1:%.*]] = getelementptr float, ptr [[SRC]], i32 1
371; CHECK-NEXT:    [[VAL_I1:%.*]] = load float, ptr [[SRC_I1]], align 4
372; CHECK-NEXT:    [[SRC_I2:%.*]] = getelementptr float, ptr [[SRC]], i32 2
373; CHECK-NEXT:    [[VAL_I2:%.*]] = load float, ptr [[SRC_I2]], align 4
374; CHECK-NEXT:    [[SRC_I3:%.*]] = getelementptr float, ptr [[SRC]], i32 3
375; CHECK-NEXT:    [[VAL_I3:%.*]] = load float, ptr [[SRC_I3]], align 4
376; CHECK-NEXT:    store float [[VAL_I0]], ptr [[DEST]], align 8
377; CHECK-NEXT:    store float [[VAL_I1]], ptr [[DEST_I1]], align 4
378; CHECK-NEXT:    store float [[VAL_I2]], ptr [[DEST_I2]], align 8
379; CHECK-NEXT:    store float [[VAL_I3]], ptr [[DEST_I3]], align 4
380; CHECK-NEXT:    ret void
381;
382  %val = load <4 x float> , ptr %src, align 4
383  store <4 x float> %val, ptr %dest, align 8
384  ret void
385}
386
387; ...and again with subelement alignment.
388define void @f10(ptr %dest, ptr %src) {
389; CHECK-LABEL: @f10(
390; CHECK-NEXT:    [[DEST_I1:%.*]] = getelementptr float, ptr [[DEST:%.*]], i32 1
391; CHECK-NEXT:    [[DEST_I2:%.*]] = getelementptr float, ptr [[DEST]], i32 2
392; CHECK-NEXT:    [[DEST_I3:%.*]] = getelementptr float, ptr [[DEST]], i32 3
393; CHECK-NEXT:    [[VAL_I0:%.*]] = load float, ptr [[SRC:%.*]], align 1
394; CHECK-NEXT:    [[SRC_I1:%.*]] = getelementptr float, ptr [[SRC]], i32 1
395; CHECK-NEXT:    [[VAL_I1:%.*]] = load float, ptr [[SRC_I1]], align 1
396; CHECK-NEXT:    [[SRC_I2:%.*]] = getelementptr float, ptr [[SRC]], i32 2
397; CHECK-NEXT:    [[VAL_I2:%.*]] = load float, ptr [[SRC_I2]], align 1
398; CHECK-NEXT:    [[SRC_I3:%.*]] = getelementptr float, ptr [[SRC]], i32 3
399; CHECK-NEXT:    [[VAL_I3:%.*]] = load float, ptr [[SRC_I3]], align 1
400; CHECK-NEXT:    store float [[VAL_I0]], ptr [[DEST]], align 2
401; CHECK-NEXT:    store float [[VAL_I1]], ptr [[DEST_I1]], align 2
402; CHECK-NEXT:    store float [[VAL_I2]], ptr [[DEST_I2]], align 2
403; CHECK-NEXT:    store float [[VAL_I3]], ptr [[DEST_I3]], align 2
404; CHECK-NEXT:    ret void
405;
406  %val = load <4 x float> , ptr %src, align 1
407  store <4 x float> %val, ptr %dest, align 2
408  ret void
409}
410
411; Test that sub-byte loads aren't scalarized.
412define void @f11(ptr %dest, ptr %src0) {
413; CHECK-LABEL: @f11(
414; CHECK-NEXT:    [[SRC1:%.*]] = getelementptr <32 x i1>, ptr [[SRC0:%.*]], i32 1
415; CHECK-NEXT:    [[VAL0:%.*]] = load <32 x i1>, ptr [[SRC0]], align 4
416; CHECK-NEXT:    [[VAL0_I0:%.*]] = extractelement <32 x i1> [[VAL0]], i64 0
417; CHECK-NEXT:    [[VAL0_I1:%.*]] = extractelement <32 x i1> [[VAL0]], i64 1
418; CHECK-NEXT:    [[VAL0_I2:%.*]] = extractelement <32 x i1> [[VAL0]], i64 2
419; CHECK-NEXT:    [[VAL0_I3:%.*]] = extractelement <32 x i1> [[VAL0]], i64 3
420; CHECK-NEXT:    [[VAL0_I4:%.*]] = extractelement <32 x i1> [[VAL0]], i64 4
421; CHECK-NEXT:    [[VAL0_I5:%.*]] = extractelement <32 x i1> [[VAL0]], i64 5
422; CHECK-NEXT:    [[VAL0_I6:%.*]] = extractelement <32 x i1> [[VAL0]], i64 6
423; CHECK-NEXT:    [[VAL0_I7:%.*]] = extractelement <32 x i1> [[VAL0]], i64 7
424; CHECK-NEXT:    [[VAL0_I8:%.*]] = extractelement <32 x i1> [[VAL0]], i64 8
425; CHECK-NEXT:    [[VAL0_I9:%.*]] = extractelement <32 x i1> [[VAL0]], i64 9
426; CHECK-NEXT:    [[VAL0_I10:%.*]] = extractelement <32 x i1> [[VAL0]], i64 10
427; CHECK-NEXT:    [[VAL0_I11:%.*]] = extractelement <32 x i1> [[VAL0]], i64 11
428; CHECK-NEXT:    [[VAL0_I12:%.*]] = extractelement <32 x i1> [[VAL0]], i64 12
429; CHECK-NEXT:    [[VAL0_I13:%.*]] = extractelement <32 x i1> [[VAL0]], i64 13
430; CHECK-NEXT:    [[VAL0_I14:%.*]] = extractelement <32 x i1> [[VAL0]], i64 14
431; CHECK-NEXT:    [[VAL0_I15:%.*]] = extractelement <32 x i1> [[VAL0]], i64 15
432; CHECK-NEXT:    [[VAL0_I16:%.*]] = extractelement <32 x i1> [[VAL0]], i64 16
433; CHECK-NEXT:    [[VAL0_I17:%.*]] = extractelement <32 x i1> [[VAL0]], i64 17
434; CHECK-NEXT:    [[VAL0_I18:%.*]] = extractelement <32 x i1> [[VAL0]], i64 18
435; CHECK-NEXT:    [[VAL0_I19:%.*]] = extractelement <32 x i1> [[VAL0]], i64 19
436; CHECK-NEXT:    [[VAL0_I20:%.*]] = extractelement <32 x i1> [[VAL0]], i64 20
437; CHECK-NEXT:    [[VAL0_I21:%.*]] = extractelement <32 x i1> [[VAL0]], i64 21
438; CHECK-NEXT:    [[VAL0_I22:%.*]] = extractelement <32 x i1> [[VAL0]], i64 22
439; CHECK-NEXT:    [[VAL0_I23:%.*]] = extractelement <32 x i1> [[VAL0]], i64 23
440; CHECK-NEXT:    [[VAL0_I24:%.*]] = extractelement <32 x i1> [[VAL0]], i64 24
441; CHECK-NEXT:    [[VAL0_I25:%.*]] = extractelement <32 x i1> [[VAL0]], i64 25
442; CHECK-NEXT:    [[VAL0_I26:%.*]] = extractelement <32 x i1> [[VAL0]], i64 26
443; CHECK-NEXT:    [[VAL0_I27:%.*]] = extractelement <32 x i1> [[VAL0]], i64 27
444; CHECK-NEXT:    [[VAL0_I28:%.*]] = extractelement <32 x i1> [[VAL0]], i64 28
445; CHECK-NEXT:    [[VAL0_I29:%.*]] = extractelement <32 x i1> [[VAL0]], i64 29
446; CHECK-NEXT:    [[VAL0_I30:%.*]] = extractelement <32 x i1> [[VAL0]], i64 30
447; CHECK-NEXT:    [[VAL0_I31:%.*]] = extractelement <32 x i1> [[VAL0]], i64 31
448; CHECK-NEXT:    [[VAL1:%.*]] = load <32 x i1>, ptr [[SRC1]], align 4
449; CHECK-NEXT:    [[VAL1_I0:%.*]] = extractelement <32 x i1> [[VAL1]], i64 0
450; CHECK-NEXT:    [[AND_I0:%.*]] = and i1 [[VAL0_I0]], [[VAL1_I0]]
451; CHECK-NEXT:    [[VAL1_I1:%.*]] = extractelement <32 x i1> [[VAL1]], i64 1
452; CHECK-NEXT:    [[AND_I1:%.*]] = and i1 [[VAL0_I1]], [[VAL1_I1]]
453; CHECK-NEXT:    [[VAL1_I2:%.*]] = extractelement <32 x i1> [[VAL1]], i64 2
454; CHECK-NEXT:    [[AND_I2:%.*]] = and i1 [[VAL0_I2]], [[VAL1_I2]]
455; CHECK-NEXT:    [[VAL1_I3:%.*]] = extractelement <32 x i1> [[VAL1]], i64 3
456; CHECK-NEXT:    [[AND_I3:%.*]] = and i1 [[VAL0_I3]], [[VAL1_I3]]
457; CHECK-NEXT:    [[VAL1_I4:%.*]] = extractelement <32 x i1> [[VAL1]], i64 4
458; CHECK-NEXT:    [[AND_I4:%.*]] = and i1 [[VAL0_I4]], [[VAL1_I4]]
459; CHECK-NEXT:    [[VAL1_I5:%.*]] = extractelement <32 x i1> [[VAL1]], i64 5
460; CHECK-NEXT:    [[AND_I5:%.*]] = and i1 [[VAL0_I5]], [[VAL1_I5]]
461; CHECK-NEXT:    [[VAL1_I6:%.*]] = extractelement <32 x i1> [[VAL1]], i64 6
462; CHECK-NEXT:    [[AND_I6:%.*]] = and i1 [[VAL0_I6]], [[VAL1_I6]]
463; CHECK-NEXT:    [[VAL1_I7:%.*]] = extractelement <32 x i1> [[VAL1]], i64 7
464; CHECK-NEXT:    [[AND_I7:%.*]] = and i1 [[VAL0_I7]], [[VAL1_I7]]
465; CHECK-NEXT:    [[VAL1_I8:%.*]] = extractelement <32 x i1> [[VAL1]], i64 8
466; CHECK-NEXT:    [[AND_I8:%.*]] = and i1 [[VAL0_I8]], [[VAL1_I8]]
467; CHECK-NEXT:    [[VAL1_I9:%.*]] = extractelement <32 x i1> [[VAL1]], i64 9
468; CHECK-NEXT:    [[AND_I9:%.*]] = and i1 [[VAL0_I9]], [[VAL1_I9]]
469; CHECK-NEXT:    [[VAL1_I10:%.*]] = extractelement <32 x i1> [[VAL1]], i64 10
470; CHECK-NEXT:    [[AND_I10:%.*]] = and i1 [[VAL0_I10]], [[VAL1_I10]]
471; CHECK-NEXT:    [[VAL1_I11:%.*]] = extractelement <32 x i1> [[VAL1]], i64 11
472; CHECK-NEXT:    [[AND_I11:%.*]] = and i1 [[VAL0_I11]], [[VAL1_I11]]
473; CHECK-NEXT:    [[VAL1_I12:%.*]] = extractelement <32 x i1> [[VAL1]], i64 12
474; CHECK-NEXT:    [[AND_I12:%.*]] = and i1 [[VAL0_I12]], [[VAL1_I12]]
475; CHECK-NEXT:    [[VAL1_I13:%.*]] = extractelement <32 x i1> [[VAL1]], i64 13
476; CHECK-NEXT:    [[AND_I13:%.*]] = and i1 [[VAL0_I13]], [[VAL1_I13]]
477; CHECK-NEXT:    [[VAL1_I14:%.*]] = extractelement <32 x i1> [[VAL1]], i64 14
478; CHECK-NEXT:    [[AND_I14:%.*]] = and i1 [[VAL0_I14]], [[VAL1_I14]]
479; CHECK-NEXT:    [[VAL1_I15:%.*]] = extractelement <32 x i1> [[VAL1]], i64 15
480; CHECK-NEXT:    [[AND_I15:%.*]] = and i1 [[VAL0_I15]], [[VAL1_I15]]
481; CHECK-NEXT:    [[VAL1_I16:%.*]] = extractelement <32 x i1> [[VAL1]], i64 16
482; CHECK-NEXT:    [[AND_I16:%.*]] = and i1 [[VAL0_I16]], [[VAL1_I16]]
483; CHECK-NEXT:    [[VAL1_I17:%.*]] = extractelement <32 x i1> [[VAL1]], i64 17
484; CHECK-NEXT:    [[AND_I17:%.*]] = and i1 [[VAL0_I17]], [[VAL1_I17]]
485; CHECK-NEXT:    [[VAL1_I18:%.*]] = extractelement <32 x i1> [[VAL1]], i64 18
486; CHECK-NEXT:    [[AND_I18:%.*]] = and i1 [[VAL0_I18]], [[VAL1_I18]]
487; CHECK-NEXT:    [[VAL1_I19:%.*]] = extractelement <32 x i1> [[VAL1]], i64 19
488; CHECK-NEXT:    [[AND_I19:%.*]] = and i1 [[VAL0_I19]], [[VAL1_I19]]
489; CHECK-NEXT:    [[VAL1_I20:%.*]] = extractelement <32 x i1> [[VAL1]], i64 20
490; CHECK-NEXT:    [[AND_I20:%.*]] = and i1 [[VAL0_I20]], [[VAL1_I20]]
491; CHECK-NEXT:    [[VAL1_I21:%.*]] = extractelement <32 x i1> [[VAL1]], i64 21
492; CHECK-NEXT:    [[AND_I21:%.*]] = and i1 [[VAL0_I21]], [[VAL1_I21]]
493; CHECK-NEXT:    [[VAL1_I22:%.*]] = extractelement <32 x i1> [[VAL1]], i64 22
494; CHECK-NEXT:    [[AND_I22:%.*]] = and i1 [[VAL0_I22]], [[VAL1_I22]]
495; CHECK-NEXT:    [[VAL1_I23:%.*]] = extractelement <32 x i1> [[VAL1]], i64 23
496; CHECK-NEXT:    [[AND_I23:%.*]] = and i1 [[VAL0_I23]], [[VAL1_I23]]
497; CHECK-NEXT:    [[VAL1_I24:%.*]] = extractelement <32 x i1> [[VAL1]], i64 24
498; CHECK-NEXT:    [[AND_I24:%.*]] = and i1 [[VAL0_I24]], [[VAL1_I24]]
499; CHECK-NEXT:    [[VAL1_I25:%.*]] = extractelement <32 x i1> [[VAL1]], i64 25
500; CHECK-NEXT:    [[AND_I25:%.*]] = and i1 [[VAL0_I25]], [[VAL1_I25]]
501; CHECK-NEXT:    [[VAL1_I26:%.*]] = extractelement <32 x i1> [[VAL1]], i64 26
502; CHECK-NEXT:    [[AND_I26:%.*]] = and i1 [[VAL0_I26]], [[VAL1_I26]]
503; CHECK-NEXT:    [[VAL1_I27:%.*]] = extractelement <32 x i1> [[VAL1]], i64 27
504; CHECK-NEXT:    [[AND_I27:%.*]] = and i1 [[VAL0_I27]], [[VAL1_I27]]
505; CHECK-NEXT:    [[VAL1_I28:%.*]] = extractelement <32 x i1> [[VAL1]], i64 28
506; CHECK-NEXT:    [[AND_I28:%.*]] = and i1 [[VAL0_I28]], [[VAL1_I28]]
507; CHECK-NEXT:    [[VAL1_I29:%.*]] = extractelement <32 x i1> [[VAL1]], i64 29
508; CHECK-NEXT:    [[AND_I29:%.*]] = and i1 [[VAL0_I29]], [[VAL1_I29]]
509; CHECK-NEXT:    [[VAL1_I30:%.*]] = extractelement <32 x i1> [[VAL1]], i64 30
510; CHECK-NEXT:    [[AND_I30:%.*]] = and i1 [[VAL0_I30]], [[VAL1_I30]]
511; CHECK-NEXT:    [[VAL1_I31:%.*]] = extractelement <32 x i1> [[VAL1]], i64 31
512; CHECK-NEXT:    [[AND_I31:%.*]] = and i1 [[VAL0_I31]], [[VAL1_I31]]
513; CHECK-NEXT:    [[AND_UPTO0:%.*]] = insertelement <32 x i1> poison, i1 [[AND_I0]], i64 0
514; CHECK-NEXT:    [[AND_UPTO1:%.*]] = insertelement <32 x i1> [[AND_UPTO0]], i1 [[AND_I1]], i64 1
515; CHECK-NEXT:    [[AND_UPTO2:%.*]] = insertelement <32 x i1> [[AND_UPTO1]], i1 [[AND_I2]], i64 2
516; CHECK-NEXT:    [[AND_UPTO3:%.*]] = insertelement <32 x i1> [[AND_UPTO2]], i1 [[AND_I3]], i64 3
517; CHECK-NEXT:    [[AND_UPTO4:%.*]] = insertelement <32 x i1> [[AND_UPTO3]], i1 [[AND_I4]], i64 4
518; CHECK-NEXT:    [[AND_UPTO5:%.*]] = insertelement <32 x i1> [[AND_UPTO4]], i1 [[AND_I5]], i64 5
519; CHECK-NEXT:    [[AND_UPTO6:%.*]] = insertelement <32 x i1> [[AND_UPTO5]], i1 [[AND_I6]], i64 6
520; CHECK-NEXT:    [[AND_UPTO7:%.*]] = insertelement <32 x i1> [[AND_UPTO6]], i1 [[AND_I7]], i64 7
521; CHECK-NEXT:    [[AND_UPTO8:%.*]] = insertelement <32 x i1> [[AND_UPTO7]], i1 [[AND_I8]], i64 8
522; CHECK-NEXT:    [[AND_UPTO9:%.*]] = insertelement <32 x i1> [[AND_UPTO8]], i1 [[AND_I9]], i64 9
523; CHECK-NEXT:    [[AND_UPTO10:%.*]] = insertelement <32 x i1> [[AND_UPTO9]], i1 [[AND_I10]], i64 10
524; CHECK-NEXT:    [[AND_UPTO11:%.*]] = insertelement <32 x i1> [[AND_UPTO10]], i1 [[AND_I11]], i64 11
525; CHECK-NEXT:    [[AND_UPTO12:%.*]] = insertelement <32 x i1> [[AND_UPTO11]], i1 [[AND_I12]], i64 12
526; CHECK-NEXT:    [[AND_UPTO13:%.*]] = insertelement <32 x i1> [[AND_UPTO12]], i1 [[AND_I13]], i64 13
527; CHECK-NEXT:    [[AND_UPTO14:%.*]] = insertelement <32 x i1> [[AND_UPTO13]], i1 [[AND_I14]], i64 14
528; CHECK-NEXT:    [[AND_UPTO15:%.*]] = insertelement <32 x i1> [[AND_UPTO14]], i1 [[AND_I15]], i64 15
529; CHECK-NEXT:    [[AND_UPTO16:%.*]] = insertelement <32 x i1> [[AND_UPTO15]], i1 [[AND_I16]], i64 16
530; CHECK-NEXT:    [[AND_UPTO17:%.*]] = insertelement <32 x i1> [[AND_UPTO16]], i1 [[AND_I17]], i64 17
531; CHECK-NEXT:    [[AND_UPTO18:%.*]] = insertelement <32 x i1> [[AND_UPTO17]], i1 [[AND_I18]], i64 18
532; CHECK-NEXT:    [[AND_UPTO19:%.*]] = insertelement <32 x i1> [[AND_UPTO18]], i1 [[AND_I19]], i64 19
533; CHECK-NEXT:    [[AND_UPTO20:%.*]] = insertelement <32 x i1> [[AND_UPTO19]], i1 [[AND_I20]], i64 20
534; CHECK-NEXT:    [[AND_UPTO21:%.*]] = insertelement <32 x i1> [[AND_UPTO20]], i1 [[AND_I21]], i64 21
535; CHECK-NEXT:    [[AND_UPTO22:%.*]] = insertelement <32 x i1> [[AND_UPTO21]], i1 [[AND_I22]], i64 22
536; CHECK-NEXT:    [[AND_UPTO23:%.*]] = insertelement <32 x i1> [[AND_UPTO22]], i1 [[AND_I23]], i64 23
537; CHECK-NEXT:    [[AND_UPTO24:%.*]] = insertelement <32 x i1> [[AND_UPTO23]], i1 [[AND_I24]], i64 24
538; CHECK-NEXT:    [[AND_UPTO25:%.*]] = insertelement <32 x i1> [[AND_UPTO24]], i1 [[AND_I25]], i64 25
539; CHECK-NEXT:    [[AND_UPTO26:%.*]] = insertelement <32 x i1> [[AND_UPTO25]], i1 [[AND_I26]], i64 26
540; CHECK-NEXT:    [[AND_UPTO27:%.*]] = insertelement <32 x i1> [[AND_UPTO26]], i1 [[AND_I27]], i64 27
541; CHECK-NEXT:    [[AND_UPTO28:%.*]] = insertelement <32 x i1> [[AND_UPTO27]], i1 [[AND_I28]], i64 28
542; CHECK-NEXT:    [[AND_UPTO29:%.*]] = insertelement <32 x i1> [[AND_UPTO28]], i1 [[AND_I29]], i64 29
543; CHECK-NEXT:    [[AND_UPTO30:%.*]] = insertelement <32 x i1> [[AND_UPTO29]], i1 [[AND_I30]], i64 30
544; CHECK-NEXT:    [[AND:%.*]] = insertelement <32 x i1> [[AND_UPTO30]], i1 [[AND_I31]], i64 31
545; CHECK-NEXT:    store <32 x i1> [[AND]], ptr [[DEST:%.*]], align 4
546; CHECK-NEXT:    ret void
547;
548  %src1 = getelementptr <32 x i1>, ptr %src0, i32 1
549  %val0 = load <32 x i1> , ptr %src0
550  %val1 = load <32 x i1> , ptr %src1
551  %and = and <32 x i1> %val0, %val1
552  store <32 x i1> %and, ptr %dest
553  ret void
554}
555
556; Test vector GEPs with more than one index.
557define void @f13(ptr %dest, <4 x ptr> %ptr, <4 x i32> %i,
558; CHECK-LABEL: @f13(
559; CHECK-NEXT:    [[DEST_I1:%.*]] = getelementptr ptr, ptr [[DEST:%.*]], i32 1
560; CHECK-NEXT:    [[DEST_I2:%.*]] = getelementptr ptr, ptr [[DEST]], i32 2
561; CHECK-NEXT:    [[DEST_I3:%.*]] = getelementptr ptr, ptr [[DEST]], i32 3
562; CHECK-NEXT:    [[PTR_I0:%.*]] = extractelement <4 x ptr> [[PTR:%.*]], i64 0
563; CHECK-NEXT:    [[I_I0:%.*]] = extractelement <4 x i32> [[I:%.*]], i64 0
564; CHECK-NEXT:    [[VAL_I0:%.*]] = getelementptr inbounds [4 x float], ptr [[PTR_I0]], i32 0, i32 [[I_I0]]
565; CHECK-NEXT:    [[PTR_I1:%.*]] = extractelement <4 x ptr> [[PTR]], i64 1
566; CHECK-NEXT:    [[I_I1:%.*]] = extractelement <4 x i32> [[I]], i64 1
567; CHECK-NEXT:    [[VAL_I1:%.*]] = getelementptr inbounds [4 x float], ptr [[PTR_I1]], i32 1, i32 [[I_I1]]
568; CHECK-NEXT:    [[PTR_I2:%.*]] = extractelement <4 x ptr> [[PTR]], i64 2
569; CHECK-NEXT:    [[I_I2:%.*]] = extractelement <4 x i32> [[I]], i64 2
570; CHECK-NEXT:    [[VAL_I2:%.*]] = getelementptr inbounds [4 x float], ptr [[PTR_I2]], i32 2, i32 [[I_I2]]
571; CHECK-NEXT:    [[PTR_I3:%.*]] = extractelement <4 x ptr> [[PTR]], i64 3
572; CHECK-NEXT:    [[I_I3:%.*]] = extractelement <4 x i32> [[I]], i64 3
573; CHECK-NEXT:    [[VAL_I3:%.*]] = getelementptr inbounds [4 x float], ptr [[PTR_I3]], i32 3, i32 [[I_I3]]
574; CHECK-NEXT:    store ptr [[VAL_I0]], ptr [[DEST]], align 32
575; CHECK-NEXT:    store ptr [[VAL_I1]], ptr [[DEST_I1]], align 8
576; CHECK-NEXT:    store ptr [[VAL_I2]], ptr [[DEST_I2]], align 16
577; CHECK-NEXT:    store ptr [[VAL_I3]], ptr [[DEST_I3]], align 8
578; CHECK-NEXT:    ret void
579;
580  ptr %other) {
581  %val = getelementptr inbounds [4 x float], <4 x ptr> %ptr,
582  <4 x i32> <i32 0, i32 1, i32 2, i32 3>,
583  <4 x i32> %i
584  store <4 x ptr> %val, ptr %dest
585  ret void
586}
587
588; Test combinations of vector and non-vector PHIs.
589define <4 x float> @f14(<4 x float> %acc, i32 %count) {
590; CHECK-LABEL: @f14(
591; CHECK-NEXT:  entry:
592; CHECK-NEXT:    [[ACC_I0:%.*]] = extractelement <4 x float> [[ACC:%.*]], i64 0
593; CHECK-NEXT:    [[ACC_I1:%.*]] = extractelement <4 x float> [[ACC]], i64 1
594; CHECK-NEXT:    [[ACC_I2:%.*]] = extractelement <4 x float> [[ACC]], i64 2
595; CHECK-NEXT:    [[ACC_I3:%.*]] = extractelement <4 x float> [[ACC]], i64 3
596; CHECK-NEXT:    br label [[LOOP:%.*]]
597; CHECK:       loop:
598; CHECK-NEXT:    [[THIS_ACC_I0:%.*]] = phi float [ [[ACC_I0]], [[ENTRY:%.*]] ], [ [[NEXT_ACC_I0:%.*]], [[LOOP]] ]
599; CHECK-NEXT:    [[THIS_ACC_I1:%.*]] = phi float [ [[ACC_I1]], [[ENTRY]] ], [ [[NEXT_ACC_I1:%.*]], [[LOOP]] ]
600; CHECK-NEXT:    [[THIS_ACC_I2:%.*]] = phi float [ [[ACC_I2]], [[ENTRY]] ], [ [[NEXT_ACC_I2:%.*]], [[LOOP]] ]
601; CHECK-NEXT:    [[THIS_ACC_I3:%.*]] = phi float [ [[ACC_I3]], [[ENTRY]] ], [ [[NEXT_ACC_I3:%.*]], [[LOOP]] ]
602; CHECK-NEXT:    [[THIS_COUNT:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY]] ], [ [[NEXT_COUNT:%.*]], [[LOOP]] ]
603; CHECK-NEXT:    [[THIS_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[THIS_ACC_I0]], i64 0
604; CHECK-NEXT:    [[THIS_ACC_UPTO1:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO0]], float [[THIS_ACC_I1]], i64 1
605; CHECK-NEXT:    [[THIS_ACC_UPTO2:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO1]], float [[THIS_ACC_I2]], i64 2
606; CHECK-NEXT:    [[THIS_ACC:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO2]], float [[THIS_ACC_I3]], i64 3
607; CHECK-NEXT:    [[FOO:%.*]] = call <4 x float> @ext(<4 x float> [[THIS_ACC]])
608; CHECK-NEXT:    [[FOO_I0:%.*]] = extractelement <4 x float> [[FOO]], i64 0
609; CHECK-NEXT:    [[NEXT_ACC_I0]] = fadd float [[THIS_ACC_I0]], [[FOO_I0]]
610; CHECK-NEXT:    [[FOO_I1:%.*]] = extractelement <4 x float> [[FOO]], i64 1
611; CHECK-NEXT:    [[NEXT_ACC_I1]] = fadd float [[THIS_ACC_I1]], [[FOO_I1]]
612; CHECK-NEXT:    [[FOO_I2:%.*]] = extractelement <4 x float> [[FOO]], i64 2
613; CHECK-NEXT:    [[NEXT_ACC_I2]] = fadd float [[THIS_ACC_I2]], [[FOO_I2]]
614; CHECK-NEXT:    [[FOO_I3:%.*]] = extractelement <4 x float> [[FOO]], i64 3
615; CHECK-NEXT:    [[NEXT_ACC_I3]] = fadd float [[THIS_ACC_I3]], [[FOO_I3]]
616; CHECK-NEXT:    [[NEXT_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[NEXT_ACC_I0]], i64 0
617; CHECK-NEXT:    [[NEXT_ACC_UPTO1:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO0]], float [[NEXT_ACC_I1]], i64 1
618; CHECK-NEXT:    [[NEXT_ACC_UPTO2:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO1]], float [[NEXT_ACC_I2]], i64 2
619; CHECK-NEXT:    [[NEXT_ACC:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO2]], float [[NEXT_ACC_I3]], i64 3
620; CHECK-NEXT:    [[NEXT_COUNT]] = sub i32 [[THIS_COUNT]], 1
621; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[NEXT_COUNT]], 0
622; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
623; CHECK:       exit:
624; CHECK-NEXT:    ret <4 x float> [[NEXT_ACC]]
625;
626entry:
627  br label %loop
628
629loop:
630  %this_acc = phi <4 x float> [ %acc, %entry ], [ %next_acc, %loop ]
631  %this_count = phi i32 [ %count, %entry ], [ %next_count, %loop ]
632  %foo = call <4 x float> @ext(<4 x float> %this_acc)
633  %next_acc = fadd <4 x float> %this_acc, %foo
634  %next_count = sub i32 %this_count, 1
635  %cmp = icmp eq i32 %next_count, 0
636  br i1 %cmp, label %loop, label %exit
637
638exit:
639  ret <4 x float> %next_acc
640}
641
642; Test unary operator scalarization.
643define void @f15(<4 x float> %init, ptr %base, i32 %count) {
644; CHECK-LABEL: @f15(
645; CHECK-NEXT:  entry:
646; CHECK-NEXT:    br label [[LOOP:%.*]]
647; CHECK:       loop:
648; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
649; CHECK-NEXT:    [[NEXTI]] = sub i32 [[I]], 1
650; CHECK-NEXT:    [[PTR:%.*]] = getelementptr <4 x float>, ptr [[BASE:%.*]], i32 [[I]]
651; CHECK-NEXT:    [[VAL_I0:%.*]] = load float, ptr [[PTR]], align 16
652; CHECK-NEXT:    [[PTR_I1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
653; CHECK-NEXT:    [[VAL_I1:%.*]] = load float, ptr [[PTR_I1]], align 4
654; CHECK-NEXT:    [[PTR_I2:%.*]] = getelementptr float, ptr [[PTR]], i32 2
655; CHECK-NEXT:    [[VAL_I2:%.*]] = load float, ptr [[PTR_I2]], align 8
656; CHECK-NEXT:    [[PTR_I3:%.*]] = getelementptr float, ptr [[PTR]], i32 3
657; CHECK-NEXT:    [[VAL_I3:%.*]] = load float, ptr [[PTR_I3]], align 4
658; CHECK-NEXT:    [[NEG_I0:%.*]] = fneg float [[VAL_I0]]
659; CHECK-NEXT:    [[NEG_I1:%.*]] = fneg float [[VAL_I1]]
660; CHECK-NEXT:    [[NEG_I2:%.*]] = fneg float [[VAL_I2]]
661; CHECK-NEXT:    [[NEG_I3:%.*]] = fneg float [[VAL_I3]]
662; CHECK-NEXT:    [[NEG_UPTO0:%.*]] = insertelement <4 x float> poison, float [[NEG_I0]], i64 0
663; CHECK-NEXT:    [[NEG_UPTO1:%.*]] = insertelement <4 x float> [[NEG_UPTO0]], float [[NEG_I1]], i64 1
664; CHECK-NEXT:    [[NEG_UPTO2:%.*]] = insertelement <4 x float> [[NEG_UPTO1]], float [[NEG_I2]], i64 2
665; CHECK-NEXT:    [[NEG:%.*]] = insertelement <4 x float> [[NEG_UPTO2]], float [[NEG_I3]], i64 3
666; CHECK-NEXT:    [[CALL:%.*]] = call <4 x float> @ext(<4 x float> [[NEG]])
667; CHECK-NEXT:    [[CALL_I0:%.*]] = extractelement <4 x float> [[CALL]], i64 0
668; CHECK-NEXT:    [[CMP_I0:%.*]] = fcmp ogt float [[CALL_I0]], 1.000000e+00
669; CHECK-NEXT:    [[CALL_I1:%.*]] = extractelement <4 x float> [[CALL]], i64 1
670; CHECK-NEXT:    [[CMP_I1:%.*]] = fcmp ogt float [[CALL_I1]], 2.000000e+00
671; CHECK-NEXT:    [[CALL_I2:%.*]] = extractelement <4 x float> [[CALL]], i64 2
672; CHECK-NEXT:    [[CMP_I2:%.*]] = fcmp ogt float [[CALL_I2]], 3.000000e+00
673; CHECK-NEXT:    [[CALL_I3:%.*]] = extractelement <4 x float> [[CALL]], i64 3
674; CHECK-NEXT:    [[CMP_I3:%.*]] = fcmp ogt float [[CALL_I3]], 4.000000e+00
675; CHECK-NEXT:    [[SEL_I0:%.*]] = select i1 [[CMP_I0]], float [[CALL_I0]], float 5.000000e+00
676; CHECK-NEXT:    [[SEL_I1:%.*]] = select i1 [[CMP_I1]], float [[CALL_I1]], float 6.000000e+00
677; CHECK-NEXT:    [[SEL_I2:%.*]] = select i1 [[CMP_I2]], float [[CALL_I2]], float 7.000000e+00
678; CHECK-NEXT:    [[SEL_I3:%.*]] = select i1 [[CMP_I3]], float [[CALL_I3]], float 8.000000e+00
679; CHECK-NEXT:    store float [[SEL_I0]], ptr [[PTR]], align 16
680; CHECK-NEXT:    store float [[SEL_I1]], ptr [[PTR_I1]], align 4
681; CHECK-NEXT:    store float [[SEL_I2]], ptr [[PTR_I2]], align 8
682; CHECK-NEXT:    store float [[SEL_I3]], ptr [[PTR_I3]], align 4
683; CHECK-NEXT:    [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
684; CHECK-NEXT:    br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
685; CHECK:       exit:
686; CHECK-NEXT:    ret void
687;
688entry:
689  br label %loop
690
691loop:
692  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
693  %acc = phi <4 x float> [ %init, %entry ], [ %sel, %loop ]
694  %nexti = sub i32 %i, 1
695
696  %ptr = getelementptr <4 x float>, ptr %base, i32 %i
697  %val = load <4 x float> , ptr %ptr
698  %neg = fneg <4 x float> %val
699  %call = call <4 x float> @ext(<4 x float> %neg)
700  %cmp = fcmp ogt <4 x float> %call,
701  <float 1.0, float 2.0, float 3.0, float 4.0>
702  %sel = select <4 x i1> %cmp, <4 x float> %call,
703  <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
704  store <4 x float> %sel, ptr %ptr
705
706  %test = icmp eq i32 %nexti, 0
707  br i1 %test, label %loop, label %exit
708
709exit:
710  ret void
711}
712
713; Check that IR flags are preserved.
714define <2 x i32> @f16(<2 x i32> %i, <2 x i32> %j) {
715; CHECK-LABEL: @f16(
716; CHECK-NEXT:    [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i64 0
717; CHECK-NEXT:    [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i64 0
718; CHECK-NEXT:    [[RES_I0:%.*]] = add nuw nsw i32 [[I_I0]], [[J_I0]]
719; CHECK-NEXT:    [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i64 1
720; CHECK-NEXT:    [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i64 1
721; CHECK-NEXT:    [[RES_I1:%.*]] = add nuw nsw i32 [[I_I1]], [[J_I1]]
722; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i64 0
723; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i64 1
724; CHECK-NEXT:    ret <2 x i32> [[RES]]
725;
726  %res = add nuw nsw <2 x i32> %i, %j
727  ret <2 x i32> %res
728}
729define <2 x i32> @f17(<2 x i32> %i, <2 x i32> %j) {
730; CHECK-LABEL: @f17(
731; CHECK-NEXT:    [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i64 0
732; CHECK-NEXT:    [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i64 0
733; CHECK-NEXT:    [[RES_I0:%.*]] = sdiv exact i32 [[I_I0]], [[J_I0]]
734; CHECK-NEXT:    [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i64 1
735; CHECK-NEXT:    [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i64 1
736; CHECK-NEXT:    [[RES_I1:%.*]] = sdiv exact i32 [[I_I1]], [[J_I1]]
737; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i64 0
738; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i64 1
739; CHECK-NEXT:    ret <2 x i32> [[RES]]
740;
741  %res = sdiv exact <2 x i32> %i, %j
742  ret <2 x i32> %res
743}
744define <2 x float> @f18(<2 x float> %x, <2 x float> %y) {
745; CHECK-LABEL: @f18(
746; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
747; CHECK-NEXT:    [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0
748; CHECK-NEXT:    [[RES_I0:%.*]] = fadd fast float [[X_I0]], [[Y_I0]]
749; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
750; CHECK-NEXT:    [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1
751; CHECK-NEXT:    [[RES_I1:%.*]] = fadd fast float [[X_I1]], [[Y_I1]]
752; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i64 0
753; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1
754; CHECK-NEXT:    ret <2 x float> [[RES]]
755;
756  %res = fadd fast <2 x float> %x, %y
757  ret <2 x float> %res
758}
759define <2 x float> @f19(<2 x float> %x) {
760; CHECK-LABEL: @f19(
761; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
762; CHECK-NEXT:    [[RES_I0:%.*]] = fneg fast float [[X_I0]]
763; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
764; CHECK-NEXT:    [[RES_I1:%.*]] = fneg fast float [[X_I1]]
765; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i64 0
766; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1
767; CHECK-NEXT:    ret <2 x float> [[RES]]
768;
769  %res = fneg fast <2 x float> %x
770  ret <2 x float> %res
771}
772define <2 x i1> @f20(<2 x float> %x, <2 x float> %y) {
773; CHECK-LABEL: @f20(
774; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
775; CHECK-NEXT:    [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0
776; CHECK-NEXT:    [[RES_I0:%.*]] = fcmp fast ogt float [[X_I0]], [[Y_I0]]
777; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
778; CHECK-NEXT:    [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1
779; CHECK-NEXT:    [[RES_I1:%.*]] = fcmp fast ogt float [[X_I1]], [[Y_I1]]
780; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[RES_I0]], i64 0
781; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x i1> [[RES_UPTO0]], i1 [[RES_I1]], i64 1
782; CHECK-NEXT:    ret <2 x i1> [[RES]]
783;
784  %res = fcmp fast ogt <2 x float> %x, %y
785  ret <2 x i1> %res
786}
787declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
788define <2 x float> @f21(<2 x float> %x) {
789; CHECK-LABEL: @f21(
790; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
791; CHECK-NEXT:    [[RES_I0:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I0]])
792; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
793; CHECK-NEXT:    [[RES_I1:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I1]])
794; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i64 0
795; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1
796; CHECK-NEXT:    ret <2 x float> [[RES]]
797;
798  %res = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
799  ret <2 x float> %res
800}
801declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
802define <2 x float> @f22(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
803; CHECK-LABEL: @f22(
804; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
805; CHECK-NEXT:    [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0
806; CHECK-NEXT:    [[Z_I0:%.*]] = extractelement <2 x float> [[Z:%.*]], i64 0
807; CHECK-NEXT:    [[RES_I0:%.*]] = call fast float @llvm.fma.f32(float [[X_I0]], float [[Y_I0]], float [[Z_I0]])
808; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
809; CHECK-NEXT:    [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1
810; CHECK-NEXT:    [[Z_I1:%.*]] = extractelement <2 x float> [[Z]], i64 1
811; CHECK-NEXT:    [[RES_I1:%.*]] = call fast float @llvm.fma.f32(float [[X_I1]], float [[Y_I1]], float [[Z_I1]])
812; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i64 0
813; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1
814; CHECK-NEXT:    ret <2 x float> [[RES]]
815;
816  %res = call fast <2 x float> @llvm.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z)
817  ret <2 x float> %res
818}
819
820; See https://reviews.llvm.org/D83101#2133062
821define <2 x i32> @f23_crash(<2 x i32> %srcvec, i32 %v1) {
822; CHECK-LABEL: @f23_crash(
823; CHECK-NEXT:    [[SRCVEC_I0:%.*]] = extractelement <2 x i32> [[SRCVEC:%.*]], i64 0
824; CHECK-NEXT:    [[T1_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[SRCVEC_I0]], i64 0
825; CHECK-NEXT:    [[T1:%.*]] = insertelement <2 x i32> [[T1_UPTO0]], i32 [[V1:%.*]], i64 1
826; CHECK-NEXT:    ret <2 x i32> [[T1]]
827;
828  %v0 = extractelement <2 x i32> %srcvec, i32 0
829  %t0 = insertelement <2 x i32> undef, i32 %v0, i32 0
830  %t1 = insertelement <2 x i32> %t0, i32 %v1, i32 1
831  ret <2 x i32> %t1
832}
833
834define <2 x i32> @f24(<2 x i32> %src) {
835; CHECK-LABEL: @f24(
836; CHECK-NEXT:    [[SRC_I0:%.*]] = extractelement <2 x i32> [[SRC:%.*]], i64 0
837; CHECK-NEXT:    [[FRZ_I0:%.*]] = freeze i32 [[SRC_I0]]
838; CHECK-NEXT:    [[SRC_I1:%.*]] = extractelement <2 x i32> [[SRC]], i64 1
839; CHECK-NEXT:    [[FRZ_I1:%.*]] = freeze i32 [[SRC_I1]]
840; CHECK-NEXT:    [[FRZ_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[FRZ_I0]], i64 0
841; CHECK-NEXT:    [[FRZ:%.*]] = insertelement <2 x i32> [[FRZ_UPTO0]], i32 [[FRZ_I1]], i64 1
842; CHECK-NEXT:    ret <2 x i32> [[FRZ]]
843;
844  %frz = freeze <2 x i32> %src
845  ret <2 x i32> %frz
846}
847
848define <2 x float> @f25(<2 x float> %src) {
849; CHECK-LABEL: @f25(
850; CHECK-NEXT:    [[SRC_I0:%.*]] = extractelement <2 x float> [[SRC:%.*]], i64 0
851; CHECK-NEXT:    [[ADD_I0:%.*]] = fadd float [[SRC_I0]], [[SRC_I0]]
852; CHECK-NEXT:    [[SRC_I1:%.*]] = extractelement <2 x float> [[SRC]], i64 1
853; CHECK-NEXT:    [[ADD_I1:%.*]] = fadd float [[SRC_I1]], [[SRC_I1]]
854; CHECK-NEXT:    [[FRZ_I0:%.*]] = freeze float [[ADD_I0]]
855; CHECK-NEXT:    [[FRZ_I1:%.*]] = freeze float [[ADD_I1]]
856; CHECK-NEXT:    [[MUL_I0:%.*]] = fmul float [[FRZ_I0]], [[FRZ_I0]]
857; CHECK-NEXT:    [[MUL_I1:%.*]] = fmul float [[FRZ_I1]], [[FRZ_I1]]
858; CHECK-NEXT:    [[MUL_UPTO0:%.*]] = insertelement <2 x float> poison, float [[MUL_I0]], i64 0
859; CHECK-NEXT:    [[MUL:%.*]] = insertelement <2 x float> [[MUL_UPTO0]], float [[MUL_I1]], i64 1
860; CHECK-NEXT:    ret <2 x float> [[MUL]]
861;
862  %add = fadd <2 x float> %src, %src
863  %frz = freeze <2 x float> %add
864  %mul = fmul <2 x float> %frz, %frz
865  ret <2 x float> %mul
866}
867
868define <2 x i8> @test_copy_trunc_flags(<2 x i32> %src) {
869; CHECK-LABEL: @test_copy_trunc_flags(
870; CHECK-NEXT:    [[SRC_I0:%.*]] = extractelement <2 x i32> [[SRC:%.*]], i64 0
871; CHECK-NEXT:    [[TRUNC_I0:%.*]] = trunc nuw nsw i32 [[SRC_I0]] to i8
872; CHECK-NEXT:    [[SRC_I1:%.*]] = extractelement <2 x i32> [[SRC]], i64 1
873; CHECK-NEXT:    [[TRUNC_I1:%.*]] = trunc nuw nsw i32 [[SRC_I1]] to i8
874; CHECK-NEXT:    [[TRUNC_UPTO0:%.*]] = insertelement <2 x i8> poison, i8 [[TRUNC_I0]], i64 0
875; CHECK-NEXT:    [[TRUNC:%.*]] = insertelement <2 x i8> [[TRUNC_UPTO0]], i8 [[TRUNC_I1]], i64 1
876; CHECK-NEXT:    ret <2 x i8> [[TRUNC]]
877;
878  %trunc = trunc nuw nsw <2 x i32> %src to <2 x i8>
879  ret <2 x i8> %trunc
880}
881
882!0 = !{ !"root" }
883!1 = !{ !"set1", !0 }
884!2 = !{ !"set2", !0 }
885!3 = !{ !3, !{!"llvm.loop.parallel_accesses", !13} }
886!4 = !{ float 4.0 }
887!5 = !{ i64 0, i64 8, null }
888!13 = distinct !{}
889