xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/X86/insertvalue.ll (revision 580210a0c938531ef9fd79f9ffedb93eeb2e66c2)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s
3
; Two-lane multiply-add feeding an insertvalue chain. Lane i computes
; %2[i] * %3[i] + %1[i] on doubles; the two scalar results are inserted into
; a [2 x double] aggregate, which is stored through the sret pointer %0.
; Expected output: the loads, fmul and fadd become <2 x double> operations,
; and every insertvalue operand is fed by an extractelement of the vector sum.
4define void @julia_2xdouble(ptr sret([2 x double]), ptr, ptr, ptr) {
5; CHECK-LABEL: @julia_2xdouble(
6; CHECK-NEXT:  top:
7; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x double>, ptr [[TMP2:%.*]], align 4
8; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x double>, ptr [[TMP3:%.*]], align 4
9; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x double> [[TMP5]], [[TMP7]]
10; CHECK-NEXT:    [[TMP10:%.*]] = load <2 x double>, ptr [[TMP1:%.*]], align 4
11; CHECK-NEXT:    [[TMP11:%.*]] = fadd <2 x double> [[TMP8]], [[TMP10]]
12; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[TMP11]], i32 0
13; CHECK-NEXT:    [[I0:%.*]] = insertvalue [2 x double] undef, double [[TMP12]], 0
14; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x double> [[TMP11]], i32 1
15; CHECK-NEXT:    [[I1:%.*]] = insertvalue [2 x double] [[I0]], double [[TMP13]], 1
16; CHECK-NEXT:    store [2 x double] [[I1]], ptr [[TMP0:%.*]], align 4
17; CHECK-NEXT:    ret void
18;
19top:
  ; Lane 0 product: element 0 of %2 times element 0 of %3.
20  %x0 = load double, ptr %2, align 4
21  %y0 = load double, ptr %3, align 4
22  %m0 = fmul double %x0, %y0
  ; Lane 1 product: element 1 of %2 times element 1 of %3.
23  %px1 = getelementptr inbounds [2 x double], ptr %2, i64 0, i64 1
24  %x1 = load double, ptr %px1, align 4
25  %py1 = getelementptr inbounds [2 x double], ptr %3, i64 0, i64 1
26  %y1 = load double, ptr %py1, align 4
27  %m1 = fmul double %x1, %y1
  ; Add element 0 of %1 and start the result aggregate at index 0.
28  %z0 = load double, ptr %1, align 4
29  %a0 = fadd double %m0, %z0
30  %i0 = insertvalue [2 x double] undef, double %a0, 0
  ; Add element 1 of %1 and complete the aggregate at index 1.
31  %pz1 = getelementptr inbounds [2 x double], ptr %1, i64 0, i64 1
32  %z1 = load double, ptr %pz1, align 4
33  %a1 = fadd double %m1, %z1
34  %i1 = insertvalue [2 x double] %i0, double %a1, 1
  ; Write the assembled [2 x double] through the sret pointer.
35  store [2 x double] %i1, ptr %0, align 4
36  ret void
37}
38
; Four-lane float version of the multiply-add test. Lane i computes
; %2[i] * %3[i] + %1[i]; the four scalar results are inserted into a
; [4 x float] aggregate, which is stored through the sret pointer %0.
; Expected output: <4 x float> loads, one vector fmul and one vector fadd,
; followed by an extractelement/insertvalue pair per lane.
39define void @julia_4xfloat(ptr sret([4 x float]), ptr, ptr, ptr) {
40; CHECK-LABEL: @julia_4xfloat(
41; CHECK-NEXT:  top:
42; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x float>, ptr [[TMP2:%.*]], align 4
43; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x float>, ptr [[TMP3:%.*]], align 4
44; CHECK-NEXT:    [[TMP8:%.*]] = fmul <4 x float> [[TMP5]], [[TMP7]]
45; CHECK-NEXT:    [[TMP10:%.*]] = load <4 x float>, ptr [[TMP1:%.*]], align 4
46; CHECK-NEXT:    [[TMP11:%.*]] = fadd <4 x float> [[TMP8]], [[TMP10]]
47; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x float> [[TMP11]], i32 0
48; CHECK-NEXT:    [[I0:%.*]] = insertvalue [4 x float] undef, float [[TMP12]], 0
49; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x float> [[TMP11]], i32 1
50; CHECK-NEXT:    [[I1:%.*]] = insertvalue [4 x float] [[I0]], float [[TMP13]], 1
51; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x float> [[TMP11]], i32 2
52; CHECK-NEXT:    [[I2:%.*]] = insertvalue [4 x float] [[I1]], float [[TMP14]], 2
53; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x float> [[TMP11]], i32 3
54; CHECK-NEXT:    [[I3:%.*]] = insertvalue [4 x float] [[I2]], float [[TMP15]], 3
55; CHECK-NEXT:    store [4 x float] [[I3]], ptr [[TMP0:%.*]], align 4
56; CHECK-NEXT:    ret void
57;
58top:
  ; Lane 0 product: element 0 of %2 times element 0 of %3.
59  %x0 = load float, ptr %2, align 4
60  %y0 = load float, ptr %3, align 4
61  %m0 = fmul float %x0, %y0
  ; Lane 1 product.
62  %px1 = getelementptr inbounds [4 x float], ptr %2, i64 0, i64 1
63  %x1 = load float, ptr %px1, align 4
64  %py1 = getelementptr inbounds [4 x float], ptr %3, i64 0, i64 1
65  %y1 = load float, ptr %py1, align 4
66  %m1 = fmul float %x1, %y1
  ; Lane 2 product.
67  %px2 = getelementptr inbounds [4 x float], ptr %2, i64 0, i64 2
68  %x2 = load float, ptr %px2, align 4
69  %py2 = getelementptr inbounds [4 x float], ptr %3, i64 0, i64 2
70  %y2 = load float, ptr %py2, align 4
71  %m2 = fmul float %x2, %y2
  ; Lane 3 product.
72  %px3 = getelementptr inbounds [4 x float], ptr %2, i64 0, i64 3
73  %x3 = load float, ptr %px3, align 4
74  %py3 = getelementptr inbounds [4 x float], ptr %3, i64 0, i64 3
75  %y3 = load float, ptr %py3, align 4
76  %m3 = fmul float %x3, %y3
  ; For each lane in turn: add the matching element of %1 to the product and
  ; insert the sum at the same index of the result aggregate.
77  %z0 = load float, ptr %1, align 4
78  %a0 = fadd float %m0, %z0
79  %i0 = insertvalue [4 x float] undef, float %a0, 0
80  %pz1 = getelementptr inbounds [4 x float], ptr %1, i64 0, i64 1
81  %z1 = load float, ptr %pz1, align 4
82  %a1 = fadd float %m1, %z1
83  %i1 = insertvalue [4 x float] %i0, float %a1, 1
84  %pz2 = getelementptr inbounds [4 x float], ptr %1, i64 0, i64 2
85  %z2 = load float, ptr %pz2, align 4
86  %a2 = fadd float %m2, %z2
87  %i2 = insertvalue [4 x float] %i1, float %a2, 2
88  %pz3 = getelementptr inbounds [4 x float], ptr %1, i64 0, i64 3
89  %z3 = load float, ptr %pz3, align 4
90  %a3 = fadd float %m3, %z3
91  %i3 = insertvalue [4 x float] %i2, float %a3, 3
  ; Write the assembled [4 x float] through the sret pointer.
92  store [4 x float] %i3, ptr %0, align 4
93  ret void
94}
95
; Element-wise subtraction of two [4 x float] aggregates that are loaded as
; whole values from %a and %b and taken apart with extractvalue. The four
; differences are reassembled with an insertvalue chain and stored to %c.
; Expected output: the aggregate loads are replaced by <4 x float> loads from
; %a and %b, the four fsubs become one vector fsub, and the insertvalue chain
; is fed by extractelement.
96define void @julia_load_array_of_float(ptr %a, ptr %b, ptr %c) {
97; CHECK-LABEL: @julia_load_array_of_float(
98; CHECK-NEXT:  top:
99; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4
100; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
101; CHECK-NEXT:    [[TMP4:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]]
102; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP4]], i32 0
103; CHECK-NEXT:    [[C_ARR0:%.*]] = insertvalue [4 x float] undef, float [[TMP5]], 0
104; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
105; CHECK-NEXT:    [[C_ARR1:%.*]] = insertvalue [4 x float] [[C_ARR0]], float [[TMP6]], 1
106; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
107; CHECK-NEXT:    [[C_ARR2:%.*]] = insertvalue [4 x float] [[C_ARR1]], float [[TMP7]], 2
108; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[TMP4]], i32 3
109; CHECK-NEXT:    [[C_ARR3:%.*]] = insertvalue [4 x float] [[C_ARR2]], float [[TMP8]], 3
110; CHECK-NEXT:    store [4 x float] [[C_ARR3]], ptr [[C:%.*]], align 4
111; CHECK-NEXT:    ret void
112;
113top:
  ; Elements of %a_arr are extracted out of lane order: 0, 2, 1 here and
  ; index 3 further down.
  ; NOTE(review): the shuffled order presumably exercises the vectorizer's
  ; handling of unordered extracts - confirm against the original commit.
114  %a_arr = load [4 x float], ptr %a, align 4
115  %a0 = extractvalue [4 x float] %a_arr, 0
116  %a2 = extractvalue [4 x float] %a_arr, 2
117  %a1 = extractvalue [4 x float] %a_arr, 1
  ; Same out-of-order extraction for %b_arr.
118  %b_arr = load [4 x float], ptr %b, align 4
119  %b0 = extractvalue [4 x float] %b_arr, 0
120  %b2 = extractvalue [4 x float] %b_arr, 2
121  %b1 = extractvalue [4 x float] %b_arr, 1
122  %a3 = extractvalue [4 x float] %a_arr, 3
  ; The per-lane subtractions are also listed out of order (c1, c0, c2, and
  ; c3 only after the first two inserts).
123  %c1 = fsub float %a1, %b1
124  %b3 = extractvalue [4 x float] %b_arr, 3
125  %c0 = fsub float %a0, %b0
126  %c2 = fsub float %a2, %b2
  ; The insertvalue chain itself fills indices 0..3 in ascending order.
127  %c_arr0 = insertvalue [4 x float] undef, float %c0, 0
128  %c_arr1 = insertvalue [4 x float] %c_arr0, float %c1, 1
129  %c3 = fsub float %a3, %b3
130  %c_arr2 = insertvalue [4 x float] %c_arr1, float %c2, 2
131  %c_arr3 = insertvalue [4 x float] %c_arr2, float %c3, 3
132  store [4 x float] %c_arr3, ptr %c, align 4
133  ret void
134}
135
; Integer counterpart of the aggregate-load test: two [4 x i32] values are
; loaded whole from %a and %b, taken apart with extractvalue, subtracted
; element-wise, reassembled with an insertvalue chain and stored to %c.
; Expected output: <4 x i32> loads from %a and %b, one vector sub, and an
; extractelement feeding each insertvalue.
136define void @julia_load_array_of_i32(ptr %a, ptr %b, ptr %c) {
137; CHECK-LABEL: @julia_load_array_of_i32(
138; CHECK-NEXT:  top:
139; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A:%.*]], align 4
140; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
141; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
142; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0
143; CHECK-NEXT:    [[C_ARR0:%.*]] = insertvalue [4 x i32] undef, i32 [[TMP5]], 0
144; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1
145; CHECK-NEXT:    [[C_ARR1:%.*]] = insertvalue [4 x i32] [[C_ARR0]], i32 [[TMP6]], 1
146; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2
147; CHECK-NEXT:    [[C_ARR2:%.*]] = insertvalue [4 x i32] [[C_ARR1]], i32 [[TMP7]], 2
148; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
149; CHECK-NEXT:    [[C_ARR3:%.*]] = insertvalue [4 x i32] [[C_ARR2]], i32 [[TMP8]], 3
150; CHECK-NEXT:    store [4 x i32] [[C_ARR3]], ptr [[C:%.*]], align 4
151; CHECK-NEXT:    ret void
152;
153top:
  ; Elements of %a_arr are extracted out of lane order: 0, 2, 1 here and
  ; index 3 further down.
154  %a_arr = load [4 x i32], ptr %a, align 4
155  %a0 = extractvalue [4 x i32] %a_arr, 0
156  %a2 = extractvalue [4 x i32] %a_arr, 2
157  %a1 = extractvalue [4 x i32] %a_arr, 1
  ; Same out-of-order extraction for %b_arr.
158  %b_arr = load [4 x i32], ptr %b, align 4
159  %b0 = extractvalue [4 x i32] %b_arr, 0
160  %b2 = extractvalue [4 x i32] %b_arr, 2
161  %b1 = extractvalue [4 x i32] %b_arr, 1
162  %a3 = extractvalue [4 x i32] %a_arr, 3
  ; The per-lane subtractions are listed as c1, c0, c2, with c3 placed after
  ; the first two inserts.
163  %c1 = sub i32 %a1, %b1
164  %b3 = extractvalue [4 x i32] %b_arr, 3
165  %c0 = sub i32 %a0, %b0
166  %c2 = sub i32 %a2, %b2
  ; The insertvalue chain fills indices 0..3 in ascending order.
167  %c_arr0 = insertvalue [4 x i32] undef, i32 %c0, 0
168  %c_arr1 = insertvalue [4 x i32] %c_arr0, i32 %c1, 1
169  %c3 = sub i32 %a3, %b3
170  %c_arr2 = insertvalue [4 x i32] %c_arr1, i32 %c2, 2
171  %c_arr3 = insertvalue [4 x i32] %c_arr2, i32 %c3, 3
172  store [4 x i32] %c_arr3, ptr %c, align 4
173  ret void
174}
175
176; Almost identical to the previous test, but with an element type (i16) that should NOT be vectorized.
177;
; Negative test: same shape as @julia_load_array_of_i32 but with [4 x i16]
; aggregates. The expected output repeats the scalar input instruction for
; instruction (aggregate loads, extractvalue, scalar sub, insertvalue, store),
; i.e. nothing is vectorized for this test's triple/CPU settings.
; NOTE(review): the reason i16 is left scalar (cost model vs. legality) is not
; visible in this file - confirm before relying on it.
178define void @julia_load_array_of_i16(ptr %a, ptr %b, ptr %c) {
179; CHECK-LABEL: @julia_load_array_of_i16(
180; CHECK-NEXT:  top:
181; CHECK-NEXT:    [[A_ARR:%.*]] = load [4 x i16], ptr [[A:%.*]], align 4
182; CHECK-NEXT:    [[A0:%.*]] = extractvalue [4 x i16] [[A_ARR]], 0
183; CHECK-NEXT:    [[A2:%.*]] = extractvalue [4 x i16] [[A_ARR]], 2
184; CHECK-NEXT:    [[A1:%.*]] = extractvalue [4 x i16] [[A_ARR]], 1
185; CHECK-NEXT:    [[B_ARR:%.*]] = load [4 x i16], ptr [[B:%.*]], align 4
186; CHECK-NEXT:    [[B0:%.*]] = extractvalue [4 x i16] [[B_ARR]], 0
187; CHECK-NEXT:    [[B2:%.*]] = extractvalue [4 x i16] [[B_ARR]], 2
188; CHECK-NEXT:    [[B1:%.*]] = extractvalue [4 x i16] [[B_ARR]], 1
189; CHECK-NEXT:    [[A3:%.*]] = extractvalue [4 x i16] [[A_ARR]], 3
190; CHECK-NEXT:    [[C1:%.*]] = sub i16 [[A1]], [[B1]]
191; CHECK-NEXT:    [[B3:%.*]] = extractvalue [4 x i16] [[B_ARR]], 3
192; CHECK-NEXT:    [[C0:%.*]] = sub i16 [[A0]], [[B0]]
193; CHECK-NEXT:    [[C2:%.*]] = sub i16 [[A2]], [[B2]]
194; CHECK-NEXT:    [[C_ARR0:%.*]] = insertvalue [4 x i16] undef, i16 [[C0]], 0
195; CHECK-NEXT:    [[C_ARR1:%.*]] = insertvalue [4 x i16] [[C_ARR0]], i16 [[C1]], 1
196; CHECK-NEXT:    [[C3:%.*]] = sub i16 [[A3]], [[B3]]
197; CHECK-NEXT:    [[C_ARR2:%.*]] = insertvalue [4 x i16] [[C_ARR1]], i16 [[C2]], 2
198; CHECK-NEXT:    [[C_ARR3:%.*]] = insertvalue [4 x i16] [[C_ARR2]], i16 [[C3]], 3
199; CHECK-NEXT:    store [4 x i16] [[C_ARR3]], ptr [[C:%.*]], align 4
200; CHECK-NEXT:    ret void
201;
202top:
  ; Elements of %a_arr are extracted out of lane order: 0, 2, 1 here and
  ; index 3 further down.
203  %a_arr = load [4 x i16], ptr %a, align 4
204  %a0 = extractvalue [4 x i16] %a_arr, 0
205  %a2 = extractvalue [4 x i16] %a_arr, 2
206  %a1 = extractvalue [4 x i16] %a_arr, 1
  ; Same out-of-order extraction for %b_arr.
207  %b_arr = load [4 x i16], ptr %b, align 4
208  %b0 = extractvalue [4 x i16] %b_arr, 0
209  %b2 = extractvalue [4 x i16] %b_arr, 2
210  %b1 = extractvalue [4 x i16] %b_arr, 1
211  %a3 = extractvalue [4 x i16] %a_arr, 3
  ; Scalar i16 subtractions, listed as c1, c0, c2, with c3 placed after the
  ; first two inserts.
212  %c1 = sub i16 %a1, %b1
213  %b3 = extractvalue [4 x i16] %b_arr, 3
214  %c0 = sub i16 %a0, %b0
215  %c2 = sub i16 %a2, %b2
  ; The insertvalue chain fills indices 0..3 in ascending order.
216  %c_arr0 = insertvalue [4 x i16] undef, i16 %c0, 0
217  %c_arr1 = insertvalue [4 x i16] %c_arr0, i16 %c1, 1
218  %c3 = sub i16 %a3, %b3
219  %c_arr2 = insertvalue [4 x i16] %c_arr1, i16 %c2, 2
220  %c_arr3 = insertvalue [4 x i16] %c_arr2, i16 %c3, 3
221  store [4 x i16] %c_arr3, ptr %c, align 4
222  ret void
223}
224
; Named struct of four floats; loaded, taken apart and rebuilt by
; @julia_load_struct_of_float.
225%pseudovec = type { float, float, float, float }
226
; Struct variant of the aggregate-load test: two %pseudovec values are loaded
; whole from %a and %b, their fields are extracted and subtracted pairwise,
; and the differences are reassembled into a %pseudovec stored to %c.
; Expected output: <4 x float> loads from %a and %b, one vector fsub, and an
; extractelement feeding each insertvalue of the result struct.
227define void @julia_load_struct_of_float(ptr %a, ptr %b, ptr %c) {
228; CHECK-LABEL: @julia_load_struct_of_float(
229; CHECK-NEXT:  top:
230; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4
231; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
232; CHECK-NEXT:    [[TMP4:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]]
233; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP4]], i32 0
234; CHECK-NEXT:    [[C_STRUCT0:%.*]] = insertvalue [[PSEUDOVEC:%.*]] undef, float [[TMP5]], 0
235; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
236; CHECK-NEXT:    [[C_STRUCT1:%.*]] = insertvalue [[PSEUDOVEC]] [[C_STRUCT0]], float [[TMP6]], 1
237; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
238; CHECK-NEXT:    [[C_STRUCT2:%.*]] = insertvalue [[PSEUDOVEC]] [[C_STRUCT1]], float [[TMP7]], 2
239; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[TMP4]], i32 3
240; CHECK-NEXT:    [[C_STRUCT3:%.*]] = insertvalue [[PSEUDOVEC]] [[C_STRUCT2]], float [[TMP8]], 3
241; CHECK-NEXT:    store [[PSEUDOVEC]] [[C_STRUCT3]], ptr [[C:%.*]], align 4
242; CHECK-NEXT:    ret void
243;
244top:
  ; Field extracts, subtractions and inserts are interleaved below; %c3 is
  ; computed before %c2, but the insertvalue chain still fills fields 0..3 in
  ; ascending order.
245  %a_struct = load %pseudovec, ptr %a, align 4
246  %a0 = extractvalue %pseudovec %a_struct, 0
247  %a1 = extractvalue %pseudovec %a_struct, 1
248  %b_struct = load %pseudovec, ptr %b, align 4
249  %a2 = extractvalue %pseudovec %a_struct, 2
250  %b0 = extractvalue %pseudovec %b_struct, 0
251  %a3 = extractvalue %pseudovec %a_struct, 3
252  %c0 = fsub float %a0, %b0
253  %b1 = extractvalue %pseudovec %b_struct, 1
254  %b2 = extractvalue %pseudovec %b_struct, 2
255  %c1 = fsub float %a1, %b1
256  %c_struct0 = insertvalue %pseudovec undef, float %c0, 0
257  %b3 = extractvalue %pseudovec %b_struct, 3
258  %c3 = fsub float %a3, %b3
259  %c_struct1 = insertvalue %pseudovec %c_struct0, float %c1, 1
260  %c2 = fsub float %a2, %b2
261  %c_struct2 = insertvalue %pseudovec %c_struct1, float %c2, 2
262  %c_struct3 = insertvalue %pseudovec %c_struct2, float %c3, 3
  ; Store the rebuilt struct to the output pointer.
263  store %pseudovec %c_struct3, ptr %c, align 4
264  ret void
265}
266