xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-reorder-reshuffle.ll (revision 38fffa630ee80163dc65e759392ad29798905679)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2; RUN: opt -passes=slp-vectorizer -slp-vectorize-non-power-of-2 -mtriple=arm64-apple-ios -S %s | FileCheck --check-prefixes=CHECK,NON-POW2 %s
3; RUN: opt -passes=slp-vectorizer -slp-vectorize-non-power-of-2=false -mtriple=arm64-apple-ios -S %s | FileCheck --check-prefixes=CHECK,POW2-ONLY %s
4
; Aggregate of three i32 fields; used below as the GEP element type in
; @reorder_results.
5%struct.zot = type { i32, i32, i32 }
6
; Loads three i32 values from byte offsets 0, 4 and 8 of the pointer read from
; %arg4, then, in either successor block, stores them to three consecutive i32
; slots in the order (load4, load7, load5), i.e. with the last two elements
; swapped relative to load order.
; The assertions use the prefix shared by both RUN configurations and expect
; the scalar loads and stores to be left as they are.
7define i1 @reorder_results(ptr %arg, i1 %arg1, ptr %arg2, i64 %arg3, ptr %arg4) {
8; CHECK-LABEL: define i1 @reorder_results(
9; CHECK-SAME: ptr [[ARG:%.*]], i1 [[ARG1:%.*]], ptr [[ARG2:%.*]], i64 [[ARG3:%.*]], ptr [[ARG4:%.*]]) {
10; CHECK-NEXT:  bb:
11; CHECK-NEXT:    [[LOAD:%.*]] = load ptr, ptr [[ARG4]], align 8
12; CHECK-NEXT:    [[LOAD4:%.*]] = load i32, ptr [[LOAD]], align 4
13; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i8, ptr [[LOAD]], i64 4
14; CHECK-NEXT:    [[LOAD5:%.*]] = load i32, ptr [[GETELEMENTPTR]], align 4
15; CHECK-NEXT:    [[GETELEMENTPTR6:%.*]] = getelementptr i8, ptr [[LOAD]], i64 8
16; CHECK-NEXT:    [[LOAD7:%.*]] = load i32, ptr [[GETELEMENTPTR6]], align 4
17; CHECK-NEXT:    br i1 [[ARG1]], label [[BB12:%.*]], label [[BB9:%.*]]
18; CHECK:       bb8:
19; CHECK-NEXT:    ret i1 false
20; CHECK:       bb9:
21; CHECK-NEXT:    [[FREEZE:%.*]] = freeze ptr [[ARG]]
22; CHECK-NEXT:    store i32 [[LOAD4]], ptr [[FREEZE]], align 4
23; CHECK-NEXT:    [[GETELEMENTPTR10:%.*]] = getelementptr i8, ptr [[FREEZE]], i64 4
24; CHECK-NEXT:    store i32 [[LOAD7]], ptr [[GETELEMENTPTR10]], align 4
25; CHECK-NEXT:    [[GETELEMENTPTR11:%.*]] = getelementptr i8, ptr [[FREEZE]], i64 8
26; CHECK-NEXT:    store i32 [[LOAD5]], ptr [[GETELEMENTPTR11]], align 4
27; CHECK-NEXT:    br label [[BB8:%.*]]
28; CHECK:       bb12:
29; CHECK-NEXT:    [[GETELEMENTPTR13:%.*]] = getelementptr [[STRUCT_ZOT:%.*]], ptr [[ARG2]], i64 [[ARG3]]
30; CHECK-NEXT:    store i32 [[LOAD4]], ptr [[GETELEMENTPTR13]], align 4
31; CHECK-NEXT:    [[GETELEMENTPTR14:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR13]], i64 4
32; CHECK-NEXT:    store i32 [[LOAD7]], ptr [[GETELEMENTPTR14]], align 4
33; CHECK-NEXT:    [[GETELEMENTPTR15:%.*]] = getelementptr i8, ptr [[GETELEMENTPTR13]], i64 8
34; CHECK-NEXT:    store i32 [[LOAD5]], ptr [[GETELEMENTPTR15]], align 4
35; CHECK-NEXT:    br label [[BB8]]
36;
37bb:
38  %load = load ptr, ptr %arg4, align 8
39  %load4 = load i32, ptr %load, align 4
40  %getelementptr = getelementptr i8, ptr %load, i64 4
41  %load5 = load i32, ptr %getelementptr, align 4
42  %getelementptr6 = getelementptr i8, ptr %load, i64 8
43  %load7 = load i32, ptr %getelementptr6, align 4
44  br i1 %arg1, label %bb12, label %bb9
45
46bb8:                                              ; preds = %bb12, %bb9
47  ret i1 false
48
49bb9:                                              ; preds = %bb
  ; Destination is the frozen %arg; store order is load4, load7, load5.
50  %freeze = freeze ptr %arg
51  store i32 %load4, ptr %freeze, align 4
52  %getelementptr10 = getelementptr i8, ptr %freeze, i64 4
53  store i32 %load7, ptr %getelementptr10, align 4
54  %getelementptr11 = getelementptr i8, ptr %freeze, i64 8
55  store i32 %load5, ptr %getelementptr11, align 4
56  br label %bb8
57
58bb12:                                             ; preds = %bb
  ; Destination is element %arg3 of a %struct.zot array at %arg2; same store
  ; order as in bb9.
59  %getelementptr13 = getelementptr %struct.zot, ptr %arg2, i64 %arg3
60  store i32 %load4, ptr %getelementptr13, align 4
61  %getelementptr14 = getelementptr i8, ptr %getelementptr13, i64 4
62  store i32 %load7, ptr %getelementptr14, align 4
63  %getelementptr15 = getelementptr i8, ptr %getelementptr13, i64 8
64  store i32 %load5, ptr %getelementptr15, align 4
65  br label %bb8
66}
67
; Scalar input: two adjacent floats are loaded through the pointer read from
; %object, extended to double, passed through fcmp/select, negated via
; "fsub 0.0, x" and truncated back to float. The product of the two results
; guards a block that stores five floats to %object at float indices 0, 1, 2,
; 11 and 10 (in that program order).
; Expected output, identical for both RUN configurations: a <2 x float> load
; and <2 x double> arithmetic in the entry block, a <2 x float> store covering
; indices 0-1, a scalar store at index 2, and a <2 x float> store at index 10.
68define void @extract_mask(ptr %object, double %conv503, double %conv520) {
69; CHECK-LABEL: define void @extract_mask(
70; CHECK-SAME: ptr [[OBJECT:%.*]], double [[CONV503:%.*]], double [[CONV520:%.*]]) {
71; CHECK-NEXT:  entry:
72; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[OBJECT]], align 8
73; CHECK-NEXT:    [[BBOX483:%.*]] = getelementptr float, ptr [[TMP0]]
74; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[BBOX483]], align 8
75; CHECK-NEXT:    [[TMP2:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double>
76; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
77; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[CONV503]], i32 0
78; CHECK-NEXT:    [[TMP5:%.*]] = fcmp ogt <2 x double> [[TMP4]], <double 0.000000e+00, double -2.000000e+10>
79; CHECK-NEXT:    [[TMP6:%.*]] = select <2 x i1> [[TMP5]], <2 x double> [[TMP3]], <2 x double> <double 0.000000e+00, double -2.000000e+10>
80; CHECK-NEXT:    [[TMP7:%.*]] = fsub <2 x double> zeroinitializer, [[TMP6]]
81; CHECK-NEXT:    [[TMP8:%.*]] = fptrunc <2 x double> [[TMP7]] to <2 x float>
82; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
83; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
84; CHECK-NEXT:    [[MUL646:%.*]] = fmul float [[TMP9]], [[TMP10]]
85; CHECK-NEXT:    [[CMP663:%.*]] = fcmp olt float [[MUL646]], 0.000000e+00
86; CHECK-NEXT:    br i1 [[CMP663]], label [[IF_THEN665:%.*]], label [[IF_END668:%.*]]
87; CHECK:       if.then665:
88; CHECK-NEXT:    [[ARRAYIDX656:%.*]] = getelementptr float, ptr [[OBJECT]], i64 10
89; CHECK-NEXT:    [[BBOX651:%.*]] = getelementptr float, ptr [[OBJECT]]
90; CHECK-NEXT:    [[CONV621:%.*]] = fptrunc double [[CONV520]] to float
91; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
92; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <2 x double> [[TMP11]], double [[CONV503]], i32 0
93; CHECK-NEXT:    [[TMP13:%.*]] = fptrunc <2 x double> [[TMP12]] to <2 x float>
94; CHECK-NEXT:    store <2 x float> [[TMP13]], ptr [[BBOX651]], align 8
95; CHECK-NEXT:    [[BBOX_SROA_8_0_BBOX666_SROA_IDX:%.*]] = getelementptr float, ptr [[OBJECT]], i64 2
96; CHECK-NEXT:    store float [[CONV621]], ptr [[BBOX_SROA_8_0_BBOX666_SROA_IDX]], align 8
97; CHECK-NEXT:    store <2 x float> [[TMP8]], ptr [[ARRAYIDX656]], align 8
98; CHECK-NEXT:    br label [[IF_END668]]
99; CHECK:       if.end668:
100; CHECK-NEXT:    ret void
101;
102entry:
103  %0 = load ptr, ptr %object, align 8
104  %bbox483 = getelementptr float, ptr %0
105  %1 = load float, ptr %bbox483, align 8
106  %conv486 = fpext float %1 to double
107  %cmp487 = fcmp ogt double %conv486, -2.000000e+10
108  %conv486.2 = select i1 %cmp487, double %conv486, double -2.000000e+10
109  %arrayidx502 = getelementptr float, ptr %0, i64 1
110  %2 = load float, ptr %arrayidx502, align 4
111  %conv5033 = fpext float %2 to double
  ; The compare reads the argument %conv503 while the select picks the loaded
  ; value %conv5033, so compare operand and selected value differ in this lane.
112  %cmp504 = fcmp ogt double %conv503, 0.000000e+00
113  %cond514 = select i1 %cmp504, double %conv5033, double 0.000000e+00
114  %sub626 = fsub double 0.000000e+00, %conv486.2
115  %conv627 = fptrunc double %sub626 to float
116  %sub632 = fsub double 0.000000e+00, %cond514
117  %conv633 = fptrunc double %sub632 to float
118  %mul646 = fmul float %conv633, %conv627
119  %cmp663 = fcmp olt float %mul646, 0.000000e+00
120  br i1 %cmp663, label %if.then665, label %if.end668
121
122if.then665:                                       ; preds = %entry
123  %arrayidx656 = getelementptr float, ptr %object, i64 10
124  %lengths652 = getelementptr float, ptr %object, i64 11
125  %bbox651 = getelementptr float, ptr %object
126  %conv621 = fptrunc double %conv520 to float
127  %conv617 = fptrunc double %cond514 to float
128  %conv613 = fptrunc double %conv503 to float
129  store float %conv613, ptr %bbox651, align 8
130  %bbox.sroa.6.0.bbox666.sroa_idx = getelementptr float, ptr %object, i64 1
131  store float %conv617, ptr %bbox.sroa.6.0.bbox666.sroa_idx, align 4
132  %bbox.sroa.8.0.bbox666.sroa_idx = getelementptr float, ptr %object, i64 2
133  store float %conv621, ptr %bbox.sroa.8.0.bbox666.sroa_idx, align 8
134  store float %conv627, ptr %lengths652, align 4
135  store float %conv633, ptr %arrayidx656, align 8
136  br label %if.end668
137
138if.end668:                                        ; preds = %if.then665, %entry
139  ret void
140}
141
; Three scalar llvm.fmuladd.f32 calls with operand pairs (%0, 0.0), (%1, %0)
; and (%0, %1), each followed by an fmul by 0.0; the results are stored to
; three consecutive floats starting at %arrayidx163.
; With -slp-vectorize-non-power-of-2 the expected output is a single
; <3 x float> fmuladd/fmul/store chain. With the option set to false the
; expected output is a <2 x float> chain for the first two lanes plus a scalar
; chain for the third.
142define void @gather_2(ptr %mat1, float %0, float %1) {
143; NON-POW2-LABEL: define void @gather_2(
144; NON-POW2-SAME: ptr [[MAT1:%.*]], float [[TMP0:%.*]], float [[TMP1:%.*]]) {
145; NON-POW2-NEXT:  entry:
146; NON-POW2-NEXT:    [[TMP2:%.*]] = insertelement <3 x float> poison, float [[TMP0]], i32 0
147; NON-POW2-NEXT:    [[TMP3:%.*]] = shufflevector <3 x float> [[TMP2]], <3 x float> poison, <3 x i32> zeroinitializer
148; NON-POW2-NEXT:    [[TMP4:%.*]] = insertelement <3 x float> <float 0.000000e+00, float poison, float poison>, float [[TMP1]], i32 1
149; NON-POW2-NEXT:    [[TMP5:%.*]] = shufflevector <3 x float> [[TMP4]], <3 x float> poison, <3 x i32> <i32 0, i32 1, i32 1>
150; NON-POW2-NEXT:    [[TMP6:%.*]] = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> [[TMP3]], <3 x float> [[TMP5]], <3 x float> zeroinitializer)
151; NON-POW2-NEXT:    [[ARRAYIDX163:%.*]] = getelementptr [4 x [4 x float]], ptr [[MAT1]], i64 0, i64 1
152; NON-POW2-NEXT:    [[TMP7:%.*]] = fmul <3 x float> [[TMP6]], zeroinitializer
153; NON-POW2-NEXT:    store <3 x float> [[TMP7]], ptr [[ARRAYIDX163]], align 4
154; NON-POW2-NEXT:    ret void
155;
156; POW2-ONLY-LABEL: define void @gather_2(
157; POW2-ONLY-SAME: ptr [[MAT1:%.*]], float [[TMP0:%.*]], float [[TMP1:%.*]]) {
158; POW2-ONLY-NEXT:  entry:
159; POW2-ONLY-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i32 0
160; POW2-ONLY-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer
161; POW2-ONLY-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> <float 0.000000e+00, float poison>, float [[TMP1]], i32 1
162; POW2-ONLY-NEXT:    [[TMP5:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP3]], <2 x float> [[TMP4]], <2 x float> zeroinitializer)
163; POW2-ONLY-NEXT:    [[TMP6:%.*]] = call float @llvm.fmuladd.f32(float [[TMP0]], float [[TMP1]], float 0.000000e+00)
164; POW2-ONLY-NEXT:    [[TMP7:%.*]] = fmul float [[TMP6]], 0.000000e+00
165; POW2-ONLY-NEXT:    [[ARRAYIDX163:%.*]] = getelementptr [4 x [4 x float]], ptr [[MAT1]], i64 0, i64 1
166; POW2-ONLY-NEXT:    [[ARRAYIDX5_I_I_I280:%.*]] = getelementptr [4 x [4 x float]], ptr [[MAT1]], i64 0, i64 1, i64 2
167; POW2-ONLY-NEXT:    [[TMP8:%.*]] = fmul <2 x float> [[TMP5]], zeroinitializer
168; POW2-ONLY-NEXT:    store <2 x float> [[TMP8]], ptr [[ARRAYIDX163]], align 4
169; POW2-ONLY-NEXT:    store float [[TMP7]], ptr [[ARRAYIDX5_I_I_I280]], align 4
170; POW2-ONLY-NEXT:    ret void
171;
172entry:
  ; Lanes 1 and 2 use the same two arguments with the multiplicands swapped.
173  %2 = call float @llvm.fmuladd.f32(float %0, float 0.000000e+00, float 0.000000e+00)
174  %3 = call float @llvm.fmuladd.f32(float %1, float %0, float 0.000000e+00)
175  %4 = call float @llvm.fmuladd.f32(float %0, float %1, float 0.000000e+00)
176  %5 = fmul float %2, 0.000000e+00
177  %6 = fmul float %3, 0.000000e+00
178  %7 = fmul float %4, 0.000000e+00
179  %arrayidx163 = getelementptr [4 x [4 x float]], ptr %mat1, i64 0, i64 1
180  %arrayidx2.i.i.i278 = getelementptr [4 x [4 x float]], ptr %mat1, i64 0, i64 1, i64 1
181  %arrayidx5.i.i.i280 = getelementptr [4 x [4 x float]], ptr %mat1, i64 0, i64 1, i64 2
182  store float %5, ptr %arrayidx163, align 4
183  store float %6, ptr %arrayidx2.i.i.i278, align 4
184  store float %7, ptr %arrayidx5.i.i.i280, align 4
185  ret void
186}
187
; Reads the three floats at indices 0, 1 and 2 of the alloca %nor1 and, for
; each output lane i, computes
;   fmul(fmuladd(%0, fmuladd(elt[i], 0.0, fneg(elt[(i+1) mod 3]) * %0), 0.0), 0.0)
; before storing the result back to index i of %nor1.
; With -slp-vectorize-non-power-of-2 the expected output is a <3 x float>
; load/compute/store chain containing one shufflevector with mask <1, 2, 0>.
; With the option set to false the expected output mixes <2 x float>
; operations for lanes 0-1 with scalar operations for lane 2.
188define i32 @reorder_indices_1(float %0) {
189; NON-POW2-LABEL: define i32 @reorder_indices_1(
190; NON-POW2-SAME: float [[TMP0:%.*]]) {
191; NON-POW2-NEXT:  entry:
192; NON-POW2-NEXT:    [[NOR1:%.*]] = alloca [0 x [3 x float]], i32 0, align 4
193; NON-POW2-NEXT:    [[TMP1:%.*]] = load <3 x float>, ptr [[NOR1]], align 4
194; NON-POW2-NEXT:    [[TMP3:%.*]] = fneg <3 x float> [[TMP1]]
195; NON-POW2-NEXT:    [[TMP4:%.*]] = insertelement <3 x float> poison, float [[TMP0]], i32 0
196; NON-POW2-NEXT:    [[TMP5:%.*]] = shufflevector <3 x float> [[TMP4]], <3 x float> poison, <3 x i32> zeroinitializer
197; NON-POW2-NEXT:    [[TMP6:%.*]] = fmul <3 x float> [[TMP3]], [[TMP5]]
198; NON-POW2-NEXT:    [[TMP10:%.*]] = shufflevector <3 x float> [[TMP6]], <3 x float> poison, <3 x i32> <i32 1, i32 2, i32 0>
199; NON-POW2-NEXT:    [[TMP7:%.*]] = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> [[TMP1]], <3 x float> zeroinitializer, <3 x float> [[TMP10]])
200; NON-POW2-NEXT:    [[TMP8:%.*]] = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> [[TMP5]], <3 x float> [[TMP7]], <3 x float> zeroinitializer)
201; NON-POW2-NEXT:    [[TMP9:%.*]] = fmul <3 x float> [[TMP8]], zeroinitializer
202; NON-POW2-NEXT:    store <3 x float> [[TMP9]], ptr [[NOR1]], align 4
203; NON-POW2-NEXT:    ret i32 0
204;
205; POW2-ONLY-LABEL: define i32 @reorder_indices_1(
206; POW2-ONLY-SAME: float [[TMP0:%.*]]) {
207; POW2-ONLY-NEXT:  entry:
208; POW2-ONLY-NEXT:    [[NOR1:%.*]] = alloca [0 x [3 x float]], i32 0, align 4
209; POW2-ONLY-NEXT:    [[ARRAYIDX2_I265:%.*]] = getelementptr float, ptr [[NOR1]], i64 2
210; POW2-ONLY-NEXT:    [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2_I265]], align 4
211; POW2-ONLY-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[NOR1]], align 4
212; POW2-ONLY-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
213; POW2-ONLY-NEXT:    [[TMP4:%.*]] = fneg float [[TMP3]]
214; POW2-ONLY-NEXT:    [[NEG11_I:%.*]] = fmul float [[TMP4]], [[TMP0]]
215; POW2-ONLY-NEXT:    [[TMP5:%.*]] = call float @llvm.fmuladd.f32(float [[TMP1]], float 0.000000e+00, float [[NEG11_I]])
216; POW2-ONLY-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> <i32 1, i32 poison>
217; POW2-ONLY-NEXT:    [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP1]], i32 1
218; POW2-ONLY-NEXT:    [[TMP8:%.*]] = fneg <2 x float> [[TMP7]]
219; POW2-ONLY-NEXT:    [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i32 0
220; POW2-ONLY-NEXT:    [[TMP10:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> poison, <2 x i32> zeroinitializer
221; POW2-ONLY-NEXT:    [[TMP11:%.*]] = fmul <2 x float> [[TMP8]], [[TMP10]]
222; POW2-ONLY-NEXT:    [[TMP12:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP2]], <2 x float> zeroinitializer, <2 x float> [[TMP11]])
223; POW2-ONLY-NEXT:    [[TMP13:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP10]], <2 x float> [[TMP12]], <2 x float> zeroinitializer)
224; POW2-ONLY-NEXT:    [[TMP14:%.*]] = call float @llvm.fmuladd.f32(float [[TMP0]], float [[TMP5]], float 0.000000e+00)
225; POW2-ONLY-NEXT:    [[TMP15:%.*]] = fmul <2 x float> [[TMP13]], zeroinitializer
226; POW2-ONLY-NEXT:    [[MUL6_I_I_I:%.*]] = fmul float [[TMP14]], 0.000000e+00
227; POW2-ONLY-NEXT:    store <2 x float> [[TMP15]], ptr [[NOR1]], align 4
228; POW2-ONLY-NEXT:    store float [[MUL6_I_I_I]], ptr [[ARRAYIDX2_I265]], align 4
229; POW2-ONLY-NEXT:    ret i32 0
230;
231entry:
232  %nor1 = alloca [0 x [3 x float]], i32 0, align 4
  ; Program order of the loads is index 1, index 2, then index 0.
233  %arrayidx.i = getelementptr float, ptr %nor1, i64 1
234  %1 = load float, ptr %arrayidx.i, align 4
235  %arrayidx2.i265 = getelementptr float, ptr %nor1, i64 2
236  %2 = load float, ptr %arrayidx2.i265, align 4
237  %3 = fneg float %2
238  %neg.i267 = fmul float %3, %0
239  %4 = call float @llvm.fmuladd.f32(float %1, float 0.000000e+00, float %neg.i267)
240  %5 = load float, ptr %nor1, align 4
241  %6 = fneg float %5
242  %neg11.i = fmul float %6, %0
243  %7 = call float @llvm.fmuladd.f32(float %2, float 0.000000e+00, float %neg11.i)
244  %8 = fneg float %1
245  %neg18.i = fmul float %8, %0
246  %9 = call float @llvm.fmuladd.f32(float %5, float 0.000000e+00, float %neg18.i)
  ; %10, %11 and %12 feed the stores to indices 0, 1 and 2 respectively.
247  %10 = call float @llvm.fmuladd.f32(float %0, float %9, float 0.000000e+00)
248  %11 = call float @llvm.fmuladd.f32(float %0, float %4, float 0.000000e+00)
249  %12 = call float @llvm.fmuladd.f32(float %0, float %7, float 0.000000e+00)
250  %mul.i.i.i = fmul float %10, 0.000000e+00
251  %mul3.i.i.i = fmul float %11, 0.000000e+00
252  %mul6.i.i.i = fmul float %12, 0.000000e+00
253  store float %mul.i.i.i, ptr %nor1, align 4
254  store float %mul3.i.i.i, ptr %arrayidx.i, align 4
255  store float %mul6.i.i.i, ptr %arrayidx2.i265, align 4
256  ret i32 0
257}
258
; Extracts elements 1, 2 and 0 (in that order) from a constant zero
; <3 x float>, runs each through llvm.fmuladd.f32 and an fmul by 0.0, and
; stores the results to float indices 0, 1 and 2 of %spoint, so output lane i
; derives from extracted element (i+1) mod 3.
; With -slp-vectorize-non-power-of-2 the expected output is a <3 x float>
; fmuladd/fmul, a shufflevector with mask <1, 2, 0>, and one <3 x float> store.
; With the option set to false the expected output is a <2 x float> chain for
; indices 0-1 plus a scalar chain for index 2.
259define void @reorder_indices_2(ptr %spoint) {
260; NON-POW2-LABEL: define void @reorder_indices_2(
261; NON-POW2-SAME: ptr [[SPOINT:%.*]]) {
262; NON-POW2-NEXT:  entry:
263; NON-POW2-NEXT:    [[DSCO:%.*]] = getelementptr float, ptr [[SPOINT]], i64 0
264; NON-POW2-NEXT:    [[TMP0:%.*]] = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> zeroinitializer, <3 x float> zeroinitializer, <3 x float> zeroinitializer)
265; NON-POW2-NEXT:    [[TMP1:%.*]] = fmul <3 x float> [[TMP0]], zeroinitializer
266; NON-POW2-NEXT:    [[TMP2:%.*]] = shufflevector <3 x float> [[TMP1]], <3 x float> poison, <3 x i32> <i32 1, i32 2, i32 0>
267; NON-POW2-NEXT:    store <3 x float> [[TMP2]], ptr [[DSCO]], align 4
268; NON-POW2-NEXT:    ret void
269;
270; POW2-ONLY-LABEL: define void @reorder_indices_2(
271; POW2-ONLY-SAME: ptr [[SPOINT:%.*]]) {
272; POW2-ONLY-NEXT:  entry:
273; POW2-ONLY-NEXT:    [[TMP0:%.*]] = extractelement <3 x float> zeroinitializer, i64 0
274; POW2-ONLY-NEXT:    [[TMP1:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP0]], float 0.000000e+00, float 0.000000e+00)
275; POW2-ONLY-NEXT:    [[MUL4_I461:%.*]] = fmul float [[TMP1]], 0.000000e+00
276; POW2-ONLY-NEXT:    [[DSCO:%.*]] = getelementptr float, ptr [[SPOINT]], i64 0
277; POW2-ONLY-NEXT:    [[TMP2:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> zeroinitializer, <2 x float> zeroinitializer, <2 x float> zeroinitializer)
278; POW2-ONLY-NEXT:    [[TMP3:%.*]] = fmul <2 x float> [[TMP2]], zeroinitializer
279; POW2-ONLY-NEXT:    store <2 x float> [[TMP3]], ptr [[DSCO]], align 4
280; POW2-ONLY-NEXT:    [[ARRAYIDX5_I476:%.*]] = getelementptr float, ptr [[SPOINT]], i64 2
281; POW2-ONLY-NEXT:    store float [[MUL4_I461]], ptr [[ARRAYIDX5_I476]], align 4
282; POW2-ONLY-NEXT:    ret void
283;
284entry:
285  %0 = extractelement <3 x float> zeroinitializer, i64 1
286  %1 = extractelement <3 x float> zeroinitializer, i64 2
287  %2 = extractelement <3 x float> zeroinitializer, i64 0
288  %3 = tail call float @llvm.fmuladd.f32(float %0, float 0.000000e+00, float 0.000000e+00)
289  %4 = tail call float @llvm.fmuladd.f32(float %1, float 0.000000e+00, float 0.000000e+00)
290  %5 = tail call float @llvm.fmuladd.f32(float %2, float 0.000000e+00, float 0.000000e+00)
291  %mul.i457 = fmul float %3, 0.000000e+00
292  %mul2.i459 = fmul float %4, 0.000000e+00
293  %mul4.i461 = fmul float %5, 0.000000e+00
294  %dsco = getelementptr float, ptr %spoint, i64 0
295  store float %mul.i457, ptr %dsco, align 4
296  %arrayidx3.i474 = getelementptr float, ptr %spoint, i64 1
297  store float %mul2.i459, ptr %arrayidx3.i474, align 4
298  %arrayidx5.i476 = getelementptr float, ptr %spoint, i64 2
299  store float %mul4.i461, ptr %arrayidx5.i476, align 4
300  ret void
301}
302
; Loads two adjacent i8 values from %info_ptr (byte offsets 0 and 1) and
; stores them to %png_ptr at byte offsets 11, 10 and 12, with the first loaded
; value written twice (offsets 11 and 12).
; The assertions use the prefix shared by both RUN configurations and expect
; the scalar loads and stores to be left as they are.
303define void @reorder_indices_2x_load(ptr %png_ptr, ptr %info_ptr) {
304; CHECK-LABEL: define void @reorder_indices_2x_load(
305; CHECK-SAME: ptr [[PNG_PTR:%.*]], ptr [[INFO_PTR:%.*]]) {
306; CHECK-NEXT:  entry:
307; CHECK-NEXT:    [[BIT_DEPTH:%.*]] = getelementptr i8, ptr [[INFO_PTR]], i64 0
308; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[BIT_DEPTH]], align 4
309; CHECK-NEXT:    [[COLOR_TYPE:%.*]] = getelementptr i8, ptr [[INFO_PTR]], i64 1
310; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[COLOR_TYPE]], align 1
311; CHECK-NEXT:    [[BIT_DEPTH37_I:%.*]] = getelementptr i8, ptr [[PNG_PTR]], i64 11
312; CHECK-NEXT:    store i8 [[TMP0]], ptr [[BIT_DEPTH37_I]], align 1
313; CHECK-NEXT:    [[COLOR_TYPE39_I:%.*]] = getelementptr i8, ptr [[PNG_PTR]], i64 10
314; CHECK-NEXT:    store i8 [[TMP1]], ptr [[COLOR_TYPE39_I]], align 2
315; CHECK-NEXT:    [[USR_BIT_DEPTH_I:%.*]] = getelementptr i8, ptr [[PNG_PTR]], i64 12
316; CHECK-NEXT:    store i8 [[TMP0]], ptr [[USR_BIT_DEPTH_I]], align 8
317; CHECK-NEXT:    ret void
318;
319entry:
320  %bit_depth = getelementptr i8, ptr %info_ptr, i64 0
321  %0 = load i8, ptr %bit_depth, align 4
322  %color_type = getelementptr i8, ptr %info_ptr, i64 1
323  %1 = load i8, ptr %color_type, align 1
324  %bit_depth37.i = getelementptr i8, ptr %png_ptr, i64 11
325  store i8 %0, ptr %bit_depth37.i, align 1
326  %color_type39.i = getelementptr i8, ptr %png_ptr, i64 10
327  store i8 %1, ptr %color_type39.i, align 2
328  %usr_bit_depth.i = getelementptr i8, ptr %png_ptr, i64 12
329  store i8 %0, ptr %usr_bit_depth.i, align 8
330  ret void
331}
332
; Three identical scalar chains (fmul by 0.0, then fadd 0.0) applied to %1, %0
; and %0 again; the results are stored to float indices 0, 1 and 2 of %col, so
; lanes 1 and 2 share the same source value.
; With -slp-vectorize-non-power-of-2 the expected output is a <3 x float>
; chain built with a shufflevector mask of <0, 1, 1>. With the option set to
; false the expected output is a <2 x float> chain for indices 0-1 plus the
; original scalar chain for index 2.
333define void @reuse_shuffle_indidces_1(ptr %col, float %0, float %1) {
334; NON-POW2-LABEL: define void @reuse_shuffle_indidces_1(
335; NON-POW2-SAME: ptr [[COL:%.*]], float [[TMP0:%.*]], float [[TMP1:%.*]]) {
336; NON-POW2-NEXT:  entry:
337; NON-POW2-NEXT:    [[TMP2:%.*]] = insertelement <3 x float> poison, float [[TMP1]], i32 0
338; NON-POW2-NEXT:    [[TMP3:%.*]] = insertelement <3 x float> [[TMP2]], float [[TMP0]], i32 1
339; NON-POW2-NEXT:    [[TMP4:%.*]] = shufflevector <3 x float> [[TMP3]], <3 x float> poison, <3 x i32> <i32 0, i32 1, i32 1>
340; NON-POW2-NEXT:    [[TMP5:%.*]] = fmul <3 x float> [[TMP4]], zeroinitializer
341; NON-POW2-NEXT:    [[TMP6:%.*]] = fadd <3 x float> [[TMP5]], zeroinitializer
342; NON-POW2-NEXT:    store <3 x float> [[TMP6]], ptr [[COL]], align 4
343; NON-POW2-NEXT:    ret void
344;
345; POW2-ONLY-LABEL: define void @reuse_shuffle_indidces_1(
346; POW2-ONLY-SAME: ptr [[COL:%.*]], float [[TMP0:%.*]], float [[TMP1:%.*]]) {
347; POW2-ONLY-NEXT:  entry:
348; POW2-ONLY-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
349; POW2-ONLY-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP0]], i32 1
350; POW2-ONLY-NEXT:    [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], zeroinitializer
351; POW2-ONLY-NEXT:    [[TMP5:%.*]] = fadd <2 x float> [[TMP4]], zeroinitializer
352; POW2-ONLY-NEXT:    store <2 x float> [[TMP5]], ptr [[COL]], align 4
353; POW2-ONLY-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr float, ptr [[COL]], i64 2
354; POW2-ONLY-NEXT:    [[MUL38:%.*]] = fmul float [[TMP0]], 0.000000e+00
355; POW2-ONLY-NEXT:    [[TMP6:%.*]] = fadd float [[MUL38]], 0.000000e+00
356; POW2-ONLY-NEXT:    store float [[TMP6]], ptr [[ARRAYIDX33]], align 4
357; POW2-ONLY-NEXT:    ret void
358;
359entry:
360  %mul24 = fmul float %1, 0.000000e+00
361  %2 = fadd float %mul24, 0.000000e+00
362  store float %2, ptr %col, align 4
363  %arrayidx26 = getelementptr float, ptr %col, i64 1
364  %mul31 = fmul float %0, 0.000000e+00
365  %3 = fadd float %mul31, 0.000000e+00
366  store float %3, ptr %arrayidx26, align 4
367  %arrayidx33 = getelementptr float, ptr %col, i64 2
368  %mul38 = fmul float %0, 0.000000e+00
369  %4 = fadd float %mul38, 0.000000e+00
370  store float %4, ptr %arrayidx33, align 4
371  ret void
372}
373
; Input that is already in vector form: %0 is splatted into a <2 x double>,
; truncated to <2 x float>, widened to <4 x float> with shuffle mask
; <0, 1, 1, poison>, run through fadd/fmul/fadd, then narrowed to <3 x float>
; and stored to %inertia.
; The assertions use the prefix shared by both RUN configurations and expect
; the IR to be left as it is.
374define void @reuse_shuffle_indices_2(ptr %inertia, double %0) {
375; CHECK-LABEL: define void @reuse_shuffle_indices_2(
376; CHECK-SAME: ptr [[INERTIA:%.*]], double [[TMP0:%.*]]) {
377; CHECK-NEXT:  entry:
378; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> poison, double [[TMP0]], i32 0
379; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> zeroinitializer
380; CHECK-NEXT:    [[TMP3:%.*]] = fptrunc <2 x double> [[TMP2]] to <2 x float>
381; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], zeroinitializer
382; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 poison>
383; CHECK-NEXT:    [[TMP6:%.*]] = fadd <4 x float> [[TMP5]], <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>
384; CHECK-NEXT:    [[TMP7:%.*]] = fmul <4 x float> [[TMP6]], <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>
385; CHECK-NEXT:    [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>
386; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
387; CHECK-NEXT:    store <3 x float> [[TMP9]], ptr [[INERTIA]], align 4
388; CHECK-NEXT:    ret void
389;
390entry:
391  %1 = insertelement <2 x double> poison, double %0, i32 0
392  %2 = shufflevector <2 x double> %1, <2 x double> poison, <2 x i32> zeroinitializer
393  %3 = fptrunc <2 x double> %2 to <2 x float>
394  %4 = fmul <2 x float> %3, zeroinitializer
395  %5 = shufflevector <2 x float> %4, <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 poison>
396  %6 = fadd <4 x float> %5, <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>
397  %7 = fmul <4 x float> %6, <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>
398  %8 = fadd <4 x float> %7, <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>
399  %9 = shufflevector <4 x float> %8, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
400  store <3 x float> %9, ptr %inertia, align 4
401  ret void
402}
403
; Three llvm.fmuladd.f32 results stored to float indices 0, 1 and 2 of %bezt.
; The calls feeding indices 0 and 1 have identical operands (%0, %fneg, 0.0);
; the call feeding index 2 uses (%fneg, 0.0, 0.0).
; Expected output, identical for both RUN configurations: the index-0 chain
; stays scalar while indices 1 and 2 become one <2 x float> fmuladd and store.
; NOTE(review): the function name suggests this is a regression test for a
; crash in cost computation - confirm against the originating commit.
404define void @reuse_shuffle_indices_cost_crash_2(ptr %bezt, float %0) {
405; CHECK-LABEL: define void @reuse_shuffle_indices_cost_crash_2(
406; CHECK-SAME: ptr [[BEZT:%.*]], float [[TMP0:%.*]]) {
407; CHECK-NEXT:  entry:
408; CHECK-NEXT:    [[FNEG:%.*]] = fmul float [[TMP0]], 0.000000e+00
409; CHECK-NEXT:    [[TMP1:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP0]], float [[FNEG]], float 0.000000e+00)
410; CHECK-NEXT:    store float [[TMP1]], ptr [[BEZT]], align 4
411; CHECK-NEXT:    [[ARRAYIDX5_I:%.*]] = getelementptr float, ptr [[BEZT]], i64 1
412; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> <float poison, float 0.000000e+00>, float [[TMP0]], i32 0
413; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> poison, float [[FNEG]], i32 0
414; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> zeroinitializer
415; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP2]], <2 x float> [[TMP4]], <2 x float> zeroinitializer)
416; CHECK-NEXT:    store <2 x float> [[TMP5]], ptr [[ARRAYIDX5_I]], align 4
417; CHECK-NEXT:    ret void
418;
419entry:
  ; Despite its name, %fneg is an fmul by 0.0, not a negation.
420  %fneg = fmul float %0, 0.000000e+00
421  %1 = tail call float @llvm.fmuladd.f32(float %0, float %fneg, float 0.000000e+00)
422  store float %1, ptr %bezt, align 4
423  %2 = tail call float @llvm.fmuladd.f32(float %0, float %fneg, float 0.000000e+00)
424  %arrayidx5.i = getelementptr float, ptr %bezt, i64 1
425  store float %2, ptr %arrayidx5.i, align 4
426  %3 = tail call float @llvm.fmuladd.f32(float %fneg, float 0.000000e+00, float 0.000000e+00)
427  %arrayidx8.i831 = getelementptr float, ptr %bezt, i64 2
428  store float %3, ptr %arrayidx8.i831, align 4
429  ret void
430}
431
; Three double-precision operations with differing opcodes and operands
; (0.0 - %conv2, %conv + 0.0, %conv - %conv), each truncated to float and
; stored to float indices 0, 1 and 2 of %m.
; The assertions use the prefix shared by both RUN configurations and expect
; the scalar IR to be left as it is.
; NOTE(review): the function name suggests this is a regression test for a
; crash in cost computation - confirm against the originating commit.
432define void @reuse_shuffle_indices_cost_crash_3(ptr %m, double %conv, double %conv2) {
433; CHECK-LABEL: define void @reuse_shuffle_indices_cost_crash_3(
434; CHECK-SAME: ptr [[M:%.*]], double [[CONV:%.*]], double [[CONV2:%.*]]) {
435; CHECK-NEXT:  entry:
436; CHECK-NEXT:    [[SUB19:%.*]] = fsub double 0.000000e+00, [[CONV2]]
437; CHECK-NEXT:    [[CONV20:%.*]] = fptrunc double [[SUB19]] to float
438; CHECK-NEXT:    store float [[CONV20]], ptr [[M]], align 4
439; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[CONV]], 0.000000e+00
440; CHECK-NEXT:    [[CONV239:%.*]] = fptrunc double [[ADD]] to float
441; CHECK-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr [4 x float], ptr [[M]], i64 0, i64 1
442; CHECK-NEXT:    store float [[CONV239]], ptr [[ARRAYIDX25]], align 4
443; CHECK-NEXT:    [[ADD26:%.*]] = fsub double [[CONV]], [[CONV]]
444; CHECK-NEXT:    [[CONV27:%.*]] = fptrunc double [[ADD26]] to float
445; CHECK-NEXT:    [[ARRAYIDX29:%.*]] = getelementptr [4 x float], ptr [[M]], i64 0, i64 2
446; CHECK-NEXT:    store float [[CONV27]], ptr [[ARRAYIDX29]], align 4
447; CHECK-NEXT:    ret void
448;
449entry:
450  %sub19 = fsub double 0.000000e+00, %conv2
451  %conv20 = fptrunc double %sub19 to float
452  store float %conv20, ptr %m, align 4
453  %add = fadd double %conv, 0.000000e+00
454  %conv239 = fptrunc double %add to float
455  %arrayidx25 = getelementptr [4 x float], ptr %m, i64 0, i64 1
456  store float %conv239, ptr %arrayidx25, align 4
457  %add26 = fsub double %conv, %conv
458  %conv27 = fptrunc double %add26 to float
459  %arrayidx29 = getelementptr [4 x float], ptr %m, i64 0, i64 2
460  store float %conv27, ptr %arrayidx29, align 4
461  ret void
462}
463
; Computes three doubles, truncates each to float and stores them to indices
; 0, 1 and 2 of the alloca %data.i111, then performs a volatile pointer load
; from the same alloca. Lanes 0 and 1 run the same chain
; ((%conv7.i + 0.0) * %conv7.i + 0.0); lane 2 runs a different chain
; ((%conv7.i * 0.0) * 0.0 + 0.0).
; Expected output, identical for both RUN configurations: lanes 0-1 become
; <2 x double> arithmetic with a <2 x float> store, lane 2 stays scalar, and
; the volatile load is kept.
; NOTE(review): the function name suggests this is a regression test for a
; crash in cost computation - confirm against the originating commit.
464define void @reuse_shuffle_indices_cost_crash_4(double %conv7.i) {
465; CHECK-LABEL: define void @reuse_shuffle_indices_cost_crash_4(
466; CHECK-SAME: double [[CONV7_I:%.*]]) {
467; CHECK-NEXT:  entry:
468; CHECK-NEXT:    [[DATA_I111:%.*]] = alloca [0 x [0 x [0 x [3 x float]]]], i32 0, align 4
469; CHECK-NEXT:    [[ARRAYIDX_2_I:%.*]] = getelementptr [3 x float], ptr [[DATA_I111]], i64 0, i64 2
470; CHECK-NEXT:    [[MUL17_I_US:%.*]] = fmul double [[CONV7_I]], 0.000000e+00
471; CHECK-NEXT:    [[MUL_2_I_I_US:%.*]] = fmul double [[MUL17_I_US]], 0.000000e+00
472; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[CONV7_I]], i32 0
473; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
474; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], zeroinitializer
475; CHECK-NEXT:    [[ADD_2_I_I_US:%.*]] = fadd double [[MUL_2_I_I_US]], 0.000000e+00
476; CHECK-NEXT:    [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], [[TMP1]]
477; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], zeroinitializer
478; CHECK-NEXT:    [[TMP5:%.*]] = fptrunc <2 x double> [[TMP4]] to <2 x float>
479; CHECK-NEXT:    store <2 x float> [[TMP5]], ptr [[DATA_I111]], align 4
480; CHECK-NEXT:    [[CONV_2_I46_US:%.*]] = fptrunc double [[ADD_2_I_I_US]] to float
481; CHECK-NEXT:    store float [[CONV_2_I46_US]], ptr [[ARRAYIDX_2_I]], align 4
482; CHECK-NEXT:    [[CALL2_I_US:%.*]] = load volatile ptr, ptr [[DATA_I111]], align 8
483; CHECK-NEXT:    ret void
484;
485entry:
486  %data.i111 = alloca [0 x [0 x [0 x [3 x float]]]], i32 0, align 4
487  %arrayidx.1.i = getelementptr [3 x float], ptr %data.i111, i64 0, i64 1
488  %arrayidx.2.i = getelementptr [3 x float], ptr %data.i111, i64 0, i64 2
489  %mul17.i.us = fmul double %conv7.i, 0.000000e+00
490  %mul.2.i.i.us = fmul double %mul17.i.us, 0.000000e+00
491  %add.i.i82.i.us = fadd double %conv7.i, 0.000000e+00
492  %add.1.i.i84.i.us = fadd double %conv7.i, 0.000000e+00
493  %mul.i.i91.i.us = fmul double %add.i.i82.i.us, %conv7.i
494  %mul.1.i.i92.i.us = fmul double %add.1.i.i84.i.us, %conv7.i
495  %add.i96.i.us = fadd double %mul.i.i91.i.us, 0.000000e+00
496  %add.1.i.i.us = fadd double %mul.1.i.i92.i.us, 0.000000e+00
497  %add.2.i.i.us = fadd double %mul.2.i.i.us, 0.000000e+00
498  %conv.i42.us = fptrunc double %add.i96.i.us to float
499  store float %conv.i42.us, ptr %data.i111, align 4
500  %conv.1.i44.us = fptrunc double %add.1.i.i.us to float
501  store float %conv.1.i44.us, ptr %arrayidx.1.i, align 4
502  %conv.2.i46.us = fptrunc double %add.2.i.i.us to float
503  store float %conv.2.i46.us, ptr %arrayidx.2.i, align 4
504  %call2.i.us = load volatile ptr, ptr %data.i111, align 8
505  ret void
506}
507
; Three double-precision operations with differing opcodes and operands
; (%conv - %conv, %conv2 + 0.0, 0.0 - %conv), each truncated to float and
; stored to float indices 0, 1 and 2 of %m.
; The assertions use the prefix shared by both RUN configurations and expect
; the scalar IR to be left as it is.
508define void @common_mask(ptr %m, double %conv, double %conv2) {
509; CHECK-LABEL: define void @common_mask(
510; CHECK-SAME: ptr [[M:%.*]], double [[CONV:%.*]], double [[CONV2:%.*]]) {
511; CHECK-NEXT:  entry:
512; CHECK-NEXT:    [[SUB19:%.*]] = fsub double [[CONV]], [[CONV]]
513; CHECK-NEXT:    [[CONV20:%.*]] = fptrunc double [[SUB19]] to float
514; CHECK-NEXT:    store float [[CONV20]], ptr [[M]], align 4
515; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[CONV2]], 0.000000e+00
516; CHECK-NEXT:    [[CONV239:%.*]] = fptrunc double [[ADD]] to float
517; CHECK-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr [4 x float], ptr [[M]], i64 0, i64 1
518; CHECK-NEXT:    store float [[CONV239]], ptr [[ARRAYIDX25]], align 4
519; CHECK-NEXT:    [[ADD26:%.*]] = fsub double 0.000000e+00, [[CONV]]
520; CHECK-NEXT:    [[CONV27:%.*]] = fptrunc double [[ADD26]] to float
521; CHECK-NEXT:    [[ARRAYIDX29:%.*]] = getelementptr [4 x float], ptr [[M]], i64 0, i64 2
522; CHECK-NEXT:    store float [[CONV27]], ptr [[ARRAYIDX29]], align 4
523; CHECK-NEXT:    ret void
524;
525entry:
526  %sub19 = fsub double %conv, %conv
527  %conv20 = fptrunc double %sub19 to float
528  store float %conv20, ptr %m, align 4
529  %add = fadd double %conv2, 0.000000e+00
530  %conv239 = fptrunc double %add to float
531  %arrayidx25 = getelementptr [4 x float], ptr %m, i64 0, i64 1
532  store float %conv239, ptr %arrayidx25, align 4
533  %add26 = fsub double 0.000000e+00, %conv
534  %conv27 = fptrunc double %add26 to float
535  %arrayidx29 = getelementptr [4 x float], ptr %m, i64 0, i64 2
536  store float %conv27, ptr %arrayidx29, align 4
537  ret void
538}
539
; Test @vec3_extract: every lane of the <3 x i16> argument is extracted and
; stored to the i16 slot with the same index relative to %call3.i536. The
; stores are issued in descending order (slot 2, then 1, then 0).
; The assertions use the check prefix shared by both RUN lines, and the
; expected IR is identical to the input: the extractelement/store pairs are
; expected to be left scalar in both configurations.
540define void @vec3_extract(<3 x i16> %pixel.sroa.0.4.vec.insert606, ptr %call3.i536) {
541; CHECK-LABEL: define void @vec3_extract(
542; CHECK-SAME: <3 x i16> [[PIXEL_SROA_0_4_VEC_INSERT606:%.*]], ptr [[CALL3_I536:%.*]]) {
543; CHECK-NEXT:  entry:
544; CHECK-NEXT:    [[PIXEL_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <3 x i16> [[PIXEL_SROA_0_4_VEC_INSERT606]], i64 2
545; CHECK-NEXT:    [[RED668:%.*]] = getelementptr i16, ptr [[CALL3_I536]], i64 2
546; CHECK-NEXT:    store i16 [[PIXEL_SROA_0_4_VEC_EXTRACT]], ptr [[RED668]], align 2
547; CHECK-NEXT:    [[PIXEL_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <3 x i16> [[PIXEL_SROA_0_4_VEC_INSERT606]], i64 1
548; CHECK-NEXT:    [[GREEN670:%.*]] = getelementptr i16, ptr [[CALL3_I536]], i64 1
549; CHECK-NEXT:    store i16 [[PIXEL_SROA_0_2_VEC_EXTRACT]], ptr [[GREEN670]], align 2
550; CHECK-NEXT:    [[PIXEL_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <3 x i16> [[PIXEL_SROA_0_4_VEC_INSERT606]], i64 0
551; CHECK-NEXT:    store i16 [[PIXEL_SROA_0_0_VEC_EXTRACT]], ptr [[CALL3_I536]], align 2
552; CHECK-NEXT:    ret void
553;
554entry:
; lane 2 -> i16 slot 2
555  %pixel.sroa.0.4.vec.extract = extractelement <3 x i16> %pixel.sroa.0.4.vec.insert606, i64 2
556  %red668 = getelementptr i16, ptr %call3.i536, i64 2
557  store i16 %pixel.sroa.0.4.vec.extract, ptr %red668, align 2
; lane 1 -> i16 slot 1
558  %pixel.sroa.0.2.vec.extract = extractelement <3 x i16> %pixel.sroa.0.4.vec.insert606, i64 1
559  %green670 = getelementptr i16, ptr %call3.i536, i64 1
560  store i16 %pixel.sroa.0.2.vec.extract, ptr %green670, align 2
; lane 0 -> i16 slot 0 (stored through the base pointer directly)
561  %pixel.sroa.0.0.vec.extract = extractelement <3 x i16> %pixel.sroa.0.4.vec.insert606, i64 0
562  store i16 %pixel.sroa.0.0.vec.extract, ptr %call3.i536, align 2
563  ret void
564}
565
; Test @can_reorder_vec3_op_with_padding: for each lane x of %in the input
; computes fmul(fmuladd(x - x, 2.0, 3.0), 3.0) with scalar instructions, then
; stores the results rotated: lane 1 -> A[0], lane 2 -> A[1], lane 0 -> A[2].
;
; Expected output per RUN configuration:
;  * NON-POW2 prefix (non-power-of-2 vectorization enabled): the whole chain
;    becomes <3 x float> operations, followed by a shufflevector with mask
;    <1, 2, 0> that applies the rotation and a single <3 x float> store to A.
;  * POW2-ONLY prefix (non-power-of-2 vectorization disabled): lanes 1 and 2
;    form a <2 x float> chain stored at A, while lane 0 stays scalar and is
;    stored separately at A + 2 floats.
566define void @can_reorder_vec3_op_with_padding(ptr %A, <3 x float> %in) {
567; NON-POW2-LABEL: define void @can_reorder_vec3_op_with_padding(
568; NON-POW2-SAME: ptr [[A:%.*]], <3 x float> [[IN:%.*]]) {
569; NON-POW2-NEXT:  entry:
570; NON-POW2-NEXT:    [[TMP1:%.*]] = fsub <3 x float> [[IN]], [[IN]]
571; NON-POW2-NEXT:    [[TMP2:%.*]] = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> [[TMP1]], <3 x float> splat (float 2.000000e+00), <3 x float> splat (float 3.000000e+00))
572; NON-POW2-NEXT:    [[TMP3:%.*]] = fmul <3 x float> [[TMP2]], splat (float 3.000000e+00)
573; NON-POW2-NEXT:    [[TMP4:%.*]] = shufflevector <3 x float> [[TMP3]], <3 x float> poison, <3 x i32> <i32 1, i32 2, i32 0>
574; NON-POW2-NEXT:    store <3 x float> [[TMP4]], ptr [[A]], align 4
575; NON-POW2-NEXT:    ret void
576;
577; POW2-ONLY-LABEL: define void @can_reorder_vec3_op_with_padding(
578; POW2-ONLY-SAME: ptr [[A:%.*]], <3 x float> [[IN:%.*]]) {
579; POW2-ONLY-NEXT:  entry:
580; POW2-ONLY-NEXT:    [[ARRAYIDX42_I:%.*]] = getelementptr float, ptr [[A]], i64 2
581; POW2-ONLY-NEXT:    [[TMP0:%.*]] = extractelement <3 x float> [[IN]], i64 0
582; POW2-ONLY-NEXT:    [[SUB_I362:%.*]] = fsub float [[TMP0]], [[TMP0]]
583; POW2-ONLY-NEXT:    [[TMP1:%.*]] = call float @llvm.fmuladd.f32(float [[SUB_I362]], float 2.000000e+00, float 3.000000e+00)
584; POW2-ONLY-NEXT:    [[MUL6_I_I_I_I:%.*]] = fmul float [[TMP1]], 3.000000e+00
585; POW2-ONLY-NEXT:    [[TMP2:%.*]] = shufflevector <3 x float> [[IN]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
586; POW2-ONLY-NEXT:    [[TMP3:%.*]] = fsub <2 x float> [[TMP2]], [[TMP2]]
587; POW2-ONLY-NEXT:    [[TMP4:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP3]], <2 x float> splat (float 2.000000e+00), <2 x float> splat (float 3.000000e+00))
588; POW2-ONLY-NEXT:    [[TMP5:%.*]] = fmul <2 x float> [[TMP4]], splat (float 3.000000e+00)
589; POW2-ONLY-NEXT:    store <2 x float> [[TMP5]], ptr [[A]], align 4
590; POW2-ONLY-NEXT:    store float [[MUL6_I_I_I_I]], ptr [[ARRAYIDX42_I]], align 4
591; POW2-ONLY-NEXT:    ret void
592;
593entry:
594  %arrayidx42.i = getelementptr float, ptr %A, i64 2
595  %arrayidx35.i = getelementptr float, ptr %A, i64 1
; Each lane is extracted twice and subtracted from itself (lanes 0, 1, 2).
596  %0 = extractelement <3 x float> %in, i64 0
597  %1 = extractelement <3 x float> %in, i64 0
598  %sub.i362 = fsub float %0, %1
599  %2 = extractelement <3 x float> %in, i64 1
600  %3 = extractelement <3 x float> %in, i64 1
601  %sub5.i = fsub float %2, %3
602  %4 = extractelement <3 x float> %in, i64 2
603  %5 = extractelement <3 x float> %in, i64 2
604  %sub9.i = fsub float %4, %5
; The fmuladd calls are issued in lane order 1, 2, 0; the fmuls and stores
; below follow that same order, which is what produces the rotated layout.
605  %6 = call float @llvm.fmuladd.f32(float %sub5.i, float 2.000000e+00, float 3.000000e+00)
606  %7 = call float @llvm.fmuladd.f32(float %sub9.i, float 2.000000e+00, float 3.000000e+00)
607  %8 = call float @llvm.fmuladd.f32(float %sub.i362, float 2.000000e+00, float 3.000000e+00)
608  %mul.i.i.i.i373 = fmul float %6, 3.000000e+00
609  %mul3.i.i.i.i = fmul float %7, 3.000000e+00
610  %mul6.i.i.i.i = fmul float %8, 3.000000e+00
; A[0] <- lane 1, A[1] <- lane 2, A[2] <- lane 0
611  store float %mul.i.i.i.i373, ptr %A, align 4
612  store float %mul3.i.i.i.i, ptr %arrayidx35.i, align 4
613  store float %mul6.i.i.i.i, ptr %arrayidx42.i, align 4
614  ret void
615}
616
; Declarations of the scalar fmuladd intrinsics referenced by the input IR.
; The f32 variant is called by @can_reorder_vec3_op_with_padding. No call to
; the f64 variant is visible in this part of the file; presumably an earlier
; test uses it (verify before removing).
617declare float @llvm.fmuladd.f32(float, float, float)
618declare double @llvm.fmuladd.f64(double, double, double)
619