xref: /llvm-project/llvm/test/Transforms/InstCombine/scalarization.ll (revision 38fffa630ee80163dc65e759392ad29798905679)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -passes=instcombine -S < %s | FileCheck %s
3
; Constant-index extract (lane 1) from a <4 x i32> load whose only user is the
; extract. The CHECK lines expect the vector load to stay as-is; the only
; change is that the extract index is printed as i64 instead of i32.
4define i32 @extract_load(ptr %p) {
5;
6; CHECK-LABEL: @extract_load(
7; CHECK-NEXT:    [[X:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4
8; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x i32> [[X]], i64 1
9; CHECK-NEXT:    ret i32 [[EXT]]
10;
11  %x = load <4 x i32>, ptr %p, align 4
12  %ext = extractelement <4 x i32> %x, i32 1
13  ret i32 %ext
14}
15
; Floating-point variant of the load + constant-index extract pattern
; (<4 x double>, lane 3, align 32). The CHECK lines expect the vector load to
; remain; only the index type changes from i32 to i64.
16define double @extract_load_fp(ptr %p) {
17;
18; CHECK-LABEL: @extract_load_fp(
19; CHECK-NEXT:    [[X:%.*]] = load <4 x double>, ptr [[P:%.*]], align 32
20; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[X]], i64 3
21; CHECK-NEXT:    ret double [[EXT]]
22;
23  %x = load <4 x double>, ptr %p, align 32
24  %ext = extractelement <4 x double> %x, i32 3
25  ret double %ext
26}
27
; Extract of lane 2 from a volatile <4 x double> load. The CHECK lines expect
; the load to stay volatile and vector-typed. The input load has no explicit
; alignment; the expected output prints it with `align 32`.
28define double @extract_load_volatile(ptr %p) {
29;
30; CHECK-LABEL: @extract_load_volatile(
31; CHECK-NEXT:    [[X:%.*]] = load volatile <4 x double>, ptr [[P:%.*]], align 32
32; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[X]], i64 2
33; CHECK-NEXT:    ret double [[EXT]]
34;
35  %x = load volatile <4 x double>, ptr %p
36  %ext = extractelement <4 x double> %x, i32 2
37  ret double %ext
38}
39
; The loaded vector has two users: the extract of lane 0 and a store of the
; whole vector to %p2. The CHECK lines expect load, extract and store to all
; remain. The input store has no explicit alignment; the expected output prints
; it with `align 32`.
40define double @extract_load_extra_use(ptr %p, ptr %p2) {
41;
42; CHECK-LABEL: @extract_load_extra_use(
43; CHECK-NEXT:    [[X:%.*]] = load <4 x double>, ptr [[P:%.*]], align 8
44; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[X]], i64 0
45; CHECK-NEXT:    store <4 x double> [[X]], ptr [[P2:%.*]], align 32
46; CHECK-NEXT:    ret double [[EXT]]
47;
48  %x = load <4 x double>, ptr %p, align 8
49  %ext = extractelement <4 x double> %x, i32 0
50  store <4 x double> %x, ptr %p2
51  ret double %ext
52}
53
; Extract from a loaded vector using a run-time index %y. The CHECK lines
; expect the load and the extract to remain, and the variable index keeps its
; i32 type. The load is printed with an explicit `align 32` in the expected
; output.
54define double @extract_load_variable_index(ptr %p, i32 %y) {
55;
56; CHECK-LABEL: @extract_load_variable_index(
57; CHECK-NEXT:    [[X:%.*]] = load <4 x double>, ptr [[P:%.*]], align 32
58; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 [[Y:%.*]]
59; CHECK-NEXT:    ret double [[EXT]]
60;
61  %x = load <4 x double>, ptr %p
62  %ext = extractelement <4 x double> %x, i32 %y
63  ret double %ext
64}
65
; Loop-carried vector phi whose only extracted lane is lane 1. %x.0 starts as a
; splat of the volatile-loaded float %t0 and is multiplied by a splat constant
; on every iteration. The CHECK lines expect:
;   - a scalar `phi float` fed by %t0 and a scalar fmul (no insertelement,
;     shufflevector or vector fmul left in the output);
;   - the loop test rewritten from `icmp ne` to `icmp eq` with the branch
;     targets swapped;
;   - the induction increment printed as `add nuw nsw` (input has only `nsw`).
66define void @scalarize_phi(ptr %n, ptr %inout) {
67;
68; CHECK-LABEL: @scalarize_phi(
69; CHECK-NEXT:  entry:
70; CHECK-NEXT:    [[T0:%.*]] = load volatile float, ptr [[INOUT:%.*]], align 4
71; CHECK-NEXT:    br label [[FOR_COND:%.*]]
72; CHECK:       for.cond:
73; CHECK-NEXT:    [[TMP0:%.*]] = phi float [ [[T0]], [[ENTRY:%.*]] ], [ [[TMP1:%.*]], [[FOR_BODY:%.*]] ]
74; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
75; CHECK-NEXT:    [[T1:%.*]] = load i32, ptr [[N:%.*]], align 4
76; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[I_0]], [[T1]]
77; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
78; CHECK:       for.body:
79; CHECK-NEXT:    store volatile float [[TMP0]], ptr [[INOUT]], align 4
80; CHECK-NEXT:    [[TMP1]] = fmul float [[TMP0]], 0x4002A3D700000000
81; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_0]], 1
82; CHECK-NEXT:    br label [[FOR_COND]]
83; CHECK:       for.end:
84; CHECK-NEXT:    ret void
85;
86entry:
87  %t0 = load volatile float, ptr %inout, align 4
88  %insert = insertelement <4 x float> undef, float %t0, i32 0
89  %splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer
  ; %insert1 has no users in this function; it does not appear in the expected output.
90  %insert1 = insertelement <4 x float> undef, float 3.0, i32 0
91  br label %for.cond
92
93for.cond:
94  %x.0 = phi <4 x float> [ %splat, %entry ], [ %mul, %for.body ]
95  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
96  %t1 = load i32, ptr %n, align 4
97  %cmp = icmp ne i32 %i.0, %t1
98  br i1 %cmp, label %for.body, label %for.end
99
100for.body:
101  %t2 = extractelement <4 x float> %x.0, i32 1
102  store volatile float %t2, ptr %inout, align 4
103  %mul = fmul <4 x float> %x.0, <float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000>
104  %inc = add nsw i32 %i.0, 1
105  br label %for.cond
106
107for.end:
108  ret void
109}
110
; Vector fadd with a splat constant, followed by a constant-index extract
; (lane 2). The CHECK lines expect the extract to be applied to %x first and
; the fadd to be performed on the scalar with the splat's element value.
111define float @extract_element_binop_splat_constant_index(<4 x float> %x) {
112;
113; CHECK-LABEL: @extract_element_binop_splat_constant_index(
114; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i64 2
115; CHECK-NEXT:    [[R:%.*]] = fadd float [[TMP1]], 0x4002A3D700000000
116; CHECK-NEXT:    ret float [[R]]
117;
118  %b = fadd <4 x float> %x, <float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000>
119  %r = extractelement <4 x float> %b, i32 2
120  ret float %r
121}
122
; Vector fdiv whose dividend is the constant <42.0, undef>, extracted at
; constant lane 0. The CHECK lines expect a scalar fdiv of 42.0 by lane 0 of
; %x; the undef lane of the constant is not referenced in the output.
123define double @extract_element_binop_splat_with_undef_constant_index(<2 x double> %x) {
124;
125; CHECK-LABEL: @extract_element_binop_splat_with_undef_constant_index(
126; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[X:%.*]], i64 0
127; CHECK-NEXT:    [[R:%.*]] = fdiv double 4.200000e+01, [[TMP1]]
128; CHECK-NEXT:    ret double [[R]]
129;
130  %b = fdiv <2 x double> <double 42.0, double undef>, %x
131  %r = extractelement <2 x double> %b, i32 0
132  ret double %r
133}
134
; Vector fmul by the non-splat constant <42.0, 43.0>, extracted at constant
; lane 1. The CHECK lines expect a scalar fmul of lane 1 of %x by 43.0, i.e.
; the constant element at the extracted lane.
135define float @extract_element_binop_nonsplat_constant_index(<2 x float> %x) {
136;
137; CHECK-LABEL: @extract_element_binop_nonsplat_constant_index(
138; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i64 1
139; CHECK-NEXT:    [[R:%.*]] = fmul float [[TMP1]], 4.300000e+01
140; CHECK-NEXT:    ret float [[R]]
141;
142  %b = fmul <2 x float> %x, <float 42.0, float 43.0>
143  %r = extractelement <2 x float> %b, i32 1
144  ret float %r
145}
146
; Vector sdiv of %x by a splat of 42, extracted at a run-time index %y. The
; divisor is a non-zero constant in every lane. The CHECK lines expect the
; variable-index extract to be applied to %x first, followed by a scalar
; `sdiv i8 ..., 42`.
147define i8 @extract_element_binop_splat_variable_index(<4 x i8> %x, i32 %y) {
148;
149; CHECK-LABEL: @extract_element_binop_splat_variable_index(
150; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 [[Y:%.*]]
151; CHECK-NEXT:    [[R:%.*]] = sdiv i8 [[TMP1]], 42
152; CHECK-NEXT:    ret i8 [[R]]
153;
154  %b = sdiv <4 x i8> %x, <i8 42, i8 42, i8 42, i8 42>
155  %r = extractelement <4 x i8> %b, i32 %y
156  ret i8 %r
157}
158
159; We cannot move the extractelement before the sdiv here, because %z may be
160; out of range, making the divisor poison and resulting in immediate UB.
; Negative test: the CHECK lines expect the input IR unchanged (vector sdiv of
; splat 42 by %y, then the variable-index extract). The %x parameter is unused.
161define i8 @extract_element_binop_splat_variable_index_may_trap(<4 x i8> %x, <4 x i8> %y, i32 %z) {
162;
163; CHECK-LABEL: @extract_element_binop_splat_variable_index_may_trap(
164; CHECK-NEXT:    [[B:%.*]] = sdiv <4 x i8> splat (i8 42), [[Y:%.*]]
165; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i8> [[B]], i32 [[Z:%.*]]
166; CHECK-NEXT:    ret i8 [[R]]
167;
168  %b = sdiv <4 x i8> splat (i8 42), %y
169  %r = extractelement <4 x i8> %b, i32 %z
170  ret i8 %r
171}
172
173; Moving the extractelement first is fine here, because the index is known to
174; be valid, so we can't introduce additional poison.
; Same sdiv as the variable-index case, but extracted at constant lane 3. The
; CHECK lines expect lane 3 of %y to be extracted first and then used as the
; divisor of a scalar `sdiv i8 42, ...`. The %x and %z parameters are unused.
175define i8 @extract_element_binop_constant_index_may_trap(<4 x i8> %x, <4 x i8> %y, i32 %z) {
176;
177; CHECK-LABEL: @extract_element_binop_constant_index_may_trap(
178; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i8> [[Y:%.*]], i64 3
179; CHECK-NEXT:    [[R:%.*]] = sdiv i8 42, [[TMP1]]
180; CHECK-NEXT:    ret i8 [[R]]
181;
182  %b = sdiv <4 x i8> splat (i8 42), %y
183  %r = extractelement <4 x i8> %b, i32 3
184  ret i8 %r
185}
186
; Negative test: vector mul by <42, 42, undef, 42> (a splat except for one
; undef lane), extracted at a run-time index %y. The CHECK lines expect the
; vector mul and the extract to remain exactly as in the input.
187define i8 @extract_element_binop_splat_with_undef_variable_index(<4 x i8> %x, i32 %y) {
188;
189; CHECK-LABEL: @extract_element_binop_splat_with_undef_variable_index(
190; CHECK-NEXT:    [[B:%.*]] = mul <4 x i8> [[X:%.*]], <i8 42, i8 42, i8 undef, i8 42>
191; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i8> [[B]], i32 [[Y:%.*]]
192; CHECK-NEXT:    ret i8 [[R]]
193;
194  %b = mul <4 x i8> %x, <i8 42, i8 42, i8 undef, i8 42>
195  %r = extractelement <4 x i8> %b, i32 %y
196  ret i8 %r
197}
198
; Negative test: vector lshr by the non-splat constant <4, 3, undef, 2>,
; extracted at a run-time index %y. The CHECK lines expect the vector lshr and
; the extract to remain exactly as in the input.
199define i8 @extract_element_binop_nonsplat_variable_index(<4 x i8> %x, i32 %y) {
200;
201; CHECK-LABEL: @extract_element_binop_nonsplat_variable_index(
202; CHECK-NEXT:    [[B:%.*]] = lshr <4 x i8> [[X:%.*]], <i8 4, i8 3, i8 undef, i8 2>
203; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i8> [[B]], i32 [[Y:%.*]]
204; CHECK-NEXT:    ret i8 [[R]]
205;
206  %b = lshr <4 x i8> %x, <i8 4, i8 3, i8 undef, i8 2>
207  %r = extractelement <4 x i8> %b, i32 %y
208  ret i8 %r
209}
210
; Vector fadd of %x with a loaded vector whose only user is the fadd, extracted
; at constant lane 2. The CHECK lines expect the vector load to remain (printed
; with `align 16`), lane 2 to be extracted from both %x and the loaded value,
; and the fadd to be performed on the two scalars.
211define float @extract_element_load(<4 x float> %x, ptr %ptr) {
212;
213; CHECK-LABEL: @extract_element_load(
214; CHECK-NEXT:    [[LOAD:%.*]] = load <4 x float>, ptr [[PTR:%.*]], align 16
215; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i64 2
216; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[LOAD]], i64 2
217; CHECK-NEXT:    [[R:%.*]] = fadd float [[TMP1]], [[TMP2]]
218; CHECK-NEXT:    ret float [[R]]
219;
220  %load = load <4 x float>, ptr %ptr
221  %add = fadd <4 x float> %x, %load
222  %r = extractelement <4 x float> %add, i32 2
223  ret float %r
224}
225
; Like the single-use load case, but the loaded vector is also stored to %ptr1,
; so it has two users. The CHECK lines expect the vector fadd and the extract
; of lane 2 to remain; only the index type changes to i64 and the load/store
; are printed with `align 16`.
226define float @extract_element_multi_Use_load(<4 x float> %x, ptr %ptr0, ptr %ptr1) {
227;
228; CHECK-LABEL: @extract_element_multi_Use_load(
229; CHECK-NEXT:    [[LOAD:%.*]] = load <4 x float>, ptr [[PTR0:%.*]], align 16
230; CHECK-NEXT:    store <4 x float> [[LOAD]], ptr [[PTR1:%.*]], align 16
231; CHECK-NEXT:    [[ADD:%.*]] = fadd <4 x float> [[X:%.*]], [[LOAD]]
232; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[ADD]], i64 2
233; CHECK-NEXT:    ret float [[R]]
234;
235  %load = load <4 x float>, ptr %ptr0
236  store <4 x float> %load, ptr %ptr1
237  %add = fadd <4 x float> %x, %load
238  %r = extractelement <4 x float> %add, i32 2
239  ret float %r
240}
241
; Vector fadd with a splat of 1.0, extracted at a run-time index %y. The CHECK
; lines expect the variable-index extract to be applied to %x first, followed
; by a scalar fadd with 1.0.
242define float @extract_element_variable_index(<4 x float> %x, i32 %y) {
243;
244; CHECK-LABEL: @extract_element_variable_index(
245; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 [[Y:%.*]]
246; CHECK-NEXT:    [[R:%.*]] = fadd float [[TMP1]], 1.000000e+00
247; CHECK-NEXT:    ret float [[R]]
248;
249  %add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
250  %r = extractelement <4 x float> %add, i32 %y
251  ret float %r
252}
253
; %f is inserted into lane 0 of %A, the result is multiplied by %B with the
; `nnan` flag, and lane 0 of the product is extracted. The CHECK lines expect a
; scalar `fmul nnan` of %f and lane 0 of %B; %A is not referenced in the
; expected output and the fast-math flag is kept on the scalar op.
254define float @extelt_binop_insertelt(<4 x float> %A, <4 x float> %B, float %f) {
255;
256; CHECK-LABEL: @extelt_binop_insertelt(
257; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
258; CHECK-NEXT:    [[E:%.*]] = fmul nnan float [[F:%.*]], [[TMP1]]
259; CHECK-NEXT:    ret float [[E]]
260;
261  %C = insertelement <4 x float> %A, float %f, i32 0
262  %D = fmul nnan <4 x float> %C, %B
263  %E = extractelement <4 x float> %D, i32 0
264  ret float %E
265}
266
267; We recurse to find a scalarizable operand.
; Two chained vector binops (add, then `mul nsw`) on top of an insertelement
; into lane 0, extracted at lane 0. The CHECK lines expect both binops to be
; scalar: lane 0 of %B is extracted twice (once per binop), %f is used directly
; in place of the inserted lane, and `nsw` is kept on the scalar mul.
268define i32 @extelt_binop_binop_insertelt(<4 x i32> %A, <4 x i32> %B, i32 %f) {
269;
270; CHECK-LABEL: @extelt_binop_binop_insertelt(
271; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[B:%.*]], i64 0
272; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[F:%.*]], [[TMP1]]
273; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[B]], i64 0
274; CHECK-NEXT:    [[E:%.*]] = mul nsw i32 [[TMP2]], [[TMP3]]
275; CHECK-NEXT:    ret i32 [[E]]
276;
277  %v = insertelement <4 x i32> %A, i32 %f, i32 0
278  %C = add <4 x i32> %v, %B
279  %D = mul nsw <4 x i32> %C, %B
280  %E = extractelement <4 x i32> %D, i32 0
281  ret i32 %E
282}
283
; Extract from the constant vector <1.0, 2.0, 3.0, 4.0> at a run-time index %y.
; The CHECK lines expect the extractelement to remain as written.
284define float @extract_element_constant_vector_variable_index(i32 %y) {
285;
286; CHECK-LABEL: @extract_element_constant_vector_variable_index(
287; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>, i32 [[Y:%.*]]
288; CHECK-NEXT:    ret float [[R]]
289;
290  %r = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %y
291  ret i1 %r
292}
293
; Vector `icmp eq %x, 0` and-ed with the mask %y, extracted at constant lane 2.
; The CHECK lines expect everything to be scalar: lane 2 of %x is compared
; against 0, lane 2 of %y is extracted, and the two i1 values are and-ed.
294define i1 @cheap_to_extract_icmp(<4 x i32> %x, <4 x i1> %y) {
295;
296; CHECK-LABEL: @cheap_to_extract_icmp(
297; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i64 2
298; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
299; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[Y:%.*]], i64 2
300; CHECK-NEXT:    [[R:%.*]] = and i1 [[TMP2]], [[TMP3]]
301; CHECK-NEXT:    ret i1 [[R]]
302;
303  %cmp = icmp eq <4 x i32> %x, zeroinitializer
304  %and = and <4 x i1> %cmp, %y
305  %r = extractelement <4 x i1> %and, i32 2
306  ret i1 %r
307}
308
; Floating-point counterpart of the icmp-and-mask case: vector `fcmp oeq %x, 0`
; and-ed with %y, extracted at constant lane 2. The CHECK lines expect a scalar
; fcmp on lane 2 of %x, a scalar extract of lane 2 of %y, and a scalar and.
309define i1 @cheap_to_extract_fcmp(<4 x float> %x, <4 x i1> %y) {
310;
311; CHECK-LABEL: @cheap_to_extract_fcmp(
312; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i64 2
313; CHECK-NEXT:    [[TMP2:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
314; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[Y:%.*]], i64 2
315; CHECK-NEXT:    [[R:%.*]] = and i1 [[TMP2]], [[TMP3]]
316; CHECK-NEXT:    ret i1 [[R]]
317;
318  %cmp = fcmp oeq <4 x float> %x, zeroinitializer
319  %and = and <4 x i1> %cmp, %y
320  %r = extractelement <4 x i1> %and, i32 2
321  ret i1 %r
322}
323
; Vector icmp against a constant (zeroinitializer) right-hand side, extracted
; at constant lane 0. The CHECK lines expect lane 0 of %arg to be extracted
; first and compared as a scalar `icmp eq i32 ..., 0`.
324define i1 @extractelt_vector_icmp_constrhs(<2 x i32> %arg) {
325;
326; CHECK-LABEL: @extractelt_vector_icmp_constrhs(
327; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[ARG:%.*]], i64 0
328; CHECK-NEXT:    [[EXT:%.*]] = icmp eq i32 [[TMP1]], 0
329; CHECK-NEXT:    ret i1 [[EXT]]
330;
331  %cmp = icmp eq <2 x i32> %arg, zeroinitializer
332  %ext = extractelement <2 x i1> %cmp, i32 0
333  ret i1 %ext
334}
335
; Vector fcmp against a constant (zeroinitializer) right-hand side, extracted
; at constant lane 0. The CHECK lines expect lane 0 of %arg to be extracted
; first and compared as a scalar `fcmp oeq float ..., 0.0`.
336define i1 @extractelt_vector_fcmp_constrhs(<2 x float> %arg) {
337;
338; CHECK-LABEL: @extractelt_vector_fcmp_constrhs(
339; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[ARG:%.*]], i64 0
340; CHECK-NEXT:    [[EXT:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
341; CHECK-NEXT:    ret i1 [[EXT]]
342;
343  %cmp = fcmp oeq <2 x float> %arg, zeroinitializer
344  %ext = extractelement <2 x i1> %cmp, i32 0
345  ret i1 %ext
346}
347
; Vector icmp against zeroinitializer, extracted at a run-time index %idx. The
; CHECK lines expect the variable-index extract to be applied to %arg first,
; followed by a scalar `icmp eq i32 ..., 0`; the index keeps its i32 type.
348define i1 @extractelt_vector_icmp_constrhs_dynidx(<2 x i32> %arg, i32 %idx) {
349;
350; CHECK-LABEL: @extractelt_vector_icmp_constrhs_dynidx(
351; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[ARG:%.*]], i32 [[IDX:%.*]]
352; CHECK-NEXT:    [[EXT:%.*]] = icmp eq i32 [[TMP1]], 0
353; CHECK-NEXT:    ret i1 [[EXT]]
354;
355  %cmp = icmp eq <2 x i32> %arg, zeroinitializer
356  %ext = extractelement <2 x i1> %cmp, i32 %idx
357  ret i1 %ext
358}
359
; Vector fcmp against zeroinitializer, extracted at a run-time index %idx. The
; CHECK lines expect the variable-index extract to be applied to %arg first,
; followed by a scalar `fcmp oeq float ..., 0.0`; the index keeps its i32 type.
360define i1 @extractelt_vector_fcmp_constrhs_dynidx(<2 x float> %arg, i32 %idx) {
361;
362; CHECK-LABEL: @extractelt_vector_fcmp_constrhs_dynidx(
363; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[ARG:%.*]], i32 [[IDX:%.*]]
364; CHECK-NEXT:    [[EXT:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
365; CHECK-NEXT:    ret i1 [[EXT]]
366;
367  %cmp = fcmp oeq <2 x float> %arg, zeroinitializer
368  %ext = extractelement <2 x i1> %cmp, i32 %idx
369  ret i1 %ext
370}
371
; Vector fcmp carrying the fast-math flags `nsz arcp`, extracted at constant
; lane 2. The CHECK lines expect a scalar fcmp on lane 2 of %x that still
; carries both `nsz` and `arcp`.
372define i1 @extractelt_vector_fcmp_copy_flags(<4 x float> %x) {
373; CHECK-LABEL: @extractelt_vector_fcmp_copy_flags(
374; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i64 2
375; CHECK-NEXT:    [[R:%.*]] = fcmp nsz arcp oeq float [[TMP1]], 0.000000e+00
376; CHECK-NEXT:    ret i1 [[R]]
377;
378  %cmp = fcmp nsz arcp oeq <4 x float> %x, zeroinitializer
379  %r = extractelement <4 x i1> %cmp, i32 2
380  ret i1 %r
381}
382
; Negative test: the fcmp's second operand %add has an additional user (a
; volatile store of the whole vector). The CHECK lines expect the vector fadd,
; the vector fcmp and the extract of lane 0 to remain; only the index type
; changes to i64 and the store is printed with `align 8`. The %idx parameter is
; unused.
383define i1 @extractelt_vector_fcmp_not_cheap_to_scalarize_multi_use(<2 x float> %arg0, <2 x float> %arg1, <2 x float> %arg2, i32 %idx) {
384;
385; CHECK-LABEL: @extractelt_vector_fcmp_not_cheap_to_scalarize_multi_use(
386; CHECK-NEXT:    [[ADD:%.*]] = fadd <2 x float> [[ARG1:%.*]], [[ARG2:%.*]]
387; CHECK-NEXT:    store volatile <2 x float> [[ADD]], ptr undef, align 8
388; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq <2 x float> [[ARG0:%.*]], [[ADD]]
389; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x i1> [[CMP]], i64 0
390; CHECK-NEXT:    ret i1 [[EXT]]
391;
392  %add = fadd <2 x float> %arg1, %arg2
393  store volatile <2 x float> %add, ptr undef
394  %cmp = fcmp oeq <2 x float> %arg0, %add
395  %ext = extractelement <2 x i1> %cmp, i32 0
396  ret i1 %ext
397}
398