; xref: /llvm-project/llvm/test/Transforms/GVN/pr63059.ll (revision 1d70ad5d0b933fc97e7d063c9676b5d7f01c6a0b)
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -passes=gvn -S < %s | FileCheck %s


; A <4 x float> load from %loc is followed by scalar float loads of elements 1
; and 2 of the same memory. The expected output contains no scalar loads: each
; value is extracted from the vector load by bitcasting it to i128, shifting
; right by 32 or 64 bits, truncating to i32 and bitcasting back to float.
; Lane 3 of the result reuses %load2, so only two extraction sequences appear.
define <4 x float> @ConvertVectors_ByRef(ptr %loc) {
; CHECK-LABEL: define <4 x float> @ConvertVectors_ByRef
; CHECK-SAME: (ptr [[LOC:%.*]]) {
; CHECK-NEXT:    [[LOAD_VEC:%.*]] = load <4 x float>, ptr [[LOC]], align 16
; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[LOAD_VEC]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [4 x float], ptr [[LOC]], i64 0, i64 1
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x float> [[LOAD_VEC]] to i128
; CHECK-NEXT:    [[TMP2:%.*]] = lshr i128 [[TMP1]], 32
; CHECK-NEXT:    [[TMP3:%.*]] = trunc i128 [[TMP2]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float
; CHECK-NEXT:    [[INS1:%.*]] = insertelement <4 x float> [[SHUF]], float [[TMP4]], i64 1
; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [4 x float], ptr [[LOC]], i64 0, i64 2
; CHECK-NEXT:    [[TMP5:%.*]] = lshr i128 [[TMP1]], 64
; CHECK-NEXT:    [[TMP6:%.*]] = trunc i128 [[TMP5]] to i32
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32 [[TMP6]] to float
; CHECK-NEXT:    [[INS2:%.*]] = insertelement <4 x float> [[INS1]], float [[TMP7]], i64 2
; CHECK-NEXT:    [[INS3:%.*]] = insertelement <4 x float> [[INS2]], float [[TMP7]], i64 3
; CHECK-NEXT:    ret <4 x float> [[INS3]]
;
  %load_vec = load <4 x float>, ptr %loc, align 16
  %shuf = shufflevector <4 x float> %load_vec, <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
  %gep1 = getelementptr inbounds [4 x float], ptr %loc, i64 0, i64 1
  %load1 = load float, ptr %gep1, align 4
  %ins1 = insertelement <4 x float> %shuf, float %load1, i64 1
  %gep2 = getelementptr inbounds [4 x float], ptr %loc, i64 0, i64 2
  %load2 = load float, ptr %gep2, align 8
  %ins2 = insertelement <4 x float> %ins1, float %load2, i64 2
  %ins3 = insertelement <4 x float> %ins2, float %load2, i64 3
  ret <4 x float> %ins3
}

; Stores a <4 x i32> and then loads an i64 starting at element 2 of the stored
; memory, so the loaded scalar is wider than one stored element. The expected
; output has no load: the value is taken from %v via bitcast to i128, a right
; shift by 64 bits and a truncation to i64.
define i64 @store_element_smaller_than_load(ptr %loc, <4 x i32> %v) {
; CHECK-LABEL: define i64 @store_element_smaller_than_load
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [4 x i32], ptr [[LOC]], i64 0, i64 2
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to i128
; CHECK-NEXT:    [[TMP1:%.*]] = lshr i128 [[TMP0]], 64
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i128 [[TMP1]] to i64
; CHECK-NEXT:    ret i64 [[TMP2]]
;
entry:
  store <4 x i32> %v, ptr %loc
  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0, i64 2
  %ref = load i64, ptr %gep
  ret i64 %ref
}

; Same store/load pair as @store_element_smaller_than_load, but with a call to
; @f between the store and the load. @f is declared without any memory
; attribute, and the expected output keeps the i64 load unchanged (no
; forwarding from the stored vector).
define i64 @call_before_load(ptr %loc, <4 x i32> %v) {
; CHECK-LABEL: define i64 @call_before_load
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT:    call void @f(<4 x i32> [[V]])
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [4 x i32], ptr [[LOC]], i64 0, i64 2
; CHECK-NEXT:    [[REF:%.*]] = load i64, ptr [[GEP]], align 4
; CHECK-NEXT:    ret i64 [[REF]]
;
entry:
  store <4 x i32> %v, ptr %loc
  call void @f(<4 x i32> %v)
  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0, i64 2
  %ref = load i64, ptr %gep
  ret i64 %ref
}

; Variant of @call_before_load where the intervening call targets @f_no_mem,
; which is declared memory(none). Here the expected output forwards the stored
; vector to the load (bitcast to i128, lshr 64, trunc to i64) and the call
; itself stays in place.
define i64 @call_before_load_memory_none(ptr %loc, <4 x i32> %v) {
; CHECK-LABEL: define i64 @call_before_load_memory_none
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT:    call void @f_no_mem(<4 x i32> [[V]])
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [4 x i32], ptr [[LOC]], i64 0, i64 2
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to i128
; CHECK-NEXT:    [[TMP1:%.*]] = lshr i128 [[TMP0]], 64
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i128 [[TMP1]] to i64
; CHECK-NEXT:    ret i64 [[TMP2]]
;
entry:
  store <4 x i32> %v, ptr %loc
  call void @f_no_mem(<4 x i32> %v)
  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0, i64 2
  %ref = load i64, ptr %gep
  ret i64 %ref
}

; The call to @f comes after the load rather than between the store and the
; load. The expected output forwards the stored vector to the load (bitcast to
; i128, lshr 64, trunc to i64) and keeps the call after the extraction.
define i64 @call_after_load(ptr %loc, <4 x i32> %v) {
; CHECK-LABEL: define i64 @call_after_load
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [4 x i32], ptr [[LOC]], i64 0, i64 2
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to i128
; CHECK-NEXT:    [[TMP1:%.*]] = lshr i128 [[TMP0]], 64
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i128 [[TMP1]] to i64
; CHECK-NEXT:    call void @f(<4 x i32> [[V]])
; CHECK-NEXT:    ret i64 [[TMP2]]
;
entry:
  store <4 x i32> %v, ptr %loc
  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0, i64 2
  %ref = load i64, ptr %gep
  call void @f(<4 x i32> %v)
  ret i64 %ref
}

; Floating-point counterpart of @store_element_smaller_than_load: stores a
; <4 x float> and loads a double starting at element 2. The expected output
; extracts the value through integers (bitcast to i128, lshr 64, trunc to i64)
; and finishes with a bitcast from i64 to double.
define double @store_element_smaller_than_load_float(ptr %loc, <4 x float> %v) {
; CHECK-LABEL: define double @store_element_smaller_than_load_float
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x float> [[V:%.*]]) {
; CHECK-NEXT:    store <4 x float> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [4 x float], ptr [[LOC]], i64 0, i64 2
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x float> [[V]] to i128
; CHECK-NEXT:    [[TMP2:%.*]] = lshr i128 [[TMP1]], 64
; CHECK-NEXT:    [[TMP3:%.*]] = trunc i128 [[TMP2]] to i64
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64 [[TMP3]] to double
; CHECK-NEXT:    ret double [[TMP4]]
;
  store <4 x float> %v, ptr %loc
  %gep = getelementptr inbounds [4 x float], ptr %loc, i64 0, i64 2
  %ref = load double, ptr %gep
  ret double %ref
}

; Stores a <2 x i32> and loads the same 64 bits back as a single i64 through a
; zero-index GEP. The expected output replaces the load with one bitcast of %v
; to i64; the zero-index GEP does not appear in the output.
define i64 @load_as_scalar(ptr %loc, <2 x i32> %v) {
; CHECK-LABEL: define i64 @load_as_scalar
; CHECK-SAME: (ptr [[LOC:%.*]], <2 x i32> [[V:%.*]]) {
; CHECK-NEXT:    store <2 x i32> [[V]], ptr [[LOC]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[V]] to i64
; CHECK-NEXT:    ret i64 [[TMP1]]
;
  store <2 x i32> %v, ptr %loc
  %gep = getelementptr inbounds [4 x float], ptr %loc, i64 0
  %ref = load i64, ptr %gep
  ret i64 %ref
}


; Stores a <4 x i6> and loads an i9 from the same address, i.e. a scalar wider
; than one stored element with a non-byte-sized type. The expected output
; bitcasts %v to i24, truncates to i16 and then truncates again to i9.
define i9 @load_as_scalar_larger(ptr %loc, <4 x i6> %v) {
; CHECK-LABEL: define i9 @load_as_scalar_larger
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i6> [[V:%.*]]) {
; CHECK-NEXT:    store <4 x i6> [[V]], ptr [[LOC]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i6> [[V]] to i24
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i24 [[TMP1]] to i16
; CHECK-NEXT:    [[TMP3:%.*]] = trunc i16 [[TMP2]] to i9
; CHECK-NEXT:    ret i9 [[TMP3]]
;
  store <4 x i6> %v, ptr %loc
  %gep = getelementptr i9, ptr %loc, i64 0
  %ref = load i9, ptr %gep
  ret i9 %ref
}


; Stores a <4 x i6> and loads an i4 from the same address, i.e. a scalar
; narrower than one stored element. The expected output bitcasts %v to i24,
; truncates to i8 and then truncates again to i4.
define i4 @load_as_scalar_smaller(ptr %loc, <4 x i6> %v) {
; CHECK-LABEL: define i4 @load_as_scalar_smaller
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i6> [[V:%.*]]) {
; CHECK-NEXT:    store <4 x i6> [[V]], ptr [[LOC]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i6> [[V]] to i24
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i24 [[TMP1]] to i8
; CHECK-NEXT:    [[TMP3:%.*]] = trunc i8 [[TMP2]] to i4
; CHECK-NEXT:    ret i4 [[TMP3]]
;
  store <4 x i6> %v, ptr %loc
  %gep = getelementptr i4, ptr %loc, i64 0
  %ref = load i4, ptr %gep
  ret i4 %ref
}


; Stores a <4 x i32> and reloads it with the identical vector type. The
; expected output uses %v directly as the extractelement operand, with no load
; and no intermediate casts.
define i32 @load_vec_same_type(ptr %loc, <4 x i32> %v) {
; CHECK-LABEL: define i32 @load_vec_same_type
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i32> [[V]], i32 1
; CHECK-NEXT:    ret i32 [[R]]
;
entry:
  store <4 x i32> %v, ptr %loc
  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0
  %ref = load <4 x i32>, ptr %gep
  %r = extractelement <4 x i32> %ref, i32 1
  ret i32 %r
}

; Stores a <4 x i32> and reloads the same 128 bits as <2 x i64>. The expected
; output replaces the load with a bitcast of %v to i128 followed by a bitcast
; of that integer to <2 x i64>.
define i64 @load_vec_same_size_different_type1(ptr %loc, <4 x i32> %v) {
; CHECK-LABEL: define i64 @load_vec_same_size_different_type1
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to i128
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[TMP0]] to <2 x i64>
; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
; CHECK-NEXT:    ret i64 [[R]]
;
entry:
  store <4 x i32> %v, ptr %loc
  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0
  %ref = load <2 x i64>, ptr %gep
  %r = extractelement <2 x i64> %ref, i32 1
  ret i64 %r
}

; Stores a <4 x i32> and reloads the same 128 bits as <2 x double>. The
; expected output replaces the load with a bitcast of %v to i128 followed by a
; bitcast of that integer to <2 x double>.
define double @load_vec_same_size_different_type2(ptr %loc, <4 x i32> %v) {
; CHECK-LABEL: define double @load_vec_same_size_different_type2
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to i128
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[TMP0]] to <2 x double>
; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
; CHECK-NEXT:    ret double [[R]]
;
entry:
  store <4 x i32> %v, ptr %loc
  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0
  %ref = load <2 x double>, ptr %gep
  %r = extractelement <2 x double> %ref, i32 1
  ret double %r
}

; Stores a <4 x i32> and loads a <2 x i32> (same element type, half the width)
; from the same address. The expected output bitcasts %v to i128, truncates to
; i64 and bitcasts the result to <2 x i32>.
define i32 @load_subvector_same_type(ptr %loc, <4 x i32> %v) {
; CHECK-LABEL: define i32 @load_subvector_same_type
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to i128
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i128 [[TMP0]] to i64
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64 [[TMP1]] to <2 x i32>
; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
; CHECK-NEXT:    ret i32 [[R]]
;
entry:
  store <4 x i32> %v, ptr %loc
  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0
  %ref = load <2 x i32>, ptr %gep
  %r = extractelement <2 x i32> %ref, i32 1
  ret i32 %r
}

; Stores an <8 x i32> and loads a <2 x i64> (different element type, half the
; width) from the same address. The expected output bitcasts %v to i256,
; truncates to i128 and bitcasts the result to <2 x i64>.
define i64 @load_subvector_different_type(ptr %loc, <8 x i32> %v) {
; CHECK-LABEL: define i64 @load_subvector_different_type
; CHECK-SAME: (ptr [[LOC:%.*]], <8 x i32> [[V:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <8 x i32> [[V]], ptr [[LOC]], align 32
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i32> [[V]] to i256
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i256 [[TMP0]] to i128
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <2 x i64>
; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
; CHECK-NEXT:    ret i64 [[R]]
;
entry:
  store <8 x i32> %v, ptr %loc
  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0
  %ref = load <2 x i64>, ptr %gep
  %r = extractelement <2 x i64> %ref, i32 1
  ret i64 %r
}

; Stores an <8 x i32> and loads a <2 x i16> from the same address, so the
; loaded vector is both narrower overall and has smaller elements. The
; expected output bitcasts %v to i256, truncates to i32 and bitcasts the
; result to <2 x i16>.
define i16 @load_subvector_different_type2(ptr %loc, <8 x i32> %v) {
; CHECK-LABEL: define i16 @load_subvector_different_type2
; CHECK-SAME: (ptr [[LOC:%.*]], <8 x i32> [[V:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <8 x i32> [[V]], ptr [[LOC]], align 32
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i32> [[V]] to i256
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i256 [[TMP0]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[TMP1]] to <2 x i16>
; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x i16> [[TMP2]], i32 1
; CHECK-NEXT:    ret i16 [[R]]
;
entry:
  store <8 x i32> %v, ptr %loc
  %gep = getelementptr [2 x i16], ptr %loc, i64 0
  %ref = load <2 x i16>, ptr %gep
  %r = extractelement <2 x i16> %ref, i32 1
  ret i16 %r
}

; Stores an <8 x i8> and loads a <3 x i4> (12 bits in total) from the same
; address. The expected output bitcasts %v to i64, truncates to i16, truncates
; again to i12 and bitcasts the result to <3 x i4>.
define i4 @load_subvector_different_type3(ptr %loc, <8 x i8> %v) {
; CHECK-LABEL: define i4 @load_subvector_different_type3
; CHECK-SAME: (ptr [[LOC:%.*]], <8 x i8> [[V:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <8 x i8> [[V]], ptr [[LOC]], align 8
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to i64
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i16
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i16 [[TMP1]] to i12
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i12 [[TMP2]] to <3 x i4>
; CHECK-NEXT:    [[R:%.*]] = extractelement <3 x i4> [[TMP3]], i32 1
; CHECK-NEXT:    ret i4 [[R]]
;
entry:
  store <8 x i8> %v, ptr %loc
  %gep = getelementptr [3 x i4], ptr %loc, i64 0
  %ref = load <3 x i4>, ptr %gep
  %r = extractelement <3 x i4> %ref, i32 1
  ret i4 %r
}

; Stores an <8 x i8> and loads a <2 x i12> (24 bits in total) from the same
; address. The expected output bitcasts %v to i64, truncates to i24 and
; bitcasts the result to <2 x i12>.
define i12 @load_subvector_different_type4(ptr %loc, <8 x i8> %v) {
; CHECK-LABEL: define i12 @load_subvector_different_type4
; CHECK-SAME: (ptr [[LOC:%.*]], <8 x i8> [[V:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <8 x i8> [[V]], ptr [[LOC]], align 8
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to i64
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i24
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i24 [[TMP1]] to <2 x i12>
; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x i12> [[TMP2]], i32 1
; CHECK-NEXT:    ret i12 [[R]]
;
entry:
  store <8 x i8> %v, ptr %loc
  %gep = getelementptr [2 x i12], ptr %loc, i64 0
  %ref = load <2 x i12>, ptr %gep
  %r = extractelement <2 x i12> %ref, i32 1
  ret i12 %r
}

; Stores an <8 x i8> and loads a <2 x i6> (12 bits in total) from the same
; address. The expected output bitcasts %v to i64, truncates to i16, truncates
; again to i12 and bitcasts the result to <2 x i6>.
define i6 @load_subvector_different_type5(ptr %loc, <8 x i8> %v) {
; CHECK-LABEL: define i6 @load_subvector_different_type5
; CHECK-SAME: (ptr [[LOC:%.*]], <8 x i8> [[V:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <8 x i8> [[V]], ptr [[LOC]], align 8
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to i64
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i16
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i16 [[TMP1]] to i12
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i12 [[TMP2]] to <2 x i6>
; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x i6> [[TMP3]], i32 1
; CHECK-NEXT:    ret i6 [[R]]
;
entry:
  store <8 x i8> %v, ptr %loc
  %gep = getelementptr [2 x i6], ptr %loc, i64 0
  %ref = load <2 x i6>, ptr %gep
  %r = extractelement <2 x i6> %ref, i32 1
  ret i6 %r
}

; External callees used by the call_* tests above. @f carries no memory
; attribute; @f_no_mem is marked memory(none).
declare void @f(<4 x i32>)
declare void @f_no_mem(<4 x i32>) memory(none)
