xref: /llvm-project/llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll (revision 6e54fccede402c9ed0e8038aa258a99c5a2773e5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=aarch64-apple-ios -global-isel -global-isel-abort=1 | FileCheck %s
3
; Positive test: the low byte of %x is stored at %p and the byte obtained by
; shifting %x right by 8 is stored at %p + 1. The expected output is a single
; halfword store (strh) of the original value.
4define dso_local void @trunc_i16_to_i8(i16 %x, ptr %p) {
5; CHECK-LABEL: trunc_i16_to_i8:
6; CHECK:       ; %bb.0:
7; CHECK-NEXT:    strh w0, [x1]
8; CHECK-NEXT:    ret
9  %t1 = trunc i16 %x to i8
10  %sh = lshr i16 %x, 8
11  %t2 = trunc i16 %sh to i8
12  store i8 %t1, ptr %p, align 1
13  %p1 = getelementptr inbounds i8, ptr %p, i64 1
14  store i8 %t2, ptr %p1, align 1
15  ret void
16}
17
; Positive test: the four bytes of an i32 (shift amounts 0, 8, 16, 24) are
; stored to byte offsets 0..3 of %p in increasing order. The expected output
; is a single 32-bit store (str w0).
18define dso_local void @trunc_i32_to_i8(i32 %x, ptr %p) {
19; CHECK-LABEL: trunc_i32_to_i8:
20; CHECK:       ; %bb.0:
21; CHECK-NEXT:    str w0, [x1]
22; CHECK-NEXT:    ret
23  %t1 = trunc i32 %x to i8
24  %sh1 = lshr i32 %x, 8
25  %t2 = trunc i32 %sh1 to i8
26  %sh2 = lshr i32 %x, 16
27  %t3 = trunc i32 %sh2 to i8
28  %sh3 = lshr i32 %x, 24
29  %t4 = trunc i32 %sh3 to i8
30  store i8 %t1, ptr %p, align 1
31  %p1 = getelementptr inbounds i8, ptr %p, i64 1
32  store i8 %t2, ptr %p1, align 1
33  %p2 = getelementptr inbounds i8, ptr %p, i64 2
34  store i8 %t3, ptr %p2, align 1
35  %p3 = getelementptr inbounds i8, ptr %p, i64 3
36  store i8 %t4, ptr %p3, align 1
37  ret void
38}
39
; Positive test: the low and high 16-bit halves of an i32 are stored to
; i16 elements 0 and 1 of %p. The expected output is a single 32-bit store.
40define dso_local void @trunc_i32_to_i16(i32 %x, ptr %p) {
41; CHECK-LABEL: trunc_i32_to_i16:
42; CHECK:       ; %bb.0:
43; CHECK-NEXT:    str w0, [x1]
44; CHECK-NEXT:    ret
45  %t1 = trunc i32 %x to i16
46  %sh = lshr i32 %x, 16
47  %t2 = trunc i32 %sh to i16
48  store i16 %t1, ptr %p, align 2
49  %p1 = getelementptr inbounds i16, ptr %p, i64 1
50  store i16 %t2, ptr %p1, align 2
51  ret void
52}
53
; Positive test with the halves in swapped (big-endian style) positions: the
; high half of %x goes to the lower address (%p0) and the low half to the
; higher address (%p0 + 2 bytes). The expected output rotates the value by 16
; bits and then performs one 32-bit store. Here the store to the higher
; address comes first in program order.
54define dso_local void @be_i32_to_i16(i32 %x, ptr %p0) {
55; CHECK-LABEL: be_i32_to_i16:
56; CHECK:       ; %bb.0:
57; CHECK-NEXT:    ror w8, w0, #16
58; CHECK-NEXT:    str w8, [x1]
59; CHECK-NEXT:    ret
60  %sh1 = lshr i32 %x, 16
61  %t0 = trunc i32 %x to i16
62  %t1 = trunc i32 %sh1 to i16
63  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
64  store i16 %t0, ptr %p1, align 2
65  store i16 %t1, ptr %p0, align 2
66  ret void
67}
68
; Same value/address pairing as be_i32_to_i16 (high half at %p0, low half at
; %p0 + 2 bytes), but the two stores appear in the opposite program order:
; the store to the lower address comes first. The expected output is the same
; rotate-by-16 followed by one 32-bit store.
69define dso_local void @be_i32_to_i16_order(i32 %x, ptr %p0) {
70; CHECK-LABEL: be_i32_to_i16_order:
71; CHECK:       ; %bb.0:
72; CHECK-NEXT:    ror w8, w0, #16
73; CHECK-NEXT:    str w8, [x1]
74; CHECK-NEXT:    ret
75  %sh1 = lshr i32 %x, 16
76  %t0 = trunc i32 %x to i16
77  %t1 = trunc i32 %sh1 to i16
78  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
79  store i16 %t1, ptr %p0, align 2
80  store i16 %t0, ptr %p1, align 2
81  ret void
82}
83
; Positive test: all eight bytes of an i64 (shift amounts 0, 8, ..., 56) are
; stored to byte offsets 0..7 of %p in increasing order. The expected output
; is a single 64-bit store (str x0).
84define dso_local void @trunc_i64_to_i8(i64 %x, ptr %p) {
85; CHECK-LABEL: trunc_i64_to_i8:
86; CHECK:       ; %bb.0:
87; CHECK-NEXT:    str x0, [x1]
88; CHECK-NEXT:    ret
89  %t1 = trunc i64 %x to i8
90  %sh1 = lshr i64 %x, 8
91  %t2 = trunc i64 %sh1 to i8
92  %sh2 = lshr i64 %x, 16
93  %t3 = trunc i64 %sh2 to i8
94  %sh3 = lshr i64 %x, 24
95  %t4 = trunc i64 %sh3 to i8
96  %sh4 = lshr i64 %x, 32
97  %t5 = trunc i64 %sh4 to i8
98  %sh5 = lshr i64 %x, 40
99  %t6 = trunc i64 %sh5 to i8
100  %sh6 = lshr i64 %x, 48
101  %t7 = trunc i64 %sh6 to i8
102  %sh7 = lshr i64 %x, 56
103  %t8 = trunc i64 %sh7 to i8
104  store i8 %t1, ptr %p, align 1
105  %p1 = getelementptr inbounds i8, ptr %p, i64 1
106  store i8 %t2, ptr %p1, align 1
107  %p2 = getelementptr inbounds i8, ptr %p, i64 2
108  store i8 %t3, ptr %p2, align 1
109  %p3 = getelementptr inbounds i8, ptr %p, i64 3
110  store i8 %t4, ptr %p3, align 1
111  %p4 = getelementptr inbounds i8, ptr %p, i64 4
112  store i8 %t5, ptr %p4, align 1
113  %p5 = getelementptr inbounds i8, ptr %p, i64 5
114  store i8 %t6, ptr %p5, align 1
115  %p6 = getelementptr inbounds i8, ptr %p, i64 6
116  store i8 %t7, ptr %p6, align 1
117  %p7 = getelementptr inbounds i8, ptr %p, i64 7
118  store i8 %t8, ptr %p7, align 1
119  ret void
120}
121
; Positive test: the four 16-bit pieces of an i64 (shift amounts 0, 16, 32,
; 48) are stored to i16 elements 0..3 of %p. The expected output is a single
; 64-bit store.
122define dso_local void @trunc_i64_to_i16(i64 %x, ptr %p) {
123; CHECK-LABEL: trunc_i64_to_i16:
124; CHECK:       ; %bb.0:
125; CHECK-NEXT:    str x0, [x1]
126; CHECK-NEXT:    ret
127  %t1 = trunc i64 %x to i16
128  %sh1 = lshr i64 %x, 16
129  %t2 = trunc i64 %sh1 to i16
130  %sh2 = lshr i64 %x, 32
131  %t3 = trunc i64 %sh2 to i16
132  %sh3 = lshr i64 %x, 48
133  %t4 = trunc i64 %sh3 to i16
134  store i16 %t1, ptr %p, align 2
135  %p1 = getelementptr inbounds i16, ptr %p, i64 1
136  store i16 %t2, ptr %p1, align 2
137  %p2 = getelementptr inbounds i16, ptr %p, i64 2
138  store i16 %t3, ptr %p2, align 2
139  %p3 = getelementptr inbounds i16, ptr %p, i64 3
140  store i16 %t4, ptr %p3, align 2
141  ret void
142}
143
; Positive test: the low and high 32-bit halves of an i64 are stored to i32
; elements 0 and 1 of %p. The expected output is a single 64-bit store.
144define dso_local void @trunc_i64_to_i32(i64 %x, ptr %p) {
145; CHECK-LABEL: trunc_i64_to_i32:
146; CHECK:       ; %bb.0:
147; CHECK-NEXT:    str x0, [x1]
148; CHECK-NEXT:    ret
149  %t1 = trunc i64 %x to i32
150  %sh = lshr i64 %x, 32
151  %t2 = trunc i64 %sh to i32
152  store i32 %t1, ptr %p, align 4
153  %p1 = getelementptr inbounds i32, ptr %p, i64 1
154  store i32 %t2, ptr %p1, align 4
155  ret void
156}
; Positive test with the halves in swapped (big-endian style) positions: the
; high 32 bits of %x go to the lower address (%p0) and the low 32 bits to the
; higher address (%p0 + 4 bytes). The expected output rotates the value by 32
; bits and then performs one 64-bit store. Here the store to the higher
; address comes first in program order.
157define dso_local void @be_i64_to_i32(i64 %x, ptr %p0) {
158; CHECK-LABEL: be_i64_to_i32:
159; CHECK:       ; %bb.0:
160; CHECK-NEXT:    ror x8, x0, #32
161; CHECK-NEXT:    str x8, [x1]
162; CHECK-NEXT:    ret
163  %sh1 = lshr i64 %x, 32
164  %t0 = trunc i64 %x to i32
165  %t1 = trunc i64 %sh1 to i32
166  %p1 = getelementptr inbounds i32, ptr %p0, i64 1
167  store i32 %t0, ptr %p1, align 4
168  store i32 %t1, ptr %p0, align 4
169  ret void
170}
171
; Same value/address pairing as be_i64_to_i32 (high half at %p0, low half at
; %p0 + 4 bytes), but the two stores appear in the opposite program order:
; the store to the lower address comes first. The expected output is the same
; rotate-by-32 followed by one 64-bit store.
172define dso_local void @be_i64_to_i32_order(i64 %x, ptr %p0) {
173; CHECK-LABEL: be_i64_to_i32_order:
174; CHECK:       ; %bb.0:
175; CHECK-NEXT:    ror x8, x0, #32
176; CHECK-NEXT:    str x8, [x1]
177; CHECK-NEXT:    ret
178  %sh1 = lshr i64 %x, 32
179  %t0 = trunc i64 %x to i32
180  %t1 = trunc i64 %sh1 to i32
181  %p1 = getelementptr inbounds i32, ptr %p0, i64 1
182  store i32 %t1, ptr %p0, align 4
183  store i32 %t0, ptr %p1, align 4
184  ret void
185}
186
187; Negative tests.
188
; Negative test: the low byte of %x is stored at byte offset 0 and the upper
; 16 bits at byte offset 2, so byte offset 1 is never written. The expected
; output keeps two separate stores (strb + strh).
189define void @merge_hole(i32 %x, ptr %p) {
190; CHECK-LABEL: merge_hole:
191; CHECK:       ; %bb.0:
192; CHECK-NEXT:    lsr w8, w0, #16
193; CHECK-NEXT:    strb w0, [x1]
194; CHECK-NEXT:    strh w8, [x1, #2]
195; CHECK-NEXT:    ret
196  %p2 = getelementptr inbounds i16, ptr %p, i64 1
197  %x3 = trunc i32 %x to i8
198  store i8 %x3, ptr %p, align 1
199  %sh = lshr i32 %x, 16
200  %x01 = trunc i32 %sh to i16
201  store i16 %x01, ptr %p2, align 1
202  ret void
203}
204
; Negative test: same stores as merge_hole (low byte at offset 0, upper 16
; bits at offset 2, offset 1 unwritten) but issued in the opposite program
; order, with the i16 store first. The expected output still keeps two
; separate stores.
205define void @merge_hole2(i32 %x, ptr %p) {
206; CHECK-LABEL: merge_hole2:
207; CHECK:       ; %bb.0:
208; CHECK-NEXT:    lsr w8, w0, #16
209; CHECK-NEXT:    strb w0, [x1]
210; CHECK-NEXT:    strh w8, [x1, #2]
211; CHECK-NEXT:    ret
212  %p2 = getelementptr inbounds i16, ptr %p, i64 1
213  %sh = lshr i32 %x, 16
214  %x01 = trunc i32 %sh to i16
215  store i16 %x01, ptr %p2, align 1
216  %x3 = trunc i32 %x to i8
217  store i8 %x3, ptr %p, align 1
218  ret void
219}
220
; Negative test: the low byte of %x is stored at byte offset 1 and the upper
; 16 bits at byte offset 2. The written bytes (1..3) are adjacent in memory,
; but bits 8..15 of %x are never stored and the low byte is not at offset 0.
; The expected output keeps two separate stores.
221define void @merge_hole3(i32 %x, ptr %p) {
222; CHECK-LABEL: merge_hole3:
223; CHECK:       ; %bb.0:
224; CHECK-NEXT:    lsr w8, w0, #16
225; CHECK-NEXT:    strb w0, [x1, #1]
226; CHECK-NEXT:    strh w8, [x1, #2]
227; CHECK-NEXT:    ret
228  %p1 = getelementptr inbounds i8, ptr %p, i64 1
229  %p2 = getelementptr inbounds i16, ptr %p, i64 1
230  %x3 = trunc i32 %x to i8
231  store i8 %x3, ptr %p1, align 1
232  %sh = lshr i32 %x, 16
233  %x01 = trunc i32 %sh to i16
234  store i16 %x01, ptr %p2, align 1
235  ret void
236}
237
; Negative test: the upper 16 bits of %x are stored at byte offset 0 and the
; low byte at byte offset 2. The written bytes (0..2) are adjacent in memory,
; but bits 8..15 of %x are never stored. The expected output keeps two
; separate stores.
238define void @merge_hole4(i32 %x, ptr %p) {
239; CHECK-LABEL: merge_hole4:
240; CHECK:       ; %bb.0:
241; CHECK-NEXT:    lsr w8, w0, #16
242; CHECK-NEXT:    strb w0, [x1, #2]
243; CHECK-NEXT:    strh w8, [x1]
244; CHECK-NEXT:    ret
245  %p2 = getelementptr inbounds i8, ptr %p, i64 2
246  %x3 = trunc i32 %x to i8
247  store i8 %x3, ptr %p2, align 1
248  %sh = lshr i32 %x, 16
249  %x01 = trunc i32 %sh to i16
250  store i16 %x01, ptr %p, align 1
251  ret void
252}
253
; Negative test: the two half stores would otherwise form a 32-bit store, but
; a load through a different pointer (%ptr) sits between them in program
; order. The expected output keeps both strh instructions with the ldr in
; between, and returns the loaded value.
254define dso_local i32 @load_between_stores(i32 %x, ptr %p, ptr %ptr) {
255; CHECK-LABEL: load_between_stores:
256; CHECK:       ; %bb.0:
257; CHECK-NEXT:    strh w0, [x1]
258; CHECK-NEXT:    lsr w9, w0, #16
259; CHECK-NEXT:    ldr w8, [x2]
260; CHECK-NEXT:    strh w9, [x1, #2]
261; CHECK-NEXT:    mov w0, w8
262; CHECK-NEXT:    ret
263  %t1 = trunc i32 %x to i16
264  %sh = lshr i32 %x, 16
265  %t2 = trunc i32 %sh to i16
266  store i16 %t1, ptr %p, align 2
267  %ld = load i32, ptr %ptr
268  %p1 = getelementptr inbounds i16, ptr %p, i64 1
269  store i16 %t2, ptr %p1, align 2
270  ret i32 %ld
271}
272
; Negative test: the second byte comes from a shift by 4 rather than by 8, so
; the two stored bytes do not together reproduce the original i16. The
; expected output keeps two separate byte stores.
273define dso_local void @invalid_shift(i16 %x, ptr %p) {
274; CHECK-LABEL: invalid_shift:
275; CHECK:       ; %bb.0:
276; CHECK-NEXT:    ubfx w8, w0, #4, #12
277; CHECK-NEXT:    strb w0, [x1]
278; CHECK-NEXT:    strb w8, [x1, #1]
279; CHECK-NEXT:    ret
280  %t1 = trunc i16 %x to i8
281  %sh = lshr i16 %x, 4
282  %t2 = trunc i16 %sh to i8
283  store i8 %t1, ptr %p, align 1
284  %p1 = getelementptr inbounds i8, ptr %p, i64 1
285  store i8 %t2, ptr %p1, align 1
286  ret void
287}
288
; Partial-merge test: bytes 0, 1 and 3 of an i32 are stored, but byte 2 is
; not. The expected output merges bytes 0 and 1 into one strh and leaves the
; store of byte 3 as a separate strb.
289define dso_local void @missing_store(i32 %x, ptr %p) {
290; The missing store of shift 16 means we can't merge to 32 bit store,
291; but we can still partially merge to a 16 bit one.
292; CHECK-LABEL: missing_store:
293; CHECK:       ; %bb.0:
294; CHECK-NEXT:    lsr w8, w0, #24
295; CHECK-NEXT:    strh w0, [x1]
296; CHECK-NEXT:    strb w8, [x1, #3]
297; CHECK-NEXT:    ret
298  %t1 = trunc i32 %x to i8
299  %sh1 = lshr i32 %x, 8
300  %t2 = trunc i32 %sh1 to i8
301  %sh3 = lshr i32 %x, 24
302  %t4 = trunc i32 %sh3 to i8
303  store i8 %t1, ptr %p, align 1
304  %p1 = getelementptr inbounds i8, ptr %p, i64 1
305  store i8 %t2, ptr %p1, align 1
306  %p3 = getelementptr inbounds i8, ptr %p, i64 3
307  store i8 %t4, ptr %p3, align 1
308  ret void
309}
310
; Negative test: the low byte is stored through %p while the high byte is
; stored at offset 1 from a different pointer argument, %p2. The expected
; output keeps two separate byte stores, one per base register.
311define dso_local void @different_base_reg(i16 %x, ptr %p, ptr %p2) {
312; CHECK-LABEL: different_base_reg:
313; CHECK:       ; %bb.0:
314; CHECK-NEXT:    ubfx w8, w0, #8, #8
315; CHECK-NEXT:    strb w0, [x1]
316; CHECK-NEXT:    strb w8, [x2, #1]
317; CHECK-NEXT:    ret
318  %t1 = trunc i16 %x to i8
319  %sh = lshr i16 %x, 8
320  %t2 = trunc i16 %sh to i8
321  store i8 %t1, ptr %p, align 1
322  %p1 = getelementptr inbounds i8, ptr %p2, i64 1
323  store i8 %t2, ptr %p1, align 1
324  ret void
325}
326
; Negative test: one of the two byte stores is volatile. The expected output
; keeps two separate byte stores.
; NOTE(review): despite the function name, the volatile store is the first one
; in program order (the store to %p). Presumably "second" refers to the order
; in which the combiner visits the stores -- confirm before renaming anything.
327define dso_local void @second_store_is_volatile(i16 %x, ptr %p) {
328; CHECK-LABEL: second_store_is_volatile:
329; CHECK:       ; %bb.0:
330; CHECK-NEXT:    ubfx w8, w0, #8, #8
331; CHECK-NEXT:    strb w0, [x1]
332; CHECK-NEXT:    strb w8, [x1, #1]
333; CHECK-NEXT:    ret
334  %t1 = trunc i16 %x to i8
335  %sh = lshr i16 %x, 8
336  %t2 = trunc i16 %sh to i8
337  store volatile i8 %t1, ptr %p, align 1
338  %p1 = getelementptr inbounds i8, ptr %p, i64 1
339  store i8 %t2, ptr %p1, align 1
340  ret void
341}
342
; External function with no body in this file; trunc_from_larger_src_val
; passes the address of its stack buffer to it.
343declare void @use_ptr(ptr)
344
; Positive test: the four low bytes of an i64 (shift amounts 0, 8, 16, 24) are
; stored to a 4-byte stack buffer whose address is then passed to @use_ptr.
; The expected output writes the buffer with one 32-bit store of w0.
345define dso_local void @trunc_from_larger_src_val(i64 %hold.4.lcssa, ptr %check1792) {
346  ; Here we can merge these i8 stores into a single i32 store, but first we need
347  ; to truncate the i64 value to i32.
348; CHECK-LABEL: trunc_from_larger_src_val:
349; CHECK:       ; %bb.0:
350; CHECK-NEXT:    sub sp, sp, #32
351; CHECK-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
352; CHECK-NEXT:    .cfi_def_cfa_offset 32
353; CHECK-NEXT:    .cfi_offset w30, -8
354; CHECK-NEXT:    .cfi_offset w29, -16
355; CHECK-NEXT:    str w0, [sp, #12]
356; CHECK-NEXT:    add x0, sp, #12
357; CHECK-NEXT:    bl _use_ptr
358; CHECK-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
359; CHECK-NEXT:    add sp, sp, #32
360; CHECK-NEXT:    ret
361  %hbuf = alloca [4 x i8], align 1
362  %arrayidx177 = getelementptr inbounds [4 x i8], ptr %hbuf, i64 0, i64 1
363  %arrayidx234 = getelementptr inbounds [4 x i8], ptr %hbuf, i64 0, i64 2
364  %arrayidx237 = getelementptr inbounds [4 x i8], ptr %hbuf, i64 0, i64 3
365  %conv227 = trunc i64 %hold.4.lcssa to i8
366  store i8 %conv227, ptr %hbuf, align 1
367  %shr229 = lshr i64 %hold.4.lcssa, 8
368  %conv230 = trunc i64 %shr229 to i8
369  store i8 %conv230, ptr %arrayidx177, align 1
370  %shr232 = lshr i64 %hold.4.lcssa, 16
371  %conv233 = trunc i64 %shr232 to i8
372  store i8 %conv233, ptr %arrayidx234, align 1
373  %shr235 = lshr i64 %hold.4.lcssa, 24
374  %conv236 = trunc i64 %shr235 to i8
375  store i8 %conv236, ptr %arrayidx237, align 1
376  call void @use_ptr(ptr noundef nonnull %hbuf)
377  ret void
378}
379