xref: /llvm-project/llvm/test/CodeGen/X86/stores-merging.ll (revision b980841652c1b178b41a9a7b0e7dec06aa058e0b)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s
3
; Aggregate type shared by the redundant-store tests: one i8 followed by two
; i32 fields. @e is the 4-byte-aligned global instance those tests write to.
4%structTy = type { i8, i32, i32 }
5
6@e = common dso_local global %structTy zeroinitializer, align 4
7
8;; Ensure that MergeConsecutiveStores doesn't incorrectly reorder
9;; store operations.  The first test stores in increasing address
10;; order, the second in decreasing -- but in both cases should have
11;; the same result in memory in the end.
12
; Writes 1 to field 1 of @e, then 123 and finally 456 to field 2, i.e. in
; increasing address order. The expected assembly is one 8-byte store to e+4
; of 0x1C800000001 (456 in the upper half, 1 in the lower half), so the
; overwritten value 123 never reaches memory.
13define dso_local void @redundant_stores_merging() {
14; CHECK-LABEL: redundant_stores_merging:
15; CHECK:       # %bb.0:
16; CHECK-NEXT:    movabsq $1958505086977, %rax # imm = 0x1C800000001
17; CHECK-NEXT:    movq %rax, e+4(%rip)
18; CHECK-NEXT:    retq
19  store i32 1, ptr getelementptr inbounds (%structTy, ptr @e, i64 0, i32 1), align 4
20  store i32 123, ptr getelementptr inbounds (%structTy, ptr @e, i64 0, i32 2), align 4
21  store i32 456, ptr getelementptr inbounds (%structTy, ptr @e, i64 0, i32 2), align 4
22  ret void
23}
24
25;; This variant tests PR25154.
; Issues both field-2 writes (123, then 456) before the field-1 write of 1.
; The expected assembly merges 123 and 1 into one 8-byte store to e+4
; (0x7B00000001) and then stores 456 to e+8 separately, so field 2 still ends
; up holding 456.
26define dso_local void @redundant_stores_merging_reverse() {
27; CHECK-LABEL: redundant_stores_merging_reverse:
28; CHECK:       # %bb.0:
29; CHECK-NEXT:    movabsq $528280977409, %rax # imm = 0x7B00000001
30; CHECK-NEXT:    movq %rax, e+4(%rip)
31; CHECK-NEXT:    movl $456, e+8(%rip) # imm = 0x1C8
32; CHECK-NEXT:    retq
33  store i32 123, ptr getelementptr inbounds (%structTy, ptr @e, i64 0, i32 2), align 4
34  store i32 456, ptr getelementptr inbounds (%structTy, ptr @e, i64 0, i32 2), align 4
35  store i32 1, ptr getelementptr inbounds (%structTy, ptr @e, i64 0, i32 1), align 4
36  ret void
37}
38
; 8-byte, 2-byte-aligned global buffer written by @overlapping_stores_merging.
39@b = common dso_local global [8 x i8] zeroinitializer, align 2
40
41;; The 2-byte store to offset 3 overlaps the 2-byte store to offset 2;
42;; these must not be reordered in MergeConsecutiveStores such that the
43;; store to 3 comes first (e.g. by merging the stores to 0 and 2 into
44;; a movl, after the store to 3).
45
; Stores i16 0 at b+2, i16 2 at b+3 (sharing byte 3 with the previous store),
; then i16 1 at b+0. The expected assembly is a 4-byte store of 1 to b, which
; covers the stores to b+0 and b+2, followed by the 2-byte store of 2 to b+3.
46define dso_local void @overlapping_stores_merging() {
47; CHECK-LABEL: overlapping_stores_merging:
48; CHECK:       # %bb.0:
49; CHECK-NEXT:    movl $1, b(%rip)
50; CHECK-NEXT:    movw $2, b+3(%rip)
51; CHECK-NEXT:    retq
52  store i16 0, ptr getelementptr inbounds ([8 x i8], ptr @b, i64 0, i64 2), align 2
53  store i16 2, ptr getelementptr inbounds ([8 x i8], ptr @b, i64 0, i64 3), align 1
54  store i16 1, ptr @b, align 2
55  ret void
56}
57
; Reinterprets %v as <16 x i8>, extracts every element, and stores element i
; at %ptr+i with 1-byte alignment. The expected assembly is a single unaligned
; 16-byte vector store of %xmm0.
; NOTE(review): attribute group #0 is referenced here but is not defined in
; the visible part of this file -- confirm whether that is intentional.
58define dso_local void @extract_vector_store_16_consecutive_bytes(<2 x i64> %v, ptr %ptr) #0 {
59; CHECK-LABEL: extract_vector_store_16_consecutive_bytes:
60; CHECK:       # %bb.0:
61; CHECK-NEXT:    vmovups %xmm0, (%rdi)
62; CHECK-NEXT:    retq
63  %bc = bitcast <2 x i64> %v to <16 x i8>
64  %ext00 = extractelement <16 x i8> %bc, i32 0
65  %ext01 = extractelement <16 x i8> %bc, i32 1
66  %ext02 = extractelement <16 x i8> %bc, i32 2
67  %ext03 = extractelement <16 x i8> %bc, i32 3
68  %ext04 = extractelement <16 x i8> %bc, i32 4
69  %ext05 = extractelement <16 x i8> %bc, i32 5
70  %ext06 = extractelement <16 x i8> %bc, i32 6
71  %ext07 = extractelement <16 x i8> %bc, i32 7
72  %ext08 = extractelement <16 x i8> %bc, i32 8
73  %ext09 = extractelement <16 x i8> %bc, i32 9
74  %ext10 = extractelement <16 x i8> %bc, i32 10
75  %ext11 = extractelement <16 x i8> %bc, i32 11
76  %ext12 = extractelement <16 x i8> %bc, i32 12
77  %ext13 = extractelement <16 x i8> %bc, i32 13
78  %ext14 = extractelement <16 x i8> %bc, i32 14
79  %ext15 = extractelement <16 x i8> %bc, i32 15
80  %gep01 = getelementptr inbounds i8, ptr %ptr, i64 1
81  %gep02 = getelementptr inbounds i8, ptr %ptr, i64 2
82  %gep03 = getelementptr inbounds i8, ptr %ptr, i64 3
83  %gep04 = getelementptr inbounds i8, ptr %ptr, i64 4
84  %gep05 = getelementptr inbounds i8, ptr %ptr, i64 5
85  %gep06 = getelementptr inbounds i8, ptr %ptr, i64 6
86  %gep07 = getelementptr inbounds i8, ptr %ptr, i64 7
87  %gep08 = getelementptr inbounds i8, ptr %ptr, i64 8
88  %gep09 = getelementptr inbounds i8, ptr %ptr, i64 9
89  %gep10 = getelementptr inbounds i8, ptr %ptr, i64 10
90  %gep11 = getelementptr inbounds i8, ptr %ptr, i64 11
91  %gep12 = getelementptr inbounds i8, ptr %ptr, i64 12
92  %gep13 = getelementptr inbounds i8, ptr %ptr, i64 13
93  %gep14 = getelementptr inbounds i8, ptr %ptr, i64 14
94  %gep15 = getelementptr inbounds i8, ptr %ptr, i64 15
95  store i8 %ext00, ptr %ptr, align 1
96  store i8 %ext01, ptr %gep01, align 1
97  store i8 %ext02, ptr %gep02, align 1
98  store i8 %ext03, ptr %gep03, align 1
99  store i8 %ext04, ptr %gep04, align 1
100  store i8 %ext05, ptr %gep05, align 1
101  store i8 %ext06, ptr %gep06, align 1
102  store i8 %ext07, ptr %gep07, align 1
103  store i8 %ext08, ptr %gep08, align 1
104  store i8 %ext09, ptr %gep09, align 1
105  store i8 %ext10, ptr %gep10, align 1
106  store i8 %ext11, ptr %gep11, align 1
107  store i8 %ext12, ptr %gep12, align 1
108  store i8 %ext13, ptr %gep13, align 1
109  store i8 %ext14, ptr %gep14, align 1
110  store i8 %ext15, ptr %gep15, align 1
111  ret void
112}
113
114; PR34217 - https://bugs.llvm.org/show_bug.cgi?id=34217
115
; 32-byte variant: reinterprets %v as <32 x i8>, extracts every element, and
; stores element i at %ptr+i with 1-byte alignment. The expected assembly is a
; single unaligned 32-byte vector store of %ymm0 followed by vzeroupper.
116define dso_local void @extract_vector_store_32_consecutive_bytes(<4 x i64> %v, ptr %ptr) #0 {
117; CHECK-LABEL: extract_vector_store_32_consecutive_bytes:
118; CHECK:       # %bb.0:
119; CHECK-NEXT:    vmovups %ymm0, (%rdi)
120; CHECK-NEXT:    vzeroupper
121; CHECK-NEXT:    retq
122  %bc = bitcast <4 x i64> %v to <32 x i8>
123  %ext00 = extractelement <32 x i8> %bc, i32 0
124  %ext01 = extractelement <32 x i8> %bc, i32 1
125  %ext02 = extractelement <32 x i8> %bc, i32 2
126  %ext03 = extractelement <32 x i8> %bc, i32 3
127  %ext04 = extractelement <32 x i8> %bc, i32 4
128  %ext05 = extractelement <32 x i8> %bc, i32 5
129  %ext06 = extractelement <32 x i8> %bc, i32 6
130  %ext07 = extractelement <32 x i8> %bc, i32 7
131  %ext08 = extractelement <32 x i8> %bc, i32 8
132  %ext09 = extractelement <32 x i8> %bc, i32 9
133  %ext10 = extractelement <32 x i8> %bc, i32 10
134  %ext11 = extractelement <32 x i8> %bc, i32 11
135  %ext12 = extractelement <32 x i8> %bc, i32 12
136  %ext13 = extractelement <32 x i8> %bc, i32 13
137  %ext14 = extractelement <32 x i8> %bc, i32 14
138  %ext15 = extractelement <32 x i8> %bc, i32 15
139  %ext16 = extractelement <32 x i8> %bc, i32 16
140  %ext17 = extractelement <32 x i8> %bc, i32 17
141  %ext18 = extractelement <32 x i8> %bc, i32 18
142  %ext19 = extractelement <32 x i8> %bc, i32 19
143  %ext20 = extractelement <32 x i8> %bc, i32 20
144  %ext21 = extractelement <32 x i8> %bc, i32 21
145  %ext22 = extractelement <32 x i8> %bc, i32 22
146  %ext23 = extractelement <32 x i8> %bc, i32 23
147  %ext24 = extractelement <32 x i8> %bc, i32 24
148  %ext25 = extractelement <32 x i8> %bc, i32 25
149  %ext26 = extractelement <32 x i8> %bc, i32 26
150  %ext27 = extractelement <32 x i8> %bc, i32 27
151  %ext28 = extractelement <32 x i8> %bc, i32 28
152  %ext29 = extractelement <32 x i8> %bc, i32 29
153  %ext30 = extractelement <32 x i8> %bc, i32 30
154  %ext31 = extractelement <32 x i8> %bc, i32 31
155  %gep01 = getelementptr inbounds i8, ptr %ptr, i64 1
156  %gep02 = getelementptr inbounds i8, ptr %ptr, i64 2
157  %gep03 = getelementptr inbounds i8, ptr %ptr, i64 3
158  %gep04 = getelementptr inbounds i8, ptr %ptr, i64 4
159  %gep05 = getelementptr inbounds i8, ptr %ptr, i64 5
160  %gep06 = getelementptr inbounds i8, ptr %ptr, i64 6
161  %gep07 = getelementptr inbounds i8, ptr %ptr, i64 7
162  %gep08 = getelementptr inbounds i8, ptr %ptr, i64 8
163  %gep09 = getelementptr inbounds i8, ptr %ptr, i64 9
164  %gep10 = getelementptr inbounds i8, ptr %ptr, i64 10
165  %gep11 = getelementptr inbounds i8, ptr %ptr, i64 11
166  %gep12 = getelementptr inbounds i8, ptr %ptr, i64 12
167  %gep13 = getelementptr inbounds i8, ptr %ptr, i64 13
168  %gep14 = getelementptr inbounds i8, ptr %ptr, i64 14
169  %gep15 = getelementptr inbounds i8, ptr %ptr, i64 15
170  %gep16 = getelementptr inbounds i8, ptr %ptr, i64 16
171  %gep17 = getelementptr inbounds i8, ptr %ptr, i64 17
172  %gep18 = getelementptr inbounds i8, ptr %ptr, i64 18
173  %gep19 = getelementptr inbounds i8, ptr %ptr, i64 19
174  %gep20 = getelementptr inbounds i8, ptr %ptr, i64 20
175  %gep21 = getelementptr inbounds i8, ptr %ptr, i64 21
176  %gep22 = getelementptr inbounds i8, ptr %ptr, i64 22
177  %gep23 = getelementptr inbounds i8, ptr %ptr, i64 23
178  %gep24 = getelementptr inbounds i8, ptr %ptr, i64 24
179  %gep25 = getelementptr inbounds i8, ptr %ptr, i64 25
180  %gep26 = getelementptr inbounds i8, ptr %ptr, i64 26
181  %gep27 = getelementptr inbounds i8, ptr %ptr, i64 27
182  %gep28 = getelementptr inbounds i8, ptr %ptr, i64 28
183  %gep29 = getelementptr inbounds i8, ptr %ptr, i64 29
184  %gep30 = getelementptr inbounds i8, ptr %ptr, i64 30
185  %gep31 = getelementptr inbounds i8, ptr %ptr, i64 31
186  store i8 %ext00, ptr %ptr, align 1
187  store i8 %ext01, ptr %gep01, align 1
188  store i8 %ext02, ptr %gep02, align 1
189  store i8 %ext03, ptr %gep03, align 1
190  store i8 %ext04, ptr %gep04, align 1
191  store i8 %ext05, ptr %gep05, align 1
192  store i8 %ext06, ptr %gep06, align 1
193  store i8 %ext07, ptr %gep07, align 1
194  store i8 %ext08, ptr %gep08, align 1
195  store i8 %ext09, ptr %gep09, align 1
196  store i8 %ext10, ptr %gep10, align 1
197  store i8 %ext11, ptr %gep11, align 1
198  store i8 %ext12, ptr %gep12, align 1
199  store i8 %ext13, ptr %gep13, align 1
200  store i8 %ext14, ptr %gep14, align 1
201  store i8 %ext15, ptr %gep15, align 1
202  store i8 %ext16, ptr %gep16, align 1
203  store i8 %ext17, ptr %gep17, align 1
204  store i8 %ext18, ptr %gep18, align 1
205  store i8 %ext19, ptr %gep19, align 1
206  store i8 %ext20, ptr %gep20, align 1
207  store i8 %ext21, ptr %gep21, align 1
208  store i8 %ext22, ptr %gep22, align 1
209  store i8 %ext23, ptr %gep23, align 1
210  store i8 %ext24, ptr %gep24, align 1
211  store i8 %ext25, ptr %gep25, align 1
212  store i8 %ext26, ptr %gep26, align 1
213  store i8 %ext27, ptr %gep27, align 1
214  store i8 %ext28, ptr %gep28, align 1
215  store i8 %ext29, ptr %gep29, align 1
216  store i8 %ext30, ptr %gep30, align 1
217  store i8 %ext31, ptr %gep31, align 1
218  ret void
219}
220
221; https://bugs.llvm.org/show_bug.cgi?id=43446
; Stores i8 -2 and then i1 true through two separate inttoptr casts of the
; same integer %x. The expected assembly contains only one byte store, of the
; value 1, to (%rdi).
222define dso_local void @pr43446_0(i64 %x) {
223; CHECK-LABEL: pr43446_0:
224; CHECK:       # %bb.0:
225; CHECK-NEXT:    movb $1, (%rdi)
226; CHECK-NEXT:    retq
227  %a = inttoptr i64 %x to ptr
228  store i8 -2, ptr %a, align 1
229  %b = inttoptr i64 %x to ptr
230  store i1 true, ptr %b, align 1
231  ret void
232}
; Pointer-argument form of the i8 -2 / i1 true store pair: both stores go
; through %a directly. The expected assembly contains only one byte store, of
; the value 1, to (%rdi).
233define dso_local void @pr43446_1(ptr %a) {
234; CHECK-LABEL: pr43446_1:
235; CHECK:       # %bb.0:
236; CHECK-NEXT:    movb $1, (%rdi)
237; CHECK-NEXT:    retq
238  store i8 -2, ptr %a, align 1
239  store i1 true, ptr %a, align 1
240  ret void
241}
242
; Loads the bytes at %p and %p+1 and stores them back swapped. The expected
; assembly is a single 16-bit rotate by 8 applied directly to memory.
243define dso_local void @rotate16_in_place(ptr %p) {
244; CHECK-LABEL: rotate16_in_place:
245; CHECK:       # %bb.0:
246; CHECK-NEXT:    rolw $8, (%rdi)
247; CHECK-NEXT:    retq
248  %p1 = getelementptr i8, ptr %p, i64 1
249  %i0 = load i8, ptr %p, align 1
250  %i1 = load i8, ptr %p1, align 1
251  store i8 %i1, ptr %p, align 1
252  store i8 %i0, ptr %p1, align 1
253  ret void
254}
255
; Loads the bytes at %p and %p+1 and stores them swapped at %q and %q+1. The
; expected assembly is a 16-bit load, a 16-bit rotate by 8 in a register, and
; a 16-bit store to (%rsi).
256define dso_local void @rotate16(ptr %p, ptr %q) {
257; CHECK-LABEL: rotate16:
258; CHECK:       # %bb.0:
259; CHECK-NEXT:    movzwl (%rdi), %eax
260; CHECK-NEXT:    rolw $8, %ax
261; CHECK-NEXT:    movw %ax, (%rsi)
262; CHECK-NEXT:    retq
263  %p1 = getelementptr i8, ptr %p, i64 1
264  %q1 = getelementptr i8, ptr %q, i64 1
265  %i0 = load i8, ptr %p, align 1
266  %i1 = load i8, ptr %p1, align 1
267  store i8 %i1, ptr %q, align 1
268  store i8 %i0, ptr %q1, align 1
269  ret void
270}
271
; Loads the two i16 values at elements 0 and 1 of %p and stores them back
; swapped. The expected assembly is a single 32-bit rotate by 16 applied
; directly to memory.
272define dso_local void @rotate32_in_place(ptr %p) {
273; CHECK-LABEL: rotate32_in_place:
274; CHECK:       # %bb.0:
275; CHECK-NEXT:    roll $16, (%rdi)
276; CHECK-NEXT:    retq
277  %p1 = getelementptr i16, ptr %p, i64 1
278  %i0 = load i16, ptr %p, align 2
279  %i1 = load i16, ptr %p1, align 2
280  store i16 %i1, ptr %p, align 2
281  store i16 %i0, ptr %p1, align 2
282  ret void
283}
284
; Loads i16 elements 0 and 1 of %p and stores them swapped at i16 elements 42
; and 43 (byte offset 84). The expected assembly is a 32-bit load, a rotate by
; 16 in a register, and a 32-bit store to 84(%rdi).
285define dso_local void @rotate32(ptr %p) {
286; CHECK-LABEL: rotate32:
287; CHECK:       # %bb.0:
288; CHECK-NEXT:    movl (%rdi), %eax
289; CHECK-NEXT:    roll $16, %eax
290; CHECK-NEXT:    movl %eax, 84(%rdi)
291; CHECK-NEXT:    retq
292  %p1 = getelementptr i16, ptr %p, i64 1
293  %p42 = getelementptr i16, ptr %p, i64 42
294  %p43 = getelementptr i16, ptr %p, i64 43
295  %i0 = load i16, ptr %p, align 2
296  %i1 = load i16, ptr %p1, align 2
297  store i16 %i1, ptr %p42, align 2
298  store i16 %i0, ptr %p43, align 2
299  ret void
300}
301
; Loads the two i32 values at elements 0 and 1 of %p and stores them back
; swapped. The expected assembly is a single 64-bit rotate by 32 applied
; directly to memory.
302define dso_local void @rotate64_in_place(ptr %p) {
303; CHECK-LABEL: rotate64_in_place:
304; CHECK:       # %bb.0:
305; CHECK-NEXT:    rolq $32, (%rdi)
306; CHECK-NEXT:    retq
307  %p1 = getelementptr i32, ptr %p, i64 1
308  %i0 = load i32, ptr %p, align 4
309  %i1 = load i32, ptr %p1, align 4
310  store i32 %i1, ptr %p, align 4
311  store i32 %i0, ptr %p1, align 4
312  ret void
313}
314
; Loads i32 elements 0 and 1 of %p and stores them swapped at i32 elements 2
; and 3 (byte offset 8). The expected assembly is a 64-bit load, a rotate by
; 32 in a register, and a 64-bit store to 8(%rdi).
315define dso_local void @rotate64(ptr %p) {
316; CHECK-LABEL: rotate64:
317; CHECK:       # %bb.0:
318; CHECK-NEXT:    movq (%rdi), %rax
319; CHECK-NEXT:    rolq $32, %rax
320; CHECK-NEXT:    movq %rax, 8(%rdi)
321; CHECK-NEXT:    retq
322  %p1 = getelementptr i32, ptr %p, i64 1
323  %p2 = getelementptr i32, ptr %p, i64 2
324  %p3 = getelementptr i32, ptr %p, i64 3
325  %i0 = load i32, ptr %p, align 4
326  %i1 = load i32, ptr %p1, align 4
327  store i32 %i1, ptr %p2, align 4
328  store i32 %i0, ptr %p3, align 4
329  ret void
330}
331
; Loads i16 elements 0..3 of %p and stores them at elements 42..45 in the
; order i2, i3, i0, i1, i.e. with the two 32-bit halves exchanged. The
; expected assembly is a 64-bit load, a rotate by 32 in a register, and a
; 64-bit store to byte offset 84.
332define dso_local void @rotate64_iterate(ptr %p) {
333; CHECK-LABEL: rotate64_iterate:
334; CHECK:       # %bb.0:
335; CHECK-NEXT:    movq (%rdi), %rax
336; CHECK-NEXT:    rolq $32, %rax
337; CHECK-NEXT:    movq %rax, 84(%rdi)
338; CHECK-NEXT:    retq
339  %p1 = getelementptr i16, ptr %p, i64 1
340  %p2 = getelementptr i16, ptr %p, i64 2
341  %p3 = getelementptr i16, ptr %p, i64 3
342  %p42 = getelementptr i16, ptr %p, i64 42
343  %p43 = getelementptr i16, ptr %p, i64 43
344  %p44 = getelementptr i16, ptr %p, i64 44
345  %p45 = getelementptr i16, ptr %p, i64 45
346  %i0 = load i16, ptr %p, align 2
347  %i1 = load i16, ptr %p1, align 2
348  %i2 = load i16, ptr %p2, align 2
349  %i3 = load i16, ptr %p3, align 2
350  store i16 %i2, ptr %p42, align 2
351  store i16 %i3, ptr %p43, align 2
352  store i16 %i0, ptr %p44, align 2
353  store i16 %i1, ptr %p45, align 2
354  ret void
355}
356
357; TODO: recognize this as 2 rotates?
358
; Loads i16 elements 0..3 of %p and stores them at elements 42..45 in the
; order i1, i0, i3, i2, i.e. each adjacent pair swapped. The expected assembly
; currently keeps four 16-bit loads and four 16-bit stores (byte offsets 84,
; 86, 88, 90); no rotate is formed.
359define dso_local void @rotate32_consecutive(ptr %p) {
360; CHECK-LABEL: rotate32_consecutive:
361; CHECK:       # %bb.0:
362; CHECK-NEXT:    movzwl (%rdi), %eax
363; CHECK-NEXT:    movzwl 2(%rdi), %ecx
364; CHECK-NEXT:    movzwl 4(%rdi), %edx
365; CHECK-NEXT:    movzwl 6(%rdi), %esi
366; CHECK-NEXT:    movw %cx, 84(%rdi)
367; CHECK-NEXT:    movw %ax, 86(%rdi)
368; CHECK-NEXT:    movw %si, 88(%rdi)
369; CHECK-NEXT:    movw %dx, 90(%rdi)
370; CHECK-NEXT:    retq
371  %p1 = getelementptr i16, ptr %p, i64 1
372  %p2 = getelementptr i16, ptr %p, i64 2
373  %p3 = getelementptr i16, ptr %p, i64 3
374  %p42 = getelementptr i16, ptr %p, i64 42
375  %p43 = getelementptr i16, ptr %p, i64 43
376  %p44 = getelementptr i16, ptr %p, i64 44
377  %p45 = getelementptr i16, ptr %p, i64 45
378  %i0 = load i16, ptr %p, align 2
379  %i1 = load i16, ptr %p1, align 2
380  %i2 = load i16, ptr %p2, align 2
381  %i3 = load i16, ptr %p3, align 2
382  store i16 %i1, ptr %p42, align 2
383  store i16 %i0, ptr %p43, align 2
384  store i16 %i3, ptr %p44, align 2
385  store i16 %i2, ptr %p45, align 2
386  ret void
387}
388
389; Same as above, but now the stores are not all consecutive.
390
; Loads i16 elements 0..3 of %p and stores the swapped pairs to two separate
; destinations: (i1, i0) at elements 42/43 and (i3, i2) at elements 54/55
; (byte offsets 84 and 108). The expected assembly is two 32-bit loads, two
; rotates by 16, and two 32-bit stores.
391define dso_local void @rotate32_twice(ptr %p) {
392; CHECK-LABEL: rotate32_twice:
393; CHECK:       # %bb.0:
394; CHECK-NEXT:    movl (%rdi), %eax
395; CHECK-NEXT:    movl 4(%rdi), %ecx
396; CHECK-NEXT:    roll $16, %eax
397; CHECK-NEXT:    roll $16, %ecx
398; CHECK-NEXT:    movl %eax, 84(%rdi)
399; CHECK-NEXT:    movl %ecx, 108(%rdi)
400; CHECK-NEXT:    retq
401  %p1 = getelementptr i16, ptr %p, i64 1
402  %p2 = getelementptr i16, ptr %p, i64 2
403  %p3 = getelementptr i16, ptr %p, i64 3
404  %p42 = getelementptr i16, ptr %p, i64 42
405  %p43 = getelementptr i16, ptr %p, i64 43
406  %p54 = getelementptr i16, ptr %p, i64 54
407  %p55 = getelementptr i16, ptr %p, i64 55
408  %i0 = load i16, ptr %p, align 2
409  %i1 = load i16, ptr %p1, align 2
410  %i2 = load i16, ptr %p2, align 2
411  %i3 = load i16, ptr %p3, align 2
412  store i16 %i1, ptr %p42, align 2
413  store i16 %i0, ptr %p43, align 2
414  store i16 %i3, ptr %p54, align 2
415  store i16 %i2, ptr %p55, align 2
416  ret void
417}
418
; Stores the low byte of %x at %p and the high byte (%x >> 8) at %p+1. The
; expected assembly is a single 16-bit store of %di.
419define dso_local void @trunc_i16_to_i8(i16 %x, ptr %p) {
420; CHECK-LABEL: trunc_i16_to_i8:
421; CHECK:       # %bb.0:
422; CHECK-NEXT:    movw %di, (%rsi)
423; CHECK-NEXT:    retq
424  %t1 = trunc i16 %x to i8
425  %sh = lshr i16 %x, 8
426  %t2 = trunc i16 %sh to i8
427  store i8 %t1, ptr %p, align 1
428  %p1 = getelementptr inbounds i8, ptr %p, i64 1
429  store i8 %t2, ptr %p1, align 1
430  ret void
431}
432
; Stores the four bytes of %x (shifts of 0, 8, 16, 24) at %p+0..%p+3, lowest
; byte first. The expected assembly is a single 32-bit store of %edi.
433define dso_local void @trunc_i32_to_i8(i32 %x, ptr %p) {
434; CHECK-LABEL: trunc_i32_to_i8:
435; CHECK:       # %bb.0:
436; CHECK-NEXT:    movl %edi, (%rsi)
437; CHECK-NEXT:    retq
438  %t1 = trunc i32 %x to i8
439  %sh1 = lshr i32 %x, 8
440  %t2 = trunc i32 %sh1 to i8
441  %sh2 = lshr i32 %x, 16
442  %t3 = trunc i32 %sh2 to i8
443  %sh3 = lshr i32 %x, 24
444  %t4 = trunc i32 %sh3 to i8
445  store i8 %t1, ptr %p, align 1
446  %p1 = getelementptr inbounds i8, ptr %p, i64 1
447  store i8 %t2, ptr %p1, align 1
448  %p2 = getelementptr inbounds i8, ptr %p, i64 2
449  store i8 %t3, ptr %p2, align 1
450  %p3 = getelementptr inbounds i8, ptr %p, i64 3
451  store i8 %t4, ptr %p3, align 1
452  ret void
453}
454
; Stores the low 16 bits of %x at i16 element 0 of %p and the high 16 bits at
; element 1. The expected assembly is a single 32-bit store of %edi.
455define dso_local void @trunc_i32_to_i16(i32 %x, ptr %p) {
456; CHECK-LABEL: trunc_i32_to_i16:
457; CHECK:       # %bb.0:
458; CHECK-NEXT:    movl %edi, (%rsi)
459; CHECK-NEXT:    retq
460  %t1 = trunc i32 %x to i16
461  %sh = lshr i32 %x, 16
462  %t2 = trunc i32 %sh to i16
463  store i16 %t1, ptr %p, align 2
464  %p1 = getelementptr inbounds i16, ptr %p, i64 1
465  store i16 %t2, ptr %p1, align 2
466  ret void
467}
468
; Stores the high 16 bits of %x at i16 element 0 of %p0 and the low 16 bits at
; element 1, issuing the element-1 store first. The expected assembly rotates
; %edi by 16 and performs a single 32-bit store.
; NOTE(review): the "be_" prefix presumably stands for big-endian half order.
469define dso_local void @be_i32_to_i16(i32 %x, ptr %p0) {
470; CHECK-LABEL: be_i32_to_i16:
471; CHECK:       # %bb.0:
472; CHECK-NEXT:    rorl $16, %edi
473; CHECK-NEXT:    movl %edi, (%rsi)
474; CHECK-NEXT:    retq
475  %sh1 = lshr i32 %x, 16
476  %t0 = trunc i32 %x to i16
477  %t1 = trunc i32 %sh1 to i16
478  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
479  store i16 %t0, ptr %p1, align 2
480  store i16 %t1, ptr %p0, align 2
481  ret void
482}
483
; Stores the high 16 bits of %x at i16 element 0 of %p0 and the low 16 bits at
; element 1, issuing the stores in increasing address order. The expected
; assembly rotates %edi by 16 and performs a single 32-bit store.
484define dso_local void @be_i32_to_i16_order(i32 %x, ptr %p0) {
485; CHECK-LABEL: be_i32_to_i16_order:
486; CHECK:       # %bb.0:
487; CHECK-NEXT:    rorl $16, %edi
488; CHECK-NEXT:    movl %edi, (%rsi)
489; CHECK-NEXT:    retq
490  %sh1 = lshr i32 %x, 16
491  %t0 = trunc i32 %x to i16
492  %t1 = trunc i32 %sh1 to i16
493  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
494  store i16 %t1, ptr %p0, align 2
495  store i16 %t0, ptr %p1, align 2
496  ret void
497}
498
; Stores the eight bytes of %x (shifts of 0, 8, ..., 56) at %p+0..%p+7, lowest
; byte first. The expected assembly is a single 64-bit store of %rdi.
499define dso_local void @trunc_i64_to_i8(i64 %x, ptr %p) {
500; CHECK-LABEL: trunc_i64_to_i8:
501; CHECK:       # %bb.0:
502; CHECK-NEXT:    movq %rdi, (%rsi)
503; CHECK-NEXT:    retq
504  %t1 = trunc i64 %x to i8
505  %sh1 = lshr i64 %x, 8
506  %t2 = trunc i64 %sh1 to i8
507  %sh2 = lshr i64 %x, 16
508  %t3 = trunc i64 %sh2 to i8
509  %sh3 = lshr i64 %x, 24
510  %t4 = trunc i64 %sh3 to i8
511  %sh4 = lshr i64 %x, 32
512  %t5 = trunc i64 %sh4 to i8
513  %sh5 = lshr i64 %x, 40
514  %t6 = trunc i64 %sh5 to i8
515  %sh6 = lshr i64 %x, 48
516  %t7 = trunc i64 %sh6 to i8
517  %sh7 = lshr i64 %x, 56
518  %t8 = trunc i64 %sh7 to i8
519  store i8 %t1, ptr %p, align 1
520  %p1 = getelementptr inbounds i8, ptr %p, i64 1
521  store i8 %t2, ptr %p1, align 1
522  %p2 = getelementptr inbounds i8, ptr %p, i64 2
523  store i8 %t3, ptr %p2, align 1
524  %p3 = getelementptr inbounds i8, ptr %p, i64 3
525  store i8 %t4, ptr %p3, align 1
526  %p4 = getelementptr inbounds i8, ptr %p, i64 4
527  store i8 %t5, ptr %p4, align 1
528  %p5 = getelementptr inbounds i8, ptr %p, i64 5
529  store i8 %t6, ptr %p5, align 1
530  %p6 = getelementptr inbounds i8, ptr %p, i64 6
531  store i8 %t7, ptr %p6, align 1
532  %p7 = getelementptr inbounds i8, ptr %p, i64 7
533  store i8 %t8, ptr %p7, align 1
534  ret void
535}
536
; Stores the four 16-bit pieces of %x (shifts of 0, 16, 32, 48) at i16
; elements 0..3 of %p, lowest piece first. The expected assembly is a single
; 64-bit store of %rdi.
537define dso_local void @trunc_i64_to_i16(i64 %x, ptr %p) {
538; CHECK-LABEL: trunc_i64_to_i16:
539; CHECK:       # %bb.0:
540; CHECK-NEXT:    movq %rdi, (%rsi)
541; CHECK-NEXT:    retq
542  %t1 = trunc i64 %x to i16
543  %sh1 = lshr i64 %x, 16
544  %t2 = trunc i64 %sh1 to i16
545  %sh2 = lshr i64 %x, 32
546  %t3 = trunc i64 %sh2 to i16
547  %sh3 = lshr i64 %x, 48
548  %t4 = trunc i64 %sh3 to i16
549  store i16 %t1, ptr %p, align 2
550  %p1 = getelementptr inbounds i16, ptr %p, i64 1
551  store i16 %t2, ptr %p1, align 2
552  %p2 = getelementptr inbounds i16, ptr %p, i64 2
553  store i16 %t3, ptr %p2, align 2
554  %p3 = getelementptr inbounds i16, ptr %p, i64 3
555  store i16 %t4, ptr %p3, align 2
556  ret void
557}
558
; Stores the low 32 bits of %x at i32 element 0 of %p and the high 32 bits at
; element 1. The expected assembly is a single 64-bit store of %rdi.
559define dso_local void @trunc_i64_to_i32(i64 %x, ptr %p) {
560; CHECK-LABEL: trunc_i64_to_i32:
561; CHECK:       # %bb.0:
562; CHECK-NEXT:    movq %rdi, (%rsi)
563; CHECK-NEXT:    retq
564  %t1 = trunc i64 %x to i32
565  %sh = lshr i64 %x, 32
566  %t2 = trunc i64 %sh to i32
567  store i32 %t1, ptr %p, align 4
568  %p1 = getelementptr inbounds i32, ptr %p, i64 1
569  store i32 %t2, ptr %p1, align 4
570  ret void
571}
572
; Stores the high 32 bits of %x at i32 element 0 of %p0 and the low 32 bits at
; element 1, issuing the element-1 store first. The expected assembly rotates
; %rdi by 32 and performs a single 64-bit store.
; NOTE(review): the "be_" prefix presumably stands for big-endian half order.
573define dso_local void @be_i64_to_i32(i64 %x, ptr %p0) {
574; CHECK-LABEL: be_i64_to_i32:
575; CHECK:       # %bb.0:
576; CHECK-NEXT:    rorq $32, %rdi
577; CHECK-NEXT:    movq %rdi, (%rsi)
578; CHECK-NEXT:    retq
579  %sh1 = lshr i64 %x, 32
580  %t0 = trunc i64 %x to i32
581  %t1 = trunc i64 %sh1 to i32
582  %p1 = getelementptr inbounds i32, ptr %p0, i64 1
583  store i32 %t0, ptr %p1, align 4
584  store i32 %t1, ptr %p0, align 4
585  ret void
586}
587
; Stores the high 32 bits of %x at i32 element 0 of %p0 and the low 32 bits at
; element 1, issuing the stores in increasing address order. The expected
; assembly rotates %rdi by 32 and performs a single 64-bit store.
588define dso_local void @be_i64_to_i32_order(i64 %x, ptr %p0) {
589; CHECK-LABEL: be_i64_to_i32_order:
590; CHECK:       # %bb.0:
591; CHECK-NEXT:    rorq $32, %rdi
592; CHECK-NEXT:    movq %rdi, (%rsi)
593; CHECK-NEXT:    retq
594  %sh1 = lshr i64 %x, 32
595  %t0 = trunc i64 %x to i32
596  %t1 = trunc i64 %sh1 to i32
597  %p1 = getelementptr inbounds i32, ptr %p0, i64 1
598  store i32 %t1, ptr %p0, align 4
599  store i32 %t0, ptr %p1, align 4
600  ret void
601}
602
603; https://llvm.org/PR50623
604; It is a miscompile to merge the stores if we are not
605; writing all of the bytes from the source value.
606
; Stores bits 0..7 of %x at %p+0 and bits 16..31 at %p+2; bits 8..15 are never
; stored and byte %p+1 is never written. The expected assembly keeps the two
; stores separate (a byte store, a shift by 16, and a 16-bit store).
607define void @merge_hole(i32 %x, ptr %p) {
608; CHECK-LABEL: merge_hole:
609; CHECK:       # %bb.0:
610; CHECK-NEXT:    movb %dil, (%rsi)
611; CHECK-NEXT:    shrl $16, %edi
612; CHECK-NEXT:    movw %di, 2(%rsi)
613; CHECK-NEXT:    retq
614  %p2 = getelementptr inbounds i16, ptr %p, i64 1
615  %x3 = trunc i32 %x to i8
616  store i8 %x3, ptr %p, align 1
617  %sh = lshr i32 %x, 16
618  %x01 = trunc i32 %sh to i16
619  store i16 %x01, ptr %p2, align 1
620  ret void
621}
622
623; Change the order of the stores.
624; It is a miscompile to merge the stores if we are not
625; writing all of the bytes from the source value.
626
; Stores bits 16..31 of %x at %p+2 first and bits 0..7 at %p+0 second; bits
; 8..15 are never stored. The expected assembly keeps the two stores separate:
; the shift is done on a copy in %eax and the byte store uses %dil.
627define void @merge_hole2(i32 %x, ptr %p) {
628; CHECK-LABEL: merge_hole2:
629; CHECK:       # %bb.0:
630; CHECK-NEXT:    movl %edi, %eax
631; CHECK-NEXT:    shrl $16, %eax
632; CHECK-NEXT:    movw %ax, 2(%rsi)
633; CHECK-NEXT:    movb %dil, (%rsi)
634; CHECK-NEXT:    retq
635  %p2 = getelementptr inbounds i16, ptr %p, i64 1
636  %sh = lshr i32 %x, 16
637  %x01 = trunc i32 %sh to i16
638  store i16 %x01, ptr %p2, align 1
639  %x3 = trunc i32 %x to i8
640  store i8 %x3, ptr %p, align 1
641  ret void
642}
643
644; Change offset.
645; It is a miscompile to merge the stores if we are not
646; writing all of the bytes from the source value.
647
; Stores bits 0..7 of %x at %p+1 and bits 16..31 at %p+2. The destination
; bytes are adjacent, but bits 8..15 of %x are still never stored. The
; expected assembly keeps the two stores separate.
648define void @merge_hole3(i32 %x, ptr %p) {
649; CHECK-LABEL: merge_hole3:
650; CHECK:       # %bb.0:
651; CHECK-NEXT:    movb %dil, 1(%rsi)
652; CHECK-NEXT:    shrl $16, %edi
653; CHECK-NEXT:    movw %di, 2(%rsi)
654; CHECK-NEXT:    retq
655  %p1 = getelementptr inbounds i8, ptr %p, i64 1
656  %p2 = getelementptr inbounds i16, ptr %p, i64 1
657  %x3 = trunc i32 %x to i8
658  store i8 %x3, ptr %p1, align 1
659  %sh = lshr i32 %x, 16
660  %x01 = trunc i32 %sh to i16
661  store i16 %x01, ptr %p2, align 1
662  ret void
663}
664
665; Change offset.
666; It is a miscompile to merge the stores if we are not
667; writing all of the bytes from the source value.
668
; Stores bits 0..7 of %x at %p+2 and bits 16..31 at %p+0, so the low byte
; lands after the high half; bits 8..15 are never stored. The expected
; assembly keeps the two stores separate.
669define void @merge_hole4(i32 %x, ptr %p) {
670; CHECK-LABEL: merge_hole4:
671; CHECK:       # %bb.0:
672; CHECK-NEXT:    movb %dil, 2(%rsi)
673; CHECK-NEXT:    shrl $16, %edi
674; CHECK-NEXT:    movw %di, (%rsi)
675; CHECK-NEXT:    retq
676  %p2 = getelementptr inbounds i8, ptr %p, i64 2
677  %x3 = trunc i32 %x to i8
678  store i8 %x3, ptr %p2, align 1
679  %sh = lshr i32 %x, 16
680  %x01 = trunc i32 %sh to i16
681  store i16 %x01, ptr %p, align 1
682  ret void
683}
684