; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown   -mattr=avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X64

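; Four adjacent i32 constant stores should merge into one 16-byte vector store.
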
define void @big_nonzero_16_bytes(ptr nocapture %a) {
; X32-LABEL: big_nonzero_16_bytes:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [1,2,3,4]
; X32-NEXT:    vmovups %xmm0, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: big_nonzero_16_bytes:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [1,2,3,4]
; X64-NEXT:    vmovups %xmm0, (%rdi)
; X64-NEXT:    retq
  %arrayidx1 = getelementptr inbounds i32, ptr %a, i64 1
  %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 2
  %arrayidx3 = getelementptr inbounds i32, ptr %a, i64 3

  store i32 1, ptr %a, align 4
  store i32 2, ptr %arrayidx1, align 4
  store i32 3, ptr %arrayidx2, align 4
  store i32 4, ptr %arrayidx3, align 4
  ret void
}

; TODO: We assumed that two 64-bit scalar stores were better than one vector load and
; one vector store. But if the 64-bit constants can't be represented as sign-extended
; 32-bit immediates, then it takes extra instructions to materialize them in scalar code.
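; For example, 4294967297 = 0x100000001 has bit 32 set, so it cannot be encoded as a
; sign-extended 32-bit immediate; the X64 output below needs a movabsq to build each
; constant in a register before its movq store. A sketch of the merged alternative
; (one 16-byte store; not part of the checked output) would be:
;   store <2 x i64> <i64 4294967297, i64 12884901889>, ptr %a, align 8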

define void @big_nonzero_16_bytes_big64bit_constants(ptr nocapture %a) {
; X32-LABEL: big_nonzero_16_bytes_big64bit_constants:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [1,1,1,3]
; X32-NEXT:    vmovups %xmm0, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: big_nonzero_16_bytes_big64bit_constants:
; X64:       # %bb.0:
; X64-NEXT:    movabsq $4294967297, %rax # imm = 0x100000001
; X64-NEXT:    movq %rax, (%rdi)
; X64-NEXT:    movabsq $12884901889, %rax # imm = 0x300000001
; X64-NEXT:    movq %rax, 8(%rdi)
; X64-NEXT:    retq
  %arrayidx1 = getelementptr inbounds i64, ptr %a, i64 1

  store i64 4294967297, ptr %a
  store i64 12884901889, ptr %arrayidx1
  ret void
}

; Splats may be an opportunity to use a broadcast op.
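; Here all eight stored i32 values are 42, so the expected lowering is a single
; vbroadcastss that splats one 32-bit lane across ymm0, followed by one 32-byte
; store instead of eight scalar stores.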

define void @big_nonzero_32_bytes_splat(ptr nocapture %a) {
; X32-LABEL: big_nonzero_32_bytes_splat:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vbroadcastss {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42]
; X32-NEXT:    vmovups %ymm0, (%eax)
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: big_nonzero_32_bytes_splat:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastss {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42]
; X64-NEXT:    vmovups %ymm0, (%rdi)
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %arrayidx1 = getelementptr inbounds i32, ptr %a, i64 1
  %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 2
  %arrayidx3 = getelementptr inbounds i32, ptr %a, i64 3
  %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 4
  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 5
  %arrayidx6 = getelementptr inbounds i32, ptr %a, i64 6
  %arrayidx7 = getelementptr inbounds i32, ptr %a, i64 7

  store i32 42, ptr %a, align 4
  store i32 42, ptr %arrayidx1, align 4
  store i32 42, ptr %arrayidx2, align 4
  store i32 42, ptr %arrayidx3, align 4
  store i32 42, ptr %arrayidx4, align 4
  store i32 42, ptr %arrayidx5, align 4
  store i32 42, ptr %arrayidx6, align 4
  store i32 42, ptr %arrayidx7, align 4
  ret void
}

; Verify that we choose the best-sized store(s) for each chunk.
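; On X64 the 63 bytes below should split as 32 (one ymm store) + 3x8 (movq) + 4 (movl)
; + 2 (movw) + 1 (movb). 32-bit i686 has no 64-bit scalar stores, so it should use a
; 16-byte chunk plus 32-bit pieces for the middle of the range instead.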

define void @big_nonzero_63_bytes(ptr nocapture %a) {
; X32-LABEL: big_nonzero_63_bytes:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [1,0,2,0,3,0,4,0]
; X32-NEXT:    vmovups %ymm0, (%eax)
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [5,0,6,0]
; X32-NEXT:    vmovups %xmm0, 32(%eax)
; X32-NEXT:    movl $0, 52(%eax)
; X32-NEXT:    movl $7, 48(%eax)
; X32-NEXT:    movl $8, 56(%eax)
; X32-NEXT:    movw $9, 60(%eax)
; X32-NEXT:    movb $10, 62(%eax)
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: big_nonzero_63_bytes:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [1,2,3,4]
; X64-NEXT:    vmovups %ymm0, (%rdi)
; X64-NEXT:    movq $5, 32(%rdi)
; X64-NEXT:    movq $6, 40(%rdi)
; X64-NEXT:    movq $7, 48(%rdi)
; X64-NEXT:    movl $8, 56(%rdi)
; X64-NEXT:    movw $9, 60(%rdi)
; X64-NEXT:    movb $10, 62(%rdi)
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %arrayidx8 = getelementptr inbounds i64, ptr %a, i64 1
  %arrayidx16 = getelementptr inbounds i64, ptr %a, i64 2
  %arrayidx24 = getelementptr inbounds i64, ptr %a, i64 3
  %arrayidx32 = getelementptr inbounds i64, ptr %a, i64 4
  %arrayidx40 = getelementptr inbounds i64, ptr %a, i64 5
  %arrayidx48 = getelementptr inbounds i64, ptr %a, i64 6
  %arrayidx56 = getelementptr inbounds i32, ptr %a, i64 14
  %arrayidx60 = getelementptr inbounds i16, ptr %a, i64 30
  %arrayidx62 = getelementptr inbounds i8, ptr %a, i64 62

  store i64 1, ptr %a
  store i64 2, ptr %arrayidx8
  store i64 3, ptr %arrayidx16
  store i64 4, ptr %arrayidx24
  store i64 5, ptr %arrayidx32
  store i64 6, ptr %arrayidx40
  store i64 7, ptr %arrayidx48
  store i32 8, ptr %arrayidx56
  store i16 9, ptr %arrayidx60
  store i8 10, ptr %arrayidx62
  ret void
}