; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=X86AVX

; Use movq or movsd to load / store i64 values if sse2 is available.
; rdar://6659858
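; On x86-64 the copy is a plain 64-bit GPR movq; on i386, where GPRs are only
; 32 bits wide, going through an XMM register with movsd/movq keeps it a single
; 64-bit access instead of two 32-bit movl pairs.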

define void @foo(ptr %x, ptr %y) nounwind {
; X64-LABEL: foo:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rsi), %rax
; X64-NEXT:    movq %rax, (%rdi)
; X64-NEXT:    retq
;
; X86-LABEL: foo:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    movsd %xmm0, (%eax)
; X86-NEXT:    retl
;
; X86AVX-LABEL: foo:
; X86AVX:       # %bb.0:
; X86AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86AVX-NEXT:    vmovsd %xmm0, (%eax)
; X86AVX-NEXT:    retl
  %tmp1 = load i64, ptr %y, align 8
  store i64 %tmp1, ptr %x, align 8
  ret void
}

; Verify that a 64-bit chunk extracted from a vector is stored with a movq
; regardless of whether the system is 64-bit.
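; Element 0 of the bitcast <2 x i64> should be stored straight from the XMM
; register with movq/vmovq, without a round trip through a GPR or the stack.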

define void @store_i64_from_vector(<8 x i16> %x, <8 x i16> %y, ptr %i) nounwind {
; X64-LABEL: store_i64_from_vector:
; X64:       # %bb.0:
; X64-NEXT:    paddw %xmm1, %xmm0
; X64-NEXT:    movq %xmm0, (%rdi)
; X64-NEXT:    retq
;
; X86-LABEL: store_i64_from_vector:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    paddw %xmm1, %xmm0
; X86-NEXT:    movq %xmm0, (%eax)
; X86-NEXT:    retl
;
; X86AVX-LABEL: store_i64_from_vector:
; X86AVX:       # %bb.0:
; X86AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; X86AVX-NEXT:    vmovq %xmm0, (%eax)
; X86AVX-NEXT:    retl
  %z = add <8 x i16> %x, %y                          ; force execution domain
  %bc = bitcast <8 x i16> %z to <2 x i64>
  %vecext = extractelement <2 x i64> %bc, i32 0
  store i64 %vecext, ptr %i, align 8
  ret void
}

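; Same pattern with a 256-bit source: element 2 of the <4 x i64> bitcast lives
; in the upper 128-bit half, so the i64 should still be stored with a single
; movq/vmovq from the XMM register holding that half (extracted with
; vextracti128 under AVX2).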
define void @store_i64_from_vector256(<16 x i16> %x, <16 x i16> %y, ptr %i) nounwind {
; X64-LABEL: store_i64_from_vector256:
; X64:       # %bb.0:
; X64-NEXT:    paddw %xmm3, %xmm1
; X64-NEXT:    movq %xmm1, (%rdi)
; X64-NEXT:    retq
;
; X86-LABEL: store_i64_from_vector256:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 24(%ebp), %eax
; X86-NEXT:    paddw 8(%ebp), %xmm1
; X86-NEXT:    movq %xmm1, (%eax)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X86AVX-LABEL: store_i64_from_vector256:
; X86AVX:       # %bb.0:
; X86AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX-NEXT:    vextracti128 $1, %ymm1, %xmm1
; X86AVX-NEXT:    vextracti128 $1, %ymm0, %xmm0
; X86AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; X86AVX-NEXT:    vmovq %xmm0, (%eax)
; X86AVX-NEXT:    vzeroupper
; X86AVX-NEXT:    retl
  %z = add <16 x i16> %x, %y                          ; force execution domain
  %bc = bitcast <16 x i16> %z to <4 x i64>
  %vecext = extractelement <4 x i64> %bc, i32 2
  store i64 %vecext, ptr %i, align 8
  ret void
}

; PR23476
; Handle extraction from a non-simple / pre-legalization type.
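; The variable-index extract from the odd-sized <5 x i64> is expected to be
; lowered by spilling the (widened) vector to the stack, masking the index to
; the widened element count, and loading the selected i64 slot with a single
; scalar load.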

define void @PR23476(<5 x i64> %in, ptr %out, i32 %index) nounwind {
; X64-LABEL: PR23476:
; X64:       # %bb.0:
; X64-NEXT:    movq %rsi, %xmm0
; X64-NEXT:    movq %rdi, %xmm1
; X64-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-NEXT:    movq %rcx, %xmm0
; X64-NEXT:    movq %rdx, %xmm2
; X64-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; X64-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; X64-NEXT:    andl $7, %eax
; X64-NEXT:    movq %r8, %xmm0
; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movdqa %xmm2, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq -72(%rsp,%rax,8), %rax
; X64-NEXT:    movq %rax, (%r9)
; X64-NEXT:    retq
;
; X86-LABEL: PR23476:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $80, %esp
; X86-NEXT:    movl 52(%ebp), %eax
; X86-NEXT:    andl $7, %eax
; X86-NEXT:    movl 48(%ebp), %ecx
; X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    movups 8(%ebp), %xmm1
; X86-NEXT:    movups 24(%ebp), %xmm2
; X86-NEXT:    movaps %xmm2, {{[0-9]+}}(%esp)
; X86-NEXT:    movaps %xmm1, (%esp)
; X86-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp)
; X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    movsd %xmm0, (%ecx)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X86AVX-LABEL: PR23476:
; X86AVX:       # %bb.0:
; X86AVX-NEXT:    pushl %ebp
; X86AVX-NEXT:    movl %esp, %ebp
; X86AVX-NEXT:    andl $-32, %esp
; X86AVX-NEXT:    subl $96, %esp
; X86AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86AVX-NEXT:    movl 52(%ebp), %eax
; X86AVX-NEXT:    andl $7, %eax
; X86AVX-NEXT:    movl 48(%ebp), %ecx
; X86AVX-NEXT:    vmovups 8(%ebp), %ymm1
; X86AVX-NEXT:    vmovaps %ymm1, (%esp)
; X86AVX-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
; X86AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86AVX-NEXT:    vmovsd %xmm0, (%ecx)
; X86AVX-NEXT:    movl %ebp, %esp
; X86AVX-NEXT:    popl %ebp
; X86AVX-NEXT:    vzeroupper
; X86AVX-NEXT:    retl
  %ext = extractelement <5 x i64> %in, i32 %index
  store i64 %ext, ptr %out, align 8
  ret void
}