; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=X86AVX

; Use movq or movsd to load / store i64 values if sse2 is available.
; rdar://6659858

define void @foo(ptr %x, ptr %y) nounwind {
; X64-LABEL: foo:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rsi), %rax
; X64-NEXT:    movq %rax, (%rdi)
; X64-NEXT:    retq
;
; X86-LABEL: foo:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    movsd %xmm0, (%eax)
; X86-NEXT:    retl
;
; X86AVX-LABEL: foo:
; X86AVX:       # %bb.0:
; X86AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86AVX-NEXT:    vmovsd %xmm0, (%eax)
; X86AVX-NEXT:    retl
  %tmp1 = load i64, ptr %y, align 8
  store i64 %tmp1, ptr %x, align 8
  ret void
}

; Verify that a 64-bit chunk extracted from a vector is stored with a movq
; regardless of whether the system is 64-bit.

define void @store_i64_from_vector(<8 x i16> %x, <8 x i16> %y, ptr %i) nounwind {
; X64-LABEL: store_i64_from_vector:
; X64:       # %bb.0:
; X64-NEXT:    paddw %xmm1, %xmm0
; X64-NEXT:    movq %xmm0, (%rdi)
; X64-NEXT:    retq
;
; X86-LABEL: store_i64_from_vector:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    paddw %xmm1, %xmm0
; X86-NEXT:    movq %xmm0, (%eax)
; X86-NEXT:    retl
;
; X86AVX-LABEL: store_i64_from_vector:
; X86AVX:       # %bb.0:
; X86AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; X86AVX-NEXT:    vmovq %xmm0, (%eax)
; X86AVX-NEXT:    retl
  %z = add <8 x i16> %x, %y ; force execution domain
  %bc = bitcast <8 x i16> %z to <2 x i64>
  %vecext = extractelement <2 x i64> %bc, i32 0
  store i64 %vecext, ptr %i, align 8
  ret void
}

define void @store_i64_from_vector256(<16 x i16> %x, <16 x i16> %y, ptr %i) nounwind {
; X64-LABEL: store_i64_from_vector256:
; X64:       # %bb.0:
; X64-NEXT:    paddw %xmm3, %xmm1
; X64-NEXT:    movq %xmm1, (%rdi)
; X64-NEXT:    retq
;
; X86-LABEL: store_i64_from_vector256:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 24(%ebp), %eax
; X86-NEXT:    paddw 8(%ebp), %xmm1
; X86-NEXT:    movq %xmm1, (%eax)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X86AVX-LABEL: store_i64_from_vector256:
; X86AVX:       # %bb.0:
; X86AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX-NEXT:    vextracti128 $1, %ymm1, %xmm1
; X86AVX-NEXT:    vextracti128 $1, %ymm0, %xmm0
; X86AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; X86AVX-NEXT:    vmovq %xmm0, (%eax)
; X86AVX-NEXT:    vzeroupper
; X86AVX-NEXT:    retl
  %z = add <16 x i16> %x, %y ; force execution domain
  %bc = bitcast <16 x i16> %z to <4 x i64>
  %vecext = extractelement <4 x i64> %bc, i32 2
  store i64 %vecext, ptr %i, align 8
  ret void
}

; PR23476
; Handle extraction from a non-simple / pre-legalization type.
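; <5 x i64> is not a legal type, so it gets widened to eight elements during
; type legalization; the variable index is then masked to that range and the
; element is reloaded from a stack temporary (see the 'andl $7' and the
; stack spills/reload in the CHECK lines below).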

define void @PR23476(<5 x i64> %in, ptr %out, i32 %index) nounwind {
; X64-LABEL: PR23476:
; X64:       # %bb.0:
; X64-NEXT:    movq %rsi, %xmm0
; X64-NEXT:    movq %rdi, %xmm1
; X64-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-NEXT:    movq %rcx, %xmm0
; X64-NEXT:    movq %rdx, %xmm2
; X64-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; X64-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; X64-NEXT:    andl $7, %eax
; X64-NEXT:    movq %r8, %xmm0
; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movdqa %xmm2, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movq -72(%rsp,%rax,8), %rax
; X64-NEXT:    movq %rax, (%r9)
; X64-NEXT:    retq
;
; X86-LABEL: PR23476:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $80, %esp
; X86-NEXT:    movl 52(%ebp), %eax
; X86-NEXT:    andl $7, %eax
; X86-NEXT:    movl 48(%ebp), %ecx
; X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    movups 8(%ebp), %xmm1
; X86-NEXT:    movups 24(%ebp), %xmm2
; X86-NEXT:    movaps %xmm2, {{[0-9]+}}(%esp)
; X86-NEXT:    movaps %xmm1, (%esp)
; X86-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp)
; X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    movsd %xmm0, (%ecx)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X86AVX-LABEL: PR23476:
; X86AVX:       # %bb.0:
; X86AVX-NEXT:    pushl %ebp
; X86AVX-NEXT:    movl %esp, %ebp
; X86AVX-NEXT:    andl $-32, %esp
; X86AVX-NEXT:    subl $96, %esp
; X86AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86AVX-NEXT:    movl 52(%ebp), %eax
; X86AVX-NEXT:    andl $7, %eax
; X86AVX-NEXT:    movl 48(%ebp), %ecx
; X86AVX-NEXT:    vmovups 8(%ebp), %ymm1
; X86AVX-NEXT:    vmovaps %ymm1, (%esp)
; X86AVX-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
; X86AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86AVX-NEXT:    vmovsd %xmm0, (%ecx)
; X86AVX-NEXT:    movl %ebp, %esp
; X86AVX-NEXT:    popl %ebp
; X86AVX-NEXT:    vzeroupper
; X86AVX-NEXT:    retl
  %ext = extractelement <5 x i64> %in, i32 %index
  store i64 %ext, ptr %out, align 8
  ret void
}