; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=sse2,cx16 | FileCheck %s --check-prefixes=X64-SSE
; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=avx,cx16 | FileCheck %s --check-prefixes=X64-AVX
; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=avx512f,cx16 | FileCheck %s --check-prefixes=X64-AVX

; Codegen of fp128 without cx16 is tested in atomic-nocx16.ll

; Without AVX, a 16-byte atomic store is lowered to a lock cmpxchg16b loop;
; with AVX or AVX-512, a single aligned vmovaps store is emitted instead.
define void @store_fp128(ptr %fptr, fp128 %v) {
; X64-SSE-LABEL: store_fp128:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    pushq %rbx
; X64-SSE-NEXT:    .cfi_def_cfa_offset 16
; X64-SSE-NEXT:    .cfi_offset %rbx, -16
; X64-SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    movq -{{[0-9]+}}(%rsp), %rbx
; X64-SSE-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-SSE-NEXT:    movq (%rdi), %rax
; X64-SSE-NEXT:    movq 8(%rdi), %rdx
; X64-SSE-NEXT:    .p2align 4
; X64-SSE-NEXT:  .LBB0_1: # %atomicrmw.start
; X64-SSE-NEXT:    # =>This Inner Loop Header: Depth=1
; X64-SSE-NEXT:    lock cmpxchg16b (%rdi)
; X64-SSE-NEXT:    jne .LBB0_1
; X64-SSE-NEXT:  # %bb.2: # %atomicrmw.end
; X64-SSE-NEXT:    popq %rbx
; X64-SSE-NEXT:    .cfi_def_cfa_offset 8
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_fp128:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovaps %xmm0, (%rdi)
; X64-AVX-NEXT:    retq
  store atomic fp128 %v, ptr %fptr unordered, align 16
  ret void
}

; Without AVX, the 16-byte atomic load uses lock cmpxchg16b with an all-zero
; comparand and replacement: memory is left unchanged either way, and the old
; value lands in rdx:rax. With AVX, an aligned vmovaps load is used directly.
define fp128 @load_fp128(ptr %fptr) {
; X64-SSE-LABEL: load_fp128:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    pushq %rbx
; X64-SSE-NEXT:    .cfi_def_cfa_offset 16
; X64-SSE-NEXT:    .cfi_offset %rbx, -16
; X64-SSE-NEXT:    xorl %eax, %eax
; X64-SSE-NEXT:    xorl %edx, %edx
; X64-SSE-NEXT:    xorl %ecx, %ecx
; X64-SSE-NEXT:    xorl %ebx, %ebx
; X64-SSE-NEXT:    lock cmpxchg16b (%rdi)
; X64-SSE-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-SSE-NEXT:    popq %rbx
; X64-SSE-NEXT:    .cfi_def_cfa_offset 8
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_fp128:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovaps (%rdi), %xmm0
; X64-AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; X64-AVX-NEXT:    retq
  %v = load atomic fp128, ptr %fptr unordered, align 16
  ret fp128 %v
}

; x86 has no 16-byte exchange instruction, so atomicrmw xchg expands to a
; cmpxchg16b retry loop on both the SSE and AVX paths.
define fp128 @exchange_fp128(ptr %fptr, fp128 %x) {
; X64-SSE-LABEL: exchange_fp128:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    pushq %rbx
; X64-SSE-NEXT:    .cfi_def_cfa_offset 16
; X64-SSE-NEXT:    .cfi_offset %rbx, -16
; X64-SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    movq -{{[0-9]+}}(%rsp), %rbx
; X64-SSE-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-SSE-NEXT:    movq (%rdi), %rax
; X64-SSE-NEXT:    movq 8(%rdi), %rdx
; X64-SSE-NEXT:    .p2align 4
; X64-SSE-NEXT:  .LBB2_1: # %atomicrmw.start
; X64-SSE-NEXT:    # =>This Inner Loop Header: Depth=1
; X64-SSE-NEXT:    lock cmpxchg16b (%rdi)
; X64-SSE-NEXT:    jne .LBB2_1
; X64-SSE-NEXT:  # %bb.2: # %atomicrmw.end
; X64-SSE-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-SSE-NEXT:    popq %rbx
; X64-SSE-NEXT:    .cfi_def_cfa_offset 8
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: exchange_fp128:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    pushq %rbx
; X64-AVX-NEXT:    .cfi_def_cfa_offset 16
; X64-AVX-NEXT:    .cfi_offset %rbx, -16
; X64-AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rbx
; X64-AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-AVX-NEXT:    movq (%rdi), %rax
; X64-AVX-NEXT:    movq 8(%rdi), %rdx
; X64-AVX-NEXT:    .p2align 4
; X64-AVX-NEXT:  .LBB2_1: # %atomicrmw.start
; X64-AVX-NEXT:    # =>This Inner Loop Header: Depth=1
; X64-AVX-NEXT:    lock cmpxchg16b (%rdi)
; X64-AVX-NEXT:    jne .LBB2_1
; X64-AVX-NEXT:  # %bb.2: # %atomicrmw.end
; X64-AVX-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; X64-AVX-NEXT:    popq %rbx
; X64-AVX-NEXT:    .cfi_def_cfa_offset 8
; X64-AVX-NEXT:    retq
  %v = atomicrmw xchg ptr %fptr, fp128 %x monotonic, align 16
  ret fp128 %v
}
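
; A minimal companion sketch (hypothetical; not part of the autogenerated
; checks above): IR-level cmpxchg only accepts integer or pointer operands,
; so a compare-and-swap of an fp128 value has to round-trip through i128
; bitcasts. The function name @cmpxchg_fp128 and its value names are
; illustrative only, and no FileCheck lines are attached to it.
define fp128 @cmpxchg_fp128(ptr %fptr, fp128 %cmp, fp128 %new) {
  ; Reinterpret both fp128 operands as i128 so cmpxchg accepts them.
  %c = bitcast fp128 %cmp to i128
  %n = bitcast fp128 %new to i128
  ; With cx16 this is expected to lower to lock cmpxchg16b, the same
  ; instruction at the heart of the loops checked above.
  %pair = cmpxchg ptr %fptr, i128 %c, i128 %n seq_cst seq_cst, align 16
  %old = extractvalue { i128, i1 } %pair, 0
  ; Reinterpret the previous memory contents back to fp128.
  %v = bitcast i128 %old to fp128
  ret fp128 %v
}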