; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=sse2,cx16 | FileCheck %s --check-prefixes=X64-SSE
; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=avx,cx16 | FileCheck %s --check-prefixes=X64-AVX
; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=avx512f,cx16 | FileCheck %s --check-prefixes=X64-AVX

; Codegen of fp128 without cx16 is tested in atomic-nocx16.ll

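; As the checks below show: with SSE2 and cx16, the atomic fp128 load and
; store are expanded to a lock cmpxchg16b loop, while AVX/AVX-512 targets
; emit a plain aligned 16-byte vmovaps. The xchg RMW uses the cmpxchg16b
; loop on both.
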
define void @store_fp128(ptr %fptr, fp128 %v) {
; X64-SSE-LABEL: store_fp128:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    pushq %rbx
; X64-SSE-NEXT:    .cfi_def_cfa_offset 16
; X64-SSE-NEXT:    .cfi_offset %rbx, -16
; X64-SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    movq -{{[0-9]+}}(%rsp), %rbx
; X64-SSE-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-SSE-NEXT:    movq (%rdi), %rax
; X64-SSE-NEXT:    movq 8(%rdi), %rdx
; X64-SSE-NEXT:    .p2align 4
; X64-SSE-NEXT:  .LBB0_1: # %atomicrmw.start
; X64-SSE-NEXT:    # =>This Inner Loop Header: Depth=1
; X64-SSE-NEXT:    lock cmpxchg16b (%rdi)
; X64-SSE-NEXT:    jne .LBB0_1
; X64-SSE-NEXT:  # %bb.2: # %atomicrmw.end
; X64-SSE-NEXT:    popq %rbx
; X64-SSE-NEXT:    .cfi_def_cfa_offset 8
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_fp128:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovaps %xmm0, (%rdi)
; X64-AVX-NEXT:    retq
  store atomic fp128 %v, ptr %fptr unordered, align 16
  ret void
}

define fp128 @load_fp128(ptr %fptr) {
; X64-SSE-LABEL: load_fp128:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    pushq %rbx
; X64-SSE-NEXT:    .cfi_def_cfa_offset 16
; X64-SSE-NEXT:    .cfi_offset %rbx, -16
; X64-SSE-NEXT:    xorl %eax, %eax
; X64-SSE-NEXT:    xorl %edx, %edx
; X64-SSE-NEXT:    xorl %ecx, %ecx
; X64-SSE-NEXT:    xorl %ebx, %ebx
; X64-SSE-NEXT:    lock cmpxchg16b (%rdi)
; X64-SSE-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-SSE-NEXT:    popq %rbx
; X64-SSE-NEXT:    .cfi_def_cfa_offset 8
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_fp128:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovaps (%rdi), %xmm0
; X64-AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; X64-AVX-NEXT:    retq
  %v = load atomic fp128, ptr %fptr unordered, align 16
  ret fp128 %v
}

define fp128 @exchange_fp128(ptr %fptr, fp128 %x) {
; X64-SSE-LABEL: exchange_fp128:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    pushq %rbx
; X64-SSE-NEXT:    .cfi_def_cfa_offset 16
; X64-SSE-NEXT:    .cfi_offset %rbx, -16
; X64-SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    movq -{{[0-9]+}}(%rsp), %rbx
; X64-SSE-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-SSE-NEXT:    movq (%rdi), %rax
; X64-SSE-NEXT:    movq 8(%rdi), %rdx
; X64-SSE-NEXT:    .p2align 4
; X64-SSE-NEXT:  .LBB2_1: # %atomicrmw.start
; X64-SSE-NEXT:    # =>This Inner Loop Header: Depth=1
; X64-SSE-NEXT:    lock cmpxchg16b (%rdi)
; X64-SSE-NEXT:    jne .LBB2_1
; X64-SSE-NEXT:  # %bb.2: # %atomicrmw.end
; X64-SSE-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; X64-SSE-NEXT:    popq %rbx
; X64-SSE-NEXT:    .cfi_def_cfa_offset 8
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: exchange_fp128:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    pushq %rbx
; X64-AVX-NEXT:    .cfi_def_cfa_offset 16
; X64-AVX-NEXT:    .cfi_offset %rbx, -16
; X64-AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rbx
; X64-AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; X64-AVX-NEXT:    movq (%rdi), %rax
; X64-AVX-NEXT:    movq 8(%rdi), %rdx
; X64-AVX-NEXT:    .p2align 4
; X64-AVX-NEXT:  .LBB2_1: # %atomicrmw.start
; X64-AVX-NEXT:    # =>This Inner Loop Header: Depth=1
; X64-AVX-NEXT:    lock cmpxchg16b (%rdi)
; X64-AVX-NEXT:    jne .LBB2_1
; X64-AVX-NEXT:  # %bb.2: # %atomicrmw.end
; X64-AVX-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; X64-AVX-NEXT:    popq %rbx
; X64-AVX-NEXT:    .cfi_def_cfa_offset 8
; X64-AVX-NEXT:    retq
  %v = atomicrmw xchg ptr %fptr, fp128 %x monotonic, align 16
  ret fp128 %v
}