; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple x86_64-pc-linux < %s | FileCheck %s

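; With only 4-byte alignment and no native f16 arithmetic on the generic target,
; the CHECK lines below expect a compare-exchange expansion: each half element is
; extended with __extendhfsf2, added with addss, truncated back with __truncsfhf2,
; the two 16-bit results are repacked into one 32-bit word, and lock cmpxchgl
; retries until the exchange succeeds.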
define <2 x half> @test_atomicrmw_fadd_v2f16_align4(ptr addrspace(1) %ptr, <2 x half> %value) #0 {
; CHECK-LABEL: test_atomicrmw_fadd_v2f16_align4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    subq $88, %rsp
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    psrld $16, %xmm0
; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    pinsrw $0, 2(%rdi), %xmm1
; CHECK-NEXT:    pinsrw $0, (%rdi), %xmm0
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  .LBB0_1: # %atomicrmw.start
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    callq __extendhfsf2@PLT
; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    callq __extendhfsf2@PLT
; CHECK-NEXT:    addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT:    callq __truncsfhf2@PLT
; CHECK-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-NEXT:    movzwl %ax, %ebp
; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    callq __extendhfsf2@PLT
; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    callq __extendhfsf2@PLT
; CHECK-NEXT:    addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT:    callq __truncsfhf2@PLT
; CHECK-NEXT:    pextrw $0, %xmm0, %ecx
; CHECK-NEXT:    shll $16, %ecx
; CHECK-NEXT:    orl %ebp, %ecx
; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    pextrw $0, %xmm0, %edx
; CHECK-NEXT:    shll $16, %edx
; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-NEXT:    movzwl %ax, %eax
; CHECK-NEXT:    orl %edx, %eax
; CHECK-NEXT:    lock cmpxchgl %ecx, (%rbx)
; CHECK-NEXT:    setne %cl
; CHECK-NEXT:    pinsrw $0, %eax, %xmm0
; CHECK-NEXT:    shrl $16, %eax
; CHECK-NEXT:    pinsrw $0, %eax, %xmm1
; CHECK-NEXT:    testb %cl, %cl
; CHECK-NEXT:    jne .LBB0_1
; CHECK-NEXT:  # %bb.2: # %atomicrmw.end
; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT:    addq $88, %rsp
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    retq
  %res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %value seq_cst, align 4
  ret <2 x half> %res
}

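; At 8-byte alignment the whole <2 x float> value fits in one 64-bit word, so the
; CHECK lines below expect the add to stay in vector registers (addps) and the
; expansion to retry with a single lock cmpxchgq.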
define <2 x float> @test_atomicrmw_fadd_v2f32_align8(ptr addrspace(1) %ptr, <2 x float> %value) #0 {
; CHECK-LABEL: test_atomicrmw_fadd_v2f32_align8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  .LBB1_1: # %atomicrmw.start
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movq %xmm1, %rax
; CHECK-NEXT:    addps %xmm0, %xmm1
; CHECK-NEXT:    movq %xmm1, %rcx
; CHECK-NEXT:    lock cmpxchgq %rcx, (%rdi)
; CHECK-NEXT:    movq %rax, %xmm1
; CHECK-NEXT:    jne .LBB1_1
; CHECK-NEXT:  # %bb.2: # %atomicrmw.end
; CHECK-NEXT:    movdqa %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x float> %value seq_cst, align 8
  ret <2 x float> %res
}

attributes #0 = { nounwind }