xref: /llvm-project/llvm/test/CodeGen/X86/avx512fp16-fold-load-binops.ll (revision 2f448bf509432c1a19ec46ab8cbc7353c03c6280)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 < %s | FileCheck %s
3
4; Verify that we're folding the load into the math instruction.
5; This pattern is generated out of the simplest intrinsics usage:
6;  _mm_add_sh(a, _mm_load_sh(b));
7
8define <8 x half> @addsh(<8 x half> %va, ptr %pb) {
9; CHECK-LABEL: addsh:
10; CHECK:       # %bb.0:
11; CHECK-NEXT:    vaddsh (%rdi), %xmm0, %xmm0
12; CHECK-NEXT:    retq
13  %lane0 = extractelement <8 x half> %va, i32 0 ; scalar operand: element 0 of %va
14  %mem = load half, ptr %pb ; scalar load that should become the memory operand of vaddsh
15  %sum = fadd half %lane0, %mem
16  %res = insertelement <8 x half> %va, half %sum, i32 0 ; write the sum back into element 0 of %va
17  ret <8 x half> %res
18}
19
20define <8 x half> @subsh(<8 x half> %va, ptr %pb) {
21; CHECK-LABEL: subsh:
22; CHECK:       # %bb.0:
23; CHECK-NEXT:    vsubsh (%rdi), %xmm0, %xmm0
24; CHECK-NEXT:    retq
25  %minuend = extractelement <8 x half> %va, i32 0 ; element 0 of %va is the left-hand operand
26  %subtrahend = load half, ptr %pb ; loaded value is the right-hand operand, so the load can fold
27  %diff = fsub half %minuend, %subtrahend
28  %out = insertelement <8 x half> %va, half %diff, i32 0 ; write the difference back into element 0
29  ret <8 x half> %out
30}
31
32define <8 x half> @mulsh(<8 x half> %va, ptr %pb) {
33; CHECK-LABEL: mulsh:
34; CHECK:       # %bb.0:
35; CHECK-NEXT:    vmulsh (%rdi), %xmm0, %xmm0
36; CHECK-NEXT:    retq
37  %x = extractelement <8 x half> %va, i32 0 ; scalar operand: element 0 of %va
38  %y = load half, ptr %pb ; scalar load that should become the memory operand of vmulsh
39  %prod = fmul half %x, %y
40  %vec = insertelement <8 x half> %va, half %prod, i32 0 ; write the product back into element 0
41  ret <8 x half> %vec
42}
43
44define <8 x half> @divsh(<8 x half> %va, ptr %pb) {
45; CHECK-LABEL: divsh:
46; CHECK:       # %bb.0:
47; CHECK-NEXT:    vdivsh (%rdi), %xmm0, %xmm0
48; CHECK-NEXT:    retq
49  %num = extractelement <8 x half> %va, i32 0 ; dividend: element 0 of %va
50  %den = load half, ptr %pb ; divisor comes from memory, so the load can fold into vdivsh
51  %quot = fdiv half %num, %den
52  %result = insertelement <8 x half> %va, half %quot, i32 0 ; write the quotient back into element 0
53  ret <8 x half> %result
54}
55
56define <8 x half> @minsh(<8 x half> %va, ptr %pb) {
57; CHECK-LABEL: minsh:
58; CHECK:       # %bb.0:
59; CHECK-NEXT:    vminsh (%rdi), %xmm0, %xmm1
60; CHECK-NEXT:    vmovsh %xmm1, %xmm0, %xmm0
61; CHECK-NEXT:    retq
62  %lhs = extractelement <8 x half> %va, i32 0 ; scalar operand: element 0 of %va
63  %rhs = load half, ptr %pb ; scalar load that should become the memory operand of vminsh
64  %lo = call nnan half @llvm.minnum.f16(half %lhs, half %rhs) readnone
65  %merged = insertelement <8 x half> %va, half %lo, i32 0 ; write the minimum back into element 0
66  ret <8 x half> %merged
67}
68
69define <8 x half> @maxsh(<8 x half> %va, ptr %pb) {
70; CHECK-LABEL: maxsh:
71; CHECK:       # %bb.0:
72; CHECK-NEXT:    vmaxsh (%rdi), %xmm0, %xmm1
73; CHECK-NEXT:    vmovsh %xmm1, %xmm0, %xmm0
74; CHECK-NEXT:    retq
75  %a = extractelement <8 x half> %va, i32 0 ; scalar operand: element 0 of %va
76  %b = load half, ptr %pb ; scalar load that should become the memory operand of vmaxsh
77  %r = call nnan half @llvm.maxnum.f16(half %a, half %b) readnone ; was minnum (copy-paste from minsh); regenerate the assertions above with utils/update_llc_test_checks.py to confirm
78  %vr = insertelement <8 x half> %va, half %r, i32 0 ; write the maximum back into element 0
79  ret <8 x half> %vr
80}
81
82declare half @llvm.minnum.f16(half, half) ; scalar half minnum intrinsic, called by @minsh
83declare half @llvm.maxnum.f16(half, half) ; scalar half maxnum intrinsic
84