xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll (revision 9e9907f1cfa424366fba58d9520f9305b537cec9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX906 %s
3; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10PLUS %s
4; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10PLUS %s
5; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10PLUS %s
6
; Baseline case: both <2 x half> inputs and the float input are passed to the
; intrinsic unmodified, with the trailing i1 immediate set to false. The gfx906
; checks expect a single v_dot2_f32_f16 carrying no modifiers.
7define float @v_fdot2(<2 x half> %a, <2 x half> %b, float %c) {
8; GFX906-LABEL: v_fdot2:
9; GFX906:       ; %bb.0:
10; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2
12; GFX906-NEXT:    s_setpc_b64 s[30:31]
13  %dot = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c, i1 false)
14  ret float %dot
15}
16
; Same call as the baseline, but with the trailing i1 immediate set to true.
; Both check groups expect the "clamp" modifier on the selected v_dot2_f32_f16.
17define float @v_fdot2_clamp(<2 x half> %a, <2 x half> %b, float %c) {
18; GFX906-LABEL: v_fdot2_clamp:
19; GFX906:       ; %bb.0:
20; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 clamp
22; GFX906-NEXT:    s_setpc_b64 s[30:31]
23;
24; GFX10PLUS-LABEL: v_fdot2_clamp:
25; GFX10PLUS:       ; %bb.0:
26; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
27; GFX10PLUS-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 clamp
28; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
29  %dot.clamped = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c, i1 true)
30  ret float %dot.clamped
31}
32
; Applies fneg to the first <2 x half> operand only. The gfx906 checks expect
; no separate negate instruction: the negation shows up as neg_lo/neg_hi bits
; set for source 0 of v_dot2_f32_f16.
33define float @v_fdot2_neg_a(<2 x half> %a, <2 x half> %b, float %c) {
34; GFX906-LABEL: v_fdot2_neg_a:
35; GFX906:       ; %bb.0:
36; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
38; GFX906-NEXT:    s_setpc_b64 s[30:31]
39  %a.negated = fneg <2 x half> %a
40  %dot = call float @llvm.amdgcn.fdot2(<2 x half> %a.negated, <2 x half> %b, float %c, i1 false)
41  ret float %dot
42}
43
; Applies fneg to the second <2 x half> operand only. The gfx906 checks expect
; no separate negate instruction: the negation shows up as neg_lo/neg_hi bits
; set for source 1 of v_dot2_f32_f16.
44define float @v_fdot2_neg_b(<2 x half> %a, <2 x half> %b, float %c) {
45; GFX906-LABEL: v_fdot2_neg_b:
46; GFX906:       ; %bb.0:
47; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
48; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
49; GFX906-NEXT:    s_setpc_b64 s[30:31]
50  %b.negated = fneg <2 x half> %b
51  %dot = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b.negated, float %c, i1 false)
52  ret float %dot
53}
54
; Applies fneg to BOTH <2 x half> operands (%a and %b). The gfx906 checks
; expect both negations to show up as neg_lo/neg_hi bits on sources 0 and 1 of
; a single v_dot2_f32_f16.
;
; %neg.a previously negated %b by mistake, so %a was never used and the test
; multiplied -%b by -%b. The expected source registers are corrected to match
; (v0 for %a, v1 for %b, as in the single-operand negate tests).
; NOTE(review): the check line was adjusted by hand; re-run
; utils/update_llc_test_checks.py to confirm it.
55define float @v_fdot2_neg_a_neg_b(<2 x half> %a, <2 x half> %b, float %c) {
56; GFX906-LABEL: v_fdot2_neg_a_neg_b:
57; GFX906:       ; %bb.0:
58; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
59; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[1,1,0] neg_hi:[1,1,0]
60; GFX906-NEXT:    s_setpc_b64 s[30:31]
61  %neg.a = fneg <2 x half> %a
62  %neg.b = fneg <2 x half> %b
63  %r = call float @llvm.amdgcn.fdot2(<2 x half> %neg.a, <2 x half> %neg.b, float %c, i1 false)
64  ret float %r
65}
66
; Applies fneg to the float (third) operand. Unlike the packed-half operands,
; the gfx906 checks do not expect a modifier here: they expect an explicit
; v_xor_b32 with 0x80000000 on v2 ahead of an unmodified v_dot2_f32_f16.
67define float @v_fdot2_neg_c(<2 x half> %a, <2 x half> %b, float %c) {
68; GFX906-LABEL: v_fdot2_neg_c:
69; GFX906:       ; %bb.0:
70; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
71; GFX906-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
72; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2
73; GFX906-NEXT:    s_setpc_b64 s[30:31]
74  %c.negated = fneg float %c
75  %dot = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c.negated, i1 false)
76  ret float %dot
77}
78
; Passes the constant vector <2.0, 2.0> as the first <2 x half> operand. The
; gfx906 checks expect it to be emitted directly as the operand 2.0 on source 0,
; with op_sel_hi:[0,1,1] on the instruction.
79define float @v_fdot2_inline_literal_a(<2 x half> %b, float %c) {
80; GFX906-LABEL: v_fdot2_inline_literal_a:
81; GFX906:       ; %bb.0:
82; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
83; GFX906-NEXT:    v_dot2_f32_f16 v0, 2.0, v0, v1 op_sel_hi:[0,1,1]
84; GFX906-NEXT:    s_setpc_b64 s[30:31]
85  %dot = tail call float @llvm.amdgcn.fdot2(<2 x half> <half 2.0, half 2.0>, <2 x half> %b, float %c, i1 false)
86  ret float %dot
87}
88
; Passes the constant vector <2.0, 2.0> as the second <2 x half> operand. The
; gfx906 checks expect it to be emitted directly as the operand 2.0 on source 1,
; with op_sel_hi:[1,0,1] on the instruction.
89define float @v_fdot2_inline_literal_b(<2 x half> %a, float %c) {
90; GFX906-LABEL: v_fdot2_inline_literal_b:
91; GFX906:       ; %bb.0:
92; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
93; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, 2.0, v1 op_sel_hi:[1,0,1]
94; GFX906-NEXT:    s_setpc_b64 s[30:31]
95  %dot = tail call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> <half 2.0, half 2.0>, float %c, i1 false)
96  ret float %dot
97}
98
; Passes the constant float 1.0 as the third operand. The gfx906 checks expect
; it to be emitted directly as the operand 1.0 on source 2, with no extra
; modifiers on the instruction.
99define float @v_fdot2_inline_literal_c(<2 x half> %a, <2 x half> %b) {
100; GFX906-LABEL: v_fdot2_inline_literal_c:
101; GFX906:       ; %bb.0:
102; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, 1.0
104; GFX906-NEXT:    s_setpc_b64 s[30:31]
105  %dot = tail call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float 1.0, i1 false)
106  ret float %dot
107}
108
; Intrinsic called by the test functions above. It takes two <2 x half>
; operands, a float operand, and an i1 that must be a compile-time immediate
; (immarg); the tests pass true for that i1 only in the clamp case.
109declare float @llvm.amdgcn.fdot2(<2 x half>, <2 x half>, float, i1 immarg) #0
110
; Attribute group #0, referenced by the intrinsic declaration above.
111attributes #0 = { nounwind readnone speculatable }
112