xref: /llvm-project/llvm/test/CodeGen/AMDGPU/strict_fsub.f64.ll (revision f2c164c8150548d983565c4ddc0fde790f9e2a5b)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
3; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
4; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
5; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
6; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s
7; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s
8
; Scalar f64 constrained fsub with round.tonearest and fpexcept.strict.
; Under both check prefixes the expected body is a single v_add_f64 whose
; second source operand (v[2:3]) is written with a leading minus, i.e. the
; subtraction is matched as an add of the negated operand.
9define double @v_constained_fsub_f64_fpexcept_strict(double %x, double %y) #0 {
10; GCN-LABEL: v_constained_fsub_f64_fpexcept_strict:
11; GCN:       ; %bb.0:
12; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
14; GCN-NEXT:    s_setpc_b64 s[30:31]
15;
16; GFX10-LABEL: v_constained_fsub_f64_fpexcept_strict:
17; GFX10:       ; %bb.0:
18; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
20; GFX10-NEXT:    s_setpc_b64 s[30:31]
21  %val = call double @llvm.experimental.constrained.fsub.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
22  ret double %val
23}
24
; Scalar f64 constrained fsub with round.tonearest and fpexcept.ignore.
; The expected instruction sequence is the same as in the fpexcept.strict
; scalar test: one v_add_f64 with a negated second source operand.
25define double @v_constained_fsub_f64_fpexcept_ignore(double %x, double %y) #0 {
26; GCN-LABEL: v_constained_fsub_f64_fpexcept_ignore:
27; GCN:       ; %bb.0:
28; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
29; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
30; GCN-NEXT:    s_setpc_b64 s[30:31]
31;
32; GFX10-LABEL: v_constained_fsub_f64_fpexcept_ignore:
33; GFX10:       ; %bb.0:
34; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
36; GFX10-NEXT:    s_setpc_b64 s[30:31]
37  %val = call double @llvm.experimental.constrained.fsub.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
38  ret double %val
39}
40
; Scalar f64 constrained fsub with round.tonearest and fpexcept.maytrap.
; The expected instruction sequence matches the other two scalar exception
; modes in this file: one v_add_f64 with a negated second source operand.
41define double @v_constained_fsub_f64_fpexcept_maytrap(double %x, double %y) #0 {
42; GCN-LABEL: v_constained_fsub_f64_fpexcept_maytrap:
43; GCN:       ; %bb.0:
44; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
45; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
46; GCN-NEXT:    s_setpc_b64 s[30:31]
47;
48; GFX10-LABEL: v_constained_fsub_f64_fpexcept_maytrap:
49; GFX10:       ; %bb.0:
50; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
52; GFX10-NEXT:    s_setpc_b64 s[30:31]
53  %val = call double @llvm.experimental.constrained.fsub.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
54  ret double %val
55}
56
; <2 x double> constrained fsub with round.tonearest and fpexcept.strict.
; The checks expect the vector operation to appear as two independent
; v_add_f64 instructions, one per element, each with a negated second source
; operand (v[0:1] with -v[4:5], then v[2:3] with -v[6:7]).
57define <2 x double> @v_constained_fsub_v2f64_fpexcept_strict(<2 x double> %x, <2 x double> %y) #0 {
58; GCN-LABEL: v_constained_fsub_v2f64_fpexcept_strict:
59; GCN:       ; %bb.0:
60; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
61; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], -v[4:5]
62; GCN-NEXT:    v_add_f64 v[2:3], v[2:3], -v[6:7]
63; GCN-NEXT:    s_setpc_b64 s[30:31]
64;
65; GFX10-LABEL: v_constained_fsub_v2f64_fpexcept_strict:
66; GFX10:       ; %bb.0:
67; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
68; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], -v[4:5]
69; GFX10-NEXT:    v_add_f64 v[2:3], v[2:3], -v[6:7]
70; GFX10-NEXT:    s_setpc_b64 s[30:31]
71  %val = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
72  ret <2 x double> %val
73}
74
; <2 x double> constrained fsub with round.tonearest and fpexcept.ignore.
; Expected output is the same two per-element v_add_f64 instructions as in
; the fpexcept.strict <2 x double> test.
75define <2 x double> @v_constained_fsub_v2f64_fpexcept_ignore(<2 x double> %x, <2 x double> %y) #0 {
76; GCN-LABEL: v_constained_fsub_v2f64_fpexcept_ignore:
77; GCN:       ; %bb.0:
78; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
79; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], -v[4:5]
80; GCN-NEXT:    v_add_f64 v[2:3], v[2:3], -v[6:7]
81; GCN-NEXT:    s_setpc_b64 s[30:31]
82;
83; GFX10-LABEL: v_constained_fsub_v2f64_fpexcept_ignore:
84; GFX10:       ; %bb.0:
85; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
86; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], -v[4:5]
87; GFX10-NEXT:    v_add_f64 v[2:3], v[2:3], -v[6:7]
88; GFX10-NEXT:    s_setpc_b64 s[30:31]
89  %val = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
90  ret <2 x double> %val
91}
92
; <2 x double> constrained fsub with round.tonearest and fpexcept.maytrap.
; Expected output is the same two per-element v_add_f64 instructions as in
; the other <2 x double> tests in this file.
93define <2 x double> @v_constained_fsub_v2f64_fpexcept_maytrap(<2 x double> %x, <2 x double> %y) #0 {
94; GCN-LABEL: v_constained_fsub_v2f64_fpexcept_maytrap:
95; GCN:       ; %bb.0:
96; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
97; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], -v[4:5]
98; GCN-NEXT:    v_add_f64 v[2:3], v[2:3], -v[6:7]
99; GCN-NEXT:    s_setpc_b64 s[30:31]
100;
101; GFX10-LABEL: v_constained_fsub_v2f64_fpexcept_maytrap:
102; GFX10:       ; %bb.0:
103; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
104; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], -v[4:5]
105; GFX10-NEXT:    v_add_f64 v[2:3], v[2:3], -v[6:7]
106; GFX10-NEXT:    s_setpc_b64 s[30:31]
107  %val = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double> %x, <2 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
108  ret <2 x double> %val
109}
110
; <3 x double> constrained fsub with round.tonearest and fpexcept.strict.
; This is the only <3 x double> case in the file; the checks expect three
; per-element v_add_f64 instructions, each with a negated second source
; operand (-v[6:7], -v[8:9], -v[10:11]).
111define <3 x double> @v_constained_fsub_v3f64_fpexcept_strict(<3 x double> %x, <3 x double> %y) #0 {
112; GCN-LABEL: v_constained_fsub_v3f64_fpexcept_strict:
113; GCN:       ; %bb.0:
114; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
115; GCN-NEXT:    v_add_f64 v[0:1], v[0:1], -v[6:7]
116; GCN-NEXT:    v_add_f64 v[2:3], v[2:3], -v[8:9]
117; GCN-NEXT:    v_add_f64 v[4:5], v[4:5], -v[10:11]
118; GCN-NEXT:    s_setpc_b64 s[30:31]
119;
120; GFX10-LABEL: v_constained_fsub_v3f64_fpexcept_strict:
121; GFX10:       ; %bb.0:
122; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
123; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], -v[6:7]
124; GFX10-NEXT:    v_add_f64 v[2:3], v[2:3], -v[8:9]
125; GFX10-NEXT:    v_add_f64 v[4:5], v[4:5], -v[10:11]
126; GFX10-NEXT:    s_setpc_b64 s[30:31]
127  %val = call <3 x double> @llvm.experimental.constrained.fsub.v3f64(<3 x double> %x, <3 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
128  ret <3 x double> %val
129}
130
; Scalar f64 constrained fsub (round.tonearest, fpexcept.strict) in an
; amdgpu_ps function whose operands are both marked inreg; the checks show
; them arriving in SGPR pairs s[2:3] and s[4:5].
; This is the one test where the two check prefixes differ:
;  - under the GCN prefix, s[4:5] is first copied into v[0:1] with two
;    v_mov_b32 instructions and the add uses s[2:3] with -v[0:1];
;  - under the GFX10 prefix, the add reads both SGPR pairs directly.
; NOTE(review): the difference presumably comes from how many scalar operands
; one VALU instruction may read on each target - confirm against the ISA docs.
; The double result is bitcast to <2 x float> to form the return value.
131define amdgpu_ps <2 x float> @s_constained_fsub_f64_fpexcept_strict(double inreg %x, double inreg %y) #0 {
132; GCN-LABEL: s_constained_fsub_f64_fpexcept_strict:
133; GCN:       ; %bb.0:
134; GCN-NEXT:    v_mov_b32_e32 v0, s4
135; GCN-NEXT:    v_mov_b32_e32 v1, s5
136; GCN-NEXT:    v_add_f64 v[0:1], s[2:3], -v[0:1]
137; GCN-NEXT:    ; return to shader part epilog
138;
139; GFX10-LABEL: s_constained_fsub_f64_fpexcept_strict:
140; GFX10:       ; %bb.0:
141; GFX10-NEXT:    v_add_f64 v[0:1], s[2:3], -s[4:5]
142; GFX10-NEXT:    ; return to shader part epilog
143  %val = call double @llvm.experimental.constrained.fsub.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
144  %cast = bitcast double %val to <2 x float>
145  ret <2 x float> %cast
146}
147
; Declarations of the constrained fsub intrinsics called by the tests above,
; one per tested type (f64, <2 x double>, <3 x double>). Each takes the two
; operands followed by two metadata arguments (rounding mode and exception
; behavior, as passed at the call sites).
148declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata) #1
149declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata) #1
150declare <3 x double> @llvm.experimental.constrained.fsub.v3f64(<3 x double>, <3 x double>, metadata, metadata) #1
151
; Attribute group #0 is attached to every test function in this file;
; group #1 is attached to the intrinsic declarations.
152attributes #0 = { strictfp }
153attributes #1 = { inaccessiblememonly nounwind willreturn }
154