xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll (revision b1bcb7ca460fcd317bbc8309e14c8761bf8394e0)
; Checks instruction selection for the llvm.amdgcn.fmul.legacy intrinsic on
; several AMDGPU subtargets. Every invocation except gfx1100 enables the
; shared GCN prefix, one of MADMACF32 / NOMADMACF32 (the latter only for
; gfx1030), and a per-generation prefix. The gfx1100 invocation enables only
; the GFX11 prefix, so checks written with the GCN prefix do not apply to it.
1; RUN: llc -mtriple=amdgcn -mcpu=tahiti  -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MADMACF32,GFX6 %s
2; RUN: llc -mtriple=amdgcn -mcpu=tonga   -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MADMACF32,GFX8 %s
3; RUN: llc -mtriple=amdgcn -mcpu=gfx900  -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MADMACF32,GFX9 %s
4; RUN: llc -mtriple=amdgcn -mcpu=gfx90a  -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MADMACF32,GFX9 %s
5; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MADMACF32,GFX101 %s
6; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOMADMACF32,GFX103 %s
7; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
8
; Basic case: multiply two float kernel arguments with
; llvm.amdgcn.fmul.legacy and store the product to %out.
; The gfx1100 invocation enables only the GFX11 prefix, so it needs its own
; label check; otherwise its instruction check is not tied to this function.
9; GCN-LABEL: {{^}}test_mul_legacy_f32:
; GFX11-LABEL: {{^}}test_mul_legacy_f32:
10; GCN: v_mul_legacy_f32{{[_e3264]*}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}}
11; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
12define amdgpu_kernel void @test_mul_legacy_f32(ptr addrspace(1) %out, float %a, float %b) #0 {
13  %result = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
14  store float %result, ptr addrspace(1) %out, align 4
15  ret void
16}
17
; First operand of the intrinsic is undef; the checks still expect a legacy
; multiply instruction to be emitted.
; A label check for the GFX11 prefix keeps the gfx1100 instruction check
; confined to this function (that invocation does not enable the GCN prefix).
18; GCN-LABEL: {{^}}test_mul_legacy_undef0_f32:
; GFX11-LABEL: {{^}}test_mul_legacy_undef0_f32:
19; GCN: v_mul_legacy_f32{{[_e3264]*}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}}
20; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
21define amdgpu_kernel void @test_mul_legacy_undef0_f32(ptr addrspace(1) %out, float %a) #0 {
22  %result = call float @llvm.amdgcn.fmul.legacy(float undef, float %a)
23  store float %result, ptr addrspace(1) %out, align 4
24  ret void
25}
26
; Second operand of the intrinsic is undef; the checks still expect a legacy
; multiply instruction to be emitted.
; A label check for the GFX11 prefix keeps the gfx1100 instruction check
; confined to this function (that invocation does not enable the GCN prefix).
27; GCN-LABEL: {{^}}test_mul_legacy_undef1_f32:
; GFX11-LABEL: {{^}}test_mul_legacy_undef1_f32:
28; GCN: v_mul_legacy_f32{{[_e3264]*}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}}
29; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
30define amdgpu_kernel void @test_mul_legacy_undef1_f32(ptr addrspace(1) %out, float %a) #0 {
31  %result = call float @llvm.amdgcn.fmul.legacy(float %a, float undef)
32  store float %result, ptr addrspace(1) %out, align 4
33  ret void
34}
35
; Both operands pass through llvm.fabs.f32 first; the checks expect the
; absolute-value operations to appear as |...| source modifiers on the
; multiply rather than as separate instructions.
; A label check for the GFX11 prefix keeps the gfx1100 instruction check
; confined to this function (that invocation does not enable the GCN prefix).
36; GCN-LABEL: {{^}}test_mul_legacy_fabs_f32:
; GFX11-LABEL: {{^}}test_mul_legacy_fabs_f32:
37; GCN: v_mul_legacy_f32{{[_e3264]*}} v{{[0-9]+}}, |s{{[0-9]+}}|, |{{[sv][0-9]+}}|
38; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|, |s{{[0-9]+}}|
39define amdgpu_kernel void @test_mul_legacy_fabs_f32(ptr addrspace(1) %out, float %a, float %b) #0 {
40  %a.fabs = call float @llvm.fabs.f32(float %a)
41  %b.fabs = call float @llvm.fabs.f32(float %b)
42  %result = call float @llvm.amdgcn.fmul.legacy(float %a.fabs, float %b.fabs)
43  store float %result, ptr addrspace(1) %out, align 4
44  ret void
45}
46
; Legacy multiply followed by an fadd, compiled with attribute group #0,
; which does not set "denormal-fp-math". Every prefix expects a separate
; multiply and add here.
47; Don't form mad/mac instructions because they don't support denormals.
; A label check for the GFX11 prefix keeps the gfx1100 instruction checks
; confined to this function (that invocation does not enable the GCN prefix).
48; GCN-LABEL: {{^}}test_add_mul_legacy_f32:
; GFX11-LABEL: {{^}}test_add_mul_legacy_f32:
49; GCN: v_mul_legacy_f32{{[_e3264]*}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}}
50; GCN: v_add_f32_e{{(32|64)}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}}
51; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
52; GFX11: v_dual_mov_b32 v{{[0-9]+}}, 0 :: v_dual_add_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
53define amdgpu_kernel void @test_add_mul_legacy_f32(ptr addrspace(1) %out, float %a, float %b, float %c) #0 {
54  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
55  %add = fadd float %mul, %c
56  store float %add, ptr addrspace(1) %out, align 4
57  ret void
58}
59
; Same multiply-then-add pattern as test_add_mul_legacy_f32, but compiled
; with attribute group #2, which sets "denormal-fp-math"="preserve-sign".
; The per-generation checks expect a fused mac/mad instruction for the GFX6,
; GFX8, GFX9 and GFX101 prefixes, and a separate multiply and add for the
; GFX103 and GFX11 prefixes.
; A label check for the GFX11 prefix keeps the gfx1100 instruction checks
; confined to this function (that invocation does not enable the GCN prefix).
60; GCN-LABEL: {{^}}test_mad_legacy_f32:
; GFX11-LABEL: {{^}}test_mad_legacy_f32:
61; GFX6: v_mac_legacy_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
62; GFX8: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
63; GFX9: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
64; GFX101: v_mac_legacy_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
65; GFX103: v_mul_legacy_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
66; GFX103: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
67; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
68; GFX11: v_dual_mov_b32 v{{[0-9]+}}, 0 :: v_dual_add_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
69define amdgpu_kernel void @test_mad_legacy_f32(ptr addrspace(1) %out, float %a, float %b, float %c) #2 {
70  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
71  %add = fadd float %mul, %c
72  store float %add, ptr addrspace(1) %out, align 4
73  ret void
74}
75
; Multiply-then-add where one multiplicand is the constant 10.0 (0x41200000
; in the GFX101 / GFX103 checks), compiled with attribute group #2.
; The gfx1100 invocation has no instruction checks in this test. The label
; check for the GFX11 prefix is still added so that GFX11 checks belonging to
; the neighbouring tests cannot match inside this function's output.
; NOTE(review): consider adding gfx1100 instruction checks here once the
; actual llc output has been confirmed.
76; GCN-LABEL: {{^}}test_mad_legacy_f32_imm:
; GFX11-LABEL: {{^}}test_mad_legacy_f32_imm:
77; GFX6: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
78; GFX8: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
79; GFX9: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
80; GFX101: v_mad_legacy_f32 v{{[0-9]+}}, 0x41200000, s{{[0-9]+}}
81; GFX103: v_mul_legacy_f32_e64 v{{[0-9]+}}, 0x41200000, s{{[0-9]+}}
82; GFX103: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
83define amdgpu_kernel void @test_mad_legacy_f32_imm(ptr addrspace(1) %out, float %a, float %c) #2 {
84  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float 10.0)
85  %add = fadd float %mul, %c
86  store float %add, ptr addrspace(1) %out, align 4
87  ret void
88}
89
; Multiply-then-add with both multiplicands negated via fneg, compiled with
; attribute group #2. The checks expect the negations to appear as "-" source
; modifiers: on a single v_mad_legacy_f32 for the MADMACF32 prefix, and on the
; multiply of a separate multiply/add pair for the NOMADMACF32 and GFX11
; prefixes.
; A label check for the GFX11 prefix keeps the gfx1100 instruction checks
; confined to this function (that invocation does not enable the GCN prefix).
90; GCN-LABEL: {{^}}test_mad_legacy_fneg_f32:
; GFX11-LABEL: {{^}}test_mad_legacy_fneg_f32:
91; MADMACF32: v_mad_legacy_f32 v{{[0-9]+}}, -s{{[0-9]+}}, -{{[sv][0-9]+}}, v{{[0-9]+}}
92; NOMADMACF32: v_mul_legacy_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, -s{{[0-9]+}}
93; NOMADMACF32: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
94; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, -s{{[0-9]+}}
95; GFX11: v_dual_mov_b32 v{{[0-9]+}}, 0 :: v_dual_add_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
96define amdgpu_kernel void @test_mad_legacy_fneg_f32(ptr addrspace(1) %out, float %a, float %b, float %c) #2 {
97  %a.fneg = fneg float %a
98  %b.fneg = fneg float %b
99  %mul = call float @llvm.amdgcn.fmul.legacy(float %a.fneg, float %b.fneg)
100  %add = fadd float %mul, %c
101  store float %add, ptr addrspace(1) %out, align 4
102  ret void
103}
104
; Intrinsics called by the tests above; both are declared with attribute
; group #1.
105declare float @llvm.fabs.f32(float) #1
106declare float @llvm.amdgcn.fmul.legacy(float, float) #1
107
; Attribute groups:
;   #0 - nounwind only; used by the plain multiply tests and by
;        test_add_mul_legacy_f32.
;   #1 - nounwind readnone; used on the intrinsic declarations.
;   #2 - nounwind plus "denormal-fp-math"="preserve-sign" and a set of
;        "amdgpu-no-*" attributes; used by the test_mad_legacy_* tests.
108attributes #0 = { nounwind }
109attributes #1 = { nounwind readnone }
110attributes #2 = { nounwind "denormal-fp-math"="preserve-sign" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
111