1; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MADMACF32,GFX6 %s 2; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MADMACF32,GFX8 %s 3; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MADMACF32,GFX9 %s 4; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MADMACF32,GFX9 %s 5; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MADMACF32,GFX101 %s 6; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOMADMACF32,GFX103 %s 7; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s 8 9; GCN-LABEL: {{^}}test_mul_legacy_f32: 10; GCN: v_mul_legacy_f32{{[_e3264]*}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}} 11; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} 12define amdgpu_kernel void @test_mul_legacy_f32(ptr addrspace(1) %out, float %a, float %b) #0 { 13 %result = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) 14 store float %result, ptr addrspace(1) %out, align 4 15 ret void 16} 17 18; GCN-LABEL: {{^}}test_mul_legacy_undef0_f32: 19; GCN: v_mul_legacy_f32{{[_e3264]*}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}} 20; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} 21define amdgpu_kernel void @test_mul_legacy_undef0_f32(ptr addrspace(1) %out, float %a) #0 { 22 %result = call float @llvm.amdgcn.fmul.legacy(float undef, float %a) 23 store float %result, ptr addrspace(1) %out, align 4 24 ret void 25} 26 27; GCN-LABEL: {{^}}test_mul_legacy_undef1_f32: 28; GCN: v_mul_legacy_f32{{[_e3264]*}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}} 29; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} 30define amdgpu_kernel void @test_mul_legacy_undef1_f32(ptr addrspace(1) %out, float %a) #0 { 31 %result = call float @llvm.amdgcn.fmul.legacy(float %a, float undef) 32 store float %result, ptr addrspace(1) %out, align 4 33 ret void 34} 35 36; GCN-LABEL: {{^}}test_mul_legacy_fabs_f32: 37; GCN: v_mul_legacy_f32{{[_e3264]*}} v{{[0-9]+}}, |s{{[0-9]+}}|, |{{[sv][0-9]+}}| 38; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|, |s{{[0-9]+}}| 39define amdgpu_kernel void @test_mul_legacy_fabs_f32(ptr addrspace(1) %out, float %a, float %b) #0 { 40 %a.fabs = call float @llvm.fabs.f32(float %a) 41 %b.fabs = call float @llvm.fabs.f32(float %b) 42 %result = call float @llvm.amdgcn.fmul.legacy(float %a.fabs, float %b.fabs) 43 store float %result, ptr addrspace(1) %out, align 4 44 ret void 45} 46 47; Don't form mad/mac instructions because they don't support denormals. 48; GCN-LABEL: {{^}}test_add_mul_legacy_f32: 49; GCN: v_mul_legacy_f32{{[_e3264]*}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}} 50; GCN: v_add_f32_e{{(32|64)}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}} 51; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} 52; GFX11: v_dual_mov_b32 v{{[0-9]+}}, 0 :: v_dual_add_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} 53define amdgpu_kernel void @test_add_mul_legacy_f32(ptr addrspace(1) %out, float %a, float %b, float %c) #0 { 54 %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) 55 %add = fadd float %mul, %c 56 store float %add, ptr addrspace(1) %out, align 4 57 ret void 58} 59 60; GCN-LABEL: {{^}}test_mad_legacy_f32: 61; GFX6: v_mac_legacy_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} 62; GFX8: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} 63; GFX9: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} 64; GFX101: v_mac_legacy_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} 65; GFX103: v_mul_legacy_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} 66; GFX103: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} 67; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} 68; GFX11: v_dual_mov_b32 v{{[0-9]+}}, 0 :: v_dual_add_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} 69define amdgpu_kernel void @test_mad_legacy_f32(ptr addrspace(1) %out, float %a, float %b, float %c) #2 { 70 %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) 71 %add = fadd float %mul, %c 72 store float %add, ptr addrspace(1) %out, align 4 73 ret void 74} 75 76; GCN-LABEL: {{^}}test_mad_legacy_f32_imm: 77; GFX6: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} 78; GFX8: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} 79; GFX9: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} 80; GFX101: v_mad_legacy_f32 v{{[0-9]+}}, 0x41200000, s{{[0-9]+}} 81; GFX103: v_mul_legacy_f32_e64 v{{[0-9]+}}, 0x41200000, s{{[0-9]+}} 82; GFX103: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} 83define amdgpu_kernel void @test_mad_legacy_f32_imm(ptr addrspace(1) %out, float %a, float %c) #2 { 84 %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float 10.0) 85 %add = fadd float %mul, %c 86 store float %add, ptr addrspace(1) %out, align 4 87 ret void 88} 89 90; GCN-LABEL: {{^}}test_mad_legacy_fneg_f32: 91; MADMACF32: v_mad_legacy_f32 v{{[0-9]+}}, -s{{[0-9]+}}, -{{[sv][0-9]+}}, v{{[0-9]+}} 92; NOMADMACF32: v_mul_legacy_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, -s{{[0-9]+}} 93; NOMADMACF32: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} 94; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, -s{{[0-9]+}} 95; GFX11: v_dual_mov_b32 v{{[0-9]+}}, 0 :: v_dual_add_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} 96define amdgpu_kernel void @test_mad_legacy_fneg_f32(ptr addrspace(1) %out, float %a, float %b, float %c) #2 { 97 %a.fneg = fneg float %a 98 %b.fneg = fneg float %b 99 %mul = call float @llvm.amdgcn.fmul.legacy(float %a.fneg, float %b.fneg) 100 %add = fadd float %mul, %c 101 store float %add, ptr addrspace(1) %out, align 4 102 ret void 103} 104 105declare float @llvm.fabs.f32(float) #1 106declare float @llvm.amdgcn.fmul.legacy(float, float) #1 107 108attributes #0 = { nounwind } 109attributes #1 = { nounwind readnone } 110attributes #2 = { nounwind "denormal-fp-math"="preserve-sign" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } 111