; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s

; Codegen test for llvm.experimental.constrained.fma on f32 and f32 vectors
; with "round.tonearest" rounding and "fpexcept.strict" exception behavior.
; Every function is strictfp. The expected output is one v_fma_f32 per
; element, with fneg/fabs of the operands folded into source modifiers.
; The gfx1010 and gfx1100 runs share one set of assertions.

; Scalar f32: a single v_fma_f32 is expected.
define float @v_constained_fma_f32_fpexcept_strict(float %x, float %y, float %z) #0 {
; GCN-LABEL: v_constained_fma_f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f32 v0, v0, v1, v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_fma_f32 v0, v0, v1, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %val = call float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

; <2 x float>: expected to be split into two scalar v_fma_f32.
define <2 x float> @v_constained_fma_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y, <2 x float> %z) #0 {
; GCN-LABEL: v_constained_fma_v2f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f32 v0, v0, v2, v4
; GCN-NEXT:    v_fma_f32 v1, v1, v3, v5
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_v2f32_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_fma_f32 v0, v0, v2, v4
; GFX10-NEXT:    v_fma_f32 v1, v1, v3, v5
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <2 x float> %val
}

; <3 x float>: expected to be split into three scalar v_fma_f32.
define <3 x float> @v_constained_fma_v3f32_fpexcept_strict(<3 x float> %x, <3 x float> %y, <3 x float> %z) #0 {
; GCN-LABEL: v_constained_fma_v3f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f32 v0, v0, v3, v6
; GCN-NEXT:    v_fma_f32 v1, v1, v4, v7
; GCN-NEXT:    v_fma_f32 v2, v2, v5, v8
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_v3f32_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_fma_f32 v0, v0, v3, v6
; GFX10-NEXT:    v_fma_f32 v1, v1, v4, v7
; GFX10-NEXT:    v_fma_f32 v2, v2, v5, v8
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %val = call <3 x float> @llvm.experimental.constrained.fma.v3f32(<3 x float> %x, <3 x float> %y, <3 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <3 x float> %val
}

; <4 x float>: expected to be split into four scalar v_fma_f32.
define <4 x float> @v_constained_fma_v4f32_fpexcept_strict(<4 x float> %x, <4 x float> %y, <4 x float> %z) #0 {
; GCN-LABEL: v_constained_fma_v4f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f32 v0, v0, v4, v8
; GCN-NEXT:    v_fma_f32 v1, v1, v5, v9
; GCN-NEXT:    v_fma_f32 v2, v2, v6, v10
; GCN-NEXT:    v_fma_f32 v3, v3, v7, v11
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_v4f32_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_fma_f32 v0, v0, v4, v8
; GFX10-NEXT:    v_fma_f32 v1, v1, v5, v9
; GFX10-NEXT:    v_fma_f32 v2, v2, v6, v10
; GFX10-NEXT:    v_fma_f32 v3, v3, v7, v11
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %val = call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <4 x float> %val
}

; fneg of the addend is expected to fold into a neg modifier on src2.
define float @v_constained_fma_f32_fpexcept_strict_fneg(float %x, float %y, float %z) #0 {
; GCN-LABEL: v_constained_fma_f32_fpexcept_strict_fneg:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f32 v0, v0, v1, -v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict_fneg:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_fma_f32 v0, v0, v1, -v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %neg.z = fneg float %z
  %val = call float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %neg.z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

; fneg of both multiplicands is expected to fold into neg modifiers on
; src0 and src1.
define float @v_constained_fma_f32_fpexcept_strict_fneg_fneg(float %x, float %y, float %z) #0 {
; GCN-LABEL: v_constained_fma_f32_fpexcept_strict_fneg_fneg:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f32 v0, -v0, -v1, v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict_fneg_fneg:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_fma_f32 v0, -v0, -v1, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %neg.x = fneg float %x
  %neg.y = fneg float %y
  %val = call float @llvm.experimental.constrained.fma.f32(float %neg.x, float %neg.y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

; fabs of both multiplicands is expected to fold into |abs| modifiers on
; src0 and src1.
define float @v_constained_fma_f32_fpexcept_strict_fabs_fabs(float %x, float %y, float %z) #0 {
; GCN-LABEL: v_constained_fma_f32_fpexcept_strict_fabs_fabs:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f32 v0, |v0|, |v1|, v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict_fabs_fabs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_fma_f32 v0, |v0|, |v1|, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %abs.x = call float @llvm.fabs.f32(float %x) #0
  %abs.y = call float @llvm.fabs.f32(float %y) #0
  %val = call float @llvm.experimental.constrained.fma.f32(float %abs.x, float %abs.y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}

; Vector variant of the fneg/fneg case: each of the two scalarized
; v_fma_f32 is expected to carry neg modifiers on src0 and src1.
define <2 x float> @v_constained_fma_v2f32_fpexcept_strict_fneg_fneg(<2 x float> %x, <2 x float> %y, <2 x float> %z) #0 {
; GCN-LABEL: v_constained_fma_v2f32_fpexcept_strict_fneg_fneg:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f32 v0, -v0, -v2, v4
; GCN-NEXT:    v_fma_f32 v1, -v1, -v3, v5
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_v2f32_fpexcept_strict_fneg_fneg:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_fma_f32 v0, -v0, -v2, v4
; GFX10-NEXT:    v_fma_f32 v1, -v1, -v3, v5
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %neg.x = fneg <2 x float> %x
  %neg.y = fneg <2 x float> %y
  %val = call <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float> %neg.x, <2 x float> %neg.y, <2 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <2 x float> %val
}

declare float @llvm.fabs.f32(float)
declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
declare <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float>, <2 x float>, <2 x float>, metadata, metadata)
declare <3 x float> @llvm.experimental.constrained.fma.v3f32(<3 x float>, <3 x float>, <3 x float>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)

; Attribute group applied to every function above and to the fabs call sites.
attributes #0 = { strictfp }