; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s

; Every test below calls llvm.experimental.constrained.fma with
; "round.tonearest" rounding and "fpexcept.strict" exception behavior from a
; strictfp function. The gfx1010 and gfx1100 runs share one check prefix.

; Scalar f64 with unmodified operands: one v_fma_f64 is expected.
define double @v_constained_fma_f64_fpexcept_strict(double %x, double %y, double %z) #0 {
; GCN-LABEL: v_constained_fma_f64_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_f64_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fma = call double @llvm.experimental.constrained.fma.f64(double %x, double %y, double %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret double %fma
}

; <2 x double>: the checks expect one v_fma_f64 per element (two total).
define <2 x double> @v_constained_fma_v2f64_fpexcept_strict(<2 x double> %x, <2 x double> %y, <2 x double> %z) #0 {
; GCN-LABEL: v_constained_fma_v2f64_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
; GCN-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_v2f64_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
; GFX10-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fma = call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %x, <2 x double> %y, <2 x double> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <2 x double> %fma
}

; <3 x double>: the checks expect one v_fma_f64 per element (three total).
define <3 x double> @v_constained_fma_v3f64_fpexcept_strict(<3 x double> %x, <3 x double> %y, <3 x double> %z) #0 {
; GCN-LABEL: v_constained_fma_v3f64_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13]
; GCN-NEXT:    v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15]
; GCN-NEXT:    v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17]
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_v3f64_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13]
; GFX10-NEXT:    v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15]
; GFX10-NEXT:    v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fma = call <3 x double> @llvm.experimental.constrained.fma.v3f64(<3 x double> %x, <3 x double> %y, <3 x double> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <3 x double> %fma
}

; <4 x double>: the checks expect one v_fma_f64 per element (four total).
define <4 x double> @v_constained_fma_v4f64_fpexcept_strict(<4 x double> %x, <4 x double> %y, <4 x double> %z) #0 {
; GCN-LABEL: v_constained_fma_v4f64_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17]
; GCN-NEXT:    v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19]
; GCN-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21]
; GCN-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23]
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_v4f64_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17]
; GFX10-NEXT:    v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19]
; GFX10-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21]
; GFX10-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fma = call <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double> %x, <4 x double> %y, <4 x double> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <4 x double> %fma
}

; fneg of the addend: the checks expect it to appear as a negate source
; modifier on the third operand rather than as a separate instruction.
define double @v_constained_fma_f64_fpexcept_strict_fneg(double %x, double %y, double %z) #0 {
; GCN-LABEL: v_constained_fma_f64_fpexcept_strict_fneg:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_f64_fpexcept_strict_fneg:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %z.neg = fneg double %z
  %fma = call double @llvm.experimental.constrained.fma.f64(double %x, double %y, double %z.neg, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret double %fma
}

; fneg of both multiplicands: the checks expect negate source modifiers on the
; first two operands.
define double @v_constained_fma_f64_fpexcept_strict_fneg_fneg(double %x, double %y, double %z) #0 {
; GCN-LABEL: v_constained_fma_f64_fpexcept_strict_fneg_fneg:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f64 v[0:1], -v[0:1], -v[2:3], v[4:5]
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_f64_fpexcept_strict_fneg_fneg:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_fma_f64 v[0:1], -v[0:1], -v[2:3], v[4:5]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %x.neg = fneg double %x
  %y.neg = fneg double %y
  %fma = call double @llvm.experimental.constrained.fma.f64(double %x.neg, double %y.neg, double %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret double %fma
}

; llvm.fabs of both multiplicands: the checks expect absolute-value source
; modifiers on the first two operands.
define double @v_constained_fma_f64_fpexcept_strict_fabs_fabs(double %x, double %y, double %z) #0 {
; GCN-LABEL: v_constained_fma_f64_fpexcept_strict_fabs_fabs:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f64 v[0:1], |v[0:1]|, |v[2:3]|, v[4:5]
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_f64_fpexcept_strict_fabs_fabs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_fma_f64 v[0:1], |v[0:1]|, |v[2:3]|, v[4:5]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %x.abs = call double @llvm.fabs.f64(double %x) #0
  %y.abs = call double @llvm.fabs.f64(double %y) #0
  %fma = call double @llvm.experimental.constrained.fma.f64(double %x.abs, double %y.abs, double %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret double %fma
}

; <2 x double> with fneg of both multiplicands: the checks expect two
; v_fma_f64, each with negate source modifiers on its first two operands.
define <2 x double> @v_constained_fma_v2f64_fpexcept_strict_fneg_fneg(<2 x double> %x, <2 x double> %y, <2 x double> %z) #0 {
; GCN-LABEL: v_constained_fma_v2f64_fpexcept_strict_fneg_fneg:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f64 v[0:1], -v[0:1], -v[4:5], v[8:9]
; GCN-NEXT:    v_fma_f64 v[2:3], -v[2:3], -v[6:7], v[10:11]
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_v2f64_fpexcept_strict_fneg_fneg:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_fma_f64 v[0:1], -v[0:1], -v[4:5], v[8:9]
; GFX10-NEXT:    v_fma_f64 v[2:3], -v[2:3], -v[6:7], v[10:11]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %x.neg = fneg <2 x double> %x
  %y.neg = fneg <2 x double> %y
  %fma = call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %x.neg, <2 x double> %y.neg, <2 x double> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <2 x double> %fma
}

; Intrinsics used by the tests above.
declare double @llvm.fabs.f64(double)
declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
declare <3 x double> @llvm.experimental.constrained.fma.v3f64(<3 x double>, <3 x double>, <3 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double>, <4 x double>, <4 x double>, metadata, metadata)

; Attribute group applied to every test function and to the fabs call sites.
attributes #0 = { strictfp }