; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s

; Selection tests for llvm.experimental.constrained.fma on half-precision
; scalars and vectors, built with -global-isel for gfx900 and fiji. Every call
; uses "round.tonearest" rounding and "fpexcept.strict" exception behavior, and
; every function carries the strictfp attribute (#0).
;
; NOTE(review): the function names spell "constained" (sic). They are left
; as-is so that the label checks keep matching the emitted symbols.

; Scalar half: both targets are expected to emit one v_fma_f16.
define half @v_constained_fma_f16_fpexcept_strict(half %x, half %y, half %z) #0 {
; GCN-LABEL: v_constained_fma_f16_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f16 v0, v0, v1, v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = call half @llvm.experimental.constrained.fma.f16(half %x, half %y, half %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret half %result
}

; <2 x half>: gfx900 is expected to use one packed v_pk_fma_f16, while fiji
; extracts the high halves with shifts, issues two v_fma_f16, and repacks the
; results with a shift and an or.
define <2 x half> @v_constained_fma_v2f16_fpexcept_strict(<2 x half> %x, <2 x half> %y, <2 x half> %z) #0 {
; GFX9-LABEL: v_constained_fma_v2f16_fpexcept_strict:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_fma_f16 v0, v0, v1, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_constained_fma_v2f16_fpexcept_strict:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
; GFX8-NEXT:    v_fma_f16 v0, v0, v1, v2
; GFX8-NEXT:    v_fma_f16 v1, v3, v4, v5
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
  %result = call <2 x half> @llvm.experimental.constrained.fma.v2f16(<2 x half> %x, <2 x half> %y, <2 x half> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <2 x half> %result
}

; <3 x half>: gfx900 is expected to emit two v_pk_fma_f16 (one per 32-bit
; register); fiji is expected to emit three scalar v_fma_f16 and repack only
; the first register.
define <3 x half> @v_constained_fma_v3f16_fpexcept_strict(<3 x half> %x, <3 x half> %y, <3 x half> %z) #0 {
; GFX9-LABEL: v_constained_fma_v3f16_fpexcept_strict:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
; GFX9-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_constained_fma_v3f16_fpexcept_strict:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 16, v2
; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
; GFX8-NEXT:    v_fma_f16 v0, v0, v2, v4
; GFX8-NEXT:    v_fma_f16 v2, v6, v7, v8
; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX8-NEXT:    v_fma_f16 v1, v1, v3, v5
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
  %result = call <3 x half> @llvm.experimental.constrained.fma.v3f16(<3 x half> %x, <3 x half> %y, <3 x half> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <3 x half> %result
}

; <4 x half>: gfx900 is expected to emit two v_pk_fma_f16; fiji is expected to
; emit four scalar v_fma_f16 and repack both result registers.
define <4 x half> @v_constained_fma_v4f16_fpexcept_strict(<4 x half> %x, <4 x half> %y, <4 x half> %z) #0 {
; GFX9-LABEL: v_constained_fma_v4f16_fpexcept_strict:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
; GFX9-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_constained_fma_v4f16_fpexcept_strict:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 16, v4
; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 16, v3
; GFX8-NEXT:    v_lshrrev_b32_e32 v11, 16, v5
; GFX8-NEXT:    v_fma_f16 v0, v0, v2, v4
; GFX8-NEXT:    v_fma_f16 v2, v6, v8, v10
; GFX8-NEXT:    v_fma_f16 v1, v1, v3, v5
; GFX8-NEXT:    v_fma_f16 v3, v7, v9, v11
; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
  %result = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %x, <4 x half> %y, <4 x half> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <4 x half> %result
}

; Scalar half with a negated addend: the fneg is expected to fold into a
; negate source modifier on the third operand (-v2).
define half @v_constained_fma_f16_fpexcept_strict_fneg(half %x, half %y, half %z) #0 {
; GCN-LABEL: v_constained_fma_f16_fpexcept_strict_fneg:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f16 v0, v0, v1, -v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %z.neg = fneg half %z
  %result = call half @llvm.experimental.constrained.fma.f16(half %x, half %y, half %z.neg, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret half %result
}

; Scalar half with both multiplicands negated: both fnegs are expected to fold
; into negate source modifiers (-v0, -v1).
define half @v_constained_fma_f16_fpexcept_strict_fneg_fneg(half %x, half %y, half %z) #0 {
; GCN-LABEL: v_constained_fma_f16_fpexcept_strict_fneg_fneg:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f16 v0, -v0, -v1, v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %x.neg = fneg half %x
  %y.neg = fneg half %y
  %result = call half @llvm.experimental.constrained.fma.f16(half %x.neg, half %y.neg, half %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret half %result
}

; Scalar half with fabs applied to both multiplicands: both fabs calls are
; expected to fold into absolute-value source modifiers (|v0|, |v1|).
define half @v_constained_fma_f16_fpexcept_strict_fabs_fabs(half %x, half %y, half %z) #0 {
; GCN-LABEL: v_constained_fma_f16_fpexcept_strict_fabs_fabs:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f16 v0, |v0|, |v1|, v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %x.abs = call half @llvm.fabs.f16(half %x) #0
  %y.abs = call half @llvm.fabs.f16(half %y) #0
  %result = call half @llvm.experimental.constrained.fma.f16(half %x.abs, half %y.abs, half %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret half %result
}

; <2 x half> with both multiplicands negated: gfx900 is expected to fold the
; fnegs into neg_lo/neg_hi modifiers on v_pk_fma_f16; fiji is expected to flip
; both sign bits with an xor against 0x80008000 and then take the same
; split/repack path as the plain <2 x half> case.
define <2 x half> @v_constained_fma_v2f16_fpexcept_strict_fneg_fneg(<2 x half> %x, <2 x half> %y, <2 x half> %z) #0 {
; GFX9-LABEL: v_constained_fma_v2f16_fpexcept_strict_fneg_fneg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[1,1,0] neg_hi:[1,1,0]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_constained_fma_v2f16_fpexcept_strict_fneg_fneg:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
; GFX8-NEXT:    v_fma_f16 v0, v0, v1, v2
; GFX8-NEXT:    v_fma_f16 v1, v3, v4, v5
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
  %x.neg = fneg <2 x half> %x
  %y.neg = fneg <2 x half> %y
  %result = call <2 x half> @llvm.experimental.constrained.fma.v2f16(<2 x half> %x.neg, <2 x half> %y.neg, <2 x half> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <2 x half> %result
}

; Intrinsics referenced by the tests above.
declare half @llvm.fabs.f16(half)
declare half @llvm.experimental.constrained.fma.f16(half, half, half, metadata, metadata)
declare <2 x half> @llvm.experimental.constrained.fma.v2f16(<2 x half>, <2 x half>, <2 x half>, metadata, metadata)
declare <3 x half> @llvm.experimental.constrained.fma.v3f16(<3 x half>, <3 x half>, <3 x half>, metadata, metadata)
declare <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half>, <4 x half>, <4 x half>, metadata, metadata)

; Attribute group applied to every test function and to the fabs call sites.
attributes #0 = { strictfp }