; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-SAFE,VI,VI-SAFE %s
; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-NSZ,VI,VI-NSZ %s

; Tests how an fneg is combined with f16 rcp/sin intrinsics, the f16
; interpolation intrinsics and llvm.arithmetic.fence when compiling for fiji.
; Both invocations above (with and without -enable-no-signed-zeros-fp-math)
; are verified against the same shared assertions.

; --------------------------------------------------------------------------------
; rcp tests
; --------------------------------------------------------------------------------

; fneg of the rcp result. The assertions expect a single v_rcp_f16 whose source
; operand carries the negation.
define half @v_fneg_rcp_f16(half %a) #0 {
; GCN-LABEL: v_fneg_rcp_f16:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_rcp_f16_e64 v0, -v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %rcp = call half @llvm.amdgcn.rcp.f16(half %a)
  %fneg = fneg half %rcp
  ret half %fneg
}

; fneg on both the rcp input and the rcp result. The assertions expect a plain
; v_rcp_f16 with no negation left.
define half @v_fneg_rcp_fneg_f16(half %a) #0 {
; GCN-LABEL: v_fneg_rcp_fneg_f16:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_rcp_f16_e32 v0, v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %fneg.a = fneg half %a
  %rcp = call half @llvm.amdgcn.rcp.f16(half %fneg.a)
  %fneg = fneg half %rcp
  ret half %fneg
}

; fneg on both the rcp input and the rcp result, with the negated input also
; returned. The assertions expect an un-negated v_rcp_f16 plus a separate v_xor
; with 0x8000 that produces the returned negated input.
define { half, half } @v_fneg_rcp_store_use_fneg_f16(half %a) #0 {
; GCN-LABEL: v_fneg_rcp_store_use_fneg_f16:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_rcp_f16_e32 v2, v0
; GCN-NEXT:    v_xor_b32_e32 v1, 0x8000, v0
; GCN-NEXT:    v_mov_b32_e32 v0, v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %fneg.a = fneg half %a
  %rcp = call half @llvm.amdgcn.rcp.f16(half %fneg.a)
  %fneg = fneg half %rcp
  %insert.0 = insertvalue { half, half } poison, half %fneg, 0
  %insert.1 = insertvalue { half, half } %insert.0, half %fneg.a, 1
  ret { half, half } %insert.1
}

; fneg on both the rcp input and the rcp result, with the negated input also
; used by an fmul. The assertions expect an un-negated v_rcp_f16 and the
; negation applied as a source modifier on the v_mul_f16.
define { half, half } @v_fneg_rcp_multi_use_fneg_f16(half %a, half %c) #0 {
; GCN-LABEL: v_fneg_rcp_multi_use_fneg_f16:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_rcp_f16_e32 v2, v0
; GCN-NEXT:    v_mul_f16_e64 v1, -v0, v1
; GCN-NEXT:    v_mov_b32_e32 v0, v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %fneg.a = fneg half %a
  %rcp = call half @llvm.amdgcn.rcp.f16(half %fneg.a)
  %fneg = fneg half %rcp
  %use1 = fmul half %fneg.a, %c
  %insert.0 = insertvalue { half, half } poison, half %fneg, 0
  %insert.1 = insertvalue { half, half } %insert.0, half %use1, 1
  ret { half, half } %insert.1
}

; --------------------------------------------------------------------------------
; sin tests
; --------------------------------------------------------------------------------

; fneg of the sin result. The assertions expect a single v_sin_f16 whose source
; operand carries the negation.
define half @v_fneg_amdgcn_sin_f16(half %a) #0 {
; GCN-LABEL: v_fneg_amdgcn_sin_f16:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_sin_f16_e64 v0, -v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %sin = call half @llvm.amdgcn.sin.f16(half %a)
  %fneg = fneg half %sin
  ret half %fneg
}

; --------------------------------------------------------------------------------
; vintrp tests
; --------------------------------------------------------------------------------

; A negated fmul result feeds two interp.p1.f16 calls. The assertions expect the
; negation on a v_mul_f32 source operand and none on the interp instructions.
define { float, float } @v_fneg_interp_p1_f16(float %a, float %b) #0 {
; GCN-LABEL: v_fneg_interp_p1_f16:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e64 v1, v0, -v1
; GCN-NEXT:    s_mov_b32 m0, 0
; GCN-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
; GCN-NEXT:    v_interp_p1ll_f16 v0, v1, attr0.x
; GCN-NEXT:    v_interp_p1ll_f16 v1, v1, attr0.y
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %mul = fmul float %a, %b
  %fneg = fneg float %mul
  %intrp0 = call float @llvm.amdgcn.interp.p1.f16(float %fneg, i32 0, i32 0, i1 false, i32 0)
  %intrp1 = call float @llvm.amdgcn.interp.p1.f16(float %fneg, i32 1, i32 0, i1 false, i32 0)
  %insert.0 = insertvalue { float, float } poison, float %intrp0, 0
  %insert.1 = insertvalue { float, float } %insert.0, float %intrp1, 1
  ret { float, float } %insert.1
}

; A negated fmul result is the second operand of two interp.p2.f16 calls. The
; assertions expect the negation on a v_mul_f32 source operand and none on the
; interp instructions.
define { half, half } @v_fneg_interp_p2_f16(float %a, float %b) #0 {
; GCN-LABEL: v_fneg_interp_p2_f16:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e64 v1, v0, -v1
; GCN-NEXT:    v_mov_b32_e32 v2, 4.0
; GCN-NEXT:    s_mov_b32 m0, 0
; GCN-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
; GCN-NEXT:    v_interp_p2_f16 v0, v1, attr0.x, v2
; GCN-NEXT:    v_interp_p2_f16 v1, v1, attr0.y, v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %mul = fmul float %a, %b
  %fneg = fneg float %mul
  %intrp0 = call half @llvm.amdgcn.interp.p2.f16(float 4.0, float %fneg, i32 0, i32 0, i1 false, i32 0)
  %intrp1 = call half @llvm.amdgcn.interp.p2.f16(float 4.0, float %fneg, i32 1, i32 0, i1 false, i32 0)
  %insert.0 = insertvalue { half, half } poison, half %intrp0, 0
  %insert.1 = insertvalue { half, half } %insert.0, half %intrp1, 1
  ret { half, half } %insert.1
}

; --------------------------------------------------------------------------------
; arithmetic.fence tests
; --------------------------------------------------------------------------------

; fneg of an arithmetic.fence result. The assertions expect the negation to be
; emitted as a separate v_xor with 0x8000.
; FIXME: Legalization/promote is broken
define half @v_fneg_arithmetic_fence_f16(half %a) #0 {
; GCN-LABEL: v_fneg_arithmetic_fence_f16:
; GCN:       ; %bb.0:
; GCN-NEXT:    ;ARITH_FENCE
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %fence = call half @llvm.arithmetic.fence.f16(half %a)
  %fneg = fneg half %fence
  ret half %fneg
}

; fneg of a fenced fmul result. The assertions expect an un-negated v_mul_f16,
; then the fence marker, then a separate v_xor with 0x8000.
define half @v_fneg_arithmetic_fence_fmul_f16(half %a, half %b) #0 {
; GCN-LABEL: v_fneg_arithmetic_fence_fmul_f16:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f16_e32 v0, v0, v1
; GCN-NEXT:    ;ARITH_FENCE
; GCN-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %mul = fmul half %a, %b
  %fence = call half @llvm.arithmetic.fence.f16(half %mul)
  %fneg = fneg half %fence
  ret half %fneg
}

declare half @llvm.amdgcn.rcp.f16(half) #1
declare half @llvm.amdgcn.sin.f16(half) #1
declare half @llvm.arithmetic.fence.f16(half) #1
declare float @llvm.amdgcn.interp.p1.f16(float, i32, i32, i1, i32) #0
declare half @llvm.amdgcn.interp.p2.f16(float, float, i32, i32, i1, i32) #0

attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind "unsafe-fp-math"="true" }
attributes #3 = { nounwind "no-signed-zeros-fp-math"="true" }
attributes #4 = { nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN-NSZ: {{.*}}
; GCN-SAFE: {{.*}}
; VI: {{.*}}
; VI-NSZ: {{.*}}
; VI-SAFE: {{.*}}