; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX9-DENORM %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX10-DENORM %s

; Every function below multiplies half-precision operands with `fmul fast`,
; widens the product to single precision with `fpext`, and then combines it
; with a float operand through `fsub fast`. The check lines record the
; instructions llc emits for each of the two -mcpu values used above when
; GlobalISel is enabled and --denormal-fp-math=preserve-sign is in effect.
; The scalar and <4 x half> variants are each tested with the extended product
; as the left-hand and as the right-hand operand of the subtraction.

; fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
; Scalar case, product is the minuend: both targets are expected to emit one
; mix multiply-add whose third source operand is negated.
define amdgpu_vs float @test_f16_to_f32_sub_ext_mul(half %x, half %y, float %z) {
; GFX9-DENORM-LABEL: test_f16_to_f32_sub_ext_mul:
; GFX9-DENORM:       ; %bb.0: ; %entry
; GFX9-DENORM-NEXT:    v_mad_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]
; GFX9-DENORM-NEXT:    ; return to shader part epilog
;
; GFX10-DENORM-LABEL: test_f16_to_f32_sub_ext_mul:
; GFX10-DENORM:       ; %bb.0: ; %entry
; GFX10-DENORM-NEXT:    v_fma_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]
; GFX10-DENORM-NEXT:    ; return to shader part epilog
entry:
  %a = fmul fast half %x, %y
  %b = fpext half %a to float
  %c = fsub fast float %b, %z
  ret float %c
}

; fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
; Scalar case, product is the subtrahend: both targets are expected to emit one
; mix multiply-add whose first source operand is negated.
define amdgpu_vs float @test_f16_to_f32_sub_ext_mul_rhs(float %x, half %y, half %z) {
; GFX9-DENORM-LABEL: test_f16_to_f32_sub_ext_mul_rhs:
; GFX9-DENORM:       ; %bb.0: ; %.entry
; GFX9-DENORM-NEXT:    v_mad_mix_f32 v0, -v1, v2, v0 op_sel_hi:[1,1,0]
; GFX9-DENORM-NEXT:    ; return to shader part epilog
;
; GFX10-DENORM-LABEL: test_f16_to_f32_sub_ext_mul_rhs:
; GFX10-DENORM:       ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT:    v_fma_mix_f32 v0, -v1, v2, v0 op_sel_hi:[1,1,0]
; GFX10-DENORM-NEXT:    ; return to shader part epilog
.entry:
  %a = fmul fast half %y, %z
  %b = fpext half %a to float
  %c = fsub fast float %x, %b
  ret float %c
}

; fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
; <4 x half> case, product is the minuend. The recorded gfx900 output is not
; fused: it consists of packed f16 multiplies, f16-to-f32 conversions and f32
; subtractions. The recorded gfx1010 output uses one v_fma_mix_f32 per element,
; followed by two register moves that place the first two results in v0 and v1.
define amdgpu_vs <4 x float> @test_v4f16_to_v4f32_sub_ext_mul(<4 x half> %x, <4 x half> %y, <4 x float> %z) {
; GFX9-DENORM-LABEL: test_v4f16_to_v4f32_sub_ext_mul:
; GFX9-DENORM:       ; %bb.0: ; %entry
; GFX9-DENORM-NEXT:    v_pk_mul_f16 v0, v0, v2
; GFX9-DENORM-NEXT:    v_pk_mul_f16 v1, v1, v3
; GFX9-DENORM-NEXT:    v_cvt_f32_f16_e32 v2, v0
; GFX9-DENORM-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-DENORM-NEXT:    v_cvt_f32_f16_e32 v8, v1
; GFX9-DENORM-NEXT:    v_cvt_f32_f16_sdwa v9, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-DENORM-NEXT:    v_sub_f32_e32 v0, v2, v4
; GFX9-DENORM-NEXT:    v_sub_f32_e32 v1, v3, v5
; GFX9-DENORM-NEXT:    v_sub_f32_e32 v2, v8, v6
; GFX9-DENORM-NEXT:    v_sub_f32_e32 v3, v9, v7
; GFX9-DENORM-NEXT:    ; return to shader part epilog
;
; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_ext_mul:
; GFX10-DENORM:       ; %bb.0: ; %entry
; GFX10-DENORM-NEXT:    v_fma_mix_f32 v4, v0, v2, -v4 op_sel_hi:[1,1,0]
; GFX10-DENORM-NEXT:    v_fma_mix_f32 v5, v0, v2, -v5 op_sel:[1,1,0] op_sel_hi:[1,1,0]
; GFX10-DENORM-NEXT:    v_fma_mix_f32 v2, v1, v3, -v6 op_sel_hi:[1,1,0]
; GFX10-DENORM-NEXT:    v_fma_mix_f32 v3, v1, v3, -v7 op_sel:[1,1,0] op_sel_hi:[1,1,0]
; GFX10-DENORM-NEXT:    v_mov_b32_e32 v0, v4
; GFX10-DENORM-NEXT:    v_mov_b32_e32 v1, v5
; GFX10-DENORM-NEXT:    ; return to shader part epilog
entry:
  %a = fmul fast <4 x half> %x, %y
  %b = fpext <4 x half> %a to <4 x float>
  %c = fsub fast <4 x float> %b, %z
  ret <4 x float> %c
}

; fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
; <4 x half> case, product is the subtrahend. As in the previous vector test,
; the recorded gfx900 output keeps separate packed multiplies, conversions and
; subtractions, while the recorded gfx1010 output uses one v_fma_mix_f32 per
; element with the first source operand negated.
define amdgpu_vs <4 x float> @test_v4f16_to_v4f32_sub_ext_mul_rhs(<4 x float> %x, <4 x half> %y, <4 x half> %z) {
; GFX9-DENORM-LABEL: test_v4f16_to_v4f32_sub_ext_mul_rhs:
; GFX9-DENORM:       ; %bb.0: ; %.entry
; GFX9-DENORM-NEXT:    v_pk_mul_f16 v4, v4, v6
; GFX9-DENORM-NEXT:    v_pk_mul_f16 v5, v5, v7
; GFX9-DENORM-NEXT:    v_cvt_f32_f16_e32 v6, v4
; GFX9-DENORM-NEXT:    v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-DENORM-NEXT:    v_cvt_f32_f16_e32 v7, v5
; GFX9-DENORM-NEXT:    v_cvt_f32_f16_sdwa v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-DENORM-NEXT:    v_sub_f32_e32 v0, v0, v6
; GFX9-DENORM-NEXT:    v_sub_f32_e32 v1, v1, v4
; GFX9-DENORM-NEXT:    v_sub_f32_e32 v2, v2, v7
; GFX9-DENORM-NEXT:    v_sub_f32_e32 v3, v3, v5
; GFX9-DENORM-NEXT:    ; return to shader part epilog
;
; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_ext_mul_rhs:
; GFX10-DENORM:       ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT:    v_fma_mix_f32 v0, -v4, v6, v0 op_sel_hi:[1,1,0]
; GFX10-DENORM-NEXT:    v_fma_mix_f32 v1, -v4, v6, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0]
; GFX10-DENORM-NEXT:    v_fma_mix_f32 v2, -v5, v7, v2 op_sel_hi:[1,1,0]
; GFX10-DENORM-NEXT:    v_fma_mix_f32 v3, -v5, v7, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
; GFX10-DENORM-NEXT:    ; return to shader part epilog
.entry:
  %a = fmul fast <4 x half> %y, %z
  %b = fpext <4 x half> %a to <4 x float>
  %c = fsub fast <4 x float> %x, %b
  ret <4 x float> %c
}