; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-F32FLUSH %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -denormal-fp-math-f32=ieee -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-F32DENORM %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX89,GFX9-F32FLUSH %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX89,GFX9-F32DENORM %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx803 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX89 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx803 -denormal-fp-math-f32=ieee -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX89 %s

; Every function below is compiled for gfx1100, gfx900 and gfx803, each with
; the f32 denormal mode set to preserve-sign and to ieee (see the llc
; invocations above). The "fold" comments name the combine a test exercises;
; the autogenerated assertions record what each configuration actually emits.

; fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
define float @fadd_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
; GFX11-LABEL: fadd_fpext_fmul_f16_to_f32:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f16_e32 v0, v0, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT:    v_add_f32_e32 v0, v0, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32FLUSH-LABEL: fadd_fpext_fmul_f16_to_f32:
; GFX9-F32FLUSH:       ; %bb.0: ; %entry
; GFX9-F32FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32FLUSH-NEXT:    v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GFX9-F32FLUSH-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32DENORM-LABEL: fadd_fpext_fmul_f16_to_f32:
; GFX9-F32DENORM:       ; %bb.0: ; %entry
; GFX9-F32DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32DENORM-NEXT:    v_mul_f16_e32 v0, v0, v1
; GFX9-F32DENORM-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX9-F32DENORM-NEXT:    v_add_f32_e32 v0, v0, v2
; GFX9-F32DENORM-NEXT:    s_setpc_b64 s[30:31]
entry:
  %mul = fmul half %x, %y
  %mul.ext = fpext half %mul to float
  %add = fadd float %mul.ext, %z
  ret float %add
}

; f16->f64 is not free.
define double @fadd_fpext_fmul_f16_to_f64(half %x, half %y, double %z) #0 {
; GFX11-LABEL: fadd_fpext_fmul_f16_to_f64:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f16_e32 v0, v0, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: fadd_fpext_fmul_f16_to_f64:
; GFX89:       ; %bb.0: ; %entry
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_mul_f16_e32 v0, v0, v1
; GFX89-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX89-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
; GFX89-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
; GFX89-NEXT:    s_setpc_b64 s[30:31]
entry:
  %mul = fmul half %x, %y
  %mul.ext = fpext half %mul to double
  %add = fadd double %mul.ext, %z
  ret double %add
}

; f32->f64 is not free.
define double @fadd_fpext_fmul_f32_to_f64(float %x, float %y, double %z) #0 {
; GFX11-LABEL: fadd_fpext_fmul_f32_to_f64:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
; GFX11-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: fadd_fpext_fmul_f32_to_f64:
; GFX89:       ; %bb.0: ; %entry
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX89-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
; GFX89-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
; GFX89-NEXT:    s_setpc_b64 s[30:31]
entry:
  %mul = fmul float %x, %y
  %mul.ext = fpext float %mul to double
  %add = fadd double %mul.ext, %z
  ret double %add
}

; fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
define float @fadd_fpext_fmul_f16_to_f32_commute(half %x, half %y, float %z) #0 {
; GFX11-LABEL: fadd_fpext_fmul_f16_to_f32_commute:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f16_e32 v0, v0, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT:    v_add_f32_e32 v0, v2, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32FLUSH-LABEL: fadd_fpext_fmul_f16_to_f32_commute:
; GFX9-F32FLUSH:       ; %bb.0: ; %entry
; GFX9-F32FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32FLUSH-NEXT:    v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GFX9-F32FLUSH-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32DENORM-LABEL: fadd_fpext_fmul_f16_to_f32_commute:
; GFX9-F32DENORM:       ; %bb.0: ; %entry
; GFX9-F32DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32DENORM-NEXT:    v_mul_f16_e32 v0, v0, v1
; GFX9-F32DENORM-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX9-F32DENORM-NEXT:    v_add_f32_e32 v0, v2, v0
; GFX9-F32DENORM-NEXT:    s_setpc_b64 s[30:31]
entry:
  %mul = fmul half %x, %y
  %mul.ext = fpext half %mul to float
  %add = fadd float %z, %mul.ext
  ret float %add
}

; fold (fadd (fma x, y, (fpext (fmul u, v))), z)
;   -> (fma x, y, (fma (fpext u), (fpext v), z))
define float @fadd_muladd_fpext_fmul_f16_to_f32(float %x, float %y, half %u, half %v, float %z) #0 {
; GFX11-LABEL: fadd_muladd_fpext_fmul_f16_to_f32:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f16_e32 v2, v2, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[0,0,1]
; GFX11-NEXT:    v_add_f32_e32 v0, v0, v4
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32FLUSH-LABEL: fadd_muladd_fpext_fmul_f16_to_f32:
; GFX9-F32FLUSH:       ; %bb.0: ; %entry
; GFX9-F32FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32FLUSH-NEXT:    v_mad_mix_f32 v2, v2, v3, v4 op_sel_hi:[1,1,0]
; GFX9-F32FLUSH-NEXT:    v_mac_f32_e32 v2, v0, v1
; GFX9-F32FLUSH-NEXT:    v_mov_b32_e32 v0, v2
; GFX9-F32FLUSH-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32DENORM-LABEL: fadd_muladd_fpext_fmul_f16_to_f32:
; GFX9-F32DENORM:       ; %bb.0: ; %entry
; GFX9-F32DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32DENORM-NEXT:    v_mul_f16_e32 v2, v2, v3
; GFX9-F32DENORM-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX9-F32DENORM-NEXT:    v_fma_f32 v0, v0, v1, v2
; GFX9-F32DENORM-NEXT:    v_add_f32_e32 v0, v0, v4
; GFX9-F32DENORM-NEXT:    s_setpc_b64 s[30:31]
entry:
  %mul = fmul half %u, %v
  %mul.ext = fpext half %mul to float
  %fma = call float @llvm.fmuladd.f32(float %x, float %y, float %mul.ext)
  %add = fadd float %fma, %z
  ret float %add
}

; fold (fadd x, (fma y, z, (fpext (fmul u, v))))
;   -> (fma y, z, (fma (fpext u), (fpext v), x))
define float @fadd_muladd_fpext_fmul_f16_to_f32_commute(float %x, float %y, half %u, half %v, float %z) #0 {
; GFX11-LABEL: fadd_muladd_fpext_fmul_f16_to_f32_commute:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f16_e32 v2, v2, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[0,0,1]
; GFX11-NEXT:    v_add_f32_e32 v0, v4, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32FLUSH-LABEL: fadd_muladd_fpext_fmul_f16_to_f32_commute:
; GFX9-F32FLUSH:       ; %bb.0: ; %entry
; GFX9-F32FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32FLUSH-NEXT:    v_mad_mix_f32 v2, v2, v3, v4 op_sel_hi:[1,1,0]
; GFX9-F32FLUSH-NEXT:    v_mac_f32_e32 v2, v0, v1
; GFX9-F32FLUSH-NEXT:    v_mov_b32_e32 v0, v2
; GFX9-F32FLUSH-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32DENORM-LABEL: fadd_muladd_fpext_fmul_f16_to_f32_commute:
; GFX9-F32DENORM:       ; %bb.0: ; %entry
; GFX9-F32DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32DENORM-NEXT:    v_mul_f16_e32 v2, v2, v3
; GFX9-F32DENORM-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX9-F32DENORM-NEXT:    v_fma_f32 v0, v0, v1, v2
; GFX9-F32DENORM-NEXT:    v_add_f32_e32 v0, v4, v0
; GFX9-F32DENORM-NEXT:    s_setpc_b64 s[30:31]
entry:
  %mul = fmul half %u, %v
  %mul.ext = fpext half %mul to float
  %fma = call float @llvm.fmuladd.f32(float %x, float %y, float %mul.ext)
  %add = fadd float %z, %fma
  ret float %add
}

; Same shape as fadd_muladd_fpext_fmul_f16_to_f32, but the outer multiply-add
; is written as separate 'fmul contract' / 'fadd contract' instructions rather
; than a call to llvm.fmuladd.f32.
define float @fadd_fmad_fpext_fmul_f16_to_f32(float %x, float %y, half %u, half %v, float %z) #0 {
; GFX11-LABEL: fadd_fmad_fpext_fmul_f16_to_f32:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f16_e32 v2, v2, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[0,0,1]
; GFX11-NEXT:    v_add_f32_e32 v0, v0, v4
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32FLUSH-LABEL: fadd_fmad_fpext_fmul_f16_to_f32:
; GFX9-F32FLUSH:       ; %bb.0: ; %entry
; GFX9-F32FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32FLUSH-NEXT:    v_mad_mix_f32 v2, v2, v3, v4 op_sel_hi:[1,1,0]
; GFX9-F32FLUSH-NEXT:    v_mac_f32_e32 v2, v0, v1
; GFX9-F32FLUSH-NEXT:    v_mov_b32_e32 v0, v2
; GFX9-F32FLUSH-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32DENORM-LABEL: fadd_fmad_fpext_fmul_f16_to_f32:
; GFX9-F32DENORM:       ; %bb.0: ; %entry
; GFX9-F32DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32DENORM-NEXT:    v_mul_f16_e32 v2, v2, v3
; GFX9-F32DENORM-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX9-F32DENORM-NEXT:    v_fma_f32 v0, v0, v1, v2
; GFX9-F32DENORM-NEXT:    v_add_f32_e32 v0, v0, v4
; GFX9-F32DENORM-NEXT:    s_setpc_b64 s[30:31]
entry:
  %mul = fmul half %u, %v
  %mul.ext = fpext half %mul to float
  %mul1 = fmul contract float %x, %y
  %fmad = fadd contract float %mul1, %mul.ext
  %add = fadd float %fmad, %z
  ret float %add
}

; fold (fadd (fma x, y, (fpext (fmul u, v))), z)
;   -> (fma x, y, (fma (fpext u), (fpext v), z))
define float @fadd_fma_fpext_fmul_f16_to_f32(float %x, float %y, half %u, half %v, float %z) #0 {
; GFX11-LABEL: fadd_fma_fpext_fmul_f16_to_f32:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f16_e32 v2, v2, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[0,0,1]
; GFX11-NEXT:    v_add_f32_e32 v0, v0, v4
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32FLUSH-LABEL: fadd_fma_fpext_fmul_f16_to_f32:
; GFX9-F32FLUSH:       ; %bb.0: ; %entry
; GFX9-F32FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32FLUSH-NEXT:    v_mad_mix_f32 v2, v2, v3, v4 op_sel_hi:[1,1,0]
; GFX9-F32FLUSH-NEXT:    v_mac_f32_e32 v2, v0, v1
; GFX9-F32FLUSH-NEXT:    v_mov_b32_e32 v0, v2
; GFX9-F32FLUSH-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32DENORM-LABEL: fadd_fma_fpext_fmul_f16_to_f32:
; GFX9-F32DENORM:       ; %bb.0: ; %entry
; GFX9-F32DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32DENORM-NEXT:    v_mul_f16_e32 v2, v2, v3
; GFX9-F32DENORM-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX9-F32DENORM-NEXT:    v_fma_f32 v0, v0, v1, v2
; GFX9-F32DENORM-NEXT:    v_add_f32_e32 v0, v0, v4
; GFX9-F32DENORM-NEXT:    s_setpc_b64 s[30:31]
entry:
  %mul = fmul contract half %u, %v
  %mul.ext = fpext half %mul to float
  %fma = call float @llvm.fma.f32(float %x, float %y, float %mul.ext)
  %add = fadd float %fma, %z
  ret float %add
}

; Commuted form of fadd_fma_fpext_fmul_f16_to_f32:
;   (fadd z, (fma x, y, (fpext (fmul u, v))))
define float @fadd_fma_fpext_fmul_f16_to_f32_commute(float %x, float %y, half %u, half %v, float %z) #0 {
; GFX11-LABEL: fadd_fma_fpext_fmul_f16_to_f32_commute:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f16_e32 v2, v2, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[0,0,1]
; GFX11-NEXT:    v_add_f32_e32 v0, v4, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32FLUSH-LABEL: fadd_fma_fpext_fmul_f16_to_f32_commute:
; GFX9-F32FLUSH:       ; %bb.0: ; %entry
; GFX9-F32FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32FLUSH-NEXT:    v_mad_mix_f32 v2, v2, v3, v4 op_sel_hi:[1,1,0]
; GFX9-F32FLUSH-NEXT:    v_mac_f32_e32 v2, v0, v1
; GFX9-F32FLUSH-NEXT:    v_mov_b32_e32 v0, v2
; GFX9-F32FLUSH-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32DENORM-LABEL: fadd_fma_fpext_fmul_f16_to_f32_commute:
; GFX9-F32DENORM:       ; %bb.0: ; %entry
; GFX9-F32DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32DENORM-NEXT:    v_mul_f16_e32 v2, v2, v3
; GFX9-F32DENORM-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX9-F32DENORM-NEXT:    v_fma_f32 v0, v0, v1, v2
; GFX9-F32DENORM-NEXT:    v_add_f32_e32 v0, v4, v0
; GFX9-F32DENORM-NEXT:    s_setpc_b64 s[30:31]
entry:
  %mul = fmul contract half %u, %v
  %mul.ext = fpext half %mul to float
  %fma = call float @llvm.fma.f32(float %x, float %y, float %mul.ext)
  %add = fadd float %z, %fma
  ret float %add
}

; fold (fadd x, (fpext (fma y, z, (fmul u, v))))
;   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
define float @fadd_fpext_fmuladd_f16_to_f32(float %x, half %y, half %z, half %u, half %v) #0 {
; GFX11-LABEL: fadd_fpext_fmuladd_f16_to_f32:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f16_e32 v3, v3, v4
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fmac_f16_e32 v3, v1, v2
; GFX11-NEXT:    v_cvt_f32_f16_e32 v1, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_add_f32_e32 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32FLUSH-LABEL: fadd_fpext_fmuladd_f16_to_f32:
; GFX9-F32FLUSH:       ; %bb.0: ; %entry
; GFX9-F32FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32FLUSH-NEXT:    v_mad_mix_f32 v0, v3, v4, v0 op_sel_hi:[1,1,0]
; GFX9-F32FLUSH-NEXT:    v_mad_mix_f32 v0, v1, v2, v0 op_sel_hi:[1,1,0]
; GFX9-F32FLUSH-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32DENORM-LABEL: fadd_fpext_fmuladd_f16_to_f32:
; GFX9-F32DENORM:       ; %bb.0: ; %entry
; GFX9-F32DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32DENORM-NEXT:    v_mul_f16_e32 v3, v3, v4
; GFX9-F32DENORM-NEXT:    v_fma_f16 v1, v1, v2, v3
; GFX9-F32DENORM-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX9-F32DENORM-NEXT:    v_add_f32_e32 v0, v0, v1
; GFX9-F32DENORM-NEXT:    s_setpc_b64 s[30:31]
entry:
  %mul = fmul contract half %u, %v
  %fma = call half @llvm.fmuladd.f16(half %y, half %z, half %mul)
  %ext.fma = fpext half %fma to float
  %add = fadd float %x, %ext.fma
  ret float %add
}

; Same pattern as fadd_fpext_fmuladd_f16_to_f32, using llvm.fma.f16 in place
; of llvm.fmuladd.f16.
define float @fadd_fpext_fma_f16_to_f32(float %x, half %y, half %z, half %u, half %v) #0 {
; GFX11-LABEL: fadd_fpext_fma_f16_to_f32:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f16_e32 v3, v3, v4
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fmac_f16_e32 v3, v1, v2
; GFX11-NEXT:    v_cvt_f32_f16_e32 v1, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_add_f32_e32 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32FLUSH-LABEL: fadd_fpext_fma_f16_to_f32:
; GFX9-F32FLUSH:       ; %bb.0: ; %entry
; GFX9-F32FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32FLUSH-NEXT:    v_mad_mix_f32 v0, v3, v4, v0 op_sel_hi:[1,1,0]
; GFX9-F32FLUSH-NEXT:    v_mad_mix_f32 v0, v1, v2, v0 op_sel_hi:[1,1,0]
; GFX9-F32FLUSH-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32DENORM-LABEL: fadd_fpext_fma_f16_to_f32:
; GFX9-F32DENORM:       ; %bb.0: ; %entry
; GFX9-F32DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32DENORM-NEXT:    v_mul_f16_e32 v3, v3, v4
; GFX9-F32DENORM-NEXT:    v_fma_f16 v1, v1, v2, v3
; GFX9-F32DENORM-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX9-F32DENORM-NEXT:    v_add_f32_e32 v0, v0, v1
; GFX9-F32DENORM-NEXT:    s_setpc_b64 s[30:31]
entry:
  %mul = fmul contract half %u, %v
  %fma = call half @llvm.fma.f16(half %y, half %z, half %mul)
  %ext.fma = fpext half %fma to float
  %add = fadd float %x, %ext.fma
  ret float %add
}

; Commuted form of fadd_fpext_fma_f16_to_f32:
;   (fadd (fpext (fma y, z, (fmul u, v))), x)
define float @fadd_fpext_fma_f16_to_f32_commute(float %x, half %y, half %z, half %u, half %v) #0 {
; GFX11-LABEL: fadd_fpext_fma_f16_to_f32_commute:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f16_e32 v3, v3, v4
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fmac_f16_e32 v3, v1, v2
; GFX11-NEXT:    v_cvt_f32_f16_e32 v1, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_add_f32_e32 v0, v1, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32FLUSH-LABEL: fadd_fpext_fma_f16_to_f32_commute:
; GFX9-F32FLUSH:       ; %bb.0: ; %entry
; GFX9-F32FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32FLUSH-NEXT:    v_mad_mix_f32 v0, v3, v4, v0 op_sel_hi:[1,1,0]
; GFX9-F32FLUSH-NEXT:    v_mad_mix_f32 v0, v1, v2, v0 op_sel_hi:[1,1,0]
; GFX9-F32FLUSH-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32DENORM-LABEL: fadd_fpext_fma_f16_to_f32_commute:
; GFX9-F32DENORM:       ; %bb.0: ; %entry
; GFX9-F32DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32DENORM-NEXT:    v_mul_f16_e32 v3, v3, v4
; GFX9-F32DENORM-NEXT:    v_fma_f16 v1, v1, v2, v3
; GFX9-F32DENORM-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX9-F32DENORM-NEXT:    v_add_f32_e32 v0, v1, v0
; GFX9-F32DENORM-NEXT:    s_setpc_b64 s[30:31]
entry:
  %mul = fmul contract half %u, %v
  %fma = call half @llvm.fma.f16(half %y, half %z, half %mul)
  %ext.fma = fpext half %fma to float
  %add = fadd float %ext.fma, %x
  ret float %add
}

; fold (fsub (fpext (fmul x, y)), z)
;   -> (fma (fpext x), (fpext y), (fneg z))
define float @fsub_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
; GFX11-LABEL: fsub_fpext_fmul_f16_to_f32:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f16_e32 v0, v0, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT:    v_sub_f32_e32 v0, v0, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32FLUSH-LABEL: fsub_fpext_fmul_f16_to_f32:
; GFX9-F32FLUSH:       ; %bb.0: ; %entry
; GFX9-F32FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32FLUSH-NEXT:    v_mad_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]
; GFX9-F32FLUSH-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32DENORM-LABEL: fsub_fpext_fmul_f16_to_f32:
; GFX9-F32DENORM:       ; %bb.0: ; %entry
; GFX9-F32DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32DENORM-NEXT:    v_mul_f16_e32 v0, v0, v1
; GFX9-F32DENORM-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX9-F32DENORM-NEXT:    v_sub_f32_e32 v0, v0, v2
; GFX9-F32DENORM-NEXT:    s_setpc_b64 s[30:31]
entry:
  %mul = fmul half %x, %y
  %mul.ext = fpext half %mul to float
  %add = fsub float %mul.ext, %z
  ret float %add
}

; fold (fsub x, (fpext (fmul y, z)))
;   -> (fma (fneg (fpext y)), (fpext z), x)
define float @fsub_fpext_fmul_f16_to_f32_commute(float %x, half %y, half %z) #0 {
; GFX11-F32FLUSH-LABEL: fsub_fpext_fmul_f16_to_f32_commute:
; GFX11-F32FLUSH:       ; %bb.0: ; %entry
; GFX11-F32FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-F32FLUSH-NEXT:    v_fma_mix_f32 v0, -v1, v2, v0 op_sel_hi:[1,1,0]
; GFX11-F32FLUSH-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-F32DENORM-LABEL: fsub_fpext_fmul_f16_to_f32_commute:
; GFX11-F32DENORM:       ; %bb.0: ; %entry
; GFX11-F32DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-F32DENORM-NEXT:    v_mul_f16_e32 v1, v1, v2
; GFX11-F32DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-F32DENORM-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX11-F32DENORM-NEXT:    v_sub_f32_e32 v0, v0, v1
; GFX11-F32DENORM-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32FLUSH-LABEL: fsub_fpext_fmul_f16_to_f32_commute:
; GFX9-F32FLUSH:       ; %bb.0: ; %entry
; GFX9-F32FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32FLUSH-NEXT:    v_mad_mix_f32 v0, -v1, v2, v0 op_sel_hi:[1,1,0]
; GFX9-F32FLUSH-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32DENORM-LABEL: fsub_fpext_fmul_f16_to_f32_commute:
; GFX9-F32DENORM:       ; %bb.0: ; %entry
; GFX9-F32DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32DENORM-NEXT:    v_mul_f16_e32 v1, v1, v2
; GFX9-F32DENORM-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX9-F32DENORM-NEXT:    v_sub_f32_e32 v0, v0, v1
; GFX9-F32DENORM-NEXT:    s_setpc_b64 s[30:31]
entry:
  %mul = fmul contract half %y, %z
  %mul.ext = fpext half %mul to float
  %add = fsub contract float %x, %mul.ext
  ret float %add
}

; fold (fsub (fpext (fneg (fmul x, y))), z)
;   -> (fneg (fma (fpext x), (fpext y), z))
define float @fsub_fpext_fneg_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
; GFX11-LABEL: fsub_fpext_fneg_fmul_f16_to_f32:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f16_e64 v0, v0, -v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT:    v_sub_f32_e32 v0, v0, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32FLUSH-LABEL: fsub_fpext_fneg_fmul_f16_to_f32:
; GFX9-F32FLUSH:       ; %bb.0: ; %entry
; GFX9-F32FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32FLUSH-NEXT:    v_mad_mix_f32 v0, v0, -v1, -v2 op_sel_hi:[1,1,0]
; GFX9-F32FLUSH-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32DENORM-LABEL: fsub_fpext_fneg_fmul_f16_to_f32:
; GFX9-F32DENORM:       ; %bb.0: ; %entry
; GFX9-F32DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32DENORM-NEXT:    v_mul_f16_e64 v0, v0, -v1
; GFX9-F32DENORM-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX9-F32DENORM-NEXT:    v_sub_f32_e32 v0, v0, v2
; GFX9-F32DENORM-NEXT:    s_setpc_b64 s[30:31]
entry:
  %mul = fmul half %x, %y
  %neg.mul = fsub half -0.0, %mul
  %neg.mul.ext = fpext half %neg.mul to float
  %add = fsub float %neg.mul.ext, %z
  ret float %add
}

; fold (fsub (fneg (fpext (fmul x, y))), z)
;   -> (fneg (fma (fpext x), (fpext y), z))
define float @fsub_fneg_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
; GFX11-LABEL: fsub_fneg_fpext_fmul_f16_to_f32:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f16_e64 v0, v0, -v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT:    v_sub_f32_e32 v0, v0, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32FLUSH-LABEL: fsub_fneg_fpext_fmul_f16_to_f32:
; GFX9-F32FLUSH:       ; %bb.0: ; %entry
; GFX9-F32FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32FLUSH-NEXT:    v_mad_mix_f32 v0, v0, -v1, -v2 op_sel_hi:[1,1,0]
; GFX9-F32FLUSH-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32DENORM-LABEL: fsub_fneg_fpext_fmul_f16_to_f32:
; GFX9-F32DENORM:       ; %bb.0: ; %entry
; GFX9-F32DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32DENORM-NEXT:    v_mul_f16_e64 v0, v0, -v1
; GFX9-F32DENORM-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX9-F32DENORM-NEXT:    v_sub_f32_e32 v0, v0, v2
; GFX9-F32DENORM-NEXT:    s_setpc_b64 s[30:31]
entry:
  %mul = fmul half %x, %y
  %mul.ext = fpext half %mul to float
  %neg.mul.ext = fneg float %mul.ext
  %add = fsub float %neg.mul.ext, %z
  ret float %add
}

; fold (fsub (fmad x, y, (fpext (fmul u, v))), z)
;   -> (fmad x, y, (fmad (fpext u), (fpext v), (fneg z)))
define float @fsub_muladd_fpext_mul_f16_to_f32(float %x, float %y, float %z, half %u, half %v) #0 {
; GFX11-LABEL: fsub_muladd_fpext_mul_f16_to_f32:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f16_e32 v3, v3, v4
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fma_mix_f32 v0, v0, v1, v3 op_sel_hi:[0,0,1]
; GFX11-NEXT:    v_sub_f32_e32 v0, v0, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32FLUSH-LABEL: fsub_muladd_fpext_mul_f16_to_f32:
; GFX9-F32FLUSH:       ; %bb.0: ; %entry
; GFX9-F32FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32FLUSH-NEXT:    v_mad_mix_f32 v2, v3, v4, -v2 op_sel_hi:[1,1,0]
; GFX9-F32FLUSH-NEXT:    v_mac_f32_e32 v2, v0, v1
; GFX9-F32FLUSH-NEXT:    v_mov_b32_e32 v0, v2
; GFX9-F32FLUSH-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32DENORM-LABEL: fsub_muladd_fpext_mul_f16_to_f32:
; GFX9-F32DENORM:       ; %bb.0: ; %entry
; GFX9-F32DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32DENORM-NEXT:    v_mul_f16_e32 v3, v3, v4
; GFX9-F32DENORM-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX9-F32DENORM-NEXT:    v_fma_f32 v0, v0, v1, v3
; GFX9-F32DENORM-NEXT:    v_sub_f32_e32 v0, v0, v2
; GFX9-F32DENORM-NEXT:    s_setpc_b64 s[30:31]
entry:
  %mul = fmul reassoc half %u, %v
  %mul.ext = fpext half %mul to float
  %fma = call float @llvm.fmuladd.f32(float %x, float %y, float %mul.ext)
  %add = fsub reassoc float %fma, %z
  ret float %add
}

; fold (fsub (fpext (fmad x, y, (fmul u, v))), z)
;   -> (fmad (fpext x), (fpext y),
;            (fmad (fpext u), (fpext v), (fneg z)))
define float @fsub_fpext_muladd_mul_f16_to_f32(half %x, half %y, float %z, half %u, half %v) #0 {
; GFX11-LABEL: fsub_fpext_muladd_mul_f16_to_f32:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f16_e32 v3, v3, v4
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fmac_f16_e32 v3, v0, v1
; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_sub_f32_e32 v0, v0, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: fsub_fpext_muladd_mul_f16_to_f32:
; GFX89:       ; %bb.0: ; %entry
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_mul_f16_e32 v3, v3, v4
; GFX89-NEXT:    v_fma_f16 v0, v0, v1, v3
; GFX89-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX89-NEXT:    v_sub_f32_e32 v0, v0, v2
; GFX89-NEXT:    s_setpc_b64 s[30:31]
entry:
  %mul = fmul half %u, %v
  %fma = call half @llvm.fmuladd.f16(half %x, half %y, half %mul)
  %fma.ext = fpext half %fma to float
  %add = fsub float %fma.ext, %z
  ret float %add
}

; fold (fsub x, (fmad y, z, (fpext (fmul u, v))))
;   -> (fmad (fneg y), z, (fmad (fneg (fpext u)), (fpext v), x))
define float @fsub_muladd_fpext_mul_f16_to_f32_commute(float %x, float %y, float %z, half %u, half %v) #0 {
; GFX11-LABEL: fsub_muladd_fpext_mul_f16_to_f32_commute:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f16_e32 v3, v3, v4
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fma_mix_f32 v1, v1, v2, v3 op_sel_hi:[0,0,1]
; GFX11-NEXT:    v_sub_f32_e32 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32FLUSH-LABEL: fsub_muladd_fpext_mul_f16_to_f32_commute:
; GFX9-F32FLUSH:       ; %bb.0: ; %entry
; GFX9-F32FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32FLUSH-NEXT:    v_mad_mix_f32 v0, -v3, v4, v0 op_sel_hi:[1,1,0]
; GFX9-F32FLUSH-NEXT:    v_mad_f32 v0, -v1, v2, v0
; GFX9-F32FLUSH-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-F32DENORM-LABEL: fsub_muladd_fpext_mul_f16_to_f32_commute:
; GFX9-F32DENORM:       ; %bb.0: ; %entry
; GFX9-F32DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-F32DENORM-NEXT:    v_mul_f16_e32 v3, v3, v4
; GFX9-F32DENORM-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX9-F32DENORM-NEXT:    v_fma_f32 v1, v1, v2, v3
; GFX9-F32DENORM-NEXT:    v_sub_f32_e32 v0, v0, v1
; GFX9-F32DENORM-NEXT:    s_setpc_b64 s[30:31]
entry:
  %mul = fmul reassoc half %u, %v
  %mul.ext = fpext half %mul to float
  %fma = call float @llvm.fmuladd.f32(float %y, float %z, float %mul.ext)
  %add = fsub reassoc float %x, %fma
  ret float %add
}

; fold (fsub x, (fpext (fma y, z, (fmul u, v))))
;   -> (fma (fneg (fpext y)), (fpext z),
;           (fma (fneg (fpext u)), (fpext v), x))
define float @fsub_fpext_muladd_mul_f16_to_f32_commute(float %x, half %y, half %z, half %u, half %v) #0 {
; GFX11-LABEL: fsub_fpext_muladd_mul_f16_to_f32_commute:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f16_e32 v3, v3, v4
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fmac_f16_e32 v3, v1, v2
; GFX11-NEXT:    v_cvt_f32_f16_e32 v1, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_sub_f32_e32 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: fsub_fpext_muladd_mul_f16_to_f32_commute:
; GFX89:       ; %bb.0: ; %entry
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_mul_f16_e32 v3, v3, v4
; GFX89-NEXT:    v_fma_f16 v1, v1, v2, v3
; GFX89-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX89-NEXT:    v_sub_f32_e32 v0, v0, v1
; GFX89-NEXT:    s_setpc_b64 s[30:31]
entry:
  %mul = fmul half %u, %v
  %fma = call half @llvm.fmuladd.f16(half %y, half %z, half %mul)
  %fma.ext = fpext half %fma to float
  %add = fsub float %x, %fma.ext
  ret float %add
}

; Intrinsics called by the tests above.
declare float @llvm.fmuladd.f32(float, float, float) #0
declare float @llvm.fma.f32(float, float, float) #0
declare half @llvm.fmuladd.f16(half, half, half) #0
declare half @llvm.fma.f16(half, half, half) #0

attributes #0 = { nounwind readnone speculatable }