; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -fp-contract=fast < %s | FileCheck -check-prefix=GFX9-CONTRACT %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX9-DENORM %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -fp-contract=fast < %s | FileCheck -check-prefix=GFX10-CONTRACT %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX10-DENORM %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -fp-contract=fast < %s | FileCheck -check-prefix=GFX11-CONTRACT %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX11-DENORM %s

; Each of gfx900, gfx1010 and gfx1100 is compiled twice with GlobalISel: once
; with -fp-contract=fast (the CONTRACT checks) and once with
; --denormal-fp-math=preserve-sign (the DENORM checks).
;
; Patterns under test (the inner multiply-add is written with llvm.fmuladd and
; every FP operation carries the 'fast' flag):
; fadd (fma a, b, (fmul c, d)), e --> fma a, b, (fma c, d, e)
; fadd e, (fma a, b, (fmul c, d)) --> fma a, b, (fma c, d, e)

; Scalar float; the fmuladd result is the first operand of the final fadd.
define float @test_f32_add_mul(float %a, float %b, float %c, float %d, float %e) {
; GFX9-CONTRACT-LABEL: test_f32_add_mul:
; GFX9-CONTRACT:       ; %bb.0: ; %.entry
; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-CONTRACT-NEXT:    v_fma_f32 v2, v2, v3, v4
; GFX9-CONTRACT-NEXT:    v_fma_f32 v0, v0, v1, v2
; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_f32_add_mul:
; GFX9-DENORM:       ; %bb.0: ; %.entry
; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT:    v_mad_f32 v2, v2, v3, v4
; GFX9-DENORM-NEXT:    v_mac_f32_e32 v2, v0, v1
; GFX9-DENORM-NEXT:    v_mov_b32_e32 v0, v2
; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_f32_add_mul:
; GFX10-CONTRACT:       ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT:    v_fma_f32 v2, v2, v3, v4
; GFX10-CONTRACT-NEXT:    v_fmac_f32_e32 v2, v0, v1
; GFX10-CONTRACT-NEXT:    v_mov_b32_e32 v0, v2
; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_f32_add_mul:
; GFX10-DENORM:       ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT:    v_fma_f32 v2, v2, v3, v4
; GFX10-DENORM-NEXT:    v_fmac_f32_e32 v2, v0, v1
; GFX10-DENORM-NEXT:    v_mov_b32_e32 v0, v2
; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-CONTRACT-LABEL: test_f32_add_mul:
; GFX11-CONTRACT:       ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-CONTRACT-NEXT:    v_fma_f32 v2, v2, v3, v4
; GFX11-CONTRACT-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-CONTRACT-NEXT:    v_fmac_f32_e32 v2, v0, v1
; GFX11-CONTRACT-NEXT:    v_mov_b32_e32 v0, v2
; GFX11-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-DENORM-LABEL: test_f32_add_mul:
; GFX11-DENORM:       ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-DENORM-NEXT:    v_fma_f32 v2, v2, v3, v4
; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-DENORM-NEXT:    v_fmac_f32_e32 v2, v0, v1
; GFX11-DENORM-NEXT:    v_mov_b32_e32 v0, v2
; GFX11-DENORM-NEXT:    s_setpc_b64 s[30:31]
.entry:
  %x = fmul fast float %c, %d
  %y = call fast float @llvm.fmuladd.f32(float %a, float %b, float %x)
  %z = fadd fast float %y, %e
  ret float %z
}

; Scalar float; the fmuladd result is the second operand of the final fadd.
define float @test_f32_add_mul_rhs(float %a, float %b, float %c, float %d, float %e) {
; GFX9-CONTRACT-LABEL: test_f32_add_mul_rhs:
; GFX9-CONTRACT:       ; %bb.0: ; %.entry
; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-CONTRACT-NEXT:    v_fma_f32 v2, v2, v3, v4
; GFX9-CONTRACT-NEXT:    v_fma_f32 v0, v0, v1, v2
; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_f32_add_mul_rhs:
; GFX9-DENORM:       ; %bb.0: ; %.entry
; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT:    v_mad_f32 v2, v2, v3, v4
; GFX9-DENORM-NEXT:    v_mac_f32_e32 v2, v0, v1
; GFX9-DENORM-NEXT:    v_mov_b32_e32 v0, v2
; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_f32_add_mul_rhs:
; GFX10-CONTRACT:       ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT:    v_fma_f32 v2, v2, v3, v4
; GFX10-CONTRACT-NEXT:    v_fmac_f32_e32 v2, v0, v1
; GFX10-CONTRACT-NEXT:    v_mov_b32_e32 v0, v2
; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_f32_add_mul_rhs:
; GFX10-DENORM:       ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT:    v_fma_f32 v2, v2, v3, v4
; GFX10-DENORM-NEXT:    v_fmac_f32_e32 v2, v0, v1
; GFX10-DENORM-NEXT:    v_mov_b32_e32 v0, v2
; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-CONTRACT-LABEL: test_f32_add_mul_rhs:
; GFX11-CONTRACT:       ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-CONTRACT-NEXT:    v_fma_f32 v2, v2, v3, v4
; GFX11-CONTRACT-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-CONTRACT-NEXT:    v_fmac_f32_e32 v2, v0, v1
; GFX11-CONTRACT-NEXT:    v_mov_b32_e32 v0, v2
; GFX11-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-DENORM-LABEL: test_f32_add_mul_rhs:
; GFX11-DENORM:       ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-DENORM-NEXT:    v_fma_f32 v2, v2, v3, v4
; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-DENORM-NEXT:    v_fmac_f32_e32 v2, v0, v1
; GFX11-DENORM-NEXT:    v_mov_b32_e32 v0, v2
; GFX11-DENORM-NEXT:    s_setpc_b64 s[30:31]
.entry:
  %x = fmul fast float %c, %d
  %y = call fast float @llvm.fmuladd.f32(float %a, float %b, float %x)
  %z = fadd fast float %e, %y
  ret float %z
}

; Scalar half; the fmuladd result is the first operand of the final fadd.
define half @test_half_add_mul(half %a, half %b, half %c, half %d, half %e) {
; GFX9-CONTRACT-LABEL: test_half_add_mul:
; GFX9-CONTRACT:       ; %bb.0: ; %.entry
; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-CONTRACT-NEXT:    v_fma_f16 v2, v2, v3, v4
; GFX9-CONTRACT-NEXT:    v_fma_f16 v0, v0, v1, v2
; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_half_add_mul:
; GFX9-DENORM:       ; %bb.0: ; %.entry
; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT:    v_mad_legacy_f16 v2, v2, v3, v4
; GFX9-DENORM-NEXT:    v_mac_f16_e32 v2, v0, v1
; GFX9-DENORM-NEXT:    v_mov_b32_e32 v0, v2
; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_half_add_mul:
; GFX10-CONTRACT:       ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT:    v_fma_f16 v2, v2, v3, v4
; GFX10-CONTRACT-NEXT:    v_fmac_f16_e32 v2, v0, v1
; GFX10-CONTRACT-NEXT:    v_mov_b32_e32 v0, v2
; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_half_add_mul:
; GFX10-DENORM:       ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT:    v_mul_f16_e32 v2, v2, v3
; GFX10-DENORM-NEXT:    v_mul_f16_e32 v0, v0, v1
; GFX10-DENORM-NEXT:    v_add_f16_e32 v0, v0, v2
; GFX10-DENORM-NEXT:    v_add_f16_e32 v0, v0, v4
; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-CONTRACT-LABEL: test_half_add_mul:
; GFX11-CONTRACT:       ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-CONTRACT-NEXT:    v_fma_f16 v2, v2, v3, v4
; GFX11-CONTRACT-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-CONTRACT-NEXT:    v_fmac_f16_e32 v2, v0, v1
; GFX11-CONTRACT-NEXT:    v_mov_b32_e32 v0, v2
; GFX11-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-DENORM-LABEL: test_half_add_mul:
; GFX11-DENORM:       ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-DENORM-NEXT:    v_mul_f16_e32 v2, v2, v3
; GFX11-DENORM-NEXT:    v_mul_f16_e32 v0, v0, v1
; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-DENORM-NEXT:    v_add_f16_e32 v0, v0, v2
; GFX11-DENORM-NEXT:    v_add_f16_e32 v0, v0, v4
; GFX11-DENORM-NEXT:    s_setpc_b64 s[30:31]
.entry:
  %x = fmul fast half %c, %d
  %y = call fast half @llvm.fmuladd.f16(half %a, half %b, half %x)
  %z = fadd fast half %y, %e
  ret half %z
}

; Scalar half; the fmuladd result is the second operand of the final fadd.
define half @test_half_add_mul_rhs(half %a, half %b, half %c, half %d, half %e) {
; GFX9-CONTRACT-LABEL: test_half_add_mul_rhs:
; GFX9-CONTRACT:       ; %bb.0: ; %.entry
; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-CONTRACT-NEXT:    v_fma_f16 v2, v2, v3, v4
; GFX9-CONTRACT-NEXT:    v_fma_f16 v0, v0, v1, v2
; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_half_add_mul_rhs:
; GFX9-DENORM:       ; %bb.0: ; %.entry
; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT:    v_mad_legacy_f16 v2, v2, v3, v4
; GFX9-DENORM-NEXT:    v_mac_f16_e32 v2, v0, v1
; GFX9-DENORM-NEXT:    v_mov_b32_e32 v0, v2
; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_half_add_mul_rhs:
; GFX10-CONTRACT:       ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT:    v_fma_f16 v2, v2, v3, v4
; GFX10-CONTRACT-NEXT:    v_fmac_f16_e32 v2, v0, v1
; GFX10-CONTRACT-NEXT:    v_mov_b32_e32 v0, v2
; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_half_add_mul_rhs:
; GFX10-DENORM:       ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT:    v_mul_f16_e32 v2, v2, v3
; GFX10-DENORM-NEXT:    v_mul_f16_e32 v0, v0, v1
; GFX10-DENORM-NEXT:    v_add_f16_e32 v0, v0, v2
; GFX10-DENORM-NEXT:    v_add_f16_e32 v0, v4, v0
; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-CONTRACT-LABEL: test_half_add_mul_rhs:
; GFX11-CONTRACT:       ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-CONTRACT-NEXT:    v_fma_f16 v2, v2, v3, v4
; GFX11-CONTRACT-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-CONTRACT-NEXT:    v_fmac_f16_e32 v2, v0, v1
; GFX11-CONTRACT-NEXT:    v_mov_b32_e32 v0, v2
; GFX11-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-DENORM-LABEL: test_half_add_mul_rhs:
; GFX11-DENORM:       ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-DENORM-NEXT:    v_mul_f16_e32 v2, v2, v3
; GFX11-DENORM-NEXT:    v_mul_f16_e32 v0, v0, v1
; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-DENORM-NEXT:    v_add_f16_e32 v0, v0, v2
; GFX11-DENORM-NEXT:    v_add_f16_e32 v0, v4, v0
; GFX11-DENORM-NEXT:    s_setpc_b64 s[30:31]
.entry:
  %x = fmul fast half %c, %d
  %y = call fast half @llvm.fmuladd.f16(half %a, half %b, half %x)
  %z = fadd fast half %e, %y
  ret half %z
}

; Scalar double; the fmuladd result is the first operand of the final fadd.
define double @test_double_add_mul(double %a, double %b, double %c, double %d, double %e) {
; GFX9-CONTRACT-LABEL: test_double_add_mul:
; GFX9-CONTRACT:       ; %bb.0: ; %.entry
; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-CONTRACT-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9]
; GFX9-CONTRACT-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_double_add_mul:
; GFX9-DENORM:       ; %bb.0: ; %.entry
; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9]
; GFX9-DENORM-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_double_add_mul:
; GFX10-CONTRACT:       ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9]
; GFX10-CONTRACT-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_double_add_mul:
; GFX10-DENORM:       ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9]
; GFX10-DENORM-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-CONTRACT-LABEL: test_double_add_mul:
; GFX11-CONTRACT:       ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-CONTRACT-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9]
; GFX11-CONTRACT-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-CONTRACT-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX11-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-DENORM-LABEL: test_double_add_mul:
; GFX11-DENORM:       ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-DENORM-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9]
; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-DENORM-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX11-DENORM-NEXT:    s_setpc_b64 s[30:31]
.entry:
  %x = fmul fast double %c, %d
  %y = call fast double @llvm.fmuladd.f64(double %a, double %b, double %x)
  %z = fadd fast double %y, %e
  ret double %z
}

; Scalar double; the fmuladd result is the second operand of the final fadd.
define double @test_double_add_mul_rhs(double %a, double %b, double %c, double %d, double %e) {
; GFX9-CONTRACT-LABEL: test_double_add_mul_rhs:
; GFX9-CONTRACT:       ; %bb.0: ; %.entry
; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-CONTRACT-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9]
; GFX9-CONTRACT-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_double_add_mul_rhs:
; GFX9-DENORM:       ; %bb.0: ; %.entry
; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9]
; GFX9-DENORM-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_double_add_mul_rhs:
; GFX10-CONTRACT:       ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9]
; GFX10-CONTRACT-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_double_add_mul_rhs:
; GFX10-DENORM:       ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9]
; GFX10-DENORM-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-CONTRACT-LABEL: test_double_add_mul_rhs:
; GFX11-CONTRACT:       ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-CONTRACT-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9]
; GFX11-CONTRACT-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-CONTRACT-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX11-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-DENORM-LABEL: test_double_add_mul_rhs:
; GFX11-DENORM:       ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-DENORM-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[8:9]
; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-DENORM-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX11-DENORM-NEXT:    s_setpc_b64 s[30:31]
.entry:
  %x = fmul fast double %c, %d
  %y = call fast double @llvm.fmuladd.f64(double %a, double %b, double %x)
  %z = fadd fast double %e, %y
  ret double %z
}

; <4 x float>; the fmuladd result is the first operand of the final fadd.
define <4 x float> @test_v4f32_add_mul(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e) {
; GFX9-CONTRACT-LABEL: test_v4f32_add_mul:
; GFX9-CONTRACT:       ; %bb.0: ; %.entry
; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-CONTRACT-NEXT:    v_fma_f32 v8, v8, v12, v16
; GFX9-CONTRACT-NEXT:    v_fma_f32 v9, v9, v13, v17
; GFX9-CONTRACT-NEXT:    v_fma_f32 v10, v10, v14, v18
; GFX9-CONTRACT-NEXT:    v_fma_f32 v11, v11, v15, v19
; GFX9-CONTRACT-NEXT:    v_fma_f32 v0, v0, v4, v8
; GFX9-CONTRACT-NEXT:    v_fma_f32 v1, v1, v5, v9
; GFX9-CONTRACT-NEXT:    v_fma_f32 v2, v2, v6, v10
; GFX9-CONTRACT-NEXT:    v_fma_f32 v3, v3, v7, v11
; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_v4f32_add_mul:
; GFX9-DENORM:       ; %bb.0: ; %.entry
; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT:    v_mad_f32 v8, v8, v12, v16
; GFX9-DENORM-NEXT:    v_mac_f32_e32 v8, v0, v4
; GFX9-DENORM-NEXT:    v_mad_f32 v4, v9, v13, v17
; GFX9-DENORM-NEXT:    v_mac_f32_e32 v4, v1, v5
; GFX9-DENORM-NEXT:    v_mad_f32 v5, v10, v14, v18
; GFX9-DENORM-NEXT:    v_mac_f32_e32 v5, v2, v6
; GFX9-DENORM-NEXT:    v_mad_f32 v6, v11, v15, v19
; GFX9-DENORM-NEXT:    v_mac_f32_e32 v6, v3, v7
; GFX9-DENORM-NEXT:    v_mov_b32_e32 v0, v8
; GFX9-DENORM-NEXT:    v_mov_b32_e32 v1, v4
; GFX9-DENORM-NEXT:    v_mov_b32_e32 v2, v5
; GFX9-DENORM-NEXT:    v_mov_b32_e32 v3, v6
; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_v4f32_add_mul:
; GFX10-CONTRACT:       ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT:    v_fma_f32 v8, v8, v12, v16
; GFX10-CONTRACT-NEXT:    v_fma_f32 v9, v9, v13, v17
; GFX10-CONTRACT-NEXT:    v_fma_f32 v10, v10, v14, v18
; GFX10-CONTRACT-NEXT:    v_fma_f32 v11, v11, v15, v19
; GFX10-CONTRACT-NEXT:    v_fmac_f32_e32 v8, v0, v4
; GFX10-CONTRACT-NEXT:    v_fmac_f32_e32 v9, v1, v5
; GFX10-CONTRACT-NEXT:    v_fmac_f32_e32 v10, v2, v6
; GFX10-CONTRACT-NEXT:    v_fmac_f32_e32 v11, v3, v7
; GFX10-CONTRACT-NEXT:    v_mov_b32_e32 v0, v8
; GFX10-CONTRACT-NEXT:    v_mov_b32_e32 v1, v9
; GFX10-CONTRACT-NEXT:    v_mov_b32_e32 v2, v10
; GFX10-CONTRACT-NEXT:    v_mov_b32_e32 v3, v11
; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_v4f32_add_mul:
; GFX10-DENORM:       ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT:    v_fma_f32 v8, v8, v12, v16
; GFX10-DENORM-NEXT:    v_fma_f32 v9, v9, v13, v17
; GFX10-DENORM-NEXT:    v_fma_f32 v10, v10, v14, v18
; GFX10-DENORM-NEXT:    v_fma_f32 v11, v11, v15, v19
; GFX10-DENORM-NEXT:    v_fmac_f32_e32 v8, v0, v4
; GFX10-DENORM-NEXT:    v_fmac_f32_e32 v9, v1, v5
; GFX10-DENORM-NEXT:    v_fmac_f32_e32 v10, v2, v6
; GFX10-DENORM-NEXT:    v_fmac_f32_e32 v11, v3, v7
; GFX10-DENORM-NEXT:    v_mov_b32_e32 v0, v8
; GFX10-DENORM-NEXT:    v_mov_b32_e32 v1, v9
; GFX10-DENORM-NEXT:    v_mov_b32_e32 v2, v10
; GFX10-DENORM-NEXT:    v_mov_b32_e32 v3, v11
; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-CONTRACT-LABEL: test_v4f32_add_mul:
; GFX11-CONTRACT:       ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-CONTRACT-NEXT:    v_fma_f32 v8, v8, v12, v16
; GFX11-CONTRACT-NEXT:    v_fma_f32 v9, v9, v13, v17
; GFX11-CONTRACT-NEXT:    v_fma_f32 v10, v10, v14, v18
; GFX11-CONTRACT-NEXT:    v_fma_f32 v11, v11, v15, v19
; GFX11-CONTRACT-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-CONTRACT-NEXT:    v_dual_fmac_f32 v8, v0, v4 :: v_dual_fmac_f32 v9, v1, v5
; GFX11-CONTRACT-NEXT:    v_dual_fmac_f32 v10, v2, v6 :: v_dual_fmac_f32 v11, v3, v7
; GFX11-CONTRACT-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-CONTRACT-NEXT:    v_dual_mov_b32 v0, v8 :: v_dual_mov_b32 v1, v9
; GFX11-CONTRACT-NEXT:    v_dual_mov_b32 v2, v10 :: v_dual_mov_b32 v3, v11
; GFX11-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-DENORM-LABEL: test_v4f32_add_mul:
; GFX11-DENORM:       ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-DENORM-NEXT:    v_fma_f32 v8, v8, v12, v16
; GFX11-DENORM-NEXT:    v_fma_f32 v9, v9, v13, v17
; GFX11-DENORM-NEXT:    v_fma_f32 v10, v10, v14, v18
; GFX11-DENORM-NEXT:    v_fma_f32 v11, v11, v15, v19
; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-DENORM-NEXT:    v_dual_fmac_f32 v8, v0, v4 :: v_dual_fmac_f32 v9, v1, v5
; GFX11-DENORM-NEXT:    v_dual_fmac_f32 v10, v2, v6 :: v_dual_fmac_f32 v11, v3, v7
; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-DENORM-NEXT:    v_dual_mov_b32 v0, v8 :: v_dual_mov_b32 v1, v9
; GFX11-DENORM-NEXT:    v_dual_mov_b32 v2, v10 :: v_dual_mov_b32 v3, v11
; GFX11-DENORM-NEXT:    s_setpc_b64 s[30:31]
.entry:
  %x = fmul fast <4 x float> %c, %d
  %y = call fast <4 x float> @llvm.fmuladd.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %x)
  %z = fadd fast <4 x float> %y, %e
  ret <4 x float> %z
}

; <4 x float>; the fmuladd result is the second operand of the final fadd.
define <4 x float> @test_v4f32_add_mul_rhs(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e) {
; GFX9-CONTRACT-LABEL: test_v4f32_add_mul_rhs:
; GFX9-CONTRACT:       ; %bb.0: ; %.entry
; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-CONTRACT-NEXT:    v_fma_f32 v8, v8, v12, v16
; GFX9-CONTRACT-NEXT:    v_fma_f32 v9, v9, v13, v17
; GFX9-CONTRACT-NEXT:    v_fma_f32 v10, v10, v14, v18
; GFX9-CONTRACT-NEXT:    v_fma_f32 v11, v11, v15, v19
; GFX9-CONTRACT-NEXT:    v_fma_f32 v0, v0, v4, v8
; GFX9-CONTRACT-NEXT:    v_fma_f32 v1, v1, v5, v9
; GFX9-CONTRACT-NEXT:    v_fma_f32 v2, v2, v6, v10
; GFX9-CONTRACT-NEXT:    v_fma_f32 v3, v3, v7, v11
; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_v4f32_add_mul_rhs:
; GFX9-DENORM:       ; %bb.0: ; %.entry
; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT:    v_mad_f32 v8, v8, v12, v16
; GFX9-DENORM-NEXT:    v_mac_f32_e32 v8, v0, v4
; GFX9-DENORM-NEXT:    v_mad_f32 v4, v9, v13, v17
; GFX9-DENORM-NEXT:    v_mac_f32_e32 v4, v1, v5
; GFX9-DENORM-NEXT:    v_mad_f32 v5, v10, v14, v18
; GFX9-DENORM-NEXT:    v_mac_f32_e32 v5, v2, v6
; GFX9-DENORM-NEXT:    v_mad_f32 v6, v11, v15, v19
; GFX9-DENORM-NEXT:    v_mac_f32_e32 v6, v3, v7
; GFX9-DENORM-NEXT:    v_mov_b32_e32 v0, v8
; GFX9-DENORM-NEXT:    v_mov_b32_e32 v1, v4
; GFX9-DENORM-NEXT:    v_mov_b32_e32 v2, v5
; GFX9-DENORM-NEXT:    v_mov_b32_e32 v3, v6
; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_v4f32_add_mul_rhs:
; GFX10-CONTRACT:       ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT:    v_fma_f32 v8, v8, v12, v16
; GFX10-CONTRACT-NEXT:    v_fma_f32 v9, v9, v13, v17
; GFX10-CONTRACT-NEXT:    v_fma_f32 v10, v10, v14, v18
; GFX10-CONTRACT-NEXT:    v_fma_f32 v11, v11, v15, v19
; GFX10-CONTRACT-NEXT:    v_fmac_f32_e32 v8, v0, v4
; GFX10-CONTRACT-NEXT:    v_fmac_f32_e32 v9, v1, v5
; GFX10-CONTRACT-NEXT:    v_fmac_f32_e32 v10, v2, v6
; GFX10-CONTRACT-NEXT:    v_fmac_f32_e32 v11, v3, v7
; GFX10-CONTRACT-NEXT:    v_mov_b32_e32 v0, v8
; GFX10-CONTRACT-NEXT:    v_mov_b32_e32 v1, v9
; GFX10-CONTRACT-NEXT:    v_mov_b32_e32 v2, v10
; GFX10-CONTRACT-NEXT:    v_mov_b32_e32 v3, v11
; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_v4f32_add_mul_rhs:
; GFX10-DENORM:       ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT:    v_fma_f32 v8, v8, v12, v16
; GFX10-DENORM-NEXT:    v_fma_f32 v9, v9, v13, v17
; GFX10-DENORM-NEXT:    v_fma_f32 v10, v10, v14, v18
; GFX10-DENORM-NEXT:    v_fma_f32 v11, v11, v15, v19
; GFX10-DENORM-NEXT:    v_fmac_f32_e32 v8, v0, v4
; GFX10-DENORM-NEXT:    v_fmac_f32_e32 v9, v1, v5
; GFX10-DENORM-NEXT:    v_fmac_f32_e32 v10, v2, v6
; GFX10-DENORM-NEXT:    v_fmac_f32_e32 v11, v3, v7
; GFX10-DENORM-NEXT:    v_mov_b32_e32 v0, v8
; GFX10-DENORM-NEXT:    v_mov_b32_e32 v1, v9
; GFX10-DENORM-NEXT:    v_mov_b32_e32 v2, v10
; GFX10-DENORM-NEXT:    v_mov_b32_e32 v3, v11
; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-CONTRACT-LABEL: test_v4f32_add_mul_rhs:
; GFX11-CONTRACT:       ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-CONTRACT-NEXT:    v_fma_f32 v8, v8, v12, v16
; GFX11-CONTRACT-NEXT:    v_fma_f32 v9, v9, v13, v17
; GFX11-CONTRACT-NEXT:    v_fma_f32 v10, v10, v14, v18
; GFX11-CONTRACT-NEXT:    v_fma_f32 v11, v11, v15, v19
; GFX11-CONTRACT-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-CONTRACT-NEXT:    v_dual_fmac_f32 v8, v0, v4 :: v_dual_fmac_f32 v9, v1, v5
; GFX11-CONTRACT-NEXT:    v_dual_fmac_f32 v10, v2, v6 :: v_dual_fmac_f32 v11, v3, v7
; GFX11-CONTRACT-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-CONTRACT-NEXT:    v_dual_mov_b32 v0, v8 :: v_dual_mov_b32 v1, v9
; GFX11-CONTRACT-NEXT:    v_dual_mov_b32 v2, v10 :: v_dual_mov_b32 v3, v11
; GFX11-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-DENORM-LABEL: test_v4f32_add_mul_rhs:
; GFX11-DENORM:       ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-DENORM-NEXT:    v_fma_f32 v8, v8, v12, v16
; GFX11-DENORM-NEXT:    v_fma_f32 v9, v9, v13, v17
; GFX11-DENORM-NEXT:    v_fma_f32 v10, v10, v14, v18
; GFX11-DENORM-NEXT:    v_fma_f32 v11, v11, v15, v19
; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-DENORM-NEXT:    v_dual_fmac_f32 v8, v0, v4 :: v_dual_fmac_f32 v9, v1, v5
; GFX11-DENORM-NEXT:    v_dual_fmac_f32 v10, v2, v6 :: v_dual_fmac_f32 v11, v3, v7
; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-DENORM-NEXT:    v_dual_mov_b32 v0, v8 :: v_dual_mov_b32 v1, v9
; GFX11-DENORM-NEXT:    v_dual_mov_b32 v2, v10 :: v_dual_mov_b32 v3, v11
; GFX11-DENORM-NEXT:    s_setpc_b64 s[30:31]
.entry:
  %x = fmul fast <4 x float> %c, %d
  %y = call fast <4 x float> @llvm.fmuladd.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %x)
  %z = fadd fast <4 x float> %e, %y
  ret <4 x float> %z
}

; <4 x half> (despite the scalar-looking name); the fmuladd result is the first
; operand of the final fadd.
define <4 x half> @test_f16_add_mul(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d, <4 x half> %e) {
; GFX9-CONTRACT-LABEL: test_f16_add_mul:
; GFX9-CONTRACT:       ; %bb.0: ; %.entry
; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-CONTRACT-NEXT:    v_pk_fma_f16 v4, v4, v6, v8
; GFX9-CONTRACT-NEXT:    v_pk_fma_f16 v5, v5, v7, v9
; GFX9-CONTRACT-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
; GFX9-CONTRACT-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_f16_add_mul:
; GFX9-DENORM:       ; %bb.0: ; %.entry
; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT:    v_pk_mul_f16 v4, v4, v6
; GFX9-DENORM-NEXT:    v_pk_mul_f16 v5, v5, v7
; GFX9-DENORM-NEXT:    v_pk_mul_f16 v0, v0, v2
; GFX9-DENORM-NEXT:    v_pk_mul_f16 v1, v1, v3
; GFX9-DENORM-NEXT:    v_pk_add_f16 v0, v0, v4
; GFX9-DENORM-NEXT:    v_pk_add_f16 v1, v1, v5
; GFX9-DENORM-NEXT:    v_pk_add_f16 v0, v0, v8
; GFX9-DENORM-NEXT:    v_pk_add_f16 v1, v1, v9
; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_f16_add_mul:
; GFX10-CONTRACT:       ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT:    v_pk_fma_f16 v4, v4, v6, v8
; GFX10-CONTRACT-NEXT:    v_pk_fma_f16 v5, v5, v7, v9
; GFX10-CONTRACT-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
; GFX10-CONTRACT-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_f16_add_mul:
; GFX10-DENORM:       ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT:    v_pk_mul_f16 v4, v4, v6
; GFX10-DENORM-NEXT:    v_pk_mul_f16 v5, v5, v7
; GFX10-DENORM-NEXT:    v_pk_mul_f16 v0, v0, v2
; GFX10-DENORM-NEXT:    v_pk_mul_f16 v1, v1, v3
; GFX10-DENORM-NEXT:    v_pk_add_f16 v0, v0, v4
; GFX10-DENORM-NEXT:    v_pk_add_f16 v1, v1, v5
; GFX10-DENORM-NEXT:    v_pk_add_f16 v0, v0, v8
; GFX10-DENORM-NEXT:    v_pk_add_f16 v1, v1, v9
; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-CONTRACT-LABEL: test_f16_add_mul:
; GFX11-CONTRACT:       ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-CONTRACT-NEXT:    v_pk_fma_f16 v4, v4, v6, v8
; GFX11-CONTRACT-NEXT:    v_pk_fma_f16 v5, v5, v7, v9
; GFX11-CONTRACT-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-CONTRACT-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
; GFX11-CONTRACT-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
; GFX11-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-DENORM-LABEL: test_f16_add_mul:
; GFX11-DENORM:       ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-DENORM-NEXT:    v_pk_mul_f16 v4, v4, v6
; GFX11-DENORM-NEXT:    v_pk_mul_f16 v5, v5, v7
; GFX11-DENORM-NEXT:    v_pk_mul_f16 v0, v0, v2
; GFX11-DENORM-NEXT:    v_pk_mul_f16 v1, v1, v3
; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-DENORM-NEXT:    v_pk_add_f16 v0, v0, v4
; GFX11-DENORM-NEXT:    v_pk_add_f16 v1, v1, v5
; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-DENORM-NEXT:    v_pk_add_f16 v0, v0, v8
; GFX11-DENORM-NEXT:    v_pk_add_f16 v1, v1, v9
; GFX11-DENORM-NEXT:    s_setpc_b64 s[30:31]
.entry:
  %x = fmul fast <4 x half> %c, %d
  %y = call fast <4 x half> @llvm.fmuladd.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %x)
  %z = fadd fast <4 x half> %y, %e
  ret <4 x half> %z
}

; <4 x half> (despite the scalar-looking name); the fmuladd result is the second
; operand of the final fadd.
define <4 x half> @test_f16_add_mul_rhs(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d, <4 x half> %e) {
; GFX9-CONTRACT-LABEL: test_f16_add_mul_rhs:
; GFX9-CONTRACT:       ; %bb.0: ; %.entry
; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-CONTRACT-NEXT:    v_pk_fma_f16 v4, v4, v6, v8
; GFX9-CONTRACT-NEXT:    v_pk_fma_f16 v5, v5, v7, v9
; GFX9-CONTRACT-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
; GFX9-CONTRACT-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_f16_add_mul_rhs:
; GFX9-DENORM:       ; %bb.0: ; %.entry
; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT:    v_pk_mul_f16 v4, v4, v6
; GFX9-DENORM-NEXT:    v_pk_mul_f16 v5, v5, v7
; GFX9-DENORM-NEXT:    v_pk_mul_f16 v0, v0, v2
; GFX9-DENORM-NEXT:    v_pk_mul_f16 v1, v1, v3
; GFX9-DENORM-NEXT:    v_pk_add_f16 v0, v0, v4
; GFX9-DENORM-NEXT:    v_pk_add_f16 v1, v1, v5
; GFX9-DENORM-NEXT:    v_pk_add_f16 v0, v8, v0
; GFX9-DENORM-NEXT:    v_pk_add_f16 v1, v9, v1
; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_f16_add_mul_rhs:
; GFX10-CONTRACT:       ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT:    v_pk_fma_f16 v4, v4, v6, v8
; GFX10-CONTRACT-NEXT:    v_pk_fma_f16 v5, v5, v7, v9
; GFX10-CONTRACT-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
; GFX10-CONTRACT-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_f16_add_mul_rhs:
; GFX10-DENORM:       ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT:    v_pk_mul_f16 v4, v4, v6
; GFX10-DENORM-NEXT:    v_pk_mul_f16 v5, v5, v7
; GFX10-DENORM-NEXT:    v_pk_mul_f16 v0, v0, v2
; GFX10-DENORM-NEXT:    v_pk_mul_f16 v1, v1, v3
; GFX10-DENORM-NEXT:    v_pk_add_f16 v0, v0, v4
; GFX10-DENORM-NEXT:    v_pk_add_f16 v1, v1, v5
; GFX10-DENORM-NEXT:    v_pk_add_f16 v0, v8, v0
; GFX10-DENORM-NEXT:    v_pk_add_f16 v1, v9, v1
; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-CONTRACT-LABEL: test_f16_add_mul_rhs:
; GFX11-CONTRACT:       ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-CONTRACT-NEXT:    v_pk_fma_f16 v4, v4, v6, v8
; GFX11-CONTRACT-NEXT:    v_pk_fma_f16 v5, v5, v7, v9
; GFX11-CONTRACT-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-CONTRACT-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
; GFX11-CONTRACT-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
; GFX11-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-DENORM-LABEL: test_f16_add_mul_rhs:
; GFX11-DENORM:       ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-DENORM-NEXT:    v_pk_mul_f16 v4, v4, v6
; GFX11-DENORM-NEXT:    v_pk_mul_f16 v5, v5, v7
; GFX11-DENORM-NEXT:    v_pk_mul_f16 v0, v0, v2
; GFX11-DENORM-NEXT:    v_pk_mul_f16 v1, v1, v3
; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-DENORM-NEXT:    v_pk_add_f16 v0, v0, v4
; GFX11-DENORM-NEXT:    v_pk_add_f16 v1, v1, v5
; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-DENORM-NEXT:    v_pk_add_f16 v0, v8, v0
; GFX11-DENORM-NEXT:    v_pk_add_f16 v1, v9, v1
; GFX11-DENORM-NEXT:    s_setpc_b64 s[30:31]
.entry:
  %x = fmul fast <4 x half> %c, %d
  %y = call fast <4 x half> @llvm.fmuladd.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %x)
  %z = fadd fast <4 x half> %e, %y
  ret <4 x half> %z
}

; <4 x double> (despite the scalar-looking name).
define <4 x double> @test_f64_add_mul(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d, <4 x double> %e) {
; GFX9-CONTRACT-LABEL: test_f64_add_mul:
; GFX9-CONTRACT:       ; %bb.0: ; %.entry
; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-CONTRACT-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:4
; GFX9-CONTRACT-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:8
; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0)
; GFX9-CONTRACT-NEXT:    v_fma_f64 v[16:17], v[16:17], v[24:25], v[31:32]
; GFX9-CONTRACT-NEXT:    buffer_load_dword v24, off, s[0:3], s32 offset:12
; GFX9-CONTRACT-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:16
; GFX9-CONTRACT-NEXT:    v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17]
; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0)
; GFX9-CONTRACT-NEXT:    v_fma_f64 v[18:19], v[18:19], v[26:27], v[24:25]
; GFX9-CONTRACT-NEXT:    buffer_load_dword v24, off, s[0:3], s32 offset:20
; GFX9-CONTRACT-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:24
; GFX9-CONTRACT-NEXT:    v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19]
; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0)
; GFX9-CONTRACT-NEXT:    v_fma_f64 v[20:21], v[20:21], v[28:29], v[24:25]
; GFX9-CONTRACT-NEXT:    buffer_load_dword v31, off, s[0:3], s32
; GFX9-CONTRACT-NEXT:    buffer_load_dword v24, off, s[0:3], s32 offset:28
; GFX9-CONTRACT-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:32
; GFX9-CONTRACT-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21]
; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0)
; GFX9-CONTRACT-NEXT:    v_fma_f64 v[22:23], v[22:23], v[30:31], v[24:25]
; GFX9-CONTRACT-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23]
; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_f64_add_mul:
; GFX9-DENORM:       ; %bb.0: ; %.entry
; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:4
; GFX9-DENORM-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:8
; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0)
; GFX9-DENORM-NEXT:    v_fma_f64 v[16:17], v[16:17], v[24:25], v[31:32]
; GFX9-DENORM-NEXT:    buffer_load_dword v24, off, s[0:3], s32 offset:12
; GFX9-DENORM-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:16
; GFX9-DENORM-NEXT:    v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17]
; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0)
; GFX9-DENORM-NEXT:    v_fma_f64 v[18:19], v[18:19], v[26:27], v[24:25]
; GFX9-DENORM-NEXT:    buffer_load_dword v24, off, s[0:3], s32 offset:20
; GFX9-DENORM-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:24
; GFX9-DENORM-NEXT:    v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19]
; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0)
; GFX9-DENORM-NEXT:    v_fma_f64 v[20:21], v[20:21], v[28:29], v[24:25]
; GFX9-DENORM-NEXT:    buffer_load_dword v31, off, s[0:3], s32
; GFX9-DENORM-NEXT:    buffer_load_dword v24, off, s[0:3], s32 offset:28
; GFX9-DENORM-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:32
; GFX9-DENORM-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21]
; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0)
; GFX9-DENORM-NEXT:    v_fma_f64 v[22:23], v[22:23], v[30:31], v[24:25]
; GFX9-DENORM-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23]
; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_f64_add_mul:
; GFX10-CONTRACT:       ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT:    s_clause 0x8
; GFX10-CONTRACT-NEXT:    buffer_load_dword v31, off, s[0:3], s32
; GFX10-CONTRACT-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:4
; GFX10-CONTRACT-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:8
; GFX10-CONTRACT-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:12
; GFX10-CONTRACT-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:16
; GFX10-CONTRACT-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:20
; GFX10-CONTRACT-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:24
; GFX10-CONTRACT-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:28
; GFX10-CONTRACT-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:32
; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(6)
; GFX10-CONTRACT-NEXT:    v_fma_f64 v[16:17], v[16:17], v[24:25], v[32:33]
; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(4)
; GFX10-CONTRACT-NEXT:    v_fma_f64 v[18:19], v[18:19], v[26:27], v[34:35]
; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(2)
; GFX10-CONTRACT-NEXT:    v_fma_f64 v[20:21], v[20:21], v[28:29], v[36:37]
; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0)
;
GFX10-CONTRACT-NEXT: v_fma_f64 v[22:23], v[22:23], v[30:31], v[38:39] 770; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17] 771; GFX10-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19] 772; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21] 773; GFX10-CONTRACT-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23] 774; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] 775; 776; GFX10-DENORM-LABEL: test_f64_add_mul: 777; GFX10-DENORM: ; %bb.0: ; %.entry 778; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 779; GFX10-DENORM-NEXT: s_clause 0x8 780; GFX10-DENORM-NEXT: buffer_load_dword v31, off, s[0:3], s32 781; GFX10-DENORM-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4 782; GFX10-DENORM-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8 783; GFX10-DENORM-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12 784; GFX10-DENORM-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:16 785; GFX10-DENORM-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:20 786; GFX10-DENORM-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:24 787; GFX10-DENORM-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:28 788; GFX10-DENORM-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:32 789; GFX10-DENORM-NEXT: s_waitcnt vmcnt(6) 790; GFX10-DENORM-NEXT: v_fma_f64 v[16:17], v[16:17], v[24:25], v[32:33] 791; GFX10-DENORM-NEXT: s_waitcnt vmcnt(4) 792; GFX10-DENORM-NEXT: v_fma_f64 v[18:19], v[18:19], v[26:27], v[34:35] 793; GFX10-DENORM-NEXT: s_waitcnt vmcnt(2) 794; GFX10-DENORM-NEXT: v_fma_f64 v[20:21], v[20:21], v[28:29], v[36:37] 795; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) 796; GFX10-DENORM-NEXT: v_fma_f64 v[22:23], v[22:23], v[30:31], v[38:39] 797; GFX10-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17] 798; GFX10-DENORM-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19] 799; GFX10-DENORM-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21] 800; GFX10-DENORM-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23] 801; 
GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] 802; 803; GFX11-CONTRACT-LABEL: test_f64_add_mul: 804; GFX11-CONTRACT: ; %bb.0: ; %.entry 805; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 806; GFX11-CONTRACT-NEXT: s_clause 0x8 807; GFX11-CONTRACT-NEXT: scratch_load_b32 v31, off, s32 808; GFX11-CONTRACT-NEXT: scratch_load_b32 v32, off, s32 offset:4 809; GFX11-CONTRACT-NEXT: scratch_load_b32 v33, off, s32 offset:8 810; GFX11-CONTRACT-NEXT: scratch_load_b32 v34, off, s32 offset:12 811; GFX11-CONTRACT-NEXT: scratch_load_b32 v35, off, s32 offset:16 812; GFX11-CONTRACT-NEXT: scratch_load_b32 v36, off, s32 offset:20 813; GFX11-CONTRACT-NEXT: scratch_load_b32 v37, off, s32 offset:24 814; GFX11-CONTRACT-NEXT: scratch_load_b32 v38, off, s32 offset:28 815; GFX11-CONTRACT-NEXT: scratch_load_b32 v39, off, s32 offset:32 816; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(6) 817; GFX11-CONTRACT-NEXT: v_fma_f64 v[16:17], v[16:17], v[24:25], v[32:33] 818; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(4) 819; GFX11-CONTRACT-NEXT: v_fma_f64 v[18:19], v[18:19], v[26:27], v[34:35] 820; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(2) 821; GFX11-CONTRACT-NEXT: v_fma_f64 v[20:21], v[20:21], v[28:29], v[36:37] 822; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) 823; GFX11-CONTRACT-NEXT: v_fma_f64 v[22:23], v[22:23], v[30:31], v[38:39] 824; GFX11-CONTRACT-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 825; GFX11-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17] 826; GFX11-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19] 827; GFX11-CONTRACT-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 828; GFX11-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21] 829; GFX11-CONTRACT-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23] 830; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31] 831; 832; GFX11-DENORM-LABEL: test_f64_add_mul: 833; GFX11-DENORM: ; %bb.0: ; %.entry 834; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 835; 
GFX11-DENORM-NEXT: s_clause 0x8 836; GFX11-DENORM-NEXT: scratch_load_b32 v31, off, s32 837; GFX11-DENORM-NEXT: scratch_load_b32 v32, off, s32 offset:4 838; GFX11-DENORM-NEXT: scratch_load_b32 v33, off, s32 offset:8 839; GFX11-DENORM-NEXT: scratch_load_b32 v34, off, s32 offset:12 840; GFX11-DENORM-NEXT: scratch_load_b32 v35, off, s32 offset:16 841; GFX11-DENORM-NEXT: scratch_load_b32 v36, off, s32 offset:20 842; GFX11-DENORM-NEXT: scratch_load_b32 v37, off, s32 offset:24 843; GFX11-DENORM-NEXT: scratch_load_b32 v38, off, s32 offset:28 844; GFX11-DENORM-NEXT: scratch_load_b32 v39, off, s32 offset:32 845; GFX11-DENORM-NEXT: s_waitcnt vmcnt(6) 846; GFX11-DENORM-NEXT: v_fma_f64 v[16:17], v[16:17], v[24:25], v[32:33] 847; GFX11-DENORM-NEXT: s_waitcnt vmcnt(4) 848; GFX11-DENORM-NEXT: v_fma_f64 v[18:19], v[18:19], v[26:27], v[34:35] 849; GFX11-DENORM-NEXT: s_waitcnt vmcnt(2) 850; GFX11-DENORM-NEXT: v_fma_f64 v[20:21], v[20:21], v[28:29], v[36:37] 851; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) 852; GFX11-DENORM-NEXT: v_fma_f64 v[22:23], v[22:23], v[30:31], v[38:39] 853; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 854; GFX11-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17] 855; GFX11-DENORM-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19] 856; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 857; GFX11-DENORM-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21] 858; GFX11-DENORM-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23] 859; GFX11-DENORM-NEXT: s_setpc_b64 s[30:31] 860.entry: 861 %x = fmul fast <4 x double> %c, %d 862 %y = call fast <4 x double> @llvm.fmuladd.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %x) 863 %z = fadd fast <4 x double> %y, %e 864 ret <4 x double> %z 865} 866 867define <4 x double> @test_f64_add_mul_rhs(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d, <4 x double> %e) { 868; GFX9-CONTRACT-LABEL: test_f64_add_mul_rhs: 869; 
GFX9-CONTRACT: ; %bb.0: ; %.entry 870; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 871; GFX9-CONTRACT-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 872; GFX9-CONTRACT-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8 873; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) 874; GFX9-CONTRACT-NEXT: v_fma_f64 v[16:17], v[16:17], v[24:25], v[31:32] 875; GFX9-CONTRACT-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:12 876; GFX9-CONTRACT-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:16 877; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17] 878; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) 879; GFX9-CONTRACT-NEXT: v_fma_f64 v[18:19], v[18:19], v[26:27], v[24:25] 880; GFX9-CONTRACT-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:20 881; GFX9-CONTRACT-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:24 882; GFX9-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19] 883; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) 884; GFX9-CONTRACT-NEXT: v_fma_f64 v[20:21], v[20:21], v[28:29], v[24:25] 885; GFX9-CONTRACT-NEXT: buffer_load_dword v31, off, s[0:3], s32 886; GFX9-CONTRACT-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:28 887; GFX9-CONTRACT-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:32 888; GFX9-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21] 889; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) 890; GFX9-CONTRACT-NEXT: v_fma_f64 v[22:23], v[22:23], v[30:31], v[24:25] 891; GFX9-CONTRACT-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23] 892; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] 893; 894; GFX9-DENORM-LABEL: test_f64_add_mul_rhs: 895; GFX9-DENORM: ; %bb.0: ; %.entry 896; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 897; GFX9-DENORM-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 898; GFX9-DENORM-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8 899; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) 900; GFX9-DENORM-NEXT: v_fma_f64 v[16:17], v[16:17], v[24:25], v[31:32] 901; GFX9-DENORM-NEXT: 
buffer_load_dword v24, off, s[0:3], s32 offset:12 902; GFX9-DENORM-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:16 903; GFX9-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17] 904; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) 905; GFX9-DENORM-NEXT: v_fma_f64 v[18:19], v[18:19], v[26:27], v[24:25] 906; GFX9-DENORM-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:20 907; GFX9-DENORM-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:24 908; GFX9-DENORM-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19] 909; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) 910; GFX9-DENORM-NEXT: v_fma_f64 v[20:21], v[20:21], v[28:29], v[24:25] 911; GFX9-DENORM-NEXT: buffer_load_dword v31, off, s[0:3], s32 912; GFX9-DENORM-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:28 913; GFX9-DENORM-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:32 914; GFX9-DENORM-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21] 915; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) 916; GFX9-DENORM-NEXT: v_fma_f64 v[22:23], v[22:23], v[30:31], v[24:25] 917; GFX9-DENORM-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23] 918; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] 919; 920; GFX10-CONTRACT-LABEL: test_f64_add_mul_rhs: 921; GFX10-CONTRACT: ; %bb.0: ; %.entry 922; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 923; GFX10-CONTRACT-NEXT: s_clause 0x8 924; GFX10-CONTRACT-NEXT: buffer_load_dword v31, off, s[0:3], s32 925; GFX10-CONTRACT-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4 926; GFX10-CONTRACT-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8 927; GFX10-CONTRACT-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12 928; GFX10-CONTRACT-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:16 929; GFX10-CONTRACT-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:20 930; GFX10-CONTRACT-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:24 931; GFX10-CONTRACT-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:28 932; GFX10-CONTRACT-NEXT: buffer_load_dword v39, off, s[0:3], s32 
offset:32 933; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(6) 934; GFX10-CONTRACT-NEXT: v_fma_f64 v[16:17], v[16:17], v[24:25], v[32:33] 935; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(4) 936; GFX10-CONTRACT-NEXT: v_fma_f64 v[18:19], v[18:19], v[26:27], v[34:35] 937; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(2) 938; GFX10-CONTRACT-NEXT: v_fma_f64 v[20:21], v[20:21], v[28:29], v[36:37] 939; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) 940; GFX10-CONTRACT-NEXT: v_fma_f64 v[22:23], v[22:23], v[30:31], v[38:39] 941; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17] 942; GFX10-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19] 943; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21] 944; GFX10-CONTRACT-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23] 945; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] 946; 947; GFX10-DENORM-LABEL: test_f64_add_mul_rhs: 948; GFX10-DENORM: ; %bb.0: ; %.entry 949; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 950; GFX10-DENORM-NEXT: s_clause 0x8 951; GFX10-DENORM-NEXT: buffer_load_dword v31, off, s[0:3], s32 952; GFX10-DENORM-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4 953; GFX10-DENORM-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8 954; GFX10-DENORM-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12 955; GFX10-DENORM-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:16 956; GFX10-DENORM-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:20 957; GFX10-DENORM-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:24 958; GFX10-DENORM-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:28 959; GFX10-DENORM-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:32 960; GFX10-DENORM-NEXT: s_waitcnt vmcnt(6) 961; GFX10-DENORM-NEXT: v_fma_f64 v[16:17], v[16:17], v[24:25], v[32:33] 962; GFX10-DENORM-NEXT: s_waitcnt vmcnt(4) 963; GFX10-DENORM-NEXT: v_fma_f64 v[18:19], v[18:19], v[26:27], v[34:35] 964; GFX10-DENORM-NEXT: s_waitcnt vmcnt(2) 965; GFX10-DENORM-NEXT: v_fma_f64 v[20:21], v[20:21], 
v[28:29], v[36:37] 966; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) 967; GFX10-DENORM-NEXT: v_fma_f64 v[22:23], v[22:23], v[30:31], v[38:39] 968; GFX10-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17] 969; GFX10-DENORM-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19] 970; GFX10-DENORM-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21] 971; GFX10-DENORM-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23] 972; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] 973; 974; GFX11-CONTRACT-LABEL: test_f64_add_mul_rhs: 975; GFX11-CONTRACT: ; %bb.0: ; %.entry 976; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 977; GFX11-CONTRACT-NEXT: s_clause 0x8 978; GFX11-CONTRACT-NEXT: scratch_load_b32 v31, off, s32 979; GFX11-CONTRACT-NEXT: scratch_load_b32 v32, off, s32 offset:4 980; GFX11-CONTRACT-NEXT: scratch_load_b32 v33, off, s32 offset:8 981; GFX11-CONTRACT-NEXT: scratch_load_b32 v34, off, s32 offset:12 982; GFX11-CONTRACT-NEXT: scratch_load_b32 v35, off, s32 offset:16 983; GFX11-CONTRACT-NEXT: scratch_load_b32 v36, off, s32 offset:20 984; GFX11-CONTRACT-NEXT: scratch_load_b32 v37, off, s32 offset:24 985; GFX11-CONTRACT-NEXT: scratch_load_b32 v38, off, s32 offset:28 986; GFX11-CONTRACT-NEXT: scratch_load_b32 v39, off, s32 offset:32 987; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(6) 988; GFX11-CONTRACT-NEXT: v_fma_f64 v[16:17], v[16:17], v[24:25], v[32:33] 989; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(4) 990; GFX11-CONTRACT-NEXT: v_fma_f64 v[18:19], v[18:19], v[26:27], v[34:35] 991; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(2) 992; GFX11-CONTRACT-NEXT: v_fma_f64 v[20:21], v[20:21], v[28:29], v[36:37] 993; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) 994; GFX11-CONTRACT-NEXT: v_fma_f64 v[22:23], v[22:23], v[30:31], v[38:39] 995; GFX11-CONTRACT-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 996; GFX11-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17] 997; GFX11-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19] 998; GFX11-CONTRACT-NEXT: s_delay_alu 
instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 999; GFX11-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21] 1000; GFX11-CONTRACT-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23] 1001; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31] 1002; 1003; GFX11-DENORM-LABEL: test_f64_add_mul_rhs: 1004; GFX11-DENORM: ; %bb.0: ; %.entry 1005; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1006; GFX11-DENORM-NEXT: s_clause 0x8 1007; GFX11-DENORM-NEXT: scratch_load_b32 v31, off, s32 1008; GFX11-DENORM-NEXT: scratch_load_b32 v32, off, s32 offset:4 1009; GFX11-DENORM-NEXT: scratch_load_b32 v33, off, s32 offset:8 1010; GFX11-DENORM-NEXT: scratch_load_b32 v34, off, s32 offset:12 1011; GFX11-DENORM-NEXT: scratch_load_b32 v35, off, s32 offset:16 1012; GFX11-DENORM-NEXT: scratch_load_b32 v36, off, s32 offset:20 1013; GFX11-DENORM-NEXT: scratch_load_b32 v37, off, s32 offset:24 1014; GFX11-DENORM-NEXT: scratch_load_b32 v38, off, s32 offset:28 1015; GFX11-DENORM-NEXT: scratch_load_b32 v39, off, s32 offset:32 1016; GFX11-DENORM-NEXT: s_waitcnt vmcnt(6) 1017; GFX11-DENORM-NEXT: v_fma_f64 v[16:17], v[16:17], v[24:25], v[32:33] 1018; GFX11-DENORM-NEXT: s_waitcnt vmcnt(4) 1019; GFX11-DENORM-NEXT: v_fma_f64 v[18:19], v[18:19], v[26:27], v[34:35] 1020; GFX11-DENORM-NEXT: s_waitcnt vmcnt(2) 1021; GFX11-DENORM-NEXT: v_fma_f64 v[20:21], v[20:21], v[28:29], v[36:37] 1022; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) 1023; GFX11-DENORM-NEXT: v_fma_f64 v[22:23], v[22:23], v[30:31], v[38:39] 1024; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 1025; GFX11-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17] 1026; GFX11-DENORM-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19] 1027; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 1028; GFX11-DENORM-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21] 1029; GFX11-DENORM-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23] 1030; GFX11-DENORM-NEXT: 
s_setpc_b64 s[30:31] 1031.entry: 1032 %x = fmul fast <4 x double> %c, %d 1033 %y = call fast <4 x double> @llvm.fmuladd.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %x) 1034 %z = fadd fast <4 x double> %e, %y 1035 ret <4 x double> %z 1036} 1037 1038declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>) #0 1039declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #0 1040declare <4 x half> @llvm.fmuladd.v4f16(<4 x half>, <4 x half>, <4 x half>) #0 1041declare double @llvm.fmuladd.f64(double, double, double) #0 1042declare float @llvm.fmuladd.f32(float, float, float) #0 1043declare half @llvm.fmuladd.f16(half, half, half) #0 1044attributes #0 = { nounwind readnone } 1045