; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -fp-contract=fast < %s | FileCheck -check-prefix=GFX9-CONTRACT %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX9-DENORM %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -fp-contract=fast < %s | FileCheck -check-prefix=GFX10-CONTRACT %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX10-DENORM %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -fp-contract=fast < %s | FileCheck -check-prefix=GFX11-CONTRACT %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX11-DENORM %s

; GlobalISel codegen test for a floating-point multiply whose result feeds a
; subtract. Every function is compiled for gfx900 and gfx1010 in three
; configurations (default, -fp-contract=fast, preserve-sign denormal mode) and
; for gfx1100 in two (-fp-contract=fast, preserve-sign denormal mode); there is
; no default-configuration invocation for gfx1100.
; The "_rhs" variants place the multiply on the right-hand side of the subtract.
;
; fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
; fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)

; f32, (x * y) - z.
; Expected: separate multiply and subtract in the default configuration; one
; v_fma_f32 with a negated addend under -fp-contract=fast; one v_mad_f32 with a
; negated addend in preserve-sign mode on gfx900 and gfx1010, while gfx1100
; keeps the separate multiply and subtract in that mode.
define float @test_f32_sub_mul(float %x, float %y, float %z) {
; GFX9-LABEL: test_f32_sub_mul:
; GFX9: ; %bb.0: ; %.entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX9-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-CONTRACT-LABEL: test_f32_sub_mul:
; GFX9-CONTRACT: ; %bb.0: ; %.entry
; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, -v2
; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_f32_sub_mul:
; GFX9-DENORM: ; %bb.0: ; %.entry
; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v1, -v2
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_f32_sub_mul:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_f32_sub_mul:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, -v2
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_f32_sub_mul:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, v1, -v2
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-CONTRACT-LABEL: test_f32_sub_mul:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, -v2
; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-DENORM-LABEL: test_f32_sub_mul:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-DENORM-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-DENORM-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX11-DENORM-NEXT: s_setpc_b64 s[30:31]
.entry:
  %a = fmul float %x, %y
  %b = fsub float %a, %z
  ret float %b
}

; f32, z - (x * y).
; Same expectations as test_f32_sub_mul, except that the fused forms negate the
; first multiplicand instead of the addend.
define float @test_f32_sub_mul_rhs(float %x, float %y, float %z) {
; GFX9-LABEL: test_f32_sub_mul_rhs:
; GFX9: ; %bb.0: ; %.entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX9-NEXT: v_sub_f32_e32 v0, v2, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-CONTRACT-LABEL: test_f32_sub_mul_rhs:
; GFX9-CONTRACT: ; %bb.0: ; %.entry
; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-CONTRACT-NEXT: v_fma_f32 v0, -v0, v1, v2
; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_f32_sub_mul_rhs:
; GFX9-DENORM: ; %bb.0: ; %.entry
; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT: v_mad_f32 v0, -v0, v1, v2
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_f32_sub_mul_rhs:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX10-NEXT: v_sub_f32_e32 v0, v2, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_f32_sub_mul_rhs:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT: v_fma_f32 v0, -v0, v1, v2
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_f32_sub_mul_rhs:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT: v_mad_f32 v0, -v0, v1, v2
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-CONTRACT-LABEL: test_f32_sub_mul_rhs:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-CONTRACT-NEXT: v_fma_f32 v0, -v0, v1, v2
; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-DENORM-LABEL: test_f32_sub_mul_rhs:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-DENORM-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-DENORM-NEXT: v_sub_f32_e32 v0, v2, v0
; GFX11-DENORM-NEXT: s_setpc_b64 s[30:31]
.entry:
  %a = fmul float %x, %y
  %b = fsub float %z, %a
  ret float %b
}

; f16, (x * y) - z.
; Expected: one v_fma_f16 with a negated addend under -fp-contract=fast on all
; three targets; one v_mad_legacy_f16 in preserve-sign mode on gfx900 only;
; separate multiply and subtract everywhere else (including preserve-sign mode
; on gfx1010 and gfx1100).
define half @test_half_sub_mul(half %x, half %y, half %z) {
; GFX9-LABEL: test_half_sub_mul:
; GFX9: ; %bb.0: ; %.entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX9-NEXT: v_sub_f16_e32 v0, v0, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-CONTRACT-LABEL: test_half_sub_mul:
; GFX9-CONTRACT: ; %bb.0: ; %.entry
; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, -v2
; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_half_sub_mul:
; GFX9-DENORM: ; %bb.0: ; %.entry
; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT: v_mad_legacy_f16 v0, v0, v1, -v2
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_half_sub_mul:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX10-NEXT: v_sub_f16_e32 v0, v0, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_half_sub_mul:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, -v2
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_half_sub_mul:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX10-DENORM-NEXT: v_sub_f16_e32 v0, v0, v2
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-CONTRACT-LABEL: test_half_sub_mul:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, -v2
; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-DENORM-LABEL: test_half_sub_mul:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-DENORM-NEXT: v_sub_f16_e32 v0, v0, v2
; GFX11-DENORM-NEXT: s_setpc_b64 s[30:31]
.entry:
  %a = fmul half %x, %y
  %b = fsub half %a, %z
  ret half %b
}

; f16, z - (x * y).
; Same expectations as test_half_sub_mul, except that the fused forms negate
; the first multiplicand instead of the addend.
define half @test_half_sub_mul_rhs(half %x, half %y, half %z) {
; GFX9-LABEL: test_half_sub_mul_rhs:
; GFX9: ; %bb.0: ; %.entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX9-NEXT: v_sub_f16_e32 v0, v2, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-CONTRACT-LABEL: test_half_sub_mul_rhs:
; GFX9-CONTRACT: ; %bb.0: ; %.entry
; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-CONTRACT-NEXT: v_fma_f16 v0, -v0, v1, v2
; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_half_sub_mul_rhs:
; GFX9-DENORM: ; %bb.0: ; %.entry
; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT: v_mad_legacy_f16 v0, -v0, v1, v2
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_half_sub_mul_rhs:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX10-NEXT: v_sub_f16_e32 v0, v2, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_half_sub_mul_rhs:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT: v_fma_f16 v0, -v0, v1, v2
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_half_sub_mul_rhs:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX10-DENORM-NEXT: v_sub_f16_e32 v0, v2, v0
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-CONTRACT-LABEL: test_half_sub_mul_rhs:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-CONTRACT-NEXT: v_fma_f16 v0, -v0, v1, v2
; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-DENORM-LABEL: test_half_sub_mul_rhs:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-DENORM-NEXT: v_sub_f16_e32 v0, v2, v0
; GFX11-DENORM-NEXT: s_setpc_b64 s[30:31]
.entry:
  %a = fmul half %x, %y
  %b = fsub half %z, %a
  ret half %b
}

; f64, (x * y) - z.
; Expected: one v_fma_f64 with a negated addend only under -fp-contract=fast;
; every other configuration keeps v_mul_f64 followed by v_add_f64 with a
; negated second operand.
define double @test_double_sub_mul(double %x, double %y, double %z) {
; GFX9-LABEL: test_double_sub_mul:
; GFX9: ; %bb.0: ; %.entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-CONTRACT-LABEL: test_double_sub_mul:
; GFX9-CONTRACT: ; %bb.0: ; %.entry
; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_double_sub_mul:
; GFX9-DENORM: ; %bb.0: ; %.entry
; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX9-DENORM-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5]
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_double_sub_mul:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_double_sub_mul:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_double_sub_mul:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX10-DENORM-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5]
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-CONTRACT-LABEL: test_double_sub_mul:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-DENORM-LABEL: test_double_sub_mul:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-DENORM-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5]
; GFX11-DENORM-NEXT: s_setpc_b64 s[30:31]
.entry:
  %a = fmul double %x, %y
  %b = fsub double %a, %z
  ret double %b
}

; f64, z - (x * y).
; Same expectations as test_double_sub_mul; the fused form negates the first
; multiplicand, and the unfused form negates the product in v_add_f64.
define double @test_double_sub_mul_rhs(double %x, double %y, double %z) {
; GFX9-LABEL: test_double_sub_mul_rhs:
; GFX9: ; %bb.0: ; %.entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX9-NEXT: v_add_f64 v[0:1], v[4:5], -v[0:1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-CONTRACT-LABEL: test_double_sub_mul_rhs:
; GFX9-CONTRACT: ; %bb.0: ; %.entry
; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], v[4:5]
; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_double_sub_mul_rhs:
; GFX9-DENORM: ; %bb.0: ; %.entry
; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX9-DENORM-NEXT: v_add_f64 v[0:1], v[4:5], -v[0:1]
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_double_sub_mul_rhs:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT: v_add_f64 v[0:1], v[4:5], -v[0:1]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_double_sub_mul_rhs:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], v[4:5]
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_double_sub_mul_rhs:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX10-DENORM-NEXT: v_add_f64 v[0:1], v[4:5], -v[0:1]
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-CONTRACT-LABEL: test_double_sub_mul_rhs:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], v[4:5]
; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-DENORM-LABEL: test_double_sub_mul_rhs:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-DENORM-NEXT: v_add_f64 v[0:1], v[4:5], -v[0:1]
; GFX11-DENORM-NEXT: s_setpc_b64 s[30:31]
.entry:
  %a = fmul double %x, %y
  %b = fsub double %z, %a
  ret double %b
}

; <4 x float>, (x * y) - z, handled one element per instruction.
; Expected: four v_fma_f32 under -fp-contract=fast; four v_mad_f32 in
; preserve-sign mode on gfx900 and gfx1010; otherwise four multiplies followed
; by four subtracts (emitted as v_dual_* pairs in the gfx1100 preserve-sign
; configuration).
define <4 x float> @test_v4f32_sub_mul(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
; GFX9-LABEL: test_v4f32_sub_mul:
; GFX9: ; %bb.0: ; %.entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_f32_e32 v0, v0, v4
; GFX9-NEXT: v_mul_f32_e32 v1, v1, v5
; GFX9-NEXT: v_mul_f32_e32 v2, v2, v6
; GFX9-NEXT: v_mul_f32_e32 v3, v3, v7
; GFX9-NEXT: v_sub_f32_e32 v0, v0, v8
; GFX9-NEXT: v_sub_f32_e32 v1, v1, v9
; GFX9-NEXT: v_sub_f32_e32 v2, v2, v10
; GFX9-NEXT: v_sub_f32_e32 v3, v3, v11
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-CONTRACT-LABEL: test_v4f32_sub_mul:
; GFX9-CONTRACT: ; %bb.0: ; %.entry
; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, v4, -v8
; GFX9-CONTRACT-NEXT: v_fma_f32 v1, v1, v5, -v9
; GFX9-CONTRACT-NEXT: v_fma_f32 v2, v2, v6, -v10
; GFX9-CONTRACT-NEXT: v_fma_f32 v3, v3, v7, -v11
; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_v4f32_sub_mul:
; GFX9-DENORM: ; %bb.0: ; %.entry
; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v4, -v8
; GFX9-DENORM-NEXT: v_mad_f32 v1, v1, v5, -v9
; GFX9-DENORM-NEXT: v_mad_f32 v2, v2, v6, -v10
; GFX9-DENORM-NEXT: v_mad_f32 v3, v3, v7, -v11
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_v4f32_sub_mul:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v4
; GFX10-NEXT: v_mul_f32_e32 v1, v1, v5
; GFX10-NEXT: v_mul_f32_e32 v2, v2, v6
; GFX10-NEXT: v_mul_f32_e32 v3, v3, v7
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v8
; GFX10-NEXT: v_sub_f32_e32 v1, v1, v9
; GFX10-NEXT: v_sub_f32_e32 v2, v2, v10
; GFX10-NEXT: v_sub_f32_e32 v3, v3, v11
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_v4f32_sub_mul:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v4, -v8
; GFX10-CONTRACT-NEXT: v_fma_f32 v1, v1, v5, -v9
; GFX10-CONTRACT-NEXT: v_fma_f32 v2, v2, v6, -v10
; GFX10-CONTRACT-NEXT: v_fma_f32 v3, v3, v7, -v11
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_v4f32_sub_mul:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, v4, -v8
; GFX10-DENORM-NEXT: v_mad_f32 v1, v1, v5, -v9
; GFX10-DENORM-NEXT: v_mad_f32 v2, v2, v6, -v10
; GFX10-DENORM-NEXT: v_mad_f32 v3, v3, v7, -v11
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-CONTRACT-LABEL: test_v4f32_sub_mul:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-CONTRACT-NEXT: v_fma_f32 v0, v0, v4, -v8
; GFX11-CONTRACT-NEXT: v_fma_f32 v1, v1, v5, -v9
; GFX11-CONTRACT-NEXT: v_fma_f32 v2, v2, v6, -v10
; GFX11-CONTRACT-NEXT: v_fma_f32 v3, v3, v7, -v11
; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-DENORM-LABEL: test_v4f32_sub_mul:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-DENORM-NEXT: v_dual_mul_f32 v0, v0, v4 :: v_dual_mul_f32 v1, v1, v5
; GFX11-DENORM-NEXT: v_dual_mul_f32 v2, v2, v6 :: v_dual_mul_f32 v3, v3, v7
; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-DENORM-NEXT: v_dual_sub_f32 v0, v0, v8 :: v_dual_sub_f32 v1, v1, v9
; GFX11-DENORM-NEXT: v_dual_sub_f32 v2, v2, v10 :: v_dual_sub_f32 v3, v3, v11
; GFX11-DENORM-NEXT: s_setpc_b64 s[30:31]
.entry:
  %a = fmul <4 x float> %x, %y
  %b = fsub <4 x float> %a, %z
  ret <4 x float> %b
}

; <4 x float>, z - (x * y).
; Same expectations as test_v4f32_sub_mul, except that the fused forms negate
; the first multiplicand instead of the addend.
define <4 x float> @test_v4f32_sub_mul_rhs(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
; GFX9-LABEL: test_v4f32_sub_mul_rhs:
; GFX9: ; %bb.0: ; %.entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_f32_e32 v0, v0, v4
; GFX9-NEXT: v_mul_f32_e32 v1, v1, v5
; GFX9-NEXT: v_mul_f32_e32 v2, v2, v6
; GFX9-NEXT: v_mul_f32_e32 v3, v3, v7
; GFX9-NEXT: v_sub_f32_e32 v0, v8, v0
; GFX9-NEXT: v_sub_f32_e32 v1, v9, v1
; GFX9-NEXT: v_sub_f32_e32 v2, v10, v2
; GFX9-NEXT: v_sub_f32_e32 v3, v11, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-CONTRACT-LABEL: test_v4f32_sub_mul_rhs:
; GFX9-CONTRACT: ; %bb.0: ; %.entry
; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-CONTRACT-NEXT: v_fma_f32 v0, -v0, v4, v8
; GFX9-CONTRACT-NEXT: v_fma_f32 v1, -v1, v5, v9
; GFX9-CONTRACT-NEXT: v_fma_f32 v2, -v2, v6, v10
; GFX9-CONTRACT-NEXT: v_fma_f32 v3, -v3, v7, v11
; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_v4f32_sub_mul_rhs:
; GFX9-DENORM: ; %bb.0: ; %.entry
; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT: v_mad_f32 v0, -v0, v4, v8
; GFX9-DENORM-NEXT: v_mad_f32 v1, -v1, v5, v9
; GFX9-DENORM-NEXT: v_mad_f32 v2, -v2, v6, v10
; GFX9-DENORM-NEXT: v_mad_f32 v3, -v3, v7, v11
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_v4f32_sub_mul_rhs:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v4
; GFX10-NEXT: v_mul_f32_e32 v1, v1, v5
; GFX10-NEXT: v_mul_f32_e32 v2, v2, v6
; GFX10-NEXT: v_mul_f32_e32 v3, v3, v7
; GFX10-NEXT: v_sub_f32_e32 v0, v8, v0
; GFX10-NEXT: v_sub_f32_e32 v1, v9, v1
; GFX10-NEXT: v_sub_f32_e32 v2, v10, v2
; GFX10-NEXT: v_sub_f32_e32 v3, v11, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_v4f32_sub_mul_rhs:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT: v_fma_f32 v0, -v0, v4, v8
; GFX10-CONTRACT-NEXT: v_fma_f32 v1, -v1, v5, v9
; GFX10-CONTRACT-NEXT: v_fma_f32 v2, -v2, v6, v10
; GFX10-CONTRACT-NEXT: v_fma_f32 v3, -v3, v7, v11
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_v4f32_sub_mul_rhs:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT: v_mad_f32 v0, -v0, v4, v8
; GFX10-DENORM-NEXT: v_mad_f32 v1, -v1, v5, v9
; GFX10-DENORM-NEXT: v_mad_f32 v2, -v2, v6, v10
; GFX10-DENORM-NEXT: v_mad_f32 v3, -v3, v7, v11
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-CONTRACT-LABEL: test_v4f32_sub_mul_rhs:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-CONTRACT-NEXT: v_fma_f32 v0, -v0, v4, v8
; GFX11-CONTRACT-NEXT: v_fma_f32 v1, -v1, v5, v9
; GFX11-CONTRACT-NEXT: v_fma_f32 v2, -v2, v6, v10
; GFX11-CONTRACT-NEXT: v_fma_f32 v3, -v3, v7, v11
; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-DENORM-LABEL: test_v4f32_sub_mul_rhs:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-DENORM-NEXT: v_dual_mul_f32 v0, v0, v4 :: v_dual_mul_f32 v1, v1, v5
; GFX11-DENORM-NEXT: v_dual_mul_f32 v2, v2, v6 :: v_dual_mul_f32 v3, v3, v7
; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-DENORM-NEXT: v_dual_sub_f32 v0, v8, v0 :: v_dual_sub_f32 v1, v9, v1
; GFX11-DENORM-NEXT: v_dual_sub_f32 v2, v10, v2 :: v_dual_sub_f32 v3, v11, v3
; GFX11-DENORM-NEXT: s_setpc_b64 s[30:31]
.entry:
  %a = fmul <4 x float> %x, %y
  %b = fsub <4 x float> %z, %a
  ret <4 x float> %b
}

; <4 x half>, (x * y) - z.
; Expected: two v_pk_fma_f16 with neg_lo/neg_hi set on the third operand under
; -fp-contract=fast. Every other configuration, including preserve-sign mode,
; keeps two v_pk_mul_f16 followed by per-half v_sub_f16 and v_pack_b32_f16; the
; gfx1100 output obtains the high halves with 16-bit right shifts where
; gfx900/gfx1010 use SDWA operand selects.
define <4 x half> @test_v4f16_sub_mul(<4 x half> %x, <4 x half> %y, <4 x half> %z) {
; GFX9-LABEL: test_v4f16_sub_mul:
; GFX9: ; %bb.0: ; %.entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX9-NEXT: v_sub_f16_e32 v2, v0, v4
; GFX9-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_sub_f16_e32 v3, v1, v5
; GFX9-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_pack_b32_f16 v0, v2, v0
; GFX9-NEXT: v_pack_b32_f16 v1, v3, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-CONTRACT-LABEL: test_v4f16_sub_mul:
; GFX9-CONTRACT: ; %bb.0: ; %.entry
; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[0,0,1] neg_hi:[0,0,1]
; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[0,0,1] neg_hi:[0,0,1]
; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_v4f16_sub_mul:
; GFX9-DENORM: ; %bb.0: ; %.entry
; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX9-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX9-DENORM-NEXT: v_sub_f16_e32 v2, v0, v4
; GFX9-DENORM-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-DENORM-NEXT: v_sub_f16_e32 v3, v1, v5
; GFX9-DENORM-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-DENORM-NEXT: v_pack_b32_f16 v0, v2, v0
; GFX9-DENORM-NEXT: v_pack_b32_f16 v1, v3, v1
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_v4f16_sub_mul:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX10-NEXT: v_sub_f16_e32 v2, v0, v4
; GFX10-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-NEXT: v_sub_f16_e32 v3, v1, v5
; GFX10-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-NEXT: v_pack_b32_f16 v0, v2, v0
; GFX10-NEXT: v_pack_b32_f16 v1, v3, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_v4f16_sub_mul:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[0,0,1] neg_hi:[0,0,1]
; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[0,0,1] neg_hi:[0,0,1]
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_v4f16_sub_mul:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX10-DENORM-NEXT: v_sub_f16_e32 v2, v0, v4
; GFX10-DENORM-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-DENORM-NEXT: v_sub_f16_e32 v3, v1, v5
; GFX10-DENORM-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-DENORM-NEXT: v_pack_b32_f16 v0, v2, v0
; GFX10-DENORM-NEXT: v_pack_b32_f16 v1, v3, v1
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-CONTRACT-LABEL: test_v4f16_sub_mul:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[0,0,1] neg_hi:[0,0,1]
; GFX11-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[0,0,1] neg_hi:[0,0,1]
; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-DENORM-LABEL: test_v4f16_sub_mul:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX11-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v4
; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v3, 16, v5
; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v7, 16, v1
; GFX11-DENORM-NEXT: v_sub_f16_e32 v0, v0, v4
; GFX11-DENORM-NEXT: v_sub_f16_e32 v1, v1, v5
; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-DENORM-NEXT: v_sub_f16_e32 v2, v6, v2
; GFX11-DENORM-NEXT: v_sub_f16_e32 v3, v7, v3
; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-DENORM-NEXT: v_pack_b32_f16 v0, v0, v2
; GFX11-DENORM-NEXT: v_pack_b32_f16 v1, v1, v3
; GFX11-DENORM-NEXT: s_setpc_b64 s[30:31]
.entry:
  %a = fmul <4 x half> %x, %y
  %b = fsub <4 x half> %a, %z
  ret <4 x half> %b
}

; <4 x half>, z - (x * y).
; Same expectations as test_v4f16_sub_mul, except that the fused form sets
; neg_lo/neg_hi on the first operand instead of the third.
define <4 x half> @test_v4f16_sub_mul_rhs(<4 x half> %x, <4 x half> %y, <4 x half> %z) {
; GFX9-LABEL: test_v4f16_sub_mul_rhs:
; GFX9: ; %bb.0: ; %.entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX9-NEXT: v_sub_f16_e32 v2, v4, v0
; GFX9-NEXT: v_sub_f16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_sub_f16_e32 v3, v5, v1
; GFX9-NEXT: v_sub_f16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_pack_b32_f16 v0, v2, v0
; GFX9-NEXT: v_pack_b32_f16 v1, v3, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-CONTRACT-LABEL: test_v4f16_sub_mul_rhs:
; GFX9-CONTRACT: ; %bb.0: ; %.entry
; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_v4f16_sub_mul_rhs:
; GFX9-DENORM: ; %bb.0: ; %.entry
; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX9-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX9-DENORM-NEXT: v_sub_f16_e32 v2, v4, v0
; GFX9-DENORM-NEXT: v_sub_f16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-DENORM-NEXT: v_sub_f16_e32 v3, v5, v1
; GFX9-DENORM-NEXT: v_sub_f16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-DENORM-NEXT: v_pack_b32_f16 v0, v2, v0
; GFX9-DENORM-NEXT: v_pack_b32_f16 v1, v3, v1
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_v4f16_sub_mul_rhs:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX10-NEXT: v_sub_f16_e32 v2, v4, v0
; GFX10-NEXT: v_sub_f16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-NEXT: v_sub_f16_e32 v3, v5, v1
; GFX10-NEXT: v_sub_f16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-NEXT: v_pack_b32_f16 v0, v2, v0
; GFX10-NEXT: v_pack_b32_f16 v1, v3, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_v4f16_sub_mul_rhs:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_v4f16_sub_mul_rhs:
; GFX10-DENORM: ; %bb.0: ; %.entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX10-DENORM-NEXT: v_sub_f16_e32 v2, v4, v0
; GFX10-DENORM-NEXT: v_sub_f16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-DENORM-NEXT: v_sub_f16_e32 v3, v5, v1
; GFX10-DENORM-NEXT: v_sub_f16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-DENORM-NEXT: v_pack_b32_f16 v0, v2, v0
; GFX10-DENORM-NEXT: v_pack_b32_f16 v1, v3, v1
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-CONTRACT-LABEL: test_v4f16_sub_mul_rhs:
; GFX11-CONTRACT: ; %bb.0: ; %.entry
; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX11-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-DENORM-LABEL: test_v4f16_sub_mul_rhs:
; GFX11-DENORM: ; %bb.0: ; %.entry
; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX11-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v4
; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v3, 16, v5
; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX11-DENORM-NEXT: v_lshrrev_b32_e32 v7, 16, v1
; GFX11-DENORM-NEXT: v_sub_f16_e32 v0, v4, v0
; GFX11-DENORM-NEXT: v_sub_f16_e32 v1, v5, v1
; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-DENORM-NEXT: v_sub_f16_e32 v2, v2, v6
; GFX11-DENORM-NEXT: v_sub_f16_e32 v3, v3, v7
; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-DENORM-NEXT: v_pack_b32_f16 v0, v0, v2
; GFX11-DENORM-NEXT: v_pack_b32_f16 v1, v1, v3
; GFX11-DENORM-NEXT: s_setpc_b64 s[30:31]
.entry:
  %a = fmul <4 x half> %x, %y
  %b = fsub <4 x half> %z, %a
  ret <4 x half> %b
}

define <4 x double> @test_v4f64_sub_mul(<4 x double> %x, <4 x double> %y, <4 x double> %z) {
; GFX9-LABEL: test_v4f64_sub_mul:
; GFX9: ; %bb.0: ; %.entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
; GFX9-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
; GFX9-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15]
; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], -v[16:17]
; GFX9-NEXT: v_add_f64 v[2:3], v[2:3], -v[18:19]
; GFX9-NEXT: v_add_f64 v[4:5], v[4:5], -v[20:21]
; GFX9-NEXT: v_add_f64 v[6:7], v[6:7], -v[22:23]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-CONTRACT-LABEL: test_v4f64_sub_mul:
; GFX9-CONTRACT: ; %bb.0: ; %.entry
; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], -v[16:17]
; GFX9-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], -v[18:19]
; GFX9-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], -v[20:21]
; GFX9-CONTRACT-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], -v[22:23]
; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_v4f64_sub_mul:
; GFX9-DENORM: ; %bb.0: ; %.entry
; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
; GFX9-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
; GFX9-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
; GFX9-DENORM-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15]
; GFX9-DENORM-NEXT: v_add_f64 v[0:1], v[0:1], -v[16:17]
; GFX9-DENORM-NEXT: v_add_f64 v[2:3], v[2:3], -v[18:19]
; GFX9-DENORM-NEXT: v_add_f64 v[4:5], v[4:5], -v[20:21]
; GFX9-DENORM-NEXT: v_add_f64 v[6:7], v[6:7], -v[22:23]
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_v4f64_sub_mul:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
; GFX10-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
; GFX10-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15]
; GFX10-NEXT: v_add_f64 v[0:1], v[0:1], -v[16:17]
; GFX10-NEXT: v_add_f64 v[2:3], v[2:3], -v[18:19]
; GFX10-NEXT: v_add_f64 v[4:5], v[4:5], -v[20:21]
; GFX10-NEXT: v_add_f64 v[6:7], v[6:7], -v[22:23]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_v4f64_sub_mul:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], -v[16:17]
; GFX10-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], 
v[10:11], -v[18:19] 794; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], -v[20:21] 795; GFX10-CONTRACT-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], -v[22:23] 796; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] 797; 798; GFX10-DENORM-LABEL: test_v4f64_sub_mul: 799; GFX10-DENORM: ; %bb.0: ; %.entry 800; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 801; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] 802; GFX10-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11] 803; GFX10-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13] 804; GFX10-DENORM-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15] 805; GFX10-DENORM-NEXT: v_add_f64 v[0:1], v[0:1], -v[16:17] 806; GFX10-DENORM-NEXT: v_add_f64 v[2:3], v[2:3], -v[18:19] 807; GFX10-DENORM-NEXT: v_add_f64 v[4:5], v[4:5], -v[20:21] 808; GFX10-DENORM-NEXT: v_add_f64 v[6:7], v[6:7], -v[22:23] 809; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] 810; 811; GFX11-CONTRACT-LABEL: test_v4f64_sub_mul: 812; GFX11-CONTRACT: ; %bb.0: ; %.entry 813; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 814; GFX11-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], -v[16:17] 815; GFX11-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], -v[18:19] 816; GFX11-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], -v[20:21] 817; GFX11-CONTRACT-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], -v[22:23] 818; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31] 819; 820; GFX11-DENORM-LABEL: test_v4f64_sub_mul: 821; GFX11-DENORM: ; %bb.0: ; %.entry 822; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 823; GFX11-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] 824; GFX11-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11] 825; GFX11-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13] 826; GFX11-DENORM-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15] 827; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 828; GFX11-DENORM-NEXT: v_add_f64 v[0:1], v[0:1], -v[16:17] 829; GFX11-DENORM-NEXT: v_add_f64 v[2:3], v[2:3], -v[18:19] 830; GFX11-DENORM-NEXT: 
s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 831; GFX11-DENORM-NEXT: v_add_f64 v[4:5], v[4:5], -v[20:21] 832; GFX11-DENORM-NEXT: v_add_f64 v[6:7], v[6:7], -v[22:23] 833; GFX11-DENORM-NEXT: s_setpc_b64 s[30:31] 834.entry: 835 %a = fmul <4 x double> %x, %y 836 %b = fsub <4 x double> %a, %z 837 ret <4 x double> %b 838} 839 840define <4 x double> @test_v4f64_sub_mul_rhs(<4 x double> %x, <4 x double> %y, <4 x double> %z) { 841; GFX9-LABEL: test_v4f64_sub_mul_rhs: 842; GFX9: ; %bb.0: ; %.entry 843; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 844; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] 845; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11] 846; GFX9-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13] 847; GFX9-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15] 848; GFX9-NEXT: v_add_f64 v[0:1], v[16:17], -v[0:1] 849; GFX9-NEXT: v_add_f64 v[2:3], v[18:19], -v[2:3] 850; GFX9-NEXT: v_add_f64 v[4:5], v[20:21], -v[4:5] 851; GFX9-NEXT: v_add_f64 v[6:7], v[22:23], -v[6:7] 852; GFX9-NEXT: s_setpc_b64 s[30:31] 853; 854; GFX9-CONTRACT-LABEL: test_v4f64_sub_mul_rhs: 855; GFX9-CONTRACT: ; %bb.0: ; %.entry 856; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 857; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[8:9], v[16:17] 858; GFX9-CONTRACT-NEXT: v_fma_f64 v[2:3], -v[2:3], v[10:11], v[18:19] 859; GFX9-CONTRACT-NEXT: v_fma_f64 v[4:5], -v[4:5], v[12:13], v[20:21] 860; GFX9-CONTRACT-NEXT: v_fma_f64 v[6:7], -v[6:7], v[14:15], v[22:23] 861; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] 862; 863; GFX9-DENORM-LABEL: test_v4f64_sub_mul_rhs: 864; GFX9-DENORM: ; %bb.0: ; %.entry 865; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 866; GFX9-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] 867; GFX9-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11] 868; GFX9-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13] 869; GFX9-DENORM-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15] 870; GFX9-DENORM-NEXT: v_add_f64 v[0:1], v[16:17], -v[0:1] 871; GFX9-DENORM-NEXT: v_add_f64 v[2:3], 
v[18:19], -v[2:3] 872; GFX9-DENORM-NEXT: v_add_f64 v[4:5], v[20:21], -v[4:5] 873; GFX9-DENORM-NEXT: v_add_f64 v[6:7], v[22:23], -v[6:7] 874; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] 875; 876; GFX10-LABEL: test_v4f64_sub_mul_rhs: 877; GFX10: ; %bb.0: ; %.entry 878; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 879; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] 880; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11] 881; GFX10-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13] 882; GFX10-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15] 883; GFX10-NEXT: v_add_f64 v[0:1], v[16:17], -v[0:1] 884; GFX10-NEXT: v_add_f64 v[2:3], v[18:19], -v[2:3] 885; GFX10-NEXT: v_add_f64 v[4:5], v[20:21], -v[4:5] 886; GFX10-NEXT: v_add_f64 v[6:7], v[22:23], -v[6:7] 887; GFX10-NEXT: s_setpc_b64 s[30:31] 888; 889; GFX10-CONTRACT-LABEL: test_v4f64_sub_mul_rhs: 890; GFX10-CONTRACT: ; %bb.0: ; %.entry 891; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 892; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[8:9], v[16:17] 893; GFX10-CONTRACT-NEXT: v_fma_f64 v[2:3], -v[2:3], v[10:11], v[18:19] 894; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], -v[4:5], v[12:13], v[20:21] 895; GFX10-CONTRACT-NEXT: v_fma_f64 v[6:7], -v[6:7], v[14:15], v[22:23] 896; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] 897; 898; GFX10-DENORM-LABEL: test_v4f64_sub_mul_rhs: 899; GFX10-DENORM: ; %bb.0: ; %.entry 900; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 901; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] 902; GFX10-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11] 903; GFX10-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13] 904; GFX10-DENORM-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15] 905; GFX10-DENORM-NEXT: v_add_f64 v[0:1], v[16:17], -v[0:1] 906; GFX10-DENORM-NEXT: v_add_f64 v[2:3], v[18:19], -v[2:3] 907; GFX10-DENORM-NEXT: v_add_f64 v[4:5], v[20:21], -v[4:5] 908; GFX10-DENORM-NEXT: v_add_f64 v[6:7], v[22:23], -v[6:7] 909; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] 910; 911; GFX11-CONTRACT-LABEL: test_v4f64_sub_mul_rhs: 
912; GFX11-CONTRACT: ; %bb.0: ; %.entry 913; GFX11-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 914; GFX11-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[8:9], v[16:17] 915; GFX11-CONTRACT-NEXT: v_fma_f64 v[2:3], -v[2:3], v[10:11], v[18:19] 916; GFX11-CONTRACT-NEXT: v_fma_f64 v[4:5], -v[4:5], v[12:13], v[20:21] 917; GFX11-CONTRACT-NEXT: v_fma_f64 v[6:7], -v[6:7], v[14:15], v[22:23] 918; GFX11-CONTRACT-NEXT: s_setpc_b64 s[30:31] 919; 920; GFX11-DENORM-LABEL: test_v4f64_sub_mul_rhs: 921; GFX11-DENORM: ; %bb.0: ; %.entry 922; GFX11-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 923; GFX11-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] 924; GFX11-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11] 925; GFX11-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13] 926; GFX11-DENORM-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15] 927; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 928; GFX11-DENORM-NEXT: v_add_f64 v[0:1], v[16:17], -v[0:1] 929; GFX11-DENORM-NEXT: v_add_f64 v[2:3], v[18:19], -v[2:3] 930; GFX11-DENORM-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 931; GFX11-DENORM-NEXT: v_add_f64 v[4:5], v[20:21], -v[4:5] 932; GFX11-DENORM-NEXT: v_add_f64 v[6:7], v[22:23], -v[6:7] 933; GFX11-DENORM-NEXT: s_setpc_b64 s[30:31] 934.entry: 935 %a = fmul <4 x double> %x, %y 936 %b = fsub <4 x double> %z, %a 937 ret <4 x double> %b 938} 939