; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -fp-contract=fast < %s | FileCheck -check-prefix=GFX9-CONTRACT %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX9-DENORM %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -fp-contract=fast < %s | FileCheck -check-prefix=GFX10-CONTRACT %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX10-DENORM %s

; Overview
; --------
; Every function in this file computes (-(x * y)) - z using three separate
; IR instructions (fmul, fneg, fsub). The file is compiled with GlobalISel
; for two targets (gfx900 and gfx1010), each in three configurations:
;   * default options,
;   * -fp-contract=fast,
;   * --denormal-fp-math=preserve-sign.
; Each configuration has its own check prefix, so the expected assembly is
; recorded six times per function. The assertion lines are generated by
; utils/update_llc_test_checks.py; regenerate them with that script instead
; of editing them by hand.

; fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
;
; Scalar float case. Recorded output on both targets:
;   * default options: v_mul_f32 with a negated second source, followed by
;     v_sub_f32;
;   * -fp-contract=fast: a single v_fma_f32 with the second and third sources
;     negated;
;   * preserve-sign denormals: a single v_mad_f32 with the second and third
;     sources negated.
define float @test_f32_sub_ext_neg_mul(float %x, float %y, float %z) {
; GFX9-LABEL: test_f32_sub_ext_neg_mul:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_f32_e64 v0, v0, -v1
; GFX9-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-CONTRACT-LABEL: test_f32_sub_ext_neg_mul:
; GFX9-CONTRACT: ; %bb.0: ; %entry
; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, -v1, -v2
; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_f32_sub_ext_neg_mul:
; GFX9-DENORM: ; %bb.0: ; %entry
; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, -v1, -v2
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_f32_sub_ext_neg_mul:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mul_f32_e64 v0, v0, -v1
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_f32_sub_ext_neg_mul:
; GFX10-CONTRACT: ; %bb.0: ; %entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, -v1, -v2
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_f32_sub_ext_neg_mul:
; GFX10-DENORM: ; %bb.0: ; %entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, -v1, -v2
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
entry:
  %a = fmul float %x, %y
  %b = fneg float %a
  %c = fsub float %b, %z
  ret float %c
}

; Scalar half case. Default and -fp-contract=fast mirror the float test with
; f16 opcodes (v_mul_f16 + v_sub_f16, or a single v_fma_f16). The two targets
; differ under preserve-sign denormals: gfx900 records a single
; v_mad_legacy_f16, while gfx1010 keeps the v_mul_f16 + v_sub_f16 pair.
define half @test_f16_sub_ext_neg_mul(half %x, half %y, half %z) {
; GFX9-LABEL: test_f16_sub_ext_neg_mul:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_f16_e64 v0, v0, -v1
; GFX9-NEXT: v_sub_f16_e32 v0, v0, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-CONTRACT-LABEL: test_f16_sub_ext_neg_mul:
; GFX9-CONTRACT: ; %bb.0: ; %entry
; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-CONTRACT-NEXT: v_fma_f16 v0, v0, -v1, -v2
; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_f16_sub_ext_neg_mul:
; GFX9-DENORM: ; %bb.0: ; %entry
; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT: v_mad_legacy_f16 v0, v0, -v1, -v2
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_f16_sub_ext_neg_mul:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mul_f16_e64 v0, v0, -v1
; GFX10-NEXT: v_sub_f16_e32 v0, v0, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_f16_sub_ext_neg_mul:
; GFX10-CONTRACT: ; %bb.0: ; %entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT: v_fma_f16 v0, v0, -v1, -v2
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_f16_sub_ext_neg_mul:
; GFX10-DENORM: ; %bb.0: ; %entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT: v_mul_f16_e64 v0, v0, -v1
; GFX10-DENORM-NEXT: v_sub_f16_e32 v0, v0, v2
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
entry:
  %a = fmul half %x, %y
  %b = fneg half %a
  %c = fsub half %b, %z
  ret half %c
}

; Scalar double case. Only -fp-contract=fast produces a fused instruction
; (v_fma_f64 with the first and third sources negated). The default and
; preserve-sign configurations both record v_mul_f64 followed by v_add_f64
; with both sources negated.
define double @test_f64_sub_ext_neg_mul(double %x, double %y, double %z) {
; GFX9-LABEL: test_f64_sub_ext_neg_mul:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX9-NEXT: v_add_f64 v[0:1], -v[0:1], -v[4:5]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-CONTRACT-LABEL: test_f64_sub_ext_neg_mul:
; GFX9-CONTRACT: ; %bb.0: ; %entry
; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], -v[4:5]
; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_f64_sub_ext_neg_mul:
; GFX9-DENORM: ; %bb.0: ; %entry
; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX9-DENORM-NEXT: v_add_f64 v[0:1], -v[0:1], -v[4:5]
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_f64_sub_ext_neg_mul:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX10-NEXT: v_add_f64 v[0:1], -v[0:1], -v[4:5]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_f64_sub_ext_neg_mul:
; GFX10-CONTRACT: ; %bb.0: ; %entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[2:3], -v[4:5]
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_f64_sub_ext_neg_mul:
; GFX10-DENORM: ; %bb.0: ; %entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX10-DENORM-NEXT: v_add_f64 v[0:1], -v[0:1], -v[4:5]
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
entry:
  %a = fmul double %x, %y
  %b = fneg double %a
  %c = fsub double %b, %z
  ret double %c
}

; fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
;
; <4 x float> case. The recorded output is the scalar float pattern repeated
; once per element: four v_mul_f32 + four v_sub_f32 by default, four
; v_fma_f32 with -fp-contract=fast, and four v_mad_f32 with preserve-sign
; denormals.
define <4 x float> @test_v4f32_sub_ext_neg_mul(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
; GFX9-LABEL: test_v4f32_sub_ext_neg_mul:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_f32_e64 v0, v0, -v4
; GFX9-NEXT: v_mul_f32_e64 v1, v1, -v5
; GFX9-NEXT: v_mul_f32_e64 v2, v2, -v6
; GFX9-NEXT: v_mul_f32_e64 v3, v3, -v7
; GFX9-NEXT: v_sub_f32_e32 v0, v0, v8
; GFX9-NEXT: v_sub_f32_e32 v1, v1, v9
; GFX9-NEXT: v_sub_f32_e32 v2, v2, v10
; GFX9-NEXT: v_sub_f32_e32 v3, v3, v11
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-CONTRACT-LABEL: test_v4f32_sub_ext_neg_mul:
; GFX9-CONTRACT: ; %bb.0: ; %entry
; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, -v4, -v8
; GFX9-CONTRACT-NEXT: v_fma_f32 v1, v1, -v5, -v9
; GFX9-CONTRACT-NEXT: v_fma_f32 v2, v2, -v6, -v10
; GFX9-CONTRACT-NEXT: v_fma_f32 v3, v3, -v7, -v11
; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_v4f32_sub_ext_neg_mul:
; GFX9-DENORM: ; %bb.0: ; %entry
; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, -v4, -v8
; GFX9-DENORM-NEXT: v_mad_f32 v1, v1, -v5, -v9
; GFX9-DENORM-NEXT: v_mad_f32 v2, v2, -v6, -v10
; GFX9-DENORM-NEXT: v_mad_f32 v3, v3, -v7, -v11
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_v4f32_sub_ext_neg_mul:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mul_f32_e64 v0, v0, -v4
; GFX10-NEXT: v_mul_f32_e64 v1, v1, -v5
; GFX10-NEXT: v_mul_f32_e64 v2, v2, -v6
; GFX10-NEXT: v_mul_f32_e64 v3, v3, -v7
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v8
; GFX10-NEXT: v_sub_f32_e32 v1, v1, v9
; GFX10-NEXT: v_sub_f32_e32 v2, v2, v10
; GFX10-NEXT: v_sub_f32_e32 v3, v3, v11
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_v4f32_sub_ext_neg_mul:
; GFX10-CONTRACT: ; %bb.0: ; %entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, -v4, -v8
; GFX10-CONTRACT-NEXT: v_fma_f32 v1, v1, -v5, -v9
; GFX10-CONTRACT-NEXT: v_fma_f32 v2, v2, -v6, -v10
; GFX10-CONTRACT-NEXT: v_fma_f32 v3, v3, -v7, -v11
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_v4f32_sub_ext_neg_mul:
; GFX10-DENORM: ; %bb.0: ; %entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, -v4, -v8
; GFX10-DENORM-NEXT: v_mad_f32 v1, v1, -v5, -v9
; GFX10-DENORM-NEXT: v_mad_f32 v2, v2, -v6, -v10
; GFX10-DENORM-NEXT: v_mad_f32 v3, v3, -v7, -v11
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
entry:
  %a = fmul <4 x float> %x, %y
  %b = fneg <4 x float> %a
  %c = fsub <4 x float> %b, %z
  ret <4 x float> %c
}

; <4 x half> case. With -fp-contract=fast both targets record two
; v_pk_fma_f16 instructions with neg_lo/neg_hi set on the second and third
; sources. In the default and preserve-sign configurations the multiply stays
; packed (v_pk_mul_f16 with neg_lo/neg_hi on the second source) but the
; subtract is recorded per 16-bit half: v_sub_f16_e32 for the low half, an
; SDWA v_sub_f16 selecting WORD_1 for the high half, then v_pack_b32_f16 to
; rebuild each 32-bit register.
define <4 x half> @test_v4f16_sub_ext_neg_mul(<4 x half> %x, <4 x half> %y, <4 x half> %z) {
; GFX9-LABEL: test_v4f16_sub_ext_neg_mul:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: v_sub_f16_e32 v2, v0, v4
; GFX9-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_sub_f16_e32 v3, v1, v5
; GFX9-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_pack_b32_f16 v0, v2, v0
; GFX9-NEXT: v_pack_b32_f16 v1, v3, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-CONTRACT-LABEL: test_v4f16_sub_ext_neg_mul:
; GFX9-CONTRACT: ; %bb.0: ; %entry
; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[0,1,1] neg_hi:[0,1,1]
; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[0,1,1] neg_hi:[0,1,1]
; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_v4f16_sub_ext_neg_mul:
; GFX9-DENORM: ; %bb.0: ; %entry
; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-DENORM-NEXT: v_sub_f16_e32 v2, v0, v4
; GFX9-DENORM-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-DENORM-NEXT: v_sub_f16_e32 v3, v1, v5
; GFX9-DENORM-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-DENORM-NEXT: v_pack_b32_f16 v0, v2, v0
; GFX9-DENORM-NEXT: v_pack_b32_f16 v1, v3, v1
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_v4f16_sub_ext_neg_mul:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-NEXT: v_sub_f16_e32 v2, v0, v4
; GFX10-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-NEXT: v_sub_f16_e32 v3, v1, v5
; GFX10-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-NEXT: v_pack_b32_f16 v0, v2, v0
; GFX10-NEXT: v_pack_b32_f16 v1, v3, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_v4f16_sub_ext_neg_mul:
; GFX10-CONTRACT: ; %bb.0: ; %entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[0,1,1] neg_hi:[0,1,1]
; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[0,1,1] neg_hi:[0,1,1]
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_v4f16_sub_ext_neg_mul:
; GFX10-DENORM: ; %bb.0: ; %entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-DENORM-NEXT: v_sub_f16_e32 v2, v0, v4
; GFX10-DENORM-NEXT: v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-DENORM-NEXT: v_sub_f16_e32 v3, v1, v5
; GFX10-DENORM-NEXT: v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-DENORM-NEXT: v_pack_b32_f16 v0, v2, v0
; GFX10-DENORM-NEXT: v_pack_b32_f16 v1, v3, v1
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
entry:
  %a = fmul <4 x half> %x, %y
  %b = fneg <4 x half> %a
  %c = fsub <4 x half> %b, %z
  ret <4 x half> %c
}

; <4 x double> case. The recorded output is the scalar double pattern
; repeated once per element: four v_fma_f64 with -fp-contract=fast, and four
; v_mul_f64 followed by four v_add_f64 (both sources negated) in the default
; and preserve-sign configurations.
define <4 x double> @test_v4f64_sub_ext_neg_mul(<4 x double> %x, <4 x double> %y, <4 x double> %z) {
; GFX9-LABEL: test_v4f64_sub_ext_neg_mul:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
; GFX9-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
; GFX9-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15]
; GFX9-NEXT: v_add_f64 v[0:1], -v[0:1], -v[16:17]
; GFX9-NEXT: v_add_f64 v[2:3], -v[2:3], -v[18:19]
; GFX9-NEXT: v_add_f64 v[4:5], -v[4:5], -v[20:21]
; GFX9-NEXT: v_add_f64 v[6:7], -v[6:7], -v[22:23]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-CONTRACT-LABEL: test_v4f64_sub_ext_neg_mul:
; GFX9-CONTRACT: ; %bb.0: ; %entry
; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[8:9], -v[16:17]
; GFX9-CONTRACT-NEXT: v_fma_f64 v[2:3], -v[2:3], v[10:11], -v[18:19]
; GFX9-CONTRACT-NEXT: v_fma_f64 v[4:5], -v[4:5], v[12:13], -v[20:21]
; GFX9-CONTRACT-NEXT: v_fma_f64 v[6:7], -v[6:7], v[14:15], -v[22:23]
; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_v4f64_sub_ext_neg_mul:
; GFX9-DENORM: ; %bb.0: ; %entry
; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
; GFX9-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
; GFX9-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
; GFX9-DENORM-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15]
; GFX9-DENORM-NEXT: v_add_f64 v[0:1], -v[0:1], -v[16:17]
; GFX9-DENORM-NEXT: v_add_f64 v[2:3], -v[2:3], -v[18:19]
; GFX9-DENORM-NEXT: v_add_f64 v[4:5], -v[4:5], -v[20:21]
; GFX9-DENORM-NEXT: v_add_f64 v[6:7], -v[6:7], -v[22:23]
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_v4f64_sub_ext_neg_mul:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
; GFX10-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
; GFX10-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
; GFX10-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15]
; GFX10-NEXT: v_add_f64 v[0:1], -v[0:1], -v[16:17]
; GFX10-NEXT: v_add_f64 v[2:3], -v[2:3], -v[18:19]
; GFX10-NEXT: v_add_f64 v[4:5], -v[4:5], -v[20:21]
; GFX10-NEXT: v_add_f64 v[6:7], -v[6:7], -v[22:23]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_v4f64_sub_ext_neg_mul:
; GFX10-CONTRACT: ; %bb.0: ; %entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], -v[0:1], v[8:9], -v[16:17]
; GFX10-CONTRACT-NEXT: v_fma_f64 v[2:3], -v[2:3], v[10:11], -v[18:19]
; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], -v[4:5], v[12:13], -v[20:21]
; GFX10-CONTRACT-NEXT: v_fma_f64 v[6:7], -v[6:7], v[14:15], -v[22:23]
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_v4f64_sub_ext_neg_mul:
; GFX10-DENORM: ; %bb.0: ; %entry
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9]
; GFX10-DENORM-NEXT: v_mul_f64 v[2:3], v[2:3], v[10:11]
; GFX10-DENORM-NEXT: v_mul_f64 v[4:5], v[4:5], v[12:13]
; GFX10-DENORM-NEXT: v_mul_f64 v[6:7], v[6:7], v[14:15]
; GFX10-DENORM-NEXT: v_add_f64 v[0:1], -v[0:1], -v[16:17]
; GFX10-DENORM-NEXT: v_add_f64 v[2:3], -v[2:3], -v[18:19]
; GFX10-DENORM-NEXT: v_add_f64 v[4:5], -v[4:5], -v[20:21]
; GFX10-DENORM-NEXT: v_add_f64 v[6:7], -v[6:7], -v[22:23]
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
entry:
  %a = fmul <4 x double> %x, %y
  %b = fneg <4 x double> %a
  %c = fsub <4 x double> %b, %z
  ret <4 x double> %c
}