; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefixes=FMA,FMA-INFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefixes=FMA4,FMA4-INFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefixes=FMA4,FMA4-INFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -fp-contract=fast | FileCheck %s --check-prefixes=AVX512,AVX512-INFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefixes=FMA,FMA-NOINFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefixes=FMA4,FMA4-NOINFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefixes=FMA4,FMA4-NOINFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefixes=AVX512,AVX512-NOINFS

; Every configuration above is run twice: once as-is and once with
; -enable-no-infs-fp-math. The short prefix of each pair holds checks that are
; identical in both runs; the prefix with the INFS / NOINFS suffix holds checks
; that differ between the two runs.

;
; Pattern: (fadd (fmul x, y), z) -> (fmadd x, y, z)
;

define float @test_f32_fmadd(float %a0, float %a1, float %a2) {
; FMA-LABEL: test_f32_fmadd:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f32_fmadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f32_fmadd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; AVX512-NEXT:    retq
  %x = fmul float %a0, %a1
  %res = fadd float %x, %a2
  ret float %res
}

define <4 x float> @test_4f32_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; FMA-LABEL: test_4f32_fmadd:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f32_fmadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f32_fmadd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; AVX512-NEXT:    retq
  %x = fmul <4 x float> %a0, %a1
  %res = fadd <4 x float> %x, %a2
  ret <4 x float> %res
}

define <8 x float> @test_8f32_fmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; FMA-LABEL: test_8f32_fmadd:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f32_fmadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f32_fmadd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; AVX512-NEXT:    retq
  %x = fmul <8 x float> %a0, %a1
  %res = fadd <8 x float> %x, %a2
  ret <8 x float> %res
}

define double @test_f64_fmadd(double %a0, double %a1, double %a2) {
; FMA-LABEL: test_f64_fmadd:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f64_fmadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f64_fmadd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; AVX512-NEXT:    retq
  %x = fmul double %a0, %a1
  %res = fadd double %x, %a2
  ret double %res
}

define <2 x double> @test_2f64_fmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; FMA-LABEL: test_2f64_fmadd:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_2f64_fmadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmaddpd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_2f64_fmadd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; AVX512-NEXT:    retq
  %x = fmul <2 x double> %a0, %a1
  %res = fadd <2 x double> %x, %a2
  ret <2 x double> %res
}

define <4 x double> @test_4f64_fmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; FMA-LABEL: test_4f64_fmadd:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f64_fmadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f64_fmadd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; AVX512-NEXT:    retq
  %x = fmul <4 x double> %a0, %a1
  %res = fadd <4 x double> %x, %a2
  ret <4 x double> %res
}

;
; Pattern: (fsub (fmul x, y), z) -> (fmsub x, y, z)
;

define float @test_f32_fmsub(float %a0, float %a1, float %a2) {
; FMA-LABEL: test_f32_fmsub:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f32_fmsub:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmsubss {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f32_fmsub:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; AVX512-NEXT:    retq
  %x = fmul float %a0, %a1
  %res = fsub float %x, %a2
  ret float %res
}

define <4 x float> @test_4f32_fmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; FMA-LABEL: test_4f32_fmsub:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f32_fmsub:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f32_fmsub:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; AVX512-NEXT:    retq
  %x = fmul <4 x float> %a0, %a1
  %res = fsub <4 x float> %x, %a2
  ret <4 x float> %res
}

define <8 x float> @test_8f32_fmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; FMA-LABEL: test_8f32_fmsub:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f32_fmsub:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm1) - ymm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f32_fmsub:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
; AVX512-NEXT:    retq
  %x = fmul <8 x float> %a0, %a1
  %res = fsub <8 x float> %x, %a2
  ret <8 x float> %res
}

define double @test_f64_fmsub(double %a0, double %a1, double %a2) {
; FMA-LABEL: test_f64_fmsub:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f64_fmsub:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmsubsd {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f64_fmsub:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; AVX512-NEXT:    retq
  %x = fmul double %a0, %a1
  %res = fsub double %x, %a2
  ret double %res
}

define <2 x double> @test_2f64_fmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; FMA-LABEL: test_2f64_fmsub:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_2f64_fmsub:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmsubpd {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_2f64_fmsub:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; AVX512-NEXT:    retq
  %x = fmul <2 x double> %a0, %a1
  %res = fsub <2 x double> %x, %a2
  ret <2 x double> %res
}

define <4 x double> @test_4f64_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; FMA-LABEL: test_4f64_fmsub:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f64_fmsub:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm1) - ymm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f64_fmsub:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
; AVX512-NEXT:    retq
  %x = fmul <4 x double> %a0, %a1
  %res = fsub <4 x double> %x, %a2
  ret <4 x double> %res
}

;
; Pattern: (fsub z, (fmul x, y)) -> (fnmadd x, y, z)
;

define float @test_f32_fnmadd(float %a0, float %a1, float %a2) {
; FMA-LABEL: test_f32_fnmadd:
; FMA:       # %bb.0:
; FMA-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f32_fnmadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfnmaddss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f32_fnmadd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
; AVX512-NEXT:    retq
  %x = fmul float %a0, %a1
  %res = fsub float %a2, %x
  ret float %res
}

define <4 x float> @test_4f32_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; FMA-LABEL: test_4f32_fnmadd:
; FMA:       # %bb.0:
; FMA-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f32_fnmadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f32_fnmadd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
; AVX512-NEXT:    retq
  %x = fmul <4 x float> %a0, %a1
  %res = fsub <4 x float> %a2, %x
  ret <4 x float> %res
}

define <8 x float> @test_8f32_fnmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; FMA-LABEL: test_8f32_fnmadd:
; FMA:       # %bb.0:
; FMA-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f32_fnmadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfnmaddps {{.*#+}} ymm0 = -(ymm0 * ymm1) + ymm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f32_fnmadd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
; AVX512-NEXT:    retq
  %x = fmul <8 x float> %a0, %a1
  %res = fsub <8 x float> %a2, %x
  ret <8 x float> %res
}

define double @test_f64_fnmadd(double %a0, double %a1, double %a2) {
; FMA-LABEL: test_f64_fnmadd:
; FMA:       # %bb.0:
; FMA-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f64_fnmadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfnmaddsd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f64_fnmadd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
; AVX512-NEXT:    retq
  %x = fmul double %a0, %a1
  %res = fsub double %a2, %x
  ret double %res
}

define <2 x double> @test_2f64_fnmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; FMA-LABEL: test_2f64_fnmadd:
; FMA:       # %bb.0:
; FMA-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_2f64_fnmadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfnmaddpd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_2f64_fnmadd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
; AVX512-NEXT:    retq
  %x = fmul <2 x double> %a0, %a1
  %res = fsub <2 x double> %a2, %x
  ret <2 x double> %res
}

define <4 x double> @test_4f64_fnmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; FMA-LABEL: test_4f64_fnmadd:
; FMA:       # %bb.0:
; FMA-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f64_fnmadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm1) + ymm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f64_fnmadd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
; AVX512-NEXT:    retq
  %x = fmul <4 x double> %a0, %a1
  %res = fsub <4 x double> %a2, %x
  ret <4 x double> %res
}

;
; Pattern: (fsub (fneg (fmul x, y)), z) -> (fnmsub x, y, z)
;
; The negation is written in the IR below as a subtraction from -0.0.
;

define float @test_f32_fnmsub(float %a0, float %a1, float %a2) {
; FMA-LABEL: test_f32_fnmsub:
; FMA:       # %bb.0:
; FMA-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f32_fnmsub:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfnmsubss {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f32_fnmsub:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; AVX512-NEXT:    retq
  %x = fmul float %a0, %a1
  %y = fsub float -0.000000e+00, %x
  %res = fsub float %y, %a2
  ret float %res
}

define <4 x float> @test_4f32_fnmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; FMA-LABEL: test_4f32_fnmsub:
; FMA:       # %bb.0:
; FMA-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f32_fnmsub:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f32_fnmsub:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; AVX512-NEXT:    retq
  %x = fmul <4 x float> %a0, %a1
  %y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
  %res = fsub <4 x float> %y, %a2
  ret <4 x float> %res
}

define <8 x float> @test_8f32_fnmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; FMA-LABEL: test_8f32_fnmsub:
; FMA:       # %bb.0:
; FMA-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f32_fnmsub:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm1) - ymm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f32_fnmsub:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2
; AVX512-NEXT:    retq
  %x = fmul <8 x float> %a0, %a1
  %y = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
  %res = fsub <8 x float> %y, %a2
  ret <8 x float> %res
}

define double @test_f64_fnmsub(double %a0, double %a1, double %a2) {
; FMA-LABEL: test_f64_fnmsub:
; FMA:       # %bb.0:
; FMA-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f64_fnmsub:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfnmsubsd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f64_fnmsub:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; AVX512-NEXT:    retq
  %x = fmul double %a0, %a1
  %y = fsub double -0.000000e+00, %x
  %res = fsub double %y, %a2
  ret double %res
}

define <2 x double> @test_2f64_fnmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; FMA-LABEL: test_2f64_fnmsub:
; FMA:       # %bb.0:
; FMA-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_2f64_fnmsub:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_2f64_fnmsub:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; AVX512-NEXT:    retq
  %x = fmul <2 x double> %a0, %a1
  %y = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x
  %res = fsub <2 x double> %y, %a2
  ret <2 x double> %res
}

define <4 x double> @test_4f64_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; FMA-LABEL: test_4f64_fnmsub:
; FMA:       # %bb.0:
; FMA-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f64_fnmsub:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm1) - ymm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f64_fnmsub:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2
; AVX512-NEXT:    retq
  %x = fmul <4 x double> %a0, %a1
  %y = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x
  %res = fsub <4 x double> %y, %a2
  ret <4 x double> %res
}

;
; Load Folding Patterns
;
; One multiplicand is loaded from memory; the checks expect it to appear as the
; "mem" operand of the fused instruction.
;

define <4 x float> @test_4f32_fmadd_load(ptr %a0, <4 x float> %a1, <4 x float> %a2) {
; FMA-LABEL: test_4f32_fmadd_load:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f32_fmadd_load:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmaddps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f32_fmadd_load:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
; AVX512-NEXT:    retq
  %x = load <4 x float>, ptr %a0
  %y = fmul <4 x float> %x, %a1
  %res = fadd <4 x float> %y, %a2
  ret <4 x float> %res
}

define <2 x double> @test_2f64_fmsub_load(ptr %a0, <2 x double> %a1, <2 x double> %a2) {
; FMA-LABEL: test_2f64_fmsub_load:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_2f64_fmsub_load:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmsubpd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_2f64_fmsub_load:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1
; AVX512-NEXT:    retq
  %x = load <2 x double>, ptr %a0
  %y = fmul <2 x double> %x, %a1
  %res = fsub <2 x double> %y, %a2
  ret <2 x double> %res
}

;
; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y)
;
; In this group the expected code differs between the two run modes: the INFS
; checks keep a separate add/sub followed by a multiply, while the NOINFS checks
; expect a single fused instruction. The "_undefs" variants use a constant
; vector in which some lanes are undef.
;

define <4 x float> @test_v4f32_mul_add_x_one_y(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_add_x_one_y:
; FMA-INFS:       # %bb.0:
; FMA-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v4f32_mul_add_x_one_y:
; FMA4-INFS:       # %bb.0:
; FMA4-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA4-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v4f32_mul_add_x_one_y:
; AVX512-INFS:       # %bb.0:
; AVX512-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v4f32_mul_add_x_one_y:
; FMA-NOINFS:       # %bb.0:
; FMA-NOINFS-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_add_x_one_y:
; FMA4-NOINFS:       # %bb.0:
; FMA4-NOINFS-NEXT:    vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_add_x_one_y:
; AVX512-NOINFS:       # %bb.0:
; AVX512-NOINFS-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
; AVX512-NOINFS-NEXT:    retq
  %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  %m = fmul <4 x float> %a, %y
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_y_add_x_one(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_y_add_x_one:
; FMA-INFS:       # %bb.0:
; FMA-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v4f32_mul_y_add_x_one:
; FMA4-INFS:       # %bb.0:
; FMA4-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA4-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v4f32_mul_y_add_x_one:
; AVX512-INFS:       # %bb.0:
; AVX512-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v4f32_mul_y_add_x_one:
; FMA-NOINFS:       # %bb.0:
; FMA-NOINFS-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_one:
; FMA4-NOINFS:       # %bb.0:
; FMA4-NOINFS-NEXT:    vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_one:
; AVX512-NOINFS:       # %bb.0:
; AVX512-NOINFS-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
; AVX512-NOINFS-NEXT:    retq
  %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  %m = fmul <4 x float> %y, %a
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_y_add_x_one_undefs(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
; FMA-INFS:       # %bb.0:
; FMA-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
; FMA4-INFS:       # %bb.0:
; FMA4-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA4-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
; AVX512-INFS:       # %bb.0:
; AVX512-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
; FMA-NOINFS:       # %bb.0:
; FMA-NOINFS-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
; FMA4-NOINFS:       # %bb.0:
; FMA4-NOINFS-NEXT:    vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
; AVX512-NOINFS:       # %bb.0:
; AVX512-NOINFS-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
; AVX512-NOINFS-NEXT:    retq
  %a = fadd <4 x float> %x, <float 1.0, float undef, float 1.0, float undef>
  %m = fmul <4 x float> %y, %a
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_add_x_negone_y(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_add_x_negone_y:
; FMA-INFS:       # %bb.0:
; FMA-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v4f32_mul_add_x_negone_y:
; FMA4-INFS:       # %bb.0:
; FMA4-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA4-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v4f32_mul_add_x_negone_y:
; AVX512-INFS:       # %bb.0:
; AVX512-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y:
; FMA-NOINFS:       # %bb.0:
; FMA-NOINFS-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y:
; FMA4-NOINFS:       # %bb.0:
; FMA4-NOINFS-NEXT:    vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y:
; AVX512-NOINFS:       # %bb.0:
; AVX512-NOINFS-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
; AVX512-NOINFS-NEXT:    retq
  %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
  %m = fmul <4 x float> %a, %y
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_y_add_x_negone:
; FMA-INFS:       # %bb.0:
; FMA-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v4f32_mul_y_add_x_negone:
; FMA4-INFS:       # %bb.0:
; FMA4-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA4-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v4f32_mul_y_add_x_negone:
; AVX512-INFS:       # %bb.0:
; AVX512-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone:
; FMA-NOINFS:       # %bb.0:
; FMA-NOINFS-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone:
; FMA4-NOINFS:       # %bb.0:
; FMA4-NOINFS-NEXT:    vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone:
; AVX512-NOINFS:       # %bb.0:
; AVX512-NOINFS-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
; AVX512-NOINFS-NEXT:    retq
  %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
  %m = fmul <4 x float> %y, %a
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_y_add_x_negone_undefs(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
; FMA-INFS:       # %bb.0:
; FMA-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
; FMA4-INFS:       # %bb.0:
; FMA4-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA4-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
; AVX512-INFS:       # %bb.0:
; AVX512-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
; FMA-NOINFS:       # %bb.0:
; FMA-NOINFS-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
; FMA4-NOINFS:       # %bb.0:
; FMA4-NOINFS-NEXT:    vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
; AVX512-NOINFS:       # %bb.0:
; AVX512-NOINFS-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
; AVX512-NOINFS-NEXT:    retq
  %a = fadd <4 x float> %x, <float undef, float -1.0, float undef, float -1.0>
  %m = fmul <4 x float> %y, %a
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_sub_one_x_y:
; FMA-INFS:       # %bb.0:
; FMA-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; FMA-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v4f32_mul_sub_one_x_y:
; FMA4-INFS:       # %bb.0:
; FMA4-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; FMA4-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v4f32_mul_sub_one_x_y:
; AVX512-INFS:       # %bb.0:
; AVX512-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; AVX512-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y:
; FMA-NOINFS:       # %bb.0:
; FMA-NOINFS-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y:
; FMA4-NOINFS:       # %bb.0:
; FMA4-NOINFS-NEXT:    vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y:
; AVX512-NOINFS:       # %bb.0:
; AVX512-NOINFS-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
; AVX512-NOINFS-NEXT:    retq
  %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
  %m = fmul <4 x float> %s, %y
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_y_sub_one_x:
; FMA-INFS:       # %bb.0:
; FMA-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; FMA-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_one_x:
; FMA4-INFS:       # %bb.0:
; FMA4-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; FMA4-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_one_x:
; AVX512-INFS:       # %bb.0:
; AVX512-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; AVX512-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x:
; FMA-NOINFS:       # %bb.0:
; FMA-NOINFS-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x:
; FMA4-NOINFS:       # %bb.0:
; FMA4-NOINFS-NEXT:    vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x:
; AVX512-NOINFS:       # %bb.0:
; AVX512-NOINFS-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
; AVX512-NOINFS-NEXT:    retq
  %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
  %m = fmul <4 x float> %y, %s
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_y_sub_one_x_undefs(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
; FMA-INFS:       # %bb.0:
; FMA-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; FMA-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
; FMA4-INFS:       # %bb.0:
; FMA4-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; FMA4-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
; AVX512-INFS:       # %bb.0:
; AVX512-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; AVX512-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
; FMA-NOINFS:       # %bb.0:
; FMA-NOINFS-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
; FMA4-NOINFS:       # %bb.0:
; FMA4-NOINFS-NEXT:    vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
; AVX512-NOINFS:       # %bb.0:
; AVX512-NOINFS-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
; AVX512-NOINFS-NEXT:    retq
  %s = fsub <4 x float> <float 1.0, float undef, float 1.0, float 1.0>, %x
  %m = fmul <4 x float> %y, %s
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_sub_negone_x_y:
; FMA-INFS:       # %bb.0:
; FMA-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; FMA-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v4f32_mul_sub_negone_x_y:
; FMA4-INFS:       # %bb.0:
; FMA4-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA4-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; FMA4-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v4f32_mul_sub_negone_x_y:
; AVX512-INFS:       # %bb.0:
; AVX512-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; AVX512-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; AVX512-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y:
; FMA-NOINFS:       # %bb.0:
; FMA-NOINFS-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y:
; FMA4-NOINFS:       # %bb.0:
; FMA4-NOINFS-NEXT:    vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y:
; AVX512-NOINFS:       # %bb.0:
; AVX512-NOINFS-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
; AVX512-NOINFS-NEXT:    retq
  %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x
  %m = fmul <4 x float> %s, %y
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_y_sub_negone_x:
; FMA-INFS:       # %bb.0:
; FMA-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; FMA-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_negone_x:
; FMA4-INFS:       # %bb.0:
; FMA4-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA4-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; FMA4-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_negone_x:
; AVX512-INFS:       # %bb.0:
; AVX512-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; AVX512-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; AVX512-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x:
; FMA-NOINFS:       # %bb.0:
; FMA-NOINFS-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x:
; FMA4-NOINFS:       # %bb.0:
; FMA4-NOINFS-NEXT:    vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x:
; AVX512-NOINFS:       # %bb.0:
; AVX512-NOINFS-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
; AVX512-NOINFS-NEXT:    retq
  %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x
  %m = fmul <4 x float> %y, %s
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_y_sub_negone_x_undefs(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs:
; FMA-INFS:       # %bb.0:
; FMA-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; FMA-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs:
; FMA4-INFS:       # %bb.0:
; FMA4-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA4-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; FMA4-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs:
; AVX512-INFS:       # %bb.0:
; AVX512-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; AVX512-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; AVX512-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs:
; FMA-NOINFS:       # %bb.0:
; FMA-NOINFS-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs:
; FMA4-NOINFS:       # %bb.0:
; FMA4-NOINFS-NEXT:    vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs:
; AVX512-NOINFS:       # %bb.0:
; AVX512-NOINFS-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
; AVX512-NOINFS-NEXT:    retq
  %s = fsub <4 x float> <float -1.0, float -1.0, float undef, float -1.0>, %x
  %m = fmul <4 x float> %y, %s
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_sub_x_one_y:
; FMA-INFS:       # %bb.0:
; FMA-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v4f32_mul_sub_x_one_y:
; FMA4-INFS:       # %bb.0:
; FMA4-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
;
FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 1048; FMA4-INFS-NEXT: retq 1049; 1050; AVX512-INFS-LABEL: test_v4f32_mul_sub_x_one_y: 1051; AVX512-INFS: # %bb.0: 1052; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 1053; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 1054; AVX512-INFS-NEXT: retq 1055; 1056; FMA-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y: 1057; FMA-NOINFS: # %bb.0: 1058; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 1059; FMA-NOINFS-NEXT: retq 1060; 1061; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y: 1062; FMA4-NOINFS: # %bb.0: 1063; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1 1064; FMA4-NOINFS-NEXT: retq 1065; 1066; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y: 1067; AVX512-NOINFS: # %bb.0: 1068; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 1069; AVX512-NOINFS-NEXT: retq 1070 %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> 1071 %m = fmul <4 x float> %s, %y 1072 ret <4 x float> %m 1073} 1074 1075define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) { 1076; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_one: 1077; FMA-INFS: # %bb.0: 1078; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1079; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1080; FMA-INFS-NEXT: retq 1081; 1082; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_one: 1083; FMA4-INFS: # %bb.0: 1084; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1085; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1086; FMA4-INFS-NEXT: retq 1087; 1088; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_one: 1089; AVX512-INFS: # %bb.0: 1090; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 1091; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1092; AVX512-INFS-NEXT: retq 1093; 1094; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one: 1095; FMA-NOINFS: # %bb.0: 1096; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 1097; 
FMA-NOINFS-NEXT: retq 1098; 1099; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one: 1100; FMA4-NOINFS: # %bb.0: 1101; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1 1102; FMA4-NOINFS-NEXT: retq 1103; 1104; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one: 1105; AVX512-NOINFS: # %bb.0: 1106; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 1107; AVX512-NOINFS-NEXT: retq 1108 %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> 1109 %m = fmul <4 x float> %y, %s 1110 ret <4 x float> %m 1111} 1112 1113define <4 x float> @test_v4f32_mul_y_sub_x_one_undefs(<4 x float> %x, <4 x float> %y) { 1114; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs: 1115; FMA-INFS: # %bb.0: 1116; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1117; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1118; FMA-INFS-NEXT: retq 1119; 1120; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs: 1121; FMA4-INFS: # %bb.0: 1122; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1123; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1124; FMA4-INFS-NEXT: retq 1125; 1126; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs: 1127; AVX512-INFS: # %bb.0: 1128; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 1129; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1130; AVX512-INFS-NEXT: retq 1131; 1132; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs: 1133; FMA-NOINFS: # %bb.0: 1134; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 1135; FMA-NOINFS-NEXT: retq 1136; 1137; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs: 1138; FMA4-NOINFS: # %bb.0: 1139; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1 1140; FMA4-NOINFS-NEXT: retq 1141; 1142; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs: 1143; AVX512-NOINFS: # %bb.0: 1144; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 1145; AVX512-NOINFS-NEXT: retq 1146 %s = fsub <4 x float> %x, <float 1.0, 
float 1.0, float 1.0, float undef> 1147 %m = fmul <4 x float> %y, %s 1148 ret <4 x float> %m 1149} 1150 1151define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y) { 1152; FMA-INFS-LABEL: test_v4f32_mul_sub_x_negone_y: 1153; FMA-INFS: # %bb.0: 1154; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1155; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 1156; FMA-INFS-NEXT: retq 1157; 1158; FMA4-INFS-LABEL: test_v4f32_mul_sub_x_negone_y: 1159; FMA4-INFS: # %bb.0: 1160; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1161; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 1162; FMA4-INFS-NEXT: retq 1163; 1164; AVX512-INFS-LABEL: test_v4f32_mul_sub_x_negone_y: 1165; AVX512-INFS: # %bb.0: 1166; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 1167; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 1168; AVX512-INFS-NEXT: retq 1169; 1170; FMA-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y: 1171; FMA-NOINFS: # %bb.0: 1172; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 1173; FMA-NOINFS-NEXT: retq 1174; 1175; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y: 1176; FMA4-NOINFS: # %bb.0: 1177; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 1178; FMA4-NOINFS-NEXT: retq 1179; 1180; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y: 1181; AVX512-NOINFS: # %bb.0: 1182; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 1183; AVX512-NOINFS-NEXT: retq 1184 %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> 1185 %m = fmul <4 x float> %s, %y 1186 ret <4 x float> %m 1187} 1188 1189define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y) { 1190; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_negone: 1191; FMA-INFS: # %bb.0: 1192; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1193; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1194; FMA-INFS-NEXT: retq 1195; 1196; FMA4-INFS-LABEL: 
test_v4f32_mul_y_sub_x_negone: 1197; FMA4-INFS: # %bb.0: 1198; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1199; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1200; FMA4-INFS-NEXT: retq 1201; 1202; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_negone: 1203; AVX512-INFS: # %bb.0: 1204; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 1205; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1206; AVX512-INFS-NEXT: retq 1207; 1208; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone: 1209; FMA-NOINFS: # %bb.0: 1210; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 1211; FMA-NOINFS-NEXT: retq 1212; 1213; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone: 1214; FMA4-NOINFS: # %bb.0: 1215; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 1216; FMA4-NOINFS-NEXT: retq 1217; 1218; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone: 1219; AVX512-NOINFS: # %bb.0: 1220; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 1221; AVX512-NOINFS-NEXT: retq 1222 %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> 1223 %m = fmul <4 x float> %y, %s 1224 ret <4 x float> %m 1225} 1226 1227define <4 x float> @test_v4f32_mul_y_sub_x_negone_undefs(<4 x float> %x, <4 x float> %y) { 1228; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs: 1229; FMA-INFS: # %bb.0: 1230; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1231; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1232; FMA-INFS-NEXT: retq 1233; 1234; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs: 1235; FMA4-INFS: # %bb.0: 1236; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1237; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1238; FMA4-INFS-NEXT: retq 1239; 1240; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs: 1241; AVX512-INFS: # %bb.0: 1242; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 1243; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1244; 
AVX512-INFS-NEXT: retq 1245; 1246; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs: 1247; FMA-NOINFS: # %bb.0: 1248; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 1249; FMA-NOINFS-NEXT: retq 1250; 1251; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs: 1252; FMA4-NOINFS: # %bb.0: 1253; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 1254; FMA4-NOINFS-NEXT: retq 1255; 1256; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs: 1257; AVX512-NOINFS: # %bb.0: 1258; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 1259; AVX512-NOINFS-NEXT: retq 1260 %s = fsub <4 x float> %x, <float undef, float -1.0, float -1.0, float -1.0> 1261 %m = fmul <4 x float> %y, %s 1262 ret <4 x float> %m 1263} 1264 1265; 1266; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y)) 1267; 1268 1269define float @test_f32_interp(float %x, float %y, float %t) { 1270; FMA-INFS-LABEL: test_f32_interp: 1271; FMA-INFS: # %bb.0: 1272; FMA-INFS-NEXT: vmovss {{.*#+}} xmm3 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] 1273; FMA-INFS-NEXT: vsubss %xmm2, %xmm3, %xmm3 1274; FMA-INFS-NEXT: vmulss %xmm3, %xmm1, %xmm1 1275; FMA-INFS-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 1276; FMA-INFS-NEXT: retq 1277; 1278; FMA4-INFS-LABEL: test_f32_interp: 1279; FMA4-INFS: # %bb.0: 1280; FMA4-INFS-NEXT: vmovss {{.*#+}} xmm3 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] 1281; FMA4-INFS-NEXT: vsubss %xmm2, %xmm3, %xmm3 1282; FMA4-INFS-NEXT: vmulss %xmm3, %xmm1, %xmm1 1283; FMA4-INFS-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 1284; FMA4-INFS-NEXT: retq 1285; 1286; AVX512-INFS-LABEL: test_f32_interp: 1287; AVX512-INFS: # %bb.0: 1288; AVX512-INFS-NEXT: vmovss {{.*#+}} xmm3 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] 1289; AVX512-INFS-NEXT: vsubss %xmm2, %xmm3, %xmm3 1290; AVX512-INFS-NEXT: vmulss %xmm3, %xmm1, %xmm1 1291; AVX512-INFS-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 1292; AVX512-INFS-NEXT: retq 1293; 1294; FMA-NOINFS-LABEL: test_f32_interp: 1295; 
FMA-NOINFS: # %bb.0: 1296; FMA-NOINFS-NEXT: vfmsub213ss {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 1297; FMA-NOINFS-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 1298; FMA-NOINFS-NEXT: retq 1299; 1300; FMA4-NOINFS-LABEL: test_f32_interp: 1301; FMA4-NOINFS: # %bb.0: 1302; FMA4-NOINFS-NEXT: vfmsubss {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 1303; FMA4-NOINFS-NEXT: vfmsubss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 1304; FMA4-NOINFS-NEXT: retq 1305; 1306; AVX512-NOINFS-LABEL: test_f32_interp: 1307; AVX512-NOINFS: # %bb.0: 1308; AVX512-NOINFS-NEXT: vfmsub213ss {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 1309; AVX512-NOINFS-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 1310; AVX512-NOINFS-NEXT: retq 1311 %t1 = fsub nsz float 1.0, %t 1312 %tx = fmul nsz float %x, %t 1313 %ty = fmul nsz float %y, %t1 1314 %r = fadd nsz float %tx, %ty 1315 ret float %r 1316} 1317 1318define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float> %t) { 1319; FMA-INFS-LABEL: test_v4f32_interp: 1320; FMA-INFS: # %bb.0: 1321; FMA-INFS-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] 1322; FMA-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3 1323; FMA-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1 1324; FMA-INFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 1325; FMA-INFS-NEXT: retq 1326; 1327; FMA4-INFS-LABEL: test_v4f32_interp: 1328; FMA4-INFS: # %bb.0: 1329; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] 1330; FMA4-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3 1331; FMA4-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1 1332; FMA4-INFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 1333; FMA4-INFS-NEXT: retq 1334; 1335; AVX512-INFS-LABEL: test_v4f32_interp: 1336; AVX512-INFS: # %bb.0: 1337; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] 1338; AVX512-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3 1339; AVX512-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1 1340; AVX512-INFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 1341; 
AVX512-INFS-NEXT: retq 1342; 1343; FMA-NOINFS-LABEL: test_v4f32_interp: 1344; FMA-NOINFS: # %bb.0: 1345; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 1346; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 1347; FMA-NOINFS-NEXT: retq 1348; 1349; FMA4-NOINFS-LABEL: test_v4f32_interp: 1350; FMA4-NOINFS: # %bb.0: 1351; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 1352; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 1353; FMA4-NOINFS-NEXT: retq 1354; 1355; AVX512-NOINFS-LABEL: test_v4f32_interp: 1356; AVX512-NOINFS: # %bb.0: 1357; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 1358; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 1359; AVX512-NOINFS-NEXT: retq 1360 %t1 = fsub nsz <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t 1361 %tx = fmul nsz <4 x float> %x, %t 1362 %ty = fmul nsz <4 x float> %y, %t1 1363 %r = fadd nsz <4 x float> %tx, %ty 1364 ret <4 x float> %r 1365} 1366 1367define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float> %t) { 1368; FMA-INFS-LABEL: test_v8f32_interp: 1369; FMA-INFS: # %bb.0: 1370; FMA-INFS-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] 1371; FMA-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3 1372; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 1373; FMA-INFS-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1 1374; FMA-INFS-NEXT: retq 1375; 1376; FMA4-INFS-LABEL: test_v8f32_interp: 1377; FMA4-INFS: # %bb.0: 1378; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] 1379; FMA4-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3 1380; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 1381; FMA4-INFS-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 1382; FMA4-INFS-NEXT: retq 1383; 1384; AVX512-INFS-LABEL: test_v8f32_interp: 1385; AVX512-INFS: # %bb.0: 1386; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} ymm3 = 
[1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] 1387; AVX512-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3 1388; AVX512-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 1389; AVX512-INFS-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1 1390; AVX512-INFS-NEXT: retq 1391; 1392; FMA-NOINFS-LABEL: test_v8f32_interp: 1393; FMA-NOINFS: # %bb.0: 1394; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 1395; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1 1396; FMA-NOINFS-NEXT: retq 1397; 1398; FMA4-NOINFS-LABEL: test_v8f32_interp: 1399; FMA4-NOINFS: # %bb.0: 1400; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 1401; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 1402; FMA4-NOINFS-NEXT: retq 1403; 1404; AVX512-NOINFS-LABEL: test_v8f32_interp: 1405; AVX512-NOINFS: # %bb.0: 1406; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 1407; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1 1408; AVX512-NOINFS-NEXT: retq 1409 %t1 = fsub nsz <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t 1410 %tx = fmul nsz <8 x float> %x, %t 1411 %ty = fmul nsz <8 x float> %y, %t1 1412 %r = fadd nsz <8 x float> %tx, %ty 1413 ret <8 x float> %r 1414} 1415 1416define double @test_f64_interp(double %x, double %y, double %t) { 1417; FMA-INFS-LABEL: test_f64_interp: 1418; FMA-INFS: # %bb.0: 1419; FMA-INFS-NEXT: vmovsd {{.*#+}} xmm3 = [1.0E+0,0.0E+0] 1420; FMA-INFS-NEXT: vsubsd %xmm2, %xmm3, %xmm3 1421; FMA-INFS-NEXT: vmulsd %xmm3, %xmm1, %xmm1 1422; FMA-INFS-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 1423; FMA-INFS-NEXT: retq 1424; 1425; FMA4-INFS-LABEL: test_f64_interp: 1426; FMA4-INFS: # %bb.0: 1427; FMA4-INFS-NEXT: vmovsd {{.*#+}} xmm3 = [1.0E+0,0.0E+0] 1428; FMA4-INFS-NEXT: vsubsd %xmm2, %xmm3, %xmm3 1429; FMA4-INFS-NEXT: vmulsd %xmm3, %xmm1, %xmm1 1430; FMA4-INFS-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 1431; 
FMA4-INFS-NEXT: retq 1432; 1433; AVX512-INFS-LABEL: test_f64_interp: 1434; AVX512-INFS: # %bb.0: 1435; AVX512-INFS-NEXT: vmovsd {{.*#+}} xmm3 = [1.0E+0,0.0E+0] 1436; AVX512-INFS-NEXT: vsubsd %xmm2, %xmm3, %xmm3 1437; AVX512-INFS-NEXT: vmulsd %xmm3, %xmm1, %xmm1 1438; AVX512-INFS-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 1439; AVX512-INFS-NEXT: retq 1440; 1441; FMA-NOINFS-LABEL: test_f64_interp: 1442; FMA-NOINFS: # %bb.0: 1443; FMA-NOINFS-NEXT: vfmsub213sd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 1444; FMA-NOINFS-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 1445; FMA-NOINFS-NEXT: retq 1446; 1447; FMA4-NOINFS-LABEL: test_f64_interp: 1448; FMA4-NOINFS: # %bb.0: 1449; FMA4-NOINFS-NEXT: vfmsubsd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 1450; FMA4-NOINFS-NEXT: vfmsubsd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 1451; FMA4-NOINFS-NEXT: retq 1452; 1453; AVX512-NOINFS-LABEL: test_f64_interp: 1454; AVX512-NOINFS: # %bb.0: 1455; AVX512-NOINFS-NEXT: vfmsub213sd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 1456; AVX512-NOINFS-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 1457; AVX512-NOINFS-NEXT: retq 1458 %t1 = fsub nsz double 1.0, %t 1459 %tx = fmul nsz double %x, %t 1460 %ty = fmul nsz double %y, %t1 1461 %r = fadd nsz double %tx, %ty 1462 ret double %r 1463} 1464 1465define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x double> %t) { 1466; FMA-INFS-LABEL: test_v2f64_interp: 1467; FMA-INFS: # %bb.0: 1468; FMA-INFS-NEXT: vmovddup {{.*#+}} xmm3 = [1.0E+0,1.0E+0] 1469; FMA-INFS-NEXT: # xmm3 = mem[0,0] 1470; FMA-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3 1471; FMA-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1 1472; FMA-INFS-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 1473; FMA-INFS-NEXT: retq 1474; 1475; FMA4-INFS-LABEL: test_v2f64_interp: 1476; FMA4-INFS: # %bb.0: 1477; FMA4-INFS-NEXT: vmovddup {{.*#+}} xmm3 = [1.0E+0,1.0E+0] 1478; FMA4-INFS-NEXT: # xmm3 = mem[0,0] 1479; FMA4-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3 1480; FMA4-INFS-NEXT: vmulpd 
%xmm3, %xmm1, %xmm1 1481; FMA4-INFS-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 1482; FMA4-INFS-NEXT: retq 1483; 1484; AVX512-INFS-LABEL: test_v2f64_interp: 1485; AVX512-INFS: # %bb.0: 1486; AVX512-INFS-NEXT: vmovddup {{.*#+}} xmm3 = [1.0E+0,1.0E+0] 1487; AVX512-INFS-NEXT: # xmm3 = mem[0,0] 1488; AVX512-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3 1489; AVX512-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1 1490; AVX512-INFS-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 1491; AVX512-INFS-NEXT: retq 1492; 1493; FMA-NOINFS-LABEL: test_v2f64_interp: 1494; FMA-NOINFS: # %bb.0: 1495; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 1496; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 1497; FMA-NOINFS-NEXT: retq 1498; 1499; FMA4-NOINFS-LABEL: test_v2f64_interp: 1500; FMA4-NOINFS: # %bb.0: 1501; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 1502; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 1503; FMA4-NOINFS-NEXT: retq 1504; 1505; AVX512-NOINFS-LABEL: test_v2f64_interp: 1506; AVX512-NOINFS: # %bb.0: 1507; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 1508; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 1509; AVX512-NOINFS-NEXT: retq 1510 %t1 = fsub nsz <2 x double> <double 1.0, double 1.0>, %t 1511 %tx = fmul nsz <2 x double> %x, %t 1512 %ty = fmul nsz <2 x double> %y, %t1 1513 %r = fadd nsz <2 x double> %tx, %ty 1514 ret <2 x double> %r 1515} 1516 1517define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x double> %t) { 1518; FMA-INFS-LABEL: test_v4f64_interp: 1519; FMA-INFS: # %bb.0: 1520; FMA-INFS-NEXT: vbroadcastsd {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] 1521; FMA-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3 1522; FMA-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1 1523; FMA-INFS-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1 1524; FMA-INFS-NEXT: retq 1525; 1526; FMA4-INFS-LABEL: test_v4f64_interp: 1527; FMA4-INFS: # %bb.0: 1528; 
FMA4-INFS-NEXT: vbroadcastsd {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] 1529; FMA4-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3 1530; FMA4-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1 1531; FMA4-INFS-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 1532; FMA4-INFS-NEXT: retq 1533; 1534; AVX512-INFS-LABEL: test_v4f64_interp: 1535; AVX512-INFS: # %bb.0: 1536; AVX512-INFS-NEXT: vbroadcastsd {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] 1537; AVX512-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3 1538; AVX512-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1 1539; AVX512-INFS-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1 1540; AVX512-INFS-NEXT: retq 1541; 1542; FMA-NOINFS-LABEL: test_v4f64_interp: 1543; FMA-NOINFS: # %bb.0: 1544; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 1545; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1 1546; FMA-NOINFS-NEXT: retq 1547; 1548; FMA4-NOINFS-LABEL: test_v4f64_interp: 1549; FMA4-NOINFS: # %bb.0: 1550; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 1551; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 1552; FMA4-NOINFS-NEXT: retq 1553; 1554; AVX512-NOINFS-LABEL: test_v4f64_interp: 1555; AVX512-NOINFS: # %bb.0: 1556; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 1557; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1 1558; AVX512-NOINFS-NEXT: retq 1559 %t1 = fsub nsz <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t 1560 %tx = fmul nsz <4 x double> %x, %t 1561 %ty = fmul nsz <4 x double> %y, %t1 1562 %r = fadd nsz <4 x double> %tx, %ty 1563 ret <4 x double> %r 1564} 1565 1566; 1567; Pattern: (fneg (fma x, y, z)) -> (fma x, -y, -z) 1568; 1569 1570define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { 1571; FMA-LABEL: test_v4f32_fneg_fmadd: 1572; FMA: # %bb.0: 1573; FMA-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 1574; FMA-NEXT: retq 1575; 1576; FMA4-LABEL: 
test_v4f32_fneg_fmadd: 1577; FMA4: # %bb.0: 1578; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2 1579; FMA4-NEXT: retq 1580; 1581; AVX512-LABEL: test_v4f32_fneg_fmadd: 1582; AVX512: # %bb.0: 1583; AVX512-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 1584; AVX512-NEXT: retq 1585 %mul = fmul nsz <4 x float> %a0, %a1 1586 %add = fadd nsz <4 x float> %mul, %a2 1587 %neg = fsub nsz <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add 1588 ret <4 x float> %neg 1589} 1590 1591define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { 1592; FMA-LABEL: test_v4f64_fneg_fmsub: 1593; FMA: # %bb.0: 1594; FMA-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 1595; FMA-NEXT: retq 1596; 1597; FMA4-LABEL: test_v4f64_fneg_fmsub: 1598; FMA4: # %bb.0: 1599; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm1) + ymm2 1600; FMA4-NEXT: retq 1601; 1602; AVX512-LABEL: test_v4f64_fneg_fmsub: 1603; AVX512: # %bb.0: 1604; AVX512-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 1605; AVX512-NEXT: retq 1606 %mul = fmul nsz <4 x double> %a0, %a1 1607 %sub = fsub nsz <4 x double> %mul, %a2 1608 %neg = fsub nsz <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub 1609 ret <4 x double> %neg 1610} 1611 1612define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { 1613; FMA-LABEL: test_v4f32_fneg_fnmadd: 1614; FMA: # %bb.0: 1615; FMA-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 1616; FMA-NEXT: retq 1617; 1618; FMA4-LABEL: test_v4f32_fneg_fnmadd: 1619; FMA4: # %bb.0: 1620; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2 1621; FMA4-NEXT: retq 1622; 1623; AVX512-LABEL: test_v4f32_fneg_fnmadd: 1624; AVX512: # %bb.0: 1625; AVX512-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 1626; AVX512-NEXT: retq 1627 %mul = fmul nsz <4 x float> %a0, %a1 1628 %neg0 = fsub nsz <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul 
1629 %add = fadd nsz <4 x float> %neg0, %a2 1630 %neg1 = fsub nsz <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add 1631 ret <4 x float> %neg1 1632} 1633 1634define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { 1635; FMA-LABEL: test_v4f64_fneg_fnmsub: 1636; FMA: # %bb.0: 1637; FMA-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 1638; FMA-NEXT: retq 1639; 1640; FMA4-LABEL: test_v4f64_fneg_fnmsub: 1641; FMA4: # %bb.0: 1642; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm2 1643; FMA4-NEXT: retq 1644; 1645; AVX512-LABEL: test_v4f64_fneg_fnmsub: 1646; AVX512: # %bb.0: 1647; AVX512-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 1648; AVX512-NEXT: retq 1649 %mul = fmul nsz <4 x double> %a0, %a1 1650 %neg0 = fsub nsz <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul 1651 %sub = fsub nsz <4 x double> %neg0, %a2 1652 %neg1 = fsub nsz <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub 1653 ret <4 x double> %neg1 1654} 1655 1656; 1657; Pattern: (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) 1658; 1659 1660define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) #0 { 1661; FMA-LABEL: test_v4f32_fma_x_c1_fmul_x_c2: 1662; FMA: # %bb.0: 1663; FMA-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1664; FMA-NEXT: retq 1665; 1666; FMA4-LABEL: test_v4f32_fma_x_c1_fmul_x_c2: 1667; FMA4: # %bb.0: 1668; FMA4-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1669; FMA4-NEXT: retq 1670; 1671; AVX512-LABEL: test_v4f32_fma_x_c1_fmul_x_c2: 1672; AVX512: # %bb.0: 1673; AVX512-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 1674; AVX512-NEXT: retq 1675 %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0> 1676 %m1 = fmul <4 x float> %x, <float 4.0, float 3.0, float 2.0, float 1.0> 1677 %a = fadd <4 x float> %m0, %m1 1678 ret <4 x float> %a 1679} 1680 1681; 1682; Pattern: (fma (fmul x, c1), c2, y) -> (fma x, 
c1*c2, y) 1683; 1684; The two constant multiplies collapse into one constant, so a single FMA with a memory operand is expected. 1685define <4 x float> @test_v4f32_fma_fmul_x_c1_c2_y(<4 x float> %x, <4 x float> %y) #0 { 1686; FMA-LABEL: test_v4f32_fma_fmul_x_c1_c2_y: 1687; FMA: # %bb.0: 1688; FMA-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 1689; FMA-NEXT: retq 1690; 1691; FMA4-LABEL: test_v4f32_fma_fmul_x_c1_c2_y: 1692; FMA4: # %bb.0: 1693; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 1694; FMA4-NEXT: retq 1695; 1696; AVX512-LABEL: test_v4f32_fma_fmul_x_c1_c2_y: 1697; AVX512: # %bb.0: 1698; AVX512-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 1699; AVX512-NEXT: retq 1700 %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0> 1701 %m1 = fmul <4 x float> %m0, <float 4.0, float 3.0, float 2.0, float 1.0> 1702 %a = fadd <4 x float> %m1, %y 1703 ret <4 x float> %a 1704} 1705 1706; Pattern: (fneg (fmul x, y)) -> (fnmsub x, y, 0) 1707; The fmul carries nsz in these tests; the zero addend is materialized with vxorps/vxorpd. See test_v4f64_fneg_fmul_no_nsz for the unfused case. 1708define double @test_f64_fneg_fmul(double %x, double %y) #0 { 1709; FMA-LABEL: test_f64_fneg_fmul: 1710; FMA: # %bb.0: 1711; FMA-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1712; FMA-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 1713; FMA-NEXT: retq 1714; 1715; FMA4-LABEL: test_f64_fneg_fmul: 1716; FMA4: # %bb.0: 1717; FMA4-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1718; FMA4-NEXT: vfnmsubsd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2 1719; FMA4-NEXT: retq 1720; 1721; AVX512-LABEL: test_f64_fneg_fmul: 1722; AVX512: # %bb.0: 1723; AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1724; AVX512-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 1725; AVX512-NEXT: retq 1726 %m = fmul nsz double %x, %y 1727 %n = fsub double -0.0, %m 1728 ret double %n 1729} 1730; Same fold for <4 x float>. 1731define <4 x float> @test_v4f32_fneg_fmul(<4 x float> %x, <4 x float> %y) #0 { 1732; FMA-LABEL: test_v4f32_fneg_fmul: 1733; FMA: # %bb.0: 1734; FMA-NEXT: vxorps %xmm2, %xmm2, %xmm2 1735; FMA-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 1736; FMA-NEXT: retq 1737; 1738; FMA4-LABEL: test_v4f32_fneg_fmul: 1739; FMA4: # %bb.0: 1740; FMA4-NEXT: vxorps
%xmm2, %xmm2, %xmm2 1741; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2 1742; FMA4-NEXT: retq 1743; 1744; AVX512-LABEL: test_v4f32_fneg_fmul: 1745; AVX512: # %bb.0: 1746; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2 1747; AVX512-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 1748; AVX512-NEXT: retq 1749 %m = fmul nsz <4 x float> %x, %y 1750 %n = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %m 1751 ret <4 x float> %n 1752} 1753; Same fold for <4 x double>. 1754define <4 x double> @test_v4f64_fneg_fmul(<4 x double> %x, <4 x double> %y) #0 { 1755; FMA-LABEL: test_v4f64_fneg_fmul: 1756; FMA: # %bb.0: 1757; FMA-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1758; FMA-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 1759; FMA-NEXT: retq 1760; 1761; FMA4-LABEL: test_v4f64_fneg_fmul: 1762; FMA4: # %bb.0: 1763; FMA4-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1764; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm1) - ymm2 1765; FMA4-NEXT: retq 1766; 1767; AVX512-LABEL: test_v4f64_fneg_fmul: 1768; AVX512: # %bb.0: 1769; AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1770; AVX512-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 1771; AVX512-NEXT: retq 1772 %m = fmul nsz <4 x double> %x, %y 1773 %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m 1774 ret <4 x double> %n 1775} 1776; Negative test - the fmul has no nsz, so the fold is not applied: a plain vmulpd followed by a vxorpd with a constant-pool operand is expected. 1777define <4 x double> @test_v4f64_fneg_fmul_no_nsz(<4 x double> %x, <4 x double> %y) #0 { 1778; FMA-LABEL: test_v4f64_fneg_fmul_no_nsz: 1779; FMA: # %bb.0: 1780; FMA-NEXT: vmulpd %ymm1, %ymm0, %ymm0 1781; FMA-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1782; FMA-NEXT: retq 1783; 1784; FMA4-LABEL: test_v4f64_fneg_fmul_no_nsz: 1785; FMA4: # %bb.0: 1786; FMA4-NEXT: vmulpd %ymm1, %ymm0, %ymm0 1787; FMA4-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1788; FMA4-NEXT: retq 1789; 1790; AVX512-LABEL: test_v4f64_fneg_fmul_no_nsz: 1791; AVX512: # %bb.0: 1792; AVX512-NEXT: vmulpd %ymm1, %ymm0, %ymm0 1793; AVX512-NEXT: vxorpd
{{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0 1794; AVX512-NEXT: retq 1795 %m = fmul <4 x double> %x, %y 1796 %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m 1797 ret <4 x double> %n 1798} 1799 1800; ((a*b) + (c*d)) + n1 --> (a*b) + ((c*d) + n1) 1801; Every operation is 'fast', so c*d fuses with n1 first and a*b then fuses with that result (two chained FMAs). 1802define double @fadd_fma_fmul_1(double %a, double %b, double %c, double %d, double %n1) nounwind { 1803; FMA-LABEL: fadd_fma_fmul_1: 1804; FMA: # %bb.0: 1805; FMA-NEXT: vfmadd213sd {{.*#+}} xmm2 = (xmm3 * xmm2) + xmm4 1806; FMA-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 1807; FMA-NEXT: retq 1808; 1809; FMA4-LABEL: fadd_fma_fmul_1: 1810; FMA4: # %bb.0: 1811; FMA4-NEXT: vfmaddsd {{.*#+}} xmm2 = (xmm2 * xmm3) + xmm4 1812; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2 1813; FMA4-NEXT: retq 1814; 1815; AVX512-LABEL: fadd_fma_fmul_1: 1816; AVX512: # %bb.0: 1817; AVX512-NEXT: vfmadd213sd {{.*#+}} xmm2 = (xmm3 * xmm2) + xmm4 1818; AVX512-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 1819; AVX512-NEXT: retq 1820 %m1 = fmul fast double %a, %b 1821 %m2 = fmul fast double %c, %d 1822 %a1 = fadd fast double %m1, %m2 1823 %a2 = fadd fast double %a1, %n1 1824 ret double %a2 1825} 1826 1827; Minimum FMF - the 1st fadd is contracted because that combines 1828; fmul+fadd as specified by the order of operations; the 2nd fadd 1829; requires reassociation to fuse with c*d.
1830 1831define float @fadd_fma_fmul_fmf(float %a, float %b, float %c, float %d, float %n0) nounwind { 1832; FMA-LABEL: fadd_fma_fmul_fmf: 1833; FMA: # %bb.0: 1834; FMA-NEXT: vfmadd213ss {{.*#+}} xmm2 = (xmm3 * xmm2) + xmm4 1835; FMA-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 1836; FMA-NEXT: retq 1837; 1838; FMA4-LABEL: fadd_fma_fmul_fmf: 1839; FMA4: # %bb.0: 1840; FMA4-NEXT: vfmaddss {{.*#+}} xmm2 = (xmm2 * xmm3) + xmm4 1841; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2 1842; FMA4-NEXT: retq 1843; 1844; AVX512-LABEL: fadd_fma_fmul_fmf: 1845; AVX512: # %bb.0: 1846; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm2 = (xmm3 * xmm2) + xmm4 1847; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 1848; AVX512-NEXT: retq 1849 %m1 = fmul float %a, %b 1850 %m2 = fmul float %c, %d 1851 %a1 = fadd contract float %m1, %m2 1852 %a2 = fadd reassoc float %n0, %a1 1853 ret float %a2 1854} 1855 1856; Not minimum FMF - the 2nd fadd is only 'contract' (no 'reassoc'), and the checks expect vmulss + one FMA + vaddss instead of two FMAs. 1857 1858define float @fadd_fma_fmul_2(float %a, float %b, float %c, float %d, float %n0) nounwind { 1859; FMA-LABEL: fadd_fma_fmul_2: 1860; FMA: # %bb.0: 1861; FMA-NEXT: vmulss %xmm3, %xmm2, %xmm2 1862; FMA-NEXT: vfmadd231ss {{.*#+}} xmm2 = (xmm1 * xmm0) + xmm2 1863; FMA-NEXT: vaddss %xmm2, %xmm4, %xmm0 1864; FMA-NEXT: retq 1865; 1866; FMA4-LABEL: fadd_fma_fmul_2: 1867; FMA4: # %bb.0: 1868; FMA4-NEXT: vmulss %xmm3, %xmm2, %xmm2 1869; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2 1870; FMA4-NEXT: vaddss %xmm0, %xmm4, %xmm0 1871; FMA4-NEXT: retq 1872; 1873; AVX512-LABEL: fadd_fma_fmul_2: 1874; AVX512: # %bb.0: 1875; AVX512-NEXT: vmulss %xmm3, %xmm2, %xmm2 1876; AVX512-NEXT: vfmadd231ss {{.*#+}} xmm2 = (xmm1 * xmm0) + xmm2 1877; AVX512-NEXT: vaddss %xmm2, %xmm4, %xmm0 1878; AVX512-NEXT: retq 1879 %m1 = fmul float %a, %b 1880 %m2 = fmul float %c, %d 1881 %a1 = fadd contract float %m1, %m2 1882 %a2 = fadd contract float %n0, %a1 1883 ret float %a2 1884} 1885 1886; The final fadd can be folded with either one of the leading fmuls. 
1887 1888define <2 x double> @fadd_fma_fmul_3(<2 x double> %x1, <2 x double> %x2, <2 x double> %x3, <2 x double> %x4, <2 x double> %x5, <2 x double> %x6, <2 x double> %x7, <2 x double> %x8) nounwind { 1889; FMA-LABEL: fadd_fma_fmul_3: 1890; FMA: # %bb.0: 1891; FMA-NEXT: vmulpd %xmm3, %xmm2, %xmm2 1892; FMA-NEXT: vfmadd231pd {{.*#+}} xmm2 = (xmm1 * xmm0) + xmm2 1893; FMA-NEXT: vfmadd231pd {{.*#+}} xmm2 = (xmm7 * xmm6) + xmm2 1894; FMA-NEXT: vfmadd231pd {{.*#+}} xmm2 = (xmm5 * xmm4) + xmm2 1895; FMA-NEXT: vmovapd %xmm2, %xmm0 1896; FMA-NEXT: retq 1897; 1898; FMA4-LABEL: fadd_fma_fmul_3: 1899; FMA4: # %bb.0: 1900; FMA4-NEXT: vmulpd %xmm3, %xmm2, %xmm2 1901; FMA4-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2 1902; FMA4-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm6 * xmm7) + xmm0 1903; FMA4-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm4 * xmm5) + xmm0 1904; FMA4-NEXT: retq 1905; 1906; AVX512-LABEL: fadd_fma_fmul_3: 1907; AVX512: # %bb.0: 1908; AVX512-NEXT: vmulpd %xmm3, %xmm2, %xmm2 1909; AVX512-NEXT: vfmadd231pd {{.*#+}} xmm2 = (xmm1 * xmm0) + xmm2 1910; AVX512-NEXT: vfmadd231pd {{.*#+}} xmm2 = (xmm7 * xmm6) + xmm2 1911; AVX512-NEXT: vfmadd231pd {{.*#+}} xmm2 = (xmm5 * xmm4) + xmm2 1912; AVX512-NEXT: vmovapd %xmm2, %xmm0 1913; AVX512-NEXT: retq 1914 %m1 = fmul fast <2 x double> %x1, %x2 1915 %m2 = fmul fast <2 x double> %x3, %x4 1916 %m3 = fmul fast <2 x double> %x5, %x6 1917 %m4 = fmul fast <2 x double> %x7, %x8 1918 %a1 = fadd fast <2 x double> %m1, %m2 1919 %a2 = fadd fast <2 x double> %m3, %m4 1920 %a3 = fadd fast <2 x double> %a1, %a2 1921 ret <2 x double> %a3 1922} 1923 1924; negative test - %m1 has an extra use (the store to %p) and the checks expect vmulss + one FMA + vaddss rather than two FMAs 1925 1926define float @fadd_fma_fmul_extra_use_1(float %a, float %b, float %c, float %d, float %n0, ptr %p) nounwind { 1927; FMA-LABEL: fadd_fma_fmul_extra_use_1: 1928; FMA: # %bb.0: 1929; FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 1930; FMA-NEXT: vmovss %xmm0, (%rdi) 1931; FMA-NEXT: vfmadd213ss {{.*#+}} xmm2 = (xmm3 * xmm2) + xmm0 1932; FMA-NEXT: vaddss %xmm2, %xmm4, %xmm0 1933; FMA-NEXT: retq 
1934; 1935; FMA4-LABEL: fadd_fma_fmul_extra_use_1: 1936; FMA4: # %bb.0: 1937; FMA4-NEXT: vmulss %xmm1, %xmm0, %xmm0 1938; FMA4-NEXT: vmovss %xmm0, (%rdi) 1939; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm2 * xmm3) + xmm0 1940; FMA4-NEXT: vaddss %xmm0, %xmm4, %xmm0 1941; FMA4-NEXT: retq 1942; 1943; AVX512-LABEL: fadd_fma_fmul_extra_use_1: 1944; AVX512: # %bb.0: 1945; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0 1946; AVX512-NEXT: vmovss %xmm0, (%rdi) 1947; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm2 = (xmm3 * xmm2) + xmm0 1948; AVX512-NEXT: vaddss %xmm2, %xmm4, %xmm0 1949; AVX512-NEXT: retq 1950 %m1 = fmul fast float %a, %b 1951 store float %m1, ptr %p 1952 %m2 = fmul fast float %c, %d 1953 %a1 = fadd fast float %m1, %m2 1954 %a2 = fadd fast float %n0, %a1 1955 ret float %a2 1956} 1957 1958; negative test - %m2 has an extra use (the store to %p) and the checks expect vmulss + one FMA + vaddss rather than two FMAs 1959 1960define float @fadd_fma_fmul_extra_use_2(float %a, float %b, float %c, float %d, float %n0, ptr %p) nounwind { 1961; FMA-LABEL: fadd_fma_fmul_extra_use_2: 1962; FMA: # %bb.0: 1963; FMA-NEXT: vmulss %xmm3, %xmm2, %xmm2 1964; FMA-NEXT: vmovss %xmm2, (%rdi) 1965; FMA-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 1966; FMA-NEXT: vaddss %xmm0, %xmm4, %xmm0 1967; FMA-NEXT: retq 1968; 1969; FMA4-LABEL: fadd_fma_fmul_extra_use_2: 1970; FMA4: # %bb.0: 1971; FMA4-NEXT: vmulss %xmm3, %xmm2, %xmm2 1972; FMA4-NEXT: vmovss %xmm2, (%rdi) 1973; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2 1974; FMA4-NEXT: vaddss %xmm0, %xmm4, %xmm0 1975; FMA4-NEXT: retq 1976; 1977; AVX512-LABEL: fadd_fma_fmul_extra_use_2: 1978; AVX512: # %bb.0: 1979; AVX512-NEXT: vmulss %xmm3, %xmm2, %xmm2 1980; AVX512-NEXT: vmovss %xmm2, (%rdi) 1981; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 1982; AVX512-NEXT: vaddss %xmm0, %xmm4, %xmm0 1983; AVX512-NEXT: retq 1984 %m1 = fmul fast float %a, %b 1985 %m2 = fmul fast float %c, %d 1986 store float %m2, ptr %p 1987 %a1 = fadd fast float %m1, %m2 1988 %a2 = fadd fast float %n0, %a1 1989 ret float %a2 1990} 1991 1992; negative test - %a1 (the first fadd) has an extra use (the store to %p) and the checks expect vmulss + one FMA + vaddss rather than two FMAs 
1993 1994define float @fadd_fma_fmul_extra_use_3(float %a, float %b, float %c, float %d, float %n0, ptr %p) nounwind { 1995; FMA-LABEL: fadd_fma_fmul_extra_use_3: 1996; FMA: # %bb.0: 1997; FMA-NEXT: vmulss %xmm3, %xmm2, %xmm2 1998; FMA-NEXT: vfmadd231ss {{.*#+}} xmm2 = (xmm1 * xmm0) + xmm2 1999; FMA-NEXT: vmovss %xmm2, (%rdi) 2000; FMA-NEXT: vaddss %xmm2, %xmm4, %xmm0 2001; FMA-NEXT: retq 2002; 2003; FMA4-LABEL: fadd_fma_fmul_extra_use_3: 2004; FMA4: # %bb.0: 2005; FMA4-NEXT: vmulss %xmm3, %xmm2, %xmm2 2006; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2 2007; FMA4-NEXT: vmovss %xmm0, (%rdi) 2008; FMA4-NEXT: vaddss %xmm0, %xmm4, %xmm0 2009; FMA4-NEXT: retq 2010; 2011; AVX512-LABEL: fadd_fma_fmul_extra_use_3: 2012; AVX512: # %bb.0: 2013; AVX512-NEXT: vmulss %xmm3, %xmm2, %xmm2 2014; AVX512-NEXT: vfmadd231ss {{.*#+}} xmm2 = (xmm1 * xmm0) + xmm2 2015; AVX512-NEXT: vmovss %xmm2, (%rdi) 2016; AVX512-NEXT: vaddss %xmm2, %xmm4, %xmm0 2017; AVX512-NEXT: retq 2018 %m1 = fmul fast float %a, %b 2019 %m2 = fmul fast float %c, %d 2020 %a1 = fadd fast float %m1, %m2 2021 store float %a1, ptr %p 2022 %a2 = fadd fast float %n0, %a1 2023 ret float %a2 2024} 2025; Attribute group #0 - applied to the functions in this file that are declared with '#0'. 2026attributes #0 = { "unsafe-fp-math"="true" } 2027