; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=FMA --check-prefix=FMA-INFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-INFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-INFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq -fp-contract=fast | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512-INFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=FMA --check-prefix=FMA-NOINFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-NOINFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-NOINFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512-NOINFS

; Every function below operates on a 512-bit vector type (<16 x float> or
; <8 x double>). The expected output for the avx+fma and avx+fma4 runs is a
; pair of 256-bit (ymm) fused instructions, one per half of the vector, and
; for the avx512dq runs a single 512-bit (zmm) fused instruction.

;
; Pattern: (fadd (fmul x, y), z) -> (fmadd x, y, z)
;

; a0 * a1 + a2 on <16 x float>.
define <16 x float> @test_16f32_fmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; FMA-LABEL: test_16f32_fmadd:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm4
; FMA-NEXT:    vfmadd213ps {{.*#+}} ymm1 = (ymm3 * ymm1) + ymm5
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_16f32_fmadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm4
; FMA4-NEXT:    vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm5
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_16f32_fmadd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; AVX512-NEXT:    retq
  %prod = fmul <16 x float> %a0, %a1
  %out = fadd <16 x float> %prod, %a2
  ret <16 x float> %out
}

; a0 * a1 + a2 on <8 x double>.
define <8 x double> @test_8f64_fmadd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; FMA-LABEL: test_8f64_fmadd:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm4
; FMA-NEXT:    vfmadd213pd {{.*#+}} ymm1 = (ymm3 * ymm1) + ymm5
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f64_fmadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm4
; FMA4-NEXT:    vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm5
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f64_fmadd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; AVX512-NEXT:    retq
  %prod = fmul <8 x double> %a0, %a1
  %out = fadd <8 x double> %prod, %a2
  ret <8 x double> %out
}

;
; Pattern: (fsub (fmul x, y), z) -> (fmsub x, y, z)
;

; a0 * a1 - a2 on <16 x float>.
define <16 x float> @test_16f32_fmsub(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; FMA-LABEL: test_16f32_fmsub:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm4
; FMA-NEXT:    vfmsub213ps {{.*#+}} ymm1 = (ymm3 * ymm1) - ymm5
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_16f32_fmsub:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm4
; FMA4-NEXT:    vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm5
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_16f32_fmsub:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
; AVX512-NEXT:    retq
  %prod = fmul <16 x float> %a0, %a1
  %out = fsub <16 x float> %prod, %a2
  ret <16 x float> %out
}

; a0 * a1 - a2 on <8 x double>.
define <8 x double> @test_8f64_fmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; FMA-LABEL: test_8f64_fmsub:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm4
; FMA-NEXT:    vfmsub213pd {{.*#+}} ymm1 = (ymm3 * ymm1) - ymm5
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f64_fmsub:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm4
; FMA4-NEXT:    vfmsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm5
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f64_fmsub:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
; AVX512-NEXT:    retq
  %prod = fmul <8 x double> %a0, %a1
  %out = fsub <8 x double> %prod, %a2
  ret <8 x double> %out
}

;
; Pattern: (fsub z, (fmul x, y)) -> (fnmadd x, y, z)
;

; a2 - a0 * a1 on <16 x float>.
define <16 x float> @test_16f32_fnmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; FMA-LABEL: test_16f32_fnmadd:
; FMA:       # %bb.0:
; FMA-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm4
; FMA-NEXT:    vfnmadd213ps {{.*#+}} ymm1 = -(ymm3 * ymm1) + ymm5
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_16f32_fnmadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfnmaddps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm4
; FMA4-NEXT:    vfnmaddps {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm5
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_16f32_fnmadd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
; AVX512-NEXT:    retq
  %prod = fmul <16 x float> %a0, %a1
  %out = fsub <16 x float> %a2, %prod
  ret <16 x float> %out
}

; a2 - a0 * a1 on <8 x double>.
define <8 x double> @test_8f64_fnmadd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; FMA-LABEL: test_8f64_fnmadd:
; FMA:       # %bb.0:
; FMA-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm4
; FMA-NEXT:    vfnmadd213pd {{.*#+}} ymm1 = -(ymm3 * ymm1) + ymm5
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f64_fnmadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm4
; FMA4-NEXT:    vfnmaddpd {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm5
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f64_fnmadd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfnmadd213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
; AVX512-NEXT:    retq
  %prod = fmul <8 x double> %a0, %a1
  %out = fsub <8 x double> %a2, %prod
  ret <8 x double> %out
}

;
; Pattern: (fsub (fneg (fmul x, y)), z) -> (fnmsub x, y, z)
;

; -(a0 * a1) - a2 on <16 x float>; the negation is written as a subtraction
; from a vector of negative zeros.
define <16 x float> @test_16f32_fnmsub(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; FMA-LABEL: test_16f32_fnmsub:
; FMA:       # %bb.0:
; FMA-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) - ymm4
; FMA-NEXT:    vfnmsub213ps {{.*#+}} ymm1 = -(ymm3 * ymm1) - ymm5
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_16f32_fnmsub:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm4
; FMA4-NEXT:    vfnmsubps {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm5
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_16f32_fnmsub:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; AVX512-NEXT:    retq
  %prod = fmul <16 x float> %a0, %a1
  %neg = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %prod
  %out = fsub <16 x float> %neg, %a2
  ret <16 x float> %out
}

; -(a0 * a1) - a2 on <8 x double>; the negation is written as a subtraction
; from a vector of negative zeros.
define <8 x double> @test_8f64_fnmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; FMA-LABEL: test_8f64_fnmsub:
; FMA:       # %bb.0:
; FMA-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm2 * ymm0) - ymm4
; FMA-NEXT:    vfnmsub213pd {{.*#+}} ymm1 = -(ymm3 * ymm1) - ymm5
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f64_fnmsub:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm4
; FMA4-NEXT:    vfnmsubpd {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm5
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f64_fnmsub:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; AVX512-NEXT:    retq
  %prod = fmul <8 x double> %a0, %a1
  %neg = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %prod
  %out = fsub <8 x double> %neg, %a2
  ret <8 x double> %out
}

;
; Load Folding Patterns
;

; The first multiplicand is loaded through %a0; the expected output uses the
; memory-operand form of the fused instruction instead of a separate load.
define <16 x float> @test_16f32_fmadd_load(ptr %a0, <16 x float> %a1, <16 x float> %a2) {
; FMA-LABEL: test_16f32_fmadd_load:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm2
; FMA-NEXT:    vfmadd132ps {{.*#+}} ymm1 = (ymm1 * mem) + ymm3
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_16f32_fmadd_load:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmaddps {{.*#+}} ymm0 = (ymm0 * mem) + ymm2
; FMA4-NEXT:    vfmaddps {{.*#+}} ymm1 = (ymm1 * mem) + ymm3
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_16f32_fmadd_load:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1
; AVX512-NEXT:    retq
  %ld = load <16 x float>, ptr %a0
  %prod = fmul <16 x float> %ld, %a1
  %out = fadd <16 x float> %prod, %a2
  ret <16 x float> %out
}

; Same as above for the subtract form on <8 x double>.
define <8 x double> @test_8f64_fmsub_load(ptr %a0, <8 x double> %a1, <8 x double> %a2) {
; FMA-LABEL: test_8f64_fmsub_load:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm2
; FMA-NEXT:    vfmsub132pd {{.*#+}} ymm1 = (ymm1 * mem) - ymm3
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f64_fmsub_load:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmsubpd {{.*#+}} ymm0 = (ymm0 * mem) - ymm2
; FMA4-NEXT:    vfmsubpd {{.*#+}} ymm1 = (ymm1 * mem) - ymm3
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f64_fmsub_load:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmsub132pd {{.*#+}} zmm0 = (zmm0 * mem) - zmm1
; AVX512-NEXT:    retq
  %ld = load <8 x double>, ptr %a0
  %prod = fmul <8 x double> %ld, %a1
  %out = fsub <8 x double> %prod, %a2
  ret <8 x double> %out
}

;
; Patterns (+ fneg variants): mul(add(1.0,x),y),
mul(sub(1.0,x),y), mul(sub(x,1.0),y) 257; 258 259define <16 x float> @test_v16f32_mul_add_x_one_y(<16 x float> %x, <16 x float> %y) { 260; FMA-INFS-LABEL: test_v16f32_mul_add_x_one_y: 261; FMA-INFS: # %bb.0: 262; FMA-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] 263; FMA-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1 264; FMA-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0 265; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0 266; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 267; FMA-INFS-NEXT: retq 268; 269; FMA4-INFS-LABEL: test_v16f32_mul_add_x_one_y: 270; FMA4-INFS: # %bb.0: 271; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] 272; FMA4-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1 273; FMA4-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0 274; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0 275; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 276; FMA4-INFS-NEXT: retq 277; 278; AVX512-INFS-LABEL: test_v16f32_mul_add_x_one_y: 279; AVX512-INFS: # %bb.0: 280; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 281; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0 282; AVX512-INFS-NEXT: retq 283; 284; FMA-NOINFS-LABEL: test_v16f32_mul_add_x_one_y: 285; FMA-NOINFS: # %bb.0: 286; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm2 287; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} ymm1 = (ymm3 * ymm1) + ymm3 288; FMA-NOINFS-NEXT: retq 289; 290; FMA4-NOINFS-LABEL: test_v16f32_mul_add_x_one_y: 291; FMA4-NOINFS: # %bb.0: 292; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2 293; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3 294; FMA4-NOINFS-NEXT: retq 295; 296; AVX512-NOINFS-LABEL: test_v16f32_mul_add_x_one_y: 297; AVX512-NOINFS: # %bb.0: 298; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm1 299; AVX512-NOINFS-NEXT: retq 300 %a = fadd <16 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, 
float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0> 301 %m = fmul <16 x float> %a, %y 302 ret <16 x float> %m 303} 304 305define <8 x double> @test_v8f64_mul_y_add_x_one(<8 x double> %x, <8 x double> %y) { 306; FMA-INFS-LABEL: test_v8f64_mul_y_add_x_one: 307; FMA-INFS: # %bb.0: 308; FMA-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] 309; FMA-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1 310; FMA-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0 311; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0 312; FMA-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1 313; FMA-INFS-NEXT: retq 314; 315; FMA4-INFS-LABEL: test_v8f64_mul_y_add_x_one: 316; FMA4-INFS: # %bb.0: 317; FMA4-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] 318; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1 319; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0 320; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0 321; FMA4-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1 322; FMA4-INFS-NEXT: retq 323; 324; AVX512-INFS-LABEL: test_v8f64_mul_y_add_x_one: 325; AVX512-INFS: # %bb.0: 326; AVX512-INFS-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 327; AVX512-INFS-NEXT: vmulpd %zmm0, %zmm1, %zmm0 328; AVX512-INFS-NEXT: retq 329; 330; FMA-NOINFS-LABEL: test_v8f64_mul_y_add_x_one: 331; FMA-NOINFS: # %bb.0: 332; FMA-NOINFS-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm2 333; FMA-NOINFS-NEXT: vfmadd213pd {{.*#+}} ymm1 = (ymm3 * ymm1) + ymm3 334; FMA-NOINFS-NEXT: retq 335; 336; FMA4-NOINFS-LABEL: test_v8f64_mul_y_add_x_one: 337; FMA4-NOINFS: # %bb.0: 338; FMA4-NOINFS-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2 339; FMA4-NOINFS-NEXT: vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3 340; FMA4-NOINFS-NEXT: retq 341; 342; AVX512-NOINFS-LABEL: test_v8f64_mul_y_add_x_one: 343; AVX512-NOINFS: # %bb.0: 344; AVX512-NOINFS-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm1 345; AVX512-NOINFS-NEXT: retq 346 %a = fadd <8 x double> %x, <double 1.0, double 1.0, double 1.0, double 
1.0, double 1.0, double 1.0, double 1.0, double 1.0> 347 %m = fmul <8 x double> %y, %a 348 ret <8 x double> %m 349} 350 351define <16 x float> @test_v16f32_mul_add_x_negone_y(<16 x float> %x, <16 x float> %y) { 352; FMA-INFS-LABEL: test_v16f32_mul_add_x_negone_y: 353; FMA-INFS: # %bb.0: 354; FMA-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] 355; FMA-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1 356; FMA-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0 357; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0 358; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 359; FMA-INFS-NEXT: retq 360; 361; FMA4-INFS-LABEL: test_v16f32_mul_add_x_negone_y: 362; FMA4-INFS: # %bb.0: 363; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] 364; FMA4-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1 365; FMA4-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0 366; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0 367; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 368; FMA4-INFS-NEXT: retq 369; 370; AVX512-INFS-LABEL: test_v16f32_mul_add_x_negone_y: 371; AVX512-INFS: # %bb.0: 372; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 373; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0 374; AVX512-INFS-NEXT: retq 375; 376; FMA-NOINFS-LABEL: test_v16f32_mul_add_x_negone_y: 377; FMA-NOINFS: # %bb.0: 378; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm2 379; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm3 * ymm1) - ymm3 380; FMA-NOINFS-NEXT: retq 381; 382; FMA4-NOINFS-LABEL: test_v16f32_mul_add_x_negone_y: 383; FMA4-NOINFS: # %bb.0: 384; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2 385; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3 386; FMA4-NOINFS-NEXT: retq 387; 388; AVX512-NOINFS-LABEL: test_v16f32_mul_add_x_negone_y: 389; AVX512-NOINFS: # %bb.0: 390; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm1 391; AVX512-NOINFS-NEXT: retq 392 %a = 
fadd <16 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0> 393 %m = fmul <16 x float> %a, %y 394 ret <16 x float> %m 395} 396 397define <8 x double> @test_v8f64_mul_y_add_x_negone(<8 x double> %x, <8 x double> %y) { 398; FMA-INFS-LABEL: test_v8f64_mul_y_add_x_negone: 399; FMA-INFS: # %bb.0: 400; FMA-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] 401; FMA-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1 402; FMA-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0 403; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0 404; FMA-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1 405; FMA-INFS-NEXT: retq 406; 407; FMA4-INFS-LABEL: test_v8f64_mul_y_add_x_negone: 408; FMA4-INFS: # %bb.0: 409; FMA4-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] 410; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1 411; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0 412; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0 413; FMA4-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1 414; FMA4-INFS-NEXT: retq 415; 416; AVX512-INFS-LABEL: test_v8f64_mul_y_add_x_negone: 417; AVX512-INFS: # %bb.0: 418; AVX512-INFS-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 419; AVX512-INFS-NEXT: vmulpd %zmm0, %zmm1, %zmm0 420; AVX512-INFS-NEXT: retq 421; 422; FMA-NOINFS-LABEL: test_v8f64_mul_y_add_x_negone: 423; FMA-NOINFS: # %bb.0: 424; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm2 425; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm1 = (ymm3 * ymm1) - ymm3 426; FMA-NOINFS-NEXT: retq 427; 428; FMA4-NOINFS-LABEL: test_v8f64_mul_y_add_x_negone: 429; FMA4-NOINFS: # %bb.0: 430; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2 431; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3 432; FMA4-NOINFS-NEXT: retq 433; 434; AVX512-NOINFS-LABEL: test_v8f64_mul_y_add_x_negone: 435; AVX512-NOINFS: # %bb.0: 436; AVX512-NOINFS-NEXT: 
vfmsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm1 437; AVX512-NOINFS-NEXT: retq 438 %a = fadd <8 x double> %x, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0> 439 %m = fmul <8 x double> %y, %a 440 ret <8 x double> %m 441} 442 443define <16 x float> @test_v16f32_mul_sub_one_x_y(<16 x float> %x, <16 x float> %y) { 444; FMA-INFS-LABEL: test_v16f32_mul_sub_one_x_y: 445; FMA-INFS: # %bb.0: 446; FMA-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] 447; FMA-INFS-NEXT: vsubps %ymm1, %ymm4, %ymm1 448; FMA-INFS-NEXT: vsubps %ymm0, %ymm4, %ymm0 449; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0 450; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 451; FMA-INFS-NEXT: retq 452; 453; FMA4-INFS-LABEL: test_v16f32_mul_sub_one_x_y: 454; FMA4-INFS: # %bb.0: 455; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] 456; FMA4-INFS-NEXT: vsubps %ymm1, %ymm4, %ymm1 457; FMA4-INFS-NEXT: vsubps %ymm0, %ymm4, %ymm0 458; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0 459; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 460; FMA4-INFS-NEXT: retq 461; 462; AVX512-INFS-LABEL: test_v16f32_mul_sub_one_x_y: 463; AVX512-INFS: # %bb.0: 464; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] 465; AVX512-INFS-NEXT: vsubps %zmm0, %zmm2, %zmm0 466; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0 467; AVX512-INFS-NEXT: retq 468; 469; FMA-NOINFS-LABEL: test_v16f32_mul_sub_one_x_y: 470; FMA-NOINFS: # %bb.0: 471; FMA-NOINFS-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm2 472; FMA-NOINFS-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm3 * ymm1) + ymm3 473; FMA-NOINFS-NEXT: retq 474; 475; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_one_x_y: 476; FMA4-NOINFS: # %bb.0: 477; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm2 478; FMA4-NOINFS-NEXT: vfnmaddps 
{{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm3 479; FMA4-NOINFS-NEXT: retq 480; 481; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_one_x_y: 482; AVX512-NOINFS: # %bb.0: 483; AVX512-NOINFS-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm1 484; AVX512-NOINFS-NEXT: retq 485 %s = fsub <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x 486 %m = fmul <16 x float> %s, %y 487 ret <16 x float> %m 488} 489 490define <8 x double> @test_v8f64_mul_y_sub_one_x(<8 x double> %x, <8 x double> %y) { 491; FMA-INFS-LABEL: test_v8f64_mul_y_sub_one_x: 492; FMA-INFS: # %bb.0: 493; FMA-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] 494; FMA-INFS-NEXT: vsubpd %ymm1, %ymm4, %ymm1 495; FMA-INFS-NEXT: vsubpd %ymm0, %ymm4, %ymm0 496; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0 497; FMA-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1 498; FMA-INFS-NEXT: retq 499; 500; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_one_x: 501; FMA4-INFS: # %bb.0: 502; FMA4-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] 503; FMA4-INFS-NEXT: vsubpd %ymm1, %ymm4, %ymm1 504; FMA4-INFS-NEXT: vsubpd %ymm0, %ymm4, %ymm0 505; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0 506; FMA4-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1 507; FMA4-INFS-NEXT: retq 508; 509; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_one_x: 510; AVX512-INFS: # %bb.0: 511; AVX512-INFS-NEXT: vbroadcastsd {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] 512; AVX512-INFS-NEXT: vsubpd %zmm0, %zmm2, %zmm0 513; AVX512-INFS-NEXT: vmulpd %zmm0, %zmm1, %zmm0 514; AVX512-INFS-NEXT: retq 515; 516; FMA-NOINFS-LABEL: test_v8f64_mul_y_sub_one_x: 517; FMA-NOINFS: # %bb.0: 518; FMA-NOINFS-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm2 519; FMA-NOINFS-NEXT: vfnmadd213pd {{.*#+}} ymm1 = -(ymm3 * ymm1) + ymm3 520; FMA-NOINFS-NEXT: retq 521; 522; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_one_x: 
523; FMA4-NOINFS: # %bb.0: 524; FMA4-NOINFS-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm2 525; FMA4-NOINFS-NEXT: vfnmaddpd {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm3 526; FMA4-NOINFS-NEXT: retq 527; 528; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_one_x: 529; AVX512-NOINFS: # %bb.0: 530; AVX512-NOINFS-NEXT: vfnmadd213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm1 531; AVX512-NOINFS-NEXT: retq 532 %s = fsub <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %x 533 %m = fmul <8 x double> %y, %s 534 ret <8 x double> %m 535} 536 537define <16 x float> @test_v16f32_mul_sub_negone_x_y(<16 x float> %x, <16 x float> %y) { 538; FMA-INFS-LABEL: test_v16f32_mul_sub_negone_x_y: 539; FMA-INFS: # %bb.0: 540; FMA-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] 541; FMA-INFS-NEXT: vsubps %ymm1, %ymm4, %ymm1 542; FMA-INFS-NEXT: vsubps %ymm0, %ymm4, %ymm0 543; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0 544; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 545; FMA-INFS-NEXT: retq 546; 547; FMA4-INFS-LABEL: test_v16f32_mul_sub_negone_x_y: 548; FMA4-INFS: # %bb.0: 549; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] 550; FMA4-INFS-NEXT: vsubps %ymm1, %ymm4, %ymm1 551; FMA4-INFS-NEXT: vsubps %ymm0, %ymm4, %ymm0 552; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0 553; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 554; FMA4-INFS-NEXT: retq 555; 556; AVX512-INFS-LABEL: test_v16f32_mul_sub_negone_x_y: 557; AVX512-INFS: # %bb.0: 558; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} zmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] 559; AVX512-INFS-NEXT: vsubps %zmm0, %zmm2, %zmm0 560; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0 561; AVX512-INFS-NEXT: retq 562; 563; FMA-NOINFS-LABEL: test_v16f32_mul_sub_negone_x_y: 564; FMA-NOINFS: # %bb.0: 565; 
FMA-NOINFS-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) - ymm2 566; FMA-NOINFS-NEXT: vfnmsub213ps {{.*#+}} ymm1 = -(ymm3 * ymm1) - ymm3 567; FMA-NOINFS-NEXT: retq 568; 569; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_negone_x_y: 570; FMA4-NOINFS: # %bb.0: 571; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm2 572; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm3 573; FMA4-NOINFS-NEXT: retq 574; 575; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_negone_x_y: 576; AVX512-NOINFS: # %bb.0: 577; AVX512-NOINFS-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm1 578; AVX512-NOINFS-NEXT: retq 579 %s = fsub <16 x float> <float -1.0, float -1.0, float -1.0, float -1.0,float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>, %x 580 %m = fmul <16 x float> %s, %y 581 ret <16 x float> %m 582} 583 584define <8 x double> @test_v8f64_mul_y_sub_negone_x(<8 x double> %x, <8 x double> %y) { 585; FMA-INFS-LABEL: test_v8f64_mul_y_sub_negone_x: 586; FMA-INFS: # %bb.0: 587; FMA-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] 588; FMA-INFS-NEXT: vsubpd %ymm1, %ymm4, %ymm1 589; FMA-INFS-NEXT: vsubpd %ymm0, %ymm4, %ymm0 590; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0 591; FMA-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1 592; FMA-INFS-NEXT: retq 593; 594; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_negone_x: 595; FMA4-INFS: # %bb.0: 596; FMA4-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] 597; FMA4-INFS-NEXT: vsubpd %ymm1, %ymm4, %ymm1 598; FMA4-INFS-NEXT: vsubpd %ymm0, %ymm4, %ymm0 599; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0 600; FMA4-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1 601; FMA4-INFS-NEXT: retq 602; 603; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_negone_x: 604; AVX512-INFS: # %bb.0: 605; AVX512-INFS-NEXT: vbroadcastsd {{.*#+}} zmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] 606; AVX512-INFS-NEXT: vsubpd 
%zmm0, %zmm2, %zmm0 607; AVX512-INFS-NEXT: vmulpd %zmm0, %zmm1, %zmm0 608; AVX512-INFS-NEXT: retq 609; 610; FMA-NOINFS-LABEL: test_v8f64_mul_y_sub_negone_x: 611; FMA-NOINFS: # %bb.0: 612; FMA-NOINFS-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm2 * ymm0) - ymm2 613; FMA-NOINFS-NEXT: vfnmsub213pd {{.*#+}} ymm1 = -(ymm3 * ymm1) - ymm3 614; FMA-NOINFS-NEXT: retq 615; 616; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_negone_x: 617; FMA4-NOINFS: # %bb.0: 618; FMA4-NOINFS-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm2 619; FMA4-NOINFS-NEXT: vfnmsubpd {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm3 620; FMA4-NOINFS-NEXT: retq 621; 622; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_negone_x: 623; AVX512-NOINFS: # %bb.0: 624; AVX512-NOINFS-NEXT: vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm1 625; AVX512-NOINFS-NEXT: retq 626 %s = fsub <8 x double> <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>, %x 627 %m = fmul <8 x double> %y, %s 628 ret <8 x double> %m 629} 630 631define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> %y) { 632; FMA-INFS-LABEL: test_v16f32_mul_sub_x_one_y: 633; FMA-INFS: # %bb.0: 634; FMA-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] 635; FMA-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1 636; FMA-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0 637; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0 638; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 639; FMA-INFS-NEXT: retq 640; 641; FMA4-INFS-LABEL: test_v16f32_mul_sub_x_one_y: 642; FMA4-INFS: # %bb.0: 643; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] 644; FMA4-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1 645; FMA4-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0 646; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0 647; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 648; FMA4-INFS-NEXT: retq 649; 650; AVX512-INFS-LABEL: test_v16f32_mul_sub_x_one_y: 651; AVX512-INFS: # 
%bb.0: 652; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 653; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0 654; AVX512-INFS-NEXT: retq 655; 656; FMA-NOINFS-LABEL: test_v16f32_mul_sub_x_one_y: 657; FMA-NOINFS: # %bb.0: 658; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm2 659; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm3 * ymm1) - ymm3 660; FMA-NOINFS-NEXT: retq 661; 662; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_x_one_y: 663; FMA4-NOINFS: # %bb.0: 664; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2 665; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3 666; FMA4-NOINFS-NEXT: retq 667; 668; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_x_one_y: 669; AVX512-NOINFS: # %bb.0: 670; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm1 671; AVX512-NOINFS-NEXT: retq 672 %s = fsub <16 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0> 673 %m = fmul <16 x float> %s, %y 674 ret <16 x float> %m 675} 676 677define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y) { 678; FMA-INFS-LABEL: test_v8f64_mul_y_sub_x_one: 679; FMA-INFS: # %bb.0: 680; FMA-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] 681; FMA-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1 682; FMA-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0 683; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0 684; FMA-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1 685; FMA-INFS-NEXT: retq 686; 687; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_x_one: 688; FMA4-INFS: # %bb.0: 689; FMA4-INFS-NEXT: vbroadcastsd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] 690; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1 691; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0 692; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0 693; FMA4-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1 694; FMA4-INFS-NEXT: retq 695; 696; 
AVX512-INFS-LABEL: test_v8f64_mul_y_sub_x_one:
; AVX512-INFS:       # %bb.0:
; AVX512-INFS-NEXT:    vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; AVX512-INFS-NEXT:    vmulpd %zmm0, %zmm1, %zmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v8f64_mul_y_sub_x_one:
; FMA-NOINFS:       # %bb.0:
; FMA-NOINFS-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm2
; FMA-NOINFS-NEXT:    vfmsub213pd {{.*#+}} ymm1 = (ymm3 * ymm1) - ymm3
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_x_one:
; FMA4-NOINFS:       # %bb.0:
; FMA4-NOINFS-NEXT:    vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm2
; FMA4-NOINFS-NEXT:    vfmsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm3
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_x_one:
; AVX512-NOINFS:       # %bb.0:
; AVX512-NOINFS-NEXT:    vfmsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm1
; AVX512-NOINFS-NEXT:    retq
  %s = fsub <8 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>
  %m = fmul <8 x double> %y, %s
  ret <8 x double> %m
}

; Computes (x - (-1.0)) * y on <16 x float>.
; The default RUN configurations expect a separate add of 1.0 followed by a
; multiply; the -enable-no-infs-fp-math configurations expect a single fmadd
; of the form (x * y) + y.
define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float> %y) {
; FMA-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA-INFS:       # %bb.0:
; FMA-INFS-NEXT:    vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT:    vaddps %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT:    vaddps %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; FMA-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA4-INFS:       # %bb.0:
; FMA4-INFS-NEXT:    vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT:    vaddps %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT:    vaddps %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; AVX512-INFS:       # %bb.0:
; AVX512-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512-INFS-NEXT:    vmulps %zmm1, %zmm0, %zmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA-NOINFS:       # %bb.0:
; FMA-NOINFS-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm2
; FMA-NOINFS-NEXT:    vfmadd213ps {{.*#+}} ymm1 = (ymm3 * ymm1) + ymm3
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA4-NOINFS:       # %bb.0:
; FMA4-NOINFS-NEXT:    vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
; FMA4-NOINFS-NEXT:    vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; AVX512-NOINFS:       # %bb.0:
; AVX512-NOINFS-NEXT:    vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm1
; AVX512-NOINFS-NEXT:    retq
  %s = fsub <16 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>
  %m = fmul <16 x float> %s, %y
  ret <16 x float> %m
}

; Computes y * (x - (-1.0)) on <8 x double>, i.e. the previous pattern with
; the multiply operands commuted. The no-infs configurations expect a single
; fmadd of the form (x * y) + y; the others expect an add then a multiply.
define <8 x double> @test_v8f64_mul_y_sub_x_negone(<8 x double> %x, <8 x double> %y) {
; FMA-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:
; FMA-INFS:       # %bb.0:
; FMA-INFS-NEXT:    vbroadcastsd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT:    vaddpd %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT:    vaddpd %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT:    vmulpd %ymm0, %ymm2, %ymm0
; FMA-INFS-NEXT:    vmulpd %ymm1, %ymm3, %ymm1
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:
; FMA4-INFS:       # %bb.0:
; FMA4-INFS-NEXT:    vbroadcastsd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT:    vaddpd %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT:    vaddpd %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vmulpd %ymm0, %ymm2, %ymm0
; FMA4-INFS-NEXT:    vmulpd %ymm1, %ymm3, %ymm1
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:
; AVX512-INFS:       # %bb.0:
; AVX512-INFS-NEXT:    vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; AVX512-INFS-NEXT:    vmulpd %zmm0, %zmm1, %zmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v8f64_mul_y_sub_x_negone:
; FMA-NOINFS:       # %bb.0:
; FMA-NOINFS-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm2
; FMA-NOINFS-NEXT:    vfmadd213pd {{.*#+}} ymm1 = (ymm3 * ymm1) + ymm3
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_x_negone:
; FMA4-NOINFS:       # %bb.0:
; FMA4-NOINFS-NEXT:    vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
; FMA4-NOINFS-NEXT:    vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm3
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_x_negone:
; AVX512-NOINFS:       # %bb.0:
; AVX512-NOINFS-NEXT:    vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm1
; AVX512-NOINFS-NEXT:    retq
  %s = fsub <8 x double> %x, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
  %m = fmul <8 x double> %y, %s
  ret <8 x double> %m
}

;
; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y))
;

; Computes x*t + y*(1.0 - t) on <16 x float>; every FP operation carries nsz.
; The default configurations expect an explicit (1.0 - t) subtract, a multiply
; and one fmadd. The no-infs configurations expect two fmsub operations:
; first (t * y) - y, then (t * x) minus that intermediate.
define <16 x float> @test_v16f32_interp(<16 x float> %x, <16 x float> %y, <16 x float> %t) {
; FMA-INFS-LABEL: test_v16f32_interp:
; FMA-INFS:       # %bb.0:
; FMA-INFS-NEXT:    vbroadcastss {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT:    vsubps %ymm4, %ymm6, %ymm7
; FMA-INFS-NEXT:    vsubps %ymm5, %ymm6, %ymm6
; FMA-INFS-NEXT:    vmulps %ymm6, %ymm3, %ymm3
; FMA-INFS-NEXT:    vmulps %ymm7, %ymm2, %ymm2
; FMA-INFS-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm4 * ymm0) + ymm2
; FMA-INFS-NEXT:    vfmadd213ps {{.*#+}} ymm1 = (ymm5 * ymm1) + ymm3
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v16f32_interp:
; FMA4-INFS:       # %bb.0:
; FMA4-INFS-NEXT:    vbroadcastss {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT:    vsubps %ymm4, %ymm6, %ymm7
; FMA4-INFS-NEXT:    vsubps %ymm5, %ymm6, %ymm6
; FMA4-INFS-NEXT:    vmulps %ymm6, %ymm3, %ymm3
; FMA4-INFS-NEXT:    vmulps %ymm7, %ymm2, %ymm2
; FMA4-INFS-NEXT:    vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm2
; FMA4-INFS-NEXT:    vfmaddps {{.*#+}} ymm1 = (ymm1 * ymm5) + ymm3
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v16f32_interp:
; AVX512-INFS:       # %bb.0:
; AVX512-INFS-NEXT:    vbroadcastss {{.*#+}} zmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-INFS-NEXT:    vsubps %zmm2, %zmm3, %zmm3
; AVX512-INFS-NEXT:    vmulps %zmm3, %zmm1, %zmm1
; AVX512-INFS-NEXT:    vfmadd213ps {{.*#+}} zmm0 = (zmm2 * zmm0) + zmm1
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v16f32_interp:
; FMA-NOINFS:       # %bb.0:
; FMA-NOINFS-NEXT:    vfmsub213ps {{.*#+}} ymm3 = (ymm5 * ymm3) - ymm3
; FMA-NOINFS-NEXT:    vfmsub213ps {{.*#+}} ymm2 = (ymm4 * ymm2) - ymm2
; FMA-NOINFS-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm4 * ymm0) - ymm2
; FMA-NOINFS-NEXT:    vfmsub213ps {{.*#+}} ymm1 = (ymm5 * ymm1) - ymm3
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v16f32_interp:
; FMA4-NOINFS:       # %bb.0:
; FMA4-NOINFS-NEXT:    vfmsubps {{.*#+}} ymm3 = (ymm5 * ymm3) - ymm3
; FMA4-NOINFS-NEXT:    vfmsubps {{.*#+}} ymm2 = (ymm4 * ymm2) - ymm2
; FMA4-NOINFS-NEXT:    vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm4) - ymm2
; FMA4-NOINFS-NEXT:    vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm5) - ymm3
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v16f32_interp:
; AVX512-NOINFS:       # %bb.0:
; AVX512-NOINFS-NEXT:    vfmsub213ps {{.*#+}} zmm1 = (zmm2 * zmm1) - zmm1
; AVX512-NOINFS-NEXT:    vfmsub213ps {{.*#+}} zmm0 = (zmm2 * zmm0) - zmm1
; AVX512-NOINFS-NEXT:    retq
  %t1 = fsub nsz <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
  %tx = fmul nsz <16 x float> %x, %t
  %ty = fmul nsz <16 x float> %y, %t1
  %r = fadd nsz <16 x float> %tx, %ty
  ret <16 x float> %r
}

; <8 x double> variant of the interpolation pattern x*t + y*(1.0 - t), with
; nsz on every FP operation. Expectations mirror the <16 x float> case:
; subtract + multiply + fmadd by default, two fmsub operations with no-infs.
define <8 x double> @test_v8f64_interp(<8 x double> %x, <8 x double> %y, <8 x double> %t) {
; FMA-INFS-LABEL: test_v8f64_interp:
; FMA-INFS:       # %bb.0:
; FMA-INFS-NEXT:    vbroadcastsd {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT:    vsubpd %ymm4, %ymm6, %ymm7
; FMA-INFS-NEXT:    vsubpd %ymm5, %ymm6, %ymm6
; FMA-INFS-NEXT:    vmulpd %ymm6, %ymm3, %ymm3
; FMA-INFS-NEXT:    vmulpd %ymm7, %ymm2, %ymm2
; FMA-INFS-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm4 * ymm0) + ymm2
; FMA-INFS-NEXT:    vfmadd213pd {{.*#+}} ymm1 = (ymm5 * ymm1) + ymm3
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v8f64_interp:
; FMA4-INFS:       # %bb.0:
; FMA4-INFS-NEXT:    vbroadcastsd {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT:    vsubpd %ymm4, %ymm6, %ymm7
; FMA4-INFS-NEXT:    vsubpd %ymm5, %ymm6, %ymm6
; FMA4-INFS-NEXT:    vmulpd %ymm6, %ymm3, %ymm3
; FMA4-INFS-NEXT:    vmulpd %ymm7, %ymm2, %ymm2
; FMA4-INFS-NEXT:    vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm2
; FMA4-INFS-NEXT:    vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm5) + ymm3
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v8f64_interp:
; AVX512-INFS:       # %bb.0:
; AVX512-INFS-NEXT:    vbroadcastsd {{.*#+}} zmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-INFS-NEXT:    vsubpd %zmm2, %zmm3, %zmm3
; AVX512-INFS-NEXT:    vmulpd %zmm3, %zmm1, %zmm1
; AVX512-INFS-NEXT:    vfmadd213pd {{.*#+}} zmm0 = (zmm2 * zmm0) + zmm1
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v8f64_interp:
; FMA-NOINFS:       # %bb.0:
; FMA-NOINFS-NEXT:    vfmsub213pd {{.*#+}} ymm3 = (ymm5 * ymm3) - ymm3
; FMA-NOINFS-NEXT:    vfmsub213pd {{.*#+}} ymm2 = (ymm4 * ymm2) - ymm2
; FMA-NOINFS-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm4 * ymm0) - ymm2
; FMA-NOINFS-NEXT:    vfmsub213pd {{.*#+}} ymm1 = (ymm5 * ymm1) - ymm3
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v8f64_interp:
; FMA4-NOINFS:       # %bb.0:
; FMA4-NOINFS-NEXT:    vfmsubpd {{.*#+}} ymm3 = (ymm5 * ymm3) - ymm3
; FMA4-NOINFS-NEXT:    vfmsubpd {{.*#+}} ymm2 = (ymm4 * ymm2) - ymm2
; FMA4-NOINFS-NEXT:    vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm4) - ymm2
; FMA4-NOINFS-NEXT:    vfmsubpd {{.*#+}} ymm1 = (ymm1 * ymm5) - ymm3
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v8f64_interp:
; AVX512-NOINFS:       # %bb.0:
; AVX512-NOINFS-NEXT:    vfmsub213pd {{.*#+}} zmm1 = (zmm2 * zmm1) - zmm1
; AVX512-NOINFS-NEXT:    vfmsub213pd {{.*#+}} zmm0 = (zmm2 * zmm0) - zmm1
; AVX512-NOINFS-NEXT:    retq
  %t1 = fsub nsz <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %t
  %tx = fmul nsz <8 x double> %x, %t
  %ty = fmul nsz <8 x double> %y, %t1
  %r = fadd nsz <8 x double> %tx, %ty
  ret <8 x double> %r
}

;
; Pattern: (fneg (fma x, y, z)) -> (fma x, -y, -z)
;

; Negates (a0 * a1) + a2, with the negation written as "fsub nsz -0.0, v".
; All configurations expect the whole expression to become one fnmsub,
; -(a0 * a1) - a2, per 256-bit half (or once for the 512-bit register).
define <16 x float> @test_v16f32_fneg_fmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) #0 {
; FMA-LABEL: test_v16f32_fneg_fmadd:
; FMA:       # %bb.0:
; FMA-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) - ymm4
; FMA-NEXT:    vfnmsub213ps {{.*#+}} ymm1 = -(ymm3 * ymm1) - ymm5
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_fneg_fmadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm4
; FMA4-NEXT:    vfnmsubps {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm5
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_fneg_fmadd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; AVX512-NEXT:    retq
  %mul = fmul nsz <16 x float> %a0, %a1
  %add = fadd nsz <16 x float> %mul, %a2
  %neg = fsub nsz <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
  ret <16 x float> %neg
}

; Negates (a0 * a1) - a2 on <8 x double>.
; All configurations expect a single fnmadd, -(a0 * a1) + a2.
define <8 x double> @test_v8f64_fneg_fmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) #0 {
; FMA-LABEL: test_v8f64_fneg_fmsub:
; FMA:       # %bb.0:
; FMA-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm4
; FMA-NEXT:    vfnmadd213pd {{.*#+}} ymm1 = -(ymm3 * ymm1) + ymm5
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_fneg_fmsub:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm4
; FMA4-NEXT:    vfnmaddpd {{.*#+}} ymm1 = -(ymm1 * ymm3) + ymm5
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_fneg_fmsub:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfnmadd213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
; AVX512-NEXT:    retq
  %mul = fmul nsz <8 x double> %a0, %a1
  %sub = fsub nsz <8 x double> %mul, %a2
  %neg = fsub nsz <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
  ret <8 x double> %neg
}

; Negates (-(a0 * a1)) + a2 on <16 x float>; both negations are written as
; "fsub nsz -0.0, v". All configurations expect a single fmsub,
; (a0 * a1) - a2.
define <16 x float> @test_v16f32_fneg_fnmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) #0 {
; FMA-LABEL: test_v16f32_fneg_fnmadd:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm4
; FMA-NEXT:    vfmsub213ps {{.*#+}} ymm1 = (ymm3 * ymm1) - ymm5
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_fneg_fnmadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm4
; FMA4-NEXT:    vfmsubps {{.*#+}} ymm1 = (ymm1 * ymm3) - ymm5
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_fneg_fnmadd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
; AVX512-NEXT:    retq
  %mul = fmul nsz <16 x float> %a0, %a1
  %neg0 = fsub nsz <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %mul
  %add = fadd nsz <16 x float> %neg0, %a2
  %neg1 = fsub nsz <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
  ret <16 x float> %neg1
}

; Negates (-(a0 * a1)) - a2 on <8 x double>.
; All configurations expect a single fmadd, (a0 * a1) + a2.
define <8 x double> @test_v8f64_fneg_fnmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) #0 {
; FMA-LABEL: test_v8f64_fneg_fnmsub:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm4
; FMA-NEXT:    vfmadd213pd {{.*#+}} ymm1 = (ymm3 * ymm1) + ymm5
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_fneg_fnmsub:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm4
; FMA4-NEXT:    vfmaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) + ymm5
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_fneg_fnmsub:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; AVX512-NEXT:    retq
  %mul = fmul nsz <8 x double> %a0, %a1
  %neg0 = fsub nsz <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %mul
  %sub = fsub nsz <8 x double> %neg0, %a2
  %neg1 = fsub nsz <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
  ret <8 x double> %neg1
}

;
; Pattern: (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
;

; Adds two products of the same %x with different constant vectors.
; All configurations expect the result to be a plain multiply of %x by a
; single constant-pool operand, with no FMA instruction emitted.
define <16 x float> @test_v16f32_fma_x_c1_fmul_x_c2(<16 x float> %x) #0 {
; FMA-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
; FMA:       # %bb.0:
; FMA-NEXT:    vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; FMA-NEXT:    vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; FMA4-NEXT:    vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512-NEXT:    retq
  %m0 = fmul <16 x float> %x, <float 17.0, float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0>
  %m1 = fmul <16 x float> %x, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>
  %a = fadd <16 x float> %m0, %m1
  ret <16 x float> %a
}

;
; Pattern: (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
;

; Multiplies %x by two constant vectors in sequence and then adds %y.
; All configurations expect one fmadd per register whose multiplier is a
; single memory (constant-pool) operand.
define <16 x float> @test_v16f32_fma_fmul_x_c1_c2_y(<16 x float> %x, <16 x float> %y) #0 {
; FMA-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm2
; FMA-NEXT:    vfmadd132ps {{.*#+}} ymm1 = (ymm1 * mem) + ymm3
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmaddps {{.*#+}} ymm0 = (ymm0 * mem) + ymm2
; FMA4-NEXT:    vfmaddps {{.*#+}} ymm1 = (ymm1 * mem) + ymm3
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1
; AVX512-NEXT:    retq
  %m0 = fmul <16 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>
  %m1 = fmul <16 x float> %m0, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>
  %a = fadd <16 x float> %m1, %y
  ret <16 x float> %a
}

; Pattern: (fneg (fmul x, y)) -> (fnmsub x, y, 0)

; Negates x * y on <16 x float>; only the fmul carries nsz.
; All configurations expect a zeroed register (vxorps) used as the addend of
; one fnmsub, -(x * y) - 0.
define <16 x float> @test_v16f32_fneg_fmul(<16 x float> %x, <16 x float> %y) #0 {
; FMA-LABEL: test_v16f32_fneg_fmul:
; FMA:       # %bb.0:
; FMA-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; FMA-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) - ymm4
; FMA-NEXT:    vfnmsub213ps {{.*#+}} ymm1 = -(ymm3 * ymm1) - ymm4
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_fneg_fmul:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vxorps %xmm4, %xmm4, %xmm4
; FMA4-NEXT:    vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm4
; FMA4-NEXT:    vfnmsubps {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm4
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_fneg_fmul:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; AVX512-NEXT:    retq
  %m = fmul nsz <16 x float> %x, %y
  %n = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %m
  ret <16 x float> %n
}

; <8 x double> variant of the negated-multiply pattern, with nsz on the fmul.
; All configurations expect a zeroed register followed by one fnmsub.
define <8 x double> @test_v8f64_fneg_fmul(<8 x double> %x, <8 x double> %y) #0 {
; FMA-LABEL: test_v8f64_fneg_fmul:
; FMA:       # %bb.0:
; FMA-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
; FMA-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm2 * ymm0) - ymm4
; FMA-NEXT:    vfnmsub213pd {{.*#+}} ymm1 = -(ymm3 * ymm1) - ymm4
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_fneg_fmul:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
; FMA4-NEXT:    vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm4
; FMA4-NEXT:    vfnmsubpd {{.*#+}} ymm1 = -(ymm1 * ymm3) - ymm4
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_fneg_fmul:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; AVX512-NEXT:    retq
  %m = fmul nsz <8 x double> %x, %y
  %n = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %m
  ret <8 x double> %n
}

; Same negated multiply as test_v8f64_fneg_fmul but without nsz on the fmul.
; Here no FMA is expected: all configurations check for a plain multiply
; followed by an xor with a -0.0 (sign-bit) constant.
define <8 x double> @test_v8f64_fneg_fmul_no_nsz(<8 x double> %x, <8 x double> %y) #0 {
; FMA-LABEL: test_v8f64_fneg_fmul_no_nsz:
; FMA:       # %bb.0:
; FMA-NEXT:    vmulpd %ymm3, %ymm1, %ymm1
; FMA-NEXT:    vmulpd %ymm2, %ymm0, %ymm0
; FMA-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; FMA-NEXT:    vxorpd %ymm2, %ymm0, %ymm0
; FMA-NEXT:    vxorpd %ymm2, %ymm1, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_fneg_fmul_no_nsz:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmulpd %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    vmulpd %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; FMA4-NEXT:    vxorpd %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vxorpd %ymm2, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_fneg_fmul_no_nsz:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %m = fmul <8 x double> %x, %y
  %n = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %m
  ret <8 x double> %n
}

; Attribute group applied to the functions above that are declared with #0;
; it sets the "unsafe-fp-math" function attribute to "true".
attributes #0 = { "unsafe-fp-math"="true" }