1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
;
; Strict floating-point code generation test for PowerPC, driven by the
; llvm.experimental.constrained.* intrinsics declared below (fadd, fsub, fmul,
; fdiv, fma and sqrt, each for float, double, <4 x float> and <2 x double>).
;
; The llc invocations that follow cover four configurations:
;   * powerpc64-unknown-linux with -mcpu=pwr8 and powerpc64le-unknown-linux
;     with -mcpu=pwr9; both are verified with the default FileCheck prefix.
;   * powerpc64le-unknown-linux with -mcpu=pwr8 and -mattr=-vsx; verified with
;     the NOVSX prefix.
;   * powerpc-unknown-linux with -mattr=spe; verified with the SPE prefix.
;
; The assertion comments are generated output. Regenerate them with the script
; named in the note above rather than editing them by hand.
;
2; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr8 | FileCheck %s 3; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s 4; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr8 -mattr=-vsx | FileCheck %s -check-prefix=NOVSX 5; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc-unknown-linux -mattr=spe | FileCheck %s -check-prefix=SPE 6 7declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) 8declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata) 9declare <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float>, <4 x float>, metadata, metadata) 10declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata) 11 12declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata) 13declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata) 14declare <4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float>, <4 x float>, metadata, metadata) 15declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata) 16 17declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata) 18declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata) 19declare <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float>, <4 x float>, metadata, metadata) 20declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata) 21 22declare float @llvm.experimental.constrained.fdiv.f32(float, float, 
metadata, metadata) 23declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata) 24declare <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float>, <4 x float>, metadata, metadata) 25declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata) 26 27declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata) 28declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata) 29declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata) 30declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata) 31 32declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata) 33declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata) 34declare <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float>, metadata, metadata) 35declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata) 36 37define float @fadd_f32(float %f1, float %f2) #0 { 38; CHECK-LABEL: fadd_f32: 39; CHECK: # %bb.0: 40; CHECK-NEXT: xsaddsp f1, f1, f2 41; CHECK-NEXT: blr 42; 43; NOVSX-LABEL: fadd_f32: 44; NOVSX: # %bb.0: 45; NOVSX-NEXT: fadds f1, f1, f2 46; NOVSX-NEXT: blr 47; 48; SPE-LABEL: fadd_f32: 49; SPE: # %bb.0: 50; SPE-NEXT: efsadd r3, r3, r4 51; SPE-NEXT: blr 52 %res = call float @llvm.experimental.constrained.fadd.f32( 53 float %f1, float %f2, 54 metadata !"round.dynamic", 55 metadata !"fpexcept.strict") #0 56 ret float %res 57} 58 59define double @fadd_f64(double %f1, double %f2) #0 { 60; CHECK-LABEL: fadd_f64: 61; CHECK: # %bb.0: 62; CHECK-NEXT: xsadddp f1, f1, f2 63; CHECK-NEXT: blr 64; 65; NOVSX-LABEL: fadd_f64: 66; NOVSX: # %bb.0: 67; NOVSX-NEXT: fadd f1, f1, f2 68; NOVSX-NEXT: blr 69; 70; SPE-LABEL: fadd_f64: 71; SPE: # %bb.0: 72; 
SPE-NEXT: evmergelo r5, r5, r6 73; SPE-NEXT: evmergelo r3, r3, r4 74; SPE-NEXT: efdadd r4, r3, r5 75; SPE-NEXT: evmergehi r3, r4, r4 76; SPE-NEXT: blr 77 %res = call double @llvm.experimental.constrained.fadd.f64( 78 double %f1, double %f2, 79 metadata !"round.dynamic", 80 metadata !"fpexcept.strict") #0 81 ret double %res 82} 83 84define <4 x float> @fadd_v4f32(<4 x float> %vf1, <4 x float> %vf2) #0 { 85; CHECK-LABEL: fadd_v4f32: 86; CHECK: # %bb.0: 87; CHECK-NEXT: xvaddsp v2, v2, v3 88; CHECK-NEXT: blr 89; 90; NOVSX-LABEL: fadd_v4f32: 91; NOVSX: # %bb.0: 92; NOVSX-NEXT: addi r3, r1, -32 93; NOVSX-NEXT: stvx v3, 0, r3 94; NOVSX-NEXT: addi r3, r1, -48 95; NOVSX-NEXT: stvx v2, 0, r3 96; NOVSX-NEXT: addi r3, r1, -16 97; NOVSX-NEXT: lfs f0, -20(r1) 98; NOVSX-NEXT: lfs f1, -36(r1) 99; NOVSX-NEXT: fadds f0, f1, f0 100; NOVSX-NEXT: lfs f1, -40(r1) 101; NOVSX-NEXT: stfs f0, -4(r1) 102; NOVSX-NEXT: lfs f0, -24(r1) 103; NOVSX-NEXT: fadds f0, f1, f0 104; NOVSX-NEXT: lfs f1, -44(r1) 105; NOVSX-NEXT: stfs f0, -8(r1) 106; NOVSX-NEXT: lfs f0, -28(r1) 107; NOVSX-NEXT: fadds f0, f1, f0 108; NOVSX-NEXT: lfs f1, -48(r1) 109; NOVSX-NEXT: stfs f0, -12(r1) 110; NOVSX-NEXT: lfs f0, -32(r1) 111; NOVSX-NEXT: fadds f0, f1, f0 112; NOVSX-NEXT: stfs f0, -16(r1) 113; NOVSX-NEXT: lvx v2, 0, r3 114; NOVSX-NEXT: blr 115; 116; SPE-LABEL: fadd_v4f32: 117; SPE: # %bb.0: 118; SPE-NEXT: efsadd r6, r6, r10 119; SPE-NEXT: efsadd r5, r5, r9 120; SPE-NEXT: efsadd r4, r4, r8 121; SPE-NEXT: efsadd r3, r3, r7 122; SPE-NEXT: blr 123 %res = call <4 x float> @llvm.experimental.constrained.fadd.v4f32( 124 <4 x float> %vf1, <4 x float> %vf2, 125 metadata !"round.dynamic", 126 metadata !"fpexcept.strict") #0 127 ret <4 x float> %res 128} 129 130define <2 x double> @fadd_v2f64(<2 x double> %vf1, <2 x double> %vf2) #0 { 131; CHECK-LABEL: fadd_v2f64: 132; CHECK: # %bb.0: 133; CHECK-NEXT: xvadddp v2, v2, v3 134; CHECK-NEXT: blr 135; 136; NOVSX-LABEL: fadd_v2f64: 137; NOVSX: # %bb.0: 138; NOVSX-NEXT: fadd f2, f2, f4 
139; NOVSX-NEXT: fadd f1, f1, f3 140; NOVSX-NEXT: blr 141; 142; SPE-LABEL: fadd_v2f64: 143; SPE: # %bb.0: 144; SPE-NEXT: evldd r4, 8(r1) 145; SPE-NEXT: evmergelo r7, r7, r8 146; SPE-NEXT: evmergelo r8, r9, r10 147; SPE-NEXT: li r9, 8 148; SPE-NEXT: evmergelo r5, r5, r6 149; SPE-NEXT: efdadd r4, r7, r4 150; SPE-NEXT: evstddx r4, r3, r9 151; SPE-NEXT: efdadd r4, r5, r8 152; SPE-NEXT: evstdd r4, 0(r3) 153; SPE-NEXT: blr 154 %res = call <2 x double> @llvm.experimental.constrained.fadd.v2f64( 155 <2 x double> %vf1, <2 x double> %vf2, 156 metadata !"round.dynamic", 157 metadata !"fpexcept.strict") #0 158 ret <2 x double> %res 159} 160 161define float @fsub_f32(float %f1, float %f2) #0 { 162; CHECK-LABEL: fsub_f32: 163; CHECK: # %bb.0: 164; CHECK-NEXT: xssubsp f1, f1, f2 165; CHECK-NEXT: blr 166; 167; NOVSX-LABEL: fsub_f32: 168; NOVSX: # %bb.0: 169; NOVSX-NEXT: fsubs f1, f1, f2 170; NOVSX-NEXT: blr 171; 172; SPE-LABEL: fsub_f32: 173; SPE: # %bb.0: 174; SPE-NEXT: efssub r3, r3, r4 175; SPE-NEXT: blr 176 177 %res = call float @llvm.experimental.constrained.fsub.f32( 178 float %f1, float %f2, 179 metadata !"round.dynamic", 180 metadata !"fpexcept.strict") #0 181 ret float %res; 182} 183 184define double @fsub_f64(double %f1, double %f2) #0 { 185; CHECK-LABEL: fsub_f64: 186; CHECK: # %bb.0: 187; CHECK-NEXT: xssubdp f1, f1, f2 188; CHECK-NEXT: blr 189; 190; NOVSX-LABEL: fsub_f64: 191; NOVSX: # %bb.0: 192; NOVSX-NEXT: fsub f1, f1, f2 193; NOVSX-NEXT: blr 194; 195; SPE-LABEL: fsub_f64: 196; SPE: # %bb.0: 197; SPE-NEXT: evmergelo r5, r5, r6 198; SPE-NEXT: evmergelo r3, r3, r4 199; SPE-NEXT: efdsub r4, r3, r5 200; SPE-NEXT: evmergehi r3, r4, r4 201; SPE-NEXT: blr 202 203 %res = call double @llvm.experimental.constrained.fsub.f64( 204 double %f1, double %f2, 205 metadata !"round.dynamic", 206 metadata !"fpexcept.strict") #0 207 ret double %res; 208} 209 210define <4 x float> @fsub_v4f32(<4 x float> %vf1, <4 x float> %vf2) #0 { 211; CHECK-LABEL: fsub_v4f32: 212; CHECK: # %bb.0: 213; 
CHECK-NEXT: xvsubsp v2, v2, v3 214; CHECK-NEXT: blr 215; 216; NOVSX-LABEL: fsub_v4f32: 217; NOVSX: # %bb.0: 218; NOVSX-NEXT: addi r3, r1, -32 219; NOVSX-NEXT: stvx v3, 0, r3 220; NOVSX-NEXT: addi r3, r1, -48 221; NOVSX-NEXT: stvx v2, 0, r3 222; NOVSX-NEXT: addi r3, r1, -16 223; NOVSX-NEXT: lfs f0, -20(r1) 224; NOVSX-NEXT: lfs f1, -36(r1) 225; NOVSX-NEXT: fsubs f0, f1, f0 226; NOVSX-NEXT: lfs f1, -40(r1) 227; NOVSX-NEXT: stfs f0, -4(r1) 228; NOVSX-NEXT: lfs f0, -24(r1) 229; NOVSX-NEXT: fsubs f0, f1, f0 230; NOVSX-NEXT: lfs f1, -44(r1) 231; NOVSX-NEXT: stfs f0, -8(r1) 232; NOVSX-NEXT: lfs f0, -28(r1) 233; NOVSX-NEXT: fsubs f0, f1, f0 234; NOVSX-NEXT: lfs f1, -48(r1) 235; NOVSX-NEXT: stfs f0, -12(r1) 236; NOVSX-NEXT: lfs f0, -32(r1) 237; NOVSX-NEXT: fsubs f0, f1, f0 238; NOVSX-NEXT: stfs f0, -16(r1) 239; NOVSX-NEXT: lvx v2, 0, r3 240; NOVSX-NEXT: blr 241; 242; SPE-LABEL: fsub_v4f32: 243; SPE: # %bb.0: 244; SPE-NEXT: efssub r6, r6, r10 245; SPE-NEXT: efssub r5, r5, r9 246; SPE-NEXT: efssub r4, r4, r8 247; SPE-NEXT: efssub r3, r3, r7 248; SPE-NEXT: blr 249 %res = call <4 x float> @llvm.experimental.constrained.fsub.v4f32( 250 <4 x float> %vf1, <4 x float> %vf2, 251 metadata !"round.dynamic", 252 metadata !"fpexcept.strict") #0 253 ret <4 x float> %res; 254} 255 256define <2 x double> @fsub_v2f64(<2 x double> %vf1, <2 x double> %vf2) #0 { 257; CHECK-LABEL: fsub_v2f64: 258; CHECK: # %bb.0: 259; CHECK-NEXT: xvsubdp v2, v2, v3 260; CHECK-NEXT: blr 261; 262; NOVSX-LABEL: fsub_v2f64: 263; NOVSX: # %bb.0: 264; NOVSX-NEXT: fsub f2, f2, f4 265; NOVSX-NEXT: fsub f1, f1, f3 266; NOVSX-NEXT: blr 267; 268; SPE-LABEL: fsub_v2f64: 269; SPE: # %bb.0: 270; SPE-NEXT: evldd r4, 8(r1) 271; SPE-NEXT: evmergelo r7, r7, r8 272; SPE-NEXT: evmergelo r8, r9, r10 273; SPE-NEXT: li r9, 8 274; SPE-NEXT: evmergelo r5, r5, r6 275; SPE-NEXT: efdsub r4, r7, r4 276; SPE-NEXT: evstddx r4, r3, r9 277; SPE-NEXT: efdsub r4, r5, r8 278; SPE-NEXT: evstdd r4, 0(r3) 279; SPE-NEXT: blr 280 %res = call <2 x 
double> @llvm.experimental.constrained.fsub.v2f64( 281 <2 x double> %vf1, <2 x double> %vf2, 282 metadata !"round.dynamic", 283 metadata !"fpexcept.strict") #0 284 ret <2 x double> %res; 285} 286 287define float @fmul_f32(float %f1, float %f2) #0 { 288; CHECK-LABEL: fmul_f32: 289; CHECK: # %bb.0: 290; CHECK-NEXT: xsmulsp f1, f1, f2 291; CHECK-NEXT: blr 292; 293; NOVSX-LABEL: fmul_f32: 294; NOVSX: # %bb.0: 295; NOVSX-NEXT: fmuls f1, f1, f2 296; NOVSX-NEXT: blr 297; 298; SPE-LABEL: fmul_f32: 299; SPE: # %bb.0: 300; SPE-NEXT: efsmul r3, r3, r4 301; SPE-NEXT: blr 302 303 %res = call float @llvm.experimental.constrained.fmul.f32( 304 float %f1, float %f2, 305 metadata !"round.dynamic", 306 metadata !"fpexcept.strict") #0 307 ret float %res; 308} 309 310define double @fmul_f64(double %f1, double %f2) #0 { 311; CHECK-LABEL: fmul_f64: 312; CHECK: # %bb.0: 313; CHECK-NEXT: xsmuldp f1, f1, f2 314; CHECK-NEXT: blr 315; 316; NOVSX-LABEL: fmul_f64: 317; NOVSX: # %bb.0: 318; NOVSX-NEXT: fmul f1, f1, f2 319; NOVSX-NEXT: blr 320; 321; SPE-LABEL: fmul_f64: 322; SPE: # %bb.0: 323; SPE-NEXT: evmergelo r5, r5, r6 324; SPE-NEXT: evmergelo r3, r3, r4 325; SPE-NEXT: efdmul r4, r3, r5 326; SPE-NEXT: evmergehi r3, r4, r4 327; SPE-NEXT: blr 328 329 %res = call double @llvm.experimental.constrained.fmul.f64( 330 double %f1, double %f2, 331 metadata !"round.dynamic", 332 metadata !"fpexcept.strict") #0 333 ret double %res; 334} 335 336define <4 x float> @fmul_v4f32(<4 x float> %vf1, <4 x float> %vf2) #0 { 337; CHECK-LABEL: fmul_v4f32: 338; CHECK: # %bb.0: 339; CHECK-NEXT: xvmulsp v2, v2, v3 340; CHECK-NEXT: blr 341; 342; NOVSX-LABEL: fmul_v4f32: 343; NOVSX: # %bb.0: 344; NOVSX-NEXT: addi r3, r1, -32 345; NOVSX-NEXT: stvx v3, 0, r3 346; NOVSX-NEXT: addi r3, r1, -48 347; NOVSX-NEXT: stvx v2, 0, r3 348; NOVSX-NEXT: addi r3, r1, -16 349; NOVSX-NEXT: lfs f0, -20(r1) 350; NOVSX-NEXT: lfs f1, -36(r1) 351; NOVSX-NEXT: fmuls f0, f1, f0 352; NOVSX-NEXT: lfs f1, -40(r1) 353; NOVSX-NEXT: stfs f0, -4(r1) 
354; NOVSX-NEXT: lfs f0, -24(r1) 355; NOVSX-NEXT: fmuls f0, f1, f0 356; NOVSX-NEXT: lfs f1, -44(r1) 357; NOVSX-NEXT: stfs f0, -8(r1) 358; NOVSX-NEXT: lfs f0, -28(r1) 359; NOVSX-NEXT: fmuls f0, f1, f0 360; NOVSX-NEXT: lfs f1, -48(r1) 361; NOVSX-NEXT: stfs f0, -12(r1) 362; NOVSX-NEXT: lfs f0, -32(r1) 363; NOVSX-NEXT: fmuls f0, f1, f0 364; NOVSX-NEXT: stfs f0, -16(r1) 365; NOVSX-NEXT: lvx v2, 0, r3 366; NOVSX-NEXT: blr 367; 368; SPE-LABEL: fmul_v4f32: 369; SPE: # %bb.0: 370; SPE-NEXT: efsmul r6, r6, r10 371; SPE-NEXT: efsmul r5, r5, r9 372; SPE-NEXT: efsmul r4, r4, r8 373; SPE-NEXT: efsmul r3, r3, r7 374; SPE-NEXT: blr 375 %res = call <4 x float> @llvm.experimental.constrained.fmul.v4f32( 376 <4 x float> %vf1, <4 x float> %vf2, 377 metadata !"round.dynamic", 378 metadata !"fpexcept.strict") #0 379 ret <4 x float> %res; 380} 381 382define <2 x double> @fmul_v2f64(<2 x double> %vf1, <2 x double> %vf2) #0 { 383; CHECK-LABEL: fmul_v2f64: 384; CHECK: # %bb.0: 385; CHECK-NEXT: xvmuldp v2, v2, v3 386; CHECK-NEXT: blr 387; 388; NOVSX-LABEL: fmul_v2f64: 389; NOVSX: # %bb.0: 390; NOVSX-NEXT: fmul f2, f2, f4 391; NOVSX-NEXT: fmul f1, f1, f3 392; NOVSX-NEXT: blr 393; 394; SPE-LABEL: fmul_v2f64: 395; SPE: # %bb.0: 396; SPE-NEXT: evldd r4, 8(r1) 397; SPE-NEXT: evmergelo r7, r7, r8 398; SPE-NEXT: evmergelo r8, r9, r10 399; SPE-NEXT: li r9, 8 400; SPE-NEXT: evmergelo r5, r5, r6 401; SPE-NEXT: efdmul r4, r7, r4 402; SPE-NEXT: evstddx r4, r3, r9 403; SPE-NEXT: efdmul r4, r5, r8 404; SPE-NEXT: evstdd r4, 0(r3) 405; SPE-NEXT: blr 406 %res = call <2 x double> @llvm.experimental.constrained.fmul.v2f64( 407 <2 x double> %vf1, <2 x double> %vf2, 408 metadata !"round.dynamic", 409 metadata !"fpexcept.strict") #0 410 ret <2 x double> %res; 411} 412 413define float @fdiv_f32(float %f1, float %f2) #0 { 414; CHECK-LABEL: fdiv_f32: 415; CHECK: # %bb.0: 416; CHECK-NEXT: xsdivsp f1, f1, f2 417; CHECK-NEXT: blr 418; 419; NOVSX-LABEL: fdiv_f32: 420; NOVSX: # %bb.0: 421; NOVSX-NEXT: fdivs f1, f1, f2 
422; NOVSX-NEXT: blr 423; 424; SPE-LABEL: fdiv_f32: 425; SPE: # %bb.0: 426; SPE-NEXT: efsdiv r3, r3, r4 427; SPE-NEXT: blr 428 429 %res = call float @llvm.experimental.constrained.fdiv.f32( 430 float %f1, float %f2, 431 metadata !"round.dynamic", 432 metadata !"fpexcept.strict") #0 433 ret float %res; 434} 435 436define double @fdiv_f64(double %f1, double %f2) #0 { 437; CHECK-LABEL: fdiv_f64: 438; CHECK: # %bb.0: 439; CHECK-NEXT: xsdivdp f1, f1, f2 440; CHECK-NEXT: blr 441; 442; NOVSX-LABEL: fdiv_f64: 443; NOVSX: # %bb.0: 444; NOVSX-NEXT: fdiv f1, f1, f2 445; NOVSX-NEXT: blr 446; 447; SPE-LABEL: fdiv_f64: 448; SPE: # %bb.0: 449; SPE-NEXT: evmergelo r5, r5, r6 450; SPE-NEXT: evmergelo r3, r3, r4 451; SPE-NEXT: efddiv r4, r3, r5 452; SPE-NEXT: evmergehi r3, r4, r4 453; SPE-NEXT: blr 454 455 %res = call double @llvm.experimental.constrained.fdiv.f64( 456 double %f1, double %f2, 457 metadata !"round.dynamic", 458 metadata !"fpexcept.strict") #0 459 ret double %res; 460} 461 462define <4 x float> @fdiv_v4f32(<4 x float> %vf1, <4 x float> %vf2) #0 { 463; CHECK-LABEL: fdiv_v4f32: 464; CHECK: # %bb.0: 465; CHECK-NEXT: xvdivsp v2, v2, v3 466; CHECK-NEXT: blr 467; 468; NOVSX-LABEL: fdiv_v4f32: 469; NOVSX: # %bb.0: 470; NOVSX-NEXT: addi r3, r1, -32 471; NOVSX-NEXT: stvx v3, 0, r3 472; NOVSX-NEXT: addi r3, r1, -48 473; NOVSX-NEXT: stvx v2, 0, r3 474; NOVSX-NEXT: addi r3, r1, -16 475; NOVSX-NEXT: lfs f0, -20(r1) 476; NOVSX-NEXT: lfs f1, -36(r1) 477; NOVSX-NEXT: fdivs f0, f1, f0 478; NOVSX-NEXT: lfs f1, -40(r1) 479; NOVSX-NEXT: stfs f0, -4(r1) 480; NOVSX-NEXT: lfs f0, -24(r1) 481; NOVSX-NEXT: fdivs f0, f1, f0 482; NOVSX-NEXT: lfs f1, -44(r1) 483; NOVSX-NEXT: stfs f0, -8(r1) 484; NOVSX-NEXT: lfs f0, -28(r1) 485; NOVSX-NEXT: fdivs f0, f1, f0 486; NOVSX-NEXT: lfs f1, -48(r1) 487; NOVSX-NEXT: stfs f0, -12(r1) 488; NOVSX-NEXT: lfs f0, -32(r1) 489; NOVSX-NEXT: fdivs f0, f1, f0 490; NOVSX-NEXT: stfs f0, -16(r1) 491; NOVSX-NEXT: lvx v2, 0, r3 492; NOVSX-NEXT: blr 493; 494; SPE-LABEL: 
fdiv_v4f32: 495; SPE: # %bb.0: 496; SPE-NEXT: efsdiv r6, r6, r10 497; SPE-NEXT: efsdiv r5, r5, r9 498; SPE-NEXT: efsdiv r4, r4, r8 499; SPE-NEXT: efsdiv r3, r3, r7 500; SPE-NEXT: blr 501 %res = call <4 x float> @llvm.experimental.constrained.fdiv.v4f32( 502 <4 x float> %vf1, <4 x float> %vf2, 503 metadata !"round.dynamic", 504 metadata !"fpexcept.strict") #0 505 ret <4 x float> %res 506} 507
; fdiv_v2f64: strict division of two <2 x double> vectors. The expected code is
; a single xvdivdp with VSX, two scalar fdiv instructions without VSX, and two
; efddiv instructions on SPE whose results are stored through r3
; (evstddx / evstdd).
508define <2 x double> @fdiv_v2f64(<2 x double> %vf1, <2 x double> %vf2) #0 { 509; CHECK-LABEL: fdiv_v2f64: 510; CHECK: # %bb.0: 511; CHECK-NEXT: xvdivdp v2, v2, v3 512; CHECK-NEXT: blr 513; 514; NOVSX-LABEL: fdiv_v2f64: 515; NOVSX: # %bb.0: 516; NOVSX-NEXT: fdiv f2, f2, f4 517; NOVSX-NEXT: fdiv f1, f1, f3 518; NOVSX-NEXT: blr 519; 520; SPE-LABEL: fdiv_v2f64: 521; SPE: # %bb.0: 522; SPE-NEXT: evldd r4, 8(r1) 523; SPE-NEXT: evmergelo r7, r7, r8 524; SPE-NEXT: evmergelo r8, r9, r10 525; SPE-NEXT: evmergelo r5, r5, r6 526; SPE-NEXT: efddiv r4, r7, r4 527; SPE-NEXT: li r7, 8 528; SPE-NEXT: evstddx r4, r3, r7 529; SPE-NEXT: efddiv r4, r5, r8 530; SPE-NEXT: evstdd r4, 0(r3) 531; SPE-NEXT: blr 532 %res = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64( 533 <2 x double> %vf1, <2 x double> %vf2, 534 metadata !"round.dynamic", 535 metadata !"fpexcept.strict") #0 536 ret <2 x double> %res 537} 538
; no_fma_fold: a strict fmul whose result feeds a strict fadd. Every expected
; sequence keeps the two operations separate (xsmuldp then xsadddp, fmul then
; fadd, efdmul then efdadd); no fused multiply-add instruction and no fma
; library call appears in any configuration.
539define double @no_fma_fold(double %f1, double %f2, double %f3) #0 { 540; CHECK-LABEL: no_fma_fold: 541; CHECK: # %bb.0: 542; CHECK-NEXT: xsmuldp f0, f1, f2 543; CHECK-NEXT: xsadddp f1, f0, f3 544; CHECK-NEXT: blr 545; 546; NOVSX-LABEL: no_fma_fold: 547; NOVSX: # %bb.0: 548; NOVSX-NEXT: fmul f0, f1, f2 549; NOVSX-NEXT: fadd f1, f0, f3 550; NOVSX-NEXT: blr 551; 552; SPE-LABEL: no_fma_fold: 553; SPE: # %bb.0: 554; SPE-NEXT: evmergelo r7, r7, r8 555; SPE-NEXT: evmergelo r5, r5, r6 556; SPE-NEXT: evmergelo r3, r3, r4 557; SPE-NEXT: efdmul r3, r3, r5 558; SPE-NEXT: efdadd r4, r3, r7 559; SPE-NEXT: evmergehi r3, r4, r4 560; SPE-NEXT: blr 561 %mul = call double 
@llvm.experimental.constrained.fmul.f64( 562 double %f1, double %f2, 563 metadata !"round.dynamic", 564 metadata !"fpexcept.strict") #0 565 %add = call double @llvm.experimental.constrained.fadd.f64( 566 double %mul, double %f3, 567 metadata !"round.dynamic", 568 metadata !"fpexcept.strict") #0 569 ret double %add 570} 571 572define float @fmadd_f32(float %f0, float %f1, float %f2) #0 { 573; CHECK-LABEL: fmadd_f32: 574; CHECK: # %bb.0: 575; CHECK-NEXT: xsmaddasp f3, f1, f2 576; CHECK-NEXT: fmr f1, f3 577; CHECK-NEXT: blr 578; 579; NOVSX-LABEL: fmadd_f32: 580; NOVSX: # %bb.0: 581; NOVSX-NEXT: fmadds f1, f1, f2, f3 582; NOVSX-NEXT: blr 583; 584; SPE-LABEL: fmadd_f32: 585; SPE: # %bb.0: 586; SPE-NEXT: mflr r0 587; SPE-NEXT: stwu r1, -16(r1) 588; SPE-NEXT: stw r0, 20(r1) 589; SPE-NEXT: .cfi_def_cfa_offset 16 590; SPE-NEXT: .cfi_offset lr, 4 591; SPE-NEXT: bl fmaf 592; SPE-NEXT: lwz r0, 20(r1) 593; SPE-NEXT: addi r1, r1, 16 594; SPE-NEXT: mtlr r0 595; SPE-NEXT: blr 596 %res = call float @llvm.experimental.constrained.fma.f32( 597 float %f0, float %f1, float %f2, 598 metadata !"round.dynamic", 599 metadata !"fpexcept.strict") #0 600 ret float %res 601} 602 603define double @fmadd_f64(double %f0, double %f1, double %f2) #0 { 604; CHECK-LABEL: fmadd_f64: 605; CHECK: # %bb.0: 606; CHECK-NEXT: xsmaddadp f3, f1, f2 607; CHECK-NEXT: fmr f1, f3 608; CHECK-NEXT: blr 609; 610; NOVSX-LABEL: fmadd_f64: 611; NOVSX: # %bb.0: 612; NOVSX-NEXT: fmadd f1, f1, f2, f3 613; NOVSX-NEXT: blr 614; 615; SPE-LABEL: fmadd_f64: 616; SPE: # %bb.0: 617; SPE-NEXT: mflr r0 618; SPE-NEXT: stwu r1, -16(r1) 619; SPE-NEXT: stw r0, 20(r1) 620; SPE-NEXT: .cfi_def_cfa_offset 16 621; SPE-NEXT: .cfi_offset lr, 4 622; SPE-NEXT: evmergelo r8, r7, r8 623; SPE-NEXT: evmergelo r6, r5, r6 624; SPE-NEXT: evmergelo r4, r3, r4 625; SPE-NEXT: evmergehi r3, r4, r4 626; SPE-NEXT: evmergehi r5, r6, r6 627; SPE-NEXT: evmergehi r7, r8, r8 628; SPE-NEXT: bl fma 629; SPE-NEXT: evmergelo r4, r3, r4 630; SPE-NEXT: evmergehi r3, 
r4, r4 631; SPE-NEXT: lwz r0, 20(r1) 632; SPE-NEXT: addi r1, r1, 16 633; SPE-NEXT: mtlr r0 634; SPE-NEXT: blr 635 %res = call double @llvm.experimental.constrained.fma.f64( 636 double %f0, double %f1, double %f2, 637 metadata !"round.dynamic", 638 metadata !"fpexcept.strict") #0 639 ret double %res 640} 641 642define <4 x float> @fmadd_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) #0 { 643; CHECK-LABEL: fmadd_v4f32: 644; CHECK: # %bb.0: 645; CHECK-NEXT: xvmaddasp v4, v2, v3 646; CHECK-NEXT: vmr v2, v4 647; CHECK-NEXT: blr 648; 649; NOVSX-LABEL: fmadd_v4f32: 650; NOVSX: # %bb.0: 651; NOVSX-NEXT: addi r3, r1, -32 652; NOVSX-NEXT: stvx v4, 0, r3 653; NOVSX-NEXT: addi r3, r1, -48 654; NOVSX-NEXT: stvx v3, 0, r3 655; NOVSX-NEXT: addi r3, r1, -64 656; NOVSX-NEXT: stvx v2, 0, r3 657; NOVSX-NEXT: addi r3, r1, -16 658; NOVSX-NEXT: lfs f0, -20(r1) 659; NOVSX-NEXT: lfs f1, -36(r1) 660; NOVSX-NEXT: lfs f2, -52(r1) 661; NOVSX-NEXT: fmadds f0, f2, f1, f0 662; NOVSX-NEXT: lfs f1, -40(r1) 663; NOVSX-NEXT: lfs f2, -56(r1) 664; NOVSX-NEXT: stfs f0, -4(r1) 665; NOVSX-NEXT: lfs f0, -24(r1) 666; NOVSX-NEXT: fmadds f0, f2, f1, f0 667; NOVSX-NEXT: lfs f1, -44(r1) 668; NOVSX-NEXT: lfs f2, -60(r1) 669; NOVSX-NEXT: stfs f0, -8(r1) 670; NOVSX-NEXT: lfs f0, -28(r1) 671; NOVSX-NEXT: fmadds f0, f2, f1, f0 672; NOVSX-NEXT: lfs f1, -48(r1) 673; NOVSX-NEXT: lfs f2, -64(r1) 674; NOVSX-NEXT: stfs f0, -12(r1) 675; NOVSX-NEXT: lfs f0, -32(r1) 676; NOVSX-NEXT: fmadds f0, f2, f1, f0 677; NOVSX-NEXT: stfs f0, -16(r1) 678; NOVSX-NEXT: lvx v2, 0, r3 679; NOVSX-NEXT: blr 680; 681; SPE-LABEL: fmadd_v4f32: 682; SPE: # %bb.0: 683; SPE-NEXT: mflr r0 684; SPE-NEXT: stwu r1, -64(r1) 685; SPE-NEXT: stw r0, 68(r1) 686; SPE-NEXT: .cfi_def_cfa_offset 64 687; SPE-NEXT: .cfi_offset lr, 4 688; SPE-NEXT: .cfi_offset r21, -44 689; SPE-NEXT: .cfi_offset r22, -40 690; SPE-NEXT: .cfi_offset r23, -36 691; SPE-NEXT: .cfi_offset r24, -32 692; SPE-NEXT: .cfi_offset r25, -28 693; SPE-NEXT: .cfi_offset r26, -24 694; 
SPE-NEXT: .cfi_offset r27, -20 695; SPE-NEXT: .cfi_offset r28, -16 696; SPE-NEXT: .cfi_offset r29, -12 697; SPE-NEXT: .cfi_offset r30, -8 698; SPE-NEXT: stw r27, 44(r1) # 4-byte Folded Spill 699; SPE-NEXT: mr r27, r5 700; SPE-NEXT: lwz r5, 84(r1) 701; SPE-NEXT: stw r25, 36(r1) # 4-byte Folded Spill 702; SPE-NEXT: mr r25, r3 703; SPE-NEXT: stw r26, 40(r1) # 4-byte Folded Spill 704; SPE-NEXT: mr r26, r4 705; SPE-NEXT: mr r3, r6 706; SPE-NEXT: mr r4, r10 707; SPE-NEXT: stw r21, 20(r1) # 4-byte Folded Spill 708; SPE-NEXT: stw r22, 24(r1) # 4-byte Folded Spill 709; SPE-NEXT: stw r23, 28(r1) # 4-byte Folded Spill 710; SPE-NEXT: stw r24, 32(r1) # 4-byte Folded Spill 711; SPE-NEXT: stw r28, 48(r1) # 4-byte Folded Spill 712; SPE-NEXT: mr r28, r7 713; SPE-NEXT: stw r29, 52(r1) # 4-byte Folded Spill 714; SPE-NEXT: mr r29, r8 715; SPE-NEXT: stw r30, 56(r1) # 4-byte Folded Spill 716; SPE-NEXT: mr r30, r9 717; SPE-NEXT: lwz r24, 72(r1) 718; SPE-NEXT: lwz r23, 76(r1) 719; SPE-NEXT: lwz r22, 80(r1) 720; SPE-NEXT: bl fmaf 721; SPE-NEXT: mr r21, r3 722; SPE-NEXT: mr r3, r27 723; SPE-NEXT: mr r4, r30 724; SPE-NEXT: mr r5, r22 725; SPE-NEXT: bl fmaf 726; SPE-NEXT: mr r30, r3 727; SPE-NEXT: mr r3, r26 728; SPE-NEXT: mr r4, r29 729; SPE-NEXT: mr r5, r23 730; SPE-NEXT: bl fmaf 731; SPE-NEXT: mr r29, r3 732; SPE-NEXT: mr r3, r25 733; SPE-NEXT: mr r4, r28 734; SPE-NEXT: mr r5, r24 735; SPE-NEXT: bl fmaf 736; SPE-NEXT: mr r4, r29 737; SPE-NEXT: mr r5, r30 738; SPE-NEXT: mr r6, r21 739; SPE-NEXT: lwz r30, 56(r1) # 4-byte Folded Reload 740; SPE-NEXT: lwz r29, 52(r1) # 4-byte Folded Reload 741; SPE-NEXT: lwz r28, 48(r1) # 4-byte Folded Reload 742; SPE-NEXT: lwz r27, 44(r1) # 4-byte Folded Reload 743; SPE-NEXT: lwz r26, 40(r1) # 4-byte Folded Reload 744; SPE-NEXT: lwz r25, 36(r1) # 4-byte Folded Reload 745; SPE-NEXT: lwz r24, 32(r1) # 4-byte Folded Reload 746; SPE-NEXT: lwz r23, 28(r1) # 4-byte Folded Reload 747; SPE-NEXT: lwz r22, 24(r1) # 4-byte Folded Reload 748; SPE-NEXT: lwz r21, 20(r1) # 
4-byte Folded Reload 749; SPE-NEXT: lwz r0, 68(r1) 750; SPE-NEXT: addi r1, r1, 64 751; SPE-NEXT: mtlr r0 752; SPE-NEXT: blr 753 %res = call <4 x float> @llvm.experimental.constrained.fma.v4f32( 754 <4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2, 755 metadata !"round.dynamic", 756 metadata !"fpexcept.strict") #0 757 ret <4 x float> %res 758} 759 760define <2 x double> @fmadd_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) #0 { 761; CHECK-LABEL: fmadd_v2f64: 762; CHECK: # %bb.0: 763; CHECK-NEXT: xvmaddadp v4, v2, v3 764; CHECK-NEXT: vmr v2, v4 765; CHECK-NEXT: blr 766; 767; NOVSX-LABEL: fmadd_v2f64: 768; NOVSX: # %bb.0: 769; NOVSX-NEXT: fmadd f2, f2, f4, f6 770; NOVSX-NEXT: fmadd f1, f1, f3, f5 771; NOVSX-NEXT: blr 772; 773; SPE-LABEL: fmadd_v2f64: 774; SPE: # %bb.0: 775; SPE-NEXT: mflr r0 776; SPE-NEXT: stwu r1, -80(r1) 777; SPE-NEXT: stw r0, 84(r1) 778; SPE-NEXT: .cfi_def_cfa_offset 80 779; SPE-NEXT: .cfi_offset lr, 4 780; SPE-NEXT: .cfi_offset r26, -64 781; SPE-NEXT: .cfi_offset r27, -56 782; SPE-NEXT: .cfi_offset r28, -48 783; SPE-NEXT: .cfi_offset r29, -40 784; SPE-NEXT: .cfi_offset r30, -8 785; SPE-NEXT: evstdd r26, 16(r1) # 8-byte Folded Spill 786; SPE-NEXT: evstdd r27, 24(r1) # 8-byte Folded Spill 787; SPE-NEXT: evstdd r28, 32(r1) # 8-byte Folded Spill 788; SPE-NEXT: evstdd r29, 40(r1) # 8-byte Folded Spill 789; SPE-NEXT: stw r30, 72(r1) # 4-byte Folded Spill 790; SPE-NEXT: evmergelo r27, r7, r8 791; SPE-NEXT: evmergelo r9, r9, r10 792; SPE-NEXT: evmergelo r4, r5, r6 793; SPE-NEXT: mr r30, r3 794; SPE-NEXT: evldd r8, 96(r1) 795; SPE-NEXT: evmergehi r3, r4, r4 796; SPE-NEXT: evmergehi r5, r9, r9 797; SPE-NEXT: mr r6, r9 798; SPE-NEXT: evldd r29, 104(r1) 799; SPE-NEXT: evmergehi r7, r8, r8 800; SPE-NEXT: evldd r28, 88(r1) 801; SPE-NEXT: bl fma 802; SPE-NEXT: evmergelo r26, r3, r4 803; SPE-NEXT: evmergehi r3, r27, r27 804; SPE-NEXT: evmergehi r5, r28, r28 805; SPE-NEXT: evmergehi r7, r29, r29 806; SPE-NEXT: mr r4, r27 807; SPE-NEXT: mr r6, 
r28 808; SPE-NEXT: mr r8, r29 809; SPE-NEXT: bl fma 810; SPE-NEXT: li r5, 8 811; SPE-NEXT: evmergelo r3, r3, r4 812; SPE-NEXT: evstddx r3, r30, r5 813; SPE-NEXT: evstdd r26, 0(r30) 814; SPE-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload 815; SPE-NEXT: evldd r29, 40(r1) # 8-byte Folded Reload 816; SPE-NEXT: evldd r28, 32(r1) # 8-byte Folded Reload 817; SPE-NEXT: evldd r27, 24(r1) # 8-byte Folded Reload 818; SPE-NEXT: evldd r26, 16(r1) # 8-byte Folded Reload 819; SPE-NEXT: lwz r0, 84(r1) 820; SPE-NEXT: addi r1, r1, 80 821; SPE-NEXT: mtlr r0 822; SPE-NEXT: blr 823 %res = call <2 x double> @llvm.experimental.constrained.fma.v2f64( 824 <2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2, 825 metadata !"round.dynamic", 826 metadata !"fpexcept.strict") #0 827 ret <2 x double> %res 828} 829
; fmsub_f32: strict fma whose addend is a plain fneg of %f2, i.e.
; fma(%f0, %f1, -%f2). The expected code is a single fused multiply-subtract
; (xsmsubasp with VSX, fmsubs without). On SPE the addend is negated with
; efsneg and fmaf is called.
830define float @fmsub_f32(float %f0, float %f1, float %f2) #0 { 831; CHECK-LABEL: fmsub_f32: 832; CHECK: # %bb.0: 833; CHECK-NEXT: xsmsubasp f3, f1, f2 834; CHECK-NEXT: fmr f1, f3 835; CHECK-NEXT: blr 836; 837; NOVSX-LABEL: fmsub_f32: 838; NOVSX: # %bb.0: 839; NOVSX-NEXT: fmsubs f1, f1, f2, f3 840; NOVSX-NEXT: blr 841; 842; SPE-LABEL: fmsub_f32: 843; SPE: # %bb.0: 844; SPE-NEXT: mflr r0 845; SPE-NEXT: stwu r1, -16(r1) 846; SPE-NEXT: stw r0, 20(r1) 847; SPE-NEXT: .cfi_def_cfa_offset 16 848; SPE-NEXT: .cfi_offset lr, 4 849; SPE-NEXT: efsneg r5, r5 850; SPE-NEXT: bl fmaf 851; SPE-NEXT: lwz r0, 20(r1) 852; SPE-NEXT: addi r1, r1, 16 853; SPE-NEXT: mtlr r0 854; SPE-NEXT: blr 855 %neg = fneg float %f2 856 %res = call float @llvm.experimental.constrained.fma.f32( 857 float %f0, float %f1, float %neg, 858 metadata !"round.dynamic", 859 metadata !"fpexcept.strict") #0 860 ret float %res 861} 862
; fmsub_f64: double-precision counterpart of fmsub_f32. The expected code is
; xsmsubadp with VSX and fmsub without. On SPE the addend is negated with
; efdneg and fma is called.
863define double @fmsub_f64(double %f0, double %f1, double %f2) #0 { 864; CHECK-LABEL: fmsub_f64: 865; CHECK: # %bb.0: 866; CHECK-NEXT: xsmsubadp f3, f1, f2 867; CHECK-NEXT: fmr f1, f3 868; CHECK-NEXT: blr 869; 870; NOVSX-LABEL: fmsub_f64: 871; NOVSX: # %bb.0: 872; NOVSX-NEXT: fmsub f1, f1, f2, f3 873; 
NOVSX-NEXT: blr 874; 875; SPE-LABEL: fmsub_f64: 876; SPE: # %bb.0: 877; SPE-NEXT: mflr r0 878; SPE-NEXT: stwu r1, -16(r1) 879; SPE-NEXT: stw r0, 20(r1) 880; SPE-NEXT: .cfi_def_cfa_offset 16 881; SPE-NEXT: .cfi_offset lr, 4 882; SPE-NEXT: evmergelo r6, r5, r6 883; SPE-NEXT: evmergelo r4, r3, r4 884; SPE-NEXT: evmergelo r3, r7, r8 885; SPE-NEXT: efdneg r8, r3 886; SPE-NEXT: evmergehi r3, r4, r4 887; SPE-NEXT: evmergehi r5, r6, r6 888; SPE-NEXT: evmergehi r7, r8, r8 889; SPE-NEXT: bl fma 890; SPE-NEXT: evmergelo r4, r3, r4 891; SPE-NEXT: evmergehi r3, r4, r4 892; SPE-NEXT: lwz r0, 20(r1) 893; SPE-NEXT: addi r1, r1, 16 894; SPE-NEXT: mtlr r0 895; SPE-NEXT: blr 896 %neg = fneg double %f2 897 %res = call double @llvm.experimental.constrained.fma.f64( 898 double %f0, double %f1, double %neg, 899 metadata !"round.dynamic", 900 metadata !"fpexcept.strict") #0 901 ret double %res 902} 903 904define <4 x float> @fmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) #0 { 905; CHECK-LABEL: fmsub_v4f32: 906; CHECK: # %bb.0: 907; CHECK-NEXT: xvmsubasp v4, v2, v3 908; CHECK-NEXT: vmr v2, v4 909; CHECK-NEXT: blr 910; 911; NOVSX-LABEL: fmsub_v4f32: 912; NOVSX: # %bb.0: 913; NOVSX-NEXT: vspltisb v5, -1 914; NOVSX-NEXT: addi r3, r1, -48 915; NOVSX-NEXT: vslw v5, v5, v5 916; NOVSX-NEXT: stvx v3, 0, r3 917; NOVSX-NEXT: addi r3, r1, -64 918; NOVSX-NEXT: vxor v4, v4, v5 919; NOVSX-NEXT: stvx v2, 0, r3 920; NOVSX-NEXT: addi r3, r1, -32 921; NOVSX-NEXT: stvx v4, 0, r3 922; NOVSX-NEXT: addi r3, r1, -16 923; NOVSX-NEXT: lfs f0, -36(r1) 924; NOVSX-NEXT: lfs f1, -52(r1) 925; NOVSX-NEXT: lfs f2, -20(r1) 926; NOVSX-NEXT: fmadds f0, f1, f0, f2 927; NOVSX-NEXT: lfs f1, -56(r1) 928; NOVSX-NEXT: lfs f2, -24(r1) 929; NOVSX-NEXT: stfs f0, -4(r1) 930; NOVSX-NEXT: lfs f0, -40(r1) 931; NOVSX-NEXT: fmadds f0, f1, f0, f2 932; NOVSX-NEXT: lfs f1, -60(r1) 933; NOVSX-NEXT: lfs f2, -28(r1) 934; NOVSX-NEXT: stfs f0, -8(r1) 935; NOVSX-NEXT: lfs f0, -44(r1) 936; NOVSX-NEXT: fmadds f0, f1, f0, f2 937; 
NOVSX-NEXT: lfs f1, -64(r1) 938; NOVSX-NEXT: lfs f2, -32(r1) 939; NOVSX-NEXT: stfs f0, -12(r1) 940; NOVSX-NEXT: lfs f0, -48(r1) 941; NOVSX-NEXT: fmadds f0, f1, f0, f2 942; NOVSX-NEXT: stfs f0, -16(r1) 943; NOVSX-NEXT: lvx v2, 0, r3 944; NOVSX-NEXT: blr 945; 946; SPE-LABEL: fmsub_v4f32: 947; SPE: # %bb.0: 948; SPE-NEXT: mflr r0 949; SPE-NEXT: stwu r1, -64(r1) 950; SPE-NEXT: stw r0, 68(r1) 951; SPE-NEXT: .cfi_def_cfa_offset 64 952; SPE-NEXT: .cfi_offset lr, 4 953; SPE-NEXT: .cfi_offset r21, -44 954; SPE-NEXT: .cfi_offset r22, -40 955; SPE-NEXT: .cfi_offset r23, -36 956; SPE-NEXT: .cfi_offset r24, -32 957; SPE-NEXT: .cfi_offset r25, -28 958; SPE-NEXT: .cfi_offset r26, -24 959; SPE-NEXT: .cfi_offset r27, -20 960; SPE-NEXT: .cfi_offset r28, -16 961; SPE-NEXT: .cfi_offset r29, -12 962; SPE-NEXT: .cfi_offset r30, -8 963; SPE-NEXT: stw r25, 36(r1) # 4-byte Folded Spill 964; SPE-NEXT: mr r25, r3 965; SPE-NEXT: stw r26, 40(r1) # 4-byte Folded Spill 966; SPE-NEXT: mr r26, r4 967; SPE-NEXT: stw r27, 44(r1) # 4-byte Folded Spill 968; SPE-NEXT: mr r27, r5 969; SPE-NEXT: stw r28, 48(r1) # 4-byte Folded Spill 970; SPE-NEXT: mr r28, r7 971; SPE-NEXT: lwz r3, 80(r1) 972; SPE-NEXT: lwz r4, 72(r1) 973; SPE-NEXT: lwz r5, 76(r1) 974; SPE-NEXT: lwz r7, 84(r1) 975; SPE-NEXT: stw r22, 24(r1) # 4-byte Folded Spill 976; SPE-NEXT: efsneg r22, r3 977; SPE-NEXT: stw r23, 28(r1) # 4-byte Folded Spill 978; SPE-NEXT: efsneg r23, r5 979; SPE-NEXT: stw r24, 32(r1) # 4-byte Folded Spill 980; SPE-NEXT: efsneg r24, r4 981; SPE-NEXT: efsneg r5, r7 982; SPE-NEXT: mr r3, r6 983; SPE-NEXT: mr r4, r10 984; SPE-NEXT: stw r21, 20(r1) # 4-byte Folded Spill 985; SPE-NEXT: stw r29, 52(r1) # 4-byte Folded Spill 986; SPE-NEXT: mr r29, r8 987; SPE-NEXT: stw r30, 56(r1) # 4-byte Folded Spill 988; SPE-NEXT: mr r30, r9 989; SPE-NEXT: bl fmaf 990; SPE-NEXT: mr r21, r3 991; SPE-NEXT: mr r3, r27 992; SPE-NEXT: mr r4, r30 993; SPE-NEXT: mr r5, r22 994; SPE-NEXT: bl fmaf 995; SPE-NEXT: mr r30, r3 996; SPE-NEXT: mr r3, r26 
997; SPE-NEXT: mr r4, r29 998; SPE-NEXT: mr r5, r23 999; SPE-NEXT: bl fmaf 1000; SPE-NEXT: mr r29, r3 1001; SPE-NEXT: mr r3, r25 1002; SPE-NEXT: mr r4, r28 1003; SPE-NEXT: mr r5, r24 1004; SPE-NEXT: bl fmaf 1005; SPE-NEXT: mr r4, r29 1006; SPE-NEXT: mr r5, r30 1007; SPE-NEXT: mr r6, r21 1008; SPE-NEXT: lwz r30, 56(r1) # 4-byte Folded Reload 1009; SPE-NEXT: lwz r29, 52(r1) # 4-byte Folded Reload 1010; SPE-NEXT: lwz r28, 48(r1) # 4-byte Folded Reload 1011; SPE-NEXT: lwz r27, 44(r1) # 4-byte Folded Reload 1012; SPE-NEXT: lwz r26, 40(r1) # 4-byte Folded Reload 1013; SPE-NEXT: lwz r25, 36(r1) # 4-byte Folded Reload 1014; SPE-NEXT: lwz r24, 32(r1) # 4-byte Folded Reload 1015; SPE-NEXT: lwz r23, 28(r1) # 4-byte Folded Reload 1016; SPE-NEXT: lwz r22, 24(r1) # 4-byte Folded Reload 1017; SPE-NEXT: lwz r21, 20(r1) # 4-byte Folded Reload 1018; SPE-NEXT: lwz r0, 68(r1) 1019; SPE-NEXT: addi r1, r1, 64 1020; SPE-NEXT: mtlr r0 1021; SPE-NEXT: blr 1022 %neg = fneg <4 x float> %vf2 1023 %res = call <4 x float> @llvm.experimental.constrained.fma.v4f32( 1024 <4 x float> %vf0, <4 x float> %vf1, <4 x float> %neg, 1025 metadata !"round.dynamic", 1026 metadata !"fpexcept.strict") #0 1027 ret <4 x float> %res 1028} 1029 1030define <2 x double> @fmsub_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) #0 { 1031; CHECK-LABEL: fmsub_v2f64: 1032; CHECK: # %bb.0: 1033; CHECK-NEXT: xvmsubadp v4, v2, v3 1034; CHECK-NEXT: vmr v2, v4 1035; CHECK-NEXT: blr 1036; 1037; NOVSX-LABEL: fmsub_v2f64: 1038; NOVSX: # %bb.0: 1039; NOVSX-NEXT: fmsub f2, f2, f4, f6 1040; NOVSX-NEXT: fmsub f1, f1, f3, f5 1041; NOVSX-NEXT: blr 1042; 1043; SPE-LABEL: fmsub_v2f64: 1044; SPE: # %bb.0: 1045; SPE-NEXT: mflr r0 1046; SPE-NEXT: stwu r1, -80(r1) 1047; SPE-NEXT: stw r0, 84(r1) 1048; SPE-NEXT: .cfi_def_cfa_offset 80 1049; SPE-NEXT: .cfi_offset lr, 4 1050; SPE-NEXT: .cfi_offset r26, -64 1051; SPE-NEXT: .cfi_offset r27, -56 1052; SPE-NEXT: .cfi_offset r28, -48 1053; SPE-NEXT: .cfi_offset r29, -40 1054; SPE-NEXT: 
.cfi_offset r30, -8 1055; SPE-NEXT: stw r30, 72(r1) # 4-byte Folded Spill 1056; SPE-NEXT: mr r30, r3 1057; SPE-NEXT: evldd r3, 96(r1) 1058; SPE-NEXT: evldd r11, 104(r1) 1059; SPE-NEXT: evstdd r26, 16(r1) # 8-byte Folded Spill 1060; SPE-NEXT: evstdd r27, 24(r1) # 8-byte Folded Spill 1061; SPE-NEXT: efdneg r27, r11 1062; SPE-NEXT: evstdd r28, 32(r1) # 8-byte Folded Spill 1063; SPE-NEXT: evstdd r29, 40(r1) # 8-byte Folded Spill 1064; SPE-NEXT: evmergelo r29, r7, r8 1065; SPE-NEXT: evmergelo r9, r9, r10 1066; SPE-NEXT: evmergelo r4, r5, r6 1067; SPE-NEXT: efdneg r8, r3 1068; SPE-NEXT: evmergehi r3, r4, r4 1069; SPE-NEXT: evmergehi r5, r9, r9 1070; SPE-NEXT: evmergehi r7, r8, r8 1071; SPE-NEXT: mr r6, r9 1072; SPE-NEXT: evldd r28, 88(r1) 1073; SPE-NEXT: bl fma 1074; SPE-NEXT: evmergelo r26, r3, r4 1075; SPE-NEXT: evmergehi r3, r29, r29 1076; SPE-NEXT: evmergehi r5, r28, r28 1077; SPE-NEXT: evmergehi r7, r27, r27 1078; SPE-NEXT: mr r4, r29 1079; SPE-NEXT: mr r6, r28 1080; SPE-NEXT: mr r8, r27 1081; SPE-NEXT: bl fma 1082; SPE-NEXT: li r5, 8 1083; SPE-NEXT: evmergelo r3, r3, r4 1084; SPE-NEXT: evstddx r3, r30, r5 1085; SPE-NEXT: evstdd r26, 0(r30) 1086; SPE-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload 1087; SPE-NEXT: evldd r29, 40(r1) # 8-byte Folded Reload 1088; SPE-NEXT: evldd r28, 32(r1) # 8-byte Folded Reload 1089; SPE-NEXT: evldd r27, 24(r1) # 8-byte Folded Reload 1090; SPE-NEXT: evldd r26, 16(r1) # 8-byte Folded Reload 1091; SPE-NEXT: lwz r0, 84(r1) 1092; SPE-NEXT: addi r1, r1, 80 1093; SPE-NEXT: mtlr r0 1094; SPE-NEXT: blr 1095 %neg = fneg <2 x double> %vf2 1096 %res = call <2 x double> @llvm.experimental.constrained.fma.v2f64( 1097 <2 x double> %vf0, <2 x double> %vf1, <2 x double> %neg, 1098 metadata !"round.dynamic", 1099 metadata !"fpexcept.strict") #0 1100 ret <2 x double> %res 1101} 1102 1103define float @fnmadd_f32(float %f0, float %f1, float %f2) #0 { 1104; CHECK-LABEL: fnmadd_f32: 1105; CHECK: # %bb.0: 1106; CHECK-NEXT: xsnmaddasp f3, f1, f2 1107; 
CHECK-NEXT: fmr f1, f3 1108; CHECK-NEXT: blr 1109; 1110; NOVSX-LABEL: fnmadd_f32: 1111; NOVSX: # %bb.0: 1112; NOVSX-NEXT: fnmadds f1, f1, f2, f3 1113; NOVSX-NEXT: blr 1114; 1115; SPE-LABEL: fnmadd_f32: 1116; SPE: # %bb.0: 1117; SPE-NEXT: mflr r0 1118; SPE-NEXT: stwu r1, -16(r1) 1119; SPE-NEXT: stw r0, 20(r1) 1120; SPE-NEXT: .cfi_def_cfa_offset 16 1121; SPE-NEXT: .cfi_offset lr, 4 1122; SPE-NEXT: bl fmaf 1123; SPE-NEXT: efsneg r3, r3 1124; SPE-NEXT: lwz r0, 20(r1) 1125; SPE-NEXT: addi r1, r1, 16 1126; SPE-NEXT: mtlr r0 1127; SPE-NEXT: blr 1128 %fma = call float @llvm.experimental.constrained.fma.f32( 1129 float %f0, float %f1, float %f2, 1130 metadata !"round.dynamic", 1131 metadata !"fpexcept.strict") #0 1132 %res = fneg float %fma 1133 ret float %res 1134} 1135 1136define double @fnmadd_f64(double %f0, double %f1, double %f2) #0 { 1137; CHECK-LABEL: fnmadd_f64: 1138; CHECK: # %bb.0: 1139; CHECK-NEXT: xsnmaddadp f3, f1, f2 1140; CHECK-NEXT: fmr f1, f3 1141; CHECK-NEXT: blr 1142; 1143; NOVSX-LABEL: fnmadd_f64: 1144; NOVSX: # %bb.0: 1145; NOVSX-NEXT: fnmadd f1, f1, f2, f3 1146; NOVSX-NEXT: blr 1147; 1148; SPE-LABEL: fnmadd_f64: 1149; SPE: # %bb.0: 1150; SPE-NEXT: mflr r0 1151; SPE-NEXT: stwu r1, -16(r1) 1152; SPE-NEXT: stw r0, 20(r1) 1153; SPE-NEXT: .cfi_def_cfa_offset 16 1154; SPE-NEXT: .cfi_offset lr, 4 1155; SPE-NEXT: evmergelo r8, r7, r8 1156; SPE-NEXT: evmergelo r6, r5, r6 1157; SPE-NEXT: evmergelo r4, r3, r4 1158; SPE-NEXT: evmergehi r3, r4, r4 1159; SPE-NEXT: evmergehi r5, r6, r6 1160; SPE-NEXT: evmergehi r7, r8, r8 1161; SPE-NEXT: bl fma 1162; SPE-NEXT: evmergelo r3, r3, r4 1163; SPE-NEXT: efdneg r4, r3 1164; SPE-NEXT: evmergehi r3, r4, r4 1165; SPE-NEXT: lwz r0, 20(r1) 1166; SPE-NEXT: addi r1, r1, 16 1167; SPE-NEXT: mtlr r0 1168; SPE-NEXT: blr 1169 %fma = call double @llvm.experimental.constrained.fma.f64( 1170 double %f0, double %f1, double %f2, 1171 metadata !"round.dynamic", 1172 metadata !"fpexcept.strict") #0 1173 %res = fneg double %fma 1174 ret double 
%res 1175} 1176 1177define <4 x float> @fnmadd_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) #0 { 1178; CHECK-LABEL: fnmadd_v4f32: 1179; CHECK: # %bb.0: 1180; CHECK-NEXT: xvmaddasp v4, v2, v3 1181; CHECK-NEXT: xvnegsp v2, v4 1182; CHECK-NEXT: blr 1183; 1184; NOVSX-LABEL: fnmadd_v4f32: 1185; NOVSX: # %bb.0: 1186; NOVSX-NEXT: addi r3, r1, -32 1187; NOVSX-NEXT: vspltisb v5, -1 1188; NOVSX-NEXT: stvx v4, 0, r3 1189; NOVSX-NEXT: addi r3, r1, -48 1190; NOVSX-NEXT: stvx v3, 0, r3 1191; NOVSX-NEXT: addi r3, r1, -64 1192; NOVSX-NEXT: vslw v3, v5, v5 1193; NOVSX-NEXT: stvx v2, 0, r3 1194; NOVSX-NEXT: addi r3, r1, -16 1195; NOVSX-NEXT: lfs f0, -20(r1) 1196; NOVSX-NEXT: lfs f1, -36(r1) 1197; NOVSX-NEXT: lfs f2, -52(r1) 1198; NOVSX-NEXT: fmadds f0, f2, f1, f0 1199; NOVSX-NEXT: lfs f1, -40(r1) 1200; NOVSX-NEXT: lfs f2, -56(r1) 1201; NOVSX-NEXT: stfs f0, -4(r1) 1202; NOVSX-NEXT: lfs f0, -24(r1) 1203; NOVSX-NEXT: fmadds f0, f2, f1, f0 1204; NOVSX-NEXT: lfs f1, -44(r1) 1205; NOVSX-NEXT: lfs f2, -60(r1) 1206; NOVSX-NEXT: stfs f0, -8(r1) 1207; NOVSX-NEXT: lfs f0, -28(r1) 1208; NOVSX-NEXT: fmadds f0, f2, f1, f0 1209; NOVSX-NEXT: lfs f1, -48(r1) 1210; NOVSX-NEXT: lfs f2, -64(r1) 1211; NOVSX-NEXT: stfs f0, -12(r1) 1212; NOVSX-NEXT: lfs f0, -32(r1) 1213; NOVSX-NEXT: fmadds f0, f2, f1, f0 1214; NOVSX-NEXT: stfs f0, -16(r1) 1215; NOVSX-NEXT: lvx v2, 0, r3 1216; NOVSX-NEXT: vxor v2, v2, v3 1217; NOVSX-NEXT: blr 1218; 1219; SPE-LABEL: fnmadd_v4f32: 1220; SPE: # %bb.0: 1221; SPE-NEXT: mflr r0 1222; SPE-NEXT: stwu r1, -64(r1) 1223; SPE-NEXT: stw r0, 68(r1) 1224; SPE-NEXT: .cfi_def_cfa_offset 64 1225; SPE-NEXT: .cfi_offset lr, 4 1226; SPE-NEXT: .cfi_offset r21, -44 1227; SPE-NEXT: .cfi_offset r22, -40 1228; SPE-NEXT: .cfi_offset r23, -36 1229; SPE-NEXT: .cfi_offset r24, -32 1230; SPE-NEXT: .cfi_offset r25, -28 1231; SPE-NEXT: .cfi_offset r26, -24 1232; SPE-NEXT: .cfi_offset r27, -20 1233; SPE-NEXT: .cfi_offset r28, -16 1234; SPE-NEXT: .cfi_offset r29, -12 1235; SPE-NEXT: 
.cfi_offset r30, -8 1236; SPE-NEXT: stw r27, 44(r1) # 4-byte Folded Spill 1237; SPE-NEXT: mr r27, r5 1238; SPE-NEXT: lwz r5, 84(r1) 1239; SPE-NEXT: stw r25, 36(r1) # 4-byte Folded Spill 1240; SPE-NEXT: mr r25, r3 1241; SPE-NEXT: stw r26, 40(r1) # 4-byte Folded Spill 1242; SPE-NEXT: mr r26, r4 1243; SPE-NEXT: mr r3, r6 1244; SPE-NEXT: mr r4, r10 1245; SPE-NEXT: stw r21, 20(r1) # 4-byte Folded Spill 1246; SPE-NEXT: stw r22, 24(r1) # 4-byte Folded Spill 1247; SPE-NEXT: stw r23, 28(r1) # 4-byte Folded Spill 1248; SPE-NEXT: stw r24, 32(r1) # 4-byte Folded Spill 1249; SPE-NEXT: stw r28, 48(r1) # 4-byte Folded Spill 1250; SPE-NEXT: mr r28, r7 1251; SPE-NEXT: stw r29, 52(r1) # 4-byte Folded Spill 1252; SPE-NEXT: mr r29, r8 1253; SPE-NEXT: stw r30, 56(r1) # 4-byte Folded Spill 1254; SPE-NEXT: mr r30, r9 1255; SPE-NEXT: lwz r24, 72(r1) 1256; SPE-NEXT: lwz r23, 76(r1) 1257; SPE-NEXT: lwz r22, 80(r1) 1258; SPE-NEXT: bl fmaf 1259; SPE-NEXT: mr r21, r3 1260; SPE-NEXT: mr r3, r27 1261; SPE-NEXT: mr r4, r30 1262; SPE-NEXT: mr r5, r22 1263; SPE-NEXT: bl fmaf 1264; SPE-NEXT: mr r30, r3 1265; SPE-NEXT: mr r3, r26 1266; SPE-NEXT: mr r4, r29 1267; SPE-NEXT: mr r5, r23 1268; SPE-NEXT: bl fmaf 1269; SPE-NEXT: mr r29, r3 1270; SPE-NEXT: mr r3, r25 1271; SPE-NEXT: mr r4, r28 1272; SPE-NEXT: mr r5, r24 1273; SPE-NEXT: bl fmaf 1274; SPE-NEXT: efsneg r4, r29 1275; SPE-NEXT: efsneg r5, r30 1276; SPE-NEXT: efsneg r3, r3 1277; SPE-NEXT: efsneg r6, r21 1278; SPE-NEXT: lwz r30, 56(r1) # 4-byte Folded Reload 1279; SPE-NEXT: lwz r29, 52(r1) # 4-byte Folded Reload 1280; SPE-NEXT: lwz r28, 48(r1) # 4-byte Folded Reload 1281; SPE-NEXT: lwz r27, 44(r1) # 4-byte Folded Reload 1282; SPE-NEXT: lwz r26, 40(r1) # 4-byte Folded Reload 1283; SPE-NEXT: lwz r25, 36(r1) # 4-byte Folded Reload 1284; SPE-NEXT: lwz r24, 32(r1) # 4-byte Folded Reload 1285; SPE-NEXT: lwz r23, 28(r1) # 4-byte Folded Reload 1286; SPE-NEXT: lwz r22, 24(r1) # 4-byte Folded Reload 1287; SPE-NEXT: lwz r21, 20(r1) # 4-byte Folded Reload 
1288; SPE-NEXT: lwz r0, 68(r1) 1289; SPE-NEXT: addi r1, r1, 64 1290; SPE-NEXT: mtlr r0 1291; SPE-NEXT: blr 1292 %fma = call <4 x float> @llvm.experimental.constrained.fma.v4f32( 1293 <4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2, 1294 metadata !"round.dynamic", 1295 metadata !"fpexcept.strict") #0 1296 %res = fneg <4 x float> %fma 1297 ret <4 x float> %res 1298} 1299 1300define <2 x double> @fnmadd_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) #0 { 1301; CHECK-LABEL: fnmadd_v2f64: 1302; CHECK: # %bb.0: 1303; CHECK-NEXT: xvnmaddadp v4, v2, v3 1304; CHECK-NEXT: vmr v2, v4 1305; CHECK-NEXT: blr 1306; 1307; NOVSX-LABEL: fnmadd_v2f64: 1308; NOVSX: # %bb.0: 1309; NOVSX-NEXT: fnmadd f2, f2, f4, f6 1310; NOVSX-NEXT: fnmadd f1, f1, f3, f5 1311; NOVSX-NEXT: blr 1312; 1313; SPE-LABEL: fnmadd_v2f64: 1314; SPE: # %bb.0: 1315; SPE-NEXT: mflr r0 1316; SPE-NEXT: stwu r1, -80(r1) 1317; SPE-NEXT: stw r0, 84(r1) 1318; SPE-NEXT: .cfi_def_cfa_offset 80 1319; SPE-NEXT: .cfi_offset lr, 4 1320; SPE-NEXT: .cfi_offset r26, -64 1321; SPE-NEXT: .cfi_offset r27, -56 1322; SPE-NEXT: .cfi_offset r28, -48 1323; SPE-NEXT: .cfi_offset r29, -40 1324; SPE-NEXT: .cfi_offset r30, -8 1325; SPE-NEXT: evstdd r26, 16(r1) # 8-byte Folded Spill 1326; SPE-NEXT: evstdd r27, 24(r1) # 8-byte Folded Spill 1327; SPE-NEXT: evstdd r28, 32(r1) # 8-byte Folded Spill 1328; SPE-NEXT: evstdd r29, 40(r1) # 8-byte Folded Spill 1329; SPE-NEXT: stw r30, 72(r1) # 4-byte Folded Spill 1330; SPE-NEXT: evmergelo r27, r7, r8 1331; SPE-NEXT: evmergelo r9, r9, r10 1332; SPE-NEXT: evmergelo r4, r5, r6 1333; SPE-NEXT: mr r30, r3 1334; SPE-NEXT: evldd r8, 96(r1) 1335; SPE-NEXT: evmergehi r3, r4, r4 1336; SPE-NEXT: evmergehi r5, r9, r9 1337; SPE-NEXT: mr r6, r9 1338; SPE-NEXT: evldd r29, 104(r1) 1339; SPE-NEXT: evmergehi r7, r8, r8 1340; SPE-NEXT: evldd r28, 88(r1) 1341; SPE-NEXT: bl fma 1342; SPE-NEXT: evmergelo r26, r3, r4 1343; SPE-NEXT: evmergehi r3, r27, r27 1344; SPE-NEXT: evmergehi r5, r28, r28 1345; 
SPE-NEXT: evmergehi r7, r29, r29 1346; SPE-NEXT: mr r4, r27 1347; SPE-NEXT: mr r6, r28 1348; SPE-NEXT: mr r8, r29 1349; SPE-NEXT: bl fma 1350; SPE-NEXT: evmergelo r3, r3, r4 1351; SPE-NEXT: li r5, 8 1352; SPE-NEXT: efdneg r3, r3 1353; SPE-NEXT: evstddx r3, r30, r5 1354; SPE-NEXT: efdneg r3, r26 1355; SPE-NEXT: evstdd r3, 0(r30) 1356; SPE-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload 1357; SPE-NEXT: evldd r29, 40(r1) # 8-byte Folded Reload 1358; SPE-NEXT: evldd r28, 32(r1) # 8-byte Folded Reload 1359; SPE-NEXT: evldd r27, 24(r1) # 8-byte Folded Reload 1360; SPE-NEXT: evldd r26, 16(r1) # 8-byte Folded Reload 1361; SPE-NEXT: lwz r0, 84(r1) 1362; SPE-NEXT: addi r1, r1, 80 1363; SPE-NEXT: mtlr r0 1364; SPE-NEXT: blr 1365 %fma = call <2 x double> @llvm.experimental.constrained.fma.v2f64( 1366 <2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2, 1367 metadata !"round.dynamic", 1368 metadata !"fpexcept.strict") #0 1369 %res = fneg <2 x double> %fma 1370 ret <2 x double> %res 1371} 1372 1373define float @fnmsub_f32(float %f0, float %f1, float %f2) #0 { 1374; CHECK-LABEL: fnmsub_f32: 1375; CHECK: # %bb.0: 1376; CHECK-NEXT: xsnmsubasp f3, f1, f2 1377; CHECK-NEXT: fmr f1, f3 1378; CHECK-NEXT: blr 1379; 1380; NOVSX-LABEL: fnmsub_f32: 1381; NOVSX: # %bb.0: 1382; NOVSX-NEXT: fnmsubs f1, f1, f2, f3 1383; NOVSX-NEXT: blr 1384; 1385; SPE-LABEL: fnmsub_f32: 1386; SPE: # %bb.0: 1387; SPE-NEXT: mflr r0 1388; SPE-NEXT: stwu r1, -16(r1) 1389; SPE-NEXT: stw r0, 20(r1) 1390; SPE-NEXT: .cfi_def_cfa_offset 16 1391; SPE-NEXT: .cfi_offset lr, 4 1392; SPE-NEXT: efsneg r5, r5 1393; SPE-NEXT: bl fmaf 1394; SPE-NEXT: efsneg r3, r3 1395; SPE-NEXT: lwz r0, 20(r1) 1396; SPE-NEXT: addi r1, r1, 16 1397; SPE-NEXT: mtlr r0 1398; SPE-NEXT: blr 1399 %neg = fneg float %f2 1400 %fma = call float @llvm.experimental.constrained.fma.f32( 1401 float %f0, float %f1, float %neg, 1402 metadata !"round.dynamic", 1403 metadata !"fpexcept.strict") #0 1404 %res = fneg float %fma 1405 ret float %res 1406} 1407 
; NOTE: the assertion lines inside each function below are produced by
; utils/update_llc_test_checks.py (see the note at the top of this file);
; regenerate them with that script instead of editing them by hand.

; Scalar double negated multiply-subtract: the result is fneg of a constrained
; fma whose addend is fneg %f2, i.e. -(%f0 * %f1 + (-%f2)). The fma uses
; dynamic rounding and strict exception semantics.
define double @fnmsub_f64(double %f0, double %f1, double %f2) #0 {
; CHECK-LABEL: fnmsub_f64:
; CHECK: # %bb.0:
; CHECK-NEXT: xsnmsubadp f3, f1, f2
; CHECK-NEXT: fmr f1, f3
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fnmsub_f64:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fnmsub f1, f1, f2, f3
; NOVSX-NEXT: blr
;
; SPE-LABEL: fnmsub_f64:
; SPE: # %bb.0:
; SPE-NEXT: mflr r0
; SPE-NEXT: stwu r1, -16(r1)
; SPE-NEXT: stw r0, 20(r1)
; SPE-NEXT: .cfi_def_cfa_offset 16
; SPE-NEXT: .cfi_offset lr, 4
; SPE-NEXT: evmergelo r6, r5, r6
; SPE-NEXT: evmergelo r4, r3, r4
; SPE-NEXT: evmergelo r3, r7, r8
; SPE-NEXT: efdneg r8, r3
; SPE-NEXT: evmergehi r3, r4, r4
; SPE-NEXT: evmergehi r5, r6, r6
; SPE-NEXT: evmergehi r7, r8, r8
; SPE-NEXT: bl fma
; SPE-NEXT: evmergelo r3, r3, r4
; SPE-NEXT: efdneg r4, r3
; SPE-NEXT: evmergehi r3, r4, r4
; SPE-NEXT: lwz r0, 20(r1)
; SPE-NEXT: addi r1, r1, 16
; SPE-NEXT: mtlr r0
; SPE-NEXT: blr
  %neg = fneg double %f2
  %fma = call double @llvm.experimental.constrained.fma.f64(
      double %f0, double %f1, double %neg,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  %res = fneg double %fma
  ret double %res
}

; <4 x float> negated multiply-subtract: fneg of a constrained fma whose addend
; is fneg %vf2, i.e. -(%vf0 * %vf1 + (-%vf2)) per lane, with dynamic rounding
; and strict exception semantics.
define <4 x float> @fnmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) #0 {
; CHECK-LABEL: fnmsub_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvnmsubasp v4, v2, v3
; CHECK-NEXT: vmr v2, v4
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fnmsub_v4f32:
; NOVSX: # %bb.0:
; NOVSX-NEXT: vspltisb v5, -1
; NOVSX-NEXT: addi r3, r1, -48
; NOVSX-NEXT: vslw v5, v5, v5
; NOVSX-NEXT: stvx v3, 0, r3
; NOVSX-NEXT: addi r3, r1, -64
; NOVSX-NEXT: vxor v4, v4, v5
; NOVSX-NEXT: stvx v2, 0, r3
; NOVSX-NEXT: addi r3, r1, -32
; NOVSX-NEXT: stvx v4, 0, r3
; NOVSX-NEXT: addi r3, r1, -16
; NOVSX-NEXT: lfs f0, -36(r1)
; NOVSX-NEXT: lfs f1, -52(r1)
; NOVSX-NEXT: lfs f2, -20(r1)
; NOVSX-NEXT: fmadds f0, f1, f0, f2
; NOVSX-NEXT: lfs f1, -56(r1)
; NOVSX-NEXT: lfs f2, -24(r1)
; NOVSX-NEXT: stfs f0, -4(r1)
; NOVSX-NEXT: lfs f0, -40(r1)
; NOVSX-NEXT: fmadds f0, f1, f0, f2
; NOVSX-NEXT: lfs f1, -60(r1)
; NOVSX-NEXT: lfs f2, -28(r1)
; NOVSX-NEXT: stfs f0, -8(r1)
; NOVSX-NEXT: lfs f0, -44(r1)
; NOVSX-NEXT: fmadds f0, f1, f0, f2
; NOVSX-NEXT: lfs f1, -64(r1)
; NOVSX-NEXT: lfs f2, -32(r1)
; NOVSX-NEXT: stfs f0, -12(r1)
; NOVSX-NEXT: lfs f0, -48(r1)
; NOVSX-NEXT: fmadds f0, f1, f0, f2
; NOVSX-NEXT: stfs f0, -16(r1)
; NOVSX-NEXT: lvx v2, 0, r3
; NOVSX-NEXT: vxor v2, v2, v5
; NOVSX-NEXT: blr
;
; SPE-LABEL: fnmsub_v4f32:
; SPE: # %bb.0:
; SPE-NEXT: mflr r0
; SPE-NEXT: stwu r1, -64(r1)
; SPE-NEXT: stw r0, 68(r1)
; SPE-NEXT: .cfi_def_cfa_offset 64
; SPE-NEXT: .cfi_offset lr, 4
; SPE-NEXT: .cfi_offset r21, -44
; SPE-NEXT: .cfi_offset r22, -40
; SPE-NEXT: .cfi_offset r23, -36
; SPE-NEXT: .cfi_offset r24, -32
; SPE-NEXT: .cfi_offset r25, -28
; SPE-NEXT: .cfi_offset r26, -24
; SPE-NEXT: .cfi_offset r27, -20
; SPE-NEXT: .cfi_offset r28, -16
; SPE-NEXT: .cfi_offset r29, -12
; SPE-NEXT: .cfi_offset r30, -8
; SPE-NEXT: stw r25, 36(r1) # 4-byte Folded Spill
; SPE-NEXT: mr r25, r3
; SPE-NEXT: stw r26, 40(r1) # 4-byte Folded Spill
; SPE-NEXT: mr r26, r4
; SPE-NEXT: stw r27, 44(r1) # 4-byte Folded Spill
; SPE-NEXT: mr r27, r5
; SPE-NEXT: stw r28, 48(r1) # 4-byte Folded Spill
; SPE-NEXT: mr r28, r7
; SPE-NEXT: lwz r3, 80(r1)
; SPE-NEXT: lwz r4, 72(r1)
; SPE-NEXT: lwz r5, 76(r1)
; SPE-NEXT: lwz r7, 84(r1)
; SPE-NEXT: stw r22, 24(r1) # 4-byte Folded Spill
; SPE-NEXT: efsneg r22, r3
; SPE-NEXT: stw r23, 28(r1) # 4-byte Folded Spill
; SPE-NEXT: efsneg r23, r5
; SPE-NEXT: stw r24, 32(r1) # 4-byte Folded Spill
; SPE-NEXT: efsneg r24, r4
; SPE-NEXT: efsneg r5, r7
; SPE-NEXT: mr r3, r6
; SPE-NEXT: mr r4, r10
; SPE-NEXT: stw r21, 20(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r29, 52(r1) # 4-byte Folded Spill
; SPE-NEXT: mr r29, r8
; SPE-NEXT: stw r30, 56(r1) # 4-byte Folded Spill
; SPE-NEXT: mr r30, r9
; SPE-NEXT: bl fmaf
; SPE-NEXT: mr r21, r3
; SPE-NEXT: mr r3, r27
; SPE-NEXT: mr r4, r30
; SPE-NEXT: mr r5, r22
; SPE-NEXT: bl fmaf
; SPE-NEXT: mr r30, r3
; SPE-NEXT: mr r3, r26
; SPE-NEXT: mr r4, r29
; SPE-NEXT: mr r5, r23
; SPE-NEXT: bl fmaf
; SPE-NEXT: mr r29, r3
; SPE-NEXT: mr r3, r25
; SPE-NEXT: mr r4, r28
; SPE-NEXT: mr r5, r24
; SPE-NEXT: bl fmaf
; SPE-NEXT: efsneg r4, r29
; SPE-NEXT: efsneg r5, r30
; SPE-NEXT: efsneg r3, r3
; SPE-NEXT: efsneg r6, r21
; SPE-NEXT: lwz r30, 56(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r29, 52(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r28, 48(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r27, 44(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r26, 40(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r25, 36(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r24, 32(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r23, 28(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r22, 24(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r21, 20(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r0, 68(r1)
; SPE-NEXT: addi r1, r1, 64
; SPE-NEXT: mtlr r0
; SPE-NEXT: blr
  %neg = fneg <4 x float> %vf2
  %fma = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
      <4 x float> %vf0, <4 x float> %vf1, <4 x float> %neg,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  %res = fneg <4 x float> %fma
  ret <4 x float> %res
}

; <2 x double> negated multiply-subtract: fneg of a constrained fma whose
; addend is fneg %vf2, i.e. -(%vf0 * %vf1 + (-%vf2)) per lane, with dynamic
; rounding and strict exception semantics.
define <2 x double> @fnmsub_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) #0 {
; CHECK-LABEL: fnmsub_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvnmsubadp v4, v2, v3
; CHECK-NEXT: vmr v2, v4
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fnmsub_v2f64:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fnmsub f2, f2, f4, f6
; NOVSX-NEXT: fnmsub f1, f1, f3, f5
; NOVSX-NEXT: blr
;
; SPE-LABEL: fnmsub_v2f64:
; SPE: # %bb.0:
; SPE-NEXT: mflr r0
; SPE-NEXT: stwu r1, -80(r1)
; SPE-NEXT: stw r0, 84(r1)
; SPE-NEXT: .cfi_def_cfa_offset 80
; SPE-NEXT: .cfi_offset lr, 4
; SPE-NEXT: .cfi_offset r26, -64
; SPE-NEXT: .cfi_offset r27, -56
; SPE-NEXT: .cfi_offset r28, -48
; SPE-NEXT: .cfi_offset r29, -40
; SPE-NEXT: .cfi_offset r30, -8
; SPE-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
; SPE-NEXT: mr r30, r3
; SPE-NEXT: evldd r3, 96(r1)
; SPE-NEXT: evldd r11, 104(r1)
; SPE-NEXT: evstdd r26, 16(r1) # 8-byte Folded Spill
; SPE-NEXT: evstdd r27, 24(r1) # 8-byte Folded Spill
; SPE-NEXT: efdneg r27, r11
; SPE-NEXT: evstdd r28, 32(r1) # 8-byte Folded Spill
; SPE-NEXT: evstdd r29, 40(r1) # 8-byte Folded Spill
; SPE-NEXT: evmergelo r29, r7, r8
; SPE-NEXT: evmergelo r9, r9, r10
; SPE-NEXT: evmergelo r4, r5, r6
; SPE-NEXT: efdneg r8, r3
; SPE-NEXT: evmergehi r3, r4, r4
; SPE-NEXT: evmergehi r5, r9, r9
; SPE-NEXT: evmergehi r7, r8, r8
; SPE-NEXT: mr r6, r9
; SPE-NEXT: evldd r28, 88(r1)
; SPE-NEXT: bl fma
; SPE-NEXT: evmergelo r26, r3, r4
; SPE-NEXT: evmergehi r3, r29, r29
; SPE-NEXT: evmergehi r5, r28, r28
; SPE-NEXT: evmergehi r7, r27, r27
; SPE-NEXT: mr r4, r29
; SPE-NEXT: mr r6, r28
; SPE-NEXT: mr r8, r27
; SPE-NEXT: bl fma
; SPE-NEXT: evmergelo r3, r3, r4
; SPE-NEXT: li r5, 8
; SPE-NEXT: efdneg r3, r3
; SPE-NEXT: evstddx r3, r30, r5
; SPE-NEXT: efdneg r3, r26
; SPE-NEXT: evstdd r3, 0(r30)
; SPE-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
; SPE-NEXT: evldd r29, 40(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r28, 32(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r27, 24(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r26, 16(r1) # 8-byte Folded Reload
; SPE-NEXT: lwz r0, 84(r1)
; SPE-NEXT: addi r1, r1, 80
; SPE-NEXT: mtlr r0
; SPE-NEXT: blr
  %neg = fneg <2 x double> %vf2
  %fma = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
      <2 x double> %vf0, <2 x double> %vf1, <2 x double> %neg,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  %res = fneg <2 x double> %fma
  ret <2 x double> %res
}

; Scalar float square root through the constrained sqrt intrinsic, with
; dynamic rounding and strict exception semantics.
define float @fsqrt_f32(float %f1) #0 {
; CHECK-LABEL: fsqrt_f32:
; CHECK: # %bb.0:
; CHECK-NEXT: xssqrtsp f1, f1
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fsqrt_f32:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fsqrts f1, f1
; NOVSX-NEXT: blr
;
; SPE-LABEL: fsqrt_f32:
; SPE: # %bb.0:
; SPE-NEXT: mflr r0
; SPE-NEXT: stwu r1, -16(r1)
; SPE-NEXT: stw r0, 20(r1)
; SPE-NEXT: .cfi_def_cfa_offset 16
; SPE-NEXT: .cfi_offset lr, 4
; SPE-NEXT: bl sqrtf
; SPE-NEXT: lwz r0, 20(r1)
; SPE-NEXT: addi r1, r1, 16
; SPE-NEXT: mtlr r0
; SPE-NEXT: blr
  %res = call float @llvm.experimental.constrained.sqrt.f32(
      float %f1,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret float %res
}

; Scalar double square root through the constrained sqrt intrinsic, with
; dynamic rounding and strict exception semantics.
define double @fsqrt_f64(double %f1) #0 {
; CHECK-LABEL: fsqrt_f64:
; CHECK: # %bb.0:
; CHECK-NEXT: xssqrtdp f1, f1
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fsqrt_f64:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fsqrt f1, f1
; NOVSX-NEXT: blr
;
; SPE-LABEL: fsqrt_f64:
; SPE: # %bb.0:
; SPE-NEXT: mflr r0
; SPE-NEXT: stwu r1, -16(r1)
; SPE-NEXT: stw r0, 20(r1)
; SPE-NEXT: .cfi_def_cfa_offset 16
; SPE-NEXT: .cfi_offset lr, 4
; SPE-NEXT: evmergelo r4, r3, r4
; SPE-NEXT: evmergehi r3, r4, r4
; SPE-NEXT: bl sqrt
; SPE-NEXT: evmergelo r4, r3, r4
; SPE-NEXT: evmergehi r3, r4, r4
; SPE-NEXT: lwz r0, 20(r1)
; SPE-NEXT: addi r1, r1, 16
; SPE-NEXT: mtlr r0
; SPE-NEXT: blr
  %res = call double @llvm.experimental.constrained.sqrt.f64(
      double %f1,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret double %res
}

; <4 x float> square root through the constrained sqrt intrinsic, with dynamic
; rounding and strict exception semantics.
define <4 x float> @fsqrt_v4f32(<4 x float> %vf1) #0 {
; CHECK-LABEL: fsqrt_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvsqrtsp v2, v2
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fsqrt_v4f32:
; NOVSX: # %bb.0:
; NOVSX-NEXT: addi r3, r1, -32
; NOVSX-NEXT: stvx v2, 0, r3
; NOVSX-NEXT: addi r3, r1, -16
; NOVSX-NEXT: lfs f0, -20(r1)
; NOVSX-NEXT: fsqrts f0, f0
; NOVSX-NEXT: stfs f0, -4(r1)
; NOVSX-NEXT: lfs f0, -24(r1)
; NOVSX-NEXT: fsqrts f0, f0
; NOVSX-NEXT: stfs f0, -8(r1)
; NOVSX-NEXT: lfs f0, -28(r1)
; NOVSX-NEXT: fsqrts f0, f0
; NOVSX-NEXT: stfs f0, -12(r1)
; NOVSX-NEXT: lfs f0, -32(r1)
; NOVSX-NEXT: fsqrts f0, f0
; NOVSX-NEXT: stfs f0, -16(r1)
; NOVSX-NEXT: lvx v2, 0, r3
; NOVSX-NEXT: blr
;
; SPE-LABEL: fsqrt_v4f32:
; SPE: # %bb.0:
; SPE-NEXT: mflr r0
; SPE-NEXT: stwu r1, -32(r1)
; SPE-NEXT: stw r0, 36(r1)
; SPE-NEXT: .cfi_def_cfa_offset 32
; SPE-NEXT: .cfi_offset lr, 4
; SPE-NEXT: .cfi_offset r27, -20
; SPE-NEXT: .cfi_offset r28, -16
; SPE-NEXT: .cfi_offset r29, -12
; SPE-NEXT: .cfi_offset r30, -8
; SPE-NEXT: stw r28, 16(r1) # 4-byte Folded Spill
; SPE-NEXT: mr r28, r3
; SPE-NEXT: mr r3, r6
; SPE-NEXT: stw r27, 12(r1) # 4-byte Folded Spill
; SPE-NEXT: stw r29, 20(r1) # 4-byte Folded Spill
; SPE-NEXT: mr r29, r4
; SPE-NEXT: stw r30, 24(r1) # 4-byte Folded Spill
; SPE-NEXT: mr r30, r5
; SPE-NEXT: bl sqrtf
; SPE-NEXT: mr r27, r3
; SPE-NEXT: mr r3, r30
; SPE-NEXT: bl sqrtf
; SPE-NEXT: mr r30, r3
; SPE-NEXT: mr r3, r29
; SPE-NEXT: bl sqrtf
; SPE-NEXT: mr r29, r3
; SPE-NEXT: mr r3, r28
; SPE-NEXT: bl sqrtf
; SPE-NEXT: mr r4, r29
; SPE-NEXT: mr r5, r30
; SPE-NEXT: mr r6, r27
; SPE-NEXT: lwz r30, 24(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r29, 20(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r28, 16(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r27, 12(r1) # 4-byte Folded Reload
; SPE-NEXT: lwz r0, 36(r1)
; SPE-NEXT: addi r1, r1, 32
; SPE-NEXT: mtlr r0
; SPE-NEXT: blr
  %res = call <4 x float> @llvm.experimental.constrained.sqrt.v4f32(
      <4 x float> %vf1,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret <4 x float> %res
}

; <2 x double> square root through the constrained sqrt intrinsic, with dynamic
; rounding and strict exception semantics.
define <2 x double> @fsqrt_v2f64(<2 x double> %vf1) #0 {
; CHECK-LABEL: fsqrt_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvsqrtdp v2, v2
; CHECK-NEXT: blr
;
; NOVSX-LABEL: fsqrt_v2f64:
; NOVSX: # %bb.0:
; NOVSX-NEXT: fsqrt f2, f2
; NOVSX-NEXT: fsqrt f1, f1
; NOVSX-NEXT: blr
;
; SPE-LABEL: fsqrt_v2f64:
; SPE: # %bb.0:
; SPE-NEXT: mflr r0
; SPE-NEXT: stwu r1, -64(r1)
; SPE-NEXT: stw r0, 68(r1)
; SPE-NEXT: .cfi_def_cfa_offset 64
; SPE-NEXT: .cfi_offset lr, 4
; SPE-NEXT: .cfi_offset r28, -48
; SPE-NEXT: .cfi_offset r29, -40
; SPE-NEXT: .cfi_offset r30, -8
; SPE-NEXT: evstdd r28, 16(r1) # 8-byte Folded Spill
; SPE-NEXT: evstdd r29, 24(r1) # 8-byte Folded Spill
; SPE-NEXT: stw r30, 56(r1) # 4-byte Folded Spill
; SPE-NEXT: evmergelo r29, r7, r8
; SPE-NEXT: evmergelo r4, r5, r6
; SPE-NEXT: mr r30, r3
; SPE-NEXT: evmergehi r3, r4, r4
; SPE-NEXT: bl sqrt
; SPE-NEXT: evmergelo r28, r3, r4
; SPE-NEXT: evmergehi r3, r29, r29
; SPE-NEXT: mr r4, r29
; SPE-NEXT: bl sqrt
; SPE-NEXT: li r5, 8
; SPE-NEXT: evmergelo r3, r3, r4
; SPE-NEXT: evstddx r3, r30, r5
; SPE-NEXT: evstdd r28, 0(r30)
; SPE-NEXT: lwz r30, 56(r1) # 4-byte Folded Reload
; SPE-NEXT: evldd r29, 24(r1) # 8-byte Folded Reload
; SPE-NEXT: evldd r28, 16(r1) # 8-byte Folded Reload
; SPE-NEXT: lwz r0, 68(r1)
; SPE-NEXT: addi r1, r1, 64
; SPE-NEXT: mtlr r0
; SPE-NEXT: blr
  %res = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(
      <2 x double> %vf1,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict") #0
  ret <2 x double> %res
}

; Attribute group #0, referenced by the definitions and call sites above,
; carries the strictfp attribute.
attributes #0 = { strictfp }