; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
; RUN:   -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s
; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \
; RUN:   -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s \
; RUN:   -check-prefix=CHECK-P8

; Every function below loads three fp128 values from %a, %b and %c, combines
; them with a multiply followed by an add or subtract (optionally negated),
; and stores the fp128 result to %res. The file is compiled twice, once with
; -mcpu=pwr9 and once with -mcpu=pwr8; the pwr9 expectations contain a single
; quad-precision fused instruction, while the pwr8 expectations contain calls
; to the __mulkf3 / __addkf3 / __subkf3 library routines.

; *res = llvm.fmuladd.f128(*a, *b, *c).
; pwr9 expectation: xsmaddqp. pwr8 expectation: __mulkf3 then __addkf3.
define void @qpFmadd(ptr nocapture readonly %a, ptr nocapture %b,
; CHECK-LABEL: qpFmadd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    lxv v3, 0(r4)
; CHECK-NEXT:    lxv v4, 0(r5)
; CHECK-NEXT:    xsmaddqp v4, v2, v3
; CHECK-NEXT:    stxv v4, 0(r6)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpFmadd:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    stdu r1, -80(r1)
; CHECK-P8-NEXT:    std r0, 96(r1)
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 80
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    .cfi_offset v31, -32
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    li r7, 48
; CHECK-P8-NEXT:    std r30, 64(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    mr r30, r6
; CHECK-P8-NEXT:    stvx v31, r1, r7 # 16-byte Folded Spill
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
; CHECK-P8-NEXT:    xxswapd v3, vs0
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
; CHECK-P8-NEXT:    xxswapd v31, vs0
; CHECK-P8-NEXT:    bl __mulkf3
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    vmr v3, v31
; CHECK-P8-NEXT:    bl __addkf3
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    li r3, 48
; CHECK-P8-NEXT:    xxswapd vs0, v2
; CHECK-P8-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
; CHECK-P8-NEXT:    ld r30, 64(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    addi r1, r1, 80
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
                     ptr nocapture readonly %c, ptr nocapture %res) {
entry:
  %0 = load fp128, ptr %a, align 16
  %1 = load fp128, ptr %b, align 16
  %2 = load fp128, ptr %c, align 16
  %madd = tail call fp128 @llvm.fmuladd.f128(fp128 %0, fp128 %1, fp128 %2)
  store fp128 %madd, ptr %res, align 16
  ret void
}
declare fp128 @llvm.fmuladd.f128(fp128, fp128, fp128)

; *res = *a + (*b * *c), written as separate fmul/fadd that both carry the
; `contract` flag; the product is the second operand of the add.
; pwr9 expectation: xsmaddqp accumulating into the register holding *a.
; pwr8 expectation: __mulkf3 then __addkf3.
; Function Attrs: norecurse nounwind
define void @qpFmadd_02(ptr nocapture readonly %a,
; CHECK-LABEL: qpFmadd_02:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    lxv v3, 0(r4)
; CHECK-NEXT:    lxv v4, 0(r5)
; CHECK-NEXT:    xsmaddqp v2, v3, v4
; CHECK-NEXT:    stxv v2, 0(r6)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpFmadd_02:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    stdu r1, -80(r1)
; CHECK-P8-NEXT:    std r0, 96(r1)
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 80
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    .cfi_offset v31, -32
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    li r7, 48
; CHECK-P8-NEXT:    std r30, 64(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    mr r30, r6
; CHECK-P8-NEXT:    stvx v31, r1, r7 # 16-byte Folded Spill
; CHECK-P8-NEXT:    xxswapd v31, vs0
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
; CHECK-P8-NEXT:    xxswapd v3, vs0
; CHECK-P8-NEXT:    bl __mulkf3
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    vmr v3, v2
; CHECK-P8-NEXT:    vmr v2, v31
; CHECK-P8-NEXT:    bl __addkf3
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    li r3, 48
; CHECK-P8-NEXT:    xxswapd vs0, v2
; CHECK-P8-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
; CHECK-P8-NEXT:    ld r30, 64(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    addi r1, r1, 80
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
                        ptr nocapture readonly %b,
                        ptr nocapture readonly %c, ptr nocapture %res) {
entry:
  %0 = load fp128, ptr %a, align 16
  %1 = load fp128, ptr %b, align 16
  %2 = load fp128, ptr %c, align 16
  %mul = fmul contract fp128 %1, %2
  %add = fadd contract fp128 %0, %mul
  store fp128 %add, ptr %res, align 16
  ret void
}

; *res = (*a * *b) + *c with `contract` on both operations; the product is the
; first operand of the add and *c is loaded only after the multiply.
; pwr9 expectation: xsmaddqp accumulating into the register holding *c.
; pwr8 expectation: __mulkf3, then *c is loaded, then __addkf3.
; Function Attrs: norecurse nounwind
define void @qpFmadd_03(ptr nocapture readonly %a,
; CHECK-LABEL: qpFmadd_03:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    lxv v3, 0(r4)
; CHECK-NEXT:    lxv v4, 0(r5)
; CHECK-NEXT:    xsmaddqp v4, v2, v3
; CHECK-NEXT:    stxv v4, 0(r6)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpFmadd_03:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 64
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r29, -24
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    stdu r1, -64(r1)
; CHECK-P8-NEXT:    std r0, 80(r1)
; CHECK-P8-NEXT:    mr r30, r6
; CHECK-P8-NEXT:    mr r29, r5
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
; CHECK-P8-NEXT:    xxswapd v3, vs0
; CHECK-P8-NEXT:    bl __mulkf3
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r29
; CHECK-P8-NEXT:    xxswapd v3, vs0
; CHECK-P8-NEXT:    bl __addkf3
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    xxswapd vs0, v2
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
; CHECK-P8-NEXT:    addi r1, r1, 64
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
                        ptr nocapture readonly %b,
                        ptr nocapture readonly %c, ptr nocapture %res) {
entry:
  %0 = load fp128, ptr %a, align 16
  %1 = load fp128, ptr %b, align 16
  %mul = fmul contract fp128 %0, %1
  %2 = load fp128, ptr %c, align 16
  %add = fadd contract fp128 %mul, %2
  store fp128 %add, ptr %res, align 16
  ret void
}

; *res = -(*a + (*b * *c)). The negation is spelled as an fsub from the fp128
; negative-zero constant; only the fmul and fadd carry `contract`.
; pwr9 expectation: xsnmaddqp accumulating into the register holding *a.
; pwr8 expectation: __mulkf3, __addkf3, then the sign is flipped by toggling
; bit 0x80 of the top byte of the result through a stack slot.
; Function Attrs: norecurse nounwind
define void @qpFnmadd(ptr nocapture readonly %a,
; CHECK-LABEL: qpFnmadd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    lxv v3, 0(r4)
; CHECK-NEXT:    lxv v4, 0(r5)
; CHECK-NEXT:    xsnmaddqp v2, v3, v4
; CHECK-NEXT:    stxv v2, 0(r6)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpFnmadd:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    stdu r1, -96(r1)
; CHECK-P8-NEXT:    std r0, 112(r1)
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 96
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    .cfi_offset v31, -32
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    li r7, 64
; CHECK-P8-NEXT:    std r30, 80(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    mr r30, r6
; CHECK-P8-NEXT:    stvx v31, r1, r7 # 16-byte Folded Spill
; CHECK-P8-NEXT:    xxswapd v31, vs0
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
; CHECK-P8-NEXT:    xxswapd v3, vs0
; CHECK-P8-NEXT:    bl __mulkf3
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    vmr v3, v2
; CHECK-P8-NEXT:    vmr v2, v31
; CHECK-P8-NEXT:    bl __addkf3
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    xxswapd vs0, v2
; CHECK-P8-NEXT:    addi r3, r1, 48
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    lbz r4, 63(r1)
; CHECK-P8-NEXT:    xori r4, r4, 128
; CHECK-P8-NEXT:    stb r4, 63(r1)
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    li r3, 64
; CHECK-P8-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
; CHECK-P8-NEXT:    ld r30, 80(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    addi r1, r1, 96
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
                      ptr nocapture readonly %b,
                      ptr nocapture readonly %c, ptr nocapture %res) {
entry:
  %0 = load fp128, ptr %a, align 16
  %1 = load fp128, ptr %b, align 16
  %2 = load fp128, ptr %c, align 16
  %mul = fmul contract fp128 %1, %2
  %add = fadd contract fp128 %0, %mul
  %sub = fsub fp128 0xL00000000000000008000000000000000, %add
  store fp128 %sub, ptr %res, align 16
  ret void
}

; *res = -((*a * *b) + *c); the product is the first operand of the add and
; *c is loaded after the multiply. Negation is an fsub from negative zero.
; pwr9 expectation: xsnmaddqp accumulating into the register holding *c.
; pwr8 expectation: __mulkf3, __addkf3, then a sign-bit toggle via the stack.
; Function Attrs: norecurse nounwind
define void @qpFnmadd_02(ptr nocapture readonly %a,
; CHECK-LABEL: qpFnmadd_02:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    lxv v3, 0(r4)
; CHECK-NEXT:    lxv v4, 0(r5)
; CHECK-NEXT:    xsnmaddqp v4, v2, v3
; CHECK-NEXT:    stxv v4, 0(r6)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpFnmadd_02:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 80
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r29, -24
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    stdu r1, -80(r1)
; CHECK-P8-NEXT:    std r0, 96(r1)
; CHECK-P8-NEXT:    mr r30, r6
; CHECK-P8-NEXT:    mr r29, r5
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
; CHECK-P8-NEXT:    xxswapd v3, vs0
; CHECK-P8-NEXT:    bl __mulkf3
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r29
; CHECK-P8-NEXT:    xxswapd v3, vs0
; CHECK-P8-NEXT:    bl __addkf3
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    xxswapd vs0, v2
; CHECK-P8-NEXT:    addi r3, r1, 32
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    lbz r4, 47(r1)
; CHECK-P8-NEXT:    xori r4, r4, 128
; CHECK-P8-NEXT:    stb r4, 47(r1)
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
; CHECK-P8-NEXT:    addi r1, r1, 80
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
                         ptr nocapture readonly %b,
                         ptr nocapture readonly %c, ptr nocapture %res) {
entry:
  %0 = load fp128, ptr %a, align 16
  %1 = load fp128, ptr %b, align 16
  %mul = fmul contract fp128 %0, %1
  %2 = load fp128, ptr %c, align 16
  %add = fadd contract fp128 %mul, %2
  %sub = fsub fp128 0xL00000000000000008000000000000000, %add
  store fp128 %sub, ptr %res, align 16
  ret void
}

; *res = *a - (*b * *c); the fsub carries both `contract` and `nsz`.
; pwr9 expectation: xsnmsubqp accumulating into the register holding *a.
; pwr8 expectation: __mulkf3 then __subkf3.
; Function Attrs: norecurse nounwind
define void @qpFmsub(ptr nocapture readonly %a,
; CHECK-LABEL: qpFmsub:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    lxv v3, 0(r4)
; CHECK-NEXT:    lxv v4, 0(r5)
; CHECK-NEXT:    xsnmsubqp v2, v3, v4
; CHECK-NEXT:    stxv v2, 0(r6)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpFmsub:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    stdu r1, -80(r1)
; CHECK-P8-NEXT:    std r0, 96(r1)
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 80
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    .cfi_offset v31, -32
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    li r7, 48
; CHECK-P8-NEXT:    std r30, 64(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    mr r30, r6
; CHECK-P8-NEXT:    stvx v31, r1, r7 # 16-byte Folded Spill
; CHECK-P8-NEXT:    xxswapd v31, vs0
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
; CHECK-P8-NEXT:    xxswapd v3, vs0
; CHECK-P8-NEXT:    bl __mulkf3
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    vmr v3, v2
; CHECK-P8-NEXT:    vmr v2, v31
; CHECK-P8-NEXT:    bl __subkf3
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    li r3, 48
; CHECK-P8-NEXT:    xxswapd vs0, v2
; CHECK-P8-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
; CHECK-P8-NEXT:    ld r30, 64(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    addi r1, r1, 80
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
                     ptr nocapture readonly %b,
                     ptr nocapture readonly %c, ptr nocapture %res) {
entry:
  %0 = load fp128, ptr %a, align 16
  %1 = load fp128, ptr %b, align 16
  %2 = load fp128, ptr %c, align 16
  %mul = fmul contract fp128 %1, %2
  %sub = fsub contract nsz fp128 %0, %mul
  store fp128 %sub, ptr %res, align 16
  ret void
}

; *res = (*a * *b) - *c with `contract` on both operations; *c is loaded after
; the multiply.
; pwr9 expectation: xsmsubqp accumulating into the register holding *c.
; pwr8 expectation: __mulkf3, then *c is loaded, then __subkf3.
; Function Attrs: norecurse nounwind
define void @qpFmsub_02(ptr nocapture readonly %a,
; CHECK-LABEL: qpFmsub_02:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    lxv v3, 0(r4)
; CHECK-NEXT:    lxv v4, 0(r5)
; CHECK-NEXT:    xsmsubqp v4, v2, v3
; CHECK-NEXT:    stxv v4, 0(r6)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpFmsub_02:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 64
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r29, -24
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    stdu r1, -64(r1)
; CHECK-P8-NEXT:    std r0, 80(r1)
; CHECK-P8-NEXT:    mr r30, r6
; CHECK-P8-NEXT:    mr r29, r5
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
; CHECK-P8-NEXT:    xxswapd v3, vs0
; CHECK-P8-NEXT:    bl __mulkf3
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r29
; CHECK-P8-NEXT:    xxswapd v3, vs0
; CHECK-P8-NEXT:    bl __subkf3
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    xxswapd vs0, v2
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
; CHECK-P8-NEXT:    addi r1, r1, 64
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
                        ptr nocapture readonly %b,
                        ptr nocapture readonly %c, ptr nocapture %res) {
entry:
  %0 = load fp128, ptr %a, align 16
  %1 = load fp128, ptr %b, align 16
  %mul = fmul contract fp128 %0, %1
  %2 = load fp128, ptr %c, align 16
  %sub = fsub contract fp128 %mul, %2
  store fp128 %sub, ptr %res, align 16
  ret void
}

; *res = -(*a - (*b * *c)). Here the inner fsub has `contract` but no `nsz`,
; and the outer negation is an fsub from negative zero.
; pwr9 expectation: xsnegqp on the register holding *b, then xsnmaddqp.
; pwr8 expectation: __mulkf3, __subkf3, then a sign-bit toggle via the stack.
; Function Attrs: norecurse nounwind
define void @qpFnmsub(ptr nocapture readonly %a,
; CHECK-LABEL: qpFnmsub:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v3, 0(r4)
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    lxv v4, 0(r5)
; CHECK-NEXT:    xsnegqp v3, v3
; CHECK-NEXT:    xsnmaddqp v2, v3, v4
; CHECK-NEXT:    stxv v2, 0(r6)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpFnmsub:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    stdu r1, -96(r1)
; CHECK-P8-NEXT:    std r0, 112(r1)
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 96
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    .cfi_offset v31, -32
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    li r7, 64
; CHECK-P8-NEXT:    std r30, 80(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    mr r30, r6
; CHECK-P8-NEXT:    stvx v31, r1, r7 # 16-byte Folded Spill
; CHECK-P8-NEXT:    xxswapd v31, vs0
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
; CHECK-P8-NEXT:    xxswapd v3, vs0
; CHECK-P8-NEXT:    bl __mulkf3
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    vmr v3, v2
; CHECK-P8-NEXT:    vmr v2, v31
; CHECK-P8-NEXT:    bl __subkf3
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    xxswapd vs0, v2
; CHECK-P8-NEXT:    addi r3, r1, 48
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    lbz r4, 63(r1)
; CHECK-P8-NEXT:    xori r4, r4, 128
; CHECK-P8-NEXT:    stb r4, 63(r1)
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    li r3, 64
; CHECK-P8-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
; CHECK-P8-NEXT:    ld r30, 80(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    addi r1, r1, 96
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
                      ptr nocapture readonly %b,
                      ptr nocapture readonly %c, ptr nocapture %res) {
entry:
  %0 = load fp128, ptr %a, align 16
  %1 = load fp128, ptr %b, align 16
  %2 = load fp128, ptr %c, align 16
  %mul = fmul contract fp128 %1, %2
  %sub = fsub contract fp128 %0, %mul
  %sub1 = fsub fp128 0xL00000000000000008000000000000000, %sub
  store fp128 %sub1, ptr %res, align 16
  ret void
}

; *res = -((*a * *b) - *c); *c is loaded after the multiply and the negation
; is an fsub from negative zero.
; pwr9 expectation: xsnmsubqp accumulating into the register holding *c.
; pwr8 expectation: __mulkf3, __subkf3, then a sign-bit toggle via the stack.
; Function Attrs: norecurse nounwind
define void @qpFnmsub_02(ptr nocapture readonly %a,
; CHECK-LABEL: qpFnmsub_02:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    lxv v3, 0(r4)
; CHECK-NEXT:    lxv v4, 0(r5)
; CHECK-NEXT:    xsnmsubqp v4, v2, v3
; CHECK-NEXT:    stxv v4, 0(r6)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpFnmsub_02:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 80
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r29, -24
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    stdu r1, -80(r1)
; CHECK-P8-NEXT:    std r0, 96(r1)
; CHECK-P8-NEXT:    mr r30, r6
; CHECK-P8-NEXT:    mr r29, r5
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
; CHECK-P8-NEXT:    xxswapd v3, vs0
; CHECK-P8-NEXT:    bl __mulkf3
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r29
; CHECK-P8-NEXT:    xxswapd v3, vs0
; CHECK-P8-NEXT:    bl __subkf3
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    xxswapd vs0, v2
; CHECK-P8-NEXT:    addi r3, r1, 32
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    lbz r4, 47(r1)
; CHECK-P8-NEXT:    xori r4, r4, 128
; CHECK-P8-NEXT:    stb r4, 47(r1)
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
; CHECK-P8-NEXT:    addi r1, r1, 80
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
                         ptr nocapture readonly %b,
                         ptr nocapture readonly %c, ptr nocapture %res) {
entry:
  %0 = load fp128, ptr %a, align 16
  %1 = load fp128, ptr %b, align 16
  %mul = fmul contract fp128 %0, %1
  %2 = load fp128, ptr %c, align 16
  %sub = fsub contract fp128 %mul, %2
  %sub1 = fsub fp128 0xL00000000000000008000000000000000, %sub
  store fp128 %sub1, ptr %res, align 16
  ret void
}