; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE

; assemble_acc
; Builds an accumulator from four copies of %vc and stores it to %ptr.
declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
define void @ass_acc(ptr %ptr, <16 x i8> %vc) {
; CHECK-LABEL: ass_acc:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xxlor vs3, v2, v2
; CHECK-NEXT:    xxlor vs2, v2, v2
; CHECK-NEXT:    xxlor vs0, vs2, vs2
; CHECK-NEXT:    xxlor vs1, vs3, vs3
; CHECK-NEXT:    stxv vs0, 48(r3)
; CHECK-NEXT:    stxv vs1, 32(r3)
; CHECK-NEXT:    stxv vs2, 16(r3)
; CHECK-NEXT:    stxv vs3, 0(r3)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: ass_acc:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    xxlor vs3, v2, v2
; CHECK-BE-NEXT:    xxlor vs2, v2, v2
; CHECK-BE-NEXT:    xxlor vs0, vs2, vs2
; CHECK-BE-NEXT:    xxlor vs1, vs3, vs3
; CHECK-BE-NEXT:    stxv vs1, 16(r3)
; CHECK-BE-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-NEXT:    stxv vs3, 48(r3)
; CHECK-BE-NEXT:    stxv vs2, 32(r3)
; CHECK-BE-NEXT:    blr
entry:
  %0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc)
  store <512 x i1> %0, ptr %ptr, align 64
  ret void
}

; xxmtacc
; Passes an assembled accumulator through the xxmtacc intrinsic and stores
; the result to %ptr.
declare <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1>)
define void @int_xxmtacc(ptr %ptr, <16 x i8> %vc) {
; CHECK-LABEL: int_xxmtacc:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xxlor vs3, v2, v2
; CHECK-NEXT:    xxlor vs2, v2, v2
; CHECK-NEXT:    xxlor vs0, vs2, vs2
; CHECK-NEXT:    xxlor vs1, vs3, vs3
; CHECK-NEXT:    xxmtacc acc0
; CHECK-NEXT:    stxv vs0, 48(r3)
; CHECK-NEXT:    stxv vs1, 32(r3)
; CHECK-NEXT:    stxv vs2, 16(r3)
; CHECK-NEXT:    stxv vs3, 0(r3)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: int_xxmtacc:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    xxlor vs3, v2, v2
; CHECK-BE-NEXT:    xxlor vs2, v2, v2
; CHECK-BE-NEXT:    xxlor vs0, vs2, vs2
; CHECK-BE-NEXT:    xxlor vs1, vs3, vs3
; CHECK-BE-NEXT:    xxmtacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r3)
; CHECK-BE-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-NEXT:    stxv vs3, 48(r3)
; CHECK-BE-NEXT:    stxv vs2, 32(r3)
; CHECK-BE-NEXT:    blr
entry:
; One xxmtacc is generated from the call to assemble.acc then one xxmtacc is
; generated from the call to xxmtacc then one xxmfacc is generated for the store
; NOTE(review): the assertions above contain a single xxmtacc and no xxmfacc,
; so the description above may predate the current output - confirm.
  %0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc)
  %1 = tail call <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1> %0)
  store <512 x i1> %1, ptr %ptr, align 64
  ret void
}

; xxmfacc
; Passes an assembled accumulator through the xxmfacc intrinsic and stores
; the result to %ptr.
declare <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1>)
define void @int_xxmfacc(ptr %ptr, <16 x i8> %vc) {
; CHECK-LABEL: int_xxmfacc:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xxlor vs3, v2, v2
; CHECK-NEXT:    xxlor vs2, v2, v2
; CHECK-NEXT:    xxlor vs0, vs2, vs2
; CHECK-NEXT:    xxlor vs1, vs3, vs3
; CHECK-NEXT:    stxv vs0, 48(r3)
; CHECK-NEXT:    stxv vs1, 32(r3)
; CHECK-NEXT:    stxv vs2, 16(r3)
; CHECK-NEXT:    stxv vs3, 0(r3)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: int_xxmfacc:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    xxlor vs3, v2, v2
; CHECK-BE-NEXT:    xxlor vs2, v2, v2
; CHECK-BE-NEXT:    xxlor vs0, vs2, vs2
; CHECK-BE-NEXT:    xxlor vs1, vs3, vs3
; CHECK-BE-NEXT:    stxv vs1, 16(r3)
; CHECK-BE-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-NEXT:    stxv vs3, 48(r3)
; CHECK-BE-NEXT:    stxv vs2, 32(r3)
; CHECK-BE-NEXT:    blr
entry:
; One xxmtacc is generated from the call to assemble.acc then one xxmfacc is
; generated from the call to xxmfacc then one xxmfacc is generated for the store
; NOTE(review): the assertions above contain neither xxmtacc nor xxmfacc,
; so the description above may predate the current output - confirm.
  %0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc)
  %1 = tail call <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1> %0)
  store <512 x i1> %1, ptr %ptr, align 64
  ret void
}

; xxsetaccz
; Zeroes an accumulator with the xxsetaccz intrinsic and stores it to %ptr.
declare <512 x i1> @llvm.ppc.mma.xxsetaccz()
define void @int_xxsetaccz(ptr %ptr) {
; CHECK-LABEL: int_xxsetaccz:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xxsetaccz acc0
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs0, 48(r3)
; CHECK-NEXT:    stxv vs1, 32(r3)
; CHECK-NEXT:    stxv vs2, 16(r3)
; CHECK-NEXT:    stxv vs3, 0(r3)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: int_xxsetaccz:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    xxsetaccz acc0
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r3)
; CHECK-BE-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-NEXT:    stxv vs3, 48(r3)
; CHECK-BE-NEXT:    stxv vs2, 32(r3)
; CHECK-BE-NEXT:    blr
entry:
  %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  store <512 x i1> %0, ptr %ptr, align 64
  ret void
}

; disassemble_acc
; Zeroes an accumulator, splits it into its four <16 x i8> parts and stores
; each part through its own pointer argument.
declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1>)
define void @disass_acc(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4) {
; CHECK-LABEL: disass_acc:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xxsetaccz acc0
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs3, 0(r3)
; CHECK-NEXT:    stxv vs2, 0(r4)
; CHECK-NEXT:    stxv vs1, 0(r5)
; CHECK-NEXT:    stxv vs0, 0(r6)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: disass_acc:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    xxsetaccz acc0
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-NEXT:    stxv vs1, 0(r4)
; CHECK-BE-NEXT:    stxv vs2, 0(r5)
; CHECK-BE-NEXT:    stxv vs3, 0(r6)
; CHECK-BE-NEXT:    blr
entry:
  %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %0)
  %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 0
  %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 1
  %4 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 2
  %5 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 3
  store <16 x i8> %2, ptr %ptr1, align 16
  store <16 x i8> %3, ptr %ptr2, align 16
  store <16 x i8> %4, ptr %ptr3, align 16
  store <16 x i8> %5, ptr %ptr4, align 16
  ret void
}

; An accumulator reaches the final store through a phi: it is zeroed in
; if.then, or loaded from %ptr and updated by xvi4ger8pp in if.else.
declare <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1>, <16 x i8>, <16 x i8>)
define void @testBranch(ptr %ptr, <16 x i8> %vc, i32 %val) {
; CHECK-LABEL: testBranch:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cmplwi r7, 0
; CHECK-NEXT:    beq cr0, .LBB5_2
; CHECK-NEXT:  # %bb.1: # %if.then
; CHECK-NEXT:    xxsetaccz acc0
; CHECK-NEXT:    b .LBB5_3
; CHECK-NEXT:  .LBB5_2: # %if.else
; CHECK-NEXT:    lxv vs1, 32(r3)
; CHECK-NEXT:    lxv vs0, 48(r3)
; CHECK-NEXT:    lxv vs3, 0(r3)
; CHECK-NEXT:    lxv vs2, 16(r3)
; CHECK-NEXT:    xxmtacc acc0
; CHECK-NEXT:    xvi4ger8pp acc0, v2, v2
; CHECK-NEXT:  .LBB5_3: # %if.end
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs0, 48(r3)
; CHECK-NEXT:    stxv vs1, 32(r3)
; CHECK-NEXT:    stxv vs2, 16(r3)
; CHECK-NEXT:    stxv vs3, 0(r3)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testBranch:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    cmplwi r7, 0
; CHECK-BE-NEXT:    beq cr0, .LBB5_2
; CHECK-BE-NEXT:  # %bb.1: # %if.then
; CHECK-BE-NEXT:    xxsetaccz acc0
; CHECK-BE-NEXT:    b .LBB5_3
; CHECK-BE-NEXT:  .LBB5_2: # %if.else
; CHECK-BE-NEXT:    lxv vs1, 16(r3)
; CHECK-BE-NEXT:    lxv vs0, 0(r3)
; CHECK-BE-NEXT:    lxv vs3, 48(r3)
; CHECK-BE-NEXT:    lxv vs2, 32(r3)
; CHECK-BE-NEXT:    xxmtacc acc0
; CHECK-BE-NEXT:    xvi4ger8pp acc0, v2, v2
; CHECK-BE-NEXT:  .LBB5_3: # %if.end
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r3)
; CHECK-BE-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-NEXT:    stxv vs3, 48(r3)
; CHECK-BE-NEXT:    stxv vs2, 32(r3)
; CHECK-BE-NEXT:    blr
entry:
  %tobool = icmp eq i32 %val, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:
  %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  br label %if.end

if.else:
  %1 = load <512 x i1>, ptr %ptr, align 64
  %2 = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
  br label %if.end

if.end:
  %vq1.0 = phi <512 x i1> [ %0, %if.then ], [ %2, %if.else ]
  store <512 x i1> %vq1.0, ptr %ptr, align 64
  ret void
}

; The following test cases check that the xxsetaccz instruction is correctly rematerialized
declare <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1>, <16 x i8>, <16 x i8>)
declare <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1>, <16 x i8>, <16 x i8>)
declare <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1>, <16 x i8>, <16 x i8>)

; Two xxsetaccz calls feed two identical xvf32gerpp calls; the assertions
; expect one accumulator to be computed and stored to both slots of %res.
define void @testcse(ptr %res, <16 x i8> %vc) {
; CHECK-LABEL: testcse:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xxsetaccz acc0
; CHECK-NEXT:    xvf32gerpp acc0, v2, v2
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs0, 48(r3)
; CHECK-NEXT:    stxv vs1, 32(r3)
; CHECK-NEXT:    stxv vs2, 16(r3)
; CHECK-NEXT:    stxv vs3, 0(r3)
; CHECK-NEXT:    stxv vs0, 112(r3)
; CHECK-NEXT:    stxv vs1, 96(r3)
; CHECK-NEXT:    stxv vs2, 80(r3)
; CHECK-NEXT:    stxv vs3, 64(r3)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testcse:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    xxsetaccz acc0
; CHECK-BE-NEXT:    xvf32gerpp acc0, v2, v2
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r3)
; CHECK-BE-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-NEXT:    stxv vs3, 48(r3)
; CHECK-BE-NEXT:    stxv vs2, 32(r3)
; CHECK-BE-NEXT:    stxv vs1, 80(r3)
; CHECK-BE-NEXT:    stxv vs0, 64(r3)
; CHECK-BE-NEXT:    stxv vs3, 112(r3)
; CHECK-BE-NEXT:    stxv vs2, 96(r3)
; CHECK-BE-NEXT:    blr
entry:
  %0 = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %1 = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %2 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
  %3 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
  %4 = getelementptr inbounds <512 x i1>, ptr %res, i64 1
  store <512 x i1> %2, ptr %res, align 64
  store <512 x i1> %3, ptr %4, align 64
  ret void
}

; Two xxsetaccz calls feed two different operations (xvf32gerpp and
; xvf32gerpn); the assertions expect two separately zeroed accumulators.
define void @testcse2(ptr %res, <16 x i8> %vc) {
; CHECK-LABEL: testcse2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xxsetaccz acc0
; CHECK-NEXT:    xxsetaccz acc1
; CHECK-NEXT:    xvf32gerpp acc1, v2, v2
; CHECK-NEXT:    xvf32gerpn acc0, v2, v2
; CHECK-NEXT:    xxmfacc acc1
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs4, 48(r3)
; CHECK-NEXT:    stxv vs5, 32(r3)
; CHECK-NEXT:    stxv vs6, 16(r3)
; CHECK-NEXT:    stxv vs7, 0(r3)
; CHECK-NEXT:    stxv vs0, 112(r3)
; CHECK-NEXT:    stxv vs1, 96(r3)
; CHECK-NEXT:    stxv vs2, 80(r3)
; CHECK-NEXT:    stxv vs3, 64(r3)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testcse2:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    xxsetaccz acc0
; CHECK-BE-NEXT:    xxsetaccz acc1
; CHECK-BE-NEXT:    xvf32gerpp acc1, v2, v2
; CHECK-BE-NEXT:    xvf32gerpn acc0, v2, v2
; CHECK-BE-NEXT:    xxmfacc acc1
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs5, 16(r3)
; CHECK-BE-NEXT:    stxv vs4, 0(r3)
; CHECK-BE-NEXT:    stxv vs7, 48(r3)
; CHECK-BE-NEXT:    stxv vs6, 32(r3)
; CHECK-BE-NEXT:    stxv vs1, 80(r3)
; CHECK-BE-NEXT:    stxv vs0, 64(r3)
; CHECK-BE-NEXT:    stxv vs3, 112(r3)
; CHECK-BE-NEXT:    stxv vs2, 96(r3)
; CHECK-BE-NEXT:    blr
entry:
  %0 = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %1 = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %2 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
  %3 = call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
  %4 = getelementptr inbounds <512 x i1>, ptr %res, i64 1
  store <512 x i1> %2, ptr %res, align 64
  store <512 x i1> %3, ptr %4, align 64
  ret void
}

; A single xxsetaccz result is consumed by two different operations; the
; assertions expect xxsetaccz to be emitted once for each accumulator.
define void @testcse3(ptr %res, <16 x i8> %vc) {
; CHECK-LABEL: testcse3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xxsetaccz acc0
; CHECK-NEXT:    xxsetaccz acc1
; CHECK-NEXT:    xvf32gerpp acc1, v2, v2
; CHECK-NEXT:    xvf32gerpn acc0, v2, v2
; CHECK-NEXT:    xxmfacc acc1
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs4, 48(r3)
; CHECK-NEXT:    stxv vs5, 32(r3)
; CHECK-NEXT:    stxv vs6, 16(r3)
; CHECK-NEXT:    stxv vs7, 0(r3)
; CHECK-NEXT:    stxv vs0, 112(r3)
; CHECK-NEXT:    stxv vs1, 96(r3)
; CHECK-NEXT:    stxv vs2, 80(r3)
; CHECK-NEXT:    stxv vs3, 64(r3)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testcse3:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    xxsetaccz acc0
; CHECK-BE-NEXT:    xxsetaccz acc1
; CHECK-BE-NEXT:    xvf32gerpp acc1, v2, v2
; CHECK-BE-NEXT:    xvf32gerpn acc0, v2, v2
; CHECK-BE-NEXT:    xxmfacc acc1
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs5, 16(r3)
; CHECK-BE-NEXT:    stxv vs4, 0(r3)
; CHECK-BE-NEXT:    stxv vs7, 48(r3)
; CHECK-BE-NEXT:    stxv vs6, 32(r3)
; CHECK-BE-NEXT:    stxv vs1, 80(r3)
; CHECK-BE-NEXT:    stxv vs0, 64(r3)
; CHECK-BE-NEXT:    stxv vs3, 112(r3)
; CHECK-BE-NEXT:    stxv vs2, 96(r3)
; CHECK-BE-NEXT:    blr
entry:
  %0 = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %1 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
  %2 = call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
  %3 = getelementptr inbounds <512 x i1>, ptr %res, i64 1
  store <512 x i1> %1, ptr %res, align 64
  store <512 x i1> %2, ptr %3, align 64
  ret void
}

; Loop variant: each iteration zeroes three accumulators, updates each with a
; different xvf32ger* operation on vectors loaded from %vc, and stores them to
; three consecutive <512 x i1> slots of %res.
define void @testcse4(ptr %res, i32 %lim, ptr %vc) {
; CHECK-LABEL: testcse4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cmpwi r4, 1
; CHECK-NEXT:    bltlr cr0
; CHECK-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-NEXT:    clrldi r4, r4, 32
; CHECK-NEXT:    li r6, 0
; CHECK-NEXT:    mtctr r4
; CHECK-NEXT:    li r4, 0
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  .LBB9_2: # %for.body
; CHECK-NEXT:    #
; CHECK-NEXT:    rldic r7, r6, 4, 28
; CHECK-NEXT:    xxsetaccz acc2
; CHECK-NEXT:    xxsetaccz acc1
; CHECK-NEXT:    addi r6, r6, 6
; CHECK-NEXT:    lxvx vs0, r5, r7
; CHECK-NEXT:    add r7, r5, r7
; CHECK-NEXT:    lxv vs1, 16(r7)
; CHECK-NEXT:    xvf32gerpp acc2, vs0, vs1
; CHECK-NEXT:    lxv vs0, 32(r7)
; CHECK-NEXT:    lxv vs1, 48(r7)
; CHECK-NEXT:    xvf32gerpn acc1, vs0, vs1
; CHECK-NEXT:    lxv vs12, 64(r7)
; CHECK-NEXT:    lxv vs13, 80(r7)
; CHECK-NEXT:    xxsetaccz acc0
; CHECK-NEXT:    rldic r7, r4, 6, 26
; CHECK-NEXT:    addi r4, r4, 3
; CHECK-NEXT:    add r8, r3, r7
; CHECK-NEXT:    xxmfacc acc2
; CHECK-NEXT:    xvf32gernp acc0, vs12, vs13
; CHECK-NEXT:    stxvx vs11, r3, r7
; CHECK-NEXT:    stxv vs8, 48(r8)
; CHECK-NEXT:    xxmfacc acc1
; CHECK-NEXT:    stxv vs9, 32(r8)
; CHECK-NEXT:    stxv vs10, 16(r8)
; CHECK-NEXT:    stxv vs4, 112(r8)
; CHECK-NEXT:    stxv vs5, 96(r8)
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs6, 80(r8)
; CHECK-NEXT:    stxv vs7, 64(r8)
; CHECK-NEXT:    stxv vs0, 176(r8)
; CHECK-NEXT:    stxv vs1, 160(r8)
; CHECK-NEXT:    stxv vs2, 144(r8)
; CHECK-NEXT:    stxv vs3, 128(r8)
; CHECK-NEXT:    bdnz .LBB9_2
; CHECK-NEXT:  # %bb.3: # %for.cond.cleanup
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testcse4:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    cmpwi r4, 1
; CHECK-BE-NEXT:    bltlr cr0
; CHECK-BE-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-BE-NEXT:    clrldi r4, r4, 32
; CHECK-BE-NEXT:    li r6, 0
; CHECK-BE-NEXT:    mtctr r4
; CHECK-BE-NEXT:    li r4, 0
; CHECK-BE-NEXT:    .p2align 4
; CHECK-BE-NEXT:  .LBB9_2: # %for.body
; CHECK-BE-NEXT:    #
; CHECK-BE-NEXT:    rldic r7, r6, 4, 28
; CHECK-BE-NEXT:    xxsetaccz acc2
; CHECK-BE-NEXT:    xxsetaccz acc1
; CHECK-BE-NEXT:    addi r6, r6, 6
; CHECK-BE-NEXT:    lxvx vs0, r5, r7
; CHECK-BE-NEXT:    add r7, r5, r7
; CHECK-BE-NEXT:    lxv vs1, 16(r7)
; CHECK-BE-NEXT:    xvf32gerpp acc2, vs0, vs1
; CHECK-BE-NEXT:    lxv vs0, 32(r7)
; CHECK-BE-NEXT:    lxv vs1, 48(r7)
; CHECK-BE-NEXT:    xvf32gerpn acc1, vs0, vs1
; CHECK-BE-NEXT:    lxv vs12, 64(r7)
; CHECK-BE-NEXT:    lxv vs13, 80(r7)
; CHECK-BE-NEXT:    xxsetaccz acc0
; CHECK-BE-NEXT:    rldic r7, r4, 6, 26
; CHECK-BE-NEXT:    addi r4, r4, 3
; CHECK-BE-NEXT:    add r8, r3, r7
; CHECK-BE-NEXT:    xxmfacc acc2
; CHECK-BE-NEXT:    xvf32gernp acc0, vs12, vs13
; CHECK-BE-NEXT:    stxvx vs8, r3, r7
; CHECK-BE-NEXT:    stxv vs9, 16(r8)
; CHECK-BE-NEXT:    xxmfacc acc1
; CHECK-BE-NEXT:    stxv vs11, 48(r8)
; CHECK-BE-NEXT:    stxv vs10, 32(r8)
; CHECK-BE-NEXT:    stxv vs5, 80(r8)
; CHECK-BE-NEXT:    stxv vs4, 64(r8)
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs7, 112(r8)
; CHECK-BE-NEXT:    stxv vs6, 96(r8)
; CHECK-BE-NEXT:    stxv vs1, 144(r8)
; CHECK-BE-NEXT:    stxv vs0, 128(r8)
; CHECK-BE-NEXT:    stxv vs3, 176(r8)
; CHECK-BE-NEXT:    stxv vs2, 160(r8)
; CHECK-BE-NEXT:    bdnz .LBB9_2
; CHECK-BE-NEXT:  # %bb.3: # %for.cond.cleanup
; CHECK-BE-NEXT:    blr
entry:
  %cmp55 = icmp sgt i32 %lim, 0
  br i1 %cmp55, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %lim to i64
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %for.body, %for.body.preheader
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %1 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %2 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %3 = trunc i64 %indvars.iv to i32
  %mul = mul nsw i32 %3, 6
  %idxprom = zext i32 %mul to i64
  %arrayidx = getelementptr inbounds <16 x i8>, ptr %vc, i64 %idxprom
  %4 = load <16 x i8>, ptr %arrayidx, align 16
  %add2 = or disjoint i32 %mul, 1
  %idxprom3 = zext i32 %add2 to i64
  %arrayidx4 = getelementptr inbounds <16 x i8>, ptr %vc, i64 %idxprom3
  %5 = load <16 x i8>, ptr %arrayidx4, align 16
  %6 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %4, <16 x i8> %5)
  %add6 = add nuw nsw i32 %mul, 2
  %idxprom7 = zext i32 %add6 to i64
  %arrayidx8 = getelementptr inbounds <16 x i8>, ptr %vc, i64 %idxprom7
  %7 = load <16 x i8>, ptr %arrayidx8, align 16
  %add10 = add nuw nsw i32 %mul, 3
  %idxprom11 = zext i32 %add10 to i64
  %arrayidx12 = getelementptr inbounds <16 x i8>, ptr %vc, i64 %idxprom11
  %8 = load <16 x i8>, ptr %arrayidx12, align 16
  %9 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> %1, <16 x i8> %7, <16 x i8> %8)
  %add14 = add nuw nsw i32 %mul, 4
  %idxprom15 = zext i32 %add14 to i64
  %arrayidx16 = getelementptr inbounds <16 x i8>, ptr %vc, i64 %idxprom15
  %10 = load <16 x i8>, ptr %arrayidx16, align 16
  %add18 = add nuw nsw i32 %mul, 5
  %idxprom19 = zext i32 %add18 to i64
  %arrayidx20 = getelementptr inbounds <16 x i8>, ptr %vc, i64 %idxprom19
  %11 = load <16 x i8>, ptr %arrayidx20, align 16
  %12 = tail call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> %2, <16 x i8> %10, <16 x i8> %11)
  %mul21 = mul i64 %indvars.iv, 3
  %idx.ext = and i64 %mul21, 4294967295
  %add.ptr = getelementptr inbounds <512 x i1>, ptr %res, i64 %idx.ext
  store <512 x i1> %6, ptr %add.ptr, align 64
  %add.ptr26 = getelementptr inbounds <512 x i1>, ptr %add.ptr, i64 1
  store <512 x i1> %9, ptr %add.ptr26, align 64
  %add.ptr30 = getelementptr inbounds <512 x i1>, ptr %add.ptr, i64 2
  store <512 x i1> %12, ptr %add.ptr30, align 64
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}

; A zeroed accumulator is stored to %dst and then updated by xvf32gerpp; the
; updated value is only stored after the call to testRedundantPrimeUnprimeF,
; so it is carried across that call.
declare i32 @testRedundantPrimeUnprimeF()
define void @testRedundantPrimeUnprime(ptr %dst, <16 x i8> %vc) nounwind {
; CHECK-LABEL: testRedundantPrimeUnprime:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -112(r1)
; CHECK-NEXT:    xxsetaccz acc0
; CHECK-NEXT:    xxsetaccz acc1
; CHECK-NEXT:    mr r30, r3
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs0, 48(r3)
; CHECK-NEXT:    stxv vs1, 32(r3)
; CHECK-NEXT:    stxv vs2, 16(r3)
; CHECK-NEXT:    stxv vs3, 0(r3)
; CHECK-NEXT:    xvf32gerpp acc1, v2, v2
; CHECK-NEXT:    xxmfacc acc1
; CHECK-NEXT:    stxv vs4, 80(r1)
; CHECK-NEXT:    stxv vs5, 64(r1)
; CHECK-NEXT:    stxv vs6, 48(r1)
; CHECK-NEXT:    stxv vs7, 32(r1)
; CHECK-NEXT:    bl testRedundantPrimeUnprimeF@notoc
; CHECK-NEXT:    lxvp vsp0, 64(r1)
; CHECK-NEXT:    lxvp vsp2, 32(r1)
; CHECK-NEXT:    stxv vs0, 112(r30)
; CHECK-NEXT:    stxv vs1, 96(r30)
; CHECK-NEXT:    stxv vs2, 80(r30)
; CHECK-NEXT:    stxv vs3, 64(r30)
; CHECK-NEXT:    addi r1, r1, 112
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testRedundantPrimeUnprime:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    mflr r0
; CHECK-BE-NEXT:    std r0, 16(r1)
; CHECK-BE-NEXT:    stdu r1, -192(r1)
; CHECK-BE-NEXT:    xxsetaccz acc0
; CHECK-BE-NEXT:    xxsetaccz acc1
; CHECK-BE-NEXT:    std r30, 176(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT:    mr r30, r3
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r3)
; CHECK-BE-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-NEXT:    stxv vs3, 48(r3)
; CHECK-BE-NEXT:    stxv vs2, 32(r3)
; CHECK-BE-NEXT:    xvf32gerpp acc1, v2, v2
; CHECK-BE-NEXT:    xxmfacc acc1
; CHECK-BE-NEXT:    stxv vs4, 112(r1)
; CHECK-BE-NEXT:    stxv vs5, 128(r1)
; CHECK-BE-NEXT:    stxv vs6, 144(r1)
; CHECK-BE-NEXT:    stxv vs7, 160(r1)
; CHECK-BE-NEXT:    bl testRedundantPrimeUnprimeF
; CHECK-BE-NEXT:    nop
; CHECK-BE-NEXT:    lxvp vsp0, 112(r1)
; CHECK-BE-NEXT:    lxvp vsp2, 144(r1)
; CHECK-BE-NEXT:    stxv vs3, 112(r30)
; CHECK-BE-NEXT:    stxv vs2, 96(r30)
; CHECK-BE-NEXT:    stxv vs1, 80(r30)
; CHECK-BE-NEXT:    stxv vs0, 64(r30)
; CHECK-BE-NEXT:    ld r30, 176(r1) # 8-byte Folded Reload
; CHECK-BE-NEXT:    addi r1, r1, 192
; CHECK-BE-NEXT:    ld r0, 16(r1)
; CHECK-BE-NEXT:    mtlr r0
; CHECK-BE-NEXT:    blr
entry:
  %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  store <512 x i1> %0, ptr %dst, align 64
  %1 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
  %call = tail call signext i32 @testRedundantPrimeUnprimeF()
  %add.ptr1 = getelementptr inbounds <512 x i1>, ptr %dst, i64 1
  store <512 x i1> %1, ptr %add.ptr1, align 64
  ret void
}

; Paired-vector intrinsics; lxvp supplies the <256 x i1> operand in the
; test_ldst_* functions below.
declare <256 x i1> @llvm.ppc.vsx.lxvp(ptr)
declare void @llvm.ppc.vsx.stxvp(<256 x i1>, ptr)

; Loads the vector pair from %vpp plus 8 bytes and feeds it, together with an
; accumulator loaded from %vqp, to pmxvf64gernn; the result goes to %resp.
; Function Attrs: nofree nounwind
define void @test_ldst_1(ptr nocapture readonly %vqp, ptr %vpp, <16 x i8> %vc, ptr nocapture %resp) {
; CHECK-LABEL: test_ldst_1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv vs1, 32(r3)
; CHECK-NEXT:    lxv vs0, 48(r3)
; CHECK-NEXT:    lxv vs3, 0(r3)
; CHECK-NEXT:    lxv vs2, 16(r3)
; CHECK-NEXT:    plxvp vsp36, 8(r4), 0
; CHECK-NEXT:    xxmtacc acc0
; CHECK-NEXT:    pmxvf64gernn acc0, vsp36, v2, 0, 0
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs0, 48(r7)
; CHECK-NEXT:    stxv vs1, 32(r7)
; CHECK-NEXT:    stxv vs2, 16(r7)
; CHECK-NEXT:    stxv vs3, 0(r7)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test_ldst_1:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxv vs1, 16(r3)
; CHECK-BE-NEXT:    lxv vs0, 0(r3)
; CHECK-BE-NEXT:    lxv vs3, 48(r3)
; CHECK-BE-NEXT:    lxv vs2, 32(r3)
; CHECK-BE-NEXT:    plxvp vsp36, 8(r4), 0
; CHECK-BE-NEXT:    xxmtacc acc0
; CHECK-BE-NEXT:    pmxvf64gernn acc0, vsp36, v2, 0, 0
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r7)
; CHECK-BE-NEXT:    stxv vs0, 0(r7)
; CHECK-BE-NEXT:    stxv vs3, 48(r7)
; CHECK-BE-NEXT:    stxv vs2, 32(r7)
; CHECK-BE-NEXT:    blr
entry:
  %0 = load <512 x i1>, ptr %vqp, align 64
  %1 = getelementptr i8, ptr %vpp, i64 8
  %2 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %1)
  %3 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> %0, <256 x i1> %2, <16 x i8> %vc, i32 0, i32 0)
  store <512 x i1> %3, ptr %resp, align 64
  ret void
}

; Loads the vector pair directly from %vpp and feeds it, together with an
; accumulator loaded from %vqp, to xvf64gernp; the result goes to %resp.
; Function Attrs: nofree nounwind
define void @test_ldst_2(ptr nocapture readonly %vqp, ptr %vpp, <16 x i8> %vc, ptr nocapture %resp) {
; CHECK-LABEL: test_ldst_2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv vs1, 32(r3)
; CHECK-NEXT:    lxv vs0, 48(r3)
; CHECK-NEXT:    lxv vs3, 0(r3)
; CHECK-NEXT:    lxv vs2, 16(r3)
; CHECK-NEXT:    xxmtacc acc0
; CHECK-NEXT:    lxvp vsp36, 0(r4)
; CHECK-NEXT:    xvf64gernp acc0, vsp36, v2
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs0, 48(r7)
; CHECK-NEXT:    stxv vs1, 32(r7)
; CHECK-NEXT:    stxv vs2, 16(r7)
; CHECK-NEXT:    stxv vs3, 0(r7)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test_ldst_2:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxv vs1, 16(r3)
; CHECK-BE-NEXT:    lxv vs0, 0(r3)
; CHECK-BE-NEXT:    lxv vs3, 48(r3)
; CHECK-BE-NEXT:    lxv vs2, 32(r3)
; CHECK-BE-NEXT:    xxmtacc acc0
; CHECK-BE-NEXT:    lxvp vsp36, 0(r4)
; CHECK-BE-NEXT:    xvf64gernp acc0, vsp36, v2
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r7)
; CHECK-BE-NEXT:    stxv vs0, 0(r7)
; CHECK-BE-NEXT:    stxv vs3, 48(r7)
; CHECK-BE-NEXT:    stxv vs2, 32(r7)
; CHECK-BE-NEXT:    blr
entry:
  %0 = load <512 x i1>, ptr %vqp, align 64
  %1 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %vpp)
  %2 = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> %0, <256 x i1> %1, <16 x i8> %vc)
  store <512 x i1> %2, ptr %resp, align 64
  ret void
}

; Same body as test_ldst_2, but the signature carries an extra i64 %offs
; argument, which changes the registers holding %vpp and %resp.
; NOTE(review): %offs is never used in the body - confirm whether an offset
; computation on %vpp was intended here.
; Function Attrs: nofree nounwind
define void @test_ldst_3(ptr nocapture readonly %vqp, i64 %offs, ptr %vpp, <16 x i8> %vc, ptr nocapture %resp) {
; CHECK-LABEL: test_ldst_3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv vs1, 32(r3)
; CHECK-NEXT:    lxv vs0, 48(r3)
; CHECK-NEXT:    lxv vs3, 0(r3)
; CHECK-NEXT:    lxv vs2, 16(r3)
; CHECK-NEXT:    xxmtacc acc0
; CHECK-NEXT:    lxvp vsp36, 0(r5)
; CHECK-NEXT:    xvf64gernp acc0, vsp36, v2
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs0, 48(r9)
; CHECK-NEXT:    stxv vs1, 32(r9)
; CHECK-NEXT:    stxv vs2, 16(r9)
; CHECK-NEXT:    stxv vs3, 0(r9)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test_ldst_3:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxv vs1, 16(r3)
; CHECK-BE-NEXT:    lxv vs0, 0(r3)
; CHECK-BE-NEXT:    lxv vs3, 48(r3)
; CHECK-BE-NEXT:    lxv vs2, 32(r3)
; CHECK-BE-NEXT:    xxmtacc acc0
; CHECK-BE-NEXT:    lxvp vsp36, 0(r5)
; CHECK-BE-NEXT:    xvf64gernp acc0, vsp36, v2
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r9)
; CHECK-BE-NEXT:    stxv vs0, 0(r9)
; CHECK-BE-NEXT:    stxv vs3, 48(r9)
; CHECK-BE-NEXT:    stxv vs2, 32(r9)
; CHECK-BE-NEXT:    blr
entry:
  %0 = load <512 x i1>, ptr %vqp, align 64
  %1 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %vpp)
  %2 = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> %0, <256 x i1> %1, <16 x i8> %vc)
  store <512 x i1> %2, ptr %resp, align 64
  ret void
}

declare <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32)
declare <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>)