; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN:   -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   < %s | FileCheck %s

; On future CPU with PC Relative addressing enabled, it is possible for the
; linker to optimize GOT indirect accesses. In order for the linker to do this
; the compiler needs to add a hint using the R_PPC64_PCREL_OPT relocation.
; This test checks that the compiler adds the R_PPC64_PCREL_OPT relocation
; correctly.

; External globals read and written by the functions in this file. The
; expected assembly reaches each of them through a `@got@pcrel` pld.
@input8 = external local_unnamed_addr global i8, align 1
@output8 = external local_unnamed_addr global i8, align 1
@input16 = external local_unnamed_addr global i16, align 2
@output16 = external local_unnamed_addr global i16, align 2
@input32 = external global i32, align 4
@output32 = external local_unnamed_addr global i32, align 4
@input64 = external local_unnamed_addr global i64, align 8
@output64 = external local_unnamed_addr global i64, align 8
@input128 = external local_unnamed_addr global i128, align 16
@output128 = external local_unnamed_addr global i128, align 16
@inputf32 = external local_unnamed_addr global float, align 4
@outputf32 = external local_unnamed_addr global float, align 4
@inputf64 = external local_unnamed_addr global double, align 8
@outputf64 = external local_unnamed_addr global double, align 8
@inputVi32 = external local_unnamed_addr global <4 x i32>, align 16
@outputVi32 = external local_unnamed_addr global <4 x i32>, align 16
@inputVi64 = external local_unnamed_addr global <2 x i64>, align 16
@outputVi64 = external local_unnamed_addr global <2 x i64>, align 16
@ArrayIn = external global [10 x i32], align 4
@ArrayOut = external local_unnamed_addr global [10 x i32], align 4
@IntPtrIn = external local_unnamed_addr global ptr, align 8
@IntPtrOut = external local_unnamed_addr global ptr, align 8
@FuncPtrIn = external local_unnamed_addr global ptr, align 8
@FuncPtrOut = external local_unnamed_addr global ptr, align 8

define dso_local void @ReadWrite8() local_unnamed_addr #0 {
; In this test the stb r3, 0(r4) cannot be optimized because it
; uses the register r3 and that register is defined by lbz r3, 0(r3)
; which is defined between the pld and the stb.
; The load side is annotated: the label follows the first pld and the .reloc
; is placed directly before the lbz.
; CHECK-LABEL: ReadWrite8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pld r3, input8@got@pcrel(0), 1
; CHECK-NEXT:  .Lpcrel0:
; CHECK-NEXT:    pld r4, output8@got@pcrel(0), 1
; CHECK-NEXT:    .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8)
; CHECK-NEXT:    lbz r3, 0(r3)
; CHECK-NEXT:    stb r3, 0(r4)
; CHECK-NEXT:    blr
entry:
  %0 = load i8, ptr @input8, align 1
  store i8 %0, ptr @output8, align 1
  ret void
}

define dso_local void @ReadWrite16() local_unnamed_addr #0 {
; In this test the sth r3, 0(r4) cannot be optimized because it
; uses the register r3 and that register is defined by lhz r3, 0(r3)
; which is defined between the pld and the sth.
; CHECK-LABEL: ReadWrite16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pld r3, input16@got@pcrel(0), 1
; CHECK-NEXT:  .Lpcrel1:
; CHECK-NEXT:    pld r4, output16@got@pcrel(0), 1
; CHECK-NEXT:    .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
; CHECK-NEXT:    lhz r3, 0(r3)
; CHECK-NEXT:    sth r3, 0(r4)
; CHECK-NEXT:    blr
entry:
  %0 = load i16, ptr @input16, align 2
  store i16 %0, ptr @output16, align 2
  ret void
}

define dso_local void @ReadWrite32() local_unnamed_addr #0 {
; i32 variant of ReadWrite8/ReadWrite16: the relocation is expected on the
; pld/lwz pair only, and the stw through r4 has no relocation (r3 is again
; defined by the load between the second pld and the store).
; CHECK-LABEL: ReadWrite32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pld r3, input32@got@pcrel(0), 1
; CHECK-NEXT:  .Lpcrel2:
; CHECK-NEXT:    pld r4, output32@got@pcrel(0), 1
; CHECK-NEXT:    .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8)
; CHECK-NEXT:    lwz r3, 0(r3)
; CHECK-NEXT:    stw r3, 0(r4)
; CHECK-NEXT:    blr
entry:
  %0 = load i32, ptr @input32, align 4
  store i32 %0, ptr @output32, align 4
  ret void
}

define dso_local void @ReadWrite64() local_unnamed_addr #0 {
; i64 variant of the same pattern: relocation on the pld/ld pair, none on the
; std through r4.
; CHECK-LABEL: ReadWrite64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pld r3, input64@got@pcrel(0), 1
; CHECK-NEXT:  .Lpcrel3:
; CHECK-NEXT:    pld r4, output64@got@pcrel(0), 1
; CHECK-NEXT:    .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8)
; CHECK-NEXT:    ld r3, 0(r3)
; CHECK-NEXT:    std r3, 0(r4)
; CHECK-NEXT:    blr
entry:
  %0 = load i64, ptr @input64, align 8
  store i64 %0, ptr @output64, align 8
  ret void
}

; FIXME: we should always convert X-Form instructions that use
; PPC::ZERO[8] to the corresponding D-Form so we can perform this opt.
; NOTE(review): the ReadWrite128 checks that follow already expect D-Form
; lxv/stxv together with R_PPC64_PCREL_OPT relocations; confirm whether this
; FIXME is still current.
define dso_local void @ReadWrite128() local_unnamed_addr #0 {
; Both GOT accesses are expected to be annotated here: each pld is followed by
; its label and a .reloc placed directly before the lxv/stxv that uses it.
; CHECK-LABEL: ReadWrite128:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pld r3, input128@got@pcrel(0), 1
; CHECK-NEXT:  .Lpcrel4:
; CHECK-NEXT:    .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8)
; CHECK-NEXT:    lxv vs0, 0(r3)
; CHECK-NEXT:    pld r3, output128@got@pcrel(0), 1
; CHECK-NEXT:  .Lpcrel5:
; CHECK-NEXT:    .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
; CHECK-NEXT:    stxv vs0, 0(r3)
; CHECK-NEXT:    blr
entry:
  %0 = load i128, ptr @input128, align 16
  store i128 %0, ptr @output128, align 16
  ret void
}

define dso_local void @ReadWritef32() local_unnamed_addr #0 {
; The float constant is materialized (xxspltidp) between the first pld and its
; .reloc/lfs pair. The stfs through the second pld is expected without a
; relocation; presumably because f0 is redefined by xsaddsp after that pld,
; as described for ReadWrite8 -- TODO confirm.
; CHECK-LABEL: ReadWritef32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pld r3, inputf32@got@pcrel(0), 1
; CHECK-NEXT:  .Lpcrel6:
; CHECK-NEXT:    xxspltidp vs1, 1078103900
; CHECK-NEXT:    .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8)
; CHECK-NEXT:    lfs f0, 0(r3)
; CHECK-NEXT:    pld r3, outputf32@got@pcrel(0), 1
; CHECK-NEXT:    xsaddsp f0, f0, f1
; CHECK-NEXT:    stfs f0, 0(r3)
; CHECK-NEXT:    blr
entry:
  %0 = load float, ptr @inputf32, align 4
  %add = fadd float %0, 0x400851EB80000000
  store float %add, ptr @outputf32, align 4
  ret void
}

define dso_local void @ReadWritef64() local_unnamed_addr #0 {
; Double variant of ReadWritef32: the constant is built with two xxsplti32dx
; instructions before the .reloc/lfd pair, and the stfd has no relocation.
; CHECK-LABEL: ReadWritef64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pld r3, inputf64@got@pcrel(0), 1
; CHECK-NEXT:  .Lpcrel7:
; CHECK-NEXT:    xxsplti32dx vs1, 0, 1075524403
; CHECK-NEXT:    xxsplti32dx vs1, 1, 858993459
; CHECK-NEXT:    .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8)
; CHECK-NEXT:    lfd f0, 0(r3)
; CHECK-NEXT:    pld r3, outputf64@got@pcrel(0), 1
; CHECK-NEXT:    xsadddp f0, f0, f1
; CHECK-NEXT:    stfd f0, 0(r3)
; CHECK-NEXT:    blr
entry:
  %0 = load double, ptr @inputf64, align 8
  %add = fadd double %0, 6.800000e+00
  store double %add, ptr @outputf64, align 8
  ret void
}

; FIXME: we
; should always convert X-Form instructions that use
; PPC::ZERO[8] to the corresponding D-Form so we can perform this opt.
; NOTE(review): the checks below already expect a D-Form lxv with an
; R_PPC64_PCREL_OPT relocation for the load; confirm whether this FIXME is
; still current.
define dso_local void @ReadWriteVi32() local_unnamed_addr #0 {
; The vector load is annotated (li r4, 45 sits between the label and the
; .reloc). The stxv through the second pld has no relocation; v2 is redefined
; by vinsw between that pld and the store.
; CHECK-LABEL: ReadWriteVi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pld r3, inputVi32@got@pcrel(0), 1
; CHECK-NEXT:  .Lpcrel8:
; CHECK-NEXT:    li r4, 45
; CHECK-NEXT:    .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8)
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    pld r3, outputVi32@got@pcrel(0), 1
; CHECK-NEXT:    vinsw v2, r4, 8
; CHECK-NEXT:    stxv v2, 0(r3)
; CHECK-NEXT:    blr
entry:
  %0 = load <4 x i32>, ptr @inputVi32, align 16
  %vecins = insertelement <4 x i32> %0, i32 45, i32 1
  store <4 x i32> %vecins, ptr @outputVi32, align 16
  ret void
}

define dso_local void @ReadWriteVi64() local_unnamed_addr #0 {
; Plain vector copy: both the lxv and the stxv are expected to be paired with
; their pld through a relocation.
; CHECK-LABEL: ReadWriteVi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pld r3, inputVi64@got@pcrel(0), 1
; CHECK-NEXT:  .Lpcrel9:
; CHECK-NEXT:    .reloc .Lpcrel9-8,R_PPC64_PCREL_OPT,.-(.Lpcrel9-8)
; CHECK-NEXT:    lxv vs0, 0(r3)
; CHECK-NEXT:    pld r3, outputVi64@got@pcrel(0), 1
; CHECK-NEXT:  .Lpcrel10:
; CHECK-NEXT:    .reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8)
; CHECK-NEXT:    stxv vs0, 0(r3)
; CHECK-NEXT:    blr
entry:
  %0 = load <2 x i64>, ptr @inputVi64, align 16
  store <2 x i64> %0, ptr @outputVi64, align 16
  ret void
}

define dso_local void @ReadWriteArray() local_unnamed_addr #0 {
; Array elements are accessed at non-zero displacements (28 and 8). The load
; is annotated; the store through r4 is not.
; CHECK-LABEL: ReadWriteArray:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pld r3, ArrayIn@got@pcrel(0), 1
; CHECK-NEXT:  .Lpcrel11:
; CHECK-NEXT:    pld r4, ArrayOut@got@pcrel(0), 1
; CHECK-NEXT:    .reloc .Lpcrel11-8,R_PPC64_PCREL_OPT,.-(.Lpcrel11-8)
; CHECK-NEXT:    lwz r3, 28(r3)
; CHECK-NEXT:    addi r3, r3, 42
; CHECK-NEXT:    stw r3, 8(r4)
; CHECK-NEXT:    blr
entry:
  %0 = load i32, ptr getelementptr inbounds ([10 x i32], ptr @ArrayIn, i64 0, i64 7), align 4
  %add = add nsw i32 %0, 42
  store i32 %add, ptr getelementptr inbounds ([10 x i32], ptr @ArrayOut, i64 0, i64 2), align 4
  ret void
}

define dso_local void @ReadWriteSameArray() local_unnamed_addr #0 {
; A single pld result is used by both the lwz and the stw, and no relocation
; is expected; presumably because the GOT address has more than one use --
; TODO confirm.
; CHECK-LABEL: ReadWriteSameArray:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pld r3, ArrayIn@got@pcrel(0), 1
; CHECK-NEXT:    lwz r4, 12(r3)
; CHECK-NEXT:    addi r4, r4, 8
; CHECK-NEXT:    stw r4, 24(r3)
; CHECK-NEXT:    blr
entry:
  %0 = load i32, ptr getelementptr inbounds ([10 x i32], ptr @ArrayIn, i64 0, i64 3), align 4
  %add = add nsw i32 %0, 8
  store i32 %add, ptr getelementptr inbounds ([10 x i32], ptr @ArrayIn, i64 0, i64 6), align 4
  ret void
}

define dso_local void @ReadWriteIntPtr() local_unnamed_addr #0 {
; Both pointer loads from the GOT-addressed globals (ld r3 / ld r4) are
; annotated. The later accesses go through the loaded pointers and carry no
; relocation.
; CHECK-LABEL: ReadWriteIntPtr:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pld r3, IntPtrIn@got@pcrel(0), 1
; CHECK-NEXT:  .Lpcrel12:
; CHECK-NEXT:    pld r4, IntPtrOut@got@pcrel(0), 1
; CHECK-NEXT:  .Lpcrel13:
; CHECK-NEXT:    .reloc .Lpcrel12-8,R_PPC64_PCREL_OPT,.-(.Lpcrel12-8)
; CHECK-NEXT:    ld r3, 0(r3)
; CHECK-NEXT:    .reloc .Lpcrel13-8,R_PPC64_PCREL_OPT,.-(.Lpcrel13-8)
; CHECK-NEXT:    ld r4, 0(r4)
; CHECK-NEXT:    lwz r5, 216(r3)
; CHECK-NEXT:    lwz r3, 48(r3)
; CHECK-NEXT:    add r3, r3, r5
; CHECK-NEXT:    stw r3, 136(r4)
; CHECK-NEXT:    blr
entry:
  %0 = load ptr, ptr @IntPtrIn, align 8
  %arrayidx = getelementptr inbounds i32, ptr %0, i64 54
  %1 = load i32, ptr %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds i32, ptr %0, i64 12
  %2 = load i32, ptr %arrayidx1, align 4
  %add = add nsw i32 %2, %1
  %3 = load ptr, ptr @IntPtrOut, align 8
  %arrayidx2 = getelementptr inbounds i32, ptr %3, i64 34
  store i32 %add, ptr %arrayidx2, align 4
  ret void
}

define dso_local void @ReadWriteFuncPtr() local_unnamed_addr #0 {
; The pointer-typed globals are copied as i64, giving the same expected
; sequence as ReadWrite64: relocation on the pld/ld pair, none on the std.
; CHECK-LABEL: ReadWriteFuncPtr:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pld r3, FuncPtrIn@got@pcrel(0), 1
; CHECK-NEXT:  .Lpcrel14:
; CHECK-NEXT:    pld r4, FuncPtrOut@got@pcrel(0), 1
; CHECK-NEXT:    .reloc .Lpcrel14-8,R_PPC64_PCREL_OPT,.-(.Lpcrel14-8)
; CHECK-NEXT:    ld r3, 0(r3)
; CHECK-NEXT:    std r3, 0(r4)
; CHECK-NEXT:    blr
entry:
  %0 = load i64, ptr @FuncPtrIn, align 8
  store i64 %0, ptr @FuncPtrOut, align 8
  ret void
}

define dso_local void @FuncPtrCopy() local_unnamed_addr #0 {
; The address of Callee is itself the value being stored (std r4, 0(r3)).
; No relocation is expected for either pld.
; CHECK-LABEL: FuncPtrCopy:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pld r3, FuncPtrOut@got@pcrel(0), 1
; CHECK-NEXT:    pld r4, Callee@got@pcrel(0), 1
; CHECK-NEXT:    std r4, 0(r3)
; CHECK-NEXT:    blr
entry:
  store ptr @Callee, ptr @FuncPtrOut, align 8
  ret void
}

declare void @Callee(...)

define dso_local void @FuncPtrCall() local_unnamed_addr #0 {
; The function pointer is loaded into r12 and tail-called through ctr; the
; pld/ld pair is expected to be annotated.
; CHECK-LABEL: FuncPtrCall:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pld r3, FuncPtrIn@got@pcrel(0), 1
; CHECK-NEXT:  .Lpcrel15:
; CHECK-NEXT:    .reloc .Lpcrel15-8,R_PPC64_PCREL_OPT,.-(.Lpcrel15-8)
; CHECK-NEXT:    ld r12, 0(r3)
; CHECK-NEXT:    mtctr r12
; CHECK-NEXT:    bctr
; CHECK-NEXT:    #TC_RETURNr8 ctr 0
entry:
  %0 = load ptr, ptr @FuncPtrIn, align 8
  tail call void %0()
  ret void
}

define dso_local signext i32 @ReadVecElement() local_unnamed_addr #0 {
; Only element 1 of the vector is used, so the expected code is a single
; annotated lwa at displacement 4.
; CHECK-LABEL: ReadVecElement:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pld r3, inputVi32@got@pcrel(0), 1
; CHECK-NEXT:  .Lpcrel16:
; CHECK-NEXT:    .reloc .Lpcrel16-8,R_PPC64_PCREL_OPT,.-(.Lpcrel16-8)
; CHECK-NEXT:    lwa r3, 4(r3)
; CHECK-NEXT:    blr
entry:
  %0 = load <4 x i32>, ptr @inputVi32, align 16
  %vecext = extractelement <4 x i32> %0, i32 1
  ret i32 %vecext
}

define dso_local signext i32 @VecMultiUse() local_unnamed_addr #0 {
; The GOT address is kept in r30 across both calls to Callee and used by three
; separate lwz instructions; no relocation is expected.
; CHECK-LABEL: VecMultiUse:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -64(r1)
; CHECK-NEXT:    pld r30, inputVi32@got@pcrel(0), 1
; CHECK-NEXT:    lwz r29, 4(r30)
; CHECK-NEXT:    bl Callee@notoc
; CHECK-NEXT:    lwz r3, 8(r30)
; CHECK-NEXT:    add r29, r3, r29
; CHECK-NEXT:    bl Callee@notoc
; CHECK-NEXT:    lwz r3, 0(r30)
; CHECK-NEXT:    add r3, r29, r3
; CHECK-NEXT:    extsw r3, r3
; CHECK-NEXT:    addi r1, r1, 64
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
entry:
  %0 = load <4 x i32>, ptr @inputVi32, align 16
  tail call void @Callee()
  %1 = load <4 x i32>, ptr @inputVi32, align 16
  %2 = extractelement <4 x i32> %1, i32 2
  %3 = extractelement <4 x i32> %0, i64 1
  %4 = add nsw i32 %2, %3
  tail call void @Callee()
  %5 = load <4 x i32>, ptr @inputVi32, align 16
  %vecext2 = extractelement <4 x i32> %5, i32 0
  %add3 = add nsw i32 %4, %vecext2
  ret i32 %add3
}

define dso_local signext i32 @UseAddr(i32 signext %a) local_unnamed_addr #0 {
; The address of ArrayIn is used both as the base of the lwz and as the
; argument passed to getAddr (mr r3, r4); no relocation is expected.
; CHECK-LABEL: UseAddr:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -48(r1)
; CHECK-NEXT:    pld r4, ArrayIn@got@pcrel(0), 1
; CHECK-NEXT:    lwz r5, 16(r4)
; CHECK-NEXT:    add r30, r5, r3
; CHECK-NEXT:    mr r3, r4
; CHECK-NEXT:    bl getAddr@notoc
; CHECK-NEXT:    add r3, r30, r3
; CHECK-NEXT:    extsw r3, r3
; CHECK-NEXT:    addi r1, r1, 48
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
entry:
  %0 = load i32, ptr getelementptr inbounds ([10 x i32], ptr @ArrayIn, i64 0, i64 4), align 4
  %add = add nsw i32 %0, %a
  %call = tail call signext i32 @getAddr(ptr @ArrayIn)
  %add1 = add nsw i32 %add, %call
  ret i32 %add1
}

declare signext i32 @getAddr(ptr) local_unnamed_addr

define dso_local nonnull ptr @AddrTaken32() local_unnamed_addr #0 {
; The address of input32 is returned rather than dereferenced, so the expected
; code is the pld alone with no relocation.
; CHECK-LABEL: AddrTaken32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pld r3, input32@got@pcrel(0), 1
; CHECK-NEXT:    blr
entry:
  ret ptr @input32
}

attributes #0 = { nounwind }