xref: /llvm-project/llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll (revision 5403c59c608c08c8ecd4303763f08eb046eb5e4d)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
3; RUN:   -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
4; RUN:   < %s | FileCheck %s
5
6; On the future CPU with PC-relative addressing enabled, the linker can
7; optimize GOT-indirect accesses. To do so, the linker needs a hint from
8; the compiler in the form of an R_PPC64_PCREL_OPT relocation.
9; This test checks that the compiler adds the R_PPC64_PCREL_OPT relocation
10; correctly.
11
12@input8 = external local_unnamed_addr global i8, align 1
13@output8 = external local_unnamed_addr global i8, align 1
14@input16 = external local_unnamed_addr global i16, align 2
15@output16 = external local_unnamed_addr global i16, align 2
16@input32 = external global i32, align 4
17@output32 = external local_unnamed_addr global i32, align 4
18@input64 = external local_unnamed_addr global i64, align 8
19@output64 = external local_unnamed_addr global i64, align 8
20@input128 = external local_unnamed_addr global i128, align 16
21@output128 = external local_unnamed_addr global i128, align 16
22@inputf32 = external local_unnamed_addr global float, align 4
23@outputf32 = external local_unnamed_addr global float, align 4
24@inputf64 = external local_unnamed_addr global double, align 8
25@outputf64 = external local_unnamed_addr global double, align 8
26@inputVi32 = external local_unnamed_addr global <4 x i32>, align 16
27@outputVi32 = external local_unnamed_addr global <4 x i32>, align 16
28@inputVi64 = external local_unnamed_addr global <2 x i64>, align 16
29@outputVi64 = external local_unnamed_addr global <2 x i64>, align 16
30@ArrayIn = external global [10 x i32], align 4
31@ArrayOut = external local_unnamed_addr global [10 x i32], align 4
32@IntPtrIn = external local_unnamed_addr global ptr, align 8
33@IntPtrOut = external local_unnamed_addr global ptr, align 8
34@FuncPtrIn = external local_unnamed_addr global ptr, align 8
35@FuncPtrOut = external local_unnamed_addr global ptr, align 8
36
37define dso_local void @ReadWrite8() local_unnamed_addr #0 {
; Copies one i8 from @input8 to @output8. The checks expect a single
; R_PPC64_PCREL_OPT: the .reloc is emitted immediately before the lbz and
; refers back to the pld of input8 through .Lpcrel0-8.
38; The stb r3, 0(r4) cannot be optimized because it stores the register
39; r3, and that register is defined by lbz r3, 0(r3), which sits between
40; the pld of output8 and the stb.
41; CHECK-LABEL: ReadWrite8:
42; CHECK:       # %bb.0: # %entry
43; CHECK-NEXT:    pld r3, input8@got@pcrel(0), 1
44; CHECK-NEXT:  .Lpcrel0:
45; CHECK-NEXT:    pld r4, output8@got@pcrel(0), 1
46; CHECK-NEXT:    .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8)
47; CHECK-NEXT:    lbz r3, 0(r3)
48; CHECK-NEXT:    stb r3, 0(r4)
49; CHECK-NEXT:    blr
50entry:
51  %0 = load i8, ptr @input8, align 1
52  store i8 %0, ptr @output8, align 1
53  ret void
54}
55
56define dso_local void @ReadWrite16() local_unnamed_addr #0 {
; Copies one i16 from @input16 to @output16. The checks expect a single
; R_PPC64_PCREL_OPT: the .reloc is emitted immediately before the lhz and
; refers back to the pld of input16 through .Lpcrel1-8.
57; The sth r3, 0(r4) cannot be optimized because it stores the register
58; r3, and that register is defined by lhz r3, 0(r3), which sits between
59; the pld of output16 and the sth.
60; CHECK-LABEL: ReadWrite16:
61; CHECK:       # %bb.0: # %entry
62; CHECK-NEXT:    pld r3, input16@got@pcrel(0), 1
63; CHECK-NEXT:  .Lpcrel1:
64; CHECK-NEXT:    pld r4, output16@got@pcrel(0), 1
65; CHECK-NEXT:    .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
66; CHECK-NEXT:    lhz r3, 0(r3)
67; CHECK-NEXT:    sth r3, 0(r4)
68; CHECK-NEXT:    blr
69entry:
70  %0 = load i16, ptr @input16, align 2
71  store i16 %0, ptr @output16, align 2
72  ret void
73}
74
75define dso_local void @ReadWrite32() local_unnamed_addr #0 {
; Copies one i32 from @input32 to @output32. The checks expect a single
; R_PPC64_PCREL_OPT, emitted immediately before the lwz and referring back
; to the pld of input32 through .Lpcrel2-8. No relocation is expected for
; the pld of output32: the stw stores r3, which the lwz defines between
; that pld and the stw.
76; CHECK-LABEL: ReadWrite32:
77; CHECK:       # %bb.0: # %entry
78; CHECK-NEXT:    pld r3, input32@got@pcrel(0), 1
79; CHECK-NEXT:  .Lpcrel2:
80; CHECK-NEXT:    pld r4, output32@got@pcrel(0), 1
81; CHECK-NEXT:    .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8)
82; CHECK-NEXT:    lwz r3, 0(r3)
83; CHECK-NEXT:    stw r3, 0(r4)
84; CHECK-NEXT:    blr
85entry:
86  %0 = load i32, ptr @input32, align 4
87  store i32 %0, ptr @output32, align 4
88  ret void
89}
90
91define dso_local void @ReadWrite64() local_unnamed_addr #0 {
; Copies one i64 from @input64 to @output64. The checks expect a single
; R_PPC64_PCREL_OPT, emitted immediately before the ld and referring back
; to the pld of input64 through .Lpcrel3-8. No relocation is expected for
; the pld of output64: the std stores r3, which the ld defines between
; that pld and the std.
92; CHECK-LABEL: ReadWrite64:
93; CHECK:       # %bb.0: # %entry
94; CHECK-NEXT:    pld r3, input64@got@pcrel(0), 1
95; CHECK-NEXT:  .Lpcrel3:
96; CHECK-NEXT:    pld r4, output64@got@pcrel(0), 1
97; CHECK-NEXT:    .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8)
98; CHECK-NEXT:    ld r3, 0(r3)
99; CHECK-NEXT:    std r3, 0(r4)
100; CHECK-NEXT:    blr
101entry:
102  %0 = load i64, ptr @input64, align 8
103  store i64 %0, ptr @output64, align 8
104  ret void
105}
106
107; FIXME: we should always convert X-Form instructions that use
108; PPC::ZERO[8] to the corresponding D-Form so we can perform this opt.
; NOTE(review): the checks below already expect lxv/stxv with a 0(r3)
; displacement operand, each preceded by an R_PPC64_PCREL_OPT .reloc, so
; this FIXME may be stale -- confirm before relying on it.
109define dso_local void @ReadWrite128() local_unnamed_addr #0 {
; Copies one i128 from @input128 to @output128. The checks expect two
; R_PPC64_PCREL_OPT relocations: one before the lxv (for the pld of
; input128) and one before the stxv (for the pld of output128). The value
; register vs0 is defined by the lxv before the second pld is issued.
110; CHECK-LABEL: ReadWrite128:
111; CHECK:       # %bb.0: # %entry
112; CHECK-NEXT:    pld r3, input128@got@pcrel(0), 1
113; CHECK-NEXT:  .Lpcrel4:
114; CHECK-NEXT:    .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8)
115; CHECK-NEXT:    lxv vs0, 0(r3)
116; CHECK-NEXT:    pld r3, output128@got@pcrel(0), 1
117; CHECK-NEXT:  .Lpcrel5:
118; CHECK-NEXT:    .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
119; CHECK-NEXT:    stxv vs0, 0(r3)
120; CHECK-NEXT:    blr
121entry:
122  %0 = load i128, ptr @input128, align 16
123  store i128 %0, ptr @output128, align 16
124  ret void
125}
126
127define dso_local void @ReadWritef32() local_unnamed_addr #0 {
; Loads a float from @inputf32, adds a constant, and stores the sum to
; @outputf32. The checks expect a relocation for the load only: .Lpcrel6
; follows the pld of inputf32 and the .reloc is emitted immediately before
; the lfs (the constant materialization is scheduled in between). No
; relocation is expected for the pld of outputf32: the stfs stores f0,
; which xsaddsp defines between that pld and the stfs.
128; CHECK-LABEL: ReadWritef32:
129; CHECK:       # %bb.0: # %entry
130; CHECK-NEXT:    pld r3, inputf32@got@pcrel(0), 1
131; CHECK-NEXT:  .Lpcrel6:
132; CHECK-NEXT:    xxspltidp vs1, 1078103900
133; CHECK-NEXT:    .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8)
134; CHECK-NEXT:    lfs f0, 0(r3)
135; CHECK-NEXT:    pld r3, outputf32@got@pcrel(0), 1
136; CHECK-NEXT:    xsaddsp f0, f0, f1
137; CHECK-NEXT:    stfs f0, 0(r3)
138; CHECK-NEXT:    blr
139entry:
140  %0 = load float, ptr @inputf32, align 4
141  %add = fadd float %0, 0x400851EB80000000
142  store float %add, ptr @outputf32, align 4
143  ret void
144}
145
146define dso_local void @ReadWritef64() local_unnamed_addr #0 {
; Loads a double from @inputf64, adds 6.8, and stores the sum to
; @outputf64. The checks expect a relocation for the load only: .Lpcrel7
; follows the pld of inputf64 and the .reloc is emitted immediately before
; the lfd (the two xxsplti32dx instructions are scheduled in between). No
; relocation is expected for the pld of outputf64: the stfd stores f0,
; which xsadddp defines between that pld and the stfd.
147; CHECK-LABEL: ReadWritef64:
148; CHECK:       # %bb.0: # %entry
149; CHECK-NEXT:    pld r3, inputf64@got@pcrel(0), 1
150; CHECK-NEXT:  .Lpcrel7:
151; CHECK-NEXT:    xxsplti32dx vs1, 0, 1075524403
152; CHECK-NEXT:    xxsplti32dx vs1, 1, 858993459
153; CHECK-NEXT:    .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8)
154; CHECK-NEXT:    lfd f0, 0(r3)
155; CHECK-NEXT:    pld r3, outputf64@got@pcrel(0), 1
156; CHECK-NEXT:    xsadddp f0, f0, f1
157; CHECK-NEXT:    stfd f0, 0(r3)
158; CHECK-NEXT:    blr
159entry:
160  %0 = load double, ptr @inputf64, align 8
161  %add = fadd double %0, 6.800000e+00
162  store double %add, ptr @outputf64, align 8
163  ret void
164}
165
166; FIXME: we should always convert X-Form instructions that use
167; PPC::ZERO[8] to the corresponding D-Form so we can perform this opt.
; NOTE(review): the checks below already expect lxv/stxv with a 0(r3)
; displacement operand, and the lxv is preceded by an R_PPC64_PCREL_OPT
; .reloc, so this FIXME may be stale -- confirm before relying on it.
168define dso_local void @ReadWriteVi32() local_unnamed_addr #0 {
; Loads a <4 x i32> from @inputVi32, inserts the constant 45 at element 1,
; and stores the result to @outputVi32. The checks expect a relocation for
; the load only: the .reloc is emitted immediately before the lxv and
; refers back to the pld of inputVi32 through .Lpcrel8-8. No relocation is
; expected for the pld of outputVi32: the stxv stores v2, which vinsw
; defines between that pld and the stxv.
169; CHECK-LABEL: ReadWriteVi32:
170; CHECK:       # %bb.0: # %entry
171; CHECK-NEXT:    pld r3, inputVi32@got@pcrel(0), 1
172; CHECK-NEXT:  .Lpcrel8:
173; CHECK-NEXT:    li r4, 45
174; CHECK-NEXT:    .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8)
175; CHECK-NEXT:    lxv v2, 0(r3)
176; CHECK-NEXT:    pld r3, outputVi32@got@pcrel(0), 1
177; CHECK-NEXT:    vinsw v2, r4, 8
178; CHECK-NEXT:    stxv v2, 0(r3)
179; CHECK-NEXT:    blr
180entry:
181  %0 = load <4 x i32>, ptr @inputVi32, align 16
182  %vecins = insertelement <4 x i32> %0, i32 45, i32 1
183  store <4 x i32> %vecins, ptr @outputVi32, align 16
184  ret void
185}
186
187define dso_local void @ReadWriteVi64() local_unnamed_addr #0 {
; Copies a <2 x i64> from @inputVi64 to @outputVi64. The checks expect two
; R_PPC64_PCREL_OPT relocations: one before the lxv (for the pld of
; inputVi64) and one before the stxv (for the pld of outputVi64). The
; value register vs0 is defined by the lxv before the second pld is issued.
188; CHECK-LABEL: ReadWriteVi64:
189; CHECK:       # %bb.0: # %entry
190; CHECK-NEXT:    pld r3, inputVi64@got@pcrel(0), 1
191; CHECK-NEXT:  .Lpcrel9:
192; CHECK-NEXT:    .reloc .Lpcrel9-8,R_PPC64_PCREL_OPT,.-(.Lpcrel9-8)
193; CHECK-NEXT:    lxv vs0, 0(r3)
194; CHECK-NEXT:    pld r3, outputVi64@got@pcrel(0), 1
195; CHECK-NEXT:  .Lpcrel10:
196; CHECK-NEXT:    .reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8)
197; CHECK-NEXT:    stxv vs0, 0(r3)
198; CHECK-NEXT:    blr
199entry:
200  %0 = load <2 x i64>, ptr @inputVi64, align 16
201  store <2 x i64> %0, ptr @outputVi64, align 16
202  ret void
203}
204
205define dso_local void @ReadWriteArray() local_unnamed_addr #0 {
; Loads element 7 of @ArrayIn, adds 42, and stores the sum to element 2 of
; @ArrayOut. The checks expect a relocation for the load only; the paired
; access carries a non-zero displacement (lwz r3, 28(r3)). No relocation
; is expected for the pld of ArrayOut: the stw stores r3, which the lwz
; and addi define between that pld and the stw.
206; CHECK-LABEL: ReadWriteArray:
207; CHECK:       # %bb.0: # %entry
208; CHECK-NEXT:    pld r3, ArrayIn@got@pcrel(0), 1
209; CHECK-NEXT:  .Lpcrel11:
210; CHECK-NEXT:    pld r4, ArrayOut@got@pcrel(0), 1
211; CHECK-NEXT:    .reloc .Lpcrel11-8,R_PPC64_PCREL_OPT,.-(.Lpcrel11-8)
212; CHECK-NEXT:    lwz r3, 28(r3)
213; CHECK-NEXT:    addi r3, r3, 42
214; CHECK-NEXT:    stw r3, 8(r4)
215; CHECK-NEXT:    blr
216entry:
217  %0 = load i32, ptr getelementptr inbounds ([10 x i32], ptr @ArrayIn, i64 0, i64 7), align 4
218  %add = add nsw i32 %0, 42
219  store i32 %add, ptr getelementptr inbounds ([10 x i32], ptr @ArrayOut, i64 0, i64 2), align 4
220  ret void
221}
222
223define dso_local void @ReadWriteSameArray() local_unnamed_addr #0 {
; Loads element 3 of @ArrayIn, adds 8, and stores the sum to element 6 of
; the same array. A single pld feeds both the lwz and the stw, and the
; checks expect no R_PPC64_PCREL_OPT at all.
; NOTE(review): presumably the relocation is omitted because the address
; loaded by the pld has more than one use -- confirm against the pass.
224; CHECK-LABEL: ReadWriteSameArray:
225; CHECK:       # %bb.0: # %entry
226; CHECK-NEXT:    pld r3, ArrayIn@got@pcrel(0), 1
227; CHECK-NEXT:    lwz r4, 12(r3)
228; CHECK-NEXT:    addi r4, r4, 8
229; CHECK-NEXT:    stw r4, 24(r3)
230; CHECK-NEXT:    blr
231entry:
232  %0 = load i32, ptr getelementptr inbounds ([10 x i32], ptr @ArrayIn, i64 0, i64 3), align 4
233  %add = add nsw i32 %0, 8
234  store i32 %add, ptr getelementptr inbounds ([10 x i32], ptr @ArrayIn, i64 0, i64 6), align 4
235  ret void
236}
237
238define dso_local void @ReadWriteIntPtr() local_unnamed_addr #0 {
; Loads the pointers stored in @IntPtrIn and @IntPtrOut, sums elements 54
; and 12 of the first, and stores the sum to element 34 of the second.
; The checks expect two R_PPC64_PCREL_OPT relocations, each pairing a pld
; with the ld that fetches the pointer value (ld r3, 0(r3) and
; ld r4, 0(r4)). The later lwz/stw go through the loaded pointers and have
; no relocation.
239; CHECK-LABEL: ReadWriteIntPtr:
240; CHECK:       # %bb.0: # %entry
241; CHECK-NEXT:    pld r3, IntPtrIn@got@pcrel(0), 1
242; CHECK-NEXT:  .Lpcrel12:
243; CHECK-NEXT:    pld r4, IntPtrOut@got@pcrel(0), 1
244; CHECK-NEXT:  .Lpcrel13:
245; CHECK-NEXT:    .reloc .Lpcrel12-8,R_PPC64_PCREL_OPT,.-(.Lpcrel12-8)
246; CHECK-NEXT:    ld r3, 0(r3)
247; CHECK-NEXT:    .reloc .Lpcrel13-8,R_PPC64_PCREL_OPT,.-(.Lpcrel13-8)
248; CHECK-NEXT:    ld r4, 0(r4)
249; CHECK-NEXT:    lwz r5, 216(r3)
250; CHECK-NEXT:    lwz r3, 48(r3)
251; CHECK-NEXT:    add r3, r3, r5
252; CHECK-NEXT:    stw r3, 136(r4)
253; CHECK-NEXT:    blr
254entry:
255  %0 = load ptr, ptr @IntPtrIn, align 8
256  %arrayidx = getelementptr inbounds i32, ptr %0, i64 54
257  %1 = load i32, ptr %arrayidx, align 4
258  %arrayidx1 = getelementptr inbounds i32, ptr %0, i64 12
259  %2 = load i32, ptr %arrayidx1, align 4
260  %add = add nsw i32 %2, %1
261  %3 = load ptr, ptr @IntPtrOut, align 8
262  %arrayidx2 = getelementptr inbounds i32, ptr %3, i64 34
263  store i32 %add, ptr %arrayidx2, align 4
264  ret void
265}
266
267define dso_local void @ReadWriteFuncPtr() local_unnamed_addr #0 {
; Copies 64 bits from @FuncPtrIn to @FuncPtrOut (accessed here as i64).
; The checks expect a single R_PPC64_PCREL_OPT, emitted immediately before
; the ld and referring back to the pld of FuncPtrIn through .Lpcrel14-8.
; No relocation is expected for the pld of FuncPtrOut: the std stores r3,
; which the ld defines between that pld and the std.
268; CHECK-LABEL: ReadWriteFuncPtr:
269; CHECK:       # %bb.0: # %entry
270; CHECK-NEXT:    pld r3, FuncPtrIn@got@pcrel(0), 1
271; CHECK-NEXT:  .Lpcrel14:
272; CHECK-NEXT:    pld r4, FuncPtrOut@got@pcrel(0), 1
273; CHECK-NEXT:    .reloc .Lpcrel14-8,R_PPC64_PCREL_OPT,.-(.Lpcrel14-8)
274; CHECK-NEXT:    ld r3, 0(r3)
275; CHECK-NEXT:    std r3, 0(r4)
276; CHECK-NEXT:    blr
277entry:
278  %0 = load i64, ptr @FuncPtrIn, align 8
279  store i64 %0, ptr @FuncPtrOut, align 8
280  ret void
281}
282
283define dso_local void @FuncPtrCopy() local_unnamed_addr #0 {
; Stores the address of @Callee into @FuncPtrOut. The checks expect no
; R_PPC64_PCREL_OPT: the pld of Callee produces the value being stored
; (nothing is loaded or stored through it), and it defines r4 between the
; pld of FuncPtrOut and the std that stores r4.
284; CHECK-LABEL: FuncPtrCopy:
285; CHECK:       # %bb.0: # %entry
286; CHECK-NEXT:    pld r3, FuncPtrOut@got@pcrel(0), 1
287; CHECK-NEXT:    pld r4, Callee@got@pcrel(0), 1
288; CHECK-NEXT:    std r4, 0(r3)
289; CHECK-NEXT:    blr
290entry:
291  store ptr @Callee, ptr @FuncPtrOut, align 8
292  ret void
293}
294
295declare void @Callee(...)
296
297define dso_local void @FuncPtrCall() local_unnamed_addr #0 {
; Tail-calls through the function pointer stored in @FuncPtrIn. The checks
; expect one R_PPC64_PCREL_OPT pairing the pld of FuncPtrIn with
; ld r12, 0(r3); the loaded pointer is then moved to ctr and branched to.
298; CHECK-LABEL: FuncPtrCall:
299; CHECK:       # %bb.0: # %entry
300; CHECK-NEXT:    pld r3, FuncPtrIn@got@pcrel(0), 1
301; CHECK-NEXT:  .Lpcrel15:
302; CHECK-NEXT:    .reloc .Lpcrel15-8,R_PPC64_PCREL_OPT,.-(.Lpcrel15-8)
303; CHECK-NEXT:    ld r12, 0(r3)
304; CHECK-NEXT:    mtctr r12
305; CHECK-NEXT:    bctr
306; CHECK-NEXT:    #TC_RETURNr8 ctr 0
307entry:
308  %0 = load ptr, ptr @FuncPtrIn, align 8
309  tail call void %0()
310  ret void
311}
312
313define dso_local signext i32 @ReadVecElement() local_unnamed_addr #0 {
; Loads a <4 x i32> from @inputVi32 and returns element 1, sign-extended.
; The checks expect the vector load plus extract to be emitted as a single
; scalar lwa r3, 4(r3), preceded by an R_PPC64_PCREL_OPT that pairs it
; with the pld of inputVi32.
314; CHECK-LABEL: ReadVecElement:
315; CHECK:       # %bb.0: # %entry
316; CHECK-NEXT:    pld r3, inputVi32@got@pcrel(0), 1
317; CHECK-NEXT:  .Lpcrel16:
318; CHECK-NEXT:    .reloc .Lpcrel16-8,R_PPC64_PCREL_OPT,.-(.Lpcrel16-8)
319; CHECK-NEXT:    lwa r3, 4(r3)
320; CHECK-NEXT:    blr
321entry:
322  %0 = load <4 x i32>, ptr @inputVi32, align 16
323  %vecext = extractelement <4 x i32> %0, i32 1
324  ret i32 %vecext
325}
326
327define dso_local signext i32 @VecMultiUse() local_unnamed_addr #0 {
; Loads @inputVi32 three times, with a call to @Callee between consecutive
; loads, and returns the sum of element 1 of the first load, element 2 of
; the second, and element 0 of the third. The checks expect one pld into
; r30 whose result is reused by three lwz instructions across the calls,
; and no R_PPC64_PCREL_OPT.
; NOTE(review): presumably the relocation is omitted because the address
; loaded by the pld has more than one use -- confirm against the pass.
328; CHECK-LABEL: VecMultiUse:
329; CHECK:       # %bb.0: # %entry
330; CHECK-NEXT:    mflr r0
331; CHECK-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
332; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
333; CHECK-NEXT:    std r0, 16(r1)
334; CHECK-NEXT:    stdu r1, -64(r1)
335; CHECK-NEXT:    pld r30, inputVi32@got@pcrel(0), 1
336; CHECK-NEXT:    lwz r29, 4(r30)
337; CHECK-NEXT:    bl Callee@notoc
338; CHECK-NEXT:    lwz r3, 8(r30)
339; CHECK-NEXT:    add r29, r3, r29
340; CHECK-NEXT:    bl Callee@notoc
341; CHECK-NEXT:    lwz r3, 0(r30)
342; CHECK-NEXT:    add r3, r29, r3
343; CHECK-NEXT:    extsw r3, r3
344; CHECK-NEXT:    addi r1, r1, 64
345; CHECK-NEXT:    ld r0, 16(r1)
346; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
347; CHECK-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
348; CHECK-NEXT:    mtlr r0
349; CHECK-NEXT:    blr
350entry:
351  %0 = load <4 x i32>, ptr @inputVi32, align 16
352  tail call void @Callee()
353  %1 = load <4 x i32>, ptr @inputVi32, align 16
354  %2 = extractelement <4 x i32> %1, i32 2
355  %3 = extractelement <4 x i32> %0, i64 1
356  %4 = add nsw i32 %2, %3
357  tail call void @Callee()
358  %5 = load <4 x i32>, ptr @inputVi32, align 16
359  %vecext2 = extractelement <4 x i32> %5, i32 0
360  %add3 = add nsw i32 %4, %vecext2
361  ret i32 %add3
362}
363
364define dso_local signext i32 @UseAddr(i32 signext %a) local_unnamed_addr #0 {
; Returns %a + element 4 of @ArrayIn + getAddr(@ArrayIn). The address
; loaded by the pld is used both as the base of lwz r5, 16(r4) and, copied
; into r3, as the argument to getAddr. The checks expect no
; R_PPC64_PCREL_OPT.
; NOTE(review): presumably the relocation is omitted because the address
; itself is also passed to the call -- confirm against the pass.
365; CHECK-LABEL: UseAddr:
366; CHECK:       # %bb.0: # %entry
367; CHECK-NEXT:    mflr r0
368; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
369; CHECK-NEXT:    std r0, 16(r1)
370; CHECK-NEXT:    stdu r1, -48(r1)
371; CHECK-NEXT:    pld r4, ArrayIn@got@pcrel(0), 1
372; CHECK-NEXT:    lwz r5, 16(r4)
373; CHECK-NEXT:    add r30, r5, r3
374; CHECK-NEXT:    mr r3, r4
375; CHECK-NEXT:    bl getAddr@notoc
376; CHECK-NEXT:    add r3, r30, r3
377; CHECK-NEXT:    extsw r3, r3
378; CHECK-NEXT:    addi r1, r1, 48
379; CHECK-NEXT:    ld r0, 16(r1)
380; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
381; CHECK-NEXT:    mtlr r0
382; CHECK-NEXT:    blr
383entry:
384  %0 = load i32, ptr getelementptr inbounds ([10 x i32], ptr @ArrayIn, i64 0, i64 4), align 4
385  %add = add nsw i32 %0, %a
386  %call = tail call signext i32 @getAddr(ptr @ArrayIn)
387  %add1 = add nsw i32 %add, %call
388  ret i32 %add1
389}
390
391declare signext i32 @getAddr(ptr) local_unnamed_addr
392
393define dso_local nonnull ptr @AddrTaken32() local_unnamed_addr #0 {
; Returns the address of @input32. The checks expect only the pld followed
; by blr: nothing is loaded or stored through the address, and no
; R_PPC64_PCREL_OPT is expected.
394; CHECK-LABEL: AddrTaken32:
395; CHECK:       # %bb.0: # %entry
396; CHECK-NEXT:    pld r3, input32@got@pcrel(0), 1
397; CHECK-NEXT:    blr
398entry:
399  ret ptr @input32
400}
401
402attributes #0 = { nounwind }
403