xref: /llvm-project/llvm/test/CodeGen/PowerPC/licm-tocReg.ll (revision a51712751c184ebe056718c938d2526693a31564)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck -check-prefixes=CHECKLX %s
3; RUN: llc -verify-machineinstrs -mcpu=ppc -mtriple=powerpc64-ibm-aix-xcoff < %s | FileCheck -check-prefixes=CHECKAIX %s
4; RUN: llc -verify-machineinstrs -mcpu=ppc -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck -check-prefixes=CHECKAIX32 %s
5
6; The instructions ADDIStocHA8/LDtocL are used to calculate the address of
7; globals. The ones that are in bb.3.if.end could not be hoisted by Machine
8; LICM due to BCTRL_LDinto_toc in bb.2.if.then.  This call causes the compiler
9; to save the TOC to the stack before the call and to load it back into X2
10; afterwards. By communicating to Machine LICM that X2 is guaranteed to have the
11; same value before and after BCTRL_LDinto_toc, these instructions can be
12; hoisted out of bb.3.if.end to outside of the loop.
13
14; Pre Machine LICM MIR
15;
16;body:
17;  bb.0.entry:
18;    successors: %bb.2.if.then(0x40000000), %bb.3.if.end(0x40000000)
19;    liveins: %x3
20;
21;    %4 = COPY %x3
22;    %5 = ADDIStocHA8 %x2, @ga
23;    %6 = LDtocL @ga, killed %5 :: (load (s64) from got)
24;    %7 = LWZ 0, %6 :: (volatile dereferenceable load (s32) from @ga)
25;    %8 = ADDIStocHA8 %x2, @gb
26;    %9 = LDtocL @gb, killed %8 :: (load (s64) from got)
27;    %10 = LWZ 0, killed %9 :: (volatile dereferenceable load (s32) from @gb)
28;    %0 = LWZ 0, %6 :: (volatile dereferenceable load (s32) from @ga)
29;    %11 = CMPW killed %7, killed %10
30;    BCC 44, killed %11, %bb.2.if.then
31;    B %bb.3.if.end
32;
33;  bb.2.if.then:
34;    %1 = PHI %0, %bb.0.entry, %3, %bb.3.if.end
35;    ADJCALLSTACKDOWN 32, 0, implicit-def dead %r1, implicit %r1
36;    %20 = COPY %x2
37;    STD %20, 24, %x1 :: (store (s64) into stack + 24)
38;    %21 = EXTSW_32_64 %1
39;    %x3 = COPY %21
40;    %x12 = COPY %4
41;    MTCTR8 %4, implicit-def %ctr8
42;    BCTRL8_LDinto_toc 24, %x1, csr_ppc64_altivec, implicit-def dead %lr8, implicit-def dead %x2, implicit %ctr8, implicit %rm, implicit %x3, implicit %x12, implicit %x2, implicit-def %r1, implicit-def %x3
43;    ADJCALLSTACKUP 32, 0, implicit-def dead %r1, implicit %r1
44;    %22 = COPY %x3
45;    %x3 = COPY %22
46;    BLR8 implicit %lr8, implicit %rm, implicit %x3
47;
48;  bb.3.if.end:
49;    successors: %bb.2.if.then(0x04000000), %bb.3.if.end(0x7c000000)
50;
51;    %2 = PHI %0, %bb.0.entry, %3, %bb.3.if.end
52;    %12 = ADDI %2, 1
53;    %13 = ADDIStocHA8 %x2, @ga
54;    %14 = LDtocL @ga, killed %13 :: (load (s64) from got)
55;    STW killed %12, 0, %14 :: (volatile store (s32) into @ga)
56;    %15 = LWZ 0, %14 :: (volatile dereferenceable load (s32) from @ga)
57;    %16 = ADDIStocHA8 %x2, @gb
58;    %17 = LDtocL @gb, killed %16 :: (load (s64) from got)
59;    %18 = LWZ 0, killed %17 :: (volatile dereferenceable load (s32) from @gb)
60;    %3 = LWZ 0, %14 :: (volatile dereferenceable load (s32) from @ga)
61;    %19 = CMPW killed %15, killed %18
62;    BCC 44, killed %19, %bb.2.if.then
63;    B %bb.3.if.end
64
; External i32 globals whose addresses are formed relative to the TOC pointer.
; @test only touches them through volatile loads and stores.
65@ga = external global i32, align 4
66@gb = external global i32, align 4
67define signext i32 @test(ptr nocapture %FP) local_unnamed_addr #0 {
68; CHECKLX-LABEL: test:
69; CHECKLX:       # %bb.0: # %entry
70; CHECKLX-NEXT:    mr 12, 3
71; CHECKLX-NEXT:    addis 3, 2, .LC0@toc@ha
72; CHECKLX-NEXT:    addis 4, 2, .LC1@toc@ha
73; CHECKLX-NEXT:    ld 3, .LC0@toc@l(3)
74; CHECKLX-NEXT:    ld 5, .LC1@toc@l(4)
75; CHECKLX-NEXT:    lwz 6, 0(3)
76; CHECKLX-NEXT:    .p2align 5
77; CHECKLX-NEXT:  .LBB0_1: # %if.end
78; CHECKLX-NEXT:    #
79; CHECKLX-NEXT:    lwz 7, 0(5)
80; CHECKLX-NEXT:    lwz 4, 0(3)
81; CHECKLX-NEXT:    cmpw 6, 7
82; CHECKLX-NEXT:    bgt 0, .LBB0_3
83; CHECKLX-NEXT:  # %bb.2: # %if.end
84; CHECKLX-NEXT:    #
85; CHECKLX-NEXT:    addi 4, 4, 1
86; CHECKLX-NEXT:    stw 4, 0(3)
87; CHECKLX-NEXT:    lwz 6, 0(3)
88; CHECKLX-NEXT:    b .LBB0_1
89; CHECKLX-NEXT:  .LBB0_3: # %if.then
90; CHECKLX-NEXT:    mflr 0
91; CHECKLX-NEXT:    stdu 1, -32(1)
92; CHECKLX-NEXT:    std 2, 24(1)
93; CHECKLX-NEXT:    std 0, 48(1)
94; CHECKLX-NEXT:    .cfi_def_cfa_offset 32
95; CHECKLX-NEXT:    .cfi_offset lr, 16
96; CHECKLX-NEXT:    mtctr 12
97; CHECKLX-NEXT:    extsw 3, 4
98; CHECKLX-NEXT:    bctrl
99; CHECKLX-NEXT:    ld 2, 24(1)
100; CHECKLX-NEXT:    addi 1, 1, 32
101; CHECKLX-NEXT:    ld 0, 16(1)
102; CHECKLX-NEXT:    mtlr 0
103; CHECKLX-NEXT:    blr
104;
105; CHECKAIX-LABEL: test:
106; CHECKAIX:       # %bb.0: # %entry
107; CHECKAIX-NEXT:    ld 5, L..C0(2) # @ga
108; CHECKAIX-NEXT:    ld 6, L..C1(2) # @gb
109; CHECKAIX-NEXT:  L..BB0_1: # %if.end
110; CHECKAIX-NEXT:    #
111; CHECKAIX-NEXT:    lwz 4, 0(5)
112; CHECKAIX-NEXT:    lwz 7, 0(6)
113; CHECKAIX-NEXT:    cmpw 4, 7
114; CHECKAIX-NEXT:    lwz 4, 0(5)
115; CHECKAIX-NEXT:    bgt 0, L..BB0_3
116; CHECKAIX-NEXT:  # %bb.2: # %if.end
117; CHECKAIX-NEXT:    #
118; CHECKAIX-NEXT:    addi 4, 4, 1
119; CHECKAIX-NEXT:    stw 4, 0(5)
120; CHECKAIX-NEXT:    b L..BB0_1
121; CHECKAIX-NEXT:  L..BB0_3: # %if.then
122; CHECKAIX-NEXT:    mflr 0
123; CHECKAIX-NEXT:    stdu 1, -112(1)
124; CHECKAIX-NEXT:    ld 5, 0(3)
125; CHECKAIX-NEXT:    std 0, 128(1)
126; CHECKAIX-NEXT:    ld 11, 16(3)
127; CHECKAIX-NEXT:    std 2, 40(1)
128; CHECKAIX-NEXT:    ld 2, 8(3)
129; CHECKAIX-NEXT:    extsw 3, 4
130; CHECKAIX-NEXT:    mtctr 5
131; CHECKAIX-NEXT:    bctrl
132; CHECKAIX-NEXT:    ld 2, 40(1)
133; CHECKAIX-NEXT:    addi 1, 1, 112
134; CHECKAIX-NEXT:    ld 0, 16(1)
135; CHECKAIX-NEXT:    mtlr 0
136; CHECKAIX-NEXT:    blr
137;
138; CHECKAIX32-LABEL: test:
139; CHECKAIX32:       # %bb.0: # %entry
140; CHECKAIX32-NEXT:    lwz 5, L..C0(2) # @ga
141; CHECKAIX32-NEXT:    lwz 6, L..C1(2) # @gb
142; CHECKAIX32-NEXT:  L..BB0_1: # %if.end
143; CHECKAIX32-NEXT:    #
144; CHECKAIX32-NEXT:    lwz 4, 0(5)
145; CHECKAIX32-NEXT:    lwz 7, 0(6)
146; CHECKAIX32-NEXT:    cmpw 4, 7
147; CHECKAIX32-NEXT:    lwz 4, 0(5)
148; CHECKAIX32-NEXT:    bgt 0, L..BB0_3
149; CHECKAIX32-NEXT:  # %bb.2: # %if.end
150; CHECKAIX32-NEXT:    #
151; CHECKAIX32-NEXT:    addi 4, 4, 1
152; CHECKAIX32-NEXT:    stw 4, 0(5)
153; CHECKAIX32-NEXT:    b L..BB0_1
154; CHECKAIX32-NEXT:  L..BB0_3: # %if.then
155; CHECKAIX32-NEXT:    mflr 0
156; CHECKAIX32-NEXT:    stwu 1, -64(1)
157; CHECKAIX32-NEXT:    lwz 5, 0(3)
158; CHECKAIX32-NEXT:    stw 0, 72(1)
159; CHECKAIX32-NEXT:    stw 2, 20(1)
160; CHECKAIX32-NEXT:    mtctr 5
161; CHECKAIX32-NEXT:    lwz 11, 8(3)
162; CHECKAIX32-NEXT:    lwz 2, 4(3)
163; CHECKAIX32-NEXT:    mr 3, 4
164; CHECKAIX32-NEXT:    bctrl
165; CHECKAIX32-NEXT:    lwz 2, 20(1)
166; CHECKAIX32-NEXT:    addi 1, 1, 64
167; CHECKAIX32-NEXT:    lwz 0, 8(1)
168; CHECKAIX32-NEXT:    mtlr 0
169; CHECKAIX32-NEXT:    blr
;
; Shape of the IR below: a self-looping block (if.end) that accesses @ga and
; @gb, plus an exit block (if.then) that makes an indirect call through %FP.
; In all three sets of assertions above, the TOC-relative address loads for
; @ga and @gb appear only in the entry block, before the loop label.
;
; Entry: volatile-load @ga and @gb, compare them (signed greater-than), then
; load @ga once more; that last value feeds the phi in either successor.
170entry:
171  %0 = load volatile i32, ptr @ga, align 4
172  %1 = load volatile i32, ptr @gb, align 4
173  %cmp1 = icmp sgt i32 %0, %1
174  %2 = load volatile i32, ptr @ga, align 4
175  br i1 %cmp1, label %if.then, label %if.end
176
; Exit block: pass the most recently loaded @ga value to the function pointed
; to by %FP (indirect tail call) and return its result.
177if.then:                                          ; preds = %if.end, %entry
178  %.lcssa = phi i32 [ %2, %entry ], [ %6, %if.end ]
179  %call = tail call signext i32 %FP(i32 signext %.lcssa) #1
180  ret i32 %call
181
; Loop block (branches back to itself): store the incremented value to @ga,
; then repeat the same load/compare/load sequence as the entry block. Leaves
; for if.then once the freshly loaded @ga is greater than @gb.
182if.end:                                           ; preds = %entry, %if.end
183  %3 = phi i32 [ %6, %if.end ], [ %2, %entry ]
184  %inc = add nsw i32 %3, 1
185  store volatile i32 %inc, ptr @ga, align 4
186  %4 = load volatile i32, ptr @ga, align 4
187  %5 = load volatile i32, ptr @gb, align 4
188  %cmp = icmp sgt i32 %4, %5
189  %6 = load volatile i32, ptr @ga, align 4
190  br i1 %cmp, label %if.then, label %if.end
191}
192