xref: /llvm-project/llvm/test/ThinLTO/AArch64/cgdata-merge-read.ll (revision d23c5c2d6566fce4380cfa31d438422db19fbce9)
1; This test demonstrates how similar functions are handled during global function merging.
2; Currently, we do not attempt to share a merged function for identical sequences.
3; Instead, each merging instance is created uniquely.
4
5; RUN: rm -rf %t; split-file %s %t
6
7; RUN: opt -module-summary -module-hash %t/foo.ll -o %t-foo.bc
8; RUN: opt -module-summary -module-hash %t/goo.ll -o %t-goo.bc
9
10; First, run with -codegen-data-generate=true to generate the cgdata in the object files.
11; Using llvm-cgdata, merge the cg data.
12; RUN: llvm-lto2 run -enable-global-merge-func=true -codegen-data-generate=true %t-foo.bc %t-goo.bc -o %tout-write \
13; RUN:    -r %t-foo.bc,_f1,px \
14; RUN:    -r %t-goo.bc,_f2,px \
15; RUN:    -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \
16; RUN:    -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l
17; RUN: llvm-cgdata --merge -o %tout.cgdata %tout-write.1 %tout-write.2
18
19; Now run with -codegen-data-use-path=%tout.cgdata to optimize the binary.
20; Each module has its own merging instance as it is matched against the merged cgdata.
21; RUN: llvm-lto2 run -enable-global-merge-func=true \
22; RUN:    -codegen-data-use-path=%tout.cgdata \
23; RUN:    %t-foo.bc %t-goo.bc -o %tout-read \
24; RUN:    -r %t-foo.bc,_f1,px \
25; RUN:    -r %t-goo.bc,_f2,px \
26; RUN:    -r %t-foo.bc,_g,l -r %t-foo.bc,_g1,l -r %t-foo.bc,_g2,l \
27; RUN:    -r %t-goo.bc,_g,l -r %t-goo.bc,_g1,l -r %t-goo.bc,_g2,l
28; RUN: llvm-nm %tout-read.1 | FileCheck %s --check-prefix=READ1
29; RUN: llvm-nm %tout-read.2 | FileCheck %s --check-prefix=READ2
30; RUN: llvm-objdump -d %tout-read.1 | FileCheck %s --check-prefix=THUNK1
31; RUN: llvm-objdump -d %tout-read.2 | FileCheck %s --check-prefix=THUNK2
32
33; READ1: _f1.Tgm
34; READ2: _f2.Tgm
35
36; THUNK1: <_f1>:
37; THUNK1-NEXT: adrp x1,
38; THUNK1-NEXT: ldr x1, [x1]
39; THUNK1-NEXT: b
40
41; THUNK2: <_f2>:
42; THUNK2-NEXT: adrp x1,
43; THUNK2-NEXT: ldr x1, [x1]
44; THUNK2-NEXT: b
45
46;--- foo.ll
47target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
48target triple = "arm64-unknown-ios12.0.0"
49
50@g = external local_unnamed_addr global [0 x i32], align 4
51@g1 = external global i32, align 4
52@g2 = external global i32, align 4
53
54define i32 @f1(i32 %a) { ; returns g[a] * g1 + 1; same shape as @f2 in goo.ll apart from the volatile global
55entry:
56  %idxprom = sext i32 %a to i64 ; sign-extend the i32 index to i64 for the GEP
57  %arrayidx = getelementptr inbounds [0 x i32], ptr @g, i64 0, i64 %idxprom ; address of g[a]
58  %0 = load i32, ptr %arrayidx, align 4
59  %1 = load volatile i32, ptr @g1, align 4 ; @g1 here, @g2 in @f2: the one operand that differs
60  %mul = mul nsw i32 %1, %0
61  %add = add nsw i32 %mul, 1
62  ret i32 %add
63}
64
65;--- goo.ll
66target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
67target triple = "arm64-unknown-ios12.0.0"
68
69@g = external local_unnamed_addr global [0 x i32], align 4
70@g1 = external global i32, align 4
71@g2 = external global i32, align 4
72
73define i32 @f2(i32 %a) { ; returns g[a] * g2 + 1; same shape as @f1 in foo.ll apart from the volatile global
74entry:
75  %idxprom = sext i32 %a to i64 ; sign-extend the i32 index to i64 for the GEP
76  %arrayidx = getelementptr inbounds [0 x i32], ptr @g, i64 0, i64 %idxprom ; address of g[a]
77  %0 = load i32, ptr %arrayidx, align 4
78  %1 = load volatile i32, ptr @g2, align 4 ; @g2 here, @g1 in @f1: the one operand that differs
79  %mul = mul nsw i32 %1, %0
80  %add = add nsw i32 %mul, 1
81  ret i32 %add
82}
83