; RUN: llc < %s -mtriple=armv7-linux-gnueabihf -arm-atomic-cfg-tidy=0 -float-abi=hard -mcpu=cortex-a9 -O3 | FileCheck %s

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32-S64"

; This test used to exercise vector spilling using vstmia/vldmia instructions,
; but the changes for PR:18825 prevent that spilling.

; VST1 and VLD1 are now used for spilling/restoring.
;
; TODO:
; More vldm instructions should probably be generated: the initial ones load
; some of the elements, and then a sequence of vldr instructions is used for
; the rest:
; vldr  d15, [r1, #104]
; vldr  d13, [r2, #96]
; vldr  d9, [r1, #120]
; vldr  d11, [r2, #112]
; vldr  d14, [r1, #96]
; vldr  d12, [r2, #88]
; vldr  d8, [r1, #112]
; vldr  d10, [r2, #104]
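;
; For instance (hypothetical codegen; r3 is just an illustrative scratch
; register), the two consecutive loads from [r1, #96] and [r1, #104] above
; could be folded into a single vldmia:
; add    r3, r1, #96
; vldmia r3, {d14, d15}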

; This pattern also repeats several times, which strongly suggests that a
; vld1.64 should be used to load the data:
; vldr  d16, [r1, #16]
; vldr  d17, [r1, #24]
; vst1.64 {d16, d17}, [lr:128]    @ 16-byte Spill
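;
; For instance (hypothetical codegen; again r3 is an illustrative scratch
; register), the two vldr instructions above could become a single 16-byte
; load:
; add     r3, r1, #16
; vld1.64 {d16, d17}, [r3]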

; CHECK: test:
; CHECK: vldmia r{{.*}}, {d{{.*}}, d{{.*}}}
; CHECK: vldmia r{{.*}}, {d{{.*}}, d{{.*}}}
define <16 x i64> @test(ptr %src0, ptr %src1) #0 {
entry:
  ; Load 16 i64 elements from %src0.
  %el.0 = load i64, ptr %src0, align 8
  %addr.1 = getelementptr inbounds i64, ptr %src0, i32 1
  %el.1 = load i64, ptr %addr.1, align 8
  %addr.2 = getelementptr inbounds i64, ptr %src0, i32 2
  %el.2 = load i64, ptr %addr.2, align 8
  %addr.3 = getelementptr inbounds i64, ptr %src0, i32 3
  %el.3 = load i64, ptr %addr.3, align 8
  %addr.4 = getelementptr inbounds i64, ptr %src0, i32 4
  %el.4 = load i64, ptr %addr.4, align 8
  %addr.5 = getelementptr inbounds i64, ptr %src0, i32 5
  %el.5 = load i64, ptr %addr.5, align 8
  %addr.6 = getelementptr inbounds i64, ptr %src0, i32 6
  %el.6 = load i64, ptr %addr.6, align 8
  %addr.7 = getelementptr inbounds i64, ptr %src0, i32 7
  %el.7 = load i64, ptr %addr.7, align 8
  %addr.8 = getelementptr inbounds i64, ptr %src0, i32 8
  %el.8 = load i64, ptr %addr.8, align 8
  %addr.9 = getelementptr inbounds i64, ptr %src0, i32 9
  %el.9 = load i64, ptr %addr.9, align 8
  %addr.10 = getelementptr inbounds i64, ptr %src0, i32 10
  %el.10 = load i64, ptr %addr.10, align 8
  %addr.11 = getelementptr inbounds i64, ptr %src0, i32 11
  %el.11 = load i64, ptr %addr.11, align 8
  %addr.12 = getelementptr inbounds i64, ptr %src0, i32 12
  %el.12 = load i64, ptr %addr.12, align 8
  %addr.13 = getelementptr inbounds i64, ptr %src0, i32 13
  %el.13 = load i64, ptr %addr.13, align 8
  %addr.14 = getelementptr inbounds i64, ptr %src0, i32 14
  %el.14 = load i64, ptr %addr.14, align 8
  %addr.15 = getelementptr inbounds i64, ptr %src0, i32 15
  %el.15 = load i64, ptr %addr.15, align 8

  ; Load 16 i64 elements from %src1.
  %el.0.1 = load i64, ptr %src1, align 8
  %addr.1.1 = getelementptr inbounds i64, ptr %src1, i32 1
  %el.1.1 = load i64, ptr %addr.1.1, align 8
  %addr.2.1 = getelementptr inbounds i64, ptr %src1, i32 2
  %el.2.1 = load i64, ptr %addr.2.1, align 8
  %addr.3.1 = getelementptr inbounds i64, ptr %src1, i32 3
  %el.3.1 = load i64, ptr %addr.3.1, align 8
  %addr.4.1 = getelementptr inbounds i64, ptr %src1, i32 4
  %el.4.1 = load i64, ptr %addr.4.1, align 8
  %addr.5.1 = getelementptr inbounds i64, ptr %src1, i32 5
  %el.5.1 = load i64, ptr %addr.5.1, align 8
  %addr.6.1 = getelementptr inbounds i64, ptr %src1, i32 6
  %el.6.1 = load i64, ptr %addr.6.1, align 8
  %addr.7.1 = getelementptr inbounds i64, ptr %src1, i32 7
  %el.7.1 = load i64, ptr %addr.7.1, align 8
  %addr.8.1 = getelementptr inbounds i64, ptr %src1, i32 8
  %el.8.1 = load i64, ptr %addr.8.1, align 8
  %addr.9.1 = getelementptr inbounds i64, ptr %src1, i32 9
  %el.9.1 = load i64, ptr %addr.9.1, align 8
  %addr.10.1 = getelementptr inbounds i64, ptr %src1, i32 10
  %el.10.1 = load i64, ptr %addr.10.1, align 8
  %addr.11.1 = getelementptr inbounds i64, ptr %src1, i32 11
  %el.11.1 = load i64, ptr %addr.11.1, align 8
  %addr.12.1 = getelementptr inbounds i64, ptr %src1, i32 12
  %el.12.1 = load i64, ptr %addr.12.1, align 8
  %addr.13.1 = getelementptr inbounds i64, ptr %src1, i32 13
  %el.13.1 = load i64, ptr %addr.13.1, align 8
  %addr.14.1 = getelementptr inbounds i64, ptr %src1, i32 14
  %el.14.1 = load i64, ptr %addr.14.1, align 8
  %addr.15.1 = getelementptr inbounds i64, ptr %src1, i32 15
  %el.15.1 = load i64, ptr %addr.15.1, align 8
  ; Build the first <16 x i64> vector from the %src0 elements.
  %vec.0 = insertelement <16 x i64> undef, i64 %el.0, i32 0
  %vec.1 = insertelement <16 x i64> %vec.0, i64 %el.1, i32 1
  %vec.2 = insertelement <16 x i64> %vec.1, i64 %el.2, i32 2
  %vec.3 = insertelement <16 x i64> %vec.2, i64 %el.3, i32 3
  %vec.4 = insertelement <16 x i64> %vec.3, i64 %el.4, i32 4
  %vec.5 = insertelement <16 x i64> %vec.4, i64 %el.5, i32 5
  %vec.6 = insertelement <16 x i64> %vec.5, i64 %el.6, i32 6
  %vec.7 = insertelement <16 x i64> %vec.6, i64 %el.7, i32 7
  %vec.8 = insertelement <16 x i64> %vec.7, i64 %el.8, i32 8
  %vec.9 = insertelement <16 x i64> %vec.8, i64 %el.9, i32 9
  %vec.10 = insertelement <16 x i64> %vec.9, i64 %el.10, i32 10
  %vec.11 = insertelement <16 x i64> %vec.10, i64 %el.11, i32 11
  %vec.12 = insertelement <16 x i64> %vec.11, i64 %el.12, i32 12
  %vec.13 = insertelement <16 x i64> %vec.12, i64 %el.13, i32 13
  %vec.14 = insertelement <16 x i64> %vec.13, i64 %el.14, i32 14
  %vec.15 = insertelement <16 x i64> %vec.14, i64 %el.15, i32 15
  ; The call clobbers registers, so values that are live across it must be
  ; spilled and restored.
  call void @capture(ptr %src0, ptr %src1)
  ; Build the second <16 x i64> vector from the %src1 elements.
  %vec.0.1 = insertelement <16 x i64> undef, i64 %el.0.1, i32 0
  %vec.1.1 = insertelement <16 x i64> %vec.0.1, i64 %el.1.1, i32 1
  %vec.2.1 = insertelement <16 x i64> %vec.1.1, i64 %el.2.1, i32 2
  %vec.3.1 = insertelement <16 x i64> %vec.2.1, i64 %el.3.1, i32 3
  %vec.4.1 = insertelement <16 x i64> %vec.3.1, i64 %el.4.1, i32 4
  %vec.5.1 = insertelement <16 x i64> %vec.4.1, i64 %el.5.1, i32 5
  %vec.6.1 = insertelement <16 x i64> %vec.5.1, i64 %el.6.1, i32 6
  %vec.7.1 = insertelement <16 x i64> %vec.6.1, i64 %el.7.1, i32 7
  %vec.8.1 = insertelement <16 x i64> %vec.7.1, i64 %el.7.1, i32 8
  %vec.9.1 = insertelement <16 x i64> %vec.8.1, i64 %el.8.1, i32 9
  %vec.10.1 = insertelement <16 x i64> %vec.9.1, i64 %el.9.1, i32 10
  %vec.11.1 = insertelement <16 x i64> %vec.10.1, i64 %el.10.1, i32 11
  %vec.12.1 = insertelement <16 x i64> %vec.11.1, i64 %el.11.1, i32 12
  %vec.13.1 = insertelement <16 x i64> %vec.12.1, i64 %el.12.1, i32 13
  %vec.14.1 = insertelement <16 x i64> %vec.13.1, i64 %el.13.1, i32 14
  %vec.15.1 = insertelement <16 x i64> %vec.14.1, i64 %el.14.1, i32 15
  %res = add <16 x i64> %vec.15, %vec.15.1
  ret <16 x i64> %res
}

declare void @capture(ptr, ptr)

attributes #0 = { noredzone "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }