; xref: /llvm-project/llvm/test/CodeGen/SystemZ/memcpy-01.ll (revision 5baa4b8e1164b3635ef9220104159988f4ee836a)
; Test memcpy using MVC.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; RUN: llc -mtriple=s390x-linux-gnu -filetype=null %s

; Intrinsics under test: memcpy taking its length as an i32 and as an i64.
; Operands are (dest, src, length, volatile flag); every call in this file
; passes "i1 false" for the last operand.
declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1) nounwind
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind
; External function that f12, f13 and f16 call with both pointers immediately
; before and after the copy.
declare void @foo(ptr, ptr)

; Test a no-op move, i32 version.
;
; A copy with a constant length of 0 should produce no code: neither pointer
; argument register (%r2 = %dest, %r3 = %src) may appear between the function
; label and the return.
define void @f1(ptr %dest, ptr %src) {
; CHECK-LABEL: f1:
; CHECK-NOT: %r2
; CHECK-NOT: %r3
; CHECK: br %r14
  call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %src, i32 0, i1 false)
  ret void
}

; Test a no-op move, i64 version.
;
; Same as f1 but with the length passed as an i64: a zero-length copy must not
; touch %r2 (%dest) or %r3 (%src) before returning.
define void @f2(ptr %dest, ptr %src) {
; CHECK-LABEL: f2:
; CHECK-NOT: %r2
; CHECK-NOT: %r3
; CHECK: br %r14
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 0, i1 false)
  ret void
}

; Test a 1-byte move, i32 version.
;
; The smallest non-empty copy: expected to become one MVC with a length of 1,
; destination addressed through %r2 and source through %r3.
define void @f3(ptr %dest, ptr %src) {
; CHECK-LABEL: f3:
; CHECK: mvc 0(1,%r2), 0(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %src, i32 1, i1 false)
  ret void
}

; Test a 1-byte move, i64 version.
;
; Same as f3 but with an i64 length operand; the expected MVC is identical.
define void @f4(ptr %dest, ptr %src) {
; CHECK-LABEL: f4:
; CHECK: mvc 0(1,%r2), 0(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 1, i1 false)
  ret void
}

; Test the upper range of a single MVC, i32 version.
;
; 256 bytes is the largest copy this file expects from one MVC; f7 shows that
; 257 bytes already needs a second instruction.
define void @f5(ptr %dest, ptr %src) {
; CHECK-LABEL: f5:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %src, i32 256, i1 false)
  ret void
}

; Test the upper range of a single MVC, i64 version.
;
; Same as f5 but with an i64 length operand: 256 bytes still fits one MVC.
define void @f6(ptr %dest, ptr %src) {
; CHECK-LABEL: f6:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 256, i1 false)
  ret void
}

; Test the first case that needs two MVCs.
;
; 257 bytes = one full 256-byte block followed by a 1-byte block at offset 256
; on both operands.
define void @f7(ptr %dest, ptr %src) {
; CHECK-LABEL: f7:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: mvc 256(1,%r2), 256(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %src, i32 257, i1 false)
  ret void
}

; Test the last-but-one case that needs two MVCs.
;
; 511 bytes = one full 256-byte block followed by a 255-byte block.
define void @f8(ptr %dest, ptr %src) {
; CHECK-LABEL: f8:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: mvc 256(255,%r2), 256(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 511, i1 false)
  ret void
}

; Test the last case that needs two MVCs.
;
; 512 bytes = exactly two full 256-byte blocks.
define void @f9(ptr %dest, ptr %src) {
; CHECK-LABEL: f9:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: mvc 256(256,%r2), 256(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 512, i1 false)
  ret void
}

; Test an arbitrary value that uses straight-line code.
;
; 1279 bytes = four full 256-byte blocks plus a 255-byte tail, expected as
; five consecutive MVCs with no loop. f11, f12 and f13 reuse this length with
; different base addresses.
define void @f10(ptr %dest, ptr %src) {
; CHECK-LABEL: f10:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: mvc 256(256,%r2), 256(%r3)
; CHECK: mvc 512(256,%r2), 512(%r3)
; CHECK: mvc 768(256,%r2), 768(%r3)
; CHECK: mvc 1024(255,%r2), 1024(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 1279, i1 false)
  ret void
}

; ...and again in cases where not all parts are in range of MVC.
;
; The same 1279-byte copy as f10, but starting 4000 bytes past the first
; argument (destination, %r2) and 3500 bytes past the second (source, %r3).
; MVC displacements are 12-bit unsigned per the z/Architecture reference, so
; offsets above 4095 cannot be encoded directly:
;   - the second destination block would sit at 4256, so a new destination
;     base is formed with lay and the remaining blocks use 0/256/512/768 from
;     it;
;   - the source stays addressable from %r3 for 3500, 3756 and 4012, then
;     needs its own lay at 4268 for the last two blocks.
; The lay results are matched as %r1-%r5 (never %r0) because they are used as
; base registers.
define void @f11(ptr %destbase, ptr %srcbase) {
; CHECK-LABEL: f11:
; CHECK: mvc 4000(256,%r2), 3500(%r3)
; CHECK: lay [[NEWDEST:%r[1-5]]], 4256(%r2)
; CHECK: mvc 0(256,[[NEWDEST]]), 3756(%r3)
; CHECK: mvc 256(256,[[NEWDEST]]), 4012(%r3)
; CHECK: lay [[NEWSRC:%r[1-5]]], 4268(%r3)
; CHECK: mvc 512(256,[[NEWDEST]]), 0([[NEWSRC]])
; CHECK: mvc 768(255,[[NEWDEST]]), 256([[NEWSRC]])
; CHECK: br %r14
  %dest = getelementptr i8, ptr %destbase, i64 4000
  %src = getelementptr i8, ptr %srcbase, i64 3500
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 1279, i1 false)
  ret void
}

; ...and again with a destination frame base that goes out of range.
;
; Both operands live in one 6000-byte stack object. The expected displacements
; place %arr at 176(%r15): destination 3900 + 176 = 4076 and source
; 1924 + 176 = 2100. The second destination block would start at 4332, past
; what MVC can address from %r15, so a new destination base is formed with
; lay. All five source blocks (2100 through 3124) remain addressable from
; %r15. The copy must appear between the two calls to @foo.
define void @f12() {
; CHECK-LABEL: f12:
; CHECK: brasl %r14, foo@PLT
; CHECK: mvc 4076(256,%r15), 2100(%r15)
; CHECK: lay [[NEWDEST:%r[1-5]]], 4332(%r15)
; CHECK: mvc 0(256,[[NEWDEST]]), 2356(%r15)
; CHECK: mvc 256(256,[[NEWDEST]]), 2612(%r15)
; CHECK: mvc 512(256,[[NEWDEST]]), 2868(%r15)
; CHECK: mvc 768(255,[[NEWDEST]]), 3124(%r15)
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
  %arr = alloca [6000 x i8]
  %dest = getelementptr [6000 x i8], ptr %arr, i64 0, i64 3900
  %src = getelementptr [6000 x i8], ptr %arr, i64 0, i64 1924
  call void @foo(ptr %dest, ptr %src)
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 1279, i1 false)
  call void @foo(ptr %dest, ptr %src)
  ret void
}

; ...and again with a source frame base that goes out of range.
;
; Mirror image of f12. With %arr at 176(%r15), the destination starts at
; 24 + 176 = 200 and stays addressable from %r15 for all five blocks (200
; through 1224). The source starts at 3650 + 176 = 3826; its third block
; would be at 4338, so a new source base is formed with lay and the last
; three blocks use 0/256/512 from it.
define void @f13() {
; CHECK-LABEL: f13:
; CHECK: brasl %r14, foo@PLT
; CHECK: mvc 200(256,%r15), 3826(%r15)
; CHECK: mvc 456(256,%r15), 4082(%r15)
; CHECK: lay [[NEWSRC:%r[1-5]]], 4338(%r15)
; CHECK: mvc 712(256,%r15), 0([[NEWSRC]])
; CHECK: mvc 968(256,%r15), 256([[NEWSRC]])
; CHECK: mvc 1224(255,%r15), 512([[NEWSRC]])
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
  %arr = alloca [6000 x i8]
  %dest = getelementptr [6000 x i8], ptr %arr, i64 0, i64 24
  %src = getelementptr [6000 x i8], ptr %arr, i64 0, i64 3650
  call void @foo(ptr %dest, ptr %src)
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 1279, i1 false)
  call void @foo(ptr %dest, ptr %src)
  ret void
}

; Test the last case that is done using straight-line code.
;
; 1536 bytes = six full 256-byte blocks, expected as six consecutive MVCs.
; One byte more (f15) switches the expected output to a loop.
define void @f14(ptr %dest, ptr %src) {
; CHECK-LABEL: f14:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: mvc 256(256,%r2), 256(%r3)
; CHECK: mvc 512(256,%r2), 512(%r3)
; CHECK: mvc 768(256,%r2), 768(%r3)
; CHECK: mvc 1024(256,%r2), 1024(%r3)
; CHECK: mvc 1280(256,%r2), 1280(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 1536, i1 false)
  ret void
}

; Test the first case that is done using a loop.
;
; 1537 bytes = 6 * 256 + 1. Expected shape:
;   - a counter register is loaded with 6;
;   - each iteration prefetches 768(%r2) (three blocks ahead of the
;     destination pointer), copies one 256-byte block, advances both
;     pointers by 256 and branches back on the counter;
;   - the remaining single byte is copied by one MVC after the loop.
define void @f15(ptr %dest, ptr %src) {
; CHECK-LABEL: f15:
; CHECK: lghi [[COUNT:%r[0-5]]], 6
; CHECK: [[LABEL:\.L[^:]*]]:
; CHECK: pfd 2, 768(%r2)
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: la %r2, 256(%r2)
; CHECK: la %r3, 256(%r3)
; CHECK: brctg [[COUNT]], [[LABEL]]
; CHECK: mvc 0(1,%r2), 0(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 1537, i1 false)
  ret void
}

; ...and again with frame bases, where the base must be loaded into a
; register before the loop.
;
; Source is the start of a 3200-byte stack object and the destination is 1600
; bytes into the same object, so one base register serves both operands. It
; is set to 160(%r15), and the loop advances only that register. The
; destination is addressed at 1600 from it and the prefetch at
; 2368 = 1600 + 768. The counter load and the base setup may appear in either
; order. The loop runs 6 times and a 1-byte MVC handles the tail of the
; 1537-byte copy, all between the two calls to @foo.
define void @f16() {
; CHECK-LABEL: f16:
; CHECK: brasl %r14, foo@PLT
; CHECK-DAG: lghi [[COUNT:%r[0-5]]], 6
; CHECK-DAG: la [[BASE:%r[0-5]]], 160(%r15)
; CHECK: [[LABEL:\.L[^:]*]]:
; CHECK: pfd 2, 2368([[BASE]])
; CHECK: mvc 1600(256,[[BASE]]), 0([[BASE]])
; CHECK: la [[BASE]], 256([[BASE]])
; CHECK: brctg [[COUNT]], [[LABEL]]
; CHECK: mvc 1600(1,[[BASE]]), 0([[BASE]])
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
  %arr = alloca [3200 x i8]
  %dest = getelementptr [3200 x i8], ptr %arr, i64 0, i64 1600
  call void @foo(ptr %dest, ptr %arr)
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %arr, i64 1537, i1 false)
  call void @foo(ptr %dest, ptr %arr)
  ret void
}

; Test a variable length loop.
;
; The length arrives in %r4 and is not a compile-time constant. Expected
; sequence, matched line by line:
;   - subtract 1 from the length and return at once if the result is -1
;     (the original length was 0);
;   - shift the decremented length right by 8 to get the number of full
;     256-byte blocks in %r0, skipping the loop when that is 0;
;   - loop body as in f15: prefetch, 256-byte MVC, advance both pointers,
;     branch on count;
;   - finish with exrl on %r4, which executes the MVC template at .Ltmp0 to
;     copy the remaining bytes.
; NOTE(review): the block labels are spelled out literally (.LBB16_*); the 16
; appears to be this function's zero-based position among the definitions in
; the file, so adding or removing a function above f17 would presumably
; require renumbering them - confirm before reordering.
define void @f17(ptr %dest, ptr %src, i64 %Len) {
; CHECK-LABEL: f17:
; CHECK:       # %bb.0:
; CHECK-NEXT:    aghi %r4, -1
; CHECK-NEXT:    cgibe %r4, -1, 0(%r14)
; CHECK-NEXT:  .LBB16_1:
; CHECK-NEXT:    srlg %r0, %r4, 8
; CHECK-NEXT:    cgije %r0, 0, .LBB16_3
; CHECK-NEXT:  .LBB16_2: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    pfd 2, 768(%r2)
; CHECK-NEXT:    mvc 0(256,%r2), 0(%r3)
; CHECK-NEXT:    la %r2, 256(%r2)
; CHECK-NEXT:    la %r3, 256(%r3)
; CHECK-NEXT:    brctg %r0, .LBB16_2
; CHECK-NEXT:  .LBB16_3:
; CHECK-NEXT:    exrl %r4, .Ltmp0
; CHECK-NEXT:    br %r14
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 %Len, i1 false)
  ret void
}

; Out-of-line MVC template that the exrl in f17 refers to.
; CHECK:       .Ltmp0:
; CHECK-NEXT:    mvc 0(1,%r2), 0(%r3)
