xref: /llvm-project/llvm/test/CodeGen/RISCV/memcpy.ll (revision 681c4a2068702f7483608b89e7a7e9235faf6bd9)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv32 \
; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32
; RUN: llc < %s -mtriple=riscv64 \
; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64
; RUN: llc < %s -mtriple=riscv32 -mattr=+unaligned-scalar-mem \
; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST
; RUN: llc < %s -mtriple=riscv64 -mattr=+unaligned-scalar-mem \
; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST
104f4f4913SPhilip Reames
; ----------------------------------------------------------------------
; Fully unaligned cases
134f4f4913SPhilip Reames
; Zero-length copy with no alignment attributes on the pointers: every
; configuration is expected to emit nothing but a bare ret.
define void @unaligned_memcpy0(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: unaligned_memcpy0:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: unaligned_memcpy0:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 0, i1 false)
  ret void
}
2659bba39aSPengcheng Wang
; One-byte copy with no alignment attributes: a single lbu/sb pair is
; expected on every configuration.
define void @unaligned_memcpy1(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: unaligned_memcpy1:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lbu a1, 0(a1)
; RV32-BOTH-NEXT:    sb a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: unaligned_memcpy1:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    lbu a1, 0(a1)
; RV64-BOTH-NEXT:    sb a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 1, i1 false)
  ret void
}
434637c777SPengcheng Wang
; Two-byte copy with no alignment attributes: the default runs expect two
; byte-wise lbu/sb pairs, the +unaligned-scalar-mem runs a single lh/sh pair.
define void @unaligned_memcpy2(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy2:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 1(a1)
; RV32-NEXT:    sb a2, 1(a0)
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    sb a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy2:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 1(a1)
; RV64-NEXT:    sb a2, 1(a0)
; RV64-NEXT:    lbu a1, 0(a1)
; RV64-NEXT:    sb a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy2:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lh a1, 0(a1)
; RV32-FAST-NEXT:    sh a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy2:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lh a1, 0(a1)
; RV64-FAST-NEXT:    sh a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 2, i1 false)
  ret void
}
76*681c4a20SPengcheng Wang
; Three-byte copy with no alignment attributes: the default runs expect three
; lbu/sb pairs, the +unaligned-scalar-mem runs a byte copy at offset 2 plus a
; halfword copy at offset 0.
define void @unaligned_memcpy3(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy3:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 2(a1)
; RV32-NEXT:    sb a2, 2(a0)
; RV32-NEXT:    lbu a2, 1(a1)
; RV32-NEXT:    sb a2, 1(a0)
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    sb a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy3:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 2(a1)
; RV64-NEXT:    sb a2, 2(a0)
; RV64-NEXT:    lbu a2, 1(a1)
; RV64-NEXT:    sb a2, 1(a0)
; RV64-NEXT:    lbu a1, 0(a1)
; RV64-NEXT:    sb a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy3:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lbu a2, 2(a1)
; RV32-FAST-NEXT:    sb a2, 2(a0)
; RV32-FAST-NEXT:    lh a1, 0(a1)
; RV32-FAST-NEXT:    sh a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy3:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lbu a2, 2(a1)
; RV64-FAST-NEXT:    sb a2, 2(a0)
; RV64-FAST-NEXT:    lh a1, 0(a1)
; RV64-FAST-NEXT:    sh a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 3, i1 false)
  ret void
}
117*681c4a20SPengcheng Wang
; Four-byte copy with no alignment attributes: the default runs expect four
; lbu/sb pairs, the +unaligned-scalar-mem runs a single lw/sw pair.
define void @unaligned_memcpy4(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy4:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 3(a1)
; RV32-NEXT:    sb a2, 3(a0)
; RV32-NEXT:    lbu a2, 2(a1)
; RV32-NEXT:    sb a2, 2(a0)
; RV32-NEXT:    lbu a2, 1(a1)
; RV32-NEXT:    sb a2, 1(a0)
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    sb a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy4:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 3(a1)
; RV64-NEXT:    sb a2, 3(a0)
; RV64-NEXT:    lbu a2, 2(a1)
; RV64-NEXT:    sb a2, 2(a0)
; RV64-NEXT:    lbu a2, 1(a1)
; RV64-NEXT:    sb a2, 1(a0)
; RV64-NEXT:    lbu a1, 0(a1)
; RV64-NEXT:    sb a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy4:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy4:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lw a1, 0(a1)
; RV64-FAST-NEXT:    sw a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 4, i1 false)
  ret void
}
15859bba39aSPengcheng Wang
; Seven-byte copy with no alignment attributes: the default runs expect seven
; lbu/sb pairs, the +unaligned-scalar-mem runs two overlapping lw/sw pairs at
; offsets 3 and 0.
define void @unaligned_memcpy7(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy7:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 6(a1)
; RV32-NEXT:    sb a2, 6(a0)
; RV32-NEXT:    lbu a2, 5(a1)
; RV32-NEXT:    sb a2, 5(a0)
; RV32-NEXT:    lbu a2, 4(a1)
; RV32-NEXT:    sb a2, 4(a0)
; RV32-NEXT:    lbu a2, 3(a1)
; RV32-NEXT:    sb a2, 3(a0)
; RV32-NEXT:    lbu a2, 2(a1)
; RV32-NEXT:    sb a2, 2(a0)
; RV32-NEXT:    lbu a2, 1(a1)
; RV32-NEXT:    sb a2, 1(a0)
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    sb a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy7:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 6(a1)
; RV64-NEXT:    sb a2, 6(a0)
; RV64-NEXT:    lbu a2, 5(a1)
; RV64-NEXT:    sb a2, 5(a0)
; RV64-NEXT:    lbu a2, 4(a1)
; RV64-NEXT:    sb a2, 4(a0)
; RV64-NEXT:    lbu a2, 3(a1)
; RV64-NEXT:    sb a2, 3(a0)
; RV64-NEXT:    lbu a2, 2(a1)
; RV64-NEXT:    sb a2, 2(a0)
; RV64-NEXT:    lbu a2, 1(a1)
; RV64-NEXT:    sb a2, 1(a0)
; RV64-NEXT:    lbu a1, 0(a1)
; RV64-NEXT:    sb a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy7:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 3(a1)
; RV32-FAST-NEXT:    sw a2, 3(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy7:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lw a2, 3(a1)
; RV64-FAST-NEXT:    sw a2, 3(a0)
; RV64-FAST-NEXT:    lw a1, 0(a1)
; RV64-FAST-NEXT:    sw a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 7, i1 false)
  ret void
}
21559bba39aSPengcheng Wang
; Eight-byte copy with no alignment attributes: the default runs expect eight
; lbu/sb pairs; with +unaligned-scalar-mem, riscv32 expects two lw/sw pairs
; and riscv64 a single ld/sd pair.
define void @unaligned_memcpy8(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy8:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 7(a1)
; RV32-NEXT:    sb a2, 7(a0)
; RV32-NEXT:    lbu a2, 6(a1)
; RV32-NEXT:    sb a2, 6(a0)
; RV32-NEXT:    lbu a2, 5(a1)
; RV32-NEXT:    sb a2, 5(a0)
; RV32-NEXT:    lbu a2, 4(a1)
; RV32-NEXT:    sb a2, 4(a0)
; RV32-NEXT:    lbu a2, 3(a1)
; RV32-NEXT:    sb a2, 3(a0)
; RV32-NEXT:    lbu a2, 2(a1)
; RV32-NEXT:    sb a2, 2(a0)
; RV32-NEXT:    lbu a2, 1(a1)
; RV32-NEXT:    sb a2, 1(a0)
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    sb a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy8:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 7(a1)
; RV64-NEXT:    sb a2, 7(a0)
; RV64-NEXT:    lbu a2, 6(a1)
; RV64-NEXT:    sb a2, 6(a0)
; RV64-NEXT:    lbu a2, 5(a1)
; RV64-NEXT:    sb a2, 5(a0)
; RV64-NEXT:    lbu a2, 4(a1)
; RV64-NEXT:    sb a2, 4(a0)
; RV64-NEXT:    lbu a2, 3(a1)
; RV64-NEXT:    sb a2, 3(a0)
; RV64-NEXT:    lbu a2, 2(a1)
; RV64-NEXT:    sb a2, 2(a0)
; RV64-NEXT:    lbu a2, 1(a1)
; RV64-NEXT:    sb a2, 1(a0)
; RV64-NEXT:    lbu a1, 0(a1)
; RV64-NEXT:    sb a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy8:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 4(a1)
; RV32-FAST-NEXT:    sw a2, 4(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy8:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 8, i1 false)
  ret void
}
2744637c777SPengcheng Wang
; Fifteen-byte copy with no alignment attributes: the default runs expect a
; tail call to memcpy with the length in a2. With +unaligned-scalar-mem the
; copy is expected inline: four lw/sw pairs on riscv32 (the one at offset 11
; overlaps the one at offset 8) and two overlapping ld/sd pairs on riscv64.
define void @unaligned_memcpy15(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy15:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    li a2, 15
; RV32-NEXT:    tail memcpy
;
; RV64-LABEL: unaligned_memcpy15:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    li a2, 15
; RV64-NEXT:    tail memcpy
;
; RV32-FAST-LABEL: unaligned_memcpy15:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 11(a1)
; RV32-FAST-NEXT:    sw a2, 11(a0)
; RV32-FAST-NEXT:    lw a2, 8(a1)
; RV32-FAST-NEXT:    sw a2, 8(a0)
; RV32-FAST-NEXT:    lw a2, 4(a1)
; RV32-FAST-NEXT:    sw a2, 4(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy15:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a2, 7(a1)
; RV64-FAST-NEXT:    sd a2, 7(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 15, i1 false)
  ret void
}
309*681c4a20SPengcheng Wang
; Sixteen-byte copy with no alignment attributes: the default runs expect a
; tail call to memcpy with the length in a2. With +unaligned-scalar-mem the
; copy is expected inline: four lw/sw pairs on riscv32, two ld/sd pairs on
; riscv64.
define void @unaligned_memcpy16(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy16:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    li a2, 16
; RV32-NEXT:    tail memcpy
;
; RV64-LABEL: unaligned_memcpy16:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    li a2, 16
; RV64-NEXT:    tail memcpy
;
; RV32-FAST-LABEL: unaligned_memcpy16:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 12(a1)
; RV32-FAST-NEXT:    sw a2, 12(a0)
; RV32-FAST-NEXT:    lw a2, 8(a1)
; RV32-FAST-NEXT:    sw a2, 8(a0)
; RV32-FAST-NEXT:    lw a2, 4(a1)
; RV32-FAST-NEXT:    sw a2, 4(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy16:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a2, 8(a1)
; RV64-FAST-NEXT:    sd a2, 8(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 16, i1 false)
  ret void
}
344*681c4a20SPengcheng Wang
; Thirty-one-byte copy with no alignment attributes: the default runs expect
; a tail call to memcpy with the length in a2. With +unaligned-scalar-mem the
; copy is expected inline: eight lw/sw pairs on riscv32 (the one at offset 27
; overlaps the one at offset 24) and four ld/sd pairs on riscv64 (the one at
; offset 23 overlaps the one at offset 16).
define void @unaligned_memcpy31(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy31:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    li a2, 31
; RV32-NEXT:    tail memcpy
;
; RV64-LABEL: unaligned_memcpy31:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    li a2, 31
; RV64-NEXT:    tail memcpy
;
; RV32-FAST-LABEL: unaligned_memcpy31:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 27(a1)
; RV32-FAST-NEXT:    sw a2, 27(a0)
; RV32-FAST-NEXT:    lw a2, 24(a1)
; RV32-FAST-NEXT:    sw a2, 24(a0)
; RV32-FAST-NEXT:    lw a2, 20(a1)
; RV32-FAST-NEXT:    sw a2, 20(a0)
; RV32-FAST-NEXT:    lw a2, 16(a1)
; RV32-FAST-NEXT:    sw a2, 16(a0)
; RV32-FAST-NEXT:    lw a2, 12(a1)
; RV32-FAST-NEXT:    sw a2, 12(a0)
; RV32-FAST-NEXT:    lw a2, 8(a1)
; RV32-FAST-NEXT:    sw a2, 8(a0)
; RV32-FAST-NEXT:    lw a2, 4(a1)
; RV32-FAST-NEXT:    sw a2, 4(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy31:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a2, 23(a1)
; RV64-FAST-NEXT:    sd a2, 23(a0)
; RV64-FAST-NEXT:    ld a2, 16(a1)
; RV64-FAST-NEXT:    sd a2, 16(a0)
; RV64-FAST-NEXT:    ld a2, 8(a1)
; RV64-FAST-NEXT:    sd a2, 8(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 31, i1 false)
  ret void
}
391*681c4a20SPengcheng Wang
; ----------------------------------------------------------------------
; Fully aligned cases
394*681c4a20SPengcheng Wang
; Zero-length copy with both pointers marked align 8: every configuration is
; expected to emit nothing but a bare ret.
define void @aligned_memcpy0(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy0:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy0:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 0, i1 false)
  ret void
}
407*681c4a20SPengcheng Wang
; One-byte copy with both pointers marked align 8: a single lbu/sb pair is
; expected on every configuration.
define void @aligned_memcpy1(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy1:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lbu a1, 0(a1)
; RV32-BOTH-NEXT:    sb a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy1:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    lbu a1, 0(a1)
; RV64-BOTH-NEXT:    sb a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 1, i1 false)
  ret void
}
424*681c4a20SPengcheng Wang
; Two-byte copy with both pointers marked align 8: a single lh/sh pair is
; expected on every configuration.
define void @aligned_memcpy2(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy2:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lh a1, 0(a1)
; RV32-BOTH-NEXT:    sh a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy2:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    lh a1, 0(a1)
; RV64-BOTH-NEXT:    sh a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 2, i1 false)
  ret void
}
441*681c4a20SPengcheng Wang
; Three-byte copy with both pointers marked align 8: every configuration is
; expected to copy a byte at offset 2 and a halfword at offset 0.
define void @aligned_memcpy3(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy3:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lbu a2, 2(a1)
; RV32-BOTH-NEXT:    sb a2, 2(a0)
; RV32-BOTH-NEXT:    lh a1, 0(a1)
; RV32-BOTH-NEXT:    sh a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy3:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    lbu a2, 2(a1)
; RV64-BOTH-NEXT:    sb a2, 2(a0)
; RV64-BOTH-NEXT:    lh a1, 0(a1)
; RV64-BOTH-NEXT:    sh a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 3, i1 false)
  ret void
}
462*681c4a20SPengcheng Wang
; Four-byte copy with both pointers marked align 8: a single lw/sw pair is
; expected on every configuration.
define void @aligned_memcpy4(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy4:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lw a1, 0(a1)
; RV32-BOTH-NEXT:    sw a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy4:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    lw a1, 0(a1)
; RV64-BOTH-NEXT:    sw a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 4, i1 false)
  ret void
}
479*681c4a20SPengcheng Wang
; Seven-byte copy with both pointers marked align 8: the default runs expect
; a byte at offset 6, a halfword at offset 4 and a word at offset 0; the
; +unaligned-scalar-mem runs expect two overlapping lw/sw pairs at offsets 3
; and 0.
define void @aligned_memcpy7(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: aligned_memcpy7:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 6(a1)
; RV32-NEXT:    sb a2, 6(a0)
; RV32-NEXT:    lh a2, 4(a1)
; RV32-NEXT:    sh a2, 4(a0)
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    sw a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: aligned_memcpy7:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 6(a1)
; RV64-NEXT:    sb a2, 6(a0)
; RV64-NEXT:    lh a2, 4(a1)
; RV64-NEXT:    sh a2, 4(a0)
; RV64-NEXT:    lw a1, 0(a1)
; RV64-NEXT:    sw a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: aligned_memcpy7:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 3(a1)
; RV32-FAST-NEXT:    sw a2, 3(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: aligned_memcpy7:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lw a2, 3(a1)
; RV64-FAST-NEXT:    sw a2, 3(a0)
; RV64-FAST-NEXT:    lw a1, 0(a1)
; RV64-FAST-NEXT:    sw a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 7, i1 false)
  ret void
}
520*681c4a20SPengcheng Wang
; Eight-byte copy with both pointers marked align 8: riscv32 expects two
; lw/sw pairs and riscv64 a single ld/sd pair, with or without
; +unaligned-scalar-mem.
define void @aligned_memcpy8(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy8:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lw a2, 4(a1)
; RV32-BOTH-NEXT:    sw a2, 4(a0)
; RV32-BOTH-NEXT:    lw a1, 0(a1)
; RV32-BOTH-NEXT:    sw a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy8:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    ld a1, 0(a1)
; RV64-BOTH-NEXT:    sd a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 8, i1 false)
  ret void
}
539*681c4a20SPengcheng Wang
; Fifteen-byte copy with both pointers marked align 8. Default runs expect a
; byte at offset 14 and a halfword at offset 12, followed by three words on
; riscv32 or one word plus one doubleword on riscv64. With
; +unaligned-scalar-mem, riscv32 expects four lw/sw pairs (the one at offset
; 11 overlaps the one at offset 8) and riscv64 two overlapping ld/sd pairs.
define void @aligned_memcpy15(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: aligned_memcpy15:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 14(a1)
; RV32-NEXT:    sb a2, 14(a0)
; RV32-NEXT:    lh a2, 12(a1)
; RV32-NEXT:    sh a2, 12(a0)
; RV32-NEXT:    lw a2, 8(a1)
; RV32-NEXT:    sw a2, 8(a0)
; RV32-NEXT:    lw a2, 4(a1)
; RV32-NEXT:    sw a2, 4(a0)
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    sw a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: aligned_memcpy15:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 14(a1)
; RV64-NEXT:    sb a2, 14(a0)
; RV64-NEXT:    lh a2, 12(a1)
; RV64-NEXT:    sh a2, 12(a0)
; RV64-NEXT:    lw a2, 8(a1)
; RV64-NEXT:    sw a2, 8(a0)
; RV64-NEXT:    ld a1, 0(a1)
; RV64-NEXT:    sd a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: aligned_memcpy15:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 11(a1)
; RV32-FAST-NEXT:    sw a2, 11(a0)
; RV32-FAST-NEXT:    lw a2, 8(a1)
; RV32-FAST-NEXT:    sw a2, 8(a0)
; RV32-FAST-NEXT:    lw a2, 4(a1)
; RV32-FAST-NEXT:    sw a2, 4(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: aligned_memcpy15:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a2, 7(a1)
; RV64-FAST-NEXT:    sd a2, 7(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 15, i1 false)
  ret void
}
590*681c4a20SPengcheng Wang
; 16-byte copy with both pointers 8-byte aligned.
; The expected lowering is the same with and without +unaligned-scalar-mem:
; four word copies on riscv32 and two doubleword copies on riscv64.
define void @aligned_memcpy16(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy16:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lw a2, 12(a1)
; RV32-BOTH-NEXT:    sw a2, 12(a0)
; RV32-BOTH-NEXT:    lw a2, 8(a1)
; RV32-BOTH-NEXT:    sw a2, 8(a0)
; RV32-BOTH-NEXT:    lw a2, 4(a1)
; RV32-BOTH-NEXT:    sw a2, 4(a0)
; RV32-BOTH-NEXT:    lw a1, 0(a1)
; RV32-BOTH-NEXT:    sw a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy16:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    ld a2, 8(a1)
; RV64-BOTH-NEXT:    sd a2, 8(a0)
; RV64-BOTH-NEXT:    ld a1, 0(a1)
; RV64-BOTH-NEXT:    sd a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 16, i1 false)
  ret void
}
615*681c4a20SPengcheng Wang
; 31-byte copy with both pointers 8-byte aligned.
; On riscv32 without +unaligned-scalar-mem the copy is expected to stay a
; library call (length 31 in a2, tail call to memcpy). The other three
; configurations expand it inline; with +unaligned-scalar-mem the last piece
; is an overlapping access at offset 27 (riscv32) or 23 (riscv64).
define void @aligned_memcpy31(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: aligned_memcpy31:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    li a2, 31
; RV32-NEXT:    tail memcpy
;
; RV64-LABEL: aligned_memcpy31:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 30(a1)
; RV64-NEXT:    sb a2, 30(a0)
; RV64-NEXT:    lh a2, 28(a1)
; RV64-NEXT:    sh a2, 28(a0)
; RV64-NEXT:    lw a2, 24(a1)
; RV64-NEXT:    sw a2, 24(a0)
; RV64-NEXT:    ld a2, 16(a1)
; RV64-NEXT:    sd a2, 16(a0)
; RV64-NEXT:    ld a2, 8(a1)
; RV64-NEXT:    sd a2, 8(a0)
; RV64-NEXT:    ld a1, 0(a1)
; RV64-NEXT:    sd a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: aligned_memcpy31:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 27(a1)
; RV32-FAST-NEXT:    sw a2, 27(a0)
; RV32-FAST-NEXT:    lw a2, 24(a1)
; RV32-FAST-NEXT:    sw a2, 24(a0)
; RV32-FAST-NEXT:    lw a2, 20(a1)
; RV32-FAST-NEXT:    sw a2, 20(a0)
; RV32-FAST-NEXT:    lw a2, 16(a1)
; RV32-FAST-NEXT:    sw a2, 16(a0)
; RV32-FAST-NEXT:    lw a2, 12(a1)
; RV32-FAST-NEXT:    sw a2, 12(a0)
; RV32-FAST-NEXT:    lw a2, 8(a1)
; RV32-FAST-NEXT:    sw a2, 8(a0)
; RV32-FAST-NEXT:    lw a2, 4(a1)
; RV32-FAST-NEXT:    sw a2, 4(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: aligned_memcpy31:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a2, 23(a1)
; RV64-FAST-NEXT:    sd a2, 23(a0)
; RV64-FAST-NEXT:    ld a2, 16(a1)
; RV64-FAST-NEXT:    sd a2, 16(a0)
; RV64-FAST-NEXT:    ld a2, 8(a1)
; RV64-FAST-NEXT:    sd a2, 8(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 31, i1 false)
  ret void
}
673*681c4a20SPengcheng Wang
; ------------------------------------------------------------------------
; A few partially aligned cases
676*681c4a20SPengcheng Wang
677*681c4a20SPengcheng Wang
; 16-byte copy where both pointers are only 4-byte aligned, issued through the
; i32-length overload of the memcpy intrinsic.
; riscv32 is expected to copy four words in either configuration. riscv64
; copies four words as well unless +unaligned-scalar-mem is enabled, in which
; case two doubleword copies are expected.
define void @memcpy16_align4(ptr nocapture %dest, ptr nocapture %src) nounwind {
; RV32-BOTH-LABEL: memcpy16_align4:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lw a2, 12(a1)
; RV32-BOTH-NEXT:    sw a2, 12(a0)
; RV32-BOTH-NEXT:    lw a2, 8(a1)
; RV32-BOTH-NEXT:    sw a2, 8(a0)
; RV32-BOTH-NEXT:    lw a2, 4(a1)
; RV32-BOTH-NEXT:    sw a2, 4(a0)
; RV32-BOTH-NEXT:    lw a1, 0(a1)
; RV32-BOTH-NEXT:    sw a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-LABEL: memcpy16_align4:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lw a2, 12(a1)
; RV64-NEXT:    sw a2, 12(a0)
; RV64-NEXT:    lw a2, 8(a1)
; RV64-NEXT:    sw a2, 8(a0)
; RV64-NEXT:    lw a2, 4(a1)
; RV64-NEXT:    sw a2, 4(a0)
; RV64-NEXT:    lw a1, 0(a1)
; RV64-NEXT:    sw a1, 0(a0)
; RV64-NEXT:    ret
;
; RV64-FAST-LABEL: memcpy16_align4:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a2, 8(a1)
; RV64-FAST-NEXT:    sd a2, 8(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.p0.p0.i32(ptr align 4 %dest, ptr align 4 %src, i32 16, i1 false)
  ret void
}
7144f4f4913SPhilip Reames
; 11-byte copy with both pointers 8-byte aligned, issued through the
; i32-length overload with a plain (non-tail) call; the function then returns
; 0, so every expected sequence ends with `li a0, 0` before `ret`.
; Without +unaligned-scalar-mem the tail is a halfword at offset 8 plus a byte
; at offset 10; with it, the tail is one overlapping word access at offset 7.
define i32 @memcpy11_align8(ptr nocapture %dest, ptr %src) {
; RV32-LABEL: memcpy11_align8:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 10(a1)
; RV32-NEXT:    sb a2, 10(a0)
; RV32-NEXT:    lh a2, 8(a1)
; RV32-NEXT:    sh a2, 8(a0)
; RV32-NEXT:    lw a2, 4(a1)
; RV32-NEXT:    sw a2, 4(a0)
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    sw a1, 0(a0)
; RV32-NEXT:    li a0, 0
; RV32-NEXT:    ret
;
; RV64-LABEL: memcpy11_align8:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 10(a1)
; RV64-NEXT:    sb a2, 10(a0)
; RV64-NEXT:    lh a2, 8(a1)
; RV64-NEXT:    sh a2, 8(a0)
; RV64-NEXT:    ld a1, 0(a1)
; RV64-NEXT:    sd a1, 0(a0)
; RV64-NEXT:    li a0, 0
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: memcpy11_align8:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 7(a1)
; RV32-FAST-NEXT:    sw a2, 7(a0)
; RV32-FAST-NEXT:    lw a2, 4(a1)
; RV32-FAST-NEXT:    sw a2, 4(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    li a0, 0
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: memcpy11_align8:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lw a2, 7(a1)
; RV64-FAST-NEXT:    sw a2, 7(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    li a0, 0
; RV64-FAST-NEXT:    ret
entry:
  call void @llvm.memcpy.p0.p0.i32(ptr align 8 %dest, ptr align 8 %src, i32 11, i1 false)
  ret i32 0
}
763*681c4a20SPengcheng Wang
; Declarations of the memcpy intrinsic for the two length types (i32 and i64)
; that the test functions in this file call.
declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1) nounwind
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind
766