; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv32 -mattr=+v \
; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32
; RUN: llc < %s -mtriple=riscv64 -mattr=+v \
; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+unaligned-scalar-mem,+unaligned-vector-mem \
; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+unaligned-scalar-mem,+unaligned-vector-mem \
; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST
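;
; The RV32/RV64 prefixes cover the default configuration, where unaligned
; scalar and vector accesses are assumed to be slow; RV32-FAST/RV64-FAST
; cover the same code with +unaligned-scalar-mem and +unaligned-vector-mem
; enabled. The -BOTH prefixes match lines shared by the slow and fast runs
; of one XLEN.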

; ----------------------------------------------------------------------
; Fully unaligned cases
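;
; With no alignment information, the slow variants may only use byte-sized
; scalar accesses and e8 vector elements; the FAST variants are free to use
; wider scalar and vector element types.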


define void @unaligned_memcpy1(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: unaligned_memcpy1:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lbu a1, 0(a1)
; RV32-BOTH-NEXT:    sb a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: unaligned_memcpy1:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    lbu a1, 0(a1)
; RV64-BOTH-NEXT:    sb a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 1, i1 false)
  ret void
}

define void @unaligned_memcpy2(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy2:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy2:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy2:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lh a1, 0(a1)
; RV32-FAST-NEXT:    sh a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy2:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lh a1, 0(a1)
; RV64-FAST-NEXT:    sh a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 2, i1 false)
  ret void
}

define void @unaligned_memcpy3(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy3:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 2(a1)
; RV32-NEXT:    sb a2, 2(a0)
; RV32-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy3:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 2(a1)
; RV64-NEXT:    sb a2, 2(a0)
; RV64-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy3:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lbu a2, 2(a1)
; RV32-FAST-NEXT:    sb a2, 2(a0)
; RV32-FAST-NEXT:    lh a1, 0(a1)
; RV32-FAST-NEXT:    sh a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy3:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lbu a2, 2(a1)
; RV64-FAST-NEXT:    sb a2, 2(a0)
; RV64-FAST-NEXT:    lh a1, 0(a1)
; RV64-FAST-NEXT:    sh a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 3, i1 false)
  ret void
}

define void @unaligned_memcpy4(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy4:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy4:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy4:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy4:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lw a1, 0(a1)
; RV64-FAST-NEXT:    sw a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 4, i1 false)
  ret void
}

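; For the FAST variants, an odd size such as 7 is covered by two overlapping
; word accesses (offsets 0 and 3) rather than a word/half/byte sequence.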
define void @unaligned_memcpy7(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy7:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 6(a1)
; RV32-NEXT:    sb a2, 6(a0)
; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    addi a1, a1, 4
; RV32-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    addi a0, a0, 4
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy7:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 6(a1)
; RV64-NEXT:    sb a2, 6(a0)
; RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    addi a1, a1, 4
; RV64-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    addi a0, a0, 4
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy7:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 3(a1)
; RV32-FAST-NEXT:    sw a2, 3(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy7:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lw a2, 3(a1)
; RV64-FAST-NEXT:    sw a2, 3(a0)
; RV64-FAST-NEXT:    lw a1, 0(a1)
; RV64-FAST-NEXT:    sw a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 7, i1 false)
  ret void
}

define void @unaligned_memcpy8(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy8:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy8:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy8:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-FAST-NEXT:    vle32.v v8, (a1)
; RV32-FAST-NEXT:    vse32.v v8, (a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy8:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 8, i1 false)
  ret void
}

define void @unaligned_memcpy15(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy15:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 14(a1)
; RV32-NEXT:    sb a2, 14(a0)
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    addi a2, a1, 12
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RV32-NEXT:    vle8.v v8, (a2)
; RV32-NEXT:    addi a2, a0, 12
; RV32-NEXT:    vse8.v v8, (a2)
; RV32-NEXT:    addi a1, a1, 8
; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    addi a0, a0, 8
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy15:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 14(a1)
; RV64-NEXT:    sb a2, 14(a0)
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    addi a2, a1, 12
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RV64-NEXT:    vle8.v v8, (a2)
; RV64-NEXT:    addi a2, a0, 12
; RV64-NEXT:    vse8.v v8, (a2)
; RV64-NEXT:    addi a1, a1, 8
; RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    addi a0, a0, 8
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy15:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 11(a1)
; RV32-FAST-NEXT:    sw a2, 11(a0)
; RV32-FAST-NEXT:    lw a2, 8(a1)
; RV32-FAST-NEXT:    sw a2, 8(a0)
; RV32-FAST-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-FAST-NEXT:    vle32.v v8, (a1)
; RV32-FAST-NEXT:    vse32.v v8, (a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy15:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a2, 7(a1)
; RV64-FAST-NEXT:    sd a2, 7(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 15, i1 false)
  ret void
}

define void @unaligned_memcpy16(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy16:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy16:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy16:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-FAST-NEXT:    vle64.v v8, (a1)
; RV32-FAST-NEXT:    vse64.v v8, (a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy16:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-FAST-NEXT:    vle64.v v8, (a1)
; RV64-FAST-NEXT:    vse64.v v8, (a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 16, i1 false)
  ret void
}

define void @unaligned_memcpy31(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy31:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    addi a1, a1, 15
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    addi a0, a0, 15
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy31:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    addi a1, a1, 15
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    addi a0, a0, 15
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy31:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-FAST-NEXT:    vle64.v v8, (a1)
; RV32-FAST-NEXT:    vse64.v v8, (a0)
; RV32-FAST-NEXT:    addi a1, a1, 15
; RV32-FAST-NEXT:    vle64.v v8, (a1)
; RV32-FAST-NEXT:    addi a0, a0, 15
; RV32-FAST-NEXT:    vse64.v v8, (a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy31:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-FAST-NEXT:    vle64.v v8, (a1)
; RV64-FAST-NEXT:    vse64.v v8, (a0)
; RV64-FAST-NEXT:    addi a1, a1, 15
; RV64-FAST-NEXT:    vle64.v v8, (a1)
; RV64-FAST-NEXT:    addi a0, a0, 15
; RV64-FAST-NEXT:    vse64.v v8, (a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 31, i1 false)
  ret void
}

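; For a VL of 32 or more, the AVL no longer fits the 5-bit immediate of
; vsetivli, so the expansion materializes it with li and uses vsetvli.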
define void @unaligned_memcpy32(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy32:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy32:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    li a2, 32
; RV64-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy32:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-FAST-NEXT:    vle64.v v8, (a1)
; RV32-FAST-NEXT:    vse64.v v8, (a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy32:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-FAST-NEXT:    vle64.v v8, (a1)
; RV64-FAST-NEXT:    vse64.v v8, (a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 32, i1 false)
  ret void
}

define void @unaligned_memcpy64(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy64:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    li a2, 64
; RV32-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy64:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    li a2, 64
; RV64-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy64:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-FAST-NEXT:    vle64.v v8, (a1)
; RV32-FAST-NEXT:    vse64.v v8, (a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy64:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-FAST-NEXT:    vle64.v v8, (a1)
; RV64-FAST-NEXT:    vse64.v v8, (a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 64, i1 false)
  ret void
}

define void @unaligned_memcpy96(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy96:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    li a2, 64
; RV32-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    addi a1, a1, 64
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    addi a0, a0, 64
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy96:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    li a2, 64
; RV64-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    addi a1, a1, 64
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    li a2, 32
; RV64-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    addi a0, a0, 64
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy96:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-FAST-NEXT:    vle64.v v8, (a1)
; RV32-FAST-NEXT:    vse64.v v8, (a0)
; RV32-FAST-NEXT:    addi a1, a1, 64
; RV32-FAST-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-FAST-NEXT:    vle64.v v8, (a1)
; RV32-FAST-NEXT:    addi a0, a0, 64
; RV32-FAST-NEXT:    vse64.v v8, (a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy96:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-FAST-NEXT:    vle64.v v8, (a1)
; RV64-FAST-NEXT:    vse64.v v8, (a0)
; RV64-FAST-NEXT:    addi a1, a1, 64
; RV64-FAST-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-FAST-NEXT:    vle64.v v8, (a1)
; RV64-FAST-NEXT:    addi a0, a0, 64
; RV64-FAST-NEXT:    vse64.v v8, (a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 96, i1 false)
  ret void
}

define void @unaligned_memcpy128(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy128:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    li a2, 128
; RV32-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy128:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    li a2, 128
; RV64-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy128:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-FAST-NEXT:    vle64.v v8, (a1)
; RV32-FAST-NEXT:    vse64.v v8, (a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy128:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-FAST-NEXT:    vle64.v v8, (a1)
; RV64-FAST-NEXT:    vse64.v v8, (a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 128, i1 false)
  ret void
}

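; 196 bytes decompose into 128- and 64-byte vector chunks plus a 4-byte
; tail; the slow variants copy the tail byte by byte, while the FAST
; variants use a single word access.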
define void @unaligned_memcpy196(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy196:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    li a2, 128
; RV32-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    addi a2, a1, 128
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    li a3, 64
; RV32-NEXT:    vsetvli zero, a3, e8, m4, ta, ma
; RV32-NEXT:    vle8.v v8, (a2)
; RV32-NEXT:    addi a2, a0, 128
; RV32-NEXT:    vse8.v v8, (a2)
; RV32-NEXT:    lbu a2, 195(a1)
; RV32-NEXT:    sb a2, 195(a0)
; RV32-NEXT:    lbu a2, 194(a1)
; RV32-NEXT:    sb a2, 194(a0)
; RV32-NEXT:    lbu a2, 193(a1)
; RV32-NEXT:    sb a2, 193(a0)
; RV32-NEXT:    lbu a1, 192(a1)
; RV32-NEXT:    sb a1, 192(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy196:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    li a2, 128
; RV64-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    addi a2, a1, 128
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    li a3, 64
; RV64-NEXT:    vsetvli zero, a3, e8, m4, ta, ma
; RV64-NEXT:    vle8.v v8, (a2)
; RV64-NEXT:    addi a2, a0, 128
; RV64-NEXT:    vse8.v v8, (a2)
; RV64-NEXT:    lbu a2, 195(a1)
; RV64-NEXT:    sb a2, 195(a0)
; RV64-NEXT:    lbu a2, 194(a1)
; RV64-NEXT:    sb a2, 194(a0)
; RV64-NEXT:    lbu a2, 193(a1)
; RV64-NEXT:    sb a2, 193(a0)
; RV64-NEXT:    lbu a1, 192(a1)
; RV64-NEXT:    sb a1, 192(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy196:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 192(a1)
; RV32-FAST-NEXT:    sw a2, 192(a0)
; RV32-FAST-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-FAST-NEXT:    vle64.v v8, (a1)
; RV32-FAST-NEXT:    vse64.v v8, (a0)
; RV32-FAST-NEXT:    addi a1, a1, 128
; RV32-FAST-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-FAST-NEXT:    vle64.v v8, (a1)
; RV32-FAST-NEXT:    addi a0, a0, 128
; RV32-FAST-NEXT:    vse64.v v8, (a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy196:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lw a2, 192(a1)
; RV64-FAST-NEXT:    sw a2, 192(a0)
; RV64-FAST-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-FAST-NEXT:    vle64.v v8, (a1)
; RV64-FAST-NEXT:    vse64.v v8, (a0)
; RV64-FAST-NEXT:    addi a1, a1, 128
; RV64-FAST-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-FAST-NEXT:    vle64.v v8, (a1)
; RV64-FAST-NEXT:    addi a0, a0, 128
; RV64-FAST-NEXT:    vse64.v v8, (a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 196, i1 false)
  ret void
}

define void @unaligned_memcpy256(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: unaligned_memcpy256:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    li a2, 128
; RV32-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    addi a1, a1, 128
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    addi a0, a0, 128
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: unaligned_memcpy256:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    li a2, 128
; RV64-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    addi a1, a1, 128
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    addi a0, a0, 128
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: unaligned_memcpy256:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-FAST-NEXT:    vle64.v v8, (a1)
; RV32-FAST-NEXT:    vse64.v v8, (a0)
; RV32-FAST-NEXT:    addi a1, a1, 128
; RV32-FAST-NEXT:    vle64.v v8, (a1)
; RV32-FAST-NEXT:    addi a0, a0, 128
; RV32-FAST-NEXT:    vse64.v v8, (a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: unaligned_memcpy256:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-FAST-NEXT:    vle64.v v8, (a1)
; RV64-FAST-NEXT:    vse64.v v8, (a0)
; RV64-FAST-NEXT:    addi a1, a1, 128
; RV64-FAST-NEXT:    vle64.v v8, (a1)
; RV64-FAST-NEXT:    addi a0, a0, 128
; RV64-FAST-NEXT:    vse64.v v8, (a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 256, i1 false)
  ret void
}


; ----------------------------------------------------------------------
; Fully aligned cases
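;
; With 8-byte aligned operands, even the slow variants can use wide scalar
; accesses and e32/e64 vector elements; the remaining slow/fast differences
; come from the misaligned tails of the odd sizes.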

define void @aligned_memcpy2(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy2:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lh a1, 0(a1)
; RV32-BOTH-NEXT:    sh a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy2:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    lh a1, 0(a1)
; RV64-BOTH-NEXT:    sh a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 2, i1 false)
  ret void
}

define void @aligned_memcpy3(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy3:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lbu a2, 2(a1)
; RV32-BOTH-NEXT:    sb a2, 2(a0)
; RV32-BOTH-NEXT:    lh a1, 0(a1)
; RV32-BOTH-NEXT:    sh a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy3:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    lbu a2, 2(a1)
; RV64-BOTH-NEXT:    sb a2, 2(a0)
; RV64-BOTH-NEXT:    lh a1, 0(a1)
; RV64-BOTH-NEXT:    sh a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 3, i1 false)
  ret void
}

define void @aligned_memcpy4(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy4:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lw a1, 0(a1)
; RV32-BOTH-NEXT:    sw a1, 0(a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy4:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    lw a1, 0(a1)
; RV64-BOTH-NEXT:    sw a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 4, i1 false)
  ret void
}

define void @aligned_memcpy7(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: aligned_memcpy7:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 6(a1)
; RV32-NEXT:    sb a2, 6(a0)
; RV32-NEXT:    lh a2, 4(a1)
; RV32-NEXT:    sh a2, 4(a0)
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    sw a1, 0(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: aligned_memcpy7:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 6(a1)
; RV64-NEXT:    sb a2, 6(a0)
; RV64-NEXT:    lh a2, 4(a1)
; RV64-NEXT:    sh a2, 4(a0)
; RV64-NEXT:    lw a1, 0(a1)
; RV64-NEXT:    sw a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: aligned_memcpy7:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 3(a1)
; RV32-FAST-NEXT:    sw a2, 3(a0)
; RV32-FAST-NEXT:    lw a1, 0(a1)
; RV32-FAST-NEXT:    sw a1, 0(a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: aligned_memcpy7:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lw a2, 3(a1)
; RV64-FAST-NEXT:    sw a2, 3(a0)
; RV64-FAST-NEXT:    lw a1, 0(a1)
; RV64-FAST-NEXT:    sw a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 7, i1 false)
  ret void
}

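; An aligned 8-byte copy still becomes a 2 x e32 vector op on RV32, where a
; single GPR cannot hold all 8 bytes, but a plain ld/sd pair on RV64.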
define void @aligned_memcpy8(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy8:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-BOTH-NEXT:    vle32.v v8, (a1)
; RV32-BOTH-NEXT:    vse32.v v8, (a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy8:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    ld a1, 0(a1)
; RV64-BOTH-NEXT:    sd a1, 0(a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 8, i1 false)
  ret void
}

define void @aligned_memcpy15(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: aligned_memcpy15:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 14(a1)
; RV32-NEXT:    sb a2, 14(a0)
; RV32-NEXT:    lh a2, 12(a1)
; RV32-NEXT:    sh a2, 12(a0)
; RV32-NEXT:    lw a2, 8(a1)
; RV32-NEXT:    sw a2, 8(a0)
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vle32.v v8, (a1)
; RV32-NEXT:    vse32.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: aligned_memcpy15:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 14(a1)
; RV64-NEXT:    sb a2, 14(a0)
; RV64-NEXT:    lh a2, 12(a1)
; RV64-NEXT:    sh a2, 12(a0)
; RV64-NEXT:    lw a2, 8(a1)
; RV64-NEXT:    sw a2, 8(a0)
; RV64-NEXT:    ld a1, 0(a1)
; RV64-NEXT:    sd a1, 0(a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: aligned_memcpy15:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 11(a1)
; RV32-FAST-NEXT:    sw a2, 11(a0)
; RV32-FAST-NEXT:    lw a2, 8(a1)
; RV32-FAST-NEXT:    sw a2, 8(a0)
; RV32-FAST-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-FAST-NEXT:    vle32.v v8, (a1)
; RV32-FAST-NEXT:    vse32.v v8, (a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: aligned_memcpy15:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    ld a2, 7(a1)
; RV64-FAST-NEXT:    sd a2, 7(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 15, i1 false)
  ret void
}

define void @aligned_memcpy16(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy16:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-BOTH-NEXT:    vle64.v v8, (a1)
; RV32-BOTH-NEXT:    vse64.v v8, (a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy16:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-BOTH-NEXT:    vle64.v v8, (a1)
; RV64-BOTH-NEXT:    vse64.v v8, (a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 16, i1 false)
  ret void
}

define void @aligned_memcpy31(ptr nocapture %dest, ptr %src) nounwind {
; RV32-LABEL: aligned_memcpy31:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a1)
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    addi a1, a1, 15
; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RV32-NEXT:    vle8.v v8, (a1)
; RV32-NEXT:    addi a0, a0, 15
; RV32-NEXT:    vse8.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: aligned_memcpy31:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a1)
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    addi a1, a1, 15
; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RV64-NEXT:    vle8.v v8, (a1)
; RV64-NEXT:    addi a0, a0, 15
; RV64-NEXT:    vse8.v v8, (a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: aligned_memcpy31:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-FAST-NEXT:    vle64.v v8, (a1)
; RV32-FAST-NEXT:    vse64.v v8, (a0)
; RV32-FAST-NEXT:    addi a1, a1, 15
; RV32-FAST-NEXT:    vle64.v v8, (a1)
; RV32-FAST-NEXT:    addi a0, a0, 15
; RV32-FAST-NEXT:    vse64.v v8, (a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: aligned_memcpy31:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-FAST-NEXT:    vle64.v v8, (a1)
; RV64-FAST-NEXT:    vse64.v v8, (a0)
; RV64-FAST-NEXT:    addi a1, a1, 15
; RV64-FAST-NEXT:    vle64.v v8, (a1)
; RV64-FAST-NEXT:    addi a0, a0, 15
; RV64-FAST-NEXT:    vse64.v v8, (a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 31, i1 false)
  ret void
}

define void @aligned_memcpy32(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy32:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-BOTH-NEXT:    vle64.v v8, (a1)
; RV32-BOTH-NEXT:    vse64.v v8, (a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy32:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-BOTH-NEXT:    vle64.v v8, (a1)
; RV64-BOTH-NEXT:    vse64.v v8, (a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 32, i1 false)
  ret void
}

define void @aligned_memcpy64(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy64:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-BOTH-NEXT:    vle64.v v8, (a1)
; RV32-BOTH-NEXT:    vse64.v v8, (a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy64:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-BOTH-NEXT:    vle64.v v8, (a1)
; RV64-BOTH-NEXT:    vse64.v v8, (a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 64, i1 false)
  ret void
}

define void @aligned_memcpy96(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy96:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-BOTH-NEXT:    vle64.v v8, (a1)
; RV32-BOTH-NEXT:    vse64.v v8, (a0)
; RV32-BOTH-NEXT:    addi a1, a1, 64
; RV32-BOTH-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-BOTH-NEXT:    vle64.v v8, (a1)
; RV32-BOTH-NEXT:    addi a0, a0, 64
; RV32-BOTH-NEXT:    vse64.v v8, (a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy96:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-BOTH-NEXT:    vle64.v v8, (a1)
; RV64-BOTH-NEXT:    vse64.v v8, (a0)
; RV64-BOTH-NEXT:    addi a1, a1, 64
; RV64-BOTH-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-BOTH-NEXT:    vle64.v v8, (a1)
; RV64-BOTH-NEXT:    addi a0, a0, 64
; RV64-BOTH-NEXT:    vse64.v v8, (a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 96, i1 false)
  ret void
}

define void @aligned_memcpy128(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy128:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-BOTH-NEXT:    vle64.v v8, (a1)
; RV32-BOTH-NEXT:    vse64.v v8, (a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy128:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-BOTH-NEXT:    vle64.v v8, (a1)
; RV64-BOTH-NEXT:    vse64.v v8, (a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 128, i1 false)
  ret void
}

define void @aligned_memcpy196(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy196:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    lw a2, 192(a1)
; RV32-BOTH-NEXT:    sw a2, 192(a0)
; RV32-BOTH-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-BOTH-NEXT:    vle64.v v8, (a1)
; RV32-BOTH-NEXT:    vse64.v v8, (a0)
; RV32-BOTH-NEXT:    addi a1, a1, 128
; RV32-BOTH-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-BOTH-NEXT:    vle64.v v8, (a1)
; RV32-BOTH-NEXT:    addi a0, a0, 128
; RV32-BOTH-NEXT:    vse64.v v8, (a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy196:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    lw a2, 192(a1)
; RV64-BOTH-NEXT:    sw a2, 192(a0)
; RV64-BOTH-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-BOTH-NEXT:    vle64.v v8, (a1)
; RV64-BOTH-NEXT:    vse64.v v8, (a0)
; RV64-BOTH-NEXT:    addi a1, a1, 128
; RV64-BOTH-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-BOTH-NEXT:    vle64.v v8, (a1)
; RV64-BOTH-NEXT:    addi a0, a0, 128
; RV64-BOTH-NEXT:    vse64.v v8, (a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 196, i1 false)
  ret void
}

define void @aligned_memcpy256(ptr nocapture %dest, ptr %src) nounwind {
; RV32-BOTH-LABEL: aligned_memcpy256:
; RV32-BOTH:       # %bb.0: # %entry
; RV32-BOTH-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-BOTH-NEXT:    vle64.v v8, (a1)
; RV32-BOTH-NEXT:    vse64.v v8, (a0)
; RV32-BOTH-NEXT:    addi a1, a1, 128
; RV32-BOTH-NEXT:    vle64.v v8, (a1)
; RV32-BOTH-NEXT:    addi a0, a0, 128
; RV32-BOTH-NEXT:    vse64.v v8, (a0)
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: aligned_memcpy256:
; RV64-BOTH:       # %bb.0: # %entry
; RV64-BOTH-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-BOTH-NEXT:    vle64.v v8, (a1)
; RV64-BOTH-NEXT:    vse64.v v8, (a0)
; RV64-BOTH-NEXT:    addi a1, a1, 128
; RV64-BOTH-NEXT:    vle64.v v8, (a1)
; RV64-BOTH-NEXT:    addi a0, a0, 128
; RV64-BOTH-NEXT:    vse64.v v8, (a0)
; RV64-BOTH-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 256, i1 false)
  ret void
}

; ------------------------------------------------------------------------
; A few partially aligned cases
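;
; These check that the expansion honors the stated alignment rather than the
; natural alignment of the access width: align 4 permits e32 vector elements
; but not e64, and align 8 still leaves a misaligned tail for size 11.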


define void @memcpy16_align4(ptr nocapture %dest, ptr nocapture %src) nounwind {
; RV32-LABEL: memcpy16_align4:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vle32.v v8, (a1)
; RV32-NEXT:    vse32.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: memcpy16_align4:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vle32.v v8, (a1)
; RV64-NEXT:    vse32.v v8, (a0)
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: memcpy16_align4:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-FAST-NEXT:    vle64.v v8, (a1)
; RV32-FAST-NEXT:    vse64.v v8, (a0)
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: memcpy16_align4:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-FAST-NEXT:    vle64.v v8, (a1)
; RV64-FAST-NEXT:    vse64.v v8, (a0)
; RV64-FAST-NEXT:    ret
entry:
  tail call void @llvm.memcpy.inline.p0.p0.i32(ptr align 4 %dest, ptr align 4 %src, i32 16, i1 false)
  ret void
}

define i32 @memcpy11_align8(ptr nocapture %dest, ptr %src) {
; RV32-LABEL: memcpy11_align8:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lbu a2, 10(a1)
; RV32-NEXT:    sb a2, 10(a0)
; RV32-NEXT:    lh a2, 8(a1)
; RV32-NEXT:    sh a2, 8(a0)
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vle32.v v8, (a1)
; RV32-NEXT:    vse32.v v8, (a0)
; RV32-NEXT:    li a0, 0
; RV32-NEXT:    ret
;
; RV64-LABEL: memcpy11_align8:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lbu a2, 10(a1)
; RV64-NEXT:    sb a2, 10(a0)
; RV64-NEXT:    lh a2, 8(a1)
; RV64-NEXT:    sh a2, 8(a0)
; RV64-NEXT:    ld a1, 0(a1)
; RV64-NEXT:    sd a1, 0(a0)
; RV64-NEXT:    li a0, 0
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: memcpy11_align8:
; RV32-FAST:       # %bb.0: # %entry
; RV32-FAST-NEXT:    lw a2, 7(a1)
; RV32-FAST-NEXT:    sw a2, 7(a0)
; RV32-FAST-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-FAST-NEXT:    vle32.v v8, (a1)
; RV32-FAST-NEXT:    vse32.v v8, (a0)
; RV32-FAST-NEXT:    li a0, 0
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: memcpy11_align8:
; RV64-FAST:       # %bb.0: # %entry
; RV64-FAST-NEXT:    lw a2, 7(a1)
; RV64-FAST-NEXT:    sw a2, 7(a0)
; RV64-FAST-NEXT:    ld a1, 0(a1)
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    li a0, 0
; RV64-FAST-NEXT:    ret
entry:
  call void @llvm.memcpy.inline.p0.p0.i32(ptr align 8 %dest, ptr align 8 %src, i32 11, i1 false)
  ret i32 0
}


declare void @llvm.memcpy.inline.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1) nounwind
declare void @llvm.memcpy.inline.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind