xref: /llvm-project/llvm/test/CodeGen/RISCV/memset-inline.ll (revision 2967e5f8007d873a3e9d97870d2461d0827a3976)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=riscv32 -mattr=+m \
3; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32
4; RUN: llc < %s -mtriple=riscv64 -mattr=+m \
5; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64
6; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+unaligned-scalar-mem \
7; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST
8; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+unaligned-scalar-mem \
9; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST
; Eleven-byte aggregate type.
; NOTE(review): no function in the part of this file reviewed here refers to
; it; confirm whether later tests use it before removing.
10%struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
11
; Intrinsic signatures: (destination pointer, fill byte, i64 length, i1 flag).
; NOTE(review): the plain memset declaration has no caller in the part of this
; file reviewed here; every visible test calls the .inline variant.
12declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
13declare void @llvm.memset.inline.p0.i64(ptr nocapture, i8, i64, i1) nounwind
14
15; /////////////////////////////////////////////////////////////////////////////
16
; memset_1 - inline memset of 1 byte with a runtime fill value; the pointer has
; no alignment attribute. Every configuration in the run lines is expected to
; emit a single `sb` of the fill register.
17define void @memset_1(ptr %a, i8 %value) nounwind {
18; RV32-BOTH-LABEL: memset_1:
19; RV32-BOTH:       # %bb.0:
20; RV32-BOTH-NEXT:    sb a1, 0(a0)
21; RV32-BOTH-NEXT:    ret
22;
23; RV64-BOTH-LABEL: memset_1:
24; RV64-BOTH:       # %bb.0:
25; RV64-BOTH-NEXT:    sb a1, 0(a0)
26; RV64-BOTH-NEXT:    ret
27  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 1, i1 0)
28  ret void
29}
30
; memset_2 - inline memset of 2 bytes with a runtime fill value; the pointer
; has no alignment attribute.
; Without +unaligned-scalar-mem the expected code is two `sb` stores.
; With +unaligned-scalar-mem the fill byte is duplicated into both bytes of a
; halfword (andi / slli 8 / or) and written with one `sh`.
31define void @memset_2(ptr %a, i8 %value) nounwind {
32; RV32-LABEL: memset_2:
33; RV32:       # %bb.0:
34; RV32-NEXT:    sb a1, 0(a0)
35; RV32-NEXT:    sb a1, 1(a0)
36; RV32-NEXT:    ret
37;
38; RV64-LABEL: memset_2:
39; RV64:       # %bb.0:
40; RV64-NEXT:    sb a1, 0(a0)
41; RV64-NEXT:    sb a1, 1(a0)
42; RV64-NEXT:    ret
43;
44; RV32-FAST-LABEL: memset_2:
45; RV32-FAST:       # %bb.0:
46; RV32-FAST-NEXT:    andi a2, a1, 255
47; RV32-FAST-NEXT:    slli a1, a1, 8
48; RV32-FAST-NEXT:    or a1, a1, a2
49; RV32-FAST-NEXT:    sh a1, 0(a0)
50; RV32-FAST-NEXT:    ret
51;
52; RV64-FAST-LABEL: memset_2:
53; RV64-FAST:       # %bb.0:
54; RV64-FAST-NEXT:    andi a2, a1, 255
55; RV64-FAST-NEXT:    slli a1, a1, 8
56; RV64-FAST-NEXT:    or a1, a1, a2
57; RV64-FAST-NEXT:    sh a1, 0(a0)
58; RV64-FAST-NEXT:    ret
59  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 2, i1 0)
60  ret void
61}
62
; memset_4 - inline memset of 4 bytes with a runtime fill value; the pointer
; has no alignment attribute.
; Without +unaligned-scalar-mem the expected code is four `sb` stores.
; With +unaligned-scalar-mem a single `sw` stores the byte replicated across a
; word. On riscv32 the replication is a `mul` by the constant built from
; lui 4112 / addi 257 (0x01010101); on riscv64 the expected sequence is a
; shift-left by 56 followed by `mulhu` with a shifted form of that pattern.
63define void @memset_4(ptr %a, i8 %value) nounwind {
64; RV32-LABEL: memset_4:
65; RV32:       # %bb.0:
66; RV32-NEXT:    sb a1, 0(a0)
67; RV32-NEXT:    sb a1, 1(a0)
68; RV32-NEXT:    sb a1, 2(a0)
69; RV32-NEXT:    sb a1, 3(a0)
70; RV32-NEXT:    ret
71;
72; RV64-LABEL: memset_4:
73; RV64:       # %bb.0:
74; RV64-NEXT:    sb a1, 0(a0)
75; RV64-NEXT:    sb a1, 1(a0)
76; RV64-NEXT:    sb a1, 2(a0)
77; RV64-NEXT:    sb a1, 3(a0)
78; RV64-NEXT:    ret
79;
80; RV32-FAST-LABEL: memset_4:
81; RV32-FAST:       # %bb.0:
82; RV32-FAST-NEXT:    andi a1, a1, 255
83; RV32-FAST-NEXT:    lui a2, 4112
84; RV32-FAST-NEXT:    addi a2, a2, 257
85; RV32-FAST-NEXT:    mul a1, a1, a2
86; RV32-FAST-NEXT:    sw a1, 0(a0)
87; RV32-FAST-NEXT:    ret
88;
89; RV64-FAST-LABEL: memset_4:
90; RV64-FAST:       # %bb.0:
91; RV64-FAST-NEXT:    slli a1, a1, 56
92; RV64-FAST-NEXT:    lui a2, 65793
93; RV64-FAST-NEXT:    slli a2, a2, 4
94; RV64-FAST-NEXT:    addi a2, a2, 256
95; RV64-FAST-NEXT:    mulhu a1, a1, a2
96; RV64-FAST-NEXT:    sw a1, 0(a0)
97; RV64-FAST-NEXT:    ret
98  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 4, i1 0)
99  ret void
100}
101
; memset_8 - inline memset of 8 bytes with a runtime fill value; the pointer
; has no alignment attribute.
; Without +unaligned-scalar-mem the expected code is eight `sb` stores, with
; offsets 4..7 emitted before offsets 0..3.
; With +unaligned-scalar-mem the replicated byte is stored with two `sw` on
; riscv32 and one `sd` on riscv64 (the 0x01010101 multiplier is widened to
; 64 bits with slli 32 / add before the `mul`).
102define void @memset_8(ptr %a, i8 %value) nounwind {
103; RV32-LABEL: memset_8:
104; RV32:       # %bb.0:
105; RV32-NEXT:    sb a1, 4(a0)
106; RV32-NEXT:    sb a1, 5(a0)
107; RV32-NEXT:    sb a1, 6(a0)
108; RV32-NEXT:    sb a1, 7(a0)
109; RV32-NEXT:    sb a1, 0(a0)
110; RV32-NEXT:    sb a1, 1(a0)
111; RV32-NEXT:    sb a1, 2(a0)
112; RV32-NEXT:    sb a1, 3(a0)
113; RV32-NEXT:    ret
114;
115; RV64-LABEL: memset_8:
116; RV64:       # %bb.0:
117; RV64-NEXT:    sb a1, 4(a0)
118; RV64-NEXT:    sb a1, 5(a0)
119; RV64-NEXT:    sb a1, 6(a0)
120; RV64-NEXT:    sb a1, 7(a0)
121; RV64-NEXT:    sb a1, 0(a0)
122; RV64-NEXT:    sb a1, 1(a0)
123; RV64-NEXT:    sb a1, 2(a0)
124; RV64-NEXT:    sb a1, 3(a0)
125; RV64-NEXT:    ret
126;
127; RV32-FAST-LABEL: memset_8:
128; RV32-FAST:       # %bb.0:
129; RV32-FAST-NEXT:    andi a1, a1, 255
130; RV32-FAST-NEXT:    lui a2, 4112
131; RV32-FAST-NEXT:    addi a2, a2, 257
132; RV32-FAST-NEXT:    mul a1, a1, a2
133; RV32-FAST-NEXT:    sw a1, 0(a0)
134; RV32-FAST-NEXT:    sw a1, 4(a0)
135; RV32-FAST-NEXT:    ret
136;
137; RV64-FAST-LABEL: memset_8:
138; RV64-FAST:       # %bb.0:
139; RV64-FAST-NEXT:    andi a1, a1, 255
140; RV64-FAST-NEXT:    lui a2, 4112
141; RV64-FAST-NEXT:    addiw a2, a2, 257
142; RV64-FAST-NEXT:    slli a3, a2, 32
143; RV64-FAST-NEXT:    add a2, a2, a3
144; RV64-FAST-NEXT:    mul a1, a1, a2
145; RV64-FAST-NEXT:    sd a1, 0(a0)
146; RV64-FAST-NEXT:    ret
147  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 8, i1 0)
148  ret void
149}
150
; memset_16 - inline memset of 16 bytes with a runtime fill value; the pointer
; has no alignment attribute.
; Without +unaligned-scalar-mem the expected code is sixteen `sb` stores,
; emitted in groups of four from the highest offsets down to offset 0.
; With +unaligned-scalar-mem the replicated byte is stored with four `sw` on
; riscv32 and two `sd` on riscv64.
151define void @memset_16(ptr %a, i8 %value) nounwind {
152; RV32-LABEL: memset_16:
153; RV32:       # %bb.0:
154; RV32-NEXT:    sb a1, 12(a0)
155; RV32-NEXT:    sb a1, 13(a0)
156; RV32-NEXT:    sb a1, 14(a0)
157; RV32-NEXT:    sb a1, 15(a0)
158; RV32-NEXT:    sb a1, 8(a0)
159; RV32-NEXT:    sb a1, 9(a0)
160; RV32-NEXT:    sb a1, 10(a0)
161; RV32-NEXT:    sb a1, 11(a0)
162; RV32-NEXT:    sb a1, 4(a0)
163; RV32-NEXT:    sb a1, 5(a0)
164; RV32-NEXT:    sb a1, 6(a0)
165; RV32-NEXT:    sb a1, 7(a0)
166; RV32-NEXT:    sb a1, 0(a0)
167; RV32-NEXT:    sb a1, 1(a0)
168; RV32-NEXT:    sb a1, 2(a0)
169; RV32-NEXT:    sb a1, 3(a0)
170; RV32-NEXT:    ret
171;
172; RV64-LABEL: memset_16:
173; RV64:       # %bb.0:
174; RV64-NEXT:    sb a1, 12(a0)
175; RV64-NEXT:    sb a1, 13(a0)
176; RV64-NEXT:    sb a1, 14(a0)
177; RV64-NEXT:    sb a1, 15(a0)
178; RV64-NEXT:    sb a1, 8(a0)
179; RV64-NEXT:    sb a1, 9(a0)
180; RV64-NEXT:    sb a1, 10(a0)
181; RV64-NEXT:    sb a1, 11(a0)
182; RV64-NEXT:    sb a1, 4(a0)
183; RV64-NEXT:    sb a1, 5(a0)
184; RV64-NEXT:    sb a1, 6(a0)
185; RV64-NEXT:    sb a1, 7(a0)
186; RV64-NEXT:    sb a1, 0(a0)
187; RV64-NEXT:    sb a1, 1(a0)
188; RV64-NEXT:    sb a1, 2(a0)
189; RV64-NEXT:    sb a1, 3(a0)
190; RV64-NEXT:    ret
191;
192; RV32-FAST-LABEL: memset_16:
193; RV32-FAST:       # %bb.0:
194; RV32-FAST-NEXT:    andi a1, a1, 255
195; RV32-FAST-NEXT:    lui a2, 4112
196; RV32-FAST-NEXT:    addi a2, a2, 257
197; RV32-FAST-NEXT:    mul a1, a1, a2
198; RV32-FAST-NEXT:    sw a1, 0(a0)
199; RV32-FAST-NEXT:    sw a1, 4(a0)
200; RV32-FAST-NEXT:    sw a1, 8(a0)
201; RV32-FAST-NEXT:    sw a1, 12(a0)
202; RV32-FAST-NEXT:    ret
203;
204; RV64-FAST-LABEL: memset_16:
205; RV64-FAST:       # %bb.0:
206; RV64-FAST-NEXT:    andi a1, a1, 255
207; RV64-FAST-NEXT:    lui a2, 4112
208; RV64-FAST-NEXT:    addiw a2, a2, 257
209; RV64-FAST-NEXT:    slli a3, a2, 32
210; RV64-FAST-NEXT:    add a2, a2, a3
211; RV64-FAST-NEXT:    mul a1, a1, a2
212; RV64-FAST-NEXT:    sd a1, 0(a0)
213; RV64-FAST-NEXT:    sd a1, 8(a0)
214; RV64-FAST-NEXT:    ret
215  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 16, i1 0)
216  ret void
217}
218
; memset_32 - inline memset of 32 bytes with a runtime fill value; the pointer
; has no alignment attribute.
; Without +unaligned-scalar-mem the expected code is thirty-two `sb` stores,
; emitted in groups of four from the highest offsets down to offset 0.
; With +unaligned-scalar-mem the replicated byte is stored with eight `sw` on
; riscv32 (offsets 16..28 first, then 0..12) and four `sd` on riscv64.
219define void @memset_32(ptr %a, i8 %value) nounwind {
220; RV32-LABEL: memset_32:
221; RV32:       # %bb.0:
222; RV32-NEXT:    sb a1, 28(a0)
223; RV32-NEXT:    sb a1, 29(a0)
224; RV32-NEXT:    sb a1, 30(a0)
225; RV32-NEXT:    sb a1, 31(a0)
226; RV32-NEXT:    sb a1, 24(a0)
227; RV32-NEXT:    sb a1, 25(a0)
228; RV32-NEXT:    sb a1, 26(a0)
229; RV32-NEXT:    sb a1, 27(a0)
230; RV32-NEXT:    sb a1, 20(a0)
231; RV32-NEXT:    sb a1, 21(a0)
232; RV32-NEXT:    sb a1, 22(a0)
233; RV32-NEXT:    sb a1, 23(a0)
234; RV32-NEXT:    sb a1, 16(a0)
235; RV32-NEXT:    sb a1, 17(a0)
236; RV32-NEXT:    sb a1, 18(a0)
237; RV32-NEXT:    sb a1, 19(a0)
238; RV32-NEXT:    sb a1, 12(a0)
239; RV32-NEXT:    sb a1, 13(a0)
240; RV32-NEXT:    sb a1, 14(a0)
241; RV32-NEXT:    sb a1, 15(a0)
242; RV32-NEXT:    sb a1, 8(a0)
243; RV32-NEXT:    sb a1, 9(a0)
244; RV32-NEXT:    sb a1, 10(a0)
245; RV32-NEXT:    sb a1, 11(a0)
246; RV32-NEXT:    sb a1, 4(a0)
247; RV32-NEXT:    sb a1, 5(a0)
248; RV32-NEXT:    sb a1, 6(a0)
249; RV32-NEXT:    sb a1, 7(a0)
250; RV32-NEXT:    sb a1, 0(a0)
251; RV32-NEXT:    sb a1, 1(a0)
252; RV32-NEXT:    sb a1, 2(a0)
253; RV32-NEXT:    sb a1, 3(a0)
254; RV32-NEXT:    ret
255;
256; RV64-LABEL: memset_32:
257; RV64:       # %bb.0:
258; RV64-NEXT:    sb a1, 28(a0)
259; RV64-NEXT:    sb a1, 29(a0)
260; RV64-NEXT:    sb a1, 30(a0)
261; RV64-NEXT:    sb a1, 31(a0)
262; RV64-NEXT:    sb a1, 24(a0)
263; RV64-NEXT:    sb a1, 25(a0)
264; RV64-NEXT:    sb a1, 26(a0)
265; RV64-NEXT:    sb a1, 27(a0)
266; RV64-NEXT:    sb a1, 20(a0)
267; RV64-NEXT:    sb a1, 21(a0)
268; RV64-NEXT:    sb a1, 22(a0)
269; RV64-NEXT:    sb a1, 23(a0)
270; RV64-NEXT:    sb a1, 16(a0)
271; RV64-NEXT:    sb a1, 17(a0)
272; RV64-NEXT:    sb a1, 18(a0)
273; RV64-NEXT:    sb a1, 19(a0)
274; RV64-NEXT:    sb a1, 12(a0)
275; RV64-NEXT:    sb a1, 13(a0)
276; RV64-NEXT:    sb a1, 14(a0)
277; RV64-NEXT:    sb a1, 15(a0)
278; RV64-NEXT:    sb a1, 8(a0)
279; RV64-NEXT:    sb a1, 9(a0)
280; RV64-NEXT:    sb a1, 10(a0)
281; RV64-NEXT:    sb a1, 11(a0)
282; RV64-NEXT:    sb a1, 4(a0)
283; RV64-NEXT:    sb a1, 5(a0)
284; RV64-NEXT:    sb a1, 6(a0)
285; RV64-NEXT:    sb a1, 7(a0)
286; RV64-NEXT:    sb a1, 0(a0)
287; RV64-NEXT:    sb a1, 1(a0)
288; RV64-NEXT:    sb a1, 2(a0)
289; RV64-NEXT:    sb a1, 3(a0)
290; RV64-NEXT:    ret
291;
292; RV32-FAST-LABEL: memset_32:
293; RV32-FAST:       # %bb.0:
294; RV32-FAST-NEXT:    andi a1, a1, 255
295; RV32-FAST-NEXT:    lui a2, 4112
296; RV32-FAST-NEXT:    addi a2, a2, 257
297; RV32-FAST-NEXT:    mul a1, a1, a2
298; RV32-FAST-NEXT:    sw a1, 16(a0)
299; RV32-FAST-NEXT:    sw a1, 20(a0)
300; RV32-FAST-NEXT:    sw a1, 24(a0)
301; RV32-FAST-NEXT:    sw a1, 28(a0)
302; RV32-FAST-NEXT:    sw a1, 0(a0)
303; RV32-FAST-NEXT:    sw a1, 4(a0)
304; RV32-FAST-NEXT:    sw a1, 8(a0)
305; RV32-FAST-NEXT:    sw a1, 12(a0)
306; RV32-FAST-NEXT:    ret
307;
308; RV64-FAST-LABEL: memset_32:
309; RV64-FAST:       # %bb.0:
310; RV64-FAST-NEXT:    andi a1, a1, 255
311; RV64-FAST-NEXT:    lui a2, 4112
312; RV64-FAST-NEXT:    addiw a2, a2, 257
313; RV64-FAST-NEXT:    slli a3, a2, 32
314; RV64-FAST-NEXT:    add a2, a2, a3
315; RV64-FAST-NEXT:    mul a1, a1, a2
316; RV64-FAST-NEXT:    sd a1, 0(a0)
317; RV64-FAST-NEXT:    sd a1, 8(a0)
318; RV64-FAST-NEXT:    sd a1, 16(a0)
319; RV64-FAST-NEXT:    sd a1, 24(a0)
320; RV64-FAST-NEXT:    ret
321  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 32, i1 0)
322  ret void
323}
324
; memset_64 - inline memset of 64 bytes with a runtime fill value; the pointer
; has no alignment attribute.
; Without +unaligned-scalar-mem the expected code is sixty-four `sb` stores,
; emitted in groups of four from the highest offsets down to offset 0.
; With +unaligned-scalar-mem the replicated byte is stored with sixteen `sw`
; on riscv32 and eight `sd` on riscv64; in both cases the expected order
; starts with the upper part of the buffer.
325define void @memset_64(ptr %a, i8 %value) nounwind {
326; RV32-LABEL: memset_64:
327; RV32:       # %bb.0:
328; RV32-NEXT:    sb a1, 60(a0)
329; RV32-NEXT:    sb a1, 61(a0)
330; RV32-NEXT:    sb a1, 62(a0)
331; RV32-NEXT:    sb a1, 63(a0)
332; RV32-NEXT:    sb a1, 56(a0)
333; RV32-NEXT:    sb a1, 57(a0)
334; RV32-NEXT:    sb a1, 58(a0)
335; RV32-NEXT:    sb a1, 59(a0)
336; RV32-NEXT:    sb a1, 52(a0)
337; RV32-NEXT:    sb a1, 53(a0)
338; RV32-NEXT:    sb a1, 54(a0)
339; RV32-NEXT:    sb a1, 55(a0)
340; RV32-NEXT:    sb a1, 48(a0)
341; RV32-NEXT:    sb a1, 49(a0)
342; RV32-NEXT:    sb a1, 50(a0)
343; RV32-NEXT:    sb a1, 51(a0)
344; RV32-NEXT:    sb a1, 44(a0)
345; RV32-NEXT:    sb a1, 45(a0)
346; RV32-NEXT:    sb a1, 46(a0)
347; RV32-NEXT:    sb a1, 47(a0)
348; RV32-NEXT:    sb a1, 40(a0)
349; RV32-NEXT:    sb a1, 41(a0)
350; RV32-NEXT:    sb a1, 42(a0)
351; RV32-NEXT:    sb a1, 43(a0)
352; RV32-NEXT:    sb a1, 36(a0)
353; RV32-NEXT:    sb a1, 37(a0)
354; RV32-NEXT:    sb a1, 38(a0)
355; RV32-NEXT:    sb a1, 39(a0)
356; RV32-NEXT:    sb a1, 32(a0)
357; RV32-NEXT:    sb a1, 33(a0)
358; RV32-NEXT:    sb a1, 34(a0)
359; RV32-NEXT:    sb a1, 35(a0)
360; RV32-NEXT:    sb a1, 28(a0)
361; RV32-NEXT:    sb a1, 29(a0)
362; RV32-NEXT:    sb a1, 30(a0)
363; RV32-NEXT:    sb a1, 31(a0)
364; RV32-NEXT:    sb a1, 24(a0)
365; RV32-NEXT:    sb a1, 25(a0)
366; RV32-NEXT:    sb a1, 26(a0)
367; RV32-NEXT:    sb a1, 27(a0)
368; RV32-NEXT:    sb a1, 20(a0)
369; RV32-NEXT:    sb a1, 21(a0)
370; RV32-NEXT:    sb a1, 22(a0)
371; RV32-NEXT:    sb a1, 23(a0)
372; RV32-NEXT:    sb a1, 16(a0)
373; RV32-NEXT:    sb a1, 17(a0)
374; RV32-NEXT:    sb a1, 18(a0)
375; RV32-NEXT:    sb a1, 19(a0)
376; RV32-NEXT:    sb a1, 12(a0)
377; RV32-NEXT:    sb a1, 13(a0)
378; RV32-NEXT:    sb a1, 14(a0)
379; RV32-NEXT:    sb a1, 15(a0)
380; RV32-NEXT:    sb a1, 8(a0)
381; RV32-NEXT:    sb a1, 9(a0)
382; RV32-NEXT:    sb a1, 10(a0)
383; RV32-NEXT:    sb a1, 11(a0)
384; RV32-NEXT:    sb a1, 4(a0)
385; RV32-NEXT:    sb a1, 5(a0)
386; RV32-NEXT:    sb a1, 6(a0)
387; RV32-NEXT:    sb a1, 7(a0)
388; RV32-NEXT:    sb a1, 0(a0)
389; RV32-NEXT:    sb a1, 1(a0)
390; RV32-NEXT:    sb a1, 2(a0)
391; RV32-NEXT:    sb a1, 3(a0)
392; RV32-NEXT:    ret
393;
394; RV64-LABEL: memset_64:
395; RV64:       # %bb.0:
396; RV64-NEXT:    sb a1, 60(a0)
397; RV64-NEXT:    sb a1, 61(a0)
398; RV64-NEXT:    sb a1, 62(a0)
399; RV64-NEXT:    sb a1, 63(a0)
400; RV64-NEXT:    sb a1, 56(a0)
401; RV64-NEXT:    sb a1, 57(a0)
402; RV64-NEXT:    sb a1, 58(a0)
403; RV64-NEXT:    sb a1, 59(a0)
404; RV64-NEXT:    sb a1, 52(a0)
405; RV64-NEXT:    sb a1, 53(a0)
406; RV64-NEXT:    sb a1, 54(a0)
407; RV64-NEXT:    sb a1, 55(a0)
408; RV64-NEXT:    sb a1, 48(a0)
409; RV64-NEXT:    sb a1, 49(a0)
410; RV64-NEXT:    sb a1, 50(a0)
411; RV64-NEXT:    sb a1, 51(a0)
412; RV64-NEXT:    sb a1, 44(a0)
413; RV64-NEXT:    sb a1, 45(a0)
414; RV64-NEXT:    sb a1, 46(a0)
415; RV64-NEXT:    sb a1, 47(a0)
416; RV64-NEXT:    sb a1, 40(a0)
417; RV64-NEXT:    sb a1, 41(a0)
418; RV64-NEXT:    sb a1, 42(a0)
419; RV64-NEXT:    sb a1, 43(a0)
420; RV64-NEXT:    sb a1, 36(a0)
421; RV64-NEXT:    sb a1, 37(a0)
422; RV64-NEXT:    sb a1, 38(a0)
423; RV64-NEXT:    sb a1, 39(a0)
424; RV64-NEXT:    sb a1, 32(a0)
425; RV64-NEXT:    sb a1, 33(a0)
426; RV64-NEXT:    sb a1, 34(a0)
427; RV64-NEXT:    sb a1, 35(a0)
428; RV64-NEXT:    sb a1, 28(a0)
429; RV64-NEXT:    sb a1, 29(a0)
430; RV64-NEXT:    sb a1, 30(a0)
431; RV64-NEXT:    sb a1, 31(a0)
432; RV64-NEXT:    sb a1, 24(a0)
433; RV64-NEXT:    sb a1, 25(a0)
434; RV64-NEXT:    sb a1, 26(a0)
435; RV64-NEXT:    sb a1, 27(a0)
436; RV64-NEXT:    sb a1, 20(a0)
437; RV64-NEXT:    sb a1, 21(a0)
438; RV64-NEXT:    sb a1, 22(a0)
439; RV64-NEXT:    sb a1, 23(a0)
440; RV64-NEXT:    sb a1, 16(a0)
441; RV64-NEXT:    sb a1, 17(a0)
442; RV64-NEXT:    sb a1, 18(a0)
443; RV64-NEXT:    sb a1, 19(a0)
444; RV64-NEXT:    sb a1, 12(a0)
445; RV64-NEXT:    sb a1, 13(a0)
446; RV64-NEXT:    sb a1, 14(a0)
447; RV64-NEXT:    sb a1, 15(a0)
448; RV64-NEXT:    sb a1, 8(a0)
449; RV64-NEXT:    sb a1, 9(a0)
450; RV64-NEXT:    sb a1, 10(a0)
451; RV64-NEXT:    sb a1, 11(a0)
452; RV64-NEXT:    sb a1, 4(a0)
453; RV64-NEXT:    sb a1, 5(a0)
454; RV64-NEXT:    sb a1, 6(a0)
455; RV64-NEXT:    sb a1, 7(a0)
456; RV64-NEXT:    sb a1, 0(a0)
457; RV64-NEXT:    sb a1, 1(a0)
458; RV64-NEXT:    sb a1, 2(a0)
459; RV64-NEXT:    sb a1, 3(a0)
460; RV64-NEXT:    ret
461;
462; RV32-FAST-LABEL: memset_64:
463; RV32-FAST:       # %bb.0:
464; RV32-FAST-NEXT:    andi a1, a1, 255
465; RV32-FAST-NEXT:    lui a2, 4112
466; RV32-FAST-NEXT:    addi a2, a2, 257
467; RV32-FAST-NEXT:    mul a1, a1, a2
468; RV32-FAST-NEXT:    sw a1, 48(a0)
469; RV32-FAST-NEXT:    sw a1, 52(a0)
470; RV32-FAST-NEXT:    sw a1, 56(a0)
471; RV32-FAST-NEXT:    sw a1, 60(a0)
472; RV32-FAST-NEXT:    sw a1, 32(a0)
473; RV32-FAST-NEXT:    sw a1, 36(a0)
474; RV32-FAST-NEXT:    sw a1, 40(a0)
475; RV32-FAST-NEXT:    sw a1, 44(a0)
476; RV32-FAST-NEXT:    sw a1, 16(a0)
477; RV32-FAST-NEXT:    sw a1, 20(a0)
478; RV32-FAST-NEXT:    sw a1, 24(a0)
479; RV32-FAST-NEXT:    sw a1, 28(a0)
480; RV32-FAST-NEXT:    sw a1, 0(a0)
481; RV32-FAST-NEXT:    sw a1, 4(a0)
482; RV32-FAST-NEXT:    sw a1, 8(a0)
483; RV32-FAST-NEXT:    sw a1, 12(a0)
484; RV32-FAST-NEXT:    ret
485;
486; RV64-FAST-LABEL: memset_64:
487; RV64-FAST:       # %bb.0:
488; RV64-FAST-NEXT:    andi a1, a1, 255
489; RV64-FAST-NEXT:    lui a2, 4112
490; RV64-FAST-NEXT:    addiw a2, a2, 257
491; RV64-FAST-NEXT:    slli a3, a2, 32
492; RV64-FAST-NEXT:    add a2, a2, a3
493; RV64-FAST-NEXT:    mul a1, a1, a2
494; RV64-FAST-NEXT:    sd a1, 32(a0)
495; RV64-FAST-NEXT:    sd a1, 40(a0)
496; RV64-FAST-NEXT:    sd a1, 48(a0)
497; RV64-FAST-NEXT:    sd a1, 56(a0)
498; RV64-FAST-NEXT:    sd a1, 0(a0)
499; RV64-FAST-NEXT:    sd a1, 8(a0)
500; RV64-FAST-NEXT:    sd a1, 16(a0)
501; RV64-FAST-NEXT:    sd a1, 24(a0)
502; RV64-FAST-NEXT:    ret
503  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 64, i1 0)
504  ret void
505}
506
507; /////////////////////////////////////////////////////////////////////////////
508
; aligned_memset_2 - inline memset of 2 bytes through a pointer marked
; `align 2`, with a runtime fill value. The expected code is shared by the runs
; with and without +unaligned-scalar-mem: the byte is duplicated into a
; halfword (andi / slli 8 / or) and written with one `sh`.
509define void @aligned_memset_2(ptr align 2 %a, i8 %value) nounwind {
510; RV32-BOTH-LABEL: aligned_memset_2:
511; RV32-BOTH:       # %bb.0:
512; RV32-BOTH-NEXT:    andi a2, a1, 255
513; RV32-BOTH-NEXT:    slli a1, a1, 8
514; RV32-BOTH-NEXT:    or a1, a1, a2
515; RV32-BOTH-NEXT:    sh a1, 0(a0)
516; RV32-BOTH-NEXT:    ret
517;
518; RV64-BOTH-LABEL: aligned_memset_2:
519; RV64-BOTH:       # %bb.0:
520; RV64-BOTH-NEXT:    andi a2, a1, 255
521; RV64-BOTH-NEXT:    slli a1, a1, 8
522; RV64-BOTH-NEXT:    or a1, a1, a2
523; RV64-BOTH-NEXT:    sh a1, 0(a0)
524; RV64-BOTH-NEXT:    ret
525  tail call void @llvm.memset.inline.p0.i64(ptr align 2 %a, i8 %value, i64 2, i1 0)
526  ret void
527}
528
; aligned_memset_4 - inline memset of 4 bytes through a pointer marked
; `align 4`, with a runtime fill value. The expected code is shared by the runs
; with and without +unaligned-scalar-mem: the byte is replicated across a word
; (mul by 0x01010101 on riscv32, slli 56 + mulhu on riscv64) and written with
; one `sw`.
529define void @aligned_memset_4(ptr align 4 %a, i8 %value) nounwind {
530; RV32-BOTH-LABEL: aligned_memset_4:
531; RV32-BOTH:       # %bb.0:
532; RV32-BOTH-NEXT:    andi a1, a1, 255
533; RV32-BOTH-NEXT:    lui a2, 4112
534; RV32-BOTH-NEXT:    addi a2, a2, 257
535; RV32-BOTH-NEXT:    mul a1, a1, a2
536; RV32-BOTH-NEXT:    sw a1, 0(a0)
537; RV32-BOTH-NEXT:    ret
538;
539; RV64-BOTH-LABEL: aligned_memset_4:
540; RV64-BOTH:       # %bb.0:
541; RV64-BOTH-NEXT:    slli a1, a1, 56
542; RV64-BOTH-NEXT:    lui a2, 65793
543; RV64-BOTH-NEXT:    slli a2, a2, 4
544; RV64-BOTH-NEXT:    addi a2, a2, 256
545; RV64-BOTH-NEXT:    mulhu a1, a1, a2
546; RV64-BOTH-NEXT:    sw a1, 0(a0)
547; RV64-BOTH-NEXT:    ret
548  tail call void @llvm.memset.inline.p0.i64(ptr align 4 %a, i8 %value, i64 4, i1 0)
549  ret void
550}
551
; aligned_memset_8 - inline memset of 8 bytes through a pointer marked
; `align 8`, with a runtime fill value. The expected code is shared by the runs
; with and without +unaligned-scalar-mem: two `sw` of the replicated byte on
; riscv32, one `sd` on riscv64.
552define void @aligned_memset_8(ptr align 8 %a, i8 %value) nounwind {
553; RV32-BOTH-LABEL: aligned_memset_8:
554; RV32-BOTH:       # %bb.0:
555; RV32-BOTH-NEXT:    andi a1, a1, 255
556; RV32-BOTH-NEXT:    lui a2, 4112
557; RV32-BOTH-NEXT:    addi a2, a2, 257
558; RV32-BOTH-NEXT:    mul a1, a1, a2
559; RV32-BOTH-NEXT:    sw a1, 0(a0)
560; RV32-BOTH-NEXT:    sw a1, 4(a0)
561; RV32-BOTH-NEXT:    ret
562;
563; RV64-BOTH-LABEL: aligned_memset_8:
564; RV64-BOTH:       # %bb.0:
565; RV64-BOTH-NEXT:    andi a1, a1, 255
566; RV64-BOTH-NEXT:    lui a2, 4112
567; RV64-BOTH-NEXT:    addiw a2, a2, 257
568; RV64-BOTH-NEXT:    slli a3, a2, 32
569; RV64-BOTH-NEXT:    add a2, a2, a3
570; RV64-BOTH-NEXT:    mul a1, a1, a2
571; RV64-BOTH-NEXT:    sd a1, 0(a0)
572; RV64-BOTH-NEXT:    ret
573  tail call void @llvm.memset.inline.p0.i64(ptr align 8 %a, i8 %value, i64 8, i1 0)
574  ret void
575}
576
; aligned_memset_16 - inline memset of 16 bytes through a pointer marked
; `align 16`, with a runtime fill value. The expected code is shared by the
; runs with and without +unaligned-scalar-mem: four `sw` of the replicated byte
; on riscv32, two `sd` on riscv64.
577define void @aligned_memset_16(ptr align 16 %a, i8 %value) nounwind {
578; RV32-BOTH-LABEL: aligned_memset_16:
579; RV32-BOTH:       # %bb.0:
580; RV32-BOTH-NEXT:    andi a1, a1, 255
581; RV32-BOTH-NEXT:    lui a2, 4112
582; RV32-BOTH-NEXT:    addi a2, a2, 257
583; RV32-BOTH-NEXT:    mul a1, a1, a2
584; RV32-BOTH-NEXT:    sw a1, 0(a0)
585; RV32-BOTH-NEXT:    sw a1, 4(a0)
586; RV32-BOTH-NEXT:    sw a1, 8(a0)
587; RV32-BOTH-NEXT:    sw a1, 12(a0)
588; RV32-BOTH-NEXT:    ret
589;
590; RV64-BOTH-LABEL: aligned_memset_16:
591; RV64-BOTH:       # %bb.0:
592; RV64-BOTH-NEXT:    andi a1, a1, 255
593; RV64-BOTH-NEXT:    lui a2, 4112
594; RV64-BOTH-NEXT:    addiw a2, a2, 257
595; RV64-BOTH-NEXT:    slli a3, a2, 32
596; RV64-BOTH-NEXT:    add a2, a2, a3
597; RV64-BOTH-NEXT:    mul a1, a1, a2
598; RV64-BOTH-NEXT:    sd a1, 0(a0)
599; RV64-BOTH-NEXT:    sd a1, 8(a0)
600; RV64-BOTH-NEXT:    ret
601  tail call void @llvm.memset.inline.p0.i64(ptr align 16 %a, i8 %value, i64 16, i1 0)
602  ret void
603}
604
; aligned_memset_32 - inline memset of 32 bytes through a pointer marked
; `align 32`, with a runtime fill value. The expected code is shared by the
; runs with and without +unaligned-scalar-mem: eight `sw` of the replicated
; byte on riscv32 (offsets 16..28 first, then 0..12), four `sd` on riscv64.
605define void @aligned_memset_32(ptr align 32 %a, i8 %value) nounwind {
606; RV32-BOTH-LABEL: aligned_memset_32:
607; RV32-BOTH:       # %bb.0:
608; RV32-BOTH-NEXT:    andi a1, a1, 255
609; RV32-BOTH-NEXT:    lui a2, 4112
610; RV32-BOTH-NEXT:    addi a2, a2, 257
611; RV32-BOTH-NEXT:    mul a1, a1, a2
612; RV32-BOTH-NEXT:    sw a1, 16(a0)
613; RV32-BOTH-NEXT:    sw a1, 20(a0)
614; RV32-BOTH-NEXT:    sw a1, 24(a0)
615; RV32-BOTH-NEXT:    sw a1, 28(a0)
616; RV32-BOTH-NEXT:    sw a1, 0(a0)
617; RV32-BOTH-NEXT:    sw a1, 4(a0)
618; RV32-BOTH-NEXT:    sw a1, 8(a0)
619; RV32-BOTH-NEXT:    sw a1, 12(a0)
620; RV32-BOTH-NEXT:    ret
621;
622; RV64-BOTH-LABEL: aligned_memset_32:
623; RV64-BOTH:       # %bb.0:
624; RV64-BOTH-NEXT:    andi a1, a1, 255
625; RV64-BOTH-NEXT:    lui a2, 4112
626; RV64-BOTH-NEXT:    addiw a2, a2, 257
627; RV64-BOTH-NEXT:    slli a3, a2, 32
628; RV64-BOTH-NEXT:    add a2, a2, a3
629; RV64-BOTH-NEXT:    mul a1, a1, a2
630; RV64-BOTH-NEXT:    sd a1, 0(a0)
631; RV64-BOTH-NEXT:    sd a1, 8(a0)
632; RV64-BOTH-NEXT:    sd a1, 16(a0)
633; RV64-BOTH-NEXT:    sd a1, 24(a0)
634; RV64-BOTH-NEXT:    ret
635  tail call void @llvm.memset.inline.p0.i64(ptr align 32 %a, i8 %value, i64 32, i1 0)
636  ret void
637}
638
; aligned_memset_64 - inline memset of 64 bytes through a pointer marked
; `align 64`, with a runtime fill value. The expected code is shared by the
; runs with and without +unaligned-scalar-mem: sixteen `sw` of the replicated
; byte on riscv32, eight `sd` on riscv64, in both cases starting with the
; upper part of the buffer.
639define void @aligned_memset_64(ptr align 64 %a, i8 %value) nounwind {
640; RV32-BOTH-LABEL: aligned_memset_64:
641; RV32-BOTH:       # %bb.0:
642; RV32-BOTH-NEXT:    andi a1, a1, 255
643; RV32-BOTH-NEXT:    lui a2, 4112
644; RV32-BOTH-NEXT:    addi a2, a2, 257
645; RV32-BOTH-NEXT:    mul a1, a1, a2
646; RV32-BOTH-NEXT:    sw a1, 48(a0)
647; RV32-BOTH-NEXT:    sw a1, 52(a0)
648; RV32-BOTH-NEXT:    sw a1, 56(a0)
649; RV32-BOTH-NEXT:    sw a1, 60(a0)
650; RV32-BOTH-NEXT:    sw a1, 32(a0)
651; RV32-BOTH-NEXT:    sw a1, 36(a0)
652; RV32-BOTH-NEXT:    sw a1, 40(a0)
653; RV32-BOTH-NEXT:    sw a1, 44(a0)
654; RV32-BOTH-NEXT:    sw a1, 16(a0)
655; RV32-BOTH-NEXT:    sw a1, 20(a0)
656; RV32-BOTH-NEXT:    sw a1, 24(a0)
657; RV32-BOTH-NEXT:    sw a1, 28(a0)
658; RV32-BOTH-NEXT:    sw a1, 0(a0)
659; RV32-BOTH-NEXT:    sw a1, 4(a0)
660; RV32-BOTH-NEXT:    sw a1, 8(a0)
661; RV32-BOTH-NEXT:    sw a1, 12(a0)
662; RV32-BOTH-NEXT:    ret
663;
664; RV64-BOTH-LABEL: aligned_memset_64:
665; RV64-BOTH:       # %bb.0:
666; RV64-BOTH-NEXT:    andi a1, a1, 255
667; RV64-BOTH-NEXT:    lui a2, 4112
668; RV64-BOTH-NEXT:    addiw a2, a2, 257
669; RV64-BOTH-NEXT:    slli a3, a2, 32
670; RV64-BOTH-NEXT:    add a2, a2, a3
671; RV64-BOTH-NEXT:    mul a1, a1, a2
672; RV64-BOTH-NEXT:    sd a1, 32(a0)
673; RV64-BOTH-NEXT:    sd a1, 40(a0)
674; RV64-BOTH-NEXT:    sd a1, 48(a0)
675; RV64-BOTH-NEXT:    sd a1, 56(a0)
676; RV64-BOTH-NEXT:    sd a1, 0(a0)
677; RV64-BOTH-NEXT:    sd a1, 8(a0)
678; RV64-BOTH-NEXT:    sd a1, 16(a0)
679; RV64-BOTH-NEXT:    sd a1, 24(a0)
680; RV64-BOTH-NEXT:    ret
681  tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 %value, i64 64, i1 0)
682  ret void
683}
684
685; /////////////////////////////////////////////////////////////////////////////
686
; bzero_1 - inline memset of 1 byte with the constant fill value 0; the
; pointer has no alignment attribute. Every configuration in the run lines is
; expected to emit a single `sb` of the zero register.
687define void @bzero_1(ptr %a) nounwind {
688; RV32-BOTH-LABEL: bzero_1:
689; RV32-BOTH:       # %bb.0:
690; RV32-BOTH-NEXT:    sb zero, 0(a0)
691; RV32-BOTH-NEXT:    ret
692;
693; RV64-BOTH-LABEL: bzero_1:
694; RV64-BOTH:       # %bb.0:
695; RV64-BOTH-NEXT:    sb zero, 0(a0)
696; RV64-BOTH-NEXT:    ret
697  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 1, i1 0)
698  ret void
699}
700
; bzero_2 - inline memset of 2 bytes with the constant fill value 0; the
; pointer has no alignment attribute.
; Without +unaligned-scalar-mem the expected code is two `sb` stores of the
; zero register; with it, a single `sh` of the zero register.
701define void @bzero_2(ptr %a) nounwind {
702; RV32-LABEL: bzero_2:
703; RV32:       # %bb.0:
704; RV32-NEXT:    sb zero, 0(a0)
705; RV32-NEXT:    sb zero, 1(a0)
706; RV32-NEXT:    ret
707;
708; RV64-LABEL: bzero_2:
709; RV64:       # %bb.0:
710; RV64-NEXT:    sb zero, 0(a0)
711; RV64-NEXT:    sb zero, 1(a0)
712; RV64-NEXT:    ret
713;
714; RV32-FAST-LABEL: bzero_2:
715; RV32-FAST:       # %bb.0:
716; RV32-FAST-NEXT:    sh zero, 0(a0)
717; RV32-FAST-NEXT:    ret
718;
719; RV64-FAST-LABEL: bzero_2:
720; RV64-FAST:       # %bb.0:
721; RV64-FAST-NEXT:    sh zero, 0(a0)
722; RV64-FAST-NEXT:    ret
723  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 2, i1 0)
724  ret void
725}
726
; bzero_4 - inline memset of 4 bytes with the constant fill value 0; the
; pointer has no alignment attribute.
; Without +unaligned-scalar-mem the expected code is four `sb` stores of the
; zero register; with it, a single `sw` of the zero register.
727define void @bzero_4(ptr %a) nounwind {
728; RV32-LABEL: bzero_4:
729; RV32:       # %bb.0:
730; RV32-NEXT:    sb zero, 0(a0)
731; RV32-NEXT:    sb zero, 1(a0)
732; RV32-NEXT:    sb zero, 2(a0)
733; RV32-NEXT:    sb zero, 3(a0)
734; RV32-NEXT:    ret
735;
736; RV64-LABEL: bzero_4:
737; RV64:       # %bb.0:
738; RV64-NEXT:    sb zero, 0(a0)
739; RV64-NEXT:    sb zero, 1(a0)
740; RV64-NEXT:    sb zero, 2(a0)
741; RV64-NEXT:    sb zero, 3(a0)
742; RV64-NEXT:    ret
743;
744; RV32-FAST-LABEL: bzero_4:
745; RV32-FAST:       # %bb.0:
746; RV32-FAST-NEXT:    sw zero, 0(a0)
747; RV32-FAST-NEXT:    ret
748;
749; RV64-FAST-LABEL: bzero_4:
750; RV64-FAST:       # %bb.0:
751; RV64-FAST-NEXT:    sw zero, 0(a0)
752; RV64-FAST-NEXT:    ret
753  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 4, i1 0)
754  ret void
755}
756
; bzero_8 - inline memset of 8 bytes with the constant fill value 0; the
; pointer has no alignment attribute.
; Without +unaligned-scalar-mem the expected code is eight `sb` stores of the
; zero register (offsets 4..7 first, then 0..3). With it, riscv32 uses two
; `sw` and riscv64 one `sd` of the zero register.
757define void @bzero_8(ptr %a) nounwind {
758; RV32-LABEL: bzero_8:
759; RV32:       # %bb.0:
760; RV32-NEXT:    sb zero, 4(a0)
761; RV32-NEXT:    sb zero, 5(a0)
762; RV32-NEXT:    sb zero, 6(a0)
763; RV32-NEXT:    sb zero, 7(a0)
764; RV32-NEXT:    sb zero, 0(a0)
765; RV32-NEXT:    sb zero, 1(a0)
766; RV32-NEXT:    sb zero, 2(a0)
767; RV32-NEXT:    sb zero, 3(a0)
768; RV32-NEXT:    ret
769;
770; RV64-LABEL: bzero_8:
771; RV64:       # %bb.0:
772; RV64-NEXT:    sb zero, 4(a0)
773; RV64-NEXT:    sb zero, 5(a0)
774; RV64-NEXT:    sb zero, 6(a0)
775; RV64-NEXT:    sb zero, 7(a0)
776; RV64-NEXT:    sb zero, 0(a0)
777; RV64-NEXT:    sb zero, 1(a0)
778; RV64-NEXT:    sb zero, 2(a0)
779; RV64-NEXT:    sb zero, 3(a0)
780; RV64-NEXT:    ret
781;
782; RV32-FAST-LABEL: bzero_8:
783; RV32-FAST:       # %bb.0:
784; RV32-FAST-NEXT:    sw zero, 0(a0)
785; RV32-FAST-NEXT:    sw zero, 4(a0)
786; RV32-FAST-NEXT:    ret
787;
788; RV64-FAST-LABEL: bzero_8:
789; RV64-FAST:       # %bb.0:
790; RV64-FAST-NEXT:    sd zero, 0(a0)
791; RV64-FAST-NEXT:    ret
792  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 8, i1 0)
793  ret void
794}
795
; bzero_16 - inline memset of 16 bytes with the constant fill value 0; the
; pointer has no alignment attribute.
; Without +unaligned-scalar-mem the expected code is sixteen `sb` stores of
; the zero register, in groups of four from the highest offsets down. With it,
; riscv32 uses four `sw` and riscv64 two `sd` of the zero register.
796define void @bzero_16(ptr %a) nounwind {
797; RV32-LABEL: bzero_16:
798; RV32:       # %bb.0:
799; RV32-NEXT:    sb zero, 12(a0)
800; RV32-NEXT:    sb zero, 13(a0)
801; RV32-NEXT:    sb zero, 14(a0)
802; RV32-NEXT:    sb zero, 15(a0)
803; RV32-NEXT:    sb zero, 8(a0)
804; RV32-NEXT:    sb zero, 9(a0)
805; RV32-NEXT:    sb zero, 10(a0)
806; RV32-NEXT:    sb zero, 11(a0)
807; RV32-NEXT:    sb zero, 4(a0)
808; RV32-NEXT:    sb zero, 5(a0)
809; RV32-NEXT:    sb zero, 6(a0)
810; RV32-NEXT:    sb zero, 7(a0)
811; RV32-NEXT:    sb zero, 0(a0)
812; RV32-NEXT:    sb zero, 1(a0)
813; RV32-NEXT:    sb zero, 2(a0)
814; RV32-NEXT:    sb zero, 3(a0)
815; RV32-NEXT:    ret
816;
817; RV64-LABEL: bzero_16:
818; RV64:       # %bb.0:
819; RV64-NEXT:    sb zero, 12(a0)
820; RV64-NEXT:    sb zero, 13(a0)
821; RV64-NEXT:    sb zero, 14(a0)
822; RV64-NEXT:    sb zero, 15(a0)
823; RV64-NEXT:    sb zero, 8(a0)
824; RV64-NEXT:    sb zero, 9(a0)
825; RV64-NEXT:    sb zero, 10(a0)
826; RV64-NEXT:    sb zero, 11(a0)
827; RV64-NEXT:    sb zero, 4(a0)
828; RV64-NEXT:    sb zero, 5(a0)
829; RV64-NEXT:    sb zero, 6(a0)
830; RV64-NEXT:    sb zero, 7(a0)
831; RV64-NEXT:    sb zero, 0(a0)
832; RV64-NEXT:    sb zero, 1(a0)
833; RV64-NEXT:    sb zero, 2(a0)
834; RV64-NEXT:    sb zero, 3(a0)
835; RV64-NEXT:    ret
836;
837; RV32-FAST-LABEL: bzero_16:
838; RV32-FAST:       # %bb.0:
839; RV32-FAST-NEXT:    sw zero, 0(a0)
840; RV32-FAST-NEXT:    sw zero, 4(a0)
841; RV32-FAST-NEXT:    sw zero, 8(a0)
842; RV32-FAST-NEXT:    sw zero, 12(a0)
843; RV32-FAST-NEXT:    ret
844;
845; RV64-FAST-LABEL: bzero_16:
846; RV64-FAST:       # %bb.0:
847; RV64-FAST-NEXT:    sd zero, 0(a0)
848; RV64-FAST-NEXT:    sd zero, 8(a0)
849; RV64-FAST-NEXT:    ret
850  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 16, i1 0)
851  ret void
852}
853
; bzero_32 - inline memset of 32 bytes with the constant fill value 0; the
; pointer has no alignment attribute.
; Without +unaligned-scalar-mem the expected code is thirty-two `sb` stores of
; the zero register, in groups of four from the highest offsets down. With it,
; riscv32 uses eight `sw` (offsets 16..28 first, then 0..12) and riscv64 four
; `sd` of the zero register.
854define void @bzero_32(ptr %a) nounwind {
855; RV32-LABEL: bzero_32:
856; RV32:       # %bb.0:
857; RV32-NEXT:    sb zero, 28(a0)
858; RV32-NEXT:    sb zero, 29(a0)
859; RV32-NEXT:    sb zero, 30(a0)
860; RV32-NEXT:    sb zero, 31(a0)
861; RV32-NEXT:    sb zero, 24(a0)
862; RV32-NEXT:    sb zero, 25(a0)
863; RV32-NEXT:    sb zero, 26(a0)
864; RV32-NEXT:    sb zero, 27(a0)
865; RV32-NEXT:    sb zero, 20(a0)
866; RV32-NEXT:    sb zero, 21(a0)
867; RV32-NEXT:    sb zero, 22(a0)
868; RV32-NEXT:    sb zero, 23(a0)
869; RV32-NEXT:    sb zero, 16(a0)
870; RV32-NEXT:    sb zero, 17(a0)
871; RV32-NEXT:    sb zero, 18(a0)
872; RV32-NEXT:    sb zero, 19(a0)
873; RV32-NEXT:    sb zero, 12(a0)
874; RV32-NEXT:    sb zero, 13(a0)
875; RV32-NEXT:    sb zero, 14(a0)
876; RV32-NEXT:    sb zero, 15(a0)
877; RV32-NEXT:    sb zero, 8(a0)
878; RV32-NEXT:    sb zero, 9(a0)
879; RV32-NEXT:    sb zero, 10(a0)
880; RV32-NEXT:    sb zero, 11(a0)
881; RV32-NEXT:    sb zero, 4(a0)
882; RV32-NEXT:    sb zero, 5(a0)
883; RV32-NEXT:    sb zero, 6(a0)
884; RV32-NEXT:    sb zero, 7(a0)
885; RV32-NEXT:    sb zero, 0(a0)
886; RV32-NEXT:    sb zero, 1(a0)
887; RV32-NEXT:    sb zero, 2(a0)
888; RV32-NEXT:    sb zero, 3(a0)
889; RV32-NEXT:    ret
890;
891; RV64-LABEL: bzero_32:
892; RV64:       # %bb.0:
893; RV64-NEXT:    sb zero, 28(a0)
894; RV64-NEXT:    sb zero, 29(a0)
895; RV64-NEXT:    sb zero, 30(a0)
896; RV64-NEXT:    sb zero, 31(a0)
897; RV64-NEXT:    sb zero, 24(a0)
898; RV64-NEXT:    sb zero, 25(a0)
899; RV64-NEXT:    sb zero, 26(a0)
900; RV64-NEXT:    sb zero, 27(a0)
901; RV64-NEXT:    sb zero, 20(a0)
902; RV64-NEXT:    sb zero, 21(a0)
903; RV64-NEXT:    sb zero, 22(a0)
904; RV64-NEXT:    sb zero, 23(a0)
905; RV64-NEXT:    sb zero, 16(a0)
906; RV64-NEXT:    sb zero, 17(a0)
907; RV64-NEXT:    sb zero, 18(a0)
908; RV64-NEXT:    sb zero, 19(a0)
909; RV64-NEXT:    sb zero, 12(a0)
910; RV64-NEXT:    sb zero, 13(a0)
911; RV64-NEXT:    sb zero, 14(a0)
912; RV64-NEXT:    sb zero, 15(a0)
913; RV64-NEXT:    sb zero, 8(a0)
914; RV64-NEXT:    sb zero, 9(a0)
915; RV64-NEXT:    sb zero, 10(a0)
916; RV64-NEXT:    sb zero, 11(a0)
917; RV64-NEXT:    sb zero, 4(a0)
918; RV64-NEXT:    sb zero, 5(a0)
919; RV64-NEXT:    sb zero, 6(a0)
920; RV64-NEXT:    sb zero, 7(a0)
921; RV64-NEXT:    sb zero, 0(a0)
922; RV64-NEXT:    sb zero, 1(a0)
923; RV64-NEXT:    sb zero, 2(a0)
924; RV64-NEXT:    sb zero, 3(a0)
925; RV64-NEXT:    ret
926;
927; RV32-FAST-LABEL: bzero_32:
928; RV32-FAST:       # %bb.0:
929; RV32-FAST-NEXT:    sw zero, 16(a0)
930; RV32-FAST-NEXT:    sw zero, 20(a0)
931; RV32-FAST-NEXT:    sw zero, 24(a0)
932; RV32-FAST-NEXT:    sw zero, 28(a0)
933; RV32-FAST-NEXT:    sw zero, 0(a0)
934; RV32-FAST-NEXT:    sw zero, 4(a0)
935; RV32-FAST-NEXT:    sw zero, 8(a0)
936; RV32-FAST-NEXT:    sw zero, 12(a0)
937; RV32-FAST-NEXT:    ret
938;
939; RV64-FAST-LABEL: bzero_32:
940; RV64-FAST:       # %bb.0:
941; RV64-FAST-NEXT:    sd zero, 0(a0)
942; RV64-FAST-NEXT:    sd zero, 8(a0)
943; RV64-FAST-NEXT:    sd zero, 16(a0)
944; RV64-FAST-NEXT:    sd zero, 24(a0)
945; RV64-FAST-NEXT:    ret
946  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 32, i1 0)
947  ret void
948}
949
; bzero_64: inline expansion of a 64-byte memset with value 0 through a pointer
; that carries no alignment attribute (last operand i1 0 = not volatile).
; Expected output per configuration (see the RUN lines at the top of the file):
;   - without +unaligned-scalar-mem: 64 single-byte sb stores of the zero
;     register on both riscv32 and riscv64;
;   - with +unaligned-scalar-mem (the -FAST prefixes): 16 sw stores on riscv32
;     and 8 sd stores on riscv64.
; The store order below is simply what llc emitted when the assertions were
; generated; regenerate with utils/update_llc_test_checks.py rather than
; editing the check lines by hand.
950define void @bzero_64(ptr %a) nounwind {
951; RV32-LABEL: bzero_64:
952; RV32:       # %bb.0:
953; RV32-NEXT:    sb zero, 60(a0)
954; RV32-NEXT:    sb zero, 61(a0)
955; RV32-NEXT:    sb zero, 62(a0)
956; RV32-NEXT:    sb zero, 63(a0)
957; RV32-NEXT:    sb zero, 56(a0)
958; RV32-NEXT:    sb zero, 57(a0)
959; RV32-NEXT:    sb zero, 58(a0)
960; RV32-NEXT:    sb zero, 59(a0)
961; RV32-NEXT:    sb zero, 52(a0)
962; RV32-NEXT:    sb zero, 53(a0)
963; RV32-NEXT:    sb zero, 54(a0)
964; RV32-NEXT:    sb zero, 55(a0)
965; RV32-NEXT:    sb zero, 48(a0)
966; RV32-NEXT:    sb zero, 49(a0)
967; RV32-NEXT:    sb zero, 50(a0)
968; RV32-NEXT:    sb zero, 51(a0)
969; RV32-NEXT:    sb zero, 44(a0)
970; RV32-NEXT:    sb zero, 45(a0)
971; RV32-NEXT:    sb zero, 46(a0)
972; RV32-NEXT:    sb zero, 47(a0)
973; RV32-NEXT:    sb zero, 40(a0)
974; RV32-NEXT:    sb zero, 41(a0)
975; RV32-NEXT:    sb zero, 42(a0)
976; RV32-NEXT:    sb zero, 43(a0)
977; RV32-NEXT:    sb zero, 36(a0)
978; RV32-NEXT:    sb zero, 37(a0)
979; RV32-NEXT:    sb zero, 38(a0)
980; RV32-NEXT:    sb zero, 39(a0)
981; RV32-NEXT:    sb zero, 32(a0)
982; RV32-NEXT:    sb zero, 33(a0)
983; RV32-NEXT:    sb zero, 34(a0)
984; RV32-NEXT:    sb zero, 35(a0)
985; RV32-NEXT:    sb zero, 28(a0)
986; RV32-NEXT:    sb zero, 29(a0)
987; RV32-NEXT:    sb zero, 30(a0)
988; RV32-NEXT:    sb zero, 31(a0)
989; RV32-NEXT:    sb zero, 24(a0)
990; RV32-NEXT:    sb zero, 25(a0)
991; RV32-NEXT:    sb zero, 26(a0)
992; RV32-NEXT:    sb zero, 27(a0)
993; RV32-NEXT:    sb zero, 20(a0)
994; RV32-NEXT:    sb zero, 21(a0)
995; RV32-NEXT:    sb zero, 22(a0)
996; RV32-NEXT:    sb zero, 23(a0)
997; RV32-NEXT:    sb zero, 16(a0)
998; RV32-NEXT:    sb zero, 17(a0)
999; RV32-NEXT:    sb zero, 18(a0)
1000; RV32-NEXT:    sb zero, 19(a0)
1001; RV32-NEXT:    sb zero, 12(a0)
1002; RV32-NEXT:    sb zero, 13(a0)
1003; RV32-NEXT:    sb zero, 14(a0)
1004; RV32-NEXT:    sb zero, 15(a0)
1005; RV32-NEXT:    sb zero, 8(a0)
1006; RV32-NEXT:    sb zero, 9(a0)
1007; RV32-NEXT:    sb zero, 10(a0)
1008; RV32-NEXT:    sb zero, 11(a0)
1009; RV32-NEXT:    sb zero, 4(a0)
1010; RV32-NEXT:    sb zero, 5(a0)
1011; RV32-NEXT:    sb zero, 6(a0)
1012; RV32-NEXT:    sb zero, 7(a0)
1013; RV32-NEXT:    sb zero, 0(a0)
1014; RV32-NEXT:    sb zero, 1(a0)
1015; RV32-NEXT:    sb zero, 2(a0)
1016; RV32-NEXT:    sb zero, 3(a0)
1017; RV32-NEXT:    ret
1018;
1019; RV64-LABEL: bzero_64:
1020; RV64:       # %bb.0:
1021; RV64-NEXT:    sb zero, 60(a0)
1022; RV64-NEXT:    sb zero, 61(a0)
1023; RV64-NEXT:    sb zero, 62(a0)
1024; RV64-NEXT:    sb zero, 63(a0)
1025; RV64-NEXT:    sb zero, 56(a0)
1026; RV64-NEXT:    sb zero, 57(a0)
1027; RV64-NEXT:    sb zero, 58(a0)
1028; RV64-NEXT:    sb zero, 59(a0)
1029; RV64-NEXT:    sb zero, 52(a0)
1030; RV64-NEXT:    sb zero, 53(a0)
1031; RV64-NEXT:    sb zero, 54(a0)
1032; RV64-NEXT:    sb zero, 55(a0)
1033; RV64-NEXT:    sb zero, 48(a0)
1034; RV64-NEXT:    sb zero, 49(a0)
1035; RV64-NEXT:    sb zero, 50(a0)
1036; RV64-NEXT:    sb zero, 51(a0)
1037; RV64-NEXT:    sb zero, 44(a0)
1038; RV64-NEXT:    sb zero, 45(a0)
1039; RV64-NEXT:    sb zero, 46(a0)
1040; RV64-NEXT:    sb zero, 47(a0)
1041; RV64-NEXT:    sb zero, 40(a0)
1042; RV64-NEXT:    sb zero, 41(a0)
1043; RV64-NEXT:    sb zero, 42(a0)
1044; RV64-NEXT:    sb zero, 43(a0)
1045; RV64-NEXT:    sb zero, 36(a0)
1046; RV64-NEXT:    sb zero, 37(a0)
1047; RV64-NEXT:    sb zero, 38(a0)
1048; RV64-NEXT:    sb zero, 39(a0)
1049; RV64-NEXT:    sb zero, 32(a0)
1050; RV64-NEXT:    sb zero, 33(a0)
1051; RV64-NEXT:    sb zero, 34(a0)
1052; RV64-NEXT:    sb zero, 35(a0)
1053; RV64-NEXT:    sb zero, 28(a0)
1054; RV64-NEXT:    sb zero, 29(a0)
1055; RV64-NEXT:    sb zero, 30(a0)
1056; RV64-NEXT:    sb zero, 31(a0)
1057; RV64-NEXT:    sb zero, 24(a0)
1058; RV64-NEXT:    sb zero, 25(a0)
1059; RV64-NEXT:    sb zero, 26(a0)
1060; RV64-NEXT:    sb zero, 27(a0)
1061; RV64-NEXT:    sb zero, 20(a0)
1062; RV64-NEXT:    sb zero, 21(a0)
1063; RV64-NEXT:    sb zero, 22(a0)
1064; RV64-NEXT:    sb zero, 23(a0)
1065; RV64-NEXT:    sb zero, 16(a0)
1066; RV64-NEXT:    sb zero, 17(a0)
1067; RV64-NEXT:    sb zero, 18(a0)
1068; RV64-NEXT:    sb zero, 19(a0)
1069; RV64-NEXT:    sb zero, 12(a0)
1070; RV64-NEXT:    sb zero, 13(a0)
1071; RV64-NEXT:    sb zero, 14(a0)
1072; RV64-NEXT:    sb zero, 15(a0)
1073; RV64-NEXT:    sb zero, 8(a0)
1074; RV64-NEXT:    sb zero, 9(a0)
1075; RV64-NEXT:    sb zero, 10(a0)
1076; RV64-NEXT:    sb zero, 11(a0)
1077; RV64-NEXT:    sb zero, 4(a0)
1078; RV64-NEXT:    sb zero, 5(a0)
1079; RV64-NEXT:    sb zero, 6(a0)
1080; RV64-NEXT:    sb zero, 7(a0)
1081; RV64-NEXT:    sb zero, 0(a0)
1082; RV64-NEXT:    sb zero, 1(a0)
1083; RV64-NEXT:    sb zero, 2(a0)
1084; RV64-NEXT:    sb zero, 3(a0)
1085; RV64-NEXT:    ret
1086;
1087; RV32-FAST-LABEL: bzero_64:
1088; RV32-FAST:       # %bb.0:
1089; RV32-FAST-NEXT:    sw zero, 48(a0)
1090; RV32-FAST-NEXT:    sw zero, 52(a0)
1091; RV32-FAST-NEXT:    sw zero, 56(a0)
1092; RV32-FAST-NEXT:    sw zero, 60(a0)
1093; RV32-FAST-NEXT:    sw zero, 32(a0)
1094; RV32-FAST-NEXT:    sw zero, 36(a0)
1095; RV32-FAST-NEXT:    sw zero, 40(a0)
1096; RV32-FAST-NEXT:    sw zero, 44(a0)
1097; RV32-FAST-NEXT:    sw zero, 16(a0)
1098; RV32-FAST-NEXT:    sw zero, 20(a0)
1099; RV32-FAST-NEXT:    sw zero, 24(a0)
1100; RV32-FAST-NEXT:    sw zero, 28(a0)
1101; RV32-FAST-NEXT:    sw zero, 0(a0)
1102; RV32-FAST-NEXT:    sw zero, 4(a0)
1103; RV32-FAST-NEXT:    sw zero, 8(a0)
1104; RV32-FAST-NEXT:    sw zero, 12(a0)
1105; RV32-FAST-NEXT:    ret
1106;
1107; RV64-FAST-LABEL: bzero_64:
1108; RV64-FAST:       # %bb.0:
1109; RV64-FAST-NEXT:    sd zero, 32(a0)
1110; RV64-FAST-NEXT:    sd zero, 40(a0)
1111; RV64-FAST-NEXT:    sd zero, 48(a0)
1112; RV64-FAST-NEXT:    sd zero, 56(a0)
1113; RV64-FAST-NEXT:    sd zero, 0(a0)
1114; RV64-FAST-NEXT:    sd zero, 8(a0)
1115; RV64-FAST-NEXT:    sd zero, 16(a0)
1116; RV64-FAST-NEXT:    sd zero, 24(a0)
1117; RV64-FAST-NEXT:    ret
1118  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 64, i1 0)
1119  ret void
1120}
1121
1122; /////////////////////////////////////////////////////////////////////////////
1123
; aligned_bzero_2: inline memset of 2 zero bytes through a pointer marked
; align 2. Because the alignment already permits a halfword access, all four
; RUN configurations share the -BOTH prefixes and expect a single sh of the
; zero register, with or without +unaligned-scalar-mem.
1124define void @aligned_bzero_2(ptr %a) nounwind {
1125; RV32-BOTH-LABEL: aligned_bzero_2:
1126; RV32-BOTH:       # %bb.0:
1127; RV32-BOTH-NEXT:    sh zero, 0(a0)
1128; RV32-BOTH-NEXT:    ret
1129;
1130; RV64-BOTH-LABEL: aligned_bzero_2:
1131; RV64-BOTH:       # %bb.0:
1132; RV64-BOTH-NEXT:    sh zero, 0(a0)
1133; RV64-BOTH-NEXT:    ret
1134  tail call void @llvm.memset.inline.p0.i64(ptr align 2 %a, i8 0, i64 2, i1 0)
1135  ret void
1136}
1137
; aligned_bzero_4: inline memset of 4 zero bytes through a pointer marked
; align 4. Every configuration (the -BOTH prefixes cover the plain and the
; +unaligned-scalar-mem runs) is expected to emit one sw of the zero register.
1138define void @aligned_bzero_4(ptr %a) nounwind {
1139; RV32-BOTH-LABEL: aligned_bzero_4:
1140; RV32-BOTH:       # %bb.0:
1141; RV32-BOTH-NEXT:    sw zero, 0(a0)
1142; RV32-BOTH-NEXT:    ret
1143;
1144; RV64-BOTH-LABEL: aligned_bzero_4:
1145; RV64-BOTH:       # %bb.0:
1146; RV64-BOTH-NEXT:    sw zero, 0(a0)
1147; RV64-BOTH-NEXT:    ret
1148  tail call void @llvm.memset.inline.p0.i64(ptr align 4 %a, i8 0, i64 4, i1 0)
1149  ret void
1150}
1151
; aligned_bzero_8: inline memset of 8 zero bytes through a pointer marked
; align 8. Expected output is two sw stores on riscv32 and a single sd on
; riscv64; the result does not depend on +unaligned-scalar-mem, so only the
; -BOTH prefixes are used.
1152define void @aligned_bzero_8(ptr %a) nounwind {
1153; RV32-BOTH-LABEL: aligned_bzero_8:
1154; RV32-BOTH:       # %bb.0:
1155; RV32-BOTH-NEXT:    sw zero, 0(a0)
1156; RV32-BOTH-NEXT:    sw zero, 4(a0)
1157; RV32-BOTH-NEXT:    ret
1158;
1159; RV64-BOTH-LABEL: aligned_bzero_8:
1160; RV64-BOTH:       # %bb.0:
1161; RV64-BOTH-NEXT:    sd zero, 0(a0)
1162; RV64-BOTH-NEXT:    ret
1163  tail call void @llvm.memset.inline.p0.i64(ptr align 8 %a, i8 0, i64 8, i1 0)
1164  ret void
1165}
1166
1167
; aligned_bzero_16: inline memset of 16 zero bytes through a pointer marked
; align 16. Expected output is four sw stores on riscv32 and two sd stores on
; riscv64, identical with and without +unaligned-scalar-mem (-BOTH prefixes).
1168define void @aligned_bzero_16(ptr %a) nounwind {
1169; RV32-BOTH-LABEL: aligned_bzero_16:
1170; RV32-BOTH:       # %bb.0:
1171; RV32-BOTH-NEXT:    sw zero, 0(a0)
1172; RV32-BOTH-NEXT:    sw zero, 4(a0)
1173; RV32-BOTH-NEXT:    sw zero, 8(a0)
1174; RV32-BOTH-NEXT:    sw zero, 12(a0)
1175; RV32-BOTH-NEXT:    ret
1176;
1177; RV64-BOTH-LABEL: aligned_bzero_16:
1178; RV64-BOTH:       # %bb.0:
1179; RV64-BOTH-NEXT:    sd zero, 0(a0)
1180; RV64-BOTH-NEXT:    sd zero, 8(a0)
1181; RV64-BOTH-NEXT:    ret
1182  tail call void @llvm.memset.inline.p0.i64(ptr align 16 %a, i8 0, i64 16, i1 0)
1183  ret void
1184}
1185
; aligned_bzero_32: inline memset of 32 zero bytes through a pointer marked
; align 32. Expected output is eight sw stores on riscv32 and four sd stores on
; riscv64, identical with and without +unaligned-scalar-mem (-BOTH prefixes).
; On riscv32 the upper 16 bytes are checked before the lower 16; that order is
; what llc produced when the assertions were generated.
1186define void @aligned_bzero_32(ptr %a) nounwind {
1187; RV32-BOTH-LABEL: aligned_bzero_32:
1188; RV32-BOTH:       # %bb.0:
1189; RV32-BOTH-NEXT:    sw zero, 16(a0)
1190; RV32-BOTH-NEXT:    sw zero, 20(a0)
1191; RV32-BOTH-NEXT:    sw zero, 24(a0)
1192; RV32-BOTH-NEXT:    sw zero, 28(a0)
1193; RV32-BOTH-NEXT:    sw zero, 0(a0)
1194; RV32-BOTH-NEXT:    sw zero, 4(a0)
1195; RV32-BOTH-NEXT:    sw zero, 8(a0)
1196; RV32-BOTH-NEXT:    sw zero, 12(a0)
1197; RV32-BOTH-NEXT:    ret
1198;
1199; RV64-BOTH-LABEL: aligned_bzero_32:
1200; RV64-BOTH:       # %bb.0:
1201; RV64-BOTH-NEXT:    sd zero, 0(a0)
1202; RV64-BOTH-NEXT:    sd zero, 8(a0)
1203; RV64-BOTH-NEXT:    sd zero, 16(a0)
1204; RV64-BOTH-NEXT:    sd zero, 24(a0)
1205; RV64-BOTH-NEXT:    ret
1206  tail call void @llvm.memset.inline.p0.i64(ptr align 32 %a, i8 0, i64 32, i1 0)
1207  ret void
1208}
1209
; aligned_bzero_64: inline memset of 64 zero bytes through a pointer marked
; align 64. Expected output is 16 sw stores on riscv32 and 8 sd stores on
; riscv64, identical with and without +unaligned-scalar-mem (-BOTH prefixes).
; The store order (higher offsets first) is what llc produced when the
; assertions were generated.
1210define void @aligned_bzero_64(ptr %a) nounwind {
1211; RV32-BOTH-LABEL: aligned_bzero_64:
1212; RV32-BOTH:       # %bb.0:
1213; RV32-BOTH-NEXT:    sw zero, 48(a0)
1214; RV32-BOTH-NEXT:    sw zero, 52(a0)
1215; RV32-BOTH-NEXT:    sw zero, 56(a0)
1216; RV32-BOTH-NEXT:    sw zero, 60(a0)
1217; RV32-BOTH-NEXT:    sw zero, 32(a0)
1218; RV32-BOTH-NEXT:    sw zero, 36(a0)
1219; RV32-BOTH-NEXT:    sw zero, 40(a0)
1220; RV32-BOTH-NEXT:    sw zero, 44(a0)
1221; RV32-BOTH-NEXT:    sw zero, 16(a0)
1222; RV32-BOTH-NEXT:    sw zero, 20(a0)
1223; RV32-BOTH-NEXT:    sw zero, 24(a0)
1224; RV32-BOTH-NEXT:    sw zero, 28(a0)
1225; RV32-BOTH-NEXT:    sw zero, 0(a0)
1226; RV32-BOTH-NEXT:    sw zero, 4(a0)
1227; RV32-BOTH-NEXT:    sw zero, 8(a0)
1228; RV32-BOTH-NEXT:    sw zero, 12(a0)
1229; RV32-BOTH-NEXT:    ret
1230;
1231; RV64-BOTH-LABEL: aligned_bzero_64:
1232; RV64-BOTH:       # %bb.0:
1233; RV64-BOTH-NEXT:    sd zero, 32(a0)
1234; RV64-BOTH-NEXT:    sd zero, 40(a0)
1235; RV64-BOTH-NEXT:    sd zero, 48(a0)
1236; RV64-BOTH-NEXT:    sd zero, 56(a0)
1237; RV64-BOTH-NEXT:    sd zero, 0(a0)
1238; RV64-BOTH-NEXT:    sd zero, 8(a0)
1239; RV64-BOTH-NEXT:    sd zero, 16(a0)
1240; RV64-BOTH-NEXT:    sd zero, 24(a0)
1241; RV64-BOTH-NEXT:    ret
1242  tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 64, i1 0)
1243  ret void
1244}
1245
1246
1247; /////////////////////////////////////////////////////////////////////////////
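1248; Sizes that are not a power of two. With unaligned scalar access enabled the tail is covered by an overlapping store (the usual overlap trick); otherwise it is split into progressively narrower stores.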
1249
; aligned_bzero_7: inline memset of 7 zero bytes through a pointer marked
; align 8 (the size is deliberately not a power of two).
;   - Without +unaligned-scalar-mem the 7 bytes are split into naturally
;     aligned pieces: sw at offset 0, sh at offset 4, sb at offset 6.
;   - With +unaligned-scalar-mem (the -FAST prefixes) two sw stores at
;     offsets 3 and 0 are expected; they overlap on byte 3, and the store at
;     offset 3 is a misaligned word access.
1250define void @aligned_bzero_7(ptr %a) nounwind {
1251; RV32-LABEL: aligned_bzero_7:
1252; RV32:       # %bb.0:
1253; RV32-NEXT:    sw zero, 0(a0)
1254; RV32-NEXT:    sh zero, 4(a0)
1255; RV32-NEXT:    sb zero, 6(a0)
1256; RV32-NEXT:    ret
1257;
1258; RV64-LABEL: aligned_bzero_7:
1259; RV64:       # %bb.0:
1260; RV64-NEXT:    sw zero, 0(a0)
1261; RV64-NEXT:    sh zero, 4(a0)
1262; RV64-NEXT:    sb zero, 6(a0)
1263; RV64-NEXT:    ret
1264;
1265; RV32-FAST-LABEL: aligned_bzero_7:
1266; RV32-FAST:       # %bb.0:
1267; RV32-FAST-NEXT:    sw zero, 3(a0)
1268; RV32-FAST-NEXT:    sw zero, 0(a0)
1269; RV32-FAST-NEXT:    ret
1270;
1271; RV64-FAST-LABEL: aligned_bzero_7:
1272; RV64-FAST:       # %bb.0:
1273; RV64-FAST-NEXT:    sw zero, 3(a0)
1274; RV64-FAST-NEXT:    sw zero, 0(a0)
1275; RV64-FAST-NEXT:    ret
1276  tail call void @llvm.memset.inline.p0.i64(ptr align 8 %a, i8 0, i64 7, i1 0)
1277  ret void
1278}
1279
; aligned_bzero_15: inline memset of 15 zero bytes through a pointer marked
; align 8 (the size is deliberately not a power of two).
;   - riscv32 without +unaligned-scalar-mem: three sw stores (offsets 0, 4, 8),
;     one sh at 12 and one sb at 14; the sb is checked first because that is
;     the order llc emitted.
;   - riscv64 without +unaligned-scalar-mem: sd at 0, sw at 8, sh at 12,
;     sb at 14.
;   - riscv32 with +unaligned-scalar-mem: four sw stores at offsets 11, 0, 4, 8;
;     the store at 11 is misaligned and overlaps byte 11 of the store at 8.
;   - riscv64 with +unaligned-scalar-mem: two sd stores at offsets 7 and 0,
;     overlapping on byte 7.
1280define void @aligned_bzero_15(ptr %a) nounwind {
1281; RV32-LABEL: aligned_bzero_15:
1282; RV32:       # %bb.0:
1283; RV32-NEXT:    sb zero, 14(a0)
1284; RV32-NEXT:    sw zero, 0(a0)
1285; RV32-NEXT:    sw zero, 4(a0)
1286; RV32-NEXT:    sw zero, 8(a0)
1287; RV32-NEXT:    sh zero, 12(a0)
1288; RV32-NEXT:    ret
1289;
1290; RV64-LABEL: aligned_bzero_15:
1291; RV64:       # %bb.0:
1292; RV64-NEXT:    sd zero, 0(a0)
1293; RV64-NEXT:    sw zero, 8(a0)
1294; RV64-NEXT:    sh zero, 12(a0)
1295; RV64-NEXT:    sb zero, 14(a0)
1296; RV64-NEXT:    ret
1297;
1298; RV32-FAST-LABEL: aligned_bzero_15:
1299; RV32-FAST:       # %bb.0:
1300; RV32-FAST-NEXT:    sw zero, 11(a0)
1301; RV32-FAST-NEXT:    sw zero, 0(a0)
1302; RV32-FAST-NEXT:    sw zero, 4(a0)
1303; RV32-FAST-NEXT:    sw zero, 8(a0)
1304; RV32-FAST-NEXT:    ret
1305;
1306; RV64-FAST-LABEL: aligned_bzero_15:
1307; RV64-FAST:       # %bb.0:
1308; RV64-FAST-NEXT:    sd zero, 7(a0)
1309; RV64-FAST-NEXT:    sd zero, 0(a0)
1310; RV64-FAST-NEXT:    ret
1311  tail call void @llvm.memset.inline.p0.i64(ptr align 8 %a, i8 0, i64 15, i1 0)
1312  ret void
1313}
1314