/*
 * memset - fill memory with a constant
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 */

/*
   Written by Dave Gilbert <david.gilbert@linaro.org>

   This memset routine is optimised on a Cortex-A9 and should work on
   all ARMv7 processors.

 */
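
/*
   A simplified C sketch of the behaviour implemented below, for reference
   only (the real entry point is __memset_arm and the assembly works in
   larger chunks):

     #include <stddef.h>

     void *memset(void *s, int c, size_t n)
     {
         unsigned char *p = s;
         while (n--)
             *p++ = (unsigned char) c;   // c is used as an unsigned char
         return s;                       // original pointer is returned
     }
 */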

	.syntax unified
	.arch armv7-a

@ 2011-08-30 david.gilbert@linaro.org
@    Extracted from local git 2f11b436

@ this lets us check a flag in a 00/ff byte easily in either endianness
#ifdef __ARMEB__
#define CHARTSTMASK(c) 1<<(31-(c*8))
#else
#define CHARTSTMASK(c) 1<<(c*8)
#endif
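@ (CHARTSTMASK is not referenced anywhere below; it appears to be common
@ boilerplate shared with the sibling string routines.)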
	.text
	.thumb

@ ---------------------------------------------------------------------------
	.thumb_func
	.align 2
	.p2align 4,,15
	.global __memset_arm
	.type __memset_arm,%function
__memset_arm:
	@ r0 = address
	@ r1 = character
	@ r2 = count
	@ returns original address in r0
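	@ (i.e. r0 = s, r1 = c, r2 = n for the usual
	@  void *memset(void *s, int c, size_t n))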

	mov	r3, r0		@ Leave r0 alone
	cbz	r2, 10f		@ Exit if 0 length

	tst	r0, #7
	beq	2f		@ Already aligned

	@ Ok, so we're misaligned here
1:
	strb	r1, [r3], #1
	subs	r2,r2,#1
	tst	r3, #7
	cbz	r2, 10f		@ Exit if we hit the end
	bne	1b		@ go round again if still misaligned
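	@ (Each pass stores one byte and re-tests the low three address bits;
	@ cbz does not alter the flags, so the bne still acts on the tst result.)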

2:
	@ OK, so we're aligned
	push	{r4,r5,r6,r7}
	bics	r4, r2, #15	@ if less than 16 bytes then need to finish it off
	beq	5f
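	@ (r4 = count rounded down to a multiple of 16, i.e. the bytes covered
	@ by the 16-byte stmia loop below; skip the loop when there are none.)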

3:
	@ POSIX says that ch is cast to an unsigned char.  A uxtb is two
	@ bytes and takes two cycles, where an AND is four bytes but one
	@ cycle.
	and	r1, #0xFF
	orr	r1, r1, r1, lsl#8	@ Same character into all bytes
	orr	r1, r1, r1, lsl#16
	mov	r5,r1
	mov	r6,r1
	mov	r7,r1
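	@ (e.g. for c == 0xAB, r1 becomes 0xABABABAB and r5-r7 copy it,
	@ so each stmia below writes 16 identical bytes.)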

4:
	subs	r4,r4,#16
	stmia	r3!,{r1,r5,r6,r7}
	bne	4b
	and	r2,r2,#15

	@ At this point we're still aligned and we have up to 15 bytes left to
	@ write; we can avoid some of the byte-at-a-time work by testing for an
	@ 8-byte chunk first
	tst	r2,#8
	itt	ne
	subne	r2,r2,#8
	stmiane	r3!,{r1,r5}
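	@ (If bit 3 of the remaining count is set, the IT block stores one more
	@ 8-byte pair, leaving at most 7 bytes for the byte loop below.)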

5:
	pop	{r4,r5,r6,r7}
	cbz	r2, 10f

	@ Finish off any last few (fewer than 16) bytes, one at a time
6:
	subs	r2,r2,#1
	strb	r1,[r3],#1
	bne	6b

10:
	bx	lr		@ goodbye
	.size	__memset_arm, . - __memset_arm