xref: /freebsd-src/contrib/cortex-strings/src/arm/memset.S (revision 8c4282b370bd66908b45b6a223226a9fc2b69d57)
/* Copyright (c) 2010-2011, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

      * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.

      * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

      * Neither the name of Linaro Limited nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
31*09a53ad8SAndrew Turner
/*
   Written by Dave Gilbert <david.gilbert@linaro.org>

   This memset routine is optimised on a Cortex-A9 and should work on
   all ARMv7 processors.

 */

	.syntax unified
	.arch armv7-a

@ 2011-08-30 david.gilbert@linaro.org
@    Extracted from local git 2f11b436

@ Single-bit mask selected by a byte index: bit (index * 8) on
@ little-endian builds, bit (31 - index * 8) on big-endian builds.
@ Per the original note, this lets a flag held in a 00/ff byte be checked
@ easily in either endianness.  The argument and the whole expansion are
@ parenthesised so the macro stays correct when given a compound argument
@ or when used inside a larger expression.
#ifdef __ARMEB__
#define CHARTSTMASK(c) (1 << (31 - ((c) * 8)))
#else
#define CHARTSTMASK(c) (1 << ((c) * 8))
#endif
	.text
	.thumb

@ ---------------------------------------------------------------------------
@ void *memset(void *s, int c, size_t n)
@
@ In:    r0 = destination address
@        r1 = fill character (only the low byte is ever stored)
@        r2 = byte count
@ Out:   r0 = original destination address (r0 is never written)
@ Uses:  r1, r2, r3 and the flags.  r4-r7 are pushed on the stack and
@        popped again before returning.
@
@ Strategy: single byte stores until the write pointer is 8-byte aligned,
@ then 16-byte stmia blocks, then at most one 8-byte stmia, then single
@ byte stores for whatever is left.
	.thumb_func
	.align 2
	.p2align 4,,15
	.global memset
	.type memset,%function
memset:
	mov	r3, r0		@ r3 = write pointer; r0 is left alone as the return value
	cbz	r2, 10f		@ Exit if 0 length

	tst	r0, #7
	beq	2f		@ Already 8-byte aligned

	@ Misaligned start: store one byte at a time until r3 is 8-byte
	@ aligned or the count reaches zero.
1:
	strb	r1, [r3], #1
	subs	r2, r2, #1
	tst	r3, #7		@ Sets the flags consumed by the bne below
	cbz	r2, 10f		@ Exit if we hit the end (cbz does not alter the flags)
	bne	1b		@ Go round again if still misaligned

2:
	@ r3 is 8-byte aligned here and r2 (bytes left) is non-zero
	push	{r4, r5, r6, r7}
	bics	r4, r2, #15	@ r4 = byte count rounded down to a multiple of 16
	beq	5f		@ Fewer than 16 bytes: finish with the byte loop

3:
	@ POSIX says that ch is cast to an unsigned char.  The original
	@ author notes that a uxtb would be the shorter encoding but takes
	@ two cycles, where an AND is four bytes but one cycle.
	and	r1, r1, #0xFF
	orr	r1, r1, r1, lsl #8	@ Same character into all bytes
	orr	r1, r1, r1, lsl #16
	mov	r5, r1
	mov	r6, r1
	mov	r7, r1

4:
	subs	r4, r4, #16
	stmia	r3!, {r1, r5, r6, r7}	@ 16 bytes per pass; the subs flags survive the store
	bne	4b
	and	r2, r2, #15	@ r2 = bytes still to write (0..15)

	@ At this point r3 is still aligned and there are at most 15 bytes
	@ left to write.  Avoid some of the byte-at-a-time work by storing
	@ one 8-byte chunk when bit 3 of the remaining count is set.
	tst	r2, #8
	itt	ne
	subne	r2, r2, #8
	stmiane	r3!, {r1, r5}

5:
	pop	{r4, r5, r6, r7}
	cbz	r2, 10f

	@ Store any remaining bytes one at a time
6:
	subs	r2, r2, #1
	strb	r1, [r3], #1	@ The subs flags survive the store and drive the bne
	bne	6b

10:
	bx	lr		@ r0 still holds the original address
	.size	memset, . - memset
123