xref: /minix3/common/lib/libc/arch/arm/string/memset_naive.S (revision 84d9c625bfea59e274550651111ae9edfdc40fbd)
1/*-
2 * Copyright (c) 2013 The NetBSD Foundation, Inc.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to The NetBSD Foundation
6 * by Matt Thomas of 3am Software Foundry.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
18 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
21 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#include <machine/asm.h>
31
32RCSID("$NetBSD: memset_naive.S,v 1.1 2013/01/08 20:15:00 matt Exp $")
33
34/*
35 * This isn't quite as simple/short as it could be but the truly trivial
36 * memset was an order of magnitude slower than this.
37 */
38
/*
 * void *memset(void *dst, int c, size_t len)
 *
 * In:    r0 = dst, r1 = c (only the low byte is stored), r2 = len
 * Out:   r0 = dst (never modified; every store goes through ip)
 * Clobb: r1, r2, r3, ip, condition flags
 *
 * Register roles on the bulk path (len > 10):
 *	ip    = write cursor
 *	r1    = bytes still to write
 *	r2,r3 = fill byte replicated into all four byte lanes
 *
 * len is a size_t, so it is compared with the unsigned condition codes
 * (ls/hs/hi).  With signed codes a len >= 0x80000000 looks negative and
 * nothing would be written.
 */
ENTRY(memset)
/* LINTSTUB: void *memset(void *, int, size_t) */
	mov	ip, r0			/* need to preserve r0 */
	cmp	r2, #10			/* 10 bytes or less? */
	bls	.Lbyte_by_byte		/*    yes (unsigned), bytewise is faster */
	ands	r3, r1, #0xff		/* we are dealing with bytes */
	orrne	r3, r3, r3, lsl #8	/* move value into 2nd byte lane */
	orrne	r3, r3, r3, lsl #16	/* move value into all byte lanes */
	mov	r1, r2			/* move count */
	ands	r2, ip, #7		/* are we dword aligned? */
	beq	1f			/*   yes we are */
	rsb	r2, r2, #8		/* how many bytes until aligned? */
	sub	r1, r1, r2		/* can't underflow: count > 10, r2 <= 7 */
	/*
	 * Each store below advances ip, so the following tst sees the
	 * updated alignment: at most 1 + 2 + 4 = 7 bytes are written.
	 */
	tst	ip, #1			/* halfword aligned? */
	strneb	r3, [ip], #1		/*   nope, write a byte */
	tst	ip, #2			/* word aligned? */
	strneh	r3, [ip], #2		/*   nope, write a halfword */
	tst	ip, #4			/* dword aligned? */
	strne	r3, [ip], #4		/*   nope, write a word */
	/*
	 * At this point, we are dword aligned.
	 */
1:	mov	r2, r3			/* duplicate fill value */
	/*
	 * The stores do not touch the flags, so hs/hi/eq below all test
	 * the result of the subs: C set means no borrow (count was >= 16),
	 * Z set means the count reached exactly zero.
	 */
2:	subs	r1, r1, #16		/* can we write 16 bytes? */
	stmhsia	ip!, {r2,r3}		/*   yes, write the first 8 of them */
	stmhsia	ip!, {r2,r3}		/*   yes, write the second 8 of them */
	bhi	2b			/* more left to fill */
	RETc(eq)			/*   no, return */
	/*
	 * The subtraction borrowed (fewer than 16 bytes were left) but the
	 * bits below 16 haven't changed.  So we are effectively testing
	 * the remaining count modulo 16.
	 */
	tst	r1, #8			/* can we write at least 8 bytes? */
	stmneia	ip!, {r2,r3}		/*   so do it */
	tst	r1, #4			/* can we write at least 4 bytes? */
	strne	r3, [ip], #4		/*   so do it */
	tst	r1, #2			/* can we write at least 2 bytes? */
	strneh	r3, [ip], #2		/*   so do it */
	tst	r1, #1			/* can we write 1 bytes? */
	strneb	r3, [ip], #1		/*   so do it */
	RET				/* return */

	/*
	 * Short fill: here r2 = len (0..10) and r1 still holds the caller's
	 * fill value.  r3 has NOT been loaded on this path, so the byte must
	 * come from r1; strb stores only its low 8 bits.
	 */
.Lbyte_by_byte:
	subs	r2, r2, #1		/* can we write a byte? */
	RETc(lt)			/*   no, return (r2 <= 10, so signed is safe) */
	strb	r1, [ip], #1		/* write a byte */
	b	.Lbyte_by_byte		/* do next byte */
END(memset)
87