xref: /netbsd-src/common/lib/libc/arch/or1k/string/memcpy.c (revision 27620987b8618edefe3919667fcfb2a0eb02578b)
1*27620987Smatt /* $NetBSD: memcpy.c,v 1.1 2014/09/03 19:34:25 matt Exp $ */
2*27620987Smatt /*-
3*27620987Smatt  * Copyright (c) 2014 The NetBSD Foundation, Inc.
4*27620987Smatt  * All rights reserved.
5*27620987Smatt  *
6*27620987Smatt  * This code is derived from software contributed to The NetBSD Foundation
7*27620987Smatt  * by Matt Thomas of 3am Software Foundry.
8*27620987Smatt  *
9*27620987Smatt  * Redistribution and use in source and binary forms, with or without
10*27620987Smatt  * modification, are permitted provided that the following conditions
11*27620987Smatt  * are met:
12*27620987Smatt  * 1. Redistributions of source code must retain the above copyright
13*27620987Smatt  *    notice, this list of conditions and the following disclaimer.
14*27620987Smatt  * 2. Redistributions in binary form must reproduce the above copyright
15*27620987Smatt  *    notice, this list of conditions and the following disclaimer in the
16*27620987Smatt  *    documentation and/or other materials provided with the distribution.
17*27620987Smatt  *
18*27620987Smatt  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19*27620987Smatt  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20*27620987Smatt  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21*27620987Smatt  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22*27620987Smatt  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23*27620987Smatt  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24*27620987Smatt  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25*27620987Smatt  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26*27620987Smatt  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27*27620987Smatt  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28*27620987Smatt  * POSSIBILITY OF SUCH DAMAGE.
29*27620987Smatt  */
30*27620987Smatt 
31*27620987Smatt #include <sys/cdefs.h>
32*27620987Smatt 
33*27620987Smatt __RCSID("$NetBSD: memcpy.c,v 1.1 2014/09/03 19:34:25 matt Exp $");
34*27620987Smatt 
35*27620987Smatt #include <stddef.h>
36*27620987Smatt #include <stdint.h>
37*27620987Smatt #include <string.h>
38*27620987Smatt 
/*
 * Build one destination word out of two consecutive aligned source words.
 *
 * w1 is the earlier (lower-addressed) source word and w2 the following one.
 * shift1 and shift2 are bit counts; memcpy() below passes the source
 * misalignment in bits as shift1 and the word width in bits minus shift1
 * as shift2, so both are always in the range 1 .. width-1.
 *
 * On big-endian targets the result is (w1 << shift1) | (w2 >> shift2);
 * otherwise it is (w1 >> shift1) | (w2 << shift2).
 */
static inline unsigned long
combine_words(unsigned long w1, unsigned long w2, int shift1, int shift2)
{
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	return (w1 << shift1) | (w2 >> shift2);
#else
	return (w1 >> shift1) | (w2 << shift2);
#endif
}

/*
 * Copy len bytes from b to a and return a.
 *
 * Strategy:
 *  1. Copy single bytes until the destination is long aligned.
 *  2. Copy whole longs.  If the source is then long aligned as well the
 *     words are copied directly; otherwise every destination word is
 *     assembled from two neighbouring aligned source words with
 *     combine_words().
 *  3. Copy the remaining (fewer than sizeof(long)) bytes one at a time.
 *
 * In the unaligned case whole aligned source words are loaded, so bytes
 * just outside [b, b + len) may be read, but only from words that also
 * contain at least one byte of the source range.
 */
void *
memcpy(void * restrict a, const void * restrict b, size_t len)
{
	const unsigned char *cb = b;
	unsigned char *ca = a;

	if (len == 0)
		return a;

	/*
	 * Make sure the destination is long aligned.
	 */
	while ((uintptr_t)ca & (sizeof(long) - 1)) {
		*ca++ = *cb++;
		if (--len == 0)
			return a;
	}

	unsigned long *la = (unsigned long *)ca;
	/* Misalignment of the source, in bytes, relative to a long. */
	const int offset = (int)((uintptr_t)cb & (sizeof(*la) - 1));
	/* Number of whole longs to copy; ea is one past the last of them. */
	const size_t nwords = len / sizeof(*la);
	unsigned long * const ea = la + nwords;

	if (offset == 0) {
		const unsigned long *lb = (const unsigned long *)cb;

		/*
		 * a & b are now both long aligned.
		 * First try to copy 4 longs at a time,
		 */
		for (; ea - la >= 4; la += 4, lb += 4) {
			la[0] = lb[0];
			la[1] = lb[1];
			la[2] = lb[2];
			la[3] = lb[3];
		}
		/*
		 * Now try to copy one long at a time.  The bound is exclusive:
		 * ea is one past the last whole word.
		 */
		while (la < ea) {
			*la++ = *lb++;
		}
	} else if (nwords != 0) {
		const int shift1 = offset * 8;
		const int shift2 = (int)(sizeof(*la) * 8) - shift1;
		/* Aligned word that contains the first source byte. */
		const unsigned long *lb = (const unsigned long *)(cb - offset);
		/* w1 always holds the most recently loaded source word. */
		unsigned long w1 = *lb++;

		/*
		 * We try to write 4 words per loop.
		 */
		for (; ea - la >= 4; la += 4, lb += 4) {
			unsigned long w2 = lb[0];

			la[0] = combine_words(w1, w2, shift1, shift2);

			w1 = lb[1];

			la[1] = combine_words(w2, w1, shift1, shift2);

			w2 = lb[2];

			la[2] = combine_words(w1, w2, shift1, shift2);

			w1 = lb[3];

			la[3] = combine_words(w2, w1, shift1, shift2);
		}

		/*
		 * Now try to copy one long at a time.
		 */
		while (la < ea) {
			unsigned long w2 = *lb++;

			*la++ = combine_words(w1, w2, shift1, shift2);

			w1 = w2;
		}
	}

	/*
	 * Copy the trailing bytes.  The source position is derived from the
	 * number of whole words consumed rather than from the word read
	 * pointer, which runs one word ahead in the unaligned case.
	 */
	cb += nwords * sizeof(*la);
	ca = (unsigned char *)ea;
	len &= sizeof(*la) - 1;
	while (len-- > 0) {
		*ca++ = *cb++;
	}
	return a;
}
136