/* $NetBSD: memcpy.c,v 1.1 2014/09/03 19:34:25 matt Exp $ */
/*-
 * Copyright (c) 2014 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas of 3am Software Foundry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
30*27620987Smatt
31*27620987Smatt #include <sys/cdefs.h>
32*27620987Smatt
33*27620987Smatt __RCSID("$NetBSD: memcpy.c,v 1.1 2014/09/03 19:34:25 matt Exp $");
34*27620987Smatt
35*27620987Smatt #include <stddef.h>
36*27620987Smatt #include <stdint.h>
37*27620987Smatt #include <string.h>
38*27620987Smatt
/*
 * Build one destination word out of two adjacent aligned source words.
 *
 * w1 is the source word at the lower address, w2 the word that follows it.
 * shift1 is the number of bits of w1 that lie before the wanted data and
 * shift2 is the width of unsigned long in bits minus shift1.  Both shift
 * counts must be strictly between 0 and the width of unsigned long (a shift
 * by the full width is undefined); memcpy() below only calls this when the
 * source misalignment is non-zero, which keeps them in that range.
 *
 * The direction of the shifts depends on the byte order, since "earlier in
 * memory" means "more significant" on big-endian and "less significant" on
 * little-endian machines.
 */
static inline unsigned long
combine_words(unsigned long w1, unsigned long w2, int shift1, int shift2)
{
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	return (w1 << shift1) | (w2 >> shift2);
#else
	return (w1 >> shift1) | (w2 << shift2);
#endif
}

/*
 * Copy len bytes from b to a and return a.
 *
 * Strategy:
 *   1. Copy single bytes until the destination is long aligned.
 *   2. Copy whole longs.  If the source is then aligned as well the words
 *      are moved directly; otherwise every destination word is assembled
 *      from two neighbouring aligned source words with combine_words().
 *   3. Copy the remaining (fewer than sizeof(long)) bytes one at a time.
 *
 * In the misaligned case the source is read in whole aligned words, so up
 * to sizeof(long) - 1 bytes before and after the source region that share
 * an aligned word with it may be read (never written).
 */
void *
memcpy(void * restrict a, const void * restrict b, size_t len)
{
	const unsigned char *cb = b;
	unsigned char *ca = a;

	if (len == 0)
		return a;

	/*
	 * Make sure the destination is long aligned.
	 */
	while ((uintptr_t)ca & (sizeof(long) - 1)) {
		*ca++ = *cb++;
		if (--len == 0)
			return a;
	}

	/*
	 * From here on ca/cb stay fixed at the start of the word-aligned
	 * part; la walks the destination and ea is one past the last whole
	 * destination word.
	 */
	unsigned long *la = (unsigned long *)ca;
	const int offset = (int)((uintptr_t)cb & (sizeof(*la) - 1));
	const unsigned long *lb = (const unsigned long *)(cb - offset);
	unsigned long * const ea = la + len / sizeof(*la);

	if (offset == 0) {
		/*
		 * a & b are now both long aligned.
		 * First try to copy 4 longs at a time.  The remaining count
		 * is tested as a difference so that no pointer beyond ea is
		 * ever formed.
		 */
		for (; ea - la >= 4; la += 4, lb += 4) {
			la[0] = lb[0];
			la[1] = lb[1];
			la[2] = lb[2];
			la[3] = lb[3];
		}
		/*
		 * Now copy one long at a time.  ea is exclusive, so the
		 * comparison has to be strict.
		 */
		while (la < ea) {
			*la++ = *lb++;
		}
	} else if (la < ea) {
		/*
		 * The source is misaligned by offset bytes and there is at
		 * least one whole word to store.  w1 always holds the most
		 * recently read source word.
		 */
		const int shift1 = offset * 8;
		const int shift2 = (int)(sizeof(*la) * 8) - shift1;
		unsigned long w1 = *lb++;

		/*
		 * We try to write 4 words per loop.  w1 and w2 alternate
		 * roles so that each source word is loaded exactly once.
		 */
		for (; ea - la >= 4; la += 4, lb += 4) {
			unsigned long w2 = lb[0];

			la[0] = combine_words(w1, w2, shift1, shift2);

			w1 = lb[1];

			la[1] = combine_words(w2, w1, shift1, shift2);

			w2 = lb[2];

			la[2] = combine_words(w1, w2, shift1, shift2);

			w1 = lb[3];

			la[3] = combine_words(w2, w1, shift1, shift2);
		}

		/*
		 * Now copy one long at a time.
		 */
		while (la < ea) {
			unsigned long w2 = *lb++;

			*la++ = combine_words(w1, w2, shift1, shift2);

			w1 = w2;
		}
	}

	/*
	 * Copy the trailing bytes.  The source position is derived from how
	 * far the destination advanced rather than from lb, because lb runs
	 * one word ahead in the misaligned case.
	 */
	len &= sizeof(*la) - 1;
	if (len) {
		cb += (unsigned char *)la - ca;
		ca = (unsigned char *)la;
		while (len-- > 0) {
			*ca++ = *cb++;
		}
	}
	return a;
}
136