xref: /netbsd-src/sys/arch/sparc/sparc/cpu_in_cksum.c (revision f252fc5e80ed2c4f42e0411a2ab102d63a178257)
1 /*	$NetBSD: cpu_in_cksum.c,v 1.1 2010/09/20 10:21:10 tsutsui Exp $ */
2 
3 /*
4  * Copyright (c) 1995 Matthew R. Green.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*
30  * Copyright (c) 1992, 1993
31  *	The Regents of the University of California.  All rights reserved.
32  *
33  * All advertising materials mentioning features or use of this software
34  * must display the following acknowledgement:
35  *	This product includes software developed by the University of
36  *	California, and it's contributors.
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions
40  * are met:
41  * 1. Redistributions of source code must retain the above copyright
42  *    notice, this list of conditions and the following disclaimer.
43  * 2. Redistributions in binary form must reproduce the above copyright
44  *    notice, this list of conditions and the following disclaimer in the
45  *    documentation and/or other materials provided with the distribution.
46  * 3. Neither the name of the University nor the names of its contributors
47  *    may be used to endorse or promote products derived from this software
48  *    without specific prior written permission.
49  *
50  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60  * SUCH DAMAGE.
61  *
62  *	@(#)in_cksum.c	8.1 (Berkeley) 6/11/93
63  */
64 
65 /*
66  * Copyright (c) 1995 Zubin Dittia.
67  * Copyright (c) 1994, 1998 Charles M. Hannum.
68  *
69  * All advertising materials mentioning features or use of this software
70  * must display the following acknowledgement:
71  *	This product includes software developed by the University of
72  *	California, and it's contributors.
73  *
74  * Redistribution and use in source and binary forms, with or without
75  * modification, are permitted provided that the following conditions
76  * are met:
77  * 1. Redistributions of source code must retain the above copyright
78  *    notice, this list of conditions and the following disclaimer.
79  * 2. Redistributions in binary form must reproduce the above copyright
80  *    notice, this list of conditions and the following disclaimer in the
81  *    documentation and/or other materials provided with the distribution.
82  * 3. All advertising materials mentioning features or use of this software
83  *    must display the following acknowledgement:
84  *	This product includes software developed by the University of
85  *	California, Berkeley and its contributors.
86  * 4. Neither the name of the University nor the names of its contributors
87  *    may be used to endorse or promote products derived from this software
88  *    without specific prior written permission.
89  *
90  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
91  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
92  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
93  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
94  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
95  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
96  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
97  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
98  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
99  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
100  * SUCH DAMAGE.
101  *
102  *	@(#)in_cksum.c	8.1 (Berkeley) 6/11/93
103  */
104 
105 #include <sys/cdefs.h>
106 __KERNEL_RCSID(0, "$NetBSD: cpu_in_cksum.c,v 1.1 2010/09/20 10:21:10 tsutsui Exp $");
107 
108 #include <sys/param.h>
109 #include <sys/systm.h>
110 #include <sys/mbuf.h>
111 #include <netinet/in.h>
112 
113 /*
114  * Checksum routine for Internet Protocol family headers.
115  *
116  * This routine is very heavily used in the network
117  * code and should be modified for each CPU to be as fast as possible.
118  *
119  * SPARC version.
120  */
121 
122 /*
123  * The checksum computation code here is significantly faster than its
124  * vanilla C counterpart (by significantly, I mean 2-3 times faster if
125  * the data is in cache, and 1.5-2 times faster if the data is not in
126  * cache).
127  * We optimize on three fronts:
128  *	1. By using the add-with-carry (addxcc) instruction, we can use
129  *	   32-bit operations instead of 16-bit operations.
130  *	2. By unrolling the main loop to reduce branch overheads.
131  *	3. By doing a sequence of load,load,add,add,load,load,add,add,
132  *	   we can avoid the extra stall cycle which is incurred if the
133  *	   instruction immediately following a load tries to use the
134  *	   target register of the load.
135  * Another possible optimization is to replace a pair of 32-bit loads
136  * with a single 64-bit load (ldd) instruction, but I found that although
137  * this improves performance somewhat on Sun4c machines, it actually
138  * reduces performance considerably on Sun4m machines (I don't know why).
139  * So I chose to leave it out.
140  *
141  * Zubin Dittia (zubin@dworkin.wustl.edu)
142  */
143 
/*
 * Building blocks for the checksum loop.
 *
 * None of these macros takes its operands as arguments (ADVANCE's byte
 * count excepted); they act directly on locals of the expanding
 * function:
 *	sum		32-bit checksum accumulator
 *	w		byte pointer to the data still to be summed
 *	mlen		bytes left in the current mbuf
 *	byte_swapped	toggled by every ADDBYTE
 *	tmp1, tmp2	32-bit scratch variables for the asm code
 *
 * ADDn adds the n bytes at `w' to `sum' as 32-bit words: the first add
 * is an addcc, every following one an addxcc that also adds the carry
 * of the previous add, and the trailing "addxcc %0,0,%0" folds the
 * last carry back into the sum.  The words are fetched with `ld', so
 * the caller aligns `w' on a word boundary first.  ADDn does not move
 * `w'; use ADVANCE for that.
 *
 * The asm statements rewrite the integer condition codes and read
 * memory through a plain register operand, so both facts are declared
 * to the compiler with the "cc" and "memory" clobbers.
 *
 * The C helpers expand to brace blocks, not to do { } while (0):
 * existing users invoke them with and without a trailing semicolon.
 * Do not use one as the unbraced body of an `if' that has an `else'.
 */
#define Asm	__asm__ __volatile__
#define ADD64		Asm("	ld [%4+ 0],%1;   ld [%4+ 4],%2;		\
				addcc  %0,%1,%0; addxcc %0,%2,%0;	\
				ld [%4+ 8],%1;   ld [%4+12],%2;		\
				addxcc %0,%1,%0; addxcc %0,%2,%0;	\
				ld [%4+16],%1;   ld [%4+20],%2;		\
				addxcc %0,%1,%0; addxcc %0,%2,%0;	\
				ld [%4+24],%1;   ld [%4+28],%2;		\
				addxcc %0,%1,%0; addxcc %0,%2,%0;	\
				ld [%4+32],%1;   ld [%4+36],%2;		\
				addxcc %0,%1,%0; addxcc %0,%2,%0;	\
				ld [%4+40],%1;   ld [%4+44],%2;		\
				addxcc %0,%1,%0; addxcc %0,%2,%0;	\
				ld [%4+48],%1;   ld [%4+52],%2;		\
				addxcc %0,%1,%0; addxcc %0,%2,%0;	\
				ld [%4+56],%1;   ld [%4+60],%2;		\
				addxcc %0,%1,%0; addxcc %0,%2,%0;	\
				addxcc %0,0,%0"				\
				: "=r" (sum), "=&r" (tmp1), "=&r" (tmp2)\
				: "0" (sum), "r" (w)			\
				: "cc", "memory")
#define ADD32		Asm("	ld [%4+ 0],%1;   ld [%4+ 4],%2;		\
				addcc  %0,%1,%0; addxcc %0,%2,%0;	\
				ld [%4+ 8],%1;   ld [%4+12],%2;		\
				addxcc %0,%1,%0; addxcc %0,%2,%0;	\
				ld [%4+16],%1;   ld [%4+20],%2;		\
				addxcc %0,%1,%0; addxcc %0,%2,%0;	\
				ld [%4+24],%1;   ld [%4+28],%2;		\
				addxcc %0,%1,%0; addxcc %0,%2,%0;	\
				addxcc %0,0,%0"				\
				: "=r" (sum), "=&r" (tmp1), "=&r" (tmp2)\
				: "0" (sum), "r" (w)			\
				: "cc", "memory")
#define ADD16		Asm("	ld [%4+ 0],%1;   ld [%4+ 4],%2;		\
				addcc  %0,%1,%0; addxcc %0,%2,%0;	\
				ld [%4+ 8],%1;   ld [%4+12],%2;		\
				addxcc %0,%1,%0; addxcc %0,%2,%0;	\
				addxcc %0,0,%0"				\
				: "=r" (sum), "=&r" (tmp1), "=&r" (tmp2)\
				: "0" (sum), "r" (w)			\
				: "cc", "memory")
#define ADD8		Asm("	ld [%4+ 0],%1;   ld [%4+ 4],%2;		\
				addcc  %0,%1,%0; addxcc %0,%2,%0;	\
				addxcc %0,0,%0"				\
				: "=r" (sum), "=&r" (tmp1), "=&r" (tmp2)\
				: "0" (sum), "r" (w)			\
				: "cc", "memory")
#define ADD4		Asm("	ld [%3+ 0],%1; 				\
				addcc  %0,%1,%0;			\
				addxcc %0,0,%0"				\
				: "=r" (sum), "=&r" (tmp1)		\
				: "0" (sum), "r" (w)			\
				: "cc", "memory")

/* Fold the upper 16 bits of the accumulator into the lower 16 bits. */
#define REDUCE		{sum = (sum & 0xffff) + (sum >> 16);}
/* Final end-around carry: bring a value above 0xffff back into 16 bits. */
#define ADDCARRY	{if (sum > 0xffff) sum -= 0xffff;}
/* Shift the accumulator up one byte; only safe right after a REDUCE. */
#define ROL		{sum = sum << 8;}	/* depends on recent REDUCE */
/* Add the single byte at `w' and record that byte parity has flipped. */
#define ADDBYTE		{ROL; sum += *w; byte_swapped ^= 1;}
/* Add the 16-bit word at `w'; `w' has to be 2-byte aligned. */
#define ADDSHORT	{sum += *(uint16_t *)w;}
/* Step `w' forward by n bytes and account for them in `mlen'. */
#define ADVANCE(n)	{w += (n); mlen -= (n);}
199 
200 int
cpu_in_cksum(struct mbuf * m,int len,int off,uint32_t sum)201 cpu_in_cksum(struct mbuf *m, int len, int off, uint32_t sum)
202 {
203 	uint8_t *w;
204 	int mlen = 0;
205 	int byte_swapped = 0;
206 
207 	/*
208 	 * Declare two temporary registers for use by the asm code.  We
209 	 * allow the compiler to pick which specific machine registers to
210 	 * use, instead of hard-coding this in the asm code above.
211 	 */
212 	uint32_t tmp1, tmp2;
213 
214 	for (; m && len; m = m->m_next) {
215 		if (m->m_len == 0)
216 			continue;
217 		w = mtod(m, uint8_t *) + off;
218 		mlen = m->m_len - off;
219 		off = 0;
220 		if (len < mlen)
221 			mlen = len;
222 		len -= mlen;
223 
224 		/*
225 		 * Ensure that we're aligned on a word boundary here so
226 		 * that we can do 32 bit operations below.
227 		 */
228 		if (((uintptr_t)w & 3) != 0) {
229 			REDUCE;
230 			if (((uintptr_t)w & 1) != 0 && mlen >= 1) {
231 				ADDBYTE;
232 				ADVANCE(1);
233 			}
234 			if (((uintptr_t)w & 2) != 0 && mlen >= 2) {
235 				ADDSHORT;
236 				ADVANCE(2);
237 			}
238 		}
239 
240 		/*
241 		 * Do as many 32 bit operations as possible using the
242 		 * 64/32/16/8/4 macro's above, using as many as possible of
243 		 * these.
244 		 */
245 		while (mlen >= 64) {
246 			ADD64;
247 			ADVANCE(64);
248 		}
249 		if (mlen >= 32) {
250 			ADD32;
251 			ADVANCE(32);
252 		}
253 		if (mlen >= 16) {
254 			ADD16;
255 			ADVANCE(16);
256 		}
257 		if (mlen >= 8) {
258 			ADD8;
259 			ADVANCE(8);
260 		}
261 		if (mlen >= 4) {
262 			ADD4;
263 			ADVANCE(4)
264 		}
265 		if (mlen == 0)
266 			continue;
267 
268 		REDUCE;
269 		if (mlen >= 2) {
270 			ADDSHORT;
271 			ADVANCE(2);
272 		}
273 		if (mlen == 1) {
274 			ADDBYTE;
275 		}
276 	}
277 	if (byte_swapped) {
278 		REDUCE;
279 		ROL;
280 	}
281 	REDUCE;
282 	ADDCARRY;
283 
284 	return 0xffff ^ sum;
285 }
286