1 /* $NetBSD: in_cksum.c,v 1.15 2011/07/10 23:13:22 matt Exp $ */
2
3 /*
4 * Copyright (c) 1993 Regents of the University of California.
5 * All rights reserved.
6 *
7 * Permission to use, copy, modify, and distribute this software and its
8 * documentation for any purpose, without fee, and without written agreement is
9 * hereby granted, provided that the above copyright notice and the following
10 * paragraph appears in all copies of this software.
11 *
12 * THIS SOFTWARE IS PROVIDED BY THE REGENTS ``AS IS'' AND ANY EXPRESS OR
13 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
14 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
15 * EVENT SHALL THE REGENTS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
16 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
17 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
18 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
19 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
20 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
21 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22 */
23
/*
 * ccsum.c - Highly optimized MIPS checksum function.
 * by Jonathan Kay, Computer Systems Lab, UCSD 4/2/93
 *
 * Version 2.0
 * Techniques and credits:
 *   Basic algorithm is 3-instruction inner loop sum by Peter Desnoyers.
 *   Full word-size reading as described in Usenix W'93 paper.
 *   Pipelined latency absorption technique as described in paper.
 *   Unrolling chosen through testing and examination of actual workload.
 *   Rewrite in 'C' without loss of performance suggested by Vernon Schryver.
 *   15% faster than version 1 ("Usenix version").
 *   150% faster than Ultrix 4.2A checksum routine.
 *
 * BSD changes: Jonathan Stone, Stanford Distributed Systems Group, 1997-08-11
 *
 *	re-written for incremental checksumming of BSD mbufs
 *	and byteswap out-of-phase mbuf sums.
 */
43
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: in_cksum.c,v 1.15 2011/07/10 23:13:22 matt Exp $");
46
47 #include <sys/param.h>
48 #include <sys/endian.h>
49 #include <sys/mbuf.h>
50 #include <sys/systm.h>
51
52 #include <netinet/in_systm.h>
53 #include <netinet/in.h>
54 #include <netinet/ip.h>
55 #include <netinet/ip_var.h>
56
57
/*
 * One pointer value viewed at each access width the checksum code uses.
 * 'u' exposes the address as an integer so its alignment can be tested.
 */
union memptr {
	uint32_t *l;
	uintptr_t u;
	uint16_t *s;
	uint8_t *c;
};

static inline uint32_t fastsum(union memptr, int, unsigned int, int);

/*
 * Compute 1's complement sum over a contiguous block at 'buf' for 'n' bytes.
 *
 * Add the resulting checksum into 'oldsum' using 1's complement.
 * 'odd_aligned' is a boolean which, if set, indicates the data in 'buf'
 * starts at an odd byte alignment within the containing packet,
 * and so we must byteswap the memory-aligned 1's-complement sum
 * over the data before adding it to 'oldsum'.
 *
 * If 'n' is zero or negative no memory is read and the result is just
 * 'oldsum' folded.  The returned value is always folded to 16 bits
 * (<= 0xffff), whichever alignment path was taken.
 */
static inline uint32_t
fastsum(union memptr buf, int n, unsigned int oldsum, int odd_aligned)
{
	unsigned long hilo = 0, high = 0;
	unsigned long w0, w1;
	unsigned int sum = 0;

	/*
	 * Align to 32 bits.  The n > 0 test keeps an empty block at an
	 * odd address from consuming a byte it does not own.
	 */
	if (n > 0 && (buf.u & 0x3)) {
		/*
		 * 16-bit-align.
		 * If buf is odd-byte-aligned, add the byte and toggle
		 * our byte-alignment flag.
		 * If we were odd-aligned on entry, an odd-aligned
		 * byte makes a 16-bit word with the previous odd byte,
		 * unaligned, making us aligned again.
		 * If we were not already odd-aligned, we are now,
		 * and we must byteswap our 16-bit-aligned sum of
		 * 'buf' before accumulating it.
		 */
		if (buf.u & 0x1) {
#if BYTE_ORDER == BIG_ENDIAN
			sum += *(buf.c++);
#else
			sum += (*(buf.c++) << 8);
#endif
			n -= 1;
			odd_aligned = !odd_aligned;
		}
		/* Skip to the end for very small mbufs */
		if (n <= 2)
			goto postunaligned;

		/* 32-bit-align */
		if (buf.u & 0x2) {
			sum += *(buf.s++);
			n -= 2;
		}
	}

	/*
	 * 32-bit-aligned sum.
	 * Peter Desnoyers' unbelievable 3-instruction main loop.
	 *
	 * 'hilo' accumulates whole words and 'high' their upper halfwords;
	 * subtracting (high << 16) from 'hilo' afterwards leaves the sum of
	 * the lower halfwords.  Each pass consumes 16 words (64 bytes) and
	 * preloads the first two words of the next pass, which is why the
	 * guard demands 8 bytes beyond the 64 being summed.
	 */
	if (n < 64 + 8)
		goto notmuchleft;
	w0 = buf.l[0];
	w1 = buf.l[1];
	do {
		hilo += w0;
		high += w0 >> 16;
		w0 = buf.l[2];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.l[3];

		hilo += w0;
		high += w0 >> 16;
		w0 = buf.l[4];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.l[5];

		hilo += w0;
		high += w0 >> 16;
		w0 = buf.l[6];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.l[7];

		hilo += w0;
		high += w0 >> 16;
		w0 = buf.l[8];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.l[9];


		hilo += w0;
		high += w0 >> 16;
		w0 = buf.l[10];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.l[11];

		hilo += w0;
		high += w0 >> 16;
		w0 = buf.l[12];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.l[13];

		hilo += w0;
		high += w0 >> 16;
		w0 = buf.l[14];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.l[15];

		hilo += w0;
		high += w0 >> 16;
		w0 = buf.l[16];

		hilo += w1;
		high += w1 >> 16;
		w1 = buf.l[17];


		n -= 64;
		buf.c += 64;

	} while (n >= 64 + 8);
	hilo -= (high << 16);
	sum += hilo;
	sum += high;

 notmuchleft:
	high = hilo = 0;
	/*
	 * Compare as signed int: comparing against a bare sizeof would
	 * convert a negative 'n' to a huge unsigned value.
	 */
	while (n >= (int)sizeof(uint32_t)) {
		w0 = *(buf.l++);
		hilo += w0;
		high += w0 >> 16;
		n -= 4;
	}
	hilo -= (high << 16);
	sum += hilo;
	sum += high;

 postunaligned:
	/* handle post 32bit unaligned payloads */
	if (n >= (int)sizeof(uint16_t)) {
		sum += *(buf.s++);
		n -= (int)sizeof(uint16_t);
	}

	/* handle a trailing odd byte */
	if (n > 0) {
#if BYTE_ORDER == BIG_ENDIAN
		sum += *(buf.c) << 8;
#else
		sum += *(buf.c);
#endif
	}

	/*
	 * compensate for a trailing byte in previous mbuf
	 * by byteswapping the memory-aligned sum of this mbuf.
	 */
	if (odd_aligned) {
		/* reduce to 16 bits first so the two bytes can be swapped */
		sum = (sum & 0xffff) + (sum >> 16);
		sum = (sum & 0xffff) + (sum >> 16);
		sum = oldsum + ((sum >> 8) & 0xff) + ((sum & 0xff) << 8);
	} else {
		sum = oldsum + sum;
	}

	/*
	 * Fold twice on both paths: the first fold can itself carry out of
	 * bit 15 (e.g. 0x4fffe -> 0x10002), the second absorbs that carry.
	 */
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);
	return (sum);
}
244
245 /*
246 * Checksum routine for Internet Protocol family headers.
247 *
248 */
249 static inline int
in_cksum_internal(struct mbuf * m,int off,int len,uint32_t sum)250 in_cksum_internal(struct mbuf *m, int off, int len, uint32_t sum)
251 {
252 /*u_short **/ union memptr w;
253 int mlen;
254 int odd_aligned = 0;
255
256 for (; m && len; m = m->m_next) {
257 if (m->m_len == 0)
258 continue;
259 w.c = mtod(m, u_char *) + off;
260 mlen = m->m_len - off;
261 off = 0;
262 if (len < mlen)
263 mlen = len;
264 len -= mlen;
265
266 sum = fastsum(w, mlen, sum, odd_aligned);
267 odd_aligned = (odd_aligned + mlen) & 0x01;
268 }
269 if (len != 0) {
270 printf("cksum: out of data, %d\n", len);
271 }
272 return (~sum & 0xffff);
273 }
274
/*
 * Checksum the first 'len' bytes of the mbuf chain 'm', starting at the
 * beginning of the chain with an initial sum of zero.
 */
int
in_cksum(struct mbuf *m, int len)
{
	const int start_off = 0;
	const uint32_t seed = 0;

	return in_cksum_internal(m, start_off, len, seed);
}
281
282 int
in4_cksum(struct mbuf * m,uint8_t nxt,int off,int len)283 in4_cksum(struct mbuf *m, uint8_t nxt, int off, int len)
284 {
285 uint sum = 0;
286
287 if (nxt != 0) {
288 uint16_t *w;
289 union {
290 struct ipovly ipov;
291 u_int16_t w[10];
292 } u;
293
294 /* pseudo header */
295 memset(&u.ipov, 0, sizeof(u.ipov));
296 u.ipov.ih_len = htons(len);
297 u.ipov.ih_pr = nxt;
298 u.ipov.ih_src = mtod(m, struct ip *)->ip_src;
299 u.ipov.ih_dst = mtod(m, struct ip *)->ip_dst;
300 w = u.w;
301 /* assumes sizeof(ipov) == 20 */
302 sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; sum += w[4];
303 sum += w[5]; sum += w[6]; sum += w[7]; sum += w[8]; sum += w[9];
304 }
305
306 /* skip unnecessary part */
307 while (m && off > 0) {
308 if (m->m_len > off)
309 break;
310 off -= m->m_len;
311 m = m->m_next;
312 }
313
314 return (in_cksum_internal(m, off, len, sum));
315 }
316