xref: /netbsd-src/sys/arch/sparc64/sparc64/cpu_in_cksum.S (revision a78f012a7974d0ace8f71500562dbcd25cab5399)
1/*	$NetBSD: cpu_in_cksum.S,v 1.4 2015/10/17 18:51:32 nakayama Exp $ */
2
3/*
4 * Copyright (c) 2001 Eduardo Horvath
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include "assym.h"
28#include <machine/asm.h>
29
30/*
31 * int cpu_in_cksum(struct mbuf *m, int len, int off, uint32_t initial_sum);
32 *
33 * The only fields of the mbuf we really care about
34 * are m_next, m_len, and m_data.
35 */
36
37#define	IALIGN	.align	32
38
39	IALIGN
40ENTRY(cpu_in_cksum)
41
42/*
43 * Register args:
44 *
45 *	%o0 -	mbuf
46 *	%o1 -	len
47 *	%o2 -	off
48 *	%o3 -	sum
49 *
50 *	// skip unnecessary part
51 *	while (m && off > 0) {
52 *		if (m->m_len > off)
53 *			break;
54 *		off -= m->m_len;
55 *		m = m->m_next;
56 *	}
57 */
58
592:	brz,pn	%o0, 0f
60	brlez,pn %o1, 0f
61	lduw	[%o0 + M_LEN], %o5
62	cmp	%o2, %o5
63	blt,pt	%icc,0f
64	 nop
65	LDPTR	[%o0 + M_NEXT], %o0
66	ba 2b
67	 sub %o2,%o5,%o2
68
69/*
70 *
71 * Register usage:
72 *
73 *	%o0 -	mbuf
74 *	%o1 -	len
75 *	%o2 -	mlen
76 *	%o3 -	sum
77 *	%o4 -	temp
78 *	%o5 -	mdata
79 *	%g1 -	swapped
80 *	%g4 -	temp
81 *	%g5 -	temp
82 */
830:	srl	%o3, 0, %o3	! Make sure this is a 32-bit value going in
84	brz	%o0, Lfinish	! for (; m && len > 0; m->m_next) {
85	 clr	%g1		! swapped = 0;
86	brlez	%o1, Lfinish
87	 mov	%o2, %o4	! Stash this elsewhere for a bit
88
89	lduw	[%o0 + M_LEN], %o2	! Code duplicated at Lloop
90	srlx	%o3, 32, %g4	! REDUCE bigtime
91	sethi	%hi(0xffff), %g5
92	LDPTR	[%o0 + M_DATA], %o5
93	srl	%o3, 0, %o3
94	or	%g5, %lo(0xffff), %g5
95
96	sub	%o2, %o4, %o2	! Correct for initial offset
97	ba,pt	%icc, 0f
98	 add	%o5, %o4, %o5
99
100	IALIGN
101Lloop:
102	lduw	[%o0 + M_LEN], %o2
103	srlx	%o3, 32, %g4	! REDUCE bigtime
104	sethi	%hi(0xffff), %g5
105	LDPTR	[%o0 + M_DATA], %o5
106	srl	%o3, 0, %o3
107	or	%g5, %lo(0xffff), %g5
1080:
109	add	%o3, %g4, %o3
110	brz	%o2, Lnext	! if (m->m_len == 0) continue;
111
112	 cmp	%o1, %o2	! if (len < mlen)
113	movl	%icc, %o1, %o2	!	mlen = len;
114
115	btst	3, %o5		! if (!(*w & 3)) {
116	bz	Lint_aligned
117	 sub	%o1, %o2, %o1	! len -= mlen
118
119	srlx	%o3, 16, %o4	! REDUCE {sum = (sum & 0xffff) + (sum >> 16);}
120	and	%o3, %g5, %o3
121
122	add	%o3, %o4, %o3
123	btst	1, %o5		! if (!(*w & 3) &&
124	bz	Lshort_aligned
125	 nop
126
127	deccc	%o2
128	bl,a,pn	%icc, Lnext	! mlen >= 1) {
129	 inc	%o2
130	ldub	[%o5], %o4	! ADDBYTE {ROL; sum += *w; byte_swapped ^= 1;}
131	sllx	%o3, 8, %o3	! ROL { sum = sum << 8; }
132	inc	%o5		! }
133	add	%o3, %o4, %o3
134	xor	%g1, 1, %g1	! Flip byte_swapped
135
136Lshort_aligned:
137	btst	2, %o5		! if (!(*w & 3) &&
138	bz	Lint_aligned
139	 nop
140
141	deccc	2, %o2		! mlen >= 1) {
142	bl,a,pn	%icc, Lfinish_byte
143	 inc	2, %o2
144	lduh	[%o5], %o4	! ADDSHORT {sum += *(u_short *)w;}
145	inc	2, %o5		! }
146	add	%o3, %o4, %o3	! }
147Lint_aligned:
148	deccc	0xc, %o2	! while (mlen >= 12) {
149	ble,pn	%icc, Ltoofar
150	 clr	%g5
151	ba,pt	%icc, 0f
152	 clr	%g4
153	IALIGN
1540:
155	lduw	[%o5 + 0x00], %o4
156	add	%o3, %g4, %o3
157	deccc	0xc, %o2
158	lduw	[%o5 + 0x04], %g4
159	add	%o3, %g5, %o3
160	lduw	[%o5 + 0x08], %g5
161	inc	0xc, %o5	! ADVANCE(12) }
162	bg,pt	%icc, 0b
163	 add	%o3, %o4, %o3
164	add	%o3, %g4, %o3
165	add	%o3, %g5, %o3
166Ltoofar:
167	inc	0xc, %o2
168
169Ldo_int:
170	deccc	4, %o2
171	bl,pn	%icc, Lfinish_short
172	 nop
1730:
174	lduw	[%o5], %o4
175	inc	4, %o5
176	deccc	4, %o2
177	bge,pt	%icc, 0b
178	 add	%o3, %o4, %o3
179
180Lfinish_short:
181	btst	2, %o2
182	bz	Lfinish_byte
183	 nop
184	lduh	[%o5], %o4
185	inc	2, %o5
186	add	%o3, %o4, %o3
187
188Lfinish_byte:
189	btst	1, %o2
190	bz	Lnext
191	 nop
192	ldub	[%o5], %o4
193	sllx	%o3, 8, %o3	! ROL { sum = sum << 8; }
194	inc	%o5
195	xor	%g1, 1, %g1	! Flip byte_swapped
196	add	%o3, %o4, %o3
197
198Lnext:
199	LDPTR	[%o0 + M_NEXT], %o0
200Lfinish:
201	srlx	%o3, 32, %o4	! Reduce to 32-bits
202	srl	%o3, 0, %o3
203	brz,pt	%o0, 1f		! In general there is only one mbuf
204	 add	%o3, %o4, %o3
205	brgz,pt	%o1, Lloop	! But usually all need to be fully checksummed
206	 nop
2071:
208	sethi	%hi(0x0000ffff), %o5	! data ptr not needed any more
209
210	srlx	%o3, 16, %o4
211	or	%o5, %lo(0x0000ffff), %o5
212
213	and	%o3, %o5, %o3
214
215	add	%o3, %o4, %o3
216	brz,pt	%g1, 0f		! if (byte_swapped) {
217	 nop
218
219	sllx	%o3, 8, %o3	! ROL
220
221	srlx	%o3, 16, %o4	! REDUCE
222	and	%o3, %o5, %o3
223
224	add	%o3, %o4, %o3
2250:
226	subcc	%o3, %o5, %o4	! if (sum > 0xffff)
227	movg	%icc, %o4, %o3	! sum -= 0xffff;
228
229	clr	%g4		! In case we are using EMBEDANY (ick)
230	retl
231	 xor	%o3, %o5, %o0	! return (0xffff ^ sum);
232