/*	$NetBSD: cpu_in_cksum.S,v 1.5 2019/11/15 09:50:01 maxv Exp $	*/

/*-
 * Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/asm.h>
#ifdef _KERNEL
#include <machine/frameasm.h>
#else
#define KMSAN_INIT_RET(sz)	/* nothing */
#endif
#include "assym.h"

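/*
 * int cpu_in_cksum(struct mbuf *m, int len, int off, uint32_t initial_sum)
 *
 * SysV AMD64 argument registers, as used below: %rdi = m, %esi = len,
 * %edx = off, %ecx = initial_sum.  The complemented 16-bit sum is
 * returned in %ax.  (Prototype restated from the MI interface; the
 * register mapping is inferred from the code below.)
 */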
ENTRY(cpu_in_cksum)
	pushq	%rbp
	pushq	%rbx

	/*
	 * During most of the function the following values can
	 * be found in the registers:
	 *
	 * %rdi: The current element in the mbuf chain.
	 * %esi: Remaining bytes to check after the current mbuf.
	 * %ebp: Minimum of %esi at the start of the loop and the
	 *       length of the current mbuf.
	 * %r8:  Overall sum. Carry must be handled on increment.
	 * %r9 and %r10: Partial sums. These are normally modified
	 *       without carry check, see comment in inner loop.
	 * %rbx: Remaining data of current mbuf.
	 * %dh:  Set if the partial sums must be byte swapped
	 *       before being added up.
	 * %dl:  Set if the current mbuf started at an odd
	 *       position, i.e. a word was split.
	 */

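	/*
	 * Rearrange the arguments: the initial sum moves into %r8,
	 * the byte offset into %ecx, and %dl/%dh start out clear.
	 */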
	movl	%ecx, %eax
	movl	%edx, %ecx
	movq	%rax, %r8
	xorl	%edx, %edx

	/* All requested bytes checksummed? */
	testl	%esi, %esi
	jz	.Mdone

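	/*
	 * Skip the first 'off' bytes: walk the chain until the mbuf
	 * containing the starting offset is found, then enter the
	 * main loop in the middle of that mbuf.
	 */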
.Mmbuf_preloop:
	/* No more data to process? */
	testq	%rdi, %rdi
	jz	.Mout_of_mbufs
	movl	M_LEN(%rdi), %ebp
	cmpl	%ebp, %ecx
	jbe	1f
	subl	%ebp, %ecx
	movq	M_NEXT(%rdi), %rdi
	jmp	.Mmbuf_preloop
1:
	subl	%ecx, %ebp
	movq	M_DATA(%rdi), %rbx
	movl	%ecx, %eax
	addq	%rax, %rbx
	jmp	.Mmbuf_load_data

.Mmbuf_loop:
	/* All requested bytes checksummed? */
	testl	%esi, %esi
	jz	.Mdone

	/* No more data to process? */
	testq	%rdi, %rdi
	jz	.Mout_of_mbufs

	movl	M_LEN(%rdi), %ebp
	movq	M_DATA(%rdi), %rbx
.Mmbuf_load_data:

	/* Skip empty mbufs. */
	testl	%ebp, %ebp
	jz	.Mmbuf_loop_next

	/* If this mbuf is longer than necessary, just truncate it. */
	cmpl	%ebp, %esi
	cmovb	%esi, %ebp
	subl	%ebp, %esi

	xorq	%r9, %r9
	xorq	%r10, %r10

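	/*
	 * Align the data pointer: consume a leading odd byte, then
	 * a leading word, so that the unrolled loop below works on
	 * dword-aligned data.
	 */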
.Mmbuf_align_word:
	/* Already aligned on a word boundary? */
	testb	$1, %bl
	jz	.Mmbuf_align_dword

	/* Invert %dl. */
	testb	%dl, %dl
	setz	%dl

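	/*
	 * A byte at an odd address is the high-order byte of the
	 * little-endian word starting one byte earlier, so move it
	 * into the high byte lane before adding it.
	 */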
	movzbl	(%rbx), %ecx
	xchgb	%cl, %ch
	addq	%rcx, %r9
	incq	%rbx
	decl	%ebp

.Mmbuf_align_dword:
	/*
	 * If the current position is equivalent to an odd index,
	 * byte swap the partial sums at the end to compensate.
	 */
	movb	%dl, %dh

	/*
	 * If the data is not already aligned at a dword boundary,
	 * just add the first word to one of the partial sums.
	 */
	testb	$2, %bl
	jz	.Mmbuf_inner_loop
	cmpl	$2, %ebp
	jb	.Mmbuf_trailing_bytes
	movzwl	(%rbx), %ecx
	addq	%rcx, %r9
	leaq	2(%rbx), %rbx
	leal	-2(%ebp), %ebp

.Mmbuf_inner_loop:
	.align	16
	/*
	 * The inner loop is unrolled to handle 32 bytes at a time.
	 * Dwords are summed up in %r9 and %r10 without checking
	 * for overflow. Using two accumulators exploits parallel
	 * adders and avoids serializing on the carry flag.
	 *
	 * After the summing up, %r9 and %r10 are merged and the
	 * sum is tested for having either of the two highest bits
	 * set. If that is the case, the partial sum is added to
	 * the overall sum and both registers are zeroed, so the
	 * next round of additions cannot overflow 64 bits.
	 */
	cmpl	$32, %ebp
	jb	.Mmbuf_trailing_owords
	movl	0(%rbx), %ecx
	movl	4(%rbx), %eax
	addq	%rcx, %r9
	addq	%rax, %r10

	movl	8(%rbx), %ecx
	movl	12(%rbx), %eax
	addq	%rcx, %r9
	addq	%rax, %r10

	movl	16(%rbx), %ecx
	movl	20(%rbx), %eax
	addq	%rcx, %r9
	addq	%rax, %r10

	movl	24(%rbx), %ecx
	movl	28(%rbx), %eax
	addq	%rcx, %r9
	addq	%rax, %r10

	leaq	32(%rbx), %rbx
	leal	-32(%ebp), %ebp

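	/*
	 * Merge the partial sums and fold them into %r8 as soon as
	 * bit 62 or 63 is set, before the next round of additions
	 * could overflow 64 bits.
	 */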
	addq	%r9, %r10
	movq	%r10, %rax
	shrq	$62, %rax
	xorq	%r9, %r9
	testb	%al, %al
	jz	.Mmbuf_inner_loop

	testb	%dh, %dh
	jz	1f
	rolq	$8, %r10
1:
	addq	%r10, %r8
	adcq	$0, %r8
	xorq	%r10, %r10

	jmp	.Mmbuf_inner_loop

	/*
	 * Checks for 16, 8, 4, 2 and 1 remaining bytes in the
	 * mbuf follow.  At most 31 bytes are left at this point,
	 * so no more overflow checks are needed here.
	 */
.Mmbuf_trailing_owords:
	testw	$16, %bp
	jz	.Mmbuf_trailing_qwords
	movl	0(%rbx), %ecx
	movl	4(%rbx), %eax
	addq	%rcx, %r9
	addq	%rax, %r10

	movl	8(%rbx), %ecx
	movl	12(%rbx), %eax
	addq	%rcx, %r9
	addq	%rax, %r10

	leaq	16(%rbx), %rbx

.Mmbuf_trailing_qwords:
	testw	$8, %bp
	jz	.Mmbuf_trailing_dwords
	movl	0(%rbx), %ecx
	movl	4(%rbx), %eax
	addq	%rcx, %r9
	addq	%rax, %r10

	leaq	8(%rbx), %rbx

.Mmbuf_trailing_dwords:
	testw	$4, %bp
	jz	.Mmbuf_trailing_words
	movl	(%rbx), %ecx
	addq	%rcx, %r9
	leaq	4(%rbx), %rbx

.Mmbuf_trailing_words:
	testw	$2, %bp
	jz	.Mmbuf_trailing_bytes
	movzwl	(%rbx), %ecx
	addq	%rcx, %r9
	leaq	2(%rbx), %rbx

.Mmbuf_trailing_bytes:
	testw	$1, %bp
	jz	.Mbyte_swap
	movzbl	(%rbx), %ecx
	addq	%rcx, %r9
	/* Invert %dl, as this splits a word. */
	testb	%dl, %dl
	setz	%dl

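	/*
	 * Rotating the 64-bit sum left by 8 multiplies it by 256
	 * modulo 2^64 - 1.  As 65535 divides 2^64 - 1, the folded
	 * 16-bit sum is also multiplied by 256 modulo 65535, which
	 * is exactly a byte swap.
	 */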
.Mbyte_swap:
	/* Byte swap by 8 bit rotate. */
	testb	%dh, %dh
	jz	1f
	rolq	$8, %r9
	rolq	$8, %r10
1:
	addq	%r10, %r8
	adcq	%r9, %r8
	adcq	$0, %r8

.Mmbuf_loop_next:
	movq	M_NEXT(%rdi), %rdi
	jmp	.Mmbuf_loop

.Mdone:
	/*
	 * Reduce the 64-bit overall sum to a 16-bit sum and
	 * return its complement.
	 */
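	/*
	 * Fold the high and low 32 bits with an end-around carry,
	 * then the high and low 16 bits.  This preserves the value
	 * modulo 65535, as 2^32 == 2^16 == 1 (mod 65535).
	 */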
	movq	%r8, %rax
	movq	%r8, %rbx
	shrq	$32, %rax
	addl	%eax, %ebx
	adcl	$0, %ebx
	movzwl	%bx, %eax
	shrl	$16, %ebx
	addw	%ax, %bx
	adcw	$0, %bx
	movw	%bx, %ax
	notw	%ax

.Mreturn:
	popq	%rbx
	popq	%rbp
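	/* Tell KMSAN that the 4-byte return value is initialized. */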
	KMSAN_INIT_RET(4)
	ret

.Mout_of_mbufs:
#ifdef __PIC__
	leaq	.Mout_of_mbufs_msg(%rip), %rdi
#else
	movq	$.Mout_of_mbufs_msg, %rdi
#endif
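	/* Variadic call: %al = 0 vector register arguments (SysV ABI). */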
	movl	$0, %eax
	call	PIC_PLT(_C_LABEL(printf))
	jmp	.Mreturn
END(cpu_in_cksum)

	.section	.rodata
	.type		.Mout_of_mbufs_msg, @object
.Mout_of_mbufs_msg:
	.string		"in_cksum: out of data\n"
END(.Mout_of_mbufs_msg)