/*	$NetBSD: cpu_in_cksum.S,v 1.5 2019/11/15 09:50:01 maxv Exp $	*/

/*-
 * Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/asm.h>
#ifdef _KERNEL
#include <machine/frameasm.h>
#else
#define KMSAN_INIT_RET(sz)	/* nothing */
#endif
#include "assym.h"

ENTRY(cpu_in_cksum)
	pushq	%rbp
	pushq	%rbx

	/*
	 * During most of the function the following values can
	 * be found in the registers:
	 *
	 * %rdi: The current element in the mbuf chain.
	 * %esi: Remaining bytes to check after the current mbuf.
	 * %ebp: Minimum of %esi at the start of the loop and the
	 *	length of the current mbuf.
	 * %r8: Overall sum. Carry must be handled on increment.
	 * %r9 and %r10: Partial sums. These are normally modified
	 *	without a carry check; see the comment in the inner loop.
	 * %rbx: Remaining data of the current mbuf.
	 * %dh: Set if the partial sums must be byte swapped before
	 *	being added up.
	 * %dl: Set if the current mbuf started at an odd position,
	 *	i.e. a word was split.
	 */

	/* Move the start offset into %ecx and the initial sum into %r8. */
	movl	%ecx, %eax
	movl	%edx, %ecx
	movq	%rax, %r8
	xorl	%edx, %edx

	/* All requested bytes checksummed? */
	testl	%esi, %esi
	jz	.Mdone

.Mmbuf_preloop:
	/* No more data to process? */
	testq	%rdi, %rdi
	jz	.Mout_of_mbufs
	movl	M_LEN(%rdi), %ebp
	cmpl	%ebp, %ecx
	jbe	1f
	subl	%ebp, %ecx
	movq	M_NEXT(%rdi), %rdi
	jmp	.Mmbuf_preloop
1:
	subl	%ecx, %ebp
	movq	M_DATA(%rdi), %rbx
	movl	%ecx, %eax
	addq	%rax, %rbx
	jmp	.Mmbuf_load_data

.Mmbuf_loop:
	/* All requested bytes checksummed? */
	testl	%esi, %esi
	jz	.Mdone

	/* No more data to process? */
	testq	%rdi, %rdi
	jz	.Mout_of_mbufs

	movl	M_LEN(%rdi), %ebp
	movq	M_DATA(%rdi), %rbx
.Mmbuf_load_data:

	/* Skip empty mbufs. */
	testl	%ebp, %ebp
	jz	.Mmbuf_loop_next

	/* If this mbuf is longer than necessary, just truncate it. */
	cmpl	%ebp, %esi
	cmovb	%esi, %ebp
	subl	%ebp, %esi

	xorq	%r9, %r9
	xorq	%r10, %r10

.Mmbuf_align_word:
	/* Already aligned on a word boundary? */
	testb	$1, %bl
	jz	.Mmbuf_align_dword
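	/*
	 * The data starts on an odd address: the leading byte is the
	 * high half of a network-order 16-bit word, so it is added
	 * pre-shifted (the xchgb below moves it into bits 8-15) and
	 * %dl is toggled to record the split word. A rough C sketch
	 * of this step, for illustration only ('p', 'n', 'partial'
	 * and 'swapped' are hypothetical stand-ins for %rbx, %ebp,
	 * %r9 and %dl):
	 *
	 *	if ((uintptr_t)p & 1) {
	 *		partial += (uint64_t)*p++ << 8;
	 *		swapped = !swapped;
	 *		n--;
	 *	}
	 */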
	/* Invert %dl. */
	testb	%dl, %dl
	setz	%dl

	movzbl	(%rbx), %ecx
	xchgb	%cl, %ch
	addq	%rcx, %r9
	incq	%rbx
	decl	%ebp

.Mmbuf_align_dword:
	/*
	 * If the current position is equivalent to an odd index,
	 * byte swap the partial sums at the end to compensate.
	 */
	movb	%dl, %dh

	/*
	 * If the data is not already aligned at a dword boundary,
	 * just add the first word to one of the partial sums.
	 */
	testb	$2, %bl
	jz	.Mmbuf_inner_loop
	cmpl	$2, %ebp
	jb	.Mmbuf_trailing_bytes
	movzwl	(%rbx), %ecx
	addq	%rcx, %r9
	leaq	2(%rbx), %rbx
	leal	-2(%ebp), %ebp

	.align	16
.Mmbuf_inner_loop:
	/*
	 * The inner loop is unrolled to handle 32 bytes at a time.
	 * Dwords are summed up in %r9 and %r10 without checking for
	 * overflow; keeping two independent accumulators exploits
	 * both ALU adders and avoids serialising on the carry flag,
	 * as add-with-carry would.
	 *
	 * After the summing up, %r9 and %r10 are merged and the sum
	 * is tested for having either of the two highest bits set.
	 * If that is the case, the partial sum is added to the
	 * overall sum and both registers are zeroed.
	 */
	cmpl	$32, %ebp
	jb	.Mmbuf_trailing_owords
	movl	0(%rbx), %ecx
	movl	4(%rbx), %eax
	addq	%rcx, %r9
	addq	%rax, %r10

	movl	8(%rbx), %ecx
	movl	12(%rbx), %eax
	addq	%rcx, %r9
	addq	%rax, %r10

	movl	16(%rbx), %ecx
	movl	20(%rbx), %eax
	addq	%rcx, %r9
	addq	%rax, %r10

	movl	24(%rbx), %ecx
	movl	28(%rbx), %eax
	addq	%rcx, %r9
	addq	%rax, %r10

	leaq	32(%rbx), %rbx
	leal	-32(%ebp), %ebp

	addq	%r9, %r10
	movq	%r10, %rax
	shrq	$62, %rax
	xorq	%r9, %r9
	testb	%al, %al
	jz	.Mmbuf_inner_loop

	testb	%dh, %dh
	jz	1f
	rolq	$8, %r10
1:
	addq	%r10, %r8
	adcq	$0, %r8
	xorq	%r10, %r10

	jmp	.Mmbuf_inner_loop

	/*
	 * One more check each for 16, 8, 4, 2 and 1 remaining
	 * bytes in the mbuf...
	 *
	 * No more overflow checks are needed here.
	 */
.Mmbuf_trailing_owords:
	testw	$16, %bp
	jz	.Mmbuf_trailing_qwords
	movl	0(%rbx), %ecx
	movl	4(%rbx), %eax
	addq	%rcx, %r9
	addq	%rax, %r10

	movl	8(%rbx), %ecx
	movl	12(%rbx), %eax
	addq	%rcx, %r9
	addq	%rax, %r10

	leaq	16(%rbx), %rbx

.Mmbuf_trailing_qwords:
	testw	$8, %bp
	jz	.Mmbuf_trailing_dwords
	movl	0(%rbx), %ecx
	movl	4(%rbx), %eax
	addq	%rcx, %r9
	addq	%rax, %r10

	leaq	8(%rbx), %rbx

.Mmbuf_trailing_dwords:
	testw	$4, %bp
	jz	.Mmbuf_trailing_words
	movl	(%rbx), %ecx
	addq	%rcx, %r9
	leaq	4(%rbx), %rbx

.Mmbuf_trailing_words:
	testw	$2, %bp
	jz	.Mmbuf_trailing_bytes
	movzwl	(%rbx), %ecx
	addq	%rcx, %r9
	leaq	2(%rbx), %rbx

.Mmbuf_trailing_bytes:
	testw	$1, %bp
	jz	.Mbyte_swap
	movzbl	(%rbx), %ecx
	addq	%rcx, %r9
	/* Invert %dl, as this is a split in a word. */
	testb	%dl, %dl
	setz	%dl

.Mbyte_swap:
	/* Byte swap by an 8 bit rotate. */
	testb	%dh, %dh
	jz	1f
	rolq	$8, %r9
	rolq	$8, %r10
1:
	addq	%r10, %r8
	adcq	%r9, %r8
	adcq	$0, %r8

.Mmbuf_loop_next:
	movq	M_NEXT(%rdi), %rdi
	jmp	.Mmbuf_loop

.Mdone:
	/*
	 * Reduce the 64 bit overall sum into a 16 bit sum and
	 * return the complement.
	 */
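	/*
	 * The reduction below is a plain end-around-carry fold. An
	 * equivalent C sketch, for illustration only ('sum' is a
	 * hypothetical stand-in for %r8):
	 *
	 *	uint64_t s = sum;
	 *	s = (s >> 32) + (s & 0xffffffff);  // 64 -> 33 bits
	 *	s = (s >> 32) + (s & 0xffffffff);  // fold the carry back in
	 *	s = (s >> 16) + (s & 0xffff);      // 32 -> 17 bits
	 *	s = (s >> 16) + (s & 0xffff);      // fold the carry back in
	 *	return (uint16_t)~s;
	 */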
	movq	%r8, %rax
	movq	%r8, %rbx
	shrq	$32, %rax
	addl	%eax, %ebx
	adcl	$0, %ebx
	movzwl	%bx, %eax
	shrl	$16, %ebx
	addw	%ax, %bx
	adcw	$0, %bx
	movw	%bx, %ax
	notw	%ax

.Mreturn:
	popq	%rbx
	popq	%rbp
	KMSAN_INIT_RET(4)
	ret

.Mout_of_mbufs:
#ifdef __PIC__
	leaq	.Mout_of_mbufs_msg(%rip), %rdi
#else
	movq	$.Mout_of_mbufs_msg, %rdi
#endif
	movl	$0, %eax
	call	PIC_PLT(_C_LABEL(printf))
	jmp	.Mreturn
END(cpu_in_cksum)

	.section .rodata
	.type .Mout_of_mbufs_msg, @object
.Mout_of_mbufs_msg:
	.string "in_cksum: out of data\n"
END(.Mout_of_mbufs_msg)
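/*
 * For reference, the C-level contract this routine appears to satisfy,
 * inferred here from the register usage at function entry under the
 * SysV AMD64 ABI (the authoritative prototype lives in the MI headers,
 * not in this file):
 *
 *	int cpu_in_cksum(struct mbuf *m, int len, int off,
 *	    uint32_t initial_sum);
 *
 * so that %rdi = m, %esi = len, %edx = off (moved to %ecx) and
 * %ecx = initial_sum (moved to %r8).
 */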