/* $NetBSD: cpu_in_cksum.S,v 1.4 2015/10/17 18:51:32 nakayama Exp $ */

/*
 * Copyright (c) 2001 Eduardo Horvath
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "assym.h"
#include <machine/asm.h>

/*
 * int cpu_in_cksum(struct mbuf *m, int len, int off, uint32_t initial_sum);
 *
 * Walks the mbuf chain starting at m, skips the first off bytes, and
 * adds up to len bytes of data to initial_sum in a 64-bit accumulator.
 * The accumulator is folded down to 16 bits (high part added back into
 * the low part) and the routine returns 0xffff ^ sum.
 *
 * Data is consumed a byte, a halfword or a word at a time depending on
 * the alignment of the data pointer.  Each single-byte add shifts the
 * accumulator left by 8 and toggles a "swapped" flag; if the flag is set
 * at the end, the folded sum is shifted by 8 and folded once more.
 *
 * The only fields of the mbuf we really care about
 * are m_next, m_len and m_data.
 *
 * This is a leaf routine (returns with retl); it uses only
 * %o0-%o5, %g1, %g4 and %g5.
 */

#define IALIGN .align 32

	IALIGN
ENTRY(cpu_in_cksum)

/*
 * Register args:
 *
 *	%o0 - mbuf
 *	%o1 - len
 *	%o2 - off
 *	%o3 - sum
 *
 *	// skip unnecessary part
 *	while (m && off > 0) {
 *		if (m->m_len > off)
 *			break;
 *		off -= m->m_len;
 *		m = m->m_next;
 *	}
 *
 * Note that the assembly below tests m and len (not off) before each
 * iteration, and leaves the loop as soon as off < m->m_len.
 *
 * NOTE(review): the first brz has another branch in its delay slot.
 * Presumably this is what keeps the m_len load from executing when m is
 * NULL -- confirm against the SPARC V9 rules for back-to-back branches.
 */

2:	brz,pn %o0, 0f			! m == NULL: nothing to skip
	 brlez,pn %o1, 0f		! len <= 0: nothing to skip
	 lduw [%o0 + M_LEN], %o5	! %o5 = m->m_len
	cmp %o2, %o5
	blt,pt %icc,0f			! if (off < m->m_len) break;
	 nop
	LDPTR [%o0 + M_NEXT], %o0	! m = m->m_next;
	ba 2b
	 sub %o2,%o5,%o2		! (delay slot) off -= m->m_len;

/*
 *
 * Register usage:
 *
 *	%o0 - mbuf
 *	%o1 - len
 *	%o2 - mlen
 *	%o3 - sum
 *	%o4 - temp
 *	%o5 - mdata
 *	%g1 - swapped
 *	%g4 - temp
 *	%g5 - temp
 */
0:	srl %o3, 0, %o3			! Make sure this is a 32-bit value going in
	brz %o0, Lfinish		! for (; m && len > 0; m = m->m_next) {
	 clr %g1			! swapped = 0;
	brlez %o1, Lfinish
	 mov %o2, %o4			! Stash this elsewhere for a bit

	/*
	 * First mbuf only: same setup as at Lloop, then adjust the
	 * length and data pointer by the remaining offset (in %o4).
	 */
	lduw [%o0 + M_LEN], %o2		! Code duplicated at Lloop
	srlx %o3, 32, %g4		! REDUCE bigtime
	sethi %hi(0xffff), %g5
	LDPTR [%o0 + M_DATA], %o5
	srl %o3, 0, %o3
	or %g5, %lo(0xffff), %g5	! %g5 = 0xffff mask

	sub %o2, %o4, %o2		! Correct for initial offset
	ba,pt %icc, 0f
	 add %o5, %o4, %o5		! (delay slot) mdata += off

	IALIGN
Lloop:
	/* Per-mbuf setup: load mlen/mdata, split sum into 32-bit halves. */
	lduw [%o0 + M_LEN], %o2
	srlx %o3, 32, %g4		! REDUCE bigtime
	sethi %hi(0xffff), %g5
	LDPTR [%o0 + M_DATA], %o5
	srl %o3, 0, %o3
	or %g5, %lo(0xffff), %g5	! %g5 = 0xffff mask
0:
	add %o3, %g4, %o3		! sum = low 32 bits + high 32 bits
	brz %o2, Lnext			! if (m->m_len == 0) continue;

	 cmp %o1, %o2			! (delay slot) if (len < mlen)
	movl %icc, %o1, %o2		!	mlen = len;

	btst 3, %o5			! if ((w & 3) == 0)
	bz Lint_aligned			!	goto int_aligned;
	 sub %o1, %o2, %o1		! (delay slot) len -= mlen

	/* Data pointer is not word aligned: fold sum before byte/short adds. */
	srlx %o3, 16, %o4		! REDUCE {sum = (sum & 0xffff) + (sum >> 16);}
	and %o3, %g5, %o3

	add %o3, %o4, %o3
	btst 1, %o5			! if ((w & 1) &&
	bz Lshort_aligned
	 nop

	deccc %o2
	bl,a,pn %icc, Lnext		! mlen >= 1) {
	 inc %o2			! (annulled unless taken) undo the decrement
	ldub [%o5], %o4			! ADDBYTE {ROL; sum += *w; byte_swapped ^= 1;}
	sllx %o3, 8, %o3		! ROL { sum = sum << 8; }
	inc %o5				! }
	add %o3, %o4, %o3
	xor %g1, 1, %g1			! Flip byte_swapped

Lshort_aligned:
	btst 2, %o5			! if ((w & 2) &&
	bz Lint_aligned
	 nop

	deccc 2, %o2			! mlen >= 2) {
	bl,a,pn %icc, Lfinish_byte	! fewer than 2 bytes left
	 inc 2, %o2			! (annulled unless taken) undo the decrement
	lduh [%o5], %o4			! ADDSHORT {sum += *(u_short *)w;}
	inc 2, %o5			! }
	add %o3, %o4, %o3		! }
Lint_aligned:
	/*
	 * Unrolled loop adding three words per iteration.  %g4 and %g5
	 * carry the second and third word of the previous iteration and
	 * are added at the top of the next one (and once more after the
	 * loop), so both start out as zero.  This overwrites the 0xffff
	 * mask in %g5; it is rebuilt at Lloop.
	 */
	deccc 0xc, %o2			! while (mlen > 12) {
	ble,pn %icc, Ltoofar
	 clr %g5
	ba,pt %icc, 0f
	 clr %g4
	IALIGN
0:
	lduw [%o5 + 0x00], %o4
	add %o3, %g4, %o3
	deccc 0xc, %o2
	lduw [%o5 + 0x04], %g4
	add %o3, %g5, %o3
	lduw [%o5 + 0x08], %g5
	inc 0xc, %o5			! ADVANCE(12) }
	bg,pt %icc, 0b
	 add %o3, %o4, %o3
	add %o3, %g4, %o3		! add the two words still pending
	add %o3, %g5, %o3
Ltoofar:
	inc 0xc, %o2			! undo the last subtract of 12

Ldo_int:
	/* Add the remaining whole words one at a time. */
	deccc 4, %o2
	bl,pn %icc, Lfinish_short
	 nop
0:
	lduw [%o5], %o4
	inc 4, %o5
	deccc 4, %o2
	bge,pt %icc, 0b
	 add %o3, %o4, %o3

Lfinish_short:
	/*
	 * When reached from Ldo_int, mlen is (bytes left - 4); its low
	 * two bits still equal the number of bytes left (0..3).
	 */
	btst 2, %o2			! a halfword left?
	bz Lfinish_byte
	 nop
	lduh [%o5], %o4
	inc 2, %o5
	add %o3, %o4, %o3

Lfinish_byte:
	btst 1, %o2			! a trailing byte left?
	bz Lnext
	 nop
	ldub [%o5], %o4
	sllx %o3, 8, %o3		! ROL { sum = sum << 8; }
	inc %o5
	xor %g1, 1, %g1			! Flip byte_swapped
	add %o3, %o4, %o3

Lnext:
	LDPTR [%o0 + M_NEXT], %o0	! m = m->m_next
Lfinish:
	srlx %o3, 32, %o4		! Reduce to 32-bits
	srl %o3, 0, %o3
	brz,pt %o0, 1f			! In general there is only one mbuf
	 add %o3, %o4, %o3		! (delay slot) sum = low half + high half
	brgz,pt %o1, Lloop		! But usually all need to be fully checksummed
	 nop
1:
	/*
	 * Final fold to 16 bits.
	 *
	 * NOTE(review): the single conditional subtract at the end only
	 * yields a value that fits in 16 bits when the folded sum does not
	 * exceed 0x1fffe -- confirm that the accumulator can never be large
	 * enough here to break that assumption.
	 */
	sethi %hi(0x0000ffff), %o5	! data ptr not needed any more

	srlx %o3, 16, %o4
	or %o5, %lo(0x0000ffff), %o5	! %o5 = 0xffff mask

	and %o3, %o5, %o3

	add %o3, %o4, %o3		! sum = (sum & 0xffff) + (sum >> 16)
	brz,pt %g1, 0f			! if (byte_swapped) {
	 nop

	sllx %o3, 8, %o3		! ROL

	srlx %o3, 16, %o4		! REDUCE
	and %o3, %o5, %o3

	add %o3, %o4, %o3		! }
0:
	subcc %o3, %o5, %o4		! if (sum > 0xffff)
	movg %icc, %o4, %o3		!	sum -= 0xffff;

	clr %g4				! In case we are using EMBEDANY (ick)
	retl
	 xor %o3, %o5, %o0		! return (0xffff ^ sum);