xref: /freebsd-src/contrib/arm-optimized-routines/networking/chksum.c (revision 072a4ba82a01476eaee33781ccd241033eefcf0b)
131914882SAlex Richardson /*
231914882SAlex Richardson  * Compute 16-bit sum in ones' complement arithmetic (with end-around carry).
331914882SAlex Richardson  * This sum is often used as a simple checksum in networking.
431914882SAlex Richardson  *
531914882SAlex Richardson  * Copyright (c) 2020, Arm Limited.
6*072a4ba8SAndrew Turner  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
731914882SAlex Richardson  */
831914882SAlex Richardson 
931914882SAlex Richardson #include "networking.h"
1031914882SAlex Richardson #include "chksum_common.h"
1131914882SAlex Richardson 
1231914882SAlex Richardson always_inline
1331914882SAlex Richardson static inline uint32_t
slurp_head32(const void ** pptr,uint32_t * nbytes)1431914882SAlex Richardson slurp_head32(const void **pptr, uint32_t *nbytes)
1531914882SAlex Richardson {
1631914882SAlex Richardson     uint32_t sum = 0;
1731914882SAlex Richardson     Assert(*nbytes >= 4);
1831914882SAlex Richardson     uint32_t off = (uintptr_t) *pptr % 4;
1931914882SAlex Richardson     if (likely(off != 0))
2031914882SAlex Richardson     {
2131914882SAlex Richardson 	/* Get rid of bytes 0..off-1 */
2231914882SAlex Richardson 	const unsigned char *ptr32 = align_ptr(*pptr, 4);
2331914882SAlex Richardson 	uint32_t mask = ~0U << (CHAR_BIT * off);
2431914882SAlex Richardson 	sum = load32(ptr32) & mask;
2531914882SAlex Richardson 	*pptr = ptr32 + 4;
2631914882SAlex Richardson 	*nbytes -= 4 - off;
2731914882SAlex Richardson     }
2831914882SAlex Richardson     return sum;
2931914882SAlex Richardson }
3031914882SAlex Richardson 
/*
 * Accumulate the ones' complement sum of the nbytes bytes starting at ptr.
 *
 * The data is added into a 64-bit accumulator as 32-bit words (16 bytes per
 * iteration of the main loop), followed by any remaining 4-byte, 2-byte and
 * 1-byte pieces. The accumulator is then handed to fold_and_swap() together
 * with a flag recording whether the original pointer was odd; that call
 * produces the return value.
 *
 * For buffers longer than 300 bytes the pointer is first brought to a 4-byte
 * boundary with slurp_head32(); for shorter buffers the benefit of aligning
 * is not worth the overhead.
 *
 * Additional loop unrolling would help when not auto-vectorizing.
 */
unsigned short
__chksum(const void *ptr, unsigned int nbytes)
{
    uint64_t acc = 0;
    bool odd_start = false;

    if (nbytes > 300)
    {
	/* Remember the odd start before the pointer is rewritten below */
	odd_start = (uintptr_t) ptr & 1;
	/* 4-byte align pointer; the partial head word seeds the sum */
	acc = slurp_head32(&ptr, &nbytes);
    }

    const char *cur = ptr;

    /* Main loop: one 16-byte chunk (four 32-bit words) per iteration */
    const uint32_t quad_count = nbytes / 16;
    for (uint32_t q = 0; q < quad_count; q++)
    {
	/* Widen each word first so the additions are done in 64 bits */
	const uint64_t w0 = load32(cur + 0);
	const uint64_t w1 = load32(cur + 4);
	const uint64_t w2 = load32(cur + 8);
	const uint64_t w3 = load32(cur + 12);

	acc += w0 + w1 + w2 + w3;
	cur += 16;
    }
    nbytes %= 16;
    Assert(nbytes < 16);

    /* Up to three whole 32-bit words may remain */
    for (; nbytes >= 4; nbytes -= 4, cur += 4)
    {
	acc += load32(cur);
    }
    Assert(nbytes < 4);

    /* A remaining 16-bit piece */
    if (nbytes & 2)
    {
	acc += load16(cur);
	cur += 2;
    }

    /* A final single byte */
    if (nbytes & 1)
    {
	acc += *(const uint8_t *) cur;
    }

    return fold_and_swap(acc, odd_start);
}
82