/*
 * Compute 16-bit sum in ones' complement arithmetic (with end-around carry).
 * This sum is often used as a simple checksum in networking.
 *
 * Copyright (c) 2020, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

#include "networking.h"
#include "chksum_common.h"

always_inline
static inline uint32_t
slurp_head32(const void **pptr, uint32_t *nbytes)
{
    uint32_t sum = 0;
    Assert(*nbytes >= 4);
    uint32_t off = (uintptr_t) *pptr % 4;
    if (likely(off != 0))
    {
	/* Get rid of bytes 0..off-1 */
	const unsigned char *ptr32 = align_ptr(*pptr, 4);
	uint32_t mask = ~0U << (CHAR_BIT * off);
	sum = load32(ptr32) & mask;
	*pptr = ptr32 + 4;
	*nbytes -= 4 - off;
    }
    return sum;
}

/* Additional loop unrolling would help when not auto-vectorizing */
unsigned short
__chksum(const void *ptr, unsigned int nbytes)
{
    bool swap = false;
    uint64_t sum = 0;

    if (nbytes > 300)
    {
	/* 4-byte align pointer */
	swap = (uintptr_t) ptr & 1;
	sum = slurp_head32(&ptr, &nbytes);
    }
    /* Else benefit of aligning not worth the overhead */

    /* Sum all 16-byte chunks */
    const char *cptr = ptr;
    for (uint32_t nquads = nbytes / 16; nquads != 0; nquads--)
    {
	uint64_t h0 = load32(cptr + 0);
	uint64_t h1 = load32(cptr + 4);
	uint64_t h2 = load32(cptr + 8);
	uint64_t h3 = load32(cptr + 12);
	sum += h0 + h1 + h2 + h3;
	cptr += 16;
    }
    nbytes %= 16;
    Assert(nbytes < 16);

    /* Handle any trailing 4-byte chunks */
    while (nbytes >= 4)
    {
	sum += load32(cptr);
	cptr += 4;
	nbytes -= 4;
    }
    Assert(nbytes < 4);

    if (nbytes & 2)
    {
	sum += load16(cptr);
	cptr += 2;
    }

    if (nbytes & 1)
    {
	sum += *(uint8_t *)cptr;
    }

    return fold_and_swap(sum, swap);
}
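/*
 * Illustrative sketch, not part of the original file: the "end-around
 * carry" fold named in the header comment, reduced to a standalone
 * helper. fold_and_swap() from chksum_common.h presumably performs an
 * equivalent fold on the 64-bit accumulator (plus a byte swap when the
 * input pointer was odd-aligned); fold16() and the test driver below
 * are hypothetical names introduced here for illustration only, and the
 * block is compiled out by default.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

static uint16_t
fold16(uint64_t sum)
{
    /* Repeatedly add the carry-out back into the low 16 bits until the
       value fits in 16 bits: this is the end-around carry. */
    while (sum >> 16)
	sum = (sum & 0xffff) + (sum >> 16);
    return (uint16_t) sum;
}

int
main(void)
{
    /* 0xffff + 0x0001 overflows 16 bits; folding the carry back in
       yields 0x0001, the ones' complement result. */
    printf("0x%x\n", fold16(0xffffull + 0x0001ull));
    return 0;
}
#endif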