xref: /freebsd-src/contrib/arm-optimized-routines/networking/test/chksum.c (revision 072a4ba82a01476eaee33781ccd241033eefcf0b)
/*
 * Ones' complement checksum test & benchmark
 *
 * Copyright (c) 2016-2020, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
731914882SAlex Richardson 
831914882SAlex Richardson #define _GNU_SOURCE
931914882SAlex Richardson #include <inttypes.h>
1031914882SAlex Richardson #include <stdbool.h>
1131914882SAlex Richardson #include <stdint.h>
1231914882SAlex Richardson #include <stdio.h>
1331914882SAlex Richardson #include <stdlib.h>
1431914882SAlex Richardson #include <string.h>
1531914882SAlex Richardson #include <sys/mman.h>
1631914882SAlex Richardson #include <time.h>
1731914882SAlex Richardson #include <unistd.h>
1831914882SAlex Richardson #include "../include/networking.h"
1931914882SAlex Richardson 
/*
 * Assert(exp): a real assert() when the build defines WANT_ASSERT to a
 * non-zero value; otherwise the expression is still evaluated (so side
 * effects are kept) and its value is discarded.
 */
#if WANT_ASSERT
#undef NDEBUG
#include <assert.h>
#define Assert(exp) assert(exp)
#else
#define Assert(exp) ((void) (exp))
#endif

/* Marks a pointee type as allowed to alias any other type (GNU C only). */
#ifdef __GNUC__
#define may_alias __attribute__((__may_alias__))
#else
#define may_alias
#endif

/* Granularity to which the data pool size is rounded up. */
#define CACHE_LINE 64
/* Round x up to the next multiple of y; y must be a power of two. */
#define ALIGN(x, y) (((x) + (y) - 1) & ~((y) - 1))
3631914882SAlex Richardson 
3731914882SAlex Richardson /* Reference implementation - do not modify! */
3831914882SAlex Richardson static uint16_t
checksum_simple(const void * ptr,uint32_t nbytes)3931914882SAlex Richardson checksum_simple(const void *ptr, uint32_t nbytes)
4031914882SAlex Richardson {
4131914882SAlex Richardson     const uint16_t *may_alias hptr = ptr;
4231914882SAlex Richardson     uint64_t sum = 0;/* Need 64-bit accumulator when nbytes > 64K */
4331914882SAlex Richardson 
4431914882SAlex Richardson     /* Sum all halfwords, assume misaligned accesses are handled in HW */
4531914882SAlex Richardson     for (uint32_t nhalfs = nbytes >> 1; nhalfs != 0; nhalfs--)
4631914882SAlex Richardson     {
4731914882SAlex Richardson 	sum += *hptr++;
4831914882SAlex Richardson     }
4931914882SAlex Richardson 
5031914882SAlex Richardson     /* Add any trailing odd byte */
5131914882SAlex Richardson     if ((nbytes & 0x01) != 0)
5231914882SAlex Richardson     {
5331914882SAlex Richardson 	sum += *(uint8_t *) hptr;
5431914882SAlex Richardson     }
5531914882SAlex Richardson 
5631914882SAlex Richardson     /* Fold 64-bit sum to 32 bits */
5731914882SAlex Richardson     sum = (sum & 0xffffffff) + (sum >> 32);
5831914882SAlex Richardson     sum = (sum & 0xffffffff) + (sum >> 32);
5931914882SAlex Richardson     Assert(sum == (uint32_t) sum);
6031914882SAlex Richardson 
6131914882SAlex Richardson     /* Fold 32-bit sum to 16 bits */
6231914882SAlex Richardson     sum = (sum & 0xffff) + (sum >> 16);
6331914882SAlex Richardson     sum = (sum & 0xffff) + (sum >> 16);
6431914882SAlex Richardson     Assert(sum == (uint16_t) sum);
6531914882SAlex Richardson 
6631914882SAlex Richardson     return (uint16_t) sum;
6731914882SAlex Richardson }
6831914882SAlex Richardson 
6931914882SAlex Richardson static struct
7031914882SAlex Richardson {
7131914882SAlex Richardson     uint16_t (*cksum_fp)(const void *, uint32_t);
7231914882SAlex Richardson     const char *name;
7331914882SAlex Richardson } implementations[] =
7431914882SAlex Richardson {
7531914882SAlex Richardson     { checksum_simple, "simple"},
7631914882SAlex Richardson     { __chksum, "scalar"},
7731914882SAlex Richardson #if __arm__
7831914882SAlex Richardson     { __chksum_arm_simd, "simd" },
7931914882SAlex Richardson #elif __aarch64__
8031914882SAlex Richardson     { __chksum_aarch64_simd, "simd" },
8131914882SAlex Richardson #endif
8231914882SAlex Richardson     { NULL, NULL}
8331914882SAlex Richardson };
8431914882SAlex Richardson 
8531914882SAlex Richardson static int
find_impl(const char * name)8631914882SAlex Richardson find_impl(const char *name)
8731914882SAlex Richardson {
8831914882SAlex Richardson     for (int i = 0; implementations[i].name != NULL; i++)
8931914882SAlex Richardson     {
9031914882SAlex Richardson 	if (strcmp(implementations[i].name, name) == 0)
9131914882SAlex Richardson 	{
9231914882SAlex Richardson 	    return i;
9331914882SAlex Richardson 	}
9431914882SAlex Richardson     }
9531914882SAlex Richardson     return -1;
9631914882SAlex Richardson }
9731914882SAlex Richardson 
/* Checksum function under test; set by main() before any use. */
static uint16_t (*CKSUM_FP)(const void *, uint32_t);
/* Volatile sink for benchmark results so the checksum calls are not
   optimised away. */
static volatile uint16_t SINK;
10031914882SAlex Richardson 
/*
 * Compare the implementation under test (CKSUM_FP) against the reference
 * checksum_simple() for `size` bytes starting at `data`.
 *
 * `offset` is used only in the diagnostic message.
 *
 * Returns true when both checksums agree.  On a mismatch a diagnostic is
 * written to stderr; for sizes below 65536 the process then exits with
 * EXIT_FAILURE, for larger sizes false is returned.
 */
static bool
verify(const void *data, uint32_t offset, uint32_t size)
{
    uint16_t csum_expected = checksum_simple(data, size);
    uint16_t csum_actual = CKSUM_FP(data, size);
    if (csum_actual != csum_expected)
    {
        fprintf(stderr, "\nInvalid checksum for offset %u size %u: "
                "actual %04x expected %04x (valid)",
                offset, size, csum_actual, csum_expected);
        if (size < 65536)
        {
            /* Fatal error */
            exit(EXIT_FAILURE);
        }
        /* Else some implementations only support sizes up to 2^16 */
        return false;
    }
    return true;
}
12231914882SAlex Richardson 
12331914882SAlex Richardson static uint64_t
clock_get_ns(void)12431914882SAlex Richardson clock_get_ns(void)
12531914882SAlex Richardson {
12631914882SAlex Richardson     struct timespec ts;
12731914882SAlex Richardson     clock_gettime(CLOCK_MONOTONIC, &ts);
12831914882SAlex Richardson     return ts.tv_sec * (uint64_t) 1000000000 + ts.tv_nsec;
12931914882SAlex Richardson }
13031914882SAlex Richardson 
13131914882SAlex Richardson static void
benchmark(const uint8_t * base,size_t poolsize,uint32_t blksize,uint32_t numops,uint64_t cpufreq)13231914882SAlex Richardson benchmark(const uint8_t *base,
13331914882SAlex Richardson 	  size_t poolsize,
13431914882SAlex Richardson 	  uint32_t blksize,
13531914882SAlex Richardson 	  uint32_t numops,
13631914882SAlex Richardson 	  uint64_t cpufreq)
13731914882SAlex Richardson {
13831914882SAlex Richardson     printf("%11u ", (unsigned int) blksize); fflush(stdout);
13931914882SAlex Richardson 
14031914882SAlex Richardson     uint64_t start = clock_get_ns();
14131914882SAlex Richardson     for (uint32_t i = 0; i < numops; i ++)
14231914882SAlex Richardson     {
14331914882SAlex Richardson 	/* Read a random value from the pool */
14431914882SAlex Richardson 	uint32_t random = ((uint32_t *) base)[i % (poolsize / 4)];
14531914882SAlex Richardson 	/* Generate a random starting address */
14631914882SAlex Richardson 	const void *data = &base[random % (poolsize - blksize)];
14731914882SAlex Richardson 	SINK = CKSUM_FP(data, blksize);
14831914882SAlex Richardson     }
14931914882SAlex Richardson     uint64_t end = clock_get_ns();
15031914882SAlex Richardson 
15131914882SAlex Richardson #define MEGABYTE 1000000 /* Decimal megabyte (MB) */
15231914882SAlex Richardson     uint64_t elapsed_ns = end - start;
15331914882SAlex Richardson     uint64_t elapsed_ms = elapsed_ns / 1000000;
15431914882SAlex Richardson     uint32_t blks_per_s = (uint32_t) ((numops / elapsed_ms) * 1000);
15531914882SAlex Richardson     uint64_t accbytes = (uint64_t) numops * blksize;
15631914882SAlex Richardson     printf("%11ju ", (uintmax_t) ((accbytes / elapsed_ms) * 1000) / MEGABYTE);
15731914882SAlex Richardson     unsigned int cyc_per_blk = cpufreq / blks_per_s;
15831914882SAlex Richardson     printf("%11u ", cyc_per_blk);
15931914882SAlex Richardson     if (blksize != 0)
16031914882SAlex Richardson     {
16131914882SAlex Richardson 	unsigned int cyc_per_byte = 1000 * cyc_per_blk / blksize;
16231914882SAlex Richardson 	printf("%7u.%03u ",
16331914882SAlex Richardson 		cyc_per_byte / 1000, cyc_per_byte % 1000);
16431914882SAlex Richardson     }
16531914882SAlex Richardson     printf("\n");
16631914882SAlex Richardson }
16731914882SAlex Richardson 
main(int argc,char * argv[])16831914882SAlex Richardson int main(int argc, char *argv[])
16931914882SAlex Richardson {
17031914882SAlex Richardson     int c;
17131914882SAlex Richardson     bool DUMP = false;
17231914882SAlex Richardson     uint32_t IMPL = 0;/* Simple implementation */
17331914882SAlex Richardson     uint64_t CPUFREQ = 0;
17431914882SAlex Richardson     uint32_t BLKSIZE = 0;
17531914882SAlex Richardson     uint32_t NUMOPS = 1000000;
17631914882SAlex Richardson     uint32_t POOLSIZE = 512 * 1024;/* Typical ARM L2 cache size */
17731914882SAlex Richardson 
17831914882SAlex Richardson     setvbuf(stdout, NULL, _IOLBF, 160);
17931914882SAlex Richardson     while ((c = getopt(argc, argv, "b:df:i:n:p:")) != -1)
18031914882SAlex Richardson     {
18131914882SAlex Richardson 	switch (c)
18231914882SAlex Richardson 	{
18331914882SAlex Richardson 	    case 'b' :
18431914882SAlex Richardson 		{
18531914882SAlex Richardson 		    int blksize = atoi(optarg);
18631914882SAlex Richardson 		    if (blksize < 1 || blksize > POOLSIZE / 2)
18731914882SAlex Richardson 		    {
18831914882SAlex Richardson 			fprintf(stderr, "Invalid block size %d\n", blksize);
18931914882SAlex Richardson 			exit(EXIT_FAILURE);
19031914882SAlex Richardson 		    }
19131914882SAlex Richardson 		    BLKSIZE = (unsigned) blksize;
19231914882SAlex Richardson 		    break;
19331914882SAlex Richardson 		}
19431914882SAlex Richardson 	    case 'd' :
19531914882SAlex Richardson 		DUMP = true;
19631914882SAlex Richardson 		break;
19731914882SAlex Richardson 	    case 'f' :
19831914882SAlex Richardson 		{
19931914882SAlex Richardson 		    int64_t cpufreq = atoll(optarg);
20031914882SAlex Richardson 		    if (cpufreq < 1)
20131914882SAlex Richardson 		    {
20231914882SAlex Richardson 			fprintf(stderr, "Invalid CPU frequency %"PRId64"\n",
20331914882SAlex Richardson 				cpufreq);
20431914882SAlex Richardson 			exit(EXIT_FAILURE);
20531914882SAlex Richardson 		    }
20631914882SAlex Richardson 		    CPUFREQ = cpufreq;
20731914882SAlex Richardson 		    break;
20831914882SAlex Richardson 		}
20931914882SAlex Richardson 	    case 'i' :
21031914882SAlex Richardson 		{
21131914882SAlex Richardson 		    int impl = find_impl(optarg);
21231914882SAlex Richardson 		    if (impl < 0)
21331914882SAlex Richardson 		    {
21431914882SAlex Richardson 			fprintf(stderr, "Invalid implementation %s\n", optarg);
21531914882SAlex Richardson 			goto usage;
21631914882SAlex Richardson 		    }
21731914882SAlex Richardson 		    IMPL = (unsigned) impl;
21831914882SAlex Richardson 		    break;
21931914882SAlex Richardson 		}
22031914882SAlex Richardson 	    case 'n' :
22131914882SAlex Richardson 		{
22231914882SAlex Richardson 		    int numops = atoi(optarg);
22331914882SAlex Richardson 		    if (numops < 1)
22431914882SAlex Richardson 		    {
22531914882SAlex Richardson 			fprintf(stderr, "Invalid number of operations %d\n", numops);
22631914882SAlex Richardson 			exit(EXIT_FAILURE);
22731914882SAlex Richardson 		    }
22831914882SAlex Richardson 		    NUMOPS = (unsigned) numops;
22931914882SAlex Richardson 		    break;
23031914882SAlex Richardson 		}
23131914882SAlex Richardson 	    case 'p' :
23231914882SAlex Richardson 		{
23331914882SAlex Richardson 		    int poolsize = atoi(optarg);
23431914882SAlex Richardson 		    if (poolsize < 4096)
23531914882SAlex Richardson 		    {
23631914882SAlex Richardson 			fprintf(stderr, "Invalid pool size %d\n", poolsize);
23731914882SAlex Richardson 			exit(EXIT_FAILURE);
23831914882SAlex Richardson 		    }
23931914882SAlex Richardson 		    char c = optarg[strlen(optarg) - 1];
24031914882SAlex Richardson 		    if (c == 'M')
24131914882SAlex Richardson 		    {
24231914882SAlex Richardson 			POOLSIZE = (unsigned) poolsize * 1024 * 1024;
24331914882SAlex Richardson 		    }
24431914882SAlex Richardson 		    else if (c == 'K')
24531914882SAlex Richardson 		    {
24631914882SAlex Richardson 			POOLSIZE = (unsigned) poolsize * 1024;
24731914882SAlex Richardson 		    }
24831914882SAlex Richardson 		    else
24931914882SAlex Richardson 		    {
25031914882SAlex Richardson 			POOLSIZE = (unsigned) poolsize;
25131914882SAlex Richardson 		    }
25231914882SAlex Richardson 		    break;
25331914882SAlex Richardson 		}
25431914882SAlex Richardson 	    default :
25531914882SAlex Richardson usage :
25631914882SAlex Richardson 		fprintf(stderr, "Usage: checksum <options>\n"
25731914882SAlex Richardson 			"-b <blksize>    Block size\n"
25831914882SAlex Richardson 			"-d              Dump first 96 bytes of data\n"
25931914882SAlex Richardson 			"-f <cpufreq>    CPU frequency (Hz)\n"
26031914882SAlex Richardson 			"-i <impl>       Implementation\n"
26131914882SAlex Richardson 			"-n <numops>     Number of operations\n"
26231914882SAlex Richardson 			"-p <poolsize>   Pool size (K or M suffix)\n"
26331914882SAlex Richardson 		       );
26431914882SAlex Richardson 		printf("Implementations:");
26531914882SAlex Richardson 		for (int i = 0; implementations[i].name != NULL; i++)
26631914882SAlex Richardson 		{
26731914882SAlex Richardson 		    printf(" %s", implementations[i].name);
26831914882SAlex Richardson 		}
26931914882SAlex Richardson 		printf("\n");
27031914882SAlex Richardson 		exit(EXIT_FAILURE);
27131914882SAlex Richardson 	}
27231914882SAlex Richardson     }
27331914882SAlex Richardson     if (optind > argc)
27431914882SAlex Richardson     {
27531914882SAlex Richardson 	goto usage;
27631914882SAlex Richardson     }
27731914882SAlex Richardson 
27831914882SAlex Richardson     CKSUM_FP = implementations[IMPL].cksum_fp;
27931914882SAlex Richardson     POOLSIZE = ALIGN(POOLSIZE, CACHE_LINE);
28031914882SAlex Richardson     uint8_t *base = mmap(0, POOLSIZE, PROT_READ|PROT_WRITE,
28131914882SAlex Richardson 			MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
28231914882SAlex Richardson     if (base == MAP_FAILED)
28331914882SAlex Richardson     {
28431914882SAlex Richardson 	perror("aligned_alloc"), exit(EXIT_FAILURE);
28531914882SAlex Richardson     }
28631914882SAlex Richardson     for (size_t i = 0; i < POOLSIZE / 4; i++)
28731914882SAlex Richardson     {
28831914882SAlex Richardson 	((uint32_t *) base)[i] = rand();
28931914882SAlex Richardson     }
29031914882SAlex Richardson 
29131914882SAlex Richardson     printf("Implementation: %s\n", implementations[IMPL].name);
29231914882SAlex Richardson     printf("numops %u, poolsize ", NUMOPS);
29331914882SAlex Richardson     if (POOLSIZE % (1024 * 1024) == 0)
29431914882SAlex Richardson     {
29531914882SAlex Richardson 	printf("%uMiB", POOLSIZE / (1024 * 1024));
29631914882SAlex Richardson     }
29731914882SAlex Richardson     else if (POOLSIZE % 1024 == 0)
29831914882SAlex Richardson     {
29931914882SAlex Richardson 	printf("%uKiB", POOLSIZE / 1024);
30031914882SAlex Richardson     }
30131914882SAlex Richardson     else
30231914882SAlex Richardson     {
30331914882SAlex Richardson 	printf("%uB", POOLSIZE);
30431914882SAlex Richardson     }
30531914882SAlex Richardson     printf(", blocksize %u, CPU frequency %juMHz\n",
30631914882SAlex Richardson 	   BLKSIZE, (uintmax_t) (CPUFREQ / 1000000));
30731914882SAlex Richardson #if WANT_ASSERT
30831914882SAlex Richardson     printf("Warning: assertions are enabled\n");
30931914882SAlex Richardson #endif
31031914882SAlex Richardson 
31131914882SAlex Richardson     if (DUMP)
31231914882SAlex Richardson     {
31331914882SAlex Richardson 	/* Print out first 96 bytes of data for human debugging */
31431914882SAlex Richardson 	for (int i = 0; i < 96; i++)
31531914882SAlex Richardson 	{
31631914882SAlex Richardson 	    if (i % 8 == 0)
31731914882SAlex Richardson 		printf("%2u:", i);
31831914882SAlex Richardson 	    printf(" %02x", base[i]);
31931914882SAlex Richardson 	    if (i % 8 == 7)
32031914882SAlex Richardson 		printf("\n");
32131914882SAlex Richardson 	}
32231914882SAlex Richardson     }
32331914882SAlex Richardson 
32431914882SAlex Richardson     /* Verify that chosen algorithm handles all combinations of offsets and sizes */
32531914882SAlex Richardson     printf("Verifying..."); fflush(stdout);
32631914882SAlex Richardson     bool success = true;
32731914882SAlex Richardson     /* Check all (relevant) combinations of size and offset */
32831914882SAlex Richardson     for (int size = 0; size <= 256; size++)
32931914882SAlex Richardson     {
33031914882SAlex Richardson 	for (int offset = 0; offset < 255; offset++)
33131914882SAlex Richardson 	{
33231914882SAlex Richardson 	    /* Check at start of mapped memory */
33331914882SAlex Richardson 	    success &= verify(&base[offset], offset, size);
33431914882SAlex Richardson 	    /* Check at end of mapped memory */
33531914882SAlex Richardson 	    uint8_t *p = base + POOLSIZE - (size + offset);
33631914882SAlex Richardson 	    success &= verify(p, (uintptr_t) p % 64, size);
33731914882SAlex Richardson 	}
33831914882SAlex Richardson     }
33931914882SAlex Richardson     /* Check increasingly larger sizes */
34031914882SAlex Richardson     for (size_t size = 1; size < POOLSIZE; size *= 2)
34131914882SAlex Richardson     {
34231914882SAlex Richardson 	success &= verify(base, 0, size);
34331914882SAlex Richardson     }
34431914882SAlex Richardson     /* Check the full size, this can detect accumulator overflows */
34531914882SAlex Richardson     success &= verify(base, 0, POOLSIZE);
34631914882SAlex Richardson     printf("%s\n", success ? "OK" : "failure");
34731914882SAlex Richardson 
34831914882SAlex Richardson     /* Print throughput in decimal megabyte (1000000B) per second */
34931914882SAlex Richardson     if (CPUFREQ != 0)
35031914882SAlex Richardson     {
35131914882SAlex Richardson 	printf("%11s %11s %11s %11s\n",
35231914882SAlex Richardson 	       "block size", "MB/s", "cycles/blk", "cycles/byte");
35331914882SAlex Richardson     }
35431914882SAlex Richardson     else
35531914882SAlex Richardson     {
35631914882SAlex Richardson 	printf("%11s %11s %11s %11s\n",
35731914882SAlex Richardson 	       "block size", "MB/s", "ns/blk", "ns/byte");
35831914882SAlex Richardson 	CPUFREQ = 1000000000;
35931914882SAlex Richardson     }
36031914882SAlex Richardson     if (BLKSIZE != 0)
36131914882SAlex Richardson     {
36231914882SAlex Richardson 	benchmark(base, POOLSIZE, BLKSIZE, NUMOPS, CPUFREQ);
36331914882SAlex Richardson     }
36431914882SAlex Richardson     else
36531914882SAlex Richardson     {
36631914882SAlex Richardson 	static const uint16_t sizes[] =
36731914882SAlex Richardson 	    { 20, 42, 102, 250, 612, 1500, 3674, 9000, 0 };
36831914882SAlex Richardson 	for (int i = 0; sizes[i] != 0; i++)
36931914882SAlex Richardson 	{
37031914882SAlex Richardson 	    uint32_t numops = NUMOPS * 10000 / (40 + sizes[i]);
37131914882SAlex Richardson 	    benchmark(base, POOLSIZE, sizes[i], numops, CPUFREQ);
37231914882SAlex Richardson 	}
37331914882SAlex Richardson     }
37431914882SAlex Richardson 
37531914882SAlex Richardson     if (munmap(base, POOLSIZE) != 0)
37631914882SAlex Richardson     {
37731914882SAlex Richardson 	perror("munmap"), exit(EXIT_FAILURE);
37831914882SAlex Richardson     }
37931914882SAlex Richardson 
38031914882SAlex Richardson     return success ? EXIT_SUCCESS : EXIT_FAILURE;
38131914882SAlex Richardson }
382