/* xref: /freebsd-src/contrib/arm-optimized-routines/string/bench/strlen.c (revision f3087bef11543b42e0d69b708f367097a4118d24) */
/*
 * strlen benchmark.
 *
 * Copyright (c) 2020-2021, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
731914882SAlex Richardson 
831914882SAlex Richardson #define _GNU_SOURCE
931914882SAlex Richardson #include <stdint.h>
1031914882SAlex Richardson #include <stdio.h>
1131914882SAlex Richardson #include <string.h>
1231914882SAlex Richardson #include <assert.h>
1331914882SAlex Richardson #include "stringlib.h"
1431914882SAlex Richardson #include "benchlib.h"
1531914882SAlex Richardson 
/* Passes over the whole random test set in each timed loop of
   strlen_random.  */
#define ITERS 5000
/* Calls per string size in the small aligned/unaligned benchmarks.  */
#define ITERS2 40000000
/* Calls per string size in the medium benchmark.  */
#define ITERS3 4000000
/* Number of randomly generated strlen inputs.  */
#define NUM_TESTS 65536

/* Number of string slots carved out of the buffer, and the longest string
   (excluding its terminator) that a slot can hold.  */
#define MAX_ALIGN 32
#define MAX_STRLEN 128

/* Shared, page-aligned string buffer: MAX_ALIGN slots of MAX_STRLEN + 1
   bytes each (4128 bytes), which also covers the 4096 bytes written by
   strlen_medium.  */
static char a[(MAX_STRLEN + 1) * MAX_ALIGN] __attribute__((__aligned__(4096)));

/* Print the heading STR, run benchmark TESTFN once per strlen
   implementation, then print a blank line.

   The body is wrapped in do/while (0) so that an invocation followed by a
   semicolon is exactly one statement and is safe under an unbraced
   if/else.  STR is written with fputs, so it is emitted verbatim rather
   than interpreted as a printf format.  RUN, RUNA64, RUNSVE and RUNT32 are
   not defined in this file (presumably they come from benchlib.h).  */
#define DOTEST(STR,TESTFN)                      \
  do                                            \
    {                                           \
      fputs ((STR), stdout);                    \
      RUN (TESTFN, strlen);                     \
      RUNA64 (TESTFN, __strlen_aarch64);        \
      RUNA64 (TESTFN, __strlen_aarch64_mte);    \
      RUNSVE (TESTFN, __strlen_aarch64_sve);    \
      RUNT32 (TESTFN, __strlen_armv6t2);        \
      printf ("\n");                            \
    }                                           \
  while (0)

/* Offsets into `a' of the random test strings; filled in by
   init_strlen_tests.  */
static uint16_t strlen_tests[NUM_TESTS];
3631914882SAlex Richardson 
/* Histogram entry: strings of length SIZE occur FREQ times.  */
typedef struct { uint16_t size; uint16_t freq; } freq_data_t;
/* Histogram entry: alignment ALIGN occurs FREQ times.  */
typedef struct { uint8_t align; uint16_t freq; } align_data_t;

#define SIZE_NUM 65536
#define SIZE_MASK (SIZE_NUM - 1)
/* Length histogram expanded to one element per occurrence, so indexing it
   with (random & SIZE_MASK) samples the distribution.  */
static uint8_t strlen_len_arr[SIZE_NUM];

/* Frequency data for strlen sizes up to 128 based on SPEC2017.
   The list ends at the first entry whose frequency is zero; the
   frequencies must add up to SIZE_NUM.  */
static const freq_data_t strlen_len_freq[] =
{
  { 12,22671}, { 18,12834}, { 13, 9555}, {  6, 6348}, { 17, 6095}, { 11, 2115},
  { 10, 1335}, {  7,  814}, {  2,  646}, {  9,  483}, {  8,  471}, { 16,  418},
  {  4,  390}, {  1,  388}, {  5,  233}, {  3,  204}, {  0,   79}, { 14,   79},
  { 15,   69}, { 26,   36}, { 22,   35}, { 31,   24}, { 32,   24}, { 19,   21},
  { 25,   17}, { 28,   15}, { 21,   14}, { 33,   14}, { 20,   13}, { 24,    9},
  { 29,    9}, { 30,    9}, { 23,    7}, { 34,    7}, { 27,    6}, { 44,    5},
  { 42,    4}, { 45,    3}, { 47,    3}, { 40,    2}, { 41,    2}, { 43,    2},
  { 58,    2}, { 78,    2}, { 36,    2}, { 48,    1}, { 52,    1}, { 60,    1},
  { 64,    1}, { 56,    1}, { 76,    1}, { 68,    1}, { 80,    1}, { 84,    1},
  { 72,    1}, { 86,    1}, { 35,    1}, { 39,    1}, { 50,    1}, { 38,    1},
  { 37,    1}, { 46,    1}, { 98,    1}, {102,    1}, {128,    1}, { 51,    1},
  {107,    1}, { 0,     0}
};

#define ALIGN_NUM 1024
#define ALIGN_MASK (ALIGN_NUM - 1)
/* Alignment histogram expanded to one element per occurrence.  */
static uint8_t strlen_align_arr[ALIGN_NUM];

/* Alignment data for strlen based on SPEC2017.
   The list ends at the first entry whose frequency is zero; the
   frequencies must add up to ALIGN_NUM.  */
static const align_data_t string_align_freq[] =
{
  {8, 470}, {32, 427}, {16, 99}, {1, 19}, {2, 6}, {4, 3}, {0, 0}
};

/* Expand both histograms into strlen_len_arr and strlen_align_arr.

   Each run is checked against the space left in the destination before it
   is written, so a table whose frequencies add up to more than the array
   size trips an assertion instead of writing past the end of the array.
   The final assertions then verify that the arrays were filled exactly.  */
static void
init_strlen_distribution (void)
{
  int n = 0;

  for (int i = 0; strlen_len_freq[i].freq != 0; i++)
    {
      int freq = strlen_len_freq[i].freq;

      assert (freq <= SIZE_NUM - n);
      for (int j = 0; j < freq; j++)
        strlen_len_arr[n++] = strlen_len_freq[i].size;
    }
  assert (n == SIZE_NUM);

  n = 0;
  for (int i = 0; string_align_freq[i].freq != 0; i++)
    {
      int freq = string_align_freq[i].freq;

      assert (freq <= ALIGN_NUM - n);
      for (int j = 0; j < freq; j++)
        strlen_align_arr[n++] = string_align_freq[i].align;
    }
  assert (n == ALIGN_NUM);
}
8631914882SAlex Richardson 
8731914882SAlex Richardson static void
8831914882SAlex Richardson init_strlen_tests (void)
8931914882SAlex Richardson {
9031914882SAlex Richardson   uint16_t index[MAX_ALIGN];
9131914882SAlex Richardson 
9231914882SAlex Richardson   memset (a, 'x', sizeof (a));
9331914882SAlex Richardson 
9431914882SAlex Richardson   /* Create indices for strings at all alignments.  */
9531914882SAlex Richardson   for (int i = 0; i < MAX_ALIGN; i++)
9631914882SAlex Richardson     {
9731914882SAlex Richardson       index[i] = i * (MAX_STRLEN + 1);
9831914882SAlex Richardson       a[index[i] + MAX_STRLEN] = 0;
9931914882SAlex Richardson     }
10031914882SAlex Richardson 
10131914882SAlex Richardson   /* Create a random set of strlen input strings using the string length
10231914882SAlex Richardson      and alignment distributions.  */
103d49ad206SAndrew Turner   for (int n = 0; n < NUM_TESTS; n++)
10431914882SAlex Richardson     {
10531914882SAlex Richardson       int align = strlen_align_arr[rand32 (0) & ALIGN_MASK];
10631914882SAlex Richardson       int exp_len = strlen_len_arr[rand32 (0) & SIZE_MASK];
10731914882SAlex Richardson 
10831914882SAlex Richardson       strlen_tests[n] =
10931914882SAlex Richardson 	index[(align + exp_len) & (MAX_ALIGN - 1)] + MAX_STRLEN - exp_len;
110*f3087befSAndrew Turner       assert ((strlen_tests[n] & (align - 1)) == 0);
111*f3087befSAndrew Turner       assert (strlen (a + strlen_tests[n]) == exp_len);
11231914882SAlex Richardson     }
11331914882SAlex Richardson }
11431914882SAlex Richardson 
11531914882SAlex Richardson static volatile size_t maskv = 0;
11631914882SAlex Richardson 
117*f3087befSAndrew Turner static void inline __attribute ((always_inline))
118*f3087befSAndrew Turner strlen_random (const char *name, size_t (*fn)(const char *))
11931914882SAlex Richardson {
120*f3087befSAndrew Turner   size_t res = 0, mask = maskv;
121*f3087befSAndrew Turner   uint64_t strlen_size = 0;
122*f3087befSAndrew Turner   printf ("%22s ", name);
12331914882SAlex Richardson 
124d49ad206SAndrew Turner   for (int c = 0; c < NUM_TESTS; c++)
125*f3087befSAndrew Turner     strlen_size += fn (a + strlen_tests[c]) + 1;
12631914882SAlex Richardson   strlen_size *= ITERS;
12731914882SAlex Richardson 
128*f3087befSAndrew Turner   /* Measure throughput of strlen.  */
12931914882SAlex Richardson   uint64_t t = clock_get_ns ();
13031914882SAlex Richardson   for (int i = 0; i < ITERS; i++)
131d49ad206SAndrew Turner     for (int c = 0; c < NUM_TESTS; c++)
132*f3087befSAndrew Turner       res += fn (a + strlen_tests[c]);
13331914882SAlex Richardson   t = clock_get_ns () - t;
134*f3087befSAndrew Turner   printf ("tp: %.3f ", (double)strlen_size / t);
135*f3087befSAndrew Turner 
136*f3087befSAndrew Turner   /* Measure latency of strlen result with (res & mask).  */
137*f3087befSAndrew Turner   t = clock_get_ns ();
138*f3087befSAndrew Turner   for (int i = 0; i < ITERS; i++)
139*f3087befSAndrew Turner     for (int c = 0; c < NUM_TESTS; c++)
140*f3087befSAndrew Turner       res += fn (a + strlen_tests[c] + (res & mask));
141*f3087befSAndrew Turner   t = clock_get_ns () - t;
142*f3087befSAndrew Turner   printf ("lat: %.3f\n", (double)strlen_size / t);
143*f3087befSAndrew Turner   maskv = res & mask;
14431914882SAlex Richardson }
14531914882SAlex Richardson 
146*f3087befSAndrew Turner static void inline __attribute ((always_inline))
147*f3087befSAndrew Turner strlen_small_aligned (const char *name, size_t (*fn)(const char *))
14831914882SAlex Richardson {
149*f3087befSAndrew Turner   printf ("%22s ", name);
15031914882SAlex Richardson 
151*f3087befSAndrew Turner   size_t res = 0, mask = maskv;
15231914882SAlex Richardson   for (int size = 1; size <= 64; size *= 2)
15331914882SAlex Richardson     {
15431914882SAlex Richardson       memset (a, 'x', size);
15531914882SAlex Richardson       a[size - 1] = 0;
15631914882SAlex Richardson 
15731914882SAlex Richardson       uint64_t t = clock_get_ns ();
15831914882SAlex Richardson       for (int i = 0; i < ITERS2; i++)
159*f3087befSAndrew Turner 	res += fn (a + (i & mask));
16031914882SAlex Richardson       t = clock_get_ns () - t;
161*f3087befSAndrew Turner       printf ("%d%c: %5.2f ", size < 1024 ? size : size / 1024,
16231914882SAlex Richardson 	      size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t);
16331914882SAlex Richardson     }
164*f3087befSAndrew Turner   maskv &= res;
16531914882SAlex Richardson   printf ("\n");
16631914882SAlex Richardson }
16731914882SAlex Richardson 
168*f3087befSAndrew Turner static void inline __attribute ((always_inline))
169*f3087befSAndrew Turner strlen_small_unaligned (const char *name, size_t (*fn)(const char *))
17031914882SAlex Richardson {
171*f3087befSAndrew Turner   printf ("%22s ", name);
17231914882SAlex Richardson 
173*f3087befSAndrew Turner   size_t res = 0, mask = maskv;
17431914882SAlex Richardson   int align = 9;
17531914882SAlex Richardson   for (int size = 1; size <= 64; size *= 2)
17631914882SAlex Richardson     {
17731914882SAlex Richardson       memset (a + align, 'x', size);
17831914882SAlex Richardson       a[align + size - 1] = 0;
17931914882SAlex Richardson 
18031914882SAlex Richardson       uint64_t t = clock_get_ns ();
18131914882SAlex Richardson       for (int i = 0; i < ITERS2; i++)
182*f3087befSAndrew Turner 	res += fn (a + align + (i & mask));
18331914882SAlex Richardson       t = clock_get_ns () - t;
184*f3087befSAndrew Turner       printf ("%d%c: %5.2f ", size < 1024 ? size : size / 1024,
18531914882SAlex Richardson 	      size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t);
18631914882SAlex Richardson     }
187*f3087befSAndrew Turner   maskv &= res;
18831914882SAlex Richardson   printf ("\n");
18931914882SAlex Richardson }
19031914882SAlex Richardson 
191*f3087befSAndrew Turner static void inline __attribute ((always_inline))
192*f3087befSAndrew Turner strlen_medium (const char *name, size_t (*fn)(const char *))
19331914882SAlex Richardson {
194*f3087befSAndrew Turner   printf ("%22s ", name);
19531914882SAlex Richardson 
196*f3087befSAndrew Turner   size_t res = 0, mask = maskv;
19731914882SAlex Richardson   for (int size = 128; size <= 4096; size *= 2)
19831914882SAlex Richardson     {
19931914882SAlex Richardson       memset (a, 'x', size);
20031914882SAlex Richardson       a[size - 1] = 0;
20131914882SAlex Richardson 
20231914882SAlex Richardson       uint64_t t = clock_get_ns ();
20331914882SAlex Richardson       for (int i = 0; i < ITERS3; i++)
204*f3087befSAndrew Turner 	res += fn (a + (i & mask));
20531914882SAlex Richardson       t = clock_get_ns () - t;
206*f3087befSAndrew Turner       printf ("%d%c: %5.2f ", size < 1024 ? size : size / 1024,
20731914882SAlex Richardson 	      size < 1024 ? 'B' : 'K', (double)size * ITERS3 / t);
20831914882SAlex Richardson     }
209*f3087befSAndrew Turner   maskv &= res;
21031914882SAlex Richardson   printf ("\n");
21131914882SAlex Richardson }
21231914882SAlex Richardson 
213*f3087befSAndrew Turner int main (void)
214*f3087befSAndrew Turner {
215*f3087befSAndrew Turner   rand32 (0x12345678);
216*f3087befSAndrew Turner   init_strlen_distribution ();
217*f3087befSAndrew Turner   init_strlen_tests ();
218*f3087befSAndrew Turner 
219*f3087befSAndrew Turner   DOTEST ("Random strlen (bytes/ns):\n", strlen_random);
220*f3087befSAndrew Turner   DOTEST ("Small aligned strlen (bytes/ns):\n", strlen_small_aligned);
221*f3087befSAndrew Turner   DOTEST ("Small unaligned strlen (bytes/ns):\n", strlen_small_unaligned);
222*f3087befSAndrew Turner   DOTEST ("Medium strlen (bytes/ns):\n", strlen_medium);
22331914882SAlex Richardson 
22431914882SAlex Richardson   return 0;
22531914882SAlex Richardson }
226