131914882SAlex Richardson /* 231914882SAlex Richardson * strlen benchmark. 331914882SAlex Richardson * 4d49ad206SAndrew Turner * Copyright (c) 2020-2021, Arm Limited. 5072a4ba8SAndrew Turner * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 631914882SAlex Richardson */ 731914882SAlex Richardson 831914882SAlex Richardson #define _GNU_SOURCE 931914882SAlex Richardson #include <stdint.h> 1031914882SAlex Richardson #include <stdio.h> 1131914882SAlex Richardson #include <string.h> 1231914882SAlex Richardson #include <assert.h> 1331914882SAlex Richardson #include "stringlib.h" 1431914882SAlex Richardson #include "benchlib.h" 1531914882SAlex Richardson 16d49ad206SAndrew Turner #define ITERS 5000 17*f3087befSAndrew Turner #define ITERS2 40000000 18*f3087befSAndrew Turner #define ITERS3 4000000 19*f3087befSAndrew Turner #define NUM_TESTS 65536 2031914882SAlex Richardson 2131914882SAlex Richardson #define MAX_ALIGN 32 22*f3087befSAndrew Turner #define MAX_STRLEN 128 2331914882SAlex Richardson 2431914882SAlex Richardson static char a[(MAX_STRLEN + 1) * MAX_ALIGN] __attribute__((__aligned__(4096))); 2531914882SAlex Richardson 26*f3087befSAndrew Turner #define DOTEST(STR,TESTFN) \ 27*f3087befSAndrew Turner printf (STR); \ 28*f3087befSAndrew Turner RUN (TESTFN, strlen); \ 29*f3087befSAndrew Turner RUNA64 (TESTFN, __strlen_aarch64); \ 30*f3087befSAndrew Turner RUNA64 (TESTFN, __strlen_aarch64_mte); \ 31*f3087befSAndrew Turner RUNSVE (TESTFN, __strlen_aarch64_sve); \ 32*f3087befSAndrew Turner RUNT32 (TESTFN, __strlen_armv6t2); \ 33*f3087befSAndrew Turner printf ("\n"); 3431914882SAlex Richardson 35d49ad206SAndrew Turner static uint16_t strlen_tests[NUM_TESTS]; 3631914882SAlex Richardson 3731914882SAlex Richardson typedef struct { uint16_t size; uint16_t freq; } freq_data_t; 3831914882SAlex Richardson typedef struct { uint8_t align; uint16_t freq; } align_data_t; 3931914882SAlex Richardson 4031914882SAlex Richardson #define SIZE_NUM 65536 4131914882SAlex Richardson #define SIZE_MASK (SIZE_NUM - 1) 4231914882SAlex Richardson static uint8_t strlen_len_arr[SIZE_NUM]; 4331914882SAlex Richardson 4431914882SAlex Richardson /* Frequency data for strlen sizes up to 128 based on SPEC2017. */ 4531914882SAlex Richardson static freq_data_t strlen_len_freq[] = 4631914882SAlex Richardson { 4731914882SAlex Richardson { 12,22671}, { 18,12834}, { 13, 9555}, { 6, 6348}, { 17, 6095}, { 11, 2115}, 4831914882SAlex Richardson { 10, 1335}, { 7, 814}, { 2, 646}, { 9, 483}, { 8, 471}, { 16, 418}, 4931914882SAlex Richardson { 4, 390}, { 1, 388}, { 5, 233}, { 3, 204}, { 0, 79}, { 14, 79}, 5031914882SAlex Richardson { 15, 69}, { 26, 36}, { 22, 35}, { 31, 24}, { 32, 24}, { 19, 21}, 5131914882SAlex Richardson { 25, 17}, { 28, 15}, { 21, 14}, { 33, 14}, { 20, 13}, { 24, 9}, 5231914882SAlex Richardson { 29, 9}, { 30, 9}, { 23, 7}, { 34, 7}, { 27, 6}, { 44, 5}, 5331914882SAlex Richardson { 42, 4}, { 45, 3}, { 47, 3}, { 40, 2}, { 41, 2}, { 43, 2}, 5431914882SAlex Richardson { 58, 2}, { 78, 2}, { 36, 2}, { 48, 1}, { 52, 1}, { 60, 1}, 5531914882SAlex Richardson { 64, 1}, { 56, 1}, { 76, 1}, { 68, 1}, { 80, 1}, { 84, 1}, 5631914882SAlex Richardson { 72, 1}, { 86, 1}, { 35, 1}, { 39, 1}, { 50, 1}, { 38, 1}, 5731914882SAlex Richardson { 37, 1}, { 46, 1}, { 98, 1}, {102, 1}, {128, 1}, { 51, 1}, 5831914882SAlex Richardson {107, 1}, { 0, 0} 5931914882SAlex Richardson }; 6031914882SAlex Richardson 6131914882SAlex Richardson #define ALIGN_NUM 1024 6231914882SAlex Richardson #define ALIGN_MASK (ALIGN_NUM - 1) 6331914882SAlex Richardson static uint8_t strlen_align_arr[ALIGN_NUM]; 6431914882SAlex Richardson 6531914882SAlex Richardson /* Alignment data for strlen based on SPEC2017. */ 6631914882SAlex Richardson static align_data_t string_align_freq[] = 6731914882SAlex Richardson { 6831914882SAlex Richardson {8, 470}, {32, 427}, {16, 99}, {1, 19}, {2, 6}, {4, 3}, {0, 0} 6931914882SAlex Richardson }; 7031914882SAlex Richardson 7131914882SAlex Richardson static void 7231914882SAlex Richardson init_strlen_distribution (void) 7331914882SAlex Richardson { 7431914882SAlex Richardson int i, j, freq, size, n; 7531914882SAlex Richardson 7631914882SAlex Richardson for (n = i = 0; (freq = strlen_len_freq[i].freq) != 0; i++) 7731914882SAlex Richardson for (j = 0, size = strlen_len_freq[i].size; j < freq; j++) 7831914882SAlex Richardson strlen_len_arr[n++] = size; 7931914882SAlex Richardson assert (n == SIZE_NUM); 8031914882SAlex Richardson 8131914882SAlex Richardson for (n = i = 0; (freq = string_align_freq[i].freq) != 0; i++) 8231914882SAlex Richardson for (j = 0, size = string_align_freq[i].align; j < freq; j++) 8331914882SAlex Richardson strlen_align_arr[n++] = size; 8431914882SAlex Richardson assert (n == ALIGN_NUM); 8531914882SAlex Richardson } 8631914882SAlex Richardson 8731914882SAlex Richardson static void 8831914882SAlex Richardson init_strlen_tests (void) 8931914882SAlex Richardson { 9031914882SAlex Richardson uint16_t index[MAX_ALIGN]; 9131914882SAlex Richardson 9231914882SAlex Richardson memset (a, 'x', sizeof (a)); 9331914882SAlex Richardson 9431914882SAlex Richardson /* Create indices for strings at all alignments. */ 9531914882SAlex Richardson for (int i = 0; i < MAX_ALIGN; i++) 9631914882SAlex Richardson { 9731914882SAlex Richardson index[i] = i * (MAX_STRLEN + 1); 9831914882SAlex Richardson a[index[i] + MAX_STRLEN] = 0; 9931914882SAlex Richardson } 10031914882SAlex Richardson 10131914882SAlex Richardson /* Create a random set of strlen input strings using the string length 10231914882SAlex Richardson and alignment distributions. */ 103d49ad206SAndrew Turner for (int n = 0; n < NUM_TESTS; n++) 10431914882SAlex Richardson { 10531914882SAlex Richardson int align = strlen_align_arr[rand32 (0) & ALIGN_MASK]; 10631914882SAlex Richardson int exp_len = strlen_len_arr[rand32 (0) & SIZE_MASK]; 10731914882SAlex Richardson 10831914882SAlex Richardson strlen_tests[n] = 10931914882SAlex Richardson index[(align + exp_len) & (MAX_ALIGN - 1)] + MAX_STRLEN - exp_len; 110*f3087befSAndrew Turner assert ((strlen_tests[n] & (align - 1)) == 0); 111*f3087befSAndrew Turner assert (strlen (a + strlen_tests[n]) == exp_len); 11231914882SAlex Richardson } 11331914882SAlex Richardson } 11431914882SAlex Richardson 11531914882SAlex Richardson static volatile size_t maskv = 0; 11631914882SAlex Richardson 117*f3087befSAndrew Turner static void inline __attribute ((always_inline)) 118*f3087befSAndrew Turner strlen_random (const char *name, size_t (*fn)(const char *)) 11931914882SAlex Richardson { 120*f3087befSAndrew Turner size_t res = 0, mask = maskv; 121*f3087befSAndrew Turner uint64_t strlen_size = 0; 122*f3087befSAndrew Turner printf ("%22s ", name); 12331914882SAlex Richardson 124d49ad206SAndrew Turner for (int c = 0; c < NUM_TESTS; c++) 125*f3087befSAndrew Turner strlen_size += fn (a + strlen_tests[c]) + 1; 12631914882SAlex Richardson strlen_size *= ITERS; 12731914882SAlex Richardson 128*f3087befSAndrew Turner /* Measure throughput of strlen. */ 12931914882SAlex Richardson uint64_t t = clock_get_ns (); 13031914882SAlex Richardson for (int i = 0; i < ITERS; i++) 131d49ad206SAndrew Turner for (int c = 0; c < NUM_TESTS; c++) 132*f3087befSAndrew Turner res += fn (a + strlen_tests[c]); 13331914882SAlex Richardson t = clock_get_ns () - t; 134*f3087befSAndrew Turner printf ("tp: %.3f ", (double)strlen_size / t); 135*f3087befSAndrew Turner 136*f3087befSAndrew Turner /* Measure latency of strlen result with (res & mask). */ 137*f3087befSAndrew Turner t = clock_get_ns (); 138*f3087befSAndrew Turner for (int i = 0; i < ITERS; i++) 139*f3087befSAndrew Turner for (int c = 0; c < NUM_TESTS; c++) 140*f3087befSAndrew Turner res += fn (a + strlen_tests[c] + (res & mask)); 141*f3087befSAndrew Turner t = clock_get_ns () - t; 142*f3087befSAndrew Turner printf ("lat: %.3f\n", (double)strlen_size / t); 143*f3087befSAndrew Turner maskv = res & mask; 14431914882SAlex Richardson } 14531914882SAlex Richardson 146*f3087befSAndrew Turner static void inline __attribute ((always_inline)) 147*f3087befSAndrew Turner strlen_small_aligned (const char *name, size_t (*fn)(const char *)) 14831914882SAlex Richardson { 149*f3087befSAndrew Turner printf ("%22s ", name); 15031914882SAlex Richardson 151*f3087befSAndrew Turner size_t res = 0, mask = maskv; 15231914882SAlex Richardson for (int size = 1; size <= 64; size *= 2) 15331914882SAlex Richardson { 15431914882SAlex Richardson memset (a, 'x', size); 15531914882SAlex Richardson a[size - 1] = 0; 15631914882SAlex Richardson 15731914882SAlex Richardson uint64_t t = clock_get_ns (); 15831914882SAlex Richardson for (int i = 0; i < ITERS2; i++) 159*f3087befSAndrew Turner res += fn (a + (i & mask)); 16031914882SAlex Richardson t = clock_get_ns () - t; 161*f3087befSAndrew Turner printf ("%d%c: %5.2f ", size < 1024 ? size : size / 1024, 16231914882SAlex Richardson size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t); 16331914882SAlex Richardson } 164*f3087befSAndrew Turner maskv &= res; 16531914882SAlex Richardson printf ("\n"); 16631914882SAlex Richardson } 16731914882SAlex Richardson 168*f3087befSAndrew Turner static void inline __attribute ((always_inline)) 169*f3087befSAndrew Turner strlen_small_unaligned (const char *name, size_t (*fn)(const char *)) 17031914882SAlex Richardson { 171*f3087befSAndrew Turner printf ("%22s ", name); 17231914882SAlex Richardson 173*f3087befSAndrew Turner size_t res = 0, mask = maskv; 17431914882SAlex Richardson int align = 9; 17531914882SAlex Richardson for (int size = 1; size <= 64; size *= 2) 17631914882SAlex Richardson { 17731914882SAlex Richardson memset (a + align, 'x', size); 17831914882SAlex Richardson a[align + size - 1] = 0; 17931914882SAlex Richardson 18031914882SAlex Richardson uint64_t t = clock_get_ns (); 18131914882SAlex Richardson for (int i = 0; i < ITERS2; i++) 182*f3087befSAndrew Turner res += fn (a + align + (i & mask)); 18331914882SAlex Richardson t = clock_get_ns () - t; 184*f3087befSAndrew Turner printf ("%d%c: %5.2f ", size < 1024 ? size : size / 1024, 18531914882SAlex Richardson size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t); 18631914882SAlex Richardson } 187*f3087befSAndrew Turner maskv &= res; 18831914882SAlex Richardson printf ("\n"); 18931914882SAlex Richardson } 19031914882SAlex Richardson 191*f3087befSAndrew Turner static void inline __attribute ((always_inline)) 192*f3087befSAndrew Turner strlen_medium (const char *name, size_t (*fn)(const char *)) 19331914882SAlex Richardson { 194*f3087befSAndrew Turner printf ("%22s ", name); 19531914882SAlex Richardson 196*f3087befSAndrew Turner size_t res = 0, mask = maskv; 19731914882SAlex Richardson for (int size = 128; size <= 4096; size *= 2) 19831914882SAlex Richardson { 19931914882SAlex Richardson memset (a, 'x', size); 20031914882SAlex Richardson a[size - 1] = 0; 20131914882SAlex Richardson 20231914882SAlex Richardson uint64_t t = clock_get_ns (); 20331914882SAlex Richardson for (int i = 0; i < ITERS3; i++) 204*f3087befSAndrew Turner res += fn (a + (i & mask)); 20531914882SAlex Richardson t = clock_get_ns () - t; 206*f3087befSAndrew Turner printf ("%d%c: %5.2f ", size < 1024 ? size : size / 1024, 20731914882SAlex Richardson size < 1024 ? 'B' : 'K', (double)size * ITERS3 / t); 20831914882SAlex Richardson } 209*f3087befSAndrew Turner maskv &= res; 21031914882SAlex Richardson printf ("\n"); 21131914882SAlex Richardson } 21231914882SAlex Richardson 213*f3087befSAndrew Turner int main (void) 214*f3087befSAndrew Turner { 215*f3087befSAndrew Turner rand32 (0x12345678); 216*f3087befSAndrew Turner init_strlen_distribution (); 217*f3087befSAndrew Turner init_strlen_tests (); 218*f3087befSAndrew Turner 219*f3087befSAndrew Turner DOTEST ("Random strlen (bytes/ns):\n", strlen_random); 220*f3087befSAndrew Turner DOTEST ("Small aligned strlen (bytes/ns):\n", strlen_small_aligned); 221*f3087befSAndrew Turner DOTEST ("Small unaligned strlen (bytes/ns):\n", strlen_small_unaligned); 222*f3087befSAndrew Turner DOTEST ("Medium strlen (bytes/ns):\n", strlen_medium); 22331914882SAlex Richardson 22431914882SAlex Richardson return 0; 22531914882SAlex Richardson } 226