xref: /netbsd-src/external/gpl3/gcc/dist/contrib/bench-stringop (revision fb8a8121f28072308659629b86cfb7c449bd93e1)
1*fb8a8121Smrg#!/bin/bash
2*fb8a8121Smrg
3*fb8a8121Smrg# Script to measure memset and memcpy for different sizes and strategies.
4*fb8a8121Smrg#
5*fb8a8121Smrg# Contributed by Jan Hubicka <jh@suse.cz>
6*fb8a8121Smrg#
7*fb8a8121Smrg# Copyright (C) 2019 Free Software Foundation, Inc.
8*fb8a8121Smrg#
9*fb8a8121Smrg# This file is part of GCC.
10*fb8a8121Smrg#
11*fb8a8121Smrg# GCC is free software; you can redistribute it and/or modify
12*fb8a8121Smrg# it under the terms of the GNU General Public License as published by
13*fb8a8121Smrg# the Free Software Foundation; either version 3, or (at your option)
14*fb8a8121Smrg# any later version.
15*fb8a8121Smrg#
16*fb8a8121Smrg# GCC is distributed in the hope that it will be useful,
17*fb8a8121Smrg# but WITHOUT ANY WARRANTY; without even the implied warranty of
18*fb8a8121Smrg# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19*fb8a8121Smrg# GNU General Public License for more details.
20*fb8a8121Smrg#
21*fb8a8121Smrg# You should have received a copy of the GNU General Public License
22*fb8a8121Smrg# along with GCC; see the file COPYING.  If not, write to
23*fb8a8121Smrg# the Free Software Foundation, 51 Franklin Street, Fifth Floor,
24*fb8a8121Smrg# Boston, MA 02110-1301, USA.
25*fb8a8121Smrg
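# For memcpy and then memset, first with a char buffer and then with a
# long buffer, this script compiles a small benchmark once per
# -mstringop-strategy setting (with and without -malign-stringops where
# applicable) for a range of average block sizes and times each binary.
#
# One table row is printed per block size; the row ends with the entry
# that sorts first among the times recorded for the strategy columns.
#
# Arguments: mode size cmdline
#   mode    is either 32 or 64 (passed to the compiler as -m32 / -m64)
#   size    is the amount of memory copied in each test
#   cmdline is the compiler command, e.g. ./xgcc -B ./ -march=pentium3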
32*fb8a8121Smrg
# test COMPILER AVG_SIZE FLAGS [LABEL]
#
# Compile the benchmark program below with COMPILER and time one run of it.
#
#   $1  compiler command line; expanded unquoted on purpose so that a
#       multi-word command such as "./xgcc -B ./ -m64" splits into words
#   $2  average block size, passed to the program as -DAVG_SIZE
#   $3  extra compiler flags (also word-split on purpose)
#   $4  optional label stored next to the time in /tmp/accum
#
# Globals read: STRINGOP (extra -D flags), memsize (-DMEMORY_COPIES) and
# type (element type of the buffer, substituted into the C source by the
# here-document expansion).
#
# Prints " <elapsed>" without a newline and appends "<elapsed> <label>" to
# /tmp/accum.  If no executable was produced, prints " FAIL" padded to the
# same width and records nothing, so that an error message can never be
# sorted to the top of /tmp/accum and reported as the best result.
#
# NOTE: this function shadows the shell's `test' builtin for the rest of
# the script; conditions in this file are written with `[' instead.
test()
{
# Remove any stale binary so an old a.out is never timed by mistake.
rm -f a.out
$1 -x c -O3 $3 -DAVG_SIZE=$2 $STRINGOP -DMEMORY_COPIES=$memsize - <<END
#define BUFFER_SIZE (16*1024*1024 + AVG_SIZE*2)
/*#define MEMORY_COPIES (1024*1024*64*(long long)10)*/
$type t[BUFFER_SIZE];
int main()
{
  unsigned int i;
  for (i=0;i<((long long)MEMORY_COPIES + AVG_SIZE * 2 - 1)/AVG_SIZE*2;i++)
#ifdef test_memset
    __builtin_memset (t+(i*1024*1024+i*1)%(BUFFER_SIZE - AVG_SIZE*2), i, (AVG_SIZE + i) % (AVG_SIZE * 2 + 0));
#else
    __builtin_memcpy (t+(i*1024*1024+i*1)%(BUFFER_SIZE - AVG_SIZE*2), t+((i+1)*1024*1024*4+i*1)%(BUFFER_SIZE - AVG_SIZE *2), (AVG_SIZE + i) % (AVG_SIZE * 2 + 0));
#endif
  return 0;
}
END
if [ ! -x a.out ]
then
  # Compilation failed.  Keep the table aligned (a leading space plus a
  # 7-character cell, matching the 8-column header spacing) and leave
  # /tmp/accum untouched.
  echo -n " FAIL   "
  return 1
fi
# %E is GNU time's elapsed-time format; time writes it to stderr, hence 2>&1.
TIME=`/usr/bin/time -f "%E" ./a.out 2>&1`
echo -n " "$TIME
echo $TIME $4 >>/tmp/accum
}
56*fb8a8121Smrg
# test2 IGNORED AVG_SIZE FLAGS [LABEL]
#
# Same benchmark as test(), but always compiled with clang and with the
# compiler's stderr discarded.  $1 is accepted only so the argument
# positions match test(); it is not used.
#
#   $2  average block size, passed to the program as -DAVG_SIZE
#   $3  extra compiler flags (word-split on purpose)
#   $4  optional label stored next to the time in /tmp/accum
#
# Globals read: STRINGOP, memsize and type (see the C source below).
#
# Prints " <elapsed>" without a newline and appends "<elapsed> <label>" to
# /tmp/accum.  If no executable was produced (for example clang is not
# installed), prints " FAIL" padded to the same width and records nothing,
# so that an error message is never treated as a timing.
test2()
{
# Remove any stale binary so an old a.out is never timed by mistake.
rm -f a.out
clang -x c -O3 $3 -DAVG_SIZE=$2 $STRINGOP -DMEMORY_COPIES=$memsize 2>/dev/null - <<END
#define BUFFER_SIZE (16*1024*1024 + AVG_SIZE*2)
/*#define MEMORY_COPIES (1024*1024*64*(long long)10)*/
$type t[BUFFER_SIZE];
int main()
{
  unsigned int i;
  for (i=0;i<((long long)MEMORY_COPIES + AVG_SIZE * 2 - 1)/AVG_SIZE*2;i++)
#ifdef test_memset
    __builtin_memset (t+(i*1024*1024+i*1)%(BUFFER_SIZE - AVG_SIZE*2), i, (AVG_SIZE + i) % (AVG_SIZE * 2 + 0));
#else
    __builtin_memcpy (t+(i*1024*1024+i*1)%(BUFFER_SIZE - AVG_SIZE*2), t+((i+1)*1024*1024*4+i*1)%(BUFFER_SIZE - AVG_SIZE *2), (AVG_SIZE + i) % (AVG_SIZE * 2 + 0));
#endif
  return 0;
}
END
if [ ! -x a.out ]
then
  # Compilation failed.  Keep the table aligned (a leading space plus a
  # 7-character cell, matching the 8-column header spacing) and leave
  # /tmp/accum untouched.
  echo -n " FAIL   "
  return 1
fi
# %E is GNU time's elapsed-time format; time writes it to stderr, hence 2>&1.
TIME=`/usr/bin/time -f "%E" ./a.out 2>&1`
echo -n " "$TIME
echo $TIME $4 >>/tmp/accum
}
80*fb8a8121Smrg
# testrow MODE COMPILER SIZE
#
# Emit one table row for average block size SIZE ($3), compiling with the
# command line COMPILER ($2).  $1 is not read here; the 64-bit-only columns
# are selected by the global $mode.
#
# Each column is produced by one call to the test() function defined in
# this file (not the shell builtin).  The labelled strategy runs are
# collected in /tmp/accum, and the line that sorts first is printed at the
# end of the row.  The -fprofile-use and -minline-stringops-dynamically
# runs happen after that line has been picked, so they are not candidates.
testrow()
{
local plan pair strat tag

# Start the row with an empty results file and the block-size column.
: >/tmp/accum
printf "%12i " $3

test "$2" "$3" "-mstringop-strategy=libcall" libcall

# Strategies measured twice, with and without -malign-stringops, written
# as "strategy:column-label".  rep_8byte is only run in 64-bit mode.
plan="rep_byte:rep1 rep_4byte:rep4"
if [ "$mode" == 64 ]
then
  plan="$plan rep_8byte:rep8"
fi
plan="$plan loop:loop unrolled_loop:unrl"

for pair in $plan
do
  strat=${pair%%:*}
  tag=${pair##*:}
  test "$2" "$3" "-mstringop-strategy=$strat -malign-stringops" "$tag"
  test "$2" "$3" "-mstringop-strategy=$strat -mno-align-stringops" "${tag}noalign"
done

# vector_loop is kept out of the table above because only its unaligned
# variant is given -msse2.
test "$2" "$3" "-mstringop-strategy=vector_loop -malign-stringops" sse
test "$2" "$3" "-mstringop-strategy=vector_loop -mno-align-stringops -msse2" ssenoalign
# Disabled clang comparison column:
#test2 "$2" "$3" ""
test "$2" "$3" "-mstringop-strategy=byte_loop" byte

# Pick the winner before the unlabelled runs below append to the file.
best=$(sort /tmp/accum | head -1)

# The training run only exists to produce profile data; hide its output.
test "$2" "$3" " -fprofile-generate" >/dev/null 2>&1
test "$2" "$3" " -fprofile-use"
test "$2" "$3" " -minline-stringops-dynamically"
echo "    $best"
}
109*fb8a8121Smrg
# test_all_sizes MODE COMPILER
#
# Print the column header and then one testrow() line per block size.
# MODE ($1) and COMPILER ($2) are handed to testrow() unchanged; the header
# layout itself follows the global $mode, which adds the rep8 columns in
# 64-bit mode.
test_all_sizes()
{
local rep8_cols bytes

# The two rep8 columns exist only in 64-bit mode.
rep8_cols=""
if [ "$mode" == 64 ]
then
  rep8_cols="rep8    noalg   "
fi
echo "  block size  libcall rep1    noalg   rep4    noalg   ${rep8_cols}loop    noalg   unrl    noalg   sse     noalg   byte    PGO     dynamic    BEST"

# Alternative size lists kept for reference:
#   1 2 3 4 6 8 10 12 14 16 24 32 48 64 128 256 512 1024 4096 8192 81920 819200 8192000
#   8192000 819200 81920 8192 4096 2048 1024 512 256 128 64 48 32 24 16 14 12 10 8 6 5 4 3 2 1
#   128 256 1024 4096 8192 81920 819200
for bytes in 8192000 819200 81920 20480 8192 4096 2048 1024 512 256 128 64 48 32 24 16 14 12 10 8 6 4 1
do
  testrow "$1" "$2" $bytes
done
}
126*fb8a8121Smrg
# Main script.
#
# Arguments: mode size cmdline...
#   mode     32 or 64; appended to the compiler command as -m$mode
#   size     exported as memsize and passed to the benchmark as
#            -DMEMORY_COPIES
#   cmdline  every remaining word, joined into the compiler command
mode=$1
shift
export memsize=$1
shift
cmdline=$*

# Reject an unknown mode and also a missing size or compiler command:
# without them every compilation would fail and the tables would hold no
# timings.  Usage goes to stderr and the script exits with a failure
# status so that callers can detect the error.
if { [ "$mode" != 32 ] && [ "$mode" != 64 ]; } || [ -z "$memsize" ] || [ -z "$cmdline" ]
then
  prog=${0##*/}
  {
    echo "Usage:"
    echo "$prog mode size cmdline"
    echo "mode is either 32 or 64"
    echo "size is amount of memory copied in each test.  Should be chosen small enough so runtime is less than minute for each test and sorting works"
    echo "Example: $prog 32 640000000 ./xgcc -B ./ -march=pentium3"
  } >&2
  exit 1
fi

# Run the memcpy tables first, then the memset tables.  For each operation
# the first table uses a char buffer and the one announced as "Aligned"
# uses a long buffer; test() substitutes $type into the C source.
for op in memcpy memset
do
  echo "$op"
  if [ "$op" = memset ]
  then
    # Selects the __builtin_memset branch of the benchmark source.
    export STRINGOP="-Dtest_memset=1"
  else
    export STRINGOP=""
  fi
  type=char
  test_all_sizes $mode "$cmdline -m$mode"
  echo "Aligned"
  type=long
  test_all_sizes $mode "$cmdline -m$mode"
done
159