xref: /netbsd-src/external/gpl3/gcc.old/dist/contrib/bench-stringop (revision 4c3eb207d36f67d31994830c0a694161fc1ca39b)
1*4c3eb207Smrg#!/bin/bash
2*4c3eb207Smrg
3*4c3eb207Smrg# Script to measure memset and memcpy for different sizes and strategies.
4*4c3eb207Smrg#
5*4c3eb207Smrg# Contributed by Jan Hubicka <jh@suse.cz>
6*4c3eb207Smrg#
7*4c3eb207Smrg# Copyright (C) 2019 Free Software Foundation, Inc.
8*4c3eb207Smrg#
9*4c3eb207Smrg# This file is part of GCC.
10*4c3eb207Smrg#
11*4c3eb207Smrg# GCC is free software; you can redistribute it and/or modify
12*4c3eb207Smrg# it under the terms of the GNU General Public License as published by
13*4c3eb207Smrg# the Free Software Foundation; either version 3, or (at your option)
14*4c3eb207Smrg# any later version.
15*4c3eb207Smrg#
16*4c3eb207Smrg# GCC is distributed in the hope that it will be useful,
17*4c3eb207Smrg# but WITHOUT ANY WARRANTY; without even the implied warranty of
18*4c3eb207Smrg# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19*4c3eb207Smrg# GNU General Public License for more details.
20*4c3eb207Smrg#
21*4c3eb207Smrg# You should have received a copy of the GNU General Public License
22*4c3eb207Smrg# along with GCC; see the file COPYING.  If not, write to
23*4c3eb207Smrg# the Free Software Foundation, 51 Franklin Street, Fifth Floor,
24*4c3eb207Smrg# Boston, MA 02110-1301, USA.
25*4c3eb207Smrg
# Usage: bench-stringop MODE SIZE COMPILER [COMPILER-OPTIONS...]
#
# MODE is 32 or 64 and is passed to the compiler as -mMODE.  SIZE is the
# amount of memory copied in each test.  For every block size and every
# -mstringop-strategy variant the script compiles a small test program,
# times it, and prints one table row per block size together with the
# fastest variant.  The table is printed for memcpy and then for memset,
# each with a char buffer and then with a long ("Aligned") buffer.
32*4c3eb207Smrg
# test COMPILER AVG_SIZE FLAGS [LABEL]
#
# Compile the benchmark program below with COMPILER (a command line that is
# deliberately left unquoted so that it word-splits into program and
# options), passing FLAGS and -DAVG_SIZE=AVG_SIZE, then run the resulting
# a.out under /usr/bin/time.
#
# Reads the globals $type (element type of the buffer), $STRINGOP (extra
# -D options selecting memset instead of memcpy) and $memsize.
#
# Output: the elapsed time, preceded by one space and without a newline, on
# stdout.  The line "TIME LABEL" is appended to /tmp/accum so that the
# caller can pick the fastest variant.
#
# If the compiler does not produce an executable, " n/a" is printed,
# nothing is appended to /tmp/accum and the function returns 1.  Without
# this check the error text from /usr/bin/time ("cannot run ./a.out ...")
# ended up both in the table and in /tmp/accum, where it could be selected
# as the "best" entry because '/' sorts before any digit.
#
# NOTE: this function shadows the shell builtin `test'; use `[' for
# conditions inside this script.
test()
{
rm -f a.out
cat <<END | $1 -x c -O3 $3 -DAVG_SIZE=$2 $STRINGOP -DMEMORY_COPIES=$memsize -
#define BUFFER_SIZE (16*1024*1024 + AVG_SIZE*2)
/*#define MEMORY_COPIES (1024*1024*64*(long long)10)*/
$type t[BUFFER_SIZE];
int main()
{
  unsigned int i;
  for (i=0;i<((long long)MEMORY_COPIES + AVG_SIZE * 2 - 1)/AVG_SIZE*2;i++)
#ifdef test_memset
    __builtin_memset (t+(i*1024*1024+i*1)%(BUFFER_SIZE - AVG_SIZE*2), i, (AVG_SIZE + i) % (AVG_SIZE * 2 + 0));
#else
    __builtin_memcpy (t+(i*1024*1024+i*1)%(BUFFER_SIZE - AVG_SIZE*2), t+((i+1)*1024*1024*4+i*1)%(BUFFER_SIZE - AVG_SIZE *2), (AVG_SIZE + i) % (AVG_SIZE * 2 + 0));
#endif
  return 0;
}
END
# a.out was removed above, so its presence means this compile succeeded.
if [ ! -x ./a.out ]
then
  echo -n " n/a"
  return 1
fi
# /usr/bin/time writes its report to stderr; capture it instead of a.out's
# (empty) stdout.
TIME=`/usr/bin/time -f "%E" ./a.out 2>&1`
echo -n " "$TIME
echo $TIME $4 >>/tmp/accum
}
56*4c3eb207Smrg
# test2 IGNORED AVG_SIZE FLAGS [LABEL]
#
# Same benchmark as test(), but always compiled with clang; the first
# argument is not used and compiler diagnostics are discarded.  Prints the
# elapsed time (leading space, no newline) and appends "TIME LABEL" to
# /tmp/accum.
test2()
{
rm -f a.out
# The program text is handed to clang on standard input ("-").
clang -x c -O3 $3 -DAVG_SIZE=$2 $STRINGOP -DMEMORY_COPIES=$memsize - 2>/dev/null <<END
#define BUFFER_SIZE (16*1024*1024 + AVG_SIZE*2)
/*#define MEMORY_COPIES (1024*1024*64*(long long)10)*/
$type t[BUFFER_SIZE];
int main()
{
  unsigned int i;
  for (i=0;i<((long long)MEMORY_COPIES + AVG_SIZE * 2 - 1)/AVG_SIZE*2;i++)
#ifdef test_memset
    __builtin_memset (t+(i*1024*1024+i*1)%(BUFFER_SIZE - AVG_SIZE*2), i, (AVG_SIZE + i) % (AVG_SIZE * 2 + 0));
#else
    __builtin_memcpy (t+(i*1024*1024+i*1)%(BUFFER_SIZE - AVG_SIZE*2), t+((i+1)*1024*1024*4+i*1)%(BUFFER_SIZE - AVG_SIZE *2), (AVG_SIZE + i) % (AVG_SIZE * 2 + 0));
#endif
  return 0;
}
END
# The timing report arrives on stderr, hence the 2>&1 inside the capture.
TIME=$(/usr/bin/time -f '%E' ./a.out 2>&1)
echo -n " "$TIME
echo $TIME $4 >>/tmp/accum
}
80*4c3eb207Smrg
# testrow MODE COMPILER AVG_SIZE
#
# Print one table row: the block size followed by the time of every
# strategy variant, the PGO and -minline-stringops-dynamically times, and
# finally the fastest "TIME LABEL" pair among the strategy variants.
# The rep_8byte columns are produced only when the global $mode is 64.
# The MODE argument itself is not read; the global is used instead.
testrow()
{
local cc=$2 avg=$3
local entry
local -a variants

# Start with an empty accumulator; test() appends one line per variant.
: >/tmp/accum
printf '%12i ' $3

# Each entry is LABEL:FLAGS, in the order of the table columns.
variants=(
  "libcall:-mstringop-strategy=libcall"
  "rep1:-mstringop-strategy=rep_byte -malign-stringops"
  "rep1noalign:-mstringop-strategy=rep_byte -mno-align-stringops"
  "rep4:-mstringop-strategy=rep_4byte -malign-stringops"
  "rep4noalign:-mstringop-strategy=rep_4byte -mno-align-stringops"
)
if [ "$mode" == 64 ]
then
  variants+=(
    "rep8:-mstringop-strategy=rep_8byte -malign-stringops"
    "rep8noalign:-mstringop-strategy=rep_8byte -mno-align-stringops"
  )
fi
variants+=(
  "loop:-mstringop-strategy=loop -malign-stringops"
  "loopnoalign:-mstringop-strategy=loop -mno-align-stringops"
  "unrl:-mstringop-strategy=unrolled_loop -malign-stringops"
  "unrlnoalign:-mstringop-strategy=unrolled_loop -mno-align-stringops"
  "sse:-mstringop-strategy=vector_loop -malign-stringops"
  "ssenoalign:-mstringop-strategy=vector_loop -mno-align-stringops -msse2"
  "byte:-mstringop-strategy=byte_loop"
)
# (A clang column via `test2 "$2" "$3" ""' used to sit before "byte"; it
# is disabled.)

for entry in "${variants[@]}"
do
  # Text after the first ':' is the flag string, text before it the label.
  test "$cc" "$avg" "${entry#*:}" "${entry%%:*}"
done

# Pick the winner now, before the profile-based runs add their own lines.
best=$(sort /tmp/accum | head -1)

# The instrumented run only produces profile data; its time is not shown.
test "$cc" "$avg" " -fprofile-generate" >/dev/null 2>&1
test "$cc" "$avg" " -fprofile-use"
test "$cc" "$avg" " -minline-stringops-dynamically"
echo "    $best"
}
109*4c3eb207Smrg
# test_all_sizes MODE COMPILER
#
# Print the column header that matches the global $mode (the 64-bit table
# has two extra rep8 columns) and then one testrow line per block size,
# from the largest size down to 1.
test_all_sizes()
{
local header
case "$mode" in
  64)
    header="  block size  libcall rep1    noalg   rep4    noalg   rep8    noalg   loop    noalg   unrl    noalg   sse     noalg   byte    PGO     dynamic    BEST"
    ;;
  *)
    header="  block size  libcall rep1    noalg   rep4    noalg   loop    noalg   unrl    noalg   sse     noalg   byte    PGO     dynamic    BEST"
    ;;
esac
echo "$header"

# Alternative size lists that have been used with this script:
#   1 2 3 4 6 8 10 12 14 16 24 32 48 64 128 256 512 1024 4096 8192 81920 819200 8192000
#   8192000 819200 81920 8192 4096 2048 1024 512 256 128 64 48 32 24 16 14 12 10 8 6 5 4 3 2 1
#   128 256 1024 4096 8192 81920 819200
for size in 8192000 819200 81920 20480 8192 4096 2048 1024 512 256 \
            128 64 48 32 24 16 14 12 10 8 6 4 1
do
  testrow "$1" "$2" $size
done
}
126*4c3eb207Smrg
# Command line: MODE SIZE COMPILER [COMPILER-OPTIONS...]
mode=$1
shift
# memsize is substituted into -DMEMORY_COPIES= by test()/test2().
export memsize=$1
shift
# Everything that is left is the compiler command line.
cmdline=$*

# Validate all three parts of the command line.  An empty size would yield
# an empty -DMEMORY_COPIES= definition and an empty compiler command would
# make "-m$mode" the program to run, so both are rejected here instead of
# failing once per table cell.
args_ok=yes
case "$mode" in
  32|64) ;;
  *) args_ok=no ;;
esac
if [ -z "$memsize" ] || [ -z "$cmdline" ]
then
  args_ok=no
fi
if [ "$args_ok" != yes ]
then
  # Usage errors go to stderr and produce a non-zero exit status so that
  # callers can tell them apart from a successful benchmark run.
  {
    echo "Usage:"
    echo "test_stringop mode size cmdline"
    echo "mode is either 32 or 64"
    echo "size is amount of memory copied in each test.  Should be chosed small enough so runtime is less than minute for each test and sorting works"
    echo "Example: test_stringop 32 640000000 ./xgcc -B ./ -march=pentium3"
  } >&2
  exit 1
fi

# memcpy: STRINGOP is empty, so the test program takes its #else branch.
echo "memcpy"
export STRINGOP=""
type=char
test_all_sizes $mode "$cmdline -m$mode"
# "Aligned" runs use a buffer of long instead of char.
echo "Aligned"
type=long
test_all_sizes $mode "$cmdline -m$mode"

# memset: defining test_memset selects the __builtin_memset branch.
echo "memset"
export STRINGOP="-Dtest_memset=1"
type=char
test_all_sizes $mode "$cmdline -m$mode"
echo "Aligned"
type=long
test_all_sizes $mode "$cmdline -m$mode"
159