#!/bin/bash

# Script to measure memset and memcpy for different sizes and strategies.
#
# Contributed by Jan Hubicka <jh@suse.cz>
#
# Copyright (C) 2019 Free Software Foundation, Inc.
#
# This file is part of GCC.
#
# GCC is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# GCC is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING.  If not, write to
# the Free Software Foundation, 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301, USA.

# Usage: test_stringop mode size cmdline
#
# mode is 32 or 64 and is passed to the compiler as -m<mode>; size is the
# amount of memory copied in each test; cmdline is the compiler to invoke
# (for example "./xgcc -B ./ -march=pentium3").
#
# For memcpy and then memset, first on a char buffer and then on a long
# buffer (labelled "Aligned"), the script prints one table row per block
# size.  Each column is the run time of a small generated C program built
# with a different -mstringop-strategy setting (with and without
# -malign-stringops), followed by a profile-feedback build, a
# -minline-stringops-dynamically build and the fastest entry of the row.
#######################################
# Write the C source of the benchmark program to stdout.
# The program loops over a large static buffer calling __builtin_memset
# (when test_memset is defined) or __builtin_memcpy with varying offsets
# and lengths.  AVG_SIZE, MEMORY_COPIES and test_memset are not defined in
# the source; callers supply them with -D on the compiler command line.
# Globals: type (read) - C element type of the buffer.
#######################################
emit_benchmark_source()
{
cat <<END
#define BUFFER_SIZE (16*1024*1024 + AVG_SIZE*2)
/*#define MEMORY_COPIES (1024*1024*64*(long long)10)*/
$type t[BUFFER_SIZE];
int main()
{
  unsigned int i;
  for (i=0;i<((long long)MEMORY_COPIES + AVG_SIZE * 2 - 1)/AVG_SIZE*2;i++)
#ifdef test_memset
    __builtin_memset (t+(i*1024*1024+i*1)%(BUFFER_SIZE - AVG_SIZE*2), i, (AVG_SIZE + i) % (AVG_SIZE * 2 + 0));
#else
    __builtin_memcpy (t+(i*1024*1024+i*1)%(BUFFER_SIZE - AVG_SIZE*2), t+((i+1)*1024*1024*4+i*1)%(BUFFER_SIZE - AVG_SIZE *2), (AVG_SIZE + i) % (AVG_SIZE * 2 + 0));
#endif
  return 0;
}
END
}

#######################################
# Time ./a.out and report the result.
# Arguments: $1 - label stored next to the time (may be empty).
# Globals:   accum_file (read) - file the "<time> <label>" line is appended
#            to; /tmp/accum is used when it is unset or empty.
# Outputs:   " <time>" without a newline, or " FAIL" when there is no
#            executable ./a.out (i.e. the build did not produce one).
# Returns:   1 when ./a.out is missing.
#######################################
run_and_record()
{
  local label=$1
  local elapsed

  if [ ! -x ./a.out ]
  then
    # Without this check /usr/bin/time's own error text would end up in
    # the table and in the accumulator used to pick the best time.
    echo -n " FAIL"
    return 1
  fi

  elapsed=$(/usr/bin/time -f "%E" ./a.out 2>&1)
  # Keep the report on a single line even if extra diagnostics were printed.
  elapsed=${elapsed//$'\n'/ }
  echo -n " $elapsed"
  echo "$elapsed${label:+ $label}" >>"${accum_file:-/tmp/accum}"
}

#######################################
# Build the benchmark with the given compiler and options, then time it.
# NOTE: this function shadows the shell builtin `test`; use `[` or `[[`
# for conditionals in this script.
# Arguments: $1 - compiler command, may include options (word-split)
#            $2 - value for -DAVG_SIZE
#            $3 - extra compiler options (word-split)
#            $4 - label recorded with the time (optional)
# Globals:   STRINGOP (read) - extra -D option(s), may be empty
#            memsize (read)  - value for -DMEMORY_COPIES
#            type (read)     - buffer element type of the generated source
# Outputs:   see run_and_record.
#######################################
test()
{
  rm -f a.out
  # $1, $3 and $STRINGOP are deliberately unquoted: each may hold several
  # words (or none) that must become separate compiler arguments.
  # shellcheck disable=SC2086
  emit_benchmark_source \
    | $1 -x c -O3 $3 -DAVG_SIZE="$2" $STRINGOP -DMEMORY_COPIES="$memsize" -
  run_and_record "$4"
}

#######################################
# Same as test, but always builds with clang and discards the compiler's
# diagnostics.  $1 is accepted for signature parity and ignored.
# Arguments: $2 - value for -DAVG_SIZE
#            $3 - extra compiler options (word-split)
#            $4 - label recorded with the time (optional)
#######################################
test2()
{
  rm -f a.out
  # shellcheck disable=SC2086
  emit_benchmark_source \
    | clang -x c -O3 $3 -DAVG_SIZE="$2" $STRINGOP -DMEMORY_COPIES="$memsize" 2>/dev/null -
  run_and_record "$4"
}

#######################################
# Print one table row: the block size followed by the time of every
# strategy, the profile-use and dynamic variants, and the best entry.
# Arguments: $1 - mode (unused here; the global is consulted instead)
#            $2 - compiler command line, forwarded to test
#            $3 - block size (AVG_SIZE)
# Globals:   mode (read) - the rep_8byte strategies run only when it is 64.
# Returns:   1 if the temporary accumulator file cannot be created.
#######################################
testrow()
{
  local cc=$2
  local size=$3
  local best
  # Bash locals are visible to called functions, so test/run_and_record
  # pick up this private file instead of the shared /tmp/accum.
  local accum_file
  accum_file=$(mktemp "${TMPDIR:-/tmp}/stringop-accum.XXXXXX") || return 1

  printf "%12i " "$size"
  test "$cc" "$size" "-mstringop-strategy=libcall" libcall
  test "$cc" "$size" "-mstringop-strategy=rep_byte -malign-stringops" rep1
  test "$cc" "$size" "-mstringop-strategy=rep_byte -mno-align-stringops" rep1noalign
  test "$cc" "$size" "-mstringop-strategy=rep_4byte -malign-stringops" rep4
  test "$cc" "$size" "-mstringop-strategy=rep_4byte -mno-align-stringops" rep4noalign
  if [ "$mode" == 64 ]
  then
    test "$cc" "$size" "-mstringop-strategy=rep_8byte -malign-stringops" rep8
    test "$cc" "$size" "-mstringop-strategy=rep_8byte -mno-align-stringops" rep8noalign
  fi
  test "$cc" "$size" "-mstringop-strategy=loop -malign-stringops" loop
  test "$cc" "$size" "-mstringop-strategy=loop -mno-align-stringops" loopnoalign
  test "$cc" "$size" "-mstringop-strategy=unrolled_loop -malign-stringops" unrl
  test "$cc" "$size" "-mstringop-strategy=unrolled_loop -mno-align-stringops" unrlnoalign
  test "$cc" "$size" "-mstringop-strategy=vector_loop -malign-stringops" sse
  test "$cc" "$size" "-mstringop-strategy=vector_loop -mno-align-stringops -msse2" ssenoalign
  #test2 "$cc" "$size" ""
  test "$cc" "$size" "-mstringop-strategy=byte_loop" byte

  # Plain textual sort: the first line is the smallest recorded time.
  # Taken before the PGO/dynamic runs so those do not compete for BEST.
  best=$(sort "$accum_file" | head -n 1)

  # Training run for the profile-use build; its report is discarded.
  test "$cc" "$size" " -fprofile-generate" >/dev/null 2>&1
  test "$cc" "$size" " -fprofile-use"
  test "$cc" "$size" " -minline-stringops-dynamically"
  echo " $best"

  rm -f -- "$accum_file"
}
#######################################
# Print one result table: a header line, then one testrow line per block
# size, from the largest size to the smallest.
# Arguments: $1 - mode, forwarded to testrow
#            $2 - compiler command line, forwarded to testrow
# Globals:   mode (read) - when 64 the header includes the rep8 columns.
#######################################
test_all_sizes()
{
  local size

  if [ "$mode" == 64 ]
  then
    echo " block size libcall rep1 noalg rep4 noalg rep8 noalg loop noalg unrl noalg sse noalg byte PGO dynamic BEST"
  else
    echo " block size libcall rep1 noalg rep4 noalg loop noalg unrl noalg sse noalg byte PGO dynamic BEST"
  fi
  # Alternative size lists, kept for experimentation:
  #for size in 1 2 3 4 6 8 10 12 14 16 24 32 48 64 128 256 512 1024 4096 8192 81920 819200 8192000
  #for size in 8192000 819200 81920 8192 4096 2048 1024 512 256 128 64 48 32 24 16 14 12 10 8 6 5 4 3 2 1
  #for size in 128 256 1024 4096 8192 81920 819200
  for size in 8192000 819200 81920 20480 8192 4096 2048 1024 512 256 128 64 48 32 24 16 14 12 10 8 6 4 1
  do
    testrow "$1" "$2" "$size"
  done
}

#######################################
# Print the usage text on stderr and exit with a failure status, so that
# callers and wrappers can tell a bad invocation from a completed run.
#######################################
usage()
{
  {
    echo "Usage:"
    echo "test_stringop mode size cmdline"
    echo "mode is either 32 or 64"
    echo "size is amount of memory copied in each test. Should be chosen small enough so runtime is less than minute for each test and sorting works"
    echo "Example: test_stringop 32 640000000 ./xgcc -B ./ -march=pentium3"
  } >&2
  exit 1
}

#######################################
# Run the char-buffer table and then the long-buffer ("Aligned") table for
# one string operation.
# Arguments: $1 - title printed above the tables
#            $2 - value exported as STRINGOP (extra -D option, may be empty)
# Globals:   mode, cmdline (read); STRINGOP, type (written)
#######################################
run_suite()
{
  echo "$1"
  export STRINGOP="$2"
  type=char
  test_all_sizes "$mode" "$cmdline -m$mode"
  echo "Aligned"
  type=long
  test_all_sizes "$mode" "$cmdline -m$mode"
}

# Validate before shifting: mode must be 32 or 64, and both the size and at
# least one word of compiler command line must be present.  `[` is used
# rather than `test` because this script defines a function named test.
case "$1" in
  32|64) ;;
  *) usage ;;
esac
[ $# -ge 3 ] || usage

mode=$1
export memsize=$2
shift 2
# Kept as one string on purpose: it is word-split again when the compiler
# is finally invoked.
cmdline=$*

run_suite "memcpy" ""
run_suite "memset" "-Dtest_memset=1"