#!/bin/bash

# Script to measure memset and memcpy for different sizes and strategies.
#
# Contributed by Jan Hubicka <jh@suse.cz>
#
# Copyright (C) 2019 Free Software Foundation, Inc.
#
# This file is part of GCC.
#
# GCC is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# GCC is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING.  If not, write to
# the Free Software Foundation, 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301, USA.

# Usage: <this script> mode size cmdline
#   mode    - 32 or 64
#   size    - amount of memory copied in each test
#   cmdline - compiler command line, e.g. ./xgcc -B ./ -march=pentium3
#
# NOTE(review): the paragraph below talks about searching 'spawn' lines,
# which nothing in this script does; it looks like it was carried over
# from a different script.  Confirm and drop it.
#
# This script will search a line starting with 'spawn' that includes the
# pattern you are looking for (typically a source file name).
#
# Once it finds that pattern, it re-executes the whole command
# in the spawn line.  If the pattern matches more than one spawn
# command, it asks which one you want.
32*fb8a8121Smrg 33*fb8a8121Smrgtest() 34*fb8a8121Smrg{ 35*fb8a8121Smrgrm -f a.out 36*fb8a8121Smrgcat <<END | $1 -x c -O3 $3 -DAVG_SIZE=$2 $STRINGOP -DMEMORY_COPIES=$memsize - 37*fb8a8121Smrg#define BUFFER_SIZE (16*1024*1024 + AVG_SIZE*2) 38*fb8a8121Smrg/*#define MEMORY_COPIES (1024*1024*64*(long long)10)*/ 39*fb8a8121Smrg$type t[BUFFER_SIZE]; 40*fb8a8121Smrgint main() 41*fb8a8121Smrg{ 42*fb8a8121Smrg unsigned int i; 43*fb8a8121Smrg for (i=0;i<((long long)MEMORY_COPIES + AVG_SIZE * 2 - 1)/AVG_SIZE*2;i++) 44*fb8a8121Smrg#ifdef test_memset 45*fb8a8121Smrg __builtin_memset (t+(i*1024*1024+i*1)%(BUFFER_SIZE - AVG_SIZE*2), i, (AVG_SIZE + i) % (AVG_SIZE * 2 + 0)); 46*fb8a8121Smrg#else 47*fb8a8121Smrg __builtin_memcpy (t+(i*1024*1024+i*1)%(BUFFER_SIZE - AVG_SIZE*2), t+((i+1)*1024*1024*4+i*1)%(BUFFER_SIZE - AVG_SIZE *2), (AVG_SIZE + i) % (AVG_SIZE * 2 + 0)); 48*fb8a8121Smrg#endif 49*fb8a8121Smrg return 0; 50*fb8a8121Smrg} 51*fb8a8121SmrgEND 52*fb8a8121SmrgTIME=`/usr/bin/time -f "%E" ./a.out 2>&1` 53*fb8a8121Smrgecho -n " "$TIME 54*fb8a8121Smrgecho $TIME $4 >>/tmp/accum 55*fb8a8121Smrg} 56*fb8a8121Smrg 57*fb8a8121Smrgtest2() 58*fb8a8121Smrg{ 59*fb8a8121Smrgrm -f a.out 60*fb8a8121Smrgcat <<END | clang -x c -O3 $3 -DAVG_SIZE=$2 $STRINGOP -DMEMORY_COPIES=$memsize 2>/dev/null - 61*fb8a8121Smrg#define BUFFER_SIZE (16*1024*1024 + AVG_SIZE*2) 62*fb8a8121Smrg/*#define MEMORY_COPIES (1024*1024*64*(long long)10)*/ 63*fb8a8121Smrg$type t[BUFFER_SIZE]; 64*fb8a8121Smrgint main() 65*fb8a8121Smrg{ 66*fb8a8121Smrg unsigned int i; 67*fb8a8121Smrg for (i=0;i<((long long)MEMORY_COPIES + AVG_SIZE * 2 - 1)/AVG_SIZE*2;i++) 68*fb8a8121Smrg#ifdef test_memset 69*fb8a8121Smrg __builtin_memset (t+(i*1024*1024+i*1)%(BUFFER_SIZE - AVG_SIZE*2), i, (AVG_SIZE + i) % (AVG_SIZE * 2 + 0)); 70*fb8a8121Smrg#else 71*fb8a8121Smrg __builtin_memcpy (t+(i*1024*1024+i*1)%(BUFFER_SIZE - AVG_SIZE*2), t+((i+1)*1024*1024*4+i*1)%(BUFFER_SIZE - AVG_SIZE *2), (AVG_SIZE + i) % (AVG_SIZE * 2 + 0)); 72*fb8a8121Smrg#endif 
73*fb8a8121Smrg return 0; 74*fb8a8121Smrg} 75*fb8a8121SmrgEND 76*fb8a8121SmrgTIME=`/usr/bin/time -f "%E" ./a.out 2>&1` 77*fb8a8121Smrgecho -n " "$TIME 78*fb8a8121Smrgecho $TIME $4 >>/tmp/accum 79*fb8a8121Smrg} 80*fb8a8121Smrg 81*fb8a8121Smrgtestrow() 82*fb8a8121Smrg{ 83*fb8a8121Smrgecho -n "" >/tmp/accum 84*fb8a8121Smrgprintf "%12i " $3 85*fb8a8121Smrgtest "$2" "$3" "-mstringop-strategy=libcall" libcall 86*fb8a8121Smrgtest "$2" "$3" "-mstringop-strategy=rep_byte -malign-stringops" rep1 87*fb8a8121Smrgtest "$2" "$3" "-mstringop-strategy=rep_byte -mno-align-stringops" rep1noalign 88*fb8a8121Smrgtest "$2" "$3" "-mstringop-strategy=rep_4byte -malign-stringops" rep4 89*fb8a8121Smrgtest "$2" "$3" "-mstringop-strategy=rep_4byte -mno-align-stringops" rep4noalign 90*fb8a8121Smrgif [ "$mode" == 64 ] 91*fb8a8121Smrgthen 92*fb8a8121Smrgtest "$2" "$3" "-mstringop-strategy=rep_8byte -malign-stringops" rep8 93*fb8a8121Smrgtest "$2" "$3" "-mstringop-strategy=rep_8byte -mno-align-stringops" rep8noalign 94*fb8a8121Smrgfi 95*fb8a8121Smrgtest "$2" "$3" "-mstringop-strategy=loop -malign-stringops" loop 96*fb8a8121Smrgtest "$2" "$3" "-mstringop-strategy=loop -mno-align-stringops" loopnoalign 97*fb8a8121Smrgtest "$2" "$3" "-mstringop-strategy=unrolled_loop -malign-stringops" unrl 98*fb8a8121Smrgtest "$2" "$3" "-mstringop-strategy=unrolled_loop -mno-align-stringops" unrlnoalign 99*fb8a8121Smrgtest "$2" "$3" "-mstringop-strategy=vector_loop -malign-stringops" sse 100*fb8a8121Smrgtest "$2" "$3" "-mstringop-strategy=vector_loop -mno-align-stringops -msse2" ssenoalign 101*fb8a8121Smrg#test2 "$2" "$3" "" 102*fb8a8121Smrgtest "$2" "$3" "-mstringop-strategy=byte_loop" byte 103*fb8a8121Smrgbest=`cat /tmp/accum | sort | head -1` 104*fb8a8121Smrgtest "$2" "$3" " -fprofile-generate" >/dev/null 2>&1 105*fb8a8121Smrgtest "$2" "$3" " -fprofile-use" 106*fb8a8121Smrgtest "$2" "$3" " -minline-stringops-dynamically" 107*fb8a8121Smrgecho " $best" 108*fb8a8121Smrg} 109*fb8a8121Smrg 
110*fb8a8121Smrgtest_all_sizes() 111*fb8a8121Smrg{ 112*fb8a8121Smrgif [ "$mode" == 64 ] 113*fb8a8121Smrgthen 114*fb8a8121Smrgecho " block size libcall rep1 noalg rep4 noalg rep8 noalg loop noalg unrl noalg sse noalg byte PGO dynamic BEST" 115*fb8a8121Smrgelse 116*fb8a8121Smrgecho " block size libcall rep1 noalg rep4 noalg loop noalg unrl noalg sse noalg byte PGO dynamic BEST" 117*fb8a8121Smrgfi 118*fb8a8121Smrg#for size in 1 2 3 4 6 8 10 12 14 16 24 32 48 64 128 256 512 1024 4096 8192 81920 819200 8192000 119*fb8a8121Smrg#for size in 8192000 819200 81920 8192 4096 2048 1024 512 256 128 64 48 32 24 16 14 12 10 8 6 5 4 3 2 1 120*fb8a8121Smrgfor size in 8192000 819200 81920 20480 8192 4096 2048 1024 512 256 128 64 48 32 24 16 14 12 10 8 6 4 1 121*fb8a8121Smrg#for size in 128 256 1024 4096 8192 81920 819200 122*fb8a8121Smrgdo 123*fb8a8121Smrgtestrow "$1" "$2" $size 124*fb8a8121Smrgdone 125*fb8a8121Smrg} 126*fb8a8121Smrg 127*fb8a8121Smrgmode=$1 128*fb8a8121Smrgshift 129*fb8a8121Smrgexport memsize=$1 130*fb8a8121Smrgshift 131*fb8a8121Smrgcmdline=$* 132*fb8a8121Smrgif [ "$mode" != 32 ] 133*fb8a8121Smrgthen 134*fb8a8121Smrg if [ "$mode" != 64 ] 135*fb8a8121Smrg then 136*fb8a8121Smrg echo "Usage:" 137*fb8a8121Smrg echo "test_stringop mode size cmdline" 138*fb8a8121Smrg echo "mode is either 32 or 64" 139*fb8a8121Smrg echo "size is amount of memory copied in each test. 
Should be chosed small enough so runtime is less than minute for each test and sorting works" 140*fb8a8121Smrg echo "Example: test_stringop 32 640000000 ./xgcc -B ./ -march=pentium3" 141*fb8a8121Smrg exit 142*fb8a8121Smrg fi 143*fb8a8121Smrgfi 144*fb8a8121Smrg 145*fb8a8121Smrgecho "memcpy" 146*fb8a8121Smrgexport STRINGOP="" 147*fb8a8121Smrgtype=char 148*fb8a8121Smrgtest_all_sizes $mode "$cmdline -m$mode" 149*fb8a8121Smrgecho "Aligned" 150*fb8a8121Smrgtype=long 151*fb8a8121Smrgtest_all_sizes $mode "$cmdline -m$mode" 152*fb8a8121Smrgecho "memset" 153*fb8a8121Smrgexport STRINGOP="-Dtest_memset=1" 154*fb8a8121Smrgtype=char 155*fb8a8121Smrgtest_all_sizes $mode "$cmdline -m$mode" 156*fb8a8121Smrgecho "Aligned" 157*fb8a8121Smrgtype=long 158*fb8a8121Smrgtest_all_sizes $mode "$cmdline -m$mode" 159