xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/k6/cross.pl (revision b49cc1491953ef2348eff9c84520ffd0678a5c8d)
1#! /usr/bin/perl
2
3# Copyright 2000, 2001 Free Software Foundation, Inc.
4#
5# This file is part of the GNU MP Library.
6#
7# The GNU MP Library is free software; you can redistribute it and/or modify
8# it under the terms of the GNU Lesser General Public License as published
9# by the Free Software Foundation; either version 3 of the License, or (at
10# your option) any later version.
11#
12# The GNU MP Library is distributed in the hope that it will be useful, but
13# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15# License for more details.
16#
17# You should have received a copy of the GNU Lesser General Public License
18# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
19
20
21# Usage: cross.pl [filename.o]...
22#
23# Produce an annotated disassembly of the given object files, indicating
24# certain code alignment and addressing mode problems afflicting K6 chips.
25# "ZZ" is used on all annotations, so this can be searched for.
26#
27# With no arguments, all .o files corresponding to .asm files are processed.
28# This is good in the mpn object directory of a k6*-*-* build.
29#
30# Code alignments of 8 bytes or more are handled.  When 32 is used, cache
31# line boundaries will fall in at offsets 0x20,0x40,etc and problems are
32# flagged at those locations.  When 16 is used, the line boundaries can also
33# fall at offsets 0x10,0x30,0x50,etc, depending where the file is loaded, so
34# problems are identified there too.  Likewise when 8 byte alignment is used
35# problems are flagged additionally at 0x08,0x18,0x28,etc.
36#
37# Usually 32 byte alignment is used for k6 routines, but less is certainly
38# possible if through good luck, or a little tweaking, cache line crossing
39# problems can be avoided at the extra locations.
40#
41# Bugs:
42#
43# Instructions without mod/rm bytes or which are already vector decoded are
44# unaffected by cache line boundary crossing, but not all of these have yet
45# been put in as exceptions.  All that occur in practice in GMP are present
46# though.
47#
48# There are no messages for using the vector decoded addressing mode (%esi),
49# but that's easy to avoid when coding.
50#
51# Future:
52#
53# Warn about jump targets that are poorly aligned (less than 2 instructions
54# before a cache line boundary).
55
56use strict;
57
# disassemble FILE
#
# Run "objdump -Srfh" on FILE and copy every line of its output to stdout.
# Each instruction line that trips one of the K6 checks below is followed by
# a line starting with "ZZ" describing the problem.
#
# The code alignment is read from the ".text" line of the section headers
# (its trailing "2**N" field).  Alignments below 2**3 are reported and
# treated as 8; alignments above 2**5 are treated as 32, since the 32-byte
# line boundaries then fall at the same offsets as with 2**5.  Until a
# ".text" header line has been seen no boundary checks fire.
#
# Dies if the pipe from objdump cannot be opened or objdump exits with an
# error.  Returns nothing useful.
sub disassemble {
    my ($file) = @_;
    my $align = 0;

    # List-form pipe open: $file reaches objdump as one argument without a
    # shell in between, so spaces or shell metacharacters in the name are
    # taken literally.
    open(my $in, '-|', 'objdump', '-Srfh', $file)
        or die "Cannot open pipe from objdump\n";

    # A lexical line variable is used rather than $_, so the caller's $_
    # (possibly aliased to a list element by foreach) is left alone.
    while (my $line = <$in>) {
        print $line;

        if ($line =~ /^[ \t]*[0-9]+[ \t]+\.text[ \t]/
            && $line =~ /2\*\*([0-9]+)$/) {
            $align = 1 << $1;
            if ($align < 8) {
                print "ZZ cross.pl cannot handle alignment < 2**3\n";
                $align = 8;
            } elsif ($align > 32) {
                $align = 32;
            }
        }

        # An instruction line is "addr:" followed by the instruction bytes.
        next unless $line =~ /^[ \t]*([0-9a-f]*):[ \t]*(.*)/;
        my ($addr, $rest) = ($1, $2);

        # Collect the leading byte tokens.  A byte is exactly two hex
        # digits; stopping at the first token of any other shape keeps a
        # mnemonic that happens to start with hex letters (add, dec, cltd,
        # emms, ...) from being mistaken for a byte.
        my @bytes;
        foreach my $token (split /[ \t]+/, $rest) {
            last unless $token =~ /^[0-9a-f]{2}$/;
            push @bytes, $token;
        }
        next unless @bytes;

        # Only the first three bytes matter; absent ones become ''.
        my ($b1, $b2, $b3) = map { defined $_ ? $_ : '' } @bytes[0 .. 2];

        my ($prefix, $opcode, $modrm);
        if ($b1 =~ /0f/) {
            ($prefix, $opcode, $modrm) = ($b1, $b2, $b3);
        } else {
            ($prefix, $opcode, $modrm) = ('', $b1, $b2);
        }

        # Does the instruction start on the last byte, or the second to
        # last byte, before a possible cache line boundary?  Which
        # addresses count depends on the alignment in force.
        my $starts_at_last_byte =
            ($align==32 && $addr =~ /[13579bdf]f$/
             || $align==16 && $addr =~ /f$/
             || $align==8 && $addr =~ /[7f]$/);
        my $starts_at_second_last_byte =
            ($align==32 && $addr =~ /[13579bdf][e]$/
             || $align==16 && $addr =~ /[e]$/
             || $align==8 && $addr =~ /[6e]$/);

        # modrm of the form 00-xxx-100 with an 0F prefix is the problem case
        # for K6 and pre-CXT K6-2
        if ($prefix =~ /0f/
            && $opcode !~ /^8/         # jcond disp32
            && $modrm =~ /^[0-3][4c]/) {
            print "ZZ ($file) >3 bytes to determine instruction length [K6]\n";
        }

        # with just an opcode, starting 1f mod 20h
        if ($starts_at_last_byte
            && $prefix !~ /0f/
            && $opcode !~ /1[012345]/ # adc
            && $opcode !~ /1[89abcd]/ # sbb
            && $opcode !~ /^4/        # inc/dec reg
            && $opcode !~ /^5/        # push/pop reg
            && $opcode !~ /68/        # push $imm32
            && $opcode !~ /^7/        # jcond disp8
            && $opcode !~ /a[89]/     # test+imm
            && $opcode !~ /a[a-f]/    # stos/lods/scas
            && $opcode !~ /b8/        # movl $imm32,%eax
            && $opcode !~ /d[0123]/   # rcl
            && $opcode !~ /e[0123]/   # loop/loopz/loopnz/jcxz
            && $opcode !~ /e8/        # call disp32
            && $opcode !~ /e[9b]/     # jmp disp32/disp8
            && $opcode !~ /f[89abcd]/ # clc,stc,cli,sti,cld,std
            && !($opcode =~ /f[67]/          # grp 1
                 && $modrm =~ /^[2367abef]/) # mul, imul, div, idiv
            && $modrm !~ /^$/) {
            print "ZZ ($file) opcode/modrm cross 32-byte boundary\n";
        }

        # with an 0F prefix, anything starting at 1f mod 20h
        if ($starts_at_last_byte
            && $prefix =~ /0f/
            && $opcode !~ /af/        # imul
            && $opcode !~ /a[45]/     # shldl
            && $opcode !~ /a[cd]/     # shrdl
            ) {
            print "ZZ ($file) prefix/opcode cross 32-byte boundary\n";
        }

        # with an 0F prefix, anything with mod/rm starting at 1e mod 20h
        if ($starts_at_second_last_byte
            && $prefix =~ /0f/
            && $opcode !~ /^8/        # jcond disp32
            && $opcode !~ /af/        # imull reg,reg
            && $opcode !~ /a[45]/     # shldl
            && $opcode !~ /a[cd]/     # shrdl
            && $modrm !~ /^$/) {
            print "ZZ ($file) prefix/opcode/modrm cross 32-byte boundary\n";
        }
    }

    # close on a pipe also reports a non-zero exit status from objdump.
    close($in) or die "Error from objdump (or objdump not available)\n";
}
159
160
# Main program: annotate the object files named on the command line, or,
# when none are given, the .o file corresponding to each .asm file in the
# current directory.
my @files;
if (@ARGV) {
    @files = @ARGV;
} else {
    @files = glob "*.asm";
    # Replace only the literal ".asm" suffix.  An unescaped, unanchored
    # ".asm" would match any character followed by "asm" and so could
    # rewrite an earlier part of the name (plasma.asm -> p.oa.asm).
    s/\.asm\z/.o/ foreach @files;
}

foreach my $file (@files) {
    disassemble($file);
}
172