xref: /openbsd-src/gnu/usr.bin/perl/gen_syscall_emulator.pl (revision 312e26c80be876012ae9792d4323a301329d66ae)
1*312e26c8Safresh1#!/usr/bin/perl
2*312e26c8Safresh1#	$OpenBSD: gen_syscall_emulator.pl,v 1.1 2023/09/03 01:43:09 afresh1 Exp $	#
3*312e26c8Safresh1use v5.36;
4*312e26c8Safresh1use autodie;
5*312e26c8Safresh1
6*312e26c8Safresh1# Copyright (c) 2023 Andrew Hewus Fresh <afresh1@openbsd.org>
7*312e26c8Safresh1#
8*312e26c8Safresh1# Permission to use, copy, modify, and distribute this software for any
9*312e26c8Safresh1# purpose with or without fee is hereby granted, provided that the above
10*312e26c8Safresh1# copyright notice and this permission notice appear in all copies.
11*312e26c8Safresh1#
12*312e26c8Safresh1# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13*312e26c8Safresh1# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14*312e26c8Safresh1# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15*312e26c8Safresh1# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16*312e26c8Safresh1# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17*312e26c8Safresh1# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18*312e26c8Safresh1# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19*312e26c8Safresh1
20*312e26c8Safresh1my $includes = '/usr/include';
21*312e26c8Safresh1
22*312e26c8Safresh1# Because perl uses a long for every syscall argument,
23*312e26c8Safresh1# if we are building a syscall_emulator for use by perl,
24*312e26c8Safresh1# taking that into account make things work more consistently
25*312e26c8Safresh1# across different OpenBSD architectures.
26*312e26c8Safresh1# Unfortunately there doesn't appear to be an easy way
27*312e26c8Safresh1# to make everything work "the way it was".
28*312e26c8Safresh1use constant PERL_LONG_ARGS => 1;
29*312e26c8Safresh1
30*312e26c8Safresh1# See also /usr/src/sys/kern/syscalls.master
31*312e26c8Safresh1my %syscalls = parse_syscalls(
32*312e26c8Safresh1    "$includes/sys/syscall.h",
33*312e26c8Safresh1    "$includes/sys/syscallargs.h",
34*312e26c8Safresh1)->%*;
35*312e26c8Safresh1delete $syscalls{MAXSYSCALL}; # not an actual function
36*312e26c8Safresh1
37*312e26c8Safresh1# The ordered list of all the headers we need
38*312e26c8Safresh1my @headers = qw<
39*312e26c8Safresh1	sys/syscall.h
40*312e26c8Safresh1	stdarg.h
41*312e26c8Safresh1	errno.h
42*312e26c8Safresh1
43*312e26c8Safresh1	sys/socket.h
44*312e26c8Safresh1	sys/event.h
45*312e26c8Safresh1	sys/futex.h
46*312e26c8Safresh1	sys/ioctl.h
47*312e26c8Safresh1	sys/ktrace.h
48*312e26c8Safresh1	sys/mman.h
49*312e26c8Safresh1	sys/mount.h
50*312e26c8Safresh1	sys/msg.h
51*312e26c8Safresh1	sys/poll.h
52*312e26c8Safresh1	sys/ptrace.h
53*312e26c8Safresh1	sys/resource.h
54*312e26c8Safresh1	sys/select.h
55*312e26c8Safresh1	sys/sem.h
56*312e26c8Safresh1	sys/shm.h
57*312e26c8Safresh1	sys/stat.h
58*312e26c8Safresh1	sys/sysctl.h
59*312e26c8Safresh1	sys/time.h
60*312e26c8Safresh1	sys/uio.h
61*312e26c8Safresh1	sys/wait.h
62*312e26c8Safresh1
63*312e26c8Safresh1	dirent.h
64*312e26c8Safresh1	fcntl.h
65*312e26c8Safresh1	sched.h
66*312e26c8Safresh1	signal.h
67*312e26c8Safresh1	stdlib.h
68*312e26c8Safresh1	stdio.h
69*312e26c8Safresh1	syslog.h
70*312e26c8Safresh1	tib.h
71*312e26c8Safresh1	time.h
72*312e26c8Safresh1	unistd.h
73*312e26c8Safresh1>;
74*312e26c8Safresh1
75*312e26c8Safresh1foreach my $header (@headers) {
76*312e26c8Safresh1	my $filename = "$includes/$header";
77*312e26c8Safresh1	open my $fh, '<', $filename;
78*312e26c8Safresh1	my $content = do { local $/; readline $fh };
79*312e26c8Safresh1	close $fh;
80*312e26c8Safresh1
81*312e26c8Safresh1	foreach my $name (sort keys %syscalls) {
82*312e26c8Safresh1		my $s = $syscalls{$name};
83*312e26c8Safresh1		my $func_sig = find_func_sig($content, $name, $s);
84*312e26c8Safresh1
85*312e26c8Safresh1		if (ref $func_sig) {
86*312e26c8Safresh1			die "Multiple defs for $name <$header> <$s->{header}>"
87*312e26c8Safresh1			    if $s->{header};
88*312e26c8Safresh1			$s->{func} = $func_sig;
89*312e26c8Safresh1			$s->{header} = $header;
90*312e26c8Safresh1		} elsif ($func_sig) {
91*312e26c8Safresh1			$s->{mismatched_sig} = "$func_sig <$header>";
92*312e26c8Safresh1		}
93*312e26c8Safresh1	}
94*312e26c8Safresh1}
95*312e26c8Safresh1
96*312e26c8Safresh1say "/*\n * Generated from gen_syscall_emulator.pl\n */";
97*312e26c8Safresh1say "#include <$_>" for @headers;
98*312e26c8Safresh1print <<"EOL";
99*312e26c8Safresh1#include "syscall_emulator.h"
100*312e26c8Safresh1
101*312e26c8Safresh1long
102*312e26c8Safresh1syscall_emulator(int syscall, ...)
103*312e26c8Safresh1{
104*312e26c8Safresh1	long ret = 0;
105*312e26c8Safresh1	va_list args;
106*312e26c8Safresh1	va_start(args, syscall);
107*312e26c8Safresh1
108*312e26c8Safresh1	switch(syscall) {
109*312e26c8Safresh1EOL
110*312e26c8Safresh1
111*312e26c8Safresh1foreach my $name (
112*312e26c8Safresh1	sort { $syscalls{$a}{id} <=> $syscalls{$b}{id} } keys %syscalls
113*312e26c8Safresh1    ) {
114*312e26c8Safresh1	my %s = %{ $syscalls{$name} };
115*312e26c8Safresh1
116*312e26c8Safresh1	# Some syscalls we can't emulate, so we comment those out.
117*312e26c8Safresh1	$s{skip} //= "Indirect syscalls not supported"
118*312e26c8Safresh1	    if !$s{argtypes} && ($s{args}[-1] || '') eq '...';
119*312e26c8Safresh1	$s{skip} //= "Mismatched func: $s{mismatched_sig}"
120*312e26c8Safresh1	    if $s{mismatched_sig} and not $s{func};
121*312e26c8Safresh1	$s{skip} //= "No signature found in headers"
122*312e26c8Safresh1	    unless $s{header};
123*312e26c8Safresh1
124*312e26c8Safresh1	my $ret = $s{ret} eq 'void' ? '' : 'ret = ';
125*312e26c8Safresh1	$ret .= '(long)' if $s{ret} eq 'void *';
126*312e26c8Safresh1
127*312e26c8Safresh1	my (@args, @defines);
128*312e26c8Safresh1	my $argname = '';
129*312e26c8Safresh1	if ($s{argtypes}) {
130*312e26c8Safresh1		if (@{ $s{argtypes} } > 1) {
131*312e26c8Safresh1			@defines = map {
132*312e26c8Safresh1				my $t = $_->{type};
133*312e26c8Safresh1				my $n = $_->{name};
134*312e26c8Safresh1				$n = "_$n" if $n eq $name; # link :-/
135*312e26c8Safresh1				push @args, $n;
136*312e26c8Safresh1				PERL_LONG_ARGS
137*312e26c8Safresh1				    ? "$t $n = ($t)va_arg(args, long);"
138*312e26c8Safresh1				    : "$t $n = va_arg(args, $t);"
139*312e26c8Safresh1			    } @{ $s{argtypes} };
140*312e26c8Safresh1		} else {
141*312e26c8Safresh1			if (@{ $s{argtypes} }) {
142*312e26c8Safresh1				$argname = " // " . join ', ',
143*312e26c8Safresh1				    map { $_->{name} }
144*312e26c8Safresh1				    @{ $s{argtypes} };
145*312e26c8Safresh1			}
146*312e26c8Safresh1			@args = map { "va_arg(args, $_->{type})" }
147*312e26c8Safresh1			    @{ $s{argtypes} };
148*312e26c8Safresh1		}
149*312e26c8Safresh1	} else {
150*312e26c8Safresh1		@args = @{ $s{args} };
151*312e26c8Safresh1
152*312e26c8Safresh1		# If we didn't find args in syscallargs.h but have args
153*312e26c8Safresh1		# we don't know how to write our function.
154*312e26c8Safresh1		$s{skip} //= "Not found in sys/syscallargs.h"
155*312e26c8Safresh1		    if @args;
156*312e26c8Safresh1	}
157*312e26c8Safresh1
158*312e26c8Safresh1	#my $header = $s{header} ? " <$s{header}>" : '';
159*312e26c8Safresh1
160*312e26c8Safresh1	my $indent = "\t";
161*312e26c8Safresh1	say "$indent/* $s{skip}" if $s{skip};
162*312e26c8Safresh1
163*312e26c8Safresh1	$indent .= ' *' if $s{skip};
164*312e26c8Safresh1	say "${indent}                  $s{signature} <sys/syscall.h>"
165*312e26c8Safresh1	    if $s{skip} && $s{skip} =~ /Mismatch/;
166*312e26c8Safresh1
167*312e26c8Safresh1	my $brace = @defines ? " {" : "";
168*312e26c8Safresh1	say "${indent}case $s{define}:$brace"; # // $s{id}";
169*312e26c8Safresh1	say "${indent}\t$_" for @defines;
170*312e26c8Safresh1	#say "${indent}\t// $s{signature}$header";
171*312e26c8Safresh1	say "${indent}\t$ret$name(" . join(', ', @args) . ");$argname";
172*312e26c8Safresh1	say "${indent}\tbreak;";
173*312e26c8Safresh1	say "${indent}}" if $brace;
174*312e26c8Safresh1
175*312e26c8Safresh1	say "\t */" if $s{skip};
176*312e26c8Safresh1}
177*312e26c8Safresh1
178*312e26c8Safresh1print <<"EOL";
179*312e26c8Safresh1	default:
180*312e26c8Safresh1		ret = -1;
181*312e26c8Safresh1		errno = ENOSYS;
182*312e26c8Safresh1	}
183*312e26c8Safresh1	va_end(args);
184*312e26c8Safresh1
185*312e26c8Safresh1	return ret;
186*312e26c8Safresh1}
187*312e26c8Safresh1EOL
188*312e26c8Safresh1
189*312e26c8Safresh1
190*312e26c8Safresh1sub parse_syscalls($syscall, $args)
191*312e26c8Safresh1{
192*312e26c8Safresh1	my %s = parse_syscall_h($syscall)->%*;
193*312e26c8Safresh1
194*312e26c8Safresh1	my %a = parse_syscallargs_h($args)->%*;
195*312e26c8Safresh1	$s{$_}{argtypes} = $a{$_} for grep { $a{$_} } keys %s;
196*312e26c8Safresh1
197*312e26c8Safresh1	return \%s;
198*312e26c8Safresh1}
199*312e26c8Safresh1
200*312e26c8Safresh1sub parse_syscall_h($filename)
201*312e26c8Safresh1{
202*312e26c8Safresh1	my %s;
203*312e26c8Safresh1	open my $fh, '<', $filename;
204*312e26c8Safresh1	while (readline $fh) {
205*312e26c8Safresh1		if (m{^/\*
206*312e26c8Safresh1		    \s+ syscall: \s+ "(?<name>[^"]+)"
207*312e26c8Safresh1		    \s+	 ret: \s+ "(?<ret> [^"]+)"
208*312e26c8Safresh1		    \s+	args: \s+  (?<args>.*?)
209*312e26c8Safresh1		    \s* \*/
210*312e26c8Safresh1		  |
211*312e26c8Safresh1		    ^\#define \s+ (?<define>SYS_(?<name>\S+)) \s+ (?<id>\d+)
212*312e26c8Safresh1		}x)
213*312e26c8Safresh1		{
214*312e26c8Safresh1			my $name        = $+{name};
215*312e26c8Safresh1			$s{$name}{$_}   = $+{$_} for keys %+;
216*312e26c8Safresh1			$s{$name}{args} = [ $+{args} =~ /"(.*?)"/g ]
217*312e26c8Safresh1			    if exists $+{args};
218*312e26c8Safresh1		}
219*312e26c8Safresh1	}
220*312e26c8Safresh1	close $fh;
221*312e26c8Safresh1
222*312e26c8Safresh1	foreach my $name (keys %s) {
223*312e26c8Safresh1		my %d = %{ $s{$name} };
224*312e26c8Safresh1		next unless $d{ret}; # the MAXSYSCALL
225*312e26c8Safresh1
226*312e26c8Safresh1		my $ret = $d{ret};
227*312e26c8Safresh1		my @args = @{ $d{args} || [] };
228*312e26c8Safresh1		@args = 'void' unless @args;
229*312e26c8Safresh1
230*312e26c8Safresh1		if ($args[-1] ne '...') {
231*312e26c8Safresh1			my @a;
232*312e26c8Safresh1			for (@args) {
233*312e26c8Safresh1				push @a, $_;
234*312e26c8Safresh1				last if $_ eq '...';
235*312e26c8Safresh1			}
236*312e26c8Safresh1			@args = @a;
237*312e26c8Safresh1		}
238*312e26c8Safresh1
239*312e26c8Safresh1		my $args = join ", ", @args;
240*312e26c8Safresh1		$s{$name}{signature} = "$ret\t$name($args);" =~ s/\s+/ /gr;
241*312e26c8Safresh1		#print "    $s{$name}{signature}\n";
242*312e26c8Safresh1	}
243*312e26c8Safresh1
244*312e26c8Safresh1	return \%s;
245*312e26c8Safresh1}
246*312e26c8Safresh1
247*312e26c8Safresh1sub parse_syscallargs_h($filename)
248*312e26c8Safresh1{
249*312e26c8Safresh1	my %args;
250*312e26c8Safresh1
251*312e26c8Safresh1	open my $fh, '<', $filename;
252*312e26c8Safresh1	while (readline $fh) {
253*312e26c8Safresh1		if (my ($syscall) = /^struct \s+ sys_(\w+)_args \s+ \{/x) {
254*312e26c8Safresh1			$args{$syscall} = [];
255*312e26c8Safresh1			while (readline $fh) {
256*312e26c8Safresh1				last if /^\s*\};\s*$/;
257*312e26c8Safresh1				if (/syscallarg
258*312e26c8Safresh1				    \(  (?<type> [^)]+ ) \)
259*312e26c8Safresh1				    \s+ (?<name>   \w+ ) \s* ;
260*312e26c8Safresh1				/x) {
261*312e26c8Safresh1					push @{$args{$syscall}}, {%+};
262*312e26c8Safresh1				}
263*312e26c8Safresh1			}
264*312e26c8Safresh1		}
265*312e26c8Safresh1	}
266*312e26c8Safresh1	close $fh;
267*312e26c8Safresh1
268*312e26c8Safresh1	return \%args;
269*312e26c8Safresh1}
270*312e26c8Safresh1
271*312e26c8Safresh1sub find_func_sig($content, $name, $s)
272*312e26c8Safresh1{
273*312e26c8Safresh1	my $re = $s->{re} //= qr{^
274*312e26c8Safresh1		(?<ret> \S+ (?: [^\S\n]+ \S+)? ) [^\S\n]* \n?
275*312e26c8Safresh1		\b \Q$name\E \( (?<args> [^)]* ) \)
276*312e26c8Safresh1		[^;]*;
277*312e26c8Safresh1	    }xms;
278*312e26c8Safresh1
279*312e26c8Safresh1	$content =~ /$re/ || return !!0;
280*312e26c8Safresh1	my $ret  = $+{ret};
281*312e26c8Safresh1	my $args = $+{args};
282*312e26c8Safresh1
283*312e26c8Safresh1	for ($ret, $args) {
284*312e26c8Safresh1		s/^\s+//;
285*312e26c8Safresh1		s/\s+$//;
286*312e26c8Safresh1		s/\s+/ /g;
287*312e26c8Safresh1	}
288*312e26c8Safresh1
289*312e26c8Safresh1	# The actual functions may have this extra annotation
290*312e26c8Safresh1	$args =~ s/\*\s*__restrict/*/g;
291*312e26c8Safresh1
292*312e26c8Safresh1	my %func_sig = ( ret => $ret, args => [ split /\s*,\s*/, $args ] );
293*312e26c8Safresh1
294*312e26c8Safresh1	return "$ret $name($args);" =~ s/\s+/ /gr
295*312e26c8Safresh1	    unless sigs_match($s, \%func_sig);
296*312e26c8Safresh1
297*312e26c8Safresh1	return \%func_sig;
298*312e26c8Safresh1}
299*312e26c8Safresh1
300*312e26c8Safresh1# Tests whether two types are equivalent.
301*312e26c8Safresh1# Sometimes there are two ways to represent the same thing
302*312e26c8Safresh1# and it seems the functions and the syscalls
303*312e26c8Safresh1# differ a fair amount.
304*312e26c8Safresh1sub types_match($l, $r)
305*312e26c8Safresh1{
306*312e26c8Safresh1	state %m = (
307*312e26c8Safresh1	    caddr_t         => 'char *',
308*312e26c8Safresh1	    idtype_t        => 'int',
309*312e26c8Safresh1	    nfds_t          => 'u_int',
310*312e26c8Safresh1	    __off_t         => 'off_t',
311*312e26c8Safresh1	    pid_t           => 'int',
312*312e26c8Safresh1	    __size_t        => 'u_long',
313*312e26c8Safresh1	    size_t          => 'u_long',
314*312e26c8Safresh1	    'unsigned int'  => 'u_int',
315*312e26c8Safresh1	    'unsigned long' => 'u_long',
316*312e26c8Safresh1	);
317*312e26c8Safresh1
318*312e26c8Safresh1	$l //= '__undef__';
319*312e26c8Safresh1	$r //= '__undef__';
320*312e26c8Safresh1
321*312e26c8Safresh1	s/\b volatile \s+//x  for $l, $r;
322*312e26c8Safresh1	s/\b const    \s+//x  for $l, $r;
323*312e26c8Safresh1	s/\s* \[\d*\] $/ \*/x for $l, $r;
324*312e26c8Safresh1
325*312e26c8Safresh1	my ($f, $s) = sort { length($a) <=> length($b) } $l, $r;
326*312e26c8Safresh1	if (index($s, $f) == 0) {
327*312e26c8Safresh1		$s =~ s/^\Q$f\E\s*//;
328*312e26c8Safresh1		if ( $s && $s =~ /^\w+$/ ) {
329*312e26c8Safresh1			#warn "prefix ['$f', '$s']\n";
330*312e26c8Safresh1			s/\s*\Q$s\E$// for $l, $r;
331*312e26c8Safresh1		}
332*312e26c8Safresh1	}
333*312e26c8Safresh1
334*312e26c8Safresh1	$l = $m{$l} //= $l;
335*312e26c8Safresh1	$r = $m{$r} //= $r;
336*312e26c8Safresh1
337*312e26c8Safresh1	return $l eq $r;
338*312e26c8Safresh1}
339*312e26c8Safresh1
340*312e26c8Safresh1
341*312e26c8Safresh1# Tests whether two function signatures match,
342*312e26c8Safresh1# expected to be left from syscall.h, right from the appopriate header.
343*312e26c8Safresh1sub sigs_match($l, $r)
344*312e26c8Safresh1{
345*312e26c8Safresh1	return !!0 unless types_match( $l->{ret}, $l->{ret} );
346*312e26c8Safresh1
347*312e26c8Safresh1	my @l_args = @{ $l->{args} || [] };
348*312e26c8Safresh1	my @r_args = @{ $r->{args} || [] };
349*312e26c8Safresh1
350*312e26c8Safresh1	for (\@l_args, \@r_args) {
351*312e26c8Safresh1		@{$_} = 'void' unless @{$_};
352*312e26c8Safresh1	}
353*312e26c8Safresh1
354*312e26c8Safresh1	for my $i ( 0 .. $#l_args ) {
355*312e26c8Safresh1		return !!0 unless types_match($l_args[$i], $r_args[$i]);
356*312e26c8Safresh1		last if $l_args[$i] eq '...';
357*312e26c8Safresh1	}
358*312e26c8Safresh1
359*312e26c8Safresh1	return !!1;
360*312e26c8Safresh1}
361