xref: /openbsd-src/gnu/usr.bin/perl/ext/Opcode/Opcode.pm (revision 1ad61ae0a79a724d2d3ec69e69c8e1d1ff6b53a0)
1package Opcode;
2
3use 5.006_001;
4
5use strict;
6
7our($VERSION, @ISA, @EXPORT_OK);
8
9$VERSION = "1.57";
10
11use Carp;
12use Exporter 'import';
13use XSLoader;
14
15BEGIN {
16    @EXPORT_OK = qw(
17	opset ops_to_opset
18	opset_to_ops opset_to_hex invert_opset
19	empty_opset full_opset
20	opdesc opcodes opmask define_optag
21	opmask_add verify_opset opdump
22    );
23}
24
25sub opset (;@);
26sub opset_to_hex ($);
27sub opdump (;$);
28use subs @EXPORT_OK;
29
30XSLoader::load();
31
32_init_optags();
33
34sub ops_to_opset { opset @_ }	# alias for old name
35
36sub opset_to_hex ($) {
37    return "(invalid opset)" unless verify_opset($_[0]);
38    unpack("h*",$_[0]);
39}
40
41sub opdump (;$) {
42	my $pat = shift;
43    # handy utility: perl -MOpcode=opdump -e 'opdump File'
44    foreach(opset_to_ops(full_opset)) {
45        my $op = sprintf "  %12s  %s\n", $_, opdesc($_);
46		next if defined $pat and $op !~ m/$pat/i;
47		print $op;
48    }
49}
50
51
52
53sub _init_optags {
54    my(%all, %seen);
55    @all{opset_to_ops(full_opset)} = (); # keys only
56
57    local($_);
58    local($/) = "\n=cut"; # skip to optags definition section
59    <DATA>;
60    $/ = "\n=";		# now read in 'pod section' chunks
61    while(<DATA>) {
62	next unless m/^item\s+(:\w+)/;
63	my $tag = $1;
64
65	# Split into lines, keep only indented lines
66	my @lines = grep { m/^\s/    } split(/\n/);
67	foreach (@lines) { s/(?:\t|--).*//  } # delete comments
68	my @ops   = map  { split ' ' } @lines; # get op words
69
70	foreach(@ops) {
71	    warn "$tag - $_ already tagged in $seen{$_}\n" if $seen{$_};
72	    $seen{$_} = $tag;
73	    delete $all{$_};
74	}
75	# opset will croak on invalid names
76	define_optag($tag, opset(@ops));
77    }
78    close(DATA);
79    warn "Untagged opnames: ".join(' ',keys %all)."\n" if %all;
80}
81
82
831;
84
85__DATA__
86
87=head1 NAME
88
89Opcode - Disable named opcodes when compiling perl code
90
91=head1 SYNOPSIS
92
93  use Opcode;
94
95
96=head1 DESCRIPTION
97
98Perl code is always compiled into an internal format before execution.
99
100Evaluating perl code (e.g. via "eval" or "do 'file'") causes
101the code to be compiled into an internal format and then,
102provided there was no error in the compilation, executed.
103The internal format is based on many distinct I<opcodes>.
104
105By default no opmask is in effect and any code can be compiled.
106
107The Opcode module allow you to define an I<operator mask> to be in
108effect when perl I<next> compiles any code.  Attempting to compile code
109which contains a masked opcode will cause the compilation to fail
110with an error. The code will not be executed.
111
112=head1 NOTE
113
114The Opcode module is not usually used directly. See the ops pragma and
115Safe modules for more typical uses.
116
117=head1 WARNING
118
119The Opcode module does not implement an effective sandbox for
120evaluating untrusted code with the perl interpreter.
121
122Bugs in the perl interpreter that could be abused to bypass
123Opcode restrictions are not treated as vulnerabilities. See
124L<perlsecpolicy> for additional information.
125
126The authors make B<no warranty>, implied or otherwise, about the
127suitability of this software for safety or security purposes.
128
129The authors shall not in any case be liable for special, incidental,
130consequential, indirect or other similar damages arising from the use
131of this software.
132
133Your mileage will vary. If in any doubt B<do not use it>.
134
135
136=head1 Operator Names and Operator Lists
137
138The canonical list of operator names is the contents of the array
139PL_op_name defined and initialised in file F<opcode.h> of the Perl
140source distribution (and installed into the perl library).
141
142Each operator has both a terse name (its opname) and a more verbose or
143recognisable descriptive name. The opdesc function can be used to
144return a list of descriptions for a list of operators.
145
146Many of the functions and methods listed below take a list of
147operators as parameters. Most operator lists can be made up of several
148types of element. Each element can be one of
149
150=over 8
151
152=item an operator name (opname)
153
154Operator names are typically small lowercase words like enterloop,
155leaveloop, last, next, redo etc. Sometimes they are rather cryptic
156like gv2cv, i_ncmp and ftsvtx.
157
158=item an operator tag name (optag)
159
160Operator tags can be used to refer to groups (or sets) of operators.
161Tag names always begin with a colon. The Opcode module defines several
162optags and the user can define others using the define_optag function.
163
164=item a negated opname or optag
165
166An opname or optag can be prefixed with an exclamation mark, e.g., !mkdir.
167Negating an opname or optag means remove the corresponding ops from the
168accumulated set of ops at that point.
169
170=item an operator set (opset)
171
172An I<opset> as a binary string of approximately 44 bytes which holds a
173set or zero or more operators.
174
175The opset and opset_to_ops functions can be used to convert from
176a list of operators to an opset and I<vice versa>.
177
178Wherever a list of operators can be given you can use one or more opsets.
179See also Manipulating Opsets below.
180
181=back
182
183
184=head1 Opcode Functions
185
186The Opcode package contains functions for manipulating operator names
187tags and sets. All are available for export by the package.
188
189=over 8
190
191=item opcodes
192
193In a scalar context opcodes returns the number of opcodes in this
194version of perl (around 350 for perl-5.7.0).
195
196In a list context it returns a list of all the operator names.
197(Not yet implemented, use @names = opset_to_ops(full_opset).)
198
199=item opset (OP, ...)
200
201Returns an opset containing the listed operators.
202
203=item opset_to_ops (OPSET)
204
205Returns a list of operator names corresponding to those operators in
206the set.
207
208=item opset_to_hex (OPSET)
209
210Returns a string representation of an opset. Can be handy for debugging.
211
212=item full_opset
213
214Returns an opset which includes all operators.
215
216=item empty_opset
217
218Returns an opset which contains no operators.
219
220=item invert_opset (OPSET)
221
222Returns an opset which is the inverse set of the one supplied.
223
224=item verify_opset (OPSET, ...)
225
226Returns true if the supplied opset looks like a valid opset (is the
227right length etc) otherwise it returns false. If an optional second
228parameter is true then verify_opset will croak on an invalid opset
229instead of returning false.
230
231Most of the other Opcode functions call verify_opset automatically
232and will croak if given an invalid opset.
233
234=item define_optag (OPTAG, OPSET)
235
236Define OPTAG as a symbolic name for OPSET. Optag names always start
237with a colon C<:>.
238
239The optag name used must not be defined already (define_optag will
240croak if it is already defined). Optag names are global to the perl
241process and optag definitions cannot be altered or deleted once
242defined.
243
244It is strongly recommended that applications using Opcode should use a
245leading capital letter on their tag names since lowercase names are
246reserved for use by the Opcode module. If using Opcode within a module
247you should prefix your tags names with the name of your module to
248ensure uniqueness and thus avoid clashes with other modules.
249
250=item opmask_add (OPSET)
251
252Adds the supplied opset to the current opmask. Note that there is
253currently I<no> mechanism for unmasking ops once they have been masked.
254This is intentional.
255
256=item opmask
257
258Returns an opset corresponding to the current opmask.
259
260=item opdesc (OP, ...)
261
262This takes a list of operator names and returns the corresponding list
263of operator descriptions.
264
265=item opdump (PAT)
266
267Dumps to STDOUT a two column list of op names and op descriptions.
268If an optional pattern is given then only lines which match the
269(case insensitive) pattern will be output.
270
271It's designed to be used as a handy command line utility:
272
273	perl -MOpcode=opdump -e opdump
274	perl -MOpcode=opdump -e 'opdump Eval'
275
276=back
277
278=head1 Manipulating Opsets
279
280Opsets may be manipulated using the perl bit vector operators & (and), | (or),
281^ (xor) and ~ (negate/invert).
282
283However you should never rely on the numerical position of any opcode
284within the opset. In other words both sides of a bit vector operator
285should be opsets returned from Opcode functions.
286
287Also, since the number of opcodes in your current version of perl might
288not be an exact multiple of eight, there may be unused bits in the last
289byte of an upset. This should not cause any problems (Opcode functions
290ignore those extra bits) but it does mean that using the ~ operator
291will typically not produce the same 'physical' opset 'string' as the
292invert_opset function.
293
294
295=head1 TO DO (maybe)
296
297    $bool = opset_eq($opset1, $opset2)	true if opsets are logically
298					equivalent
299    $yes = opset_can($opset, @ops)	true if $opset has all @ops set
300
301    @diff = opset_diff($opset1, $opset2) => ('foo', '!bar', ...)
302
303=cut
304
305# the =cut above is used by _init_optags() to get here quickly
306
307=head1 Predefined Opcode Tags
308
309=over 5
310
311=item :base_core
312
313    null stub scalar pushmark wantarray const defined undef
314
315    rv2sv sassign
316
317    rv2av aassign aelem aelemfast aelemfast_lex aslice kvaslice
318    av2arylen
319
320    rv2hv helem hslice kvhslice each values keys exists delete
321    aeach akeys avalues multideref argelem argdefelem argcheck
322
323    preinc i_preinc predec i_predec postinc i_postinc
324    postdec i_postdec int hex oct abs pow multiply i_multiply
325    divide i_divide modulo i_modulo add i_add subtract i_subtract
326
327    left_shift right_shift bit_and bit_xor bit_or nbit_and
328    nbit_xor nbit_or sbit_and sbit_xor sbit_or negate i_negate not
329    complement ncomplement scomplement
330
331    lt i_lt gt i_gt le i_le ge i_ge eq i_eq ne i_ne ncmp i_ncmp
332    slt sgt sle sge seq sne scmp
333    isa
334
335    substr vec stringify study pos length index rindex ord chr
336
337    ucfirst lcfirst uc lc fc quotemeta trans transr chop schop
338    chomp schomp
339
340    match split qr
341
342    list lslice splice push pop shift unshift reverse
343
344    cond_expr flip flop andassign orassign dorassign and or dor xor
345
346    warn die lineseq nextstate scope enter leave
347
348    rv2cv anoncode prototype coreargs avhvswitch anonconst
349
350    entersub leavesub leavesublv return method method_named
351    method_super method_redir method_redir_super
352     -- XXX loops via recursion?
353
354    cmpchain_and cmpchain_dup
355
356    is_bool
357    is_weak weaken unweaken
358
359    leaveeval -- needed for Safe to operate, is safe
360		 without entereval
361
362=item :base_mem
363
364These memory related ops are not included in :base_core because they
365can easily be used to implement a resource attack (e.g., consume all
366available memory).
367
368    concat multiconcat repeat join range
369
370    anonlist anonhash
371
372Note that despite the existence of this optag a memory resource attack
373may still be possible using only :base_core ops.
374
375Disabling these ops is a I<very> heavy handed way to attempt to prevent
376a memory resource attack. It's probable that a specific memory limit
377mechanism will be added to perl in the near future.
378
379=item :base_loop
380
381These loop ops are not included in :base_core because they can easily be
382used to implement a resource attack (e.g., consume all available CPU time).
383
384    grepstart grepwhile
385    mapstart mapwhile
386    enteriter iter
387    enterloop leaveloop unstack
388    last next redo
389    goto
390
391=item :base_io
392
393These ops enable I<filehandle> (rather than filename) based input and
394output. These are safe on the assumption that only pre-existing
395filehandles are available for use.  Usually, to create new filehandles
396other ops such as open would need to be enabled, if you don't take into
397account the magical open of ARGV.
398
399    readline rcatline getc read
400
401    formline enterwrite leavewrite
402
403    print say sysread syswrite send recv
404
405    eof tell seek sysseek
406
407    readdir telldir seekdir rewinddir
408
409=item :base_orig
410
411These are a hotchpotch of opcodes still waiting to be considered
412
413    gvsv gv gelem
414
415    padsv padav padhv padcv padany padrange introcv clonecv
416
417    once
418
419    rv2gv refgen srefgen ref refassign lvref lvrefslice lvavref
420    blessed refaddr reftype
421
422    bless -- could be used to change ownership of objects
423	     (reblessing)
424
425     regcmaybe regcreset regcomp subst substcont
426
427    sprintf prtf -- can core dump
428
429    crypt
430
431    tie untie
432
433    dbmopen dbmclose
434    sselect select
435    pipe_op sockpair
436
437    getppid getpgrp setpgrp getpriority setpriority
438    localtime gmtime
439
440    entertry leavetry -- can be used to 'hide' fatal errors
441    entertrycatch poptry catch leavetrycatch -- similar
442
443    entergiven leavegiven
444    enterwhen leavewhen
445    break continue
446    smartmatch
447
448    pushdefer
449
450    custom -- where should this go
451
452    ceil floor
453
454=item :base_math
455
456These ops are not included in :base_core because of the risk of them being
457used to generate floating point exceptions (which would have to be caught
458using a $SIG{FPE} handler).
459
460    atan2 sin cos exp log sqrt
461
462These ops are not included in :base_core because they have an effect
463beyond the scope of the compartment.
464
465    rand srand
466
467=item :base_thread
468
469These ops are related to multi-threading.
470
471    lock
472
473=item :default
474
475A handy tag name for a I<reasonable> default set of ops.  (The current ops
476allowed are unstable while development continues. It will change.)
477
478    :base_core :base_mem :base_loop :base_orig :base_thread
479
480This list used to contain :base_io prior to Opcode 1.07.
481
482If safety matters to you (and why else would you be using the Opcode module?)
483then you should not rely on the definition of this, or indeed any other, optag!
484
485=item :filesys_read
486
487    stat lstat readlink
488
489    ftatime ftblk ftchr ftctime ftdir fteexec fteowned
490    fteread ftewrite ftfile ftis ftlink ftmtime ftpipe
491    ftrexec ftrowned ftrread ftsgid ftsize ftsock ftsuid
492    fttty ftzero ftrwrite ftsvtx
493
494    fttext ftbinary
495
496    fileno
497
498=item :sys_db
499
500    ghbyname ghbyaddr ghostent shostent ehostent      -- hosts
501    gnbyname gnbyaddr gnetent snetent enetent         -- networks
502    gpbyname gpbynumber gprotoent sprotoent eprotoent -- protocols
503    gsbyname gsbyport gservent sservent eservent      -- services
504
505    gpwnam gpwuid gpwent spwent epwent getlogin       -- users
506    ggrnam ggrgid ggrent sgrent egrent                -- groups
507
508=item :browse
509
510A handy tag name for a I<reasonable> default set of ops beyond the
511:default optag.  Like :default (and indeed all the other optags) its
512current definition is unstable while development continues. It will change.
513
514The :browse tag represents the next step beyond :default. It is a
515superset of the :default ops and adds :filesys_read the :sys_db.
516The intent being that scripts can access more (possibly sensitive)
517information about your system but not be able to change it.
518
519    :default :filesys_read :sys_db
520
521=item :filesys_open
522
523    sysopen open close
524    umask binmode
525
526    open_dir closedir -- other dir ops are in :base_io
527
528=item :filesys_write
529
530    link unlink rename symlink truncate
531
532    mkdir rmdir
533
534    utime chmod chown
535
536    fcntl -- not strictly filesys related, but possibly as
537	     dangerous?
538
539=item :subprocess
540
541    backtick system
542
543    fork
544
545    wait waitpid
546
547    glob -- access to Cshell via <`rm *`>
548
549=item :ownprocess
550
551    exec exit kill
552
553    time tms -- could be used for timing attacks (paranoid?)
554
555=item :others
556
557This tag holds groups of assorted specialist opcodes that don't warrant
558having optags defined for them.
559
560SystemV Interprocess Communications:
561
562    msgctl msgget msgrcv msgsnd
563
564    semctl semget semop
565
566    shmctl shmget shmread shmwrite
567
568=item :load
569
570This tag holds opcodes related to loading modules and getting information
571about calling environment and args.
572
573    require dofile
574    caller runcv
575
576=item :still_to_be_decided
577
578    chdir
579    flock ioctl
580
581    socket getpeername ssockopt
582    bind connect listen accept shutdown gsockopt getsockname
583
584    sleep alarm -- changes global timer state and signal handling
585    sort -- assorted problems including core dumps
586    tied -- can be used to access object implementing a tie
587    pack unpack -- can be used to create/use memory pointers
588
589    hintseval -- constant op holding eval hints
590
591    entereval -- can be used to hide code from initial compile
592
593    reset
594
595    dbstate -- perl -d version of nextstate(ment) opcode
596
597=item :dangerous
598
599This tag is simply a bucket for opcodes that are unlikely to be used via
600a tag name but need to be tagged for completeness and documentation.
601
602    syscall dump chroot
603
604=back
605
606=head1 SEE ALSO
607
608L<ops> -- perl pragma interface to Opcode module.
609
610L<Safe> -- Opcode and namespace limited execution compartments
611
612=head1 AUTHORS
613
614Originally designed and implemented by Malcolm Beattie,
615mbeattie@sable.ox.ac.uk as part of Safe version 1.
616
617Split out from Safe module version 1, named opcode tags and other
618changes added by Tim Bunce.
619
620=cut
621