xref: /openbsd-src/gnu/usr.bin/perl/lib/meta_notation.pm (revision 256a93a44f36679bee503f12e49566c2183f6181)
1*b8851fccSafresh1use strict;
2*b8851fccSafresh1use warnings;
3*b8851fccSafresh1
4*b8851fccSafresh1# A tiny private library routine which is a helper to several Perl core
5*b8851fccSafresh1# modules, to allow a paradigm to be implemented in a single place.  The name,
6*b8851fccSafresh1# contents, or even the existence of this file may be changed at any time and
7*b8851fccSafresh1# are NOT to be used by anything outside the Perl core.
8*b8851fccSafresh1
sub _meta_notation ($) {

    # Returns a copy of the input string in which every nonprintable
    # character below 0x100 has been replaced by a printable representation.
    # ASCII printables and all characters above 0xFF are returned unchanged.
    # (XXX Probably above-Latin1 characters should be converted to \x{...})
    #
    # Parameter: the string to convert (the caller's value is not modified).
    # Returns:   the converted copy.
    #
    # ASCII-range nonprintables, on every platform:
    #   \0 .. \x1F (which are "\c@" .. "\c_") become ^@, ^A, ^B, ... ^Z, ^[,
    #              ^\, ^], ^^, ^_
    #   \c?        becomes ^?
    #
    # Upper-Latin1-range characters on ASCII platforms are converted to Meta
    # notation, which is how this has always worked, and is retained for
    # backwards compatibility:
    #   \x80 .. \x9F become M-^@, M-^A, M-^B, ... M-^Z, M-^[, M-^\, M-^],
    #                M-^^, M-^_
    #   \xA0 .. \xFE become 'M-' followed by the character that has the same
    #                low seven bits, so that \xC1 becomes 'M-A', \xE2 becomes
    #                'M-b', etc.
    #   \xFF         becomes M-^?  Its low seven bits are DEL, which is not
    #                printable, so it is spelled the same way as \c? is above
    #                instead of emitting a raw DEL after the 'M-'.
    #
    # Upper-Latin1-range characters on EBCDIC platforms are converted into
    # '\x{...}'.  Meta notation doesn't make sense on EBCDIC platforms
    # because the ASCII-range printables are a mixture of upper bit set or
    # not.  [A-Za-z0-9] all have the upper bit set.  The underscore likely
    # doesn't; and other punctuation may or may not.  There's no simple
    # pattern.

    my $string = shift;

    # C0 controls: flipping bit 6 of the Unicode code point yields the
    # corresponding caret letter, which is then mapped to the native
    # character set so the same code is correct on EBCDIC.
    $string =~ s/([\0-\037])/
               sprintf("^%c",utf8::unicode_to_native(ord($1)^64))/xeg;
    $string =~ s/\c?/^?/g;

    if (ord("A") == 65) {   # ASCII platform

        # C1 controls: strip the high bit, then treat like a C0 control.
        $string =~ s/([\200-\237])/sprintf("M-^%c",(ord($1)&0177)^64)/eg;

        # \xFF must be handled separately from the rest of the range below:
        # stripping its high bit leaves DEL, not a printable.
        $string =~ s/\xFF/M-^?/g;

        # Everything else in upper Latin1 maps to an ASCII printable once
        # the high bit is stripped.
        $string =~ s/([\240-\376])/sprintf("M-%c"  ,ord($1)&0177)/eg;
    }
    else {
        # Leave alone things above \xff
        $string =~ s/( (?[ [\x00-\xFF] & [:^print:]])) /
                  sprintf("\\x{%X}", ord($1))/xaeg;
    }

    return $string;
}
52*b8851fccSafresh11
53