toke.c - OpenGrok cross reference for /openbsd-src/gnu/usr.bin/perl/toke.c

Lines Matching defs:UTF
106 #   define UTF cBOOL(!IN_BYTES)
108 #   define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
694                  t += UTF ? UTF8SKIP(t) : 1)
699             if (isIDFIRST_lazy_if_safe(t,PL_bufend,UTF))
703                      (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
704                      t += UTF ? UTF8SKIP(t) : 1)
712                           UTF8fARG(UTF, t - t_start, t_start));
720                 t += UTF ? UTF8SKIP(t) : 1;
725                      UTF8fARG(UTF, s - t, t));
728     yywarn(SvPV_nolen(message), UTF ? SVf_UTF8 : 0);
753         uni = UTF;
763         if (! UTF && LIKELY(PL_multi_close < 256)) {
1007 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
1031 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
1057 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
1061 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
1062 contains valid UTF-8.  Lexing code must be robust in the face of invalid
1069 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
1071 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
1081     return UTF;
1159 at C<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
1178     if (UTF) {
1256 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1429      * UTF-8 (because it was off), but now we do need to check it, or our
1432      * at the input we do the well-formed UTF-8 check.  If we aren't in the
1544     if (UTF) {
1596 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1610     if (UTF) {
1665 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1681         if (UTF)
2087     while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
2088         s += UTF ? UTF8SKIP(s) : 1;
2094                      UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
2230                   &&  UTF
2263     if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2311                                                                 UTF ? SVf_UTF8 : 0));
2321                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2827     result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
2830         yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
2846      * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
3017             /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
3020                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
3155     const bool s_is_utf8 = cBOOL(UTF);  /* Is the source string assumed to be
3161     STRLEN utf8_variant_count = 0;      /* When not in UTF-8, this counts the
3165                                            UTF-8) */
3233              * a byte that can't occur in legal UTF-8, and hence can signify a
3470                  * hyphen, the min, and the max.  For UTF-8, we need this
3472                  * bytes (is variant) when in UTF-8 (except we've already
3516                      * precise amount needed for the UTF-8 variants.  Just
3688             if (UTF
3797                             form_alien_digit_msg(8, len, s, send, UTF, FALSE));
3812                                                UTF))
3830                                                UTF))
3847                         /* Here, 'uv' won't fit unless we convert to UTF-8.
3882                         * since such escapes are likely longer than any UTF-8
3885                         * UTF-8 for it contains 14.  And, we have to allow for
4028                           * there to upgrade to UTF-8 for small enough code
4099                                 /* For the non-UTF-8 case, we can determine the
4218                             /* Make sure \N{} return is UTF-8.  This is because
4322          * to/from UTF-8.
4324          * If the input has the same representation in UTF-8 as not, it will be
4330             /* If neither source nor output is UTF-8, is also a single byte,
4332              * convert to UTF-8 */
4336         else if (s_is_utf8 && d_is_utf8) {   /* Both UTF-8, can just copy */
4365             goto default_action; /* Redo, having upgraded so both are UTF-8 */
4368                    UTF-8 for output.  It will occupy 2 bytes, but don't include
4602             if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4610                                          UTF ? SVf_UTF8 : 0,
4818                                     GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4824         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
5115                                    ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
5121     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
5128     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
5186                                                              UTF))
5190     while (    isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
5193         s += UTF ? UTF8SKIP(s) : 1;
5249         if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5254                 0, cBOOL(UTF), FALSE, FALSE);
5352         && (   isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
5403                             do { t += UTF ? UTF8SKIP(t) : 1; } while (t < PL_bufend && isSPACE(*t));
5413                             while (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
5414                                 t += UTF ? UTF8SKIP(t) : 1;
5440                                     UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
5456                     if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
5462                             && get_cvn_flags(tmpbuf, len, UTF
5468                                     UTF8fARG(UTF, len, tmpbuf));
5483                      && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
5487             else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5556     if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
5771                                     (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
5971             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6153         while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6174             sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
6334         if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
6457                            && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6459                         t += UTF ? UTF8SKIP(t) : 1;
6462             else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6463                 t += UTF ? UTF8SKIP(t) : 1;
6465                        && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6467                     t += UTF ? UTF8SKIP(t) : 1;
6584             && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6749                 || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
7039         if (UTF)
7058     if (UTF) {
7081     len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
7083         d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
7087                       UTF8fARG(UTF, (s - d), d),
7105                                    UTF))
7108                         GV_ADD | (UTF ? SVf_UTF8 : 0));
7134     if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
7182             if (UNLIKELY(isIDFIRST_lazy_if_safe(p, PL_bufend, UTF))) {
7247     if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
7259             yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7343         /* If it looks like the start of a BOM or raw UTF-16,
7607                          UTF8fARG(UTF, strlen(PL_tokenbuf),
7683         if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) {
7729                     UTF8fARG(UTF, len, PL_tokenbuf),
7744             && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7748                         UTF8fARG(UTF, len, PL_tokenbuf));
7814         if (   (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7857             if (UTF && !IN_BYTES
7901         && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7909             if (UTF && !IN_BYTES
8469         if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8482                     UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8926                           UTF8fARG(UTF, len, PL_tokenbuf));
9073                 newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
9092                                 (UTF ? SV_CATUTF8 : SV_CATBYTES));
9176         if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) {
9602                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
9707         if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
10020                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
10021             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
10034                             UTF ? SVf_UTF8 : 0);
10056                                                         UTF ? SVf_UTF8 : 0);
10081                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
10110                                          ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
10119                         UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
10127                                                       UTF ? SVf_UTF8 : 0 ));
10132                      | ( UTF ? SVf_UTF8 : 0 ),
10171     if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
10173         s += UTF ? UTF8SKIP(s) : 1;
10174         while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
10175             s += UTF ? UTF8SKIP(s) : 1;
10183             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
10205    <type> is assumed to be well formed UTF-8.
10313         yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
10330              /* The UTF-8 case must come first, otherwise things
10414     bool is_utf8 = cBOOL(UTF);
10446     bool is_utf8 = cBOOL(UTF);
10519      *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
10520      *      '{' without knowing if is UTF-8 or not. */
10702     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
10705         if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
10707                        UTF ? SVf_UTF8 : 0);
11083         if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
11088         while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
11089             peek += UTF ? UTF8SKIP(peek) : 1;
11458         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11538     while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
11539         d += UTF ? UTF8SKIP(d) : 1;
11601                                 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
11619             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
11707     /* The only non-UTF character that isn't a stand alone grapheme is
11724     if (! UTF || UTF8_IS_INVARIANT(*s)) {
11754         if (UTF) {
11773         deprecated_opening_delims = (UTF)
11806                              UTF8fARG(UTF, delim_byte_len, open_delim_str));
11809         close_delim_code = (UTF)
11822                              UTF8fARG(UTF, delim_byte_len, open_delim_str));
11895                 if (   UTF  /* All Non-UTF-8's are graphemes */
11915             if (! UTF || UTF8_IS_INVARIANT((U8) *s)) {
12777             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
13079                                  UTF8fARG(UTF, contlen, context));
13119             /* UTF-16 little-endian? (or UTF-32LE?) */
13120             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
13122                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
13125             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
13133             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13138         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
13141             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
13149             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13156             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13158             s += sizeof(BOM_UTF8) - 1;                     /* UTF-8 */
13166                        /* UTF-32 big-endian */
13168                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
13174                    * are a good indicator of UTF-16BE. */
13177                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
13182                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13192                    * are a good indicator of UTF-16LE. */
13195               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
13200               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13243         /* First, look in our buffer of existing UTF-8 data:  */
13265         /* OK, not a complete line there, so need to read some more UTF-16.
13272                    Gosh, UTF-16 is a pain. All the benefits of variable length,
13884         if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
13898             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);