Lines Matching defs:UTF

106 #   define UTF cBOOL(!IN_BYTES)
108 # define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
694 t += UTF ? UTF8SKIP(t) : 1)
699 if (isIDFIRST_lazy_if_safe(t,PL_bufend,UTF))
703 (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
704 t += UTF ? UTF8SKIP(t) : 1)
712 UTF8fARG(UTF, t - t_start, t_start));
720 t += UTF ? UTF8SKIP(t) : 1;
725 UTF8fARG(UTF, s - t, t));
728 yywarn(SvPV_nolen(message), UTF ? SVf_UTF8 : 0);
753 uni = UTF;
763 if (! UTF && LIKELY(PL_multi_close < 256)) {
1007 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
1031 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
1057 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
1061 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
1062 contains valid UTF-8. Lexing code must be robust in the face of invalid
1069 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
1071 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
1081 return UTF;
1159 at C<pv>. These octets are interpreted as either UTF-8 or Latin-1,
1178 if (UTF) {
1256 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1429 * UTF-8 (because it was off), but now we do need to check it, or our
1432 * at the input we do the well-formed UTF-8 check. If we aren't in the
1544 if (UTF) {
1596 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1610 if (UTF) {
1665 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1681 if (UTF)
2087 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
2088 s += UTF ? UTF8SKIP(s) : 1;
2094 UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
2230 && UTF
2263 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2311 UTF ? SVf_UTF8 : 0));
2321 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2827 result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
2830 yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
2846 * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
3017 /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
3020 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
3155 const bool s_is_utf8 = cBOOL(UTF); /* Is the source string assumed to be
3161 STRLEN utf8_variant_count = 0; /* When not in UTF-8, this counts the
3165 UTF-8) */
3233 * a byte that can't occur in legal UTF-8, and hence can signify a
3470 * hyphen, the min, and the max. For UTF-8, we need this
3472 * bytes (is variant) when in UTF-8 (except we've already
3516 * precise amount needed for the UTF-8 variants. Just
3688 if (UTF
3797 form_alien_digit_msg(8, len, s, send, UTF, FALSE));
3812 UTF))
3830 UTF))
3847 /* Here, 'uv' won't fit unless we convert to UTF-8.
3882 * since such escapes are likely longer than any UTF-8
3885 * UTF-8 for it contains 14. And, we have to allow for
4028 * there to upgrade to UTF-8 for small enough code
4099 /* For the non-UTF-8 case, we can determine the
4218 /* Make sure \N{} return is UTF-8. This is because
4322 * to/from UTF-8.
4324 * If the input has the same representation in UTF-8 as not, it will be
4330 /* If neither source nor output is UTF-8, is also a single byte,
4332 * convert to UTF-8 */
4336 else if (s_is_utf8 && d_is_utf8) { /* Both UTF-8, can just copy */
4365 goto default_action; /* Redo, having upgraded so both are UTF-8 */
4368 UTF-8 for output. It will occupy 2 bytes, but don't include
4602 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4610 UTF ? SVf_UTF8 : 0,
4818 GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4824 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
5115 ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
5121 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
5128 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
5186 UTF))
5190 while ( isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
5193 s += UTF ? UTF8SKIP(s) : 1;
5249 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5254 0, cBOOL(UTF), FALSE, FALSE);
5352 && ( isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
5403 do { t += UTF ? UTF8SKIP(t) : 1; } while (t < PL_bufend && isSPACE(*t));
5413 while (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
5414 t += UTF ? UTF8SKIP(t) : 1;
5440 UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
5456 if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
5462 && get_cvn_flags(tmpbuf, len, UTF
5468 UTF8fARG(UTF, len, tmpbuf));
5483 && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
5487 else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5556 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
5771 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
5971 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6153 while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6174 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
6334 if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
6457 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6459 t += UTF ? UTF8SKIP(t) : 1;
6462 else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6463 t += UTF ? UTF8SKIP(t) : 1;
6465 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6467 t += UTF ? UTF8SKIP(t) : 1;
6584 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6749 || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
7039 if (UTF)
7058 if (UTF) {
7081 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
7083 d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
7087 UTF8fARG(UTF, (s - d), d),
7105 UTF))
7108 GV_ADD | (UTF ? SVf_UTF8 : 0));
7134 if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
7182 if (UNLIKELY(isIDFIRST_lazy_if_safe(p, PL_bufend, UTF))) {
7247 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
7259 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7343 /* If it looks like the start of a BOM or raw UTF-16,
7607 UTF8fARG(UTF, strlen(PL_tokenbuf),
7683 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) {
7729 UTF8fARG(UTF, len, PL_tokenbuf),
7744 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7748 UTF8fARG(UTF, len, PL_tokenbuf));
7814 if ( ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7857 if (UTF && !IN_BYTES
7901 && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7909 if (UTF && !IN_BYTES
8469 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8482 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8926 UTF8fARG(UTF, len, PL_tokenbuf));
9073 newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
9092 (UTF ? SV_CATUTF8 : SV_CATBYTES));
9176 if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) {
9602 UTF ? SVf_UTF8 : 0, SVt_PVCV);
9707 if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
10020 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
10021 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
10034 UTF ? SVf_UTF8 : 0);
10056 UTF ? SVf_UTF8 : 0);
10081 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
10110 ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
10119 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
10127 UTF ? SVf_UTF8 : 0 ));
10132 | ( UTF ? SVf_UTF8 : 0 ),
10171 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
10173 s += UTF ? UTF8SKIP(s) : 1;
10174 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
10175 s += UTF ? UTF8SKIP(s) : 1;
10183 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
10205 <type> is assumed to be well formed UTF-8.
10313 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
10330 /* The UTF-8 case must come first, otherwise things
10414 bool is_utf8 = cBOOL(UTF);
10446 bool is_utf8 = cBOOL(UTF);
10519 * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
10520 * '{' without knowing if is UTF-8 or not. */
10702 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
10705 if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
10707 UTF ? SVf_UTF8 : 0);
11083 if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
11088 while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
11089 peek += UTF ? UTF8SKIP(peek) : 1;
11458 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11538 while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
11539 d += UTF ? UTF8SKIP(d) : 1;
11601 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
11619 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
11707 /* The only non-UTF character that isn't a stand alone grapheme is
11724 if (! UTF || UTF8_IS_INVARIANT(*s)) {
11754 if (UTF) {
11773 deprecated_opening_delims = (UTF)
11806 UTF8fARG(UTF, delim_byte_len, open_delim_str));
11809 close_delim_code = (UTF)
11822 UTF8fARG(UTF, delim_byte_len, open_delim_str));
11895 if ( UTF /* All Non-UTF-8's are graphemes */
11915 if (! UTF || UTF8_IS_INVARIANT((U8) *s)) {
12777 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
13079 UTF8fARG(UTF, contlen, context));
13119 /* UTF-16 little-endian? (or UTF-32LE?) */
13120 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
13122 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
13125 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
13133 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13138 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
13141 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
13149 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13156 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13158 s += sizeof(BOM_UTF8) - 1; /* UTF-8 */
13166 /* UTF-32 big-endian */
13168 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
13174 * are a good indicator of UTF-16BE. */
13177 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
13182 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13192 * are a good indicator of UTF-16LE. */
13195 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
13200 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13243 /* First, look in our buffer of existing UTF-8 data: */
13265 /* OK, not a complete line there, so need to read some more UTF-16.
13272 Gosh, UTF-16 is a pain. All the benefits of variable length,
13884 if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
13898 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);