18dffb485Schristos /* Parse a printf-style format string. 28dffb485Schristos 3*5ba1f45fSchristos Copyright (C) 1986-2024 Free Software Foundation, Inc. 48dffb485Schristos 58dffb485Schristos This file is part of GDB. 68dffb485Schristos 78dffb485Schristos This program is free software; you can redistribute it and/or modify 88dffb485Schristos it under the terms of the GNU General Public License as published by 98dffb485Schristos the Free Software Foundation; either version 3 of the License, or 108dffb485Schristos (at your option) any later version. 118dffb485Schristos 128dffb485Schristos This program is distributed in the hope that it will be useful, 138dffb485Schristos but WITHOUT ANY WARRANTY; without even the implied warranty of 148dffb485Schristos MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 158dffb485Schristos GNU General Public License for more details. 168dffb485Schristos 178dffb485Schristos You should have received a copy of the GNU General Public License 188dffb485Schristos along with this program. If not, see <http://www.gnu.org/licenses/>. */ 198dffb485Schristos 208dffb485Schristos #include "format.h" 218dffb485Schristos 22*5ba1f45fSchristos format_pieces::format_pieces (const char **arg, bool gdb_extensions, 23*5ba1f45fSchristos bool value_extension) 248dffb485Schristos { 258dffb485Schristos const char *s; 268dffb485Schristos const char *string; 278dffb485Schristos const char *prev_start; 288dffb485Schristos const char *percent_loc; 298dffb485Schristos char *sub_start, *current_substring; 308dffb485Schristos enum argclass this_argclass; 318dffb485Schristos 328dffb485Schristos s = *arg; 338dffb485Schristos 348dffb485Schristos if (gdb_extensions) 358dffb485Schristos { 368dffb485Schristos string = *arg; 378dffb485Schristos *arg += strlen (*arg); 388dffb485Schristos } 398dffb485Schristos else 408dffb485Schristos { 418dffb485Schristos /* Parse the format-control string and copy it into the string STRING, 428dffb485Schristos processing some kinds of escape sequence. */ 438dffb485Schristos 448dffb485Schristos char *f = (char *) alloca (strlen (s) + 1); 458dffb485Schristos string = f; 468dffb485Schristos 47*5ba1f45fSchristos while (*s != '"' && *s != '\0') 488dffb485Schristos { 498dffb485Schristos int c = *s++; 508dffb485Schristos switch (c) 518dffb485Schristos { 528dffb485Schristos case '\0': 538dffb485Schristos continue; 548dffb485Schristos 558dffb485Schristos case '\\': 568dffb485Schristos switch (c = *s++) 578dffb485Schristos { 588dffb485Schristos case '\\': 598dffb485Schristos *f++ = '\\'; 608dffb485Schristos break; 618dffb485Schristos case 'a': 628dffb485Schristos *f++ = '\a'; 638dffb485Schristos break; 648dffb485Schristos case 'b': 658dffb485Schristos *f++ = '\b'; 668dffb485Schristos break; 678dffb485Schristos case 'e': 688dffb485Schristos *f++ = '\e'; 698dffb485Schristos break; 708dffb485Schristos case 'f': 718dffb485Schristos *f++ = '\f'; 728dffb485Schristos break; 738dffb485Schristos case 'n': 748dffb485Schristos *f++ = '\n'; 758dffb485Schristos break; 768dffb485Schristos case 'r': 778dffb485Schristos *f++ = '\r'; 788dffb485Schristos break; 798dffb485Schristos case 't': 808dffb485Schristos *f++ = '\t'; 818dffb485Schristos break; 828dffb485Schristos case 'v': 838dffb485Schristos *f++ = '\v'; 848dffb485Schristos break; 858dffb485Schristos case '"': 868dffb485Schristos *f++ = '"'; 878dffb485Schristos break; 888dffb485Schristos default: 898dffb485Schristos /* ??? TODO: handle other escape sequences. */ 908dffb485Schristos error (_("Unrecognized escape character \\%c in format string."), 918dffb485Schristos c); 928dffb485Schristos } 938dffb485Schristos break; 948dffb485Schristos 958dffb485Schristos default: 968dffb485Schristos *f++ = c; 978dffb485Schristos } 988dffb485Schristos } 998dffb485Schristos 1008dffb485Schristos /* Terminate our escape-processed copy. */ 1018dffb485Schristos *f++ = '\0'; 1028dffb485Schristos 1038dffb485Schristos /* Whether the format string ended with double-quote or zero, we're 1048dffb485Schristos done with it; it's up to callers to complain about syntax. */ 1058dffb485Schristos *arg = s; 1068dffb485Schristos } 1078dffb485Schristos 1088dffb485Schristos /* Need extra space for the '\0's. Doubling the size is sufficient. */ 1098dffb485Schristos 1108dffb485Schristos current_substring = (char *) xmalloc (strlen (string) * 2 + 1000); 1118dffb485Schristos m_storage.reset (current_substring); 1128dffb485Schristos 1138dffb485Schristos /* Now scan the string for %-specs and see what kinds of args they want. 1148dffb485Schristos argclass classifies the %-specs so we can give printf-type functions 1158dffb485Schristos something of the right size. */ 1168dffb485Schristos 1178dffb485Schristos const char *f = string; 1188dffb485Schristos prev_start = string; 1198dffb485Schristos while (*f) 1208dffb485Schristos if (*f++ == '%') 1218dffb485Schristos { 1228dffb485Schristos int seen_hash = 0, seen_zero = 0, lcount = 0, seen_prec = 0; 1238dffb485Schristos int seen_space = 0, seen_plus = 0; 1248dffb485Schristos int seen_big_l = 0, seen_h = 0, seen_big_h = 0; 1258dffb485Schristos int seen_big_d = 0, seen_double_big_d = 0; 1268dffb485Schristos int seen_size_t = 0; 1278dffb485Schristos int bad = 0; 1288dffb485Schristos int n_int_args = 0; 1298dffb485Schristos bool seen_i64 = false; 1308dffb485Schristos 1318dffb485Schristos /* Skip over "%%", it will become part of a literal piece. */ 1328dffb485Schristos if (*f == '%') 1338dffb485Schristos { 1348dffb485Schristos f++; 1358dffb485Schristos continue; 1368dffb485Schristos } 1378dffb485Schristos 1388dffb485Schristos sub_start = current_substring; 1398dffb485Schristos 1408dffb485Schristos strncpy (current_substring, prev_start, f - 1 - prev_start); 1418dffb485Schristos current_substring += f - 1 - prev_start; 1428dffb485Schristos *current_substring++ = '\0'; 1438dffb485Schristos 1448dffb485Schristos if (*sub_start != '\0') 1458dffb485Schristos m_pieces.emplace_back (sub_start, literal_piece, 0); 1468dffb485Schristos 1478dffb485Schristos percent_loc = f - 1; 1488dffb485Schristos 1498dffb485Schristos /* Check the validity of the format specifier, and work 1508dffb485Schristos out what argument it expects. We only accept C89 1518dffb485Schristos format strings, with the exception of long long (which 1528dffb485Schristos we autoconf for). */ 1538dffb485Schristos 1548dffb485Schristos /* The first part of a format specifier is a set of flag 1558dffb485Schristos characters. */ 1568dffb485Schristos while (*f != '\0' && strchr ("0-+ #", *f)) 1578dffb485Schristos { 1588dffb485Schristos if (*f == '#') 1598dffb485Schristos seen_hash = 1; 1608dffb485Schristos else if (*f == '0') 1618dffb485Schristos seen_zero = 1; 1628dffb485Schristos else if (*f == ' ') 1638dffb485Schristos seen_space = 1; 1648dffb485Schristos else if (*f == '+') 1658dffb485Schristos seen_plus = 1; 1668dffb485Schristos f++; 1678dffb485Schristos } 1688dffb485Schristos 1698dffb485Schristos /* The next part of a format specifier is a width. */ 1708dffb485Schristos if (gdb_extensions && *f == '*') 1718dffb485Schristos { 1728dffb485Schristos ++f; 1738dffb485Schristos ++n_int_args; 1748dffb485Schristos } 1758dffb485Schristos else 1768dffb485Schristos { 1778dffb485Schristos while (*f != '\0' && strchr ("0123456789", *f)) 1788dffb485Schristos f++; 1798dffb485Schristos } 1808dffb485Schristos 1818dffb485Schristos /* The next part of a format specifier is a precision. */ 1828dffb485Schristos if (*f == '.') 1838dffb485Schristos { 1848dffb485Schristos seen_prec = 1; 1858dffb485Schristos f++; 1868dffb485Schristos if (gdb_extensions && *f == '*') 1878dffb485Schristos { 1888dffb485Schristos ++f; 1898dffb485Schristos ++n_int_args; 1908dffb485Schristos } 1918dffb485Schristos else 1928dffb485Schristos { 1938dffb485Schristos while (*f != '\0' && strchr ("0123456789", *f)) 1948dffb485Schristos f++; 1958dffb485Schristos } 1968dffb485Schristos } 1978dffb485Schristos 1988dffb485Schristos /* The next part of a format specifier is a length modifier. */ 1998dffb485Schristos switch (*f) 2008dffb485Schristos { 2018dffb485Schristos case 'h': 2028dffb485Schristos seen_h = 1; 2038dffb485Schristos f++; 2048dffb485Schristos break; 2058dffb485Schristos case 'l': 2068dffb485Schristos f++; 2078dffb485Schristos lcount++; 2088dffb485Schristos if (*f == 'l') 2098dffb485Schristos { 2108dffb485Schristos f++; 2118dffb485Schristos lcount++; 2128dffb485Schristos } 2138dffb485Schristos break; 2148dffb485Schristos case 'L': 2158dffb485Schristos seen_big_l = 1; 2168dffb485Schristos f++; 2178dffb485Schristos break; 2188dffb485Schristos case 'H': 2198dffb485Schristos /* Decimal32 modifier. */ 2208dffb485Schristos seen_big_h = 1; 2218dffb485Schristos f++; 2228dffb485Schristos break; 2238dffb485Schristos case 'D': 2248dffb485Schristos /* Decimal64 and Decimal128 modifiers. */ 2258dffb485Schristos f++; 2268dffb485Schristos 2278dffb485Schristos /* Check for a Decimal128. */ 2288dffb485Schristos if (*f == 'D') 2298dffb485Schristos { 2308dffb485Schristos f++; 2318dffb485Schristos seen_double_big_d = 1; 2328dffb485Schristos } 2338dffb485Schristos else 2348dffb485Schristos seen_big_d = 1; 2358dffb485Schristos break; 2368dffb485Schristos case 'z': 2378dffb485Schristos /* For size_t or ssize_t. */ 2388dffb485Schristos seen_size_t = 1; 2398dffb485Schristos f++; 2408dffb485Schristos break; 2418dffb485Schristos case 'I': 2428dffb485Schristos /* Support the Windows '%I64' extension, because an 2438dffb485Schristos earlier call to format_pieces might have converted %lld 2448dffb485Schristos to %I64d. */ 2458dffb485Schristos if (f[1] == '6' && f[2] == '4') 2468dffb485Schristos { 2478dffb485Schristos f += 3; 2488dffb485Schristos lcount = 2; 2498dffb485Schristos seen_i64 = true; 2508dffb485Schristos } 2518dffb485Schristos break; 2528dffb485Schristos } 2538dffb485Schristos 2548dffb485Schristos switch (*f) 2558dffb485Schristos { 2568dffb485Schristos case 'u': 2578dffb485Schristos if (seen_hash) 2588dffb485Schristos bad = 1; 259*5ba1f45fSchristos [[fallthrough]]; 2608dffb485Schristos 2618dffb485Schristos case 'o': 2628dffb485Schristos case 'x': 2638dffb485Schristos case 'X': 2648dffb485Schristos if (seen_space || seen_plus) 2658dffb485Schristos bad = 1; 266*5ba1f45fSchristos [[fallthrough]]; 2678dffb485Schristos 2688dffb485Schristos case 'd': 2698dffb485Schristos case 'i': 2708dffb485Schristos if (seen_size_t) 2718dffb485Schristos this_argclass = size_t_arg; 2728dffb485Schristos else if (lcount == 0) 2738dffb485Schristos this_argclass = int_arg; 2748dffb485Schristos else if (lcount == 1) 2758dffb485Schristos this_argclass = long_arg; 2768dffb485Schristos else 2778dffb485Schristos this_argclass = long_long_arg; 2788dffb485Schristos 2798dffb485Schristos if (seen_big_l) 2808dffb485Schristos bad = 1; 2818dffb485Schristos break; 2828dffb485Schristos 2838dffb485Schristos case 'c': 2848dffb485Schristos this_argclass = lcount == 0 ? int_arg : wide_char_arg; 2858dffb485Schristos if (lcount > 1 || seen_h || seen_big_l) 2868dffb485Schristos bad = 1; 2878dffb485Schristos if (seen_prec || seen_zero || seen_space || seen_plus) 2888dffb485Schristos bad = 1; 2898dffb485Schristos break; 2908dffb485Schristos 2918dffb485Schristos case 'p': 2928dffb485Schristos this_argclass = ptr_arg; 2938dffb485Schristos if (lcount || seen_h || seen_big_l) 2948dffb485Schristos bad = 1; 2958dffb485Schristos if (seen_prec) 2968dffb485Schristos bad = 1; 2978dffb485Schristos if (seen_hash || seen_zero || seen_space || seen_plus) 2988dffb485Schristos bad = 1; 2998dffb485Schristos 3008dffb485Schristos if (gdb_extensions) 3018dffb485Schristos { 3028dffb485Schristos switch (f[1]) 3038dffb485Schristos { 3048dffb485Schristos case 's': 3058dffb485Schristos case 'F': 3068dffb485Schristos case '[': 3078dffb485Schristos case ']': 3088dffb485Schristos f++; 3098dffb485Schristos break; 3108dffb485Schristos } 3118dffb485Schristos } 3128dffb485Schristos 3138dffb485Schristos break; 3148dffb485Schristos 3158dffb485Schristos case 's': 3168dffb485Schristos this_argclass = lcount == 0 ? string_arg : wide_string_arg; 3178dffb485Schristos if (lcount > 1 || seen_h || seen_big_l) 3188dffb485Schristos bad = 1; 3198dffb485Schristos if (seen_zero || seen_space || seen_plus) 3208dffb485Schristos bad = 1; 3218dffb485Schristos break; 3228dffb485Schristos 3238dffb485Schristos case 'e': 3248dffb485Schristos case 'f': 3258dffb485Schristos case 'g': 3268dffb485Schristos case 'E': 3278dffb485Schristos case 'G': 3288dffb485Schristos if (seen_double_big_d) 3298dffb485Schristos this_argclass = dec128float_arg; 3308dffb485Schristos else if (seen_big_d) 3318dffb485Schristos this_argclass = dec64float_arg; 3328dffb485Schristos else if (seen_big_h) 3338dffb485Schristos this_argclass = dec32float_arg; 3348dffb485Schristos else if (seen_big_l) 3358dffb485Schristos this_argclass = long_double_arg; 3368dffb485Schristos else 3378dffb485Schristos this_argclass = double_arg; 3388dffb485Schristos 3398dffb485Schristos if (lcount || seen_h) 3408dffb485Schristos bad = 1; 3418dffb485Schristos break; 3428dffb485Schristos 343*5ba1f45fSchristos case 'V': 344*5ba1f45fSchristos if (!value_extension) 345*5ba1f45fSchristos error (_("Unrecognized format specifier '%c' in printf"), *f); 346*5ba1f45fSchristos 347*5ba1f45fSchristos if (lcount > 1 || seen_h || seen_big_h || seen_big_h 348*5ba1f45fSchristos || seen_big_d || seen_double_big_d || seen_size_t 349*5ba1f45fSchristos || seen_prec || seen_zero || seen_space || seen_plus) 350*5ba1f45fSchristos bad = 1; 351*5ba1f45fSchristos 352*5ba1f45fSchristos this_argclass = value_arg; 353*5ba1f45fSchristos 354*5ba1f45fSchristos if (f[1] == '[') 355*5ba1f45fSchristos { 356*5ba1f45fSchristos /* Move F forward to the next ']' character if such a 357*5ba1f45fSchristos character exists, otherwise leave F unchanged. */ 358*5ba1f45fSchristos const char *tmp = strchr (f, ']'); 359*5ba1f45fSchristos if (tmp != nullptr) 360*5ba1f45fSchristos f = tmp; 361*5ba1f45fSchristos } 362*5ba1f45fSchristos break; 363*5ba1f45fSchristos 3648dffb485Schristos case '*': 3658dffb485Schristos error (_("`*' not supported for precision or width in printf")); 3668dffb485Schristos 3678dffb485Schristos case 'n': 3688dffb485Schristos error (_("Format specifier `n' not supported in printf")); 3698dffb485Schristos 3708dffb485Schristos case '\0': 3718dffb485Schristos error (_("Incomplete format specifier at end of format string")); 3728dffb485Schristos 3738dffb485Schristos default: 3748dffb485Schristos error (_("Unrecognized format specifier '%c' in printf"), *f); 3758dffb485Schristos } 3768dffb485Schristos 3778dffb485Schristos if (bad) 3788dffb485Schristos error (_("Inappropriate modifiers to " 3798dffb485Schristos "format specifier '%c' in printf"), 3808dffb485Schristos *f); 3818dffb485Schristos 3828dffb485Schristos f++; 3838dffb485Schristos 3848dffb485Schristos sub_start = current_substring; 3858dffb485Schristos 3868dffb485Schristos if (lcount > 1 && !seen_i64 && USE_PRINTF_I64) 3878dffb485Schristos { 3888dffb485Schristos /* Windows' printf does support long long, but not the usual way. 3898dffb485Schristos Convert %lld to %I64d. */ 3908dffb485Schristos int length_before_ll = f - percent_loc - 1 - lcount; 3918dffb485Schristos 3928dffb485Schristos strncpy (current_substring, percent_loc, length_before_ll); 3938dffb485Schristos strcpy (current_substring + length_before_ll, "I64"); 3948dffb485Schristos current_substring[length_before_ll + 3] = 3958dffb485Schristos percent_loc[length_before_ll + lcount]; 3968dffb485Schristos current_substring += length_before_ll + 4; 3978dffb485Schristos } 3988dffb485Schristos else if (this_argclass == wide_string_arg 3998dffb485Schristos || this_argclass == wide_char_arg) 4008dffb485Schristos { 4018dffb485Schristos /* Convert %ls or %lc to %s. */ 4028dffb485Schristos int length_before_ls = f - percent_loc - 2; 4038dffb485Schristos 4048dffb485Schristos strncpy (current_substring, percent_loc, length_before_ls); 4058dffb485Schristos strcpy (current_substring + length_before_ls, "s"); 4068dffb485Schristos current_substring += length_before_ls + 2; 4078dffb485Schristos } 4088dffb485Schristos else 4098dffb485Schristos { 4108dffb485Schristos strncpy (current_substring, percent_loc, f - percent_loc); 4118dffb485Schristos current_substring += f - percent_loc; 4128dffb485Schristos } 4138dffb485Schristos 4148dffb485Schristos *current_substring++ = '\0'; 4158dffb485Schristos 4168dffb485Schristos prev_start = f; 4178dffb485Schristos 4188dffb485Schristos m_pieces.emplace_back (sub_start, this_argclass, n_int_args); 4198dffb485Schristos } 4208dffb485Schristos 4218dffb485Schristos /* Record the remainder of the string. */ 4228dffb485Schristos 4238dffb485Schristos if (f > prev_start) 4248dffb485Schristos { 4258dffb485Schristos sub_start = current_substring; 4268dffb485Schristos 4278dffb485Schristos strncpy (current_substring, prev_start, f - prev_start); 4288dffb485Schristos current_substring += f - prev_start; 4298dffb485Schristos *current_substring++ = '\0'; 4308dffb485Schristos 4318dffb485Schristos m_pieces.emplace_back (sub_start, literal_piece, 0); 4328dffb485Schristos } 4338dffb485Schristos } 434