1 /* Parse a printf-style format string. 2 3 Copyright (C) 1986-2024 Free Software Foundation, Inc. 4 5 This file is part of GDB. 6 7 This program is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3 of the License, or 10 (at your option) any later version. 11 12 This program is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 19 20 #include "format.h" 21 22 format_pieces::format_pieces (const char **arg, bool gdb_extensions, 23 bool value_extension) 24 { 25 const char *s; 26 const char *string; 27 const char *prev_start; 28 const char *percent_loc; 29 char *sub_start, *current_substring; 30 enum argclass this_argclass; 31 32 s = *arg; 33 34 if (gdb_extensions) 35 { 36 string = *arg; 37 *arg += strlen (*arg); 38 } 39 else 40 { 41 /* Parse the format-control string and copy it into the string STRING, 42 processing some kinds of escape sequence. */ 43 44 char *f = (char *) alloca (strlen (s) + 1); 45 string = f; 46 47 while (*s != '"' && *s != '\0') 48 { 49 int c = *s++; 50 switch (c) 51 { 52 case '\0': 53 continue; 54 55 case '\\': 56 switch (c = *s++) 57 { 58 case '\\': 59 *f++ = '\\'; 60 break; 61 case 'a': 62 *f++ = '\a'; 63 break; 64 case 'b': 65 *f++ = '\b'; 66 break; 67 case 'e': 68 *f++ = '\e'; 69 break; 70 case 'f': 71 *f++ = '\f'; 72 break; 73 case 'n': 74 *f++ = '\n'; 75 break; 76 case 'r': 77 *f++ = '\r'; 78 break; 79 case 't': 80 *f++ = '\t'; 81 break; 82 case 'v': 83 *f++ = '\v'; 84 break; 85 case '"': 86 *f++ = '"'; 87 break; 88 default: 89 /* ??? TODO: handle other escape sequences. */ 90 error (_("Unrecognized escape character \\%c in format string."), 91 c); 92 } 93 break; 94 95 default: 96 *f++ = c; 97 } 98 } 99 100 /* Terminate our escape-processed copy. */ 101 *f++ = '\0'; 102 103 /* Whether the format string ended with double-quote or zero, we're 104 done with it; it's up to callers to complain about syntax. */ 105 *arg = s; 106 } 107 108 /* Need extra space for the '\0's. Doubling the size is sufficient. */ 109 110 current_substring = (char *) xmalloc (strlen (string) * 2 + 1000); 111 m_storage.reset (current_substring); 112 113 /* Now scan the string for %-specs and see what kinds of args they want. 114 argclass classifies the %-specs so we can give printf-type functions 115 something of the right size. */ 116 117 const char *f = string; 118 prev_start = string; 119 while (*f) 120 if (*f++ == '%') 121 { 122 int seen_hash = 0, seen_zero = 0, lcount = 0, seen_prec = 0; 123 int seen_space = 0, seen_plus = 0; 124 int seen_big_l = 0, seen_h = 0, seen_big_h = 0; 125 int seen_big_d = 0, seen_double_big_d = 0; 126 int seen_size_t = 0; 127 int bad = 0; 128 int n_int_args = 0; 129 bool seen_i64 = false; 130 131 /* Skip over "%%", it will become part of a literal piece. */ 132 if (*f == '%') 133 { 134 f++; 135 continue; 136 } 137 138 sub_start = current_substring; 139 140 strncpy (current_substring, prev_start, f - 1 - prev_start); 141 current_substring += f - 1 - prev_start; 142 *current_substring++ = '\0'; 143 144 if (*sub_start != '\0') 145 m_pieces.emplace_back (sub_start, literal_piece, 0); 146 147 percent_loc = f - 1; 148 149 /* Check the validity of the format specifier, and work 150 out what argument it expects. We only accept C89 151 format strings, with the exception of long long (which 152 we autoconf for). */ 153 154 /* The first part of a format specifier is a set of flag 155 characters. */ 156 while (*f != '\0' && strchr ("0-+ #", *f)) 157 { 158 if (*f == '#') 159 seen_hash = 1; 160 else if (*f == '0') 161 seen_zero = 1; 162 else if (*f == ' ') 163 seen_space = 1; 164 else if (*f == '+') 165 seen_plus = 1; 166 f++; 167 } 168 169 /* The next part of a format specifier is a width. */ 170 if (gdb_extensions && *f == '*') 171 { 172 ++f; 173 ++n_int_args; 174 } 175 else 176 { 177 while (*f != '\0' && strchr ("0123456789", *f)) 178 f++; 179 } 180 181 /* The next part of a format specifier is a precision. */ 182 if (*f == '.') 183 { 184 seen_prec = 1; 185 f++; 186 if (gdb_extensions && *f == '*') 187 { 188 ++f; 189 ++n_int_args; 190 } 191 else 192 { 193 while (*f != '\0' && strchr ("0123456789", *f)) 194 f++; 195 } 196 } 197 198 /* The next part of a format specifier is a length modifier. */ 199 switch (*f) 200 { 201 case 'h': 202 seen_h = 1; 203 f++; 204 break; 205 case 'l': 206 f++; 207 lcount++; 208 if (*f == 'l') 209 { 210 f++; 211 lcount++; 212 } 213 break; 214 case 'L': 215 seen_big_l = 1; 216 f++; 217 break; 218 case 'H': 219 /* Decimal32 modifier. */ 220 seen_big_h = 1; 221 f++; 222 break; 223 case 'D': 224 /* Decimal64 and Decimal128 modifiers. */ 225 f++; 226 227 /* Check for a Decimal128. */ 228 if (*f == 'D') 229 { 230 f++; 231 seen_double_big_d = 1; 232 } 233 else 234 seen_big_d = 1; 235 break; 236 case 'z': 237 /* For size_t or ssize_t. */ 238 seen_size_t = 1; 239 f++; 240 break; 241 case 'I': 242 /* Support the Windows '%I64' extension, because an 243 earlier call to format_pieces might have converted %lld 244 to %I64d. */ 245 if (f[1] == '6' && f[2] == '4') 246 { 247 f += 3; 248 lcount = 2; 249 seen_i64 = true; 250 } 251 break; 252 } 253 254 switch (*f) 255 { 256 case 'u': 257 if (seen_hash) 258 bad = 1; 259 [[fallthrough]]; 260 261 case 'o': 262 case 'x': 263 case 'X': 264 if (seen_space || seen_plus) 265 bad = 1; 266 [[fallthrough]]; 267 268 case 'd': 269 case 'i': 270 if (seen_size_t) 271 this_argclass = size_t_arg; 272 else if (lcount == 0) 273 this_argclass = int_arg; 274 else if (lcount == 1) 275 this_argclass = long_arg; 276 else 277 this_argclass = long_long_arg; 278 279 if (seen_big_l) 280 bad = 1; 281 break; 282 283 case 'c': 284 this_argclass = lcount == 0 ? int_arg : wide_char_arg; 285 if (lcount > 1 || seen_h || seen_big_l) 286 bad = 1; 287 if (seen_prec || seen_zero || seen_space || seen_plus) 288 bad = 1; 289 break; 290 291 case 'p': 292 this_argclass = ptr_arg; 293 if (lcount || seen_h || seen_big_l) 294 bad = 1; 295 if (seen_prec) 296 bad = 1; 297 if (seen_hash || seen_zero || seen_space || seen_plus) 298 bad = 1; 299 300 if (gdb_extensions) 301 { 302 switch (f[1]) 303 { 304 case 's': 305 case 'F': 306 case '[': 307 case ']': 308 f++; 309 break; 310 } 311 } 312 313 break; 314 315 case 's': 316 this_argclass = lcount == 0 ? string_arg : wide_string_arg; 317 if (lcount > 1 || seen_h || seen_big_l) 318 bad = 1; 319 if (seen_zero || seen_space || seen_plus) 320 bad = 1; 321 break; 322 323 case 'e': 324 case 'f': 325 case 'g': 326 case 'E': 327 case 'G': 328 if (seen_double_big_d) 329 this_argclass = dec128float_arg; 330 else if (seen_big_d) 331 this_argclass = dec64float_arg; 332 else if (seen_big_h) 333 this_argclass = dec32float_arg; 334 else if (seen_big_l) 335 this_argclass = long_double_arg; 336 else 337 this_argclass = double_arg; 338 339 if (lcount || seen_h) 340 bad = 1; 341 break; 342 343 case 'V': 344 if (!value_extension) 345 error (_("Unrecognized format specifier '%c' in printf"), *f); 346 347 if (lcount > 1 || seen_h || seen_big_h || seen_big_h 348 || seen_big_d || seen_double_big_d || seen_size_t 349 || seen_prec || seen_zero || seen_space || seen_plus) 350 bad = 1; 351 352 this_argclass = value_arg; 353 354 if (f[1] == '[') 355 { 356 /* Move F forward to the next ']' character if such a 357 character exists, otherwise leave F unchanged. */ 358 const char *tmp = strchr (f, ']'); 359 if (tmp != nullptr) 360 f = tmp; 361 } 362 break; 363 364 case '*': 365 error (_("`*' not supported for precision or width in printf")); 366 367 case 'n': 368 error (_("Format specifier `n' not supported in printf")); 369 370 case '\0': 371 error (_("Incomplete format specifier at end of format string")); 372 373 default: 374 error (_("Unrecognized format specifier '%c' in printf"), *f); 375 } 376 377 if (bad) 378 error (_("Inappropriate modifiers to " 379 "format specifier '%c' in printf"), 380 *f); 381 382 f++; 383 384 sub_start = current_substring; 385 386 if (lcount > 1 && !seen_i64 && USE_PRINTF_I64) 387 { 388 /* Windows' printf does support long long, but not the usual way. 389 Convert %lld to %I64d. */ 390 int length_before_ll = f - percent_loc - 1 - lcount; 391 392 strncpy (current_substring, percent_loc, length_before_ll); 393 strcpy (current_substring + length_before_ll, "I64"); 394 current_substring[length_before_ll + 3] = 395 percent_loc[length_before_ll + lcount]; 396 current_substring += length_before_ll + 4; 397 } 398 else if (this_argclass == wide_string_arg 399 || this_argclass == wide_char_arg) 400 { 401 /* Convert %ls or %lc to %s. */ 402 int length_before_ls = f - percent_loc - 2; 403 404 strncpy (current_substring, percent_loc, length_before_ls); 405 strcpy (current_substring + length_before_ls, "s"); 406 current_substring += length_before_ls + 2; 407 } 408 else 409 { 410 strncpy (current_substring, percent_loc, f - percent_loc); 411 current_substring += f - percent_loc; 412 } 413 414 *current_substring++ = '\0'; 415 416 prev_start = f; 417 418 m_pieces.emplace_back (sub_start, this_argclass, n_int_args); 419 } 420 421 /* Record the remainder of the string. */ 422 423 if (f > prev_start) 424 { 425 sub_start = current_substring; 426 427 strncpy (current_substring, prev_start, f - prev_start); 428 current_substring += f - prev_start; 429 *current_substring++ = '\0'; 430 431 m_pieces.emplace_back (sub_start, literal_piece, 0); 432 } 433 } 434