xref: /netbsd-src/external/gpl3/binutils.old/dist/binutils/strings.c (revision 53b02e147d4ed531c0d2a5ca9b3e8026ba3e99b5)
1 /* strings -- print the strings of printable characters in files
2    Copyright (C) 1993-2018 Free Software Foundation, Inc.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 3, or (at your option)
7    any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software
16    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
17    02110-1301, USA.  */
18 
19 /* Usage: strings [options] file...
20 
21    Options:
22    --all
23    -a
24    -		Scan each file in its entirety.
25 
26    --data
27    -d		Scan only the initialized data section(s) of object files.
28 
29    --print-file-name
30    -f		Print the name of the file before each string.
31 
32    --bytes=min-len
33    -n min-len
   -min-len	Print sequences of graphic characters that are at least
		MIN-LEN characters long.  A sequence ends at the first
		non-graphic character or at end of input.  Default is 4.
36 
37    --radix={o,x,d}
38    -t {o,x,d}	Print the offset within the file before each string,
39 		in octal/hex/decimal.
40 
   --include-all-whitespace
   -w		By default tab and space are the only whitespace included in
		graphic char sequences.  This option considers all of
		isspace() valid.
44 
45    -o		Like -to.  (Some other implementations have -o like -to,
46 		others like -td.  We chose one arbitrarily.)
47 
48    --encoding={s,S,b,l,B,L}
49    -e {s,S,b,l,B,L}
50 		Select character encoding: 7-bit-character, 8-bit-character,
51 		bigendian 16-bit, littleendian 16-bit, bigendian 32-bit,
52 		littleendian 32-bit.
53 
54    --target=BFDNAME
55    -T {bfdname}
56 		Specify a non-default object file format.
57 
58   --output-separator=sep_string
59   -s sep_string	String used to separate parsed strings in output.
60 		Default is newline.
61 
62    --help
63    -h		Print the usage message on the standard output.
64 
65    --version
66    -V
67    -v		Print the program version number.
68 
69    Written by Richard Stallman <rms@gnu.ai.mit.edu>
70    and David MacKenzie <djm@gnu.ai.mit.edu>.  */
71 
#include "sysdep.h"
#include <limits.h>
#include "bfd.h"
#include "getopt.h"
#include "libiberty.h"
#include "safe-ctype.h"
#include "bucomm.h"
78 
/* Nonzero if the character value C may be part of a printed string.
   C must lie in the range 0..255 and additionally be a tab, satisfy
   ISPRINT, be above 127 while the encoding is 'S', or satisfy ISSPACE
   while include_all_whitespace is set.

   C is evaluated several times, so it must not have side effects.  The
   expansion refers to the file-scope variables `encoding' and
   `include_all_whitespace'.  */
#define STRING_ISGRAPHIC(c) \
      (   (c) >= 0 \
       && (c) <= 255 \
       && ((c) == '\t' || ISPRINT (c) || (encoding == 'S' && (c) > 127) \
	   || (include_all_whitespace && ISSPACE (c))) \
      )
85 
/* Declare errno ourselves when the included headers did not provide it
   as a macro.  */
#ifndef errno
extern int errno;
#endif

/* The BFD section flags that identify an initialized data section.
   A section is scanned only when all of these flags are set.  */
#define DATA_FLAGS (SEC_ALLOC | SEC_LOAD | SEC_HAS_CONTENTS)

/* Radix for printing addresses (must be 8, 10 or 16).  Only consulted
   when print_addresses is TRUE; set by the -o and -t options.  */
static int address_radix;

/* Minimum length of sequence of graphic chars to trigger output.
   main defaults this to 4 and rejects values below 1.  */
static int string_min;

/* Whether or not we include all whitespace as a graphic char
   (the -w option).  */
static bfd_boolean include_all_whitespace;

/* TRUE means print address within file for each string.  */
static bfd_boolean print_addresses;

/* TRUE means print filename for each string.  */
static bfd_boolean print_filenames;

/* TRUE means for object files scan only the data section.  */
static bfd_boolean datasection_only;

/* The BFD object file format (the -T option), or NULL for the
   default.  */
static char *target;

/* The character encoding format: one of 's', 'S', 'b', 'l', 'B' or 'L'.
   encoding_bytes is the number of bytes per character that main derives
   from it (1, 2 or 4).  */
static char encoding;
static int encoding_bytes;

/* Output string used to separate parsed strings.  When NULL a newline
   is printed after each string instead.  */
static char *output_separator;
120 
/* Long options accepted by getopt_long in main.  Every entry has a NULL
   flag pointer and names the short-option character that main's switch
   handles, so long and short spellings share one code path.  The table
   is terminated by an all-zero entry.  */
static struct option long_options[] =
{
  {"all", no_argument, NULL, 'a'},
  {"data", no_argument, NULL, 'd'},
  {"print-file-name", no_argument, NULL, 'f'},
  {"bytes", required_argument, NULL, 'n'},
  {"radix", required_argument, NULL, 't'},
  {"include-all-whitespace", no_argument, NULL, 'w'},
  {"encoding", required_argument, NULL, 'e'},
  {"target", required_argument, NULL, 'T'},
  {"output-separator", required_argument, NULL, 's'},
  {"help", no_argument, NULL, 'h'},
  {"version", no_argument, NULL, 'v'},
  {NULL, 0, NULL, 0}
};
136 
/* Forward declarations for functions that are used before they are
   defined in this file.  usage never returns to its caller.  */
static bfd_boolean strings_file (char *);
static void print_strings (const char *, FILE *, file_ptr, int, int, char *);
static void usage (FILE *, int) ATTRIBUTE_NORETURN;

int main (int, char **);
142 
143 int
144 main (int argc, char **argv)
145 {
146   int optc;
147   int exit_status = 0;
148   bfd_boolean files_given = FALSE;
149   char *s;
150   int numeric_opt = 0;
151 
152 #if defined (HAVE_SETLOCALE)
153   setlocale (LC_ALL, "");
154 #endif
155   bindtextdomain (PACKAGE, LOCALEDIR);
156   textdomain (PACKAGE);
157 
158   program_name = argv[0];
159   xmalloc_set_program_name (program_name);
160   bfd_set_error_program_name (program_name);
161 
162   expandargv (&argc, &argv);
163 
164   string_min = 4;
165   include_all_whitespace = FALSE;
166   print_addresses = FALSE;
167   print_filenames = FALSE;
168   if (DEFAULT_STRINGS_ALL)
169     datasection_only = FALSE;
170   else
171     datasection_only = TRUE;
172   target = NULL;
173   encoding = 's';
174   output_separator = NULL;
175 
176   while ((optc = getopt_long (argc, argv, "adfhHn:wot:e:T:s:Vv0123456789",
177 			      long_options, (int *) 0)) != EOF)
178     {
179       switch (optc)
180 	{
181 	case 'a':
182 	  datasection_only = FALSE;
183 	  break;
184 
185 	case 'd':
186 	  datasection_only = TRUE;
187 	  break;
188 
189 	case 'f':
190 	  print_filenames = TRUE;
191 	  break;
192 
193 	case 'H':
194 	case 'h':
195 	  usage (stdout, 0);
196 
197 	case 'n':
198 	  string_min = (int) strtoul (optarg, &s, 0);
199 	  if (s != NULL && *s != 0)
200 	    fatal (_("invalid integer argument %s"), optarg);
201 	  break;
202 
203 	case 'w':
204 	  include_all_whitespace = TRUE;
205 	  break;
206 
207 	case 'o':
208 	  print_addresses = TRUE;
209 	  address_radix = 8;
210 	  break;
211 
212 	case 't':
213 	  print_addresses = TRUE;
214 	  if (optarg[1] != '\0')
215 	    usage (stderr, 1);
216 	  switch (optarg[0])
217 	    {
218 	    case 'o':
219 	      address_radix = 8;
220 	      break;
221 
222 	    case 'd':
223 	      address_radix = 10;
224 	      break;
225 
226 	    case 'x':
227 	      address_radix = 16;
228 	      break;
229 
230 	    default:
231 	      usage (stderr, 1);
232 	    }
233 	  break;
234 
235 	case 'T':
236 	  target = optarg;
237 	  break;
238 
239 	case 'e':
240 	  if (optarg[1] != '\0')
241 	    usage (stderr, 1);
242 	  encoding = optarg[0];
243 	  break;
244 
245 	case 's':
246 	  output_separator = optarg;
247           break;
248 
249 	case 'V':
250 	case 'v':
251 	  print_version ("strings");
252 	  break;
253 
254 	case '?':
255 	  usage (stderr, 1);
256 
257 	default:
258 	  numeric_opt = optind;
259 	  break;
260 	}
261     }
262 
263   if (numeric_opt != 0)
264     {
265       string_min = (int) strtoul (argv[numeric_opt - 1] + 1, &s, 0);
266       if (s != NULL && *s != 0)
267 	fatal (_("invalid integer argument %s"), argv[numeric_opt - 1] + 1);
268     }
269   if (string_min < 1)
270     fatal (_("invalid minimum string length %d"), string_min);
271 
272   switch (encoding)
273     {
274     case 'S':
275     case 's':
276       encoding_bytes = 1;
277       break;
278     case 'b':
279     case 'l':
280       encoding_bytes = 2;
281       break;
282     case 'B':
283     case 'L':
284       encoding_bytes = 4;
285       break;
286     default:
287       usage (stderr, 1);
288     }
289 
290   bfd_init ();
291   set_default_bfd_target ();
292 
293   if (optind >= argc)
294     {
295       datasection_only = FALSE;
296       SET_BINARY (fileno (stdin));
297       print_strings ("{standard input}", stdin, 0, 0, 0, (char *) NULL);
298       files_given = TRUE;
299     }
300   else
301     {
302       for (; optind < argc; ++optind)
303 	{
304 	  if (strcmp (argv[optind], "-") == 0)
305 	    datasection_only = FALSE;
306 	  else
307 	    {
308 	      files_given = TRUE;
309 	      exit_status |= !strings_file (argv[optind]);
310 	    }
311 	}
312     }
313 
314   if (!files_given)
315     usage (stderr, 1);
316 
317   return (exit_status);
318 }
319 
320 /* Scan section SECT of the file ABFD, whose printable name is
321    FILENAME.  If it contains initialized data set GOT_A_SECTION and
322    print the strings in it.  */
323 
324 static void
325 strings_a_section (bfd *abfd, asection *sect, const char *filename,
326 		   bfd_boolean *got_a_section)
327 {
328   bfd_size_type sectsize;
329   bfd_byte *mem;
330 
331   if ((sect->flags & DATA_FLAGS) != DATA_FLAGS)
332     return;
333 
334   sectsize = bfd_get_section_size (sect);
335   if (sectsize == 0)
336     return;
337 
338   if (!bfd_malloc_and_get_section (abfd, sect, &mem))
339     {
340       non_fatal (_("%s: Reading section %s failed: %s"),
341 		 filename, sect->name, bfd_errmsg (bfd_get_error ()));
342       return;
343     }
344 
345   *got_a_section = TRUE;
346   print_strings (filename, NULL, sect->filepos, 0, sectsize, (char *) mem);
347   free (mem);
348 }
349 
350 /* Scan all of the sections in FILE, and print the strings
351    in the initialized data section(s).
352 
353    Return TRUE if successful,
354    FALSE if not (such as if FILE is not an object file).  */
355 
356 static bfd_boolean
357 strings_object_file (const char *file)
358 {
359   bfd *abfd;
360   asection *s;
361   bfd_boolean got_a_section;
362 
363   abfd = bfd_openr (file, target);
364 
365   if (abfd == NULL)
366     /* Treat the file as a non-object file.  */
367     return FALSE;
368 
369   /* This call is mainly for its side effect of reading in the sections.
370      We follow the traditional behavior of `strings' in that we don't
371      complain if we don't recognize a file to be an object file.  */
372   if (!bfd_check_format (abfd, bfd_object))
373     {
374       bfd_close (abfd);
375       return FALSE;
376     }
377 
378   got_a_section = FALSE;
379   for (s = abfd->sections; s != NULL; s = s->next)
380     strings_a_section (abfd, s, file, &got_a_section);
381 
382   if (!bfd_close (abfd))
383     {
384       bfd_nonfatal (file);
385       return FALSE;
386     }
387 
388   return got_a_section;
389 }
390 
391 /* Print the strings in FILE.  Return TRUE if ok, FALSE if an error occurs.  */
392 
393 static bfd_boolean
394 strings_file (char *file)
395 {
396   struct stat st;
397 
398   /* get_file_size does not support non-S_ISREG files.  */
399 
400   if (stat (file, &st) < 0)
401     {
402       if (errno == ENOENT)
403 	non_fatal (_("'%s': No such file"), file);
404       else
405 	non_fatal (_("Warning: could not locate '%s'.  reason: %s"),
406 		   file, strerror (errno));
407       return FALSE;
408     }
409   else if (S_ISDIR (st.st_mode))
410     {
411       non_fatal (_("Warning: '%s' is a directory"), file);
412       return FALSE;
413     }
414 
415   /* If we weren't told to scan the whole file,
416      try to open it as an object file and only look at
417      initialized data sections.  If that fails, fall back to the
418      whole file.  */
419   if (!datasection_only || !strings_object_file (file))
420     {
421       FILE *stream;
422 
423       stream = fopen (file, FOPEN_RB);
424       if (stream == NULL)
425 	{
426 	  fprintf (stderr, "%s: ", program_name);
427 	  perror (file);
428 	  return FALSE;
429 	}
430 
431       print_strings (file, stream, (file_ptr) 0, 0, 0, (char *) 0);
432 
433       if (fclose (stream) == EOF)
434 	{
435 	  fprintf (stderr, "%s: ", program_name);
436 	  perror (file);
437 	  return FALSE;
438 	}
439     }
440 
441   return TRUE;
442 }
443 
444 /* Read the next character, return EOF if none available.
445    Assume that STREAM is positioned so that the next byte read
446    is at address ADDRESS in the file.
447 
448    If STREAM is NULL, do not read from it.
449    The caller can supply a buffer of characters
450    to be processed before the data in STREAM.
451    MAGIC is the address of the buffer and
452    MAGICCOUNT is how many characters are in it.  */
453 
454 static long
455 get_char (FILE *stream, file_ptr *address, int *magiccount, char **magic)
456 {
457   int c, i;
458   long r = 0;
459 
460   for (i = 0; i < encoding_bytes; i++)
461     {
462       if (*magiccount)
463 	{
464 	  (*magiccount)--;
465 	  c = *(*magic)++;
466 	}
467       else
468 	{
469 	  if (stream == NULL)
470 	    return EOF;
471 
472 	  /* Only use getc_unlocked if we found a declaration for it.
473 	     Otherwise, libc is not thread safe by default, and we
474 	     should not use it.  */
475 
476 #if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
477 	  c = getc_unlocked (stream);
478 #else
479 	  c = getc (stream);
480 #endif
481 	  if (c == EOF)
482 	    return EOF;
483 	}
484 
485       (*address)++;
486       r = (r << 8) | (c & 0xff);
487     }
488 
489   switch (encoding)
490     {
491     default:
492       break;
493     case 'l':
494       r = ((r & 0xff) << 8) | ((r & 0xff00) >> 8);
495       break;
496     case 'L':
497       r = (((r & 0xff) << 24) | ((r & 0xff00) << 8)
498 	   | ((r & 0xff0000) >> 8) | ((r & 0xff000000) >> 24));
499       break;
500     }
501 
502   return r;
503 }
504 
505 /* Find the strings in file FILENAME, read from STREAM.
506    Assume that STREAM is positioned so that the next byte read
507    is at address ADDRESS in the file.
508    Stop reading at address STOP_POINT in the file, if nonzero.
509 
510    If STREAM is NULL, do not read from it.
511    The caller can supply a buffer of characters
512    to be processed before the data in STREAM.
513    MAGIC is the address of the buffer and
514    MAGICCOUNT is how many characters are in it.
515    Those characters come at address ADDRESS and the data in STREAM follow.  */
516 
517 static void
518 print_strings (const char *filename, FILE *stream, file_ptr address,
519 	       int stop_point, int magiccount, char *magic)
520 {
521   char *buf = (char *) xmalloc (sizeof (char) * (string_min + 1));
522 
523   while (1)
524     {
525       file_ptr start;
526       int i;
527       long c;
528 
529       /* See if the next `string_min' chars are all graphic chars.  */
530     tryline:
531       if (stop_point && address >= stop_point)
532 	break;
533       start = address;
534       for (i = 0; i < string_min; i++)
535 	{
536 	  c = get_char (stream, &address, &magiccount, &magic);
537 	  if (c == EOF)
538 	    {
539 	      free (buf);
540 	      return;
541 	    }
542 	  if (! STRING_ISGRAPHIC (c))
543 	    /* Found a non-graphic.  Try again starting with next char.  */
544 	    goto tryline;
545 	  buf[i] = c;
546 	}
547 
548       /* We found a run of `string_min' graphic characters.  Print up
549 	 to the next non-graphic character.  */
550 
551       if (print_filenames)
552 	printf ("%s: ", filename);
553       if (print_addresses)
554 	switch (address_radix)
555 	  {
556 	  case 8:
557 #ifdef HAVE_LONG_LONG
558 	    if (sizeof (start) > sizeof (long))
559 	      {
560 # ifndef __MSVCRT__
561 	        printf ("%7llo ", (unsigned long long) start);
562 # else
563 	        printf ("%7I64o ", (unsigned long long) start);
564 # endif
565 	      }
566 	    else
567 #elif !BFD_HOST_64BIT_LONG
568 	    if (start != (unsigned long) start)
569 	      printf ("++%7lo ", (unsigned long) start);
570 	    else
571 #endif
572 	      printf ("%7lo ", (unsigned long) start);
573 	    break;
574 
575 	  case 10:
576 #ifdef HAVE_LONG_LONG
577 	    if (sizeof (start) > sizeof (long))
578 	      {
579 # ifndef __MSVCRT__
580 	        printf ("%7lld ", (unsigned long long) start);
581 # else
582 	        printf ("%7I64d ", (unsigned long long) start);
583 # endif
584 	      }
585 	    else
586 #elif !BFD_HOST_64BIT_LONG
587 	    if (start != (unsigned long) start)
588 	      printf ("++%7lu ", (unsigned long) start);
589 	    else
590 #endif
591 	      printf ("%7ld ", (long) start);
592 	    break;
593 
594 	  case 16:
595 #ifdef HAVE_LONG_LONG
596 	    if (sizeof (start) > sizeof (long))
597 	      {
598 # ifndef __MSVCRT__
599 	        printf ("%7llx ", (unsigned long long) start);
600 # else
601 	        printf ("%7I64x ", (unsigned long long) start);
602 # endif
603 	      }
604 	    else
605 #elif !BFD_HOST_64BIT_LONG
606 	    if (start != (unsigned long) start)
607 	      printf ("%lx%8.8lx ", (unsigned long) (start >> 32),
608 		      (unsigned long) (start & 0xffffffff));
609 	    else
610 #endif
611 	      printf ("%7lx ", (unsigned long) start);
612 	    break;
613 	  }
614 
615       buf[i] = '\0';
616       fputs (buf, stdout);
617 
618       while (1)
619 	{
620 	  c = get_char (stream, &address, &magiccount, &magic);
621 	  if (c == EOF)
622 	    break;
623 	  if (! STRING_ISGRAPHIC (c))
624 	    break;
625 	  putchar (c);
626 	}
627 
628       if (output_separator)
629         fputs (output_separator, stdout);
630       else
631         putchar ('\n');
632     }
633   free (buf);
634 }
635 
636 static void
637 usage (FILE *stream, int status)
638 {
639   fprintf (stream, _("Usage: %s [option(s)] [file(s)]\n"), program_name);
640   fprintf (stream, _(" Display printable strings in [file(s)] (stdin by default)\n"));
641   fprintf (stream, _(" The options are:\n"));
642 
643   if (DEFAULT_STRINGS_ALL)
644     fprintf (stream, _("\
645   -a - --all                Scan the entire file, not just the data section [default]\n\
646   -d --data                 Only scan the data sections in the file\n"));
647   else
648     fprintf (stream, _("\
649   -a - --all                Scan the entire file, not just the data section\n\
650   -d --data                 Only scan the data sections in the file [default]\n"));
651 
652   fprintf (stream, _("\
653   -f --print-file-name      Print the name of the file before each string\n\
654   -n --bytes=[number]       Locate & print any NUL-terminated sequence of at\n\
655   -<number>                   least [number] characters (default 4).\n\
656   -t --radix={o,d,x}        Print the location of the string in base 8, 10 or 16\n\
657   -w --include-all-whitespace Include all whitespace as valid string characters\n\
658   -o                        An alias for --radix=o\n\
659   -T --target=<BFDNAME>     Specify the binary file format\n\
660   -e --encoding={s,S,b,l,B,L} Select character size and endianness:\n\
661                             s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit\n\
662   -s --output-separator=<string> String used to separate strings in output.\n\
663   @<file>                   Read options from <file>\n\
664   -h --help                 Display this information\n\
665   -v -V --version           Print the program's version number\n"));
666   list_supported_targets (program_name, stream);
667   if (REPORT_BUGS_TO[0] && status == 0)
668     fprintf (stream, _("Report bugs to %s\n"), REPORT_BUGS_TO);
669   exit (status);
670 }
671