xref: /netbsd-src/external/gpl3/binutils.old/dist/binutils/strings.c (revision d909946ca08dceb44d7d0f22ec9488679695d976)
1 /* strings -- print the strings of printable characters in files
2    Copyright 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3    2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2011, 2012
4    Free Software Foundation, Inc.
5 
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3, or (at your option)
9    any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, write to the Free Software
18    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
19    02110-1301, USA.  */
20 
21 /* Usage: strings [options] file...
22 
23    Options:
24    --all
25    -a
   -		Scan the whole file, not only the initialized data sections
		of object files.
27 
28    --print-file-name
29    -f		Print the name of the file before each string.
30 
31    --bytes=min-len
32    -n min-len
   -min-len	Print graphic char sequences, MIN-LEN or more characters long,
		that are followed by a non-graphic character or the end of
		the input.  Default is 4.
35 
36    --radix={o,x,d}
37    -t {o,x,d}	Print the offset within the file before each string,
38 		in octal/hex/decimal.
39 
40    -o		Like -to.  (Some other implementations have -o like -to,
41 		others like -td.  We chose one arbitrarily.)
42 
43    --encoding={s,S,b,l,B,L}
44    -e {s,S,b,l,B,L}
45 		Select character encoding: 7-bit-character, 8-bit-character,
46 		bigendian 16-bit, littleendian 16-bit, bigendian 32-bit,
47 		littleendian 32-bit.
48 
49    --target=BFDNAME
50    -T {bfdname}
51 		Specify a non-default object file format.
52 
53    --help
54    -h		Print the usage message on the standard output.
55 
56    --version
57    -V
58    -v		Print the program version number.
59 
60    Written by Richard Stallman <rms@gnu.ai.mit.edu>
61    and David MacKenzie <djm@gnu.ai.mit.edu>.  */
62 
63 #include "sysdep.h"
64 #include "bfd.h"
65 #include "getopt.h"
66 #include "libiberty.h"
67 #include "safe-ctype.h"
68 #include "bucomm.h"
69 
/* Nonzero if C is a character that `strings' treats as part of a
   printable string: a tab, anything ISPRINT accepts, or -- only when
   the 8-bit encoding 'S' is selected -- any value above 127.  Values
   outside the range 0..255 are never graphic.

   C is expanded several times, so it must be free of side effects.  */
#define STRING_ISGRAPHIC(c) \
  (0 <= (c) && (c) <= 255 \
   && (ISPRINT (c) || (c) == '\t' || ((c) > 127 && encoding == 'S')))
74 
75 #ifndef errno
76 extern int errno;
77 #endif
78 
79 /* The BFD section flags that identify an initialized data section.  */
80 #define DATA_FLAGS (SEC_ALLOC | SEC_LOAD | SEC_HAS_CONTENTS)
81 
82 /* Radix for printing addresses (must be 8, 10 or 16).  */
83 static int address_radix;
84 
85 /* Minimum length of sequence of graphic chars to trigger output.  */
86 static int string_min;
87 
88 /* TRUE means print address within file for each string.  */
89 static bfd_boolean print_addresses;
90 
91 /* TRUE means print filename for each string.  */
92 static bfd_boolean print_filenames;
93 
94 /* TRUE means for object files scan only the data section.  */
95 static bfd_boolean datasection_only;
96 
97 /* TRUE if we found an initialized data section in the current file.  */
98 static bfd_boolean got_a_section;
99 
100 /* The BFD object file format.  */
101 static char *target;
102 
103 /* The character encoding format.  */
104 static char encoding;
105 static int encoding_bytes;
106 
107 static struct option long_options[] =
108 {
109   {"all", no_argument, NULL, 'a'},
110   {"print-file-name", no_argument, NULL, 'f'},
111   {"bytes", required_argument, NULL, 'n'},
112   {"radix", required_argument, NULL, 't'},
113   {"encoding", required_argument, NULL, 'e'},
114   {"target", required_argument, NULL, 'T'},
115   {"help", no_argument, NULL, 'h'},
116   {"version", no_argument, NULL, 'v'},
117   {NULL, 0, NULL, 0}
118 };
119 
120 /* Records the size of a named file so that we
121    do not repeatedly run bfd_stat() on it.  */
122 
123 typedef struct
124 {
125   const char *  filename;
126   bfd_size_type filesize;
127 } filename_and_size_t;
128 
129 static void strings_a_section (bfd *, asection *, void *);
130 static bfd_boolean strings_object_file (const char *);
131 static bfd_boolean strings_file (char *file);
132 static void print_strings (const char *, FILE *, file_ptr, int, int, char *);
133 static void usage (FILE *, int);
134 static long get_char (FILE *, file_ptr *, int *, char **);
135 
136 int main (int, char **);
137 
138 int
139 main (int argc, char **argv)
140 {
141   int optc;
142   int exit_status = 0;
143   bfd_boolean files_given = FALSE;
144   char *s;
145   int numeric_opt = 0;
146 
147 #if defined (HAVE_SETLOCALE)
148   setlocale (LC_ALL, "");
149 #endif
150   bindtextdomain (PACKAGE, LOCALEDIR);
151   textdomain (PACKAGE);
152 
153   program_name = argv[0];
154   xmalloc_set_program_name (program_name);
155 
156   expandargv (&argc, &argv);
157 
158   string_min = 4;
159   print_addresses = FALSE;
160   print_filenames = FALSE;
161   datasection_only = TRUE;
162   target = NULL;
163   encoding = 's';
164 
165   while ((optc = getopt_long (argc, argv, "afhHn:ot:e:T:Vv0123456789",
166 			      long_options, (int *) 0)) != EOF)
167     {
168       switch (optc)
169 	{
170 	case 'a':
171 	  datasection_only = FALSE;
172 	  break;
173 
174 	case 'f':
175 	  print_filenames = TRUE;
176 	  break;
177 
178 	case 'H':
179 	case 'h':
180 	  usage (stdout, 0);
181 
182 	case 'n':
183 	  string_min = (int) strtoul (optarg, &s, 0);
184 	  if (s != NULL && *s != 0)
185 	    fatal (_("invalid integer argument %s"), optarg);
186 	  break;
187 
188 	case 'o':
189 	  print_addresses = TRUE;
190 	  address_radix = 8;
191 	  break;
192 
193 	case 't':
194 	  print_addresses = TRUE;
195 	  if (optarg[1] != '\0')
196 	    usage (stderr, 1);
197 	  switch (optarg[0])
198 	    {
199 	    case 'o':
200 	      address_radix = 8;
201 	      break;
202 
203 	    case 'd':
204 	      address_radix = 10;
205 	      break;
206 
207 	    case 'x':
208 	      address_radix = 16;
209 	      break;
210 
211 	    default:
212 	      usage (stderr, 1);
213 	    }
214 	  break;
215 
216 	case 'T':
217 	  target = optarg;
218 	  break;
219 
220 	case 'e':
221 	  if (optarg[1] != '\0')
222 	    usage (stderr, 1);
223 	  encoding = optarg[0];
224 	  break;
225 
226 	case 'V':
227 	case 'v':
228 	  print_version ("strings");
229 	  break;
230 
231 	case '?':
232 	  usage (stderr, 1);
233 
234 	default:
235 	  numeric_opt = optind;
236 	  break;
237 	}
238     }
239 
240   if (numeric_opt != 0)
241     {
242       string_min = (int) strtoul (argv[numeric_opt - 1] + 1, &s, 0);
243       if (s != NULL && *s != 0)
244 	fatal (_("invalid integer argument %s"), argv[numeric_opt - 1] + 1);
245     }
246   if (string_min < 1)
247     fatal (_("invalid minimum string length %d"), string_min);
248 
249   switch (encoding)
250     {
251     case 'S':
252     case 's':
253       encoding_bytes = 1;
254       break;
255     case 'b':
256     case 'l':
257       encoding_bytes = 2;
258       break;
259     case 'B':
260     case 'L':
261       encoding_bytes = 4;
262       break;
263     default:
264       usage (stderr, 1);
265     }
266 
267   bfd_init ();
268   set_default_bfd_target ();
269 
270   if (optind >= argc)
271     {
272       datasection_only = FALSE;
273       SET_BINARY (fileno (stdin));
274       print_strings ("{standard input}", stdin, 0, 0, 0, (char *) NULL);
275       files_given = TRUE;
276     }
277   else
278     {
279       for (; optind < argc; ++optind)
280 	{
281 	  if (strcmp (argv[optind], "-") == 0)
282 	    datasection_only = FALSE;
283 	  else
284 	    {
285 	      files_given = TRUE;
286 	      exit_status |= strings_file (argv[optind]) == FALSE;
287 	    }
288 	}
289     }
290 
291   if (!files_given)
292     usage (stderr, 1);
293 
294   return (exit_status);
295 }
296 
297 /* Scan section SECT of the file ABFD, whose printable name is in
298    ARG->filename and whose size might be in ARG->filesize.  If it
299    contains initialized data set `got_a_section' and print the
300    strings in it.
301 
302    FIXME: We ought to be able to return error codes/messages for
303    certain conditions.  */
304 
305 static void
306 strings_a_section (bfd *abfd, asection *sect, void *arg)
307 {
308   filename_and_size_t * filename_and_sizep;
309   bfd_size_type *filesizep;
310   bfd_size_type sectsize;
311   void *mem;
312 
313   if ((sect->flags & DATA_FLAGS) != DATA_FLAGS)
314     return;
315 
316   sectsize = bfd_get_section_size (sect);
317 
318   if (sectsize <= 0)
319     return;
320 
321   /* Get the size of the file.  This might have been cached for us.  */
322   filename_and_sizep = (filename_and_size_t *) arg;
323   filesizep = & filename_and_sizep->filesize;
324 
325   if (*filesizep == 0)
326     {
327       struct stat st;
328 
329       if (bfd_stat (abfd, &st))
330 	return;
331 
332       /* Cache the result so that we do not repeatedly stat this file.  */
333       *filesizep = st.st_size;
334     }
335 
336   /* Compare the size of the section against the size of the file.
337      If the section is bigger then the file must be corrupt and
338      we should not try dumping it.  */
339   if (sectsize >= *filesizep)
340     return;
341 
342   mem = xmalloc (sectsize);
343 
344   if (bfd_get_section_contents (abfd, sect, mem, (file_ptr) 0, sectsize))
345     {
346       got_a_section = TRUE;
347 
348       print_strings (filename_and_sizep->filename, NULL, sect->filepos,
349 		     0, sectsize, (char *) mem);
350     }
351 
352   free (mem);
353 }
354 
355 /* Scan all of the sections in FILE, and print the strings
356    in the initialized data section(s).
357 
358    Return TRUE if successful,
359    FALSE if not (such as if FILE is not an object file).  */
360 
361 static bfd_boolean
362 strings_object_file (const char *file)
363 {
364   filename_and_size_t filename_and_size;
365   bfd *abfd;
366 
367   abfd = bfd_openr (file, target);
368 
369   if (abfd == NULL)
370     /* Treat the file as a non-object file.  */
371     return FALSE;
372 
373   /* This call is mainly for its side effect of reading in the sections.
374      We follow the traditional behavior of `strings' in that we don't
375      complain if we don't recognize a file to be an object file.  */
376   if (!bfd_check_format (abfd, bfd_object))
377     {
378       bfd_close (abfd);
379       return FALSE;
380     }
381 
382   got_a_section = FALSE;
383   filename_and_size.filename = file;
384   filename_and_size.filesize = 0;
385   bfd_map_over_sections (abfd, strings_a_section, & filename_and_size);
386 
387   if (!bfd_close (abfd))
388     {
389       bfd_nonfatal (file);
390       return FALSE;
391     }
392 
393   return got_a_section;
394 }
395 
396 /* Print the strings in FILE.  Return TRUE if ok, FALSE if an error occurs.  */
397 
398 static bfd_boolean
399 strings_file (char *file)
400 {
401   struct stat st;
402 
403   /* get_file_size does not support non-S_ISREG files.  */
404 
405   if (stat (file, &st) < 0)
406     {
407       if (errno == ENOENT)
408 	non_fatal (_("'%s': No such file"), file);
409       else
410 	non_fatal (_("Warning: could not locate '%s'.  reason: %s"),
411 		   file, strerror (errno));
412       return FALSE;
413     }
414 
415   /* If we weren't told to scan the whole file,
416      try to open it as an object file and only look at
417      initialized data sections.  If that fails, fall back to the
418      whole file.  */
419   if (!datasection_only || !strings_object_file (file))
420     {
421       FILE *stream;
422 
423       stream = fopen (file, FOPEN_RB);
424       if (stream == NULL)
425 	{
426 	  fprintf (stderr, "%s: ", program_name);
427 	  perror (file);
428 	  return FALSE;
429 	}
430 
431       print_strings (file, stream, (file_ptr) 0, 0, 0, (char *) 0);
432 
433       if (fclose (stream) == EOF)
434 	{
435 	  fprintf (stderr, "%s: ", program_name);
436 	  perror (file);
437 	  return FALSE;
438 	}
439     }
440 
441   return TRUE;
442 }
443 
444 /* Read the next character, return EOF if none available.
445    Assume that STREAM is positioned so that the next byte read
446    is at address ADDRESS in the file.
447 
448    If STREAM is NULL, do not read from it.
449    The caller can supply a buffer of characters
450    to be processed before the data in STREAM.
451    MAGIC is the address of the buffer and
452    MAGICCOUNT is how many characters are in it.  */
453 
454 static long
455 get_char (FILE *stream, file_ptr *address, int *magiccount, char **magic)
456 {
457   int c, i;
458   long r = EOF;
459   unsigned char buf[4];
460 
461   for (i = 0; i < encoding_bytes; i++)
462     {
463       if (*magiccount)
464 	{
465 	  (*magiccount)--;
466 	  c = *(*magic)++;
467 	}
468       else
469 	{
470 	  if (stream == NULL)
471 	    return EOF;
472 
473 	  /* Only use getc_unlocked if we found a declaration for it.
474 	     Otherwise, libc is not thread safe by default, and we
475 	     should not use it.  */
476 
477 #if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
478 	  c = getc_unlocked (stream);
479 #else
480 	  c = getc (stream);
481 #endif
482 	  if (c == EOF)
483 	    return EOF;
484 	}
485 
486       (*address)++;
487       buf[i] = c;
488     }
489 
490   switch (encoding)
491     {
492     case 'S':
493     case 's':
494       r = buf[0];
495       break;
496     case 'b':
497       r = (buf[0] << 8) | buf[1];
498       break;
499     case 'l':
500       r = buf[0] | (buf[1] << 8);
501       break;
502     case 'B':
503       r = ((long) buf[0] << 24) | ((long) buf[1] << 16) |
504 	((long) buf[2] << 8) | buf[3];
505       break;
506     case 'L':
507       r = buf[0] | ((long) buf[1] << 8) | ((long) buf[2] << 16) |
508 	((long) buf[3] << 24);
509       break;
510     }
511 
512   if (r == EOF)
513     return 0;
514 
515   return r;
516 }
517 
518 /* Find the strings in file FILENAME, read from STREAM.
519    Assume that STREAM is positioned so that the next byte read
520    is at address ADDRESS in the file.
521    Stop reading at address STOP_POINT in the file, if nonzero.
522 
523    If STREAM is NULL, do not read from it.
524    The caller can supply a buffer of characters
525    to be processed before the data in STREAM.
526    MAGIC is the address of the buffer and
527    MAGICCOUNT is how many characters are in it.
528    Those characters come at address ADDRESS and the data in STREAM follow.  */
529 
530 static void
531 print_strings (const char *filename, FILE *stream, file_ptr address,
532 	       int stop_point, int magiccount, char *magic)
533 {
534   char *buf = (char *) xmalloc (sizeof (char) * (string_min + 1));
535 
536   while (1)
537     {
538       file_ptr start;
539       int i;
540       long c;
541 
542       /* See if the next `string_min' chars are all graphic chars.  */
543     tryline:
544       if (stop_point && address >= stop_point)
545 	break;
546       start = address;
547       for (i = 0; i < string_min; i++)
548 	{
549 	  c = get_char (stream, &address, &magiccount, &magic);
550 	  if (c == EOF)
551 	    {
552 	      free (buf);
553 	      return;
554 	    }
555 	  if (! STRING_ISGRAPHIC (c))
556 	    /* Found a non-graphic.  Try again starting with next char.  */
557 	    goto tryline;
558 	  buf[i] = c;
559 	}
560 
561       /* We found a run of `string_min' graphic characters.  Print up
562 	 to the next non-graphic character.  */
563 
564       if (print_filenames)
565 	printf ("%s: ", filename);
566       if (print_addresses)
567 	switch (address_radix)
568 	  {
569 	  case 8:
570 #if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
571 	    if (sizeof (start) > sizeof (long))
572 	      {
573 #ifndef __MSVCRT__
574 	        printf ("%7llo ", (unsigned long long) start);
575 #else
576 	        printf ("%7I64o ", (unsigned long long) start);
577 #endif
578 	      }
579 	    else
580 #elif !BFD_HOST_64BIT_LONG
581 	    if (start != (unsigned long) start)
582 	      printf ("++%7lo ", (unsigned long) start);
583 	    else
584 #endif
585 	      printf ("%7lo ", (unsigned long) start);
586 	    break;
587 
588 	  case 10:
589 #if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
590 	    if (sizeof (start) > sizeof (long))
591 	      {
592 #ifndef __MSVCRT__
593 	        printf ("%7lld ", (unsigned long long) start);
594 #else
595 	        printf ("%7I64d ", (unsigned long long) start);
596 #endif
597 	      }
598 	    else
599 #elif !BFD_HOST_64BIT_LONG
600 	    if (start != (unsigned long) start)
601 	      printf ("++%7llu ", (unsigned long) start);
602 	    else
603 #endif
604 	      printf ("%7ld ", (long) start);
605 	    break;
606 
607 	  case 16:
608 #if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
609 	    if (sizeof (start) > sizeof (long))
610 	      {
611 #ifndef __MSVCRT__
612 	        printf ("%7llx ", (unsigned long long) start);
613 #else
614 	        printf ("%7I64x ", (unsigned long long) start);
615 #endif
616 	      }
617 	    else
618 #elif !BFD_HOST_64BIT_LONG
619 	    if (start != (unsigned long) start)
620 	      printf ("%lx%8.8lx ", (unsigned long) (start >> 32),
621 		      (unsigned long) (start & 0xffffffff));
622 	    else
623 #endif
624 	      printf ("%7lx ", (unsigned long) start);
625 	    break;
626 	  }
627 
628       buf[i] = '\0';
629       fputs (buf, stdout);
630 
631       while (1)
632 	{
633 	  c = get_char (stream, &address, &magiccount, &magic);
634 	  if (c == EOF)
635 	    break;
636 	  if (! STRING_ISGRAPHIC (c))
637 	    break;
638 	  putchar (c);
639 	}
640 
641       putchar ('\n');
642     }
643   free (buf);
644 }
645 
646 static void
647 usage (FILE *stream, int status)
648 {
649   fprintf (stream, _("Usage: %s [option(s)] [file(s)]\n"), program_name);
650   fprintf (stream, _(" Display printable strings in [file(s)] (stdin by default)\n"));
651   fprintf (stream, _(" The options are:\n\
652   -a - --all                Scan the entire file, not just the data section\n\
653   -f --print-file-name      Print the name of the file before each string\n\
654   -n --bytes=[number]       Locate & print any NUL-terminated sequence of at\n\
655   -<number>                   least [number] characters (default 4).\n\
656   -t --radix={o,d,x}        Print the location of the string in base 8, 10 or 16\n\
657   -o                        An alias for --radix=o\n\
658   -T --target=<BFDNAME>     Specify the binary file format\n\
659   -e --encoding={s,S,b,l,B,L} Select character size and endianness:\n\
660                             s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit\n\
661   @<file>                   Read options from <file>\n\
662   -h --help                 Display this information\n\
663   -v -V --version           Print the program's version number\n"));
664   list_supported_targets (program_name, stream);
665   if (REPORT_BUGS_TO[0] && status == 0)
666     fprintf (stream, _("Report bugs to %s\n"), REPORT_BUGS_TO);
667   exit (status);
668 }
669