xref: /openbsd-src/gnu/usr.bin/binutils-2.17/binutils/strings.c (revision 50b7afb2c2c0993b0894d4e34bf857cb13ed9c80)
1 /* strings -- print the strings of printable characters in files
2    Copyright 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3    2002, 2003, 2004, 2005 Free Software Foundation, Inc.
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 2, or (at your option)
8    any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software
17    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
18    02110-1301, USA.  */
19 
20 /* Usage: strings [options] file...
21 
22    Options:
23    --all
24    -a
   -		Scan the whole file, not only the initialized data section(s) of object files.
26 
27    --print-file-name
28    -f		Print the name of the file before each string.
29 
30    --bytes=min-len
31    -n min-len
32    -min-len	Print graphic char sequences, MIN-LEN or more bytes long,
33 		that are followed by a NUL or a newline.  Default is 4.
34 
35    --radix={o,x,d}
36    -t {o,x,d}	Print the offset within the file before each string,
37 		in octal/hex/decimal.
38 
39    -o		Like -to.  (Some other implementations have -o like -to,
40 		others like -td.  We chose one arbitrarily.)
41 
42    --encoding={s,S,b,l,B,L}
43    -e {s,S,b,l,B,L}
44 		Select character encoding: 7-bit-character, 8-bit-character,
45 		bigendian 16-bit, littleendian 16-bit, bigendian 32-bit,
46 		littleendian 32-bit.
47 
48    --target=BFDNAME
49 		Specify a non-default object file format.
50 
51    --help
52    -h		Print the usage message on the standard output.
53 
54    --version
55    -v		Print the program version number.
56 
57    Written by Richard Stallman <rms@gnu.ai.mit.edu>
58    and David MacKenzie <djm@gnu.ai.mit.edu>.  */
59 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "bfd.h"
#include <stdio.h>
#include <stdlib.h>
#include "getopt.h"
#include <errno.h>
#include <limits.h>
#include "bucomm.h"
#include "libiberty.h"
#include "safe-ctype.h"
#include <sys/stat.h>
71 
/* Some platforms need to put stdin into binary mode, to read
    binary files.  SET_BINARY is defined only when HAVE_SETMODE is
    defined and O_BINARY ends up nonzero, so every use of it must be
    guarded with #ifdef SET_BINARY.  */
#ifdef HAVE_SETMODE
#ifndef O_BINARY
#ifdef _O_BINARY
/* Only the underscore-prefixed spellings exist; map the plain names
   onto them.  */
#define O_BINARY _O_BINARY
#define setmode _setmode
#else
/* No binary-mode flag at all: make the #if below fail.  */
#define O_BINARY 0
#endif
#endif
#if O_BINARY
#include <io.h>
/* Put descriptor F into binary mode unless isatty reports it as a
   terminal.  */
#define SET_BINARY(f) do { if (!isatty (f)) setmode (f,O_BINARY); } while (0)
#endif
#endif
88 
/* Nonzero if the decoded character value C may be part of a printed
   string: C must lie in the range 0..255 and be either a tab, a
   character accepted by ISPRINT, or -- only when the selected encoding
   is 'S' -- a value above 127.
   C is evaluated several times, so the argument must be free of side
   effects.  The macro reads the file-scope variable `encoding'.  */
#define STRING_ISGRAPHIC(c) \
      (   (c) >= 0 \
       && (c) <= 255 \
       && ((c) == '\t' || ISPRINT (c) || (encoding == 'S' && (c) > 127)))
93 
/* Declare errno ourselves when no header has provided it as a macro.  */
#ifndef errno
extern int errno;
#endif

/* The BFD section flags that identify an initialized data section.
   strings_a_section only scans sections that have all three set.  */
#define DATA_FLAGS (SEC_ALLOC | SEC_LOAD | SEC_HAS_CONTENTS)
100 
/* file_off is the type used for offsets within a file and file_open
   opens a stream on a named file; the 64-bit variants are selected
   when HAVE_FOPEN64 is defined.  */
#ifdef HAVE_FOPEN64
typedef off64_t file_off;
#define file_open(s,m) fopen64(s, m)
#else
typedef off_t file_off;
#define file_open(s,m) fopen(s, m)
#endif
/* statbuf / file_stat are the matching pair for stat-ing a named file;
   the 64-bit variants are selected when HAVE_STAT64 is defined.  */
#ifdef HAVE_STAT64
typedef struct stat64 statbuf;
#define file_stat(f,s) stat64(f, s)
#else
typedef struct stat statbuf;
#define file_stat(f,s) stat(f, s)
#endif
115 
/* Radix for printing addresses (must be 8, 10 or 16).
   Set by the -o and -t options; only consulted when print_addresses
   is TRUE.  */
static int address_radix;

/* Minimum length of sequence of graphic chars to trigger output.
   main initializes this to -1 ("not given") and replaces it with 4
   if no option supplied a value.  */
static int string_min;

/* TRUE means print address within file for each string.  */
static bfd_boolean print_addresses;

/* TRUE means print filename for each string.  */
static bfd_boolean print_filenames;

/* TRUE means for object files scan only the data section.
   Cleared by -a, by a "-" argument, and when reading standard input.  */
static bfd_boolean datasection_only;

/* TRUE if we found an initialized data section in the current file.
   Reset by strings_object_file and set by strings_a_section.  */
static bfd_boolean got_a_section;

/* The BFD object file format.  NULL unless --target was given; passed
   as the target argument of bfd_openr.  */
static char *target;

/* The character encoding format: one of 's', 'S', 'b', 'l', 'B', 'L'.  */
static char encoding;
/* Number of bytes that make up one character in that encoding
   (1, 2 or 4); derived from `encoding' in main.  */
static int encoding_bytes;
140 
/* Long options handed to getopt_long.  Every entry has a NULL flag
   pointer, so getopt_long returns the character in the last column
   and main dispatches on it exactly as for a short option.  The
   all-zero entry terminates the table.  */
static struct option long_options[] =
{
  {"all", no_argument, NULL, 'a'},
  {"print-file-name", no_argument, NULL, 'f'},
  {"bytes", required_argument, NULL, 'n'},
  {"radix", required_argument, NULL, 't'},
  {"encoding", required_argument, NULL, 'e'},
  {"target", required_argument, NULL, 'T'},
  {"help", no_argument, NULL, 'h'},
  {"version", no_argument, NULL, 'v'},
  {NULL, 0, NULL, 0}
};
153 
/* Records the size of a named file so that we
   do not repeatedly run bfd_stat() on it.
   A filesize of 0 means "not determined yet"; strings_a_section fills
   it in the first time it needs the value.  */

typedef struct
{
  const char *  filename;	/* Name used when printing strings.  */
  bfd_size_type filesize;	/* Cached file size, or 0 if unknown.  */
} filename_and_size_t;
162 
/* Forward declarations for the functions defined in this file.  */
static void strings_a_section (bfd *, asection *, void *);
static bfd_boolean strings_object_file (const char *);
static bfd_boolean strings_file (char *file);
static int integer_arg (char *s);
static void print_strings (const char *, FILE *, file_off, int, int, char *);
static void usage (FILE *, int);
static long get_char (FILE *, file_off *, int *, char **);

int main (int, char **);
172 
173 int
174 main (int argc, char **argv)
175 {
176   int optc;
177   int exit_status = 0;
178   bfd_boolean files_given = FALSE;
179 
180 #if defined (HAVE_SETLOCALE)
181   setlocale (LC_ALL, "");
182 #endif
183   bindtextdomain (PACKAGE, LOCALEDIR);
184   textdomain (PACKAGE);
185 
186   program_name = argv[0];
187   xmalloc_set_program_name (program_name);
188 
189   expandargv (&argc, &argv);
190 
191   string_min = -1;
192   print_addresses = FALSE;
193   print_filenames = FALSE;
194   datasection_only = TRUE;
195   target = NULL;
196   encoding = 's';
197 
198   while ((optc = getopt_long (argc, argv, "afhHn:ot:e:Vv0123456789",
199 			      long_options, (int *) 0)) != EOF)
200     {
201       switch (optc)
202 	{
203 	case 'a':
204 	  datasection_only = FALSE;
205 	  break;
206 
207 	case 'f':
208 	  print_filenames = TRUE;
209 	  break;
210 
211 	case 'H':
212 	case 'h':
213 	  usage (stdout, 0);
214 
215 	case 'n':
216 	  string_min = integer_arg (optarg);
217 	  if (string_min < 1)
218 	    fatal (_("invalid number %s"), optarg);
219 	  break;
220 
221 	case 'o':
222 	  print_addresses = TRUE;
223 	  address_radix = 8;
224 	  break;
225 
226 	case 't':
227 	  print_addresses = TRUE;
228 	  if (optarg[1] != '\0')
229 	    usage (stderr, 1);
230 	  switch (optarg[0])
231 	    {
232 	    case 'o':
233 	      address_radix = 8;
234 	      break;
235 
236 	    case 'd':
237 	      address_radix = 10;
238 	      break;
239 
240 	    case 'x':
241 	      address_radix = 16;
242 	      break;
243 
244 	    default:
245 	      usage (stderr, 1);
246 	    }
247 	  break;
248 
249 	case 'T':
250 	  target = optarg;
251 	  break;
252 
253 	case 'e':
254 	  if (optarg[1] != '\0')
255 	    usage (stderr, 1);
256 	  encoding = optarg[0];
257 	  break;
258 
259 	case 'V':
260 	case 'v':
261 	  print_version ("strings");
262 	  break;
263 
264 	case '?':
265 	  usage (stderr, 1);
266 
267 	default:
268 	  if (string_min < 0)
269 	    string_min = optc - '0';
270 	  else
271 	    string_min = string_min * 10 + optc - '0';
272 	  break;
273 	}
274     }
275 
276   if (string_min < 0)
277     string_min = 4;
278 
279   switch (encoding)
280     {
281     case 'S':
282     case 's':
283       encoding_bytes = 1;
284       break;
285     case 'b':
286     case 'l':
287       encoding_bytes = 2;
288       break;
289     case 'B':
290     case 'L':
291       encoding_bytes = 4;
292       break;
293     default:
294       usage (stderr, 1);
295     }
296 
297   bfd_init ();
298   set_default_bfd_target ();
299 
300   if (optind >= argc)
301     {
302       datasection_only = FALSE;
303 #ifdef SET_BINARY
304       SET_BINARY (fileno (stdin));
305 #endif
306       print_strings ("{standard input}", stdin, 0, 0, 0, (char *) NULL);
307       files_given = TRUE;
308     }
309   else
310     {
311       for (; optind < argc; ++optind)
312 	{
313 	  if (strcmp (argv[optind], "-") == 0)
314 	    datasection_only = FALSE;
315 	  else
316 	    {
317 	      files_given = TRUE;
318 	      exit_status |= strings_file (argv[optind]) == FALSE;
319 	    }
320 	}
321     }
322 
323   if (!files_given)
324     usage (stderr, 1);
325 
326   return (exit_status);
327 }
328 
329 /* Scan section SECT of the file ABFD, whose printable name is in
330    ARG->filename and whose size might be in ARG->filesize.  If it
331    contains initialized data set `got_a_section' and print the
332    strings in it.
333 
334    FIXME: We ought to be able to return error codes/messages for
335    certain conditions.  */
336 
337 static void
338 strings_a_section (bfd *abfd, asection *sect, void *arg)
339 {
340   filename_and_size_t * filename_and_sizep;
341   bfd_size_type *filesizep;
342   bfd_size_type sectsize;
343   void *mem;
344 
345   if ((sect->flags & DATA_FLAGS) != DATA_FLAGS)
346     return;
347 
348   sectsize = bfd_get_section_size (sect);
349 
350   if (sectsize <= 0)
351     return;
352 
353   /* Get the size of the file.  This might have been cached for us.  */
354   filename_and_sizep = (filename_and_size_t *) arg;
355   filesizep = & filename_and_sizep->filesize;
356 
357   if (*filesizep == 0)
358     {
359       struct stat st;
360 
361       if (bfd_stat (abfd, &st))
362 	return;
363 
364       /* Cache the result so that we do not repeatedly stat this file.  */
365       *filesizep = st.st_size;
366     }
367 
368   /* Compare the size of the section against the size of the file.
369      If the section is bigger then the file must be corrupt and
370      we should not try dumping it.  */
371   if (sectsize >= *filesizep)
372     return;
373 
374   mem = xmalloc (sectsize);
375 
376   if (bfd_get_section_contents (abfd, sect, mem, (file_ptr) 0, sectsize))
377     {
378       got_a_section = TRUE;
379 
380       print_strings (filename_and_sizep->filename, NULL, sect->filepos,
381 		     0, sectsize, mem);
382     }
383 
384   free (mem);
385 }
386 
387 /* Scan all of the sections in FILE, and print the strings
388    in the initialized data section(s).
389 
390    Return TRUE if successful,
391    FALSE if not (such as if FILE is not an object file).  */
392 
393 static bfd_boolean
394 strings_object_file (const char *file)
395 {
396   filename_and_size_t filename_and_size;
397   bfd *abfd;
398 
399   abfd = bfd_openr (file, target);
400 
401   if (abfd == NULL)
402     /* Treat the file as a non-object file.  */
403     return FALSE;
404 
405   /* This call is mainly for its side effect of reading in the sections.
406      We follow the traditional behavior of `strings' in that we don't
407      complain if we don't recognize a file to be an object file.  */
408   if (!bfd_check_format (abfd, bfd_object))
409     {
410       bfd_close (abfd);
411       return FALSE;
412     }
413 
414   got_a_section = FALSE;
415   filename_and_size.filename = file;
416   filename_and_size.filesize = 0;
417   bfd_map_over_sections (abfd, strings_a_section, & filename_and_size);
418 
419   if (!bfd_close (abfd))
420     {
421       bfd_nonfatal (file);
422       return FALSE;
423     }
424 
425   return got_a_section;
426 }
427 
428 /* Print the strings in FILE.  Return TRUE if ok, FALSE if an error occurs.  */
429 
430 static bfd_boolean
431 strings_file (char *file)
432 {
433   statbuf st;
434 
435   if (file_stat (file, &st) < 0)
436     {
437       if (errno == ENOENT)
438 	non_fatal (_("'%s': No such file"), file);
439       else
440 	non_fatal (_("Warning: could not locate '%s'.  reason: %s"),
441 		   file, strerror (errno));
442       return FALSE;
443     }
444 
445   /* If we weren't told to scan the whole file,
446      try to open it as an object file and only look at
447      initialized data sections.  If that fails, fall back to the
448      whole file.  */
449   if (!datasection_only || !strings_object_file (file))
450     {
451       FILE *stream;
452 
453       stream = file_open (file, FOPEN_RB);
454       if (stream == NULL)
455 	{
456 	  fprintf (stderr, "%s: ", program_name);
457 	  perror (file);
458 	  return FALSE;
459 	}
460 
461       print_strings (file, stream, (file_off) 0, 0, 0, (char *) 0);
462 
463       if (fclose (stream) == EOF)
464 	{
465 	  fprintf (stderr, "%s: ", program_name);
466 	  perror (file);
467 	  return FALSE;
468 	}
469     }
470 
471   return TRUE;
472 }
473 
474 /* Read the next character, return EOF if none available.
475    Assume that STREAM is positioned so that the next byte read
476    is at address ADDRESS in the file.
477 
478    If STREAM is NULL, do not read from it.
479    The caller can supply a buffer of characters
480    to be processed before the data in STREAM.
481    MAGIC is the address of the buffer and
482    MAGICCOUNT is how many characters are in it.  */
483 
484 static long
485 get_char (FILE *stream, file_off *address, int *magiccount, char **magic)
486 {
487   int c, i;
488   long r = EOF;
489   unsigned char buf[4];
490 
491   for (i = 0; i < encoding_bytes; i++)
492     {
493       if (*magiccount)
494 	{
495 	  (*magiccount)--;
496 	  c = *(*magic)++;
497 	}
498       else
499 	{
500 	  if (stream == NULL)
501 	    return EOF;
502 
503 	  /* Only use getc_unlocked if we found a declaration for it.
504 	     Otherwise, libc is not thread safe by default, and we
505 	     should not use it.  */
506 
507 #if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
508 	  c = getc_unlocked (stream);
509 #else
510 	  c = getc (stream);
511 #endif
512 	  if (c == EOF)
513 	    return EOF;
514 	}
515 
516       (*address)++;
517       buf[i] = c;
518     }
519 
520   switch (encoding)
521     {
522     case 'S':
523     case 's':
524       r = buf[0];
525       break;
526     case 'b':
527       r = (buf[0] << 8) | buf[1];
528       break;
529     case 'l':
530       r = buf[0] | (buf[1] << 8);
531       break;
532     case 'B':
533       r = ((long) buf[0] << 24) | ((long) buf[1] << 16) |
534 	((long) buf[2] << 8) | buf[3];
535       break;
536     case 'L':
537       r = buf[0] | ((long) buf[1] << 8) | ((long) buf[2] << 16) |
538 	((long) buf[3] << 24);
539       break;
540     }
541 
542   if (r == EOF)
543     return 0;
544 
545   return r;
546 }
547 
548 /* Find the strings in file FILENAME, read from STREAM.
549    Assume that STREAM is positioned so that the next byte read
550    is at address ADDRESS in the file.
551    Stop reading at address STOP_POINT in the file, if nonzero.
552 
553    If STREAM is NULL, do not read from it.
554    The caller can supply a buffer of characters
555    to be processed before the data in STREAM.
556    MAGIC is the address of the buffer and
557    MAGICCOUNT is how many characters are in it.
558    Those characters come at address ADDRESS and the data in STREAM follow.  */
559 
560 static void
561 print_strings (const char *filename, FILE *stream, file_off address,
562 	       int stop_point, int magiccount, char *magic)
563 {
564   char *buf = (char *) xmalloc (sizeof (char) * (string_min + 1));
565 
566   while (1)
567     {
568       file_off start;
569       int i;
570       long c;
571 
572       /* See if the next `string_min' chars are all graphic chars.  */
573     tryline:
574       if (stop_point && address >= stop_point)
575 	break;
576       start = address;
577       for (i = 0; i < string_min; i++)
578 	{
579 	  c = get_char (stream, &address, &magiccount, &magic);
580 	  if (c == EOF)
581 	    return;
582 	  if (! STRING_ISGRAPHIC (c))
583 	    /* Found a non-graphic.  Try again starting with next char.  */
584 	    goto tryline;
585 	  buf[i] = c;
586 	}
587 
588       /* We found a run of `string_min' graphic characters.  Print up
589 	 to the next non-graphic character.  */
590 
591       if (print_filenames)
592 	printf ("%s: ", filename);
593       if (print_addresses)
594 	switch (address_radix)
595 	  {
596 	  case 8:
597 #if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
598 	    if (sizeof (start) > sizeof (long))
599 	      printf ("%7Lo ", (unsigned long long) start);
600 	    else
601 #else
602 # if !BFD_HOST_64BIT_LONG
603 	    if (start != (unsigned long) start)
604 	      printf ("++%7lo ", (unsigned long) start);
605 	    else
606 # endif
607 #endif
608 	      printf ("%7lo ", (unsigned long) start);
609 	    break;
610 
611 	  case 10:
612 #if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
613 	    if (sizeof (start) > sizeof (long))
614 	      printf ("%7Ld ", (unsigned long long) start);
615 	    else
616 #else
617 # if !BFD_HOST_64BIT_LONG
618 	    if (start != (unsigned long) start)
619 	      printf ("++%7ld ", (unsigned long) start);
620 	    else
621 # endif
622 #endif
623 	      printf ("%7ld ", (long) start);
624 	    break;
625 
626 	  case 16:
627 #if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
628 	    if (sizeof (start) > sizeof (long))
629 	      printf ("%7Lx ", (unsigned long long) start);
630 	    else
631 #else
632 # if !BFD_HOST_64BIT_LONG
633 	    if (start != (unsigned long) start)
634 	      printf ("%lx%8.8lx ", (unsigned long) (start >> 32),
635 		      (unsigned long) (start & 0xffffffff));
636 	    else
637 # endif
638 #endif
639 	      printf ("%7lx ", (unsigned long) start);
640 	    break;
641 	  }
642 
643       buf[i] = '\0';
644       fputs (buf, stdout);
645 
646       while (1)
647 	{
648 	  c = get_char (stream, &address, &magiccount, &magic);
649 	  if (c == EOF)
650 	    break;
651 	  if (! STRING_ISGRAPHIC (c))
652 	    break;
653 	  putchar (c);
654 	}
655 
656       putchar ('\n');
657     }
658 }
659 
/* Parse string S as a non-negative integer, using decimal radix by
   default, but allowing octal (leading "0") and hex (leading "0x")
   numbers as in C.  A decimal or octal number may be followed by `b'
   to multiply it by 512 or by `B' to multiply it by 1024; in a hex
   number those letters are digits.

   Return the value.  If S is not a number of that form, or the result
   does not fit in an int, report the problem through fatal (), which
   is expected not to return.  */

static int
integer_arg (char *s)
{
  unsigned long value;
  unsigned long scale = 1;
  char *end;

  /* strtoul on its own would skip leading white space and accept a
     sign; insist on a digit up front so that neither is allowed.  */
  if (!ISDIGIT (*s))
    fatal (_("invalid integer argument %s"), s);

  /* Base 0 selects hex, octal or decimal from the prefix and stops at
     the first character that is not a digit of that radix.  */
  value = strtoul (s, &end, 0);

  if (*end == 'b')
    {
      scale = 512;
      end++;
    }
  else if (*end == 'B')
    {
      scale = 1024;
      end++;
    }

  /* Reject trailing junk and anything that would not fit in an int
     after scaling.  A strtoul overflow yields ULONG_MAX and so is
     caught by the range test as well.  */
  if (*end != '\0' || value > (unsigned long) INT_MAX / scale)
    fatal (_("invalid integer argument %s"), s);

  return (int) (value * scale);
}
704 
705 static void
706 usage (FILE *stream, int status)
707 {
708   fprintf (stream, _("Usage: %s [option(s)] [file(s)]\n"), program_name);
709   fprintf (stream, _(" Display printable strings in [file(s)] (stdin by default)\n"));
710   fprintf (stream, _(" The options are:\n\
711   -a - --all                Scan the entire file, not just the data section\n\
712   -f --print-file-name      Print the name of the file before each string\n\
713   -n --bytes=[number]       Locate & print any NUL-terminated sequence of at\n\
714   -<number>                 least [number] characters (default 4).\n\
715   -t --radix={o,d,x}        Print the location of the string in base 8, 10 or 16\n\
716   -o                        An alias for --radix=o\n\
717   -T --target=<BFDNAME>     Specify the binary file format\n\
718   -e --encoding={s,S,b,l,B,L} Select character size and endianness:\n\
719                             s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit\n\
720   @<file>                   Read options from <file>\n\
721   -h --help                 Display this information\n\
722   -v --version              Print the program's version number\n"));
723   list_supported_targets (program_name, stream);
724   if (status == 0)
725     fprintf (stream, _("Report bugs to %s\n"), REPORT_BUGS_TO);
726   exit (status);
727 }
728