xref: /netbsd-src/external/gpl3/binutils.old/dist/binutils/strings.c (revision e992f068c547fd6e84b3f104dc2340adcc955732)
175fd0b74Schristos /* strings -- print the strings of printable characters in files
2*e992f068Schristos    Copyright (C) 1993-2022 Free Software Foundation, Inc.
375fd0b74Schristos 
475fd0b74Schristos    This program is free software; you can redistribute it and/or modify
575fd0b74Schristos    it under the terms of the GNU General Public License as published by
675fd0b74Schristos    the Free Software Foundation; either version 3, or (at your option)
775fd0b74Schristos    any later version.
875fd0b74Schristos 
975fd0b74Schristos    This program is distributed in the hope that it will be useful,
1075fd0b74Schristos    but WITHOUT ANY WARRANTY; without even the implied warranty of
1175fd0b74Schristos    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1275fd0b74Schristos    GNU General Public License for more details.
1375fd0b74Schristos 
1475fd0b74Schristos    You should have received a copy of the GNU General Public License
1575fd0b74Schristos    along with this program; if not, write to the Free Software
1675fd0b74Schristos    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
1775fd0b74Schristos    02110-1301, USA.  */
1875fd0b74Schristos 
1975fd0b74Schristos /* Usage: strings [options] file...
2075fd0b74Schristos 
2175fd0b74Schristos    Options:
2275fd0b74Schristos    --all
2375fd0b74Schristos    -a
2475fd0b74Schristos    -		Scan each file in its entirety.
2575fd0b74Schristos 
2675fd0b74Schristos    --data
2775fd0b74Schristos    -d		Scan only the initialized data section(s) of object files.
2875fd0b74Schristos 
2975fd0b74Schristos    --print-file-name
3075fd0b74Schristos    -f		Print the name of the file before each string.
3175fd0b74Schristos 
3275fd0b74Schristos    --bytes=min-len
3375fd0b74Schristos    -n min-len
3475fd0b74Schristos    -min-len	Print graphic char sequences, MIN-LEN or more bytes long,
35*e992f068Schristos 		that are followed by a NUL or a non-displayable character.
36*e992f068Schristos 		Default is 4.
3775fd0b74Schristos 
3875fd0b74Schristos    --radix={o,x,d}
3975fd0b74Schristos    -t {o,x,d}	Print the offset within the file before each string,
4075fd0b74Schristos 		in octal/hex/decimal.
4175fd0b74Schristos 
4275fd0b74Schristos   --include-all-whitespace
  -w		By default tab and space are the only whitespace included in graphic
4475fd0b74Schristos 		char sequences.  This option considers all of isspace() valid.
4575fd0b74Schristos 
4675fd0b74Schristos    -o		Like -to.  (Some other implementations have -o like -to,
4775fd0b74Schristos 		others like -td.  We chose one arbitrarily.)
4875fd0b74Schristos 
4975fd0b74Schristos    --encoding={s,S,b,l,B,L}
5075fd0b74Schristos    -e {s,S,b,l,B,L}
5175fd0b74Schristos 		Select character encoding: 7-bit-character, 8-bit-character,
5275fd0b74Schristos 		bigendian 16-bit, littleendian 16-bit, bigendian 32-bit,
5375fd0b74Schristos 		littleendian 32-bit.
5475fd0b74Schristos 
5575fd0b74Schristos    --target=BFDNAME
5675fd0b74Schristos    -T {bfdname}
5775fd0b74Schristos 		Specify a non-default object file format.
5875fd0b74Schristos 
59*e992f068Schristos   --unicode={default|locale|invalid|hex|escape|highlight}
60*e992f068Schristos   -U {d|l|i|x|e|h}
61*e992f068Schristos 		Determine how to handle UTF-8 unicode characters.  The default
62*e992f068Schristos 		is no special treatment.  All other versions of this option
63*e992f068Schristos 		only apply if the encoding is valid and enabling the option
64*e992f068Schristos 		implies --encoding=S.
65*e992f068Schristos 		The 'locale' option displays the characters according to the
66*e992f068Schristos 		current locale.  The 'invalid' option treats them as
67*e992f068Schristos 		non-string characters.  The 'hex' option displays them as hex
68*e992f068Schristos 		byte sequences.  The 'escape' option displays them as escape
69*e992f068Schristos 		sequences and the 'highlight' option displays them as
70*e992f068Schristos 		coloured escape sequences.
71*e992f068Schristos 
7275fd0b74Schristos   --output-separator=sep_string
7375fd0b74Schristos   -s sep_string	String used to separate parsed strings in output.
7475fd0b74Schristos 		Default is newline.
7575fd0b74Schristos 
7675fd0b74Schristos    --help
7775fd0b74Schristos    -h		Print the usage message on the standard output.
7875fd0b74Schristos 
7975fd0b74Schristos    --version
8075fd0b74Schristos    -V
8175fd0b74Schristos    -v		Print the program version number.
8275fd0b74Schristos 
8375fd0b74Schristos    Written by Richard Stallman <rms@gnu.ai.mit.edu>
8475fd0b74Schristos    and David MacKenzie <djm@gnu.ai.mit.edu>.  */
8575fd0b74Schristos 
8675fd0b74Schristos #include "sysdep.h"
8775fd0b74Schristos #include "bfd.h"
8875fd0b74Schristos #include "getopt.h"
8975fd0b74Schristos #include "libiberty.h"
9075fd0b74Schristos #include "safe-ctype.h"
9175fd0b74Schristos #include "bucomm.h"
9275fd0b74Schristos 
/* Convenience predicate: non-zero when the NUL-terminated strings A and
   B compare equal.  Left untouched if something else already defined
   it.  */
#if !defined (streq)
# define streq(a, b) (strcmp ((a), (b)) == 0)
#endif
96*e992f068Schristos 
/* The ways in which UTF-8 encoded characters can be reported; selected
   with -U/--unicode.  */
typedef enum unicode_display_type
{
  unicode_default   = 0,	/* No special treatment.  */
  unicode_locale    = 1,	/* Display according to the current locale.  */
  unicode_escape    = 2,	/* Display as escape sequences.  */
  unicode_hex       = 3,	/* Display as hex byte sequences.  */
  unicode_highlight = 4,	/* Display as coloured escape sequences.  */
  unicode_invalid   = 5		/* Treat as non-string characters.  */
} unicode_display_type;

/* The display mode currently in force.  */
static unicode_display_type unicode_display = unicode_default;
108*e992f068Schristos 
/* Non-zero if C (a character value, not a byte pointer) may be part of
   a printed string.  C must lie in 0..255 and be a tab, a printable
   character, any value above 127 when the encoding is 'S', or any
   whitespace character when include_all_whitespace is set.
   C is evaluated more than once, so it must not have side effects.  */
#define STRING_ISGRAPHIC(c)					\
  ((c) >= 0							\
   && (c) <= 255						\
   && ((c) == '\t'						\
       || ISPRINT (c)						\
       || (encoding == 'S' && (c) > 127)			\
       || (include_all_whitespace && ISSPACE (c))))
11575fd0b74Schristos 
11675fd0b74Schristos #ifndef errno
11775fd0b74Schristos extern int errno;
11875fd0b74Schristos #endif
11975fd0b74Schristos 
/* Section flags that must all be present for a BFD section to count as
   initialized data.  */
#define DATA_FLAGS (SEC_ALLOC | SEC_LOAD | SEC_HAS_CONTENTS)

/* Base used when printing string addresses: 8, 10 or 16.  */
static int address_radix;

/* Shortest run of graphic characters that gets printed.  */
static unsigned int string_min;

/* True if every whitespace character counts as graphic (-w).  */
static bool include_all_whitespace;

/* True if each string is preceded by its address within the file.  */
static bool print_addresses;

/* True if each string is preceded by the name of its file.  */
static bool print_filenames;

/* True if only the data sections of object files are scanned.  */
static bool datasection_only;

/* Name of the BFD object file format to use, or NULL for the default.  */
static char *target;

/* Character encoding selector (one of s, S, b, l, B, L) and the number
   of bytes that make up one character in that encoding.  */
static char encoding;
static int encoding_bytes;

/* Text printed between strings in the output (-s); NULL if not given.  */
static char *output_separator;
15075fd0b74Schristos 
/* Long option table handed to getopt_long.  Each long option maps onto
   the short option letter that main's switch handles.  */
static struct option long_options[] =
{
  { "all",                    no_argument,       NULL, 'a' },
  { "bytes",                  required_argument, NULL, 'n' },
  { "data",                   no_argument,       NULL, 'd' },
  { "encoding",               required_argument, NULL, 'e' },
  { "help",                   no_argument,       NULL, 'h' },
  { "include-all-whitespace", no_argument,       NULL, 'w' },
  { "output-separator",       required_argument, NULL, 's' },
  { "print-file-name",        no_argument,       NULL, 'f' },
  { "radix",                  required_argument, NULL, 't' },
  { "target",                 required_argument, NULL, 'T' },
  { "unicode",                required_argument, NULL, 'U' },
  { "version",                no_argument,       NULL, 'v' },
  { NULL,                     0,                 NULL, 0 }
};
16775fd0b74Schristos 
168*e992f068Schristos static bool strings_file (char *);
169*e992f068Schristos static void print_strings (const char *, FILE *, file_ptr, int, char *);
170ede78133Schristos static void usage (FILE *, int) ATTRIBUTE_NORETURN;
17175fd0b74Schristos 
17275fd0b74Schristos int main (int, char **);
17375fd0b74Schristos 
17475fd0b74Schristos int
main(int argc,char ** argv)17575fd0b74Schristos main (int argc, char **argv)
17675fd0b74Schristos {
17775fd0b74Schristos   int optc;
17875fd0b74Schristos   int exit_status = 0;
179*e992f068Schristos   bool files_given = false;
18075fd0b74Schristos   char *s;
18175fd0b74Schristos   int numeric_opt = 0;
18275fd0b74Schristos 
18375fd0b74Schristos   setlocale (LC_ALL, "");
18475fd0b74Schristos   bindtextdomain (PACKAGE, LOCALEDIR);
18575fd0b74Schristos   textdomain (PACKAGE);
18675fd0b74Schristos 
18775fd0b74Schristos   program_name = argv[0];
18875fd0b74Schristos   xmalloc_set_program_name (program_name);
18975fd0b74Schristos   bfd_set_error_program_name (program_name);
19075fd0b74Schristos 
19175fd0b74Schristos   expandargv (&argc, &argv);
19275fd0b74Schristos 
19375fd0b74Schristos   string_min = 4;
194*e992f068Schristos   include_all_whitespace = false;
195*e992f068Schristos   print_addresses = false;
196*e992f068Schristos   print_filenames = false;
19775fd0b74Schristos   if (DEFAULT_STRINGS_ALL)
198*e992f068Schristos     datasection_only = false;
19975fd0b74Schristos   else
200*e992f068Schristos     datasection_only = true;
20175fd0b74Schristos   target = NULL;
20275fd0b74Schristos   encoding = 's';
20375fd0b74Schristos   output_separator = NULL;
20475fd0b74Schristos 
205*e992f068Schristos   while ((optc = getopt_long (argc, argv, "adfhHn:wot:e:T:s:U:Vv0123456789",
20675fd0b74Schristos 			      long_options, (int *) 0)) != EOF)
20775fd0b74Schristos     {
20875fd0b74Schristos       switch (optc)
20975fd0b74Schristos 	{
21075fd0b74Schristos 	case 'a':
211*e992f068Schristos 	  datasection_only = false;
21275fd0b74Schristos 	  break;
21375fd0b74Schristos 
21475fd0b74Schristos 	case 'd':
215*e992f068Schristos 	  datasection_only = true;
21675fd0b74Schristos 	  break;
21775fd0b74Schristos 
21875fd0b74Schristos 	case 'f':
219*e992f068Schristos 	  print_filenames = true;
22075fd0b74Schristos 	  break;
22175fd0b74Schristos 
22275fd0b74Schristos 	case 'H':
22375fd0b74Schristos 	case 'h':
22475fd0b74Schristos 	  usage (stdout, 0);
22575fd0b74Schristos 
22675fd0b74Schristos 	case 'n':
22775fd0b74Schristos 	  string_min = (int) strtoul (optarg, &s, 0);
22875fd0b74Schristos 	  if (s != NULL && *s != 0)
22975fd0b74Schristos 	    fatal (_("invalid integer argument %s"), optarg);
23075fd0b74Schristos 	  break;
23175fd0b74Schristos 
23275fd0b74Schristos 	case 'w':
233*e992f068Schristos 	  include_all_whitespace = true;
23475fd0b74Schristos 	  break;
23575fd0b74Schristos 
23675fd0b74Schristos 	case 'o':
237*e992f068Schristos 	  print_addresses = true;
23875fd0b74Schristos 	  address_radix = 8;
23975fd0b74Schristos 	  break;
24075fd0b74Schristos 
24175fd0b74Schristos 	case 't':
242*e992f068Schristos 	  print_addresses = true;
24375fd0b74Schristos 	  if (optarg[1] != '\0')
24475fd0b74Schristos 	    usage (stderr, 1);
24575fd0b74Schristos 	  switch (optarg[0])
24675fd0b74Schristos 	    {
24775fd0b74Schristos 	    case 'o':
24875fd0b74Schristos 	      address_radix = 8;
24975fd0b74Schristos 	      break;
25075fd0b74Schristos 
25175fd0b74Schristos 	    case 'd':
25275fd0b74Schristos 	      address_radix = 10;
25375fd0b74Schristos 	      break;
25475fd0b74Schristos 
25575fd0b74Schristos 	    case 'x':
25675fd0b74Schristos 	      address_radix = 16;
25775fd0b74Schristos 	      break;
25875fd0b74Schristos 
25975fd0b74Schristos 	    default:
26075fd0b74Schristos 	      usage (stderr, 1);
26175fd0b74Schristos 	    }
26275fd0b74Schristos 	  break;
26375fd0b74Schristos 
26475fd0b74Schristos 	case 'T':
26575fd0b74Schristos 	  target = optarg;
26675fd0b74Schristos 	  break;
26775fd0b74Schristos 
26875fd0b74Schristos 	case 'e':
26975fd0b74Schristos 	  if (optarg[1] != '\0')
27075fd0b74Schristos 	    usage (stderr, 1);
27175fd0b74Schristos 	  encoding = optarg[0];
27275fd0b74Schristos 	  break;
27375fd0b74Schristos 
27475fd0b74Schristos 	case 's':
27575fd0b74Schristos 	  output_separator = optarg;
27675fd0b74Schristos 	  break;
27775fd0b74Schristos 
278*e992f068Schristos 	case 'U':
279*e992f068Schristos 	  if (streq (optarg, "default") || streq (optarg, "d"))
280*e992f068Schristos 	    unicode_display = unicode_default;
281*e992f068Schristos 	  else if (streq (optarg, "locale") || streq (optarg, "l"))
282*e992f068Schristos 	    unicode_display = unicode_locale;
283*e992f068Schristos 	  else if (streq (optarg, "escape") || streq (optarg, "e"))
284*e992f068Schristos 	    unicode_display = unicode_escape;
285*e992f068Schristos 	  else if (streq (optarg, "invalid") || streq (optarg, "i"))
286*e992f068Schristos 	    unicode_display = unicode_invalid;
287*e992f068Schristos 	  else if (streq (optarg, "hex") || streq (optarg, "x"))
288*e992f068Schristos 	    unicode_display = unicode_hex;
289*e992f068Schristos 	  else if (streq (optarg, "highlight") || streq (optarg, "h"))
290*e992f068Schristos 	    unicode_display = unicode_highlight;
291*e992f068Schristos 	  else
292*e992f068Schristos 	    fatal (_("invalid argument to -U/--unicode: %s"), optarg);
293*e992f068Schristos 	  break;
294*e992f068Schristos 
29575fd0b74Schristos 	case 'V':
29675fd0b74Schristos 	case 'v':
29775fd0b74Schristos 	  print_version ("strings");
29875fd0b74Schristos 	  break;
29975fd0b74Schristos 
30075fd0b74Schristos 	case '?':
30175fd0b74Schristos 	  usage (stderr, 1);
30275fd0b74Schristos 
30375fd0b74Schristos 	default:
30475fd0b74Schristos 	  numeric_opt = optind;
30575fd0b74Schristos 	  break;
30675fd0b74Schristos 	}
30775fd0b74Schristos     }
30875fd0b74Schristos 
309*e992f068Schristos   if (unicode_display != unicode_default)
310*e992f068Schristos     encoding = 'S';
311*e992f068Schristos 
31275fd0b74Schristos   if (numeric_opt != 0)
31375fd0b74Schristos     {
31475fd0b74Schristos       string_min = (int) strtoul (argv[numeric_opt - 1] + 1, &s, 0);
31575fd0b74Schristos       if (s != NULL && *s != 0)
31675fd0b74Schristos 	fatal (_("invalid integer argument %s"), argv[numeric_opt - 1] + 1);
31775fd0b74Schristos     }
31875fd0b74Schristos   if (string_min < 1)
31975fd0b74Schristos     fatal (_("invalid minimum string length %d"), string_min);
32075fd0b74Schristos 
32175fd0b74Schristos   switch (encoding)
32275fd0b74Schristos     {
32375fd0b74Schristos     case 'S':
32475fd0b74Schristos     case 's':
32575fd0b74Schristos       encoding_bytes = 1;
32675fd0b74Schristos       break;
32775fd0b74Schristos     case 'b':
32875fd0b74Schristos     case 'l':
32975fd0b74Schristos       encoding_bytes = 2;
33075fd0b74Schristos       break;
33175fd0b74Schristos     case 'B':
33275fd0b74Schristos     case 'L':
33375fd0b74Schristos       encoding_bytes = 4;
33475fd0b74Schristos       break;
33575fd0b74Schristos     default:
33675fd0b74Schristos       usage (stderr, 1);
33775fd0b74Schristos     }
33875fd0b74Schristos 
339012573ebSchristos   if (bfd_init () != BFD_INIT_MAGIC)
340012573ebSchristos     fatal (_("fatal error: libbfd ABI mismatch"));
34175fd0b74Schristos   set_default_bfd_target ();
34275fd0b74Schristos 
34375fd0b74Schristos   if (optind >= argc)
34475fd0b74Schristos     {
345*e992f068Schristos       datasection_only = false;
34675fd0b74Schristos       SET_BINARY (fileno (stdin));
347*e992f068Schristos       print_strings ("{standard input}", stdin, 0, 0, (char *) NULL);
348*e992f068Schristos       files_given = true;
34975fd0b74Schristos     }
35075fd0b74Schristos   else
35175fd0b74Schristos     {
35275fd0b74Schristos       for (; optind < argc; ++optind)
35375fd0b74Schristos 	{
354*e992f068Schristos 	  if (streq (argv[optind], "-"))
355*e992f068Schristos 	    datasection_only = false;
35675fd0b74Schristos 	  else
35775fd0b74Schristos 	    {
358*e992f068Schristos 	      files_given = true;
359ede78133Schristos 	      exit_status |= !strings_file (argv[optind]);
36075fd0b74Schristos 	    }
36175fd0b74Schristos 	}
36275fd0b74Schristos     }
36375fd0b74Schristos 
36475fd0b74Schristos   if (!files_given)
36575fd0b74Schristos     usage (stderr, 1);
36675fd0b74Schristos 
36775fd0b74Schristos   return (exit_status);
36875fd0b74Schristos }
36975fd0b74Schristos 
370ede78133Schristos /* Scan section SECT of the file ABFD, whose printable name is
371ede78133Schristos    FILENAME.  If it contains initialized data set GOT_A_SECTION and
372ede78133Schristos    print the strings in it.  */
37375fd0b74Schristos 
37475fd0b74Schristos static void
strings_a_section(bfd * abfd,asection * sect,const char * filename,bool * got_a_section)375ede78133Schristos strings_a_section (bfd *abfd, asection *sect, const char *filename,
376*e992f068Schristos 		   bool *got_a_section)
37775fd0b74Schristos {
37875fd0b74Schristos   bfd_size_type sectsize;
379ede78133Schristos   bfd_byte *mem;
38075fd0b74Schristos 
38175fd0b74Schristos   if ((sect->flags & DATA_FLAGS) != DATA_FLAGS)
38275fd0b74Schristos     return;
38375fd0b74Schristos 
384012573ebSchristos   sectsize = bfd_section_size (sect);
385ede78133Schristos   if (sectsize == 0)
38675fd0b74Schristos     return;
38775fd0b74Schristos 
388ede78133Schristos   if (!bfd_malloc_and_get_section (abfd, sect, &mem))
38975fd0b74Schristos     {
390ede78133Schristos       non_fatal (_("%s: Reading section %s failed: %s"),
391ede78133Schristos 		 filename, sect->name, bfd_errmsg (bfd_get_error ()));
39275fd0b74Schristos       return;
39375fd0b74Schristos     }
39475fd0b74Schristos 
395*e992f068Schristos   *got_a_section = true;
396*e992f068Schristos   print_strings (filename, NULL, sect->filepos, sectsize, (char *) mem);
39775fd0b74Schristos   free (mem);
39875fd0b74Schristos }
39975fd0b74Schristos 
40075fd0b74Schristos /* Scan all of the sections in FILE, and print the strings
40175fd0b74Schristos    in the initialized data section(s).
40275fd0b74Schristos 
40375fd0b74Schristos    Return TRUE if successful,
40475fd0b74Schristos    FALSE if not (such as if FILE is not an object file).  */
40575fd0b74Schristos 
406*e992f068Schristos static bool
strings_object_file(const char * file)40775fd0b74Schristos strings_object_file (const char *file)
40875fd0b74Schristos {
40975fd0b74Schristos   bfd *abfd;
410ede78133Schristos   asection *s;
411*e992f068Schristos   bool got_a_section;
41275fd0b74Schristos 
41375fd0b74Schristos   abfd = bfd_openr (file, target);
41475fd0b74Schristos 
41575fd0b74Schristos   if (abfd == NULL)
41675fd0b74Schristos     /* Treat the file as a non-object file.  */
417*e992f068Schristos     return false;
41875fd0b74Schristos 
41975fd0b74Schristos   /* This call is mainly for its side effect of reading in the sections.
42075fd0b74Schristos      We follow the traditional behavior of `strings' in that we don't
42175fd0b74Schristos      complain if we don't recognize a file to be an object file.  */
42275fd0b74Schristos   if (!bfd_check_format (abfd, bfd_object))
42375fd0b74Schristos     {
42475fd0b74Schristos       bfd_close (abfd);
425*e992f068Schristos       return false;
42675fd0b74Schristos     }
42775fd0b74Schristos 
428*e992f068Schristos   got_a_section = false;
429ede78133Schristos   for (s = abfd->sections; s != NULL; s = s->next)
430ede78133Schristos     strings_a_section (abfd, s, file, &got_a_section);
43175fd0b74Schristos 
43275fd0b74Schristos   if (!bfd_close (abfd))
43375fd0b74Schristos     {
43475fd0b74Schristos       bfd_nonfatal (file);
435*e992f068Schristos       return false;
43675fd0b74Schristos     }
43775fd0b74Schristos 
43875fd0b74Schristos   return got_a_section;
43975fd0b74Schristos }
44075fd0b74Schristos 
44175fd0b74Schristos /* Print the strings in FILE.  Return TRUE if ok, FALSE if an error occurs.  */
44275fd0b74Schristos 
443*e992f068Schristos static bool
strings_file(char * file)44475fd0b74Schristos strings_file (char *file)
44575fd0b74Schristos {
44675fd0b74Schristos   struct stat st;
44775fd0b74Schristos 
44875fd0b74Schristos   /* get_file_size does not support non-S_ISREG files.  */
44975fd0b74Schristos 
45075fd0b74Schristos   if (stat (file, &st) < 0)
45175fd0b74Schristos     {
45275fd0b74Schristos       if (errno == ENOENT)
45375fd0b74Schristos 	non_fatal (_("'%s': No such file"), file);
45475fd0b74Schristos       else
45575fd0b74Schristos 	non_fatal (_("Warning: could not locate '%s'.  reason: %s"),
45675fd0b74Schristos 		   file, strerror (errno));
457*e992f068Schristos       return false;
45875fd0b74Schristos     }
459ede78133Schristos   else if (S_ISDIR (st.st_mode))
460ede78133Schristos     {
461ede78133Schristos       non_fatal (_("Warning: '%s' is a directory"), file);
462*e992f068Schristos       return false;
463ede78133Schristos     }
46475fd0b74Schristos 
46575fd0b74Schristos   /* If we weren't told to scan the whole file,
46675fd0b74Schristos      try to open it as an object file and only look at
46775fd0b74Schristos      initialized data sections.  If that fails, fall back to the
46875fd0b74Schristos      whole file.  */
46975fd0b74Schristos   if (!datasection_only || !strings_object_file (file))
47075fd0b74Schristos     {
47175fd0b74Schristos       FILE *stream;
47275fd0b74Schristos 
47375fd0b74Schristos       stream = fopen (file, FOPEN_RB);
47475fd0b74Schristos       if (stream == NULL)
47575fd0b74Schristos 	{
47675fd0b74Schristos 	  fprintf (stderr, "%s: ", program_name);
47775fd0b74Schristos 	  perror (file);
478*e992f068Schristos 	  return false;
47975fd0b74Schristos 	}
48075fd0b74Schristos 
481*e992f068Schristos       print_strings (file, stream, (file_ptr) 0, 0, (char *) NULL);
48275fd0b74Schristos 
48375fd0b74Schristos       if (fclose (stream) == EOF)
48475fd0b74Schristos 	{
48575fd0b74Schristos 	  fprintf (stderr, "%s: ", program_name);
48675fd0b74Schristos 	  perror (file);
487*e992f068Schristos 	  return false;
48875fd0b74Schristos 	}
48975fd0b74Schristos     }
49075fd0b74Schristos 
491*e992f068Schristos   return true;
49275fd0b74Schristos }
49375fd0b74Schristos 
49475fd0b74Schristos /* Read the next character, return EOF if none available.
49575fd0b74Schristos    Assume that STREAM is positioned so that the next byte read
49675fd0b74Schristos    is at address ADDRESS in the file.
49775fd0b74Schristos 
49875fd0b74Schristos    If STREAM is NULL, do not read from it.
49975fd0b74Schristos    The caller can supply a buffer of characters
50075fd0b74Schristos    to be processed before the data in STREAM.
50175fd0b74Schristos    MAGIC is the address of the buffer and
50275fd0b74Schristos    MAGICCOUNT is how many characters are in it.  */
50375fd0b74Schristos 
50475fd0b74Schristos static long
get_char(FILE * stream,file_ptr * address,int * magiccount,char ** magic)50575fd0b74Schristos get_char (FILE *stream, file_ptr *address, int *magiccount, char **magic)
50675fd0b74Schristos {
50775fd0b74Schristos   int c, i;
50875fd0b74Schristos   long r = 0;
50975fd0b74Schristos 
51075fd0b74Schristos   for (i = 0; i < encoding_bytes; i++)
51175fd0b74Schristos     {
51275fd0b74Schristos       if (*magiccount)
51375fd0b74Schristos 	{
51475fd0b74Schristos 	  (*magiccount)--;
51575fd0b74Schristos 	  c = *(*magic)++;
51675fd0b74Schristos 	}
51775fd0b74Schristos       else
51875fd0b74Schristos 	{
51975fd0b74Schristos 	  if (stream == NULL)
52075fd0b74Schristos 	    return EOF;
52175fd0b74Schristos 
52275fd0b74Schristos 	  /* Only use getc_unlocked if we found a declaration for it.
52375fd0b74Schristos 	     Otherwise, libc is not thread safe by default, and we
52475fd0b74Schristos 	     should not use it.  */
52575fd0b74Schristos 
52675fd0b74Schristos #if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
52775fd0b74Schristos 	  c = getc_unlocked (stream);
52875fd0b74Schristos #else
52975fd0b74Schristos 	  c = getc (stream);
53075fd0b74Schristos #endif
53175fd0b74Schristos 	  if (c == EOF)
53275fd0b74Schristos 	    return EOF;
53375fd0b74Schristos 	}
53475fd0b74Schristos 
53575fd0b74Schristos       (*address)++;
53675fd0b74Schristos       r = (r << 8) | (c & 0xff);
53775fd0b74Schristos     }
53875fd0b74Schristos 
53975fd0b74Schristos   switch (encoding)
54075fd0b74Schristos     {
54175fd0b74Schristos     default:
54275fd0b74Schristos       break;
54375fd0b74Schristos     case 'l':
54475fd0b74Schristos       r = ((r & 0xff) << 8) | ((r & 0xff00) >> 8);
54575fd0b74Schristos       break;
54675fd0b74Schristos     case 'L':
54775fd0b74Schristos       r = (((r & 0xff) << 24) | ((r & 0xff00) << 8)
54875fd0b74Schristos 	   | ((r & 0xff0000) >> 8) | ((r & 0xff000000) >> 24));
54975fd0b74Schristos       break;
55075fd0b74Schristos     }
55175fd0b74Schristos 
55275fd0b74Schristos   return r;
55375fd0b74Schristos }
554012573ebSchristos 
555012573ebSchristos /* Throw away one byte of a (possibly) multi-byte char C, updating
556012573ebSchristos    address and buffer to suit.  */
557012573ebSchristos 
558012573ebSchristos static void
unget_part_char(long c,file_ptr * address,int * magiccount,char ** magic)559012573ebSchristos unget_part_char (long c, file_ptr *address, int *magiccount, char **magic)
560012573ebSchristos {
561012573ebSchristos   static char tmp[4];
562012573ebSchristos 
563012573ebSchristos   if (encoding_bytes > 1)
564012573ebSchristos     {
565012573ebSchristos       *address -= encoding_bytes - 1;
566012573ebSchristos 
567012573ebSchristos       if (*magiccount == 0)
568012573ebSchristos 	{
569012573ebSchristos 	  /* If no magic buffer exists, use temp buffer.  */
570012573ebSchristos 	  switch (encoding)
571012573ebSchristos 	    {
572012573ebSchristos 	    default:
573012573ebSchristos 	      break;
574012573ebSchristos 	    case 'b':
575012573ebSchristos 	      tmp[0] = c & 0xff;
576012573ebSchristos 	      *magiccount = 1;
577012573ebSchristos 	      break;
578012573ebSchristos 	    case 'l':
579012573ebSchristos 	      tmp[0] = (c >> 8) & 0xff;
580012573ebSchristos 	      *magiccount = 1;
581012573ebSchristos 	      break;
582012573ebSchristos 	    case 'B':
583012573ebSchristos 	      tmp[0] = (c >> 16) & 0xff;
584012573ebSchristos 	      tmp[1] = (c >> 8) & 0xff;
585012573ebSchristos 	      tmp[2] = c & 0xff;
586012573ebSchristos 	      *magiccount = 3;
587012573ebSchristos 	      break;
588012573ebSchristos 	    case 'L':
589012573ebSchristos 	      tmp[0] = (c >> 8) & 0xff;
590012573ebSchristos 	      tmp[1] = (c >> 16) & 0xff;
591012573ebSchristos 	      tmp[2] = (c >> 24) & 0xff;
592012573ebSchristos 	      *magiccount = 3;
593012573ebSchristos 	      break;
594012573ebSchristos 	    }
595012573ebSchristos 	  *magic = tmp;
596012573ebSchristos 	}
597012573ebSchristos       else
598012573ebSchristos 	{
599012573ebSchristos 	  /* If magic buffer exists, rewind.  */
600012573ebSchristos 	  *magic -= encoding_bytes - 1;
601012573ebSchristos 	  *magiccount += encoding_bytes - 1;
602012573ebSchristos 	}
603012573ebSchristos     }
604012573ebSchristos }
605*e992f068Schristos 
606*e992f068Schristos static void
print_filename_and_address(const char * filename,file_ptr address)607*e992f068Schristos print_filename_and_address (const char * filename, file_ptr address)
608*e992f068Schristos {
609*e992f068Schristos   if (print_filenames)
610*e992f068Schristos     printf ("%s: ", filename);
611*e992f068Schristos 
612*e992f068Schristos   if (! print_addresses)
613*e992f068Schristos     return;
614*e992f068Schristos 
615*e992f068Schristos   switch (address_radix)
616*e992f068Schristos     {
617*e992f068Schristos     case 8:
618*e992f068Schristos       if (sizeof (address) > sizeof (long))
619*e992f068Schristos 	{
620*e992f068Schristos #ifndef __MSVCRT__
621*e992f068Schristos 	  printf ("%7llo ", (unsigned long long) address);
622*e992f068Schristos #else
623*e992f068Schristos 	  printf ("%7I64o ", (unsigned long long) address);
624*e992f068Schristos #endif
625*e992f068Schristos 	}
626*e992f068Schristos       else
627*e992f068Schristos 	printf ("%7lo ", (unsigned long) address);
628*e992f068Schristos       break;
629*e992f068Schristos 
630*e992f068Schristos     case 10:
631*e992f068Schristos       if (sizeof (address) > sizeof (long))
632*e992f068Schristos 	{
633*e992f068Schristos #ifndef __MSVCRT__
634*e992f068Schristos 	  printf ("%7llu ", (unsigned long long) address);
635*e992f068Schristos #else
636*e992f068Schristos 	  printf ("%7I64d ", (unsigned long long) address);
637*e992f068Schristos #endif
638*e992f068Schristos 	}
639*e992f068Schristos       else
640*e992f068Schristos 	printf ("%7ld ", (long) address);
641*e992f068Schristos       break;
642*e992f068Schristos 
643*e992f068Schristos     case 16:
644*e992f068Schristos       if (sizeof (address) > sizeof (long))
645*e992f068Schristos 	{
646*e992f068Schristos #ifndef __MSVCRT__
647*e992f068Schristos 	  printf ("%7llx ", (unsigned long long) address);
648*e992f068Schristos #else
649*e992f068Schristos 	  printf ("%7I64x ", (unsigned long long) address);
650*e992f068Schristos #endif
651*e992f068Schristos 	}
652*e992f068Schristos       else
653*e992f068Schristos 	printf ("%7lx ", (unsigned long) address);
654*e992f068Schristos       break;
655*e992f068Schristos     }
656*e992f068Schristos }
657*e992f068Schristos 
/* Return non-zero if the bytes starting at BUFFER form a structurally
   valid multi-byte UTF-8 sequence.  BUFLEN is the number of bytes
   available at BUFFER.

   If the sequence is valid, return the number of bytes it uses (2, 3
   or 4).  Return zero for plain ASCII, for a stray continuation byte,
   for a byte that can never start a sequence (0xf8 - 0xff), for a
   sequence truncated by BUFLEN, and for a lead byte that is not
   followed by enough continuation bytes.

   Only the lead/continuation byte structure is checked; overlong forms
   and out-of-range code points are not rejected.  */

static unsigned int
is_valid_utf8 (const unsigned char * buffer, unsigned long buflen)
{
  unsigned int len;
  unsigned int i;

  /* Nothing to look at, so do not touch BUFFER at all.  */
  if (buflen == 0)
    return 0;

  /* Lead bytes of multi-byte sequences lie in 0xc0 .. 0xf7.  Values
     from 0xf8 upwards have bit 0x08 set as well and never begin a
     UTF-8 sequence.  */
  if (buffer[0] < 0xc0 || buffer[0] >= 0xf8)
    return 0;

  if ((buffer[0] & 0x20) == 0)
    len = 2;
  else if ((buffer[0] & 0x10) == 0)
    len = 3;
  else
    len = 4;

  if (buflen < len)
    return 0;

  /* Every byte after the lead byte must look like 10xxxxxx.  */
  for (i = 1; i < len; i++)
    if ((buffer[i] & 0xc0) != 0x80)
      return 0;

  return len;
}
693*e992f068Schristos 
694*e992f068Schristos /* Display a UTF-8 encoded character in BUFFER according to the setting
695*e992f068Schristos    of unicode_display.  The character is known to be valid.
696*e992f068Schristos    Returns the number of bytes consumed.  */
697*e992f068Schristos 
698*e992f068Schristos static unsigned int
display_utf8_char(const unsigned char * buffer)699*e992f068Schristos display_utf8_char (const unsigned char * buffer)
700*e992f068Schristos {
701*e992f068Schristos   unsigned int j;
702*e992f068Schristos   unsigned int utf8_len;
703*e992f068Schristos 
704*e992f068Schristos   switch (buffer[0] & 0x30)
705*e992f068Schristos     {
706*e992f068Schristos     case 0x00:
707*e992f068Schristos     case 0x10:
708*e992f068Schristos       utf8_len = 2;
709*e992f068Schristos       break;
710*e992f068Schristos     case 0x20:
711*e992f068Schristos       utf8_len = 3;
712*e992f068Schristos       break;
713*e992f068Schristos     default:
714*e992f068Schristos       utf8_len = 4;
715*e992f068Schristos     }
716*e992f068Schristos 
717*e992f068Schristos   switch (unicode_display)
718*e992f068Schristos     {
719*e992f068Schristos     default:
720*e992f068Schristos       fprintf (stderr, "ICE: unexpected unicode display type\n");
721*e992f068Schristos       break;
722*e992f068Schristos 
723*e992f068Schristos     case unicode_escape:
724*e992f068Schristos     case unicode_highlight:
725*e992f068Schristos       if (unicode_display == unicode_highlight && isatty (1))
726*e992f068Schristos 	printf ("\x1B[31;47m"); /* Red.  */
727*e992f068Schristos 
728*e992f068Schristos       switch (utf8_len)
729*e992f068Schristos 	{
730*e992f068Schristos 	case 2:
731*e992f068Schristos 	  printf ("\\u%02x%02x",
732*e992f068Schristos 		  ((buffer[0] & 0x1c) >> 2),
733*e992f068Schristos 		  ((buffer[0] & 0x03) << 6) | (buffer[1] & 0x3f));
734*e992f068Schristos 	  break;
735*e992f068Schristos 
736*e992f068Schristos 	case 3:
737*e992f068Schristos 	  printf ("\\u%02x%02x",
738*e992f068Schristos 		  ((buffer[0] & 0x0f) << 4) | ((buffer[1] & 0x3c) >> 2),
739*e992f068Schristos 		  ((buffer[1] & 0x03) << 6) | ((buffer[2] & 0x3f)));
740*e992f068Schristos 	  break;
741*e992f068Schristos 
742*e992f068Schristos 	case 4:
743*e992f068Schristos 	  printf ("\\u%02x%02x%02x",
744*e992f068Schristos 		  ((buffer[0] & 0x07) << 6) | ((buffer[1] & 0x3c) >> 2),
745*e992f068Schristos 		  ((buffer[1] & 0x03) << 6) | ((buffer[2] & 0x3c) >> 2),
746*e992f068Schristos 		  ((buffer[2] & 0x03) << 6) | ((buffer[3] & 0x3f)));
747*e992f068Schristos 	  break;
748*e992f068Schristos 	default:
749*e992f068Schristos 	  /* URG.  */
750*e992f068Schristos 	  break;
751*e992f068Schristos 	}
752*e992f068Schristos 
753*e992f068Schristos       if (unicode_display == unicode_highlight && isatty (1))
754*e992f068Schristos 	printf ("\033[0m"); /* Default colour.  */
755*e992f068Schristos       break;
756*e992f068Schristos 
757*e992f068Schristos     case unicode_hex:
758*e992f068Schristos       putchar ('<');
759*e992f068Schristos       printf ("0x");
760*e992f068Schristos       for (j = 0; j < utf8_len; j++)
761*e992f068Schristos 	printf ("%02x", buffer [j]);
762*e992f068Schristos       putchar ('>');
763*e992f068Schristos       break;
764*e992f068Schristos 
765*e992f068Schristos     case unicode_locale:
766*e992f068Schristos       printf ("%.1s", buffer);
767*e992f068Schristos       break;
768*e992f068Schristos     }
769*e992f068Schristos 
770*e992f068Schristos   return utf8_len;
771*e992f068Schristos }
772*e992f068Schristos 
773*e992f068Schristos /* Display strings in BUFFER.  Treat any UTF-8 encoded characters encountered
774*e992f068Schristos    according to the setting of the unicode_display variable.  The buffer
775*e992f068Schristos    contains BUFLEN bytes.
776*e992f068Schristos 
777*e992f068Schristos    Display the characters as if they started at ADDRESS and are contained in
778*e992f068Schristos    FILENAME.  */
779*e992f068Schristos 
780*e992f068Schristos static void
print_unicode_buffer(const char * filename,file_ptr address,const unsigned char * buffer,unsigned long buflen)781*e992f068Schristos print_unicode_buffer (const char *            filename,
782*e992f068Schristos 		      file_ptr                address,
783*e992f068Schristos 		      const unsigned char *   buffer,
784*e992f068Schristos 		      unsigned long           buflen)
785*e992f068Schristos {
786*e992f068Schristos   /* Paranoia checks...  */
787*e992f068Schristos   if (filename == NULL
788*e992f068Schristos       || buffer == NULL
789*e992f068Schristos       || unicode_display == unicode_default
790*e992f068Schristos       || encoding != 'S'
791*e992f068Schristos       || encoding_bytes != 1)
792*e992f068Schristos     {
793*e992f068Schristos       fprintf (stderr, "ICE: bad arguments to print_unicode_buffer\n");
794*e992f068Schristos       return;
795*e992f068Schristos     }
796*e992f068Schristos 
797*e992f068Schristos   if (buflen == 0)
798*e992f068Schristos     return;
799*e992f068Schristos 
800*e992f068Schristos   /* We must only display strings that are at least string_min *characters*
801*e992f068Schristos      long.  So we scan the buffer in two stages.  First we locate the start
802*e992f068Schristos      of a potential string.  Then we walk along it until we have found
803*e992f068Schristos      string_min characters.  Then we go back to the start point and start
804*e992f068Schristos      displaying characters according to the unicode_display setting.  */
805*e992f068Schristos 
806*e992f068Schristos   unsigned long start_point = 0;
807*e992f068Schristos   unsigned long i = 0;
808*e992f068Schristos   unsigned int char_len = 1;
809*e992f068Schristos   unsigned int num_found = 0;
810*e992f068Schristos 
811*e992f068Schristos   for (i = 0; i < buflen; i += char_len)
812*e992f068Schristos     {
813*e992f068Schristos       int c = buffer[i];
814*e992f068Schristos 
815*e992f068Schristos       char_len = 1;
816*e992f068Schristos 
817*e992f068Schristos       /* Find the first potential character of a string.  */
818*e992f068Schristos       if (! STRING_ISGRAPHIC (c))
819*e992f068Schristos 	{
820*e992f068Schristos 	  num_found = 0;
821*e992f068Schristos 	  continue;
822*e992f068Schristos 	}
823*e992f068Schristos 
824*e992f068Schristos       if (c > 126)
825*e992f068Schristos 	{
826*e992f068Schristos 	  if (c < 0xc0)
827*e992f068Schristos 	    {
828*e992f068Schristos 	      num_found = 0;
829*e992f068Schristos 	      continue;
830*e992f068Schristos 	    }
831*e992f068Schristos 
832*e992f068Schristos 	  if ((char_len = is_valid_utf8 (buffer + i, buflen - i)) == 0)
833*e992f068Schristos 	    {
834*e992f068Schristos 	      char_len = 1;
835*e992f068Schristos 	      num_found = 0;
836*e992f068Schristos 	      continue;
837*e992f068Schristos 	    }
838*e992f068Schristos 
839*e992f068Schristos 	  if (unicode_display == unicode_invalid)
840*e992f068Schristos 	    {
841*e992f068Schristos 	      /* We have found a valid UTF-8 character, but we treat it as non-graphic.  */
842*e992f068Schristos 	      num_found = 0;
843*e992f068Schristos 	      continue;
844*e992f068Schristos 	    }
845*e992f068Schristos 	}
846*e992f068Schristos 
847*e992f068Schristos       if (num_found == 0)
848*e992f068Schristos 	/* We have found a potential starting point for a string.  */
849*e992f068Schristos 	start_point = i;
850*e992f068Schristos 
851*e992f068Schristos       ++ num_found;
852*e992f068Schristos 
853*e992f068Schristos       if (num_found >= string_min)
854*e992f068Schristos 	break;
855*e992f068Schristos     }
856*e992f068Schristos 
857*e992f068Schristos   if (num_found < string_min)
858*e992f068Schristos     return;
859*e992f068Schristos 
860*e992f068Schristos   print_filename_and_address (filename, address + start_point);
861*e992f068Schristos 
862*e992f068Schristos   /* We have found string_min characters.  Display them and any
863*e992f068Schristos      more that follow.  */
864*e992f068Schristos   for (i = start_point; i < buflen; i += char_len)
865*e992f068Schristos     {
866*e992f068Schristos       int c = buffer[i];
867*e992f068Schristos 
868*e992f068Schristos       char_len = 1;
869*e992f068Schristos 
870*e992f068Schristos       if (! STRING_ISGRAPHIC (c))
871*e992f068Schristos 	break;
872*e992f068Schristos       else if (c < 127)
873*e992f068Schristos 	putchar (c);
874*e992f068Schristos       else if (! is_valid_utf8 (buffer + i, buflen - i))
875*e992f068Schristos 	break;
876*e992f068Schristos       else if (unicode_display == unicode_invalid)
877*e992f068Schristos 	break;
878*e992f068Schristos       else
879*e992f068Schristos 	char_len = display_utf8_char (buffer + i);
880*e992f068Schristos     }
881*e992f068Schristos 
882*e992f068Schristos   if (output_separator)
883*e992f068Schristos     fputs (output_separator, stdout);
884*e992f068Schristos   else
885*e992f068Schristos     putchar ('\n');
886*e992f068Schristos 
887*e992f068Schristos   /* FIXME: Using tail recursion here is lazy programming...  */
888*e992f068Schristos   print_unicode_buffer (filename, address + i, buffer + i, buflen - i);
889*e992f068Schristos }
890*e992f068Schristos 
static int
get_unicode_byte (FILE *          stream,
                  unsigned char * putback,
                  unsigned int *  num_putback,
                  unsigned int *  num_read)
{
  /* Fetch the next byte to examine.  Bytes waiting in PUTBACK are handed
     out first, last stored first; *NUM_PUTBACK is the number still
     waiting and *NUM_READ is left alone in that case.  */
  if (* num_putback != 0)
    return putback [-- (* num_putback)];

  /* Nothing pending, so go to STREAM.  The read counter is bumped before
     the read, whatever the read returns (EOF included).  */
  ++ (* num_read);

#if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
  return getc_unlocked (stream);
#else
  return getc (stream);
#endif
}
911*e992f068Schristos 
912*e992f068Schristos /* Helper function for print_unicode_stream.  */
913*e992f068Schristos 
914*e992f068Schristos static void
print_unicode_stream_body(const char * filename,file_ptr address,FILE * stream,unsigned char * putback_buf,unsigned int num_putback,unsigned char * print_buf)915*e992f068Schristos print_unicode_stream_body (const char *     filename,
916*e992f068Schristos 			   file_ptr         address,
917*e992f068Schristos 			   FILE *           stream,
918*e992f068Schristos 			   unsigned char *  putback_buf,
919*e992f068Schristos 			   unsigned int     num_putback,
920*e992f068Schristos 			   unsigned char *  print_buf)
921*e992f068Schristos {
922*e992f068Schristos   /* It would be nice if we could just read the stream into a buffer
923*e992f068Schristos      and then process if with print_unicode_buffer.  But the input
924*e992f068Schristos      might be huge or it might time-locked (eg stdin).  So instead
925*e992f068Schristos      we go one byte at a time...  */
926*e992f068Schristos 
927*e992f068Schristos   file_ptr start_point = 0;
928*e992f068Schristos   unsigned int num_read = 0;
929*e992f068Schristos   unsigned int num_chars = 0;
930*e992f068Schristos   unsigned int num_print = 0;
931*e992f068Schristos   int c = 0;
932*e992f068Schristos 
933*e992f068Schristos   /* Find a series of string_min characters.  Put them into print_buf.  */
934*e992f068Schristos   do
935*e992f068Schristos     {
936*e992f068Schristos       if (num_chars >= string_min)
937*e992f068Schristos 	break;
938*e992f068Schristos 
939*e992f068Schristos       c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read);
940*e992f068Schristos       if (c == EOF)
941*e992f068Schristos 	break;
942*e992f068Schristos 
943*e992f068Schristos       if (! STRING_ISGRAPHIC (c))
944*e992f068Schristos 	{
945*e992f068Schristos 	  num_chars = num_print = 0;
946*e992f068Schristos 	  continue;
947*e992f068Schristos 	}
948*e992f068Schristos 
949*e992f068Schristos       if (num_chars == 0)
950*e992f068Schristos 	start_point = num_read - 1;
951*e992f068Schristos 
952*e992f068Schristos       if (c < 127)
953*e992f068Schristos 	{
954*e992f068Schristos 	  print_buf[num_print] = c;
955*e992f068Schristos 	  num_chars ++;
956*e992f068Schristos 	  num_print ++;
957*e992f068Schristos 	  continue;
958*e992f068Schristos 	}
959*e992f068Schristos 
960*e992f068Schristos       if (c < 0xc0)
961*e992f068Schristos 	{
962*e992f068Schristos 	  num_chars = num_print = 0;
963*e992f068Schristos 	  continue;
964*e992f068Schristos 	}
965*e992f068Schristos 
966*e992f068Schristos       /* We *might* have a UTF-8 sequence.  Time to start peeking.  */
967*e992f068Schristos       char utf8[4];
968*e992f068Schristos 
969*e992f068Schristos       utf8[0] = c;
970*e992f068Schristos       c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read);
971*e992f068Schristos       if (c == EOF)
972*e992f068Schristos 	break;
973*e992f068Schristos       utf8[1] = c;
974*e992f068Schristos 
975*e992f068Schristos       if ((utf8[1] & 0xc0) != 0x80)
976*e992f068Schristos 	{
977*e992f068Schristos 	  /* Invalid UTF-8.  */
978*e992f068Schristos 	  putback_buf[num_putback++] = utf8[1];
979*e992f068Schristos 	  num_chars = num_print = 0;
980*e992f068Schristos 	  continue;
981*e992f068Schristos 	}
982*e992f068Schristos       else if ((utf8[0] & 0x20) == 0)
983*e992f068Schristos 	{
984*e992f068Schristos 	  /* A valid 2-byte UTF-8 encoding.  */
985*e992f068Schristos 	  if (unicode_display == unicode_invalid)
986*e992f068Schristos 	    {
987*e992f068Schristos 	      putback_buf[num_putback++] = utf8[1];
988*e992f068Schristos 	      num_chars = num_print = 0;
989*e992f068Schristos 	    }
990*e992f068Schristos 	  else
991*e992f068Schristos 	    {
992*e992f068Schristos 	      print_buf[num_print ++] = utf8[0];
993*e992f068Schristos 	      print_buf[num_print ++] = utf8[1];
994*e992f068Schristos 	      num_chars ++;
995*e992f068Schristos 	    }
996*e992f068Schristos 	  continue;
997*e992f068Schristos 	}
998*e992f068Schristos 
999*e992f068Schristos       c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read);
1000*e992f068Schristos       if (c == EOF)
1001*e992f068Schristos 	break;
1002*e992f068Schristos       utf8[2] = c;
1003*e992f068Schristos 
1004*e992f068Schristos       if ((utf8[2] & 0xc0) != 0x80)
1005*e992f068Schristos 	{
1006*e992f068Schristos 	  /* Invalid UTF-8.  */
1007*e992f068Schristos 	  putback_buf[num_putback++] = utf8[2];
1008*e992f068Schristos 	  putback_buf[num_putback++] = utf8[1];
1009*e992f068Schristos 	  num_chars = num_print = 0;
1010*e992f068Schristos 	  continue;
1011*e992f068Schristos 	}
1012*e992f068Schristos       else if ((utf8[0] & 0x10) == 0)
1013*e992f068Schristos 	{
1014*e992f068Schristos 	  /* A valid 3-byte UTF-8 encoding.  */
1015*e992f068Schristos 	  if (unicode_display == unicode_invalid)
1016*e992f068Schristos 	    {
1017*e992f068Schristos 	      putback_buf[num_putback++] = utf8[2];
1018*e992f068Schristos 	      putback_buf[num_putback++] = utf8[1];
1019*e992f068Schristos 	      num_chars = num_print = 0;
1020*e992f068Schristos 	    }
1021*e992f068Schristos 	  else
1022*e992f068Schristos 	    {
1023*e992f068Schristos 	      print_buf[num_print ++] = utf8[0];
1024*e992f068Schristos 	      print_buf[num_print ++] = utf8[1];
1025*e992f068Schristos 	      print_buf[num_print ++] = utf8[2];
1026*e992f068Schristos 	      num_chars ++;
1027*e992f068Schristos 	    }
1028*e992f068Schristos 	  continue;
1029*e992f068Schristos 	}
1030*e992f068Schristos 
1031*e992f068Schristos       c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read);
1032*e992f068Schristos       if (c == EOF)
1033*e992f068Schristos 	break;
1034*e992f068Schristos       utf8[3] = c;
1035*e992f068Schristos 
1036*e992f068Schristos       if ((utf8[3] & 0xc0) != 0x80)
1037*e992f068Schristos 	{
1038*e992f068Schristos 	  /* Invalid UTF-8.  */
1039*e992f068Schristos 	  putback_buf[num_putback++] = utf8[3];
1040*e992f068Schristos 	  putback_buf[num_putback++] = utf8[2];
1041*e992f068Schristos 	  putback_buf[num_putback++] = utf8[1];
1042*e992f068Schristos 	  num_chars = num_print = 0;
1043*e992f068Schristos 	}
1044*e992f068Schristos       /* We have a valid 4-byte UTF-8 encoding.  */
1045*e992f068Schristos       else if (unicode_display == unicode_invalid)
1046*e992f068Schristos 	{
1047*e992f068Schristos 	  putback_buf[num_putback++] = utf8[3];
1048*e992f068Schristos 	  putback_buf[num_putback++] = utf8[1];
1049*e992f068Schristos 	  putback_buf[num_putback++] = utf8[2];
1050*e992f068Schristos 	  num_chars = num_print = 0;
1051*e992f068Schristos 	}
1052*e992f068Schristos       else
1053*e992f068Schristos 	{
1054*e992f068Schristos 	  print_buf[num_print ++] = utf8[0];
1055*e992f068Schristos 	  print_buf[num_print ++] = utf8[1];
1056*e992f068Schristos 	  print_buf[num_print ++] = utf8[2];
1057*e992f068Schristos 	  print_buf[num_print ++] = utf8[3];
1058*e992f068Schristos 	  num_chars ++;
1059*e992f068Schristos 	}
1060*e992f068Schristos     }
1061*e992f068Schristos   while (1);
1062*e992f068Schristos 
1063*e992f068Schristos   if (num_chars >= string_min)
1064*e992f068Schristos     {
1065*e992f068Schristos       /* We know that we have string_min valid characters in print_buf,
1066*e992f068Schristos 	 and there may be more to come in the stream.  Start displaying
1067*e992f068Schristos 	 them.  */
1068*e992f068Schristos 
1069*e992f068Schristos       print_filename_and_address (filename, address + start_point);
1070*e992f068Schristos 
1071*e992f068Schristos       unsigned int i;
1072*e992f068Schristos       for (i = 0; i < num_print;)
1073*e992f068Schristos 	{
1074*e992f068Schristos 	  if (print_buf[i] < 127)
1075*e992f068Schristos 	    putchar (print_buf[i++]);
1076*e992f068Schristos 	  else
1077*e992f068Schristos 	    i += display_utf8_char (print_buf + i);
1078*e992f068Schristos 	}
1079*e992f068Schristos 
1080*e992f068Schristos       /* OK so now we have to start read unchecked bytes.  */
1081*e992f068Schristos 
1082*e992f068Schristos       /* Find a series of string_min characters.  Put them into print_buf.  */
1083*e992f068Schristos       do
1084*e992f068Schristos 	{
1085*e992f068Schristos 	  c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read);
1086*e992f068Schristos 	  if (c == EOF)
1087*e992f068Schristos 	    break;
1088*e992f068Schristos 
1089*e992f068Schristos 	  if (! STRING_ISGRAPHIC (c))
1090*e992f068Schristos 	    break;
1091*e992f068Schristos 
1092*e992f068Schristos 	  if (c < 127)
1093*e992f068Schristos 	    {
1094*e992f068Schristos 	      putchar (c);
1095*e992f068Schristos 	      continue;
1096*e992f068Schristos 	    }
1097*e992f068Schristos 
1098*e992f068Schristos 	  if (c < 0xc0)
1099*e992f068Schristos 	    break;
1100*e992f068Schristos 
1101*e992f068Schristos 	  /* We *might* have a UTF-8 sequence.  Time to start peeking.  */
1102*e992f068Schristos 	  unsigned char utf8[4];
1103*e992f068Schristos 
1104*e992f068Schristos 	  utf8[0] = c;
1105*e992f068Schristos 	  c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read);
1106*e992f068Schristos 	  if (c == EOF)
1107*e992f068Schristos 	    break;
1108*e992f068Schristos 	  utf8[1] = c;
1109*e992f068Schristos 
1110*e992f068Schristos 	  if ((utf8[1] & 0xc0) != 0x80)
1111*e992f068Schristos 	    {
1112*e992f068Schristos 	      /* Invalid UTF-8.  */
1113*e992f068Schristos 	      putback_buf[num_putback++] = utf8[1];
1114*e992f068Schristos 	      break;
1115*e992f068Schristos 	    }
1116*e992f068Schristos 	  else if ((utf8[0] & 0x20) == 0)
1117*e992f068Schristos 	    {
1118*e992f068Schristos 	      /* Valid 2-byte UTF-8.  */
1119*e992f068Schristos 	      if (unicode_display == unicode_invalid)
1120*e992f068Schristos 		{
1121*e992f068Schristos 		  putback_buf[num_putback++] = utf8[1];
1122*e992f068Schristos 		  break;
1123*e992f068Schristos 		}
1124*e992f068Schristos 	      else
1125*e992f068Schristos 		{
1126*e992f068Schristos 		  (void) display_utf8_char (utf8);
1127*e992f068Schristos 		  continue;
1128*e992f068Schristos 		}
1129*e992f068Schristos 	    }
1130*e992f068Schristos 
1131*e992f068Schristos 	  c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read);
1132*e992f068Schristos 	  if (c == EOF)
1133*e992f068Schristos 	    break;
1134*e992f068Schristos 	  utf8[2] = c;
1135*e992f068Schristos 
1136*e992f068Schristos 	  if ((utf8[2] & 0xc0) != 0x80)
1137*e992f068Schristos 	    {
1138*e992f068Schristos 	      /* Invalid UTF-8.  */
1139*e992f068Schristos 	      putback_buf[num_putback++] = utf8[2];
1140*e992f068Schristos 	      putback_buf[num_putback++] = utf8[1];
1141*e992f068Schristos 	      break;
1142*e992f068Schristos 	    }
1143*e992f068Schristos 	  else if ((utf8[0] & 0x10) == 0)
1144*e992f068Schristos 	    {
1145*e992f068Schristos 	      /* Valid 3-byte UTF-8.  */
1146*e992f068Schristos 	      if (unicode_display == unicode_invalid)
1147*e992f068Schristos 		{
1148*e992f068Schristos 		  putback_buf[num_putback++] = utf8[2];
1149*e992f068Schristos 		  putback_buf[num_putback++] = utf8[1];
1150*e992f068Schristos 		  break;
1151*e992f068Schristos 		}
1152*e992f068Schristos 	      else
1153*e992f068Schristos 		{
1154*e992f068Schristos 		  (void) display_utf8_char (utf8);
1155*e992f068Schristos 		  continue;
1156*e992f068Schristos 		}
1157*e992f068Schristos 	    }
1158*e992f068Schristos 
1159*e992f068Schristos 	  c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read);
1160*e992f068Schristos 	  if (c == EOF)
1161*e992f068Schristos 	    break;
1162*e992f068Schristos 	  utf8[3] = c;
1163*e992f068Schristos 
1164*e992f068Schristos 	  if ((utf8[3] & 0xc0) != 0x80)
1165*e992f068Schristos 	    {
1166*e992f068Schristos 	      /* Invalid UTF-8.  */
1167*e992f068Schristos 	      putback_buf[num_putback++] = utf8[3];
1168*e992f068Schristos 	      putback_buf[num_putback++] = utf8[2];
1169*e992f068Schristos 	      putback_buf[num_putback++] = utf8[1];
1170*e992f068Schristos 	      break;
1171*e992f068Schristos 	    }
1172*e992f068Schristos 	  else if (unicode_display == unicode_invalid)
1173*e992f068Schristos 	    {
1174*e992f068Schristos 	      putback_buf[num_putback++] = utf8[3];
1175*e992f068Schristos 	      putback_buf[num_putback++] = utf8[2];
1176*e992f068Schristos 	      putback_buf[num_putback++] = utf8[1];
1177*e992f068Schristos 	      break;
1178*e992f068Schristos 	    }
1179*e992f068Schristos 	  else
1180*e992f068Schristos 	    /* A valid 4-byte UTF-8 encoding.  */
1181*e992f068Schristos 	    (void) display_utf8_char (utf8);
1182*e992f068Schristos 	}
1183*e992f068Schristos       while (1);
1184*e992f068Schristos 
1185*e992f068Schristos       if (output_separator)
1186*e992f068Schristos 	fputs (output_separator, stdout);
1187*e992f068Schristos       else
1188*e992f068Schristos 	putchar ('\n');
1189*e992f068Schristos     }
1190*e992f068Schristos 
1191*e992f068Schristos   if (c != EOF)
1192*e992f068Schristos     /* FIXME: Using tail recursion here is lazy, but it works.  */
1193*e992f068Schristos     print_unicode_stream_body (filename, address + num_read, stream, putback_buf, num_putback, print_buf);
1194*e992f068Schristos }
1195*e992f068Schristos 
1196*e992f068Schristos /* Display strings read in from STREAM.  Treat any UTF-8 encoded characters
1197*e992f068Schristos    encountered according to the setting of the unicode_display variable.
1198*e992f068Schristos    The stream is positioned at ADDRESS and is attached to FILENAME.  */
1199*e992f068Schristos 
1200*e992f068Schristos static void
print_unicode_stream(const char * filename,file_ptr address,FILE * stream)1201*e992f068Schristos print_unicode_stream (const char * filename,
1202*e992f068Schristos 		      file_ptr     address,
1203*e992f068Schristos 		      FILE *       stream)
1204*e992f068Schristos {
1205*e992f068Schristos   /* Paranoia checks...  */
1206*e992f068Schristos   if (filename == NULL
1207*e992f068Schristos       || stream == NULL
1208*e992f068Schristos       || unicode_display == unicode_default
1209*e992f068Schristos       || encoding != 'S'
1210*e992f068Schristos       || encoding_bytes != 1)
1211*e992f068Schristos     {
1212*e992f068Schristos       fprintf (stderr, "ICE: bad arguments to print_unicode_stream\n");
1213*e992f068Schristos       return;
1214*e992f068Schristos     }
1215*e992f068Schristos 
1216*e992f068Schristos   /* Allocate space for string_min 4-byte utf-8 characters.  */
1217*e992f068Schristos   unsigned char * print_buf = xmalloc ((4 * string_min) + 1);
1218*e992f068Schristos   /* We should never have to put back more than 4 bytes.  */
1219*e992f068Schristos   unsigned char putback_buf[5];
1220*e992f068Schristos   unsigned int num_putback = 0;
1221*e992f068Schristos 
1222*e992f068Schristos   print_unicode_stream_body (filename, address, stream, putback_buf, num_putback, print_buf);
1223*e992f068Schristos   free (print_buf);
1224*e992f068Schristos }
122575fd0b74Schristos 
122675fd0b74Schristos /* Find the strings in file FILENAME, read from STREAM.
122775fd0b74Schristos    Assume that STREAM is positioned so that the next byte read
122875fd0b74Schristos    is at address ADDRESS in the file.
122975fd0b74Schristos 
123075fd0b74Schristos    If STREAM is NULL, do not read from it.
123175fd0b74Schristos    The caller can supply a buffer of characters
123275fd0b74Schristos    to be processed before the data in STREAM.
123375fd0b74Schristos    MAGIC is the address of the buffer and
123475fd0b74Schristos    MAGICCOUNT is how many characters are in it.
123575fd0b74Schristos    Those characters come at address ADDRESS and the data in STREAM follow.  */
123675fd0b74Schristos 
123775fd0b74Schristos static void
print_strings(const char * filename,FILE * stream,file_ptr address,int magiccount,char * magic)123875fd0b74Schristos print_strings (const char *filename, FILE *stream, file_ptr address,
1239*e992f068Schristos 	       int magiccount, char *magic)
124075fd0b74Schristos {
1241*e992f068Schristos   if (unicode_display != unicode_default)
1242*e992f068Schristos     {
1243*e992f068Schristos       if (magic != NULL)
1244*e992f068Schristos 	print_unicode_buffer (filename, address,
1245*e992f068Schristos 			      (const unsigned char *) magic, magiccount);
1246*e992f068Schristos 
1247*e992f068Schristos       if (stream != NULL)
1248*e992f068Schristos 	print_unicode_stream (filename, address, stream);
1249*e992f068Schristos       return;
1250*e992f068Schristos     }
1251*e992f068Schristos 
125275fd0b74Schristos   char *buf = (char *) xmalloc (sizeof (char) * (string_min + 1));
125375fd0b74Schristos 
125475fd0b74Schristos   while (1)
125575fd0b74Schristos     {
125675fd0b74Schristos       file_ptr start;
1257*e992f068Schristos       unsigned int i;
125875fd0b74Schristos       long c;
125975fd0b74Schristos 
126075fd0b74Schristos       /* See if the next `string_min' chars are all graphic chars.  */
126175fd0b74Schristos     tryline:
126275fd0b74Schristos       start = address;
126375fd0b74Schristos       for (i = 0; i < string_min; i++)
126475fd0b74Schristos 	{
126575fd0b74Schristos 	  c = get_char (stream, &address, &magiccount, &magic);
126675fd0b74Schristos 	  if (c == EOF)
126775fd0b74Schristos 	    {
126875fd0b74Schristos 	      free (buf);
126975fd0b74Schristos 	      return;
127075fd0b74Schristos 	    }
1271012573ebSchristos 
127275fd0b74Schristos 	  if (! STRING_ISGRAPHIC (c))
1273012573ebSchristos 	    {
1274012573ebSchristos 	      /* Found a non-graphic.  Try again starting with next byte.  */
1275012573ebSchristos 	      unget_part_char (c, &address, &magiccount, &magic);
127675fd0b74Schristos 	      goto tryline;
1277012573ebSchristos 	    }
127875fd0b74Schristos 	  buf[i] = c;
127975fd0b74Schristos 	}
128075fd0b74Schristos 
128175fd0b74Schristos       /* We found a run of `string_min' graphic characters.  Print up
128275fd0b74Schristos 	 to the next non-graphic character.  */
1283*e992f068Schristos       print_filename_and_address (filename, start);
128475fd0b74Schristos 
128575fd0b74Schristos       buf[i] = '\0';
128675fd0b74Schristos       fputs (buf, stdout);
128775fd0b74Schristos 
128875fd0b74Schristos       while (1)
128975fd0b74Schristos 	{
129075fd0b74Schristos 	  c = get_char (stream, &address, &magiccount, &magic);
129175fd0b74Schristos 	  if (c == EOF)
129275fd0b74Schristos 	    break;
129375fd0b74Schristos 	  if (! STRING_ISGRAPHIC (c))
1294012573ebSchristos 	    {
1295012573ebSchristos 	      unget_part_char (c, &address, &magiccount, &magic);
129675fd0b74Schristos 	      break;
1297012573ebSchristos 	    }
129875fd0b74Schristos 	  putchar (c);
129975fd0b74Schristos 	}
130075fd0b74Schristos 
130175fd0b74Schristos       if (output_separator)
130275fd0b74Schristos 	fputs (output_separator, stdout);
130375fd0b74Schristos       else
130475fd0b74Schristos 	putchar ('\n');
130575fd0b74Schristos     }
130675fd0b74Schristos   free (buf);
130775fd0b74Schristos }
130875fd0b74Schristos 
130975fd0b74Schristos static void
usage(FILE * stream,int status)131075fd0b74Schristos usage (FILE *stream, int status)
131175fd0b74Schristos {
131275fd0b74Schristos   fprintf (stream, _("Usage: %s [option(s)] [file(s)]\n"), program_name);
131375fd0b74Schristos   fprintf (stream, _(" Display printable strings in [file(s)] (stdin by default)\n"));
131475fd0b74Schristos   fprintf (stream, _(" The options are:\n"));
131575fd0b74Schristos 
131675fd0b74Schristos   if (DEFAULT_STRINGS_ALL)
131775fd0b74Schristos     fprintf (stream, _("\
131875fd0b74Schristos   -a - --all                Scan the entire file, not just the data section [default]\n\
131975fd0b74Schristos   -d --data                 Only scan the data sections in the file\n"));
132075fd0b74Schristos   else
132175fd0b74Schristos     fprintf (stream, _("\
132275fd0b74Schristos   -a - --all                Scan the entire file, not just the data section\n\
132375fd0b74Schristos   -d --data                 Only scan the data sections in the file [default]\n"));
132475fd0b74Schristos 
132575fd0b74Schristos   fprintf (stream, _("\
132675fd0b74Schristos   -f --print-file-name      Print the name of the file before each string\n\
1327*e992f068Schristos   -n <number>               Locate & print any sequence of at least <number>\n\
1328*e992f068Schristos     --bytes=<number>         displayable characters.  (The default is 4).\n\
132975fd0b74Schristos   -t --radix={o,d,x}        Print the location of the string in base 8, 10 or 16\n\
133075fd0b74Schristos   -w --include-all-whitespace Include all whitespace as valid string characters\n\
133175fd0b74Schristos   -o                        An alias for --radix=o\n\
133275fd0b74Schristos   -T --target=<BFDNAME>     Specify the binary file format\n\
133375fd0b74Schristos   -e --encoding={s,S,b,l,B,L} Select character size and endianness:\n\
133475fd0b74Schristos                             s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit\n\
1335*e992f068Schristos   --unicode={default|show|invalid|hex|escape|highlight}\n\
1336*e992f068Schristos   -U {d|s|i|x|e|h}          Specify how to treat UTF-8 encoded unicode characters\n\
133775fd0b74Schristos   -s --output-separator=<string> String used to separate strings in output.\n\
133875fd0b74Schristos   @<file>                   Read options from <file>\n\
133975fd0b74Schristos   -h --help                 Display this information\n\
134075fd0b74Schristos   -v -V --version           Print the program's version number\n"));
134175fd0b74Schristos   list_supported_targets (program_name, stream);
134275fd0b74Schristos   if (REPORT_BUGS_TO[0] && status == 0)
134375fd0b74Schristos     fprintf (stream, _("Report bugs to %s\n"), REPORT_BUGS_TO);
134475fd0b74Schristos   exit (status);
134575fd0b74Schristos }
1346