xref: /netbsd-src/external/gpl3/binutils.old/dist/gas/app.c (revision d90047b5d07facf36e6c01dcc0bded8997ce9cc2)
1 /* This is the Assembler Pre-Processor
2    Copyright (C) 1987-2018 Free Software Foundation, Inc.
3 
4    This file is part of GAS, the GNU Assembler.
5 
6    GAS is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3, or (at your option)
9    any later version.
10 
11    GAS is distributed in the hope that it will be useful, but WITHOUT
12    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
14    License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with GAS; see the file COPYING.  If not, write to the Free
18    Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
19    02110-1301, USA.  */
20 
21 /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90.  */
22 /* App, the assembler pre-processor.  This pre-processor strips out
23    excess spaces, turns single-quoted characters into a decimal
24    constant, and turns the # in # <number> <filename> <garbage> into a
25    .linefile.  This needs better error-handling.  */
26 
27 #include "as.h"
28 
29 #if (__STDC__ != 1)
30 #ifndef const
31 #define const  /* empty */
32 #endif
33 #endif
34 
35 #ifdef H_TICK_HEX
36 int enable_h_tick_hex = 0;
37 #endif
38 
39 #ifdef TC_M68K
40 /* Whether we are scrubbing in m68k MRI mode.  This is different from
41    flag_m68k_mri, because the two flags will be affected by the .mri
42    pseudo-op at different times.  */
43 static int scrub_m68k_mri;
44 
45 /* The pseudo-op which switches in and out of MRI mode.  See the
46    comment in do_scrub_chars.  */
47 static const char mri_pseudo[] = ".mri 0";
48 #else
49 #define scrub_m68k_mri 0
50 #endif
51 
52 #if defined TC_ARM && defined OBJ_ELF
53 /* The pseudo-op for which we need to special-case `@' characters.
54    See the comment in do_scrub_chars.  */
55 static const char   symver_pseudo[] = ".symver";
56 static const char * symver_state;
57 #endif
58 #ifdef TC_ARM
59 static char last_char;
60 #endif
61 
62 static char lex[256];	/* Class (one of LEX_IS_* below, or 0) of each byte value; filled in by do_scrub_begin.  */
63 static const char symbol_chars[] =	/* Characters that do_scrub_begin always marks LEX_IS_SYMBOL_COMPONENT.  */
64 "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
65 
66 #define LEX_IS_SYMBOL_COMPONENT		1	/* symbol_chars, bytes 128-255, and tc_symbol_chars if defined.  */
67 #define LEX_IS_WHITESPACE		2	/* ' ', '\t' and '\r'.  */
68 #define LEX_IS_LINE_SEPARATOR		3	/* Characters listed in tc_line_separator_chars.  */
69 #define LEX_IS_COMMENT_START		4	/* Characters listed in tc_comment_chars.  */
70 #define LEX_IS_LINE_COMMENT_START	5	/* Characters listed in line_comment_chars.  */
71 #define	LEX_IS_TWOCHAR_COMMENT_1ST	6	/* '/' when it has no other class: may begin a slash-star comment.  */
72 #define	LEX_IS_STRINGQUOTE		8	/* '"' (also '\'' with SINGLE_QUOTE_STRINGS or in m68k MRI mode).  Note: value 7 is not defined here.  */
73 #define	LEX_IS_COLON			9	/* ':'.  */
74 #define	LEX_IS_NEWLINE			10	/* '\n'.  */
75 #define	LEX_IS_ONECHAR_QUOTE		11	/* '\'' introducing a character constant (not on TC_HPPA).  */
76 #ifdef TC_V850
77 #define LEX_IS_DOUBLEDASH_1ST		12	/* '-': a second '-' starts a comment that runs to end of line.  */
78 #endif
79 #ifdef TC_M32R
80 #define DOUBLEBAR_PARALLEL	/* Enables the "||" handling guarded by DOUBLEBAR_PARALLEL below and in do_scrub_chars.  */
81 #endif
82 #ifdef DOUBLEBAR_PARALLEL
83 #define LEX_IS_DOUBLEBAR_1ST		13	/* '|': a second '|' is handled through scrubber state 13.  */
84 #endif
85 #define LEX_IS_PARALLEL_SEPARATOR	14	/* Characters listed in tc_parallel_separator_chars, if defined.  */
86 #ifdef H_TICK_HEX
87 #define LEX_IS_H			15	/* 'h' and 'H', only when enable_h_tick_hex is set.  */
88 #endif
89 #define IS_SYMBOL_COMPONENT(c)		(lex[c] == LEX_IS_SYMBOL_COMPONENT)	/* The IS_* tests index lex[] directly with C, so C must be in 0..255 (never EOF).  */
90 #define IS_WHITESPACE(c)		(lex[c] == LEX_IS_WHITESPACE)
91 #define IS_LINE_SEPARATOR(c)		(lex[c] == LEX_IS_LINE_SEPARATOR)
92 #define IS_PARALLEL_SEPARATOR(c)	(lex[c] == LEX_IS_PARALLEL_SEPARATOR)
93 #define IS_COMMENT(c)			(lex[c] == LEX_IS_COMMENT_START)
94 #define IS_LINE_COMMENT(c)		(lex[c] == LEX_IS_LINE_COMMENT_START)
95 #define	IS_NEWLINE(c)			(lex[c] == LEX_IS_NEWLINE)
96 
97 static int process_escape (int);
98 
99 /* FIXME-soon: The entire lexer/parser thingy should be
100    built statically at compile time rather than dynamically
101    each and every time the assembler is run.  xoxorich.  */
102 
103 void
104 do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
105 {
106   const char *p;
107   int c;
108 
109   lex[' '] = LEX_IS_WHITESPACE;
110   lex['\t'] = LEX_IS_WHITESPACE;
111   lex['\r'] = LEX_IS_WHITESPACE;
112   lex['\n'] = LEX_IS_NEWLINE;
113   lex[':'] = LEX_IS_COLON;
114 
115 #ifdef TC_M68K
116   scrub_m68k_mri = m68k_mri;
117 
118   if (! m68k_mri)
119 #endif
120     {
121       lex['"'] = LEX_IS_STRINGQUOTE;
122 
123 #if ! defined (TC_HPPA)
124       lex['\''] = LEX_IS_ONECHAR_QUOTE;
125 #endif
126 
127 #ifdef SINGLE_QUOTE_STRINGS
128       lex['\''] = LEX_IS_STRINGQUOTE;
129 #endif
130     }
131 
132   /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
133      in state 5 of do_scrub_chars must be changed.  */
134 
135   /* Note that these override the previous defaults, e.g. if ';' is a
136      comment char, then it isn't a line separator.  */
137   for (p = symbol_chars; *p; ++p)
138     lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
139 
140   for (c = 128; c < 256; ++c)
141     lex[c] = LEX_IS_SYMBOL_COMPONENT;
142 
143 #ifdef tc_symbol_chars
144   /* This macro permits the processor to specify all characters which
145      may appears in an operand.  This will prevent the scrubber from
146      discarding meaningful whitespace in certain cases.  The i386
147      backend uses this to support prefixes, which can confuse the
148      scrubber as to whether it is parsing operands or opcodes.  */
149   for (p = tc_symbol_chars; *p; ++p)
150     lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
151 #endif
152 
153   /* The m68k backend wants to be able to change comment_chars.  */
154 #ifndef tc_comment_chars
155 #define tc_comment_chars comment_chars
156 #endif
157   for (p = tc_comment_chars; *p; p++)
158     lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
159 
160   for (p = line_comment_chars; *p; p++)
161     lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
162 
163 #ifndef tc_line_separator_chars
164 #define tc_line_separator_chars line_separator_chars
165 #endif
166   for (p = tc_line_separator_chars; *p; p++)
167     lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
168 
169 #ifdef tc_parallel_separator_chars
170   /* This macro permits the processor to specify all characters which
171      separate parallel insns on the same line.  */
172   for (p = tc_parallel_separator_chars; *p; p++)
173     lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
174 #endif
175 
176   /* Only allow slash-star comments if slash is not in use.
177      FIXME: This isn't right.  We should always permit them.  */
178   if (lex['/'] == 0)
179     lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
180 
181 #ifdef TC_M68K
182   if (m68k_mri)
183     {
184       lex['\''] = LEX_IS_STRINGQUOTE;
185       lex[';'] = LEX_IS_COMMENT_START;
186       lex['*'] = LEX_IS_LINE_COMMENT_START;
187       /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
188 	 then it can't be used in an expression.  */
189       lex['!'] = LEX_IS_LINE_COMMENT_START;
190     }
191 #endif
192 
193 #ifdef TC_V850
194   lex['-'] = LEX_IS_DOUBLEDASH_1ST;
195 #endif
196 #ifdef DOUBLEBAR_PARALLEL
197   lex['|'] = LEX_IS_DOUBLEBAR_1ST;
198 #endif
199 #ifdef TC_D30V
200   /* Must do this is we want VLIW instruction with "->" or "<-".  */
201   lex['-'] = LEX_IS_SYMBOL_COMPONENT;
202 #endif
203 
204 #ifdef H_TICK_HEX
205   if (enable_h_tick_hex)
206     {
207       lex['h'] = LEX_IS_H;
208       lex['H'] = LEX_IS_H;
209     }
210 #endif
211 }
212 
213 /* Saved state of the scrubber.  */
214 static int state;	/* Current state of the do_scrub_chars state machine.  */
215 static int old_state;	/* State to resume once a string (state 5) or states -1 / -2 finish.  */
216 static const char *out_string;	/* Next character to emit while in state -1.  */
217 static char out_buf[20];	/* Decimal text of a character constant; out_string points into it.  */
218 static int add_newlines;	/* Newlines consumed inside comments or continued strings that are still owed to the output.  */
219 static char *saved_input;	/* Input not yet consumed by the previous do_scrub_chars call, or NULL if none.  */
220 static size_t saved_input_len;	/* Number of bytes at saved_input.  */
221 static char input_buffer[32 * 1024];	/* Buffer handed to the GET callback; app_pop restores saved input into it.  */
222 static const char *mri_state;	/* Position in mri_pseudo while matching ".mri 0" / ".mri 1"; NULL when not matching.  */
223 static char mri_last_ch;	/* Last character matched against mri_pseudo; '0' or '1' once the whole pseudo-op is seen.  */
224 
225 /* Data structure for saving the state of app across #include's.  Note that
226    app is called asynchronously to the parsing of the .include's, so our
227    state at the time .include is interpreted is completely unrelated.
228    That's why we have to save it all.  */
229 
230 struct app_save
231 {
232   int          state;
233   int          old_state;
234   const char * out_string;
235   char         out_buf[sizeof (out_buf)];
236   int          add_newlines;
237   char *       saved_input;
238   size_t       saved_input_len;
239 #ifdef TC_M68K
240   int          scrub_m68k_mri;
241 #endif
242   const char * mri_state;
243   char         mri_last_ch;
244 #if defined TC_ARM && defined OBJ_ELF
245   const char * symver_state;
246 #endif
247 #ifdef TC_ARM
248   char last_char;
249 #endif
250 };
251 
252 char *
253 app_push (void)
254 {
255   struct app_save *saved;
256 
257   saved = XNEW (struct app_save);
258   saved->state = state;
259   saved->old_state = old_state;
260   saved->out_string = out_string;
261   memcpy (saved->out_buf, out_buf, sizeof (out_buf));
262   saved->add_newlines = add_newlines;
263   if (saved_input == NULL)
264     saved->saved_input = NULL;
265   else
266     {
267       saved->saved_input = XNEWVEC (char, saved_input_len);
268       memcpy (saved->saved_input, saved_input, saved_input_len);
269       saved->saved_input_len = saved_input_len;
270     }
271 #ifdef TC_M68K
272   saved->scrub_m68k_mri = scrub_m68k_mri;
273 #endif
274   saved->mri_state = mri_state;
275   saved->mri_last_ch = mri_last_ch;
276 #if defined TC_ARM && defined OBJ_ELF
277   saved->symver_state = symver_state;
278 #endif
279 #ifdef TC_ARM
280   saved->last_char = last_char;
281 #endif
282 
283   /* do_scrub_begin() is not useful, just wastes time.  */
284 
285   state = 0;
286   saved_input = NULL;
287   add_newlines = 0;
288 
289   return (char *) saved;
290 }
291 
292 void
293 app_pop (char *arg)
294 {
295   struct app_save *saved = (struct app_save *) arg;
296 
297   /* There is no do_scrub_end ().  */
298   state = saved->state;
299   old_state = saved->old_state;
300   out_string = saved->out_string;
301   memcpy (out_buf, saved->out_buf, sizeof (out_buf));
302   add_newlines = saved->add_newlines;
303   if (saved->saved_input == NULL)
304     saved_input = NULL;
305   else
306     {
307       gas_assert (saved->saved_input_len <= sizeof (input_buffer));
308       memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
309       saved_input = input_buffer;
310       saved_input_len = saved->saved_input_len;
311       free (saved->saved_input);
312     }
313 #ifdef TC_M68K
314   scrub_m68k_mri = saved->scrub_m68k_mri;
315 #endif
316   mri_state = saved->mri_state;
317   mri_last_ch = saved->mri_last_ch;
318 #if defined TC_ARM && defined OBJ_ELF
319   symver_state = saved->symver_state;
320 #endif
321 #ifdef TC_ARM
322   last_char = saved->last_char;
323 #endif
324 
325   free (arg);
326 }
327 
328 /* @@ This assumes that \n &c are the same on host and target.  This is not
329    necessarily true.  */
330 
/* Translate CH, the character that followed a backslash in a
   one-character quote, into the character the escape stands for.
   Only \b, \f, \n, \r, \t, \' and \" are translated; any other value
   is returned unchanged.  The result uses the host's encoding of
   these control characters.  */

static int
process_escape (int ch)
{
  static const struct
  {
    char letter;	/* Character written after the backslash.  */
    char value;		/* Character the escape denotes.  */
  } escapes[] =
  {
    { 'b', '\b' },
    { 'f', '\f' },
    { 'n', '\n' },
    { 'r', '\r' },
    { 't', '\t' },
    { '\'', '\'' },
    { '"', '\"' }
  };
  unsigned int i;

  for (i = 0; i < sizeof (escapes) / sizeof (escapes[0]); i++)
    if (ch == escapes[i].letter)
      return escapes[i].value;

  return ch;
}
354 
355 /* This function is called to process input characters.  The GET
356    parameter is used to retrieve more input characters.  GET should
357    set its parameter to point to a buffer, and return the length of
358    the buffer; it should return 0 at end of file.  The scrubbed output
359    characters are put into the buffer starting at TOSTART; the TOSTART
360    buffer is TOLEN bytes in length.  The function returns the number
361    of scrubbed characters put into TOSTART.  This will be TOLEN unless
362    end of file was seen.  This function is arranged as a state
363    machine, and saves its state so that it may return at any point.
364    This is the way the old code used to work.  */
365 
366 size_t
367 do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
368 {
369   char *to = tostart;
370   char *toend = tostart + tolen;
371   char *from;
372   char *fromend;
373   size_t fromlen;
374   int ch, ch2 = 0;
375   /* Character that started the string we're working on.  */
376   static char quotechar;
377 
378   /*State 0: beginning of normal line
379 	  1: After first whitespace on line (flush more white)
380 	  2: After first non-white (opcode) on line (keep 1white)
381 	  3: after second white on line (into operands) (flush white)
382 	  4: after putting out a .linefile, put out digits
383 	  5: parsing a string, then go to old-state
384 	  6: putting out \ escape in a "d string.
385 	  7: no longer used
386 	  8: no longer used
387 	  9: After seeing symbol char in state 3 (keep 1white after symchar)
388 	 10: After seeing whitespace in state 9 (keep white before symchar)
389 	 11: After seeing a symbol character in state 0 (eg a label definition)
390 	 -1: output string in out_string and go to the state in old_state
391 	 -2: flush text until a '*' '/' is seen, then go to state old_state
392 #ifdef TC_V850
393 	 12: After seeing a dash, looking for a second dash as a start
394 	     of comment.
395 #endif
396 #ifdef DOUBLEBAR_PARALLEL
397 	 13: After seeing a vertical bar, looking for a second
398 	     vertical bar as a parallel expression separator.
399 #endif
400 #ifdef TC_PREDICATE_START_CHAR
401 	 14: After seeing a predicate start character at state 0, looking
402 	     for a predicate end character as predicate.
403 	 15: After seeing a predicate start character at state 1, looking
404 	     for a predicate end character as predicate.
405 #endif
406 #ifdef TC_Z80
407 	 16: After seeing an 'a' or an 'A' at the start of a symbol
408 	 17: After seeing an 'f' or an 'F' in state 16
409 #endif
410 	  */
411 
412   /* I added states 9 and 10 because the MIPS ECOFF assembler uses
413      constructs like ``.loc 1 20''.  This was turning into ``.loc
414      120''.  States 9 and 10 ensure that a space is never dropped in
415      between characters which could appear in an identifier.  Ian
416      Taylor, ian@cygnus.com.
417 
418      I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
419      correctly on the PA (and any other target where colons are optional).
420      Jeff Law, law@cs.utah.edu.
421 
422      I added state 13 so that something like "cmp r1, r2 || trap #1" does not
423      get squashed into "cmp r1,r2||trap#1", with the all important space
424      between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
425 
426   /* This macro gets the next input character.  */
427 
428 #define GET()							\
429   (from < fromend						\
430    ? * (unsigned char *) (from++)				\
431    : (saved_input = NULL,					\
432       fromlen = (*get) (input_buffer, sizeof input_buffer),	\
433       from = input_buffer,					\
434       fromend = from + fromlen,					\
435       (fromlen == 0						\
436        ? EOF							\
437        : * (unsigned char *) (from++))))
438 
439   /* This macro pushes a character back on the input stream.  */
440 
441 #define UNGET(uch) (*--from = (uch))
442 
443   /* This macro puts a character into the output buffer.  If this
444      character fills the output buffer, this macro jumps to the label
445      TOFULL.  We use this rather ugly approach because we need to
446      handle two different termination conditions: EOF on the input
447      stream, and a full output buffer.  It would be simpler if we
448      always read in the entire input stream before processing it, but
449      I don't want to make such a significant change to the assembler's
450      memory usage.  */
451 
452 #define PUT(pch)				\
453   do						\
454     {						\
455       *to++ = (pch);				\
456       if (to >= toend)				\
457 	goto tofull;				\
458     }						\
459   while (0)
460 
461   if (saved_input != NULL)
462     {
463       from = saved_input;
464       fromend = from + saved_input_len;
465     }
466   else
467     {
468       fromlen = (*get) (input_buffer, sizeof input_buffer);
469       if (fromlen == 0)
470 	return 0;
471       from = input_buffer;
472       fromend = from + fromlen;
473     }
474 
475   while (1)
476     {
477       /* The cases in this switch end with continue, in order to
478 	 branch back to the top of this while loop and generate the
479 	 next output character in the appropriate state.  */
480       switch (state)
481 	{
482 	case -1:
483 	  ch = *out_string++;
484 	  if (*out_string == '\0')
485 	    {
486 	      state = old_state;
487 	      old_state = 3;
488 	    }
489 	  PUT (ch);
490 	  continue;
491 
492 	case -2:
493 	  for (;;)
494 	    {
495 	      do
496 		{
497 		  ch = GET ();
498 
499 		  if (ch == EOF)
500 		    {
501 		      as_warn (_("end of file in comment"));
502 		      goto fromeof;
503 		    }
504 
505 		  if (ch == '\n')
506 		    PUT ('\n');
507 		}
508 	      while (ch != '*');
509 
510 	      while ((ch = GET ()) == '*')
511 		;
512 
513 	      if (ch == EOF)
514 		{
515 		  as_warn (_("end of file in comment"));
516 		  goto fromeof;
517 		}
518 
519 	      if (ch == '/')
520 		break;
521 
522 	      UNGET (ch);
523 	    }
524 
525 	  state = old_state;
526 	  UNGET (' ');
527 	  continue;
528 
529 	case 4:
530 	  ch = GET ();
531 	  if (ch == EOF)
532 	    goto fromeof;
533 	  else if (ch >= '0' && ch <= '9')
534 	    PUT (ch);
535 	  else
536 	    {
537 	      while (ch != EOF && IS_WHITESPACE (ch))
538 		ch = GET ();
539 	      if (ch == '"')
540 		{
541 		  quotechar = ch;
542 		  state = 5;
543 		  old_state = 3;
544 		  PUT (ch);
545 		}
546 	      else
547 		{
548 		  while (ch != EOF && ch != '\n')
549 		    ch = GET ();
550 		  state = 0;
551 		  PUT (ch);
552 		}
553 	    }
554 	  continue;
555 
556 	case 5:
557 	  /* We are going to copy everything up to a quote character,
558 	     with special handling for a backslash.  We try to
559 	     optimize the copying in the simple case without using the
560 	     GET and PUT macros.  */
561 	  {
562 	    char *s;
563 	    ptrdiff_t len;
564 
565 	    for (s = from; s < fromend; s++)
566 	      {
567 		ch = *s;
568 		if (ch == '\\'
569 		    || ch == quotechar
570 		    || ch == '\n')
571 		  break;
572 	      }
573 	    len = s - from;
574 	    if (len > toend - to)
575 	      len = toend - to;
576 	    if (len > 0)
577 	      {
578 		memcpy (to, from, len);
579 		to += len;
580 		from += len;
581 		if (to >= toend)
582 		  goto tofull;
583 	      }
584 	  }
585 
586 	  ch = GET ();
587 	  if (ch == EOF)
588 	    {
589 	      /* This buffer is here specifically so
590 		 that the UNGET below will work.  */
591 	      static char one_char_buf[1];
592 
593 	      as_warn (_("end of file in string; '%c' inserted"), quotechar);
594 	      state = old_state;
595 	      from = fromend = one_char_buf + 1;
596 	      fromlen = 1;
597 	      UNGET ('\n');
598 	      PUT (quotechar);
599 	    }
600 	  else if (ch == quotechar)
601 	    {
602 	      state = old_state;
603 	      PUT (ch);
604 	    }
605 #ifndef NO_STRING_ESCAPES
606 	  else if (ch == '\\')
607 	    {
608 	      state = 6;
609 	      PUT (ch);
610 	    }
611 #endif
612 	  else if (scrub_m68k_mri && ch == '\n')
613 	    {
614 	      /* Just quietly terminate the string.  This permits lines like
615 		   bne	label	loop if we haven't reach end yet.  */
616 	      state = old_state;
617 	      UNGET (ch);
618 	      PUT ('\'');
619 	    }
620 	  else
621 	    {
622 	      PUT (ch);
623 	    }
624 	  continue;
625 
626 	case 6:
627 	  state = 5;
628 	  ch = GET ();
629 	  switch (ch)
630 	    {
631 	      /* Handle strings broken across lines, by turning '\n' into
632 		 '\\' and 'n'.  */
633 	    case '\n':
634 	      UNGET ('n');
635 	      add_newlines++;
636 	      PUT ('\\');
637 	      continue;
638 
639 	    case EOF:
640 	      as_warn (_("end of file in string; '%c' inserted"), quotechar);
641 	      PUT (quotechar);
642 	      continue;
643 
644 	    case '"':
645 	    case '\\':
646 	    case 'b':
647 	    case 'f':
648 	    case 'n':
649 	    case 'r':
650 	    case 't':
651 	    case 'v':
652 	    case 'x':
653 	    case 'X':
654 	    case '0':
655 	    case '1':
656 	    case '2':
657 	    case '3':
658 	    case '4':
659 	    case '5':
660 	    case '6':
661 	    case '7':
662 	      break;
663 
664 	    default:
665 #ifdef ONLY_STANDARD_ESCAPES
666 	      as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
667 #endif
668 	      break;
669 	    }
670 	  PUT (ch);
671 	  continue;
672 
673 #ifdef DOUBLEBAR_PARALLEL
674 	case 13:
675 	  ch = GET ();
676 	  if (ch != '|')
677 	    abort ();
678 
679 	  /* Reset back to state 1 and pretend that we are parsing a
680 	     line from just after the first white space.  */
681 	  state = 1;
682 	  PUT ('|');
683 #ifdef TC_TIC6X
684 	  /* "||^" is used for SPMASKed instructions.  */
685 	  ch = GET ();
686 	  if (ch == EOF)
687 	    goto fromeof;
688 	  else if (ch == '^')
689 	    PUT ('^');
690 	  else
691 	    UNGET (ch);
692 #endif
693 	  continue;
694 #endif
695 #ifdef TC_Z80
696 	case 16:
697 	  /* We have seen an 'a' at the start of a symbol, look for an 'f'.  */
698 	  ch = GET ();
699 	  if (ch == 'f' || ch == 'F')
700 	    {
701 	      state = 17;
702 	      PUT (ch);
703 	    }
704 	  else
705 	    {
706 	      state = 9;
707 	      break;
708 	    }
709 	  /* Fall through.  */
710 	case 17:
711 	  /* We have seen "af" at the start of a symbol,
712 	     a ' here is a part of that symbol.  */
713 	  ch = GET ();
714 	  state = 9;
715 	  if (ch == '\'')
716 	    /* Change to avoid warning about unclosed string.  */
717 	    PUT ('`');
718 	  else if (ch != EOF)
719 	    UNGET (ch);
720 	  break;
721 #endif
722 	}
723 
724       /* OK, we are somewhere in states 0 through 4 or 9 through 11.  */
725 
726       /* flushchar: */
727       ch = GET ();
728 
729 #ifdef TC_PREDICATE_START_CHAR
730       if (ch == TC_PREDICATE_START_CHAR && (state == 0 || state == 1))
731 	{
732 	  state += 14;
733 	  PUT (ch);
734 	  continue;
735 	}
736       else if (state == 14 || state == 15)
737 	{
738 	  if (ch == TC_PREDICATE_END_CHAR)
739 	    {
740 	      state -= 14;
741 	      PUT (ch);
742 	      ch = GET ();
743 	    }
744 	  else
745 	    {
746 	      PUT (ch);
747 	      continue;
748 	    }
749 	}
750 #endif
751 
752     recycle:
753 
754 #if defined TC_ARM && defined OBJ_ELF
755       /* We need to watch out for .symver directives.  See the comment later
756 	 in this function.  */
757       if (symver_state == NULL)
758 	{
759 	  if ((state == 0 || state == 1) && ch == symver_pseudo[0])
760 	    symver_state = symver_pseudo + 1;
761 	}
762       else
763 	{
764 	  /* We advance to the next state if we find the right
765 	     character.  */
766 	  if (ch != '\0' && (*symver_state == ch))
767 	    ++symver_state;
768 	  else if (*symver_state != '\0')
769 	    /* We did not get the expected character, or we didn't
770 	       get a valid terminating character after seeing the
771 	       entire pseudo-op, so we must go back to the beginning.  */
772 	    symver_state = NULL;
773 	  else
774 	    {
775 	      /* We've read the entire pseudo-op.  If this is the end
776 		 of the line, go back to the beginning.  */
777 	      if (IS_NEWLINE (ch))
778 		symver_state = NULL;
779 	    }
780 	}
781 #endif /* TC_ARM && OBJ_ELF */
782 
783 #ifdef TC_M68K
784       /* We want to have pseudo-ops which control whether we are in
785 	 MRI mode or not.  Unfortunately, since m68k MRI mode affects
786 	 the scrubber, that means that we need a special purpose
787 	 recognizer here.  */
788       if (mri_state == NULL)
789 	{
790 	  if ((state == 0 || state == 1)
791 	      && ch == mri_pseudo[0])
792 	    mri_state = mri_pseudo + 1;
793 	}
794       else
795 	{
796 	  /* We advance to the next state if we find the right
797 	     character, or if we need a space character and we get any
798 	     whitespace character, or if we need a '0' and we get a
799 	     '1' (this is so that we only need one state to handle
800 	     ``.mri 0'' and ``.mri 1'').  */
801 	  if (ch != '\0'
802 	      && (*mri_state == ch
803 		  || (*mri_state == ' '
804 		      && lex[ch] == LEX_IS_WHITESPACE)
805 		  || (*mri_state == '0'
806 		      && ch == '1')))
807 	    {
808 	      mri_last_ch = ch;
809 	      ++mri_state;
810 	    }
811 	  else if (*mri_state != '\0'
812 		   || (lex[ch] != LEX_IS_WHITESPACE
813 		       && lex[ch] != LEX_IS_NEWLINE))
814 	    {
815 	      /* We did not get the expected character, or we didn't
816 		 get a valid terminating character after seeing the
817 		 entire pseudo-op, so we must go back to the
818 		 beginning.  */
819 	      mri_state = NULL;
820 	    }
821 	  else
822 	    {
823 	      /* We've read the entire pseudo-op.  mri_last_ch is
824 		 either '0' or '1' indicating whether to enter or
825 		 leave MRI mode.  */
826 	      do_scrub_begin (mri_last_ch == '1');
827 	      mri_state = NULL;
828 
829 	      /* We continue handling the character as usual.  The
830 		 main gas reader must also handle the .mri pseudo-op
831 		 to control expression parsing and the like.  */
832 	    }
833 	}
834 #endif
835 
836       if (ch == EOF)
837 	{
838 	  if (state != 0)
839 	    {
840 	      as_warn (_("end of file not at end of a line; newline inserted"));
841 	      state = 0;
842 	      PUT ('\n');
843 	    }
844 	  goto fromeof;
845 	}
846 
847       switch (lex[ch])
848 	{
849 	case LEX_IS_WHITESPACE:
850 	  do
851 	    {
852 	      ch = GET ();
853 	    }
854 	  while (ch != EOF && IS_WHITESPACE (ch));
855 	  if (ch == EOF)
856 	    goto fromeof;
857 
858 	  if (state == 0)
859 	    {
860 	      /* Preserve a single whitespace character at the
861 		 beginning of a line.  */
862 	      state = 1;
863 	      UNGET (ch);
864 	      PUT (' ');
865 	      break;
866 	    }
867 
868 #ifdef KEEP_WHITE_AROUND_COLON
869 	  if (lex[ch] == LEX_IS_COLON)
870 	    {
871 	      /* Only keep this white if there's no white *after* the
872 		 colon.  */
873 	      ch2 = GET ();
874 	      if (ch2 != EOF)
875 		UNGET (ch2);
876 	      if (!IS_WHITESPACE (ch2))
877 		{
878 		  state = 9;
879 		  UNGET (ch);
880 		  PUT (' ');
881 		  break;
882 		}
883 	    }
884 #endif
885 	  if (IS_COMMENT (ch)
886 	      || ch == '/'
887 	      || IS_LINE_SEPARATOR (ch)
888 	      || IS_PARALLEL_SEPARATOR (ch))
889 	    {
890 	      if (scrub_m68k_mri)
891 		{
892 		  /* In MRI mode, we keep these spaces.  */
893 		  UNGET (ch);
894 		  PUT (' ');
895 		  break;
896 		}
897 	      goto recycle;
898 	    }
899 
900 	  /* If we're in state 2 or 11, we've seen a non-white
901 	     character followed by whitespace.  If the next character
902 	     is ':', this is whitespace after a label name which we
903 	     normally must ignore.  In MRI mode, though, spaces are
904 	     not permitted between the label and the colon.  */
905 	  if ((state == 2 || state == 11)
906 	      && lex[ch] == LEX_IS_COLON
907 	      && ! scrub_m68k_mri)
908 	    {
909 	      state = 1;
910 	      PUT (ch);
911 	      break;
912 	    }
913 
914 	  switch (state)
915 	    {
916 	    case 1:
917 	      /* We can arrive here if we leave a leading whitespace
918 		 character at the beginning of a line.  */
919 	      goto recycle;
920 	    case 2:
921 	      state = 3;
922 	      if (to + 1 < toend)
923 		{
924 		  /* Optimize common case by skipping UNGET/GET.  */
925 		  PUT (' ');	/* Sp after opco */
926 		  goto recycle;
927 		}
928 	      UNGET (ch);
929 	      PUT (' ');
930 	      break;
931 	    case 3:
932 #ifndef TC_KEEP_OPERAND_SPACES
933 	      /* For TI C6X, we keep these spaces as they may separate
934 		 functional unit specifiers from operands.  */
935 	      if (scrub_m68k_mri)
936 #endif
937 		{
938 		  /* In MRI mode, we keep these spaces.  */
939 		  UNGET (ch);
940 		  PUT (' ');
941 		  break;
942 		}
943 	      goto recycle;	/* Sp in operands */
944 	    case 9:
945 	    case 10:
946 #ifndef TC_KEEP_OPERAND_SPACES
947 	      if (scrub_m68k_mri)
948 #endif
949 		{
950 		  /* In MRI mode, we keep these spaces.  */
951 		  state = 3;
952 		  UNGET (ch);
953 		  PUT (' ');
954 		  break;
955 		}
956 	      state = 10;	/* Sp after symbol char */
957 	      goto recycle;
958 	    case 11:
959 	      if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
960 		state = 1;
961 	      else
962 		{
963 		  /* We know that ch is not ':', since we tested that
964 		     case above.  Therefore this is not a label, so it
965 		     must be the opcode, and we've just seen the
966 		     whitespace after it.  */
967 		  state = 3;
968 		}
969 	      UNGET (ch);
970 	      PUT (' ');	/* Sp after label definition.  */
971 	      break;
972 	    default:
973 	      BAD_CASE (state);
974 	    }
975 	  break;
976 
977 	case LEX_IS_TWOCHAR_COMMENT_1ST:
978 	  ch2 = GET ();
979 	  if (ch2 == '*')
980 	    {
981 	      for (;;)
982 		{
983 		  do
984 		    {
985 		      ch2 = GET ();
986 		      if (ch2 != EOF && IS_NEWLINE (ch2))
987 			add_newlines++;
988 		    }
989 		  while (ch2 != EOF && ch2 != '*');
990 
991 		  while (ch2 == '*')
992 		    ch2 = GET ();
993 
994 		  if (ch2 == EOF || ch2 == '/')
995 		    break;
996 
997 		  /* This UNGET will ensure that we count newlines
998 		     correctly.  */
999 		  UNGET (ch2);
1000 		}
1001 
1002 	      if (ch2 == EOF)
1003 		as_warn (_("end of file in multiline comment"));
1004 
1005 	      ch = ' ';
1006 	      goto recycle;
1007 	    }
1008 #ifdef DOUBLESLASH_LINE_COMMENTS
1009 	  else if (ch2 == '/')
1010 	    {
1011 	      do
1012 		{
1013 		  ch = GET ();
1014 		}
1015 	      while (ch != EOF && !IS_NEWLINE (ch));
1016 	      if (ch == EOF)
1017 		as_warn ("end of file in comment; newline inserted");
1018 	      state = 0;
1019 	      PUT ('\n');
1020 	      break;
1021 	    }
1022 #endif
1023 	  else
1024 	    {
1025 	      if (ch2 != EOF)
1026 		UNGET (ch2);
1027 	      if (state == 9 || state == 10)
1028 		state = 3;
1029 	      PUT (ch);
1030 	    }
1031 	  break;
1032 
1033 	case LEX_IS_STRINGQUOTE:
1034 	  quotechar = ch;
1035 	  if (state == 10)
1036 	    {
1037 	      /* Preserve the whitespace in foo "bar".  */
1038 	      UNGET (ch);
1039 	      state = 3;
1040 	      PUT (' ');
1041 
1042 	      /* PUT didn't jump out.  We could just break, but we
1043 		 know what will happen, so optimize a bit.  */
1044 	      ch = GET ();
1045 	      old_state = 3;
1046 	    }
1047 	  else if (state == 9)
1048 	    old_state = 3;
1049 	  else
1050 	    old_state = state;
1051 	  state = 5;
1052 	  PUT (ch);
1053 	  break;
1054 
1055 	case LEX_IS_ONECHAR_QUOTE:
1056 #ifdef H_TICK_HEX
1057 	  if (state == 9 && enable_h_tick_hex)
1058 	    {
1059 	      char c;
1060 
1061 	      c = GET ();
1062 	      as_warn ("'%c found after symbol", c);
1063 	      UNGET (c);
1064 	    }
1065 #endif
1066 	  if (state == 10)
1067 	    {
1068 	      /* Preserve the whitespace in foo 'b'.  */
1069 	      UNGET (ch);
1070 	      state = 3;
1071 	      PUT (' ');
1072 	      break;
1073 	    }
1074 	  ch = GET ();
1075 	  if (ch == EOF)
1076 	    {
1077 	      as_warn (_("end of file after a one-character quote; \\0 inserted"));
1078 	      ch = 0;
1079 	    }
1080 	  if (ch == '\\')
1081 	    {
1082 	      ch = GET ();
1083 	      if (ch == EOF)
1084 		{
1085 		  as_warn (_("end of file in escape character"));
1086 		  ch = '\\';
1087 		}
1088 	      else
1089 		ch = process_escape (ch);
1090 	    }
1091 	  sprintf (out_buf, "%d", (int) (unsigned char) ch);
1092 
1093 	  /* None of these 'x constants for us.  We want 'x'.  */
1094 	  if ((ch = GET ()) != '\'')
1095 	    {
1096 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
1097 	      as_warn (_("missing close quote; (assumed)"));
1098 #else
1099 	      if (ch != EOF)
1100 		UNGET (ch);
1101 #endif
1102 	    }
1103 	  if (strlen (out_buf) == 1)
1104 	    {
1105 	      PUT (out_buf[0]);
1106 	      break;
1107 	    }
1108 	  if (state == 9)
1109 	    old_state = 3;
1110 	  else
1111 	    old_state = state;
1112 	  state = -1;
1113 	  out_string = out_buf;
1114 	  PUT (*out_string++);
1115 	  break;
1116 
1117 	case LEX_IS_COLON:
1118 #ifdef KEEP_WHITE_AROUND_COLON
1119 	  state = 9;
1120 #else
1121 	  if (state == 9 || state == 10)
1122 	    state = 3;
1123 	  else if (state != 3)
1124 	    state = 1;
1125 #endif
1126 	  PUT (ch);
1127 	  break;
1128 
1129 	case LEX_IS_NEWLINE:
1130 	  /* Roll out a bunch of newlines from inside comments, etc.  */
1131 	  if (add_newlines)
1132 	    {
1133 	      --add_newlines;
1134 	      UNGET (ch);
1135 	    }
1136 	  /* Fall through.  */
1137 
1138 	case LEX_IS_LINE_SEPARATOR:
1139 	  state = 0;
1140 	  PUT (ch);
1141 	  break;
1142 
1143 	case LEX_IS_PARALLEL_SEPARATOR:
1144 	  state = 1;
1145 	  PUT (ch);
1146 	  break;
1147 
1148 #ifdef TC_V850
1149 	case LEX_IS_DOUBLEDASH_1ST:
1150 	  ch2 = GET ();
1151 	  if (ch2 != '-')
1152 	    {
1153 	      if (ch2 != EOF)
1154 		UNGET (ch2);
1155 	      goto de_fault;
1156 	    }
1157 	  /* Read and skip to end of line.  */
1158 	  do
1159 	    {
1160 	      ch = GET ();
1161 	    }
1162 	  while (ch != EOF && ch != '\n');
1163 
1164 	  if (ch == EOF)
1165 	    as_warn (_("end of file in comment; newline inserted"));
1166 
1167 	  state = 0;
1168 	  PUT ('\n');
1169 	  break;
1170 #endif
1171 #ifdef DOUBLEBAR_PARALLEL
1172 	case LEX_IS_DOUBLEBAR_1ST:
1173 	  ch2 = GET ();
1174 	  if (ch2 != EOF)
1175 	    UNGET (ch2);
1176 	  if (ch2 != '|')
1177 	    goto de_fault;
1178 
1179 	  /* Handle '||' in two states as invoking PUT twice might
1180 	     result in the first one jumping out of this loop.  We'd
1181 	     then lose track of the state and one '|' char.  */
1182 	  state = 13;
1183 	  PUT ('|');
1184 	  break;
1185 #endif
1186 	case LEX_IS_LINE_COMMENT_START:
1187 	  /* FIXME-someday: The two character comment stuff was badly
1188 	     thought out.  On i386, we want '/' as line comment start
1189 	     AND we want C style comments.  hence this hack.  The
1190 	     whole lexical process should be reworked.  xoxorich.  */
1191 	  if (ch == '/')
1192 	    {
1193 	      ch2 = GET ();
1194 	      if (ch2 == '*')
1195 		{
1196 		  old_state = 3;
1197 		  state = -2;
1198 		  break;
1199 		}
1200 	      else if (ch2 != EOF)
1201 		{
1202 		  UNGET (ch2);
1203 		}
1204 	    }
1205 
1206 	  if (state == 0 || state == 1)	/* Only comment at start of line.  */
1207 	    {
1208 	      int startch;
1209 
1210 	      startch = ch;
1211 
1212 	      do
1213 		{
1214 		  ch = GET ();
1215 		}
1216 	      while (ch != EOF && IS_WHITESPACE (ch));
1217 
1218 	      if (ch == EOF)
1219 		{
1220 		  as_warn (_("end of file in comment; newline inserted"));
1221 		  PUT ('\n');
1222 		  break;
1223 		}
1224 
1225 	      if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1226 		{
1227 		  /* Not a cpp line.  */
1228 		  while (ch != EOF && !IS_NEWLINE (ch))
1229 		    ch = GET ();
1230 		  if (ch == EOF)
1231 		    {
1232 		      as_warn (_("end of file in comment; newline inserted"));
1233 		      PUT ('\n');
1234 		    }
1235 		  else /* IS_NEWLINE (ch) */
1236 		    {
1237 		      /* To process non-zero add_newlines.  */
1238 		      UNGET (ch);
1239 		    }
1240 		  state = 0;
1241 		  break;
1242 		}
1243 	      /* Looks like `# 123 "filename"' from cpp.  */
1244 	      UNGET (ch);
1245 	      old_state = 4;
1246 	      state = -1;
1247 	      if (scrub_m68k_mri)
1248 		out_string = "\tlinefile ";
1249 	      else
1250 		out_string = "\t.linefile ";
1251 	      PUT (*out_string++);
1252 	      break;
1253 	    }
1254 
1255 #ifdef TC_D10V
1256 	  /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1257 	     Trap is the only short insn that has a first operand that is
1258 	     neither register nor label.
1259 	     We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
1260 	     We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1261 	     already LEX_IS_LINE_COMMENT_START.  However, it is the
1262 	     only character in line_comment_chars for d10v, hence we
1263 	     can recognize it as such.  */
1264 	  /* An alternative approach would be to reset the state to 1 when
1265 	     we see '||', '<-' or '->', but that seems to be overkill.  */
1266 	  if (state == 10)
1267 	    PUT (' ');
1268 #endif
1269 	  /* We have a line comment character which is not at the
1270 	     start of a line.  If this is also a normal comment
1271 	     character, fall through.  Otherwise treat it as a default
1272 	     character.  */
1273 	  if (strchr (tc_comment_chars, ch) == NULL
1274 	      && (! scrub_m68k_mri
1275 		  || (ch != '!' && ch != '*')))
1276 	    goto de_fault;
1277 	  if (scrub_m68k_mri
1278 	      && (ch == '!' || ch == '*' || ch == '#')
1279 	      && state != 1
1280 	      && state != 10)
1281 	    goto de_fault;
1282 	  /* Fall through.  */
1283 	case LEX_IS_COMMENT_START:
1284 #if defined TC_ARM && defined OBJ_ELF
1285 	  /* On the ARM, `@' is the comment character.
1286 	     Unfortunately this is also a special character in ELF .symver
1287 	     directives (and .type, though we deal with those another way).
1288 	     So we check if this line is such a directive, and treat
1289 	     the character as default if so.  This is a hack.  */
1290 	  if ((symver_state != NULL) && (*symver_state == 0))
1291 	    goto de_fault;
1292 #endif
1293 
1294 #ifdef TC_ARM
1295 	  /* For the ARM, care is needed not to damage occurrences of \@
1296 	     by stripping the @ onwards.  Yuck.  */
1297 	  if ((to > tostart ? to[-1] : last_char) == '\\')
1298 	    /* Do not treat the @ as a start-of-comment.  */
1299 	    goto de_fault;
1300 #endif
1301 
1302 #ifdef WARN_COMMENTS
1303 	  if (!found_comment)
1304 	    found_comment_file = as_where (&found_comment);
1305 #endif
1306 	  do
1307 	    {
1308 	      ch = GET ();
1309 	    }
1310 	  while (ch != EOF && !IS_NEWLINE (ch));
1311 	  if (ch == EOF)
1312 	    as_warn (_("end of file in comment; newline inserted"));
1313 	  state = 0;
1314 	  PUT ('\n');
1315 	  break;
1316 
1317 #ifdef H_TICK_HEX
1318 	case LEX_IS_H:
1319 	  /* Look for strings like H'[0-9A-Fa-f] and if found, replace
1320 	     the H' with 0x to make them gas-style hex characters.  */
1321 	  if (enable_h_tick_hex)
1322 	    {
1323 	      char quot;
1324 
1325 	      quot = GET ();
1326 	      if (quot == '\'')
1327 		{
1328 		  UNGET ('x');
1329 		  ch = '0';
1330 		}
1331 	      else
1332 		UNGET (quot);
1333 	    }
1334 #endif
1335 	  /* Fall through.  */
1336 
1337 	case LEX_IS_SYMBOL_COMPONENT:
1338 	  if (state == 10)
1339 	    {
1340 	      /* This is a symbol character following another symbol
1341 		 character, with whitespace in between.  We skipped
1342 		 the whitespace earlier, so output it now.  */
1343 	      UNGET (ch);
1344 	      state = 3;
1345 	      PUT (' ');
1346 	      break;
1347 	    }
1348 
1349 #ifdef TC_Z80
1350 	  /* "af'" is a symbol containing '\''.  */
1351 	  if (state == 3 && (ch == 'a' || ch == 'A'))
1352 	    {
1353 	      state = 16;
1354 	      PUT (ch);
1355 	      ch = GET ();
1356 	      if (ch == 'f' || ch == 'F')
1357 		{
1358 		  state = 17;
1359 		  PUT (ch);
1360 		  break;
1361 		}
1362 	      else
1363 		{
1364 		  state = 9;
1365 		  if (ch == EOF || !IS_SYMBOL_COMPONENT (ch))
1366 		    {
1367 		      if (ch != EOF)
1368 			UNGET (ch);
1369 		      break;
1370 		    }
1371 		}
1372 	    }
1373 #endif
1374 	  if (state == 3)
1375 	    state = 9;
1376 
1377 	  /* This is a common case.  Quickly copy CH and all the
1378 	     following symbol component or normal characters.  */
1379 	  if (to + 1 < toend
1380 	      && mri_state == NULL
1381 #if defined TC_ARM && defined OBJ_ELF
1382 	      && symver_state == NULL
1383 #endif
1384 	      )
1385 	    {
1386 	      char *s;
1387 	      ptrdiff_t len;
1388 
1389 	      for (s = from; s < fromend; s++)
1390 		{
1391 		  int type;
1392 
1393 		  ch2 = *(unsigned char *) s;
1394 		  type = lex[ch2];
1395 		  if (type != 0
1396 		      && type != LEX_IS_SYMBOL_COMPONENT)
1397 		    break;
1398 		}
1399 
1400 	      if (s > from)
1401 		/* Handle the last character normally, for
1402 		   simplicity.  */
1403 		--s;
1404 
1405 	      len = s - from;
1406 
1407 	      if (len > (toend - to) - 1)
1408 		len = (toend - to) - 1;
1409 
1410 	      if (len > 0)
1411 		{
1412 		  PUT (ch);
1413 		  memcpy (to, from, len);
1414 		  to += len;
1415 		  from += len;
1416 		  if (to >= toend)
1417 		    goto tofull;
1418 		  ch = GET ();
1419 		}
1420 	    }
1421 
1422 	  /* Fall through.  */
1423 	default:
1424 	de_fault:
1425 	  /* Some relatively `normal' character.  */
1426 	  if (state == 0)
1427 	    {
1428 	      state = 11;	/* Now seeing label definition.  */
1429 	    }
1430 	  else if (state == 1)
1431 	    {
1432 	      state = 2;	/* Ditto.  */
1433 	    }
1434 	  else if (state == 9)
1435 	    {
1436 	      if (!IS_SYMBOL_COMPONENT (ch))
1437 		state = 3;
1438 	    }
1439 	  else if (state == 10)
1440 	    {
1441 	      if (ch == '\\')
1442 		{
1443 		  /* Special handling for backslash: a backslash may
1444 		     be the beginning of a formal parameter (of a
1445 		     macro) following another symbol character, with
1446 		     whitespace in between.  If that is the case, we
1447 		     output a space before the parameter.  Strictly
1448 		     speaking, correct handling depends upon what the
1449 		     macro parameter expands into; if the parameter
1450 		     expands into something which does not start with
1451 		     an operand character, then we don't want to keep
1452 		     the space.  We don't have enough information to
1453 		     make the right choice, so here we are making the
1454 		     choice which is more likely to be correct.  */
1455 		  if (to + 1 >= toend)
1456 		    {
1457 		      /* If we're near the end of the buffer, save the
1458 		         character for the next time round.  Otherwise
1459 		         we'll lose our state.  */
1460 		      UNGET (ch);
1461 		      goto tofull;
1462 		    }
1463 		  *to++ = ' ';
1464 		}
1465 
1466 	      state = 3;
1467 	    }
1468 	  PUT (ch);
1469 	  break;
1470 	}
1471     }
1472 
1473   /*NOTREACHED*/
1474 
1475  fromeof:
1476   /* We have reached the end of the input.  */
1477 #ifdef TC_ARM
1478   if (to > tostart)
1479     last_char = to[-1];
1480 #endif
1481   return to - tostart;
1482 
1483  tofull:
1484   /* The output buffer is full.  Save any input we have not yet
1485      processed.  */
1486   if (fromend > from)
1487     {
1488       saved_input = from;
1489       saved_input_len = fromend - from;
1490     }
1491   else
1492     saved_input = NULL;
1493 
1494 #ifdef TC_ARM
1495   if (to > tostart)
1496     last_char = to[-1];
1497 #endif
1498   return to - tostart;
1499 }
1500