xref: /openbsd-src/gnu/usr.bin/binutils/gas/app.c (revision 007c2a4539b8b8aaa95c5e73e77620090abe113b)
/* This is the Assembler Pre-Processor
   Copyright 1987, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2002, 2003
   Free Software Foundation, Inc.

   This file is part of GAS, the GNU Assembler.

   GAS is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GAS is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GAS; see the file COPYING.  If not, write to the Free
   Software Foundation, 59 Temple Place - Suite 330, Boston, MA
   02111-1307, USA.  */
222159047fSniklas 
/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90.  */
/* App, the assembler pre-processor.  This pre-processor strips out excess
   spaces, turns single-quoted characters into a decimal constant, and turns
   # <number> <filename> <garbage> into a .line <number>\n.file <filename>
   pair.  This needs better error-handling.  */
282159047fSniklas 
292159047fSniklas #include <stdio.h>
30c074d1c9Sdrahn #include "as.h"			/* For BAD_CASE() only.  */
312159047fSniklas 
/* Compilers that do not claim ISO C conformance may not understand
   `const'; define it away for them unless that has already been done.  */
#if (__STDC__ != 1)
#ifndef const
#define const  /* empty */
#endif
#endif

#ifdef TC_M68K
/* Whether we are scrubbing in m68k MRI mode.  This is different from
   flag_m68k_mri, because the two flags will be affected by the .mri
   pseudo-op at different times.  */
static int scrub_m68k_mri;

/* The pseudo-op which switches in and out of MRI mode.  See the
   comment in do_scrub_chars.  */
static const char mri_pseudo[] = ".mri 0";
#else
/* Without m68k support MRI scrubbing is never active.  Defining the
   name as a constant lets tests such as `if (scrub_m68k_mri)' compile
   unchanged on every target.  */
#define scrub_m68k_mri 0
#endif

#if defined TC_ARM && defined OBJ_ELF
/* The pseudo-op for which we need to special-case `@' characters.
   See the comment in do_scrub_chars.  */
static const char   symver_pseudo[] = ".symver";

/* Position reached in symver_pseudo while recognizing the pseudo-op,
   or NULL when no match is in progress.  */
static const char * symver_state;
#endif
57b305b0f1Sespie 
/* Character classification table filled in by do_scrub_begin.  It is
   indexed by character value in the range 0..255 and holds one of the
   LEX_IS_* codes below, or zero if no class has been assigned.  */
static char lex[256];

/* Characters that do_scrub_begin always marks as symbol components
   (before the comment and separator tables get a chance to override
   them).  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Classification codes stored in lex[].  Value 7 is not assigned.  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define	LEX_IS_TWOCHAR_COMMENT_1ST	6
#define	LEX_IS_STRINGQUOTE		8
#define	LEX_IS_COLON			9
#define	LEX_IS_NEWLINE			10
#define	LEX_IS_ONECHAR_QUOTE		11
#ifdef TC_V850
#define LEX_IS_DOUBLEDASH_1ST		12
#endif
#ifdef TC_M32R
#define DOUBLEBAR_PARALLEL
#endif
#ifdef DOUBLEBAR_PARALLEL
#define LEX_IS_DOUBLEBAR_1ST		13
#endif
#define LEX_IS_PARALLEL_SEPARATOR	14

/* Classification predicates.  C is used directly as an index into
   lex[], so it must lie in 0..255; in particular the caller has to
   rule out EOF before applying any of these.  */
#define IS_SYMBOL_COMPONENT(c)		(lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)		(lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)		(lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_PARALLEL_SEPARATOR(c)	(lex[c] == LEX_IS_PARALLEL_SEPARATOR)
#define IS_COMMENT(c)			(lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)		(lex[c] == LEX_IS_LINE_COMMENT_START)
#define	IS_NEWLINE(c)			(lex[c] == LEX_IS_NEWLINE)

static int process_escape (int ch);
912159047fSniklas 
922159047fSniklas /* FIXME-soon: The entire lexer/parser thingy should be
932159047fSniklas    built statically at compile time rather than dynamically
942159047fSniklas    each and every time the assembler is run.  xoxorich.  */
952159047fSniklas 
962159047fSniklas void
do_scrub_begin(int m68k_mri ATTRIBUTE_UNUSED)97*007c2a45Smiod do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
982159047fSniklas {
992159047fSniklas   const char *p;
100b305b0f1Sespie   int c;
101191aa565Sniklas 
1022159047fSniklas   lex[' '] = LEX_IS_WHITESPACE;
1032159047fSniklas   lex['\t'] = LEX_IS_WHITESPACE;
104b305b0f1Sespie   lex['\r'] = LEX_IS_WHITESPACE;
1052159047fSniklas   lex['\n'] = LEX_IS_NEWLINE;
1062159047fSniklas   lex[':'] = LEX_IS_COLON;
1072159047fSniklas 
108b305b0f1Sespie #ifdef TC_M68K
109b305b0f1Sespie   scrub_m68k_mri = m68k_mri;
110b305b0f1Sespie 
111191aa565Sniklas   if (! m68k_mri)
112b305b0f1Sespie #endif
1132159047fSniklas     {
1142159047fSniklas       lex['"'] = LEX_IS_STRINGQUOTE;
1152159047fSniklas 
116b305b0f1Sespie #if ! defined (TC_HPPA) && ! defined (TC_I370)
117c074d1c9Sdrahn       /* I370 uses single-quotes to delimit integer, float constants.  */
1182159047fSniklas       lex['\''] = LEX_IS_ONECHAR_QUOTE;
1192159047fSniklas #endif
1202159047fSniklas 
1212159047fSniklas #ifdef SINGLE_QUOTE_STRINGS
1222159047fSniklas       lex['\''] = LEX_IS_STRINGQUOTE;
1232159047fSniklas #endif
1242159047fSniklas     }
1252159047fSniklas 
1262159047fSniklas   /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
1272159047fSniklas      in state 5 of do_scrub_chars must be changed.  */
1282159047fSniklas 
1292159047fSniklas   /* Note that these override the previous defaults, e.g. if ';' is a
1302159047fSniklas      comment char, then it isn't a line separator.  */
1312159047fSniklas   for (p = symbol_chars; *p; ++p)
1322159047fSniklas     lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
1332159047fSniklas 
134b305b0f1Sespie   for (c = 128; c < 256; ++c)
135b305b0f1Sespie     lex[c] = LEX_IS_SYMBOL_COMPONENT;
136b305b0f1Sespie 
137b305b0f1Sespie #ifdef tc_symbol_chars
138b305b0f1Sespie   /* This macro permits the processor to specify all characters which
139b305b0f1Sespie      may appears in an operand.  This will prevent the scrubber from
140b305b0f1Sespie      discarding meaningful whitespace in certain cases.  The i386
141b305b0f1Sespie      backend uses this to support prefixes, which can confuse the
142b305b0f1Sespie      scrubber as to whether it is parsing operands or opcodes.  */
143b305b0f1Sespie   for (p = tc_symbol_chars; *p; ++p)
144b305b0f1Sespie     lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
145b305b0f1Sespie #endif
146b305b0f1Sespie 
147191aa565Sniklas   /* The m68k backend wants to be able to change comment_chars.  */
148191aa565Sniklas #ifndef tc_comment_chars
149191aa565Sniklas #define tc_comment_chars comment_chars
150191aa565Sniklas #endif
151191aa565Sniklas   for (p = tc_comment_chars; *p; p++)
1522159047fSniklas     lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
1532159047fSniklas 
1542159047fSniklas   for (p = line_comment_chars; *p; p++)
1552159047fSniklas     lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
1562159047fSniklas 
1572159047fSniklas   for (p = line_separator_chars; *p; p++)
1582159047fSniklas     lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
159c074d1c9Sdrahn 
160c074d1c9Sdrahn #ifdef tc_parallel_separator_chars
161c074d1c9Sdrahn   /* This macro permits the processor to specify all characters which
162c074d1c9Sdrahn      separate parallel insns on the same line.  */
163c074d1c9Sdrahn   for (p = tc_parallel_separator_chars; *p; p++)
164c074d1c9Sdrahn     lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
165c074d1c9Sdrahn #endif
1662159047fSniklas 
167b305b0f1Sespie   /* Only allow slash-star comments if slash is not in use.
168b305b0f1Sespie      FIXME: This isn't right.  We should always permit them.  */
1692159047fSniklas   if (lex['/'] == 0)
1702159047fSniklas     lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
1712159047fSniklas 
172b305b0f1Sespie #ifdef TC_M68K
173191aa565Sniklas   if (m68k_mri)
1742159047fSniklas     {
1752159047fSniklas       lex['\''] = LEX_IS_STRINGQUOTE;
1762159047fSniklas       lex[';'] = LEX_IS_COMMENT_START;
1772159047fSniklas       lex['*'] = LEX_IS_LINE_COMMENT_START;
1782159047fSniklas       /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
1792159047fSniklas 	 then it can't be used in an expression.  */
1802159047fSniklas       lex['!'] = LEX_IS_LINE_COMMENT_START;
1812159047fSniklas     }
182b305b0f1Sespie #endif
183b305b0f1Sespie 
184b305b0f1Sespie #ifdef TC_V850
185b305b0f1Sespie   lex['-'] = LEX_IS_DOUBLEDASH_1ST;
186b305b0f1Sespie #endif
187b305b0f1Sespie #ifdef DOUBLEBAR_PARALLEL
188b305b0f1Sespie   lex['|'] = LEX_IS_DOUBLEBAR_1ST;
189b305b0f1Sespie #endif
190b305b0f1Sespie #ifdef TC_D30V
191c074d1c9Sdrahn   /* Must do this is we want VLIW instruction with "->" or "<-".  */
192b305b0f1Sespie   lex['-'] = LEX_IS_SYMBOL_COMPONENT;
193b305b0f1Sespie #endif
194c074d1c9Sdrahn }
1952159047fSniklas 
/* Saved state of the scrubber.  do_scrub_chars keeps its position in
   these file-scope variables so that it can return at any point and
   resume on the next call.  */

/* Current state of the state machine in do_scrub_chars.  */
static int state;
/* State to return to once the current string, comment or queued
   output has been dealt with.  */
static int old_state;
/* Next character of the pending text emitted in state -1.  */
static char *out_string;
/* Scratch buffer saved and restored together with out_string.
   NOTE(review): presumably out_string may point into this buffer; the
   code that fills it is outside this excerpt.  */
static char out_buf[20];
/* Count of newlines folded into a string that still have to be
   accounted for.  */
static int add_newlines;
/* Unconsumed input left over from the previous call to
   do_scrub_chars, or NULL if there is none.  */
static char *saved_input;
/* Number of bytes available at saved_input.  */
static int saved_input_len;
/* Buffer handed to the GET callback of do_scrub_chars.  */
static char input_buffer[32 * 1024];
/* Position reached in mri_pseudo while recognizing the .mri
   pseudo-op, or NULL when no match is in progress.  */
static const char *mri_state;
/* NOTE(review): companion of mri_state; its use lies outside this
   excerpt.  */
static char mri_last_ch;

/* Data structure for saving the state of app across #include's.  Note that
   app is called asynchronously to the parsing of the .include's, so our
   state at the time .include is interpreted is completely unrelated.
   That's why we have to save it all.  Each member mirrors the file-scope
   variable of the same name.  */

struct app_save
{
  int          state;
  int          old_state;
  char *       out_string;
  char         out_buf[sizeof (out_buf)];
  int          add_newlines;
  /* Heap copy of the pending input made by app_push and released by
     app_pop; NULL if no input was pending.  */
  char *       saved_input;
  int          saved_input_len;
#ifdef TC_M68K
  int          scrub_m68k_mri;
#endif
  const char * mri_state;
  char         mri_last_ch;
#if defined TC_ARM && defined OBJ_ELF
  const char * symver_state;
#endif
};
2312159047fSniklas 
2322159047fSniklas char *
app_push(void)233*007c2a45Smiod app_push (void)
2342159047fSniklas {
2352159047fSniklas   register struct app_save *saved;
2362159047fSniklas 
2372159047fSniklas   saved = (struct app_save *) xmalloc (sizeof (*saved));
2382159047fSniklas   saved->state = state;
2392159047fSniklas   saved->old_state = old_state;
2402159047fSniklas   saved->out_string = out_string;
2412159047fSniklas   memcpy (saved->out_buf, out_buf, sizeof (out_buf));
2422159047fSniklas   saved->add_newlines = add_newlines;
243b305b0f1Sespie   if (saved_input == NULL)
244b305b0f1Sespie     saved->saved_input = NULL;
245b305b0f1Sespie   else
246b305b0f1Sespie     {
247b305b0f1Sespie       saved->saved_input = xmalloc (saved_input_len);
248b305b0f1Sespie       memcpy (saved->saved_input, saved_input, saved_input_len);
2492159047fSniklas       saved->saved_input_len = saved_input_len;
250b305b0f1Sespie     }
251b305b0f1Sespie #ifdef TC_M68K
252191aa565Sniklas   saved->scrub_m68k_mri = scrub_m68k_mri;
253b305b0f1Sespie #endif
254191aa565Sniklas   saved->mri_state = mri_state;
255191aa565Sniklas   saved->mri_last_ch = mri_last_ch;
256b305b0f1Sespie #if defined TC_ARM && defined OBJ_ELF
257b305b0f1Sespie   saved->symver_state = symver_state;
258b305b0f1Sespie #endif
2592159047fSniklas 
2602159047fSniklas   /* do_scrub_begin() is not useful, just wastes time.  */
2612159047fSniklas 
2622159047fSniklas   state = 0;
2632159047fSniklas   saved_input = NULL;
2642159047fSniklas 
2652159047fSniklas   return (char *) saved;
2662159047fSniklas }
2672159047fSniklas 
2682159047fSniklas void
app_pop(char * arg)269*007c2a45Smiod app_pop (char *arg)
2702159047fSniklas {
2712159047fSniklas   register struct app_save *saved = (struct app_save *) arg;
2722159047fSniklas 
2732159047fSniklas   /* There is no do_scrub_end ().  */
2742159047fSniklas   state = saved->state;
2752159047fSniklas   old_state = saved->old_state;
2762159047fSniklas   out_string = saved->out_string;
2772159047fSniklas   memcpy (out_buf, saved->out_buf, sizeof (out_buf));
2782159047fSniklas   add_newlines = saved->add_newlines;
279b305b0f1Sespie   if (saved->saved_input == NULL)
280b305b0f1Sespie     saved_input = NULL;
281b305b0f1Sespie   else
282b305b0f1Sespie     {
283b305b0f1Sespie       assert (saved->saved_input_len <= (int) (sizeof input_buffer));
284b305b0f1Sespie       memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
285b305b0f1Sespie       saved_input = input_buffer;
2862159047fSniklas       saved_input_len = saved->saved_input_len;
287b305b0f1Sespie       free (saved->saved_input);
288b305b0f1Sespie     }
289b305b0f1Sespie #ifdef TC_M68K
290191aa565Sniklas   scrub_m68k_mri = saved->scrub_m68k_mri;
291b305b0f1Sespie #endif
292191aa565Sniklas   mri_state = saved->mri_state;
293191aa565Sniklas   mri_last_ch = saved->mri_last_ch;
294b305b0f1Sespie #if defined TC_ARM && defined OBJ_ELF
295b305b0f1Sespie   symver_state = saved->symver_state;
296b305b0f1Sespie #endif
2972159047fSniklas 
2982159047fSniklas   free (arg);
299c074d1c9Sdrahn }
3002159047fSniklas 
/* @@ This assumes that \n &c are the same on host and target.  This is not
   necessarily true.  */

/* Translate CH, the character that followed a backslash, into the
   character value which the escape sequence stands for.  Only the
   escapes listed below are translated; every other character,
   including 'v', is returned unchanged.  */

static int
process_escape (int ch)
{
  switch (ch)
    {
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    case '\'':
      return '\'';
    case '"':
      return '\"';
    default:
      return ch;
    }
}
3272159047fSniklas 
/* This function is called to process input characters.  The GET
   parameter is used to retrieve more input characters.  GET should
   set its parameter to point to a buffer, and return the length of
   the buffer; it should return 0 at end of file.  The scrubbed output
   characters are put into the buffer starting at TOSTART; the TOSTART
   buffer is TOLEN bytes in length.  The function returns the number
   of scrubbed characters put into TOSTART.  This will be TOLEN unless
   end of file was seen.  This function is arranged as a state
   machine, and saves its state so that it may return at any point.
   This is the way the old code used to work.  */
3382159047fSniklas 
3392159047fSniklas int
do_scrub_chars(int (* get)(char *,int),char * tostart,int tolen)340*007c2a45Smiod do_scrub_chars (int (*get) (char *, int), char *tostart, int tolen)
3412159047fSniklas {
3422159047fSniklas   char *to = tostart;
3432159047fSniklas   char *toend = tostart + tolen;
3442159047fSniklas   char *from;
3452159047fSniklas   char *fromend;
3462159047fSniklas   int fromlen;
3472159047fSniklas   register int ch, ch2 = 0;
3482159047fSniklas 
3492159047fSniklas   /*State 0: beginning of normal line
3502159047fSniklas 	  1: After first whitespace on line (flush more white)
3512159047fSniklas 	  2: After first non-white (opcode) on line (keep 1white)
3522159047fSniklas 	  3: after second white on line (into operands) (flush white)
3532159047fSniklas 	  4: after putting out a .line, put out digits
3542159047fSniklas 	  5: parsing a string, then go to old-state
3552159047fSniklas 	  6: putting out \ escape in a "d string.
3562159047fSniklas 	  7: After putting out a .appfile, put out string.
3572159047fSniklas 	  8: After putting out a .appfile string, flush until newline.
3582159047fSniklas 	  9: After seeing symbol char in state 3 (keep 1white after symchar)
3592159047fSniklas 	 10: After seeing whitespace in state 9 (keep white before symchar)
3602159047fSniklas 	 11: After seeing a symbol character in state 0 (eg a label definition)
3612159047fSniklas 	 -1: output string in out_string and go to the state in old_state
3622159047fSniklas 	 -2: flush text until a '*' '/' is seen, then go to state old_state
363b305b0f1Sespie #ifdef TC_V850
364c074d1c9Sdrahn 	 12: After seeing a dash, looking for a second dash as a start
365c074d1c9Sdrahn 	     of comment.
366b305b0f1Sespie #endif
367b305b0f1Sespie #ifdef DOUBLEBAR_PARALLEL
368c074d1c9Sdrahn 	 13: After seeing a vertical bar, looking for a second
369c074d1c9Sdrahn 	     vertical bar as a parallel expression separator.
370b305b0f1Sespie #endif
371*007c2a45Smiod #ifdef TC_IA64
372*007c2a45Smiod 	 14: After seeing a `(' at state 0, looking for a `)' as
373*007c2a45Smiod 	     predicate.
374*007c2a45Smiod 	 15: After seeing a `(' at state 1, looking for a `)' as
375*007c2a45Smiod 	     predicate.
376*007c2a45Smiod #endif
3772159047fSniklas 	  */
3782159047fSniklas 
3792159047fSniklas   /* I added states 9 and 10 because the MIPS ECOFF assembler uses
3802159047fSniklas      constructs like ``.loc 1 20''.  This was turning into ``.loc
3812159047fSniklas      120''.  States 9 and 10 ensure that a space is never dropped in
382c074d1c9Sdrahn      between characters which could appear in an identifier.  Ian
3832159047fSniklas      Taylor, ian@cygnus.com.
3842159047fSniklas 
3852159047fSniklas      I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
3862159047fSniklas      correctly on the PA (and any other target where colons are optional).
387b305b0f1Sespie      Jeff Law, law@cs.utah.edu.
388b305b0f1Sespie 
389b305b0f1Sespie      I added state 13 so that something like "cmp r1, r2 || trap #1" does not
390b305b0f1Sespie      get squashed into "cmp r1,r2||trap#1", with the all important space
391b305b0f1Sespie      between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
3922159047fSniklas 
3932159047fSniklas   /* This macro gets the next input character.  */
3942159047fSniklas 
3952159047fSniklas #define GET()							\
3962159047fSniklas   (from < fromend						\
397b305b0f1Sespie    ? * (unsigned char *) (from++)				\
398b305b0f1Sespie    : (saved_input = NULL,					\
399b305b0f1Sespie       fromlen = (*get) (input_buffer, sizeof input_buffer),	\
400b305b0f1Sespie       from = input_buffer,					\
4012159047fSniklas       fromend = from + fromlen,					\
4022159047fSniklas       (fromlen == 0						\
4032159047fSniklas        ? EOF							\
404b305b0f1Sespie        : * (unsigned char *) (from++))))
4052159047fSniklas 
4062159047fSniklas   /* This macro pushes a character back on the input stream.  */
4072159047fSniklas 
4082159047fSniklas #define UNGET(uch) (*--from = (uch))
4092159047fSniklas 
4102159047fSniklas   /* This macro puts a character into the output buffer.  If this
4112159047fSniklas      character fills the output buffer, this macro jumps to the label
4122159047fSniklas      TOFULL.  We use this rather ugly approach because we need to
4132159047fSniklas      handle two different termination conditions: EOF on the input
4142159047fSniklas      stream, and a full output buffer.  It would be simpler if we
4152159047fSniklas      always read in the entire input stream before processing it, but
4162159047fSniklas      I don't want to make such a significant change to the assembler's
4172159047fSniklas      memory usage.  */
4182159047fSniklas 
4192159047fSniklas #define PUT(pch)				\
4202159047fSniklas   do						\
4212159047fSniklas     {						\
4222159047fSniklas       *to++ = (pch);				\
4232159047fSniklas       if (to >= toend)				\
4242159047fSniklas 	goto tofull;				\
4252159047fSniklas     }						\
4262159047fSniklas   while (0)
4272159047fSniklas 
4282159047fSniklas   if (saved_input != NULL)
4292159047fSniklas     {
4302159047fSniklas       from = saved_input;
4312159047fSniklas       fromend = from + saved_input_len;
4322159047fSniklas     }
4332159047fSniklas   else
4342159047fSniklas     {
435b305b0f1Sespie       fromlen = (*get) (input_buffer, sizeof input_buffer);
4362159047fSniklas       if (fromlen == 0)
4372159047fSniklas 	return 0;
438b305b0f1Sespie       from = input_buffer;
4392159047fSniklas       fromend = from + fromlen;
4402159047fSniklas     }
4412159047fSniklas 
4422159047fSniklas   while (1)
4432159047fSniklas     {
4442159047fSniklas       /* The cases in this switch end with continue, in order to
4452159047fSniklas 	 branch back to the top of this while loop and generate the
4462159047fSniklas 	 next output character in the appropriate state.  */
4472159047fSniklas       switch (state)
4482159047fSniklas 	{
4492159047fSniklas 	case -1:
4502159047fSniklas 	  ch = *out_string++;
4512159047fSniklas 	  if (*out_string == '\0')
4522159047fSniklas 	    {
4532159047fSniklas 	      state = old_state;
4542159047fSniklas 	      old_state = 3;
4552159047fSniklas 	    }
4562159047fSniklas 	  PUT (ch);
4572159047fSniklas 	  continue;
4582159047fSniklas 
4592159047fSniklas 	case -2:
4602159047fSniklas 	  for (;;)
4612159047fSniklas 	    {
4622159047fSniklas 	      do
4632159047fSniklas 		{
4642159047fSniklas 		  ch = GET ();
4652159047fSniklas 
4662159047fSniklas 		  if (ch == EOF)
4672159047fSniklas 		    {
468b305b0f1Sespie 		      as_warn (_("end of file in comment"));
4692159047fSniklas 		      goto fromeof;
4702159047fSniklas 		    }
4712159047fSniklas 
4722159047fSniklas 		  if (ch == '\n')
4732159047fSniklas 		    PUT ('\n');
4742159047fSniklas 		}
4752159047fSniklas 	      while (ch != '*');
4762159047fSniklas 
4772159047fSniklas 	      while ((ch = GET ()) == '*')
4782159047fSniklas 		;
4792159047fSniklas 
4802159047fSniklas 	      if (ch == EOF)
4812159047fSniklas 		{
482b305b0f1Sespie 		  as_warn (_("end of file in comment"));
4832159047fSniklas 		  goto fromeof;
4842159047fSniklas 		}
4852159047fSniklas 
4862159047fSniklas 	      if (ch == '/')
4872159047fSniklas 		break;
4882159047fSniklas 
4892159047fSniklas 	      UNGET (ch);
4902159047fSniklas 	    }
4912159047fSniklas 
4922159047fSniklas 	  state = old_state;
493b305b0f1Sespie 	  UNGET (' ');
4942159047fSniklas 	  continue;
4952159047fSniklas 
4962159047fSniklas 	case 4:
4972159047fSniklas 	  ch = GET ();
4982159047fSniklas 	  if (ch == EOF)
4992159047fSniklas 	    goto fromeof;
5002159047fSniklas 	  else if (ch >= '0' && ch <= '9')
5012159047fSniklas 	    PUT (ch);
5022159047fSniklas 	  else
5032159047fSniklas 	    {
5042159047fSniklas 	      while (ch != EOF && IS_WHITESPACE (ch))
5052159047fSniklas 		ch = GET ();
5062159047fSniklas 	      if (ch == '"')
5072159047fSniklas 		{
5082159047fSniklas 		  UNGET (ch);
509191aa565Sniklas 		  if (scrub_m68k_mri)
510191aa565Sniklas 		    out_string = "\n\tappfile ";
511191aa565Sniklas 		  else
5122159047fSniklas 		    out_string = "\n\t.appfile ";
5132159047fSniklas 		  old_state = 7;
5142159047fSniklas 		  state = -1;
5152159047fSniklas 		  PUT (*out_string++);
5162159047fSniklas 		}
5172159047fSniklas 	      else
5182159047fSniklas 		{
5192159047fSniklas 		  while (ch != EOF && ch != '\n')
5202159047fSniklas 		    ch = GET ();
5212159047fSniklas 		  state = 0;
5222159047fSniklas 		  PUT (ch);
5232159047fSniklas 		}
5242159047fSniklas 	    }
5252159047fSniklas 	  continue;
5262159047fSniklas 
5272159047fSniklas 	case 5:
5282159047fSniklas 	  /* We are going to copy everything up to a quote character,
5292159047fSniklas 	     with special handling for a backslash.  We try to
5302159047fSniklas 	     optimize the copying in the simple case without using the
5312159047fSniklas 	     GET and PUT macros.  */
5322159047fSniklas 	  {
5332159047fSniklas 	    char *s;
5342159047fSniklas 	    int len;
5352159047fSniklas 
5362159047fSniklas 	    for (s = from; s < fromend; s++)
5372159047fSniklas 	      {
5382159047fSniklas 		ch = *s;
5392159047fSniklas 		/* This condition must be changed if the type of any
5402159047fSniklas 		   other character can be LEX_IS_STRINGQUOTE.  */
5412159047fSniklas 		if (ch == '\\'
5422159047fSniklas 		    || ch == '"'
5432159047fSniklas 		    || ch == '\''
5442159047fSniklas 		    || ch == '\n')
5452159047fSniklas 		  break;
5462159047fSniklas 	      }
5472159047fSniklas 	    len = s - from;
5482159047fSniklas 	    if (len > toend - to)
5492159047fSniklas 	      len = toend - to;
5502159047fSniklas 	    if (len > 0)
5512159047fSniklas 	      {
5522159047fSniklas 		memcpy (to, from, len);
5532159047fSniklas 		to += len;
5542159047fSniklas 		from += len;
5552159047fSniklas 	      }
5562159047fSniklas 	  }
5572159047fSniklas 
5582159047fSniklas 	  ch = GET ();
5592159047fSniklas 	  if (ch == EOF)
5602159047fSniklas 	    {
561c074d1c9Sdrahn 	      as_warn (_("end of file in string; inserted '\"'"));
5622159047fSniklas 	      state = old_state;
5632159047fSniklas 	      UNGET ('\n');
5642159047fSniklas 	      PUT ('"');
5652159047fSniklas 	    }
5662159047fSniklas 	  else if (lex[ch] == LEX_IS_STRINGQUOTE)
5672159047fSniklas 	    {
5682159047fSniklas 	      state = old_state;
5692159047fSniklas 	      PUT (ch);
5702159047fSniklas 	    }
5712159047fSniklas #ifndef NO_STRING_ESCAPES
5722159047fSniklas 	  else if (ch == '\\')
5732159047fSniklas 	    {
5742159047fSniklas 	      state = 6;
5752159047fSniklas 	      PUT (ch);
5762159047fSniklas 	    }
5772159047fSniklas #endif
578191aa565Sniklas 	  else if (scrub_m68k_mri && ch == '\n')
5792159047fSniklas 	    {
5802159047fSniklas 	      /* Just quietly terminate the string.  This permits lines like
581c074d1c9Sdrahn 		   bne	label	loop if we haven't reach end yet.  */
5822159047fSniklas 	      state = old_state;
5832159047fSniklas 	      UNGET (ch);
5842159047fSniklas 	      PUT ('\'');
5852159047fSniklas 	    }
5862159047fSniklas 	  else
5872159047fSniklas 	    {
5882159047fSniklas 	      PUT (ch);
5892159047fSniklas 	    }
5902159047fSniklas 	  continue;
5912159047fSniklas 
5922159047fSniklas 	case 6:
5932159047fSniklas 	  state = 5;
5942159047fSniklas 	  ch = GET ();
5952159047fSniklas 	  switch (ch)
5962159047fSniklas 	    {
5972159047fSniklas 	      /* Handle strings broken across lines, by turning '\n' into
5982159047fSniklas 		 '\\' and 'n'.  */
5992159047fSniklas 	    case '\n':
6002159047fSniklas 	      UNGET ('n');
6012159047fSniklas 	      add_newlines++;
6022159047fSniklas 	      PUT ('\\');
6032159047fSniklas 	      continue;
6042159047fSniklas 
605c074d1c9Sdrahn 	    case EOF:
606c074d1c9Sdrahn 	      as_warn (_("end of file in string; '\"' inserted"));
607c074d1c9Sdrahn 	      PUT ('"');
608c074d1c9Sdrahn 	      continue;
609c074d1c9Sdrahn 
6102159047fSniklas 	    case '"':
6112159047fSniklas 	    case '\\':
6122159047fSniklas 	    case 'b':
6132159047fSniklas 	    case 'f':
6142159047fSniklas 	    case 'n':
6152159047fSniklas 	    case 'r':
6162159047fSniklas 	    case 't':
6172159047fSniklas 	    case 'v':
6182159047fSniklas 	    case 'x':
6192159047fSniklas 	    case 'X':
6202159047fSniklas 	    case '0':
6212159047fSniklas 	    case '1':
6222159047fSniklas 	    case '2':
6232159047fSniklas 	    case '3':
6242159047fSniklas 	    case '4':
6252159047fSniklas 	    case '5':
6262159047fSniklas 	    case '6':
6272159047fSniklas 	    case '7':
6282159047fSniklas 	      break;
6292159047fSniklas 
630c074d1c9Sdrahn 	    default:
631c074d1c9Sdrahn #ifdef ONLY_STANDARD_ESCAPES
632c074d1c9Sdrahn 	      as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
633c074d1c9Sdrahn #endif
634c074d1c9Sdrahn 	      break;
6352159047fSniklas 	    }
6362159047fSniklas 	  PUT (ch);
6372159047fSniklas 	  continue;
6382159047fSniklas 
6392159047fSniklas 	case 7:
6402159047fSniklas 	  ch = GET ();
6412159047fSniklas 	  state = 5;
6422159047fSniklas 	  old_state = 8;
6432159047fSniklas 	  if (ch == EOF)
6442159047fSniklas 	    goto fromeof;
6452159047fSniklas 	  PUT (ch);
6462159047fSniklas 	  continue;
6472159047fSniklas 
6482159047fSniklas 	case 8:
6492159047fSniklas 	  do
6502159047fSniklas 	    ch = GET ();
6512159047fSniklas 	  while (ch != '\n' && ch != EOF);
6522159047fSniklas 	  if (ch == EOF)
6532159047fSniklas 	    goto fromeof;
6542159047fSniklas 	  state = 0;
6552159047fSniklas 	  PUT (ch);
6562159047fSniklas 	  continue;
657c074d1c9Sdrahn 
658c074d1c9Sdrahn #ifdef DOUBLEBAR_PARALLEL
659c074d1c9Sdrahn 	case 13:
660c074d1c9Sdrahn 	  ch = GET ();
661c074d1c9Sdrahn 	  if (ch != '|')
662c074d1c9Sdrahn 	    abort ();
663c074d1c9Sdrahn 
664c074d1c9Sdrahn 	  /* Reset back to state 1 and pretend that we are parsing a
665c074d1c9Sdrahn 	     line from just after the first white space.  */
666c074d1c9Sdrahn 	  state = 1;
667c074d1c9Sdrahn 	  PUT ('|');
668c074d1c9Sdrahn 	  continue;
669c074d1c9Sdrahn #endif
6702159047fSniklas 	}
6712159047fSniklas 
672c074d1c9Sdrahn       /* OK, we are somewhere in states 0 through 4 or 9 through 11.  */
6732159047fSniklas 
6742159047fSniklas       /* flushchar: */
6752159047fSniklas       ch = GET ();
676191aa565Sniklas 
677*007c2a45Smiod #ifdef TC_IA64
678*007c2a45Smiod       if (ch == '(' && (state == 0 || state == 1))
679*007c2a45Smiod 	{
680*007c2a45Smiod 	  state += 14;
681*007c2a45Smiod 	  PUT (ch);
682*007c2a45Smiod 	  continue;
683*007c2a45Smiod 	}
684*007c2a45Smiod       else if (state == 14 || state == 15)
685*007c2a45Smiod 	{
686*007c2a45Smiod 	  if (ch == ')')
687*007c2a45Smiod 	    {
688*007c2a45Smiod 	      state -= 14;
689*007c2a45Smiod 	      PUT (ch);
690*007c2a45Smiod 	      ch = GET ();
691*007c2a45Smiod 	    }
692*007c2a45Smiod 	  else
693*007c2a45Smiod 	    {
694*007c2a45Smiod 	      PUT (ch);
695*007c2a45Smiod 	      continue;
696*007c2a45Smiod 	    }
697*007c2a45Smiod 	}
698*007c2a45Smiod #endif
699*007c2a45Smiod 
7002159047fSniklas     recycle:
701191aa565Sniklas 
702b305b0f1Sespie #if defined TC_ARM && defined OBJ_ELF
703b305b0f1Sespie       /* We need to watch out for .symver directives.  See the comment later
704b305b0f1Sespie 	 in this function.  */
705b305b0f1Sespie       if (symver_state == NULL)
706b305b0f1Sespie 	{
707b305b0f1Sespie 	  if ((state == 0 || state == 1) && ch == symver_pseudo[0])
708b305b0f1Sespie 	    symver_state = symver_pseudo + 1;
709b305b0f1Sespie 	}
710b305b0f1Sespie       else
711b305b0f1Sespie 	{
712b305b0f1Sespie 	  /* We advance to the next state if we find the right
713b305b0f1Sespie 	     character.  */
714b305b0f1Sespie 	  if (ch != '\0' && (*symver_state == ch))
715b305b0f1Sespie 	    ++symver_state;
716b305b0f1Sespie 	  else if (*symver_state != '\0')
717b305b0f1Sespie 	    /* We did not get the expected character, or we didn't
718b305b0f1Sespie 	       get a valid terminating character after seeing the
719b305b0f1Sespie 	       entire pseudo-op, so we must go back to the beginning.  */
720b305b0f1Sespie 	    symver_state = NULL;
721b305b0f1Sespie 	  else
722b305b0f1Sespie 	    {
723b305b0f1Sespie 	      /* We've read the entire pseudo-op.  If this is the end
724b305b0f1Sespie 		 of the line, go back to the beginning.  */
725b305b0f1Sespie 	      if (IS_NEWLINE (ch))
726b305b0f1Sespie 		symver_state = NULL;
727b305b0f1Sespie 	    }
728b305b0f1Sespie 	}
729b305b0f1Sespie #endif /* TC_ARM && OBJ_ELF */
730b305b0f1Sespie 
731191aa565Sniklas #ifdef TC_M68K
732191aa565Sniklas       /* We want to have pseudo-ops which control whether we are in
733191aa565Sniklas 	 MRI mode or not.  Unfortunately, since m68k MRI mode affects
734191aa565Sniklas 	 the scrubber, that means that we need a special purpose
735191aa565Sniklas 	 recognizer here.  */
736191aa565Sniklas       if (mri_state == NULL)
737191aa565Sniklas 	{
738191aa565Sniklas 	  if ((state == 0 || state == 1)
739191aa565Sniklas 	      && ch == mri_pseudo[0])
740191aa565Sniklas 	    mri_state = mri_pseudo + 1;
741191aa565Sniklas 	}
742191aa565Sniklas       else
743191aa565Sniklas 	{
744191aa565Sniklas 	  /* We advance to the next state if we find the right
745191aa565Sniklas 	     character, or if we need a space character and we get any
746191aa565Sniklas 	     whitespace character, or if we need a '0' and we get a
747191aa565Sniklas 	     '1' (this is so that we only need one state to handle
748191aa565Sniklas 	     ``.mri 0'' and ``.mri 1'').  */
749191aa565Sniklas 	  if (ch != '\0'
750191aa565Sniklas 	      && (*mri_state == ch
751191aa565Sniklas 		  || (*mri_state == ' '
752191aa565Sniklas 		      && lex[ch] == LEX_IS_WHITESPACE)
753191aa565Sniklas 		  || (*mri_state == '0'
754191aa565Sniklas 		      && ch == '1')))
755191aa565Sniklas 	    {
756191aa565Sniklas 	      mri_last_ch = ch;
757191aa565Sniklas 	      ++mri_state;
758191aa565Sniklas 	    }
759191aa565Sniklas 	  else if (*mri_state != '\0'
760191aa565Sniklas 		   || (lex[ch] != LEX_IS_WHITESPACE
761191aa565Sniklas 		       && lex[ch] != LEX_IS_NEWLINE))
762191aa565Sniklas 	    {
763191aa565Sniklas 	      /* We did not get the expected character, or we didn't
764191aa565Sniklas 		 get a valid terminating character after seeing the
765191aa565Sniklas 		 entire pseudo-op, so we must go back to the
766191aa565Sniklas 		 beginning.  */
767191aa565Sniklas 	      mri_state = NULL;
768191aa565Sniklas 	    }
769191aa565Sniklas 	  else
770191aa565Sniklas 	    {
771191aa565Sniklas 	      /* We've read the entire pseudo-op.  mri_last_ch is
772191aa565Sniklas 		 either '0' or '1' indicating whether to enter or
773191aa565Sniklas 		 leave MRI mode.  */
774191aa565Sniklas 	      do_scrub_begin (mri_last_ch == '1');
775b305b0f1Sespie 	      mri_state = NULL;
776191aa565Sniklas 
777191aa565Sniklas 	      /* We continue handling the character as usual.  The
778191aa565Sniklas 		 main gas reader must also handle the .mri pseudo-op
779191aa565Sniklas 		 to control expression parsing and the like.  */
780191aa565Sniklas 	    }
781191aa565Sniklas 	}
782191aa565Sniklas #endif
783191aa565Sniklas 
7842159047fSniklas       if (ch == EOF)
7852159047fSniklas 	{
7862159047fSniklas 	  if (state != 0)
7872159047fSniklas 	    {
788b305b0f1Sespie 	      as_warn (_("end of file not at end of a line; newline inserted"));
7892159047fSniklas 	      state = 0;
7902159047fSniklas 	      PUT ('\n');
7912159047fSniklas 	    }
7922159047fSniklas 	  goto fromeof;
7932159047fSniklas 	}
7942159047fSniklas 
7952159047fSniklas       switch (lex[ch])
7962159047fSniklas 	{
7972159047fSniklas 	case LEX_IS_WHITESPACE:
7982159047fSniklas 	  do
7992159047fSniklas 	    {
8002159047fSniklas 	      ch = GET ();
8012159047fSniklas 	    }
8022159047fSniklas 	  while (ch != EOF && IS_WHITESPACE (ch));
8032159047fSniklas 	  if (ch == EOF)
8042159047fSniklas 	    goto fromeof;
8052159047fSniklas 
8062159047fSniklas 	  if (state == 0)
8072159047fSniklas 	    {
8082159047fSniklas 	      /* Preserve a single whitespace character at the
8092159047fSniklas 		 beginning of a line.  */
8102159047fSniklas 	      state = 1;
8112159047fSniklas 	      UNGET (ch);
8122159047fSniklas 	      PUT (' ');
8132159047fSniklas 	      break;
8142159047fSniklas 	    }
8152159047fSniklas 
816b305b0f1Sespie #ifdef KEEP_WHITE_AROUND_COLON
817b305b0f1Sespie 	  if (lex[ch] == LEX_IS_COLON)
818b305b0f1Sespie 	    {
819b55d4692Sfgsch 	      /* Only keep this white if there's no white *after* the
820b55d4692Sfgsch 		 colon.  */
821b305b0f1Sespie 	      ch2 = GET ();
822b305b0f1Sespie 	      UNGET (ch2);
823b305b0f1Sespie 	      if (!IS_WHITESPACE (ch2))
824b305b0f1Sespie 		{
825b305b0f1Sespie 		  state = 9;
826b305b0f1Sespie 		  UNGET (ch);
827b305b0f1Sespie 		  PUT (' ');
828b305b0f1Sespie 		  break;
829b305b0f1Sespie 		}
830b305b0f1Sespie 	    }
831b305b0f1Sespie #endif
8322159047fSniklas 	  if (IS_COMMENT (ch)
8332159047fSniklas 	      || ch == '/'
834c074d1c9Sdrahn 	      || IS_LINE_SEPARATOR (ch)
835c074d1c9Sdrahn 	      || IS_PARALLEL_SEPARATOR (ch))
8362159047fSniklas 	    {
837191aa565Sniklas 	      if (scrub_m68k_mri)
8382159047fSniklas 		{
8392159047fSniklas 		  /* In MRI mode, we keep these spaces.  */
8402159047fSniklas 		  UNGET (ch);
8412159047fSniklas 		  PUT (' ');
8422159047fSniklas 		  break;
8432159047fSniklas 		}
8442159047fSniklas 	      goto recycle;
8452159047fSniklas 	    }
8462159047fSniklas 
8472159047fSniklas 	  /* If we're in state 2 or 11, we've seen a non-white
8482159047fSniklas 	     character followed by whitespace.  If the next character
8492159047fSniklas 	     is ':', this is whitespace after a label name which we
8502159047fSniklas 	     normally must ignore.  In MRI mode, though, spaces are
8512159047fSniklas 	     not permitted between the label and the colon.  */
8522159047fSniklas 	  if ((state == 2 || state == 11)
8532159047fSniklas 	      && lex[ch] == LEX_IS_COLON
854191aa565Sniklas 	      && ! scrub_m68k_mri)
8552159047fSniklas 	    {
8562159047fSniklas 	      state = 1;
8572159047fSniklas 	      PUT (ch);
8582159047fSniklas 	      break;
8592159047fSniklas 	    }
8602159047fSniklas 
8612159047fSniklas 	  switch (state)
8622159047fSniklas 	    {
8632159047fSniklas 	    case 0:
8642159047fSniklas 	      state++;
8652159047fSniklas 	      goto recycle;	/* Punted leading sp */
8662159047fSniklas 	    case 1:
8672159047fSniklas 	      /* We can arrive here if we leave a leading whitespace
8682159047fSniklas 		 character at the beginning of a line.  */
8692159047fSniklas 	      goto recycle;
8702159047fSniklas 	    case 2:
8712159047fSniklas 	      state = 3;
8722159047fSniklas 	      if (to + 1 < toend)
8732159047fSniklas 		{
8742159047fSniklas 		  /* Optimize common case by skipping UNGET/GET.  */
8752159047fSniklas 		  PUT (' ');	/* Sp after opco */
8762159047fSniklas 		  goto recycle;
8772159047fSniklas 		}
8782159047fSniklas 	      UNGET (ch);
8792159047fSniklas 	      PUT (' ');
8802159047fSniklas 	      break;
8812159047fSniklas 	    case 3:
882191aa565Sniklas 	      if (scrub_m68k_mri)
8832159047fSniklas 		{
8842159047fSniklas 		  /* In MRI mode, we keep these spaces.  */
8852159047fSniklas 		  UNGET (ch);
8862159047fSniklas 		  PUT (' ');
8872159047fSniklas 		  break;
8882159047fSniklas 		}
8892159047fSniklas 	      goto recycle;	/* Sp in operands */
8902159047fSniklas 	    case 9:
8912159047fSniklas 	    case 10:
892191aa565Sniklas 	      if (scrub_m68k_mri)
8932159047fSniklas 		{
8942159047fSniklas 		  /* In MRI mode, we keep these spaces.  */
8952159047fSniklas 		  state = 3;
8962159047fSniklas 		  UNGET (ch);
8972159047fSniklas 		  PUT (' ');
8982159047fSniklas 		  break;
8992159047fSniklas 		}
9002159047fSniklas 	      state = 10;	/* Sp after symbol char */
9012159047fSniklas 	      goto recycle;
9022159047fSniklas 	    case 11:
903b305b0f1Sespie 	      if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
9042159047fSniklas 		state = 1;
905191aa565Sniklas 	      else
906191aa565Sniklas 		{
907191aa565Sniklas 		  /* We know that ch is not ':', since we tested that
908191aa565Sniklas 		     case above.  Therefore this is not a label, so it
909191aa565Sniklas 		     must be the opcode, and we've just seen the
910191aa565Sniklas 		     whitespace after it.  */
911191aa565Sniklas 		  state = 3;
912191aa565Sniklas 		}
9132159047fSniklas 	      UNGET (ch);
9142159047fSniklas 	      PUT (' ');	/* Sp after label definition.  */
9152159047fSniklas 	      break;
9162159047fSniklas 	    default:
9172159047fSniklas 	      BAD_CASE (state);
9182159047fSniklas 	    }
9192159047fSniklas 	  break;
9202159047fSniklas 
9212159047fSniklas 	case LEX_IS_TWOCHAR_COMMENT_1ST:
9222159047fSniklas 	  ch2 = GET ();
923b305b0f1Sespie 	  if (ch2 == '*')
9242159047fSniklas 	    {
9252159047fSniklas 	      for (;;)
9262159047fSniklas 		{
9272159047fSniklas 		  do
9282159047fSniklas 		    {
9292159047fSniklas 		      ch2 = GET ();
9302159047fSniklas 		      if (ch2 != EOF && IS_NEWLINE (ch2))
9312159047fSniklas 			add_newlines++;
9322159047fSniklas 		    }
933b305b0f1Sespie 		  while (ch2 != EOF && ch2 != '*');
9342159047fSniklas 
935b305b0f1Sespie 		  while (ch2 == '*')
9362159047fSniklas 		    ch2 = GET ();
9372159047fSniklas 
938b305b0f1Sespie 		  if (ch2 == EOF || ch2 == '/')
9392159047fSniklas 		    break;
940b305b0f1Sespie 
941b305b0f1Sespie 		  /* This UNGET will ensure that we count newlines
942b305b0f1Sespie 		     correctly.  */
943191aa565Sniklas 		  UNGET (ch2);
9442159047fSniklas 		}
945b305b0f1Sespie 
9462159047fSniklas 	      if (ch2 == EOF)
947b305b0f1Sespie 		as_warn (_("end of file in multiline comment"));
9482159047fSniklas 
9492159047fSniklas 	      ch = ' ';
9502159047fSniklas 	      goto recycle;
9512159047fSniklas 	    }
952b55d4692Sfgsch #ifdef DOUBLESLASH_LINE_COMMENTS
953b55d4692Sfgsch 	  else if (ch2 == '/')
954b55d4692Sfgsch 	    {
955b55d4692Sfgsch 	      do
956b55d4692Sfgsch 		{
957b55d4692Sfgsch 		  ch = GET ();
958b55d4692Sfgsch 		}
959b55d4692Sfgsch 	      while (ch != EOF && !IS_NEWLINE (ch));
960b55d4692Sfgsch 	      if (ch == EOF)
961b55d4692Sfgsch 		as_warn ("end of file in comment; newline inserted");
962b55d4692Sfgsch 	      state = 0;
963b55d4692Sfgsch 	      PUT ('\n');
964b55d4692Sfgsch 	      break;
965b55d4692Sfgsch 	    }
966b55d4692Sfgsch #endif
9672159047fSniklas 	  else
9682159047fSniklas 	    {
9692159047fSniklas 	      if (ch2 != EOF)
9702159047fSniklas 		UNGET (ch2);
9712159047fSniklas 	      if (state == 9 || state == 10)
9722159047fSniklas 		state = 3;
9732159047fSniklas 	      PUT (ch);
9742159047fSniklas 	    }
9752159047fSniklas 	  break;
9762159047fSniklas 
9772159047fSniklas 	case LEX_IS_STRINGQUOTE:
9782159047fSniklas 	  if (state == 10)
9792159047fSniklas 	    {
980c074d1c9Sdrahn 	      /* Preserve the whitespace in foo "bar".  */
9812159047fSniklas 	      UNGET (ch);
9822159047fSniklas 	      state = 3;
9832159047fSniklas 	      PUT (' ');
9842159047fSniklas 
9852159047fSniklas 	      /* PUT didn't jump out.  We could just break, but we
9862159047fSniklas 		 know what will happen, so optimize a bit.  */
9872159047fSniklas 	      ch = GET ();
9882159047fSniklas 	      old_state = 3;
9892159047fSniklas 	    }
9902159047fSniklas 	  else if (state == 9)
9912159047fSniklas 	    old_state = 3;
9922159047fSniklas 	  else
9932159047fSniklas 	    old_state = state;
9942159047fSniklas 	  state = 5;
9952159047fSniklas 	  PUT (ch);
9962159047fSniklas 	  break;
9972159047fSniklas 
9982159047fSniklas #ifndef IEEE_STYLE
9992159047fSniklas 	case LEX_IS_ONECHAR_QUOTE:
10002159047fSniklas 	  if (state == 10)
10012159047fSniklas 	    {
1002c074d1c9Sdrahn 	      /* Preserve the whitespace in foo 'b'.  */
10032159047fSniklas 	      UNGET (ch);
10042159047fSniklas 	      state = 3;
10052159047fSniklas 	      PUT (' ');
10062159047fSniklas 	      break;
10072159047fSniklas 	    }
10082159047fSniklas 	  ch = GET ();
10092159047fSniklas 	  if (ch == EOF)
10102159047fSniklas 	    {
1011b305b0f1Sespie 	      as_warn (_("end of file after a one-character quote; \\0 inserted"));
10122159047fSniklas 	      ch = 0;
10132159047fSniklas 	    }
10142159047fSniklas 	  if (ch == '\\')
10152159047fSniklas 	    {
10162159047fSniklas 	      ch = GET ();
10172159047fSniklas 	      if (ch == EOF)
10182159047fSniklas 		{
1019b305b0f1Sespie 		  as_warn (_("end of file in escape character"));
10202159047fSniklas 		  ch = '\\';
10212159047fSniklas 		}
10222159047fSniklas 	      else
10232159047fSniklas 		ch = process_escape (ch);
10242159047fSniklas 	    }
10252159047fSniklas 	  sprintf (out_buf, "%d", (int) (unsigned char) ch);
10262159047fSniklas 
10272159047fSniklas 	  /* None of these 'x constants for us.  We want 'x'.  */
10282159047fSniklas 	  if ((ch = GET ()) != '\'')
10292159047fSniklas 	    {
10302159047fSniklas #ifdef REQUIRE_CHAR_CLOSE_QUOTE
1031c074d1c9Sdrahn 	      as_warn (_("missing close quote; (assumed)"));
10322159047fSniklas #else
10332159047fSniklas 	      if (ch != EOF)
10342159047fSniklas 		UNGET (ch);
10352159047fSniklas #endif
10362159047fSniklas 	    }
10372159047fSniklas 	  if (strlen (out_buf) == 1)
10382159047fSniklas 	    {
10392159047fSniklas 	      PUT (out_buf[0]);
10402159047fSniklas 	      break;
10412159047fSniklas 	    }
10422159047fSniklas 	  if (state == 9)
10432159047fSniklas 	    old_state = 3;
10442159047fSniklas 	  else
10452159047fSniklas 	    old_state = state;
10462159047fSniklas 	  state = -1;
10472159047fSniklas 	  out_string = out_buf;
10482159047fSniklas 	  PUT (*out_string++);
10492159047fSniklas 	  break;
10502159047fSniklas #endif
10512159047fSniklas 
10522159047fSniklas 	case LEX_IS_COLON:
1053b305b0f1Sespie #ifdef KEEP_WHITE_AROUND_COLON
1054b305b0f1Sespie 	  state = 9;
1055b305b0f1Sespie #else
10562159047fSniklas 	  if (state == 9 || state == 10)
10572159047fSniklas 	    state = 3;
10582159047fSniklas 	  else if (state != 3)
10592159047fSniklas 	    state = 1;
1060b305b0f1Sespie #endif
10612159047fSniklas 	  PUT (ch);
10622159047fSniklas 	  break;
10632159047fSniklas 
10642159047fSniklas 	case LEX_IS_NEWLINE:
10652159047fSniklas 	  /* Roll out a bunch of newlines from inside comments, etc.  */
10662159047fSniklas 	  if (add_newlines)
10672159047fSniklas 	    {
10682159047fSniklas 	      --add_newlines;
10692159047fSniklas 	      UNGET (ch);
10702159047fSniklas 	    }
1071b55d4692Sfgsch 	  /* Fall through.  */
10722159047fSniklas 
10732159047fSniklas 	case LEX_IS_LINE_SEPARATOR:
10742159047fSniklas 	  state = 0;
10752159047fSniklas 	  PUT (ch);
10762159047fSniklas 	  break;
10772159047fSniklas 
1078c074d1c9Sdrahn 	case LEX_IS_PARALLEL_SEPARATOR:
1079c074d1c9Sdrahn 	  state = 1;
1080c074d1c9Sdrahn 	  PUT (ch);
1081c074d1c9Sdrahn 	  break;
1082c074d1c9Sdrahn 
1083b305b0f1Sespie #ifdef TC_V850
1084b305b0f1Sespie 	case LEX_IS_DOUBLEDASH_1ST:
1085b305b0f1Sespie 	  ch2 = GET ();
1086b305b0f1Sespie 	  if (ch2 != '-')
1087b305b0f1Sespie 	    {
1088b305b0f1Sespie 	      UNGET (ch2);
1089b305b0f1Sespie 	      goto de_fault;
1090b305b0f1Sespie 	    }
1091b55d4692Sfgsch 	  /* Read and skip to end of line.  */
1092b305b0f1Sespie 	  do
1093b305b0f1Sespie 	    {
1094b305b0f1Sespie 	      ch = GET ();
1095b305b0f1Sespie 	    }
1096b305b0f1Sespie 	  while (ch != EOF && ch != '\n');
1097c074d1c9Sdrahn 
1098b305b0f1Sespie 	  if (ch == EOF)
1099b305b0f1Sespie 	    as_warn (_("end of file in comment; newline inserted"));
1100c074d1c9Sdrahn 
1101b305b0f1Sespie 	  state = 0;
1102b305b0f1Sespie 	  PUT ('\n');
1103b305b0f1Sespie 	  break;
1104b305b0f1Sespie #endif
1105b305b0f1Sespie #ifdef DOUBLEBAR_PARALLEL
1106b305b0f1Sespie 	case LEX_IS_DOUBLEBAR_1ST:
1107b305b0f1Sespie 	  ch2 = GET ();
1108b305b0f1Sespie 	  UNGET (ch2);
1109c074d1c9Sdrahn 	  if (ch2 != '|')
1110b305b0f1Sespie 	    goto de_fault;
1111c074d1c9Sdrahn 
1112c074d1c9Sdrahn 	  /* Handle '||' in two states as invoking PUT twice might
1113c074d1c9Sdrahn 	     result in the first one jumping out of this loop.  We'd
1114c074d1c9Sdrahn 	     then lose track of the state and one '|' char.  */
1115c074d1c9Sdrahn 	  state = 13;
1116b305b0f1Sespie 	  PUT ('|');
1117b305b0f1Sespie 	  break;
1118b305b0f1Sespie #endif
11192159047fSniklas 	case LEX_IS_LINE_COMMENT_START:
11200c6d0228Sniklas 	  /* FIXME-someday: The two character comment stuff was badly
11210c6d0228Sniklas 	     thought out.  On i386, we want '/' as line comment start
11220c6d0228Sniklas 	     AND we want C style comments.  hence this hack.  The
11230c6d0228Sniklas 	     whole lexical process should be reworked.  xoxorich.  */
11242159047fSniklas 	  if (ch == '/')
11252159047fSniklas 	    {
11262159047fSniklas 	      ch2 = GET ();
11272159047fSniklas 	      if (ch2 == '*')
11282159047fSniklas 		{
1129b305b0f1Sespie 		  old_state = 3;
11302159047fSniklas 		  state = -2;
11312159047fSniklas 		  break;
11322159047fSniklas 		}
11332159047fSniklas 	      else
11342159047fSniklas 		{
11352159047fSniklas 		  UNGET (ch2);
11362159047fSniklas 		}
1137c074d1c9Sdrahn 	    }
11382159047fSniklas 
11390c6d0228Sniklas 	  if (state == 0 || state == 1)	/* Only comment at start of line.  */
11400c6d0228Sniklas 	    {
11410c6d0228Sniklas 	      int startch;
11420c6d0228Sniklas 
11430c6d0228Sniklas 	      startch = ch;
11442159047fSniklas 
11452159047fSniklas 	      do
11462159047fSniklas 		{
11472159047fSniklas 		  ch = GET ();
11482159047fSniklas 		}
11492159047fSniklas 	      while (ch != EOF && IS_WHITESPACE (ch));
1150c074d1c9Sdrahn 
11512159047fSniklas 	      if (ch == EOF)
11522159047fSniklas 		{
1153b305b0f1Sespie 		  as_warn (_("end of file in comment; newline inserted"));
11542159047fSniklas 		  PUT ('\n');
11552159047fSniklas 		  break;
11562159047fSniklas 		}
1157c074d1c9Sdrahn 
11580c6d0228Sniklas 	      if (ch < '0' || ch > '9' || state != 0 || startch != '#')
11592159047fSniklas 		{
11600c6d0228Sniklas 		  /* Not a cpp line.  */
11612159047fSniklas 		  while (ch != EOF && !IS_NEWLINE (ch))
11622159047fSniklas 		    ch = GET ();
11632159047fSniklas 		  if (ch == EOF)
1164c074d1c9Sdrahn 		    as_warn (_("end of file in comment; newline inserted"));
11652159047fSniklas 		  state = 0;
11662159047fSniklas 		  PUT ('\n');
11672159047fSniklas 		  break;
11682159047fSniklas 		}
1169b55d4692Sfgsch 	      /* Looks like `# 123 "filename"' from cpp.  */
11702159047fSniklas 	      UNGET (ch);
11712159047fSniklas 	      old_state = 4;
11722159047fSniklas 	      state = -1;
1173191aa565Sniklas 	      if (scrub_m68k_mri)
1174191aa565Sniklas 		out_string = "\tappline ";
1175191aa565Sniklas 	      else
11762159047fSniklas 		out_string = "\t.appline ";
11772159047fSniklas 	      PUT (*out_string++);
11782159047fSniklas 	      break;
11792159047fSniklas 	    }
11802159047fSniklas 
1181b305b0f1Sespie #ifdef TC_D10V
1182b305b0f1Sespie 	  /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1183b305b0f1Sespie 	     Trap is the only short insn that has a first operand that is
1184b305b0f1Sespie 	     neither register nor label.
1185b305b0f1Sespie 	     We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
1186b55d4692Sfgsch 	     We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1187b55d4692Sfgsch 	     already LEX_IS_LINE_COMMENT_START.  However, it is the
1188b55d4692Sfgsch 	     only character in line_comment_chars for d10v, hence we
1189b55d4692Sfgsch 	     can recognize it as such.  */
1190b305b0f1Sespie 	  /* An alternative approach would be to reset the state to 1 when
1191b305b0f1Sespie 	     we see '||', '<-' or '->', but that seems to be overkill.  */
1192b55d4692Sfgsch 	  if (state == 10)
1193b55d4692Sfgsch 	    PUT (' ');
1194b305b0f1Sespie #endif
11952159047fSniklas 	  /* We have a line comment character which is not at the
11962159047fSniklas 	     start of a line.  If this is also a normal comment
11972159047fSniklas 	     character, fall through.  Otherwise treat it as a default
11982159047fSniklas 	     character.  */
1199191aa565Sniklas 	  if (strchr (tc_comment_chars, ch) == NULL
1200191aa565Sniklas 	      && (! scrub_m68k_mri
12012159047fSniklas 		  || (ch != '!' && ch != '*')))
12022159047fSniklas 	    goto de_fault;
1203191aa565Sniklas 	  if (scrub_m68k_mri
1204191aa565Sniklas 	      && (ch == '!' || ch == '*' || ch == '#')
12052159047fSniklas 	      && state != 1
12062159047fSniklas 	      && state != 10)
12072159047fSniklas 	    goto de_fault;
12082159047fSniklas 	  /* Fall through.  */
12092159047fSniklas 	case LEX_IS_COMMENT_START:
1210b305b0f1Sespie #if defined TC_ARM && defined OBJ_ELF
1211b305b0f1Sespie 	  /* On the ARM, `@' is the comment character.
1212b305b0f1Sespie 	     Unfortunately this is also a special character in ELF .symver
1213b55d4692Sfgsch 	     directives (and .type, though we deal with those another way).
1214b55d4692Sfgsch 	     So we check if this line is such a directive, and treat
1215b55d4692Sfgsch 	     the character as default if so.  This is a hack.  */
1216b305b0f1Sespie 	  if ((symver_state != NULL) && (*symver_state == 0))
1217b305b0f1Sespie 	    goto de_fault;
1218b305b0f1Sespie #endif
1219b55d4692Sfgsch #ifdef WARN_COMMENTS
1220b55d4692Sfgsch 	  if (!found_comment)
1221b55d4692Sfgsch 	    as_where (&found_comment_file, &found_comment);
1222b55d4692Sfgsch #endif
12232159047fSniklas 	  do
12242159047fSniklas 	    {
12252159047fSniklas 	      ch = GET ();
12262159047fSniklas 	    }
12272159047fSniklas 	  while (ch != EOF && !IS_NEWLINE (ch));
12282159047fSniklas 	  if (ch == EOF)
1229b305b0f1Sespie 	    as_warn (_("end of file in comment; newline inserted"));
12302159047fSniklas 	  state = 0;
12312159047fSniklas 	  PUT ('\n');
12322159047fSniklas 	  break;
12332159047fSniklas 
12342159047fSniklas 	case LEX_IS_SYMBOL_COMPONENT:
12352159047fSniklas 	  if (state == 10)
12362159047fSniklas 	    {
12372159047fSniklas 	      /* This is a symbol character following another symbol
12382159047fSniklas 		 character, with whitespace in between.  We skipped
12392159047fSniklas 		 the whitespace earlier, so output it now.  */
12402159047fSniklas 	      UNGET (ch);
12412159047fSniklas 	      state = 3;
12422159047fSniklas 	      PUT (' ');
12432159047fSniklas 	      break;
12442159047fSniklas 	    }
12452159047fSniklas 
12462159047fSniklas 	  if (state == 3)
12472159047fSniklas 	    state = 9;
12482159047fSniklas 
12492159047fSniklas 	  /* This is a common case.  Quickly copy CH and all the
12502159047fSniklas 	     following symbol component or normal characters.  */
1251b305b0f1Sespie 	  if (to + 1 < toend
1252b305b0f1Sespie 	      && mri_state == NULL
1253b305b0f1Sespie #if defined TC_ARM && defined OBJ_ELF
1254b305b0f1Sespie 	      && symver_state == NULL
1255b305b0f1Sespie #endif
1256b305b0f1Sespie 	      )
12572159047fSniklas 	    {
12582159047fSniklas 	      char *s;
12592159047fSniklas 	      int len;
12602159047fSniklas 
12612159047fSniklas 	      for (s = from; s < fromend; s++)
12622159047fSniklas 		{
12632159047fSniklas 		  int type;
12642159047fSniklas 
1265b305b0f1Sespie 		  ch2 = *(unsigned char *) s;
12662159047fSniklas 		  type = lex[ch2];
12672159047fSniklas 		  if (type != 0
12682159047fSniklas 		      && type != LEX_IS_SYMBOL_COMPONENT)
12692159047fSniklas 		    break;
12702159047fSniklas 		}
1271c074d1c9Sdrahn 
12722159047fSniklas 	      if (s > from)
12732159047fSniklas 		/* Handle the last character normally, for
12742159047fSniklas 		   simplicity.  */
12752159047fSniklas 		--s;
1276c074d1c9Sdrahn 
12772159047fSniklas 	      len = s - from;
1278c074d1c9Sdrahn 
12792159047fSniklas 	      if (len > (toend - to) - 1)
12802159047fSniklas 		len = (toend - to) - 1;
1281c074d1c9Sdrahn 
12822159047fSniklas 	      if (len > 0)
12832159047fSniklas 		{
12842159047fSniklas 		  PUT (ch);
12852159047fSniklas 		  if (len > 8)
12862159047fSniklas 		    {
12872159047fSniklas 		      memcpy (to, from, len);
12882159047fSniklas 		      to += len;
12892159047fSniklas 		      from += len;
12902159047fSniklas 		    }
12912159047fSniklas 		  else
12922159047fSniklas 		    {
12932159047fSniklas 		      switch (len)
12942159047fSniklas 			{
12952159047fSniklas 			case 8: *to++ = *from++;
12962159047fSniklas 			case 7: *to++ = *from++;
12972159047fSniklas 			case 6: *to++ = *from++;
12982159047fSniklas 			case 5: *to++ = *from++;
12992159047fSniklas 			case 4: *to++ = *from++;
13002159047fSniklas 			case 3: *to++ = *from++;
13012159047fSniklas 			case 2: *to++ = *from++;
13022159047fSniklas 			case 1: *to++ = *from++;
13032159047fSniklas 			}
13042159047fSniklas 		    }
13052159047fSniklas 		  ch = GET ();
13062159047fSniklas 		}
13072159047fSniklas 	    }
13082159047fSniklas 
13092159047fSniklas 	  /* Fall through.  */
13102159047fSniklas 	default:
13112159047fSniklas 	de_fault:
13122159047fSniklas 	  /* Some relatively `normal' character.  */
13132159047fSniklas 	  if (state == 0)
13142159047fSniklas 	    {
1315c074d1c9Sdrahn 	      state = 11;	/* Now seeing label definition.  */
13162159047fSniklas 	    }
13172159047fSniklas 	  else if (state == 1)
13182159047fSniklas 	    {
1319c074d1c9Sdrahn 	      state = 2;	/* Ditto.  */
13202159047fSniklas 	    }
13212159047fSniklas 	  else if (state == 9)
13222159047fSniklas 	    {
1323*007c2a45Smiod 	      if (!IS_SYMBOL_COMPONENT (ch))
13242159047fSniklas 		state = 3;
13252159047fSniklas 	    }
13262159047fSniklas 	  else if (state == 10)
13272159047fSniklas 	    {
1328b305b0f1Sespie 	      if (ch == '\\')
1329b305b0f1Sespie 		{
1330b305b0f1Sespie 		  /* Special handling for backslash: a backslash may
1331b305b0f1Sespie 		     be the beginning of a formal parameter (of a
1332b305b0f1Sespie 		     macro) following another symbol character, with
1333b305b0f1Sespie 		     whitespace in between.  If that is the case, we
1334b305b0f1Sespie 		     output a space before the parameter.  Strictly
1335b305b0f1Sespie 		     speaking, correct handling depends upon what the
1336b305b0f1Sespie 		     macro parameter expands into; if the parameter
1337b305b0f1Sespie 		     expands into something which does not start with
1338b305b0f1Sespie 		     an operand character, then we don't want to keep
1339b305b0f1Sespie 		     the space.  We don't have enough information to
1340b305b0f1Sespie 		     make the right choice, so here we are making the
1341b305b0f1Sespie 		     choice which is more likely to be correct.  */
1342b305b0f1Sespie 		  PUT (' ');
1343b305b0f1Sespie 		}
1344b305b0f1Sespie 
13452159047fSniklas 	      state = 3;
13462159047fSniklas 	    }
13472159047fSniklas 	  PUT (ch);
13482159047fSniklas 	  break;
13492159047fSniklas 	}
13502159047fSniklas     }
13512159047fSniklas 
13522159047fSniklas   /*NOTREACHED*/
13532159047fSniklas 
13542159047fSniklas  fromeof:
13552159047fSniklas   /* We have reached the end of the input.  */
13562159047fSniklas   return to - tostart;
13572159047fSniklas 
13582159047fSniklas  tofull:
13592159047fSniklas   /* The output buffer is full.  Save any input we have not yet
13602159047fSniklas      processed.  */
13612159047fSniklas   if (fromend > from)
13622159047fSniklas     {
1363b305b0f1Sespie       saved_input = from;
13642159047fSniklas       saved_input_len = fromend - from;
13652159047fSniklas     }
13662159047fSniklas   else
13672159047fSniklas     saved_input = NULL;
1368b305b0f1Sespie 
13692159047fSniklas   return to - tostart;
13702159047fSniklas }
13712159047fSniklas 
1372