12159047fSniklas /* This is the Assembler Pre-Processor
2b55d4692Sfgsch Copyright 1987, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3c074d1c9Sdrahn 1999, 2000, 2002, 2003
4191aa565Sniklas Free Software Foundation, Inc.
52159047fSniklas
62159047fSniklas This file is part of GAS, the GNU Assembler.
72159047fSniklas
82159047fSniklas GAS is free software; you can redistribute it and/or modify
92159047fSniklas it under the terms of the GNU General Public License as published by
102159047fSniklas the Free Software Foundation; either version 2, or (at your option)
112159047fSniklas any later version.
122159047fSniklas
132159047fSniklas GAS is distributed in the hope that it will be useful,
142159047fSniklas but WITHOUT ANY WARRANTY; without even the implied warranty of
152159047fSniklas MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
162159047fSniklas GNU General Public License for more details.
172159047fSniklas
182159047fSniklas You should have received a copy of the GNU General Public License
19b305b0f1Sespie along with GAS; see the file COPYING. If not, write to the Free
20b305b0f1Sespie Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21b305b0f1Sespie 02111-1307, USA. */
222159047fSniklas
23c074d1c9Sdrahn /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90. */
242159047fSniklas /* App, the assembler pre-processor. This pre-processor strips out excess
252159047fSniklas spaces, turns single-quoted characters into a decimal constant, and turns
262159047fSniklas # <number> <filename> <garbage> into a .line <number>\n.file <filename>
272159047fSniklas pair. This needs better error-handling. */
282159047fSniklas
292159047fSniklas #include <stdio.h>
30c074d1c9Sdrahn #include "as.h" /* For BAD_CASE() only. */
312159047fSniklas
322159047fSniklas #if (__STDC__ != 1)
332159047fSniklas #ifndef const
342159047fSniklas #define const /* empty */
352159047fSniklas #endif
362159047fSniklas #endif
372159047fSniklas
#if defined (TC_M68K)
/* Nonzero while the scrubber itself is operating in m68k MRI mode.
   This is kept separate from flag_m68k_mri because the .mri pseudo-op
   updates the two flags at different times.  */
static int scrub_m68k_mri;

/* Text of the pseudo-op that switches MRI mode on and off.  See the
   comment in do_scrub_chars for how it is matched.  */
static const char mri_pseudo[] = ".mri 0";
#else /* ! TC_M68K */
/* Without m68k support MRI scrubbing is never active, so the flag
   folds to a compile-time constant.  */
#define scrub_m68k_mri 0
#endif /* TC_M68K */

#if defined (TC_ARM) && defined (OBJ_ELF)
/* The pseudo-op for which `@' characters need special treatment.
   See the comment in do_scrub_chars.  */
static const char symver_pseudo[] = ".symver";

/* Position reached while matching symver_pseudo against the input,
   or NULL when no match is in progress.  */
static const char *symver_state;
#endif /* TC_ARM && OBJ_ELF */
57b305b0f1Sespie
/* Classification of every possible input byte, indexed by the byte
   value.  An entry of zero means the character has no special class.
   The table is filled in by do_scrub_begin.  */
static char lex[256];

/* Characters that are always treated as part of a symbol.  */
static const char symbol_chars[] =
  "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Character classes stored in LEX.  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define LEX_IS_TWOCHAR_COMMENT_1ST	6
#define LEX_IS_STRINGQUOTE		8
#define LEX_IS_COLON			9
#define LEX_IS_NEWLINE			10
#define LEX_IS_ONECHAR_QUOTE		11

/* Target-specific classes.  */
#ifdef TC_V850
#define LEX_IS_DOUBLEDASH_1ST		12
#endif

#ifdef TC_M32R
#define DOUBLEBAR_PARALLEL
#endif

#ifdef DOUBLEBAR_PARALLEL
#define LEX_IS_DOUBLEBAR_1ST		13
#endif

#define LEX_IS_PARALLEL_SEPARATOR	14

/* Predicates testing the class of character C.  C is used directly as
   an index into LEX, so callers must not pass EOF or any other value
   outside 0..255.  */
#define IS_SYMBOL_COMPONENT(c)		(lex[(c)] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)		(lex[(c)] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)		(lex[(c)] == LEX_IS_LINE_SEPARATOR)
#define IS_PARALLEL_SEPARATOR(c)	(lex[(c)] == LEX_IS_PARALLEL_SEPARATOR)
#define IS_COMMENT(c)			(lex[(c)] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)		(lex[(c)] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c)			(lex[(c)] == LEX_IS_NEWLINE)

static int process_escape (int);
912159047fSniklas
922159047fSniklas /* FIXME-soon: The entire lexer/parser thingy should be
932159047fSniklas built statically at compile time rather than dynamically
942159047fSniklas each and every time the assembler is run. xoxorich. */
952159047fSniklas
962159047fSniklas void
do_scrub_begin(int m68k_mri ATTRIBUTE_UNUSED)97*007c2a45Smiod do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
982159047fSniklas {
992159047fSniklas const char *p;
100b305b0f1Sespie int c;
101191aa565Sniklas
1022159047fSniklas lex[' '] = LEX_IS_WHITESPACE;
1032159047fSniklas lex['\t'] = LEX_IS_WHITESPACE;
104b305b0f1Sespie lex['\r'] = LEX_IS_WHITESPACE;
1052159047fSniklas lex['\n'] = LEX_IS_NEWLINE;
1062159047fSniklas lex[':'] = LEX_IS_COLON;
1072159047fSniklas
108b305b0f1Sespie #ifdef TC_M68K
109b305b0f1Sespie scrub_m68k_mri = m68k_mri;
110b305b0f1Sespie
111191aa565Sniklas if (! m68k_mri)
112b305b0f1Sespie #endif
1132159047fSniklas {
1142159047fSniklas lex['"'] = LEX_IS_STRINGQUOTE;
1152159047fSniklas
116b305b0f1Sespie #if ! defined (TC_HPPA) && ! defined (TC_I370)
117c074d1c9Sdrahn /* I370 uses single-quotes to delimit integer, float constants. */
1182159047fSniklas lex['\''] = LEX_IS_ONECHAR_QUOTE;
1192159047fSniklas #endif
1202159047fSniklas
1212159047fSniklas #ifdef SINGLE_QUOTE_STRINGS
1222159047fSniklas lex['\''] = LEX_IS_STRINGQUOTE;
1232159047fSniklas #endif
1242159047fSniklas }
1252159047fSniklas
1262159047fSniklas /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
1272159047fSniklas in state 5 of do_scrub_chars must be changed. */
1282159047fSniklas
1292159047fSniklas /* Note that these override the previous defaults, e.g. if ';' is a
1302159047fSniklas comment char, then it isn't a line separator. */
1312159047fSniklas for (p = symbol_chars; *p; ++p)
1322159047fSniklas lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
1332159047fSniklas
134b305b0f1Sespie for (c = 128; c < 256; ++c)
135b305b0f1Sespie lex[c] = LEX_IS_SYMBOL_COMPONENT;
136b305b0f1Sespie
137b305b0f1Sespie #ifdef tc_symbol_chars
138b305b0f1Sespie /* This macro permits the processor to specify all characters which
139b305b0f1Sespie may appears in an operand. This will prevent the scrubber from
140b305b0f1Sespie discarding meaningful whitespace in certain cases. The i386
141b305b0f1Sespie backend uses this to support prefixes, which can confuse the
142b305b0f1Sespie scrubber as to whether it is parsing operands or opcodes. */
143b305b0f1Sespie for (p = tc_symbol_chars; *p; ++p)
144b305b0f1Sespie lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
145b305b0f1Sespie #endif
146b305b0f1Sespie
147191aa565Sniklas /* The m68k backend wants to be able to change comment_chars. */
148191aa565Sniklas #ifndef tc_comment_chars
149191aa565Sniklas #define tc_comment_chars comment_chars
150191aa565Sniklas #endif
151191aa565Sniklas for (p = tc_comment_chars; *p; p++)
1522159047fSniklas lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
1532159047fSniklas
1542159047fSniklas for (p = line_comment_chars; *p; p++)
1552159047fSniklas lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
1562159047fSniklas
1572159047fSniklas for (p = line_separator_chars; *p; p++)
1582159047fSniklas lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
159c074d1c9Sdrahn
160c074d1c9Sdrahn #ifdef tc_parallel_separator_chars
161c074d1c9Sdrahn /* This macro permits the processor to specify all characters which
162c074d1c9Sdrahn separate parallel insns on the same line. */
163c074d1c9Sdrahn for (p = tc_parallel_separator_chars; *p; p++)
164c074d1c9Sdrahn lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
165c074d1c9Sdrahn #endif
1662159047fSniklas
167b305b0f1Sespie /* Only allow slash-star comments if slash is not in use.
168b305b0f1Sespie FIXME: This isn't right. We should always permit them. */
1692159047fSniklas if (lex['/'] == 0)
1702159047fSniklas lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
1712159047fSniklas
172b305b0f1Sespie #ifdef TC_M68K
173191aa565Sniklas if (m68k_mri)
1742159047fSniklas {
1752159047fSniklas lex['\''] = LEX_IS_STRINGQUOTE;
1762159047fSniklas lex[';'] = LEX_IS_COMMENT_START;
1772159047fSniklas lex['*'] = LEX_IS_LINE_COMMENT_START;
1782159047fSniklas /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
1792159047fSniklas then it can't be used in an expression. */
1802159047fSniklas lex['!'] = LEX_IS_LINE_COMMENT_START;
1812159047fSniklas }
182b305b0f1Sespie #endif
183b305b0f1Sespie
184b305b0f1Sespie #ifdef TC_V850
185b305b0f1Sespie lex['-'] = LEX_IS_DOUBLEDASH_1ST;
186b305b0f1Sespie #endif
187b305b0f1Sespie #ifdef DOUBLEBAR_PARALLEL
188b305b0f1Sespie lex['|'] = LEX_IS_DOUBLEBAR_1ST;
189b305b0f1Sespie #endif
190b305b0f1Sespie #ifdef TC_D30V
191c074d1c9Sdrahn /* Must do this is we want VLIW instruction with "->" or "<-". */
192b305b0f1Sespie lex['-'] = LEX_IS_SYMBOL_COMPONENT;
193b305b0f1Sespie #endif
194c074d1c9Sdrahn }
1952159047fSniklas
/* Saved state of the scrubber.  All of this persists between calls to
   do_scrub_chars and is what app_push / app_pop save and restore.  */

/* Current state of the scrubber's state machine, and the state to
   return to once a temporary state (string, comment, ...) is done.  */
static int state;
static int old_state;

/* Text still to be emitted while in state -1.  */
static char *out_string;
static char out_buf[20];

/* Count of line breaks swallowed inside strings broken across lines.  */
static int add_newlines;

/* Input carried over from one do_scrub_chars call to the next (NULL
   if none), its length, and the buffer handed to the GET callback.  */
static char *saved_input;
static int saved_input_len;
static char input_buffer[32 * 1024];

/* Progress of the .mri pseudo-op recogniser: the next character of
   mri_pseudo to match (NULL when idle) and the last character
   matched.  */
static const char *mri_state;
static char mri_last_ch;
2072159047fSniklas
2082159047fSniklas /* Data structure for saving the state of app across #include's. Note that
2092159047fSniklas app is called asynchronously to the parsing of the .include's, so our
2102159047fSniklas state at the time .include is interpreted is completely unrelated.
2112159047fSniklas That's why we have to save it all. */
2122159047fSniklas
213c074d1c9Sdrahn struct app_save
214c074d1c9Sdrahn {
2152159047fSniklas int state;
2162159047fSniklas int old_state;
2172159047fSniklas char * out_string;
2182159047fSniklas char out_buf[sizeof (out_buf)];
2192159047fSniklas int add_newlines;
2202159047fSniklas char * saved_input;
2212159047fSniklas int saved_input_len;
222b305b0f1Sespie #ifdef TC_M68K
223191aa565Sniklas int scrub_m68k_mri;
224b305b0f1Sespie #endif
225191aa565Sniklas const char * mri_state;
226191aa565Sniklas char mri_last_ch;
227b305b0f1Sespie #if defined TC_ARM && defined OBJ_ELF
228b305b0f1Sespie const char * symver_state;
229b305b0f1Sespie #endif
2302159047fSniklas };
2312159047fSniklas
2322159047fSniklas char *
app_push(void)233*007c2a45Smiod app_push (void)
2342159047fSniklas {
2352159047fSniklas register struct app_save *saved;
2362159047fSniklas
2372159047fSniklas saved = (struct app_save *) xmalloc (sizeof (*saved));
2382159047fSniklas saved->state = state;
2392159047fSniklas saved->old_state = old_state;
2402159047fSniklas saved->out_string = out_string;
2412159047fSniklas memcpy (saved->out_buf, out_buf, sizeof (out_buf));
2422159047fSniklas saved->add_newlines = add_newlines;
243b305b0f1Sespie if (saved_input == NULL)
244b305b0f1Sespie saved->saved_input = NULL;
245b305b0f1Sespie else
246b305b0f1Sespie {
247b305b0f1Sespie saved->saved_input = xmalloc (saved_input_len);
248b305b0f1Sespie memcpy (saved->saved_input, saved_input, saved_input_len);
2492159047fSniklas saved->saved_input_len = saved_input_len;
250b305b0f1Sespie }
251b305b0f1Sespie #ifdef TC_M68K
252191aa565Sniklas saved->scrub_m68k_mri = scrub_m68k_mri;
253b305b0f1Sespie #endif
254191aa565Sniklas saved->mri_state = mri_state;
255191aa565Sniklas saved->mri_last_ch = mri_last_ch;
256b305b0f1Sespie #if defined TC_ARM && defined OBJ_ELF
257b305b0f1Sespie saved->symver_state = symver_state;
258b305b0f1Sespie #endif
2592159047fSniklas
2602159047fSniklas /* do_scrub_begin() is not useful, just wastes time. */
2612159047fSniklas
2622159047fSniklas state = 0;
2632159047fSniklas saved_input = NULL;
2642159047fSniklas
2652159047fSniklas return (char *) saved;
2662159047fSniklas }
2672159047fSniklas
2682159047fSniklas void
app_pop(char * arg)269*007c2a45Smiod app_pop (char *arg)
2702159047fSniklas {
2712159047fSniklas register struct app_save *saved = (struct app_save *) arg;
2722159047fSniklas
2732159047fSniklas /* There is no do_scrub_end (). */
2742159047fSniklas state = saved->state;
2752159047fSniklas old_state = saved->old_state;
2762159047fSniklas out_string = saved->out_string;
2772159047fSniklas memcpy (out_buf, saved->out_buf, sizeof (out_buf));
2782159047fSniklas add_newlines = saved->add_newlines;
279b305b0f1Sespie if (saved->saved_input == NULL)
280b305b0f1Sespie saved_input = NULL;
281b305b0f1Sespie else
282b305b0f1Sespie {
283b305b0f1Sespie assert (saved->saved_input_len <= (int) (sizeof input_buffer));
284b305b0f1Sespie memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
285b305b0f1Sespie saved_input = input_buffer;
2862159047fSniklas saved_input_len = saved->saved_input_len;
287b305b0f1Sespie free (saved->saved_input);
288b305b0f1Sespie }
289b305b0f1Sespie #ifdef TC_M68K
290191aa565Sniklas scrub_m68k_mri = saved->scrub_m68k_mri;
291b305b0f1Sespie #endif
292191aa565Sniklas mri_state = saved->mri_state;
293191aa565Sniklas mri_last_ch = saved->mri_last_ch;
294b305b0f1Sespie #if defined TC_ARM && defined OBJ_ELF
295b305b0f1Sespie symver_state = saved->symver_state;
296b305b0f1Sespie #endif
2972159047fSniklas
2982159047fSniklas free (arg);
299c074d1c9Sdrahn }
3002159047fSniklas
3012159047fSniklas /* @@ This assumes that \n &c are the same on host and target. This is not
3022159047fSniklas necessarily true. */
303c074d1c9Sdrahn
/* Return the character denoted by the escape sequence `\CH'.  Only
   the letters b, f, n, r and t are translated; every other value,
   including the quote characters, is returned unchanged.  */

static int
process_escape (int ch)
{
  /* Parallel tables: letters[i] after a backslash stands for
     values[i].  */
  static const char letters[] = "bfnrt";
  static const char values[] = "\b\f\n\r\t";
  unsigned int i;

  /* Stop before the terminating NUL so that CH == 0 is not matched.  */
  for (i = 0; i < sizeof letters - 1; i++)
    if (ch == letters[i])
      return values[i];

  return ch;
}
3272159047fSniklas
3282159047fSniklas /* This function is called to process input characters. The GET
3292159047fSniklas parameter is used to retrieve more input characters. GET should
3302159047fSniklas set its parameter to point to a buffer, and return the length of
3312159047fSniklas the buffer; it should return 0 at end of file. The scrubbed output
3322159047fSniklas characters are put into the buffer starting at TOSTART; the TOSTART
3332159047fSniklas buffer is TOLEN bytes in length. The function returns the number
3342159047fSniklas of scrubbed characters put into TOSTART. This will be TOLEN unless
3352159047fSniklas end of file was seen. This function is arranged as a state
3362159047fSniklas machine, and saves its state so that it may return at any point.
3372159047fSniklas This is the way the old code used to work. */
3382159047fSniklas
3392159047fSniklas int
do_scrub_chars(int (* get)(char *,int),char * tostart,int tolen)340*007c2a45Smiod do_scrub_chars (int (*get) (char *, int), char *tostart, int tolen)
3412159047fSniklas {
3422159047fSniklas char *to = tostart;
3432159047fSniklas char *toend = tostart + tolen;
3442159047fSniklas char *from;
3452159047fSniklas char *fromend;
3462159047fSniklas int fromlen;
3472159047fSniklas register int ch, ch2 = 0;
3482159047fSniklas
3492159047fSniklas /*State 0: beginning of normal line
3502159047fSniklas 1: After first whitespace on line (flush more white)
3512159047fSniklas 2: After first non-white (opcode) on line (keep 1white)
3522159047fSniklas 3: after second white on line (into operands) (flush white)
3532159047fSniklas 4: after putting out a .line, put out digits
3542159047fSniklas 5: parsing a string, then go to old-state
3552159047fSniklas 6: putting out \ escape in a "d string.
3562159047fSniklas 7: After putting out a .appfile, put out string.
3572159047fSniklas 8: After putting out a .appfile string, flush until newline.
3582159047fSniklas 9: After seeing symbol char in state 3 (keep 1white after symchar)
3592159047fSniklas 10: After seeing whitespace in state 9 (keep white before symchar)
3602159047fSniklas 11: After seeing a symbol character in state 0 (eg a label definition)
3612159047fSniklas -1: output string in out_string and go to the state in old_state
3622159047fSniklas -2: flush text until a '*' '/' is seen, then go to state old_state
363b305b0f1Sespie #ifdef TC_V850
364c074d1c9Sdrahn 12: After seeing a dash, looking for a second dash as a start
365c074d1c9Sdrahn of comment.
366b305b0f1Sespie #endif
367b305b0f1Sespie #ifdef DOUBLEBAR_PARALLEL
368c074d1c9Sdrahn 13: After seeing a vertical bar, looking for a second
369c074d1c9Sdrahn vertical bar as a parallel expression separator.
370b305b0f1Sespie #endif
371*007c2a45Smiod #ifdef TC_IA64
372*007c2a45Smiod 14: After seeing a `(' at state 0, looking for a `)' as
373*007c2a45Smiod predicate.
374*007c2a45Smiod 15: After seeing a `(' at state 1, looking for a `)' as
375*007c2a45Smiod predicate.
376*007c2a45Smiod #endif
3772159047fSniklas */
3782159047fSniklas
3792159047fSniklas /* I added states 9 and 10 because the MIPS ECOFF assembler uses
3802159047fSniklas constructs like ``.loc 1 20''. This was turning into ``.loc
3812159047fSniklas 120''. States 9 and 10 ensure that a space is never dropped in
382c074d1c9Sdrahn between characters which could appear in an identifier. Ian
3832159047fSniklas Taylor, ian@cygnus.com.
3842159047fSniklas
3852159047fSniklas I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
3862159047fSniklas correctly on the PA (and any other target where colons are optional).
387b305b0f1Sespie Jeff Law, law@cs.utah.edu.
388b305b0f1Sespie
389b305b0f1Sespie I added state 13 so that something like "cmp r1, r2 || trap #1" does not
390b305b0f1Sespie get squashed into "cmp r1,r2||trap#1", with the all important space
391b305b0f1Sespie between the 'trap' and the '#1' being eliminated. nickc@cygnus.com */
3922159047fSniklas
3932159047fSniklas /* This macro gets the next input character. */
3942159047fSniklas
3952159047fSniklas #define GET() \
3962159047fSniklas (from < fromend \
397b305b0f1Sespie ? * (unsigned char *) (from++) \
398b305b0f1Sespie : (saved_input = NULL, \
399b305b0f1Sespie fromlen = (*get) (input_buffer, sizeof input_buffer), \
400b305b0f1Sespie from = input_buffer, \
4012159047fSniklas fromend = from + fromlen, \
4022159047fSniklas (fromlen == 0 \
4032159047fSniklas ? EOF \
404b305b0f1Sespie : * (unsigned char *) (from++))))
4052159047fSniklas
4062159047fSniklas /* This macro pushes a character back on the input stream. */
4072159047fSniklas
4082159047fSniklas #define UNGET(uch) (*--from = (uch))
4092159047fSniklas
4102159047fSniklas /* This macro puts a character into the output buffer. If this
4112159047fSniklas character fills the output buffer, this macro jumps to the label
4122159047fSniklas TOFULL. We use this rather ugly approach because we need to
4132159047fSniklas handle two different termination conditions: EOF on the input
4142159047fSniklas stream, and a full output buffer. It would be simpler if we
4152159047fSniklas always read in the entire input stream before processing it, but
4162159047fSniklas I don't want to make such a significant change to the assembler's
4172159047fSniklas memory usage. */
4182159047fSniklas
4192159047fSniklas #define PUT(pch) \
4202159047fSniklas do \
4212159047fSniklas { \
4222159047fSniklas *to++ = (pch); \
4232159047fSniklas if (to >= toend) \
4242159047fSniklas goto tofull; \
4252159047fSniklas } \
4262159047fSniklas while (0)
4272159047fSniklas
4282159047fSniklas if (saved_input != NULL)
4292159047fSniklas {
4302159047fSniklas from = saved_input;
4312159047fSniklas fromend = from + saved_input_len;
4322159047fSniklas }
4332159047fSniklas else
4342159047fSniklas {
435b305b0f1Sespie fromlen = (*get) (input_buffer, sizeof input_buffer);
4362159047fSniklas if (fromlen == 0)
4372159047fSniklas return 0;
438b305b0f1Sespie from = input_buffer;
4392159047fSniklas fromend = from + fromlen;
4402159047fSniklas }
4412159047fSniklas
4422159047fSniklas while (1)
4432159047fSniklas {
4442159047fSniklas /* The cases in this switch end with continue, in order to
4452159047fSniklas branch back to the top of this while loop and generate the
4462159047fSniklas next output character in the appropriate state. */
4472159047fSniklas switch (state)
4482159047fSniklas {
4492159047fSniklas case -1:
4502159047fSniklas ch = *out_string++;
4512159047fSniklas if (*out_string == '\0')
4522159047fSniklas {
4532159047fSniklas state = old_state;
4542159047fSniklas old_state = 3;
4552159047fSniklas }
4562159047fSniklas PUT (ch);
4572159047fSniklas continue;
4582159047fSniklas
4592159047fSniklas case -2:
4602159047fSniklas for (;;)
4612159047fSniklas {
4622159047fSniklas do
4632159047fSniklas {
4642159047fSniklas ch = GET ();
4652159047fSniklas
4662159047fSniklas if (ch == EOF)
4672159047fSniklas {
468b305b0f1Sespie as_warn (_("end of file in comment"));
4692159047fSniklas goto fromeof;
4702159047fSniklas }
4712159047fSniklas
4722159047fSniklas if (ch == '\n')
4732159047fSniklas PUT ('\n');
4742159047fSniklas }
4752159047fSniklas while (ch != '*');
4762159047fSniklas
4772159047fSniklas while ((ch = GET ()) == '*')
4782159047fSniklas ;
4792159047fSniklas
4802159047fSniklas if (ch == EOF)
4812159047fSniklas {
482b305b0f1Sespie as_warn (_("end of file in comment"));
4832159047fSniklas goto fromeof;
4842159047fSniklas }
4852159047fSniklas
4862159047fSniklas if (ch == '/')
4872159047fSniklas break;
4882159047fSniklas
4892159047fSniklas UNGET (ch);
4902159047fSniklas }
4912159047fSniklas
4922159047fSniklas state = old_state;
493b305b0f1Sespie UNGET (' ');
4942159047fSniklas continue;
4952159047fSniklas
4962159047fSniklas case 4:
4972159047fSniklas ch = GET ();
4982159047fSniklas if (ch == EOF)
4992159047fSniklas goto fromeof;
5002159047fSniklas else if (ch >= '0' && ch <= '9')
5012159047fSniklas PUT (ch);
5022159047fSniklas else
5032159047fSniklas {
5042159047fSniklas while (ch != EOF && IS_WHITESPACE (ch))
5052159047fSniklas ch = GET ();
5062159047fSniklas if (ch == '"')
5072159047fSniklas {
5082159047fSniklas UNGET (ch);
509191aa565Sniklas if (scrub_m68k_mri)
510191aa565Sniklas out_string = "\n\tappfile ";
511191aa565Sniklas else
5122159047fSniklas out_string = "\n\t.appfile ";
5132159047fSniklas old_state = 7;
5142159047fSniklas state = -1;
5152159047fSniklas PUT (*out_string++);
5162159047fSniklas }
5172159047fSniklas else
5182159047fSniklas {
5192159047fSniklas while (ch != EOF && ch != '\n')
5202159047fSniklas ch = GET ();
5212159047fSniklas state = 0;
5222159047fSniklas PUT (ch);
5232159047fSniklas }
5242159047fSniklas }
5252159047fSniklas continue;
5262159047fSniklas
5272159047fSniklas case 5:
5282159047fSniklas /* We are going to copy everything up to a quote character,
5292159047fSniklas with special handling for a backslash. We try to
5302159047fSniklas optimize the copying in the simple case without using the
5312159047fSniklas GET and PUT macros. */
5322159047fSniklas {
5332159047fSniklas char *s;
5342159047fSniklas int len;
5352159047fSniklas
5362159047fSniklas for (s = from; s < fromend; s++)
5372159047fSniklas {
5382159047fSniklas ch = *s;
5392159047fSniklas /* This condition must be changed if the type of any
5402159047fSniklas other character can be LEX_IS_STRINGQUOTE. */
5412159047fSniklas if (ch == '\\'
5422159047fSniklas || ch == '"'
5432159047fSniklas || ch == '\''
5442159047fSniklas || ch == '\n')
5452159047fSniklas break;
5462159047fSniklas }
5472159047fSniklas len = s - from;
5482159047fSniklas if (len > toend - to)
5492159047fSniklas len = toend - to;
5502159047fSniklas if (len > 0)
5512159047fSniklas {
5522159047fSniklas memcpy (to, from, len);
5532159047fSniklas to += len;
5542159047fSniklas from += len;
5552159047fSniklas }
5562159047fSniklas }
5572159047fSniklas
5582159047fSniklas ch = GET ();
5592159047fSniklas if (ch == EOF)
5602159047fSniklas {
561c074d1c9Sdrahn as_warn (_("end of file in string; inserted '\"'"));
5622159047fSniklas state = old_state;
5632159047fSniklas UNGET ('\n');
5642159047fSniklas PUT ('"');
5652159047fSniklas }
5662159047fSniklas else if (lex[ch] == LEX_IS_STRINGQUOTE)
5672159047fSniklas {
5682159047fSniklas state = old_state;
5692159047fSniklas PUT (ch);
5702159047fSniklas }
5712159047fSniklas #ifndef NO_STRING_ESCAPES
5722159047fSniklas else if (ch == '\\')
5732159047fSniklas {
5742159047fSniklas state = 6;
5752159047fSniklas PUT (ch);
5762159047fSniklas }
5772159047fSniklas #endif
578191aa565Sniklas else if (scrub_m68k_mri && ch == '\n')
5792159047fSniklas {
	      /* Just quietly terminate the string.  This permits lines like
		   bne	label	loop if we haven't reached end yet.  */
5822159047fSniklas state = old_state;
5832159047fSniklas UNGET (ch);
5842159047fSniklas PUT ('\'');
5852159047fSniklas }
5862159047fSniklas else
5872159047fSniklas {
5882159047fSniklas PUT (ch);
5892159047fSniklas }
5902159047fSniklas continue;
5912159047fSniklas
5922159047fSniklas case 6:
5932159047fSniklas state = 5;
5942159047fSniklas ch = GET ();
5952159047fSniklas switch (ch)
5962159047fSniklas {
5972159047fSniklas /* Handle strings broken across lines, by turning '\n' into
5982159047fSniklas '\\' and 'n'. */
5992159047fSniklas case '\n':
6002159047fSniklas UNGET ('n');
6012159047fSniklas add_newlines++;
6022159047fSniklas PUT ('\\');
6032159047fSniklas continue;
6042159047fSniklas
605c074d1c9Sdrahn case EOF:
606c074d1c9Sdrahn as_warn (_("end of file in string; '\"' inserted"));
607c074d1c9Sdrahn PUT ('"');
608c074d1c9Sdrahn continue;
609c074d1c9Sdrahn
6102159047fSniklas case '"':
6112159047fSniklas case '\\':
6122159047fSniklas case 'b':
6132159047fSniklas case 'f':
6142159047fSniklas case 'n':
6152159047fSniklas case 'r':
6162159047fSniklas case 't':
6172159047fSniklas case 'v':
6182159047fSniklas case 'x':
6192159047fSniklas case 'X':
6202159047fSniklas case '0':
6212159047fSniklas case '1':
6222159047fSniklas case '2':
6232159047fSniklas case '3':
6242159047fSniklas case '4':
6252159047fSniklas case '5':
6262159047fSniklas case '6':
6272159047fSniklas case '7':
6282159047fSniklas break;
6292159047fSniklas
630c074d1c9Sdrahn default:
631c074d1c9Sdrahn #ifdef ONLY_STANDARD_ESCAPES
632c074d1c9Sdrahn as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
633c074d1c9Sdrahn #endif
634c074d1c9Sdrahn break;
6352159047fSniklas }
6362159047fSniklas PUT (ch);
6372159047fSniklas continue;
6382159047fSniklas
6392159047fSniklas case 7:
6402159047fSniklas ch = GET ();
6412159047fSniklas state = 5;
6422159047fSniklas old_state = 8;
6432159047fSniklas if (ch == EOF)
6442159047fSniklas goto fromeof;
6452159047fSniklas PUT (ch);
6462159047fSniklas continue;
6472159047fSniklas
6482159047fSniklas case 8:
6492159047fSniklas do
6502159047fSniklas ch = GET ();
6512159047fSniklas while (ch != '\n' && ch != EOF);
6522159047fSniklas if (ch == EOF)
6532159047fSniklas goto fromeof;
6542159047fSniklas state = 0;
6552159047fSniklas PUT (ch);
6562159047fSniklas continue;
657c074d1c9Sdrahn
658c074d1c9Sdrahn #ifdef DOUBLEBAR_PARALLEL
659c074d1c9Sdrahn case 13:
660c074d1c9Sdrahn ch = GET ();
661c074d1c9Sdrahn if (ch != '|')
662c074d1c9Sdrahn abort ();
663c074d1c9Sdrahn
664c074d1c9Sdrahn /* Reset back to state 1 and pretend that we are parsing a
665c074d1c9Sdrahn line from just after the first white space. */
666c074d1c9Sdrahn state = 1;
667c074d1c9Sdrahn PUT ('|');
668c074d1c9Sdrahn continue;
669c074d1c9Sdrahn #endif
6702159047fSniklas }
6712159047fSniklas
672c074d1c9Sdrahn /* OK, we are somewhere in states 0 through 4 or 9 through 11. */
6732159047fSniklas
6742159047fSniklas /* flushchar: */
6752159047fSniklas ch = GET ();
676191aa565Sniklas
677*007c2a45Smiod #ifdef TC_IA64
678*007c2a45Smiod if (ch == '(' && (state == 0 || state == 1))
679*007c2a45Smiod {
680*007c2a45Smiod state += 14;
681*007c2a45Smiod PUT (ch);
682*007c2a45Smiod continue;
683*007c2a45Smiod }
684*007c2a45Smiod else if (state == 14 || state == 15)
685*007c2a45Smiod {
686*007c2a45Smiod if (ch == ')')
687*007c2a45Smiod {
688*007c2a45Smiod state -= 14;
689*007c2a45Smiod PUT (ch);
690*007c2a45Smiod ch = GET ();
691*007c2a45Smiod }
692*007c2a45Smiod else
693*007c2a45Smiod {
694*007c2a45Smiod PUT (ch);
695*007c2a45Smiod continue;
696*007c2a45Smiod }
697*007c2a45Smiod }
698*007c2a45Smiod #endif
699*007c2a45Smiod
7002159047fSniklas recycle:
701191aa565Sniklas
702b305b0f1Sespie #if defined TC_ARM && defined OBJ_ELF
703b305b0f1Sespie /* We need to watch out for .symver directives. See the comment later
704b305b0f1Sespie in this function. */
705b305b0f1Sespie if (symver_state == NULL)
706b305b0f1Sespie {
707b305b0f1Sespie if ((state == 0 || state == 1) && ch == symver_pseudo[0])
708b305b0f1Sespie symver_state = symver_pseudo + 1;
709b305b0f1Sespie }
710b305b0f1Sespie else
711b305b0f1Sespie {
712b305b0f1Sespie /* We advance to the next state if we find the right
713b305b0f1Sespie character. */
714b305b0f1Sespie if (ch != '\0' && (*symver_state == ch))
715b305b0f1Sespie ++symver_state;
716b305b0f1Sespie else if (*symver_state != '\0')
717b305b0f1Sespie /* We did not get the expected character, or we didn't
718b305b0f1Sespie get a valid terminating character after seeing the
719b305b0f1Sespie entire pseudo-op, so we must go back to the beginning. */
720b305b0f1Sespie symver_state = NULL;
721b305b0f1Sespie else
722b305b0f1Sespie {
723b305b0f1Sespie /* We've read the entire pseudo-op. If this is the end
724b305b0f1Sespie of the line, go back to the beginning. */
725b305b0f1Sespie if (IS_NEWLINE (ch))
726b305b0f1Sespie symver_state = NULL;
727b305b0f1Sespie }
728b305b0f1Sespie }
729b305b0f1Sespie #endif /* TC_ARM && OBJ_ELF */
730b305b0f1Sespie
731191aa565Sniklas #ifdef TC_M68K
732191aa565Sniklas /* We want to have pseudo-ops which control whether we are in
733191aa565Sniklas MRI mode or not. Unfortunately, since m68k MRI mode affects
734191aa565Sniklas the scrubber, that means that we need a special purpose
735191aa565Sniklas recognizer here. */
736191aa565Sniklas if (mri_state == NULL)
737191aa565Sniklas {
738191aa565Sniklas if ((state == 0 || state == 1)
739191aa565Sniklas && ch == mri_pseudo[0])
740191aa565Sniklas mri_state = mri_pseudo + 1;
741191aa565Sniklas }
742191aa565Sniklas else
743191aa565Sniklas {
744191aa565Sniklas /* We advance to the next state if we find the right
745191aa565Sniklas character, or if we need a space character and we get any
746191aa565Sniklas whitespace character, or if we need a '0' and we get a
747191aa565Sniklas '1' (this is so that we only need one state to handle
748191aa565Sniklas ``.mri 0'' and ``.mri 1''). */
749191aa565Sniklas if (ch != '\0'
750191aa565Sniklas && (*mri_state == ch
751191aa565Sniklas || (*mri_state == ' '
752191aa565Sniklas && lex[ch] == LEX_IS_WHITESPACE)
753191aa565Sniklas || (*mri_state == '0'
754191aa565Sniklas && ch == '1')))
755191aa565Sniklas {
756191aa565Sniklas mri_last_ch = ch;
757191aa565Sniklas ++mri_state;
758191aa565Sniklas }
759191aa565Sniklas else if (*mri_state != '\0'
760191aa565Sniklas || (lex[ch] != LEX_IS_WHITESPACE
761191aa565Sniklas && lex[ch] != LEX_IS_NEWLINE))
762191aa565Sniklas {
763191aa565Sniklas /* We did not get the expected character, or we didn't
764191aa565Sniklas get a valid terminating character after seeing the
765191aa565Sniklas entire pseudo-op, so we must go back to the
766191aa565Sniklas beginning. */
767191aa565Sniklas mri_state = NULL;
768191aa565Sniklas }
769191aa565Sniklas else
770191aa565Sniklas {
771191aa565Sniklas /* We've read the entire pseudo-op. mri_last_ch is
772191aa565Sniklas either '0' or '1' indicating whether to enter or
773191aa565Sniklas leave MRI mode. */
774191aa565Sniklas do_scrub_begin (mri_last_ch == '1');
775b305b0f1Sespie mri_state = NULL;
776191aa565Sniklas
777191aa565Sniklas /* We continue handling the character as usual. The
778191aa565Sniklas main gas reader must also handle the .mri pseudo-op
779191aa565Sniklas to control expression parsing and the like. */
780191aa565Sniklas }
781191aa565Sniklas }
782191aa565Sniklas #endif
783191aa565Sniklas
7842159047fSniklas if (ch == EOF)
7852159047fSniklas {
7862159047fSniklas if (state != 0)
7872159047fSniklas {
788b305b0f1Sespie as_warn (_("end of file not at end of a line; newline inserted"));
7892159047fSniklas state = 0;
7902159047fSniklas PUT ('\n');
7912159047fSniklas }
7922159047fSniklas goto fromeof;
7932159047fSniklas }
7942159047fSniklas
7952159047fSniklas switch (lex[ch])
7962159047fSniklas {
7972159047fSniklas case LEX_IS_WHITESPACE:
7982159047fSniklas do
7992159047fSniklas {
8002159047fSniklas ch = GET ();
8012159047fSniklas }
8022159047fSniklas while (ch != EOF && IS_WHITESPACE (ch));
8032159047fSniklas if (ch == EOF)
8042159047fSniklas goto fromeof;
8052159047fSniklas
8062159047fSniklas if (state == 0)
8072159047fSniklas {
8082159047fSniklas /* Preserve a single whitespace character at the
8092159047fSniklas beginning of a line. */
8102159047fSniklas state = 1;
8112159047fSniklas UNGET (ch);
8122159047fSniklas PUT (' ');
8132159047fSniklas break;
8142159047fSniklas }
8152159047fSniklas
816b305b0f1Sespie #ifdef KEEP_WHITE_AROUND_COLON
817b305b0f1Sespie if (lex[ch] == LEX_IS_COLON)
818b305b0f1Sespie {
819b55d4692Sfgsch /* Only keep this white if there's no white *after* the
820b55d4692Sfgsch colon. */
821b305b0f1Sespie ch2 = GET ();
822b305b0f1Sespie UNGET (ch2);
823b305b0f1Sespie if (!IS_WHITESPACE (ch2))
824b305b0f1Sespie {
825b305b0f1Sespie state = 9;
826b305b0f1Sespie UNGET (ch);
827b305b0f1Sespie PUT (' ');
828b305b0f1Sespie break;
829b305b0f1Sespie }
830b305b0f1Sespie }
831b305b0f1Sespie #endif
8322159047fSniklas if (IS_COMMENT (ch)
8332159047fSniklas || ch == '/'
834c074d1c9Sdrahn || IS_LINE_SEPARATOR (ch)
835c074d1c9Sdrahn || IS_PARALLEL_SEPARATOR (ch))
8362159047fSniklas {
837191aa565Sniklas if (scrub_m68k_mri)
8382159047fSniklas {
8392159047fSniklas /* In MRI mode, we keep these spaces. */
8402159047fSniklas UNGET (ch);
8412159047fSniklas PUT (' ');
8422159047fSniklas break;
8432159047fSniklas }
8442159047fSniklas goto recycle;
8452159047fSniklas }
8462159047fSniklas
8472159047fSniklas /* If we're in state 2 or 11, we've seen a non-white
8482159047fSniklas character followed by whitespace. If the next character
8492159047fSniklas is ':', this is whitespace after a label name which we
8502159047fSniklas normally must ignore. In MRI mode, though, spaces are
8512159047fSniklas not permitted between the label and the colon. */
8522159047fSniklas if ((state == 2 || state == 11)
8532159047fSniklas && lex[ch] == LEX_IS_COLON
854191aa565Sniklas && ! scrub_m68k_mri)
8552159047fSniklas {
8562159047fSniklas state = 1;
8572159047fSniklas PUT (ch);
8582159047fSniklas break;
8592159047fSniklas }
8602159047fSniklas
8612159047fSniklas switch (state)
8622159047fSniklas {
8632159047fSniklas case 0:
8642159047fSniklas state++;
8652159047fSniklas goto recycle; /* Punted leading sp */
8662159047fSniklas case 1:
8672159047fSniklas /* We can arrive here if we leave a leading whitespace
8682159047fSniklas character at the beginning of a line. */
8692159047fSniklas goto recycle;
8702159047fSniklas case 2:
8712159047fSniklas state = 3;
8722159047fSniklas if (to + 1 < toend)
8732159047fSniklas {
8742159047fSniklas /* Optimize common case by skipping UNGET/GET. */
8752159047fSniklas PUT (' '); /* Sp after opco */
8762159047fSniklas goto recycle;
8772159047fSniklas }
8782159047fSniklas UNGET (ch);
8792159047fSniklas PUT (' ');
8802159047fSniklas break;
8812159047fSniklas case 3:
882191aa565Sniklas if (scrub_m68k_mri)
8832159047fSniklas {
8842159047fSniklas /* In MRI mode, we keep these spaces. */
8852159047fSniklas UNGET (ch);
8862159047fSniklas PUT (' ');
8872159047fSniklas break;
8882159047fSniklas }
8892159047fSniklas goto recycle; /* Sp in operands */
8902159047fSniklas case 9:
8912159047fSniklas case 10:
892191aa565Sniklas if (scrub_m68k_mri)
8932159047fSniklas {
8942159047fSniklas /* In MRI mode, we keep these spaces. */
8952159047fSniklas state = 3;
8962159047fSniklas UNGET (ch);
8972159047fSniklas PUT (' ');
8982159047fSniklas break;
8992159047fSniklas }
9002159047fSniklas state = 10; /* Sp after symbol char */
9012159047fSniklas goto recycle;
9022159047fSniklas case 11:
903b305b0f1Sespie if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
9042159047fSniklas state = 1;
905191aa565Sniklas else
906191aa565Sniklas {
907191aa565Sniklas /* We know that ch is not ':', since we tested that
908191aa565Sniklas case above. Therefore this is not a label, so it
909191aa565Sniklas must be the opcode, and we've just seen the
910191aa565Sniklas whitespace after it. */
911191aa565Sniklas state = 3;
912191aa565Sniklas }
9132159047fSniklas UNGET (ch);
9142159047fSniklas PUT (' '); /* Sp after label definition. */
9152159047fSniklas break;
9162159047fSniklas default:
9172159047fSniklas BAD_CASE (state);
9182159047fSniklas }
9192159047fSniklas break;
9202159047fSniklas
9212159047fSniklas case LEX_IS_TWOCHAR_COMMENT_1ST:
9222159047fSniklas ch2 = GET ();
923b305b0f1Sespie if (ch2 == '*')
9242159047fSniklas {
9252159047fSniklas for (;;)
9262159047fSniklas {
9272159047fSniklas do
9282159047fSniklas {
9292159047fSniklas ch2 = GET ();
9302159047fSniklas if (ch2 != EOF && IS_NEWLINE (ch2))
9312159047fSniklas add_newlines++;
9322159047fSniklas }
933b305b0f1Sespie while (ch2 != EOF && ch2 != '*');
9342159047fSniklas
935b305b0f1Sespie while (ch2 == '*')
9362159047fSniklas ch2 = GET ();
9372159047fSniklas
938b305b0f1Sespie if (ch2 == EOF || ch2 == '/')
9392159047fSniklas break;
940b305b0f1Sespie
941b305b0f1Sespie /* This UNGET will ensure that we count newlines
942b305b0f1Sespie correctly. */
943191aa565Sniklas UNGET (ch2);
9442159047fSniklas }
945b305b0f1Sespie
9462159047fSniklas if (ch2 == EOF)
947b305b0f1Sespie as_warn (_("end of file in multiline comment"));
9482159047fSniklas
9492159047fSniklas ch = ' ';
9502159047fSniklas goto recycle;
9512159047fSniklas }
952b55d4692Sfgsch #ifdef DOUBLESLASH_LINE_COMMENTS
953b55d4692Sfgsch else if (ch2 == '/')
954b55d4692Sfgsch {
955b55d4692Sfgsch do
956b55d4692Sfgsch {
957b55d4692Sfgsch ch = GET ();
958b55d4692Sfgsch }
959b55d4692Sfgsch while (ch != EOF && !IS_NEWLINE (ch));
960b55d4692Sfgsch if (ch == EOF)
961b55d4692Sfgsch as_warn ("end of file in comment; newline inserted");
962b55d4692Sfgsch state = 0;
963b55d4692Sfgsch PUT ('\n');
964b55d4692Sfgsch break;
965b55d4692Sfgsch }
966b55d4692Sfgsch #endif
9672159047fSniklas else
9682159047fSniklas {
9692159047fSniklas if (ch2 != EOF)
9702159047fSniklas UNGET (ch2);
9712159047fSniklas if (state == 9 || state == 10)
9722159047fSniklas state = 3;
9732159047fSniklas PUT (ch);
9742159047fSniklas }
9752159047fSniklas break;
9762159047fSniklas
9772159047fSniklas case LEX_IS_STRINGQUOTE:
9782159047fSniklas if (state == 10)
9792159047fSniklas {
980c074d1c9Sdrahn /* Preserve the whitespace in foo "bar". */
9812159047fSniklas UNGET (ch);
9822159047fSniklas state = 3;
9832159047fSniklas PUT (' ');
9842159047fSniklas
9852159047fSniklas /* PUT didn't jump out. We could just break, but we
9862159047fSniklas know what will happen, so optimize a bit. */
9872159047fSniklas ch = GET ();
9882159047fSniklas old_state = 3;
9892159047fSniklas }
9902159047fSniklas else if (state == 9)
9912159047fSniklas old_state = 3;
9922159047fSniklas else
9932159047fSniklas old_state = state;
9942159047fSniklas state = 5;
9952159047fSniklas PUT (ch);
9962159047fSniklas break;
9972159047fSniklas
9982159047fSniklas #ifndef IEEE_STYLE
9992159047fSniklas case LEX_IS_ONECHAR_QUOTE:
10002159047fSniklas if (state == 10)
10012159047fSniklas {
1002c074d1c9Sdrahn /* Preserve the whitespace in foo 'b'. */
10032159047fSniklas UNGET (ch);
10042159047fSniklas state = 3;
10052159047fSniklas PUT (' ');
10062159047fSniklas break;
10072159047fSniklas }
10082159047fSniklas ch = GET ();
10092159047fSniklas if (ch == EOF)
10102159047fSniklas {
1011b305b0f1Sespie as_warn (_("end of file after a one-character quote; \\0 inserted"));
10122159047fSniklas ch = 0;
10132159047fSniklas }
10142159047fSniklas if (ch == '\\')
10152159047fSniklas {
10162159047fSniklas ch = GET ();
10172159047fSniklas if (ch == EOF)
10182159047fSniklas {
1019b305b0f1Sespie as_warn (_("end of file in escape character"));
10202159047fSniklas ch = '\\';
10212159047fSniklas }
10222159047fSniklas else
10232159047fSniklas ch = process_escape (ch);
10242159047fSniklas }
10252159047fSniklas sprintf (out_buf, "%d", (int) (unsigned char) ch);
10262159047fSniklas
10272159047fSniklas /* None of these 'x constants for us. We want 'x'. */
10282159047fSniklas if ((ch = GET ()) != '\'')
10292159047fSniklas {
10302159047fSniklas #ifdef REQUIRE_CHAR_CLOSE_QUOTE
1031c074d1c9Sdrahn as_warn (_("missing close quote; (assumed)"));
10322159047fSniklas #else
10332159047fSniklas if (ch != EOF)
10342159047fSniklas UNGET (ch);
10352159047fSniklas #endif
10362159047fSniklas }
10372159047fSniklas if (strlen (out_buf) == 1)
10382159047fSniklas {
10392159047fSniklas PUT (out_buf[0]);
10402159047fSniklas break;
10412159047fSniklas }
10422159047fSniklas if (state == 9)
10432159047fSniklas old_state = 3;
10442159047fSniklas else
10452159047fSniklas old_state = state;
10462159047fSniklas state = -1;
10472159047fSniklas out_string = out_buf;
10482159047fSniklas PUT (*out_string++);
10492159047fSniklas break;
10502159047fSniklas #endif
10512159047fSniklas
10522159047fSniklas case LEX_IS_COLON:
1053b305b0f1Sespie #ifdef KEEP_WHITE_AROUND_COLON
1054b305b0f1Sespie state = 9;
1055b305b0f1Sespie #else
10562159047fSniklas if (state == 9 || state == 10)
10572159047fSniklas state = 3;
10582159047fSniklas else if (state != 3)
10592159047fSniklas state = 1;
1060b305b0f1Sespie #endif
10612159047fSniklas PUT (ch);
10622159047fSniklas break;
10632159047fSniklas
10642159047fSniklas case LEX_IS_NEWLINE:
10652159047fSniklas /* Roll out a bunch of newlines from inside comments, etc. */
10662159047fSniklas if (add_newlines)
10672159047fSniklas {
10682159047fSniklas --add_newlines;
10692159047fSniklas UNGET (ch);
10702159047fSniklas }
1071b55d4692Sfgsch /* Fall through. */
10722159047fSniklas
10732159047fSniklas case LEX_IS_LINE_SEPARATOR:
10742159047fSniklas state = 0;
10752159047fSniklas PUT (ch);
10762159047fSniklas break;
10772159047fSniklas
1078c074d1c9Sdrahn case LEX_IS_PARALLEL_SEPARATOR:
1079c074d1c9Sdrahn state = 1;
1080c074d1c9Sdrahn PUT (ch);
1081c074d1c9Sdrahn break;
1082c074d1c9Sdrahn
1083b305b0f1Sespie #ifdef TC_V850
1084b305b0f1Sespie case LEX_IS_DOUBLEDASH_1ST:
1085b305b0f1Sespie ch2 = GET ();
1086b305b0f1Sespie if (ch2 != '-')
1087b305b0f1Sespie {
1088b305b0f1Sespie UNGET (ch2);
1089b305b0f1Sespie goto de_fault;
1090b305b0f1Sespie }
1091b55d4692Sfgsch /* Read and skip to end of line. */
1092b305b0f1Sespie do
1093b305b0f1Sespie {
1094b305b0f1Sespie ch = GET ();
1095b305b0f1Sespie }
1096b305b0f1Sespie while (ch != EOF && ch != '\n');
1097c074d1c9Sdrahn
1098b305b0f1Sespie if (ch == EOF)
1099b305b0f1Sespie as_warn (_("end of file in comment; newline inserted"));
1100c074d1c9Sdrahn
1101b305b0f1Sespie state = 0;
1102b305b0f1Sespie PUT ('\n');
1103b305b0f1Sespie break;
1104b305b0f1Sespie #endif
1105b305b0f1Sespie #ifdef DOUBLEBAR_PARALLEL
1106b305b0f1Sespie case LEX_IS_DOUBLEBAR_1ST:
1107b305b0f1Sespie ch2 = GET ();
1108b305b0f1Sespie UNGET (ch2);
1109c074d1c9Sdrahn if (ch2 != '|')
1110b305b0f1Sespie goto de_fault;
1111c074d1c9Sdrahn
1112c074d1c9Sdrahn /* Handle '||' in two states as invoking PUT twice might
1113c074d1c9Sdrahn result in the first one jumping out of this loop. We'd
1114c074d1c9Sdrahn then lose track of the state and one '|' char. */
1115c074d1c9Sdrahn state = 13;
1116b305b0f1Sespie PUT ('|');
1117b305b0f1Sespie break;
1118b305b0f1Sespie #endif
11192159047fSniklas case LEX_IS_LINE_COMMENT_START:
11200c6d0228Sniklas /* FIXME-someday: The two character comment stuff was badly
11210c6d0228Sniklas thought out. On i386, we want '/' as line comment start
11220c6d0228Sniklas AND we want C style comments. hence this hack. The
11230c6d0228Sniklas whole lexical process should be reworked. xoxorich. */
11242159047fSniklas if (ch == '/')
11252159047fSniklas {
11262159047fSniklas ch2 = GET ();
11272159047fSniklas if (ch2 == '*')
11282159047fSniklas {
1129b305b0f1Sespie old_state = 3;
11302159047fSniklas state = -2;
11312159047fSniklas break;
11322159047fSniklas }
11332159047fSniklas else
11342159047fSniklas {
11352159047fSniklas UNGET (ch2);
11362159047fSniklas }
1137c074d1c9Sdrahn }
11382159047fSniklas
11390c6d0228Sniklas if (state == 0 || state == 1) /* Only comment at start of line. */
11400c6d0228Sniklas {
11410c6d0228Sniklas int startch;
11420c6d0228Sniklas
11430c6d0228Sniklas startch = ch;
11442159047fSniklas
11452159047fSniklas do
11462159047fSniklas {
11472159047fSniklas ch = GET ();
11482159047fSniklas }
11492159047fSniklas while (ch != EOF && IS_WHITESPACE (ch));
1150c074d1c9Sdrahn
11512159047fSniklas if (ch == EOF)
11522159047fSniklas {
1153b305b0f1Sespie as_warn (_("end of file in comment; newline inserted"));
11542159047fSniklas PUT ('\n');
11552159047fSniklas break;
11562159047fSniklas }
1157c074d1c9Sdrahn
11580c6d0228Sniklas if (ch < '0' || ch > '9' || state != 0 || startch != '#')
11592159047fSniklas {
11600c6d0228Sniklas /* Not a cpp line. */
11612159047fSniklas while (ch != EOF && !IS_NEWLINE (ch))
11622159047fSniklas ch = GET ();
11632159047fSniklas if (ch == EOF)
1164c074d1c9Sdrahn as_warn (_("end of file in comment; newline inserted"));
11652159047fSniklas state = 0;
11662159047fSniklas PUT ('\n');
11672159047fSniklas break;
11682159047fSniklas }
1169b55d4692Sfgsch /* Looks like `# 123 "filename"' from cpp. */
11702159047fSniklas UNGET (ch);
11712159047fSniklas old_state = 4;
11722159047fSniklas state = -1;
1173191aa565Sniklas if (scrub_m68k_mri)
1174191aa565Sniklas out_string = "\tappline ";
1175191aa565Sniklas else
11762159047fSniklas out_string = "\t.appline ";
11772159047fSniklas PUT (*out_string++);
11782159047fSniklas break;
11792159047fSniklas }
11802159047fSniklas
1181b305b0f1Sespie #ifdef TC_D10V
1182b305b0f1Sespie /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1183b305b0f1Sespie Trap is the only short insn that has a first operand that is
1184b305b0f1Sespie neither register nor label.
1185b305b0f1Sespie We must prevent exef0f ||trap #1 from degenerating into exef0f ||trap#1.
1186b55d4692Sfgsch We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1187b55d4692Sfgsch already LEX_IS_LINE_COMMENT_START. However, it is the
1188b55d4692Sfgsch only character in line_comment_chars for d10v, hence we
1189b55d4692Sfgsch can recognize it as such. */
1190b305b0f1Sespie /* An alternative approach would be to reset the state to 1 when
1191b305b0f1Sespie we see '||', '<-' or '->', but that seems to be overkill. */
1192b55d4692Sfgsch if (state == 10)
1193b55d4692Sfgsch PUT (' ');
1194b305b0f1Sespie #endif
11952159047fSniklas /* We have a line comment character which is not at the
11962159047fSniklas start of a line. If this is also a normal comment
11972159047fSniklas character, fall through. Otherwise treat it as a default
11982159047fSniklas character. */
1199191aa565Sniklas if (strchr (tc_comment_chars, ch) == NULL
1200191aa565Sniklas && (! scrub_m68k_mri
12012159047fSniklas || (ch != '!' && ch != '*')))
12022159047fSniklas goto de_fault;
1203191aa565Sniklas if (scrub_m68k_mri
1204191aa565Sniklas && (ch == '!' || ch == '*' || ch == '#')
12052159047fSniklas && state != 1
12062159047fSniklas && state != 10)
12072159047fSniklas goto de_fault;
12082159047fSniklas /* Fall through. */
12092159047fSniklas case LEX_IS_COMMENT_START:
1210b305b0f1Sespie #if defined TC_ARM && defined OBJ_ELF
1211b305b0f1Sespie /* On the ARM, `@' is the comment character.
1212b305b0f1Sespie Unfortunately this is also a special character in ELF .symver
1213b55d4692Sfgsch directives (and .type, though we deal with those another way).
1214b55d4692Sfgsch So we check if this line is such a directive, and treat
1215b55d4692Sfgsch the character as default if so. This is a hack. */
1216b305b0f1Sespie if ((symver_state != NULL) && (*symver_state == 0))
1217b305b0f1Sespie goto de_fault;
1218b305b0f1Sespie #endif
1219b55d4692Sfgsch #ifdef WARN_COMMENTS
1220b55d4692Sfgsch if (!found_comment)
1221b55d4692Sfgsch as_where (&found_comment_file, &found_comment);
1222b55d4692Sfgsch #endif
12232159047fSniklas do
12242159047fSniklas {
12252159047fSniklas ch = GET ();
12262159047fSniklas }
12272159047fSniklas while (ch != EOF && !IS_NEWLINE (ch));
12282159047fSniklas if (ch == EOF)
1229b305b0f1Sespie as_warn (_("end of file in comment; newline inserted"));
12302159047fSniklas state = 0;
12312159047fSniklas PUT ('\n');
12322159047fSniklas break;
12332159047fSniklas
12342159047fSniklas case LEX_IS_SYMBOL_COMPONENT:
12352159047fSniklas if (state == 10)
12362159047fSniklas {
12372159047fSniklas /* This is a symbol character following another symbol
12382159047fSniklas character, with whitespace in between. We skipped
12392159047fSniklas the whitespace earlier, so output it now. */
12402159047fSniklas UNGET (ch);
12412159047fSniklas state = 3;
12422159047fSniklas PUT (' ');
12432159047fSniklas break;
12442159047fSniklas }
12452159047fSniklas
12462159047fSniklas if (state == 3)
12472159047fSniklas state = 9;
12482159047fSniklas
12492159047fSniklas /* This is a common case. Quickly copy CH and all the
12502159047fSniklas following symbol component or normal characters. */
1251b305b0f1Sespie if (to + 1 < toend
1252b305b0f1Sespie && mri_state == NULL
1253b305b0f1Sespie #if defined TC_ARM && defined OBJ_ELF
1254b305b0f1Sespie && symver_state == NULL
1255b305b0f1Sespie #endif
1256b305b0f1Sespie )
12572159047fSniklas {
12582159047fSniklas char *s;
12592159047fSniklas int len;
12602159047fSniklas
12612159047fSniklas for (s = from; s < fromend; s++)
12622159047fSniklas {
12632159047fSniklas int type;
12642159047fSniklas
1265b305b0f1Sespie ch2 = *(unsigned char *) s;
12662159047fSniklas type = lex[ch2];
12672159047fSniklas if (type != 0
12682159047fSniklas && type != LEX_IS_SYMBOL_COMPONENT)
12692159047fSniklas break;
12702159047fSniklas }
1271c074d1c9Sdrahn
12722159047fSniklas if (s > from)
12732159047fSniklas /* Handle the last character normally, for
12742159047fSniklas simplicity. */
12752159047fSniklas --s;
1276c074d1c9Sdrahn
12772159047fSniklas len = s - from;
1278c074d1c9Sdrahn
12792159047fSniklas if (len > (toend - to) - 1)
12802159047fSniklas len = (toend - to) - 1;
1281c074d1c9Sdrahn
12822159047fSniklas if (len > 0)
12832159047fSniklas {
12842159047fSniklas PUT (ch);
12852159047fSniklas if (len > 8)
12862159047fSniklas {
12872159047fSniklas memcpy (to, from, len);
12882159047fSniklas to += len;
12892159047fSniklas from += len;
12902159047fSniklas }
12912159047fSniklas else
12922159047fSniklas {
12932159047fSniklas switch (len)
12942159047fSniklas {
12952159047fSniklas case 8: *to++ = *from++;
12962159047fSniklas case 7: *to++ = *from++;
12972159047fSniklas case 6: *to++ = *from++;
12982159047fSniklas case 5: *to++ = *from++;
12992159047fSniklas case 4: *to++ = *from++;
13002159047fSniklas case 3: *to++ = *from++;
13012159047fSniklas case 2: *to++ = *from++;
13022159047fSniklas case 1: *to++ = *from++;
13032159047fSniklas }
13042159047fSniklas }
13052159047fSniklas ch = GET ();
13062159047fSniklas }
13072159047fSniklas }
13082159047fSniklas
13092159047fSniklas /* Fall through. */
13102159047fSniklas default:
13112159047fSniklas de_fault:
13122159047fSniklas /* Some relatively `normal' character. */
13132159047fSniklas if (state == 0)
13142159047fSniklas {
1315c074d1c9Sdrahn state = 11; /* Now seeing label definition. */
13162159047fSniklas }
13172159047fSniklas else if (state == 1)
13182159047fSniklas {
1319c074d1c9Sdrahn state = 2; /* Ditto. */
13202159047fSniklas }
13212159047fSniklas else if (state == 9)
13222159047fSniklas {
1323*007c2a45Smiod if (!IS_SYMBOL_COMPONENT (ch))
13242159047fSniklas state = 3;
13252159047fSniklas }
13262159047fSniklas else if (state == 10)
13272159047fSniklas {
1328b305b0f1Sespie if (ch == '\\')
1329b305b0f1Sespie {
1330b305b0f1Sespie /* Special handling for backslash: a backslash may
1331b305b0f1Sespie be the beginning of a formal parameter (of a
1332b305b0f1Sespie macro) following another symbol character, with
1333b305b0f1Sespie whitespace in between. If that is the case, we
1334b305b0f1Sespie output a space before the parameter. Strictly
1335b305b0f1Sespie speaking, correct handling depends upon what the
1336b305b0f1Sespie macro parameter expands into; if the parameter
1337b305b0f1Sespie expands into something which does not start with
1338b305b0f1Sespie an operand character, then we don't want to keep
1339b305b0f1Sespie the space. We don't have enough information to
1340b305b0f1Sespie make the right choice, so here we are making the
1341b305b0f1Sespie choice which is more likely to be correct. */
1342b305b0f1Sespie PUT (' ');
1343b305b0f1Sespie }
1344b305b0f1Sespie
13452159047fSniklas state = 3;
13462159047fSniklas }
13472159047fSniklas PUT (ch);
13482159047fSniklas break;
13492159047fSniklas }
13502159047fSniklas }
13512159047fSniklas
13522159047fSniklas /*NOTREACHED*/
13532159047fSniklas
13542159047fSniklas fromeof:
13552159047fSniklas /* We have reached the end of the input. */
13562159047fSniklas return to - tostart;
13572159047fSniklas
13582159047fSniklas tofull:
13592159047fSniklas /* The output buffer is full. Save any input we have not yet
13602159047fSniklas processed. */
13612159047fSniklas if (fromend > from)
13622159047fSniklas {
1363b305b0f1Sespie saved_input = from;
13642159047fSniklas saved_input_len = fromend - from;
13652159047fSniklas }
13662159047fSniklas else
13672159047fSniklas saved_input = NULL;
1368b305b0f1Sespie
13692159047fSniklas return to - tostart;
13702159047fSniklas }
13712159047fSniklas
1372