xref: /csrg-svn/old/as.vax/asmain.c (revision 15560)
1 /*
2  *	Copyright (c) 1982 Regents of the University of California
3  */
#ifndef lint
/* SCCS identification string; selfwhat() looks for the "@(#)" marker */
static char sccsid[] = "@(#)asmain.c 4.14 11/21/83";
#endif /* not lint */
7 
8 #include <stdio.h>
9 #include <ctype.h>
10 #include <signal.h>
11 
12 #include "as.h"
13 #include "assyms.h"
14 #include "asscan.h"
15 #include "asexpr.h"
16 
/* Human-readable name and version of this assembler */
#define	unix_lang_name "VAX/UNIX Assembler V11/21/83 4.14"
/*
 *	Variables to manage reading the assembly source files
 */
char	*dotsname;	/* the current file name; managed by the parser */
int	lineno;		/* current line number; managed by the parser */
char	**innames;	/* names of the files being assembled (filled in by argprocess) */
int	ninfiles;	/* how many input files were named; 0 means pass1 reads stdin */
/*
 *	Flags settable from the argv process argument list
 *	(the option letters are decoded in argprocess)
 */
int	silent = 0;	/* -W: don't complain about any errors */
int	savelabels = 0;	/* -L: write the labels to the a.out file */
int 	d124 = 4;	/* -d[124]: default allocate 4 bytes for unknown pointers */
int 	maxalign = 2;	/* -a: default .align maximum */
int	anyerrs = 0;	/* no errors yet; main tests this after each phase */
int	anywarnings=0;	/* no warnings yet */
int	orgwarn = 0;	/* bad origins; main issues a caution if set */
int	passno = 1;	/* current pass; set by pass1 and pass2 */
int	jxxxJUMP = 0;	/* -J: in jxxxes that branch too far, use jmp instead of brw */
int	readonlydata = 0;	/* -R: initialized data -> text space */

/*
 *	Counts that make main print the closing "not defined for all
 *	implementations" cautions when they are nonzero
 */
int	nGHnumbers = 0;		/* GH numbers used */
int	nGHopcodes = 0;		/* GH opcodes used */
int	nnewopcodes = 0;	/* new opcodes used */

#ifdef DEBUG
int 	debug = 0;	/* -D: debugging output (symbol table dumps, pass banner) */
int	toktrace = 0;	/* -T */
#endif

int	useVM =	0;	/* -V: when set, no interpass token temporary file is created */

char	*endcore;	/* where to get more symbol space; initially sbrk(0) */
51 
/*
 *	Managers of the a.out file.
 */
struct	exec	hdr;	/* the a.out header: filled in by build_hdr, finished by reloc_syms */
#define	MAGIC	0407	/* value stored in hdr.a_magic */
u_long	tsize;		/* total text size */
u_long	dsize;		/* total data size */
u_long	datbase;	/* base of the data segment */
u_long	trsize;		/* total text relocation size */
u_long	drsize;		/* total data relocation size */

/*
 *	Information about the current segment is accumulated in
 *	usedot; the most important information stored is the
 *	accumulated size of each of the text and data segments.
 *
 *	The first NLOC entries describe text segments and the second
 *	NLOC entries describe data segments (see zeroorigins).
 *
 *	dotp points to the correct usedot expression for the current segment
 */
struct	exp	usedot[NLOC+NLOC];	/* info about all segments */
struct	exp	*dotp;			/* data/text location pointer */
/*
 *	The inter pass temporary token file is opened and closed by stdio, but
 *	is written to using direct read/write, as the temporary file
 *	is composed of buffers exactly BUFSIZ long.
 */
FILE	*tokfile;			/* interpass communication file */
char	tokfilename[TNAMESIZE];		/* its name; empty until tempopen builds it */
/*
 *	The string file is the string table
 *	cat'ed to the end of the built up a.out file
 */
FILE	*strfile;			/* interpass string file */
char	strfilename[TNAMESIZE];		/* its name; empty until tempopen builds it */
int	strfilepos = 0;			/* position within the string file */
/*
 *	a.out is created during the second pass.
 *	It is opened by stdio, but is filled with the parallel
 *	block I/O library
 */
char	*outfile = "a.out";		/* output file name; replaced by -o */
FILE	*a_out_file;			/* stdio handle for outfile (see open_a_out) */
off_t	a_out_off;			/* cumulative offsets for segments */
/*
 *	The logical files containing the assembled data for each of
 *	the text and data segments are
 *	managed by the parallel block I/O library.
 *	a.out is logically opened in many places at once to
 *	receive the assembled data from the various segments as
 *	it all trickles in, but is physically opened only once
 *	to minimize file overhead.
 *
 *	roundsegments stores (BFILE *)-1 in the usefile slot of every
 *	segment that turned out to be empty.
 */
BFILE	*usefile[NLOC+NLOC];		/* text/data files */
BFILE	*txtfil;			/* current text/data file */
/*
 *	Relocation information is accumulated separately for each
 *	segment.  This is required by the old loader (from BTL),
 *	but not by the new loader (Bill Joy).
 *
 *	However, the size of the relocation information can not be computed
 *	during or after the 1st pass because the 'absoluteness' of values
 *	is unknown until all locally declared symbols have been seen.
 *	Thus, the size of the relocation information is only
 *	known after the second pass is finished.
 *	This rules out the use of the block I/O
 *	library, which requires knowing the exact offsets in a.out.
 *
 *	So, we save the relocation information internally (we don't
 *	go to internal files to minimize overhead).
 *
 *	Empirically, we studied 259 files composing the system,
 *	two compilers and a compiler generator: (all of which have
 *	fairly large source files)
 *
 *	Number of files = 259
 *		Number of non zero text reloc files: 233
 *		Number of non zero data reloc files: 53
 *	Average text relocation = 889
 *	Average data relocation = 346
 *	Number of files > BUFSIZ text relocation = 71
 *	Number of files > BUFSIZ data relocation = 6
 *
 *	For compiled C code, there is usually one text segment and two
 *	data segments; we see that allocating our own buffers and
 *	doing our internal handling of relocation information will,
 *	on the average, not use more memory than taken up by the buffers
 *	allocated for doing file I/O in parallel to a number of files.
 *
 *	If we are assembling with the -V option, we
 *	use the left over token buffers from the 2nd pass,
 *	otherwise, we create our own.
 *
 *	When the 2nd pass is complete, closeoutrel flushes the token
 *	buffers out to a BFILE.
 *
 *	The internals to relbufdesc are known only in assyms.c
 *
 *	outrel constructs the relocation information.
 *	closeoutrel flushes the relocation information to relocfile.
 */
struct	relbufdesc	*rusefile[NLOC+NLOC];	/* per segment relocation info */
struct	relbufdesc 	*relfil;	/* unconcatenated relocation info */
BFILE	*relocfile;			/* concatenated relocation info */
/*
 *	Once the relocation information has been written,
 *	we can write out the symbol table using the Block I/O
 *	mechanisms, as we once again know the offsets into
 *	the a.out file.
 *
 *	We use relocfile to output the symbol table information
 *	(reloc_syms hands it to symwrite).
 */
char	*tmpdirprefix = "/tmp/";	/* directory for temporary files; replaced by -t */
int delexit();				/* clean up and exit(1); also the SIGINT handler */
164 
165 main(argc, argv)
166 	int	argc;
167 	char 	**argv;
168 {
169 	char	*sbrk();
170 
171 	tokfilename[0] = 0;
172 	strfilename[0] = 0;
173 	endcore = sbrk(0);
174 
175 	argprocess(argc, argv);		/* process argument lists */
176 	if (anyerrs) exit(1);
177 
178 	initialize();
179 	zeroorigins();			/* set origins to zero */
180 	zerolocals();			/* fix local label counters */
181 
182 	i_pass1();			/* open temp files, etc */
183 	pass1();			/* first pass through .s files */
184 	testlocals();			/* check for undefined locals */
185 	if (anyerrs) delexit();
186 
187 	pass1_5();			/* resolve jxxx */
188 	if (anyerrs) delexit();
189 
190 	open_a_out();			/* open a.out */
191 	roundsegments();		/* round segments to FW */
192 	build_hdr();			/* build initial header, and output */
193 
194 	i_pass2();			/* reopen temporary file, etc */
195 	pass2();			/* second pass through the virtual .s */
196 	if (anyerrs) delexit();
197 
198 	fillsegments();			/* fill segments with 0 to FW */
199 	reloc_syms();			/* dump relocation and symbol table */
200 
201 	delete();			/* remove tmp file */
202 	bflush();			/* close off block I/O view of a.out */
203 	fix_a_out();			/* add in text and data reloc counts */
204 
205 	if (anyerrs == 0 && orgwarn)
206 		yyerror("Caution: absolute origins.\n");
207 
208 	if (nGHnumbers)
209 		yywarning("Caution: G or H format floating point numbers");
210 	if (nGHopcodes)
211 		yywarning("Caution: G or H format floating point operators");
212 	if (nnewopcodes)
213 		yywarning("Caution: New Opcodes");
214 	if (nGHnumbers || nGHopcodes || nnewopcodes)
215 		yywarning("These are not defined for all implementations of the VAX architecture.\n");
216 
217 	exit(anyerrs != 0);
218 }
219 
220 argprocess(argc, argv)
221 	int	argc;
222 	char	*argv[];
223 {
224 	register	char	*cp;
225 
226 	ninfiles = 0;
227 	silent = 0;
228 #ifdef DEBUG
229 	debug = 0;
230 #endif
231 	innames = (char **)ClearCalloc(argc+1, sizeof (innames[0]));
232 	dotsname = "<argv error>";
233 	while (argc > 1) {
234 		if (argv[1][0] != '-')
235 			innames[ninfiles++] = argv[1];
236 		else {
237 			cp = argv[1] + 1;
238 			/*
239 			 *	We can throw away single minus signs, so
240 			 *	that make scripts for the PDP 11 assembler work
241 			 *	on this assembler too
242 			 */
243 			while (*cp){
244 				switch(*cp++){
245 				 default:
246 					yyerror("Unknown flag: %c", *--cp);
247 					cp++;
248 					break;
249 				 case 'v':
250 					selfwhat(stdout);
251 					exit(1);
252 				 case 'd':
253 					d124 = *cp++ - '0';
254 					if ( (d124 != 1) && (d124 != 2) &&
255 					     (d124 != 4)){
256 						yyerror("-d[124] only");
257 						exit(1);
258 					}
259 					break;
260 				 case 'a':
261 					maxalign = atoi(cp+1);
262 					for (cp++; isdigit(*cp); cp++)
263 						/*VOID*/;
264 					if ( (maxalign > 16) || (maxalign < 0)){
265 						yyerror("-a: 0<=align<=16");
266 						exit(1);
267 					}
268 					break;
269 				 case 'o':
270 					if (argc < 3){
271 						yyerror("-o what???");
272 						exit(1);
273 					}
274 					outfile = argv[2];
275 				   bumpone:
276 					argc -= 2;
277 					argv += 2;
278 					goto nextarg;
279 
280 				 case 't':
281 					if (argc < 3){
282 						yyerror("-t what???");
283 						exit(1);
284 					}
285 					tmpdirprefix = argv[2];
286 					goto bumpone;
287 
288 				 case 'V':
289 					useVM = 1;
290 					break;
291 				 case 'W':
292 					silent = 1;
293 					break;
294 				 case 'L':
295 					savelabels = 1;
296 					break;
297 				 case 'J':
298 					jxxxJUMP = 1;
299 					break;
300 #ifdef DEBUG
301 				 case 'D':
302 					debug = 1;
303 					break;
304 				 case 'T':
305 					toktrace = 1;
306 					break;
307 #endif
308 				 case 'R':
309 					readonlydata = 1;
310 					break;
311 				}	/*end of the switch*/
312 			}	/*end of pulling out all arguments*/
313 		}	/*end of a flag argument*/
314 		--argc; ++argv;
315 	   nextarg:;
316 	}
317 	/* innames[ninfiles] = 0; */
318 }
/*
 *	Poke through the data space and find all sccs identifiers;
 *	each one is written to place on a line of its own, preceded
 *	by a tab.  An identifier starts after the four characters
 *	"@(#)" and runs up to, but not including, the first NUL, '>'
 *	or newline.
 *
 *	We assume:
 *	a) that extern char **environ; is the first thing in the bss
 *	segment (true, if one is using the new version of cmgt.crt0.c)
 *	b) that the sccsid's have not been put into text space.
 *
 *	The region searched runs from &environ up to the current break,
 *	sbrk(0).  The search stops while a complete four character
 *	marker still fits below the break, so that nothing at or past
 *	the break is ever examined.
 */
selfwhat(place)
	FILE	*place;
{
	extern	char **environ;
	register	char	*ub;
	register	char *cp;
	char	*sbrk();

	ub = sbrk(0);
	for (cp = (char *)&environ; ub - cp >= 4; cp++){
		if (cp[0] != '@') continue;
		if (cp[1] != '(') continue;
		if (cp[2] != '#') continue;
		if (cp[3] != ')') continue;
		fputc('\t', place);
		for (cp += 4; cp < ub; cp++){
			if (*cp == 0) break;
			if (*cp == '>') break;
			if (*cp == '\n') break;
			fputc(*cp, place);
		}
		fputc('\n', place);
	}
}
350 
351 initialize()
352 {
353 	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
354 		signal(SIGINT, delexit);
355 	/*
356 	 *	Install symbols in the table
357 	 */
358 	symtabinit();
359 	syminstall();
360 	/*
361 	 *	Build the expression parser accelerator token sets
362 	 */
363 	buildtokensets();
364 }
365 
366 zeroorigins()
367 {
368 	register	int	locindex;
369 	/*
370 	 *	Mark usedot: the first NLOC slots are for named text segments,
371 	 *	the next for named data segments.
372 	 */
373 	for (locindex = 0; locindex < NLOC; locindex++){
374 		usedot[locindex].e_xtype = XTEXT;
375 		usedot[NLOC + locindex].e_xtype = XDATA;
376 		usedot[locindex].e_xvalue = 0;
377 		usedot[NLOC + locindex].e_xvalue = 0;
378 	}
379 }
380 
381 zerolocals()
382 {
383 	register	int	i;
384 
385 	for (i = 0; i <= 9; i++) {
386 		lgensym[i] = 1;
387 		genref[i] = 0;
388 	}
389 }
390 
391 i_pass1()
392 {
393 	FILE	*tempopen();
394 	if (useVM == 0)
395 		tokfile = tempopen(tokfilename, "T");
396 	strfile = tempopen(strfilename, "S");
397 	/*
398 	 *	write out the string length.
399 	 *	This will be overwritten when the
400 	 *	strings are tacked onto the growing a.out file
401 	 */
402 	strfilepos = sizeof(int);
403 	fwrite(&strfilepos, sizeof(int), 1, strfile);
404 
405 	inittokfile();
406 	initijxxx();
407 }
408 
409 FILE *tempopen(tname, part)
410 	char	*tname;
411 	char	*part;
412 {
413 	FILE	*file;
414 	sprintf(tname, "%s%sas%s%05d",
415 		tmpdirprefix,
416 		(tmpdirprefix[strlen(tmpdirprefix)-1] != '/') ? "/" : 0,
417 		part,
418 		getpid());
419 	file = fopen(tname, "w");
420 	if (file == NULL) {
421 		yyerror("Bad pass 1 temporary file for writing %s", tname);
422 		delexit();
423 	}
424 	return(file);
425 }
426 
427 pass1()
428 {
429 	register	int	i;
430 
431 	passno = 1;
432 	dotp = &usedot[0];
433 	txtfil = (BFILE *)0;
434 	relfil = (struct relbufdesc *)0;
435 
436 	if (ninfiles == 0){		/*take the input from stdin directly*/
437 		lineno = 1;
438 		dotsname = "<stdin>";
439 
440 		yyparse();
441 	} else {		/*we have the names tanked*/
442 		for (i = 0; i < ninfiles; i++){
443 			new_dot_s(innames[i]);
444 			if (freopen(innames[i], "r", stdin) == NULL) {
445 				yyerror( "Can't open source file %s\n",
446 					innames[i]);
447 				exit(2);
448 			}
449 			/* stdio is NOT used to read the input characters */
450 			/* we use read directly, into our own buffers */
451 			yyparse();
452 		}
453 	}
454 
455 	closetokfile();		/*kick out the last buffered intermediate text*/
456 }
457 
458 testlocals()
459 {
460 	register	int	i;
461 	for (i = 0; i <= 9; i++) {
462 		if (genref[i])
463 			yyerror("Reference to undefined local label %df", i);
464 		lgensym[i] = 1;
465 		genref[i] = 0;
466 	}
467 }
468 
/*
 *	Between the passes: sort the symbol table and then resolve
 *	the jxxx instructions.  When compiled with DEBUG and run
 *	with debugging on, the symbol table is dumped before and after
 *	the jxxx resolution.
 */
pass1_5()
{
	sortsymtab();
#ifdef DEBUG
	if (debug) {
		dumpsymtab();
	}
#endif

	jxxxfix();
#ifdef DEBUG
	if (debug) {
		dumpsymtab();
	}
#endif
}
480 
481 open_a_out()
482 {
483 	/*
484 	 *	Open up the a.out file now, and get set to build
485 	 *	up offsets into it for all of the various text,data
486 	 *	text relocation and data relocation segments.
487 	 */
488 	a_out_file = fopen(outfile, "w");
489 	if (a_out_file == NULL) {
490 		yyerror("Cannot create %s", outfile);
491 		delexit();
492 	}
493 	biofd = a_out_file->_file;
494 	a_out_off = 0;
495 }
496 
497 roundsegments()
498 {
499 	register	int	locindex;
500 	register	long	v;
501 	/*
502 	 *	round and assign text segment origins
503 	 *	the exec header always goes in usefile[0]
504 	 */
505 	tsize = 0;
506 	for (locindex=0; locindex<NLOC; locindex++) {
507 		v = round(usedot[locindex].e_xvalue, FW);
508 		usedot[locindex].e_xvalue = tsize;
509 		if ((locindex == 0) || (v != 0) ){
510 			usefile[locindex] = (BFILE *)Calloc(1, sizeof(BFILE));
511 			bopen(usefile[locindex], a_out_off);
512 			if (locindex == 0)
513 				a_out_off = sizeof (struct exec);
514 		} else {
515 			usefile[locindex] = (BFILE *)-1;
516 		}
517 		tsize += v;
518 		a_out_off += v;
519 	}
520 	/*
521 	 *		Round and assign data segment origins.
522 	 */
523 	datbase = round(tsize, FW);
524 	for (locindex=0; locindex<NLOC; locindex++) {
525 		v = round(usedot[NLOC+locindex].e_xvalue, FW);
526 		usedot[NLOC+locindex].e_xvalue = datbase + dsize;
527 		if (v != 0){
528 			usefile[NLOC + locindex] = (BFILE *)Calloc(1,sizeof(BFILE));
529 			bopen(usefile[NLOC + locindex], a_out_off);
530 		} else {
531 			usefile[NLOC + locindex] = (BFILE *)-1;
532 		}
533 		dsize += v;
534 		a_out_off += v;
535 	}
536 	/*
537 	 *	Assign final values to symbols
538 	 */
539 	hdr.a_bss = dsize;
540 	freezesymtab();		/* this touches hdr.a_bss */
541 	stabfix();
542 	/*
543 	 *	Set up the relocation information "files" to
544 	 *	be zero; outrel takes care of the rest
545 	 */
546 	for (locindex = 0; locindex < NLOC + NLOC; locindex++){
547 		rusefile[locindex] = (struct relbufdesc *)0;
548 	}
549 }
550 
551 build_hdr()
552 {
553 	/*
554 	 *	Except for the text and data relocation sizes,
555 	 *	calculate the final values for the header
556 	 *
557 	 *	Write out the initial copy; we to come
558 	 *	back later and patch up a_trsize and a_drsize,
559 	 *	and overwrite this first version of the header.
560 	 */
561 	hdr.a_magic = MAGIC;
562 	hdr.a_text = tsize;
563 	hdr.a_data = dsize;
564 	hdr.a_bss -= dsize;
565 	hdr.a_syms = sizesymtab();	/* Does not include string pool length */
566 	hdr.a_entry = 0;
567 	hdr.a_trsize = 0;
568 	hdr.a_drsize = 0;
569 
570 	bwrite((char *)&hdr, sizeof(hdr), usefile[0]);
571 }
572 
573 i_pass2()
574 {
575 	if (useVM == 0) {
576 		fclose(tokfile);
577 		tokfile = fopen(tokfilename, "r");
578 		if (tokfile==NULL) {
579 		   yyerror("Bad pass 2 temporary file for reading %s", tokfilename);
580 		   delexit();
581 		}
582 	}
583 	fclose(strfile);
584 	strfile = fopen(strfilename, "r");
585 }
586 
587 pass2()
588 {
589 #ifdef DEBUG
590 	if (debug)
591 		printf("\n\n\n\t\tPASS 2\n\n\n\n");
592 #endif DEBUG
593 	passno = 2;
594 	lineno = 1;
595 	dotp = &usedot[0];
596 	txtfil = usefile[0];	/* already opened (always!) */
597 	relfil = 0;		/* outrel takes care of the rest */
598 	initoutrel();
599 
600 	inittokfile();
601 
602 	yyparse();
603 
604 	closetokfile();
605 }
606 
607 fillsegments()
608 {
609 	int	locindex;
610 	/*
611 	 *	Round text and data segments to FW by appending zeros
612 	 */
613 	for (locindex = 0; locindex < NLOC + NLOC; locindex++) {
614 		if (usefile[locindex]) {
615 			txtfil = usefile[locindex];
616 			dotp = &usedot[locindex];
617 			while (usedot[locindex].e_xvalue & FW)
618 				outb(0);
619 		}
620 	}
621 }
622 
623 reloc_syms()
624 {
625 	u_long	closerelfil();
626 	/*
627 	 *	Move the relocation information to a.out
628 	 *	a_out_off is the offset so far:
629 	 *	exec + text segments + data segments
630 	 */
631 	relocfile = (BFILE *)Calloc(1,sizeof(BFILE));
632 	bopen(relocfile, a_out_off);
633 	a_out_off += closeoutrel(relocfile);
634 
635 	hdr.a_trsize = trsize;
636 	hdr.a_drsize = drsize;
637 	if (readonlydata) {
638 		hdr.a_text += hdr.a_data;
639 		hdr.a_data = 0;
640 		hdr.a_trsize += hdr.a_drsize;
641 		hdr.a_drsize = 0;
642 	}
643 	/*
644 	 *	Output the symbol table and the string pool
645 	 *
646 	 *	We must first rewind the string pool file to its beginning,
647 	 *	in case it was seek'ed into for fetching ascii and asciz
648 	 *	strings.
649 	 */
650 	fseek(strfile, 0, 0);
651 	symwrite(relocfile);
652 }
653 
654 fix_a_out()
655 {
656 	if (lseek(a_out_file->_file, 0L, 0) < 0L)
657 		yyerror("Reposition for header rewrite fails");
658 	if (write(a_out_file->_file, (char *)&hdr, sizeof (struct exec)) < 0)
659 		yyerror("Rewrite of header fails");
660 }
661 
662 delexit()
663 {
664 	delete();
665 	if (passno == 2){
666 		unlink(outfile);
667 	}
668 	exit(1);
669 }
670 
671 delete()
672 {
673 	if (useVM == 0 || tokfilename[0])
674 		unlink(tokfilename);
675 	if (strfilename[0])
676 		unlink(strfilename);
677 }
678 
/*
 *	Keep calling fillinbuffer until it returns a null pointer,
 *	discarding whatever it reads, then remove the temporary files
 *	and exit with status 1.
 */
sawabort()
{
	char	*fillinbuffer();

	for (;;) {
		if (fillinbuffer() == (char *)0)
			break;
	}

	delete();
	exit(1);	/*although the previous pass will also exit non zero*/
}
687 
/*
 *	Report an internal inconsistency and die.
 *
 *	fmt and up to four int sized arguments are handed on to yyerror
 *	after a fixed "Assembler panic" message; the temporary files are
 *	then removed and abort() is called.
 */
panic(fmt, a1, a2, a3, a4)
	char	*fmt;
	int	a1, a2, a3, a4;
	/*VARARGS 1*/
{
	yyerror("Assembler panic: bad internal data structure.");
	yyerror(fmt, a1, a2, a3, a4);

	delete();
	abort();
}
697