xref: /csrg-svn/old/as.tahoe/asmain.c (revision 32434)
1 /*
2  *	Copyright (c) 1982 Regents of the University of California
3  */
4 #ifndef lint
5 static char sccsid[] = "@(#)asmain.c 4.13 6/30/83";
6 #endif not lint
7 
8 #include <stdio.h>
9 #include <ctype.h>
10 #include <signal.h>
11 
12 #include "as.h"
13 #include "assyms.h"
14 #include "asscan.h"
15 #include "asexpr.h"
16 
17 #define	unix_lang_name "VAX/UNIX Assembler V6/30/83 4.13"
/*
 *	variables to manage reading the assembly source files
 */
char	*dotsname;	/*the current file name; managed by the parser*/
int	lineno;		/*current line number; managed by the parser*/
char	**innames;	/*names of the files being assembled*/
int	ninfiles;	/*how many interesting files there are*/
FILE	*source;	/*current source file (for listing) */
char	layout[400];	/*layout bytes */
char	*layoutpos = layout;	/*layout position in listfile */
int	ind = 0;	/*innames in-index: 0..ninfiles */
int	endofsource = 0;	/* set to 1 by pass2 when there is no source text to list */
long	sourcepos;
/*
 *	Flags settable from the argv process argument list
 *	(the option letters are decoded in argprocess)
 */
int	silent = 0;	/*don't complain about any errors (-W)*/
int	savelabels = 0;	/*write the labels to the a.out file (-L)*/
int 	d124 = 4;	/*default allocate 4 bytes for unknown pointers (-d1, -d2, -d4)*/
int	anyerrs = 0;	/*no errors yet*/
int	anywarnings=0;	/*no warnings yet*/
int	orgwarn = 0;	/*Bad origins*/
int	passno = 1;	/* current pass*/
int	jxxxJUMP = 0;	/* in jxxxes that branch too far, use jmp instead of brw (-J) */
int	readonlydata = 0;	/* initialized data -> text space (-R) */
int	liston = 0;	/* don't produce listing; -P turns the listing on */


#ifdef DEBUG
int 	debug = 0;	/* -D: enables the debugging output guarded by "if (debug)" */
int	toktrace = 0;	/* -T */
#endif

int	useVM =	0;	/* -V: when zero, i_pass1 opens a temporary file for the tokens */
52 
char	*endcore;	/*where to get more symbol space; main sets it to sbrk(0)*/

/*
 *	Managers of the a.out file.
 *
 *	hdr is filled in by build_hdr and written once at the front of
 *	a.out; reloc_syms later stores the relocation sizes in it and
 *	fix_a_out rewrites it in place.
 */
struct	exec	hdr;
#define	MAGIC	0407
u_long	tsize;		/* total text size */
u_long	dsize;		/* total data size */
u_long	datbase;	/* base of the data segment */
u_long	trsize;		/* total text relocation size */
u_long	drsize;		/* total data relocation size */

/*
 *	Information about the current segment is accumulated in
 *	usedot; the most important information stored is the
 *	accumulated size of each of the text and data segments
 *
 *	The first NLOC slots of usedot describe the text segments and
 *	the second NLOC slots the data segments (see zeroorigins).
 *
 *	dotp points to the correct usedot expression for the current segment
 */
struct	exp	usedot[NLOC+NLOC];	/* info about all segments */
struct	exp	*dotp;			/* data/text location pointer */
/*
 *	The inter pass temporary token file is opened and closed by stdio, but
 *	is written to using direct read/write, as the temporary file
 *	is composed of buffers exactly BUFSIZ long.
 *
 *	The file names are built by tempopen; an empty name (first byte
 *	zero) means the file has not been created, which delete relies on.
 */
FILE	*tokfile;			/* interpass communication file */
char	tokfilename[TNAMESIZE];
/*
 *	The string file is the string table
 *	cat'ed to the end of the built up a.out file
 */
FILE	*strfile;			/* interpass string file */
char	strfilename[TNAMESIZE];
int	strfilepos = 0;			/* position within the string file */
/*
 *	a.out is created during the second pass.
 *	It is opened by stdio, but is filled with the parallel
 *	block I/O library
 */
char	*outfile = "a.out";		/* output name; replaced by the -o argument */
FILE	*a_out_file;
off_t	a_out_off;			/* cumulative offsets for segments */
/*
 *	The logical files containing the assembled data for each of
 *	the text and data segments are
 *	managed by the parallel block I/O library.
 *	a.out is logically opened in many places at once to
 *	receive the assembled data from the various segments as
 *	it all trickles in, but is physically opened only once
 *	to minimize file overhead.
 *
 *	roundsegments stores (BFILE *)-1 in the usefile slot of every
 *	segment that turned out to be empty.
 */
BFILE	*usefile[NLOC+NLOC];		/* text/data files */
BFILE	*txtfil;			/* current text/data file */
/*
 *	Relocation information is accumulated separately for each
 *	segment.  This is required by the old loader (from BTL),
 *	but not by the new loader (Bill Joy).
 *
 *	However, the size of the relocation information can not be computed
 *	during or after the 1st pass because the ``absoluteness'' of values
 *	is unknown until all locally declared symbols have been seen.
 *	Thus, the size of the relocation information is only
 *	known after the second pass is finished.
 *	This rules out the use of the block I/O
 *	library, which requires knowing the exact offsets in a.out.
 *
 *	So, we save the relocation information internally (we don't
 *	go to internal files to minimize overhead).
 *
 *	Empirically, we studied 259 files composing the system,
 *	two compilers and a compiler generator: (all of which have
 *	fairly large source files)
 *
 *	Number of files = 259
 *		Number of non zero text reloc files: 233
 *		Number of non zero data reloc files: 53
 *	Average text relocation = 889
 *	Average data relocation = 346
 *	Number of files > BUFSIZ text relocation = 71
 *	Number of files > BUFSIZ data relocation = 6
 *
 *	For compiled C code, there is usually one text segment and two
 *	data segments; we see that allocating our own buffers and
 *	doing our internal handling of relocation information will,
 *	on the average, not use more memory than taken up by the buffers
 *	allocated for doing file I/O in parallel to a number of files.
 *
 *	If we are assembling with the -V option, we
 *	use the left over token buffers from the 2nd pass,
 *	otherwise, we create our own.
 *
 *	When the 2nd pass is complete, closeoutrel flushes the token
 *	buffers out to a BFILE.
 *
 *	The internals to relbufdesc are known only in assyms.c
 *
 *	outrel constructs the relocation information.
 *	closeoutrel flushes the relocation information to relfil.
 */
struct	relbufdesc	*rusefile[NLOC+NLOC];
struct	relbufdesc 	*relfil;	/* unconcatenated relocation info */
BFILE	*relocfile;			/* concatenated relocation info */
/*
 *	Once the relocation information has been written,
 *	we can write out the symbol table using the Block I/O
 *	mechanisms, as we once again know the offsets into
 *	the a.out file.
 *
 *	We use relfil to output the symbol table information.
 *	NOTE(review): reloc_syms in this file hands relocfile, not relfil,
 *	to symwrite -- confirm which name the sentence above should use.
 */
char	*tmpdirprefix = "/tmp/";	/* directory for temporary files; replaced by the -t argument */
int delexit();				/* defined below; also installed as the SIGINT handler */
167 
168 main(argc, argv)
169 	int	argc;
170 	char 	**argv;
171 {
172 	char	*sbrk();
173 
174 	tokfilename[0] = 0;
175 	strfilename[0] = 0;
176 	endcore = sbrk(0);
177 
178 	argprocess(argc, argv);		/* process argument lists */
179 	if (anyerrs) exit(1);
180 
181 	initialize();
182 	zeroorigins();			/* set origins to zero */
183 	zerolocals();			/* fix local label counters */
184 
185 	i_pass1();			/* open temp files, etc */
186 	pass1();			/* first pass through .s files */
187 	testlocals();			/* check for undefined locals */
188 	if (anyerrs) delexit();
189 
190 	pass1_5();			/* resolve jxxx */
191 	if (anyerrs) delexit();
192 
193 	open_a_out();			/* open a.out */
194 	roundsegments();		/* round segments to FW */
195 	build_hdr();			/* build initial header, and output */
196 
197 	i_pass2();			/* reopen temporary file, etc */
198 	pass2();			/* second pass through the virtual .s */
199 	if (anyerrs) delexit();
200 
201 	fillsegments();			/* fill segments with 0 to FW */
202 	reloc_syms();			/* dump relocation and symbol table */
203 
204 	delete();			/* remove tmp file */
205 	bflush();			/* close off block I/O view of a.out */
206 	fix_a_out();			/* add in text and data reloc counts */
207 
208 	if (anyerrs == 0 && orgwarn)
209 		yyerror("Caution: absolute origins.\n");
210 
211 	exit(anyerrs != 0);
212 }
213 
214 argprocess(argc, argv)
215 	int	argc;
216 	char	*argv[];
217 {
218 	register	char	*cp;
219 
220 	ninfiles = 0;
221 	silent = 0;
222 #ifdef DEBUG
223 	debug = 0;
224 #endif
225 	innames = (char **)ClearCalloc(argc+1, sizeof (innames[0]));
226 	dotsname = "<argv error>";
227 	while (argc > 1) {
228 		if (argv[1][0] != '-')
229 			innames[ninfiles++] = argv[1];
230 		else {
231 			cp = argv[1] + 1;
232 			/*
233 			 *	We can throw away single minus signs, so
234 			 *	that make scripts for the PDP 11 assembler work
235 			 *	on this assembler too
236 			 */
237 			while (*cp){
238 				switch(*cp++){
239 				 default:
240 					yyerror("Unknown flag: %c", *--cp);
241 					cp++;
242 					break;
243 				 case 'v':
244 					selfwhat(stdout);
245 					exit(1);
246 				 case 'd':
247 					d124 = *cp++ - '0';
248 					if ( (d124 != 1) && (d124 != 2) &&
249 					     (d124 != 4)){
250 						yyerror("-d[124] only");
251 						exit(1);
252 					}
253 					break;
254 				 case 'P':
255 					liston = 1;
256 					listfile = stdout;
257 					break;
258 				 case 'o':
259 					if (argc < 3){
260 						yyerror("-o what???");
261 						exit(1);
262 					}
263 					outfile = argv[2];
264 				   bumpone:
265 					argc -= 2;
266 					argv += 2;
267 					goto nextarg;
268 
269 				 case 't':
270 					if (argc < 3){
271 						yyerror("-t what???");
272 						exit(1);
273 					}
274 					tmpdirprefix = argv[2];
275 					goto bumpone;
276 
277 				 case 'V':
278 					useVM = 1;
279 					break;
280 				 case 'W':
281 					silent = 1;
282 					break;
283 				 case 'L':
284 					savelabels = 1;
285 					break;
286 				 case 'J':
287 					jxxxJUMP = 1;
288 					break;
289 #ifdef DEBUG
290 				 case 'D':
291 					debug = 1;
292 					break;
293 				 case 'T':
294 					toktrace = 1;
295 					break;
296 #endif
297 				 case 'R':
298 					readonlydata = 1;
299 					break;
300 				}	/*end of the switch*/
301 			}	/*end of pulling out all arguments*/
302 		}	/*end of a flag argument*/
303 		--argc; ++argv;
304 	   nextarg:;
305 	}
306 	/* innames[ninfiles] = 0; */
307 }
/*
 *	poke through the data space and find all sccs identifiers.
 *	Each identifier found is written to place as a tab, the text
 *	following the "@(#)" marker up to (not including) the first
 *	NUL, '>' or newline, and then a newline.
 *
 *	We assume:
 *	a) that extern char **environ; is the first thing in the bss
 *	segment (true, if one is using the new version of cmgt.crt0.c)
 *	b) that the sccsid's have not been put into text space.
 */
selfwhat(place)
	FILE	*place;
{
	extern	char **environ;
	register	char	*ub;
	register	char *cp;
	char	*sbrk();

	ub = sbrk(0);
	/*
	 *	The marker is four bytes long, so only start a match
	 *	where cp[0] through cp[3] all lie below the break;
	 *	looking at cp[3] when cp is within three bytes of ub
	 *	would read memory the process does not own.
	 */
	for (cp = (char *)&environ; ub - cp > 3; cp++){
		if (cp[0] != '@') continue;
		if (cp[1] != '(') continue;
		if (cp[2] != '#') continue;
		if (cp[3] != ')') continue;
		fputc('\t', place);
		for (cp += 4; cp < ub; cp++){
			if (*cp == 0) break;
			if (*cp == '>') break;
			if (*cp == '\n') break;
			fputc(*cp, place);
		}
		fputc('\n', place);
	}
}
338 
339 initialize()
340 {
341 	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
342 		signal(SIGINT, delexit);
343 	/*
344 	 *	Install symbols in the table
345 	 */
346 	symtabinit();
347 	syminstall();
348 	/*
349 	 *	Build the expression parser accelerator token sets
350 	 */
351 	buildtokensets();
352 }
353 
354 zeroorigins()
355 {
356 	register	int	locindex;
357 	/*
358 	 *	Mark usedot: the first NLOC slots are for named text segments,
359 	 *	the next for named data segments.
360 	 */
361 	for (locindex = 0; locindex < NLOC; locindex++){
362 		usedot[locindex].e_xtype = XTEXT;
363 		usedot[NLOC + locindex].e_xtype = XDATA;
364 		usedot[locindex].e_xvalue = 0;
365 		usedot[NLOC + locindex].e_xvalue = 0;
366 	}
367 }
368 
369 zerolocals()
370 {
371 	register	int	i;
372 
373 	for (i = 0; i <= 9; i++) {
374 		lgensym[i] = 1;
375 		genref[i] = 0;
376 	}
377 }
378 
379 i_pass1()
380 {
381 	FILE	*tempopen();
382 	if (useVM == 0)
383 		tokfile = tempopen(tokfilename, "T");
384 	strfile = tempopen(strfilename, "S");
385 	/*
386 	 *	write out the string length.
387 	 *	This will be overwritten when the
388 	 *	strings are tacked onto the growing a.out file
389 	 */
390 	strfilepos = sizeof(int);
391 	fwrite(&strfilepos, sizeof(int), 1, strfile);
392 
393 	inittokfile();
394 	initijxxx();
395 }
396 
397 FILE *tempopen(tname, part)
398 	char	*tname;
399 	char	*part;
400 {
401 	FILE	*file;
402 	sprintf(tname, "%s%sas%s%05d",
403 		tmpdirprefix,
404 		(tmpdirprefix[strlen(tmpdirprefix)-1] != '/') ? "/" : 0,
405 		part,
406 		getpid());
407 	file = fopen(tname, "w");
408 	if (file == NULL) {
409 		yyerror("Bad pass 1 temporary file for writing %s", tname);
410 		delexit();
411 	}
412 	return(file);
413 }
414 
415 pass1()
416 {
417 	register	int	i;
418 
419 	passno = 1;
420 	dotp = &usedot[0];
421 	txtfil = (BFILE *)0;
422 	relfil = (struct relbufdesc *)0;
423 
424 	if (ninfiles == 0){		/*take the input from stdin directly*/
425 		lineno = 1;
426 		dotsname = "<stdin>";
427 
428 		yyparse();
429 	} else {		/*we have the names tanked*/
430 		for (i = 0; i < ninfiles; i++){
431 			new_dot_s(innames[i]);
432 			if (freopen(innames[i], "r", stdin) == NULL) {
433 				yyerror( "Can't open source file %s\n",
434 					innames[i]);
435 				exit(2);
436 			}
437 			/* stdio is NOT used to read the input characters */
438 			/* we use read directly, into our own buffers */
439 			yyparse();
440 		}
441 	}
442 
443 	closetokfile();		/*kick out the last buffered intermediate text*/
444 }
445 
446 testlocals()
447 {
448 	register	int	i;
449 	for (i = 0; i <= 9; i++) {
450 		if (genref[i])
451 			yyerror("Reference to undefined local label %df", i);
452 		lgensym[i] = 1;
453 		genref[i] = 0;
454 	}
455 }
456 
/*
 *	Between the passes: sort the symbol table and resolve the jxxx
 *	instructions.  When built with DEBUG and the debug flag is on,
 *	the symbol table is dumped before and after the jxxx resolution.
 */
pass1_5()
{
	sortsymtab();
#ifdef DEBUG
	if (debug) {
		dumpsymtab();
	}
#endif

	jxxxfix();
#ifdef DEBUG
	if (debug) {
		dumpsymtab();
	}
#endif
}
468 
469 open_a_out()
470 {
471 	/*
472 	 *	Open up the a.out file now, and get set to build
473 	 *	up offsets into it for all of the various text,data
474 	 *	text relocation and data relocation segments.
475 	 */
476 	a_out_file = fopen(outfile, "w");
477 	if (a_out_file == NULL) {
478 		yyerror("Cannot create %s", outfile);
479 		delexit();
480 	}
481 	biofd = a_out_file->_file;
482 	a_out_off = 0;
483 }
484 
485 roundsegments()
486 {
487 	register	int	locindex;
488 	register	long	v;
489 	/*
490 	 *	round and assign text segment origins
491 	 *	the exec header always goes in usefile[0]
492 	 */
493 	tsize = 0;
494 	for (locindex=0; locindex<NLOC; locindex++) {
495 		v = round(usedot[locindex].e_xvalue, FW);
496 		usedot[locindex].e_xvalue = tsize;
497 		if ((locindex == 0) || (v != 0) ){
498 			usefile[locindex] = (BFILE *)Calloc(1, sizeof(BFILE));
499 			bopen(usefile[locindex], a_out_off);
500 			if (locindex == 0)
501 				a_out_off = sizeof (struct exec);
502 		} else {
503 			usefile[locindex] = (BFILE *)-1;
504 		}
505 		tsize += v;
506 		a_out_off += v;
507 	}
508 	/*
509 	 *		Round and assign data segment origins.
510 	 */
511 	datbase = round(tsize, FW);
512 	for (locindex=0; locindex<NLOC; locindex++) {
513 		v = round(usedot[NLOC+locindex].e_xvalue, FW);
514 		usedot[NLOC+locindex].e_xvalue = datbase + dsize;
515 		if (v != 0){
516 			usefile[NLOC + locindex] = (BFILE *)Calloc(1,sizeof(BFILE));
517 			bopen(usefile[NLOC + locindex], a_out_off);
518 		} else {
519 			usefile[NLOC + locindex] = (BFILE *)-1;
520 		}
521 		dsize += v;
522 		a_out_off += v;
523 	}
524 	/*
525 	 *	Assign final values to symbols
526 	 */
527 	hdr.a_bss = dsize;
528 	freezesymtab();		/* this touches hdr.a_bss */
529 	stabfix();
530 	/*
531 	 *	Set up the relocation information "files" to
532 	 *	be zero; outrel takes care of the rest
533 	 */
534 	for (locindex = 0; locindex < NLOC + NLOC; locindex++){
535 		rusefile[locindex] = (struct relbufdesc *)0;
536 	}
537 }
538 
539 build_hdr()
540 {
541 	/*
542 	 *	Except for the text and data relocation sizes,
543 	 *	calculate the final values for the header
544 	 *
545 	 *	Write out the initial copy; we to come
546 	 *	back later and patch up a_trsize and a_drsize,
547 	 *	and overwrite this first version of the header.
548 	 */
549 	hdr.a_magic = MAGIC;
550 	hdr.a_text = tsize;
551 	hdr.a_data = dsize;
552 	hdr.a_bss -= dsize;
553 	hdr.a_syms = sizesymtab();	/* Does not include string pool length */
554 	hdr.a_entry = 0;
555 	hdr.a_trsize = 0;
556 	hdr.a_drsize = 0;
557 
558 	bwrite((char *)&hdr, sizeof(hdr), usefile[0]);
559 }
560 
561 i_pass2()
562 {
563 	if (useVM == 0) {
564 		fclose(tokfile);
565 		tokfile = fopen(tokfilename, "r");
566 		if (tokfile==NULL) {
567 		   yyerror("Bad pass 2 temporary file for reading %s", tokfilename);
568 		   delexit();
569 		}
570 	}
571 	fclose(strfile);
572 	strfile = fopen(strfilename, "r");
573 }
574 
575 pass2()
576 {
577 #ifdef DEBUG
578 	if (debug)
579 		printf("\n\n\n\t\tPASS 2\n\n\n\n");
580 #endif DEBUG
581 	passno = 2;
582 	lineno = 1;
583 	if (liston && ninfiles != 0)
584 	{
585 		char ch;
586 		source = fopen (innames[ind++], "r");
587 		sprintf (layoutpos, "%4ld  00000000    ", lineno);
588 		layoutpos += LHEAD;
589 		ch = getc (source);
590 		if (ch == EOF)
591 		{
592 			if (ind == ninfiles)
593 				endofsource = 1;
594 			else
595 				source = fopen (innames[ind++], "r");
596 		}
597 		else
598 			ungetc (ch, source);
599 	}
600 	else
601 		endofsource = 1;
602 	dotp = &usedot[0];
603 	txtfil = usefile[0];	/* already opened (always!) */
604 	relfil = 0;		/* outrel takes care of the rest */
605 	initoutrel();
606 
607 	inittokfile();
608 
609 	yyparse();
610 
611 	closetokfile();
612 }
613 
614 fillsegments()
615 {
616 	int	locindex;
617 	/*
618 	 *	Round text and data segments to FW by appending zeros
619 	 */
620 	for (locindex = 0; locindex < NLOC + NLOC; locindex++) {
621 		if (usefile[locindex]) {
622 			txtfil = usefile[locindex];
623 			dotp = &usedot[locindex];
624 			while (usedot[locindex].e_xvalue & FW)
625 				outb(0);
626 		}
627 	}
628 }
629 
630 reloc_syms()
631 {
632 	u_long	closerelfil();
633 	/*
634 	 *	Move the relocation information to a.out
635 	 *	a_out_off is the offset so far:
636 	 *	exec + text segments + data segments
637 	 */
638 	relocfile = (BFILE *)Calloc(1,sizeof(BFILE));
639 	bopen(relocfile, a_out_off);
640 	a_out_off += closeoutrel(relocfile);
641 
642 	hdr.a_trsize = trsize;
643 	hdr.a_drsize = drsize;
644 	if (readonlydata) {
645 		hdr.a_text += hdr.a_data;
646 		hdr.a_data = 0;
647 		hdr.a_trsize += hdr.a_drsize;
648 		hdr.a_drsize = 0;
649 	}
650 	/*
651 	 *	Output the symbol table and the string pool
652 	 *
653 	 *	We must first rewind the string pool file to its beginning,
654 	 *	in case it was seek'ed into for fetching ascii and asciz
655 	 *	strings.
656 	 */
657 	fseek(strfile, 0, 0);
658 	symwrite(relocfile);
659 }
660 
661 fix_a_out()
662 {
663 	if (lseek(a_out_file->_file, 0L, 0) < 0L)
664 		yyerror("Reposition for header rewrite fails");
665 	if (write(a_out_file->_file, (char *)&hdr, sizeof (struct exec)) < 0)
666 		yyerror("Rewrite of header fails");
667 }
668 
669 delexit()
670 {
671 	delete();
672 	if (passno == 2){
673 		unlink(outfile);
674 	}
675 	exit(1);
676 }
677 
678 delete()
679 {
680 	if (useVM == 0 || tokfilename[0])
681 		unlink(tokfilename);
682 	if (strfilename[0])
683 		unlink(strfilename);
684 }
685 
/*
 *	Keep calling fillinbuffer until it returns a null pointer,
 *	then remove the temporary files and exit with status 1.
 */
sawabort()
{
	char	*fillinbuffer();

	for (;;) {
		if (fillinbuffer() == (char *)0)
			break;
	}
	delete();
	exit(1);	/*although the previous pass will also exit non zero*/
}
694 
/*
 *	Report an internal inconsistency and die.  A fixed banner is
 *	reported first, then fmt and up to four arguments are handed
 *	to yyerror; the temporary files are removed and the
 *	process aborts.
 */
panic(fmt, a1, a2, a3, a4)
	char	*fmt;
	int	a1, a2, a3, a4;
	/*VARARGS 1*/
{
	yyerror("Assembler panic: bad internal data structure.");
	yyerror(fmt, a1, a2, a3, a4);
	delete();
	abort();
}
704