xref: /csrg-svn/old/as.vax/asmain.c (revision 16071)
1 /*
2  *	Copyright (c) 1982 Regents of the University of California
3  */
4 #ifndef lint
5 static char sccsid[] = "@(#)asmain.c 4.15 02/17/84";
6 #endif not lint
7 
8 #include <stdio.h>
9 #include <ctype.h>
10 #include <signal.h>
11 
12 #include "as.h"
13 #include "assyms.h"
14 #include "asscan.h"
15 #include "asexpr.h"
16 
17 #include <sys/stat.h>
18 
#define	unix_lang_name "VAX/UNIX Assembler V02/17/84 4.15"
/*
 *	variables to manage reading the assembly source files
 */
char	*dotsname;	/*the current file name; managed by the parser*/
int	lineno;		/*current line number; managed by the parser*/
char	**innames;	/*names of the files being assembled*/
int	ninfiles;	/*how many interesting files there are*/
/*
 *	Flags settable from the argv process argument list.
 *	Where a variable is set by argprocess, the option letter is noted.
 */
int	silent = 0;	/*don't complain about any errors (-W)*/
int	savelabels = 0;	/*write the labels to the a.out file (-L)*/
int 	d124 = 4;	/*default allocate 4 bytes for unknown pointers (-d1, -d2 or -d4)*/
int 	maxalign = 2;	/*default .align maximum (-a); argprocess accepts 0 through 16*/
int	anyerrs = 0;	/*no errors yet*/
int	anywarnings=0;	/*no warnings yet*/
int	orgwarn = 0;	/*Bad origins*/
int	passno = 1;	/* current pass*/
int	jxxxJUMP = 0;	/* in jxxxes that branch too far, use jmp instead of brw (-J) */
int	readonlydata = 0;	/* initialized data -> text space (-R) */

/*
 *	Counters that make main issue its closing cautions about
 *	features not defined on every VAX implementation.
 */
int	nGHnumbers = 0;		/* GH numbers used */
int	nGHopcodes = 0;		/* GH opcodes used */
int	nnewopcodes = 0;	/* new opcodes used */

#ifdef DEBUG
int 	debug = 0;	/* set by -D */
int	toktrace = 0;	/* set by -T */
#endif

/*
 *	Set by -V.  When zero, the interpass token file is a temporary
 *	file on disk (see i_pass1 and i_pass2).
 */
int	useVM =	0;

char	*endcore;	/*where to get more symbol space*/
/*
 *	Managers of the a.out file.
 */
struct	exec	hdr;
#define	MAGIC	0407	/* stored in hdr.a_magic by build_hdr */
u_long	tsize;		/* total text size */
u_long	dsize;		/* total data size */
u_long	datbase;	/* base of the data segment */
u_long	trsize;		/* total text relocation size */
u_long	drsize;		/* total data relocation size */

/*
 *	Information about the current segment is accumulated in
 *	usedot; the most important information stored is the
 *	accumulated size of each of the text and data segments
 *
 *	dotp points to the correct usedot expression for the current segment
 *
 *	The first NLOC entries describe text segments and the second
 *	NLOC entries describe data segments (see zeroorigins).
 */
struct	exp	usedot[NLOC+NLOC];	/* info about all segments */
struct	exp	*dotp;			/* data/text location pointer */
/*
 *	The inter pass temporary token file is opened and closed by stdio, but
 *	is written to using direct read/write, as the temporary file
 *	is composed of buffers exactly BUFSIZ long.
 */
FILE	*tokfile;			/* interpass communication file */
char	tokfilename[TNAMESIZE];		/* empty until tempopen builds the name */
/*
 *	The string file is the string table
 *	cat'ed to the end of the built up a.out file
 */
FILE	*strfile;			/* interpass string file */
char	strfilename[TNAMESIZE];		/* empty until tempopen builds the name */
int	strfilepos = 0;			/* position within the string file */
/*
 *	a.out is created during the second pass.
 *	It is opened by stdio, but is filled with the parallel
 *	block I/O library
 */
char	*outfile = "a.out";		/* replaced by the argument of -o */
FILE	*a_out_file;
off_t	a_out_off;			/* cumulative offsets for segments */
/*
 *	The logical files containing the assembled data for each of
 *	the text and data segments are
 *	managed by the parallel block I/O library.
 *	a.out is logically opened in many places at once to
 *	receive the assembled data from the various segments as
 *	it all trickles in, but is physically opened only once
 *	to minimize file overhead.
 *
 *	roundsegments stores (BFILE *)-1 in usefile[] for a segment
 *	that gets no logical file.
 */
BFILE	*usefile[NLOC+NLOC];		/* text/data files */
BFILE	*txtfil;			/* current text/data file */
/*
 *	Relocation information is accumulated separately for each
 *	segment.  This is required by the old loader (from BTL),
 *	but not by the new loader (Bill Joy).
 *
 *	However, the size of the relocation information can not be computed
 *	during or after the 1st pass because the 'absoluteness' of values
 *	is unknown until all locally declared symbols have been seen.
 *	Thus, the size of the relocation information is only
 *	known after the second pass is finished.
 *	This obviates the use of the block I/O
 *	library, which requires knowing the exact offsets in a.out.
 *
 *	So, we save the relocation information internally (we don't
 *	go to internal files to minimize overhead).
 *
 *	Empirically, we studied 259 files composing the system,
 *	two compilers and a compiler generator: (all of which have
 *	fairly large source files)
 *
 *	Number of files = 259
 *		Number of non zero text reloc files: 233
 *		Number of non zero data reloc files: 53
 *	Average text relocation = 889
 *	Average data relocation = 346
 *	Number of files > BUFSIZ text relocation = 71
 *	Number of files > BUFSIZ data relocation = 6
 *
 *	For compiled C code, there is usually one text segment and two
 *	data segments; we see that allocating our own buffers and
 *	doing our internal handling of relocation information will,
 *	on the average, not use more memory than taken up by the buffers
 *	allocated for doing file I/O in parallel to a number of files.
 *
 *	If we are assembling with the -V option, we
 *	use the left over token buffers from the 2nd pass,
 *	otherwise, we create our own.
 *
 *	When the 2nd pass is complete, closeoutrel flushes the token
 *	buffers out to a BFILE.
 *
 *	The internals to relbufdesc are known only in assyms.c
 *
 *	outrel constructs the relocation information.
 *	closeoutrel flushes the relocation information to relfil.
 */
struct	relbufdesc	*rusefile[NLOC+NLOC];
struct	relbufdesc 	*relfil;	/* unconcatenated relocation info */
BFILE	*relocfile;			/* concatenated relocation info */
/*
 *	Once the relocation information has been written,
 *	we can write out the symbol table using the Block I/O
 *	mechanisms, as we once again know the offsets into
 *	the a.out file.
 *
 *	We use relfil to output the symbol table information.
 *
 *	NOTE(review): reloc_syms hands relocfile, not relfil, to symwrite;
 *	the sentence above may be out of date -- confirm.
 */
char	*tmpdirprefix = "/tmp/";	/* where temporary files go; replaced by the argument of -t */
int delexit();				/* remove temporary files and exit(1); also the SIGINT handler */
166 
167 main(argc, argv)
168 	int	argc;
169 	char 	**argv;
170 {
171 	char	*sbrk();
172 
173 	tokfilename[0] = 0;
174 	strfilename[0] = 0;
175 	endcore = sbrk(0);
176 
177 	argprocess(argc, argv);		/* process argument lists */
178 	if (anyerrs) exit(1);
179 
180 	initialize();
181 	zeroorigins();			/* set origins to zero */
182 	zerolocals();			/* fix local label counters */
183 
184 	i_pass1();			/* open temp files, etc */
185 	pass1();			/* first pass through .s files */
186 	testlocals();			/* check for undefined locals */
187 	if (anyerrs) delexit();
188 
189 	pass1_5();			/* resolve jxxx */
190 	if (anyerrs) delexit();
191 
192 	open_a_out();			/* open a.out */
193 	roundsegments();		/* round segments to FW */
194 	build_hdr();			/* build initial header, and output */
195 
196 	i_pass2();			/* reopen temporary file, etc */
197 	pass2();			/* second pass through the virtual .s */
198 	if (anyerrs) delexit();
199 
200 	fillsegments();			/* fill segments with 0 to FW */
201 	reloc_syms();			/* dump relocation and symbol table */
202 
203 	delete();			/* remove tmp file */
204 	bflush();			/* close off block I/O view of a.out */
205 	fix_a_out();			/* add in text and data reloc counts */
206 
207 	if (anyerrs == 0 && orgwarn)
208 		yyerror("Caution: absolute origins.\n");
209 
210 	if (nGHnumbers)
211 		yywarning("Caution: G or H format floating point numbers");
212 	if (nGHopcodes)
213 		yywarning("Caution: G or H format floating point operators");
214 	if (nnewopcodes)
215 		yywarning("Caution: New Opcodes");
216 	if (nGHnumbers || nGHopcodes || nnewopcodes)
217 		yywarning("These are not defined for all implementations of the VAX architecture.\n");
218 
219 	exit(anyerrs != 0);
220 }
221 
222 argprocess(argc, argv)
223 	int	argc;
224 	char	*argv[];
225 {
226 	register	char	*cp;
227 
228 	ninfiles = 0;
229 	silent = 0;
230 #ifdef DEBUG
231 	debug = 0;
232 #endif
233 	innames = (char **)ClearCalloc(argc+1, sizeof (innames[0]));
234 	dotsname = "<argv error>";
235 	while (argc > 1) {
236 		if (argv[1][0] != '-')
237 			innames[ninfiles++] = argv[1];
238 		else {
239 			cp = argv[1] + 1;
240 			/*
241 			 *	We can throw away single minus signs, so
242 			 *	that make scripts for the PDP 11 assembler work
243 			 *	on this assembler too
244 			 */
245 			while (*cp){
246 				switch(*cp++){
247 				 default:
248 					yyerror("Unknown flag: %c", *--cp);
249 					cp++;
250 					break;
251 				 case 'v':
252 					selfwhat(stdout);
253 					exit(1);
254 				 case 'd':
255 					d124 = *cp++ - '0';
256 					if ( (d124 != 1) && (d124 != 2) &&
257 					     (d124 != 4)){
258 						yyerror("-d[124] only");
259 						exit(1);
260 					}
261 					break;
262 				 case 'a':
263 					maxalign = atoi(cp+1);
264 					for (cp++; isdigit(*cp); cp++)
265 						/*VOID*/;
266 					if ( (maxalign > 16) || (maxalign < 0)){
267 						yyerror("-a: 0<=align<=16");
268 						exit(1);
269 					}
270 					break;
271 				 case 'o':
272 					if (argc < 3){
273 						yyerror("-o what???");
274 						exit(1);
275 					}
276 					outfile = argv[2];
277 				   bumpone:
278 					argc -= 2;
279 					argv += 2;
280 					goto nextarg;
281 
282 				 case 't':
283 					if (argc < 3){
284 						yyerror("-t what???");
285 						exit(1);
286 					}
287 					tmpdirprefix = argv[2];
288 					goto bumpone;
289 
290 				 case 'V':
291 					useVM = 1;
292 					break;
293 				 case 'W':
294 					silent = 1;
295 					break;
296 				 case 'L':
297 					savelabels = 1;
298 					break;
299 				 case 'J':
300 					jxxxJUMP = 1;
301 					break;
302 #ifdef DEBUG
303 				 case 'D':
304 					debug = 1;
305 					break;
306 				 case 'T':
307 					toktrace = 1;
308 					break;
309 #endif
310 				 case 'R':
311 					readonlydata = 1;
312 					break;
313 				}	/*end of the switch*/
314 			}	/*end of pulling out all arguments*/
315 		}	/*end of a flag argument*/
316 		--argc; ++argv;
317 	   nextarg:;
318 	}
319 	/* innames[ninfiles] = 0; */
320 }
/*
 *	poke through the data space and find all sccs identifiers.
 *	We assume:
 *	a) that extern char **environ; is the first thing in the bss
 *	segment (true, if one is using the new version of cmgt.crt0.c)
 *	b) that the sccsid's have not been put into text space.
 *
 *	Every "@(#)" marker found between &environ and the current break
 *	is written to place as a tab, the text that follows the marker
 *	(up to a NUL, '>' or newline) and a newline.
 *
 *	The scan stops early enough that all four marker bytes lie below
 *	the break, so nothing at or beyond sbrk(0) is ever read.
 */
selfwhat(place)
	FILE	*place;
{
	extern	char **environ;
	register	char	*ub;
	register	char *cp;
	char	*sbrk();

	ub = sbrk(0);
	for (cp = (char *)&environ; cp + 3 < ub; cp++){
		if (cp[0] != '@') continue;
		if (cp[1] != '(') continue;
		if (cp[2] != '#') continue;
		if (cp[3] != ')') continue;
		fputc('\t', place);
		for (cp += 4; cp < ub; cp++){
			if (*cp == 0) break;
			if (*cp == '>') break;
			if (*cp == '\n') break;
			fputc(*cp, place);
		}
		fputc('\n', place);
	}
}
352 
353 initialize()
354 {
355 	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
356 		signal(SIGINT, delexit);
357 	/*
358 	 *	Install symbols in the table
359 	 */
360 	symtabinit();
361 	syminstall();
362 	/*
363 	 *	Build the expression parser accelerator token sets
364 	 */
365 	buildtokensets();
366 }
367 
368 zeroorigins()
369 {
370 	register	int	locindex;
371 	/*
372 	 *	Mark usedot: the first NLOC slots are for named text segments,
373 	 *	the next for named data segments.
374 	 */
375 	for (locindex = 0; locindex < NLOC; locindex++){
376 		usedot[locindex].e_xtype = XTEXT;
377 		usedot[NLOC + locindex].e_xtype = XDATA;
378 		usedot[locindex].e_xvalue = 0;
379 		usedot[NLOC + locindex].e_xvalue = 0;
380 	}
381 }
382 
383 zerolocals()
384 {
385 	register	int	i;
386 
387 	for (i = 0; i <= 9; i++) {
388 		lgensym[i] = 1;
389 		genref[i] = 0;
390 	}
391 }
392 
393 i_pass1()
394 {
395 	FILE	*tempopen();
396 	if (useVM == 0)
397 		tokfile = tempopen(tokfilename, "T");
398 	strfile = tempopen(strfilename, "S");
399 	/*
400 	 *	write out the string length.
401 	 *	This will be overwritten when the
402 	 *	strings are tacked onto the growing a.out file
403 	 */
404 	strfilepos = sizeof(int);
405 	fwrite(&strfilepos, sizeof(int), 1, strfile);
406 
407 	inittokfile();
408 	initijxxx();
409 }
410 
411 FILE *tempopen(tname, part)
412 	char	*tname;
413 	char	*part;
414 {
415 	FILE	*file;
416 	sprintf(tname, "%s%sas%s%05d",
417 		tmpdirprefix,
418 		(tmpdirprefix[strlen(tmpdirprefix)-1] != '/') ? "/" : 0,
419 		part,
420 		getpid());
421 	file = fopen(tname, "w");
422 	if (file == NULL) {
423 		yyerror("Bad pass 1 temporary file for writing %s", tname);
424 		delexit();
425 	}
426 	return(file);
427 }
428 
429 pass1()
430 {
431 	register	int	i;
432 
433 	passno = 1;
434 	dotp = &usedot[0];
435 	txtfil = (BFILE *)0;
436 	relfil = (struct relbufdesc *)0;
437 
438 	if (ninfiles == 0){		/*take the input from stdin directly*/
439 		lineno = 1;
440 		dotsname = "<stdin>";
441 
442 		yyparse();
443 	} else {		/*we have the names tanked*/
444 		for (i = 0; i < ninfiles; i++){
445 			new_dot_s(innames[i]);
446 			if (freopen(innames[i], "r", stdin) == NULL) {
447 				yyerror( "Can't open source file %s\n",
448 					innames[i]);
449 				exit(2);
450 			}
451 			/* stdio is NOT used to read the input characters */
452 			/* we use read directly, into our own buffers */
453 			yyparse();
454 		}
455 	}
456 
457 	closetokfile();		/*kick out the last buffered intermediate text*/
458 }
459 
460 testlocals()
461 {
462 	register	int	i;
463 	for (i = 0; i <= 9; i++) {
464 		if (genref[i])
465 			yyerror("Reference to undefined local label %df", i);
466 		lgensym[i] = 1;
467 		genref[i] = 0;
468 	}
469 }
470 
/*
 *	Between the passes: sort the symbol table, then let jxxxfix
 *	settle the jxxx instructions.  With DEBUG compiled in and the
 *	debug flag set, the symbol table is dumped before and after
 *	jxxxfix runs.
 */
pass1_5()
{
	sortsymtab();
#ifdef DEBUG
	if (debug) {
		dumpsymtab();
	}
#endif
	jxxxfix();
#ifdef DEBUG
	if (debug) {
		dumpsymtab();
	}
#endif
}
482 
483 open_a_out()
484 {
485 	struct stat stb;
486 
487 	/*
488 	 *	Open up the a.out file now, and get set to build
489 	 *	up offsets into it for all of the various text,data
490 	 *	text relocation and data relocation segments.
491 	 */
492 	a_out_file = fopen(outfile, "w");
493 	if (a_out_file == NULL) {
494 		yyerror("Cannot create %s", outfile);
495 		delexit();
496 	}
497 	biofd = a_out_file->_file;
498 	fstat(biofd, &stb);
499 	biobufsize = stb.st_blksize;
500 	a_out_off = 0;
501 }
502 
503 roundsegments()
504 {
505 	register	int	locindex;
506 	register	long	v;
507 	/*
508 	 *	round and assign text segment origins
509 	 *	the exec header always goes in usefile[0]
510 	 */
511 	tsize = 0;
512 	for (locindex=0; locindex<NLOC; locindex++) {
513 		v = round(usedot[locindex].e_xvalue, FW);
514 		usedot[locindex].e_xvalue = tsize;
515 		if ((locindex == 0) || (v != 0) ){
516 			usefile[locindex] = (BFILE *)Calloc(1, sizeof(BFILE));
517 			bopen(usefile[locindex], a_out_off);
518 			if (locindex == 0)
519 				a_out_off = sizeof (struct exec);
520 		} else {
521 			usefile[locindex] = (BFILE *)-1;
522 		}
523 		tsize += v;
524 		a_out_off += v;
525 	}
526 	/*
527 	 *		Round and assign data segment origins.
528 	 */
529 	datbase = round(tsize, FW);
530 	for (locindex=0; locindex<NLOC; locindex++) {
531 		v = round(usedot[NLOC+locindex].e_xvalue, FW);
532 		usedot[NLOC+locindex].e_xvalue = datbase + dsize;
533 		if (v != 0){
534 			usefile[NLOC + locindex] = (BFILE *)Calloc(1,sizeof(BFILE));
535 			bopen(usefile[NLOC + locindex], a_out_off);
536 		} else {
537 			usefile[NLOC + locindex] = (BFILE *)-1;
538 		}
539 		dsize += v;
540 		a_out_off += v;
541 	}
542 	/*
543 	 *	Assign final values to symbols
544 	 */
545 	hdr.a_bss = dsize;
546 	freezesymtab();		/* this touches hdr.a_bss */
547 	stabfix();
548 	/*
549 	 *	Set up the relocation information "files" to
550 	 *	be zero; outrel takes care of the rest
551 	 */
552 	for (locindex = 0; locindex < NLOC + NLOC; locindex++){
553 		rusefile[locindex] = (struct relbufdesc *)0;
554 	}
555 }
556 
557 build_hdr()
558 {
559 	/*
560 	 *	Except for the text and data relocation sizes,
561 	 *	calculate the final values for the header
562 	 *
563 	 *	Write out the initial copy; we to come
564 	 *	back later and patch up a_trsize and a_drsize,
565 	 *	and overwrite this first version of the header.
566 	 */
567 	hdr.a_magic = MAGIC;
568 	hdr.a_text = tsize;
569 	hdr.a_data = dsize;
570 	hdr.a_bss -= dsize;
571 	hdr.a_syms = sizesymtab();	/* Does not include string pool length */
572 	hdr.a_entry = 0;
573 	hdr.a_trsize = 0;
574 	hdr.a_drsize = 0;
575 
576 	bwrite((char *)&hdr, sizeof(hdr), usefile[0]);
577 }
578 
579 i_pass2()
580 {
581 	if (useVM == 0) {
582 		fclose(tokfile);
583 		tokfile = fopen(tokfilename, "r");
584 		if (tokfile==NULL) {
585 		   yyerror("Bad pass 2 temporary file for reading %s", tokfilename);
586 		   delexit();
587 		}
588 	}
589 	fclose(strfile);
590 	strfile = fopen(strfilename, "r");
591 }
592 
593 pass2()
594 {
595 #ifdef DEBUG
596 	if (debug)
597 		printf("\n\n\n\t\tPASS 2\n\n\n\n");
598 #endif DEBUG
599 	passno = 2;
600 	lineno = 1;
601 	dotp = &usedot[0];
602 	txtfil = usefile[0];	/* already opened (always!) */
603 	relfil = 0;		/* outrel takes care of the rest */
604 	initoutrel();
605 
606 	inittokfile();
607 
608 	yyparse();
609 
610 	closetokfile();
611 }
612 
613 fillsegments()
614 {
615 	int	locindex;
616 	/*
617 	 *	Round text and data segments to FW by appending zeros
618 	 */
619 	for (locindex = 0; locindex < NLOC + NLOC; locindex++) {
620 		if (usefile[locindex]) {
621 			txtfil = usefile[locindex];
622 			dotp = &usedot[locindex];
623 			while (usedot[locindex].e_xvalue & FW)
624 				outb(0);
625 		}
626 	}
627 }
628 
629 reloc_syms()
630 {
631 	u_long	closerelfil();
632 	/*
633 	 *	Move the relocation information to a.out
634 	 *	a_out_off is the offset so far:
635 	 *	exec + text segments + data segments
636 	 */
637 	relocfile = (BFILE *)Calloc(1,sizeof(BFILE));
638 	bopen(relocfile, a_out_off);
639 	a_out_off += closeoutrel(relocfile);
640 
641 	hdr.a_trsize = trsize;
642 	hdr.a_drsize = drsize;
643 	if (readonlydata) {
644 		hdr.a_text += hdr.a_data;
645 		hdr.a_data = 0;
646 		hdr.a_trsize += hdr.a_drsize;
647 		hdr.a_drsize = 0;
648 	}
649 	/*
650 	 *	Output the symbol table and the string pool
651 	 *
652 	 *	We must first rewind the string pool file to its beginning,
653 	 *	in case it was seek'ed into for fetching ascii and asciz
654 	 *	strings.
655 	 */
656 	fseek(strfile, 0, 0);
657 	symwrite(relocfile);
658 }
659 
660 fix_a_out()
661 {
662 	if (lseek(a_out_file->_file, 0L, 0) < 0L)
663 		yyerror("Reposition for header rewrite fails");
664 	if (write(a_out_file->_file, (char *)&hdr, sizeof (struct exec)) < 0)
665 		yyerror("Rewrite of header fails");
666 }
667 
668 delexit()
669 {
670 	delete();
671 	if (passno == 2){
672 		unlink(outfile);
673 	}
674 	exit(1);
675 }
676 
677 delete()
678 {
679 	if (useVM == 0 || tokfilename[0])
680 		unlink(tokfilename);
681 	if (strfilename[0])
682 		unlink(strfilename);
683 }
684 
/*
 *	Keep calling fillinbuffer until it returns a null pointer,
 *	then remove the temporary files and exit with status 1.
 */
sawabort()
{
	char	*fillinbuffer();

	for (;;) {
		if (fillinbuffer() == (char *)0)
			break;
	}
	delete();
	exit(1);	/*although the previous pass will also exit non zero*/
}
693 
/*
 *	Report an internal inconsistency and abort.
 *
 *	fmt and up to four int-sized arguments are handed on to yyerror
 *	after a fixed "Assembler panic" message; the temporary files are
 *	then removed and abort is called.
 */
/*VARARGS 1*/
panic(fmt, a1, a2, a3, a4)
	char	*fmt;
	int	a1, a2, a3, a4;
{
	yyerror("Assembler panic: bad internal data structure.");
	yyerror(fmt, a1, a2, a3, a4);
	delete();
	abort();
}
703