xref: /inferno-os/os/boot/rpcg/zqs.c (revision 74a4d8c26dd3c1e9febcb717cfd6cb6512991a7a)
1*74a4d8c2SCharles.Forsyth #include "boot.h"
2*74a4d8c2SCharles.Forsyth #include "squeeze.h"
3*74a4d8c2SCharles.Forsyth 
4*74a4d8c2SCharles.Forsyth /*
5*74a4d8c2SCharles.Forsyth  * for details of `unsqueeze' see:
6*74a4d8c2SCharles.Forsyth  *
7*74a4d8c2SCharles.Forsyth  * %A Mark Taunton
8*74a4d8c2SCharles.Forsyth  * %T Compressed Executables: An Exercise in Thinking Small
9*74a4d8c2SCharles.Forsyth  * %P 385-404
10*74a4d8c2SCharles.Forsyth  * %I USENIX
11*74a4d8c2SCharles.Forsyth  * %B USENIX Conference Proceedings
12*74a4d8c2SCharles.Forsyth  * %D Summer 1991
13*74a4d8c2SCharles.Forsyth  * %C Nashville, TN
14*74a4d8c2SCharles.Forsyth  *
15*74a4d8c2SCharles.Forsyth  * several of the unimplemented improvements described in the paper
16*74a4d8c2SCharles.Forsyth  * have been implemented here
17*74a4d8c2SCharles.Forsyth  *
18*74a4d8c2SCharles.Forsyth  * there is a further transformation on the powerpc (QFLAG!=0) to shuffle bits
19*74a4d8c2SCharles.Forsyth  * in certain instructions so as to push the fixed bits to the top of the word.
20*74a4d8c2SCharles.Forsyth  */
21*74a4d8c2SCharles.Forsyth 
22*74a4d8c2SCharles.Forsyth #define	EXECHDRLEN	(8*4)
23*74a4d8c2SCharles.Forsyth 
24*74a4d8c2SCharles.Forsyth typedef struct Squeeze Squeeze;
25*74a4d8c2SCharles.Forsyth struct Squeeze {
26*74a4d8c2SCharles.Forsyth 	int	n;
27*74a4d8c2SCharles.Forsyth 	ulong	tab[7*256];
28*74a4d8c2SCharles.Forsyth };
29*74a4d8c2SCharles.Forsyth 
30*74a4d8c2SCharles.Forsyth #define	GET4(p)	(((((((p)[0]<<8)|(p)[1])<<8)|(p)[2])<<8)|(p)[3])
31*74a4d8c2SCharles.Forsyth 
32*74a4d8c2SCharles.Forsyth /*
33*74a4d8c2SCharles.Forsyth  * for speed of unsqueezing from Flash, certain checks are
34*74a4d8c2SCharles.Forsyth  * not done inside the loop (as they would be in the unsqueeze program zqs),
35*74a4d8c2SCharles.Forsyth  * but instead the checksum is expected to catch corrupted files.
36*74a4d8c2SCharles.Forsyth  * in fact the Squeeze array bounds can't be exceeded in practice
37*74a4d8c2SCharles.Forsyth  * because the tables are always full for a squeezed kernel.
38*74a4d8c2SCharles.Forsyth  */
39*74a4d8c2SCharles.Forsyth enum {
40*74a4d8c2SCharles.Forsyth 	QFLAG = 1,	/* invert powerpc-specific code transformation */
41*74a4d8c2SCharles.Forsyth 	CHECK = 0,	/* check precise bounds in Squeeze array (otherwise checksum detects error) */
42*74a4d8c2SCharles.Forsyth };
43*74a4d8c2SCharles.Forsyth 
44*74a4d8c2SCharles.Forsyth static	ulong	chksum;
45*74a4d8c2SCharles.Forsyth static	int	rdtab(Block*, Squeeze*, int);
46*74a4d8c2SCharles.Forsyth static	ulong*	unsqueeze(ulong*, uchar*, uchar*, Squeeze*, Squeeze*, ulong);
47*74a4d8c2SCharles.Forsyth static	uchar*	unsqzseg(uchar*, Block*, long, long, char*);
48*74a4d8c2SCharles.Forsyth static	Alarm*	unsqzal;
49*74a4d8c2SCharles.Forsyth 
50*74a4d8c2SCharles.Forsyth int
issqueezed(uchar * b)51*74a4d8c2SCharles.Forsyth issqueezed(uchar *b)
52*74a4d8c2SCharles.Forsyth {
53*74a4d8c2SCharles.Forsyth 	return GET4(b) == SQMAGIC? GET4(b+SQHDRLEN): 0;
54*74a4d8c2SCharles.Forsyth }
55*74a4d8c2SCharles.Forsyth 
56*74a4d8c2SCharles.Forsyth static void
unsqzdot(Alarm *)57*74a4d8c2SCharles.Forsyth unsqzdot(Alarm*)
58*74a4d8c2SCharles.Forsyth {
59*74a4d8c2SCharles.Forsyth 	unsqzal = alarm(500, unsqzdot, nil);
60*74a4d8c2SCharles.Forsyth 	print(".");
61*74a4d8c2SCharles.Forsyth }
62*74a4d8c2SCharles.Forsyth 
63*74a4d8c2SCharles.Forsyth long
unsqueezef(Block * b,ulong * entryp)64*74a4d8c2SCharles.Forsyth unsqueezef(Block *b, ulong *entryp)
65*74a4d8c2SCharles.Forsyth {
66*74a4d8c2SCharles.Forsyth 	uchar *loada, *wp;
67*74a4d8c2SCharles.Forsyth 	ulong toptxt, topdat, oldsum;
68*74a4d8c2SCharles.Forsyth 	long asis, nst, nsd;
69*74a4d8c2SCharles.Forsyth 	Sqhdr *sqh;
70*74a4d8c2SCharles.Forsyth 	Exec *ex;
71*74a4d8c2SCharles.Forsyth 
72*74a4d8c2SCharles.Forsyth 	if(BLEN(b) < SQHDRLEN+EXECHDRLEN)
73*74a4d8c2SCharles.Forsyth 		return -1;
74*74a4d8c2SCharles.Forsyth 	sqh = (Sqhdr*)b->rp;
75*74a4d8c2SCharles.Forsyth 	if(GET4(sqh->magic) != SQMAGIC)
76*74a4d8c2SCharles.Forsyth 		return -1;
77*74a4d8c2SCharles.Forsyth 	chksum = 0;
78*74a4d8c2SCharles.Forsyth 	toptxt = GET4(sqh->toptxt);
79*74a4d8c2SCharles.Forsyth 	topdat = GET4(sqh->topdat);
80*74a4d8c2SCharles.Forsyth 	oldsum = GET4(sqh->sum);
81*74a4d8c2SCharles.Forsyth 	asis = GET4(sqh->asis);
82*74a4d8c2SCharles.Forsyth 	nst = GET4(sqh->text);
83*74a4d8c2SCharles.Forsyth 	nsd = GET4(sqh->data);
84*74a4d8c2SCharles.Forsyth 	b->rp += SQHDRLEN;
85*74a4d8c2SCharles.Forsyth 	ex = (Exec*)b->rp;
86*74a4d8c2SCharles.Forsyth 	if(GET4(ex->magic) != Q_MAGIC){
87*74a4d8c2SCharles.Forsyth 		print("zqs: not powerPC executable\n");
88*74a4d8c2SCharles.Forsyth 		return -1;
89*74a4d8c2SCharles.Forsyth 	}
90*74a4d8c2SCharles.Forsyth 	*entryp = GET4(ex->entry);
91*74a4d8c2SCharles.Forsyth 	b->rp += EXECHDRLEN;
92*74a4d8c2SCharles.Forsyth 	loada = KADDR(PADDR(*entryp));
93*74a4d8c2SCharles.Forsyth 	wp = unsqzseg(loada, b, nst, toptxt, "text");
94*74a4d8c2SCharles.Forsyth 	if(wp == nil){
95*74a4d8c2SCharles.Forsyth 		print("zqs: format error\n");
96*74a4d8c2SCharles.Forsyth 		return -1;
97*74a4d8c2SCharles.Forsyth 	}
98*74a4d8c2SCharles.Forsyth 	if(nsd){
99*74a4d8c2SCharles.Forsyth 		wp = (uchar*)PGROUND((ulong)wp);
100*74a4d8c2SCharles.Forsyth 		wp = unsqzseg(wp, b, nsd, topdat, "data");
101*74a4d8c2SCharles.Forsyth 		if(wp == nil){
102*74a4d8c2SCharles.Forsyth 			print("zqs: format error\n");
103*74a4d8c2SCharles.Forsyth 			return -1;
104*74a4d8c2SCharles.Forsyth 		}
105*74a4d8c2SCharles.Forsyth 	}
106*74a4d8c2SCharles.Forsyth 	if(asis){
107*74a4d8c2SCharles.Forsyth 		memmove(wp, b->rp, asis);
108*74a4d8c2SCharles.Forsyth 		wp += asis;
109*74a4d8c2SCharles.Forsyth 		b->rp += asis;
110*74a4d8c2SCharles.Forsyth 	}
111*74a4d8c2SCharles.Forsyth 	if(chksum != oldsum){
112*74a4d8c2SCharles.Forsyth 		print("\nsqueezed kernel: checksum error: %8.8lux need %8.8lux\n", chksum, oldsum);
113*74a4d8c2SCharles.Forsyth 		return -1;
114*74a4d8c2SCharles.Forsyth 	}
115*74a4d8c2SCharles.Forsyth 	return wp-loada;
116*74a4d8c2SCharles.Forsyth }
117*74a4d8c2SCharles.Forsyth 
118*74a4d8c2SCharles.Forsyth static uchar *
unsqzseg(uchar * wp,Block * b,long ns,long top,char * what)119*74a4d8c2SCharles.Forsyth unsqzseg(uchar *wp, Block *b, long ns, long top, char *what)
120*74a4d8c2SCharles.Forsyth {
121*74a4d8c2SCharles.Forsyth 	static Squeeze sq3, sq4;
122*74a4d8c2SCharles.Forsyth 
123*74a4d8c2SCharles.Forsyth 	print("unpack %s %8.8lux %lud:", what, wp, ns);
124*74a4d8c2SCharles.Forsyth 	if(ns == 0)
125*74a4d8c2SCharles.Forsyth 		return wp;
126*74a4d8c2SCharles.Forsyth 	if(rdtab(b, &sq3, 0) < 0)
127*74a4d8c2SCharles.Forsyth 		return nil;
128*74a4d8c2SCharles.Forsyth 	if(rdtab(b, &sq4, 8) < 0)
129*74a4d8c2SCharles.Forsyth 		return nil;
130*74a4d8c2SCharles.Forsyth 	if(BLEN(b) < ns){
131*74a4d8c2SCharles.Forsyth 		print(" **size error\n");
132*74a4d8c2SCharles.Forsyth 		return nil;
133*74a4d8c2SCharles.Forsyth 	}
134*74a4d8c2SCharles.Forsyth 	unsqzal = alarm(500, unsqzdot, nil);
135*74a4d8c2SCharles.Forsyth 	wp = (uchar*)unsqueeze((ulong*)wp, b->rp, b->rp+ns, &sq3, &sq4, top);
136*74a4d8c2SCharles.Forsyth 	cancel(unsqzal);
137*74a4d8c2SCharles.Forsyth 	unsqzal = nil;
138*74a4d8c2SCharles.Forsyth 	print("\n");
139*74a4d8c2SCharles.Forsyth 	if(wp == nil){
140*74a4d8c2SCharles.Forsyth 		print("zqs: corrupt squeezed data stream\n");
141*74a4d8c2SCharles.Forsyth 		return nil;
142*74a4d8c2SCharles.Forsyth 	}
143*74a4d8c2SCharles.Forsyth 	b->rp += ns;
144*74a4d8c2SCharles.Forsyth 	return wp;
145*74a4d8c2SCharles.Forsyth }
146*74a4d8c2SCharles.Forsyth 
147*74a4d8c2SCharles.Forsyth static ulong*
unsqueeze(ulong * wp,uchar * rp,uchar * ep,Squeeze * sq3,Squeeze * sq4,ulong top)148*74a4d8c2SCharles.Forsyth unsqueeze(ulong *wp, uchar *rp, uchar *ep, Squeeze *sq3, Squeeze *sq4, ulong top)
149*74a4d8c2SCharles.Forsyth {
150*74a4d8c2SCharles.Forsyth 	ulong nx, csum;
151*74a4d8c2SCharles.Forsyth 	int code, n;
152*74a4d8c2SCharles.Forsyth 
153*74a4d8c2SCharles.Forsyth 	if(QFLAG){
154*74a4d8c2SCharles.Forsyth 		QREMAP(top);	/* adjust top just once, outside the loop */
155*74a4d8c2SCharles.Forsyth 	}
156*74a4d8c2SCharles.Forsyth 	csum = chksum;
157*74a4d8c2SCharles.Forsyth 	while(rp < ep){
158*74a4d8c2SCharles.Forsyth 		/* no function calls within this loop for speed */
159*74a4d8c2SCharles.Forsyth 		code = *rp;
160*74a4d8c2SCharles.Forsyth 		rp++;
161*74a4d8c2SCharles.Forsyth 		n = 0;
162*74a4d8c2SCharles.Forsyth 		nx = code>>4;
163*74a4d8c2SCharles.Forsyth 		do{
164*74a4d8c2SCharles.Forsyth 			if(nx == 0){
165*74a4d8c2SCharles.Forsyth 				nx = top;
166*74a4d8c2SCharles.Forsyth 			}else{
167*74a4d8c2SCharles.Forsyth 				if(nx==1){
168*74a4d8c2SCharles.Forsyth 					nx = (((((rp[3]<<8)|rp[2])<<8)|rp[1])<<8)|rp[0];
169*74a4d8c2SCharles.Forsyth 					rp += 4;
170*74a4d8c2SCharles.Forsyth 				}else if(nx <= 8){	/* 2 to 8 */
171*74a4d8c2SCharles.Forsyth 					nx = ((nx-2)<<8) | rp[0];
172*74a4d8c2SCharles.Forsyth 					if(CHECK && nx >= sq4->n)
173*74a4d8c2SCharles.Forsyth 						return nil;	/* corrupted file */
174*74a4d8c2SCharles.Forsyth 					nx = sq4->tab[nx] | rp[1];
175*74a4d8c2SCharles.Forsyth 					rp += 2;
176*74a4d8c2SCharles.Forsyth 				}else{	/* 9 to 15 */
177*74a4d8c2SCharles.Forsyth 					nx = ((nx-9)<<8) | rp[0];
178*74a4d8c2SCharles.Forsyth 					if(CHECK && nx >= sq3->n)
179*74a4d8c2SCharles.Forsyth 						return nil;	/* corrupted file */
180*74a4d8c2SCharles.Forsyth 					nx = sq3->tab[nx];
181*74a4d8c2SCharles.Forsyth 					rp++;
182*74a4d8c2SCharles.Forsyth 				}
183*74a4d8c2SCharles.Forsyth 				if(rp > ep)
184*74a4d8c2SCharles.Forsyth 					return nil;	/* corrupted file */
185*74a4d8c2SCharles.Forsyth 				if(QFLAG){
186*74a4d8c2SCharles.Forsyth 					QREMAP(nx);
187*74a4d8c2SCharles.Forsyth 				}
188*74a4d8c2SCharles.Forsyth 			}
189*74a4d8c2SCharles.Forsyth 			*wp = nx;
190*74a4d8c2SCharles.Forsyth 			wp++;
191*74a4d8c2SCharles.Forsyth 			csum += nx;
192*74a4d8c2SCharles.Forsyth 			nx = code & 0xF;
193*74a4d8c2SCharles.Forsyth 		}while(++n == 1);
194*74a4d8c2SCharles.Forsyth 	}
195*74a4d8c2SCharles.Forsyth 	chksum = csum;
196*74a4d8c2SCharles.Forsyth 	return wp;
197*74a4d8c2SCharles.Forsyth }
198*74a4d8c2SCharles.Forsyth 
199*74a4d8c2SCharles.Forsyth static int
rdtab(Block * b,Squeeze * sq,int shift)200*74a4d8c2SCharles.Forsyth rdtab(Block *b, Squeeze *sq, int shift)
201*74a4d8c2SCharles.Forsyth {
202*74a4d8c2SCharles.Forsyth 	uchar *p, *ep;
203*74a4d8c2SCharles.Forsyth 	ulong v, w;
204*74a4d8c2SCharles.Forsyth 	int i;
205*74a4d8c2SCharles.Forsyth 
206*74a4d8c2SCharles.Forsyth 	if(BLEN(b) < 2)
207*74a4d8c2SCharles.Forsyth 		return -1;
208*74a4d8c2SCharles.Forsyth 	i = (b->rp[0]<<8) | b->rp[1];
209*74a4d8c2SCharles.Forsyth 	if(1)
210*74a4d8c2SCharles.Forsyth 		print(" T%d", i);
211*74a4d8c2SCharles.Forsyth 	b->rp += 2;
212*74a4d8c2SCharles.Forsyth 	if((i -= 2) > 0){
213*74a4d8c2SCharles.Forsyth 		if(BLEN(b) < i)
214*74a4d8c2SCharles.Forsyth 			return -1;
215*74a4d8c2SCharles.Forsyth 	}
216*74a4d8c2SCharles.Forsyth 	sq->n = 0;
217*74a4d8c2SCharles.Forsyth 	p = b->rp;
218*74a4d8c2SCharles.Forsyth 	ep = b->rp+i;
219*74a4d8c2SCharles.Forsyth 	b->rp += i;
220*74a4d8c2SCharles.Forsyth 	v = 0;
221*74a4d8c2SCharles.Forsyth 	while(p < ep){
222*74a4d8c2SCharles.Forsyth 		w = 0;
223*74a4d8c2SCharles.Forsyth 		do{
224*74a4d8c2SCharles.Forsyth 			if(p >= ep)
225*74a4d8c2SCharles.Forsyth 				return -1;
226*74a4d8c2SCharles.Forsyth 			w = (w<<7) | (*p & 0x7F);
227*74a4d8c2SCharles.Forsyth 		}while(*p++ & 0x80);
228*74a4d8c2SCharles.Forsyth 		v += w;
229*74a4d8c2SCharles.Forsyth 		if(0)
230*74a4d8c2SCharles.Forsyth 			print("%d %8.8lux %8.8lux\n", sq->n, v, w);
231*74a4d8c2SCharles.Forsyth 		sq->tab[sq->n++] = v<<shift;
232*74a4d8c2SCharles.Forsyth 	}
233*74a4d8c2SCharles.Forsyth 	return 0;
234*74a4d8c2SCharles.Forsyth }
235