xref: /plan9/sys/src/cmd/aux/olefs.c (revision 59c21d95eabd8f0704c9b4a4cb647ed908ae2da6)
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <auth.h>
5 #include <fcall.h>
6 #include <thread.h>
7 #include <9p.h>
8 
/*
 * decode little-endian integers from a byte buffer.
 * p should be a byte pointer (char* or uchar*): LONG steps
 * to the upper half with (p)+2.  both macros evaluate p
 * more than once, so p must have no side effects.
 */
#define SHORT(p)	(((uchar*)(p))[0] | (((uchar*)(p))[1]<<8))
#define LONG(p)	((ulong)SHORT(p) | (((ulong)SHORT((p)+2))<<16))
12 
/* an open OLE file and one directory entry within it */
typedef struct Ofile	Ofile;
typedef struct Odir	Odir;
15 
enum {
	/* special block map entries: markers, not block numbers */
	Bspecial = 0xFFFFFFFD,
	Bendchain = 0xFFFFFFFE,
	Bunused = 0xFFFFFFFF,

	/* size in bytes of one block of the file */
	Blocksize = 0x200,

	/* size in bytes of one on-disk directory entry */
	Odirsize = 0x80,

	/* Odir types */
	Tstorage = 1,
	Tstream = 2,
	Troot = 5,
};
31 
32 /*
33  * the file consists of chains of blocks of size 0x200.
34  * to find what block follows block n, you look at
35  * blockmap[n].  that block follows it unless it is Bspecial
36  * or Bendchain.
37  *
38  * it's like the MS-DOS file system allocation tables.
39  */
40 struct Ofile {
41 	Biobuf *b;
42 	ulong nblock;
43 	ulong *blockmap;
44 	ulong rootblock;
45 	ulong smapblock;
46 	ulong *smallmap;
47 };
48 
49 /* Odir headers are found in directory listings in the Olefile */
50 /* prev and next form a binary tree of directory entries */
51 struct Odir {
52 	Ofile *f;
53 	Rune name[32+1];
54 	uchar type;
55 	uchar isroot;
56 	ulong left;
57 	ulong right;
58 	ulong dir;
59 	ulong start;
60 	ulong size;
61 };
62 
63 void*
emalloc(ulong sz)64 emalloc(ulong sz)
65 {
66 	void *v;
67 
68 	v = malloc(sz);
69 	assert(v != nil);
70 	return v;
71 }
72 
73 int
convM2OD(Odir * f,void * buf,int nbuf)74 convM2OD(Odir *f, void *buf, int nbuf)
75 {
76 	int i;
77 	char *p;
78 	int len;
79 
80 	if(nbuf < Odirsize)
81 		return -1;
82 
83 	/*
84 	 * the short at 0x40 is the length of the name.
85 	 * when zero, it means there is no Odir here.
86 	 */
87 	p = buf;
88 	len = SHORT(p+0x40);
89 	if(len == 0)
90 		return 0;
91 
92 	if(len > 32)	/* shouldn't happen */
93 		len = 32;
94 
95 	for(i=0; i<len; i++)
96 		f->name[i] = SHORT(p+i*2);
97 	f->name[len] = 0;
98 
99 	f->type = p[0x42];
100 	f->left = LONG(p+0x44);
101 	f->right = LONG(p+0x48);
102 	f->dir = LONG(p+0x4C);
103 	f->start = LONG(p+0x74);
104 	f->size = LONG(p+0x78);
105 
106 	/* BUG: grab time in ms format from here */
107 
108 	return 1;
109 }
110 
111 int
oreadblock(Ofile * f,int block,ulong off,char * buf,int nbuf)112 oreadblock(Ofile *f, int block, ulong off, char *buf, int nbuf)
113 {
114 	int n;
115 
116 	if(block < 0 || block >= f->nblock) {
117 		werrstr("attempt to read %x/%lux\n", block, f->nblock);
118 		return -1;
119 	}
120 
121 	if(off >= Blocksize){
122 		print("offset too far into block\n");
123 		return 0;
124 	}
125 
126 	if(off+nbuf > Blocksize)
127 		nbuf = Blocksize-off;
128 
129 	/* blocks start numbering at -1 [sic] */
130 	off += (block+1)*Blocksize;
131 
132 	if(Bseek(f->b, off, 0) != off){
133 		print("seek failed\n");
134 		return -1;
135 	}
136 
137 	n = Bread(f->b, buf, nbuf);
138 	if(n < 0)
139 		print("Bread failed: %r");
140 	return n;
141 }
142 
143 int
chainlen(Ofile * f,ulong start)144 chainlen(Ofile *f, ulong start)
145 {
146 	int i;
147 	for(i=0; start < 0xFFFF0000; i++)
148 		start = f->blockmap[start];
149 
150 	return i;
151 }
152 
153 /*
154  * read nbuf bytes starting at offset off from the
155  * chain whose first block is block.  the chain is linked
156  * together via the blockmap as described above,
157  * like the MS-DOS file allocation tables.
158  */
159 int
oreadchain(Ofile * f,ulong block,int off,char * buf,int nbuf)160 oreadchain(Ofile *f, ulong block, int off, char *buf, int nbuf)
161 {
162 	int i;
163 	int offblock;
164 
165 	offblock = off/Blocksize;
166 	for(i=0; i<offblock && block < 0xFFFF0000; i++)
167 		block = f->blockmap[block];
168 	return oreadblock(f, block, off%Blocksize, buf, nbuf);
169 }
170 
171 int
oreadfile(Odir * d,int off,char * buf,int nbuf)172 oreadfile(Odir *d, int off, char *buf, int nbuf)
173 {
174 	/*
175 	 * if d->size < 0x1000 then d->start refers
176 	 * to a small depot block, else a big one.
177 	 * if this is the root entry, it's a big one
178 	 * no matter what.
179 	 */
180 
181 	if(off >= d->size)
182 		return 0;
183 	if(off+nbuf > d->size)
184 		nbuf = d->size-off;
185 
186 	if(d->size >= 0x1000
187 	|| memcmp(d->name, L"Root Entry", 11*sizeof(Rune)) == 0)
188 		return oreadchain(d->f, d->start, off, buf, nbuf);
189 	else {	/* small block */
190 		off += d->start*64;
191 		return oreadchain(d->f, d->f->smapblock, off, buf, nbuf);
192 	}
193 }
194 
195 int
oreaddir(Ofile * f,int entry,Odir * d)196 oreaddir(Ofile *f, int entry, Odir *d)
197 {
198 	char buf[Odirsize];
199 
200 	if(oreadchain(f, f->rootblock, entry*Odirsize, buf, Odirsize) != Odirsize)
201 		return -1;
202 
203 	d->f = f;
204 	return convM2OD(d, buf, Odirsize);
205 }
206 
207 void
dumpdir(Ofile * f,ulong dnum)208 dumpdir(Ofile *f, ulong dnum)
209 {
210 	Odir d;
211 
212 	if(oreaddir(f, dnum, &d) != 1) {
213 		fprint(2, "dumpdir %lux failed\n", dnum);
214 		return;
215 	}
216 
217 	fprint(2, "%.8lux type %d size %lud l %.8lux r %.8lux d %.8lux (%S)\n", dnum, d.type, d.size, d.left, d.right, d.dir, d.name);
218 	if(d.left != (ulong)-1)
219 		dumpdir(f, d.left);
220 	if(d.right != (ulong)-1)
221 		dumpdir(f, d.right);
222 	if(d.dir != (ulong)-1)
223 		dumpdir(f, d.dir);
224 }
225 
226 Ofile*
oleopen(char * fn)227 oleopen(char *fn)
228 {
229 	int i, j, k, block;
230 	int ndepot;
231 	ulong u;
232 	Odir rootdir;
233 	ulong extrablock;
234 	uchar buf[Blocksize];
235 
236 	Ofile *f;
237 	Biobuf *b;
238 	static char magic[] = {
239 		0xD0, 0xCF, 0x11, 0xE0,
240 		0xA1, 0xB1, 0x1A, 0xE1
241 	};
242 
243 	b = Bopen(fn, OREAD);
244 	if(b == nil)
245 		return nil;
246 
247 	/* the first bytes are magic */
248 	if(Bread(b, buf, sizeof magic) != sizeof magic
249 	|| memcmp(buf, magic, sizeof magic) != 0) {
250 		Bterm(b);
251 		werrstr("bad magic: not OLE file");
252 		return nil;
253 	}
254 
255 	f = emalloc(sizeof *f);
256 	f->b = b;
257 
258 	/*
259 	 * the header contains a list of depots, which are
260 	 * block maps.  we assimilate them into one large map,
261 	 * kept in main memory.
262 	 */
263 	Bseek(b, 0, 0);
264 	if(Bread(b, buf, Blocksize) != Blocksize) {
265 		Bterm(b);
266 		free(f);
267 		print("short read\n");
268 		return nil;
269 	}
270 
271 	ndepot = LONG(buf+0x2C);
272 	f->nblock = ndepot*(Blocksize/4);
273 //	fprint(2, "ndepot = %d f->nblock = %lud\n", ndepot, f->nblock);
274 	f->rootblock = LONG(buf+0x30);
275 	f->smapblock = LONG(buf+0x3C);
276 	f->blockmap = emalloc(sizeof(f->blockmap[0])*f->nblock);
277 	extrablock = LONG(buf+0x44);
278 
279 	u = 0;
280 
281 	/* the big block map fills to the end of the first 512-byte block */
282 	for(i=0; i<ndepot && i<(0x200-0x4C)/4; i++) {
283 		if(Bseek(b, 0x4C+4*i, 0) != 0x4C+4*i
284 		|| Bread(b, buf, 4) != 4) {
285 			print("bseek %d fail\n", 0x4C+4*i);
286 			goto Die;
287 		}
288 		block = LONG(buf);
289 		if((ulong)block == Bendchain) {
290 			ndepot = i;
291 			f->nblock = ndepot*(Blocksize/4);
292 			break;
293 		}
294 
295 		if(Bseek(b, (block+1)*Blocksize, 0) != (block+1)*Blocksize) {
296 			print("Xbseek %d fail\n", (block+1)*Blocksize);
297 			goto Die;
298 		}
299 		for(j=0; j<Blocksize/4; j++) {
300 			if(Bread(b, buf, 4) != 4) {
301 				print("Bread fail seek block %x, %d i %d ndepot %d\n", block, (block+1)*Blocksize, i, ndepot);
302 				goto Die;
303 			}
304 			f->blockmap[u++] = LONG(buf);
305 		}
306 	}
307 	/*
308 	 * if the first block can't hold it, it continues in the block at LONG(hdr+0x44).
309 	 * if that in turn is not big enough, there's a next block number at the end of
310 	 * each block.
311 	 */
312 	while(i < ndepot) {
313 		for(k=0; k<(0x200-4)/4 && i<ndepot; i++, k++) {
314 			if(Bseek(b, 0x200+extrablock*Blocksize+4*i, 0) != 0x200+extrablock*0x200+4*i
315 			|| Bread(b, buf, 4) != 4) {
316 				print("bseek %d fail\n", 0x4C+4*i);
317 				goto Die;
318 			}
319 			block = LONG(buf);
320 			if((ulong)block == Bendchain) {
321 				ndepot = i;
322 				f->nblock = ndepot*(Blocksize/4);
323 				goto Break2;
324 			}
325 
326 			if(Bseek(b, (block+1)*Blocksize, 0) != (block+1)*Blocksize) {
327 				print("Xbseek %d fail\n", (block+1)*Blocksize);
328 				goto Die;
329 			}
330 			for(j=0; j<Blocksize/4; j++) {
331 				if(Bread(b, buf, 4) != 4) {
332 					print("Bread fail seek block %x, %d i %d ndepot %d\n", block, (block+1)*Blocksize, i, ndepot);
333 					goto Die;
334 				}
335 				f->blockmap[u++] = LONG(buf);
336 			}
337 		}
338 		if(Bseek(b, 0x200+extrablock*Blocksize+Blocksize-4, 0) != 0x200+extrablock*Blocksize+Blocksize-4
339 		|| Bread(b, buf, 4) != 4) {
340 			print("bseek %d fail\n", 0x4C+4*i);
341 			goto Die;
342 		}
343 		extrablock = LONG(buf);
344 	}
345 Break2:;
346 
347 	if(oreaddir(f, 0, &rootdir) <= 0){
348 		print("oreaddir could not read root\n");
349 		goto Die;
350 	}
351 
352 	f->smapblock = rootdir.start;
353 	return f;
354 
355 Die:
356 	Bterm(b);
357 	free(f->blockmap);
358 	free(f);
359 	return nil;
360 }
361 
362 void
oleread(Req * r)363 oleread(Req *r)
364 {
365 	Odir *d;
366 	char *p;
367 	int e, n;
368 	long c;
369 	vlong o;
370 
371 	o = r->ifcall.offset;
372 	d = r->fid->file->aux;
373 	if(d == nil) {
374 		respond(r, "cannot happen");
375 		return;
376 	}
377 
378 	c = r->ifcall.count;
379 
380 	if(o >= d->size) {
381 		r->ofcall.count = 0;
382 		respond(r, nil);
383 		return;
384 	}
385 
386 	if(o+c > d->size)
387 		c = d->size-o;
388 
389 	/*
390 	 * oreadfile returns so little data, it will
391 	 * help to read as much as we can.
392 	 */
393 	e = c+o;
394 	n = 0;
395 	for(p=r->ofcall.data; o<e; o+=n, p+=n) {
396 		n = oreadfile(d, o, p, e-o);
397 		if(n <= 0)
398 			break;
399 	}
400 
401 	if(n == -1 && o == r->ifcall.offset)
402 		respond(r, "error reading word file");
403 	else {
404 		r->ofcall.count = o - r->ifcall.offset;
405 		respond(r, nil);
406 	}
407 }
408 
409 Odir*
copydir(Odir * d)410 copydir(Odir *d)
411 {
412 	Odir *e;
413 
414 	e = emalloc(sizeof(*d));
415 	*e = *d;
416 	return e;
417 }
418 
419 void
filldir(File * t,Ofile * f,int dnum,int nrecur)420 filldir(File *t, Ofile *f, int dnum, int nrecur)
421 {
422 	Odir d;
423 	int i;
424 	Rune rbuf[40];
425 	char buf[UTFmax*nelem(rbuf)];
426 	File *nt;
427 
428 	if(dnum == 0xFFFFFFFF || oreaddir(f, dnum, &d) != 1)
429 		return;
430 
431 	/*
432 	 * i hope there are no broken files with
433 	 * circular trees.  i hate infinite loops.
434 	 */
435 	if(nrecur > 100)
436 		sysfatal("tree too large in office file: probably circular");
437 
438 	filldir(t, f, d.left, nrecur+1);
439 
440 	/* add current tree entry */
441 	runestrecpy(rbuf, rbuf+sizeof rbuf, d.name);
442 	for(i=0; rbuf[i]; i++)
443 		if(rbuf[i] == L' ')
444 			rbuf[i] = L'␣';
445 		else if(rbuf[i] <= 0x20 || rbuf[i] == L'/'
446 			|| (0x80 <= rbuf[i] && rbuf[i] <= 0x9F))
447 				rbuf[i] = ':';
448 
449 	snprint(buf, sizeof buf, "%S", rbuf);
450 
451 	if(d.dir == 0xFFFFFFFF) {
452 		/* make file */
453 		nt = createfile(t, buf, nil, 0444, nil);
454 		if(nt == nil)
455 			sysfatal("nt nil: create %s: %r", buf);
456 		nt->aux = copydir(&d);
457 		nt->length = d.size;
458 	} else /* make directory */
459 		nt = createfile(t, buf, nil, DMDIR|0777, nil);
460 
461 	filldir(t, f, d.right, nrecur+1);
462 
463 	if(d.dir != 0xFFFFFFFF)
464 		filldir(nt, f, d.dir, nrecur+1);
465 
466 	closefile(nt);
467 }
468 
469 Srv olesrv = {
470 	.read=	oleread,
471 };
472 
473 void
main(int argc,char ** argv)474 main(int argc, char **argv)
475 {
476 	char *mtpt;
477 	Ofile *f;
478 	Odir d;
479 
480 	mtpt = "/mnt/doc";
481 	ARGBEGIN{
482 	case 'm':
483 		mtpt = ARGF();
484 		break;
485 
486 	default:
487 		goto Usage;
488 	}ARGEND
489 
490 	if(argc != 1) {
491 	Usage:
492 		fprint(2, "usage: olefs file\n");
493 		exits("usage");
494 	}
495 
496 	f = oleopen(argv[0]);
497 	if(f == nil) {
498 		print("error opening %s: %r\n", argv[0]);
499 		exits("open");
500 	}
501 
502 //	dumpdir(f, 0);
503 
504 	if(oreaddir(f, 0, &d) != 1) {
505 		fprint(2, "oreaddir error: %r\n");
506 		exits("oreaddir");
507 	}
508 
509 	olesrv.tree = alloctree(nil, nil, DMDIR|0777, nil);
510 	filldir(olesrv.tree->root, f, d.dir, 0);
511 	postmountsrv(&olesrv, nil, mtpt, MREPL);
512 	exits(0);
513 }
514