xref: /plan9/sys/src/cmd/aux/antiword/misc.c (revision 25b329d522281a8cdd35da0dcc08c3fc621059a9)
1 /*
2  * misc.c
3  * Copyright (C) 1998-2005 A.J. van Os; Released under GNU GPL
4  *
5  * Description:
6  * Miscellaneous functions
7  */
8 
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <ctype.h>
13 #include <time.h>
14 #if defined(__riscos)
15 #include "DeskLib:SWI.h"
16 #else
17 #include <errno.h>
18 #include <sys/types.h>
19 #include <sys/stat.h>
20 #endif /* __riscos */
21 #if !defined(S_ISREG)
22 #define S_ISREG(x)	(((x) & S_IFMT) == S_IFREG)
23 #endif /* !S_ISREG */
24 #include "antiword.h"
25 #if defined(__vms)
26 #include <unixlib.h>
27 #endif
28 
29 #if !defined(__riscos)
/*
 * szGetHomeDirectory - look up the name of the user's home directory
 *
 * The name is read from the environment: variable "home" when __Plan9__
 * is defined, "HOME" otherwise (passed through decc$translate_vms when
 * __vms is defined).
 *
 * Returns the directory name. When the variable is unset or empty the
 * result is "SYS:" (N_PLAT_NLM), "C:" (__dos) or, on all other platforms,
 * an empty string after the problem has been reported with werr.
 */
const char *
szGetHomeDirectory(void)
{
	const char	*szDir;

#if defined(__vms)
	szDir = decc$translate_vms(getenv("HOME"));
#elif defined(__Plan9__)
	szDir = getenv("home");
#else
	szDir = getenv("HOME");
#endif /* __vms */

	if (szDir != NULL && szDir[0] != '\0') {
		/* The environment supplied a usable name */
		return szDir;
	}

	/* No usable name: fall back to a platform dependent default */
#if defined(N_PLAT_NLM)
	return "SYS:";
#elif defined(__dos)
	return "C:";
#else
	werr(0, "I can't find the name of your HOME directory");
	return "";
#endif /* __dos */
} /* end of szGetHomeDirectory */
58 
/*
 * szGetAntiwordDirectory - look up the name of the Antiword directory
 *
 * Returns the value of the environment variable ANTIWORDHOME as delivered
 * by getenv; when __vms is defined that value is first passed through
 * decc$translate_vms.
 */
const char *
szGetAntiwordDirectory(void)
{
	const char	*szDir;

	szDir = getenv("ANTIWORDHOME");
#if defined(__vms)
	szDir = decc$translate_vms(szDir);
#endif /* __vms */
	return szDir;
} /* end of szGetAntiwordDirectory */
71 #endif /* !__riscos */
72 
/*
 * lGetFilesize - determine the size of the specified file
 *
 * Returns the size of the file, or -1 when the file can not be examined
 * (an error is reported with werr in that case) or when it is not a
 * proper (regular) file.
 */
long
lGetFilesize(const char *szFilename)
{
#if defined(__riscos)
	os_error	*pError;
	int	iObjectType, iLength;

	pError = SWI(2, 5, SWI_OS_File | XOS_Bit,
		17, szFilename,
		&iObjectType, NULL, NULL, NULL, &iLength);
	if (pError != NULL) {
		werr(0, "Get Filesize error %d: %s",
			pError->errnum, pError->errmess);
		return -1;
	}
	/* Any type other than 1: not a proper file or no file at all */
	return iObjectType == 1 ? (long)iLength : -1;
#else
	struct stat	tInfo;

	errno = 0;
	if (stat(szFilename, &tInfo) != 0) {
		werr(0, "Get Filesize error %d", errno);
		return -1;
	}
	/* Only regular files have a size that is of any use here */
	return S_ISREG(tInfo.st_mode) ? (long)tInfo.st_size : -1;
#endif /* __riscos */
} /* end of lGetFilesize */
112 
113 #if defined(DEBUG)
114 void
vPrintBlock(const char * szFile,int iLine,const UCHAR * aucBlock,size_t tLength)115 vPrintBlock(const char	*szFile, int iLine,
116 		const UCHAR *aucBlock, size_t tLength)
117 {
118 	int i, j;
119 
120 	fail(szFile == NULL || iLine < 0 || aucBlock == NULL);
121 
122 	fprintf(stderr, "%s[%3d]:\n", szFile, iLine);
123 	for (i = 0; i < 32; i++) {
124 		if (16 * i >= (int)tLength) {
125 			return;
126 		}
127 		fprintf(stderr, "%03x: ", (unsigned int)(16 * i));
128 		for (j = 0; j < 16; j++) {
129 			if (16 * i + j < (int)tLength) {
130 				fprintf(stderr, "%02x ",
131 					(unsigned int)aucBlock[16 * i + j]);
132 			}
133 		}
134 		fprintf(stderr, "\n");
135 	}
136 } /* end of vPrintBlock */
137 
138 void
vPrintUnicode(const char * szFile,int iLine,const UCHAR * aucUni,size_t tLen)139 vPrintUnicode(const char *szFile, int iLine, const UCHAR *aucUni, size_t tLen)
140 {
141 	char	*szASCII;
142 
143 	fail(tLen % 2 != 0);
144 
145 	tLen /= 2;	/* Length in bytes to length in characters */
146 	szASCII = xmalloc(tLen + 1);
147 	(void)unincpy(szASCII, aucUni, tLen);
148 	szASCII[tLen] = '\0';
149 	(void)fprintf(stderr, "%s[%3d]: %.*s\n",
150 				szFile, iLine, (int)tLen, szASCII);
151 	szASCII = xfree(szASCII);
152 } /* end of vPrintUnicode */
153 
154 BOOL
bCheckDoubleLinkedList(output_type * pAnchor)155 bCheckDoubleLinkedList(output_type *pAnchor)
156 {
157 	output_type	*pCurr, *pLast;
158 	int		iInList;
159 
160 	pLast = pAnchor;
161 	iInList = 0;
162 	for (pCurr = pAnchor; pCurr != NULL; pCurr = pCurr->pNext) {
163 		pLast = pCurr;
164 		iInList++;
165 	}
166 	NO_DBG_DEC(iInList);
167 	for (pCurr = pLast; pCurr != NULL; pCurr = pCurr->pPrev) {
168 		pLast = pCurr;
169 		iInList--;
170 	}
171 	DBG_DEC_C(iInList != 0, iInList);
172 	return pAnchor == pLast && iInList == 0;
173 } /* end of bCheckDoubleLinkedList */
174 #endif /* DEBUG */
175 
176 /*
177  * bReadBytes
178  * This function reads the specified number of bytes from the specified file,
179  * starting from the specified offset.
180  * Returns TRUE when successfull, otherwise FALSE
181  */
182 BOOL
bReadBytes(UCHAR * aucBytes,size_t tMemb,ULONG ulOffset,FILE * pFile)183 bReadBytes(UCHAR *aucBytes, size_t tMemb, ULONG ulOffset, FILE *pFile)
184 {
185 	fail(aucBytes == NULL || pFile == NULL || ulOffset > (ULONG)LONG_MAX);
186 
187 	if (ulOffset > (ULONG)LONG_MAX) {
188 		return FALSE;
189 	}
190 	if (fseek(pFile, (long)ulOffset, SEEK_SET) != 0) {
191 		return FALSE;
192 	}
193 	if (fread(aucBytes, sizeof(UCHAR), tMemb, pFile) != tMemb) {
194 		return FALSE;
195 	}
196 	return TRUE;
197 } /* end of bReadBytes */
198 
199 /*
200  * bReadBuffer
201  * This function fills the specified buffer with the specified number of bytes,
202  * starting at the specified offset within the Big/Small Block Depot.
203  *
204  * Returns TRUE when successful, otherwise FALSE
205  */
206 BOOL
bReadBuffer(FILE * pFile,ULONG ulStartBlock,const ULONG * aulBlockDepot,size_t tBlockDepotLen,size_t tBlockSize,UCHAR * aucBuffer,ULONG ulOffset,size_t tToRead)207 bReadBuffer(FILE *pFile, ULONG ulStartBlock,
208 	const ULONG *aulBlockDepot, size_t tBlockDepotLen, size_t tBlockSize,
209 	UCHAR *aucBuffer, ULONG ulOffset, size_t tToRead)
210 {
211 	ULONG	ulBegin, ulIndex;
212 	size_t	tLen;
213 
214 	fail(pFile == NULL);
215 	fail(ulStartBlock > MAX_BLOCKNUMBER && ulStartBlock != END_OF_CHAIN);
216 	fail(aulBlockDepot == NULL);
217 	fail(tBlockSize != BIG_BLOCK_SIZE && tBlockSize != SMALL_BLOCK_SIZE);
218 	fail(aucBuffer == NULL);
219 	fail(tToRead == 0);
220 
221 	for (ulIndex = ulStartBlock;
222 	     ulIndex != END_OF_CHAIN && tToRead != 0;
223 	     ulIndex = aulBlockDepot[ulIndex]) {
224 		if (ulIndex >= (ULONG)tBlockDepotLen) {
225 			DBG_DEC(ulIndex);
226 			DBG_DEC(tBlockDepotLen);
227 			if (tBlockSize >= BIG_BLOCK_SIZE) {
228 				werr(1, "The Big Block Depot is damaged");
229 			} else {
230 				werr(1, "The Small Block Depot is damaged");
231 			}
232 		}
233 		if (ulOffset >= (ULONG)tBlockSize) {
234 			ulOffset -= tBlockSize;
235 			continue;
236 		}
237 		ulBegin = ulDepotOffset(ulIndex, tBlockSize) + ulOffset;
238 		tLen = min(tBlockSize - (size_t)ulOffset, tToRead);
239 		ulOffset = 0;
240 		if (!bReadBytes(aucBuffer, tLen, ulBegin, pFile)) {
241 			werr(0, "Read big block 0x%lx not possible", ulBegin);
242 			return FALSE;
243 		}
244 		aucBuffer += tLen;
245 		tToRead -= tLen;
246 	}
247 	DBG_DEC_C(tToRead != 0, tToRead);
248 	return tToRead == 0;
249 } /* end of bReadBuffer */
250 
251 /*
252  * Convert a Word colornumber into a true color for use in a drawfile
253  *
254  * Returns the true color
255  */
256 ULONG
ulColor2Color(UCHAR ucFontColor)257 ulColor2Color(UCHAR ucFontColor)
258 {
259 	static const ULONG	aulColorTable[] = {
260 		/*  0 */	0x00000000UL,	/* Automatic */
261 		/*  1 */	0x00000000UL,	/* Black */
262 		/*  2 */	0xff000000UL,	/* Blue */
263 		/*  3 */	0xffff0000UL,	/* Turquoise */
264 		/*  4 */	0x00ff0000UL,	/* Bright Green */
265 		/*  5 */	0xff00ff00UL,	/* Pink */
266 		/*  6 */	0x0000ff00UL,	/* Red */
267 		/*  7 */	0x00ffff00UL,	/* Yellow */
268 		/*  8 */	0xffffff00UL,	/* White */
269 		/*  9 */	0x80000000UL,	/* Dark Blue */
270 		/* 10 */	0x80800000UL,	/* Teal */
271 		/* 11 */	0x00800000UL,	/* Green */
272 		/* 12 */	0x80008000UL,	/* Violet */
273 		/* 13 */	0x00008000UL,	/* Dark Red */
274 		/* 14 */	0x00808000UL,	/* Dark Yellow */
275 		/* 15 */	0x80808000UL,	/* Gray 50% */
276 		/* 16 */	0xc0c0c000UL,	/* Gray 25% */
277 	};
278 	if ((size_t)ucFontColor >= elementsof(aulColorTable)) {
279 		return aulColorTable[0];
280 	}
281 	return aulColorTable[(int)ucFontColor];
282 } /* end of ulColor2Color */
283 
/*
 * iFindSplit - find the last place where the string can be split
 *
 * A string can be split at a space, or at a hyphen that is not directly
 * preceded by a space. The first character of the string is never
 * considered.
 *
 * returns the index of the split character or -1 if no split found.
 */
static int
iFindSplit(const char *szString, size_t tStringLen)
{
	size_t	tPos;

	if (tStringLen == 0) {
		return -1;
	}
	/* Search from the end of the string down to index 1 */
	for (tPos = tStringLen - 1; tPos >= 1; tPos--) {
		if (szString[tPos] == ' ') {
			return (int)tPos;
		}
		if (szString[tPos] == '-' && szString[tPos - 1] != ' ') {
			return (int)tPos;
		}
	}
	return -1;
} /* end of iFindSplit */
307 
308 /*
309  * pSplitList - split the specified list in a printable part and a leftover part
310  *
311  * returns the pointer to the leftover part
312  */
313 output_type *
pSplitList(output_type * pAnchor)314 pSplitList(output_type *pAnchor)
315 {
316 	output_type	*pCurr, *pLeftOver;
317 	int		iIndex;
318 
319  	fail(pAnchor == NULL);
320 
321 	for (pCurr = pAnchor; pCurr->pNext != NULL; pCurr = pCurr->pNext)
322 		;	/* EMPTY */
323 	iIndex = -1;
324 	for (; pCurr != NULL; pCurr = pCurr->pPrev) {
325 		iIndex = iFindSplit(pCurr->szStorage, pCurr->tNextFree);
326 		if (iIndex >= 0) {
327 			break;
328 		}
329 	}
330 
331 	if (pCurr == NULL || iIndex < 0) {
332 		/* No split, no leftover */
333 		return NULL;
334 	}
335 	/* Split over the iIndex-th character */
336 	NO_DBG_MSG("pLeftOver");
337 	pLeftOver = xmalloc(sizeof(*pLeftOver));
338 	fail(pCurr->tNextFree < (size_t)iIndex);
339 	pLeftOver->tStorageSize = pCurr->tNextFree - (size_t)iIndex;
340 	pLeftOver->szStorage = xmalloc(pLeftOver->tStorageSize);
341 	pLeftOver->tNextFree = pCurr->tNextFree - (size_t)iIndex - 1;
342 	(void)strncpy(pLeftOver->szStorage,
343 		pCurr->szStorage + iIndex + 1, pLeftOver->tNextFree);
344 	pLeftOver->szStorage[pLeftOver->tNextFree] = '\0';
345 	NO_DBG_MSG(pLeftOver->szStorage);
346 	pLeftOver->ucFontColor = pCurr->ucFontColor;
347 	pLeftOver->usFontStyle = pCurr->usFontStyle;
348 	pLeftOver->tFontRef = pCurr->tFontRef;
349 	pLeftOver->usFontSize = pCurr->usFontSize;
350 	pLeftOver->lStringWidth = lComputeStringWidth(
351 					pLeftOver->szStorage,
352 					pLeftOver->tNextFree,
353 					pLeftOver->tFontRef,
354 					pLeftOver->usFontSize);
355 	pLeftOver->pPrev = NULL;
356 	pLeftOver->pNext = pCurr->pNext;
357 	if (pLeftOver->pNext != NULL) {
358 		pLeftOver->pNext->pPrev = pLeftOver;
359 	}
360 	fail(!bCheckDoubleLinkedList(pLeftOver));
361 
362 	NO_DBG_MSG("pAnchor");
363 	NO_DBG_HEX(pCurr->szStorage[iIndex]);
364 	while (iIndex >= 0 && isspace((int)(UCHAR)pCurr->szStorage[iIndex])) {
365 		iIndex--;
366 	}
367 	pCurr->tNextFree = (size_t)iIndex + 1;
368 	pCurr->szStorage[pCurr->tNextFree] = '\0';
369 	NO_DBG_MSG(pCurr->szStorage);
370 	pCurr->lStringWidth = lComputeStringWidth(
371 					pCurr->szStorage,
372 					pCurr->tNextFree,
373 					pCurr->tFontRef,
374 					pCurr->usFontSize);
375 	pCurr->pNext = NULL;
376 	fail(!bCheckDoubleLinkedList(pAnchor));
377 
378 	return pLeftOver;
379 } /* end of pSplitList */
380 
381 /*
382  * tNumber2Roman - convert a number to Roman Numerals
383  *
384  * returns the number of characters written
385  */
386 size_t
tNumber2Roman(UINT uiNumber,BOOL bUpperCase,char * szOutput)387 tNumber2Roman(UINT uiNumber, BOOL bUpperCase, char *szOutput)
388 {
389 	char	*outp, *p, *q;
390 	UINT	uiNextVal, uiValue;
391 
392 	fail(szOutput == NULL);
393 
394 	uiNumber %= 4000;	/* Very high numbers can't be represented */
395 	if (uiNumber == 0) {
396 		szOutput[0] = '\0';
397 		return 0;
398 	}
399 
400 	outp = szOutput;
401 	p = bUpperCase ? "M\2D\5C\2L\5X\2V\5I" : "m\2d\5c\2l\5x\2v\5i";
402 	uiValue = 1000;
403 	for (;;) {
404 		while (uiNumber >= uiValue) {
405 			*outp++ = *p;
406 			uiNumber -= uiValue;
407 		}
408 		if (uiNumber == 0) {
409 			*outp = '\0';
410 			fail(outp < szOutput);
411 			return (size_t)(outp - szOutput);
412 		}
413 		q = p + 1;
414 		uiNextVal = uiValue / (UINT)(UCHAR)*q;
415 		if ((int)*q == 2) {		/* magic */
416 			uiNextVal /= (UINT)(UCHAR)*(q += 2);
417 		}
418 		if (uiNumber + uiNextVal >= uiValue) {
419 			*outp++ = *++q;
420 			uiNumber += uiNextVal;
421 		} else {
422 			p++;
423 			uiValue /= (UINT)(UCHAR)(*p++);
424 		}
425 	}
426 } /* end of tNumber2Roman */
427 
428 /*
429  * iNumber2Alpha - convert a number to alphabetic "numbers"
430  *
431  * returns the number of characters written
432  */
433 size_t
tNumber2Alpha(UINT uiNumber,BOOL bUpperCase,char * szOutput)434 tNumber2Alpha(UINT uiNumber, BOOL bUpperCase, char *szOutput)
435 {
436 	char	*outp;
437 	UINT	uiTmp;
438 
439 	fail(szOutput == NULL);
440 
441 	if (uiNumber == 0) {
442 		szOutput[0] = '\0';
443 		return 0;
444 	}
445 
446 	outp = szOutput;
447 	uiTmp = (UINT)(bUpperCase ? 'A': 'a');
448 	if (uiNumber <= 26) {
449 		uiNumber -= 1;
450 		*outp++ = (char)(uiTmp + uiNumber);
451 	} else if (uiNumber <= 26U + 26U*26U) {
452 		uiNumber -= 26 + 1;
453 		*outp++ = (char)(uiTmp + uiNumber / 26);
454 		*outp++ = (char)(uiTmp + uiNumber % 26);
455 	} else if (uiNumber <= 26U + 26U*26U + 26U*26U*26U) {
456 		uiNumber -= 26 + 26*26 + 1;
457 		*outp++ = (char)(uiTmp + uiNumber / (26*26));
458 		*outp++ = (char)(uiTmp + uiNumber / 26 % 26);
459 		*outp++ = (char)(uiTmp + uiNumber % 26);
460 	}
461 	*outp = '\0';
462 	fail(outp < szOutput);
463 	return (size_t)(outp - szOutput);
464 } /* end of tNumber2Alpha */
465 
466 /*
467  * unincpy - copy a counted Unicode string to an single-byte string
468  */
469 char *
unincpy(char * s1,const UCHAR * s2,size_t n)470 unincpy(char *s1, const UCHAR *s2, size_t n)
471 {
472 	char	*pcDest;
473 	ULONG	ulChar;
474 	size_t	tLen;
475 	USHORT	usUni;
476 
477 	for (pcDest = s1, tLen = 0; tLen < n; pcDest++, tLen++) {
478 		usUni = usGetWord(tLen * 2, s2);
479 		if (usUni == 0) {
480 			break;
481 		}
482 		ulChar = ulTranslateCharacters(usUni, 0, 8,
483 				conversion_unknown, encoding_neutral, FALSE);
484 		if (ulChar == IGNORE_CHARACTER) {
485 			ulChar = (ULONG)'?';
486 		}
487 		*pcDest = (char)ulChar;
488 	}
489 	for (; tLen < n; tLen++) {
490 		*pcDest++ = '\0';
491 	}
492 	return s1;
493 } /* end of unincpy */
494 
495 /*
496  * unilen - calculate the length of a Unicode string
497  *
498  * returns the length in bytes
499  */
500 size_t
unilen(const UCHAR * s)501 unilen(const UCHAR *s)
502 {
503 	size_t	tLen;
504 	USHORT	usUni;
505 
506 	tLen = 0;
507 	for (;;) {
508 		usUni = usGetWord(tLen, s);
509 		if (usUni == 0) {
510 			return tLen;
511 		}
512 		tLen += 2;
513 	}
514 } /* end of unilen */
515 
516 /*
517  * szBaseName - get the basename of the specified filename
518  */
519 const char *
szBasename(const char * szFilename)520 szBasename(const char *szFilename)
521 {
522 	const char	*szTmp;
523 
524 	fail(szFilename == NULL);
525 
526 	if (szFilename == NULL || szFilename[0] == '\0') {
527 		return "null";
528 	}
529 
530 	szTmp = strrchr(szFilename, FILE_SEPARATOR[0]);
531 	if (szTmp == NULL) {
532 		return szFilename;
533 	}
534 	return ++szTmp;
535 } /* end of szBasename */
536 
537 /*
538  * lComputeLeading - compute the leading
539  *
540  * NOTE: the fontsize is specified in half points
541  *
542  * Returns the leading in drawunits
543  */
544 long
lComputeLeading(USHORT usFontSize)545 lComputeLeading(USHORT usFontSize)
546 {
547 	long	lLeading;
548 
549 	lLeading = (long)usFontSize * 500L;
550 	if (usFontSize < 18) {		/* Small text: 112% */
551 		lLeading *= 112;
552 	} else if (usFontSize < 28) {	/* Normal text: 124% */
553 		lLeading *= 124;
554 	} else if (usFontSize < 48) {	/* Small headlines: 104% */
555 		lLeading *= 104;
556 	} else {			/* Large headlines: 100% */
557 		lLeading *= 100;
558 	}
559 	lLeading = lMilliPoints2DrawUnits(lLeading);
560 	lLeading += 50;
561 	lLeading /= 100;
562 	return lLeading;
563 } /* end of lComputeLeading */
564 
565 /*
566  * Convert a UCS character to an UTF-8 string
567  *
568  * Returns the string length of the result
569  */
570 size_t
tUcs2Utf8(ULONG ulChar,char * szResult,size_t tMaxResultLen)571 tUcs2Utf8(ULONG ulChar, char *szResult, size_t tMaxResultLen)
572 {
573 	if (szResult == NULL || tMaxResultLen == 0) {
574 		return 0;
575 	}
576 
577 	if (ulChar < 0x80 && tMaxResultLen >= 2) {
578 		szResult[0] = (char)ulChar;
579 		szResult[1] = '\0';
580 		return 1;
581 	}
582 	if (ulChar < 0x800 && tMaxResultLen >= 3) {
583 		szResult[0] = (char)(0xc0 | ulChar >> 6);
584 		szResult[1] = (char)(0x80 | (ulChar & 0x3f));
585 		szResult[2] = '\0';
586 		return 2;
587 	}
588 	if (ulChar < 0x10000 && tMaxResultLen >= 4) {
589 		szResult[0] = (char)(0xe0 | ulChar >> 12);
590 		szResult[1] = (char)(0x80 | (ulChar >> 6 & 0x3f));
591 		szResult[2] = (char)(0x80 | (ulChar & 0x3f));
592 		szResult[3] = '\0';
593 		return 3;
594 	}
595 	if (ulChar < 0x200000 && tMaxResultLen >= 5) {
596 		szResult[0] = (char)(0xf0 | ulChar >> 18);
597 		szResult[1] = (char)(0x80 | (ulChar >> 12 & 0x3f));
598 		szResult[2] = (char)(0x80 | (ulChar >> 6 & 0x3f));
599 		szResult[3] = (char)(0x80 | (ulChar & 0x3f));
600 		szResult[4] = '\0';
601 		return 4;
602 	}
603 	szResult[0] = '\0';
604 	return 0;
605 } /* end of tUcs2Utf8 */
606 
607 /*
608  * vGetBulletValue - get the bullet value for the conversing type and encoding
609  */
610 void
vGetBulletValue(conversion_type eConversionType,encoding_type eEncoding,char * szResult,size_t tMaxResultLen)611 vGetBulletValue(conversion_type eConversionType, encoding_type eEncoding,
612 	char *szResult, size_t tMaxResultLen)
613 {
614 	fail(szResult == NULL);
615 	fail(tMaxResultLen < 2);
616 
617 	if (eEncoding == encoding_utf_8) {
618 		(void)tUcs2Utf8(UNICODE_BULLET, szResult, tMaxResultLen);
619 	} else {
620 		szResult[0] = (char)ucGetBulletCharacter(eConversionType,
621 							eEncoding);
622 		szResult[1] = '\0';
623 	}
624 } /* end of vGetBulletValue */
625 
626 /*
627  * bAllZero - are all bytes zero?
628  */
629 BOOL
bAllZero(const UCHAR * aucBytes,size_t tLength)630 bAllZero(const UCHAR *aucBytes, size_t tLength)
631 {
632 	size_t	tIndex;
633 
634 	if (aucBytes == NULL || tLength == 0) {
635 		return TRUE;
636 	}
637 
638 	for (tIndex = 0; tIndex < tLength; tIndex++) {
639 		if (aucBytes[tIndex] != 0) {
640 			return FALSE;
641 		}
642 	}
643 	return TRUE;
644 } /* end of bAllZero */
645 
646 #if !defined(__riscos)
647 /*
648  * GetCodesetFromLocale - get the codeset from the current locale
649  *
650  * Original version: Copyright (C) 1999  Bruno Haible
651  * Syntax:
652  * language[_territory][.codeset][@modifier][+special][,[sponsor][_revision]]
653  *
654  * Returns TRUE when sucessful, otherwise FALSE
655  */
656 static BOOL
bGetCodesetFromLocale(char * szCodeset,size_t tMaxCodesetLength,BOOL * pbEuro)657 bGetCodesetFromLocale(char *szCodeset, size_t tMaxCodesetLength, BOOL *pbEuro)
658 {
659 #if !defined(__dos)
660 	const char	*szLocale;
661 	const char	*pcTmp;
662 	size_t		tIndex;
663 	char		szModifier[6];
664 #endif /* __dos */
665 
666 	if (pbEuro != NULL) {
667 		*pbEuro = FALSE;	/* Until proven otherwise */
668 	}
669 	if (szCodeset == NULL || tMaxCodesetLength == 0) {
670 		return FALSE;
671 	}
672 
673 #if defined(__dos)
674 	if (tMaxCodesetLength < 2 + sizeof(int) * 3 + 1) {
675 		DBG_DEC(tMaxCodesetLength);
676 		DBG_DEC(2 + sizeof(int) * 3 + 1);
677 		return FALSE;
678 	}
679 	/* Get the active codepage from DOS */
680 	sprintf(szCodeset, "cp%d", iGetCodepage());
681 	DBG_MSG(szCodeset);
682 #else
683 	/* Get the locale from the environment */
684 	szLocale = getenv("LC_ALL");
685 	if (szLocale == NULL || szLocale[0] == '\0') {
686 		szLocale = getenv("LC_CTYPE");
687 		if (szLocale == NULL || szLocale[0] == '\0') {
688 			szLocale = getenv("LANG");
689 		}
690 	}
691 	if (szLocale == NULL || szLocale[0] == '\0') {
692 		/* No locale, so no codeset name and no modifier */
693 		return FALSE;
694 	}
695 	DBG_MSG(szLocale);
696 	pcTmp = strchr(szLocale, '.');
697 	if (pcTmp == NULL) {
698 		/* No codeset name */
699 		szCodeset[0] = '\0';
700 	} else {
701 		/* Copy the codeset name */
702 		pcTmp++;
703 		for (tIndex = 0; tIndex < tMaxCodesetLength; tIndex++) {
704 			if (*pcTmp == '@' || *pcTmp == '+' ||
705 			    *pcTmp == ',' || *pcTmp == '_' ||
706 			    *pcTmp == '\0') {
707 				szCodeset[tIndex] = '\0';
708 				break;
709 			}
710 			szCodeset[tIndex] = *pcTmp;
711 			pcTmp++;
712 		}
713 		szCodeset[tMaxCodesetLength - 1] = '\0';
714 	}
715 	if (pbEuro == NULL) {
716 		/* No need to get the modifier */
717 		return TRUE;
718 	}
719 	pcTmp = strchr(szLocale, '@');
720 	if (pcTmp != NULL) {
721 		/* Copy the modifier */
722 		pcTmp++;
723 		for (tIndex = 0; tIndex < sizeof(szModifier); tIndex++) {
724 			if (*pcTmp == '+' || *pcTmp == ',' ||
725 			    *pcTmp == '_' || *pcTmp == '\0') {
726 				szModifier[tIndex] = '\0';
727 				break;
728 			}
729 			szModifier[tIndex] = *pcTmp;
730 			pcTmp++;
731 		}
732 		szModifier[sizeof(szModifier) - 1] = '\0';
733 		*pbEuro = STRCEQ(szModifier, "Euro");
734 	}
735 #endif /* __dos */
736 	return TRUE;
737 } /* end of bGetCodesetFromLocale */
738 
739 /*
740  * GetNormalizedCodeset - get the normalized codeset from the current locale
741  *
742  * Returns TRUE when sucessful, otherwise FALSE
743  */
744 BOOL
bGetNormalizedCodeset(char * szCodeset,size_t tMaxCodesetLength,BOOL * pbEuro)745 bGetNormalizedCodeset(char *szCodeset, size_t tMaxCodesetLength, BOOL *pbEuro)
746 {
747 	BOOL	bOnlyDigits;
748 	const char	*pcSrc;
749 	char	*pcDest;
750 	char	*szTmp, *szCodesetNorm;
751 
752 	if (pbEuro != NULL) {
753 		*pbEuro = FALSE;	/* Until proven otherwise */
754 	}
755 	if (szCodeset == NULL || tMaxCodesetLength < 4) {
756 		return FALSE;
757 	}
758 
759 	/* Get the codeset name */
760 	szTmp = xmalloc(tMaxCodesetLength - 3);
761 	if (!bGetCodesetFromLocale(szTmp, tMaxCodesetLength - 3, pbEuro)) {
762 		szTmp = xfree(szTmp);
763 		return FALSE;
764 	}
765 	/* Normalize the codeset name */
766 	szCodesetNorm = xmalloc(tMaxCodesetLength - 3);
767 	bOnlyDigits = TRUE;
768 	pcDest = szCodesetNorm;
769 	for (pcSrc = szTmp; *pcSrc != '\0'; pcSrc++) {
770 		if (isalnum(*pcSrc)) {
771 			*pcDest = tolower(*pcSrc);
772 			if (!isdigit(*pcDest)) {
773 				bOnlyDigits = FALSE;
774 			}
775 			pcDest++;
776 		}
777 	}
778 	*pcDest = '\0';
779 	DBG_MSG(szCodesetNorm);
780 	/* Add "iso" when szCodesetNorm contains all digits */
781 	if (bOnlyDigits && szCodesetNorm[0] != '\0') {
782 		fail(strlen(szCodesetNorm) + 3 >= tMaxCodesetLength);
783 		sprintf(szCodeset, "iso%s", szCodesetNorm);
784 	} else {
785 		fail(strlen(szCodesetNorm) >= tMaxCodesetLength);
786 		strncpy(szCodeset, szCodesetNorm, pcDest - szCodesetNorm + 1);
787 		szCodeset[tMaxCodesetLength - 1] = '\0';
788 	}
789 	DBG_MSG(szCodeset);
790 	/* Clean up and leave */
791 	szCodesetNorm = xfree(szCodesetNorm);
792 	szTmp = xfree(szTmp);
793 	return TRUE;
794 } /* end of bGetNormalizedCodeset */
795 
796 /*
797  * szGetDefaultMappingFile - get the default mapping file
798  *
799  * Returns the basename of the default mapping file
800  */
801 const char *
szGetDefaultMappingFile(void)802 szGetDefaultMappingFile(void)
803 {
804 	static const struct {
805 		const char	*szCodeset;
806 		const char	*szMappingFile;
807 	} atMappingFile[] = {
808 		{ "iso88591",	MAPPING_FILE_8859_1 },
809 		{ "iso88592",	MAPPING_FILE_8859_2 },
810 		{ "iso88593",	"8859-3.txt" },
811 		{ "iso88594",	"8859-4.txt" },
812 		{ "iso88595",	"8859-5.txt" },
813 		{ "iso88596",	MAPPING_FILE_8859_5 },
814 		{ "iso88597",	"8859-7.txt" },
815 		{ "iso88598",	"8859-8.txt" },
816 		{ "iso88599",	"8859-9.txt" },
817 		{ "iso885910",	"8859-10.txt" },
818 		{ "iso885913",	"8859-13.txt" },
819 		{ "iso885914",	"8859-14.txt" },
820 		{ "iso885915",	MAPPING_FILE_8859_15 },
821 		{ "iso885916",	"8859-16.txt" },
822 		{ "koi8r",	MAPPING_FILE_KOI8_R },
823 		{ "koi8u",	MAPPING_FILE_KOI8_U },
824 		{ "utf8",	MAPPING_FILE_UTF_8 },
825 		{ "cp437",	MAPPING_FILE_CP437 },
826 		{ "cp850",	"cp850.txt" },
827 		{ "cp852",	MAPPING_FILE_CP852 },
828 		{ "cp862",	"cp862.txt" },
829 		{ "cp864",	"cp864.txt" },
830 		{ "cp866",	MAPPING_FILE_CP866 },
831 		{ "cp1250",	MAPPING_FILE_CP1250 },
832 		{ "cp1251",	MAPPING_FILE_CP1251 },
833 		{ "cp1252",	"cp1252.txt" },
834 	};
835 	size_t	tIndex;
836 	BOOL	bEuro;
837 	char	szCodeset[20];
838 
839 	szCodeset[0] = '\0';
840 	bEuro = FALSE;
841 	/* Get the normalized codeset name */
842 	if (!bGetNormalizedCodeset(szCodeset, sizeof(szCodeset), &bEuro)) {
843 		return MAPPING_FILE_8859_1;
844 	}
845 	if (szCodeset[0] == '\0') {
846 		if (bEuro) {
847 			/* Default mapping file (with Euro sign) */
848 			return MAPPING_FILE_8859_15;
849 		} else {
850 			/* Default mapping file (without Euro sign) */
851 			return MAPPING_FILE_8859_1;
852 		}
853 	}
854 	/* Find the name in the table */
855 	for (tIndex = 0; tIndex < elementsof(atMappingFile); tIndex++) {
856 		if (STREQ(atMappingFile[tIndex].szCodeset, szCodeset)) {
857 			return atMappingFile[tIndex].szMappingFile;
858 		}
859 	}
860 	/* Default default mapping file */
861 #if defined(__dos)
862 	return MAPPING_FILE_CP437;
863 #else
864 	return MAPPING_FILE_8859_1;
865 #endif /* __dos */
866 } /* end of szGetDefaultMappingFile */
867 #endif /* !__riscos */
868 
869 /*
870  * tConvertDTTM - convert Windows Date and Time format
871  *
872  * returns Unix time_t or -1
873  */
874 time_t
tConvertDTTM(ULONG ulDTTM)875 tConvertDTTM(ULONG ulDTTM)
876 {
877 	struct tm	tTime;
878 	time_t		tResult;
879 
880 	if (ulDTTM == 0) {
881 		return (time_t)-1;
882 	}
883 	memset(&tTime, 0, sizeof(tTime));
884 	tTime.tm_min = (int)(ulDTTM & 0x0000003f);
885 	tTime.tm_hour = (int)((ulDTTM & 0x000007c0) >> 6);
886 	tTime.tm_mday = (int)((ulDTTM & 0x0000f800) >> 11);
887 	tTime.tm_mon = (int)((ulDTTM & 0x000f0000) >> 16);
888 	tTime.tm_year = (int)((ulDTTM & 0x1ff00000) >> 20);
889 	tTime.tm_isdst = -1;
890 	tTime.tm_mon--;         /* From 01-12 to 00-11 */
891 	tResult = mktime(&tTime);
892 	NO_DBG_MSG(ctime(&tResult));
893 	return tResult;
894 } /* end of tConvertDTTM */
895