xref: /onnv-gate/usr/src/cmd/man/src/util/instant.src/traninit.c (revision 0:68f95e015346)
1 /*
2  *  Copyright 1993 Open Software Foundation, Inc., Cambridge, Massachusetts.
3  *  All rights reserved.
4  */
5 /*
6 #pragma ident	"%Z%%M%	%I%	%E% SMI"
7  * Copyright (c) 1994
8  * Open Software Foundation, Inc.
9  *
10  * Permission is hereby granted to use, copy, modify and freely distribute
11  * the software in this file and its documentation for any purpose without
12  * fee, provided that the above copyright notice appears in all copies and
13  * that both the copyright notice and this permission notice appear in
14  * supporting documentation.  Further, provided that the name of Open
15  * Software Foundation, Inc. ("OSF") not be used in advertising or
16  * publicity pertaining to distribution of the software without prior
17  * written permission from OSF.  OSF makes no representations about the
18  * suitability of this software for any purpose.  It is provided "as is"
19  * without express or implied warranty.
20  */
21 /*
22  * Copyright (c) 1996 X Consortium
23  * Copyright (c) 1995, 1996 Dalrymple Consulting
24  *
25  * Permission is hereby granted, free of charge, to any person obtaining a copy
26  * of this software and associated documentation files (the "Software"), to deal
27  * in the Software without restriction, including without limitation the rights
28  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
29  * copies of the Software, and to permit persons to whom the Software is
30  * furnished to do so, subject to the following conditions:
31  *
32  * The above copyright notice and this permission notice shall be included in
33  * all copies or substantial portions of the Software.
34  *
35  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
36  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
37  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
38  * X CONSORTIUM OR DALRYMPLE CONSULTING BE LIABLE FOR ANY CLAIM, DAMAGES OR
39  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
40  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
41  * OTHER DEALINGS IN THE SOFTWARE.
42  *
43  * Except as contained in this notice, the names of the X Consortium and
44  * Dalrymple Consulting shall not be used in advertising or otherwise to
45  * promote the sale, use or other dealings in this Software without prior
46  * written authorization.
47  */
48 /* ________________________________________________________________________
49  *
50  *  Program to manipulate SGML instances.
51  *
52  *  This module contains the initialization routines for translation module.
53  *  They mostly deal with reading data files (translation specs, SDATA
54  *  mappings, character mappings).
55  *
56  *  Entry points:
57  *	ReadTransSpec(transfile)	read/store translation spec from file
58  *	ReadSDATA(sdatafile)		read/store SDATA mappings from file
59  *	ReadCharMap(filename)		read/store char mappings from file
60  * ________________________________________________________________________
61  */
62 
63 #ifndef lint
64 static char *RCSid =
65   "$Header: /usr/src/docbook-to-man/Instant/RCS/traninit.c,v 1.6 1998/06/28 19:15:41 fld Exp fld $";
66 #endif
67 
68 #include <stdio.h>
69 #include <stdlib.h>
70 #include <ctype.h>
71 #include <string.h>
72 #include <memory.h>
73 #include <sys/types.h>
74 #include <errno.h>
75 
76 #include <tptregexp.h>
77 #include "general.h"
78 #include "translate.h"
79 
80 #ifndef TRUE
81 #define TRUE	(1 == 1)
82 #endif
83 
84 /* forward references */
85 void	RememberTransSpec(Trans_t *, int);
86 
87 /* ______________________________________________________________________ */
88 /*  Read the translation specs from the input file, storing in memory.
89  *  Arguments:
90  *	Name of translation spec file.
91  */
92 
93 void
ReadTransSpec(char * transfile)94 ReadTransSpec(
95     char *transfile
96 )
97 {
98     FILE	*fp;
99     char	buf[LINESIZE], *cp, *fn, *cp2;
100     int		lineno=0, c, i;
101     Trans_t	T;
102 
103     if ((fp=OpenFile(transfile)) == NULL) {
104 	fprintf(stderr, "Can not open translation spec file '%s'.\n%s\n",
105 		transfile, strerror(errno));
106 	return;
107     }
108 
109     memset(&T, 0, sizeof T);		/* initialize/clear structure */
110     while (fgets(buf, LINESIZE, fp))	/* read line from .ts file */
111     {
112 	lineno++;
113 	/* skip comment and blank lines */
114 	if (buf[0] == '#' || buf[0] == NL) continue;
115 
116 	/* '-' indicates end of a spec.  When we hit one, remember what we've
117 	 * accumulated so far, and null-out the accumulating structure. */
118 	if (buf[0] == '-') {
119 	    T.lineno = lineno;
120 	    RememberTransSpec(&T, lineno);
121 	    memset(&T, 0, sizeof T);
122 	    continue;
123 	}
124 
125 	stripNL(buf);
126 
127 	/*  See if next line is continued from this one -- ie. it starts with
128 	 *  whitespace.  If so, append to current line.  (This is similar to
129 	 *  how e-mail headers work...) */
130 	while (1) {
131 	    c = getc(fp);		/* 1st char of next line */
132 	    if (IsWhite(c)) {		/* space or tab? */
133 		/* keep getting characters until it's a non-whitespace */
134 		c = getc(fp);
135 		while (IsWhite(c)) c = getc(fp);
136 		ungetc(c, fp);		/* put back non-whitespace */
137 		i = strlen(buf);
138 		buf[i++] = ' ';
139 		fn = buf + i;		/* point to end of string in buffer */
140 		fgets(fn, LINESIZE-i, fp);	/* read and append to buf */
141 		lineno++;
142 		stripNL(buf);
143 	    }
144 	    else {
145 		ungetc(c, fp);		/* put back non-whitespace */
146 		break;
147 	    }
148 	}
149 	/* Isolate field value */
150 	if ((cp=strchr(buf, ':'))) {
151 	    cp++;				/* point past colon */
152 	    while (*cp && IsWhite(*cp)) cp++;	/* point to content */
153 	}
154 	else {
155 	    fprintf(stderr,
156 		"Trans spec error, missing colon (skipping line):\n  %s\n", fn);
157 	    continue;
158 	}
159 	fn = buf;		/* fn is name of the field, cp the value. */
160 
161 	/* Check field names in order that they're likely to occur. */
162 	if (!strncmp("GI:",          fn, 3)) {
163 	    /* if we are folding the case of GIs, make all upper (unless
164 	       it's an internal pseudo-GI name, which starts with '_') */
165 	    if (fold_case && cp[0] != '_' && cp[0] != '#') {
166 		for (cp2=cp; *cp2; cp2++)
167 		    if (islower(*cp2)) *cp2 = toupper(*cp2);
168 	    }
169 	    T.gi = AddElemName(cp);
170 	}
171 	else if (!strncmp("StartText:",   fn, 10)) T.starttext	= strdup(cp);
172 	else if (!strncmp("EndText:",     fn, 8))  T.endtext	= strdup(cp);
173 	else if (!strncmp("Relation:",    fn, 9))  {
174 	    if (!T.relations) T.relations = NewMap(IMS_relations);
175 	    SetMapping(T.relations, cp);
176 	}
177 	else if (!strncmp("Replace:",     fn, 8))  T.replace	= strdup(cp);
178 	else if (!strncmp("AttValue:",    fn, 9)) {
179 	    if (!T.nattpairs) {
180 		Malloc(1, T.attpair, AttPair_t);
181 	    }
182 	    else
183 		Realloc((T.nattpairs+1), T.attpair, AttPair_t);
184 	    /* we'll split name/value pairs later */
185 	    T.attpair[T.nattpairs].name = strdup(cp);
186 	    T.nattpairs++;
187 	}
188 	/* If there's only one item in context, it's the parent.  Treat
189 	 * it specially, since it's easier to just check parent gi.
190 	 */
191 	else if (!strncmp("Context:",     fn, 8))  T.context	= strdup(cp);
192 	else if (!strncmp("Message:",     fn, 8))  T.message	= strdup(cp);
193 	else if (!strncmp("SpecID:",      fn, 7))  T.my_id	= atoi(cp);
194 	else if (!strncmp("Action:",      fn, 7))  T.use_id	= atoi(cp);
195 	else if (!strncmp("Content:",     fn, 8))  T.content	= strdup(cp);
196 	else if (!strncmp("PAttSet:",     fn, 8))  T.pattrset	= strdup(cp);
197 	else if (!strncmp("Verbatim:",    fn, 9))  T.verbatim	= TRUE;
198 	else if (!strncmp("Ignore:",      fn, 7)) {
199 	    if (!strcmp(cp, "all"))		T.ignore = IGN_ALL;
200 	    else if (!strcmp(cp, "data"))	T.ignore = IGN_DATA;
201 	    else if (!strcmp(cp, "children"))	T.ignore = IGN_CHILDREN;
202 	    else
203 		fprintf(stderr, "Bad 'Ignore:' arg in transpec (line %d): %s\n",
204 			lineno, cp);
205 	}
206 	else if (!strncmp("VarValue:",    fn, 9)) {
207 	    char	**tok;
208 	    i = 2;
209 	    tok = Split(cp, &i, S_STRDUP);
210 	    T.var_name	= tok[0];
211 	    T.var_value	= tok[1];
212 	}
213 	else if (!strncmp("VarREValue:",    fn, 11)) {
214 	    char	**tok;
215 	    i = 2;
216 	    tok = Split(cp, &i, S_STRDUP);
217 	    T.var_RE_name = tok[0];
218 	    ExpandVariables(tok[1], buf, 0);
219 	    if (!(T.var_RE_value=tpt_regcomp(buf)))	{
220 	    	fprintf(stderr, "Regex error in VarREValue Content: %s\n",
221 					tok[1]);
222 	    }
223 	}
224 	else if (!strncmp("Set:", fn, 4)) {
225 	    if (!T.set_var) T.set_var = NewMap(IMS_setvar);
226 	    SetMapping(T.set_var, cp);
227 	}
228 	else if (!strncmp("Increment:",   fn, 10)) {
229 	    if (!T.incr_var) T.incr_var = NewMap(IMS_incvar);
230 	    SetMapping(T.incr_var, cp);
231 	}
232 	else if (!strncmp("Substitute:",   fn, 11)) {
233 	    if (!T.incr_var) T.substitute = NewMap(IMS_incvar);
234 	    SetMapping(T.substitute, cp);
235 	}
236 	else if (!strncmp("NthChild:",    fn, 9))  T.nth_child	= atoi(cp);
237 	else if (!strncmp("Var:", fn, 4)) SetMapping(Variables, cp);
238 	else if (!strncmp("Quit:",        fn, 5))  T.quit	= strdup(cp);
239 	else if (!strncmp("Trim:",        fn, 5))  T.trim	= strdup(cp);
240 	else
241 	    fprintf(stderr, "Unknown translation spec (skipping it): %s\n", fn);
242     }
243     fclose(fp);
244 }
245 
246 /* ______________________________________________________________________ */
247 /*  Store translation spec 't' in memory.
248  *  Arguments:
249  *	Pointer to translation spec to remember.
250  *	Line number where translation spec ends.
251  */
252 void
RememberTransSpec(Trans_t * t,int lineno)253 RememberTransSpec(
254     Trans_t	*t,
255     int		lineno
256 )
257 {
258     char	*cp;
259     int		i, do_regex;
260     static Trans_t *last_t;
261     char buf[1000];
262 
263     /* If context testing, check some details and set things up for later. */
264     if (t->context) {
265 	/* See if the context specified is a regular expression.
266 	 * If so, compile the reg expr.  It is assumed to be a regex if
267 	 * it contains a character other than what's allowed for GIs in the
268 	 * OSF sgml declaration (alphas, nums, '-', and '.').
269 	 */
270 	for (do_regex=0,cp=t->context; *cp; cp++) {
271 	    if (!isalnum(*cp) && *cp != '-' && *cp != '.' && *cp != ' ') {
272 		do_regex = 1;
273 		break;
274 	    }
275 	}
276 
277 	if (do_regex) {
278 	    t->depth = MAX_DEPTH;
279 	    if (!(t->context_re=tpt_regcomp(t->context))) {
280 		fprintf(stderr, "Regex error in Context: %s\n", t->context);
281 	    }
282 	}
283 	else {
284 	    /* If there's only one item in context, it's the parent.  Treat
285 	     * it specially, since it's faster to just check parent gi.
286 	     */
287 	    cp = t->context;
288 	    if (!strchr(cp, ' ')) {
289 		t->parent  = t->context;
290 		t->context = NULL;
291 	    }
292 	    else {
293 		/* Figure out depth of context string */
294 		t->depth = 0;
295 		while (*cp) {
296 		    if (*cp) t->depth++;
297 		    while (*cp && !IsWhite(*cp)) cp++;	/* find end of gi */
298 		    while (*cp && IsWhite(*cp)) cp++;	/* skip space */
299 		}
300 	    }
301 	}
302     }
303 
304     /* Compile regular expressions for each attribute */
305     for (i=0; i<t->nattpairs; i++) {
306 	/* Initially, name points to "name value".  Split them... */
307 	cp = t->attpair[i].name;
308 	while (*cp && !IsWhite(*cp)) cp++;	/* point past end of name */
309 	if (*cp) {	/* value found */
310 	    *cp++ = EOS;			/* terminate name */
311 	    while (*cp && IsWhite(*cp)) cp++;	/* point to value */
312 	    ExpandVariables(cp, buf, 0);	/* expand any variables */
313 	    t->attpair[i].val = strdup(buf);
314 	}
315 	else {		/* value not found */
316 	    t->attpair[i].val = ".";
317 	}
318 	if (!(t->attpair[i].rex=tpt_regcomp(t->attpair[i].val))) {
319 	    fprintf(stderr, "Regex error in AttValue: %s %s\n",
320 		    t->attpair[i].name, t->attpair[i].val);
321 	}
322     }
323 
324     /* Compile regular expression for content */
325     t->content_re = 0;
326     if (t->content) {
327 	ExpandVariables(t->content, buf, 0);
328 	if (!(t->content_re=tpt_regcomp(buf)))
329 	    fprintf(stderr, "Regex error in Content: %s\n",
330 		    t->content);
331     }
332 
333     /* If multiple GIs, break up into a vector, then remember it.  We either
334      * sture the individual, or the list - not both. */
335     if (t->gi && strchr(t->gi, ' ')) {
336 	t->gilist = Split(t->gi, 0, S_ALVEC);
337 	t->gi = NULL;
338     }
339 
340     /* Now, store structure in linked list. */
341     if (!TrSpecs) {
342 	Malloc(1, TrSpecs, Trans_t);
343 	last_t = TrSpecs;
344     }
345     else {
346 	Malloc(1, last_t->next, Trans_t);
347 	last_t = last_t->next;
348     }
349     *last_t = *t;
350 }
351 
352 
353 /* ______________________________________________________________________ */
354 /*  Read mapping file, filling in structure slots (just name-value pairs).
355  *  Arguments:
356  *	Name of character mapping file.
357  */
358 
359 void
ReadCharMap(char * filename)360 ReadCharMap(
361     char *filename
362 )
363 {
364     FILE	*fp;
365     char	buf[LINESIZE], *name, *val;
366     int		lineno=0;
367     int		n_alloc=0;	/* number of slots allocated so far */
368 
369     if ((fp=OpenFile(filename)) == NULL) {
370 	fprintf(stderr, "Can not open character mapping file '%s'.\n%s\n",
371 		filename, strerror(errno));
372 	return;
373     }
374 
375     /* We allocate slots in blocks of N, so we don't have to call
376      * malloc so many times. */
377     n_alloc  = 32;
378     Calloc(n_alloc, CharMap, Mapping_t);
379 
380     nCharMap = 0;
381     while (fgets(buf, LINESIZE, fp))
382     {
383 	lineno++;
384 	/* skip comment and blank lines */
385 	if (buf[0] == '#' || buf[0] == NL) continue;
386 	stripNL(buf);
387 
388 	/* Need more slots for mapping structures? */
389 	if (nCharMap >= n_alloc) {
390 	    n_alloc += 32;
391 	    Realloc(n_alloc, CharMap, Mapping_t);
392 	}
393 	name = val = buf;
394 	while (*val && !IsWhite(*val)) val++;	/* point past end of name */
395 	if (*val) {
396 	    *val++ = EOS;				/* terminate name */
397 	    while (*val && IsWhite(*val)) val++;	/* point to value */
398 	}
399 	if (name) {
400 	    CharMap[nCharMap].name = strdup(name);
401 	    if (val) CharMap[nCharMap].sval = strdup(val);
402 	    if (CharMap[nCharMap].name[0] == '\\') CharMap[nCharMap].name++;
403 	    nCharMap++;
404 	}
405     }
406     fclose(fp);
407 }
408 
409 /* ______________________________________________________________________ */
410 /* Read SDATA mapping file, remembering the mappings in memory.
411  * Input file format is 2 columns, name and value, separated by one or
412  * more tabs (not spaces).
413  * This can be called multuple times, reading several files.
414  *  Arguments:
415  *	Name of SDATA entity mapping file.
416  */
417 
418 void
ReadSDATA(char * filename)419 ReadSDATA(
420     char *filename
421 )
422 {
423     FILE	*fp;
424     char	buf[LINESIZE], *name, *val;
425     int		lineno=0;
426 
427     if ((fp=OpenFile(filename)) == NULL) {
428 	fprintf(stderr, "Can not open SDATA file '%s': %s", filename,
429 		strerror(errno));
430 	return;
431     }
432 
433     if (!SDATAmap) SDATAmap = NewMap(IMS_sdata);
434 
435     while (fgets(buf, LINESIZE, fp))
436     {
437 	lineno++;
438 	/* skip comment and blank lines */
439 	if (buf[0] == '#' || buf[0] == NL) continue;
440 	stripNL(buf);
441 
442 	name = val = buf;
443 	while (*val && *val != TAB) val++;	/* point past end of name */
444 	if (*val) {
445 	    *val++ = EOS;			/* terminate name */
446 	    while (*val && *val == TAB) val++;	/* point to value */
447 	}
448 
449 	SetMappingNV(SDATAmap, name, val);
450     }
451     fclose(fp);
452 }
453 
454 /* ______________________________________________________________________ */
455