1 /*
2 * Copyright 1993 Open Software Foundation, Inc., Cambridge, Massachusetts.
3 * All rights reserved.
4 */
5 /*
6 #pragma ident "%Z%%M% %I% %E% SMI"
7 * Copyright (c) 1994
8 * Open Software Foundation, Inc.
9 *
10 * Permission is hereby granted to use, copy, modify and freely distribute
11 * the software in this file and its documentation for any purpose without
12 * fee, provided that the above copyright notice appears in all copies and
13 * that both the copyright notice and this permission notice appear in
14 * supporting documentation. Further, provided that the name of Open
15 * Software Foundation, Inc. ("OSF") not be used in advertising or
16 * publicity pertaining to distribution of the software without prior
17 * written permission from OSF. OSF makes no representations about the
18 * suitability of this software for any purpose. It is provided "as is"
19 * without express or implied warranty.
20 */
21 /*
22 * Copyright (c) 1996 X Consortium
23 * Copyright (c) 1995, 1996 Dalrymple Consulting
24 *
25 * Permission is hereby granted, free of charge, to any person obtaining a copy
26 * of this software and associated documentation files (the "Software"), to deal
27 * in the Software without restriction, including without limitation the rights
28 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
29 * copies of the Software, and to permit persons to whom the Software is
30 * furnished to do so, subject to the following conditions:
31 *
32 * The above copyright notice and this permission notice shall be included in
33 * all copies or substantial portions of the Software.
34 *
35 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
36 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
37 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
38 * X CONSORTIUM OR DALRYMPLE CONSULTING BE LIABLE FOR ANY CLAIM, DAMAGES OR
39 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
40 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
41 * OTHER DEALINGS IN THE SOFTWARE.
42 *
43 * Except as contained in this notice, the names of the X Consortium and
44 * Dalrymple Consulting shall not be used in advertising or otherwise to
45 * promote the sale, use or other dealings in this Software without prior
46 * written authorization.
47 */
48 /* ________________________________________________________________________
49 *
50 * Program to manipulate SGML instances.
51 *
52 * This module contains the initialization routines for translation module.
53 * They mostly deal with reading data files (translation specs, SDATA
54 * mappings, character mappings).
55 *
56 * Entry points:
57 * ReadTransSpec(transfile) read/store translation spec from file
58 * ReadSDATA(sdatafile) read/store SDATA mappings from file
59 * ReadCharMap(mapfile) read/store char mappings from file
60 * ________________________________________________________________________
61 */
62
/* RCS identification string for this module; left out when building for lint. */
#if !defined(lint)
static char *RCSid = "$Header: /usr/src/docbook-to-man/Instant/RCS/traninit.c,v 1.6 1998/06/28 19:15:41 fld Exp fld $";
#endif
67
68 #include <stdio.h>
69 #include <stdlib.h>
70 #include <ctype.h>
71 #include <string.h>
72 #include <memory.h>
73 #include <sys/types.h>
74 #include <errno.h>
75
76 #include <tptregexp.h>
77 #include "general.h"
78 #include "translate.h"
79
/* Fallback truth value, defined only when no earlier header supplied one. */
#if !defined(TRUE)
#  define TRUE (1 == 1)
#endif
83
84 /* forward references */
85 void RememberTransSpec(Trans_t *, int);
86
87 /* ______________________________________________________________________ */
88 /* Read the translation specs from the input file, storing in memory.
89 * Arguments:
90 * Name of translation spec file.
91 */
92
93 void
ReadTransSpec(char * transfile)94 ReadTransSpec(
95 char *transfile
96 )
97 {
98 FILE *fp;
99 char buf[LINESIZE], *cp, *fn, *cp2;
100 int lineno=0, c, i;
101 Trans_t T;
102
103 if ((fp=OpenFile(transfile)) == NULL) {
104 fprintf(stderr, "Can not open translation spec file '%s'.\n%s\n",
105 transfile, strerror(errno));
106 return;
107 }
108
109 memset(&T, 0, sizeof T); /* initialize/clear structure */
110 while (fgets(buf, LINESIZE, fp)) /* read line from .ts file */
111 {
112 lineno++;
113 /* skip comment and blank lines */
114 if (buf[0] == '#' || buf[0] == NL) continue;
115
116 /* '-' indicates end of a spec. When we hit one, remember what we've
117 * accumulated so far, and null-out the accumulating structure. */
118 if (buf[0] == '-') {
119 T.lineno = lineno;
120 RememberTransSpec(&T, lineno);
121 memset(&T, 0, sizeof T);
122 continue;
123 }
124
125 stripNL(buf);
126
127 /* See if next line is continued from this one -- ie. it starts with
128 * whitespace. If so, append to current line. (This is similar to
129 * how e-mail headers work...) */
130 while (1) {
131 c = getc(fp); /* 1st char of next line */
132 if (IsWhite(c)) { /* space or tab? */
133 /* keep getting characters until it's a non-whitespace */
134 c = getc(fp);
135 while (IsWhite(c)) c = getc(fp);
136 ungetc(c, fp); /* put back non-whitespace */
137 i = strlen(buf);
138 buf[i++] = ' ';
139 fn = buf + i; /* point to end of string in buffer */
140 fgets(fn, LINESIZE-i, fp); /* read and append to buf */
141 lineno++;
142 stripNL(buf);
143 }
144 else {
145 ungetc(c, fp); /* put back non-whitespace */
146 break;
147 }
148 }
149 /* Isolate field value */
150 if ((cp=strchr(buf, ':'))) {
151 cp++; /* point past colon */
152 while (*cp && IsWhite(*cp)) cp++; /* point to content */
153 }
154 else {
155 fprintf(stderr,
156 "Trans spec error, missing colon (skipping line):\n %s\n", fn);
157 continue;
158 }
159 fn = buf; /* fn is name of the field, cp the value. */
160
161 /* Check field names in order that they're likely to occur. */
162 if (!strncmp("GI:", fn, 3)) {
163 /* if we are folding the case of GIs, make all upper (unless
164 it's an internal pseudo-GI name, which starts with '_') */
165 if (fold_case && cp[0] != '_' && cp[0] != '#') {
166 for (cp2=cp; *cp2; cp2++)
167 if (islower(*cp2)) *cp2 = toupper(*cp2);
168 }
169 T.gi = AddElemName(cp);
170 }
171 else if (!strncmp("StartText:", fn, 10)) T.starttext = strdup(cp);
172 else if (!strncmp("EndText:", fn, 8)) T.endtext = strdup(cp);
173 else if (!strncmp("Relation:", fn, 9)) {
174 if (!T.relations) T.relations = NewMap(IMS_relations);
175 SetMapping(T.relations, cp);
176 }
177 else if (!strncmp("Replace:", fn, 8)) T.replace = strdup(cp);
178 else if (!strncmp("AttValue:", fn, 9)) {
179 if (!T.nattpairs) {
180 Malloc(1, T.attpair, AttPair_t);
181 }
182 else
183 Realloc((T.nattpairs+1), T.attpair, AttPair_t);
184 /* we'll split name/value pairs later */
185 T.attpair[T.nattpairs].name = strdup(cp);
186 T.nattpairs++;
187 }
188 /* If there's only one item in context, it's the parent. Treat
189 * it specially, since it's easier to just check parent gi.
190 */
191 else if (!strncmp("Context:", fn, 8)) T.context = strdup(cp);
192 else if (!strncmp("Message:", fn, 8)) T.message = strdup(cp);
193 else if (!strncmp("SpecID:", fn, 7)) T.my_id = atoi(cp);
194 else if (!strncmp("Action:", fn, 7)) T.use_id = atoi(cp);
195 else if (!strncmp("Content:", fn, 8)) T.content = strdup(cp);
196 else if (!strncmp("PAttSet:", fn, 8)) T.pattrset = strdup(cp);
197 else if (!strncmp("Verbatim:", fn, 9)) T.verbatim = TRUE;
198 else if (!strncmp("Ignore:", fn, 7)) {
199 if (!strcmp(cp, "all")) T.ignore = IGN_ALL;
200 else if (!strcmp(cp, "data")) T.ignore = IGN_DATA;
201 else if (!strcmp(cp, "children")) T.ignore = IGN_CHILDREN;
202 else
203 fprintf(stderr, "Bad 'Ignore:' arg in transpec (line %d): %s\n",
204 lineno, cp);
205 }
206 else if (!strncmp("VarValue:", fn, 9)) {
207 char **tok;
208 i = 2;
209 tok = Split(cp, &i, S_STRDUP);
210 T.var_name = tok[0];
211 T.var_value = tok[1];
212 }
213 else if (!strncmp("VarREValue:", fn, 11)) {
214 char **tok;
215 i = 2;
216 tok = Split(cp, &i, S_STRDUP);
217 T.var_RE_name = tok[0];
218 ExpandVariables(tok[1], buf, 0);
219 if (!(T.var_RE_value=tpt_regcomp(buf))) {
220 fprintf(stderr, "Regex error in VarREValue Content: %s\n",
221 tok[1]);
222 }
223 }
224 else if (!strncmp("Set:", fn, 4)) {
225 if (!T.set_var) T.set_var = NewMap(IMS_setvar);
226 SetMapping(T.set_var, cp);
227 }
228 else if (!strncmp("Increment:", fn, 10)) {
229 if (!T.incr_var) T.incr_var = NewMap(IMS_incvar);
230 SetMapping(T.incr_var, cp);
231 }
232 else if (!strncmp("Substitute:", fn, 11)) {
233 if (!T.incr_var) T.substitute = NewMap(IMS_incvar);
234 SetMapping(T.substitute, cp);
235 }
236 else if (!strncmp("NthChild:", fn, 9)) T.nth_child = atoi(cp);
237 else if (!strncmp("Var:", fn, 4)) SetMapping(Variables, cp);
238 else if (!strncmp("Quit:", fn, 5)) T.quit = strdup(cp);
239 else if (!strncmp("Trim:", fn, 5)) T.trim = strdup(cp);
240 else
241 fprintf(stderr, "Unknown translation spec (skipping it): %s\n", fn);
242 }
243 fclose(fp);
244 }
245
246 /* ______________________________________________________________________ */
247 /* Store translation spec 't' in memory.
248 * Arguments:
249 * Pointer to translation spec to remember.
250 * Line number where translation spec ends.
251 */
252 void
RememberTransSpec(Trans_t * t,int lineno)253 RememberTransSpec(
254 Trans_t *t,
255 int lineno
256 )
257 {
258 char *cp;
259 int i, do_regex;
260 static Trans_t *last_t;
261 char buf[1000];
262
263 /* If context testing, check some details and set things up for later. */
264 if (t->context) {
265 /* See if the context specified is a regular expression.
266 * If so, compile the reg expr. It is assumed to be a regex if
267 * it contains a character other than what's allowed for GIs in the
268 * OSF sgml declaration (alphas, nums, '-', and '.').
269 */
270 for (do_regex=0,cp=t->context; *cp; cp++) {
271 if (!isalnum(*cp) && *cp != '-' && *cp != '.' && *cp != ' ') {
272 do_regex = 1;
273 break;
274 }
275 }
276
277 if (do_regex) {
278 t->depth = MAX_DEPTH;
279 if (!(t->context_re=tpt_regcomp(t->context))) {
280 fprintf(stderr, "Regex error in Context: %s\n", t->context);
281 }
282 }
283 else {
284 /* If there's only one item in context, it's the parent. Treat
285 * it specially, since it's faster to just check parent gi.
286 */
287 cp = t->context;
288 if (!strchr(cp, ' ')) {
289 t->parent = t->context;
290 t->context = NULL;
291 }
292 else {
293 /* Figure out depth of context string */
294 t->depth = 0;
295 while (*cp) {
296 if (*cp) t->depth++;
297 while (*cp && !IsWhite(*cp)) cp++; /* find end of gi */
298 while (*cp && IsWhite(*cp)) cp++; /* skip space */
299 }
300 }
301 }
302 }
303
304 /* Compile regular expressions for each attribute */
305 for (i=0; i<t->nattpairs; i++) {
306 /* Initially, name points to "name value". Split them... */
307 cp = t->attpair[i].name;
308 while (*cp && !IsWhite(*cp)) cp++; /* point past end of name */
309 if (*cp) { /* value found */
310 *cp++ = EOS; /* terminate name */
311 while (*cp && IsWhite(*cp)) cp++; /* point to value */
312 ExpandVariables(cp, buf, 0); /* expand any variables */
313 t->attpair[i].val = strdup(buf);
314 }
315 else { /* value not found */
316 t->attpair[i].val = ".";
317 }
318 if (!(t->attpair[i].rex=tpt_regcomp(t->attpair[i].val))) {
319 fprintf(stderr, "Regex error in AttValue: %s %s\n",
320 t->attpair[i].name, t->attpair[i].val);
321 }
322 }
323
324 /* Compile regular expression for content */
325 t->content_re = 0;
326 if (t->content) {
327 ExpandVariables(t->content, buf, 0);
328 if (!(t->content_re=tpt_regcomp(buf)))
329 fprintf(stderr, "Regex error in Content: %s\n",
330 t->content);
331 }
332
333 /* If multiple GIs, break up into a vector, then remember it. We either
334 * sture the individual, or the list - not both. */
335 if (t->gi && strchr(t->gi, ' ')) {
336 t->gilist = Split(t->gi, 0, S_ALVEC);
337 t->gi = NULL;
338 }
339
340 /* Now, store structure in linked list. */
341 if (!TrSpecs) {
342 Malloc(1, TrSpecs, Trans_t);
343 last_t = TrSpecs;
344 }
345 else {
346 Malloc(1, last_t->next, Trans_t);
347 last_t = last_t->next;
348 }
349 *last_t = *t;
350 }
351
352
353 /* ______________________________________________________________________ */
354 /* Read mapping file, filling in structure slots (just name-value pairs).
355 * Arguments:
356 * Name of character mapping file.
357 */
358
359 void
ReadCharMap(char * filename)360 ReadCharMap(
361 char *filename
362 )
363 {
364 FILE *fp;
365 char buf[LINESIZE], *name, *val;
366 int lineno=0;
367 int n_alloc=0; /* number of slots allocated so far */
368
369 if ((fp=OpenFile(filename)) == NULL) {
370 fprintf(stderr, "Can not open character mapping file '%s'.\n%s\n",
371 filename, strerror(errno));
372 return;
373 }
374
375 /* We allocate slots in blocks of N, so we don't have to call
376 * malloc so many times. */
377 n_alloc = 32;
378 Calloc(n_alloc, CharMap, Mapping_t);
379
380 nCharMap = 0;
381 while (fgets(buf, LINESIZE, fp))
382 {
383 lineno++;
384 /* skip comment and blank lines */
385 if (buf[0] == '#' || buf[0] == NL) continue;
386 stripNL(buf);
387
388 /* Need more slots for mapping structures? */
389 if (nCharMap >= n_alloc) {
390 n_alloc += 32;
391 Realloc(n_alloc, CharMap, Mapping_t);
392 }
393 name = val = buf;
394 while (*val && !IsWhite(*val)) val++; /* point past end of name */
395 if (*val) {
396 *val++ = EOS; /* terminate name */
397 while (*val && IsWhite(*val)) val++; /* point to value */
398 }
399 if (name) {
400 CharMap[nCharMap].name = strdup(name);
401 if (val) CharMap[nCharMap].sval = strdup(val);
402 if (CharMap[nCharMap].name[0] == '\\') CharMap[nCharMap].name++;
403 nCharMap++;
404 }
405 }
406 fclose(fp);
407 }
408
409 /* ______________________________________________________________________ */
410 /* Read SDATA mapping file, remembering the mappings in memory.
411 * Input file format is 2 columns, name and value, separated by one or
412 * more tabs (not spaces).
413 * This can be called multuple times, reading several files.
414 * Arguments:
415 * Name of SDATA entity mapping file.
416 */
417
418 void
ReadSDATA(char * filename)419 ReadSDATA(
420 char *filename
421 )
422 {
423 FILE *fp;
424 char buf[LINESIZE], *name, *val;
425 int lineno=0;
426
427 if ((fp=OpenFile(filename)) == NULL) {
428 fprintf(stderr, "Can not open SDATA file '%s': %s", filename,
429 strerror(errno));
430 return;
431 }
432
433 if (!SDATAmap) SDATAmap = NewMap(IMS_sdata);
434
435 while (fgets(buf, LINESIZE, fp))
436 {
437 lineno++;
438 /* skip comment and blank lines */
439 if (buf[0] == '#' || buf[0] == NL) continue;
440 stripNL(buf);
441
442 name = val = buf;
443 while (*val && *val != TAB) val++; /* point past end of name */
444 if (*val) {
445 *val++ = EOS; /* terminate name */
446 while (*val && *val == TAB) val++; /* point to value */
447 }
448
449 SetMappingNV(SDATAmap, name, val);
450 }
451 fclose(fp);
452 }
453
454 /* ______________________________________________________________________ */
455