1 /* $NetBSD: word.c,v 1.10 2021/05/02 12:50:43 rillig Exp $ */
2
3 /*-
4 * Copyright (c) 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Barry Brachman.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35 #include <sys/cdefs.h>
36 #ifndef lint
37 #if 0
38 static char sccsid[] = "@(#)word.c 8.1 (Berkeley) 6/11/93";
39 #else
40 __RCSID("$NetBSD: word.c,v 1.10 2021/05/02 12:50:43 rillig Exp $");
41 #endif
42 #endif /* not lint */
43
44 #include <sys/types.h>
45 #include <sys/stat.h>
46
47 #include <err.h>
48 #include <stdio.h>
49 #include <stdlib.h>
50 #include <string.h>
51
52 #include "bog.h"
53 #include "extern.h"
54
55 static char *dictspace, *dictend;
56 static char *sp;
57
58 static int first = 1, lastch = 0;
59
60 extern struct dictindex dictindex[];
61 extern int wordlen;
62
63 /*
64 * Return the next word in the compressed dictionary in 'buffer' or
65 * NULL on end-of-file
66 */
67 char *
nextword(FILE * fp)68 nextword(FILE *fp)
69 {
70 int ch, pcount;
71 char *p;
72 static char buf[MAXWORDLEN + 1];
73
74 if (fp == NULL) {
75 if (sp == dictend)
76 return (NULL);
77
78 p = buf + (int) *sp++;
79
80 /*
81 * The dictionary ends with a null byte
82 */
83 while (*sp >= 'a')
84 if ((*p++ = *sp++) == 'q')
85 *p++ = 'u';
86 } else {
87 if (first) {
88 if ((pcount = getc(fp)) == EOF)
89 return (NULL);
90 first = 0;
91 } else if ((pcount = lastch) == EOF)
92 return (NULL);
93
94 p = buf + pcount;
95
96 while ((ch = getc(fp)) != EOF && ch >= 'a')
97 if ((*p++ = ch) == 'q')
98 *p++ = 'u';
99 lastch = ch;
100 }
101 wordlen = (int) (p - buf);
102 *p = '\0';
103 return (buf);
104 }
105
106 /*
107 * Reset the state of nextword() and do the fseek()
108 */
109 long
dictseek(FILE * fp,long offset,int ptrname)110 dictseek(FILE *fp, long offset, int ptrname)
111 {
112 if (fp == NULL) {
113 if ((sp = dictspace + offset) >= dictend)
114 return (-1);
115 return (0);
116 }
117
118 first = 1;
119 return (fseek(fp, offset, ptrname));
120 }
121
/*
 * Open the dictionary file 'dict' for reading.
 * Returns the stream, or NULL if fopen() fails.
 */
FILE *
opendict(const char *dict)
{
	return (fopen(dict, "r"));
}
131
132 /*
133 * Load the given dictionary and initialize the pointers
134 */
135 int
loaddict(FILE * fp)136 loaddict(FILE *fp)
137 {
138 struct stat statb;
139 long n;
140 int st;
141 char *p;
142
143 if (fstat(fileno(fp), &statb) < 0) {
144 (void)fclose(fp);
145 return (-1);
146 }
147
148 /*
149 * An extra character (a sentinel) is allocated and set to null
150 * to improve the expansion loop in nextword().
151 */
152 if ((dictspace = malloc(statb.st_size + 1)) == NULL) {
153 (void)fclose(fp);
154 return (-1);
155 }
156 n = (long)statb.st_size;
157 sp = dictspace;
158 dictend = dictspace + n;
159
160 p = dictspace;
161 st = -1;
162 while (n > 0 && (st = fread(p, 1, BUFSIZ, fp)) > 0) {
163 p += st;
164 n -= st;
165 }
166 if (st < 0) {
167 (void)fclose(fp);
168 warnx("Error reading dictionary");
169 return (-1);
170 }
171 *p = '\0';
172 return (0);
173 }
174
175 /*
176 * Dependent on the exact format of the index file:
177 * Starting offset field begins in column 1 and length field in column 9
178 * Taking the easy way out, the input buffer is made "large" and a check
179 * is made for lines that are too long
180 */
181 int
loadindex(const char * indexfile)182 loadindex(const char *indexfile)
183 {
184 int i, j;
185 char buf[BUFSIZ];
186 FILE *fp;
187
188 if ((fp = fopen(indexfile, "r")) == NULL) {
189 warn("Can't open '%s'", indexfile);
190 return (-1);
191 }
192 i = 0;
193 while (fgets(buf, sizeof(buf), fp) != NULL) {
194 if (strchr(buf, '\n') == NULL) {
195 warnx("A line in the index file is too long");
196 (void) fclose(fp);
197 return(-1);
198 }
199 j = *buf - 'a';
200 if (i != j) {
201 warnx("Bad index order");
202 (void) fclose(fp);
203 return(-1);
204 }
205 dictindex[j].start = atol(buf + 1);
206 dictindex[j].length = atol(buf + 9) - dictindex[j].start;
207 i++;
208 }
209 (void) fclose(fp);
210 if (i != 26) {
211 warnx("Bad index length");
212 return(-1);
213 }
214 return(0);
215 }
216