1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <String.h>
5 #include <ctype.h>
6 #include <thread.h>
7 #include "wiki.h"
8
9 static Wpage*
mkwtxt(int type,char * text)10 mkwtxt(int type, char *text)
11 {
12 Wpage *w;
13
14 w = emalloc(sizeof(*w));
15 w->type = type;
16 w->text = text;
17 setmalloctag(w, getcallerpc(&type));
18 return w;
19 }
20
/*
 * Collapse every run of whitespace in s to a single space, in place.
 * Trailing whitespace is always removed.  Leading whitespace is
 * removed when cutbegin is nonzero; otherwise it collapses to one
 * space like any other run (a string that is all whitespace still
 * becomes empty).  The result is never longer than the input.
 * Returns s.
 */
char*
strcondense(char *s, int cutbegin)
{
	char *r, *w, *es;
	int inspace;

	es = s+strlen(s);
	/* starting "in a space" makes the leading run disappear */
	inspace = cutbegin;
	for(r=w=s; *r; r++){
		/*
		 * The cast keeps bytes >= 0x80 (e.g. UTF-8) from reaching
		 * isspace as negative values, which standard ctype
		 * implementations do not define.
		 */
		if(isspace((unsigned char)*r)){
			if(!inspace){
				inspace = 1;
				*w++ = ' ';
			}
		}else{
			inspace = 0;
			*w++ = *r;
		}
	}
	assert(w <= es);
	/* drop the single space emitted for a trailing run */
	if(inspace && w > s)
		--w;
	*w = '\0';
	return s;
}
53
54 /*
55 * turn runs of Wplain into single Wplain.
56 */
57 static Wpage*
wcondense(Wpage * wtxt)58 wcondense(Wpage *wtxt)
59 {
60 Wpage *ow, *w;
61
62 for(w=wtxt; w; ){
63 if(w->type == Wplain)
64 strcondense(w->text, 1);
65
66 if(w->type != Wplain || w->next==nil
67 || w->next->type != Wplain){
68 w=w->next;
69 continue;
70 }
71
72 w->text = erealloc(w->text, strlen(w->text)+1+strlen(w->next->text)+1);
73 strcat(w->text, " ");
74 strcat(w->text, w->next->text);
75
76 ow = w->next;
77 w->next = ow->next;
78 ow->next = nil;
79 freepage(ow);
80 }
81 return wtxt;
82 }
83
84 /*
85 * Parse a link, without the brackets.
86 */
87 static Wpage*
mklink(char * s)88 mklink(char *s)
89 {
90 char *q;
91 Wpage *w;
92
93 for(q=s; *q && *q != '|'; q++)
94 ;
95
96 if(*q == '\0'){
97 w = mkwtxt(Wlink, estrdup(strcondense(s, 1)));
98 w->url = nil;
99 }else{
100 *q = '\0';
101 w = mkwtxt(Wlink, estrdup(strcondense(s, 1)));
102 w->url = estrdup(strcondense(q+1, 1));
103 }
104 setmalloctag(w, getcallerpc(&s));
105 return w;
106 }
107
108 /*
109 * Parse Wplains, inserting Wlink nodes where appropriate.
110 */
111 static Wpage*
wlink(Wpage * wtxt)112 wlink(Wpage *wtxt)
113 {
114 char *p, *q, *r, *s;
115 Wpage *w, *nw;
116
117 for(w=wtxt; w; w=nw){
118 nw = w->next;
119 if(w->type != Wplain)
120 continue;
121 while(w->text[0]){
122 p = w->text;
123 for(q=p; *q && *q != '['; q++)
124 ;
125 if(*q == '\0')
126 break;
127 for(r=q; *r && *r != ']'; r++)
128 ;
129 if(*r == '\0')
130 break;
131 *q = '\0';
132 *r = '\0';
133 s = w->text;
134 w->text = estrdup(w->text);
135 w->next = mklink(q+1);
136 w = w->next;
137 w->next = mkwtxt(Wplain, estrdup(r+1));
138 free(s);
139 w = w->next;
140 w->next = nw;
141 }
142 assert(w->next == nw);
143 }
144 return wtxt;
145 }
146
/*
 * Report whether c may appear in the name part of a manual page
 * reference: ASCII letters, digits, '_', '-', '.', '/', or any
 * byte outside the ASCII range (part of a UTF sequence).
 *
 * Non-ASCII bytes are accepted both as negative values (char is
 * signed) and as 0x80 and above (char is unsigned), so the result
 * does not depend on the signedness of char at the call site.
 */
static int
ismanchar(int c)
{
	return ('a' <= c && c <= 'z')
		|| ('A' <= c && c <= 'Z')
		|| ('0' <= c && c <= '9')
		|| c=='_' || c=='-' || c=='.' || c=='/'
		|| c < 0 || c >= 0x80;	/* UTF */
}
156
157 static Wpage*
findmanref(char * p,char ** beginp,char ** endp)158 findmanref(char *p, char **beginp, char **endp)
159 {
160 char *q, *r;
161 Wpage *w;
162
163 q=p;
164 for(;;){
165 for(; q[0] && (q[0] != '(' || !isdigit(q[1]) || q[2] != ')'); q++)
166 ;
167 if(*q == '\0')
168 break;
169 for(r=q; r>p && ismanchar(r[-1]); r--)
170 ;
171 if(r==q){
172 q += 3;
173 continue;
174 }
175 *q = '\0';
176 w = mkwtxt(Wman, estrdup(r));
177 *beginp = r;
178 *q = '(';
179 w->section = q[1]-'0';
180 *endp = q+3;
181 setmalloctag(w, getcallerpc(&p));
182 return w;
183 }
184 return nil;
185 }
186
187 /*
188 * Parse Wplains, looking for man page references.
189 * This should be done by using a plumb(6)-style
190 * control file rather than hard-coding things here.
191 */
192 static Wpage*
wman(Wpage * wtxt)193 wman(Wpage *wtxt)
194 {
195 char *q, *r;
196 Wpage *w, *mw, *nw;
197
198 for(w=wtxt; w; w=nw){
199 nw = w->next;
200 if(w->type != Wplain)
201 continue;
202 while(w->text[0]){
203 if((mw = findmanref(w->text, &q, &r)) == nil)
204 break;
205 *q = '\0';
206 w->next = mw;
207 w = w->next;
208 w->next = mkwtxt(Wplain, estrdup(r));
209 w = w->next;
210 w->next = nw;
211 }
212 assert(w->next == nw);
213 }
214 return wtxt;
215 }
216
isheading(char * p)217 static int isheading(char *p) {
218 Rune r;
219 int hasupper=0;
220 while(*p) {
221 p+=chartorune(&r,p);
222 if(isupperrune(r))
223 hasupper=1;
224 else if(islowerrune(r))
225 return 0;
226 }
227 return hasupper;
228 }
229
230 Wpage*
Brdpage(char * (* rdline)(void *,int),void * b)231 Brdpage(char *(*rdline)(void*,int), void *b)
232 {
233 char *p, *c;
234 int waspara;
235 Wpage *w, **pw;
236
237 w = nil;
238 pw = &w;
239 waspara = 1;
240 while((p = rdline(b, '\n')) != nil){
241 if(p[0] != '!')
242 p = strcondense(p, 1);
243 if(p[0] == '\0'){
244 if(waspara==0){
245 waspara=1;
246 *pw = mkwtxt(Wpara, nil);
247 pw = &(*pw)->next;
248 }
249 continue;
250 }
251 waspara = 0;
252 switch(p[0]){
253 case '*':
254 *pw = mkwtxt(Wbullet, nil);
255 pw = &(*pw)->next;
256 *pw = mkwtxt(Wplain, estrdup(p+1));
257 pw = &(*pw)->next;
258 break;
259 case '!':
260 *pw = mkwtxt(Wpre, estrdup(p[1]==' '?p+2:p+1));
261 pw = &(*pw)->next;
262 break;
263 case '-':
264 for(c = p; *c != '\0'; c++) {
265 if(*c != '-') {
266 c = p;
267 break;
268 }
269 }
270
271 if( (c-p) > 4) {
272 *pw = mkwtxt(Whr, nil);
273 pw = &(*pw)->next;
274 break;
275 }
276 /* else fall thru */
277 default:
278 if(isheading(p)){
279 *pw = mkwtxt(Wheading, estrdup(p));
280 pw = &(*pw)->next;
281 continue;
282 }
283 *pw = mkwtxt(Wplain, estrdup(p));
284 pw = &(*pw)->next;
285 break;
286 }
287 }
288 if(w == nil)
289 werrstr("empty page");
290
291 *pw = nil;
292 w = wcondense(w);
293 w = wlink(w);
294 w = wman(w);
295 setmalloctag(w, getcallerpc(&rdline));
296
297 return w;
298 }
299
300 void
printpage(Wpage * w)301 printpage(Wpage *w)
302 {
303 for(; w; w=w->next){
304 switch(w->type){
305 case Wpara:
306 print("para\n");
307 break;
308 case Wheading:
309 print("heading '%s'\n", w->text);
310 break;
311 case Wbullet:
312 print("bullet\n");
313 break;
314 case Wlink:
315 print("link '%s' '%s'\n", w->text, w->url);
316 break;
317 case Wman:
318 print("man %d %s\n", w->section, w->text);
319 break;
320 case Wplain:
321 print("plain '%s'\n", w->text);
322 break;
323 case Whr:
324 print("hr\n");
325 break;
326 case Wpre:
327 print("pre '%s'\n", w->text);
328 break;
329 }
330 }
331 }
332