13e12c5d1SDavid du Colombier #include "tdef.h"
23e12c5d1SDavid du Colombier #include "fns.h"
33e12c5d1SDavid du Colombier #include "ext.h"
487dfdc75SDavid du Colombier #include <assert.h>
53e12c5d1SDavid du Colombier
63e12c5d1SDavid du Colombier #define HY_BIT 0200 /* stuff in here only works for 7-bit ascii */
73e12c5d1SDavid du Colombier /* this value is used (as a literal) in suftab.c */
83e12c5d1SDavid du Colombier /* to encode possible hyphenation points in suffixes. */
93e12c5d1SDavid du Colombier /* it could be changed, by widening the tables */
103e12c5d1SDavid du Colombier /* to be shorts instead of chars. */
113e12c5d1SDavid du Colombier
123e12c5d1SDavid du Colombier /*
133e12c5d1SDavid du Colombier * troff8.c
143e12c5d1SDavid du Colombier *
153e12c5d1SDavid du Colombier * hyphenation
163e12c5d1SDavid du Colombier */
173e12c5d1SDavid du Colombier
18219b2ee8SDavid du Colombier int hexsize = 0; /* hyphenation exception list size */
19219b2ee8SDavid du Colombier char *hbufp = NULL; /* base of list */
20219b2ee8SDavid du Colombier char *nexth = NULL; /* first free slot in list */
213e12c5d1SDavid du Colombier Tchar *hyend;
223e12c5d1SDavid du Colombier
233e12c5d1SDavid du Colombier #define THRESH 160 /* digram goodness threshold */
243e12c5d1SDavid du Colombier int thresh = THRESH;
253e12c5d1SDavid du Colombier
263e12c5d1SDavid du Colombier int texhyphen(void);
273e12c5d1SDavid du Colombier static int alpha(Tchar);
283e12c5d1SDavid du Colombier
/*
 * hyphen: locate the hyphenation points of the word starting at wp.
 *
 * The word may be wrapped in punctuation; the alphabetic run inside it
 * is recorded in wdstart/wdend (and hyend).  Nothing is done unless the
 * text is exactly punctuation, letters, punctuation, terminator, and
 * the alphabetic run is long enough.  On return hyptr holds the
 * candidate break positions, zero-terminated and in increasing order.
 */
void hyphen(Tchar *wp)
{
	int swapped;
	Tchar *p, *tmp;

	/* step over leading punctuation; a letter must follow */
	for (p = wp; punct(*p); p++)
		;
	if (!alpha(*p))
		return;
	wdstart = p;

	/* the word proper is the run of letters that starts here */
	for (p++; alpha(*p); p++)
		;
	hyend = wdend = p - 1;

	/* only punctuation may sit between the letters and the terminator */
	while (punct(*p))
		p++;
	if (*p)
		return;
	if (wdend - wdstart < 4)	/* 4 chars is too short to hyphenate */
		return;

	hyp = hyptr;
	*hyp = 0;
	hyoff = 2;

	/*
	 * for now, try exceptions first, then tex (if hyphalg is non-zero),
	 * then suffix and digram if tex didn't hyphenate it at all.
	 */
	if (!exword() && !texhyphen() && !suffix())
		digram();

	/* terminate the list, then bubble-sort it into increasing order */
	*hyp++ = 0;
	if (*hyptr == 0)
		return;
	do {
		swapped = 0;
		for (hyp = hyptr + 1; *hyp != 0; hyp++) {
			if (*(hyp - 1) > *hyp) {
				tmp = *hyp;
				*hyp = *(hyp - 1);
				*(hyp - 1) = tmp;
				swapped++;
			}
		}
	} while (swapped);
}
753e12c5d1SDavid du Colombier
/*
 * alpha: non-zero if i is really alphabetic.
 *
 * Motions are never letters.  Character codes at or above ALPHABET are
 * rejected before isalpha is consulted, because there is no other good
 * way to make sure the code is in range for the call of isalpha.
 *
 * The return type is spelled out so the definition agrees with the
 * "static int alpha(Tchar)" prototype; implicit int is not valid C99.
 */
static int alpha(Tchar i)
{
	if (ismot(i))
		return 0;
	if (cbits(i) >= ALPHABET)
		return 0;
	return isalpha(cbits(i));
}
853e12c5d1SDavid du Colombier
863e12c5d1SDavid du Colombier
/*
 * punct: 1 if i is a non-zero character that is not alphabetic, else 0.
 * The zero terminator is deliberately not punctuation, so scans that
 * skip punctuation stop at the end of the word.
 */
int punct(Tchar i)
{
	return i != 0 && !alpha(i);
}
943e12c5d1SDavid du Colombier
953e12c5d1SDavid du Colombier
caseha(void)963e12c5d1SDavid du Colombier void caseha(void) /* set hyphenation algorithm */
973e12c5d1SDavid du Colombier {
983e12c5d1SDavid du Colombier hyphalg = HYPHALG;
993e12c5d1SDavid du Colombier if (skip())
1003e12c5d1SDavid du Colombier return;
1013e12c5d1SDavid du Colombier noscale++;
1023e12c5d1SDavid du Colombier hyphalg = atoi0();
1033e12c5d1SDavid du Colombier noscale = 0;
1043e12c5d1SDavid du Colombier }
1053e12c5d1SDavid du Colombier
1063e12c5d1SDavid du Colombier
caseht(void)1073e12c5d1SDavid du Colombier void caseht(void) /* set hyphenation threshold; not in manual! */
1083e12c5d1SDavid du Colombier {
1093e12c5d1SDavid du Colombier thresh = THRESH;
1103e12c5d1SDavid du Colombier if (skip())
1113e12c5d1SDavid du Colombier return;
1123e12c5d1SDavid du Colombier noscale++;
1133e12c5d1SDavid du Colombier thresh = atoi0();
1143e12c5d1SDavid du Colombier noscale = 0;
1153e12c5d1SDavid du Colombier }
1163e12c5d1SDavid du Colombier
1173e12c5d1SDavid du Colombier
growh(char * where)118219b2ee8SDavid du Colombier char *growh(char *where)
119219b2ee8SDavid du Colombier {
120219b2ee8SDavid du Colombier char *new;
121219b2ee8SDavid du Colombier
122219b2ee8SDavid du Colombier hexsize += NHEX;
123219b2ee8SDavid du Colombier if ((new = grow(hbufp, hexsize, sizeof(char))) == NULL)
124219b2ee8SDavid du Colombier return NULL;
125219b2ee8SDavid du Colombier if (new == hbufp) {
126219b2ee8SDavid du Colombier return where;
127219b2ee8SDavid du Colombier } else {
128219b2ee8SDavid du Colombier int diff;
129219b2ee8SDavid du Colombier diff = where - hbufp;
130219b2ee8SDavid du Colombier hbufp = new;
131219b2ee8SDavid du Colombier return new + diff;
132219b2ee8SDavid du Colombier }
133219b2ee8SDavid du Colombier }
134219b2ee8SDavid du Colombier
135219b2ee8SDavid du Colombier
casehw(void)1363e12c5d1SDavid du Colombier void casehw(void)
1373e12c5d1SDavid du Colombier {
1383e12c5d1SDavid du Colombier int i, k;
1393e12c5d1SDavid du Colombier char *j;
1403e12c5d1SDavid du Colombier Tchar t;
1413e12c5d1SDavid du Colombier
142219b2ee8SDavid du Colombier if (nexth == NULL) {
143219b2ee8SDavid du Colombier if ((nexth = hbufp = grow(hbufp, NHEX, sizeof(char))) == NULL) {
144219b2ee8SDavid du Colombier ERROR "No space for exception word list." WARN;
145219b2ee8SDavid du Colombier return;
146219b2ee8SDavid du Colombier }
147219b2ee8SDavid du Colombier hexsize = NHEX;
148219b2ee8SDavid du Colombier }
1493e12c5d1SDavid du Colombier k = 0;
1503e12c5d1SDavid du Colombier while (!skip()) {
151219b2ee8SDavid du Colombier if ((j = nexth) >= hbufp + hexsize - 2)
152219b2ee8SDavid du Colombier if ((j = nexth = growh(j)) == NULL)
1533e12c5d1SDavid du Colombier goto full;
1543e12c5d1SDavid du Colombier for (;;) {
1553e12c5d1SDavid du Colombier if (ismot(t = getch()))
1563e12c5d1SDavid du Colombier continue;
1573e12c5d1SDavid du Colombier i = cbits(t);
1583e12c5d1SDavid du Colombier if (i == ' ' || i == '\n') {
1593e12c5d1SDavid du Colombier *j++ = 0;
1603e12c5d1SDavid du Colombier nexth = j;
1613e12c5d1SDavid du Colombier *j = 0;
1623e12c5d1SDavid du Colombier if (i == ' ')
1633e12c5d1SDavid du Colombier break;
1643e12c5d1SDavid du Colombier else
1653e12c5d1SDavid du Colombier return;
1663e12c5d1SDavid du Colombier }
1673e12c5d1SDavid du Colombier if (i == '-') {
1683e12c5d1SDavid du Colombier k = HY_BIT;
1693e12c5d1SDavid du Colombier continue;
1703e12c5d1SDavid du Colombier }
1713e12c5d1SDavid du Colombier *j++ = maplow(i) | k;
1723e12c5d1SDavid du Colombier k = 0;
173219b2ee8SDavid du Colombier if (j >= hbufp + hexsize - 2)
174219b2ee8SDavid du Colombier if ((j = growh(j)) == NULL)
1753e12c5d1SDavid du Colombier goto full;
1763e12c5d1SDavid du Colombier }
1773e12c5d1SDavid du Colombier }
1783e12c5d1SDavid du Colombier return;
1793e12c5d1SDavid du Colombier full:
180219b2ee8SDavid du Colombier ERROR "Cannot grow exception word list." WARN;
1813e12c5d1SDavid du Colombier *nexth = 0;
1823e12c5d1SDavid du Colombier }
1833e12c5d1SDavid du Colombier
1843e12c5d1SDavid du Colombier
exword(void)1853e12c5d1SDavid du Colombier int exword(void)
1863e12c5d1SDavid du Colombier {
1873e12c5d1SDavid du Colombier Tchar *w;
1883e12c5d1SDavid du Colombier char *e, *save;
1893e12c5d1SDavid du Colombier
190219b2ee8SDavid du Colombier e = hbufp;
1913e12c5d1SDavid du Colombier while (1) {
1923e12c5d1SDavid du Colombier save = e;
193219b2ee8SDavid du Colombier if (e == NULL || *e == 0)
1943e12c5d1SDavid du Colombier return(0);
1953e12c5d1SDavid du Colombier w = wdstart;
1963e12c5d1SDavid du Colombier while (*e && w <= hyend && (*e & 0177) == maplow(cbits(*w))) {
1973e12c5d1SDavid du Colombier e++;
1983e12c5d1SDavid du Colombier w++;
1993e12c5d1SDavid du Colombier }
2003e12c5d1SDavid du Colombier if (!*e) {
2013e12c5d1SDavid du Colombier if (w-1 == hyend || (w == wdend && maplow(cbits(*w)) == 's')) {
2023e12c5d1SDavid du Colombier w = wdstart;
2033e12c5d1SDavid du Colombier for (e = save; *e; e++) {
2043e12c5d1SDavid du Colombier if (*e & HY_BIT)
2053e12c5d1SDavid du Colombier *hyp++ = w;
2063e12c5d1SDavid du Colombier if (hyp > hyptr + NHYP - 1)
2073e12c5d1SDavid du Colombier hyp = hyptr + NHYP - 1;
2083e12c5d1SDavid du Colombier w++;
2093e12c5d1SDavid du Colombier }
2103e12c5d1SDavid du Colombier return(1);
2113e12c5d1SDavid du Colombier } else {
2123e12c5d1SDavid du Colombier e++;
2133e12c5d1SDavid du Colombier continue;
2143e12c5d1SDavid du Colombier }
2153e12c5d1SDavid du Colombier } else
2163e12c5d1SDavid du Colombier while (*e++)
2173e12c5d1SDavid du Colombier ;
2183e12c5d1SDavid du Colombier }
2193e12c5d1SDavid du Colombier }
2203e12c5d1SDavid du Colombier
2213e12c5d1SDavid du Colombier
suffix(void)2223e12c5d1SDavid du Colombier suffix(void)
2233e12c5d1SDavid du Colombier {
2243e12c5d1SDavid du Colombier Tchar *w;
2253e12c5d1SDavid du Colombier char *s, *s0;
2263e12c5d1SDavid du Colombier Tchar i;
2273e12c5d1SDavid du Colombier extern char *suftab[];
2283e12c5d1SDavid du Colombier
2293e12c5d1SDavid du Colombier again:
2303e12c5d1SDavid du Colombier i = cbits(*hyend);
2313e12c5d1SDavid du Colombier if (!alpha(i))
2323e12c5d1SDavid du Colombier return(0);
2333e12c5d1SDavid du Colombier if (i < 'a')
2343e12c5d1SDavid du Colombier i -= 'A' - 'a';
2353e12c5d1SDavid du Colombier if ((s0 = suftab[i-'a']) == 0)
2363e12c5d1SDavid du Colombier return(0);
2373e12c5d1SDavid du Colombier for (;;) {
2383e12c5d1SDavid du Colombier if ((i = *s0 & 017) == 0)
2393e12c5d1SDavid du Colombier return(0);
2403e12c5d1SDavid du Colombier s = s0 + i - 1;
2413e12c5d1SDavid du Colombier w = hyend - 1;
2423e12c5d1SDavid du Colombier while (s > s0 && w >= wdstart && (*s & 0177) == maplow(cbits(*w))) {
2433e12c5d1SDavid du Colombier s--;
2443e12c5d1SDavid du Colombier w--;
2453e12c5d1SDavid du Colombier }
2463e12c5d1SDavid du Colombier if (s == s0)
2473e12c5d1SDavid du Colombier break;
2483e12c5d1SDavid du Colombier s0 += i;
2493e12c5d1SDavid du Colombier }
2503e12c5d1SDavid du Colombier s = s0 + i - 1;
2513e12c5d1SDavid du Colombier w = hyend;
2523e12c5d1SDavid du Colombier if (*s0 & HY_BIT)
2533e12c5d1SDavid du Colombier goto mark;
2543e12c5d1SDavid du Colombier while (s > s0) {
2553e12c5d1SDavid du Colombier w--;
2563e12c5d1SDavid du Colombier if (*s-- & HY_BIT) {
2573e12c5d1SDavid du Colombier mark:
2583e12c5d1SDavid du Colombier hyend = w - 1;
2593e12c5d1SDavid du Colombier if (*s0 & 0100) /* 0100 used in suftab to encode something too */
2603e12c5d1SDavid du Colombier continue;
2613e12c5d1SDavid du Colombier if (!chkvow(w))
2623e12c5d1SDavid du Colombier return(0);
2633e12c5d1SDavid du Colombier *hyp++ = w;
2643e12c5d1SDavid du Colombier }
2653e12c5d1SDavid du Colombier }
2663e12c5d1SDavid du Colombier if (*s0 & 040)
2673e12c5d1SDavid du Colombier return(0);
2683e12c5d1SDavid du Colombier if (exword())
2693e12c5d1SDavid du Colombier return(1);
2703e12c5d1SDavid du Colombier goto again;
2713e12c5d1SDavid du Colombier }
2723e12c5d1SDavid du Colombier
2733e12c5d1SDavid du Colombier
/*
 * maplow: return i folded to lower case if it is an upper-case
 * letter, otherwise i unchanged.
 *
 * The <ctype.h> functions are only defined for values representable
 * as unsigned char (or EOF), and callers pass arbitrary character
 * codes, so anything outside 0..0377 is returned untouched instead of
 * being handed to isupper.  The return type is explicit because
 * implicit int is not valid C99.
 */
int maplow(int i)
{
	if (i >= 0 && i <= 0377 && isupper(i))
		i = tolower(i);
	return(i);
}
2803e12c5d1SDavid du Colombier
2813e12c5d1SDavid du Colombier
/*
 * vowel: 1 if i is one of a, e, i, o, u, y in either case, else 0.
 * The return type is explicit because implicit int is not valid C99.
 */
int vowel(int i)
{
	switch (i) {
	case 'a': case 'A':
	case 'e': case 'E':
	case 'i': case 'I':
	case 'o': case 'O':
	case 'u': case 'U':
	case 'y': case 'Y':
		return(1);
	default:
		return(0);
	}
}
2963e12c5d1SDavid du Colombier
2973e12c5d1SDavid du Colombier
/*
 * chkvow: search backwards from the character just before w, down to
 * wdstart, for a vowel.  Returns a pointer to the first vowel found,
 * or 0 if there is none.
 */
Tchar *chkvow(Tchar *w)
{
	Tchar *p;

	for (p = w - 1; p >= wdstart; p--)
		if (vowel(cbits(*p)))
			return(p);
	return(0);
}
3053e12c5d1SDavid du Colombier
3063e12c5d1SDavid du Colombier
digram(void)3073e12c5d1SDavid du Colombier void digram(void)
3083e12c5d1SDavid du Colombier {
309*14f51593SDavid du Colombier int maxval, val;
310*14f51593SDavid du Colombier Tchar *nhyend, *maxw, *w;
3113e12c5d1SDavid du Colombier extern char bxh[26][13], bxxh[26][13], xxh[26][13], xhx[26][13], hxx[26][13];
3123e12c5d1SDavid du Colombier
313*14f51593SDavid du Colombier maxw = 0;
3143e12c5d1SDavid du Colombier again:
3153e12c5d1SDavid du Colombier if (!(w = chkvow(hyend + 1)))
3163e12c5d1SDavid du Colombier return;
3173e12c5d1SDavid du Colombier hyend = w;
3183e12c5d1SDavid du Colombier if (!(w = chkvow(hyend)))
3193e12c5d1SDavid du Colombier return;
3203e12c5d1SDavid du Colombier nhyend = w;
3213e12c5d1SDavid du Colombier maxval = 0;
3223e12c5d1SDavid du Colombier w--;
3233e12c5d1SDavid du Colombier while (++w < hyend && w < wdend - 1) {
3243e12c5d1SDavid du Colombier val = 1;
3253e12c5d1SDavid du Colombier if (w == wdstart)
3263e12c5d1SDavid du Colombier val *= dilook('a', cbits(*w), bxh);
3273e12c5d1SDavid du Colombier else if (w == wdstart + 1)
3283e12c5d1SDavid du Colombier val *= dilook(cbits(*(w-1)), cbits(*w), bxxh);
3293e12c5d1SDavid du Colombier else
3303e12c5d1SDavid du Colombier val *= dilook(cbits(*(w-1)), cbits(*w), xxh);
3313e12c5d1SDavid du Colombier val *= dilook(cbits(*w), cbits(*(w+1)), xhx);
3323e12c5d1SDavid du Colombier val *= dilook(cbits(*(w+1)), cbits(*(w+2)), hxx);
3333e12c5d1SDavid du Colombier if (val > maxval) {
3343e12c5d1SDavid du Colombier maxval = val;
3353e12c5d1SDavid du Colombier maxw = w + 1;
3363e12c5d1SDavid du Colombier }
3373e12c5d1SDavid du Colombier }
3383e12c5d1SDavid du Colombier hyend = nhyend;
3393e12c5d1SDavid du Colombier if (maxval > thresh)
3403e12c5d1SDavid du Colombier *hyp++ = maxw;
3413e12c5d1SDavid du Colombier goto again;
3423e12c5d1SDavid du Colombier }
3433e12c5d1SDavid du Colombier
3443e12c5d1SDavid du Colombier
/*
 * dilook: look up the 4-bit digram weight for the letter pair (a, b).
 *
 * t has one row per first letter and packs two weights per byte:
 * the high nibble belongs to the even-numbered second letter, the low
 * nibble to the odd-numbered one.  Case is ignored.
 *
 * A pair that is not two letters a-z yields 0 instead of indexing
 * outside the table.  The byte is read as unsigned so the shift does
 * not depend on the signedness of char.  The return type is explicit
 * because implicit int is not valid C99.
 */
int dilook(int a, int b, char t[26][13])
{
	int row, col, nib;

	row = maplow(a) - 'a';
	col = maplow(b) - 'a';
	if (row < 0 || row >= 26 || col < 0 || col >= 26)
		return(0);
	nib = (unsigned char)t[row][col/2];
	if (!(col & 01))
		nib >>= 4;
	return(nib & 017);
}
3543e12c5d1SDavid du Colombier
3553e12c5d1SDavid du Colombier
/* here beginneth the tex hyphenation code, as interpreted freely */
/* the main difference is that there is no attempt to squeeze space */
/* as tightly as tex does. */
3593e12c5d1SDavid du Colombier
3603e12c5d1SDavid du Colombier static int texit(Tchar *, Tchar *);
3613e12c5d1SDavid du Colombier static int readpats(void);
3623e12c5d1SDavid du Colombier static void install(char *);
3633e12c5d1SDavid du Colombier static void fixup(void);
3643e12c5d1SDavid du Colombier static int trieindex(int, int);
3653e12c5d1SDavid du Colombier
3663e12c5d1SDavid du Colombier static char pats[50000]; /* size ought to be computed dynamically */
3673e12c5d1SDavid du Colombier static char *nextpat = pats;
3683e12c5d1SDavid du Colombier static char *trie[27*27]; /* english-specific sizes */
3693e12c5d1SDavid du Colombier
texhyphen(void)3703e12c5d1SDavid du Colombier int texhyphen(void)
3713e12c5d1SDavid du Colombier {
3723e12c5d1SDavid du Colombier static int loaded = 0; /* -1: couldn't find tex file */
3733e12c5d1SDavid du Colombier
3743e12c5d1SDavid du Colombier if (hyphalg == 0 || loaded == -1) /* non-zero => tex for now */
3753e12c5d1SDavid du Colombier return 0;
3763e12c5d1SDavid du Colombier if (loaded == 0) {
3773e12c5d1SDavid du Colombier if (readpats())
3783e12c5d1SDavid du Colombier loaded = 1;
3793e12c5d1SDavid du Colombier else
3803e12c5d1SDavid du Colombier loaded = -1;
3813e12c5d1SDavid du Colombier }
3823e12c5d1SDavid du Colombier return texit(wdstart, wdend);
3833e12c5d1SDavid du Colombier }
3843e12c5d1SDavid du Colombier
/*
 * texit: hyphenate as in tex; returns hyp - hyptr, which is non-zero
 * if a hyphen was found.
 *
 * The word start..end is copied, lower-cased and wrapped in '.', into
 * w.  For every position in w the patterns filed under its first two
 * characters are compared against the text; for each pattern that
 * matches, its digit string is merged into cnt, keeping the larger
 * digit at every slot.  An odd digit at slot i allows a break before
 * letter i, except within the first two and the last two letters.
 *
 * cnt needs a slot for every index 0..nw, and nw can reach 500, so it
 * has one element more than the 500 the loop bound suggests.  Break
 * points are stored only while there is room, using the same
 * hyptr + NHYP - 1 limit that exword() enforces.
 */
static int texit(Tchar *start, Tchar *end)
{
	int nw, i, k, equal, cnt[500+1];
	char w[500+1], *np, *pp, *wp, *xpp, *xwp;

	w[0] = '.';
	for (nw = 1; start <= end && nw < 500-1; nw++, start++)
		w[nw] = maplow(tolower(cbits(*start)));
	start -= (nw - 1);	/* back to the first letter */
	w[nw++] = '.';
	w[nw] = 0;
	for (i = 0; i <= nw; i++)
		cnt[i] = '0';

	for (wp = w; wp+1 < w+nw; wp++) {
		/* a null trie entry means no pattern starts with this pair */
		for (pp = trie[trieindex(*wp, *(wp+1))]; pp != 0 && pp < nextpat; ) {
			if (*pp != *wp			/* no match on 1st letter */
			 || *(pp+1) != *(wp+1))		/* no match on 2nd letter */
				break;			/* so move to next letter of word */
			equal = 1;
			for (xpp = pp+2, xwp = wp+2; *xpp; )
				if (*xpp++ != *xwp++) {
					equal = 0;
					break;
				}
			if (equal) {
				np = xpp+1;	/* numpat */
				for (k = wp-w; *np; k++, np++)
					if (*np > cnt[k])
						cnt[k] = *np;
			}
			pp += *(pp-1);	/* skip over pattern and numbers to next */
		}
	}

	/* slots 3 .. nw-4 are the only ones where a break is allowed */
	for (i = 3; i < nw - 3; i++)
		if (cnt[i] % 2 && hyp < hyptr + NHYP - 1)
			*hyp++ = start + i - 1;
	return hyp - hyptr;	/* non-zero if a hyphen was found */
}
4463e12c5d1SDavid du Colombier
4473e12c5d1SDavid du Colombier /*
4483e12c5d1SDavid du Colombier This code assumes that hyphen.tex looks like
4493e12c5d1SDavid du Colombier % some comments
4503e12c5d1SDavid du Colombier \patterns{ % more comments
4513e12c5d1SDavid du Colombier pat5ter4ns, 1 per line, SORTED, nothing else
4523e12c5d1SDavid du Colombier }
4533e12c5d1SDavid du Colombier more goo
4543e12c5d1SDavid du Colombier \hyphenation{ % more comments
4553e12c5d1SDavid du Colombier ex-cep-tions, one per line; i ignore this part for now
4563e12c5d1SDavid du Colombier }
4573e12c5d1SDavid du Colombier
4583e12c5d1SDavid du Colombier this code is NOT robust against variations. unfortunately,
4593e12c5d1SDavid du Colombier it looks like every local language version of this file has
4603e12c5d1SDavid du Colombier a different format. i have also made no provision for weird
4613e12c5d1SDavid du Colombier characters. sigh.
4623e12c5d1SDavid du Colombier */
4633e12c5d1SDavid du Colombier
readpats(void)4643e12c5d1SDavid du Colombier static int readpats(void)
4653e12c5d1SDavid du Colombier {
4663e12c5d1SDavid du Colombier FILE *fp;
4673e12c5d1SDavid du Colombier char buf[200], buf1[200];
4683e12c5d1SDavid du Colombier
4693e12c5d1SDavid du Colombier if ((fp = fopen(TEXHYPHENS, "r")) == NULL
470219b2ee8SDavid du Colombier && (fp = fopen(DWBalthyphens, "r")) == NULL) {
4713e12c5d1SDavid du Colombier ERROR "warning: can't find hyphen.tex" WARN;
4723e12c5d1SDavid du Colombier return 0;
4733e12c5d1SDavid du Colombier }
4743e12c5d1SDavid du Colombier
4753e12c5d1SDavid du Colombier while (fgets(buf, sizeof buf, fp) != NULL) {
4763e12c5d1SDavid du Colombier sscanf(buf, "%s", buf1);
4773e12c5d1SDavid du Colombier if (strcmp(buf1, "\\patterns{") == 0)
4783e12c5d1SDavid du Colombier break;
4793e12c5d1SDavid du Colombier }
4803e12c5d1SDavid du Colombier while (fgets(buf, sizeof buf, fp) != NULL) {
4813e12c5d1SDavid du Colombier if (buf[0] == '}')
4823e12c5d1SDavid du Colombier break;
4833e12c5d1SDavid du Colombier install(buf);
4843e12c5d1SDavid du Colombier }
4853e12c5d1SDavid du Colombier fclose(fp);
4863e12c5d1SDavid du Colombier fixup();
4873e12c5d1SDavid du Colombier return 1;
4883e12c5d1SDavid du Colombier }
4893e12c5d1SDavid du Colombier
install(char * s)4903e12c5d1SDavid du Colombier static void install(char *s) /* map ab4c5de to: 12 abcde \0 00405 \0 */
4913e12c5d1SDavid du Colombier {
4923e12c5d1SDavid du Colombier int npat, lastpat;
4933e12c5d1SDavid du Colombier char num[500], *onextpat = nextpat;
4943e12c5d1SDavid du Colombier
4953e12c5d1SDavid du Colombier num[0] = '0';
4963e12c5d1SDavid du Colombier *nextpat++ = ' '; /* fill in with count later */
4973e12c5d1SDavid du Colombier for (npat = lastpat = 0; *s != '\n' && *s != '\0'; s++) {
4983e12c5d1SDavid du Colombier if (isdigit(*s)) {
4993e12c5d1SDavid du Colombier num[npat] = *s;
5003e12c5d1SDavid du Colombier lastpat = npat;
5013e12c5d1SDavid du Colombier } else {
5023e12c5d1SDavid du Colombier *nextpat++ = *s;
5033e12c5d1SDavid du Colombier npat++;
5043e12c5d1SDavid du Colombier num[npat] = '0';
5053e12c5d1SDavid du Colombier }
5063e12c5d1SDavid du Colombier }
5073e12c5d1SDavid du Colombier *nextpat++ = 0;
5083e12c5d1SDavid du Colombier if (nextpat > pats + sizeof(pats)-20) {
5093e12c5d1SDavid du Colombier ERROR "tex hyphenation table overflow, tail end ignored" WARN;
5103e12c5d1SDavid du Colombier nextpat = onextpat;
5113e12c5d1SDavid du Colombier }
5123e12c5d1SDavid du Colombier num[lastpat+1] = 0;
5133e12c5d1SDavid du Colombier strcat(nextpat, num);
5143e12c5d1SDavid du Colombier nextpat += strlen(nextpat) + 1;
5153e12c5d1SDavid du Colombier }
5163e12c5d1SDavid du Colombier
fixup(void)5173e12c5d1SDavid du Colombier static void fixup(void) /* build indexes of where . a b c ... start */
5183e12c5d1SDavid du Colombier {
5193e12c5d1SDavid du Colombier char *p, *lastc;
5203e12c5d1SDavid du Colombier int n;
5213e12c5d1SDavid du Colombier
5223e12c5d1SDavid du Colombier for (lastc = pats, p = pats+1; p < nextpat; p++)
5233e12c5d1SDavid du Colombier if (*p == ' ') {
5243e12c5d1SDavid du Colombier *lastc = p - lastc;
5253e12c5d1SDavid du Colombier lastc = p;
5263e12c5d1SDavid du Colombier }
5273e12c5d1SDavid du Colombier *lastc = p - lastc;
5283e12c5d1SDavid du Colombier for (p = pats+1; p < nextpat; ) {
5293e12c5d1SDavid du Colombier n = trieindex(p[0], p[1]);
5303e12c5d1SDavid du Colombier if (trie[n] == 0)
5313e12c5d1SDavid du Colombier trie[n] = p;
5323e12c5d1SDavid du Colombier p += p[-1];
5333e12c5d1SDavid du Colombier }
5343e12c5d1SDavid du Colombier /* printf("pats = %d\n", nextpat - pats); */
5353e12c5d1SDavid du Colombier }
5363e12c5d1SDavid du Colombier
/*
 * trieindex: map a pair of pattern characters to a slot in trie.
 * '.' counts as 0 and the letters a..z as 1..26; the slot is
 * 27*first + second.  Anything that falls outside the table trips
 * the assertion.
 */
static int trieindex(int d1, int d2)
{
	int row, col, slot;

	row = 0;
	if (d1 != '.')
		row = d1 - 'a' + 1;
	col = 0;
	if (d2 != '.')
		col = d2 - 'a' + 1;
	slot = 27*row + col;
	assert(0 <= slot && slot < 27*27);
	return slot;
}
545