/* SCCS identification string; omitted when the file is run through lint. */
#ifndef lint
static char sccsid[] = "@(#)n8.c 2.1 (CWI) 85/07/18";
#endif /* lint */
#include <ctype.h>
#include "tdef.h"
#include <sgtty.h>
#include "ext.h"
/*
 * Flag bit or-ed into a stored letter to mark "a hyphen may go before
 * this letter".  It occupies the top bit of a byte, so the letter itself
 * must fit in the low seven bits.
 */
#define HY_BIT	0200	/* stuff in here only works for ascii */
951163Scael
/*
 * troff8.c
 *
 * hyphenation
 */
1551163Scael
1651163Scael char hbuf[NHEX];
1751163Scael char *nexth = hbuf;
1851163Scael tchar *hyend;
1951163Scael
hyphen(wp)2051163Scael hyphen(wp)
2151163Scael tchar *wp;
2251163Scael {
2351163Scael register j;
2451163Scael register tchar *i;
2551163Scael
2651163Scael i = wp;
2751163Scael while (punct(cbits(*i++)))
2851163Scael ;
2951163Scael if (!alph(cbits(*--i)))
3051163Scael return;
3151163Scael wdstart = i++;
3251163Scael while (alph(cbits(*i++)))
3351163Scael ;
3451163Scael hyend = wdend = --i - 1;
3551163Scael while (punct(cbits(*i++)))
3651163Scael ;
3751163Scael if (*--i)
3851163Scael return;
3951163Scael if ((wdend - wdstart - 4) < 0)
4051163Scael return;
4151163Scael hyp = hyptr;
4251163Scael *hyp = 0;
4351163Scael hyoff = 2;
4451163Scael /*
4551163Scael if (!exword() && !suffix())
4651163Scael digram();
4751163Scael */
4851163Scael if (!exword()) {
4951163Scael if (hyalg == ORIGINAL && !suffix())
5051163Scael digram();
5151163Scael if (hyalg == DUTCH)
5251163Scael split(wdstart, wdend);
5351163Scael }
5451163Scael *hyp++ = 0;
5551163Scael if (*hyptr)
5651163Scael for (j = 1; j; ) {
5751163Scael j = 0;
5851163Scael for (hyp = hyptr + 1; *hyp != 0; hyp++) {
5951163Scael if (*(hyp - 1) > *hyp) {
6051163Scael j++;
6151163Scael i = *hyp;
6251163Scael *hyp = *(hyp - 1);
6351163Scael *(hyp - 1) = i;
6451163Scael }
6551163Scael }
6651163Scael }
6751163Scael }
6851163Scael
6951163Scael
/*
 * punct - return 1 if character code i is neither zero nor a letter
 * (as decided by alph()), 0 otherwise.
 */
int
punct(i)
int i;
{
	return(i != 0 && !alph(i));
}
7751163Scael
7851163Scael
/*
 * alph - return 1 if i is an ASCII letter ('a'..'z' or 'A'..'Z'),
 * 0 otherwise.
 */
int
alph(i)
int i;
{
	return((i >= 'a' && i <= 'z') || (i >= 'A' && i <= 'Z'));
}
8651163Scael
/*
 * set the hyphenation algorithm
 *
 * jna
 */
9251163Scael
caseha()9351163Scael caseha()
9451163Scael { register i;
9551163Scael
9651163Scael if ( skip())
9751163Scael i = hyalg1;
9851163Scael else {
9951163Scael noscale++;
10051163Scael noscale = 0;
101*54111Scael i = max((int)atoi0(), 0);
10251163Scael if (nonumb)
10351163Scael return;
10451163Scael if (i > MAXDIALECTS) {
10551163Scael errprint("Unknown dialect %d", i);
10651163Scael return;
10751163Scael }
10851163Scael }
10951163Scael hyalg1 = hyalg;
11051163Scael hyalg = i;
11151163Scael if( hyalg == DUTCH)
11251163Scael thresh = DUTCH_THRESH;
11351163Scael }
11451163Scael
caseht()11551163Scael caseht()
11651163Scael {
11751163Scael switch(hyalg) {
11851163Scael case ORIGINAL:
11951163Scael thresh = THRESH;
12051163Scael break;
12151163Scael case DUTCH:
12251163Scael thresh = DUTCH_THRESH;
12351163Scael break;
12451163Scael }
12551163Scael if (skip())
12651163Scael return;
12751163Scael noscale++;
12851163Scael if (hyalg == DUTCH)
129*54111Scael thresh = max((int)atoi0(), 1);
13051163Scael else
131*54111Scael thresh = (int)atoi0();
13251163Scael noscale = 0;
13351163Scael }
13451163Scael
13551163Scael
casehw()13651163Scael casehw()
13751163Scael {
13851163Scael register i, k;
13951163Scael register char *j;
14051163Scael tchar t;
14151163Scael
14251163Scael k = 0;
14351163Scael while (!skip()) {
14451163Scael if ((j = nexth) >= (hbuf + NHEX - 2))
14551163Scael goto full;
14651163Scael for (; ; ) {
14751163Scael if (ismot(t = getch()))
14851163Scael continue;
14951163Scael i = cbits(t);
15051163Scael if (i == ' ' || i == '\n') {
15151163Scael *j++ = 0;
15251163Scael nexth = j;
15351163Scael *j = 0;
15451163Scael if (i == ' ')
15551163Scael break;
15651163Scael else
15751163Scael return;
15851163Scael }
15951163Scael if (i == '-') {
16051163Scael k = HY_BIT;
16151163Scael continue;
16251163Scael }
16351163Scael *j++ = maplow(i) | k;
16451163Scael k = 0;
16551163Scael if (j >= (hbuf + NHEX - 2))
16651163Scael goto full;
16751163Scael }
16851163Scael }
16951163Scael return;
17051163Scael full:
17151163Scael errprint("exception word list full.");
17251163Scael *nexth = 0;
17351163Scael }
17451163Scael
17551163Scael
exword()17651163Scael exword()
17751163Scael {
17851163Scael register tchar *w;
17951163Scael register char *e;
18051163Scael char *save;
18151163Scael
18251163Scael e = hbuf;
18351163Scael while (1) {
18451163Scael save = e;
18551163Scael if (*e == 0)
18651163Scael return(0);
18751163Scael w = wdstart;
18851163Scael while (*e && w <= hyend && (*e & 0177) == maplow(cbits(*w))) {
18951163Scael e++;
19051163Scael w++;
19151163Scael };
19251163Scael if (!*e) {
19351163Scael if (w-1 == hyend ||
19451163Scael (hyalg == ORIGINAL /* s-extension only in original */
19551163Scael && (w == wdend && maplow(cbits(*w)) == 's'))) {
19651163Scael w = wdstart;
19751163Scael for (e = save; *e; e++) {
19851163Scael if (*e & HY_BIT)
19951163Scael *hyp++ = w;
20051163Scael if (hyp > (hyptr + NHYP - 1))
20151163Scael hyp = hyptr + NHYP - 1;
20251163Scael w++;
20351163Scael }
20451163Scael return(1);
20551163Scael } else {
20651163Scael e++;
20751163Scael continue;
20851163Scael }
20951163Scael } else
21051163Scael while (*e++)
21151163Scael ;
21251163Scael }
21351163Scael }
21451163Scael
21551163Scael
suffix()21651163Scael suffix()
21751163Scael {
21851163Scael register tchar *w;
21951163Scael register char *s, *s0;
22051163Scael tchar i;
22151163Scael extern char *suftab[];
22251163Scael extern tchar *chkvow();
22351163Scael
22451163Scael again:
22551163Scael if (!alph(cbits(i = cbits(*hyend))))
22651163Scael return(0);
22751163Scael if (i < 'a')
22851163Scael i -= 'A' - 'a';
22951163Scael if ((s0 = suftab[i-'a']) == 0)
23051163Scael return(0);
23151163Scael for (; ; ) {
23251163Scael if ((i = *s0 & 017) == 0)
23351163Scael return(0);
23451163Scael s = s0 + i - 1;
23551163Scael w = hyend - 1;
23651163Scael while (s > s0 && w >= wdstart && (*s & 0177) == maplow(cbits(*w))) {
23751163Scael s--;
23851163Scael w--;
23951163Scael }
24051163Scael if (s == s0)
24151163Scael break;
24251163Scael s0 += i;
24351163Scael }
24451163Scael s = s0 + i - 1;
24551163Scael w = hyend;
24651163Scael if (*s0 & HY_BIT)
24751163Scael goto mark;
24851163Scael while (s > s0) {
24951163Scael w--;
25051163Scael if (*s-- & HY_BIT) {
25151163Scael mark:
25251163Scael hyend = w - 1;
25351163Scael if (*s0 & 0100)
25451163Scael continue;
25551163Scael if (!chkvow(w))
25651163Scael return(0);
25751163Scael *hyp++ = w;
25851163Scael }
25951163Scael }
26051163Scael if (*s0 & 040)
26151163Scael return(0);
26251163Scael if (exword())
26351163Scael return(1);
26451163Scael goto again;
26551163Scael }
26651163Scael
26751163Scael
/*
 * maplow - fold an ASCII upper-case letter to lower case; every other
 * value is returned unchanged.
 *
 * An explicit range test is used instead of isupper()/tolower() because
 * those are only defined for unsigned char values and EOF, while the
 * codes passed in here are not confined to that range.
 */
int
maplow(i)
register int i;
{
	if (i >= 'A' && i <= 'Z')
		i += 'a' - 'A';
	return(i);
}
27551163Scael
27651163Scael
/*
 * vowel - return 1 if i is one of a, e, i, o, u, y in either case,
 * 0 otherwise.
 */
int
vowel(i)
int i;
{
	switch (maplow(i)) {
	case 'a':
	case 'e':
	case 'i':
	case 'o':
	case 'u':
	case 'y':
		return(1);
	default:
		return(0);
	}
}
29251163Scael
29351163Scael
chkvow(w)29451163Scael tchar *chkvow(w)
29551163Scael tchar *w;
29651163Scael {
29751163Scael while (--w >= wdstart)
29851163Scael if (vowel(cbits(*w)))
29951163Scael return(w);
30051163Scael return(0);
30151163Scael }
30251163Scael
30351163Scael
digram()30451163Scael digram()
30551163Scael {
30651163Scael register tchar *w;
30751163Scael register val;
30851163Scael tchar * nhyend, *maxw;
30951163Scael int maxval;
31051163Scael extern char bxh[26][13], bxxh[26][13], xxh[26][13], xhx[26][13], hxx[26][13];
31151163Scael
31251163Scael again:
31351163Scael if (!(w = chkvow(hyend + 1)))
31451163Scael return;
31551163Scael hyend = w;
31651163Scael if (!(w = chkvow(hyend)))
31751163Scael return;
31851163Scael nhyend = w;
31951163Scael maxval = 0;
32051163Scael w--;
32151163Scael while ((++w < hyend) && (w < (wdend - 1))) {
32251163Scael val = 1;
32351163Scael if (w == wdstart)
32451163Scael val *= dilook('a', cbits(*w), bxh);
32551163Scael else if (w == wdstart + 1)
32651163Scael val *= dilook(cbits(*(w-1)), cbits(*w), bxxh);
32751163Scael else
32851163Scael val *= dilook(cbits(*(w-1)), cbits(*w), xxh);
32951163Scael val *= dilook(cbits(*w), cbits(*(w+1)), xhx);
33051163Scael val *= dilook(cbits(*(w+1)), cbits(*(w+2)), hxx);
33151163Scael if (val > maxval) {
33251163Scael maxval = val;
33351163Scael maxw = w + 1;
33451163Scael }
33551163Scael }
33651163Scael hyend = nhyend;
33751163Scael if (maxval > thresh)
33851163Scael *hyp++ = maxw;
33951163Scael goto again;
34051163Scael }
34151163Scael
34251163Scael
/*
 * dilook - look up the 4-bit weight for the letter pair (a, b) in
 * digram table t.
 *
 * Each row holds 26 weights packed two per byte: an even column uses the
 * high nibble, an odd column the low nibble.  Both letters are case
 * folded first.  If either argument is not a letter, 0 is returned
 * rather than indexing outside the table.
 */
int
dilook(a, b, t)
int a, b;
char t[26][13];
{
	register int row, col, nib;

	row = maplow(a) - 'a';
	col = maplow(b) - 'a';
	if (row < 0 || row > 25 || col < 0 || col > 25)
		return(0);
	nib = t[row][col / 2];
	if (!(col & 01))
		nib >>= 4;
	return(nib & 017);
}
35451163Scael
35551163Scael
/*
 * All this jazz is here so that the Dutch dialect can be hyphenated.
 * It first appeared in the Dutch version of troff (nltroff), due to
 * teus hagen.
 * The original program was converted from Algol60 to C by, I think,
 * bert ijsselstein.
 * It's a mess, anyway.
 *
 * Planted in this version of troff by jaap akkerhuis (jna).
 *
 * Note that this is licensed software!
 *
 */
36951163Scael
#ifndef NULL
#define NULL 0
#endif
#define MAXLETT 50	/* at most the first MAXLETT characters of a word
			   will be processed */
#define MAXSYLL 20	/* at most the first MAXSYLL syllables of a word
			   will be processed */

/* bestsplit codes of the letters that bestsplit1() tests for by name */
#define LETTEREE 27
#define LETTERJ 41
#define LETTERV 55
#define LETTERX 57
#define LETTERZ 58
38351163Scael
/*
 * split(..) needs to be cleaned up, could install hjt's version...
 */
38751163Scael
split(aword,anend)38851163Scael split( aword, anend ) register tchar *aword, *anend;
38951163Scael { register tchar *place;
39051163Scael extern tchar *bestsplit1();
39151163Scael
39251163Scael place = bestsplit1( aword, anend );
39351163Scael if( place != (tchar *) NULL )
39451163Scael { *hyp++ = place;
39551163Scael if( place - aword > thresh && anend - place > thresh )
39651163Scael split( aword, place+1 );
39751163Scael if( anend - place > thresh && place - aword > thresh )
39851163Scael split( place, anend );
39951163Scael }
40051163Scael }
40151163Scael
40251163Scael tchar *
bestsplit1(tosplit,aend)40351163Scael bestsplit1( tosplit , aend )
40451163Scael tchar *tosplit, *aend;
40551163Scael {
40651163Scael /* This function determines the "best" place to split into two parts the
40751163Scael * Dutch word contained in a string of <size> characters which starts at
40851163Scael * the address <tosplit> .
40951163Scael * The input characters should be in ASCII code .
41051163Scael * The function returns as value the number of characters of the first
41151163Scael * of the two parts .
41251163Scael * If the returned value exceeds the character count of the line the
41351163Scael * user may try to invoke bestsplit1 again but now with <size> equal to
41451163Scael * the returned value plus one .
41551163Scael * The algorithm is adapted from the Mathematical Centre report NR 28/72,
41651163Scael * "BESTESPLITS1, EEN PROCEDURE VOOR HET AUTOMATISCH AFBREKEN VAN NEDER-
41751163Scael * LANDSE WOORDEN" , which has been written by J.C. VAN VLIET.
41851163Scael */
41951163Scael extern char translate[], comprimation[][14], consonant[][23],
42051163Scael prefix[][3] ;
42151163Scael short woord[ MAXLETT +1], reference[ MAXLETT +1], vowel[ MAXSYLL ],
42251163Scael turn[ MAXSYLL ] , letter, nextlett, vowel1, vowel2,
42351163Scael l0, l1, l2 ;
42451163Scael short numlett, numsyll, turnindex, differ, start1, start2, stop,
42551163Scael level, bp ;
42651163Scael register int i, j, help ;
42751163Scael short size = aend - tosplit + 1;
42851163Scael
42951163Scael /* translate into bestsplit code : */
43051163Scael woord[0] = 0 ;
43151163Scael i = 1 ;
43251163Scael help = -1 ;
43351163Scael while ( (++help < size) && (i < MAXLETT ) ) {
43451163Scael reference[i] = i;
43551163Scael woord[i++] = translate[maplow(cbits(tosplit[help])) - 'a'] ;
43651163Scael }
43751163Scael /* end of translation : */
43851163Scael
43951163Scael numlett = i ;
44051163Scael if ( numlett < 4 ) goto nosplit ;
44151163Scael i = j = 1 ;
44251163Scael help = 0 ;
44351163Scael while ( i < numlett ) {
44451163Scael letter = woord[i] ;
44551163Scael /* comprimation of vowels : */
44651163Scael if ( (25 < letter) && (letter < 41) ) {
44751163Scael nextlett = woord[i+1] ;
44851163Scael if ( (28 < nextlett) && (nextlett < 43) ) {
44951163Scael letter = comprimation[letter-26][nextlett-29] ;
45051163Scael if (letter > 0) {
45151163Scael i++ ;
45251163Scael help++ ;
45351163Scael woord[i] = letter ;
45451163Scael continue ;
45551163Scael }
45651163Scael }
45751163Scael } /* end of comprimation */
45851163Scael
45951163Scael woord[j] = woord[i] ;
46051163Scael j++ ;
46151163Scael i++ ;
46251163Scael reference[j] += help ;
46351163Scael }
46451163Scael woord[j] = woord[numlett] ;
46551163Scael numlett = j ;
46651163Scael
46751163Scael
46851163Scael /* determination of the number of syllables */
46951163Scael j = -1 ;
47051163Scael i = 0 ;
47151163Scael while ( ( ++i <= numlett ) && ( j < MAXSYLL ) ) {
47251163Scael if (woord[i] < 39) {
47351163Scael j++ ;
47451163Scael vowel[j] = i ;
47551163Scael }
47651163Scael }
47751163Scael numsyll = j+1 ;
47851163Scael
47951163Scael if ( numsyll < 2 ) goto nosplit ;
48051163Scael turnindex = 0 ;
48151163Scael differ = 1 ;
48251163Scael start1 = 0 ;
48351163Scael start2 = numsyll - 1 ;
48451163Scael stop = start2 ;
48551163Scael
48651163Scael while ( turnindex < stop ) {
48751163Scael vowel1 = vowel[stop] ;
48851163Scael for ( i = stop - 1 ; i >= 0 ; i-- ) {
48951163Scael vowel2 = vowel[i] ;
49051163Scael if ( vowel1 - vowel2 == differ) {
49151163Scael turn[turnindex] = i ;
49251163Scael turnindex++ ;
49351163Scael }
49451163Scael vowel1 = vowel2 ;
49551163Scael }
49651163Scael if ( differ == 1 ) start1 = turnindex ;
49751163Scael else if ( differ == 2 ) start2 = turnindex ;
49851163Scael differ++ ;
49951163Scael }
50051163Scael
50151163Scael turnindex = start2 - 1 ;
50251163Scael stop = numsyll - 1 ;
50351163Scael level = 1 ;
50451163Scael
50551163Scael next :
50651163Scael turnindex++ ;
50751163Scael if ( turnindex >= stop ) {
50851163Scael if ( level == 1 ) turnindex = start2 ;
50951163Scael else if ( level == 2 ) {
51051163Scael turnindex = start1 ;
51151163Scael stop = start2 ;
51251163Scael }
51351163Scael else goto nosplit ;
51451163Scael level++ ;
51551163Scael if ( turnindex >= stop ) goto next ;
51651163Scael }
51751163Scael j = turn[turnindex] ;
51851163Scael vowel1 = vowel[j] ;
51951163Scael vowel2 = vowel[j+1] ;
52051163Scael
52151163Scael switch ( level ) {
52251163Scael case 1 :
52351163Scael for ( j = vowel2-2 ; j >= vowel1+1 ; j-- ) {
52451163Scael help = consonant[woord[j]-39][woord[j+1]-39] ;
52551163Scael if ( abs(help) == 1 ) goto splitafterj ;
52651163Scael if ( help < 0 ) goto next ;
52751163Scael }
52851163Scael break ; /* end of first phase */
52951163Scael
53051163Scael case 2 :
53151163Scael for ( i = vowel2-2 ; i >= vowel1+1 ; i-- ) {
53251163Scael help = consonant[woord[i]-39][woord[i+1]-39] ;
53351163Scael if ( abs(help) == 2 ) {
53451163Scael j = i ;
53551163Scael goto splitafterj ;
53651163Scael }
53751163Scael if ( abs(help) == 3 ) {
53851163Scael if ( i == vowel1+1 ) {
53951163Scael j = vowel1 ;
54051163Scael goto splitafterj ;
54151163Scael }
54251163Scael help = abs(consonant[woord[i-1]-39][woord[i]-39]) ;
54351163Scael if ( help == 2 ) {
54451163Scael j = i - 1 ;
54551163Scael goto splitafterj ;
54651163Scael }
54751163Scael if ( help == 3 ) {
54851163Scael j = i - 2 ;
54951163Scael goto splitafterj ;
55051163Scael }
55151163Scael }
55251163Scael else if ( ( abs(help) == 4 ) &&
55351163Scael ( i == vowel2-2 ) ) {
55451163Scael j = i ;
55551163Scael goto splitafterj ;
55651163Scael }
55751163Scael if ( help < 0 ) goto next ;
55851163Scael }
55951163Scael break ; /* end of second phase */
56051163Scael
56151163Scael case 3 :
56251163Scael j = vowel1 ;
56351163Scael help = woord[j+1] ;
56451163Scael if ( (help == LETTERJ) || (help == LETTERV) ||
56551163Scael (help == LETTERZ) ) goto splitafterj ;
56651163Scael if ( help == LETTERX ) goto next ;
56751163Scael l1 = woord[j] ;
56851163Scael if ( l1 == LETTEREE ) goto next ;
56951163Scael if ( ( l1 > 24 ) && ( l1 < 29 ) ) {
57051163Scael j++ ;
57151163Scael goto splitafterj ;
57251163Scael }
57351163Scael l0 = woord[j-1] ;
57451163Scael l2 = woord[j+1] ;
57551163Scael for ( i = 0 ; i < 7 ; i++ )
57651163Scael if ( ( l0 == prefix[i][0] ) &&
57751163Scael ( l1 == prefix[i][1] ) &&
57851163Scael ( l2 == prefix[i][2] ) ) goto next ;
57951163Scael goto splitafterj ;
58051163Scael break ; /* end of third phase */
58151163Scael
58251163Scael }
58351163Scael
58451163Scael
58551163Scael goto next ;
58651163Scael
58751163Scael splitafterj :
58851163Scael bp = reference[j+1] - 1 ;
58951163Scael if((bp < size-1) && (bp > 0))
59051163Scael goto away;
59151163Scael else
59251163Scael goto next;
59351163Scael
59451163Scael nosplit :
59551163Scael bp = 0 ;
59651163Scael level = 4 ;
59751163Scael away :
59851163Scael return(bp == 0? (tchar *) NULL : tosplit+bp) ;
59951163Scael }
600