xref: /csrg-svn/usr.bin/diction/style3/part.l (revision 65930)
%{

/*-
 * %sccs.include.proprietary.c%
 */

#ifndef lint
static char sccsid[] = "@(#)part.l	8.2 (Berkeley) 01/28/94";
#endif /* not lint */

/*
 * part.l - scanner for the sentence-analysis pass of style.
 *
 * The rules below read one tagged token per line, collect the tokens of
 * a sentence into sent[] (records) and s[] (text), and call outp() at
 * each end-of-sentence marker.  yywrap() prints the summary report.
 */

#include "style.h"
#include "names.h"
#include "conp.h"

/* lex may supply yywrap as a macro; this file defines its own function. */
#undef yywrap

/* Stream appended to when SNOM is defined and the file exists (see main). */
FILE *deb;

/* Option flags, set from the command line in main(). */
int nosave = 1;		/* cleared by -d; gates the SCATCH append in yywrap */
int part = 0;		/* -P */
int barebones = 0;	/* -b */
int topic = 0;		/* -T */
int style = 1;		/* summary report wanted; cleared by -P, -b and -T */
int pastyle = 0;	/* -p */
int pstyle = 0;		/* -a */
int lstyle = 0;		/* -l, uses lthresh */
int rstyle = 0;		/* -r, uses rthresh */
int estyle = 0;		/* -e */
int nstyle = 0;		/* -n */
int Nstyle = 0;		/* -N */
int lthresh;		/* numeric argument of -l */
int rthresh;		/* numeric argument of -r */

/* Count read from a ";<number>" input line; reported as nominalizations. */
int nomin;

/* Text of the tokens of the current sentence, NUL separated. */
char s[SCHAR];
char *sptr = s;		/* next free byte in s */

/* One record per token of the current sentence. */
struct ss sent[SLENG];
struct ss *sentp = sent;	/* next free record in sent */

float wperc();
float sperc();
float typersent();
float vperc();

/*
 * Running totals reported by yywrap().  None of them is updated in this
 * file; presumably outp() and its helpers maintain them - confirm there.
 */
int numsent = 0;
int qcount = 0;
int icount = 0;
long vowel = 0;
long numwds = 0;
long twds = 0;
long numnonf = 0;
long letnonf = 0;
int maxsent = 0;
int maxindex = 0;
int minsent = 30;
int minindex = 0;
int simple = 0;
int compound = 0;
int compdx = 0;
int prepc = 0;
int conjc = 0;
int complex = 0;
int tobe = 0;
int adj = 0;
int infin = 0;
int pron = 0;
int passive = 0;
int aux = 0;
int adv = 0;
int verbc = 0;
int tverbc = 0;
int noun = 0;
long numlet = 0;

/*
 * Sentence-opener counts as reported by yywrap(): [0] noun, [1]+[10]+[11]
 * verb, [3] adj, [4] adv, [5] conj, [6] pos, [7] pron, [8] art, [9] prep,
 * [13] sub_conj, [14] expletives.
 */
int beg[15]  = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };

/* Summed by yywrap() below index ml (short) and above index mg (long). */
int sleng[50];
int nsleng = 0;		/* tokens collected so far in the current sentence */

int j,jj,i;		/* shared scratch indices */
int comma = 0;		/* commas seen in the current sentence */
int cflg;		/* 1 when the sentence ended with the "/." marker */
int question;		/* 1 when the end-of-sentence text starts with '?' */
int quote = 0;		/* previous sentence ended in ? or ! followed by a quote */
char *st;
int initf = 0;
int over = 1;		/* 1 until an overflow warning is printed for this sentence */

/* nroff pass-through state for input lines that start with ':'. */
int nroff = 0;		/* a ':' line is waiting to be echoed after the fragment */
int nrofflg = 0;	/* at least one ':' line was seen */
int leng[MAXPAR];	/* values printed in the ".SL" request */
int sentno= 0;		/* number of valid entries in leng */
%}
	/* Any letter except E; E is reserved for the end-of-sentence tag. */
C	[A-DF-Za-z]
%%
	/*
	 * Input is one token per line: a one-character class code, a colon
	 * and the token text.  Lines tagged E end a sentence, lines that
	 * start with ; carry a number, and lines that start with : are
	 * echoed to the output.  The actions jump into one another with
	 * goto, which relies on lex emitting every action into yylex().
	 */
^{C}:.+	{
	/* Tagged word: record its class and store the text after the colon. */
collect:
	sentp->cc = sentp->ic = yytext[0];
	if(sentp->cc == NOM)
		sentp->cc = NOUN;
collect1:
	nsleng++;
	sentp->leng = yyleng-2;
	sentp++->sp = sptr;
	/* Record table full: warn once per sentence and reuse the last slot. */
	if(sentp >= &sent[SLENG-1]){
		if(over)fprintf(stderr,"sentence %d too many words\n",numsent+2);
		over=0;
		sentp--;
	}
	/* Text buffer full: warn once per sentence and drop the text. */
	if(sptr+yyleng >= &s[SCHAR-1]){
		if(over)fprintf(stderr,"sentence %d too many characters\n",numsent+2);
		over=0;
	}
	else {
		for(i=2;i<yyleng;i++)*sptr++=yytext[i];
		*sptr++ = '\0';
	}
	}
^";:".+	{
	/* Semicolon token: classed as END but keeps ; as its input class. */
	sentp->cc=END;
	sentp->ic = ';';
	goto collect1;
	}
^",:".+	{
	comma++;
	goto collect;
	}
[\n]	;
^"\":".+	{
	goto collect;
	}
^E:"/."	{
	cflg = 1;
	goto sdone;
	}
^E:.+	{
	cflg = 0;
sdone:
	/* End of sentence: store the terminator, then analyse what was collected. */
	over=1;
	sentp->cc=sentp->ic=END;
	sentp++->sp = sptr;
	/*
	 * Copy the terminator text but never write past s; the last byte
	 * is kept for the NUL.  i still finishes at yyleng as before.
	 */
	for(i=2;i<yyleng;i++)
		if(sptr < &s[SCHAR-1])*sptr++=yytext[i];
	*sptr++='\0';
	if(yytext[2]=='?')question=1;
	else question=0;

fragment:
	jj=0;
	/* A leading ED after a quoted ? or ! sentence is treated as a verb. */
	if(quote == 1 && sent[jj].cc == ED){
		sent[jj].cc = VERB;
		quote = 0;
	}
	if(sent[jj].cc=='"')jj++;
	if(sent[jj].cc==SUBCONJ){
		if(sent[jj+1].cc == ','){
			sent[jj].cc=ADV;
			jj += 2;
			comma--;
		}
		else {
			jj=scan(1,',',0);
			if(jj != -1)jj++;
			comma--;
		}
	}
	if(jj != -1){
		if(sent[jj].cc==CONJ || sent[jj].cc=='"')jj++;
		while((jj=scan(jj,END,cflg)) != -1){
			jj++;
			if(sent[jj].cc == SUBCONJ && sent[jj+1].cc == ','){
				sent[jj].cc=ADV;
				jj += 2;
				comma--;
			}
		}
	}
	/*
	 * NOTE(review): i is a file-scope scratch index.  On the sdone path
	 * it was last set by the copy loop above, on the nroff path it is
	 * stale, and scan() may also change it.  Confirm that sent[i] is
	 * the terminator record this test is meant to look at.
	 */
	st = sent[i].sp;
	if(*(st+1) == '"')
		if(*st == '?' || *st == '!')quote = 1;
	outp();
	nsleng = 0;
	/* Reached from a : line: flush pending lengths, then echo that line. */
	if(nroff){
		if(sentno > 0){
			printf(".SL \"");
			for(i=0;i<sentno;i++)
				printf(" %d",leng[i]);
			printf("\"\n");
			sentno = 0;
		}
		printf("%s",&yytext[1]);
		nroff = 0;
	}
	/* Reset the per-sentence buffers. */
	sptr=s;
	sentp=sent;
	comma=0;
	}
;.+	{
	if(style){
		nomin = atoi(&yytext[1]);
	}
	}
^:.+[\n]	{
	nrofflg=1;
	/* Tokens still pending: close them as a sentence ending in a period. */
	if(sentp != sent){
		sentp->cc = sentp->ic = END;
		sentp++->sp = sptr;
		*sptr++ = '.';
		*sptr++ = '\0';
		over = 1;
		nroff = 1;
		goto fragment;
	}
	if(sentno > 0){
		printf(".SL \"");
		for(i=0;i<sentno;i++)
			printf(" %d",leng[i]);
		printf("\"\n");
		sentno = 0;
	}
	printf("%s",&yytext[1]);
	}
%%
2159075Srrh yywrap(){
2169075Srrh 	int ii;
2179075Srrh 	int ml,mg,lsum,gsum;
2189075Srrh 	float aindex, avl, avw;
2199075Srrh 	float cindex,kindex,findex,fgrad;
2209075Srrh 	float syl, avsy, adjs,snonf;
2219082Srrh 	FILE *io;
2229075Srrh 
2239075Srrh 	if(style){
2249075Srrh 	if(numwds == 0 || numsent == 0)exit(0);
2259075Srrh 	avw = (float)(numwds)/(float)(numsent);
2269075Srrh 	avl = (float)(numlet)/(float)(numwds);
2279075Srrh 	aindex = 4.71*avl + .5*avw -21.43;
2289075Srrh 	syl = .9981*vowel-.3432*twds;
2299075Srrh 	avsy = syl/twds;
2309075Srrh 	kindex = 11.8*avsy+.39*avw-15.59;
2319075Srrh 	findex = 206.835-84.6*avsy-1.015*avw;
2329075Srrh 	if(findex < 30.)fgrad = 17.;
2339075Srrh 	else if(findex > 100.) fgrad = 4.;
2349075Srrh 	else if(findex > 70.)fgrad=(100.-findex)/10 +5.;
2359075Srrh 	else if(findex > 60.)fgrad =(70.-findex)/10+8.;
2369075Srrh 	else if(findex >50.)fgrad=(60.-findex)/5+10;
2379075Srrh 	else fgrad=(50.-findex)/6.66 +13.;
2389075Srrh 	adjs = 100 * (float)numsent/numwds;
2399075Srrh 	cindex = 5.89*avl-.3*adjs-15.8;
2409075Srrh 	printf("readability grades:\n	(Kincaid) %4.1f  (auto) %4.1f  (Coleman-Liau) %4.1f  (Flesch) %4.1f (%4.1f)\n",kindex,aindex,cindex,fgrad,findex);
2419075Srrh 	printf("sentence info:\n");
2429075Srrh 	printf("	no. sent %d no. wds %ld\n",numsent,numwds);
2439075Srrh 	printf("	av sent leng %4.1f av word leng %4.2f\n",avw,avl);
2449075Srrh 	printf("	no. questions %d no. imperatives %d\n",qcount,icount);
2459075Srrh 	if(numnonf != 0){
2469075Srrh 		snonf = (float)(letnonf)/(float)(numnonf);
2479075Srrh 	}
2489075Srrh 	printf("	no. nonfunc wds %ld  %4.1f%%   av leng %4.2f\n",numnonf,(float)(numnonf)*100/numwds,snonf);
2499075Srrh 	mg = avw + 10.5;
2509075Srrh 	if(mg > 49)mg = 49;
2519075Srrh 	ml = avw - 4.5;
2529075Srrh 	if(ml <= 0)ml = 1;
2539082Srrh 	else if(ml > 49)ml=48;
2549075Srrh 	gsum = lsum = 0;
2559075Srrh 	for(ii=0;ii<50;ii++){
2569075Srrh 		if(ii < ml)lsum += sleng[ii];
2579075Srrh 		else if(ii > mg)gsum+= sleng[ii];
2589075Srrh 	}
2599075Srrh 	printf("	short sent (<%d)%3.0f%% (%d) long sent (>%d) %3.0f%% (%d)\n",ml,sperc(lsum),lsum,mg,sperc(gsum),gsum);
2609075Srrh 	printf("	longest sent %d wds at sent %d; shortest sent %d wds at sent %d\n",maxsent,maxindex,minsent,minindex);
2619075Srrh 	printf("sentence types:\n");
2629075Srrh 	printf("	simple %3.0f%% (%d) complex %3.0f%% (%d)\n",sperc(simple),simple,sperc(complex),complex);
2639075Srrh 	printf("	compound %3.0f%% (%d) compound-complex %3.0f%% (%d)\n",sperc(compound),compound,sperc(compdx),compdx);
2649075Srrh 	printf("word usage:\n");
2659075Srrh 	printf("	verb types as %% of total verbs\n");
2669075Srrh 	printf("	tobe %3.0f%% (%d) aux %3.0f%% (%d) inf %3.0f%% (%d)\n",vperc(tobe),tobe,vperc(aux),aux,vperc(infin),infin);
2679075Srrh 	if(verbc != 0)adjs = (float)(passive)*100/(float)(verbc);
2689075Srrh 	else adjs=0;
2699075Srrh 	printf("	passives as %% of non-inf verbs %3.0f%% (%d)\n",adjs,passive);
2709075Srrh 	printf("	types as %% of total\n");
2719075Srrh 	printf("	prep %3.1f%% (%d) conj %3.1f%% (%d) adv %3.1f%% (%d)\n",wperc(prepc),prepc,wperc(conjc),conjc,wperc(adv),adv);
2729075Srrh 	printf("	noun %3.1f%% (%d) adj %3.1f%% (%d) pron %3.1f%% (%d)\n",wperc(noun),noun,
2739075Srrh 		wperc(adj),adj,wperc(pron),pron);
2749075Srrh 	printf("	nominalizations %3.0f %% (%d)\n",wperc(nomin),nomin);
2759075Srrh 	printf("sentence beginnings:\n");
2769075Srrh 	ii=beg[0]+beg[7]+beg[6]+beg[3]+beg[8];
2779075Srrh 	printf("	subject opener: noun (%d) pron (%d) pos (%d) adj (%d) art (%d) tot %3.0f%%\n",
2789075Srrh beg[0],beg[7],beg[6],beg[3],beg[8],sperc(ii));
2799075Srrh 	printf("	prep %3.0f%% (%d) adv %3.0f%% (%d) \n",sperc(beg[9]),beg[9],sperc(beg[4]),beg[4]);
2809082Srrh 	printf("	verb %3.0f%% (%d) ",sperc(beg[1]+beg[10]+beg[11]),beg[1]+beg[10]+beg[11]);
2819075Srrh 	printf(" sub_conj %3.0f%% (%d) conj %3.0f%% (%d)\n",sperc(beg[13]),beg[13],sperc(beg[5]),beg[5]);
2829075Srrh 	printf("	expletives %3.0f%% (%d)\n",sperc(beg[14]),beg[14]);
2839082Srrh #ifdef SCATCH
2849082Srrh 	if(nosave && (fopen(SCATCH,"r")) != NULL){
2859082Srrh 	if(((io=fopen(SCATCH,"a")) != NULL)){
2869082Srrh 		fprintf(io," read %4.1f %4.1f %4.1f %4.1f %4.1f\n",kindex, aindex, cindex, findex, fgrad);
2879082Srrh 		fprintf(io," sentl %d %ld %4.2f %4.2f %d %d %ld %4.2f\n",numsent,numwds,avw,avl,qcount,icount,numnonf,snonf);
2889082Srrh 		fprintf(io," l var %d %d %d %d %d\n",ml,lsum,mg,gsum,maxsent);
2899082Srrh 		fprintf(io," t var %d %d %d %d\n",simple,complex,compound,compdx);
2909082Srrh 		fprintf(io," verbs %d %d %d %d %d %d\n",tverbc,verbc,tobe,aux,infin,passive);
2919082Srrh 		fprintf(io," ty %d %d %d %d %d %d %d\n",prepc,conjc,adv,noun,adj,pron,nomin);
2929082Srrh 		fprintf(io," beg %d %d %d %d %d %d\n",beg[0],beg[7],beg[6],beg[3],beg[8],ii);
2939082Srrh 		fprintf(io," sbeg %d %d %d %d %d %d\n",beg[9],beg[4],beg[1]+beg[10]+beg[11],beg[13],beg[5],beg[14]);
2949082Srrh 		}
2959075Srrh 	}
2969082Srrh #endif
2979082Srrh 	}
2989075Srrh 	return(1);
2999075Srrh }
3009075Srrh float
wperc(a)3019075Srrh wperc(a)
3029075Srrh {
3039075Srrh 	return((float)(a)*100/numwds);
3049075Srrh }
3059075Srrh float
sperc(a)3069075Srrh sperc(a)
3079075Srrh {
3089075Srrh 	return((float)(a)*100/numsent);
3099075Srrh }
3109075Srrh float
typersent(a)3119075Srrh typersent(a)
3129075Srrh {
3139075Srrh return((float)(a)/numsent);
3149075Srrh }
3159075Srrh float
vperc(a)3169075Srrh vperc(a)
3179075Srrh {
3189075Srrh 	if(tverbc == 0)return(0);
3199075Srrh 	return((float)(a)*100/tverbc);
3209075Srrh }
main(argc,argv)3219075Srrh main(argc,argv)
3229075Srrh char **argv;
3239075Srrh {
3249075Srrh 	while(--argc > 0 && (++argv)[0][0] == '-' ){
3259075Srrh 		switch(argv[0][1]){
3269082Srrh 		case 'd': nosave = 0;
3279082Srrh 			continue;
3289075Srrh 		case 's': style=1;
3299075Srrh 			continue;
3309075Srrh 		case 'p': pastyle=style=1;
3319075Srrh 			continue;
3329075Srrh 		case 'a': pstyle=style=1;
3339075Srrh 			continue;
3349075Srrh 		case 'e': estyle = style = 1;
3359075Srrh 			continue;
3369082Srrh 		case 'n': nstyle = style = 1;
3379082Srrh 			continue;
3389082Srrh 		case 'N': Nstyle = style = 1;
3399082Srrh 			continue;
3409075Srrh 		case 'l': style=lstyle=1;
3419075Srrh 			lthresh = atoi(*(++argv));
3429082Srrh 			argc--;
3439075Srrh 			continue;
3449075Srrh 		case 'r':
3459075Srrh 			style=rstyle=1;
3469075Srrh 			rthresh = atoi(*(++argv));
3479082Srrh 			argc--;
3489075Srrh 			continue;
3499075Srrh 		case 'P':
3509075Srrh 			part = 1;
3519082Srrh 			style = 0;
3529075Srrh 			continue;
3539082Srrh 		case 'b':		/* print bare bones info rje */
3549082Srrh 			barebones = 1;
3559082Srrh 			style = 0;
3569082Srrh 			continue;
3579082Srrh 		case 'T':		/*topic*/
3589082Srrh 			style = 0;
3599082Srrh 			topic = 1;
3609082Srrh 			continue;
3619075Srrh 		default:
3629075Srrh 			fprintf(stderr,"unknown flag to part %s\n",*argv);
3639075Srrh 			exit(1);
3649075Srrh 		}
3659075Srrh 		argv++;
3669075Srrh 	}
3679082Srrh #ifdef SNOM
3689082Srrh 	if(fopen(SNOM,"r") != NULL){
3699082Srrh 		deb = fopen(SNOM,"a");	/* SAVE NOM*/
3709082Srrh 	}
3719082Srrh #else
3729082Srrh 	deb = NULL;
3739082Srrh #endif
3749075Srrh 	yylex();
3759082Srrh 	if(nrofflg && sentno > 0){
3769082Srrh 		printf(".SL \"");
3779082Srrh 		for(i=0;i<sentno;i++)
3789082Srrh 			printf(" %d",leng[i]);
3799082Srrh 		printf("\"\n");
3809082Srrh 	}
3819075Srrh }
382