13e12c5d1SDavid du Colombier /*
23e12c5d1SDavid du Colombier * Deal with duplicated lines in a file
33e12c5d1SDavid du Colombier */
43e12c5d1SDavid du Colombier #include <u.h>
53e12c5d1SDavid du Colombier #include <libc.h>
63e12c5d1SDavid du Colombier #include <bio.h>
73e12c5d1SDavid du Colombier #include <ctype.h>
83e12c5d1SDavid du Colombier
93e12c5d1SDavid du Colombier #define SIZE 8000
103e12c5d1SDavid du Colombier
/* number of leading blank-separated fields skip() passes over; set from a -N argument */
113e12c5d1SDavid du Colombier int fields = 0;
/* number of characters skip() passes over after the fields; set from a +N argument */
123e12c5d1SDavid du Colombier int letters = 0;
/* length of the current run of equal lines; printed by pline() when mode is 'c' */
133e12c5d1SDavid du Colombier int linec = 0;
/* option character taken from a non-numeric -x argument ('u', 'd', 'c' and 's' are tested) */
143e12c5d1SDavid du Colombier char mode;
/* incremented by equal() on an exact match, cleared by pline(): non-zero means the run had repeats */
153e12c5d1SDavid du Colombier int uniq;
/* the two line buffers, each bsize bytes, that main() alternates between */
163e12c5d1SDavid du Colombier char *b1, *b2;
/* size in bytes of b1 and b2; gline() checks line lengths against it */
173e12c5d1SDavid du Colombier long bsize;
/* buffered input: the named file, or fd 0 when no file argument is given */
183e12c5d1SDavid du Colombier Biobuf fin;
/* buffered output on fd 1 */
193e12c5d1SDavid du Colombier Biobuf fout;
203e12c5d1SDavid du Colombier
/* read the next input line into buf without its newline; returns 1 when no line is available, else 0 */
213e12c5d1SDavid du Colombier int gline(char *buf);
/* emit buf on fout subject to mode and uniq; always leaves uniq at 0 */
223e12c5d1SDavid du Colombier void pline(char *buf);
/* compare two lines after skip(); returns 1 when they count as equal, else 0 */
233e12c5d1SDavid du Colombier int equal(char *b1, char *b2);
/* return a pointer past the first `fields` fields and then `letters` characters of s */
243e12c5d1SDavid du Colombier char* skip(char *s);
253e12c5d1SDavid du Colombier
263e12c5d1SDavid du Colombier void
main(int argc,char * argv[])273e12c5d1SDavid du Colombier main(int argc, char *argv[])
283e12c5d1SDavid du Colombier {
293e12c5d1SDavid du Colombier int f;
303e12c5d1SDavid du Colombier
312a91472eSDavid du Colombier argv0 = argv[0];
323e12c5d1SDavid du Colombier bsize = SIZE;
333e12c5d1SDavid du Colombier b1 = malloc(bsize);
343e12c5d1SDavid du Colombier b2 = malloc(bsize);
353e12c5d1SDavid du Colombier f = 0;
363e12c5d1SDavid du Colombier while(argc > 1) {
373e12c5d1SDavid du Colombier if(*argv[1] == '-') {
383e12c5d1SDavid du Colombier if(isdigit(argv[1][1]))
393e12c5d1SDavid du Colombier fields = atoi(&argv[1][1]);
403e12c5d1SDavid du Colombier else
413e12c5d1SDavid du Colombier mode = argv[1][1];
423e12c5d1SDavid du Colombier argc--;
433e12c5d1SDavid du Colombier argv++;
443e12c5d1SDavid du Colombier continue;
453e12c5d1SDavid du Colombier }
463e12c5d1SDavid du Colombier if(*argv[1] == '+') {
473e12c5d1SDavid du Colombier letters = atoi(&argv[1][1]);
483e12c5d1SDavid du Colombier argc--;
493e12c5d1SDavid du Colombier argv++;
503e12c5d1SDavid du Colombier continue;
513e12c5d1SDavid du Colombier }
523e12c5d1SDavid du Colombier f = open(argv[1], 0);
532a91472eSDavid du Colombier if(f < 0)
542a91472eSDavid du Colombier sysfatal("cannot open %s", argv[1]);
553e12c5d1SDavid du Colombier break;
563e12c5d1SDavid du Colombier }
572a91472eSDavid du Colombier if(argc > 2)
582a91472eSDavid du Colombier sysfatal("unexpected argument %s", argv[2]);
593e12c5d1SDavid du Colombier Binit(&fin, f, OREAD);
603e12c5d1SDavid du Colombier Binit(&fout, 1, OWRITE);
613e12c5d1SDavid du Colombier
623e12c5d1SDavid du Colombier if(gline(b1))
633e12c5d1SDavid du Colombier exits(0);
643e12c5d1SDavid du Colombier for(;;) {
653e12c5d1SDavid du Colombier linec++;
663e12c5d1SDavid du Colombier if(gline(b2)) {
673e12c5d1SDavid du Colombier pline(b1);
683e12c5d1SDavid du Colombier exits(0);
693e12c5d1SDavid du Colombier }
703e12c5d1SDavid du Colombier if(!equal(b1, b2)) {
713e12c5d1SDavid du Colombier pline(b1);
723e12c5d1SDavid du Colombier linec = 0;
733e12c5d1SDavid du Colombier do {
743e12c5d1SDavid du Colombier linec++;
753e12c5d1SDavid du Colombier if(gline(b1)) {
763e12c5d1SDavid du Colombier pline(b2);
773e12c5d1SDavid du Colombier exits(0);
783e12c5d1SDavid du Colombier }
793e12c5d1SDavid du Colombier } while(equal(b2, b1));
803e12c5d1SDavid du Colombier pline(b2);
813e12c5d1SDavid du Colombier linec = 0;
823e12c5d1SDavid du Colombier }
833e12c5d1SDavid du Colombier }
843e12c5d1SDavid du Colombier }
853e12c5d1SDavid du Colombier
863e12c5d1SDavid du Colombier int
gline(char * buf)873e12c5d1SDavid du Colombier gline(char *buf)
883e12c5d1SDavid du Colombier {
89*666b04c8SDavid du Colombier int len;
903e12c5d1SDavid du Colombier char *p;
913e12c5d1SDavid du Colombier
923e12c5d1SDavid du Colombier p = Brdline(&fin, '\n');
933e12c5d1SDavid du Colombier if(p == 0)
943e12c5d1SDavid du Colombier return 1;
95*666b04c8SDavid du Colombier len = Blinelen(&fin);
96*666b04c8SDavid du Colombier if(len >= bsize-1)
972a91472eSDavid du Colombier sysfatal("line too long");
98*666b04c8SDavid du Colombier memmove(buf, p, len);
99*666b04c8SDavid du Colombier buf[len-1] = 0;
1003e12c5d1SDavid du Colombier return 0;
1013e12c5d1SDavid du Colombier }
1023e12c5d1SDavid du Colombier
1033e12c5d1SDavid du Colombier void
pline(char * buf)1043e12c5d1SDavid du Colombier pline(char *buf)
1053e12c5d1SDavid du Colombier {
1063e12c5d1SDavid du Colombier switch(mode) {
1073e12c5d1SDavid du Colombier
1083e12c5d1SDavid du Colombier case 'u':
1093e12c5d1SDavid du Colombier if(uniq) {
1103e12c5d1SDavid du Colombier uniq = 0;
1113e12c5d1SDavid du Colombier return;
1123e12c5d1SDavid du Colombier }
1133e12c5d1SDavid du Colombier break;
1143e12c5d1SDavid du Colombier
1153e12c5d1SDavid du Colombier case 'd':
1163e12c5d1SDavid du Colombier if(uniq)
1173e12c5d1SDavid du Colombier break;
1183e12c5d1SDavid du Colombier return;
1193e12c5d1SDavid du Colombier
1203e12c5d1SDavid du Colombier case 'c':
1213e12c5d1SDavid du Colombier Bprint(&fout, "%4d ", linec);
1223e12c5d1SDavid du Colombier }
1233e12c5d1SDavid du Colombier uniq = 0;
1243e12c5d1SDavid du Colombier Bprint(&fout, "%s\n", buf);
1253e12c5d1SDavid du Colombier }
1263e12c5d1SDavid du Colombier
1273e12c5d1SDavid du Colombier int
equal(char * b1,char * b2)1283e12c5d1SDavid du Colombier equal(char *b1, char *b2)
1293e12c5d1SDavid du Colombier {
1303e12c5d1SDavid du Colombier char c;
1313e12c5d1SDavid du Colombier
1323e12c5d1SDavid du Colombier if(fields || letters) {
1333e12c5d1SDavid du Colombier b1 = skip(b1);
1343e12c5d1SDavid du Colombier b2 = skip(b2);
1353e12c5d1SDavid du Colombier }
1363e12c5d1SDavid du Colombier for(;;) {
1373e12c5d1SDavid du Colombier c = *b1++;
1383e12c5d1SDavid du Colombier if(c != *b2++) {
1393e12c5d1SDavid du Colombier if(c == 0 && mode == 's')
1403e12c5d1SDavid du Colombier return 1;
1413e12c5d1SDavid du Colombier return 0;
1423e12c5d1SDavid du Colombier }
1433e12c5d1SDavid du Colombier if(c == 0) {
1443e12c5d1SDavid du Colombier uniq++;
1453e12c5d1SDavid du Colombier return 1;
1463e12c5d1SDavid du Colombier }
1473e12c5d1SDavid du Colombier }
1483e12c5d1SDavid du Colombier }
1493e12c5d1SDavid du Colombier
1503e12c5d1SDavid du Colombier char*
skip(char * s)1513e12c5d1SDavid du Colombier skip(char *s)
1523e12c5d1SDavid du Colombier {
1533e12c5d1SDavid du Colombier int nf, nl;
1543e12c5d1SDavid du Colombier
1553e12c5d1SDavid du Colombier nf = nl = 0;
1563e12c5d1SDavid du Colombier while(nf++ < fields) {
1573e12c5d1SDavid du Colombier while(*s == ' ' || *s == '\t')
1583e12c5d1SDavid du Colombier s++;
1593e12c5d1SDavid du Colombier while(!(*s == ' ' || *s == '\t' || *s == 0) )
1603e12c5d1SDavid du Colombier s++;
1613e12c5d1SDavid du Colombier }
1623e12c5d1SDavid du Colombier while(nl++ < letters && *s != 0)
1633e12c5d1SDavid du Colombier s++;
1643e12c5d1SDavid du Colombier return s;
1653e12c5d1SDavid du Colombier }
166