1 /*
2 * Deal with duplicated lines in a file
3 */
4 #include <u.h>
5 #include <libc.h>
6 #include <bio.h>
7 #include <ctype.h>
8
9 #define SIZE 8000
10
11 int fields = 0;
12 int letters = 0;
13 int linec = 0;
14 char mode;
15 int uniq;
16 char *b1, *b2;
17 long bsize;
18 Biobuf fin;
19 Biobuf fout;
20
21 int gline(char *buf);
22 void pline(char *buf);
23 int equal(char *b1, char *b2);
24 char* skip(char *s);
25
26 void
main(int argc,char * argv[])27 main(int argc, char *argv[])
28 {
29 int f;
30
31 argv0 = argv[0];
32 bsize = SIZE;
33 b1 = malloc(bsize);
34 b2 = malloc(bsize);
35 f = 0;
36 while(argc > 1) {
37 if(*argv[1] == '-') {
38 if(isdigit(argv[1][1]))
39 fields = atoi(&argv[1][1]);
40 else
41 mode = argv[1][1];
42 argc--;
43 argv++;
44 continue;
45 }
46 if(*argv[1] == '+') {
47 letters = atoi(&argv[1][1]);
48 argc--;
49 argv++;
50 continue;
51 }
52 f = open(argv[1], 0);
53 if(f < 0)
54 sysfatal("cannot open %s", argv[1]);
55 break;
56 }
57 if(argc > 2)
58 sysfatal("unexpected argument %s", argv[2]);
59 Binit(&fin, f, OREAD);
60 Binit(&fout, 1, OWRITE);
61
62 if(gline(b1))
63 exits(0);
64 for(;;) {
65 linec++;
66 if(gline(b2)) {
67 pline(b1);
68 exits(0);
69 }
70 if(!equal(b1, b2)) {
71 pline(b1);
72 linec = 0;
73 do {
74 linec++;
75 if(gline(b1)) {
76 pline(b2);
77 exits(0);
78 }
79 } while(equal(b2, b1));
80 pline(b2);
81 linec = 0;
82 }
83 }
84 }
85
86 int
gline(char * buf)87 gline(char *buf)
88 {
89 int len;
90 char *p;
91
92 p = Brdline(&fin, '\n');
93 if(p == 0)
94 return 1;
95 len = Blinelen(&fin);
96 if(len >= bsize-1)
97 sysfatal("line too long");
98 memmove(buf, p, len);
99 buf[len-1] = 0;
100 return 0;
101 }
102
103 void
pline(char * buf)104 pline(char *buf)
105 {
106 switch(mode) {
107
108 case 'u':
109 if(uniq) {
110 uniq = 0;
111 return;
112 }
113 break;
114
115 case 'd':
116 if(uniq)
117 break;
118 return;
119
120 case 'c':
121 Bprint(&fout, "%4d ", linec);
122 }
123 uniq = 0;
124 Bprint(&fout, "%s\n", buf);
125 }
126
127 int
equal(char * b1,char * b2)128 equal(char *b1, char *b2)
129 {
130 char c;
131
132 if(fields || letters) {
133 b1 = skip(b1);
134 b2 = skip(b2);
135 }
136 for(;;) {
137 c = *b1++;
138 if(c != *b2++) {
139 if(c == 0 && mode == 's')
140 return 1;
141 return 0;
142 }
143 if(c == 0) {
144 uniq++;
145 return 1;
146 }
147 }
148 }
149
150 char*
skip(char * s)151 skip(char *s)
152 {
153 int nf, nl;
154
155 nf = nl = 0;
156 while(nf++ < fields) {
157 while(*s == ' ' || *s == '\t')
158 s++;
159 while(!(*s == ' ' || *s == '\t' || *s == 0) )
160 s++;
161 }
162 while(nl++ < letters && *s != 0)
163 s++;
164 return s;
165 }
166