1 /* 2 * Deal with duplicated lines in a file 3 */ 4 #include <u.h> 5 #include <libc.h> 6 #include <bio.h> 7 #include <ctype.h> 8 9 #define SIZE 8000 10 11 int fields = 0; 12 int letters = 0; 13 int linec = 0; 14 char mode; 15 int uniq; 16 char *b1, *b2; 17 long bsize; 18 Biobuf fin; 19 Biobuf fout; 20 21 int gline(char *buf); 22 void pline(char *buf); 23 int equal(char *b1, char *b2); 24 char* skip(char *s); 25 26 void 27 main(int argc, char *argv[]) 28 { 29 int f; 30 31 argv0 = argv[0]; 32 bsize = SIZE; 33 b1 = malloc(bsize); 34 b2 = malloc(bsize); 35 f = 0; 36 while(argc > 1) { 37 if(*argv[1] == '-') { 38 if(isdigit(argv[1][1])) 39 fields = atoi(&argv[1][1]); 40 else 41 mode = argv[1][1]; 42 argc--; 43 argv++; 44 continue; 45 } 46 if(*argv[1] == '+') { 47 letters = atoi(&argv[1][1]); 48 argc--; 49 argv++; 50 continue; 51 } 52 f = open(argv[1], 0); 53 if(f < 0) 54 sysfatal("cannot open %s", argv[1]); 55 break; 56 } 57 if(argc > 2) 58 sysfatal("unexpected argument %s", argv[2]); 59 Binit(&fin, f, OREAD); 60 Binit(&fout, 1, OWRITE); 61 62 if(gline(b1)) 63 exits(0); 64 for(;;) { 65 linec++; 66 if(gline(b2)) { 67 pline(b1); 68 exits(0); 69 } 70 if(!equal(b1, b2)) { 71 pline(b1); 72 linec = 0; 73 do { 74 linec++; 75 if(gline(b1)) { 76 pline(b2); 77 exits(0); 78 } 79 } while(equal(b2, b1)); 80 pline(b2); 81 linec = 0; 82 } 83 } 84 } 85 86 int 87 gline(char *buf) 88 { 89 int len; 90 char *p; 91 92 p = Brdline(&fin, '\n'); 93 if(p == 0) 94 return 1; 95 len = Blinelen(&fin); 96 if(len >= bsize-1) 97 sysfatal("line too long"); 98 memmove(buf, p, len); 99 buf[len-1] = 0; 100 return 0; 101 } 102 103 void 104 pline(char *buf) 105 { 106 switch(mode) { 107 108 case 'u': 109 if(uniq) { 110 uniq = 0; 111 return; 112 } 113 break; 114 115 case 'd': 116 if(uniq) 117 break; 118 return; 119 120 case 'c': 121 Bprint(&fout, "%4d ", linec); 122 } 123 uniq = 0; 124 Bprint(&fout, "%s\n", buf); 125 } 126 127 int 128 equal(char *b1, char *b2) 129 { 130 char c; 131 132 if(fields || letters) { 133 b1 = skip(b1); 134 b2 = skip(b2); 135 } 136 for(;;) { 137 c = *b1++; 138 if(c != *b2++) { 139 if(c == 0 && mode == 's') 140 return 1; 141 return 0; 142 } 143 if(c == 0) { 144 uniq++; 145 return 1; 146 } 147 } 148 } 149 150 char* 151 skip(char *s) 152 { 153 int nf, nl; 154 155 nf = nl = 0; 156 while(nf++ < fields) { 157 while(*s == ' ' || *s == '\t') 158 s++; 159 while(!(*s == ' ' || *s == '\t' || *s == 0) ) 160 s++; 161 } 162 while(nl++ < letters && *s != 0) 163 s++; 164 return s; 165 } 166