1 /* join F1 F2 on stuff */ 2 #include <u.h> 3 #include <libc.h> 4 #include <stdio.h> 5 #include <ctype.h> 6 #define F1 0 7 #define F2 1 8 #define F0 3 9 #define NFLD 100 /* max field per line */ 10 #define comp() runecmp(ppi[F1][j1],ppi[F2][j2]) 11 FILE *f[2]; 12 Rune buf[2][BUFSIZ]; /*input lines */ 13 Rune *ppi[2][NFLD+1]; /* pointers to fields in lines */ 14 Rune *s1,*s2; 15 int j1 = 1; /* join of this field of file 1 */ 16 int j2 = 1; /* join of this field of file 2 */ 17 int olist[2*NFLD]; /* output these fields */ 18 int olistf[2*NFLD]; /* from these files */ 19 int no; /* number of entries in olist */ 20 Rune sep1 = ' '; /* default field separator */ 21 Rune sep2 = '\t'; 22 char *sepstr=" "; 23 int discard; /* count of truncated lines */ 24 Rune null[BUFSIZ] = L""; 25 int a1; 26 int a2; 27 28 char *getoptarg(int*, char***); 29 void output(int, int); 30 int input(int); 31 void oparse(char*); 32 void error(char*, char*); 33 void seek1(void), seek2(void); 34 Rune *strtorune(Rune *, char *); 35 36 37 void 38 main(int argc, char **argv) 39 { 40 int i; 41 42 while (argc > 1 && argv[1][0] == '-') { 43 if (argv[1][1] == '\0') 44 break; 45 switch (argv[1][1]) { 46 case '-': 47 argc--; 48 argv++; 49 goto proceed; 50 case 'a': 51 switch(*getoptarg(&argc, &argv)) { 52 case '1': 53 a1++; 54 break; 55 case '2': 56 a2++; 57 break; 58 default: 59 error("incomplete option -a",""); 60 } 61 break; 62 case 'e': 63 strtorune(null, getoptarg(&argc, &argv)); 64 break; 65 case 't': 66 sepstr=getoptarg(&argc, &argv); 67 chartorune(&sep1, sepstr); 68 sep2 = sep1; 69 break; 70 case 'o': 71 if(argv[1][2]!=0 || 72 argc>2 && strchr(argv[2],',')!=0) 73 oparse(getoptarg(&argc, &argv)); 74 else for (no = 0; no<2*NFLD && argc>2; no++){ 75 if (argv[2][0] == '1' && argv[2][1] == '.') { 76 olistf[no] = F1; 77 olist[no] = atoi(&argv[2][2]); 78 } else if (argv[2][0] == '2' && argv[2][1] == '.') { 79 olist[no] = atoi(&argv[2][2]); 80 olistf[no] = F2; 81 } else if (argv[2][0] == '0') 82 olistf[no] = F0; 83 else 84 break; 85 argc--; 86 argv++; 87 } 88 break; 89 case 'j': 90 if(argc <= 2) 91 break; 92 if (argv[1][2] == '1') 93 j1 = atoi(argv[2]); 94 else if (argv[1][2] == '2') 95 j2 = atoi(argv[2]); 96 else 97 j1 = j2 = atoi(argv[2]); 98 argc--; 99 argv++; 100 break; 101 case '1': 102 j1 = atoi(getoptarg(&argc, &argv)); 103 break; 104 case '2': 105 j2 = atoi(getoptarg(&argc, &argv)); 106 break; 107 } 108 argc--; 109 argv++; 110 } 111 proceed: 112 for (i = 0; i < no; i++) 113 if (olist[i]-- > NFLD) /* 0 origin */ 114 error("field number too big in -o",""); 115 if (argc != 3) 116 error("usage: join [-1 x -2 y] [-o list] file1 file2",""); 117 if (j1 < 1 || j2 < 1) 118 error("invalid field indices", ""); 119 j1--; 120 j2--; /* everyone else believes in 0 origin */ 121 s1 = ppi[F1][j1]; 122 s2 = ppi[F2][j2]; 123 if (strcmp(argv[1], "-") == 0) 124 f[F1] = stdin; 125 else if ((f[F1] = fopen(argv[1], "r")) == 0) 126 error("can't open %s", argv[1]); 127 if(strcmp(argv[2], "-") == 0) { 128 f[F2] = stdin; 129 } else if ((f[F2] = fopen(argv[2], "r")) == 0) 130 error("can't open %s", argv[2]); 131 132 if(ftell(f[F2]) >= 0) 133 seek2(); 134 else if(ftell(f[F1]) >= 0) 135 seek1(); 136 else 137 error("neither file is randomly accessible",""); 138 if (discard) 139 error("some input line was truncated", ""); 140 exits(""); 141 } 142 int runecmp(Rune *a, Rune *b){ 143 while(*a==*b){ 144 if(*a=='\0') return 0; 145 a++; 146 b++; 147 } 148 if(*a<*b) return -1; 149 return 1; 150 } 151 char *runetostr(char *buf, Rune *r){ 152 char *s; 153 for(s=buf;*r;r++) s+=runetochar(s, r); 154 *s='\0'; 155 return buf; 156 } 157 Rune *strtorune(Rune *buf, char *s){ 158 Rune *r; 159 for(r=buf;*s;r++) s+=chartorune(r, s); 160 *r='\0'; 161 return buf; 162 } 163 /* lazy. there ought to be a clean way to combine seek1 & seek2 */ 164 #define get1() n1=input(F1) 165 #define get2() n2=input(F2) 166 void 167 seek2() 168 { 169 int n1, n2; 170 int top2=0; 171 int bot2 = ftell(f[F2]); 172 get1(); 173 get2(); 174 while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) { 175 if(n1>0 && n2>0 && comp()>0 || n1==0) { 176 if(a2) output(0, n2); 177 bot2 = ftell(f[F2]); 178 get2(); 179 } else if(n1>0 && n2>0 && comp()<0 || n2==0) { 180 if(a1) output(n1, 0); 181 get1(); 182 } else /*(n1>0 && n2>0 && comp()==0)*/ { 183 while(n2>0 && comp()==0) { 184 output(n1, n2); 185 top2 = ftell(f[F2]); 186 get2(); 187 } 188 fseek(f[F2], bot2, 0); 189 get2(); 190 get1(); 191 for(;;) { 192 if(n1>0 && n2>0 && comp()==0) { 193 output(n1, n2); 194 get2(); 195 } else if(n1>0 && n2>0 && comp()<0 || n2==0) { 196 fseek(f[F2], bot2, 0); 197 get2(); 198 get1(); 199 } else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{ 200 fseek(f[F2], top2, 0); 201 bot2 = top2; 202 get2(); 203 break; 204 } 205 } 206 } 207 } 208 } 209 void 210 seek1() 211 { 212 int n1, n2; 213 int top1=0; 214 int bot1 = ftell(f[F1]); 215 get1(); 216 get2(); 217 while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) { 218 if(n1>0 && n2>0 && comp()>0 || n1==0) { 219 if(a2) output(0, n2); 220 get2(); 221 } else if(n1>0 && n2>0 && comp()<0 || n2==0) { 222 if(a1) output(n1, 0); 223 bot1 = ftell(f[F1]); 224 get1(); 225 } else /*(n1>0 && n2>0 && comp()==0)*/ { 226 while(n2>0 && comp()==0) { 227 output(n1, n2); 228 top1 = ftell(f[F1]); 229 get1(); 230 } 231 fseek(f[F1], bot1, 0); 232 get2(); 233 get1(); 234 for(;;) { 235 if(n1>0 && n2>0 && comp()==0) { 236 output(n1, n2); 237 get1(); 238 } else if(n1>0 && n2>0 && comp()>0 || n1==0) { 239 fseek(f[F1], bot1, 0); 240 get2(); 241 get1(); 242 } else /*(n1>0 && n2>0 && comp()<0 || n2==0)*/{ 243 fseek(f[F1], top1, 0); 244 bot1 = top1; 245 get1(); 246 break; 247 } 248 } 249 } 250 } 251 } 252 253 int 254 input(int n) /* get input line and split into fields */ 255 { 256 register int i, c; 257 Rune *bp; 258 Rune **pp; 259 char line[BUFSIZ]; 260 261 bp = buf[n]; 262 pp = ppi[n]; 263 if (fgets(line, BUFSIZ, f[n]) == 0) 264 return(0); 265 strtorune(bp, line); 266 i = 0; 267 do { 268 i++; 269 if (sep1 == ' ') /* strip multiples */ 270 while ((c = *bp) == sep1 || c == sep2) 271 bp++; /* skip blanks */ 272 *pp++ = bp; /* record beginning */ 273 while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0') 274 bp++; 275 *bp++ = '\0'; /* mark end by overwriting blank */ 276 } while (c != '\n' && c != '\0' && i < NFLD-1); 277 if (c != '\n') 278 discard++; 279 280 *pp = 0; 281 return(i); 282 } 283 284 void 285 output(int on1, int on2) /* print items from olist */ 286 { 287 int i; 288 Rune *temp; 289 char buf[BUFSIZ]; 290 291 if (no <= 0) { /* default case */ 292 printf("%s", runetostr(buf, on1? ppi[F1][j1]: ppi[F2][j2])); 293 for (i = 0; i < on1; i++) 294 if (i != j1) 295 printf("%s%s", sepstr, runetostr(buf, ppi[F1][i])); 296 for (i = 0; i < on2; i++) 297 if (i != j2) 298 printf("%s%s", sepstr, runetostr(buf, ppi[F2][i])); 299 printf("\n"); 300 } else { 301 for (i = 0; i < no; i++) { 302 if (olistf[i]==F0 && on1>j1) 303 temp = ppi[F1][j1]; 304 else if (olistf[i]==F0 && on2>j2) 305 temp = ppi[F2][j2]; 306 else { 307 temp = ppi[olistf[i]][olist[i]]; 308 if(olistf[i]==F1 && on1<=olist[i] || 309 olistf[i]==F2 && on2<=olist[i] || 310 *temp==0) 311 temp = null; 312 } 313 printf("%s", runetostr(buf, temp)); 314 if (i == no - 1) 315 printf("\n"); 316 else 317 printf("%s", sepstr); 318 } 319 } 320 } 321 322 void 323 error(char *s1, char *s2) 324 { 325 fprintf(stderr, "join: "); 326 fprintf(stderr, s1, s2); 327 fprintf(stderr, "\n"); 328 exits(s1); 329 } 330 331 char * 332 getoptarg(int *argcp, char ***argvp) 333 { 334 int argc = *argcp; 335 char **argv = *argvp; 336 if(argv[1][2] != 0) 337 return &argv[1][2]; 338 if(argc<=2 || argv[2][0]=='-') 339 error("incomplete option %s", argv[1]); 340 *argcp = argc-1; 341 *argvp = ++argv; 342 return argv[1]; 343 } 344 345 void 346 oparse(char *s) 347 { 348 for (no = 0; no<2*NFLD && *s; no++, s++) { 349 switch(*s) { 350 case 0: 351 return; 352 case '0': 353 olistf[no] = F0; 354 break; 355 case '1': 356 case '2': 357 if(s[1] == '.' && isdigit(s[2])) { 358 olistf[no] = *s=='1'? F1: F2; 359 olist[no] = atoi(s += 2); 360 break; 361 } /* fall thru */ 362 default: 363 error("invalid -o list", ""); 364 } 365 if(s[1] == ',') 366 s++; 367 } 368 } 369