1 /* join F1 F2 on stuff */ 2 #include <u.h> 3 #include <libc.h> 4 #include <stdio.h> 5 #include <ctype.h> 6 #define F1 0 7 #define F2 1 8 #define F0 3 9 #define NFLD 100 /* max field per line */ 10 #define comp() runecmp(ppi[F1][j1],ppi[F2][j2]) 11 FILE *f[2]; 12 Rune buf[2][BUFSIZ]; /*input lines */ 13 Rune *ppi[2][NFLD+1]; /* pointers to fields in lines */ 14 Rune *s1,*s2; 15 int j1 = 1; /* join of this field of file 1 */ 16 int j2 = 1; /* join of this field of file 2 */ 17 int olist[2*NFLD]; /* output these fields */ 18 int olistf[2*NFLD]; /* from these files */ 19 int no; /* number of entries in olist */ 20 Rune sep1 = ' '; /* default field separator */ 21 Rune sep2 = '\t'; 22 char *sepstr=" "; 23 int discard; /* count of truncated lines */ 24 Rune null[BUFSIZ] = L""; 25 int a1; 26 int a2; 27 28 char *getoptarg(int*, char***); 29 void output(int, int); 30 int input(int); 31 void oparse(char*); 32 void error(char*, char*); 33 void seek1(void), seek2(void); 34 Rune *strtorune(Rune *, char *); 35 36 37 void 38 main(int argc, char **argv) 39 { 40 int i; 41 42 while (argc > 1 && argv[1][0] == '-') { 43 if (argv[1][1] == '\0') 44 break; 45 switch (argv[1][1]) { 46 case '-': 47 argc--; 48 argv++; 49 goto proceed; 50 case 'a': 51 switch(*getoptarg(&argc, &argv)) { 52 case '1': 53 a1++; 54 break; 55 case '2': 56 a2++; 57 break; 58 default: 59 error("incomplete option -a",""); 60 } 61 break; 62 case 'e': 63 strtorune(null, getoptarg(&argc, &argv)); 64 break; 65 case 't': 66 sepstr=getoptarg(&argc, &argv); 67 chartorune(&sep1, sepstr); 68 sep2 = sep1; 69 break; 70 case 'o': 71 if(argv[1][2]!=0 || 72 argc>2 && strchr(argv[2],',')!=0) 73 oparse(getoptarg(&argc, &argv)); 74 else for (no = 0; no<2*NFLD && argc>2; no++){ 75 if (argv[2][0] == '1' && argv[2][1] == '.') { 76 olistf[no] = F1; 77 olist[no] = atoi(&argv[2][2]); 78 } else if (argv[2][0] == '2' && argv[2][1] == '.') { 79 olist[no] = atoi(&argv[2][2]); 80 olistf[no] = F2; 81 } else if (argv[2][0] == '0') 82 olistf[no] = F0; 83 else 84 break; 85 argc--; 86 argv++; 87 } 88 break; 89 case 'j': 90 if(argc <= 2) 91 break; 92 if (argv[1][2] == '1') 93 j1 = atoi(argv[2]); 94 else if (argv[1][2] == '2') 95 j2 = atoi(argv[2]); 96 else 97 j1 = j2 = atoi(argv[2]); 98 argc--; 99 argv++; 100 break; 101 case '1': 102 j1 = atoi(getoptarg(&argc, &argv)); 103 break; 104 case '2': 105 j2 = atoi(getoptarg(&argc, &argv)); 106 break; 107 } 108 argc--; 109 argv++; 110 } 111 proceed: 112 for (i = 0; i < no; i++) 113 if (olist[i]-- > NFLD) /* 0 origin */ 114 error("field number too big in -o",""); 115 if (argc != 3) 116 error("usage: join [-1 x -2 y] [-o list] file1 file2",""); 117 j1--; 118 j2--; /* everyone else believes in 0 origin */ 119 s1 = ppi[F1][j1]; 120 s2 = ppi[F2][j2]; 121 if (strcmp(argv[1], "-") == 0) 122 f[F1] = stdin; 123 else if ((f[F1] = fopen(argv[1], "r")) == 0) 124 error("can't open %s", argv[1]); 125 if(strcmp(argv[2], "-") == 0) { 126 f[F2] = stdin; 127 } else if ((f[F2] = fopen(argv[2], "r")) == 0) 128 error("can't open %s", argv[2]); 129 130 if(ftell(f[F2]) >= 0) 131 seek2(); 132 else if(ftell(f[F1]) >= 0) 133 seek1(); 134 else 135 error("neither file is randomly accessible",""); 136 if (discard) 137 error("some input line was truncated", ""); 138 exits(""); 139 } 140 int runecmp(Rune *a, Rune *b){ 141 while(*a==*b){ 142 if(*a=='\0') return 0; 143 a++; 144 b++; 145 } 146 if(*a<*b) return -1; 147 return 1; 148 } 149 char *runetostr(char *buf, Rune *r){ 150 char *s; 151 for(s=buf;*r;r++) s+=runetochar(s, r); 152 *s='\0'; 153 return buf; 154 } 155 Rune *strtorune(Rune *buf, char *s){ 156 Rune *r; 157 for(r=buf;*s;r++) s+=chartorune(r, s); 158 *r='\0'; 159 return buf; 160 } 161 /* lazy. there ought to be a clean way to combine seek1 & seek2 */ 162 #define get1() n1=input(F1) 163 #define get2() n2=input(F2) 164 void 165 seek2() 166 { 167 int n1, n2; 168 int top2=0; 169 int bot2 = ftell(f[F2]); 170 get1(); 171 get2(); 172 while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) { 173 if(n1>0 && n2>0 && comp()>0 || n1==0) { 174 if(a2) output(0, n2); 175 bot2 = ftell(f[F2]); 176 get2(); 177 } else if(n1>0 && n2>0 && comp()<0 || n2==0) { 178 if(a1) output(n1, 0); 179 get1(); 180 } else /*(n1>0 && n2>0 && comp()==0)*/ { 181 while(n2>0 && comp()==0) { 182 output(n1, n2); 183 top2 = ftell(f[F2]); 184 get2(); 185 } 186 fseek(f[F2], bot2, 0); 187 get2(); 188 get1(); 189 for(;;) { 190 if(n1>0 && n2>0 && comp()==0) { 191 output(n1, n2); 192 get2(); 193 } else if(n1>0 && n2>0 && comp()<0 || n2==0) { 194 fseek(f[F2], bot2, 0); 195 get2(); 196 get1(); 197 } else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{ 198 fseek(f[F2], top2, 0); 199 bot2 = top2; 200 get2(); 201 break; 202 } 203 } 204 } 205 } 206 } 207 void 208 seek1() 209 { 210 int n1, n2; 211 int top1=0; 212 int bot1 = ftell(f[F1]); 213 get1(); 214 get2(); 215 while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) { 216 if(n1>0 && n2>0 && comp()>0 || n1==0) { 217 if(a2) output(0, n2); 218 get2(); 219 } else if(n1>0 && n2>0 && comp()<0 || n2==0) { 220 if(a1) output(n1, 0); 221 bot1 = ftell(f[F1]); 222 get1(); 223 } else /*(n1>0 && n2>0 && comp()==0)*/ { 224 while(n2>0 && comp()==0) { 225 output(n1, n2); 226 top1 = ftell(f[F1]); 227 get1(); 228 } 229 fseek(f[F1], bot1, 0); 230 get2(); 231 get1(); 232 for(;;) { 233 if(n1>0 && n2>0 && comp()==0) { 234 output(n1, n2); 235 get1(); 236 } else if(n1>0 && n2>0 && comp()>0 || n1==0) { 237 fseek(f[F1], bot1, 0); 238 get2(); 239 get1(); 240 } else /*(n1>0 && n2>0 && comp()<0 || n2==0)*/{ 241 fseek(f[F1], top1, 0); 242 bot1 = top1; 243 get1(); 244 break; 245 } 246 } 247 } 248 } 249 } 250 251 int 252 input(int n) /* get input line and split into fields */ 253 { 254 register int i, c; 255 Rune *bp; 256 Rune **pp; 257 char line[BUFSIZ]; 258 259 bp = buf[n]; 260 pp = ppi[n]; 261 if (fgets(line, BUFSIZ, f[n]) == 0) 262 return(0); 263 strtorune(bp, line); 264 i = 0; 265 do { 266 i++; 267 if (sep1 == ' ') /* strip multiples */ 268 while ((c = *bp) == sep1 || c == sep2) 269 bp++; /* skip blanks */ 270 *pp++ = bp; /* record beginning */ 271 while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0') 272 bp++; 273 *bp++ = '\0'; /* mark end by overwriting blank */ 274 } while (c != '\n' && c != '\0' && i < NFLD-1); 275 if (c != '\n') 276 discard++; 277 278 *pp = 0; 279 return(i); 280 } 281 282 void 283 output(int on1, int on2) /* print items from olist */ 284 { 285 int i; 286 Rune *temp; 287 char buf[BUFSIZ]; 288 289 if (no <= 0) { /* default case */ 290 printf("%s", runetostr(buf, on1? ppi[F1][j1]: ppi[F2][j2])); 291 for (i = 0; i < on1; i++) 292 if (i != j1) 293 printf("%s%s", sepstr, runetostr(buf, ppi[F1][i])); 294 for (i = 0; i < on2; i++) 295 if (i != j2) 296 printf("%s%s", sepstr, runetostr(buf, ppi[F2][i])); 297 printf("\n"); 298 } else { 299 for (i = 0; i < no; i++) { 300 if (olistf[i]==F0 && on1>j1) 301 temp = ppi[F1][j1]; 302 else if (olistf[i]==F0 && on2>j2) 303 temp = ppi[F2][j2]; 304 else { 305 temp = ppi[olistf[i]][olist[i]]; 306 if(olistf[i]==F1 && on1<=olist[i] || 307 olistf[i]==F2 && on2<=olist[i] || 308 *temp==0) 309 temp = null; 310 } 311 printf("%s", runetostr(buf, temp)); 312 if (i == no - 1) 313 printf("\n"); 314 else 315 printf("%s", sepstr); 316 } 317 } 318 } 319 320 void 321 error(char *s1, char *s2) 322 { 323 fprintf(stderr, "join: "); 324 fprintf(stderr, s1, s2); 325 fprintf(stderr, "\n"); 326 exits(s1); 327 } 328 329 char * 330 getoptarg(int *argcp, char ***argvp) 331 { 332 int argc = *argcp; 333 char **argv = *argvp; 334 if(argv[1][2] != 0) 335 return &argv[1][2]; 336 if(argc<=2 || argv[2][0]=='-') 337 error("incomplete option %s", argv[1]); 338 *argcp = argc-1; 339 *argvp = ++argv; 340 return argv[1]; 341 } 342 343 void 344 oparse(char *s) 345 { 346 for (no = 0; no<2*NFLD && *s; no++, s++) { 347 switch(*s) { 348 case 0: 349 return; 350 case '0': 351 olistf[no] = F0; 352 break; 353 case '1': 354 case '2': 355 if(s[1] == '.' && isdigit(s[2])) { 356 olistf[no] = *s=='1'? F1: F2; 357 olist[no] = atoi(s += 2); 358 break; 359 } /* fall thru */ 360 default: 361 error("invalid -o list", ""); 362 } 363 if(s[1] == ',') 364 s++; 365 } 366 } 367