1*4887Schin /*********************************************************************** 2*4887Schin * * 3*4887Schin * This software is part of the ast package * 4*4887Schin * Copyright (c) 1992-2007 AT&T Knowledge Ventures * 5*4887Schin * and is licensed under the * 6*4887Schin * Common Public License, Version 1.0 * 7*4887Schin * by AT&T Knowledge Ventures * 8*4887Schin * * 9*4887Schin * A copy of the License is available at * 10*4887Schin * http://www.opensource.org/licenses/cpl1.0.txt * 11*4887Schin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12*4887Schin * * 13*4887Schin * Information and Software Systems Research * 14*4887Schin * AT&T Research * 15*4887Schin * Florham Park NJ * 16*4887Schin * * 17*4887Schin * Glenn Fowler <gsf@research.att.com> * 18*4887Schin * David Korn <dgk@research.att.com> * 19*4887Schin * * 20*4887Schin ***********************************************************************/ 21*4887Schin #pragma prototyped 22*4887Schin /* 23*4887Schin * David Korn 24*4887Schin * Glenn Fowler 25*4887Schin * AT&T Research 26*4887Schin * 27*4887Schin * join 28*4887Schin */ 29*4887Schin 30*4887Schin static const char usage[] = 31*4887Schin "[-?\n@(#)$Id: join (AT&T Research) 2006-10-31 $\n]" 32*4887Schin USAGE_LICENSE 33*4887Schin "[+NAME?join - relational database operator]" 34*4887Schin "[+DESCRIPTION?\bjoin\b performs an \aequality join\a on the files \afile1\a " 35*4887Schin "and \afile2\a and writes the resulting joined files to standard " 36*4887Schin "output. By default, a field is delimited by one or more spaces " 37*4887Schin "and tabs with leading spaces and/or tabs ignored. The \b-t\b option " 38*4887Schin "can be used to change the field delimiter.]" 39*4887Schin "[+?The \ajoin field\a is a field in each file on which files are compared. " 40*4887Schin "By default \bjoin\b writes one line in the output for each pair " 41*4887Schin "of lines in \afiles1\a and \afiles2\a that have identical join " 42*4887Schin "fields. The default output line consists of the join field, " 43*4887Schin "then the remaining fields from \afile1\a, then the remaining " 44*4887Schin "fields from \afile2\a, but this can be changed with the \b-o\b " 45*4887Schin "option. The \b-a\b option can be used to add unmatched lines " 46*4887Schin "to the output. The \b-v\b option can be used to output only " 47*4887Schin "unmatched lines.]" 48*4887Schin "[+?The files \afile1\a and \afile2\a must be ordered in the collating " 49*4887Schin "sequence of \bsort -b\b on the fields on which they are to be " 50*4887Schin "joined otherwise the results are unspecified.]" 51*4887Schin "[+?If either \afile1\a or \afile2\a is \b-\b, \bjoin\b " 52*4887Schin "uses standard input starting at the current location.]" 53*4887Schin 54*4887Schin "[e:empty]:[string?Replace empty output fields in the list selected with" 55*4887Schin " \b-o\b with \astring\a.]" 56*4887Schin "[o:output]:[list?Construct the output line to comprise the fields specified " 57*4887Schin "in a blank or comma separated list \alist\a. Each element in " 58*4887Schin "\alist\a consists of a file number (either 1 or 2), a period, " 59*4887Schin "and a field number or \b0\b representing the join field. " 60*4887Schin "As an obsolete feature multiple occurrences of \b-o\b can " 61*4887Schin "be specified.]" 62*4887Schin "[t:separator|tabs]:[delim?Use \adelim\a as the field separator for both input" 63*4887Schin " and output.]" 64*4887Schin "[1:j1]#[field?Join on field \afield\a of \afile1\a. Fields start at 1.]" 65*4887Schin "[2:j2]#[field?Join on field \afield\a of \afile2\a. Fields start at 1.]" 66*4887Schin "[j:join]#[field?Equivalent to \b-1\b \afield\a \b-2\b \afield\a.]" 67*4887Schin "[a:unpairable]#[fileno?Write a line for each unpairable line in file" 68*4887Schin " \afileno\a, where \afileno\a is either 1 or 2, in addition to the" 69*4887Schin " normal output. If \b-a\b options appear for both 1 and 2, then " 70*4887Schin "all unpairable lines will be output.]" 71*4887Schin "[v:suppress]#[fileno?Write a line for each unpairable line in file" 72*4887Schin " \afileno\a, where \afileno\a is either 1 or 2, instead of the normal " 73*4887Schin "output. If \b-v\b options appear for both 1 and 2, then " 74*4887Schin "all unpairable lines will be output.] ]" 75*4887Schin "[i:ignorecase?Ignore case in field comparisons.]" 76*4887Schin "[B!:mmap?Enable memory mapped reads instead of buffered.]" 77*4887Schin 78*4887Schin "[+?The following obsolete option forms are also recognized: \b-j\b \afield\a" 79*4887Schin " is equivalent to \b-1\b \afield\a \b-2\b \afield\a, \b-j1\b \afield\a" 80*4887Schin " is equivalent to \b-1\b \afield\a, and \b-j2\b \afield\a is" 81*4887Schin " equivalent to \b-2\b \afield\a.]" 82*4887Schin 83*4887Schin "\n" 84*4887Schin "\nfile1 file2\n" 85*4887Schin "\n" 86*4887Schin "[+EXIT STATUS?]{" 87*4887Schin "[+0?Both files processed successfully.]" 88*4887Schin "[+>0?An error occurred.]" 89*4887Schin "}" 90*4887Schin "[+SEE ALSO?\bcut\b(1), \bcomm\b(1), \bpaste\b(1), \bsort\b(1), \buniq\b(1)]" 91*4887Schin ; 92*4887Schin 93*4887Schin #include <cmd.h> 94*4887Schin #include <sfdisc.h> 95*4887Schin 96*4887Schin #define C_FILE1 001 97*4887Schin #define C_FILE2 002 98*4887Schin #define C_COMMON 004 99*4887Schin #define C_ALL (C_FILE1|C_FILE2|C_COMMON) 100*4887Schin 101*4887Schin #define NFIELD 10 102*4887Schin #define JOINFIELD 2 103*4887Schin 104*4887Schin #define S_DELIM 1 105*4887Schin #define S_SPACE 2 106*4887Schin #define S_NL 3 107*4887Schin 108*4887Schin typedef struct 109*4887Schin { 110*4887Schin Sfio_t* iop; 111*4887Schin char* name; 112*4887Schin char* recptr; 113*4887Schin int reclen; 114*4887Schin int field; 115*4887Schin int fieldlen; 116*4887Schin int nfields; 117*4887Schin int maxfields; 118*4887Schin int spaces; 119*4887Schin int hit; 120*4887Schin int discard; 121*4887Schin char** fieldlist; 122*4887Schin } File_t; 123*4887Schin 124*4887Schin typedef struct 125*4887Schin { 126*4887Schin unsigned char state[1<<CHAR_BIT]; 127*4887Schin Sfio_t* outfile; 128*4887Schin int* outlist; 129*4887Schin int outmode; 130*4887Schin int ooutmode; 131*4887Schin char* nullfield; 132*4887Schin int delim; 133*4887Schin int buffered; 134*4887Schin int ignorecase; 135*4887Schin char* same; 136*4887Schin int samesize; 137*4887Schin File_t file[2]; 138*4887Schin } Join_t; 139*4887Schin 140*4887Schin static void 141*4887Schin done(register Join_t* jp) 142*4887Schin { 143*4887Schin if (jp->file[0].iop && jp->file[0].iop != sfstdin) 144*4887Schin sfclose(jp->file[0].iop); 145*4887Schin if (jp->file[1].iop && jp->file[1].iop != sfstdin) 146*4887Schin sfclose(jp->file[1].iop); 147*4887Schin if (jp->outlist) 148*4887Schin free(jp->outlist); 149*4887Schin if (jp->file[0].fieldlist) 150*4887Schin free(jp->file[0].fieldlist); 151*4887Schin if (jp->file[1].fieldlist) 152*4887Schin free(jp->file[1].fieldlist); 153*4887Schin if (jp->same) 154*4887Schin free(jp->same); 155*4887Schin free(jp); 156*4887Schin } 157*4887Schin 158*4887Schin static Join_t* 159*4887Schin init(void) 160*4887Schin { 161*4887Schin register Join_t* jp; 162*4887Schin 163*4887Schin if (jp = newof(0, Join_t, 1, 0)) 164*4887Schin { 165*4887Schin jp->state[' '] = jp->state['\t'] = S_SPACE; 166*4887Schin jp->delim = -1; 167*4887Schin jp->nullfield = 0; 168*4887Schin if (!(jp->file[0].fieldlist = newof(0, char*, NFIELD + 1, 0)) || 169*4887Schin !(jp->file[1].fieldlist = newof(0, char*, NFIELD + 1, 0))) 170*4887Schin { 171*4887Schin done(jp); 172*4887Schin return 0; 173*4887Schin } 174*4887Schin jp->file[0].maxfields = NFIELD; 175*4887Schin jp->file[1].maxfields = NFIELD; 176*4887Schin jp->outmode = C_COMMON; 177*4887Schin } 178*4887Schin return jp; 179*4887Schin } 180*4887Schin 181*4887Schin static int 182*4887Schin getolist(Join_t* jp, const char* first, char** arglist) 183*4887Schin { 184*4887Schin register const char* cp = first; 185*4887Schin char** argv = arglist; 186*4887Schin register int c; 187*4887Schin int* outptr; 188*4887Schin int* outmax; 189*4887Schin int nfield = NFIELD; 190*4887Schin char* str; 191*4887Schin 192*4887Schin outptr = jp->outlist = newof(0, int, NFIELD + 1, 0); 193*4887Schin outmax = outptr + NFIELD; 194*4887Schin while (c = *cp++) 195*4887Schin { 196*4887Schin if (c==' ' || c=='\t' || c==',') 197*4887Schin continue; 198*4887Schin str = (char*)--cp; 199*4887Schin if (*cp=='0' && ((c=cp[1])==0 || c==' ' || c=='\t' || c==',')) 200*4887Schin { 201*4887Schin str++; 202*4887Schin c = JOINFIELD; 203*4887Schin goto skip; 204*4887Schin } 205*4887Schin if (cp[1]!='.' || (*cp!='1' && *cp!='2') || (c=strtol(cp+2,&str,10)) <=0) 206*4887Schin { 207*4887Schin error(2,"%s: invalid field list",first); 208*4887Schin break; 209*4887Schin } 210*4887Schin c--; 211*4887Schin c <<=2; 212*4887Schin if (*cp=='2') 213*4887Schin c |=1; 214*4887Schin skip: 215*4887Schin if (outptr >= outmax) 216*4887Schin { 217*4887Schin jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0); 218*4887Schin outptr = jp->outlist + nfield; 219*4887Schin nfield *= 2; 220*4887Schin outmax = jp->outlist + nfield; 221*4887Schin } 222*4887Schin *outptr++ = c; 223*4887Schin cp = str; 224*4887Schin } 225*4887Schin /* need to accept obsolescent command syntax */ 226*4887Schin while (1) 227*4887Schin { 228*4887Schin if (!(cp= *argv) || cp[1]!='.' || (*cp!='1' && *cp!='2')) 229*4887Schin { 230*4887Schin if (*cp=='0' && cp[1]==0) 231*4887Schin { 232*4887Schin c = JOINFIELD; 233*4887Schin goto skip2; 234*4887Schin } 235*4887Schin break; 236*4887Schin } 237*4887Schin str = (char*)cp; 238*4887Schin c = strtol(cp+2, &str,10); 239*4887Schin if (*str || --c<0) 240*4887Schin break; 241*4887Schin argv++; 242*4887Schin c <<= 2; 243*4887Schin if (*cp=='2') 244*4887Schin c |=1; 245*4887Schin skip2: 246*4887Schin if (outptr >= outmax) 247*4887Schin { 248*4887Schin jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0); 249*4887Schin outptr = jp->outlist + nfield; 250*4887Schin nfield *= 2; 251*4887Schin outmax = jp->outlist + nfield; 252*4887Schin } 253*4887Schin *outptr++ = c; 254*4887Schin } 255*4887Schin *outptr = -1; 256*4887Schin return argv-arglist; 257*4887Schin } 258*4887Schin 259*4887Schin /* 260*4887Schin * read in a record from file <index> and split into fields 261*4887Schin */ 262*4887Schin static unsigned char* 263*4887Schin getrec(Join_t* jp, int index, int discard) 264*4887Schin { 265*4887Schin register unsigned char* sp = jp->state; 266*4887Schin register File_t* fp = &jp->file[index]; 267*4887Schin register char** ptr = fp->fieldlist; 268*4887Schin register char** ptrmax = ptr + fp->maxfields; 269*4887Schin register char* cp; 270*4887Schin register int n = 0; 271*4887Schin 272*4887Schin if (cmdquit()) 273*4887Schin return 0; 274*4887Schin if (discard && fp->discard) 275*4887Schin sfraise(fp->iop, SFSK_DISCARD, NiL); 276*4887Schin fp->spaces = 0; 277*4887Schin fp->hit = 0; 278*4887Schin if (!(cp = sfgetr(fp->iop, '\n', 0))) 279*4887Schin { 280*4887Schin jp->outmode &= ~(1<<index); 281*4887Schin return 0; 282*4887Schin } 283*4887Schin fp->recptr = cp; 284*4887Schin fp->reclen = sfvalue(fp->iop); 285*4887Schin if (jp->delim=='\n') /* handle new-line delimiter specially */ 286*4887Schin { 287*4887Schin *ptr++ = cp; 288*4887Schin cp += fp->reclen; 289*4887Schin } 290*4887Schin else while (n!=S_NL) /* separate into fields */ 291*4887Schin { 292*4887Schin if (ptr >= ptrmax) 293*4887Schin { 294*4887Schin n = 2*fp->maxfields; 295*4887Schin fp->fieldlist = newof(fp->fieldlist, char*, n + 1, 0); 296*4887Schin ptr = fp->fieldlist + fp->maxfields; 297*4887Schin fp->maxfields = n; 298*4887Schin ptrmax = fp->fieldlist+n; 299*4887Schin } 300*4887Schin *ptr++ = cp; 301*4887Schin if (jp->delim<=0 && sp[*(unsigned char*)cp]==S_SPACE) 302*4887Schin { 303*4887Schin fp->spaces = 1; 304*4887Schin while (sp[*(unsigned char*)cp++]==S_SPACE); 305*4887Schin cp--; 306*4887Schin } 307*4887Schin while ((n=sp[*(unsigned char*)cp++])==0); 308*4887Schin } 309*4887Schin *ptr = cp; 310*4887Schin fp->nfields = ptr - fp->fieldlist; 311*4887Schin if ((n=fp->field) < fp->nfields) 312*4887Schin { 313*4887Schin cp = fp->fieldlist[n]; 314*4887Schin /* eliminate leading spaces */ 315*4887Schin if (fp->spaces) 316*4887Schin { 317*4887Schin while (sp[*(unsigned char*)cp++]==S_SPACE); 318*4887Schin cp--; 319*4887Schin } 320*4887Schin fp->fieldlen = (fp->fieldlist[n+1]-cp)-1; 321*4887Schin return (unsigned char*)cp; 322*4887Schin } 323*4887Schin fp->fieldlen = 0; 324*4887Schin return (unsigned char*)""; 325*4887Schin } 326*4887Schin 327*4887Schin #if DEBUG_TRACE 328*4887Schin static unsigned char* u1,u2,u3; 329*4887Schin #define getrec(p,n,d) (u1 = getrec(p, n, d), sfprintf(sfstdout, "[G%d#%d@%I*d:%-.8s]", __LINE__, n, sizeof(Sfoff_t), sftell(p->file[n].iop), u1), u1) 330*4887Schin #endif 331*4887Schin 332*4887Schin /* 333*4887Schin * print field <n> from file <index> 334*4887Schin */ 335*4887Schin static int 336*4887Schin outfield(Join_t* jp, int index, register int n, int last) 337*4887Schin { 338*4887Schin register File_t* fp = &jp->file[index]; 339*4887Schin register char* cp; 340*4887Schin register char* cpmax; 341*4887Schin register int size; 342*4887Schin register Sfio_t* iop = jp->outfile; 343*4887Schin 344*4887Schin if (n < fp->nfields) 345*4887Schin { 346*4887Schin cp = fp->fieldlist[n]; 347*4887Schin cpmax = fp->fieldlist[n+1]; 348*4887Schin } 349*4887Schin else 350*4887Schin cp = 0; 351*4887Schin if ((n=jp->delim)<=0) 352*4887Schin { 353*4887Schin if (fp->spaces) 354*4887Schin { 355*4887Schin /*eliminate leading spaces */ 356*4887Schin while (jp->state[*(unsigned char*)cp++]==S_SPACE); 357*4887Schin cp--; 358*4887Schin } 359*4887Schin n = ' '; 360*4887Schin } 361*4887Schin if (last) 362*4887Schin n = '\n'; 363*4887Schin if (cp) 364*4887Schin size = cpmax-cp; 365*4887Schin else 366*4887Schin size = 0; 367*4887Schin if (size==0) 368*4887Schin { 369*4887Schin if (!jp->nullfield) 370*4887Schin sfputc(iop,n); 371*4887Schin else if (sfputr(iop,jp->nullfield,n) < 0) 372*4887Schin return -1; 373*4887Schin } 374*4887Schin else 375*4887Schin { 376*4887Schin last = cp[size-1]; 377*4887Schin cp[size-1] = n; 378*4887Schin if (sfwrite(iop,cp,size) < 0) 379*4887Schin return -1; 380*4887Schin cp[size-1] = last; 381*4887Schin } 382*4887Schin return 0; 383*4887Schin } 384*4887Schin 385*4887Schin #if DEBUG_TRACE 386*4887Schin static int i1,i2,i3; 387*4887Schin #define outfield(p,i,n,f) (sfprintf(sfstdout, "[F%d#%d:%d,%d]", __LINE__, i1=i, i2=n, i3=f), outfield(p, i1, i2, i3)) 388*4887Schin #endif 389*4887Schin 390*4887Schin static int 391*4887Schin outrec(register Join_t* jp, int mode) 392*4887Schin { 393*4887Schin register File_t* fp; 394*4887Schin register int i; 395*4887Schin register int j; 396*4887Schin register int k; 397*4887Schin register int n; 398*4887Schin int* out; 399*4887Schin 400*4887Schin if (mode < 0 && jp->file[0].hit++) 401*4887Schin return 0; 402*4887Schin if (mode > 0 && jp->file[1].hit++) 403*4887Schin return 0; 404*4887Schin if (out = jp->outlist) 405*4887Schin { 406*4887Schin while ((n = *out++) >= 0) 407*4887Schin { 408*4887Schin if (n == JOINFIELD) 409*4887Schin { 410*4887Schin i = mode >= 0; 411*4887Schin j = jp->file[i].field; 412*4887Schin } 413*4887Schin else 414*4887Schin { 415*4887Schin i = n & 1; 416*4887Schin j = (mode<0 && i || mode>0 && !i) ? 417*4887Schin jp->file[i].nfields : 418*4887Schin n >> 2; 419*4887Schin } 420*4887Schin if (outfield(jp, i, j, *out < 0) < 0) 421*4887Schin return -1; 422*4887Schin } 423*4887Schin return 0; 424*4887Schin } 425*4887Schin k = jp->file[0].nfields; 426*4887Schin if (mode >= 0) 427*4887Schin k += jp->file[1].nfields - 1; 428*4887Schin for (i=0; i<2; i++) 429*4887Schin { 430*4887Schin fp = &jp->file[i]; 431*4887Schin if (mode>0 && i==0) 432*4887Schin { 433*4887Schin k -= (fp->nfields - 1); 434*4887Schin continue; 435*4887Schin } 436*4887Schin n = fp->field; 437*4887Schin if (mode||i==0) 438*4887Schin { 439*4887Schin /* output join field first */ 440*4887Schin if (outfield(jp,i,n,!--k) < 0) 441*4887Schin return -1; 442*4887Schin if (!k) 443*4887Schin return 0; 444*4887Schin for (j=0; j<n; j++) 445*4887Schin { 446*4887Schin if (outfield(jp,i,j,!--k) < 0) 447*4887Schin return -1; 448*4887Schin if (!k) 449*4887Schin return 0; 450*4887Schin } 451*4887Schin j = n + 1; 452*4887Schin } 453*4887Schin else 454*4887Schin j = 0; 455*4887Schin for (;j<fp->nfields; j++) 456*4887Schin { 457*4887Schin if (j!=n && outfield(jp,i,j,!--k) < 0) 458*4887Schin return -1; 459*4887Schin if (!k) 460*4887Schin return 0; 461*4887Schin } 462*4887Schin } 463*4887Schin return 0; 464*4887Schin } 465*4887Schin 466*4887Schin #if DEBUG_TRACE 467*4887Schin #define outrec(p,n) (sfprintf(sfstdout, "[R#%d,%d,%lld,%lld:%-.*s{%d}:%-.*s{%d}]", __LINE__, i1=n, lo, hi, jp->file[0].fieldlen, cp1, jp->file[0].hit, jp->file[1].fieldlen, cp2, jp->file[1].hit), outrec(p, i1)) 468*4887Schin #endif 469*4887Schin 470*4887Schin static int 471*4887Schin join(Join_t* jp) 472*4887Schin { 473*4887Schin register unsigned char* cp1; 474*4887Schin register unsigned char* cp2; 475*4887Schin register int n1; 476*4887Schin register int n2; 477*4887Schin register int n; 478*4887Schin register int cmp; 479*4887Schin register int same; 480*4887Schin int o2; 481*4887Schin Sfoff_t lo = -1; 482*4887Schin Sfoff_t hi = -1; 483*4887Schin 484*4887Schin if ((cp1 = getrec(jp, 0, 0)) && (cp2 = getrec(jp, 1, 0)) || (cp2 = 0)) 485*4887Schin { 486*4887Schin n1 = jp->file[0].fieldlen; 487*4887Schin n2 = jp->file[1].fieldlen; 488*4887Schin same = 0; 489*4887Schin for (;;) 490*4887Schin { 491*4887Schin n = n1 < n2 ? n1 : n2; 492*4887Schin #if DEBUG_TRACE 493*4887Schin if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n))) 494*4887Schin cmp = n1 - n2; 495*4887Schin sfprintf(sfstdout, "[C#%d:%d(%c-%c),%d,%lld,%lld%s]", __LINE__, cmp, *cp1, *cp2, same, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : ""); 496*4887Schin if (!cmp) 497*4887Schin #else 498*4887Schin if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)) && !(cmp = n1 - n2)) 499*4887Schin #endif 500*4887Schin { 501*4887Schin if (!(jp->outmode & C_COMMON)) 502*4887Schin { 503*4887Schin if (cp1 = getrec(jp, 0, 1)) 504*4887Schin { 505*4887Schin n1 = jp->file[0].fieldlen; 506*4887Schin same = 1; 507*4887Schin continue; 508*4887Schin } 509*4887Schin if ((jp->ooutmode & (C_FILE1|C_FILE2)) != C_FILE2) 510*4887Schin break; 511*4887Schin if (sfseek(jp->file[0].iop, (Sfoff_t)-jp->file[0].reclen, SEEK_CUR) < 0 || !(cp1 = getrec(jp, 0, 0))) 512*4887Schin { 513*4887Schin error(ERROR_SYSTEM|2, "%s: seek error", jp->file[0].name); 514*4887Schin return -1; 515*4887Schin } 516*4887Schin } 517*4887Schin else if (outrec(jp, 0) < 0) 518*4887Schin return -1; 519*4887Schin else if (lo < 0 && (jp->outmode & C_COMMON)) 520*4887Schin { 521*4887Schin if ((lo = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0) 522*4887Schin { 523*4887Schin error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name); 524*4887Schin return -1; 525*4887Schin } 526*4887Schin lo -= jp->file[1].reclen; 527*4887Schin } 528*4887Schin if (cp2 = getrec(jp, 1, lo < 0)) 529*4887Schin { 530*4887Schin n2 = jp->file[1].fieldlen; 531*4887Schin continue; 532*4887Schin } 533*4887Schin #if DEBUG_TRACE 534*4887Schin sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi); 535*4887Schin #endif 536*4887Schin } 537*4887Schin else if (cmp > 0) 538*4887Schin { 539*4887Schin if (same) 540*4887Schin { 541*4887Schin same = 0; 542*4887Schin next: 543*4887Schin if (n2 > jp->samesize) 544*4887Schin { 545*4887Schin jp->samesize = roundof(n2, 16); 546*4887Schin if (!(jp->same = newof(jp->same, char, jp->samesize, 0))) 547*4887Schin { 548*4887Schin error(ERROR_SYSTEM|2, "out of space"); 549*4887Schin return -1; 550*4887Schin } 551*4887Schin } 552*4887Schin memcpy(jp->same, cp2, o2 = n2); 553*4887Schin if (!(cp2 = getrec(jp, 1, 0))) 554*4887Schin break; 555*4887Schin n2 = jp->file[1].fieldlen; 556*4887Schin if (n2 == o2 && *cp2 == *jp->same && !memcmp(cp2, jp->same, n2)) 557*4887Schin goto next; 558*4887Schin continue; 559*4887Schin } 560*4887Schin if (hi >= 0) 561*4887Schin { 562*4887Schin if (sfseek(jp->file[1].iop, hi, SEEK_SET) != hi) 563*4887Schin { 564*4887Schin error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name); 565*4887Schin return -1; 566*4887Schin } 567*4887Schin hi = -1; 568*4887Schin } 569*4887Schin else if ((jp->outmode & C_FILE2) && outrec(jp, 1) < 0) 570*4887Schin return -1; 571*4887Schin lo = -1; 572*4887Schin if (cp2 = getrec(jp, 1, 1)) 573*4887Schin { 574*4887Schin n2 = jp->file[1].fieldlen; 575*4887Schin continue; 576*4887Schin } 577*4887Schin #if DEBUG_TRACE 578*4887Schin sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi); 579*4887Schin #endif 580*4887Schin } 581*4887Schin else if (same) 582*4887Schin { 583*4887Schin same = 0; 584*4887Schin if (!(cp1 = getrec(jp, 0, 0))) 585*4887Schin break; 586*4887Schin n1 = jp->file[0].fieldlen; 587*4887Schin continue; 588*4887Schin } 589*4887Schin if (lo >= 0) 590*4887Schin { 591*4887Schin if ((hi = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0 || 592*4887Schin (hi -= jp->file[1].reclen) < 0 || 593*4887Schin sfseek(jp->file[1].iop, lo, SEEK_SET) != lo || 594*4887Schin !(cp2 = getrec(jp, 1, 0))) 595*4887Schin { 596*4887Schin error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name); 597*4887Schin return -1; 598*4887Schin } 599*4887Schin n2 = jp->file[1].fieldlen; 600*4887Schin lo = -1; 601*4887Schin if (jp->file[1].discard) 602*4887Schin sfseek(jp->file[1].iop, (Sfoff_t)-1, SEEK_SET); 603*4887Schin } 604*4887Schin else if (!cp2) 605*4887Schin break; 606*4887Schin else if ((jp->outmode & C_FILE1) && outrec(jp, -1) < 0) 607*4887Schin return -1; 608*4887Schin if (!(cp1 = getrec(jp, 0, 1))) 609*4887Schin break; 610*4887Schin n1 = jp->file[0].fieldlen; 611*4887Schin } 612*4887Schin } 613*4887Schin #if DEBUG_TRACE 614*4887Schin sfprintf(sfstdout, "[X#%d:?,%p,%p,%d%,%d,%d%s]", __LINE__, cp1, cp2, cmp, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : ""); 615*4887Schin #endif 616*4887Schin if (cp2) 617*4887Schin { 618*4887Schin if (hi >= 0 && 619*4887Schin sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR) < hi && 620*4887Schin sfseek(jp->file[1].iop, hi, SEEK_SET) != hi) 621*4887Schin { 622*4887Schin error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name); 623*4887Schin return -1; 624*4887Schin } 625*4887Schin #if DEBUG_TRACE 626*4887Schin sfprintf(sfstdout, "[O#%d:%02o:%02o]", __LINE__, jp->ooutmode, jp->outmode); 627*4887Schin #endif 628*4887Schin cp1 = (!cp1 && cmp && hi < 0 && !jp->file[1].hit && ((jp->ooutmode ^ C_ALL) <= 1 || jp->outmode == 2)) ? cp2 : getrec(jp, 1, 0); 629*4887Schin cmp = 1; 630*4887Schin n = 1; 631*4887Schin } 632*4887Schin else 633*4887Schin { 634*4887Schin cmp = -1; 635*4887Schin n = 0; 636*4887Schin } 637*4887Schin #if DEBUG_TRACE 638*4887Schin sfprintf(sfstdout, "[X#%d:%d,%p,%p,%d,%02o,%02o%s]", __LINE__, n, cp1, cp2, cmp, jp->ooutmode, jp->outmode, (jp->outmode & C_COMMON) ? ",COMMON" : ""); 639*4887Schin #endif 640*4887Schin if (!cp1 || !(jp->outmode & (1<<n))) 641*4887Schin { 642*4887Schin if (cp1 && jp->file[n].iop == sfstdin) 643*4887Schin sfseek(sfstdin, (Sfoff_t)0, SEEK_END); 644*4887Schin return 0; 645*4887Schin } 646*4887Schin if (outrec(jp, cmp) < 0) 647*4887Schin return -1; 648*4887Schin do 649*4887Schin { 650*4887Schin if (!getrec(jp, n, 1)) 651*4887Schin return 0; 652*4887Schin } while (outrec(jp, cmp) >= 0); 653*4887Schin return -1; 654*4887Schin } 655*4887Schin 656*4887Schin int 657*4887Schin b_join(int argc, char** argv, void* context) 658*4887Schin { 659*4887Schin register int n; 660*4887Schin register char* cp; 661*4887Schin register Join_t* jp; 662*4887Schin char* e; 663*4887Schin 664*4887Schin #if !DEBUG_TRACE 665*4887Schin cmdinit(argc, argv, context, ERROR_CATALOG, ERROR_NOTIFY); 666*4887Schin #endif 667*4887Schin if (!(jp = init())) 668*4887Schin error(ERROR_system(1),"out of space"); 669*4887Schin for (;;) 670*4887Schin { 671*4887Schin switch (n = optget(argv, usage)) 672*4887Schin { 673*4887Schin case 0: 674*4887Schin break; 675*4887Schin case 'j': 676*4887Schin /* 677*4887Schin * check for obsolete "-j1 field" and "-j2 field" 678*4887Schin */ 679*4887Schin 680*4887Schin if (opt_info.offset == 0) 681*4887Schin { 682*4887Schin cp = argv[opt_info.index - 1]; 683*4887Schin for (n = strlen(cp) - 1; n > 0 && cp[n] != 'j'; n--); 684*4887Schin n = cp[n] == 'j'; 685*4887Schin } 686*4887Schin else 687*4887Schin n = 0; 688*4887Schin if (n) 689*4887Schin { 690*4887Schin if (opt_info.num!=1 && opt_info.num!=2) 691*4887Schin error(2,"-jfileno field: fileno must be 1 or 2"); 692*4887Schin n = '0' + opt_info.num; 693*4887Schin if (!(cp = argv[opt_info.index])) 694*4887Schin { 695*4887Schin argc = 0; 696*4887Schin break; 697*4887Schin } 698*4887Schin opt_info.num = strtol(cp, &e, 10); 699*4887Schin if (*e) 700*4887Schin { 701*4887Schin argc = 0; 702*4887Schin break; 703*4887Schin } 704*4887Schin opt_info.index++; 705*4887Schin } 706*4887Schin else 707*4887Schin { 708*4887Schin jp->file[0].field = (int)(opt_info.num-1); 709*4887Schin n = '2'; 710*4887Schin } 711*4887Schin /*FALLTHROUGH*/ 712*4887Schin case '1': 713*4887Schin case '2': 714*4887Schin if (opt_info.num <=0) 715*4887Schin error(2,"field number must positive"); 716*4887Schin jp->file[n-'1'].field = (int)(opt_info.num-1); 717*4887Schin continue; 718*4887Schin case 'v': 719*4887Schin jp->outmode &= ~C_COMMON; 720*4887Schin /*FALLTHROUGH*/ 721*4887Schin case 'a': 722*4887Schin if (opt_info.num!=1 && opt_info.num!=2) 723*4887Schin error(2,"%s: file number must be 1 or 2", opt_info.name); 724*4887Schin jp->outmode |= 1<<(opt_info.num-1); 725*4887Schin continue; 726*4887Schin case 'e': 727*4887Schin jp->nullfield = opt_info.arg; 728*4887Schin continue; 729*4887Schin case 'o': 730*4887Schin /* need to accept obsolescent command syntax */ 731*4887Schin n = getolist(jp, opt_info.arg, argv+opt_info.index); 732*4887Schin opt_info.index += n; 733*4887Schin continue; 734*4887Schin case 't': 735*4887Schin jp->state[' '] = jp->state['\t'] = 0; 736*4887Schin n= *(unsigned char*)opt_info.arg; 737*4887Schin jp->state[n] = S_DELIM; 738*4887Schin jp->delim = n; 739*4887Schin continue; 740*4887Schin case 'i': 741*4887Schin jp->ignorecase = !opt_info.num; 742*4887Schin continue; 743*4887Schin case 'B': 744*4887Schin jp->buffered = !opt_info.num; 745*4887Schin continue; 746*4887Schin case ':': 747*4887Schin error(2, "%s", opt_info.arg); 748*4887Schin break; 749*4887Schin case '?': 750*4887Schin done(jp); 751*4887Schin error(ERROR_usage(2), "%s", opt_info.arg); 752*4887Schin break; 753*4887Schin } 754*4887Schin break; 755*4887Schin } 756*4887Schin argv += opt_info.index; 757*4887Schin argc -= opt_info.index; 758*4887Schin if (error_info.errors || argc!=2) 759*4887Schin { 760*4887Schin done(jp); 761*4887Schin error(ERROR_usage(2),"%s", optusage(NiL)); 762*4887Schin } 763*4887Schin jp->ooutmode = jp->outmode; 764*4887Schin jp->file[0].name = cp = *argv++; 765*4887Schin if (streq(cp,"-")) 766*4887Schin { 767*4887Schin if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0) 768*4887Schin { 769*4887Schin if (sfdcseekable(sfstdin)) 770*4887Schin error(ERROR_warn(0),"%s: seek may fail",cp); 771*4887Schin else 772*4887Schin jp->file[0].discard = 1; 773*4887Schin } 774*4887Schin jp->file[0].iop = sfstdin; 775*4887Schin } 776*4887Schin else if (!(jp->file[0].iop = sfopen(NiL, cp, "r"))) 777*4887Schin { 778*4887Schin done(jp); 779*4887Schin error(ERROR_system(1),"%s: cannot open",cp); 780*4887Schin } 781*4887Schin jp->file[1].name = cp = *argv; 782*4887Schin if (streq(cp,"-")) 783*4887Schin { 784*4887Schin if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0) 785*4887Schin { 786*4887Schin if (sfdcseekable(sfstdin)) 787*4887Schin error(ERROR_warn(0),"%s: seek may fail",cp); 788*4887Schin else 789*4887Schin jp->file[1].discard = 1; 790*4887Schin } 791*4887Schin jp->file[1].iop = sfstdin; 792*4887Schin } 793*4887Schin else if (!(jp->file[1].iop = sfopen(NiL, cp, "r"))) 794*4887Schin { 795*4887Schin done(jp); 796*4887Schin error(ERROR_system(1),"%s: cannot open",cp); 797*4887Schin } 798*4887Schin if (jp->buffered) 799*4887Schin { 800*4887Schin sfsetbuf(jp->file[0].iop, jp->file[0].iop, SF_UNBOUND); 801*4887Schin sfsetbuf(jp->file[1].iop, jp->file[0].iop, SF_UNBOUND); 802*4887Schin } 803*4887Schin jp->state['\n'] = S_NL; 804*4887Schin jp->outfile = sfstdout; 805*4887Schin if (!jp->outlist) 806*4887Schin jp->nullfield = 0; 807*4887Schin if (join(jp) < 0) 808*4887Schin { 809*4887Schin done(jp); 810*4887Schin error(ERROR_system(1),"write error"); 811*4887Schin } 812*4887Schin else if (jp->file[0].iop==sfstdin || jp->file[1].iop==sfstdin) 813*4887Schin sfseek(sfstdin,(Sfoff_t)0,SEEK_END); 814*4887Schin done(jp); 815*4887Schin return error_info.errors; 816*4887Schin } 817