14887Schin /*********************************************************************** 24887Schin * * 34887Schin * This software is part of the ast package * 4*8462SApril.Chin@Sun.COM * Copyright (c) 1992-2008 AT&T Intellectual Property * 54887Schin * and is licensed under the * 64887Schin * Common Public License, Version 1.0 * 7*8462SApril.Chin@Sun.COM * by AT&T Intellectual Property * 84887Schin * * 94887Schin * A copy of the License is available at * 104887Schin * http://www.opensource.org/licenses/cpl1.0.txt * 114887Schin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 124887Schin * * 134887Schin * Information and Software Systems Research * 144887Schin * AT&T Research * 154887Schin * Florham Park NJ * 164887Schin * * 174887Schin * Glenn Fowler <gsf@research.att.com> * 184887Schin * David Korn <dgk@research.att.com> * 194887Schin * * 204887Schin ***********************************************************************/ 214887Schin #pragma prototyped 224887Schin /* 234887Schin * David Korn 244887Schin * Glenn Fowler 254887Schin * AT&T Research 264887Schin * 274887Schin * join 284887Schin */ 294887Schin 304887Schin static const char usage[] = 314887Schin "[-?\n@(#)$Id: join (AT&T Research) 2006-10-31 $\n]" 324887Schin USAGE_LICENSE 334887Schin "[+NAME?join - relational database operator]" 344887Schin "[+DESCRIPTION?\bjoin\b performs an \aequality join\a on the files \afile1\a " 354887Schin "and \afile2\a and writes the resulting joined files to standard " 364887Schin "output. By default, a field is delimited by one or more spaces " 374887Schin "and tabs with leading spaces and/or tabs ignored. The \b-t\b option " 384887Schin "can be used to change the field delimiter.]" 394887Schin "[+?The \ajoin field\a is a field in each file on which files are compared. " 404887Schin "By default \bjoin\b writes one line in the output for each pair " 414887Schin "of lines in \afiles1\a and \afiles2\a that have identical join " 424887Schin "fields. The default output line consists of the join field, " 434887Schin "then the remaining fields from \afile1\a, then the remaining " 444887Schin "fields from \afile2\a, but this can be changed with the \b-o\b " 454887Schin "option. The \b-a\b option can be used to add unmatched lines " 464887Schin "to the output. The \b-v\b option can be used to output only " 474887Schin "unmatched lines.]" 484887Schin "[+?The files \afile1\a and \afile2\a must be ordered in the collating " 494887Schin "sequence of \bsort -b\b on the fields on which they are to be " 504887Schin "joined otherwise the results are unspecified.]" 514887Schin "[+?If either \afile1\a or \afile2\a is \b-\b, \bjoin\b " 524887Schin "uses standard input starting at the current location.]" 534887Schin 544887Schin "[e:empty]:[string?Replace empty output fields in the list selected with" 554887Schin " \b-o\b with \astring\a.]" 564887Schin "[o:output]:[list?Construct the output line to comprise the fields specified " 574887Schin "in a blank or comma separated list \alist\a. Each element in " 584887Schin "\alist\a consists of a file number (either 1 or 2), a period, " 594887Schin "and a field number or \b0\b representing the join field. " 604887Schin "As an obsolete feature multiple occurrences of \b-o\b can " 614887Schin "be specified.]" 624887Schin "[t:separator|tabs]:[delim?Use \adelim\a as the field separator for both input" 634887Schin " and output.]" 644887Schin "[1:j1]#[field?Join on field \afield\a of \afile1\a. Fields start at 1.]" 654887Schin "[2:j2]#[field?Join on field \afield\a of \afile2\a. Fields start at 1.]" 664887Schin "[j:join]#[field?Equivalent to \b-1\b \afield\a \b-2\b \afield\a.]" 674887Schin "[a:unpairable]#[fileno?Write a line for each unpairable line in file" 684887Schin " \afileno\a, where \afileno\a is either 1 or 2, in addition to the" 694887Schin " normal output. If \b-a\b options appear for both 1 and 2, then " 704887Schin "all unpairable lines will be output.]" 714887Schin "[v:suppress]#[fileno?Write a line for each unpairable line in file" 724887Schin " \afileno\a, where \afileno\a is either 1 or 2, instead of the normal " 734887Schin "output. If \b-v\b options appear for both 1 and 2, then " 744887Schin "all unpairable lines will be output.] ]" 754887Schin "[i:ignorecase?Ignore case in field comparisons.]" 764887Schin "[B!:mmap?Enable memory mapped reads instead of buffered.]" 774887Schin 784887Schin "[+?The following obsolete option forms are also recognized: \b-j\b \afield\a" 794887Schin " is equivalent to \b-1\b \afield\a \b-2\b \afield\a, \b-j1\b \afield\a" 804887Schin " is equivalent to \b-1\b \afield\a, and \b-j2\b \afield\a is" 814887Schin " equivalent to \b-2\b \afield\a.]" 824887Schin 834887Schin "\n" 844887Schin "\nfile1 file2\n" 854887Schin "\n" 864887Schin "[+EXIT STATUS?]{" 874887Schin "[+0?Both files processed successfully.]" 884887Schin "[+>0?An error occurred.]" 894887Schin "}" 904887Schin "[+SEE ALSO?\bcut\b(1), \bcomm\b(1), \bpaste\b(1), \bsort\b(1), \buniq\b(1)]" 914887Schin ; 924887Schin 934887Schin #include <cmd.h> 944887Schin #include <sfdisc.h> 954887Schin 964887Schin #define C_FILE1 001 974887Schin #define C_FILE2 002 984887Schin #define C_COMMON 004 994887Schin #define C_ALL (C_FILE1|C_FILE2|C_COMMON) 1004887Schin 1014887Schin #define NFIELD 10 1024887Schin #define JOINFIELD 2 1034887Schin 1044887Schin #define S_DELIM 1 1054887Schin #define S_SPACE 2 1064887Schin #define S_NL 3 1074887Schin 1084887Schin typedef struct 1094887Schin { 1104887Schin Sfio_t* iop; 1114887Schin char* name; 1124887Schin char* recptr; 1134887Schin int reclen; 1144887Schin int field; 1154887Schin int fieldlen; 1164887Schin int nfields; 1174887Schin int maxfields; 1184887Schin int spaces; 1194887Schin int hit; 1204887Schin int discard; 1214887Schin char** fieldlist; 1224887Schin } File_t; 1234887Schin 1244887Schin typedef struct 1254887Schin { 1264887Schin unsigned char state[1<<CHAR_BIT]; 1274887Schin Sfio_t* outfile; 1284887Schin int* outlist; 1294887Schin int outmode; 1304887Schin int ooutmode; 1314887Schin char* nullfield; 1324887Schin int delim; 1334887Schin int buffered; 1344887Schin int ignorecase; 1354887Schin char* same; 1364887Schin int samesize; 137*8462SApril.Chin@Sun.COM void* context; 1384887Schin File_t file[2]; 1394887Schin } Join_t; 1404887Schin 1414887Schin static void 1424887Schin done(register Join_t* jp) 1434887Schin { 1444887Schin if (jp->file[0].iop && jp->file[0].iop != sfstdin) 1454887Schin sfclose(jp->file[0].iop); 1464887Schin if (jp->file[1].iop && jp->file[1].iop != sfstdin) 1474887Schin sfclose(jp->file[1].iop); 1484887Schin if (jp->outlist) 1494887Schin free(jp->outlist); 1504887Schin if (jp->file[0].fieldlist) 1514887Schin free(jp->file[0].fieldlist); 1524887Schin if (jp->file[1].fieldlist) 1534887Schin free(jp->file[1].fieldlist); 1544887Schin if (jp->same) 1554887Schin free(jp->same); 1564887Schin free(jp); 1574887Schin } 1584887Schin 1594887Schin static Join_t* 1604887Schin init(void) 1614887Schin { 1624887Schin register Join_t* jp; 1634887Schin 1644887Schin if (jp = newof(0, Join_t, 1, 0)) 1654887Schin { 1664887Schin jp->state[' '] = jp->state['\t'] = S_SPACE; 1674887Schin jp->delim = -1; 1684887Schin jp->nullfield = 0; 1694887Schin if (!(jp->file[0].fieldlist = newof(0, char*, NFIELD + 1, 0)) || 1704887Schin !(jp->file[1].fieldlist = newof(0, char*, NFIELD + 1, 0))) 1714887Schin { 1724887Schin done(jp); 1734887Schin return 0; 1744887Schin } 1754887Schin jp->file[0].maxfields = NFIELD; 1764887Schin jp->file[1].maxfields = NFIELD; 1774887Schin jp->outmode = C_COMMON; 1784887Schin } 1794887Schin return jp; 1804887Schin } 1814887Schin 1824887Schin static int 1834887Schin getolist(Join_t* jp, const char* first, char** arglist) 1844887Schin { 1854887Schin register const char* cp = first; 1864887Schin char** argv = arglist; 1874887Schin register int c; 1884887Schin int* outptr; 1894887Schin int* outmax; 1904887Schin int nfield = NFIELD; 1914887Schin char* str; 1924887Schin 1934887Schin outptr = jp->outlist = newof(0, int, NFIELD + 1, 0); 1944887Schin outmax = outptr + NFIELD; 1954887Schin while (c = *cp++) 1964887Schin { 1974887Schin if (c==' ' || c=='\t' || c==',') 1984887Schin continue; 1994887Schin str = (char*)--cp; 2004887Schin if (*cp=='0' && ((c=cp[1])==0 || c==' ' || c=='\t' || c==',')) 2014887Schin { 2024887Schin str++; 2034887Schin c = JOINFIELD; 2044887Schin goto skip; 2054887Schin } 2064887Schin if (cp[1]!='.' || (*cp!='1' && *cp!='2') || (c=strtol(cp+2,&str,10)) <=0) 2074887Schin { 2084887Schin error(2,"%s: invalid field list",first); 2094887Schin break; 2104887Schin } 2114887Schin c--; 2124887Schin c <<=2; 2134887Schin if (*cp=='2') 2144887Schin c |=1; 2154887Schin skip: 2164887Schin if (outptr >= outmax) 2174887Schin { 2184887Schin jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0); 2194887Schin outptr = jp->outlist + nfield; 2204887Schin nfield *= 2; 2214887Schin outmax = jp->outlist + nfield; 2224887Schin } 2234887Schin *outptr++ = c; 2244887Schin cp = str; 2254887Schin } 2264887Schin /* need to accept obsolescent command syntax */ 2274887Schin while (1) 2284887Schin { 2294887Schin if (!(cp= *argv) || cp[1]!='.' || (*cp!='1' && *cp!='2')) 2304887Schin { 2314887Schin if (*cp=='0' && cp[1]==0) 2324887Schin { 2334887Schin c = JOINFIELD; 2344887Schin goto skip2; 2354887Schin } 2364887Schin break; 2374887Schin } 2384887Schin str = (char*)cp; 2394887Schin c = strtol(cp+2, &str,10); 2404887Schin if (*str || --c<0) 2414887Schin break; 2424887Schin argv++; 2434887Schin c <<= 2; 2444887Schin if (*cp=='2') 2454887Schin c |=1; 2464887Schin skip2: 2474887Schin if (outptr >= outmax) 2484887Schin { 2494887Schin jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0); 2504887Schin outptr = jp->outlist + nfield; 2514887Schin nfield *= 2; 2524887Schin outmax = jp->outlist + nfield; 2534887Schin } 2544887Schin *outptr++ = c; 2554887Schin } 2564887Schin *outptr = -1; 2574887Schin return argv-arglist; 2584887Schin } 2594887Schin 2604887Schin /* 2614887Schin * read in a record from file <index> and split into fields 2624887Schin */ 2634887Schin static unsigned char* 2644887Schin getrec(Join_t* jp, int index, int discard) 2654887Schin { 2664887Schin register unsigned char* sp = jp->state; 2674887Schin register File_t* fp = &jp->file[index]; 2684887Schin register char** ptr = fp->fieldlist; 2694887Schin register char** ptrmax = ptr + fp->maxfields; 2704887Schin register char* cp; 2714887Schin register int n = 0; 2724887Schin 273*8462SApril.Chin@Sun.COM if (sh_checksig(jp->context)) 2744887Schin return 0; 2754887Schin if (discard && fp->discard) 2764887Schin sfraise(fp->iop, SFSK_DISCARD, NiL); 2774887Schin fp->spaces = 0; 2784887Schin fp->hit = 0; 2794887Schin if (!(cp = sfgetr(fp->iop, '\n', 0))) 2804887Schin { 2814887Schin jp->outmode &= ~(1<<index); 2824887Schin return 0; 2834887Schin } 2844887Schin fp->recptr = cp; 2854887Schin fp->reclen = sfvalue(fp->iop); 2864887Schin if (jp->delim=='\n') /* handle new-line delimiter specially */ 2874887Schin { 2884887Schin *ptr++ = cp; 2894887Schin cp += fp->reclen; 2904887Schin } 2914887Schin else while (n!=S_NL) /* separate into fields */ 2924887Schin { 2934887Schin if (ptr >= ptrmax) 2944887Schin { 2954887Schin n = 2*fp->maxfields; 2964887Schin fp->fieldlist = newof(fp->fieldlist, char*, n + 1, 0); 2974887Schin ptr = fp->fieldlist + fp->maxfields; 2984887Schin fp->maxfields = n; 2994887Schin ptrmax = fp->fieldlist+n; 3004887Schin } 3014887Schin *ptr++ = cp; 3024887Schin if (jp->delim<=0 && sp[*(unsigned char*)cp]==S_SPACE) 3034887Schin { 3044887Schin fp->spaces = 1; 3054887Schin while (sp[*(unsigned char*)cp++]==S_SPACE); 3064887Schin cp--; 3074887Schin } 3084887Schin while ((n=sp[*(unsigned char*)cp++])==0); 3094887Schin } 3104887Schin *ptr = cp; 3114887Schin fp->nfields = ptr - fp->fieldlist; 3124887Schin if ((n=fp->field) < fp->nfields) 3134887Schin { 3144887Schin cp = fp->fieldlist[n]; 3154887Schin /* eliminate leading spaces */ 3164887Schin if (fp->spaces) 3174887Schin { 3184887Schin while (sp[*(unsigned char*)cp++]==S_SPACE); 3194887Schin cp--; 3204887Schin } 3214887Schin fp->fieldlen = (fp->fieldlist[n+1]-cp)-1; 3224887Schin return (unsigned char*)cp; 3234887Schin } 3244887Schin fp->fieldlen = 0; 3254887Schin return (unsigned char*)""; 3264887Schin } 3274887Schin 3284887Schin #if DEBUG_TRACE 3294887Schin static unsigned char* u1,u2,u3; 3304887Schin #define getrec(p,n,d) (u1 = getrec(p, n, d), sfprintf(sfstdout, "[G%d#%d@%I*d:%-.8s]", __LINE__, n, sizeof(Sfoff_t), sftell(p->file[n].iop), u1), u1) 3314887Schin #endif 3324887Schin 3334887Schin /* 3344887Schin * print field <n> from file <index> 3354887Schin */ 3364887Schin static int 3374887Schin outfield(Join_t* jp, int index, register int n, int last) 3384887Schin { 3394887Schin register File_t* fp = &jp->file[index]; 3404887Schin register char* cp; 3414887Schin register char* cpmax; 3424887Schin register int size; 3434887Schin register Sfio_t* iop = jp->outfile; 3444887Schin 3454887Schin if (n < fp->nfields) 3464887Schin { 3474887Schin cp = fp->fieldlist[n]; 3484887Schin cpmax = fp->fieldlist[n+1]; 3494887Schin } 3504887Schin else 3514887Schin cp = 0; 3524887Schin if ((n=jp->delim)<=0) 3534887Schin { 3544887Schin if (fp->spaces) 3554887Schin { 3564887Schin /*eliminate leading spaces */ 3574887Schin while (jp->state[*(unsigned char*)cp++]==S_SPACE); 3584887Schin cp--; 3594887Schin } 3604887Schin n = ' '; 3614887Schin } 3624887Schin if (last) 3634887Schin n = '\n'; 3644887Schin if (cp) 3654887Schin size = cpmax-cp; 3664887Schin else 3674887Schin size = 0; 3684887Schin if (size==0) 3694887Schin { 3704887Schin if (!jp->nullfield) 3714887Schin sfputc(iop,n); 3724887Schin else if (sfputr(iop,jp->nullfield,n) < 0) 3734887Schin return -1; 3744887Schin } 3754887Schin else 3764887Schin { 3774887Schin last = cp[size-1]; 3784887Schin cp[size-1] = n; 3794887Schin if (sfwrite(iop,cp,size) < 0) 3804887Schin return -1; 3814887Schin cp[size-1] = last; 3824887Schin } 3834887Schin return 0; 3844887Schin } 3854887Schin 3864887Schin #if DEBUG_TRACE 3874887Schin static int i1,i2,i3; 3884887Schin #define outfield(p,i,n,f) (sfprintf(sfstdout, "[F%d#%d:%d,%d]", __LINE__, i1=i, i2=n, i3=f), outfield(p, i1, i2, i3)) 3894887Schin #endif 3904887Schin 3914887Schin static int 3924887Schin outrec(register Join_t* jp, int mode) 3934887Schin { 3944887Schin register File_t* fp; 3954887Schin register int i; 3964887Schin register int j; 3974887Schin register int k; 3984887Schin register int n; 3994887Schin int* out; 4004887Schin 4014887Schin if (mode < 0 && jp->file[0].hit++) 4024887Schin return 0; 4034887Schin if (mode > 0 && jp->file[1].hit++) 4044887Schin return 0; 4054887Schin if (out = jp->outlist) 4064887Schin { 4074887Schin while ((n = *out++) >= 0) 4084887Schin { 4094887Schin if (n == JOINFIELD) 4104887Schin { 4114887Schin i = mode >= 0; 4124887Schin j = jp->file[i].field; 4134887Schin } 4144887Schin else 4154887Schin { 4164887Schin i = n & 1; 4174887Schin j = (mode<0 && i || mode>0 && !i) ? 4184887Schin jp->file[i].nfields : 4194887Schin n >> 2; 4204887Schin } 4214887Schin if (outfield(jp, i, j, *out < 0) < 0) 4224887Schin return -1; 4234887Schin } 4244887Schin return 0; 4254887Schin } 4264887Schin k = jp->file[0].nfields; 4274887Schin if (mode >= 0) 4284887Schin k += jp->file[1].nfields - 1; 4294887Schin for (i=0; i<2; i++) 4304887Schin { 4314887Schin fp = &jp->file[i]; 4324887Schin if (mode>0 && i==0) 4334887Schin { 4344887Schin k -= (fp->nfields - 1); 4354887Schin continue; 4364887Schin } 4374887Schin n = fp->field; 4384887Schin if (mode||i==0) 4394887Schin { 4404887Schin /* output join field first */ 4414887Schin if (outfield(jp,i,n,!--k) < 0) 4424887Schin return -1; 4434887Schin if (!k) 4444887Schin return 0; 4454887Schin for (j=0; j<n; j++) 4464887Schin { 4474887Schin if (outfield(jp,i,j,!--k) < 0) 4484887Schin return -1; 4494887Schin if (!k) 4504887Schin return 0; 4514887Schin } 4524887Schin j = n + 1; 4534887Schin } 4544887Schin else 4554887Schin j = 0; 4564887Schin for (;j<fp->nfields; j++) 4574887Schin { 4584887Schin if (j!=n && outfield(jp,i,j,!--k) < 0) 4594887Schin return -1; 4604887Schin if (!k) 4614887Schin return 0; 4624887Schin } 4634887Schin } 4644887Schin return 0; 4654887Schin } 4664887Schin 4674887Schin #if DEBUG_TRACE 4684887Schin #define outrec(p,n) (sfprintf(sfstdout, "[R#%d,%d,%lld,%lld:%-.*s{%d}:%-.*s{%d}]", __LINE__, i1=n, lo, hi, jp->file[0].fieldlen, cp1, jp->file[0].hit, jp->file[1].fieldlen, cp2, jp->file[1].hit), outrec(p, i1)) 4694887Schin #endif 4704887Schin 4714887Schin static int 4724887Schin join(Join_t* jp) 4734887Schin { 4744887Schin register unsigned char* cp1; 4754887Schin register unsigned char* cp2; 4764887Schin register int n1; 4774887Schin register int n2; 4784887Schin register int n; 4794887Schin register int cmp; 4804887Schin register int same; 4814887Schin int o2; 4824887Schin Sfoff_t lo = -1; 4834887Schin Sfoff_t hi = -1; 4844887Schin 4854887Schin if ((cp1 = getrec(jp, 0, 0)) && (cp2 = getrec(jp, 1, 0)) || (cp2 = 0)) 4864887Schin { 4874887Schin n1 = jp->file[0].fieldlen; 4884887Schin n2 = jp->file[1].fieldlen; 4894887Schin same = 0; 4904887Schin for (;;) 4914887Schin { 4924887Schin n = n1 < n2 ? n1 : n2; 4934887Schin #if DEBUG_TRACE 4944887Schin if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n))) 4954887Schin cmp = n1 - n2; 4964887Schin sfprintf(sfstdout, "[C#%d:%d(%c-%c),%d,%lld,%lld%s]", __LINE__, cmp, *cp1, *cp2, same, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : ""); 4974887Schin if (!cmp) 4984887Schin #else 4994887Schin if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)) && !(cmp = n1 - n2)) 5004887Schin #endif 5014887Schin { 5024887Schin if (!(jp->outmode & C_COMMON)) 5034887Schin { 5044887Schin if (cp1 = getrec(jp, 0, 1)) 5054887Schin { 5064887Schin n1 = jp->file[0].fieldlen; 5074887Schin same = 1; 5084887Schin continue; 5094887Schin } 5104887Schin if ((jp->ooutmode & (C_FILE1|C_FILE2)) != C_FILE2) 5114887Schin break; 5124887Schin if (sfseek(jp->file[0].iop, (Sfoff_t)-jp->file[0].reclen, SEEK_CUR) < 0 || !(cp1 = getrec(jp, 0, 0))) 5134887Schin { 5144887Schin error(ERROR_SYSTEM|2, "%s: seek error", jp->file[0].name); 5154887Schin return -1; 5164887Schin } 5174887Schin } 5184887Schin else if (outrec(jp, 0) < 0) 5194887Schin return -1; 5204887Schin else if (lo < 0 && (jp->outmode & C_COMMON)) 5214887Schin { 5224887Schin if ((lo = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0) 5234887Schin { 5244887Schin error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name); 5254887Schin return -1; 5264887Schin } 5274887Schin lo -= jp->file[1].reclen; 5284887Schin } 5294887Schin if (cp2 = getrec(jp, 1, lo < 0)) 5304887Schin { 5314887Schin n2 = jp->file[1].fieldlen; 5324887Schin continue; 5334887Schin } 5344887Schin #if DEBUG_TRACE 5354887Schin sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi); 5364887Schin #endif 5374887Schin } 5384887Schin else if (cmp > 0) 5394887Schin { 5404887Schin if (same) 5414887Schin { 5424887Schin same = 0; 5434887Schin next: 5444887Schin if (n2 > jp->samesize) 5454887Schin { 5464887Schin jp->samesize = roundof(n2, 16); 5474887Schin if (!(jp->same = newof(jp->same, char, jp->samesize, 0))) 5484887Schin { 5494887Schin error(ERROR_SYSTEM|2, "out of space"); 5504887Schin return -1; 5514887Schin } 5524887Schin } 5534887Schin memcpy(jp->same, cp2, o2 = n2); 5544887Schin if (!(cp2 = getrec(jp, 1, 0))) 5554887Schin break; 5564887Schin n2 = jp->file[1].fieldlen; 5574887Schin if (n2 == o2 && *cp2 == *jp->same && !memcmp(cp2, jp->same, n2)) 5584887Schin goto next; 5594887Schin continue; 5604887Schin } 5614887Schin if (hi >= 0) 5624887Schin { 5634887Schin if (sfseek(jp->file[1].iop, hi, SEEK_SET) != hi) 5644887Schin { 5654887Schin error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name); 5664887Schin return -1; 5674887Schin } 5684887Schin hi = -1; 5694887Schin } 5704887Schin else if ((jp->outmode & C_FILE2) && outrec(jp, 1) < 0) 5714887Schin return -1; 5724887Schin lo = -1; 5734887Schin if (cp2 = getrec(jp, 1, 1)) 5744887Schin { 5754887Schin n2 = jp->file[1].fieldlen; 5764887Schin continue; 5774887Schin } 5784887Schin #if DEBUG_TRACE 5794887Schin sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi); 5804887Schin #endif 5814887Schin } 5824887Schin else if (same) 5834887Schin { 5844887Schin same = 0; 5854887Schin if (!(cp1 = getrec(jp, 0, 0))) 5864887Schin break; 5874887Schin n1 = jp->file[0].fieldlen; 5884887Schin continue; 5894887Schin } 5904887Schin if (lo >= 0) 5914887Schin { 5924887Schin if ((hi = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0 || 5934887Schin (hi -= jp->file[1].reclen) < 0 || 5944887Schin sfseek(jp->file[1].iop, lo, SEEK_SET) != lo || 5954887Schin !(cp2 = getrec(jp, 1, 0))) 5964887Schin { 5974887Schin error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name); 5984887Schin return -1; 5994887Schin } 6004887Schin n2 = jp->file[1].fieldlen; 6014887Schin lo = -1; 6024887Schin if (jp->file[1].discard) 6034887Schin sfseek(jp->file[1].iop, (Sfoff_t)-1, SEEK_SET); 6044887Schin } 6054887Schin else if (!cp2) 6064887Schin break; 6074887Schin else if ((jp->outmode & C_FILE1) && outrec(jp, -1) < 0) 6084887Schin return -1; 6094887Schin if (!(cp1 = getrec(jp, 0, 1))) 6104887Schin break; 6114887Schin n1 = jp->file[0].fieldlen; 6124887Schin } 6134887Schin } 6144887Schin #if DEBUG_TRACE 6154887Schin sfprintf(sfstdout, "[X#%d:?,%p,%p,%d%,%d,%d%s]", __LINE__, cp1, cp2, cmp, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : ""); 6164887Schin #endif 6174887Schin if (cp2) 6184887Schin { 6194887Schin if (hi >= 0 && 6204887Schin sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR) < hi && 6214887Schin sfseek(jp->file[1].iop, hi, SEEK_SET) != hi) 6224887Schin { 6234887Schin error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name); 6244887Schin return -1; 6254887Schin } 6264887Schin #if DEBUG_TRACE 6274887Schin sfprintf(sfstdout, "[O#%d:%02o:%02o]", __LINE__, jp->ooutmode, jp->outmode); 6284887Schin #endif 6294887Schin cp1 = (!cp1 && cmp && hi < 0 && !jp->file[1].hit && ((jp->ooutmode ^ C_ALL) <= 1 || jp->outmode == 2)) ? cp2 : getrec(jp, 1, 0); 6304887Schin cmp = 1; 6314887Schin n = 1; 6324887Schin } 6334887Schin else 6344887Schin { 6354887Schin cmp = -1; 6364887Schin n = 0; 6374887Schin } 6384887Schin #if DEBUG_TRACE 6394887Schin sfprintf(sfstdout, "[X#%d:%d,%p,%p,%d,%02o,%02o%s]", __LINE__, n, cp1, cp2, cmp, jp->ooutmode, jp->outmode, (jp->outmode & C_COMMON) ? ",COMMON" : ""); 6404887Schin #endif 6414887Schin if (!cp1 || !(jp->outmode & (1<<n))) 6424887Schin { 6434887Schin if (cp1 && jp->file[n].iop == sfstdin) 6444887Schin sfseek(sfstdin, (Sfoff_t)0, SEEK_END); 6454887Schin return 0; 6464887Schin } 6474887Schin if (outrec(jp, cmp) < 0) 6484887Schin return -1; 6494887Schin do 6504887Schin { 6514887Schin if (!getrec(jp, n, 1)) 6524887Schin return 0; 6534887Schin } while (outrec(jp, cmp) >= 0); 6544887Schin return -1; 6554887Schin } 6564887Schin 6574887Schin int 6584887Schin b_join(int argc, char** argv, void* context) 6594887Schin { 6604887Schin register int n; 6614887Schin register char* cp; 6624887Schin register Join_t* jp; 6634887Schin char* e; 6644887Schin 6654887Schin #if !DEBUG_TRACE 6664887Schin cmdinit(argc, argv, context, ERROR_CATALOG, ERROR_NOTIFY); 6674887Schin #endif 6684887Schin if (!(jp = init())) 6694887Schin error(ERROR_system(1),"out of space"); 670*8462SApril.Chin@Sun.COM jp->context = context; 6714887Schin for (;;) 6724887Schin { 6734887Schin switch (n = optget(argv, usage)) 6744887Schin { 6754887Schin case 0: 6764887Schin break; 6774887Schin case 'j': 6784887Schin /* 6794887Schin * check for obsolete "-j1 field" and "-j2 field" 6804887Schin */ 6814887Schin 6824887Schin if (opt_info.offset == 0) 6834887Schin { 6844887Schin cp = argv[opt_info.index - 1]; 6854887Schin for (n = strlen(cp) - 1; n > 0 && cp[n] != 'j'; n--); 6864887Schin n = cp[n] == 'j'; 6874887Schin } 6884887Schin else 6894887Schin n = 0; 6904887Schin if (n) 6914887Schin { 6924887Schin if (opt_info.num!=1 && opt_info.num!=2) 6934887Schin error(2,"-jfileno field: fileno must be 1 or 2"); 6944887Schin n = '0' + opt_info.num; 6954887Schin if (!(cp = argv[opt_info.index])) 6964887Schin { 6974887Schin argc = 0; 6984887Schin break; 6994887Schin } 7004887Schin opt_info.num = strtol(cp, &e, 10); 7014887Schin if (*e) 7024887Schin { 7034887Schin argc = 0; 7044887Schin break; 7054887Schin } 7064887Schin opt_info.index++; 7074887Schin } 7084887Schin else 7094887Schin { 7104887Schin jp->file[0].field = (int)(opt_info.num-1); 7114887Schin n = '2'; 7124887Schin } 7134887Schin /*FALLTHROUGH*/ 7144887Schin case '1': 7154887Schin case '2': 7164887Schin if (opt_info.num <=0) 7174887Schin error(2,"field number must positive"); 7184887Schin jp->file[n-'1'].field = (int)(opt_info.num-1); 7194887Schin continue; 7204887Schin case 'v': 7214887Schin jp->outmode &= ~C_COMMON; 7224887Schin /*FALLTHROUGH*/ 7234887Schin case 'a': 7244887Schin if (opt_info.num!=1 && opt_info.num!=2) 7254887Schin error(2,"%s: file number must be 1 or 2", opt_info.name); 7264887Schin jp->outmode |= 1<<(opt_info.num-1); 7274887Schin continue; 7284887Schin case 'e': 7294887Schin jp->nullfield = opt_info.arg; 7304887Schin continue; 7314887Schin case 'o': 7324887Schin /* need to accept obsolescent command syntax */ 7334887Schin n = getolist(jp, opt_info.arg, argv+opt_info.index); 7344887Schin opt_info.index += n; 7354887Schin continue; 7364887Schin case 't': 7374887Schin jp->state[' '] = jp->state['\t'] = 0; 7384887Schin n= *(unsigned char*)opt_info.arg; 7394887Schin jp->state[n] = S_DELIM; 7404887Schin jp->delim = n; 7414887Schin continue; 7424887Schin case 'i': 7434887Schin jp->ignorecase = !opt_info.num; 7444887Schin continue; 7454887Schin case 'B': 7464887Schin jp->buffered = !opt_info.num; 7474887Schin continue; 7484887Schin case ':': 7494887Schin error(2, "%s", opt_info.arg); 7504887Schin break; 7514887Schin case '?': 7524887Schin done(jp); 7534887Schin error(ERROR_usage(2), "%s", opt_info.arg); 7544887Schin break; 7554887Schin } 7564887Schin break; 7574887Schin } 7584887Schin argv += opt_info.index; 7594887Schin argc -= opt_info.index; 7604887Schin if (error_info.errors || argc!=2) 7614887Schin { 7624887Schin done(jp); 7634887Schin error(ERROR_usage(2),"%s", optusage(NiL)); 7644887Schin } 7654887Schin jp->ooutmode = jp->outmode; 7664887Schin jp->file[0].name = cp = *argv++; 7674887Schin if (streq(cp,"-")) 7684887Schin { 7694887Schin if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0) 7704887Schin { 7714887Schin if (sfdcseekable(sfstdin)) 7724887Schin error(ERROR_warn(0),"%s: seek may fail",cp); 7734887Schin else 7744887Schin jp->file[0].discard = 1; 7754887Schin } 7764887Schin jp->file[0].iop = sfstdin; 7774887Schin } 7784887Schin else if (!(jp->file[0].iop = sfopen(NiL, cp, "r"))) 7794887Schin { 7804887Schin done(jp); 7814887Schin error(ERROR_system(1),"%s: cannot open",cp); 7824887Schin } 7834887Schin jp->file[1].name = cp = *argv; 7844887Schin if (streq(cp,"-")) 7854887Schin { 7864887Schin if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0) 7874887Schin { 7884887Schin if (sfdcseekable(sfstdin)) 7894887Schin error(ERROR_warn(0),"%s: seek may fail",cp); 7904887Schin else 7914887Schin jp->file[1].discard = 1; 7924887Schin } 7934887Schin jp->file[1].iop = sfstdin; 7944887Schin } 7954887Schin else if (!(jp->file[1].iop = sfopen(NiL, cp, "r"))) 7964887Schin { 7974887Schin done(jp); 7984887Schin error(ERROR_system(1),"%s: cannot open",cp); 7994887Schin } 8004887Schin if (jp->buffered) 8014887Schin { 8024887Schin sfsetbuf(jp->file[0].iop, jp->file[0].iop, SF_UNBOUND); 8034887Schin sfsetbuf(jp->file[1].iop, jp->file[0].iop, SF_UNBOUND); 8044887Schin } 8054887Schin jp->state['\n'] = S_NL; 8064887Schin jp->outfile = sfstdout; 8074887Schin if (!jp->outlist) 8084887Schin jp->nullfield = 0; 8094887Schin if (join(jp) < 0) 8104887Schin { 8114887Schin done(jp); 8124887Schin error(ERROR_system(1),"write error"); 8134887Schin } 8144887Schin else if (jp->file[0].iop==sfstdin || jp->file[1].iop==sfstdin) 8154887Schin sfseek(sfstdin,(Sfoff_t)0,SEEK_END); 8164887Schin done(jp); 8174887Schin return error_info.errors; 8184887Schin } 819