xref: /onnv-gate/usr/src/lib/libcmd/common/join.c (revision 8462)
14887Schin /***********************************************************************
24887Schin *                                                                      *
34887Schin *               This software is part of the ast package               *
4*8462SApril.Chin@Sun.COM *          Copyright (c) 1992-2008 AT&T Intellectual Property          *
54887Schin *                      and is licensed under the                       *
64887Schin *                  Common Public License, Version 1.0                  *
7*8462SApril.Chin@Sun.COM *                    by AT&T Intellectual Property                     *
84887Schin *                                                                      *
94887Schin *                A copy of the License is available at                 *
104887Schin *            http://www.opensource.org/licenses/cpl1.0.txt             *
114887Schin *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
124887Schin *                                                                      *
134887Schin *              Information and Software Systems Research               *
144887Schin *                            AT&T Research                             *
154887Schin *                           Florham Park NJ                            *
164887Schin *                                                                      *
174887Schin *                 Glenn Fowler <gsf@research.att.com>                  *
184887Schin *                  David Korn <dgk@research.att.com>                   *
194887Schin *                                                                      *
204887Schin ***********************************************************************/
214887Schin #pragma prototyped
224887Schin /*
234887Schin  * David Korn
244887Schin  * Glenn Fowler
254887Schin  * AT&T Research
264887Schin  *
274887Schin  * join
284887Schin  */
294887Schin 
304887Schin static const char usage[] =
314887Schin "[-?\n@(#)$Id: join (AT&T Research) 2006-10-31 $\n]"
324887Schin USAGE_LICENSE
334887Schin "[+NAME?join - relational database operator]"
344887Schin "[+DESCRIPTION?\bjoin\b performs an \aequality join\a on the files \afile1\a "
354887Schin 	"and \afile2\a and writes the resulting joined files to standard "
364887Schin 	"output.  By default, a field is delimited by one or more spaces "
374887Schin 	"and tabs with leading spaces and/or tabs ignored.  The \b-t\b option "
384887Schin 	"can be used to change the field delimiter.]"
394887Schin "[+?The \ajoin field\a is a field in each file on which files are compared. "
404887Schin 	"By default \bjoin\b writes one line in the output for each pair "
414887Schin 	"of lines in \afiles1\a and \afiles2\a that have identical join "
424887Schin 	"fields.  The default output line consists of the join field, "
434887Schin 	"then the remaining fields from \afile1\a, then the remaining "
444887Schin 	"fields from \afile2\a, but this can be changed with the \b-o\b "
454887Schin 	"option.  The \b-a\b option can be used to add unmatched lines "
464887Schin 	"to the output.  The \b-v\b option can be used to output only "
474887Schin 	"unmatched lines.]"
484887Schin "[+?The files \afile1\a and \afile2\a must be ordered in the collating "
494887Schin 	"sequence of \bsort -b\b on the fields on which they are to be "
504887Schin 	"joined otherwise the results are unspecified.]"
514887Schin "[+?If either \afile1\a or \afile2\a is \b-\b, \bjoin\b "
524887Schin         "uses standard input starting at the current location.]"
534887Schin 
544887Schin "[e:empty]:[string?Replace empty output fields in the list selected with"
554887Schin "	\b-o\b with \astring\a.]"
564887Schin "[o:output]:[list?Construct the output line to comprise the fields specified "
574887Schin 	"in a blank or comma separated list \alist\a.  Each element in "
584887Schin 	"\alist\a consists of a file number (either 1 or 2), a period, "
594887Schin 	"and a field number or \b0\b representing the join field.  "
604887Schin 	"As an obsolete feature multiple occurrences of \b-o\b can "
614887Schin 	"be specified.]"
624887Schin "[t:separator|tabs]:[delim?Use \adelim\a as the field separator for both input"
634887Schin "	and output.]"
644887Schin "[1:j1]#[field?Join on field \afield\a of \afile1\a.  Fields start at 1.]"
654887Schin "[2:j2]#[field?Join on field \afield\a of \afile2\a.  Fields start at 1.]"
664887Schin "[j:join]#[field?Equivalent to \b-1\b \afield\a \b-2\b \afield\a.]"
674887Schin "[a:unpairable]#[fileno?Write a line for each unpairable line in file"
684887Schin "	\afileno\a, where \afileno\a is either 1 or 2, in addition to the"
694887Schin "	normal output.  If \b-a\b options appear for both 1 and 2, then "
704887Schin 	"all unpairable lines will be output.]"
714887Schin "[v:suppress]#[fileno?Write a line for each unpairable line in file"
724887Schin "	\afileno\a, where \afileno\a is either 1 or 2, instead of the normal "
734887Schin 	"output.  If \b-v\b options appear for both 1 and 2, then "
744887Schin 	"all unpairable lines will be output.] ]"
754887Schin "[i:ignorecase?Ignore case in field comparisons.]"
764887Schin "[B!:mmap?Enable memory mapped reads instead of buffered.]"
774887Schin 
784887Schin "[+?The following obsolete option forms are also recognized: \b-j\b \afield\a"
794887Schin "	is equivalent to \b-1\b \afield\a \b-2\b \afield\a, \b-j1\b \afield\a"
804887Schin "	is equivalent to \b-1\b \afield\a, and \b-j2\b \afield\a is"
814887Schin "	equivalent to \b-2\b \afield\a.]"
824887Schin 
834887Schin "\n"
844887Schin "\nfile1 file2\n"
854887Schin "\n"
864887Schin "[+EXIT STATUS?]{"
874887Schin 	"[+0?Both files processed successfully.]"
884887Schin 	"[+>0?An error occurred.]"
894887Schin "}"
904887Schin "[+SEE ALSO?\bcut\b(1), \bcomm\b(1), \bpaste\b(1), \bsort\b(1), \buniq\b(1)]"
914887Schin ;
924887Schin 
934887Schin #include <cmd.h>
944887Schin #include <sfdisc.h>
954887Schin 
/*
 * Join_t.outmode bits: which kinds of records are written
 * bit 0 and bit 1 are also addressed as 1<<(fileno-1) and 1<<index
 */
#define C_FILE1		(1<<0)	/* unpairable records of file 1	*/
#define C_FILE2		(1<<1)	/* unpairable records of file 2	*/
#define C_COMMON	(1<<2)	/* paired records		*/
#define C_ALL		(C_COMMON|C_FILE2|C_FILE1)

/*
 * NFIELD is the initial capacity of the field and -o list arrays
 * JOINFIELD is the -o list code that stands for the join field
 */
#define NFIELD		(10)
#define JOINFIELD	(2)

/*
 * non-zero character classes kept in Join_t.state[]
 */
#define S_DELIM		(1)	/* explicit -t delimiter	*/
#define S_SPACE		(2)	/* default blank delimiter	*/
#define S_NL		(3)	/* record terminator		*/
1074887Schin 
1084887Schin typedef struct
1094887Schin {
1104887Schin 	Sfio_t*		iop;
1114887Schin 	char*		name;
1124887Schin 	char*		recptr;
1134887Schin 	int		reclen;
1144887Schin 	int		field;
1154887Schin 	int		fieldlen;
1164887Schin 	int		nfields;
1174887Schin 	int		maxfields;
1184887Schin 	int		spaces;
1194887Schin 	int		hit;
1204887Schin 	int		discard;
1214887Schin 	char**		fieldlist;
1224887Schin } File_t;
1234887Schin 
1244887Schin typedef struct
1254887Schin {
1264887Schin 	unsigned char	state[1<<CHAR_BIT];
1274887Schin 	Sfio_t*		outfile;
1284887Schin 	int*		outlist;
1294887Schin 	int		outmode;
1304887Schin 	int		ooutmode;
1314887Schin 	char*		nullfield;
1324887Schin 	int		delim;
1334887Schin 	int		buffered;
1344887Schin 	int		ignorecase;
1354887Schin 	char*		same;
1364887Schin 	int		samesize;
137*8462SApril.Chin@Sun.COM 	void*		context;
1384887Schin 	File_t		file[2];
1394887Schin } Join_t;
1404887Schin 
1414887Schin static void
1424887Schin done(register Join_t* jp)
1434887Schin {
1444887Schin 	if (jp->file[0].iop && jp->file[0].iop != sfstdin)
1454887Schin 		sfclose(jp->file[0].iop);
1464887Schin 	if (jp->file[1].iop && jp->file[1].iop != sfstdin)
1474887Schin 		sfclose(jp->file[1].iop);
1484887Schin 	if (jp->outlist)
1494887Schin 		free(jp->outlist);
1504887Schin 	if (jp->file[0].fieldlist)
1514887Schin 		free(jp->file[0].fieldlist);
1524887Schin 	if (jp->file[1].fieldlist)
1534887Schin 		free(jp->file[1].fieldlist);
1544887Schin 	if (jp->same)
1554887Schin 		free(jp->same);
1564887Schin 	free(jp);
1574887Schin }
1584887Schin 
1594887Schin static Join_t*
1604887Schin init(void)
1614887Schin {
1624887Schin 	register Join_t*	jp;
1634887Schin 
1644887Schin 	if (jp = newof(0, Join_t, 1, 0))
1654887Schin 	{
1664887Schin 		jp->state[' '] = jp->state['\t'] = S_SPACE;
1674887Schin 		jp->delim = -1;
1684887Schin 		jp->nullfield = 0;
1694887Schin 		if (!(jp->file[0].fieldlist = newof(0, char*, NFIELD + 1, 0)) ||
1704887Schin 		    !(jp->file[1].fieldlist = newof(0, char*, NFIELD + 1, 0)))
1714887Schin 		{
1724887Schin 			done(jp);
1734887Schin 			return 0;
1744887Schin 		}
1754887Schin 		jp->file[0].maxfields = NFIELD;
1764887Schin 		jp->file[1].maxfields = NFIELD;
1774887Schin 		jp->outmode = C_COMMON;
1784887Schin 	}
1794887Schin 	return jp;
1804887Schin }
1814887Schin 
1824887Schin static int
1834887Schin getolist(Join_t* jp, const char* first, char** arglist)
1844887Schin {
1854887Schin 	register const char*	cp = first;
1864887Schin 	char**			argv = arglist;
1874887Schin 	register int		c;
1884887Schin 	int*			outptr;
1894887Schin 	int*			outmax;
1904887Schin 	int			nfield = NFIELD;
1914887Schin 	char*			str;
1924887Schin 
1934887Schin 	outptr = jp->outlist = newof(0, int, NFIELD + 1, 0);
1944887Schin 	outmax = outptr + NFIELD;
1954887Schin 	while (c = *cp++)
1964887Schin 	{
1974887Schin 		if (c==' ' || c=='\t' || c==',')
1984887Schin 			continue;
1994887Schin 		str = (char*)--cp;
2004887Schin 		if (*cp=='0' && ((c=cp[1])==0 || c==' ' || c=='\t' || c==','))
2014887Schin 		{
2024887Schin 			str++;
2034887Schin 			c = JOINFIELD;
2044887Schin 			goto skip;
2054887Schin 		}
2064887Schin 		if (cp[1]!='.' || (*cp!='1' && *cp!='2') || (c=strtol(cp+2,&str,10)) <=0)
2074887Schin 		{
2084887Schin 			error(2,"%s: invalid field list",first);
2094887Schin 			break;
2104887Schin 		}
2114887Schin 		c--;
2124887Schin 		c <<=2;
2134887Schin 		if (*cp=='2')
2144887Schin 			c |=1;
2154887Schin 	skip:
2164887Schin 		if (outptr >= outmax)
2174887Schin 		{
2184887Schin 			jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0);
2194887Schin 			outptr = jp->outlist + nfield;
2204887Schin 			nfield *= 2;
2214887Schin 			outmax = jp->outlist + nfield;
2224887Schin 		}
2234887Schin 		*outptr++ = c;
2244887Schin 		cp = str;
2254887Schin 	}
2264887Schin 	/* need to accept obsolescent command syntax */
2274887Schin 	while (1)
2284887Schin 	{
2294887Schin 		if (!(cp= *argv) || cp[1]!='.' || (*cp!='1' && *cp!='2'))
2304887Schin 		{
2314887Schin 			if (*cp=='0' && cp[1]==0)
2324887Schin 			{
2334887Schin 				c = JOINFIELD;
2344887Schin 				goto skip2;
2354887Schin 			}
2364887Schin 			break;
2374887Schin 		}
2384887Schin 		str = (char*)cp;
2394887Schin 		c = strtol(cp+2, &str,10);
2404887Schin 		if (*str || --c<0)
2414887Schin 			break;
2424887Schin 		argv++;
2434887Schin 		c <<= 2;
2444887Schin 		if (*cp=='2')
2454887Schin 			c |=1;
2464887Schin 	skip2:
2474887Schin 		if (outptr >= outmax)
2484887Schin 		{
2494887Schin 			jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0);
2504887Schin 			outptr = jp->outlist + nfield;
2514887Schin 			nfield *= 2;
2524887Schin 			outmax = jp->outlist + nfield;
2534887Schin 		}
2544887Schin 		*outptr++ = c;
2554887Schin 	}
2564887Schin 	*outptr = -1;
2574887Schin 	return argv-arglist;
2584887Schin }
2594887Schin 
2604887Schin /*
2614887Schin  * read in a record from file <index> and split into fields
2624887Schin  */
2634887Schin static unsigned char*
2644887Schin getrec(Join_t* jp, int index, int discard)
2654887Schin {
2664887Schin 	register unsigned char*	sp = jp->state;
2674887Schin 	register File_t*	fp = &jp->file[index];
2684887Schin 	register char**		ptr = fp->fieldlist;
2694887Schin 	register char**		ptrmax = ptr + fp->maxfields;
2704887Schin 	register char*		cp;
2714887Schin 	register int		n = 0;
2724887Schin 
273*8462SApril.Chin@Sun.COM 	if (sh_checksig(jp->context))
2744887Schin 		return 0;
2754887Schin 	if (discard && fp->discard)
2764887Schin 		sfraise(fp->iop, SFSK_DISCARD, NiL);
2774887Schin 	fp->spaces = 0;
2784887Schin 	fp->hit = 0;
2794887Schin 	if (!(cp = sfgetr(fp->iop, '\n', 0)))
2804887Schin 	{
2814887Schin 		jp->outmode &= ~(1<<index);
2824887Schin 		return 0;
2834887Schin 	}
2844887Schin 	fp->recptr = cp;
2854887Schin 	fp->reclen = sfvalue(fp->iop);
2864887Schin 	if (jp->delim=='\n')	/* handle new-line delimiter specially */
2874887Schin 	{
2884887Schin 		*ptr++ = cp;
2894887Schin 		cp += fp->reclen;
2904887Schin 	}
2914887Schin 	else while (n!=S_NL) /* separate into fields */
2924887Schin 	{
2934887Schin 		if (ptr >= ptrmax)
2944887Schin 		{
2954887Schin 			n = 2*fp->maxfields;
2964887Schin 			fp->fieldlist = newof(fp->fieldlist, char*, n + 1, 0);
2974887Schin 			ptr = fp->fieldlist + fp->maxfields;
2984887Schin 			fp->maxfields = n;
2994887Schin 			ptrmax = fp->fieldlist+n;
3004887Schin 		}
3014887Schin 		*ptr++ = cp;
3024887Schin 		if (jp->delim<=0 && sp[*(unsigned char*)cp]==S_SPACE)
3034887Schin 		{
3044887Schin 			fp->spaces = 1;
3054887Schin 			while (sp[*(unsigned char*)cp++]==S_SPACE);
3064887Schin 			cp--;
3074887Schin 		}
3084887Schin 		while ((n=sp[*(unsigned char*)cp++])==0);
3094887Schin 	}
3104887Schin 	*ptr = cp;
3114887Schin 	fp->nfields = ptr - fp->fieldlist;
3124887Schin 	if ((n=fp->field) < fp->nfields)
3134887Schin 	{
3144887Schin 		cp = fp->fieldlist[n];
3154887Schin 		/* eliminate leading spaces */
3164887Schin 		if (fp->spaces)
3174887Schin 		{
3184887Schin 			while (sp[*(unsigned char*)cp++]==S_SPACE);
3194887Schin 			cp--;
3204887Schin 		}
3214887Schin 		fp->fieldlen = (fp->fieldlist[n+1]-cp)-1;
3224887Schin 		return (unsigned char*)cp;
3234887Schin 	}
3244887Schin 	fp->fieldlen = 0;
3254887Schin 	return (unsigned char*)"";
3264887Schin }
3274887Schin 
3284887Schin #if DEBUG_TRACE
3294887Schin static unsigned char* u1,u2,u3;
3304887Schin #define getrec(p,n,d)	(u1 = getrec(p, n, d), sfprintf(sfstdout, "[G%d#%d@%I*d:%-.8s]", __LINE__, n, sizeof(Sfoff_t), sftell(p->file[n].iop), u1), u1)
3314887Schin #endif
3324887Schin 
3334887Schin /*
3344887Schin  * print field <n> from file <index>
3354887Schin  */
3364887Schin static int
3374887Schin outfield(Join_t* jp, int index, register int n, int last)
3384887Schin {
3394887Schin 	register File_t*	fp = &jp->file[index];
3404887Schin 	register char*		cp;
3414887Schin 	register char*		cpmax;
3424887Schin 	register int		size;
3434887Schin 	register Sfio_t*	iop = jp->outfile;
3444887Schin 
3454887Schin 	if (n < fp->nfields)
3464887Schin 	{
3474887Schin 		cp = fp->fieldlist[n];
3484887Schin 		cpmax = fp->fieldlist[n+1];
3494887Schin 	}
3504887Schin 	else
3514887Schin 		cp = 0;
3524887Schin 	if ((n=jp->delim)<=0)
3534887Schin 	{
3544887Schin 		if (fp->spaces)
3554887Schin 		{
3564887Schin 			/*eliminate leading spaces */
3574887Schin 			while (jp->state[*(unsigned char*)cp++]==S_SPACE);
3584887Schin 			cp--;
3594887Schin 		}
3604887Schin 		n = ' ';
3614887Schin 	}
3624887Schin 	if (last)
3634887Schin 		n = '\n';
3644887Schin 	if (cp)
3654887Schin 		size = cpmax-cp;
3664887Schin 	else
3674887Schin 		size = 0;
3684887Schin 	if (size==0)
3694887Schin 	{
3704887Schin 		if (!jp->nullfield)
3714887Schin 			sfputc(iop,n);
3724887Schin 		else if (sfputr(iop,jp->nullfield,n) < 0)
3734887Schin 			return -1;
3744887Schin 	}
3754887Schin 	else
3764887Schin 	{
3774887Schin 		last = cp[size-1];
3784887Schin 		cp[size-1] = n;
3794887Schin 		if (sfwrite(iop,cp,size) < 0)
3804887Schin 			return -1;
3814887Schin 		cp[size-1] = last;
3824887Schin 	}
3834887Schin 	return 0;
3844887Schin }
3854887Schin 
3864887Schin #if DEBUG_TRACE
3874887Schin static int i1,i2,i3;
3884887Schin #define outfield(p,i,n,f)	(sfprintf(sfstdout, "[F%d#%d:%d,%d]", __LINE__, i1=i, i2=n, i3=f), outfield(p, i1, i2, i3))
3894887Schin #endif
3904887Schin 
3914887Schin static int
3924887Schin outrec(register Join_t* jp, int mode)
3934887Schin {
3944887Schin 	register File_t*	fp;
3954887Schin 	register int		i;
3964887Schin 	register int		j;
3974887Schin 	register int		k;
3984887Schin 	register int		n;
3994887Schin 	int*			out;
4004887Schin 
4014887Schin 	if (mode < 0 && jp->file[0].hit++)
4024887Schin 		return 0;
4034887Schin 	if (mode > 0 && jp->file[1].hit++)
4044887Schin 		return 0;
4054887Schin 	if (out = jp->outlist)
4064887Schin 	{
4074887Schin 		while ((n = *out++) >= 0)
4084887Schin 		{
4094887Schin 			if (n == JOINFIELD)
4104887Schin 			{
4114887Schin 				i = mode >= 0;
4124887Schin 				j = jp->file[i].field;
4134887Schin 			}
4144887Schin 			else
4154887Schin 			{
4164887Schin 				i = n & 1;
4174887Schin 				j = (mode<0 && i || mode>0 && !i) ?
4184887Schin 					jp->file[i].nfields :
4194887Schin 					n >> 2;
4204887Schin 			}
4214887Schin 			if (outfield(jp, i, j, *out < 0) < 0)
4224887Schin 				return -1;
4234887Schin 		}
4244887Schin 		return 0;
4254887Schin 	}
4264887Schin 	k = jp->file[0].nfields;
4274887Schin 	if (mode >= 0)
4284887Schin 		k += jp->file[1].nfields - 1;
4294887Schin 	for (i=0; i<2; i++)
4304887Schin 	{
4314887Schin 		fp = &jp->file[i];
4324887Schin 		if (mode>0 && i==0)
4334887Schin 		{
4344887Schin 			k -= (fp->nfields - 1);
4354887Schin 			continue;
4364887Schin 		}
4374887Schin 		n = fp->field;
4384887Schin 		if (mode||i==0)
4394887Schin 		{
4404887Schin 			/* output join field first */
4414887Schin 			if (outfield(jp,i,n,!--k) < 0)
4424887Schin 				return -1;
4434887Schin 			if (!k)
4444887Schin 				return 0;
4454887Schin 			for (j=0; j<n; j++)
4464887Schin 			{
4474887Schin 				if (outfield(jp,i,j,!--k) < 0)
4484887Schin 					return -1;
4494887Schin 				if (!k)
4504887Schin 					return 0;
4514887Schin 			}
4524887Schin 			j = n + 1;
4534887Schin 		}
4544887Schin 		else
4554887Schin 			j = 0;
4564887Schin 		for (;j<fp->nfields; j++)
4574887Schin 		{
4584887Schin 			if (j!=n && outfield(jp,i,j,!--k) < 0)
4594887Schin 				return -1;
4604887Schin 			if (!k)
4614887Schin 				return 0;
4624887Schin 		}
4634887Schin 	}
4644887Schin 	return 0;
4654887Schin }
4664887Schin 
4674887Schin #if DEBUG_TRACE
4684887Schin #define outrec(p,n)	(sfprintf(sfstdout, "[R#%d,%d,%lld,%lld:%-.*s{%d}:%-.*s{%d}]", __LINE__, i1=n, lo, hi, jp->file[0].fieldlen, cp1, jp->file[0].hit, jp->file[1].fieldlen, cp2, jp->file[1].hit), outrec(p, i1))
4694887Schin #endif
4704887Schin 
4714887Schin static int
4724887Schin join(Join_t* jp)
4734887Schin {
4744887Schin 	register unsigned char*	cp1;
4754887Schin 	register unsigned char*	cp2;
4764887Schin 	register int		n1;
4774887Schin 	register int		n2;
4784887Schin 	register int		n;
4794887Schin 	register int		cmp;
4804887Schin 	register int		same;
4814887Schin 	int			o2;
4824887Schin 	Sfoff_t			lo = -1;
4834887Schin 	Sfoff_t			hi = -1;
4844887Schin 
4854887Schin 	if ((cp1 = getrec(jp, 0, 0)) && (cp2 = getrec(jp, 1, 0)) || (cp2 = 0))
4864887Schin 	{
4874887Schin 		n1 = jp->file[0].fieldlen;
4884887Schin 		n2 = jp->file[1].fieldlen;
4894887Schin 		same = 0;
4904887Schin 		for (;;)
4914887Schin 		{
4924887Schin 			n = n1 < n2 ? n1 : n2;
4934887Schin #if DEBUG_TRACE
4944887Schin 			if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)))
4954887Schin 				cmp = n1 - n2;
4964887Schin sfprintf(sfstdout, "[C#%d:%d(%c-%c),%d,%lld,%lld%s]", __LINE__, cmp, *cp1, *cp2, same, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : "");
4974887Schin 			if (!cmp)
4984887Schin #else
4994887Schin 			if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)) && !(cmp = n1 - n2))
5004887Schin #endif
5014887Schin 			{
5024887Schin 				if (!(jp->outmode & C_COMMON))
5034887Schin 				{
5044887Schin 					if (cp1 = getrec(jp, 0, 1))
5054887Schin 					{
5064887Schin 						n1 = jp->file[0].fieldlen;
5074887Schin 						same = 1;
5084887Schin 						continue;
5094887Schin 					}
5104887Schin 					if ((jp->ooutmode & (C_FILE1|C_FILE2)) != C_FILE2)
5114887Schin 						break;
5124887Schin 					if (sfseek(jp->file[0].iop, (Sfoff_t)-jp->file[0].reclen, SEEK_CUR) < 0 || !(cp1 = getrec(jp, 0, 0)))
5134887Schin 					{
5144887Schin 						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[0].name);
5154887Schin 						return -1;
5164887Schin 					}
5174887Schin 				}
5184887Schin 				else if (outrec(jp, 0) < 0)
5194887Schin 					return -1;
5204887Schin 				else if (lo < 0 && (jp->outmode & C_COMMON))
5214887Schin 				{
5224887Schin 					if ((lo = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0)
5234887Schin 					{
5244887Schin 						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
5254887Schin 						return -1;
5264887Schin 					}
5274887Schin 					lo -= jp->file[1].reclen;
5284887Schin 				}
5294887Schin 				if (cp2 = getrec(jp, 1, lo < 0))
5304887Schin 				{
5314887Schin 					n2 = jp->file[1].fieldlen;
5324887Schin 					continue;
5334887Schin 				}
5344887Schin #if DEBUG_TRACE
5354887Schin sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi);
5364887Schin #endif
5374887Schin 			}
5384887Schin 			else if (cmp > 0)
5394887Schin 			{
5404887Schin 				if (same)
5414887Schin 				{
5424887Schin 					same = 0;
5434887Schin 				next:
5444887Schin 					if (n2 > jp->samesize)
5454887Schin 					{
5464887Schin 						jp->samesize = roundof(n2, 16);
5474887Schin 						if (!(jp->same = newof(jp->same, char, jp->samesize, 0)))
5484887Schin 						{
5494887Schin 							error(ERROR_SYSTEM|2, "out of space");
5504887Schin 							return -1;
5514887Schin 						}
5524887Schin 					}
5534887Schin 					memcpy(jp->same, cp2, o2 = n2);
5544887Schin 					if (!(cp2 = getrec(jp, 1, 0)))
5554887Schin 						break;
5564887Schin 					n2 = jp->file[1].fieldlen;
5574887Schin 					if (n2 == o2 && *cp2 == *jp->same && !memcmp(cp2, jp->same, n2))
5584887Schin 						goto next;
5594887Schin 					continue;
5604887Schin 				}
5614887Schin 				if (hi >= 0)
5624887Schin 				{
5634887Schin 					if (sfseek(jp->file[1].iop, hi, SEEK_SET) != hi)
5644887Schin 					{
5654887Schin 						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
5664887Schin 						return -1;
5674887Schin 					}
5684887Schin 					hi = -1;
5694887Schin 				}
5704887Schin 				else if ((jp->outmode & C_FILE2) && outrec(jp, 1) < 0)
5714887Schin 					return -1;
5724887Schin 				lo = -1;
5734887Schin 				if (cp2 = getrec(jp, 1, 1))
5744887Schin 				{
5754887Schin 					n2 = jp->file[1].fieldlen;
5764887Schin 					continue;
5774887Schin 				}
5784887Schin #if DEBUG_TRACE
5794887Schin sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi);
5804887Schin #endif
5814887Schin 			}
5824887Schin 			else if (same)
5834887Schin 			{
5844887Schin 				same = 0;
5854887Schin 				if (!(cp1 = getrec(jp, 0, 0)))
5864887Schin 					break;
5874887Schin 				n1 = jp->file[0].fieldlen;
5884887Schin 				continue;
5894887Schin 			}
5904887Schin 			if (lo >= 0)
5914887Schin 			{
5924887Schin 				if ((hi = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0 ||
5934887Schin 				    (hi -= jp->file[1].reclen) < 0 ||
5944887Schin 				    sfseek(jp->file[1].iop, lo, SEEK_SET) != lo ||
5954887Schin 				    !(cp2 = getrec(jp, 1, 0)))
5964887Schin 				{
5974887Schin 					error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
5984887Schin 					return -1;
5994887Schin 				}
6004887Schin 				n2 = jp->file[1].fieldlen;
6014887Schin 				lo = -1;
6024887Schin 				if (jp->file[1].discard)
6034887Schin 					sfseek(jp->file[1].iop, (Sfoff_t)-1, SEEK_SET);
6044887Schin 			}
6054887Schin 			else if (!cp2)
6064887Schin 				break;
6074887Schin 			else if ((jp->outmode & C_FILE1) && outrec(jp, -1) < 0)
6084887Schin 				return -1;
6094887Schin 			if (!(cp1 = getrec(jp, 0, 1)))
6104887Schin 				break;
6114887Schin 			n1 = jp->file[0].fieldlen;
6124887Schin 		}
6134887Schin 	}
6144887Schin #if DEBUG_TRACE
6154887Schin sfprintf(sfstdout, "[X#%d:?,%p,%p,%d%,%d,%d%s]", __LINE__, cp1, cp2, cmp, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : "");
6164887Schin #endif
6174887Schin 	if (cp2)
6184887Schin 	{
6194887Schin 		if (hi >= 0 &&
6204887Schin 		    sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR) < hi &&
6214887Schin 		    sfseek(jp->file[1].iop, hi, SEEK_SET) != hi)
6224887Schin 		{
6234887Schin 			error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
6244887Schin 			return -1;
6254887Schin 		}
6264887Schin #if DEBUG_TRACE
6274887Schin sfprintf(sfstdout, "[O#%d:%02o:%02o]", __LINE__, jp->ooutmode, jp->outmode);
6284887Schin #endif
6294887Schin 		cp1 = (!cp1 && cmp && hi < 0 && !jp->file[1].hit && ((jp->ooutmode ^ C_ALL) <= 1 || jp->outmode == 2)) ? cp2 : getrec(jp, 1, 0);
6304887Schin 		cmp = 1;
6314887Schin 		n = 1;
6324887Schin 	}
6334887Schin 	else
6344887Schin 	{
6354887Schin 		cmp = -1;
6364887Schin 		n = 0;
6374887Schin 	}
6384887Schin #if DEBUG_TRACE
6394887Schin sfprintf(sfstdout, "[X#%d:%d,%p,%p,%d,%02o,%02o%s]", __LINE__, n, cp1, cp2, cmp, jp->ooutmode, jp->outmode, (jp->outmode & C_COMMON) ? ",COMMON" : "");
6404887Schin #endif
6414887Schin 	if (!cp1 || !(jp->outmode & (1<<n)))
6424887Schin 	{
6434887Schin 		if (cp1 && jp->file[n].iop == sfstdin)
6444887Schin 			sfseek(sfstdin, (Sfoff_t)0, SEEK_END);
6454887Schin 		return 0;
6464887Schin 	}
6474887Schin 	if (outrec(jp, cmp) < 0)
6484887Schin 		return -1;
6494887Schin 	do
6504887Schin 	{
6514887Schin 		if (!getrec(jp, n, 1))
6524887Schin 			return 0;
6534887Schin 	} while (outrec(jp, cmp) >= 0);
6544887Schin 	return -1;
6554887Schin }
6564887Schin 
6574887Schin int
6584887Schin b_join(int argc, char** argv, void* context)
6594887Schin {
6604887Schin 	register int		n;
6614887Schin 	register char*		cp;
6624887Schin 	register Join_t*	jp;
6634887Schin 	char*			e;
6644887Schin 
6654887Schin #if !DEBUG_TRACE
6664887Schin 	cmdinit(argc, argv, context, ERROR_CATALOG, ERROR_NOTIFY);
6674887Schin #endif
6684887Schin 	if (!(jp = init()))
6694887Schin 		error(ERROR_system(1),"out of space");
670*8462SApril.Chin@Sun.COM 	jp->context = context;
6714887Schin 	for (;;)
6724887Schin 	{
6734887Schin 		switch (n = optget(argv, usage))
6744887Schin 		{
6754887Schin 		case 0:
6764887Schin 			break;
6774887Schin  		case 'j':
6784887Schin 			/*
6794887Schin 			 * check for obsolete "-j1 field" and "-j2 field"
6804887Schin 			 */
6814887Schin 
6824887Schin 			if (opt_info.offset == 0)
6834887Schin 			{
6844887Schin 				cp = argv[opt_info.index - 1];
6854887Schin 				for (n = strlen(cp) - 1; n > 0 && cp[n] != 'j'; n--);
6864887Schin 				n = cp[n] == 'j';
6874887Schin 			}
6884887Schin 			else
6894887Schin 				n = 0;
6904887Schin 			if (n)
6914887Schin 			{
6924887Schin 				if (opt_info.num!=1 && opt_info.num!=2)
6934887Schin 					error(2,"-jfileno field: fileno must be 1 or 2");
6944887Schin 				n = '0' + opt_info.num;
6954887Schin 				if (!(cp = argv[opt_info.index]))
6964887Schin 				{
6974887Schin 					argc = 0;
6984887Schin 					break;
6994887Schin 				}
7004887Schin 				opt_info.num = strtol(cp, &e, 10);
7014887Schin 				if (*e)
7024887Schin 				{
7034887Schin 					argc = 0;
7044887Schin 					break;
7054887Schin 				}
7064887Schin 				opt_info.index++;
7074887Schin 			}
7084887Schin 			else
7094887Schin 			{
7104887Schin 				jp->file[0].field = (int)(opt_info.num-1);
7114887Schin 				n = '2';
7124887Schin 			}
7134887Schin 			/*FALLTHROUGH*/
7144887Schin  		case '1':
7154887Schin 		case '2':
7164887Schin 			if (opt_info.num <=0)
7174887Schin 				error(2,"field number must positive");
7184887Schin 			jp->file[n-'1'].field = (int)(opt_info.num-1);
7194887Schin 			continue;
7204887Schin 		case 'v':
7214887Schin 			jp->outmode &= ~C_COMMON;
7224887Schin 			/*FALLTHROUGH*/
7234887Schin 		case 'a':
7244887Schin 			if (opt_info.num!=1 && opt_info.num!=2)
7254887Schin 				error(2,"%s: file number must be 1 or 2", opt_info.name);
7264887Schin 			jp->outmode |= 1<<(opt_info.num-1);
7274887Schin 			continue;
7284887Schin 		case 'e':
7294887Schin 			jp->nullfield = opt_info.arg;
7304887Schin 			continue;
7314887Schin 		case 'o':
7324887Schin 			/* need to accept obsolescent command syntax */
7334887Schin 			n = getolist(jp, opt_info.arg, argv+opt_info.index);
7344887Schin 			opt_info.index += n;
7354887Schin 			continue;
7364887Schin 		case 't':
7374887Schin 			jp->state[' '] = jp->state['\t'] = 0;
7384887Schin 			n= *(unsigned char*)opt_info.arg;
7394887Schin 			jp->state[n] = S_DELIM;
7404887Schin 			jp->delim = n;
7414887Schin 			continue;
7424887Schin 		case 'i':
7434887Schin 			jp->ignorecase = !opt_info.num;
7444887Schin 			continue;
7454887Schin 		case 'B':
7464887Schin 			jp->buffered = !opt_info.num;
7474887Schin 			continue;
7484887Schin 		case ':':
7494887Schin 			error(2, "%s", opt_info.arg);
7504887Schin 			break;
7514887Schin 		case '?':
7524887Schin 			done(jp);
7534887Schin 			error(ERROR_usage(2), "%s", opt_info.arg);
7544887Schin 			break;
7554887Schin 		}
7564887Schin 		break;
7574887Schin 	}
7584887Schin 	argv += opt_info.index;
7594887Schin 	argc -= opt_info.index;
7604887Schin 	if (error_info.errors || argc!=2)
7614887Schin 	{
7624887Schin 		done(jp);
7634887Schin 		error(ERROR_usage(2),"%s", optusage(NiL));
7644887Schin 	}
7654887Schin 	jp->ooutmode = jp->outmode;
7664887Schin 	jp->file[0].name = cp = *argv++;
7674887Schin 	if (streq(cp,"-"))
7684887Schin 	{
7694887Schin 		if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0)
7704887Schin 		{
7714887Schin 			if (sfdcseekable(sfstdin))
7724887Schin 				error(ERROR_warn(0),"%s: seek may fail",cp);
7734887Schin 			else
7744887Schin 				jp->file[0].discard = 1;
7754887Schin 		}
7764887Schin 		jp->file[0].iop = sfstdin;
7774887Schin 	}
7784887Schin 	else if (!(jp->file[0].iop = sfopen(NiL, cp, "r")))
7794887Schin 	{
7804887Schin 		done(jp);
7814887Schin 		error(ERROR_system(1),"%s: cannot open",cp);
7824887Schin 	}
7834887Schin 	jp->file[1].name = cp = *argv;
7844887Schin 	if (streq(cp,"-"))
7854887Schin 	{
7864887Schin 		if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0)
7874887Schin 		{
7884887Schin 			if (sfdcseekable(sfstdin))
7894887Schin 				error(ERROR_warn(0),"%s: seek may fail",cp);
7904887Schin 			else
7914887Schin 				jp->file[1].discard = 1;
7924887Schin 		}
7934887Schin 		jp->file[1].iop = sfstdin;
7944887Schin 	}
7954887Schin 	else if (!(jp->file[1].iop = sfopen(NiL, cp, "r")))
7964887Schin 	{
7974887Schin 		done(jp);
7984887Schin 		error(ERROR_system(1),"%s: cannot open",cp);
7994887Schin 	}
8004887Schin 	if (jp->buffered)
8014887Schin 	{
8024887Schin 		sfsetbuf(jp->file[0].iop, jp->file[0].iop, SF_UNBOUND);
8034887Schin 		sfsetbuf(jp->file[1].iop, jp->file[0].iop, SF_UNBOUND);
8044887Schin 	}
8054887Schin 	jp->state['\n'] = S_NL;
8064887Schin 	jp->outfile = sfstdout;
8074887Schin 	if (!jp->outlist)
8084887Schin 		jp->nullfield = 0;
8094887Schin 	if (join(jp) < 0)
8104887Schin 	{
8114887Schin 		done(jp);
8124887Schin 		error(ERROR_system(1),"write error");
8134887Schin 	}
8144887Schin 	else if (jp->file[0].iop==sfstdin || jp->file[1].iop==sfstdin)
8154887Schin 		sfseek(sfstdin,(Sfoff_t)0,SEEK_END);
8164887Schin 	done(jp);
8174887Schin 	return error_info.errors;
8184887Schin }
819