xref: /onnv-gate/usr/src/lib/libcmd/common/join.c (revision 4887)
1*4887Schin /***********************************************************************
2*4887Schin *                                                                      *
3*4887Schin *               This software is part of the ast package               *
4*4887Schin *           Copyright (c) 1992-2007 AT&T Knowledge Ventures            *
5*4887Schin *                      and is licensed under the                       *
6*4887Schin *                  Common Public License, Version 1.0                  *
7*4887Schin *                      by AT&T Knowledge Ventures                      *
8*4887Schin *                                                                      *
9*4887Schin *                A copy of the License is available at                 *
10*4887Schin *            http://www.opensource.org/licenses/cpl1.0.txt             *
11*4887Schin *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12*4887Schin *                                                                      *
13*4887Schin *              Information and Software Systems Research               *
14*4887Schin *                            AT&T Research                             *
15*4887Schin *                           Florham Park NJ                            *
16*4887Schin *                                                                      *
17*4887Schin *                 Glenn Fowler <gsf@research.att.com>                  *
18*4887Schin *                  David Korn <dgk@research.att.com>                   *
19*4887Schin *                                                                      *
20*4887Schin ***********************************************************************/
21*4887Schin #pragma prototyped
22*4887Schin /*
23*4887Schin  * David Korn
24*4887Schin  * Glenn Fowler
25*4887Schin  * AT&T Research
26*4887Schin  *
27*4887Schin  * join
28*4887Schin  */
29*4887Schin 
30*4887Schin static const char usage[] =
31*4887Schin "[-?\n@(#)$Id: join (AT&T Research) 2006-10-31 $\n]"
32*4887Schin USAGE_LICENSE
33*4887Schin "[+NAME?join - relational database operator]"
34*4887Schin "[+DESCRIPTION?\bjoin\b performs an \aequality join\a on the files \afile1\a "
35*4887Schin 	"and \afile2\a and writes the resulting joined files to standard "
36*4887Schin 	"output.  By default, a field is delimited by one or more spaces "
37*4887Schin 	"and tabs with leading spaces and/or tabs ignored.  The \b-t\b option "
38*4887Schin 	"can be used to change the field delimiter.]"
39*4887Schin "[+?The \ajoin field\a is a field in each file on which files are compared. "
40*4887Schin 	"By default \bjoin\b writes one line in the output for each pair "
41*4887Schin 	"of lines in \afiles1\a and \afiles2\a that have identical join "
42*4887Schin 	"fields.  The default output line consists of the join field, "
43*4887Schin 	"then the remaining fields from \afile1\a, then the remaining "
44*4887Schin 	"fields from \afile2\a, but this can be changed with the \b-o\b "
45*4887Schin 	"option.  The \b-a\b option can be used to add unmatched lines "
46*4887Schin 	"to the output.  The \b-v\b option can be used to output only "
47*4887Schin 	"unmatched lines.]"
48*4887Schin "[+?The files \afile1\a and \afile2\a must be ordered in the collating "
49*4887Schin 	"sequence of \bsort -b\b on the fields on which they are to be "
50*4887Schin 	"joined otherwise the results are unspecified.]"
51*4887Schin "[+?If either \afile1\a or \afile2\a is \b-\b, \bjoin\b "
52*4887Schin         "uses standard input starting at the current location.]"
53*4887Schin 
54*4887Schin "[e:empty]:[string?Replace empty output fields in the list selected with"
55*4887Schin "	\b-o\b with \astring\a.]"
56*4887Schin "[o:output]:[list?Construct the output line to comprise the fields specified "
57*4887Schin 	"in a blank or comma separated list \alist\a.  Each element in "
58*4887Schin 	"\alist\a consists of a file number (either 1 or 2), a period, "
59*4887Schin 	"and a field number or \b0\b representing the join field.  "
60*4887Schin 	"As an obsolete feature multiple occurrences of \b-o\b can "
61*4887Schin 	"be specified.]"
62*4887Schin "[t:separator|tabs]:[delim?Use \adelim\a as the field separator for both input"
63*4887Schin "	and output.]"
64*4887Schin "[1:j1]#[field?Join on field \afield\a of \afile1\a.  Fields start at 1.]"
65*4887Schin "[2:j2]#[field?Join on field \afield\a of \afile2\a.  Fields start at 1.]"
66*4887Schin "[j:join]#[field?Equivalent to \b-1\b \afield\a \b-2\b \afield\a.]"
67*4887Schin "[a:unpairable]#[fileno?Write a line for each unpairable line in file"
68*4887Schin "	\afileno\a, where \afileno\a is either 1 or 2, in addition to the"
69*4887Schin "	normal output.  If \b-a\b options appear for both 1 and 2, then "
70*4887Schin 	"all unpairable lines will be output.]"
71*4887Schin "[v:suppress]#[fileno?Write a line for each unpairable line in file"
72*4887Schin "	\afileno\a, where \afileno\a is either 1 or 2, instead of the normal "
73*4887Schin 	"output.  If \b-v\b options appear for both 1 and 2, then "
74*4887Schin 	"all unpairable lines will be output.] ]"
75*4887Schin "[i:ignorecase?Ignore case in field comparisons.]"
76*4887Schin "[B!:mmap?Enable memory mapped reads instead of buffered.]"
77*4887Schin 
78*4887Schin "[+?The following obsolete option forms are also recognized: \b-j\b \afield\a"
79*4887Schin "	is equivalent to \b-1\b \afield\a \b-2\b \afield\a, \b-j1\b \afield\a"
80*4887Schin "	is equivalent to \b-1\b \afield\a, and \b-j2\b \afield\a is"
81*4887Schin "	equivalent to \b-2\b \afield\a.]"
82*4887Schin 
83*4887Schin "\n"
84*4887Schin "\nfile1 file2\n"
85*4887Schin "\n"
86*4887Schin "[+EXIT STATUS?]{"
87*4887Schin 	"[+0?Both files processed successfully.]"
88*4887Schin 	"[+>0?An error occurred.]"
89*4887Schin "}"
90*4887Schin "[+SEE ALSO?\bcut\b(1), \bcomm\b(1), \bpaste\b(1), \bsort\b(1), \buniq\b(1)]"
91*4887Schin ;
92*4887Schin 
93*4887Schin #include <cmd.h>
94*4887Schin #include <sfdisc.h>
95*4887Schin 
/*
 * Join_t.outmode bits: which classes of lines are written
 * bit (1<<index) selects the unpairable lines of file <index>
 */
#define C_FILE1		0x1
#define C_FILE2		0x2
#define C_COMMON	0x4
#define C_ALL		(C_FILE1|C_FILE2|C_COMMON)

/* initial capacity of the field lists and of the -o output list */
#define NFIELD		10
/* -o list code that stands for the join field */
#define JOINFIELD	2

/* Join_t.state[] character classes; 0 marks an ordinary character */
#define S_DELIM		1
#define S_SPACE		2
#define S_NL		3
107*4887Schin 
/*
 * per input file state: the stream plus the layout of the record most
 * recently read by getrec()
 */
108*4887Schin typedef struct
109*4887Schin {
110*4887Schin 	Sfio_t*		iop;		/* input stream read by getrec() */
111*4887Schin 	char*		name;		/* file operand, used in error messages */
112*4887Schin 	char*		recptr;		/* start of the current record */
113*4887Schin 	int		reclen;		/* sfvalue() after reading the current record */
114*4887Schin 	int		field;		/* zero based index of the join field */
115*4887Schin 	int		fieldlen;	/* join field length in the current record */
116*4887Schin 	int		nfields;	/* number of fields in the current record */
117*4887Schin 	int		maxfields;	/* fieldlist capacity, not counting the end slot */
118*4887Schin 	int		spaces;		/* set by getrec() when a field starts with blanks */
119*4887Schin 	int		hit;		/* cleared per record; counted by outrec() for unpairable output */
120*4887Schin 	int		discard;	/* getrec() raises SFSK_DISCARD on iop when set and asked to */
121*4887Schin 	char**		fieldlist;	/* start of each field plus one end-of-record entry */
122*4887Schin } File_t;
123*4887Schin 
/*
 * state of one join invocation; allocated by init() and released by done()
 */
124*4887Schin typedef struct
125*4887Schin {
126*4887Schin 	unsigned char	state[1<<CHAR_BIT];	/* S_* class of each character, 0 for ordinary */
127*4887Schin 	Sfio_t*		outfile;	/* output stream */
128*4887Schin 	int*		outlist;	/* -o field codes terminated by -1, 0 without -o */
129*4887Schin 	int		outmode;	/* C_* bits; getrec() clears a file's bit at end of file */
130*4887Schin 	int		ooutmode;	/* outmode as it was when join() started */
131*4887Schin 	char*		nullfield;	/* -e replacement string, 0 if none */
132*4887Schin 	int		delim;		/* -t delimiter character, -1 when not set */
133*4887Schin 	int		buffered;	/* set from the -B option; b_join() calls sfsetbuf() when set */
134*4887Schin 	int		ignorecase;	/* compare join fields with strncasecmp() when set */
135*4887Schin 	char*		same;		/* copy of a file 2 join field made by join() */
136*4887Schin 	int		samesize;	/* allocated size of same */
137*4887Schin 	File_t		file[2];	/* file[0] is file1, file[1] is file2 */
138*4887Schin } Join_t;
139*4887Schin 
140*4887Schin static void
141*4887Schin done(register Join_t* jp)
142*4887Schin {
143*4887Schin 	if (jp->file[0].iop && jp->file[0].iop != sfstdin)
144*4887Schin 		sfclose(jp->file[0].iop);
145*4887Schin 	if (jp->file[1].iop && jp->file[1].iop != sfstdin)
146*4887Schin 		sfclose(jp->file[1].iop);
147*4887Schin 	if (jp->outlist)
148*4887Schin 		free(jp->outlist);
149*4887Schin 	if (jp->file[0].fieldlist)
150*4887Schin 		free(jp->file[0].fieldlist);
151*4887Schin 	if (jp->file[1].fieldlist)
152*4887Schin 		free(jp->file[1].fieldlist);
153*4887Schin 	if (jp->same)
154*4887Schin 		free(jp->same);
155*4887Schin 	free(jp);
156*4887Schin }
157*4887Schin 
158*4887Schin static Join_t*
159*4887Schin init(void)
160*4887Schin {
161*4887Schin 	register Join_t*	jp;
162*4887Schin 
163*4887Schin 	if (jp = newof(0, Join_t, 1, 0))
164*4887Schin 	{
165*4887Schin 		jp->state[' '] = jp->state['\t'] = S_SPACE;
166*4887Schin 		jp->delim = -1;
167*4887Schin 		jp->nullfield = 0;
168*4887Schin 		if (!(jp->file[0].fieldlist = newof(0, char*, NFIELD + 1, 0)) ||
169*4887Schin 		    !(jp->file[1].fieldlist = newof(0, char*, NFIELD + 1, 0)))
170*4887Schin 		{
171*4887Schin 			done(jp);
172*4887Schin 			return 0;
173*4887Schin 		}
174*4887Schin 		jp->file[0].maxfields = NFIELD;
175*4887Schin 		jp->file[1].maxfields = NFIELD;
176*4887Schin 		jp->outmode = C_COMMON;
177*4887Schin 	}
178*4887Schin 	return jp;
179*4887Schin }
180*4887Schin 
181*4887Schin static int
182*4887Schin getolist(Join_t* jp, const char* first, char** arglist)
183*4887Schin {
184*4887Schin 	register const char*	cp = first;
185*4887Schin 	char**			argv = arglist;
186*4887Schin 	register int		c;
187*4887Schin 	int*			outptr;
188*4887Schin 	int*			outmax;
189*4887Schin 	int			nfield = NFIELD;
190*4887Schin 	char*			str;
191*4887Schin 
192*4887Schin 	outptr = jp->outlist = newof(0, int, NFIELD + 1, 0);
193*4887Schin 	outmax = outptr + NFIELD;
194*4887Schin 	while (c = *cp++)
195*4887Schin 	{
196*4887Schin 		if (c==' ' || c=='\t' || c==',')
197*4887Schin 			continue;
198*4887Schin 		str = (char*)--cp;
199*4887Schin 		if (*cp=='0' && ((c=cp[1])==0 || c==' ' || c=='\t' || c==','))
200*4887Schin 		{
201*4887Schin 			str++;
202*4887Schin 			c = JOINFIELD;
203*4887Schin 			goto skip;
204*4887Schin 		}
205*4887Schin 		if (cp[1]!='.' || (*cp!='1' && *cp!='2') || (c=strtol(cp+2,&str,10)) <=0)
206*4887Schin 		{
207*4887Schin 			error(2,"%s: invalid field list",first);
208*4887Schin 			break;
209*4887Schin 		}
210*4887Schin 		c--;
211*4887Schin 		c <<=2;
212*4887Schin 		if (*cp=='2')
213*4887Schin 			c |=1;
214*4887Schin 	skip:
215*4887Schin 		if (outptr >= outmax)
216*4887Schin 		{
217*4887Schin 			jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0);
218*4887Schin 			outptr = jp->outlist + nfield;
219*4887Schin 			nfield *= 2;
220*4887Schin 			outmax = jp->outlist + nfield;
221*4887Schin 		}
222*4887Schin 		*outptr++ = c;
223*4887Schin 		cp = str;
224*4887Schin 	}
225*4887Schin 	/* need to accept obsolescent command syntax */
226*4887Schin 	while (1)
227*4887Schin 	{
228*4887Schin 		if (!(cp= *argv) || cp[1]!='.' || (*cp!='1' && *cp!='2'))
229*4887Schin 		{
230*4887Schin 			if (*cp=='0' && cp[1]==0)
231*4887Schin 			{
232*4887Schin 				c = JOINFIELD;
233*4887Schin 				goto skip2;
234*4887Schin 			}
235*4887Schin 			break;
236*4887Schin 		}
237*4887Schin 		str = (char*)cp;
238*4887Schin 		c = strtol(cp+2, &str,10);
239*4887Schin 		if (*str || --c<0)
240*4887Schin 			break;
241*4887Schin 		argv++;
242*4887Schin 		c <<= 2;
243*4887Schin 		if (*cp=='2')
244*4887Schin 			c |=1;
245*4887Schin 	skip2:
246*4887Schin 		if (outptr >= outmax)
247*4887Schin 		{
248*4887Schin 			jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0);
249*4887Schin 			outptr = jp->outlist + nfield;
250*4887Schin 			nfield *= 2;
251*4887Schin 			outmax = jp->outlist + nfield;
252*4887Schin 		}
253*4887Schin 		*outptr++ = c;
254*4887Schin 	}
255*4887Schin 	*outptr = -1;
256*4887Schin 	return argv-arglist;
257*4887Schin }
258*4887Schin 
259*4887Schin /*
260*4887Schin  * read in a record from file <index> and split into fields
261*4887Schin  */
262*4887Schin static unsigned char*
263*4887Schin getrec(Join_t* jp, int index, int discard)
264*4887Schin {
265*4887Schin 	register unsigned char*	sp = jp->state;
266*4887Schin 	register File_t*	fp = &jp->file[index];
267*4887Schin 	register char**		ptr = fp->fieldlist;
268*4887Schin 	register char**		ptrmax = ptr + fp->maxfields;
269*4887Schin 	register char*		cp;
270*4887Schin 	register int		n = 0;
271*4887Schin 
272*4887Schin 	if (cmdquit())
273*4887Schin 		return 0;
274*4887Schin 	if (discard && fp->discard)
275*4887Schin 		sfraise(fp->iop, SFSK_DISCARD, NiL);
276*4887Schin 	fp->spaces = 0;
277*4887Schin 	fp->hit = 0;
278*4887Schin 	if (!(cp = sfgetr(fp->iop, '\n', 0)))
279*4887Schin 	{
280*4887Schin 		jp->outmode &= ~(1<<index);
281*4887Schin 		return 0;
282*4887Schin 	}
283*4887Schin 	fp->recptr = cp;
284*4887Schin 	fp->reclen = sfvalue(fp->iop);
285*4887Schin 	if (jp->delim=='\n')	/* handle new-line delimiter specially */
286*4887Schin 	{
287*4887Schin 		*ptr++ = cp;
288*4887Schin 		cp += fp->reclen;
289*4887Schin 	}
290*4887Schin 	else while (n!=S_NL) /* separate into fields */
291*4887Schin 	{
292*4887Schin 		if (ptr >= ptrmax)
293*4887Schin 		{
294*4887Schin 			n = 2*fp->maxfields;
295*4887Schin 			fp->fieldlist = newof(fp->fieldlist, char*, n + 1, 0);
296*4887Schin 			ptr = fp->fieldlist + fp->maxfields;
297*4887Schin 			fp->maxfields = n;
298*4887Schin 			ptrmax = fp->fieldlist+n;
299*4887Schin 		}
300*4887Schin 		*ptr++ = cp;
301*4887Schin 		if (jp->delim<=0 && sp[*(unsigned char*)cp]==S_SPACE)
302*4887Schin 		{
303*4887Schin 			fp->spaces = 1;
304*4887Schin 			while (sp[*(unsigned char*)cp++]==S_SPACE);
305*4887Schin 			cp--;
306*4887Schin 		}
307*4887Schin 		while ((n=sp[*(unsigned char*)cp++])==0);
308*4887Schin 	}
309*4887Schin 	*ptr = cp;
310*4887Schin 	fp->nfields = ptr - fp->fieldlist;
311*4887Schin 	if ((n=fp->field) < fp->nfields)
312*4887Schin 	{
313*4887Schin 		cp = fp->fieldlist[n];
314*4887Schin 		/* eliminate leading spaces */
315*4887Schin 		if (fp->spaces)
316*4887Schin 		{
317*4887Schin 			while (sp[*(unsigned char*)cp++]==S_SPACE);
318*4887Schin 			cp--;
319*4887Schin 		}
320*4887Schin 		fp->fieldlen = (fp->fieldlist[n+1]-cp)-1;
321*4887Schin 		return (unsigned char*)cp;
322*4887Schin 	}
323*4887Schin 	fp->fieldlen = 0;
324*4887Schin 	return (unsigned char*)"";
325*4887Schin }
326*4887Schin 
/*
 * DEBUG_TRACE only: wrap getrec() so that every call also prints the
 * calling source line, the file index, the stream offset and up to 8
 * bytes of the returned join field on sfstdout
 * note that only u1 is a pointer; u2 and u3 are plain unsigned char
 */
327*4887Schin #if DEBUG_TRACE
328*4887Schin static unsigned char* u1,u2,u3;
329*4887Schin #define getrec(p,n,d)	(u1 = getrec(p, n, d), sfprintf(sfstdout, "[G%d#%d@%I*d:%-.8s]", __LINE__, n, sizeof(Sfoff_t), sftell(p->file[n].iop), u1), u1)
330*4887Schin #endif
331*4887Schin 
332*4887Schin /*
333*4887Schin  * print field <n> from file <index>
334*4887Schin  */
335*4887Schin static int
336*4887Schin outfield(Join_t* jp, int index, register int n, int last)
337*4887Schin {
338*4887Schin 	register File_t*	fp = &jp->file[index];
339*4887Schin 	register char*		cp;
340*4887Schin 	register char*		cpmax;
341*4887Schin 	register int		size;
342*4887Schin 	register Sfio_t*	iop = jp->outfile;
343*4887Schin 
344*4887Schin 	if (n < fp->nfields)
345*4887Schin 	{
346*4887Schin 		cp = fp->fieldlist[n];
347*4887Schin 		cpmax = fp->fieldlist[n+1];
348*4887Schin 	}
349*4887Schin 	else
350*4887Schin 		cp = 0;
351*4887Schin 	if ((n=jp->delim)<=0)
352*4887Schin 	{
353*4887Schin 		if (fp->spaces)
354*4887Schin 		{
355*4887Schin 			/*eliminate leading spaces */
356*4887Schin 			while (jp->state[*(unsigned char*)cp++]==S_SPACE);
357*4887Schin 			cp--;
358*4887Schin 		}
359*4887Schin 		n = ' ';
360*4887Schin 	}
361*4887Schin 	if (last)
362*4887Schin 		n = '\n';
363*4887Schin 	if (cp)
364*4887Schin 		size = cpmax-cp;
365*4887Schin 	else
366*4887Schin 		size = 0;
367*4887Schin 	if (size==0)
368*4887Schin 	{
369*4887Schin 		if (!jp->nullfield)
370*4887Schin 			sfputc(iop,n);
371*4887Schin 		else if (sfputr(iop,jp->nullfield,n) < 0)
372*4887Schin 			return -1;
373*4887Schin 	}
374*4887Schin 	else
375*4887Schin 	{
376*4887Schin 		last = cp[size-1];
377*4887Schin 		cp[size-1] = n;
378*4887Schin 		if (sfwrite(iop,cp,size) < 0)
379*4887Schin 			return -1;
380*4887Schin 		cp[size-1] = last;
381*4887Schin 	}
382*4887Schin 	return 0;
383*4887Schin }
384*4887Schin 
/*
 * DEBUG_TRACE only: wrap outfield() so that every call first prints the
 * calling source line, file index, field index and last flag on sfstdout
 * the arguments are captured in i1, i2 and i3 before being passed on
 */
385*4887Schin #if DEBUG_TRACE
386*4887Schin static int i1,i2,i3;
387*4887Schin #define outfield(p,i,n,f)	(sfprintf(sfstdout, "[F%d#%d:%d,%d]", __LINE__, i1=i, i2=n, i3=f), outfield(p, i1, i2, i3))
388*4887Schin #endif
389*4887Schin 
/*
 * write one output line from the current records
 * <mode> is 0 for a paired line, <0 for an unpairable line of file 0
 * and >0 for an unpairable line of file 1
 * returns 0 on success, or when the unpairable record was already
 * offered once, and -1 if outfield() fails
 */
390*4887Schin static int
391*4887Schin outrec(register Join_t* jp, int mode)
392*4887Schin {
393*4887Schin 	register File_t*	fp;
394*4887Schin 	register int		i;
395*4887Schin 	register int		j;
396*4887Schin 	register int		k;
397*4887Schin 	register int		n;
398*4887Schin 	int*			out;
399*4887Schin 
	/* hit is cleared by getrec(); only the first offer of a record is written */
400*4887Schin 	if (mode < 0 && jp->file[0].hit++)
401*4887Schin 		return 0;
402*4887Schin 	if (mode > 0 && jp->file[1].hit++)
403*4887Schin 		return 0;
	/* explicit -o list: write exactly the listed fields in list order */
404*4887Schin 	if (out = jp->outlist)
405*4887Schin 	{
406*4887Schin 		while ((n = *out++) >= 0)
407*4887Schin 		{
408*4887Schin 			if (n == JOINFIELD)
409*4887Schin 			{
				/* join field comes from file 0 only for its unpairable lines */
410*4887Schin 				i = mode >= 0;
411*4887Schin 				j = jp->file[i].field;
412*4887Schin 			}
413*4887Schin 			else
414*4887Schin 			{
				/*
				 * bit 0 is the file, n>>2 the field index; a field of the
				 * file that is not part of an unpairable line gets index
				 * nfields, which outfield() handles as a missing field
				 */
415*4887Schin 				i = n & 1;
416*4887Schin 				j = (mode<0 && i || mode>0 && !i) ?
417*4887Schin 					jp->file[i].nfields :
418*4887Schin 					n >> 2;
419*4887Schin 			}
			/* the element before the -1 terminator ends the line */
420*4887Schin 			if (outfield(jp, i, j, *out < 0) < 0)
421*4887Schin 				return -1;
422*4887Schin 		}
423*4887Schin 		return 0;
424*4887Schin 	}
	/*
	 * default layout: k starts as the number of fields to write and is
	 * decremented per field, so the field that brings it to 0 is
	 * written with the last flag set
	 */
425*4887Schin 	k = jp->file[0].nfields;
426*4887Schin 	if (mode >= 0)
427*4887Schin 		k += jp->file[1].nfields - 1;
428*4887Schin 	for (i=0; i<2; i++)
429*4887Schin 	{
430*4887Schin 		fp = &jp->file[i];
431*4887Schin 		if (mode>0 && i==0)
432*4887Schin 		{
			/* unpairable line of file 1: drop file 0 from the count */
433*4887Schin 			k -= (fp->nfields - 1);
434*4887Schin 			continue;
435*4887Schin 		}
436*4887Schin 		n = fp->field;
437*4887Schin 		if (mode||i==0)
438*4887Schin 		{
439*4887Schin 			/* output join field first */
440*4887Schin 			if (outfield(jp,i,n,!--k) < 0)
441*4887Schin 				return -1;
442*4887Schin 			if (!k)
443*4887Schin 				return 0;
444*4887Schin 			for (j=0; j<n; j++)
445*4887Schin 			{
446*4887Schin 				if (outfield(jp,i,j,!--k) < 0)
447*4887Schin 					return -1;
448*4887Schin 				if (!k)
449*4887Schin 					return 0;
450*4887Schin 			}
451*4887Schin 			j = n + 1;
452*4887Schin 		}
453*4887Schin 		else
454*4887Schin 			j = 0;
		/* remaining fields of this file; k is not decremented for j == n */
455*4887Schin 		for (;j<fp->nfields; j++)
456*4887Schin 		{
457*4887Schin 			if (j!=n && outfield(jp,i,j,!--k) < 0)
458*4887Schin 				return -1;
459*4887Schin 			if (!k)
460*4887Schin 				return 0;
461*4887Schin 		}
462*4887Schin 	}
463*4887Schin 	return 0;
464*4887Schin }
465*4887Schin 
/*
 * DEBUG_TRACE only: wrap outrec() so that every call first prints the
 * calling source line, the mode, lo, hi and both join fields with their
 * hit counts on sfstdout
 * the expansion uses jp, lo, hi, cp1 and cp2 from the calling scope and
 * i1 from the outfield() trace block
 */
466*4887Schin #if DEBUG_TRACE
467*4887Schin #define outrec(p,n)	(sfprintf(sfstdout, "[R#%d,%d,%lld,%lld:%-.*s{%d}:%-.*s{%d}]", __LINE__, i1=n, lo, hi, jp->file[0].fieldlen, cp1, jp->file[0].hit, jp->file[1].fieldlen, cp2, jp->file[1].hit), outrec(p, i1))
468*4887Schin #endif
469*4887Schin 
470*4887Schin static int
471*4887Schin join(Join_t* jp)
472*4887Schin {
473*4887Schin 	register unsigned char*	cp1;
474*4887Schin 	register unsigned char*	cp2;
475*4887Schin 	register int		n1;
476*4887Schin 	register int		n2;
477*4887Schin 	register int		n;
478*4887Schin 	register int		cmp;
479*4887Schin 	register int		same;
480*4887Schin 	int			o2;
481*4887Schin 	Sfoff_t			lo = -1;
482*4887Schin 	Sfoff_t			hi = -1;
483*4887Schin 
484*4887Schin 	if ((cp1 = getrec(jp, 0, 0)) && (cp2 = getrec(jp, 1, 0)) || (cp2 = 0))
485*4887Schin 	{
486*4887Schin 		n1 = jp->file[0].fieldlen;
487*4887Schin 		n2 = jp->file[1].fieldlen;
488*4887Schin 		same = 0;
489*4887Schin 		for (;;)
490*4887Schin 		{
491*4887Schin 			n = n1 < n2 ? n1 : n2;
492*4887Schin #if DEBUG_TRACE
493*4887Schin 			if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)))
494*4887Schin 				cmp = n1 - n2;
495*4887Schin sfprintf(sfstdout, "[C#%d:%d(%c-%c),%d,%lld,%lld%s]", __LINE__, cmp, *cp1, *cp2, same, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : "");
496*4887Schin 			if (!cmp)
497*4887Schin #else
498*4887Schin 			if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)) && !(cmp = n1 - n2))
499*4887Schin #endif
500*4887Schin 			{
501*4887Schin 				if (!(jp->outmode & C_COMMON))
502*4887Schin 				{
503*4887Schin 					if (cp1 = getrec(jp, 0, 1))
504*4887Schin 					{
505*4887Schin 						n1 = jp->file[0].fieldlen;
506*4887Schin 						same = 1;
507*4887Schin 						continue;
508*4887Schin 					}
509*4887Schin 					if ((jp->ooutmode & (C_FILE1|C_FILE2)) != C_FILE2)
510*4887Schin 						break;
511*4887Schin 					if (sfseek(jp->file[0].iop, (Sfoff_t)-jp->file[0].reclen, SEEK_CUR) < 0 || !(cp1 = getrec(jp, 0, 0)))
512*4887Schin 					{
513*4887Schin 						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[0].name);
514*4887Schin 						return -1;
515*4887Schin 					}
516*4887Schin 				}
517*4887Schin 				else if (outrec(jp, 0) < 0)
518*4887Schin 					return -1;
519*4887Schin 				else if (lo < 0 && (jp->outmode & C_COMMON))
520*4887Schin 				{
521*4887Schin 					if ((lo = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0)
522*4887Schin 					{
523*4887Schin 						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
524*4887Schin 						return -1;
525*4887Schin 					}
526*4887Schin 					lo -= jp->file[1].reclen;
527*4887Schin 				}
528*4887Schin 				if (cp2 = getrec(jp, 1, lo < 0))
529*4887Schin 				{
530*4887Schin 					n2 = jp->file[1].fieldlen;
531*4887Schin 					continue;
532*4887Schin 				}
533*4887Schin #if DEBUG_TRACE
534*4887Schin sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi);
535*4887Schin #endif
536*4887Schin 			}
537*4887Schin 			else if (cmp > 0)
538*4887Schin 			{
539*4887Schin 				if (same)
540*4887Schin 				{
541*4887Schin 					same = 0;
542*4887Schin 				next:
543*4887Schin 					if (n2 > jp->samesize)
544*4887Schin 					{
545*4887Schin 						jp->samesize = roundof(n2, 16);
546*4887Schin 						if (!(jp->same = newof(jp->same, char, jp->samesize, 0)))
547*4887Schin 						{
548*4887Schin 							error(ERROR_SYSTEM|2, "out of space");
549*4887Schin 							return -1;
550*4887Schin 						}
551*4887Schin 					}
552*4887Schin 					memcpy(jp->same, cp2, o2 = n2);
553*4887Schin 					if (!(cp2 = getrec(jp, 1, 0)))
554*4887Schin 						break;
555*4887Schin 					n2 = jp->file[1].fieldlen;
556*4887Schin 					if (n2 == o2 && *cp2 == *jp->same && !memcmp(cp2, jp->same, n2))
557*4887Schin 						goto next;
558*4887Schin 					continue;
559*4887Schin 				}
560*4887Schin 				if (hi >= 0)
561*4887Schin 				{
562*4887Schin 					if (sfseek(jp->file[1].iop, hi, SEEK_SET) != hi)
563*4887Schin 					{
564*4887Schin 						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
565*4887Schin 						return -1;
566*4887Schin 					}
567*4887Schin 					hi = -1;
568*4887Schin 				}
569*4887Schin 				else if ((jp->outmode & C_FILE2) && outrec(jp, 1) < 0)
570*4887Schin 					return -1;
571*4887Schin 				lo = -1;
572*4887Schin 				if (cp2 = getrec(jp, 1, 1))
573*4887Schin 				{
574*4887Schin 					n2 = jp->file[1].fieldlen;
575*4887Schin 					continue;
576*4887Schin 				}
577*4887Schin #if DEBUG_TRACE
578*4887Schin sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi);
579*4887Schin #endif
580*4887Schin 			}
581*4887Schin 			else if (same)
582*4887Schin 			{
583*4887Schin 				same = 0;
584*4887Schin 				if (!(cp1 = getrec(jp, 0, 0)))
585*4887Schin 					break;
586*4887Schin 				n1 = jp->file[0].fieldlen;
587*4887Schin 				continue;
588*4887Schin 			}
589*4887Schin 			if (lo >= 0)
590*4887Schin 			{
591*4887Schin 				if ((hi = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0 ||
592*4887Schin 				    (hi -= jp->file[1].reclen) < 0 ||
593*4887Schin 				    sfseek(jp->file[1].iop, lo, SEEK_SET) != lo ||
594*4887Schin 				    !(cp2 = getrec(jp, 1, 0)))
595*4887Schin 				{
596*4887Schin 					error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
597*4887Schin 					return -1;
598*4887Schin 				}
599*4887Schin 				n2 = jp->file[1].fieldlen;
600*4887Schin 				lo = -1;
601*4887Schin 				if (jp->file[1].discard)
602*4887Schin 					sfseek(jp->file[1].iop, (Sfoff_t)-1, SEEK_SET);
603*4887Schin 			}
604*4887Schin 			else if (!cp2)
605*4887Schin 				break;
606*4887Schin 			else if ((jp->outmode & C_FILE1) && outrec(jp, -1) < 0)
607*4887Schin 				return -1;
608*4887Schin 			if (!(cp1 = getrec(jp, 0, 1)))
609*4887Schin 				break;
610*4887Schin 			n1 = jp->file[0].fieldlen;
611*4887Schin 		}
612*4887Schin 	}
613*4887Schin #if DEBUG_TRACE
614*4887Schin sfprintf(sfstdout, "[X#%d:?,%p,%p,%d%,%d,%d%s]", __LINE__, cp1, cp2, cmp, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : "");
615*4887Schin #endif
616*4887Schin 	if (cp2)
617*4887Schin 	{
618*4887Schin 		if (hi >= 0 &&
619*4887Schin 		    sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR) < hi &&
620*4887Schin 		    sfseek(jp->file[1].iop, hi, SEEK_SET) != hi)
621*4887Schin 		{
622*4887Schin 			error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
623*4887Schin 			return -1;
624*4887Schin 		}
625*4887Schin #if DEBUG_TRACE
626*4887Schin sfprintf(sfstdout, "[O#%d:%02o:%02o]", __LINE__, jp->ooutmode, jp->outmode);
627*4887Schin #endif
628*4887Schin 		cp1 = (!cp1 && cmp && hi < 0 && !jp->file[1].hit && ((jp->ooutmode ^ C_ALL) <= 1 || jp->outmode == 2)) ? cp2 : getrec(jp, 1, 0);
629*4887Schin 		cmp = 1;
630*4887Schin 		n = 1;
631*4887Schin 	}
632*4887Schin 	else
633*4887Schin 	{
634*4887Schin 		cmp = -1;
635*4887Schin 		n = 0;
636*4887Schin 	}
637*4887Schin #if DEBUG_TRACE
638*4887Schin sfprintf(sfstdout, "[X#%d:%d,%p,%p,%d,%02o,%02o%s]", __LINE__, n, cp1, cp2, cmp, jp->ooutmode, jp->outmode, (jp->outmode & C_COMMON) ? ",COMMON" : "");
639*4887Schin #endif
640*4887Schin 	if (!cp1 || !(jp->outmode & (1<<n)))
641*4887Schin 	{
642*4887Schin 		if (cp1 && jp->file[n].iop == sfstdin)
643*4887Schin 			sfseek(sfstdin, (Sfoff_t)0, SEEK_END);
644*4887Schin 		return 0;
645*4887Schin 	}
646*4887Schin 	if (outrec(jp, cmp) < 0)
647*4887Schin 		return -1;
648*4887Schin 	do
649*4887Schin 	{
650*4887Schin 		if (!getrec(jp, n, 1))
651*4887Schin 			return 0;
652*4887Schin 	} while (outrec(jp, cmp) >= 0);
653*4887Schin 	return -1;
654*4887Schin }
655*4887Schin 
656*4887Schin int
657*4887Schin b_join(int argc, char** argv, void* context)
658*4887Schin {
659*4887Schin 	register int		n;
660*4887Schin 	register char*		cp;
661*4887Schin 	register Join_t*	jp;
662*4887Schin 	char*			e;
663*4887Schin 
664*4887Schin #if !DEBUG_TRACE
665*4887Schin 	cmdinit(argc, argv, context, ERROR_CATALOG, ERROR_NOTIFY);
666*4887Schin #endif
667*4887Schin 	if (!(jp = init()))
668*4887Schin 		error(ERROR_system(1),"out of space");
669*4887Schin 	for (;;)
670*4887Schin 	{
671*4887Schin 		switch (n = optget(argv, usage))
672*4887Schin 		{
673*4887Schin 		case 0:
674*4887Schin 			break;
675*4887Schin  		case 'j':
676*4887Schin 			/*
677*4887Schin 			 * check for obsolete "-j1 field" and "-j2 field"
678*4887Schin 			 */
679*4887Schin 
680*4887Schin 			if (opt_info.offset == 0)
681*4887Schin 			{
682*4887Schin 				cp = argv[opt_info.index - 1];
683*4887Schin 				for (n = strlen(cp) - 1; n > 0 && cp[n] != 'j'; n--);
684*4887Schin 				n = cp[n] == 'j';
685*4887Schin 			}
686*4887Schin 			else
687*4887Schin 				n = 0;
688*4887Schin 			if (n)
689*4887Schin 			{
690*4887Schin 				if (opt_info.num!=1 && opt_info.num!=2)
691*4887Schin 					error(2,"-jfileno field: fileno must be 1 or 2");
692*4887Schin 				n = '0' + opt_info.num;
693*4887Schin 				if (!(cp = argv[opt_info.index]))
694*4887Schin 				{
695*4887Schin 					argc = 0;
696*4887Schin 					break;
697*4887Schin 				}
698*4887Schin 				opt_info.num = strtol(cp, &e, 10);
699*4887Schin 				if (*e)
700*4887Schin 				{
701*4887Schin 					argc = 0;
702*4887Schin 					break;
703*4887Schin 				}
704*4887Schin 				opt_info.index++;
705*4887Schin 			}
706*4887Schin 			else
707*4887Schin 			{
708*4887Schin 				jp->file[0].field = (int)(opt_info.num-1);
709*4887Schin 				n = '2';
710*4887Schin 			}
711*4887Schin 			/*FALLTHROUGH*/
712*4887Schin  		case '1':
713*4887Schin 		case '2':
714*4887Schin 			if (opt_info.num <=0)
715*4887Schin 				error(2,"field number must positive");
716*4887Schin 			jp->file[n-'1'].field = (int)(opt_info.num-1);
717*4887Schin 			continue;
718*4887Schin 		case 'v':
719*4887Schin 			jp->outmode &= ~C_COMMON;
720*4887Schin 			/*FALLTHROUGH*/
721*4887Schin 		case 'a':
722*4887Schin 			if (opt_info.num!=1 && opt_info.num!=2)
723*4887Schin 				error(2,"%s: file number must be 1 or 2", opt_info.name);
724*4887Schin 			jp->outmode |= 1<<(opt_info.num-1);
725*4887Schin 			continue;
726*4887Schin 		case 'e':
727*4887Schin 			jp->nullfield = opt_info.arg;
728*4887Schin 			continue;
729*4887Schin 		case 'o':
730*4887Schin 			/* need to accept obsolescent command syntax */
731*4887Schin 			n = getolist(jp, opt_info.arg, argv+opt_info.index);
732*4887Schin 			opt_info.index += n;
733*4887Schin 			continue;
734*4887Schin 		case 't':
735*4887Schin 			jp->state[' '] = jp->state['\t'] = 0;
736*4887Schin 			n= *(unsigned char*)opt_info.arg;
737*4887Schin 			jp->state[n] = S_DELIM;
738*4887Schin 			jp->delim = n;
739*4887Schin 			continue;
740*4887Schin 		case 'i':
741*4887Schin 			jp->ignorecase = !opt_info.num;
742*4887Schin 			continue;
743*4887Schin 		case 'B':
744*4887Schin 			jp->buffered = !opt_info.num;
745*4887Schin 			continue;
746*4887Schin 		case ':':
747*4887Schin 			error(2, "%s", opt_info.arg);
748*4887Schin 			break;
749*4887Schin 		case '?':
750*4887Schin 			done(jp);
751*4887Schin 			error(ERROR_usage(2), "%s", opt_info.arg);
752*4887Schin 			break;
753*4887Schin 		}
754*4887Schin 		break;
755*4887Schin 	}
756*4887Schin 	argv += opt_info.index;
757*4887Schin 	argc -= opt_info.index;
758*4887Schin 	if (error_info.errors || argc!=2)
759*4887Schin 	{
760*4887Schin 		done(jp);
761*4887Schin 		error(ERROR_usage(2),"%s", optusage(NiL));
762*4887Schin 	}
763*4887Schin 	jp->ooutmode = jp->outmode;
764*4887Schin 	jp->file[0].name = cp = *argv++;
765*4887Schin 	if (streq(cp,"-"))
766*4887Schin 	{
767*4887Schin 		if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0)
768*4887Schin 		{
769*4887Schin 			if (sfdcseekable(sfstdin))
770*4887Schin 				error(ERROR_warn(0),"%s: seek may fail",cp);
771*4887Schin 			else
772*4887Schin 				jp->file[0].discard = 1;
773*4887Schin 		}
774*4887Schin 		jp->file[0].iop = sfstdin;
775*4887Schin 	}
776*4887Schin 	else if (!(jp->file[0].iop = sfopen(NiL, cp, "r")))
777*4887Schin 	{
778*4887Schin 		done(jp);
779*4887Schin 		error(ERROR_system(1),"%s: cannot open",cp);
780*4887Schin 	}
781*4887Schin 	jp->file[1].name = cp = *argv;
782*4887Schin 	if (streq(cp,"-"))
783*4887Schin 	{
784*4887Schin 		if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0)
785*4887Schin 		{
786*4887Schin 			if (sfdcseekable(sfstdin))
787*4887Schin 				error(ERROR_warn(0),"%s: seek may fail",cp);
788*4887Schin 			else
789*4887Schin 				jp->file[1].discard = 1;
790*4887Schin 		}
791*4887Schin 		jp->file[1].iop = sfstdin;
792*4887Schin 	}
793*4887Schin 	else if (!(jp->file[1].iop = sfopen(NiL, cp, "r")))
794*4887Schin 	{
795*4887Schin 		done(jp);
796*4887Schin 		error(ERROR_system(1),"%s: cannot open",cp);
797*4887Schin 	}
798*4887Schin 	if (jp->buffered)
799*4887Schin 	{
800*4887Schin 		sfsetbuf(jp->file[0].iop, jp->file[0].iop, SF_UNBOUND);
801*4887Schin 		sfsetbuf(jp->file[1].iop, jp->file[0].iop, SF_UNBOUND);
802*4887Schin 	}
803*4887Schin 	jp->state['\n'] = S_NL;
804*4887Schin 	jp->outfile = sfstdout;
805*4887Schin 	if (!jp->outlist)
806*4887Schin 		jp->nullfield = 0;
807*4887Schin 	if (join(jp) < 0)
808*4887Schin 	{
809*4887Schin 		done(jp);
810*4887Schin 		error(ERROR_system(1),"write error");
811*4887Schin 	}
812*4887Schin 	else if (jp->file[0].iop==sfstdin || jp->file[1].iop==sfstdin)
813*4887Schin 		sfseek(sfstdin,(Sfoff_t)0,SEEK_END);
814*4887Schin 	done(jp);
815*4887Schin 	return error_info.errors;
816*4887Schin }
817