xref: /netbsd-src/usr.bin/sort/sort.h (revision eb7c1594f145c931049e1fd9eb056a5987e87e59)
1 /*	$NetBSD: sort.h,v 1.17 2003/08/07 11:32:34 jdolecek Exp $	*/
2 
3 /*-
4  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Ben Harris and Jaromir Dolecek.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *        This product includes software developed by the NetBSD
21  *        Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 /*-
40  * Copyright (c) 1993
41  *	The Regents of the University of California.  All rights reserved.
42  *
43  * This code is derived from software contributed to Berkeley by
44  * Peter McIlroy.
45  *
46  * Redistribution and use in source and binary forms, with or without
47  * modification, are permitted provided that the following conditions
48  * are met:
49  * 1. Redistributions of source code must retain the above copyright
50  *    notice, this list of conditions and the following disclaimer.
51  * 2. Redistributions in binary form must reproduce the above copyright
52  *    notice, this list of conditions and the following disclaimer in the
53  *    documentation and/or other materials provided with the distribution.
54  * 3. Neither the name of the University nor the names of its contributors
55  *    may be used to endorse or promote products derived from this software
56  *    without specific prior written permission.
57  *
58  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68  * SUCH DAMAGE.
69  *
70  *	@(#)sort.h	8.1 (Berkeley) 6/6/93
71  */
72 
73 #include <sys/param.h>
74 
75 #include <db.h>
76 #include <err.h>
77 #include <errno.h>
78 #include <fcntl.h>
79 #include <limits.h>
80 #include <stdio.h>
81 #include <stdlib.h>
82 #include <string.h>
83 
/*
 * Number of entries in each of the u_char tables declared further down
 * (ascii, Rascii, Ftable, RFtable, d_mask, gweights).
 * NOTE(review): presumably one entry per possible byte value -- confirm.
 */
#define NBINS		256
85 
/*
 * Values for masks, weights, and other flags.  Each flag occupies its own
 * bit, so several of them can be combined in one flags word.
 */
#define I	(1 << 0)	/* mask out non-printable characters */
#define D	(1 << 1)	/* sort alphanumeric characters only */
#define N	(1 << 2)	/* field is a number */
#define F	(1 << 3)	/* weight lower and upper case the same */
#define R	(1 << 4)	/* field is reversed with respect to the
				 * global weight */
#define BI	(1 << 5)	/* ignore blanks in icol */
#define BT	(1 << 6)	/* ignore blanks in tcol */
94 
/*
 * Masks for the delimiter classes: blanks, field separators, and record
 * terminators.  One bit per class.
 */
#define BLANK	(1 << 0)	/* ' ', '\t'; '\n' if -T is invoked */
#define FLD_D	(1 << 1)	/* ' ', '\t' default; from -t otherwise */
#define REC_D_F	(1 << 2)	/* '\n' default; from -T otherwise */
99 
/*
 * Limit on the number of -k options.  The clist[] array declared further
 * down is dimensioned from it as (ND+1)*2 entries.
 */
#define ND 10
101 
/*
 * Smaller / larger of two values.  Both are plain macros: each argument may
 * be evaluated more than once, so do not pass expressions with side effects.
 */
#define min(x, y)	((y) > (x) ? (x) : (y))
#define max(x, y)	((y) < (x) ? (x) : (y))
104 
/*
 * Close the stdio stream `file'; if fclose() reports EOF, terminate the
 * program through err(3) with exit status 2, printing the stream pointer.
 *
 * The argument is evaluated exactly once, and the pointer is converted to
 * void * before being handed to the "%p" conversion, as that conversion
 * requires.
 *
 * The expansion is deliberately kept as a bare brace block (rather than
 * do { } while (0)) so that existing call sites written with or without a
 * trailing semicolon keep compiling.
 */
#define	FCLOSE(file) {							\
	FILE *fclose_fp_ = (file);					\
	if (fclose(fclose_fp_) == EOF)					\
		err(2, "%p", (void *)fclose_fp_);			\
}
109 
/*
 * Write `n' items of `size' bytes each from `ptr' to stream `f'.  If
 * fwrite() reports fewer than `n' items written, terminate the program
 * through err(3) with exit status 2.
 *
 * Comparing against `n' (instead of only testing for a zero return) also
 * catches short writes, and lets a request for zero items succeed as the
 * no-op it is.  `n' is evaluated exactly once.
 *
 * The expansion is deliberately kept as a bare brace block (rather than
 * do { } while (0)) so that existing call sites written with or without a
 * trailing semicolon keep compiling.
 */
#define	EWRITE(ptr, size, n, f) {					\
	const size_t ewrite_n_ = (n);					\
	if (fwrite((ptr), (size), ewrite_n_, (f)) != ewrite_n_)		\
		err(2, NULL);						\
}
114 
115 /* length of record is currently limited to maximum string length (size_t) */
116 typedef size_t length_t;
117 
118 /* a record is a key/line pair starting at rec.data. It has a total length
119  * and an offset to the start of the line half of the pair.
120  */
121 typedef struct recheader {
122 	length_t length;
123 	length_t offset;
124 	u_char data[1];
125 } RECHEADER;
126 
127 typedef struct trecheader {
128 	length_t length;
129 	length_t offset;
130 } TRECHEADER;
131 
struct coldesc;

/*
 * A column as seen by struct field; used by enterfield.  During
 * initialization each column is matched with its corresponding coldesc,
 * which `p' then points to.
 */
struct column {
	struct coldesc *p;	/* matching column descriptor */
	int num;		/* NOTE(review): presumably the column
				 * number -- confirm */
	int indent;		/* NOTE(review): presumably an offset inside
				 * the column -- confirm */
};
140 
141 /* a coldesc has a number and pointers to the beginning and end of the
142  * corresponding column in the current line.  This is determined in enterkey.
143  */
144 typedef struct coldesc {
145 	u_char *start;
146 	u_char *end;
147 	int num;
148 } COLDESC;
149 
150 /* A field has an initial and final column; an omitted final column
151  * implies the end of the line.  Flags regulate omission of blanks and
152  * numerical sorts; mask determines which characters are ignored (from -i, -d);
153  * weights determines the sort weights of a character (from -f, -r).
154  */
155 struct field {
156 	struct column icol;
157 	struct column tcol;
158 	u_int flags;
159 	u_char *mask;
160 	u_char *weights;
161 };
162 
/*
 * Wrapper around an array of file name strings.  Neither the array slots
 * nor the characters they point to may be modified through `names'.
 */
struct filelist {
	const char *const *names;
};
166 
167 typedef int (*get_func_t)(int, int, struct filelist *, int,
168 		RECHEADER *, u_char *, struct field *);
169 typedef void (*put_func_t)(const struct recheader *, FILE *);
170 
171 extern int PANIC;	/* maximum depth of fsort before fmerge is called */
172 extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS];
173 extern u_char d_mask[NBINS];
174 extern int SINGL_FLD, SEP_FLAG, UNIQUE;
175 extern int REC_D;
176 extern const char *tmpdir;
177 extern int stable_sort;
178 extern u_char gweights[NBINS];
179 extern struct coldesc clist[(ND+1)*2];
180 extern int ncols;
181 
182 void	 append(const u_char **, int, int, FILE *,
183 	    void (*)(const RECHEADER *, FILE *), struct field *);
184 void	 concat(FILE *, FILE *);
185 length_t enterkey(RECHEADER *, DBT *, int, struct field *);
186 void	 fixit(int *, char **);
187 void	 fldreset(struct field *);
188 FILE	*ftmp(void);
189 void	 fmerge(int, int, struct filelist *, int,
190 		get_func_t, FILE *, put_func_t, struct field *);
191 void	 fsort(int, int, int, struct filelist *, int, FILE *,
192 		struct field *);
193 int	 geteasy(int, int, struct filelist *,
194 	    int, RECHEADER *, u_char *, struct field *);
195 int	 getnext(int, int, struct filelist *,
196 	    int, RECHEADER *, u_char *, struct field *);
197 int	 makekey(int, int, struct filelist *,
198 	    int, RECHEADER *, u_char *, struct field *);
199 int	 makeline(int, int, struct filelist *,
200 	    int, RECHEADER *, u_char *, struct field *);
201 void	 num_init(void);
202 void	 onepass(const u_char **, int, long, long *, u_char *, FILE *);
203 int	 optval(int, int);
204 void	 order(struct filelist *, get_func_t, struct field *);
205 void	 putline(const RECHEADER *, FILE *);
206 void	 putrec(const RECHEADER *, FILE *);
207 void	 rd_append(int, int, int, FILE *, u_char *, u_char *);
208 int	 setfield(const char *, struct field *, int);
209 void	 settables(int);
210