xref: /openbsd-src/usr.bin/uniq/uniq.c (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1 /*	$OpenBSD: uniq.c,v 1.24 2015/12/19 10:21:01 schwarze Exp $	*/
2 /*	$NetBSD: uniq.c,v 1.7 1995/08/31 22:03:48 jtc Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Case Larsen.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <ctype.h>
37 #include <err.h>
38 #include <errno.h>
39 #include <limits.h>
40 #include <locale.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <unistd.h>
45 #include <wchar.h>
46 #include <wctype.h>
47 
48 #define	MAXLINELEN	(8 * 1024)
49 
50 int cflag, dflag, uflag;
51 int numchars, numfields, repeats;
52 
53 FILE	*file(char *, char *);
54 void	 show(FILE *, char *);
55 char	*skip(char *);
56 void	 obsolete(char *[]);
57 __dead void	usage(void);
58 
59 int
60 main(int argc, char *argv[])
61 {
62 	char *t1, *t2;
63 	FILE *ifp = NULL, *ofp = NULL;
64 	int ch;
65 	char *prevline, *thisline;
66 
67 	setlocale(LC_CTYPE, "");
68 
69 	if (pledge("stdio rpath wpath cpath", NULL) == -1)
70 		err(1, "pledge");
71 
72 	obsolete(argv);
73 	while ((ch = getopt(argc, argv, "cdf:s:u")) != -1) {
74 		const char *errstr;
75 
76 		switch (ch) {
77 		case 'c':
78 			cflag = 1;
79 			break;
80 		case 'd':
81 			dflag = 1;
82 			break;
83 		case 'f':
84 			numfields = (int)strtonum(optarg, 0, INT_MAX,
85 			    &errstr);
86 			if (errstr)
87 				errx(1, "field skip value is %s: %s",
88 				    errstr, optarg);
89 			break;
90 		case 's':
91 			numchars = (int)strtonum(optarg, 0, INT_MAX,
92 			    &errstr);
93 			if (errstr)
94 				errx(1,
95 				    "character skip value is %s: %s",
96 				    errstr, optarg);
97 			break;
98 		case 'u':
99 			uflag = 1;
100 			break;
101 		default:
102 			usage();
103 		}
104 	}
105 
106 	argc -= optind;
107 	argv += optind;
108 
109 	/* If neither -d nor -u are set, default is -d -u. */
110 	if (!dflag && !uflag)
111 		dflag = uflag = 1;
112 
113 	switch(argc) {
114 	case 0:
115 		ifp = stdin;
116 		ofp = stdout;
117 		break;
118 	case 1:
119 		ifp = file(argv[0], "r");
120 		ofp = stdout;
121 		break;
122 	case 2:
123 		ifp = file(argv[0], "r");
124 		ofp = file(argv[1], "w");
125 		break;
126 	default:
127 		usage();
128 	}
129 
130 	if (pledge("stdio", NULL) == -1)
131 		err(1, "pledge");
132 
133 	prevline = malloc(MAXLINELEN);
134 	thisline = malloc(MAXLINELEN);
135 	if (prevline == NULL || thisline == NULL)
136 		err(1, "malloc");
137 
138 	if (fgets(prevline, MAXLINELEN, ifp) == NULL)
139 		exit(0);
140 
141 	while (fgets(thisline, MAXLINELEN, ifp)) {
142 		/* If requested get the chosen fields + character offsets. */
143 		if (numfields || numchars) {
144 			t1 = skip(thisline);
145 			t2 = skip(prevline);
146 		} else {
147 			t1 = thisline;
148 			t2 = prevline;
149 		}
150 
151 		/* If different, print; set previous to new value. */
152 		if (strcmp(t1, t2)) {
153 			show(ofp, prevline);
154 			t1 = prevline;
155 			prevline = thisline;
156 			thisline = t1;
157 			repeats = 0;
158 		} else
159 			++repeats;
160 	}
161 	show(ofp, prevline);
162 	exit(0);
163 }
164 
165 /*
166  * show --
167  *	Output a line depending on the flags and number of repetitions
168  *	of the line.
169  */
170 void
171 show(FILE *ofp, char *str)
172 {
173 	if ((dflag && repeats) || (uflag && !repeats)) {
174 		if (cflag)
175 			(void)fprintf(ofp, "%4d %s", repeats + 1, str);
176 		else
177 			(void)fprintf(ofp, "%s", str);
178 	}
179 }
180 
181 char *
182 skip(char *str)
183 {
184 	wchar_t wc;
185 	int nchars, nfields;
186 	int len;
187 	int field_started;
188 
189 	for (nfields = numfields; nfields && *str; nfields--) {
190 		/* Skip one field, including preceding blanks. */
191 		for (field_started = 0; *str != '\0'; str += len) {
192 			if ((len = mbtowc(&wc, str, MB_CUR_MAX)) == -1) {
193 				(void)mbtowc(NULL, NULL, MB_CUR_MAX);
194 				wc = L'?';
195 				len = 1;
196 			}
197 			if (iswblank(wc)) {
198 				if (field_started)
199 					break;
200 			} else
201 				field_started = 1;
202 		}
203 	}
204 
205 	/* Skip some additional characters. */
206 	for (nchars = numchars; nchars-- && *str != '\0'; str += len)
207 		if ((len = mblen(str, MB_CUR_MAX)) == -1)
208 			len = 1;
209 
210 	return (str);
211 }
212 
/*
 * file --
 *	Map a file name operand to a stream.  The name "-" selects a
 *	standard stream: stdin when mode starts with 'r', stdout otherwise.
 *	Any other name is opened with fopen(); failure to open is fatal.
 */
FILE *
file(char *name, char *mode)
{
	FILE *stream;

	if (strcmp(name, "-") != 0) {
		stream = fopen(name, mode);
		if (stream == NULL)
			err(1, "%s", name);
		return (stream);
	}

	if (*mode == 'r')
		return (stdin);
	return (stdout);
}
224 
/*
 * obsolete --
 *	Rewrite old-style numeric options in place so that getopt() can
 *	parse them: "-N" becomes "-fN" and "+N" becomes "-sN".  Scanning
 *	starts at argv[1] and stops at "--" or at the first argument that
 *	begins with neither '-' nor '+'.  Replacement strings are allocated
 *	with malloc() and stored in argv.
 */
void
obsolete(char *argv[])
{
	char *arg, *repl;
	size_t size;

	for (argv++; (arg = *argv) != NULL; argv++) {
		/* Stop at "--" or at the first word that is not an option. */
		if (arg[0] == '-') {
			if (arg[1] == '-')
				return;
		} else if (arg[0] != '+')
			return;

		/* Only a leading digit marks the historic syntax. */
		if (!isdigit((unsigned char)arg[1]))
			continue;

		/*
		 * Room for the dash, the option letter, the digits that
		 * follow the original sign, and the terminating NUL.
		 */
		size = strlen(arg) + 3;
		repl = malloc(size);
		if (repl == NULL)
			err(1, "malloc");
		repl[0] = '-';
		repl[1] = (arg[0] == '+') ? 's' : 'f';
		(void)strlcpy(repl + 2, arg + 1, size - 2);
		*argv = repl;
	}
}
253 
254 __dead void
255 usage(void)
256 {
257 	extern char *__progname;
258 
259 	(void)fprintf(stderr,
260 	    "usage: %s [-c] [-d | -u] [-f fields] [-s chars] [input_file [output_file]]\n",
261 	    __progname);
262 	exit(1);
263 }
264