xref: /openbsd-src/usr.bin/uniq/uniq.c (revision b2ea75c1b17e1a9a339660e7ed45cd24946b230e)
1 /*	$OpenBSD: uniq.c,v 1.7 2001/07/12 05:17:28 deraadt Exp $	*/
2 /*	$NetBSD: uniq.c,v 1.7 1995/08/31 22:03:48 jtc Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Case Larsen.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the University of
22  *	California, Berkeley and its contributors.
23  * 4. Neither the name of the University nor the names of its contributors
24  *    may be used to endorse or promote products derived from this software
25  *    without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37  * SUCH DAMAGE.
38  */
39 
/*
 * Copyright and revision identification strings.  They are compiled in
 * unless `lint' is defined.  The text is split into adjacent string
 * literals rather than a backslash-continued one so that source
 * indentation can never leak into the string.
 */
#ifndef lint
static const char copyright[] =
"@(#) Copyright (c) 1989, 1993\n"
"\tThe Regents of the University of California.  All rights reserved.\n";
#endif /* not lint */

#ifndef lint
#if 0
static char sccsid[] = "@(#)uniq.c	8.3 (Berkeley) 5/4/95";
#endif
static const char rcsid[] = "$OpenBSD: uniq.c,v 1.7 2001/07/12 05:17:28 deraadt Exp $";
#endif /* not lint */
52 
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
60 
/* Size in bytes of each of the two line buffers allocated in main(). */
#define	MAXLINELEN	(8 * 1024)

/* Option flags: non-zero once -c, -d or -u has been selected. */
int cflag, dflag, uflag;

/*
 * numchars:  argument of -s, characters to skip before comparing.
 * numfields: argument of -f, fields to skip before comparing.
 * repeats:   how many times the current line has been repeated so far.
 */
int numchars, numfields, repeats;

/* Plain ISO C prototypes; no dependency on the non-standard __P() macro. */
FILE	*file(char *, char *);
void	 show(FILE *, char *);
char	*skip(char *);
void	 obsolete(char *[]);
void	 usage(void);
71 
72 int
73 main (argc, argv)
74 	int argc;
75 	char *argv[];
76 {
77 	register char *t1, *t2;
78 	FILE *ifp, *ofp;
79 	int ch;
80 	char *prevline, *thisline, *p;
81 
82 	obsolete(argv);
83 	while ((ch = getopt(argc, argv, "-cdf:s:u")) != -1)
84 		switch (ch) {
85 		case '-':
86 			--optind;
87 			goto done;
88 		case 'c':
89 			cflag = 1;
90 			break;
91 		case 'd':
92 			dflag = 1;
93 			break;
94 		case 'f':
95 			numfields = strtol(optarg, &p, 10);
96 			if (numfields < 0 || *p)
97 				errx(1, "illegal field skip value: %s", optarg);
98 			break;
99 		case 's':
100 			numchars = strtol(optarg, &p, 10);
101 			if (numchars < 0 || *p)
102 				errx(1, "illegal character skip value: %s", optarg);
103 			break;
104 		case 'u':
105 			uflag = 1;
106 			break;
107 		case '?':
108 		default:
109 			usage();
110 	}
111 
112 done:	argc -= optind;
113 	argv +=optind;
114 
115 	/* If no flags are set, default is -d -u. */
116 	if (cflag) {
117 		if (dflag || uflag)
118 			usage();
119 	} else if (!dflag && !uflag)
120 		dflag = uflag = 1;
121 
122 	switch(argc) {
123 	case 0:
124 		ifp = stdin;
125 		ofp = stdout;
126 		break;
127 	case 1:
128 		ifp = file(argv[0], "r");
129 		ofp = stdout;
130 		break;
131 	case 2:
132 		ifp = file(argv[0], "r");
133 		ofp = file(argv[1], "w");
134 		break;
135 	default:
136 		usage();
137 	}
138 
139 	prevline = malloc(MAXLINELEN);
140 	thisline = malloc(MAXLINELEN);
141 	if (prevline == NULL || thisline == NULL)
142 		err(1, "malloc");
143 
144 	if (fgets(prevline, MAXLINELEN, ifp) == NULL)
145 		exit(0);
146 
147 	while (fgets(thisline, MAXLINELEN, ifp)) {
148 		/* If requested get the chosen fields + character offsets. */
149 		if (numfields || numchars) {
150 			t1 = skip(thisline);
151 			t2 = skip(prevline);
152 		} else {
153 			t1 = thisline;
154 			t2 = prevline;
155 		}
156 
157 		/* If different, print; set previous to new value. */
158 		if (strcmp(t1, t2)) {
159 			show(ofp, prevline);
160 			t1 = prevline;
161 			prevline = thisline;
162 			thisline = t1;
163 			repeats = 0;
164 		} else
165 			++repeats;
166 	}
167 	show(ofp, prevline);
168 	exit(0);
169 }
170 
171 /*
172  * show --
173  *	Output a line depending on the flags and number of repetitions
174  *	of the line.
175  */
176 void
177 show(ofp, str)
178 	FILE *ofp;
179 	char *str;
180 {
181 
182 	if (cflag && *str)
183 		(void)fprintf(ofp, "%4d %s", repeats + 1, str);
184 	if (dflag && repeats || uflag && !repeats)
185 		(void)fprintf(ofp, "%s", str);
186 }
187 
188 char *
189 skip(str)
190 	register char *str;
191 {
192 	register int infield, nchars, nfields;
193 
194 	for (nfields = numfields, infield = 0; nfields && *str; ++str)
195 		if (isspace(*str)) {
196 			if (infield) {
197 				infield = 0;
198 				--nfields;
199 			}
200 		} else if (!infield)
201 			infield = 1;
202 	for (nchars = numchars; nchars-- && *str; ++str);
203 	return(str);
204 }
205 
/*
 * file --
 *	Open the file called name with the given fopen() mode and return
 *	the stream.  If the open fails, err() reports name and the
 *	process exits with status 1.
 */
FILE *
file(char *name, char *mode)
{
	FILE *stream;

	stream = fopen(name, mode);
	if (stream == NULL)
		err(1, "%s", name);
	return (stream);
}
216 
/*
 * obsolete --
 *	Rewrite old-style numeric options in place: an argument "-N"
 *	becomes "-fN" and "+N" becomes "-sN", where N starts with a
 *	digit.  Scanning starts at argv[1] and stops at the first argument
 *	that begins with neither '-' nor '+', at the first one beginning
 *	with "--", or at the NULL that terminates argv.
 *
 *	Each rewritten argument lives in freshly malloc'd memory that is
 *	stored into argv and never freed here.  Exits with status 1 if
 *	the allocation fails.
 */
void
obsolete(char *argv[])
{
	size_t len;
	char *ap, *p, *start;

	while ((ap = *++argv) != NULL) {
		/* Return if "--" or not an option of any form. */
		if (ap[0] != '-') {
			if (ap[0] != '+')
				return;
		} else if (ap[1] == '-')
			return;
		/*
		 * The cast matters: handing a negative char value to
		 * isdigit() is undefined behaviour.
		 */
		if (!isdigit((unsigned char)ap[1]))
			continue;
		/*
		 * Digit signifies an old-style option.  Malloc space for
		 * dash, option letter, the len - 1 characters that follow
		 * the sign, and the NUL: len + 2 bytes in total.
		 */
		len = strlen(ap);
		if ((start = p = malloc(len + 2)) == NULL)
			err(1, "malloc");
		*p++ = '-';
		*p++ = ap[0] == '+' ? 's' : 'f';
		(void)strcpy(p, ap + 1);
		*argv = start;
	}
}
246 
/*
 * usage --
 *	Write the command synopsis to standard error and terminate the
 *	process with exit status 1.  Does not return.
 */
void
usage(void)
{
	static const char synopsis[] =
	    "usage: uniq [-c | -du] [-f fields] [-s chars] [input [output]]\n";

	(void)fprintf(stderr, "%s", synopsis);
	exit(1);
}
254