1 /* $NetBSD: uniq.c,v 1.15 2008/07/21 14:19:27 lukem Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Case Larsen. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/cdefs.h> 36 #ifndef lint 37 __COPYRIGHT("@(#) Copyright (c) 1989, 1993\ 38 The Regents of the University of California. All rights reserved."); 39 #endif /* not lint */ 40 41 #ifndef lint 42 #if 0 43 static char sccsid[] = "@(#)uniq.c 8.3 (Berkeley) 5/4/95"; 44 #endif 45 __RCSID("$NetBSD: uniq.c,v 1.15 2008/07/21 14:19:27 lukem Exp $"); 46 #endif /* not lint */ 47 48 #include <err.h> 49 #include <errno.h> 50 #include <stdio.h> 51 #include <ctype.h> 52 #include <stdlib.h> 53 #include <string.h> 54 #include <unistd.h> 55 56 static int cflag, dflag, uflag; 57 static int numchars, numfields, repeats; 58 59 static FILE *file(const char *, const char *); 60 static void show(FILE *, const char *); 61 static const char *skip(const char *); 62 static void obsolete(char *[]); 63 static void usage(void) __dead; 64 65 int 66 main (int argc, char *argv[]) 67 { 68 const char *t1, *t2; 69 FILE *ifp, *ofp; 70 int ch; 71 char *prevline, *thisline, *p; 72 size_t prevlinesize, thislinesize, psize; 73 74 setprogname(argv[0]); 75 ifp = ofp = NULL; 76 obsolete(argv); 77 while ((ch = getopt(argc, argv, "-cdf:s:u")) != -1) 78 switch (ch) { 79 case '-': 80 --optind; 81 goto done; 82 case 'c': 83 cflag = 1; 84 break; 85 case 'd': 86 dflag = 1; 87 break; 88 case 'f': 89 numfields = strtol(optarg, &p, 10); 90 if (numfields < 0 || *p) 91 errx(1, "illegal field skip value: %s", optarg); 92 break; 93 case 's': 94 numchars = strtol(optarg, &p, 10); 95 if (numchars < 0 || *p) 96 errx(1, "illegal character skip value: %s", 97 optarg); 98 break; 99 case 'u': 100 uflag = 1; 101 break; 102 case '?': 103 default: 104 usage(); 105 } 106 107 done: argc -= optind; 108 argv +=optind; 109 110 /* If no flags are set, default is -d -u. */ 111 if (cflag) { 112 if (dflag || uflag) 113 usage(); 114 } else if (!dflag && !uflag) 115 dflag = uflag = 1; 116 117 switch(argc) { 118 case 0: 119 ifp = stdin; 120 ofp = stdout; 121 break; 122 case 1: 123 ifp = file(argv[0], "r"); 124 ofp = stdout; 125 break; 126 case 2: 127 ifp = file(argv[0], "r"); 128 ofp = file(argv[1], "w"); 129 break; 130 default: 131 usage(); 132 } 133 134 if ((p = fgetln(ifp, &psize)) == NULL) 135 return 0; 136 prevlinesize = psize; 137 if ((prevline = malloc(prevlinesize + 1)) == NULL) 138 err(1, "malloc"); 139 (void)memcpy(prevline, p, prevlinesize); 140 prevline[prevlinesize] = '\0'; 141 142 thislinesize = psize; 143 if ((thisline = malloc(thislinesize + 1)) == NULL) 144 err(1, "malloc"); 145 146 while ((p = fgetln(ifp, &psize)) != NULL) { 147 if (psize > thislinesize) { 148 if ((thisline = realloc(thisline, psize + 1)) == NULL) 149 err(1, "realloc"); 150 thislinesize = psize; 151 } 152 (void)memcpy(thisline, p, psize); 153 thisline[psize] = '\0'; 154 155 /* If requested get the chosen fields + character offsets. */ 156 if (numfields || numchars) { 157 t1 = skip(thisline); 158 t2 = skip(prevline); 159 } else { 160 t1 = thisline; 161 t2 = prevline; 162 } 163 164 /* If different, print; set previous to new value. */ 165 if (strcmp(t1, t2)) { 166 char *t; 167 size_t ts; 168 169 show(ofp, prevline); 170 t = prevline; 171 prevline = thisline; 172 thisline = t; 173 ts = prevlinesize; 174 prevlinesize = thislinesize; 175 thislinesize = ts; 176 repeats = 0; 177 } else 178 ++repeats; 179 } 180 show(ofp, prevline); 181 free(prevline); 182 free(thisline); 183 return 0; 184 } 185 186 /* 187 * show -- 188 * Output a line depending on the flags and number of repetitions 189 * of the line. 190 */ 191 static void 192 show(FILE *ofp, const char *str) 193 { 194 195 if (cflag && *str) 196 (void)fprintf(ofp, "%4d %s", repeats + 1, str); 197 if ((dflag && repeats) || (uflag && !repeats)) 198 (void)fprintf(ofp, "%s", str); 199 } 200 201 static const char * 202 skip(const char *str) 203 { 204 int infield, nchars, nfields; 205 206 for (nfields = numfields, infield = 0; nfields && *str; ++str) 207 if (isspace((unsigned char)*str)) { 208 if (infield) { 209 infield = 0; 210 --nfields; 211 } 212 } else if (!infield) 213 infield = 1; 214 for (nchars = numchars; nchars-- && *str; ++str) 215 continue; 216 return str; 217 } 218 219 static FILE * 220 file(const char *name, const char *mode) 221 { 222 FILE *fp; 223 224 if ((fp = fopen(name, mode)) == NULL) 225 err(1, "%s", name); 226 return(fp); 227 } 228 229 static void 230 obsolete(char *argv[]) 231 { 232 char *ap, *p, *start; 233 234 while ((ap = *++argv) != NULL) { 235 /* Return if "--" or not an option of any form. */ 236 if (ap[0] != '-') { 237 if (ap[0] != '+') 238 return; 239 } else if (ap[1] == '-') 240 return; 241 if (!isdigit((unsigned char)ap[1])) 242 continue; 243 /* 244 * Digit signifies an old-style option. Malloc space for dash, 245 * new option and argument. 246 */ 247 (void)asprintf(&p, "-%c%s", ap[0] == '+' ? 's' : 'f', ap + 1); 248 if (!p) 249 err(1, "malloc"); 250 start = p; 251 *argv = start; 252 } 253 } 254 255 static void 256 usage(void) 257 { 258 (void)fprintf(stderr, "Usage: %s [-c | -du] [-f fields] [-s chars] " 259 "[input [output]]\n", getprogname()); 260 exit(1); 261 } 262