1 /* $NetBSD: uniq.c,v 1.20 2016/10/16 06:17:51 abhinav Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Case Larsen. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/cdefs.h> 36 #ifndef lint 37 __COPYRIGHT("@(#) Copyright (c) 1989, 1993\ 38 The Regents of the University of California. All rights reserved."); 39 #endif /* not lint */ 40 41 #ifndef lint 42 #if 0 43 static char sccsid[] = "@(#)uniq.c 8.3 (Berkeley) 5/4/95"; 44 #endif 45 __RCSID("$NetBSD: uniq.c,v 1.20 2016/10/16 06:17:51 abhinav Exp $"); 46 #endif /* not lint */ 47 48 #include <err.h> 49 #include <errno.h> 50 #include <stdio.h> 51 #include <ctype.h> 52 #include <stdlib.h> 53 #include <string.h> 54 #include <unistd.h> 55 56 static int cflag, dflag, uflag; 57 static int numchars, numfields, repeats; 58 59 static FILE *file(const char *, const char *); 60 static void show(FILE *, const char *); 61 static const char *skip(const char *, size_t *); 62 static void obsolete(char *[]); 63 static void usage(void) __dead; 64 65 int 66 main (int argc, char *argv[]) 67 { 68 const char *prevp, *thisp; 69 FILE *ifp, *ofp; 70 int ch; 71 char *prevline, *thisline, *p; 72 size_t prevlinesize, thislinesize, psize; 73 size_t prevlen, thislen; 74 75 setprogname(argv[0]); 76 ifp = ofp = NULL; 77 obsolete(argv); 78 while ((ch = getopt(argc, argv, "-cdf:s:u")) != -1) 79 switch (ch) { 80 case '-': 81 --optind; 82 goto done; 83 case 'c': 84 cflag = 1; 85 break; 86 case 'd': 87 dflag = 1; 88 break; 89 case 'f': 90 numfields = strtol(optarg, &p, 10); 91 if (numfields < 0 || *p) 92 errx(1, "illegal field skip value: %s", optarg); 93 break; 94 case 's': 95 numchars = strtol(optarg, &p, 10); 96 if (numchars < 0 || *p) 97 errx(1, "illegal character skip value: %s", 98 optarg); 99 break; 100 case 'u': 101 uflag = 1; 102 break; 103 case '?': 104 default: 105 usage(); 106 } 107 108 done: argc -= optind; 109 argv +=optind; 110 111 switch(argc) { 112 case 0: 113 ifp = stdin; 114 ofp = stdout; 115 break; 116 case 1: 117 ifp = file(argv[0], "r"); 118 ofp = stdout; 119 break; 120 case 2: 121 ifp = file(argv[0], "r"); 122 ofp = file(argv[1], "w"); 123 break; 124 default: 125 usage(); 126 } 127 128 if ((p = fgetln(ifp, &psize)) == NULL) 129 return 0; 130 prevlinesize = prevlen = psize; 131 if ((prevline = malloc(prevlinesize + 1)) == NULL) 132 err(1, "malloc"); 133 (void)memcpy(prevline, p, prevlinesize); 134 prevline[prevlinesize] = '\0'; 135 136 if (numfields || numchars) 137 prevp = skip(prevline, &prevlen); 138 else 139 prevp = prevline; 140 141 thislinesize = psize; 142 if ((thisline = malloc(thislinesize + 1)) == NULL) 143 err(1, "malloc"); 144 145 while ((p = fgetln(ifp, &psize)) != NULL) { 146 if (psize > thislinesize) { 147 if ((thisline = realloc(thisline, psize + 1)) == NULL) 148 err(1, "realloc"); 149 thislinesize = psize; 150 } 151 thislen = psize; 152 (void)memcpy(thisline, p, psize); 153 thisline[psize] = '\0'; 154 155 /* If requested get the chosen fields + character offsets. */ 156 if (numfields || numchars) { 157 thisp = skip(thisline, &thislen); 158 } else { 159 thisp = thisline; 160 } 161 162 /* If different, print; set previous to new value. */ 163 if (thislen != prevlen || strcmp(thisp, prevp)) { 164 char *t; 165 size_t ts; 166 167 show(ofp, prevline); 168 t = prevline; 169 prevline = thisline; 170 thisline = t; 171 ts = prevlinesize; 172 prevlinesize = thislinesize; 173 thislinesize = ts; 174 prevp = thisp; 175 prevlen = thislen; 176 repeats = 0; 177 } else 178 ++repeats; 179 } 180 show(ofp, prevline); 181 free(prevline); 182 free(thisline); 183 return 0; 184 } 185 186 /* 187 * show -- 188 * Output a line depending on the flags and number of repetitions 189 * of the line. 190 */ 191 static void 192 show(FILE *ofp, const char *str) 193 { 194 195 if ((dflag && repeats == 0) || (uflag && repeats > 0)) 196 return; 197 if (cflag) { 198 (void)fprintf(ofp, "%4d %s", repeats + 1, str); 199 } else { 200 (void)fprintf(ofp, "%s", str); 201 } 202 } 203 204 static const char * 205 skip(const char *str, size_t *linesize) 206 { 207 int infield, nchars, nfields; 208 size_t ls = *linesize; 209 210 for (nfields = numfields, infield = 0; nfields && *str; ++str, --ls) 211 if (isspace((unsigned char)*str)) { 212 if (infield) { 213 infield = 0; 214 --nfields; 215 } 216 } else if (!infield) 217 infield = 1; 218 for (nchars = numchars; nchars-- && *str; ++str, --ls) 219 continue; 220 *linesize = ls; 221 return str; 222 } 223 224 static FILE * 225 file(const char *name, const char *mode) 226 { 227 FILE *fp; 228 229 if ((fp = fopen(name, mode)) == NULL) 230 err(1, "%s", name); 231 return(fp); 232 } 233 234 static void 235 obsolete(char *argv[]) 236 { 237 char *ap, *p, *start; 238 239 while ((ap = *++argv) != NULL) { 240 /* Return if "--" or not an option of any form. */ 241 if (ap[0] != '-') { 242 if (ap[0] != '+') 243 return; 244 } else if (ap[1] == '-') 245 return; 246 if (!isdigit((unsigned char)ap[1])) 247 continue; 248 /* 249 * Digit signifies an old-style option. Malloc space for dash, 250 * new option and argument. 251 */ 252 (void)asprintf(&p, "-%c%s", ap[0] == '+' ? 's' : 'f', ap + 1); 253 if (!p) 254 err(1, "malloc"); 255 start = p; 256 *argv = start; 257 } 258 } 259 260 static void 261 usage(void) 262 { 263 (void)fprintf(stderr, "usage: %s [-cdu] [-f fields] [-s chars] " 264 "[input_file [output_file]]\n", getprogname()); 265 exit(1); 266 } 267