1 /* $OpenBSD: uniq.c,v 1.24 2015/12/19 10:21:01 schwarze Exp $ */ 2 /* $NetBSD: uniq.c,v 1.7 1995/08/31 22:03:48 jtc Exp $ */ 3 4 /* 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Case Larsen. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #include <ctype.h> 37 #include <err.h> 38 #include <errno.h> 39 #include <limits.h> 40 #include <locale.h> 41 #include <stdio.h> 42 #include <stdlib.h> 43 #include <string.h> 44 #include <unistd.h> 45 #include <wchar.h> 46 #include <wctype.h> 47 48 #define MAXLINELEN (8 * 1024) 49 50 int cflag, dflag, uflag; 51 int numchars, numfields, repeats; 52 53 FILE *file(char *, char *); 54 void show(FILE *, char *); 55 char *skip(char *); 56 void obsolete(char *[]); 57 __dead void usage(void); 58 59 int 60 main(int argc, char *argv[]) 61 { 62 char *t1, *t2; 63 FILE *ifp = NULL, *ofp = NULL; 64 int ch; 65 char *prevline, *thisline; 66 67 setlocale(LC_CTYPE, ""); 68 69 if (pledge("stdio rpath wpath cpath", NULL) == -1) 70 err(1, "pledge"); 71 72 obsolete(argv); 73 while ((ch = getopt(argc, argv, "cdf:s:u")) != -1) { 74 const char *errstr; 75 76 switch (ch) { 77 case 'c': 78 cflag = 1; 79 break; 80 case 'd': 81 dflag = 1; 82 break; 83 case 'f': 84 numfields = (int)strtonum(optarg, 0, INT_MAX, 85 &errstr); 86 if (errstr) 87 errx(1, "field skip value is %s: %s", 88 errstr, optarg); 89 break; 90 case 's': 91 numchars = (int)strtonum(optarg, 0, INT_MAX, 92 &errstr); 93 if (errstr) 94 errx(1, 95 "character skip value is %s: %s", 96 errstr, optarg); 97 break; 98 case 'u': 99 uflag = 1; 100 break; 101 default: 102 usage(); 103 } 104 } 105 106 argc -= optind; 107 argv += optind; 108 109 /* If neither -d nor -u are set, default is -d -u. */ 110 if (!dflag && !uflag) 111 dflag = uflag = 1; 112 113 switch(argc) { 114 case 0: 115 ifp = stdin; 116 ofp = stdout; 117 break; 118 case 1: 119 ifp = file(argv[0], "r"); 120 ofp = stdout; 121 break; 122 case 2: 123 ifp = file(argv[0], "r"); 124 ofp = file(argv[1], "w"); 125 break; 126 default: 127 usage(); 128 } 129 130 if (pledge("stdio", NULL) == -1) 131 err(1, "pledge"); 132 133 prevline = malloc(MAXLINELEN); 134 thisline = malloc(MAXLINELEN); 135 if (prevline == NULL || thisline == NULL) 136 err(1, "malloc"); 137 138 if (fgets(prevline, MAXLINELEN, ifp) == NULL) 139 exit(0); 140 141 while (fgets(thisline, MAXLINELEN, ifp)) { 142 /* If requested get the chosen fields + character offsets. */ 143 if (numfields || numchars) { 144 t1 = skip(thisline); 145 t2 = skip(prevline); 146 } else { 147 t1 = thisline; 148 t2 = prevline; 149 } 150 151 /* If different, print; set previous to new value. */ 152 if (strcmp(t1, t2)) { 153 show(ofp, prevline); 154 t1 = prevline; 155 prevline = thisline; 156 thisline = t1; 157 repeats = 0; 158 } else 159 ++repeats; 160 } 161 show(ofp, prevline); 162 exit(0); 163 } 164 165 /* 166 * show -- 167 * Output a line depending on the flags and number of repetitions 168 * of the line. 169 */ 170 void 171 show(FILE *ofp, char *str) 172 { 173 if ((dflag && repeats) || (uflag && !repeats)) { 174 if (cflag) 175 (void)fprintf(ofp, "%4d %s", repeats + 1, str); 176 else 177 (void)fprintf(ofp, "%s", str); 178 } 179 } 180 181 char * 182 skip(char *str) 183 { 184 wchar_t wc; 185 int nchars, nfields; 186 int len; 187 int field_started; 188 189 for (nfields = numfields; nfields && *str; nfields--) { 190 /* Skip one field, including preceding blanks. */ 191 for (field_started = 0; *str != '\0'; str += len) { 192 if ((len = mbtowc(&wc, str, MB_CUR_MAX)) == -1) { 193 (void)mbtowc(NULL, NULL, MB_CUR_MAX); 194 wc = L'?'; 195 len = 1; 196 } 197 if (iswblank(wc)) { 198 if (field_started) 199 break; 200 } else 201 field_started = 1; 202 } 203 } 204 205 /* Skip some additional characters. */ 206 for (nchars = numchars; nchars-- && *str != '\0'; str += len) 207 if ((len = mblen(str, MB_CUR_MAX)) == -1) 208 len = 1; 209 210 return (str); 211 } 212 213 FILE * 214 file(char *name, char *mode) 215 { 216 FILE *fp; 217 218 if (strcmp(name, "-") == 0) 219 return(*mode == 'r' ? stdin : stdout); 220 if ((fp = fopen(name, mode)) == NULL) 221 err(1, "%s", name); 222 return (fp); 223 } 224 225 void 226 obsolete(char *argv[]) 227 { 228 size_t len; 229 char *ap, *p, *start; 230 231 while ((ap = *++argv)) { 232 /* Return if "--" or not an option of any form. */ 233 if (ap[0] != '-') { 234 if (ap[0] != '+') 235 return; 236 } else if (ap[1] == '-') 237 return; 238 if (!isdigit((unsigned char)ap[1])) 239 continue; 240 /* 241 * Digit signifies an old-style option. Malloc space for dash, 242 * new option and argument. 243 */ 244 len = strlen(ap) + 3; 245 if ((start = p = malloc(len)) == NULL) 246 err(1, "malloc"); 247 *p++ = '-'; 248 *p++ = ap[0] == '+' ? 's' : 'f'; 249 (void)strlcpy(p, ap + 1, len - 2); 250 *argv = start; 251 } 252 } 253 254 __dead void 255 usage(void) 256 { 257 extern char *__progname; 258 259 (void)fprintf(stderr, 260 "usage: %s [-c] [-d | -u] [-f fields] [-s chars] [input_file [output_file]]\n", 261 __progname); 262 exit(1); 263 } 264