1 /* $NetBSD: uniq.c,v 1.3 2007/06/24 19:51:43 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2007 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Christos Zoulas. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 #include <sys/cdefs.h> 39 __RCSID("$NetBSD: uniq.c,v 1.3 2007/06/24 19:51:43 christos Exp $"); 40 41 #include <stdio.h> 42 #include <string.h> 43 #include <stdlib.h> 44 #include <db.h> 45 #include <err.h> 46 #include <util.h> 47 #include <ctype.h> 48 #include <fcntl.h> 49 50 extern const HASHINFO hinfo; 51 52 void uniq(const char *); 53 static int comp(const char *, char **, size_t *); 54 55 /* 56 * Preserve only unique content lines in a file. Input lines that have 57 * content [alphanumeric characters before a comment] are white-space 58 * normalized and have their comments removed. Then they are placed 59 * in a hash table, and only the first instance of them is printed. 60 * Comment lines without any alphanumeric content are always printed 61 * since they are there to make the file "pretty". Comment lines with 62 * alphanumeric content are also placed into the hash table and only 63 * printed once. 
64 */ 65 void 66 uniq(const char *fname) 67 { 68 DB *db; 69 DBT key; 70 static const DBT data = { NULL, 0 }; 71 FILE *fp; 72 char *line; 73 size_t len; 74 75 if ((db = dbopen(NULL, O_RDWR, 0, DB_HASH, &hinfo)) == NULL) 76 err(1, "Cannot create in memory database"); 77 78 fp = efopen(fname, "r"); 79 while ((line = fgetln(fp, &len)) != NULL) { 80 size_t complen = len; 81 char *compline; 82 if (!comp(line, &compline, &complen)) { 83 (void)fprintf(stdout, "%*.*s", (int)len, (int)len, 84 line); 85 continue; 86 } 87 key.data = compline; 88 key.size = complen; 89 switch ((db->put)(db, &key, &data, R_NOOVERWRITE)) { 90 case 0: 91 (void)fprintf(stdout, "%*.*s", (int)len, (int)len, 92 line); 93 break; 94 case 1: 95 break; 96 case -1: 97 err(1, "put"); 98 default: 99 abort(); 100 break; 101 } 102 } 103 (void)fflush(stdout); 104 exit(0); 105 } 106 107 /* 108 * normalize whitespace in the original line and place a new string 109 * with whitespace converted to a single space in compline. If the line 110 * contains just comments, we preserve them. If it contains data and 111 * comments, we kill the comments. Return 1 if the line had actual 112 * contents, or 0 if it was just a comment without alphanumeric characters. 
/*
 * comp --
 *	Produce the canonical form of one input line.
 *
 *	origline  line text; at most *len bytes are examined and scanning
 *		  also stops at a NUL byte.
 *	compline  out: newly allocated, NUL-terminated canonical string that
 *		  the caller must free(), or NULL when nothing remains after
 *		  leading whitespace is skipped.
 *	len	  in: number of bytes available in origline;
 *		  out: length of *compline, excluding the terminating NUL.
 *
 *	Leading and trailing whitespace is dropped and every interior run of
 *	whitespace becomes a single space.  A '#' seen after alphanumeric
 *	content ends the line (the trailing comment is discarded); a '#' seen
 *	before any alphanumeric content marks the line as a comment, which is
 *	then kept in full.
 *
 *	Returns 1 if the line contains at least one alphanumeric character,
 *	0 otherwise.
 */
static int
comp(const char *origline, char **compline, size_t *len)
{
	const unsigned char *p = (const unsigned char *)origline;
	unsigned char *q;
	char *cline;
	size_t l = *len;
	size_t complen = 0;
	int hasalnum = 0;
	int iscomment = 0;

	/* Eat leading space */
	while (l != 0 && *p != '\0' && isspace(*p)) {
		p++;
		l--;
	}

	/*
	 * Nothing but whitespace: report "no content" before allocating
	 * anything, and leave well-defined values in the out parameters.
	 */
	if (l == 0 || *p == '\0') {
		*compline = NULL;
		*len = 0;
		return 0;
	}

	/* The canonical form is never longer than the remaining input. */
	cline = emalloc(l + 1);

	for (q = (unsigned char *)cline; l != 0 && *p != '\0'; p++, l--) {
		if (isspace(*p)) {
			/*
			 * Whitespace is only ever stored as ' ', so a
			 * preceding ' ' means this run is already recorded.
			 */
			if (complen != 0 && q[-1] == ' ')
				continue;
			*q++ = ' ';
			complen++;
			continue;
		}

		if (!iscomment && *p == '#') {
			/* Comment after real data: drop the rest. */
			if (hasalnum)
				break;
			iscomment = 1;
		} else if (isalnum(*p)) {
			/* Store a plain flag, not isalnum()'s raw value. */
			hasalnum = 1;
		}
		*q++ = *p;
		complen++;
	}

	/* Eat trailing space */
	while (complen != 0 && q[-1] == ' ') {
		--q;
		--complen;
	}
	*q = '\0';

	*compline = cline;
	*len = complen;
	return hasalnum;
}