xref: /netbsd-src/usr.sbin/services_mkdb/uniq.c (revision 5eb26e73175106581526d04fbcbb6b610c056cdd)
1*5eb26e73Schristos /*	$NetBSD: uniq.c,v 1.7 2021/03/22 03:28:55 christos Exp $	*/
24f52df9aSchristos 
34f52df9aSchristos /*-
44f52df9aSchristos  * Copyright (c) 2007 The NetBSD Foundation, Inc.
54f52df9aSchristos  * All rights reserved.
64f52df9aSchristos  *
74f52df9aSchristos  * This code is derived from software contributed to The NetBSD Foundation
84f52df9aSchristos  * by Christos Zoulas.
94f52df9aSchristos  *
104f52df9aSchristos  * Redistribution and use in source and binary forms, with or without
114f52df9aSchristos  * modification, are permitted provided that the following conditions
124f52df9aSchristos  * are met:
134f52df9aSchristos  * 1. Redistributions of source code must retain the above copyright
144f52df9aSchristos  *    notice, this list of conditions and the following disclaimer.
154f52df9aSchristos  * 2. Redistributions in binary form must reproduce the above copyright
164f52df9aSchristos  *    notice, this list of conditions and the following disclaimer in the
174f52df9aSchristos  *    documentation and/or other materials provided with the distribution.
184f52df9aSchristos  *
194f52df9aSchristos  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
204f52df9aSchristos  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
214f52df9aSchristos  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
224f52df9aSchristos  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
234f52df9aSchristos  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
244f52df9aSchristos  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
254f52df9aSchristos  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
264f52df9aSchristos  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
274f52df9aSchristos  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
284f52df9aSchristos  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
294f52df9aSchristos  * POSSIBILITY OF SUCH DAMAGE.
304f52df9aSchristos  */
314f52df9aSchristos #include <sys/cdefs.h>
32*5eb26e73Schristos __RCSID("$NetBSD: uniq.c,v 1.7 2021/03/22 03:28:55 christos Exp $");
334f52df9aSchristos 
344f52df9aSchristos #include <stdio.h>
354f52df9aSchristos #include <string.h>
364f52df9aSchristos #include <stdlib.h>
374f52df9aSchristos #include <db.h>
384f52df9aSchristos #include <err.h>
394f52df9aSchristos #include <util.h>
404f52df9aSchristos #include <ctype.h>
414f52df9aSchristos #include <fcntl.h>
424f52df9aSchristos 
43b9cf7d31Sjoerg #include "extern.h"
444f52df9aSchristos 
45b9cf7d31Sjoerg static const HASHINFO hinfo = {
46b9cf7d31Sjoerg 	.bsize = 256,
47b9cf7d31Sjoerg 	.ffactor = 4,
48b9cf7d31Sjoerg 	.nelem = 32768,
49b9cf7d31Sjoerg 	.cachesize = 1024,
50b9cf7d31Sjoerg 	.hash = NULL,
51b9cf7d31Sjoerg 	.lorder = 0
52b9cf7d31Sjoerg };
53b9cf7d31Sjoerg 
544f52df9aSchristos static int comp(const char *, char **, size_t *);
554f52df9aSchristos 
564f52df9aSchristos /*
5765c07d0bSchristos  * Preserve only unique content lines in a file. Input lines that have
584f52df9aSchristos  * content [alphanumeric characters before a comment] are white-space
594f52df9aSchristos  * normalized and have their comments removed. Then they are placed
604f52df9aSchristos  * in a hash table, and only the first instance of them is printed.
614f52df9aSchristos  * Comment lines without any alphanumeric content are always printed
624f52df9aSchristos  * since they are there to make the file "pretty". Comment lines with
634f52df9aSchristos  * alphanumeric content are also placed into the hash table and only
644f52df9aSchristos  * printed once.
654f52df9aSchristos  */
664f52df9aSchristos void
uniq(const char * fname)674f52df9aSchristos uniq(const char *fname)
684f52df9aSchristos {
694f52df9aSchristos 	DB *db;
704f52df9aSchristos 	DBT key;
714f52df9aSchristos 	static const DBT data = { NULL, 0 };
724f52df9aSchristos 	FILE *fp;
734f52df9aSchristos 	char *line;
744f52df9aSchristos 	size_t len;
754f52df9aSchristos 
764f52df9aSchristos 	if ((db = dbopen(NULL, O_RDWR, 0, DB_HASH, &hinfo)) == NULL)
774f52df9aSchristos 		err(1, "Cannot create in memory database");
784f52df9aSchristos 
7965c07d0bSchristos 	fp = efopen(fname, "r");
804f52df9aSchristos 	while ((line = fgetln(fp, &len)) != NULL) {
814f52df9aSchristos 		size_t complen = len;
824f52df9aSchristos 		char *compline;
834f52df9aSchristos 		if (!comp(line, &compline, &complen)) {
844f52df9aSchristos 			(void)fprintf(stdout, "%*.*s", (int)len, (int)len,
854f52df9aSchristos 			    line);
864f52df9aSchristos 			continue;
874f52df9aSchristos 		}
884f52df9aSchristos 		key.data = compline;
894f52df9aSchristos 		key.size = complen;
904f52df9aSchristos 		switch ((db->put)(db, &key, &data, R_NOOVERWRITE)) {
914f52df9aSchristos 		case 0:
924f52df9aSchristos 			(void)fprintf(stdout, "%*.*s", (int)len, (int)len,
934f52df9aSchristos 			    line);
944f52df9aSchristos 			break;
954f52df9aSchristos 		case 1:
964f52df9aSchristos 			break;
974f52df9aSchristos 		case -1:
984f52df9aSchristos 			err(1, "put");
990ff5eeabSchristos 			/*NOTREACHED*/
1004f52df9aSchristos 		default:
1014f52df9aSchristos 			abort();
1024f52df9aSchristos 			break;
1034f52df9aSchristos 		}
1044f52df9aSchristos 	}
1054f52df9aSchristos 	(void)fflush(stdout);
1064f52df9aSchristos 	exit(0);
1074f52df9aSchristos }
1084f52df9aSchristos 
/*
 * normalize whitespace in the original line and place a new string
 * with whitespace converted to a single space in compline. If the line
 * contains just comments, we preserve them. If it contains data and
 * comments, we kill the comments. Return 1 if the line had actual
 * contents, or 0 if it was just a comment without alphanumeric characters.
 *
 * origline	input text; need not be NUL-terminated, only the first
 *		*len bytes are examined (scanning also stops at a NUL).
 * compline	on a return of 1, receives a newly allocated,
 *		NUL-terminated normalized copy that the caller must
 *		free(); on a return of 0 it is set to NULL.
 * len		in: number of bytes available at origline;
 *		out: length of *compline without the terminating NUL,
 *		or 0 on a return of 0.
 */
static int
comp(const char *origline, char **compline, size_t *len)
{
	const unsigned char *p;
	unsigned char *q;
	char *cline;
	size_t l = *len, complen;
	int hasalnum, iscomment;

	/* Uniform result for every "no content" return below. */
	*compline = NULL;
	*len = 0;

	/* Eat leading space */
	for (p = (const unsigned char *)origline; l && *p && isspace(*p);
	    p++, l--)
		continue;
	/*
	 * Test the remaining length first: when it reaches 0, p points one
	 * past the caller's buffer and must not be dereferenced.
	 */
	if (l == 0 || *p == '\0')
		return 0;

	/*
	 * The output can never be longer than what is left of the input,
	 * so l bytes plus the terminator always suffice.  The buffer is
	 * filled by the loop below; it needs no initial copy.
	 */
	cline = emalloc(l + 1);

	complen = 0;
	hasalnum = 0;
	iscomment = 0;

	for (q = (unsigned char *)cline; l && *p; p++, l--) {
		if (isspace(*p)) {
			/* Collapse a run of whitespace into one blank. */
			if (complen && isspace(q[-1]))
				continue;
			*q++ = ' ';
			complen++;
		} else {
			if (!iscomment && *p == '#') {
				/*
				 * A '#' after real data starts a trailing
				 * comment, which is dropped.  Otherwise the
				 * line is a comment line and is kept whole.
				 */
				if (hasalnum)
					break;
				iscomment = 1;
			} else if (isalnum(*p))
				hasalnum = 1;
			*q++ = *p;
			complen++;
		}
	}

	/* Eat trailing space */
	while (complen && isspace(q[-1])) {
		--q;
		--complen;
	}
	*q = '\0';

	if (!hasalnum) {
		/* Nothing worth hashing; the caller prints it verbatim. */
		free(cline);
		return 0;
	}
	*compline = cline;
	*len = complen;
	return 1;
}
173