/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
260Sstevel@tonic-gate 
#pragma ident	"%Z%%M%	%I%	%E% SMI"
280Sstevel@tonic-gate 
/*
 * This file contains public functions for managing DHCP network
 * containers.  For the semantics of these functions, please see the
 * Enterprise DHCP Architecture Document.
 *
 * This module uses synchronization guarantees provided by dsvclockd(1M);
 * please see $SRC/lib/libdhcpsvc/private/README.synch for details.
 *
 * Big Theory Statement for the SUNWbinfiles DHCP Network Module
 * =============================================================
 *
 * 1. On-disk Structure
 *
 * Each container consists of two basic pieces on-disk: a header and an
 * array of records.  In order to provide fast client IP lookup, the array
 * of records is directly indexed by client IP address (using a simple
 * mapping function).  In order to provide fast client id lookup, each
 * in-use record is also on exactly one doubly-linked client id hash chain
 * (the hash chain heads are contained in the header).  For all other
 * lookups, we can restrict our search to only the in-use records by merely
 * walking all of the hash chains.  Here's a crude illustration of what
 * this looks like on-disk (note that hash chains 2 and 3 are empty):
 *
 *              _______________________________________________
 *             | container info   | hash chain heads (buckets) |
 *    header   |                  | 1 | 2 | 3 |  [ .... ]  | N |
 *             |                  | | |   |   |            | | |
 *             |__________________|_|________________________|_|
 *             | rec1      | rec2   |  | rec3      | rec4    | |
 *             |           |        +--->          |         | |
 *             | unused    | unused    | hash1     | unused  | |
 *             |___________|___________|________^|_|_________|_|
 *             | rec5      | rec6      | rec7   |v | rec8    | |
 *             |           |           |           ->        | |
 *    records  | unused    | hashN     | hash1    <- hash1   | |
 *             |___________|________^|_|___________|_________|_|
 *             |           :        :: :           :         : |
 *             |           :        :: : [ more records... ] : |
 *             |           :        :: :           :         : |
 *             |___________:________::_:___________:_________:_|
 *             | recN-3    | recN-2 || | recN-1    | recN    v |
 *             |           |        |+-->          ->          |
 *             | unused    | unused +--- hashN    <- hashN     |
 *             |___________|___________|___________|___________|
 *
 * Note that the actual on-disk format is a bit more complicated than this
 * due to robustness issues; see section 3 below for details.
 *
 * 2. Robustness Requirements
 *
 * This module has been designed to be as efficient as possible while still
 * retaining the robustness minimally required for an enterprise-level
 * environment.  In particular, it is designed to handle the following
 * failure situations:
 *
 *	1. An update operation (add, modify, delete) on a container is
 *	   unable to complete due to an unexpected internal error at
 *	   any point in the update code.
 *
 *	2. An update operation (add, modify, delete) on a container is
 *	   unable to complete due to unexpected program termination while
 *	   at any point in the update code.
 *
 * If either of these situations occurs, the container in question must be
 * left in a consistent (and viable) state.  In addition, only the pending
 * transaction (at most) may be lost.
 *
 * 3. Robustness Techniques
 *
 * This module uses a few different techniques to meet our robustness goals
 * while maintaining high performance.  The biggest problem we encounter
 * when trying to achieve robustness is updating the client id hash chain.
 * In particular, it is not possible to atomically add, move, or delete an
 * item from a doubly linked list, thus creating a window where a crash
 * could leave our hash chains in an inconsistent state.
 *
 * To address this problem, we actually maintain two images (copies) of all
 * the hash chains in the container.  At any point in time, exactly one of
 * the two images is active (and thus considered authoritative), as
 * indicated by a byte in the container header.  When performing an update
 * operation, all hash chain modifications are done on the *inactive*
 * image, then, once the inactive image has completed the hash chain
 * operations required by the update, the active and inactive images are
 * atomically switched, making the formerly-inactive image authoritative.
 * After the image switch, the update code then updates the formerly-active
 * image's hash chains to match the active image's hash chains.
 *
 * This approach has the nice property that internal container consistency
 * can always be restored after a crash by just resynchronizing the
 * inactive image's hash chains with the active image's chains.  Note that
 * the atomic image switch serves as the "commit point" for the operation:
 * if we crash before this point, we roll back the operation upon recovery
 * and it appears as though the operation never happened; if we crash after
 * this point, we roll forward the rest of the operation upon recovery as
 * if the crash had not happened.
 *
 * This technique is enough to robustly implement our add and delete
 * operations, but modify has an additional complication due to our direct
 * mapping of client IP addresses to records.  In particular, unless the
 * record modification includes changing the client IP address, the
 * modified record must be written at the same location as the original
 * record -- however, if the modify operation fails part way through
 * writing out the new client record, the record will be corrupt and we
 * will have no way to return the record to a consistent state.  To address
 * this issue, we allocate a spare record in the container header called
 * the "temporary" record.  Upon a modification of this type, we first
 * write the modified record to the temporary record and indicate that the
 * temporary record is currently proxying for the actual record.  We then
 * copy the temporary record to the actual record and make the temporary
 * record available again for future use.  If a crash occurs before the
 * copy to the temporary record is complete, then we just roll back as if
 * the modify never happened (since we have not modified the actual
 * record).  If a crash occurs after copying the temporary record, we roll
 * forward and complete the copy operation as if the crash never happened.
 * Note that there are some additional subtle complications here; see the
 * comments in the code for details.
 */
1460Sstevel@tonic-gate 
1470Sstevel@tonic-gate #include <unistd.h>
1480Sstevel@tonic-gate #include <sys/types.h>
1490Sstevel@tonic-gate #include <sys/socket.h>
1500Sstevel@tonic-gate #include <sys/isa_defs.h>
1510Sstevel@tonic-gate #include <netinet/in.h>
1520Sstevel@tonic-gate #include <dhcp_svc_public.h>
1530Sstevel@tonic-gate #include <stdlib.h>
1540Sstevel@tonic-gate #include <dirent.h>
1550Sstevel@tonic-gate #include <string.h>
1560Sstevel@tonic-gate #include <libgen.h>
1570Sstevel@tonic-gate #include <errno.h>
1580Sstevel@tonic-gate #include <stddef.h>
1590Sstevel@tonic-gate #include <assert.h>
1600Sstevel@tonic-gate 
1610Sstevel@tonic-gate #include "dhcp_network.h"
1620Sstevel@tonic-gate #include "util.h"
1630Sstevel@tonic-gate 
1640Sstevel@tonic-gate static uint16_t	cidhash(const uchar_t *, size_t);
1650Sstevel@tonic-gate static void	net2path(char *, size_t, const char *, ipaddr_t);
1660Sstevel@tonic-gate static int	check_dn(dn_handle_t *);
1670Sstevel@tonic-gate static int	getabyte(int, off_t, uchar_t *);
1680Sstevel@tonic-gate static int	setabyte(int, off_t, uchar_t);
1690Sstevel@tonic-gate static int	read_rec(int, dn_filerec_t *, dn_recid_t);
1700Sstevel@tonic-gate static int	write_rec(int, dn_filerec_t *, dn_recid_t);
1710Sstevel@tonic-gate static int	read_header(int, dn_header_t *, boolean_t);
1720Sstevel@tonic-gate static int	write_header(int, dn_header_t *);
1730Sstevel@tonic-gate static int	read_hashhead(int, dn_recid_t *, uint16_t, uchar_t);
1740Sstevel@tonic-gate static int	write_hashhead(int, dn_recid_t, uint16_t, uchar_t);
1750Sstevel@tonic-gate static boolean_t record_match(const dn_rec_t *, const dn_rec_t *, uint_t);
1760Sstevel@tonic-gate 
1770Sstevel@tonic-gate int
open_dn(void ** handlep,const char * dir,uint_t flags,const struct in_addr * netp,const struct in_addr * maskp)1780Sstevel@tonic-gate open_dn(void **handlep, const char *dir, uint_t flags,
1790Sstevel@tonic-gate     const struct in_addr *netp, const struct in_addr *maskp)
1800Sstevel@tonic-gate {
1810Sstevel@tonic-gate 	dn_handle_t	*dhp;
1820Sstevel@tonic-gate 	dn_header_t	header = { 0 };
1830Sstevel@tonic-gate 	char		dnpath[MAXPATHLEN];
1840Sstevel@tonic-gate 	int		i, retval;
1850Sstevel@tonic-gate 	off_t		filesz;
1860Sstevel@tonic-gate 
1870Sstevel@tonic-gate 	dhp = malloc(sizeof (dn_handle_t));
1880Sstevel@tonic-gate 	if (dhp == NULL)
1890Sstevel@tonic-gate 		return (DSVC_NO_MEMORY);
1900Sstevel@tonic-gate 
1910Sstevel@tonic-gate 	/*
1920Sstevel@tonic-gate 	 * As a safeguard, check that the size of a dn_header_t hasn't
1930Sstevel@tonic-gate 	 * changed (since it contains a dn_rec_t, this will probably catch
1940Sstevel@tonic-gate 	 * a change in that structure as well).  If it has, bail rather
1950Sstevel@tonic-gate 	 * than totally corrupting the container (by continuing).  Note
1960Sstevel@tonic-gate 	 * that this situation indicates an internal programming error,
1970Sstevel@tonic-gate 	 * which is why we prefer assert() to just returning DSVC_INTERNAL.
1980Sstevel@tonic-gate 	 */
1990Sstevel@tonic-gate 	/* CONSTCOND */
2000Sstevel@tonic-gate 	assert(sizeof (header) == 32768);
2010Sstevel@tonic-gate 
2020Sstevel@tonic-gate 	net2path(dnpath, MAXPATHLEN, dir, netp->s_addr);
2030Sstevel@tonic-gate 	retval = open_file(dnpath, flags, &dhp->dh_fd);
2040Sstevel@tonic-gate 	if (retval != DSVC_SUCCESS) {
2050Sstevel@tonic-gate 		free(dhp);
2060Sstevel@tonic-gate 		return (retval);
2070Sstevel@tonic-gate 	}
2080Sstevel@tonic-gate 
2090Sstevel@tonic-gate 	if (flags & DSVC_CREATE) {
2100Sstevel@tonic-gate 		/*
2110Sstevel@tonic-gate 		 * We just created the per-network container; initialize
2120Sstevel@tonic-gate 		 * the header and put it out on disk.  Note that we leave
2130Sstevel@tonic-gate 		 * `dnh_version' zero until the entire header has been
2140Sstevel@tonic-gate 		 * written, so we can detect partial failure.
2150Sstevel@tonic-gate 		 */
2160Sstevel@tonic-gate 		header.dnh_version	= 0;
2170Sstevel@tonic-gate 		header.dnh_network	= netp->s_addr;
2180Sstevel@tonic-gate 		header.dnh_netmask	= maskp->s_addr;
2190Sstevel@tonic-gate 		header.dnh_magic	= DN_MAGIC;
2200Sstevel@tonic-gate 		header.dnh_tempimage	= DN_NOIMAGE;
2210Sstevel@tonic-gate 		header.dnh_image	= 0;
2220Sstevel@tonic-gate 		header.dnh_errors	= 0;
2230Sstevel@tonic-gate 		header.dnh_checks	= 0;
2240Sstevel@tonic-gate 		for (i = 0; i < DN_CIDHASHSZ; i++) {
2250Sstevel@tonic-gate 			header.dnh_cidhash[i][header.dnh_image]  = DN_NOREC;
2260Sstevel@tonic-gate 			header.dnh_cidhash[i][!header.dnh_image] = DN_NOREC;
2270Sstevel@tonic-gate 		}
2280Sstevel@tonic-gate 
2290Sstevel@tonic-gate 		if (write_header(dhp->dh_fd, &header) == -1) {
2300Sstevel@tonic-gate 			retval = syserr_to_dsvcerr(errno);
2310Sstevel@tonic-gate 			(void) remove_dn(dir, netp);
2320Sstevel@tonic-gate 			(void) close_dn((void **)&dhp);
2330Sstevel@tonic-gate 			return (retval);
2340Sstevel@tonic-gate 		}
2350Sstevel@tonic-gate 
2360Sstevel@tonic-gate 		/*
2370Sstevel@tonic-gate 		 * Virtually reserve all the space we're going to need for
2380Sstevel@tonic-gate 		 * the dn_rec_t's ahead of time, so that we don't have to
2390Sstevel@tonic-gate 		 * worry about "growing" the file later (though it may
2400Sstevel@tonic-gate 		 * increase in size as we fill in holes).  We're guaranteed
2410Sstevel@tonic-gate 		 * that we'll read these holes as zeros, which we take
2420Sstevel@tonic-gate 		 * advantage of since a dn_filerec_t with a rec_prev of
2430Sstevel@tonic-gate 		 * DN_NOREC (which is 0) indicates that a record is unused.
2440Sstevel@tonic-gate 		 */
2450Sstevel@tonic-gate 		filesz = RECID2OFFSET(RECID(~0, header.dnh_netmask) + 1);
2460Sstevel@tonic-gate 		retval = setabyte(dhp->dh_fd, filesz - 1, 0);
2470Sstevel@tonic-gate 		if (retval != DSVC_SUCCESS) {
2480Sstevel@tonic-gate 			(void) remove_dn(dir, netp);
2490Sstevel@tonic-gate 			(void) close_dn((void **)&dhp);
2500Sstevel@tonic-gate 			return (retval);
2510Sstevel@tonic-gate 		}
2520Sstevel@tonic-gate 
2530Sstevel@tonic-gate 		/*
2540Sstevel@tonic-gate 		 * Set the version field on the container, effectively
2550Sstevel@tonic-gate 		 * making it available for use.
2560Sstevel@tonic-gate 		 */
2570Sstevel@tonic-gate 		retval = setabyte(dhp->dh_fd, offsetof(dn_header_t,
2580Sstevel@tonic-gate 		    dnh_version), DSVC_CONVER);
2590Sstevel@tonic-gate 		if (retval != DSVC_SUCCESS) {
2600Sstevel@tonic-gate 			(void) remove_dn(dir, netp);
2610Sstevel@tonic-gate 			(void) close_dn((void **)&dhp);
2620Sstevel@tonic-gate 			return (retval);
2630Sstevel@tonic-gate 		}
2640Sstevel@tonic-gate 	} else {
2650Sstevel@tonic-gate 		/*
2660Sstevel@tonic-gate 		 * Container already exists; sanity check against the
2670Sstevel@tonic-gate 		 * header that's on-disk.  If we detect a problem then
2680Sstevel@tonic-gate 		 * either someone scribbled on our container or we
2690Sstevel@tonic-gate 		 * terminated abnormally when creating the container.
2700Sstevel@tonic-gate 		 */
2710Sstevel@tonic-gate 		if (read_header(dhp->dh_fd, &header, B_FALSE) == -1) {
2720Sstevel@tonic-gate 			retval = syserr_to_dsvcerr(errno);
2730Sstevel@tonic-gate 			(void) close_dn((void **)&dhp);
2740Sstevel@tonic-gate 			return (retval);
2750Sstevel@tonic-gate 		}
2760Sstevel@tonic-gate 
2770Sstevel@tonic-gate 		if (header.dnh_network != netp->s_addr ||
2780Sstevel@tonic-gate 		    header.dnh_version != DSVC_CONVER ||
2790Sstevel@tonic-gate 		    header.dnh_magic != DN_MAGIC) {
2800Sstevel@tonic-gate 			(void) close_dn((void **)&dhp);
2810Sstevel@tonic-gate 			return (DSVC_INTERNAL);
2820Sstevel@tonic-gate 		}
2830Sstevel@tonic-gate 	}
2840Sstevel@tonic-gate 
2850Sstevel@tonic-gate 	dhp->dh_netmask	= header.dnh_netmask;
2860Sstevel@tonic-gate 	dhp->dh_oflags	= flags;
2870Sstevel@tonic-gate 
2880Sstevel@tonic-gate 	*handlep = dhp;
2890Sstevel@tonic-gate 	return (DSVC_SUCCESS);
2900Sstevel@tonic-gate }
2910Sstevel@tonic-gate 
2920Sstevel@tonic-gate int
close_dn(void ** handlep)2930Sstevel@tonic-gate close_dn(void **handlep)
2940Sstevel@tonic-gate {
2950Sstevel@tonic-gate 	dn_handle_t *dhp = (dn_handle_t *)*handlep;
2960Sstevel@tonic-gate 
2970Sstevel@tonic-gate 	if (close(dhp->dh_fd) == -1)
2980Sstevel@tonic-gate 		return (DSVC_INTERNAL);
2990Sstevel@tonic-gate 
3000Sstevel@tonic-gate 	free(dhp);
3010Sstevel@tonic-gate 	return (DSVC_SUCCESS);
3020Sstevel@tonic-gate }
3030Sstevel@tonic-gate 
3040Sstevel@tonic-gate int
remove_dn(const char * dir,const struct in_addr * netp)3050Sstevel@tonic-gate remove_dn(const char *dir, const struct in_addr *netp)
3060Sstevel@tonic-gate {
3070Sstevel@tonic-gate 	char dnpath[MAXPATHLEN];
3080Sstevel@tonic-gate 
3090Sstevel@tonic-gate 	net2path(dnpath, MAXPATHLEN, dir, netp->s_addr);
3100Sstevel@tonic-gate 	if (unlink(dnpath) == -1)
3110Sstevel@tonic-gate 		return (syserr_to_dsvcerr(errno));
3120Sstevel@tonic-gate 
3130Sstevel@tonic-gate 	return (DSVC_SUCCESS);
3140Sstevel@tonic-gate }
3150Sstevel@tonic-gate 
3160Sstevel@tonic-gate int
lookup_dn(void * handle,boolean_t partial,uint_t query,int count,const dn_rec_t * targetp,dn_rec_list_t ** recordsp,uint_t * nrecordsp)3170Sstevel@tonic-gate lookup_dn(void *handle, boolean_t partial, uint_t query, int count,
3180Sstevel@tonic-gate     const dn_rec_t *targetp, dn_rec_list_t **recordsp, uint_t *nrecordsp)
3190Sstevel@tonic-gate {
3200Sstevel@tonic-gate 	dn_handle_t	*dhp = (dn_handle_t *)handle;
3210Sstevel@tonic-gate 	int		retval = DSVC_SUCCESS;
3220Sstevel@tonic-gate 	uint_t		nrecords, n;
3230Sstevel@tonic-gate 	uint16_t	hash;
3240Sstevel@tonic-gate 	dn_rec_t	*recordp;
3250Sstevel@tonic-gate 	dn_rec_list_t	*records, *new_records;
3260Sstevel@tonic-gate 	dn_recid_t	recid, temp_recid = DN_NOREC;
3270Sstevel@tonic-gate 	dn_filerec_t	rec;
3280Sstevel@tonic-gate 	dn_header_t	header;
3290Sstevel@tonic-gate 	uchar_t		image;
3300Sstevel@tonic-gate 	int		fd = dhp->dh_fd;
3310Sstevel@tonic-gate 
3320Sstevel@tonic-gate 	if ((dhp->dh_oflags & DSVC_READ) == 0)
3330Sstevel@tonic-gate 		return (DSVC_ACCESS);
3340Sstevel@tonic-gate 
3350Sstevel@tonic-gate 	if (read_header(fd, &header, B_FALSE) == -1)
3360Sstevel@tonic-gate 		return (syserr_to_dsvcerr(errno));
3370Sstevel@tonic-gate 
3380Sstevel@tonic-gate 	/*
3390Sstevel@tonic-gate 	 * It's possible that a previous update to this container failed
3400Sstevel@tonic-gate 	 * part-way through.  In general, this is fine since we always keep
3410Sstevel@tonic-gate 	 * our active image's hash chains correct and only swap to the
3420Sstevel@tonic-gate 	 * alternate image when the other image is completely safe to use.
3430Sstevel@tonic-gate 	 * However, for reasons explained in modify_dn(), it's possible
3440Sstevel@tonic-gate 	 * that a record being modified was not completely updated before a
3450Sstevel@tonic-gate 	 * failure occurred.  In this case, the actual data for that record
3460Sstevel@tonic-gate 	 * is contained in the temporary record in the header.  We need to
3470Sstevel@tonic-gate 	 * be careful to use that temporary record anywhere we'd otherwise
3480Sstevel@tonic-gate 	 * refer to the partially updated record.  Note that we do this
3490Sstevel@tonic-gate 	 * rather than attempting to restore the consistency of the
3500Sstevel@tonic-gate 	 * container because we're MT-hot here.
3510Sstevel@tonic-gate 	 */
3520Sstevel@tonic-gate 	if (header.dnh_dirty && header.dnh_tempimage == header.dnh_image) {
3530Sstevel@tonic-gate 		temp_recid = RECID(header.dnh_temp.rec_dn.dn_cip.s_addr,
3540Sstevel@tonic-gate 		    header.dnh_netmask);
3550Sstevel@tonic-gate 	}
3560Sstevel@tonic-gate 
3570Sstevel@tonic-gate 	image = header.dnh_image;
3580Sstevel@tonic-gate 	records = NULL;
3590Sstevel@tonic-gate 	for (n = 0, nrecords = 0; count < 0 || nrecords < count; n++) {
3600Sstevel@tonic-gate 		if (DSVC_QISEQ(query, DN_QCIP)) {
3610Sstevel@tonic-gate 			/*
3620Sstevel@tonic-gate 			 * Lookup scenario 1: Caller has requested a QN_CIP
3630Sstevel@tonic-gate 			 * query lookup; set `recid' to the only possible
3640Sstevel@tonic-gate 			 * entry (which may not be in-use).
3650Sstevel@tonic-gate 			 */
3660Sstevel@tonic-gate 			if (n != 0)
3670Sstevel@tonic-gate 				break;
3680Sstevel@tonic-gate 			recid = RECID(targetp->dn_cip.s_addr, dhp->dh_netmask);
3690Sstevel@tonic-gate 		} else if (DSVC_QISEQ(query, DN_QCID)) {
3700Sstevel@tonic-gate 			/*
3710Sstevel@tonic-gate 			 * Lookup scenario 2: Caller has requested a
3720Sstevel@tonic-gate 			 * QN_CID-based lookup.  Walk the `cidhash' chain
3730Sstevel@tonic-gate 			 * (one call at a time) and set `recid' to hash
3740Sstevel@tonic-gate 			 * bucket candidates.
3750Sstevel@tonic-gate 			 *
3760Sstevel@tonic-gate 			 * Note that it's possible for the client id value
3770Sstevel@tonic-gate 			 * 00 to appear more than once, and it's not
3780Sstevel@tonic-gate 			 * impossible for other duplicate client ids to
3790Sstevel@tonic-gate 			 * occur, so continue until we reach `nrecords'.
3800Sstevel@tonic-gate 			 */
3810Sstevel@tonic-gate 			if (n == 0) {
3820Sstevel@tonic-gate 				hash = cidhash(targetp->dn_cid,
3830Sstevel@tonic-gate 				    targetp->dn_cid_len);
3840Sstevel@tonic-gate 				if (read_hashhead(fd, &recid, hash, image)
3850Sstevel@tonic-gate 				    == -1)
3860Sstevel@tonic-gate 					return (syserr_to_dsvcerr(errno));
3870Sstevel@tonic-gate 			} else {
3880Sstevel@tonic-gate 				/* sanity check */
3890Sstevel@tonic-gate 				if (recid == rec.rec_next[image])
3900Sstevel@tonic-gate 					break;
3910Sstevel@tonic-gate 				recid = rec.rec_next[image];
3920Sstevel@tonic-gate 			}
3930Sstevel@tonic-gate 		} else {
3940Sstevel@tonic-gate 			/*
3950Sstevel@tonic-gate 			 * Lookup scenario 3: Caller has requested any
3960Sstevel@tonic-gate 			 * other type of search.  Walk the all the client
3970Sstevel@tonic-gate 			 * id hashes.
3980Sstevel@tonic-gate 			 */
3990Sstevel@tonic-gate 			if (n == 0) {
4000Sstevel@tonic-gate 				hash = 0;
4010Sstevel@tonic-gate 				if (read_header(fd, &header, B_TRUE) == -1)
4020Sstevel@tonic-gate 					return (syserr_to_dsvcerr(errno));
4030Sstevel@tonic-gate 				recid = header.dnh_cidhash[hash][image];
4040Sstevel@tonic-gate 			} else {
4050Sstevel@tonic-gate 				/* sanity check */
4060Sstevel@tonic-gate 				if (recid == rec.rec_next[image])
4070Sstevel@tonic-gate 					break;
4080Sstevel@tonic-gate 				recid = rec.rec_next[image];
4090Sstevel@tonic-gate 			}
4100Sstevel@tonic-gate 
4110Sstevel@tonic-gate 			while (recid == DN_NOREC && ++hash < DN_CIDHASHSZ)
4120Sstevel@tonic-gate 				recid = header.dnh_cidhash[hash][image];
4130Sstevel@tonic-gate 		}
4140Sstevel@tonic-gate 
4150Sstevel@tonic-gate 		/*
4160Sstevel@tonic-gate 		 * No more records; bail.
4170Sstevel@tonic-gate 		 */
4180Sstevel@tonic-gate 		if (recid == DN_NOREC)
4190Sstevel@tonic-gate 			break;
4200Sstevel@tonic-gate 
4210Sstevel@tonic-gate 		if (recid == temp_recid) {
4220Sstevel@tonic-gate 			/*
4230Sstevel@tonic-gate 			 * The temporary record is actually authoritative
4240Sstevel@tonic-gate 			 * for this record's contents; use it instead.
4250Sstevel@tonic-gate 			 */
4260Sstevel@tonic-gate 			recid = DN_TEMPREC;
4270Sstevel@tonic-gate 		}
4280Sstevel@tonic-gate 
4290Sstevel@tonic-gate 		if (read_rec(dhp->dh_fd, &rec, recid) == -1) {
4300Sstevel@tonic-gate 			retval = syserr_to_dsvcerr(errno);
4310Sstevel@tonic-gate 			break;
4320Sstevel@tonic-gate 		}
4330Sstevel@tonic-gate 
4340Sstevel@tonic-gate 		/*
4350Sstevel@tonic-gate 		 * If the record isn't in-use, then skip...
4360Sstevel@tonic-gate 		 */
4370Sstevel@tonic-gate 		if (rec.rec_prev[image] == DN_NOREC)
4380Sstevel@tonic-gate 			continue;
4390Sstevel@tonic-gate 
4400Sstevel@tonic-gate 		/*
4410Sstevel@tonic-gate 		 * See if we've got a match...
4420Sstevel@tonic-gate 		 */
4430Sstevel@tonic-gate 		if (!record_match(&rec.rec_dn, targetp, query))
4440Sstevel@tonic-gate 			continue;
4450Sstevel@tonic-gate 
4460Sstevel@tonic-gate 		/*
4470Sstevel@tonic-gate 		 * Caller just wants a count of the number of matching
4480Sstevel@tonic-gate 		 * records, not the records themselves; continue.
4490Sstevel@tonic-gate 		 */
4500Sstevel@tonic-gate 		if (recordsp == NULL) {
4510Sstevel@tonic-gate 			nrecords++;
4520Sstevel@tonic-gate 			continue;
4530Sstevel@tonic-gate 		}
4540Sstevel@tonic-gate 
4550Sstevel@tonic-gate 		/*
4560Sstevel@tonic-gate 		 * Allocate the record and fill it in.
4570Sstevel@tonic-gate 		 */
4580Sstevel@tonic-gate 		recordp = malloc(sizeof (dn_rec_t));
4590Sstevel@tonic-gate 		if (recordp == NULL) {
4600Sstevel@tonic-gate 			if (!partial)
4610Sstevel@tonic-gate 				retval = DSVC_NO_MEMORY;
4620Sstevel@tonic-gate 			break;
4630Sstevel@tonic-gate 		}
4640Sstevel@tonic-gate 		*recordp = rec.rec_dn;
4650Sstevel@tonic-gate 
4660Sstevel@tonic-gate 		/*
4670Sstevel@tonic-gate 		 * Chuck the record on the list and up the counter.
4680Sstevel@tonic-gate 		 */
4690Sstevel@tonic-gate 		new_records = add_dnrec_to_list(recordp, records);
4700Sstevel@tonic-gate 		if (new_records == NULL) {
4710Sstevel@tonic-gate 			free(recordp);
4720Sstevel@tonic-gate 			if (!partial)
4730Sstevel@tonic-gate 				retval = DSVC_NO_MEMORY;
4740Sstevel@tonic-gate 			break;
4750Sstevel@tonic-gate 		}
4760Sstevel@tonic-gate 
4770Sstevel@tonic-gate 		records = new_records;
4780Sstevel@tonic-gate 		nrecords++;
4790Sstevel@tonic-gate 	}
4800Sstevel@tonic-gate 
4810Sstevel@tonic-gate 	if (retval == DSVC_SUCCESS) {
4820Sstevel@tonic-gate 		*nrecordsp = nrecords;
4830Sstevel@tonic-gate 		if (recordsp != NULL)
4840Sstevel@tonic-gate 			*recordsp = records;
4850Sstevel@tonic-gate 		return (DSVC_SUCCESS);
4860Sstevel@tonic-gate 	}
4870Sstevel@tonic-gate 
4880Sstevel@tonic-gate 	if (records != NULL)
4890Sstevel@tonic-gate 		free_dnrec_list(records);
4900Sstevel@tonic-gate 
4910Sstevel@tonic-gate 	return (retval);
4920Sstevel@tonic-gate }
4930Sstevel@tonic-gate 
4940Sstevel@tonic-gate /*
4950Sstevel@tonic-gate  * Compares `dnp' to the target `targetp', using `query' to decide what
4960Sstevel@tonic-gate  * fields to compare.  Returns B_TRUE if `dnp' matches `targetp', B_FALSE
4970Sstevel@tonic-gate  * if not.
4980Sstevel@tonic-gate  */
4990Sstevel@tonic-gate static boolean_t
record_match(const dn_rec_t * dnp,const dn_rec_t * targetp,uint_t query)5000Sstevel@tonic-gate record_match(const dn_rec_t *dnp, const dn_rec_t *targetp, uint_t query)
5010Sstevel@tonic-gate {
5020Sstevel@tonic-gate 	unsigned int qflags[] = { DN_QFDYNAMIC, DN_QFAUTOMATIC, DN_QFMANUAL,
5030Sstevel@tonic-gate 				DN_QFUNUSABLE, DN_QFBOOTP_ONLY };
5040Sstevel@tonic-gate 	unsigned int flags[]  = { DN_FDYNAMIC, DN_FAUTOMATIC, DN_FMANUAL,
5050Sstevel@tonic-gate 				DN_FUNUSABLE, DN_FBOOTP_ONLY };
5060Sstevel@tonic-gate 	unsigned int i;
5070Sstevel@tonic-gate 	unsigned int query0;
5080Sstevel@tonic-gate 
5090Sstevel@tonic-gate 	/*
5100Sstevel@tonic-gate 	 * As an optimization, skip any checks if the query is empty.
5110Sstevel@tonic-gate 	 */
5120Sstevel@tonic-gate 	DSVC_QINIT(query0);
5130Sstevel@tonic-gate 	if (query == query0)
5140Sstevel@tonic-gate 		return (B_TRUE);
5150Sstevel@tonic-gate 
5160Sstevel@tonic-gate 	if (DSVC_QISEQ(query, DN_QLEASE) &&
5170Sstevel@tonic-gate 	    targetp->dn_lease != dnp->dn_lease)
5180Sstevel@tonic-gate 		return (B_FALSE);
5190Sstevel@tonic-gate 	if (DSVC_QISNEQ(query, DN_QLEASE) &&
5200Sstevel@tonic-gate 	    targetp->dn_lease == dnp->dn_lease)
5210Sstevel@tonic-gate 		return (B_FALSE);
5220Sstevel@tonic-gate 
5230Sstevel@tonic-gate 	if (DSVC_QISEQ(query, DN_QCIP) &&
5240Sstevel@tonic-gate 	    dnp->dn_cip.s_addr != targetp->dn_cip.s_addr)
5250Sstevel@tonic-gate 		return (B_FALSE);
5260Sstevel@tonic-gate 	if (DSVC_QISNEQ(query, DN_QCIP) &&
5270Sstevel@tonic-gate 	    dnp->dn_cip.s_addr == targetp->dn_cip.s_addr)
5280Sstevel@tonic-gate 		return (B_FALSE);
5290Sstevel@tonic-gate 
5300Sstevel@tonic-gate 	if (DSVC_QISEQ(query, DN_QCID) &&
5310Sstevel@tonic-gate 	    (dnp->dn_cid_len != targetp->dn_cid_len ||
5320Sstevel@tonic-gate 	    (memcmp(dnp->dn_cid, targetp->dn_cid, dnp->dn_cid_len) != 0)))
5330Sstevel@tonic-gate 		return (B_FALSE);
5340Sstevel@tonic-gate 	if (DSVC_QISNEQ(query, DN_QCID) &&
5350Sstevel@tonic-gate 	    (dnp->dn_cid_len == targetp->dn_cid_len &&
5360Sstevel@tonic-gate 	    (memcmp(dnp->dn_cid, targetp->dn_cid, dnp->dn_cid_len) == 0)))
5370Sstevel@tonic-gate 		return (B_FALSE);
5380Sstevel@tonic-gate 
5390Sstevel@tonic-gate 	if (DSVC_QISEQ(query, DN_QSIP) &&
5400Sstevel@tonic-gate 	    dnp->dn_sip.s_addr != targetp->dn_sip.s_addr)
5410Sstevel@tonic-gate 		return (B_FALSE);
5420Sstevel@tonic-gate 	if (DSVC_QISNEQ(query, DN_QSIP) &&
5430Sstevel@tonic-gate 	    dnp->dn_sip.s_addr == targetp->dn_sip.s_addr)
5440Sstevel@tonic-gate 		return (B_FALSE);
5450Sstevel@tonic-gate 
5460Sstevel@tonic-gate 	if (DSVC_QISEQ(query, DN_QMACRO) &&
5470Sstevel@tonic-gate 	    strcmp(targetp->dn_macro, dnp->dn_macro) != 0)
5480Sstevel@tonic-gate 		return (B_FALSE);
5490Sstevel@tonic-gate 	if (DSVC_QISNEQ(query, DN_QMACRO) &&
5500Sstevel@tonic-gate 	    strcmp(targetp->dn_macro, dnp->dn_macro) == 0)
5510Sstevel@tonic-gate 		return (B_FALSE);
5520Sstevel@tonic-gate 
5530Sstevel@tonic-gate 	for (i = 0; i < sizeof (qflags) / sizeof (unsigned int); i++) {
5540Sstevel@tonic-gate 		if (DSVC_QISEQ(query, qflags[i]) &&
5550Sstevel@tonic-gate 		    (dnp->dn_flags & flags[i]) !=
5560Sstevel@tonic-gate 		    (targetp->dn_flags & flags[i]))
5570Sstevel@tonic-gate 			return (B_FALSE);
5580Sstevel@tonic-gate 		if (DSVC_QISNEQ(query, qflags[i]) &&
5590Sstevel@tonic-gate 		    (dnp->dn_flags & flags[i]) ==
5600Sstevel@tonic-gate 		    (targetp->dn_flags & flags[i]))
5610Sstevel@tonic-gate 			return (B_FALSE);
5620Sstevel@tonic-gate 	}
5630Sstevel@tonic-gate 
5640Sstevel@tonic-gate 	return (B_TRUE);
5650Sstevel@tonic-gate }
5660Sstevel@tonic-gate 
/*
 * Add the record `addp' to the DHCP network container open on `handle'.
 *
 * The handle must have been opened with DSVC_WRITE, otherwise DSVC_ACCESS
 * is returned.  The record's slot is derived from its client IP address
 * and the container's netmask (RECID()); if that slot is already linked
 * into the active image, DSVC_EXISTS is returned.  Otherwise the record
 * is given a fresh signature from gensig() and inserted at the head of
 * the hash chain selected by cidhash() of its client id.
 *
 * The update is staged in the alternate image, committed by switching
 * the header's `dnh_image' byte, and then mirrored into the old image.
 * On success the new signature is stored in addp->dn_sig and the result
 * of clearing `dnh_dirty' is returned.  Failures from check_dn(),
 * getabyte() and setabyte() are returned as-is; failed record or hash
 * head I/O is reported as syserr_to_dsvcerr(errno).
 */
5670Sstevel@tonic-gate int
add_dn(void * handle,dn_rec_t * addp)5680Sstevel@tonic-gate add_dn(void *handle, dn_rec_t *addp)
5690Sstevel@tonic-gate {
5700Sstevel@tonic-gate 	dn_filerec_t	rec, rec_next;
5710Sstevel@tonic-gate 	dn_recid_t	recid, recid_head;
5720Sstevel@tonic-gate 	uint16_t	hash;
5730Sstevel@tonic-gate 	uchar_t		image;
5740Sstevel@tonic-gate 	int		retval;
5750Sstevel@tonic-gate 	dn_handle_t	*dhp = (dn_handle_t *)handle;
5760Sstevel@tonic-gate 	int		fd = dhp->dh_fd;
5770Sstevel@tonic-gate 
5780Sstevel@tonic-gate 	if ((dhp->dh_oflags & DSVC_WRITE) == 0)
5790Sstevel@tonic-gate 		return (DSVC_ACCESS);
5800Sstevel@tonic-gate 
5810Sstevel@tonic-gate 	retval = check_dn(dhp);
5820Sstevel@tonic-gate 	if (retval != DSVC_SUCCESS)
5830Sstevel@tonic-gate 		return (retval);
5840Sstevel@tonic-gate 
5850Sstevel@tonic-gate 	hash = cidhash(addp->dn_cid, addp->dn_cid_len);
5860Sstevel@tonic-gate 
5870Sstevel@tonic-gate 	/*
5880Sstevel@tonic-gate 	 * Get the active image.
5890Sstevel@tonic-gate 	 */
5900Sstevel@tonic-gate 	retval = getabyte(fd, offsetof(dn_header_t, dnh_image), &image);
5910Sstevel@tonic-gate 	if (retval != DSVC_SUCCESS)
5920Sstevel@tonic-gate 		return (retval);
5930Sstevel@tonic-gate 
5940Sstevel@tonic-gate 	/*
5950Sstevel@tonic-gate 	 * Doublecheck to make sure this entry doesn't exist already.
5960Sstevel@tonic-gate 	 */
5970Sstevel@tonic-gate 	recid = RECID(addp->dn_cip.s_addr, dhp->dh_netmask);
5980Sstevel@tonic-gate 	if (read_rec(fd, &rec, recid) == -1)
5990Sstevel@tonic-gate 		return (syserr_to_dsvcerr(errno));
6000Sstevel@tonic-gate 
	/*
	 * A slot whose active-image `prev' link is anything other than
	 * DN_NOREC is treated as occupied.
	 */
6010Sstevel@tonic-gate 	if (rec.rec_prev[image] != DN_NOREC)
6020Sstevel@tonic-gate 		return (DSVC_EXISTS);
6030Sstevel@tonic-gate 
6040Sstevel@tonic-gate 	/*
6050Sstevel@tonic-gate 	 * We're going to insert `rec' at the head of the `hash' hash
6060Sstevel@tonic-gate 	 * chain; get it ready-to-go.  Note that we update the alternate
6070Sstevel@tonic-gate 	 * image's hash record id pointers so that the record will
6080Sstevel@tonic-gate 	 * atomically become in-use when we switch to the alternate image.
6090Sstevel@tonic-gate 	 */
6100Sstevel@tonic-gate 	if (read_hashhead(fd, &recid_head, hash, image) == -1)
6110Sstevel@tonic-gate 		return (syserr_to_dsvcerr(errno));
6120Sstevel@tonic-gate 
6130Sstevel@tonic-gate 	rec.rec_dn = *addp;
6140Sstevel@tonic-gate 	rec.rec_dn.dn_sig = gensig();
6150Sstevel@tonic-gate 	rec.rec_prev[!image] = DN_HASHHEAD;
6160Sstevel@tonic-gate 	rec.rec_next[!image] = recid_head;
6170Sstevel@tonic-gate 
6180Sstevel@tonic-gate 	/*
6190Sstevel@tonic-gate 	 * If there's a record currently on the hash chain (i.e, we're
6200Sstevel@tonic-gate 	 * not the first) then load the record.
6210Sstevel@tonic-gate 	 */
6220Sstevel@tonic-gate 	if (rec.rec_next[!image] != DN_NOREC) {
6230Sstevel@tonic-gate 		if (read_rec(fd, &rec_next, rec.rec_next[!image]) == -1)
6240Sstevel@tonic-gate 			return (syserr_to_dsvcerr(errno));
6250Sstevel@tonic-gate 	}
6260Sstevel@tonic-gate 
6270Sstevel@tonic-gate 	/*
6280Sstevel@tonic-gate 	 * Before we update any information on disk, mark the container as
6290Sstevel@tonic-gate 	 * dirty so that there's no chance the container is inconsistent
6300Sstevel@tonic-gate 	 * without us knowing about it.
6310Sstevel@tonic-gate 	 */
6320Sstevel@tonic-gate 	retval = setabyte(fd, offsetof(dn_header_t, dnh_dirty), 1);
6330Sstevel@tonic-gate 	if (retval != DSVC_SUCCESS)
6340Sstevel@tonic-gate 		return (retval);
6350Sstevel@tonic-gate 
	/*
	 * NOTE(review): every error return from here on leaves
	 * `dnh_dirty' set; presumably the next check_dn() call repairs
	 * the container -- confirm against check_dn().
	 */
6360Sstevel@tonic-gate 	/*
6370Sstevel@tonic-gate 	 * Update the new record on-disk; note that it's not yet reachable
6380Sstevel@tonic-gate 	 * via hash.
6390Sstevel@tonic-gate 	 */
6400Sstevel@tonic-gate 	if (write_rec(fd, &rec, recid) == -1)
6410Sstevel@tonic-gate 		return (syserr_to_dsvcerr(errno));
6420Sstevel@tonic-gate 
6430Sstevel@tonic-gate 	/*
6440Sstevel@tonic-gate 	 * Update the alternate image's on-disk hash pointers.  We need to
6450Sstevel@tonic-gate 	 * do this before we switch to the alternate image so we cannot
6460Sstevel@tonic-gate 	 * abort with an inconsistent active image.
6470Sstevel@tonic-gate 	 */
6480Sstevel@tonic-gate 	if (rec.rec_next[!image] != DN_NOREC) {
6490Sstevel@tonic-gate 		rec_next.rec_prev[!image] = recid;
6500Sstevel@tonic-gate 		if (write_rec(fd, &rec_next, rec.rec_next[!image]) == -1)
6510Sstevel@tonic-gate 			return (syserr_to_dsvcerr(errno));
6520Sstevel@tonic-gate 	}
6530Sstevel@tonic-gate 	if (write_hashhead(fd, recid, hash, !image) == -1)
6540Sstevel@tonic-gate 		return (syserr_to_dsvcerr(errno));
6550Sstevel@tonic-gate 
6560Sstevel@tonic-gate 	/*
6570Sstevel@tonic-gate 	 * Activate the alternate image.  This is our commit point -- if we
6580Sstevel@tonic-gate 	 * fail after this point, we will roll forward on recovery.
6590Sstevel@tonic-gate 	 */
6600Sstevel@tonic-gate 	image = !image;
	/*
	 * From here on `image' names the newly active image and `!image'
	 * the previously active one.
	 */
6610Sstevel@tonic-gate 	retval = setabyte(fd, offsetof(dn_header_t, dnh_image), image);
6620Sstevel@tonic-gate 	if (retval != DSVC_SUCCESS)
6630Sstevel@tonic-gate 		return (retval);
6640Sstevel@tonic-gate 
6650Sstevel@tonic-gate 	/*
6660Sstevel@tonic-gate 	 * Update the old record id pointers to match
6670Sstevel@tonic-gate 	 */
6680Sstevel@tonic-gate 	rec.rec_prev[!image] = rec.rec_prev[image];
6690Sstevel@tonic-gate 	rec.rec_next[!image] = rec.rec_next[image];
6700Sstevel@tonic-gate 	if (write_rec(fd, &rec, recid) == -1)
6710Sstevel@tonic-gate 		return (syserr_to_dsvcerr(errno));
6720Sstevel@tonic-gate 
	/*
	 * Repeat the neighbor and hash head updates made before the
	 * commit, this time against the old image.
	 */
6730Sstevel@tonic-gate 	if (rec.rec_next[!image] != DN_NOREC) {
6740Sstevel@tonic-gate 		rec_next.rec_prev[!image] = recid;
6750Sstevel@tonic-gate 		if (write_rec(fd, &rec_next, rec.rec_next[!image]) == -1)
6760Sstevel@tonic-gate 			return (syserr_to_dsvcerr(errno));
6770Sstevel@tonic-gate 	}
6780Sstevel@tonic-gate 	if (write_hashhead(fd, recid, hash, !image) == -1)
6790Sstevel@tonic-gate 		return (syserr_to_dsvcerr(errno));
6800Sstevel@tonic-gate 
6810Sstevel@tonic-gate 	/*
6820Sstevel@tonic-gate 	 * Update the signature on the record handed back to the caller.
6830Sstevel@tonic-gate 	 */
6840Sstevel@tonic-gate 	addp->dn_sig = rec.rec_dn.dn_sig;
6850Sstevel@tonic-gate 
6860Sstevel@tonic-gate 	/*
6870Sstevel@tonic-gate 	 * Finally, mark the container as clean.
6880Sstevel@tonic-gate 	 */
6890Sstevel@tonic-gate 	return (setabyte(fd, offsetof(dn_header_t, dnh_dirty), 0));
6900Sstevel@tonic-gate }
6910Sstevel@tonic-gate 
/*
 * Delete the record identified by `delp' from the DHCP network container
 * open on `handle'.
 *
 * Returns DSVC_ACCESS if the handle was not opened with DSVC_WRITE,
 * DSVC_NOENT if the slot for delp->dn_cip is not linked into the active
 * image, and DSVC_COLLISION if delp->dn_sig is nonzero and differs from
 * the signature stored in the slot.  Only the client IP address and the
 * signature of `delp' are consulted; the hash chain is located from the
 * client id stored on disk.
 *
 * The unlink is staged in the alternate image, committed by switching
 * the header's `dnh_image' byte, and then mirrored into the old image.
 * On success the result of clearing `dnh_dirty' is returned.  Failures
 * from check_dn(), getabyte() and setabyte() are returned as-is; failed
 * record or hash head I/O is reported as syserr_to_dsvcerr(errno).
 */
6920Sstevel@tonic-gate int
delete_dn(void * handle,const dn_rec_t * delp)6930Sstevel@tonic-gate delete_dn(void *handle, const dn_rec_t *delp)
6940Sstevel@tonic-gate {
6950Sstevel@tonic-gate 	dn_filerec_t	rec, rec_prev, rec_next;
6960Sstevel@tonic-gate 	dn_recid_t	recid;
6970Sstevel@tonic-gate 	uint16_t	hash;
6980Sstevel@tonic-gate 	uchar_t		image;
6990Sstevel@tonic-gate 	int		retval;
7000Sstevel@tonic-gate 	dn_handle_t	*dhp = (dn_handle_t *)handle;
7010Sstevel@tonic-gate 	int		fd = dhp->dh_fd;
7020Sstevel@tonic-gate 
7030Sstevel@tonic-gate 	if ((dhp->dh_oflags & DSVC_WRITE) == 0)
7040Sstevel@tonic-gate 		return (DSVC_ACCESS);
7050Sstevel@tonic-gate 
7060Sstevel@tonic-gate 	retval = check_dn(dhp);
7070Sstevel@tonic-gate 	if (retval != DSVC_SUCCESS)
7080Sstevel@tonic-gate 		return (retval);
7090Sstevel@tonic-gate 
7100Sstevel@tonic-gate 	/*
7110Sstevel@tonic-gate 	 * Get the active image.
7120Sstevel@tonic-gate 	 */
7130Sstevel@tonic-gate 	retval = getabyte(fd, offsetof(dn_header_t, dnh_image), &image);
7140Sstevel@tonic-gate 	if (retval != DSVC_SUCCESS)
7150Sstevel@tonic-gate 		return (retval);
7160Sstevel@tonic-gate 
7170Sstevel@tonic-gate 	/*
7180Sstevel@tonic-gate 	 * Find the original entry in the network table, make sure the
7190Sstevel@tonic-gate 	 * record is in-use, and check the signature field (to guard
7200Sstevel@tonic-gate 	 * against collisions).
7210Sstevel@tonic-gate 	 */
7220Sstevel@tonic-gate 	recid = RECID(delp->dn_cip.s_addr, dhp->dh_netmask);
7230Sstevel@tonic-gate 	if (read_rec(fd, &rec, recid) == -1)
7240Sstevel@tonic-gate 		return (syserr_to_dsvcerr(errno));
7250Sstevel@tonic-gate 
7260Sstevel@tonic-gate 	if (rec.rec_prev[image] == DN_NOREC)
7270Sstevel@tonic-gate 		return (DSVC_NOENT);
7280Sstevel@tonic-gate 
	/*
	 * Hash the client id stored on disk rather than the one in
	 * `delp', so the chain we unlink from is the one the record is
	 * actually on.
	 */
7290Sstevel@tonic-gate 	hash = cidhash(rec.rec_dn.dn_cid, rec.rec_dn.dn_cid_len);
7300Sstevel@tonic-gate 
7310Sstevel@tonic-gate 	/*
7320Sstevel@tonic-gate 	 * The signatures must match to delete a record, *except* when
7330Sstevel@tonic-gate 	 * delp->dn_sig == 0.  This is so records can be deleted that
7340Sstevel@tonic-gate 	 * weren't retrieved via lookup_dn()
7350Sstevel@tonic-gate 	 */
7360Sstevel@tonic-gate 	if (delp->dn_sig != 0 && rec.rec_dn.dn_sig != delp->dn_sig)
7370Sstevel@tonic-gate 		return (DSVC_COLLISION);
7380Sstevel@tonic-gate 
7390Sstevel@tonic-gate 	/*
7400Sstevel@tonic-gate 	 * Read our neighboring records.
7410Sstevel@tonic-gate 	 */
7420Sstevel@tonic-gate 	if (rec.rec_next[image] != DN_NOREC) {
7430Sstevel@tonic-gate 		if (read_rec(fd, &rec_next, rec.rec_next[image]) == -1)
7440Sstevel@tonic-gate 			return (syserr_to_dsvcerr(errno));
7450Sstevel@tonic-gate 	}
7460Sstevel@tonic-gate 
7470Sstevel@tonic-gate 	if (rec.rec_prev[image] != DN_HASHHEAD) {
7480Sstevel@tonic-gate 		if (read_rec(fd, &rec_prev, rec.rec_prev[image]) == -1)
7490Sstevel@tonic-gate 			return (syserr_to_dsvcerr(errno));
7500Sstevel@tonic-gate 	}
7510Sstevel@tonic-gate 
7520Sstevel@tonic-gate 	/*
7530Sstevel@tonic-gate 	 * Before we update the alternate image's on-disk hash pointers,
7540Sstevel@tonic-gate 	 * mark the container as dirty so that there's no chance the
7550Sstevel@tonic-gate 	 * container is inconsistent without us knowing about it.
7560Sstevel@tonic-gate 	 */
7570Sstevel@tonic-gate 	retval = setabyte(fd, offsetof(dn_header_t, dnh_dirty), 1);
7580Sstevel@tonic-gate 	if (retval != DSVC_SUCCESS)
7590Sstevel@tonic-gate 		return (retval);
7600Sstevel@tonic-gate 
	/*
	 * NOTE(review): every error return from here on leaves
	 * `dnh_dirty' set; presumably the next check_dn() call repairs
	 * the container -- confirm against check_dn().
	 */
7610Sstevel@tonic-gate 	/*
7620Sstevel@tonic-gate 	 * Update the alternate image's on-disk hash pointers.  We need to
7630Sstevel@tonic-gate 	 * do this before we switch to the alternate image so we do not
7640Sstevel@tonic-gate 	 * abort with an inconsistent active image.  Also reset the
7650Sstevel@tonic-gate 	 * record's alternate image record id pointers, so that the old
7660Sstevel@tonic-gate 	 * record will not be in-use when we switch to the alternate image.
7670Sstevel@tonic-gate 	 */
7680Sstevel@tonic-gate 	if (rec.rec_next[image] != DN_NOREC) {
7690Sstevel@tonic-gate 		rec_next.rec_prev[!image] = rec.rec_prev[image];
7700Sstevel@tonic-gate 		if (write_rec(fd, &rec_next, rec.rec_next[image]) == -1)
7710Sstevel@tonic-gate 			return (syserr_to_dsvcerr(errno));
7720Sstevel@tonic-gate 	}
7730Sstevel@tonic-gate 
	/*
	 * Either the previous record or, when we are first on the chain,
	 * the hash head must now point past us.
	 */
7740Sstevel@tonic-gate 	if (rec.rec_prev[image] != DN_HASHHEAD) {
7750Sstevel@tonic-gate 		rec_prev.rec_next[!image] = rec.rec_next[image];
7760Sstevel@tonic-gate 		if (write_rec(fd, &rec_prev, rec.rec_prev[image]) == -1)
7770Sstevel@tonic-gate 			return (syserr_to_dsvcerr(errno));
7780Sstevel@tonic-gate 	} else {
7790Sstevel@tonic-gate 		if (write_hashhead(fd, rec.rec_next[image], hash, !image) == -1)
7800Sstevel@tonic-gate 			return (syserr_to_dsvcerr(errno));
7810Sstevel@tonic-gate 	}
7820Sstevel@tonic-gate 
7830Sstevel@tonic-gate 	rec.rec_next[!image] = DN_NOREC;
7840Sstevel@tonic-gate 	rec.rec_prev[!image] = DN_NOREC;
7850Sstevel@tonic-gate 	if (write_rec(fd, &rec, recid) == -1)
7860Sstevel@tonic-gate 		return (syserr_to_dsvcerr(errno));
7870Sstevel@tonic-gate 
7880Sstevel@tonic-gate 	/*
7890Sstevel@tonic-gate 	 * Activate the alternate image.  This is our commit point -- if we
7900Sstevel@tonic-gate 	 * fail after this point, we will roll forward on recovery.
7910Sstevel@tonic-gate 	 */
7920Sstevel@tonic-gate 	image = !image;
	/*
	 * From here on `image' names the newly active image and `!image'
	 * the previously active one, whose links in `rec' still hold the
	 * record's original neighbors.
	 */
7930Sstevel@tonic-gate 	retval = setabyte(fd, offsetof(dn_header_t, dnh_image), image);
7940Sstevel@tonic-gate 	if (retval != DSVC_SUCCESS)
7950Sstevel@tonic-gate 		return (retval);
7960Sstevel@tonic-gate 
7970Sstevel@tonic-gate 	/*
7980Sstevel@tonic-gate 	 * Update the old record id pointers to match.
7990Sstevel@tonic-gate 	 */
8000Sstevel@tonic-gate 	if (rec.rec_next[!image] != DN_NOREC) {
8010Sstevel@tonic-gate 		rec_next.rec_prev[!image] = rec.rec_prev[!image];
8020Sstevel@tonic-gate 		if (write_rec(fd, &rec_next, rec.rec_next[!image]) == -1)
8030Sstevel@tonic-gate 			return (syserr_to_dsvcerr(errno));
8040Sstevel@tonic-gate 	}
8050Sstevel@tonic-gate 
8060Sstevel@tonic-gate 	if (rec.rec_prev[!image] != DN_HASHHEAD) {
8070Sstevel@tonic-gate 		rec_prev.rec_next[!image] = rec.rec_next[!image];
8080Sstevel@tonic-gate 		if (write_rec(fd, &rec_prev, rec.rec_prev[!image]) == -1)
8090Sstevel@tonic-gate 			return (syserr_to_dsvcerr(errno));
8100Sstevel@tonic-gate 	} else {
8110Sstevel@tonic-gate 		if (write_hashhead(fd, rec.rec_next[!image], hash, !image)
8120Sstevel@tonic-gate 		    == -1)
8130Sstevel@tonic-gate 			return (syserr_to_dsvcerr(errno));
8140Sstevel@tonic-gate 	}
8150Sstevel@tonic-gate 
	/*
	 * The record's links in the new image were cleared before the
	 * commit; clear the old-image links as well.
	 */
8160Sstevel@tonic-gate 	rec.rec_next[!image] = DN_NOREC;
8170Sstevel@tonic-gate 	rec.rec_prev[!image] = DN_NOREC;
8180Sstevel@tonic-gate 	if (write_rec(fd, &rec, recid) == -1)
8190Sstevel@tonic-gate 		return (syserr_to_dsvcerr(errno));
8200Sstevel@tonic-gate 
8210Sstevel@tonic-gate 	/*
8220Sstevel@tonic-gate 	 * Finally, mark the container as clean.
8230Sstevel@tonic-gate 	 */
8240Sstevel@tonic-gate 	return (setabyte(fd, offsetof(dn_header_t, dnh_dirty), 0));
8250Sstevel@tonic-gate }
8260Sstevel@tonic-gate 
/*
 * Replace the record identified by `origp' with the contents of `newp'
 * in the DHCP network container open on `handle'.
 *
 * Returns DSVC_ACCESS if the handle was not opened with DSVC_WRITE,
 * DSVC_NOENT if the slot for origp->dn_cip is not linked into the active
 * image, DSVC_COLLISION if origp->dn_sig differs from the stored
 * signature (a zero signature is not treated as a wildcard here), and
 * DSVC_EXISTS if the new client IP address maps to a different slot that
 * is already in use.
 *
 * Two things may change as a result of the modification: the record id
 * (derived from the client IP address via RECID()) and the hash chain
 * (derived from the client id via cidhash()).  When the record id is
 * unchanged, the new contents are first written to DN_TEMPREC and
 * `dnh_tempimage' is used to flag that the temporary record is valid.
 *
 * Changes are staged in the alternate image, committed by switching the
 * header's `dnh_image' byte, and then mirrored into the old image.  On
 * success newp->dn_sig is set to origp->dn_sig + 1 and the result of
 * clearing `dnh_dirty' is returned.  Failures from check_dn(),
 * getabyte() and setabyte() are returned as-is; failed record or hash
 * head I/O is reported as syserr_to_dsvcerr(errno).
 */
8270Sstevel@tonic-gate int
modify_dn(void * handle,const dn_rec_t * origp,dn_rec_t * newp)8280Sstevel@tonic-gate modify_dn(void *handle, const dn_rec_t *origp, dn_rec_t *newp)
8290Sstevel@tonic-gate {
8300Sstevel@tonic-gate 	dn_filerec_t	rec, new_rec, rec_head, rec_next, rec_prev;
8310Sstevel@tonic-gate 	dn_recid_t	recid, new_recid, recid_head;
8320Sstevel@tonic-gate 	uint16_t	hash, new_hash;
8330Sstevel@tonic-gate 	uchar_t		image;
8340Sstevel@tonic-gate 	int		retval;
8350Sstevel@tonic-gate 	dn_handle_t	*dhp = (dn_handle_t *)handle;
8360Sstevel@tonic-gate 	int		fd = dhp->dh_fd;
8370Sstevel@tonic-gate 
8380Sstevel@tonic-gate 	if ((dhp->dh_oflags & DSVC_WRITE) == 0)
8390Sstevel@tonic-gate 		return (DSVC_ACCESS);
8400Sstevel@tonic-gate 
8410Sstevel@tonic-gate 	retval = check_dn(dhp);
8420Sstevel@tonic-gate 	if (retval != DSVC_SUCCESS)
8430Sstevel@tonic-gate 		return (retval);
8440Sstevel@tonic-gate 
8450Sstevel@tonic-gate 	/*
8460Sstevel@tonic-gate 	 * Get the active image
8470Sstevel@tonic-gate 	 */
8480Sstevel@tonic-gate 	retval = getabyte(fd, offsetof(dn_header_t, dnh_image), &image);
8490Sstevel@tonic-gate 	if (retval != DSVC_SUCCESS)
8500Sstevel@tonic-gate 		return (retval);
8510Sstevel@tonic-gate 
8520Sstevel@tonic-gate 	/*
8530Sstevel@tonic-gate 	 * Find the original entry in the network table, make sure the
8540Sstevel@tonic-gate 	 * entry is in-use, and check the signature field (to guard against
8550Sstevel@tonic-gate 	 * collisions).
8560Sstevel@tonic-gate 	 */
8570Sstevel@tonic-gate 	recid = RECID(origp->dn_cip.s_addr, dhp->dh_netmask);
8580Sstevel@tonic-gate 	if (read_rec(fd, &rec, recid) == -1)
8590Sstevel@tonic-gate 		return (syserr_to_dsvcerr(errno));
8600Sstevel@tonic-gate 
8610Sstevel@tonic-gate 	if (rec.rec_prev[image] == DN_NOREC)
8620Sstevel@tonic-gate 		return (DSVC_NOENT);
8630Sstevel@tonic-gate 
8640Sstevel@tonic-gate 	if (rec.rec_dn.dn_sig != origp->dn_sig)
8650Sstevel@tonic-gate 		return (DSVC_COLLISION);
8660Sstevel@tonic-gate 
8670Sstevel@tonic-gate 	/*
8680Sstevel@tonic-gate 	 * Check if the record id is changing (as a result of modifying the
8690Sstevel@tonic-gate 	 * IP address). If it is, then make sure the new one is available
8700Sstevel@tonic-gate 	 * (if not, fail with DSVC_EXISTS).
8710Sstevel@tonic-gate 	 */
8720Sstevel@tonic-gate 	new_recid = RECID(newp->dn_cip.s_addr, dhp->dh_netmask);
8730Sstevel@tonic-gate 	if (recid != new_recid) {
8740Sstevel@tonic-gate 		if (read_rec(fd, &new_rec, new_recid) == -1)
8750Sstevel@tonic-gate 			return (syserr_to_dsvcerr(errno));
8760Sstevel@tonic-gate 		if (new_rec.rec_prev[image] != DN_NOREC)
8770Sstevel@tonic-gate 			return (DSVC_EXISTS);
8780Sstevel@tonic-gate 	}
8790Sstevel@tonic-gate 
8800Sstevel@tonic-gate 	/*
8810Sstevel@tonic-gate 	 * Update the record with the new information.
8820Sstevel@tonic-gate 	 */
	/*
	 * NOTE(review): when recid == new_recid, `new_rec' was not read
	 * from disk above, so its active-image links are never assigned
	 * before it is written to DN_TEMPREC -- confirm that nothing
	 * (including recovery) reads them.
	 */
8830Sstevel@tonic-gate 	new_rec.rec_dn = *newp;
8840Sstevel@tonic-gate 	new_rec.rec_dn.dn_sig = origp->dn_sig + 1;
8850Sstevel@tonic-gate 
8860Sstevel@tonic-gate 	/*
8870Sstevel@tonic-gate 	 * Find out if our hash chain is changing.  If so, then update the
8880Sstevel@tonic-gate 	 * new record's record id pointers to be on the new chain;
8890Sstevel@tonic-gate 	 * otherwise just take the original record's pointers.  Note that
8900Sstevel@tonic-gate 	 * in either case, only update the alternate image pointers, so
8910Sstevel@tonic-gate 	 * that the new record becomes in-use when we switch to the
8920Sstevel@tonic-gate 	 * alternate image.
8930Sstevel@tonic-gate 	 */
8940Sstevel@tonic-gate 	hash = cidhash(rec.rec_dn.dn_cid, rec.rec_dn.dn_cid_len);
8950Sstevel@tonic-gate 	new_hash = cidhash(newp->dn_cid, newp->dn_cid_len);
8960Sstevel@tonic-gate 
	/*
	 * `recid_head' is only assigned on the hash-changing path; every
	 * later use of it is guarded by the same `hash != new_hash' test.
	 */
8970Sstevel@tonic-gate 	if (hash == new_hash) {
8980Sstevel@tonic-gate 		new_rec.rec_prev[!image] = rec.rec_prev[image];
8990Sstevel@tonic-gate 		new_rec.rec_next[!image] = rec.rec_next[image];
9000Sstevel@tonic-gate 	} else {
9010Sstevel@tonic-gate 		if (read_hashhead(fd, &recid_head, new_hash, image) == -1)
9020Sstevel@tonic-gate 			return (syserr_to_dsvcerr(errno));
9030Sstevel@tonic-gate 
9040Sstevel@tonic-gate 		new_rec.rec_prev[!image] = DN_HASHHEAD;
9050Sstevel@tonic-gate 		new_rec.rec_next[!image] = recid_head;
9060Sstevel@tonic-gate 	}
9070Sstevel@tonic-gate 
9080Sstevel@tonic-gate 	/*
9090Sstevel@tonic-gate 	 * Write the record out; if this means overwriting the old record,
9100Sstevel@tonic-gate 	 * then write to a temporary record instead.
9110Sstevel@tonic-gate 	 */
9120Sstevel@tonic-gate 	if (write_rec(fd, &new_rec, new_recid == recid ? DN_TEMPREC : new_recid)
9130Sstevel@tonic-gate 	    == -1)
9140Sstevel@tonic-gate 		return (syserr_to_dsvcerr(errno));
9150Sstevel@tonic-gate 
9160Sstevel@tonic-gate 	/*
9170Sstevel@tonic-gate 	 * Mark the container as dirty so that there's no chance the
9180Sstevel@tonic-gate 	 * container is inconsistent without us knowing about it.
9190Sstevel@tonic-gate 	 */
9200Sstevel@tonic-gate 	retval = setabyte(fd, offsetof(dn_header_t, dnh_dirty), 1);
9210Sstevel@tonic-gate 	if (retval != DSVC_SUCCESS)
9220Sstevel@tonic-gate 		return (retval);
9230Sstevel@tonic-gate 
	/*
	 * NOTE(review): every error return from here on leaves
	 * `dnh_dirty' set; presumably the next check_dn() call repairs
	 * the container -- confirm against check_dn().
	 */
9240Sstevel@tonic-gate 	/*
9250Sstevel@tonic-gate 	 * If we've changed either the hash chain or the record id, then
9260Sstevel@tonic-gate 	 * update our neighboring records' record id pointers.  If we're
9270Sstevel@tonic-gate 	 * changing hash chains, then remove ourselves from the old
9280Sstevel@tonic-gate 	 * hash chain and insert ourselves on the new one -- otherwise, if
9290Sstevel@tonic-gate 	 * we're changing record id's, then update our neighbors with our
9300Sstevel@tonic-gate 	 * new record id.  Note that we only apply these changes to the
9310Sstevel@tonic-gate 	 * alternate image for now so that we can recover upon failure.
9320Sstevel@tonic-gate 	 */
9330Sstevel@tonic-gate 	if (hash != new_hash || recid != new_recid) {
9340Sstevel@tonic-gate 		if (rec.rec_next[image] != DN_NOREC) {
9350Sstevel@tonic-gate 			if (read_rec(fd, &rec_next, rec.rec_next[image]) == -1)
9360Sstevel@tonic-gate 				return (syserr_to_dsvcerr(errno));
9370Sstevel@tonic-gate 		}
9380Sstevel@tonic-gate 		if (rec.rec_prev[image] != DN_HASHHEAD) {
9390Sstevel@tonic-gate 			if (read_rec(fd, &rec_prev, rec.rec_prev[image]) == -1)
9400Sstevel@tonic-gate 				return (syserr_to_dsvcerr(errno));
9410Sstevel@tonic-gate 		}
9420Sstevel@tonic-gate 
		/*
		 * The in-memory neighbors are patched unconditionally,
		 * even if they were not loaded above; each is only
		 * written back below when that neighbor exists, and
		 * rec_prev.rec_next[!image] doubles as the value for
		 * the hash head when we are first on the chain.
		 */
9430Sstevel@tonic-gate 		if (hash != new_hash) {
9440Sstevel@tonic-gate 			rec_next.rec_prev[!image] = rec.rec_prev[!image];
9450Sstevel@tonic-gate 			rec_prev.rec_next[!image] = rec.rec_next[!image];
9460Sstevel@tonic-gate 		} else {
9470Sstevel@tonic-gate 			rec_next.rec_prev[!image] = new_recid;
9480Sstevel@tonic-gate 			rec_prev.rec_next[!image] = new_recid;
9490Sstevel@tonic-gate 		}
9500Sstevel@tonic-gate 
9510Sstevel@tonic-gate 		if (rec.rec_next[image] != DN_NOREC) {
9520Sstevel@tonic-gate 			if (write_rec(fd, &rec_next, rec.rec_next[image]) == -1)
9530Sstevel@tonic-gate 				return (syserr_to_dsvcerr(errno));
9540Sstevel@tonic-gate 		}
9550Sstevel@tonic-gate 		if (rec.rec_prev[image] != DN_HASHHEAD) {
9560Sstevel@tonic-gate 			if (write_rec(fd, &rec_prev, rec.rec_prev[image]) == -1)
9570Sstevel@tonic-gate 				return (syserr_to_dsvcerr(errno));
9580Sstevel@tonic-gate 		} else {
9590Sstevel@tonic-gate 			if (write_hashhead(fd, rec_prev.rec_next[!image], hash,
9600Sstevel@tonic-gate 			    !image) == -1)
9610Sstevel@tonic-gate 				return (syserr_to_dsvcerr(errno));
9620Sstevel@tonic-gate 		}
9630Sstevel@tonic-gate 
9640Sstevel@tonic-gate 		/*
9650Sstevel@tonic-gate 		 * If our hash is changing, update the alternate image
9660Sstevel@tonic-gate 		 * record id pointers to point to our moved record.
9670Sstevel@tonic-gate 		 */
9680Sstevel@tonic-gate 		if (hash != new_hash) {
9690Sstevel@tonic-gate 			if (recid_head != DN_NOREC) {
9700Sstevel@tonic-gate 				if (read_rec(fd, &rec_head, recid_head) == -1)
9710Sstevel@tonic-gate 					return (syserr_to_dsvcerr(errno));
9720Sstevel@tonic-gate 				rec_head.rec_prev[!image] = new_recid;
9730Sstevel@tonic-gate 				if (write_rec(fd, &rec_head, recid_head) == -1)
9740Sstevel@tonic-gate 					return (syserr_to_dsvcerr(errno));
9750Sstevel@tonic-gate 			}
9760Sstevel@tonic-gate 			if (write_hashhead(fd, new_recid, new_hash, !image)
9770Sstevel@tonic-gate 			    == -1)
9780Sstevel@tonic-gate 				return (syserr_to_dsvcerr(errno));
9790Sstevel@tonic-gate 		}
9800Sstevel@tonic-gate 
9810Sstevel@tonic-gate 		/*
9820Sstevel@tonic-gate 		 * If our record id is changing, reset the old record's
9830Sstevel@tonic-gate 		 * alternate image record id pointers, so that the old
9840Sstevel@tonic-gate 		 * record will not be in-use once we switch over to the
9850Sstevel@tonic-gate 		 * alternate image.
9860Sstevel@tonic-gate 		 */
9870Sstevel@tonic-gate 		if (recid != new_recid) {
9880Sstevel@tonic-gate 			rec.rec_prev[!image] = DN_NOREC;
9890Sstevel@tonic-gate 			rec.rec_next[!image] = DN_NOREC;
9900Sstevel@tonic-gate 			if (write_rec(fd, &rec, recid) == -1)
9910Sstevel@tonic-gate 				return (syserr_to_dsvcerr(errno));
9920Sstevel@tonic-gate 		}
9930Sstevel@tonic-gate 	}
9940Sstevel@tonic-gate 
9950Sstevel@tonic-gate 	/*
9960Sstevel@tonic-gate 	 * If we're using the temporary record, then set `dnh_tempimage' to
9970Sstevel@tonic-gate 	 * the image that will be active when we're done.  This piece of
9980Sstevel@tonic-gate 	 * state is critical in the case of failure, since it indicates
9990Sstevel@tonic-gate 	 * both that the temporary record is valid, and tells us whether we
10000Sstevel@tonic-gate 	 * failed before or after activating the alternate image (below).
10010Sstevel@tonic-gate 	 * If we failed before activating the alternate image, then the
10020Sstevel@tonic-gate 	 * failure code can just reset `dnh_tempimage' to DN_NOIMAGE and
10030Sstevel@tonic-gate 	 * resynchronize the pointers.  Otherwise, we failed somewhere
10040Sstevel@tonic-gate 	 * after making the alternate image active but before we completed
10050Sstevel@tonic-gate 	 * copying the temporary record over to the actual record, which
10060Sstevel@tonic-gate 	 * the recovery code will then complete on our behalf before
10070Sstevel@tonic-gate 	 * resynchronizing the pointers.
10080Sstevel@tonic-gate 	 */
10090Sstevel@tonic-gate 	if (recid == new_recid) {
10100Sstevel@tonic-gate 		retval = setabyte(fd, offsetof(dn_header_t, dnh_tempimage),
10110Sstevel@tonic-gate 		    !image);
10120Sstevel@tonic-gate 		if (retval != DSVC_SUCCESS)
10130Sstevel@tonic-gate 			return (retval);
10140Sstevel@tonic-gate 	}
10150Sstevel@tonic-gate 
10160Sstevel@tonic-gate 	/*
10170Sstevel@tonic-gate 	 * Activate the alternate image.  This is our commit point -- if we
10180Sstevel@tonic-gate 	 * fail after this point, we will roll forward on recovery.
10190Sstevel@tonic-gate 	 */
10200Sstevel@tonic-gate 	image = !image;
	/*
	 * From here on `image' names the newly active image and `!image'
	 * the previously active one.
	 */
10210Sstevel@tonic-gate 	retval = setabyte(fd, offsetof(dn_header_t, dnh_image), image);
10220Sstevel@tonic-gate 	if (retval != DSVC_SUCCESS)
10230Sstevel@tonic-gate 		return (retval);
10240Sstevel@tonic-gate 
10250Sstevel@tonic-gate 	/*
10260Sstevel@tonic-gate 	 * If we used the temporary record, copy the data into the actual
10270Sstevel@tonic-gate 	 * record.  Once finished, reset `dnh_tempimage' to DN_NOIMAGE
10280Sstevel@tonic-gate 	 * since the temporary record no longer needs to be used.
10290Sstevel@tonic-gate 	 */
10300Sstevel@tonic-gate 	if (recid == new_recid) {
10310Sstevel@tonic-gate 		if (write_rec(fd, &new_rec, new_recid) == -1)
10320Sstevel@tonic-gate 			return (syserr_to_dsvcerr(errno));
10330Sstevel@tonic-gate 
10340Sstevel@tonic-gate 		retval = setabyte(fd, offsetof(dn_header_t, dnh_tempimage),
10350Sstevel@tonic-gate 		    DN_NOIMAGE);
10360Sstevel@tonic-gate 		if (retval != DSVC_SUCCESS)
10370Sstevel@tonic-gate 			return (retval);
10380Sstevel@tonic-gate 	}
10390Sstevel@tonic-gate 
10400Sstevel@tonic-gate 	/*
10410Sstevel@tonic-gate 	 * Update the old record id pointers to match.
10420Sstevel@tonic-gate 	 */
10430Sstevel@tonic-gate 	new_rec.rec_prev[!image] = new_rec.rec_prev[image];
10440Sstevel@tonic-gate 	new_rec.rec_next[!image] = new_rec.rec_next[image];
10450Sstevel@tonic-gate 	if (write_rec(fd, &new_rec, new_recid) == -1)
10460Sstevel@tonic-gate 		return (syserr_to_dsvcerr(errno));
10470Sstevel@tonic-gate 
	/*
	 * NOTE(review): when recid != new_recid, rec.rec_prev[image] and
	 * rec.rec_next[image] were reset to DN_NOREC before the switch
	 * (they were the alternate-image links then), so the neighbor
	 * tests, values and write targets below see DN_NOREC rather than
	 * the record's original links -- confirm this is intended.
	 */
10480Sstevel@tonic-gate 	if (hash != new_hash || recid != new_recid) {
10490Sstevel@tonic-gate 		if (rec.rec_next[image] != DN_NOREC) {
10500Sstevel@tonic-gate 			rec_next.rec_prev[!image] = rec.rec_prev[image];
10510Sstevel@tonic-gate 			if (write_rec(fd, &rec_next, rec.rec_next[image]) == -1)
10520Sstevel@tonic-gate 				return (syserr_to_dsvcerr(errno));
10530Sstevel@tonic-gate 		}
10540Sstevel@tonic-gate 		if (rec.rec_prev[image] != DN_HASHHEAD) {
10550Sstevel@tonic-gate 			rec_prev.rec_next[!image] = rec.rec_next[image];
10560Sstevel@tonic-gate 			if (write_rec(fd, &rec_prev, rec.rec_prev[image]) == -1)
10570Sstevel@tonic-gate 				return (syserr_to_dsvcerr(errno));
10580Sstevel@tonic-gate 		} else {
10590Sstevel@tonic-gate 			if (write_hashhead(fd, rec.rec_next[image], hash,
10600Sstevel@tonic-gate 			    !image) == -1)
10610Sstevel@tonic-gate 				return (syserr_to_dsvcerr(errno));
10620Sstevel@tonic-gate 		}
10630Sstevel@tonic-gate 
10640Sstevel@tonic-gate 		/*
10650Sstevel@tonic-gate 		 * If our hash changed, update the alternate image record
10660Sstevel@tonic-gate 		 * id pointers to point to our moved record.
10670Sstevel@tonic-gate 		 */
10680Sstevel@tonic-gate 		if (hash != new_hash) {
10690Sstevel@tonic-gate 			if (recid_head != DN_NOREC) {
10700Sstevel@tonic-gate 				rec_head.rec_prev[!image] =
10710Sstevel@tonic-gate 				    rec_head.rec_prev[image];
10720Sstevel@tonic-gate 				if (write_rec(fd, &rec_head, recid_head) == -1)
10730Sstevel@tonic-gate 					return (syserr_to_dsvcerr(errno));
10740Sstevel@tonic-gate 			}
10750Sstevel@tonic-gate 			if (write_hashhead(fd, new_recid, new_hash, !image)
10760Sstevel@tonic-gate 			    == -1)
10770Sstevel@tonic-gate 				return (syserr_to_dsvcerr(errno));
10780Sstevel@tonic-gate 		}
10790Sstevel@tonic-gate 
10800Sstevel@tonic-gate 		/*
10810Sstevel@tonic-gate 		 * If our record id changed, then finish marking the old
10820Sstevel@tonic-gate 		 * record as "not in use".
10830Sstevel@tonic-gate 		 */
10840Sstevel@tonic-gate 		if (recid != new_recid) {
10850Sstevel@tonic-gate 			rec.rec_prev[!image] = DN_NOREC;
10860Sstevel@tonic-gate 			rec.rec_next[!image] = DN_NOREC;
10870Sstevel@tonic-gate 			if (write_rec(fd, &rec, recid) == -1)
10880Sstevel@tonic-gate 				return (syserr_to_dsvcerr(errno));
10890Sstevel@tonic-gate 		}
10900Sstevel@tonic-gate 	}
10910Sstevel@tonic-gate 
10920Sstevel@tonic-gate 	/*
10930Sstevel@tonic-gate 	 * Update the signature on the new record handed back to the caller.
10940Sstevel@tonic-gate 	 */
10950Sstevel@tonic-gate 	newp->dn_sig = new_rec.rec_dn.dn_sig;
10960Sstevel@tonic-gate 
10970Sstevel@tonic-gate 	/*
10980Sstevel@tonic-gate 	 * Finally, mark the container as clean.
10990Sstevel@tonic-gate 	 */
11000Sstevel@tonic-gate 	return (setabyte(fd, offsetof(dn_header_t, dnh_dirty), 0));
11010Sstevel@tonic-gate }
11020Sstevel@tonic-gate 
11030Sstevel@tonic-gate int
list_dn(const char * location,char *** listppp,uint_t * countp)11040Sstevel@tonic-gate list_dn(const char *location, char ***listppp, uint_t *countp)
11050Sstevel@tonic-gate {
11060Sstevel@tonic-gate 	char		ipaddr[INET_ADDRSTRLEN];
1107*871Scasper 	struct dirent	*result;
11080Sstevel@tonic-gate 	DIR		*dirp;
11090Sstevel@tonic-gate 	unsigned int	i, count = 0;
11100Sstevel@tonic-gate 	char		*re, **new_listpp, **listpp = NULL;
11110Sstevel@tonic-gate 	char		conver[4];
11120Sstevel@tonic-gate 	int		error;
11130Sstevel@tonic-gate 
11140Sstevel@tonic-gate 	dirp = opendir(location);
11150Sstevel@tonic-gate 	if (dirp == NULL) {
11160Sstevel@tonic-gate 		switch (errno) {
11170Sstevel@tonic-gate 		case EACCES:
11180Sstevel@tonic-gate 		case EPERM:
11190Sstevel@tonic-gate 			return (DSVC_ACCESS);
11200Sstevel@tonic-gate 		case ENOENT:
11210Sstevel@tonic-gate 			return (DSVC_NO_LOCATION);
11220Sstevel@tonic-gate 		default:
11230Sstevel@tonic-gate 			break;
11240Sstevel@tonic-gate 		}
11250Sstevel@tonic-gate 		return (DSVC_INTERNAL);
11260Sstevel@tonic-gate 	}
11270Sstevel@tonic-gate 
11280Sstevel@tonic-gate 	/*
11290Sstevel@tonic-gate 	 * Compile a regular expression matching "SUNWbinfilesX_" (where X
11300Sstevel@tonic-gate 	 * is a container version number) followed by an IP address
11310Sstevel@tonic-gate 	 * (roughly speaking).  Note that the $N constructions allow us to
11320Sstevel@tonic-gate 	 * get the container version and IP address when calling regex(3C).
11330Sstevel@tonic-gate 	 */
11340Sstevel@tonic-gate 	re = regcmp("^SUNWbinfiles([0-9]{1,3})$0_"
11350Sstevel@tonic-gate 	    "(([0-9]{1,3}_){3}[0-9]{1,3})$1$", (char *)0);
11360Sstevel@tonic-gate 	if (re == NULL)
11370Sstevel@tonic-gate 		return (DSVC_NO_MEMORY);
11380Sstevel@tonic-gate 
1139*871Scasper 	while ((result = readdir(dirp)) != NULL) {
11400Sstevel@tonic-gate 
11410Sstevel@tonic-gate 		if (regex(re, result->d_name, conver, ipaddr) != NULL) {
11420Sstevel@tonic-gate 			if (atoi(conver) != DSVC_CONVER)
11430Sstevel@tonic-gate 				continue;
11440Sstevel@tonic-gate 
11450Sstevel@tonic-gate 			for (i = 0; ipaddr[i] != '\0'; i++)
11460Sstevel@tonic-gate 				if (ipaddr[i] == '_')
11470Sstevel@tonic-gate 					ipaddr[i] = '.';
11480Sstevel@tonic-gate 
11490Sstevel@tonic-gate 			new_listpp = realloc(listpp,
11500Sstevel@tonic-gate 			    (sizeof (char **)) * (count + 1));
11510Sstevel@tonic-gate 			if (new_listpp == NULL) {
11520Sstevel@tonic-gate 				error = DSVC_NO_MEMORY;
11530Sstevel@tonic-gate 				goto fail;
11540Sstevel@tonic-gate 			}
11550Sstevel@tonic-gate 			listpp = new_listpp;
11560Sstevel@tonic-gate 			listpp[count] = strdup(ipaddr);
11570Sstevel@tonic-gate 			if (listpp[count] == NULL) {
11580Sstevel@tonic-gate 				error = DSVC_NO_MEMORY;
11590Sstevel@tonic-gate 				goto fail;
11600Sstevel@tonic-gate 			}
11610Sstevel@tonic-gate 			count++;
11620Sstevel@tonic-gate 		}
11630Sstevel@tonic-gate 	}
11640Sstevel@tonic-gate 	free(re);
11650Sstevel@tonic-gate 	(void) closedir(dirp);
11660Sstevel@tonic-gate 
11670Sstevel@tonic-gate 	*countp = count;
11680Sstevel@tonic-gate 	*listppp = listpp;
11690Sstevel@tonic-gate 	return (DSVC_SUCCESS);
11700Sstevel@tonic-gate fail:
11710Sstevel@tonic-gate 	free(re);
11720Sstevel@tonic-gate 	(void) closedir(dirp);
11730Sstevel@tonic-gate 
11740Sstevel@tonic-gate 	for (i = 0; i < count; i++)
11750Sstevel@tonic-gate 		free(listpp[i]);
11760Sstevel@tonic-gate 	free(listpp);
11770Sstevel@tonic-gate 	return (error);
11780Sstevel@tonic-gate }
11790Sstevel@tonic-gate 
11800Sstevel@tonic-gate /*
11810Sstevel@tonic-gate  * Check (a la fsck) that a given DHCP network container is in a consistent
11820Sstevel@tonic-gate  * state.  If not, then attempt to restore internal consistency; this should
11830Sstevel@tonic-gate  * always be possible unless the container has been externally corrupted.
11840Sstevel@tonic-gate  */
11850Sstevel@tonic-gate static int
check_dn(dn_handle_t * dhp)11860Sstevel@tonic-gate check_dn(dn_handle_t *dhp)
11870Sstevel@tonic-gate {
11880Sstevel@tonic-gate 	dn_header_t	header;
11890Sstevel@tonic-gate 	uchar_t		image, dirty;
11900Sstevel@tonic-gate 	uint16_t	hash;
11910Sstevel@tonic-gate 	dn_filerec_t	rec;
11920Sstevel@tonic-gate 	dn_recid_t	recid, maxrecid;
11930Sstevel@tonic-gate 	int		retval;
11940Sstevel@tonic-gate 
11950Sstevel@tonic-gate 	/*
11960Sstevel@tonic-gate 	 * Reading the whole header is a very expensive operation; only do
11970Sstevel@tonic-gate 	 * it once we're sure the container is actually dirty.  On an
11980Sstevel@tonic-gate 	 * E4500, this optimization lowers the wall-clock cost of creating
11990Sstevel@tonic-gate 	 * a 5000-record datastore by 20 percent.
12000Sstevel@tonic-gate 	 */
12010Sstevel@tonic-gate 	retval = getabyte(dhp->dh_fd, offsetof(dn_header_t, dnh_dirty), &dirty);
12020Sstevel@tonic-gate 	if (retval != DSVC_SUCCESS)
12030Sstevel@tonic-gate 		return (retval);
12040Sstevel@tonic-gate 
12050Sstevel@tonic-gate 	if (dirty == 0)
12060Sstevel@tonic-gate 		return (DSVC_SUCCESS);
12070Sstevel@tonic-gate 
12080Sstevel@tonic-gate 	if (read_header(dhp->dh_fd, &header, B_TRUE) == -1)
12090Sstevel@tonic-gate 		return (syserr_to_dsvcerr(errno));
12100Sstevel@tonic-gate 
12110Sstevel@tonic-gate 	/*
12120Sstevel@tonic-gate 	 * If `dnh_tempimage' matches the current working image, then we
12130Sstevel@tonic-gate 	 * crashed in the middle of a modify_dn() operation.  Complete
12140Sstevel@tonic-gate 	 * writing out the temporary record before restoring internal
12150Sstevel@tonic-gate 	 * consistency.  This is a bit of a kludge but there doesn't seem
12160Sstevel@tonic-gate 	 * to be another way.
12170Sstevel@tonic-gate 	 */
12180Sstevel@tonic-gate 	if (header.dnh_tempimage == header.dnh_image) {
12190Sstevel@tonic-gate 		recid = RECID(header.dnh_temp.rec_dn.dn_cip.s_addr,
12200Sstevel@tonic-gate 		    header.dnh_netmask);
12210Sstevel@tonic-gate 		if (write_rec(dhp->dh_fd, &header.dnh_temp, recid) == -1)
12220Sstevel@tonic-gate 			return (syserr_to_dsvcerr(errno));
12230Sstevel@tonic-gate 
12240Sstevel@tonic-gate 		header.dnh_tempimage = DN_NOIMAGE;
12250Sstevel@tonic-gate 	}
12260Sstevel@tonic-gate 
12270Sstevel@tonic-gate 	/*
12280Sstevel@tonic-gate 	 * Blindly update all the header hashhead pointers since we're
12290Sstevel@tonic-gate 	 * going to have to re-write the header anyway.
12300Sstevel@tonic-gate 	 */
12310Sstevel@tonic-gate 	image = header.dnh_image;
12320Sstevel@tonic-gate 	for (hash = 0; hash < DN_CIDHASHSZ; hash++) {
12330Sstevel@tonic-gate 		header.dnh_cidhash[hash][!image] =
12340Sstevel@tonic-gate 		    header.dnh_cidhash[hash][image];
12350Sstevel@tonic-gate 	}
12360Sstevel@tonic-gate 
12370Sstevel@tonic-gate 	/*
12380Sstevel@tonic-gate 	 * Synchronize the record pointers of all in-use records.  We do
12390Sstevel@tonic-gate 	 * this instead of just walking the hashheads because not all dirty
12400Sstevel@tonic-gate 	 * records are hashed (for instance, we may have failed part way
12410Sstevel@tonic-gate 	 * through an add_dn()).
12420Sstevel@tonic-gate 	 */
12430Sstevel@tonic-gate 	maxrecid = RECID(~0, header.dnh_netmask);
12440Sstevel@tonic-gate 	for (recid = RECID(0, header.dnh_netmask); recid <= maxrecid; recid++) {
12450Sstevel@tonic-gate 		if (read_rec(dhp->dh_fd, &rec, recid) == -1)
12460Sstevel@tonic-gate 			return (syserr_to_dsvcerr(errno));
12470Sstevel@tonic-gate 
12480Sstevel@tonic-gate 		/*
12490Sstevel@tonic-gate 		 * Verify the pointers match.  If not, then correct
12500Sstevel@tonic-gate 		 * the record and write it back to disk.
12510Sstevel@tonic-gate 		 */
12520Sstevel@tonic-gate 		if (rec.rec_next[image] != rec.rec_next[!image] ||
12530Sstevel@tonic-gate 		    rec.rec_prev[image] != rec.rec_prev[!image]) {
12540Sstevel@tonic-gate 			header.dnh_errors++;
12550Sstevel@tonic-gate 
12560Sstevel@tonic-gate 			rec.rec_prev[!image] = rec.rec_prev[image];
12570Sstevel@tonic-gate 			rec.rec_next[!image] = rec.rec_next[image];
12580Sstevel@tonic-gate 
12590Sstevel@tonic-gate 			if (write_rec(dhp->dh_fd, &rec, recid) == -1)
12600Sstevel@tonic-gate 				return (syserr_to_dsvcerr(errno));
12610Sstevel@tonic-gate 		}
12620Sstevel@tonic-gate 	}
12630Sstevel@tonic-gate 
12640Sstevel@tonic-gate 	header.dnh_checks++;
12650Sstevel@tonic-gate 	if (write_header(dhp->dh_fd, &header) == -1)
12660Sstevel@tonic-gate 		return (syserr_to_dsvcerr(errno));
12670Sstevel@tonic-gate 
12680Sstevel@tonic-gate 	/*
12690Sstevel@tonic-gate 	 * Clear the dirty bit on the container.
12700Sstevel@tonic-gate 	 */
12710Sstevel@tonic-gate 	return (setabyte(dhp->dh_fd, offsetof(dn_header_t, dnh_dirty), 0));
12720Sstevel@tonic-gate }
12730Sstevel@tonic-gate 
12740Sstevel@tonic-gate /*
12750Sstevel@tonic-gate  * Given a buffer `path' of `pathlen' bytes, fill it in with a path to the
12760Sstevel@tonic-gate  * DHCP Network table for IP network `ip' located in directory `dir'.
12770Sstevel@tonic-gate  */
12780Sstevel@tonic-gate static void
net2path(char * path,size_t pathlen,const char * dir,ipaddr_t ip)12790Sstevel@tonic-gate net2path(char *path, size_t pathlen, const char *dir, ipaddr_t ip)
12800Sstevel@tonic-gate {
12810Sstevel@tonic-gate 	(void) snprintf(path, pathlen, "%s/SUNWbinfiles%u_%d_%d_%d_%d", dir,
12820Sstevel@tonic-gate 	    DSVC_CONVER, ip >> 24, (ip >> 16) & 0xff, (ip >> 8) & 0xff,
12830Sstevel@tonic-gate 	    ip & 0xff);
12840Sstevel@tonic-gate }
12850Sstevel@tonic-gate 
12860Sstevel@tonic-gate /*
12870Sstevel@tonic-gate  * Given a `cid' that's `cidlen' bytes long, hash it to a value between 0
12880Sstevel@tonic-gate  * and DN_CIDHASHSZ - 1.  We use CRC16 for our hash since it's known to be
12890Sstevel@tonic-gate  * very evenly distributed.
12900Sstevel@tonic-gate  */
12910Sstevel@tonic-gate static uint16_t
cidhash(const uchar_t * cid,size_t cidlen)12920Sstevel@tonic-gate cidhash(const uchar_t *cid, size_t cidlen)
12930Sstevel@tonic-gate {
12940Sstevel@tonic-gate 	uchar_t		bit;
12950Sstevel@tonic-gate 	uint16_t	result = 0xffff;
12960Sstevel@tonic-gate 	const uint16_t	crc16_poly = 0x8408; /* mutated CRC-CCITT polynomial */
12970Sstevel@tonic-gate 
12980Sstevel@tonic-gate 	while (cidlen-- != 0) {
12990Sstevel@tonic-gate 		result ^= *cid++;
13000Sstevel@tonic-gate 		for (bit = 0; bit < 8; bit++) {
13010Sstevel@tonic-gate 			if (result & 1)
13020Sstevel@tonic-gate 				result = (result >> 1) ^ crc16_poly;
13030Sstevel@tonic-gate 			else
13040Sstevel@tonic-gate 				result >>= 1;
13050Sstevel@tonic-gate 		}
13060Sstevel@tonic-gate 	}
13070Sstevel@tonic-gate 	return (result % DN_CIDHASHSZ);
13080Sstevel@tonic-gate }
13090Sstevel@tonic-gate 
13100Sstevel@tonic-gate /*
13110Sstevel@tonic-gate  * Convert the dn_filerec_t pointed to by `rec' from native (host) to
13120Sstevel@tonic-gate  * network order or the other way.
13130Sstevel@tonic-gate  */
13140Sstevel@tonic-gate /* ARGSUSED */
13150Sstevel@tonic-gate static void
nhconvert_rec(dn_filerec_t * rec)13160Sstevel@tonic-gate nhconvert_rec(dn_filerec_t *rec)
13170Sstevel@tonic-gate {
13180Sstevel@tonic-gate #ifdef	_LITTLE_ENDIAN
13190Sstevel@tonic-gate 	dn_rec_t *dnp = &rec->rec_dn;
13200Sstevel@tonic-gate 
13210Sstevel@tonic-gate 	nhconvert(&rec->rec_prev[0], &rec->rec_prev[0], sizeof (dn_recid_t));
13220Sstevel@tonic-gate 	nhconvert(&rec->rec_prev[1], &rec->rec_prev[1], sizeof (dn_recid_t));
13230Sstevel@tonic-gate 	nhconvert(&rec->rec_next[0], &rec->rec_next[0], sizeof (dn_recid_t));
13240Sstevel@tonic-gate 	nhconvert(&rec->rec_next[1], &rec->rec_next[1], sizeof (dn_recid_t));
13250Sstevel@tonic-gate 
13260Sstevel@tonic-gate 	nhconvert(&dnp->dn_cip.s_addr, &dnp->dn_cip.s_addr, sizeof (ipaddr_t));
13270Sstevel@tonic-gate 	nhconvert(&dnp->dn_sip.s_addr, &dnp->dn_sip.s_addr, sizeof (ipaddr_t));
13280Sstevel@tonic-gate 	nhconvert(&dnp->dn_lease, &dnp->dn_lease, sizeof (lease_t));
13290Sstevel@tonic-gate 	nhconvert(&dnp->dn_sig, &dnp->dn_sig, sizeof (uint64_t));
13300Sstevel@tonic-gate #endif
13310Sstevel@tonic-gate }
13320Sstevel@tonic-gate 
13330Sstevel@tonic-gate /*
13340Sstevel@tonic-gate  * Convert the header pointed to by `hdrp' from native (host) to network
13350Sstevel@tonic-gate  * order or the other way.  If `hash' is false, then don't bother
13360Sstevel@tonic-gate  * converting the hash chains.
13370Sstevel@tonic-gate  */
13380Sstevel@tonic-gate /* ARGSUSED */
13390Sstevel@tonic-gate static void
nhconvert_header(dn_header_t * hdrp,boolean_t hash)13400Sstevel@tonic-gate nhconvert_header(dn_header_t *hdrp, boolean_t hash)
13410Sstevel@tonic-gate {
13420Sstevel@tonic-gate #ifdef	_LITTLE_ENDIAN
13430Sstevel@tonic-gate 	unsigned int i;
13440Sstevel@tonic-gate 
13450Sstevel@tonic-gate 	nhconvert(&hdrp->dnh_network, &hdrp->dnh_network, sizeof (ipaddr_t));
13460Sstevel@tonic-gate 	nhconvert(&hdrp->dnh_netmask, &hdrp->dnh_netmask, sizeof (ipaddr_t));
13470Sstevel@tonic-gate 	nhconvert(&hdrp->dnh_magic, &hdrp->dnh_magic, sizeof (uint32_t));
13480Sstevel@tonic-gate 	nhconvert_rec(&hdrp->dnh_temp);
13490Sstevel@tonic-gate 
13500Sstevel@tonic-gate 	if (hash) {
13510Sstevel@tonic-gate 		for (i = 0; i < DN_CIDHASHSZ; i++) {
13520Sstevel@tonic-gate 			nhconvert(&hdrp->dnh_cidhash[i][0],
13530Sstevel@tonic-gate 			    &hdrp->dnh_cidhash[i][0], sizeof (dn_recid_t));
13540Sstevel@tonic-gate 			nhconvert(&hdrp->dnh_cidhash[i][1],
13550Sstevel@tonic-gate 			    &hdrp->dnh_cidhash[i][1], sizeof (dn_recid_t));
13560Sstevel@tonic-gate 		}
13570Sstevel@tonic-gate 	}
13580Sstevel@tonic-gate #endif
13590Sstevel@tonic-gate }
13600Sstevel@tonic-gate 
13610Sstevel@tonic-gate /*
13620Sstevel@tonic-gate  * Read the dn_filerec_t identified by `recid' from open container `fd'
13630Sstevel@tonic-gate  * into `rec'.  Returns 0 on success, -1 on failure (errno is set).
13640Sstevel@tonic-gate  */
13650Sstevel@tonic-gate static int
read_rec(int fd,dn_filerec_t * rec,dn_recid_t recid)13660Sstevel@tonic-gate read_rec(int fd, dn_filerec_t *rec, dn_recid_t recid)
13670Sstevel@tonic-gate {
13680Sstevel@tonic-gate 	if (pnread(fd, rec, sizeof (*rec), RECID2OFFSET(recid)) == -1)
13690Sstevel@tonic-gate 		return (-1);
13700Sstevel@tonic-gate 
13710Sstevel@tonic-gate 	nhconvert_rec(rec);
13720Sstevel@tonic-gate 	return (0);
13730Sstevel@tonic-gate }
13740Sstevel@tonic-gate 
13750Sstevel@tonic-gate /*
13760Sstevel@tonic-gate  * Write the dn_filerec_t `rec' identified by `recid' into the open
13770Sstevel@tonic-gate  * container `fd'.  Returns 0 on success, -1 on failure (errno is set).
13780Sstevel@tonic-gate  */
13790Sstevel@tonic-gate static int
write_rec(int fd,dn_filerec_t * rec,dn_recid_t recid)13800Sstevel@tonic-gate write_rec(int fd, dn_filerec_t *rec, dn_recid_t recid)
13810Sstevel@tonic-gate {
13820Sstevel@tonic-gate 	int retval;
13830Sstevel@tonic-gate 
13840Sstevel@tonic-gate 	nhconvert_rec(rec);
13850Sstevel@tonic-gate 	retval = pnwrite(fd, rec, sizeof (*rec), RECID2OFFSET(recid));
13860Sstevel@tonic-gate 	nhconvert_rec(rec);
13870Sstevel@tonic-gate 	return (retval);
13880Sstevel@tonic-gate }
13890Sstevel@tonic-gate 
13900Sstevel@tonic-gate /*
13910Sstevel@tonic-gate  * Read the dn_header_t from the open container `fd' into the dn_header_t
13920Sstevel@tonic-gate  * pointed to by `hdrp'; if `hash' is not set, then skip reading the
13930Sstevel@tonic-gate  * dn_header_t hash chains.  Returns 0 on success, -1 on failure (errno is
13940Sstevel@tonic-gate  * set).
13950Sstevel@tonic-gate  */
13960Sstevel@tonic-gate static int
read_header(int fd,dn_header_t * hdrp,boolean_t hash)13970Sstevel@tonic-gate read_header(int fd, dn_header_t *hdrp, boolean_t hash)
13980Sstevel@tonic-gate {
13990Sstevel@tonic-gate 	size_t size;
14000Sstevel@tonic-gate 
14010Sstevel@tonic-gate 	size = hash ? sizeof (dn_header_t) : offsetof(dn_header_t, dnh_cidhash);
14020Sstevel@tonic-gate 	if (pnread(fd, hdrp, size, 0) == -1)
14030Sstevel@tonic-gate 		return (-1);
14040Sstevel@tonic-gate 
14050Sstevel@tonic-gate 	nhconvert_header(hdrp, hash);
14060Sstevel@tonic-gate 	return (0);
14070Sstevel@tonic-gate }
14080Sstevel@tonic-gate 
14090Sstevel@tonic-gate /*
14100Sstevel@tonic-gate  * Write the dn_header_t pointed to by `hdrp' into open container `fd'.
14110Sstevel@tonic-gate  * Returns 0 on success, -1 on failure (errno is set).
14120Sstevel@tonic-gate  */
14130Sstevel@tonic-gate static int
write_header(int fd,dn_header_t * hdrp)14140Sstevel@tonic-gate write_header(int fd, dn_header_t *hdrp)
14150Sstevel@tonic-gate {
14160Sstevel@tonic-gate 	int retval;
14170Sstevel@tonic-gate 
14180Sstevel@tonic-gate 	nhconvert_header(hdrp, B_TRUE);
14190Sstevel@tonic-gate 	retval = pnwrite(fd, hdrp, sizeof (dn_header_t), 0);
14200Sstevel@tonic-gate 	nhconvert_header(hdrp, B_TRUE);
14210Sstevel@tonic-gate 	return (retval);
14220Sstevel@tonic-gate }
14230Sstevel@tonic-gate 
14240Sstevel@tonic-gate /*
14250Sstevel@tonic-gate  * Read in the head of the `cidhash' hash chain from open container `fd'
14260Sstevel@tonic-gate  * into `recid_headp', using image `image'.  Returns 0 on success, -1 on
14270Sstevel@tonic-gate  * failure (errno is set).
14280Sstevel@tonic-gate  */
14290Sstevel@tonic-gate static int
read_hashhead(int fd,dn_recid_t * recid_headp,uint16_t cidhash,uchar_t image)14300Sstevel@tonic-gate read_hashhead(int fd, dn_recid_t *recid_headp, uint16_t cidhash, uchar_t image)
14310Sstevel@tonic-gate {
14320Sstevel@tonic-gate 	if (pnread(fd, recid_headp, sizeof (dn_recid_t),
14330Sstevel@tonic-gate 	    offsetof(dn_header_t, dnh_cidhash[cidhash][image])) == -1)
14340Sstevel@tonic-gate 		return (-1);
14350Sstevel@tonic-gate 
14360Sstevel@tonic-gate 	nhconvert(recid_headp, recid_headp, sizeof (dn_recid_t));
14370Sstevel@tonic-gate 	return (0);
14380Sstevel@tonic-gate }
14390Sstevel@tonic-gate 
14400Sstevel@tonic-gate /*
14410Sstevel@tonic-gate  * Write out the head of the `cidhash' hash chain into open container `fd'
14420Sstevel@tonic-gate  * from `recid_head', using image `image'.  Returns 0 on success, -1 on
14430Sstevel@tonic-gate  * failure (errno is set).
14440Sstevel@tonic-gate  */
14450Sstevel@tonic-gate static int
write_hashhead(int fd,dn_recid_t recid_head,uint16_t cidhash,uchar_t image)14460Sstevel@tonic-gate write_hashhead(int fd, dn_recid_t recid_head, uint16_t cidhash, uchar_t image)
14470Sstevel@tonic-gate {
14480Sstevel@tonic-gate 	nhconvert(&recid_head, &recid_head, sizeof (dn_recid_t));
14490Sstevel@tonic-gate 	return (pnwrite(fd, &recid_head, sizeof (dn_recid_t),
14500Sstevel@tonic-gate 	    offsetof(dn_header_t, dnh_cidhash[cidhash][image])));
14510Sstevel@tonic-gate }
14520Sstevel@tonic-gate 
14530Sstevel@tonic-gate /*
14540Sstevel@tonic-gate  * Get the byte `offset' bytes into open file `fd', and store in `bytep'.
14550Sstevel@tonic-gate  * Returns a DSVC_* return code.
14560Sstevel@tonic-gate  */
14570Sstevel@tonic-gate static int
getabyte(int fd,off_t offset,uchar_t * bytep)14580Sstevel@tonic-gate getabyte(int fd, off_t offset, uchar_t *bytep)
14590Sstevel@tonic-gate {
14600Sstevel@tonic-gate 	switch (pread(fd, bytep, 1, offset)) {
14610Sstevel@tonic-gate 	case 1:
14620Sstevel@tonic-gate 		return (DSVC_SUCCESS);
14630Sstevel@tonic-gate 	case -1:
14640Sstevel@tonic-gate 		return (syserr_to_dsvcerr(errno));
14650Sstevel@tonic-gate 	default:
14660Sstevel@tonic-gate 		break;
14670Sstevel@tonic-gate 	}
14680Sstevel@tonic-gate 
14690Sstevel@tonic-gate 	return (DSVC_INTERNAL);
14700Sstevel@tonic-gate }
14710Sstevel@tonic-gate 
14720Sstevel@tonic-gate /*
14730Sstevel@tonic-gate  * Set the byte `offset' bytes into open file `fd' to `byte'.  Returns a
14740Sstevel@tonic-gate  * DSVC_* return code.
14750Sstevel@tonic-gate  */
14760Sstevel@tonic-gate static int
setabyte(int fd,off_t offset,uchar_t byte)14770Sstevel@tonic-gate setabyte(int fd, off_t offset, uchar_t byte)
14780Sstevel@tonic-gate {
14790Sstevel@tonic-gate 	switch (pwrite(fd, &byte, 1, offset)) {
14800Sstevel@tonic-gate 	case 1:
14810Sstevel@tonic-gate 		return (DSVC_SUCCESS);
14820Sstevel@tonic-gate 	case -1:
14830Sstevel@tonic-gate 		return (syserr_to_dsvcerr(errno));
14840Sstevel@tonic-gate 	default:
14850Sstevel@tonic-gate 		break;
14860Sstevel@tonic-gate 	}
14870Sstevel@tonic-gate 
14880Sstevel@tonic-gate 	return (DSVC_INTERNAL);
14890Sstevel@tonic-gate }
1490