1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * This file contains public functions for managing DHCP network
31  * containers.  For the semantics of these functions, please see the
32  * Enterprise DHCP Architecture Document.
33  *
34  * This module uses synchronization guarantees provided by dsvclockd(1M);
35  * please see $SRC/lib/libdhcpsvc/private/README.synch for details.
36  *
37  * Big Theory Statement for the SUNWbinfiles DHCP Network Module
38  * =============================================================
39  *
40  * 1. On-disk Structure
41  *
42  * Each container consists of two basic pieces on-disk: a header and an
43  * array of records.  In order to provide fast client IP lookup, the array
44  * of records is directly indexed by client IP address (using a simple
45  * mapping function).  In order to provide fast client id lookup, each
46  * in-use record is also on exactly one doubly-linked client id hash chain
47  * (the hash chain heads are contained in the header).  For all other
48  * lookups, we can restrict our search to only the in-use records by merely
49  * walking all of the hash chains.  Here's a crude illustration of what
50  * this looks like on-disk (note that hash chains 2 and 3 are empty):
51  *
52  *              _______________________________________________
53  *             | container info   | hash chain heads (buckets) |
54  *    header   |                  | 1 | 2 | 3 |  [ .... ]  | N |
55  *             |                  | | |   |   |            | | |
56  *             |__________________|_|________________________|_|
57  *             | rec1      | rec2   |  | rec3      | rec4    | |
58  *             |           |        +--->          |         | |
59  *             | unused    | unused    | hash1     | unused  | |
60  *             |___________|___________|________^|_|_________|_|
61  *             | rec5      | rec6      | rec7   |v | rec8    | |
62  *             |           |           |           ->        | |
63  *    records  | unused    | hashN     | hash1    <- hash1   | |
64  *             |___________|________^|_|___________|_________|_|
65  *             |           :        :: :           :         : |
66  *             |           :        :: : [ more records... ] : |
67  *             |           :        :: :           :         : |
68  *             |___________:________::_:___________:_________:_|
69  *             | recN-3    | recN-2 || | recN-1    | recN    v |
70  *             |           |        |+-->          ->          |
71  *             | unused    | unused +--- hashN    <- hashN     |
72  *             |___________|___________|___________|___________|
73  *
74  * Note that the actual on-disk format is a bit more complicated than this
75  * due to robustness issues; see section 3 below for details.
76  *
77  * 2. Robustness Requirements
78  *
79  * This module has been designed to be as efficient as possible while still
80  * retaining the robustness minimally required for an enterprise-level
81  * environment.  In particular, it is designed to handle the following
82  * failure situations:
83  *
84  *	1. An update operation (add, modify, delete) on a container is
85  *	   unable to complete due to an unexpected internal error at
86  *	   any point in the update code.
87  *
88  *	2. An update operation (add, modify, delete) on a container is
89  *	   unable to complete due to unexpected program termination while
90  *	   at any point in the update code.
91  *
92  * If either of these situations occurs, the container in question must be
93  * left in a consistent (and viable) state.  In addition, only the pending
94  * transaction (at most) may be lost.
95  *
96  * 3. Robustness Techniques
97  *
98  * This module uses a few different techniques to meet our robustness goals
99  * while maintaining high performance.  The biggest problem we encounter
100  * when trying to achieve robustness is updating the client id hash chain.
101  * In particular, it is not possible to atomically add, move, or delete an
102  * item from a doubly linked list, thus creating a window where a crash
103  * could leave our hash chains in an inconsistent state.
104  *
105  * To address this problem, we actually maintain two images (copies) of all
106  * the hash chains in the container.  At any point in time, exactly one of
107  * the two images is active (and thus considered authoritative), as
108  * indicated by a byte in the container header.  When performing an update
109  * operation, all hash chain modifications are done on the *inactive*
110  * image, then, once the inactive image has completed the hash chain
111  * operations required by the update, the active and inactive images are
112  * atomically switched, making the formerly-inactive image authoritative.
113  * After the image switch, the update code then updates the formerly-active
114  * image's hash chains to match the active image's hash chains.
115  *
116  * This approach has the nice property that internal container consistency
117  * can always be restored after a crash by just resynchronizing the
118  * inactive image's hash chains with the active image's chains.  Note that
119  * the atomic image switch serves as the "commit point" for the operation:
120  * if we crash before this point, we roll back the operation upon recovery
121  * and it appears as though the operation never happened; if we crash after
122  * this point, we roll forward the rest of the operation upon recovery as
123  * if the crash had not happened.
124  *
125  * This technique is enough to robustly implement our add and delete
126  * operations, but modify has an additional complication due to our direct
127  * mapping of client IP addresses to records.  In particular, unless the
128  * record modification includes changing the client IP address, the
129  * modified record must be written at the same location as the original
130  * record -- however, if the modify operation fails part way through
131  * writing out the new client record, the record will be corrupt and we
132  * will have no way to return the record to a consistent state.  To address
133  * this issue, we allocate a spare record in the container header called
134  * the "temporary" record.  Upon a modification of this type, we first
135  * write the modified record to the temporary record and indicate that the
136  * temporary record is currently proxying for the actual record.  We then
137  * copy the temporary record to the actual record and make the temporary
138  * record available again for future use.  If a crash occurs before the
139  * copy to the temporary record is complete, then we just roll back as if
140  * the modify never happened (since we have not modified the actual
141  * record).  If a crash occurs after copying the temporary record, we roll
142  * forward and complete the copy operation as if the crash never happened.
143  * Note that there are some additional subtle complications here; see the
144  * comments in the code for details.
145  */
146 
147 #include <unistd.h>
148 #include <sys/types.h>
149 #include <sys/socket.h>
150 #include <sys/isa_defs.h>
151 #include <netinet/in.h>
152 #include <dhcp_svc_public.h>
153 #include <stdlib.h>
154 #include <dirent.h>
155 #include <string.h>
156 #include <libgen.h>
157 #include <errno.h>
158 #include <stddef.h>
159 #include <assert.h>
160 
161 #include "dhcp_network.h"
162 #include "util.h"
163 
164 static uint16_t	cidhash(const uchar_t *, size_t);
165 static void	net2path(char *, size_t, const char *, ipaddr_t);
166 static int	check_dn(dn_handle_t *);
167 static int	getabyte(int, off_t, uchar_t *);
168 static int	setabyte(int, off_t, uchar_t);
169 static int	read_rec(int, dn_filerec_t *, dn_recid_t);
170 static int	write_rec(int, dn_filerec_t *, dn_recid_t);
171 static int	read_header(int, dn_header_t *, boolean_t);
172 static int	write_header(int, dn_header_t *);
173 static int	read_hashhead(int, dn_recid_t *, uint16_t, uchar_t);
174 static int	write_hashhead(int, dn_recid_t, uint16_t, uchar_t);
175 static boolean_t record_match(const dn_rec_t *, const dn_rec_t *, uint_t);
176 
177 int
open_dn(void ** handlep,const char * dir,uint_t flags,const struct in_addr * netp,const struct in_addr * maskp)178 open_dn(void **handlep, const char *dir, uint_t flags,
179     const struct in_addr *netp, const struct in_addr *maskp)
180 {
181 	dn_handle_t	*dhp;
182 	dn_header_t	header = { 0 };
183 	char		dnpath[MAXPATHLEN];
184 	int		i, retval;
185 	off_t		filesz;
186 
187 	dhp = malloc(sizeof (dn_handle_t));
188 	if (dhp == NULL)
189 		return (DSVC_NO_MEMORY);
190 
191 	/*
192 	 * As a safeguard, check that the size of a dn_header_t hasn't
193 	 * changed (since it contains a dn_rec_t, this will probably catch
194 	 * a change in that structure as well).  If it has, bail rather
195 	 * than totally corrupting the container (by continuing).  Note
196 	 * that this situation indicates an internal programming error,
197 	 * which is why we prefer assert() to just returning DSVC_INTERNAL.
198 	 */
199 	/* CONSTCOND */
200 	assert(sizeof (header) == 32768);
201 
202 	net2path(dnpath, MAXPATHLEN, dir, netp->s_addr);
203 	retval = open_file(dnpath, flags, &dhp->dh_fd);
204 	if (retval != DSVC_SUCCESS) {
205 		free(dhp);
206 		return (retval);
207 	}
208 
209 	if (flags & DSVC_CREATE) {
210 		/*
211 		 * We just created the per-network container; initialize
212 		 * the header and put it out on disk.  Note that we leave
213 		 * `dnh_version' zero until the entire header has been
214 		 * written, so we can detect partial failure.
215 		 */
216 		header.dnh_version	= 0;
217 		header.dnh_network	= netp->s_addr;
218 		header.dnh_netmask	= maskp->s_addr;
219 		header.dnh_magic	= DN_MAGIC;
220 		header.dnh_tempimage	= DN_NOIMAGE;
221 		header.dnh_image	= 0;
222 		header.dnh_errors	= 0;
223 		header.dnh_checks	= 0;
224 		for (i = 0; i < DN_CIDHASHSZ; i++) {
225 			header.dnh_cidhash[i][header.dnh_image]  = DN_NOREC;
226 			header.dnh_cidhash[i][!header.dnh_image] = DN_NOREC;
227 		}
228 
229 		if (write_header(dhp->dh_fd, &header) == -1) {
230 			retval = syserr_to_dsvcerr(errno);
231 			(void) remove_dn(dir, netp);
232 			(void) close_dn((void **)&dhp);
233 			return (retval);
234 		}
235 
236 		/*
237 		 * Virtually reserve all the space we're going to need for
238 		 * the dn_rec_t's ahead of time, so that we don't have to
239 		 * worry about "growing" the file later (though it may
240 		 * increase in size as we fill in holes).  We're guaranteed
241 		 * that we'll read these holes as zeros, which we take
242 		 * advantage of since a dn_filerec_t with a rec_prev of
243 		 * DN_NOREC (which is 0) indicates that a record is unused.
244 		 */
245 		filesz = RECID2OFFSET(RECID(~0, header.dnh_netmask) + 1);
246 		retval = setabyte(dhp->dh_fd, filesz - 1, 0);
247 		if (retval != DSVC_SUCCESS) {
248 			(void) remove_dn(dir, netp);
249 			(void) close_dn((void **)&dhp);
250 			return (retval);
251 		}
252 
253 		/*
254 		 * Set the version field on the container, effectively
255 		 * making it available for use.
256 		 */
257 		retval = setabyte(dhp->dh_fd, offsetof(dn_header_t,
258 		    dnh_version), DSVC_CONVER);
259 		if (retval != DSVC_SUCCESS) {
260 			(void) remove_dn(dir, netp);
261 			(void) close_dn((void **)&dhp);
262 			return (retval);
263 		}
264 	} else {
265 		/*
266 		 * Container already exists; sanity check against the
267 		 * header that's on-disk.  If we detect a problem then
268 		 * either someone scribbled on our container or we
269 		 * terminated abnormally when creating the container.
270 		 */
271 		if (read_header(dhp->dh_fd, &header, B_FALSE) == -1) {
272 			retval = syserr_to_dsvcerr(errno);
273 			(void) close_dn((void **)&dhp);
274 			return (retval);
275 		}
276 
277 		if (header.dnh_network != netp->s_addr ||
278 		    header.dnh_version != DSVC_CONVER ||
279 		    header.dnh_magic != DN_MAGIC) {
280 			(void) close_dn((void **)&dhp);
281 			return (DSVC_INTERNAL);
282 		}
283 	}
284 
285 	dhp->dh_netmask	= header.dnh_netmask;
286 	dhp->dh_oflags	= flags;
287 
288 	*handlep = dhp;
289 	return (DSVC_SUCCESS);
290 }
291 
292 int
close_dn(void ** handlep)293 close_dn(void **handlep)
294 {
295 	dn_handle_t *dhp = (dn_handle_t *)*handlep;
296 
297 	if (close(dhp->dh_fd) == -1)
298 		return (DSVC_INTERNAL);
299 
300 	free(dhp);
301 	return (DSVC_SUCCESS);
302 }
303 
304 int
remove_dn(const char * dir,const struct in_addr * netp)305 remove_dn(const char *dir, const struct in_addr *netp)
306 {
307 	char dnpath[MAXPATHLEN];
308 
309 	net2path(dnpath, MAXPATHLEN, dir, netp->s_addr);
310 	if (unlink(dnpath) == -1)
311 		return (syserr_to_dsvcerr(errno));
312 
313 	return (DSVC_SUCCESS);
314 }
315 
316 int
lookup_dn(void * handle,boolean_t partial,uint_t query,int count,const dn_rec_t * targetp,dn_rec_list_t ** recordsp,uint_t * nrecordsp)317 lookup_dn(void *handle, boolean_t partial, uint_t query, int count,
318     const dn_rec_t *targetp, dn_rec_list_t **recordsp, uint_t *nrecordsp)
319 {
320 	dn_handle_t	*dhp = (dn_handle_t *)handle;
321 	int		retval = DSVC_SUCCESS;
322 	uint_t		nrecords, n;
323 	uint16_t	hash;
324 	dn_rec_t	*recordp;
325 	dn_rec_list_t	*records, *new_records;
326 	dn_recid_t	recid, temp_recid = DN_NOREC;
327 	dn_filerec_t	rec;
328 	dn_header_t	header;
329 	uchar_t		image;
330 	int		fd = dhp->dh_fd;
331 
332 	if ((dhp->dh_oflags & DSVC_READ) == 0)
333 		return (DSVC_ACCESS);
334 
335 	if (read_header(fd, &header, B_FALSE) == -1)
336 		return (syserr_to_dsvcerr(errno));
337 
338 	/*
339 	 * It's possible that a previous update to this container failed
340 	 * part-way through.  In general, this is fine since we always keep
341 	 * our active image's hash chains correct and only swap to the
342 	 * alternate image when the other image is completely safe to use.
343 	 * However, for reasons explained in modify_dn(), it's possible
344 	 * that a record being modified was not completely updated before a
345 	 * failure occurred.  In this case, the actual data for that record
346 	 * is contained in the temporary record in the header.  We need to
347 	 * be careful to use that temporary record anywhere we'd otherwise
348 	 * refer to the partially updated record.  Note that we do this
349 	 * rather than attempting to restore the consistency of the
350 	 * container because we're MT-hot here.
351 	 */
352 	if (header.dnh_dirty && header.dnh_tempimage == header.dnh_image) {
353 		temp_recid = RECID(header.dnh_temp.rec_dn.dn_cip.s_addr,
354 		    header.dnh_netmask);
355 	}
356 
357 	image = header.dnh_image;
358 	records = NULL;
359 	for (n = 0, nrecords = 0; count < 0 || nrecords < count; n++) {
360 		if (DSVC_QISEQ(query, DN_QCIP)) {
361 			/*
362 			 * Lookup scenario 1: Caller has requested a QN_CIP
363 			 * query lookup; set `recid' to the only possible
364 			 * entry (which may not be in-use).
365 			 */
366 			if (n != 0)
367 				break;
368 			recid = RECID(targetp->dn_cip.s_addr, dhp->dh_netmask);
369 		} else if (DSVC_QISEQ(query, DN_QCID)) {
370 			/*
371 			 * Lookup scenario 2: Caller has requested a
372 			 * QN_CID-based lookup.  Walk the `cidhash' chain
373 			 * (one call at a time) and set `recid' to hash
374 			 * bucket candidates.
375 			 *
376 			 * Note that it's possible for the client id value
377 			 * 00 to appear more than once, and it's not
378 			 * impossible for other duplicate client ids to
379 			 * occur, so continue until we reach `nrecords'.
380 			 */
381 			if (n == 0) {
382 				hash = cidhash(targetp->dn_cid,
383 				    targetp->dn_cid_len);
384 				if (read_hashhead(fd, &recid, hash, image)
385 				    == -1)
386 					return (syserr_to_dsvcerr(errno));
387 			} else {
388 				/* sanity check */
389 				if (recid == rec.rec_next[image])
390 					break;
391 				recid = rec.rec_next[image];
392 			}
393 		} else {
394 			/*
395 			 * Lookup scenario 3: Caller has requested any
396 			 * other type of search.  Walk the all the client
397 			 * id hashes.
398 			 */
399 			if (n == 0) {
400 				hash = 0;
401 				if (read_header(fd, &header, B_TRUE) == -1)
402 					return (syserr_to_dsvcerr(errno));
403 				recid = header.dnh_cidhash[hash][image];
404 			} else {
405 				/* sanity check */
406 				if (recid == rec.rec_next[image])
407 					break;
408 				recid = rec.rec_next[image];
409 			}
410 
411 			while (recid == DN_NOREC && ++hash < DN_CIDHASHSZ)
412 				recid = header.dnh_cidhash[hash][image];
413 		}
414 
415 		/*
416 		 * No more records; bail.
417 		 */
418 		if (recid == DN_NOREC)
419 			break;
420 
421 		if (recid == temp_recid) {
422 			/*
423 			 * The temporary record is actually authoritative
424 			 * for this record's contents; use it instead.
425 			 */
426 			recid = DN_TEMPREC;
427 		}
428 
429 		if (read_rec(dhp->dh_fd, &rec, recid) == -1) {
430 			retval = syserr_to_dsvcerr(errno);
431 			break;
432 		}
433 
434 		/*
435 		 * If the record isn't in-use, then skip...
436 		 */
437 		if (rec.rec_prev[image] == DN_NOREC)
438 			continue;
439 
440 		/*
441 		 * See if we've got a match...
442 		 */
443 		if (!record_match(&rec.rec_dn, targetp, query))
444 			continue;
445 
446 		/*
447 		 * Caller just wants a count of the number of matching
448 		 * records, not the records themselves; continue.
449 		 */
450 		if (recordsp == NULL) {
451 			nrecords++;
452 			continue;
453 		}
454 
455 		/*
456 		 * Allocate the record and fill it in.
457 		 */
458 		recordp = malloc(sizeof (dn_rec_t));
459 		if (recordp == NULL) {
460 			if (!partial)
461 				retval = DSVC_NO_MEMORY;
462 			break;
463 		}
464 		*recordp = rec.rec_dn;
465 
466 		/*
467 		 * Chuck the record on the list and up the counter.
468 		 */
469 		new_records = add_dnrec_to_list(recordp, records);
470 		if (new_records == NULL) {
471 			free(recordp);
472 			if (!partial)
473 				retval = DSVC_NO_MEMORY;
474 			break;
475 		}
476 
477 		records = new_records;
478 		nrecords++;
479 	}
480 
481 	if (retval == DSVC_SUCCESS) {
482 		*nrecordsp = nrecords;
483 		if (recordsp != NULL)
484 			*recordsp = records;
485 		return (DSVC_SUCCESS);
486 	}
487 
488 	if (records != NULL)
489 		free_dnrec_list(records);
490 
491 	return (retval);
492 }
493 
494 /*
495  * Compares `dnp' to the target `targetp', using `query' to decide what
496  * fields to compare.  Returns B_TRUE if `dnp' matches `targetp', B_FALSE
497  * if not.
498  */
499 static boolean_t
record_match(const dn_rec_t * dnp,const dn_rec_t * targetp,uint_t query)500 record_match(const dn_rec_t *dnp, const dn_rec_t *targetp, uint_t query)
501 {
502 	unsigned int qflags[] = { DN_QFDYNAMIC, DN_QFAUTOMATIC, DN_QFMANUAL,
503 				DN_QFUNUSABLE, DN_QFBOOTP_ONLY };
504 	unsigned int flags[]  = { DN_FDYNAMIC, DN_FAUTOMATIC, DN_FMANUAL,
505 				DN_FUNUSABLE, DN_FBOOTP_ONLY };
506 	unsigned int i;
507 	unsigned int query0;
508 
509 	/*
510 	 * As an optimization, skip any checks if the query is empty.
511 	 */
512 	DSVC_QINIT(query0);
513 	if (query == query0)
514 		return (B_TRUE);
515 
516 	if (DSVC_QISEQ(query, DN_QLEASE) &&
517 	    targetp->dn_lease != dnp->dn_lease)
518 		return (B_FALSE);
519 	if (DSVC_QISNEQ(query, DN_QLEASE) &&
520 	    targetp->dn_lease == dnp->dn_lease)
521 		return (B_FALSE);
522 
523 	if (DSVC_QISEQ(query, DN_QCIP) &&
524 	    dnp->dn_cip.s_addr != targetp->dn_cip.s_addr)
525 		return (B_FALSE);
526 	if (DSVC_QISNEQ(query, DN_QCIP) &&
527 	    dnp->dn_cip.s_addr == targetp->dn_cip.s_addr)
528 		return (B_FALSE);
529 
530 	if (DSVC_QISEQ(query, DN_QCID) &&
531 	    (dnp->dn_cid_len != targetp->dn_cid_len ||
532 	    (memcmp(dnp->dn_cid, targetp->dn_cid, dnp->dn_cid_len) != 0)))
533 		return (B_FALSE);
534 	if (DSVC_QISNEQ(query, DN_QCID) &&
535 	    (dnp->dn_cid_len == targetp->dn_cid_len &&
536 	    (memcmp(dnp->dn_cid, targetp->dn_cid, dnp->dn_cid_len) == 0)))
537 		return (B_FALSE);
538 
539 	if (DSVC_QISEQ(query, DN_QSIP) &&
540 	    dnp->dn_sip.s_addr != targetp->dn_sip.s_addr)
541 		return (B_FALSE);
542 	if (DSVC_QISNEQ(query, DN_QSIP) &&
543 	    dnp->dn_sip.s_addr == targetp->dn_sip.s_addr)
544 		return (B_FALSE);
545 
546 	if (DSVC_QISEQ(query, DN_QMACRO) &&
547 	    strcmp(targetp->dn_macro, dnp->dn_macro) != 0)
548 		return (B_FALSE);
549 	if (DSVC_QISNEQ(query, DN_QMACRO) &&
550 	    strcmp(targetp->dn_macro, dnp->dn_macro) == 0)
551 		return (B_FALSE);
552 
553 	for (i = 0; i < sizeof (qflags) / sizeof (unsigned int); i++) {
554 		if (DSVC_QISEQ(query, qflags[i]) &&
555 		    (dnp->dn_flags & flags[i]) !=
556 		    (targetp->dn_flags & flags[i]))
557 			return (B_FALSE);
558 		if (DSVC_QISNEQ(query, qflags[i]) &&
559 		    (dnp->dn_flags & flags[i]) ==
560 		    (targetp->dn_flags & flags[i]))
561 			return (B_FALSE);
562 	}
563 
564 	return (B_TRUE);
565 }
566 
567 int
add_dn(void * handle,dn_rec_t * addp)568 add_dn(void *handle, dn_rec_t *addp)
569 {
570 	dn_filerec_t	rec, rec_next;
571 	dn_recid_t	recid, recid_head;
572 	uint16_t	hash;
573 	uchar_t		image;
574 	int		retval;
575 	dn_handle_t	*dhp = (dn_handle_t *)handle;
576 	int		fd = dhp->dh_fd;
577 
578 	if ((dhp->dh_oflags & DSVC_WRITE) == 0)
579 		return (DSVC_ACCESS);
580 
581 	retval = check_dn(dhp);
582 	if (retval != DSVC_SUCCESS)
583 		return (retval);
584 
585 	hash = cidhash(addp->dn_cid, addp->dn_cid_len);
586 
587 	/*
588 	 * Get the active image.
589 	 */
590 	retval = getabyte(fd, offsetof(dn_header_t, dnh_image), &image);
591 	if (retval != DSVC_SUCCESS)
592 		return (retval);
593 
594 	/*
595 	 * Doublecheck to make sure this entry doesn't exist already.
596 	 */
597 	recid = RECID(addp->dn_cip.s_addr, dhp->dh_netmask);
598 	if (read_rec(fd, &rec, recid) == -1)
599 		return (syserr_to_dsvcerr(errno));
600 
601 	if (rec.rec_prev[image] != DN_NOREC)
602 		return (DSVC_EXISTS);
603 
604 	/*
605 	 * We're going to insert `rec' at the head of the `hash' hash
606 	 * chain; get it ready-to-go.  Note that we update the alternate
607 	 * image's hash record id pointers so that the record will
608 	 * atomically become in-use when we switch to the alternate image.
609 	 */
610 	if (read_hashhead(fd, &recid_head, hash, image) == -1)
611 		return (syserr_to_dsvcerr(errno));
612 
613 	rec.rec_dn = *addp;
614 	rec.rec_dn.dn_sig = gensig();
615 	rec.rec_prev[!image] = DN_HASHHEAD;
616 	rec.rec_next[!image] = recid_head;
617 
618 	/*
619 	 * If there's a record currently on the hash chain (i.e, we're
620 	 * not the first) then load the record.
621 	 */
622 	if (rec.rec_next[!image] != DN_NOREC) {
623 		if (read_rec(fd, &rec_next, rec.rec_next[!image]) == -1)
624 			return (syserr_to_dsvcerr(errno));
625 	}
626 
627 	/*
628 	 * Before we update any information on disk, mark the container as
629 	 * dirty so that there's no chance the container is inconsistent
630 	 * without us knowing about it.
631 	 */
632 	retval = setabyte(fd, offsetof(dn_header_t, dnh_dirty), 1);
633 	if (retval != DSVC_SUCCESS)
634 		return (retval);
635 
636 	/*
637 	 * Update the new record on-disk; note that it's not yet reachable
638 	 * via hash.
639 	 */
640 	if (write_rec(fd, &rec, recid) == -1)
641 		return (syserr_to_dsvcerr(errno));
642 
643 	/*
644 	 * Update the alternate image's on-disk hash pointers.  We need to
645 	 * do this before we switch to the alternate image so we cannot
646 	 * abort with an inconsistent active image.
647 	 */
648 	if (rec.rec_next[!image] != DN_NOREC) {
649 		rec_next.rec_prev[!image] = recid;
650 		if (write_rec(fd, &rec_next, rec.rec_next[!image]) == -1)
651 			return (syserr_to_dsvcerr(errno));
652 	}
653 	if (write_hashhead(fd, recid, hash, !image) == -1)
654 		return (syserr_to_dsvcerr(errno));
655 
656 	/*
657 	 * Activate the alternate image.  This is our commit point -- if we
658 	 * fail after this point, we will roll forward on recovery.
659 	 */
660 	image = !image;
661 	retval = setabyte(fd, offsetof(dn_header_t, dnh_image), image);
662 	if (retval != DSVC_SUCCESS)
663 		return (retval);
664 
665 	/*
666 	 * Update the old record id pointers to match
667 	 */
668 	rec.rec_prev[!image] = rec.rec_prev[image];
669 	rec.rec_next[!image] = rec.rec_next[image];
670 	if (write_rec(fd, &rec, recid) == -1)
671 		return (syserr_to_dsvcerr(errno));
672 
673 	if (rec.rec_next[!image] != DN_NOREC) {
674 		rec_next.rec_prev[!image] = recid;
675 		if (write_rec(fd, &rec_next, rec.rec_next[!image]) == -1)
676 			return (syserr_to_dsvcerr(errno));
677 	}
678 	if (write_hashhead(fd, recid, hash, !image) == -1)
679 		return (syserr_to_dsvcerr(errno));
680 
681 	/*
682 	 * Update the signature on the record handed back to the caller.
683 	 */
684 	addp->dn_sig = rec.rec_dn.dn_sig;
685 
686 	/*
687 	 * Finally, mark the container as clean.
688 	 */
689 	return (setabyte(fd, offsetof(dn_header_t, dnh_dirty), 0));
690 }
691 
692 int
delete_dn(void * handle,const dn_rec_t * delp)693 delete_dn(void *handle, const dn_rec_t *delp)
694 {
695 	dn_filerec_t	rec, rec_prev, rec_next;
696 	dn_recid_t	recid;
697 	uint16_t	hash;
698 	uchar_t		image;
699 	int		retval;
700 	dn_handle_t	*dhp = (dn_handle_t *)handle;
701 	int		fd = dhp->dh_fd;
702 
703 	if ((dhp->dh_oflags & DSVC_WRITE) == 0)
704 		return (DSVC_ACCESS);
705 
706 	retval = check_dn(dhp);
707 	if (retval != DSVC_SUCCESS)
708 		return (retval);
709 
710 	/*
711 	 * Get the active image.
712 	 */
713 	retval = getabyte(fd, offsetof(dn_header_t, dnh_image), &image);
714 	if (retval != DSVC_SUCCESS)
715 		return (retval);
716 
717 	/*
718 	 * Find the original entry in the network table, make sure the
719 	 * record is in-use, and check the signature field (to guard
720 	 * against collisions).
721 	 */
722 	recid = RECID(delp->dn_cip.s_addr, dhp->dh_netmask);
723 	if (read_rec(fd, &rec, recid) == -1)
724 		return (syserr_to_dsvcerr(errno));
725 
726 	if (rec.rec_prev[image] == DN_NOREC)
727 		return (DSVC_NOENT);
728 
729 	hash = cidhash(rec.rec_dn.dn_cid, rec.rec_dn.dn_cid_len);
730 
731 	/*
732 	 * The signatures must match to delete a record, *except* when
733 	 * delp->dn_sig == 0.  This is so records can be deleted that
734 	 * weren't retrieved via lookup_dn()
735 	 */
736 	if (delp->dn_sig != 0 && rec.rec_dn.dn_sig != delp->dn_sig)
737 		return (DSVC_COLLISION);
738 
739 	/*
740 	 * Read our neighboring records.
741 	 */
742 	if (rec.rec_next[image] != DN_NOREC) {
743 		if (read_rec(fd, &rec_next, rec.rec_next[image]) == -1)
744 			return (syserr_to_dsvcerr(errno));
745 	}
746 
747 	if (rec.rec_prev[image] != DN_HASHHEAD) {
748 		if (read_rec(fd, &rec_prev, rec.rec_prev[image]) == -1)
749 			return (syserr_to_dsvcerr(errno));
750 	}
751 
752 	/*
753 	 * Before we update the alternate image's on-disk hash pointers,
754 	 * mark the container as dirty so that there's no chance the
755 	 * container is inconsistent without us knowing about it.
756 	 */
757 	retval = setabyte(fd, offsetof(dn_header_t, dnh_dirty), 1);
758 	if (retval != DSVC_SUCCESS)
759 		return (retval);
760 
761 	/*
762 	 * Update the alternate image's on-disk hash pointers.  We need to
763 	 * do this before we switch to the alternate image so we do not
764 	 * abort with an inconsistent active image.  Also reset the
765 	 * record's alternate image record id pointers, so that the old
766 	 * record will not be in-use when we switch to the alternate image.
767 	 */
768 	if (rec.rec_next[image] != DN_NOREC) {
769 		rec_next.rec_prev[!image] = rec.rec_prev[image];
770 		if (write_rec(fd, &rec_next, rec.rec_next[image]) == -1)
771 			return (syserr_to_dsvcerr(errno));
772 	}
773 
774 	if (rec.rec_prev[image] != DN_HASHHEAD) {
775 		rec_prev.rec_next[!image] = rec.rec_next[image];
776 		if (write_rec(fd, &rec_prev, rec.rec_prev[image]) == -1)
777 			return (syserr_to_dsvcerr(errno));
778 	} else {
779 		if (write_hashhead(fd, rec.rec_next[image], hash, !image) == -1)
780 			return (syserr_to_dsvcerr(errno));
781 	}
782 
783 	rec.rec_next[!image] = DN_NOREC;
784 	rec.rec_prev[!image] = DN_NOREC;
785 	if (write_rec(fd, &rec, recid) == -1)
786 		return (syserr_to_dsvcerr(errno));
787 
788 	/*
789 	 * Activate the alternate image.  This is our commit point -- if we
790 	 * fail after this point, we will roll forward on recovery.
791 	 */
792 	image = !image;
793 	retval = setabyte(fd, offsetof(dn_header_t, dnh_image), image);
794 	if (retval != DSVC_SUCCESS)
795 		return (retval);
796 
797 	/*
798 	 * Update the old record id pointers to match.
799 	 */
800 	if (rec.rec_next[!image] != DN_NOREC) {
801 		rec_next.rec_prev[!image] = rec.rec_prev[!image];
802 		if (write_rec(fd, &rec_next, rec.rec_next[!image]) == -1)
803 			return (syserr_to_dsvcerr(errno));
804 	}
805 
806 	if (rec.rec_prev[!image] != DN_HASHHEAD) {
807 		rec_prev.rec_next[!image] = rec.rec_next[!image];
808 		if (write_rec(fd, &rec_prev, rec.rec_prev[!image]) == -1)
809 			return (syserr_to_dsvcerr(errno));
810 	} else {
811 		if (write_hashhead(fd, rec.rec_next[!image], hash, !image)
812 		    == -1)
813 			return (syserr_to_dsvcerr(errno));
814 	}
815 
816 	rec.rec_next[!image] = DN_NOREC;
817 	rec.rec_prev[!image] = DN_NOREC;
818 	if (write_rec(fd, &rec, recid) == -1)
819 		return (syserr_to_dsvcerr(errno));
820 
821 	/*
822 	 * Finally, mark the container as clean.
823 	 */
824 	return (setabyte(fd, offsetof(dn_header_t, dnh_dirty), 0));
825 }
826 
827 int
modify_dn(void * handle,const dn_rec_t * origp,dn_rec_t * newp)828 modify_dn(void *handle, const dn_rec_t *origp, dn_rec_t *newp)
829 {
830 	dn_filerec_t	rec, new_rec, rec_head, rec_next, rec_prev;
831 	dn_recid_t	recid, new_recid, recid_head;
832 	uint16_t	hash, new_hash;
833 	uchar_t		image;
834 	int		retval;
835 	dn_handle_t	*dhp = (dn_handle_t *)handle;
836 	int		fd = dhp->dh_fd;
837 
838 	if ((dhp->dh_oflags & DSVC_WRITE) == 0)
839 		return (DSVC_ACCESS);
840 
841 	retval = check_dn(dhp);
842 	if (retval != DSVC_SUCCESS)
843 		return (retval);
844 
845 	/*
846 	 * Get the active image
847 	 */
848 	retval = getabyte(fd, offsetof(dn_header_t, dnh_image), &image);
849 	if (retval != DSVC_SUCCESS)
850 		return (retval);
851 
852 	/*
853 	 * Find the original entry in the network table, make sure the
854 	 * entry is in-use, and check the signature field (to guard against
855 	 * collisions).
856 	 */
857 	recid = RECID(origp->dn_cip.s_addr, dhp->dh_netmask);
858 	if (read_rec(fd, &rec, recid) == -1)
859 		return (syserr_to_dsvcerr(errno));
860 
861 	if (rec.rec_prev[image] == DN_NOREC)
862 		return (DSVC_NOENT);
863 
864 	if (rec.rec_dn.dn_sig != origp->dn_sig)
865 		return (DSVC_COLLISION);
866 
867 	/*
868 	 * Check if the record id is changing (as a result of modifying the
869 	 * IP address). If it is, then make sure the new one is available
870 	 * (if not, fail with DSVC_EXISTS).
871 	 */
872 	new_recid = RECID(newp->dn_cip.s_addr, dhp->dh_netmask);
873 	if (recid != new_recid) {
874 		if (read_rec(fd, &new_rec, new_recid) == -1)
875 			return (syserr_to_dsvcerr(errno));
876 		if (new_rec.rec_prev[image] != DN_NOREC)
877 			return (DSVC_EXISTS);
878 	}
879 
880 	/*
881 	 * Update the record with the new information.
882 	 */
883 	new_rec.rec_dn = *newp;
884 	new_rec.rec_dn.dn_sig = origp->dn_sig + 1;
885 
886 	/*
887 	 * Find out if our hash chain is changing.  If so, then update the
888 	 * new record's record id pointers to be on the new chain;
889 	 * otherwise just take the original record's pointers.  Note that
890 	 * in either case, only update the alternate image pointers, so
891 	 * that the new record becomes in-use when we switch to the
892 	 * alternate image.
893 	 */
894 	hash = cidhash(rec.rec_dn.dn_cid, rec.rec_dn.dn_cid_len);
895 	new_hash = cidhash(newp->dn_cid, newp->dn_cid_len);
896 
897 	if (hash == new_hash) {
898 		new_rec.rec_prev[!image] = rec.rec_prev[image];
899 		new_rec.rec_next[!image] = rec.rec_next[image];
900 	} else {
901 		if (read_hashhead(fd, &recid_head, new_hash, image) == -1)
902 			return (syserr_to_dsvcerr(errno));
903 
904 		new_rec.rec_prev[!image] = DN_HASHHEAD;
905 		new_rec.rec_next[!image] = recid_head;
906 	}
907 
908 	/*
909 	 * Write the record out; if this means overwriting the old record,
910 	 * then write to a temporary record instead.
911 	 */
912 	if (write_rec(fd, &new_rec, new_recid == recid ? DN_TEMPREC : new_recid)
913 	    == -1)
914 		return (syserr_to_dsvcerr(errno));
915 
916 	/*
917 	 * Mark the container as dirty so that there's no chance the
918 	 * container is inconsistent without us knowing about it.
919 	 */
920 	retval = setabyte(fd, offsetof(dn_header_t, dnh_dirty), 1);
921 	if (retval != DSVC_SUCCESS)
922 		return (retval);
923 
924 	/*
925 	 * If we've changed either the hash chain or the record id, then
926 	 * update our neighboring records' record id pointers.  If we're
927 	 * changing hash chains, then remove ourselves from the old
928 	 * hash chain and insert ourselves on the new one -- otherwise, if
929 	 * we're changing record id's, then update our neighbors with our
930 	 * new record id.  Note that we only apply these changes to the
931 	 * alternate image for now so that we can recover upon failure.
932 	 */
933 	if (hash != new_hash || recid != new_recid) {
934 		if (rec.rec_next[image] != DN_NOREC) {
935 			if (read_rec(fd, &rec_next, rec.rec_next[image]) == -1)
936 				return (syserr_to_dsvcerr(errno));
937 		}
938 		if (rec.rec_prev[image] != DN_HASHHEAD) {
939 			if (read_rec(fd, &rec_prev, rec.rec_prev[image]) == -1)
940 				return (syserr_to_dsvcerr(errno));
941 		}
942 
943 		if (hash != new_hash) {
944 			rec_next.rec_prev[!image] = rec.rec_prev[!image];
945 			rec_prev.rec_next[!image] = rec.rec_next[!image];
946 		} else {
947 			rec_next.rec_prev[!image] = new_recid;
948 			rec_prev.rec_next[!image] = new_recid;
949 		}
950 
951 		if (rec.rec_next[image] != DN_NOREC) {
952 			if (write_rec(fd, &rec_next, rec.rec_next[image]) == -1)
953 				return (syserr_to_dsvcerr(errno));
954 		}
955 		if (rec.rec_prev[image] != DN_HASHHEAD) {
956 			if (write_rec(fd, &rec_prev, rec.rec_prev[image]) == -1)
957 				return (syserr_to_dsvcerr(errno));
958 		} else {
959 			if (write_hashhead(fd, rec_prev.rec_next[!image], hash,
960 			    !image) == -1)
961 				return (syserr_to_dsvcerr(errno));
962 		}
963 
964 		/*
965 		 * If our hash is changing, update the alternate image
966 		 * record id pointers to point to our moved record.
967 		 */
968 		if (hash != new_hash) {
969 			if (recid_head != DN_NOREC) {
970 				if (read_rec(fd, &rec_head, recid_head) == -1)
971 					return (syserr_to_dsvcerr(errno));
972 				rec_head.rec_prev[!image] = new_recid;
973 				if (write_rec(fd, &rec_head, recid_head) == -1)
974 					return (syserr_to_dsvcerr(errno));
975 			}
976 			if (write_hashhead(fd, new_recid, new_hash, !image)
977 			    == -1)
978 				return (syserr_to_dsvcerr(errno));
979 		}
980 
981 		/*
982 		 * If our record id is changing, reset the old record's
983 		 * alternate image record id pointers, so that the old
984 		 * record will not be in-use once we switch over to the
985 		 * alternate image.
986 		 */
987 		if (recid != new_recid) {
988 			rec.rec_prev[!image] = DN_NOREC;
989 			rec.rec_next[!image] = DN_NOREC;
990 			if (write_rec(fd, &rec, recid) == -1)
991 				return (syserr_to_dsvcerr(errno));
992 		}
993 	}
994 
995 	/*
996 	 * If we're using the temporary record, then set `dnh_tempimage' to
997 	 * the image that will be active when we're done.  This piece of
998 	 * state is critical in the case of failure, since it indicates
999 	 * both that the temporary record is valid, and tells us whether we
1000 	 * failed before or after activating the alternate image (below).
1001 	 * If we failed before activating the alternate image, then the
1002 	 * failure code can just reset `dnh_tempimage' to DN_NOIMAGE and
1003 	 * resynchronize the pointers.  Otherwise, we failed somewhere
1004 	 * after making the alternate image active but before we completed
1005 	 * copying the temporary record over to the actual record, which
1006 	 * the recovery code will then complete on our behalf before
1007 	 * resynchronizing the pointers.
1008 	 */
1009 	if (recid == new_recid) {
1010 		retval = setabyte(fd, offsetof(dn_header_t, dnh_tempimage),
1011 		    !image);
1012 		if (retval != DSVC_SUCCESS)
1013 			return (retval);
1014 	}
1015 
1016 	/*
1017 	 * Activate the alternate image.  This is our commit point -- if we
1018 	 * fail after this point, we will roll forward on recovery.
1019 	 */
1020 	image = !image;
1021 	retval = setabyte(fd, offsetof(dn_header_t, dnh_image), image);
1022 	if (retval != DSVC_SUCCESS)
1023 		return (retval);
1024 
1025 	/*
1026 	 * If we used the temporary record, copy the data into the actual
1027 	 * record.  Once finished, reset `dnh_tempimage' to DN_NOIMAGE
1028 	 * since the temporary record no longer needs to be used.
1029 	 */
1030 	if (recid == new_recid) {
1031 		if (write_rec(fd, &new_rec, new_recid) == -1)
1032 			return (syserr_to_dsvcerr(errno));
1033 
1034 		retval = setabyte(fd, offsetof(dn_header_t, dnh_tempimage),
1035 		    DN_NOIMAGE);
1036 		if (retval != DSVC_SUCCESS)
1037 			return (retval);
1038 	}
1039 
1040 	/*
1041 	 * Update the old record id pointers to match.
1042 	 */
1043 	new_rec.rec_prev[!image] = new_rec.rec_prev[image];
1044 	new_rec.rec_next[!image] = new_rec.rec_next[image];
1045 	if (write_rec(fd, &new_rec, new_recid) == -1)
1046 		return (syserr_to_dsvcerr(errno));
1047 
1048 	if (hash != new_hash || recid != new_recid) {
1049 		if (rec.rec_next[image] != DN_NOREC) {
1050 			rec_next.rec_prev[!image] = rec.rec_prev[image];
1051 			if (write_rec(fd, &rec_next, rec.rec_next[image]) == -1)
1052 				return (syserr_to_dsvcerr(errno));
1053 		}
1054 		if (rec.rec_prev[image] != DN_HASHHEAD) {
1055 			rec_prev.rec_next[!image] = rec.rec_next[image];
1056 			if (write_rec(fd, &rec_prev, rec.rec_prev[image]) == -1)
1057 				return (syserr_to_dsvcerr(errno));
1058 		} else {
1059 			if (write_hashhead(fd, rec.rec_next[image], hash,
1060 			    !image) == -1)
1061 				return (syserr_to_dsvcerr(errno));
1062 		}
1063 
1064 		/*
1065 		 * If our hash changed, update the alternate image record
1066 		 * id pointers to point to our moved record.
1067 		 */
1068 		if (hash != new_hash) {
1069 			if (recid_head != DN_NOREC) {
1070 				rec_head.rec_prev[!image] =
1071 				    rec_head.rec_prev[image];
1072 				if (write_rec(fd, &rec_head, recid_head) == -1)
1073 					return (syserr_to_dsvcerr(errno));
1074 			}
1075 			if (write_hashhead(fd, new_recid, new_hash, !image)
1076 			    == -1)
1077 				return (syserr_to_dsvcerr(errno));
1078 		}
1079 
1080 		/*
1081 		 * If our record id changed, then finish marking the old
1082 		 * record as "not in use".
1083 		 */
1084 		if (recid != new_recid) {
1085 			rec.rec_prev[!image] = DN_NOREC;
1086 			rec.rec_next[!image] = DN_NOREC;
1087 			if (write_rec(fd, &rec, recid) == -1)
1088 				return (syserr_to_dsvcerr(errno));
1089 		}
1090 	}
1091 
1092 	/*
1093 	 * Update the signature on the new record handed back to the caller.
1094 	 */
1095 	newp->dn_sig = new_rec.rec_dn.dn_sig;
1096 
1097 	/*
1098 	 * Finally, mark the container as clean.
1099 	 */
1100 	return (setabyte(fd, offsetof(dn_header_t, dnh_dirty), 0));
1101 }
1102 
1103 int
list_dn(const char * location,char *** listppp,uint_t * countp)1104 list_dn(const char *location, char ***listppp, uint_t *countp)
1105 {
1106 	char		ipaddr[INET_ADDRSTRLEN];
1107 	struct dirent	*result;
1108 	DIR		*dirp;
1109 	unsigned int	i, count = 0;
1110 	char		*re, **new_listpp, **listpp = NULL;
1111 	char		conver[4];
1112 	int		error;
1113 
1114 	dirp = opendir(location);
1115 	if (dirp == NULL) {
1116 		switch (errno) {
1117 		case EACCES:
1118 		case EPERM:
1119 			return (DSVC_ACCESS);
1120 		case ENOENT:
1121 			return (DSVC_NO_LOCATION);
1122 		default:
1123 			break;
1124 		}
1125 		return (DSVC_INTERNAL);
1126 	}
1127 
1128 	/*
1129 	 * Compile a regular expression matching "SUNWbinfilesX_" (where X
1130 	 * is a container version number) followed by an IP address
1131 	 * (roughly speaking).  Note that the $N constructions allow us to
1132 	 * get the container version and IP address when calling regex(3C).
1133 	 */
1134 	re = regcmp("^SUNWbinfiles([0-9]{1,3})$0_"
1135 	    "(([0-9]{1,3}_){3}[0-9]{1,3})$1$", (char *)0);
1136 	if (re == NULL)
1137 		return (DSVC_NO_MEMORY);
1138 
1139 	while ((result = readdir(dirp)) != NULL) {
1140 
1141 		if (regex(re, result->d_name, conver, ipaddr) != NULL) {
1142 			if (atoi(conver) != DSVC_CONVER)
1143 				continue;
1144 
1145 			for (i = 0; ipaddr[i] != '\0'; i++)
1146 				if (ipaddr[i] == '_')
1147 					ipaddr[i] = '.';
1148 
1149 			new_listpp = realloc(listpp,
1150 			    (sizeof (char **)) * (count + 1));
1151 			if (new_listpp == NULL) {
1152 				error = DSVC_NO_MEMORY;
1153 				goto fail;
1154 			}
1155 			listpp = new_listpp;
1156 			listpp[count] = strdup(ipaddr);
1157 			if (listpp[count] == NULL) {
1158 				error = DSVC_NO_MEMORY;
1159 				goto fail;
1160 			}
1161 			count++;
1162 		}
1163 	}
1164 	free(re);
1165 	(void) closedir(dirp);
1166 
1167 	*countp = count;
1168 	*listppp = listpp;
1169 	return (DSVC_SUCCESS);
1170 fail:
1171 	free(re);
1172 	(void) closedir(dirp);
1173 
1174 	for (i = 0; i < count; i++)
1175 		free(listpp[i]);
1176 	free(listpp);
1177 	return (error);
1178 }
1179 
1180 /*
1181  * Check (a la fsck) that a given DHCP network container is in a consistent
1182  * state.  If not, then attempt to restore internal consistency; this should
1183  * always be possible unless the container has been externally corrupted.
1184  */
1185 static int
check_dn(dn_handle_t * dhp)1186 check_dn(dn_handle_t *dhp)
1187 {
1188 	dn_header_t	header;
1189 	uchar_t		image, dirty;
1190 	uint16_t	hash;
1191 	dn_filerec_t	rec;
1192 	dn_recid_t	recid, maxrecid;
1193 	int		retval;
1194 
1195 	/*
1196 	 * Reading the whole header is a very expensive operation; only do
1197 	 * it once we're sure the container is actually dirty.  On an
1198 	 * E4500, this optimization lowers the wall-clock cost of creating
1199 	 * a 5000-record datastore by 20 percent.
1200 	 */
1201 	retval = getabyte(dhp->dh_fd, offsetof(dn_header_t, dnh_dirty), &dirty);
1202 	if (retval != DSVC_SUCCESS)
1203 		return (retval);
1204 
1205 	if (dirty == 0)
1206 		return (DSVC_SUCCESS);
1207 
1208 	if (read_header(dhp->dh_fd, &header, B_TRUE) == -1)
1209 		return (syserr_to_dsvcerr(errno));
1210 
1211 	/*
1212 	 * If `dnh_tempimage' matches the current working image, then we
1213 	 * crashed in the middle of a modify_dn() operation.  Complete
1214 	 * writing out the temporary record before restoring internal
1215 	 * consistency.  This is a bit of a kludge but there doesn't seem
1216 	 * to be another way.
1217 	 */
1218 	if (header.dnh_tempimage == header.dnh_image) {
1219 		recid = RECID(header.dnh_temp.rec_dn.dn_cip.s_addr,
1220 		    header.dnh_netmask);
1221 		if (write_rec(dhp->dh_fd, &header.dnh_temp, recid) == -1)
1222 			return (syserr_to_dsvcerr(errno));
1223 
1224 		header.dnh_tempimage = DN_NOIMAGE;
1225 	}
1226 
1227 	/*
1228 	 * Blindly update all the header hashhead pointers since we're
1229 	 * going to have to re-write the header anyway.
1230 	 */
1231 	image = header.dnh_image;
1232 	for (hash = 0; hash < DN_CIDHASHSZ; hash++) {
1233 		header.dnh_cidhash[hash][!image] =
1234 		    header.dnh_cidhash[hash][image];
1235 	}
1236 
1237 	/*
1238 	 * Synchronize the record pointers of all in-use records.  We do
1239 	 * this instead of just walking the hashheads because not all dirty
1240 	 * records are hashed (for instance, we may have failed part way
1241 	 * through an add_dn()).
1242 	 */
1243 	maxrecid = RECID(~0, header.dnh_netmask);
1244 	for (recid = RECID(0, header.dnh_netmask); recid <= maxrecid; recid++) {
1245 		if (read_rec(dhp->dh_fd, &rec, recid) == -1)
1246 			return (syserr_to_dsvcerr(errno));
1247 
1248 		/*
1249 		 * Verify the pointers match.  If not, then correct
1250 		 * the record and write it back to disk.
1251 		 */
1252 		if (rec.rec_next[image] != rec.rec_next[!image] ||
1253 		    rec.rec_prev[image] != rec.rec_prev[!image]) {
1254 			header.dnh_errors++;
1255 
1256 			rec.rec_prev[!image] = rec.rec_prev[image];
1257 			rec.rec_next[!image] = rec.rec_next[image];
1258 
1259 			if (write_rec(dhp->dh_fd, &rec, recid) == -1)
1260 				return (syserr_to_dsvcerr(errno));
1261 		}
1262 	}
1263 
1264 	header.dnh_checks++;
1265 	if (write_header(dhp->dh_fd, &header) == -1)
1266 		return (syserr_to_dsvcerr(errno));
1267 
1268 	/*
1269 	 * Clear the dirty bit on the container.
1270 	 */
1271 	return (setabyte(dhp->dh_fd, offsetof(dn_header_t, dnh_dirty), 0));
1272 }
1273 
1274 /*
1275  * Given a buffer `path' of `pathlen' bytes, fill it in with a path to the
1276  * DHCP Network table for IP network `ip' located in directory `dir'.
1277  */
1278 static void
net2path(char * path,size_t pathlen,const char * dir,ipaddr_t ip)1279 net2path(char *path, size_t pathlen, const char *dir, ipaddr_t ip)
1280 {
1281 	(void) snprintf(path, pathlen, "%s/SUNWbinfiles%u_%d_%d_%d_%d", dir,
1282 	    DSVC_CONVER, ip >> 24, (ip >> 16) & 0xff, (ip >> 8) & 0xff,
1283 	    ip & 0xff);
1284 }
1285 
1286 /*
1287  * Given a `cid' that's `cidlen' bytes long, hash it to a value between 0
1288  * and DN_CIDHASHSZ - 1.  We use CRC16 for our hash since it's known to be
1289  * very evenly distributed.
1290  */
1291 static uint16_t
cidhash(const uchar_t * cid,size_t cidlen)1292 cidhash(const uchar_t *cid, size_t cidlen)
1293 {
1294 	uchar_t		bit;
1295 	uint16_t	result = 0xffff;
1296 	const uint16_t	crc16_poly = 0x8408; /* mutated CRC-CCITT polynomial */
1297 
1298 	while (cidlen-- != 0) {
1299 		result ^= *cid++;
1300 		for (bit = 0; bit < 8; bit++) {
1301 			if (result & 1)
1302 				result = (result >> 1) ^ crc16_poly;
1303 			else
1304 				result >>= 1;
1305 		}
1306 	}
1307 	return (result % DN_CIDHASHSZ);
1308 }
1309 
1310 /*
1311  * Convert the dn_filerec_t pointed to by `rec' from native (host) to
1312  * network order or the other way.
1313  */
1314 /* ARGSUSED */
1315 static void
nhconvert_rec(dn_filerec_t * rec)1316 nhconvert_rec(dn_filerec_t *rec)
1317 {
1318 #ifdef	_LITTLE_ENDIAN
1319 	dn_rec_t *dnp = &rec->rec_dn;
1320 
1321 	nhconvert(&rec->rec_prev[0], &rec->rec_prev[0], sizeof (dn_recid_t));
1322 	nhconvert(&rec->rec_prev[1], &rec->rec_prev[1], sizeof (dn_recid_t));
1323 	nhconvert(&rec->rec_next[0], &rec->rec_next[0], sizeof (dn_recid_t));
1324 	nhconvert(&rec->rec_next[1], &rec->rec_next[1], sizeof (dn_recid_t));
1325 
1326 	nhconvert(&dnp->dn_cip.s_addr, &dnp->dn_cip.s_addr, sizeof (ipaddr_t));
1327 	nhconvert(&dnp->dn_sip.s_addr, &dnp->dn_sip.s_addr, sizeof (ipaddr_t));
1328 	nhconvert(&dnp->dn_lease, &dnp->dn_lease, sizeof (lease_t));
1329 	nhconvert(&dnp->dn_sig, &dnp->dn_sig, sizeof (uint64_t));
1330 #endif
1331 }
1332 
1333 /*
1334  * Convert the header pointed to by `hdrp' from native (host) to network
1335  * order or the other way.  If `hash' is false, then don't bother
1336  * converting the hash chains.
1337  */
1338 /* ARGSUSED */
1339 static void
nhconvert_header(dn_header_t * hdrp,boolean_t hash)1340 nhconvert_header(dn_header_t *hdrp, boolean_t hash)
1341 {
1342 #ifdef	_LITTLE_ENDIAN
1343 	unsigned int i;
1344 
1345 	nhconvert(&hdrp->dnh_network, &hdrp->dnh_network, sizeof (ipaddr_t));
1346 	nhconvert(&hdrp->dnh_netmask, &hdrp->dnh_netmask, sizeof (ipaddr_t));
1347 	nhconvert(&hdrp->dnh_magic, &hdrp->dnh_magic, sizeof (uint32_t));
1348 	nhconvert_rec(&hdrp->dnh_temp);
1349 
1350 	if (hash) {
1351 		for (i = 0; i < DN_CIDHASHSZ; i++) {
1352 			nhconvert(&hdrp->dnh_cidhash[i][0],
1353 			    &hdrp->dnh_cidhash[i][0], sizeof (dn_recid_t));
1354 			nhconvert(&hdrp->dnh_cidhash[i][1],
1355 			    &hdrp->dnh_cidhash[i][1], sizeof (dn_recid_t));
1356 		}
1357 	}
1358 #endif
1359 }
1360 
1361 /*
1362  * Read the dn_filerec_t identified by `recid' from open container `fd'
1363  * into `rec'.  Returns 0 on success, -1 on failure (errno is set).
1364  */
1365 static int
read_rec(int fd,dn_filerec_t * rec,dn_recid_t recid)1366 read_rec(int fd, dn_filerec_t *rec, dn_recid_t recid)
1367 {
1368 	if (pnread(fd, rec, sizeof (*rec), RECID2OFFSET(recid)) == -1)
1369 		return (-1);
1370 
1371 	nhconvert_rec(rec);
1372 	return (0);
1373 }
1374 
1375 /*
1376  * Write the dn_filerec_t `rec' identified by `recid' into the open
1377  * container `fd'.  Returns 0 on success, -1 on failure (errno is set).
1378  */
1379 static int
write_rec(int fd,dn_filerec_t * rec,dn_recid_t recid)1380 write_rec(int fd, dn_filerec_t *rec, dn_recid_t recid)
1381 {
1382 	int retval;
1383 
1384 	nhconvert_rec(rec);
1385 	retval = pnwrite(fd, rec, sizeof (*rec), RECID2OFFSET(recid));
1386 	nhconvert_rec(rec);
1387 	return (retval);
1388 }
1389 
1390 /*
1391  * Read the dn_header_t from the open container `fd' into the dn_header_t
1392  * pointed to by `hdrp'; if `hash' is not set, then skip reading the
1393  * dn_header_t hash chains.  Returns 0 on success, -1 on failure (errno is
1394  * set).
1395  */
1396 static int
read_header(int fd,dn_header_t * hdrp,boolean_t hash)1397 read_header(int fd, dn_header_t *hdrp, boolean_t hash)
1398 {
1399 	size_t size;
1400 
1401 	size = hash ? sizeof (dn_header_t) : offsetof(dn_header_t, dnh_cidhash);
1402 	if (pnread(fd, hdrp, size, 0) == -1)
1403 		return (-1);
1404 
1405 	nhconvert_header(hdrp, hash);
1406 	return (0);
1407 }
1408 
1409 /*
1410  * Write the dn_header_t pointed to by `hdrp' into open container `fd'.
1411  * Returns 0 on success, -1 on failure (errno is set).
1412  */
1413 static int
write_header(int fd,dn_header_t * hdrp)1414 write_header(int fd, dn_header_t *hdrp)
1415 {
1416 	int retval;
1417 
1418 	nhconvert_header(hdrp, B_TRUE);
1419 	retval = pnwrite(fd, hdrp, sizeof (dn_header_t), 0);
1420 	nhconvert_header(hdrp, B_TRUE);
1421 	return (retval);
1422 }
1423 
1424 /*
1425  * Read in the head of the `cidhash' hash chain from open container `fd'
1426  * into `recid_headp', using image `image'.  Returns 0 on success, -1 on
1427  * failure (errno is set).
1428  */
1429 static int
read_hashhead(int fd,dn_recid_t * recid_headp,uint16_t cidhash,uchar_t image)1430 read_hashhead(int fd, dn_recid_t *recid_headp, uint16_t cidhash, uchar_t image)
1431 {
1432 	if (pnread(fd, recid_headp, sizeof (dn_recid_t),
1433 	    offsetof(dn_header_t, dnh_cidhash[cidhash][image])) == -1)
1434 		return (-1);
1435 
1436 	nhconvert(recid_headp, recid_headp, sizeof (dn_recid_t));
1437 	return (0);
1438 }
1439 
1440 /*
1441  * Write out the head of the `cidhash' hash chain into open container `fd'
1442  * from `recid_head', using image `image'.  Returns 0 on success, -1 on
1443  * failure (errno is set).
1444  */
1445 static int
write_hashhead(int fd,dn_recid_t recid_head,uint16_t cidhash,uchar_t image)1446 write_hashhead(int fd, dn_recid_t recid_head, uint16_t cidhash, uchar_t image)
1447 {
1448 	nhconvert(&recid_head, &recid_head, sizeof (dn_recid_t));
1449 	return (pnwrite(fd, &recid_head, sizeof (dn_recid_t),
1450 	    offsetof(dn_header_t, dnh_cidhash[cidhash][image])));
1451 }
1452 
1453 /*
1454  * Get the byte `offset' bytes into open file `fd', and store in `bytep'.
1455  * Returns a DSVC_* return code.
1456  */
1457 static int
getabyte(int fd,off_t offset,uchar_t * bytep)1458 getabyte(int fd, off_t offset, uchar_t *bytep)
1459 {
1460 	switch (pread(fd, bytep, 1, offset)) {
1461 	case 1:
1462 		return (DSVC_SUCCESS);
1463 	case -1:
1464 		return (syserr_to_dsvcerr(errno));
1465 	default:
1466 		break;
1467 	}
1468 
1469 	return (DSVC_INTERNAL);
1470 }
1471 
1472 /*
1473  * Set the byte `offset' bytes into open file `fd' to `byte'.  Returns a
1474  * DSVC_* return code.
1475  */
1476 static int
setabyte(int fd,off_t offset,uchar_t byte)1477 setabyte(int fd, off_t offset, uchar_t byte)
1478 {
1479 	switch (pwrite(fd, &byte, 1, offset)) {
1480 	case 1:
1481 		return (DSVC_SUCCESS);
1482 	case -1:
1483 		return (syserr_to_dsvcerr(errno));
1484 	default:
1485 		break;
1486 	}
1487 
1488 	return (DSVC_INTERNAL);
1489 }
1490