1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #pragma ident "%Z%%M% %I% %E% SMI"
28
29 /*
30 * This file contains public functions for managing DHCP network
31 * containers. For the semantics of these functions, please see the
32 * Enterprise DHCP Architecture Document.
33 *
34 * This module uses synchronization guarantees provided by dsvclockd(1M);
35 * please see $SRC/lib/libdhcpsvc/private/README.synch for details.
36 *
37 * Big Theory Statement for the SUNWbinfiles DHCP Network Module
38 * =============================================================
39 *
40 * 1. On-disk Structure
41 *
42 * Each container consists of two basic pieces on-disk: a header and an
43 * array of records. In order to provide fast client IP lookup, the array
44 * of records is directly indexed by client IP address (using a simple
45 * mapping function). In order to provide fast client id lookup, each
46 * in-use record is also on exactly one doubly-linked client id hash chain
47 * (the hash chain heads are contained in the header). For all other
48 * lookups, we can restrict our search to only the in-use records by merely
49 * walking all of the hash chains. Here's a crude illustration of what
50 * this looks like on-disk (note that hash chains 2 and 3 are empty):
51 *
52 * _______________________________________________
53 * | container info | hash chain heads (buckets) |
54 * header | | 1 | 2 | 3 | [ .... ] | N |
55 * | | | | | | | | |
56 * |__________________|_|________________________|_|
57 * | rec1 | rec2 | | rec3 | rec4 | |
58 * | | +---> | | |
59 * | unused | unused | hash1 | unused | |
60 * |___________|___________|________^|_|_________|_|
61 * | rec5 | rec6 | rec7 |v | rec8 | |
62 * | | | -> | |
63 * records | unused | hashN | hash1 <- hash1 | |
64 * |___________|________^|_|___________|_________|_|
65 * | : :: : : : |
66 * | : :: : [ more records... ] : |
67 * | : :: : : : |
68 * |___________:________::_:___________:_________:_|
69 * | recN-3 | recN-2 || | recN-1 | recN v |
70 * | | |+--> -> |
71 * | unused | unused +--- hashN <- hashN |
72 * |___________|___________|___________|___________|
73 *
74 * Note that the actual on-disk format is a bit more complicated than this
75 * due to robustness issues; see section 3 below for details.
76 *
77 * 2. Robustness Requirements
78 *
79 * This module has been designed to be as efficient as possible while still
80 * retaining the robustness minimally required for an enterprise-level
81 * environment. In particular, it is designed to handle the following
82 * failure situations:
83 *
84 * 1. An update operation (add, modify, delete) on a container is
85 * unable to complete due to an unexpected internal error at
86 * any point in the update code.
87 *
88 * 2. An update operation (add, modify, delete) on a container is
89 * unable to complete due to unexpected program termination while
90 * at any point in the update code.
91 *
92 * If either of these situations occurs, the container in question must be
93 * left in a consistent (and viable) state. In addition, only the pending
94 * transaction (at most) may be lost.
95 *
96 * 3. Robustness Techniques
97 *
98 * This module uses a few different techniques to meet our robustness goals
99 * while maintaining high performance. The biggest problem we encounter
100 * when trying to achieve robustness is updating the client id hash chain.
101 * In particular, it is not possible to atomically add, move, or delete an
102 * item from a doubly linked list, thus creating a window where a crash
103 * could leave our hash chains in an inconsistent state.
104 *
105 * To address this problem, we actually maintain two images (copies) of all
106 * the hash chains in the container. At any point in time, exactly one of
107 * the two images is active (and thus considered authoritative), as
108 * indicated by a byte in the container header. When performing an update
109 * operation, all hash chain modifications are done on the *inactive*
110 * image, then, once the inactive image has completed the hash chain
111 * operations required by the update, the active and inactive images are
112 * atomically switched, making the formerly-inactive image authoritative.
113 * After the image switch, the update code then updates the formerly-active
114 * image's hash chains to match the active image's hash chains.
115 *
116 * This approach has the nice property that internal container consistency
117 * can always be restored after a crash by just resynchronizing the
118 * inactive image's hash chains with the active image's chains. Note that
119 * the atomic image switch serves as the "commit point" for the operation:
120 * if we crash before this point, we roll back the operation upon recovery
121 * and it appears as though the operation never happened; if we crash after
122 * this point, we roll forward the rest of the operation upon recovery as
123 * if the crash had not happened.
124 *
125 * This technique is enough to robustly implement our add and delete
126 * operations, but modify has an additional complication due to our direct
127 * mapping of client IP addresses to records. In particular, unless the
128 * record modification includes changing the client IP address, the
129 * modified record must be written at the same location as the original
130 * record -- however, if the modify operation fails part way through
131 * writing out the new client record, the record will be corrupt and we
132 * will have no way to return the record to a consistent state. To address
133 * this issue, we allocate a spare record in the container header called
134 * the "temporary" record. Upon a modification of this type, we first
135 * write the modified record to the temporary record and indicate that the
136 * temporary record is currently proxying for the actual record. We then
137 * copy the temporary record to the actual record and make the temporary
138 * record available again for future use. If a crash occurs before the
139 * copy to the temporary record is complete, then we just roll back as if
140 * the modify never happened (since we have not modified the actual
141 * record). If a crash occurs after copying the temporary record, we roll
142 * forward and complete the copy operation as if the crash never happened.
143 * Note that there are some additional subtle complications here; see the
144 * comments in the code for details.
145 */
146
147 #include <unistd.h>
148 #include <sys/types.h>
149 #include <sys/socket.h>
150 #include <sys/isa_defs.h>
151 #include <netinet/in.h>
152 #include <dhcp_svc_public.h>
153 #include <stdlib.h>
154 #include <dirent.h>
155 #include <string.h>
156 #include <libgen.h>
157 #include <errno.h>
158 #include <stddef.h>
159 #include <assert.h>
160
161 #include "dhcp_network.h"
162 #include "util.h"
163
/*
 * Prototypes for the file-local (static) helper routines used by the
 * public entry points in this file.
 */
static uint16_t cidhash(const uchar_t *, size_t);
static void net2path(char *, size_t, const char *, ipaddr_t);
static int check_dn(dn_handle_t *);
static int getabyte(int, off_t, uchar_t *);
static int setabyte(int, off_t, uchar_t);
static int read_rec(int, dn_filerec_t *, dn_recid_t);
static int write_rec(int, dn_filerec_t *, dn_recid_t);
static int read_header(int, dn_header_t *, boolean_t);
static int write_header(int, dn_header_t *);
static int read_hashhead(int, dn_recid_t *, uint16_t, uchar_t);
static int write_hashhead(int, dn_recid_t, uint16_t, uchar_t);
static boolean_t record_match(const dn_rec_t *, const dn_rec_t *, uint_t);
176
177 int
open_dn(void ** handlep,const char * dir,uint_t flags,const struct in_addr * netp,const struct in_addr * maskp)178 open_dn(void **handlep, const char *dir, uint_t flags,
179 const struct in_addr *netp, const struct in_addr *maskp)
180 {
181 dn_handle_t *dhp;
182 dn_header_t header = { 0 };
183 char dnpath[MAXPATHLEN];
184 int i, retval;
185 off_t filesz;
186
187 dhp = malloc(sizeof (dn_handle_t));
188 if (dhp == NULL)
189 return (DSVC_NO_MEMORY);
190
191 /*
192 * As a safeguard, check that the size of a dn_header_t hasn't
193 * changed (since it contains a dn_rec_t, this will probably catch
194 * a change in that structure as well). If it has, bail rather
195 * than totally corrupting the container (by continuing). Note
196 * that this situation indicates an internal programming error,
197 * which is why we prefer assert() to just returning DSVC_INTERNAL.
198 */
199 /* CONSTCOND */
200 assert(sizeof (header) == 32768);
201
202 net2path(dnpath, MAXPATHLEN, dir, netp->s_addr);
203 retval = open_file(dnpath, flags, &dhp->dh_fd);
204 if (retval != DSVC_SUCCESS) {
205 free(dhp);
206 return (retval);
207 }
208
209 if (flags & DSVC_CREATE) {
210 /*
211 * We just created the per-network container; initialize
212 * the header and put it out on disk. Note that we leave
213 * `dnh_version' zero until the entire header has been
214 * written, so we can detect partial failure.
215 */
216 header.dnh_version = 0;
217 header.dnh_network = netp->s_addr;
218 header.dnh_netmask = maskp->s_addr;
219 header.dnh_magic = DN_MAGIC;
220 header.dnh_tempimage = DN_NOIMAGE;
221 header.dnh_image = 0;
222 header.dnh_errors = 0;
223 header.dnh_checks = 0;
224 for (i = 0; i < DN_CIDHASHSZ; i++) {
225 header.dnh_cidhash[i][header.dnh_image] = DN_NOREC;
226 header.dnh_cidhash[i][!header.dnh_image] = DN_NOREC;
227 }
228
229 if (write_header(dhp->dh_fd, &header) == -1) {
230 retval = syserr_to_dsvcerr(errno);
231 (void) remove_dn(dir, netp);
232 (void) close_dn((void **)&dhp);
233 return (retval);
234 }
235
236 /*
237 * Virtually reserve all the space we're going to need for
238 * the dn_rec_t's ahead of time, so that we don't have to
239 * worry about "growing" the file later (though it may
240 * increase in size as we fill in holes). We're guaranteed
241 * that we'll read these holes as zeros, which we take
242 * advantage of since a dn_filerec_t with a rec_prev of
243 * DN_NOREC (which is 0) indicates that a record is unused.
244 */
245 filesz = RECID2OFFSET(RECID(~0, header.dnh_netmask) + 1);
246 retval = setabyte(dhp->dh_fd, filesz - 1, 0);
247 if (retval != DSVC_SUCCESS) {
248 (void) remove_dn(dir, netp);
249 (void) close_dn((void **)&dhp);
250 return (retval);
251 }
252
253 /*
254 * Set the version field on the container, effectively
255 * making it available for use.
256 */
257 retval = setabyte(dhp->dh_fd, offsetof(dn_header_t,
258 dnh_version), DSVC_CONVER);
259 if (retval != DSVC_SUCCESS) {
260 (void) remove_dn(dir, netp);
261 (void) close_dn((void **)&dhp);
262 return (retval);
263 }
264 } else {
265 /*
266 * Container already exists; sanity check against the
267 * header that's on-disk. If we detect a problem then
268 * either someone scribbled on our container or we
269 * terminated abnormally when creating the container.
270 */
271 if (read_header(dhp->dh_fd, &header, B_FALSE) == -1) {
272 retval = syserr_to_dsvcerr(errno);
273 (void) close_dn((void **)&dhp);
274 return (retval);
275 }
276
277 if (header.dnh_network != netp->s_addr ||
278 header.dnh_version != DSVC_CONVER ||
279 header.dnh_magic != DN_MAGIC) {
280 (void) close_dn((void **)&dhp);
281 return (DSVC_INTERNAL);
282 }
283 }
284
285 dhp->dh_netmask = header.dnh_netmask;
286 dhp->dh_oflags = flags;
287
288 *handlep = dhp;
289 return (DSVC_SUCCESS);
290 }
291
292 int
close_dn(void ** handlep)293 close_dn(void **handlep)
294 {
295 dn_handle_t *dhp = (dn_handle_t *)*handlep;
296
297 if (close(dhp->dh_fd) == -1)
298 return (DSVC_INTERNAL);
299
300 free(dhp);
301 return (DSVC_SUCCESS);
302 }
303
304 int
remove_dn(const char * dir,const struct in_addr * netp)305 remove_dn(const char *dir, const struct in_addr *netp)
306 {
307 char dnpath[MAXPATHLEN];
308
309 net2path(dnpath, MAXPATHLEN, dir, netp->s_addr);
310 if (unlink(dnpath) == -1)
311 return (syserr_to_dsvcerr(errno));
312
313 return (DSVC_SUCCESS);
314 }
315
/*
 * Search the container referenced by `handle' for in-use records that
 * match `targetp' on the fields selected by `query' (see record_match()).
 *
 *	partial		if true, a memory allocation failure while building
 *			the result list ends the search but still returns
 *			DSVC_SUCCESS with whatever was collected so far
 *	count		maximum number of matches to collect; a negative
 *			value means no limit
 *	recordsp	if non-NULL, receives the list of matching records;
 *			if NULL, matches are only counted
 *	nrecordsp	receives the number of matches
 *
 * The handle must have been opened with DSVC_READ, else DSVC_ACCESS is
 * returned.  `*nrecordsp' and `*recordsp' are only written on success; on
 * failure any partially built list is freed and a DSVC_* error is
 * returned.
 */
int
lookup_dn(void *handle, boolean_t partial, uint_t query, int count,
    const dn_rec_t *targetp, dn_rec_list_t **recordsp, uint_t *nrecordsp)
{
	dn_handle_t *dhp = (dn_handle_t *)handle;
	int retval = DSVC_SUCCESS;
	uint_t nrecords, n;
	uint16_t hash;
	dn_rec_t *recordp;
	dn_rec_list_t *records, *new_records;
	dn_recid_t recid, temp_recid = DN_NOREC;
	dn_filerec_t rec;
	dn_header_t header;
	uchar_t image;
	int fd = dhp->dh_fd;

	if ((dhp->dh_oflags & DSVC_READ) == 0)
		return (DSVC_ACCESS);

	if (read_header(fd, &header, B_FALSE) == -1)
		return (syserr_to_dsvcerr(errno));

	/*
	 * It's possible that a previous update to this container failed
	 * part-way through.  In general, this is fine since we always keep
	 * our active image's hash chains correct and only swap to the
	 * alternate image when the other image is completely safe to use.
	 * However, for reasons explained in modify_dn(), it's possible
	 * that a record being modified was not completely updated before a
	 * failure occurred.  In this case, the actual data for that record
	 * is contained in the temporary record in the header.  We need to
	 * be careful to use that temporary record anywhere we'd otherwise
	 * refer to the partially updated record.  Note that we do this
	 * rather than attempting to restore the consistency of the
	 * container because we're MT-hot here.
	 */
	if (header.dnh_dirty && header.dnh_tempimage == header.dnh_image) {
		temp_recid = RECID(header.dnh_temp.rec_dn.dn_cip.s_addr,
		    header.dnh_netmask);
	}

	/*
	 * `n' counts candidate records examined (loop iterations), while
	 * `nrecords' counts the candidates that actually matched.  Note
	 * that `recid', `rec' and `hash' carry state from one iteration to
	 * the next: each iteration after the first continues from the
	 * record read by the previous one.
	 */
	image = header.dnh_image;
	records = NULL;
	for (n = 0, nrecords = 0; count < 0 || nrecords < count; n++) {
		if (DSVC_QISEQ(query, DN_QCIP)) {
			/*
			 * Lookup scenario 1: Caller has requested a DN_QCIP
			 * query lookup; set `recid' to the only possible
			 * entry (which may not be in-use).
			 */
			if (n != 0)
				break;
			recid = RECID(targetp->dn_cip.s_addr, dhp->dh_netmask);
		} else if (DSVC_QISEQ(query, DN_QCID)) {
			/*
			 * Lookup scenario 2: Caller has requested a
			 * DN_QCID-based lookup.  Walk the `cidhash' chain
			 * (one record per iteration) and set `recid' to
			 * hash bucket candidates.
			 *
			 * Note that it's possible for the client id value
			 * 00 to appear more than once, and it's not
			 * impossible for other duplicate client ids to
			 * occur, so continue until we reach `nrecords'.
			 */
			if (n == 0) {
				hash = cidhash(targetp->dn_cid,
				    targetp->dn_cid_len);
				/* no records allocated yet; safe to return */
				if (read_hashhead(fd, &recid, hash, image)
				    == -1)
					return (syserr_to_dsvcerr(errno));
			} else {
				/* sanity check: record points at itself */
				if (recid == rec.rec_next[image])
					break;
				recid = rec.rec_next[image];
			}
		} else {
			/*
			 * Lookup scenario 3: Caller has requested any
			 * other type of search.  Walk all of the client
			 * id hash chains, one bucket after another.
			 */
			if (n == 0) {
				hash = 0;
				/*
				 * Re-read the header, this time passing
				 * B_TRUE, since we index `dnh_cidhash'
				 * directly below.  No records have been
				 * allocated yet, so returning is safe.
				 */
				if (read_header(fd, &header, B_TRUE) == -1)
					return (syserr_to_dsvcerr(errno));
				recid = header.dnh_cidhash[hash][image];
			} else {
				/* sanity check: record points at itself */
				if (recid == rec.rec_next[image])
					break;
				recid = rec.rec_next[image];
			}

			/*
			 * End of the current chain (or an empty bucket);
			 * advance to the next non-empty bucket, if any.
			 */
			while (recid == DN_NOREC && ++hash < DN_CIDHASHSZ)
				recid = header.dnh_cidhash[hash][image];
		}

		/*
		 * No more records; bail.
		 */
		if (recid == DN_NOREC)
			break;

		if (recid == temp_recid) {
			/*
			 * The temporary record is actually authoritative
			 * for this record's contents; use it instead.
			 */
			recid = DN_TEMPREC;
		}

		if (read_rec(dhp->dh_fd, &rec, recid) == -1) {
			retval = syserr_to_dsvcerr(errno);
			break;
		}

		/*
		 * If the record isn't in-use, then skip...
		 */
		if (rec.rec_prev[image] == DN_NOREC)
			continue;

		/*
		 * See if we've got a match...
		 */
		if (!record_match(&rec.rec_dn, targetp, query))
			continue;

		/*
		 * Caller just wants a count of the number of matching
		 * records, not the records themselves; continue.
		 */
		if (recordsp == NULL) {
			nrecords++;
			continue;
		}

		/*
		 * Allocate the record and fill it in.  If we run out of
		 * memory and the caller accepts partial results, stop here
		 * and report what we have as a success.
		 */
		recordp = malloc(sizeof (dn_rec_t));
		if (recordp == NULL) {
			if (!partial)
				retval = DSVC_NO_MEMORY;
			break;
		}
		*recordp = rec.rec_dn;

		/*
		 * Chuck the record on the list and up the counter.
		 */
		new_records = add_dnrec_to_list(recordp, records);
		if (new_records == NULL) {
			free(recordp);
			if (!partial)
				retval = DSVC_NO_MEMORY;
			break;
		}

		records = new_records;
		nrecords++;
	}

	if (retval == DSVC_SUCCESS) {
		*nrecordsp = nrecords;
		if (recordsp != NULL)
			*recordsp = records;
		return (DSVC_SUCCESS);
	}

	/*
	 * Failure: discard whatever we collected before the error.
	 */
	if (records != NULL)
		free_dnrec_list(records);

	return (retval);
}
493
494 /*
495 * Compares `dnp' to the target `targetp', using `query' to decide what
496 * fields to compare. Returns B_TRUE if `dnp' matches `targetp', B_FALSE
497 * if not.
498 */
499 static boolean_t
record_match(const dn_rec_t * dnp,const dn_rec_t * targetp,uint_t query)500 record_match(const dn_rec_t *dnp, const dn_rec_t *targetp, uint_t query)
501 {
502 unsigned int qflags[] = { DN_QFDYNAMIC, DN_QFAUTOMATIC, DN_QFMANUAL,
503 DN_QFUNUSABLE, DN_QFBOOTP_ONLY };
504 unsigned int flags[] = { DN_FDYNAMIC, DN_FAUTOMATIC, DN_FMANUAL,
505 DN_FUNUSABLE, DN_FBOOTP_ONLY };
506 unsigned int i;
507 unsigned int query0;
508
509 /*
510 * As an optimization, skip any checks if the query is empty.
511 */
512 DSVC_QINIT(query0);
513 if (query == query0)
514 return (B_TRUE);
515
516 if (DSVC_QISEQ(query, DN_QLEASE) &&
517 targetp->dn_lease != dnp->dn_lease)
518 return (B_FALSE);
519 if (DSVC_QISNEQ(query, DN_QLEASE) &&
520 targetp->dn_lease == dnp->dn_lease)
521 return (B_FALSE);
522
523 if (DSVC_QISEQ(query, DN_QCIP) &&
524 dnp->dn_cip.s_addr != targetp->dn_cip.s_addr)
525 return (B_FALSE);
526 if (DSVC_QISNEQ(query, DN_QCIP) &&
527 dnp->dn_cip.s_addr == targetp->dn_cip.s_addr)
528 return (B_FALSE);
529
530 if (DSVC_QISEQ(query, DN_QCID) &&
531 (dnp->dn_cid_len != targetp->dn_cid_len ||
532 (memcmp(dnp->dn_cid, targetp->dn_cid, dnp->dn_cid_len) != 0)))
533 return (B_FALSE);
534 if (DSVC_QISNEQ(query, DN_QCID) &&
535 (dnp->dn_cid_len == targetp->dn_cid_len &&
536 (memcmp(dnp->dn_cid, targetp->dn_cid, dnp->dn_cid_len) == 0)))
537 return (B_FALSE);
538
539 if (DSVC_QISEQ(query, DN_QSIP) &&
540 dnp->dn_sip.s_addr != targetp->dn_sip.s_addr)
541 return (B_FALSE);
542 if (DSVC_QISNEQ(query, DN_QSIP) &&
543 dnp->dn_sip.s_addr == targetp->dn_sip.s_addr)
544 return (B_FALSE);
545
546 if (DSVC_QISEQ(query, DN_QMACRO) &&
547 strcmp(targetp->dn_macro, dnp->dn_macro) != 0)
548 return (B_FALSE);
549 if (DSVC_QISNEQ(query, DN_QMACRO) &&
550 strcmp(targetp->dn_macro, dnp->dn_macro) == 0)
551 return (B_FALSE);
552
553 for (i = 0; i < sizeof (qflags) / sizeof (unsigned int); i++) {
554 if (DSVC_QISEQ(query, qflags[i]) &&
555 (dnp->dn_flags & flags[i]) !=
556 (targetp->dn_flags & flags[i]))
557 return (B_FALSE);
558 if (DSVC_QISNEQ(query, qflags[i]) &&
559 (dnp->dn_flags & flags[i]) ==
560 (targetp->dn_flags & flags[i]))
561 return (B_FALSE);
562 }
563
564 return (B_TRUE);
565 }
566
/*
 * Add the record `addp' to the container referenced by `handle'.  The
 * record is stored in the slot indexed by its client IP address and linked
 * at the head of the hash chain for its client id.
 *
 * Returns DSVC_ACCESS if the handle was not opened with DSVC_WRITE,
 * DSVC_EXISTS if the slot is already in-use, another DSVC_* code on
 * failure, and otherwise the result of clearing the dirty flag.  On the
 * path that reaches the end of the function, the signature generated for
 * the new record is stored back into `addp->dn_sig'.
 *
 * The order of the on-disk writes below is significant: all changes are
 * made to the inactive image first, the image-switch byte is the commit
 * point, and only then is the formerly-active image brought up to date.
 * Any error return after the dirty flag has been set intentionally leaves
 * the container marked dirty.
 */
int
add_dn(void *handle, dn_rec_t *addp)
{
	dn_filerec_t rec, rec_next;
	dn_recid_t recid, recid_head;
	uint16_t hash;
	uchar_t image;
	int retval;
	dn_handle_t *dhp = (dn_handle_t *)handle;
	int fd = dhp->dh_fd;

	if ((dhp->dh_oflags & DSVC_WRITE) == 0)
		return (DSVC_ACCESS);

	/* NOTE(review): check_dn() is defined elsewhere; see it for details */
	retval = check_dn(dhp);
	if (retval != DSVC_SUCCESS)
		return (retval);

	hash = cidhash(addp->dn_cid, addp->dn_cid_len);

	/*
	 * Get the active image.
	 */
	retval = getabyte(fd, offsetof(dn_header_t, dnh_image), &image);
	if (retval != DSVC_SUCCESS)
		return (retval);

	/*
	 * Doublecheck to make sure this entry doesn't exist already.
	 */
	recid = RECID(addp->dn_cip.s_addr, dhp->dh_netmask);
	if (read_rec(fd, &rec, recid) == -1)
		return (syserr_to_dsvcerr(errno));

	if (rec.rec_prev[image] != DN_NOREC)
		return (DSVC_EXISTS);

	/*
	 * We're going to insert `rec' at the head of the `hash' hash
	 * chain; get it ready-to-go.  Note that we update the alternate
	 * image's hash record id pointers so that the record will
	 * atomically become in-use when we switch to the alternate image.
	 */
	if (read_hashhead(fd, &recid_head, hash, image) == -1)
		return (syserr_to_dsvcerr(errno));

	rec.rec_dn = *addp;
	rec.rec_dn.dn_sig = gensig();
	rec.rec_prev[!image] = DN_HASHHEAD;
	rec.rec_next[!image] = recid_head;

	/*
	 * If there's a record currently on the hash chain (i.e, we're
	 * not the first) then load the record.
	 */
	if (rec.rec_next[!image] != DN_NOREC) {
		if (read_rec(fd, &rec_next, rec.rec_next[!image]) == -1)
			return (syserr_to_dsvcerr(errno));
	}

	/*
	 * Before we update any information on disk, mark the container as
	 * dirty so that there's no chance the container is inconsistent
	 * without us knowing about it.
	 */
	retval = setabyte(fd, offsetof(dn_header_t, dnh_dirty), 1);
	if (retval != DSVC_SUCCESS)
		return (retval);

	/*
	 * Update the new record on-disk; note that it's not yet reachable
	 * via hash.
	 */
	if (write_rec(fd, &rec, recid) == -1)
		return (syserr_to_dsvcerr(errno));

	/*
	 * Update the alternate image's on-disk hash pointers.  We need to
	 * do this before we switch to the alternate image so we cannot
	 * abort with an inconsistent active image.
	 */
	if (rec.rec_next[!image] != DN_NOREC) {
		rec_next.rec_prev[!image] = recid;
		if (write_rec(fd, &rec_next, rec.rec_next[!image]) == -1)
			return (syserr_to_dsvcerr(errno));
	}
	if (write_hashhead(fd, recid, hash, !image) == -1)
		return (syserr_to_dsvcerr(errno));

	/*
	 * Activate the alternate image.  This is our commit point -- if we
	 * fail after this point, we will roll forward on recovery.  From
	 * here on `image' names the newly active image and `!image' the
	 * formerly active one.
	 */
	image = !image;
	retval = setabyte(fd, offsetof(dn_header_t, dnh_image), image);
	if (retval != DSVC_SUCCESS)
		return (retval);

	/*
	 * Update the old record id pointers to match
	 */
	rec.rec_prev[!image] = rec.rec_prev[image];
	rec.rec_next[!image] = rec.rec_next[image];
	if (write_rec(fd, &rec, recid) == -1)
		return (syserr_to_dsvcerr(errno));

	if (rec.rec_next[!image] != DN_NOREC) {
		rec_next.rec_prev[!image] = recid;
		if (write_rec(fd, &rec_next, rec.rec_next[!image]) == -1)
			return (syserr_to_dsvcerr(errno));
	}
	if (write_hashhead(fd, recid, hash, !image) == -1)
		return (syserr_to_dsvcerr(errno));

	/*
	 * Update the signature on the record handed back to the caller.
	 */
	addp->dn_sig = rec.rec_dn.dn_sig;

	/*
	 * Finally, mark the container as clean.
	 */
	return (setabyte(fd, offsetof(dn_header_t, dnh_dirty), 0));
}
691
/*
 * Delete the record identified by `delp' (via its client IP address) from
 * the container referenced by `handle', unlinking it from its client id
 * hash chain.
 *
 * Returns DSVC_ACCESS if the handle was not opened with DSVC_WRITE,
 * DSVC_NOENT if the slot is not in-use, DSVC_COLLISION if `delp->dn_sig'
 * is nonzero and differs from the stored signature, another DSVC_* code
 * on failure, and otherwise the result of clearing the dirty flag.
 *
 * As with add_dn(), the order of the on-disk writes is significant: the
 * inactive image is updated first, the image-switch byte is the commit
 * point, and the formerly-active image is updated last.  Any error return
 * after the dirty flag has been set intentionally leaves the container
 * marked dirty.
 */
int
delete_dn(void *handle, const dn_rec_t *delp)
{
	dn_filerec_t rec, rec_prev, rec_next;
	dn_recid_t recid;
	uint16_t hash;
	uchar_t image;
	int retval;
	dn_handle_t *dhp = (dn_handle_t *)handle;
	int fd = dhp->dh_fd;

	if ((dhp->dh_oflags & DSVC_WRITE) == 0)
		return (DSVC_ACCESS);

	/* NOTE(review): check_dn() is defined elsewhere; see it for details */
	retval = check_dn(dhp);
	if (retval != DSVC_SUCCESS)
		return (retval);

	/*
	 * Get the active image.
	 */
	retval = getabyte(fd, offsetof(dn_header_t, dnh_image), &image);
	if (retval != DSVC_SUCCESS)
		return (retval);

	/*
	 * Find the original entry in the network table, make sure the
	 * record is in-use, and check the signature field (to guard
	 * against collisions).
	 */
	recid = RECID(delp->dn_cip.s_addr, dhp->dh_netmask);
	if (read_rec(fd, &rec, recid) == -1)
		return (syserr_to_dsvcerr(errno));

	if (rec.rec_prev[image] == DN_NOREC)
		return (DSVC_NOENT);

	/*
	 * Hash the client id stored on disk (not the one in `delp'), since
	 * that determines which chain the record is actually on.
	 */
	hash = cidhash(rec.rec_dn.dn_cid, rec.rec_dn.dn_cid_len);

	/*
	 * The signatures must match to delete a record, *except* when
	 * delp->dn_sig == 0.  This is so records can be deleted that
	 * weren't retrieved via lookup_dn()
	 */
	if (delp->dn_sig != 0 && rec.rec_dn.dn_sig != delp->dn_sig)
		return (DSVC_COLLISION);

	/*
	 * Read our neighboring records.
	 */
	if (rec.rec_next[image] != DN_NOREC) {
		if (read_rec(fd, &rec_next, rec.rec_next[image]) == -1)
			return (syserr_to_dsvcerr(errno));
	}

	if (rec.rec_prev[image] != DN_HASHHEAD) {
		if (read_rec(fd, &rec_prev, rec.rec_prev[image]) == -1)
			return (syserr_to_dsvcerr(errno));
	}

	/*
	 * Before we update the alternate image's on-disk hash pointers,
	 * mark the container as dirty so that there's no chance the
	 * container is inconsistent without us knowing about it.
	 */
	retval = setabyte(fd, offsetof(dn_header_t, dnh_dirty), 1);
	if (retval != DSVC_SUCCESS)
		return (retval);

	/*
	 * Update the alternate image's on-disk hash pointers.  We need to
	 * do this before we switch to the alternate image so we do not
	 * abort with an inconsistent active image.  Also reset the
	 * record's alternate image record id pointers, so that the old
	 * record will not be in-use when we switch to the alternate image.
	 */
	if (rec.rec_next[image] != DN_NOREC) {
		rec_next.rec_prev[!image] = rec.rec_prev[image];
		if (write_rec(fd, &rec_next, rec.rec_next[image]) == -1)
			return (syserr_to_dsvcerr(errno));
	}

	if (rec.rec_prev[image] != DN_HASHHEAD) {
		rec_prev.rec_next[!image] = rec.rec_next[image];
		if (write_rec(fd, &rec_prev, rec.rec_prev[image]) == -1)
			return (syserr_to_dsvcerr(errno));
	} else {
		/* we were first on the chain; our successor becomes head */
		if (write_hashhead(fd, rec.rec_next[image], hash, !image) == -1)
			return (syserr_to_dsvcerr(errno));
	}

	rec.rec_next[!image] = DN_NOREC;
	rec.rec_prev[!image] = DN_NOREC;
	if (write_rec(fd, &rec, recid) == -1)
		return (syserr_to_dsvcerr(errno));

	/*
	 * Activate the alternate image.  This is our commit point -- if we
	 * fail after this point, we will roll forward on recovery.  From
	 * here on `image' names the newly active image and `!image' the
	 * formerly active one, whose pointers in `rec' still describe the
	 * record's old neighbors.
	 */
	image = !image;
	retval = setabyte(fd, offsetof(dn_header_t, dnh_image), image);
	if (retval != DSVC_SUCCESS)
		return (retval);

	/*
	 * Update the old record id pointers to match.
	 */
	if (rec.rec_next[!image] != DN_NOREC) {
		rec_next.rec_prev[!image] = rec.rec_prev[!image];
		if (write_rec(fd, &rec_next, rec.rec_next[!image]) == -1)
			return (syserr_to_dsvcerr(errno));
	}

	if (rec.rec_prev[!image] != DN_HASHHEAD) {
		rec_prev.rec_next[!image] = rec.rec_next[!image];
		if (write_rec(fd, &rec_prev, rec.rec_prev[!image]) == -1)
			return (syserr_to_dsvcerr(errno));
	} else {
		if (write_hashhead(fd, rec.rec_next[!image], hash, !image)
		    == -1)
			return (syserr_to_dsvcerr(errno));
	}

	/*
	 * Only now that the neighbors no longer refer to us do we clear
	 * the record's own pointers in the formerly-active image.
	 */
	rec.rec_next[!image] = DN_NOREC;
	rec.rec_prev[!image] = DN_NOREC;
	if (write_rec(fd, &rec, recid) == -1)
		return (syserr_to_dsvcerr(errno));

	/*
	 * Finally, mark the container as clean.
	 */
	return (setabyte(fd, offsetof(dn_header_t, dnh_dirty), 0));
}
826
827 int
modify_dn(void * handle,const dn_rec_t * origp,dn_rec_t * newp)828 modify_dn(void *handle, const dn_rec_t *origp, dn_rec_t *newp)
829 {
830 dn_filerec_t rec, new_rec, rec_head, rec_next, rec_prev;
831 dn_recid_t recid, new_recid, recid_head;
832 uint16_t hash, new_hash;
833 uchar_t image;
834 int retval;
835 dn_handle_t *dhp = (dn_handle_t *)handle;
836 int fd = dhp->dh_fd;
837
838 if ((dhp->dh_oflags & DSVC_WRITE) == 0)
839 return (DSVC_ACCESS);
840
841 retval = check_dn(dhp);
842 if (retval != DSVC_SUCCESS)
843 return (retval);
844
845 /*
846 * Get the active image
847 */
848 retval = getabyte(fd, offsetof(dn_header_t, dnh_image), &image);
849 if (retval != DSVC_SUCCESS)
850 return (retval);
851
852 /*
853 * Find the original entry in the network table, make sure the
854 * entry is in-use, and check the signature field (to guard against
855 * collisions).
856 */
857 recid = RECID(origp->dn_cip.s_addr, dhp->dh_netmask);
858 if (read_rec(fd, &rec, recid) == -1)
859 return (syserr_to_dsvcerr(errno));
860
861 if (rec.rec_prev[image] == DN_NOREC)
862 return (DSVC_NOENT);
863
864 if (rec.rec_dn.dn_sig != origp->dn_sig)
865 return (DSVC_COLLISION);
866
867 /*
868 * Check if the record id is changing (as a result of modifying the
869 * IP address). If it is, then make sure the new one is available
870 * (if not, fail with DSVC_EXISTS).
871 */
872 new_recid = RECID(newp->dn_cip.s_addr, dhp->dh_netmask);
873 if (recid != new_recid) {
874 if (read_rec(fd, &new_rec, new_recid) == -1)
875 return (syserr_to_dsvcerr(errno));
876 if (new_rec.rec_prev[image] != DN_NOREC)
877 return (DSVC_EXISTS);
878 }
879
880 /*
881 * Update the record with the new information.
882 */
883 new_rec.rec_dn = *newp;
884 new_rec.rec_dn.dn_sig = origp->dn_sig + 1;
885
886 /*
887 * Find out if our hash chain is changing. If so, then update the
888 * new record's record id pointers to be on the new chain;
889 * otherwise just take the original record's pointers. Note that
890 * in either case, only update the alternate image pointers, so
891 * that the new record becomes in-use when we switch to the
892 * alternate image.
893 */
894 hash = cidhash(rec.rec_dn.dn_cid, rec.rec_dn.dn_cid_len);
895 new_hash = cidhash(newp->dn_cid, newp->dn_cid_len);
896
897 if (hash == new_hash) {
898 new_rec.rec_prev[!image] = rec.rec_prev[image];
899 new_rec.rec_next[!image] = rec.rec_next[image];
900 } else {
901 if (read_hashhead(fd, &recid_head, new_hash, image) == -1)
902 return (syserr_to_dsvcerr(errno));
903
904 new_rec.rec_prev[!image] = DN_HASHHEAD;
905 new_rec.rec_next[!image] = recid_head;
906 }
907
908 /*
909 * Write the record out; if this means overwriting the old record,
910 * then write to a temporary record instead.
911 */
912 if (write_rec(fd, &new_rec, new_recid == recid ? DN_TEMPREC : new_recid)
913 == -1)
914 return (syserr_to_dsvcerr(errno));
915
916 /*
917 * Mark the container as dirty so that there's no chance the
918 * container is inconsistent without us knowing about it.
919 */
920 retval = setabyte(fd, offsetof(dn_header_t, dnh_dirty), 1);
921 if (retval != DSVC_SUCCESS)
922 return (retval);
923
924 /*
925 * If we've changed either the hash chain or the record id, then
926 * update our neighboring records' record id pointers. If we're
927 * changing hash chains, then remove ourselves from the old
928 * hash chain and insert ourselves on the new one -- otherwise, if
929 * we're changing record id's, then update our neighbors with our
930 * new record id. Note that we only apply these changes to the
931 * alternate image for now so that we can recover upon failure.
932 */
933 if (hash != new_hash || recid != new_recid) {
934 if (rec.rec_next[image] != DN_NOREC) {
935 if (read_rec(fd, &rec_next, rec.rec_next[image]) == -1)
936 return (syserr_to_dsvcerr(errno));
937 }
938 if (rec.rec_prev[image] != DN_HASHHEAD) {
939 if (read_rec(fd, &rec_prev, rec.rec_prev[image]) == -1)
940 return (syserr_to_dsvcerr(errno));
941 }
942
943 if (hash != new_hash) {
944 rec_next.rec_prev[!image] = rec.rec_prev[!image];
945 rec_prev.rec_next[!image] = rec.rec_next[!image];
946 } else {
947 rec_next.rec_prev[!image] = new_recid;
948 rec_prev.rec_next[!image] = new_recid;
949 }
950
951 if (rec.rec_next[image] != DN_NOREC) {
952 if (write_rec(fd, &rec_next, rec.rec_next[image]) == -1)
953 return (syserr_to_dsvcerr(errno));
954 }
955 if (rec.rec_prev[image] != DN_HASHHEAD) {
956 if (write_rec(fd, &rec_prev, rec.rec_prev[image]) == -1)
957 return (syserr_to_dsvcerr(errno));
958 } else {
959 if (write_hashhead(fd, rec_prev.rec_next[!image], hash,
960 !image) == -1)
961 return (syserr_to_dsvcerr(errno));
962 }
963
964 /*
965 * If our hash is changing, update the alternate image
966 * record id pointers to point to our moved record.
967 */
968 if (hash != new_hash) {
969 if (recid_head != DN_NOREC) {
970 if (read_rec(fd, &rec_head, recid_head) == -1)
971 return (syserr_to_dsvcerr(errno));
972 rec_head.rec_prev[!image] = new_recid;
973 if (write_rec(fd, &rec_head, recid_head) == -1)
974 return (syserr_to_dsvcerr(errno));
975 }
976 if (write_hashhead(fd, new_recid, new_hash, !image)
977 == -1)
978 return (syserr_to_dsvcerr(errno));
979 }
980
981 /*
982 * If our record id is changing, reset the old record's
983 * alternate image record id pointers, so that the old
984 * record will not be in-use once we switch over to the
985 * alternate image.
986 */
987 if (recid != new_recid) {
988 rec.rec_prev[!image] = DN_NOREC;
989 rec.rec_next[!image] = DN_NOREC;
990 if (write_rec(fd, &rec, recid) == -1)
991 return (syserr_to_dsvcerr(errno));
992 }
993 }
994
995 /*
996 * If we're using the temporary record, then set `dnh_tempimage' to
997 * the image that will be active when we're done. This piece of
998 * state is critical in the case of failure, since it indicates
999 * both that the temporary record is valid, and tells us whether we
1000 * failed before or after activating the alternate image (below).
1001 * If we failed before activating the alternate image, then the
1002 * failure code can just reset `dnh_tempimage' to DN_NOIMAGE and
1003 * resynchronize the pointers. Otherwise, we failed somewhere
1004 * after making the alternate image active but before we completed
1005 * copying the temporary record over to the actual record, which
1006 * the recovery code will then complete on our behalf before
1007 * resynchronizing the pointers.
1008 */
1009 if (recid == new_recid) {
1010 retval = setabyte(fd, offsetof(dn_header_t, dnh_tempimage),
1011 !image);
1012 if (retval != DSVC_SUCCESS)
1013 return (retval);
1014 }
1015
1016 /*
1017 * Activate the alternate image. This is our commit point -- if we
1018 * fail after this point, we will roll forward on recovery.
1019 */
1020 image = !image;
1021 retval = setabyte(fd, offsetof(dn_header_t, dnh_image), image);
1022 if (retval != DSVC_SUCCESS)
1023 return (retval);
1024
1025 /*
1026 * If we used the temporary record, copy the data into the actual
1027 * record. Once finished, reset `dnh_tempimage' to DN_NOIMAGE
1028 * since the temporary record no longer needs to be used.
1029 */
1030 if (recid == new_recid) {
1031 if (write_rec(fd, &new_rec, new_recid) == -1)
1032 return (syserr_to_dsvcerr(errno));
1033
1034 retval = setabyte(fd, offsetof(dn_header_t, dnh_tempimage),
1035 DN_NOIMAGE);
1036 if (retval != DSVC_SUCCESS)
1037 return (retval);
1038 }
1039
1040 /*
1041 * Update the old record id pointers to match.
1042 */
1043 new_rec.rec_prev[!image] = new_rec.rec_prev[image];
1044 new_rec.rec_next[!image] = new_rec.rec_next[image];
1045 if (write_rec(fd, &new_rec, new_recid) == -1)
1046 return (syserr_to_dsvcerr(errno));
1047
1048 if (hash != new_hash || recid != new_recid) {
1049 if (rec.rec_next[image] != DN_NOREC) {
1050 rec_next.rec_prev[!image] = rec.rec_prev[image];
1051 if (write_rec(fd, &rec_next, rec.rec_next[image]) == -1)
1052 return (syserr_to_dsvcerr(errno));
1053 }
1054 if (rec.rec_prev[image] != DN_HASHHEAD) {
1055 rec_prev.rec_next[!image] = rec.rec_next[image];
1056 if (write_rec(fd, &rec_prev, rec.rec_prev[image]) == -1)
1057 return (syserr_to_dsvcerr(errno));
1058 } else {
1059 if (write_hashhead(fd, rec.rec_next[image], hash,
1060 !image) == -1)
1061 return (syserr_to_dsvcerr(errno));
1062 }
1063
1064 /*
1065 * If our hash changed, update the alternate image record
1066 * id pointers to point to our moved record.
1067 */
1068 if (hash != new_hash) {
1069 if (recid_head != DN_NOREC) {
1070 rec_head.rec_prev[!image] =
1071 rec_head.rec_prev[image];
1072 if (write_rec(fd, &rec_head, recid_head) == -1)
1073 return (syserr_to_dsvcerr(errno));
1074 }
1075 if (write_hashhead(fd, new_recid, new_hash, !image)
1076 == -1)
1077 return (syserr_to_dsvcerr(errno));
1078 }
1079
1080 /*
1081 * If our record id changed, then finish marking the old
1082 * record as "not in use".
1083 */
1084 if (recid != new_recid) {
1085 rec.rec_prev[!image] = DN_NOREC;
1086 rec.rec_next[!image] = DN_NOREC;
1087 if (write_rec(fd, &rec, recid) == -1)
1088 return (syserr_to_dsvcerr(errno));
1089 }
1090 }
1091
1092 /*
1093 * Update the signature on the new record handed back to the caller.
1094 */
1095 newp->dn_sig = new_rec.rec_dn.dn_sig;
1096
1097 /*
1098 * Finally, mark the container as clean.
1099 */
1100 return (setabyte(fd, offsetof(dn_header_t, dnh_dirty), 0));
1101 }
1102
1103 int
list_dn(const char * location,char *** listppp,uint_t * countp)1104 list_dn(const char *location, char ***listppp, uint_t *countp)
1105 {
1106 char ipaddr[INET_ADDRSTRLEN];
1107 struct dirent *result;
1108 DIR *dirp;
1109 unsigned int i, count = 0;
1110 char *re, **new_listpp, **listpp = NULL;
1111 char conver[4];
1112 int error;
1113
1114 dirp = opendir(location);
1115 if (dirp == NULL) {
1116 switch (errno) {
1117 case EACCES:
1118 case EPERM:
1119 return (DSVC_ACCESS);
1120 case ENOENT:
1121 return (DSVC_NO_LOCATION);
1122 default:
1123 break;
1124 }
1125 return (DSVC_INTERNAL);
1126 }
1127
1128 /*
1129 * Compile a regular expression matching "SUNWbinfilesX_" (where X
1130 * is a container version number) followed by an IP address
1131 * (roughly speaking). Note that the $N constructions allow us to
1132 * get the container version and IP address when calling regex(3C).
1133 */
1134 re = regcmp("^SUNWbinfiles([0-9]{1,3})$0_"
1135 "(([0-9]{1,3}_){3}[0-9]{1,3})$1$", (char *)0);
1136 if (re == NULL)
1137 return (DSVC_NO_MEMORY);
1138
1139 while ((result = readdir(dirp)) != NULL) {
1140
1141 if (regex(re, result->d_name, conver, ipaddr) != NULL) {
1142 if (atoi(conver) != DSVC_CONVER)
1143 continue;
1144
1145 for (i = 0; ipaddr[i] != '\0'; i++)
1146 if (ipaddr[i] == '_')
1147 ipaddr[i] = '.';
1148
1149 new_listpp = realloc(listpp,
1150 (sizeof (char **)) * (count + 1));
1151 if (new_listpp == NULL) {
1152 error = DSVC_NO_MEMORY;
1153 goto fail;
1154 }
1155 listpp = new_listpp;
1156 listpp[count] = strdup(ipaddr);
1157 if (listpp[count] == NULL) {
1158 error = DSVC_NO_MEMORY;
1159 goto fail;
1160 }
1161 count++;
1162 }
1163 }
1164 free(re);
1165 (void) closedir(dirp);
1166
1167 *countp = count;
1168 *listppp = listpp;
1169 return (DSVC_SUCCESS);
1170 fail:
1171 free(re);
1172 (void) closedir(dirp);
1173
1174 for (i = 0; i < count; i++)
1175 free(listpp[i]);
1176 free(listpp);
1177 return (error);
1178 }
1179
1180 /*
1181 * Check (a la fsck) that a given DHCP network container is in a consistent
1182 * state. If not, then attempt to restore internal consistency; this should
1183 * always be possible unless the container has been externally corrupted.
1184 */
1185 static int
check_dn(dn_handle_t * dhp)1186 check_dn(dn_handle_t *dhp)
1187 {
1188 dn_header_t header;
1189 uchar_t image, dirty;
1190 uint16_t hash;
1191 dn_filerec_t rec;
1192 dn_recid_t recid, maxrecid;
1193 int retval;
1194
1195 /*
1196 * Reading the whole header is a very expensive operation; only do
1197 * it once we're sure the container is actually dirty. On an
1198 * E4500, this optimization lowers the wall-clock cost of creating
1199 * a 5000-record datastore by 20 percent.
1200 */
1201 retval = getabyte(dhp->dh_fd, offsetof(dn_header_t, dnh_dirty), &dirty);
1202 if (retval != DSVC_SUCCESS)
1203 return (retval);
1204
1205 if (dirty == 0)
1206 return (DSVC_SUCCESS);
1207
1208 if (read_header(dhp->dh_fd, &header, B_TRUE) == -1)
1209 return (syserr_to_dsvcerr(errno));
1210
1211 /*
1212 * If `dnh_tempimage' matches the current working image, then we
1213 * crashed in the middle of a modify_dn() operation. Complete
1214 * writing out the temporary record before restoring internal
1215 * consistency. This is a bit of a kludge but there doesn't seem
1216 * to be another way.
1217 */
1218 if (header.dnh_tempimage == header.dnh_image) {
1219 recid = RECID(header.dnh_temp.rec_dn.dn_cip.s_addr,
1220 header.dnh_netmask);
1221 if (write_rec(dhp->dh_fd, &header.dnh_temp, recid) == -1)
1222 return (syserr_to_dsvcerr(errno));
1223
1224 header.dnh_tempimage = DN_NOIMAGE;
1225 }
1226
1227 /*
1228 * Blindly update all the header hashhead pointers since we're
1229 * going to have to re-write the header anyway.
1230 */
1231 image = header.dnh_image;
1232 for (hash = 0; hash < DN_CIDHASHSZ; hash++) {
1233 header.dnh_cidhash[hash][!image] =
1234 header.dnh_cidhash[hash][image];
1235 }
1236
1237 /*
1238 * Synchronize the record pointers of all in-use records. We do
1239 * this instead of just walking the hashheads because not all dirty
1240 * records are hashed (for instance, we may have failed part way
1241 * through an add_dn()).
1242 */
1243 maxrecid = RECID(~0, header.dnh_netmask);
1244 for (recid = RECID(0, header.dnh_netmask); recid <= maxrecid; recid++) {
1245 if (read_rec(dhp->dh_fd, &rec, recid) == -1)
1246 return (syserr_to_dsvcerr(errno));
1247
1248 /*
1249 * Verify the pointers match. If not, then correct
1250 * the record and write it back to disk.
1251 */
1252 if (rec.rec_next[image] != rec.rec_next[!image] ||
1253 rec.rec_prev[image] != rec.rec_prev[!image]) {
1254 header.dnh_errors++;
1255
1256 rec.rec_prev[!image] = rec.rec_prev[image];
1257 rec.rec_next[!image] = rec.rec_next[image];
1258
1259 if (write_rec(dhp->dh_fd, &rec, recid) == -1)
1260 return (syserr_to_dsvcerr(errno));
1261 }
1262 }
1263
1264 header.dnh_checks++;
1265 if (write_header(dhp->dh_fd, &header) == -1)
1266 return (syserr_to_dsvcerr(errno));
1267
1268 /*
1269 * Clear the dirty bit on the container.
1270 */
1271 return (setabyte(dhp->dh_fd, offsetof(dn_header_t, dnh_dirty), 0));
1272 }
1273
1274 /*
1275 * Given a buffer `path' of `pathlen' bytes, fill it in with a path to the
1276 * DHCP Network table for IP network `ip' located in directory `dir'.
1277 */
1278 static void
net2path(char * path,size_t pathlen,const char * dir,ipaddr_t ip)1279 net2path(char *path, size_t pathlen, const char *dir, ipaddr_t ip)
1280 {
1281 (void) snprintf(path, pathlen, "%s/SUNWbinfiles%u_%d_%d_%d_%d", dir,
1282 DSVC_CONVER, ip >> 24, (ip >> 16) & 0xff, (ip >> 8) & 0xff,
1283 ip & 0xff);
1284 }
1285
1286 /*
1287 * Given a `cid' that's `cidlen' bytes long, hash it to a value between 0
1288 * and DN_CIDHASHSZ - 1. We use CRC16 for our hash since it's known to be
1289 * very evenly distributed.
1290 */
1291 static uint16_t
cidhash(const uchar_t * cid,size_t cidlen)1292 cidhash(const uchar_t *cid, size_t cidlen)
1293 {
1294 uchar_t bit;
1295 uint16_t result = 0xffff;
1296 const uint16_t crc16_poly = 0x8408; /* mutated CRC-CCITT polynomial */
1297
1298 while (cidlen-- != 0) {
1299 result ^= *cid++;
1300 for (bit = 0; bit < 8; bit++) {
1301 if (result & 1)
1302 result = (result >> 1) ^ crc16_poly;
1303 else
1304 result >>= 1;
1305 }
1306 }
1307 return (result % DN_CIDHASHSZ);
1308 }
1309
1310 /*
1311 * Convert the dn_filerec_t pointed to by `rec' from native (host) to
1312 * network order or the other way.
1313 */
1314 /* ARGSUSED */
1315 static void
nhconvert_rec(dn_filerec_t * rec)1316 nhconvert_rec(dn_filerec_t *rec)
1317 {
1318 #ifdef _LITTLE_ENDIAN
1319 dn_rec_t *dnp = &rec->rec_dn;
1320
1321 nhconvert(&rec->rec_prev[0], &rec->rec_prev[0], sizeof (dn_recid_t));
1322 nhconvert(&rec->rec_prev[1], &rec->rec_prev[1], sizeof (dn_recid_t));
1323 nhconvert(&rec->rec_next[0], &rec->rec_next[0], sizeof (dn_recid_t));
1324 nhconvert(&rec->rec_next[1], &rec->rec_next[1], sizeof (dn_recid_t));
1325
1326 nhconvert(&dnp->dn_cip.s_addr, &dnp->dn_cip.s_addr, sizeof (ipaddr_t));
1327 nhconvert(&dnp->dn_sip.s_addr, &dnp->dn_sip.s_addr, sizeof (ipaddr_t));
1328 nhconvert(&dnp->dn_lease, &dnp->dn_lease, sizeof (lease_t));
1329 nhconvert(&dnp->dn_sig, &dnp->dn_sig, sizeof (uint64_t));
1330 #endif
1331 }
1332
1333 /*
1334 * Convert the header pointed to by `hdrp' from native (host) to network
1335 * order or the other way. If `hash' is false, then don't bother
1336 * converting the hash chains.
1337 */
1338 /* ARGSUSED */
1339 static void
nhconvert_header(dn_header_t * hdrp,boolean_t hash)1340 nhconvert_header(dn_header_t *hdrp, boolean_t hash)
1341 {
1342 #ifdef _LITTLE_ENDIAN
1343 unsigned int i;
1344
1345 nhconvert(&hdrp->dnh_network, &hdrp->dnh_network, sizeof (ipaddr_t));
1346 nhconvert(&hdrp->dnh_netmask, &hdrp->dnh_netmask, sizeof (ipaddr_t));
1347 nhconvert(&hdrp->dnh_magic, &hdrp->dnh_magic, sizeof (uint32_t));
1348 nhconvert_rec(&hdrp->dnh_temp);
1349
1350 if (hash) {
1351 for (i = 0; i < DN_CIDHASHSZ; i++) {
1352 nhconvert(&hdrp->dnh_cidhash[i][0],
1353 &hdrp->dnh_cidhash[i][0], sizeof (dn_recid_t));
1354 nhconvert(&hdrp->dnh_cidhash[i][1],
1355 &hdrp->dnh_cidhash[i][1], sizeof (dn_recid_t));
1356 }
1357 }
1358 #endif
1359 }
1360
1361 /*
1362 * Read the dn_filerec_t identified by `recid' from open container `fd'
1363 * into `rec'. Returns 0 on success, -1 on failure (errno is set).
1364 */
1365 static int
read_rec(int fd,dn_filerec_t * rec,dn_recid_t recid)1366 read_rec(int fd, dn_filerec_t *rec, dn_recid_t recid)
1367 {
1368 if (pnread(fd, rec, sizeof (*rec), RECID2OFFSET(recid)) == -1)
1369 return (-1);
1370
1371 nhconvert_rec(rec);
1372 return (0);
1373 }
1374
1375 /*
1376 * Write the dn_filerec_t `rec' identified by `recid' into the open
1377 * container `fd'. Returns 0 on success, -1 on failure (errno is set).
1378 */
1379 static int
write_rec(int fd,dn_filerec_t * rec,dn_recid_t recid)1380 write_rec(int fd, dn_filerec_t *rec, dn_recid_t recid)
1381 {
1382 int retval;
1383
1384 nhconvert_rec(rec);
1385 retval = pnwrite(fd, rec, sizeof (*rec), RECID2OFFSET(recid));
1386 nhconvert_rec(rec);
1387 return (retval);
1388 }
1389
1390 /*
1391 * Read the dn_header_t from the open container `fd' into the dn_header_t
1392 * pointed to by `hdrp'; if `hash' is not set, then skip reading the
1393 * dn_header_t hash chains. Returns 0 on success, -1 on failure (errno is
1394 * set).
1395 */
1396 static int
read_header(int fd,dn_header_t * hdrp,boolean_t hash)1397 read_header(int fd, dn_header_t *hdrp, boolean_t hash)
1398 {
1399 size_t size;
1400
1401 size = hash ? sizeof (dn_header_t) : offsetof(dn_header_t, dnh_cidhash);
1402 if (pnread(fd, hdrp, size, 0) == -1)
1403 return (-1);
1404
1405 nhconvert_header(hdrp, hash);
1406 return (0);
1407 }
1408
1409 /*
1410 * Write the dn_header_t pointed to by `hdrp' into open container `fd'.
1411 * Returns 0 on success, -1 on failure (errno is set).
1412 */
1413 static int
write_header(int fd,dn_header_t * hdrp)1414 write_header(int fd, dn_header_t *hdrp)
1415 {
1416 int retval;
1417
1418 nhconvert_header(hdrp, B_TRUE);
1419 retval = pnwrite(fd, hdrp, sizeof (dn_header_t), 0);
1420 nhconvert_header(hdrp, B_TRUE);
1421 return (retval);
1422 }
1423
1424 /*
1425 * Read in the head of the `cidhash' hash chain from open container `fd'
1426 * into `recid_headp', using image `image'. Returns 0 on success, -1 on
1427 * failure (errno is set).
1428 */
1429 static int
read_hashhead(int fd,dn_recid_t * recid_headp,uint16_t cidhash,uchar_t image)1430 read_hashhead(int fd, dn_recid_t *recid_headp, uint16_t cidhash, uchar_t image)
1431 {
1432 if (pnread(fd, recid_headp, sizeof (dn_recid_t),
1433 offsetof(dn_header_t, dnh_cidhash[cidhash][image])) == -1)
1434 return (-1);
1435
1436 nhconvert(recid_headp, recid_headp, sizeof (dn_recid_t));
1437 return (0);
1438 }
1439
1440 /*
1441 * Write out the head of the `cidhash' hash chain into open container `fd'
1442 * from `recid_head', using image `image'. Returns 0 on success, -1 on
1443 * failure (errno is set).
1444 */
1445 static int
write_hashhead(int fd,dn_recid_t recid_head,uint16_t cidhash,uchar_t image)1446 write_hashhead(int fd, dn_recid_t recid_head, uint16_t cidhash, uchar_t image)
1447 {
1448 nhconvert(&recid_head, &recid_head, sizeof (dn_recid_t));
1449 return (pnwrite(fd, &recid_head, sizeof (dn_recid_t),
1450 offsetof(dn_header_t, dnh_cidhash[cidhash][image])));
1451 }
1452
1453 /*
1454 * Get the byte `offset' bytes into open file `fd', and store in `bytep'.
1455 * Returns a DSVC_* return code.
1456 */
1457 static int
getabyte(int fd,off_t offset,uchar_t * bytep)1458 getabyte(int fd, off_t offset, uchar_t *bytep)
1459 {
1460 switch (pread(fd, bytep, 1, offset)) {
1461 case 1:
1462 return (DSVC_SUCCESS);
1463 case -1:
1464 return (syserr_to_dsvcerr(errno));
1465 default:
1466 break;
1467 }
1468
1469 return (DSVC_INTERNAL);
1470 }
1471
1472 /*
1473 * Set the byte `offset' bytes into open file `fd' to `byte'. Returns a
1474 * DSVC_* return code.
1475 */
1476 static int
setabyte(int fd,off_t offset,uchar_t byte)1477 setabyte(int fd, off_t offset, uchar_t byte)
1478 {
1479 switch (pwrite(fd, &byte, 1, offset)) {
1480 case 1:
1481 return (DSVC_SUCCESS);
1482 case -1:
1483 return (syserr_to_dsvcerr(errno));
1484 default:
1485 break;
1486 }
1487
1488 return (DSVC_INTERNAL);
1489 }
1490