/*
 * GRUB -- GRand Unified Bootloader
 * Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * The zfs plug-in routines for GRUB are:
 *
 * zfs_mount() - locates a valid uberblock of the root pool and reads
 *     in its MOS at the memory address MOS.
 *
 * zfs_open() - locates a plain file object by following the MOS
 *     and places its dnode at the memory address DNODE.
 *
 * zfs_read() - reads in the data blocks pointed to by the DNODE.
 *
 * ZFS_SCRATCH is used as a working area.
 *
 * (memory addr)       MOS        DNODE      ZFS_SCRATCH
 *                      |           |            |
 *          +-----------V-----------V------------V-------------+
 *   memory |           |   dnode   |   dnode    |   scratch   |
 *          |           |   512B    |   512B     |   area      |
 *          +-------------------------------------------------+
 */
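/*
 * Illustrative call sequence (a hedged sketch of how GRUB's filesystem
 * layer is expected to drive these entry points; the path shown is only
 * an example and is not taken from this file):
 *
 *	if (zfs_mount()) {
 *		// best uberblock found, MOS read in at MOS
 *		if (zfs_open("/platform/i86pc/kernel/unix")) {
 *			// file dnode now at DNODE, filemax/filepos set
 *			char buf[512];
 *			int n = zfs_read(buf, sizeof (buf));
 *		}
 *	}
 */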

#ifdef FSYS_ZFS

#include "shared.h"
#include "filesys.h"
#include "fsys_zfs.h"

/* cache for a file block of the currently zfs_open()-ed file */
static void *file_buf = NULL;
static uint64_t file_start = 0;
static uint64_t file_end = 0;

/* cache for a dnode block */
static dnode_phys_t *dnode_buf = NULL;
static dnode_phys_t *dnode_mdn = NULL;
static uint64_t dnode_start = 0;
static uint64_t dnode_end = 0;

static uint64_t pool_guid = 0;
static uberblock_t current_uberblock;
static char *stackbase;

decomp_entry_t decomp_table[ZIO_COMPRESS_FUNCTIONS] =
{
	{"inherit", 0},			/* ZIO_COMPRESS_INHERIT */
	{"on", lzjb_decompress},	/* ZIO_COMPRESS_ON */
	{"off", 0},			/* ZIO_COMPRESS_OFF */
	{"lzjb", lzjb_decompress},	/* ZIO_COMPRESS_LZJB */
	{"empty", 0}			/* ZIO_COMPRESS_EMPTY */
};

static int zio_read_data(blkptr_t *bp, void *buf, char *stack);

/*
 * Our own version of bcmp().
 */
static int
zfs_bcmp(const void *s1, const void *s2, size_t n)
{
	const uchar_t *ps1 = s1;
	const uchar_t *ps2 = s2;

	if (s1 != s2 && n != 0) {
		do {
			if (*ps1++ != *ps2++)
				return (1);
		} while (--n != 0);
	}

	return (0);
}

/*
 * Our own version of log2().  Same thing as highbit()-1.
 */
static int
zfs_log2(uint64_t num)
{
	int i = 0;

	while (num > 1) {
		i++;
		num = num >> 1;
	}

	return (i);
}

/* Checksum Functions */
static void
zio_checksum_off(const void *buf, uint64_t size, zio_cksum_t *zcp)
{
	ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);
}

/* Checksum Table and Values */
zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = {
	NULL, NULL, 0, 0, "inherit",
	NULL, NULL, 0, 0, "on",
	zio_checksum_off, zio_checksum_off, 0, 0, "off",
	zio_checksum_SHA256, zio_checksum_SHA256, 1, 1, "label",
	zio_checksum_SHA256, zio_checksum_SHA256, 1, 1, "gang_header",
	NULL, NULL, 0, 0, "zilog",
	fletcher_2_native, fletcher_2_byteswap, 0, 0, "fletcher2",
	fletcher_4_native, fletcher_4_byteswap, 1, 0, "fletcher4",
	zio_checksum_SHA256, zio_checksum_SHA256, 1, 0, "SHA256",
	NULL, NULL, 0, 0, "zilog2",
};

/*
 * zio_checksum_verify: Provides support for checksum verification.
 *
 * Fletcher2, Fletcher4, and SHA256 are supported.
 *
 * Return:
 *	-1 = Failure
 *	 0 = Success
 */
static int
zio_checksum_verify(blkptr_t *bp, char *data, int size)
{
	zio_cksum_t zc = bp->blk_cksum;
	uint32_t checksum = BP_GET_CHECKSUM(bp);
	int byteswap = BP_SHOULD_BYTESWAP(bp);
	zio_eck_t *zec = (zio_eck_t *)(data + size) - 1;
	zio_checksum_info_t *ci = &zio_checksum_table[checksum];
	zio_cksum_t actual_cksum, expected_cksum;

	/* byteswap is not supported */
	if (byteswap)
		return (-1);

	if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL)
		return (-1);

	if (ci->ci_eck) {
		expected_cksum = zec->zec_cksum;
		zec->zec_cksum = zc;
		ci->ci_func[0](data, size, &actual_cksum);
		zec->zec_cksum = expected_cksum;
		zc = expected_cksum;

	} else {
		ci->ci_func[byteswap](data, size, &actual_cksum);
	}

	if ((actual_cksum.zc_word[0] - zc.zc_word[0]) |
	    (actual_cksum.zc_word[1] - zc.zc_word[1]) |
	    (actual_cksum.zc_word[2] - zc.zc_word[2]) |
	    (actual_cksum.zc_word[3] - zc.zc_word[3]))
		return (-1);

	return (0);
}

/*
 * vdev_label_start returns the physical disk offset (in bytes) of
 * label "l".
 */
static uint64_t
vdev_label_start(uint64_t psize, int l)
{
	return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
	    0 : psize - VDEV_LABELS * sizeof (vdev_label_t)));
}
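
/*
 * Worked example (assuming the usual 256K vdev_label_t and VDEV_LABELS == 4):
 * labels 0 and 1 sit at the front of the device at byte offsets 0 and 256K
 * (l < VDEV_LABELS / 2, so the psize term drops out), while labels 2 and 3
 * sit at the back at psize - 512K and psize - 256K; e.g. for label 2,
 * 2 * 256K + (psize - 4 * 256K) = psize - 512K.
 */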

/*
 * vdev_uberblock_compare takes two uberblock structures and returns an integer
 * indicating the more recent of the two.
 *	Return Value = 1 if ub2 is more recent
 *	Return Value = -1 if ub1 is more recent
 * The most recent uberblock is determined using its transaction number and
 * timestamp.  The uberblock with the highest transaction number is
 * considered "newer".  If the transaction numbers of the two blocks match, the
 * timestamps are compared to determine the "newer" of the two.
 */
static int
vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2)
{
	if (ub1->ub_txg < ub2->ub_txg)
		return (-1);
	if (ub1->ub_txg > ub2->ub_txg)
		return (1);

	if (ub1->ub_timestamp < ub2->ub_timestamp)
		return (-1);
	if (ub1->ub_timestamp > ub2->ub_timestamp)
		return (1);

	return (0);
}

/*
 * Three pieces of information are needed to verify an uberblock: the magic
 * number, the version number, and the checksum.  All three are checked
 * below (the checksum via the "label" checksum over the uberblock).
 *
 * Return:
 *	0 - Success
 *	-1 - Failure
 */
static int
uberblock_verify(uberblock_phys_t *ub, uint64_t offset)
{

	uberblock_t *uber = &ub->ubp_uberblock;
	blkptr_t bp;

	BP_ZERO(&bp);
	BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL);
	BP_SET_BYTEORDER(&bp, ZFS_HOST_BYTEORDER);
	ZIO_SET_CHECKSUM(&bp.blk_cksum, offset, 0, 0, 0);

	if (zio_checksum_verify(&bp, (char *)ub, UBERBLOCK_SIZE) != 0)
		return (-1);

	if (uber->ub_magic == UBERBLOCK_MAGIC &&
	    uber->ub_version > 0 && uber->ub_version <= SPA_VERSION)
		return (0);

	return (-1);
}

/*
 * Find the best uberblock.
 * Return:
 *	Success - Pointer to the best uberblock.
 *	Failure - NULL
 */
static uberblock_phys_t *
find_bestub(uberblock_phys_t *ub_array, uint64_t sector)
{
	uberblock_phys_t *ubbest = NULL;
	uint64_t offset;
	int i;

	for (i = 0; i < (VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT); i++) {
		offset = (sector << SPA_MINBLOCKSHIFT) +
		    VDEV_UBERBLOCK_OFFSET(i);
		if (uberblock_verify(&ub_array[i], offset) == 0) {
			if (ubbest == NULL) {
				ubbest = &ub_array[i];
			} else if (vdev_uberblock_compare(
			    &(ub_array[i].ubp_uberblock),
			    &(ubbest->ubp_uberblock)) > 0) {
				ubbest = &ub_array[i];
			}
		}
	}

	return (ubbest);
}

/*
 * Read a block of data based on the gang block address dva,
 * and put its data in buf.
 *
 * Return:
 *	0 - success
 *	1 - failure
 */
static int
zio_read_gang(blkptr_t *bp, dva_t *dva, void *buf, char *stack)
{
	zio_gbh_phys_t *zio_gb;
	uint64_t offset, sector;
	blkptr_t tmpbp;
	int i;

	zio_gb = (zio_gbh_phys_t *)stack;
	stack += SPA_GANGBLOCKSIZE;
	offset = DVA_GET_OFFSET(dva);
	sector = DVA_OFFSET_TO_PHYS_SECTOR(offset);

	/* read in the gang block header */
	if (devread(sector, 0, SPA_GANGBLOCKSIZE, (char *)zio_gb) == 0) {
		grub_printf("failed to read in a gang block header\n");
		return (1);
	}

	/* self-checksum the gang block header */
	BP_ZERO(&tmpbp);
	BP_SET_CHECKSUM(&tmpbp, ZIO_CHECKSUM_GANG_HEADER);
	BP_SET_BYTEORDER(&tmpbp, ZFS_HOST_BYTEORDER);
	ZIO_SET_CHECKSUM(&tmpbp.blk_cksum, DVA_GET_VDEV(dva),
	    DVA_GET_OFFSET(dva), bp->blk_birth, 0);
	if (zio_checksum_verify(&tmpbp, (char *)zio_gb, SPA_GANGBLOCKSIZE)) {
		grub_printf("failed to checksum a gang block header\n");
		return (1);
	}

	for (i = 0; i < SPA_GBH_NBLKPTRS; i++) {
		if (zio_gb->zg_blkptr[i].blk_birth == 0)
			continue;

		if (zio_read_data(&zio_gb->zg_blkptr[i], buf, stack))
			return (1);
		buf += BP_GET_PSIZE(&zio_gb->zg_blkptr[i]);
	}

	return (0);
}

/*
 * Read in a block of raw data to buf.
 *
 * Return:
 *	0 - success
 *	1 - failure
 */
static int
zio_read_data(blkptr_t *bp, void *buf, char *stack)
{
	int i, psize;

	psize = BP_GET_PSIZE(bp);

	/* pick a good dva from the block pointer */
	for (i = 0; i < SPA_DVAS_PER_BP; i++) {
		uint64_t offset, sector;

		if (bp->blk_dva[i].dva_word[0] == 0 &&
		    bp->blk_dva[i].dva_word[1] == 0)
			continue;

		if (DVA_GET_GANG(&bp->blk_dva[i])) {
			if (zio_read_gang(bp, &bp->blk_dva[i], buf, stack) == 0)
				return (0);
		} else {
			/* read in a data block */
			offset = DVA_GET_OFFSET(&bp->blk_dva[i]);
			sector = DVA_OFFSET_TO_PHYS_SECTOR(offset);
			if (devread(sector, 0, psize, buf))
				return (0);
		}
	}

	return (1);
}

/*
 * Read in a block of data, verify its checksum, decompress if needed,
 * and put the uncompressed data in buf.
 *
 * Return:
 *	0 - success
 *	errnum - failure
 */
static int
zio_read(blkptr_t *bp, void *buf, char *stack)
{
	int lsize, psize, comp;
	char *retbuf;

	comp = BP_GET_COMPRESS(bp);
	lsize = BP_GET_LSIZE(bp);
	psize = BP_GET_PSIZE(bp);

	if ((unsigned int)comp >= ZIO_COMPRESS_FUNCTIONS ||
	    (comp != ZIO_COMPRESS_OFF &&
	    decomp_table[comp].decomp_func == NULL)) {
		grub_printf("compression algorithm not supported\n");
		return (ERR_FSYS_CORRUPT);
	}

	if ((char *)buf < stack && ((char *)buf) + lsize > stack) {
		grub_printf("not enough memory allocated\n");
		return (ERR_WONT_FIT);
	}

	retbuf = buf;
	if (comp != ZIO_COMPRESS_OFF) {
		buf = stack;
		stack += psize;
	}

	if (zio_read_data(bp, buf, stack)) {
		grub_printf("zio_read_data failed\n");
		return (ERR_FSYS_CORRUPT);
	}

	if (zio_checksum_verify(bp, buf, psize) != 0) {
		grub_printf("checksum verification failed\n");
		return (ERR_FSYS_CORRUPT);
	}

	if (comp != ZIO_COMPRESS_OFF)
		decomp_table[comp].decomp_func(buf, retbuf, psize, lsize);

	return (0);
}

/*
 * Get the block from a block id.
 * Push the block onto the stack.
 *
 * Return:
 *	0 - success
 *	errnum - failure
 */
static int
dmu_read(dnode_phys_t *dn, uint64_t blkid, void *buf, char *stack)
{
	int idx, level;
	blkptr_t *bp_array = dn->dn_blkptr;
	int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
	blkptr_t *bp, *tmpbuf;

	bp = (blkptr_t *)stack;
	stack += sizeof (blkptr_t);

	tmpbuf = (blkptr_t *)stack;
	stack += 1<<dn->dn_indblkshift;

	for (level = dn->dn_nlevels - 1; level >= 0; level--) {
		idx = (blkid >> (epbs * level)) & ((1<<epbs)-1);
		*bp = bp_array[idx];
		if (level == 0)
			tmpbuf = buf;
		if (BP_IS_HOLE(bp)) {
			grub_memset(buf, 0,
			    dn->dn_datablkszsec << SPA_MINBLOCKSHIFT);
			break;
		} else if (errnum = zio_read(bp, tmpbuf, stack)) {
			return (errnum);
		}

		bp_array = tmpbuf;
	}

	return (0);
}

/*
 * mzap_lookup: Looks up property described by "name" and returns the value
 * in "value".
 *
 * Return:
 *	0 - success
 *	errnum - failure
 */
static int
mzap_lookup(mzap_phys_t *zapobj, int objsize, char *name,
    uint64_t *value)
{
	int i, chunks;
	mzap_ent_phys_t *mzap_ent = zapobj->mz_chunk;

	chunks = objsize / MZAP_ENT_LEN - 1;
	for (i = 0; i < chunks; i++) {
		if (grub_strcmp(mzap_ent[i].mze_name, name) == 0) {
			*value = mzap_ent[i].mze_value;
			return (0);
		}
	}

	return (ERR_FSYS_CORRUPT);
}

static uint64_t
zap_hash(uint64_t salt, const char *name)
{
	static uint64_t table[256];
	const uint8_t *cp;
	uint8_t c;
	uint64_t crc = salt;

	if (table[128] == 0) {
		uint64_t *ct;
		int i, j;
		for (i = 0; i < 256; i++) {
			for (ct = table + i, *ct = i, j = 8; j > 0; j--)
				*ct = (*ct >> 1) ^ (-(*ct & 1) &
				    ZFS_CRC64_POLY);
		}
	}

	if (crc == 0 || table[128] != ZFS_CRC64_POLY) {
		errnum = ERR_FSYS_CORRUPT;
		return (0);
	}

	for (cp = (const uint8_t *)name; (c = *cp) != '\0'; cp++)
		crc = (crc >> 8) ^ table[(crc ^ c) & 0xFF];

	/*
	 * Only use 28 bits, since we need 4 bits in the cookie for the
	 * collision differentiator.  We MUST use the high bits, since
	 * those are the ones that we first pay attention to when
	 * choosing the bucket.
	 */
	crc &= ~((1ULL << (64 - 28)) - 1);

	return (crc);
}
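
/*
 * Sketch of how the returned hash is consumed (see fzap_lookup() and
 * zap_leaf_lookup() below): ZAP_HASH_IDX() uses the high-order bits to
 * index the fat zap pointer table and find a leaf block, and LEAF_HASH()
 * then selects a hash bucket within that leaf; this is why only the
 * high-order 28 bits are kept above.
 */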

/*
 * Only to be used on 8-bit arrays.
 * array_len is actual len in bytes (not encoded le_value_length).
 * buf is null-terminated.
 */
static int
zap_leaf_array_equal(zap_leaf_phys_t *l, int blksft, int chunk,
    int array_len, const char *buf)
{
	int bseen = 0;

	while (bseen < array_len) {
		struct zap_leaf_array *la =
		    &ZAP_LEAF_CHUNK(l, blksft, chunk).l_array;
		int toread = MIN(array_len - bseen, ZAP_LEAF_ARRAY_BYTES);

		if (chunk >= ZAP_LEAF_NUMCHUNKS(blksft))
			return (0);

		if (zfs_bcmp(la->la_array, buf + bseen, toread) != 0)
			break;
		chunk = la->la_next;
		bseen += toread;
	}
	return (bseen == array_len);
}

/*
 * Given a zap_leaf_phys_t, walk thru the zap leaf chunks to get the
 * value for the property "name".
 *
 * Return:
 *	0 - success
 *	errnum - failure
 */
static int
zap_leaf_lookup(zap_leaf_phys_t *l, int blksft, uint64_t h,
    const char *name, uint64_t *value)
{
	uint16_t chunk;
	struct zap_leaf_entry *le;

	/* Verify if this is a valid leaf block */
	if (l->l_hdr.lh_block_type != ZBT_LEAF)
		return (ERR_FSYS_CORRUPT);
	if (l->l_hdr.lh_magic != ZAP_LEAF_MAGIC)
		return (ERR_FSYS_CORRUPT);

	for (chunk = l->l_hash[LEAF_HASH(blksft, h)];
	    chunk != CHAIN_END; chunk = le->le_next) {

		if (chunk >= ZAP_LEAF_NUMCHUNKS(blksft))
			return (ERR_FSYS_CORRUPT);

		le = ZAP_LEAF_ENTRY(l, blksft, chunk);

		/* Verify the chunk entry */
		if (le->le_type != ZAP_CHUNK_ENTRY)
			return (ERR_FSYS_CORRUPT);

		if (le->le_hash != h)
			continue;

		if (zap_leaf_array_equal(l, blksft, le->le_name_chunk,
		    le->le_name_length, name)) {

			struct zap_leaf_array *la;
			uint8_t *ip;

			if (le->le_int_size != 8 || le->le_value_length != 1)
				return (ERR_FSYS_CORRUPT);

			/* get the uint64_t property value */
			la = &ZAP_LEAF_CHUNK(l, blksft,
			    le->le_value_chunk).l_array;
			ip = la->la_array;

			*value = (uint64_t)ip[0] << 56 | (uint64_t)ip[1] << 48 |
			    (uint64_t)ip[2] << 40 | (uint64_t)ip[3] << 32 |
			    (uint64_t)ip[4] << 24 | (uint64_t)ip[5] << 16 |
			    (uint64_t)ip[6] << 8 | (uint64_t)ip[7];

			return (0);
		}
	}

	return (ERR_FSYS_CORRUPT);
}

/*
 * Fat ZAP lookup
 *
 * Return:
 *	0 - success
 *	errnum - failure
 */
static int
fzap_lookup(dnode_phys_t *zap_dnode, zap_phys_t *zap,
    char *name, uint64_t *value, char *stack)
{
	zap_leaf_phys_t *l;
	uint64_t hash, idx, blkid;
	int blksft = zfs_log2(zap_dnode->dn_datablkszsec << DNODE_SHIFT);

	/* Verify if this is a fat zap header block */
	if (zap->zap_magic != (uint64_t)ZAP_MAGIC ||
	    zap->zap_flags != 0)
		return (ERR_FSYS_CORRUPT);

	hash = zap_hash(zap->zap_salt, name);
	if (errnum)
		return (errnum);

	/* get block id from index */
	if (zap->zap_ptrtbl.zt_numblks != 0) {
		/* external pointer tables not supported */
		return (ERR_FSYS_CORRUPT);
	}
	idx = ZAP_HASH_IDX(hash, zap->zap_ptrtbl.zt_shift);
	blkid = ((uint64_t *)zap)[idx + (1<<(blksft-3-1))];

	/* Get the leaf block */
	l = (zap_leaf_phys_t *)stack;
	stack += 1<<blksft;
	if ((1<<blksft) < sizeof (zap_leaf_phys_t))
		return (ERR_FSYS_CORRUPT);
	if (errnum = dmu_read(zap_dnode, blkid, l, stack))
		return (errnum);

	return (zap_leaf_lookup(l, blksft, hash, name, value));
}

/*
 * Read in the data of a zap object and find the value for a matching
 * property name.
 *
 * Return:
 *	0 - success
 *	errnum - failure
 */
static int
zap_lookup(dnode_phys_t *zap_dnode, char *name, uint64_t *val, char *stack)
{
	uint64_t block_type;
	int size;
	void *zapbuf;

	/* Read in the first block of the zap object data. */
	zapbuf = stack;
	size = zap_dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
	stack += size;

	if (errnum = dmu_read(zap_dnode, 0, zapbuf, stack))
		return (errnum);

	block_type = *((uint64_t *)zapbuf);

	if (block_type == ZBT_MICRO) {
		return (mzap_lookup(zapbuf, size, name, val));
	} else if (block_type == ZBT_HEADER) {
		/* this is a fat zap */
		return (fzap_lookup(zap_dnode, zapbuf, name,
		    val, stack));
	}

	return (ERR_FSYS_CORRUPT);
}

/*
 * Get the dnode of an object number from the metadnode of an object set.
 *
 * Input
 *	mdn - metadnode to get the object dnode
 *	objnum - object number for the object dnode
 *	buf - data buffer that holds the returning dnode
 *	stack - scratch area
 *
 * Return:
 *	0 - success
 *	errnum - failure
 */
static int
dnode_get(dnode_phys_t *mdn, uint64_t objnum, uint8_t type, dnode_phys_t *buf,
    char *stack)
{
	uint64_t blkid, blksz;	/* the block id this object dnode is in */
	int epbs;		/* shift of number of dnodes in a block */
	int idx;		/* index within a block */
	dnode_phys_t *dnbuf;

	blksz = mdn->dn_datablkszsec << SPA_MINBLOCKSHIFT;
	epbs = zfs_log2(blksz) - DNODE_SHIFT;
	blkid = objnum >> epbs;
	idx = objnum & ((1<<epbs)-1);

	if (dnode_buf != NULL && dnode_mdn == mdn &&
	    objnum >= dnode_start && objnum < dnode_end) {
		grub_memmove(buf, &dnode_buf[idx], DNODE_SIZE);
		VERIFY_DN_TYPE(buf, type);
		return (0);
	}

	if (dnode_buf && blksz == 1<<DNODE_BLOCK_SHIFT) {
		dnbuf = dnode_buf;
		dnode_mdn = mdn;
		dnode_start = blkid << epbs;
		dnode_end = (blkid + 1) << epbs;
	} else {
		dnbuf = (dnode_phys_t *)stack;
		stack += blksz;
	}

	if (errnum = dmu_read(mdn, blkid, (char *)dnbuf, stack))
		return (errnum);

	grub_memmove(buf, &dnbuf[idx], DNODE_SIZE);
	VERIFY_DN_TYPE(buf, type);

	return (0);
}

/*
 * Check if this is a special file that resides at the top
 * dataset of the pool.  Currently this is the GRUB menu,
 * the boot signature, and the boot signature backup.
 * str starts with '/'.
 */
static int
is_top_dataset_file(char *str)
{
	char *tptr;

	if ((tptr = grub_strstr(str, "menu.lst")) &&
	    (tptr[8] == '\0' || tptr[8] == ' ') &&
	    *(tptr-1) == '/')
		return (1);

	if (grub_strncmp(str, BOOTSIGN_DIR"/",
	    grub_strlen(BOOTSIGN_DIR) + 1) == 0)
		return (1);

	if (grub_strcmp(str, BOOTSIGN_BACKUP) == 0)
		return (1);

	return (0);
}

/*
 * Get the file dnode for a given file name where mdn is the meta dnode
 * for this ZFS object set.  When found, place the file dnode in dn.
 * The 'path' argument will be mangled.
 *
 * Return:
 *	0 - success
 *	errnum - failure
 */
static int
dnode_get_path(dnode_phys_t *mdn, char *path, dnode_phys_t *dn,
    char *stack)
{
	uint64_t objnum, version;
	char *cname, ch;

	if (errnum = dnode_get(mdn, MASTER_NODE_OBJ, DMU_OT_MASTER_NODE,
	    dn, stack))
		return (errnum);

	if (errnum = zap_lookup(dn, ZPL_VERSION_STR, &version, stack))
		return (errnum);
	if (version > ZPL_VERSION)
		return (-1);

	if (errnum = zap_lookup(dn, ZFS_ROOT_OBJ, &objnum, stack))
		return (errnum);

	if (errnum = dnode_get(mdn, objnum, DMU_OT_DIRECTORY_CONTENTS,
	    dn, stack))
		return (errnum);

	/* skip leading slashes */
	while (*path == '/')
		path++;

	while (*path && !isspace(*path)) {

		/* get the next component name */
		cname = path;
		while (*path && !isspace(*path) && *path != '/')
			path++;
		ch = *path;
		*path = 0;	/* ensure null termination */

		if (errnum = zap_lookup(dn, cname, &objnum, stack))
			return (errnum);

		objnum = ZFS_DIRENT_OBJ(objnum);
		if (errnum = dnode_get(mdn, objnum, 0, dn, stack))
			return (errnum);

		*path = ch;
		while (*path == '/')
			path++;
	}

	/* We found the dnode for this file.  Verify that it is a plain file. */
	VERIFY_DN_TYPE(dn, DMU_OT_PLAIN_FILE_CONTENTS);

	return (0);
}

/*
 * Get the default 'bootfs' property value from the rootpool.
 *
 * Return:
 *	0 - success
 *	errnum - failure
 */
static int
get_default_bootfsobj(dnode_phys_t *mosmdn, uint64_t *obj, char *stack)
{
	uint64_t objnum = 0;
	dnode_phys_t *dn = (dnode_phys_t *)stack;
	stack += DNODE_SIZE;

	if (errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT,
	    DMU_OT_OBJECT_DIRECTORY, dn, stack))
		return (errnum);

	/*
	 * find the object number for 'pool_props', and get the dnode
	 * of the 'pool_props'.
	 */
	if (zap_lookup(dn, DMU_POOL_PROPS, &objnum, stack))
		return (ERR_FILESYSTEM_NOT_FOUND);

	if (errnum = dnode_get(mosmdn, objnum, DMU_OT_POOL_PROPS, dn, stack))
		return (errnum);

	if (zap_lookup(dn, ZPOOL_PROP_BOOTFS, &objnum, stack))
		return (ERR_FILESYSTEM_NOT_FOUND);

	if (!objnum)
		return (ERR_FILESYSTEM_NOT_FOUND);

	*obj = objnum;
	return (0);
}

/*
 * Given a MOS metadnode, get the metadnode of a given filesystem name (fsname),
 * e.g. pool/rootfs, or a given object number (obj), e.g. the object number
 * of pool/rootfs.
 *
 * If no fsname and no obj are given, return the DSL_DIR metadnode.
 * If fsname is given, return its metadnode and its matching object number.
 * If only obj is given, return the metadnode for this object number.
 *
 * Return:
 *	0 - success
 *	errnum - failure
 */
static int
get_objset_mdn(dnode_phys_t *mosmdn, char *fsname, uint64_t *obj,
    dnode_phys_t *mdn, char *stack)
{
	uint64_t objnum, headobj;
	char *cname, ch;
	blkptr_t *bp;
	objset_phys_t *osp;
	int issnapshot = 0;
	char *snapname;

	if (fsname == NULL && obj) {
		headobj = *obj;
		goto skip;
	}

	if (errnum = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT,
	    DMU_OT_OBJECT_DIRECTORY, mdn, stack))
		return (errnum);

	if (errnum = zap_lookup(mdn, DMU_POOL_ROOT_DATASET, &objnum,
	    stack))
		return (errnum);

	if (errnum = dnode_get(mosmdn, objnum, DMU_OT_DSL_DIR, mdn, stack))
		return (errnum);

	if (fsname == NULL) {
		headobj =
		    ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_head_dataset_obj;
		goto skip;
	}

	/* take out the pool name */
	while (*fsname && !isspace(*fsname) && *fsname != '/')
		fsname++;

	while (*fsname && !isspace(*fsname)) {
		uint64_t childobj;

		while (*fsname == '/')
			fsname++;

		cname = fsname;
		while (*fsname && !isspace(*fsname) && *fsname != '/')
			fsname++;
		ch = *fsname;
		*fsname = 0;

		snapname = cname;
		while (*snapname && !isspace(*snapname) && *snapname != '@')
			snapname++;
		if (*snapname == '@') {
			issnapshot = 1;
			*snapname = 0;
		}
		childobj =
		    ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_child_dir_zapobj;
		if (errnum = dnode_get(mosmdn, childobj,
		    DMU_OT_DSL_DIR_CHILD_MAP, mdn, stack))
			return (errnum);

		if (zap_lookup(mdn, cname, &objnum, stack))
			return (ERR_FILESYSTEM_NOT_FOUND);

		if (errnum = dnode_get(mosmdn, objnum, DMU_OT_DSL_DIR,
		    mdn, stack))
			return (errnum);

		*fsname = ch;
		if (issnapshot)
			*snapname = '@';
	}
	headobj = ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_head_dataset_obj;
	if (obj)
		*obj = headobj;

skip:
	if (errnum = dnode_get(mosmdn, headobj, DMU_OT_DSL_DATASET, mdn, stack))
		return (errnum);
	if (issnapshot) {
		uint64_t snapobj;

		snapobj = ((dsl_dataset_phys_t *)DN_BONUS(mdn))->
		    ds_snapnames_zapobj;

		if (errnum = dnode_get(mosmdn, snapobj,
		    DMU_OT_DSL_DS_SNAP_MAP, mdn, stack))
			return (errnum);
		if (zap_lookup(mdn, snapname + 1, &headobj, stack))
			return (ERR_FILESYSTEM_NOT_FOUND);
		if (errnum = dnode_get(mosmdn, headobj,
		    DMU_OT_DSL_DATASET, mdn, stack))
			return (errnum);
		if (obj)
			*obj = headobj;
	}

	bp = &((dsl_dataset_phys_t *)DN_BONUS(mdn))->ds_bp;
	osp = (objset_phys_t *)stack;
	stack += sizeof (objset_phys_t);
	if (errnum = zio_read(bp, osp, stack))
		return (errnum);

	grub_memmove((char *)mdn, (char *)&osp->os_meta_dnode, DNODE_SIZE);

	return (0);
}

/*
 * For a given XDR packed nvlist, verify the first 4 bytes and move on.
 *
 * An XDR packed nvlist is encoded as (comments from nvs_xdr_create):
 *
 *      encoding method/host endian     (4 bytes)
 *      nvl_version                     (4 bytes)
 *      nvl_nvflag                      (4 bytes)
 *      encoded nvpairs:
 *          encoded size of the nvpair  (4 bytes)
 *          decoded size of the nvpair  (4 bytes)
 *          name string size            (4 bytes)
 *          name string data            (sizeof(NV_ALIGN4(string)))
 *          data type                   (4 bytes)
 *          # of elements in the nvpair (4 bytes)
 *          data
 *      2 zeros for the last nvpair
 *      (end of the entire list)        (8 bytes)
 *
 * Return:
 *	0 - success
 *	1 - failure
 */
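/*
 * Example (illustrative values, not taken from a real pool): a single
 * nvpair "txg" holding a uint64 would appear after the 12-byte preamble
 * roughly as the following sequence of big-endian 32-bit words:
 *
 *      encoded size, decoded size, name length (3), "txg" padded to
 *      4 bytes, type (DATA_TYPE_UINT64), element count (1), then the
 *      8-byte value, followed by the 8 zero bytes ending the list.
 *
 * nvlist_lookup_value() below relies only on this field order, not on
 * any particular sizes.
 */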
static int
nvlist_unpack(char *nvlist, char **out)
{
	/* Verify that the 1st and 2nd bytes in the nvlist are valid. */
	if (nvlist[0] != NV_ENCODE_XDR || nvlist[1] != HOST_ENDIAN)
		return (1);

	nvlist += 4;
	*out = nvlist;
	return (0);
}

static char *
nvlist_array(char *nvlist, int index)
{
	int i, encode_size;

	for (i = 0; i < index; i++) {
		/* skip the header, nvl_version, and nvl_nvflag */
		nvlist = nvlist + 4 * 2;

		while (encode_size = BSWAP_32(*(uint32_t *)nvlist))
			nvlist += encode_size; /* go to the next nvpair */

		nvlist = nvlist + 4 * 2; /* skip the ending 2 zeros - 8 bytes */
	}

	return (nvlist);
}

static int
nvlist_lookup_value(char *nvlist, char *name, void *val, int valtype,
    int *nelmp)
{
	int name_len, type, slen, encode_size;
	char *nvpair, *nvp_name, *strval = val;
	uint64_t *intval = val;

	/* skip the header, nvl_version, and nvl_nvflag */
	nvlist = nvlist + 4 * 2;

	/*
	 * Loop thru the nvpair list
	 * The XDR representation of an integer is in big-endian byte order.
	 */
	while (encode_size = BSWAP_32(*(uint32_t *)nvlist)) {

		nvpair = nvlist + 4 * 2; /* skip the encode/decode size */

		name_len = BSWAP_32(*(uint32_t *)nvpair);
		nvpair += 4;

		nvp_name = nvpair;
		nvpair = nvpair + ((name_len + 3) & ~3); /* align */

		type = BSWAP_32(*(uint32_t *)nvpair);
		nvpair += 4;

		if ((grub_strncmp(nvp_name, name, name_len) == 0) &&
		    type == valtype) {
			int nelm;

			if ((nelm = BSWAP_32(*(uint32_t *)nvpair)) < 1)
				return (1);
			nvpair += 4;

			switch (valtype) {
			case DATA_TYPE_STRING:
				slen = BSWAP_32(*(uint32_t *)nvpair);
				nvpair += 4;
				grub_memmove(strval, nvpair, slen);
				strval[slen] = '\0';
				return (0);

			case DATA_TYPE_UINT64:
				*intval = BSWAP_64(*(uint64_t *)nvpair);
				return (0);

			case DATA_TYPE_NVLIST:
				*(void **)val = (void *)nvpair;
				return (0);

			case DATA_TYPE_NVLIST_ARRAY:
				*(void **)val = (void *)nvpair;
				if (nelmp)
					*nelmp = nelm;
				return (0);
			}
		}

		nvlist += encode_size; /* go to the next nvpair */
	}

	return (1);
}

/*
 * Check if this vdev is online and is in a good state.
 */
static int
vdev_validate(char *nv)
{
	uint64_t ival;

	if (nvlist_lookup_value(nv, ZPOOL_CONFIG_OFFLINE, &ival,
	    DATA_TYPE_UINT64, NULL) == 0 ||
	    nvlist_lookup_value(nv, ZPOOL_CONFIG_FAULTED, &ival,
	    DATA_TYPE_UINT64, NULL) == 0 ||
	    nvlist_lookup_value(nv, ZPOOL_CONFIG_REMOVED, &ival,
	    DATA_TYPE_UINT64, NULL) == 0)
		return (ERR_DEV_VALUES);

	return (0);
}

/*
 * Get a valid vdev pathname/devid from the boot device.
 * The caller should have already allocated MAXPATHLEN bytes for bootpath
 * and devid.
 */
static int
vdev_get_bootpath(char *nv, uint64_t inguid, char *devid, char *bootpath,
    int is_spare)
{
	char type[16];

	if (nvlist_lookup_value(nv, ZPOOL_CONFIG_TYPE, &type, DATA_TYPE_STRING,
	    NULL))
		return (ERR_FSYS_CORRUPT);

	if (strcmp(type, VDEV_TYPE_DISK) == 0) {
		uint64_t guid;

		if (vdev_validate(nv) != 0)
			return (ERR_NO_BOOTPATH);

		if (nvlist_lookup_value(nv, ZPOOL_CONFIG_GUID,
		    &guid, DATA_TYPE_UINT64, NULL) != 0)
			return (ERR_NO_BOOTPATH);

		if (guid != inguid)
			return (ERR_NO_BOOTPATH);

		/* for a spare vdev, pick the disk labeled with "is_spare" */
		if (is_spare) {
			uint64_t spare = 0;
			(void) nvlist_lookup_value(nv, ZPOOL_CONFIG_IS_SPARE,
			    &spare, DATA_TYPE_UINT64, NULL);
			if (!spare)
				return (ERR_NO_BOOTPATH);
		}

		if (nvlist_lookup_value(nv, ZPOOL_CONFIG_PHYS_PATH,
		    bootpath, DATA_TYPE_STRING, NULL) != 0)
			bootpath[0] = '\0';

		if (nvlist_lookup_value(nv, ZPOOL_CONFIG_DEVID,
		    devid, DATA_TYPE_STRING, NULL) != 0)
			devid[0] = '\0';

		if (strlen(bootpath) >= MAXPATHLEN ||
		    strlen(devid) >= MAXPATHLEN)
			return (ERR_WONT_FIT);

		return (0);

	} else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 ||
	    strcmp(type, VDEV_TYPE_REPLACING) == 0 ||
	    (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) {
		int nelm, i;
		char *child;

		if (nvlist_lookup_value(nv, ZPOOL_CONFIG_CHILDREN, &child,
		    DATA_TYPE_NVLIST_ARRAY, &nelm))
			return (ERR_FSYS_CORRUPT);

		for (i = 0; i < nelm; i++) {
			char *child_i;

			child_i = nvlist_array(child, i);
			if (vdev_get_bootpath(child_i, inguid, devid,
			    bootpath, is_spare) == 0)
				return (0);
		}
	}

	return (ERR_NO_BOOTPATH);
}

/*
 * Check the disk label information and retrieve needed vdev name-value pairs.
 *
 * Return:
 *	0 - success
 *	ERR_* - failure
 */
int
check_pool_label(uint64_t sector, char *stack, char *outdevid,
    char *outpath, uint64_t *outguid)
{
	vdev_phys_t *vdev;
	uint64_t pool_state, txg = 0;
	char *nvlist, *nv;
	uint64_t diskguid;
	uint64_t version;

	sector += (VDEV_SKIP_SIZE >> SPA_MINBLOCKSHIFT);

	/* Read in the vdev name-value pair list (112K). */
	if (devread(sector, 0, VDEV_PHYS_SIZE, stack) == 0)
		return (ERR_READ);

	vdev = (vdev_phys_t *)stack;
	stack += sizeof (vdev_phys_t);

	if (nvlist_unpack(vdev->vp_nvlist, &nvlist))
		return (ERR_FSYS_CORRUPT);

	if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_STATE, &pool_state,
	    DATA_TYPE_UINT64, NULL))
		return (ERR_FSYS_CORRUPT);

	if (pool_state == POOL_STATE_DESTROYED)
		return (ERR_FILESYSTEM_NOT_FOUND);

	if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_NAME,
	    current_rootpool, DATA_TYPE_STRING, NULL))
		return (ERR_FSYS_CORRUPT);

	if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_TXG, &txg,
	    DATA_TYPE_UINT64, NULL))
		return (ERR_FSYS_CORRUPT);

	/* not an active device */
	if (txg == 0)
		return (ERR_NO_BOOTPATH);

	if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VERSION, &version,
	    DATA_TYPE_UINT64, NULL))
		return (ERR_FSYS_CORRUPT);
	if (version > SPA_VERSION)
		return (ERR_NEWER_VERSION);
	if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VDEV_TREE, &nv,
	    DATA_TYPE_NVLIST, NULL))
		return (ERR_FSYS_CORRUPT);
	if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_GUID, &diskguid,
	    DATA_TYPE_UINT64, NULL))
		return (ERR_FSYS_CORRUPT);
	if (vdev_get_bootpath(nv, diskguid, outdevid, outpath, 0))
		return (ERR_NO_BOOTPATH);
	if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_POOL_GUID, outguid,
	    DATA_TYPE_UINT64, NULL))
		return (ERR_FSYS_CORRUPT);
	return (0);
}

/*
 * zfs_mount() locates a valid uberblock of the root pool and reads in its MOS
 * to the memory address MOS.
 *
 * Return:
 *	1 - success
 *	0 - failure
 */
int
zfs_mount(void)
{
	char *stack;
	int label = 0;
	uberblock_phys_t *ub_array, *ubbest;
	objset_phys_t *osp;
	char tmp_bootpath[MAXNAMELEN];
	char tmp_devid[MAXNAMELEN];
	uint64_t tmp_guid;
	uint64_t adjpl = (uint64_t)part_length << SPA_MINBLOCKSHIFT;
	int err = errnum;	/* preserve previous errnum state */

	/* if it's our first time here, zero the best uberblock out */
	if (best_drive == 0 && best_part == 0 && find_best_root) {
		grub_memset(&current_uberblock, 0, sizeof (uberblock_t));
		pool_guid = 0;
	}

	stackbase = ZFS_SCRATCH;
	stack = stackbase;
	ub_array = (uberblock_phys_t *)stack;
	stack += VDEV_UBERBLOCK_RING;

	osp = (objset_phys_t *)stack;
	stack += sizeof (objset_phys_t);
	adjpl = P2ALIGN(adjpl, (uint64_t)sizeof (vdev_label_t));

	for (label = 0; label < VDEV_LABELS; label++) {

		/*
		 * Some El Torito stacks don't give us a size, and we end up
		 * setting the size to MAXUINT; further, some of these devices
		 * stop working once a single read past the end has been
		 * issued.  Checking for a maximum part_length and skipping
		 * the backup labels at the end of the slice/partition/device
		 * avoids breaking down on such devices.
		 */
		if (part_length == MAXUINT && label == 2)
			break;

		uint64_t sector = vdev_label_start(adjpl,
		    label) >> SPA_MINBLOCKSHIFT;

		/* Read in the uberblock ring (128K). */
		if (devread(sector +
		    ((VDEV_SKIP_SIZE + VDEV_PHYS_SIZE) >>
		    SPA_MINBLOCKSHIFT), 0, VDEV_UBERBLOCK_RING,
		    (char *)ub_array) == 0)
			continue;

		if ((ubbest = find_bestub(ub_array, sector)) != NULL &&
		    zio_read(&ubbest->ubp_uberblock.ub_rootbp, osp, stack)
		    == 0) {

			VERIFY_OS_TYPE(osp, DMU_OST_META);

			if (check_pool_label(sector, stack, tmp_devid,
			    tmp_bootpath, &tmp_guid))
				continue;
			if (pool_guid == 0)
				pool_guid = tmp_guid;

			if (find_best_root && ((pool_guid != tmp_guid) ||
			    vdev_uberblock_compare(&ubbest->ubp_uberblock,
			    &(current_uberblock)) <= 0))
				continue;

			/* Got the MOS.  Save it at the memory addr MOS. */
			grub_memmove(MOS, &osp->os_meta_dnode, DNODE_SIZE);
			grub_memmove(&current_uberblock,
			    &ubbest->ubp_uberblock, sizeof (uberblock_t));
			grub_memmove(current_bootpath, tmp_bootpath,
			    MAXNAMELEN);
			grub_memmove(current_devid, tmp_devid,
			    grub_strlen(tmp_devid));
			is_zfs_mount = 1;
			return (1);
		}
	}

	/*
	 * While some fs implementations (tftp) rely on setting and keeping
	 * global errnums set, others won't reset it and will break
	 * when issuing rawreads.  The goal here is to simply not
	 * have zfs mount attempts impact the previous state.
	 */
	errnum = err;
	return (0);
}

/*
 * zfs_open() locates a file in the rootpool by following the
 * MOS and places the dnode of the file in the memory address DNODE.
 *
 * Return:
 *	1 - success
 *	0 - failure
 */
int
zfs_open(char *filename)
{
	char *stack;
	dnode_phys_t *mdn;

	file_buf = NULL;
	stackbase = ZFS_SCRATCH;
	stack = stackbase;

	mdn = (dnode_phys_t *)stack;
	stack += sizeof (dnode_phys_t);

	dnode_mdn = NULL;
	dnode_buf = (dnode_phys_t *)stack;
	stack += 1<<DNODE_BLOCK_SHIFT;

	/*
	 * menu.lst is placed at the root pool filesystem level,
	 * do not go to 'current_bootfs'.
	 */
	if (is_top_dataset_file(filename)) {
		if (errnum = get_objset_mdn(MOS, NULL, NULL, mdn, stack))
			return (0);

		current_bootfs_obj = 0;
	} else {
		if (current_bootfs[0] == '\0') {
			/* Get the default root filesystem object number */
			if (errnum = get_default_bootfsobj(MOS,
			    &current_bootfs_obj, stack))
				return (0);

			if (errnum = get_objset_mdn(MOS, NULL,
			    &current_bootfs_obj, mdn, stack))
				return (0);
		} else {
			if (errnum = get_objset_mdn(MOS, current_bootfs,
			    &current_bootfs_obj, mdn, stack)) {
				grub_memset(current_bootfs, 0, MAXNAMELEN);
				return (0);
			}
		}
	}

	if (dnode_get_path(mdn, filename, DNODE, stack)) {
		errnum = ERR_FILE_NOT_FOUND;
		return (0);
	}

	/* get the file size and set the file position to 0 */

	/*
	 * For DMU_OT_SA we will need to locate the SIZE attribute,
	 * which could be either in the bonus buffer or the "spill" block.
	 */
	if (DNODE->dn_bonustype == DMU_OT_SA) {
		sa_hdr_phys_t *sahdrp;
		int hdrsize;

		if (DNODE->dn_bonuslen != 0) {
			sahdrp = (sa_hdr_phys_t *)DN_BONUS(DNODE);
		} else {
			if (DNODE->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
				blkptr_t *bp = &DNODE->dn_spill;
				void *buf;

				buf = (void *)stack;
				stack += BP_GET_LSIZE(bp);

				/* reset errnum to rawread() failure */
				errnum = 0;
				if (zio_read(bp, buf, stack) != 0) {
					return (0);
				}
				sahdrp = buf;
			} else {
				errnum = ERR_FSYS_CORRUPT;
				return (0);
			}
		}
		hdrsize = SA_HDR_SIZE(sahdrp);
		filemax = *(uint64_t *)((char *)sahdrp + hdrsize +
		    SA_SIZE_OFFSET);
	} else {
		filemax = ((znode_phys_t *)DN_BONUS(DNODE))->zp_size;
	}
	filepos = 0;

	dnode_buf = NULL;
	return (1);
}

/*
 * zfs_read reads in the data blocks pointed to by the DNODE.
 *
 * Return:
 *	len - the length successfully read into the buffer
 *	0 - failure
 */
int
zfs_read(char *buf, int len)
{
	char *stack;
	char *tmpbuf;
	int blksz, length, movesize;

	if (file_buf == NULL) {
		file_buf = stackbase;
		stackbase += SPA_MAXBLOCKSIZE;
		file_start = file_end = 0;
	}
	stack = stackbase;

	/*
	 * If offset is in memory, move it into the buffer provided and return.
	 */
	if (filepos >= file_start && filepos+len <= file_end) {
		grub_memmove(buf, file_buf + filepos - file_start, len);
		filepos += len;
		return (len);
	}

	blksz = DNODE->dn_datablkszsec << SPA_MINBLOCKSHIFT;

	/*
	 * Entire Dnode is too big to fit into the space available.  We
	 * will need to read it in chunks.  This could be optimized to
	 * read in as large a chunk as there is space available, but for
	 * now, this only reads in one data block at a time.
	 */
	length = len;
	while (length) {
		/*
		 * Find requested blkid and the offset within that block.
		 */
		uint64_t blkid = filepos / blksz;

		if (errnum = dmu_read(DNODE, blkid, file_buf, stack))
			return (0);

		file_start = blkid * blksz;
		file_end = file_start + blksz;

		movesize = MIN(length, file_end - filepos);

		grub_memmove(buf, file_buf + filepos - file_start,
		    movesize);
		buf += movesize;
		length -= movesize;
		filepos += movesize;
	}

	return (len);
}

/*
 * No-Op
 */
int
zfs_embed(int *start_sector, int needed_sectors)
{
	return (1);
}

#endif /* FSYS_ZFS */