1 /* $OpenBSD: softraid_amd64.c,v 1.8 2022/08/12 20:17:46 stsp Exp $ */
2
3 /*
4 * Copyright (c) 2012 Joel Sing <jsing@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19 #include <sys/param.h>
20 #include <sys/queue.h>
21 #include <sys/disklabel.h>
22 #include <sys/reboot.h>
23
24 #include <dev/biovar.h>
25 #include <dev/softraidvar.h>
26
27 #include <lib/libsa/aes_xts.h>
28 #include <lib/libsa/softraid.h>
29 #include <lib/libz/zlib.h>
30
31 #include "libsa.h"
32 #include "disk.h"
33 #include "softraid_amd64.h"
34
35 static int gpt_chk_mbr(struct dos_partition *, u_int64_t);
36 static uint64_t findopenbsd_gpt(struct sr_boot_volume *, const char **);
37
38 void
srprobe_meta_opt_load(struct sr_metadata * sm,struct sr_meta_opt_head * som)39 srprobe_meta_opt_load(struct sr_metadata *sm, struct sr_meta_opt_head *som)
40 {
41 struct sr_meta_opt_hdr *omh;
42 struct sr_meta_opt_item *omi;
43 #if 0
44 u_int8_t checksum[MD5_DIGEST_LENGTH];
45 #endif
46 int i;
47
48 /* Process optional metadata. */
49 omh = (struct sr_meta_opt_hdr *)((u_int8_t *)(sm + 1) +
50 sizeof(struct sr_meta_chunk) * sm->ssdi.ssd_chunk_no);
51 for (i = 0; i < sm->ssdi.ssd_opt_no; i++) {
52
53 #ifdef BIOS_DEBUG
54 printf("Found optional metadata of type %u, length %u\n",
55 omh->som_type, omh->som_length);
56 #endif
57
58 /* Unsupported old fixed length optional metadata. */
59 if (omh->som_length == 0) {
60 omh = (struct sr_meta_opt_hdr *)((void *)omh +
61 SR_OLD_META_OPT_SIZE);
62 continue;
63 }
64
65 /* Load variable length optional metadata. */
66 omi = alloc(sizeof(struct sr_meta_opt_item));
67 bzero(omi, sizeof(struct sr_meta_opt_item));
68 SLIST_INSERT_HEAD(som, omi, omi_link);
69 omi->omi_som = alloc(omh->som_length);
70 bzero(omi->omi_som, omh->som_length);
71 bcopy(omh, omi->omi_som, omh->som_length);
72
73 #if 0
74 /* XXX - Validate checksum. */
75 bcopy(&omi->omi_som->som_checksum, &checksum,
76 MD5_DIGEST_LENGTH);
77 bzero(&omi->omi_som->som_checksum, MD5_DIGEST_LENGTH);
78 sr_checksum(sc, omi->omi_som,
79 &omi->omi_som->som_checksum, omh->som_length);
80 if (bcmp(&checksum, &omi->omi_som->som_checksum,
81 sizeof(checksum)))
82 panic("%s: invalid optional metadata checksum",
83 DEVNAME(sc));
84 #endif
85
86 omh = (struct sr_meta_opt_hdr *)((void *)omh +
87 omh->som_length);
88 }
89 }
90
91 void
srprobe_keydisk_load(struct sr_metadata * sm)92 srprobe_keydisk_load(struct sr_metadata *sm)
93 {
94 struct sr_meta_opt_hdr *omh;
95 struct sr_meta_keydisk *skm;
96 struct sr_boot_keydisk *kd;
97 int i;
98
99 /* Process optional metadata. */
100 omh = (struct sr_meta_opt_hdr *)((u_int8_t *)(sm + 1) +
101 sizeof(struct sr_meta_chunk) * sm->ssdi.ssd_chunk_no);
102 for (i = 0; i < sm->ssdi.ssd_opt_no; i++) {
103
104 /* Unsupported old fixed length optional metadata. */
105 if (omh->som_length == 0) {
106 omh = (struct sr_meta_opt_hdr *)((void *)omh +
107 SR_OLD_META_OPT_SIZE);
108 continue;
109 }
110
111 if (omh->som_type != SR_OPT_KEYDISK) {
112 omh = (struct sr_meta_opt_hdr *)((void *)omh +
113 omh->som_length);
114 continue;
115 }
116
117 kd = alloc(sizeof(struct sr_boot_keydisk));
118 bcopy(&sm->ssdi.ssd_uuid, &kd->kd_uuid, sizeof(kd->kd_uuid));
119 skm = (struct sr_meta_keydisk*)omh;
120 bcopy(&skm->skm_maskkey, &kd->kd_key, sizeof(kd->kd_key));
121 SLIST_INSERT_HEAD(&sr_keydisks, kd, kd_link);
122 }
123 }
124
125 void
srprobe(void)126 srprobe(void)
127 {
128 struct sr_boot_volume *bv, *bv1, *bv2;
129 struct sr_boot_chunk *bc, *bc1, *bc2;
130 struct sr_meta_chunk *mc;
131 struct sr_metadata *md;
132 struct diskinfo *dip;
133 struct partition *pp;
134 int i, error, volno;
135 dev_t bsd_dev;
136 daddr_t off;
137
138 /* Probe for softraid volumes. */
139 SLIST_INIT(&sr_volumes);
140 SLIST_INIT(&sr_keydisks);
141
142 md = alloc(SR_META_SIZE * DEV_BSIZE);
143
144 TAILQ_FOREACH(dip, &disklist, list) {
145
146 /* Only check hard disks, skip those with I/O errors. */
147 if ((dip->bios_info.bios_number & 0x80) == 0 ||
148 (dip->bios_info.flags & BDI_INVALID))
149 continue;
150
151 /* Make sure disklabel has been read. */
152 if ((dip->bios_info.flags & (BDI_BADLABEL|BDI_GOODLABEL)) == 0)
153 continue;
154
155 for (i = 0; i < MAXPARTITIONS; i++) {
156
157 pp = &dip->disklabel.d_partitions[i];
158 if (pp->p_fstype != FS_RAID || pp->p_size == 0)
159 continue;
160
161 /* Read softraid metadata. */
162 bzero(md, SR_META_SIZE * DEV_BSIZE);
163 off = DL_SECTOBLK(&dip->disklabel, DL_GETPOFFSET(pp));
164 off += SR_META_OFFSET;
165 error = dip->diskio(F_READ, dip, off, SR_META_SIZE, md);
166 if (error)
167 continue;
168
169 /* Is this valid softraid metadata? */
170 if (md->ssdi.ssd_magic != SR_MAGIC)
171 continue;
172
173 /* XXX - validate checksum. */
174
175 /* Handle key disks separately... */
176 if (md->ssdi.ssd_level == SR_KEYDISK_LEVEL) {
177 srprobe_keydisk_load(md);
178 continue;
179 }
180
181 /* Locate chunk-specific metadata for this chunk. */
182 mc = (struct sr_meta_chunk *)(md + 1);
183 mc += md->ssdi.ssd_chunk_id;
184
185 bc = alloc(sizeof(struct sr_boot_chunk));
186 bc->sbc_diskinfo = dip;
187 bc->sbc_disk = dip->bios_info.bios_number;
188 bc->sbc_part = 'a' + i;
189
190 bsd_dev = dip->bios_info.bsd_dev;
191 bc->sbc_mm = MAKEBOOTDEV(B_TYPE(bsd_dev),
192 B_ADAPTOR(bsd_dev), B_CONTROLLER(bsd_dev),
193 B_UNIT(bsd_dev), bc->sbc_part - 'a');
194
195 bc->sbc_chunk_id = md->ssdi.ssd_chunk_id;
196 bc->sbc_ondisk = md->ssd_ondisk;
197 bc->sbc_state = mc->scm_status;
198
199 SLIST_FOREACH(bv, &sr_volumes, sbv_link) {
200 if (bcmp(&md->ssdi.ssd_uuid, &bv->sbv_uuid,
201 sizeof(md->ssdi.ssd_uuid)) == 0)
202 break;
203 }
204
205 if (bv == NULL) {
206 bv = alloc(sizeof(struct sr_boot_volume));
207 bzero(bv, sizeof(struct sr_boot_volume));
208 bv->sbv_level = md->ssdi.ssd_level;
209 bv->sbv_volid = md->ssdi.ssd_volid;
210 bv->sbv_chunk_no = md->ssdi.ssd_chunk_no;
211 bv->sbv_flags = md->ssdi.ssd_vol_flags;
212 bv->sbv_size = md->ssdi.ssd_size;
213 bv->sbv_secsize = md->ssdi.ssd_secsize;
214 bv->sbv_data_blkno = md->ssd_data_blkno;
215 bcopy(&md->ssdi.ssd_uuid, &bv->sbv_uuid,
216 sizeof(md->ssdi.ssd_uuid));
217 SLIST_INIT(&bv->sbv_chunks);
218 SLIST_INIT(&bv->sbv_meta_opt);
219
220 /* Load optional metadata for this volume. */
221 srprobe_meta_opt_load(md, &bv->sbv_meta_opt);
222
223 /* Maintain volume order. */
224 bv2 = NULL;
225 SLIST_FOREACH(bv1, &sr_volumes, sbv_link) {
226 if (bv1->sbv_volid > bv->sbv_volid)
227 break;
228 bv2 = bv1;
229 }
230 if (bv2 == NULL)
231 SLIST_INSERT_HEAD(&sr_volumes, bv,
232 sbv_link);
233 else
234 SLIST_INSERT_AFTER(bv2, bv, sbv_link);
235 }
236
237 /* Maintain chunk order. */
238 bc2 = NULL;
239 SLIST_FOREACH(bc1, &bv->sbv_chunks, sbc_link) {
240 if (bc1->sbc_chunk_id > bc->sbc_chunk_id)
241 break;
242 bc2 = bc1;
243 }
244 if (bc2 == NULL)
245 SLIST_INSERT_HEAD(&bv->sbv_chunks,
246 bc, sbc_link);
247 else
248 SLIST_INSERT_AFTER(bc2, bc, sbc_link);
249
250 bv->sbv_chunks_found++;
251 }
252 }
253
254 /*
255 * Assemble RAID volumes.
256 */
257 volno = 0;
258 SLIST_FOREACH(bv, &sr_volumes, sbv_link) {
259
260 /* Skip if this is a hotspare "volume". */
261 if (bv->sbv_level == SR_HOTSPARE_LEVEL &&
262 bv->sbv_chunk_no == 1)
263 continue;
264
265 /* Determine current ondisk version. */
266 bv->sbv_ondisk = 0;
267 SLIST_FOREACH(bc, &bv->sbv_chunks, sbc_link) {
268 if (bc->sbc_ondisk > bv->sbv_ondisk)
269 bv->sbv_ondisk = bc->sbc_ondisk;
270 }
271 SLIST_FOREACH(bc, &bv->sbv_chunks, sbc_link) {
272 if (bc->sbc_ondisk != bv->sbv_ondisk)
273 bc->sbc_state = BIOC_SDOFFLINE;
274 }
275
276 /* XXX - Check for duplicate chunks. */
277
278 /*
279 * Validate that volume has sufficient chunks for
280 * read-only access.
281 *
282 * XXX - check chunk states.
283 */
284 bv->sbv_state = BIOC_SVOFFLINE;
285 switch (bv->sbv_level) {
286 case 0:
287 case 'C':
288 case 'c':
289 if (bv->sbv_chunk_no == bv->sbv_chunks_found)
290 bv->sbv_state = BIOC_SVONLINE;
291 break;
292
293 case 1:
294 case 0x1C:
295 if (bv->sbv_chunk_no == bv->sbv_chunks_found)
296 bv->sbv_state = BIOC_SVONLINE;
297 else if (bv->sbv_chunks_found > 0)
298 bv->sbv_state = BIOC_SVDEGRADED;
299 break;
300 }
301
302 bv->sbv_unit = volno++;
303 if (bv->sbv_state != BIOC_SVOFFLINE)
304 printf(" sr%d%s", bv->sbv_unit,
305 bv->sbv_flags & BIOC_SCBOOTABLE ? "*" : "");
306 }
307
308 explicit_bzero(md, SR_META_SIZE * DEV_BSIZE);
309 free(md, SR_META_SIZE * DEV_BSIZE);
310 }
311
312 int
sr_strategy(struct sr_boot_volume * bv,int rw,daddr_t blk,size_t size,void * buf,size_t * rsize)313 sr_strategy(struct sr_boot_volume *bv, int rw, daddr_t blk, size_t size,
314 void *buf, size_t *rsize)
315 {
316 struct diskinfo *sr_dip, *dip;
317 struct sr_boot_chunk *bc;
318 struct aes_xts_ctx ctx;
319 size_t i, j, nsect;
320 daddr_t blkno;
321 u_char iv[8];
322 u_char *bp;
323 int err;
324
325 /* We only support read-only softraid. */
326 if (rw != F_READ)
327 return ENOTSUP;
328
329 /* Partition offset within softraid volume. */
330 sr_dip = (struct diskinfo *)bv->sbv_diskinfo;
331 blk += DL_SECTOBLK(&sr_dip->disklabel,
332 sr_dip->disklabel.d_partitions[bv->sbv_part - 'a'].p_offset);
333
334 if (bv->sbv_level == 0) {
335 return ENOTSUP;
336 } else if (bv->sbv_level == 1) {
337
338 /* Select first online chunk. */
339 SLIST_FOREACH(bc, &bv->sbv_chunks, sbc_link)
340 if (bc->sbc_state == BIOC_SDONLINE)
341 break;
342 if (bc == NULL)
343 return EIO;
344
345 dip = (struct diskinfo *)bc->sbc_diskinfo;
346 dip->bsddev = bc->sbc_mm;
347 blk += bv->sbv_data_blkno;
348
349 /* XXX - If I/O failed we should try another chunk... */
350 return dip->strategy(dip, rw, blk, size, buf, rsize);
351
352 } else if (bv->sbv_level == 'C' || bv->sbv_level == 0x1C) {
353
354 /* Select first online chunk. */
355 SLIST_FOREACH(bc, &bv->sbv_chunks, sbc_link)
356 if (bc->sbc_state == BIOC_SDONLINE)
357 break;
358 if (bc == NULL)
359 return EIO;
360
361 dip = (struct diskinfo *)bc->sbc_diskinfo;
362 dip->bsddev = bc->sbc_mm;
363
364 /* XXX - select correct key. */
365 aes_xts_setkey(&ctx, (u_char *)bv->sbv_keys, 64);
366
367 nsect = (size + DEV_BSIZE - 1) / DEV_BSIZE;
368 for (i = 0; i < nsect; i++) {
369 blkno = blk + i;
370 bp = ((u_char *)buf) + i * DEV_BSIZE;
371 err = dip->strategy(dip, rw, bv->sbv_data_blkno + blkno,
372 DEV_BSIZE, bp, NULL);
373 if (err != 0)
374 return err;
375
376 bcopy(&blkno, iv, sizeof(blkno));
377 aes_xts_reinit(&ctx, iv);
378 for (j = 0; j < DEV_BSIZE; j += AES_XTS_BLOCKSIZE)
379 aes_xts_decrypt(&ctx, bp + j);
380 }
381 if (rsize != NULL)
382 *rsize = nsect * DEV_BSIZE;
383
384 return err;
385
386 } else
387 return ENOTSUP;
388 }
389
390 /*
391 * Returns 0 if the MBR with the provided partition array is a GPT protective
392 * MBR, and returns 1 otherwise. A GPT protective MBR would have one and only
393 * one MBR partition, an EFI partition that either covers the whole disk or as
394 * much of it as is possible with a 32bit size field.
395 *
396 * Taken from kern/subr_disk.c.
397 *
398 * NOTE: MS always uses a size of UINT32_MAX for the EFI partition!**
399 */
400 static int
gpt_chk_mbr(struct dos_partition * dp,u_int64_t dsize)401 gpt_chk_mbr(struct dos_partition *dp, u_int64_t dsize)
402 {
403 struct dos_partition *dp2;
404 int efi, found, i;
405 u_int32_t psize;
406
407 found = efi = 0;
408 for (dp2=dp, i=0; i < NDOSPART; i++, dp2++) {
409 if (dp2->dp_typ == DOSPTYP_UNUSED)
410 continue;
411 found++;
412 if (dp2->dp_typ != DOSPTYP_EFI)
413 continue;
414 if (letoh32(dp2->dp_start) != GPTSECTOR)
415 continue;
416 psize = letoh32(dp2->dp_size);
417 if (psize <= (dsize - GPTSECTOR) || psize == UINT32_MAX)
418 efi++;
419 }
420 if (found == 1 && efi == 1)
421 return (0);
422
423 return (1);
424 }
425
426 static uint64_t
findopenbsd_gpt(struct sr_boot_volume * bv,const char ** err)427 findopenbsd_gpt(struct sr_boot_volume *bv, const char **err)
428 {
429 struct gpt_header gh;
430 int i, part, found;
431 uint64_t lba;
432 uint32_t orig_csum, new_csum;
433 uint32_t ghsize, ghpartsize, ghpartnum, ghpartspersec;
434 uint32_t gpsectors;
435 const char openbsd_uuid_code[] = GPT_UUID_OPENBSD;
436 struct gpt_partition gp;
437 static struct uuid *openbsd_uuid = NULL, openbsd_uuid_space;
438 u_char *buf;
439
440 /* Prepare OpenBSD UUID */
441 if (openbsd_uuid == NULL) {
442 /* XXX: should be replaced by uuid_dec_be() */
443 memcpy(&openbsd_uuid_space, openbsd_uuid_code,
444 sizeof(openbsd_uuid_space));
445 openbsd_uuid_space.time_low =
446 betoh32(openbsd_uuid_space.time_low);
447 openbsd_uuid_space.time_mid =
448 betoh16(openbsd_uuid_space.time_mid);
449 openbsd_uuid_space.time_hi_and_version =
450 betoh16(openbsd_uuid_space.time_hi_and_version);
451
452 openbsd_uuid = &openbsd_uuid_space;
453 }
454
455 if (bv->sbv_secsize > 4096) {
456 *err = "disk sector > 4096 bytes\n";
457 return (-1);
458 }
459 buf = alloc(bv->sbv_secsize);
460 if (buf == NULL) {
461 *err = "out of memory\n";
462 return (-1);
463 }
464 bzero(buf, bv->sbv_secsize);
465
466 /* GPT Header */
467 lba = GPTSECTOR;
468 sr_strategy(bv, F_READ, lba * (bv->sbv_secsize / DEV_BSIZE), DEV_BSIZE,
469 buf, NULL);
470 memcpy(&gh, buf, sizeof(gh));
471
472 /* Check signature */
473 if (letoh64(gh.gh_sig) != GPTSIGNATURE) {
474 *err = "bad GPT signature\n";
475 free(buf, bv->sbv_secsize);
476 return (-1);
477 }
478
479 if (letoh32(gh.gh_rev) != GPTREVISION) {
480 *err = "bad GPT revision\n";
481 free(buf, bv->sbv_secsize);
482 return (-1);
483 }
484
485 ghsize = letoh32(gh.gh_size);
486 if (ghsize < GPTMINHDRSIZE || ghsize > sizeof(struct gpt_header)) {
487 *err = "bad GPT header size\n";
488 free(buf, bv->sbv_secsize);
489 return (-1);
490 }
491
492 /* Check checksum */
493 orig_csum = gh.gh_csum;
494 gh.gh_csum = 0;
495 new_csum = crc32(0, (unsigned char *)&gh, ghsize);
496 gh.gh_csum = orig_csum;
497 if (letoh32(orig_csum) != new_csum) {
498 *err = "bad GPT header checksum\n";
499 free(buf, bv->sbv_secsize);
500 return (-1);
501 }
502
503 lba = letoh64(gh.gh_part_lba);
504 ghpartsize = letoh32(gh.gh_part_size);
505 ghpartspersec = bv->sbv_secsize / ghpartsize;
506 ghpartnum = letoh32(gh.gh_part_num);
507 gpsectors = (ghpartnum + ghpartspersec - 1) / ghpartspersec;
508 new_csum = crc32(0L, Z_NULL, 0);
509 found = 0;
510 for (i = 0; i < gpsectors; i++, lba++) {
511 sr_strategy(bv, F_READ, lba * (bv->sbv_secsize / DEV_BSIZE),
512 bv->sbv_secsize, buf, NULL);
513 for (part = 0; part < ghpartspersec; part++) {
514 if (ghpartnum == 0)
515 break;
516 new_csum = crc32(new_csum, buf + part * sizeof(gp),
517 sizeof(gp));
518 ghpartnum--;
519 if (found)
520 continue;
521 memcpy(&gp, buf + part * sizeof(gp), sizeof(gp));
522 if (memcmp(&gp.gp_type, openbsd_uuid,
523 sizeof(struct uuid)) == 0)
524 found = 1;
525 }
526 }
527
528 free(buf, bv->sbv_secsize);
529
530 if (new_csum != letoh32(gh.gh_part_csum)) {
531 *err = "bad GPT entries checksum\n";
532 return (-1);
533 }
534 if (found)
535 return (letoh64(gp.gp_lba_start));
536
537 return (-1);
538 }
539
540 const char *
sr_getdisklabel(struct sr_boot_volume * bv,struct disklabel * label)541 sr_getdisklabel(struct sr_boot_volume *bv, struct disklabel *label)
542 {
543 struct dos_partition *dp;
544 struct dos_mbr mbr;
545 const char *err = NULL;
546 u_int start = 0;
547 char buf[DEV_BSIZE];
548 int i;
549
550 /* Check for MBR to determine partition offset. */
551 bzero(&mbr, sizeof(mbr));
552 sr_strategy(bv, F_READ, DOSBBSECTOR, sizeof(mbr), &mbr, NULL);
553 if (gpt_chk_mbr(mbr.dmbr_parts, bv->sbv_size /
554 (bv->sbv_secsize / DEV_BSIZE)) == 0) {
555 start = findopenbsd_gpt(bv, &err);
556 if (start == (u_int)-1) {
557 if (err != NULL)
558 return (err);
559 return "no OpenBSD partition\n";
560 }
561 } else if (mbr.dmbr_sign == DOSMBR_SIGNATURE) {
562
563 /* Search for OpenBSD partition */
564 for (i = 0; i < NDOSPART; i++) {
565 dp = &mbr.dmbr_parts[i];
566 if (!dp->dp_size)
567 continue;
568 if (dp->dp_typ == DOSPTYP_OPENBSD) {
569 start = dp->dp_start;
570 break;
571 }
572 }
573 }
574
575 /* Read the disklabel. */
576 sr_strategy(bv, F_READ,
577 start * (bv->sbv_secsize / DEV_BSIZE) + DOS_LABELSECTOR,
578 sizeof(struct disklabel), buf, NULL);
579
580 #ifdef BIOS_DEBUG
581 printf("sr_getdisklabel: magic %lx\n",
582 ((struct disklabel *)buf)->d_magic);
583 for (i = 0; i < MAXPARTITIONS; i++)
584 printf("part %c: type = %d, size = %d, offset = %d\n", 'a' + i,
585 (int)((struct disklabel *)buf)->d_partitions[i].p_fstype,
586 (int)((struct disklabel *)buf)->d_partitions[i].p_size,
587 (int)((struct disklabel *)buf)->d_partitions[i].p_offset);
588 #endif
589
590 /* Fill in disklabel */
591 return (getdisklabel(buf, label));
592 }
593