1 /*- 2 * Copyright (C) 2010-2014 Nathan Whitehorn 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 18 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 21 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 22 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 23 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #include <stand.h> 27 #include <sys/param.h> 28 #include <sys/boot.h> 29 #ifdef LOADER_FDT_SUPPORT 30 #include <fdt_platform.h> 31 #endif 32 33 #include <machine/cpufunc.h> 34 #include <bootstrap.h> 35 #include "host_syscall.h" 36 #include "kboot.h" 37 #include "stand.h" 38 #include <smbios.h> 39 40 struct arch_switch archsw; 41 extern void *_end; 42 43 int kboot_getdev(void **vdev, const char *devspec, const char **path); 44 ssize_t kboot_copyin(const void *src, vm_offset_t dest, const size_t len); 45 ssize_t kboot_copyout(vm_offset_t src, void *dest, const size_t len); 46 ssize_t kboot_readin(readin_handle_t fd, vm_offset_t dest, const size_t len); 47 int kboot_autoload(void); 48 static void kboot_zfs_probe(void); 49 50 extern int command_fdt_internal(int argc, char *argv[]); 51 52 #define PA_INVAL (vm_offset_t)-1 53 static vm_offset_t pa_start = PA_INVAL; 54 static vm_offset_t padding; 55 static vm_offset_t offset; 56 57 static uint64_t commit_limit; 58 static uint64_t committed_as; 59 static uint64_t mem_avail; 60 61 static void 62 memory_limits(void) 63 { 64 int fd; 65 char buf[128]; 66 67 /* 68 * To properly size the slabs, we need to find how much memory we can 69 * commit to using. commit_limit is the max, while commited_as is the 70 * current total. We can use these later to allocate the largetst amount 71 * of memory possible so we can support larger ram disks than we could 72 * by using fixed segment sizes. We also grab the memory available so 73 * we don't use more than 49% of that. 74 */ 75 fd = open("host:/proc/meminfo", O_RDONLY); 76 if (fd != -1) { 77 while (fgetstr(buf, sizeof(buf), fd) > 0) { 78 if (strncmp(buf, "MemAvailable:", 13) == 0) { 79 mem_avail = strtoll(buf + 13, NULL, 0); 80 mem_avail <<= 10; /* Units are kB */ 81 } else if (strncmp(buf, "CommitLimit:", 12) == 0) { 82 commit_limit = strtoll(buf + 13, NULL, 0); 83 commit_limit <<= 10; /* Units are kB */ 84 } else if (strncmp(buf, "Committed_AS:", 13) == 0) { 85 committed_as = strtoll(buf + 14, NULL, 0); 86 committed_as <<= 10; /* Units are kB */ 87 } 88 } 89 } else { 90 /* Otherwise, on FreeBSD host, for testing 32GB host: */ 91 mem_avail = 31ul << 30; /* 31GB free */ 92 commit_limit = mem_avail * 9 / 10; /* 90% comittable */ 93 committed_as = 20ul << 20; /* 20MB used */ 94 } 95 printf("Commit limit: %lld Committed bytes %lld Available %lld\n", 96 (long long)commit_limit, (long long)committed_as, 97 (long long)mem_avail); 98 close(fd); 99 } 100 101 /* 102 * NB: getdev should likely be identical to this most places, except maybe 103 * we should move to storing the length of the platform devdesc. 104 */ 105 int 106 kboot_getdev(void **vdev, const char *devspec, const char **path) 107 { 108 struct devdesc **dev = (struct devdesc **)vdev; 109 int rv; 110 111 /* 112 * If it looks like this is just a path and no device, go with the 113 * current device. 114 */ 115 if (devspec == NULL || strchr(devspec, ':') == NULL) { 116 if (((rv = devparse(dev, getenv("currdev"), NULL)) == 0) && 117 (path != NULL)) 118 *path = devspec; 119 return (rv); 120 } 121 122 /* 123 * Try to parse the device name off the beginning of the devspec 124 */ 125 return (devparse(dev, devspec, path)); 126 } 127 128 static int 129 parse_args(int argc, const char **argv) 130 { 131 int howto = 0; 132 133 /* 134 * When run as init, sometimes argv[0] is a EFI-ESP path, other times 135 * it's the name of the init program, and sometimes it's a placeholder 136 * string, so we exclude it here. For the other args, look for DOS-like 137 * and Unix-like absolte paths and exclude parsing it if we find that, 138 * otherwise parse it as a command arg (so looking for '-X', 'foo' or 139 * 'foo=bar'). This is a little different than EFI where it argv[0] 140 * often times is the first argument passed in. There are cases when 141 * linux-booting via EFI that we have the EFI path we used to run 142 * bootXXX.efi as the arguments to init, so we need to exclude the paths 143 * there as well. 144 */ 145 for (int i = 1; i < argc; i++) { 146 if (argv[i][0] != '\\' && argv[i][0] != '/') { 147 howto |= boot_parse_arg(argv[i]); 148 } 149 } 150 151 return (howto); 152 } 153 154 static vm_offset_t rsdp; 155 156 static vm_offset_t 157 kboot_rsdp_from_efi(void) 158 { 159 char buffer[512 + 1]; 160 char *walker, *ep; 161 162 if (!file2str("/sys/firmware/efi/systab", buffer, sizeof(buffer))) 163 return (0); /* Not an EFI system */ 164 ep = buffer + strlen(buffer); 165 walker = buffer; 166 while (walker < ep) { 167 if (strncmp("ACPI20=", walker, 7) == 0) 168 return((vm_offset_t)strtoull(walker + 7, NULL, 0)); 169 if (strncmp("ACPI=", walker, 5) == 0) 170 return((vm_offset_t)strtoull(walker + 5, NULL, 0)); 171 walker += strcspn(walker, "\n") + 1; 172 } 173 return (0); 174 } 175 176 static void 177 find_acpi(void) 178 { 179 rsdp = kboot_rsdp_from_efi(); 180 #if 0 /* maybe for amd64 */ 181 if (rsdp == 0) 182 rsdp = find_rsdp_arch(); 183 #endif 184 } 185 186 vm_offset_t 187 acpi_rsdp(void) 188 { 189 return (rsdp); 190 } 191 192 bool 193 has_acpi(void) 194 { 195 return rsdp != 0; 196 } 197 198 /* 199 * SMBIOS support. We map the physical memory address we get into a VA in this 200 * address space with mmap with 64k pages. Once we're done, we cleanup any 201 * mappings we made. 202 */ 203 204 #define MAX_MAP 10 205 #define PAGE (64<<10) 206 207 static struct mapping 208 { 209 uintptr_t pa; 210 caddr_t va; 211 } map[MAX_MAP]; 212 static int smbios_fd; 213 static int nmap; 214 215 caddr_t ptov(uintptr_t pa) 216 { 217 caddr_t va; 218 uintptr_t pa2; 219 struct mapping *m = map; 220 221 pa2 = rounddown(pa, PAGE); 222 for (int i = 0; i < nmap; i++, m++) { 223 if (m->pa == pa2) { 224 return (m->va + pa - m->pa); 225 } 226 } 227 if (nmap == MAX_MAP) 228 panic("Too many maps for smbios"); 229 230 /* 231 * host_mmap returns small negative numbers on errors, can't return an 232 * error here, so we have to panic. The Linux wrapper will set errno 233 * based on this and then return HOST_MAP_FAILED. Since we're calling 234 * the raw system call we have to do that ourselves. 235 */ 236 va = host_mmap(0, PAGE, HOST_PROT_READ, HOST_MAP_SHARED, smbios_fd, pa2); 237 if (is_linux_error((long)va)) 238 panic("smbios mmap offset %#jx failed", (uintmax_t)pa2); 239 m = &map[nmap++]; 240 m->pa = pa2; 241 m->va = va; 242 return (m->va + pa - m->pa); 243 } 244 245 static void 246 smbios_cleanup(void) 247 { 248 for (int i = 0; i < nmap; i++) { 249 host_munmap(map[i].va, PAGE); 250 } 251 } 252 253 static vm_offset_t 254 kboot_find_smbios(void) 255 { 256 char buffer[512 + 1]; 257 char *walker, *ep; 258 259 if (!file2str("/sys/firmware/efi/systab", buffer, sizeof(buffer))) 260 return (0); /* Not an EFI system */ 261 ep = buffer + strlen(buffer); 262 walker = buffer; 263 while (walker <= ep) { 264 if (strncmp("SMBIOS3=", walker, 8) == 0) 265 return((vm_offset_t)strtoull(walker + 8, NULL, 0)); 266 if (strncmp("SMBIOS=", walker, 7) == 0) 267 return((vm_offset_t)strtoull(walker + 7, NULL, 0)); 268 walker += strcspn(walker, "\n") + 1; 269 } 270 return (0); 271 } 272 273 static void 274 find_smbios(void) 275 { 276 char buf[40]; 277 uintptr_t pa; 278 caddr_t va; 279 280 pa = kboot_find_smbios(); 281 printf("SMBIOS at %#jx\n", (uintmax_t)pa); 282 if (pa == 0) 283 return; 284 285 snprintf(buf, sizeof(buf), "%#jx", (uintmax_t)pa); 286 setenv("hint.smbios.0.mem", buf, 1); 287 smbios_fd = host_open("/dev/mem", O_RDONLY, 0); 288 if (smbios_fd < 0) { 289 printf("Can't open /dev/mem to read smbios\n"); 290 return; 291 } 292 va = ptov(pa); 293 printf("Start of smbios at pa %p va %p\n", (void *)pa, va); 294 smbios_detect(va); 295 smbios_cleanup(); 296 host_close(smbios_fd); 297 } 298 299 static void 300 parse_file(const char *fn) 301 { 302 struct stat st; 303 int fd = -1; 304 char *env = NULL; 305 306 if (stat(fn, &st) != 0) 307 return; 308 fd = open(fn, O_RDONLY); 309 if (fd == -1) 310 return; 311 env = malloc(st.st_size + 1); 312 if (env == NULL) 313 goto out; 314 if (read(fd, env, st.st_size) != st.st_size) 315 goto out; 316 env[st.st_size] = '\0'; 317 boot_parse_cmdline(env); 318 out: 319 free(env); 320 close(fd); 321 } 322 323 324 int 325 main(int argc, const char **argv) 326 { 327 void *heapbase; 328 const size_t heapsize = 64*1024*1024; 329 const char *bootdev; 330 331 archsw.arch_getdev = kboot_getdev; 332 archsw.arch_copyin = kboot_copyin; 333 archsw.arch_copyout = kboot_copyout; 334 archsw.arch_readin = kboot_readin; 335 archsw.arch_autoload = kboot_autoload; 336 archsw.arch_zfs_probe = kboot_zfs_probe; 337 338 /* Give us a sane world if we're running as init */ 339 do_init(); 340 341 /* 342 * Setup the heap, 64MB is minimum for ZFS booting 343 */ 344 heapbase = host_getmem(heapsize); 345 setheap(heapbase, heapbase + heapsize); 346 347 /* 348 * Set up console so we get error messages. 349 */ 350 cons_probe(); 351 352 /* 353 * Find acpi and smbios, if they exists. This allows command line and 354 * later scripts to override if necessary. 355 */ 356 find_acpi(); 357 find_smbios(); 358 359 /* Parse the command line args -- ignoring for now the console selection */ 360 parse_args(argc, argv); 361 362 hostfs_root = getenv("hostfs_root"); 363 if (hostfs_root == NULL) 364 hostfs_root = "/"; 365 366 /* Initialize all the devices */ 367 devinit(); 368 369 /* Figure out where we're booting from */ 370 bootdev = getenv("bootdev"); 371 if (bootdev == NULL) 372 bootdev = hostdisk_gen_probe(); 373 #if defined(LOADER_ZFS_SUPPORT) 374 if (bootdev == NULL || strcmp(bootdev, "zfs:") == 0) { 375 /* 376 * Pseudo device that says go find the right ZFS pool. This will be 377 * the first pool that we find that passes the sanity checks (eg looks 378 * like it might be vbootable) and sets currdev to the right thing based 379 * on active BEs, etc 380 */ 381 if (hostdisk_zfs_find_default()) 382 bootdev = getenv("currdev"); 383 } 384 #endif 385 if (bootdev != NULL) { 386 /* 387 * Otherwise, honor what's on the command line. If we've been 388 * given a specific ZFS partition, then we'll honor it w/o BE 389 * processing that would otherwise pick a different snapshot to 390 * boot than the default one in the pool. 391 */ 392 set_currdev(bootdev); 393 } else { 394 panic("Bootdev is still NULL"); 395 } 396 397 printf("Boot device: %s with hostfs_root %s\n", bootdev, hostfs_root); 398 399 printf("\n%s", bootprog_info); 400 401 setenv("LINES", "24", 1); 402 403 memory_limits(); 404 enumerate_memory_arch(); 405 406 interact(); /* doesn't return */ 407 408 return (0); 409 } 410 411 void 412 exit(int code) 413 { 414 host_exit(code); 415 __unreachable(); 416 } 417 418 void 419 delay(int usecs) 420 { 421 struct host_timeval tvi, tv; 422 uint64_t ti, t; 423 host_gettimeofday(&tvi, NULL); 424 ti = tvi.tv_sec*1000000 + tvi.tv_usec; 425 do { 426 host_gettimeofday(&tv, NULL); 427 t = tv.tv_sec*1000000 + tv.tv_usec; 428 } while (t < ti + usecs); 429 } 430 431 time_t 432 getsecs(void) 433 { 434 struct host_timeval tv; 435 host_gettimeofday(&tv, NULL); 436 return (tv.tv_sec); 437 } 438 439 time_t 440 time(time_t *tloc) 441 { 442 time_t rv; 443 444 rv = getsecs(); 445 if (tloc != NULL) 446 *tloc = rv; 447 448 return (rv); 449 } 450 451 struct host_kexec_segment loaded_segments[HOST_KEXEC_SEGMENT_MAX]; 452 int nkexec_segments = 0; 453 454 #define SEGALIGN (1ul<<20) 455 456 static ssize_t 457 get_phys_buffer(vm_offset_t dest, const size_t len, void **buf) 458 { 459 int i = 0; 460 const size_t segsize = 64*1024*1024; 461 size_t sz, amt, l; 462 463 if (nkexec_segments == HOST_KEXEC_SEGMENT_MAX) 464 panic("Tried to load too many kexec segments"); 465 for (i = 0; i < nkexec_segments; i++) { 466 if (dest >= (vm_offset_t)loaded_segments[i].mem && 467 dest < (vm_offset_t)loaded_segments[i].mem + 468 loaded_segments[i].bufsz) /* Need to use bufsz since memsz is in use size */ 469 goto out; 470 } 471 472 sz = segsize; 473 if (nkexec_segments == 0) { 474 /* how much space does this segment have */ 475 sz = space_avail(dest); 476 /* Clip to 45% of available memory (need 2 copies) */ 477 sz = MIN(sz, rounddown2(mem_avail * 45 / 100, SEGALIGN)); 478 printf("limit to 45%% of mem_avail %zd\n", sz); 479 /* And only use 95% of what we can allocate */ 480 sz = MIN(sz, 481 rounddown2((commit_limit - committed_as) * 95 / 100, SEGALIGN)); 482 printf("Allocating %zd MB for first segment\n", sz >> 20); 483 } 484 485 loaded_segments[nkexec_segments].buf = host_getmem(sz); 486 loaded_segments[nkexec_segments].bufsz = sz; 487 loaded_segments[nkexec_segments].mem = (void *)rounddown2(dest,SEGALIGN); 488 loaded_segments[nkexec_segments].memsz = 0; 489 490 i = nkexec_segments; 491 nkexec_segments++; 492 493 out: 494 /* 495 * Keep track of the highest amount used in a segment 496 */ 497 amt = dest - (vm_offset_t)loaded_segments[i].mem; 498 l = min(len,loaded_segments[i].bufsz - amt); 499 *buf = loaded_segments[i].buf + amt; 500 if (amt + l > loaded_segments[i].memsz) 501 loaded_segments[i].memsz = amt + l; 502 return (l); 503 } 504 505 ssize_t 506 kboot_copyin(const void *src, vm_offset_t dest, const size_t len) 507 { 508 ssize_t segsize, remainder; 509 void *destbuf; 510 511 if (pa_start == PA_INVAL) { 512 pa_start = kboot_get_phys_load_segment(); 513 // padding = 2 << 20; /* XXX amd64: revisit this when we make it work */ 514 padding = 0; 515 offset = dest; 516 get_phys_buffer(pa_start, len, &destbuf); 517 } 518 519 remainder = len; 520 do { 521 segsize = get_phys_buffer(dest + pa_start + padding - offset, remainder, &destbuf); 522 bcopy(src, destbuf, segsize); 523 remainder -= segsize; 524 src += segsize; 525 dest += segsize; 526 } while (remainder > 0); 527 528 return (len); 529 } 530 531 ssize_t 532 kboot_copyout(vm_offset_t src, void *dest, const size_t len) 533 { 534 ssize_t segsize, remainder; 535 void *srcbuf; 536 537 remainder = len; 538 do { 539 segsize = get_phys_buffer(src + pa_start + padding - offset, remainder, &srcbuf); 540 bcopy(srcbuf, dest, segsize); 541 remainder -= segsize; 542 src += segsize; 543 dest += segsize; 544 } while (remainder > 0); 545 546 return (len); 547 } 548 549 ssize_t 550 kboot_readin(readin_handle_t fd, vm_offset_t dest, const size_t len) 551 { 552 void *buf; 553 size_t resid, chunk, get; 554 ssize_t got; 555 vm_offset_t p; 556 557 p = dest; 558 559 chunk = min(PAGE_SIZE, len); 560 buf = malloc(chunk); 561 if (buf == NULL) { 562 printf("kboot_readin: buf malloc failed\n"); 563 return (0); 564 } 565 566 for (resid = len; resid > 0; resid -= got, p += got) { 567 get = min(chunk, resid); 568 got = VECTX_READ(fd, buf, get); 569 if (got <= 0) { 570 if (got < 0) 571 printf("kboot_readin: read failed\n"); 572 break; 573 } 574 575 kboot_copyin(buf, p, got); 576 } 577 578 free (buf); 579 return (len - resid); 580 } 581 582 int 583 kboot_autoload(void) 584 { 585 586 return (0); 587 } 588 589 void 590 kboot_kseg_get(int *nseg, void **ptr) 591 { 592 printf("kseg_get: %d segments\n", nkexec_segments); 593 printf("VA SZ PA MEMSZ\n"); 594 printf("---------------- -------- ---------------- -----\n"); 595 for (int a = 0; a < nkexec_segments; a++) { 596 /* 597 * Truncate each segment to just what we've used in the segment, 598 * rounded up to the next page. 599 */ 600 loaded_segments[a].memsz = roundup2(loaded_segments[a].memsz,PAGE_SIZE); 601 loaded_segments[a].bufsz = loaded_segments[a].memsz; 602 printf("%016jx %08jx %016jx %08jx\n", 603 (uintmax_t)loaded_segments[a].buf, 604 (uintmax_t)loaded_segments[a].bufsz, 605 (uintmax_t)loaded_segments[a].mem, 606 (uintmax_t)loaded_segments[a].memsz); 607 } 608 609 *nseg = nkexec_segments; 610 *ptr = &loaded_segments[0]; 611 } 612 613 static void 614 kboot_zfs_probe(void) 615 { 616 #if defined(LOADER_ZFS_SUPPORT) 617 /* 618 * Open all the disks and partitions we can find to see if there are ZFS 619 * pools on them. 620 */ 621 hostdisk_zfs_probe(); 622 #endif 623 } 624 625 #ifdef LOADER_FDT_SUPPORT 626 /* 627 * Since proper fdt command handling function is defined in fdt_loader_cmd.c, 628 * and declaring it as extern is in contradiction with COMMAND_SET() macro 629 * (which uses static pointer), we're defining wrapper function, which 630 * calls the proper fdt handling routine. 631 */ 632 static int 633 command_fdt(int argc, char *argv[]) 634 { 635 636 return (command_fdt_internal(argc, argv)); 637 } 638 639 COMMAND_SET(fdt, "fdt", "flattened device tree handling", command_fdt); 640 #endif 641