xref: /freebsd-src/stand/kboot/kboot/main.c (revision d2434697bc9cc2d36da0f356cc5942b33df87896)
1 /*-
2  * Copyright (C) 2010-2014 Nathan Whitehorn
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
18  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
21  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
22  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
23  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include <stand.h>
27 #include <sys/param.h>
28 #include <sys/boot.h>
29 #ifdef LOADER_FDT_SUPPORT
30 #include <fdt_platform.h>
31 #endif
32 
33 #include <machine/cpufunc.h>
34 #include <bootstrap.h>
35 #include "host_syscall.h"
36 #include "kboot.h"
37 #include "stand.h"
38 #include <smbios.h>
39 
40 struct arch_switch	archsw;
41 extern void *_end;
42 
43 int kboot_getdev(void **vdev, const char *devspec, const char **path);
44 ssize_t kboot_copyin(const void *src, vm_offset_t dest, const size_t len);
45 ssize_t kboot_copyout(vm_offset_t src, void *dest, const size_t len);
46 ssize_t kboot_readin(readin_handle_t fd, vm_offset_t dest, const size_t len);
47 int kboot_autoload(void);
48 static void kboot_zfs_probe(void);
49 
50 extern int command_fdt_internal(int argc, char *argv[]);
51 
52 #define PA_INVAL (vm_offset_t)-1
53 static vm_offset_t pa_start = PA_INVAL;
54 static vm_offset_t padding;
55 static vm_offset_t offset;
56 
57 static uint64_t commit_limit;
58 static uint64_t committed_as;
59 static uint64_t mem_avail;
60 
61 static void
62 memory_limits(void)
63 {
64 	int fd;
65 	char buf[128];
66 
67 	/*
68 	 * To properly size the slabs, we need to find how much memory we can
69 	 * commit to using. commit_limit is the max, while commited_as is the
70 	 * current total. We can use these later to allocate the largetst amount
71 	 * of memory possible so we can support larger ram disks than we could
72 	 * by using fixed segment sizes. We also grab the memory available so
73 	 * we don't use more than 49% of that.
74 	 */
75 	fd = open("host:/proc/meminfo", O_RDONLY);
76 	if (fd != -1) {
77 		while (fgetstr(buf, sizeof(buf), fd) > 0) {
78 			if (strncmp(buf, "MemAvailable:", 13) == 0) {
79 				mem_avail = strtoll(buf + 13, NULL, 0);
80 				mem_avail <<= 10; /* Units are kB */
81 			} else if (strncmp(buf, "CommitLimit:", 12) == 0) {
82 				commit_limit = strtoll(buf + 13, NULL, 0);
83 				commit_limit <<= 10; /* Units are kB */
84 			} else if (strncmp(buf, "Committed_AS:", 13) == 0) {
85 				committed_as = strtoll(buf + 14, NULL, 0);
86 				committed_as <<= 10; /* Units are kB */
87 			}
88 		}
89 	} else {
90 		/* Otherwise, on FreeBSD host, for testing 32GB host: */
91 		mem_avail = 31ul << 30;			/* 31GB free */
92 		commit_limit = mem_avail * 9 / 10;	/* 90% comittable */
93 		committed_as = 20ul << 20;		/* 20MB used */
94 	}
95 	printf("Commit limit: %lld Committed bytes %lld Available %lld\n",
96 	    (long long)commit_limit, (long long)committed_as,
97 	    (long long)mem_avail);
98 	close(fd);
99 }
100 
/*
 * Resolve a device specification into a device descriptor.
 *
 * vdev    - receives the descriptor (really a struct devdesc **).
 * devspec - "device:path", a bare path, or NULL.
 * path    - if not NULL, receives the path portion.
 *
 * Returns whatever devparse() returns.
 *
 * NB: getdev should likely be identical to this most places, except maybe
 * we should move to storing the length of the platform devdesc.
 */
int
kboot_getdev(void **vdev, const char *devspec, const char **path)
{
	struct devdesc **dev = (struct devdesc **)vdev;
	int rv;

	/* A ':' means the spec names its own device: parse it as given. */
	if (devspec != NULL && strchr(devspec, ':') != NULL)
		return (devparse(dev, devspec, path));

	/*
	 * Just a path (or nothing at all): use the current device, and on
	 * success hand the whole devspec back as the path.
	 */
	rv = devparse(dev, getenv("currdev"), NULL);
	if (rv == 0 && path != NULL)
		*path = devspec;
	return (rv);
}
127 
/*
 * Feed the command line arguments to boot_parse_arg() and return the OR of
 * the results.
 *
 * When run as init, sometimes argv[0] is an EFI-ESP path, other times it's
 * the name of the init program, and sometimes it's a placeholder string, so
 * argv[0] is never looked at. This is a little different than EFI, where
 * argv[0] is often the first real argument.
 *
 * Of the remaining arguments, anything that looks like a DOS-like ('\') or
 * Unix-like ('/') absolute path is skipped too: when linux-booting via EFI
 * the path used to run bootXXX.efi can show up among the arguments to init.
 * Everything else is parsed as a command arg ('-X', 'foo' or 'foo=bar').
 */
static int
parse_args(int argc, const char **argv)
{
	int howto = 0;
	int i;

	for (i = 1; i < argc; i++) {
		const char *arg = argv[i];

		if (arg[0] == '\\' || arg[0] == '/')
			continue;
		howto |= boot_parse_arg(arg);
	}

	return (howto);
}
153 
154 static vm_offset_t rsdp;
155 
156 static vm_offset_t
157 kboot_rsdp_from_efi(void)
158 {
159 	char buffer[512 + 1];
160 	char *walker, *ep;
161 
162 	if (!file2str("/sys/firmware/efi/systab", buffer, sizeof(buffer)))
163 		return (0);	/* Not an EFI system */
164 	ep = buffer + strlen(buffer);
165 	walker = buffer;
166 	while (walker < ep) {
167 		if (strncmp("ACPI20=", walker, 7) == 0)
168 			return((vm_offset_t)strtoull(walker + 7, NULL, 0));
169 		if (strncmp("ACPI=", walker, 5) == 0)
170 			return((vm_offset_t)strtoull(walker + 5, NULL, 0));
171 		walker += strcspn(walker, "\n") + 1;
172 	}
173 	return (0);
174 }
175 
176 static void
177 find_acpi(void)
178 {
179 	rsdp = kboot_rsdp_from_efi();
180 #if 0	/* maybe for amd64 */
181 	if (rsdp == 0)
182 		rsdp = find_rsdp_arch();
183 #endif
184 }
185 
186 vm_offset_t
187 acpi_rsdp(void)
188 {
189 	return (rsdp);
190 }
191 
192 bool
193 has_acpi(void)
194 {
195 	return rsdp != 0;
196 }
197 
198 /*
199  * SMBIOS support. We map the physical memory address we get into a VA in this
200  * address space with mmap with 64k pages. Once we're done, we cleanup any
201  * mappings we made.
202  */
203 
204 #define MAX_MAP	10
205 #define PAGE	(64<<10)
206 
207 static struct mapping
208 {
209 	uintptr_t pa;
210 	caddr_t va;
211 } map[MAX_MAP];
212 static int smbios_fd;
213 static int nmap;
214 
215 caddr_t ptov(uintptr_t pa)
216 {
217 	caddr_t va;
218 	uintptr_t pa2;
219 	struct mapping *m = map;
220 
221 	pa2 = rounddown(pa, PAGE);
222 	for (int i = 0; i < nmap; i++, m++) {
223 		if (m->pa == pa2) {
224 			return (m->va + pa - m->pa);
225 		}
226 	}
227 	if (nmap == MAX_MAP)
228 		panic("Too many maps for smbios");
229 
230 	/*
231 	 * host_mmap returns small negative numbers on errors, can't return an
232 	 * error here, so we have to panic. The Linux wrapper will set errno
233 	 * based on this and then return HOST_MAP_FAILED. Since we're calling
234 	 * the raw system call we have to do that ourselves.
235 	 */
236 	va = host_mmap(0, PAGE, HOST_PROT_READ, HOST_MAP_SHARED, smbios_fd, pa2);
237 	if (is_linux_error((long)va))
238 		panic("smbios mmap offset %#jx failed", (uintmax_t)pa2);
239 	m = &map[nmap++];
240 	m->pa = pa2;
241 	m->va = va;
242 	return (m->va + pa - m->pa);
243 }
244 
245 static void
246 smbios_cleanup(void)
247 {
248 	for (int i = 0; i < nmap; i++) {
249 		host_munmap(map[i].va, PAGE);
250 	}
251 }
252 
253 static vm_offset_t
254 kboot_find_smbios(void)
255 {
256 	char buffer[512 + 1];
257 	char *walker, *ep;
258 
259 	if (!file2str("/sys/firmware/efi/systab", buffer, sizeof(buffer)))
260 		return (0);	/* Not an EFI system */
261 	ep = buffer + strlen(buffer);
262 	walker = buffer;
263 	while (walker <= ep) {
264 		if (strncmp("SMBIOS3=", walker, 8) == 0)
265 			return((vm_offset_t)strtoull(walker + 8, NULL, 0));
266 		if (strncmp("SMBIOS=", walker, 7) == 0)
267 			return((vm_offset_t)strtoull(walker + 7, NULL, 0));
268 		walker += strcspn(walker, "\n") + 1;
269 	}
270 	return (0);
271 }
272 
273 static void
274 find_smbios(void)
275 {
276 	char buf[40];
277 	uintptr_t pa;
278 	caddr_t va;
279 
280 	pa = kboot_find_smbios();
281 	printf("SMBIOS at %#jx\n", (uintmax_t)pa);
282 	if (pa == 0)
283 		return;
284 
285 	snprintf(buf, sizeof(buf), "%#jx", (uintmax_t)pa);
286 	setenv("hint.smbios.0.mem", buf, 1);
287 	smbios_fd = host_open("/dev/mem", O_RDONLY, 0);
288 	if (smbios_fd < 0) {
289 		printf("Can't open /dev/mem to read smbios\n");
290 		return;
291 	}
292 	va = ptov(pa);
293 	printf("Start of smbios at pa %p va %p\n", (void *)pa, va);
294 	smbios_detect(va);
295 	smbios_cleanup();
296 	host_close(smbios_fd);
297 }
298 
/*
 * Read the whole of file fn into memory and hand its contents, as one
 * NUL-terminated string, to boot_parse_cmdline().
 *
 * Nothing happens (and nothing is reported) if the file cannot be stat'ed,
 * opened, buffered or read in full.
 */
static void
parse_file(const char *fn)
{
	struct stat st;
	char *env;
	int fd;

	if (stat(fn, &st) != 0)
		return;
	if ((fd = open(fn, O_RDONLY)) == -1)
		return;

	/* One extra byte for the terminator. */
	env = malloc(st.st_size + 1);
	if (env != NULL && read(fd, env, st.st_size) == st.st_size) {
		env[st.st_size] = '\0';
		boot_parse_cmdline(env);
	}

	free(env);
	close(fd);
}
322 
323 
324 int
325 main(int argc, const char **argv)
326 {
327 	void *heapbase;
328 	const size_t heapsize = 64*1024*1024;
329 	const char *bootdev;
330 
331 	archsw.arch_getdev = kboot_getdev;
332 	archsw.arch_copyin = kboot_copyin;
333 	archsw.arch_copyout = kboot_copyout;
334 	archsw.arch_readin = kboot_readin;
335 	archsw.arch_autoload = kboot_autoload;
336 	archsw.arch_zfs_probe = kboot_zfs_probe;
337 
338 	/* Give us a sane world if we're running as init */
339 	do_init();
340 
341 	/*
342 	 * Setup the heap, 64MB is minimum for ZFS booting
343 	 */
344 	heapbase = host_getmem(heapsize);
345 	setheap(heapbase, heapbase + heapsize);
346 
347 	/*
348 	 * Set up console so we get error messages.
349 	 */
350 	cons_probe();
351 
352 	/*
353 	 * Find acpi and smbios, if they exists. This allows command line and
354 	 * later scripts to override if necessary.
355 	 */
356 	find_acpi();
357 	find_smbios();
358 
359 	/* Parse the command line args -- ignoring for now the console selection */
360 	parse_args(argc, argv);
361 
362 	hostfs_root = getenv("hostfs_root");
363 	if (hostfs_root == NULL)
364 		hostfs_root = "/";
365 
366 	/* Initialize all the devices */
367 	devinit();
368 
369 	/* Figure out where we're booting from */
370 	bootdev = getenv("bootdev");
371 	if (bootdev == NULL)
372 		bootdev = hostdisk_gen_probe();
373 #if defined(LOADER_ZFS_SUPPORT)
374 	if (bootdev == NULL || strcmp(bootdev, "zfs:") == 0) {
375 		/*
376 		 * Pseudo device that says go find the right ZFS pool. This will be
377 		 * the first pool that we find that passes the sanity checks (eg looks
378 		 * like it might be vbootable) and sets currdev to the right thing based
379 		 * on active BEs, etc
380 		 */
381 		if (hostdisk_zfs_find_default())
382 			bootdev = getenv("currdev");
383 	}
384 #endif
385 	if (bootdev != NULL) {
386 		/*
387 		 * Otherwise, honor what's on the command line. If we've been
388 		 * given a specific ZFS partition, then we'll honor it w/o BE
389 		 * processing that would otherwise pick a different snapshot to
390 		 * boot than the default one in the pool.
391 		 */
392 		set_currdev(bootdev);
393 	} else {
394 		panic("Bootdev is still NULL");
395 	}
396 
397 	printf("Boot device: %s with hostfs_root %s\n", bootdev, hostfs_root);
398 
399 	printf("\n%s", bootprog_info);
400 
401 	setenv("LINES", "24", 1);
402 
403 	memory_limits();
404 	enumerate_memory_arch();
405 
406 	interact();			/* doesn't return */
407 
408 	return (0);
409 }
410 
/*
 * Leave the loader by passing code to host_exit(). Control is not expected
 * to come back from that call, which __unreachable() states explicitly.
 */
void
exit(int code)
{

	host_exit(code);
	__unreachable();
}
417 
418 void
419 delay(int usecs)
420 {
421 	struct host_timeval tvi, tv;
422 	uint64_t ti, t;
423 	host_gettimeofday(&tvi, NULL);
424 	ti = tvi.tv_sec*1000000 + tvi.tv_usec;
425 	do {
426 		host_gettimeofday(&tv, NULL);
427 		t = tv.tv_sec*1000000 + tv.tv_usec;
428 	} while (t < ti + usecs);
429 }
430 
431 time_t
432 getsecs(void)
433 {
434 	struct host_timeval tv;
435 	host_gettimeofday(&tv, NULL);
436 	return (tv.tv_sec);
437 }
438 
/*
 * time(3) lookalike built on getsecs(): returns the current time in seconds
 * and, if tloc is not NULL, stores the same value through it.
 */
time_t
time(time_t *tloc)
{
	const time_t now = getsecs();

	if (tloc != NULL)
		*tloc = now;
	return (now);
}
450 
451 struct host_kexec_segment loaded_segments[HOST_KEXEC_SEGMENT_MAX];
452 int nkexec_segments = 0;
453 
454 #define SEGALIGN (1ul<<20)
455 
456 static ssize_t
457 get_phys_buffer(vm_offset_t dest, const size_t len, void **buf)
458 {
459 	int i = 0;
460 	const size_t segsize = 64*1024*1024;
461 	size_t sz, amt, l;
462 
463 	if (nkexec_segments == HOST_KEXEC_SEGMENT_MAX)
464 		panic("Tried to load too many kexec segments");
465 	for (i = 0; i < nkexec_segments; i++) {
466 		if (dest >= (vm_offset_t)loaded_segments[i].mem &&
467 		    dest < (vm_offset_t)loaded_segments[i].mem +
468 		    loaded_segments[i].bufsz) /* Need to use bufsz since memsz is in use size */
469 			goto out;
470 	}
471 
472 	sz = segsize;
473 	if (nkexec_segments == 0) {
474 		/* how much space does this segment have */
475 		sz = space_avail(dest);
476 		/* Clip to 45% of available memory (need 2 copies) */
477 		sz = MIN(sz, rounddown2(mem_avail * 45 / 100, SEGALIGN));
478 		printf("limit to 45%% of mem_avail %zd\n", sz);
479 		/* And only use 95% of what we can allocate */
480 		sz = MIN(sz,
481 		    rounddown2((commit_limit - committed_as) * 95 / 100, SEGALIGN));
482 		printf("Allocating %zd MB for first segment\n", sz >> 20);
483 	}
484 
485 	loaded_segments[nkexec_segments].buf = host_getmem(sz);
486 	loaded_segments[nkexec_segments].bufsz = sz;
487 	loaded_segments[nkexec_segments].mem = (void *)rounddown2(dest,SEGALIGN);
488 	loaded_segments[nkexec_segments].memsz = 0;
489 
490 	i = nkexec_segments;
491 	nkexec_segments++;
492 
493 out:
494 	/*
495 	 * Keep track of the highest amount used in a segment
496 	 */
497 	amt = dest - (vm_offset_t)loaded_segments[i].mem;
498 	l = min(len,loaded_segments[i].bufsz - amt);
499 	*buf = loaded_segments[i].buf + amt;
500 	if (amt + l > loaded_segments[i].memsz)
501 		loaded_segments[i].memsz = amt + l;
502 	return (l);
503 }
504 
505 ssize_t
506 kboot_copyin(const void *src, vm_offset_t dest, const size_t len)
507 {
508 	ssize_t segsize, remainder;
509 	void *destbuf;
510 
511 	if (pa_start == PA_INVAL) {
512 		pa_start = kboot_get_phys_load_segment();
513 //		padding = 2 << 20; /* XXX amd64: revisit this when we make it work */
514 		padding = 0;
515 		offset = dest;
516 		get_phys_buffer(pa_start, len, &destbuf);
517 	}
518 
519 	remainder = len;
520 	do {
521 		segsize = get_phys_buffer(dest + pa_start + padding - offset, remainder, &destbuf);
522 		bcopy(src, destbuf, segsize);
523 		remainder -= segsize;
524 		src += segsize;
525 		dest += segsize;
526 	} while (remainder > 0);
527 
528 	return (len);
529 }
530 
531 ssize_t
532 kboot_copyout(vm_offset_t src, void *dest, const size_t len)
533 {
534 	ssize_t segsize, remainder;
535 	void *srcbuf;
536 
537 	remainder = len;
538 	do {
539 		segsize = get_phys_buffer(src + pa_start + padding - offset, remainder, &srcbuf);
540 		bcopy(srcbuf, dest, segsize);
541 		remainder -= segsize;
542 		src += segsize;
543 		dest += segsize;
544 	} while (remainder > 0);
545 
546 	return (len);
547 }
548 
549 ssize_t
550 kboot_readin(readin_handle_t fd, vm_offset_t dest, const size_t len)
551 {
552 	void            *buf;
553 	size_t          resid, chunk, get;
554 	ssize_t         got;
555 	vm_offset_t     p;
556 
557 	p = dest;
558 
559 	chunk = min(PAGE_SIZE, len);
560 	buf = malloc(chunk);
561 	if (buf == NULL) {
562 		printf("kboot_readin: buf malloc failed\n");
563 		return (0);
564 	}
565 
566 	for (resid = len; resid > 0; resid -= got, p += got) {
567 		get = min(chunk, resid);
568 		got = VECTX_READ(fd, buf, get);
569 		if (got <= 0) {
570 			if (got < 0)
571 				printf("kboot_readin: read failed\n");
572 			break;
573 		}
574 
575 		kboot_copyin(buf, p, got);
576 	}
577 
578 	free (buf);
579 	return (len - resid);
580 }
581 
/*
 * archsw autoload hook. There is nothing to load automatically here, so
 * this only reports success (0).
 */
int
kboot_autoload(void)
{
	return (0);
}
588 
589 void
590 kboot_kseg_get(int *nseg, void **ptr)
591 {
592 	printf("kseg_get: %d segments\n", nkexec_segments);
593 	printf("VA               SZ       PA               MEMSZ\n");
594 	printf("---------------- -------- ---------------- -----\n");
595 	for (int a = 0; a < nkexec_segments; a++) {
596 		/*
597 		 * Truncate each segment to just what we've used in the segment,
598 		 * rounded up to the next page.
599 		 */
600 		loaded_segments[a].memsz = roundup2(loaded_segments[a].memsz,PAGE_SIZE);
601 		loaded_segments[a].bufsz = loaded_segments[a].memsz;
602 		printf("%016jx %08jx %016jx %08jx\n",
603 			(uintmax_t)loaded_segments[a].buf,
604 			(uintmax_t)loaded_segments[a].bufsz,
605 			(uintmax_t)loaded_segments[a].mem,
606 			(uintmax_t)loaded_segments[a].memsz);
607 	}
608 
609 	*nseg = nkexec_segments;
610 	*ptr = &loaded_segments[0];
611 }
612 
/*
 * archsw zfs_probe hook. With ZFS support compiled in this defers to
 * hostdisk_zfs_probe(); without it the function does nothing.
 */
static void
kboot_zfs_probe(void)
{
#if defined(LOADER_ZFS_SUPPORT)
	/*
	 * Look at every disk and partition we can find and see whether
	 * there is a ZFS pool on it.
	 */
	hostdisk_zfs_probe();
#endif
}
624 
#ifdef LOADER_FDT_SUPPORT
/*
 * The "fdt" command.
 *
 * The real handler lives in fdt_loader_cmd.c. COMMAND_SET() uses a static
 * pointer, which does not go together with declaring the handler extern, so
 * the command is registered with this local wrapper and the wrapper forwards
 * to the real routine.
 */
static int
command_fdt(int argc, char *argv[])
{
	return (command_fdt_internal(argc, argv));
}

COMMAND_SET(fdt, "fdt", "flattened device tree handling", command_fdt);
#endif
641