/* $NetBSD: mm.c,v 1.25 2020/02/15 10:41:25 maxv Exp $ */

/*
 * Copyright (c) 2017-2020 The NetBSD Foundation, Inc. All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "prekern.h"

#define ELFROUND	64

static const uint8_t pads[4] = {
	[BTSEG_NONE] = 0x00,
	[BTSEG_TEXT] = 0xCC,
	[BTSEG_RODATA] = 0x00,
	[BTSEG_DATA] = 0x00
};

#define MM_PROT_READ	0x00
#define MM_PROT_WRITE	0x01
#define MM_PROT_EXECUTE	0x02

static const pt_entry_t protection_codes[3] = {
	[MM_PROT_READ] = PTE_NX,
	[MM_PROT_WRITE] = PTE_W | PTE_NX,
	[MM_PROT_EXECUTE] = 0,
	/* RWX does not exist */
};

struct bootspace bootspace;

extern paddr_t kernpa_start, kernpa_end;
vaddr_t iom_base;

paddr_t pa_avail = 0;
static const vaddr_t tmpva = (PREKERNBASE + NKL2_KIMG_ENTRIES * NBPD_L2);

void
mm_init(paddr_t first_pa)
{
	pa_avail = first_pa;
}

static void
mm_enter_pa(paddr_t pa, vaddr_t va, pte_prot_t prot)
{
	if (PTE_BASE[pl1_i(va)] & PTE_P) {
		fatal("mm_enter_pa: mapping already present");
	}
	PTE_BASE[pl1_i(va)] = pa | PTE_P | protection_codes[prot];
}

static void
mm_reenter_pa(paddr_t pa, vaddr_t va, pte_prot_t prot)
{
	PTE_BASE[pl1_i(va)] = pa | PTE_P | protection_codes[prot];
}

static void
mm_flush_va(vaddr_t va)
{
	asm volatile("invlpg (%0)" ::"r" (va) : "memory");
}

static paddr_t
mm_palloc(size_t npages)
{
	paddr_t pa;
	size_t i;

	/* Allocate the physical pages */
	pa = pa_avail;
	pa_avail += npages * PAGE_SIZE;

	/* Zero them out */
	for (i = 0; i < npages; i++) {
		mm_reenter_pa(pa + i * PAGE_SIZE, tmpva,
		    MM_PROT_READ|MM_PROT_WRITE);
		mm_flush_va(tmpva);
		memset((void *)tmpva, 0, PAGE_SIZE);
	}

	return pa;
}

static bool
mm_pte_is_valid(pt_entry_t pte)
{
	return ((pte & PTE_P) != 0);
}

static void
mm_mprotect(vaddr_t startva, size_t size, pte_prot_t prot)
{
	size_t i, npages;
	vaddr_t va;
	paddr_t pa;

	ASSERT(size % PAGE_SIZE == 0);
	npages = size / PAGE_SIZE;

	for (i = 0; i < npages; i++) {
		va = startva + i * PAGE_SIZE;
		pa = (PTE_BASE[pl1_i(va)] & PTE_FRAME);
		mm_reenter_pa(pa, va, prot);
		mm_flush_va(va);
	}
}

void
mm_bootspace_mprotect(void)
{
	pte_prot_t prot;
	size_t i;

	/* Remap the kernel segments with proper permissions. */
	for (i = 0; i < BTSPACE_NSEGS; i++) {
		if (bootspace.segs[i].type == BTSEG_TEXT) {
			prot = MM_PROT_READ|MM_PROT_EXECUTE;
		} else if (bootspace.segs[i].type == BTSEG_RODATA) {
			prot = MM_PROT_READ;
		} else {
			continue;
		}
		mm_mprotect(bootspace.segs[i].va, bootspace.segs[i].sz, prot);
	}

	print_state(true, "Segments protection updated");
}
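/*
 * Number of page tree entries of size pgsz needed to cover the range
 * [startva, endva). For example, with pgsz = NBPD_L2 (2MB on amd64), a
 * range going from base+1MB to base+3MB, base being 2MB-aligned, crosses
 * two L2 entries, so the result is 2.
 */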
static size_t
mm_nentries_range(vaddr_t startva, vaddr_t endva, size_t pgsz)
{
	size_t npages;

	npages = roundup((endva / PAGE_SIZE), (pgsz / PAGE_SIZE)) -
	    rounddown((startva / PAGE_SIZE), (pgsz / PAGE_SIZE));
	return (npages / (pgsz / PAGE_SIZE));
}

static void
mm_map_tree(vaddr_t startva, vaddr_t endva)
{
	size_t i, nL4e, nL3e, nL2e;
	size_t L4e_idx, L3e_idx, L2e_idx;
	paddr_t pa;

	/* Build L4. */
	L4e_idx = pl4_i(startva);
	nL4e = mm_nentries_range(startva, endva, NBPD_L4);
	ASSERT(L4e_idx == 511);
	ASSERT(nL4e == 1);
	if (!mm_pte_is_valid(L4_BASE[L4e_idx])) {
		pa = mm_palloc(1);
		L4_BASE[L4e_idx] = pa | PTE_P | PTE_W;
	}

	/* Build L3. */
	L3e_idx = pl3_i(startva);
	nL3e = mm_nentries_range(startva, endva, NBPD_L3);
	for (i = 0; i < nL3e; i++) {
		if (mm_pte_is_valid(L3_BASE[L3e_idx+i])) {
			continue;
		}
		pa = mm_palloc(1);
		L3_BASE[L3e_idx+i] = pa | PTE_P | PTE_W;
	}

	/* Build L2. */
	L2e_idx = pl2_i(startva);
	nL2e = mm_nentries_range(startva, endva, NBPD_L2);
	for (i = 0; i < nL2e; i++) {
		if (mm_pte_is_valid(L2_BASE[L2e_idx+i])) {
			continue;
		}
		pa = mm_palloc(1);
		L2_BASE[L2e_idx+i] = pa | PTE_P | PTE_W;
	}
}

static vaddr_t
mm_randva_kregion(size_t size, size_t pagesz)
{
	vaddr_t sva, eva;
	vaddr_t randva;
	uint64_t rnd;
	size_t i;
	bool ok;

	while (1) {
		prng_get_rand(&rnd, sizeof(rnd));
		randva = rounddown(KASLR_WINDOW_BASE +
		    rnd % (KASLR_WINDOW_SIZE - size), pagesz);

		/* Detect collisions */
		ok = true;
		for (i = 0; i < BTSPACE_NSEGS; i++) {
			if (bootspace.segs[i].type == BTSEG_NONE) {
				continue;
			}
			sva = bootspace.segs[i].va;
			eva = sva + bootspace.segs[i].sz;

			if ((sva <= randva) && (randva < eva)) {
				ok = false;
				break;
			}
			if ((sva < randva + size) && (randva + size <= eva)) {
				ok = false;
				break;
			}
			if (randva < sva && eva < (randva + size)) {
				ok = false;
				break;
			}
		}
		if (ok) {
			break;
		}
	}

	mm_map_tree(randva, randva + size);

	return randva;
}

static paddr_t
bootspace_getend(void)
{
	paddr_t pa, max = 0;
	size_t i;

	for (i = 0; i < BTSPACE_NSEGS; i++) {
		if (bootspace.segs[i].type == BTSEG_NONE) {
			continue;
		}
		pa = bootspace.segs[i].pa + bootspace.segs[i].sz;
		if (pa > max)
			max = pa;
	}

	return max;
}

static void
bootspace_addseg(int type, vaddr_t va, paddr_t pa, size_t sz)
{
	size_t i;

	for (i = 0; i < BTSPACE_NSEGS; i++) {
		if (bootspace.segs[i].type == BTSEG_NONE) {
			bootspace.segs[i].type = type;
			bootspace.segs[i].va = va;
			bootspace.segs[i].pa = pa;
			bootspace.segs[i].sz = sz;
			return;
		}
	}

	fatal("bootspace_addseg: segments full");
}
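/*
 * Returns the random, ELF-aligned offset by which the segment ends up being
 * shifted. For example, with pagesz = NBPD_L2 (2MB), elfsz = 0x180100 and
 * elfalign = 64, the available slack is
 * roundup(0x180100, 2MB) - roundup(0x180100, 64) = 0x7ff00 bytes, and the
 * segment is moved by a random 64-byte-aligned offset within that slack.
 */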
static size_t
mm_shift_segment(vaddr_t va, size_t pagesz, size_t elfsz, size_t elfalign)
{
	size_t shiftsize, offset;
	uint64_t rnd;

	/*
	 * If possible, shift the segment in memory using a random offset.
	 * Once shifted, the segment still fits within the same page of size
	 * pagesz. Make sure to respect the ELF alignment constraint.
	 */

	if (elfalign == 0) {
		elfalign = ELFROUND;
	}

	ASSERT(pagesz >= elfalign);
	ASSERT(pagesz % elfalign == 0);
	shiftsize = roundup(elfsz, pagesz) - roundup(elfsz, elfalign);
	if (shiftsize == 0) {
		return 0;
	}

	prng_get_rand(&rnd, sizeof(rnd));
	offset = roundup(rnd % shiftsize, elfalign);
	ASSERT((va + offset) % elfalign == 0);

	memmove((void *)(va + offset), (void *)va, elfsz);

	return offset;
}

static void
mm_map_head(void)
{
	size_t i, npages, size;
	uint64_t rnd;
	vaddr_t randva;

	/*
	 * The HEAD window is 1GB below the main KASLR window. This ensures
	 * that the head always comes first in virtual memory, which we need
	 * because we use (headva + sh_offset), and sh_offset is unsigned.
	 */

	/*
	 * To get the size of the head, we look at the read-only mapping of
	 * the kernel we created in locore. We're identity mapped, so
	 * kernpa = kernva.
	 */
	size = elf_get_head_size((vaddr_t)kernpa_start);
	npages = size / PAGE_SIZE;

	/*
	 * Choose a random range of VAs in the HEAD window, and create the
	 * page tree for it.
	 */
	prng_get_rand(&rnd, sizeof(rnd));
	randva = rounddown(HEAD_WINDOW_BASE + rnd % (HEAD_WINDOW_SIZE - size),
	    PAGE_SIZE);
	mm_map_tree(randva, randva + size);

	/* Enter the area and build the ELF info */
	for (i = 0; i < npages; i++) {
		mm_enter_pa(kernpa_start + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}
	elf_build_head(randva);

	/* Register the values in bootspace */
	bootspace.head.va = randva;
	bootspace.head.pa = kernpa_start;
	bootspace.head.sz = size;
}
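/*
 * Map one kernel ELF segment, located at physical address pa and of size
 * elfsz, at a randomized VA. The region is rounded up to pagesz (one 4KB
 * page for segments that fit in a page, 2MB chunks otherwise), the segment
 * is shifted by a random offset inside that region, the surrounding padding
 * is filled (0xCC for text, zeros otherwise), and the segment is registered
 * in bootspace. Returns the VA at which the segment actually begins.
 */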
vaddr_t
mm_map_segment(int segtype, paddr_t pa, size_t elfsz, size_t elfalign)
{
	size_t i, npages, size, pagesz, offset;
	vaddr_t randva;
	char pad;

	if (elfsz <= PAGE_SIZE) {
		pagesz = NBPD_L1;
	} else {
		pagesz = NBPD_L2;
	}

	/* Create the page tree */
	size = roundup(elfsz, pagesz);
	randva = mm_randva_kregion(size, pagesz);

	/* Enter the segment */
	npages = size / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(pa + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}

	/* Shift the segment in memory */
	offset = mm_shift_segment(randva, pagesz, elfsz, elfalign);
	ASSERT(offset + elfsz <= size);

	/* Fill the padding */
	pad = pads[segtype];
	memset((void *)randva, pad, offset);
	memset((void *)(randva + offset + elfsz), pad, size - elfsz - offset);

	/* Register the bootspace information */
	bootspace_addseg(segtype, randva, pa, size);

	return (randva + offset);
}

static void
mm_map_boot(void)
{
	size_t i, npages, size;
	vaddr_t randva;
	paddr_t bootpa;

	/*
	 * The "boot" region is special: its page tree has a fixed size, but
	 * the number of pages entered is lower.
	 */

	/* Create the page tree */
	size = (NKL2_KIMG_ENTRIES + 1) * NBPD_L2;
	randva = mm_randva_kregion(size, PAGE_SIZE);

	/* Enter the area and build the ELF info */
	bootpa = bootspace_getend();
	size = (pa_avail - bootpa);
	npages = size / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(bootpa + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}
	elf_build_boot(randva, bootpa);

	/* Enter the ISA I/O MEM */
	iom_base = randva + npages * PAGE_SIZE;
	npages = IOM_SIZE / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(IOM_BEGIN + i * PAGE_SIZE,
		    iom_base + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}

	/* Register the values in bootspace */
	bootspace.boot.va = randva;
	bootspace.boot.pa = bootpa;
	bootspace.boot.sz = (size_t)(iom_base + IOM_SIZE) -
	    (size_t)bootspace.boot.va;

	/* Initialize the values that are located in the "boot" region */
	extern uint64_t PDPpaddr;
	bootspace.spareva = bootspace.boot.va + NKL2_KIMG_ENTRIES * NBPD_L2;
	bootspace.pdir = bootspace.boot.va + (PDPpaddr - bootspace.boot.pa);
	bootspace.smodule = (vaddr_t)iom_base + IOM_SIZE;
	bootspace.emodule = bootspace.boot.va + NKL2_KIMG_ENTRIES * NBPD_L2;
}

/*
 * The bootloader has set up the following layout of physical memory:
 * +------------+-----------------+---------------+------------------+-------+
 * | ELF HEADER | SECTION HEADERS | KERN SECTIONS | SYM+REL SECTIONS | EXTRA |
 * +------------+-----------------+---------------+------------------+-------+
 * which we abstract into several "regions":
 * +------------------------------+---------------+--------------------------+
 * |         Head region          |  Several segs |       Boot region        |
 * +------------------------------+---------------+--------------------------+
 * See loadfile_elf32.c:loadfile_dynamic() for the details.
 *
 * We create a variable number of independent regions: one head, several
 * kernel segments, and one boot. They are all mapped at random VAs.
 *
 * Head contains the ELF Header and ELF Section Headers, and we use them to
 * map the rest of the regions. Head must be placed in both virtual memory
 * and physical memory *before* the rest.
 *
 * The Kernel Sections are mapped at random VAs using individual segments
 * in bootspace.
 *
 * Boot contains various information, including the ELF Sym+Rel sections,
 * plus extra memory the prekern has used so far; it is a region that the
 * kernel will eventually use for module_map. Boot is placed *after* the
 * other regions in physical memory. In virtual memory, however, there is no
 * such constraint, so its VA is randomly selected in the main KASLR window.
 *
 * At the end of this function, the bootspace structure is fully constructed.
 */
void
mm_map_kernel(void)
{
	memset(&bootspace, 0, sizeof(bootspace));
	mm_map_head();
	print_state(true, "Head region mapped");
	elf_map_sections();
	print_state(true, "Segments mapped");
	mm_map_boot();
	print_state(true, "Boot region mapped");
}