/*	$NetBSD: mm.c,v 1.22 2018/06/20 11:49:37 maxv Exp $	*/

/*
 * Copyright (c) 2017 The NetBSD Foundation, Inc. All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "prekern.h"

#define ELFROUND	64

/* Filler byte per segment type; text is padded with 0xCC (int3). */
static const uint8_t pads[4] = {
	[BTSEG_NONE] = 0x00,
	[BTSEG_TEXT] = 0xCC,
	[BTSEG_RODATA] = 0x00,
	[BTSEG_DATA] = 0x00
};

#define MM_PROT_READ	0x00
#define MM_PROT_WRITE	0x01
#define MM_PROT_EXECUTE	0x02

static const pt_entry_t protection_codes[3] = {
	[MM_PROT_READ] = PG_RO | PG_NX,
	[MM_PROT_WRITE] = PG_RW | PG_NX,
	[MM_PROT_EXECUTE] = PG_RO,
	/* RWX does not exist */
};

struct bootspace bootspace;

extern paddr_t kernpa_start, kernpa_end;
vaddr_t iom_base;

paddr_t pa_avail = 0;
static const vaddr_t tmpva = (PREKERNBASE + NKL2_KIMG_ENTRIES * NBPD_L2);

void
mm_init(paddr_t first_pa)
{
	pa_avail = first_pa;
}

static void
mm_enter_pa(paddr_t pa, vaddr_t va, pte_prot_t prot)
{
	if (PTE_BASE[pl1_i(va)] & PG_V) {
		fatal("mm_enter_pa: mapping already present");
	}
	PTE_BASE[pl1_i(va)] = pa | PG_V | protection_codes[prot];
}

static void
mm_reenter_pa(paddr_t pa, vaddr_t va, pte_prot_t prot)
{
	PTE_BASE[pl1_i(va)] = pa | PG_V | protection_codes[prot];
}

static void
mm_flush_va(vaddr_t va)
{
	asm volatile("invlpg (%0)" ::"r" (va) : "memory");
}

/*
 * Allocate 'npages' contiguous physical pages from the pa_avail cursor and
 * zero them out through the temporary VA.
 */
static paddr_t
mm_palloc(size_t npages)
{
	paddr_t pa;
	size_t i;

	/* Allocate the physical pages */
	pa = pa_avail;
	pa_avail += npages * PAGE_SIZE;

	/* Zero them out */
	for (i = 0; i < npages; i++) {
		mm_reenter_pa(pa + i * PAGE_SIZE, tmpva,
		    MM_PROT_READ|MM_PROT_WRITE);
		mm_flush_va(tmpva);
		memset((void *)tmpva, 0, PAGE_SIZE);
	}

	return pa;
}

static bool
mm_pte_is_valid(pt_entry_t pte)
{
	return ((pte & PG_V) != 0);
}

static void
mm_mprotect(vaddr_t startva, size_t size, pte_prot_t prot)
{
	size_t i, npages;
	vaddr_t va;
	paddr_t pa;

	ASSERT(size % PAGE_SIZE == 0);
	npages = size / PAGE_SIZE;
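
	/*
	 * Re-enter each page's existing mapping with the new protection and
	 * invalidate its stale TLB entry.
	 */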
	for (i = 0; i < npages; i++) {
		va = startva + i * PAGE_SIZE;
		pa = (PTE_BASE[pl1_i(va)] & PG_FRAME);
		mm_reenter_pa(pa, va, prot);
		mm_flush_va(va);
	}
}

void
mm_bootspace_mprotect(void)
{
	pte_prot_t prot;
	size_t i;

	/* Remap the kernel segments with proper permissions. */
	for (i = 0; i < BTSPACE_NSEGS; i++) {
		if (bootspace.segs[i].type == BTSEG_TEXT) {
			prot = MM_PROT_READ|MM_PROT_EXECUTE;
		} else if (bootspace.segs[i].type == BTSEG_RODATA) {
			prot = MM_PROT_READ;
		} else {
			continue;
		}
		mm_mprotect(bootspace.segs[i].va, bootspace.segs[i].sz, prot);
	}

	print_state(true, "Segments protection updated");
}

/*
 * Number of 'pgsz'-sized page-tree entries needed to cover the range
 * [startva, endva).
 */
static size_t
mm_nentries_range(vaddr_t startva, vaddr_t endva, size_t pgsz)
{
	size_t npages;

	npages = roundup((endva / PAGE_SIZE), (pgsz / PAGE_SIZE)) -
	    rounddown((startva / PAGE_SIZE), (pgsz / PAGE_SIZE));
	return (npages / (pgsz / PAGE_SIZE));
}

/*
 * Create the paging levels (L4/L3/L2) needed to later enter 4KB mappings
 * in the range [startva, endva).
 */
static void
mm_map_tree(vaddr_t startva, vaddr_t endva)
{
	size_t i, nL4e, nL3e, nL2e;
	size_t L4e_idx, L3e_idx, L2e_idx;
	paddr_t pa;

	/* Build L4. */
	L4e_idx = pl4_i(startva);
	nL4e = mm_nentries_range(startva, endva, NBPD_L4);
	ASSERT(L4e_idx == 511);
	ASSERT(nL4e == 1);
	if (!mm_pte_is_valid(L4_BASE[L4e_idx])) {
		pa = mm_palloc(1);
		L4_BASE[L4e_idx] = pa | PG_V | PG_RW;
	}

	/* Build L3. */
	L3e_idx = pl3_i(startva);
	nL3e = mm_nentries_range(startva, endva, NBPD_L3);
	for (i = 0; i < nL3e; i++) {
		if (mm_pte_is_valid(L3_BASE[L3e_idx+i])) {
			continue;
		}
		pa = mm_palloc(1);
		L3_BASE[L3e_idx+i] = pa | PG_V | PG_RW;
	}

	/* Build L2. */
	L2e_idx = pl2_i(startva);
	nL2e = mm_nentries_range(startva, endva, NBPD_L2);
	for (i = 0; i < nL2e; i++) {
		if (mm_pte_is_valid(L2_BASE[L2e_idx+i])) {
			continue;
		}
		pa = mm_palloc(1);
		L2_BASE[L2e_idx+i] = pa | PG_V | PG_RW;
	}
}

/*
 * Pick a random VA in the KASLR window for a region of 'size' bytes, making
 * sure it does not overlap an already-registered segment, and build the page
 * tree for it.
 */
static vaddr_t
mm_randva_kregion(size_t size, size_t pagesz)
{
	vaddr_t sva, eva;
	vaddr_t randva;
	uint64_t rnd;
	size_t i;
	bool ok;

	while (1) {
		prng_get_rand(&rnd, sizeof(rnd));
		randva = rounddown(KASLR_WINDOW_BASE +
		    rnd % (KASLR_WINDOW_SIZE - size), pagesz);

		/* Detect collisions */
		ok = true;
		for (i = 0; i < BTSPACE_NSEGS; i++) {
			if (bootspace.segs[i].type == BTSEG_NONE) {
				continue;
			}
			sva = bootspace.segs[i].va;
			eva = sva + bootspace.segs[i].sz;

			if ((sva <= randva) && (randva < eva)) {
				ok = false;
				break;
			}
			if ((sva < randva + size) && (randva + size <= eva)) {
				ok = false;
				break;
			}
			if (randva < sva && eva < (randva + size)) {
				ok = false;
				break;
			}
		}
		if (ok) {
			break;
		}
	}

	mm_map_tree(randva, randva + size);

	return randva;
}

/* Return the highest physical address covered by the registered segments. */
static paddr_t
bootspace_getend(void)
{
	paddr_t pa, max = 0;
	size_t i;

	for (i = 0; i < BTSPACE_NSEGS; i++) {
		if (bootspace.segs[i].type == BTSEG_NONE) {
			continue;
		}
		pa = bootspace.segs[i].pa + bootspace.segs[i].sz;
		if (pa > max)
			max = pa;
	}

	return max;
}

/* Register a segment in the first free bootspace slot. */
static void
bootspace_addseg(int type, vaddr_t va, paddr_t pa, size_t sz)
{
	size_t i;

	for (i = 0; i < BTSPACE_NSEGS; i++) {
		if (bootspace.segs[i].type == BTSEG_NONE) {
			bootspace.segs[i].type = type;
			bootspace.segs[i].va = va;
			bootspace.segs[i].pa = pa;
			bootspace.segs[i].sz = sz;
			return;
		}
	}

	fatal("bootspace_addseg: segments full");
}

/*
 * Randomly shift the segment content within its page-aligned allocation,
 * preserving the ELF alignment constraint. Returns the byte offset applied.
 */
static size_t
mm_shift_segment(vaddr_t va, size_t pagesz, size_t elfsz, size_t elfalign)
{
	size_t shiftsize, offset;
	uint64_t rnd;

	if (elfalign == 0) {
		elfalign = ELFROUND;
	}

	ASSERT(pagesz >= elfalign);
	ASSERT(pagesz % elfalign == 0);
	shiftsize = roundup(elfsz, pagesz) - roundup(elfsz, elfalign);
	if (shiftsize == 0) {
		return 0;
	}

	prng_get_rand(&rnd, sizeof(rnd));
	offset = roundup(rnd % shiftsize, elfalign);
	ASSERT((va + offset) % elfalign == 0);

	memmove((void *)(va + offset), (void *)va, elfsz);

	return offset;
}

static void
mm_map_head(void)
{
	size_t i, npages, size;
	uint64_t rnd;
	vaddr_t randva;

	/*
	 * To get the size of the head, we look at the read-only mapping of
	 * the kernel that we created in locore. We are identity mapped, so
	 * kernpa = kernva.
	 */
	size = elf_get_head_size((vaddr_t)kernpa_start);
	npages = size / PAGE_SIZE;

	prng_get_rand(&rnd, sizeof(rnd));
	randva = rounddown(HEAD_WINDOW_BASE + rnd % (HEAD_WINDOW_SIZE - size),
	    PAGE_SIZE);
	mm_map_tree(randva, randva + size);

	/* Enter the area and build the ELF info */
	for (i = 0; i < npages; i++) {
		mm_enter_pa(kernpa_start + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}
	elf_build_head(randva);

	/* Register the values in bootspace */
	bootspace.head.va = randva;
	bootspace.head.pa = kernpa_start;
	bootspace.head.sz = size;
}

/*
 * Map an ELF segment (text, rodata or data) at a random VA, pad the unused
 * parts of the allocation, and register it in bootspace. Returns the VA at
 * which the segment content ends up.
 */
vaddr_t
mm_map_segment(int segtype, paddr_t pa, size_t elfsz, size_t elfalign)
{
	size_t i, npages, size, pagesz, offset;
	vaddr_t randva;
	char pad;

	if (elfsz <= PAGE_SIZE) {
		pagesz = NBPD_L1;
	} else {
		pagesz = NBPD_L2;
	}

	size = roundup(elfsz, pagesz);
	randva = mm_randva_kregion(size, pagesz);

	npages = size / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(pa + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}

	offset = mm_shift_segment(randva, pagesz, elfsz, elfalign);
	ASSERT(offset + elfsz <= size);

	pad = pads[segtype];
	memset((void *)randva, pad, offset);
	memset((void *)(randva + offset + elfsz), pad, size - elfsz - offset);

	bootspace_addseg(segtype, randva, pa, size);

	return (randva + offset);
}

static void
mm_map_boot(void)
{
	size_t i, npages, size;
	vaddr_t randva;
	paddr_t bootpa;

	/*
	 * The "boot" region is special: its page tree has a fixed size, but
	 * the number of pages entered is lower.
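	 * We build the tree for the whole (NKL2_KIMG_ENTRIES + 1) large-page
	 * window, but only enter the pages between the end of the kernel
	 * segments and pa_avail, followed by the ISA I/O window.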
	 */

	/* Create the page tree */
	size = (NKL2_KIMG_ENTRIES + 1) * NBPD_L2;
	randva = mm_randva_kregion(size, PAGE_SIZE);

	/* Enter the area and build the ELF info */
	bootpa = bootspace_getend();
	size = (pa_avail - bootpa);
	npages = size / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(bootpa + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}
	elf_build_boot(randva, bootpa);

	/* Enter the ISA I/O MEM */
	iom_base = randva + npages * PAGE_SIZE;
	npages = IOM_SIZE / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(IOM_BEGIN + i * PAGE_SIZE,
		    iom_base + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}

	/* Register the values in bootspace */
	bootspace.boot.va = randva;
	bootspace.boot.pa = bootpa;
	bootspace.boot.sz = (size_t)(iom_base + IOM_SIZE) -
	    (size_t)bootspace.boot.va;

	/* Initialize the values that are located in the "boot" region */
	extern uint64_t PDPpaddr;
	bootspace.spareva = bootspace.boot.va + NKL2_KIMG_ENTRIES * NBPD_L2;
	bootspace.pdir = bootspace.boot.va + (PDPpaddr - bootspace.boot.pa);
	bootspace.smodule = (vaddr_t)iom_base + IOM_SIZE;
	bootspace.emodule = bootspace.boot.va + NKL2_KIMG_ENTRIES * NBPD_L2;
}

/*
 * There is a variable number of independent regions: one head, several kernel
 * segments, one boot. They are all mapped at random VAs.
 *
 * The head contains the ELF Header and ELF Section Headers, which we use to
 * map the rest of the regions. The head must be placed in memory *before* the
 * other regions.
 *
 * At the end of this function, the bootspace structure is fully constructed.
 */
void
mm_map_kernel(void)
{
	memset(&bootspace, 0, sizeof(bootspace));
	mm_map_head();
	print_state(true, "Head region mapped");
	elf_map_sections();
	print_state(true, "Segments mapped");
	mm_map_boot();
	print_state(true, "Boot region mapped");
}