/*	$NetBSD: mm.c,v 1.20 2017/11/26 14:29:48 maxv Exp $	*/

/*
 * Copyright (c) 2017 The NetBSD Foundation, Inc. All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "prekern.h"

#define PAD_TEXT	0xCC
#define PAD_RODATA	0x00
#define PAD_DATA	0x00

#define ELFROUND	64

static const uint8_t pads[4] = {
	[BTSEG_NONE] = 0x00,
	[BTSEG_TEXT] = PAD_TEXT,
	[BTSEG_RODATA] = PAD_RODATA,
	[BTSEG_DATA] = PAD_DATA
};

#define MM_PROT_READ	0x00
#define MM_PROT_WRITE	0x01
#define MM_PROT_EXECUTE	0x02

static const pt_entry_t protection_codes[3] = {
	[MM_PROT_READ] = PG_RO | PG_NX,
	[MM_PROT_WRITE] = PG_RW | PG_NX,
	[MM_PROT_EXECUTE] = PG_RO,
	/* RWX does not exist */
};

struct bootspace bootspace;

extern paddr_t kernpa_start, kernpa_end;
vaddr_t iom_base;

paddr_t pa_avail = 0;
static const vaddr_t tmpva = (PREKERNBASE + NKL2_KIMG_ENTRIES * NBPD_L2);

void
mm_init(paddr_t first_pa)
{
	pa_avail = first_pa;
}

static void
mm_enter_pa(paddr_t pa, vaddr_t va, pte_prot_t prot)
{
	if (PTE_BASE[pl1_i(va)] & PG_V) {
		fatal("mm_enter_pa: mapping already present");
	}
	PTE_BASE[pl1_i(va)] = pa | PG_V | protection_codes[prot];
}

static void
mm_reenter_pa(paddr_t pa, vaddr_t va, pte_prot_t prot)
{
	PTE_BASE[pl1_i(va)] = pa | PG_V | protection_codes[prot];
}

static void
mm_flush_va(vaddr_t va)
{
	asm volatile("invlpg (%0)" ::"r" (va) : "memory");
}

static paddr_t
mm_palloc(size_t npages)
{
	paddr_t pa;
	size_t i;

	/* Allocate the physical pages */
	pa = pa_avail;
	pa_avail += npages * PAGE_SIZE;

	/* Zero them out */
	for (i = 0; i < npages; i++) {
		mm_reenter_pa(pa + i * PAGE_SIZE, tmpva,
		    MM_PROT_READ|MM_PROT_WRITE);
		mm_flush_va(tmpva);
		memset((void *)tmpva, 0, PAGE_SIZE);
	}

	return pa;
}

static bool
mm_pte_is_valid(pt_entry_t pte)
{
	return ((pte & PG_V) != 0);
}

static void
mm_mprotect(vaddr_t startva, size_t size, pte_prot_t prot)
{
	size_t i, npages;
	vaddr_t va;
	paddr_t pa;

	ASSERT(size % PAGE_SIZE == 0);
	npages = size / PAGE_SIZE;

	for (i = 0; i < npages; i++) {
		va = startva + i * PAGE_SIZE;
		pa = (PTE_BASE[pl1_i(va)] & PG_FRAME);
		mm_reenter_pa(pa, va, prot);
		mm_flush_va(va);
	}
}
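
/*
 * Note on the helpers above: they all manipulate PTEs through the
 * recursive slot (PTE_BASE[pl1_i(va)]), which locore presumably installed
 * along with the rest of the bootstrap tables. Downgrading one mapped page
 * to read-only, for instance, is just (a sketch of the per-page body of
 * mm_mprotect()):
 *
 *	pa = PTE_BASE[pl1_i(va)] & PG_FRAME;
 *	mm_reenter_pa(pa, va, MM_PROT_READ);
 *	mm_flush_va(va);
 */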

void
mm_bootspace_mprotect(void)
{
	pte_prot_t prot;
	size_t i;

	/* Remap the kernel segments with proper permissions. */
	for (i = 0; i < BTSPACE_NSEGS; i++) {
		if (bootspace.segs[i].type == BTSEG_TEXT) {
			prot = MM_PROT_READ|MM_PROT_EXECUTE;
		} else if (bootspace.segs[i].type == BTSEG_RODATA) {
			prot = MM_PROT_READ;
		} else {
			continue;
		}
		mm_mprotect(bootspace.segs[i].va, bootspace.segs[i].sz, prot);
	}

	print_state(true, "Segments protection updated");
}

static size_t
mm_nentries_range(vaddr_t startva, vaddr_t endva, size_t pgsz)
{
	size_t npages;

	npages = roundup((endva / PAGE_SIZE), (pgsz / PAGE_SIZE)) -
	    rounddown((startva / PAGE_SIZE), (pgsz / PAGE_SIZE));
	return (npages / (pgsz / PAGE_SIZE));
}

static void
mm_map_tree(vaddr_t startva, vaddr_t endva)
{
	size_t i, nL4e, nL3e, nL2e;
	size_t L4e_idx, L3e_idx, L2e_idx;
	paddr_t pa;

	/* Build L4. */
	L4e_idx = pl4_i(startva);
	nL4e = mm_nentries_range(startva, endva, NBPD_L4);
	ASSERT(L4e_idx == 511);
	ASSERT(nL4e == 1);
	if (!mm_pte_is_valid(L4_BASE[L4e_idx])) {
		pa = mm_palloc(1);
		L4_BASE[L4e_idx] = pa | PG_V | PG_RW;
	}

	/* Build L3. */
	L3e_idx = pl3_i(startva);
	nL3e = mm_nentries_range(startva, endva, NBPD_L3);
	for (i = 0; i < nL3e; i++) {
		if (mm_pte_is_valid(L3_BASE[L3e_idx+i])) {
			continue;
		}
		pa = mm_palloc(1);
		L3_BASE[L3e_idx+i] = pa | PG_V | PG_RW;
	}

	/* Build L2. */
	L2e_idx = pl2_i(startva);
	nL2e = mm_nentries_range(startva, endva, NBPD_L2);
	for (i = 0; i < nL2e; i++) {
		if (mm_pte_is_valid(L2_BASE[L2e_idx+i])) {
			continue;
		}
		pa = mm_palloc(1);
		L2_BASE[L2e_idx+i] = pa | PG_V | PG_RW;
	}
}
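
/*
 * A worked example of mm_nentries_range(), taking small numbers for
 * illustration, with pgsz = NBPD_L2 (2MB, ie 0x200 pages): for
 * startva = 0x201000 and endva = 0x401000, we get
 * rounddown(0x201, 0x200) = 0x200 and roundup(0x401, 0x200) = 0x600, so
 * npages = 0x400 and the function returns 2: the range straddles two L2
 * entries, both of which mm_map_tree() must make valid.
 */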

static vaddr_t
mm_randva_kregion(size_t size, size_t pagesz)
{
	vaddr_t sva, eva;
	vaddr_t randva;
	uint64_t rnd;
	size_t i;
	bool ok;

	while (1) {
		prng_get_rand(&rnd, sizeof(rnd));
		randva = rounddown(KASLR_WINDOW_BASE +
		    rnd % (KASLR_WINDOW_SIZE - size), pagesz);

		/* Detect collisions */
		ok = true;
		for (i = 0; i < BTSPACE_NSEGS; i++) {
			if (bootspace.segs[i].type == BTSEG_NONE) {
				continue;
			}
			sva = bootspace.segs[i].va;
			eva = sva + bootspace.segs[i].sz;

			if ((sva <= randva) && (randva < eva)) {
				ok = false;
				break;
			}
			if ((sva < randva + size) && (randva + size <= eva)) {
				ok = false;
				break;
			}
			if (randva < sva && eva < (randva + size)) {
				ok = false;
				break;
			}
		}
		if (ok) {
			break;
		}
	}

	mm_map_tree(randva, randva + size);

	return randva;
}

static paddr_t
bootspace_getend(void)
{
	paddr_t pa, max = 0;
	size_t i;

	for (i = 0; i < BTSPACE_NSEGS; i++) {
		if (bootspace.segs[i].type == BTSEG_NONE) {
			continue;
		}
		pa = bootspace.segs[i].pa + bootspace.segs[i].sz;
		if (pa > max)
			max = pa;
	}

	return max;
}

static void
bootspace_addseg(int type, vaddr_t va, paddr_t pa, size_t sz)
{
	size_t i;

	for (i = 0; i < BTSPACE_NSEGS; i++) {
		if (bootspace.segs[i].type == BTSEG_NONE) {
			bootspace.segs[i].type = type;
			bootspace.segs[i].va = va;
			bootspace.segs[i].pa = pa;
			bootspace.segs[i].sz = sz;
			return;
		}
	}

	fatal("bootspace_addseg: segments full");
}

static size_t
mm_shift_segment(vaddr_t va, size_t pagesz, size_t elfsz, size_t elfalign)
{
	size_t shiftsize, offset;
	uint64_t rnd;

	if (elfalign == 0) {
		elfalign = ELFROUND;
	}

	ASSERT(pagesz >= elfalign);
	ASSERT(pagesz % elfalign == 0);
	shiftsize = roundup(elfsz, pagesz) - roundup(elfsz, elfalign);
	if (shiftsize == 0) {
		return 0;
	}

	prng_get_rand(&rnd, sizeof(rnd));
	offset = roundup(rnd % shiftsize, elfalign);
	ASSERT((va + offset) % elfalign == 0);

	memmove((void *)(va + offset), (void *)va, elfsz);

	return offset;
}
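
/*
 * Example of the entropy added by mm_shift_segment(): with pagesz =
 * NBPD_L2 (0x200000), elfalign = ELFROUND (64) and elfsz = 0x180000,
 * shiftsize = 0x200000 - 0x180000 = 0x80000. The segment can thus be slid
 * inside its 2MB window in 64-byte steps, i.e. 0x80000 / 64 = 8192
 * positions, roughly 13 bits of randomness on top of the page-aligned VA
 * picked by mm_randva_kregion().
 */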

static void
mm_map_head(void)
{
	size_t i, npages, size;
	uint64_t rnd;
	vaddr_t randva;

	/*
	 * To get the size of the head, we take a look at the read-only
	 * mapping of the kernel we created in locore. We're identity-mapped,
	 * so kernpa = kernva.
	 */
	size = elf_get_head_size((vaddr_t)kernpa_start);
	npages = size / PAGE_SIZE;

	prng_get_rand(&rnd, sizeof(rnd));
	randva = rounddown(HEAD_WINDOW_BASE + rnd % (HEAD_WINDOW_SIZE - size),
	    PAGE_SIZE);
	mm_map_tree(randva, randva + size);

	/* Enter the area and build the ELF info */
	for (i = 0; i < npages; i++) {
		mm_enter_pa(kernpa_start + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}
	elf_build_head(randva);

	/* Register the values in bootspace */
	bootspace.head.va = randva;
	bootspace.head.pa = kernpa_start;
	bootspace.head.sz = size;
}

vaddr_t
mm_map_segment(int segtype, paddr_t pa, size_t elfsz, size_t elfalign)
{
	size_t i, npages, size, pagesz, offset;
	vaddr_t randva;
	char pad;

	if (elfsz <= PAGE_SIZE) {
		pagesz = NBPD_L1;
	} else {
		pagesz = NBPD_L2;
	}

	size = roundup(elfsz, pagesz);
	randva = mm_randva_kregion(size, pagesz);

	npages = size / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(pa + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}

	offset = mm_shift_segment(randva, pagesz, elfsz, elfalign);
	ASSERT(offset + elfsz <= size);

	pad = pads[segtype];
	memset((void *)randva, pad, offset);
	memset((void *)(randva + offset + elfsz), pad, size - elfsz - offset);

	bootspace_addseg(segtype, randva, pa, size);

	return (randva + offset);
}

static void
mm_map_boot(void)
{
	size_t i, npages, size;
	vaddr_t randva;
	paddr_t bootpa;

	/*
	 * The "boot" region is special: its page tree has a fixed size, but
	 * the number of pages entered is lower.
	 */

	/* Create the page tree */
	size = (NKL2_KIMG_ENTRIES + 1) * NBPD_L2;
	randva = mm_randva_kregion(size, PAGE_SIZE);

	/* Enter the area and build the ELF info */
	bootpa = bootspace_getend();
	size = (pa_avail - bootpa);
	npages = size / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(bootpa + i * PAGE_SIZE,
		    randva + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}
	elf_build_boot(randva, bootpa);

	/* Enter the ISA I/O MEM */
	iom_base = randva + npages * PAGE_SIZE;
	npages = IOM_SIZE / PAGE_SIZE;
	for (i = 0; i < npages; i++) {
		mm_enter_pa(IOM_BEGIN + i * PAGE_SIZE,
		    iom_base + i * PAGE_SIZE, MM_PROT_READ|MM_PROT_WRITE);
	}

	/* Register the values in bootspace */
	bootspace.boot.va = randva;
	bootspace.boot.pa = bootpa;
	bootspace.boot.sz = (size_t)(iom_base + IOM_SIZE) -
	    (size_t)bootspace.boot.va;

	/* Initialize the values that are located in the "boot" region */
	extern uint64_t PDPpaddr;
	bootspace.spareva = bootspace.boot.va + NKL2_KIMG_ENTRIES * NBPD_L2;
	bootspace.pdir = bootspace.boot.va + (PDPpaddr - bootspace.boot.pa);
	bootspace.emodule = bootspace.boot.va + NKL2_KIMG_ENTRIES * NBPD_L2;
}
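
/*
 * On the two sizes used above: the page tree of the boot region is sized
 * for the maximum, (NKL2_KIMG_ENTRIES + 1) * NBPD_L2 bytes, the extra
 * NBPD_L2 presumably leaving headroom for the ISA I/O window and the
 * spare VA; PTEs, however, are entered only for the physical pages
 * actually consumed so far, [bootpa, pa_avail), plus IOM_SIZE bytes of
 * I/O memory. Hence npages being recomputed twice.
 */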

/*
 * There is a variable number of independent regions: one head, several
 * kernel segments, one boot. They are all mapped at random VAs.
 *
 * Head contains the ELF Header and ELF Section Headers, and we use them to
 * map the rest of the regions. Head must be placed in memory *before* the
 * other regions.
 *
 * At the end of this function, the bootspace structure is fully
 * constructed.
 */
void
mm_map_kernel(void)
{
	memset(&bootspace, 0, sizeof(bootspace));
	mm_map_head();
	print_state(true, "Head region mapped");
	elf_map_sections();
	print_state(true, "Segments mapped");
	mm_map_boot();
	print_state(true, "Boot region mapped");
}
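
/*
 * Expected call order, judging from this file alone (the actual caller
 * lives elsewhere in the prekern):
 *
 *	mm_init(first_pa);		(seed the physical page allocator)
 *	mm_map_kernel();		(head + segments + boot, in that order)
 *	mm_bootspace_mprotect();	(tighten permissions once the
 *					 segments are final)
 */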