1 /* $NetBSD: acpi_srat.c,v 1.8 2019/12/27 12:51:57 ad Exp $ */ 2 3 /* 4 * Copyright (c) 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Christoph Egger. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __KERNEL_RCSID(0, "$NetBSD: acpi_srat.c,v 1.8 2019/12/27 12:51:57 ad Exp $"); 34 35 #include <sys/param.h> 36 #include <sys/kmem.h> 37 #include <sys/systm.h> 38 39 #include <dev/acpi/acpivar.h> 40 #include <dev/acpi/acpi_srat.h> 41 42 #include <uvm/uvm_extern.h> 43 44 static ACPI_TABLE_SRAT *srat; 45 46 static uint32_t nnodes; /* Number of NUMA nodes */ 47 static struct acpisrat_node *node_array; /* Array of NUMA nodes */ 48 static uint32_t ncpus; /* Number of CPUs */ 49 static struct acpisrat_cpu *cpu_array; /* Array of cpus */ 50 static uint32_t nmems; /* Number of Memory ranges */ 51 static struct acpisrat_mem *mem_array; 52 53 struct cpulist { 54 struct acpisrat_cpu cpu; 55 TAILQ_ENTRY(cpulist) entry; 56 }; 57 58 static TAILQ_HEAD(, cpulist) cpulisthead; 59 60 #define CPU_INIT() TAILQ_INIT(&cpulisthead); 61 #define CPU_FOREACH(cpu) TAILQ_FOREACH(cpu, &cpulisthead, entry) 62 #define CPU_ADD(cpu) TAILQ_INSERT_TAIL(&cpulisthead, cpu, entry) 63 #define CPU_REM(cpu) TAILQ_REMOVE(&cpulisthead, cpu, entry) 64 #define CPU_FIRST() TAILQ_FIRST(&cpulisthead) 65 66 struct memlist { 67 struct acpisrat_mem mem; 68 TAILQ_ENTRY(memlist) entry; 69 }; 70 71 static TAILQ_HEAD(, memlist) memlisthead; 72 73 #define MEM_INIT() TAILQ_INIT(&memlisthead) 74 #define MEM_FOREACH(mem) TAILQ_FOREACH(mem, &memlisthead, entry) 75 #define MEM_ADD(mem) TAILQ_INSERT_TAIL(&memlisthead, mem, entry) 76 #define MEM_ADD_BEFORE(mem, b) TAILQ_INSERT_BEFORE(b, mem, entry) 77 #define MEM_REM(mem) TAILQ_REMOVE(&memlisthead, mem, entry) 78 #define MEM_FIRST() TAILQ_FIRST(&memlisthead) 79 80 81 static struct cpulist * 82 cpu_alloc(void) 83 { 84 return kmem_zalloc(sizeof(struct cpulist), KM_SLEEP); 85 } 86 87 static void 88 cpu_free(struct cpulist *c) 89 { 90 kmem_free(c, sizeof(struct cpulist)); 91 } 92 93 static struct memlist * 94 mem_alloc(void) 95 { 96 return kmem_zalloc(sizeof(struct memlist), KM_SLEEP); 97 } 98 99 static void 100 mem_free(struct memlist *m) 101 { 102 kmem_free(m, sizeof(struct memlist)); 103 } 104 105 static struct memlist * 106 mem_get(acpisrat_nodeid_t nodeid) 107 { 108 struct memlist *tmp; 109 110 MEM_FOREACH(tmp) { 111 if (tmp->mem.nodeid == nodeid) 112 return tmp; 113 } 114 115 return NULL; 116 } 117 118 /* 119 * Returns true if ACPI SRAT table is available. If table does not exist, all 120 * functions below have undefined behaviour. 121 */ 122 bool 123 acpisrat_exist(void) 124 { 125 ACPI_TABLE_HEADER *table; 126 ACPI_STATUS rv; 127 128 rv = AcpiGetTable(ACPI_SIG_SRAT, 1, (ACPI_TABLE_HEADER **)&table); 129 if (ACPI_FAILURE(rv)) 130 return false; 131 132 /* Check if header is valid */ 133 if (table == NULL) 134 return false; 135 136 if (table->Length == 0xffffffff) 137 return false; 138 139 srat = (ACPI_TABLE_SRAT *)table; 140 141 return true; 142 } 143 144 static int 145 acpisrat_parse(void) 146 { 147 ACPI_SUBTABLE_HEADER *subtable; 148 ACPI_SRAT_CPU_AFFINITY *srat_cpu; 149 ACPI_SRAT_MEM_AFFINITY *srat_mem; 150 ACPI_SRAT_X2APIC_CPU_AFFINITY *srat_x2apic; 151 152 acpisrat_nodeid_t nodeid; 153 struct cpulist *cpuentry = NULL; 154 struct memlist *mementry; 155 uint32_t srat_pos; 156 bool ignore_cpu_affinity = false; 157 158 KASSERT(srat != NULL); 159 160 /* Content starts right after the header */ 161 srat_pos = sizeof(ACPI_TABLE_SRAT); 162 163 while (srat_pos < srat->Header.Length) { 164 subtable = (ACPI_SUBTABLE_HEADER *)((char *)srat + srat_pos); 165 srat_pos += subtable->Length; 166 167 switch (subtable->Type) { 168 case ACPI_SRAT_TYPE_CPU_AFFINITY: 169 if (ignore_cpu_affinity) 170 continue; 171 172 srat_cpu = (ACPI_SRAT_CPU_AFFINITY *)subtable; 173 if ((srat_cpu->Flags & ACPI_SRAT_CPU_ENABLED) == 0) 174 break; 175 nodeid = (srat_cpu->ProximityDomainHi[2] << 24) | 176 (srat_cpu->ProximityDomainHi[1] << 16) | 177 (srat_cpu->ProximityDomainHi[0] << 8) | 178 (srat_cpu->ProximityDomainLo); 179 180 cpuentry = cpu_alloc(); 181 if (cpuentry == NULL) 182 return ENOMEM; 183 CPU_ADD(cpuentry); 184 185 cpuentry->cpu.nodeid = nodeid; 186 cpuentry->cpu.apicid = srat_cpu->ApicId; 187 cpuentry->cpu.sapiceid = srat_cpu->LocalSapicEid; 188 cpuentry->cpu.flags = srat_cpu->Flags; 189 cpuentry->cpu.clockdomain = srat_cpu->ClockDomain; 190 break; 191 192 case ACPI_SRAT_TYPE_MEMORY_AFFINITY: 193 srat_mem = (ACPI_SRAT_MEM_AFFINITY *)subtable; 194 nodeid = srat_mem->ProximityDomain; 195 if ((srat_mem->Flags & ACPI_SRAT_MEM_ENABLED) == 0) 196 break; 197 198 mementry = mem_alloc(); 199 if (mementry == NULL) 200 return ENOMEM; 201 MEM_ADD(mementry); 202 203 mementry->mem.nodeid = nodeid; 204 mementry->mem.baseaddress = srat_mem->BaseAddress; 205 mementry->mem.length = srat_mem->Length; 206 mementry->mem.flags = srat_mem->Flags; 207 break; 208 209 case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY: 210 srat_x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)subtable; 211 if ((srat_x2apic->Flags & ACPI_SRAT_CPU_ENABLED) == 0) 212 break; 213 nodeid = srat_x2apic->ProximityDomain; 214 215 /* 216 * This table entry overrides 217 * ACPI_SRAT_TYPE_CPU_AFFINITY. 218 */ 219 if (!ignore_cpu_affinity) { 220 struct cpulist *citer; 221 while ((citer = CPU_FIRST()) != NULL) { 222 CPU_REM(citer); 223 cpu_free(citer); 224 } 225 ignore_cpu_affinity = true; 226 } 227 228 cpuentry = cpu_alloc(); 229 if (cpuentry == NULL) 230 return ENOMEM; 231 CPU_ADD(cpuentry); 232 233 cpuentry->cpu.nodeid = nodeid; 234 cpuentry->cpu.apicid = srat_x2apic->ApicId; 235 cpuentry->cpu.clockdomain = srat_x2apic->ClockDomain; 236 cpuentry->cpu.flags = srat_x2apic->Flags; 237 break; 238 239 case ACPI_SRAT_TYPE_RESERVED: 240 printf("ACPI SRAT subtable reserved, length: 0x%x\n", 241 subtable->Length); 242 break; 243 } 244 } 245 246 return 0; 247 } 248 249 static int 250 acpisrat_quirks(void) 251 { 252 struct cpulist *citer; 253 struct memlist *mem, *miter; 254 255 /* Some sanity checks. */ 256 257 /* 258 * Deal with holes in the memory nodes. BIOS doesn't enlist memory 259 * nodes which don't have any memory modules plugged in. This behaviour 260 * has been observed on AMD machines. 261 * 262 * Do that by searching for CPUs in NUMA nodes which don't exist in the 263 * memory and then insert a zero memory range for the missing node. 264 */ 265 CPU_FOREACH(citer) { 266 mem = mem_get(citer->cpu.nodeid); 267 if (mem != NULL) 268 continue; 269 mem = mem_alloc(); 270 if (mem == NULL) 271 return ENOMEM; 272 mem->mem.nodeid = citer->cpu.nodeid; 273 /* all other fields are already zero filled */ 274 275 MEM_FOREACH(miter) { 276 if (miter->mem.nodeid < citer->cpu.nodeid) 277 continue; 278 MEM_ADD_BEFORE(mem, miter); 279 break; 280 } 281 } 282 283 return 0; 284 } 285 286 /* 287 * Initializes parser. Must be the first function being called when table is 288 * available. 289 */ 290 int 291 acpisrat_init(void) 292 { 293 if (!acpisrat_exist()) 294 return EEXIST; 295 return acpisrat_refresh(); 296 } 297 298 /* 299 * Re-parse ACPI SRAT table. Useful after hotplugging cpu or RAM. 300 */ 301 int 302 acpisrat_refresh(void) 303 { 304 int rc, i, j, k; 305 struct cpulist *citer; 306 struct memlist *miter; 307 uint32_t cnodes = 0, mnodes = 0; 308 309 CPU_INIT(); 310 MEM_INIT(); 311 312 rc = acpisrat_parse(); 313 if (rc) 314 return rc; 315 316 rc = acpisrat_quirks(); 317 if (rc) 318 return rc; 319 320 /* cleanup resources */ 321 rc = acpisrat_exit(); 322 if (rc) 323 return rc; 324 325 ncpus = 0; 326 CPU_FOREACH(citer) { 327 cnodes = MAX(citer->cpu.nodeid, cnodes); 328 ncpus++; 329 } 330 331 nmems = 0; 332 MEM_FOREACH(miter) { 333 mnodes = MAX(miter->mem.nodeid, mnodes); 334 nmems++; 335 } 336 337 nnodes = MAX(cnodes, mnodes) + 1; 338 339 if (nnodes == 0 || nmems == 0 || ncpus == 0) { 340 rc = ENOENT; 341 goto fail; 342 } 343 344 node_array = kmem_zalloc(nnodes * sizeof(struct acpisrat_node), 345 KM_SLEEP); 346 cpu_array = kmem_zalloc(ncpus * sizeof(struct acpisrat_cpu), 347 KM_SLEEP); 348 mem_array = kmem_zalloc(nmems * sizeof(struct acpisrat_mem), 349 KM_SLEEP); 350 351 i = 0; 352 CPU_FOREACH(citer) { 353 memcpy(&cpu_array[i], &citer->cpu, sizeof(struct acpisrat_cpu)); 354 i++; 355 node_array[citer->cpu.nodeid].ncpus++; 356 } 357 358 i = 0; 359 MEM_FOREACH(miter) { 360 memcpy(&mem_array[i], &miter->mem, sizeof(struct acpisrat_mem)); 361 i++; 362 node_array[miter->mem.nodeid].nmems++; 363 } 364 365 for (i = 0; i < nnodes; i++) { 366 node_array[i].nodeid = i; 367 368 if (node_array[i].ncpus != 0) { 369 node_array[i].cpu = kmem_zalloc(node_array[i].ncpus * 370 sizeof(struct acpisrat_cpu *), KM_SLEEP); 371 } 372 if (node_array[i].nmems != 0) { 373 node_array[i].mem = kmem_zalloc(node_array[i].nmems * 374 sizeof(struct acpisrat_mem *), KM_SLEEP); 375 } 376 377 k = 0; 378 for (j = 0; j < ncpus; j++) { 379 if (cpu_array[j].nodeid != i) 380 continue; 381 KASSERT(node_array[i].cpu != NULL); 382 node_array[i].cpu[k] = &cpu_array[j]; 383 k++; 384 } 385 386 k = 0; 387 for (j = 0; j < nmems; j++) { 388 if (mem_array[j].nodeid != i) 389 continue; 390 KASSERT(node_array[i].mem != NULL); 391 node_array[i].mem[k] = &mem_array[j]; 392 k++; 393 } 394 } 395 396 fail: 397 while ((citer = CPU_FIRST()) != NULL) { 398 CPU_REM(citer); 399 cpu_free(citer); 400 } 401 402 while ((miter = MEM_FIRST()) != NULL) { 403 MEM_REM(miter); 404 mem_free(miter); 405 } 406 407 return rc; 408 } 409 410 /* 411 * Free allocated memory. Should be called when acpisrat is no longer of any 412 * use. 413 */ 414 int 415 acpisrat_exit(void) 416 { 417 int i; 418 419 if (node_array) { 420 for (i = 0; i < nnodes; i++) { 421 if (node_array[i].cpu) 422 kmem_free(node_array[i].cpu, 423 node_array[i].ncpus * sizeof(struct acpisrat_cpu *)); 424 if (node_array[i].mem) 425 kmem_free(node_array[i].mem, 426 node_array[i].nmems * sizeof(struct acpisrat_mem *)); 427 } 428 kmem_free(node_array, nnodes * sizeof(struct acpisrat_node)); 429 } 430 node_array = NULL; 431 432 if (cpu_array) 433 kmem_free(cpu_array, ncpus * sizeof(struct acpisrat_cpu)); 434 cpu_array = NULL; 435 436 if (mem_array) 437 kmem_free(mem_array, nmems * sizeof(struct acpisrat_mem)); 438 mem_array = NULL; 439 440 nnodes = 0; 441 ncpus = 0; 442 nmems = 0; 443 444 return 0; 445 } 446 447 void 448 acpisrat_dump(void) 449 { 450 uint32_t i, j, nn, nc, nm; 451 struct acpisrat_cpu c; 452 struct acpisrat_mem m; 453 454 nn = acpisrat_nodes(); 455 aprint_debug("SRAT: %u NUMA nodes\n", nn); 456 for (i = 0; i < nn; i++) { 457 nc = acpisrat_node_cpus(i); 458 for (j = 0; j < nc; j++) { 459 acpisrat_cpu(i, j, &c); 460 aprint_debug("SRAT: node %u cpu %u " 461 "(apic %u, sapic %u, flags %u, clockdomain %u)\n", 462 c.nodeid, j, c.apicid, c.sapiceid, c.flags, 463 c.clockdomain); 464 } 465 466 nm = acpisrat_node_memoryranges(i); 467 for (j = 0; j < nm; j++) { 468 acpisrat_mem(i, j, &m); 469 aprint_debug("SRAT: node %u memory range %u (0x%" 470 PRIx64" - 0x%"PRIx64" flags %u)\n", 471 m.nodeid, j, m.baseaddress, 472 m.baseaddress + m.length, m.flags); 473 } 474 } 475 } 476 477 void 478 acpisrat_load_uvm(void) 479 { 480 uint32_t i, j, nn, nm; 481 struct acpisrat_mem m; 482 483 nn = acpisrat_nodes(); 484 aprint_debug("SRAT: %u NUMA nodes\n", nn); 485 for (i = 0; i < nn; i++) { 486 nm = acpisrat_node_memoryranges(i); 487 for (j = 0; j < nm; j++) { 488 acpisrat_mem(i, j, &m); 489 aprint_debug("SRAT: node %u memory range %u (0x%" 490 PRIx64" - 0x%"PRIx64" flags %u)\n", 491 m.nodeid, j, m.baseaddress, 492 m.baseaddress + m.length, m.flags); 493 uvm_page_numa_load(trunc_page(m.baseaddress), 494 trunc_page(m.length), m.nodeid); 495 } 496 } 497 } 498 499 /* 500 * Get number of NUMA nodes. 501 */ 502 uint32_t 503 acpisrat_nodes(void) 504 { 505 return nnodes; 506 } 507 508 /* 509 * Get number of cpus in the node. 0 means, this is a cpu-less node. 510 */ 511 uint32_t 512 acpisrat_node_cpus(acpisrat_nodeid_t nodeid) 513 { 514 return node_array[nodeid].ncpus; 515 } 516 517 /* 518 * Get number of memory ranges in the node 0 means, this node has no RAM. 519 */ 520 uint32_t 521 acpisrat_node_memoryranges(acpisrat_nodeid_t nodeid) 522 { 523 return node_array[nodeid].nmems; 524 } 525 526 void 527 acpisrat_cpu(acpisrat_nodeid_t nodeid, uint32_t cpunum, 528 struct acpisrat_cpu *c) 529 { 530 memcpy(c, node_array[nodeid].cpu[cpunum], 531 sizeof(struct acpisrat_cpu)); 532 } 533 534 void 535 acpisrat_mem(acpisrat_nodeid_t nodeid, uint32_t memrange, 536 struct acpisrat_mem *mem) 537 { 538 memcpy(mem, node_array[nodeid].mem[memrange], 539 sizeof(struct acpisrat_mem)); 540 } 541 542 /* 543 * Get a node from an APIC id (belonging to a cpu). 544 */ 545 struct acpisrat_node * 546 acpisrat_get_node(uint32_t apicid) 547 { 548 struct acpisrat_node *node; 549 struct acpisrat_cpu *cpu; 550 size_t i, n; 551 552 for (i = 0; i < nnodes; i++) { 553 node = &node_array[i]; 554 555 for (n = 0; n < node->ncpus; n++) { 556 cpu = node->cpu[n]; 557 if (cpu->apicid == apicid) { 558 return node; 559 } 560 } 561 } 562 563 return NULL; 564 } 565