1 /* $NetBSD: acpi_srat.c,v 1.9 2024/06/30 17:54:08 jmcneill Exp $ */ 2 3 /* 4 * Copyright (c) 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Christoph Egger. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __KERNEL_RCSID(0, "$NetBSD: acpi_srat.c,v 1.9 2024/06/30 17:54:08 jmcneill Exp $"); 34 35 #include <sys/param.h> 36 #include <sys/kmem.h> 37 #include <sys/systm.h> 38 39 #include <dev/acpi/acpivar.h> 40 #include <dev/acpi/acpi_srat.h> 41 42 #include <uvm/uvm_extern.h> 43 44 static ACPI_TABLE_SRAT *srat; 45 46 static uint32_t nnodes; /* Number of NUMA nodes */ 47 static struct acpisrat_node *node_array; /* Array of NUMA nodes */ 48 static uint32_t ncpus; /* Number of CPUs */ 49 static struct acpisrat_cpu *cpu_array; /* Array of cpus */ 50 static uint32_t nmems; /* Number of Memory ranges */ 51 static struct acpisrat_mem *mem_array; 52 53 struct cpulist { 54 struct acpisrat_cpu cpu; 55 TAILQ_ENTRY(cpulist) entry; 56 }; 57 58 static TAILQ_HEAD(, cpulist) cpulisthead; 59 60 #define CPU_INIT() TAILQ_INIT(&cpulisthead); 61 #define CPU_FOREACH(cpu) TAILQ_FOREACH(cpu, &cpulisthead, entry) 62 #define CPU_ADD(cpu) TAILQ_INSERT_TAIL(&cpulisthead, cpu, entry) 63 #define CPU_REM(cpu) TAILQ_REMOVE(&cpulisthead, cpu, entry) 64 #define CPU_FIRST() TAILQ_FIRST(&cpulisthead) 65 66 struct memlist { 67 struct acpisrat_mem mem; 68 TAILQ_ENTRY(memlist) entry; 69 }; 70 71 static TAILQ_HEAD(, memlist) memlisthead; 72 73 #define MEM_INIT() TAILQ_INIT(&memlisthead) 74 #define MEM_FOREACH(mem) TAILQ_FOREACH(mem, &memlisthead, entry) 75 #define MEM_ADD(mem) TAILQ_INSERT_TAIL(&memlisthead, mem, entry) 76 #define MEM_ADD_BEFORE(mem, b) TAILQ_INSERT_BEFORE(b, mem, entry) 77 #define MEM_REM(mem) TAILQ_REMOVE(&memlisthead, mem, entry) 78 #define MEM_FIRST() TAILQ_FIRST(&memlisthead) 79 80 81 static struct cpulist * 82 cpu_alloc(void) 83 { 84 return kmem_zalloc(sizeof(struct cpulist), KM_SLEEP); 85 } 86 87 static void 88 cpu_free(struct cpulist *c) 89 { 90 kmem_free(c, sizeof(struct cpulist)); 91 } 92 93 static struct memlist * 94 mem_alloc(void) 95 { 96 return kmem_zalloc(sizeof(struct memlist), KM_SLEEP); 97 } 98 99 static void 100 mem_free(struct memlist *m) 101 { 102 kmem_free(m, sizeof(struct memlist)); 103 } 104 105 static struct memlist * 106 mem_get(acpisrat_nodeid_t nodeid) 107 { 108 struct memlist *tmp; 109 110 MEM_FOREACH(tmp) { 111 if (tmp->mem.nodeid == nodeid) 112 return tmp; 113 } 114 115 return NULL; 116 } 117 118 /* 119 * Returns true if ACPI SRAT table is available. If table does not exist, all 120 * functions below have undefined behaviour. 121 */ 122 bool 123 acpisrat_exist(void) 124 { 125 ACPI_TABLE_HEADER *table; 126 ACPI_STATUS rv; 127 128 rv = AcpiGetTable(ACPI_SIG_SRAT, 1, (ACPI_TABLE_HEADER **)&table); 129 if (ACPI_FAILURE(rv)) 130 return false; 131 132 /* Check if header is valid */ 133 if (table == NULL) 134 return false; 135 136 if (table->Length == 0xffffffff) 137 return false; 138 139 srat = (ACPI_TABLE_SRAT *)table; 140 141 return true; 142 } 143 144 static int 145 acpisrat_parse(void) 146 { 147 ACPI_SUBTABLE_HEADER *subtable; 148 ACPI_SRAT_CPU_AFFINITY *srat_cpu; 149 ACPI_SRAT_MEM_AFFINITY *srat_mem; 150 ACPI_SRAT_X2APIC_CPU_AFFINITY *srat_x2apic; 151 ACPI_SRAT_GICC_AFFINITY *srat_gicc; 152 153 acpisrat_nodeid_t nodeid; 154 struct cpulist *cpuentry = NULL; 155 struct memlist *mementry; 156 uint32_t srat_pos; 157 bool ignore_cpu_affinity = false; 158 159 KASSERT(srat != NULL); 160 161 /* Content starts right after the header */ 162 srat_pos = sizeof(ACPI_TABLE_SRAT); 163 164 while (srat_pos < srat->Header.Length) { 165 subtable = (ACPI_SUBTABLE_HEADER *)((char *)srat + srat_pos); 166 srat_pos += subtable->Length; 167 168 switch (subtable->Type) { 169 case ACPI_SRAT_TYPE_CPU_AFFINITY: 170 if (ignore_cpu_affinity) 171 continue; 172 173 srat_cpu = (ACPI_SRAT_CPU_AFFINITY *)subtable; 174 if ((srat_cpu->Flags & ACPI_SRAT_CPU_ENABLED) == 0) 175 break; 176 nodeid = (srat_cpu->ProximityDomainHi[2] << 24) | 177 (srat_cpu->ProximityDomainHi[1] << 16) | 178 (srat_cpu->ProximityDomainHi[0] << 8) | 179 (srat_cpu->ProximityDomainLo); 180 181 cpuentry = cpu_alloc(); 182 if (cpuentry == NULL) 183 return ENOMEM; 184 CPU_ADD(cpuentry); 185 186 cpuentry->cpu.nodeid = nodeid; 187 cpuentry->cpu.apicid = srat_cpu->ApicId; 188 cpuentry->cpu.sapiceid = srat_cpu->LocalSapicEid; 189 cpuentry->cpu.flags = srat_cpu->Flags; 190 cpuentry->cpu.clockdomain = srat_cpu->ClockDomain; 191 break; 192 193 case ACPI_SRAT_TYPE_MEMORY_AFFINITY: 194 srat_mem = (ACPI_SRAT_MEM_AFFINITY *)subtable; 195 nodeid = srat_mem->ProximityDomain; 196 if ((srat_mem->Flags & ACPI_SRAT_MEM_ENABLED) == 0) 197 break; 198 199 mementry = mem_alloc(); 200 if (mementry == NULL) 201 return ENOMEM; 202 MEM_ADD(mementry); 203 204 mementry->mem.nodeid = nodeid; 205 mementry->mem.baseaddress = srat_mem->BaseAddress; 206 mementry->mem.length = srat_mem->Length; 207 mementry->mem.flags = srat_mem->Flags; 208 break; 209 210 case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY: 211 srat_x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)subtable; 212 if ((srat_x2apic->Flags & ACPI_SRAT_CPU_ENABLED) == 0) 213 break; 214 nodeid = srat_x2apic->ProximityDomain; 215 216 /* 217 * This table entry overrides 218 * ACPI_SRAT_TYPE_CPU_AFFINITY. 219 */ 220 if (!ignore_cpu_affinity) { 221 struct cpulist *citer; 222 while ((citer = CPU_FIRST()) != NULL) { 223 CPU_REM(citer); 224 cpu_free(citer); 225 } 226 ignore_cpu_affinity = true; 227 } 228 229 cpuentry = cpu_alloc(); 230 if (cpuentry == NULL) 231 return ENOMEM; 232 CPU_ADD(cpuentry); 233 234 cpuentry->cpu.nodeid = nodeid; 235 cpuentry->cpu.apicid = srat_x2apic->ApicId; 236 cpuentry->cpu.clockdomain = srat_x2apic->ClockDomain; 237 cpuentry->cpu.flags = srat_x2apic->Flags; 238 break; 239 240 case ACPI_SRAT_TYPE_GICC_AFFINITY: 241 srat_gicc = (ACPI_SRAT_GICC_AFFINITY *)subtable; 242 if ((srat_gicc->Flags & ACPI_SRAT_GICC_ENABLED) == 0) 243 break; 244 nodeid = srat_gicc->ProximityDomain; 245 246 /* 247 * This table entry overrides 248 * ACPI_SRAT_TYPE_CPU_AFFINITY. 249 */ 250 if (!ignore_cpu_affinity) { 251 struct cpulist *citer; 252 while ((citer = CPU_FIRST()) != NULL) { 253 CPU_REM(citer); 254 cpu_free(citer); 255 } 256 ignore_cpu_affinity = true; 257 } 258 259 cpuentry = cpu_alloc(); 260 if (cpuentry == NULL) 261 return ENOMEM; 262 CPU_ADD(cpuentry); 263 264 cpuentry->cpu.nodeid = nodeid; 265 cpuentry->cpu.apicid = srat_gicc->AcpiProcessorUid; 266 cpuentry->cpu.clockdomain = srat_gicc->ClockDomain; 267 cpuentry->cpu.flags = srat_gicc->Flags; 268 break; 269 270 case ACPI_SRAT_TYPE_RESERVED: 271 printf("ACPI SRAT subtable reserved, length: 0x%x\n", 272 subtable->Length); 273 break; 274 } 275 } 276 277 return 0; 278 } 279 280 static int 281 acpisrat_quirks(void) 282 { 283 struct cpulist *citer; 284 struct memlist *mem, *miter; 285 286 /* Some sanity checks. */ 287 288 /* 289 * Deal with holes in the memory nodes. BIOS doesn't enlist memory 290 * nodes which don't have any memory modules plugged in. This behaviour 291 * has been observed on AMD machines. 292 * 293 * Do that by searching for CPUs in NUMA nodes which don't exist in the 294 * memory and then insert a zero memory range for the missing node. 295 */ 296 CPU_FOREACH(citer) { 297 mem = mem_get(citer->cpu.nodeid); 298 if (mem != NULL) 299 continue; 300 mem = mem_alloc(); 301 if (mem == NULL) 302 return ENOMEM; 303 mem->mem.nodeid = citer->cpu.nodeid; 304 /* all other fields are already zero filled */ 305 306 MEM_FOREACH(miter) { 307 if (miter->mem.nodeid < citer->cpu.nodeid) 308 continue; 309 MEM_ADD_BEFORE(mem, miter); 310 break; 311 } 312 } 313 314 return 0; 315 } 316 317 /* 318 * Initializes parser. Must be the first function being called when table is 319 * available. 320 */ 321 int 322 acpisrat_init(void) 323 { 324 if (!acpisrat_exist()) 325 return EEXIST; 326 return acpisrat_refresh(); 327 } 328 329 /* 330 * Re-parse ACPI SRAT table. Useful after hotplugging cpu or RAM. 331 */ 332 int 333 acpisrat_refresh(void) 334 { 335 int rc, i, j, k; 336 struct cpulist *citer; 337 struct memlist *miter; 338 uint32_t cnodes = 0, mnodes = 0; 339 340 CPU_INIT(); 341 MEM_INIT(); 342 343 rc = acpisrat_parse(); 344 if (rc) 345 return rc; 346 347 rc = acpisrat_quirks(); 348 if (rc) 349 return rc; 350 351 /* cleanup resources */ 352 rc = acpisrat_exit(); 353 if (rc) 354 return rc; 355 356 ncpus = 0; 357 CPU_FOREACH(citer) { 358 cnodes = MAX(citer->cpu.nodeid, cnodes); 359 ncpus++; 360 } 361 362 nmems = 0; 363 MEM_FOREACH(miter) { 364 mnodes = MAX(miter->mem.nodeid, mnodes); 365 nmems++; 366 } 367 368 nnodes = MAX(cnodes, mnodes) + 1; 369 370 if (nnodes == 0 || nmems == 0 || ncpus == 0) { 371 rc = ENOENT; 372 goto fail; 373 } 374 375 node_array = kmem_zalloc(nnodes * sizeof(struct acpisrat_node), 376 KM_SLEEP); 377 cpu_array = kmem_zalloc(ncpus * sizeof(struct acpisrat_cpu), 378 KM_SLEEP); 379 mem_array = kmem_zalloc(nmems * sizeof(struct acpisrat_mem), 380 KM_SLEEP); 381 382 i = 0; 383 CPU_FOREACH(citer) { 384 memcpy(&cpu_array[i], &citer->cpu, sizeof(struct acpisrat_cpu)); 385 i++; 386 node_array[citer->cpu.nodeid].ncpus++; 387 } 388 389 i = 0; 390 MEM_FOREACH(miter) { 391 memcpy(&mem_array[i], &miter->mem, sizeof(struct acpisrat_mem)); 392 i++; 393 node_array[miter->mem.nodeid].nmems++; 394 } 395 396 for (i = 0; i < nnodes; i++) { 397 node_array[i].nodeid = i; 398 399 if (node_array[i].ncpus != 0) { 400 node_array[i].cpu = kmem_zalloc(node_array[i].ncpus * 401 sizeof(struct acpisrat_cpu *), KM_SLEEP); 402 } 403 if (node_array[i].nmems != 0) { 404 node_array[i].mem = kmem_zalloc(node_array[i].nmems * 405 sizeof(struct acpisrat_mem *), KM_SLEEP); 406 } 407 408 k = 0; 409 for (j = 0; j < ncpus; j++) { 410 if (cpu_array[j].nodeid != i) 411 continue; 412 KASSERT(node_array[i].cpu != NULL); 413 node_array[i].cpu[k] = &cpu_array[j]; 414 k++; 415 } 416 417 k = 0; 418 for (j = 0; j < nmems; j++) { 419 if (mem_array[j].nodeid != i) 420 continue; 421 KASSERT(node_array[i].mem != NULL); 422 node_array[i].mem[k] = &mem_array[j]; 423 k++; 424 } 425 } 426 427 fail: 428 while ((citer = CPU_FIRST()) != NULL) { 429 CPU_REM(citer); 430 cpu_free(citer); 431 } 432 433 while ((miter = MEM_FIRST()) != NULL) { 434 MEM_REM(miter); 435 mem_free(miter); 436 } 437 438 return rc; 439 } 440 441 /* 442 * Free allocated memory. Should be called when acpisrat is no longer of any 443 * use. 444 */ 445 int 446 acpisrat_exit(void) 447 { 448 int i; 449 450 if (node_array) { 451 for (i = 0; i < nnodes; i++) { 452 if (node_array[i].cpu) 453 kmem_free(node_array[i].cpu, 454 node_array[i].ncpus * sizeof(struct acpisrat_cpu *)); 455 if (node_array[i].mem) 456 kmem_free(node_array[i].mem, 457 node_array[i].nmems * sizeof(struct acpisrat_mem *)); 458 } 459 kmem_free(node_array, nnodes * sizeof(struct acpisrat_node)); 460 } 461 node_array = NULL; 462 463 if (cpu_array) 464 kmem_free(cpu_array, ncpus * sizeof(struct acpisrat_cpu)); 465 cpu_array = NULL; 466 467 if (mem_array) 468 kmem_free(mem_array, nmems * sizeof(struct acpisrat_mem)); 469 mem_array = NULL; 470 471 nnodes = 0; 472 ncpus = 0; 473 nmems = 0; 474 475 return 0; 476 } 477 478 void 479 acpisrat_dump(void) 480 { 481 uint32_t i, j, nn, nc, nm; 482 struct acpisrat_cpu c; 483 struct acpisrat_mem m; 484 485 nn = acpisrat_nodes(); 486 aprint_debug("SRAT: %u NUMA nodes\n", nn); 487 for (i = 0; i < nn; i++) { 488 nc = acpisrat_node_cpus(i); 489 for (j = 0; j < nc; j++) { 490 acpisrat_cpu(i, j, &c); 491 aprint_debug("SRAT: node %u cpu %u " 492 "(apic %u, sapic %u, flags %u, clockdomain %u)\n", 493 c.nodeid, j, c.apicid, c.sapiceid, c.flags, 494 c.clockdomain); 495 } 496 497 nm = acpisrat_node_memoryranges(i); 498 for (j = 0; j < nm; j++) { 499 acpisrat_mem(i, j, &m); 500 aprint_debug("SRAT: node %u memory range %u (0x%" 501 PRIx64" - 0x%"PRIx64" flags %u)\n", 502 m.nodeid, j, m.baseaddress, 503 m.baseaddress + m.length, m.flags); 504 } 505 } 506 } 507 508 void 509 acpisrat_load_uvm(void) 510 { 511 uint32_t i, j, nn, nm; 512 struct acpisrat_mem m; 513 514 nn = acpisrat_nodes(); 515 aprint_debug("SRAT: %u NUMA nodes\n", nn); 516 for (i = 0; i < nn; i++) { 517 nm = acpisrat_node_memoryranges(i); 518 for (j = 0; j < nm; j++) { 519 acpisrat_mem(i, j, &m); 520 aprint_debug("SRAT: node %u memory range %u (0x%" 521 PRIx64" - 0x%"PRIx64" flags %u)\n", 522 m.nodeid, j, m.baseaddress, 523 m.baseaddress + m.length, m.flags); 524 uvm_page_numa_load(trunc_page(m.baseaddress), 525 trunc_page(m.length), m.nodeid); 526 } 527 } 528 } 529 530 /* 531 * Get number of NUMA nodes. 532 */ 533 uint32_t 534 acpisrat_nodes(void) 535 { 536 return nnodes; 537 } 538 539 /* 540 * Get number of cpus in the node. 0 means, this is a cpu-less node. 541 */ 542 uint32_t 543 acpisrat_node_cpus(acpisrat_nodeid_t nodeid) 544 { 545 return node_array[nodeid].ncpus; 546 } 547 548 /* 549 * Get number of memory ranges in the node 0 means, this node has no RAM. 550 */ 551 uint32_t 552 acpisrat_node_memoryranges(acpisrat_nodeid_t nodeid) 553 { 554 return node_array[nodeid].nmems; 555 } 556 557 void 558 acpisrat_cpu(acpisrat_nodeid_t nodeid, uint32_t cpunum, 559 struct acpisrat_cpu *c) 560 { 561 memcpy(c, node_array[nodeid].cpu[cpunum], 562 sizeof(struct acpisrat_cpu)); 563 } 564 565 void 566 acpisrat_mem(acpisrat_nodeid_t nodeid, uint32_t memrange, 567 struct acpisrat_mem *mem) 568 { 569 memcpy(mem, node_array[nodeid].mem[memrange], 570 sizeof(struct acpisrat_mem)); 571 } 572 573 /* 574 * Get a node from an APIC id (belonging to a cpu). 575 */ 576 struct acpisrat_node * 577 acpisrat_get_node(uint32_t apicid) 578 { 579 struct acpisrat_node *node; 580 struct acpisrat_cpu *cpu; 581 size_t i, n; 582 583 for (i = 0; i < nnodes; i++) { 584 node = &node_array[i]; 585 586 for (n = 0; n < node->ncpus; n++) { 587 cpu = node->cpu[n]; 588 if (cpu->apicid == apicid) { 589 return node; 590 } 591 } 592 } 593 594 return NULL; 595 } 596