1 /* $NetBSD: acpi_srat.c,v 1.5 2017/12/28 08:49:28 maxv Exp $ */ 2 3 /* 4 * Copyright (c) 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Christoph Egger. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __KERNEL_RCSID(0, "$NetBSD: acpi_srat.c,v 1.5 2017/12/28 08:49:28 maxv Exp $"); 34 35 #include <sys/param.h> 36 #include <sys/kmem.h> 37 #include <sys/systm.h> 38 39 #include <dev/acpi/acpivar.h> 40 #include <dev/acpi/acpi_srat.h> 41 42 static ACPI_TABLE_SRAT *srat; 43 44 static uint32_t nnodes; /* Number of NUMA nodes */ 45 static struct acpisrat_node *node_array; /* Array of NUMA nodes */ 46 static uint32_t ncpus; /* Number of CPUs */ 47 static struct acpisrat_cpu *cpu_array; /* Array of cpus */ 48 static uint32_t nmems; /* Number of Memory ranges */ 49 static struct acpisrat_mem *mem_array; 50 51 struct cpulist { 52 struct acpisrat_cpu cpu; 53 TAILQ_ENTRY(cpulist) entry; 54 }; 55 56 static TAILQ_HEAD(, cpulist) cpulisthead; 57 58 #define CPU_INIT() TAILQ_INIT(&cpulisthead); 59 #define CPU_FOREACH(cpu) TAILQ_FOREACH(cpu, &cpulisthead, entry) 60 #define CPU_ADD(cpu) TAILQ_INSERT_TAIL(&cpulisthead, cpu, entry) 61 #define CPU_REM(cpu) TAILQ_REMOVE(&cpulisthead, cpu, entry) 62 #define CPU_FIRST() TAILQ_FIRST(&cpulisthead) 63 64 struct memlist { 65 struct acpisrat_mem mem; 66 TAILQ_ENTRY(memlist) entry; 67 }; 68 69 static TAILQ_HEAD(, memlist) memlisthead; 70 71 #define MEM_INIT() TAILQ_INIT(&memlisthead) 72 #define MEM_FOREACH(mem) TAILQ_FOREACH(mem, &memlisthead, entry) 73 #define MEM_ADD(mem) TAILQ_INSERT_TAIL(&memlisthead, mem, entry) 74 #define MEM_ADD_BEFORE(mem, b) TAILQ_INSERT_BEFORE(b, mem, entry) 75 #define MEM_REM(mem) TAILQ_REMOVE(&memlisthead, mem, entry) 76 #define MEM_FIRST() TAILQ_FIRST(&memlisthead) 77 78 79 static struct cpulist * 80 cpu_alloc(void) 81 { 82 return kmem_zalloc(sizeof(struct cpulist), KM_NOSLEEP); 83 } 84 85 static void 86 cpu_free(struct cpulist *c) 87 { 88 kmem_free(c, sizeof(struct cpulist)); 89 } 90 91 static struct memlist * 92 mem_alloc(void) 93 { 94 return kmem_zalloc(sizeof(struct memlist), KM_NOSLEEP); 95 } 96 97 static void 98 mem_free(struct memlist *m) 99 { 100 kmem_free(m, sizeof(struct memlist)); 101 } 102 103 static struct memlist * 104 mem_get(acpisrat_nodeid_t nodeid) 105 { 106 struct memlist *tmp; 107 108 MEM_FOREACH(tmp) { 109 if (tmp->mem.nodeid == nodeid) 110 return tmp; 111 } 112 113 return NULL; 114 } 115 116 /* 117 * Returns true if ACPI SRAT table is available. If table does not exist, all 118 * functions below have undefined behaviour. 119 */ 120 bool 121 acpisrat_exist(void) 122 { 123 ACPI_TABLE_HEADER *table; 124 ACPI_STATUS rv; 125 126 rv = AcpiGetTable(ACPI_SIG_SRAT, 1, (ACPI_TABLE_HEADER **)&table); 127 if (ACPI_FAILURE(rv)) 128 return false; 129 130 /* Check if header is valid */ 131 if (table == NULL) 132 return false; 133 134 if (table->Length == 0xffffffff) 135 return false; 136 137 srat = (ACPI_TABLE_SRAT *)table; 138 139 return true; 140 } 141 142 static int 143 acpisrat_parse(void) 144 { 145 ACPI_SUBTABLE_HEADER *subtable; 146 ACPI_SRAT_CPU_AFFINITY *srat_cpu; 147 ACPI_SRAT_MEM_AFFINITY *srat_mem; 148 ACPI_SRAT_X2APIC_CPU_AFFINITY *srat_x2apic; 149 150 acpisrat_nodeid_t nodeid; 151 struct cpulist *cpuentry = NULL; 152 struct memlist *mementry; 153 uint32_t srat_pos; 154 bool ignore_cpu_affinity = false; 155 156 KASSERT(srat != NULL); 157 158 /* Content starts right after the header */ 159 srat_pos = sizeof(ACPI_TABLE_SRAT); 160 161 while (srat_pos < srat->Header.Length) { 162 subtable = (ACPI_SUBTABLE_HEADER *)((char *)srat + srat_pos); 163 srat_pos += subtable->Length; 164 165 switch (subtable->Type) { 166 case ACPI_SRAT_TYPE_CPU_AFFINITY: 167 if (ignore_cpu_affinity) 168 continue; 169 170 srat_cpu = (ACPI_SRAT_CPU_AFFINITY *)subtable; 171 if ((srat_cpu->Flags & ACPI_SRAT_CPU_ENABLED) == 0) 172 break; 173 nodeid = (srat_cpu->ProximityDomainHi[2] << 24) | 174 (srat_cpu->ProximityDomainHi[1] << 16) | 175 (srat_cpu->ProximityDomainHi[0] << 8) | 176 (srat_cpu->ProximityDomainLo); 177 178 cpuentry = cpu_alloc(); 179 if (cpuentry == NULL) 180 return ENOMEM; 181 CPU_ADD(cpuentry); 182 183 cpuentry->cpu.nodeid = nodeid; 184 cpuentry->cpu.apicid = srat_cpu->ApicId; 185 cpuentry->cpu.sapiceid = srat_cpu->LocalSapicEid; 186 cpuentry->cpu.flags = srat_cpu->Flags; 187 cpuentry->cpu.clockdomain = srat_cpu->ClockDomain; 188 break; 189 190 case ACPI_SRAT_TYPE_MEMORY_AFFINITY: 191 srat_mem = (ACPI_SRAT_MEM_AFFINITY *)subtable; 192 nodeid = srat_mem->ProximityDomain; 193 if ((srat_mem->Flags & ACPI_SRAT_MEM_ENABLED) == 0) 194 break; 195 196 mementry = mem_alloc(); 197 if (mementry == NULL) 198 return ENOMEM; 199 MEM_ADD(mementry); 200 201 mementry->mem.nodeid = nodeid; 202 mementry->mem.baseaddress = srat_mem->BaseAddress; 203 mementry->mem.length = srat_mem->Length; 204 mementry->mem.flags = srat_mem->Flags; 205 break; 206 207 case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY: 208 srat_x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)subtable; 209 if ((srat_x2apic->Flags & ACPI_SRAT_CPU_ENABLED) == 0) 210 break; 211 nodeid = srat_x2apic->ProximityDomain; 212 213 /* 214 * This table entry overrides 215 * ACPI_SRAT_TYPE_CPU_AFFINITY. 216 */ 217 if (!ignore_cpu_affinity) { 218 struct cpulist *citer; 219 while ((citer = CPU_FIRST()) != NULL) { 220 CPU_REM(citer); 221 cpu_free(citer); 222 } 223 ignore_cpu_affinity = true; 224 } 225 226 cpuentry = cpu_alloc(); 227 if (cpuentry == NULL) 228 return ENOMEM; 229 CPU_ADD(cpuentry); 230 231 cpuentry->cpu.nodeid = nodeid; 232 cpuentry->cpu.apicid = srat_x2apic->ApicId; 233 cpuentry->cpu.clockdomain = srat_x2apic->ClockDomain; 234 cpuentry->cpu.flags = srat_x2apic->Flags; 235 break; 236 237 case ACPI_SRAT_TYPE_RESERVED: 238 printf("ACPI SRAT subtable reserved, length: 0x%x\n", 239 subtable->Length); 240 break; 241 } 242 } 243 244 return 0; 245 } 246 247 static int 248 acpisrat_quirks(void) 249 { 250 struct cpulist *citer; 251 struct memlist *mem, *miter; 252 253 /* Some sanity checks. */ 254 255 /* 256 * Deal with holes in the memory nodes. BIOS doesn't enlist memory 257 * nodes which don't have any memory modules plugged in. This behaviour 258 * has been observed on AMD machines. 259 * 260 * Do that by searching for CPUs in NUMA nodes which don't exist in the 261 * memory and then insert a zero memory range for the missing node. 262 */ 263 CPU_FOREACH(citer) { 264 mem = mem_get(citer->cpu.nodeid); 265 if (mem != NULL) 266 continue; 267 mem = mem_alloc(); 268 if (mem == NULL) 269 return ENOMEM; 270 mem->mem.nodeid = citer->cpu.nodeid; 271 /* all other fields are already zero filled */ 272 273 MEM_FOREACH(miter) { 274 if (miter->mem.nodeid < citer->cpu.nodeid) 275 continue; 276 MEM_ADD_BEFORE(mem, miter); 277 break; 278 } 279 } 280 281 return 0; 282 } 283 284 /* 285 * Initializes parser. Must be the first function being called when table is 286 * available. 287 */ 288 int 289 acpisrat_init(void) 290 { 291 if (!acpisrat_exist()) 292 return EEXIST; 293 return acpisrat_refresh(); 294 } 295 296 /* 297 * Re-parse ACPI SRAT table. Useful after hotplugging cpu or RAM. 298 */ 299 int 300 acpisrat_refresh(void) 301 { 302 int rc, i, j, k; 303 struct cpulist *citer; 304 struct memlist *miter; 305 uint32_t cnodes = 0, mnodes = 0; 306 307 CPU_INIT(); 308 MEM_INIT(); 309 310 rc = acpisrat_parse(); 311 if (rc) 312 return rc; 313 314 rc = acpisrat_quirks(); 315 if (rc) 316 return rc; 317 318 /* cleanup resources */ 319 rc = acpisrat_exit(); 320 if (rc) 321 return rc; 322 323 ncpus = 0; 324 CPU_FOREACH(citer) { 325 cnodes = MAX(citer->cpu.nodeid, cnodes); 326 ncpus++; 327 } 328 329 nmems = 0; 330 MEM_FOREACH(miter) { 331 mnodes = MAX(miter->mem.nodeid, mnodes); 332 nmems++; 333 } 334 335 nnodes = MAX(cnodes, mnodes) + 1; 336 337 node_array = kmem_zalloc(nnodes * sizeof(struct acpisrat_node), 338 KM_NOSLEEP); 339 if (node_array == NULL) 340 return ENOMEM; 341 342 cpu_array = kmem_zalloc(ncpus * sizeof(struct acpisrat_cpu), 343 KM_NOSLEEP); 344 if (cpu_array == NULL) 345 return ENOMEM; 346 347 mem_array = kmem_zalloc(nmems * sizeof(struct acpisrat_mem), 348 KM_NOSLEEP); 349 if (mem_array == NULL) 350 return ENOMEM; 351 352 i = 0; 353 CPU_FOREACH(citer) { 354 memcpy(&cpu_array[i], &citer->cpu, sizeof(struct acpisrat_cpu)); 355 i++; 356 node_array[citer->cpu.nodeid].ncpus++; 357 } 358 359 i = 0; 360 MEM_FOREACH(miter) { 361 memcpy(&mem_array[i], &miter->mem, sizeof(struct acpisrat_mem)); 362 i++; 363 node_array[miter->mem.nodeid].nmems++; 364 } 365 366 for (i = 0; i < nnodes; i++) { 367 node_array[i].nodeid = i; 368 369 node_array[i].cpu = kmem_zalloc(node_array[i].ncpus * 370 sizeof(struct acpisrat_cpu *), KM_NOSLEEP); 371 node_array[i].mem = kmem_zalloc(node_array[i].nmems * 372 sizeof(struct acpisrat_mem *), KM_NOSLEEP); 373 374 k = 0; 375 for (j = 0; j < ncpus; j++) { 376 if (cpu_array[j].nodeid != i) 377 continue; 378 node_array[i].cpu[k] = &cpu_array[j]; 379 k++; 380 } 381 382 k = 0; 383 for (j = 0; j < nmems; j++) { 384 if (mem_array[j].nodeid != i) 385 continue; 386 node_array[i].mem[k] = &mem_array[j]; 387 k++; 388 } 389 } 390 391 while ((citer = CPU_FIRST()) != NULL) { 392 CPU_REM(citer); 393 cpu_free(citer); 394 } 395 396 while ((miter = MEM_FIRST()) != NULL) { 397 MEM_REM(miter); 398 mem_free(miter); 399 } 400 401 return 0; 402 } 403 404 /* 405 * Free allocated memory. Should be called when acpisrat is no longer of any 406 * use. 407 */ 408 int 409 acpisrat_exit(void) 410 { 411 int i; 412 413 if (node_array) { 414 for (i = 0; i < nnodes; i++) { 415 if (node_array[i].cpu) 416 kmem_free(node_array[i].cpu, 417 node_array[i].ncpus * sizeof(struct acpisrat_cpu *)); 418 if (node_array[i].mem) 419 kmem_free(node_array[i].mem, 420 node_array[i].nmems * sizeof(struct acpisrat_mem *)); 421 } 422 kmem_free(node_array, nnodes * sizeof(struct acpisrat_node)); 423 } 424 node_array = NULL; 425 426 if (cpu_array) 427 kmem_free(cpu_array, ncpus * sizeof(struct acpisrat_cpu)); 428 cpu_array = NULL; 429 430 if (mem_array) 431 kmem_free(mem_array, nmems * sizeof(struct acpisrat_mem)); 432 mem_array = NULL; 433 434 nnodes = 0; 435 ncpus = 0; 436 nmems = 0; 437 438 return 0; 439 } 440 441 void 442 acpisrat_dump(void) 443 { 444 uint32_t i, j, nn, nc, nm; 445 struct acpisrat_cpu c; 446 struct acpisrat_mem m; 447 448 nn = acpisrat_nodes(); 449 aprint_debug("SRAT: %u NUMA nodes\n", nn); 450 for (i = 0; i < nn; i++) { 451 nc = acpisrat_node_cpus(i); 452 for (j = 0; j < nc; j++) { 453 acpisrat_cpu(i, j, &c); 454 aprint_debug("SRAT: node %u cpu %u " 455 "(apic %u, sapic %u, flags %u, clockdomain %u)\n", 456 c.nodeid, j, c.apicid, c.sapiceid, c.flags, 457 c.clockdomain); 458 } 459 460 nm = acpisrat_node_memoryranges(i); 461 for (j = 0; j < nm; j++) { 462 acpisrat_mem(i, j, &m); 463 aprint_debug("SRAT: node %u memory range %u (0x%" 464 PRIx64" - 0x%"PRIx64" flags %u)\n", 465 m.nodeid, j, m.baseaddress, 466 m.baseaddress + m.length, m.flags); 467 } 468 } 469 } 470 471 /* 472 * Get number of NUMA nodes. 473 */ 474 uint32_t 475 acpisrat_nodes(void) 476 { 477 return nnodes; 478 } 479 480 /* 481 * Get number of cpus in the node. 0 means, this is a cpu-less node. 482 */ 483 uint32_t 484 acpisrat_node_cpus(acpisrat_nodeid_t nodeid) 485 { 486 return node_array[nodeid].ncpus; 487 } 488 489 /* 490 * Get number of memory ranges in the node 0 means, this node has no RAM. 491 */ 492 uint32_t 493 acpisrat_node_memoryranges(acpisrat_nodeid_t nodeid) 494 { 495 return node_array[nodeid].nmems; 496 } 497 498 void 499 acpisrat_cpu(acpisrat_nodeid_t nodeid, uint32_t cpunum, 500 struct acpisrat_cpu *c) 501 { 502 memcpy(c, node_array[nodeid].cpu[cpunum], 503 sizeof(struct acpisrat_cpu)); 504 } 505 506 void 507 acpisrat_mem(acpisrat_nodeid_t nodeid, uint32_t memrange, 508 struct acpisrat_mem *mem) 509 { 510 memcpy(mem, node_array[nodeid].mem[memrange], 511 sizeof(struct acpisrat_mem)); 512 } 513 514 /* 515 * Get a node from an APIC id (belonging to a cpu). 516 */ 517 struct acpisrat_node * 518 acpisrat_get_node(uint32_t apicid) 519 { 520 struct acpisrat_node *node; 521 struct acpisrat_cpu *cpu; 522 size_t i, n; 523 524 for (i = 0; i < nnodes; i++) { 525 node = &node_array[i]; 526 527 for (n = 0; n < node->ncpus; n++) { 528 cpu = node->cpu[n]; 529 if (cpu->apicid == apicid) { 530 return node; 531 } 532 } 533 } 534 535 return NULL; 536 } 537 538