1 /* $NetBSD: acpi_srat.c,v 1.4 2017/08/31 08:45:03 msaitoh Exp $ */ 2 3 /* 4 * Copyright (c) 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Christoph Egger. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __KERNEL_RCSID(0, "$NetBSD: acpi_srat.c,v 1.4 2017/08/31 08:45:03 msaitoh Exp $"); 34 35 #include <sys/param.h> 36 #include <sys/kmem.h> 37 #include <sys/systm.h> 38 39 #include <dev/acpi/acpivar.h> 40 #include <dev/acpi/acpi_srat.h> 41 42 static ACPI_TABLE_SRAT *srat; 43 44 struct acpisrat_node { 45 acpisrat_nodeid_t nodeid; 46 uint32_t ncpus; /* Number of cpus in this node */ 47 struct acpisrat_cpu **cpu; /* Array of cpus */ 48 uint32_t nmems; /* Number of memory ranges in this node */ 49 struct acpisrat_mem **mem; /* Array of memory ranges */ 50 }; 51 52 static uint32_t nnodes; /* Number of NUMA nodes */ 53 static struct acpisrat_node *node_array; /* Array of NUMA nodes */ 54 static uint32_t ncpus; /* Number of CPUs */ 55 static struct acpisrat_cpu *cpu_array; /* Array of cpus */ 56 static uint32_t nmems; /* Number of Memory ranges */ 57 static struct acpisrat_mem *mem_array; 58 59 60 struct cpulist { 61 struct acpisrat_cpu cpu; 62 TAILQ_ENTRY(cpulist) entry; 63 }; 64 65 static TAILQ_HEAD(, cpulist) cpulisthead; 66 67 #define CPU_INIT TAILQ_INIT(&cpulisthead); 68 #define CPU_FOREACH(cpu) TAILQ_FOREACH(cpu, &cpulisthead, entry) 69 #define CPU_ADD(cpu) TAILQ_INSERT_TAIL(&cpulisthead, cpu, entry) 70 #define CPU_REM(cpu) TAILQ_REMOVE(&cpulisthead, cpu, entry) 71 #define CPU_FIRST TAILQ_FIRST(&cpulisthead) 72 73 74 struct memlist { 75 struct acpisrat_mem mem; 76 TAILQ_ENTRY(memlist) entry; 77 }; 78 79 static TAILQ_HEAD(, memlist) memlisthead; 80 81 #define MEM_INIT TAILQ_INIT(&memlisthead) 82 #define MEM_FOREACH(mem) TAILQ_FOREACH(mem, &memlisthead, entry) 83 #define MEM_ADD(mem) TAILQ_INSERT_TAIL(&memlisthead, mem, entry) 84 #define MEM_ADD_BEFORE(mem, b) TAILQ_INSERT_BEFORE(b, mem, entry) 85 #define MEM_REM(mem) TAILQ_REMOVE(&memlisthead, mem, entry) 86 #define MEM_FIRST TAILQ_FIRST(&memlisthead) 87 88 89 static struct cpulist * 90 cpu_alloc(void) 91 { 92 return kmem_zalloc(sizeof(struct cpulist), KM_NOSLEEP); 93 } 94 95 static void 96 cpu_free(struct cpulist *c) 97 { 98 kmem_free(c, sizeof(struct cpulist)); 99 } 100 101 #if 0 102 static struct cpulist * 103 cpu_get(acpisrat_nodeid_t nodeid) 104 { 105 struct cpulist *tmp; 106 107 CPU_FOREACH(tmp) { 108 if (tmp->cpu.nodeid == nodeid) 109 return tmp; 110 } 111 112 return NULL; 113 } 114 #endif 115 116 static struct memlist * 117 mem_alloc(void) 118 { 119 return kmem_zalloc(sizeof(struct memlist), KM_NOSLEEP); 120 } 121 122 static void 123 mem_free(struct memlist *m) 124 { 125 kmem_free(m, sizeof(struct memlist)); 126 } 127 128 static struct memlist * 129 mem_get(acpisrat_nodeid_t nodeid) 130 { 131 struct memlist *tmp; 132 133 MEM_FOREACH(tmp) { 134 if (tmp->mem.nodeid == nodeid) 135 return tmp; 136 } 137 138 return NULL; 139 } 140 141 142 bool 143 acpisrat_exist(void) 144 { 145 ACPI_TABLE_HEADER *table; 146 ACPI_STATUS rv; 147 148 rv = AcpiGetTable(ACPI_SIG_SRAT, 1, (ACPI_TABLE_HEADER **)&table); 149 if (ACPI_FAILURE(rv)) 150 return false; 151 152 /* Check if header is valid */ 153 if (table == NULL) 154 return false; 155 156 if (table->Length == 0xffffffff) 157 return false; 158 159 srat = (ACPI_TABLE_SRAT *)table; 160 161 return true; 162 } 163 164 static int 165 acpisrat_parse(void) 166 { 167 ACPI_SUBTABLE_HEADER *subtable; 168 ACPI_SRAT_CPU_AFFINITY *srat_cpu; 169 ACPI_SRAT_MEM_AFFINITY *srat_mem; 170 ACPI_SRAT_X2APIC_CPU_AFFINITY *srat_x2apic; 171 172 acpisrat_nodeid_t nodeid; 173 struct cpulist *cpuentry = NULL; 174 struct memlist *mementry; 175 uint32_t srat_pos; 176 bool ignore_cpu_affinity = false; 177 178 KASSERT(srat != NULL); 179 180 /* Content starts right after the header */ 181 srat_pos = sizeof(ACPI_TABLE_SRAT); 182 183 while (srat_pos < srat->Header.Length) { 184 subtable = (ACPI_SUBTABLE_HEADER *)((char *)srat + srat_pos); 185 srat_pos += subtable->Length; 186 187 switch (subtable->Type) { 188 case ACPI_SRAT_TYPE_CPU_AFFINITY: 189 if (ignore_cpu_affinity) 190 continue; 191 192 srat_cpu = (ACPI_SRAT_CPU_AFFINITY *)subtable; 193 if ((srat_cpu->Flags & ACPI_SRAT_CPU_ENABLED) == 0) 194 break; 195 nodeid = (srat_cpu->ProximityDomainHi[2] << 24) | 196 (srat_cpu->ProximityDomainHi[1] << 16) | 197 (srat_cpu->ProximityDomainHi[0] << 8) | 198 (srat_cpu->ProximityDomainLo); 199 200 cpuentry = cpu_alloc(); 201 if (cpuentry == NULL) 202 return ENOMEM; 203 CPU_ADD(cpuentry); 204 205 cpuentry->cpu.nodeid = nodeid; 206 cpuentry->cpu.apicid = srat_cpu->ApicId; 207 cpuentry->cpu.sapiceid = srat_cpu->LocalSapicEid; 208 cpuentry->cpu.flags = srat_cpu->Flags; 209 cpuentry->cpu.clockdomain = srat_cpu->ClockDomain; 210 break; 211 212 case ACPI_SRAT_TYPE_MEMORY_AFFINITY: 213 srat_mem = (ACPI_SRAT_MEM_AFFINITY *)subtable; 214 nodeid = srat_mem->ProximityDomain; 215 if ((srat_mem->Flags & ACPI_SRAT_MEM_ENABLED) == 0) 216 break; 217 218 mementry = mem_alloc(); 219 if (mementry == NULL) 220 return ENOMEM; 221 MEM_ADD(mementry); 222 223 mementry->mem.nodeid = nodeid; 224 mementry->mem.baseaddress = srat_mem->BaseAddress; 225 mementry->mem.length = srat_mem->Length; 226 mementry->mem.flags = srat_mem->Flags; 227 break; 228 229 case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY: 230 srat_x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)subtable; 231 if ((srat_x2apic->Flags & ACPI_SRAT_CPU_ENABLED) == 0) 232 break; 233 nodeid = srat_x2apic->ProximityDomain; 234 235 /* This table entry overrides 236 * ACPI_SRAT_TYPE_CPU_AFFINITY. 237 */ 238 if (!ignore_cpu_affinity) { 239 struct cpulist *citer; 240 while ((citer = CPU_FIRST) != NULL) { 241 CPU_REM(citer); 242 cpu_free(citer); 243 } 244 ignore_cpu_affinity = true; 245 } 246 247 cpuentry = cpu_alloc(); 248 if (cpuentry == NULL) 249 return ENOMEM; 250 CPU_ADD(cpuentry); 251 252 cpuentry->cpu.nodeid = nodeid; 253 cpuentry->cpu.apicid = srat_x2apic->ApicId; 254 cpuentry->cpu.clockdomain = srat_x2apic->ClockDomain; 255 cpuentry->cpu.flags = srat_x2apic->Flags; 256 break; 257 258 case ACPI_SRAT_TYPE_RESERVED: 259 printf("ACPI SRAT subtable reserved, length: 0x%x\n", 260 subtable->Length); 261 break; 262 } 263 } 264 265 return 0; 266 } 267 268 static int 269 acpisrat_quirks(void) 270 { 271 struct cpulist *citer; 272 struct memlist *mem, *miter; 273 274 /* Some sanity checks. */ 275 276 /* Deal with holes in the memory nodes. 277 * BIOS doesn't enlist memory nodes which 278 * don't have any memory modules plugged in. 279 * This behaviour has been observed on AMD machines. 280 * 281 * Do that by searching for CPUs in NUMA nodes 282 * which don't exist in the memory and then insert 283 * a zero memory range for the missing node. 284 */ 285 CPU_FOREACH(citer) { 286 mem = mem_get(citer->cpu.nodeid); 287 if (mem != NULL) 288 continue; 289 mem = mem_alloc(); 290 if (mem == NULL) 291 return ENOMEM; 292 mem->mem.nodeid = citer->cpu.nodeid; 293 /* all other fields are already zero filled */ 294 295 MEM_FOREACH(miter) { 296 if (miter->mem.nodeid < citer->cpu.nodeid) 297 continue; 298 MEM_ADD_BEFORE(mem, miter); 299 break; 300 } 301 } 302 303 return 0; 304 } 305 306 int 307 acpisrat_init(void) 308 { 309 if (!acpisrat_exist()) 310 return EEXIST; 311 return acpisrat_refresh(); 312 } 313 314 int 315 acpisrat_refresh(void) 316 { 317 int rc, i, j, k; 318 struct cpulist *citer; 319 struct memlist *miter; 320 uint32_t cnodes = 0, mnodes = 0; 321 322 CPU_INIT; 323 MEM_INIT; 324 325 rc = acpisrat_parse(); 326 if (rc) 327 return rc; 328 329 rc = acpisrat_quirks(); 330 if (rc) 331 return rc; 332 333 /* cleanup resources */ 334 rc = acpisrat_exit(); 335 if (rc) 336 return rc; 337 338 nnodes = 0; 339 ncpus = 0; 340 CPU_FOREACH(citer) { 341 cnodes = MAX(citer->cpu.nodeid, cnodes); 342 ncpus++; 343 } 344 345 nmems = 0; 346 MEM_FOREACH(miter) { 347 mnodes = MAX(miter->mem.nodeid, mnodes); 348 nmems++; 349 } 350 351 nnodes = MAX(cnodes, mnodes) + 1; 352 353 node_array = kmem_zalloc(nnodes * sizeof(struct acpisrat_node), 354 KM_NOSLEEP); 355 if (node_array == NULL) 356 return ENOMEM; 357 358 cpu_array = kmem_zalloc(ncpus * sizeof(struct acpisrat_cpu), 359 KM_NOSLEEP); 360 if (cpu_array == NULL) 361 return ENOMEM; 362 363 mem_array = kmem_zalloc(nmems * sizeof(struct acpisrat_mem), 364 KM_NOSLEEP); 365 if (mem_array == NULL) 366 return ENOMEM; 367 368 i = 0; 369 CPU_FOREACH(citer) { 370 memcpy(&cpu_array[i], &citer->cpu, sizeof(struct acpisrat_cpu)); 371 i++; 372 node_array[citer->cpu.nodeid].ncpus++; 373 } 374 375 i = 0; 376 MEM_FOREACH(miter) { 377 memcpy(&mem_array[i], &miter->mem, sizeof(struct acpisrat_mem)); 378 i++; 379 node_array[miter->mem.nodeid].nmems++; 380 } 381 382 for (i = 0; i < nnodes; i++) { 383 node_array[i].nodeid = i; 384 385 node_array[i].cpu = kmem_zalloc(node_array[i].ncpus * 386 sizeof(struct acpisrat_cpu *), KM_NOSLEEP); 387 node_array[i].mem = kmem_zalloc(node_array[i].nmems * 388 sizeof(struct acpisrat_mem *), KM_NOSLEEP); 389 390 k = 0; 391 for (j = 0; j < ncpus; j++) { 392 if (cpu_array[j].nodeid != i) 393 continue; 394 node_array[i].cpu[k] = &cpu_array[j]; 395 k++; 396 } 397 398 k = 0; 399 for (j = 0; j < nmems; j++) { 400 if (mem_array[j].nodeid != i) 401 continue; 402 node_array[i].mem[k] = &mem_array[j]; 403 k++; 404 } 405 } 406 407 while ((citer = CPU_FIRST) != NULL) { 408 CPU_REM(citer); 409 cpu_free(citer); 410 } 411 412 while ((miter = MEM_FIRST) != NULL) { 413 MEM_REM(miter); 414 mem_free(miter); 415 } 416 417 return 0; 418 } 419 420 421 int 422 acpisrat_exit(void) 423 { 424 int i; 425 426 if (node_array) { 427 for (i = 0; i < nnodes; i++) { 428 if (node_array[i].cpu) 429 kmem_free(node_array[i].cpu, 430 node_array[i].ncpus * sizeof(struct acpisrat_cpu *)); 431 if (node_array[i].mem) 432 kmem_free(node_array[i].mem, 433 node_array[i].nmems * sizeof(struct acpisrat_mem *)); 434 } 435 kmem_free(node_array, nnodes * sizeof(struct acpisrat_node)); 436 } 437 node_array = NULL; 438 439 if (cpu_array) 440 kmem_free(cpu_array, ncpus * sizeof(struct acpisrat_cpu)); 441 cpu_array = NULL; 442 443 if (mem_array) 444 kmem_free(mem_array, nmems * sizeof(struct acpisrat_mem)); 445 mem_array = NULL; 446 447 nnodes = 0; 448 ncpus = 0; 449 nmems = 0; 450 451 return 0; 452 } 453 454 455 void 456 acpisrat_dump(void) 457 { 458 uint32_t i, j, nn, nc, nm; 459 struct acpisrat_cpu c; 460 struct acpisrat_mem m; 461 462 nn = acpisrat_nodes(); 463 aprint_debug("SRAT: %u NUMA nodes\n", nn); 464 for (i = 0; i < nn; i++) { 465 nc = acpisrat_node_cpus(i); 466 for (j = 0; j < nc; j++) { 467 acpisrat_cpu(i, j, &c); 468 aprint_debug("SRAT: node %u cpu %u " 469 "(apic %u, sapic %u, flags %u, clockdomain %u)\n", 470 c.nodeid, j, c.apicid, c.sapiceid, c.flags, 471 c.clockdomain); 472 } 473 474 nm = acpisrat_node_memoryranges(i); 475 for (j = 0; j < nm; j++) { 476 acpisrat_mem(i, j, &m); 477 aprint_debug("SRAT: node %u memory range %u (0x%" 478 PRIx64" - 0x%"PRIx64" flags %u)\n", 479 m.nodeid, j, m.baseaddress, 480 m.baseaddress + m.length, m.flags); 481 } 482 } 483 } 484 485 uint32_t 486 acpisrat_nodes(void) 487 { 488 return nnodes; 489 } 490 491 uint32_t 492 acpisrat_node_cpus(acpisrat_nodeid_t nodeid) 493 { 494 return node_array[nodeid].ncpus; 495 } 496 497 uint32_t 498 acpisrat_node_memoryranges(acpisrat_nodeid_t nodeid) 499 { 500 return node_array[nodeid].nmems; 501 } 502 503 void 504 acpisrat_cpu(acpisrat_nodeid_t nodeid, uint32_t cpunum, 505 struct acpisrat_cpu *c) 506 { 507 memcpy(c, node_array[nodeid].cpu[cpunum], 508 sizeof(struct acpisrat_cpu)); 509 } 510 511 void 512 acpisrat_mem(acpisrat_nodeid_t nodeid, uint32_t memrange, 513 struct acpisrat_mem *mem) 514 { 515 memcpy(mem, node_array[nodeid].mem[memrange], 516 sizeof(struct acpisrat_mem)); 517 } 518