1 /* $NetBSD: acpi_srat.c,v 1.2 2009/12/04 10:42:39 njoly Exp $ */ 2 3 /* 4 * Copyright (c) 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Christoph Egger. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __KERNEL_RCSID(0, "$NetBSD: acpi_srat.c,v 1.2 2009/12/04 10:42:39 njoly Exp $"); 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 38 #include <sys/kmem.h> 39 40 #include <dev/acpi/acpica.h> 41 #include <dev/acpi/acpivar.h> 42 #include <dev/acpi/acpi_srat.h> 43 44 static ACPI_TABLE_SRAT *srat; 45 46 struct acpisrat_node { 47 acpisrat_nodeid_t nodeid; 48 uint32_t ncpus; /* Number of cpus in this node */ 49 struct acpisrat_cpu **cpu; /* Array of cpus */ 50 uint32_t nmems; /* Number of memory ranges in this node */ 51 struct acpisrat_mem **mem; /* Array of memory ranges */ 52 }; 53 54 static uint32_t nnodes; /* Number of NUMA nodes */ 55 static struct acpisrat_node *node_array; /* Array of NUMA nodes */ 56 static uint32_t ncpus; /* Number of CPUs */ 57 static struct acpisrat_cpu *cpu_array; /* Array of cpus */ 58 static uint32_t nmems; /* Number of Memory ranges */ 59 static struct acpisrat_mem *mem_array; 60 61 62 struct cpulist { 63 struct acpisrat_cpu cpu; 64 TAILQ_ENTRY(cpulist) entry; 65 }; 66 67 static TAILQ_HEAD(, cpulist) cpulisthead; 68 69 #define CPU_INIT TAILQ_INIT(&cpulisthead); 70 #define CPU_FOREACH(cpu) TAILQ_FOREACH(cpu, &cpulisthead, entry) 71 #define CPU_ADD(cpu) TAILQ_INSERT_TAIL(&cpulisthead, cpu, entry) 72 #define CPU_REM(cpu) TAILQ_REMOVE(&cpulisthead, cpu, entry) 73 #define CPU_FIRST TAILQ_FIRST(&cpulisthead) 74 75 76 struct memlist { 77 struct acpisrat_mem mem; 78 TAILQ_ENTRY(memlist) entry; 79 }; 80 81 static TAILQ_HEAD(, memlist) memlisthead; 82 83 #define MEM_INIT TAILQ_INIT(&memlisthead) 84 #define MEM_FOREACH(mem) TAILQ_FOREACH(mem, &memlisthead, entry) 85 #define MEM_ADD(mem) TAILQ_INSERT_TAIL(&memlisthead, mem, entry) 86 #define MEM_ADD_BEFORE(mem, b) TAILQ_INSERT_BEFORE(b, mem, entry) 87 #define MEM_REM(mem) TAILQ_REMOVE(&memlisthead, mem, entry) 88 #define MEM_FIRST TAILQ_FIRST(&memlisthead) 89 90 91 static struct cpulist * 92 cpu_alloc(void) 93 { 94 return kmem_zalloc(sizeof(struct cpulist), KM_NOSLEEP); 95 } 96 97 static void 98 cpu_free(struct cpulist *c) 99 { 100 kmem_free(c, sizeof(struct cpulist)); 101 } 102 103 #if 0 104 static struct cpulist * 105 cpu_get(acpisrat_nodeid_t nodeid) 106 { 107 struct cpulist *tmp; 108 109 CPU_FOREACH(tmp) { 110 if (tmp->cpu.nodeid == nodeid) 111 return tmp; 112 } 113 114 return NULL; 115 } 116 #endif 117 118 static struct memlist * 119 mem_alloc(void) 120 { 121 return kmem_zalloc(sizeof(struct memlist), KM_NOSLEEP); 122 } 123 124 static void 125 mem_free(struct memlist *m) 126 { 127 kmem_free(m, sizeof(struct memlist)); 128 } 129 130 static struct memlist * 131 mem_get(acpisrat_nodeid_t nodeid) 132 { 133 struct memlist *tmp; 134 135 MEM_FOREACH(tmp) { 136 if (tmp->mem.nodeid == nodeid) 137 return tmp; 138 } 139 140 return NULL; 141 } 142 143 144 bool 145 acpisrat_exist(void) 146 { 147 ACPI_TABLE_HEADER *table; 148 ACPI_STATUS rv; 149 150 rv = AcpiGetTable(ACPI_SIG_SRAT, 1, (ACPI_TABLE_HEADER **)&table); 151 if (ACPI_FAILURE(rv)) 152 return false; 153 154 /* Check if header is valid */ 155 if (table == NULL) 156 return false; 157 158 if (table->Length == 0xffffffff) 159 return false; 160 161 srat = (ACPI_TABLE_SRAT *)table; 162 163 return true; 164 } 165 166 static int 167 acpisrat_parse(void) 168 { 169 ACPI_SUBTABLE_HEADER *subtable; 170 ACPI_SRAT_CPU_AFFINITY *srat_cpu; 171 ACPI_SRAT_MEM_AFFINITY *srat_mem; 172 ACPI_SRAT_X2APIC_CPU_AFFINITY *srat_x2apic; 173 174 acpisrat_nodeid_t nodeid; 175 struct cpulist *cpuentry = NULL; 176 struct memlist *mementry; 177 uint32_t srat_pos; 178 bool ignore_cpu_affinity = false; 179 180 KASSERT(srat != NULL); 181 182 /* Content starts right after the header */ 183 srat_pos = sizeof(ACPI_TABLE_SRAT); 184 185 while (srat_pos < srat->Header.Length) { 186 subtable = (ACPI_SUBTABLE_HEADER *)((char *)srat + srat_pos); 187 srat_pos += subtable->Length; 188 189 switch (subtable->Type) { 190 case ACPI_SRAT_TYPE_CPU_AFFINITY: 191 if (ignore_cpu_affinity) 192 continue; 193 194 srat_cpu = (ACPI_SRAT_CPU_AFFINITY *)subtable; 195 nodeid = (srat_cpu->ProximityDomainHi[2] << 24) | 196 (srat_cpu->ProximityDomainHi[1] << 16) | 197 (srat_cpu->ProximityDomainHi[0] << 8) | 198 (srat_cpu->ProximityDomainLo); 199 200 cpuentry = cpu_alloc(); 201 if (cpuentry == NULL) 202 return ENOMEM; 203 CPU_ADD(cpuentry); 204 205 cpuentry->cpu.nodeid = nodeid; 206 cpuentry->cpu.apicid = srat_cpu->ApicId; 207 cpuentry->cpu.sapiceid = srat_cpu->LocalSapicEid; 208 cpuentry->cpu.flags = srat_cpu->Flags; 209 cpuentry->cpu.clockdomain = srat_cpu->ClockDomain; 210 break; 211 212 case ACPI_SRAT_TYPE_MEMORY_AFFINITY: 213 srat_mem = (ACPI_SRAT_MEM_AFFINITY *)subtable; 214 nodeid = srat_mem->ProximityDomain; 215 216 mementry = mem_alloc(); 217 if (mementry == NULL) 218 return ENOMEM; 219 MEM_ADD(mementry); 220 221 mementry->mem.nodeid = nodeid; 222 mementry->mem.baseaddress = srat_mem->BaseAddress; 223 mementry->mem.length = srat_mem->Length; 224 mementry->mem.flags = srat_mem->Flags; 225 break; 226 227 case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY: 228 srat_x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)subtable; 229 nodeid = srat_x2apic->ProximityDomain; 230 231 /* This table entry overrides 232 * ACPI_SRAT_TYPE_CPU_AFFINITY. 233 */ 234 if (!ignore_cpu_affinity) { 235 struct cpulist *citer; 236 while ((citer = CPU_FIRST) != NULL) { 237 CPU_REM(citer); 238 cpu_free(citer); 239 } 240 ignore_cpu_affinity = true; 241 } 242 243 cpuentry = cpu_alloc(); 244 if (cpuentry == NULL) 245 return ENOMEM; 246 CPU_ADD(cpuentry); 247 248 cpuentry->cpu.nodeid = nodeid; 249 cpuentry->cpu.apicid = srat_x2apic->ApicId; 250 cpuentry->cpu.clockdomain = srat_x2apic->ClockDomain; 251 cpuentry->cpu.flags = srat_x2apic->Flags; 252 break; 253 254 case ACPI_SRAT_TYPE_RESERVED: 255 printf("ACPI SRAT subtable reserved, length: 0x%x\n", 256 subtable->Length); 257 break; 258 } 259 } 260 261 return 0; 262 } 263 264 static int 265 acpisrat_quirks(void) 266 { 267 struct cpulist *citer; 268 struct memlist *mem, *miter; 269 270 /* Some sanity checks. */ 271 272 /* Deal with holes in the memory nodes. 273 * BIOS doesn't enlist memory nodes which 274 * don't have any memory modules plugged in. 275 * This behaviour has been observed on AMD machines. 276 * 277 * Do that by searching for CPUs in NUMA nodes 278 * which don't exist in the memory and then insert 279 * a zero memory range for the missing node. 280 */ 281 CPU_FOREACH(citer) { 282 mem = mem_get(citer->cpu.nodeid); 283 if (mem != NULL) 284 continue; 285 mem = mem_alloc(); 286 if (mem == NULL) 287 return ENOMEM; 288 mem->mem.nodeid = citer->cpu.nodeid; 289 /* all other fields are already zero filled */ 290 291 MEM_FOREACH(miter) { 292 if (miter->mem.nodeid < citer->cpu.nodeid) 293 continue; 294 MEM_ADD_BEFORE(mem, miter); 295 break; 296 } 297 } 298 299 return 0; 300 } 301 302 int 303 acpisrat_init(void) 304 { 305 if (!acpisrat_exist()) 306 return EEXIST; 307 return acpisrat_refresh(); 308 } 309 310 int 311 acpisrat_refresh(void) 312 { 313 int rc, i, j, k; 314 struct cpulist *citer; 315 struct memlist *miter; 316 uint32_t cnodes = 0, mnodes = 0; 317 318 CPU_INIT; 319 MEM_INIT; 320 321 rc = acpisrat_parse(); 322 if (rc) 323 return rc; 324 325 rc = acpisrat_quirks(); 326 if (rc) 327 return rc; 328 329 /* cleanup resources */ 330 rc = acpisrat_exit(); 331 if (rc) 332 return rc; 333 334 nnodes = 0; 335 ncpus = 0; 336 CPU_FOREACH(citer) { 337 cnodes = MAX(citer->cpu.nodeid, cnodes); 338 ncpus++; 339 } 340 341 nmems = 0; 342 MEM_FOREACH(miter) { 343 mnodes = MAX(miter->mem.nodeid, mnodes); 344 nmems++; 345 } 346 347 nnodes = MAX(cnodes, mnodes) + 1; 348 349 node_array = kmem_zalloc(nnodes * sizeof(struct acpisrat_node), 350 KM_NOSLEEP); 351 if (node_array == NULL) 352 return ENOMEM; 353 354 cpu_array = kmem_zalloc(ncpus * sizeof(struct acpisrat_cpu), 355 KM_NOSLEEP); 356 if (cpu_array == NULL) 357 return ENOMEM; 358 359 mem_array = kmem_zalloc(nmems * sizeof(struct acpisrat_mem), 360 KM_NOSLEEP); 361 if (mem_array == NULL) 362 return ENOMEM; 363 364 i = 0; 365 CPU_FOREACH(citer) { 366 memcpy(&cpu_array[i], &citer->cpu, sizeof(struct acpisrat_cpu)); 367 i++; 368 node_array[citer->cpu.nodeid].ncpus++; 369 } 370 371 i = 0; 372 MEM_FOREACH(miter) { 373 memcpy(&mem_array[i], &miter->mem, sizeof(struct acpisrat_mem)); 374 i++; 375 node_array[miter->mem.nodeid].nmems++; 376 } 377 378 for (i = 0; i < nnodes; i++) { 379 node_array[i].nodeid = i; 380 381 node_array[i].cpu = kmem_zalloc(node_array[i].ncpus * 382 sizeof(struct acpisrat_cpu *), KM_NOSLEEP); 383 node_array[i].mem = kmem_zalloc(node_array[i].nmems * 384 sizeof(struct acpisrat_mem *), KM_NOSLEEP); 385 386 k = 0; 387 for (j = 0; j < ncpus; j++) { 388 if (cpu_array[j].nodeid != i) 389 continue; 390 node_array[i].cpu[k] = &cpu_array[j]; 391 k++; 392 } 393 394 k = 0; 395 for (j = 0; j < nmems; j++) { 396 if (mem_array[j].nodeid != i) 397 continue; 398 node_array[i].mem[k] = &mem_array[j]; 399 k++; 400 } 401 } 402 403 while ((citer = CPU_FIRST) != NULL) { 404 CPU_REM(citer); 405 cpu_free(citer); 406 } 407 408 while ((miter = MEM_FIRST) != NULL) { 409 MEM_REM(miter); 410 mem_free(miter); 411 } 412 413 return 0; 414 } 415 416 417 int 418 acpisrat_exit(void) 419 { 420 int i; 421 422 if (node_array) { 423 for (i = 0; i < nnodes; i++) { 424 if (node_array[i].cpu) 425 kmem_free(node_array[i].cpu, 426 node_array[i].ncpus * sizeof(struct acpisrat_cpu *)); 427 if (node_array[i].mem) 428 kmem_free(node_array[i].mem, 429 node_array[i].nmems * sizeof(struct acpisrat_mem *)); 430 } 431 kmem_free(node_array, nnodes * sizeof(struct acpisrat_node)); 432 } 433 node_array = NULL; 434 435 if (cpu_array) 436 kmem_free(cpu_array, ncpus * sizeof(struct acpisrat_cpu)); 437 cpu_array = NULL; 438 439 if (mem_array) 440 kmem_free(mem_array, nmems * sizeof(struct acpisrat_mem)); 441 mem_array = NULL; 442 443 nnodes = 0; 444 ncpus = 0; 445 nmems = 0; 446 447 return 0; 448 } 449 450 451 void 452 acpisrat_dump(void) 453 { 454 uint32_t i, j, nn, nc, nm; 455 struct acpisrat_cpu c; 456 struct acpisrat_mem m; 457 458 nn = acpisrat_nodes(); 459 aprint_debug("SRAT: %u NUMA nodes\n", nn); 460 for (i = 0; i < nn; i++) { 461 nc = acpisrat_node_cpus(i); 462 for (j = 0; j < nc; j++) { 463 acpisrat_cpu(i, j, &c); 464 aprint_debug("SRAT: node %u cpu %u " 465 "(apic %u, sapic %u, flags %u, clockdomain %u)\n", 466 c.nodeid, j, c.apicid, c.sapiceid, c.flags, 467 c.clockdomain); 468 } 469 470 nm = acpisrat_node_memoryranges(i); 471 for (j = 0; j < nm; j++) { 472 acpisrat_mem(i, j, &m); 473 aprint_debug("SRAT: node %u memory range %u (0x%" 474 PRIx64" - 0x%"PRIx64" flags %u)\n", 475 m.nodeid, j, m.baseaddress, 476 m.baseaddress + m.length, m.flags); 477 } 478 } 479 } 480 481 uint32_t 482 acpisrat_nodes(void) 483 { 484 return nnodes; 485 } 486 487 uint32_t 488 acpisrat_node_cpus(acpisrat_nodeid_t nodeid) 489 { 490 return node_array[nodeid].ncpus; 491 } 492 493 uint32_t 494 acpisrat_node_memoryranges(acpisrat_nodeid_t nodeid) 495 { 496 return node_array[nodeid].nmems; 497 } 498 499 void 500 acpisrat_cpu(acpisrat_nodeid_t nodeid, uint32_t cpunum, 501 struct acpisrat_cpu *c) 502 { 503 memcpy(c, node_array[nodeid].cpu[cpunum], 504 sizeof(struct acpisrat_cpu)); 505 } 506 507 void 508 acpisrat_mem(acpisrat_nodeid_t nodeid, uint32_t memrange, 509 struct acpisrat_mem *mem) 510 { 511 memcpy(mem, node_array[nodeid].mem[memrange], 512 sizeof(struct acpisrat_mem)); 513 } 514