1 /* $NetBSD: acpi_srat.c,v 1.9 2024/06/30 17:54:08 jmcneill Exp $ */
2
3 /*
4 * Copyright (c) 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Christoph Egger.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: acpi_srat.c,v 1.9 2024/06/30 17:54:08 jmcneill Exp $");
34
35 #include <sys/param.h>
36 #include <sys/kmem.h>
37 #include <sys/systm.h>
38
39 #include <dev/acpi/acpivar.h>
40 #include <dev/acpi/acpi_srat.h>
41
42 #include <uvm/uvm_extern.h>
43
44 static ACPI_TABLE_SRAT *srat;
45
46 static uint32_t nnodes; /* Number of NUMA nodes */
47 static struct acpisrat_node *node_array; /* Array of NUMA nodes */
48 static uint32_t ncpus; /* Number of CPUs */
49 static struct acpisrat_cpu *cpu_array; /* Array of cpus */
50 static uint32_t nmems; /* Number of Memory ranges */
51 static struct acpisrat_mem *mem_array;
52
53 struct cpulist {
54 struct acpisrat_cpu cpu;
55 TAILQ_ENTRY(cpulist) entry;
56 };
57
58 static TAILQ_HEAD(, cpulist) cpulisthead;
59
60 #define CPU_INIT() TAILQ_INIT(&cpulisthead);
61 #define CPU_FOREACH(cpu) TAILQ_FOREACH(cpu, &cpulisthead, entry)
62 #define CPU_ADD(cpu) TAILQ_INSERT_TAIL(&cpulisthead, cpu, entry)
63 #define CPU_REM(cpu) TAILQ_REMOVE(&cpulisthead, cpu, entry)
64 #define CPU_FIRST() TAILQ_FIRST(&cpulisthead)
65
66 struct memlist {
67 struct acpisrat_mem mem;
68 TAILQ_ENTRY(memlist) entry;
69 };
70
71 static TAILQ_HEAD(, memlist) memlisthead;
72
73 #define MEM_INIT() TAILQ_INIT(&memlisthead)
74 #define MEM_FOREACH(mem) TAILQ_FOREACH(mem, &memlisthead, entry)
75 #define MEM_ADD(mem) TAILQ_INSERT_TAIL(&memlisthead, mem, entry)
76 #define MEM_ADD_BEFORE(mem, b) TAILQ_INSERT_BEFORE(b, mem, entry)
77 #define MEM_REM(mem) TAILQ_REMOVE(&memlisthead, mem, entry)
78 #define MEM_FIRST() TAILQ_FIRST(&memlisthead)
79
80
81 static struct cpulist *
cpu_alloc(void)82 cpu_alloc(void)
83 {
84 return kmem_zalloc(sizeof(struct cpulist), KM_SLEEP);
85 }
86
87 static void
cpu_free(struct cpulist * c)88 cpu_free(struct cpulist *c)
89 {
90 kmem_free(c, sizeof(struct cpulist));
91 }
92
93 static struct memlist *
mem_alloc(void)94 mem_alloc(void)
95 {
96 return kmem_zalloc(sizeof(struct memlist), KM_SLEEP);
97 }
98
99 static void
mem_free(struct memlist * m)100 mem_free(struct memlist *m)
101 {
102 kmem_free(m, sizeof(struct memlist));
103 }
104
105 static struct memlist *
mem_get(acpisrat_nodeid_t nodeid)106 mem_get(acpisrat_nodeid_t nodeid)
107 {
108 struct memlist *tmp;
109
110 MEM_FOREACH(tmp) {
111 if (tmp->mem.nodeid == nodeid)
112 return tmp;
113 }
114
115 return NULL;
116 }
117
118 /*
119 * Returns true if ACPI SRAT table is available. If table does not exist, all
120 * functions below have undefined behaviour.
121 */
122 bool
acpisrat_exist(void)123 acpisrat_exist(void)
124 {
125 ACPI_TABLE_HEADER *table;
126 ACPI_STATUS rv;
127
128 rv = AcpiGetTable(ACPI_SIG_SRAT, 1, (ACPI_TABLE_HEADER **)&table);
129 if (ACPI_FAILURE(rv))
130 return false;
131
132 /* Check if header is valid */
133 if (table == NULL)
134 return false;
135
136 if (table->Length == 0xffffffff)
137 return false;
138
139 srat = (ACPI_TABLE_SRAT *)table;
140
141 return true;
142 }
143
144 static int
acpisrat_parse(void)145 acpisrat_parse(void)
146 {
147 ACPI_SUBTABLE_HEADER *subtable;
148 ACPI_SRAT_CPU_AFFINITY *srat_cpu;
149 ACPI_SRAT_MEM_AFFINITY *srat_mem;
150 ACPI_SRAT_X2APIC_CPU_AFFINITY *srat_x2apic;
151 ACPI_SRAT_GICC_AFFINITY *srat_gicc;
152
153 acpisrat_nodeid_t nodeid;
154 struct cpulist *cpuentry = NULL;
155 struct memlist *mementry;
156 uint32_t srat_pos;
157 bool ignore_cpu_affinity = false;
158
159 KASSERT(srat != NULL);
160
161 /* Content starts right after the header */
162 srat_pos = sizeof(ACPI_TABLE_SRAT);
163
164 while (srat_pos < srat->Header.Length) {
165 subtable = (ACPI_SUBTABLE_HEADER *)((char *)srat + srat_pos);
166 srat_pos += subtable->Length;
167
168 switch (subtable->Type) {
169 case ACPI_SRAT_TYPE_CPU_AFFINITY:
170 if (ignore_cpu_affinity)
171 continue;
172
173 srat_cpu = (ACPI_SRAT_CPU_AFFINITY *)subtable;
174 if ((srat_cpu->Flags & ACPI_SRAT_CPU_ENABLED) == 0)
175 break;
176 nodeid = (srat_cpu->ProximityDomainHi[2] << 24) |
177 (srat_cpu->ProximityDomainHi[1] << 16) |
178 (srat_cpu->ProximityDomainHi[0] << 8) |
179 (srat_cpu->ProximityDomainLo);
180
181 cpuentry = cpu_alloc();
182 if (cpuentry == NULL)
183 return ENOMEM;
184 CPU_ADD(cpuentry);
185
186 cpuentry->cpu.nodeid = nodeid;
187 cpuentry->cpu.apicid = srat_cpu->ApicId;
188 cpuentry->cpu.sapiceid = srat_cpu->LocalSapicEid;
189 cpuentry->cpu.flags = srat_cpu->Flags;
190 cpuentry->cpu.clockdomain = srat_cpu->ClockDomain;
191 break;
192
193 case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
194 srat_mem = (ACPI_SRAT_MEM_AFFINITY *)subtable;
195 nodeid = srat_mem->ProximityDomain;
196 if ((srat_mem->Flags & ACPI_SRAT_MEM_ENABLED) == 0)
197 break;
198
199 mementry = mem_alloc();
200 if (mementry == NULL)
201 return ENOMEM;
202 MEM_ADD(mementry);
203
204 mementry->mem.nodeid = nodeid;
205 mementry->mem.baseaddress = srat_mem->BaseAddress;
206 mementry->mem.length = srat_mem->Length;
207 mementry->mem.flags = srat_mem->Flags;
208 break;
209
210 case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
211 srat_x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)subtable;
212 if ((srat_x2apic->Flags & ACPI_SRAT_CPU_ENABLED) == 0)
213 break;
214 nodeid = srat_x2apic->ProximityDomain;
215
216 /*
217 * This table entry overrides
218 * ACPI_SRAT_TYPE_CPU_AFFINITY.
219 */
220 if (!ignore_cpu_affinity) {
221 struct cpulist *citer;
222 while ((citer = CPU_FIRST()) != NULL) {
223 CPU_REM(citer);
224 cpu_free(citer);
225 }
226 ignore_cpu_affinity = true;
227 }
228
229 cpuentry = cpu_alloc();
230 if (cpuentry == NULL)
231 return ENOMEM;
232 CPU_ADD(cpuentry);
233
234 cpuentry->cpu.nodeid = nodeid;
235 cpuentry->cpu.apicid = srat_x2apic->ApicId;
236 cpuentry->cpu.clockdomain = srat_x2apic->ClockDomain;
237 cpuentry->cpu.flags = srat_x2apic->Flags;
238 break;
239
240 case ACPI_SRAT_TYPE_GICC_AFFINITY:
241 srat_gicc = (ACPI_SRAT_GICC_AFFINITY *)subtable;
242 if ((srat_gicc->Flags & ACPI_SRAT_GICC_ENABLED) == 0)
243 break;
244 nodeid = srat_gicc->ProximityDomain;
245
246 /*
247 * This table entry overrides
248 * ACPI_SRAT_TYPE_CPU_AFFINITY.
249 */
250 if (!ignore_cpu_affinity) {
251 struct cpulist *citer;
252 while ((citer = CPU_FIRST()) != NULL) {
253 CPU_REM(citer);
254 cpu_free(citer);
255 }
256 ignore_cpu_affinity = true;
257 }
258
259 cpuentry = cpu_alloc();
260 if (cpuentry == NULL)
261 return ENOMEM;
262 CPU_ADD(cpuentry);
263
264 cpuentry->cpu.nodeid = nodeid;
265 cpuentry->cpu.apicid = srat_gicc->AcpiProcessorUid;
266 cpuentry->cpu.clockdomain = srat_gicc->ClockDomain;
267 cpuentry->cpu.flags = srat_gicc->Flags;
268 break;
269
270 case ACPI_SRAT_TYPE_RESERVED:
271 printf("ACPI SRAT subtable reserved, length: 0x%x\n",
272 subtable->Length);
273 break;
274 }
275 }
276
277 return 0;
278 }
279
280 static int
acpisrat_quirks(void)281 acpisrat_quirks(void)
282 {
283 struct cpulist *citer;
284 struct memlist *mem, *miter;
285
286 /* Some sanity checks. */
287
288 /*
289 * Deal with holes in the memory nodes. BIOS doesn't enlist memory
290 * nodes which don't have any memory modules plugged in. This behaviour
291 * has been observed on AMD machines.
292 *
293 * Do that by searching for CPUs in NUMA nodes which don't exist in the
294 * memory and then insert a zero memory range for the missing node.
295 */
296 CPU_FOREACH(citer) {
297 mem = mem_get(citer->cpu.nodeid);
298 if (mem != NULL)
299 continue;
300 mem = mem_alloc();
301 if (mem == NULL)
302 return ENOMEM;
303 mem->mem.nodeid = citer->cpu.nodeid;
304 /* all other fields are already zero filled */
305
306 MEM_FOREACH(miter) {
307 if (miter->mem.nodeid < citer->cpu.nodeid)
308 continue;
309 MEM_ADD_BEFORE(mem, miter);
310 break;
311 }
312 }
313
314 return 0;
315 }
316
317 /*
318 * Initializes parser. Must be the first function being called when table is
319 * available.
320 */
321 int
acpisrat_init(void)322 acpisrat_init(void)
323 {
324 if (!acpisrat_exist())
325 return EEXIST;
326 return acpisrat_refresh();
327 }
328
329 /*
330 * Re-parse ACPI SRAT table. Useful after hotplugging cpu or RAM.
331 */
332 int
acpisrat_refresh(void)333 acpisrat_refresh(void)
334 {
335 int rc, i, j, k;
336 struct cpulist *citer;
337 struct memlist *miter;
338 uint32_t cnodes = 0, mnodes = 0;
339
340 CPU_INIT();
341 MEM_INIT();
342
343 rc = acpisrat_parse();
344 if (rc)
345 return rc;
346
347 rc = acpisrat_quirks();
348 if (rc)
349 return rc;
350
351 /* cleanup resources */
352 rc = acpisrat_exit();
353 if (rc)
354 return rc;
355
356 ncpus = 0;
357 CPU_FOREACH(citer) {
358 cnodes = MAX(citer->cpu.nodeid, cnodes);
359 ncpus++;
360 }
361
362 nmems = 0;
363 MEM_FOREACH(miter) {
364 mnodes = MAX(miter->mem.nodeid, mnodes);
365 nmems++;
366 }
367
368 nnodes = MAX(cnodes, mnodes) + 1;
369
370 if (nnodes == 0 || nmems == 0 || ncpus == 0) {
371 rc = ENOENT;
372 goto fail;
373 }
374
375 node_array = kmem_zalloc(nnodes * sizeof(struct acpisrat_node),
376 KM_SLEEP);
377 cpu_array = kmem_zalloc(ncpus * sizeof(struct acpisrat_cpu),
378 KM_SLEEP);
379 mem_array = kmem_zalloc(nmems * sizeof(struct acpisrat_mem),
380 KM_SLEEP);
381
382 i = 0;
383 CPU_FOREACH(citer) {
384 memcpy(&cpu_array[i], &citer->cpu, sizeof(struct acpisrat_cpu));
385 i++;
386 node_array[citer->cpu.nodeid].ncpus++;
387 }
388
389 i = 0;
390 MEM_FOREACH(miter) {
391 memcpy(&mem_array[i], &miter->mem, sizeof(struct acpisrat_mem));
392 i++;
393 node_array[miter->mem.nodeid].nmems++;
394 }
395
396 for (i = 0; i < nnodes; i++) {
397 node_array[i].nodeid = i;
398
399 if (node_array[i].ncpus != 0) {
400 node_array[i].cpu = kmem_zalloc(node_array[i].ncpus *
401 sizeof(struct acpisrat_cpu *), KM_SLEEP);
402 }
403 if (node_array[i].nmems != 0) {
404 node_array[i].mem = kmem_zalloc(node_array[i].nmems *
405 sizeof(struct acpisrat_mem *), KM_SLEEP);
406 }
407
408 k = 0;
409 for (j = 0; j < ncpus; j++) {
410 if (cpu_array[j].nodeid != i)
411 continue;
412 KASSERT(node_array[i].cpu != NULL);
413 node_array[i].cpu[k] = &cpu_array[j];
414 k++;
415 }
416
417 k = 0;
418 for (j = 0; j < nmems; j++) {
419 if (mem_array[j].nodeid != i)
420 continue;
421 KASSERT(node_array[i].mem != NULL);
422 node_array[i].mem[k] = &mem_array[j];
423 k++;
424 }
425 }
426
427 fail:
428 while ((citer = CPU_FIRST()) != NULL) {
429 CPU_REM(citer);
430 cpu_free(citer);
431 }
432
433 while ((miter = MEM_FIRST()) != NULL) {
434 MEM_REM(miter);
435 mem_free(miter);
436 }
437
438 return rc;
439 }
440
441 /*
442 * Free allocated memory. Should be called when acpisrat is no longer of any
443 * use.
444 */
445 int
acpisrat_exit(void)446 acpisrat_exit(void)
447 {
448 int i;
449
450 if (node_array) {
451 for (i = 0; i < nnodes; i++) {
452 if (node_array[i].cpu)
453 kmem_free(node_array[i].cpu,
454 node_array[i].ncpus * sizeof(struct acpisrat_cpu *));
455 if (node_array[i].mem)
456 kmem_free(node_array[i].mem,
457 node_array[i].nmems * sizeof(struct acpisrat_mem *));
458 }
459 kmem_free(node_array, nnodes * sizeof(struct acpisrat_node));
460 }
461 node_array = NULL;
462
463 if (cpu_array)
464 kmem_free(cpu_array, ncpus * sizeof(struct acpisrat_cpu));
465 cpu_array = NULL;
466
467 if (mem_array)
468 kmem_free(mem_array, nmems * sizeof(struct acpisrat_mem));
469 mem_array = NULL;
470
471 nnodes = 0;
472 ncpus = 0;
473 nmems = 0;
474
475 return 0;
476 }
477
478 void
acpisrat_dump(void)479 acpisrat_dump(void)
480 {
481 uint32_t i, j, nn, nc, nm;
482 struct acpisrat_cpu c;
483 struct acpisrat_mem m;
484
485 nn = acpisrat_nodes();
486 aprint_debug("SRAT: %u NUMA nodes\n", nn);
487 for (i = 0; i < nn; i++) {
488 nc = acpisrat_node_cpus(i);
489 for (j = 0; j < nc; j++) {
490 acpisrat_cpu(i, j, &c);
491 aprint_debug("SRAT: node %u cpu %u "
492 "(apic %u, sapic %u, flags %u, clockdomain %u)\n",
493 c.nodeid, j, c.apicid, c.sapiceid, c.flags,
494 c.clockdomain);
495 }
496
497 nm = acpisrat_node_memoryranges(i);
498 for (j = 0; j < nm; j++) {
499 acpisrat_mem(i, j, &m);
500 aprint_debug("SRAT: node %u memory range %u (0x%"
501 PRIx64" - 0x%"PRIx64" flags %u)\n",
502 m.nodeid, j, m.baseaddress,
503 m.baseaddress + m.length, m.flags);
504 }
505 }
506 }
507
508 void
acpisrat_load_uvm(void)509 acpisrat_load_uvm(void)
510 {
511 uint32_t i, j, nn, nm;
512 struct acpisrat_mem m;
513
514 nn = acpisrat_nodes();
515 aprint_debug("SRAT: %u NUMA nodes\n", nn);
516 for (i = 0; i < nn; i++) {
517 nm = acpisrat_node_memoryranges(i);
518 for (j = 0; j < nm; j++) {
519 acpisrat_mem(i, j, &m);
520 aprint_debug("SRAT: node %u memory range %u (0x%"
521 PRIx64" - 0x%"PRIx64" flags %u)\n",
522 m.nodeid, j, m.baseaddress,
523 m.baseaddress + m.length, m.flags);
524 uvm_page_numa_load(trunc_page(m.baseaddress),
525 trunc_page(m.length), m.nodeid);
526 }
527 }
528 }
529
530 /*
531 * Get number of NUMA nodes.
532 */
533 uint32_t
acpisrat_nodes(void)534 acpisrat_nodes(void)
535 {
536 return nnodes;
537 }
538
539 /*
540 * Get number of cpus in the node. 0 means, this is a cpu-less node.
541 */
542 uint32_t
acpisrat_node_cpus(acpisrat_nodeid_t nodeid)543 acpisrat_node_cpus(acpisrat_nodeid_t nodeid)
544 {
545 return node_array[nodeid].ncpus;
546 }
547
548 /*
549 * Get number of memory ranges in the node 0 means, this node has no RAM.
550 */
551 uint32_t
acpisrat_node_memoryranges(acpisrat_nodeid_t nodeid)552 acpisrat_node_memoryranges(acpisrat_nodeid_t nodeid)
553 {
554 return node_array[nodeid].nmems;
555 }
556
557 void
acpisrat_cpu(acpisrat_nodeid_t nodeid,uint32_t cpunum,struct acpisrat_cpu * c)558 acpisrat_cpu(acpisrat_nodeid_t nodeid, uint32_t cpunum,
559 struct acpisrat_cpu *c)
560 {
561 memcpy(c, node_array[nodeid].cpu[cpunum],
562 sizeof(struct acpisrat_cpu));
563 }
564
565 void
acpisrat_mem(acpisrat_nodeid_t nodeid,uint32_t memrange,struct acpisrat_mem * mem)566 acpisrat_mem(acpisrat_nodeid_t nodeid, uint32_t memrange,
567 struct acpisrat_mem *mem)
568 {
569 memcpy(mem, node_array[nodeid].mem[memrange],
570 sizeof(struct acpisrat_mem));
571 }
572
573 /*
574 * Get a node from an APIC id (belonging to a cpu).
575 */
576 struct acpisrat_node *
acpisrat_get_node(uint32_t apicid)577 acpisrat_get_node(uint32_t apicid)
578 {
579 struct acpisrat_node *node;
580 struct acpisrat_cpu *cpu;
581 size_t i, n;
582
583 for (i = 0; i < nnodes; i++) {
584 node = &node_array[i];
585
586 for (n = 0; n < node->ncpus; n++) {
587 cpu = node->cpu[n];
588 if (cpu->apicid == apicid) {
589 return node;
590 }
591 }
592 }
593
594 return NULL;
595 }
596