xref: /netbsd-src/sys/dev/acpi/acpi_srat.c (revision 86b783e91d75793e0fdeff7dd5277dd2d70dec09)
1 /* $NetBSD: acpi_srat.c,v 1.9 2024/06/30 17:54:08 jmcneill Exp $ */
2 
3 /*
4  * Copyright (c) 2009 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Christoph Egger.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: acpi_srat.c,v 1.9 2024/06/30 17:54:08 jmcneill Exp $");
34 
35 #include <sys/param.h>
36 #include <sys/kmem.h>
37 #include <sys/systm.h>
38 
39 #include <dev/acpi/acpivar.h>
40 #include <dev/acpi/acpi_srat.h>
41 
42 #include <uvm/uvm_extern.h>
43 
44 static ACPI_TABLE_SRAT *srat;
45 
46 static uint32_t nnodes; /* Number of NUMA nodes */
47 static struct acpisrat_node *node_array; /* Array of NUMA nodes */
48 static uint32_t ncpus; /* Number of CPUs */
49 static struct acpisrat_cpu *cpu_array; /* Array of cpus */
50 static uint32_t nmems; /* Number of Memory ranges */
51 static struct acpisrat_mem *mem_array;
52 
53 struct cpulist {
54 	struct acpisrat_cpu cpu;
55 	TAILQ_ENTRY(cpulist) entry;
56 };
57 
58 static TAILQ_HEAD(, cpulist) cpulisthead;
59 
60 #define CPU_INIT()		TAILQ_INIT(&cpulisthead);
61 #define CPU_FOREACH(cpu)	TAILQ_FOREACH(cpu, &cpulisthead, entry)
62 #define CPU_ADD(cpu)		TAILQ_INSERT_TAIL(&cpulisthead, cpu, entry)
63 #define CPU_REM(cpu)		TAILQ_REMOVE(&cpulisthead, cpu, entry)
64 #define CPU_FIRST()		TAILQ_FIRST(&cpulisthead)
65 
66 struct memlist {
67 	struct acpisrat_mem mem;
68 	TAILQ_ENTRY(memlist) entry;
69 };
70 
71 static TAILQ_HEAD(, memlist) memlisthead;
72 
73 #define MEM_INIT()		TAILQ_INIT(&memlisthead)
74 #define MEM_FOREACH(mem)	TAILQ_FOREACH(mem, &memlisthead, entry)
75 #define MEM_ADD(mem)		TAILQ_INSERT_TAIL(&memlisthead, mem, entry)
76 #define MEM_ADD_BEFORE(mem, b)	TAILQ_INSERT_BEFORE(b, mem, entry)
77 #define MEM_REM(mem)		TAILQ_REMOVE(&memlisthead, mem, entry)
78 #define MEM_FIRST()		TAILQ_FIRST(&memlisthead)
79 
80 
81 static struct cpulist *
cpu_alloc(void)82 cpu_alloc(void)
83 {
84 	return kmem_zalloc(sizeof(struct cpulist), KM_SLEEP);
85 }
86 
87 static void
cpu_free(struct cpulist * c)88 cpu_free(struct cpulist *c)
89 {
90 	kmem_free(c, sizeof(struct cpulist));
91 }
92 
93 static struct memlist *
mem_alloc(void)94 mem_alloc(void)
95 {
96 	return kmem_zalloc(sizeof(struct memlist), KM_SLEEP);
97 }
98 
99 static void
mem_free(struct memlist * m)100 mem_free(struct memlist *m)
101 {
102 	kmem_free(m, sizeof(struct memlist));
103 }
104 
105 static struct memlist *
mem_get(acpisrat_nodeid_t nodeid)106 mem_get(acpisrat_nodeid_t nodeid)
107 {
108 	struct memlist *tmp;
109 
110 	MEM_FOREACH(tmp) {
111 		if (tmp->mem.nodeid == nodeid)
112 			return tmp;
113 	}
114 
115 	return NULL;
116 }
117 
118 /*
119  * Returns true if ACPI SRAT table is available. If table does not exist, all
120  * functions below have undefined behaviour.
121  */
122 bool
acpisrat_exist(void)123 acpisrat_exist(void)
124 {
125 	ACPI_TABLE_HEADER *table;
126 	ACPI_STATUS rv;
127 
128 	rv = AcpiGetTable(ACPI_SIG_SRAT, 1, (ACPI_TABLE_HEADER **)&table);
129 	if (ACPI_FAILURE(rv))
130 		return false;
131 
132 	/* Check if header is valid */
133 	if (table == NULL)
134 		return false;
135 
136 	if (table->Length == 0xffffffff)
137 		return false;
138 
139 	srat = (ACPI_TABLE_SRAT *)table;
140 
141 	return true;
142 }
143 
144 static int
acpisrat_parse(void)145 acpisrat_parse(void)
146 {
147 	ACPI_SUBTABLE_HEADER *subtable;
148 	ACPI_SRAT_CPU_AFFINITY *srat_cpu;
149 	ACPI_SRAT_MEM_AFFINITY *srat_mem;
150 	ACPI_SRAT_X2APIC_CPU_AFFINITY *srat_x2apic;
151 	ACPI_SRAT_GICC_AFFINITY *srat_gicc;
152 
153 	acpisrat_nodeid_t nodeid;
154 	struct cpulist *cpuentry = NULL;
155 	struct memlist *mementry;
156 	uint32_t srat_pos;
157 	bool ignore_cpu_affinity = false;
158 
159 	KASSERT(srat != NULL);
160 
161 	/* Content starts right after the header */
162 	srat_pos = sizeof(ACPI_TABLE_SRAT);
163 
164 	while (srat_pos < srat->Header.Length) {
165 		subtable = (ACPI_SUBTABLE_HEADER *)((char *)srat + srat_pos);
166 		srat_pos += subtable->Length;
167 
168 		switch (subtable->Type) {
169 		case ACPI_SRAT_TYPE_CPU_AFFINITY:
170 			if (ignore_cpu_affinity)
171 				continue;
172 
173 			srat_cpu = (ACPI_SRAT_CPU_AFFINITY *)subtable;
174 			if ((srat_cpu->Flags & ACPI_SRAT_CPU_ENABLED) == 0)
175 				break;
176 			nodeid = (srat_cpu->ProximityDomainHi[2] << 24) |
177 			    (srat_cpu->ProximityDomainHi[1] << 16) |
178 			    (srat_cpu->ProximityDomainHi[0] << 8) |
179 			    (srat_cpu->ProximityDomainLo);
180 
181 			cpuentry = cpu_alloc();
182 			if (cpuentry == NULL)
183 				return ENOMEM;
184 			CPU_ADD(cpuentry);
185 
186 			cpuentry->cpu.nodeid = nodeid;
187 			cpuentry->cpu.apicid = srat_cpu->ApicId;
188 			cpuentry->cpu.sapiceid = srat_cpu->LocalSapicEid;
189 			cpuentry->cpu.flags = srat_cpu->Flags;
190 			cpuentry->cpu.clockdomain = srat_cpu->ClockDomain;
191 			break;
192 
193 		case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
194 			srat_mem = (ACPI_SRAT_MEM_AFFINITY *)subtable;
195 			nodeid = srat_mem->ProximityDomain;
196 			if ((srat_mem->Flags & ACPI_SRAT_MEM_ENABLED) == 0)
197 				break;
198 
199 			mementry = mem_alloc();
200 			if (mementry == NULL)
201 				return ENOMEM;
202 			MEM_ADD(mementry);
203 
204 			mementry->mem.nodeid = nodeid;
205 			mementry->mem.baseaddress = srat_mem->BaseAddress;
206 			mementry->mem.length = srat_mem->Length;
207 			mementry->mem.flags = srat_mem->Flags;
208 			break;
209 
210 		case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
211 			srat_x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)subtable;
212 			if ((srat_x2apic->Flags & ACPI_SRAT_CPU_ENABLED) == 0)
213 				break;
214 			nodeid = srat_x2apic->ProximityDomain;
215 
216 			/*
217 			 * This table entry overrides
218 			 * ACPI_SRAT_TYPE_CPU_AFFINITY.
219 			 */
220 			if (!ignore_cpu_affinity) {
221 				struct cpulist *citer;
222 				while ((citer = CPU_FIRST()) != NULL) {
223 					CPU_REM(citer);
224 					cpu_free(citer);
225 				}
226 				ignore_cpu_affinity = true;
227 			}
228 
229 			cpuentry = cpu_alloc();
230 			if (cpuentry == NULL)
231 				return ENOMEM;
232 			CPU_ADD(cpuentry);
233 
234 			cpuentry->cpu.nodeid = nodeid;
235 			cpuentry->cpu.apicid = srat_x2apic->ApicId;
236 			cpuentry->cpu.clockdomain = srat_x2apic->ClockDomain;
237 			cpuentry->cpu.flags = srat_x2apic->Flags;
238 			break;
239 
240 		case ACPI_SRAT_TYPE_GICC_AFFINITY:
241 			srat_gicc = (ACPI_SRAT_GICC_AFFINITY *)subtable;
242 			if ((srat_gicc->Flags & ACPI_SRAT_GICC_ENABLED) == 0)
243 				break;
244 			nodeid = srat_gicc->ProximityDomain;
245 
246 			/*
247 			 * This table entry overrides
248 			 * ACPI_SRAT_TYPE_CPU_AFFINITY.
249 			 */
250 			if (!ignore_cpu_affinity) {
251 				struct cpulist *citer;
252 				while ((citer = CPU_FIRST()) != NULL) {
253 					CPU_REM(citer);
254 					cpu_free(citer);
255 				}
256 				ignore_cpu_affinity = true;
257 			}
258 
259 			cpuentry = cpu_alloc();
260 			if (cpuentry == NULL)
261 				return ENOMEM;
262 			CPU_ADD(cpuentry);
263 
264 			cpuentry->cpu.nodeid = nodeid;
265 			cpuentry->cpu.apicid = srat_gicc->AcpiProcessorUid;
266 			cpuentry->cpu.clockdomain = srat_gicc->ClockDomain;
267 			cpuentry->cpu.flags = srat_gicc->Flags;
268 			break;
269 
270 		case ACPI_SRAT_TYPE_RESERVED:
271 			printf("ACPI SRAT subtable reserved, length: 0x%x\n",
272 				subtable->Length);
273 			break;
274 		}
275 	}
276 
277 	return 0;
278 }
279 
280 static int
acpisrat_quirks(void)281 acpisrat_quirks(void)
282 {
283 	struct cpulist *citer;
284 	struct memlist *mem, *miter;
285 
286 	/* Some sanity checks. */
287 
288 	/*
289 	 * Deal with holes in the memory nodes. BIOS doesn't enlist memory
290 	 * nodes which don't have any memory modules plugged in. This behaviour
291 	 * has been observed on AMD machines.
292 	 *
293 	 * Do that by searching for CPUs in NUMA nodes which don't exist in the
294 	 * memory and then insert a zero memory range for the missing node.
295 	 */
296 	CPU_FOREACH(citer) {
297 		mem = mem_get(citer->cpu.nodeid);
298 		if (mem != NULL)
299 			continue;
300 		mem = mem_alloc();
301 		if (mem == NULL)
302 			return ENOMEM;
303 		mem->mem.nodeid = citer->cpu.nodeid;
304 		/* all other fields are already zero filled */
305 
306 		MEM_FOREACH(miter) {
307 			if (miter->mem.nodeid < citer->cpu.nodeid)
308 				continue;
309 			MEM_ADD_BEFORE(mem, miter);
310 			break;
311 		}
312 	}
313 
314 	return 0;
315 }
316 
/*
 * Initializes the parser. Must be the first function called when the table
 * is available.
 *
 * Returns EEXIST if acpisrat_exist() reports no table, otherwise the result
 * of acpisrat_refresh().
 */
int
acpisrat_init(void)
{
	if (acpisrat_exist())
		return acpisrat_refresh();

	return EEXIST;
}
328 
329 /*
330  * Re-parse ACPI SRAT table. Useful after hotplugging cpu or RAM.
331  */
332 int
acpisrat_refresh(void)333 acpisrat_refresh(void)
334 {
335 	int rc, i, j, k;
336 	struct cpulist *citer;
337 	struct memlist *miter;
338 	uint32_t cnodes = 0, mnodes = 0;
339 
340 	CPU_INIT();
341 	MEM_INIT();
342 
343 	rc = acpisrat_parse();
344 	if (rc)
345 		return rc;
346 
347 	rc = acpisrat_quirks();
348 	if (rc)
349 		return rc;
350 
351 	/* cleanup resources */
352 	rc = acpisrat_exit();
353 	if (rc)
354 		return rc;
355 
356 	ncpus = 0;
357 	CPU_FOREACH(citer) {
358 		cnodes = MAX(citer->cpu.nodeid, cnodes);
359 		ncpus++;
360 	}
361 
362 	nmems = 0;
363 	MEM_FOREACH(miter) {
364 		mnodes = MAX(miter->mem.nodeid, mnodes);
365 		nmems++;
366 	}
367 
368 	nnodes = MAX(cnodes, mnodes) + 1;
369 
370 	if (nnodes == 0 || nmems == 0 || ncpus == 0) {
371 		rc = ENOENT;
372 		goto fail;
373 	}
374 
375 	node_array = kmem_zalloc(nnodes * sizeof(struct acpisrat_node),
376 	    KM_SLEEP);
377 	cpu_array = kmem_zalloc(ncpus * sizeof(struct acpisrat_cpu),
378 	    KM_SLEEP);
379 	mem_array = kmem_zalloc(nmems * sizeof(struct acpisrat_mem),
380 	    KM_SLEEP);
381 
382 	i = 0;
383 	CPU_FOREACH(citer) {
384 		memcpy(&cpu_array[i], &citer->cpu, sizeof(struct acpisrat_cpu));
385 		i++;
386 		node_array[citer->cpu.nodeid].ncpus++;
387 	}
388 
389 	i = 0;
390 	MEM_FOREACH(miter) {
391 		memcpy(&mem_array[i], &miter->mem, sizeof(struct acpisrat_mem));
392 		i++;
393 		node_array[miter->mem.nodeid].nmems++;
394 	}
395 
396 	for (i = 0; i < nnodes; i++) {
397 		node_array[i].nodeid = i;
398 
399 		if (node_array[i].ncpus != 0) {
400 			node_array[i].cpu = kmem_zalloc(node_array[i].ncpus *
401 			    sizeof(struct acpisrat_cpu *), KM_SLEEP);
402 		}
403 		if (node_array[i].nmems != 0) {
404 			node_array[i].mem = kmem_zalloc(node_array[i].nmems *
405 			    sizeof(struct acpisrat_mem *), KM_SLEEP);
406 		}
407 
408 		k = 0;
409 		for (j = 0; j < ncpus; j++) {
410 			if (cpu_array[j].nodeid != i)
411 				continue;
412 			KASSERT(node_array[i].cpu != NULL);
413 			node_array[i].cpu[k] = &cpu_array[j];
414 			k++;
415 		}
416 
417 		k = 0;
418 		for (j = 0; j < nmems; j++) {
419 			if (mem_array[j].nodeid != i)
420 				continue;
421 			KASSERT(node_array[i].mem != NULL);
422 			node_array[i].mem[k] = &mem_array[j];
423 			k++;
424 		}
425 	}
426 
427  fail:
428 	while ((citer = CPU_FIRST()) != NULL) {
429 		CPU_REM(citer);
430 		cpu_free(citer);
431 	}
432 
433 	while ((miter = MEM_FIRST()) != NULL) {
434 		MEM_REM(miter);
435 		mem_free(miter);
436 	}
437 
438 	return rc;
439 }
440 
441 /*
442  * Free allocated memory. Should be called when acpisrat is no longer of any
443  * use.
444  */
445 int
acpisrat_exit(void)446 acpisrat_exit(void)
447 {
448 	int i;
449 
450 	if (node_array) {
451 		for (i = 0; i < nnodes; i++) {
452 			if (node_array[i].cpu)
453 				kmem_free(node_array[i].cpu,
454 				    node_array[i].ncpus * sizeof(struct acpisrat_cpu *));
455 			if (node_array[i].mem)
456 				kmem_free(node_array[i].mem,
457 				    node_array[i].nmems * sizeof(struct acpisrat_mem *));
458 		}
459 		kmem_free(node_array, nnodes * sizeof(struct acpisrat_node));
460 	}
461 	node_array = NULL;
462 
463 	if (cpu_array)
464 		kmem_free(cpu_array, ncpus * sizeof(struct acpisrat_cpu));
465 	cpu_array = NULL;
466 
467 	if (mem_array)
468 		kmem_free(mem_array, nmems * sizeof(struct acpisrat_mem));
469 	mem_array = NULL;
470 
471 	nnodes = 0;
472 	ncpus = 0;
473 	nmems = 0;
474 
475 	return 0;
476 }
477 
478 void
acpisrat_dump(void)479 acpisrat_dump(void)
480 {
481 	uint32_t i, j, nn, nc, nm;
482 	struct acpisrat_cpu c;
483 	struct acpisrat_mem m;
484 
485 	nn = acpisrat_nodes();
486 	aprint_debug("SRAT: %u NUMA nodes\n", nn);
487 	for (i = 0; i < nn; i++) {
488 		nc = acpisrat_node_cpus(i);
489 		for (j = 0; j < nc; j++) {
490 			acpisrat_cpu(i, j, &c);
491 			aprint_debug("SRAT: node %u cpu %u "
492 			    "(apic %u, sapic %u, flags %u, clockdomain %u)\n",
493 			    c.nodeid, j, c.apicid, c.sapiceid, c.flags,
494 			    c.clockdomain);
495 		}
496 
497 		nm = acpisrat_node_memoryranges(i);
498 		for (j = 0; j < nm; j++) {
499 			acpisrat_mem(i, j, &m);
500 			aprint_debug("SRAT: node %u memory range %u (0x%"
501 			    PRIx64" - 0x%"PRIx64" flags %u)\n",
502 			    m.nodeid, j, m.baseaddress,
503 			    m.baseaddress + m.length, m.flags);
504 		}
505 	}
506 }
507 
508 void
acpisrat_load_uvm(void)509 acpisrat_load_uvm(void)
510 {
511 	uint32_t i, j, nn, nm;
512 	struct acpisrat_mem m;
513 
514 	nn = acpisrat_nodes();
515 	aprint_debug("SRAT: %u NUMA nodes\n", nn);
516 	for (i = 0; i < nn; i++) {
517 		nm = acpisrat_node_memoryranges(i);
518 		for (j = 0; j < nm; j++) {
519 			acpisrat_mem(i, j, &m);
520 			aprint_debug("SRAT: node %u memory range %u (0x%"
521 			    PRIx64" - 0x%"PRIx64" flags %u)\n",
522 			    m.nodeid, j, m.baseaddress,
523 			    m.baseaddress + m.length, m.flags);
524 			uvm_page_numa_load(trunc_page(m.baseaddress),
525 			    trunc_page(m.length), m.nodeid);
526 		}
527 	}
528 }
529 
530 /*
531  * Get number of NUMA nodes.
532  */
533 uint32_t
acpisrat_nodes(void)534 acpisrat_nodes(void)
535 {
536 	return nnodes;
537 }
538 
539 /*
540  * Get number of cpus in the node. 0 means, this is a cpu-less node.
541  */
542 uint32_t
acpisrat_node_cpus(acpisrat_nodeid_t nodeid)543 acpisrat_node_cpus(acpisrat_nodeid_t nodeid)
544 {
545 	return node_array[nodeid].ncpus;
546 }
547 
548 /*
549  * Get number of memory ranges in the node 0 means, this node has no RAM.
550  */
551 uint32_t
acpisrat_node_memoryranges(acpisrat_nodeid_t nodeid)552 acpisrat_node_memoryranges(acpisrat_nodeid_t nodeid)
553 {
554 	return node_array[nodeid].nmems;
555 }
556 
557 void
acpisrat_cpu(acpisrat_nodeid_t nodeid,uint32_t cpunum,struct acpisrat_cpu * c)558 acpisrat_cpu(acpisrat_nodeid_t nodeid, uint32_t cpunum,
559     struct acpisrat_cpu *c)
560 {
561 	memcpy(c, node_array[nodeid].cpu[cpunum],
562 	    sizeof(struct acpisrat_cpu));
563 }
564 
565 void
acpisrat_mem(acpisrat_nodeid_t nodeid,uint32_t memrange,struct acpisrat_mem * mem)566 acpisrat_mem(acpisrat_nodeid_t nodeid, uint32_t memrange,
567     struct acpisrat_mem *mem)
568 {
569 	memcpy(mem, node_array[nodeid].mem[memrange],
570 	    sizeof(struct acpisrat_mem));
571 }
572 
573 /*
574  * Get a node from an APIC id (belonging to a cpu).
575  */
576 struct acpisrat_node *
acpisrat_get_node(uint32_t apicid)577 acpisrat_get_node(uint32_t apicid)
578 {
579 	struct acpisrat_node *node;
580 	struct acpisrat_cpu *cpu;
581 	size_t i, n;
582 
583 	for (i = 0; i < nnodes; i++) {
584 		node = &node_array[i];
585 
586 		for (n = 0; n < node->ncpus; n++) {
587 			cpu = node->cpu[n];
588 			if (cpu->apicid == apicid) {
589 				return node;
590 			}
591 		}
592 	}
593 
594 	return NULL;
595 }
596