xref: /netbsd-src/sys/dev/acpi/acpi_srat.c (revision 181254a7b1bdde6873432bffef2d2decc4b5c22f)
1 /* $NetBSD: acpi_srat.c,v 1.8 2019/12/27 12:51:57 ad Exp $ */
2 
3 /*
4  * Copyright (c) 2009 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Christoph Egger.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: acpi_srat.c,v 1.8 2019/12/27 12:51:57 ad Exp $");
34 
35 #include <sys/param.h>
36 #include <sys/kmem.h>
37 #include <sys/systm.h>
38 
39 #include <dev/acpi/acpivar.h>
40 #include <dev/acpi/acpi_srat.h>
41 
42 #include <uvm/uvm_extern.h>
43 
44 static ACPI_TABLE_SRAT *srat;
45 
46 static uint32_t nnodes; /* Number of NUMA nodes */
47 static struct acpisrat_node *node_array; /* Array of NUMA nodes */
48 static uint32_t ncpus; /* Number of CPUs */
49 static struct acpisrat_cpu *cpu_array; /* Array of cpus */
50 static uint32_t nmems; /* Number of Memory ranges */
51 static struct acpisrat_mem *mem_array;
52 
53 struct cpulist {
54 	struct acpisrat_cpu cpu;
55 	TAILQ_ENTRY(cpulist) entry;
56 };
57 
58 static TAILQ_HEAD(, cpulist) cpulisthead;
59 
/*
 * Helpers operating on the temporary CPU list (cpulisthead).
 *
 * None of the macros carries a trailing semicolon, so each one expands to a
 * single expression/statement head and can be used safely in if/else bodies.
 */
#define CPU_INIT()		TAILQ_INIT(&cpulisthead)
#define CPU_FOREACH(cpu)	TAILQ_FOREACH(cpu, &cpulisthead, entry)
#define CPU_ADD(cpu)		TAILQ_INSERT_TAIL(&cpulisthead, cpu, entry)
#define CPU_REM(cpu)		TAILQ_REMOVE(&cpulisthead, cpu, entry)
#define CPU_FIRST()		TAILQ_FIRST(&cpulisthead)
65 
66 struct memlist {
67 	struct acpisrat_mem mem;
68 	TAILQ_ENTRY(memlist) entry;
69 };
70 
71 static TAILQ_HEAD(, memlist) memlisthead;
72 
/* Helpers operating on the temporary memory-range list (memlisthead). */
#define MEM_INIT()			TAILQ_INIT(&memlisthead)
#define MEM_FOREACH(it)			TAILQ_FOREACH(it, &memlisthead, entry)
#define MEM_ADD(m)			TAILQ_INSERT_TAIL(&memlisthead, m, entry)
#define MEM_ADD_BEFORE(m, before)	TAILQ_INSERT_BEFORE(before, m, entry)
#define MEM_REM(m)			TAILQ_REMOVE(&memlisthead, m, entry)
#define MEM_FIRST()			TAILQ_FIRST(&memlisthead)
79 
80 
81 static struct cpulist *
82 cpu_alloc(void)
83 {
84 	return kmem_zalloc(sizeof(struct cpulist), KM_SLEEP);
85 }
86 
87 static void
88 cpu_free(struct cpulist *c)
89 {
90 	kmem_free(c, sizeof(struct cpulist));
91 }
92 
93 static struct memlist *
94 mem_alloc(void)
95 {
96 	return kmem_zalloc(sizeof(struct memlist), KM_SLEEP);
97 }
98 
99 static void
100 mem_free(struct memlist *m)
101 {
102 	kmem_free(m, sizeof(struct memlist));
103 }
104 
105 static struct memlist *
106 mem_get(acpisrat_nodeid_t nodeid)
107 {
108 	struct memlist *tmp;
109 
110 	MEM_FOREACH(tmp) {
111 		if (tmp->mem.nodeid == nodeid)
112 			return tmp;
113 	}
114 
115 	return NULL;
116 }
117 
118 /*
119  * Returns true if ACPI SRAT table is available. If table does not exist, all
120  * functions below have undefined behaviour.
121  */
122 bool
123 acpisrat_exist(void)
124 {
125 	ACPI_TABLE_HEADER *table;
126 	ACPI_STATUS rv;
127 
128 	rv = AcpiGetTable(ACPI_SIG_SRAT, 1, (ACPI_TABLE_HEADER **)&table);
129 	if (ACPI_FAILURE(rv))
130 		return false;
131 
132 	/* Check if header is valid */
133 	if (table == NULL)
134 		return false;
135 
136 	if (table->Length == 0xffffffff)
137 		return false;
138 
139 	srat = (ACPI_TABLE_SRAT *)table;
140 
141 	return true;
142 }
143 
144 static int
145 acpisrat_parse(void)
146 {
147 	ACPI_SUBTABLE_HEADER *subtable;
148 	ACPI_SRAT_CPU_AFFINITY *srat_cpu;
149 	ACPI_SRAT_MEM_AFFINITY *srat_mem;
150 	ACPI_SRAT_X2APIC_CPU_AFFINITY *srat_x2apic;
151 
152 	acpisrat_nodeid_t nodeid;
153 	struct cpulist *cpuentry = NULL;
154 	struct memlist *mementry;
155 	uint32_t srat_pos;
156 	bool ignore_cpu_affinity = false;
157 
158 	KASSERT(srat != NULL);
159 
160 	/* Content starts right after the header */
161 	srat_pos = sizeof(ACPI_TABLE_SRAT);
162 
163 	while (srat_pos < srat->Header.Length) {
164 		subtable = (ACPI_SUBTABLE_HEADER *)((char *)srat + srat_pos);
165 		srat_pos += subtable->Length;
166 
167 		switch (subtable->Type) {
168 		case ACPI_SRAT_TYPE_CPU_AFFINITY:
169 			if (ignore_cpu_affinity)
170 				continue;
171 
172 			srat_cpu = (ACPI_SRAT_CPU_AFFINITY *)subtable;
173 			if ((srat_cpu->Flags & ACPI_SRAT_CPU_ENABLED) == 0)
174 				break;
175 			nodeid = (srat_cpu->ProximityDomainHi[2] << 24) |
176 			    (srat_cpu->ProximityDomainHi[1] << 16) |
177 			    (srat_cpu->ProximityDomainHi[0] << 8) |
178 			    (srat_cpu->ProximityDomainLo);
179 
180 			cpuentry = cpu_alloc();
181 			if (cpuentry == NULL)
182 				return ENOMEM;
183 			CPU_ADD(cpuentry);
184 
185 			cpuentry->cpu.nodeid = nodeid;
186 			cpuentry->cpu.apicid = srat_cpu->ApicId;
187 			cpuentry->cpu.sapiceid = srat_cpu->LocalSapicEid;
188 			cpuentry->cpu.flags = srat_cpu->Flags;
189 			cpuentry->cpu.clockdomain = srat_cpu->ClockDomain;
190 			break;
191 
192 		case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
193 			srat_mem = (ACPI_SRAT_MEM_AFFINITY *)subtable;
194 			nodeid = srat_mem->ProximityDomain;
195 			if ((srat_mem->Flags & ACPI_SRAT_MEM_ENABLED) == 0)
196 				break;
197 
198 			mementry = mem_alloc();
199 			if (mementry == NULL)
200 				return ENOMEM;
201 			MEM_ADD(mementry);
202 
203 			mementry->mem.nodeid = nodeid;
204 			mementry->mem.baseaddress = srat_mem->BaseAddress;
205 			mementry->mem.length = srat_mem->Length;
206 			mementry->mem.flags = srat_mem->Flags;
207 			break;
208 
209 		case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
210 			srat_x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)subtable;
211 			if ((srat_x2apic->Flags & ACPI_SRAT_CPU_ENABLED) == 0)
212 				break;
213 			nodeid = srat_x2apic->ProximityDomain;
214 
215 			/*
216 			 * This table entry overrides
217 			 * ACPI_SRAT_TYPE_CPU_AFFINITY.
218 			 */
219 			if (!ignore_cpu_affinity) {
220 				struct cpulist *citer;
221 				while ((citer = CPU_FIRST()) != NULL) {
222 					CPU_REM(citer);
223 					cpu_free(citer);
224 				}
225 				ignore_cpu_affinity = true;
226 			}
227 
228 			cpuentry = cpu_alloc();
229 			if (cpuentry == NULL)
230 				return ENOMEM;
231 			CPU_ADD(cpuentry);
232 
233 			cpuentry->cpu.nodeid = nodeid;
234 			cpuentry->cpu.apicid = srat_x2apic->ApicId;
235 			cpuentry->cpu.clockdomain = srat_x2apic->ClockDomain;
236 			cpuentry->cpu.flags = srat_x2apic->Flags;
237 			break;
238 
239 		case ACPI_SRAT_TYPE_RESERVED:
240 			printf("ACPI SRAT subtable reserved, length: 0x%x\n",
241 				subtable->Length);
242 			break;
243 		}
244 	}
245 
246 	return 0;
247 }
248 
249 static int
250 acpisrat_quirks(void)
251 {
252 	struct cpulist *citer;
253 	struct memlist *mem, *miter;
254 
255 	/* Some sanity checks. */
256 
257 	/*
258 	 * Deal with holes in the memory nodes. BIOS doesn't enlist memory
259 	 * nodes which don't have any memory modules plugged in. This behaviour
260 	 * has been observed on AMD machines.
261 	 *
262 	 * Do that by searching for CPUs in NUMA nodes which don't exist in the
263 	 * memory and then insert a zero memory range for the missing node.
264 	 */
265 	CPU_FOREACH(citer) {
266 		mem = mem_get(citer->cpu.nodeid);
267 		if (mem != NULL)
268 			continue;
269 		mem = mem_alloc();
270 		if (mem == NULL)
271 			return ENOMEM;
272 		mem->mem.nodeid = citer->cpu.nodeid;
273 		/* all other fields are already zero filled */
274 
275 		MEM_FOREACH(miter) {
276 			if (miter->mem.nodeid < citer->cpu.nodeid)
277 				continue;
278 			MEM_ADD_BEFORE(mem, miter);
279 			break;
280 		}
281 	}
282 
283 	return 0;
284 }
285 
/*
 * Initializes the parser. Must be the first function being called when the
 * table is available.
 *
 * Returns EEXIST if acpisrat_exist() reports no table, otherwise the result
 * of acpisrat_refresh().
 */
int
acpisrat_init(void)
{
	int error = EEXIST;

	if (acpisrat_exist())
		error = acpisrat_refresh();

	return error;
}
297 
298 /*
299  * Re-parse ACPI SRAT table. Useful after hotplugging cpu or RAM.
300  */
301 int
302 acpisrat_refresh(void)
303 {
304 	int rc, i, j, k;
305 	struct cpulist *citer;
306 	struct memlist *miter;
307 	uint32_t cnodes = 0, mnodes = 0;
308 
309 	CPU_INIT();
310 	MEM_INIT();
311 
312 	rc = acpisrat_parse();
313 	if (rc)
314 		return rc;
315 
316 	rc = acpisrat_quirks();
317 	if (rc)
318 		return rc;
319 
320 	/* cleanup resources */
321 	rc = acpisrat_exit();
322 	if (rc)
323 		return rc;
324 
325 	ncpus = 0;
326 	CPU_FOREACH(citer) {
327 		cnodes = MAX(citer->cpu.nodeid, cnodes);
328 		ncpus++;
329 	}
330 
331 	nmems = 0;
332 	MEM_FOREACH(miter) {
333 		mnodes = MAX(miter->mem.nodeid, mnodes);
334 		nmems++;
335 	}
336 
337 	nnodes = MAX(cnodes, mnodes) + 1;
338 
339 	if (nnodes == 0 || nmems == 0 || ncpus == 0) {
340 		rc = ENOENT;
341 		goto fail;
342 	}
343 
344 	node_array = kmem_zalloc(nnodes * sizeof(struct acpisrat_node),
345 	    KM_SLEEP);
346 	cpu_array = kmem_zalloc(ncpus * sizeof(struct acpisrat_cpu),
347 	    KM_SLEEP);
348 	mem_array = kmem_zalloc(nmems * sizeof(struct acpisrat_mem),
349 	    KM_SLEEP);
350 
351 	i = 0;
352 	CPU_FOREACH(citer) {
353 		memcpy(&cpu_array[i], &citer->cpu, sizeof(struct acpisrat_cpu));
354 		i++;
355 		node_array[citer->cpu.nodeid].ncpus++;
356 	}
357 
358 	i = 0;
359 	MEM_FOREACH(miter) {
360 		memcpy(&mem_array[i], &miter->mem, sizeof(struct acpisrat_mem));
361 		i++;
362 		node_array[miter->mem.nodeid].nmems++;
363 	}
364 
365 	for (i = 0; i < nnodes; i++) {
366 		node_array[i].nodeid = i;
367 
368 		if (node_array[i].ncpus != 0) {
369 			node_array[i].cpu = kmem_zalloc(node_array[i].ncpus *
370 			    sizeof(struct acpisrat_cpu *), KM_SLEEP);
371 		}
372 		if (node_array[i].nmems != 0) {
373 			node_array[i].mem = kmem_zalloc(node_array[i].nmems *
374 			    sizeof(struct acpisrat_mem *), KM_SLEEP);
375 		}
376 
377 		k = 0;
378 		for (j = 0; j < ncpus; j++) {
379 			if (cpu_array[j].nodeid != i)
380 				continue;
381 			KASSERT(node_array[i].cpu != NULL);
382 			node_array[i].cpu[k] = &cpu_array[j];
383 			k++;
384 		}
385 
386 		k = 0;
387 		for (j = 0; j < nmems; j++) {
388 			if (mem_array[j].nodeid != i)
389 				continue;
390 			KASSERT(node_array[i].mem != NULL);
391 			node_array[i].mem[k] = &mem_array[j];
392 			k++;
393 		}
394 	}
395 
396  fail:
397 	while ((citer = CPU_FIRST()) != NULL) {
398 		CPU_REM(citer);
399 		cpu_free(citer);
400 	}
401 
402 	while ((miter = MEM_FIRST()) != NULL) {
403 		MEM_REM(miter);
404 		mem_free(miter);
405 	}
406 
407 	return rc;
408 }
409 
410 /*
411  * Free allocated memory. Should be called when acpisrat is no longer of any
412  * use.
413  */
414 int
415 acpisrat_exit(void)
416 {
417 	int i;
418 
419 	if (node_array) {
420 		for (i = 0; i < nnodes; i++) {
421 			if (node_array[i].cpu)
422 				kmem_free(node_array[i].cpu,
423 				    node_array[i].ncpus * sizeof(struct acpisrat_cpu *));
424 			if (node_array[i].mem)
425 				kmem_free(node_array[i].mem,
426 				    node_array[i].nmems * sizeof(struct acpisrat_mem *));
427 		}
428 		kmem_free(node_array, nnodes * sizeof(struct acpisrat_node));
429 	}
430 	node_array = NULL;
431 
432 	if (cpu_array)
433 		kmem_free(cpu_array, ncpus * sizeof(struct acpisrat_cpu));
434 	cpu_array = NULL;
435 
436 	if (mem_array)
437 		kmem_free(mem_array, nmems * sizeof(struct acpisrat_mem));
438 	mem_array = NULL;
439 
440 	nnodes = 0;
441 	ncpus = 0;
442 	nmems = 0;
443 
444 	return 0;
445 }
446 
447 void
448 acpisrat_dump(void)
449 {
450 	uint32_t i, j, nn, nc, nm;
451 	struct acpisrat_cpu c;
452 	struct acpisrat_mem m;
453 
454 	nn = acpisrat_nodes();
455 	aprint_debug("SRAT: %u NUMA nodes\n", nn);
456 	for (i = 0; i < nn; i++) {
457 		nc = acpisrat_node_cpus(i);
458 		for (j = 0; j < nc; j++) {
459 			acpisrat_cpu(i, j, &c);
460 			aprint_debug("SRAT: node %u cpu %u "
461 			    "(apic %u, sapic %u, flags %u, clockdomain %u)\n",
462 			    c.nodeid, j, c.apicid, c.sapiceid, c.flags,
463 			    c.clockdomain);
464 		}
465 
466 		nm = acpisrat_node_memoryranges(i);
467 		for (j = 0; j < nm; j++) {
468 			acpisrat_mem(i, j, &m);
469 			aprint_debug("SRAT: node %u memory range %u (0x%"
470 			    PRIx64" - 0x%"PRIx64" flags %u)\n",
471 			    m.nodeid, j, m.baseaddress,
472 			    m.baseaddress + m.length, m.flags);
473 		}
474 	}
475 }
476 
477 void
478 acpisrat_load_uvm(void)
479 {
480 	uint32_t i, j, nn, nm;
481 	struct acpisrat_mem m;
482 
483 	nn = acpisrat_nodes();
484 	aprint_debug("SRAT: %u NUMA nodes\n", nn);
485 	for (i = 0; i < nn; i++) {
486 		nm = acpisrat_node_memoryranges(i);
487 		for (j = 0; j < nm; j++) {
488 			acpisrat_mem(i, j, &m);
489 			aprint_debug("SRAT: node %u memory range %u (0x%"
490 			    PRIx64" - 0x%"PRIx64" flags %u)\n",
491 			    m.nodeid, j, m.baseaddress,
492 			    m.baseaddress + m.length, m.flags);
493 			uvm_page_numa_load(trunc_page(m.baseaddress),
494 			    trunc_page(m.length), m.nodeid);
495 		}
496 	}
497 }
498 
499 /*
500  * Get number of NUMA nodes.
501  */
502 uint32_t
503 acpisrat_nodes(void)
504 {
505 	return nnodes;
506 }
507 
508 /*
509  * Get number of cpus in the node. 0 means, this is a cpu-less node.
510  */
511 uint32_t
512 acpisrat_node_cpus(acpisrat_nodeid_t nodeid)
513 {
514 	return node_array[nodeid].ncpus;
515 }
516 
517 /*
518  * Get number of memory ranges in the node 0 means, this node has no RAM.
519  */
520 uint32_t
521 acpisrat_node_memoryranges(acpisrat_nodeid_t nodeid)
522 {
523 	return node_array[nodeid].nmems;
524 }
525 
526 void
527 acpisrat_cpu(acpisrat_nodeid_t nodeid, uint32_t cpunum,
528     struct acpisrat_cpu *c)
529 {
530 	memcpy(c, node_array[nodeid].cpu[cpunum],
531 	    sizeof(struct acpisrat_cpu));
532 }
533 
534 void
535 acpisrat_mem(acpisrat_nodeid_t nodeid, uint32_t memrange,
536     struct acpisrat_mem *mem)
537 {
538 	memcpy(mem, node_array[nodeid].mem[memrange],
539 	    sizeof(struct acpisrat_mem));
540 }
541 
542 /*
543  * Get a node from an APIC id (belonging to a cpu).
544  */
545 struct acpisrat_node *
546 acpisrat_get_node(uint32_t apicid)
547 {
548 	struct acpisrat_node *node;
549 	struct acpisrat_cpu *cpu;
550 	size_t i, n;
551 
552 	for (i = 0; i < nnodes; i++) {
553 		node = &node_array[i];
554 
555 		for (n = 0; n < node->ncpus; n++) {
556 			cpu = node->cpu[n];
557 			if (cpu->apicid == apicid) {
558 				return node;
559 			}
560 		}
561 	}
562 
563 	return NULL;
564 }
565