xref: /netbsd-src/sys/dev/acpi/acpi_srat.c (revision f3cfa6f6ce31685c6c4a758bc430e69eb99f50a4)
1 /* $NetBSD: acpi_srat.c,v 1.5 2017/12/28 08:49:28 maxv Exp $ */
2 
3 /*
4  * Copyright (c) 2009 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Christoph Egger.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: acpi_srat.c,v 1.5 2017/12/28 08:49:28 maxv Exp $");
34 
35 #include <sys/param.h>
36 #include <sys/kmem.h>
37 #include <sys/systm.h>
38 
39 #include <dev/acpi/acpivar.h>
40 #include <dev/acpi/acpi_srat.h>
41 
42 static ACPI_TABLE_SRAT *srat;
43 
44 static uint32_t nnodes; /* Number of NUMA nodes */
45 static struct acpisrat_node *node_array; /* Array of NUMA nodes */
46 static uint32_t ncpus; /* Number of CPUs */
47 static struct acpisrat_cpu *cpu_array; /* Array of cpus */
48 static uint32_t nmems; /* Number of Memory ranges */
49 static struct acpisrat_mem *mem_array;
50 
51 struct cpulist {
52 	struct acpisrat_cpu cpu;
53 	TAILQ_ENTRY(cpulist) entry;
54 };
55 
56 static TAILQ_HEAD(, cpulist) cpulisthead;
57 
58 #define CPU_INIT()		TAILQ_INIT(&cpulisthead);
59 #define CPU_FOREACH(cpu)	TAILQ_FOREACH(cpu, &cpulisthead, entry)
60 #define CPU_ADD(cpu)		TAILQ_INSERT_TAIL(&cpulisthead, cpu, entry)
61 #define CPU_REM(cpu)		TAILQ_REMOVE(&cpulisthead, cpu, entry)
62 #define CPU_FIRST()		TAILQ_FIRST(&cpulisthead)
63 
64 struct memlist {
65 	struct acpisrat_mem mem;
66 	TAILQ_ENTRY(memlist) entry;
67 };
68 
69 static TAILQ_HEAD(, memlist) memlisthead;
70 
71 #define MEM_INIT()		TAILQ_INIT(&memlisthead)
72 #define MEM_FOREACH(mem)	TAILQ_FOREACH(mem, &memlisthead, entry)
73 #define MEM_ADD(mem)		TAILQ_INSERT_TAIL(&memlisthead, mem, entry)
74 #define MEM_ADD_BEFORE(mem, b)	TAILQ_INSERT_BEFORE(b, mem, entry)
75 #define MEM_REM(mem)		TAILQ_REMOVE(&memlisthead, mem, entry)
76 #define MEM_FIRST()		TAILQ_FIRST(&memlisthead)
77 
78 
79 static struct cpulist *
80 cpu_alloc(void)
81 {
82 	return kmem_zalloc(sizeof(struct cpulist), KM_NOSLEEP);
83 }
84 
85 static void
86 cpu_free(struct cpulist *c)
87 {
88 	kmem_free(c, sizeof(struct cpulist));
89 }
90 
91 static struct memlist *
92 mem_alloc(void)
93 {
94 	return kmem_zalloc(sizeof(struct memlist), KM_NOSLEEP);
95 }
96 
97 static void
98 mem_free(struct memlist *m)
99 {
100 	kmem_free(m, sizeof(struct memlist));
101 }
102 
103 static struct memlist *
104 mem_get(acpisrat_nodeid_t nodeid)
105 {
106 	struct memlist *tmp;
107 
108 	MEM_FOREACH(tmp) {
109 		if (tmp->mem.nodeid == nodeid)
110 			return tmp;
111 	}
112 
113 	return NULL;
114 }
115 
116 /*
117  * Returns true if ACPI SRAT table is available. If table does not exist, all
118  * functions below have undefined behaviour.
119  */
120 bool
121 acpisrat_exist(void)
122 {
123 	ACPI_TABLE_HEADER *table;
124 	ACPI_STATUS rv;
125 
126 	rv = AcpiGetTable(ACPI_SIG_SRAT, 1, (ACPI_TABLE_HEADER **)&table);
127 	if (ACPI_FAILURE(rv))
128 		return false;
129 
130 	/* Check if header is valid */
131 	if (table == NULL)
132 		return false;
133 
134 	if (table->Length == 0xffffffff)
135 		return false;
136 
137 	srat = (ACPI_TABLE_SRAT *)table;
138 
139 	return true;
140 }
141 
142 static int
143 acpisrat_parse(void)
144 {
145 	ACPI_SUBTABLE_HEADER *subtable;
146 	ACPI_SRAT_CPU_AFFINITY *srat_cpu;
147 	ACPI_SRAT_MEM_AFFINITY *srat_mem;
148 	ACPI_SRAT_X2APIC_CPU_AFFINITY *srat_x2apic;
149 
150 	acpisrat_nodeid_t nodeid;
151 	struct cpulist *cpuentry = NULL;
152 	struct memlist *mementry;
153 	uint32_t srat_pos;
154 	bool ignore_cpu_affinity = false;
155 
156 	KASSERT(srat != NULL);
157 
158 	/* Content starts right after the header */
159 	srat_pos = sizeof(ACPI_TABLE_SRAT);
160 
161 	while (srat_pos < srat->Header.Length) {
162 		subtable = (ACPI_SUBTABLE_HEADER *)((char *)srat + srat_pos);
163 		srat_pos += subtable->Length;
164 
165 		switch (subtable->Type) {
166 		case ACPI_SRAT_TYPE_CPU_AFFINITY:
167 			if (ignore_cpu_affinity)
168 				continue;
169 
170 			srat_cpu = (ACPI_SRAT_CPU_AFFINITY *)subtable;
171 			if ((srat_cpu->Flags & ACPI_SRAT_CPU_ENABLED) == 0)
172 				break;
173 			nodeid = (srat_cpu->ProximityDomainHi[2] << 24) |
174 			    (srat_cpu->ProximityDomainHi[1] << 16) |
175 			    (srat_cpu->ProximityDomainHi[0] << 8) |
176 			    (srat_cpu->ProximityDomainLo);
177 
178 			cpuentry = cpu_alloc();
179 			if (cpuentry == NULL)
180 				return ENOMEM;
181 			CPU_ADD(cpuentry);
182 
183 			cpuentry->cpu.nodeid = nodeid;
184 			cpuentry->cpu.apicid = srat_cpu->ApicId;
185 			cpuentry->cpu.sapiceid = srat_cpu->LocalSapicEid;
186 			cpuentry->cpu.flags = srat_cpu->Flags;
187 			cpuentry->cpu.clockdomain = srat_cpu->ClockDomain;
188 			break;
189 
190 		case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
191 			srat_mem = (ACPI_SRAT_MEM_AFFINITY *)subtable;
192 			nodeid = srat_mem->ProximityDomain;
193 			if ((srat_mem->Flags & ACPI_SRAT_MEM_ENABLED) == 0)
194 				break;
195 
196 			mementry = mem_alloc();
197 			if (mementry == NULL)
198 				return ENOMEM;
199 			MEM_ADD(mementry);
200 
201 			mementry->mem.nodeid = nodeid;
202 			mementry->mem.baseaddress = srat_mem->BaseAddress;
203 			mementry->mem.length = srat_mem->Length;
204 			mementry->mem.flags = srat_mem->Flags;
205 			break;
206 
207 		case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
208 			srat_x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)subtable;
209 			if ((srat_x2apic->Flags & ACPI_SRAT_CPU_ENABLED) == 0)
210 				break;
211 			nodeid = srat_x2apic->ProximityDomain;
212 
213 			/*
214 			 * This table entry overrides
215 			 * ACPI_SRAT_TYPE_CPU_AFFINITY.
216 			 */
217 			if (!ignore_cpu_affinity) {
218 				struct cpulist *citer;
219 				while ((citer = CPU_FIRST()) != NULL) {
220 					CPU_REM(citer);
221 					cpu_free(citer);
222 				}
223 				ignore_cpu_affinity = true;
224 			}
225 
226 			cpuentry = cpu_alloc();
227 			if (cpuentry == NULL)
228 				return ENOMEM;
229 			CPU_ADD(cpuentry);
230 
231 			cpuentry->cpu.nodeid = nodeid;
232 			cpuentry->cpu.apicid = srat_x2apic->ApicId;
233 			cpuentry->cpu.clockdomain = srat_x2apic->ClockDomain;
234 			cpuentry->cpu.flags = srat_x2apic->Flags;
235 			break;
236 
237 		case ACPI_SRAT_TYPE_RESERVED:
238 			printf("ACPI SRAT subtable reserved, length: 0x%x\n",
239 				subtable->Length);
240 			break;
241 		}
242 	}
243 
244 	return 0;
245 }
246 
247 static int
248 acpisrat_quirks(void)
249 {
250 	struct cpulist *citer;
251 	struct memlist *mem, *miter;
252 
253 	/* Some sanity checks. */
254 
255 	/*
256 	 * Deal with holes in the memory nodes. BIOS doesn't enlist memory
257 	 * nodes which don't have any memory modules plugged in. This behaviour
258 	 * has been observed on AMD machines.
259 	 *
260 	 * Do that by searching for CPUs in NUMA nodes which don't exist in the
261 	 * memory and then insert a zero memory range for the missing node.
262 	 */
263 	CPU_FOREACH(citer) {
264 		mem = mem_get(citer->cpu.nodeid);
265 		if (mem != NULL)
266 			continue;
267 		mem = mem_alloc();
268 		if (mem == NULL)
269 			return ENOMEM;
270 		mem->mem.nodeid = citer->cpu.nodeid;
271 		/* all other fields are already zero filled */
272 
273 		MEM_FOREACH(miter) {
274 			if (miter->mem.nodeid < citer->cpu.nodeid)
275 				continue;
276 			MEM_ADD_BEFORE(mem, miter);
277 			break;
278 		}
279 	}
280 
281 	return 0;
282 }
283 
/*
 * Initializes parser. Must be the first function being called when table is
 * available.
 *
 * Returns EEXIST when acpisrat_exist() reports no usable table, otherwise
 * the result of acpisrat_refresh().
 */
int
acpisrat_init(void)
{
	if (acpisrat_exist())
		return acpisrat_refresh();

	return EEXIST;
}
295 
296 /*
297  * Re-parse ACPI SRAT table. Useful after hotplugging cpu or RAM.
298  */
299 int
300 acpisrat_refresh(void)
301 {
302 	int rc, i, j, k;
303 	struct cpulist *citer;
304 	struct memlist *miter;
305 	uint32_t cnodes = 0, mnodes = 0;
306 
307 	CPU_INIT();
308 	MEM_INIT();
309 
310 	rc = acpisrat_parse();
311 	if (rc)
312 		return rc;
313 
314 	rc = acpisrat_quirks();
315 	if (rc)
316 		return rc;
317 
318 	/* cleanup resources */
319 	rc = acpisrat_exit();
320 	if (rc)
321 		return rc;
322 
323 	ncpus = 0;
324 	CPU_FOREACH(citer) {
325 		cnodes = MAX(citer->cpu.nodeid, cnodes);
326 		ncpus++;
327 	}
328 
329 	nmems = 0;
330 	MEM_FOREACH(miter) {
331 		mnodes = MAX(miter->mem.nodeid, mnodes);
332 		nmems++;
333 	}
334 
335 	nnodes = MAX(cnodes, mnodes) + 1;
336 
337 	node_array = kmem_zalloc(nnodes * sizeof(struct acpisrat_node),
338 	    KM_NOSLEEP);
339 	if (node_array == NULL)
340 		return ENOMEM;
341 
342 	cpu_array = kmem_zalloc(ncpus * sizeof(struct acpisrat_cpu),
343 	    KM_NOSLEEP);
344 	if (cpu_array == NULL)
345 		return ENOMEM;
346 
347 	mem_array = kmem_zalloc(nmems * sizeof(struct acpisrat_mem),
348 	    KM_NOSLEEP);
349 	if (mem_array == NULL)
350 		return ENOMEM;
351 
352 	i = 0;
353 	CPU_FOREACH(citer) {
354 		memcpy(&cpu_array[i], &citer->cpu, sizeof(struct acpisrat_cpu));
355 		i++;
356 		node_array[citer->cpu.nodeid].ncpus++;
357 	}
358 
359 	i = 0;
360 	MEM_FOREACH(miter) {
361 		memcpy(&mem_array[i], &miter->mem, sizeof(struct acpisrat_mem));
362 		i++;
363 		node_array[miter->mem.nodeid].nmems++;
364 	}
365 
366 	for (i = 0; i < nnodes; i++) {
367 		node_array[i].nodeid = i;
368 
369 		node_array[i].cpu = kmem_zalloc(node_array[i].ncpus *
370 		    sizeof(struct acpisrat_cpu *), KM_NOSLEEP);
371 		node_array[i].mem = kmem_zalloc(node_array[i].nmems *
372 		    sizeof(struct acpisrat_mem *), KM_NOSLEEP);
373 
374 		k = 0;
375 		for (j = 0; j < ncpus; j++) {
376 			if (cpu_array[j].nodeid != i)
377 				continue;
378 			node_array[i].cpu[k] = &cpu_array[j];
379 			k++;
380 		}
381 
382 		k = 0;
383 		for (j = 0; j < nmems; j++) {
384 			if (mem_array[j].nodeid != i)
385 				continue;
386 			node_array[i].mem[k] = &mem_array[j];
387 			k++;
388 		}
389 	}
390 
391 	while ((citer = CPU_FIRST()) != NULL) {
392 		CPU_REM(citer);
393 		cpu_free(citer);
394 	}
395 
396 	while ((miter = MEM_FIRST()) != NULL) {
397 		MEM_REM(miter);
398 		mem_free(miter);
399 	}
400 
401 	return 0;
402 }
403 
404 /*
405  * Free allocated memory. Should be called when acpisrat is no longer of any
406  * use.
407  */
408 int
409 acpisrat_exit(void)
410 {
411 	int i;
412 
413 	if (node_array) {
414 		for (i = 0; i < nnodes; i++) {
415 			if (node_array[i].cpu)
416 				kmem_free(node_array[i].cpu,
417 				    node_array[i].ncpus * sizeof(struct acpisrat_cpu *));
418 			if (node_array[i].mem)
419 				kmem_free(node_array[i].mem,
420 				    node_array[i].nmems * sizeof(struct acpisrat_mem *));
421 		}
422 		kmem_free(node_array, nnodes * sizeof(struct acpisrat_node));
423 	}
424 	node_array = NULL;
425 
426 	if (cpu_array)
427 		kmem_free(cpu_array, ncpus * sizeof(struct acpisrat_cpu));
428 	cpu_array = NULL;
429 
430 	if (mem_array)
431 		kmem_free(mem_array, nmems * sizeof(struct acpisrat_mem));
432 	mem_array = NULL;
433 
434 	nnodes = 0;
435 	ncpus = 0;
436 	nmems = 0;
437 
438 	return 0;
439 }
440 
441 void
442 acpisrat_dump(void)
443 {
444 	uint32_t i, j, nn, nc, nm;
445 	struct acpisrat_cpu c;
446 	struct acpisrat_mem m;
447 
448 	nn = acpisrat_nodes();
449 	aprint_debug("SRAT: %u NUMA nodes\n", nn);
450 	for (i = 0; i < nn; i++) {
451 		nc = acpisrat_node_cpus(i);
452 		for (j = 0; j < nc; j++) {
453 			acpisrat_cpu(i, j, &c);
454 			aprint_debug("SRAT: node %u cpu %u "
455 			    "(apic %u, sapic %u, flags %u, clockdomain %u)\n",
456 			    c.nodeid, j, c.apicid, c.sapiceid, c.flags,
457 			    c.clockdomain);
458 		}
459 
460 		nm = acpisrat_node_memoryranges(i);
461 		for (j = 0; j < nm; j++) {
462 			acpisrat_mem(i, j, &m);
463 			aprint_debug("SRAT: node %u memory range %u (0x%"
464 			    PRIx64" - 0x%"PRIx64" flags %u)\n",
465 			    m.nodeid, j, m.baseaddress,
466 			    m.baseaddress + m.length, m.flags);
467 		}
468 	}
469 }
470 
471 /*
472  * Get number of NUMA nodes.
473  */
474 uint32_t
475 acpisrat_nodes(void)
476 {
477 	return nnodes;
478 }
479 
480 /*
481  * Get number of cpus in the node. 0 means, this is a cpu-less node.
482  */
483 uint32_t
484 acpisrat_node_cpus(acpisrat_nodeid_t nodeid)
485 {
486 	return node_array[nodeid].ncpus;
487 }
488 
489 /*
490  * Get number of memory ranges in the node 0 means, this node has no RAM.
491  */
492 uint32_t
493 acpisrat_node_memoryranges(acpisrat_nodeid_t nodeid)
494 {
495 	return node_array[nodeid].nmems;
496 }
497 
498 void
499 acpisrat_cpu(acpisrat_nodeid_t nodeid, uint32_t cpunum,
500     struct acpisrat_cpu *c)
501 {
502 	memcpy(c, node_array[nodeid].cpu[cpunum],
503 	    sizeof(struct acpisrat_cpu));
504 }
505 
506 void
507 acpisrat_mem(acpisrat_nodeid_t nodeid, uint32_t memrange,
508     struct acpisrat_mem *mem)
509 {
510 	memcpy(mem, node_array[nodeid].mem[memrange],
511 	    sizeof(struct acpisrat_mem));
512 }
513 
514 /*
515  * Get a node from an APIC id (belonging to a cpu).
516  */
517 struct acpisrat_node *
518 acpisrat_get_node(uint32_t apicid)
519 {
520 	struct acpisrat_node *node;
521 	struct acpisrat_cpu *cpu;
522 	size_t i, n;
523 
524 	for (i = 0; i < nnodes; i++) {
525 		node = &node_array[i];
526 
527 		for (n = 0; n < node->ncpus; n++) {
528 			cpu = node->cpu[n];
529 			if (cpu->apicid == apicid) {
530 				return node;
531 			}
532 		}
533 	}
534 
535 	return NULL;
536 }
537 
538