/* $NetBSD: acpi_srat.c,v 1.4 2017/08/31 08:45:03 msaitoh Exp $ */

/*
 * Copyright (c) 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Christoph Egger.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: acpi_srat.c,v 1.4 2017/08/31 08:45:03 msaitoh Exp $");

#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/systm.h>

#include <dev/acpi/acpivar.h>
#include <dev/acpi/acpi_srat.h>

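/*
 * ACPI SRAT (System Resource Affinity Table) parser.
 *
 * acpisrat_init()/acpisrat_refresh() walk the SRAT and build a
 * per-NUMA-node view of the enabled cpu and memory affinity entries,
 * exposed through the accessors at the end of this file.
 *
 * Hypothetical caller sketch (illustrative only, not a real consumer
 * of this API):
 *
 *	if (acpisrat_init() == 0) {
 *		acpisrat_dump();
 *		... use acpisrat_nodes()/acpisrat_cpu()/acpisrat_mem() ...
 *		acpisrat_exit();
 *	}
 */

/* Mapped SRAT, cached by acpisrat_exist(). */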
static ACPI_TABLE_SRAT *srat;

struct acpisrat_node {
	acpisrat_nodeid_t nodeid;
	uint32_t ncpus; /* Number of cpus in this node */
	struct acpisrat_cpu **cpu; /* Array of cpus */
	uint32_t nmems; /* Number of memory ranges in this node */
	struct acpisrat_mem **mem; /* Array of memory ranges */
};

static uint32_t nnodes; /* Number of NUMA nodes */
static struct acpisrat_node *node_array; /* Array of NUMA nodes */
static uint32_t ncpus; /* Number of CPUs */
static struct acpisrat_cpu *cpu_array; /* Array of cpus */
static uint32_t nmems; /* Number of Memory ranges */
static struct acpisrat_mem *mem_array;


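/*
 * Staging lists filled in by acpisrat_parse() and turned into the
 * flat arrays above by acpisrat_refresh().
 */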
struct cpulist {
	struct acpisrat_cpu cpu;
	TAILQ_ENTRY(cpulist) entry;
};

static TAILQ_HEAD(, cpulist) cpulisthead;

#define CPU_INIT		TAILQ_INIT(&cpulisthead)
#define CPU_FOREACH(cpu)	TAILQ_FOREACH(cpu, &cpulisthead, entry)
#define CPU_ADD(cpu)		TAILQ_INSERT_TAIL(&cpulisthead, cpu, entry)
#define CPU_REM(cpu)		TAILQ_REMOVE(&cpulisthead, cpu, entry)
#define CPU_FIRST		TAILQ_FIRST(&cpulisthead)


struct memlist {
	struct acpisrat_mem mem;
	TAILQ_ENTRY(memlist) entry;
};

static TAILQ_HEAD(, memlist) memlisthead;

#define MEM_INIT		TAILQ_INIT(&memlisthead)
#define MEM_FOREACH(mem)	TAILQ_FOREACH(mem, &memlisthead, entry)
#define MEM_ADD(mem)		TAILQ_INSERT_TAIL(&memlisthead, mem, entry)
#define MEM_ADD_BEFORE(mem, b)	TAILQ_INSERT_BEFORE(b, mem, entry)
#define MEM_REM(mem)		TAILQ_REMOVE(&memlisthead, mem, entry)
#define MEM_FIRST		TAILQ_FIRST(&memlisthead)


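/*
 * Allocation helpers for the staging lists.  KM_NOSLEEP is used;
 * callers treat a NULL return as ENOMEM.
 */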
static struct cpulist *
cpu_alloc(void)
{
	return kmem_zalloc(sizeof(struct cpulist), KM_NOSLEEP);
}

static void
cpu_free(struct cpulist *c)
{
	kmem_free(c, sizeof(struct cpulist));
}

#if 0
static struct cpulist *
cpu_get(acpisrat_nodeid_t nodeid)
{
	struct cpulist *tmp;

	CPU_FOREACH(tmp) {
		if (tmp->cpu.nodeid == nodeid)
			return tmp;
	}

	return NULL;
}
#endif

static struct memlist *
mem_alloc(void)
{
	return kmem_zalloc(sizeof(struct memlist), KM_NOSLEEP);
}

static void
mem_free(struct memlist *m)
{
	kmem_free(m, sizeof(struct memlist));
}

static struct memlist *
mem_get(acpisrat_nodeid_t nodeid)
{
	struct memlist *tmp;

	MEM_FOREACH(tmp) {
		if (tmp->mem.nodeid == nodeid)
			return tmp;
	}

	return NULL;
}


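/*
 * Check whether the firmware provides an SRAT.  On success the mapped
 * table is cached in 'srat' for the parsing routines below.
 */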
bool
acpisrat_exist(void)
{
	ACPI_TABLE_HEADER *table;
	ACPI_STATUS rv;

	rv = AcpiGetTable(ACPI_SIG_SRAT, 1, (ACPI_TABLE_HEADER **)&table);
	if (ACPI_FAILURE(rv))
		return false;

	/* Check if header is valid */
	if (table == NULL)
		return false;

	if (table->Length == 0xffffffff)
		return false;

	srat = (ACPI_TABLE_SRAT *)table;

	return true;
}

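/*
 * Walk the SRAT subtables and collect the enabled cpu and memory
 * affinity entries on the staging lists.
 */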
static int
acpisrat_parse(void)
{
	ACPI_SUBTABLE_HEADER *subtable;
	ACPI_SRAT_CPU_AFFINITY *srat_cpu;
	ACPI_SRAT_MEM_AFFINITY *srat_mem;
	ACPI_SRAT_X2APIC_CPU_AFFINITY *srat_x2apic;

	acpisrat_nodeid_t nodeid;
	struct cpulist *cpuentry = NULL;
	struct memlist *mementry;
	uint32_t srat_pos;
	bool ignore_cpu_affinity = false;

	KASSERT(srat != NULL);

	/* Content starts right after the header */
	srat_pos = sizeof(ACPI_TABLE_SRAT);

	while (srat_pos < srat->Header.Length) {
		subtable = (ACPI_SUBTABLE_HEADER *)((char *)srat + srat_pos);
		if (subtable->Length == 0) {
			/* Malformed subtable; stop rather than loop forever. */
			break;
		}
		srat_pos += subtable->Length;

		switch (subtable->Type) {
		case ACPI_SRAT_TYPE_CPU_AFFINITY:
			if (ignore_cpu_affinity)
				continue;

			srat_cpu = (ACPI_SRAT_CPU_AFFINITY *)subtable;
			if ((srat_cpu->Flags & ACPI_SRAT_CPU_ENABLED) == 0)
				break;
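			/*
			 * The 32-bit proximity domain is split into
			 * ProximityDomainLo (bits 0-7) and
			 * ProximityDomainHi[0..2] (bits 8-31).
			 */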
			nodeid = (srat_cpu->ProximityDomainHi[2] << 24) |
			    (srat_cpu->ProximityDomainHi[1] << 16) |
			    (srat_cpu->ProximityDomainHi[0] << 8) |
			    (srat_cpu->ProximityDomainLo);

			cpuentry = cpu_alloc();
			if (cpuentry == NULL)
				return ENOMEM;
			CPU_ADD(cpuentry);

			cpuentry->cpu.nodeid = nodeid;
			cpuentry->cpu.apicid = srat_cpu->ApicId;
			cpuentry->cpu.sapiceid = srat_cpu->LocalSapicEid;
			cpuentry->cpu.flags = srat_cpu->Flags;
			cpuentry->cpu.clockdomain = srat_cpu->ClockDomain;
			break;

		case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
			srat_mem = (ACPI_SRAT_MEM_AFFINITY *)subtable;
			nodeid = srat_mem->ProximityDomain;
			if ((srat_mem->Flags & ACPI_SRAT_MEM_ENABLED) == 0)
				break;

			mementry = mem_alloc();
			if (mementry == NULL)
				return ENOMEM;
			MEM_ADD(mementry);

			mementry->mem.nodeid = nodeid;
			mementry->mem.baseaddress = srat_mem->BaseAddress;
			mementry->mem.length = srat_mem->Length;
			mementry->mem.flags = srat_mem->Flags;
			break;

		case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
			srat_x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)subtable;
			if ((srat_x2apic->Flags & ACPI_SRAT_CPU_ENABLED) == 0)
				break;
			nodeid = srat_x2apic->ProximityDomain;

			/*
			 * x2APIC affinity entries take precedence over
			 * ACPI_SRAT_TYPE_CPU_AFFINITY entries: drop any
			 * collected so far and ignore the rest.
			 */
			if (!ignore_cpu_affinity) {
				struct cpulist *citer;
				while ((citer = CPU_FIRST) != NULL) {
					CPU_REM(citer);
					cpu_free(citer);
				}
				ignore_cpu_affinity = true;
			}

			cpuentry = cpu_alloc();
			if (cpuentry == NULL)
				return ENOMEM;
			CPU_ADD(cpuentry);

			cpuentry->cpu.nodeid = nodeid;
			cpuentry->cpu.apicid = srat_x2apic->ApicId;
			cpuentry->cpu.clockdomain = srat_x2apic->ClockDomain;
			cpuentry->cpu.flags = srat_x2apic->Flags;
			break;

		case ACPI_SRAT_TYPE_RESERVED:
			printf("ACPI SRAT subtable reserved, length: 0x%x\n",
				subtable->Length);
			break;
		}
	}

	return 0;
}

static int
acpisrat_quirks(void)
{
	struct cpulist *citer;
	struct memlist *mem, *miter;

	/* Some sanity checks. */

	/*
	 * Deal with holes in the memory nodes.  Some BIOSes do not
	 * list a memory affinity entry for a node that has no memory
	 * modules plugged in.  This behaviour has been observed on
	 * AMD machines.
	 *
	 * Handle it by searching for CPUs whose NUMA node has no
	 * memory range and then inserting a zero-length memory range
	 * for the missing node.
	 */
	CPU_FOREACH(citer) {
		mem = mem_get(citer->cpu.nodeid);
		if (mem != NULL)
			continue;
		mem = mem_alloc();
		if (mem == NULL)
			return ENOMEM;
		mem->mem.nodeid = citer->cpu.nodeid;
		/* all other fields are already zero filled */

		/* Insert before the first entry with an equal or
		 * higher node id, or at the tail if there is none. */
		MEM_FOREACH(miter) {
			if (miter->mem.nodeid >= citer->cpu.nodeid)
				break;
		}
		if (miter != NULL)
			MEM_ADD_BEFORE(mem, miter);
		else
			MEM_ADD(mem);
	}

	return 0;
}

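/*
 * Set up the NUMA information from the SRAT.  Returns EEXIST if no
 * SRAT is present, otherwise the result of acpisrat_refresh().
 */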
int
acpisrat_init(void)
{
	if (!acpisrat_exist())
		return EEXIST;
	return acpisrat_refresh();
}

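/*
 * (Re)build the flat cpu/memory arrays and the per-node arrays from
 * the SRAT.  Any state from a previous run is released first.
 */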
int
acpisrat_refresh(void)
{
	int rc;
	uint32_t i, j, k;
	struct cpulist *citer;
	struct memlist *miter;
	uint32_t cnodes = 0, mnodes = 0;

	CPU_INIT;
	MEM_INIT;

	rc = acpisrat_parse();
	if (rc)
		return rc;

	rc = acpisrat_quirks();
	if (rc)
		return rc;

	/* cleanup resources */
	rc = acpisrat_exit();
	if (rc)
		return rc;

	nnodes = 0;
	ncpus = 0;
	CPU_FOREACH(citer) {
		cnodes = MAX(citer->cpu.nodeid, cnodes);
		ncpus++;
	}

	nmems = 0;
	MEM_FOREACH(miter) {
		mnodes = MAX(miter->mem.nodeid, mnodes);
		nmems++;
	}

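	/*
	 * Node ids are SRAT proximity domains.  The node array is
	 * sized by the highest domain number seen, so sparse domain
	 * numbering leaves unused (empty) entries.
	 */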
	nnodes = MAX(cnodes, mnodes) + 1;

	node_array = kmem_zalloc(nnodes * sizeof(struct acpisrat_node),
	    KM_NOSLEEP);
	if (node_array == NULL)
		return ENOMEM;

	cpu_array = kmem_zalloc(ncpus * sizeof(struct acpisrat_cpu),
	    KM_NOSLEEP);
	if (cpu_array == NULL)
		return ENOMEM;

	mem_array = kmem_zalloc(nmems * sizeof(struct acpisrat_mem),
	    KM_NOSLEEP);
	if (mem_array == NULL)
		return ENOMEM;

	i = 0;
	CPU_FOREACH(citer) {
		memcpy(&cpu_array[i], &citer->cpu, sizeof(struct acpisrat_cpu));
		i++;
		node_array[citer->cpu.nodeid].ncpus++;
	}

	i = 0;
	MEM_FOREACH(miter) {
		memcpy(&mem_array[i], &miter->mem, sizeof(struct acpisrat_mem));
		i++;
		node_array[miter->mem.nodeid].nmems++;
	}

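	/*
	 * For each node, build arrays of pointers into cpu_array and
	 * mem_array for the cpus and memory ranges that belong to it.
	 */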
	for (i = 0; i < nnodes; i++) {
		node_array[i].nodeid = i;

		/* A node may have no cpus or no memory ranges;
		 * avoid zero-sized allocations for those. */
		if (node_array[i].ncpus != 0)
			node_array[i].cpu = kmem_zalloc(node_array[i].ncpus *
			    sizeof(struct acpisrat_cpu *), KM_NOSLEEP);
		if (node_array[i].nmems != 0)
			node_array[i].mem = kmem_zalloc(node_array[i].nmems *
			    sizeof(struct acpisrat_mem *), KM_NOSLEEP);

		k = 0;
		for (j = 0; j < ncpus; j++) {
			if (cpu_array[j].nodeid != i)
				continue;
			node_array[i].cpu[k] = &cpu_array[j];
			k++;
		}

		k = 0;
		for (j = 0; j < nmems; j++) {
			if (mem_array[j].nodeid != i)
				continue;
			node_array[i].mem[k] = &mem_array[j];
			k++;
		}
	}

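	/* The staging lists are no longer needed; free them. */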
	while ((citer = CPU_FIRST) != NULL) {
		CPU_REM(citer);
		cpu_free(citer);
	}

	while ((miter = MEM_FIRST) != NULL) {
		MEM_REM(miter);
		mem_free(miter);
	}

	return 0;
}


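/*
 * Release everything allocated by acpisrat_refresh().  Safe to call
 * even if nothing has been set up.
 */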
int
acpisrat_exit(void)
{
	uint32_t i;

	if (node_array) {
		for (i = 0; i < nnodes; i++) {
			if (node_array[i].cpu)
				kmem_free(node_array[i].cpu,
				    node_array[i].ncpus * sizeof(struct acpisrat_cpu *));
			if (node_array[i].mem)
				kmem_free(node_array[i].mem,
				    node_array[i].nmems * sizeof(struct acpisrat_mem *));
		}
		kmem_free(node_array, nnodes * sizeof(struct acpisrat_node));
	}
	node_array = NULL;

	if (cpu_array)
		kmem_free(cpu_array, ncpus * sizeof(struct acpisrat_cpu));
	cpu_array = NULL;

	if (mem_array)
		kmem_free(mem_array, nmems * sizeof(struct acpisrat_mem));
	mem_array = NULL;

	nnodes = 0;
	ncpus = 0;
	nmems = 0;

	return 0;
}


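/* Dump the parsed NUMA topology via aprint_debug(). */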
void
acpisrat_dump(void)
{
	uint32_t i, j, nn, nc, nm;
	struct acpisrat_cpu c;
	struct acpisrat_mem m;

	nn = acpisrat_nodes();
	aprint_debug("SRAT: %u NUMA nodes\n", nn);
	for (i = 0; i < nn; i++) {
		nc = acpisrat_node_cpus(i);
		for (j = 0; j < nc; j++) {
			acpisrat_cpu(i, j, &c);
			aprint_debug("SRAT: node %u cpu %u "
			    "(apic %u, sapic %u, flags %u, clockdomain %u)\n",
			    c.nodeid, j, c.apicid, c.sapiceid, c.flags,
			    c.clockdomain);
		}

		nm = acpisrat_node_memoryranges(i);
		for (j = 0; j < nm; j++) {
			acpisrat_mem(i, j, &m);
			aprint_debug("SRAT: node %u memory range %u (0x%"
			    PRIx64" - 0x%"PRIx64" flags %u)\n",
			    m.nodeid, j, m.baseaddress,
			    m.baseaddress + m.length, m.flags);
		}
	}
}

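/*
 * Accessors.  These assume a successful acpisrat_init()/acpisrat_refresh()
 * and do no bounds checking on nodeid, cpunum or memrange.
 */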
uint32_t
acpisrat_nodes(void)
{
	return nnodes;
}

uint32_t
acpisrat_node_cpus(acpisrat_nodeid_t nodeid)
{
	return node_array[nodeid].ncpus;
}

uint32_t
acpisrat_node_memoryranges(acpisrat_nodeid_t nodeid)
{
	return node_array[nodeid].nmems;
}

void
acpisrat_cpu(acpisrat_nodeid_t nodeid, uint32_t cpunum,
    struct acpisrat_cpu *c)
{
	memcpy(c, node_array[nodeid].cpu[cpunum],
	    sizeof(struct acpisrat_cpu));
}

void
acpisrat_mem(acpisrat_nodeid_t nodeid, uint32_t memrange,
    struct acpisrat_mem *mem)
{
	memcpy(mem, node_array[nodeid].mem[memrange],
	    sizeof(struct acpisrat_mem));
}