xref: /netbsd-src/sys/dev/acpi/acpi_srat.c (revision 3816d47b2c42fcd6e549e3407f842a5b1a1d23ad)
1 /* $NetBSD: acpi_srat.c,v 1.2 2009/12/04 10:42:39 njoly Exp $ */
2 
3 /*
4  * Copyright (c) 2009 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Christoph Egger.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: acpi_srat.c,v 1.2 2009/12/04 10:42:39 njoly Exp $");
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 
38 #include <sys/kmem.h>
39 
40 #include <dev/acpi/acpica.h>
41 #include <dev/acpi/acpivar.h>
42 #include <dev/acpi/acpi_srat.h>
43 
44 static ACPI_TABLE_SRAT *srat;
45 
46 struct acpisrat_node {
47 	acpisrat_nodeid_t nodeid;
48 	uint32_t ncpus; /* Number of cpus in this node */
49 	struct acpisrat_cpu **cpu; /* Array of cpus */
50 	uint32_t nmems; /* Number of memory ranges in this node */
51 	struct acpisrat_mem **mem; /* Array of memory ranges */
52 };
53 
/*
 * Tables built by acpisrat_refresh() and released by acpisrat_exit().
 * node_array is indexed by node id.
 */
static struct acpisrat_node *node_array;	/* NUMA nodes */
static uint32_t nnodes;				/* entries in node_array */
static struct acpisrat_cpu *cpu_array;		/* all CPUs */
static uint32_t ncpus;				/* entries in cpu_array */
static struct acpisrat_mem *mem_array;		/* all memory ranges */
static uint32_t nmems;				/* entries in mem_array */
60 
61 
62 struct cpulist {
63 	struct acpisrat_cpu cpu;
64 	TAILQ_ENTRY(cpulist) entry;
65 };
66 
/* Temporary list of CPU entries collected while parsing the SRAT. */
static TAILQ_HEAD(, cpulist) cpulisthead;

/*
 * Shorthands for working on cpulisthead.  None of them carries a
 * trailing semicolon (CPU_INIT used to); the caller supplies it, so
 * they behave like ordinary expressions in any statement context.
 */
#define CPU_INIT		TAILQ_INIT(&cpulisthead)
#define CPU_FOREACH(cpu)	TAILQ_FOREACH(cpu, &cpulisthead, entry)
#define CPU_ADD(cpu)		TAILQ_INSERT_TAIL(&cpulisthead, cpu, entry)
#define CPU_REM(cpu)		TAILQ_REMOVE(&cpulisthead, cpu, entry)
#define CPU_FIRST		TAILQ_FIRST(&cpulisthead)
74 
75 
76 struct memlist {
77 	struct acpisrat_mem mem;
78 	TAILQ_ENTRY(memlist) entry;
79 };
80 
/* Temporary list of memory ranges collected while parsing the SRAT. */
static TAILQ_HEAD(, memlist) memlisthead;

/* Shorthands for working on memlisthead. */
#define MEM_INIT		TAILQ_INIT(&memlisthead)
#define MEM_FIRST		TAILQ_FIRST(&memlisthead)
#define MEM_FOREACH(mem)	TAILQ_FOREACH(mem, &memlisthead, entry)
#define MEM_ADD(mem)		TAILQ_INSERT_TAIL(&memlisthead, mem, entry)
#define MEM_ADD_BEFORE(mem, b)	TAILQ_INSERT_BEFORE(b, mem, entry)
#define MEM_REM(mem)		TAILQ_REMOVE(&memlisthead, mem, entry)
89 
90 
91 static struct cpulist *
92 cpu_alloc(void)
93 {
94 	return kmem_zalloc(sizeof(struct cpulist), KM_NOSLEEP);
95 }
96 
97 static void
98 cpu_free(struct cpulist *c)
99 {
100 	kmem_free(c, sizeof(struct cpulist));
101 }
102 
#if 0
/*
 * Currently compiled out.
 * Return the first CPU list element belonging to the given node,
 * or NULL if the list holds none.
 */
static struct cpulist *
cpu_get(acpisrat_nodeid_t nodeid)
{
	struct cpulist *elem;

	CPU_FOREACH(elem) {
		if (elem->cpu.nodeid != nodeid)
			continue;
		return elem;
	}

	return NULL;
}
#endif
117 
118 static struct memlist *
119 mem_alloc(void)
120 {
121 	return kmem_zalloc(sizeof(struct memlist), KM_NOSLEEP);
122 }
123 
124 static void
125 mem_free(struct memlist *m)
126 {
127 	kmem_free(m, sizeof(struct memlist));
128 }
129 
130 static struct memlist *
131 mem_get(acpisrat_nodeid_t nodeid)
132 {
133 	struct memlist *tmp;
134 
135 	MEM_FOREACH(tmp) {
136 		if (tmp->mem.nodeid == nodeid)
137 			return tmp;
138 	}
139 
140 	return NULL;
141 }
142 
143 
144 bool
145 acpisrat_exist(void)
146 {
147 	ACPI_TABLE_HEADER *table;
148 	ACPI_STATUS rv;
149 
150 	rv = AcpiGetTable(ACPI_SIG_SRAT, 1, (ACPI_TABLE_HEADER **)&table);
151 	if (ACPI_FAILURE(rv))
152 		return false;
153 
154 	/* Check if header is valid */
155 	if (table == NULL)
156 		return false;
157 
158 	if (table->Length == 0xffffffff)
159 		return false;
160 
161 	srat = (ACPI_TABLE_SRAT *)table;
162 
163 	return true;
164 }
165 
166 static int
167 acpisrat_parse(void)
168 {
169 	ACPI_SUBTABLE_HEADER *subtable;
170 	ACPI_SRAT_CPU_AFFINITY *srat_cpu;
171 	ACPI_SRAT_MEM_AFFINITY *srat_mem;
172 	ACPI_SRAT_X2APIC_CPU_AFFINITY *srat_x2apic;
173 
174 	acpisrat_nodeid_t nodeid;
175 	struct cpulist *cpuentry = NULL;
176 	struct memlist *mementry;
177 	uint32_t srat_pos;
178 	bool ignore_cpu_affinity = false;
179 
180 	KASSERT(srat != NULL);
181 
182 	/* Content starts right after the header */
183 	srat_pos = sizeof(ACPI_TABLE_SRAT);
184 
185 	while (srat_pos < srat->Header.Length) {
186 		subtable = (ACPI_SUBTABLE_HEADER *)((char *)srat + srat_pos);
187 		srat_pos += subtable->Length;
188 
189 		switch (subtable->Type) {
190 		case ACPI_SRAT_TYPE_CPU_AFFINITY:
191 			if (ignore_cpu_affinity)
192 				continue;
193 
194 			srat_cpu = (ACPI_SRAT_CPU_AFFINITY *)subtable;
195 			nodeid = (srat_cpu->ProximityDomainHi[2] << 24) |
196 			    (srat_cpu->ProximityDomainHi[1] << 16) |
197 			    (srat_cpu->ProximityDomainHi[0] << 8) |
198 			    (srat_cpu->ProximityDomainLo);
199 
200 			cpuentry = cpu_alloc();
201 			if (cpuentry == NULL)
202 				return ENOMEM;
203 			CPU_ADD(cpuentry);
204 
205 			cpuentry->cpu.nodeid = nodeid;
206 			cpuentry->cpu.apicid = srat_cpu->ApicId;
207 			cpuentry->cpu.sapiceid = srat_cpu->LocalSapicEid;
208 			cpuentry->cpu.flags = srat_cpu->Flags;
209 			cpuentry->cpu.clockdomain = srat_cpu->ClockDomain;
210 			break;
211 
212 		case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
213 			srat_mem = (ACPI_SRAT_MEM_AFFINITY *)subtable;
214 			nodeid = srat_mem->ProximityDomain;
215 
216 			mementry = mem_alloc();
217 			if (mementry == NULL)
218 				return ENOMEM;
219 			MEM_ADD(mementry);
220 
221 			mementry->mem.nodeid = nodeid;
222 			mementry->mem.baseaddress = srat_mem->BaseAddress;
223 			mementry->mem.length = srat_mem->Length;
224 			mementry->mem.flags = srat_mem->Flags;
225 			break;
226 
227 		case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
228 			srat_x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)subtable;
229 			nodeid = srat_x2apic->ProximityDomain;
230 
231 			/* This table entry overrides
232 			 * ACPI_SRAT_TYPE_CPU_AFFINITY.
233 			 */
234 			if (!ignore_cpu_affinity) {
235 				struct cpulist *citer;
236 				while ((citer = CPU_FIRST) != NULL) {
237 					CPU_REM(citer);
238 					cpu_free(citer);
239 				}
240 				ignore_cpu_affinity = true;
241 			}
242 
243 			cpuentry = cpu_alloc();
244 			if (cpuentry == NULL)
245 				return ENOMEM;
246 			CPU_ADD(cpuentry);
247 
248 			cpuentry->cpu.nodeid = nodeid;
249 			cpuentry->cpu.apicid = srat_x2apic->ApicId;
250 			cpuentry->cpu.clockdomain = srat_x2apic->ClockDomain;
251 			cpuentry->cpu.flags = srat_x2apic->Flags;
252 			break;
253 
254 		case ACPI_SRAT_TYPE_RESERVED:
255 			printf("ACPI SRAT subtable reserved, length: 0x%x\n",
256 				subtable->Length);
257 			break;
258 		}
259 	}
260 
261 	return 0;
262 }
263 
264 static int
265 acpisrat_quirks(void)
266 {
267 	struct cpulist *citer;
268 	struct memlist *mem, *miter;
269 
270 	/* Some sanity checks. */
271 
272 	/* Deal with holes in the memory nodes.
273 	 * BIOS doesn't enlist memory nodes which
274 	 * don't have any memory modules plugged in.
275 	 * This behaviour has been observed on AMD machines.
276 	 *
277 	 * Do that by searching for CPUs in NUMA nodes
278 	 * which don't exist in the memory and then insert
279 	 * a zero memory range for the missing node.
280 	 */
281 	CPU_FOREACH(citer) {
282 		mem = mem_get(citer->cpu.nodeid);
283 		if (mem != NULL)
284 			continue;
285 		mem = mem_alloc();
286 		if (mem == NULL)
287 			return ENOMEM;
288 		mem->mem.nodeid = citer->cpu.nodeid;
289 		/* all other fields are already zero filled */
290 
291 		MEM_FOREACH(miter) {
292 			if (miter->mem.nodeid < citer->cpu.nodeid)
293 				continue;
294 			MEM_ADD_BEFORE(mem, miter);
295 			break;
296 		}
297 	}
298 
299 	return 0;
300 }
301 
/*
 * Locate the SRAT and build the NUMA tables from it.
 *
 * Returns EEXIST when acpisrat_exist() reports no usable table,
 * otherwise the result of acpisrat_refresh().
 */
int
acpisrat_init(void)
{
	if (acpisrat_exist())
		return acpisrat_refresh();

	return EEXIST;
}
309 
310 int
311 acpisrat_refresh(void)
312 {
313 	int rc, i, j, k;
314 	struct cpulist *citer;
315 	struct memlist *miter;
316 	uint32_t cnodes = 0, mnodes = 0;
317 
318 	CPU_INIT;
319 	MEM_INIT;
320 
321 	rc = acpisrat_parse();
322 	if (rc)
323 		return rc;
324 
325 	rc = acpisrat_quirks();
326 	if (rc)
327 		return rc;
328 
329 	/* cleanup resources */
330 	rc = acpisrat_exit();
331 	if (rc)
332 		return rc;
333 
334 	nnodes = 0;
335 	ncpus = 0;
336 	CPU_FOREACH(citer) {
337 		cnodes = MAX(citer->cpu.nodeid, cnodes);
338 		ncpus++;
339 	}
340 
341 	nmems = 0;
342 	MEM_FOREACH(miter) {
343 		mnodes = MAX(miter->mem.nodeid, mnodes);
344 		nmems++;
345 	}
346 
347 	nnodes = MAX(cnodes, mnodes) + 1;
348 
349 	node_array = kmem_zalloc(nnodes * sizeof(struct acpisrat_node),
350 	    KM_NOSLEEP);
351 	if (node_array == NULL)
352 		return ENOMEM;
353 
354 	cpu_array = kmem_zalloc(ncpus * sizeof(struct acpisrat_cpu),
355 	    KM_NOSLEEP);
356 	if (cpu_array == NULL)
357 		return ENOMEM;
358 
359 	mem_array = kmem_zalloc(nmems * sizeof(struct acpisrat_mem),
360 	    KM_NOSLEEP);
361 	if (mem_array == NULL)
362 		return ENOMEM;
363 
364 	i = 0;
365 	CPU_FOREACH(citer) {
366 		memcpy(&cpu_array[i], &citer->cpu, sizeof(struct acpisrat_cpu));
367 		i++;
368 		node_array[citer->cpu.nodeid].ncpus++;
369 	}
370 
371 	i = 0;
372 	MEM_FOREACH(miter) {
373 		memcpy(&mem_array[i], &miter->mem, sizeof(struct acpisrat_mem));
374 		i++;
375 		node_array[miter->mem.nodeid].nmems++;
376 	}
377 
378 	for (i = 0; i < nnodes; i++) {
379 		node_array[i].nodeid = i;
380 
381 		node_array[i].cpu = kmem_zalloc(node_array[i].ncpus *
382 		    sizeof(struct acpisrat_cpu *), KM_NOSLEEP);
383 		node_array[i].mem = kmem_zalloc(node_array[i].nmems *
384 		    sizeof(struct acpisrat_mem *), KM_NOSLEEP);
385 
386 		k = 0;
387 		for (j = 0; j < ncpus; j++) {
388 			if (cpu_array[j].nodeid != i)
389 				continue;
390 			node_array[i].cpu[k] = &cpu_array[j];
391 			k++;
392 		}
393 
394 		k = 0;
395 		for (j = 0; j < nmems; j++) {
396 			if (mem_array[j].nodeid != i)
397 				continue;
398 			node_array[i].mem[k] = &mem_array[j];
399 			k++;
400 		}
401 	}
402 
403 	while ((citer = CPU_FIRST) != NULL) {
404 		CPU_REM(citer);
405 		cpu_free(citer);
406 	}
407 
408 	while ((miter = MEM_FIRST) != NULL) {
409 		MEM_REM(miter);
410 		mem_free(miter);
411 	}
412 
413 	return 0;
414 }
415 
416 
417 int
418 acpisrat_exit(void)
419 {
420 	int i;
421 
422 	if (node_array) {
423 		for (i = 0; i < nnodes; i++) {
424 			if (node_array[i].cpu)
425 				kmem_free(node_array[i].cpu,
426 				    node_array[i].ncpus * sizeof(struct acpisrat_cpu *));
427 			if (node_array[i].mem)
428 				kmem_free(node_array[i].mem,
429 				    node_array[i].nmems * sizeof(struct acpisrat_mem *));
430 		}
431 		kmem_free(node_array, nnodes * sizeof(struct acpisrat_node));
432 	}
433 	node_array = NULL;
434 
435 	if (cpu_array)
436 		kmem_free(cpu_array, ncpus * sizeof(struct acpisrat_cpu));
437 	cpu_array = NULL;
438 
439 	if (mem_array)
440 		kmem_free(mem_array, nmems * sizeof(struct acpisrat_mem));
441 	mem_array = NULL;
442 
443 	nnodes = 0;
444 	ncpus = 0;
445 	nmems = 0;
446 
447 	return 0;
448 }
449 
450 
451 void
452 acpisrat_dump(void)
453 {
454 	uint32_t i, j, nn, nc, nm;
455 	struct acpisrat_cpu c;
456 	struct acpisrat_mem m;
457 
458 	nn = acpisrat_nodes();
459 	aprint_debug("SRAT: %u NUMA nodes\n", nn);
460 	for (i = 0; i < nn; i++) {
461 		nc = acpisrat_node_cpus(i);
462 		for (j = 0; j < nc; j++) {
463 			acpisrat_cpu(i, j, &c);
464 			aprint_debug("SRAT: node %u cpu %u "
465 			    "(apic %u, sapic %u, flags %u, clockdomain %u)\n",
466 			    c.nodeid, j, c.apicid, c.sapiceid, c.flags,
467 			    c.clockdomain);
468 		}
469 
470 		nm = acpisrat_node_memoryranges(i);
471 		for (j = 0; j < nm; j++) {
472 			acpisrat_mem(i, j, &m);
473 			aprint_debug("SRAT: node %u memory range %u (0x%"
474 			    PRIx64" - 0x%"PRIx64" flags %u)\n",
475 			    m.nodeid, j, m.baseaddress,
476 			    m.baseaddress + m.length, m.flags);
477 		}
478 	}
479 }
480 
481 uint32_t
482 acpisrat_nodes(void)
483 {
484 	return nnodes;
485 }
486 
487 uint32_t
488 acpisrat_node_cpus(acpisrat_nodeid_t nodeid)
489 {
490 	return node_array[nodeid].ncpus;
491 }
492 
493 uint32_t
494 acpisrat_node_memoryranges(acpisrat_nodeid_t nodeid)
495 {
496 	return node_array[nodeid].nmems;
497 }
498 
499 void
500 acpisrat_cpu(acpisrat_nodeid_t nodeid, uint32_t cpunum,
501     struct acpisrat_cpu *c)
502 {
503 	memcpy(c, node_array[nodeid].cpu[cpunum],
504 	    sizeof(struct acpisrat_cpu));
505 }
506 
507 void
508 acpisrat_mem(acpisrat_nodeid_t nodeid, uint32_t memrange,
509     struct acpisrat_mem *mem)
510 {
511 	memcpy(mem, node_array[nodeid].mem[memrange],
512 	    sizeof(struct acpisrat_mem));
513 }
514