xref: /netbsd-src/sys/dev/acpi/acpi_srat.c (revision b1c86f5f087524e68db12794ee9c3e3da1ab17a0)
1 /* $NetBSD: acpi_srat.c,v 1.3 2010/03/05 14:00:17 jruoho Exp $ */
2 
3 /*
4  * Copyright (c) 2009 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Christoph Egger.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: acpi_srat.c,v 1.3 2010/03/05 14:00:17 jruoho Exp $");
34 
35 #include <sys/param.h>
36 #include <sys/kmem.h>
37 #include <sys/systm.h>
38 
39 #include <dev/acpi/acpivar.h>
40 #include <dev/acpi/acpi_srat.h>
41 
42 static ACPI_TABLE_SRAT *srat;
43 
44 struct acpisrat_node {
45 	acpisrat_nodeid_t nodeid;
46 	uint32_t ncpus; /* Number of cpus in this node */
47 	struct acpisrat_cpu **cpu; /* Array of cpus */
48 	uint32_t nmems; /* Number of memory ranges in this node */
49 	struct acpisrat_mem **mem; /* Array of memory ranges */
50 };
51 
/*
 * Result of the last successful acpisrat_refresh(); released again by
 * acpisrat_exit().
 */

/* Number of NUMA nodes */
static uint32_t nnodes;
/* Array of NUMA nodes */
static struct acpisrat_node *node_array;

/* Number of CPUs */
static uint32_t ncpus;
/* Array of cpus */
static struct acpisrat_cpu *cpu_array;

/* Number of Memory ranges */
static uint32_t nmems;
/* Array of memory ranges */
static struct acpisrat_mem *mem_array;
58 
59 
60 struct cpulist {
61 	struct acpisrat_cpu cpu;
62 	TAILQ_ENTRY(cpulist) entry;
63 };
64 
65 static TAILQ_HEAD(, cpulist) cpulisthead;
66 
67 #define CPU_INIT		TAILQ_INIT(&cpulisthead);
68 #define CPU_FOREACH(cpu)	TAILQ_FOREACH(cpu, &cpulisthead, entry)
69 #define CPU_ADD(cpu)		TAILQ_INSERT_TAIL(&cpulisthead, cpu, entry)
70 #define CPU_REM(cpu)		TAILQ_REMOVE(&cpulisthead, cpu, entry)
71 #define CPU_FIRST		TAILQ_FIRST(&cpulisthead)
72 
73 
74 struct memlist {
75 	struct acpisrat_mem mem;
76 	TAILQ_ENTRY(memlist) entry;
77 };
78 
79 static TAILQ_HEAD(, memlist) memlisthead;
80 
81 #define MEM_INIT		TAILQ_INIT(&memlisthead)
82 #define MEM_FOREACH(mem)	TAILQ_FOREACH(mem, &memlisthead, entry)
83 #define MEM_ADD(mem)		TAILQ_INSERT_TAIL(&memlisthead, mem, entry)
84 #define MEM_ADD_BEFORE(mem, b)	TAILQ_INSERT_BEFORE(b, mem, entry)
85 #define MEM_REM(mem)		TAILQ_REMOVE(&memlisthead, mem, entry)
86 #define MEM_FIRST		TAILQ_FIRST(&memlisthead)
87 
88 
89 static struct cpulist *
90 cpu_alloc(void)
91 {
92 	return kmem_zalloc(sizeof(struct cpulist), KM_NOSLEEP);
93 }
94 
95 static void
96 cpu_free(struct cpulist *c)
97 {
98 	kmem_free(c, sizeof(struct cpulist));
99 }
100 
#if 0
/*
 * Currently compiled out.
 * Return the first queued CPU entry that belongs to nodeid, or NULL
 * when the list holds no CPU of that node.
 */
static struct cpulist *
cpu_get(acpisrat_nodeid_t nodeid)
{
	struct cpulist *c;

	CPU_FOREACH(c) {
		if (c->cpu.nodeid == nodeid)
			break;
	}

	/* c is NULL here when the loop ran off the end of the list */
	return c;
}
#endif
115 
116 static struct memlist *
117 mem_alloc(void)
118 {
119 	return kmem_zalloc(sizeof(struct memlist), KM_NOSLEEP);
120 }
121 
122 static void
123 mem_free(struct memlist *m)
124 {
125 	kmem_free(m, sizeof(struct memlist));
126 }
127 
128 static struct memlist *
129 mem_get(acpisrat_nodeid_t nodeid)
130 {
131 	struct memlist *tmp;
132 
133 	MEM_FOREACH(tmp) {
134 		if (tmp->mem.nodeid == nodeid)
135 			return tmp;
136 	}
137 
138 	return NULL;
139 }
140 
141 
142 bool
143 acpisrat_exist(void)
144 {
145 	ACPI_TABLE_HEADER *table;
146 	ACPI_STATUS rv;
147 
148 	rv = AcpiGetTable(ACPI_SIG_SRAT, 1, (ACPI_TABLE_HEADER **)&table);
149 	if (ACPI_FAILURE(rv))
150 		return false;
151 
152 	/* Check if header is valid */
153 	if (table == NULL)
154 		return false;
155 
156 	if (table->Length == 0xffffffff)
157 		return false;
158 
159 	srat = (ACPI_TABLE_SRAT *)table;
160 
161 	return true;
162 }
163 
164 static int
165 acpisrat_parse(void)
166 {
167 	ACPI_SUBTABLE_HEADER *subtable;
168 	ACPI_SRAT_CPU_AFFINITY *srat_cpu;
169 	ACPI_SRAT_MEM_AFFINITY *srat_mem;
170 	ACPI_SRAT_X2APIC_CPU_AFFINITY *srat_x2apic;
171 
172 	acpisrat_nodeid_t nodeid;
173 	struct cpulist *cpuentry = NULL;
174 	struct memlist *mementry;
175 	uint32_t srat_pos;
176 	bool ignore_cpu_affinity = false;
177 
178 	KASSERT(srat != NULL);
179 
180 	/* Content starts right after the header */
181 	srat_pos = sizeof(ACPI_TABLE_SRAT);
182 
183 	while (srat_pos < srat->Header.Length) {
184 		subtable = (ACPI_SUBTABLE_HEADER *)((char *)srat + srat_pos);
185 		srat_pos += subtable->Length;
186 
187 		switch (subtable->Type) {
188 		case ACPI_SRAT_TYPE_CPU_AFFINITY:
189 			if (ignore_cpu_affinity)
190 				continue;
191 
192 			srat_cpu = (ACPI_SRAT_CPU_AFFINITY *)subtable;
193 			nodeid = (srat_cpu->ProximityDomainHi[2] << 24) |
194 			    (srat_cpu->ProximityDomainHi[1] << 16) |
195 			    (srat_cpu->ProximityDomainHi[0] << 8) |
196 			    (srat_cpu->ProximityDomainLo);
197 
198 			cpuentry = cpu_alloc();
199 			if (cpuentry == NULL)
200 				return ENOMEM;
201 			CPU_ADD(cpuentry);
202 
203 			cpuentry->cpu.nodeid = nodeid;
204 			cpuentry->cpu.apicid = srat_cpu->ApicId;
205 			cpuentry->cpu.sapiceid = srat_cpu->LocalSapicEid;
206 			cpuentry->cpu.flags = srat_cpu->Flags;
207 			cpuentry->cpu.clockdomain = srat_cpu->ClockDomain;
208 			break;
209 
210 		case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
211 			srat_mem = (ACPI_SRAT_MEM_AFFINITY *)subtable;
212 			nodeid = srat_mem->ProximityDomain;
213 
214 			mementry = mem_alloc();
215 			if (mementry == NULL)
216 				return ENOMEM;
217 			MEM_ADD(mementry);
218 
219 			mementry->mem.nodeid = nodeid;
220 			mementry->mem.baseaddress = srat_mem->BaseAddress;
221 			mementry->mem.length = srat_mem->Length;
222 			mementry->mem.flags = srat_mem->Flags;
223 			break;
224 
225 		case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
226 			srat_x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)subtable;
227 			nodeid = srat_x2apic->ProximityDomain;
228 
229 			/* This table entry overrides
230 			 * ACPI_SRAT_TYPE_CPU_AFFINITY.
231 			 */
232 			if (!ignore_cpu_affinity) {
233 				struct cpulist *citer;
234 				while ((citer = CPU_FIRST) != NULL) {
235 					CPU_REM(citer);
236 					cpu_free(citer);
237 				}
238 				ignore_cpu_affinity = true;
239 			}
240 
241 			cpuentry = cpu_alloc();
242 			if (cpuentry == NULL)
243 				return ENOMEM;
244 			CPU_ADD(cpuentry);
245 
246 			cpuentry->cpu.nodeid = nodeid;
247 			cpuentry->cpu.apicid = srat_x2apic->ApicId;
248 			cpuentry->cpu.clockdomain = srat_x2apic->ClockDomain;
249 			cpuentry->cpu.flags = srat_x2apic->Flags;
250 			break;
251 
252 		case ACPI_SRAT_TYPE_RESERVED:
253 			printf("ACPI SRAT subtable reserved, length: 0x%x\n",
254 				subtable->Length);
255 			break;
256 		}
257 	}
258 
259 	return 0;
260 }
261 
262 static int
263 acpisrat_quirks(void)
264 {
265 	struct cpulist *citer;
266 	struct memlist *mem, *miter;
267 
268 	/* Some sanity checks. */
269 
270 	/* Deal with holes in the memory nodes.
271 	 * BIOS doesn't enlist memory nodes which
272 	 * don't have any memory modules plugged in.
273 	 * This behaviour has been observed on AMD machines.
274 	 *
275 	 * Do that by searching for CPUs in NUMA nodes
276 	 * which don't exist in the memory and then insert
277 	 * a zero memory range for the missing node.
278 	 */
279 	CPU_FOREACH(citer) {
280 		mem = mem_get(citer->cpu.nodeid);
281 		if (mem != NULL)
282 			continue;
283 		mem = mem_alloc();
284 		if (mem == NULL)
285 			return ENOMEM;
286 		mem->mem.nodeid = citer->cpu.nodeid;
287 		/* all other fields are already zero filled */
288 
289 		MEM_FOREACH(miter) {
290 			if (miter->mem.nodeid < citer->cpu.nodeid)
291 				continue;
292 			MEM_ADD_BEFORE(mem, miter);
293 			break;
294 		}
295 	}
296 
297 	return 0;
298 }
299 
/*
 * Locate the SRAT and build the node tables from it.
 *
 * Returns EEXIST when acpisrat_exist() reports no usable table,
 * otherwise whatever acpisrat_refresh() returns.
 */
int
acpisrat_init(void)
{
	return acpisrat_exist() ? acpisrat_refresh() : EEXIST;
}
307 
308 int
309 acpisrat_refresh(void)
310 {
311 	int rc, i, j, k;
312 	struct cpulist *citer;
313 	struct memlist *miter;
314 	uint32_t cnodes = 0, mnodes = 0;
315 
316 	CPU_INIT;
317 	MEM_INIT;
318 
319 	rc = acpisrat_parse();
320 	if (rc)
321 		return rc;
322 
323 	rc = acpisrat_quirks();
324 	if (rc)
325 		return rc;
326 
327 	/* cleanup resources */
328 	rc = acpisrat_exit();
329 	if (rc)
330 		return rc;
331 
332 	nnodes = 0;
333 	ncpus = 0;
334 	CPU_FOREACH(citer) {
335 		cnodes = MAX(citer->cpu.nodeid, cnodes);
336 		ncpus++;
337 	}
338 
339 	nmems = 0;
340 	MEM_FOREACH(miter) {
341 		mnodes = MAX(miter->mem.nodeid, mnodes);
342 		nmems++;
343 	}
344 
345 	nnodes = MAX(cnodes, mnodes) + 1;
346 
347 	node_array = kmem_zalloc(nnodes * sizeof(struct acpisrat_node),
348 	    KM_NOSLEEP);
349 	if (node_array == NULL)
350 		return ENOMEM;
351 
352 	cpu_array = kmem_zalloc(ncpus * sizeof(struct acpisrat_cpu),
353 	    KM_NOSLEEP);
354 	if (cpu_array == NULL)
355 		return ENOMEM;
356 
357 	mem_array = kmem_zalloc(nmems * sizeof(struct acpisrat_mem),
358 	    KM_NOSLEEP);
359 	if (mem_array == NULL)
360 		return ENOMEM;
361 
362 	i = 0;
363 	CPU_FOREACH(citer) {
364 		memcpy(&cpu_array[i], &citer->cpu, sizeof(struct acpisrat_cpu));
365 		i++;
366 		node_array[citer->cpu.nodeid].ncpus++;
367 	}
368 
369 	i = 0;
370 	MEM_FOREACH(miter) {
371 		memcpy(&mem_array[i], &miter->mem, sizeof(struct acpisrat_mem));
372 		i++;
373 		node_array[miter->mem.nodeid].nmems++;
374 	}
375 
376 	for (i = 0; i < nnodes; i++) {
377 		node_array[i].nodeid = i;
378 
379 		node_array[i].cpu = kmem_zalloc(node_array[i].ncpus *
380 		    sizeof(struct acpisrat_cpu *), KM_NOSLEEP);
381 		node_array[i].mem = kmem_zalloc(node_array[i].nmems *
382 		    sizeof(struct acpisrat_mem *), KM_NOSLEEP);
383 
384 		k = 0;
385 		for (j = 0; j < ncpus; j++) {
386 			if (cpu_array[j].nodeid != i)
387 				continue;
388 			node_array[i].cpu[k] = &cpu_array[j];
389 			k++;
390 		}
391 
392 		k = 0;
393 		for (j = 0; j < nmems; j++) {
394 			if (mem_array[j].nodeid != i)
395 				continue;
396 			node_array[i].mem[k] = &mem_array[j];
397 			k++;
398 		}
399 	}
400 
401 	while ((citer = CPU_FIRST) != NULL) {
402 		CPU_REM(citer);
403 		cpu_free(citer);
404 	}
405 
406 	while ((miter = MEM_FIRST) != NULL) {
407 		MEM_REM(miter);
408 		mem_free(miter);
409 	}
410 
411 	return 0;
412 }
413 
414 
415 int
416 acpisrat_exit(void)
417 {
418 	int i;
419 
420 	if (node_array) {
421 		for (i = 0; i < nnodes; i++) {
422 			if (node_array[i].cpu)
423 				kmem_free(node_array[i].cpu,
424 				    node_array[i].ncpus * sizeof(struct acpisrat_cpu *));
425 			if (node_array[i].mem)
426 				kmem_free(node_array[i].mem,
427 				    node_array[i].nmems * sizeof(struct acpisrat_mem *));
428 		}
429 		kmem_free(node_array, nnodes * sizeof(struct acpisrat_node));
430 	}
431 	node_array = NULL;
432 
433 	if (cpu_array)
434 		kmem_free(cpu_array, ncpus * sizeof(struct acpisrat_cpu));
435 	cpu_array = NULL;
436 
437 	if (mem_array)
438 		kmem_free(mem_array, nmems * sizeof(struct acpisrat_mem));
439 	mem_array = NULL;
440 
441 	nnodes = 0;
442 	ncpus = 0;
443 	nmems = 0;
444 
445 	return 0;
446 }
447 
448 
449 void
450 acpisrat_dump(void)
451 {
452 	uint32_t i, j, nn, nc, nm;
453 	struct acpisrat_cpu c;
454 	struct acpisrat_mem m;
455 
456 	nn = acpisrat_nodes();
457 	aprint_debug("SRAT: %u NUMA nodes\n", nn);
458 	for (i = 0; i < nn; i++) {
459 		nc = acpisrat_node_cpus(i);
460 		for (j = 0; j < nc; j++) {
461 			acpisrat_cpu(i, j, &c);
462 			aprint_debug("SRAT: node %u cpu %u "
463 			    "(apic %u, sapic %u, flags %u, clockdomain %u)\n",
464 			    c.nodeid, j, c.apicid, c.sapiceid, c.flags,
465 			    c.clockdomain);
466 		}
467 
468 		nm = acpisrat_node_memoryranges(i);
469 		for (j = 0; j < nm; j++) {
470 			acpisrat_mem(i, j, &m);
471 			aprint_debug("SRAT: node %u memory range %u (0x%"
472 			    PRIx64" - 0x%"PRIx64" flags %u)\n",
473 			    m.nodeid, j, m.baseaddress,
474 			    m.baseaddress + m.length, m.flags);
475 		}
476 	}
477 }
478 
479 uint32_t
480 acpisrat_nodes(void)
481 {
482 	return nnodes;
483 }
484 
485 uint32_t
486 acpisrat_node_cpus(acpisrat_nodeid_t nodeid)
487 {
488 	return node_array[nodeid].ncpus;
489 }
490 
491 uint32_t
492 acpisrat_node_memoryranges(acpisrat_nodeid_t nodeid)
493 {
494 	return node_array[nodeid].nmems;
495 }
496 
497 void
498 acpisrat_cpu(acpisrat_nodeid_t nodeid, uint32_t cpunum,
499     struct acpisrat_cpu *c)
500 {
501 	memcpy(c, node_array[nodeid].cpu[cpunum],
502 	    sizeof(struct acpisrat_cpu));
503 }
504 
505 void
506 acpisrat_mem(acpisrat_nodeid_t nodeid, uint32_t memrange,
507     struct acpisrat_mem *mem)
508 {
509 	memcpy(mem, node_array[nodeid].mem[memrange],
510 	    sizeof(struct acpisrat_mem));
511 }
512