xref: /plan9-contrib/sys/src/9k/k10/physalloc.c (revision 491cc0e56f324ad247ae84c1168a0db04fd04b41)
1 /*
2  * Buddy allocator for physical memory allocation.
3  * One per ACPI affinity domain, to color pages depending on their
4  * NUMA location.
5  *
6  */
7 #include "u.h"
8 #include "../port/lib.h"
9 #include "mem.h"
10 #include "dat.h"
11 #include "fns.h"
12 #include "acpi.h"
13 
14 #define ISPOWEROF2(x)	(((x) != 0) && !((x) & ((x)-1)))
15 #define UNO		((uintmem)1)
16 
17 enum {
18 	BKmin		= 12,			/* Minimum lg2 */
19 	BKmax		= 30,			/* Maximum lg2 */
20 
21 	Ndoms = 16,				/* Max # of domains */
22 
23 	Used = 0,
24 	Avail = 1,
25 };
26 
27 
28 #define INDEX(b, v)	((uint)(((v))/(b)->bminsz))
29 #define BLOCK(b, i)	((i)-INDEX((b),(b)->memory))
30 
31 typedef struct Buddy Buddy;
32 struct Buddy {
33 	short	tag;		/* Used or Avail */
34 	short	kval;
35 	uint	next;
36 	uint	prev;
37 	void	*p;
38 };
39 
40 /*
41  * Bals should allocate using its base address as 0.
42  * For now, all of them refer to the entire memory and we record
43  * the base and size for each one.
44  */
45 typedef struct Bal Bal;
46 struct Bal {
47 	uintmem	base;
48 	u64int	size;
49 	usize	nfree;
50 	usize	nblocks;
51 	int	kmin;		/* Minimum lg2 */
52 	int	kmax;		/* Maximum lg2 */
53 	uintmem	bminsz;		/* minimum block sz */
54 	uintmem memory;
55 	uint	kspan;
56 
57 	Buddy* blocks;
58 	Buddy* avail;
59 };
60 
61 static Bal bal[Ndoms];
62 static int ndoms;
63 static Lock budlock;
64 
65 char*
seprintphysstats(char * s,char * e)66 seprintphysstats(char *s,  char *e)
67 {
68 	Bal *b;
69 	int i;
70 
71 	lock(&budlock);
72 	for(i = 0; i < Ndoms; i++){
73 		b = &bal[i];
74 		if(b->size > 0)
75 			s = seprint(s, e, "%uld/%uld %ulldK color %d blocks avail\n",
76 				b->nfree, b->nblocks, b->bminsz/KiB, i);
77 	}
78 	unlock(&budlock);
79 	return s;
80 }
81 
82 static void
xphysfree(Bal * b,uintmem data,u64int size)83 xphysfree(Bal *b, uintmem data, u64int size)
84 {
85 	uint i;
86 	Buddy *l, *p;
87 	Buddy *blocks, *avail;
88 
89 	DBG("physfree\n");
90 
91 	/*
92 	 * Knuth's Algorithm S (Buddy System Liberation).
93 	 */
94 	blocks = b->blocks;
95 	avail = b->avail;
96 
97 	if(data == 0 /*|| !ALIGNED(data, b->bminsz)*/)
98 		return;
99 	i = INDEX(b,data);
100 
101 	lock(&budlock);
102 S1:
103 	/*
104 	 * Find buddy.
105 	 */
106 	l = &blocks[BLOCK(b,i)];
107 	l->p = nil;
108 	DBG("\tbsl: BLOCK(b,i) %d index %ulld kval %d\n",
109 		BLOCK(b,i), BLOCK(b,i)/((1<<l->kval)/b->bminsz), l->kval);
110 	if((BLOCK(b,i)/((1<<l->kval)/b->bminsz)) & 1)	/* simpler test? */
111 		p = l - (1<<l->kval)/b->bminsz;
112 	else
113 		p = l + (1<<l->kval)/(b->bminsz);
114 	DBG("\tbsl: l @ %ld buddy @ %ld\n", l - blocks, p - blocks);
115 
116 	/*
117 	 * Is buddy available?
118 	 * Can't merge if:
119 	 *	this is the largest block;
120 	 *	buddy isn't free;
121 	 *	buddy has been subsequently split again.
122 	 */
123 	if(l->kval == b->kmax || p->tag == Used || (p->tag == Avail && p->kval != l->kval)){
124 		/*
125 		 * Put on list.
126 		 */
127 		l->tag = Avail;
128 		l->next = avail[l->kval].next;
129 		l->prev = 0;
130 		if(l->next != 0)
131 			blocks[BLOCK(b,l->next)].prev = i;
132 		avail[l->kval].next = i;
133 
134 		b->nfree += size/b->bminsz;
135 
136 		unlock(&budlock);
137 		DBG("bsl: free @ i %d BLOCK(b,i) %d kval %d next %d %s\n",
138 			i, BLOCK(b,i), l->kval, l->next, l->tag?"avail":"used");
139 		return;
140 	}
141 
142 	/*
143 	 * Combine with buddy.
144 	 * This removes block P from the avail list.
145 	 */
146 	if(p->prev != 0)
147 		blocks[BLOCK(b,p->prev)].next = p->next;
148 	else
149 		avail[p->kval].next = p->next;
150 	if(p->next != 0)
151 		blocks[BLOCK(b,p->next)].prev = p->prev;
152 	p->next = p->prev = 0;
153 	p->tag = Used;
154 
155 	/*
156 	 * Now can try to merge this larger block.
157 	k++;
158 	 */
159 	DBG("\tbsl: l @ %ld p @ %ld\n", l - blocks, p - blocks);
160 	if(p < l)
161 		l = p;
162 	i = l - blocks + INDEX(b,b->memory);
163 	l->kval++;
164 	DBG("bsl: merge @ i %d BLOCK(b,i) %d kval %d next %d tag %s\n",
165 		i, BLOCK(b,i), l->kval, l->next, l->tag?"avail":"used");
166 	goto S1;
167 }
168 
169 void
physfree(uintmem data,u64int size)170 physfree(uintmem data, u64int size)
171 {
172 	Bal *b;
173 	int i;
174 
175 	for(i = 0; i < Ndoms; i++){
176 		b = &bal[i];
177 		if(b->base <= data && data < b->base + b->size){
178 			xphysfree(b, data, size);
179 			return;
180 		}
181 	}
182 	panic("physfree: no bal");
183 }
184 
185 static void*
xphystag(Bal * b,uintmem data)186 xphystag(Bal *b, uintmem data)
187 {
188 	uint i;
189 	Buddy *blocks;
190 
191 	DBG("phystag\n");
192 
193 	blocks = b->blocks;
194 
195 	if(data == 0 /*|| !ALIGNED(data, b->bminsz)*/)
196 		return nil;
197 	i = INDEX(b,data);
198 	return blocks[BLOCK(b,i)].p;
199 }
200 
201 void*
phystag(uintmem data)202 phystag(uintmem data)
203 {
204 	Bal *b;
205 	int i;
206 
207 	for(i = 0; i < Ndoms; i++){
208 		b = &bal[i];
209 		if(b->base <= data && data < b->base + b->size)
210 			return xphystag(b, data);
211 	}
212 	return nil;
213 }
214 
215 static uchar lg2table[256] = {
216 	0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
217 	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
218 	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
219 	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
220 	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
221 	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
222 	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
223 	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
224 	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
225 	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
226 	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
227 	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
228 	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
229 	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
230 	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
231 	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
232 };
233 
234 static int
lg2floor(u64int w)235 lg2floor(u64int w)
236 {
237 	u64int hi, lo;
238 
239 	if((lo = (w>>48)) != 0){
240 		if((hi = (lo>>8)) != 0)
241 			return 56+lg2table[hi];
242 		return 48+lg2table[lo];
243 	}
244 	if((lo = (w>>32)) != 0){
245 		if((hi = (lo>>8)) != 0)
246 			return 40+lg2table[hi];
247 		return 32+lg2table[lo];
248 	}
249 	if((lo = (w>>16)) != 0){
250 		if((hi = (lo>>8)) != 0)
251 			return 24+lg2table[hi];
252 		return 16+lg2table[lo];
253 	}
254 	if((hi = (w>>8)) != 0)
255 		return 8+lg2table[hi];
256 	return lg2table[w];
257 }
258 
259 static uintmem
xphysalloc(Bal * b,u64int size,void * tag)260 xphysalloc(Bal *b, u64int size, void *tag)
261 {
262 	uint i, j, k;
263 	Buddy *l, *p;
264 	Buddy *avail, *blocks;
265 	uintmem m;
266 
267 	DBG("physalloc\n");
268 	assert(b->size > 0);
269 
270 	avail = b->avail;
271 	blocks = b->blocks;
272 
273 	/*
274 	 * Knuth's Algorithm R (Buddy System Reservation).
275 	 */
276 	if(size < b->bminsz)
277 		size = b->bminsz;
278 
279 	/*
280 	 * Find block.
281 	 */
282 	if(!ISPOWEROF2(size))
283 		return 0;
284 	k = lg2floor(size);
285 
286 	lock(&budlock);
287 	for(j = k; j <= b->kmax; j++){
288 		if(avail[j].next != 0)
289 			break;
290 	}
291 	DBG("bsr: size %#llud k %d j %d\n", size, k, j);
292 	if(j > b->kmax){
293 		unlock(&budlock);
294 		return 0;
295 	}
296 
297 	/*
298 	 * Remove from list.
299 	 */
300 	i = avail[j].next;
301 	l = &blocks[BLOCK(b,i)];
302 	DBG("bsr: block @ i %d BLOCK(b,i) %d kval %d next %d %s\n",
303 		i, BLOCK(b,i), l->kval, l->next, l->tag?"avail":"used");
304 	avail[j].next = l->next;
305 	if(avail[j].next != 0)
306 		blocks[BLOCK(b,avail[j].next)].prev = 0;
307 	l->prev = l->next = 0;
308 	l->tag = Used;
309 	l->kval = k;
310 
311 	/*
312 	 * Split required?
313 	 */
314 	while(j > k){
315 		/*
316 		 * Split.
317 		 */
318 		j--;
319 		p = &blocks[BLOCK(b,i) + (UNO<<j)/(b->bminsz)];
320 		p->tag = Avail;
321 		p->kval = j;
322 		p->next = avail[j].next;
323 		p->prev = 0;
324 		if(p->next != 0)
325 			blocks[BLOCK(b,p->next)].prev = i + (UNO<<j)/(b->bminsz);
326 		avail[j].next = i + (UNO<<j)/(b->bminsz);
327 		DBG("bsr: split @ i %d BLOCK(b,i) %ld j %d next %d (%d) %s\n",
328 			i, p - blocks, j, p->next, BLOCK(b,p->next),
329 			p->tag?"avail":"used");
330 	}
331 	b->nfree -= size/b->bminsz;
332 	unlock(&budlock);
333 
334 	m = b->memory + b->bminsz*BLOCK(b,i);
335 	assert(m >= b->base && m < b->base + b->size);
336 	blocks[BLOCK(b,i)].p = tag;
337 
338 	return m;
339 }
340 
341 uintmem
physalloc(u64int size,int * colorp,void * tag)342 physalloc(u64int size, int *colorp, void *tag)
343 {
344 	int i, color;
345 	uintmem m;
346 
347 	m = 0;
348 
349 	color = *colorp;
350 	if(color >= 0){
351 		color %= ndoms;
352 		if(bal[color].kmin > 0){
353 			*colorp = color;
354 			m = xphysalloc(&bal[color], size, tag);
355 		}
356 	}
357 	if(m == 0)
358 		for(i = 0; i < ndoms; i++)
359 			if(bal[i].kmin > 0)
360 				if((m = xphysalloc(&bal[i], size, tag)) != 0){
361 					*colorp = i;
362 					return m;
363 				}
364 	return m;
365 }
366 
367 static void
dump(Bal * b)368 dump(Bal *b)
369 {
370 	uint bi, i, k;
371 	Buddy *blocks;
372 
373 	blocks = b->blocks;
374 	for(i = 0; i < (UNO<<(b->kmax-b->kmin+1)); i++){
375 		if(blocks[i].tag == Used)
376 			continue;
377 		print("blocks[%d]: size %d prev %d next %d\n",
378 			i, 1<<b->blocks[i].kval, blocks[i].prev, blocks[i].next);
379 		//i += (1<<blocks[i].kval)/b->bminsz-1;
380 	}
381 
382 	for(k = 0; k <= b->kmax; k++){
383 		print("a[%d]:", k);
384 		for(bi = b->avail[k].next; bi != 0; bi = blocks[BLOCK(b,bi)].next){
385 			print(" %d", bi);
386 		}
387 		print("\n");
388 	}
389 }
390 
391 void
physallocdump(void)392 physallocdump(void)
393 {
394 	int n;
395 
396 	for(n = 0; n < Ndoms; n++)
397 		if(bal[n].size > 0)
398 			print("physalloc color=%d base=%#ullx size=%#ullx\n",
399 				n, bal[n].base, bal[n].size);
400 }
401 
402 static int
plop(Bal * b,uintmem a,int k,int type)403 plop(Bal *b, uintmem a, int k, int type)
404 {
405 	uint i;
406 	Buddy *l;
407 
408 
409 	DBG("plop(a %#p k %d type %d)\n", a, k, type);
410 
411 	i = INDEX(b,a);
412 	l = &b->blocks[BLOCK(b,i)];
413 
414 	l->kval = k;
415 	xphysfree(b, a, 1<<k);
416 
417 	return 1;
418 }
419 
420 static int
iimbchunk(Bal * b,uintmem a,uintmem e,int type)421 iimbchunk(Bal *b, uintmem a, uintmem e, int type)
422 {
423 	int k;
424 	uint s;
425 
426 	a = ROUNDUP(a, b->bminsz);
427 	e = ROUNDDN(e, b->bminsz);
428 	DBG("iimbchunk: start a %#P e %#P\n", a, e);
429 
430 	b->nblocks += (e-a)/b->bminsz;
431 
432 	for(k = b->kmin, s = b->bminsz; a+s < e && k < b->kmax; s <<= 1, k += 1){
433 		if(a & s){
434 			plop(b, a, k, type);
435 			a += s;
436 		}
437 	}
438 	DBG("done1 a %#P e %#P s %#ux %d\n", a, e, s, k);
439 
440 	while(a+s <= e){
441 		plop(b, a, k, type);
442 		a += s;
443 	}
444 	DBG("done2 a %#P e %#P s %#ux %d\n", a, e, s, k);
445 
446 	for(k -= 1, s >>= 1; a < e; s >>= 1, k -= 1){
447 		if(a+s <= e){
448 			plop(b, a, k, type);
449 			a += s;
450 		}
451 	}
452 	DBG("done3 a %#P e %#P s %#ux %d\n", a, e, s, k);
453 
454 	return 0;
455 }
456 
457 /*
458  * Called from umeminit to initialize user memory allocators.
459  */
460 void
physinit(uintmem a,u64int size)461 physinit(uintmem a, u64int size)
462 {
463 	uintmem dtsz;
464 	Bal *b;
465 	int i, dom;
466 	uintmem addr, len;
467 
468 	DBG("physinit %#ullx %#ullx\n", a, size);
469 
470 	for(addr = a; addr < a+size; addr += len){
471 		dom = 0;
472 		len = acpimblocksize(addr, &dom);
473 		/* len can be zero if there's no acpi information about addr */
474 		if(len == 0 || addr + len > a + size)
475 			len = a + size - addr;
476 		/*
477 		 * Each block belongs to a different domain (ie. cpu/mem socket)
478 		 * We must create a buddy allocator for each block, so we could
479 		 * allocate memory from different domains.
480 		 *
481 		 * This code assumes that a domain may be extended later and
482 		 * that there is no interleaving of domains. Ok by now.
483 		 */
484 		DBG("physmem block dom %d addr %#ullx size %#ullx\n",
485 			dom, addr, len);
486 		if(dom < 0 || dom >= Ndoms){
487 			print("physinit: invalid dom %d\n", dom);
488 			dom = 0;
489 		}
490 		b = &bal[dom];
491 		if(dom >= ndoms)
492 			ndoms = dom+1;
493 		if(b->kmin == 0){
494 			b->base = addr;
495 			b->size = len;
496 			b->kmin = BKmin;
497 			b->kmax = BKmax;
498 			b->bminsz = (UNO<<b->kmin);
499 			b->memory = sys->pmstart;
500 			b->kspan = lg2floor(sys->pmend);
501 			if(!ISPOWEROF2(sys->pmend))
502 				b->kspan++;
503 			dtsz = sizeof(Buddy)*(UNO<<(b->kspan-b->kmin+1));
504 			DBG("kspan %ud (arrysz = %llud)\n", b->kspan, dtsz);
505 			b->blocks = malloc(dtsz);
506 			if(b->blocks == nil)
507 				panic("physinit: no blocks");
508 			memset(b->blocks, 0, dtsz);
509 			b->avail = malloc(sizeof(Buddy)*(b->kmax+1));
510 			if(b->avail == nil)
511 				panic("physinit: no avail");
512 			memset(b->avail, 0, sizeof(Buddy)*(b->kmax+1));
513 		}else{
514 			if(addr < b->base)
515 				panic("physinit: decreasing base");
516 			if(b->base+b->size < addr + len)
517 				b->size = (addr-b->base) + len;
518 			for(i = 0; i < Ndoms; i++)
519 				if(bal[i].kmin && &bal[i] != b)
520 				if(bal[i].base < b->base + b->size &&
521 				   bal[i].base + bal[i].size > b->base + b->size)
522 					panic("physinit: doms overlap");
523 		}
524 		assert(addr >= b->base && addr+len <= b->base + b->size);
525 
526 		iimbchunk(b, addr, addr+len, 0);
527 	}
528 
529 
530 }
531