1 /*
2 * Buddy allocator for physical memory allocation.
3 * One per ACPI affinity domain, to color pages depending on their
4 * NUMA location.
5 *
6 */
7 #include "u.h"
8 #include "../port/lib.h"
9 #include "mem.h"
10 #include "dat.h"
11 #include "fns.h"
12 #include "acpi.h"
13
14 #define ISPOWEROF2(x) (((x) != 0) && !((x) & ((x)-1)))
15 #define UNO ((uintmem)1)
16
17 enum {
18 BKmin = 12, /* Minimum lg2 */
19 BKmax = 30, /* Maximum lg2 */
20
21 Ndoms = 16, /* Max # of domains */
22
23 Used = 0,
24 Avail = 1,
25 };
26
27
28 #define INDEX(b, v) ((uint)(((v))/(b)->bminsz))
29 #define BLOCK(b, i) ((i)-INDEX((b),(b)->memory))
30
31 typedef struct Buddy Buddy;
32 struct Buddy {
33 short tag; /* Used or Avail */
34 short kval;
35 uint next;
36 uint prev;
37 void *p;
38 };
39
40 /*
41 * Bals should allocate using its base address as 0.
42 * For now, all of them refer to the entire memory and we record
43 * the base and size for each one.
44 */
45 typedef struct Bal Bal;
46 struct Bal {
47 uintmem base;
48 u64int size;
49 usize nfree;
50 usize nblocks;
51 int kmin; /* Minimum lg2 */
52 int kmax; /* Maximum lg2 */
53 uintmem bminsz; /* minimum block sz */
54 uintmem memory;
55 uint kspan;
56
57 Buddy* blocks;
58 Buddy* avail;
59 };
60
61 static Bal bal[Ndoms];
62 static int ndoms;
63 static Lock budlock;
64
65 char*
seprintphysstats(char * s,char * e)66 seprintphysstats(char *s, char *e)
67 {
68 Bal *b;
69 int i;
70
71 lock(&budlock);
72 for(i = 0; i < Ndoms; i++){
73 b = &bal[i];
74 if(b->size > 0)
75 s = seprint(s, e, "%uld/%uld %ulldK color %d blocks avail\n",
76 b->nfree, b->nblocks, b->bminsz/KiB, i);
77 }
78 unlock(&budlock);
79 return s;
80 }
81
82 static void
xphysfree(Bal * b,uintmem data,u64int size)83 xphysfree(Bal *b, uintmem data, u64int size)
84 {
85 uint i;
86 Buddy *l, *p;
87 Buddy *blocks, *avail;
88
89 DBG("physfree\n");
90
91 /*
92 * Knuth's Algorithm S (Buddy System Liberation).
93 */
94 blocks = b->blocks;
95 avail = b->avail;
96
97 if(data == 0 /*|| !ALIGNED(data, b->bminsz)*/)
98 return;
99 i = INDEX(b,data);
100
101 lock(&budlock);
102 S1:
103 /*
104 * Find buddy.
105 */
106 l = &blocks[BLOCK(b,i)];
107 l->p = nil;
108 DBG("\tbsl: BLOCK(b,i) %d index %ulld kval %d\n",
109 BLOCK(b,i), BLOCK(b,i)/((1<<l->kval)/b->bminsz), l->kval);
110 if((BLOCK(b,i)/((1<<l->kval)/b->bminsz)) & 1) /* simpler test? */
111 p = l - (1<<l->kval)/b->bminsz;
112 else
113 p = l + (1<<l->kval)/(b->bminsz);
114 DBG("\tbsl: l @ %ld buddy @ %ld\n", l - blocks, p - blocks);
115
116 /*
117 * Is buddy available?
118 * Can't merge if:
119 * this is the largest block;
120 * buddy isn't free;
121 * buddy has been subsequently split again.
122 */
123 if(l->kval == b->kmax || p->tag == Used || (p->tag == Avail && p->kval != l->kval)){
124 /*
125 * Put on list.
126 */
127 l->tag = Avail;
128 l->next = avail[l->kval].next;
129 l->prev = 0;
130 if(l->next != 0)
131 blocks[BLOCK(b,l->next)].prev = i;
132 avail[l->kval].next = i;
133
134 b->nfree += size/b->bminsz;
135
136 unlock(&budlock);
137 DBG("bsl: free @ i %d BLOCK(b,i) %d kval %d next %d %s\n",
138 i, BLOCK(b,i), l->kval, l->next, l->tag?"avail":"used");
139 return;
140 }
141
142 /*
143 * Combine with buddy.
144 * This removes block P from the avail list.
145 */
146 if(p->prev != 0)
147 blocks[BLOCK(b,p->prev)].next = p->next;
148 else
149 avail[p->kval].next = p->next;
150 if(p->next != 0)
151 blocks[BLOCK(b,p->next)].prev = p->prev;
152 p->next = p->prev = 0;
153 p->tag = Used;
154
155 /*
156 * Now can try to merge this larger block.
157 k++;
158 */
159 DBG("\tbsl: l @ %ld p @ %ld\n", l - blocks, p - blocks);
160 if(p < l)
161 l = p;
162 i = l - blocks + INDEX(b,b->memory);
163 l->kval++;
164 DBG("bsl: merge @ i %d BLOCK(b,i) %d kval %d next %d tag %s\n",
165 i, BLOCK(b,i), l->kval, l->next, l->tag?"avail":"used");
166 goto S1;
167 }
168
169 void
physfree(uintmem data,u64int size)170 physfree(uintmem data, u64int size)
171 {
172 Bal *b;
173 int i;
174
175 for(i = 0; i < Ndoms; i++){
176 b = &bal[i];
177 if(b->base <= data && data < b->base + b->size){
178 xphysfree(b, data, size);
179 return;
180 }
181 }
182 panic("physfree: no bal");
183 }
184
185 static void*
xphystag(Bal * b,uintmem data)186 xphystag(Bal *b, uintmem data)
187 {
188 uint i;
189 Buddy *blocks;
190
191 DBG("phystag\n");
192
193 blocks = b->blocks;
194
195 if(data == 0 /*|| !ALIGNED(data, b->bminsz)*/)
196 return nil;
197 i = INDEX(b,data);
198 return blocks[BLOCK(b,i)].p;
199 }
200
201 void*
phystag(uintmem data)202 phystag(uintmem data)
203 {
204 Bal *b;
205 int i;
206
207 for(i = 0; i < Ndoms; i++){
208 b = &bal[i];
209 if(b->base <= data && data < b->base + b->size)
210 return xphystag(b, data);
211 }
212 return nil;
213 }
214
215 static uchar lg2table[256] = {
216 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
217 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
218 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
219 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
220 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
221 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
222 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
223 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
224 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
225 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
226 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
227 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
228 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
229 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
230 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
231 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
232 };
233
234 static int
lg2floor(u64int w)235 lg2floor(u64int w)
236 {
237 u64int hi, lo;
238
239 if((lo = (w>>48)) != 0){
240 if((hi = (lo>>8)) != 0)
241 return 56+lg2table[hi];
242 return 48+lg2table[lo];
243 }
244 if((lo = (w>>32)) != 0){
245 if((hi = (lo>>8)) != 0)
246 return 40+lg2table[hi];
247 return 32+lg2table[lo];
248 }
249 if((lo = (w>>16)) != 0){
250 if((hi = (lo>>8)) != 0)
251 return 24+lg2table[hi];
252 return 16+lg2table[lo];
253 }
254 if((hi = (w>>8)) != 0)
255 return 8+lg2table[hi];
256 return lg2table[w];
257 }
258
259 static uintmem
xphysalloc(Bal * b,u64int size,void * tag)260 xphysalloc(Bal *b, u64int size, void *tag)
261 {
262 uint i, j, k;
263 Buddy *l, *p;
264 Buddy *avail, *blocks;
265 uintmem m;
266
267 DBG("physalloc\n");
268 assert(b->size > 0);
269
270 avail = b->avail;
271 blocks = b->blocks;
272
273 /*
274 * Knuth's Algorithm R (Buddy System Reservation).
275 */
276 if(size < b->bminsz)
277 size = b->bminsz;
278
279 /*
280 * Find block.
281 */
282 if(!ISPOWEROF2(size))
283 return 0;
284 k = lg2floor(size);
285
286 lock(&budlock);
287 for(j = k; j <= b->kmax; j++){
288 if(avail[j].next != 0)
289 break;
290 }
291 DBG("bsr: size %#llud k %d j %d\n", size, k, j);
292 if(j > b->kmax){
293 unlock(&budlock);
294 return 0;
295 }
296
297 /*
298 * Remove from list.
299 */
300 i = avail[j].next;
301 l = &blocks[BLOCK(b,i)];
302 DBG("bsr: block @ i %d BLOCK(b,i) %d kval %d next %d %s\n",
303 i, BLOCK(b,i), l->kval, l->next, l->tag?"avail":"used");
304 avail[j].next = l->next;
305 if(avail[j].next != 0)
306 blocks[BLOCK(b,avail[j].next)].prev = 0;
307 l->prev = l->next = 0;
308 l->tag = Used;
309 l->kval = k;
310
311 /*
312 * Split required?
313 */
314 while(j > k){
315 /*
316 * Split.
317 */
318 j--;
319 p = &blocks[BLOCK(b,i) + (UNO<<j)/(b->bminsz)];
320 p->tag = Avail;
321 p->kval = j;
322 p->next = avail[j].next;
323 p->prev = 0;
324 if(p->next != 0)
325 blocks[BLOCK(b,p->next)].prev = i + (UNO<<j)/(b->bminsz);
326 avail[j].next = i + (UNO<<j)/(b->bminsz);
327 DBG("bsr: split @ i %d BLOCK(b,i) %ld j %d next %d (%d) %s\n",
328 i, p - blocks, j, p->next, BLOCK(b,p->next),
329 p->tag?"avail":"used");
330 }
331 b->nfree -= size/b->bminsz;
332 unlock(&budlock);
333
334 m = b->memory + b->bminsz*BLOCK(b,i);
335 assert(m >= b->base && m < b->base + b->size);
336 blocks[BLOCK(b,i)].p = tag;
337
338 return m;
339 }
340
341 uintmem
physalloc(u64int size,int * colorp,void * tag)342 physalloc(u64int size, int *colorp, void *tag)
343 {
344 int i, color;
345 uintmem m;
346
347 m = 0;
348
349 color = *colorp;
350 if(color >= 0){
351 color %= ndoms;
352 if(bal[color].kmin > 0){
353 *colorp = color;
354 m = xphysalloc(&bal[color], size, tag);
355 }
356 }
357 if(m == 0)
358 for(i = 0; i < ndoms; i++)
359 if(bal[i].kmin > 0)
360 if((m = xphysalloc(&bal[i], size, tag)) != 0){
361 *colorp = i;
362 return m;
363 }
364 return m;
365 }
366
367 static void
dump(Bal * b)368 dump(Bal *b)
369 {
370 uint bi, i, k;
371 Buddy *blocks;
372
373 blocks = b->blocks;
374 for(i = 0; i < (UNO<<(b->kmax-b->kmin+1)); i++){
375 if(blocks[i].tag == Used)
376 continue;
377 print("blocks[%d]: size %d prev %d next %d\n",
378 i, 1<<b->blocks[i].kval, blocks[i].prev, blocks[i].next);
379 //i += (1<<blocks[i].kval)/b->bminsz-1;
380 }
381
382 for(k = 0; k <= b->kmax; k++){
383 print("a[%d]:", k);
384 for(bi = b->avail[k].next; bi != 0; bi = blocks[BLOCK(b,bi)].next){
385 print(" %d", bi);
386 }
387 print("\n");
388 }
389 }
390
391 void
physallocdump(void)392 physallocdump(void)
393 {
394 int n;
395
396 for(n = 0; n < Ndoms; n++)
397 if(bal[n].size > 0)
398 print("physalloc color=%d base=%#ullx size=%#ullx\n",
399 n, bal[n].base, bal[n].size);
400 }
401
402 static int
plop(Bal * b,uintmem a,int k,int type)403 plop(Bal *b, uintmem a, int k, int type)
404 {
405 uint i;
406 Buddy *l;
407
408
409 DBG("plop(a %#p k %d type %d)\n", a, k, type);
410
411 i = INDEX(b,a);
412 l = &b->blocks[BLOCK(b,i)];
413
414 l->kval = k;
415 xphysfree(b, a, 1<<k);
416
417 return 1;
418 }
419
420 static int
iimbchunk(Bal * b,uintmem a,uintmem e,int type)421 iimbchunk(Bal *b, uintmem a, uintmem e, int type)
422 {
423 int k;
424 uint s;
425
426 a = ROUNDUP(a, b->bminsz);
427 e = ROUNDDN(e, b->bminsz);
428 DBG("iimbchunk: start a %#P e %#P\n", a, e);
429
430 b->nblocks += (e-a)/b->bminsz;
431
432 for(k = b->kmin, s = b->bminsz; a+s < e && k < b->kmax; s <<= 1, k += 1){
433 if(a & s){
434 plop(b, a, k, type);
435 a += s;
436 }
437 }
438 DBG("done1 a %#P e %#P s %#ux %d\n", a, e, s, k);
439
440 while(a+s <= e){
441 plop(b, a, k, type);
442 a += s;
443 }
444 DBG("done2 a %#P e %#P s %#ux %d\n", a, e, s, k);
445
446 for(k -= 1, s >>= 1; a < e; s >>= 1, k -= 1){
447 if(a+s <= e){
448 plop(b, a, k, type);
449 a += s;
450 }
451 }
452 DBG("done3 a %#P e %#P s %#ux %d\n", a, e, s, k);
453
454 return 0;
455 }
456
457 /*
458 * Called from umeminit to initialize user memory allocators.
459 */
460 void
physinit(uintmem a,u64int size)461 physinit(uintmem a, u64int size)
462 {
463 uintmem dtsz;
464 Bal *b;
465 int i, dom;
466 uintmem addr, len;
467
468 DBG("physinit %#ullx %#ullx\n", a, size);
469
470 for(addr = a; addr < a+size; addr += len){
471 dom = 0;
472 len = acpimblocksize(addr, &dom);
473 /* len can be zero if there's no acpi information about addr */
474 if(len == 0 || addr + len > a + size)
475 len = a + size - addr;
476 /*
477 * Each block belongs to a different domain (ie. cpu/mem socket)
478 * We must create a buddy allocator for each block, so we could
479 * allocate memory from different domains.
480 *
481 * This code assumes that a domain may be extended later and
482 * that there is no interleaving of domains. Ok by now.
483 */
484 DBG("physmem block dom %d addr %#ullx size %#ullx\n",
485 dom, addr, len);
486 if(dom < 0 || dom >= Ndoms){
487 print("physinit: invalid dom %d\n", dom);
488 dom = 0;
489 }
490 b = &bal[dom];
491 if(dom >= ndoms)
492 ndoms = dom+1;
493 if(b->kmin == 0){
494 b->base = addr;
495 b->size = len;
496 b->kmin = BKmin;
497 b->kmax = BKmax;
498 b->bminsz = (UNO<<b->kmin);
499 b->memory = sys->pmstart;
500 b->kspan = lg2floor(sys->pmend);
501 if(!ISPOWEROF2(sys->pmend))
502 b->kspan++;
503 dtsz = sizeof(Buddy)*(UNO<<(b->kspan-b->kmin+1));
504 DBG("kspan %ud (arrysz = %llud)\n", b->kspan, dtsz);
505 b->blocks = malloc(dtsz);
506 if(b->blocks == nil)
507 panic("physinit: no blocks");
508 memset(b->blocks, 0, dtsz);
509 b->avail = malloc(sizeof(Buddy)*(b->kmax+1));
510 if(b->avail == nil)
511 panic("physinit: no avail");
512 memset(b->avail, 0, sizeof(Buddy)*(b->kmax+1));
513 }else{
514 if(addr < b->base)
515 panic("physinit: decreasing base");
516 if(b->base+b->size < addr + len)
517 b->size = (addr-b->base) + len;
518 for(i = 0; i < Ndoms; i++)
519 if(bal[i].kmin && &bal[i] != b)
520 if(bal[i].base < b->base + b->size &&
521 bal[i].base + bal[i].size > b->base + b->size)
522 panic("physinit: doms overlap");
523 }
524 assert(addr >= b->base && addr+len <= b->base + b->size);
525
526 iimbchunk(b, addr, addr+len, 0);
527 }
528
529
530 }
531