1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include "umem.h"
27
28 #include <sys/vmem_impl_user.h>
29 #include <umem_impl.h>
30
31 #include <alloca.h>
32 #include <limits.h>
33 #include <mdb/mdb_whatis.h>
34
35 #include "misc.h"
36 #include "leaky.h"
37 #include "dist.h"
38
39 #include "umem_pagesize.h"
40
/* walk-type flags for the umem/bufctl walkers */
#define	UM_ALLOCATED	0x1	/* visit allocated buffers */
#define	UM_FREE		0x2	/* visit freed buffers */
#define	UM_BUFCTL	0x4	/* report bufctl addresses, not buffer addrs */
#define	UM_HASH		0x8	/* internal: layered walk is over the hash */

/* local copy of the target's umem_ready; 0 until libumem is present */
int umem_ready;

static int umem_stack_depth_warned;	/* warn about bad depth only once */
static uint32_t umem_max_ncpus;		/* sizes the per-CPU cache arrays */
uint32_t umem_stack_depth;		/* frames recorded in audit bufctls */

size_t umem_pagesize;			/* target's page size */

/*
 * Read the target's variable named `var' into the local variable of the
 * same name; evaluates to nonzero (after printing a warning) on failure,
 * so it can be used directly in an if-condition.
 */
#define UMEM_READVAR(var) \
	(umem_readvar(&(var), #var) == -1 && \
	(mdb_warn("failed to read "#var), 1))
57
58 int
umem_update_variables(void)59 umem_update_variables(void)
60 {
61 size_t pagesize;
62
63 /*
64 * Figure out which type of umem is being used; if it's not there
65 * yet, succeed quietly.
66 */
67 if (umem_set_standalone() == -1) {
68 umem_ready = 0;
69 return (0); /* umem not there yet */
70 }
71
72 /*
73 * Solaris 9 used a different name for umem_max_ncpus. It's
74 * cheap backwards compatibility to check for both names.
75 */
76 if (umem_readvar(&umem_max_ncpus, "umem_max_ncpus") == -1 &&
77 umem_readvar(&umem_max_ncpus, "max_ncpus") == -1) {
78 mdb_warn("unable to read umem_max_ncpus or max_ncpus");
79 return (-1);
80 }
81 if (UMEM_READVAR(umem_ready))
82 return (-1);
83 if (UMEM_READVAR(umem_stack_depth))
84 return (-1);
85 if (UMEM_READVAR(pagesize))
86 return (-1);
87
88 if (umem_stack_depth > UMEM_MAX_STACK_DEPTH) {
89 if (umem_stack_depth_warned == 0) {
90 mdb_warn("umem_stack_depth corrupted (%d > %d)\n",
91 umem_stack_depth, UMEM_MAX_STACK_DEPTH);
92 umem_stack_depth_warned = 1;
93 }
94 umem_stack_depth = 0;
95 }
96
97 umem_pagesize = pagesize;
98
99 return (0);
100 }
101
102 /*ARGSUSED*/
103 static int
umem_init_walkers(uintptr_t addr,const umem_cache_t * c,void * ignored)104 umem_init_walkers(uintptr_t addr, const umem_cache_t *c, void *ignored)
105 {
106 mdb_walker_t w;
107 char descr[64];
108
109 (void) mdb_snprintf(descr, sizeof (descr),
110 "walk the %s cache", c->cache_name);
111
112 w.walk_name = c->cache_name;
113 w.walk_descr = descr;
114 w.walk_init = umem_walk_init;
115 w.walk_step = umem_walk_step;
116 w.walk_fini = umem_walk_fini;
117 w.walk_init_arg = (void *)addr;
118
119 if (mdb_add_walker(&w) == -1)
120 mdb_warn("failed to add %s walker", c->cache_name);
121
122 return (WALK_NEXT);
123 }
124
125 /*ARGSUSED*/
126 static void
umem_statechange_cb(void * arg)127 umem_statechange_cb(void *arg)
128 {
129 static int been_ready = 0;
130
131 #ifndef _KMDB
132 leaky_cleanup(1); /* state changes invalidate leaky state */
133 #endif
134
135 if (umem_update_variables() == -1)
136 return;
137
138 if (been_ready)
139 return;
140
141 if (umem_ready != UMEM_READY)
142 return;
143
144 been_ready = 1;
145 (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umem_init_walkers, NULL);
146 }
147
148 int
umem_abort_messages(void)149 umem_abort_messages(void)
150 {
151 char *umem_error_buffer;
152 uint_t umem_error_begin;
153 GElf_Sym sym;
154 size_t bufsize;
155
156 if (UMEM_READVAR(umem_error_begin))
157 return (DCMD_ERR);
158
159 if (umem_lookup_by_name("umem_error_buffer", &sym) == -1) {
160 mdb_warn("unable to look up umem_error_buffer");
161 return (DCMD_ERR);
162 }
163
164 bufsize = (size_t)sym.st_size;
165
166 umem_error_buffer = mdb_alloc(bufsize+1, UM_SLEEP | UM_GC);
167
168 if (mdb_vread(umem_error_buffer, bufsize, (uintptr_t)sym.st_value)
169 != bufsize) {
170 mdb_warn("unable to read umem_error_buffer");
171 return (DCMD_ERR);
172 }
173 /* put a zero after the end of the buffer to simplify printing */
174 umem_error_buffer[bufsize] = 0;
175
176 if ((umem_error_begin % bufsize) == 0)
177 mdb_printf("%s\n", umem_error_buffer);
178 else {
179 umem_error_buffer[(umem_error_begin % bufsize) - 1] = 0;
180 mdb_printf("%s%s\n",
181 &umem_error_buffer[umem_error_begin % bufsize],
182 umem_error_buffer);
183 }
184
185 return (DCMD_OK);
186 }
187
/*
 * Print "name=<size>" for one umem transaction log, scaling the size to
 * 'm' or 'k' units when it divides evenly.  Absent (NULL) logs print
 * nothing.
 */
static void
umem_log_status(const char *name, umem_log_header_t *val)
{
	umem_log_header_t my_lh;
	uintptr_t pos = (uintptr_t)val;
	size_t size;

	/* NOTE(review): uintptr_t compared against NULL; 0 would be cleaner */
	if (pos == NULL)
		return;

	if (mdb_vread(&my_lh, sizeof (umem_log_header_t), pos) == -1) {
		mdb_warn("\nunable to read umem_%s_log pointer %p",
		    name, pos);
		return;
	}

	/* total log capacity in bytes */
	size = my_lh.lh_chunksize * my_lh.lh_nchunks;

	/*
	 * NOTE(review): size is a size_t printed with %d; on LP64 this is
	 * a varargs type mismatch -- confirm against mdb_printf semantics.
	 */
	if (size % (1024 * 1024) == 0)
		mdb_printf("%s=%dm ", name, size / (1024 * 1024));
	else if (size % 1024 == 0)
		mdb_printf("%s=%dk ", name, size / 1024);
	else
		mdb_printf("%s=%d ", name, size);
}
213
/*
 * Maps a symbolic debugging-option name to the UMF_* flag bits it sets
 * and, optionally, a distinct set of bits it clears.
 */
typedef struct umem_debug_flags {
	const char *udf_name;	/* option name as given by the user */
	uint_t udf_flags;	/* flags this option sets */
	uint_t udf_clear;	/* if 0, uses udf_flags */
} umem_debug_flags_t;

/* table of known UMEM_DEBUG option names; NULL-name terminated */
umem_debug_flags_t umem_status_flags[] = {
	{ "random", UMF_RANDOMIZE, UMF_RANDOM },
	{ "default", UMF_AUDIT | UMF_DEADBEEF | UMF_REDZONE | UMF_CONTENTS },
	{ "audit", UMF_AUDIT },
	{ "guards", UMF_DEADBEEF | UMF_REDZONE },
	{ "nosignal", UMF_CHECKSIGNAL },
	{ "firewall", UMF_FIREWALL },
	{ "lite", UMF_LITE },
	{ NULL }
};
230
/*
 * ::umem_status dcmd: report the state of libumem in the target --
 * readiness, CPU concurrency, log configuration, and any messages in
 * the error buffer.
 */
/*ARGSUSED*/
int
umem_status(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
{
	/*
	 * These locals deliberately shadow the names of the target's
	 * variables so UMEM_READVAR() can read each one by name.
	 */
	int umem_logging;

	umem_log_header_t *umem_transaction_log;
	umem_log_header_t *umem_content_log;
	umem_log_header_t *umem_failure_log;
	umem_log_header_t *umem_slab_log;

	/* decode umem_ready into a human-readable state */
	mdb_printf("Status:\t\t%s\n",
	    umem_ready == UMEM_READY_INIT_FAILED ? "initialization failed" :
	    umem_ready == UMEM_READY_STARTUP ? "uninitialized" :
	    umem_ready == UMEM_READY_INITING ? "initialization in process" :
	    umem_ready == UMEM_READY ? "ready and active" :
	    umem_ready == 0 ? "not loaded into address space" :
	    "unknown (umem_ready invalid)");

	if (umem_ready == 0)
		return (DCMD_OK);

	mdb_printf("Concurrency:\t%d\n", umem_max_ncpus);

	if (UMEM_READVAR(umem_logging))
		goto err;
	if (UMEM_READVAR(umem_transaction_log))
		goto err;
	if (UMEM_READVAR(umem_content_log))
		goto err;
	if (UMEM_READVAR(umem_failure_log))
		goto err;
	if (UMEM_READVAR(umem_slab_log))
		goto err;

	mdb_printf("Logs:\t\t");
	umem_log_status("transaction", umem_transaction_log);
	umem_log_status("content", umem_content_log);
	umem_log_status("fail", umem_failure_log);
	umem_log_status("slab", umem_slab_log);
	if (!umem_logging)
		mdb_printf("(inactive)");
	mdb_printf("\n");

	mdb_printf("Message buffer:\n");
	return (umem_abort_messages());

err:
	/* even on error, show whatever is in the message buffer */
	mdb_printf("Message buffer:\n");
	(void) umem_abort_messages();
	return (DCMD_ERR);
}
283
/*
 * State for the umem_cache walker.  The cache list is circular, so the
 * first cache visited is remembered in order to detect wrap-around.
 */
typedef struct {
	uintptr_t ucw_first;	/* list head (umem_null_cache) address */
	uintptr_t ucw_current;	/* next cache to visit */
} umem_cache_walk_t;
288
289 int
umem_cache_walk_init(mdb_walk_state_t * wsp)290 umem_cache_walk_init(mdb_walk_state_t *wsp)
291 {
292 umem_cache_walk_t *ucw;
293 umem_cache_t c;
294 uintptr_t cp;
295 GElf_Sym sym;
296
297 if (umem_lookup_by_name("umem_null_cache", &sym) == -1) {
298 mdb_warn("couldn't find umem_null_cache");
299 return (WALK_ERR);
300 }
301
302 cp = (uintptr_t)sym.st_value;
303
304 if (mdb_vread(&c, sizeof (umem_cache_t), cp) == -1) {
305 mdb_warn("couldn't read cache at %p", cp);
306 return (WALK_ERR);
307 }
308
309 ucw = mdb_alloc(sizeof (umem_cache_walk_t), UM_SLEEP);
310
311 ucw->ucw_first = cp;
312 ucw->ucw_current = (uintptr_t)c.cache_next;
313 wsp->walk_data = ucw;
314
315 return (WALK_NEXT);
316 }
317
318 int
umem_cache_walk_step(mdb_walk_state_t * wsp)319 umem_cache_walk_step(mdb_walk_state_t *wsp)
320 {
321 umem_cache_walk_t *ucw = wsp->walk_data;
322 umem_cache_t c;
323 int status;
324
325 if (mdb_vread(&c, sizeof (umem_cache_t), ucw->ucw_current) == -1) {
326 mdb_warn("couldn't read cache at %p", ucw->ucw_current);
327 return (WALK_DONE);
328 }
329
330 status = wsp->walk_callback(ucw->ucw_current, &c, wsp->walk_cbdata);
331
332 if ((ucw->ucw_current = (uintptr_t)c.cache_next) == ucw->ucw_first)
333 return (WALK_DONE);
334
335 return (status);
336 }
337
338 void
umem_cache_walk_fini(mdb_walk_state_t * wsp)339 umem_cache_walk_fini(mdb_walk_state_t *wsp)
340 {
341 umem_cache_walk_t *ucw = wsp->walk_data;
342 mdb_free(ucw, sizeof (umem_cache_walk_t));
343 }
344
/*
 * State for the umem_cpu walker: walks the target's umem_cpus[] array.
 */
typedef struct {
	umem_cpu_t *ucw_cpus;	/* target address of umem_cpus[] base */
	uint32_t ucw_current;	/* index of next CPU to visit */
	uint32_t ucw_max;	/* number of entries (umem_max_ncpus) */
} umem_cpu_walk_state_t;
350
351 int
umem_cpu_walk_init(mdb_walk_state_t * wsp)352 umem_cpu_walk_init(mdb_walk_state_t *wsp)
353 {
354 umem_cpu_t *umem_cpus;
355
356 umem_cpu_walk_state_t *ucw;
357
358 if (umem_readvar(&umem_cpus, "umem_cpus") == -1) {
359 mdb_warn("failed to read 'umem_cpus'");
360 return (WALK_ERR);
361 }
362
363 ucw = mdb_alloc(sizeof (*ucw), UM_SLEEP);
364
365 ucw->ucw_cpus = umem_cpus;
366 ucw->ucw_current = 0;
367 ucw->ucw_max = umem_max_ncpus;
368
369 wsp->walk_data = ucw;
370 return (WALK_NEXT);
371 }
372
373 int
umem_cpu_walk_step(mdb_walk_state_t * wsp)374 umem_cpu_walk_step(mdb_walk_state_t *wsp)
375 {
376 umem_cpu_t cpu;
377 umem_cpu_walk_state_t *ucw = wsp->walk_data;
378
379 uintptr_t caddr;
380
381 if (ucw->ucw_current >= ucw->ucw_max)
382 return (WALK_DONE);
383
384 caddr = (uintptr_t)&(ucw->ucw_cpus[ucw->ucw_current]);
385
386 if (mdb_vread(&cpu, sizeof (umem_cpu_t), caddr) == -1) {
387 mdb_warn("failed to read cpu %d", ucw->ucw_current);
388 return (WALK_ERR);
389 }
390
391 ucw->ucw_current++;
392
393 return (wsp->walk_callback(caddr, &cpu, wsp->walk_cbdata));
394 }
395
396 void
umem_cpu_walk_fini(mdb_walk_state_t * wsp)397 umem_cpu_walk_fini(mdb_walk_state_t *wsp)
398 {
399 umem_cpu_walk_state_t *ucw = wsp->walk_data;
400
401 mdb_free(ucw, sizeof (*ucw));
402 }
403
/*
 * Begin a walk of one cache's per-CPU caches.  Requires a cache address
 * (stashed in walk_data for the step function) and layers over the
 * umem_cpu walk.
 */
int
umem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == NULL) {
		mdb_warn("umem_cpu_cache doesn't support global walks");
		return (WALK_ERR);
	}

	if (mdb_layered_walk("umem_cpu", wsp) == -1) {
		mdb_warn("couldn't walk 'umem_cpu'");
		return (WALK_ERR);
	}

	/* remember the cache address for umem_cpu_cache_walk_step() */
	wsp->walk_data = (void *)wsp->walk_addr;

	return (WALK_NEXT);
}
421
422 int
umem_cpu_cache_walk_step(mdb_walk_state_t * wsp)423 umem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
424 {
425 uintptr_t caddr = (uintptr_t)wsp->walk_data;
426 const umem_cpu_t *cpu = wsp->walk_layer;
427 umem_cpu_cache_t cc;
428
429 caddr += cpu->cpu_cache_offset;
430
431 if (mdb_vread(&cc, sizeof (umem_cpu_cache_t), caddr) == -1) {
432 mdb_warn("couldn't read umem_cpu_cache at %p", caddr);
433 return (WALK_ERR);
434 }
435
436 return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
437 }
438
439 int
umem_slab_walk_init(mdb_walk_state_t * wsp)440 umem_slab_walk_init(mdb_walk_state_t *wsp)
441 {
442 uintptr_t caddr = wsp->walk_addr;
443 umem_cache_t c;
444
445 if (caddr == NULL) {
446 mdb_warn("umem_slab doesn't support global walks\n");
447 return (WALK_ERR);
448 }
449
450 if (mdb_vread(&c, sizeof (c), caddr) == -1) {
451 mdb_warn("couldn't read umem_cache at %p", caddr);
452 return (WALK_ERR);
453 }
454
455 wsp->walk_data =
456 (void *)(caddr + offsetof(umem_cache_t, cache_nullslab));
457 wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_next;
458
459 return (WALK_NEXT);
460 }
461
/*
 * Begin a walk of only the partially-allocated slabs of a cache (the
 * cache_freelist chain).  Terminates, like umem_slab_walk, when the
 * walk reaches the cache's null-slab list head.
 */
int
umem_slab_walk_partial_init(mdb_walk_state_t *wsp)
{
	uintptr_t caddr = wsp->walk_addr;
	umem_cache_t c;

	if (caddr == NULL) {
		mdb_warn("umem_slab_partial doesn't support global walks\n");
		return (WALK_ERR);
	}

	if (mdb_vread(&c, sizeof (c), caddr) == -1) {
		mdb_warn("couldn't read umem_cache at %p", caddr);
		return (WALK_ERR);
	}

	/* sentinel: the cache's embedded null slab ends the walk */
	wsp->walk_data =
	    (void *)(caddr + offsetof(umem_cache_t, cache_nullslab));
	wsp->walk_addr = (uintptr_t)c.cache_freelist;

	/*
	 * Some consumers (umem_walk_step(), in particular) require at
	 * least one callback if there are any buffers in the cache. So
	 * if there are *no* partial slabs, report the last full slab, if
	 * any.
	 *
	 * Yes, this is ugly, but it's cleaner than the other possibilities.
	 */
	if ((uintptr_t)wsp->walk_data == wsp->walk_addr)
		wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_prev;

	return (WALK_NEXT);
}
495
496 int
umem_slab_walk_step(mdb_walk_state_t * wsp)497 umem_slab_walk_step(mdb_walk_state_t *wsp)
498 {
499 umem_slab_t s;
500 uintptr_t addr = wsp->walk_addr;
501 uintptr_t saddr = (uintptr_t)wsp->walk_data;
502 uintptr_t caddr = saddr - offsetof(umem_cache_t, cache_nullslab);
503
504 if (addr == saddr)
505 return (WALK_DONE);
506
507 if (mdb_vread(&s, sizeof (s), addr) == -1) {
508 mdb_warn("failed to read slab at %p", wsp->walk_addr);
509 return (WALK_ERR);
510 }
511
512 if ((uintptr_t)s.slab_cache != caddr) {
513 mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
514 addr, caddr, s.slab_cache);
515 return (WALK_ERR);
516 }
517
518 wsp->walk_addr = (uintptr_t)s.slab_next;
519
520 return (wsp->walk_callback(addr, &s, wsp->walk_cbdata));
521 }
522
/*
 * ::umem_cache dcmd: print a one-line summary (flags, sizes, buffer
 * total) for the cache at `addr', or for every cache when no address
 * is given.
 */
int
umem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
{
	umem_cache_t c;

	/* with no address, re-invoke ourselves over every cache */
	if (!(flags & DCMD_ADDRSPEC)) {
		if (mdb_walk_dcmd("umem_cache", "umem_cache", ac, argv) == -1) {
			mdb_warn("can't walk umem_cache");
			return (DCMD_ERR);
		}
		return (DCMD_OK);
	}

	if (DCMD_HDRSPEC(flags))
		mdb_printf("%-?s %-25s %4s %8s %8s %8s\n", "ADDR", "NAME",
		    "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");

	if (mdb_vread(&c, sizeof (c), addr) == -1) {
		mdb_warn("couldn't read umem_cache at %p", addr);
		return (DCMD_ERR);
	}

	mdb_printf("%0?p %-25s %04x %08x %8ld %8lld\n", addr, c.cache_name,
	    c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);

	return (DCMD_OK);
}
550
/*
 * qsort/bsearch comparator: ascending order of the uintptr_t values
 * the arguments point at.
 */
static int
addrcmp(const void *lhs, const void *rhs)
{
	uintptr_t a = *(const uintptr_t *)lhs;
	uintptr_t b = *(const uintptr_t *)rhs;

	/* yields -1, 0, or 1 without risking subtraction overflow */
	return ((a > b) - (a < b));
}
563
564 static int
bufctlcmp(const umem_bufctl_audit_t ** lhs,const umem_bufctl_audit_t ** rhs)565 bufctlcmp(const umem_bufctl_audit_t **lhs, const umem_bufctl_audit_t **rhs)
566 {
567 const umem_bufctl_audit_t *bcp1 = *lhs;
568 const umem_bufctl_audit_t *bcp2 = *rhs;
569
570 if (bcp1->bc_timestamp > bcp2->bc_timestamp)
571 return (-1);
572
573 if (bcp1->bc_timestamp < bcp2->bc_timestamp)
574 return (1);
575
576 return (0);
577 }
578
/*
 * State for the umem_hash walker: a local copy of a cache's hash table,
 * walked bucket by bucket and chain by chain.
 */
typedef struct umem_hash_walk {
	uintptr_t *umhw_table;	/* local copy of the hash bucket array */
	size_t umhw_nelems;	/* number of buckets */
	size_t umhw_pos;	/* next bucket index to examine */
	umem_bufctl_t umhw_cur;	/* most recently read bufctl (chain state) */
} umem_hash_walk_t;
585
586 int
umem_hash_walk_init(mdb_walk_state_t * wsp)587 umem_hash_walk_init(mdb_walk_state_t *wsp)
588 {
589 umem_hash_walk_t *umhw;
590 uintptr_t *hash;
591 umem_cache_t c;
592 uintptr_t haddr, addr = wsp->walk_addr;
593 size_t nelems;
594 size_t hsize;
595
596 if (addr == NULL) {
597 mdb_warn("umem_hash doesn't support global walks\n");
598 return (WALK_ERR);
599 }
600
601 if (mdb_vread(&c, sizeof (c), addr) == -1) {
602 mdb_warn("couldn't read cache at addr %p", addr);
603 return (WALK_ERR);
604 }
605
606 if (!(c.cache_flags & UMF_HASH)) {
607 mdb_warn("cache %p doesn't have a hash table\n", addr);
608 return (WALK_DONE); /* nothing to do */
609 }
610
611 umhw = mdb_zalloc(sizeof (umem_hash_walk_t), UM_SLEEP);
612 umhw->umhw_cur.bc_next = NULL;
613 umhw->umhw_pos = 0;
614
615 umhw->umhw_nelems = nelems = c.cache_hash_mask + 1;
616 hsize = nelems * sizeof (uintptr_t);
617 haddr = (uintptr_t)c.cache_hash_table;
618
619 umhw->umhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
620 if (mdb_vread(hash, hsize, haddr) == -1) {
621 mdb_warn("failed to read hash table at %p", haddr);
622 mdb_free(hash, hsize);
623 mdb_free(umhw, sizeof (umem_hash_walk_t));
624 return (WALK_ERR);
625 }
626
627 wsp->walk_data = umhw;
628
629 return (WALK_NEXT);
630 }
631
632 int
umem_hash_walk_step(mdb_walk_state_t * wsp)633 umem_hash_walk_step(mdb_walk_state_t *wsp)
634 {
635 umem_hash_walk_t *umhw = wsp->walk_data;
636 uintptr_t addr = NULL;
637
638 if ((addr = (uintptr_t)umhw->umhw_cur.bc_next) == NULL) {
639 while (umhw->umhw_pos < umhw->umhw_nelems) {
640 if ((addr = umhw->umhw_table[umhw->umhw_pos++]) != NULL)
641 break;
642 }
643 }
644 if (addr == NULL)
645 return (WALK_DONE);
646
647 if (mdb_vread(&umhw->umhw_cur, sizeof (umem_bufctl_t), addr) == -1) {
648 mdb_warn("couldn't read umem_bufctl_t at addr %p", addr);
649 return (WALK_ERR);
650 }
651
652 return (wsp->walk_callback(addr, &umhw->umhw_cur, wsp->walk_cbdata));
653 }
654
655 void
umem_hash_walk_fini(mdb_walk_state_t * wsp)656 umem_hash_walk_fini(mdb_walk_state_t *wsp)
657 {
658 umem_hash_walk_t *umhw = wsp->walk_data;
659
660 if (umhw == NULL)
661 return;
662
663 mdb_free(umhw->umhw_table, umhw->umhw_nelems * sizeof (uintptr_t));
664 mdb_free(umhw, sizeof (umem_hash_walk_t));
665 }
666
667 /*
668 * Find the address of the bufctl structure for the address 'buf' in cache
669 * 'cp', which is at address caddr, and place it in *out.
670 */
671 static int
umem_hash_lookup(umem_cache_t * cp,uintptr_t caddr,void * buf,uintptr_t * out)672 umem_hash_lookup(umem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
673 {
674 uintptr_t bucket = (uintptr_t)UMEM_HASH(cp, buf);
675 umem_bufctl_t *bcp;
676 umem_bufctl_t bc;
677
678 if (mdb_vread(&bcp, sizeof (umem_bufctl_t *), bucket) == -1) {
679 mdb_warn("unable to read hash bucket for %p in cache %p",
680 buf, caddr);
681 return (-1);
682 }
683
684 while (bcp != NULL) {
685 if (mdb_vread(&bc, sizeof (umem_bufctl_t),
686 (uintptr_t)bcp) == -1) {
687 mdb_warn("unable to read bufctl at %p", bcp);
688 return (-1);
689 }
690 if (bc.bc_addr == buf) {
691 *out = (uintptr_t)bcp;
692 return (0);
693 }
694 bcp = bc.bc_next;
695 }
696
697 mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
698 return (-1);
699 }
700
/*
 * Return the magazine size (rounds per magazine) for cache `cp', or 0
 * if the cache has no magazine layer or its magtype pointer is invalid.
 */
int
umem_get_magsize(const umem_cache_t *cp)
{
	uintptr_t addr = (uintptr_t)cp->cache_magtype;
	GElf_Sym mt_sym;
	umem_magtype_t mt;
	int res;

	/*
	 * if cpu 0 has a non-zero magsize, it must be correct.  caches
	 * with UMF_NOMAGAZINE have disabled their magazine layers, so
	 * it is okay to return 0 for them.
	 */
	if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
	    (cp->cache_flags & UMF_NOMAGAZINE))
		return (res);

	/*
	 * Validate that the magtype pointer falls within the target's
	 * umem_magtype[] array and is properly aligned to an element.
	 * If the symbol can't be found we warn but still attempt the
	 * read below with the unvalidated address.
	 */
	if (umem_lookup_by_name("umem_magtype", &mt_sym) == -1) {
		mdb_warn("unable to read 'umem_magtype'");
	} else if (addr < mt_sym.st_value ||
	    addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
	    ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
		mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
		    cp->cache_name, addr);
		return (0);
	}
	if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
		mdb_warn("unable to read magtype at %a", addr);
		return (0);
	}
	return (mt.mt_magsize);
}
733
/*
 * Per-slab callback for umem_estimate_allocated(): deduct this slab's
 * free chunks (total chunks minus allocated refcnt) from the estimate.
 */
/*ARGSUSED*/
static int
umem_estimate_slab(uintptr_t addr, const umem_slab_t *sp, size_t *est)
{
	*est -= (sp->slab_chunks - sp->slab_refcnt);

	return (WALK_NEXT);
}
742
743 /*
744 * Returns an upper bound on the number of allocated buffers in a given
745 * cache.
746 */
747 size_t
umem_estimate_allocated(uintptr_t addr,const umem_cache_t * cp)748 umem_estimate_allocated(uintptr_t addr, const umem_cache_t *cp)
749 {
750 int magsize;
751 size_t cache_est;
752
753 cache_est = cp->cache_buftotal;
754
755 (void) mdb_pwalk("umem_slab_partial",
756 (mdb_walk_cb_t)umem_estimate_slab, &cache_est, addr);
757
758 if ((magsize = umem_get_magsize(cp)) != 0) {
759 size_t mag_est = cp->cache_full.ml_total * magsize;
760
761 if (cache_est >= mag_est) {
762 cache_est -= mag_est;
763 } else {
764 mdb_warn("cache %p's magazine layer holds more buffers "
765 "than the slab layer.\n", addr);
766 }
767 }
768 return (cache_est);
769 }
770
/*
 * Read the magazine at target address `ump' into the local buffer `mp'
 * and append its first `rounds' rounds to maglist[].  Jumps to the
 * enclosing function's `fail' label on a read error or if the fudge-
 * factor limit (magmax) is reached.  Expects ump, mp, magbsize,
 * maglist, magcnt, magmax and i to be in scope at the expansion site.
 */
#define READMAG_ROUNDS(rounds) { \
	if (mdb_vread(mp, magbsize, (uintptr_t)ump) == -1) { \
		mdb_warn("couldn't read magazine at %p", ump); \
		goto fail; \
	} \
	for (i = 0; i < rounds; i++) { \
		maglist[magcnt++] = mp->mag_round[i]; \
		if (magcnt == magmax) { \
			mdb_warn("%d magazines exceeds fudge factor\n", \
			    magcnt); \
			goto fail; \
		} \
	} \
}
785
/*
 * Snapshot every buffer currently cached in cp's magazine layer -- the
 * depot's full-magazine list plus each CPU's loaded and previously-
 * loaded magazines -- into a newly allocated array.
 *
 * On WALK_NEXT, *maglistp/*magcntp/*magmaxp describe the array (all
 * NULL/0 if the cache has no magazine layer).  The caller owns the
 * maglist allocation unless UM_GC was passed in alloc_flags.  Returns
 * WALK_ERR on any failure, freeing what was allocated (unless UM_GC).
 */
int
umem_read_magazines(umem_cache_t *cp, uintptr_t addr,
    void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)
{
	umem_magazine_t *ump, *mp;
	void **maglist = NULL;
	int i, cpu;
	size_t magsize, magmax, magbsize;
	size_t magcnt = 0;

	/*
	 * Read the magtype out of the cache, after verifying the pointer's
	 * correctness.
	 */
	magsize = umem_get_magsize(cp);
	if (magsize == 0) {
		*maglistp = NULL;
		*magcntp = 0;
		*magmaxp = 0;
		return (WALK_NEXT);
	}

	/*
	 * There are several places where we need to go buffer hunting:
	 * the per-CPU loaded magazine, the per-CPU spare full magazine,
	 * and the full magazine list in the depot.
	 *
	 * For an upper bound on the number of buffers in the magazine
	 * layer, we have the number of magazines on the cache_full
	 * list plus at most two magazines per CPU (the loaded and the
	 * spare).  Toss in 100 magazines as a fudge factor in case this
	 * is live (the number "100" comes from the same fudge factor in
	 * crash(1M)).
	 */
	magmax = (cp->cache_full.ml_total + 2 * umem_max_ncpus + 100) * magsize;
	magbsize = offsetof(umem_magazine_t, mag_round[magsize]);

	/* reject absurd magazine sizes (corrupt or bogus cache) */
	if (magbsize >= PAGESIZE / 2) {
		mdb_warn("magazine size for cache %p unreasonable (%x)\n",
		    addr, magbsize);
		return (WALK_ERR);
	}

	maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
	mp = mdb_alloc(magbsize, alloc_flags);
	if (mp == NULL || maglist == NULL)
		goto fail;

	/*
	 * First up: the magazines in the depot (i.e. on the cache_full list).
	 */
	for (ump = cp->cache_full.ml_list; ump != NULL; ) {
		READMAG_ROUNDS(magsize);
		ump = mp->mag_next;

		if (ump == cp->cache_full.ml_list)
			break; /* cache_full list loop detected */
	}

	dprintf(("cache_full list done\n"));

	/*
	 * Now whip through the CPUs, snagging the loaded magazines
	 * and full spares.
	 */
	for (cpu = 0; cpu < umem_max_ncpus; cpu++) {
		umem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];

		dprintf(("reading cpu cache %p\n",
		    (uintptr_t)ccp - (uintptr_t)cp + addr));

		if (ccp->cc_rounds > 0 &&
		    (ump = ccp->cc_loaded) != NULL) {
			dprintf(("reading %d loaded rounds\n", ccp->cc_rounds));
			READMAG_ROUNDS(ccp->cc_rounds);
		}

		if (ccp->cc_prounds > 0 &&
		    (ump = ccp->cc_ploaded) != NULL) {
			dprintf(("reading %d previously loaded rounds\n",
			    ccp->cc_prounds));
			READMAG_ROUNDS(ccp->cc_prounds);
		}
	}

	dprintf(("magazine layer: %d buffers\n", magcnt));

	/* the scratch magazine is no longer needed (UM_GC frees itself) */
	if (!(alloc_flags & UM_GC))
		mdb_free(mp, magbsize);

	*maglistp = maglist;
	*magcntp = magcnt;
	*magmaxp = magmax;

	return (WALK_NEXT);

fail:
	if (!(alloc_flags & UM_GC)) {
		if (mp)
			mdb_free(mp, magbsize);
		if (maglist)
			mdb_free(maglist, magmax * sizeof (void *));
	}
	return (WALK_ERR);
}
891
/*
 * Invoke the walk callback for a buffer-address walk; buffer walks pass
 * no per-buffer structure, just the address.
 */
static int
umem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
{
	return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
}
897
/*
 * Invoke the walk callback for a bufctl walk, passing a local copy of
 * the bufctl read from the target.  For UMF_AUDIT caches we try to read
 * the full audit record; otherwise (or if that larger read fails) we
 * fall back to the plain umem_bufctl_t, zero-filling the remainder of
 * the audit-sized buffer.
 */
static int
bufctl_walk_callback(umem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
{
	umem_bufctl_audit_t *b;
	/* stack-allocates an audit-sized bufctl and points b at it */
	UMEM_LOCAL_BUFCTL_AUDIT(&b);

	/*
	 * if UMF_AUDIT is not set, we know that we're looking at a
	 * umem_bufctl_t.
	 */
	if (!(cp->cache_flags & UMF_AUDIT) ||
	    mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, buf) == -1) {
		(void) memset(b, 0, UMEM_BUFCTL_AUDIT_SIZE);
		if (mdb_vread(b, sizeof (umem_bufctl_t), buf) == -1) {
			mdb_warn("unable to read bufctl at %p", buf);
			return (WALK_ERR);
		}
	}

	return (wsp->walk_callback(buf, b, wsp->walk_cbdata));
}
919
920 typedef struct umem_walk {
921 int umw_type;
922
923 int umw_addr; /* cache address */
924 umem_cache_t *umw_cp;
925 size_t umw_csize;
926
927 /*
928 * magazine layer
929 */
930 void **umw_maglist;
931 size_t umw_max;
932 size_t umw_count;
933 size_t umw_pos;
934
935 /*
936 * slab layer
937 */
938 char *umw_valid; /* to keep track of freed buffers */
939 char *umw_ubase; /* buffer for slab data */
940 } umem_walk_t;
941
/*
 * Common initialization for the per-cache buffer walks.  `type' is a
 * mask of UM_* flags selecting allocated vs. freed buffers and whether
 * to report bufctl addresses instead of buffer addresses.  Reads the
 * cache and its magazine layer, then layers the walk over the
 * appropriate hash/slab walker.
 */
static int
umem_walk_init_common(mdb_walk_state_t *wsp, int type)
{
	umem_walk_t *umw;
	int csize;
	umem_cache_t *cp;
	size_t vm_quantum;

	size_t magmax, magcnt;
	void **maglist = NULL;
	uint_t chunksize, slabsize;
	int status = WALK_ERR;
	uintptr_t addr = wsp->walk_addr;
	const char *layered;

	type &= ~UM_HASH;	/* UM_HASH is set internally, below */

	if (addr == NULL) {
		mdb_warn("umem walk doesn't support global walks\n");
		return (WALK_ERR);
	}

	dprintf(("walking %p\n", addr));

	/*
	 * The number of "cpus" determines how large the cache is.
	 */
	csize = UMEM_CACHE_SIZE(umem_max_ncpus);
	cp = mdb_alloc(csize, UM_SLEEP);

	if (mdb_vread(cp, csize, addr) == -1) {
		mdb_warn("couldn't read cache at addr %p", addr);
		goto out2;
	}

	/*
	 * It's easy for someone to hand us an invalid cache address.
	 * Unfortunately, it is hard for this walker to survive an
	 * invalid cache cleanly.  So we make sure that:
	 *
	 *	1. the vmem arena for the cache is readable,
	 *	2. the vmem arena's quantum is a power of 2,
	 *	3. our slabsize is a multiple of the quantum, and
	 *	4. our chunksize is >0 and less than our slabsize.
	 */
	if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
	    (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
	    vm_quantum == 0 ||
	    (vm_quantum & (vm_quantum - 1)) != 0 ||
	    cp->cache_slabsize < vm_quantum ||
	    P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
	    cp->cache_chunksize == 0 ||
	    cp->cache_chunksize > cp->cache_slabsize) {
		mdb_warn("%p is not a valid umem_cache_t\n", addr);
		goto out2;
	}

	dprintf(("buf total is %d\n", cp->cache_buftotal));

	/* an empty cache produces no callbacks at all */
	if (cp->cache_buftotal == 0) {
		mdb_free(cp, csize);
		return (WALK_DONE);
	}

	/*
	 * If they ask for bufctls, but it's a small-slab cache,
	 * there is nothing to report.
	 */
	if ((type & UM_BUFCTL) && !(cp->cache_flags & UMF_HASH)) {
		dprintf(("bufctl requested, not UMF_HASH (flags: %p)\n",
		    cp->cache_flags));
		mdb_free(cp, csize);
		return (WALK_DONE);
	}

	/*
	 * Read in the contents of the magazine layer
	 */
	if (umem_read_magazines(cp, addr, &maglist, &magcnt, &magmax,
	    UM_SLEEP) == WALK_ERR)
		goto out2;

	/*
	 * We have all of the buffers from the magazines; if we are walking
	 * allocated buffers, sort them so we can bsearch them later.
	 */
	if (type & UM_ALLOCATED)
		qsort(maglist, magcnt, sizeof (void *), addrcmp);

	wsp->walk_data = umw = mdb_zalloc(sizeof (umem_walk_t), UM_SLEEP);

	umw->umw_type = type;
	umw->umw_addr = addr;
	umw->umw_cp = cp;
	umw->umw_csize = csize;
	umw->umw_maglist = maglist;
	umw->umw_max = magmax;
	umw->umw_count = magcnt;
	umw->umw_pos = 0;

	/*
	 * When walking allocated buffers in a UMF_HASH cache, we walk the
	 * hash table instead of the slab layer.
	 */
	if ((cp->cache_flags & UMF_HASH) && (type & UM_ALLOCATED)) {
		layered = "umem_hash";

		umw->umw_type |= UM_HASH;
	} else {
		/*
		 * If we are walking freed buffers, we only need the
		 * magazine layer plus the partially allocated slabs.
		 * To walk allocated buffers, we need all of the slabs.
		 */
		if (type & UM_ALLOCATED)
			layered = "umem_slab";
		else
			layered = "umem_slab_partial";

		/*
		 * for small-slab caches, we read in the entire slab.  For
		 * freed buffers, we can just walk the freelist.  For
		 * allocated buffers, we use a 'valid' array to track
		 * the freed buffers.
		 */
		if (!(cp->cache_flags & UMF_HASH)) {
			chunksize = cp->cache_chunksize;
			slabsize = cp->cache_slabsize;

			umw->umw_ubase = mdb_alloc(slabsize +
			    sizeof (umem_bufctl_t), UM_SLEEP);

			if (type & UM_ALLOCATED)
				umw->umw_valid =
				    mdb_alloc(slabsize / chunksize, UM_SLEEP);
		}
	}

	status = WALK_NEXT;

	if (mdb_layered_walk(layered, wsp) == -1) {
		mdb_warn("unable to start layered '%s' walk", layered);
		status = WALK_ERR;
	}

	/*
	 * NOTE(review): out1 is reached only by falling through; the
	 * earlier failure paths jump straight to out2 (before umw
	 * exists), so umw is always valid here.  umw_valid/umw_ubase
	 * are NULL (via mdb_zalloc) on the UMF_HASH path, where
	 * chunksize/slabsize are never set.
	 */
out1:
	if (status == WALK_ERR) {
		if (umw->umw_valid)
			mdb_free(umw->umw_valid, slabsize / chunksize);

		if (umw->umw_ubase)
			mdb_free(umw->umw_ubase, slabsize +
			    sizeof (umem_bufctl_t));

		if (umw->umw_maglist)
			mdb_free(umw->umw_maglist, umw->umw_max *
			    sizeof (uintptr_t));

		mdb_free(umw, sizeof (umem_walk_t));
		wsp->walk_data = NULL;
	}

out2:
	if (status == WALK_ERR)
		mdb_free(cp, csize);

	return (status);
}
1110
/*
 * Step function shared by the umem/freemem/bufctl/freectl walkers.
 * umw_type (chosen at init time) selects what is reported: allocated
 * vs. freed buffers (UM_ALLOCATED/UM_FREE) and buffer addresses vs.
 * bufctl addresses (UM_BUFCTL).  UM_HASH indicates this walk is
 * layered on the cache's hash table rather than on its slab list.
 */
int
umem_walk_step(mdb_walk_state_t *wsp)
{
	umem_walk_t *umw = wsp->walk_data;
	int type = umw->umw_type;
	umem_cache_t *cp = umw->umw_cp;

	void **maglist = umw->umw_maglist;
	int magcnt = umw->umw_count;

	uintptr_t chunksize, slabsize;
	uintptr_t addr;
	const umem_slab_t *sp;
	const umem_bufctl_t *bcp;
	umem_bufctl_t bc;

	int chunks;
	char *kbase;
	void *buf;
	int i, ret;

	char *valid, *ubase;

	/*
	 * first, handle the 'umem_hash' layered walk case
	 */
	if (type & UM_HASH) {
		/*
		 * We have a buffer which has been allocated out of the
		 * global layer. We need to make sure that it's not
		 * actually sitting in a magazine before we report it as
		 * an allocated buffer.
		 */
		buf = ((const umem_bufctl_t *)wsp->walk_layer)->bc_addr;

		/* maglist was sorted at init time, so bsearch() is valid */
		if (magcnt > 0 &&
		    bsearch(&buf, maglist, magcnt, sizeof (void *),
		    addrcmp) != NULL)
			return (WALK_NEXT);

		if (type & UM_BUFCTL)
			return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));

		return (umem_walk_callback(wsp, (uintptr_t)buf));
	}

	ret = WALK_NEXT;

	addr = umw->umw_addr;

	/*
	 * If we're walking freed buffers, report everything in the
	 * magazine layer before processing the first slab.
	 */
	if ((type & UM_FREE) && magcnt != 0) {
		umw->umw_count = 0;		/* only do this once */
		for (i = 0; i < magcnt; i++) {
			buf = maglist[i];

			if (type & UM_BUFCTL) {
				uintptr_t out;

				if (cp->cache_flags & UMF_BUFTAG) {
					umem_buftag_t *btp;
					umem_buftag_t tag;

					/* LINTED - alignment */
					btp = UMEM_BUFTAG(cp, buf);
					if (mdb_vread(&tag, sizeof (tag),
					    (uintptr_t)btp) == -1) {
						mdb_warn("reading buftag for "
						    "%p at %p", buf, btp);
						continue;
					}
					out = (uintptr_t)tag.bt_bufctl;
				} else {
					if (umem_hash_lookup(cp, addr, buf,
					    &out) == -1)
						continue;
				}
				ret = bufctl_walk_callback(cp, wsp, out);
			} else {
				ret = umem_walk_callback(wsp, (uintptr_t)buf);
			}

			if (ret != WALK_NEXT)
				return (ret);
		}
	}

	/*
	 * Handle the buffers in the current slab
	 */
	chunksize = cp->cache_chunksize;
	slabsize = cp->cache_slabsize;

	sp = wsp->walk_layer;
	chunks = sp->slab_chunks;
	kbase = sp->slab_base;

	dprintf(("kbase is %p\n", kbase));

	if (!(cp->cache_flags & UMF_HASH)) {
		/* small-slab cache: snapshot the whole slab's contents */
		valid = umw->umw_valid;
		ubase = umw->umw_ubase;

		if (mdb_vread(ubase, chunks * chunksize,
		    (uintptr_t)kbase) == -1) {
			mdb_warn("failed to read slab contents at %p", kbase);
			return (WALK_ERR);
		}

		/*
		 * Set up the valid map as fully allocated -- we'll punch
		 * out the freelist.
		 */
		if (type & UM_ALLOCATED)
			(void) memset(valid, 1, chunks);
	} else {
		valid = NULL;
		ubase = NULL;
	}

	/*
	 * walk the slab's freelist
	 */
	bcp = sp->slab_head;

	dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));

	/*
	 * since we could be in the middle of allocating a buffer,
	 * our refcnt could be one higher than it ought.  So we
	 * check one further on the freelist than the count allows.
	 */
	for (i = sp->slab_refcnt; i <= chunks; i++) {
		uint_t ndx;

		dprintf(("bcp is %p\n", bcp));

		if (bcp == NULL) {
			if (i == chunks)
				break;
			mdb_warn(
			    "slab %p in cache %p freelist too short by %d\n",
			    sp, addr, chunks - i);
			break;
		}

		if (cp->cache_flags & UMF_HASH) {
			if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
				mdb_warn("failed to read bufctl ptr at %p",
				    bcp);
				break;
			}
			buf = bc.bc_addr;
		} else {
			/*
			 * Otherwise the buffer is in the slab which
			 * we've read in; we just need to determine
			 * its offset in the slab to find the
			 * umem_bufctl_t.
			 */
			bc = *((umem_bufctl_t *)
			    ((uintptr_t)bcp - (uintptr_t)kbase +
			    (uintptr_t)ubase));

			buf = UMEM_BUF(cp, bcp);
		}

		ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;

		if (ndx > slabsize / cp->cache_bufsize) {
			/*
			 * This is very wrong; we have managed to find
			 * a buffer in the slab which shouldn't
			 * actually be here.  Emit a warning, and
			 * try to continue.
			 */
			mdb_warn("buf %p is out of range for "
			    "slab %p, cache %p\n", buf, sp, addr);
		} else if (type & UM_ALLOCATED) {
			/*
			 * we have found a buffer on the slab's freelist;
			 * clear its entry
			 */
			valid[ndx] = 0;
		} else {
			/*
			 * Report this freed buffer
			 */
			if (type & UM_BUFCTL) {
				ret = bufctl_walk_callback(cp, wsp,
				    (uintptr_t)bcp);
			} else {
				ret = umem_walk_callback(wsp, (uintptr_t)buf);
			}
			if (ret != WALK_NEXT)
				return (ret);
		}

		bcp = bc.bc_next;
	}

	if (bcp != NULL) {
		dprintf(("slab %p in cache %p freelist too long (%p)\n",
		    sp, addr, bcp));
	}

	/*
	 * If we are walking freed buffers, the loop above handled reporting
	 * them.
	 */
	if (type & UM_FREE)
		return (WALK_NEXT);

	if (type & UM_BUFCTL) {
		mdb_warn("impossible situation: small-slab UM_BUFCTL walk for "
		    "cache %p\n", addr);
		return (WALK_ERR);
	}

	/*
	 * Report allocated buffers, skipping buffers in the magazine layer.
	 * We only get this far for small-slab caches.
	 */
	for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
		buf = (char *)kbase + i * chunksize;

		if (!valid[i])
			continue;		/* on slab freelist */

		if (magcnt > 0 &&
		    bsearch(&buf, maglist, magcnt, sizeof (void *),
		    addrcmp) != NULL)
			continue;		/* in magazine layer */

		ret = umem_walk_callback(wsp, (uintptr_t)buf);
	}
	return (ret);
}
1352
1353 void
umem_walk_fini(mdb_walk_state_t * wsp)1354 umem_walk_fini(mdb_walk_state_t *wsp)
1355 {
1356 umem_walk_t *umw = wsp->walk_data;
1357 uintptr_t chunksize;
1358 uintptr_t slabsize;
1359
1360 if (umw == NULL)
1361 return;
1362
1363 if (umw->umw_maglist != NULL)
1364 mdb_free(umw->umw_maglist, umw->umw_max * sizeof (void *));
1365
1366 chunksize = umw->umw_cp->cache_chunksize;
1367 slabsize = umw->umw_cp->cache_slabsize;
1368
1369 if (umw->umw_valid != NULL)
1370 mdb_free(umw->umw_valid, slabsize / chunksize);
1371 if (umw->umw_ubase != NULL)
1372 mdb_free(umw->umw_ubase, slabsize + sizeof (umem_bufctl_t));
1373
1374 mdb_free(umw->umw_cp, umw->umw_csize);
1375 mdb_free(umw, sizeof (umem_walk_t));
1376 }
1377
/*ARGSUSED*/
/*
 * Per-cache helper for global walks: re-launches the walk whose name is
 * stashed in wsp->walk_data (e.g. "umem") on this single cache.
 */
static int
umem_walk_all(uintptr_t addr, const umem_cache_t *c, mdb_walk_state_t *wsp)
{
	/*
	 * Buffers allocated from NOTOUCH caches can also show up as freed
	 * memory in other caches.  This can be a little confusing, so we
	 * don't walk NOTOUCH caches when walking all caches (thereby assuring
	 * that "::walk umem" and "::walk freemem" yield disjoint output).
	 */
	if (c->cache_cflags & UMC_NOTOUCH)
		return (WALK_NEXT);

	if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
	    wsp->walk_cbdata, addr) == -1)
		return (WALK_DONE);

	return (WALK_NEXT);
}

/*
 * Expand a global (no-address) walk into a per-cache walk over every
 * umem cache.  NB: this macro returns from the enclosing function.
 */
#define	UMEM_WALK_ALL(name, wsp) { \
	wsp->walk_data = (name); \
	if (mdb_walk("umem_cache", (mdb_walk_cb_t)umem_walk_all, wsp) == -1) \
		return (WALK_ERR); \
	return (WALK_DONE); \
}
1404
/*
 * "::walk umem" — allocated buffers.  With no starting address, fan out
 * over all caches via UMEM_WALK_ALL (which returns from this function).
 */
int
umem_walk_init(mdb_walk_state_t *wsp)
{
	/* a walk_arg (from a registered variant) overrides the address */
	if (wsp->walk_arg != NULL)
		wsp->walk_addr = (uintptr_t)wsp->walk_arg;

	if (wsp->walk_addr == NULL)
		UMEM_WALK_ALL("umem", wsp);
	return (umem_walk_init_common(wsp, UM_ALLOCATED));
}
1415
/*
 * "::walk bufctl" — bufctls of allocated buffers.
 */
int
bufctl_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == NULL)
		UMEM_WALK_ALL("bufctl", wsp);
	return (umem_walk_init_common(wsp, UM_ALLOCATED | UM_BUFCTL));
}
1423
/*
 * "::walk freemem" — freed buffers (magazine layer plus slab freelists).
 */
int
freemem_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == NULL)
		UMEM_WALK_ALL("freemem", wsp);
	return (umem_walk_init_common(wsp, UM_FREE));
}
1431
/*
 * "::walk freectl" — bufctls of freed buffers.
 */
int
freectl_walk_init(mdb_walk_state_t *wsp)
{
	if (wsp->walk_addr == NULL)
		UMEM_WALK_ALL("freectl", wsp);
	return (umem_walk_init_common(wsp, UM_FREE | UM_BUFCTL));
}
1439
/*
 * State for walking a bufctl's transaction history: we follow the
 * bc_lastlog chain backwards through the log.
 */
typedef struct bufctl_history_walk {
	void *bhw_next;			/* next log entry to visit */
	umem_cache_t *bhw_cache;	/* cache of the base bufctl */
	umem_slab_t *bhw_slab;		/* slab of the base bufctl */
	hrtime_t bhw_timestamp;		/* timestamp of last entry reported */
} bufctl_history_walk_t;

/*
 * Start a history walk at the bufctl at walk_addr.  The bufctl's cache
 * and slab are recorded so later log entries can be validated against
 * them; the buffer address is stashed in walk_addr for the step
 * function's identity check.
 */
int
bufctl_history_walk_init(mdb_walk_state_t *wsp)
{
	bufctl_history_walk_t *bhw;
	umem_bufctl_audit_t bc;
	umem_bufctl_audit_t bcn;

	if (wsp->walk_addr == NULL) {
		mdb_warn("bufctl_history walk doesn't support global walks\n");
		return (WALK_ERR);
	}

	if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
		mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
		return (WALK_ERR);
	}

	bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
	bhw->bhw_timestamp = 0;
	bhw->bhw_cache = bc.bc_cache;
	bhw->bhw_slab = bc.bc_slab;

	/*
	 * sometimes the first log entry matches the base bufctl; in that
	 * case, skip the base bufctl.
	 */
	if (bc.bc_lastlog != NULL &&
	    mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
	    bc.bc_addr == bcn.bc_addr &&
	    bc.bc_cache == bcn.bc_cache &&
	    bc.bc_slab == bcn.bc_slab &&
	    bc.bc_timestamp == bcn.bc_timestamp &&
	    bc.bc_thread == bcn.bc_thread)
		bhw->bhw_next = bc.bc_lastlog;
	else
		bhw->bhw_next = (void *)wsp->walk_addr;

	wsp->walk_addr = (uintptr_t)bc.bc_addr;
	wsp->walk_data = bhw;

	return (WALK_NEXT);
}
1489
/*
 * Report the next entry in the bufctl's history chain.  Entries are
 * accepted only while they describe the same buffer, cache, and slab
 * and their timestamps strictly decrease (the latter also guards
 * against cycles in the chain).
 */
int
bufctl_history_walk_step(mdb_walk_state_t *wsp)
{
	bufctl_history_walk_t *bhw = wsp->walk_data;
	uintptr_t addr = (uintptr_t)bhw->bhw_next;
	uintptr_t baseaddr = wsp->walk_addr;
	umem_bufctl_audit_t *b;
	UMEM_LOCAL_BUFCTL_AUDIT(&b);	/* points b at local storage */

	if (addr == NULL)
		return (WALK_DONE);

	if (mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
		mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
		return (WALK_ERR);
	}

	/*
	 * The bufctl is only valid if the address, cache, and slab are
	 * correct.  We also check that the timestamp is decreasing, to
	 * prevent infinite loops.
	 */
	if ((uintptr_t)b->bc_addr != baseaddr ||
	    b->bc_cache != bhw->bhw_cache ||
	    b->bc_slab != bhw->bhw_slab ||
	    (bhw->bhw_timestamp != 0 && b->bc_timestamp >= bhw->bhw_timestamp))
		return (WALK_DONE);

	bhw->bhw_next = b->bc_lastlog;
	bhw->bhw_timestamp = b->bc_timestamp;

	return (wsp->walk_callback(addr, b, wsp->walk_cbdata));
}
1523
1524 void
bufctl_history_walk_fini(mdb_walk_state_t * wsp)1525 bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1526 {
1527 bufctl_history_walk_t *bhw = wsp->walk_data;
1528
1529 mdb_free(bhw, sizeof (*bhw));
1530 }
1531
/*
 * State for "::walk umem_log": a local copy of the whole transaction
 * log, plus an array of pointers to its records sorted by timestamp.
 */
typedef struct umem_log_walk {
	umem_bufctl_audit_t *ulw_base;	/* local copy of the log */
	umem_bufctl_audit_t **ulw_sorted; /* record pointers, sorted */
	umem_log_header_t ulw_lh;	/* copy of the target's log header */
	size_t ulw_size;		/* bytes in ulw_base */
	size_t ulw_maxndx;		/* number of records */
	size_t ulw_ndx;			/* step cursor */
} umem_log_walk_t;

/*
 * Read the target's transaction log (the header at walk_addr, or the
 * global umem_transaction_log when no address was given), then collect
 * and timestamp-sort pointers to every record.
 */
int
umem_log_walk_init(mdb_walk_state_t *wsp)
{
	uintptr_t lp = wsp->walk_addr;
	umem_log_walk_t *ulw;
	umem_log_header_t *lhp;
	int maxndx, i, j, k;

	/*
	 * By default (global walk), walk the umem_transaction_log.  Otherwise
	 * read the log whose umem_log_header_t is stored at walk_addr.
	 */
	if (lp == NULL && umem_readvar(&lp, "umem_transaction_log") == -1) {
		mdb_warn("failed to read 'umem_transaction_log'");
		return (WALK_ERR);
	}

	if (lp == NULL) {
		mdb_warn("log is disabled\n");
		return (WALK_ERR);
	}

	ulw = mdb_zalloc(sizeof (umem_log_walk_t), UM_SLEEP);
	lhp = &ulw->ulw_lh;

	if (mdb_vread(lhp, sizeof (umem_log_header_t), lp) == -1) {
		mdb_warn("failed to read log header at %p", lp);
		mdb_free(ulw, sizeof (umem_log_walk_t));
		return (WALK_ERR);
	}

	ulw->ulw_size = lhp->lh_chunksize * lhp->lh_nchunks;
	ulw->ulw_base = mdb_alloc(ulw->ulw_size, UM_SLEEP);
	/* usable records per chunk */
	maxndx = lhp->lh_chunksize / UMEM_BUFCTL_AUDIT_SIZE - 1;

	if (mdb_vread(ulw->ulw_base, ulw->ulw_size,
	    (uintptr_t)lhp->lh_base) == -1) {
		mdb_warn("failed to read log at base %p", lhp->lh_base);
		mdb_free(ulw->ulw_base, ulw->ulw_size);
		mdb_free(ulw, sizeof (umem_log_walk_t));
		return (WALK_ERR);
	}

	ulw->ulw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
	    sizeof (umem_bufctl_audit_t *), UM_SLEEP);

	/* gather a pointer to every record in the local copy */
	for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
		caddr_t chunk = (caddr_t)
		    ((uintptr_t)ulw->ulw_base + i * lhp->lh_chunksize);

		for (j = 0; j < maxndx; j++) {
			/* LINTED align */
			ulw->ulw_sorted[k++] = (umem_bufctl_audit_t *)chunk;
			chunk += UMEM_BUFCTL_AUDIT_SIZE;
		}
	}

	qsort(ulw->ulw_sorted, k, sizeof (umem_bufctl_audit_t *),
	    (int(*)(const void *, const void *))bufctlcmp);

	ulw->ulw_maxndx = k;
	wsp->walk_data = ulw;

	return (WALK_NEXT);
}
1606
1607 int
umem_log_walk_step(mdb_walk_state_t * wsp)1608 umem_log_walk_step(mdb_walk_state_t *wsp)
1609 {
1610 umem_log_walk_t *ulw = wsp->walk_data;
1611 umem_bufctl_audit_t *bcp;
1612
1613 if (ulw->ulw_ndx == ulw->ulw_maxndx)
1614 return (WALK_DONE);
1615
1616 bcp = ulw->ulw_sorted[ulw->ulw_ndx++];
1617
1618 return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)ulw->ulw_base +
1619 (uintptr_t)ulw->ulw_lh.lh_base, bcp, wsp->walk_cbdata));
1620 }
1621
1622 void
umem_log_walk_fini(mdb_walk_state_t * wsp)1623 umem_log_walk_fini(mdb_walk_state_t *wsp)
1624 {
1625 umem_log_walk_t *ulw = wsp->walk_data;
1626
1627 mdb_free(ulw->ulw_base, ulw->ulw_size);
1628 mdb_free(ulw->ulw_sorted, ulw->ulw_maxndx *
1629 sizeof (umem_bufctl_audit_t *));
1630 mdb_free(ulw, sizeof (umem_log_walk_t));
1631 }
1632
/*
 * One collected record: a bufctl's address and its transaction
 * timestamp (used for sorting).
 */
typedef struct allocdby_bufctl {
	uintptr_t abb_addr;		/* bufctl address in the target */
	hrtime_t abb_ts;		/* transaction timestamp */
} allocdby_bufctl_t;

/*
 * State for the allocdby/freedby walkers.
 */
typedef struct allocdby_walk {
	const char *abw_walk;		/* "bufctl" or "freectl" */
	uintptr_t abw_thread;		/* thread to filter on */
	size_t abw_nbufs;		/* records collected so far */
	size_t abw_size;		/* capacity of abw_buf */
	allocdby_bufctl_t *abw_buf;	/* growable record array */
	size_t abw_ndx;			/* step cursor */
} allocdby_walk_t;

/*
 * Record each bufctl whose transaction was performed by the thread of
 * interest; the array grows by doubling (allocate, copy, free).
 */
int
allocdby_walk_bufctl(uintptr_t addr, const umem_bufctl_audit_t *bcp,
    allocdby_walk_t *abw)
{
	if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
		return (WALK_NEXT);

	if (abw->abw_nbufs == abw->abw_size) {
		allocdby_bufctl_t *buf;
		size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;

		buf = mdb_zalloc(oldsize << 1, UM_SLEEP);

		bcopy(abw->abw_buf, buf, oldsize);
		mdb_free(abw->abw_buf, oldsize);

		abw->abw_size <<= 1;
		abw->abw_buf = buf;
	}

	abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
	abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
	abw->abw_nbufs++;

	return (WALK_NEXT);
}
1673
1674 /*ARGSUSED*/
1675 int
allocdby_walk_cache(uintptr_t addr,const umem_cache_t * c,allocdby_walk_t * abw)1676 allocdby_walk_cache(uintptr_t addr, const umem_cache_t *c, allocdby_walk_t *abw)
1677 {
1678 if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1679 abw, addr) == -1) {
1680 mdb_warn("couldn't walk bufctl for cache %p", addr);
1681 return (WALK_DONE);
1682 }
1683
1684 return (WALK_NEXT);
1685 }
1686
1687 static int
allocdby_cmp(const allocdby_bufctl_t * lhs,const allocdby_bufctl_t * rhs)1688 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1689 {
1690 if (lhs->abb_ts < rhs->abb_ts)
1691 return (1);
1692 if (lhs->abb_ts > rhs->abb_ts)
1693 return (-1);
1694 return (0);
1695 }
1696
/*
 * Shared init for the allocdby/freedby walkers: collect a record for
 * every transaction by the thread at walk_addr across all caches, then
 * sort the records newest-first (see allocdby_cmp()).
 */
static int
allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
{
	allocdby_walk_t *abw;

	if (wsp->walk_addr == NULL) {
		mdb_warn("allocdby walk doesn't support global walks\n");
		return (WALK_ERR);
	}

	abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);

	abw->abw_thread = wsp->walk_addr;
	abw->abw_walk = walk;
	abw->abw_size = 128;	/* something reasonable */
	abw->abw_buf =
	    mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);

	wsp->walk_data = abw;

	if (mdb_walk("umem_cache",
	    (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
		mdb_warn("couldn't walk umem_cache");
		allocdby_walk_fini(wsp);
		return (WALK_ERR);
	}

	qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
	    (int(*)(const void *, const void *))allocdby_cmp);

	return (WALK_NEXT);
}
1729
/*
 * "::walk allocdby" — bufctls for buffers allocated by a given thread.
 */
int
allocdby_walk_init(mdb_walk_state_t *wsp)
{
	return (allocdby_walk_init_common(wsp, "bufctl"));
}
1735
/*
 * "::walk freedby" — bufctls for buffers freed by a given thread.
 */
int
freedby_walk_init(mdb_walk_state_t *wsp)
{
	return (allocdby_walk_init_common(wsp, "freectl"));
}
1741
/*
 * Step function: read the next recorded bufctl (already in newest-first
 * order) into local audit storage and hand it to the callback.
 */
int
allocdby_walk_step(mdb_walk_state_t *wsp)
{
	allocdby_walk_t *abw = wsp->walk_data;
	uintptr_t addr;
	umem_bufctl_audit_t *bcp;
	UMEM_LOCAL_BUFCTL_AUDIT(&bcp);	/* points bcp at local storage */

	if (abw->abw_ndx == abw->abw_nbufs)
		return (WALK_DONE);

	addr = abw->abw_buf[abw->abw_ndx++].abb_addr;

	if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
		mdb_warn("couldn't read bufctl at %p", addr);
		return (WALK_DONE);
	}

	return (wsp->walk_callback(addr, bcp, wsp->walk_cbdata));
}
1762
1763 void
allocdby_walk_fini(mdb_walk_state_t * wsp)1764 allocdby_walk_fini(mdb_walk_state_t *wsp)
1765 {
1766 allocdby_walk_t *abw = wsp->walk_data;
1767
1768 mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
1769 mdb_free(abw, sizeof (allocdby_walk_t));
1770 }
1771
1772 /*ARGSUSED*/
1773 int
allocdby_walk(uintptr_t addr,const umem_bufctl_audit_t * bcp,void * ignored)1774 allocdby_walk(uintptr_t addr, const umem_bufctl_audit_t *bcp, void *ignored)
1775 {
1776 char c[MDB_SYM_NAMLEN];
1777 GElf_Sym sym;
1778 int i;
1779
1780 mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
1781 for (i = 0; i < bcp->bc_depth; i++) {
1782 if (mdb_lookup_by_addr(bcp->bc_stack[i],
1783 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
1784 continue;
1785 if (is_umem_sym(c, "umem_"))
1786 continue;
1787 mdb_printf("%s+0x%lx",
1788 c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
1789 break;
1790 }
1791 mdb_printf("\n");
1792
1793 return (WALK_NEXT);
1794 }
1795
1796 static int
allocdby_common(uintptr_t addr,uint_t flags,const char * w)1797 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
1798 {
1799 if (!(flags & DCMD_ADDRSPEC))
1800 return (DCMD_USAGE);
1801
1802 mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
1803
1804 if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
1805 mdb_warn("can't walk '%s' for %p", w, addr);
1806 return (DCMD_ERR);
1807 }
1808
1809 return (DCMD_OK);
1810 }
1811
/*ARGSUSED*/
/*
 * ::allocdby — show allocation bufctls for the thread at addr.
 */
int
allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	return (allocdby_common(addr, flags, "allocdby"));
}
1818
/*ARGSUSED*/
/*
 * ::freedby — show free bufctls for the thread at addr.
 */
int
freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	return (allocdby_common(addr, flags, "freedby"));
}
1825
/*
 * State shared by the ::whatis umem/vmem callbacks.
 */
typedef struct whatis_info {
	mdb_whatis_t *wi_w;		/* whatis handle */
	const umem_cache_t *wi_cache;	/* cache currently being searched */
	const vmem_t *wi_vmem;		/* arena currently being searched */
	vmem_t *wi_msb_arena;		/* umem_internal_arena (metadata) */
	size_t wi_slab_size;		/* usable bytes per slab in wi_cache */
	int wi_slab_found;		/* set by whatis_walk_slab() on a hit */
	uint_t wi_freemem;		/* TRUE while searching freed buffers */
} whatis_info_t;

/* call one of our dcmd functions with "-v" and the provided address */
static void
whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr)
{
	mdb_arg_t a;
	a.a_type = MDB_TYPE_STRING;
	a.a_un.a_str = "-v";

	mdb_printf(":\n");
	(void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a);
}
1847
1848 static void
whatis_print_umem(whatis_info_t * wi,uintptr_t maddr,uintptr_t addr,uintptr_t baddr)1849 whatis_print_umem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr,
1850 uintptr_t baddr)
1851 {
1852 mdb_whatis_t *w = wi->wi_w;
1853 const umem_cache_t *cp = wi->wi_cache;
1854 int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET);
1855
1856 int call_printer = (!quiet && (cp->cache_flags & UMF_AUDIT));
1857
1858 mdb_whatis_report_object(w, maddr, addr, "");
1859
1860 if (baddr != 0 && !call_printer)
1861 mdb_printf("bufctl %p ", baddr);
1862
1863 mdb_printf("%s from %s",
1864 (wi->wi_freemem == FALSE) ? "allocated" : "freed", cp->cache_name);
1865
1866 if (call_printer && baddr != 0) {
1867 whatis_call_printer(bufctl, baddr);
1868 return;
1869 }
1870 mdb_printf("\n");
1871 }
1872
1873 /*ARGSUSED*/
1874 static int
whatis_walk_umem(uintptr_t addr,void * ignored,whatis_info_t * wi)1875 whatis_walk_umem(uintptr_t addr, void *ignored, whatis_info_t *wi)
1876 {
1877 mdb_whatis_t *w = wi->wi_w;
1878
1879 uintptr_t cur;
1880 size_t size = wi->wi_cache->cache_bufsize;
1881
1882 while (mdb_whatis_match(w, addr, size, &cur))
1883 whatis_print_umem(wi, cur, addr, NULL);
1884
1885 return (WHATIS_WALKRET(w));
1886 }
1887
1888 /*ARGSUSED*/
1889 static int
whatis_walk_bufctl(uintptr_t baddr,const umem_bufctl_t * bcp,whatis_info_t * wi)1890 whatis_walk_bufctl(uintptr_t baddr, const umem_bufctl_t *bcp, whatis_info_t *wi)
1891 {
1892 mdb_whatis_t *w = wi->wi_w;
1893
1894 uintptr_t cur;
1895 uintptr_t addr = (uintptr_t)bcp->bc_addr;
1896 size_t size = wi->wi_cache->cache_bufsize;
1897
1898 while (mdb_whatis_match(w, addr, size, &cur))
1899 whatis_print_umem(wi, cur, addr, baddr);
1900
1901 return (WHATIS_WALKRET(w));
1902 }
1903
1904
1905 static int
whatis_walk_seg(uintptr_t addr,const vmem_seg_t * vs,whatis_info_t * wi)1906 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi)
1907 {
1908 mdb_whatis_t *w = wi->wi_w;
1909
1910 size_t size = vs->vs_end - vs->vs_start;
1911 uintptr_t cur;
1912
1913 /* We're not interested in anything but alloc and free segments */
1914 if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE)
1915 return (WALK_NEXT);
1916
1917 while (mdb_whatis_match(w, vs->vs_start, size, &cur)) {
1918 mdb_whatis_report_object(w, cur, vs->vs_start, "");
1919
1920 /*
1921 * If we're not printing it seperately, provide the vmem_seg
1922 * pointer if it has a stack trace.
1923 */
1924 if ((mdb_whatis_flags(w) & WHATIS_QUIET) &&
1925 ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0 ||
1926 (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) {
1927 mdb_printf("vmem_seg %p ", addr);
1928 }
1929
1930 mdb_printf("%s from %s vmem arena",
1931 (vs->vs_type == VMEM_ALLOC) ? "allocated" : "freed",
1932 wi->wi_vmem->vm_name);
1933
1934 if (!mdb_whatis_flags(w) & WHATIS_QUIET)
1935 whatis_call_printer(vmem_seg, addr);
1936 else
1937 mdb_printf("\n");
1938 }
1939
1940 return (WHATIS_WALKRET(w));
1941 }
1942
1943 static int
whatis_walk_vmem(uintptr_t addr,const vmem_t * vmem,whatis_info_t * wi)1944 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi)
1945 {
1946 mdb_whatis_t *w = wi->wi_w;
1947 const char *nm = vmem->vm_name;
1948 wi->wi_vmem = vmem;
1949
1950 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
1951 mdb_printf("Searching vmem arena %s...\n", nm);
1952
1953 if (mdb_pwalk("vmem_seg",
1954 (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) {
1955 mdb_warn("can't walk vmem seg for %p", addr);
1956 return (WALK_NEXT);
1957 }
1958
1959 return (WHATIS_WALKRET(w));
1960 }
1961
1962 /*ARGSUSED*/
1963 static int
whatis_walk_slab(uintptr_t saddr,const umem_slab_t * sp,whatis_info_t * wi)1964 whatis_walk_slab(uintptr_t saddr, const umem_slab_t *sp, whatis_info_t *wi)
1965 {
1966 mdb_whatis_t *w = wi->wi_w;
1967
1968 /* It must overlap with the slab data, or it's not interesting */
1969 if (mdb_whatis_overlaps(w,
1970 (uintptr_t)sp->slab_base, wi->wi_slab_size)) {
1971 wi->wi_slab_found++;
1972 return (WALK_DONE);
1973 }
1974 return (WALK_NEXT);
1975 }
1976
/*
 * Search one umem cache for the ::whatis target addresses: allocated
 * buffers first, then freed ones.  A cheap slab-overlap pre-scan skips
 * caches that cannot contain a match.
 */
static int
whatis_walk_cache(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
{
	mdb_whatis_t *w = wi->wi_w;
	char *walk, *freewalk;
	mdb_walk_cb_t func;
	int do_bufctl;

	/* Override the '-b' flag as necessary */
	if (!(c->cache_flags & UMF_HASH))
		do_bufctl = FALSE;	/* no bufctls to walk */
	else if (c->cache_flags & UMF_AUDIT)
		do_bufctl = TRUE;	/* we always want debugging info */
	else
		do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0);

	if (do_bufctl) {
		walk = "bufctl";
		freewalk = "freectl";
		func = (mdb_walk_cb_t)whatis_walk_bufctl;
	} else {
		walk = "umem";
		freewalk = "freemem";
		func = (mdb_walk_cb_t)whatis_walk_umem;
	}

	wi->wi_cache = c;

	if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
		mdb_printf("Searching %s...\n", c->cache_name);

	/*
	 * If more than two buffers live on each slab, figure out if we're
	 * interested in anything in any slab before doing the more expensive
	 * umem/freemem (bufctl/freectl) walkers.
	 */
	wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor;
	if (!(c->cache_flags & UMF_HASH))
		wi->wi_slab_size -= sizeof (umem_slab_t);

	if ((wi->wi_slab_size / c->cache_chunksize) > 2) {
		wi->wi_slab_found = 0;
		if (mdb_pwalk("umem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi,
		    addr) == -1) {
			mdb_warn("can't find umem_slab walker");
			return (WALK_DONE);
		}
		if (wi->wi_slab_found == 0)
			return (WALK_NEXT);	/* no slab overlaps */
	}

	wi->wi_freemem = FALSE;
	if (mdb_pwalk(walk, func, wi, addr) == -1) {
		mdb_warn("can't find %s walker", walk);
		return (WALK_DONE);
	}

	if (mdb_whatis_done(w))
		return (WALK_DONE);

	/*
	 * We have searched for allocated memory; now search for freed memory.
	 */
	if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
		mdb_printf("Searching %s for free memory...\n", c->cache_name);

	wi->wi_freemem = TRUE;

	if (mdb_pwalk(freewalk, func, wi, addr) == -1) {
		mdb_warn("can't find %s walker", freewalk);
		return (WALK_DONE);
	}

	return (WHATIS_WALKRET(w));
}
2052
2053 static int
whatis_walk_touch(uintptr_t addr,const umem_cache_t * c,whatis_info_t * wi)2054 whatis_walk_touch(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2055 {
2056 if (c->cache_arena == wi->wi_msb_arena ||
2057 (c->cache_cflags & UMC_NOTOUCH))
2058 return (WALK_NEXT);
2059
2060 return (whatis_walk_cache(addr, c, wi));
2061 }
2062
2063 static int
whatis_walk_metadata(uintptr_t addr,const umem_cache_t * c,whatis_info_t * wi)2064 whatis_walk_metadata(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2065 {
2066 if (c->cache_arena != wi->wi_msb_arena)
2067 return (WALK_NEXT);
2068
2069 return (whatis_walk_cache(addr, c, wi));
2070 }
2071
2072 static int
whatis_walk_notouch(uintptr_t addr,const umem_cache_t * c,whatis_info_t * wi)2073 whatis_walk_notouch(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2074 {
2075 if (c->cache_arena == wi->wi_msb_arena ||
2076 !(c->cache_cflags & UMC_NOTOUCH))
2077 return (WALK_NEXT);
2078
2079 return (whatis_walk_cache(addr, c, wi));
2080 }
2081
/*ARGSUSED*/
/*
 * ::whatis handler for umem: search every umem cache for the target
 * addresses, in three passes (see below).
 */
static int
whatis_run_umem(mdb_whatis_t *w, void *ignored)
{
	whatis_info_t wi;

	bzero(&wi, sizeof (wi));
	wi.wi_w = w;

	/* umem's metadata is allocated from the umem_internal_arena */
	if (mdb_readvar(&wi.wi_msb_arena, "umem_internal_arena") == -1)
		mdb_warn("unable to readvar \"umem_internal_arena\"");

	/*
	 * We process umem caches in the following order:
	 *
	 *	non-UMC_NOTOUCH, non-metadata	(typically the most interesting)
	 *	metadata			(can be huge with UMF_AUDIT)
	 *	UMC_NOTOUCH, non-metadata	(see umem_walk_all())
	 */
	if (mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_touch,
	    &wi) == -1 ||
	    mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_metadata,
	    &wi) == -1 ||
	    mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_notouch,
	    &wi) == -1) {
		mdb_warn("couldn't find umem_cache walker");
		return (1);
	}
	return (0);
}
2113
2114 /*ARGSUSED*/
2115 static int
whatis_run_vmem(mdb_whatis_t * w,void * ignored)2116 whatis_run_vmem(mdb_whatis_t *w, void *ignored)
2117 {
2118 whatis_info_t wi;
2119
2120 bzero(&wi, sizeof (wi));
2121 wi.wi_w = w;
2122
2123 if (mdb_walk("vmem_postfix",
2124 (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) {
2125 mdb_warn("couldn't find vmem_postfix walker");
2126 return (1);
2127 }
2128 return (0);
2129 }
2130
/*
 * Module initialization: register the umem_cache walker, snapshot the
 * target's umem variables, and hook up the ::whatis handlers.
 */
int
umem_init(void)
{
	mdb_walker_t w = {
		"umem_cache", "walk list of umem caches", umem_cache_walk_init,
		umem_cache_walk_step, umem_cache_walk_fini
	};

	if (mdb_add_walker(&w) == -1) {
		mdb_warn("failed to add umem_cache walker");
		return (-1);
	}

	if (umem_update_variables() == -1)
		return (-1);

	/* install a callback so that our variables are always up-to-date */
	(void) mdb_callback_add(MDB_CALLBACK_STCHG, umem_statechange_cb, NULL);
	umem_statechange_cb(NULL);

	/*
	 * Register our ::whatis callbacks.
	 */
	mdb_whatis_register("umem", whatis_run_umem, NULL,
	    WHATIS_PRIO_ALLOCATOR, WHATIS_REG_NO_ID);
	mdb_whatis_register("vmem", whatis_run_vmem, NULL,
	    WHATIS_PRIO_ALLOCATOR, WHATIS_REG_NO_ID);

	return (0);
}
2161
/*
 * Address range [umc_low, umc_high) of one CPU's current log chunk.
 */
typedef struct umem_log_cpu {
	uintptr_t umc_low;
	uintptr_t umc_high;
} umem_log_cpu_t;

/*
 * Print one log entry, prefixed with the index of the CPU whose current
 * log chunk contains it (blank when no CPU's range matches).
 */
int
umem_log_walk(uintptr_t addr, const umem_bufctl_audit_t *b, umem_log_cpu_t *umc)
{
	int i;

	for (i = 0; i < umem_max_ncpus; i++) {
		if (addr >= umc[i].umc_low && addr < umc[i].umc_high)
			break;
	}

	if (i == umem_max_ncpus)
		mdb_printf("   ");
	else
		mdb_printf("%3d", i);

	mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
	    b->bc_timestamp, b->bc_thread);

	return (WALK_NEXT);
}
2187
/*ARGSUSED*/
/*
 * ::umem_log dcmd: display the transaction log.  Given an address,
 * print just that entry; otherwise walk the whole log, tagging each
 * entry with the CPU whose active chunk contains it.
 */
int
umem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	umem_log_header_t lh;
	umem_cpu_log_header_t clh;
	uintptr_t lhp, clhp;
	umem_log_cpu_t *umc;
	int i;

	if (umem_readvar(&lhp, "umem_transaction_log") == -1) {
		mdb_warn("failed to read 'umem_transaction_log'");
		return (DCMD_ERR);
	}

	if (lhp == NULL) {
		mdb_warn("no umem transaction log\n");
		return (DCMD_ERR);
	}

	if (mdb_vread(&lh, sizeof (umem_log_header_t), lhp) == -1) {
		mdb_warn("failed to read log header at %p", lhp);
		return (DCMD_ERR);
	}

	/* target address of the per-CPU header array embedded in lh */
	clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);

	/* UM_GC: mdb reclaims this automatically when the dcmd completes */
	umc = mdb_zalloc(sizeof (umem_log_cpu_t) * umem_max_ncpus,
	    UM_SLEEP | UM_GC);

	/* record each CPU's active chunk range [umc_low, umc_high) */
	for (i = 0; i < umem_max_ncpus; i++) {
		if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
			mdb_warn("cannot read cpu %d's log header at %p",
			    i, clhp);
			return (DCMD_ERR);
		}

		umc[i].umc_low = clh.clh_chunk * lh.lh_chunksize +
		    (uintptr_t)lh.lh_base;
		umc[i].umc_high = (uintptr_t)clh.clh_current;

		clhp += sizeof (umem_cpu_log_header_t);
	}

	if (DCMD_HDRSPEC(flags)) {
		mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR",
		    "BUFADDR", "TIMESTAMP", "THREAD");
	}

	/*
	 * If we have been passed an address, we'll just print out that
	 * log entry.
	 */
	if (flags & DCMD_ADDRSPEC) {
		umem_bufctl_audit_t *bp;
		UMEM_LOCAL_BUFCTL_AUDIT(&bp);	/* points bp at local storage */

		if (mdb_vread(bp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
			mdb_warn("failed to read bufctl at %p", addr);
			return (DCMD_ERR);
		}

		(void) umem_log_walk(addr, bp, umc);

		return (DCMD_OK);
	}

	if (mdb_walk("umem_log", (mdb_walk_cb_t)umem_log_walk, umc) == -1) {
		mdb_warn("can't find umem log walker");
		return (DCMD_ERR);
	}

	return (DCMD_OK);
}
2262
/*
 * State threaded through the bufctl_history walk so each walked bufctl
 * can be re-displayed with ::bufctl using the caller's original arguments.
 */
typedef struct bufctl_history_cb {
	int bhc_flags;			/* dcmd flags for each invocation */
	int bhc_argc;			/* pass-through argument count */
	const mdb_arg_t *bhc_argv;	/* pass-through argument vector */
	int bhc_ret;			/* most recent bufctl() return value */
} bufctl_history_cb_t;
2269
2270 /*ARGSUSED*/
2271 static int
bufctl_history_callback(uintptr_t addr,const void * ign,void * arg)2272 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2273 {
2274 bufctl_history_cb_t *bhc = arg;
2275
2276 bhc->bhc_ret =
2277 bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2278
2279 bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2280
2281 return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2282 }
2283
/*
 * Help text for the ::bufctl dcmd (registered as its help callback).
 */
void
bufctl_help(void)
{
	mdb_printf("%s\n",
"Display the contents of umem_bufctl_audit_ts, with optional filtering.\n");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -v    Display the full content of the bufctl, including its stack trace\n"
"  -h    retrieve the bufctl's transaction history, if available\n"
"  -a addr\n"
"        filter out bufctls not involving the buffer at addr\n"
"  -c caller\n"
"        filter out bufctls without the function/PC in their stack trace\n"
"  -e earliest\n"
"        filter out bufctls timestamped before earliest\n"
"  -l latest\n"
"        filter out bufctls timestamped after latest\n"
"  -t thread\n"
"        filter out bufctls not involving thread\n");
}
2306
/*
 * ::bufctl dcmd - display a umem_bufctl_audit_t, with optional filtering
 * by buffer address (-a), caller PC (-c), timestamp window (-e/-l), and
 * thread (-t).  -h replays the bufctl's transaction history through this
 * same dcmd; -v selects verbose (multi-line) output.
 */
int
bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	uint_t verbose = FALSE;
	uint_t history = FALSE;
	uint_t in_history = FALSE;
	uintptr_t caller = NULL, thread = NULL;
	uintptr_t laddr, haddr, baddr = NULL;
	hrtime_t earliest = 0, latest = 0;
	int i, depth;
	char c[MDB_SYM_NAMLEN];
	GElf_Sym sym;
	umem_bufctl_audit_t *bcp;
	UMEM_LOCAL_BUFCTL_AUDIT(&bcp);

	if (mdb_getopts(argc, argv,
	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
	    'h', MDB_OPT_SETBITS, TRUE, &history,
	    'H', MDB_OPT_SETBITS, TRUE, &in_history,		/* internal */
	    'c', MDB_OPT_UINTPTR, &caller,
	    't', MDB_OPT_UINTPTR, &thread,
	    'e', MDB_OPT_UINT64, &earliest,
	    'l', MDB_OPT_UINT64, &latest,
	    'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
		return (DCMD_USAGE);

	if (!(flags & DCMD_ADDRSPEC))
		return (DCMD_USAGE);

	/* -H is only set internally when recursing via the history walk */
	if (in_history && !history)
		return (DCMD_USAGE);

	if (history && !in_history) {
		mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
		    UM_SLEEP | UM_GC);
		bufctl_history_cb_t bhc;

		nargv[0].a_type = MDB_TYPE_STRING;
		nargv[0].a_un.a_str = "-H";		/* prevent recursion */

		for (i = 0; i < argc; i++)
			nargv[i + 1] = argv[i];

		/*
		 * When in history mode, we treat each element as if it
		 * were in a separate loop, so that the headers group
		 * bufctls with similar histories.
		 */
		bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
		bhc.bhc_argc = argc + 1;
		bhc.bhc_argv = nargv;
		bhc.bhc_ret = DCMD_OK;

		if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
		    addr) == -1) {
			mdb_warn("unable to walk bufctl_history");
			return (DCMD_ERR);
		}

		if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
			mdb_printf("\n");

		return (bhc.bhc_ret);
	}

	if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
		if (verbose) {
			mdb_printf("%16s %16s %16s %16s\n"
			    "%<u>%16s %16s %16s %16s%</u>\n",
			    "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
			    "", "CACHE", "LASTLOG", "CONTENTS");
		} else {
			mdb_printf("%<u>%-?s %-?s %-12s %5s %s%</u>\n",
			    "ADDR", "BUFADDR", "TIMESTAMP", "THRD", "CALLER");
		}
	}

	if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
		mdb_warn("couldn't read bufctl at %p", addr);
		return (DCMD_ERR);
	}

	/*
	 * Guard against bogus bc_depth in case the bufctl is corrupt or
	 * the address does not really refer to a bufctl.
	 */
	depth = MIN(bcp->bc_depth, umem_stack_depth);

	/*
	 * Apply the -c filter: keep this bufctl only if some stack frame
	 * lies within [laddr, haddr).
	 */
	if (caller != NULL) {
		laddr = caller;
		haddr = caller + sizeof (caller);

		if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
		    &sym) != -1 && caller == (uintptr_t)sym.st_value) {
			/*
			 * We were provided an exact symbol value; any
			 * address in the function is valid.
			 */
			laddr = (uintptr_t)sym.st_value;
			haddr = (uintptr_t)sym.st_value + sym.st_size;
		}

		for (i = 0; i < depth; i++)
			if (bcp->bc_stack[i] >= laddr &&
			    bcp->bc_stack[i] < haddr)
				break;

		if (i == depth)
			return (DCMD_OK);
	}

	/* -t, -e, -l, and -a filters: silently skip non-matching bufctls */
	if (thread != NULL && (uintptr_t)bcp->bc_thread != thread)
		return (DCMD_OK);

	if (earliest != 0 && bcp->bc_timestamp < earliest)
		return (DCMD_OK);

	if (latest != 0 && bcp->bc_timestamp > latest)
		return (DCMD_OK);

	if (baddr != 0 && (uintptr_t)bcp->bc_addr != baddr)
		return (DCMD_OK);

	/* in a pipeline, emit only the address of the matching bufctl */
	if (flags & DCMD_PIPE_OUT) {
		mdb_printf("%#r\n", addr);
		return (DCMD_OK);
	}

	if (verbose) {
		mdb_printf(
		    "%<b>%16p%</b> %16p %16llx %16d\n"
		    "%16s %16p %16p %16p\n",
		    addr, bcp->bc_addr, bcp->bc_timestamp, bcp->bc_thread,
		    "", bcp->bc_cache, bcp->bc_lastlog, bcp->bc_contents);

		mdb_inc_indent(17);
		for (i = 0; i < depth; i++)
			mdb_printf("%a\n", bcp->bc_stack[i]);
		mdb_dec_indent(17);
		mdb_printf("\n");
	} else {
		mdb_printf("%0?p %0?p %12llx %5d", addr, bcp->bc_addr,
		    bcp->bc_timestamp, bcp->bc_thread);

		/*
		 * One-line mode: show the first interesting (non-umem)
		 * caller from the stack trace, if any resolves.
		 */
		for (i = 0; i < depth; i++) {
			if (mdb_lookup_by_addr(bcp->bc_stack[i],
			    MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
				continue;
			if (is_umem_sym(c, "umem_"))
				continue;
			mdb_printf(" %a\n", bcp->bc_stack[i]);
			break;
		}

		if (i >= depth)
			mdb_printf("\n");
	}

	return (DCMD_OK);
}
2467
2468 /*ARGSUSED*/
2469 int
bufctl_audit(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)2470 bufctl_audit(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2471 {
2472 mdb_arg_t a;
2473
2474 if (!(flags & DCMD_ADDRSPEC))
2475 return (DCMD_USAGE);
2476
2477 if (argc != 0)
2478 return (DCMD_USAGE);
2479
2480 a.a_type = MDB_TYPE_STRING;
2481 a.a_un.a_str = "-v";
2482
2483 return (bufctl(addr, flags, 1, &a));
2484 }
2485
/*
 * State shared by the ::umem_verify walk callbacks (verify_free and
 * verify_alloc).
 */
typedef struct umem_verify {
	uint64_t *umv_buf;		/* buffer to read cache contents into */
	size_t umv_size;		/* number of bytes in umv_buf */
	int umv_corruption;		/* > 0 if corruption found. */
	int umv_besilent;		/* if set, don't report individual corruption sites */
	struct umem_cache umv_cache;	/* the cache we're operating on */
} umem_verify_t;
2493
2494 /*
2495 * verify_pattern()
2496 * verify that buf is filled with the pattern pat.
2497 */
2498 static int64_t
verify_pattern(uint64_t * buf_arg,size_t size,uint64_t pat)2499 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
2500 {
2501 /*LINTED*/
2502 uint64_t *bufend = (uint64_t *)((char *)buf_arg + size);
2503 uint64_t *buf;
2504
2505 for (buf = buf_arg; buf < bufend; buf++)
2506 if (*buf != pat)
2507 return ((uintptr_t)buf - (uintptr_t)buf_arg);
2508 return (-1);
2509 }
2510
2511 /*
2512 * verify_buftag()
2513 * verify that btp->bt_bxstat == (bcp ^ pat)
2514 */
2515 static int
verify_buftag(umem_buftag_t * btp,uintptr_t pat)2516 verify_buftag(umem_buftag_t *btp, uintptr_t pat)
2517 {
2518 return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
2519 }
2520
2521 /*
2522 * verify_free()
2523 * verify the integrity of a free block of memory by checking
2524 * that it is filled with 0xdeadbeef and that its buftag is sane.
2525 */
2526 /*ARGSUSED1*/
2527 static int
verify_free(uintptr_t addr,const void * data,void * private)2528 verify_free(uintptr_t addr, const void *data, void *private)
2529 {
2530 umem_verify_t *umv = (umem_verify_t *)private;
2531 uint64_t *buf = umv->umv_buf; /* buf to validate */
2532 int64_t corrupt; /* corruption offset */
2533 umem_buftag_t *buftagp; /* ptr to buftag */
2534 umem_cache_t *cp = &umv->umv_cache;
2535 int besilent = umv->umv_besilent;
2536
2537 /*LINTED*/
2538 buftagp = UMEM_BUFTAG(cp, buf);
2539
2540 /*
2541 * Read the buffer to check.
2542 */
2543 if (mdb_vread(buf, umv->umv_size, addr) == -1) {
2544 if (!besilent)
2545 mdb_warn("couldn't read %p", addr);
2546 return (WALK_NEXT);
2547 }
2548
2549 if ((corrupt = verify_pattern(buf, cp->cache_verify,
2550 UMEM_FREE_PATTERN)) >= 0) {
2551 if (!besilent)
2552 mdb_printf("buffer %p (free) seems corrupted, at %p\n",
2553 addr, (uintptr_t)addr + corrupt);
2554 goto corrupt;
2555 }
2556
2557 if ((cp->cache_flags & UMF_HASH) &&
2558 buftagp->bt_redzone != UMEM_REDZONE_PATTERN) {
2559 if (!besilent)
2560 mdb_printf("buffer %p (free) seems to "
2561 "have a corrupt redzone pattern\n", addr);
2562 goto corrupt;
2563 }
2564
2565 /*
2566 * confirm bufctl pointer integrity.
2567 */
2568 if (verify_buftag(buftagp, UMEM_BUFTAG_FREE) == -1) {
2569 if (!besilent)
2570 mdb_printf("buffer %p (free) has a corrupt "
2571 "buftag\n", addr);
2572 goto corrupt;
2573 }
2574
2575 return (WALK_NEXT);
2576 corrupt:
2577 umv->umv_corruption++;
2578 return (WALK_NEXT);
2579 }
2580
/*
 * verify_alloc()
 *	Verify that the buftag of an allocated buffer makes sense with respect
 *	to the buffer.  Increments umv_corruption (and, unless besilent,
 *	prints a diagnostic) for each inconsistency found.
 */
/*ARGSUSED1*/
static int
verify_alloc(uintptr_t addr, const void *data, void *private)
{
	umem_verify_t *umv = (umem_verify_t *)private;
	umem_cache_t *cp = &umv->umv_cache;
	uint64_t *buf = umv->umv_buf;	/* buf to validate */
	/*LINTED*/
	umem_buftag_t *buftagp = UMEM_BUFTAG(cp, buf);
	uint32_t *ip = (uint32_t *)buftagp;	/* buftag as 32-bit words */
	uint8_t *bp = (uint8_t *)buf;		/* buffer as raw bytes */
	int looks_ok = 0, size_ok = 1;	/* flags for finding corruption */
	int besilent = umv->umv_besilent;

	/*
	 * Read the buffer to check.
	 */
	if (mdb_vread(buf, umv->umv_size, addr) == -1) {
		if (!besilent)
			mdb_warn("couldn't read %p", addr);
		return (WALK_NEXT);
	}

	/*
	 * There are two cases to handle:
	 * 1. If the buf was alloc'd using umem_cache_alloc, it will have
	 *    0xfeedfacefeedface at the end of it
	 * 2. If the buf was alloc'd using umem_alloc, it will have
	 *    0xbb just past the end of the region in use.  At the buftag,
	 *    it will have 0xfeedface (or, if the whole buffer is in use,
	 *    0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
	 *    endianness), followed by 32 bits containing the offset of the
	 *    0xbb byte in the buffer.
	 *
	 * Finally, the two 32-bit words that comprise the second half of the
	 * buftag should xor to UMEM_BUFTAG_ALLOC
	 */

	if (buftagp->bt_redzone == UMEM_REDZONE_PATTERN)
		looks_ok = 1;		/* case 1: full redzone pattern */
	else if (!UMEM_SIZE_VALID(ip[1]))
		size_ok = 0;		/* encoded size fails its checksum */
	else if (bp[UMEM_SIZE_DECODE(ip[1])] == UMEM_REDZONE_BYTE)
		looks_ok = 1;		/* case 2: 0xbb at the encoded offset */
	else
		size_ok = 0;

	if (!size_ok) {
		if (!besilent)
			mdb_printf("buffer %p (allocated) has a corrupt "
			    "redzone size encoding\n", addr);
		goto corrupt;
	}

	if (!looks_ok) {
		if (!besilent)
			mdb_printf("buffer %p (allocated) has a corrupt "
			    "redzone signature\n", addr);
		goto corrupt;
	}

	if (verify_buftag(buftagp, UMEM_BUFTAG_ALLOC) == -1) {
		if (!besilent)
			mdb_printf("buffer %p (allocated) has a "
			    "corrupt buftag\n", addr);
		goto corrupt;
	}

	return (WALK_NEXT);
corrupt:
	umv->umv_corruption++;
	return (WALK_NEXT);
}
2659
2660 /*ARGSUSED2*/
2661 int
umem_verify(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)2662 umem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2663 {
2664 if (flags & DCMD_ADDRSPEC) {
2665 int check_alloc = 0, check_free = 0;
2666 umem_verify_t umv;
2667
2668 if (mdb_vread(&umv.umv_cache, sizeof (umv.umv_cache),
2669 addr) == -1) {
2670 mdb_warn("couldn't read umem_cache %p", addr);
2671 return (DCMD_ERR);
2672 }
2673
2674 umv.umv_size = umv.umv_cache.cache_buftag +
2675 sizeof (umem_buftag_t);
2676 umv.umv_buf = mdb_alloc(umv.umv_size, UM_SLEEP | UM_GC);
2677 umv.umv_corruption = 0;
2678
2679 if ((umv.umv_cache.cache_flags & UMF_REDZONE)) {
2680 check_alloc = 1;
2681 if (umv.umv_cache.cache_flags & UMF_DEADBEEF)
2682 check_free = 1;
2683 } else {
2684 if (!(flags & DCMD_LOOP)) {
2685 mdb_warn("cache %p (%s) does not have "
2686 "redzone checking enabled\n", addr,
2687 umv.umv_cache.cache_name);
2688 }
2689 return (DCMD_ERR);
2690 }
2691
2692 if (flags & DCMD_LOOP) {
2693 /*
2694 * table mode, don't print out every corrupt buffer
2695 */
2696 umv.umv_besilent = 1;
2697 } else {
2698 mdb_printf("Summary for cache '%s'\n",
2699 umv.umv_cache.cache_name);
2700 mdb_inc_indent(2);
2701 umv.umv_besilent = 0;
2702 }
2703
2704 if (check_alloc)
2705 (void) mdb_pwalk("umem", verify_alloc, &umv, addr);
2706 if (check_free)
2707 (void) mdb_pwalk("freemem", verify_free, &umv, addr);
2708
2709 if (flags & DCMD_LOOP) {
2710 if (umv.umv_corruption == 0) {
2711 mdb_printf("%-*s %?p clean\n",
2712 UMEM_CACHE_NAMELEN,
2713 umv.umv_cache.cache_name, addr);
2714 } else {
2715 char *s = ""; /* optional s in "buffer[s]" */
2716 if (umv.umv_corruption > 1)
2717 s = "s";
2718
2719 mdb_printf("%-*s %?p %d corrupt buffer%s\n",
2720 UMEM_CACHE_NAMELEN,
2721 umv.umv_cache.cache_name, addr,
2722 umv.umv_corruption, s);
2723 }
2724 } else {
2725 /*
2726 * This is the more verbose mode, when the user has
2727 * type addr::umem_verify. If the cache was clean,
2728 * nothing will have yet been printed. So say something.
2729 */
2730 if (umv.umv_corruption == 0)
2731 mdb_printf("clean\n");
2732
2733 mdb_dec_indent(2);
2734 }
2735 } else {
2736 /*
2737 * If the user didn't specify a cache to verify, we'll walk all
2738 * umem_cache's, specifying ourself as a callback for each...
2739 * this is the equivalent of '::walk umem_cache .::umem_verify'
2740 */
2741 mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", UMEM_CACHE_NAMELEN,
2742 "Cache Name", "Addr", "Cache Integrity");
2743 (void) (mdb_walk_dcmd("umem_cache", "umem_verify", 0, NULL));
2744 }
2745
2746 return (DCMD_OK);
2747 }
2748
/*
 * In-core copy of one vmem arena, linked both into a flat list (vn_next)
 * and into the arena source/parent tree (vn_parent/vn_sibling/vn_children)
 * built by vmem_walk_init().
 */
typedef struct vmem_node {
	struct vmem_node *vn_next;	/* next in flat list of all arenas */
	struct vmem_node *vn_parent;	/* arena this one imports from */
	struct vmem_node *vn_sibling;	/* next child of the same parent */
	struct vmem_node *vn_children;	/* first child arena */
	uintptr_t vn_addr;		/* target address of the vmem_t */
	int vn_marked;			/* visited flag for postfix walk */
	vmem_t vn_vmem;			/* local copy of the arena */
} vmem_node_t;

/*
 * State for the vmem/vmem_postfix walkers: the root of the arena tree and
 * the node to visit next.
 */
typedef struct vmem_walk {
	vmem_node_t *vw_root;
	vmem_node_t *vw_current;
} vmem_walk_t;
2763
/*
 * vmem walker initialization: snapshot every arena on the target's
 * vmem_list, then link the snapshots into a tree using each arena's
 * vm_source (import) pointer.  If walk_addr names a specific arena, the
 * walk starts there; otherwise it starts at the root list.
 */
int
vmem_walk_init(mdb_walk_state_t *wsp)
{
	uintptr_t vaddr, paddr;
	vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
	vmem_walk_t *vw;

	if (umem_readvar(&vaddr, "vmem_list") == -1) {
		mdb_warn("couldn't read 'vmem_list'");
		return (WALK_ERR);
	}

	/* first pass: copy every arena into a local flat list */
	while (vaddr != NULL) {
		vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
		vp->vn_addr = vaddr;
		vp->vn_next = head;
		head = vp;

		if (vaddr == wsp->walk_addr)
			current = vp;

		if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
			mdb_warn("couldn't read vmem_t at %p", vaddr);
			goto err;
		}

		vaddr = (uintptr_t)vp->vn_vmem.vm_next;
	}

	/* second pass: attach each arena to its source (parent) arena */
	for (vp = head; vp != NULL; vp = vp->vn_next) {

		if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) {
			/* no source: this arena is a root */
			vp->vn_sibling = root;
			root = vp;
			continue;
		}

		for (parent = head; parent != NULL; parent = parent->vn_next) {
			if (parent->vn_addr != paddr)
				continue;
			vp->vn_sibling = parent->vn_children;
			parent->vn_children = vp;
			vp->vn_parent = parent;
			break;
		}

		if (parent == NULL) {
			mdb_warn("couldn't find %p's parent (%p)\n",
			    vp->vn_addr, paddr);
			goto err;
		}
	}

	vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
	vw->vw_root = root;

	if (current != NULL)
		vw->vw_current = current;
	else
		vw->vw_current = root;

	wsp->walk_data = vw;
	return (WALK_NEXT);
err:
	/* free the partially-built flat list */
	for (vp = head; head != NULL; vp = head) {
		head = vp->vn_next;
		mdb_free(vp, sizeof (vmem_node_t));
	}

	return (WALK_ERR);
}
2835
2836 int
vmem_walk_step(mdb_walk_state_t * wsp)2837 vmem_walk_step(mdb_walk_state_t *wsp)
2838 {
2839 vmem_walk_t *vw = wsp->walk_data;
2840 vmem_node_t *vp;
2841 int rval;
2842
2843 if ((vp = vw->vw_current) == NULL)
2844 return (WALK_DONE);
2845
2846 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
2847
2848 if (vp->vn_children != NULL) {
2849 vw->vw_current = vp->vn_children;
2850 return (rval);
2851 }
2852
2853 do {
2854 vw->vw_current = vp->vn_sibling;
2855 vp = vp->vn_parent;
2856 } while (vw->vw_current == NULL && vp != NULL);
2857
2858 return (rval);
2859 }
2860
/*
 * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
 * children are visited before their parent.  We perform the postfix walk
 * iteratively (rather than recursively) to allow mdb to regain control
 * after each callback.
 */
int
vmem_postfix_walk_step(mdb_walk_state_t *wsp)
{
	vmem_walk_t *vw = wsp->walk_data;
	vmem_node_t *vp = vw->vw_current;
	int rval;

	/*
	 * If this node is marked, then we know that we have already visited
	 * all of its children.  If the node has any siblings, they need to
	 * be visited next; otherwise, we need to visit the parent.  Note
	 * that vp->vn_marked will only be zero on the first invocation of
	 * the step function.
	 */
	if (vp->vn_marked) {
		if (vp->vn_sibling != NULL)
			vp = vp->vn_sibling;
		else if (vp->vn_parent != NULL)
			vp = vp->vn_parent;
		else {
			/*
			 * We have neither a parent, nor a sibling, and we
			 * have already been visited; we're done.
			 */
			return (WALK_DONE);
		}
	}

	/*
	 * Before we visit this node, visit its children.
	 */
	while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
		vp = vp->vn_children;

	/* mark the node visited and report it to the caller */
	vp->vn_marked = 1;
	vw->vw_current = vp;
	rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);

	return (rval);
}
2907
/*
 * Tear down the arena tree built by vmem_walk_init().  The function
 * recurses twice: once to free the current root's subtree (children),
 * and once to continue with the root's siblings.  The vmem_walk_t itself
 * is freed only once the last node (no sibling, no parent) is released.
 */
void
vmem_walk_fini(mdb_walk_state_t *wsp)
{
	vmem_walk_t *vw = wsp->walk_data;
	vmem_node_t *root = vw->vw_root;
	int done;

	if (root == NULL)
		return;

	/* free this node's entire subtree first */
	if ((vw->vw_root = root->vn_children) != NULL)
		vmem_walk_fini(wsp);

	vw->vw_root = root->vn_sibling;
	done = (root->vn_sibling == NULL && root->vn_parent == NULL);
	mdb_free(root, sizeof (vmem_node_t));

	if (done) {
		mdb_free(vw, sizeof (vmem_walk_t));
	} else {
		vmem_walk_fini(wsp);
	}
}
2931
/*
 * State for the vmem_seg family of walkers: the arena's circular segment
 * list is traversed from vsw_start until it wraps back around.
 */
typedef struct vmem_seg_walk {
	uint8_t vsw_type;	/* segment type to report (VMEM_NONE = all) */
	uintptr_t vsw_start;	/* address of the arena's vm_seg0 anchor */
	uintptr_t vsw_current;	/* next segment to read */
} vmem_seg_walk_t;
2937
2938 /*ARGSUSED*/
2939 int
vmem_seg_walk_common_init(mdb_walk_state_t * wsp,uint8_t type,char * name)2940 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
2941 {
2942 vmem_seg_walk_t *vsw;
2943
2944 if (wsp->walk_addr == NULL) {
2945 mdb_warn("vmem_%s does not support global walks\n", name);
2946 return (WALK_ERR);
2947 }
2948
2949 wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
2950
2951 vsw->vsw_type = type;
2952 vsw->vsw_start = wsp->walk_addr + OFFSETOF(vmem_t, vm_seg0);
2953 vsw->vsw_current = vsw->vsw_start;
2954
2955 return (WALK_NEXT);
2956 }
2957
2958 /*
2959 * vmem segments can't have type 0 (this should be added to vmem_impl.h).
2960 */
2961 #define VMEM_NONE 0
2962
/* walk only the VMEM_ALLOC segments of the arena at walk_addr */
int
vmem_alloc_walk_init(mdb_walk_state_t *wsp)
{
	return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
}
2968
/* walk only the VMEM_FREE segments of the arena at walk_addr */
int
vmem_free_walk_init(mdb_walk_state_t *wsp)
{
	return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
}
2974
/* walk only the VMEM_SPAN segments of the arena at walk_addr */
int
vmem_span_walk_init(mdb_walk_state_t *wsp)
{
	return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
}
2980
/* walk all segments of the arena at walk_addr (VMEM_NONE = no filter) */
int
vmem_seg_walk_init(mdb_walk_state_t *wsp)
{
	return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
}
2986
/*
 * Step one segment along the arena's circular vs_anext list, invoking the
 * callback only for segments matching the walk's type filter.  The walk
 * terminates when the list wraps back to the vm_seg0 anchor.
 */
int
vmem_seg_walk_step(mdb_walk_state_t *wsp)
{
	vmem_seg_t seg;
	vmem_seg_walk_t *vsw = wsp->walk_data;
	uintptr_t addr = vsw->vsw_current;
	static size_t seg_size = 0;	/* target's vmem_seg_t size, cached */
	int rval;

	/*
	 * The target may have been built with a different VMEM_STACK_DEPTH;
	 * read its actual segment size once, falling back to ours.
	 */
	if (!seg_size) {
		if (umem_readvar(&seg_size, "vmem_seg_size") == -1) {
			mdb_warn("failed to read 'vmem_seg_size'");
			seg_size = sizeof (vmem_seg_t);
		}
	}

	/* zero the tail we won't read if the target's struct is smaller */
	if (seg_size < sizeof (seg))
		bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);

	if (mdb_vread(&seg, seg_size, addr) == -1) {
		mdb_warn("couldn't read vmem_seg at %p", addr);
		return (WALK_ERR);
	}

	vsw->vsw_current = (uintptr_t)seg.vs_anext;
	if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
		rval = WALK_NEXT;
	} else {
		rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
	}

	/* back at the anchor: the circular list has been fully traversed */
	if (vsw->vsw_current == vsw->vsw_start)
		return (WALK_DONE);

	return (rval);
}
3023
/* release the per-walk state allocated by vmem_seg_walk_common_init() */
void
vmem_seg_walk_fini(mdb_walk_state_t *wsp)
{
	vmem_seg_walk_t *vsw = wsp->walk_data;

	mdb_free(vsw, sizeof (vmem_seg_walk_t));
}
3031
3032 #define VMEM_NAMEWIDTH 22
3033
/*
 * ::vmem dcmd - display one arena's usage statistics, indented two spaces
 * per level of its import ancestry.  Without an address, walks and prints
 * every arena.
 */
int
vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	vmem_t v, parent;
	uintptr_t paddr;
	int ident = 0;		/* indentation: 2 spaces per ancestor */
	char c[VMEM_NAMEWIDTH];

	if (!(flags & DCMD_ADDRSPEC)) {
		if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
			mdb_warn("can't walk vmem");
			return (DCMD_ERR);
		}
		return (DCMD_OK);
	}

	if (DCMD_HDRSPEC(flags))
		mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
		    "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
		    "TOTAL", "SUCCEED", "FAIL");

	if (mdb_vread(&v, sizeof (v), addr) == -1) {
		mdb_warn("couldn't read vmem at %p", addr);
		return (DCMD_ERR);
	}

	/* count ancestors by following the vm_source chain */
	for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) {
		if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
			mdb_warn("couldn't trace %p's ancestry", addr);
			ident = 0;
			break;
		}
		paddr = (uintptr_t)parent.vm_source;
	}

	(void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);

	mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
	    addr, VMEM_NAMEWIDTH, c,
	    v.vm_kstat.vk_mem_inuse, v.vm_kstat.vk_mem_total,
	    v.vm_kstat.vk_alloc, v.vm_kstat.vk_fail);

	return (DCMD_OK);
}
3078
/*
 * Help text for the ::vmem_seg dcmd (registered as its help callback).
 *
 * Fix: the -m/-M descriptions read "filer out"; corrected to "filter out"
 * to match every other option description.
 */
void
vmem_seg_help(void)
{
	mdb_printf("%s\n",
"Display the contents of vmem_seg_ts, with optional filtering.\n"
"\n"
"A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
"representing a single chunk of data.  Only ALLOC segments have debugging\n"
"information.\n");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
"  -v    Display the full content of the vmem_seg, including its stack trace\n"
"  -s    report the size of the segment, instead of the end address\n"
"  -c caller\n"
"        filter out segments without the function/PC in their stack trace\n"
"  -e earliest\n"
"        filter out segments timestamped before earliest\n"
"  -l latest\n"
"        filter out segments timestamped after latest\n"
"  -m minsize\n"
"        filter out segments smaller than minsize\n"
"  -M maxsize\n"
"        filter out segments larger than maxsize\n"
"  -t thread\n"
"        filter out segments not involving thread\n"
"  -T type\n"
"        filter out segments not of type 'type'\n"
"        type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
}
3110
3111
3112 /*ARGSUSED*/
3113 int
vmem_seg(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)3114 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3115 {
3116 vmem_seg_t vs;
3117 uintptr_t *stk = vs.vs_stack;
3118 uintptr_t sz;
3119 uint8_t t;
3120 const char *type = NULL;
3121 GElf_Sym sym;
3122 char c[MDB_SYM_NAMLEN];
3123 int no_debug;
3124 int i;
3125 int depth;
3126 uintptr_t laddr, haddr;
3127
3128 uintptr_t caller = NULL, thread = NULL;
3129 uintptr_t minsize = 0, maxsize = 0;
3130
3131 hrtime_t earliest = 0, latest = 0;
3132
3133 uint_t size = 0;
3134 uint_t verbose = 0;
3135
3136 if (!(flags & DCMD_ADDRSPEC))
3137 return (DCMD_USAGE);
3138
3139 if (mdb_getopts(argc, argv,
3140 'c', MDB_OPT_UINTPTR, &caller,
3141 'e', MDB_OPT_UINT64, &earliest,
3142 'l', MDB_OPT_UINT64, &latest,
3143 's', MDB_OPT_SETBITS, TRUE, &size,
3144 'm', MDB_OPT_UINTPTR, &minsize,
3145 'M', MDB_OPT_UINTPTR, &maxsize,
3146 't', MDB_OPT_UINTPTR, &thread,
3147 'T', MDB_OPT_STR, &type,
3148 'v', MDB_OPT_SETBITS, TRUE, &verbose,
3149 NULL) != argc)
3150 return (DCMD_USAGE);
3151
3152 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3153 if (verbose) {
3154 mdb_printf("%16s %4s %16s %16s %16s\n"
3155 "%<u>%16s %4s %16s %16s %16s%</u>\n",
3156 "ADDR", "TYPE", "START", "END", "SIZE",
3157 "", "", "THREAD", "TIMESTAMP", "");
3158 } else {
3159 mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3160 "START", size? "SIZE" : "END", "WHO");
3161 }
3162 }
3163
3164 if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3165 mdb_warn("couldn't read vmem_seg at %p", addr);
3166 return (DCMD_ERR);
3167 }
3168
3169 if (type != NULL) {
3170 if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3171 t = VMEM_ALLOC;
3172 else if (strcmp(type, "FREE") == 0)
3173 t = VMEM_FREE;
3174 else if (strcmp(type, "SPAN") == 0)
3175 t = VMEM_SPAN;
3176 else if (strcmp(type, "ROTR") == 0 ||
3177 strcmp(type, "ROTOR") == 0)
3178 t = VMEM_ROTOR;
3179 else if (strcmp(type, "WLKR") == 0 ||
3180 strcmp(type, "WALKER") == 0)
3181 t = VMEM_WALKER;
3182 else {
3183 mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3184 type);
3185 return (DCMD_ERR);
3186 }
3187
3188 if (vs.vs_type != t)
3189 return (DCMD_OK);
3190 }
3191
3192 sz = vs.vs_end - vs.vs_start;
3193
3194 if (minsize != 0 && sz < minsize)
3195 return (DCMD_OK);
3196
3197 if (maxsize != 0 && sz > maxsize)
3198 return (DCMD_OK);
3199
3200 t = vs.vs_type;
3201 depth = vs.vs_depth;
3202
3203 /*
3204 * debug info, when present, is only accurate for VMEM_ALLOC segments
3205 */
3206 no_debug = (t != VMEM_ALLOC) ||
3207 (depth == 0 || depth > VMEM_STACK_DEPTH);
3208
3209 if (no_debug) {
3210 if (caller != NULL || thread != NULL || earliest != 0 ||
3211 latest != 0)
3212 return (DCMD_OK); /* not enough info */
3213 } else {
3214 if (caller != NULL) {
3215 laddr = caller;
3216 haddr = caller + sizeof (caller);
3217
3218 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3219 sizeof (c), &sym) != -1 &&
3220 caller == (uintptr_t)sym.st_value) {
3221 /*
3222 * We were provided an exact symbol value; any
3223 * address in the function is valid.
3224 */
3225 laddr = (uintptr_t)sym.st_value;
3226 haddr = (uintptr_t)sym.st_value + sym.st_size;
3227 }
3228
3229 for (i = 0; i < depth; i++)
3230 if (vs.vs_stack[i] >= laddr &&
3231 vs.vs_stack[i] < haddr)
3232 break;
3233
3234 if (i == depth)
3235 return (DCMD_OK);
3236 }
3237
3238 if (thread != NULL && (uintptr_t)vs.vs_thread != thread)
3239 return (DCMD_OK);
3240
3241 if (earliest != 0 && vs.vs_timestamp < earliest)
3242 return (DCMD_OK);
3243
3244 if (latest != 0 && vs.vs_timestamp > latest)
3245 return (DCMD_OK);
3246 }
3247
3248 type = (t == VMEM_ALLOC ? "ALLC" :
3249 t == VMEM_FREE ? "FREE" :
3250 t == VMEM_SPAN ? "SPAN" :
3251 t == VMEM_ROTOR ? "ROTR" :
3252 t == VMEM_WALKER ? "WLKR" :
3253 "????");
3254
3255 if (flags & DCMD_PIPE_OUT) {
3256 mdb_printf("%#r\n", addr);
3257 return (DCMD_OK);
3258 }
3259
3260 if (verbose) {
3261 mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3262 addr, type, vs.vs_start, vs.vs_end, sz);
3263
3264 if (no_debug)
3265 return (DCMD_OK);
3266
3267 mdb_printf("%16s %4s %16d %16llx\n",
3268 "", "", vs.vs_thread, vs.vs_timestamp);
3269
3270 mdb_inc_indent(17);
3271 for (i = 0; i < depth; i++) {
3272 mdb_printf("%a\n", stk[i]);
3273 }
3274 mdb_dec_indent(17);
3275 mdb_printf("\n");
3276 } else {
3277 mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3278 vs.vs_start, size? sz : vs.vs_end);
3279
3280 if (no_debug) {
3281 mdb_printf("\n");
3282 return (DCMD_OK);
3283 }
3284
3285 for (i = 0; i < depth; i++) {
3286 if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3287 c, sizeof (c), &sym) == -1)
3288 continue;
3289 if (is_umem_sym(c, "vmem_"))
3290 continue;
3291 break;
3292 }
3293 mdb_printf(" %a\n", stk[i]);
3294 }
3295 return (DCMD_OK);
3296 }
3297
/*
 * ::umalog walk callback: print one audited bufctl as a "T-<delta>" entry
 * relative to the newest timestamp seen, along with its cache name and
 * stack trace.  A zero timestamp marks the end of the recorded log.
 */
/*ARGSUSED*/
static int
showbc(uintptr_t addr, const umem_bufctl_audit_t *bcp, hrtime_t *newest)
{
	char name[UMEM_CACHE_NAMELEN + 1];
	hrtime_t delta;
	int i, depth;

	if (bcp->bc_timestamp == 0)
		return (WALK_DONE);

	/* the first entry walked is the newest; anchor deltas on it */
	if (*newest == 0)
		*newest = bcp->bc_timestamp;

	delta = *newest - bcp->bc_timestamp;
	depth = MIN(bcp->bc_depth, umem_stack_depth);

	/* fall back to the cache's address if its name is unreadable */
	if (mdb_readstr(name, sizeof (name), (uintptr_t)
	    &bcp->bc_cache->cache_name) <= 0)
		(void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);

	mdb_printf("\nT-%lld.%09lld  addr=%p  %s\n",
	    delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);

	for (i = 0; i < depth; i++)
		mdb_printf("\t %a\n", bcp->bc_stack[i]);

	return (WALK_NEXT);
}
3327
3328 int
umalog(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)3329 umalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3330 {
3331 const char *logname = "umem_transaction_log";
3332 hrtime_t newest = 0;
3333
3334 if ((flags & DCMD_ADDRSPEC) || argc > 1)
3335 return (DCMD_USAGE);
3336
3337 if (argc > 0) {
3338 if (argv->a_type != MDB_TYPE_STRING)
3339 return (DCMD_USAGE);
3340 if (strcmp(argv->a_un.a_str, "fail") == 0)
3341 logname = "umem_failure_log";
3342 else if (strcmp(argv->a_un.a_str, "slab") == 0)
3343 logname = "umem_slab_log";
3344 else
3345 return (DCMD_USAGE);
3346 }
3347
3348 if (umem_readvar(&addr, logname) == -1) {
3349 mdb_warn("failed to read %s log header pointer");
3350 return (DCMD_ERR);
3351 }
3352
3353 if (mdb_pwalk("umem_log", (mdb_walk_cb_t)showbc, &newest, addr) == -1) {
3354 mdb_warn("failed to walk umem log");
3355 return (DCMD_ERR);
3356 }
3357
3358 return (DCMD_OK);
3359 }
3360
3361 /*
3362 * As the final lure for die-hard crash(1M) users, we provide ::umausers here.
3363 * The first piece is a structure which we use to accumulate umem_cache_t
3364 * addresses of interest. The umc_add is used as a callback for the umem_cache
3365 * walker; we either add all caches, or ones named explicitly as arguments.
3366 */
3367
typedef struct umclist {
	const char *umc_name;		/* Name to match (NULL matches all) */
	uintptr_t *umc_caches;		/* List of umem_cache_t addrs */
	int umc_nelems;			/* Num entries in umc_caches */
	int umc_size;			/* Size of umc_caches array */
} umclist_t;
3374
3375 static int
umc_add(uintptr_t addr,const umem_cache_t * cp,umclist_t * umc)3376 umc_add(uintptr_t addr, const umem_cache_t *cp, umclist_t *umc)
3377 {
3378 void *p;
3379 int s;
3380
3381 if (umc->umc_name == NULL ||
3382 strcmp(cp->cache_name, umc->umc_name) == 0) {
3383 /*
3384 * If we have a match, grow our array (if necessary), and then
3385 * add the virtual address of the matching cache to our list.
3386 */
3387 if (umc->umc_nelems >= umc->umc_size) {
3388 s = umc->umc_size ? umc->umc_size * 2 : 256;
3389 p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3390
3391 bcopy(umc->umc_caches, p,
3392 sizeof (uintptr_t) * umc->umc_size);
3393
3394 umc->umc_caches = p;
3395 umc->umc_size = s;
3396 }
3397
3398 umc->umc_caches[umc->umc_nelems++] = addr;
3399 return (umc->umc_name ? WALK_DONE : WALK_NEXT);
3400 }
3401
3402 return (WALK_NEXT);
3403 }
3404
3405 /*
3406 * The second piece of ::umausers is a hash table of allocations. Each
3407 * allocation owner is identified by its stack trace and data_size. We then
3408 * track the total bytes of all such allocations, and the number of allocations
3409 * to report at the end. Once we have a list of caches, we walk through the
3410 * allocated bufctls of each, and update our hash table accordingly.
3411 */
3412
/*
 * One hash-table entry per allocation "owner" -- a unique combination of
 * stack trace and data size.  Entries are chained through umo_next; each
 * table slot's chain starts at that slot's umo_head.
 */
typedef struct umowner {
	struct umowner *umo_head;		/* First hash elt in bucket */
	struct umowner *umo_next;		/* Next hash elt in chain */
	size_t umo_signature;			/* Hash table signature */
	uint_t umo_num;				/* Number of allocations */
	size_t umo_data_size;			/* Size of each allocation */
	size_t umo_total_size;			/* Total bytes of allocation */
	int umo_depth;				/* Depth of stack trace */
	uintptr_t *umo_stack;			/* Stack trace */
} umowner_t;

/*
 * Per-invocation state for ::umausers: the hash table of owners plus the
 * backing storage for their stack traces (one umem_stack_depth-sized slice
 * per entry).
 */
typedef struct umusers {
	const umem_cache_t *umu_cache;		/* Current umem cache */
	umowner_t *umu_hash;			/* Hash table of owners */
	uintptr_t *umu_stacks;			/* stacks for owners */
	int umu_nelems;				/* Number of entries in use */
	int umu_size;				/* Total number of entries */
} umusers_t;
3431
/*
 * Fold one allocated bufctl into the ::umausers hash table.  "size" is the
 * number of bytes to account to the owner; "data_size", together with the
 * stack trace, identifies the owner.
 */
static void
umu_add(umusers_t *umu, const umem_bufctl_audit_t *bcp,
    size_t size, size_t data_size)
{
	int i, depth = MIN(bcp->bc_depth, umem_stack_depth);
	size_t bucket, signature = data_size;
	umowner_t *umo, *umoend;

	/*
	 * If the hash table is full, double its size and rehash everything.
	 * umu_size is always zero or a power of two, so (umu_size - 1) can
	 * be used as a bucket mask below.
	 */
	if (umu->umu_nelems >= umu->umu_size) {
		int s = umu->umu_size ? umu->umu_size * 2 : 1024;
		size_t umowner_size = sizeof (umowner_t);
		size_t trace_size = umem_stack_depth * sizeof (uintptr_t);
		uintptr_t *new_stacks;

		umo = mdb_alloc(umowner_size * s, UM_SLEEP | UM_GC);
		new_stacks = mdb_alloc(trace_size * s, UM_SLEEP | UM_GC);

		bcopy(umu->umu_hash, umo, umowner_size * umu->umu_size);
		bcopy(umu->umu_stacks, new_stacks, trace_size * umu->umu_size);
		umu->umu_hash = umo;
		umu->umu_stacks = new_stacks;
		umu->umu_size = s;

		/*
		 * Reset every slot's chain head and point each entry's
		 * stack storage at its slice of the new stacks array.
		 */
		umoend = umu->umu_hash + umu->umu_size;
		for (umo = umu->umu_hash; umo < umoend; umo++) {
			umo->umo_head = NULL;
			umo->umo_stack = &umu->umu_stacks[
			    umem_stack_depth * (umo - umu->umu_hash)];
		}

		/* re-chain the existing entries onto the grown table */
		umoend = umu->umu_hash + umu->umu_nelems;
		for (umo = umu->umu_hash; umo < umoend; umo++) {
			bucket = umo->umo_signature & (umu->umu_size - 1);
			umo->umo_next = umu->umu_hash[bucket].umo_head;
			umu->umu_hash[bucket].umo_head = umo;
		}
	}

	/*
	 * Finish computing the hash signature from the stack trace, and then
	 * see if the owner is in the hash table. If so, update our stats.
	 */
	for (i = 0; i < depth; i++)
		signature += bcp->bc_stack[i];

	bucket = signature & (umu->umu_size - 1);

	for (umo = umu->umu_hash[bucket].umo_head; umo; umo = umo->umo_next) {
		if (umo->umo_signature == signature) {
			size_t difference = 0;

			/*
			 * Signatures can collide; confirm a true match by
			 * OR-ing together all of the field differences --
			 * nonzero means some field differed.
			 */
			difference |= umo->umo_data_size - data_size;
			difference |= umo->umo_depth - depth;

			for (i = 0; i < depth; i++) {
				difference |= umo->umo_stack[i] -
				    bcp->bc_stack[i];
			}

			if (difference == 0) {
				umo->umo_total_size += size;
				umo->umo_num++;
				return;
			}
		}
	}

	/*
	 * If the owner is not yet hashed, grab the next element and fill it
	 * in based on the allocation information.
	 */
	umo = &umu->umu_hash[umu->umu_nelems++];
	umo->umo_next = umu->umu_hash[bucket].umo_head;
	umu->umu_hash[bucket].umo_head = umo;

	umo->umo_signature = signature;
	umo->umo_num = 1;
	umo->umo_data_size = data_size;
	umo->umo_total_size = size;
	umo->umo_depth = depth;

	for (i = 0; i < depth; i++)
		umo->umo_stack[i] = bcp->bc_stack[i];
}
3519
3520 /*
3521 * When ::umausers is invoked without the -f flag, we simply update our hash
3522 * table with the information from each allocated bufctl.
3523 */
3524 /*ARGSUSED*/
3525 static int
umause1(uintptr_t addr,const umem_bufctl_audit_t * bcp,umusers_t * umu)3526 umause1(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu)
3527 {
3528 const umem_cache_t *cp = umu->umu_cache;
3529
3530 umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3531 return (WALK_NEXT);
3532 }
3533
3534 /*
3535 * When ::umausers is invoked with the -f flag, we print out the information
3536 * for each bufctl as well as updating the hash table.
3537 */
3538 static int
umause2(uintptr_t addr,const umem_bufctl_audit_t * bcp,umusers_t * umu)3539 umause2(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu)
3540 {
3541 int i, depth = MIN(bcp->bc_depth, umem_stack_depth);
3542 const umem_cache_t *cp = umu->umu_cache;
3543
3544 mdb_printf("size %d, addr %p, thread %p, cache %s\n",
3545 cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
3546
3547 for (i = 0; i < depth; i++)
3548 mdb_printf("\t %a\n", bcp->bc_stack[i]);
3549
3550 umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3551 return (WALK_NEXT);
3552 }
3553
3554 /*
3555 * We sort our results by allocation size before printing them.
3556 */
3557 static int
umownercmp(const void * lp,const void * rp)3558 umownercmp(const void *lp, const void *rp)
3559 {
3560 const umowner_t *lhs = lp;
3561 const umowner_t *rhs = rp;
3562
3563 return (rhs->umo_total_size - lhs->umo_total_size);
3564 }
3565
3566 /*
3567 * The main engine of ::umausers is relatively straightforward: First we
3568 * accumulate our list of umem_cache_t addresses into the umclist_t. Next we
3569 * iterate over the allocated bufctls of each cache in the list. Finally,
3570 * we sort and print our results.
3571 */
/*ARGSUSED*/
int
umausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	int mem_threshold = 8192;	/* Minimum # bytes for printing */
	int cnt_threshold = 100;	/* Minimum # blocks for printing */
	int audited_caches = 0;		/* Number of UMF_AUDIT caches found */
	int do_all_caches = 1;		/* Do all caches (no arguments) */
	int opt_e = FALSE;		/* Include "small" users */
	int opt_f = FALSE;		/* Print stack traces */

	mdb_walk_cb_t callback = (mdb_walk_cb_t)umause1;
	umowner_t *umo, *umoend;
	int i, oelems;

	umclist_t umc;
	umusers_t umu;

	if (flags & DCMD_ADDRSPEC)
		return (DCMD_USAGE);

	bzero(&umc, sizeof (umc));
	bzero(&umu, sizeof (umu));

	/*
	 * mdb_getopts() stops at the first non-option argument; treat each
	 * such argument as a cache name, gather its matching cache, and
	 * resume option parsing after it.
	 */
	while ((i = mdb_getopts(argc, argv,
	    'e', MDB_OPT_SETBITS, TRUE, &opt_e,
	    'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {

		argv += i;	/* skip past options we just processed */
		argc -= i;	/* adjust argc */

		if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
			return (DCMD_USAGE);

		oelems = umc.umc_nelems;
		umc.umc_name = argv->a_un.a_str;
		(void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc);

		/* nothing added means the name matched no cache */
		if (umc.umc_nelems == oelems) {
			mdb_warn("unknown umem cache: %s\n", umc.umc_name);
			return (DCMD_ERR);
		}

		do_all_caches = 0;
		argv++;
		argc--;
	}

	if (opt_e)
		mem_threshold = cnt_threshold = 0;

	if (opt_f)
		callback = (mdb_walk_cb_t)umause2;

	if (do_all_caches) {
		umc.umc_name = NULL; /* match all cache names */
		(void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc);
	}

	for (i = 0; i < umc.umc_nelems; i++) {
		uintptr_t cp = umc.umc_caches[i];
		umem_cache_t c;

		if (mdb_vread(&c, sizeof (c), cp) == -1) {
			mdb_warn("failed to read cache at %p", cp);
			continue;
		}

		/* only audited caches record the stack traces we need */
		if (!(c.cache_flags & UMF_AUDIT)) {
			if (!do_all_caches) {
				mdb_warn("UMF_AUDIT is not enabled for %s\n",
				    c.cache_name);
			}
			continue;
		}

		umu.umu_cache = &c;
		(void) mdb_pwalk("bufctl", callback, &umu, cp);
		audited_caches++;
	}

	if (audited_caches == 0 && do_all_caches) {
		mdb_warn("UMF_AUDIT is not enabled for any caches\n");
		return (DCMD_ERR);
	}

	/* sort owners by total bytes and print those above the thresholds */
	qsort(umu.umu_hash, umu.umu_nelems, sizeof (umowner_t), umownercmp);
	umoend = umu.umu_hash + umu.umu_nelems;

	for (umo = umu.umu_hash; umo < umoend; umo++) {
		if (umo->umo_total_size < mem_threshold &&
		    umo->umo_num < cnt_threshold)
			continue;
		mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
		    umo->umo_total_size, umo->umo_num, umo->umo_data_size);
		for (i = 0; i < umo->umo_depth; i++)
			mdb_printf("\t %a\n", umo->umo_stack[i]);
	}

	return (DCMD_OK);
}
3673
/*
 * The in-chunk tag that libumem's malloc(3C) places in front of each
 * buffer (see the diagram below um_umem_buffer_cb's comment block).
 */
struct malloc_data {
	uint32_t malloc_size;
	uint32_t malloc_stat; /* == UMEM_MALLOC_ENCODE(state, malloc_size) */
};

/*
 * Largest mallocsz that can come out of umem: the biggest cache buffer
 * minus the tag overhead (two tags on _LP64, where large buffers carry
 * 8 bytes of padding before the tag).
 */
#ifdef _LP64
#define	UMI_MAX_BUCKET		(UMEM_MAXBUF - 2*sizeof (struct malloc_data))
#else
#define	UMI_MAX_BUCKET		(UMEM_MAXBUF - sizeof (struct malloc_data))
#endif

/*
 * Accumulator shared by the ::umem_malloc_dist / ::umem_malloc_info
 * walker callbacks.
 */
typedef struct umem_malloc_info {
	size_t um_total;	/* total allocated buffers */
	size_t um_malloc;	/* malloc buffers */
	size_t um_malloc_size;	/* sum of malloc buffer sizes */
	size_t um_malloc_overhead; /* sum of in-chunk overheads */

	umem_cache_t *um_cp;	/* cache currently being walked */

	uint_t *um_bucket;	/* per-mallocsz counts (may be NULL) */
} umem_malloc_info_t;
3695
3696 static void
umem_malloc_print_dist(uint_t * um_bucket,size_t minmalloc,size_t maxmalloc,size_t maxbuckets,size_t minbucketsize,int geometric)3697 umem_malloc_print_dist(uint_t *um_bucket, size_t minmalloc, size_t maxmalloc,
3698 size_t maxbuckets, size_t minbucketsize, int geometric)
3699 {
3700 uint64_t um_malloc;
3701 int minb = -1;
3702 int maxb = -1;
3703 int buckets;
3704 int nbucks;
3705 int i;
3706 int b;
3707 const int *distarray;
3708
3709 minb = (int)minmalloc;
3710 maxb = (int)maxmalloc;
3711
3712 nbucks = buckets = maxb - minb + 1;
3713
3714 um_malloc = 0;
3715 for (b = minb; b <= maxb; b++)
3716 um_malloc += um_bucket[b];
3717
3718 if (maxbuckets != 0)
3719 buckets = MIN(buckets, maxbuckets);
3720
3721 if (minbucketsize > 1) {
3722 buckets = MIN(buckets, nbucks/minbucketsize);
3723 if (buckets == 0) {
3724 buckets = 1;
3725 minbucketsize = nbucks;
3726 }
3727 }
3728
3729 if (geometric)
3730 distarray = dist_geometric(buckets, minb, maxb, minbucketsize);
3731 else
3732 distarray = dist_linear(buckets, minb, maxb);
3733
3734 dist_print_header("malloc size", 11, "count");
3735 for (i = 0; i < buckets; i++) {
3736 dist_print_bucket(distarray, i, um_bucket, um_malloc, 11);
3737 }
3738 mdb_printf("\n");
3739 }
3740
3741 /*
3742 * A malloc()ed buffer looks like:
3743 *
3744 * <----------- mi.malloc_size --->
3745 * <----------- cp.cache_bufsize ------------------>
3746 * <----------- cp.cache_chunksize -------------------------------->
3747 * +-------+-----------------------+---------------+---------------+
3748 * |/tag///| mallocsz |/round-off/////|/debug info////|
3749 * +-------+---------------------------------------+---------------+
3750 * <-- usable space ------>
3751 *
3752 * mallocsz is the argument to malloc(3C).
3753 * mi.malloc_size is the actual size passed to umem_alloc(), which
3754 * is rounded up to the smallest available cache size, which is
3755 * cache_bufsize. If there is debugging or alignment overhead in
3756 * the cache, that is reflected in a larger cache_chunksize.
3757 *
3758 * The tag at the beginning of the buffer is either 8-bytes or 16-bytes,
3759 * depending upon the ISA's alignment requirements. For 32-bit allocations,
3760 * it is always a 8-byte tag. For 64-bit allocations larger than 8 bytes,
3761 * the tag has 8 bytes of padding before it.
3762 *
3763 * 32-byte, 64-byte buffers <= 8 bytes:
3764 * +-------+-------+--------- ...
3765 * |/size//|/stat//| mallocsz ...
3766 * +-------+-------+--------- ...
3767 * ^
3768 * pointer returned from malloc(3C)
3769 *
3770 * 64-byte buffers > 8 bytes:
3771 * +---------------+-------+-------+--------- ...
3772 * |/padding///////|/size//|/stat//| mallocsz ...
3773 * +---------------+-------+-------+--------- ...
3774 * ^
3775 * pointer returned from malloc(3C)
3776 *
3777 * The "size" field is "malloc_size", which is mallocsz + the padding.
3778 * The "stat" field is derived from malloc_size, and functions as a
3779 * validation that this buffer is actually from malloc(3C).
3780 */
/*
 * "umem" walker callback: classify one allocated buffer.  Every buffer
 * counts toward um_total; buffers bearing a valid malloc tag additionally
 * update the malloc counters and (if present) the size histogram.
 */
/*ARGSUSED*/
static int
um_umem_buffer_cb(uintptr_t addr, void *buf, umem_malloc_info_t *ump)
{
	struct malloc_data md;
	size_t m_addr = addr;
	size_t overhead = sizeof (md);
	size_t mallocsz;

	ump->um_total++;

#ifdef _LP64
	/*
	 * Large 64-bit buffers have 8 bytes of padding before the tag
	 * (see the layout diagram above); skip past it and account for
	 * the extra overhead.
	 */
	if (ump->um_cp->cache_bufsize > UMEM_SECOND_ALIGN) {
		m_addr += overhead;
		overhead += sizeof (md);
	}
#endif

	if (mdb_vread(&md, sizeof (md), m_addr) == -1) {
		mdb_warn("unable to read malloc header at %p", m_addr);
		return (WALK_NEXT);
	}

	switch (UMEM_MALLOC_DECODE(md.malloc_stat, md.malloc_size)) {
	case MALLOC_MAGIC:
#ifdef _LP64
	case MALLOC_SECOND_MAGIC:
#endif
		/* valid tag: this buffer came from malloc(3C) */
		mallocsz = md.malloc_size - overhead;

		ump->um_malloc++;
		ump->um_malloc_size += mallocsz;
		ump->um_malloc_overhead += overhead;

		/* include round-off and debug overhead */
		ump->um_malloc_overhead +=
		    ump->um_cp->cache_chunksize - md.malloc_size;

		if (ump->um_bucket != NULL && mallocsz <= UMI_MAX_BUCKET)
			ump->um_bucket[mallocsz]++;

		break;
	default:
		/* not a malloc() buffer (e.g. a direct umem_alloc()) */
		break;
	}

	return (WALK_NEXT);
}
3829
3830 int
get_umem_alloc_sizes(int ** out,size_t * out_num)3831 get_umem_alloc_sizes(int **out, size_t *out_num)
3832 {
3833 GElf_Sym sym;
3834
3835 if (umem_lookup_by_name("umem_alloc_sizes", &sym) == -1) {
3836 mdb_warn("unable to look up umem_alloc_sizes");
3837 return (-1);
3838 }
3839
3840 *out = mdb_alloc(sym.st_size, UM_SLEEP | UM_GC);
3841 *out_num = sym.st_size / sizeof (int);
3842
3843 if (mdb_vread(*out, sym.st_size, sym.st_value) == -1) {
3844 mdb_warn("unable to read umem_alloc_sizes (%p)", sym.st_value);
3845 *out = NULL;
3846 return (-1);
3847 }
3848
3849 return (0);
3850 }
3851
3852
3853 static int
um_umem_cache_cb(uintptr_t addr,umem_cache_t * cp,umem_malloc_info_t * ump)3854 um_umem_cache_cb(uintptr_t addr, umem_cache_t *cp, umem_malloc_info_t *ump)
3855 {
3856 if (strncmp(cp->cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0)
3857 return (WALK_NEXT);
3858
3859 ump->um_cp = cp;
3860
3861 if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, ump, addr) ==
3862 -1) {
3863 mdb_warn("can't walk 'umem' for cache %p", addr);
3864 return (WALK_ERR);
3865 }
3866
3867 return (WALK_NEXT);
3868 }
3869
/*
 * Help text for the ::umem_malloc_dist dcmd.
 */
void
umem_malloc_dist_help(void)
{
	mdb_printf("%s\n",
	    "report distribution of outstanding malloc()s");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
" -b maxbins\n"
" Use at most maxbins bins for the data\n"
" -B minbinsize\n"
" Make the bins at least minbinsize bytes apart\n"
" -d dump the raw data out, without binning\n"
" -g use geometric binning instead of linear binning\n");
}
3886
3887 /*ARGSUSED*/
3888 int
umem_malloc_dist(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)3889 umem_malloc_dist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3890 {
3891 umem_malloc_info_t mi;
3892 uint_t geometric = 0;
3893 uint_t dump = 0;
3894 size_t maxbuckets = 0;
3895 size_t minbucketsize = 0;
3896
3897 size_t minalloc = 0;
3898 size_t maxalloc = UMI_MAX_BUCKET;
3899
3900 if (flags & DCMD_ADDRSPEC)
3901 return (DCMD_USAGE);
3902
3903 if (mdb_getopts(argc, argv,
3904 'd', MDB_OPT_SETBITS, TRUE, &dump,
3905 'g', MDB_OPT_SETBITS, TRUE, &geometric,
3906 'b', MDB_OPT_UINTPTR, &maxbuckets,
3907 'B', MDB_OPT_UINTPTR, &minbucketsize,
3908 0) != argc)
3909 return (DCMD_USAGE);
3910
3911 bzero(&mi, sizeof (mi));
3912 mi.um_bucket = mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket),
3913 UM_SLEEP | UM_GC);
3914
3915 if (mdb_walk("umem_cache", (mdb_walk_cb_t)um_umem_cache_cb,
3916 &mi) == -1) {
3917 mdb_warn("unable to walk 'umem_cache'");
3918 return (DCMD_ERR);
3919 }
3920
3921 if (dump) {
3922 int i;
3923 for (i = minalloc; i <= maxalloc; i++)
3924 mdb_printf("%d\t%d\n", i, mi.um_bucket[i]);
3925
3926 return (DCMD_OK);
3927 }
3928
3929 umem_malloc_print_dist(mi.um_bucket, minalloc, maxalloc,
3930 maxbuckets, minbucketsize, geometric);
3931
3932 return (DCMD_OK);
3933 }
3934
/*
 * Help text for the ::umem_malloc_info dcmd.  (-g is unavailable under
 * kmdb, hence the #ifndef.)
 */
void
umem_malloc_info_help(void)
{
	mdb_printf("%s\n",
	    "report information about malloc()s by cache. ");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
" -b maxbins\n"
" Use at most maxbins bins for the data\n"
" -B minbinsize\n"
" Make the bins at least minbinsize bytes apart\n"
" -d dump the raw distribution data without binning\n"
#ifndef _KMDB
" -g use geometric binning instead of linear binning\n"
#endif
		"");
}
3954 int
umem_malloc_info(uintptr_t addr,uint_t flags,int argc,const mdb_arg_t * argv)3955 umem_malloc_info(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3956 {
3957 umem_cache_t c;
3958 umem_malloc_info_t mi;
3959
3960 int skip = 0;
3961
3962 size_t maxmalloc;
3963 size_t overhead;
3964 size_t allocated;
3965 size_t avg_malloc;
3966 size_t overhead_pct; /* 1000 * overhead_percent */
3967
3968 uint_t verbose = 0;
3969 uint_t dump = 0;
3970 uint_t geometric = 0;
3971 size_t maxbuckets = 0;
3972 size_t minbucketsize = 0;
3973
3974 int *alloc_sizes;
3975 int idx;
3976 size_t num;
3977 size_t minmalloc;
3978
3979 if (mdb_getopts(argc, argv,
3980 'd', MDB_OPT_SETBITS, TRUE, &dump,
3981 'g', MDB_OPT_SETBITS, TRUE, &geometric,
3982 'b', MDB_OPT_UINTPTR, &maxbuckets,
3983 'B', MDB_OPT_UINTPTR, &minbucketsize,
3984 0) != argc)
3985 return (DCMD_USAGE);
3986
3987 if (dump || geometric || (maxbuckets != 0) || (minbucketsize != 0))
3988 verbose = 1;
3989
3990 if (!(flags & DCMD_ADDRSPEC)) {
3991 if (mdb_walk_dcmd("umem_cache", "umem_malloc_info",
3992 argc, argv) == -1) {
3993 mdb_warn("can't walk umem_cache");
3994 return (DCMD_ERR);
3995 }
3996 return (DCMD_OK);
3997 }
3998
3999 if (!mdb_vread(&c, sizeof (c), addr)) {
4000 mdb_warn("unable to read cache at %p", addr);
4001 return (DCMD_ERR);
4002 }
4003
4004 if (strncmp(c.cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0) {
4005 if (!(flags & DCMD_LOOP))
4006 mdb_warn("umem_malloc_info: cache \"%s\" is not used "
4007 "by malloc()\n", c.cache_name);
4008 skip = 1;
4009 }
4010
4011 /*
4012 * normally, print the header only the first time. In verbose mode,
4013 * print the header on every non-skipped buffer
4014 */
4015 if ((!verbose && DCMD_HDRSPEC(flags)) || (verbose && !skip))
4016 mdb_printf("%<ul>%-?s %6s %6s %8s %8s %10s %10s %6s%</ul>\n",
4017 "CACHE", "BUFSZ", "MAXMAL",
4018 "BUFMALLC", "AVG_MAL", "MALLOCED", "OVERHEAD", "%OVER");
4019
4020 if (skip)
4021 return (DCMD_OK);
4022
4023 maxmalloc = c.cache_bufsize - sizeof (struct malloc_data);
4024 #ifdef _LP64
4025 if (c.cache_bufsize > UMEM_SECOND_ALIGN)
4026 maxmalloc -= sizeof (struct malloc_data);
4027 #endif
4028
4029 bzero(&mi, sizeof (mi));
4030 mi.um_cp = &c;
4031 if (verbose)
4032 mi.um_bucket =
4033 mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket),
4034 UM_SLEEP | UM_GC);
4035
4036 if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, &mi, addr) ==
4037 -1) {
4038 mdb_warn("can't walk 'umem'");
4039 return (DCMD_ERR);
4040 }
4041
4042 overhead = mi.um_malloc_overhead;
4043 allocated = mi.um_malloc_size;
4044
4045 /* do integer round off for the average */
4046 if (mi.um_malloc != 0)
4047 avg_malloc = (allocated + (mi.um_malloc - 1)/2) / mi.um_malloc;
4048 else
4049 avg_malloc = 0;
4050
4051 /*
4052 * include per-slab overhead
4053 *
4054 * Each slab in a given cache is the same size, and has the same
4055 * number of chunks in it; we read in the first slab on the
4056 * slab list to get the number of chunks for all slabs. To
4057 * compute the per-slab overhead, we just subtract the chunk usage
4058 * from the slabsize:
4059 *
4060 * +------------+-------+-------+ ... --+-------+-------+-------+
4061 * |////////////| | | ... | |///////|///////|
4062 * |////color///| chunk | chunk | ... | chunk |/color/|/slab//|
4063 * |////////////| | | ... | |///////|///////|
4064 * +------------+-------+-------+ ... --+-------+-------+-------+
4065 * | \_______chunksize * chunks_____/ |
4066 * \__________________________slabsize__________________________/
4067 *
4068 * For UMF_HASH caches, there is an additional source of overhead;
4069 * the external umem_slab_t and per-chunk bufctl structures. We
4070 * include those in our per-slab overhead.
4071 *
4072 * Once we have a number for the per-slab overhead, we estimate
4073 * the actual overhead by treating the malloc()ed buffers as if
4074 * they were densely packed:
4075 *
4076 * additional overhead = (# mallocs) * (per-slab) / (chunks);
4077 *
4078 * carefully ordering the multiply before the divide, to avoid
4079 * round-off error.
4080 */
4081 if (mi.um_malloc != 0) {
4082 umem_slab_t slab;
4083 uintptr_t saddr = (uintptr_t)c.cache_nullslab.slab_next;
4084
4085 if (mdb_vread(&slab, sizeof (slab), saddr) == -1) {
4086 mdb_warn("unable to read slab at %p\n", saddr);
4087 } else {
4088 long chunks = slab.slab_chunks;
4089 if (chunks != 0 && c.cache_chunksize != 0 &&
4090 chunks <= c.cache_slabsize / c.cache_chunksize) {
4091 uintmax_t perslab =
4092 c.cache_slabsize -
4093 (c.cache_chunksize * chunks);
4094
4095 if (c.cache_flags & UMF_HASH) {
4096 perslab += sizeof (umem_slab_t) +
4097 chunks *
4098 ((c.cache_flags & UMF_AUDIT) ?
4099 sizeof (umem_bufctl_audit_t) :
4100 sizeof (umem_bufctl_t));
4101 }
4102 overhead +=
4103 (perslab * (uintmax_t)mi.um_malloc)/chunks;
4104 } else {
4105 mdb_warn("invalid #chunks (%d) in slab %p\n",
4106 chunks, saddr);
4107 }
4108 }
4109 }
4110
4111 if (allocated != 0)
4112 overhead_pct = (1000ULL * overhead) / allocated;
4113 else
4114 overhead_pct = 0;
4115
4116 mdb_printf("%0?p %6ld %6ld %8ld %8ld %10ld %10ld %3ld.%01ld%%\n",
4117 addr, c.cache_bufsize, maxmalloc,
4118 mi.um_malloc, avg_malloc, allocated, overhead,
4119 overhead_pct / 10, overhead_pct % 10);
4120
4121 if (!verbose)
4122 return (DCMD_OK);
4123
4124 if (!dump)
4125 mdb_printf("\n");
4126
4127 if (get_umem_alloc_sizes(&alloc_sizes, &num) == -1)
4128 return (DCMD_ERR);
4129
4130 for (idx = 0; idx < num; idx++) {
4131 if (alloc_sizes[idx] == c.cache_bufsize)
4132 break;
4133 if (alloc_sizes[idx] == 0) {
4134 idx = num; /* 0-terminated array */
4135 break;
4136 }
4137 }
4138 if (idx == num) {
4139 mdb_warn(
4140 "cache %p's size (%d) not in umem_alloc_sizes\n",
4141 addr, c.cache_bufsize);
4142 return (DCMD_ERR);
4143 }
4144
4145 minmalloc = (idx == 0)? 0 : alloc_sizes[idx - 1];
4146 if (minmalloc > 0) {
4147 #ifdef _LP64
4148 if (minmalloc > UMEM_SECOND_ALIGN)
4149 minmalloc -= sizeof (struct malloc_data);
4150 #endif
4151 minmalloc -= sizeof (struct malloc_data);
4152 minmalloc += 1;
4153 }
4154
4155 if (dump) {
4156 for (idx = minmalloc; idx <= maxmalloc; idx++)
4157 mdb_printf("%d\t%d\n", idx, mi.um_bucket[idx]);
4158 mdb_printf("\n");
4159 } else {
4160 umem_malloc_print_dist(mi.um_bucket, minmalloc, maxmalloc,
4161 maxbuckets, minbucketsize, geometric);
4162 }
4163
4164 return (DCMD_OK);
4165 }
4166