1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <mdb/mdb_param.h>
27 #include <mdb/mdb_modapi.h>
28 #include <mdb/mdb_ctf.h>
29 #include <mdb/mdb_whatis.h>
30 #include <sys/cpuvar.h>
31 #include <sys/kmem_impl.h>
32 #include <sys/vmem_impl.h>
33 #include <sys/machelf.h>
34 #include <sys/modctl.h>
35 #include <sys/kobj.h>
36 #include <sys/panic.h>
37 #include <sys/stack.h>
38 #include <sys/sysmacros.h>
39 #include <vm/page.h>
40
41 #include "avl.h"
42 #include "combined.h"
43 #include "dist.h"
44 #include "kmem.h"
45 #include "list.h"
46
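/*
 * Debug tracing helper: dprintf takes a doubly-parenthesized argument
 * list, e.g. dprintf(("read %d rounds\n", rounds)), and emits output only
 * while mdb_debug_level is nonzero (toggled by the kmem_debug() dcmd
 * handler below).
 */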
47 #define dprintf(x) if (mdb_debug_level) { \
48 mdb_printf("kmem debug: "); \
49 /*CSTYLED*/\
50 mdb_printf x ;\
51 }
52
53 #define KM_ALLOCATED 0x01
54 #define KM_FREE 0x02
55 #define KM_BUFCTL 0x04
56 #define KM_CONSTRUCTED 0x08 /* only constructed free buffers */
57 #define KM_HASH 0x10
58
59 static int mdb_debug_level = 0;
60
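/*
 * Register a walker named after each cache so that a cache can be walked
 * directly by name (e.g. ::walk <cache-name>); the cache address is
 * stashed as the walker's init argument.
 */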
61 /*ARGSUSED*/
62 static int
63 kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored)
64 {
65 mdb_walker_t w;
66 char descr[64];
67
68 (void) mdb_snprintf(descr, sizeof (descr),
69 "walk the %s cache", c->cache_name);
70
71 w.walk_name = c->cache_name;
72 w.walk_descr = descr;
73 w.walk_init = kmem_walk_init;
74 w.walk_step = kmem_walk_step;
75 w.walk_fini = kmem_walk_fini;
76 w.walk_init_arg = (void *)addr;
77
78 if (mdb_add_walker(&w) == -1)
79 mdb_warn("failed to add %s walker", c->cache_name);
80
81 return (WALK_NEXT);
82 }
83
84 /*ARGSUSED*/
85 int
86 kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
87 {
88 mdb_debug_level ^= 1;
89
90 mdb_printf("kmem: debugging is now %s\n",
91 mdb_debug_level ? "on" : "off");
92
93 return (DCMD_OK);
94 }
95
96 int
97 kmem_cache_walk_init(mdb_walk_state_t *wsp)
98 {
99 GElf_Sym sym;
100
101 if (mdb_lookup_by_name("kmem_caches", &sym) == -1) {
102 mdb_warn("couldn't find kmem_caches");
103 return (WALK_ERR);
104 }
105
106 wsp->walk_addr = (uintptr_t)sym.st_value;
107
108 return (list_walk_init_named(wsp, "cache list", "cache"));
109 }
110
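/*
 * The kmem_cpu_cache walker layers on the "cpu" walker: for each CPU it
 * reads the kmem_cpu_cache_t slot (indexed by cpu_seqid) out of the cache
 * whose address was passed to the walk.
 */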
111 int
112 kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
113 {
114 if (wsp->walk_addr == NULL) {
115 mdb_warn("kmem_cpu_cache doesn't support global walks");
116 return (WALK_ERR);
117 }
118
119 if (mdb_layered_walk("cpu", wsp) == -1) {
120 mdb_warn("couldn't walk 'cpu'");
121 return (WALK_ERR);
122 }
123
124 wsp->walk_data = (void *)wsp->walk_addr;
125
126 return (WALK_NEXT);
127 }
128
129 int
130 kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
131 {
132 uintptr_t caddr = (uintptr_t)wsp->walk_data;
133 const cpu_t *cpu = wsp->walk_layer;
134 kmem_cpu_cache_t cc;
135
136 caddr += OFFSETOF(kmem_cache_t, cache_cpu[cpu->cpu_seqid]);
137
138 if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) {
139 mdb_warn("couldn't read kmem_cpu_cache at %p", caddr);
140 return (WALK_ERR);
141 }
142
143 return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
144 }
145
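/*
 * Sanity-check callbacks for the "checked" list/AVL walks below: each
 * verifies that a slab found on a cache's slab list points back at that
 * cache, and (for the partial/complete variants) that its allocation
 * state matches what that list is supposed to contain.
 */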
146 static int
147 kmem_slab_check(void *p, uintptr_t saddr, void *arg)
148 {
149 kmem_slab_t *sp = p;
150 uintptr_t caddr = (uintptr_t)arg;
151 if ((uintptr_t)sp->slab_cache != caddr) {
152 mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
153 saddr, caddr, sp->slab_cache);
154 return (-1);
155 }
156
157 return (0);
158 }
159
160 static int
161 kmem_partial_slab_check(void *p, uintptr_t saddr, void *arg)
162 {
163 kmem_slab_t *sp = p;
164
165 int rc = kmem_slab_check(p, saddr, arg);
166 if (rc != 0) {
167 return (rc);
168 }
169
170 if (!KMEM_SLAB_IS_PARTIAL(sp)) {
171 mdb_warn("slab %p is not a partial slab\n", saddr);
172 return (-1);
173 }
174
175 return (0);
176 }
177
178 static int
179 kmem_complete_slab_check(void *p, uintptr_t saddr, void *arg)
180 {
181 kmem_slab_t *sp = p;
182
183 int rc = kmem_slab_check(p, saddr, arg);
184 if (rc != 0) {
185 return (rc);
186 }
187
188 if (!KMEM_SLAB_IS_ALL_USED(sp)) {
189 mdb_warn("slab %p is not completely allocated\n", saddr);
190 return (-1);
191 }
192
193 return (0);
194 }
195
196 typedef struct {
197 uintptr_t kns_cache_addr;
198 int kns_nslabs;
199 } kmem_nth_slab_t;
200
201 static int
202 kmem_nth_slab_check(void *p, uintptr_t saddr, void *arg)
203 {
204 kmem_nth_slab_t *chkp = arg;
205
206 int rc = kmem_slab_check(p, saddr, (void *)chkp->kns_cache_addr);
207 if (rc != 0) {
208 return (rc);
209 }
210
211 return (chkp->kns_nslabs-- == 0 ? 1 : 0);
212 }
213
214 static int
215 kmem_complete_slab_walk_init(mdb_walk_state_t *wsp)
216 {
217 uintptr_t caddr = wsp->walk_addr;
218
219 wsp->walk_addr = (uintptr_t)(caddr +
220 offsetof(kmem_cache_t, cache_complete_slabs));
221
222 return (list_walk_init_checked(wsp, "slab list", "slab",
223 kmem_complete_slab_check, (void *)caddr));
224 }
225
226 static int
227 kmem_partial_slab_walk_init(mdb_walk_state_t *wsp)
228 {
229 uintptr_t caddr = wsp->walk_addr;
230
231 wsp->walk_addr = (uintptr_t)(caddr +
232 offsetof(kmem_cache_t, cache_partial_slabs));
233
234 return (avl_walk_init_checked(wsp, "slab list", "slab",
235 kmem_partial_slab_check, (void *)caddr));
236 }
237
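/*
 * The kmem_slab walker visits every slab in a cache by chaining two walks
 * with the combined-walk helpers: the list of complete slabs first, then
 * the AVL tree of partial slabs.
 */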
238 int
239 kmem_slab_walk_init(mdb_walk_state_t *wsp)
240 {
241 uintptr_t caddr = wsp->walk_addr;
242
243 if (caddr == NULL) {
244 mdb_warn("kmem_slab doesn't support global walks\n");
245 return (WALK_ERR);
246 }
247
248 combined_walk_init(wsp);
249 combined_walk_add(wsp,
250 kmem_complete_slab_walk_init, list_walk_step, list_walk_fini);
251 combined_walk_add(wsp,
252 kmem_partial_slab_walk_init, avl_walk_step, avl_walk_fini);
253
254 return (WALK_NEXT);
255 }
256
257 static int
258 kmem_first_complete_slab_walk_init(mdb_walk_state_t *wsp)
259 {
260 uintptr_t caddr = wsp->walk_addr;
261 kmem_nth_slab_t *chk;
262
263 chk = mdb_alloc(sizeof (kmem_nth_slab_t),
264 UM_SLEEP | UM_GC);
265 chk->kns_cache_addr = caddr;
266 chk->kns_nslabs = 1;
267 wsp->walk_addr = (uintptr_t)(caddr +
268 offsetof(kmem_cache_t, cache_complete_slabs));
269
270 return (list_walk_init_checked(wsp, "slab list", "slab",
271 kmem_nth_slab_check, chk));
272 }
273
274 int
275 kmem_slab_walk_partial_init(mdb_walk_state_t *wsp)
276 {
277 uintptr_t caddr = wsp->walk_addr;
278 kmem_cache_t c;
279
280 if (caddr == NULL) {
281 mdb_warn("kmem_slab_partial doesn't support global walks\n");
282 return (WALK_ERR);
283 }
284
285 if (mdb_vread(&c, sizeof (c), caddr) == -1) {
286 mdb_warn("couldn't read kmem_cache at %p", caddr);
287 return (WALK_ERR);
288 }
289
290 combined_walk_init(wsp);
291
292 /*
293 * Some consumers (kmem_walk_step(), in particular) require at
294 * least one callback if there are any buffers in the cache. So
295 * if there are *no* partial slabs, report the first full slab, if
296 * any.
297 *
298 * Yes, this is ugly, but it's cleaner than the other possibilities.
299 */
300 if (c.cache_partial_slabs.avl_numnodes == 0) {
301 combined_walk_add(wsp, kmem_first_complete_slab_walk_init,
302 list_walk_step, list_walk_fini);
303 } else {
304 combined_walk_add(wsp, kmem_partial_slab_walk_init,
305 avl_walk_step, avl_walk_fini);
306 }
307
308 return (WALK_NEXT);
309 }
310
311 int
312 kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
313 {
314 kmem_cache_t c;
315 const char *filter = NULL;
316
317 if (mdb_getopts(ac, argv,
318 'n', MDB_OPT_STR, &filter,
319 NULL) != ac) {
320 return (DCMD_USAGE);
321 }
322
323 if (!(flags & DCMD_ADDRSPEC)) {
324 if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) {
325 mdb_warn("can't walk kmem_cache");
326 return (DCMD_ERR);
327 }
328 return (DCMD_OK);
329 }
330
331 if (DCMD_HDRSPEC(flags))
332 mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME",
333 "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");
334
335 if (mdb_vread(&c, sizeof (c), addr) == -1) {
336 mdb_warn("couldn't read kmem_cache at %p", addr);
337 return (DCMD_ERR);
338 }
339
340 if ((filter != NULL) && (strstr(c.cache_name, filter) == NULL))
341 return (DCMD_OK);
342
343 mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name,
344 c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);
345
346 return (DCMD_OK);
347 }
348
349 void
350 kmem_cache_help(void)
351 {
352 mdb_printf("%s", "Print kernel memory caches.\n\n");
353 mdb_dec_indent(2);
354 mdb_printf("%<b>OPTIONS%</b>\n");
355 mdb_inc_indent(2);
356 mdb_printf("%s",
357 " -n name\n"
358 " name of kmem cache (or matching partial name)\n"
359 "\n"
360 "Column\tDescription\n"
361 "\n"
362 "ADDR\t\taddress of kmem cache\n"
363 "NAME\t\tname of kmem cache\n"
364 "FLAG\t\tvarious cache state flags\n"
365 "CFLAG\t\tcache creation flags\n"
366 "BUFSIZE\tobject size in bytes\n"
367 "BUFTOTL\tcurrent total buffers in cache (allocated and free)\n");
368 }
369
370 #define LABEL_WIDTH 11
371 static void
372 kmem_slabs_print_dist(uint_t *ks_bucket, size_t buffers_per_slab,
373 size_t maxbuckets, size_t minbucketsize)
374 {
375 uint64_t total;
376 int buckets;
377 int i;
378 const int *distarray;
379 int complete[2];
380
381 buckets = buffers_per_slab;
382
383 total = 0;
384 for (i = 0; i <= buffers_per_slab; i++)
385 total += ks_bucket[i];
386
387 if (maxbuckets > 1)
388 buckets = MIN(buckets, maxbuckets);
389
390 if (minbucketsize > 1) {
391 /*
392 * minbucketsize does not apply to the first bucket reserved
393 * for completely allocated slabs
394 */
395 buckets = MIN(buckets, 1 + ((buffers_per_slab - 1) /
396 minbucketsize));
397 if ((buckets < 2) && (buffers_per_slab > 1)) {
398 buckets = 2;
399 minbucketsize = (buffers_per_slab - 1);
400 }
401 }
402
403 /*
404 * The first printed bucket is reserved for completely allocated slabs.
405 * Passing (buckets - 1) excludes that bucket from the generated
406 * distribution, since we're handling it as a special case.
407 */
408 complete[0] = buffers_per_slab;
409 complete[1] = buffers_per_slab + 1;
410 distarray = dist_linear(buckets - 1, 1, buffers_per_slab - 1);
411
412 mdb_printf("%*s\n", LABEL_WIDTH, "Allocated");
413 dist_print_header("Buffers", LABEL_WIDTH, "Slabs");
414
415 dist_print_bucket(complete, 0, ks_bucket, total, LABEL_WIDTH);
416 /*
417 * Print bucket ranges in descending order after the first bucket for
418 * completely allocated slabs, so a person can see immediately whether
419 * or not there is fragmentation without having to scan possibly
420 * multiple screens of output. Starting at (buckets - 2) excludes the
421 * extra terminating bucket.
422 */
423 for (i = buckets - 2; i >= 0; i--) {
424 dist_print_bucket(distarray, i, ks_bucket, total, LABEL_WIDTH);
425 }
426 mdb_printf("\n");
427 }
428 #undef LABEL_WIDTH
429
430 /*ARGSUSED*/
431 static int
432 kmem_first_slab(uintptr_t addr, const kmem_slab_t *sp, boolean_t *is_slab)
433 {
434 *is_slab = B_TRUE;
435 return (WALK_DONE);
436 }
437
438 /*ARGSUSED*/
439 static int
440 kmem_first_partial_slab(uintptr_t addr, const kmem_slab_t *sp,
441 boolean_t *is_slab)
442 {
443 /*
444 * The "kmem_slab_partial" walker reports the first full slab if there
445 * are no partial slabs (for the sake of consumers that require at least
446 * one callback if there are any buffers in the cache).
447 */
448 *is_slab = KMEM_SLAB_IS_PARTIAL(sp);
449 return (WALK_DONE);
450 }
451
452 typedef struct kmem_slab_usage {
453 int ksu_refcnt; /* count of allocated buffers on slab */
454 boolean_t ksu_nomove; /* slab marked non-reclaimable */
455 } kmem_slab_usage_t;
456
457 typedef struct kmem_slab_stats {
458 const kmem_cache_t *ks_cp;
459 int ks_slabs; /* slabs in cache */
460 int ks_partial_slabs; /* partially allocated slabs in cache */
461 uint64_t ks_unused_buffers; /* total unused buffers in cache */
462 int ks_max_buffers_per_slab; /* max buffers per slab */
463 int ks_usage_len; /* ks_usage array length */
464 kmem_slab_usage_t *ks_usage; /* partial slab usage */
465 uint_t *ks_bucket; /* slab usage distribution */
466 } kmem_slab_stats_t;
467
468 /*ARGSUSED*/
469 static int
470 kmem_slablist_stat(uintptr_t addr, const kmem_slab_t *sp,
471 kmem_slab_stats_t *ks)
472 {
473 kmem_slab_usage_t *ksu;
474 long unused;
475
476 ks->ks_slabs++;
477 ks->ks_bucket[sp->slab_refcnt]++;
478
479 unused = (sp->slab_chunks - sp->slab_refcnt);
480 if (unused == 0) {
481 return (WALK_NEXT);
482 }
483
484 ks->ks_partial_slabs++;
485 ks->ks_unused_buffers += unused;
486
487 if (ks->ks_partial_slabs > ks->ks_usage_len) {
488 kmem_slab_usage_t *usage;
489 int len = ks->ks_usage_len;
490
491 len = (len == 0 ? 16 : len * 2);
492 usage = mdb_zalloc(len * sizeof (kmem_slab_usage_t), UM_SLEEP);
493 if (ks->ks_usage != NULL) {
494 bcopy(ks->ks_usage, usage,
495 ks->ks_usage_len * sizeof (kmem_slab_usage_t));
496 mdb_free(ks->ks_usage,
497 ks->ks_usage_len * sizeof (kmem_slab_usage_t));
498 }
499 ks->ks_usage = usage;
500 ks->ks_usage_len = len;
501 }
502
503 ksu = &ks->ks_usage[ks->ks_partial_slabs - 1];
504 ksu->ksu_refcnt = sp->slab_refcnt;
505 ksu->ksu_nomove = (sp->slab_flags & KMEM_SLAB_NOMOVE);
506 return (WALK_NEXT);
507 }
508
509 static void
510 kmem_slabs_header()
511 {
512 mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
513 "", "", "Partial", "", "Unused", "");
514 mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
515 "Cache Name", "Slabs", "Slabs", "Buffers", "Buffers", "Waste");
516 mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
517 "-------------------------", "--------", "--------", "---------",
518 "---------", "------");
519 }
520
521 int
522 kmem_slabs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
523 {
524 kmem_cache_t c;
525 kmem_slab_stats_t stats;
526 mdb_walk_cb_t cb;
527 int pct;
528 int tenths_pct;
529 size_t maxbuckets = 1;
530 size_t minbucketsize = 0;
531 const char *filter = NULL;
532 const char *name = NULL;
533 uint_t opt_v = FALSE;
534 boolean_t buckets = B_FALSE;
535 boolean_t skip = B_FALSE;
536
537 if (mdb_getopts(argc, argv,
538 'B', MDB_OPT_UINTPTR, &minbucketsize,
539 'b', MDB_OPT_UINTPTR, &maxbuckets,
540 'n', MDB_OPT_STR, &filter,
541 'N', MDB_OPT_STR, &name,
542 'v', MDB_OPT_SETBITS, TRUE, &opt_v,
543 NULL) != argc) {
544 return (DCMD_USAGE);
545 }
546
547 if ((maxbuckets != 1) || (minbucketsize != 0)) {
548 buckets = B_TRUE;
549 }
550
551 if (!(flags & DCMD_ADDRSPEC)) {
552 if (mdb_walk_dcmd("kmem_cache", "kmem_slabs", argc,
553 argv) == -1) {
554 mdb_warn("can't walk kmem_cache");
555 return (DCMD_ERR);
556 }
557 return (DCMD_OK);
558 }
559
560 if (mdb_vread(&c, sizeof (c), addr) == -1) {
561 mdb_warn("couldn't read kmem_cache at %p", addr);
562 return (DCMD_ERR);
563 }
564
565 if (name == NULL) {
566 skip = ((filter != NULL) &&
567 (strstr(c.cache_name, filter) == NULL));
568 } else if (filter == NULL) {
569 skip = (strcmp(c.cache_name, name) != 0);
570 } else {
571 /* match either -n or -N */
572 skip = ((strcmp(c.cache_name, name) != 0) &&
573 (strstr(c.cache_name, filter) == NULL));
574 }
575
576 if (!(opt_v || buckets) && DCMD_HDRSPEC(flags)) {
577 kmem_slabs_header();
578 } else if ((opt_v || buckets) && !skip) {
579 if (DCMD_HDRSPEC(flags)) {
580 kmem_slabs_header();
581 } else {
582 boolean_t is_slab = B_FALSE;
583 const char *walker_name;
584 if (opt_v) {
585 cb = (mdb_walk_cb_t)kmem_first_partial_slab;
586 walker_name = "kmem_slab_partial";
587 } else {
588 cb = (mdb_walk_cb_t)kmem_first_slab;
589 walker_name = "kmem_slab";
590 }
591 (void) mdb_pwalk(walker_name, cb, &is_slab, addr);
592 if (is_slab) {
593 kmem_slabs_header();
594 }
595 }
596 }
597
598 if (skip) {
599 return (DCMD_OK);
600 }
601
602 bzero(&stats, sizeof (kmem_slab_stats_t));
603 stats.ks_cp = &c;
604 stats.ks_max_buffers_per_slab = c.cache_maxchunks;
605 /* +1 to include a zero bucket */
606 stats.ks_bucket = mdb_zalloc((stats.ks_max_buffers_per_slab + 1) *
607 sizeof (*stats.ks_bucket), UM_SLEEP);
608 cb = (mdb_walk_cb_t)kmem_slablist_stat;
609 (void) mdb_pwalk("kmem_slab", cb, &stats, addr);
610
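/*
 * Compute the waste percentage with integer arithmetic: scaling the unused
 * buffer count by 10000 makes (n / buftotal) a value in hundredths of a
 * percent; the remainder is rounded to the nearest tenth, and pct is
 * divided back down to whole percent below.
 */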
611 if (c.cache_buftotal == 0) {
612 pct = 0;
613 tenths_pct = 0;
614 } else {
615 uint64_t n = stats.ks_unused_buffers * 10000;
616 pct = (int)(n / c.cache_buftotal);
617 tenths_pct = pct - ((pct / 100) * 100);
618 tenths_pct = (tenths_pct + 5) / 10; /* round nearest tenth */
619 if (tenths_pct == 10) {
620 pct += 100;
621 tenths_pct = 0;
622 }
623 }
624
625 pct /= 100;
626 mdb_printf("%-25s %8d %8d %9lld %9lld %3d.%1d%%\n", c.cache_name,
627 stats.ks_slabs, stats.ks_partial_slabs, c.cache_buftotal,
628 stats.ks_unused_buffers, pct, tenths_pct);
629
630 if (maxbuckets == 0) {
631 maxbuckets = stats.ks_max_buffers_per_slab;
632 }
633
634 if (((maxbuckets > 1) || (minbucketsize > 0)) &&
635 (stats.ks_slabs > 0)) {
636 mdb_printf("\n");
637 kmem_slabs_print_dist(stats.ks_bucket,
638 stats.ks_max_buffers_per_slab, maxbuckets, minbucketsize);
639 }
640
641 mdb_free(stats.ks_bucket, (stats.ks_max_buffers_per_slab + 1) *
642 sizeof (*stats.ks_bucket));
643
644 if (!opt_v) {
645 return (DCMD_OK);
646 }
647
648 if (opt_v && (stats.ks_partial_slabs > 0)) {
649 int i;
650 kmem_slab_usage_t *ksu;
651
652 mdb_printf(" %d complete (%d), %d partial:",
653 (stats.ks_slabs - stats.ks_partial_slabs),
654 stats.ks_max_buffers_per_slab,
655 stats.ks_partial_slabs);
656
657 for (i = 0; i < stats.ks_partial_slabs; i++) {
658 ksu = &stats.ks_usage[i];
659 mdb_printf(" %d%s", ksu->ksu_refcnt,
660 (ksu->ksu_nomove ? "*" : ""));
661 }
662 mdb_printf("\n\n");
663 }
664
665 if (stats.ks_usage_len > 0) {
666 mdb_free(stats.ks_usage,
667 stats.ks_usage_len * sizeof (kmem_slab_usage_t));
668 }
669
670 return (DCMD_OK);
671 }
672
673 void
674 kmem_slabs_help(void)
675 {
676 mdb_printf("%s",
677 "Display slab usage per kmem cache.\n\n");
678 mdb_dec_indent(2);
679 mdb_printf("%<b>OPTIONS%</b>\n");
680 mdb_inc_indent(2);
681 mdb_printf("%s",
682 " -n name\n"
683 " name of kmem cache (or matching partial name)\n"
684 " -N name\n"
685 " exact name of kmem cache\n"
686 " -b maxbins\n"
687 " Print a distribution of allocated buffers per slab using at\n"
688 " most maxbins bins. The first bin is reserved for completely\n"
689 " allocated slabs. Setting maxbins to zero (-b 0) has the same\n"
690 " effect as specifying the maximum allocated buffers per slab\n"
691 " or setting minbinsize to 1 (-B 1).\n"
692 " -B minbinsize\n"
693 " Print a distribution of allocated buffers per slab, making\n"
694 " all bins (except the first, reserved for completely allocated\n"
695 " slabs) at least minbinsize buffers apart.\n"
696 " -v verbose output: List the allocated buffer count of each partial\n"
697 " slab on the free list in order from front to back to show how\n"
698 " closely the slabs are ordered by usage. For example\n"
699 "\n"
700 " 10 complete, 3 partial (8): 7 3 1\n"
701 "\n"
702 " means there are thirteen slabs with eight buffers each, including\n"
703 " three partially allocated slabs with less than all eight buffers\n"
704 " allocated.\n"
705 "\n"
706 " Buffer allocations are always from the front of the partial slab\n"
707 " list. When a buffer is freed from a completely used slab, that\n"
708 " slab is added to the front of the partial slab list. Assuming\n"
709 " that all buffers are equally likely to be freed soon, the\n"
710 " desired order of partial slabs is most-used at the front of the\n"
711 " list and least-used at the back (as in the example above).\n"
712 " However, if a slab contains an allocated buffer that will not\n"
713 " soon be freed, it would be better for that slab to be at the\n"
714 " front where all of its buffers can be allocated. Taking a slab\n"
715 " off the partial slab list (either with all buffers freed or all\n"
716 " buffers allocated) reduces cache fragmentation.\n"
717 "\n"
718 " A slab's allocated buffer count representing a partial slab (9 in\n"
719 " the example below) may be marked as follows:\n"
720 "\n"
721 " 9* An asterisk indicates that kmem has marked the slab non-\n"
722 " reclaimable because the kmem client refused to move one of the\n"
723 " slab's buffers. Since kmem does not expect to completely free the\n"
724 " slab, it moves it to the front of the list in the hope of\n"
725 " completely allocating it instead. A slab marked with an asterisk\n"
726 " stays marked for as long as it remains on the partial slab list.\n"
727 "\n"
728 "Column\t\tDescription\n"
729 "\n"
730 "Cache Name\t\tname of kmem cache\n"
731 "Slabs\t\t\ttotal slab count\n"
732 "Partial Slabs\t\tcount of partially allocated slabs on the free list\n"
733 "Buffers\t\ttotal buffer count (Slabs * (buffers per slab))\n"
734 "Unused Buffers\tcount of unallocated buffers across all partial slabs\n"
735 "Waste\t\t\t(Unused Buffers / Buffers) does not include space\n"
736 "\t\t\t for accounting structures (debug mode), slab\n"
737 "\t\t\t coloring (incremental small offsets to stagger\n"
738 "\t\t\t buffer alignment), or the per-CPU magazine layer\n");
739 }
740
741 static int
742 addrcmp(const void *lhs, const void *rhs)
743 {
744 uintptr_t p1 = *((uintptr_t *)lhs);
745 uintptr_t p2 = *((uintptr_t *)rhs);
746
747 if (p1 < p2)
748 return (-1);
749 if (p1 > p2)
750 return (1);
751 return (0);
752 }
753
754 static int
755 bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs)
756 {
757 const kmem_bufctl_audit_t *bcp1 = *lhs;
758 const kmem_bufctl_audit_t *bcp2 = *rhs;
759
760 if (bcp1->bc_timestamp > bcp2->bc_timestamp)
761 return (-1);
762
763 if (bcp1->bc_timestamp < bcp2->bc_timestamp)
764 return (1);
765
766 return (0);
767 }
768
769 typedef struct kmem_hash_walk {
770 uintptr_t *kmhw_table;
771 size_t kmhw_nelems;
772 size_t kmhw_pos;
773 kmem_bufctl_t kmhw_cur;
774 } kmem_hash_walk_t;
775
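/*
 * The kmem_hash walker snapshots a cache's entire hash table up front,
 * then steps through each bucket's bufctl chain, reading one
 * kmem_bufctl_t from the target at a time.
 */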
776 int
777 kmem_hash_walk_init(mdb_walk_state_t *wsp)
778 {
779 kmem_hash_walk_t *kmhw;
780 uintptr_t *hash;
781 kmem_cache_t c;
782 uintptr_t haddr, addr = wsp->walk_addr;
783 size_t nelems;
784 size_t hsize;
785
786 if (addr == NULL) {
787 mdb_warn("kmem_hash doesn't support global walks\n");
788 return (WALK_ERR);
789 }
790
791 if (mdb_vread(&c, sizeof (c), addr) == -1) {
792 mdb_warn("couldn't read cache at addr %p", addr);
793 return (WALK_ERR);
794 }
795
796 if (!(c.cache_flags & KMF_HASH)) {
797 mdb_warn("cache %p doesn't have a hash table\n", addr);
798 return (WALK_DONE); /* nothing to do */
799 }
800
801 kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP);
802 kmhw->kmhw_cur.bc_next = NULL;
803 kmhw->kmhw_pos = 0;
804
805 kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1;
806 hsize = nelems * sizeof (uintptr_t);
807 haddr = (uintptr_t)c.cache_hash_table;
808
809 kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
810 if (mdb_vread(hash, hsize, haddr) == -1) {
811 mdb_warn("failed to read hash table at %p", haddr);
812 mdb_free(hash, hsize);
813 mdb_free(kmhw, sizeof (kmem_hash_walk_t));
814 return (WALK_ERR);
815 }
816
817 wsp->walk_data = kmhw;
818
819 return (WALK_NEXT);
820 }
821
822 int
823 kmem_hash_walk_step(mdb_walk_state_t *wsp)
824 {
825 kmem_hash_walk_t *kmhw = wsp->walk_data;
826 uintptr_t addr = NULL;
827
828 if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == NULL) {
829 while (kmhw->kmhw_pos < kmhw->kmhw_nelems) {
830 if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) != NULL)
831 break;
832 }
833 }
834 if (addr == NULL)
835 return (WALK_DONE);
836
837 if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) {
838 mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr);
839 return (WALK_ERR);
840 }
841
842 return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata));
843 }
844
845 void
846 kmem_hash_walk_fini(mdb_walk_state_t *wsp)
847 {
848 kmem_hash_walk_t *kmhw = wsp->walk_data;
849
850 if (kmhw == NULL)
851 return;
852
853 mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t));
854 mdb_free(kmhw, sizeof (kmem_hash_walk_t));
855 }
856
857 /*
858 * Find the address of the bufctl structure for the address 'buf' in cache
859 * 'cp', which is at address caddr, and place it in *out.
860 */
861 static int
862 kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
863 {
864 uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf);
865 kmem_bufctl_t *bcp;
866 kmem_bufctl_t bc;
867
868 if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) {
869 mdb_warn("unable to read hash bucket for %p in cache %p",
870 buf, caddr);
871 return (-1);
872 }
873
874 while (bcp != NULL) {
875 if (mdb_vread(&bc, sizeof (kmem_bufctl_t),
876 (uintptr_t)bcp) == -1) {
877 mdb_warn("unable to read bufctl at %p", bcp);
878 return (-1);
879 }
880 if (bc.bc_addr == buf) {
881 *out = (uintptr_t)bcp;
882 return (0);
883 }
884 bcp = bc.bc_next;
885 }
886
887 mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
888 return (-1);
889 }
890
891 int
892 kmem_get_magsize(const kmem_cache_t *cp)
893 {
894 uintptr_t addr = (uintptr_t)cp->cache_magtype;
895 GElf_Sym mt_sym;
896 kmem_magtype_t mt;
897 int res;
898
899 /*
900 * if cpu 0 has a non-zero magsize, it must be correct. caches
901 * with KMF_NOMAGAZINE have disabled their magazine layers, so
902 * it is okay to return 0 for them.
903 */
904 if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
905 (cp->cache_flags & KMF_NOMAGAZINE))
906 return (res);
907
908 if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) {
909 mdb_warn("unable to read 'kmem_magtype'");
910 } else if (addr < mt_sym.st_value ||
911 addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
912 ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
913 mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
914 cp->cache_name, addr);
915 return (0);
916 }
917 if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
918 mdb_warn("unable to read magtype at %a", addr);
919 return (0);
920 }
921 return (mt.mt_magsize);
922 }
923
924 /*ARGSUSED*/
925 static int
926 kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est)
927 {
928 *est -= (sp->slab_chunks - sp->slab_refcnt);
929
930 return (WALK_NEXT);
931 }
932
933 /*
934 * Returns an upper bound on the number of allocated buffers in a given
935 * cache.
936 */
937 size_t
938 kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp)
939 {
940 int magsize;
941 size_t cache_est;
942
943 cache_est = cp->cache_buftotal;
944
945 (void) mdb_pwalk("kmem_slab_partial",
946 (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr);
947
948 if ((magsize = kmem_get_magsize(cp)) != 0) {
949 size_t mag_est = cp->cache_full.ml_total * magsize;
950
951 if (cache_est >= mag_est) {
952 cache_est -= mag_est;
953 } else {
954 mdb_warn("cache %p's magazine layer holds more buffers "
955 "than the slab layer.\n", addr);
956 }
957 }
958 return (cache_est);
959 }
960
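/*
 * READMAG_ROUNDS reads one magazine into the local buffer 'mp' and
 * appends its first 'rounds' rounds to maglist[], bailing out through the
 * caller's 'fail' label if the fudge-factored maximum (magmax) would be
 * exceeded. It relies on the locals of kmem_read_magazines() below
 * (kmp, mp, magbsize, maglist, magcnt, magmax, i).
 */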
961 #define READMAG_ROUNDS(rounds) { \
962 if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \
963 mdb_warn("couldn't read magazine at %p", kmp); \
964 goto fail; \
965 } \
966 for (i = 0; i < rounds; i++) { \
967 maglist[magcnt++] = mp->mag_round[i]; \
968 if (magcnt == magmax) { \
969 mdb_warn("%d magazines exceeds fudge factor\n", \
970 magcnt); \
971 goto fail; \
972 } \
973 } \
974 }
975
976 int
977 kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus,
978 void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)
979 {
980 kmem_magazine_t *kmp, *mp;
981 void **maglist = NULL;
982 int i, cpu;
983 size_t magsize, magmax, magbsize;
984 size_t magcnt = 0;
985
986 /*
987 * Read the magtype out of the cache, after verifying the pointer's
988 * correctness.
989 */
990 magsize = kmem_get_magsize(cp);
991 if (magsize == 0) {
992 *maglistp = NULL;
993 *magcntp = 0;
994 *magmaxp = 0;
995 return (WALK_NEXT);
996 }
997
998 /*
999 * There are several places where we need to go buffer hunting:
1000 * the per-CPU loaded magazine, the per-CPU spare full magazine,
1001 * and the full magazine list in the depot.
1002 *
1003 * For an upper bound on the number of buffers in the magazine
1004 * layer, we have the number of magazines on the cache_full
1005 * list plus at most two magazines per CPU (the loaded and the
1006 * spare). Toss in 100 magazines as a fudge factor in case this
1007 * is live (the number "100" comes from the same fudge factor in
1008 * crash(1M)).
1009 */
1010 magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize;
1011 magbsize = offsetof(kmem_magazine_t, mag_round[magsize]);
1012
1013 if (magbsize >= PAGESIZE / 2) {
1014 mdb_warn("magazine size for cache %p unreasonable (%x)\n",
1015 addr, magbsize);
1016 return (WALK_ERR);
1017 }
1018
1019 maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
1020 mp = mdb_alloc(magbsize, alloc_flags);
1021 if (mp == NULL || maglist == NULL)
1022 goto fail;
1023
1024 /*
1025 * First up: the magazines in the depot (i.e. on the cache_full list).
1026 */
1027 for (kmp = cp->cache_full.ml_list; kmp != NULL; ) {
1028 READMAG_ROUNDS(magsize);
1029 kmp = mp->mag_next;
1030
1031 if (kmp == cp->cache_full.ml_list)
1032 break; /* cache_full list loop detected */
1033 }
1034
1035 dprintf(("cache_full list done\n"));
1036
1037 /*
1038 * Now whip through the CPUs, snagging the loaded magazines
1039 * and full spares.
1040 *
1041 * In order to prevent inconsistent dumps, rounds and prounds
1042 * are copied aside before dumping begins.
1043 */
1044 for (cpu = 0; cpu < ncpus; cpu++) {
1045 kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];
1046 short rounds, prounds;
1047
1048 if (KMEM_DUMPCC(ccp)) {
1049 rounds = ccp->cc_dump_rounds;
1050 prounds = ccp->cc_dump_prounds;
1051 } else {
1052 rounds = ccp->cc_rounds;
1053 prounds = ccp->cc_prounds;
1054 }
1055
1056 dprintf(("reading cpu cache %p\n",
1057 (uintptr_t)ccp - (uintptr_t)cp + addr));
1058
1059 if (rounds > 0 &&
1060 (kmp = ccp->cc_loaded) != NULL) {
1061 dprintf(("reading %d loaded rounds\n", rounds));
1062 READMAG_ROUNDS(rounds);
1063 }
1064
1065 if (prounds > 0 &&
1066 (kmp = ccp->cc_ploaded) != NULL) {
1067 dprintf(("reading %d previously loaded rounds\n",
1068 prounds));
1069 READMAG_ROUNDS(prounds);
1070 }
1071 }
1072
1073 dprintf(("magazine layer: %d buffers\n", magcnt));
1074
1075 if (!(alloc_flags & UM_GC))
1076 mdb_free(mp, magbsize);
1077
1078 *maglistp = maglist;
1079 *magcntp = magcnt;
1080 *magmaxp = magmax;
1081
1082 return (WALK_NEXT);
1083
1084 fail:
1085 if (!(alloc_flags & UM_GC)) {
1086 if (mp)
1087 mdb_free(mp, magbsize);
1088 if (maglist)
1089 mdb_free(maglist, magmax * sizeof (void *));
1090 }
1091 return (WALK_ERR);
1092 }
1093
1094 static int
1095 kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
1096 {
1097 return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
1098 }
1099
1100 static int
1101 bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
1102 {
1103 kmem_bufctl_audit_t b;
1104
1105 /*
1106 * if KMF_AUDIT is not set, we know that we're looking at a
1107 * kmem_bufctl_t.
1108 */
1109 if (!(cp->cache_flags & KMF_AUDIT) ||
1110 mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) {
1111 (void) memset(&b, 0, sizeof (b));
1112 if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) {
1113 mdb_warn("unable to read bufctl at %p", buf);
1114 return (WALK_ERR);
1115 }
1116 }
1117
1118 return (wsp->walk_callback(buf, &b, wsp->walk_cbdata));
1119 }
1120
1121 typedef struct kmem_walk {
1122 int kmw_type;
1123
1124 int kmw_addr; /* cache address */
1125 kmem_cache_t *kmw_cp;
1126 size_t kmw_csize;
1127
1128 /*
1129 * magazine layer
1130 */
1131 void **kmw_maglist;
1132 size_t kmw_max;
1133 size_t kmw_count;
1134 size_t kmw_pos;
1135
1136 /*
1137 * slab layer
1138 */
1139 char *kmw_valid; /* to keep track of freed buffers */
1140 char *kmw_ubase; /* buffer for slab data */
1141 } kmem_walk_t;
1142
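/*
 * Common setup for the kmem/bufctl/freemem/freectl walkers: read and
 * sanity-check the cache, snapshot the magazine layer, then layer the
 * walk on top of either the hash-table walker (allocated buffers in
 * KMF_HASH caches) or the appropriate slab walker.
 */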
1143 static int
1144 kmem_walk_init_common(mdb_walk_state_t *wsp, int type)
1145 {
1146 kmem_walk_t *kmw;
1147 int ncpus, csize;
1148 kmem_cache_t *cp;
1149 size_t vm_quantum;
1150
1151 size_t magmax, magcnt;
1152 void **maglist = NULL;
1153 uint_t chunksize, slabsize;
1154 int status = WALK_ERR;
1155 uintptr_t addr = wsp->walk_addr;
1156 const char *layered;
1157
1158 type &= ~KM_HASH;
1159
1160 if (addr == NULL) {
1161 mdb_warn("kmem walk doesn't support global walks\n");
1162 return (WALK_ERR);
1163 }
1164
1165 dprintf(("walking %p\n", addr));
1166
1167 /*
1168 * First we need to figure out how many CPUs are configured in the
1169 * system to know how much to slurp out.
1170 */
1171 mdb_readvar(&ncpus, "max_ncpus");
1172
1173 csize = KMEM_CACHE_SIZE(ncpus);
1174 cp = mdb_alloc(csize, UM_SLEEP);
1175
1176 if (mdb_vread(cp, csize, addr) == -1) {
1177 mdb_warn("couldn't read cache at addr %p", addr);
1178 goto out2;
1179 }
1180
1181 /*
1182 * It's easy for someone to hand us an invalid cache address.
1183 * Unfortunately, it is hard for this walker to survive an
1184 * invalid cache cleanly. So we make sure that:
1185 *
1186 * 1. the vmem arena for the cache is readable,
1187 * 2. the vmem arena's quantum is a power of 2,
1188 * 3. our slabsize is a multiple of the quantum, and
1189 * 4. our chunksize is >0 and less than our slabsize.
1190 */
1191 if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
1192 (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
1193 vm_quantum == 0 ||
1194 (vm_quantum & (vm_quantum - 1)) != 0 ||
1195 cp->cache_slabsize < vm_quantum ||
1196 P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
1197 cp->cache_chunksize == 0 ||
1198 cp->cache_chunksize > cp->cache_slabsize) {
1199 mdb_warn("%p is not a valid kmem_cache_t\n", addr);
1200 goto out2;
1201 }
1202
1203 dprintf(("buf total is %d\n", cp->cache_buftotal));
1204
1205 if (cp->cache_buftotal == 0) {
1206 mdb_free(cp, csize);
1207 return (WALK_DONE);
1208 }
1209
1210 /*
1211 * If they ask for bufctls, but it's a small-slab cache,
1212 * there is nothing to report.
1213 */
1214 if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) {
1215 dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n",
1216 cp->cache_flags));
1217 mdb_free(cp, csize);
1218 return (WALK_DONE);
1219 }
1220
1221 /*
1222 * If they want constructed buffers, but there's no constructor or
1223 * the cache has DEADBEEF checking enabled, there is nothing to report.
1224 */
1225 if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) ||
1226 cp->cache_constructor == NULL ||
1227 (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) {
1228 mdb_free(cp, csize);
1229 return (WALK_DONE);
1230 }
1231
1232 /*
1233 * Read in the contents of the magazine layer
1234 */
1235 if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt,
1236 &magmax, UM_SLEEP) == WALK_ERR)
1237 goto out2;
1238
1239 /*
1240 * We have all of the buffers from the magazines; if we are walking
1241 * allocated buffers, sort them so we can bsearch them later.
1242 */
1243 if (type & KM_ALLOCATED)
1244 qsort(maglist, magcnt, sizeof (void *), addrcmp);
1245
1246 wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP);
1247
1248 kmw->kmw_type = type;
1249 kmw->kmw_addr = addr;
1250 kmw->kmw_cp = cp;
1251 kmw->kmw_csize = csize;
1252 kmw->kmw_maglist = maglist;
1253 kmw->kmw_max = magmax;
1254 kmw->kmw_count = magcnt;
1255 kmw->kmw_pos = 0;
1256
1257 /*
1258 * When walking allocated buffers in a KMF_HASH cache, we walk the
1259 * hash table instead of the slab layer.
1260 */
1261 if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) {
1262 layered = "kmem_hash";
1263
1264 kmw->kmw_type |= KM_HASH;
1265 } else {
1266 /*
1267 * If we are walking freed buffers, we only need the
1268 * magazine layer plus the partially allocated slabs.
1269 * To walk allocated buffers, we need all of the slabs.
1270 */
1271 if (type & KM_ALLOCATED)
1272 layered = "kmem_slab";
1273 else
1274 layered = "kmem_slab_partial";
1275
1276 /*
1277 * for small-slab caches, we read in the entire slab. For
1278 * freed buffers, we can just walk the freelist. For
1279 * allocated buffers, we use a 'valid' array to track
1280 * the freed buffers.
1281 */
1282 if (!(cp->cache_flags & KMF_HASH)) {
1283 chunksize = cp->cache_chunksize;
1284 slabsize = cp->cache_slabsize;
1285
1286 kmw->kmw_ubase = mdb_alloc(slabsize +
1287 sizeof (kmem_bufctl_t), UM_SLEEP);
1288
1289 if (type & KM_ALLOCATED)
1290 kmw->kmw_valid =
1291 mdb_alloc(slabsize / chunksize, UM_SLEEP);
1292 }
1293 }
1294
1295 status = WALK_NEXT;
1296
1297 if (mdb_layered_walk(layered, wsp) == -1) {
1298 mdb_warn("unable to start layered '%s' walk", layered);
1299 status = WALK_ERR;
1300 }
1301
1302 out1:
1303 if (status == WALK_ERR) {
1304 if (kmw->kmw_valid)
1305 mdb_free(kmw->kmw_valid, slabsize / chunksize);
1306
1307 if (kmw->kmw_ubase)
1308 mdb_free(kmw->kmw_ubase, slabsize +
1309 sizeof (kmem_bufctl_t));
1310
1311 if (kmw->kmw_maglist)
1312 mdb_free(kmw->kmw_maglist,
1313 kmw->kmw_max * sizeof (uintptr_t));
1314
1315 mdb_free(kmw, sizeof (kmem_walk_t));
1316 wsp->walk_data = NULL;
1317 }
1318
1319 out2:
1320 if (status == WALK_ERR)
1321 mdb_free(cp, csize);
1322
1323 return (status);
1324 }
1325
1326 int
1327 kmem_walk_step(mdb_walk_state_t *wsp)
1328 {
1329 kmem_walk_t *kmw = wsp->walk_data;
1330 int type = kmw->kmw_type;
1331 kmem_cache_t *cp = kmw->kmw_cp;
1332
1333 void **maglist = kmw->kmw_maglist;
1334 int magcnt = kmw->kmw_count;
1335
1336 uintptr_t chunksize, slabsize;
1337 uintptr_t addr;
1338 const kmem_slab_t *sp;
1339 const kmem_bufctl_t *bcp;
1340 kmem_bufctl_t bc;
1341
1342 int chunks;
1343 char *kbase;
1344 void *buf;
1345 int i, ret;
1346
1347 char *valid, *ubase;
1348
1349 /*
1350 * first, handle the 'kmem_hash' layered walk case
1351 */
1352 if (type & KM_HASH) {
1353 /*
1354 * We have a buffer which has been allocated out of the
1355 * global layer. We need to make sure that it's not
1356 * actually sitting in a magazine before we report it as
1357 * an allocated buffer.
1358 */
1359 buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr;
1360
1361 if (magcnt > 0 &&
1362 bsearch(&buf, maglist, magcnt, sizeof (void *),
1363 addrcmp) != NULL)
1364 return (WALK_NEXT);
1365
1366 if (type & KM_BUFCTL)
1367 return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));
1368
1369 return (kmem_walk_callback(wsp, (uintptr_t)buf));
1370 }
1371
1372 ret = WALK_NEXT;
1373
1374 addr = kmw->kmw_addr;
1375
1376 /*
1377 * If we're walking freed buffers, report everything in the
1378 * magazine layer before processing the first slab.
1379 */
1380 if ((type & KM_FREE) && magcnt != 0) {
1381 kmw->kmw_count = 0; /* only do this once */
1382 for (i = 0; i < magcnt; i++) {
1383 buf = maglist[i];
1384
1385 if (type & KM_BUFCTL) {
1386 uintptr_t out;
1387
1388 if (cp->cache_flags & KMF_BUFTAG) {
1389 kmem_buftag_t *btp;
1390 kmem_buftag_t tag;
1391
1392 /* LINTED - alignment */
1393 btp = KMEM_BUFTAG(cp, buf);
1394 if (mdb_vread(&tag, sizeof (tag),
1395 (uintptr_t)btp) == -1) {
1396 mdb_warn("reading buftag for "
1397 "%p at %p", buf, btp);
1398 continue;
1399 }
1400 out = (uintptr_t)tag.bt_bufctl;
1401 } else {
1402 if (kmem_hash_lookup(cp, addr, buf,
1403 &out) == -1)
1404 continue;
1405 }
1406 ret = bufctl_walk_callback(cp, wsp, out);
1407 } else {
1408 ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1409 }
1410
1411 if (ret != WALK_NEXT)
1412 return (ret);
1413 }
1414 }
1415
1416 /*
1417 * If they want constructed buffers, we're finished, since the
1418 * magazine layer holds them all.
1419 */
1420 if (type & KM_CONSTRUCTED)
1421 return (WALK_DONE);
1422
1423 /*
1424 * Handle the buffers in the current slab
1425 */
1426 chunksize = cp->cache_chunksize;
1427 slabsize = cp->cache_slabsize;
1428
1429 sp = wsp->walk_layer;
1430 chunks = sp->slab_chunks;
1431 kbase = sp->slab_base;
1432
1433 dprintf(("kbase is %p\n", kbase));
1434
1435 if (!(cp->cache_flags & KMF_HASH)) {
1436 valid = kmw->kmw_valid;
1437 ubase = kmw->kmw_ubase;
1438
1439 if (mdb_vread(ubase, chunks * chunksize,
1440 (uintptr_t)kbase) == -1) {
1441 mdb_warn("failed to read slab contents at %p", kbase);
1442 return (WALK_ERR);
1443 }
1444
1445 /*
1446 * Set up the valid map as fully allocated -- we'll punch
1447 * out the freelist.
1448 */
1449 if (type & KM_ALLOCATED)
1450 (void) memset(valid, 1, chunks);
1451 } else {
1452 valid = NULL;
1453 ubase = NULL;
1454 }
1455
1456 /*
1457 * walk the slab's freelist
1458 */
1459 bcp = sp->slab_head;
1460
1461 dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));
1462
1463 /*
1464 * since we could be in the middle of allocating a buffer,
1465 * our refcnt could be one higher than it ought to be. So we
1466 * check one further on the freelist than the count allows.
1467 */
1468 for (i = sp->slab_refcnt; i <= chunks; i++) {
1469 uint_t ndx;
1470
1471 dprintf(("bcp is %p\n", bcp));
1472
1473 if (bcp == NULL) {
1474 if (i == chunks)
1475 break;
1476 mdb_warn(
1477 "slab %p in cache %p freelist too short by %d\n",
1478 sp, addr, chunks - i);
1479 break;
1480 }
1481
1482 if (cp->cache_flags & KMF_HASH) {
1483 if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
1484 mdb_warn("failed to read bufctl ptr at %p",
1485 bcp);
1486 break;
1487 }
1488 buf = bc.bc_addr;
1489 } else {
1490 /*
1491 * Otherwise the buffer is in the slab which
1492 * we've read in; we just need to determine
1493 * its offset in the slab to find the
1494 * kmem_bufctl_t.
1495 */
1496 bc = *((kmem_bufctl_t *)
1497 ((uintptr_t)bcp - (uintptr_t)kbase +
1498 (uintptr_t)ubase));
1499
1500 buf = KMEM_BUF(cp, bcp);
1501 }
1502
1503 ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;
1504
1505 if (ndx > slabsize / cp->cache_bufsize) {
1506 /*
1507 * This is very wrong; we have managed to find
1508 * a buffer in the slab which shouldn't
1509 * actually be here. Emit a warning, and
1510 * try to continue.
1511 */
1512 mdb_warn("buf %p is out of range for "
1513 "slab %p, cache %p\n", buf, sp, addr);
1514 } else if (type & KM_ALLOCATED) {
1515 /*
1516 * we have found a buffer on the slab's freelist;
1517 * clear its entry
1518 */
1519 valid[ndx] = 0;
1520 } else {
1521 /*
1522 * Report this freed buffer
1523 */
1524 if (type & KM_BUFCTL) {
1525 ret = bufctl_walk_callback(cp, wsp,
1526 (uintptr_t)bcp);
1527 } else {
1528 ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1529 }
1530 if (ret != WALK_NEXT)
1531 return (ret);
1532 }
1533
1534 bcp = bc.bc_next;
1535 }
1536
1537 if (bcp != NULL) {
1538 dprintf(("slab %p in cache %p freelist too long (%p)\n",
1539 sp, addr, bcp));
1540 }
1541
1542 /*
1543 * If we are walking freed buffers, the loop above handled reporting
1544 * them.
1545 */
1546 if (type & KM_FREE)
1547 return (WALK_NEXT);
1548
1549 if (type & KM_BUFCTL) {
1550 mdb_warn("impossible situation: small-slab KM_BUFCTL walk for "
1551 "cache %p\n", addr);
1552 return (WALK_ERR);
1553 }
1554
1555 /*
1556 * Report allocated buffers, skipping buffers in the magazine layer.
1557 * We only get this far for small-slab caches.
1558 */
1559 for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
1560 buf = (char *)kbase + i * chunksize;
1561
1562 if (!valid[i])
1563 continue; /* on slab freelist */
1564
1565 if (magcnt > 0 &&
1566 bsearch(&buf, maglist, magcnt, sizeof (void *),
1567 addrcmp) != NULL)
1568 continue; /* in magazine layer */
1569
1570 ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1571 }
1572 return (ret);
1573 }
1574
1575 void
1576 kmem_walk_fini(mdb_walk_state_t *wsp)
1577 {
1578 kmem_walk_t *kmw = wsp->walk_data;
1579 uintptr_t chunksize;
1580 uintptr_t slabsize;
1581
1582 if (kmw == NULL)
1583 return;
1584
1585 if (kmw->kmw_maglist != NULL)
1586 mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *));
1587
1588 chunksize = kmw->kmw_cp->cache_chunksize;
1589 slabsize = kmw->kmw_cp->cache_slabsize;
1590
1591 if (kmw->kmw_valid != NULL)
1592 mdb_free(kmw->kmw_valid, slabsize / chunksize);
1593 if (kmw->kmw_ubase != NULL)
1594 mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t));
1595
1596 mdb_free(kmw->kmw_cp, kmw->kmw_csize);
1597 mdb_free(kmw, sizeof (kmem_walk_t));
1598 }
1599
1600 /*ARGSUSED*/
1601 static int
1602 kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp)
1603 {
1604 /*
1605 * Buffers allocated from NOTOUCH caches can also show up as freed
1606 * memory in other caches. This can be a little confusing, so we
1607 * don't walk NOTOUCH caches when walking all caches (thereby assuring
1608 * that "::walk kmem" and "::walk freemem" yield disjoint output).
1609 */
1610 if (c->cache_cflags & KMC_NOTOUCH)
1611 return (WALK_NEXT);
1612
1613 if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
1614 wsp->walk_cbdata, addr) == -1)
1615 return (WALK_DONE);
1616
1617 return (WALK_NEXT);
1618 }
1619
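/*
 * KMEM_WALK_ALL turns a global walk (no cache address) into a walk over
 * every cache: the per-cache walker's name is stashed in walk_data and
 * re-invoked for each cache by kmem_walk_all() above.
 */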
1620 #define KMEM_WALK_ALL(name, wsp) { \
1621 wsp->walk_data = (name); \
1622 if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, wsp) == -1) \
1623 return (WALK_ERR); \
1624 return (WALK_DONE); \
1625 }
1626
1627 int
1628 kmem_walk_init(mdb_walk_state_t *wsp)
1629 {
1630 if (wsp->walk_arg != NULL)
1631 wsp->walk_addr = (uintptr_t)wsp->walk_arg;
1632
1633 if (wsp->walk_addr == NULL)
1634 KMEM_WALK_ALL("kmem", wsp);
1635 return (kmem_walk_init_common(wsp, KM_ALLOCATED));
1636 }
1637
1638 int
1639 bufctl_walk_init(mdb_walk_state_t *wsp)
1640 {
1641 if (wsp->walk_addr == NULL)
1642 KMEM_WALK_ALL("bufctl", wsp);
1643 return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL));
1644 }
1645
1646 int
1647 freemem_walk_init(mdb_walk_state_t *wsp)
1648 {
1649 if (wsp->walk_addr == NULL)
1650 KMEM_WALK_ALL("freemem", wsp);
1651 return (kmem_walk_init_common(wsp, KM_FREE));
1652 }
1653
1654 int
1655 freemem_constructed_walk_init(mdb_walk_state_t *wsp)
1656 {
1657 if (wsp->walk_addr == NULL)
1658 KMEM_WALK_ALL("freemem_constructed", wsp);
1659 return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED));
1660 }
1661
1662 int
1663 freectl_walk_init(mdb_walk_state_t *wsp)
1664 {
1665 if (wsp->walk_addr == NULL)
1666 KMEM_WALK_ALL("freectl", wsp);
1667 return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL));
1668 }
1669
1670 int
1671 freectl_constructed_walk_init(mdb_walk_state_t *wsp)
1672 {
1673 if (wsp->walk_addr == NULL)
1674 KMEM_WALK_ALL("freectl_constructed", wsp);
1675 return (kmem_walk_init_common(wsp,
1676 KM_FREE | KM_BUFCTL | KM_CONSTRUCTED));
1677 }
1678
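/*
 * The bufctl_history walker starts from a bufctl and follows its
 * bc_lastlog chain backwards through the transaction log, reporting each
 * record that still matches the original buffer, cache, and slab and
 * whose timestamp is strictly decreasing.
 */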
1679 typedef struct bufctl_history_walk {
1680 void *bhw_next;
1681 kmem_cache_t *bhw_cache;
1682 kmem_slab_t *bhw_slab;
1683 hrtime_t bhw_timestamp;
1684 } bufctl_history_walk_t;
1685
1686 int
1687 bufctl_history_walk_init(mdb_walk_state_t *wsp)
1688 {
1689 bufctl_history_walk_t *bhw;
1690 kmem_bufctl_audit_t bc;
1691 kmem_bufctl_audit_t bcn;
1692
1693 if (wsp->walk_addr == NULL) {
1694 mdb_warn("bufctl_history walk doesn't support global walks\n");
1695 return (WALK_ERR);
1696 }
1697
1698 if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
1699 mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
1700 return (WALK_ERR);
1701 }
1702
1703 bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
1704 bhw->bhw_timestamp = 0;
1705 bhw->bhw_cache = bc.bc_cache;
1706 bhw->bhw_slab = bc.bc_slab;
1707
1708 /*
1709 * sometimes the first log entry matches the base bufctl; in that
1710 * case, skip the base bufctl.
1711 */
1712 if (bc.bc_lastlog != NULL &&
1713 mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
1714 bc.bc_addr == bcn.bc_addr &&
1715 bc.bc_cache == bcn.bc_cache &&
1716 bc.bc_slab == bcn.bc_slab &&
1717 bc.bc_timestamp == bcn.bc_timestamp &&
1718 bc.bc_thread == bcn.bc_thread)
1719 bhw->bhw_next = bc.bc_lastlog;
1720 else
1721 bhw->bhw_next = (void *)wsp->walk_addr;
1722
1723 wsp->walk_addr = (uintptr_t)bc.bc_addr;
1724 wsp->walk_data = bhw;
1725
1726 return (WALK_NEXT);
1727 }
1728
1729 int
1730 bufctl_history_walk_step(mdb_walk_state_t *wsp)
1731 {
1732 bufctl_history_walk_t *bhw = wsp->walk_data;
1733 uintptr_t addr = (uintptr_t)bhw->bhw_next;
1734 uintptr_t baseaddr = wsp->walk_addr;
1735 kmem_bufctl_audit_t bc;
1736
1737 if (addr == NULL)
1738 return (WALK_DONE);
1739
1740 if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
1741 mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
1742 return (WALK_ERR);
1743 }
1744
1745 /*
1746 * The bufctl is only valid if the address, cache, and slab are
1747 * correct. We also check that the timestamp is decreasing, to
1748 * prevent infinite loops.
1749 */
1750 if ((uintptr_t)bc.bc_addr != baseaddr ||
1751 bc.bc_cache != bhw->bhw_cache ||
1752 bc.bc_slab != bhw->bhw_slab ||
1753 (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp))
1754 return (WALK_DONE);
1755
1756 bhw->bhw_next = bc.bc_lastlog;
1757 bhw->bhw_timestamp = bc.bc_timestamp;
1758
1759 return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
1760 }
1761
1762 void
1763 bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1764 {
1765 bufctl_history_walk_t *bhw = wsp->walk_data;
1766
1767 mdb_free(bhw, sizeof (*bhw));
1768 }
1769
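/*
 * The kmem_log walker snapshots an entire transaction log (the global
 * kmem_transaction_log by default), collects a pointer to every record in
 * it, and reports the records sorted newest-first (see bufctlcmp() above).
 */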
1770 typedef struct kmem_log_walk {
1771 kmem_bufctl_audit_t *klw_base;
1772 kmem_bufctl_audit_t **klw_sorted;
1773 kmem_log_header_t klw_lh;
1774 size_t klw_size;
1775 size_t klw_maxndx;
1776 size_t klw_ndx;
1777 } kmem_log_walk_t;
1778
1779 int
1780 kmem_log_walk_init(mdb_walk_state_t *wsp)
1781 {
1782 uintptr_t lp = wsp->walk_addr;
1783 kmem_log_walk_t *klw;
1784 kmem_log_header_t *lhp;
1785 int maxndx, i, j, k;
1786
1787 /*
1788 * By default (global walk), walk the kmem_transaction_log. Otherwise
1789 * read the log whose kmem_log_header_t is stored at walk_addr.
1790 */
1791 if (lp == NULL && mdb_readvar(&lp, "kmem_transaction_log") == -1) {
1792 mdb_warn("failed to read 'kmem_transaction_log'");
1793 return (WALK_ERR);
1794 }
1795
1796 if (lp == NULL) {
1797 mdb_warn("log is disabled\n");
1798 return (WALK_ERR);
1799 }
1800
1801 klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP);
1802 lhp = &klw->klw_lh;
1803
1804 if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) {
1805 mdb_warn("failed to read log header at %p", lp);
1806 mdb_free(klw, sizeof (kmem_log_walk_t));
1807 return (WALK_ERR);
1808 }
1809
1810 klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks;
1811 klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP);
1812 maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1;
1813
1814 if (mdb_vread(klw->klw_base, klw->klw_size,
1815 (uintptr_t)lhp->lh_base) == -1) {
1816 mdb_warn("failed to read log at base %p", lhp->lh_base);
1817 mdb_free(klw->klw_base, klw->klw_size);
1818 mdb_free(klw, sizeof (kmem_log_walk_t));
1819 return (WALK_ERR);
1820 }
1821
1822 klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
1823 sizeof (kmem_bufctl_audit_t *), UM_SLEEP);
1824
1825 for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
1826 kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *)
1827 ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize);
1828
1829 for (j = 0; j < maxndx; j++)
1830 klw->klw_sorted[k++] = &chunk[j];
1831 }
1832
1833 qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *),
1834 (int(*)(const void *, const void *))bufctlcmp);
1835
1836 klw->klw_maxndx = k;
1837 wsp->walk_data = klw;
1838
1839 return (WALK_NEXT);
1840 }
1841
1842 int
1843 kmem_log_walk_step(mdb_walk_state_t *wsp)
1844 {
1845 kmem_log_walk_t *klw = wsp->walk_data;
1846 kmem_bufctl_audit_t *bcp;
1847
1848 if (klw->klw_ndx == klw->klw_maxndx)
1849 return (WALK_DONE);
1850
1851 bcp = klw->klw_sorted[klw->klw_ndx++];
1852
1853 return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base +
1854 (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata));
1855 }
1856
1857 void
1858 kmem_log_walk_fini(mdb_walk_state_t *wsp)
1859 {
1860 kmem_log_walk_t *klw = wsp->walk_data;
1861
1862 mdb_free(klw->klw_base, klw->klw_size);
1863 mdb_free(klw->klw_sorted, klw->klw_maxndx *
1864 sizeof (kmem_bufctl_audit_t *));
1865 mdb_free(klw, sizeof (kmem_log_walk_t));
1866 }
1867
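/*
 * allocdby/freedby support: collect every bufctl (or freectl) record
 * whose bc_thread matches the target thread, across all caches, and
 * report them sorted newest-first.
 */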
1868 typedef struct allocdby_bufctl {
1869 uintptr_t abb_addr;
1870 hrtime_t abb_ts;
1871 } allocdby_bufctl_t;
1872
1873 typedef struct allocdby_walk {
1874 const char *abw_walk;
1875 uintptr_t abw_thread;
1876 size_t abw_nbufs;
1877 size_t abw_size;
1878 allocdby_bufctl_t *abw_buf;
1879 size_t abw_ndx;
1880 } allocdby_walk_t;
1881
1882 int
1883 allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp,
1884 allocdby_walk_t *abw)
1885 {
1886 if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1887 return (WALK_NEXT);
1888
1889 if (abw->abw_nbufs == abw->abw_size) {
1890 allocdby_bufctl_t *buf;
1891 size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1892
1893 buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1894
1895 bcopy(abw->abw_buf, buf, oldsize);
1896 mdb_free(abw->abw_buf, oldsize);
1897
1898 abw->abw_size <<= 1;
1899 abw->abw_buf = buf;
1900 }
1901
1902 abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1903 abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1904 abw->abw_nbufs++;
1905
1906 return (WALK_NEXT);
1907 }
1908
1909 /*ARGSUSED*/
1910 int
1911 allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw)
1912 {
1913 if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1914 abw, addr) == -1) {
1915 mdb_warn("couldn't walk bufctl for cache %p", addr);
1916 return (WALK_DONE);
1917 }
1918
1919 return (WALK_NEXT);
1920 }
1921
1922 static int
1923 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1924 {
1925 if (lhs->abb_ts < rhs->abb_ts)
1926 return (1);
1927 if (lhs->abb_ts > rhs->abb_ts)
1928 return (-1);
1929 return (0);
1930 }
1931
1932 static int
1933 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
1934 {
1935 allocdby_walk_t *abw;
1936
1937 if (wsp->walk_addr == NULL) {
1938 mdb_warn("allocdby walk doesn't support global walks\n");
1939 return (WALK_ERR);
1940 }
1941
1942 abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);
1943
1944 abw->abw_thread = wsp->walk_addr;
1945 abw->abw_walk = walk;
1946 abw->abw_size = 128; /* something reasonable */
1947 abw->abw_buf =
1948 mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);
1949
1950 wsp->walk_data = abw;
1951
1952 if (mdb_walk("kmem_cache",
1953 (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
1954 mdb_warn("couldn't walk kmem_cache");
1955 allocdby_walk_fini(wsp);
1956 return (WALK_ERR);
1957 }
1958
1959 qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
1960 (int(*)(const void *, const void *))allocdby_cmp);
1961
1962 return (WALK_NEXT);
1963 }
1964
1965 int
1966 allocdby_walk_init(mdb_walk_state_t *wsp)
1967 {
1968 return (allocdby_walk_init_common(wsp, "bufctl"));
1969 }
1970
1971 int
1972 freedby_walk_init(mdb_walk_state_t *wsp)
1973 {
1974 return (allocdby_walk_init_common(wsp, "freectl"));
1975 }
1976
1977 int
1978 allocdby_walk_step(mdb_walk_state_t *wsp)
1979 {
1980 allocdby_walk_t *abw = wsp->walk_data;
1981 kmem_bufctl_audit_t bc;
1982 uintptr_t addr;
1983
1984 if (abw->abw_ndx == abw->abw_nbufs)
1985 return (WALK_DONE);
1986
1987 addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
1988
1989 if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
1990 mdb_warn("couldn't read bufctl at %p", addr);
1991 return (WALK_DONE);
1992 }
1993
1994 return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
1995 }
1996
1997 void
1998 allocdby_walk_fini(mdb_walk_state_t *wsp)
1999 {
2000 allocdby_walk_t *abw = wsp->walk_data;
2001
2002 mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
2003 mdb_free(abw, sizeof (allocdby_walk_t));
2004 }
2005
2006 /*ARGSUSED*/
2007 int
2008 allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored)
2009 {
2010 char c[MDB_SYM_NAMLEN];
2011 GElf_Sym sym;
2012 int i;
2013
2014 mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
2015 for (i = 0; i < bcp->bc_depth; i++) {
2016 if (mdb_lookup_by_addr(bcp->bc_stack[i],
2017 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2018 continue;
2019 if (strncmp(c, "kmem_", 5) == 0)
2020 continue;
2021 mdb_printf("%s+0x%lx",
2022 c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
2023 break;
2024 }
2025 mdb_printf("\n");
2026
2027 return (WALK_NEXT);
2028 }
2029
2030 static int
2031 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
2032 {
2033 if (!(flags & DCMD_ADDRSPEC))
2034 return (DCMD_USAGE);
2035
2036 mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
2037
2038 if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
2039 mdb_warn("can't walk '%s' for %p", w, addr);
2040 return (DCMD_ERR);
2041 }
2042
2043 return (DCMD_OK);
2044 }
2045
2046 /*ARGSUSED*/
2047 int
2048 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2049 {
2050 return (allocdby_common(addr, flags, "allocdby"));
2051 }
2052
2053 /*ARGSUSED*/
2054 int
2055 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2056 {
2057 return (allocdby_common(addr, flags, "freedby"));
2058 }
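
/*
 * Example usage (addresses are illustrative):
 *
 *	> <thread_addr>::allocdby	! bufctls allocated by the thread
 *	> <thread_addr>::freedby	! bufctls freed by the thread
 *
 * Both are sorted newest-first (see allocdby_cmp()).  Other dcmds can reuse
 * the underlying walkers directly, e.g. mdb_pwalk("allocdby", cb, arg, thr).
 */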
2059
2060 /*
2061 * Return a string describing the address in relation to the given thread's
2062 * stack.
2063 *
2064 * - If the thread state is TS_FREE, return " (inactive interrupt thread)".
2065 *
2066 * - If the address is above the stack pointer, return an empty string
2067 * signifying that the address is active.
2068 *
2069 * - If the address is below the stack pointer, and the thread is not on proc,
2070 * return " (below sp)".
2071 *
2072 * - If the address is below the stack pointer, and the thread is on proc,
2073 * return " (possibly below sp)". Depending on context, we may or may not
2074 * have an accurate t_sp.
2075 */
2076 static const char *
2077 stack_active(const kthread_t *t, uintptr_t addr)
2078 {
2079 uintptr_t panicstk;
2080 GElf_Sym sym;
2081
2082 if (t->t_state == TS_FREE)
2083 return (" (inactive interrupt thread)");
2084
2085 /*
2086 * Check to see if we're on the panic stack. If so, ignore t_sp, as it
2087 * no longer relates to the thread's real stack.
2088 */
2089 if (mdb_lookup_by_name("panic_stack", &sym) == 0) {
2090 panicstk = (uintptr_t)sym.st_value;
2091
2092 if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE)
2093 return ("");
2094 }
2095
2096 if (addr >= t->t_sp + STACK_BIAS)
2097 return ("");
2098
2099 if (t->t_state == TS_ONPROC)
2100 return (" (possibly below sp)");
2101
2102 return (" (below sp)");
2103 }
2104
2105 /*
2106 * Additional state for the kmem and vmem ::whatis handlers
2107 */
2108 typedef struct whatis_info {
2109 mdb_whatis_t *wi_w;
2110 const kmem_cache_t *wi_cache;
2111 const vmem_t *wi_vmem;
2112 vmem_t *wi_msb_arena;
2113 size_t wi_slab_size;
2114 uint_t wi_slab_found;
2115 uint_t wi_kmem_lite_count;
2116 uint_t wi_freemem;
2117 } whatis_info_t;
2118
2119 /* call one of our dcmd functions with "-v" and the provided address */
2120 static void
2121 whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr)
2122 {
2123 mdb_arg_t a;
2124 a.a_type = MDB_TYPE_STRING;
2125 a.a_un.a_str = "-v";
2126
2127 mdb_printf(":\n");
2128 (void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a);
2129 }
2130
2131 static void
2132 whatis_print_kmf_lite(uintptr_t btaddr, size_t count)
2133 {
2134 #define KMEM_LITE_MAX 16
2135 pc_t callers[KMEM_LITE_MAX];
2136 pc_t uninit = (pc_t)KMEM_UNINITIALIZED_PATTERN;
2137
2138 kmem_buftag_t bt;
2139 intptr_t stat;
2140 const char *plural = "";
2141 int i;
2142
2143 /* validate our arguments and read in the buftag */
2144 if (count == 0 || count > KMEM_LITE_MAX ||
2145 mdb_vread(&bt, sizeof (bt), btaddr) == -1)
2146 return;
2147
2148 /* validate the buffer state and read in the callers */
2149 stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat;
2150
2151 if ((stat != KMEM_BUFTAG_ALLOC && stat != KMEM_BUFTAG_FREE) ||
2152 mdb_vread(callers, count * sizeof (pc_t),
2153 btaddr + offsetof(kmem_buftag_lite_t, bt_history)) == -1)
2154 return;
2155
2156 /* If there aren't any filled in callers, bail */
2157 if (callers[0] == uninit)
2158 return;
2159
2160 plural = (callers[1] == uninit) ? "" : "s";
2161
2162 /* Everything's done and checked; print them out */
2163 mdb_printf(":\n");
2164
2165 mdb_inc_indent(8);
2166 mdb_printf("recent caller%s: %a", plural, callers[0]);
2167 for (i = 1; i < count; i++) {
2168 if (callers[i] == uninit)
2169 break;
2170 mdb_printf(", %a", callers[i]);
2171 }
2172 mdb_dec_indent(8);
2173 }
2174
2175 static void
2176 whatis_print_kmem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr,
2177 uintptr_t baddr)
2178 {
2179 mdb_whatis_t *w = wi->wi_w;
2180
2181 const kmem_cache_t *cp = wi->wi_cache;
2182 /* LINTED pointer cast may result in improper alignment */
2183 uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(cp, addr);
2184 int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET);
2185 int call_printer = (!quiet && (cp->cache_flags & KMF_AUDIT));
2186
2187 mdb_whatis_report_object(w, maddr, addr, "");
2188
2189 if (baddr != 0 && !call_printer)
2190 mdb_printf("bufctl %p ", baddr);
2191
2192 mdb_printf("%s from %s",
2193 (wi->wi_freemem == FALSE) ? "allocated" : "freed", cp->cache_name);
2194
2195 if (baddr != 0 && call_printer) {
2196 whatis_call_printer(bufctl, baddr);
2197 return;
2198 }
2199
2200 /* for KMF_LITE caches, try to print out the previous callers */
2201 if (!quiet && (cp->cache_flags & KMF_LITE))
2202 whatis_print_kmf_lite(btaddr, wi->wi_kmem_lite_count);
2203
2204 mdb_printf("\n");
2205 }
2206
2207 /*ARGSUSED*/
2208 static int
2209 whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_info_t *wi)
2210 {
2211 mdb_whatis_t *w = wi->wi_w;
2212
2213 uintptr_t cur;
2214 size_t size = wi->wi_cache->cache_bufsize;
2215
2216 while (mdb_whatis_match(w, addr, size, &cur))
2217 whatis_print_kmem(wi, cur, addr, NULL);
2218
2219 return (WHATIS_WALKRET(w));
2220 }
2221
2222 /*ARGSUSED*/
2223 static int
2224 whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_info_t *wi)
2225 {
2226 mdb_whatis_t *w = wi->wi_w;
2227
2228 uintptr_t cur;
2229 uintptr_t addr = (uintptr_t)bcp->bc_addr;
2230 size_t size = wi->wi_cache->cache_bufsize;
2231
2232 while (mdb_whatis_match(w, addr, size, &cur))
2233 whatis_print_kmem(wi, cur, addr, baddr);
2234
2235 return (WHATIS_WALKRET(w));
2236 }
2237
2238 static int
2239 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi)
2240 {
2241 mdb_whatis_t *w = wi->wi_w;
2242
2243 size_t size = vs->vs_end - vs->vs_start;
2244 uintptr_t cur;
2245
2246 /* We're not interested in anything but alloc and free segments */
2247 if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE)
2248 return (WALK_NEXT);
2249
2250 while (mdb_whatis_match(w, vs->vs_start, size, &cur)) {
2251 mdb_whatis_report_object(w, cur, vs->vs_start, "");
2252
2253 /*
2254 * If we're not printing it separately, provide the vmem_seg
2255 * pointer if it has a stack trace.
2256 */
2257 if ((mdb_whatis_flags(w) & WHATIS_QUIET) &&
2258 (!(mdb_whatis_flags(w) & WHATIS_BUFCTL) ||
2259 (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) {
2260 mdb_printf("vmem_seg %p ", addr);
2261 }
2262
2263 mdb_printf("%s from the %s vmem arena",
2264 (vs->vs_type == VMEM_ALLOC) ? "allocated" : "freed",
2265 wi->wi_vmem->vm_name);
2266
2267 if (!(mdb_whatis_flags(w) & WHATIS_QUIET))
2268 whatis_call_printer(vmem_seg, addr);
2269 else
2270 mdb_printf("\n");
2271 }
2272
2273 return (WHATIS_WALKRET(w));
2274 }
2275
2276 static int
2277 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi)
2278 {
2279 mdb_whatis_t *w = wi->wi_w;
2280 const char *nm = vmem->vm_name;
2281
2282 int identifier = ((vmem->vm_cflags & VMC_IDENTIFIER) != 0);
2283 int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0);
2284
2285 if (identifier != idspace)
2286 return (WALK_NEXT);
2287
2288 wi->wi_vmem = vmem;
2289
2290 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2291 mdb_printf("Searching vmem arena %s...\n", nm);
2292
2293 if (mdb_pwalk("vmem_seg",
2294 (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) {
2295 mdb_warn("can't walk vmem_seg for %p", addr);
2296 return (WALK_NEXT);
2297 }
2298
2299 return (WHATIS_WALKRET(w));
2300 }
2301
2302 /*ARGSUSED*/
2303 static int
2304 whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_info_t *wi)
2305 {
2306 mdb_whatis_t *w = wi->wi_w;
2307
2308 /* It must overlap with the slab data, or it's not interesting */
2309 if (mdb_whatis_overlaps(w,
2310 (uintptr_t)sp->slab_base, wi->wi_slab_size)) {
2311 wi->wi_slab_found++;
2312 return (WALK_DONE);
2313 }
2314 return (WALK_NEXT);
2315 }
2316
2317 static int
2318 whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2319 {
2320 mdb_whatis_t *w = wi->wi_w;
2321
2322 char *walk, *freewalk;
2323 mdb_walk_cb_t func;
2324 int do_bufctl;
2325
2326 int identifier = ((c->cache_flags & KMC_IDENTIFIER) != 0);
2327 int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0);
2328
2329 if (identifier != idspace)
2330 return (WALK_NEXT);
2331
2332 /* Override the '-b' flag as necessary */
2333 if (!(c->cache_flags & KMF_HASH))
2334 do_bufctl = FALSE; /* no bufctls to walk */
2335 else if (c->cache_flags & KMF_AUDIT)
2336 do_bufctl = TRUE; /* we always want debugging info */
2337 else
2338 do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0);
2339
2340 if (do_bufctl) {
2341 walk = "bufctl";
2342 freewalk = "freectl";
2343 func = (mdb_walk_cb_t)whatis_walk_bufctl;
2344 } else {
2345 walk = "kmem";
2346 freewalk = "freemem";
2347 func = (mdb_walk_cb_t)whatis_walk_kmem;
2348 }
2349
2350 wi->wi_cache = c;
2351
2352 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2353 mdb_printf("Searching %s...\n", c->cache_name);
2354
2355 /*
2356 * If more than two buffers live on each slab, figure out if we're
2357 * interested in anything in any slab before doing the more expensive
2358 * kmem/freemem (bufctl/freectl) walkers.
2359 */
2360 wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor;
2361 if (!(c->cache_flags & KMF_HASH))
2362 wi->wi_slab_size -= sizeof (kmem_slab_t);
2363
2364 if ((wi->wi_slab_size / c->cache_chunksize) > 2) {
2365 wi->wi_slab_found = 0;
2366 if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi,
2367 addr) == -1) {
2368 mdb_warn("can't find kmem_slab walker");
2369 return (WALK_DONE);
2370 }
2371 if (wi->wi_slab_found == 0)
2372 return (WALK_NEXT);
2373 }
2374
2375 wi->wi_freemem = FALSE;
2376 if (mdb_pwalk(walk, func, wi, addr) == -1) {
2377 mdb_warn("can't find %s walker", walk);
2378 return (WALK_DONE);
2379 }
2380
2381 if (mdb_whatis_done(w))
2382 return (WALK_DONE);
2383
2384 /*
2385 * We have searched for allocated memory; now search for freed memory.
2386 */
2387 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2388 mdb_printf("Searching %s for free memory...\n", c->cache_name);
2389
2390 wi->wi_freemem = TRUE;
2391 if (mdb_pwalk(freewalk, func, wi, addr) == -1) {
2392 mdb_warn("can't find %s walker", freewalk);
2393 return (WALK_DONE);
2394 }
2395
2396 return (WHATIS_WALKRET(w));
2397 }
2398
2399 static int
2400 whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2401 {
2402 if (c->cache_arena == wi->wi_msb_arena ||
2403 (c->cache_cflags & KMC_NOTOUCH))
2404 return (WALK_NEXT);
2405
2406 return (whatis_walk_cache(addr, c, wi));
2407 }
2408
2409 static int
2410 whatis_walk_metadata(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2411 {
2412 if (c->cache_arena != wi->wi_msb_arena)
2413 return (WALK_NEXT);
2414
2415 return (whatis_walk_cache(addr, c, wi));
2416 }
2417
2418 static int
2419 whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2420 {
2421 if (c->cache_arena == wi->wi_msb_arena ||
2422 !(c->cache_cflags & KMC_NOTOUCH))
2423 return (WALK_NEXT);
2424
2425 return (whatis_walk_cache(addr, c, wi));
2426 }
2427
2428 static int
2429 whatis_walk_thread(uintptr_t addr, const kthread_t *t, mdb_whatis_t *w)
2430 {
2431 uintptr_t cur;
2432 uintptr_t saddr;
2433 size_t size;
2434
2435 /*
2436 * Often, one calls ::whatis on an address from a thread structure.
2437 * We use this opportunity to short circuit this case...
2438 */
2439 while (mdb_whatis_match(w, addr, sizeof (kthread_t), &cur))
2440 mdb_whatis_report_object(w, cur, addr,
2441 "allocated as a thread structure\n");
2442
2443 /*
2444 * Now check the stack
2445 */
2446 if (t->t_stkbase == NULL)
2447 return (WALK_NEXT);
2448
2449 /*
2450 * This assumes that t_stk is the end of the stack, but it's really
2451 * only the initial stack pointer for the thread. Arguments to the
2452 * initial procedure, SA(MINFRAME), etc. are all after t_stk. So
2453 * that 't->t_stk::whatis' reports "part of t's stack", we include
2454 * t_stk in the range (the "+ 1", below), but the kernel should
2455 * really include the full stack bounds where we can find it.
2456 */
2457 saddr = (uintptr_t)t->t_stkbase;
2458 size = (uintptr_t)t->t_stk - saddr + 1;
2459 while (mdb_whatis_match(w, saddr, size, &cur))
2460 mdb_whatis_report_object(w, cur, cur,
2461 "in thread %p's stack%s\n", addr, stack_active(t, cur));
2462
2463 return (WHATIS_WALKRET(w));
2464 }
2465
2466 static void
2467 whatis_modctl_match(mdb_whatis_t *w, const char *name,
2468 uintptr_t base, size_t size, const char *where)
2469 {
2470 uintptr_t cur;
2471
2472 /*
2473 * Since we're searching for addresses inside a module, we report
2474 * them as symbols.
2475 */
2476 while (mdb_whatis_match(w, base, size, &cur))
2477 mdb_whatis_report_address(w, cur, "in %s's %s\n", name, where);
2478 }
2479
2480 static int
2481 whatis_walk_modctl(uintptr_t addr, const struct modctl *m, mdb_whatis_t *w)
2482 {
2483 char name[MODMAXNAMELEN];
2484 struct module mod;
2485 Shdr shdr;
2486
2487 if (m->mod_mp == NULL)
2488 return (WALK_NEXT);
2489
2490 if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) {
2491 mdb_warn("couldn't read modctl %p's module", addr);
2492 return (WALK_NEXT);
2493 }
2494
2495 if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1)
2496 (void) mdb_snprintf(name, sizeof (name), "0x%p", addr);
2497
2498 whatis_modctl_match(w, name,
2499 (uintptr_t)mod.text, mod.text_size, "text segment");
2500 whatis_modctl_match(w, name,
2501 (uintptr_t)mod.data, mod.data_size, "data segment");
2502 whatis_modctl_match(w, name,
2503 (uintptr_t)mod.bss, mod.bss_size, "bss segment");
2504
2505 if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) {
2506 mdb_warn("couldn't read symbol header for %p's module", addr);
2507 return (WALK_NEXT);
2508 }
2509
2510 whatis_modctl_match(w, name,
2511 (uintptr_t)mod.symtbl, mod.nsyms * shdr.sh_entsize, "symtab");
2512 whatis_modctl_match(w, name,
2513 (uintptr_t)mod.symspace, mod.symsize, "symtab");
2514
2515 return (WHATIS_WALKRET(w));
2516 }
2517
2518 /*ARGSUSED*/
2519 static int
2520 whatis_walk_memseg(uintptr_t addr, const struct memseg *seg, mdb_whatis_t *w)
2521 {
2522 uintptr_t cur;
2523
2524 uintptr_t base = (uintptr_t)seg->pages;
2525 size_t size = (uintptr_t)seg->epages - base;
2526
2527 while (mdb_whatis_match(w, base, size, &cur)) {
2528 /* round our found pointer down to the page_t base. */
2529 size_t offset = (cur - base) % sizeof (page_t);
2530
2531 mdb_whatis_report_object(w, cur, cur - offset,
2532 "allocated as a page structure\n");
2533 }
2534
2535 return (WHATIS_WALKRET(w));
2536 }
2537
2538 /*ARGSUSED*/
2539 static int
2540 whatis_run_modules(mdb_whatis_t *w, void *arg)
2541 {
2542 if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, w) == -1) {
2543 mdb_warn("couldn't find modctl walker");
2544 return (1);
2545 }
2546 return (0);
2547 }
2548
2549 /*ARGSUSED*/
2550 static int
2551 whatis_run_threads(mdb_whatis_t *w, void *ignored)
2552 {
2553 /*
2554 * Now search all thread stacks. Yes, this is a little weak; we
2555 * can save a lot of work by first checking to see if the
2556 * address is in segkp vs. segkmem. But hey, computers are
2557 * fast.
2558 */
2559 if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, w) == -1) {
2560 mdb_warn("couldn't find thread walker");
2561 return (1);
2562 }
2563 return (0);
2564 }
2565
2566 /*ARGSUSED*/
2567 static int
2568 whatis_run_pages(mdb_whatis_t *w, void *ignored)
2569 {
2570 if (mdb_walk("memseg", (mdb_walk_cb_t)whatis_walk_memseg, w) == -1) {
2571 mdb_warn("couldn't find memseg walker");
2572 return (1);
2573 }
2574 return (0);
2575 }
2576
2577 /*ARGSUSED*/
2578 static int
2579 whatis_run_kmem(mdb_whatis_t *w, void *ignored)
2580 {
2581 whatis_info_t wi;
2582
2583 bzero(&wi, sizeof (wi));
2584 wi.wi_w = w;
2585
2586 if (mdb_readvar(&wi.wi_msb_arena, "kmem_msb_arena") == -1)
2587 mdb_warn("unable to readvar \"kmem_msb_arena\"");
2588
2589 if (mdb_readvar(&wi.wi_kmem_lite_count,
2590 "kmem_lite_count") == -1 || wi.wi_kmem_lite_count > 16)
2591 wi.wi_kmem_lite_count = 0;
2592
2593 /*
2594 * We process kmem caches in the following order:
2595 *
2596 * non-KMC_NOTOUCH, non-metadata (typically the most interesting)
2597 * metadata (can be huge with KMF_AUDIT)
2598 * KMC_NOTOUCH, non-metadata (see kmem_walk_all())
2599 */
2600 if (mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_touch,
2601 &wi) == -1 ||
2602 mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_metadata,
2603 &wi) == -1 ||
2604 mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_notouch,
2605 &wi) == -1) {
2606 mdb_warn("couldn't find kmem_cache walker");
2607 return (1);
2608 }
2609 return (0);
2610 }
2611
2612 /*ARGSUSED*/
2613 static int
2614 whatis_run_vmem(mdb_whatis_t *w, void *ignored)
2615 {
2616 whatis_info_t wi;
2617
2618 bzero(&wi, sizeof (wi));
2619 wi.wi_w = w;
2620
2621 if (mdb_walk("vmem_postfix",
2622 (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) {
2623 mdb_warn("couldn't find vmem_postfix walker");
2624 return (1);
2625 }
2626 return (0);
2627 }
2628
2629 typedef struct kmem_log_cpu {
2630 uintptr_t kmc_low;
2631 uintptr_t kmc_high;
2632 } kmem_log_cpu_t;
2633
2634 typedef struct kmem_log_data {
2635 uintptr_t kmd_addr;
2636 kmem_log_cpu_t *kmd_cpu;
2637 } kmem_log_data_t;
2638
2639 int
2640 kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b,
2641 kmem_log_data_t *kmd)
2642 {
2643 int i;
2644 kmem_log_cpu_t *kmc = kmd->kmd_cpu;
2645 size_t bufsize;
2646
2647 for (i = 0; i < NCPU; i++) {
2648 if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high)
2649 break;
2650 }
2651
2652 if (kmd->kmd_addr) {
2653 if (b->bc_cache == NULL)
2654 return (WALK_NEXT);
2655
2656 if (mdb_vread(&bufsize, sizeof (bufsize),
2657 (uintptr_t)&b->bc_cache->cache_bufsize) == -1) {
2658 mdb_warn(
2659 "failed to read cache_bufsize for cache at %p",
2660 b->bc_cache);
2661 return (WALK_ERR);
2662 }
2663
2664 if (kmd->kmd_addr < (uintptr_t)b->bc_addr ||
2665 kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize)
2666 return (WALK_NEXT);
2667 }
2668
2669 if (i == NCPU)
2670 mdb_printf(" ");
2671 else
2672 mdb_printf("%3d", i);
2673
2674 mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2675 b->bc_timestamp, b->bc_thread);
2676
2677 return (WALK_NEXT);
2678 }
2679
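/*
 * Example usage (address is illustrative):
 *
 *	> ::kmem_log			! dump every transaction log entry
 *	> <buf_addr>::kmem_log -b	! only entries for the buffer at addr
 */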
2680 /*ARGSUSED*/
2681 int
2682 kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2683 {
2684 kmem_log_header_t lh;
2685 kmem_cpu_log_header_t clh;
2686 uintptr_t lhp, clhp;
2687 int ncpus;
2688 uintptr_t *cpu;
2689 GElf_Sym sym;
2690 kmem_log_cpu_t *kmc;
2691 int i;
2692 kmem_log_data_t kmd;
2693 uint_t opt_b = FALSE;
2694
2695 if (mdb_getopts(argc, argv,
2696 'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc)
2697 return (DCMD_USAGE);
2698
2699 if (mdb_readvar(&lhp, "kmem_transaction_log") == -1) {
2700 mdb_warn("failed to read 'kmem_transaction_log'");
2701 return (DCMD_ERR);
2702 }
2703
2704 if (lhp == NULL) {
2705 mdb_warn("no kmem transaction log\n");
2706 return (DCMD_ERR);
2707 }
2708
2709 mdb_readvar(&ncpus, "ncpus");
2710
2711 if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) {
2712 mdb_warn("failed to read log header at %p", lhp);
2713 return (DCMD_ERR);
2714 }
2715
2716 clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2717
2718 cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC);
2719
2720 if (mdb_lookup_by_name("cpu", &sym) == -1) {
2721 mdb_warn("couldn't find 'cpu' array");
2722 return (DCMD_ERR);
2723 }
2724
2725 if (sym.st_size != NCPU * sizeof (uintptr_t)) {
2726 mdb_warn("expected 'cpu' to be of size %d; found %d\n",
2727 NCPU * sizeof (uintptr_t), sym.st_size);
2728 return (DCMD_ERR);
2729 }
2730
2731 if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) {
2732 mdb_warn("failed to read cpu array at %p", sym.st_value);
2733 return (DCMD_ERR);
2734 }
2735
2736 kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC);
2737 kmd.kmd_addr = NULL;
2738 kmd.kmd_cpu = kmc;
2739
2740 for (i = 0; i < NCPU; i++) {
2741
2742 if (cpu[i] == NULL)
2743 continue;
2744
2745 if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2746 mdb_warn("cannot read cpu %d's log header at %p",
2747 i, clhp);
2748 return (DCMD_ERR);
2749 }
2750
2751 kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize +
2752 (uintptr_t)lh.lh_base;
2753 kmc[i].kmc_high = (uintptr_t)clh.clh_current;
2754
2755 clhp += sizeof (kmem_cpu_log_header_t);
2756 }
2757
2758 mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR",
2759 "TIMESTAMP", "THREAD");
2760
2761 /*
2762 * If we have been passed an address, print out only log entries
2763 * for that address: with -b the address is a buffer (all of its entries
2764 * are printed); without -b it is read as a bufctl and printed directly.
2765 */
2766 if (flags & DCMD_ADDRSPEC) {
2767 kmem_bufctl_audit_t b;
2768
2769 if (opt_b) {
2770 kmd.kmd_addr = addr;
2771 } else {
2772 if (mdb_vread(&b,
2773 sizeof (kmem_bufctl_audit_t), addr) == -1) {
2774 mdb_warn("failed to read bufctl at %p", addr);
2775 return (DCMD_ERR);
2776 }
2777
2778 (void) kmem_log_walk(addr, &b, &kmd);
2779
2780 return (DCMD_OK);
2781 }
2782 }
2783
2784 if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) {
2785 mdb_warn("can't find kmem log walker");
2786 return (DCMD_ERR);
2787 }
2788
2789 return (DCMD_OK);
2790 }
2791
2792 typedef struct bufctl_history_cb {
2793 int bhc_flags;
2794 int bhc_argc;
2795 const mdb_arg_t *bhc_argv;
2796 int bhc_ret;
2797 } bufctl_history_cb_t;
2798
2799 /*ARGSUSED*/
2800 static int
2801 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2802 {
2803 bufctl_history_cb_t *bhc = arg;
2804
2805 bhc->bhc_ret =
2806 bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2807
2808 bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2809
2810 return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2811 }
2812
2813 void
2814 bufctl_help(void)
2815 {
2816 mdb_printf("%s",
2817 "Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n\n");
2818 mdb_dec_indent(2);
2819 mdb_printf("%<b>OPTIONS%</b>\n");
2820 mdb_inc_indent(2);
2821 mdb_printf("%s",
2822 " -v Display the full content of the bufctl, including its stack trace\n"
2823 " -h retrieve the bufctl's transaction history, if available\n"
2824 " -a addr\n"
2825 " filter out bufctls not involving the buffer at addr\n"
2826 " -c caller\n"
2827 " filter out bufctls without the function/PC in their stack trace\n"
2828 " -e earliest\n"
2829 " filter out bufctls timestamped before earliest\n"
2830 " -l latest\n"
2831 " filter out bufctls timestamped after latest\n"
2832 " -t thread\n"
2833 " filter out bufctls not involving thread\n");
2834 }
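
/*
 * Example usage (values are illustrative):
 *
 *	> <bufctl_addr>::bufctl -v
 *	> <cache_addr>::walk bufctl | ::bufctl -c kmem_alloc -t <thread_addr>
 */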
2835
2836 int
2837 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2838 {
2839 kmem_bufctl_audit_t bc;
2840 uint_t verbose = FALSE;
2841 uint_t history = FALSE;
2842 uint_t in_history = FALSE;
2843 uintptr_t caller = NULL, thread = NULL;
2844 uintptr_t laddr, haddr, baddr = NULL;
2845 hrtime_t earliest = 0, latest = 0;
2846 int i, depth;
2847 char c[MDB_SYM_NAMLEN];
2848 GElf_Sym sym;
2849
2850 if (mdb_getopts(argc, argv,
2851 'v', MDB_OPT_SETBITS, TRUE, &verbose,
2852 'h', MDB_OPT_SETBITS, TRUE, &history,
2853 'H', MDB_OPT_SETBITS, TRUE, &in_history, /* internal */
2854 'c', MDB_OPT_UINTPTR, &caller,
2855 't', MDB_OPT_UINTPTR, &thread,
2856 'e', MDB_OPT_UINT64, &earliest,
2857 'l', MDB_OPT_UINT64, &latest,
2858 'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2859 return (DCMD_USAGE);
2860
2861 if (!(flags & DCMD_ADDRSPEC))
2862 return (DCMD_USAGE);
2863
2864 if (in_history && !history)
2865 return (DCMD_USAGE);
2866
2867 if (history && !in_history) {
2868 mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2869 UM_SLEEP | UM_GC);
2870 bufctl_history_cb_t bhc;
2871
2872 nargv[0].a_type = MDB_TYPE_STRING;
2873 nargv[0].a_un.a_str = "-H"; /* prevent recursion */
2874
2875 for (i = 0; i < argc; i++)
2876 nargv[i + 1] = argv[i];
2877
2878 /*
2879 * When in history mode, we treat each element as if it
2880 * were in a separate loop, so that the headers group
2881 * bufctls with similar histories.
2882 */
2883 bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2884 bhc.bhc_argc = argc + 1;
2885 bhc.bhc_argv = nargv;
2886 bhc.bhc_ret = DCMD_OK;
2887
2888 if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2889 addr) == -1) {
2890 mdb_warn("unable to walk bufctl_history");
2891 return (DCMD_ERR);
2892 }
2893
2894 if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2895 mdb_printf("\n");
2896
2897 return (bhc.bhc_ret);
2898 }
2899
2900 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2901 if (verbose) {
2902 mdb_printf("%16s %16s %16s %16s\n"
2903 "%<u>%16s %16s %16s %16s%</u>\n",
2904 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2905 "", "CACHE", "LASTLOG", "CONTENTS");
2906 } else {
2907 mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n",
2908 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER");
2909 }
2910 }
2911
2912 if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
2913 mdb_warn("couldn't read bufctl at %p", addr);
2914 return (DCMD_ERR);
2915 }
2916
2917 /*
2918 * Guard against bogus bc_depth in case the bufctl is corrupt or
2919 * the address does not really refer to a bufctl.
2920 */
2921 depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH);
2922
2923 if (caller != NULL) {
2924 laddr = caller;
2925 haddr = caller + sizeof (caller);
2926
2927 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2928 &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2929 /*
2930 * We were provided an exact symbol value; any
2931 * address in the function is valid.
2932 */
2933 laddr = (uintptr_t)sym.st_value;
2934 haddr = (uintptr_t)sym.st_value + sym.st_size;
2935 }
2936
2937 for (i = 0; i < depth; i++)
2938 if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr)
2939 break;
2940
2941 if (i == depth)
2942 return (DCMD_OK);
2943 }
2944
2945 if (thread != NULL && (uintptr_t)bc.bc_thread != thread)
2946 return (DCMD_OK);
2947
2948 if (earliest != 0 && bc.bc_timestamp < earliest)
2949 return (DCMD_OK);
2950
2951 if (latest != 0 && bc.bc_timestamp > latest)
2952 return (DCMD_OK);
2953
2954 if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr)
2955 return (DCMD_OK);
2956
2957 if (flags & DCMD_PIPE_OUT) {
2958 mdb_printf("%#lr\n", addr);
2959 return (DCMD_OK);
2960 }
2961
2962 if (verbose) {
2963 mdb_printf(
2964 "%<b>%16p%</b> %16p %16llx %16p\n"
2965 "%16s %16p %16p %16p\n",
2966 addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread,
2967 "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents);
2968
2969 mdb_inc_indent(17);
2970 for (i = 0; i < depth; i++)
2971 mdb_printf("%a\n", bc.bc_stack[i]);
2972 mdb_dec_indent(17);
2973 mdb_printf("\n");
2974 } else {
2975 mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr,
2976 bc.bc_timestamp, bc.bc_thread);
2977
2978 for (i = 0; i < depth; i++) {
2979 if (mdb_lookup_by_addr(bc.bc_stack[i],
2980 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2981 continue;
2982 if (strncmp(c, "kmem_", 5) == 0)
2983 continue;
2984 mdb_printf(" %a\n", bc.bc_stack[i]);
2985 break;
2986 }
2987
2988 if (i >= depth)
2989 mdb_printf("\n");
2990 }
2991
2992 return (DCMD_OK);
2993 }
2994
2995 typedef struct kmem_verify {
2996 uint64_t *kmv_buf; /* buffer to read cache contents into */
2997 size_t kmv_size; /* number of bytes in kmv_buf */
2998 int kmv_corruption; /* > 0 if corruption found. */
2999 int kmv_besilent; /* report actual corruption sites */
3000 struct kmem_cache kmv_cache; /* the cache we're operating on */
3001 } kmem_verify_t;
3002
3003 /*
3004 * verify_pattern()
3005 * verify that buf is filled with the pattern pat.
3006 */
3007 static int64_t
3008 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
3009 {
3010 /*LINTED*/
3011 uint64_t *bufend = (uint64_t *)((char *)buf_arg + size);
3012 uint64_t *buf;
3013
3014 for (buf = buf_arg; buf < bufend; buf++)
3015 if (*buf != pat)
3016 return ((uintptr_t)buf - (uintptr_t)buf_arg);
3017 return (-1);
3018 }
3019
3020 /*
3021 * verify_buftag()
3022 * verify that btp->bt_bxstat == (bcp ^ pat)
3023 */
3024 static int
3025 verify_buftag(kmem_buftag_t *btp, uintptr_t pat)
3026 {
3027 return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
3028 }
3029
3030 /*
3031 * verify_free()
3032 * verify the integrity of a free block of memory by checking
3033 * that it is filled with 0xdeadbeef and that its buftag is sane.
3034 */
3035 /*ARGSUSED1*/
3036 static int
3037 verify_free(uintptr_t addr, const void *data, void *private)
3038 {
3039 kmem_verify_t *kmv = (kmem_verify_t *)private;
3040 uint64_t *buf = kmv->kmv_buf; /* buf to validate */
3041 int64_t corrupt; /* corruption offset */
3042 kmem_buftag_t *buftagp; /* ptr to buftag */
3043 kmem_cache_t *cp = &kmv->kmv_cache;
3044 int besilent = kmv->kmv_besilent;
3045
3046 /*LINTED*/
3047 buftagp = KMEM_BUFTAG(cp, buf);
3048
3049 /*
3050 * Read the buffer to check.
3051 */
3052 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3053 if (!besilent)
3054 mdb_warn("couldn't read %p", addr);
3055 return (WALK_NEXT);
3056 }
3057
3058 if ((corrupt = verify_pattern(buf, cp->cache_verify,
3059 KMEM_FREE_PATTERN)) >= 0) {
3060 if (!besilent)
3061 mdb_printf("buffer %p (free) seems corrupted, at %p\n",
3062 addr, (uintptr_t)addr + corrupt);
3063 goto corrupt;
3064 }
3065 /*
3066 * When KMF_LITE is set, buftagp->bt_redzone is used to hold
3067 * the first bytes of the buffer, hence we cannot check for red
3068 * zone corruption.
3069 */
3070 if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH &&
3071 buftagp->bt_redzone != KMEM_REDZONE_PATTERN) {
3072 if (!besilent)
3073 mdb_printf("buffer %p (free) seems to "
3074 "have a corrupt redzone pattern\n", addr);
3075 goto corrupt;
3076 }
3077
3078 /*
3079 * confirm bufctl pointer integrity.
3080 */
3081 if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) {
3082 if (!besilent)
3083 mdb_printf("buffer %p (free) has a corrupt "
3084 "buftag\n", addr);
3085 goto corrupt;
3086 }
3087
3088 return (WALK_NEXT);
3089 corrupt:
3090 kmv->kmv_corruption++;
3091 return (WALK_NEXT);
3092 }
3093
3094 /*
3095 * verify_alloc()
3096 * Verify that the buftag of an allocated buffer makes sense with respect
3097 * to the buffer.
3098 */
3099 /*ARGSUSED1*/
3100 static int
3101 verify_alloc(uintptr_t addr, const void *data, void *private)
3102 {
3103 kmem_verify_t *kmv = (kmem_verify_t *)private;
3104 kmem_cache_t *cp = &kmv->kmv_cache;
3105 uint64_t *buf = kmv->kmv_buf; /* buf to validate */
3106 /*LINTED*/
3107 kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf);
3108 uint32_t *ip = (uint32_t *)buftagp;
3109 uint8_t *bp = (uint8_t *)buf;
3110 int looks_ok = 0, size_ok = 1; /* flags for finding corruption */
3111 int besilent = kmv->kmv_besilent;
3112
3113 /*
3114 * Read the buffer to check.
3115 */
3116 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3117 if (!besilent)
3118 mdb_warn("couldn't read %p", addr);
3119 return (WALK_NEXT);
3120 }
3121
3122 /*
3123 * There are two cases to handle:
3124 * 1. If the buf was alloc'd using kmem_cache_alloc, it will have
3125 * 0xfeedfacefeedface at the end of it
3126 * 2. If the buf was alloc'd using kmem_alloc, it will have
3127 * 0xbb just past the end of the region in use. At the buftag,
3128 * it will have 0xfeedface (or, if the whole buffer is in use,
3129 * 0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
3130 * endianness), followed by 32 bits containing the offset of the
3131 * 0xbb byte in the buffer.
3132 *
3133 * Finally, the two 32-bit words that comprise the second half of the
3134 * buftag should xor to KMEM_BUFTAG_ALLOC
3135 */
3136
3137 if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN)
3138 looks_ok = 1;
3139 else if (!KMEM_SIZE_VALID(ip[1]))
3140 size_ok = 0;
3141 else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE)
3142 looks_ok = 1;
3143 else
3144 size_ok = 0;
3145
3146 if (!size_ok) {
3147 if (!besilent)
3148 mdb_printf("buffer %p (allocated) has a corrupt "
3149 "redzone size encoding\n", addr);
3150 goto corrupt;
3151 }
3152
3153 if (!looks_ok) {
3154 if (!besilent)
3155 mdb_printf("buffer %p (allocated) has a corrupt "
3156 "redzone signature\n", addr);
3157 goto corrupt;
3158 }
3159
3160 if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) {
3161 if (!besilent)
3162 mdb_printf("buffer %p (allocated) has a "
3163 "corrupt buftag\n", addr);
3164 goto corrupt;
3165 }
3166
3167 return (WALK_NEXT);
3168 corrupt:
3169 kmv->kmv_corruption++;
3170 return (WALK_NEXT);
3171 }
3172
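/*
 * Example usage (address is illustrative):
 *
 *	> ::kmem_verify			! one-line summary for every cache
 *	> <cache_addr>::kmem_verify	! detailed report for a single cache
 */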
3173 /*ARGSUSED2*/
3174 int
3175 kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3176 {
3177 if (flags & DCMD_ADDRSPEC) {
3178 int check_alloc = 0, check_free = 0;
3179 kmem_verify_t kmv;
3180
3181 if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache),
3182 addr) == -1) {
3183 mdb_warn("couldn't read kmem_cache %p", addr);
3184 return (DCMD_ERR);
3185 }
3186
3187 kmv.kmv_size = kmv.kmv_cache.cache_buftag +
3188 sizeof (kmem_buftag_t);
3189 kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC);
3190 kmv.kmv_corruption = 0;
3191
3192 if ((kmv.kmv_cache.cache_flags & KMF_REDZONE)) {
3193 check_alloc = 1;
3194 if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF)
3195 check_free = 1;
3196 } else {
3197 if (!(flags & DCMD_LOOP)) {
3198 mdb_warn("cache %p (%s) does not have "
3199 "redzone checking enabled\n", addr,
3200 kmv.kmv_cache.cache_name);
3201 }
3202 return (DCMD_ERR);
3203 }
3204
3205 if (flags & DCMD_LOOP) {
3206 /*
3207 * table mode, don't print out every corrupt buffer
3208 */
3209 kmv.kmv_besilent = 1;
3210 } else {
3211 mdb_printf("Summary for cache '%s'\n",
3212 kmv.kmv_cache.cache_name);
3213 mdb_inc_indent(2);
3214 kmv.kmv_besilent = 0;
3215 }
3216
3217 if (check_alloc)
3218 (void) mdb_pwalk("kmem", verify_alloc, &kmv, addr);
3219 if (check_free)
3220 (void) mdb_pwalk("freemem", verify_free, &kmv, addr);
3221
3222 if (flags & DCMD_LOOP) {
3223 if (kmv.kmv_corruption == 0) {
3224 mdb_printf("%-*s %?p clean\n",
3225 KMEM_CACHE_NAMELEN,
3226 kmv.kmv_cache.cache_name, addr);
3227 } else {
3228 char *s = ""; /* optional s in "buffer[s]" */
3229 if (kmv.kmv_corruption > 1)
3230 s = "s";
3231
3232 mdb_printf("%-*s %?p %d corrupt buffer%s\n",
3233 KMEM_CACHE_NAMELEN,
3234 kmv.kmv_cache.cache_name, addr,
3235 kmv.kmv_corruption, s);
3236 }
3237 } else {
3238 /*
3239 * This is the more verbose mode, when the user has
3240 * typed addr::kmem_verify. If the cache was clean,
3241 * nothing will have yet been printed. So say something.
3242 */
3243 if (kmv.kmv_corruption == 0)
3244 mdb_printf("clean\n");
3245
3246 mdb_dec_indent(2);
3247 }
3248 } else {
3249 /*
3250 * If the user didn't specify a cache to verify, we'll walk all
3251 * kmem_cache's, specifying ourself as a callback for each...
3252 * this is the equivalent of '::walk kmem_cache .::kmem_verify'
3253 */
3254 mdb_printf("%<u>%-*s %-?s %-20s%</b>\n", KMEM_CACHE_NAMELEN,
3255 "Cache Name", "Addr", "Cache Integrity");
3256 (void) (mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL));
3257 }
3258
3259 return (DCMD_OK);
3260 }
3261
3262 typedef struct vmem_node {
3263 struct vmem_node *vn_next;
3264 struct vmem_node *vn_parent;
3265 struct vmem_node *vn_sibling;
3266 struct vmem_node *vn_children;
3267 uintptr_t vn_addr;
3268 int vn_marked;
3269 vmem_t vn_vmem;
3270 } vmem_node_t;
3271
3272 typedef struct vmem_walk {
3273 vmem_node_t *vw_root;
3274 vmem_node_t *vw_current;
3275 } vmem_walk_t;
3276
3277 int
3278 vmem_walk_init(mdb_walk_state_t *wsp)
3279 {
3280 uintptr_t vaddr, paddr;
3281 vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
3282 vmem_walk_t *vw;
3283
3284 if (mdb_readvar(&vaddr, "vmem_list") == -1) {
3285 mdb_warn("couldn't read 'vmem_list'");
3286 return (WALK_ERR);
3287 }
3288
3289 while (vaddr != NULL) {
3290 vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
3291 vp->vn_addr = vaddr;
3292 vp->vn_next = head;
3293 head = vp;
3294
3295 if (vaddr == wsp->walk_addr)
3296 current = vp;
3297
3298 if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
3299 mdb_warn("couldn't read vmem_t at %p", vaddr);
3300 goto err;
3301 }
3302
3303 vaddr = (uintptr_t)vp->vn_vmem.vm_next;
3304 }
3305
3306 for (vp = head; vp != NULL; vp = vp->vn_next) {
3307
3308 if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) {
3309 vp->vn_sibling = root;
3310 root = vp;
3311 continue;
3312 }
3313
3314 for (parent = head; parent != NULL; parent = parent->vn_next) {
3315 if (parent->vn_addr != paddr)
3316 continue;
3317 vp->vn_sibling = parent->vn_children;
3318 parent->vn_children = vp;
3319 vp->vn_parent = parent;
3320 break;
3321 }
3322
3323 if (parent == NULL) {
3324 mdb_warn("couldn't find %p's parent (%p)\n",
3325 vp->vn_addr, paddr);
3326 goto err;
3327 }
3328 }
3329
3330 vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
3331 vw->vw_root = root;
3332
3333 if (current != NULL)
3334 vw->vw_current = current;
3335 else
3336 vw->vw_current = root;
3337
3338 wsp->walk_data = vw;
3339 return (WALK_NEXT);
3340 err:
3341 for (vp = head; head != NULL; vp = head) {
3342 head = vp->vn_next;
3343 mdb_free(vp, sizeof (vmem_node_t));
3344 }
3345
3346 return (WALK_ERR);
3347 }
3348
3349 int
3350 vmem_walk_step(mdb_walk_state_t *wsp)
3351 {
3352 vmem_walk_t *vw = wsp->walk_data;
3353 vmem_node_t *vp;
3354 int rval;
3355
3356 if ((vp = vw->vw_current) == NULL)
3357 return (WALK_DONE);
3358
3359 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3360
3361 if (vp->vn_children != NULL) {
3362 vw->vw_current = vp->vn_children;
3363 return (rval);
3364 }
3365
3366 do {
3367 vw->vw_current = vp->vn_sibling;
3368 vp = vp->vn_parent;
3369 } while (vw->vw_current == NULL && vp != NULL);
3370
3371 return (rval);
3372 }
3373
3374 /*
3375 * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
3376 * children are visited before their parent. We perform the postfix walk
3377 * iteratively (rather than recursively) to allow mdb to regain control
3378 * after each callback.
3379 */
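/*
 * For example, given the (purely illustrative) arena hierarchy
 *
 *	heap
 *	  kmem_metadata
 *	    kmem_msb
 *	  kmem_default
 *
 * the postfix order is kmem_msb, kmem_metadata, kmem_default, heap.
 */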
3380 int
3381 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
3382 {
3383 vmem_walk_t *vw = wsp->walk_data;
3384 vmem_node_t *vp = vw->vw_current;
3385 int rval;
3386
3387 /*
3388 * If this node is marked, then we know that we have already visited
3389 * all of its children. If the node has any siblings, they need to
3390 * be visited next; otherwise, we need to visit the parent. Note
3391 * that vp->vn_marked will only be zero on the first invocation of
3392 * the step function.
3393 */
3394 if (vp->vn_marked) {
3395 if (vp->vn_sibling != NULL)
3396 vp = vp->vn_sibling;
3397 else if (vp->vn_parent != NULL)
3398 vp = vp->vn_parent;
3399 else {
3400 /*
3401 * We have neither a parent, nor a sibling, and we
3402 * have already been visited; we're done.
3403 */
3404 return (WALK_DONE);
3405 }
3406 }
3407
3408 /*
3409 * Before we visit this node, visit its children.
3410 */
3411 while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
3412 vp = vp->vn_children;
3413
3414 vp->vn_marked = 1;
3415 vw->vw_current = vp;
3416 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3417
3418 return (rval);
3419 }
3420
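/*
 * Free the node tree depth-first.  The vmem_walk_t itself is released only
 * after the last root-level arena has been freed (i.e. when the root has
 * neither a sibling nor a parent).
 */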
3421 void
3422 vmem_walk_fini(mdb_walk_state_t *wsp)
3423 {
3424 vmem_walk_t *vw = wsp->walk_data;
3425 vmem_node_t *root = vw->vw_root;
3426 int done;
3427
3428 if (root == NULL)
3429 return;
3430
3431 if ((vw->vw_root = root->vn_children) != NULL)
3432 vmem_walk_fini(wsp);
3433
3434 vw->vw_root = root->vn_sibling;
3435 done = (root->vn_sibling == NULL && root->vn_parent == NULL);
3436 mdb_free(root, sizeof (vmem_node_t));
3437
3438 if (done) {
3439 mdb_free(vw, sizeof (vmem_walk_t));
3440 } else {
3441 vmem_walk_fini(wsp);
3442 }
3443 }
3444
3445 typedef struct vmem_seg_walk {
3446 uint8_t vsw_type;
3447 uintptr_t vsw_start;
3448 uintptr_t vsw_current;
3449 } vmem_seg_walk_t;
3450
3451 /*ARGSUSED*/
3452 int
3453 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
3454 {
3455 vmem_seg_walk_t *vsw;
3456
3457 if (wsp->walk_addr == NULL) {
3458 mdb_warn("vmem_%s does not support global walks\n", name);
3459 return (WALK_ERR);
3460 }
3461
3462 wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
3463
3464 vsw->vsw_type = type;
3465 vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0);
3466 vsw->vsw_current = vsw->vsw_start;
3467
3468 return (WALK_NEXT);
3469 }
3470
3471 /*
3472 * vmem segments can't have type 0 (this should be added to vmem_impl.h).
3473 */
3474 #define VMEM_NONE 0
3475
3476 int
3477 vmem_alloc_walk_init(mdb_walk_state_t *wsp)
3478 {
3479 return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
3480 }
3481
3482 int
3483 vmem_free_walk_init(mdb_walk_state_t *wsp)
3484 {
3485 return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
3486 }
3487
3488 int
3489 vmem_span_walk_init(mdb_walk_state_t *wsp)
3490 {
3491 return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
3492 }
3493
3494 int
3495 vmem_seg_walk_init(mdb_walk_state_t *wsp)
3496 {
3497 return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
3498 }
3499
3500 int
3501 vmem_seg_walk_step(mdb_walk_state_t *wsp)
3502 {
3503 vmem_seg_t seg;
3504 vmem_seg_walk_t *vsw = wsp->walk_data;
3505 uintptr_t addr = vsw->vsw_current;
3506 static size_t seg_size = 0;
3507 int rval;
3508
3509 if (!seg_size) {
3510 if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) {
3511 mdb_warn("failed to read 'vmem_seg_size'");
3512 seg_size = sizeof (vmem_seg_t);
3513 }
3514 }
3515
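	/*
	 * The target's notion of a vmem_seg (vmem_seg_size) may be smaller
	 * than our structure; zero the tail of the local copy before the
	 * partial read so the callback never sees stale data.
	 */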
3516 if (seg_size < sizeof (seg))
3517 bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
3518
3519 if (mdb_vread(&seg, seg_size, addr) == -1) {
3520 mdb_warn("couldn't read vmem_seg at %p", addr);
3521 return (WALK_ERR);
3522 }
3523
3524 vsw->vsw_current = (uintptr_t)seg.vs_anext;
3525 if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
3526 rval = WALK_NEXT;
3527 } else {
3528 rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
3529 }
3530
3531 if (vsw->vsw_current == vsw->vsw_start)
3532 return (WALK_DONE);
3533
3534 return (rval);
3535 }
3536
3537 void
3538 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
3539 {
3540 vmem_seg_walk_t *vsw = wsp->walk_data;
3541
3542 mdb_free(vsw, sizeof (vmem_seg_walk_t));
3543 }
3544
3545 #define VMEM_NAMEWIDTH 22
3546
3547 int
3548 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3549 {
3550 vmem_t v, parent;
3551 vmem_kstat_t *vkp = &v.vm_kstat;
3552 uintptr_t paddr;
3553 int ident = 0;
3554 char c[VMEM_NAMEWIDTH];
3555
3556 if (!(flags & DCMD_ADDRSPEC)) {
3557 if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
3558 mdb_warn("can't walk vmem");
3559 return (DCMD_ERR);
3560 }
3561 return (DCMD_OK);
3562 }
3563
3564 if (DCMD_HDRSPEC(flags))
3565 mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3566 "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
3567 "TOTAL", "SUCCEED", "FAIL");
3568
3569 if (mdb_vread(&v, sizeof (v), addr) == -1) {
3570 mdb_warn("couldn't read vmem at %p", addr);
3571 return (DCMD_ERR);
3572 }
3573
3574 for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) {
3575 if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3576 mdb_warn("couldn't trace %p's ancestry", addr);
3577 ident = 0;
3578 break;
3579 }
3580 paddr = (uintptr_t)parent.vm_source;
3581 }
3582
3583 (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3584
3585 mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3586 addr, VMEM_NAMEWIDTH, c,
3587 vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64,
3588 vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64);
3589
3590 return (DCMD_OK);
3591 }
3592
3593 void
3594 vmem_seg_help(void)
3595 {
3596 mdb_printf("%s",
3597 "Display the contents of vmem_seg_ts, with optional filtering.\n\n"
3598 "\n"
3599 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
3600 "representing a single chunk of data. Only ALLOC segments have debugging\n"
3601 "information.\n");
3602 mdb_dec_indent(2);
3603 mdb_printf("%<b>OPTIONS%</b>\n");
3604 mdb_inc_indent(2);
3605 mdb_printf("%s",
3606 " -v Display the full content of the vmem_seg, including its stack trace\n"
3607 " -s report the size of the segment, instead of the end address\n"
3608 " -c caller\n"
3609 " filter out segments without the function/PC in their stack trace\n"
3610 " -e earliest\n"
3611 " filter out segments timestamped before earliest\n"
3612 " -l latest\n"
3613 " filter out segments timestamped after latest\n"
3614 " -m minsize\n"
3615 " filer out segments smaller than minsize\n"
3616 " -M maxsize\n"
3617 " filer out segments larger than maxsize\n"
3618 " -t thread\n"
3619 " filter out segments not involving thread\n"
3620 " -T type\n"
3621 " filter out segments not of type 'type'\n"
3622 " type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
3623 }
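
/*
 * Example usage (values are illustrative):
 *
 *	> <arena_addr>::walk vmem_seg | ::vmem_seg -T ALLOC -c segkmem_alloc
 *	> <arena_addr>::walk vmem_seg | ::vmem_seg -s -m 0t8192
 */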
3624
3625 /*ARGSUSED*/
3626 int
3627 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3628 {
3629 vmem_seg_t vs;
3630 pc_t *stk = vs.vs_stack;
3631 uintptr_t sz;
3632 uint8_t t;
3633 const char *type = NULL;
3634 GElf_Sym sym;
3635 char c[MDB_SYM_NAMLEN];
3636 int no_debug;
3637 int i;
3638 int depth;
3639 uintptr_t laddr, haddr;
3640
3641 uintptr_t caller = NULL, thread = NULL;
3642 uintptr_t minsize = 0, maxsize = 0;
3643
3644 hrtime_t earliest = 0, latest = 0;
3645
3646 uint_t size = 0;
3647 uint_t verbose = 0;
3648
3649 if (!(flags & DCMD_ADDRSPEC))
3650 return (DCMD_USAGE);
3651
3652 if (mdb_getopts(argc, argv,
3653 'c', MDB_OPT_UINTPTR, &caller,
3654 'e', MDB_OPT_UINT64, &earliest,
3655 'l', MDB_OPT_UINT64, &latest,
3656 's', MDB_OPT_SETBITS, TRUE, &size,
3657 'm', MDB_OPT_UINTPTR, &minsize,
3658 'M', MDB_OPT_UINTPTR, &maxsize,
3659 't', MDB_OPT_UINTPTR, &thread,
3660 'T', MDB_OPT_STR, &type,
3661 'v', MDB_OPT_SETBITS, TRUE, &verbose,
3662 NULL) != argc)
3663 return (DCMD_USAGE);
3664
3665 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3666 if (verbose) {
3667 mdb_printf("%16s %4s %16s %16s %16s\n"
3668 "%<u>%16s %4s %16s %16s %16s%</u>\n",
3669 "ADDR", "TYPE", "START", "END", "SIZE",
3670 "", "", "THREAD", "TIMESTAMP", "");
3671 } else {
3672 mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3673 "START", size? "SIZE" : "END", "WHO");
3674 }
3675 }
3676
3677 if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3678 mdb_warn("couldn't read vmem_seg at %p", addr);
3679 return (DCMD_ERR);
3680 }
3681
3682 if (type != NULL) {
3683 if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3684 t = VMEM_ALLOC;
3685 else if (strcmp(type, "FREE") == 0)
3686 t = VMEM_FREE;
3687 else if (strcmp(type, "SPAN") == 0)
3688 t = VMEM_SPAN;
3689 else if (strcmp(type, "ROTR") == 0 ||
3690 strcmp(type, "ROTOR") == 0)
3691 t = VMEM_ROTOR;
3692 else if (strcmp(type, "WLKR") == 0 ||
3693 strcmp(type, "WALKER") == 0)
3694 t = VMEM_WALKER;
3695 else {
3696 mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3697 type);
3698 return (DCMD_ERR);
3699 }
3700
3701 if (vs.vs_type != t)
3702 return (DCMD_OK);
3703 }
3704
3705 sz = vs.vs_end - vs.vs_start;
3706
3707 if (minsize != 0 && sz < minsize)
3708 return (DCMD_OK);
3709
3710 if (maxsize != 0 && sz > maxsize)
3711 return (DCMD_OK);
3712
3713 t = vs.vs_type;
3714 depth = vs.vs_depth;
3715
3716 /*
3717 * debug info, when present, is only accurate for VMEM_ALLOC segments
3718 */
3719 no_debug = (t != VMEM_ALLOC) ||
3720 (depth == 0 || depth > VMEM_STACK_DEPTH);
3721
3722 if (no_debug) {
3723 if (caller != NULL || thread != NULL || earliest != 0 ||
3724 latest != 0)
3725 return (DCMD_OK); /* not enough info */
3726 } else {
3727 if (caller != NULL) {
3728 laddr = caller;
3729 haddr = caller + sizeof (caller);
3730
3731 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3732 sizeof (c), &sym) != -1 &&
3733 caller == (uintptr_t)sym.st_value) {
3734 /*
3735 * We were provided an exact symbol value; any
3736 * address in the function is valid.
3737 */
3738 laddr = (uintptr_t)sym.st_value;
3739 haddr = (uintptr_t)sym.st_value + sym.st_size;
3740 }
3741
3742 for (i = 0; i < depth; i++)
3743 if (vs.vs_stack[i] >= laddr &&
3744 vs.vs_stack[i] < haddr)
3745 break;
3746
3747 if (i == depth)
3748 return (DCMD_OK);
3749 }
3750
3751 if (thread != NULL && (uintptr_t)vs.vs_thread != thread)
3752 return (DCMD_OK);
3753
3754 if (earliest != 0 && vs.vs_timestamp < earliest)
3755 return (DCMD_OK);
3756
3757 if (latest != 0 && vs.vs_timestamp > latest)
3758 return (DCMD_OK);
3759 }
3760
3761 type = (t == VMEM_ALLOC ? "ALLC" :
3762 t == VMEM_FREE ? "FREE" :
3763 t == VMEM_SPAN ? "SPAN" :
3764 t == VMEM_ROTOR ? "ROTR" :
3765 t == VMEM_WALKER ? "WLKR" :
3766 "????");
3767
3768 if (flags & DCMD_PIPE_OUT) {
3769 mdb_printf("%#lr\n", addr);
3770 return (DCMD_OK);
3771 }
3772
3773 if (verbose) {
3774 mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3775 addr, type, vs.vs_start, vs.vs_end, sz);
3776
3777 if (no_debug)
3778 return (DCMD_OK);
3779
3780 mdb_printf("%16s %4s %16p %16llx\n",
3781 "", "", vs.vs_thread, vs.vs_timestamp);
3782
3783 mdb_inc_indent(17);
3784 for (i = 0; i < depth; i++) {
3785 mdb_printf("%a\n", stk[i]);
3786 }
3787 mdb_dec_indent(17);
3788 mdb_printf("\n");
3789 } else {
3790 mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3791 vs.vs_start, size? sz : vs.vs_end);
3792
3793 if (no_debug) {
3794 mdb_printf("\n");
3795 return (DCMD_OK);
3796 }
3797
3798 for (i = 0; i < depth; i++) {
3799 if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3800 c, sizeof (c), &sym) == -1)
3801 continue;
3802 if (strncmp(c, "vmem_", 5) == 0)
3803 continue;
3804 break;
3805 }
3806 mdb_printf(" %a\n", stk[i]);
3807 }
3808 return (DCMD_OK);
3809 }
3810
3811 typedef struct kmalog_data {
3812 uintptr_t kma_addr;
3813 hrtime_t kma_newest;
3814 } kmalog_data_t;
3815
3816 /*ARGSUSED*/
3817 static int
3818 showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma)
3819 {
3820 char name[KMEM_CACHE_NAMELEN + 1];
3821 hrtime_t delta;
3822 int i, depth;
3823 size_t bufsize;
3824
3825 if (bcp->bc_timestamp == 0)
3826 return (WALK_DONE);
3827
3828 if (kma->kma_newest == 0)
3829 kma->kma_newest = bcp->bc_timestamp;
3830
3831 if (kma->kma_addr) {
3832 if (mdb_vread(&bufsize, sizeof (bufsize),
3833 (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) {
3834 mdb_warn(
3835 "failed to read cache_bufsize for cache at %p",
3836 bcp->bc_cache);
3837 return (WALK_ERR);
3838 }
3839
3840 if (kma->kma_addr < (uintptr_t)bcp->bc_addr ||
3841 kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize)
3842 return (WALK_NEXT);
3843 }
3844
3845 delta = kma->kma_newest - bcp->bc_timestamp;
3846 depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3847
3848 if (mdb_readstr(name, sizeof (name), (uintptr_t)
3849 &bcp->bc_cache->cache_name) <= 0)
3850 (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3851
3852 mdb_printf("\nT-%lld.%09lld addr=%p %s\n",
3853 delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3854
3855 for (i = 0; i < depth; i++)
3856 mdb_printf("\t %a\n", bcp->bc_stack[i]);
3857
3858 return (WALK_NEXT);
3859 }
3860
3861 int
3862 kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3863 {
3864 const char *logname = "kmem_transaction_log";
3865 kmalog_data_t kma;
3866
3867 if (argc > 1)
3868 return (DCMD_USAGE);
3869
3870 kma.kma_newest = 0;
3871 if (flags & DCMD_ADDRSPEC)
3872 kma.kma_addr = addr;
3873 else
3874 kma.kma_addr = NULL;
3875
3876 if (argc > 0) {
3877 if (argv->a_type != MDB_TYPE_STRING)
3878 return (DCMD_USAGE);
3879 if (strcmp(argv->a_un.a_str, "fail") == 0)
3880 logname = "kmem_failure_log";
3881 else if (strcmp(argv->a_un.a_str, "slab") == 0)
3882 logname = "kmem_slab_log";
3883 else
3884 return (DCMD_USAGE);
3885 }
3886
3887 if (mdb_readvar(&addr, logname) == -1) {
3888 mdb_warn("failed to read %s log header pointer");
3889 return (DCMD_ERR);
3890 }
3891
3892 if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) {
3893 mdb_warn("failed to walk kmem log");
3894 return (DCMD_ERR);
3895 }
3896
3897 return (DCMD_OK);
3898 }
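
/*
 * A usage sketch for ::kmalog (the session lines below are illustrative, not
 * captured output, and the address is hypothetical).  With no argument the
 * transaction log is walked; the "fail" and "slab" arguments select the
 * failure and slab logs read above, and specifying an address restricts the
 * output to records whose buffer contains that address:
 *
 *	> ::kmalog
 *	> ::kmalog fail
 *	> 0xffffff01d1c3a000::kmalog
 */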
3899
3900 /*
3901 * As the final lure for die-hard crash(1M) users, we provide ::kmausers here.
3902 * The first piece is a structure which we use to accumulate kmem_cache_t
3903 * addresses of interest. kmc_add() is used as a callback for the kmem_cache
3904 * walker; we either add all caches, or only those named explicitly as arguments.
3905 */
3906
3907 typedef struct kmclist {
3908 const char *kmc_name; /* Name to match (or NULL) */
3909 uintptr_t *kmc_caches; /* List of kmem_cache_t addrs */
3910 int kmc_nelems; /* Num entries in kmc_caches */
3911 int kmc_size; /* Size of kmc_caches array */
3912 } kmclist_t;
3913
3914 static int
3915 kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc)
3916 {
3917 void *p;
3918 int s;
3919
3920 if (kmc->kmc_name == NULL ||
3921 strcmp(cp->cache_name, kmc->kmc_name) == 0) {
3922 /*
3923 * If we have a match, grow our array (if necessary), and then
3924 * add the virtual address of the matching cache to our list.
3925 */
3926 if (kmc->kmc_nelems >= kmc->kmc_size) {
3927 s = kmc->kmc_size ? kmc->kmc_size * 2 : 256;
3928 p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3929
3930 bcopy(kmc->kmc_caches, p,
3931 sizeof (uintptr_t) * kmc->kmc_size);
3932
3933 kmc->kmc_caches = p;
3934 kmc->kmc_size = s;
3935 }
3936
3937 kmc->kmc_caches[kmc->kmc_nelems++] = addr;
3938 return (kmc->kmc_name ? WALK_DONE : WALK_NEXT);
3939 }
3940
3941 return (WALK_NEXT);
3942 }
3943
3944 /*
3945 * The second piece of ::kmausers is a hash table of allocations. Each
3946 * allocation owner is identified by its stack trace and data_size. We then
3947 * track the total bytes of all such allocations, and the number of allocations
3948 * to report at the end. Once we have a list of caches, we walk through the
3949 * allocated bufctls of each, and update our hash table accordingly.
3950 */
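
/*
 * A sketch of the hash key used by kmu_add() below: the signature starts at
 * data_size, accumulates the stack PCs, and the bucket is the signature
 * masked by the (power-of-two) table size:
 *
 *	signature = data_size + stack[0] + ... + stack[depth - 1];
 *	bucket = signature & (kmu_size - 1);
 */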
3951
3952 typedef struct kmowner {
3953 struct kmowner *kmo_head; /* First hash elt in bucket */
3954 struct kmowner *kmo_next; /* Next hash elt in chain */
3955 size_t kmo_signature; /* Hash table signature */
3956 uint_t kmo_num; /* Number of allocations */
3957 size_t kmo_data_size; /* Size of each allocation */
3958 size_t kmo_total_size; /* Total bytes of allocation */
3959 int kmo_depth; /* Depth of stack trace */
3960 uintptr_t kmo_stack[KMEM_STACK_DEPTH]; /* Stack trace */
3961 } kmowner_t;
3962
3963 typedef struct kmusers {
3964 	uintptr_t kmu_addr;		/* Address of interest */
3965 const kmem_cache_t *kmu_cache; /* Current kmem cache */
3966 kmowner_t *kmu_hash; /* Hash table of owners */
3967 int kmu_nelems; /* Number of entries in use */
3968 int kmu_size; /* Total number of entries */
3969 } kmusers_t;
3970
3971 static void
3972 kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp,
3973 size_t size, size_t data_size)
3974 {
3975 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3976 size_t bucket, signature = data_size;
3977 kmowner_t *kmo, *kmoend;
3978
3979 /*
3980 * If the hash table is full, double its size and rehash everything.
3981 */
3982 if (kmu->kmu_nelems >= kmu->kmu_size) {
3983 int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024;
3984
3985 kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC);
3986 bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size);
3987 kmu->kmu_hash = kmo;
3988 kmu->kmu_size = s;
3989
3990 kmoend = kmu->kmu_hash + kmu->kmu_size;
3991 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++)
3992 kmo->kmo_head = NULL;
3993
3994 kmoend = kmu->kmu_hash + kmu->kmu_nelems;
3995 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) {
3996 bucket = kmo->kmo_signature & (kmu->kmu_size - 1);
3997 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
3998 kmu->kmu_hash[bucket].kmo_head = kmo;
3999 }
4000 }
4001
4002 /*
4003 * Finish computing the hash signature from the stack trace, and then
4004 * see if the owner is in the hash table. If so, update our stats.
4005 */
4006 for (i = 0; i < depth; i++)
4007 signature += bcp->bc_stack[i];
4008
4009 bucket = signature & (kmu->kmu_size - 1);
4010
4011 for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) {
4012 if (kmo->kmo_signature == signature) {
4013 size_t difference = 0;
4014
4015 difference |= kmo->kmo_data_size - data_size;
4016 difference |= kmo->kmo_depth - depth;
4017
4018 for (i = 0; i < depth; i++) {
4019 difference |= kmo->kmo_stack[i] -
4020 bcp->bc_stack[i];
4021 }
4022
4023 if (difference == 0) {
4024 kmo->kmo_total_size += size;
4025 kmo->kmo_num++;
4026 return;
4027 }
4028 }
4029 }
4030
4031 /*
4032 * If the owner is not yet hashed, grab the next element and fill it
4033 * in based on the allocation information.
4034 */
4035 kmo = &kmu->kmu_hash[kmu->kmu_nelems++];
4036 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
4037 kmu->kmu_hash[bucket].kmo_head = kmo;
4038
4039 kmo->kmo_signature = signature;
4040 kmo->kmo_num = 1;
4041 kmo->kmo_data_size = data_size;
4042 kmo->kmo_total_size = size;
4043 kmo->kmo_depth = depth;
4044
4045 for (i = 0; i < depth; i++)
4046 kmo->kmo_stack[i] = bcp->bc_stack[i];
4047 }
4048
4049 /*
4050 * When ::kmausers is invoked without the -f flag, we simply update our hash
4051 * table with the information from each allocated bufctl.
4052 */
4053 /*ARGSUSED*/
4054 static int
4055 kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4056 {
4057 const kmem_cache_t *cp = kmu->kmu_cache;
4058
4059 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4060 return (WALK_NEXT);
4061 }
4062
4063 /*
4064 * When ::kmausers is invoked with the -f flag, we print out the information
4065 * for each bufctl as well as updating the hash table.
4066 */
4067 static int
4068 kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4069 {
4070 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
4071 const kmem_cache_t *cp = kmu->kmu_cache;
4072 kmem_bufctl_t bufctl;
4073
4074 if (kmu->kmu_addr) {
4075 if (mdb_vread(&bufctl, sizeof (bufctl), addr) == -1)
4076 mdb_warn("couldn't read bufctl at %p", addr);
4077 else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr ||
4078 kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr +
4079 cp->cache_bufsize)
4080 return (WALK_NEXT);
4081 }
4082
4083 mdb_printf("size %d, addr %p, thread %p, cache %s\n",
4084 cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
4085
4086 for (i = 0; i < depth; i++)
4087 mdb_printf("\t %a\n", bcp->bc_stack[i]);
4088
4089 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4090 return (WALK_NEXT);
4091 }
4092
4093 /*
4094 * We sort our results by allocation size before printing them.
4095 */
4096 static int
4097 kmownercmp(const void *lp, const void *rp)
4098 {
4099 const kmowner_t *lhs = lp;
4100 const kmowner_t *rhs = rp;
4101
4102 return (rhs->kmo_total_size - lhs->kmo_total_size);
4103 }
4104
4105 /*
4106 * The main engine of ::kmausers is relatively straightforward: First we
4107 * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we
4108 * iterate over the allocated bufctls of each cache in the list. Finally,
4109 * we sort and print our results.
4110 */
4111 /*ARGSUSED*/
4112 int
4113 kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4114 {
4115 int mem_threshold = 8192; /* Minimum # bytes for printing */
4116 int cnt_threshold = 100; /* Minimum # blocks for printing */
4117 int audited_caches = 0; /* Number of KMF_AUDIT caches found */
4118 int do_all_caches = 1; /* Do all caches (no arguments) */
4119 int opt_e = FALSE; /* Include "small" users */
4120 int opt_f = FALSE; /* Print stack traces */
4121
4122 mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1;
4123 kmowner_t *kmo, *kmoend;
4124 int i, oelems;
4125
4126 kmclist_t kmc;
4127 kmusers_t kmu;
4128
4129 bzero(&kmc, sizeof (kmc));
4130 bzero(&kmu, sizeof (kmu));
4131
4132 while ((i = mdb_getopts(argc, argv,
4133 'e', MDB_OPT_SETBITS, TRUE, &opt_e,
4134 'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
4135
4136 argv += i; /* skip past options we just processed */
4137 argc -= i; /* adjust argc */
4138
4139 if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
4140 return (DCMD_USAGE);
4141
4142 oelems = kmc.kmc_nelems;
4143 kmc.kmc_name = argv->a_un.a_str;
4144 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4145
4146 if (kmc.kmc_nelems == oelems) {
4147 mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name);
4148 return (DCMD_ERR);
4149 }
4150
4151 do_all_caches = 0;
4152 argv++;
4153 argc--;
4154 }
4155
4156 if (flags & DCMD_ADDRSPEC) {
4157 opt_f = TRUE;
4158 kmu.kmu_addr = addr;
4159 } else {
4160 kmu.kmu_addr = NULL;
4161 }
4162
4163 if (opt_e)
4164 mem_threshold = cnt_threshold = 0;
4165
4166 if (opt_f)
4167 callback = (mdb_walk_cb_t)kmause2;
4168
4169 if (do_all_caches) {
4170 kmc.kmc_name = NULL; /* match all cache names */
4171 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4172 }
4173
4174 for (i = 0; i < kmc.kmc_nelems; i++) {
4175 uintptr_t cp = kmc.kmc_caches[i];
4176 kmem_cache_t c;
4177
4178 if (mdb_vread(&c, sizeof (c), cp) == -1) {
4179 mdb_warn("failed to read cache at %p", cp);
4180 continue;
4181 }
4182
4183 if (!(c.cache_flags & KMF_AUDIT)) {
4184 if (!do_all_caches) {
4185 mdb_warn("KMF_AUDIT is not enabled for %s\n",
4186 c.cache_name);
4187 }
4188 continue;
4189 }
4190
4191 kmu.kmu_cache = &c;
4192 (void) mdb_pwalk("bufctl", callback, &kmu, cp);
4193 audited_caches++;
4194 }
4195
4196 if (audited_caches == 0 && do_all_caches) {
4197 mdb_warn("KMF_AUDIT is not enabled for any caches\n");
4198 return (DCMD_ERR);
4199 }
4200
4201 qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp);
4202 kmoend = kmu.kmu_hash + kmu.kmu_nelems;
4203
4204 for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) {
4205 if (kmo->kmo_total_size < mem_threshold &&
4206 kmo->kmo_num < cnt_threshold)
4207 continue;
4208 mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
4209 kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size);
4210 for (i = 0; i < kmo->kmo_depth; i++)
4211 mdb_printf("\t %a\n", kmo->kmo_stack[i]);
4212 }
4213
4214 return (DCMD_OK);
4215 }
4216
4217 void
4218 kmausers_help(void)
4219 {
4220 mdb_printf(
4221 "Displays the largest users of the kmem allocator, sorted by \n"
4222 "trace. If one or more caches is specified, only those caches\n"
4223 "will be searched. By default, all caches are searched. If an\n"
4224 "address is specified, then only those allocations which include\n"
4225 "the given address are displayed. Specifying an address implies\n"
4226 "-f.\n"
4227 "\n"
4228 "\t-e\tInclude all users, not just the largest\n"
4229 "\t-f\tDisplay individual allocations. By default, users are\n"
4230 "\t\tgrouped by stack\n");
4231 }
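
/*
 * Illustrative invocations of ::kmausers (the cache name and address below
 * are examples only, not drawn from a real dump):
 *
 *	> ::kmausers			largest users across all audited caches
 *	> ::kmausers -e kmem_alloc_256	all users of one cache, however small
 *	> ::kmausers -f			also print each matching allocation
 *	> 0xffffff01d2f40000::kmausers	allocations covering this address
 */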
4232
4233 static int
4234 kmem_ready_check(void)
4235 {
4236 int ready;
4237
4238 if (mdb_readvar(&ready, "kmem_ready") < 0)
4239 return (-1); /* errno is set for us */
4240
4241 return (ready);
4242 }
4243
4244 void
4245 kmem_statechange(void)
4246 {
4247 static int been_ready = 0;
4248
4249 if (been_ready)
4250 return;
4251
4252 if (kmem_ready_check() <= 0)
4253 return;
4254
4255 been_ready = 1;
4256 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL);
4257 }
4258
4259 void
4260 kmem_init(void)
4261 {
4262 mdb_walker_t w = {
4263 "kmem_cache", "walk list of kmem caches", kmem_cache_walk_init,
4264 list_walk_step, list_walk_fini
4265 };
4266
4267 /*
4268 * If kmem is ready, we'll need to invoke the kmem_cache walker
4269 * immediately. Walkers in the linkage structure won't be ready until
4270 * _mdb_init returns, so we'll need to add this one manually. If kmem
4271 * is ready, we'll use the walker to initialize the caches. If kmem
4272 * isn't ready, we'll register a callback that will allow us to defer
4273 * cache walking until it is.
4274 */
4275 if (mdb_add_walker(&w) != 0) {
4276 mdb_warn("failed to add kmem_cache walker");
4277 return;
4278 }
4279
4280 kmem_statechange();
4281
4282 /* register our ::whatis handlers */
4283 mdb_whatis_register("modules", whatis_run_modules, NULL,
4284 WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4285 mdb_whatis_register("threads", whatis_run_threads, NULL,
4286 WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4287 mdb_whatis_register("pages", whatis_run_pages, NULL,
4288 WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4289 mdb_whatis_register("kmem", whatis_run_kmem, NULL,
4290 WHATIS_PRIO_ALLOCATOR, 0);
4291 mdb_whatis_register("vmem", whatis_run_vmem, NULL,
4292 WHATIS_PRIO_ALLOCATOR, 0);
4293 }
4294
4295 typedef struct whatthread {
4296 uintptr_t wt_target;
4297 int wt_verbose;
4298 } whatthread_t;
4299
4300 static int
4301 whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w)
4302 {
4303 uintptr_t current, data;
4304
4305 if (t->t_stkbase == NULL)
4306 return (WALK_NEXT);
4307
4308 	/*
4309 	 * Warn about swapped-out threads, but keep walking the remaining
4310 	 * threads.
4311 	 */
4311 if (!(t->t_schedflag & TS_LOAD)) {
4312 mdb_warn("thread %p's stack swapped out\n", addr);
4313 return (WALK_NEXT);
4314 }
4315
4316 /*
4317 * Search the thread's stack for the given pointer. Note that it would
4318 * be more efficient to follow ::kgrep's lead and read in page-sized
4319 * chunks, but this routine is already fast and simple.
4320 */
4321 for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk;
4322 current += sizeof (uintptr_t)) {
4323 if (mdb_vread(&data, sizeof (data), current) == -1) {
4324 mdb_warn("couldn't read thread %p's stack at %p",
4325 addr, current);
4326 return (WALK_ERR);
4327 }
4328
4329 if (data == w->wt_target) {
4330 if (w->wt_verbose) {
4331 mdb_printf("%p in thread %p's stack%s\n",
4332 current, addr, stack_active(t, current));
4333 } else {
4334 mdb_printf("%#lr\n", addr);
4335 return (WALK_NEXT);
4336 }
4337 }
4338 }
4339
4340 return (WALK_NEXT);
4341 }
4342
4343 int
4344 whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4345 {
4346 whatthread_t w;
4347
4348 if (!(flags & DCMD_ADDRSPEC))
4349 return (DCMD_USAGE);
4350
4351 w.wt_verbose = FALSE;
4352 w.wt_target = addr;
4353
4354 if (mdb_getopts(argc, argv,
4355 'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc)
4356 return (DCMD_USAGE);
4357
4358 if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w)
4359 == -1) {
4360 mdb_warn("couldn't walk threads");
4361 return (DCMD_ERR);
4362 }
4363
4364 return (DCMD_OK);
4365 }
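
/*
 * Illustrative use of ::whatthread (the pointer value is hypothetical): given
 * a target address, the dcmd prints each kernel thread whose stack contains
 * that value; with -v, the matching stack location and an indication of
 * whether it lies in the active portion of the stack are printed as well:
 *
 *	> 0xffffff01d0c3e580::whatthread -v
 */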
4366