xref: /freebsd-src/cddl/contrib/opensolaris/lib/libdtrace/common/dt_consume.c (revision f5678b698afb3a97f99804f87ebb179de5f87df0)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Copyright (c) 2011, Joyent, Inc. All rights reserved.
28  * Copyright (c) 2011 by Delphix. All rights reserved.
29  */
30 
31 #include <stdlib.h>
32 #include <strings.h>
33 #include <errno.h>
34 #include <unistd.h>
35 #include <limits.h>
36 #include <assert.h>
37 #include <ctype.h>
38 #if defined(sun)
39 #include <alloca.h>
40 #endif
41 #include <dt_impl.h>
42 #if !defined(sun)
43 #include <libproc_compat.h>
44 #endif
45 
46 #define	DT_MASK_LO 0x00000000FFFFFFFFULL
47 
/*
 * Absolute value of a long double.  This is open-coded here so that
 * libdtrace does not pick up a dependency on libm just for fabsl().
 */
static long double
dt_fabsl(long double x)
{
	return (x < 0 ? -x : x);
}
60 
61 /*
62  * 128-bit arithmetic functions needed to support the stddev() aggregating
63  * action.
64  */
/*
 * Return non-zero if the 128-bit value a is strictly greater than b.
 * Index 1 holds the most significant 64 bits; index 0 only breaks ties.
 */
static int
dt_gt_128(uint64_t *a, uint64_t *b)
{
	if (a[1] != b[1])
		return (a[1] > b[1]);

	return (a[0] > b[0]);
}
70 
/*
 * Return non-zero if the 128-bit value a is greater than or equal to b.
 * Index 1 holds the most significant 64 bits; index 0 only breaks ties.
 */
static int
dt_ge_128(uint64_t *a, uint64_t *b)
{
	if (a[1] != b[1])
		return (a[1] > b[1]);

	return (a[0] >= b[0]);
}
76 
/*
 * Return non-zero if the 128-bit value a is less than or equal to b.
 * Index 1 holds the most significant 64 bits; index 0 only breaks ties.
 */
static int
dt_le_128(uint64_t *a, uint64_t *b)
{
	if (a[1] != b[1])
		return (a[1] < b[1]);

	return (a[0] <= b[0]);
}
82 
/*
 * Shift the 128-bit value in a by b bits, in place.  a[0] holds the low
 * 64 bits and a[1] holds the high 64 bits.  If b is positive, shift left.
 * If b is negative, shift right; vacated bits are filled with zeros.
 * Shifting by 128 bits or more in either direction clears the value.
 */
static void
dt_shift_128(uint64_t *a, int b)
{
	unsigned int n;

	if (b == 0)
		return;

	/*
	 * Compute the magnitude in unsigned arithmetic so that negating the
	 * most negative int cannot overflow.
	 */
	n = (b < 0) ? 0U - (unsigned int)b : (unsigned int)b;

	/*
	 * Shifting a 64-bit word by 64 or more is undefined in C, so handle
	 * the "everything falls off the end" case explicitly.
	 */
	if (n >= 128) {
		a[0] = 0;
		a[1] = 0;
		return;
	}

	if (b < 0) {
		if (n >= 64) {
			a[0] = a[1] >> (n - 64);
			a[1] = 0;
		} else {
			/*
			 * The low n bits of the high word become the top n
			 * bits of the low word.  No mask is required:  an
			 * unsigned left shift discards the bits that do not
			 * fit.  (Here 1 <= n <= 63, so both shift counts are
			 * in range.)
			 */
			a[0] = (a[0] >> n) | (a[1] << (64 - n));
			a[1] >>= n;
		}
	} else {
		if (n >= 64) {
			a[1] = a[0] << (n - 64);
			a[0] = 0;
		} else {
			/*
			 * The top n bits of the low word carry into the
			 * bottom of the high word.
			 */
			a[1] = (a[1] << n) | (a[0] >> (64 - n));
			a[0] <<= n;
		}
	}
}
119 
/*
 * Return the zero-based position of the most significant set bit of the
 * 128-bit value a.  Both 0 and 1 yield 0.  The caller's value is not
 * modified; all shifting is done on a private copy.
 */
static int
dt_nbits_128(uint64_t *a)
{
	uint64_t zero[2] = { 0, 0 };
	uint64_t work[2];
	int count;

	work[0] = a[0];
	work[1] = a[1];

	/*
	 * Drop the lowest bit up front, then count how many further
	 * single-bit right shifts are needed to drain the value.
	 */
	dt_shift_128(work, -1);

	for (count = 0; dt_gt_128(work, zero); count++)
		dt_shift_128(work, -1);

	return (count);
}
138 
/*
 * Compute difference = minuend - subtrahend on 128-bit values (index 0 is
 * the low word, index 1 the high word).  Arithmetic wraps modulo 2^128.
 * Both words are computed before anything is stored, so difference may
 * alias either operand.
 */
static void
dt_subtract_128(uint64_t *minuend, uint64_t *subtrahend, uint64_t *difference)
{
	/* A borrow is needed when the low-word subtraction would wrap. */
	uint64_t borrow = (minuend[0] < subtrahend[0]) ? 1 : 0;
	uint64_t lo = minuend[0] - subtrahend[0];
	uint64_t hi = minuend[1] - subtrahend[1] - borrow;

	difference[0] = lo;
	difference[1] = hi;
}
151 
/*
 * Compute sum = addend1 + addend2 on 128-bit values (index 0 is the low
 * word, index 1 the high word).  Arithmetic wraps modulo 2^128.  Both words
 * are computed before anything is stored, so sum may alias either operand.
 */
static void
dt_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum)
{
	uint64_t lo = addend1[0] + addend2[0];
	/* The low word wrapped if it ended up smaller than an input. */
	uint64_t carry = (lo < addend1[0] || lo < addend2[0]) ? 1 : 0;
	uint64_t hi = addend1[1] + addend2[1] + carry;

	sum[0] = lo;
	sum[1] = hi;
}
164 
165 /*
166  * The basic idea is to break the 2 64-bit values into 4 32-bit values,
167  * use native multiplication on those, and then re-combine into the
168  * resulting 128-bit value.
169  *
170  * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) =
171  *     hi1 * hi2 << 64 +
172  *     hi1 * lo2 << 32 +
173  *     hi2 * lo1 << 32 +
174  *     lo1 * lo2
175  */
176 static void
177 dt_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product)
178 {
179 	uint64_t hi1, hi2, lo1, lo2;
180 	uint64_t tmp[2];
181 
182 	hi1 = factor1 >> 32;
183 	hi2 = factor2 >> 32;
184 
185 	lo1 = factor1 & DT_MASK_LO;
186 	lo2 = factor2 & DT_MASK_LO;
187 
188 	product[0] = lo1 * lo2;
189 	product[1] = hi1 * hi2;
190 
191 	tmp[0] = hi1 * lo2;
192 	tmp[1] = 0;
193 	dt_shift_128(tmp, 32);
194 	dt_add_128(product, tmp, product);
195 
196 	tmp[0] = hi2 * lo1;
197 	tmp[1] = 0;
198 	dt_shift_128(tmp, 32);
199 	dt_add_128(product, tmp, product);
200 }
201 
/*
 * Divide a 128-bit dividend by a non-zero 64-bit divisor using binary
 * long division.  The 128-bit quotient is stored through quotient; the
 * remainder is discarded.  The quotient is built in a local and copied out
 * at the end, so quotient may alias dividend.
 *
 * The divisor is first shifted left until its top bit sits in bit 127,
 * with a one-bit marker shifted by the same amount.  On each pass, if the
 * shifted divisor still fits in the running remainder it is subtracted
 * and the marker bit is recorded in the quotient; both are then shifted
 * right one bit.  The loop ends once the remainder is smaller than the
 * original divisor.
 */
static void
dt_divide_128(uint64_t *dividend, uint64_t divisor, uint64_t *quotient)
{
	uint64_t quot[2] = { 0, 0 };
	uint64_t bit[2] = { 1, 0 };
	uint64_t rem[2];
	uint64_t sub[2];
	uint64_t div128[2];
	uint64_t d;
	int width = 0;

	assert(divisor != 0);

	div128[0] = divisor;
	div128[1] = 0;

	rem[0] = dividend[0];
	rem[1] = dividend[1];

	sub[0] = divisor;
	sub[1] = 0;

	/* Count the significant bits in the divisor. */
	for (d = divisor; d > 0; d >>= 1)
		width++;

	dt_shift_128(sub, 128 - width);
	dt_shift_128(bit, 128 - width);

	while (dt_ge_128(rem, div128)) {
		if (dt_ge_128(rem, sub)) {
			dt_subtract_128(rem, sub, rem);
			quot[0] |= bit[0];
			quot[1] |= bit[1];
		}

		dt_shift_128(sub, -1);
		dt_shift_128(bit, -1);
	}

	quotient[0] = quot[0];
	quotient[1] = quot[1];
}
253 
254 /*
255  * This is the long-hand method of calculating a square root.
256  * The algorithm is as follows:
257  *
258  * 1. Group the digits by 2 from the right.
259  * 2. Over the leftmost group, find the largest single-digit number
260  *    whose square is less than that group.
261  * 3. Subtract the result of the previous step (2 or 4, depending) and
262  *    bring down the next two-digit group.
263  * 4. For the result R we have so far, find the largest single-digit number
264  *    x such that 2 * R * 10 * x + x^2 is less than the result from step 3.
265  *    (Note that this is doubling R and performing a decimal left-shift by 1
266  *    and searching for the appropriate decimal to fill the one's place.)
267  *    The value x is the next digit in the square root.
268  * Repeat steps 3 and 4 until the desired precision is reached.  (We're
269  * dealing with integers, so the above is sufficient.)
270  *
271  * In decimal, the square root of 582,734 would be calculated as so:
272  *
273  *     __7__6__3
274  *    | 58 27 34
275  *     -49       (7^2 == 49 => 7 is the first digit in the square root)
276  *      --
277  *       9 27    (Subtract and bring down the next group.)
278  * 146   8 76    (2 * 7 * 10 * 6 + 6^2 == 876 => 6 is the next digit in
279  *      -----     the square root)
280  *         51 34 (Subtract and bring down the next group.)
281  * 1523    45 69 (2 * 76 * 10 * 3 + 3^2 == 4569 => 3 is the next digit in
282  *         -----  the square root)
283  *          5 65 (remainder)
284  *
285  * The above algorithm applies similarly in binary, but note that the
286  * only possible non-zero value for x in step 4 is 1, so step 4 becomes a
287  * simple decision: is 2 * R * 2 * 1 + 1^2 (aka R << 2 + 1) less than the
288  * preceding difference?
289  *
290  * In binary, the square root of 11011011 would be calculated as so:
291  *
292  *     __1__1__1__0
293  *    | 11 01 10 11
294  *      01          (0 << 2 + 1 == 1 < 11 => this bit is 1)
295  *      --
296  *      10 01 10 11
297  * 101   1 01       (1 << 2 + 1 == 101 < 1001 => next bit is 1)
298  *      -----
299  *       1 00 10 11
300  * 1101    11 01    (11 << 2 + 1 == 1101 < 10010 => next bit is 1)
301  *       -------
302  *          1 01 11
303  * 11101    1 11 01 (111 << 2 + 1 == 11101 > 10111 => last bit is 0)
304  *
305  */
static uint64_t
dt_sqrt_128(uint64_t *square)
{
	uint64_t root[2] = { 0, 0 };
	uint64_t rem[2] = { 0, 0 };
	uint64_t one[2] = { 1, 0 };
	uint64_t pair[2];
	uint64_t trial[2];
	uint64_t npairs, shift;
	int i;

	/*
	 * Walk the input two bits at a time, starting from the pair that
	 * contains the most significant set bit.  Each pass produces one
	 * bit of the root.
	 */
	npairs = dt_nbits_128(square) / 2;
	shift = npairs * 2;

	for (i = 0; i <= npairs; i++, shift -= 2) {
		/*
		 * Isolate the next pair of bits and append it to the
		 * running remainder.
		 */
		pair[0] = square[0];
		pair[1] = square[1];
		dt_shift_128(pair, -shift);
		pair[0] &= 0x3;
		pair[1] = 0;

		dt_shift_128(rem, 2);
		dt_add_128(rem, pair, rem);

		/*
		 * trial = root << 2 + 1, computed from the root as it
		 * stands before this pass's bit is appended.
		 */
		trial[0] = root[0];
		trial[1] = root[1];
		dt_shift_128(trial, 2);
		dt_add_128(trial, one, trial);

		/*
		 * Make room for the new bit; it is set only if the trial
		 * value fits in the remainder.
		 */
		dt_shift_128(root, 1);

		if (dt_le_128(trial, rem)) {
			dt_subtract_128(rem, trial, rem);
			dt_add_128(root, one, root);
		}
	}

	assert(root[1] == 0);

	return (root[0]);
}
356 
/*
 * Compute an integer standard deviation from aggregation data.  As used
 * here, data[0] is the divisor for both averages (the sample count),
 * data[1] is the (signed) sum, and data[2..3] is the 128-bit sum of
 * squares.  Both the sum and the sum of squares are divided by normal
 * before being averaged.  Neither normal nor data[0] may be zero.
 */
uint64_t
dt_stddev(uint64_t *data, uint64_t normal)
{
	uint64_t mean_of_sq[2];
	uint64_t sq_of_mean[2];
	uint64_t variance[2];
	int64_t mean;

	/*
	 * The standard approximation for standard deviation is
	 * sqrt(average(x**2) - average(x)**2):  the square root of the
	 * average of the squares minus the square of the average.
	 */
	dt_divide_128(data + 2, normal, mean_of_sq);
	dt_divide_128(mean_of_sq, data[0], mean_of_sq);

	mean = (int64_t)data[1] / (int64_t)normal / (int64_t)data[0];

	/* Only the magnitude matters, as the average is about to be squared. */
	if (mean < 0)
		mean = -mean;

	dt_multiply_128((uint64_t)mean, (uint64_t)mean, sq_of_mean);
	dt_subtract_128(mean_of_sq, sq_of_mean, variance);

	return (dt_sqrt_128(variance));
}
384 
385 static int
386 dt_flowindent(dtrace_hdl_t *dtp, dtrace_probedata_t *data, dtrace_epid_t last,
387     dtrace_bufdesc_t *buf, size_t offs)
388 {
389 	dtrace_probedesc_t *pd = data->dtpda_pdesc, *npd;
390 	dtrace_eprobedesc_t *epd = data->dtpda_edesc, *nepd;
391 	char *p = pd->dtpd_provider, *n = pd->dtpd_name, *sub;
392 	dtrace_flowkind_t flow = DTRACEFLOW_NONE;
393 	const char *str = NULL;
394 	static const char *e_str[2] = { " -> ", " => " };
395 	static const char *r_str[2] = { " <- ", " <= " };
396 	static const char *ent = "entry", *ret = "return";
397 	static int entlen = 0, retlen = 0;
398 	dtrace_epid_t next, id = epd->dtepd_epid;
399 	int rval;
400 
401 	if (entlen == 0) {
402 		assert(retlen == 0);
403 		entlen = strlen(ent);
404 		retlen = strlen(ret);
405 	}
406 
407 	/*
408 	 * If the name of the probe is "entry" or ends with "-entry", we
409 	 * treat it as an entry; if it is "return" or ends with "-return",
410 	 * we treat it as a return.  (This allows application-provided probes
411 	 * like "method-entry" or "function-entry" to participate in flow
412 	 * indentation -- without accidentally misinterpreting popular probe
413 	 * names like "carpentry", "gentry" or "Coventry".)
414 	 */
415 	if ((sub = strstr(n, ent)) != NULL && sub[entlen] == '\0' &&
416 	    (sub == n || sub[-1] == '-')) {
417 		flow = DTRACEFLOW_ENTRY;
418 		str = e_str[strcmp(p, "syscall") == 0];
419 	} else if ((sub = strstr(n, ret)) != NULL && sub[retlen] == '\0' &&
420 	    (sub == n || sub[-1] == '-')) {
421 		flow = DTRACEFLOW_RETURN;
422 		str = r_str[strcmp(p, "syscall") == 0];
423 	}
424 
425 	/*
426 	 * If we're going to indent this, we need to check the ID of our last
427 	 * call.  If we're looking at the same probe ID but a different EPID,
428 	 * we _don't_ want to indent.  (Yes, there are some minor holes in
429 	 * this scheme -- it's a heuristic.)
430 	 */
431 	if (flow == DTRACEFLOW_ENTRY) {
432 		if ((last != DTRACE_EPIDNONE && id != last &&
433 		    pd->dtpd_id == dtp->dt_pdesc[last]->dtpd_id))
434 			flow = DTRACEFLOW_NONE;
435 	}
436 
437 	/*
438 	 * If we're going to unindent this, it's more difficult to see if
439 	 * we don't actually want to unindent it -- we need to look at the
440 	 * _next_ EPID.
441 	 */
442 	if (flow == DTRACEFLOW_RETURN) {
443 		offs += epd->dtepd_size;
444 
445 		do {
446 			if (offs >= buf->dtbd_size) {
447 				/*
448 				 * We're at the end -- maybe.  If the oldest
449 				 * record is non-zero, we need to wrap.
450 				 */
451 				if (buf->dtbd_oldest != 0) {
452 					offs = 0;
453 				} else {
454 					goto out;
455 				}
456 			}
457 
458 			next = *(uint32_t *)((uintptr_t)buf->dtbd_data + offs);
459 
460 			if (next == DTRACE_EPIDNONE)
461 				offs += sizeof (id);
462 		} while (next == DTRACE_EPIDNONE);
463 
464 		if ((rval = dt_epid_lookup(dtp, next, &nepd, &npd)) != 0)
465 			return (rval);
466 
467 		if (next != id && npd->dtpd_id == pd->dtpd_id)
468 			flow = DTRACEFLOW_NONE;
469 	}
470 
471 out:
472 	if (flow == DTRACEFLOW_ENTRY || flow == DTRACEFLOW_RETURN) {
473 		data->dtpda_prefix = str;
474 	} else {
475 		data->dtpda_prefix = "| ";
476 	}
477 
478 	if (flow == DTRACEFLOW_RETURN && data->dtpda_indent > 0)
479 		data->dtpda_indent -= 2;
480 
481 	data->dtpda_flow = flow;
482 
483 	return (0);
484 }
485 
486 static int
487 dt_nullprobe()
488 {
489 	return (DTRACE_CONSUME_THIS);
490 }
491 
492 static int
493 dt_nullrec()
494 {
495 	return (DTRACE_CONSUME_NEXT);
496 }
497 
498 int
499 dt_print_quantline(dtrace_hdl_t *dtp, FILE *fp, int64_t val,
500     uint64_t normal, long double total, char positives, char negatives)
501 {
502 	long double f;
503 	uint_t depth, len = 40;
504 
505 	const char *ats = "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@";
506 	const char *spaces = "                                        ";
507 
508 	assert(strlen(ats) == len && strlen(spaces) == len);
509 	assert(!(total == 0 && (positives || negatives)));
510 	assert(!(val < 0 && !negatives));
511 	assert(!(val > 0 && !positives));
512 	assert(!(val != 0 && total == 0));
513 
514 	if (!negatives) {
515 		if (positives) {
516 			f = (dt_fabsl((long double)val) * len) / total;
517 			depth = (uint_t)(f + 0.5);
518 		} else {
519 			depth = 0;
520 		}
521 
522 		return (dt_printf(dtp, fp, "|%s%s %-9lld\n", ats + len - depth,
523 		    spaces + depth, (long long)val / normal));
524 	}
525 
526 	if (!positives) {
527 		f = (dt_fabsl((long double)val) * len) / total;
528 		depth = (uint_t)(f + 0.5);
529 
530 		return (dt_printf(dtp, fp, "%s%s| %-9lld\n", spaces + depth,
531 		    ats + len - depth, (long long)val / normal));
532 	}
533 
534 	/*
535 	 * If we're here, we have both positive and negative bucket values.
536 	 * To express this graphically, we're going to generate both positive
537 	 * and negative bars separated by a centerline.  These bars are half
538 	 * the size of normal quantize()/lquantize() bars, so we divide the
539 	 * length in half before calculating the bar length.
540 	 */
541 	len /= 2;
542 	ats = &ats[len];
543 	spaces = &spaces[len];
544 
545 	f = (dt_fabsl((long double)val) * len) / total;
546 	depth = (uint_t)(f + 0.5);
547 
548 	if (val <= 0) {
549 		return (dt_printf(dtp, fp, "%s%s|%*s %-9lld\n", spaces + depth,
550 		    ats + len - depth, len, "", (long long)val / normal));
551 	} else {
552 		return (dt_printf(dtp, fp, "%20s|%s%s %-9lld\n", "",
553 		    ats + len - depth, spaces + depth,
554 		    (long long)val / normal));
555 	}
556 }
557 
558 int
559 dt_print_quantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
560     size_t size, uint64_t normal)
561 {
562 	const int64_t *data = addr;
563 	int i, first_bin = 0, last_bin = DTRACE_QUANTIZE_NBUCKETS - 1;
564 	long double total = 0;
565 	char positives = 0, negatives = 0;
566 
567 	if (size != DTRACE_QUANTIZE_NBUCKETS * sizeof (uint64_t))
568 		return (dt_set_errno(dtp, EDT_DMISMATCH));
569 
570 	while (first_bin < DTRACE_QUANTIZE_NBUCKETS - 1 && data[first_bin] == 0)
571 		first_bin++;
572 
573 	if (first_bin == DTRACE_QUANTIZE_NBUCKETS - 1) {
574 		/*
575 		 * There isn't any data.  This is possible if (and only if)
576 		 * negative increment values have been used.  In this case,
577 		 * we'll print the buckets around 0.
578 		 */
579 		first_bin = DTRACE_QUANTIZE_ZEROBUCKET - 1;
580 		last_bin = DTRACE_QUANTIZE_ZEROBUCKET + 1;
581 	} else {
582 		if (first_bin > 0)
583 			first_bin--;
584 
585 		while (last_bin > 0 && data[last_bin] == 0)
586 			last_bin--;
587 
588 		if (last_bin < DTRACE_QUANTIZE_NBUCKETS - 1)
589 			last_bin++;
590 	}
591 
592 	for (i = first_bin; i <= last_bin; i++) {
593 		positives |= (data[i] > 0);
594 		negatives |= (data[i] < 0);
595 		total += dt_fabsl((long double)data[i]);
596 	}
597 
598 	if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value",
599 	    "------------- Distribution -------------", "count") < 0)
600 		return (-1);
601 
602 	for (i = first_bin; i <= last_bin; i++) {
603 		if (dt_printf(dtp, fp, "%16lld ",
604 		    (long long)DTRACE_QUANTIZE_BUCKETVAL(i)) < 0)
605 			return (-1);
606 
607 		if (dt_print_quantline(dtp, fp, data[i], normal, total,
608 		    positives, negatives) < 0)
609 			return (-1);
610 	}
611 
612 	return (0);
613 }
614 
615 int
616 dt_print_lquantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
617     size_t size, uint64_t normal)
618 {
619 	const int64_t *data = addr;
620 	int i, first_bin, last_bin, base;
621 	uint64_t arg;
622 	long double total = 0;
623 	uint16_t step, levels;
624 	char positives = 0, negatives = 0;
625 
626 	if (size < sizeof (uint64_t))
627 		return (dt_set_errno(dtp, EDT_DMISMATCH));
628 
629 	arg = *data++;
630 	size -= sizeof (uint64_t);
631 
632 	base = DTRACE_LQUANTIZE_BASE(arg);
633 	step = DTRACE_LQUANTIZE_STEP(arg);
634 	levels = DTRACE_LQUANTIZE_LEVELS(arg);
635 
636 	first_bin = 0;
637 	last_bin = levels + 1;
638 
639 	if (size != sizeof (uint64_t) * (levels + 2))
640 		return (dt_set_errno(dtp, EDT_DMISMATCH));
641 
642 	while (first_bin <= levels + 1 && data[first_bin] == 0)
643 		first_bin++;
644 
645 	if (first_bin > levels + 1) {
646 		first_bin = 0;
647 		last_bin = 2;
648 	} else {
649 		if (first_bin > 0)
650 			first_bin--;
651 
652 		while (last_bin > 0 && data[last_bin] == 0)
653 			last_bin--;
654 
655 		if (last_bin < levels + 1)
656 			last_bin++;
657 	}
658 
659 	for (i = first_bin; i <= last_bin; i++) {
660 		positives |= (data[i] > 0);
661 		negatives |= (data[i] < 0);
662 		total += dt_fabsl((long double)data[i]);
663 	}
664 
665 	if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value",
666 	    "------------- Distribution -------------", "count") < 0)
667 		return (-1);
668 
669 	for (i = first_bin; i <= last_bin; i++) {
670 		char c[32];
671 		int err;
672 
673 		if (i == 0) {
674 			(void) snprintf(c, sizeof (c), "< %d",
675 			    base / (uint32_t)normal);
676 			err = dt_printf(dtp, fp, "%16s ", c);
677 		} else if (i == levels + 1) {
678 			(void) snprintf(c, sizeof (c), ">= %d",
679 			    base + (levels * step));
680 			err = dt_printf(dtp, fp, "%16s ", c);
681 		} else {
682 			err = dt_printf(dtp, fp, "%16d ",
683 			    base + (i - 1) * step);
684 		}
685 
686 		if (err < 0 || dt_print_quantline(dtp, fp, data[i], normal,
687 		    total, positives, negatives) < 0)
688 			return (-1);
689 	}
690 
691 	return (0);
692 }
693 
694 int
695 dt_print_llquantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
696     size_t size, uint64_t normal)
697 {
698 	int i, first_bin, last_bin, bin = 1, order, levels;
699 	uint16_t factor, low, high, nsteps;
700 	const int64_t *data = addr;
701 	int64_t value = 1, next, step;
702 	char positives = 0, negatives = 0;
703 	long double total = 0;
704 	uint64_t arg;
705 	char c[32];
706 
707 	if (size < sizeof (uint64_t))
708 		return (dt_set_errno(dtp, EDT_DMISMATCH));
709 
710 	arg = *data++;
711 	size -= sizeof (uint64_t);
712 
713 	factor = DTRACE_LLQUANTIZE_FACTOR(arg);
714 	low = DTRACE_LLQUANTIZE_LOW(arg);
715 	high = DTRACE_LLQUANTIZE_HIGH(arg);
716 	nsteps = DTRACE_LLQUANTIZE_NSTEP(arg);
717 
718 	/*
719 	 * We don't expect to be handed invalid llquantize() parameters here,
720 	 * but sanity check them (to a degree) nonetheless.
721 	 */
722 	if (size > INT32_MAX || factor < 2 || low >= high ||
723 	    nsteps == 0 || factor > nsteps)
724 		return (dt_set_errno(dtp, EDT_DMISMATCH));
725 
726 	levels = (int)size / sizeof (uint64_t);
727 
728 	first_bin = 0;
729 	last_bin = levels - 1;
730 
731 	while (first_bin < levels && data[first_bin] == 0)
732 		first_bin++;
733 
734 	if (first_bin == levels) {
735 		first_bin = 0;
736 		last_bin = 1;
737 	} else {
738 		if (first_bin > 0)
739 			first_bin--;
740 
741 		while (last_bin > 0 && data[last_bin] == 0)
742 			last_bin--;
743 
744 		if (last_bin < levels - 1)
745 			last_bin++;
746 	}
747 
748 	for (i = first_bin; i <= last_bin; i++) {
749 		positives |= (data[i] > 0);
750 		negatives |= (data[i] < 0);
751 		total += dt_fabsl((long double)data[i]);
752 	}
753 
754 	if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value",
755 	    "------------- Distribution -------------", "count") < 0)
756 		return (-1);
757 
758 	for (order = 0; order < low; order++)
759 		value *= factor;
760 
761 	next = value * factor;
762 	step = next > nsteps ? next / nsteps : 1;
763 
764 	if (first_bin == 0) {
765 		(void) snprintf(c, sizeof (c), "< %lld", (long long)value);
766 
767 		if (dt_printf(dtp, fp, "%16s ", c) < 0)
768 			return (-1);
769 
770 		if (dt_print_quantline(dtp, fp, data[0], normal,
771 		    total, positives, negatives) < 0)
772 			return (-1);
773 	}
774 
775 	while (order <= high) {
776 		if (bin >= first_bin && bin <= last_bin) {
777 			if (dt_printf(dtp, fp, "%16lld ", (long long)value) < 0)
778 				return (-1);
779 
780 			if (dt_print_quantline(dtp, fp, data[bin],
781 			    normal, total, positives, negatives) < 0)
782 				return (-1);
783 		}
784 
785 		assert(value < next);
786 		bin++;
787 
788 		if ((value += step) != next)
789 			continue;
790 
791 		next = value * factor;
792 		step = next > nsteps ? next / nsteps : 1;
793 		order++;
794 	}
795 
796 	if (last_bin < bin)
797 		return (0);
798 
799 	assert(last_bin == bin);
800 	(void) snprintf(c, sizeof (c), ">= %lld", (long long)value);
801 
802 	if (dt_printf(dtp, fp, "%16s ", c) < 0)
803 		return (-1);
804 
805 	return (dt_print_quantline(dtp, fp, data[bin], normal,
806 	    total, positives, negatives));
807 }
808 
809 /*ARGSUSED*/
810 static int
811 dt_print_average(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
812     size_t size, uint64_t normal)
813 {
814 	/* LINTED - alignment */
815 	int64_t *data = (int64_t *)addr;
816 
817 	return (dt_printf(dtp, fp, " %16lld", data[0] ?
818 	    (long long)(data[1] / (int64_t)normal / data[0]) : 0));
819 }
820 
821 /*ARGSUSED*/
822 static int
823 dt_print_stddev(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
824     size_t size, uint64_t normal)
825 {
826 	/* LINTED - alignment */
827 	uint64_t *data = (uint64_t *)addr;
828 
829 	return (dt_printf(dtp, fp, " %16llu", data[0] ?
830 	    (unsigned long long) dt_stddev(data, normal) : 0));
831 }
832 
833 /*ARGSUSED*/
834 int
835 dt_print_bytes(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
836     size_t nbytes, int width, int quiet, int forceraw)
837 {
838 	/*
839 	 * If the byte stream is a series of printable characters, followed by
840 	 * a terminating byte, we print it out as a string.  Otherwise, we
841 	 * assume that it's something else and just print the bytes.
842 	 */
843 	int i, j, margin = 5;
844 	char *c = (char *)addr;
845 
846 	if (nbytes == 0)
847 		return (0);
848 
849 	if (forceraw)
850 		goto raw;
851 
852 	if (dtp->dt_options[DTRACEOPT_RAWBYTES] != DTRACEOPT_UNSET)
853 		goto raw;
854 
855 	for (i = 0; i < nbytes; i++) {
856 		/*
857 		 * We define a "printable character" to be one for which
858 		 * isprint(3C) returns non-zero, isspace(3C) returns non-zero,
859 		 * or a character which is either backspace or the bell.
860 		 * Backspace and the bell are regrettably special because
861 		 * they fail the first two tests -- and yet they are entirely
862 		 * printable.  These are the only two control characters that
863 		 * have meaning for the terminal and for which isprint(3C) and
864 		 * isspace(3C) return 0.
865 		 */
866 		if (isprint(c[i]) || isspace(c[i]) ||
867 		    c[i] == '\b' || c[i] == '\a')
868 			continue;
869 
870 		if (c[i] == '\0' && i > 0) {
871 			/*
872 			 * This looks like it might be a string.  Before we
873 			 * assume that it is indeed a string, check the
874 			 * remainder of the byte range; if it contains
875 			 * additional non-nul characters, we'll assume that
876 			 * it's a binary stream that just happens to look like
877 			 * a string, and we'll print out the individual bytes.
878 			 */
879 			for (j = i + 1; j < nbytes; j++) {
880 				if (c[j] != '\0')
881 					break;
882 			}
883 
884 			if (j != nbytes)
885 				break;
886 
887 			if (quiet)
888 				return (dt_printf(dtp, fp, "%s", c));
889 			else
890 				return (dt_printf(dtp, fp, "  %-*s", width, c));
891 		}
892 
893 		break;
894 	}
895 
896 	if (i == nbytes) {
897 		/*
898 		 * The byte range is all printable characters, but there is
899 		 * no trailing nul byte.  We'll assume that it's a string and
900 		 * print it as such.
901 		 */
902 		char *s = alloca(nbytes + 1);
903 		bcopy(c, s, nbytes);
904 		s[nbytes] = '\0';
905 		return (dt_printf(dtp, fp, "  %-*s", width, s));
906 	}
907 
908 raw:
909 	if (dt_printf(dtp, fp, "\n%*s      ", margin, "") < 0)
910 		return (-1);
911 
912 	for (i = 0; i < 16; i++)
913 		if (dt_printf(dtp, fp, "  %c", "0123456789abcdef"[i]) < 0)
914 			return (-1);
915 
916 	if (dt_printf(dtp, fp, "  0123456789abcdef\n") < 0)
917 		return (-1);
918 
919 
920 	for (i = 0; i < nbytes; i += 16) {
921 		if (dt_printf(dtp, fp, "%*s%5x:", margin, "", i) < 0)
922 			return (-1);
923 
924 		for (j = i; j < i + 16 && j < nbytes; j++) {
925 			if (dt_printf(dtp, fp, " %02x", (uchar_t)c[j]) < 0)
926 				return (-1);
927 		}
928 
929 		while (j++ % 16) {
930 			if (dt_printf(dtp, fp, "   ") < 0)
931 				return (-1);
932 		}
933 
934 		if (dt_printf(dtp, fp, "  ") < 0)
935 			return (-1);
936 
937 		for (j = i; j < i + 16 && j < nbytes; j++) {
938 			if (dt_printf(dtp, fp, "%c",
939 			    c[j] < ' ' || c[j] > '~' ? '.' : c[j]) < 0)
940 				return (-1);
941 		}
942 
943 		if (dt_printf(dtp, fp, "\n") < 0)
944 			return (-1);
945 	}
946 
947 	return (0);
948 }
949 
950 int
951 dt_print_stack(dtrace_hdl_t *dtp, FILE *fp, const char *format,
952     caddr_t addr, int depth, int size)
953 {
954 	dtrace_syminfo_t dts;
955 	GElf_Sym sym;
956 	int i, indent;
957 	char c[PATH_MAX * 2];
958 	uint64_t pc;
959 
960 	if (dt_printf(dtp, fp, "\n") < 0)
961 		return (-1);
962 
963 	if (format == NULL)
964 		format = "%s";
965 
966 	if (dtp->dt_options[DTRACEOPT_STACKINDENT] != DTRACEOPT_UNSET)
967 		indent = (int)dtp->dt_options[DTRACEOPT_STACKINDENT];
968 	else
969 		indent = _dtrace_stkindent;
970 
971 	for (i = 0; i < depth; i++) {
972 		switch (size) {
973 		case sizeof (uint32_t):
974 			/* LINTED - alignment */
975 			pc = *((uint32_t *)addr);
976 			break;
977 
978 		case sizeof (uint64_t):
979 			/* LINTED - alignment */
980 			pc = *((uint64_t *)addr);
981 			break;
982 
983 		default:
984 			return (dt_set_errno(dtp, EDT_BADSTACKPC));
985 		}
986 
987 		if (pc == 0)
988 			break;
989 
990 		addr += size;
991 
992 		if (dt_printf(dtp, fp, "%*s", indent, "") < 0)
993 			return (-1);
994 
995 		if (dtrace_lookup_by_addr(dtp, pc, &sym, &dts) == 0) {
996 			if (pc > sym.st_value) {
997 				(void) snprintf(c, sizeof (c), "%s`%s+0x%llx",
998 				    dts.dts_object, dts.dts_name,
999 				    (u_longlong_t)(pc - sym.st_value));
1000 			} else {
1001 				(void) snprintf(c, sizeof (c), "%s`%s",
1002 				    dts.dts_object, dts.dts_name);
1003 			}
1004 		} else {
1005 			/*
1006 			 * We'll repeat the lookup, but this time we'll specify
1007 			 * a NULL GElf_Sym -- indicating that we're only
1008 			 * interested in the containing module.
1009 			 */
1010 			if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
1011 				(void) snprintf(c, sizeof (c), "%s`0x%llx",
1012 				    dts.dts_object, (u_longlong_t)pc);
1013 			} else {
1014 				(void) snprintf(c, sizeof (c), "0x%llx",
1015 				    (u_longlong_t)pc);
1016 			}
1017 		}
1018 
1019 		if (dt_printf(dtp, fp, format, c) < 0)
1020 			return (-1);
1021 
1022 		if (dt_printf(dtp, fp, "\n") < 0)
1023 			return (-1);
1024 	}
1025 
1026 	return (0);
1027 }
1028 
1029 int
1030 dt_print_ustack(dtrace_hdl_t *dtp, FILE *fp, const char *format,
1031     caddr_t addr, uint64_t arg)
1032 {
1033 	/* LINTED - alignment */
1034 	uint64_t *pc = (uint64_t *)addr;
1035 	uint32_t depth = DTRACE_USTACK_NFRAMES(arg);
1036 	uint32_t strsize = DTRACE_USTACK_STRSIZE(arg);
1037 	const char *strbase = addr + (depth + 1) * sizeof (uint64_t);
1038 	const char *str = strsize ? strbase : NULL;
1039 	int err = 0;
1040 
1041 	char name[PATH_MAX], objname[PATH_MAX], c[PATH_MAX * 2];
1042 	struct ps_prochandle *P;
1043 	GElf_Sym sym;
1044 	int i, indent;
1045 	pid_t pid;
1046 
1047 	if (depth == 0)
1048 		return (0);
1049 
1050 	pid = (pid_t)*pc++;
1051 
1052 	if (dt_printf(dtp, fp, "\n") < 0)
1053 		return (-1);
1054 
1055 	if (format == NULL)
1056 		format = "%s";
1057 
1058 	if (dtp->dt_options[DTRACEOPT_STACKINDENT] != DTRACEOPT_UNSET)
1059 		indent = (int)dtp->dt_options[DTRACEOPT_STACKINDENT];
1060 	else
1061 		indent = _dtrace_stkindent;
1062 
1063 	/*
1064 	 * Ultimately, we need to add an entry point in the library vector for
1065 	 * determining <symbol, offset> from <pid, address>.  For now, if
1066 	 * this is a vector open, we just print the raw address or string.
1067 	 */
1068 	if (dtp->dt_vector == NULL)
1069 		P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0);
1070 	else
1071 		P = NULL;
1072 
1073 	if (P != NULL)
1074 		dt_proc_lock(dtp, P); /* lock handle while we perform lookups */
1075 
1076 	for (i = 0; i < depth && pc[i] != 0; i++) {
1077 		const prmap_t *map;
1078 
1079 		if ((err = dt_printf(dtp, fp, "%*s", indent, "")) < 0)
1080 			break;
1081 
1082 		if (P != NULL && Plookup_by_addr(P, pc[i],
1083 		    name, sizeof (name), &sym) == 0) {
1084 			(void) Pobjname(P, pc[i], objname, sizeof (objname));
1085 
1086 			if (pc[i] > sym.st_value) {
1087 				(void) snprintf(c, sizeof (c),
1088 				    "%s`%s+0x%llx", dt_basename(objname), name,
1089 				    (u_longlong_t)(pc[i] - sym.st_value));
1090 			} else {
1091 				(void) snprintf(c, sizeof (c),
1092 				    "%s`%s", dt_basename(objname), name);
1093 			}
1094 		} else if (str != NULL && str[0] != '\0' && str[0] != '@' &&
1095 		    (P != NULL && ((map = Paddr_to_map(P, pc[i])) == NULL ||
1096 		    (map->pr_mflags & MA_WRITE)))) {
1097 			/*
1098 			 * If the current string pointer in the string table
1099 			 * does not point to an empty string _and_ the program
1100 			 * counter falls in a writable region, we'll use the
1101 			 * string from the string table instead of the raw
1102 			 * address.  This last condition is necessary because
1103 			 * some (broken) ustack helpers will return a string
1104 			 * even for a program counter that they can't
1105 			 * identify.  If we have a string for a program
1106 			 * counter that falls in a segment that isn't
1107 			 * writable, we assume that we have fallen into this
1108 			 * case and we refuse to use the string.
1109 			 */
1110 			(void) snprintf(c, sizeof (c), "%s", str);
1111 		} else {
1112 			if (P != NULL && Pobjname(P, pc[i], objname,
1113 			    sizeof (objname)) != 0) {
1114 				(void) snprintf(c, sizeof (c), "%s`0x%llx",
1115 				    dt_basename(objname), (u_longlong_t)pc[i]);
1116 			} else {
1117 				(void) snprintf(c, sizeof (c), "0x%llx",
1118 				    (u_longlong_t)pc[i]);
1119 			}
1120 		}
1121 
1122 		if ((err = dt_printf(dtp, fp, format, c)) < 0)
1123 			break;
1124 
1125 		if ((err = dt_printf(dtp, fp, "\n")) < 0)
1126 			break;
1127 
1128 		if (str != NULL && str[0] == '@') {
1129 			/*
1130 			 * If the first character of the string is an "at" sign,
1131 			 * then the string is inferred to be an annotation --
1132 			 * and it is printed out beneath the frame and offset
1133 			 * with brackets.
1134 			 */
1135 			if ((err = dt_printf(dtp, fp, "%*s", indent, "")) < 0)
1136 				break;
1137 
1138 			(void) snprintf(c, sizeof (c), "  [ %s ]", &str[1]);
1139 
1140 			if ((err = dt_printf(dtp, fp, format, c)) < 0)
1141 				break;
1142 
1143 			if ((err = dt_printf(dtp, fp, "\n")) < 0)
1144 				break;
1145 		}
1146 
1147 		if (str != NULL) {
1148 			str += strlen(str) + 1;
1149 			if (str - strbase >= strsize)
1150 				str = NULL;
1151 		}
1152 	}
1153 
1154 	if (P != NULL) {
1155 		dt_proc_unlock(dtp, P);
1156 		dt_proc_release(dtp, P);
1157 	}
1158 
1159 	return (err);
1160 }
1161 
1162 static int
1163 dt_print_usym(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr, dtrace_actkind_t act)
1164 {
1165 	/* LINTED - alignment */
1166 	uint64_t pid = ((uint64_t *)addr)[0];
1167 	/* LINTED - alignment */
1168 	uint64_t pc = ((uint64_t *)addr)[1];
1169 	const char *format = "  %-50s";
1170 	char *s;
1171 	int n, len = 256;
1172 
1173 	if (act == DTRACEACT_USYM && dtp->dt_vector == NULL) {
1174 		struct ps_prochandle *P;
1175 
1176 		if ((P = dt_proc_grab(dtp, pid,
1177 		    PGRAB_RDONLY | PGRAB_FORCE, 0)) != NULL) {
1178 			GElf_Sym sym;
1179 
1180 			dt_proc_lock(dtp, P);
1181 
1182 			if (Plookup_by_addr(P, pc, NULL, 0, &sym) == 0)
1183 				pc = sym.st_value;
1184 
1185 			dt_proc_unlock(dtp, P);
1186 			dt_proc_release(dtp, P);
1187 		}
1188 	}
1189 
1190 	do {
1191 		n = len;
1192 		s = alloca(n);
1193 	} while ((len = dtrace_uaddr2str(dtp, pid, pc, s, n)) > n);
1194 
1195 	return (dt_printf(dtp, fp, format, s));
1196 }
1197 
1198 int
1199 dt_print_umod(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)
1200 {
1201 	/* LINTED - alignment */
1202 	uint64_t pid = ((uint64_t *)addr)[0];
1203 	/* LINTED - alignment */
1204 	uint64_t pc = ((uint64_t *)addr)[1];
1205 	int err = 0;
1206 
1207 	char objname[PATH_MAX], c[PATH_MAX * 2];
1208 	struct ps_prochandle *P;
1209 
1210 	if (format == NULL)
1211 		format = "  %-50s";
1212 
1213 	/*
1214 	 * See the comment in dt_print_ustack() for the rationale for
1215 	 * printing raw addresses in the vectored case.
1216 	 */
1217 	if (dtp->dt_vector == NULL)
1218 		P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0);
1219 	else
1220 		P = NULL;
1221 
1222 	if (P != NULL)
1223 		dt_proc_lock(dtp, P); /* lock handle while we perform lookups */
1224 
1225 	if (P != NULL && Pobjname(P, pc, objname, sizeof (objname)) != 0) {
1226 		(void) snprintf(c, sizeof (c), "%s", dt_basename(objname));
1227 	} else {
1228 		(void) snprintf(c, sizeof (c), "0x%llx", (u_longlong_t)pc);
1229 	}
1230 
1231 	err = dt_printf(dtp, fp, format, c);
1232 
1233 	if (P != NULL) {
1234 		dt_proc_unlock(dtp, P);
1235 		dt_proc_release(dtp, P);
1236 	}
1237 
1238 	return (err);
1239 }
1240 
1241 int
1242 dt_print_memory(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr)
1243 {
1244 	int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET);
1245 	size_t nbytes = *((uintptr_t *) addr);
1246 
1247 	return (dt_print_bytes(dtp, fp, addr + sizeof(uintptr_t),
1248 	    nbytes, 50, quiet, 1));
1249 }
1250 
1251 typedef struct dt_type_cbdata {
1252 	dtrace_hdl_t		*dtp;
1253 	dtrace_typeinfo_t	dtt;
1254 	caddr_t			addr;
1255 	caddr_t			addrend;
1256 	const char		*name;
1257 	int			f_type;
1258 	int			indent;
1259 	int			type_width;
1260 	int			name_width;
1261 	FILE			*fp;
1262 } dt_type_cbdata_t;
1263 
1264 static int	dt_print_type_data(dt_type_cbdata_t *, ctf_id_t);
1265 
1266 static int
1267 dt_print_type_member(const char *name, ctf_id_t type, ulong_t off, void *arg)
1268 {
1269 	dt_type_cbdata_t cbdata;
1270 	dt_type_cbdata_t *cbdatap = arg;
1271 	ssize_t ssz;
1272 
1273 	if ((ssz = ctf_type_size(cbdatap->dtt.dtt_ctfp, type)) <= 0)
1274 		return (0);
1275 
1276 	off /= 8;
1277 
1278 	cbdata = *cbdatap;
1279 	cbdata.name = name;
1280 	cbdata.addr += off;
1281 	cbdata.addrend = cbdata.addr + ssz;
1282 
1283 	return (dt_print_type_data(&cbdata, type));
1284 }
1285 
1286 static int
1287 dt_print_type_width(const char *name, ctf_id_t type, ulong_t off, void *arg)
1288 {
1289 	char buf[DT_TYPE_NAMELEN];
1290 	char *p;
1291 	dt_type_cbdata_t *cbdatap = arg;
1292 	size_t sz = strlen(name);
1293 
1294 	ctf_type_name(cbdatap->dtt.dtt_ctfp, type, buf, sizeof (buf));
1295 
1296 	if ((p = strchr(buf, '[')) != NULL)
1297 		p[-1] = '\0';
1298 	else
1299 		p = "";
1300 
1301 	sz += strlen(p);
1302 
1303 	if (sz > cbdatap->name_width)
1304 		cbdatap->name_width = sz;
1305 
1306 	sz = strlen(buf);
1307 
1308 	if (sz > cbdatap->type_width)
1309 		cbdatap->type_width = sz;
1310 
1311 	return (0);
1312 }
1313 
1314 static int
1315 dt_print_type_data(dt_type_cbdata_t *cbdatap, ctf_id_t type)
1316 {
1317 	caddr_t addr = cbdatap->addr;
1318 	caddr_t addrend = cbdatap->addrend;
1319 	char buf[DT_TYPE_NAMELEN];
1320 	char *p;
1321 	int cnt = 0;
1322 	uint_t kind = ctf_type_kind(cbdatap->dtt.dtt_ctfp, type);
1323 	ssize_t ssz = ctf_type_size(cbdatap->dtt.dtt_ctfp, type);
1324 
1325 	ctf_type_name(cbdatap->dtt.dtt_ctfp, type, buf, sizeof (buf));
1326 
1327 	if ((p = strchr(buf, '[')) != NULL)
1328 		p[-1] = '\0';
1329 	else
1330 		p = "";
1331 
1332 	if (cbdatap->f_type) {
1333 		int type_width = roundup(cbdatap->type_width + 1, 4);
1334 		int name_width = roundup(cbdatap->name_width + 1, 4);
1335 
1336 		name_width -= strlen(cbdatap->name);
1337 
1338 		dt_printf(cbdatap->dtp, cbdatap->fp, "%*s%-*s%s%-*s	= ",cbdatap->indent * 4,"",type_width,buf,cbdatap->name,name_width,p);
1339 	}
1340 
1341 	while (addr < addrend) {
1342 		dt_type_cbdata_t cbdata;
1343 		ctf_arinfo_t arinfo;
1344 		ctf_encoding_t cte;
1345 		uintptr_t *up;
1346 		void *vp = addr;
1347 		cbdata = *cbdatap;
1348 		cbdata.name = "";
1349 		cbdata.addr = addr;
1350 		cbdata.addrend = addr + ssz;
1351 		cbdata.f_type = 0;
1352 		cbdata.indent++;
1353 		cbdata.type_width = 0;
1354 		cbdata.name_width = 0;
1355 
1356 		if (cnt > 0)
1357 			dt_printf(cbdatap->dtp, cbdatap->fp, "%*s", cbdatap->indent * 4,"");
1358 
1359 		switch (kind) {
1360 		case CTF_K_INTEGER:
1361 			if (ctf_type_encoding(cbdatap->dtt.dtt_ctfp, type, &cte) != 0)
1362 				return (-1);
1363 			if ((cte.cte_format & CTF_INT_SIGNED) != 0)
1364 				switch (cte.cte_bits) {
1365 				case 8:
1366 					if (isprint(*((char *) vp)))
1367 						dt_printf(cbdatap->dtp, cbdatap->fp, "'%c', ", *((char *) vp));
1368 					dt_printf(cbdatap->dtp, cbdatap->fp, "%d (0x%x);\n", *((char *) vp), *((char *) vp));
1369 					break;
1370 				case 16:
1371 					dt_printf(cbdatap->dtp, cbdatap->fp, "%hd (0x%hx);\n", *((short *) vp), *((u_short *) vp));
1372 					break;
1373 				case 32:
1374 					dt_printf(cbdatap->dtp, cbdatap->fp, "%d (0x%x);\n", *((int *) vp), *((u_int *) vp));
1375 					break;
1376 				case 64:
1377 					dt_printf(cbdatap->dtp, cbdatap->fp, "%jd (0x%jx);\n", *((long long *) vp), *((unsigned long long *) vp));
1378 					break;
1379 				default:
1380 					dt_printf(cbdatap->dtp, cbdatap->fp, "CTF_K_INTEGER: format %x offset %u bits %u\n",cte.cte_format,cte.cte_offset,cte.cte_bits);
1381 					break;
1382 				}
1383 			else
1384 				switch (cte.cte_bits) {
1385 				case 8:
1386 					dt_printf(cbdatap->dtp, cbdatap->fp, "%u (0x%x);\n", *((uint8_t *) vp) & 0xff, *((uint8_t *) vp) & 0xff);
1387 					break;
1388 				case 16:
1389 					dt_printf(cbdatap->dtp, cbdatap->fp, "%hu (0x%hx);\n", *((u_short *) vp), *((u_short *) vp));
1390 					break;
1391 				case 32:
1392 					dt_printf(cbdatap->dtp, cbdatap->fp, "%u (0x%x);\n", *((u_int *) vp), *((u_int *) vp));
1393 					break;
1394 				case 64:
1395 					dt_printf(cbdatap->dtp, cbdatap->fp, "%ju (0x%jx);\n", *((unsigned long long *) vp), *((unsigned long long *) vp));
1396 					break;
1397 				default:
1398 					dt_printf(cbdatap->dtp, cbdatap->fp, "CTF_K_INTEGER: format %x offset %u bits %u\n",cte.cte_format,cte.cte_offset,cte.cte_bits);
1399 					break;
1400 				}
1401 			break;
1402 		case CTF_K_FLOAT:
1403 			dt_printf(cbdatap->dtp, cbdatap->fp, "CTF_K_FLOAT: format %x offset %u bits %u\n",cte.cte_format,cte.cte_offset,cte.cte_bits);
1404 			break;
1405 		case CTF_K_POINTER:
1406 			dt_printf(cbdatap->dtp, cbdatap->fp, "%p;\n", *((void **) addr));
1407 			break;
1408 		case CTF_K_ARRAY:
1409 			if (ctf_array_info(cbdatap->dtt.dtt_ctfp, type, &arinfo) != 0)
1410 				return (-1);
1411 			dt_printf(cbdatap->dtp, cbdatap->fp, "{\n%*s",cbdata.indent * 4,"");
1412 			dt_print_type_data(&cbdata, arinfo.ctr_contents);
1413 			dt_printf(cbdatap->dtp, cbdatap->fp, "%*s};\n",cbdatap->indent * 4,"");
1414 			break;
1415 		case CTF_K_FUNCTION:
1416 			dt_printf(cbdatap->dtp, cbdatap->fp, "CTF_K_FUNCTION:\n");
1417 			break;
1418 		case CTF_K_STRUCT:
1419 			cbdata.f_type = 1;
1420 			if (ctf_member_iter(cbdatap->dtt.dtt_ctfp, type,
1421 			    dt_print_type_width, &cbdata) != 0)
1422 				return (-1);
1423 			dt_printf(cbdatap->dtp, cbdatap->fp, "{\n");
1424 			if (ctf_member_iter(cbdatap->dtt.dtt_ctfp, type,
1425 			    dt_print_type_member, &cbdata) != 0)
1426 				return (-1);
1427 			dt_printf(cbdatap->dtp, cbdatap->fp, "%*s};\n",cbdatap->indent * 4,"");
1428 			break;
1429 		case CTF_K_UNION:
1430 			cbdata.f_type = 1;
1431 			if (ctf_member_iter(cbdatap->dtt.dtt_ctfp, type,
1432 			    dt_print_type_width, &cbdata) != 0)
1433 				return (-1);
1434 			dt_printf(cbdatap->dtp, cbdatap->fp, "{\n");
1435 			if (ctf_member_iter(cbdatap->dtt.dtt_ctfp, type,
1436 			    dt_print_type_member, &cbdata) != 0)
1437 				return (-1);
1438 			dt_printf(cbdatap->dtp, cbdatap->fp, "%*s};\n",cbdatap->indent * 4,"");
1439 			break;
1440 		case CTF_K_ENUM:
1441 			dt_printf(cbdatap->dtp, cbdatap->fp, "%s;\n", ctf_enum_name(cbdatap->dtt.dtt_ctfp, type, *((int *) vp)));
1442 			break;
1443 		case CTF_K_TYPEDEF:
1444 			dt_print_type_data(&cbdata, ctf_type_reference(cbdatap->dtt.dtt_ctfp,type));
1445 			break;
1446 		case CTF_K_VOLATILE:
1447 			if (cbdatap->f_type)
1448 				dt_printf(cbdatap->dtp, cbdatap->fp, "volatile ");
1449 			dt_print_type_data(&cbdata, ctf_type_reference(cbdatap->dtt.dtt_ctfp,type));
1450 			break;
1451 		case CTF_K_CONST:
1452 			if (cbdatap->f_type)
1453 				dt_printf(cbdatap->dtp, cbdatap->fp, "const ");
1454 			dt_print_type_data(&cbdata, ctf_type_reference(cbdatap->dtt.dtt_ctfp,type));
1455 			break;
1456 		case CTF_K_RESTRICT:
1457 			if (cbdatap->f_type)
1458 				dt_printf(cbdatap->dtp, cbdatap->fp, "restrict ");
1459 			dt_print_type_data(&cbdata, ctf_type_reference(cbdatap->dtt.dtt_ctfp,type));
1460 			break;
1461 		default:
1462 			break;
1463 		}
1464 
1465 		addr += ssz;
1466 		cnt++;
1467 	}
1468 
1469 	return (0);
1470 }
1471 
1472 static int
1473 dt_print_type(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr)
1474 {
1475 	caddr_t addrend;
1476 	char *p;
1477 	dtrace_typeinfo_t dtt;
1478 	dt_type_cbdata_t cbdata;
1479 	int num = 0;
1480 	int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET);
1481 	ssize_t ssz;
1482 
1483 	if (!quiet)
1484 		dt_printf(dtp, fp, "\n");
1485 
1486 	/* Get the total number of bytes of data buffered. */
1487 	size_t nbytes = *((uintptr_t *) addr);
1488 	addr += sizeof(uintptr_t);
1489 
1490 	/*
1491 	 * Get the size of the type so that we can check that it matches
1492 	 * the CTF data we look up and so that we can figure out how many
1493 	 * type elements are buffered.
1494 	 */
1495 	size_t typs = *((uintptr_t *) addr);
1496 	addr += sizeof(uintptr_t);
1497 
1498 	/*
1499 	 * Point to the type string in the buffer. Get it's string
1500 	 * length and round it up to become the offset to the start
1501 	 * of the buffered type data which we would like to be aligned
1502 	 * for easy access.
1503 	 */
1504 	char *strp = (char *) addr;
1505 	int offset = roundup(strlen(strp) + 1, sizeof(uintptr_t));
1506 
1507 	/*
1508 	 * The type string might have a format such as 'int [20]'.
1509 	 * Check if there is an array dimension present.
1510 	 */
1511 	if ((p = strchr(strp, '[')) != NULL) {
1512 		/* Strip off the array dimension. */
1513 		*p++ = '\0';
1514 
1515 		for (; *p != '\0' && *p != ']'; p++)
1516 			num = num * 10 + *p - '0';
1517 	} else
1518 		/* No array dimension, so default. */
1519 		num = 1;
1520 
1521 	/* Lookup the CTF type from the type string. */
1522 	if (dtrace_lookup_by_type(dtp,  DTRACE_OBJ_EVERY, strp, &dtt) < 0)
1523 		return (-1);
1524 
1525 	/* Offset the buffer address to the start of the data... */
1526 	addr += offset;
1527 
1528 	ssz = ctf_type_size(dtt.dtt_ctfp, dtt.dtt_type);
1529 
1530 	if (typs != ssz) {
1531 		printf("Expected type size from buffer (%lu) to match type size looked up now (%ld)\n", (u_long) typs, (long) ssz);
1532 		return (-1);
1533 	}
1534 
1535 	cbdata.dtp = dtp;
1536 	cbdata.dtt = dtt;
1537 	cbdata.name = "";
1538 	cbdata.addr = addr;
1539 	cbdata.addrend = addr + nbytes;
1540 	cbdata.indent = 1;
1541 	cbdata.f_type = 1;
1542 	cbdata.type_width = 0;
1543 	cbdata.name_width = 0;
1544 	cbdata.fp = fp;
1545 
1546 	return (dt_print_type_data(&cbdata, dtt.dtt_type));
1547 }
1548 
1549 static int
1550 dt_print_sym(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)
1551 {
1552 	/* LINTED - alignment */
1553 	uint64_t pc = *((uint64_t *)addr);
1554 	dtrace_syminfo_t dts;
1555 	GElf_Sym sym;
1556 	char c[PATH_MAX * 2];
1557 
1558 	if (format == NULL)
1559 		format = "  %-50s";
1560 
1561 	if (dtrace_lookup_by_addr(dtp, pc, &sym, &dts) == 0) {
1562 		(void) snprintf(c, sizeof (c), "%s`%s",
1563 		    dts.dts_object, dts.dts_name);
1564 	} else {
1565 		/*
1566 		 * We'll repeat the lookup, but this time we'll specify a
1567 		 * NULL GElf_Sym -- indicating that we're only interested in
1568 		 * the containing module.
1569 		 */
1570 		if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
1571 			(void) snprintf(c, sizeof (c), "%s`0x%llx",
1572 			    dts.dts_object, (u_longlong_t)pc);
1573 		} else {
1574 			(void) snprintf(c, sizeof (c), "0x%llx",
1575 			    (u_longlong_t)pc);
1576 		}
1577 	}
1578 
1579 	if (dt_printf(dtp, fp, format, c) < 0)
1580 		return (-1);
1581 
1582 	return (0);
1583 }
1584 
1585 int
1586 dt_print_mod(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)
1587 {
1588 	/* LINTED - alignment */
1589 	uint64_t pc = *((uint64_t *)addr);
1590 	dtrace_syminfo_t dts;
1591 	char c[PATH_MAX * 2];
1592 
1593 	if (format == NULL)
1594 		format = "  %-50s";
1595 
1596 	if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
1597 		(void) snprintf(c, sizeof (c), "%s", dts.dts_object);
1598 	} else {
1599 		(void) snprintf(c, sizeof (c), "0x%llx", (u_longlong_t)pc);
1600 	}
1601 
1602 	if (dt_printf(dtp, fp, format, c) < 0)
1603 		return (-1);
1604 
1605 	return (0);
1606 }
1607 
1608 typedef struct dt_normal {
1609 	dtrace_aggvarid_t dtnd_id;
1610 	uint64_t dtnd_normal;
1611 } dt_normal_t;
1612 
1613 static int
1614 dt_normalize_agg(const dtrace_aggdata_t *aggdata, void *arg)
1615 {
1616 	dt_normal_t *normal = arg;
1617 	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1618 	dtrace_aggvarid_t id = normal->dtnd_id;
1619 
1620 	if (agg->dtagd_nrecs == 0)
1621 		return (DTRACE_AGGWALK_NEXT);
1622 
1623 	if (agg->dtagd_varid != id)
1624 		return (DTRACE_AGGWALK_NEXT);
1625 
1626 	((dtrace_aggdata_t *)aggdata)->dtada_normal = normal->dtnd_normal;
1627 	return (DTRACE_AGGWALK_NORMALIZE);
1628 }
1629 
1630 static int
1631 dt_normalize(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec)
1632 {
1633 	dt_normal_t normal;
1634 	caddr_t addr;
1635 
1636 	/*
1637 	 * We (should) have two records:  the aggregation ID followed by the
1638 	 * normalization value.
1639 	 */
1640 	addr = base + rec->dtrd_offset;
1641 
1642 	if (rec->dtrd_size != sizeof (dtrace_aggvarid_t))
1643 		return (dt_set_errno(dtp, EDT_BADNORMAL));
1644 
1645 	/* LINTED - alignment */
1646 	normal.dtnd_id = *((dtrace_aggvarid_t *)addr);
1647 	rec++;
1648 
1649 	if (rec->dtrd_action != DTRACEACT_LIBACT)
1650 		return (dt_set_errno(dtp, EDT_BADNORMAL));
1651 
1652 	if (rec->dtrd_arg != DT_ACT_NORMALIZE)
1653 		return (dt_set_errno(dtp, EDT_BADNORMAL));
1654 
1655 	addr = base + rec->dtrd_offset;
1656 
1657 	switch (rec->dtrd_size) {
1658 	case sizeof (uint64_t):
1659 		/* LINTED - alignment */
1660 		normal.dtnd_normal = *((uint64_t *)addr);
1661 		break;
1662 	case sizeof (uint32_t):
1663 		/* LINTED - alignment */
1664 		normal.dtnd_normal = *((uint32_t *)addr);
1665 		break;
1666 	case sizeof (uint16_t):
1667 		/* LINTED - alignment */
1668 		normal.dtnd_normal = *((uint16_t *)addr);
1669 		break;
1670 	case sizeof (uint8_t):
1671 		normal.dtnd_normal = *((uint8_t *)addr);
1672 		break;
1673 	default:
1674 		return (dt_set_errno(dtp, EDT_BADNORMAL));
1675 	}
1676 
1677 	(void) dtrace_aggregate_walk(dtp, dt_normalize_agg, &normal);
1678 
1679 	return (0);
1680 }
1681 
1682 static int
1683 dt_denormalize_agg(const dtrace_aggdata_t *aggdata, void *arg)
1684 {
1685 	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1686 	dtrace_aggvarid_t id = *((dtrace_aggvarid_t *)arg);
1687 
1688 	if (agg->dtagd_nrecs == 0)
1689 		return (DTRACE_AGGWALK_NEXT);
1690 
1691 	if (agg->dtagd_varid != id)
1692 		return (DTRACE_AGGWALK_NEXT);
1693 
1694 	return (DTRACE_AGGWALK_DENORMALIZE);
1695 }
1696 
1697 static int
1698 dt_clear_agg(const dtrace_aggdata_t *aggdata, void *arg)
1699 {
1700 	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1701 	dtrace_aggvarid_t id = *((dtrace_aggvarid_t *)arg);
1702 
1703 	if (agg->dtagd_nrecs == 0)
1704 		return (DTRACE_AGGWALK_NEXT);
1705 
1706 	if (agg->dtagd_varid != id)
1707 		return (DTRACE_AGGWALK_NEXT);
1708 
1709 	return (DTRACE_AGGWALK_CLEAR);
1710 }
1711 
1712 typedef struct dt_trunc {
1713 	dtrace_aggvarid_t dttd_id;
1714 	uint64_t dttd_remaining;
1715 } dt_trunc_t;
1716 
1717 static int
1718 dt_trunc_agg(const dtrace_aggdata_t *aggdata, void *arg)
1719 {
1720 	dt_trunc_t *trunc = arg;
1721 	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1722 	dtrace_aggvarid_t id = trunc->dttd_id;
1723 
1724 	if (agg->dtagd_nrecs == 0)
1725 		return (DTRACE_AGGWALK_NEXT);
1726 
1727 	if (agg->dtagd_varid != id)
1728 		return (DTRACE_AGGWALK_NEXT);
1729 
1730 	if (trunc->dttd_remaining == 0)
1731 		return (DTRACE_AGGWALK_REMOVE);
1732 
1733 	trunc->dttd_remaining--;
1734 	return (DTRACE_AGGWALK_NEXT);
1735 }
1736 
1737 static int
1738 dt_trunc(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec)
1739 {
1740 	dt_trunc_t trunc;
1741 	caddr_t addr;
1742 	int64_t remaining;
1743 	int (*func)(dtrace_hdl_t *, dtrace_aggregate_f *, void *);
1744 
1745 	/*
1746 	 * We (should) have two records:  the aggregation ID followed by the
1747 	 * number of aggregation entries after which the aggregation is to be
1748 	 * truncated.
1749 	 */
1750 	addr = base + rec->dtrd_offset;
1751 
1752 	if (rec->dtrd_size != sizeof (dtrace_aggvarid_t))
1753 		return (dt_set_errno(dtp, EDT_BADTRUNC));
1754 
1755 	/* LINTED - alignment */
1756 	trunc.dttd_id = *((dtrace_aggvarid_t *)addr);
1757 	rec++;
1758 
1759 	if (rec->dtrd_action != DTRACEACT_LIBACT)
1760 		return (dt_set_errno(dtp, EDT_BADTRUNC));
1761 
1762 	if (rec->dtrd_arg != DT_ACT_TRUNC)
1763 		return (dt_set_errno(dtp, EDT_BADTRUNC));
1764 
1765 	addr = base + rec->dtrd_offset;
1766 
1767 	switch (rec->dtrd_size) {
1768 	case sizeof (uint64_t):
1769 		/* LINTED - alignment */
1770 		remaining = *((int64_t *)addr);
1771 		break;
1772 	case sizeof (uint32_t):
1773 		/* LINTED - alignment */
1774 		remaining = *((int32_t *)addr);
1775 		break;
1776 	case sizeof (uint16_t):
1777 		/* LINTED - alignment */
1778 		remaining = *((int16_t *)addr);
1779 		break;
1780 	case sizeof (uint8_t):
1781 		remaining = *((int8_t *)addr);
1782 		break;
1783 	default:
1784 		return (dt_set_errno(dtp, EDT_BADNORMAL));
1785 	}
1786 
1787 	if (remaining < 0) {
1788 		func = dtrace_aggregate_walk_valsorted;
1789 		remaining = -remaining;
1790 	} else {
1791 		func = dtrace_aggregate_walk_valrevsorted;
1792 	}
1793 
1794 	assert(remaining >= 0);
1795 	trunc.dttd_remaining = remaining;
1796 
1797 	(void) func(dtp, dt_trunc_agg, &trunc);
1798 
1799 	return (0);
1800 }
1801 
1802 static int
1803 dt_print_datum(dtrace_hdl_t *dtp, FILE *fp, dtrace_recdesc_t *rec,
1804     caddr_t addr, size_t size, uint64_t normal)
1805 {
1806 	int err;
1807 	dtrace_actkind_t act = rec->dtrd_action;
1808 
1809 	switch (act) {
1810 	case DTRACEACT_STACK:
1811 		return (dt_print_stack(dtp, fp, NULL, addr,
1812 		    rec->dtrd_arg, rec->dtrd_size / rec->dtrd_arg));
1813 
1814 	case DTRACEACT_USTACK:
1815 	case DTRACEACT_JSTACK:
1816 		return (dt_print_ustack(dtp, fp, NULL, addr, rec->dtrd_arg));
1817 
1818 	case DTRACEACT_USYM:
1819 	case DTRACEACT_UADDR:
1820 		return (dt_print_usym(dtp, fp, addr, act));
1821 
1822 	case DTRACEACT_UMOD:
1823 		return (dt_print_umod(dtp, fp, NULL, addr));
1824 
1825 	case DTRACEACT_SYM:
1826 		return (dt_print_sym(dtp, fp, NULL, addr));
1827 
1828 	case DTRACEACT_MOD:
1829 		return (dt_print_mod(dtp, fp, NULL, addr));
1830 
1831 	case DTRACEAGG_QUANTIZE:
1832 		return (dt_print_quantize(dtp, fp, addr, size, normal));
1833 
1834 	case DTRACEAGG_LQUANTIZE:
1835 		return (dt_print_lquantize(dtp, fp, addr, size, normal));
1836 
1837 	case DTRACEAGG_LLQUANTIZE:
1838 		return (dt_print_llquantize(dtp, fp, addr, size, normal));
1839 
1840 	case DTRACEAGG_AVG:
1841 		return (dt_print_average(dtp, fp, addr, size, normal));
1842 
1843 	case DTRACEAGG_STDDEV:
1844 		return (dt_print_stddev(dtp, fp, addr, size, normal));
1845 
1846 	default:
1847 		break;
1848 	}
1849 
1850 	switch (size) {
1851 	case sizeof (uint64_t):
1852 		err = dt_printf(dtp, fp, " %16lld",
1853 		    /* LINTED - alignment */
1854 		    (long long)*((uint64_t *)addr) / normal);
1855 		break;
1856 	case sizeof (uint32_t):
1857 		/* LINTED - alignment */
1858 		err = dt_printf(dtp, fp, " %8d", *((uint32_t *)addr) /
1859 		    (uint32_t)normal);
1860 		break;
1861 	case sizeof (uint16_t):
1862 		/* LINTED - alignment */
1863 		err = dt_printf(dtp, fp, " %5d", *((uint16_t *)addr) /
1864 		    (uint32_t)normal);
1865 		break;
1866 	case sizeof (uint8_t):
1867 		err = dt_printf(dtp, fp, " %3d", *((uint8_t *)addr) /
1868 		    (uint32_t)normal);
1869 		break;
1870 	default:
1871 		err = dt_print_bytes(dtp, fp, addr, size, 50, 0, 0);
1872 		break;
1873 	}
1874 
1875 	return (err);
1876 }
1877 
1878 int
1879 dt_print_aggs(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
1880 {
1881 	int i, aggact = 0;
1882 	dt_print_aggdata_t *pd = arg;
1883 	const dtrace_aggdata_t *aggdata = aggsdata[0];
1884 	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1885 	FILE *fp = pd->dtpa_fp;
1886 	dtrace_hdl_t *dtp = pd->dtpa_dtp;
1887 	dtrace_recdesc_t *rec;
1888 	dtrace_actkind_t act;
1889 	caddr_t addr;
1890 	size_t size;
1891 
1892 	/*
1893 	 * Iterate over each record description in the key, printing the traced
1894 	 * data, skipping the first datum (the tuple member created by the
1895 	 * compiler).
1896 	 */
1897 	for (i = 1; i < agg->dtagd_nrecs; i++) {
1898 		rec = &agg->dtagd_rec[i];
1899 		act = rec->dtrd_action;
1900 		addr = aggdata->dtada_data + rec->dtrd_offset;
1901 		size = rec->dtrd_size;
1902 
1903 		if (DTRACEACT_ISAGG(act)) {
1904 			aggact = i;
1905 			break;
1906 		}
1907 
1908 		if (dt_print_datum(dtp, fp, rec, addr, size, 1) < 0)
1909 			return (-1);
1910 
1911 		if (dt_buffered_flush(dtp, NULL, rec, aggdata,
1912 		    DTRACE_BUFDATA_AGGKEY) < 0)
1913 			return (-1);
1914 	}
1915 
1916 	assert(aggact != 0);
1917 
1918 	for (i = (naggvars == 1 ? 0 : 1); i < naggvars; i++) {
1919 		uint64_t normal;
1920 
1921 		aggdata = aggsdata[i];
1922 		agg = aggdata->dtada_desc;
1923 		rec = &agg->dtagd_rec[aggact];
1924 		act = rec->dtrd_action;
1925 		addr = aggdata->dtada_data + rec->dtrd_offset;
1926 		size = rec->dtrd_size;
1927 
1928 		assert(DTRACEACT_ISAGG(act));
1929 		normal = aggdata->dtada_normal;
1930 
1931 		if (dt_print_datum(dtp, fp, rec, addr, size, normal) < 0)
1932 			return (-1);
1933 
1934 		if (dt_buffered_flush(dtp, NULL, rec, aggdata,
1935 		    DTRACE_BUFDATA_AGGVAL) < 0)
1936 			return (-1);
1937 
1938 		if (!pd->dtpa_allunprint)
1939 			agg->dtagd_flags |= DTRACE_AGD_PRINTED;
1940 	}
1941 
1942 	if (dt_printf(dtp, fp, "\n") < 0)
1943 		return (-1);
1944 
1945 	if (dt_buffered_flush(dtp, NULL, NULL, aggdata,
1946 	    DTRACE_BUFDATA_AGGFORMAT | DTRACE_BUFDATA_AGGLAST) < 0)
1947 		return (-1);
1948 
1949 	return (0);
1950 }
1951 
1952 int
1953 dt_print_agg(const dtrace_aggdata_t *aggdata, void *arg)
1954 {
1955 	dt_print_aggdata_t *pd = arg;
1956 	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1957 	dtrace_aggvarid_t aggvarid = pd->dtpa_id;
1958 
1959 	if (pd->dtpa_allunprint) {
1960 		if (agg->dtagd_flags & DTRACE_AGD_PRINTED)
1961 			return (0);
1962 	} else {
1963 		/*
1964 		 * If we're not printing all unprinted aggregations, then the
1965 		 * aggregation variable ID denotes a specific aggregation
1966 		 * variable that we should print -- skip any other aggregations
1967 		 * that we encounter.
1968 		 */
1969 		if (agg->dtagd_nrecs == 0)
1970 			return (0);
1971 
1972 		if (aggvarid != agg->dtagd_varid)
1973 			return (0);
1974 	}
1975 
1976 	return (dt_print_aggs(&aggdata, 1, arg));
1977 }
1978 
1979 int
1980 dt_setopt(dtrace_hdl_t *dtp, const dtrace_probedata_t *data,
1981     const char *option, const char *value)
1982 {
1983 	int len, rval;
1984 	char *msg;
1985 	const char *errstr;
1986 	dtrace_setoptdata_t optdata;
1987 
1988 	bzero(&optdata, sizeof (optdata));
1989 	(void) dtrace_getopt(dtp, option, &optdata.dtsda_oldval);
1990 
1991 	if (dtrace_setopt(dtp, option, value) == 0) {
1992 		(void) dtrace_getopt(dtp, option, &optdata.dtsda_newval);
1993 		optdata.dtsda_probe = data;
1994 		optdata.dtsda_option = option;
1995 		optdata.dtsda_handle = dtp;
1996 
1997 		if ((rval = dt_handle_setopt(dtp, &optdata)) != 0)
1998 			return (rval);
1999 
2000 		return (0);
2001 	}
2002 
2003 	errstr = dtrace_errmsg(dtp, dtrace_errno(dtp));
2004 	len = strlen(option) + strlen(value) + strlen(errstr) + 80;
2005 	msg = alloca(len);
2006 
2007 	(void) snprintf(msg, len, "couldn't set option \"%s\" to \"%s\": %s\n",
2008 	    option, value, errstr);
2009 
2010 	if ((rval = dt_handle_liberr(dtp, data, msg)) == 0)
2011 		return (0);
2012 
2013 	return (rval);
2014 }
2015 
2016 static int
2017 dt_consume_cpu(dtrace_hdl_t *dtp, FILE *fp, int cpu, dtrace_bufdesc_t *buf,
2018     dtrace_consume_probe_f *efunc, dtrace_consume_rec_f *rfunc, void *arg)
2019 {
2020 	dtrace_epid_t id;
2021 	size_t offs, start = buf->dtbd_oldest, end = buf->dtbd_size;
2022 	int flow = (dtp->dt_options[DTRACEOPT_FLOWINDENT] != DTRACEOPT_UNSET);
2023 	int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET);
2024 	int rval, i, n;
2025 	dtrace_epid_t last = DTRACE_EPIDNONE;
2026 	uint64_t tracememsize = 0;
2027 	dtrace_probedata_t data;
2028 	uint64_t drops;
2029 	caddr_t addr;
2030 
2031 	bzero(&data, sizeof (data));
2032 	data.dtpda_handle = dtp;
2033 	data.dtpda_cpu = cpu;
2034 
2035 again:
2036 	for (offs = start; offs < end; ) {
2037 		dtrace_eprobedesc_t *epd;
2038 
2039 		/*
2040 		 * We're guaranteed to have an ID.
2041 		 */
2042 		id = *(uint32_t *)((uintptr_t)buf->dtbd_data + offs);
2043 
2044 		if (id == DTRACE_EPIDNONE) {
2045 			/*
2046 			 * This is filler to assure proper alignment of the
2047 			 * next record; we simply ignore it.
2048 			 */
2049 			offs += sizeof (id);
2050 			continue;
2051 		}
2052 
2053 		if ((rval = dt_epid_lookup(dtp, id, &data.dtpda_edesc,
2054 		    &data.dtpda_pdesc)) != 0)
2055 			return (rval);
2056 
2057 		epd = data.dtpda_edesc;
2058 		data.dtpda_data = buf->dtbd_data + offs;
2059 
2060 		if (data.dtpda_edesc->dtepd_uarg != DT_ECB_DEFAULT) {
2061 			rval = dt_handle(dtp, &data);
2062 
2063 			if (rval == DTRACE_CONSUME_NEXT)
2064 				goto nextepid;
2065 
2066 			if (rval == DTRACE_CONSUME_ERROR)
2067 				return (-1);
2068 		}
2069 
2070 		if (flow)
2071 			(void) dt_flowindent(dtp, &data, last, buf, offs);
2072 
2073 		rval = (*efunc)(&data, arg);
2074 
2075 		if (flow) {
2076 			if (data.dtpda_flow == DTRACEFLOW_ENTRY)
2077 				data.dtpda_indent += 2;
2078 		}
2079 
2080 		if (rval == DTRACE_CONSUME_NEXT)
2081 			goto nextepid;
2082 
2083 		if (rval == DTRACE_CONSUME_ABORT)
2084 			return (dt_set_errno(dtp, EDT_DIRABORT));
2085 
2086 		if (rval != DTRACE_CONSUME_THIS)
2087 			return (dt_set_errno(dtp, EDT_BADRVAL));
2088 
2089 		for (i = 0; i < epd->dtepd_nrecs; i++) {
2090 			dtrace_recdesc_t *rec = &epd->dtepd_rec[i];
2091 			dtrace_actkind_t act = rec->dtrd_action;
2092 
2093 			data.dtpda_data = buf->dtbd_data + offs +
2094 			    rec->dtrd_offset;
2095 			addr = data.dtpda_data;
2096 
2097 			if (act == DTRACEACT_LIBACT) {
2098 				uint64_t arg = rec->dtrd_arg;
2099 				dtrace_aggvarid_t id;
2100 
2101 				switch (arg) {
2102 				case DT_ACT_CLEAR:
2103 					/* LINTED - alignment */
2104 					id = *((dtrace_aggvarid_t *)addr);
2105 					(void) dtrace_aggregate_walk(dtp,
2106 					    dt_clear_agg, &id);
2107 					continue;
2108 
2109 				case DT_ACT_DENORMALIZE:
2110 					/* LINTED - alignment */
2111 					id = *((dtrace_aggvarid_t *)addr);
2112 					(void) dtrace_aggregate_walk(dtp,
2113 					    dt_denormalize_agg, &id);
2114 					continue;
2115 
2116 				case DT_ACT_FTRUNCATE:
2117 					if (fp == NULL)
2118 						continue;
2119 
2120 					(void) fflush(fp);
2121 					(void) ftruncate(fileno(fp), 0);
2122 					(void) fseeko(fp, 0, SEEK_SET);
2123 					continue;
2124 
2125 				case DT_ACT_NORMALIZE:
2126 					if (i == epd->dtepd_nrecs - 1)
2127 						return (dt_set_errno(dtp,
2128 						    EDT_BADNORMAL));
2129 
2130 					if (dt_normalize(dtp,
2131 					    buf->dtbd_data + offs, rec) != 0)
2132 						return (-1);
2133 
2134 					i++;
2135 					continue;
2136 
2137 				case DT_ACT_SETOPT: {
2138 					uint64_t *opts = dtp->dt_options;
2139 					dtrace_recdesc_t *valrec;
2140 					uint32_t valsize;
2141 					caddr_t val;
2142 					int rv;
2143 
2144 					if (i == epd->dtepd_nrecs - 1) {
2145 						return (dt_set_errno(dtp,
2146 						    EDT_BADSETOPT));
2147 					}
2148 
2149 					valrec = &epd->dtepd_rec[++i];
2150 					valsize = valrec->dtrd_size;
2151 
2152 					if (valrec->dtrd_action != act ||
2153 					    valrec->dtrd_arg != arg) {
2154 						return (dt_set_errno(dtp,
2155 						    EDT_BADSETOPT));
2156 					}
2157 
2158 					if (valsize > sizeof (uint64_t)) {
2159 						val = buf->dtbd_data + offs +
2160 						    valrec->dtrd_offset;
2161 					} else {
2162 						val = "1";
2163 					}
2164 
2165 					rv = dt_setopt(dtp, &data, addr, val);
2166 
2167 					if (rv != 0)
2168 						return (-1);
2169 
2170 					flow = (opts[DTRACEOPT_FLOWINDENT] !=
2171 					    DTRACEOPT_UNSET);
2172 					quiet = (opts[DTRACEOPT_QUIET] !=
2173 					    DTRACEOPT_UNSET);
2174 
2175 					continue;
2176 				}
2177 
2178 				case DT_ACT_TRUNC:
2179 					if (i == epd->dtepd_nrecs - 1)
2180 						return (dt_set_errno(dtp,
2181 						    EDT_BADTRUNC));
2182 
2183 					if (dt_trunc(dtp,
2184 					    buf->dtbd_data + offs, rec) != 0)
2185 						return (-1);
2186 
2187 					i++;
2188 					continue;
2189 
2190 				default:
2191 					continue;
2192 				}
2193 			}
2194 
2195 			if (act == DTRACEACT_TRACEMEM_DYNSIZE &&
2196 			    rec->dtrd_size == sizeof (uint64_t)) {
2197 			    	/* LINTED - alignment */
2198 				tracememsize = *((unsigned long long *)addr);
2199 				continue;
2200 			}
2201 
2202 			rval = (*rfunc)(&data, rec, arg);
2203 
2204 			if (rval == DTRACE_CONSUME_NEXT)
2205 				continue;
2206 
2207 			if (rval == DTRACE_CONSUME_ABORT)
2208 				return (dt_set_errno(dtp, EDT_DIRABORT));
2209 
2210 			if (rval != DTRACE_CONSUME_THIS)
2211 				return (dt_set_errno(dtp, EDT_BADRVAL));
2212 
2213 			if (act == DTRACEACT_STACK) {
2214 				int depth = rec->dtrd_arg;
2215 
2216 				if (dt_print_stack(dtp, fp, NULL, addr, depth,
2217 				    rec->dtrd_size / depth) < 0)
2218 					return (-1);
2219 				goto nextrec;
2220 			}
2221 
2222 			if (act == DTRACEACT_USTACK ||
2223 			    act == DTRACEACT_JSTACK) {
2224 				if (dt_print_ustack(dtp, fp, NULL,
2225 				    addr, rec->dtrd_arg) < 0)
2226 					return (-1);
2227 				goto nextrec;
2228 			}
2229 
2230 			if (act == DTRACEACT_SYM) {
2231 				if (dt_print_sym(dtp, fp, NULL, addr) < 0)
2232 					return (-1);
2233 				goto nextrec;
2234 			}
2235 
2236 			if (act == DTRACEACT_MOD) {
2237 				if (dt_print_mod(dtp, fp, NULL, addr) < 0)
2238 					return (-1);
2239 				goto nextrec;
2240 			}
2241 
2242 			if (act == DTRACEACT_USYM || act == DTRACEACT_UADDR) {
2243 				if (dt_print_usym(dtp, fp, addr, act) < 0)
2244 					return (-1);
2245 				goto nextrec;
2246 			}
2247 
2248 			if (act == DTRACEACT_UMOD) {
2249 				if (dt_print_umod(dtp, fp, NULL, addr) < 0)
2250 					return (-1);
2251 				goto nextrec;
2252 			}
2253 
2254 			if (act == DTRACEACT_PRINTM) {
2255 				if (dt_print_memory(dtp, fp, addr) < 0)
2256 					return (-1);
2257 				goto nextrec;
2258 			}
2259 
2260 			if (act == DTRACEACT_PRINTT) {
2261 				if (dt_print_type(dtp, fp, addr) < 0)
2262 					return (-1);
2263 				goto nextrec;
2264 			}
2265 
2266 			if (DTRACEACT_ISPRINTFLIKE(act)) {
2267 				void *fmtdata;
2268 				int (*func)(dtrace_hdl_t *, FILE *, void *,
2269 				    const dtrace_probedata_t *,
2270 				    const dtrace_recdesc_t *, uint_t,
2271 				    const void *buf, size_t);
2272 
2273 				if ((fmtdata = dt_format_lookup(dtp,
2274 				    rec->dtrd_format)) == NULL)
2275 					goto nofmt;
2276 
2277 				switch (act) {
2278 				case DTRACEACT_PRINTF:
2279 					func = dtrace_fprintf;
2280 					break;
2281 				case DTRACEACT_PRINTA:
2282 					func = dtrace_fprinta;
2283 					break;
2284 				case DTRACEACT_SYSTEM:
2285 					func = dtrace_system;
2286 					break;
2287 				case DTRACEACT_FREOPEN:
2288 					func = dtrace_freopen;
2289 					break;
2290 				}
2291 
2292 				n = (*func)(dtp, fp, fmtdata, &data,
2293 				    rec, epd->dtepd_nrecs - i,
2294 				    (uchar_t *)buf->dtbd_data + offs,
2295 				    buf->dtbd_size - offs);
2296 
2297 				if (n < 0)
2298 					return (-1); /* errno is set for us */
2299 
2300 				if (n > 0)
2301 					i += n - 1;
2302 				goto nextrec;
2303 			}
2304 
2305 			/*
2306 			 * If this is a DIF expression, and the record has a
2307 			 * format set, this indicates we have a CTF type name
2308 			 * associated with the data and we should try to print
2309 			 * it out by type.
2310 			 */
2311 			if (act == DTRACEACT_DIFEXPR) {
2312 				const char *strdata = dt_strdata_lookup(dtp,
2313 				    rec->dtrd_format);
2314 				if (strdata != NULL) {
2315 					n = dtrace_print(dtp, fp, strdata,
2316 					    addr, rec->dtrd_size);
2317 
2318 					/*
2319 					 * dtrace_print() will return -1 on
2320 					 * error, or return the number of bytes
2321 					 * consumed.  It will return 0 if the
2322 					 * type couldn't be determined, and we
2323 					 * should fall through to the normal
2324 					 * trace method.
2325 					 */
2326 					if (n < 0)
2327 						return (-1);
2328 
2329 					if (n > 0)
2330 						goto nextrec;
2331 				}
2332 			}
2333 
2334 nofmt:
2335 			if (act == DTRACEACT_PRINTA) {
2336 				dt_print_aggdata_t pd;
2337 				dtrace_aggvarid_t *aggvars;
2338 				int j, naggvars = 0;
2339 				size_t size = ((epd->dtepd_nrecs - i) *
2340 				    sizeof (dtrace_aggvarid_t));
2341 
2342 				if ((aggvars = dt_alloc(dtp, size)) == NULL)
2343 					return (-1);
2344 
2345 				/*
2346 				 * This might be a printa() with multiple
2347 				 * aggregation variables.  We need to scan
2348 				 * forward through the records until we find
2349 				 * a record from a different statement.
2350 				 */
2351 				for (j = i; j < epd->dtepd_nrecs; j++) {
2352 					dtrace_recdesc_t *nrec;
2353 					caddr_t naddr;
2354 
2355 					nrec = &epd->dtepd_rec[j];
2356 
2357 					if (nrec->dtrd_uarg != rec->dtrd_uarg)
2358 						break;
2359 
2360 					if (nrec->dtrd_action != act) {
2361 						return (dt_set_errno(dtp,
2362 						    EDT_BADAGG));
2363 					}
2364 
2365 					naddr = buf->dtbd_data + offs +
2366 					    nrec->dtrd_offset;
2367 
2368 					aggvars[naggvars++] =
2369 					    /* LINTED - alignment */
2370 					    *((dtrace_aggvarid_t *)naddr);
2371 				}
2372 
2373 				i = j - 1;
2374 				bzero(&pd, sizeof (pd));
2375 				pd.dtpa_dtp = dtp;
2376 				pd.dtpa_fp = fp;
2377 
2378 				assert(naggvars >= 1);
2379 
2380 				if (naggvars == 1) {
2381 					pd.dtpa_id = aggvars[0];
2382 					dt_free(dtp, aggvars);
2383 
2384 					if (dt_printf(dtp, fp, "\n") < 0 ||
2385 					    dtrace_aggregate_walk_sorted(dtp,
2386 					    dt_print_agg, &pd) < 0)
2387 						return (-1);
2388 					goto nextrec;
2389 				}
2390 
2391 				if (dt_printf(dtp, fp, "\n") < 0 ||
2392 				    dtrace_aggregate_walk_joined(dtp, aggvars,
2393 				    naggvars, dt_print_aggs, &pd) < 0) {
2394 					dt_free(dtp, aggvars);
2395 					return (-1);
2396 				}
2397 
2398 				dt_free(dtp, aggvars);
2399 				goto nextrec;
2400 			}
2401 
2402 			if (act == DTRACEACT_TRACEMEM) {
2403 				if (tracememsize == 0 ||
2404 				    tracememsize > rec->dtrd_size) {
2405 					tracememsize = rec->dtrd_size;
2406 				}
2407 
2408 				n = dt_print_bytes(dtp, fp, addr,
2409 				    tracememsize, 33, quiet, 1);
2410 
2411 				tracememsize = 0;
2412 
2413 				if (n < 0)
2414 					return (-1);
2415 
2416 				goto nextrec;
2417 			}
2418 
2419 			switch (rec->dtrd_size) {
2420 			case sizeof (uint64_t):
2421 				n = dt_printf(dtp, fp,
2422 				    quiet ? "%lld" : " %16lld",
2423 				    /* LINTED - alignment */
2424 				    *((unsigned long long *)addr));
2425 				break;
2426 			case sizeof (uint32_t):
2427 				n = dt_printf(dtp, fp, quiet ? "%d" : " %8d",
2428 				    /* LINTED - alignment */
2429 				    *((uint32_t *)addr));
2430 				break;
2431 			case sizeof (uint16_t):
2432 				n = dt_printf(dtp, fp, quiet ? "%d" : " %5d",
2433 				    /* LINTED - alignment */
2434 				    *((uint16_t *)addr));
2435 				break;
2436 			case sizeof (uint8_t):
2437 				n = dt_printf(dtp, fp, quiet ? "%d" : " %3d",
2438 				    *((uint8_t *)addr));
2439 				break;
2440 			default:
2441 				n = dt_print_bytes(dtp, fp, addr,
2442 				    rec->dtrd_size, 33, quiet, 0);
2443 				break;
2444 			}
2445 
2446 			if (n < 0)
2447 				return (-1); /* errno is set for us */
2448 
2449 nextrec:
2450 			if (dt_buffered_flush(dtp, &data, rec, NULL, 0) < 0)
2451 				return (-1); /* errno is set for us */
2452 		}
2453 
2454 		/*
2455 		 * Call the record callback with a NULL record to indicate
2456 		 * that we're done processing this EPID.
2457 		 */
2458 		rval = (*rfunc)(&data, NULL, arg);
2459 nextepid:
2460 		offs += epd->dtepd_size;
2461 		last = id;
2462 	}
2463 
2464 	if (buf->dtbd_oldest != 0 && start == buf->dtbd_oldest) {
2465 		end = buf->dtbd_oldest;
2466 		start = 0;
2467 		goto again;
2468 	}
2469 
2470 	if ((drops = buf->dtbd_drops) == 0)
2471 		return (0);
2472 
2473 	/*
2474 	 * Explicitly zero the drops to prevent us from processing them again.
2475 	 */
2476 	buf->dtbd_drops = 0;
2477 
2478 	return (dt_handle_cpudrop(dtp, cpu, DTRACEDROP_PRINCIPAL, drops));
2479 }
2480 
2481 typedef struct dt_begin {
2482 	dtrace_consume_probe_f *dtbgn_probefunc;
2483 	dtrace_consume_rec_f *dtbgn_recfunc;
2484 	void *dtbgn_arg;
2485 	dtrace_handle_err_f *dtbgn_errhdlr;
2486 	void *dtbgn_errarg;
2487 	int dtbgn_beginonly;
2488 } dt_begin_t;
2489 
2490 static int
2491 dt_consume_begin_probe(const dtrace_probedata_t *data, void *arg)
2492 {
2493 	dt_begin_t *begin = (dt_begin_t *)arg;
2494 	dtrace_probedesc_t *pd = data->dtpda_pdesc;
2495 
2496 	int r1 = (strcmp(pd->dtpd_provider, "dtrace") == 0);
2497 	int r2 = (strcmp(pd->dtpd_name, "BEGIN") == 0);
2498 
2499 	if (begin->dtbgn_beginonly) {
2500 		if (!(r1 && r2))
2501 			return (DTRACE_CONSUME_NEXT);
2502 	} else {
2503 		if (r1 && r2)
2504 			return (DTRACE_CONSUME_NEXT);
2505 	}
2506 
2507 	/*
2508 	 * We have a record that we're interested in.  Now call the underlying
2509 	 * probe function...
2510 	 */
2511 	return (begin->dtbgn_probefunc(data, begin->dtbgn_arg));
2512 }
2513 
2514 static int
2515 dt_consume_begin_record(const dtrace_probedata_t *data,
2516     const dtrace_recdesc_t *rec, void *arg)
2517 {
2518 	dt_begin_t *begin = (dt_begin_t *)arg;
2519 
2520 	return (begin->dtbgn_recfunc(data, rec, begin->dtbgn_arg));
2521 }
2522 
2523 static int
2524 dt_consume_begin_error(const dtrace_errdata_t *data, void *arg)
2525 {
2526 	dt_begin_t *begin = (dt_begin_t *)arg;
2527 	dtrace_probedesc_t *pd = data->dteda_pdesc;
2528 
2529 	int r1 = (strcmp(pd->dtpd_provider, "dtrace") == 0);
2530 	int r2 = (strcmp(pd->dtpd_name, "BEGIN") == 0);
2531 
2532 	if (begin->dtbgn_beginonly) {
2533 		if (!(r1 && r2))
2534 			return (DTRACE_HANDLE_OK);
2535 	} else {
2536 		if (r1 && r2)
2537 			return (DTRACE_HANDLE_OK);
2538 	}
2539 
2540 	return (begin->dtbgn_errhdlr(data, begin->dtbgn_errarg));
2541 }
2542 
2543 static int
2544 dt_consume_begin(dtrace_hdl_t *dtp, FILE *fp, dtrace_bufdesc_t *buf,
2545     dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg)
2546 {
2547 	/*
2548 	 * There's this idea that the BEGIN probe should be processed before
2549 	 * everything else, and that the END probe should be processed after
2550 	 * anything else.  In the common case, this is pretty easy to deal
2551 	 * with.  However, a situation may arise where the BEGIN enabling and
2552 	 * END enabling are on the same CPU, and some enabling in the middle
2553 	 * occurred on a different CPU.  To deal with this (blech!) we need to
2554 	 * consume the BEGIN buffer up until the end of the BEGIN probe, and
2555 	 * then set it aside.  We will then process every other CPU, and then
2556 	 * we'll return to the BEGIN CPU and process the rest of the data
2557 	 * (which will inevitably include the END probe, if any).  Making this
2558 	 * even more complicated (!) is the library's ERROR enabling.  Because
2559 	 * this enabling is processed before we even get into the consume call
2560 	 * back, any ERROR firing would result in the library's ERROR enabling
2561 	 * being processed twice -- once in our first pass (for BEGIN probes),
2562 	 * and again in our second pass (for everything but BEGIN probes).  To
2563 	 * deal with this, we interpose on the ERROR handler to assure that we
2564 	 * only process ERROR enablings induced by BEGIN enablings in the
2565 	 * first pass, and that we only process ERROR enablings _not_ induced
2566 	 * by BEGIN enablings in the second pass.
2567 	 */
2568 	dt_begin_t begin;
2569 	processorid_t cpu = dtp->dt_beganon;
2570 	dtrace_bufdesc_t nbuf;
2571 #if !defined(sun)
2572 	dtrace_bufdesc_t *pbuf;
2573 #endif
2574 	int rval, i;
2575 	static int max_ncpus;
2576 	dtrace_optval_t size;
2577 
2578 	dtp->dt_beganon = -1;
2579 
2580 #if defined(sun)
2581 	if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
2582 #else
2583 	if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &buf) == -1) {
2584 #endif
2585 		/*
2586 		 * We really don't expect this to fail, but it is at least
2587 		 * technically possible for this to fail with ENOENT.  In this
2588 		 * case, we just drive on...
2589 		 */
2590 		if (errno == ENOENT)
2591 			return (0);
2592 
2593 		return (dt_set_errno(dtp, errno));
2594 	}
2595 
2596 	if (!dtp->dt_stopped || buf->dtbd_cpu != dtp->dt_endedon) {
2597 		/*
2598 		 * This is the simple case.  We're either not stopped, or if
2599 		 * we are, we actually processed any END probes on another
2600 		 * CPU.  We can simply consume this buffer and return.
2601 		 */
2602 		return (dt_consume_cpu(dtp, fp, cpu, buf, pf, rf, arg));
2603 	}
2604 
2605 	begin.dtbgn_probefunc = pf;
2606 	begin.dtbgn_recfunc = rf;
2607 	begin.dtbgn_arg = arg;
2608 	begin.dtbgn_beginonly = 1;
2609 
2610 	/*
2611 	 * We need to interpose on the ERROR handler to be sure that we
2612 	 * only process ERRORs induced by BEGIN.
2613 	 */
2614 	begin.dtbgn_errhdlr = dtp->dt_errhdlr;
2615 	begin.dtbgn_errarg = dtp->dt_errarg;
2616 	dtp->dt_errhdlr = dt_consume_begin_error;
2617 	dtp->dt_errarg = &begin;
2618 
2619 	rval = dt_consume_cpu(dtp, fp, cpu, buf, dt_consume_begin_probe,
2620 	    dt_consume_begin_record, &begin);
2621 
2622 	dtp->dt_errhdlr = begin.dtbgn_errhdlr;
2623 	dtp->dt_errarg = begin.dtbgn_errarg;
2624 
2625 	if (rval != 0)
2626 		return (rval);
2627 
2628 	/*
2629 	 * Now allocate a new buffer.  We'll use this to deal with every other
2630 	 * CPU.
2631 	 */
2632 	bzero(&nbuf, sizeof (dtrace_bufdesc_t));
2633 	(void) dtrace_getopt(dtp, "bufsize", &size);
2634 	if ((nbuf.dtbd_data = malloc(size)) == NULL)
2635 		return (dt_set_errno(dtp, EDT_NOMEM));
2636 
2637 	if (max_ncpus == 0)
2638 		max_ncpus = dt_sysconf(dtp, _SC_CPUID_MAX) + 1;
2639 
2640 	for (i = 0; i < max_ncpus; i++) {
2641 		nbuf.dtbd_cpu = i;
2642 
2643 		if (i == cpu)
2644 			continue;
2645 
2646 #if defined(sun)
2647 		if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &nbuf) == -1) {
2648 #else
2649 		pbuf = &nbuf;
2650 		if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &pbuf) == -1) {
2651 #endif
2652 			/*
2653 			 * If we failed with ENOENT, it may be because the
2654 			 * CPU was unconfigured -- this is okay.  Any other
2655 			 * error, however, is unexpected.
2656 			 */
2657 			if (errno == ENOENT)
2658 				continue;
2659 
2660 			free(nbuf.dtbd_data);
2661 
2662 			return (dt_set_errno(dtp, errno));
2663 		}
2664 
2665 		if ((rval = dt_consume_cpu(dtp, fp,
2666 		    i, &nbuf, pf, rf, arg)) != 0) {
2667 			free(nbuf.dtbd_data);
2668 			return (rval);
2669 		}
2670 	}
2671 
2672 	free(nbuf.dtbd_data);
2673 
2674 	/*
2675 	 * Okay -- we're done with the other buffers.  Now we want to
2676 	 * reconsume the first buffer -- but this time we're looking for
2677 	 * everything _but_ BEGIN.  And of course, in order to only consume
2678 	 * those ERRORs _not_ associated with BEGIN, we need to reinstall our
2679 	 * ERROR interposition function...
2680 	 */
2681 	begin.dtbgn_beginonly = 0;
2682 
2683 	assert(begin.dtbgn_errhdlr == dtp->dt_errhdlr);
2684 	assert(begin.dtbgn_errarg == dtp->dt_errarg);
2685 	dtp->dt_errhdlr = dt_consume_begin_error;
2686 	dtp->dt_errarg = &begin;
2687 
2688 	rval = dt_consume_cpu(dtp, fp, cpu, buf, dt_consume_begin_probe,
2689 	    dt_consume_begin_record, &begin);
2690 
2691 	dtp->dt_errhdlr = begin.dtbgn_errhdlr;
2692 	dtp->dt_errarg = begin.dtbgn_errarg;
2693 
2694 	return (rval);
2695 }
2696 
2697 int
2698 dtrace_consume(dtrace_hdl_t *dtp, FILE *fp,
2699     dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg)
2700 {
2701 	dtrace_bufdesc_t *buf = &dtp->dt_buf;
2702 	dtrace_optval_t size;
2703 	static int max_ncpus;
2704 	int i, rval;
2705 	dtrace_optval_t interval = dtp->dt_options[DTRACEOPT_SWITCHRATE];
2706 	hrtime_t now = gethrtime();
2707 
2708 	if (dtp->dt_lastswitch != 0) {
2709 		if (now - dtp->dt_lastswitch < interval)
2710 			return (0);
2711 
2712 		dtp->dt_lastswitch += interval;
2713 	} else {
2714 		dtp->dt_lastswitch = now;
2715 	}
2716 
2717 	if (!dtp->dt_active)
2718 		return (dt_set_errno(dtp, EINVAL));
2719 
2720 	if (max_ncpus == 0)
2721 		max_ncpus = dt_sysconf(dtp, _SC_CPUID_MAX) + 1;
2722 
2723 	if (pf == NULL)
2724 		pf = (dtrace_consume_probe_f *)dt_nullprobe;
2725 
2726 	if (rf == NULL)
2727 		rf = (dtrace_consume_rec_f *)dt_nullrec;
2728 
2729 	if (buf->dtbd_data == NULL) {
2730 		(void) dtrace_getopt(dtp, "bufsize", &size);
2731 		if ((buf->dtbd_data = malloc(size)) == NULL)
2732 			return (dt_set_errno(dtp, EDT_NOMEM));
2733 
2734 		buf->dtbd_size = size;
2735 	}
2736 
2737 	/*
2738 	 * If we have just begun, we want to first process the CPU that
2739 	 * executed the BEGIN probe (if any).
2740 	 */
2741 	if (dtp->dt_active && dtp->dt_beganon != -1) {
2742 		buf->dtbd_cpu = dtp->dt_beganon;
2743 		if ((rval = dt_consume_begin(dtp, fp, buf, pf, rf, arg)) != 0)
2744 			return (rval);
2745 	}
2746 
2747 	for (i = 0; i < max_ncpus; i++) {
2748 		buf->dtbd_cpu = i;
2749 
2750 		/*
2751 		 * If we have stopped, we want to process the CPU on which the
2752 		 * END probe was processed only _after_ we have processed
2753 		 * everything else.
2754 		 */
2755 		if (dtp->dt_stopped && (i == dtp->dt_endedon))
2756 			continue;
2757 
2758 #if defined(sun)
2759 		if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
2760 #else
2761 		if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &buf) == -1) {
2762 #endif
2763 			/*
2764 			 * If we failed with ENOENT, it may be because the
2765 			 * CPU was unconfigured -- this is okay.  Any other
2766 			 * error, however, is unexpected.
2767 			 */
2768 			if (errno == ENOENT)
2769 				continue;
2770 
2771 			return (dt_set_errno(dtp, errno));
2772 		}
2773 
2774 		if ((rval = dt_consume_cpu(dtp, fp, i, buf, pf, rf, arg)) != 0)
2775 			return (rval);
2776 	}
2777 
2778 	if (!dtp->dt_stopped)
2779 		return (0);
2780 
2781 	buf->dtbd_cpu = dtp->dt_endedon;
2782 
2783 #if defined(sun)
2784 	if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
2785 #else
2786 	if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &buf) == -1) {
2787 #endif
2788 		/*
2789 		 * This _really_ shouldn't fail, but it is strictly speaking
2790 		 * possible for this to return ENOENT if the CPU that called
2791 		 * the END enabling somehow managed to become unconfigured.
2792 		 * It's unclear how the user can possibly expect anything
2793 		 * rational to happen in this case -- the state has been thrown
2794 		 * out along with the unconfigured CPU -- so we'll just drive
2795 		 * on...
2796 		 */
2797 		if (errno == ENOENT)
2798 			return (0);
2799 
2800 		return (dt_set_errno(dtp, errno));
2801 	}
2802 
2803 	return (dt_consume_cpu(dtp, fp, dtp->dt_endedon, buf, pf, rf, arg));
2804 }
2805