1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <stdlib.h>
27 #include <strings.h>
28 #include <errno.h>
29 #include <unistd.h>
30 #include <limits.h>
31 #include <assert.h>
32 #include <ctype.h>
33 #include <alloca.h>
34 #include <dt_impl.h>
35
36 #define DT_MASK_LO 0x00000000FFFFFFFFULL
37
/*
 * Return the absolute value of x.  This is defined locally because we need
 * it and want to avoid making libdtrace depend on libm.
 */
static long double
dt_fabsl(long double x)
{
	return (x < 0 ? -x : x);
}
50
51 /*
52 * 128-bit arithmetic functions needed to support the stddev() aggregating
53 * action.
54 */
/*
 * Return 1 if the 128-bit value a is strictly greater than b, and 0
 * otherwise.  Element [1] of each array is compared first (it is the more
 * significant word); element [0] only breaks ties.
 */
static int
dt_gt_128(uint64_t *a, uint64_t *b)
{
	if (a[1] != b[1])
		return (a[1] > b[1]);

	return (a[0] > b[0]);
}
60
/*
 * Return 1 if the 128-bit value a is greater than or equal to b, and 0
 * otherwise.  Element [1] of each array is compared first (it is the more
 * significant word); element [0] only breaks ties.
 */
static int
dt_ge_128(uint64_t *a, uint64_t *b)
{
	if (a[1] != b[1])
		return (a[1] > b[1]);

	return (a[0] >= b[0]);
}
66
/*
 * Return 1 if the 128-bit value a is less than or equal to b, and 0
 * otherwise.  Element [1] of each array is compared first (it is the more
 * significant word); element [0] only breaks ties.
 */
static int
dt_le_128(uint64_t *a, uint64_t *b)
{
	if (a[1] != b[1])
		return (a[1] < b[1]);

	return (a[0] <= b[0]);
}
72
/*
 * Shift the 128-bit value in a (a[0] is the low 64 bits, a[1] the high 64
 * bits) by b bits, in place.  If b is positive, shift left.  If b is
 * negative, shift right.  Vacated bits are filled with zeroes; shifting by
 * 128 bits or more in either direction yields zero.
 */
static void
dt_shift_128(uint64_t *a, int b)
{
	/* Take the magnitude in unsigned arithmetic so INT_MIN is safe too. */
	unsigned int nbits = (b < 0) ? 0U - (unsigned int)b : (unsigned int)b;

	if (nbits == 0)
		return;

	if (nbits >= 128) {
		/* Every bit is shifted out; avoid an out-of-range shift. */
		a[0] = 0;
		a[1] = 0;
		return;
	}

	if (b < 0) {
		if (nbits >= 64) {
			a[0] = a[1] >> (nbits - 64);
			a[1] = 0;
		} else {
			/*
			 * The low nbits bits of the high word become the top
			 * nbits bits of the low word.  The left shift by
			 * (64 - nbits) discards the remaining bits by itself,
			 * so no mask is needed (nbits is in 1..63 here).
			 */
			a[0] = (a[0] >> nbits) | (a[1] << (64 - nbits));
			a[1] >>= nbits;
		}
	} else {
		if (nbits >= 64) {
			a[1] = a[0] << (nbits - 64);
			a[0] = 0;
		} else {
			/*
			 * The top nbits bits of the low word carry into the
			 * bottom of the high word.
			 */
			a[1] = (a[1] << nbits) | (a[0] >> (64 - nbits));
			a[0] <<= nbits;
		}
	}
}
109
/*
 * Return the zero-based position of the most significant set bit of the
 * 128-bit value a -- that is, one less than the number of significant bits.
 * Both 0 and 1 yield 0.  The caller's value is not modified.
 */
static int
dt_nbits_128(uint64_t *a)
{
	uint64_t zero[2] = { 0, 0 };
	uint64_t work[2];
	int count;

	work[0] = a[0];
	work[1] = a[1];

	/* Drop the lowest bit first so that the count comes out zero-based. */
	dt_shift_128(work, -1);

	for (count = 0; dt_gt_128(work, zero); count++)
		dt_shift_128(work, -1);

	return (count);
}
128
/*
 * Compute difference = minuend - subtrahend on 128-bit values (element [0]
 * is the low word, element [1] the high word), wrapping modulo 2^128.  The
 * result is computed into locals before being stored, so difference may
 * alias either operand.
 */
static void
dt_subtract_128(uint64_t *minuend, uint64_t *subtrahend, uint64_t *difference)
{
	/* A borrow is needed when the low-word subtraction would wrap. */
	uint64_t borrow = (minuend[0] < subtrahend[0]) ? 1 : 0;
	uint64_t lo = minuend[0] - subtrahend[0];
	uint64_t hi = minuend[1] - subtrahend[1] - borrow;

	difference[0] = lo;
	difference[1] = hi;
}
141
/*
 * Compute sum = addend1 + addend2 on 128-bit values (element [0] is the low
 * word, element [1] the high word), wrapping modulo 2^128.  The result is
 * computed into locals before being stored, so sum may alias either operand.
 */
static void
dt_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum)
{
	uint64_t lo = addend1[0] + addend2[0];
	uint64_t hi = addend1[1] + addend2[1];

	/*
	 * The low-word addition wrapped if and only if the result is smaller
	 * than an operand; in that case carry one into the high word.
	 */
	if (lo < addend1[0])
		hi++;

	sum[0] = lo;
	sum[1] = hi;
}
154
155 /*
156 * The basic idea is to break the 2 64-bit values into 4 32-bit values,
157 * use native multiplication on those, and then re-combine into the
158 * resulting 128-bit value.
159 *
160 * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) =
161 * hi1 * hi2 << 64 +
162 * hi1 * lo2 << 32 +
163 * hi2 * lo1 << 32 +
164 * lo1 * lo2
165 */
166 static void
dt_multiply_128(uint64_t factor1,uint64_t factor2,uint64_t * product)167 dt_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product)
168 {
169 uint64_t hi1, hi2, lo1, lo2;
170 uint64_t tmp[2];
171
172 hi1 = factor1 >> 32;
173 hi2 = factor2 >> 32;
174
175 lo1 = factor1 & DT_MASK_LO;
176 lo2 = factor2 & DT_MASK_LO;
177
178 product[0] = lo1 * lo2;
179 product[1] = hi1 * hi2;
180
181 tmp[0] = hi1 * lo2;
182 tmp[1] = 0;
183 dt_shift_128(tmp, 32);
184 dt_add_128(product, tmp, product);
185
186 tmp[0] = hi2 * lo1;
187 tmp[1] = 0;
188 dt_shift_128(tmp, 32);
189 dt_add_128(product, tmp, product);
190 }
191
/*
 * Divide the 128-bit dividend by the 64-bit divisor, storing the 128-bit
 * quotient (the remainder is discarded).  The divisor must be non-zero.
 * The dividend is copied before use, so quotient may alias dividend.
 *
 * This is long-hand division.  We initialize the subtrahend by shifting the
 * divisor left until its top bit is bit 127, with a one-bit mask shifted by
 * the same amount.  We then loop for as long as the remainder is at least
 * the divisor: if the subtrahend is no larger than the remainder, we
 * subtract it and set the mask's bit in the result.  Either way, both the
 * subtrahend and the mask are then shifted right by one bit for the next
 * comparison.
 */
static void
dt_divide_128(uint64_t *dividend, uint64_t divisor, uint64_t *quotient)
{
	uint64_t quot[2] = { 0, 0 };
	uint64_t bit[2] = { 1, 0 };
	uint64_t rem[2], sub[2], div128[2];
	uint64_t d;
	int width = 0;

	assert(divisor != 0);

	div128[0] = divisor;
	div128[1] = 0;

	sub[0] = divisor;
	sub[1] = 0;

	rem[0] = dividend[0];
	rem[1] = dividend[1];

	/* Count the significant bits in the divisor. */
	for (d = divisor; d > 0; d >>= 1)
		width++;

	dt_shift_128(sub, 128 - width);
	dt_shift_128(bit, 128 - width);

	while (dt_ge_128(rem, div128)) {
		if (dt_ge_128(rem, sub)) {
			dt_subtract_128(rem, sub, rem);
			quot[0] |= bit[0];
			quot[1] |= bit[1];
		}

		dt_shift_128(sub, -1);
		dt_shift_128(bit, -1);
	}

	quotient[0] = quot[0];
	quotient[1] = quot[1];
}
243
/*
 * This is the long-hand method of calculating a square root.
 * The algorithm is as follows:
 *
 * 1. Group the digits by 2 from the right.
 * 2. Over the leftmost group, find the largest single-digit number
 *    whose square is no greater than that group.
 * 3. Subtract the result of the previous step (2 or 4, depending) and
 *    bring down the next two-digit group.
 * 4. For the result R we have so far, find the largest single-digit number
 *    x such that 2 * R * 10 * x + x^2 is no greater than the result from
 *    step 3.  (Note that this is doubling R and performing a decimal
 *    left-shift by 1 and searching for the appropriate decimal to fill the
 *    one's place.)  The value x is the next digit in the square root.
 * Repeat steps 3 and 4 until the desired precision is reached.  (We're
 * dealing with integers, so the above is sufficient.)
 *
 * In decimal, the square root of 582,734 would be calculated as so:
 *
 *	    __7__6__3
 *	   | 58 27 34
 *	    -49		(7^2 == 49 => 7 is the first digit in the square root)
 *	     --
 *	      9 27	(Subtract and bring down the next group.)
 *	146   8 76	(2 * 7 * 10 * 6 + 6^2 == 876 => 6 is the next digit in
 *	      -----	 the square root)
 *	        51 34	(Subtract and bring down the next group.)
 *	1523    45 69	(2 * 76 * 10 * 3 + 3^2 == 4569 => 3 is the next digit
 *	        -----	 in the square root)
 *	         5 65	(remainder)
 *
 * The above algorithm applies similarly in binary, but note that the
 * only possible non-zero value for x in step 4 is 1, so step 4 becomes a
 * simple decision: is 2 * R * 2 * 1 + 1^2 (aka (R << 2) + 1) no greater
 * than the preceding difference?
 *
 * In binary, the square root of 11011011 would be calculated as so:
 *
 *	     __1__1__1__0
 *	    | 11 01 10 11
 *	      01		(0 << 2 + 1 == 1 < 11 => this bit is 1)
 *	      --
 *	      10 01 10 11
 *	101    1 01		(1 << 2 + 1 == 101 < 1001 => next bit is 1)
 *	      -----
 *	       1 00 10 11
 *	1101     11 01		(11 << 2 + 1 == 1101 < 10010 => next bit is 1)
 *	       -------
 *	          1 01 11
 *	11101     1 11 01	(111 << 2 + 1 == 11101 > 10111 => last bit is 0)
 *
 */
/*
 * Return the integer square root of the 128-bit value square (square[0] is
 * the low word, square[1] the high word), computed one result bit per pair
 * of input bits.  The result is asserted to fit in 64 bits.
 */
static uint64_t
dt_sqrt_128(uint64_t *square)
{
	uint64_t result[2] = { 0, 0 };
	uint64_t diff[2] = { 0, 0 };
	uint64_t one[2] = { 1, 0 };
	uint64_t next_pair[2];
	uint64_t next_try[2];
	/*
	 * These are small non-negative counts (the shift is at most 126), and
	 * pair_shift is negated to request a right shift, so keep them signed
	 * rather than relying on unsigned negation narrowed to int.
	 */
	int bit_pairs, pair_shift;
	int i;

	bit_pairs = dt_nbits_128(square) / 2;
	pair_shift = bit_pairs * 2;

	for (i = 0; i <= bit_pairs; i++) {
		/*
		 * Bring down the next pair of bits.
		 */
		next_pair[0] = square[0];
		next_pair[1] = square[1];
		dt_shift_128(next_pair, -pair_shift);
		next_pair[0] &= 0x3;
		next_pair[1] = 0;

		dt_shift_128(diff, 2);
		dt_add_128(diff, next_pair, diff);

		/*
		 * next_try = (R << 2) + 1
		 */
		next_try[0] = result[0];
		next_try[1] = result[1];
		dt_shift_128(next_try, 2);
		dt_add_128(next_try, one, next_try);

		/*
		 * Make room for the next result bit; it is 1 exactly when
		 * next_try can be subtracted from the running difference.
		 */
		if (dt_le_128(next_try, diff)) {
			dt_subtract_128(diff, next_try, diff);
			dt_shift_128(result, 1);
			dt_add_128(result, one, result);
		} else {
			dt_shift_128(result, 1);
		}

		pair_shift -= 2;
	}

	assert(result[1] == 0);

	return (result[0]);
}
346
/*
 * Compute the (normalized) standard deviation from a stddev() aggregation
 * buffer.  As used here, data[0] is the divisor for the averages (the
 * count), data[1] is the signed sum, and data[2..3] is the 128-bit sum of
 * squares.  Both data[0] and normal must be non-zero.
 */
uint64_t
dt_stddev(uint64_t *data, uint64_t normal)
{
	uint64_t mean_of_squares[2];
	uint64_t square_of_mean[2];
	uint64_t variance[2];
	uint64_t magnitude;
	int64_t mean;

	/*
	 * The standard approximation for standard deviation is
	 * sqrt(average(x**2) - average(x)**2), i.e. the square root
	 * of the average of the squares minus the square of the average.
	 */
	dt_divide_128(data + 2, normal, mean_of_squares);
	dt_divide_128(mean_of_squares, data[0], mean_of_squares);

	mean = (int64_t)data[1] / (int64_t)normal / (int64_t)data[0];

	/* Only the square of the mean is needed, so its sign is irrelevant. */
	magnitude = (uint64_t)(mean < 0 ? -mean : mean);

	dt_multiply_128(magnitude, magnitude, square_of_mean);

	dt_subtract_128(mean_of_squares, square_of_mean, variance);

	return (dt_sqrt_128(variance));
}
374
375 static int
dt_flowindent(dtrace_hdl_t * dtp,dtrace_probedata_t * data,dtrace_epid_t last,dtrace_bufdesc_t * buf,size_t offs)376 dt_flowindent(dtrace_hdl_t *dtp, dtrace_probedata_t *data, dtrace_epid_t last,
377 dtrace_bufdesc_t *buf, size_t offs)
378 {
379 dtrace_probedesc_t *pd = data->dtpda_pdesc, *npd;
380 dtrace_eprobedesc_t *epd = data->dtpda_edesc, *nepd;
381 char *p = pd->dtpd_provider, *n = pd->dtpd_name, *sub;
382 dtrace_flowkind_t flow = DTRACEFLOW_NONE;
383 const char *str = NULL;
384 static const char *e_str[2] = { " -> ", " => " };
385 static const char *r_str[2] = { " <- ", " <= " };
386 static const char *ent = "entry", *ret = "return";
387 static int entlen = 0, retlen = 0;
388 dtrace_epid_t next, id = epd->dtepd_epid;
389 int rval;
390
391 if (entlen == 0) {
392 assert(retlen == 0);
393 entlen = strlen(ent);
394 retlen = strlen(ret);
395 }
396
397 /*
398 * If the name of the probe is "entry" or ends with "-entry", we
399 * treat it as an entry; if it is "return" or ends with "-return",
400 * we treat it as a return. (This allows application-provided probes
401 * like "method-entry" or "function-entry" to participate in flow
402 * indentation -- without accidentally misinterpreting popular probe
403 * names like "carpentry", "gentry" or "Coventry".)
404 */
405 if ((sub = strstr(n, ent)) != NULL && sub[entlen] == '\0' &&
406 (sub == n || sub[-1] == '-')) {
407 flow = DTRACEFLOW_ENTRY;
408 str = e_str[strcmp(p, "syscall") == 0];
409 } else if ((sub = strstr(n, ret)) != NULL && sub[retlen] == '\0' &&
410 (sub == n || sub[-1] == '-')) {
411 flow = DTRACEFLOW_RETURN;
412 str = r_str[strcmp(p, "syscall") == 0];
413 }
414
415 /*
416 * If we're going to indent this, we need to check the ID of our last
417 * call. If we're looking at the same probe ID but a different EPID,
418 * we _don't_ want to indent. (Yes, there are some minor holes in
419 * this scheme -- it's a heuristic.)
420 */
421 if (flow == DTRACEFLOW_ENTRY) {
422 if ((last != DTRACE_EPIDNONE && id != last &&
423 pd->dtpd_id == dtp->dt_pdesc[last]->dtpd_id))
424 flow = DTRACEFLOW_NONE;
425 }
426
427 /*
428 * If we're going to unindent this, it's more difficult to see if
429 * we don't actually want to unindent it -- we need to look at the
430 * _next_ EPID.
431 */
432 if (flow == DTRACEFLOW_RETURN) {
433 offs += epd->dtepd_size;
434
435 do {
436 if (offs >= buf->dtbd_size) {
437 /*
438 * We're at the end -- maybe. If the oldest
439 * record is non-zero, we need to wrap.
440 */
441 if (buf->dtbd_oldest != 0) {
442 offs = 0;
443 } else {
444 goto out;
445 }
446 }
447
448 next = *(uint32_t *)((uintptr_t)buf->dtbd_data + offs);
449
450 if (next == DTRACE_EPIDNONE)
451 offs += sizeof (id);
452 } while (next == DTRACE_EPIDNONE);
453
454 if ((rval = dt_epid_lookup(dtp, next, &nepd, &npd)) != 0)
455 return (rval);
456
457 if (next != id && npd->dtpd_id == pd->dtpd_id)
458 flow = DTRACEFLOW_NONE;
459 }
460
461 out:
462 if (flow == DTRACEFLOW_ENTRY || flow == DTRACEFLOW_RETURN) {
463 data->dtpda_prefix = str;
464 } else {
465 data->dtpda_prefix = "| ";
466 }
467
468 if (flow == DTRACEFLOW_RETURN && data->dtpda_indent > 0)
469 data->dtpda_indent -= 2;
470
471 data->dtpda_flow = flow;
472
473 return (0);
474 }
475
476 static int
dt_nullprobe()477 dt_nullprobe()
478 {
479 return (DTRACE_CONSUME_THIS);
480 }
481
482 static int
dt_nullrec()483 dt_nullrec()
484 {
485 return (DTRACE_CONSUME_NEXT);
486 }
487
488 int
dt_print_quantline(dtrace_hdl_t * dtp,FILE * fp,int64_t val,uint64_t normal,long double total,char positives,char negatives)489 dt_print_quantline(dtrace_hdl_t *dtp, FILE *fp, int64_t val,
490 uint64_t normal, long double total, char positives, char negatives)
491 {
492 long double f;
493 uint_t depth, len = 40;
494
495 const char *ats = "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@";
496 const char *spaces = " ";
497
498 assert(strlen(ats) == len && strlen(spaces) == len);
499 assert(!(total == 0 && (positives || negatives)));
500 assert(!(val < 0 && !negatives));
501 assert(!(val > 0 && !positives));
502 assert(!(val != 0 && total == 0));
503
504 if (!negatives) {
505 if (positives) {
506 f = (dt_fabsl((long double)val) * len) / total;
507 depth = (uint_t)(f + 0.5);
508 } else {
509 depth = 0;
510 }
511
512 return (dt_printf(dtp, fp, "|%s%s %-9lld\n", ats + len - depth,
513 spaces + depth, (long long)val / normal));
514 }
515
516 if (!positives) {
517 f = (dt_fabsl((long double)val) * len) / total;
518 depth = (uint_t)(f + 0.5);
519
520 return (dt_printf(dtp, fp, "%s%s| %-9lld\n", spaces + depth,
521 ats + len - depth, (long long)val / normal));
522 }
523
524 /*
525 * If we're here, we have both positive and negative bucket values.
526 * To express this graphically, we're going to generate both positive
527 * and negative bars separated by a centerline. These bars are half
528 * the size of normal quantize()/lquantize() bars, so we divide the
529 * length in half before calculating the bar length.
530 */
531 len /= 2;
532 ats = &ats[len];
533 spaces = &spaces[len];
534
535 f = (dt_fabsl((long double)val) * len) / total;
536 depth = (uint_t)(f + 0.5);
537
538 if (val <= 0) {
539 return (dt_printf(dtp, fp, "%s%s|%*s %-9lld\n", spaces + depth,
540 ats + len - depth, len, "", (long long)val / normal));
541 } else {
542 return (dt_printf(dtp, fp, "%20s|%s%s %-9lld\n", "",
543 ats + len - depth, spaces + depth,
544 (long long)val / normal));
545 }
546 }
547
548 int
dt_print_quantize(dtrace_hdl_t * dtp,FILE * fp,const void * addr,size_t size,uint64_t normal)549 dt_print_quantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
550 size_t size, uint64_t normal)
551 {
552 const int64_t *data = addr;
553 int i, first_bin = 0, last_bin = DTRACE_QUANTIZE_NBUCKETS - 1;
554 long double total = 0;
555 char positives = 0, negatives = 0;
556
557 if (size != DTRACE_QUANTIZE_NBUCKETS * sizeof (uint64_t))
558 return (dt_set_errno(dtp, EDT_DMISMATCH));
559
560 while (first_bin < DTRACE_QUANTIZE_NBUCKETS - 1 && data[first_bin] == 0)
561 first_bin++;
562
563 if (first_bin == DTRACE_QUANTIZE_NBUCKETS - 1) {
564 /*
565 * There isn't any data. This is possible if (and only if)
566 * negative increment values have been used. In this case,
567 * we'll print the buckets around 0.
568 */
569 first_bin = DTRACE_QUANTIZE_ZEROBUCKET - 1;
570 last_bin = DTRACE_QUANTIZE_ZEROBUCKET + 1;
571 } else {
572 if (first_bin > 0)
573 first_bin--;
574
575 while (last_bin > 0 && data[last_bin] == 0)
576 last_bin--;
577
578 if (last_bin < DTRACE_QUANTIZE_NBUCKETS - 1)
579 last_bin++;
580 }
581
582 for (i = first_bin; i <= last_bin; i++) {
583 positives |= (data[i] > 0);
584 negatives |= (data[i] < 0);
585 total += dt_fabsl((long double)data[i]);
586 }
587
588 if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value",
589 "------------- Distribution -------------", "count") < 0)
590 return (-1);
591
592 for (i = first_bin; i <= last_bin; i++) {
593 if (dt_printf(dtp, fp, "%16lld ",
594 (long long)DTRACE_QUANTIZE_BUCKETVAL(i)) < 0)
595 return (-1);
596
597 if (dt_print_quantline(dtp, fp, data[i], normal, total,
598 positives, negatives) < 0)
599 return (-1);
600 }
601
602 return (0);
603 }
604
605 int
dt_print_lquantize(dtrace_hdl_t * dtp,FILE * fp,const void * addr,size_t size,uint64_t normal)606 dt_print_lquantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
607 size_t size, uint64_t normal)
608 {
609 const int64_t *data = addr;
610 int i, first_bin, last_bin, base;
611 uint64_t arg;
612 long double total = 0;
613 uint16_t step, levels;
614 char positives = 0, negatives = 0;
615
616 if (size < sizeof (uint64_t))
617 return (dt_set_errno(dtp, EDT_DMISMATCH));
618
619 arg = *data++;
620 size -= sizeof (uint64_t);
621
622 base = DTRACE_LQUANTIZE_BASE(arg);
623 step = DTRACE_LQUANTIZE_STEP(arg);
624 levels = DTRACE_LQUANTIZE_LEVELS(arg);
625
626 first_bin = 0;
627 last_bin = levels + 1;
628
629 if (size != sizeof (uint64_t) * (levels + 2))
630 return (dt_set_errno(dtp, EDT_DMISMATCH));
631
632 while (first_bin <= levels + 1 && data[first_bin] == 0)
633 first_bin++;
634
635 if (first_bin > levels + 1) {
636 first_bin = 0;
637 last_bin = 2;
638 } else {
639 if (first_bin > 0)
640 first_bin--;
641
642 while (last_bin > 0 && data[last_bin] == 0)
643 last_bin--;
644
645 if (last_bin < levels + 1)
646 last_bin++;
647 }
648
649 for (i = first_bin; i <= last_bin; i++) {
650 positives |= (data[i] > 0);
651 negatives |= (data[i] < 0);
652 total += dt_fabsl((long double)data[i]);
653 }
654
655 if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value",
656 "------------- Distribution -------------", "count") < 0)
657 return (-1);
658
659 for (i = first_bin; i <= last_bin; i++) {
660 char c[32];
661 int err;
662
663 if (i == 0) {
664 (void) snprintf(c, sizeof (c), "< %d",
665 base / (uint32_t)normal);
666 err = dt_printf(dtp, fp, "%16s ", c);
667 } else if (i == levels + 1) {
668 (void) snprintf(c, sizeof (c), ">= %d",
669 base + (levels * step));
670 err = dt_printf(dtp, fp, "%16s ", c);
671 } else {
672 err = dt_printf(dtp, fp, "%16d ",
673 base + (i - 1) * step);
674 }
675
676 if (err < 0 || dt_print_quantline(dtp, fp, data[i], normal,
677 total, positives, negatives) < 0)
678 return (-1);
679 }
680
681 return (0);
682 }
683
684 /*ARGSUSED*/
685 static int
dt_print_average(dtrace_hdl_t * dtp,FILE * fp,caddr_t addr,size_t size,uint64_t normal)686 dt_print_average(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
687 size_t size, uint64_t normal)
688 {
689 /* LINTED - alignment */
690 int64_t *data = (int64_t *)addr;
691
692 return (dt_printf(dtp, fp, " %16lld", data[0] ?
693 (long long)(data[1] / (int64_t)normal / data[0]) : 0));
694 }
695
696 /*ARGSUSED*/
697 static int
dt_print_stddev(dtrace_hdl_t * dtp,FILE * fp,caddr_t addr,size_t size,uint64_t normal)698 dt_print_stddev(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
699 size_t size, uint64_t normal)
700 {
701 /* LINTED - alignment */
702 uint64_t *data = (uint64_t *)addr;
703
704 return (dt_printf(dtp, fp, " %16llu", data[0] ?
705 (unsigned long long) dt_stddev(data, normal) : 0));
706 }
707
708 /*ARGSUSED*/
709 int
dt_print_bytes(dtrace_hdl_t * dtp,FILE * fp,caddr_t addr,size_t nbytes,int width,int quiet)710 dt_print_bytes(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
711 size_t nbytes, int width, int quiet)
712 {
713 /*
714 * If the byte stream is a series of printable characters, followed by
715 * a terminating byte, we print it out as a string. Otherwise, we
716 * assume that it's something else and just print the bytes.
717 */
718 int i, j, margin = 5;
719 char *c = (char *)addr;
720
721 if (nbytes == 0)
722 return (0);
723
724 if (dtp->dt_options[DTRACEOPT_RAWBYTES] != DTRACEOPT_UNSET)
725 goto raw;
726
727 for (i = 0; i < nbytes; i++) {
728 /*
729 * We define a "printable character" to be one for which
730 * isprint(3C) returns non-zero, isspace(3C) returns non-zero,
731 * or a character which is either backspace or the bell.
732 * Backspace and the bell are regrettably special because
733 * they fail the first two tests -- and yet they are entirely
734 * printable. These are the only two control characters that
735 * have meaning for the terminal and for which isprint(3C) and
736 * isspace(3C) return 0.
737 */
738 if (isprint(c[i]) || isspace(c[i]) ||
739 c[i] == '\b' || c[i] == '\a')
740 continue;
741
742 if (c[i] == '\0' && i > 0) {
743 /*
744 * This looks like it might be a string. Before we
745 * assume that it is indeed a string, check the
746 * remainder of the byte range; if it contains
747 * additional non-nul characters, we'll assume that
748 * it's a binary stream that just happens to look like
749 * a string, and we'll print out the individual bytes.
750 */
751 for (j = i + 1; j < nbytes; j++) {
752 if (c[j] != '\0')
753 break;
754 }
755
756 if (j != nbytes)
757 break;
758
759 if (quiet)
760 return (dt_printf(dtp, fp, "%s", c));
761 else
762 return (dt_printf(dtp, fp, " %-*s", width, c));
763 }
764
765 break;
766 }
767
768 if (i == nbytes) {
769 /*
770 * The byte range is all printable characters, but there is
771 * no trailing nul byte. We'll assume that it's a string and
772 * print it as such.
773 */
774 char *s = alloca(nbytes + 1);
775 bcopy(c, s, nbytes);
776 s[nbytes] = '\0';
777 return (dt_printf(dtp, fp, " %-*s", width, s));
778 }
779
780 raw:
781 if (dt_printf(dtp, fp, "\n%*s ", margin, "") < 0)
782 return (-1);
783
784 for (i = 0; i < 16; i++)
785 if (dt_printf(dtp, fp, " %c", "0123456789abcdef"[i]) < 0)
786 return (-1);
787
788 if (dt_printf(dtp, fp, " 0123456789abcdef\n") < 0)
789 return (-1);
790
791
792 for (i = 0; i < nbytes; i += 16) {
793 if (dt_printf(dtp, fp, "%*s%5x:", margin, "", i) < 0)
794 return (-1);
795
796 for (j = i; j < i + 16 && j < nbytes; j++) {
797 if (dt_printf(dtp, fp, " %02x", (uchar_t)c[j]) < 0)
798 return (-1);
799 }
800
801 while (j++ % 16) {
802 if (dt_printf(dtp, fp, " ") < 0)
803 return (-1);
804 }
805
806 if (dt_printf(dtp, fp, " ") < 0)
807 return (-1);
808
809 for (j = i; j < i + 16 && j < nbytes; j++) {
810 if (dt_printf(dtp, fp, "%c",
811 c[j] < ' ' || c[j] > '~' ? '.' : c[j]) < 0)
812 return (-1);
813 }
814
815 if (dt_printf(dtp, fp, "\n") < 0)
816 return (-1);
817 }
818
819 return (0);
820 }
821
822 int
dt_print_stack(dtrace_hdl_t * dtp,FILE * fp,const char * format,caddr_t addr,int depth,int size)823 dt_print_stack(dtrace_hdl_t *dtp, FILE *fp, const char *format,
824 caddr_t addr, int depth, int size)
825 {
826 dtrace_syminfo_t dts;
827 GElf_Sym sym;
828 int i, indent;
829 char c[PATH_MAX * 2];
830 uint64_t pc;
831
832 if (dt_printf(dtp, fp, "\n") < 0)
833 return (-1);
834
835 if (format == NULL)
836 format = "%s";
837
838 if (dtp->dt_options[DTRACEOPT_STACKINDENT] != DTRACEOPT_UNSET)
839 indent = (int)dtp->dt_options[DTRACEOPT_STACKINDENT];
840 else
841 indent = _dtrace_stkindent;
842
843 for (i = 0; i < depth; i++) {
844 switch (size) {
845 case sizeof (uint32_t):
846 /* LINTED - alignment */
847 pc = *((uint32_t *)addr);
848 break;
849
850 case sizeof (uint64_t):
851 /* LINTED - alignment */
852 pc = *((uint64_t *)addr);
853 break;
854
855 default:
856 return (dt_set_errno(dtp, EDT_BADSTACKPC));
857 }
858
859 if (pc == NULL)
860 break;
861
862 addr += size;
863
864 if (dt_printf(dtp, fp, "%*s", indent, "") < 0)
865 return (-1);
866
867 if (dtrace_lookup_by_addr(dtp, pc, &sym, &dts) == 0) {
868 if (pc > sym.st_value) {
869 (void) snprintf(c, sizeof (c), "%s`%s+0x%llx",
870 dts.dts_object, dts.dts_name,
871 pc - sym.st_value);
872 } else {
873 (void) snprintf(c, sizeof (c), "%s`%s",
874 dts.dts_object, dts.dts_name);
875 }
876 } else {
877 /*
878 * We'll repeat the lookup, but this time we'll specify
879 * a NULL GElf_Sym -- indicating that we're only
880 * interested in the containing module.
881 */
882 if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
883 (void) snprintf(c, sizeof (c), "%s`0x%llx",
884 dts.dts_object, pc);
885 } else {
886 (void) snprintf(c, sizeof (c), "0x%llx", pc);
887 }
888 }
889
890 if (dt_printf(dtp, fp, format, c) < 0)
891 return (-1);
892
893 if (dt_printf(dtp, fp, "\n") < 0)
894 return (-1);
895 }
896
897 return (0);
898 }
899
900 int
dt_print_ustack(dtrace_hdl_t * dtp,FILE * fp,const char * format,caddr_t addr,uint64_t arg)901 dt_print_ustack(dtrace_hdl_t *dtp, FILE *fp, const char *format,
902 caddr_t addr, uint64_t arg)
903 {
904 /* LINTED - alignment */
905 uint64_t *pc = (uint64_t *)addr;
906 uint32_t depth = DTRACE_USTACK_NFRAMES(arg);
907 uint32_t strsize = DTRACE_USTACK_STRSIZE(arg);
908 const char *strbase = addr + (depth + 1) * sizeof (uint64_t);
909 const char *str = strsize ? strbase : NULL;
910 int err = 0;
911
912 char name[PATH_MAX], objname[PATH_MAX], c[PATH_MAX * 2];
913 struct ps_prochandle *P;
914 GElf_Sym sym;
915 int i, indent;
916 pid_t pid;
917
918 if (depth == 0)
919 return (0);
920
921 pid = (pid_t)*pc++;
922
923 if (dt_printf(dtp, fp, "\n") < 0)
924 return (-1);
925
926 if (format == NULL)
927 format = "%s";
928
929 if (dtp->dt_options[DTRACEOPT_STACKINDENT] != DTRACEOPT_UNSET)
930 indent = (int)dtp->dt_options[DTRACEOPT_STACKINDENT];
931 else
932 indent = _dtrace_stkindent;
933
934 /*
935 * Ultimately, we need to add an entry point in the library vector for
936 * determining <symbol, offset> from <pid, address>. For now, if
937 * this is a vector open, we just print the raw address or string.
938 */
939 if (dtp->dt_vector == NULL)
940 P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0);
941 else
942 P = NULL;
943
944 if (P != NULL)
945 dt_proc_lock(dtp, P); /* lock handle while we perform lookups */
946
947 for (i = 0; i < depth && pc[i] != NULL; i++) {
948 const prmap_t *map;
949
950 if ((err = dt_printf(dtp, fp, "%*s", indent, "")) < 0)
951 break;
952
953 if (P != NULL && Plookup_by_addr(P, pc[i],
954 name, sizeof (name), &sym) == 0) {
955 (void) Pobjname(P, pc[i], objname, sizeof (objname));
956
957 if (pc[i] > sym.st_value) {
958 (void) snprintf(c, sizeof (c),
959 "%s`%s+0x%llx", dt_basename(objname), name,
960 (u_longlong_t)(pc[i] - sym.st_value));
961 } else {
962 (void) snprintf(c, sizeof (c),
963 "%s`%s", dt_basename(objname), name);
964 }
965 } else if (str != NULL && str[0] != '\0' && str[0] != '@' &&
966 (P != NULL && ((map = Paddr_to_map(P, pc[i])) == NULL ||
967 (map->pr_mflags & MA_WRITE)))) {
968 /*
969 * If the current string pointer in the string table
970 * does not point to an empty string _and_ the program
971 * counter falls in a writable region, we'll use the
972 * string from the string table instead of the raw
973 * address. This last condition is necessary because
974 * some (broken) ustack helpers will return a string
975 * even for a program counter that they can't
976 * identify. If we have a string for a program
977 * counter that falls in a segment that isn't
978 * writable, we assume that we have fallen into this
979 * case and we refuse to use the string.
980 */
981 (void) snprintf(c, sizeof (c), "%s", str);
982 } else {
983 if (P != NULL && Pobjname(P, pc[i], objname,
984 sizeof (objname)) != NULL) {
985 (void) snprintf(c, sizeof (c), "%s`0x%llx",
986 dt_basename(objname), (u_longlong_t)pc[i]);
987 } else {
988 (void) snprintf(c, sizeof (c), "0x%llx",
989 (u_longlong_t)pc[i]);
990 }
991 }
992
993 if ((err = dt_printf(dtp, fp, format, c)) < 0)
994 break;
995
996 if ((err = dt_printf(dtp, fp, "\n")) < 0)
997 break;
998
999 if (str != NULL && str[0] == '@') {
1000 /*
1001 * If the first character of the string is an "at" sign,
1002 * then the string is inferred to be an annotation --
1003 * and it is printed out beneath the frame and offset
1004 * with brackets.
1005 */
1006 if ((err = dt_printf(dtp, fp, "%*s", indent, "")) < 0)
1007 break;
1008
1009 (void) snprintf(c, sizeof (c), " [ %s ]", &str[1]);
1010
1011 if ((err = dt_printf(dtp, fp, format, c)) < 0)
1012 break;
1013
1014 if ((err = dt_printf(dtp, fp, "\n")) < 0)
1015 break;
1016 }
1017
1018 if (str != NULL) {
1019 str += strlen(str) + 1;
1020 if (str - strbase >= strsize)
1021 str = NULL;
1022 }
1023 }
1024
1025 if (P != NULL) {
1026 dt_proc_unlock(dtp, P);
1027 dt_proc_release(dtp, P);
1028 }
1029
1030 return (err);
1031 }
1032
1033 static int
dt_print_usym(dtrace_hdl_t * dtp,FILE * fp,caddr_t addr,dtrace_actkind_t act)1034 dt_print_usym(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr, dtrace_actkind_t act)
1035 {
1036 /* LINTED - alignment */
1037 uint64_t pid = ((uint64_t *)addr)[0];
1038 /* LINTED - alignment */
1039 uint64_t pc = ((uint64_t *)addr)[1];
1040 const char *format = " %-50s";
1041 char *s;
1042 int n, len = 256;
1043
1044 if (act == DTRACEACT_USYM && dtp->dt_vector == NULL) {
1045 struct ps_prochandle *P;
1046
1047 if ((P = dt_proc_grab(dtp, pid,
1048 PGRAB_RDONLY | PGRAB_FORCE, 0)) != NULL) {
1049 GElf_Sym sym;
1050
1051 dt_proc_lock(dtp, P);
1052
1053 if (Plookup_by_addr(P, pc, NULL, 0, &sym) == 0)
1054 pc = sym.st_value;
1055
1056 dt_proc_unlock(dtp, P);
1057 dt_proc_release(dtp, P);
1058 }
1059 }
1060
1061 do {
1062 n = len;
1063 s = alloca(n);
1064 } while ((len = dtrace_uaddr2str(dtp, pid, pc, s, n)) > n);
1065
1066 return (dt_printf(dtp, fp, format, s));
1067 }
1068
1069 int
dt_print_umod(dtrace_hdl_t * dtp,FILE * fp,const char * format,caddr_t addr)1070 dt_print_umod(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)
1071 {
1072 /* LINTED - alignment */
1073 uint64_t pid = ((uint64_t *)addr)[0];
1074 /* LINTED - alignment */
1075 uint64_t pc = ((uint64_t *)addr)[1];
1076 int err = 0;
1077
1078 char objname[PATH_MAX], c[PATH_MAX * 2];
1079 struct ps_prochandle *P;
1080
1081 if (format == NULL)
1082 format = " %-50s";
1083
1084 /*
1085 * See the comment in dt_print_ustack() for the rationale for
1086 * printing raw addresses in the vectored case.
1087 */
1088 if (dtp->dt_vector == NULL)
1089 P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0);
1090 else
1091 P = NULL;
1092
1093 if (P != NULL)
1094 dt_proc_lock(dtp, P); /* lock handle while we perform lookups */
1095
1096 if (P != NULL && Pobjname(P, pc, objname, sizeof (objname)) != NULL) {
1097 (void) snprintf(c, sizeof (c), "%s", dt_basename(objname));
1098 } else {
1099 (void) snprintf(c, sizeof (c), "0x%llx", (u_longlong_t)pc);
1100 }
1101
1102 err = dt_printf(dtp, fp, format, c);
1103
1104 if (P != NULL) {
1105 dt_proc_unlock(dtp, P);
1106 dt_proc_release(dtp, P);
1107 }
1108
1109 return (err);
1110 }
1111
1112 static int
dt_print_sym(dtrace_hdl_t * dtp,FILE * fp,const char * format,caddr_t addr)1113 dt_print_sym(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)
1114 {
1115 /* LINTED - alignment */
1116 uint64_t pc = *((uint64_t *)addr);
1117 dtrace_syminfo_t dts;
1118 GElf_Sym sym;
1119 char c[PATH_MAX * 2];
1120
1121 if (format == NULL)
1122 format = " %-50s";
1123
1124 if (dtrace_lookup_by_addr(dtp, pc, &sym, &dts) == 0) {
1125 (void) snprintf(c, sizeof (c), "%s`%s",
1126 dts.dts_object, dts.dts_name);
1127 } else {
1128 /*
1129 * We'll repeat the lookup, but this time we'll specify a
1130 * NULL GElf_Sym -- indicating that we're only interested in
1131 * the containing module.
1132 */
1133 if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
1134 (void) snprintf(c, sizeof (c), "%s`0x%llx",
1135 dts.dts_object, (u_longlong_t)pc);
1136 } else {
1137 (void) snprintf(c, sizeof (c), "0x%llx",
1138 (u_longlong_t)pc);
1139 }
1140 }
1141
1142 if (dt_printf(dtp, fp, format, c) < 0)
1143 return (-1);
1144
1145 return (0);
1146 }
1147
1148 int
dt_print_mod(dtrace_hdl_t * dtp,FILE * fp,const char * format,caddr_t addr)1149 dt_print_mod(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)
1150 {
1151 /* LINTED - alignment */
1152 uint64_t pc = *((uint64_t *)addr);
1153 dtrace_syminfo_t dts;
1154 char c[PATH_MAX * 2];
1155
1156 if (format == NULL)
1157 format = " %-50s";
1158
1159 if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
1160 (void) snprintf(c, sizeof (c), "%s", dts.dts_object);
1161 } else {
1162 (void) snprintf(c, sizeof (c), "0x%llx", (u_longlong_t)pc);
1163 }
1164
1165 if (dt_printf(dtp, fp, format, c) < 0)
1166 return (-1);
1167
1168 return (0);
1169 }
1170
1171 typedef struct dt_normal {
1172 dtrace_aggvarid_t dtnd_id;
1173 uint64_t dtnd_normal;
1174 } dt_normal_t;
1175
1176 static int
dt_normalize_agg(const dtrace_aggdata_t * aggdata,void * arg)1177 dt_normalize_agg(const dtrace_aggdata_t *aggdata, void *arg)
1178 {
1179 dt_normal_t *normal = arg;
1180 dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1181 dtrace_aggvarid_t id = normal->dtnd_id;
1182
1183 if (agg->dtagd_nrecs == 0)
1184 return (DTRACE_AGGWALK_NEXT);
1185
1186 if (agg->dtagd_varid != id)
1187 return (DTRACE_AGGWALK_NEXT);
1188
1189 ((dtrace_aggdata_t *)aggdata)->dtada_normal = normal->dtnd_normal;
1190 return (DTRACE_AGGWALK_NORMALIZE);
1191 }
1192
1193 static int
dt_normalize(dtrace_hdl_t * dtp,caddr_t base,dtrace_recdesc_t * rec)1194 dt_normalize(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec)
1195 {
1196 dt_normal_t normal;
1197 caddr_t addr;
1198
1199 /*
1200 * We (should) have two records: the aggregation ID followed by the
1201 * normalization value.
1202 */
1203 addr = base + rec->dtrd_offset;
1204
1205 if (rec->dtrd_size != sizeof (dtrace_aggvarid_t))
1206 return (dt_set_errno(dtp, EDT_BADNORMAL));
1207
1208 /* LINTED - alignment */
1209 normal.dtnd_id = *((dtrace_aggvarid_t *)addr);
1210 rec++;
1211
1212 if (rec->dtrd_action != DTRACEACT_LIBACT)
1213 return (dt_set_errno(dtp, EDT_BADNORMAL));
1214
1215 if (rec->dtrd_arg != DT_ACT_NORMALIZE)
1216 return (dt_set_errno(dtp, EDT_BADNORMAL));
1217
1218 addr = base + rec->dtrd_offset;
1219
1220 switch (rec->dtrd_size) {
1221 case sizeof (uint64_t):
1222 /* LINTED - alignment */
1223 normal.dtnd_normal = *((uint64_t *)addr);
1224 break;
1225 case sizeof (uint32_t):
1226 /* LINTED - alignment */
1227 normal.dtnd_normal = *((uint32_t *)addr);
1228 break;
1229 case sizeof (uint16_t):
1230 /* LINTED - alignment */
1231 normal.dtnd_normal = *((uint16_t *)addr);
1232 break;
1233 case sizeof (uint8_t):
1234 normal.dtnd_normal = *((uint8_t *)addr);
1235 break;
1236 default:
1237 return (dt_set_errno(dtp, EDT_BADNORMAL));
1238 }
1239
1240 (void) dtrace_aggregate_walk(dtp, dt_normalize_agg, &normal);
1241
1242 return (0);
1243 }
1244
1245 static int
dt_denormalize_agg(const dtrace_aggdata_t * aggdata,void * arg)1246 dt_denormalize_agg(const dtrace_aggdata_t *aggdata, void *arg)
1247 {
1248 dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1249 dtrace_aggvarid_t id = *((dtrace_aggvarid_t *)arg);
1250
1251 if (agg->dtagd_nrecs == 0)
1252 return (DTRACE_AGGWALK_NEXT);
1253
1254 if (agg->dtagd_varid != id)
1255 return (DTRACE_AGGWALK_NEXT);
1256
1257 return (DTRACE_AGGWALK_DENORMALIZE);
1258 }
1259
1260 static int
dt_clear_agg(const dtrace_aggdata_t * aggdata,void * arg)1261 dt_clear_agg(const dtrace_aggdata_t *aggdata, void *arg)
1262 {
1263 dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1264 dtrace_aggvarid_t id = *((dtrace_aggvarid_t *)arg);
1265
1266 if (agg->dtagd_nrecs == 0)
1267 return (DTRACE_AGGWALK_NEXT);
1268
1269 if (agg->dtagd_varid != id)
1270 return (DTRACE_AGGWALK_NEXT);
1271
1272 return (DTRACE_AGGWALK_CLEAR);
1273 }
1274
1275 typedef struct dt_trunc {
1276 dtrace_aggvarid_t dttd_id;
1277 uint64_t dttd_remaining;
1278 } dt_trunc_t;
1279
1280 static int
dt_trunc_agg(const dtrace_aggdata_t * aggdata,void * arg)1281 dt_trunc_agg(const dtrace_aggdata_t *aggdata, void *arg)
1282 {
1283 dt_trunc_t *trunc = arg;
1284 dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1285 dtrace_aggvarid_t id = trunc->dttd_id;
1286
1287 if (agg->dtagd_nrecs == 0)
1288 return (DTRACE_AGGWALK_NEXT);
1289
1290 if (agg->dtagd_varid != id)
1291 return (DTRACE_AGGWALK_NEXT);
1292
1293 if (trunc->dttd_remaining == 0)
1294 return (DTRACE_AGGWALK_REMOVE);
1295
1296 trunc->dttd_remaining--;
1297 return (DTRACE_AGGWALK_NEXT);
1298 }
1299
1300 static int
dt_trunc(dtrace_hdl_t * dtp,caddr_t base,dtrace_recdesc_t * rec)1301 dt_trunc(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec)
1302 {
1303 dt_trunc_t trunc;
1304 caddr_t addr;
1305 int64_t remaining;
1306 int (*func)(dtrace_hdl_t *, dtrace_aggregate_f *, void *);
1307
1308 /*
1309 * We (should) have two records: the aggregation ID followed by the
1310 * number of aggregation entries after which the aggregation is to be
1311 * truncated.
1312 */
1313 addr = base + rec->dtrd_offset;
1314
1315 if (rec->dtrd_size != sizeof (dtrace_aggvarid_t))
1316 return (dt_set_errno(dtp, EDT_BADTRUNC));
1317
1318 /* LINTED - alignment */
1319 trunc.dttd_id = *((dtrace_aggvarid_t *)addr);
1320 rec++;
1321
1322 if (rec->dtrd_action != DTRACEACT_LIBACT)
1323 return (dt_set_errno(dtp, EDT_BADTRUNC));
1324
1325 if (rec->dtrd_arg != DT_ACT_TRUNC)
1326 return (dt_set_errno(dtp, EDT_BADTRUNC));
1327
1328 addr = base + rec->dtrd_offset;
1329
1330 switch (rec->dtrd_size) {
1331 case sizeof (uint64_t):
1332 /* LINTED - alignment */
1333 remaining = *((int64_t *)addr);
1334 break;
1335 case sizeof (uint32_t):
1336 /* LINTED - alignment */
1337 remaining = *((int32_t *)addr);
1338 break;
1339 case sizeof (uint16_t):
1340 /* LINTED - alignment */
1341 remaining = *((int16_t *)addr);
1342 break;
1343 case sizeof (uint8_t):
1344 remaining = *((int8_t *)addr);
1345 break;
1346 default:
1347 return (dt_set_errno(dtp, EDT_BADNORMAL));
1348 }
1349
1350 if (remaining < 0) {
1351 func = dtrace_aggregate_walk_valsorted;
1352 remaining = -remaining;
1353 } else {
1354 func = dtrace_aggregate_walk_valrevsorted;
1355 }
1356
1357 assert(remaining >= 0);
1358 trunc.dttd_remaining = remaining;
1359
1360 (void) func(dtp, dt_trunc_agg, &trunc);
1361
1362 return (0);
1363 }
1364
1365 static int
dt_print_datum(dtrace_hdl_t * dtp,FILE * fp,dtrace_recdesc_t * rec,caddr_t addr,size_t size,uint64_t normal)1366 dt_print_datum(dtrace_hdl_t *dtp, FILE *fp, dtrace_recdesc_t *rec,
1367 caddr_t addr, size_t size, uint64_t normal)
1368 {
1369 int err;
1370 dtrace_actkind_t act = rec->dtrd_action;
1371
1372 switch (act) {
1373 case DTRACEACT_STACK:
1374 return (dt_print_stack(dtp, fp, NULL, addr,
1375 rec->dtrd_arg, rec->dtrd_size / rec->dtrd_arg));
1376
1377 case DTRACEACT_USTACK:
1378 case DTRACEACT_JSTACK:
1379 return (dt_print_ustack(dtp, fp, NULL, addr, rec->dtrd_arg));
1380
1381 case DTRACEACT_USYM:
1382 case DTRACEACT_UADDR:
1383 return (dt_print_usym(dtp, fp, addr, act));
1384
1385 case DTRACEACT_UMOD:
1386 return (dt_print_umod(dtp, fp, NULL, addr));
1387
1388 case DTRACEACT_SYM:
1389 return (dt_print_sym(dtp, fp, NULL, addr));
1390
1391 case DTRACEACT_MOD:
1392 return (dt_print_mod(dtp, fp, NULL, addr));
1393
1394 case DTRACEAGG_QUANTIZE:
1395 return (dt_print_quantize(dtp, fp, addr, size, normal));
1396
1397 case DTRACEAGG_LQUANTIZE:
1398 return (dt_print_lquantize(dtp, fp, addr, size, normal));
1399
1400 case DTRACEAGG_AVG:
1401 return (dt_print_average(dtp, fp, addr, size, normal));
1402
1403 case DTRACEAGG_STDDEV:
1404 return (dt_print_stddev(dtp, fp, addr, size, normal));
1405
1406 default:
1407 break;
1408 }
1409
1410 switch (size) {
1411 case sizeof (uint64_t):
1412 err = dt_printf(dtp, fp, " %16lld",
1413 /* LINTED - alignment */
1414 (long long)*((uint64_t *)addr) / normal);
1415 break;
1416 case sizeof (uint32_t):
1417 /* LINTED - alignment */
1418 err = dt_printf(dtp, fp, " %8d", *((uint32_t *)addr) /
1419 (uint32_t)normal);
1420 break;
1421 case sizeof (uint16_t):
1422 /* LINTED - alignment */
1423 err = dt_printf(dtp, fp, " %5d", *((uint16_t *)addr) /
1424 (uint32_t)normal);
1425 break;
1426 case sizeof (uint8_t):
1427 err = dt_printf(dtp, fp, " %3d", *((uint8_t *)addr) /
1428 (uint32_t)normal);
1429 break;
1430 default:
1431 err = dt_print_bytes(dtp, fp, addr, size, 50, 0);
1432 break;
1433 }
1434
1435 return (err);
1436 }
1437
1438 int
dt_print_aggs(const dtrace_aggdata_t ** aggsdata,int naggvars,void * arg)1439 dt_print_aggs(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
1440 {
1441 int i, aggact = 0;
1442 dt_print_aggdata_t *pd = arg;
1443 const dtrace_aggdata_t *aggdata = aggsdata[0];
1444 dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1445 FILE *fp = pd->dtpa_fp;
1446 dtrace_hdl_t *dtp = pd->dtpa_dtp;
1447 dtrace_recdesc_t *rec;
1448 dtrace_actkind_t act;
1449 caddr_t addr;
1450 size_t size;
1451
1452 /*
1453 * Iterate over each record description in the key, printing the traced
1454 * data, skipping the first datum (the tuple member created by the
1455 * compiler).
1456 */
1457 for (i = 1; i < agg->dtagd_nrecs; i++) {
1458 rec = &agg->dtagd_rec[i];
1459 act = rec->dtrd_action;
1460 addr = aggdata->dtada_data + rec->dtrd_offset;
1461 size = rec->dtrd_size;
1462
1463 if (DTRACEACT_ISAGG(act)) {
1464 aggact = i;
1465 break;
1466 }
1467
1468 if (dt_print_datum(dtp, fp, rec, addr, size, 1) < 0)
1469 return (-1);
1470
1471 if (dt_buffered_flush(dtp, NULL, rec, aggdata,
1472 DTRACE_BUFDATA_AGGKEY) < 0)
1473 return (-1);
1474 }
1475
1476 assert(aggact != 0);
1477
1478 for (i = (naggvars == 1 ? 0 : 1); i < naggvars; i++) {
1479 uint64_t normal;
1480
1481 aggdata = aggsdata[i];
1482 agg = aggdata->dtada_desc;
1483 rec = &agg->dtagd_rec[aggact];
1484 act = rec->dtrd_action;
1485 addr = aggdata->dtada_data + rec->dtrd_offset;
1486 size = rec->dtrd_size;
1487
1488 assert(DTRACEACT_ISAGG(act));
1489 normal = aggdata->dtada_normal;
1490
1491 if (dt_print_datum(dtp, fp, rec, addr, size, normal) < 0)
1492 return (-1);
1493
1494 if (dt_buffered_flush(dtp, NULL, rec, aggdata,
1495 DTRACE_BUFDATA_AGGVAL) < 0)
1496 return (-1);
1497
1498 if (!pd->dtpa_allunprint)
1499 agg->dtagd_flags |= DTRACE_AGD_PRINTED;
1500 }
1501
1502 if (dt_printf(dtp, fp, "\n") < 0)
1503 return (-1);
1504
1505 if (dt_buffered_flush(dtp, NULL, NULL, aggdata,
1506 DTRACE_BUFDATA_AGGFORMAT | DTRACE_BUFDATA_AGGLAST) < 0)
1507 return (-1);
1508
1509 return (0);
1510 }
1511
1512 int
dt_print_agg(const dtrace_aggdata_t * aggdata,void * arg)1513 dt_print_agg(const dtrace_aggdata_t *aggdata, void *arg)
1514 {
1515 dt_print_aggdata_t *pd = arg;
1516 dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1517 dtrace_aggvarid_t aggvarid = pd->dtpa_id;
1518
1519 if (pd->dtpa_allunprint) {
1520 if (agg->dtagd_flags & DTRACE_AGD_PRINTED)
1521 return (0);
1522 } else {
1523 /*
1524 * If we're not printing all unprinted aggregations, then the
1525 * aggregation variable ID denotes a specific aggregation
1526 * variable that we should print -- skip any other aggregations
1527 * that we encounter.
1528 */
1529 if (agg->dtagd_nrecs == 0)
1530 return (0);
1531
1532 if (aggvarid != agg->dtagd_varid)
1533 return (0);
1534 }
1535
1536 return (dt_print_aggs(&aggdata, 1, arg));
1537 }
1538
1539 int
dt_setopt(dtrace_hdl_t * dtp,const dtrace_probedata_t * data,const char * option,const char * value)1540 dt_setopt(dtrace_hdl_t *dtp, const dtrace_probedata_t *data,
1541 const char *option, const char *value)
1542 {
1543 int len, rval;
1544 char *msg;
1545 const char *errstr;
1546 dtrace_setoptdata_t optdata;
1547
1548 bzero(&optdata, sizeof (optdata));
1549 (void) dtrace_getopt(dtp, option, &optdata.dtsda_oldval);
1550
1551 if (dtrace_setopt(dtp, option, value) == 0) {
1552 (void) dtrace_getopt(dtp, option, &optdata.dtsda_newval);
1553 optdata.dtsda_probe = data;
1554 optdata.dtsda_option = option;
1555 optdata.dtsda_handle = dtp;
1556
1557 if ((rval = dt_handle_setopt(dtp, &optdata)) != 0)
1558 return (rval);
1559
1560 return (0);
1561 }
1562
1563 errstr = dtrace_errmsg(dtp, dtrace_errno(dtp));
1564 len = strlen(option) + strlen(value) + strlen(errstr) + 80;
1565 msg = alloca(len);
1566
1567 (void) snprintf(msg, len, "couldn't set option \"%s\" to \"%s\": %s\n",
1568 option, value, errstr);
1569
1570 if ((rval = dt_handle_liberr(dtp, data, msg)) == 0)
1571 return (0);
1572
1573 return (rval);
1574 }
1575
1576 static int
dt_consume_cpu(dtrace_hdl_t * dtp,FILE * fp,int cpu,dtrace_bufdesc_t * buf,dtrace_consume_probe_f * efunc,dtrace_consume_rec_f * rfunc,void * arg)1577 dt_consume_cpu(dtrace_hdl_t *dtp, FILE *fp, int cpu, dtrace_bufdesc_t *buf,
1578 dtrace_consume_probe_f *efunc, dtrace_consume_rec_f *rfunc, void *arg)
1579 {
1580 dtrace_epid_t id;
1581 size_t offs, start = buf->dtbd_oldest, end = buf->dtbd_size;
1582 int flow = (dtp->dt_options[DTRACEOPT_FLOWINDENT] != DTRACEOPT_UNSET);
1583 int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET);
1584 int rval, i, n;
1585 dtrace_epid_t last = DTRACE_EPIDNONE;
1586 dtrace_probedata_t data;
1587 uint64_t drops;
1588 caddr_t addr;
1589
1590 bzero(&data, sizeof (data));
1591 data.dtpda_handle = dtp;
1592 data.dtpda_cpu = cpu;
1593
1594 again:
1595 for (offs = start; offs < end; ) {
1596 dtrace_eprobedesc_t *epd;
1597
1598 /*
1599 * We're guaranteed to have an ID.
1600 */
1601 id = *(uint32_t *)((uintptr_t)buf->dtbd_data + offs);
1602
1603 if (id == DTRACE_EPIDNONE) {
1604 /*
1605 * This is filler to assure proper alignment of the
1606 * next record; we simply ignore it.
1607 */
1608 offs += sizeof (id);
1609 continue;
1610 }
1611
1612 if ((rval = dt_epid_lookup(dtp, id, &data.dtpda_edesc,
1613 &data.dtpda_pdesc)) != 0)
1614 return (rval);
1615
1616 epd = data.dtpda_edesc;
1617 data.dtpda_data = buf->dtbd_data + offs;
1618
1619 if (data.dtpda_edesc->dtepd_uarg != DT_ECB_DEFAULT) {
1620 rval = dt_handle(dtp, &data);
1621
1622 if (rval == DTRACE_CONSUME_NEXT)
1623 goto nextepid;
1624
1625 if (rval == DTRACE_CONSUME_ERROR)
1626 return (-1);
1627 }
1628
1629 if (flow)
1630 (void) dt_flowindent(dtp, &data, last, buf, offs);
1631
1632 rval = (*efunc)(&data, arg);
1633
1634 if (flow) {
1635 if (data.dtpda_flow == DTRACEFLOW_ENTRY)
1636 data.dtpda_indent += 2;
1637 }
1638
1639 if (rval == DTRACE_CONSUME_NEXT)
1640 goto nextepid;
1641
1642 if (rval == DTRACE_CONSUME_ABORT)
1643 return (dt_set_errno(dtp, EDT_DIRABORT));
1644
1645 if (rval != DTRACE_CONSUME_THIS)
1646 return (dt_set_errno(dtp, EDT_BADRVAL));
1647
1648 for (i = 0; i < epd->dtepd_nrecs; i++) {
1649 dtrace_recdesc_t *rec = &epd->dtepd_rec[i];
1650 dtrace_actkind_t act = rec->dtrd_action;
1651
1652 data.dtpda_data = buf->dtbd_data + offs +
1653 rec->dtrd_offset;
1654 addr = data.dtpda_data;
1655
1656 if (act == DTRACEACT_LIBACT) {
1657 uint64_t arg = rec->dtrd_arg;
1658 dtrace_aggvarid_t id;
1659
1660 switch (arg) {
1661 case DT_ACT_CLEAR:
1662 /* LINTED - alignment */
1663 id = *((dtrace_aggvarid_t *)addr);
1664 (void) dtrace_aggregate_walk(dtp,
1665 dt_clear_agg, &id);
1666 continue;
1667
1668 case DT_ACT_DENORMALIZE:
1669 /* LINTED - alignment */
1670 id = *((dtrace_aggvarid_t *)addr);
1671 (void) dtrace_aggregate_walk(dtp,
1672 dt_denormalize_agg, &id);
1673 continue;
1674
1675 case DT_ACT_FTRUNCATE:
1676 if (fp == NULL)
1677 continue;
1678
1679 (void) fflush(fp);
1680 (void) ftruncate(fileno(fp), 0);
1681 (void) fseeko(fp, 0, SEEK_SET);
1682 continue;
1683
1684 case DT_ACT_NORMALIZE:
1685 if (i == epd->dtepd_nrecs - 1)
1686 return (dt_set_errno(dtp,
1687 EDT_BADNORMAL));
1688
1689 if (dt_normalize(dtp,
1690 buf->dtbd_data + offs, rec) != 0)
1691 return (-1);
1692
1693 i++;
1694 continue;
1695
1696 case DT_ACT_SETOPT: {
1697 uint64_t *opts = dtp->dt_options;
1698 dtrace_recdesc_t *valrec;
1699 uint32_t valsize;
1700 caddr_t val;
1701 int rv;
1702
1703 if (i == epd->dtepd_nrecs - 1) {
1704 return (dt_set_errno(dtp,
1705 EDT_BADSETOPT));
1706 }
1707
1708 valrec = &epd->dtepd_rec[++i];
1709 valsize = valrec->dtrd_size;
1710
1711 if (valrec->dtrd_action != act ||
1712 valrec->dtrd_arg != arg) {
1713 return (dt_set_errno(dtp,
1714 EDT_BADSETOPT));
1715 }
1716
1717 if (valsize > sizeof (uint64_t)) {
1718 val = buf->dtbd_data + offs +
1719 valrec->dtrd_offset;
1720 } else {
1721 val = "1";
1722 }
1723
1724 rv = dt_setopt(dtp, &data, addr, val);
1725
1726 if (rv != 0)
1727 return (-1);
1728
1729 flow = (opts[DTRACEOPT_FLOWINDENT] !=
1730 DTRACEOPT_UNSET);
1731 quiet = (opts[DTRACEOPT_QUIET] !=
1732 DTRACEOPT_UNSET);
1733
1734 continue;
1735 }
1736
1737 case DT_ACT_TRUNC:
1738 if (i == epd->dtepd_nrecs - 1)
1739 return (dt_set_errno(dtp,
1740 EDT_BADTRUNC));
1741
1742 if (dt_trunc(dtp,
1743 buf->dtbd_data + offs, rec) != 0)
1744 return (-1);
1745
1746 i++;
1747 continue;
1748
1749 default:
1750 continue;
1751 }
1752 }
1753
1754 rval = (*rfunc)(&data, rec, arg);
1755
1756 if (rval == DTRACE_CONSUME_NEXT)
1757 continue;
1758
1759 if (rval == DTRACE_CONSUME_ABORT)
1760 return (dt_set_errno(dtp, EDT_DIRABORT));
1761
1762 if (rval != DTRACE_CONSUME_THIS)
1763 return (dt_set_errno(dtp, EDT_BADRVAL));
1764
1765 if (act == DTRACEACT_STACK) {
1766 int depth = rec->dtrd_arg;
1767
1768 if (dt_print_stack(dtp, fp, NULL, addr, depth,
1769 rec->dtrd_size / depth) < 0)
1770 return (-1);
1771 goto nextrec;
1772 }
1773
1774 if (act == DTRACEACT_USTACK ||
1775 act == DTRACEACT_JSTACK) {
1776 if (dt_print_ustack(dtp, fp, NULL,
1777 addr, rec->dtrd_arg) < 0)
1778 return (-1);
1779 goto nextrec;
1780 }
1781
1782 if (act == DTRACEACT_SYM) {
1783 if (dt_print_sym(dtp, fp, NULL, addr) < 0)
1784 return (-1);
1785 goto nextrec;
1786 }
1787
1788 if (act == DTRACEACT_MOD) {
1789 if (dt_print_mod(dtp, fp, NULL, addr) < 0)
1790 return (-1);
1791 goto nextrec;
1792 }
1793
1794 if (act == DTRACEACT_USYM || act == DTRACEACT_UADDR) {
1795 if (dt_print_usym(dtp, fp, addr, act) < 0)
1796 return (-1);
1797 goto nextrec;
1798 }
1799
1800 if (act == DTRACEACT_UMOD) {
1801 if (dt_print_umod(dtp, fp, NULL, addr) < 0)
1802 return (-1);
1803 goto nextrec;
1804 }
1805
1806 if (DTRACEACT_ISPRINTFLIKE(act)) {
1807 void *fmtdata;
1808 int (*func)(dtrace_hdl_t *, FILE *, void *,
1809 const dtrace_probedata_t *,
1810 const dtrace_recdesc_t *, uint_t,
1811 const void *buf, size_t);
1812
1813 if ((fmtdata = dt_format_lookup(dtp,
1814 rec->dtrd_format)) == NULL)
1815 goto nofmt;
1816
1817 switch (act) {
1818 case DTRACEACT_PRINTF:
1819 func = dtrace_fprintf;
1820 break;
1821 case DTRACEACT_PRINTA:
1822 func = dtrace_fprinta;
1823 break;
1824 case DTRACEACT_SYSTEM:
1825 func = dtrace_system;
1826 break;
1827 case DTRACEACT_FREOPEN:
1828 func = dtrace_freopen;
1829 break;
1830 }
1831
1832 n = (*func)(dtp, fp, fmtdata, &data,
1833 rec, epd->dtepd_nrecs - i,
1834 (uchar_t *)buf->dtbd_data + offs,
1835 buf->dtbd_size - offs);
1836
1837 if (n < 0)
1838 return (-1); /* errno is set for us */
1839
1840 if (n > 0)
1841 i += n - 1;
1842 goto nextrec;
1843 }
1844
1845 nofmt:
1846 if (act == DTRACEACT_PRINTA) {
1847 dt_print_aggdata_t pd;
1848 dtrace_aggvarid_t *aggvars;
1849 int j, naggvars = 0;
1850 size_t size = ((epd->dtepd_nrecs - i) *
1851 sizeof (dtrace_aggvarid_t));
1852
1853 if ((aggvars = dt_alloc(dtp, size)) == NULL)
1854 return (-1);
1855
1856 /*
1857 * This might be a printa() with multiple
1858 * aggregation variables. We need to scan
1859 * forward through the records until we find
1860 * a record from a different statement.
1861 */
1862 for (j = i; j < epd->dtepd_nrecs; j++) {
1863 dtrace_recdesc_t *nrec;
1864 caddr_t naddr;
1865
1866 nrec = &epd->dtepd_rec[j];
1867
1868 if (nrec->dtrd_uarg != rec->dtrd_uarg)
1869 break;
1870
1871 if (nrec->dtrd_action != act) {
1872 return (dt_set_errno(dtp,
1873 EDT_BADAGG));
1874 }
1875
1876 naddr = buf->dtbd_data + offs +
1877 nrec->dtrd_offset;
1878
1879 aggvars[naggvars++] =
1880 /* LINTED - alignment */
1881 *((dtrace_aggvarid_t *)naddr);
1882 }
1883
1884 i = j - 1;
1885 bzero(&pd, sizeof (pd));
1886 pd.dtpa_dtp = dtp;
1887 pd.dtpa_fp = fp;
1888
1889 assert(naggvars >= 1);
1890
1891 if (naggvars == 1) {
1892 pd.dtpa_id = aggvars[0];
1893 dt_free(dtp, aggvars);
1894
1895 if (dt_printf(dtp, fp, "\n") < 0 ||
1896 dtrace_aggregate_walk_sorted(dtp,
1897 dt_print_agg, &pd) < 0)
1898 return (-1);
1899 goto nextrec;
1900 }
1901
1902 if (dt_printf(dtp, fp, "\n") < 0 ||
1903 dtrace_aggregate_walk_joined(dtp, aggvars,
1904 naggvars, dt_print_aggs, &pd) < 0) {
1905 dt_free(dtp, aggvars);
1906 return (-1);
1907 }
1908
1909 dt_free(dtp, aggvars);
1910 goto nextrec;
1911 }
1912
1913 switch (rec->dtrd_size) {
1914 case sizeof (uint64_t):
1915 n = dt_printf(dtp, fp,
1916 quiet ? "%lld" : " %16lld",
1917 /* LINTED - alignment */
1918 *((unsigned long long *)addr));
1919 break;
1920 case sizeof (uint32_t):
1921 n = dt_printf(dtp, fp, quiet ? "%d" : " %8d",
1922 /* LINTED - alignment */
1923 *((uint32_t *)addr));
1924 break;
1925 case sizeof (uint16_t):
1926 n = dt_printf(dtp, fp, quiet ? "%d" : " %5d",
1927 /* LINTED - alignment */
1928 *((uint16_t *)addr));
1929 break;
1930 case sizeof (uint8_t):
1931 n = dt_printf(dtp, fp, quiet ? "%d" : " %3d",
1932 *((uint8_t *)addr));
1933 break;
1934 default:
1935 n = dt_print_bytes(dtp, fp, addr,
1936 rec->dtrd_size, 33, quiet);
1937 break;
1938 }
1939
1940 if (n < 0)
1941 return (-1); /* errno is set for us */
1942
1943 nextrec:
1944 if (dt_buffered_flush(dtp, &data, rec, NULL, 0) < 0)
1945 return (-1); /* errno is set for us */
1946 }
1947
1948 /*
1949 * Call the record callback with a NULL record to indicate
1950 * that we're done processing this EPID.
1951 */
1952 rval = (*rfunc)(&data, NULL, arg);
1953 nextepid:
1954 offs += epd->dtepd_size;
1955 last = id;
1956 }
1957
1958 if (buf->dtbd_oldest != 0 && start == buf->dtbd_oldest) {
1959 end = buf->dtbd_oldest;
1960 start = 0;
1961 goto again;
1962 }
1963
1964 if ((drops = buf->dtbd_drops) == 0)
1965 return (0);
1966
1967 /*
1968 * Explicitly zero the drops to prevent us from processing them again.
1969 */
1970 buf->dtbd_drops = 0;
1971
1972 return (dt_handle_cpudrop(dtp, cpu, DTRACEDROP_PRINCIPAL, drops));
1973 }
1974
1975 typedef struct dt_begin {
1976 dtrace_consume_probe_f *dtbgn_probefunc;
1977 dtrace_consume_rec_f *dtbgn_recfunc;
1978 void *dtbgn_arg;
1979 dtrace_handle_err_f *dtbgn_errhdlr;
1980 void *dtbgn_errarg;
1981 int dtbgn_beginonly;
1982 } dt_begin_t;
1983
1984 static int
dt_consume_begin_probe(const dtrace_probedata_t * data,void * arg)1985 dt_consume_begin_probe(const dtrace_probedata_t *data, void *arg)
1986 {
1987 dt_begin_t *begin = (dt_begin_t *)arg;
1988 dtrace_probedesc_t *pd = data->dtpda_pdesc;
1989
1990 int r1 = (strcmp(pd->dtpd_provider, "dtrace") == 0);
1991 int r2 = (strcmp(pd->dtpd_name, "BEGIN") == 0);
1992
1993 if (begin->dtbgn_beginonly) {
1994 if (!(r1 && r2))
1995 return (DTRACE_CONSUME_NEXT);
1996 } else {
1997 if (r1 && r2)
1998 return (DTRACE_CONSUME_NEXT);
1999 }
2000
2001 /*
2002 * We have a record that we're interested in. Now call the underlying
2003 * probe function...
2004 */
2005 return (begin->dtbgn_probefunc(data, begin->dtbgn_arg));
2006 }
2007
2008 static int
dt_consume_begin_record(const dtrace_probedata_t * data,const dtrace_recdesc_t * rec,void * arg)2009 dt_consume_begin_record(const dtrace_probedata_t *data,
2010 const dtrace_recdesc_t *rec, void *arg)
2011 {
2012 dt_begin_t *begin = (dt_begin_t *)arg;
2013
2014 return (begin->dtbgn_recfunc(data, rec, begin->dtbgn_arg));
2015 }
2016
2017 static int
dt_consume_begin_error(const dtrace_errdata_t * data,void * arg)2018 dt_consume_begin_error(const dtrace_errdata_t *data, void *arg)
2019 {
2020 dt_begin_t *begin = (dt_begin_t *)arg;
2021 dtrace_probedesc_t *pd = data->dteda_pdesc;
2022
2023 int r1 = (strcmp(pd->dtpd_provider, "dtrace") == 0);
2024 int r2 = (strcmp(pd->dtpd_name, "BEGIN") == 0);
2025
2026 if (begin->dtbgn_beginonly) {
2027 if (!(r1 && r2))
2028 return (DTRACE_HANDLE_OK);
2029 } else {
2030 if (r1 && r2)
2031 return (DTRACE_HANDLE_OK);
2032 }
2033
2034 return (begin->dtbgn_errhdlr(data, begin->dtbgn_errarg));
2035 }
2036
2037 static int
dt_consume_begin(dtrace_hdl_t * dtp,FILE * fp,dtrace_bufdesc_t * buf,dtrace_consume_probe_f * pf,dtrace_consume_rec_f * rf,void * arg)2038 dt_consume_begin(dtrace_hdl_t *dtp, FILE *fp, dtrace_bufdesc_t *buf,
2039 dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg)
2040 {
2041 /*
2042 * There's this idea that the BEGIN probe should be processed before
2043 * everything else, and that the END probe should be processed after
2044 * anything else. In the common case, this is pretty easy to deal
2045 * with. However, a situation may arise where the BEGIN enabling and
2046 * END enabling are on the same CPU, and some enabling in the middle
2047 * occurred on a different CPU. To deal with this (blech!) we need to
2048 * consume the BEGIN buffer up until the end of the BEGIN probe, and
2049 * then set it aside. We will then process every other CPU, and then
2050 * we'll return to the BEGIN CPU and process the rest of the data
2051 * (which will inevitably include the END probe, if any). Making this
2052 * even more complicated (!) is the library's ERROR enabling. Because
2053 * this enabling is processed before we even get into the consume call
2054 * back, any ERROR firing would result in the library's ERROR enabling
2055 * being processed twice -- once in our first pass (for BEGIN probes),
2056 * and again in our second pass (for everything but BEGIN probes). To
2057 * deal with this, we interpose on the ERROR handler to assure that we
2058 * only process ERROR enablings induced by BEGIN enablings in the
2059 * first pass, and that we only process ERROR enablings _not_ induced
2060 * by BEGIN enablings in the second pass.
2061 */
2062 dt_begin_t begin;
2063 processorid_t cpu = dtp->dt_beganon;
2064 dtrace_bufdesc_t nbuf;
2065 int rval, i;
2066 static int max_ncpus;
2067 dtrace_optval_t size;
2068
2069 dtp->dt_beganon = -1;
2070
2071 if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
2072 /*
2073 * We really don't expect this to fail, but it is at least
2074 * technically possible for this to fail with ENOENT. In this
2075 * case, we just drive on...
2076 */
2077 if (errno == ENOENT)
2078 return (0);
2079
2080 return (dt_set_errno(dtp, errno));
2081 }
2082
2083 if (!dtp->dt_stopped || buf->dtbd_cpu != dtp->dt_endedon) {
2084 /*
2085 * This is the simple case. We're either not stopped, or if
2086 * we are, we actually processed any END probes on another
2087 * CPU. We can simply consume this buffer and return.
2088 */
2089 return (dt_consume_cpu(dtp, fp, cpu, buf, pf, rf, arg));
2090 }
2091
2092 begin.dtbgn_probefunc = pf;
2093 begin.dtbgn_recfunc = rf;
2094 begin.dtbgn_arg = arg;
2095 begin.dtbgn_beginonly = 1;
2096
2097 /*
2098 * We need to interpose on the ERROR handler to be sure that we
2099 * only process ERRORs induced by BEGIN.
2100 */
2101 begin.dtbgn_errhdlr = dtp->dt_errhdlr;
2102 begin.dtbgn_errarg = dtp->dt_errarg;
2103 dtp->dt_errhdlr = dt_consume_begin_error;
2104 dtp->dt_errarg = &begin;
2105
2106 rval = dt_consume_cpu(dtp, fp, cpu, buf, dt_consume_begin_probe,
2107 dt_consume_begin_record, &begin);
2108
2109 dtp->dt_errhdlr = begin.dtbgn_errhdlr;
2110 dtp->dt_errarg = begin.dtbgn_errarg;
2111
2112 if (rval != 0)
2113 return (rval);
2114
2115 /*
2116 * Now allocate a new buffer. We'll use this to deal with every other
2117 * CPU.
2118 */
2119 bzero(&nbuf, sizeof (dtrace_bufdesc_t));
2120 (void) dtrace_getopt(dtp, "bufsize", &size);
2121 if ((nbuf.dtbd_data = malloc(size)) == NULL)
2122 return (dt_set_errno(dtp, EDT_NOMEM));
2123
2124 if (max_ncpus == 0)
2125 max_ncpus = dt_sysconf(dtp, _SC_CPUID_MAX) + 1;
2126
2127 for (i = 0; i < max_ncpus; i++) {
2128 nbuf.dtbd_cpu = i;
2129
2130 if (i == cpu)
2131 continue;
2132
2133 if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &nbuf) == -1) {
2134 /*
2135 * If we failed with ENOENT, it may be because the
2136 * CPU was unconfigured -- this is okay. Any other
2137 * error, however, is unexpected.
2138 */
2139 if (errno == ENOENT)
2140 continue;
2141
2142 free(nbuf.dtbd_data);
2143
2144 return (dt_set_errno(dtp, errno));
2145 }
2146
2147 if ((rval = dt_consume_cpu(dtp, fp,
2148 i, &nbuf, pf, rf, arg)) != 0) {
2149 free(nbuf.dtbd_data);
2150 return (rval);
2151 }
2152 }
2153
2154 free(nbuf.dtbd_data);
2155
2156 /*
2157 * Okay -- we're done with the other buffers. Now we want to
2158 * reconsume the first buffer -- but this time we're looking for
2159 * everything _but_ BEGIN. And of course, in order to only consume
2160 * those ERRORs _not_ associated with BEGIN, we need to reinstall our
2161 * ERROR interposition function...
2162 */
2163 begin.dtbgn_beginonly = 0;
2164
2165 assert(begin.dtbgn_errhdlr == dtp->dt_errhdlr);
2166 assert(begin.dtbgn_errarg == dtp->dt_errarg);
2167 dtp->dt_errhdlr = dt_consume_begin_error;
2168 dtp->dt_errarg = &begin;
2169
2170 rval = dt_consume_cpu(dtp, fp, cpu, buf, dt_consume_begin_probe,
2171 dt_consume_begin_record, &begin);
2172
2173 dtp->dt_errhdlr = begin.dtbgn_errhdlr;
2174 dtp->dt_errarg = begin.dtbgn_errarg;
2175
2176 return (rval);
2177 }
2178
2179 int
dtrace_consume(dtrace_hdl_t * dtp,FILE * fp,dtrace_consume_probe_f * pf,dtrace_consume_rec_f * rf,void * arg)2180 dtrace_consume(dtrace_hdl_t *dtp, FILE *fp,
2181 dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg)
2182 {
2183 dtrace_bufdesc_t *buf = &dtp->dt_buf;
2184 dtrace_optval_t size;
2185 static int max_ncpus;
2186 int i, rval;
2187 dtrace_optval_t interval = dtp->dt_options[DTRACEOPT_SWITCHRATE];
2188 hrtime_t now = gethrtime();
2189
2190 if (dtp->dt_lastswitch != 0) {
2191 if (now - dtp->dt_lastswitch < interval)
2192 return (0);
2193
2194 dtp->dt_lastswitch += interval;
2195 } else {
2196 dtp->dt_lastswitch = now;
2197 }
2198
2199 if (!dtp->dt_active)
2200 return (dt_set_errno(dtp, EINVAL));
2201
2202 if (max_ncpus == 0)
2203 max_ncpus = dt_sysconf(dtp, _SC_CPUID_MAX) + 1;
2204
2205 if (pf == NULL)
2206 pf = (dtrace_consume_probe_f *)dt_nullprobe;
2207
2208 if (rf == NULL)
2209 rf = (dtrace_consume_rec_f *)dt_nullrec;
2210
2211 if (buf->dtbd_data == NULL) {
2212 (void) dtrace_getopt(dtp, "bufsize", &size);
2213 if ((buf->dtbd_data = malloc(size)) == NULL)
2214 return (dt_set_errno(dtp, EDT_NOMEM));
2215
2216 buf->dtbd_size = size;
2217 }
2218
2219 /*
2220 * If we have just begun, we want to first process the CPU that
2221 * executed the BEGIN probe (if any).
2222 */
2223 if (dtp->dt_active && dtp->dt_beganon != -1) {
2224 buf->dtbd_cpu = dtp->dt_beganon;
2225 if ((rval = dt_consume_begin(dtp, fp, buf, pf, rf, arg)) != 0)
2226 return (rval);
2227 }
2228
2229 for (i = 0; i < max_ncpus; i++) {
2230 buf->dtbd_cpu = i;
2231
2232 /*
2233 * If we have stopped, we want to process the CPU on which the
2234 * END probe was processed only _after_ we have processed
2235 * everything else.
2236 */
2237 if (dtp->dt_stopped && (i == dtp->dt_endedon))
2238 continue;
2239
2240 if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
2241 /*
2242 * If we failed with ENOENT, it may be because the
2243 * CPU was unconfigured -- this is okay. Any other
2244 * error, however, is unexpected.
2245 */
2246 if (errno == ENOENT)
2247 continue;
2248
2249 return (dt_set_errno(dtp, errno));
2250 }
2251
2252 if ((rval = dt_consume_cpu(dtp, fp, i, buf, pf, rf, arg)) != 0)
2253 return (rval);
2254 }
2255
2256 if (!dtp->dt_stopped)
2257 return (0);
2258
2259 buf->dtbd_cpu = dtp->dt_endedon;
2260
2261 if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
2262 /*
2263 * This _really_ shouldn't fail, but it is strictly speaking
2264 * possible for this to return ENOENT if the CPU that called
2265 * the END enabling somehow managed to become unconfigured.
2266 * It's unclear how the user can possibly expect anything
2267 * rational to happen in this case -- the state has been thrown
2268 * out along with the unconfigured CPU -- so we'll just drive
2269 * on...
2270 */
2271 if (errno == ENOENT)
2272 return (0);
2273
2274 return (dt_set_errno(dtp, errno));
2275 }
2276
2277 return (dt_consume_cpu(dtp, fp, dtp->dt_endedon, buf, pf, rf, arg));
2278 }
2279