/*	$NetBSD: dtrace_subr.c,v 1.14 2022/08/21 18:58:45 riastradh Exp $	*/

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * $FreeBSD: head/sys/cddl/dev/dtrace/i386/dtrace_subr.c 313850 2017-02-17 03:27:20Z markj $
 *
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2011, Joyent, Inc. All rights reserved.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/kmem.h>
#include <sys/xcall.h>
#include <sys/cpu.h>
#include <sys/cpuvar.h>
#include <sys/dtrace_impl.h>
#include <sys/dtrace_bsd.h>
#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <machine/clock.h>
#include <machine/frame.h>
#include <uvm/uvm_pglist.h>
#include <uvm/uvm_prot.h>
#include <uvm/uvm_pmap.h>

#include <x86/include/cpu_counter.h>

extern uintptr_t kernelbase;

extern void dtrace_getnanotime(struct timespec *tsp);

int dtrace_invop(uintptr_t, struct trapframe *, uintptr_t);

typedef struct dtrace_invop_hdlr {
	int (*dtih_func)(uintptr_t, struct trapframe *, uintptr_t);
	struct dtrace_invop_hdlr *dtih_next;
} dtrace_invop_hdlr_t;

dtrace_invop_hdlr_t *dtrace_invop_hdlr;

void dtrace_gethrtime_init(void *arg);

int
dtrace_invop(uintptr_t addr, struct trapframe *frame, uintptr_t eax)
{
	dtrace_invop_hdlr_t *hdlr;
	int rval;

	for (hdlr = dtrace_invop_hdlr; hdlr != NULL; hdlr = hdlr->dtih_next)
		if ((rval = hdlr->dtih_func(addr, frame, eax)) != 0)
			return (rval);

	return (0);
}

void
dtrace_invop_add(int (*func)(uintptr_t, struct trapframe *, uintptr_t))
{
	dtrace_invop_hdlr_t *hdlr;

	hdlr = kmem_alloc(sizeof(*hdlr), KM_SLEEP);
	hdlr->dtih_func = func;
	hdlr->dtih_next = dtrace_invop_hdlr;
	dtrace_invop_hdlr = hdlr;
}

void
dtrace_invop_remove(int (*func)(uintptr_t, struct trapframe *, uintptr_t))
{
	dtrace_invop_hdlr_t *hdlr = dtrace_invop_hdlr, *prev = NULL;

	for (;;) {
		if (hdlr == NULL)
			panic("attempt to remove non-existent invop handler");

		if (hdlr->dtih_func == func)
			break;

		prev = hdlr;
		hdlr = hdlr->dtih_next;
	}

	if (prev == NULL) {
		ASSERT(dtrace_invop_hdlr == hdlr);
		dtrace_invop_hdlr = hdlr->dtih_next;
	} else {
		ASSERT(dtrace_invop_hdlr != hdlr);
		prev->dtih_next = hdlr->dtih_next;
	}

	kmem_free(hdlr, sizeof(*hdlr));
}

void
dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit))
{
	(*func)(0, kernelbase);
}
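
/*
 * Cross-call trampoline: NetBSD's xcall(9) callbacks take two void *
 * arguments, while DTrace's dtrace_xcall_t takes one.  xcall_func()
 * bridges the two, with the DTrace function carried in arg0 and its
 * argument in arg1.
 */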
static void
xcall_func(void *arg0, void *arg1)
{
	dtrace_xcall_t func = arg0;

	(*func)(arg1);
}

void
dtrace_xcall(processorid_t cpu, dtrace_xcall_t func, void *arg)
{
	uint64_t where;

	if (cpu == DTRACE_CPUALL) {
		where = xc_broadcast(0, xcall_func, func, arg);
	} else {
		struct cpu_info *cinfo = cpu_lookup(cpu);

		KASSERT(cinfo != NULL);
		where = xc_unicast(0, xcall_func, func, arg, cinfo);
	}
	xc_wait(where);

	/*
	 * XXX Q. Do we really need the other cpus to wait also?
	 * (see solaris:xc_sync())
	 */
}

static void
dtrace_sync_func(void)
{
}

void
dtrace_sync(void)
{
	dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_sync_func, NULL);
}
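
/*
 * The two routines below come from the Solaris support for safely
 * delivering signals while user instructions are being traced
 * out-of-line from scratch space (the fasttrap/pid provider).  They
 * are kept under "notyet" because that machinery is not wired up on
 * NetBSD.
 */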
#ifdef notyet
void
dtrace_safe_synchronous_signal(void)
{
	kthread_t *t = curthread;
	struct regs *rp = lwptoregs(ttolwp(t));
	size_t isz = t->t_dtrace_npc - t->t_dtrace_pc;

	ASSERT(t->t_dtrace_on);

	/*
	 * If we're not in the range of scratch addresses, we're not actually
	 * tracing user instructions so turn off the flags.  If the
	 * instruction we copied out caused a synchronous trap, reset the pc
	 * back to its original value and turn off the flags.
	 */
	if (rp->r_pc < t->t_dtrace_scrpc ||
	    rp->r_pc > t->t_dtrace_astpc + isz) {
		t->t_dtrace_ft = 0;
	} else if (rp->r_pc == t->t_dtrace_scrpc ||
	    rp->r_pc == t->t_dtrace_astpc) {
		rp->r_pc = t->t_dtrace_pc;
		t->t_dtrace_ft = 0;
	}
}

int
dtrace_safe_defer_signal(void)
{
	kthread_t *t = curthread;
	struct regs *rp = lwptoregs(ttolwp(t));
	size_t isz = t->t_dtrace_npc - t->t_dtrace_pc;

	ASSERT(t->t_dtrace_on);

	/*
	 * If we're not in the range of scratch addresses, we're not actually
	 * tracing user instructions so turn off the flags.
	 */
	if (rp->r_pc < t->t_dtrace_scrpc ||
	    rp->r_pc > t->t_dtrace_astpc + isz) {
		t->t_dtrace_ft = 0;
		return (0);
	}

	/*
	 * If we have executed the original instruction, but we have performed
	 * neither the jmp back to t->t_dtrace_npc nor the clean up of any
	 * registers used to emulate %rip-relative instructions in 64-bit
	 * mode, we'll save ourselves some effort by doing that here and
	 * taking the signal right away.  We detect this condition by seeing
	 * if the program counter is in the range [scrpc + isz, astpc).
	 */
	if (rp->r_pc >= t->t_dtrace_scrpc + isz &&
	    rp->r_pc < t->t_dtrace_astpc) {
#ifdef __amd64
		/*
		 * If there is a scratch register and we're on the
		 * instruction immediately after the modified instruction,
		 * restore the value of that scratch register.
		 */
		if (t->t_dtrace_reg != 0 &&
		    rp->r_pc == t->t_dtrace_scrpc + isz) {
			switch (t->t_dtrace_reg) {
			case REG_RAX:
				rp->r_rax = t->t_dtrace_regv;
				break;
			case REG_RCX:
				rp->r_rcx = t->t_dtrace_regv;
				break;
			case REG_R8:
				rp->r_r8 = t->t_dtrace_regv;
				break;
			case REG_R9:
				rp->r_r9 = t->t_dtrace_regv;
				break;
			}
		}
#endif
		rp->r_pc = t->t_dtrace_npc;
		t->t_dtrace_ft = 0;
		return (0);
	}

	/*
	 * Otherwise, make sure we'll return to the kernel after executing
	 * the copied out instruction and defer the signal.
	 */
	if (!t->t_dtrace_step) {
		ASSERT(rp->r_pc < t->t_dtrace_astpc);
		rp->r_pc += t->t_dtrace_astpc - t->t_dtrace_scrpc;
		t->t_dtrace_step = 1;
	}

	t->t_dtrace_ast = 1;

	return (1);
}
#endif

static int64_t	tgt_cpu_tsc;
static int64_t	hst_cpu_tsc;
static int64_t	tsc_skew[MAXCPUS];
static uint64_t	nsec_scale;

/* See below for the explanation of this macro. */
#define SCALE_SHIFT	28

static __inline uint64_t
dtrace_rdtsc(void)
{
	uint64_t rv;

	__asm __volatile("rdtsc" : "=A" (rv));
	return (rv);
}

static void
dtrace_gethrtime_init_cpu(void *arg)
{
	uintptr_t cpu = (uintptr_t) arg;

	if (cpu == cpu_number())
		tgt_cpu_tsc = dtrace_rdtsc();
	else
		hst_cpu_tsc = dtrace_rdtsc();
}

void
dtrace_gethrtime_init(void *arg)
{
	uint64_t tsc_f;
	CPU_INFO_ITERATOR cpuind;
	struct cpu_info *cinfo = curcpu();
	cpuid_t cur_cpuid = cpu_number();	/* current cpu id */

	/*
	 * Get the TSC frequency as known at this moment.  It should be
	 * constant if the TSC is invariant; otherwise the tick->time
	 * conversion will be inaccurate, but will still preserve the
	 * monotonic property of the TSC.
	 */
	tsc_f = cpu_frequency(cinfo);

	/*
	 * The following line checks that nsec_scale calculated below
	 * doesn't overflow a 32-bit unsigned integer, so that it can
	 * multiply another 32-bit integer without overflowing 64 bits.
	 * Thus the minimum supported TSC frequency is 62.5MHz.
	 */
	KASSERTMSG(tsc_f > (NANOSEC >> (32 - SCALE_SHIFT)),
	    "TSC frequency is too low");

	/*
	 * We scale up the NANOSEC/tsc_f ratio to preserve as much precision
	 * as possible.
	 * The 2^28 factor was chosen quite arbitrarily from practical
	 * considerations:
	 * - it supports TSC frequencies as low as 62.5MHz (see above);
	 * - it provides quite good precision (e < 0.01%) up to THz
	 *   (terahertz) values;
	 */
	nsec_scale = ((uint64_t)NANOSEC << SCALE_SHIFT) / tsc_f;

	/* The current CPU is the reference one. */
	tsc_skew[cur_cpuid] = 0;

	for (CPU_INFO_FOREACH(cpuind, cinfo)) {
		/* use skew relative to cpu 0 */
		tsc_skew[cpu_index(cinfo)] = cinfo->ci_data.cpu_cc_skew;
	}

	/* Already handled in x86/tsc.c for ci_data.cpu_cc_skew */
#if 0
	/* The current CPU is the reference one. */
	sched_pin();
	tsc_skew[curcpu] = 0;
	CPU_FOREACH(i) {
		if (i == curcpu)
			continue;

		pc = pcpu_find(i);
		CPU_SETOF(PCPU_GET(cpuid), &map);
		CPU_SET(pc->pc_cpuid, &map);

		smp_rendezvous_cpus(map, NULL,
		    dtrace_gethrtime_init_cpu,
		    smp_no_rendevous_barrier, (void *)(uintptr_t) i);

		tsc_skew[i] = tgt_cpu_tsc - hst_cpu_tsc;
	}
	sched_unpin();
#endif
}

#ifdef __FreeBSD__
#ifdef EARLY_AP_STARTUP
SYSINIT(dtrace_gethrtime_init, SI_SUB_DTRACE, SI_ORDER_ANY,
    dtrace_gethrtime_init, NULL);
#else
SYSINIT(dtrace_gethrtime_init, SI_SUB_SMP, SI_ORDER_ANY, dtrace_gethrtime_init,
    NULL);
#endif
#endif
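
/*
 * The conversion in dtrace_gethrtime() computes tsc * nsec_scale / 2^28
 * without needing a 128-bit intermediate.  Writing tsc = hi * 2^32 + lo:
 *
 *	tsc * nsec_scale / 2^28
 *	    = (hi * nsec_scale * 2^32) / 2^28 + (lo * nsec_scale) / 2^28
 *	    = ((hi * nsec_scale) << 4) + ((lo * nsec_scale) >> 28)
 *
 * which is exactly the expression used below, with 4 == 32 - SCALE_SHIFT.
 */
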
/*
 * DTrace needs a high resolution time function which can be called
 * from a probe context and is guaranteed not to be instrumented with
 * probes itself.
 *
 * Returns nanoseconds since boot.
 */
uint64_t
dtrace_gethrtime(void)
{
	uint64_t tsc;
	uint32_t lo;
	uint32_t hi;

	/*
	 * We split the TSC value into lower and higher 32-bit halves and
	 * separately scale them with nsec_scale, then we scale them down
	 * by 2^28 (see the nsec_scale calculations) taking into account
	 * the 32-bit shift of the higher half, and finally add them.
	 */
	tsc = dtrace_rdtsc() + tsc_skew[cpu_number()];
	lo = tsc;
	hi = tsc >> 32;
	return (((lo * nsec_scale) >> SCALE_SHIFT) +
	    ((hi * nsec_scale) << (32 - SCALE_SHIFT)));
}

uint64_t
dtrace_gethrestime(void)
{
	struct timespec current_time;

	dtrace_getnanotime(&current_time);

	return (current_time.tv_sec * 1000000000ULL + current_time.tv_nsec);
}

/* Function to handle DTrace traps during probes. See i386/i386/trap.c. */
int
dtrace_trap(struct trapframe *frame, u_int type)
{
	bool nofault;
	cpuid_t cpuid = cpu_number();	/* current cpu id */

	/*
	 * A trap can occur while DTrace executes a probe.  Before
	 * executing the probe, DTrace blocks re-scheduling and sets
	 * a flag in its per-cpu flags to indicate that it doesn't
	 * want to fault.  On returning from the probe, the no-fault
	 * flag is cleared and finally re-scheduling is enabled.
	 *
	 * Check if DTrace has enabled 'no-fault' mode:
	 */
	nofault = (cpu_core[cpuid].cpuc_dtrace_flags & CPU_DTRACE_NOFAULT) != 0;
	if (nofault) {
		KASSERTMSG((x86_read_flags() & PSL_I) == 0,
		    "interrupts enabled");

		/*
		 * There are only a couple of trap types that are expected.
		 * All the rest will be handled in the usual way.
		 */
		switch (type) {
		/* General protection fault. */
		case T_PROTFLT:
			/* Flag an illegal operation. */
			cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;

			/*
			 * Offset the instruction pointer to the instruction
			 * following the one causing the fault.
			 */
			frame->tf_eip += dtrace_instr_size((u_char *) frame->tf_eip);
			return (1);
		/* Page fault. */
		case T_PAGEFLT:
			/* Flag a bad address. */
			cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_BADADDR;
			cpu_core[cpuid].cpuc_dtrace_illval = rcr2();

			/*
			 * Offset the instruction pointer to the instruction
			 * following the one causing the fault.
			 */
			frame->tf_eip += dtrace_instr_size((u_char *) frame->tf_eip);
			return (1);
		default:
			/* Handle all other traps in the usual way. */
			break;
		}
	}

	/* Handle the trap in the usual way. */
	return (0);
}
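
/*
 * Sketch of the intended call site (see i386/i386/trap.c): the platform
 * trap handler gives dtrace_trap() first refusal before normal fault
 * processing, along the lines of:
 *
 *	if (dtrace_trap(frame, type))
 *		return;		(fault was consumed by a probe in progress)
 */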