/*	$OpenBSD: subr_prof.c,v 1.40 2023/10/17 00:04:02 cheloha Exp $	*/
/*	$NetBSD: subr_prof.c,v 1.12 1996/04/22 01:38:50 christos Exp $	*/

/*-
 * Copyright (c) 1982, 1986, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)subr_prof.c	8.3 (Berkeley) 9/23/93
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/clockintr.h>
#include <sys/pledge.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/mount.h>
#include <sys/sysctl.h>
#include <sys/syscallargs.h>
#include <sys/user.h>

uint64_t profclock_period;

#if defined(GPROF) || defined(DDBPROF)
#include <sys/malloc.h>
#include <sys/gmon.h>

#include <uvm/uvm_extern.h>

#include <machine/db_machdep.h>
#include <ddb/db_extern.h>

/*
 * Flag to prevent CPUs from executing the mcount() monitor function
 * until we're sure they are in a sane state.
 */
int gmoninit = 0;
u_int gmon_cpu_count;		/* [K] number of CPUs with profiling enabled */

extern char etext[];

void gmonclock(struct clockrequest *, void *, void *);

void
prof_init(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct gmonparam *p;
	u_long lowpc, highpc, textsize;
	u_long kcountsize, fromssize, tossize;
	long tolimit;
	char *cp;
	int size;

	/*
	 * Round lowpc and highpc to multiples of the density we're using
	 * so the rest of the scaling (here and in gprof) stays in ints.
	 */
	lowpc = ROUNDDOWN(KERNBASE, HISTFRACTION * sizeof(HISTCOUNTER));
	highpc = ROUNDUP((u_long)etext, HISTFRACTION * sizeof(HISTCOUNTER));
	textsize = highpc - lowpc;
#ifdef GPROF
	printf("Profiling kernel, textsize=%ld [%lx..%lx]\n",
	    textsize, lowpc, highpc);
#endif
	kcountsize = textsize / HISTFRACTION;
	fromssize = textsize / HASHFRACTION;
	tolimit = textsize * ARCDENSITY / 100;
	if (tolimit < MINARCS)
		tolimit = MINARCS;
	else if (tolimit > MAXARCS)
		tolimit = MAXARCS;
	tossize = tolimit * sizeof(struct tostruct);
	size = sizeof(*p) + kcountsize + fromssize + tossize;

	/* Allocate and initialize one profiling buffer per CPU. */
	CPU_INFO_FOREACH(cii, ci) {
		ci->ci_gmonclock = clockintr_establish(ci, gmonclock, NULL);
		if (ci->ci_gmonclock == NULL) {
			printf("%s: clockintr_establish gmonclock\n", __func__);
			return;
		}
		clockintr_stagger(ci->ci_gmonclock, profclock_period,
		    CPU_INFO_UNIT(ci), MAXCPUS);
		cp = km_alloc(round_page(size), &kv_any, &kp_zero, &kd_nowait);
		if (cp == NULL) {
			printf("No memory for profiling.\n");
			return;
		}

		p = (struct gmonparam *)cp;
		cp += sizeof(*p);
		p->tos = (struct tostruct *)cp;
		cp += tossize;
		p->kcount = (u_short *)cp;
		cp += kcountsize;
		p->froms = (u_short *)cp;

		p->state = GMON_PROF_OFF;
		p->lowpc = lowpc;
		p->highpc = highpc;
		p->textsize = textsize;
		p->hashfraction = HASHFRACTION;
		p->kcountsize = kcountsize;
		p->fromssize = fromssize;
		p->tolimit = tolimit;
		p->tossize = tossize;

		ci->ci_gmon = p;
	}
}
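
/*
 * Each per-CPU buffer built by prof_init() is a single zeroed
 * km_alloc() allocation, carved up in the order shown:
 *
 *	+------------------+---------+------------+-----------+
 *	| struct gmonparam |  tos[]  |  kcount[]  |  froms[]  |
 *	+------------------+---------+------------+-----------+
 *	    sizeof(*p)      tossize   kcountsize   fromssize
 *
 * kcount[] holds the PC-sampling histogram; froms[]/tos[] record the
 * call-graph arcs gathered by mcount().
 */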

int
prof_state_toggle(struct cpu_info *ci, int oldstate)
{
	struct gmonparam *gp = ci->ci_gmon;
	int error = 0;

	KERNEL_ASSERT_LOCKED();

	if (gp->state == oldstate)
		return (0);

	switch (gp->state) {
	case GMON_PROF_ON:
#if !defined(GPROF)
		/*
		 * If this is not a profiling kernel, we need to patch
		 * all symbols that can be instrumented.
		 */
		error = db_prof_enable();
#endif
		if (error == 0) {
			if (++gmon_cpu_count == 1)
				startprofclock(&process0);
			clockintr_advance(ci->ci_gmonclock, profclock_period);
		}
		break;
	default:
		error = EINVAL;
		gp->state = GMON_PROF_OFF;
		/* FALLTHROUGH */
	case GMON_PROF_OFF:
		clockintr_cancel(ci->ci_gmonclock);
		if (--gmon_cpu_count == 0)
			stopprofclock(&process0);
#if !defined(GPROF)
		db_prof_disable();
#endif
		break;
	}

	return (error);
}

/*
 * Return kernel profiling information.
 */
int
sysctl_doprof(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct gmonparam *gp = NULL;
	int error, cpuid, op, state;

	/* all sysctl names at this level are name and field */
	if (namelen != 2)
		return (ENOTDIR);		/* overloaded */

	op = name[0];
	cpuid = name[1];

	CPU_INFO_FOREACH(cii, ci) {
		if (cpuid == CPU_INFO_UNIT(ci)) {
			gp = ci->ci_gmon;
			break;
		}
	}

	if (gp == NULL)
		return (EOPNOTSUPP);

	/* Assume that if we're here it is safe to execute profiling. */
	gmoninit = 1;

	switch (op) {
	case GPROF_STATE:
		state = gp->state;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &gp->state);
		if (error)
			return (error);
		return prof_state_toggle(ci, state);
	case GPROF_COUNT:
		return (sysctl_struct(oldp, oldlenp, newp, newlen,
		    gp->kcount, gp->kcountsize));
	case GPROF_FROMS:
		return (sysctl_struct(oldp, oldlenp, newp, newlen,
		    gp->froms, gp->fromssize));
	case GPROF_TOS:
		return (sysctl_struct(oldp, oldlenp, newp, newlen,
		    gp->tos, gp->tossize));
	case GPROF_GMONPARAM:
		return (sysctl_rdstruct(oldp, oldlenp, newp, gp, sizeof *gp));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}
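
/*
 * kgmon(8) drives the handler above via sysctl(2).  As a sketch only
 * (userland code, with the mib shape inferred from the name[0]/name[1]
 * parsing above), enabling profiling on CPU 0 would look like:
 *
 *	int mib[4] = { CTL_KERN, KERN_PROF, GPROF_STATE, 0 };
 *	int state = GMON_PROF_ON;
 *
 *	if (sysctl(mib, 4, NULL, NULL, &state, sizeof(state)) == -1)
 *		err(1, "sysctl");
 */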

void
gmonclock(struct clockrequest *cr, void *cf, void *arg)
{
	uint64_t count;
	struct clockframe *frame = cf;
	struct gmonparam *g = curcpu()->ci_gmon;
	u_long i;

	count = clockrequest_advance(cr, profclock_period);
	if (count > ULONG_MAX)
		count = ULONG_MAX;

	/*
	 * Kernel statistics are just like addupc_intr(), only easier.
	 */
	if (!CLKF_USERMODE(frame) && g != NULL && g->state == GMON_PROF_ON) {
		i = CLKF_PC(frame) - g->lowpc;
		if (i < g->textsize) {
			i /= HISTFRACTION * sizeof(*g->kcount);
			g->kcount[i] += (u_long)count;
		}
	}
}

#endif /* GPROF || DDBPROF */

/*
 * Profiling system call.
 *
 * The scale factor is a fixed point number with 16 bits of fraction, so that
 * 1.0 is represented as 0x10000.  A scale factor of 0 turns off profiling.
 */
int
sys_profil(struct proc *p, void *v, register_t *retval)
{
	struct sys_profil_args /* {
		syscallarg(caddr_t) samples;
		syscallarg(size_t) size;
		syscallarg(u_long) offset;
		syscallarg(u_int) scale;
	} */ *uap = v;
	struct process *pr = p->p_p;
	struct uprof *upp;
	int error, s;

	error = pledge_profil(p, SCARG(uap, scale));
	if (error)
		return error;

	if (SCARG(uap, scale) > (1 << 16))
		return (EINVAL);
	if (SCARG(uap, scale) == 0) {
		stopprofclock(pr);
		need_resched(curcpu());
		return (0);
	}
	upp = &pr->ps_prof;

	/* Block profile interrupts while changing state. */
	s = splstatclock();
	upp->pr_off = SCARG(uap, offset);
	upp->pr_scale = SCARG(uap, scale);
	upp->pr_base = (caddr_t)SCARG(uap, samples);
	upp->pr_size = SCARG(uap, size);
	startprofclock(pr);
	splx(s);
	need_resched(curcpu());

	return (0);
}

void
profclock(struct clockrequest *cr, void *cf, void *arg)
{
	uint64_t count;
	struct clockframe *frame = cf;
	struct proc *p = curproc;

	count = clockrequest_advance(cr, profclock_period);
	if (count > ULONG_MAX)
		count = ULONG_MAX;

	if (CLKF_USERMODE(frame)) {
		if (ISSET(p->p_p->ps_flags, PS_PROFIL))
			addupc_intr(p, CLKF_PC(frame), (u_long)count);
	} else {
		if (p != NULL && ISSET(p->p_p->ps_flags, PS_PROFIL))
			addupc_intr(p, PROC_PC(p), (u_long)count);
	}
}

/*
 * Scale is a fixed-point number with the binary point 16 bits
 * into the value, and is <= 1.0.  pc is at most 32 bits, so the
 * intermediate result is at most 48 bits.
 */
#define	PC_TO_INDEX(pc, prof) \
	((int)(((u_quad_t)((pc) - (prof)->pr_off) * \
	    (u_quad_t)((prof)->pr_scale)) >> 16) & ~1)
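
/*
 * Worked example: with pr_off = 0 and pr_scale = 0x8000 (i.e. 0.5),
 * a pc of 0x1000 yields ((0x1000 * 0x8000) >> 16) & ~1 = 0x800, a byte
 * index into the sample buffer.  The final "& ~1" keeps the index
 * aligned to the u_short cells that addupc_task() updates, so at this
 * scale each counter covers four bytes of text.
 */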

/*
 * Collect user-level profiling statistics; called on a profiling tick,
 * when a process is running in user-mode.  This routine may be called
 * from an interrupt context.  Schedule an AST that will vector us to
 * trap() with a context in which copyin and copyout will work.
 * Trap will then call addupc_task().
 */
void
addupc_intr(struct proc *p, u_long pc, u_long nticks)
{
	struct uprof *prof;

	prof = &p->p_p->ps_prof;
	if (pc < prof->pr_off || PC_TO_INDEX(pc, prof) >= prof->pr_size)
		return;			/* out of range; ignore */

	p->p_prof_addr = pc;
	p->p_prof_ticks += nticks;
	atomic_setbits_int(&p->p_flag, P_OWEUPC);
	need_proftick(p);
}

/*
 * Much like before, but we can afford to take faults here.  If the
 * update fails, we simply turn off profiling.
 */
void
addupc_task(struct proc *p, u_long pc, u_int nticks)
{
	struct process *pr = p->p_p;
	struct uprof *prof;
	caddr_t addr;
	u_int i;
	u_short v;

	/* Testing PS_PROFIL may be unnecessary, but is certainly safe. */
	if ((pr->ps_flags & PS_PROFIL) == 0 || nticks == 0)
		return;

	prof = &pr->ps_prof;
	if (pc < prof->pr_off ||
	    (i = PC_TO_INDEX(pc, prof)) >= prof->pr_size)
		return;

	addr = prof->pr_base + i;
	if (copyin(addr, (caddr_t)&v, sizeof(v)) == 0) {
		v += nticks;
		if (copyout((caddr_t)&v, addr, sizeof(v)) == 0)
			return;
	}
	stopprofclock(pr);
}
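
/*
 * Example profil(2) usage from userland (a sketch; the buffer size,
 * offset, and scale are arbitrary).  A scale of 0x8000 (0.5) makes each
 * u_short counter cover four bytes of text starting at "offset"; a
 * scale of 0 turns profiling back off:
 *
 *	u_short buf[4096];
 *
 *	profil((char *)buf, sizeof(buf), 0x1000, 0x8000);
 *	...
 *	profil(NULL, 0, 0, 0);
 */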