/*	$OpenBSD: subr_prof.c,v 1.41 2024/01/24 19:23:38 cheloha Exp $	*/
/*	$NetBSD: subr_prof.c,v 1.12 1996/04/22 01:38:50 christos Exp $	*/

/*-
 * Copyright (c) 1982, 1986, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)subr_prof.c	8.3 (Berkeley) 9/23/93
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/clockintr.h>
#include <sys/pledge.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/mount.h>
#include <sys/sysctl.h>
#include <sys/syscallargs.h>
#include <sys/user.h>

uint64_t profclock_period;

#if defined(GPROF) || defined(DDBPROF)
#include <sys/malloc.h>
#include <sys/gmon.h>

#include <uvm/uvm_extern.h>

#include <machine/db_machdep.h>
#include <ddb/db_extern.h>

/*
 * Flag to prevent CPUs from executing the mcount() monitor function
 * until we're sure they are in a sane state.
 */
int gmoninit = 0;
u_int gmon_cpu_count;		/* [K] number of CPUs with profiling enabled */

extern char etext[];

void gmonclock(struct clockrequest *, void *, void *);

void
prof_init(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct gmonparam *p;
	u_long lowpc, highpc, textsize;
	u_long kcountsize, fromssize, tossize;
	long tolimit;
	char *cp;
	int size;

	/*
	 * Round lowpc and highpc to multiples of the density we're using
	 * so the rest of the scaling (here and in gprof) stays in ints.
	 */
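	/*
	 * Illustrative arithmetic, not taken from this file: with the
	 * historical gmon.h defaults of HISTFRACTION 2 and a two-byte
	 * HISTCOUNTER (u_short), the rounding granularity below is
	 * 2 * 2 = 4 bytes, and kcountsize ends up allocating one
	 * u_short histogram counter for every 4 bytes of kernel text.
	 */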
	lowpc = ROUNDDOWN(KERNBASE, HISTFRACTION * sizeof(HISTCOUNTER));
	highpc = ROUNDUP((u_long)etext, HISTFRACTION * sizeof(HISTCOUNTER));
	textsize = highpc - lowpc;
#ifdef GPROF
	printf("Profiling kernel, textsize=%ld [%lx..%lx]\n",
	    textsize, lowpc, highpc);
#endif
	kcountsize = textsize / HISTFRACTION;
	fromssize = textsize / HASHFRACTION;
	tolimit = textsize * ARCDENSITY / 100;
	if (tolimit < MINARCS)
		tolimit = MINARCS;
	else if (tolimit > MAXARCS)
		tolimit = MAXARCS;
	tossize = tolimit * sizeof(struct tostruct);
	size = sizeof(*p) + kcountsize + fromssize + tossize;

	/* Allocate and initialize one profiling buffer per CPU. */
	CPU_INFO_FOREACH(cii, ci) {
		cp = km_alloc(round_page(size), &kv_any, &kp_zero, &kd_nowait);
		if (cp == NULL) {
			printf("No memory for profiling.\n");
			return;
		}

		clockintr_bind(&ci->ci_gmonclock, ci, gmonclock, NULL);
		clockintr_stagger(&ci->ci_gmonclock, profclock_period,
		    CPU_INFO_UNIT(ci), MAXCPUS);

		p = (struct gmonparam *)cp;
		cp += sizeof(*p);
		p->tos = (struct tostruct *)cp;
		cp += tossize;
		p->kcount = (u_short *)cp;
		cp += kcountsize;
		p->froms = (u_short *)cp;

		p->state = GMON_PROF_OFF;
		p->lowpc = lowpc;
		p->highpc = highpc;
		p->textsize = textsize;
		p->hashfraction = HASHFRACTION;
		p->kcountsize = kcountsize;
		p->fromssize = fromssize;
		p->tolimit = tolimit;
		p->tossize = tossize;

		ci->ci_gmon = p;
	}
}

int
prof_state_toggle(struct cpu_info *ci, int oldstate)
{
	struct gmonparam *gp = ci->ci_gmon;
	int error = 0;

	KERNEL_ASSERT_LOCKED();

	if (gp->state == oldstate)
		return (0);

	switch (gp->state) {
	case GMON_PROF_ON:
#if !defined(GPROF)
		/*
		 * If this is not a profiling kernel, we need to patch
		 * all symbols that can be instrumented.
		 */
		error = db_prof_enable();
#endif
		if (error == 0) {
			if (++gmon_cpu_count == 1)
				startprofclock(&process0);
			clockintr_advance(&ci->ci_gmonclock, profclock_period);
		}
		break;
	default:
		error = EINVAL;
		gp->state = GMON_PROF_OFF;
		/* FALLTHROUGH */
	case GMON_PROF_OFF:
		clockintr_cancel(&ci->ci_gmonclock);
		if (--gmon_cpu_count == 0)
			stopprofclock(&process0);
#if !defined(GPROF)
		db_prof_disable();
#endif
		break;
	}

	return (error);
}

/*
 * Return kernel profiling information.
 */
int
sysctl_doprof(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct gmonparam *gp = NULL;
	int error, cpuid, op, state;

	/* all sysctl names at this level are name and field */
	if (namelen != 2)
		return (ENOTDIR);		/* overloaded */

	op = name[0];
	cpuid = name[1];

	CPU_INFO_FOREACH(cii, ci) {
		if (cpuid == CPU_INFO_UNIT(ci)) {
			gp = ci->ci_gmon;
			break;
		}
	}

	if (gp == NULL)
		return (EOPNOTSUPP);

	/* Assume that if we're here it is safe to execute profiling. */
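	/*
	 * Sketch of the userland view of this node, for illustration only
	 * (kgmon(8) is the usual consumer).  Assuming the usual
	 * kern_sysctl() dispatch, which consumes KERN_PROF before calling
	 * us, "op" and "cpuid" above come from a four-element MIB such as:
	 *
	 *	int mib[4] = { CTL_KERN, KERN_PROF, GPROF_STATE, 0 };
	 *	int on = GMON_PROF_ON;
	 *	sysctl(mib, 4, NULL, NULL, &on, sizeof(on));
	 */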
	gmoninit = 1;

	switch (op) {
	case GPROF_STATE:
		state = gp->state;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &gp->state);
		if (error)
			return (error);
		return prof_state_toggle(ci, state);
	case GPROF_COUNT:
		return (sysctl_struct(oldp, oldlenp, newp, newlen,
		    gp->kcount, gp->kcountsize));
	case GPROF_FROMS:
		return (sysctl_struct(oldp, oldlenp, newp, newlen,
		    gp->froms, gp->fromssize));
	case GPROF_TOS:
		return (sysctl_struct(oldp, oldlenp, newp, newlen,
		    gp->tos, gp->tossize));
	case GPROF_GMONPARAM:
		return (sysctl_rdstruct(oldp, oldlenp, newp, gp, sizeof *gp));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}

void
gmonclock(struct clockrequest *cr, void *cf, void *arg)
{
	uint64_t count;
	struct clockframe *frame = cf;
	struct gmonparam *g = curcpu()->ci_gmon;
	u_long i;

	count = clockrequest_advance(cr, profclock_period);
	if (count > ULONG_MAX)
		count = ULONG_MAX;

	/*
	 * Kernel statistics are just like addupc_intr(), only easier.
	 */
	if (!CLKF_USERMODE(frame) && g != NULL && g->state == GMON_PROF_ON) {
		i = CLKF_PC(frame) - g->lowpc;
		if (i < g->textsize) {
			i /= HISTFRACTION * sizeof(*g->kcount);
			g->kcount[i] += (u_long)count;
		}
	}
}

#endif /* GPROF || DDBPROF */

/*
 * Profiling system call.
 *
 * The scale factor is a fixed point number with 16 bits of fraction, so that
 * 1.0 is represented as 0x10000.  A scale factor of 0 turns off profiling.
 */
int
sys_profil(struct proc *p, void *v, register_t *retval)
{
	struct sys_profil_args /* {
		syscallarg(caddr_t) samples;
		syscallarg(size_t) size;
		syscallarg(u_long) offset;
		syscallarg(u_int) scale;
	} */ *uap = v;
	struct process *pr = p->p_p;
	struct uprof *upp;
	int error, s;

	error = pledge_profil(p, SCARG(uap, scale));
	if (error)
		return error;

	if (SCARG(uap, scale) > (1 << 16))
		return (EINVAL);
	if (SCARG(uap, scale) == 0) {
		stopprofclock(pr);
		need_resched(curcpu());
		return (0);
	}
	upp = &pr->ps_prof;

	/* Block profile interrupts while changing state. */
	s = splstatclock();
	upp->pr_off = SCARG(uap, offset);
	upp->pr_scale = SCARG(uap, scale);
	upp->pr_base = (caddr_t)SCARG(uap, samples);
	upp->pr_size = SCARG(uap, size);
	startprofclock(pr);
	splx(s);
	need_resched(curcpu());

	return (0);
}

void
profclock(struct clockrequest *cr, void *cf, void *arg)
{
	uint64_t count;
	struct clockframe *frame = cf;
	struct proc *p = curproc;

	count = clockrequest_advance(cr, profclock_period);
	if (count > ULONG_MAX)
		count = ULONG_MAX;

	if (CLKF_USERMODE(frame)) {
		if (ISSET(p->p_p->ps_flags, PS_PROFIL))
			addupc_intr(p, CLKF_PC(frame), (u_long)count);
	} else {
		if (p != NULL && ISSET(p->p_p->ps_flags, PS_PROFIL))
			addupc_intr(p, PROC_PC(p), (u_long)count);
	}
}

/*
 * Scale is a fixed-point number with the binary point 16 bits
 * into the value, and is <= 1.0.  pc is at most 32 bits, so the
 * intermediate result is at most 48 bits.
 */
#define	PC_TO_INDEX(pc, prof) \
	((int)(((u_quad_t)((pc) - (prof)->pr_off) * \
	    (u_quad_t)((prof)->pr_scale)) >> 16) & ~1)
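/*
 * Worked example (illustrative only): with pr_scale = 0x8000, i.e. 0.5,
 * a pc 0x1000 bytes past pr_off yields (0x1000 * 0x8000) >> 16 = 0x800.
 * The final "& ~1" forces an even byte offset, so the index always lands
 * on one of the u_short cells that addupc_task() updates; at this scale,
 * every 4 bytes of text share a single cell.
 */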
/*
 * Collect user-level profiling statistics; called on a profiling tick,
 * when a process is running in user-mode.  This routine may be called
 * from an interrupt context.  Schedule an AST that will vector us to
 * trap() with a context in which copyin and copyout will work.
 * Trap will then call addupc_task().
 */
void
addupc_intr(struct proc *p, u_long pc, u_long nticks)
{
	struct uprof *prof;

	prof = &p->p_p->ps_prof;
	if (pc < prof->pr_off || PC_TO_INDEX(pc, prof) >= prof->pr_size)
		return;			/* out of range; ignore */

	p->p_prof_addr = pc;
	p->p_prof_ticks += nticks;
	atomic_setbits_int(&p->p_flag, P_OWEUPC);
	need_proftick(p);
}

/*
 * Much like before, but we can afford to take faults here.  If the
 * update fails, we simply turn off profiling.
 */
void
addupc_task(struct proc *p, u_long pc, u_int nticks)
{
	struct process *pr = p->p_p;
	struct uprof *prof;
	caddr_t addr;
	u_int i;
	u_short v;

	/* Testing PS_PROFIL may be unnecessary, but is certainly safe. */
	if ((pr->ps_flags & PS_PROFIL) == 0 || nticks == 0)
		return;

	prof = &pr->ps_prof;
	if (pc < prof->pr_off ||
	    (i = PC_TO_INDEX(pc, prof)) >= prof->pr_size)
		return;

	addr = prof->pr_base + i;
	if (copyin(addr, (caddr_t)&v, sizeof(v)) == 0) {
		v += nticks;
		if (copyout((caddr_t)&v, addr, sizeof(v)) == 0)
			return;
	}
	stopprofclock(pr);
}
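/*
 * Userland counterpart, sketched for illustration only ("bins" and
 * "base" below are hypothetical, not part of this file): a process arms
 * sys_profil() above via profil(2), e.g.
 *
 *	u_short bins[4096];
 *	profil((char *)bins, sizeof(bins), (u_long)base, 0x8000);
 *
 * after which each profclock() tick taken in user mode reaches
 * addupc_intr(), and the AST it schedules has addupc_task() increment
 * one of the bins.
 */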