/*	$OpenBSD: subr_prof.c,v 1.40 2023/10/17 00:04:02 cheloha Exp $	*/
/*	$NetBSD: subr_prof.c,v 1.12 1996/04/22 01:38:50 christos Exp $	*/

/*-
 * Copyright (c) 1982, 1986, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)subr_prof.c	8.3 (Berkeley) 9/23/93
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/clockintr.h>
#include <sys/pledge.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/mount.h>
#include <sys/sysctl.h>
#include <sys/syscallargs.h>
#include <sys/user.h>

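/* Period of the profiling clock interrupt, in nanoseconds (derived from profhz). */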
uint64_t profclock_period;

#if defined(GPROF) || defined(DDBPROF)
#include <sys/malloc.h>
#include <sys/gmon.h>

#include <uvm/uvm_extern.h>

#include <machine/db_machdep.h>
#include <ddb/db_extern.h>

/*
 * Flag to prevent CPUs from executing the mcount() monitor function
 * until we're sure they are in a sane state.
 */
int gmoninit = 0;
u_int gmon_cpu_count;		/* [K] number of CPUs with profiling enabled */

extern char etext[];

void gmonclock(struct clockrequest *, void *, void *);

void
prof_init(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct gmonparam *p;
	u_long lowpc, highpc, textsize;
	u_long kcountsize, fromssize, tossize;
	long tolimit;
	char *cp;
	int size;

	/*
	 * Round lowpc and highpc to multiples of the density we're using
	 * so the rest of the scaling (here and in gprof) stays in ints.
	 */
	lowpc = ROUNDDOWN(KERNBASE, HISTFRACTION * sizeof(HISTCOUNTER));
	highpc = ROUNDUP((u_long)etext, HISTFRACTION * sizeof(HISTCOUNTER));
	textsize = highpc - lowpc;
#ifdef GPROF
	printf("Profiling kernel, textsize=%ld [%lx..%lx]\n",
	    textsize, lowpc, highpc);
#endif
	kcountsize = textsize / HISTFRACTION;
	fromssize = textsize / HASHFRACTION;
	tolimit = textsize * ARCDENSITY / 100;
	if (tolimit < MINARCS)
		tolimit = MINARCS;
	else if (tolimit > MAXARCS)
		tolimit = MAXARCS;
	tossize = tolimit * sizeof(struct tostruct);
	size = sizeof(*p) + kcountsize + fromssize + tossize;

	/* Allocate and initialize one profiling buffer per CPU. */
	CPU_INFO_FOREACH(cii, ci) {
		ci->ci_gmonclock = clockintr_establish(ci, gmonclock, NULL);
		if (ci->ci_gmonclock == NULL) {
			printf("%s: clockintr_establish gmonclock\n", __func__);
			return;
		}
		clockintr_stagger(ci->ci_gmonclock, profclock_period,
		    CPU_INFO_UNIT(ci), MAXCPUS);
		cp = km_alloc(round_page(size), &kv_any, &kp_zero, &kd_nowait);
		if (cp == NULL) {
			printf("No memory for profiling.\n");
			return;
		}

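		/*
		 * Carve the zeroed allocation into the gmonparam header,
		 * the tos[] arc records, the kcount[] histogram, and the
		 * froms[] hash table, in that order.
		 */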
		p = (struct gmonparam *)cp;
		cp += sizeof(*p);
		p->tos = (struct tostruct *)cp;
		cp += tossize;
		p->kcount = (u_short *)cp;
		cp += kcountsize;
		p->froms = (u_short *)cp;

		p->state = GMON_PROF_OFF;
		p->lowpc = lowpc;
		p->highpc = highpc;
		p->textsize = textsize;
		p->hashfraction = HASHFRACTION;
		p->kcountsize = kcountsize;
		p->fromssize = fromssize;
		p->tolimit = tolimit;
		p->tossize = tossize;

		ci->ci_gmon = p;
	}
}

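/*
 * The sysctl handler below stores the requested state in gp->state
 * before calling here; oldstate is the value it held beforehand.
 * Start or stop the per-CPU gmon clock and the system profiling
 * clock to match the requested state.
 */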
int
prof_state_toggle(struct cpu_info *ci, int oldstate)
{
	struct gmonparam *gp = ci->ci_gmon;
	int error = 0;

	KERNEL_ASSERT_LOCKED();

	if (gp->state == oldstate)
		return (0);

	switch (gp->state) {
	case GMON_PROF_ON:
#if !defined(GPROF)
		/*
		 * If this is not a profiling kernel, we need to patch
		 * all symbols that can be instrumented.
		 */
		error = db_prof_enable();
#endif
		if (error == 0) {
			if (++gmon_cpu_count == 1)
				startprofclock(&process0);
			clockintr_advance(ci->ci_gmonclock, profclock_period);
		}
		break;
	default:
		error = EINVAL;
		gp->state = GMON_PROF_OFF;
		/* FALLTHROUGH */
	case GMON_PROF_OFF:
		clockintr_cancel(ci->ci_gmonclock);
		if (--gmon_cpu_count == 0)
			stopprofclock(&process0);
#if !defined(GPROF)
		db_prof_disable();
#endif
		break;
	}

	return (error);
}

/*
 * Return kernel profiling information.
 */
int
sysctl_doprof(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct gmonparam *gp = NULL;
	int error, cpuid, op, state;

	/* all sysctl names at this level are name and field */
	if (namelen != 2)
		return (ENOTDIR);		/* overloaded */

	op = name[0];
	cpuid = name[1];

	CPU_INFO_FOREACH(cii, ci) {
		if (cpuid == CPU_INFO_UNIT(ci)) {
			gp = ci->ci_gmon;
			break;
		}
	}

	if (gp == NULL)
		return (EOPNOTSUPP);

	/* Assume that if we're here it is safe to execute profiling. */
	gmoninit = 1;

	switch (op) {
	case GPROF_STATE:
		state = gp->state;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &gp->state);
		if (error)
			return (error);
		return prof_state_toggle(ci, state);
	case GPROF_COUNT:
		return (sysctl_struct(oldp, oldlenp, newp, newlen,
		    gp->kcount, gp->kcountsize));
	case GPROF_FROMS:
		return (sysctl_struct(oldp, oldlenp, newp, newlen,
		    gp->froms, gp->fromssize));
	case GPROF_TOS:
		return (sysctl_struct(oldp, oldlenp, newp, newlen,
		    gp->tos, gp->tossize));
	case GPROF_GMONPARAM:
		return (sysctl_rdstruct(oldp, oldlenp, newp, gp, sizeof *gp));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}

void
gmonclock(struct clockrequest *cr, void *cf, void *arg)
{
	uint64_t count;
	struct clockframe *frame = cf;
	struct gmonparam *g = curcpu()->ci_gmon;
	u_long i;

	count = clockrequest_advance(cr, profclock_period);
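	/* count is accumulated as a u_long below; clamp for ILP32 platforms. */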
	if (count > ULONG_MAX)
		count = ULONG_MAX;

	/*
	 * Kernel statistics are just like addupc_intr(), only easier.
	 */
	if (!CLKF_USERMODE(frame) && g != NULL && g->state == GMON_PROF_ON) {
		i = CLKF_PC(frame) - g->lowpc;
		if (i < g->textsize) {
			i /= HISTFRACTION * sizeof(*g->kcount);
			g->kcount[i] += (u_long)count;
		}
	}
}

#endif /* GPROF || DDBPROF */

/*
 * Profiling system call.
 *
 * The scale factor is a fixed point number with 16 bits of fraction, so that
 * 1.0 is represented as 0x10000.  A scale factor of 0 turns off profiling.
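 *
 * For example, a scale of 0x8000 (0.5) maps four bytes of text onto each
 * 16-bit counter, so the sample buffer need only be half the size of the
 * profiled region.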
 */
int
sys_profil(struct proc *p, void *v, register_t *retval)
{
	struct sys_profil_args /* {
		syscallarg(caddr_t) samples;
		syscallarg(size_t) size;
		syscallarg(u_long) offset;
		syscallarg(u_int) scale;
	} */ *uap = v;
	struct process *pr = p->p_p;
	struct uprof *upp;
	int error, s;

	error = pledge_profil(p, SCARG(uap, scale));
	if (error)
		return error;

	if (SCARG(uap, scale) > (1 << 16))
		return (EINVAL);
	if (SCARG(uap, scale) == 0) {
		stopprofclock(pr);
		need_resched(curcpu());
		return (0);
	}
	upp = &pr->ps_prof;

	/* Block profile interrupts while changing state. */
	s = splstatclock();
	upp->pr_off = SCARG(uap, offset);
	upp->pr_scale = SCARG(uap, scale);
	upp->pr_base = (caddr_t)SCARG(uap, samples);
	upp->pr_size = SCARG(uap, size);
	startprofclock(pr);
	splx(s);
	need_resched(curcpu());

	return (0);
}
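
/*
 * Illustrative userland call (a sketch, not part of this file): sample
 * a hypothetical text region base..base+TEXTLEN at full density, one
 * 16-bit counter per pair of text bytes:
 *
 *	u_short buf[TEXTLEN / 2];
 *	profil((char *)buf, sizeof(buf), (u_long)base, 0x10000);
 */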

void
profclock(struct clockrequest *cr, void *cf, void *arg)
{
	uint64_t count;
	struct clockframe *frame = cf;
	struct proc *p = curproc;

	count = clockrequest_advance(cr, profclock_period);
	if (count > ULONG_MAX)
		count = ULONG_MAX;

	if (CLKF_USERMODE(frame)) {
		if (ISSET(p->p_p->ps_flags, PS_PROFIL))
			addupc_intr(p, CLKF_PC(frame), (u_long)count);
	} else {
		if (p != NULL && ISSET(p->p_p->ps_flags, PS_PROFIL))
			addupc_intr(p, PROC_PC(p), (u_long)count);
	}
}

/*
 * Scale is a fixed-point number with the binary point 16 bits
 * into the value, and is <= 1.0.  pc is at most 32 bits, so the
 * intermediate result is at most 48 bits.
 */
#define	PC_TO_INDEX(pc, prof) \
	((int)(((u_quad_t)((pc) - (prof)->pr_off) * \
	    (u_quad_t)((prof)->pr_scale)) >> 16) & ~1)
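
/*
 * For example, with pr_off = 0x1000 and pr_scale = 0x8000 (0.5), a pc
 * of 0x1010 yields (0x10 * 0x8000) >> 16 = 8, which is already even:
 * the tick lands in the fifth 16-bit bucket of the sample buffer.
 */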

/*
 * Collect user-level profiling statistics; called on a profiling tick,
 * when a process is running in user-mode.  This routine may be called
 * from an interrupt context. Schedule an AST that will vector us to
 * trap() with a context in which copyin and copyout will work.
 * Trap will then call addupc_task().
 */
void
addupc_intr(struct proc *p, u_long pc, u_long nticks)
{
	struct uprof *prof;

	prof = &p->p_p->ps_prof;
	if (pc < prof->pr_off || PC_TO_INDEX(pc, prof) >= prof->pr_size)
		return;			/* out of range; ignore */

	p->p_prof_addr = pc;
	p->p_prof_ticks += nticks;
	atomic_setbits_int(&p->p_flag, P_OWEUPC);
	need_proftick(p);
}


/*
 * Much like before, but we can afford to take faults here.  If the
 * update fails, we simply turn off profiling.
 */
void
addupc_task(struct proc *p, u_long pc, u_int nticks)
{
	struct process *pr = p->p_p;
	struct uprof *prof;
	caddr_t addr;
	u_int i;
	u_short v;

	/* Testing PS_PROFIL may be unnecessary, but is certainly safe. */
	if ((pr->ps_flags & PS_PROFIL) == 0 || nticks == 0)
		return;

	prof = &pr->ps_prof;
	if (pc < prof->pr_off ||
	    (i = PC_TO_INDEX(pc, prof)) >= prof->pr_size)
		return;

	addr = prof->pr_base + i;
	if (copyin(addr, (caddr_t)&v, sizeof(v)) == 0) {
		v += nticks;
		if (copyout((caddr_t)&v, addr, sizeof(v)) == 0)
			return;
	}
	stopprofclock(pr);
}