xref: /netbsd-src/sys/arch/shark/shark/profile.c (revision d47bcd296c8b39243dd81e9cc75ea86330d4eeaf)
1 /*	$NetBSD: profile.c,v 1.18 2019/11/10 21:16:32 chs Exp $	*/
2 
3 /*
4  * Copyright 1997
5  * Digital Equipment Corporation. All rights reserved.
6  *
7  * This software is furnished under license and may be used and
8  * copied only in accordance with the following terms and conditions.
9  * Subject to these conditions, you may download, copy, install,
10  * use, modify and distribute this software in source and/or binary
11  * form. No title or ownership is transferred hereby.
12  *
13  * 1) Any source code used, modified or distributed must reproduce
14  *    and retain this copyright notice and list of conditions as
15  *    they appear in the source file.
16  *
17  * 2) No right is granted to use any trade name, trademark, or logo of
18  *    Digital Equipment Corporation. Neither the "Digital Equipment
19  *    Corporation" name nor any trademark or logo of Digital Equipment
20  *    Corporation may be used to endorse or promote products derived
21  *    from this software without the prior written permission of
22  *    Digital Equipment Corporation.
23  *
24  * 3) This software is provided "AS-IS" and any express or implied
25  *    warranties, including but not limited to, any implied warranties
26  *    of merchantability, fitness for a particular purpose, or
27  *    non-infringement are disclaimed. In no event shall DIGITAL be
28  *    liable for any damages whatsoever, and in particular, DIGITAL
29  *    shall not be liable for special, indirect, consequential, or
30  *    incidental damages or damages for lost profits, loss of
31  *    revenue or loss of use, whether such damages arise in contract,
32  *    negligence, tort, under statute, in equity, at law or otherwise,
33  *    even if advised of the possibility of such damage.
34  */
35 
36 /*
37  * The fiq based profiler.
38  */
39 
40 #include <sys/cdefs.h>
41 __KERNEL_RCSID(0, "$NetBSD: profile.c,v 1.18 2019/11/10 21:16:32 chs Exp $");
42 
43 #include "profiler.h"
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/buf.h>
48 #include <sys/time.h>
49 #include <sys/proc.h>
50 #include <sys/ioctl.h>
51 #include <sys/conf.h>
52 #include <sys/errno.h>
53 #include <sys/fcntl.h>
54 #include <sys/uio.h>
55 #include <sys/malloc.h>
56 
57 #include <shark/shark/hat.h>
58 #include <machine/profileio.h>
59 #include <dev/ic/i8253reg.h>
60 
61 #define PROFILER_DEBUG 1
62 
63 #define countPerTick 500 /* TIMER_FREQ/10000   10 kHz timer */
64 
65 /* Processor Status Defines */
66 #define STATUS_MODE_MASK 0x1f
67 #define USER_MODE        0x10
68 #define FIQ_MODE         0x11
69 #define IRQ_MODE         0x12
70 #define SVC_MODE         0x13
71 #define ABORT_MODE       0x17
72 #define UNDEF_MODE       0x1b
73 #define SYS_MODE         0x1f
74 
/*
 * Software state of the profiler device.
 *
 * `state' is a bit mask made up of the PROF_* flags below.
 */
#define PROF_OPEN      0x01	/* set by profopen(), cleared by profclose() */
#define PROF_PROFILING 0x02	/* set by profStart(), cleared by profStop() */

struct profiler_sc
{
    int state;
} prof_sc;
83 
84 /*
85  * GLOBAL DATA
86  */
87 
/*
 * Private stack handed to the HAT for use while a FIQ is being serviced.
 */
#define HATSTACKSIZE 1024       /* size in bytes of the stack used during a FIQ */
static unsigned char hatStack[HATSTACKSIZE];

/*
 * Sample tables.
 *
 * Two tables are allocated while profiling.  profTable points at the
 * one currently being filled; the other is a backup that profread()
 * clears and switches in before copying the filled table out, so that
 * samples are not missed while the data is being copied.
 */
struct profHashTable *profTable;      /* table samples are entered into */
struct profHashTable *phashTables[2]; /* the two allocated tables */
int nhashTables;                      /* reset to 0 by profopen() */
102 
103 /*
104  * FORWARD DECLARATIONS
105  */
106 static void profFiq(int  x);
107 static void profHatWedge(int nFIQs);
108 void profStop(void);
109 void profStart(struct profStartInfo *);
110 static void profEnter(struct profHashTable * , unsigned int);
111 void displayTable(struct profHashTable * );
112 
113 dev_type_open(profopen);
114 dev_type_close(profclose);
115 dev_type_read(profread);
116 dev_type_ioctl(profioctl);
117 
118 const struct cdevsw prof_cdevsw = {
119 	.d_open = profopen,
120 	.d_close = profclose,
121 	.d_read = profread,
122 	.d_write = nowrite,
123 	.d_ioctl = profioctl,
124 	.d_stop = nostop,
125 	.d_tty = notty,
126 	.d_poll = nopoll,
127 	.d_mmap = nommap,
128 	.d_kqfilter = nokqfilter,
129 	.d_discard = nodiscard,
130 	.d_flag = 0
131 };
132 
133 void
profilerattach(int n)134 profilerattach(int n)
135 {
136     /* reset the profiler state */
137     prof_sc.state = 0;
138 }
139 
140 /*
141  * Open the profiling devicee.
142  * Returns
143  *       ENXIO for illegal minor device
144  *             ie. if the minor device number is not 0.
145  *       EBUSY if file is open by another process.
146  *       EROFS if attempt to open in write mode.
147  */
148 int
profopen(dev_t dev,int flag,int mode,struct proc * p)149 profopen(dev_t dev, int flag, int mode, struct proc *p)
150 {
151 
152     /* check that the minor number is correct. */
153     if (minor(dev) >= NPROFILER)
154     {
155 	return ENXIO;
156     }
157 
158     /* check that the device is not already open. */
159     if (prof_sc.state && PROF_OPEN)
160     {
161 	return EBUSY;
162     }
163 
164     /* check that the flag is set to read only. */
165     if (!(flag && FWRITE))
166     {
167 	return EROFS;
168     }
169     /* flag the device as open. */
170     prof_sc.state |= PROF_OPEN;
171     nhashTables = 0;
172     phashTables[0] = phashTables[1] = NULL;
173     return 0;
174 }
175 
176 /*
177  * Close the descriptor.
178  *
179  */
180 int
profclose(dev_t dev,int flag,int mode,struct proc * p)181 profclose(dev_t dev, int flag, int mode, struct proc *p)
182 {
183     /* clear the state, and stop profiling if
184      * it is happening.
185      */
186     profStop();
187     prof_sc.state &= ~PROF_OPEN;
188     return 0;
189 }
190 
191 int
profread(dev_t dev,struct uio * uio,int flags)192 profread(dev_t dev, struct uio *uio, int flags)
193 {
194     int error;
195     int real, backup;
196 
197     /* must be profiling to read */
198     if (!(prof_sc.state & PROF_PROFILING))
199     {
200 	error = EINVAL;
201     }
202     else
203     {
204 	if (uio->uio_resid != sizeof(struct profHashHeader) +
205 	    profTable->hdr.tableSize * sizeof(struct profHashEntry))
206 	{
207 	    printf("profile read size is incorrect!");
208 	    error = EINVAL;
209 	}
210 	else
211 	{
212 	    /* first work out which table is currently being used.
213 	     */
214 	    if (profTable == phashTables[0])
215 	    {
216 		real = 0;
217 		backup = 1;
218 	    }
219 	    else
220 	    {
221 		if (profTable == phashTables[1])
222 		{
223 		    real = 1;
224 		    backup = 0;
225 		}
226 		else
227 		{
228 		    panic("profiler lost buffer");
229 		}
230 	    }
231 	    /* now initialise the backup copy before switching over.
232 	     */
233 	    memset(phashTables[backup]->entries, 0,
234 		  profTable->hdr.tableSize * sizeof(struct profHashEntry));
235 
236 
237 	    /* now initialise the header */
238 	    phashTables[backup]->hdr.tableSize = phashTables[real]->hdr.tableSize;
239 	    phashTables[backup]->hdr.entries = phashTables[backup]->hdr.last
240 		= phashTables[real]->hdr.entries;
241 	    phashTables[backup]->hdr.samples = 0;
242 	    phashTables[backup]->hdr.missed = 0;
243 	    phashTables[backup]->hdr.fiqs = 0;
244 	    phashTables[backup]->hdr.pid = phashTables[real]->hdr.pid;
245 	    phashTables[backup]->hdr.mode = phashTables[real]->hdr.mode;
246 
247 	    /* ok now swap over.
248 	     * I don't worry about locking the fiq while I change
249 	     * this, at this point it won't matter which table the
250 	     * fiq reads.
251 	     */
252 	    profTable = phashTables[backup];
253 
254 	    /* don't want to send the pointer,
255 	     * make assumption that table follows the header.
256 	     */
257 	    if ( (error = uiomove(phashTables[real],
258 				  sizeof(struct profHashHeader), uio))
259 		!= 0)
260 	    {
261 		printf("uiomove failed error is %d\n", error);
262 	    }
263 	    else
264 	    {
265 		if ( (error = uiomove(phashTables[real]->entries,
266 				      phashTables[real]->hdr.tableSize *
267 				      sizeof(struct profHashEntry), uio))
268 		    != 0)
269 		{
270 		    printf("uiomove failed error is %d\n", error);
271 		}
272 	    }
273 	}
274     }
275     return error;
276 }
277 
278 /*
279  *  PROFIOSTART	  Start Profiling
280  *  PROFIOSTOP	  Stop Profiling
281  */
282 static int profcount = 0;
283 static int ints = 0;
284 int
profioctl(dev_t dev,u_long cmd,void * data,int flag,struct proc * p)285 profioctl(dev_t dev, u_long cmd, void *data, int flag, struct proc *p)
286 {
287     int error = 0;
288     struct profStartInfo *info = (struct profStartInfo *) data;
289 
290     switch (cmd)
291     {
292 	case PROFIOSTART :
293 	    profStart(info);
294 	    break;
295 	case PROFIOSTOP :
296 	    profStop();
297 	    break;
298 	default :
299 	    error = EINVAL;
300 	    break;
301     }
302     return error;
303 }
304 
305 /* start profiling, returning status information in the
306  * profStartInfo structure.
307  *
308  * presumes pid is running, does no checks here.
309  */
310 void
profStart(struct profStartInfo * info)311 profStart(struct profStartInfo *info)
312 {
313     unsigned int savedInts;
314     char *buffer;
315 
316     /* can't already be sampling */
317     if ( prof_sc.state & PROF_PROFILING )
318     {
319 	info->status = ALREADY_SAMPLING;
320 	return ;
321     }
322 
323     /* sanity check that the table sizes are logical */
324     if (info->entries > info->tableSize)
325     {
326 	info->status = BAD_TABLE_SIZE;
327 	return ;
328     }
329 
330     /* now sanity check that we are sampling either the
331      * kernel or a pid or both.
332      */
333     if ( !(info->mode & SAMPLE_MODE_MASK) )
334     {
335 	info->status = ILLEGAL_COMMAND;
336 	return ;
337     }
338 
339     /* alloc two hash tables. */
340     buffer  = malloc(sizeof(struct profHashTable) +
341 		     info->tableSize * sizeof(struct profHashEntry),
342 		     M_DEVBUF, M_WAITOK);
343     phashTables[0] = (struct profHashTable *) buffer;
344     phashTables[0]->entries = (struct profHashEntry *)
345 	( buffer + sizeof(struct profHashTable));
346 
347     buffer  = malloc(sizeof(struct profHashTable) +
348 		     info->tableSize * sizeof(struct profHashEntry),
349 		     M_DEVBUF, M_WAITOK);
350     phashTables[1] = (struct profHashTable *) buffer;
351     phashTables[1]->entries = (struct profHashEntry *)
352 	( buffer + sizeof(struct profHashTable));
353 
354     memset(phashTables[0]->entries, 0,
355 	  info->tableSize * sizeof(struct profHashEntry));
356     memset(phashTables[1]->entries, 0,
357 	  info->tableSize * sizeof(struct profHashEntry));
358 
359     /* now initialise the header */
360     profTable = phashTables[0];
361     profTable->hdr.tableSize = info->tableSize;
362     profTable->hdr.entries = profTable->hdr.last = info->entries;
363     profTable->hdr.samples = 0;
364     profTable->hdr.missed = 0;
365     profTable->hdr.fiqs = 0;
366     profTable->hdr.pid = info->pid;
367     profTable->hdr.mode = info->mode;
368 
369     /* now let the pigeons loose. */
370     savedInts = disable_interrupts(I32_bit | F32_bit);
371     prof_sc.state |= PROF_PROFILING;
372     hatClkOn(countPerTick,
373 	     profFiq,
374 	     (int)&prof_sc,
375 	     hatStack + HATSTACKSIZE - sizeof(unsigned),
376 	     profHatWedge);
377     restore_interrupts(savedInts);
378 }
379 
380 void
profStop(void)381 profStop(void)
382 {
383     unsigned int savedInts;
384     int spl;
385 
386     savedInts = disable_interrupts(I32_bit | F32_bit);
387     hatClkOff();
388     restore_interrupts(savedInts);
389 
390     spl = splbio();
391     /* only free the buffer's if we were profiling,
392      * who cares if we were not, won't alert any one.
393      */
394     if (prof_sc.state & PROF_PROFILING)
395     {
396 	/* now free both buffers. */
397 	free(phashTables[0], M_DEVBUF);
398 	free(phashTables[1], M_DEVBUF);
399     }
400     phashTables[0] = phashTables[1] = NULL;
401     prof_sc.state &= ~PROF_PROFILING;
402     splx(spl);
403 
404 }
405 
406 /*
407 **++
408 **  FUNCTIONAL DESCRIPTION:
409 **
410 **      profFiq
411 **
412 **      This is what the HAT clock calls.   This call drives
413 **      the timeout queues, which in turn drive the state machines
414 **
415 **      Be very carefully when calling a timeout as the function
416 **      that is called may in turn do timeout/untimeout calls
417 **      before returning
418 **
419 **  FORMAL PARAMETERS:
420 **
421 **      int x       - not used
422 **
423 **  IMPLICIT INPUTS:
424 **
425 **      nill
426 **
427 **  IMPLICIT OUTPUTS:
428 **
429 **      nill
430 **
431 **  FUNCTION VALUE:
432 **
433 **      nill
434 **
435 **  SIDE EFFECTS:
436 **
437 **      a timeout may be called if it is due
438 **--
439 */
440 static void
profFiq(int x)441 profFiq(int  x)
442 {
443     int i;
444     int *ip;           /* the fiq stack pointer */
445     unsigned int spsr, stacklr;   /* the link register, off the stack. */
446 
447 
448     /* get the link register and see where we came from.
449      * We do this by getting the stack pointer using,
450      * an inline assembler instruction and then going 9
451      * words up to get the return address from the fiq.
452      *
453      * NOTE: the stack will change if more local variables
454      * are added so beware of modifications to this
455      * function.
456      * the fiq.S handler puts the following on the stack
457      *          stmfd	sp!, {r0-r3, lr}
458      * then this function does
459      *          mov     ip, sp
460      *          stmfd	sp!, {r4, fp, ip, lr, pc}
461      * or some variant of this.
462      *
463      * instead of using sp we can use ip, the saved stack pointer
464      * and be done with the chance of sp changing around on us.
465      *
466      * so by the time we get here we have a stack that looks like.
467      * (see pg 4-23, ARM programming Techniques doco for description
468      * on stm instructions.)
469      *         lr-fiq  (we want this one).
470      *         r3-fiq
471      *         r2-fiq
472      *         r1-fiq
473      * ip-->   r0-fiq
474      *         pc-prof
475      *         lr-prof
476      *         ip-prof
477      *         fp-prof
478      * sp-->   r4-prof
479      * the sp by the time we get to it will point to r4 at the
480      * bottom of the stack. So we go 9 up to get the lr we want.
481      * or even better we have ip pointing to r0 and we can go 4 up
482      * to get the saved link register.
483      *
484      * We are safer this way because fiq.S is coded assembler, we are
485      * at the mercy of the assembler for our stack.
486      *
487      */
488     __asm("mov %0, ip" : "=r" (ip) : );
489     stacklr = *(ip+4);
490 
491     /* get the spsr register
492      */
493     __asm("mrs %0, spsr" : "=r" (spsr) : );
494 
495     /* now check whether we want this sample.
496      * NB. We place kernel and user level samples in the
497      * same table.
498      */
499     if ( (profTable->hdr.mode & SAMPLE_PROC) &&
500 	((spsr & STATUS_MODE_MASK) == USER_MODE) )
501     {
502 	if ( curlwp->p_pid == profTable->hdr.pid )
503 	{
504 	    profEnter(profTable, stacklr-4);
505 	}
506     }
507 
508     if ( profTable->hdr.mode & SAMPLE_KERN )
509     {
510 	if ( ((spsr & STATUS_MODE_MASK) == SVC_MODE)/* ||
511 	    ((spsr & STATUS_MODE_MASK) == IRQ_MODE)*/ )
512 	{
513 	    /* Note: the link register will be two instructions,
514 	     * ahead of the "blamed" instruction. This is actually
515 	     * a most likely case and might not actually highlight the
516 	     * exact cause of problems, some post processing intelligence
517 	     * will be required to make use of this data.
518 	     */
519 	    profEnter(profTable, stacklr-4);
520 	}
521     }
522     /* increment the samples counter */
523     profTable->hdr.fiqs++;
524 }
525 
/*
**++
**  FUNCTIONAL DESCRIPTION:
**
**      profHatWedge
**
**      Handed to hatClkOn() by profStart() as the routine to call
**      if the HAT timer becomes clogged/wedged.  No recovery is
**      attempted here; the event is only reported on the console,
**      and only when PROFILER_DEBUG is defined.
**
**  FORMAL PARAMETERS:
**
**      int nFIQs - only used in the debug message
**
**  IMPLICIT INPUTS:
**
**      nill
**
**  IMPLICIT OUTPUTS:
**
**      nill
**
**  FUNCTION VALUE:
**
**      nill
**
**  SIDE EFFECTS:
**
**      nill
**--
*/
static void
profHatWedge(int nFIQs)
{
#if defined(PROFILER_DEBUG)
    printf("profHatWedge: nFIQ = %d\n",nFIQs);
#endif
}
564 
565 /* Enter the data in the table.
566  *
567  * To reduce the time taken to find samples with time
568  * an eviction algorithm is implemented.
569  * When a new entry in the overflow area is required
570  * the first entry in the hash table is copied there
571  * and the new entry placed as the hash table entry. The
572  * displaced entry will then be the first entry accessed in
573  * the table.
574  */
575 static void
profEnter(struct profHashTable * table,unsigned int lr)576 profEnter(struct profHashTable *table, unsigned int lr)
577 {
578     unsigned int entries, hashShift, index, count;
579     struct profHashEntry *sample;
580     struct profHashEntry *first;
581     struct profHashEntry *prev;
582     struct profHashEntry tmpEntry;
583     int tmpIndex;
584 
585     /* work out how many bits
586      * are required to hash the given size.
587      */
588     entries = table->hdr.entries - 1;
589     hashShift = 0;
590     do
591     {
592 	entries = entries << 1;
593 	hashShift++;
594     } while (!(entries & 0x80000000));
595 
596     /* enter the pc in the table. */
597     /* remove redundant bits.
598      * and save the count offset bits
599      */
600     lr = lr >> REDUNDANT_BITS;
601     count = lr & COUNT_BIT_MASK;
602     lr = lr >> COUNT_BITS;
603 
604     /* this is easier than working out how
605      * many bits to or, based on the hashShift.
606      * maybe it would be better to work out at
607      * the start and save time during the fiq.
608      */
609     index = (lr << hashShift) >> hashShift;
610 
611     first = sample = &table->entries[index];
612     /* now loop until we either find the entry
613      * or the next free space.
614      */
615     while ( (sample->pc != lr) && (table->hdr.last < table->hdr.tableSize) )
616     {
617 	if (sample->pc == 0)
618 	{
619 	    /* go ahead and stick it in */
620 	    sample->pc = lr;
621 	}
622 	else
623 	{
624 	    if (sample->next != 0)
625 	    {
626 		/* move along and continue */
627 		prev = sample;
628 		sample = &table->entries[sample->next];
629 	    }
630 	    else
631 	    {
632 		/* create a new entry if available */
633 		if (table->hdr.last < table->hdr.tableSize)
634 		{
635 		    sample = &table->entries[table->hdr.last];
636 		    /* copy the first sample into the new
637 		     * field.
638 		     */
639 		    memcpy(sample, first, sizeof(struct profHashEntry));
640 		    /* now update the new entry in the first position.
641 		     */
642 		    first->pc = lr;
643 		    first->next = table->hdr.last;
644 		    first->counts[0] = 0;
645 		    first->counts[1] = 0;
646 		    first->counts[2] = 0;
647 		    first->counts[3] = 0;
648 		    table->hdr.last++;
649 		    /* update the sample pointer so that we
650 		     * can insert the count.
651 		     */
652 		    sample = first;
653 		}
654 	    }
655 	}
656     }
657 
658     /* check if we need to do an eviction. */
659     if (sample != first)
660     {
661 	/* copy the sample out of the table. */
662 	memcpy(&tmpEntry, sample, sizeof(struct profHashEntry));
663 	/* remove the sample from the chain. */
664 	tmpIndex = prev->next;
665 	prev->next = sample->next;
666 	/* now insert it at the beginning. */
667 	memcpy(sample, first, sizeof(struct profHashEntry));
668 	memcpy(first, &tmpEntry, sizeof(struct profHashEntry));
669 	/* now make the new first entry point to the old
670 	 * first entry.
671 	 */
672 	first->next = tmpIndex;
673     }
674 
675     /* must now check the lr
676      * to see if the table is full.
677      */
678     if (sample->pc == lr)
679     {
680 	/* update the count */
681 	sample->counts[count]++;
682 	table->hdr.samples++;
683     }
684     else
685     {
686 	table->hdr.missed++;
687     }
688 }
689 
690 void
displayTable(struct profHashTable * table)691 displayTable(struct profHashTable *table)
692 {
693     int i;
694     struct profHashEntry *sample;
695     char buff[100] = ".............................................\n";
696 
697     for (i=0; i < table->hdr.tableSize; i++)
698     {
699 	sample = &table->entries[i];
700 	if ((i * table->hdr.tableSize) >= table->hdr.entries)
701 	{
702 	    printf("%s", buff);
703 	    buff[0] = '\0';
704 	}
705 	printf("i = %d, pc = 0x%x, next = %d, counts %d %d %d %d\n",
706 	       i, sample->pc, sample->next, sample->counts[0],
707 	       sample->counts[1], sample->counts[2], sample->counts[3]);
708     }
709     return;
710 }
711