xref: /onnv-gate/usr/src/uts/sun4/io/trapstat.c (revision 4204)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51772Sjl139090  * Common Development and Distribution License (the "License").
61772Sjl139090  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
223434Sesaxe  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
270Sstevel@tonic-gate 
280Sstevel@tonic-gate #include <sys/systm.h>
290Sstevel@tonic-gate #include <sys/conf.h>
300Sstevel@tonic-gate #include <sys/stat.h>
310Sstevel@tonic-gate #include <sys/ddi.h>
320Sstevel@tonic-gate #include <sys/sunddi.h>
330Sstevel@tonic-gate #include <sys/modctl.h>
340Sstevel@tonic-gate #include <sys/cpu_module.h>
350Sstevel@tonic-gate #include <vm/hat_sfmmu.h>
360Sstevel@tonic-gate #include <vm/seg_kmem.h>
370Sstevel@tonic-gate #include <vm/seg_kpm.h>
380Sstevel@tonic-gate #include <vm/vm_dep.h>
390Sstevel@tonic-gate #include <sys/machsystm.h>
400Sstevel@tonic-gate #include <sys/machasi.h>
410Sstevel@tonic-gate #include <sys/sysmacros.h>
420Sstevel@tonic-gate #include <sys/callb.h>
430Sstevel@tonic-gate #include <sys/archsystm.h>
440Sstevel@tonic-gate #include <sys/trapstat.h>
450Sstevel@tonic-gate #ifdef sun4v
460Sstevel@tonic-gate #include <sys/hypervisor_api.h>
470Sstevel@tonic-gate #endif
481772Sjl139090 #ifndef sun4v
493434Sesaxe #include <sys/pghw.h>
501772Sjl139090 #endif
510Sstevel@tonic-gate 
520Sstevel@tonic-gate /* BEGIN CSTYLED */
530Sstevel@tonic-gate /*
540Sstevel@tonic-gate  * trapstat:  Trap Statistics through Dynamic Trap Table Interposition
550Sstevel@tonic-gate  * -------------------------------------------------------------------
560Sstevel@tonic-gate  *
570Sstevel@tonic-gate  * Motivation and Overview
580Sstevel@tonic-gate  *
590Sstevel@tonic-gate  * Despite being a fundamental indicator of system behavior, there has
600Sstevel@tonic-gate  * historically been very little insight provided into the frequency and cost
610Sstevel@tonic-gate  * of machine-specific traps.  The lack of insight has been especially acute
620Sstevel@tonic-gate  * on UltraSPARC microprocessors:  because these microprocessors handle TLB
630Sstevel@tonic-gate  * misses as software traps, the frequency and duration of traps play a
640Sstevel@tonic-gate  * decisive role in the performance of the memory system.  As applications have
650Sstevel@tonic-gate  * increasingly outstripped TLB reach, this has become increasingly true.
660Sstevel@tonic-gate  *
670Sstevel@tonic-gate  * Part of the difficulty of observing trap behavior is that the trap handlers
680Sstevel@tonic-gate  * are so frequently called (e.g. millions of times per second) that any
690Sstevel@tonic-gate  * permanently enabled instrumentation would induce an unacceptable performance
700Sstevel@tonic-gate  * degradation.  Thus, it is a constraint on any trap observability
710Sstevel@tonic-gate  * infrastructure that it have no probe effect when not explicitly enabled.
720Sstevel@tonic-gate  *
730Sstevel@tonic-gate  * The basic idea, then, is to create an interposing trap table in which each
740Sstevel@tonic-gate  * entry increments a per-trap, in-memory counter and then jumps to the actual,
750Sstevel@tonic-gate  * underlying trap table entry.  To enable trapstat, we atomically write to the
760Sstevel@tonic-gate  * trap base address (%tba) register to point to our interposing trap table.
770Sstevel@tonic-gate  * (Note that per-CPU statistics fall out by creating a different trap table
780Sstevel@tonic-gate  * for each CPU.)
790Sstevel@tonic-gate  *
800Sstevel@tonic-gate  * Implementation Details
810Sstevel@tonic-gate  *
820Sstevel@tonic-gate  * While the idea is straightforward, a nuance of SPARC V9 slightly
830Sstevel@tonic-gate  * complicates the implementation.  Unlike its predecessors, SPARC V9 supports
840Sstevel@tonic-gate  * the notion of nested traps.  The trap level is kept in the TL register:
850Sstevel@tonic-gate  * during normal operation it is 0; when a trap is taken, the TL register is
860Sstevel@tonic-gate  * incremented by 1.  To aid system software, SPARC V9 breaks the trap table
870Sstevel@tonic-gate  * into two halves:  the lower half contains the trap handlers for traps taken
880Sstevel@tonic-gate  * when TL is 0; the upper half contains the trap handlers for traps taken
890Sstevel@tonic-gate  * when TL is greater than 0.  Each half is further subdivided into two
900Sstevel@tonic-gate  * subsequent halves:  the lower half contains the trap handlers for traps
910Sstevel@tonic-gate  * other than those induced by the trap instruction (Tcc variants); the upper
920Sstevel@tonic-gate  * half contains the trap handlers for traps induced by the trap instruction.
930Sstevel@tonic-gate  * This gives a total of four ranges, with each range containing 256 traps:
940Sstevel@tonic-gate  *
950Sstevel@tonic-gate  *       +--------------------------------+- 3ff
960Sstevel@tonic-gate  *       |                                |   .
970Sstevel@tonic-gate  *       |     Trap instruction, TL>0     |   .
980Sstevel@tonic-gate  *       |                                |   .
990Sstevel@tonic-gate  *       |- - - - - - - - - - - - - - - - +- 300
1000Sstevel@tonic-gate  *       |- - - - - - - - - - - - - - - - +- 2ff
1010Sstevel@tonic-gate  *       |                                |   .
1020Sstevel@tonic-gate  *       |   Non-trap instruction, TL>0   |   .
1030Sstevel@tonic-gate  *       |                                |   .
1040Sstevel@tonic-gate  *       |- - - - - - - - - - - - - - - - +- 200
1050Sstevel@tonic-gate  *       |- - - - - - - - - - - - - - - - +- 1ff
1060Sstevel@tonic-gate  *       |                                |   .
1070Sstevel@tonic-gate  *       |     Trap instruction, TL=0     |   .
1080Sstevel@tonic-gate  *       |                                |   .
1090Sstevel@tonic-gate  *       |- - - - - - - - - - - - - - - - +- 100
1100Sstevel@tonic-gate  *       |- - - - - - - - - - - - - - - - +- 0ff
1110Sstevel@tonic-gate  *       |                                |   .
1120Sstevel@tonic-gate  *       |   Non-trap instruction, TL=0   |   .
1130Sstevel@tonic-gate  *       |                                |   .
1140Sstevel@tonic-gate  *       +--------------------------------+- 000
1150Sstevel@tonic-gate  *
1160Sstevel@tonic-gate  *
1170Sstevel@tonic-gate  * Solaris, however, doesn't have reason to support trap instructions when
1180Sstevel@tonic-gate  * TL>0 (only privileged code may execute at TL>0; not supporting this only
1190Sstevel@tonic-gate  * constrains our own implementation).  The trap table actually looks like:
1200Sstevel@tonic-gate  *
1210Sstevel@tonic-gate  *       +--------------------------------+- 2ff
1220Sstevel@tonic-gate  *       |                                |   .
1230Sstevel@tonic-gate  *       |   Non-trap instruction, TL>0   |   .
1240Sstevel@tonic-gate  *       |                                |   .
1250Sstevel@tonic-gate  *       |- - - - - - - - - - - - - - - - +- 200
1260Sstevel@tonic-gate  *       |- - - - - - - - - - - - - - - - +- 1ff
1270Sstevel@tonic-gate  *       |                                |   .
1280Sstevel@tonic-gate  *       |     Trap instruction, TL=0     |   .
1290Sstevel@tonic-gate  *       |                                |   .
1300Sstevel@tonic-gate  *       |- - - - - - - - - - - - - - - - +- 100
1310Sstevel@tonic-gate  *       |- - - - - - - - - - - - - - - - +- 0ff
1320Sstevel@tonic-gate  *       |                                |   .
1330Sstevel@tonic-gate  *       |   Non-trap instruction, TL=0   |   .
1340Sstevel@tonic-gate  *       |                                |   .
1350Sstevel@tonic-gate  *       +--------------------------------+- 000
1360Sstevel@tonic-gate  *
1370Sstevel@tonic-gate  * Putatively to aid system software, SPARC V9 has the notion of multiple
1380Sstevel@tonic-gate  * sets of global registers.  UltraSPARC defines four sets of global
1390Sstevel@tonic-gate  * registers:
1400Sstevel@tonic-gate  *
1410Sstevel@tonic-gate  *    Normal Globals
1420Sstevel@tonic-gate  *    Alternate Globals (AGs)
1430Sstevel@tonic-gate  *    MMU Globals (MGs)
1440Sstevel@tonic-gate  *    Interrupt Globals (IGs)
1450Sstevel@tonic-gate  *
1460Sstevel@tonic-gate  * The set of globals in use is controlled by bits in PSTATE; when TL is 0
1470Sstevel@tonic-gate  * (and PSTATE has not been otherwise explicitly modified), the Normal Globals
1480Sstevel@tonic-gate  * are in use.  When a trap is issued, PSTATE is modified to point to a set of
1490Sstevel@tonic-gate  * globals corresponding to the trap type.  Most traps correspond to the
1500Sstevel@tonic-gate  * Alternate Globals, with a minority corresponding to the MMU Globals, and
1510Sstevel@tonic-gate  * only the interrupt-vector trap (vector 0x60) corresponding to the Interrupt
1520Sstevel@tonic-gate  * Globals.  (The complete mapping can be found in the UltraSPARC I&II User's
1530Sstevel@tonic-gate  * Manual.)
1540Sstevel@tonic-gate  *
1550Sstevel@tonic-gate  * Note that the sets of globals are per trap _type_, not per trap _level_.
1560Sstevel@tonic-gate  * Thus, when executing a TL>0 trap handler, one may not have registers
1570Sstevel@tonic-gate  * available (for example, both trap-instruction traps and spill traps execute
1580Sstevel@tonic-gate  * on the alternate globals; if a trap-instruction trap induces a window spill,
1590Sstevel@tonic-gate  * the window spill handler has no available globals).  For trapstat, this is
1600Sstevel@tonic-gate  * problematic:  a register is required to transfer control from one arbitrary
1610Sstevel@tonic-gate  * location (in the interposing trap table) to another (in the actual trap
1620Sstevel@tonic-gate  * table).
1630Sstevel@tonic-gate  *
1640Sstevel@tonic-gate  * We solve this problem by exploiting the trap table's location at the bottom
1650Sstevel@tonic-gate  * of valid kernel memory (i.e. at KERNELBASE).  We locate the interposing trap
1660Sstevel@tonic-gate  * tables just below KERNELBASE -- thereby allowing us to use a branch-always
1670Sstevel@tonic-gate  * instruction (ba) instead of a jump instruction (jmp) to transfer control
1680Sstevel@tonic-gate  * from the TL>0 entries in the interposing trap table to the TL>0 entries in
1690Sstevel@tonic-gate  * the actual trap table.  (N.B. while this allows trap table interposition to
1700Sstevel@tonic-gate  * work, it necessarily limits trapstat to only recording information about
1710Sstevel@tonic-gate  * TL=0 traps -- there is no way to increment a counter without using a
1720Sstevel@tonic-gate  * register.)  Diagrammatically:
1730Sstevel@tonic-gate  *
1740Sstevel@tonic-gate  *  Actual trap table:
1750Sstevel@tonic-gate  *
1760Sstevel@tonic-gate  *       +--------------------------------+- 2ff
1770Sstevel@tonic-gate  *       |                                |   .
1780Sstevel@tonic-gate  *       |   Non-trap instruction, TL>0   |   .   <-----------------------+
1790Sstevel@tonic-gate  *       |                                |   .   <-----------------------|-+
1800Sstevel@tonic-gate  *       |- - - - - - - - - - - - - - - - +- 200  <-----------------------|-|-+
1810Sstevel@tonic-gate  *       |- - - - - - - - - - - - - - - - +- 1ff                          | | |
1820Sstevel@tonic-gate  *       |                                |   .                           | | |
1830Sstevel@tonic-gate  *       |     Trap instruction, TL=0     |   .   <-----------------+     | | |
1840Sstevel@tonic-gate  *       |                                |   .   <-----------------|-+   | | |
1850Sstevel@tonic-gate  *       |- - - - - - - - - - - - - - - - +- 100  <-----------------|-|-+ | | |
1860Sstevel@tonic-gate  *       |- - - - - - - - - - - - - - - - +- 0ff                    | | | | | |
1870Sstevel@tonic-gate  *       |                                |   .                     | | | | | |
1880Sstevel@tonic-gate  *       |   Non-trap instruction, TL=0   |   .   <-----------+     | | | | | |
1890Sstevel@tonic-gate  *       |                                |   .   <-----------|-+   | | | | | |
1900Sstevel@tonic-gate  *       +--------------------------------+- 000  <-----------|-|-+ | | | | | |
1910Sstevel@tonic-gate  *        KERNELBASE                                          | | | | | | | | |
1920Sstevel@tonic-gate  *                                                            | | | | | | | | |
1930Sstevel@tonic-gate  *                                                            | | | | | | | | |
1940Sstevel@tonic-gate  *  Interposing trap table:                                   | | | | | | | | |
1950Sstevel@tonic-gate  *                                                            | | | | | | | | |
1960Sstevel@tonic-gate  *       +--------------------------------+- 2ff              | | | | | | | | |
1970Sstevel@tonic-gate  *       |  ...                           |   .               | | | | | | | | |
1980Sstevel@tonic-gate  *       |  ...                           |   .               | | | | | | | | |
1990Sstevel@tonic-gate  *       |  ...                           |   .               | | | | | | | | |
2000Sstevel@tonic-gate  *       |- - - - - - - - - - - - - - - - +- 203              | | | | | | | | |
2010Sstevel@tonic-gate  *       |  ba,a                          |      -------------|-|-|-|-|-|-+ | |
2020Sstevel@tonic-gate  *       |- - - - - - - - - - - - - - - - +- 202              | | | | | |   | |
2030Sstevel@tonic-gate  *       |  ba,a                          |      -------------|-|-|-|-|-|---+ |
2040Sstevel@tonic-gate  *       |- - - - - - - - - - - - - - - - +- 201              | | | | | |     |
2050Sstevel@tonic-gate  *       |  ba,a                          |      -------------|-|-|-|-|-|-----+
2060Sstevel@tonic-gate  *       |- - - - - - - - - - - - - - - - +- 200              | | | | | |
2070Sstevel@tonic-gate  *       |  ...                           |   .               | | | | | |
2080Sstevel@tonic-gate  *       |  ...                           |   .               | | | | | |
2090Sstevel@tonic-gate  *       |  ...                           |   .               | | | | | |
2100Sstevel@tonic-gate  *       |- - - - - - - - - - - - - - - - +- 103              | | | | | |
2110Sstevel@tonic-gate  *       |  (Increment counter)           |                   | | | | | |
2120Sstevel@tonic-gate  *       |  ba,a                          |      -------------------+ | |
2130Sstevel@tonic-gate  *       |- - - - - - - - - - - - - - - - +- 102              | | |   | |
2140Sstevel@tonic-gate  *       |  (Increment counter)           |                   | | |   | |
2150Sstevel@tonic-gate  *       |  ba,a                          |      ---------------------+ |
2160Sstevel@tonic-gate  *       |- - - - - - - - - - - - - - - - +- 101              | | |     |
2170Sstevel@tonic-gate  *       |  (Increment counter)           |                   | | |     |
2180Sstevel@tonic-gate  *       |  ba,a                          |      -----------------------+
2190Sstevel@tonic-gate  *       |- - - - - - - - - - - - - - - - +- 100              | | |
2200Sstevel@tonic-gate  *       |  ...                           |   .               | | |
2210Sstevel@tonic-gate  *       |  ...                           |   .               | | |
2220Sstevel@tonic-gate  *       |  ...                           |   .               | | |
2230Sstevel@tonic-gate  *       |- - - - - - - - - - - - - - - - +- 003              | | |
2240Sstevel@tonic-gate  *       |  (Increment counter)           |                   | | |
2250Sstevel@tonic-gate  *       |  ba,a                          |      -------------+ | |
2260Sstevel@tonic-gate  *       |- - - - - - - - - - - - - - - - +- 002                | |
2270Sstevel@tonic-gate  *       |  (Increment counter)           |                     | |
2280Sstevel@tonic-gate  *       |  ba,a                          |      ---------------+ |
2290Sstevel@tonic-gate  *       |- - - - - - - - - - - - - - - - +- 001                  |
2300Sstevel@tonic-gate  *       |  (Increment counter)           |                       |
2310Sstevel@tonic-gate  *       |  ba,a                          |      -----------------+
2320Sstevel@tonic-gate  *       +--------------------------------+- 000
2330Sstevel@tonic-gate  *        KERNELBASE - tstat_total_size
2340Sstevel@tonic-gate  *
2350Sstevel@tonic-gate  * tstat_total_size is the size of the pages required for each trap table.  It
2360Sstevel@tonic-gate  * must be true that each interposing trap table lies within the maximum
2370Sstevel@tonic-gate  * branch displacement of KERNELBASE; if each CPU were to consume a disjoint
2380Sstevel@tonic-gate  * virtual range below KERNELBASE for its trap table, we could support at most
2390Sstevel@tonic-gate  * (maximum_branch_displacement / tstat_total_size) CPUs.  The maximum branch
2400Sstevel@tonic-gate  * displacement for Bicc variants is just under eight megabytes, and (because
2410Sstevel@tonic-gate  * the %tba must be 32K aligned), tstat_total_size must be at least 32K; if
2420Sstevel@tonic-gate  * each CPU were to consume a disjoint virtual range, we would have an
2430Sstevel@tonic-gate  * unacceptably low upper bound of 256 CPUs.
2440Sstevel@tonic-gate  *
2450Sstevel@tonic-gate  * While there are tricks that one could use to address this constraint (e.g.,
2460Sstevel@tonic-gate  * creating trampolines every maximum_branch_displacement bytes), we instead
2470Sstevel@tonic-gate  * solve this by not permitting each CPU to consume a disjoint virtual range.
2480Sstevel@tonic-gate  * Rather, we have each CPU's interposing trap table use the _same_ virtual
2490Sstevel@tonic-gate  * range, but we back the trap tables with disjoint physical memory.  Normally,
2500Sstevel@tonic-gate  * such one-to-many virtual-to-physical mappings are illegal; this is
2510Sstevel@tonic-gate  * permissible here only because the pages for the interposing trap table are
2520Sstevel@tonic-gate  * necessarily locked in the TLB.  (The CPUs thus never have the opportunity to
2530Sstevel@tonic-gate  * discover that they have conflicting translations.)
2540Sstevel@tonic-gate  *
2550Sstevel@tonic-gate  * On CMT architectures in which CPUs can share MMUs, the above trick will not
2560Sstevel@tonic-gate  * work: two CPUs that share an MMU cannot have the same virtual address map
2570Sstevel@tonic-gate  * to disjoint physical pages.  On these architectures, any CPUs sharing the
2580Sstevel@tonic-gate  * same MMU must consume a disjoint 32K virtual address range -- limiting the
2590Sstevel@tonic-gate  * number of CPUs sharing an MMU on these architectures to 256 due to the
2600Sstevel@tonic-gate  * branch displacement limitation described above.  On the sun4v architecture,
2610Sstevel@tonic-gate  * there is a further limitation: a guest may not have more than eight locked
2620Sstevel@tonic-gate  * TLB entries per MMU.  To allow operation under this restriction, the
2630Sstevel@tonic-gate  * interposing trap table and the trap statistics are each accessed through
2640Sstevel@tonic-gate  * a single 4M TLB entry.  This limits the footprint to two locked entries
2650Sstevel@tonic-gate  * (one for the I-TLB and one for the D-TLB), but further restricts the number
2660Sstevel@tonic-gate  * of CPUs to 128 per MMU.  However, support for more than 128 CPUs can easily
2670Sstevel@tonic-gate  * be added via a hybrid scheme, where the same 4M virtual address is used
2680Sstevel@tonic-gate  * on different MMUs.
2690Sstevel@tonic-gate  *
2700Sstevel@tonic-gate  *
2710Sstevel@tonic-gate  * TLB Statistics
2720Sstevel@tonic-gate  *
2730Sstevel@tonic-gate  * Because TLB misses are an important component of system performance, we wish
2740Sstevel@tonic-gate  * to know much more about these traps than simply the number received.
2750Sstevel@tonic-gate  * Specifically, we wish to know:
2760Sstevel@tonic-gate  *
2770Sstevel@tonic-gate  *  (a)	The amount of time spent executing the TLB miss handler
2780Sstevel@tonic-gate  *  (b)	TLB misses versus TSB misses
2790Sstevel@tonic-gate  *  (c) Kernel-level misses versus user-level misses
2800Sstevel@tonic-gate  *  (d) Misses per pagesize
2810Sstevel@tonic-gate  *
2820Sstevel@tonic-gate  * TLB Statistics: Time Spent Executing
2830Sstevel@tonic-gate  *
2840Sstevel@tonic-gate  * To accurately determine the amount of time spent executing the TLB miss
2850Sstevel@tonic-gate  * handler, one must get a timestamp on trap entry and trap exit, subtract the
2860Sstevel@tonic-gate  * former from the latter, and add the result to an accumulating count.
2870Sstevel@tonic-gate  * Consider flow of control during normal TLB miss processing (where "ldx
2880Sstevel@tonic-gate  * [%g2], %g2" is an arbitrary TLB-missing instruction):
2890Sstevel@tonic-gate  *
2900Sstevel@tonic-gate  * + - - - - - - - -+
2910Sstevel@tonic-gate  * :                :
2920Sstevel@tonic-gate  * : ldx [%g2], %g2 :<-------------------------------------------------------+
2930Sstevel@tonic-gate  * :                :              Return from trap:                         |
2940Sstevel@tonic-gate  * + - - - - - - - -+                TL <- TL - 1 (0)                        |
2950Sstevel@tonic-gate  *	  |                          %pc <- TSTATE[TL].TPC (address of load) |
2960Sstevel@tonic-gate  *	  | TLB miss:                                                        |
2970Sstevel@tonic-gate  *        |   TL <- TL + 1 (1)                                               |
2980Sstevel@tonic-gate  *        |   %pc <- TLB-miss-trap-handler                                   |
2990Sstevel@tonic-gate  *        |                                                                  |
3000Sstevel@tonic-gate  *        v                                                                  |
3010Sstevel@tonic-gate  * + - - - - - - - - - - - - - - - +                                         |
3020Sstevel@tonic-gate  * :                               :                                         |
3030Sstevel@tonic-gate  * : Lookup VA in TSB              :                                         |
3040Sstevel@tonic-gate  * : If (hit)                      :                                         |
3050Sstevel@tonic-gate  * :     Fill TLB                  :                                         |
3060Sstevel@tonic-gate  * : Else                          :                                         |
3070Sstevel@tonic-gate  * :     Lookup VA (hme hash table :                                         |
3080Sstevel@tonic-gate  * :                or segkpm)     :                                         |
3090Sstevel@tonic-gate  * :     Fill TLB                  :                                         |
3100Sstevel@tonic-gate  * : Endif                         :                                         |
3110Sstevel@tonic-gate  * : Issue "retry"  ---------------------------------------------------------+
3120Sstevel@tonic-gate  * :                               :
3130Sstevel@tonic-gate  * + - - - - - - - - - - - - - - - +
3140Sstevel@tonic-gate  *  TLB-miss-trap-handler
3150Sstevel@tonic-gate  *
3160Sstevel@tonic-gate  *
3170Sstevel@tonic-gate  * As the above diagram indicates, interposing on the trap table allows one
3180Sstevel@tonic-gate  * only to determine a timestamp on trap _entry_:  when the TLB miss handler
3190Sstevel@tonic-gate  * has completed filling the TLB, a "retry" will be issued, and control will
3200Sstevel@tonic-gate  * transfer immediately back to the missing %pc.
3210Sstevel@tonic-gate  *
3220Sstevel@tonic-gate  * To obtain a timestamp on trap exit, we must then somehow interpose between
3230Sstevel@tonic-gate  * the "retry" and the subsequent control transfer to the TLB-missing
3240Sstevel@tonic-gate  * instruction.  To do this, we _push_ a trap level.  The basic idea is to
3250Sstevel@tonic-gate  * spoof a TLB miss by raising TL, setting the %tpc to be within text
3260Sstevel@tonic-gate  * controlled by trapstat (the "TLB return entry") and branching to the
3270Sstevel@tonic-gate  * underlying TLB miss handler.  When the TLB miss handler issues its "retry",
3280Sstevel@tonic-gate  * control will transfer not to the TLB-missing instruction, but rather to the
3290Sstevel@tonic-gate  * TLB return entry.  This code can then obtain a timestamp, and issue its own
3300Sstevel@tonic-gate  * "retry" -- thereby correctly returning to the TLB-missing instruction.
3310Sstevel@tonic-gate  * Here is the above TLB miss flow control diagram modified to reflect
3320Sstevel@tonic-gate  * trapstat's operation:
3330Sstevel@tonic-gate  *
3340Sstevel@tonic-gate  * + - - - - - - - -+
3350Sstevel@tonic-gate  * :                :
3360Sstevel@tonic-gate  * : ldx [%g2], %g2 :<-------------------------------------------------------+
3370Sstevel@tonic-gate  * :                :             Return from trap:                          |
3380Sstevel@tonic-gate  * + - - - - - - - -+               TL <- TL - 1 (0)                         |
3390Sstevel@tonic-gate  *	  |                         %pc <- TSTATE[TL].TPC (address of load)  |
3400Sstevel@tonic-gate  *	  | TLB miss:                                                        |
3410Sstevel@tonic-gate  *        |   TL <- TL + 1 (1)                                               |
3420Sstevel@tonic-gate  *        |   %pc <- TLB-miss-trap-handler (trapstat)                        |
3430Sstevel@tonic-gate  *        |                                                                  |
3440Sstevel@tonic-gate  *        v                                    TLB-return-entry (trapstat)   |
3450Sstevel@tonic-gate  * + - - - - - - - - - - - - - - - - - - +    + - - - - - - - - - - - - - +  |
3460Sstevel@tonic-gate  * :                                     :    :                           :  |
3470Sstevel@tonic-gate  * : Record timestamp                    :    : Record timestamp          :  |
3480Sstevel@tonic-gate  * : TL <- 2                             :    : Take timestamp difference :  |
3490Sstevel@tonic-gate  * : TSTATE[1].TPC <- TLB-return-entry   :    : Add to running total      :  |
3500Sstevel@tonic-gate  * : ba,a TLB-miss-trap-handler -----------+  : Issue "retry"  --------------+
3510Sstevel@tonic-gate  * :                                     : |  :                           :
3520Sstevel@tonic-gate  * + - - - - - - - - - - - - - - - - - - + |  + - - - - - - - - - - - - - +
3530Sstevel@tonic-gate  *  TLB-miss-trap-handler	           |                  ^
3540Sstevel@tonic-gate  *  (trapstat)                             |                  |
3550Sstevel@tonic-gate  *                                         |                  |
3560Sstevel@tonic-gate  *                                         |                  |
3570Sstevel@tonic-gate  *                 +-----------------------+                  |
3580Sstevel@tonic-gate  *                 |                                          |
3590Sstevel@tonic-gate  *                 |                                          |
3600Sstevel@tonic-gate  *                 v                                          |
3610Sstevel@tonic-gate  * + - - - - - - - - - - - - - - - +                          |
3620Sstevel@tonic-gate  * :                               :                          |
3630Sstevel@tonic-gate  * : Lookup VA in TSB              :                          |
3640Sstevel@tonic-gate  * : If (hit)                      :                          |
3650Sstevel@tonic-gate  * :     Fill TLB                  :                          |
3660Sstevel@tonic-gate  * : Else                          :                          |
3670Sstevel@tonic-gate  * :     Lookup VA (hme hash table :                          |
3680Sstevel@tonic-gate  * :                or segkpm)     :                          |
3690Sstevel@tonic-gate  * :     Fill TLB                  :                          |
3700Sstevel@tonic-gate  * : Endif                         :                          |
3710Sstevel@tonic-gate  * : Issue "retry"  ------------------------------------------+
3720Sstevel@tonic-gate  * :                               : Return from trap:
3730Sstevel@tonic-gate  * + - - - - - - - - - - - - - - - +   TL <- TL - 1 (1)
3740Sstevel@tonic-gate  *  TLB-miss-trap-handler              %pc <- TSTATE[TL].TPC (TLB-return-entry)
3750Sstevel@tonic-gate  *
3760Sstevel@tonic-gate  *
3770Sstevel@tonic-gate  * A final subterfuge is required to complete our artifice:  if we miss in
3780Sstevel@tonic-gate  * the TLB, the TSB _and_ the subsequent hash or segkpm lookup (that is, if
3790Sstevel@tonic-gate  * there is no valid translation for the TLB-missing address), common system
3800Sstevel@tonic-gate  * software will need to accurately determine the %tpc as part of its page
3810Sstevel@tonic-gate  * fault handling. We therefore modify the kernel to check the %tpc in this
3820Sstevel@tonic-gate  * case: if the %tpc falls within the VA range controlled by trapstat and
3830Sstevel@tonic-gate  * the TL is 2, TL is simply lowered back to 1 (this check is implemented
3840Sstevel@tonic-gate  * by the TSTAT_CHECK_TL1 macro).  Lowering TL to 1 has the effect of
3850Sstevel@tonic-gate  * discarding the state pushed by trapstat.
3860Sstevel@tonic-gate  *
3870Sstevel@tonic-gate  * TLB Statistics: TLB Misses versus TSB Misses
3880Sstevel@tonic-gate  *
3890Sstevel@tonic-gate  * Distinguishing TLB misses from TSB misses requires further interposition
3900Sstevel@tonic-gate  * on the TLB miss handler:  we cannot know a priori or a posteriori if a
3910Sstevel@tonic-gate  * given VA will hit or has hit in the TSB.
3920Sstevel@tonic-gate  *
3930Sstevel@tonic-gate  * We achieve this distinction by adding a second TLB return entry almost
3940Sstevel@tonic-gate  * identical to the first -- differing only in the address to which it
3950Sstevel@tonic-gate  * stores its results.  We then modify the TLB miss handlers of the kernel
3960Sstevel@tonic-gate  * such that they check the %tpc when they determine that a TLB miss has
3970Sstevel@tonic-gate  * subsequently missed in the TSB:  if the %tpc lies within trapstat's VA
3980Sstevel@tonic-gate  * range and TL is 2 (that is, if trapstat is running), the TLB miss handler
3990Sstevel@tonic-gate  * _increments_ the %tpc by the size of the TLB return entry.  The ensuing
4000Sstevel@tonic-gate  * "retry" will thus transfer control to the second TLB return entry, and
4010Sstevel@tonic-gate  * the time spent in the handler will be accumulated in a memory location
4020Sstevel@tonic-gate  * specific to TSB misses.
4030Sstevel@tonic-gate  *
4040Sstevel@tonic-gate  * N.B.:  To minimize the amount of knowledge the kernel must have of trapstat,
4050Sstevel@tonic-gate  * we do not allow the kernel to hard-code the size of the TLB return entry.
4060Sstevel@tonic-gate  * Rather, the actual tsbmiss handler executes a known instruction at the
4070Sstevel@tonic-gate  * corresponding tsbmiss patch points (see the tstat_tsbmiss_patch_table) with
4080Sstevel@tonic-gate  * the %tpc in %g7:  when trapstat is not running, these points contain the
4090Sstevel@tonic-gate  * harmless TSTAT_TSBMISS_INSTR instruction ("add %g7, 0, %g7"). Before
4100Sstevel@tonic-gate  * running, trapstat modifies the instructions at these patch points such
4110Sstevel@tonic-gate  * that the simm13 equals the size of the TLB return entry.
4120Sstevel@tonic-gate  *
4130Sstevel@tonic-gate  * TLB Statistics: Kernel-level Misses versus User-level Misses
4140Sstevel@tonic-gate  *
4150Sstevel@tonic-gate  * Differentiating user-level misses from kernel-level misses employs a
4160Sstevel@tonic-gate  * similar technique, but is simplified by the ability to distinguish a
4170Sstevel@tonic-gate  * user-level miss from a kernel-level miss a priori by reading the context
4180Sstevel@tonic-gate  * register:  we implement kernel-/user-level differentiation by again doubling
4190Sstevel@tonic-gate  * the number of TLB return entries, and setting the %tpc to the appropriate
4200Sstevel@tonic-gate  * TLB return entry in trapstat's TLB miss handler.  Together with the doubling
4210Sstevel@tonic-gate  * of entries required for TLB-miss/TSB-miss differentiation, this yields a
4220Sstevel@tonic-gate  * total of four TLB return entries:
4230Sstevel@tonic-gate  *
4240Sstevel@tonic-gate  *	Level		TSB hit?	Structure member
4250Sstevel@tonic-gate  *	------------------------------------------------------------
4260Sstevel@tonic-gate  *	Kernel		Yes		tstat_tlbret_t.ttlbr_ktlb
4270Sstevel@tonic-gate  *	Kernel		No		tstat_tlbret_t.ttlbr_ktsb
4280Sstevel@tonic-gate  *	User		Yes		tstat_tlbret_t.ttlbr_utlb
4290Sstevel@tonic-gate  *	User		No		tstat_tlbret_t.ttlbr_utsb
4300Sstevel@tonic-gate  *
4310Sstevel@tonic-gate  * TLB Statistics: Misses per Pagesize
4320Sstevel@tonic-gate  *
4330Sstevel@tonic-gate  * As with the TLB-/TSB-miss differentiation, we have no way of determining
4340Sstevel@tonic-gate  * pagesize a priori.  This is therefore implemented by mandating a new rule:
4350Sstevel@tonic-gate  * whenever the kernel fills the TLB in its TLB miss handler, the TTE
4360Sstevel@tonic-gate  * corresponding to the TLB-missing VA must be in %g5 when the handler
4370Sstevel@tonic-gate  * executes its "retry".  This allows the TLB return entry to determine
4380Sstevel@tonic-gate  * pagesize by simply looking at the pagesize field in the TTE stored in
4390Sstevel@tonic-gate  * %g5.
4400Sstevel@tonic-gate  *
4410Sstevel@tonic-gate  * TLB Statistics: Probe Effect
4420Sstevel@tonic-gate  *
4430Sstevel@tonic-gate  * As one might imagine, gathering TLB statistics by pushing a trap level
4440Sstevel@tonic-gate  * induces significant probe effect.  To account for this probe effect,
4450Sstevel@tonic-gate  * trapstat attempts to observe it by executing a code sequence with a known
4460Sstevel@tonic-gate  * number of TLB misses both before and after interposing on the trap table.
4470Sstevel@tonic-gate  * This allows trapstat to determine a per-trap probe effect which can then be
4480Sstevel@tonic-gate  * factored into the "%tim" fields of the trapstat command.
4490Sstevel@tonic-gate  *
4500Sstevel@tonic-gate  * Note that on sun4v platforms, TLB misses are normally handled by the
4510Sstevel@tonic-gate  * hypervisor or the hardware TSB walker. Thus no fast MMU miss information
4521050Sgirish  * is reported for normal operation. However, when trapstat is invoked
4531050Sgirish  * with the -t or -T option to collect detailed TLB statistics, the kernel
4540Sstevel@tonic-gate  * takes over TLB miss handling. This results in significantly more overhead
4550Sstevel@tonic-gate  * and TLB statistics may not be as accurate as on sun4u platforms.
4561050Sgirish  * On some processors, hypervisor or hardware may provide a low overhead
4571050Sgirish  * interface to collect TSB hit statistics. This support is exposed via
4581050Sgirish  * a well defined CPU module interface (cpu_trapstat_conf to enable this
4591050Sgirish  * interface and cpu_trapstat_data to get detailed TSB hit statistics).
4601050Sgirish  * In this scenario, TSB miss statistics are collected by intercepting the
4611050Sgirish  * IMMU_miss and DMMU_miss traps using the above-mentioned trap interposition
4621050Sgirish  * approach.
4630Sstevel@tonic-gate  *
4640Sstevel@tonic-gate  * Locking
4650Sstevel@tonic-gate  *
4660Sstevel@tonic-gate  * The implementation uses two locks:  tstat_lock (a local lock) and the global
4670Sstevel@tonic-gate  * cpu_lock.  tstat_lock is used to assure trapstat's consistency in the
4680Sstevel@tonic-gate  * presence of multithreaded /dev/trapstat consumers (while as of this writing
4690Sstevel@tonic-gate  * the only consumer of /dev/trapstat is single threaded, it is obviously
4700Sstevel@tonic-gate  * necessary to correctly support multithreaded access).  cpu_lock is held
4710Sstevel@tonic-gate  * whenever CPUs are being manipulated directly, to prevent them from
4720Sstevel@tonic-gate  * disappearing in the process.  Because trapstat's DR callback
4730Sstevel@tonic-gate  * (trapstat_cpu_setup()) must grab tstat_lock and is called with cpu_lock
4740Sstevel@tonic-gate  * held, the lock ordering is necessarily cpu_lock before tstat_lock.
4750Sstevel@tonic-gate  *
4760Sstevel@tonic-gate  */
4770Sstevel@tonic-gate /* END CSTYLED */
4780Sstevel@tonic-gate 
4790Sstevel@tonic-gate static dev_info_t	*tstat_devi;	/* saved in xxattach() for xxinfo() */
4800Sstevel@tonic-gate static int		tstat_open;	/* set if driver is open */
4810Sstevel@tonic-gate static kmutex_t		tstat_lock;	/* serialize access */
4820Sstevel@tonic-gate static vmem_t		*tstat_arena;	/* arena for TLB-locked pages */
4830Sstevel@tonic-gate static tstat_percpu_t	*tstat_percpu;	/* per-CPU data */
4840Sstevel@tonic-gate static int		tstat_running;	/* set if trapstat is running */
4850Sstevel@tonic-gate static tstat_data_t	*tstat_buffer;	/* staging buffer for outgoing data */
4860Sstevel@tonic-gate static int		tstat_options;	/* bit-wise indication of options */
4870Sstevel@tonic-gate static int		*tstat_enabled;	/* map of enabled trap entries */
4880Sstevel@tonic-gate static int		tstat_tsbmiss_patched; /* tsbmiss patch flag */
4890Sstevel@tonic-gate static callb_id_t	tstat_cprcb;	/* CPR callback */
4900Sstevel@tonic-gate static char		*tstat_probe_area; /* VA range used for probe effect */
4910Sstevel@tonic-gate static caddr_t		tstat_probe_phys; /* physical to back above VA */
4920Sstevel@tonic-gate static hrtime_t		tstat_probe_time; /* time spent on probe effect */
4930Sstevel@tonic-gate static hrtime_t		tstat_probe_before[TSTAT_PROBE_NLAPS];
4940Sstevel@tonic-gate static hrtime_t		tstat_probe_after[TSTAT_PROBE_NLAPS];
4950Sstevel@tonic-gate static uint_t		tstat_pgszs;		/* # of kernel page sizes */
4960Sstevel@tonic-gate static uint_t		tstat_user_pgszs;	/* # of user page sizes */
4970Sstevel@tonic-gate 
/*
 * Both of the following are sizeof (tstat_data_t) plus the trailing
 * per-pagesize data.  Data is collected based upon szc (which keeps the
 * collection side simple), but must be reported to userland based upon
 * userszc, and the two may not match.  Hence two sizes:  the first is for
 * internal use, the second describes what is exported.
 */
static size_t		tstat_data_t_size;
static size_t		tstat_data_t_exported_size;

/*
 * Geometry of the trapstat region:  the data portion alone, and the data
 * portion together with the instruction pages.
 */
static size_t		tstat_data_pages;  /* number of pages of tstat data */
static size_t		tstat_data_size;   /* tstat data size in bytes */
static size_t		tstat_total_pages; /* #data pages + #instr pages */
static size_t		tstat_total_size;  /* tstat data size + instr size */

#ifdef sun4v
static caddr_t		tstat_va;	/* VA of memory reserved for TBA */
static pfn_t		tstat_pfn;	/* PFN of memory reserved for TBA */

/*
 * When set, trapstat_enable(), trapstat_disable() and trapstat_snapshot()
 * use the processor specific cpu_trapstat_conf()/cpu_trapstat_data()
 * interface for TSB hit statistics.
 */
static boolean_t	tstat_fast_tlbstat = B_FALSE;
#endif
5160Sstevel@tonic-gate 
5170Sstevel@tonic-gate /*
5180Sstevel@tonic-gate  * In the above block comment, see "TLB Statistics: TLB Misses versus
5190Sstevel@tonic-gate  * TSB Misses" for an explanation of the tsbmiss patch points.
5200Sstevel@tonic-gate  */
5210Sstevel@tonic-gate extern uint32_t		tsbmiss_trapstat_patch_point;
5220Sstevel@tonic-gate extern uint32_t		tsbmiss_trapstat_patch_point_kpm;
5230Sstevel@tonic-gate extern uint32_t		tsbmiss_trapstat_patch_point_kpm_small;
5240Sstevel@tonic-gate 
5250Sstevel@tonic-gate /*
5260Sstevel@tonic-gate  * Trapstat tsbmiss patch table
5270Sstevel@tonic-gate  */
5280Sstevel@tonic-gate tstat_tsbmiss_patch_entry_t tstat_tsbmiss_patch_table[] = {
5290Sstevel@tonic-gate 	{(uint32_t *)&tsbmiss_trapstat_patch_point, 0},
5300Sstevel@tonic-gate 	{(uint32_t *)&tsbmiss_trapstat_patch_point_kpm, 0},
5310Sstevel@tonic-gate 	{(uint32_t *)&tsbmiss_trapstat_patch_point_kpm_small, 0},
5320Sstevel@tonic-gate 	{(uint32_t *)NULL, 0}
5330Sstevel@tonic-gate };
5340Sstevel@tonic-gate 
/*
 * General SPARC-specific constants and field extractors, defined here to
 * allow more readable relocations:
 *
 *	NOP		encoding of the "nop" instruction
 *	HI22(v)		upper 22 bits of the low 32 bits of v
 *	LO10(v)		lower 10 bits of v
 *	LO12(v)		lower 12 bits of v
 *	DISP22(f, t)	distance from f to t in 4-byte units, truncated
 *			to 22 bits
 *	ASI(asi)	asi shifted left by 5 bits
 */
#define	NOP		0x01000000
#define	HI22(v)		(((uint32_t)(v)) >> 10)
#define	LO10(v)		(((uint32_t)(v)) & ((1U << 10) - 1))
#define	LO12(v)		(((uint32_t)(v)) & ((1U << 12) - 1))
#define	DISP22(from, to)	\
	((((uintptr_t)(to) - (uintptr_t)(from)) >> 2) & ((1U << 22) - 1))
#define	ASI(asi)	((asi) << 5)
5460Sstevel@tonic-gate 
5470Sstevel@tonic-gate /*
5480Sstevel@tonic-gate  * The interposing trap table must be locked in the I-TLB, and any data
5490Sstevel@tonic-gate  * referred to in the interposing trap handler must be locked in the D-TLB.
5500Sstevel@tonic-gate  * This function locks these pages in the appropriate TLBs by creating TTEs
5510Sstevel@tonic-gate  * from whole cloth, and manually loading them into the TLB.  This function is
5520Sstevel@tonic-gate  * called from cross call context.
5530Sstevel@tonic-gate  *
5540Sstevel@tonic-gate  * On sun4v platforms, we use 4M page size mappings to minimize the number
5550Sstevel@tonic-gate  * of locked down entries (i.e. permanent mappings). Each CPU uses a
5560Sstevel@tonic-gate  * reserved portion of that 4M page for its TBA and data.
5570Sstevel@tonic-gate  */
5580Sstevel@tonic-gate static void
5590Sstevel@tonic-gate trapstat_load_tlb(void)
5600Sstevel@tonic-gate {
5610Sstevel@tonic-gate #ifndef sun4v
5620Sstevel@tonic-gate 	int i;
5630Sstevel@tonic-gate #else
5640Sstevel@tonic-gate 	uint64_t ret;
5650Sstevel@tonic-gate #endif
5660Sstevel@tonic-gate 	tte_t tte;
5670Sstevel@tonic-gate 	tstat_percpu_t *tcpu = &tstat_percpu[CPU->cpu_id];
5680Sstevel@tonic-gate 	caddr_t va = tcpu->tcpu_vabase;
5690Sstevel@tonic-gate 
5700Sstevel@tonic-gate 	ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED);
5710Sstevel@tonic-gate 	ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED));
5720Sstevel@tonic-gate 
5730Sstevel@tonic-gate #ifndef sun4v
5740Sstevel@tonic-gate 	for (i = 0; i < tstat_total_pages; i++, va += MMU_PAGESIZE) {
5750Sstevel@tonic-gate 		tte.tte_inthi = TTE_VALID_INT | TTE_SZ_INT(TTE8K) |
5760Sstevel@tonic-gate 			TTE_PFN_INTHI(tcpu->tcpu_pfn[i]);
5770Sstevel@tonic-gate 		if (i < TSTAT_INSTR_PAGES) {
5780Sstevel@tonic-gate 			tte.tte_intlo = TTE_PFN_INTLO(tcpu->tcpu_pfn[i]) |
5790Sstevel@tonic-gate 				TTE_LCK_INT | TTE_CP_INT | TTE_PRIV_INT;
5802241Shuah 			sfmmu_itlb_ld_kva(va, &tte);
5810Sstevel@tonic-gate 		} else {
5820Sstevel@tonic-gate 			tte.tte_intlo = TTE_PFN_INTLO(tcpu->tcpu_pfn[i]) |
5830Sstevel@tonic-gate 				TTE_LCK_INT | TTE_CP_INT | TTE_CV_INT |
5840Sstevel@tonic-gate 				TTE_PRIV_INT | TTE_HWWR_INT;
5852241Shuah 			sfmmu_dtlb_ld_kva(va, &tte);
5860Sstevel@tonic-gate 		}
5870Sstevel@tonic-gate 	}
5880Sstevel@tonic-gate #else /* sun4v */
5890Sstevel@tonic-gate 	tte.tte_inthi = TTE_VALID_INT | TTE_PFN_INTHI(tstat_pfn);
5900Sstevel@tonic-gate 	tte.tte_intlo = TTE_PFN_INTLO(tstat_pfn) | TTE_CP_INT |
5910Sstevel@tonic-gate 		TTE_CV_INT | TTE_PRIV_INT | TTE_HWWR_INT |
5920Sstevel@tonic-gate 		TTE_SZ_INTLO(TTE4M);
5930Sstevel@tonic-gate 	ret = hv_mmu_map_perm_addr(va, KCONTEXT, *(uint64_t *)&tte,
5940Sstevel@tonic-gate 		MAP_ITLB | MAP_DTLB);
5950Sstevel@tonic-gate 
5960Sstevel@tonic-gate 	if (ret != H_EOK)
5970Sstevel@tonic-gate 		cmn_err(CE_PANIC, "trapstat: cannot map new TBA "
5980Sstevel@tonic-gate 		    "for cpu %d  (error: 0x%lx)", CPU->cpu_id, ret);
5990Sstevel@tonic-gate #endif /* sun4v */
6000Sstevel@tonic-gate }
6010Sstevel@tonic-gate 
6020Sstevel@tonic-gate /*
6030Sstevel@tonic-gate  * As mentioned in the "TLB Statistics: TLB Misses versus TSB Misses" section
6040Sstevel@tonic-gate  * of the block comment, TLB misses are differentiated from TSB misses in
6050Sstevel@tonic-gate  * part by hot-patching the instructions at the tsbmiss patch points (see
6060Sstevel@tonic-gate  * tstat_tsbmiss_patch_table). This routine is used both to initially patch
6070Sstevel@tonic-gate  * the instructions, and to patch them back to their original values upon
6080Sstevel@tonic-gate  * restoring the original trap table.
6090Sstevel@tonic-gate  */
6100Sstevel@tonic-gate static void
6110Sstevel@tonic-gate trapstat_hotpatch()
6120Sstevel@tonic-gate {
6130Sstevel@tonic-gate 	uint32_t instr;
6140Sstevel@tonic-gate 	uint32_t simm13;
6150Sstevel@tonic-gate 	tstat_tsbmiss_patch_entry_t *ep;
6160Sstevel@tonic-gate 
6170Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&tstat_lock));
6180Sstevel@tonic-gate 
6190Sstevel@tonic-gate 	if (!(tstat_options & TSTAT_OPT_TLBDATA))
6200Sstevel@tonic-gate 		return;
6210Sstevel@tonic-gate 
6220Sstevel@tonic-gate 	if (!tstat_tsbmiss_patched) {
6230Sstevel@tonic-gate 		/*
6240Sstevel@tonic-gate 		 * We haven't patched the TSB paths; do so now.
6250Sstevel@tonic-gate 		 */
6260Sstevel@tonic-gate 		/*CONSTCOND*/
6270Sstevel@tonic-gate 		ASSERT(offsetof(tstat_tlbret_t, ttlbr_ktsb) -
6280Sstevel@tonic-gate 		    offsetof(tstat_tlbret_t, ttlbr_ktlb) ==
6290Sstevel@tonic-gate 		    offsetof(tstat_tlbret_t, ttlbr_utsb) -
6300Sstevel@tonic-gate 		    offsetof(tstat_tlbret_t, ttlbr_utlb));
6310Sstevel@tonic-gate 
6320Sstevel@tonic-gate 		simm13 = offsetof(tstat_tlbret_t, ttlbr_ktsb) -
6330Sstevel@tonic-gate 		    offsetof(tstat_tlbret_t, ttlbr_ktlb);
6340Sstevel@tonic-gate 
6350Sstevel@tonic-gate 		for (ep = tstat_tsbmiss_patch_table; ep->tpe_addr; ep++) {
6360Sstevel@tonic-gate 			ASSERT(ep->tpe_instr == 0);
6370Sstevel@tonic-gate 			instr = ep->tpe_instr = *ep->tpe_addr;
6380Sstevel@tonic-gate 
6390Sstevel@tonic-gate 			/*
6400Sstevel@tonic-gate 			 * Assert that the instruction we're about to patch is
6410Sstevel@tonic-gate 			 * "add %g7, 0, %g7" (0x8e01e000).
6420Sstevel@tonic-gate 			 */
6430Sstevel@tonic-gate 			ASSERT(instr == TSTAT_TSBMISS_INSTR);
6440Sstevel@tonic-gate 
6450Sstevel@tonic-gate 			instr |= simm13;
6460Sstevel@tonic-gate 			hot_patch_kernel_text((caddr_t)ep->tpe_addr,
6470Sstevel@tonic-gate 			    instr, sizeof (instr));
6480Sstevel@tonic-gate 		}
6490Sstevel@tonic-gate 
6500Sstevel@tonic-gate 		tstat_tsbmiss_patched = 1;
6510Sstevel@tonic-gate 
6520Sstevel@tonic-gate 	} else {
6530Sstevel@tonic-gate 		/*
6540Sstevel@tonic-gate 		 * Remove patches from the TSB paths.
6550Sstevel@tonic-gate 		 */
6560Sstevel@tonic-gate 		for (ep = tstat_tsbmiss_patch_table; ep->tpe_addr; ep++) {
6570Sstevel@tonic-gate 			ASSERT(ep->tpe_instr == TSTAT_TSBMISS_INSTR);
6580Sstevel@tonic-gate 			hot_patch_kernel_text((caddr_t)ep->tpe_addr,
6590Sstevel@tonic-gate 			    ep->tpe_instr, sizeof (instr));
6600Sstevel@tonic-gate 			ep->tpe_instr = 0;
6610Sstevel@tonic-gate 		}
6620Sstevel@tonic-gate 
6630Sstevel@tonic-gate 		tstat_tsbmiss_patched = 0;
6640Sstevel@tonic-gate 	}
6650Sstevel@tonic-gate }
6660Sstevel@tonic-gate 
6670Sstevel@tonic-gate /*
6680Sstevel@tonic-gate  * This is the routine executed to clock the performance of the trap table,
6690Sstevel@tonic-gate  * executed both before and after interposing on the trap table to attempt to
6700Sstevel@tonic-gate  * determine probe effect.  The probe effect is used to adjust the "%tim"
6710Sstevel@tonic-gate  * fields of trapstat's -t and -T output; we only use TLB misses to clock the
6720Sstevel@tonic-gate  * trap table.  We execute the inner loop (which is designed to exceed the
6730Sstevel@tonic-gate  * TLB's reach) nlaps times, taking the best time as our time (thereby
6740Sstevel@tonic-gate  * factoring out the effects of interrupts, cache misses or other perturbing
6750Sstevel@tonic-gate  * events.
6760Sstevel@tonic-gate  */
6770Sstevel@tonic-gate static hrtime_t
6780Sstevel@tonic-gate trapstat_probe_laps(int nlaps, hrtime_t *buf)
6790Sstevel@tonic-gate {
6800Sstevel@tonic-gate 	int i, j = 0;
6810Sstevel@tonic-gate 	hrtime_t ts, best = INT64_MAX;
6820Sstevel@tonic-gate 
6830Sstevel@tonic-gate 	while (nlaps--) {
6840Sstevel@tonic-gate 		ts = rdtick();
6850Sstevel@tonic-gate 
6860Sstevel@tonic-gate 		for (i = 0; i < TSTAT_PROBE_SIZE; i += MMU_PAGESIZE)
6870Sstevel@tonic-gate 			*((volatile char *)&tstat_probe_area[i]);
6880Sstevel@tonic-gate 
6890Sstevel@tonic-gate 		if ((ts = rdtick() - ts) < best)
6900Sstevel@tonic-gate 			best = ts;
6910Sstevel@tonic-gate 		buf[j++] = ts;
6920Sstevel@tonic-gate 	}
6930Sstevel@tonic-gate 
6940Sstevel@tonic-gate 	return (best);
6950Sstevel@tonic-gate }
6960Sstevel@tonic-gate 
6970Sstevel@tonic-gate /*
6980Sstevel@tonic-gate  * This routine determines the probe effect by calling trapstat_probe_laps()
6990Sstevel@tonic-gate  * both without and with the interposing trap table.  Note that this is
7000Sstevel@tonic-gate  * called from a cross call on the desired CPU, and that it is called on
7010Sstevel@tonic-gate  * every CPU (this is necessary because the probe effect may differ from
7020Sstevel@tonic-gate  * one CPU to another).
7030Sstevel@tonic-gate  */
7040Sstevel@tonic-gate static void
7050Sstevel@tonic-gate trapstat_probe()
7060Sstevel@tonic-gate {
7070Sstevel@tonic-gate 	tstat_percpu_t *tcpu = &tstat_percpu[CPU->cpu_id];
7080Sstevel@tonic-gate 	hrtime_t before, after;
7090Sstevel@tonic-gate 
7100Sstevel@tonic-gate 	if (!(tcpu->tcpu_flags & TSTAT_CPU_SELECTED))
7110Sstevel@tonic-gate 		return;
7120Sstevel@tonic-gate 
7130Sstevel@tonic-gate 	if (tstat_probe_area == NULL || (tstat_options & TSTAT_OPT_NOGO))
7140Sstevel@tonic-gate 		return;
7150Sstevel@tonic-gate 
7160Sstevel@tonic-gate 	/*
7170Sstevel@tonic-gate 	 * We very much expect the %tba to be KERNELBASE; this is a
7180Sstevel@tonic-gate 	 * precautionary measure to assure that trapstat doesn't melt the
7190Sstevel@tonic-gate 	 * machine should the %tba point unexpectedly elsewhere.
7200Sstevel@tonic-gate 	 */
7210Sstevel@tonic-gate 	if (get_tba() != (caddr_t)KERNELBASE)
7220Sstevel@tonic-gate 		return;
7230Sstevel@tonic-gate 
7240Sstevel@tonic-gate 	/*
7250Sstevel@tonic-gate 	 * Preserve this CPU's data before destroying it by enabling the
7260Sstevel@tonic-gate 	 * interposing trap table.  We can safely use tstat_buffer because
7270Sstevel@tonic-gate 	 * the caller of the trapstat_probe() cross call is holding tstat_lock.
7280Sstevel@tonic-gate 	 */
7290Sstevel@tonic-gate 	bcopy(tcpu->tcpu_data, tstat_buffer, tstat_data_t_size);
7300Sstevel@tonic-gate 
7310Sstevel@tonic-gate 	tstat_probe_time = gethrtime();
7320Sstevel@tonic-gate 
7330Sstevel@tonic-gate 	before = trapstat_probe_laps(TSTAT_PROBE_NLAPS, tstat_probe_before);
7340Sstevel@tonic-gate 	(void) set_tba(tcpu->tcpu_ibase);
7350Sstevel@tonic-gate 
7360Sstevel@tonic-gate 	after = trapstat_probe_laps(TSTAT_PROBE_NLAPS, tstat_probe_after);
7370Sstevel@tonic-gate 	(void) set_tba((caddr_t)KERNELBASE);
7380Sstevel@tonic-gate 
7390Sstevel@tonic-gate 	tstat_probe_time = gethrtime() - tstat_probe_time;
7400Sstevel@tonic-gate 
7410Sstevel@tonic-gate 	bcopy(tstat_buffer, tcpu->tcpu_data, tstat_data_t_size);
7420Sstevel@tonic-gate 	tcpu->tcpu_data->tdata_peffect = (after - before) / TSTAT_PROBE_NPAGES;
7430Sstevel@tonic-gate }
7440Sstevel@tonic-gate 
7450Sstevel@tonic-gate static void
7460Sstevel@tonic-gate trapstat_probe_alloc()
7470Sstevel@tonic-gate {
7480Sstevel@tonic-gate 	pfn_t pfn;
7490Sstevel@tonic-gate 	caddr_t va;
7500Sstevel@tonic-gate 	int i;
7510Sstevel@tonic-gate 
7520Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&tstat_lock));
7530Sstevel@tonic-gate 	ASSERT(tstat_probe_area == NULL);
7540Sstevel@tonic-gate 	ASSERT(tstat_probe_phys == NULL);
7550Sstevel@tonic-gate 
7560Sstevel@tonic-gate 	if (!(tstat_options & TSTAT_OPT_TLBDATA))
7570Sstevel@tonic-gate 		return;
7580Sstevel@tonic-gate 
7590Sstevel@tonic-gate 	/*
7600Sstevel@tonic-gate 	 * Grab some virtual from the heap arena.
7610Sstevel@tonic-gate 	 */
7620Sstevel@tonic-gate 	tstat_probe_area = vmem_alloc(heap_arena, TSTAT_PROBE_SIZE, VM_SLEEP);
7630Sstevel@tonic-gate 	va = tstat_probe_area;
7640Sstevel@tonic-gate 
7650Sstevel@tonic-gate 	/*
7660Sstevel@tonic-gate 	 * Grab a single physical page.
7670Sstevel@tonic-gate 	 */
7680Sstevel@tonic-gate 	tstat_probe_phys = vmem_alloc(tstat_arena, MMU_PAGESIZE, VM_SLEEP);
7690Sstevel@tonic-gate 	pfn = hat_getpfnum(kas.a_hat, tstat_probe_phys);
7700Sstevel@tonic-gate 
7710Sstevel@tonic-gate 	/*
7720Sstevel@tonic-gate 	 * Now set the translation for every page in our virtual range
7730Sstevel@tonic-gate 	 * to be our allocated physical page.
7740Sstevel@tonic-gate 	 */
7750Sstevel@tonic-gate 	for (i = 0; i < TSTAT_PROBE_NPAGES; i++) {
7760Sstevel@tonic-gate 		hat_devload(kas.a_hat, va, MMU_PAGESIZE, pfn, PROT_READ,
7770Sstevel@tonic-gate 		    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
7780Sstevel@tonic-gate 		va += MMU_PAGESIZE;
7790Sstevel@tonic-gate 	}
7800Sstevel@tonic-gate }
7810Sstevel@tonic-gate 
7820Sstevel@tonic-gate static void
7830Sstevel@tonic-gate trapstat_probe_free()
7840Sstevel@tonic-gate {
7850Sstevel@tonic-gate 	caddr_t va;
7860Sstevel@tonic-gate 	int i;
7870Sstevel@tonic-gate 
7880Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&tstat_lock));
7890Sstevel@tonic-gate 
7900Sstevel@tonic-gate 	if ((va = tstat_probe_area) == NULL)
7910Sstevel@tonic-gate 		return;
7920Sstevel@tonic-gate 
7930Sstevel@tonic-gate 	for (i = 0; i < TSTAT_PROBE_NPAGES; i++) {
7940Sstevel@tonic-gate 		hat_unload(kas.a_hat, va, MMU_PAGESIZE, HAT_UNLOAD_UNLOCK);
7950Sstevel@tonic-gate 		va += MMU_PAGESIZE;
7960Sstevel@tonic-gate 	}
7970Sstevel@tonic-gate 
7980Sstevel@tonic-gate 	vmem_free(tstat_arena, tstat_probe_phys, MMU_PAGESIZE);
7990Sstevel@tonic-gate 	vmem_free(heap_arena, tstat_probe_area, TSTAT_PROBE_SIZE);
8000Sstevel@tonic-gate 
8010Sstevel@tonic-gate 	tstat_probe_phys = NULL;
8020Sstevel@tonic-gate 	tstat_probe_area = NULL;
8030Sstevel@tonic-gate }
8040Sstevel@tonic-gate 
8050Sstevel@tonic-gate /*
8060Sstevel@tonic-gate  * This routine actually enables a CPU by setting its %tba to be the
8070Sstevel@tonic-gate  * CPU's interposing trap table.  It is called out of cross call context.
8080Sstevel@tonic-gate  */
8090Sstevel@tonic-gate static void
8100Sstevel@tonic-gate trapstat_enable()
8110Sstevel@tonic-gate {
8120Sstevel@tonic-gate 	tstat_percpu_t *tcpu = &tstat_percpu[CPU->cpu_id];
8130Sstevel@tonic-gate 
8140Sstevel@tonic-gate 	if (!(tcpu->tcpu_flags & TSTAT_CPU_SELECTED))
8150Sstevel@tonic-gate 		return;
8160Sstevel@tonic-gate 
8170Sstevel@tonic-gate 	ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED);
8180Sstevel@tonic-gate 	ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED));
8190Sstevel@tonic-gate 
8200Sstevel@tonic-gate 	if (get_tba() != (caddr_t)KERNELBASE)
8210Sstevel@tonic-gate 		return;
8220Sstevel@tonic-gate 
8230Sstevel@tonic-gate 	if (!(tstat_options & TSTAT_OPT_NOGO))
8240Sstevel@tonic-gate 		(void) set_tba(tcpu->tcpu_ibase);
8250Sstevel@tonic-gate 	tcpu->tcpu_flags |= TSTAT_CPU_ENABLED;
8260Sstevel@tonic-gate #ifdef sun4v
8271050Sgirish 	if ((tstat_options & TSTAT_OPT_TLBDATA) &&
8281050Sgirish 	    !(tstat_options & TSTAT_OPT_NOGO)) {
8291050Sgirish 		if (tstat_fast_tlbstat) {
8301050Sgirish 			/*
8311050Sgirish 			 * Invoke processor specific interface to enable
8321050Sgirish 			 * collection of TSB hit statistics.
8331050Sgirish 			 */
8341050Sgirish 			cpu_trapstat_conf(CPU_TSTATCONF_ENABLE);
8351050Sgirish 		} else {
8361050Sgirish 			/*
8371050Sgirish 			 * Collect TLB miss statistics by taking over
8381050Sgirish 			 * TLB miss handling from the hypervisor. This
8391050Sgirish 			 * is done by telling the hypervisor that there
8401050Sgirish 			 * is no TSB configured. Also set TSTAT_TLB_STATS
8411050Sgirish 			 * flag so that no user TSB is configured during
8421050Sgirish 			 * context switch time.
8431050Sgirish 			 */
8441050Sgirish 			cpu_t *cp = CPU;
8450Sstevel@tonic-gate 
8461050Sgirish 			cp->cpu_m.cpu_tstat_flags |= TSTAT_TLB_STATS;
8471050Sgirish 			(void) hv_set_ctx0(NULL, NULL);
8481050Sgirish 			(void) hv_set_ctxnon0(NULL, NULL);
8491050Sgirish 		}
8500Sstevel@tonic-gate 	}
8510Sstevel@tonic-gate #endif
8520Sstevel@tonic-gate }
8530Sstevel@tonic-gate 
8540Sstevel@tonic-gate /*
8550Sstevel@tonic-gate  * This routine disables a CPU (vis a vis trapstat) by setting its %tba to be
8560Sstevel@tonic-gate  * the actual, underlying trap table.  It is called out of cross call context.
8570Sstevel@tonic-gate  */
8580Sstevel@tonic-gate static void
8590Sstevel@tonic-gate trapstat_disable()
8600Sstevel@tonic-gate {
8610Sstevel@tonic-gate 	tstat_percpu_t *tcpu = &tstat_percpu[CPU->cpu_id];
8620Sstevel@tonic-gate 
8630Sstevel@tonic-gate 	if (!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED))
8640Sstevel@tonic-gate 		return;
8650Sstevel@tonic-gate 
8660Sstevel@tonic-gate 	ASSERT(tcpu->tcpu_flags & TSTAT_CPU_SELECTED);
8670Sstevel@tonic-gate 	ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED);
8680Sstevel@tonic-gate 
8690Sstevel@tonic-gate 	if (!(tstat_options & TSTAT_OPT_NOGO))
8700Sstevel@tonic-gate 		(void) set_tba((caddr_t)KERNELBASE);
8710Sstevel@tonic-gate 
8720Sstevel@tonic-gate 	tcpu->tcpu_flags &= ~TSTAT_CPU_ENABLED;
8730Sstevel@tonic-gate 
8740Sstevel@tonic-gate #ifdef sun4v
8751050Sgirish 	if ((tstat_options & TSTAT_OPT_TLBDATA) &&
8761050Sgirish 	    !(tstat_options & TSTAT_OPT_NOGO)) {
8771050Sgirish 		if (tstat_fast_tlbstat) {
8781050Sgirish 			/*
8791050Sgirish 			 * Invoke processor specific interface to disable
8801050Sgirish 			 * collection of TSB hit statistics on each processor.
8811050Sgirish 			 */
8821050Sgirish 			cpu_trapstat_conf(CPU_TSTATCONF_DISABLE);
8831050Sgirish 		} else {
8841050Sgirish 			/*
8851050Sgirish 			 * As part of collecting TLB miss statistics, we took
8861050Sgirish 			 * over TLB miss handling from the hypervisor by
8871050Sgirish 			 * telling the hypervisor that NO TSB is configured.
8881050Sgirish 			 * We need to restore that by communicating proper
8891050Sgirish 			 * kernel/user TSB information so that TLB misses
8901050Sgirish 			 * can be handled by the hypervisor or the hardware
8911050Sgirish 			 * more efficiently.
8921050Sgirish 			 *
8931050Sgirish 			 * We restore kernel TSB information right away.
8941050Sgirish 			 * However, to minimize any locking dependency, we
8951050Sgirish 			 * don't restore user TSB information right away.
8961050Sgirish 			 * Instead, we simply clear the TSTAT_TLB_STATS flag
8971050Sgirish 			 * so that the user TSB information is automatically
8981050Sgirish 			 * restored on next context switch.
8991050Sgirish 			 *
9001050Sgirish 			 * Note that the call to restore kernel TSB information
9011050Sgirish 			 * will normally not fail, unless wrong information is
9021050Sgirish 			 * passed here. In that scenario, system will still
9031050Sgirish 			 * continue to function properly with the exception of
9041050Sgirish 			 * kernel handling all the TLB misses.
9051050Sgirish 			 */
9061050Sgirish 			struct hv_tsb_block *hvbp = &ksfmmup->sfmmu_hvblock;
9071050Sgirish 			cpu_t *cp = CPU;
9080Sstevel@tonic-gate 
9091050Sgirish 			cp->cpu_m.cpu_tstat_flags &= ~TSTAT_TLB_STATS;
9101050Sgirish 			(void) hv_set_ctx0(hvbp->hv_tsb_info_cnt,
9111050Sgirish 			    hvbp->hv_tsb_info_pa);
9121050Sgirish 		}
9130Sstevel@tonic-gate 	}
9140Sstevel@tonic-gate #endif
9150Sstevel@tonic-gate }
9160Sstevel@tonic-gate 
9170Sstevel@tonic-gate /*
9180Sstevel@tonic-gate  * We use %tick as the time base when recording the time spent executing
9190Sstevel@tonic-gate  * the trap handler.  %tick, however, is not necessarily kept in sync
9200Sstevel@tonic-gate  * across CPUs (indeed, different CPUs may have different %tick frequencies).
9210Sstevel@tonic-gate  * We therefore cross call onto a CPU to get a snapshot of its data to
9220Sstevel@tonic-gate  * copy out; this is the routine executed out of that cross call.
9230Sstevel@tonic-gate  */
9240Sstevel@tonic-gate static void
9250Sstevel@tonic-gate trapstat_snapshot()
9260Sstevel@tonic-gate {
9270Sstevel@tonic-gate 	tstat_percpu_t *tcpu = &tstat_percpu[CPU->cpu_id];
9280Sstevel@tonic-gate 	tstat_data_t *data = tcpu->tcpu_data;
9290Sstevel@tonic-gate 
9300Sstevel@tonic-gate 	ASSERT(tcpu->tcpu_flags & TSTAT_CPU_SELECTED);
9310Sstevel@tonic-gate 	ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED);
9320Sstevel@tonic-gate 	ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ENABLED);
9330Sstevel@tonic-gate 
9340Sstevel@tonic-gate 	data->tdata_snapts = gethrtime();
9350Sstevel@tonic-gate 	data->tdata_snaptick = rdtick();
9360Sstevel@tonic-gate 	bcopy(data, tstat_buffer, tstat_data_t_size);
9371050Sgirish #ifdef sun4v
9381050Sgirish 	/*
9391050Sgirish 	 * Invoke processor specific interface to collect TSB hit
9401050Sgirish 	 * statistics on each processor.
9411050Sgirish 	 */
9421050Sgirish 	if ((tstat_options & TSTAT_OPT_TLBDATA) && tstat_fast_tlbstat)
9431050Sgirish 		cpu_trapstat_data((void *) tstat_buffer->tdata_pgsz,
9441050Sgirish 		    tstat_pgszs);
9451050Sgirish #endif
9460Sstevel@tonic-gate }
9470Sstevel@tonic-gate 
/*
 * The TSTAT_RETENT_* constants define offsets in the TLB return entry:  each
 * is the index, within the "retent" instruction template of
 * trapstat_tlbretent() (below), of an instruction that is of interest to
 * that function.  They are used only in trapstat_tlbretent() and #undef'd
 * immediately afterwards.  Any change to "retent" in trapstat_tlbretent()
 * will likely require changes to these constants.
 *
 * The return entry differs between sun4v and other platforms; only the first
 * two offsets are common to both.
 */
#define	TSTAT_RETENT_STATHI	1
#define	TSTAT_RETENT_STATLO	2

#ifdef sun4v
#define	TSTAT_RETENT_SHIFT	5
#define	TSTAT_RETENT_COUNT_LD	7
#define	TSTAT_RETENT_COUNT_ST	9
#define	TSTAT_RETENT_TMPTSHI	10
#define	TSTAT_RETENT_TMPTSLO	11
#define	TSTAT_RETENT_TIME_LD	13
#define	TSTAT_RETENT_TIME_ST	15
#else /* !sun4v */
#define	TSTAT_RETENT_SHIFT	11
#define	TSTAT_RETENT_COUNT_LD	13
#define	TSTAT_RETENT_COUNT_ST	15
#define	TSTAT_RETENT_TMPTSHI	16
#define	TSTAT_RETENT_TMPTSLO	17
#define	TSTAT_RETENT_TIME_LD	19
#define	TSTAT_RETENT_TIME_ST	21
#endif /* sun4v */
9760Sstevel@tonic-gate 
/*
 * Build one TLB return entry: copy the retent[] instruction template into
 * ret->ttlbrent_instr and OR the run-time operands into the instructions
 * selected by the TSTAT_RETENT_* indices.
 *
 *	tcpu	per-CPU trapstat state; supplies tcpu_dbase, tcpu_data and
 *		(through TSTAT_DATA_OFFS) the address of tdata_tmptick
 *	ret	the return entry whose instruction array is written
 *	data	miss-data record; its offset from tcpu->tcpu_data, rebased
 *		onto tcpu->tcpu_dbase, becomes the "stat" operand
 *
 * The caller must hold tstat_lock.
 */
9770Sstevel@tonic-gate static void
9780Sstevel@tonic-gate trapstat_tlbretent(tstat_percpu_t *tcpu, tstat_tlbretent_t *ret,
9790Sstevel@tonic-gate     tstat_missdata_t *data)
9800Sstevel@tonic-gate {
9810Sstevel@tonic-gate 	uint32_t *ent = ret->ttlbrent_instr, shift;
9820Sstevel@tonic-gate 	uintptr_t base, tmptick = TSTAT_DATA_OFFS(tcpu, tdata_tmptick);
9830Sstevel@tonic-gate 
9840Sstevel@tonic-gate 	/*
9850Sstevel@tonic-gate 	 * This is the entry executed upon return from the TLB/TSB miss
9860Sstevel@tonic-gate 	 * handler (i.e. the code interpositioned between the "retry" and
9870Sstevel@tonic-gate 	 * the actual return to the TLB-missing instruction).  Detail on its
9880Sstevel@tonic-gate 	 * theory of operation can be found in the "TLB Statistics" section
9890Sstevel@tonic-gate 	 * of the block comment.  Note that we expect the TTE just loaded
9900Sstevel@tonic-gate 	 * into the TLB to be in %g5; all other globals are available as
9910Sstevel@tonic-gate 	 * scratch.  Finally, note that the page size information in sun4v is
9920Sstevel@tonic-gate 	 * located in the lower bits of the TTE -- requiring us to have a
9930Sstevel@tonic-gate 	 * different return entry on sun4v.
9940Sstevel@tonic-gate 	 */
9950Sstevel@tonic-gate 	static const uint32_t retent[TSTAT_TLBRET_NINSTR] = {
9960Sstevel@tonic-gate #ifndef sun4v
9970Sstevel@tonic-gate 	    0x87410000,		/* rd    %tick, %g3			*/
9980Sstevel@tonic-gate 	    0x03000000, 	/* sethi %hi(stat), %g1			*/
9990Sstevel@tonic-gate 	    0x82106000,		/* or    %g1, %lo(stat), %g1		*/
10000Sstevel@tonic-gate 	    0x89297001,		/* sllx  %g5, 1, %g4			*/
10010Sstevel@tonic-gate 	    0x8931303e,		/* srlx  %g4, 62, %g4			*/
10020Sstevel@tonic-gate 	    0x8531702e,		/* srlx  %g5, 46, %g2			*/
10030Sstevel@tonic-gate 	    0x8408a004,		/* and   %g2, 4, %g2			*/
10040Sstevel@tonic-gate 	    0x88110002,		/* or    %g4, %g2, %g4			*/
1005490Ssusans 	    0x80a12005,		/* cmp   %g4, 5				*/
1006490Ssusans 	    0x34400002,		/* bg,a,pn %icc, +8			*/
1007490Ssusans 	    0x88102004,		/* mov   4, %g4				*/
10080Sstevel@tonic-gate 	    0x89292000,		/* sll   %g4, shift, %g4		*/
10090Sstevel@tonic-gate 	    0x82004004,		/* add   %g1, %g4, %g1			*/
10100Sstevel@tonic-gate 	    0xc4586000,		/* ldx   [%g1 + tmiss_count], %g2	*/
10110Sstevel@tonic-gate 	    0x8400a001,		/* add   %g2, 1, %g2			*/
10120Sstevel@tonic-gate 	    0xc4706000,		/* stx   %g2, [%g1 + tmiss_count]	*/
10130Sstevel@tonic-gate 	    0x0d000000, 	/* sethi %hi(tdata_tmptick), %g6	*/
10140Sstevel@tonic-gate 	    0xc459a000, 	/* ldx   [%g6 + %lo(tdata_tmptick)], %g2 */
10150Sstevel@tonic-gate 	    0x8620c002,		/* sub   %g3, %g2, %g3			*/
10160Sstevel@tonic-gate 	    0xc4586000,		/* ldx   [%g1 + tmiss_time], %g2	*/
10170Sstevel@tonic-gate 	    0x84008003,		/* add   %g2, %g3, %g2			*/
10180Sstevel@tonic-gate 	    0xc4706000,		/* stx   %g2, [%g1 + tmiss_time]	*/
10190Sstevel@tonic-gate 	    0x83f00000		/* retry				*/
10200Sstevel@tonic-gate #else /* sun4v */
10210Sstevel@tonic-gate 	    0x87410000,		/* rd    %tick, %g3			*/
10220Sstevel@tonic-gate 	    0x03000000, 	/* sethi %hi(stat), %g1			*/
10230Sstevel@tonic-gate 	    0x82106000,		/* or    %g1, %lo(stat), %g1		*/
10240Sstevel@tonic-gate 	    0x8929703d,		/* sllx  %g5, 61, %g4			*/
10250Sstevel@tonic-gate 	    0x8931303d,		/* srlx  %g4, 61, %g4			*/
10260Sstevel@tonic-gate 	    0x89292000,		/* sll   %g4, shift, %g4		*/
10270Sstevel@tonic-gate 	    0x82004004,		/* add   %g1, %g4, %g1			*/
10280Sstevel@tonic-gate 	    0xc4586000,		/* ldx   [%g1 + tmiss_count], %g2	*/
10290Sstevel@tonic-gate 	    0x8400a001,		/* add   %g2, 1, %g2			*/
10300Sstevel@tonic-gate 	    0xc4706000,		/* stx   %g2, [%g1 + tmiss_count]	*/
10310Sstevel@tonic-gate 	    0x0d000000, 	/* sethi %hi(tdata_tmptick), %g6	*/
10320Sstevel@tonic-gate 	    0xc459a000, 	/* ldx   [%g6 + %lo(tdata_tmptick)], %g2 */
10330Sstevel@tonic-gate 	    0x8620c002,		/* sub   %g3, %g2, %g3			*/
10340Sstevel@tonic-gate 	    0xc4586000,		/* ldx   [%g1 + tmiss_time], %g2	*/
10350Sstevel@tonic-gate 	    0x84008003,		/* add   %g2, %g3, %g2			*/
10360Sstevel@tonic-gate 	    0xc4706000,		/* stx   %g2, [%g1 + tmiss_time]	*/
10370Sstevel@tonic-gate 	    0x83f00000		/* retry				*/
10380Sstevel@tonic-gate #endif /* sun4v */
10390Sstevel@tonic-gate 	};
10400Sstevel@tonic-gate 
10410Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&tstat_lock));
	/*
	 * The member offsets are OR'd into ld/st immediates below, so they
	 * are checked against LO10(-1); the size of tstat_pgszdata_t is
	 * checked to be a power of two because it is applied as a shift.
	 */
10420Sstevel@tonic-gate 	/*CONSTCOND*/
10430Sstevel@tonic-gate 	ASSERT(offsetof(tstat_missdata_t, tmiss_count) <= LO10(-1));
10440Sstevel@tonic-gate 	/*CONSTCOND*/
10450Sstevel@tonic-gate 	ASSERT(offsetof(tstat_missdata_t, tmiss_time) <= LO10(-1));
10460Sstevel@tonic-gate 	/*CONSTCOND*/
10470Sstevel@tonic-gate 	ASSERT(!((sizeof (tstat_pgszdata_t) - 1) & sizeof (tstat_pgszdata_t)));
10480Sstevel@tonic-gate 
	/*
	 * Find shift such that (1 << shift) == sizeof (tstat_pgszdata_t).
	 * The search starts at 1 and only stops on an exact match, so it
	 * relies on the size being a power of two no smaller than 2.
	 */
10490Sstevel@tonic-gate 	for (shift = 1; (1 << shift) != sizeof (tstat_pgszdata_t); shift++)
10500Sstevel@tonic-gate 		continue;
10510Sstevel@tonic-gate 
	/*
	 * Rebase data's offset within tcpu_data onto tcpu_dbase; the result
	 * is the "stat" address loaded by the sethi/or pair.
	 */
10520Sstevel@tonic-gate 	base = (uintptr_t)tcpu->tcpu_dbase +
10530Sstevel@tonic-gate 	    ((uintptr_t)data - (uintptr_t)tcpu->tcpu_data);
10540Sstevel@tonic-gate 
10550Sstevel@tonic-gate 	bcopy(retent, ent, sizeof (retent));
10560Sstevel@tonic-gate 
	/*
	 * The patched instructions carry zero immediates in the template,
	 * so OR-ing installs the operands.
	 */
10570Sstevel@tonic-gate 	ent[TSTAT_RETENT_STATHI] |= HI22(base);
10580Sstevel@tonic-gate 	ent[TSTAT_RETENT_STATLO] |= LO10(base);
10590Sstevel@tonic-gate 	ent[TSTAT_RETENT_SHIFT] |= shift;
10600Sstevel@tonic-gate 	/* LINTED E_EXPR_NULL_EFFECT */
10610Sstevel@tonic-gate 	ent[TSTAT_RETENT_COUNT_LD] |= offsetof(tstat_missdata_t, tmiss_count);
10620Sstevel@tonic-gate 	/* LINTED E_EXPR_NULL_EFFECT */
10630Sstevel@tonic-gate 	ent[TSTAT_RETENT_COUNT_ST] |= offsetof(tstat_missdata_t, tmiss_count);
10640Sstevel@tonic-gate 	ent[TSTAT_RETENT_TMPTSHI] |= HI22(tmptick);
10650Sstevel@tonic-gate 	ent[TSTAT_RETENT_TMPTSLO] |= LO10(tmptick);
10660Sstevel@tonic-gate 	ent[TSTAT_RETENT_TIME_LD] |= offsetof(tstat_missdata_t, tmiss_time);
10670Sstevel@tonic-gate 	ent[TSTAT_RETENT_TIME_ST] |= offsetof(tstat_missdata_t, tmiss_time);
10680Sstevel@tonic-gate }
10690Sstevel@tonic-gate 
/* The TLB return entry offsets are not needed past this point. */
10700Sstevel@tonic-gate #undef TSTAT_RETENT_STATHI
10710Sstevel@tonic-gate #undef TSTAT_RETENT_STATLO
10720Sstevel@tonic-gate #undef TSTAT_RETENT_SHIFT
10730Sstevel@tonic-gate #undef TSTAT_RETENT_COUNT_LD
10740Sstevel@tonic-gate #undef TSTAT_RETENT_COUNT_ST
10750Sstevel@tonic-gate #undef TSTAT_RETENT_TMPTSHI
10760Sstevel@tonic-gate #undef TSTAT_RETENT_TMPTSLO
10770Sstevel@tonic-gate #undef TSTAT_RETENT_TIME_LD
10780Sstevel@tonic-gate #undef TSTAT_RETENT_TIME_ST
10790Sstevel@tonic-gate 
10800Sstevel@tonic-gate /*
10810Sstevel@tonic-gate  * The TSTAT_TLBENT_* constants define offsets in the TLB entry.  They are
10820Sstevel@tonic-gate  * used only in trapstat_tlbent() (below) and #undef'd immediately afterwards.
10830Sstevel@tonic-gate  * Any change to "tlbent" in trapstat_tlbent() will likely require changes
10840Sstevel@tonic-gate  * to these constants.  Each value is the zero-based index of the
 * instruction in "tlbent" whose immediate field is filled in after the
 * template is copied.
10850Sstevel@tonic-gate  */
10860Sstevel@tonic-gate 
10870Sstevel@tonic-gate #ifndef sun4v
10880Sstevel@tonic-gate #define	TSTAT_TLBENT_STATHI	0	/* sethi %hi(stat), %g1 */
10890Sstevel@tonic-gate #define	TSTAT_TLBENT_STATLO_LD	1	/* ldx   [%g1 + %lo(stat)], %g2 */
10900Sstevel@tonic-gate #define	TSTAT_TLBENT_STATLO_ST	3	/* stx   %g2, [%g1 + %lo(stat)] */
10910Sstevel@tonic-gate #define	TSTAT_TLBENT_MMUASI	15	/* ldxa  [%g0]ASI_MMU, %g1 */
10920Sstevel@tonic-gate #define	TSTAT_TLBENT_TPCHI	18	/* sethi %hi(new_tpc), %g1 */
10930Sstevel@tonic-gate #define	TSTAT_TLBENT_TPCLO_USER	19	/* or    %g1, %lo(new_tpc), %g1 (user) */
10940Sstevel@tonic-gate #define	TSTAT_TLBENT_TPCLO_KERN	21	/* or    %g1, %lo(new_tpc), %g1 (kernel) */
10950Sstevel@tonic-gate #define	TSTAT_TLBENT_TSHI	25	/* sethi %hi(tmptick), %g1 */
10960Sstevel@tonic-gate #define	TSTAT_TLBENT_TSLO	27	/* stx   %g2, [%g1 + %lo(tmptick)] */
10970Sstevel@tonic-gate #define	TSTAT_TLBENT_BA		28	/* ba,a  addr */
10980Sstevel@tonic-gate #else /* sun4v */
10990Sstevel@tonic-gate #define	TSTAT_TLBENT_STATHI	0	/* sethi %hi(stat), %g1 */
11000Sstevel@tonic-gate #define	TSTAT_TLBENT_STATLO_LD	1	/* ldx   [%g1 + %lo(stat)], %g2 */
11010Sstevel@tonic-gate #define	TSTAT_TLBENT_STATLO_ST	3	/* stx   %g2, [%g1 + %lo(stat)] */
11020Sstevel@tonic-gate #define	TSTAT_TLBENT_TAGTARGET	19	/* ldx   [%g1 + MMFSA_?_CTX], %g1 */
11030Sstevel@tonic-gate #define	TSTAT_TLBENT_TPCHI	21	/* sethi %hi(new_tpc), %g1 */
11040Sstevel@tonic-gate #define	TSTAT_TLBENT_TPCLO_USER	22	/* or    %g1, %lo(new_tpc), %g1 (user) */
11050Sstevel@tonic-gate #define	TSTAT_TLBENT_TPCLO_KERN	24	/* or    %g1, %lo(new_tpc), %g1 (kernel) */
11060Sstevel@tonic-gate #define	TSTAT_TLBENT_TSHI	28	/* sethi %hi(tmptick), %g1 */
11070Sstevel@tonic-gate #define	TSTAT_TLBENT_TSLO	30	/* stx   %g2, [%g1 + %lo(tmptick)] */
11080Sstevel@tonic-gate #define	TSTAT_TLBENT_BA		31	/* ba,a  addr */
11090Sstevel@tonic-gate #endif /* sun4v */
11100Sstevel@tonic-gate 
/*
 * Install the TLB-statistics interposing handler for trap table entry
 * "entno" on one CPU, then build the four TLB return entries (kernel/user
 * x TLB/TSB) that go with it.
 *
 *	tcpu	per-CPU trapstat state
 *	entno	trap table entry number; must be TSTAT_ENT_ITLBMISS or
 *		TSTAT_ENT_DTLBMISS (on sun4v, TSTAT_ENT_IMMUMISS and
 *		TSTAT_ENT_DMMUMISS are accepted as well)
 *
 * The caller must hold tstat_lock.
 */
11110Sstevel@tonic-gate static void
11120Sstevel@tonic-gate trapstat_tlbent(tstat_percpu_t *tcpu, int entno)
11130Sstevel@tonic-gate {
11140Sstevel@tonic-gate 	uint32_t *ent;
11150Sstevel@tonic-gate 	uintptr_t orig, va, baoffs;
11161050Sgirish #ifndef sun4v
11170Sstevel@tonic-gate 	int itlb = entno == TSTAT_ENT_ITLBMISS;
11181050Sgirish #else
11191050Sgirish 	int itlb = (entno == TSTAT_ENT_IMMUMISS || entno == TSTAT_ENT_ITLBMISS);
11201050Sgirish #endif
11210Sstevel@tonic-gate 	int entoffs = entno << TSTAT_ENT_SHIFT;
11220Sstevel@tonic-gate 	uintptr_t tmptick, stat, tpc, utpc;
11230Sstevel@tonic-gate 	tstat_pgszdata_t *data = &tcpu->tcpu_data->tdata_pgsz[0];
11240Sstevel@tonic-gate 	tstat_tlbdata_t *udata, *kdata;
11250Sstevel@tonic-gate 	tstat_tlbret_t *ret;
11260Sstevel@tonic-gate #ifndef sun4v
11270Sstevel@tonic-gate 	uint32_t asi = itlb ? ASI(ASI_IMMU) : ASI(ASI_DMMU);
11280Sstevel@tonic-gate #else
11290Sstevel@tonic-gate 	uint32_t tagtarget_off = itlb ? MMFSA_I_CTX : MMFSA_D_CTX;
11300Sstevel@tonic-gate #endif
11310Sstevel@tonic-gate 
11320Sstevel@tonic-gate 	/*
11330Sstevel@tonic-gate 	 * When trapstat is run with TLB statistics, this is the entry for
11340Sstevel@tonic-gate 	 * both I- and D-TLB misses; this code performs trap level pushing,
11350Sstevel@tonic-gate 	 * as described in the "TLB Statistics" section of the block comment.
11360Sstevel@tonic-gate 	 * This code is executing at TL 1; %tstate[0] contains the saved
11370Sstevel@tonic-gate 	 * state at the time of the TLB miss.  Pushing trap level 1 (and thus
11380Sstevel@tonic-gate 	 * raising TL to 2) requires us to fill in %tstate[1] with our %pstate,
11390Sstevel@tonic-gate 	 * %cwp and %asi.  We leave %tt unchanged, and we set %tpc and %tnpc to
11400Sstevel@tonic-gate 	 * the appropriate TLB return entry (based on the context of the miss).
11410Sstevel@tonic-gate 	 * Finally, we sample %tick, and stash it in the tdata_tmptick member
11420Sstevel@tonic-gate 	 * of the per-CPU tstat_data structure.  tdata_tmptick will be used in
11430Sstevel@tonic-gate 	 * the TLB return entry to determine the amount of time spent in the
11440Sstevel@tonic-gate 	 * TLB miss handler.
11450Sstevel@tonic-gate 	 *
1146158Sgirish 	 * Note that on sun4v platforms, we must obtain the context information
1147158Sgirish 	 * from the MMU fault status area. (The base address of this MMU fault
1148158Sgirish 	 * status area is kept in the scratchpad register 0.)
11490Sstevel@tonic-gate 	 */
11500Sstevel@tonic-gate 	static const uint32_t tlbent[] = {
11510Sstevel@tonic-gate #ifndef sun4v
11520Sstevel@tonic-gate 	    0x03000000, 		/* sethi %hi(stat), %g1		*/
11530Sstevel@tonic-gate 	    0xc4586000,			/* ldx   [%g1 + %lo(stat)], %g2	*/
11540Sstevel@tonic-gate 	    0x8400a001,			/* add   %g2, 1, %g2		*/
11550Sstevel@tonic-gate 	    0xc4706000,			/* stx   %g2, [%g1 + %lo(stat)]	*/
11560Sstevel@tonic-gate 	    0x85524000,			/* rdpr  %cwp, %g2		*/
11570Sstevel@tonic-gate 	    0x87518000,			/* rdpr  %pstate, %g3		*/
11580Sstevel@tonic-gate 	    0x8728f008,			/* sllx  %g3, 8, %g3		*/
11590Sstevel@tonic-gate 	    0x84108003,			/* or    %g2, %g3, %g2		*/
11600Sstevel@tonic-gate 	    0x8740c000,			/* rd    %asi, %g3		*/
11610Sstevel@tonic-gate 	    0x8728f018,			/* sllx  %g3, 24, %g3		*/
11620Sstevel@tonic-gate 	    0x84108003,			/* or    %g2, %g3, %g2		*/
11630Sstevel@tonic-gate 	    0x8350c000,			/* rdpr  %tt, %g1		*/
11640Sstevel@tonic-gate 	    0x8f902002,			/* wrpr  %g0, 2, %tl		*/
11650Sstevel@tonic-gate 	    0x85908000,			/* wrpr  %g2, %g0, %tstate	*/
11660Sstevel@tonic-gate 	    0x87904000,			/* wrpr  %g1, %g0, %tt		*/
11670Sstevel@tonic-gate 	    0xc2d80000,			/* ldxa  [%g0]ASI_MMU, %g1	*/
11680Sstevel@tonic-gate 	    0x83307030,			/* srlx  %g1, CTXSHIFT, %g1	*/
11690Sstevel@tonic-gate 	    0x02c04004,			/* brz,pn %g1, .+0x10		*/
11700Sstevel@tonic-gate 	    0x03000000, 		/* sethi %hi(new_tpc), %g1	*/
11710Sstevel@tonic-gate 	    0x82106000,			/* or    %g1, %lo(new_tpc), %g1	*/
11720Sstevel@tonic-gate 	    0x30800002,			/* ba,a  .+0x8			*/
11730Sstevel@tonic-gate 	    0x82106000,			/* or    %g1, %lo(new_tpc), %g1	*/
11740Sstevel@tonic-gate 	    0x81904000,			/* wrpr  %g1, %g0, %tpc		*/
11750Sstevel@tonic-gate 	    0x82006004,			/* add   %g1, 4, %g1		*/
11760Sstevel@tonic-gate 	    0x83904000,			/* wrpr  %g1, %g0, %tnpc	*/
11770Sstevel@tonic-gate 	    0x03000000, 		/* sethi %hi(tmptick), %g1	*/
11780Sstevel@tonic-gate 	    0x85410000,			/* rd    %tick, %g2		*/
11790Sstevel@tonic-gate 	    0xc4706000,			/* stx   %g2, [%g1 + %lo(tmptick)] */
11800Sstevel@tonic-gate 	    0x30800000,			/* ba,a  addr			*/
11810Sstevel@tonic-gate 	    NOP, NOP, NOP
11820Sstevel@tonic-gate #else /* sun4v */
11830Sstevel@tonic-gate 	    0x03000000, 		/* sethi %hi(stat), %g1		*/
11840Sstevel@tonic-gate 	    0xc4586000,			/* ldx   [%g1 + %lo(stat)], %g2	*/
11850Sstevel@tonic-gate 	    0x8400a001,			/* add   %g2, 1, %g2		*/
11860Sstevel@tonic-gate 	    0xc4706000,			/* stx   %g2, [%g1 + %lo(stat)]	*/
11870Sstevel@tonic-gate 	    0x85524000,			/* rdpr  %cwp, %g2		*/
11880Sstevel@tonic-gate 	    0x87518000,			/* rdpr  %pstate, %g3		*/
11890Sstevel@tonic-gate 	    0x8728f008,			/* sllx  %g3, 8, %g3		*/
11900Sstevel@tonic-gate 	    0x84108003,			/* or    %g2, %g3, %g2		*/
11910Sstevel@tonic-gate 	    0x8740c000,			/* rd    %asi, %g3		*/
1192158Sgirish 	    0x8728f018,			/* sllx  %g3, 24, %g3		*/
1193158Sgirish 	    0x83540000,			/* rdpr  %gl, %g1		*/
1194158Sgirish 	    0x83287028,			/* sllx  %g1, 40, %g1		*/
11950Sstevel@tonic-gate 	    0x86104003,			/* or    %g1, %g3, %g3		*/
11960Sstevel@tonic-gate 	    0x84108003,			/* or    %g2, %g3, %g2		*/
11970Sstevel@tonic-gate 	    0x8350c000,			/* rdpr  %tt, %g1		*/
11980Sstevel@tonic-gate 	    0x8f902002,			/* wrpr  %g0, 2, %tl		*/
11990Sstevel@tonic-gate 	    0x85908000,			/* wrpr  %g2, %g0, %tstate	*/
12000Sstevel@tonic-gate 	    0x87904000,			/* wrpr  %g1, %g0, %tt		*/
12010Sstevel@tonic-gate 	    0xc2d80400,			/* ldxa  [%g0]ASI_SCRATCHPAD, %g1 */
12020Sstevel@tonic-gate 	    0xc2586000,			/* ldx  [%g1 + MMFSA_?_CTX], %g1 */
12030Sstevel@tonic-gate 	    0x02c04004,			/* brz,pn %g1, .+0x10		*/
12040Sstevel@tonic-gate 	    0x03000000, 		/* sethi %hi(new_tpc), %g1	*/
12050Sstevel@tonic-gate 	    0x82106000,			/* or    %g1, %lo(new_tpc), %g1	*/
12060Sstevel@tonic-gate 	    0x30800002,			/* ba,a  .+0x8			*/
12070Sstevel@tonic-gate 	    0x82106000,			/* or    %g1, %lo(new_tpc), %g1	*/
12080Sstevel@tonic-gate 	    0x81904000,			/* wrpr  %g1, %g0, %tpc		*/
12090Sstevel@tonic-gate 	    0x82006004,			/* add   %g1, 4, %g1		*/
12100Sstevel@tonic-gate 	    0x83904000,			/* wrpr  %g1, %g0, %tnpc	*/
12110Sstevel@tonic-gate 	    0x03000000, 		/* sethi %hi(tmptick), %g1	*/
12120Sstevel@tonic-gate 	    0x85410000,			/* rd    %tick, %g2		*/
12130Sstevel@tonic-gate 	    0xc4706000,			/* stx   %g2, [%g1 + %lo(tmptick)] */
12140Sstevel@tonic-gate 	    0x30800000			/* ba,a  addr			*/
12150Sstevel@tonic-gate #endif /* sun4v */
12160Sstevel@tonic-gate 	};
12170Sstevel@tonic-gate 
12180Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&tstat_lock));
12191050Sgirish #ifndef sun4v
12200Sstevel@tonic-gate 	ASSERT(entno == TSTAT_ENT_ITLBMISS || entno == TSTAT_ENT_DTLBMISS);
12211050Sgirish #else
12221050Sgirish 	ASSERT(entno == TSTAT_ENT_ITLBMISS || entno == TSTAT_ENT_DTLBMISS ||
12231050Sgirish 	    entno == TSTAT_ENT_IMMUMISS || entno == TSTAT_ENT_DMMUMISS);
12241050Sgirish #endif
12250Sstevel@tonic-gate 
	/* Addresses of this entry's trap counter and of the tick scratch. */
12260Sstevel@tonic-gate 	stat = TSTAT_DATA_OFFS(tcpu, tdata_traps) + entoffs;
12270Sstevel@tonic-gate 	tmptick = TSTAT_DATA_OFFS(tcpu, tdata_tmptick);
12280Sstevel@tonic-gate 
	/*
	 * Select the I- or D-side return block and statistics; tpc starts
	 * out as the address of the kernel TLB return entry.
	 */
12290Sstevel@tonic-gate 	if (itlb) {
12300Sstevel@tonic-gate 		ret = &tcpu->tcpu_instr->tinst_itlbret;
12310Sstevel@tonic-gate 		udata = &data->tpgsz_user.tmode_itlb;
12320Sstevel@tonic-gate 		kdata = &data->tpgsz_kernel.tmode_itlb;
12330Sstevel@tonic-gate 		tpc = TSTAT_INSTR_OFFS(tcpu, tinst_itlbret.ttlbr_ktlb);
12340Sstevel@tonic-gate 	} else {
12350Sstevel@tonic-gate 		ret = &tcpu->tcpu_instr->tinst_dtlbret;
12360Sstevel@tonic-gate 		udata = &data->tpgsz_user.tmode_dtlb;
12370Sstevel@tonic-gate 		kdata = &data->tpgsz_kernel.tmode_dtlb;
12380Sstevel@tonic-gate 		tpc = TSTAT_INSTR_OFFS(tcpu, tinst_dtlbret.ttlbr_ktlb);
12390Sstevel@tonic-gate 	}
12400Sstevel@tonic-gate 
	/*
	 * The user return entry is in the same tstat_tlbret_t; derive its
	 * address from tpc using the difference in member offsets.
	 */
12410Sstevel@tonic-gate 	utpc = tpc + offsetof(tstat_tlbret_t, ttlbr_utlb) -
12420Sstevel@tonic-gate 	    offsetof(tstat_tlbret_t, ttlbr_ktlb);
12430Sstevel@tonic-gate 
	/*
	 * Only one sethi (TSTAT_TLBENT_TPCHI) is patched, with HI22(tpc),
	 * and it serves both the user and kernel "or"; the upper 22 bits of
	 * the two targets must therefore agree.
	 */
12440Sstevel@tonic-gate 	ASSERT(HI22(tpc) == HI22(utpc));
12450Sstevel@tonic-gate 
	/*
	 * ent is where the handler is written (relative to tcpu_instr), va
	 * is the same entry relative to tcpu_ibase and is used for branch
	 * displacements, and orig is the corresponding entry in the table
	 * at KERNELBASE that the final "ba,a" targets.
	 */
12460Sstevel@tonic-gate 	ent = (uint32_t *)((uintptr_t)tcpu->tcpu_instr + entoffs);
12470Sstevel@tonic-gate 	orig = KERNELBASE + entoffs;
12480Sstevel@tonic-gate 	va = (uintptr_t)tcpu->tcpu_ibase + entoffs;
12490Sstevel@tonic-gate 	baoffs = TSTAT_TLBENT_BA * sizeof (uint32_t);
12500Sstevel@tonic-gate 
12511050Sgirish #ifdef sun4v
12521050Sgirish 	if (entno == TSTAT_ENT_IMMUMISS || entno == TSTAT_ENT_DMMUMISS) {
12531050Sgirish 		/*
12541050Sgirish 		 * Because of lack of space, interposing tlbent trap
12551050Sgirish 		 * handler for IMMU_miss and DMMU_miss traps cannot be
12561050Sgirish 		 * placed in-line. Instead, we copy it to the space set
12571050Sgirish 		 * aside for these traps in per CPU trapstat area and
12581050Sgirish 		 * invoke it by placing a branch in the trap table itself.
12591050Sgirish 		 */
12601050Sgirish 		static const uint32_t mmumiss[TSTAT_ENT_NINSTR] = {
12611050Sgirish 		    0x30800000,			/* ba,a addr */
12621050Sgirish 		    NOP, NOP, NOP, NOP, NOP, NOP, NOP
12631050Sgirish 		};
12641050Sgirish 		uint32_t *tent = ent;		/* trap vector entry */
12651050Sgirish 		uintptr_t tentva = va;		/* trap vector entry va */
12661050Sgirish 
		/*
		 * Redirect ent/va to the out-of-line area; the handler
		 * body below is then written there instead of in-line.
		 */
12671050Sgirish 		if (itlb) {
12681050Sgirish 			ent = (uint32_t *)((uintptr_t)
12691050Sgirish 				&tcpu->tcpu_instr->tinst_immumiss);
12701050Sgirish 			va = TSTAT_INSTR_OFFS(tcpu, tinst_immumiss);
12711050Sgirish 		} else {
12721050Sgirish 			ent = (uint32_t *)((uintptr_t)
12731050Sgirish 				&tcpu->tcpu_instr->tinst_dmmumiss);
12741050Sgirish 			va = TSTAT_INSTR_OFFS(tcpu, tinst_dmmumiss);
12751050Sgirish 		}
		/* The trap table slot itself just branches to that area. */
12761050Sgirish 		bcopy(mmumiss, tent, sizeof (mmumiss));
12771050Sgirish 		tent[0] |= DISP22(tentva, va);
12781050Sgirish 	}
12791050Sgirish #endif /* sun4v */
12801050Sgirish 
	/*
	 * Copy the template, then OR the operands into the instructions
	 * selected by the TSTAT_TLBENT_* indices.
	 */
12810Sstevel@tonic-gate 	bcopy(tlbent, ent, sizeof (tlbent));
12820Sstevel@tonic-gate 
12830Sstevel@tonic-gate 	ent[TSTAT_TLBENT_STATHI] |= HI22(stat);
12840Sstevel@tonic-gate 	ent[TSTAT_TLBENT_STATLO_LD] |= LO10(stat);
12850Sstevel@tonic-gate 	ent[TSTAT_TLBENT_STATLO_ST] |= LO10(stat);
12860Sstevel@tonic-gate #ifndef sun4v
12870Sstevel@tonic-gate 	ent[TSTAT_TLBENT_MMUASI] |= asi;
12880Sstevel@tonic-gate #else
12890Sstevel@tonic-gate 	ent[TSTAT_TLBENT_TAGTARGET] |= tagtarget_off;
12900Sstevel@tonic-gate #endif
12910Sstevel@tonic-gate 	ent[TSTAT_TLBENT_TPCHI] |= HI22(tpc);
12920Sstevel@tonic-gate 	ent[TSTAT_TLBENT_TPCLO_USER] |= LO10(utpc);
12930Sstevel@tonic-gate 	ent[TSTAT_TLBENT_TPCLO_KERN] |= LO10(tpc);
12940Sstevel@tonic-gate 	ent[TSTAT_TLBENT_TSHI] |= HI22(tmptick);
12950Sstevel@tonic-gate 	ent[TSTAT_TLBENT_TSLO] |= LO10(tmptick);
12960Sstevel@tonic-gate 	ent[TSTAT_TLBENT_BA] |= DISP22(va + baoffs, orig);
12970Sstevel@tonic-gate 
12980Sstevel@tonic-gate 	/*
12990Sstevel@tonic-gate 	 * And now set up the TLB return entries.
13000Sstevel@tonic-gate 	 */
13010Sstevel@tonic-gate 	trapstat_tlbretent(tcpu, &ret->ttlbr_ktlb, &kdata->ttlb_tlb);
13020Sstevel@tonic-gate 	trapstat_tlbretent(tcpu, &ret->ttlbr_ktsb, &kdata->ttlb_tsb);
13030Sstevel@tonic-gate 	trapstat_tlbretent(tcpu, &ret->ttlbr_utlb, &udata->ttlb_tlb);
13040Sstevel@tonic-gate 	trapstat_tlbretent(tcpu, &ret->ttlbr_utsb, &udata->ttlb_tsb);
13050Sstevel@tonic-gate }
13060Sstevel@tonic-gate 
/* The TLB entry offsets are not needed past this point. */
13070Sstevel@tonic-gate #undef TSTAT_TLBENT_STATHI
13080Sstevel@tonic-gate #undef TSTAT_TLBENT_STATLO_LD
13090Sstevel@tonic-gate #undef TSTAT_TLBENT_STATLO_ST
13100Sstevel@tonic-gate #ifndef sun4v
13110Sstevel@tonic-gate #undef TSTAT_TLBENT_MMUASI
13120Sstevel@tonic-gate #else
13130Sstevel@tonic-gate #undef TSTAT_TLBENT_TAGTARGET
13140Sstevel@tonic-gate #endif
13150Sstevel@tonic-gate #undef TSTAT_TLBENT_TPCHI
13160Sstevel@tonic-gate #undef TSTAT_TLBENT_TPCLO_USER
13170Sstevel@tonic-gate #undef TSTAT_TLBENT_TPCLO_KERN
13180Sstevel@tonic-gate #undef TSTAT_TLBENT_TSHI
13190Sstevel@tonic-gate #undef TSTAT_TLBENT_TSLO
13200Sstevel@tonic-gate #undef TSTAT_TLBENT_BA
13210Sstevel@tonic-gate 
13220Sstevel@tonic-gate /*
13230Sstevel@tonic-gate  * The TSTAT_ENABLED_* constants define offsets in the enabled entry; the
13240Sstevel@tonic-gate  * TSTAT_DISABLED_BA constant defines an offset in the disabled entry.  Both
13250Sstevel@tonic-gate  * sets of constants are used only in trapstat_make_traptab() (below) and
13260Sstevel@tonic-gate  * #undef'd immediately afterwards.  Any change to "enabled" or "disabled"
13270Sstevel@tonic-gate  * in trapstat_make_traptab() will likely require changes to these constants.
 * Each value is the zero-based index of the instruction that is patched.
13280Sstevel@tonic-gate  */
13290Sstevel@tonic-gate #define	TSTAT_ENABLED_STATHI	0	/* sethi %hi(stat), %g1 */
13300Sstevel@tonic-gate #define	TSTAT_ENABLED_STATLO_LD	1	/* ldx   [%g1 + %lo(stat)], %g2 */
13310Sstevel@tonic-gate #define	TSTAT_ENABLED_STATLO_ST 3	/* stx   %g2, [%g1 + %lo(stat)] */
13320Sstevel@tonic-gate #define	TSTAT_ENABLED_BA	4	/* ba,a  addr (enabled entry) */
13330Sstevel@tonic-gate #define	TSTAT_DISABLED_BA	0	/* ba,a  addr (disabled entry) */
13340Sstevel@tonic-gate 
/*
 * Fill in the interposing trap table for one CPU.  For each of the
 * TSTAT_TOTAL_NENT entries, either the counting "enabled" stub or the
 * pass-through "disabled" stub is copied in, according to tstat_enabled[],
 * and then patched with the entry's counter address (enabled only) and the
 * displacement of the branch to the matching entry of the table at
 * KERNELBASE.
 *
 *	tcpu	per-CPU trapstat state; tcpu_instr and tcpu_ibase must
 *		already be set
 *
 * The caller must hold tstat_lock.
 */
13350Sstevel@tonic-gate static void
13360Sstevel@tonic-gate trapstat_make_traptab(tstat_percpu_t *tcpu)
13370Sstevel@tonic-gate {
13380Sstevel@tonic-gate 	uint32_t *ent;
13390Sstevel@tonic-gate 	uint64_t *stat;
13400Sstevel@tonic-gate 	uintptr_t orig, va, en_baoffs, dis_baoffs;
13410Sstevel@tonic-gate 	int nent;
13420Sstevel@tonic-gate 
13430Sstevel@tonic-gate 	/*
13440Sstevel@tonic-gate 	 * This is the entry in the interposing trap table for enabled trap
13450Sstevel@tonic-gate 	 * table entries.  It loads a counter, increments it and stores it
13460Sstevel@tonic-gate 	 * back before branching to the actual trap table entry.
13470Sstevel@tonic-gate 	 */
13480Sstevel@tonic-gate 	static const uint32_t enabled[TSTAT_ENT_NINSTR] = {
13490Sstevel@tonic-gate 	    0x03000000, 		/* sethi %hi(stat), %g1		*/
13500Sstevel@tonic-gate 	    0xc4586000,			/* ldx   [%g1 + %lo(stat)], %g2	*/
13510Sstevel@tonic-gate 	    0x8400a001,			/* add   %g2, 1, %g2		*/
13520Sstevel@tonic-gate 	    0xc4706000,			/* stx   %g2, [%g1 + %lo(stat)]	*/
13530Sstevel@tonic-gate 	    0x30800000,			/* ba,a addr			*/
13540Sstevel@tonic-gate 	    NOP, NOP, NOP
13550Sstevel@tonic-gate 	};
13560Sstevel@tonic-gate 
13570Sstevel@tonic-gate 	/*
13580Sstevel@tonic-gate 	 * This is the entry in the interposing trap table for disabled trap
13590Sstevel@tonic-gate 	 * table entries.  It simply branches to the actual, underlying trap
13600Sstevel@tonic-gate 	 * table entry.  As explained in the "Implementation Details" section
13610Sstevel@tonic-gate 	 * of the block comment, all TL>0 traps _must_ use the disabled entry;
13620Sstevel@tonic-gate 	 * additional entries may be explicitly disabled through the use
13630Sstevel@tonic-gate 	 * of TSTATIOC_ENTRY/TSTATIOC_NOENTRY.
13640Sstevel@tonic-gate 	 */
13650Sstevel@tonic-gate 	static const uint32_t disabled[TSTAT_ENT_NINSTR] = {
13660Sstevel@tonic-gate 	    0x30800000,			/* ba,a addr			*/
13670Sstevel@tonic-gate 	    NOP, NOP, NOP, NOP, NOP, NOP, NOP,
13680Sstevel@tonic-gate 	};
13690Sstevel@tonic-gate 
13700Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&tstat_lock));
13710Sstevel@tonic-gate 
	/*
	 * Four cursors advance in lock step, one entry per iteration:
	 * ent (where the stub is written), stat (the entry's 64-bit
	 * counter), orig (the branch target, starting at KERNELBASE) and
	 * va (the stub's address relative to tcpu_ibase, used to compute
	 * the branch displacement).
	 */
13720Sstevel@tonic-gate 	ent = tcpu->tcpu_instr->tinst_traptab;
13730Sstevel@tonic-gate 	stat = (uint64_t *)TSTAT_DATA_OFFS(tcpu, tdata_traps);
13740Sstevel@tonic-gate 	orig = KERNELBASE;
13750Sstevel@tonic-gate 	va = (uintptr_t)tcpu->tcpu_ibase;
	/* Byte offset of the "ba,a" within each kind of stub. */
13760Sstevel@tonic-gate 	en_baoffs = TSTAT_ENABLED_BA * sizeof (uint32_t);
13770Sstevel@tonic-gate 	dis_baoffs = TSTAT_DISABLED_BA * sizeof (uint32_t);
13780Sstevel@tonic-gate 
13790Sstevel@tonic-gate 	for (nent = 0; nent < TSTAT_TOTAL_NENT; nent++) {
13800Sstevel@tonic-gate 		if (tstat_enabled[nent]) {
13810Sstevel@tonic-gate 			bcopy(enabled, ent, sizeof (enabled));
1382567Sdmick 			ent[TSTAT_ENABLED_STATHI] |= HI22((uintptr_t)stat);
1383567Sdmick 			ent[TSTAT_ENABLED_STATLO_LD] |= LO10((uintptr_t)stat);
1384567Sdmick 			ent[TSTAT_ENABLED_STATLO_ST] |= LO10((uintptr_t)stat);
13850Sstevel@tonic-gate 			ent[TSTAT_ENABLED_BA] |= DISP22(va + en_baoffs, orig);
13860Sstevel@tonic-gate 		} else {
13870Sstevel@tonic-gate 			bcopy(disabled, ent, sizeof (disabled));
13880Sstevel@tonic-gate 			ent[TSTAT_DISABLED_BA] |= DISP22(va + dis_baoffs, orig);
13890Sstevel@tonic-gate 		}
13900Sstevel@tonic-gate 
		/*
		 * Both stubs are TSTAT_ENT_NINSTR instructions long, so
		 * sizeof (enabled) is the stride for either kind.  The
		 * counter slot is skipped even for disabled entries.
		 */
13910Sstevel@tonic-gate 		stat++;
13920Sstevel@tonic-gate 		orig += sizeof (enabled);
13930Sstevel@tonic-gate 		ent += sizeof (enabled) / sizeof (*ent);
13940Sstevel@tonic-gate 		va += sizeof (enabled);
13950Sstevel@tonic-gate 	}
13960Sstevel@tonic-gate }
13970Sstevel@tonic-gate 
/* The enabled/disabled entry offsets are not needed past this point. */
13980Sstevel@tonic-gate #undef TSTAT_ENABLED_STATHI
13990Sstevel@tonic-gate #undef TSTAT_ENABLED_STATLO_LD
14000Sstevel@tonic-gate #undef TSTAT_ENABLED_STATLO_ST
14010Sstevel@tonic-gate #undef TSTAT_ENABLED_BA
14020Sstevel@tonic-gate #undef TSTAT_DISABLED_BA
14030Sstevel@tonic-gate 
14041772Sjl139090 #ifndef sun4v
14051772Sjl139090 /*
14061772Sjl139090  * See Section A.6 in SPARC v9 Manual.
14071772Sjl139090  * max branch = 4*((2^21)-1) = 8388604
 *
 * Defined for sun4u only; trapstat_setup() asserts that the per-strand
 * trap table offset does not exceed this value.
14081772Sjl139090  */
14091772Sjl139090 #define	MAX_BICC_BRANCH_DISPLACEMENT (4 * ((1 << 21) - 1))
14101772Sjl139090 #endif
14111772Sjl139090 
/*
 * Prepare per-CPU trapstat state for "cpu": choose the instruction and data
 * base addresses, obtain the backing memory, build the interposing trap
 * table (plus the TLB entries when TSTAT_OPT_TLBDATA is set), mark the CPU
 * TSTAT_CPU_ALLOCATED and finally have the target CPU run
 * trapstat_load_tlb() via xc_one().
 *
 *	cpu	id of the CPU to set up; indexes tstat_percpu[]
 *
 * On entry the CPU must be TSTAT_CPU_SELECTED and not yet allocated, with
 * tcpu_pfn, tcpu_instr and tcpu_data all NULL.  The caller must hold both
 * cpu_lock and tstat_lock.
 */
14120Sstevel@tonic-gate static void
14130Sstevel@tonic-gate trapstat_setup(processorid_t cpu)
14140Sstevel@tonic-gate {
14150Sstevel@tonic-gate 	tstat_percpu_t *tcpu = &tstat_percpu[cpu];
14160Sstevel@tonic-gate #ifndef sun4v
14170Sstevel@tonic-gate 	int i;
14180Sstevel@tonic-gate 	caddr_t va;
14190Sstevel@tonic-gate 	pfn_t *pfn;
14201772Sjl139090 	cpu_t *cp;
14211772Sjl139090 	uint_t strand_idx;
14221772Sjl139090 	size_t tstat_offset;
14230Sstevel@tonic-gate #endif
14240Sstevel@tonic-gate 
14250Sstevel@tonic-gate 	ASSERT(tcpu->tcpu_pfn == NULL);
14260Sstevel@tonic-gate 	ASSERT(tcpu->tcpu_instr == NULL);
14270Sstevel@tonic-gate 	ASSERT(tcpu->tcpu_data == NULL);
14280Sstevel@tonic-gate 	ASSERT(tcpu->tcpu_flags & TSTAT_CPU_SELECTED);
14290Sstevel@tonic-gate 	ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED));
14300Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
14310Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&tstat_lock));
14320Sstevel@tonic-gate 
14330Sstevel@tonic-gate 	/*
14340Sstevel@tonic-gate 	 * The lower fifteen bits of the %tba are always read as zero; we must
14350Sstevel@tonic-gate 	 * align our instruction base address appropriately.
14360Sstevel@tonic-gate 	 */
14370Sstevel@tonic-gate #ifndef sun4v
	/* By default the table sits tstat_total_size below KERNELBASE. */
14381772Sjl139090 	tstat_offset = tstat_total_size;
14391772Sjl139090 
14401772Sjl139090 	cp = cpu_get(cpu);
14411772Sjl139090 	ASSERT(cp != NULL);
14423434Sesaxe 	if ((strand_idx = cpu ^ pg_plat_hw_instance_id(cp, PGHW_IPIPE)) != 0) {
14431772Sjl139090 		/*
14441772Sjl139090 		 * On sun4u platforms with multiple CPUs sharing the MMU
14451772Sjl139090 		 * (Olympus-C has 2 strands per core), each CPU uses a
14461772Sjl139090 		 * disjoint trap table.  The indexing is based on the
14471772Sjl139090 		 * strand id, which is obtained by XOR'ing the cpuid with
14481772Sjl139090 		 * the coreid.
14491772Sjl139090 		 */
14501772Sjl139090 		tstat_offset += tstat_total_size * strand_idx;
14511772Sjl139090 
14521772Sjl139090 		/*
14531772Sjl139090 		 * Offset must be less than the maximum PC-relative branch
14541772Sjl139090 		 * displacement for Bicc variants.  See the Implementation
14551772Sjl139090 		 * Details comment.
14561772Sjl139090 		 */
14571772Sjl139090 		ASSERT(tstat_offset <= MAX_BICC_BRANCH_DISPLACEMENT);
14581772Sjl139090 	}
14591772Sjl139090 
	/*
	 * Instructions start at the masked address, data follows
	 * TSTAT_INSTR_SIZE later, and tcpu_vabase is the same as the
	 * instruction base on sun4u.
	 */
14601772Sjl139090 	tcpu->tcpu_ibase = (caddr_t)((KERNELBASE - tstat_offset)
14610Sstevel@tonic-gate 		& TSTAT_TBA_MASK);
14620Sstevel@tonic-gate 	tcpu->tcpu_dbase = tcpu->tcpu_ibase + TSTAT_INSTR_SIZE;
14630Sstevel@tonic-gate 	tcpu->tcpu_vabase = tcpu->tcpu_ibase;
14640Sstevel@tonic-gate 
	/*
	 * NOTE(review): the size passed to vmem_alloc() and bzero() here is
	 * tstat_total_pages bytes, whereas the two loops below store one
	 * pfn_t per instruction/data page through this pointer.  Confirm
	 * that the arena's quantum makes the allocation large enough;
	 * trapstat_teardown() frees with the same size.
	 */
14650Sstevel@tonic-gate 	tcpu->tcpu_pfn = vmem_alloc(tstat_arena, tstat_total_pages, VM_SLEEP);
14660Sstevel@tonic-gate 	bzero(tcpu->tcpu_pfn, tstat_total_pages);
14670Sstevel@tonic-gate 	pfn = tcpu->tcpu_pfn;
14680Sstevel@tonic-gate 
14690Sstevel@tonic-gate 	tcpu->tcpu_instr = vmem_alloc(tstat_arena, TSTAT_INSTR_SIZE, VM_SLEEP);
14700Sstevel@tonic-gate 
	/* Record the page frame number of each instruction page. */
14710Sstevel@tonic-gate 	va = (caddr_t)tcpu->tcpu_instr;
14720Sstevel@tonic-gate 	for (i = 0; i < TSTAT_INSTR_PAGES; i++, va += MMU_PAGESIZE)
14730Sstevel@tonic-gate 		*pfn++ = hat_getpfnum(kas.a_hat, va);
14740Sstevel@tonic-gate 
14750Sstevel@tonic-gate 	/*
14760Sstevel@tonic-gate 	 * We must be sure that the pages that we will use to examine the data
14770Sstevel@tonic-gate 	 * have the same virtual color as the pages to which the data is being
14780Sstevel@tonic-gate 	 * recorded, hence the alignment and phase constraints on the
14790Sstevel@tonic-gate 	 * allocation.
14800Sstevel@tonic-gate 	 */
14810Sstevel@tonic-gate 	tcpu->tcpu_data = vmem_xalloc(tstat_arena, tstat_data_size,
14820Sstevel@tonic-gate 	    shm_alignment, (uintptr_t)tcpu->tcpu_dbase & (shm_alignment - 1),
14830Sstevel@tonic-gate 	    0, 0, NULL, VM_SLEEP);
14840Sstevel@tonic-gate 	bzero(tcpu->tcpu_data, tstat_data_size);
14850Sstevel@tonic-gate 	tcpu->tcpu_data->tdata_cpuid = cpu;
14860Sstevel@tonic-gate 
	/* The data pages' frame numbers follow the instruction pages'. */
14870Sstevel@tonic-gate 	va = (caddr_t)tcpu->tcpu_data;
14880Sstevel@tonic-gate 	for (i = 0; i < tstat_data_pages; i++, va += MMU_PAGESIZE)
14890Sstevel@tonic-gate 		*pfn++ = hat_getpfnum(kas.a_hat, va);
14900Sstevel@tonic-gate #else /* sun4v */
	/*
	 * On sun4v nothing is allocated here: every CPU shares the region
	 * starting MMU_PAGESIZE4M below KERNELBASE (backed by tstat_va and
	 * tstat_pfn) and takes the slot of (1 + ~TSTAT_TBA_MASK) bytes
	 * selected by its cpu id.  The ASSERT checks that one CPU's
	 * instructions and data fit within a slot.
	 */
14910Sstevel@tonic-gate 	ASSERT(!(tstat_total_size > (1 + ~TSTAT_TBA_MASK)));
14920Sstevel@tonic-gate 	tcpu->tcpu_vabase = (caddr_t)(KERNELBASE - MMU_PAGESIZE4M);
14930Sstevel@tonic-gate 	tcpu->tcpu_ibase = tcpu->tcpu_vabase + (cpu * (1 + ~TSTAT_TBA_MASK));
14940Sstevel@tonic-gate 	tcpu->tcpu_dbase = tcpu->tcpu_ibase + TSTAT_INSTR_SIZE;
14950Sstevel@tonic-gate 
14960Sstevel@tonic-gate 	tcpu->tcpu_pfn = &tstat_pfn;
14970Sstevel@tonic-gate 	tcpu->tcpu_instr = (tstat_instr_t *)(tstat_va + (cpu *
14980Sstevel@tonic-gate 		(1 + ~TSTAT_TBA_MASK)));
14990Sstevel@tonic-gate 	tcpu->tcpu_data = (tstat_data_t *)(tstat_va + (cpu *
15000Sstevel@tonic-gate 		(1 + ~TSTAT_TBA_MASK)) + TSTAT_INSTR_SIZE);
15010Sstevel@tonic-gate 	bzero(tcpu->tcpu_data, tstat_data_size);
15020Sstevel@tonic-gate 	tcpu->tcpu_data->tdata_cpuid = cpu;
15030Sstevel@tonic-gate #endif /* sun4v */
15040Sstevel@tonic-gate 
15050Sstevel@tonic-gate 	/*
15060Sstevel@tonic-gate 	 * Now that we have all of the instruction and data pages allocated,
15070Sstevel@tonic-gate 	 * make the trap table from scratch.
15080Sstevel@tonic-gate 	 */
15090Sstevel@tonic-gate 	trapstat_make_traptab(tcpu);
15100Sstevel@tonic-gate 
15110Sstevel@tonic-gate 	if (tstat_options & TSTAT_OPT_TLBDATA) {
15120Sstevel@tonic-gate 		/*
15130Sstevel@tonic-gate 		 * TLB Statistics have been specified; set up the I- and D-TLB
15140Sstevel@tonic-gate 		 * entries and corresponding TLB return entries.
15150Sstevel@tonic-gate 		 */
15161050Sgirish #ifndef sun4v
15170Sstevel@tonic-gate 		trapstat_tlbent(tcpu, TSTAT_ENT_ITLBMISS);
15180Sstevel@tonic-gate 		trapstat_tlbent(tcpu, TSTAT_ENT_DTLBMISS);
15191050Sgirish #else
		/*
		 * sun4v interposes on either the IMMU/DMMU miss entries or
		 * the ITLB/DTLB miss entries, depending on
		 * tstat_fast_tlbstat.
		 */
15201050Sgirish 		if (tstat_fast_tlbstat) {
15211050Sgirish 			trapstat_tlbent(tcpu, TSTAT_ENT_IMMUMISS);
15221050Sgirish 			trapstat_tlbent(tcpu, TSTAT_ENT_DMMUMISS);
15231050Sgirish 		} else {
15241050Sgirish 			trapstat_tlbent(tcpu, TSTAT_ENT_ITLBMISS);
15251050Sgirish 			trapstat_tlbent(tcpu, TSTAT_ENT_DTLBMISS);
15261050Sgirish 		}
15271050Sgirish #endif
15280Sstevel@tonic-gate 	}
15290Sstevel@tonic-gate 
15300Sstevel@tonic-gate 	tcpu->tcpu_flags |= TSTAT_CPU_ALLOCATED;
15310Sstevel@tonic-gate 
15320Sstevel@tonic-gate 	/*
15330Sstevel@tonic-gate 	 * Finally, get the target CPU to load the locked pages into its TLBs.
15340Sstevel@tonic-gate 	 */
15350Sstevel@tonic-gate 	xc_one(cpu, (xcfunc_t *)trapstat_load_tlb, 0, 0);
15360Sstevel@tonic-gate }
15370Sstevel@tonic-gate 
/*
 * Undo trapstat_setup() for "cpu": release the per-CPU memory (sun4u only),
 * send the target CPU a cross-trap for the trapstat virtual addresses, and
 * reset the per-CPU pointers and the TSTAT_CPU_ALLOCATED flag.
 *
 *	cpu	id of the CPU to tear down; indexes tstat_percpu[]
 *
 * On entry the CPU must be selected and allocated but not enabled.  The
 * caller must hold both cpu_lock and tstat_lock.
 */
15380Sstevel@tonic-gate static void
15390Sstevel@tonic-gate trapstat_teardown(processorid_t cpu)
15400Sstevel@tonic-gate {
15410Sstevel@tonic-gate 	tstat_percpu_t *tcpu = &tstat_percpu[cpu];
15420Sstevel@tonic-gate #ifndef sun4v
15430Sstevel@tonic-gate 	int i;
15440Sstevel@tonic-gate #endif
15450Sstevel@tonic-gate 	caddr_t va = tcpu->tcpu_vabase;
15460Sstevel@tonic-gate 
15470Sstevel@tonic-gate 	ASSERT(tcpu->tcpu_pfn != NULL);
15480Sstevel@tonic-gate 	ASSERT(tcpu->tcpu_instr != NULL);
15490Sstevel@tonic-gate 	ASSERT(tcpu->tcpu_data != NULL);
15500Sstevel@tonic-gate 	ASSERT(tcpu->tcpu_flags & TSTAT_CPU_SELECTED);
15510Sstevel@tonic-gate 	ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED);
15520Sstevel@tonic-gate 	ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED));
15530Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
15540Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&tstat_lock));
15550Sstevel@tonic-gate 
15560Sstevel@tonic-gate #ifndef sun4v
	/*
	 * The sizes here must match the ones used by trapstat_setup() when
	 * allocating from tstat_arena.
	 */
15570Sstevel@tonic-gate 	vmem_free(tstat_arena, tcpu->tcpu_pfn, tstat_total_pages);
15580Sstevel@tonic-gate 	vmem_free(tstat_arena, tcpu->tcpu_instr, TSTAT_INSTR_SIZE);
15590Sstevel@tonic-gate 	vmem_free(tstat_arena, tcpu->tcpu_data, tstat_data_size);
15600Sstevel@tonic-gate 
	/*
	 * Send vtag_flushpage_tl1 to the target CPU once per page of the
	 * trapstat range, starting at tcpu_vabase.
	 */
15610Sstevel@tonic-gate 	for (i = 0; i < tstat_total_pages; i++, va += MMU_PAGESIZE) {
15622241Shuah 		xt_one(cpu, vtag_flushpage_tl1, (uint64_t)va,
15632241Shuah 		    (uint64_t)ksfmmup);
15640Sstevel@tonic-gate 	}
15650Sstevel@tonic-gate #else
	/*
	 * Nothing is freed on sun4v: trapstat_setup() points the per-CPU
	 * areas into tstat_va rather than allocating them.  A single
	 * vtag_unmap_perm_tl1 cross-trap is sent for tcpu_vabase.
	 */
15660Sstevel@tonic-gate 	xt_one(cpu, vtag_unmap_perm_tl1, (uint64_t)va, KCONTEXT);
15670Sstevel@tonic-gate #endif
15680Sstevel@tonic-gate 
	/* Restore the state that trapstat_setup() asserts on entry. */
15690Sstevel@tonic-gate 	tcpu->tcpu_pfn = NULL;
15700Sstevel@tonic-gate 	tcpu->tcpu_instr = NULL;
15710Sstevel@tonic-gate 	tcpu->tcpu_data = NULL;
15720Sstevel@tonic-gate 	tcpu->tcpu_flags &= ~TSTAT_CPU_ALLOCATED;
15730Sstevel@tonic-gate }
15740Sstevel@tonic-gate 
15750Sstevel@tonic-gate static int
15760Sstevel@tonic-gate trapstat_go()
15770Sstevel@tonic-gate {
15780Sstevel@tonic-gate 	cpu_t *cp;
15790Sstevel@tonic-gate 
15800Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
15810Sstevel@tonic-gate 	mutex_enter(&tstat_lock);
15820Sstevel@tonic-gate 
15830Sstevel@tonic-gate 	if (tstat_running) {
15840Sstevel@tonic-gate 		mutex_exit(&tstat_lock);
15850Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
15860Sstevel@tonic-gate 		return (EBUSY);
15870Sstevel@tonic-gate 	}
15880Sstevel@tonic-gate 
15890Sstevel@tonic-gate #ifdef sun4v
15900Sstevel@tonic-gate 	/*
15911050Sgirish 	 * Allocate large page to hold interposing tables.
15920Sstevel@tonic-gate 	 */
15930Sstevel@tonic-gate 	tstat_va = contig_mem_alloc(MMU_PAGESIZE4M);
15940Sstevel@tonic-gate 	tstat_pfn = va_to_pfn(tstat_va);
1595*4204Sha137994 	if (tstat_pfn == PFN_INVALID) {
1596*4204Sha137994 		mutex_exit(&tstat_lock);
1597*4204Sha137994 		mutex_exit(&cpu_lock);
15980Sstevel@tonic-gate 		return (EAGAIN);
1599*4204Sha137994 	}
16001050Sgirish 
16011050Sgirish 	/*
16021050Sgirish 	 * For detailed TLB statistics, invoke CPU specific interface
16031050Sgirish 	 * to see if it supports a low overhead interface to collect
16041050Sgirish 	 * TSB hit statistics. If so, make set tstat_fast_tlbstat flag
16051050Sgirish 	 * to reflect that.
16061050Sgirish 	 */
16071050Sgirish 	if (tstat_options & TSTAT_OPT_TLBDATA) {
16081050Sgirish 		int error;
16091050Sgirish 
16101050Sgirish 		error = cpu_trapstat_conf(CPU_TSTATCONF_INIT);
16111050Sgirish 		if (error == 0)
16121050Sgirish 			tstat_fast_tlbstat = B_TRUE;
16131050Sgirish 		else if (error != ENOTSUP) {
16141050Sgirish 			contig_mem_free(tstat_va, MMU_PAGESIZE4M);
1615*4204Sha137994 			mutex_exit(&tstat_lock);
1616*4204Sha137994 			mutex_exit(&cpu_lock);
16171050Sgirish 			return (error);
16181050Sgirish 		}
16190Sstevel@tonic-gate 	}
16200Sstevel@tonic-gate #endif
16210Sstevel@tonic-gate 
16220Sstevel@tonic-gate 	/*
16230Sstevel@tonic-gate 	 * First, perform any necessary hot patching.
16240Sstevel@tonic-gate 	 */
16250Sstevel@tonic-gate 	trapstat_hotpatch();
16260Sstevel@tonic-gate 
16270Sstevel@tonic-gate 	/*
16280Sstevel@tonic-gate 	 * Allocate the resources we'll need to measure probe effect.
16290Sstevel@tonic-gate 	 */
16300Sstevel@tonic-gate 	trapstat_probe_alloc();
16310Sstevel@tonic-gate 
16320Sstevel@tonic-gate 
16330Sstevel@tonic-gate 	cp = cpu_list;
16340Sstevel@tonic-gate 	do {
16350Sstevel@tonic-gate 		if (!(tstat_percpu[cp->cpu_id].tcpu_flags & TSTAT_CPU_SELECTED))
16360Sstevel@tonic-gate 			continue;
16370Sstevel@tonic-gate 
16380Sstevel@tonic-gate 		trapstat_setup(cp->cpu_id);
16390Sstevel@tonic-gate 
16400Sstevel@tonic-gate 		/*
16410Sstevel@tonic-gate 		 * Note that due to trapstat_probe()'s use of global data,
16420Sstevel@tonic-gate 		 * we determine the probe effect on each CPU serially instead
16430Sstevel@tonic-gate 		 * of in parallel with an xc_all().
16440Sstevel@tonic-gate 		 */
16450Sstevel@tonic-gate 		xc_one(cp->cpu_id, (xcfunc_t *)trapstat_probe, 0, 0);
16460Sstevel@tonic-gate 	} while ((cp = cp->cpu_next) != cpu_list);
16470Sstevel@tonic-gate 
16480Sstevel@tonic-gate 	xc_all((xcfunc_t *)trapstat_enable, 0, 0);
16490Sstevel@tonic-gate 
16500Sstevel@tonic-gate 	trapstat_probe_free();
16510Sstevel@tonic-gate 	tstat_running = 1;
16520Sstevel@tonic-gate 	mutex_exit(&tstat_lock);
16530Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
16540Sstevel@tonic-gate 
16550Sstevel@tonic-gate 	return (0);
16560Sstevel@tonic-gate }
16570Sstevel@tonic-gate 
16580Sstevel@tonic-gate static int
16590Sstevel@tonic-gate trapstat_stop()
16600Sstevel@tonic-gate {
16610Sstevel@tonic-gate 	int i;
16620Sstevel@tonic-gate 
16630Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
16640Sstevel@tonic-gate 	mutex_enter(&tstat_lock);
16650Sstevel@tonic-gate 	if (!tstat_running) {
16660Sstevel@tonic-gate 		mutex_exit(&tstat_lock);
16670Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
16680Sstevel@tonic-gate 		return (ENXIO);
16690Sstevel@tonic-gate 	}
16700Sstevel@tonic-gate 
16710Sstevel@tonic-gate 	xc_all((xcfunc_t *)trapstat_disable, 0, 0);
16720Sstevel@tonic-gate 
16730Sstevel@tonic-gate 	for (i = 0; i <= max_cpuid; i++) {
16740Sstevel@tonic-gate 		if (tstat_percpu[i].tcpu_flags & TSTAT_CPU_ALLOCATED)
16750Sstevel@tonic-gate 			trapstat_teardown(i);
16760Sstevel@tonic-gate 	}
16770Sstevel@tonic-gate 
16780Sstevel@tonic-gate #ifdef sun4v
16791050Sgirish 	if (tstat_options & TSTAT_OPT_TLBDATA)
16801050Sgirish 		cpu_trapstat_conf(CPU_TSTATCONF_FINI);
16810Sstevel@tonic-gate 	contig_mem_free(tstat_va, MMU_PAGESIZE4M);
16820Sstevel@tonic-gate #endif
16830Sstevel@tonic-gate 	trapstat_hotpatch();
16840Sstevel@tonic-gate 	tstat_running = 0;
16850Sstevel@tonic-gate 	mutex_exit(&tstat_lock);
16860Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
16870Sstevel@tonic-gate 
16880Sstevel@tonic-gate 	return (0);
16890Sstevel@tonic-gate }
16900Sstevel@tonic-gate 
16910Sstevel@tonic-gate /*
16920Sstevel@tonic-gate  * This is trapstat's DR CPU configuration callback.  It's called (with
16930Sstevel@tonic-gate  * cpu_lock held) to unconfigure a newly powered-off CPU, or to configure a
16940Sstevel@tonic-gate  * powered-off CPU that is to be brought into the system.  We need only take
16950Sstevel@tonic-gate  * action in the unconfigure case:  because a powered-off CPU will have its
16960Sstevel@tonic-gate  * trap table restored to KERNELBASE if it is ever powered back on, we must
16970Sstevel@tonic-gate  * update the flags to reflect that trapstat is no longer enabled on the
16980Sstevel@tonic-gate  * powered-off CPU.  Note that this means that a TSTAT_CPU_ENABLED CPU that
16990Sstevel@tonic-gate  * is unconfigured/powered off and later powered back on/reconfigured will
17000Sstevel@tonic-gate  * _not_ be re-TSTAT_CPU_ENABLED.
17010Sstevel@tonic-gate  */
17020Sstevel@tonic-gate static int
17030Sstevel@tonic-gate trapstat_cpu_setup(cpu_setup_t what, processorid_t cpu)
17040Sstevel@tonic-gate {
17050Sstevel@tonic-gate 	tstat_percpu_t *tcpu = &tstat_percpu[cpu];
17060Sstevel@tonic-gate 
17070Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
17080Sstevel@tonic-gate 	mutex_enter(&tstat_lock);
17090Sstevel@tonic-gate 
17100Sstevel@tonic-gate 	if (!tstat_running) {
17110Sstevel@tonic-gate 		mutex_exit(&tstat_lock);
17120Sstevel@tonic-gate 		return (0);
17130Sstevel@tonic-gate 	}
17140Sstevel@tonic-gate 
17150Sstevel@tonic-gate 	switch (what) {
17160Sstevel@tonic-gate 	case CPU_CONFIG:
17170Sstevel@tonic-gate 		ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED));
17180Sstevel@tonic-gate 		break;
17190Sstevel@tonic-gate 
17200Sstevel@tonic-gate 	case CPU_UNCONFIG:
17211991Sheppo 		if (tcpu->tcpu_flags & TSTAT_CPU_ENABLED) {
17220Sstevel@tonic-gate 			tcpu->tcpu_flags &= ~TSTAT_CPU_ENABLED;
17231991Sheppo #ifdef	sun4v
17241991Sheppo 			/*
17251991Sheppo 			 * A power-off, causes the cpu mondo queues to be
17261991Sheppo 			 * unconfigured on sun4v. Since we can't teardown
17271991Sheppo 			 * trapstat's mappings on the cpu that is going away,
17281991Sheppo 			 * we simply mark it as not allocated. This will
17291991Sheppo 			 * prevent a teardown on a cpu with the same cpu id
17301991Sheppo 			 * that might have been added while trapstat is running.
17311991Sheppo 			 */
17321991Sheppo 			if (tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED) {
17331991Sheppo 				tcpu->tcpu_pfn = NULL;
17341991Sheppo 				tcpu->tcpu_instr = NULL;
17351991Sheppo 				tcpu->tcpu_data = NULL;
17361991Sheppo 				tcpu->tcpu_flags &= ~TSTAT_CPU_ALLOCATED;
17371991Sheppo 			}
17381991Sheppo #endif
17391991Sheppo 		}
17400Sstevel@tonic-gate 		break;
17410Sstevel@tonic-gate 
17420Sstevel@tonic-gate 	default:
17430Sstevel@tonic-gate 		break;
17440Sstevel@tonic-gate 	}
17450Sstevel@tonic-gate 
17460Sstevel@tonic-gate 	mutex_exit(&tstat_lock);
17470Sstevel@tonic-gate 	return (0);
17480Sstevel@tonic-gate }
17490Sstevel@tonic-gate 
17500Sstevel@tonic-gate /*
17510Sstevel@tonic-gate  * This is called before a CPR suspend and after a CPR resume.  We don't have
17520Sstevel@tonic-gate  * anything to do before a suspend, but after a restart we must restore the
17530Sstevel@tonic-gate  * trap table to be our interposing trap table.  However, we don't actually
17540Sstevel@tonic-gate  * know whether or not the CPUs have been powered off -- this routine may be
17550Sstevel@tonic-gate  * called while restoring from a failed CPR suspend.  We thus run through each
17560Sstevel@tonic-gate  * TSTAT_CPU_ENABLED CPU, and explicitly destroy and reestablish its
17570Sstevel@tonic-gate  * interposing trap table.  This assures that our state is correct regardless
17580Sstevel@tonic-gate  * of whether or not the CPU has been newly powered on.
17590Sstevel@tonic-gate  */
17600Sstevel@tonic-gate /*ARGSUSED*/
17610Sstevel@tonic-gate static boolean_t
17620Sstevel@tonic-gate trapstat_cpr(void *arg, int code)
17630Sstevel@tonic-gate {
17640Sstevel@tonic-gate 	cpu_t *cp;
17650Sstevel@tonic-gate 
17660Sstevel@tonic-gate 	if (code == CB_CODE_CPR_CHKPT)
17670Sstevel@tonic-gate 		return (B_TRUE);
17680Sstevel@tonic-gate 
17690Sstevel@tonic-gate 	ASSERT(code == CB_CODE_CPR_RESUME);
17700Sstevel@tonic-gate 
17710Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
17720Sstevel@tonic-gate 	mutex_enter(&tstat_lock);
17730Sstevel@tonic-gate 
17740Sstevel@tonic-gate 	if (!tstat_running) {
17750Sstevel@tonic-gate 		mutex_exit(&tstat_lock);
17760Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
17770Sstevel@tonic-gate 		return (B_TRUE);
17780Sstevel@tonic-gate 	}
17790Sstevel@tonic-gate 
17800Sstevel@tonic-gate 	cp = cpu_list;
17810Sstevel@tonic-gate 	do {
17820Sstevel@tonic-gate 		tstat_percpu_t *tcpu = &tstat_percpu[cp->cpu_id];
17830Sstevel@tonic-gate 
17840Sstevel@tonic-gate 		if (!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED))
17850Sstevel@tonic-gate 			continue;
17860Sstevel@tonic-gate 
17870Sstevel@tonic-gate 		ASSERT(tcpu->tcpu_flags & TSTAT_CPU_SELECTED);
17880Sstevel@tonic-gate 		ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED);
17890Sstevel@tonic-gate 
17900Sstevel@tonic-gate 		xc_one(cp->cpu_id, (xcfunc_t *)trapstat_disable, 0, 0);
17910Sstevel@tonic-gate 		ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED));
17920Sstevel@tonic-gate 
17930Sstevel@tonic-gate 		/*
17940Sstevel@tonic-gate 		 * Preserve this CPU's data in tstat_buffer and rip down its
17950Sstevel@tonic-gate 		 * interposing trap table.
17960Sstevel@tonic-gate 		 */
17970Sstevel@tonic-gate 		bcopy(tcpu->tcpu_data, tstat_buffer, tstat_data_t_size);
17980Sstevel@tonic-gate 		trapstat_teardown(cp->cpu_id);
17990Sstevel@tonic-gate 		ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED));
18000Sstevel@tonic-gate 
18010Sstevel@tonic-gate 		/*
18020Sstevel@tonic-gate 		 * Reestablish the interposing trap table and restore the old
18030Sstevel@tonic-gate 		 * data.
18040Sstevel@tonic-gate 		 */
18050Sstevel@tonic-gate 		trapstat_setup(cp->cpu_id);
18060Sstevel@tonic-gate 		ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED);
18070Sstevel@tonic-gate 		bcopy(tstat_buffer, tcpu->tcpu_data, tstat_data_t_size);
18080Sstevel@tonic-gate 
18090Sstevel@tonic-gate 		xc_one(cp->cpu_id, (xcfunc_t *)trapstat_enable, 0, 0);
18100Sstevel@tonic-gate 	} while ((cp = cp->cpu_next) != cpu_list);
18110Sstevel@tonic-gate 
18120Sstevel@tonic-gate 	mutex_exit(&tstat_lock);
18130Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
18140Sstevel@tonic-gate 
18150Sstevel@tonic-gate 	return (B_TRUE);
18160Sstevel@tonic-gate }
18170Sstevel@tonic-gate 
18180Sstevel@tonic-gate /*ARGSUSED*/
18190Sstevel@tonic-gate static int
18200Sstevel@tonic-gate trapstat_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
18210Sstevel@tonic-gate {
18220Sstevel@tonic-gate 	int i;
18230Sstevel@tonic-gate 
18240Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
18250Sstevel@tonic-gate 	mutex_enter(&tstat_lock);
18260Sstevel@tonic-gate 	if (tstat_open != 0) {
18270Sstevel@tonic-gate 		mutex_exit(&tstat_lock);
18280Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
18290Sstevel@tonic-gate 		return (EBUSY);
18300Sstevel@tonic-gate 	}
18310Sstevel@tonic-gate 
18320Sstevel@tonic-gate 	/*
18330Sstevel@tonic-gate 	 * Register this in open() rather than in attach() to prevent deadlock
18340Sstevel@tonic-gate 	 * with DR code. During attach, I/O device tree locks are grabbed
18350Sstevel@tonic-gate 	 * before trapstat_attach() is invoked - registering in attach
18360Sstevel@tonic-gate 	 * will result in the lock order: device tree lock, cpu_lock.
18370Sstevel@tonic-gate 	 * DR code however requires that cpu_lock be acquired before
18380Sstevel@tonic-gate 	 * device tree locks.
18390Sstevel@tonic-gate 	 */
18400Sstevel@tonic-gate 	ASSERT(!tstat_running);
18410Sstevel@tonic-gate 	register_cpu_setup_func((cpu_setup_func_t *)trapstat_cpu_setup, NULL);
18420Sstevel@tonic-gate 
18430Sstevel@tonic-gate 	/*
18440Sstevel@tonic-gate 	 * Clear all options.  And until specific CPUs are specified, we'll
18450Sstevel@tonic-gate 	 * mark all CPUs as selected.
18460Sstevel@tonic-gate 	 */
18470Sstevel@tonic-gate 	tstat_options = 0;
18480Sstevel@tonic-gate 
18490Sstevel@tonic-gate 	for (i = 0; i <= max_cpuid; i++)
18500Sstevel@tonic-gate 		tstat_percpu[i].tcpu_flags |= TSTAT_CPU_SELECTED;
18510Sstevel@tonic-gate 
18520Sstevel@tonic-gate 	/*
18530Sstevel@tonic-gate 	 * By default, all traps at TL=0 are enabled.  Traps at TL>0 must
18540Sstevel@tonic-gate 	 * be disabled.
18550Sstevel@tonic-gate 	 */
18560Sstevel@tonic-gate 	for (i = 0; i < TSTAT_TOTAL_NENT; i++)
18570Sstevel@tonic-gate 		tstat_enabled[i] = i < TSTAT_NENT ? 1 : 0;
18580Sstevel@tonic-gate 
18590Sstevel@tonic-gate 	tstat_open = 1;
18600Sstevel@tonic-gate 	mutex_exit(&tstat_lock);
18610Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
18620Sstevel@tonic-gate 
18630Sstevel@tonic-gate 	return (0);
18640Sstevel@tonic-gate }
18650Sstevel@tonic-gate 
18660Sstevel@tonic-gate /*ARGSUSED*/
18670Sstevel@tonic-gate static int
18680Sstevel@tonic-gate trapstat_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
18690Sstevel@tonic-gate {
18700Sstevel@tonic-gate 	(void) trapstat_stop();
18710Sstevel@tonic-gate 
18720Sstevel@tonic-gate 	ASSERT(!tstat_running);
18730Sstevel@tonic-gate 
18740Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
18750Sstevel@tonic-gate 	unregister_cpu_setup_func((cpu_setup_func_t *)trapstat_cpu_setup, NULL);
18760Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
18770Sstevel@tonic-gate 
18780Sstevel@tonic-gate 	tstat_open = 0;
18790Sstevel@tonic-gate 	return (DDI_SUCCESS);
18800Sstevel@tonic-gate }
18810Sstevel@tonic-gate 
18820Sstevel@tonic-gate static int
18830Sstevel@tonic-gate trapstat_option(int option)
18840Sstevel@tonic-gate {
18850Sstevel@tonic-gate 	mutex_enter(&tstat_lock);
18860Sstevel@tonic-gate 
18870Sstevel@tonic-gate 	if (tstat_running) {
18880Sstevel@tonic-gate 		mutex_exit(&tstat_lock);
18890Sstevel@tonic-gate 		return (EBUSY);
18900Sstevel@tonic-gate 	}
18910Sstevel@tonic-gate 
18920Sstevel@tonic-gate 	tstat_options |= option;
18930Sstevel@tonic-gate 	mutex_exit(&tstat_lock);
18940Sstevel@tonic-gate 
18950Sstevel@tonic-gate 	return (0);
18960Sstevel@tonic-gate }
18970Sstevel@tonic-gate 
18980Sstevel@tonic-gate /*ARGSUSED*/
18990Sstevel@tonic-gate static int
19000Sstevel@tonic-gate trapstat_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *crd, int *rval)
19010Sstevel@tonic-gate {
19020Sstevel@tonic-gate 	int i, j, out;
19030Sstevel@tonic-gate 	size_t dsize;
19040Sstevel@tonic-gate 
19050Sstevel@tonic-gate 	switch (cmd) {
19060Sstevel@tonic-gate 	case TSTATIOC_GO:
19070Sstevel@tonic-gate 		return (trapstat_go());
19080Sstevel@tonic-gate 
19090Sstevel@tonic-gate 	case TSTATIOC_NOGO:
19100Sstevel@tonic-gate 		return (trapstat_option(TSTAT_OPT_NOGO));
19110Sstevel@tonic-gate 
19120Sstevel@tonic-gate 	case TSTATIOC_STOP:
19130Sstevel@tonic-gate 		return (trapstat_stop());
19140Sstevel@tonic-gate 
19150Sstevel@tonic-gate 	case TSTATIOC_CPU:
19160Sstevel@tonic-gate 		if (arg < 0 || arg > max_cpuid)
19170Sstevel@tonic-gate 			return (EINVAL);
19180Sstevel@tonic-gate 		/*FALLTHROUGH*/
19190Sstevel@tonic-gate 
19200Sstevel@tonic-gate 	case TSTATIOC_NOCPU:
19210Sstevel@tonic-gate 		mutex_enter(&tstat_lock);
19220Sstevel@tonic-gate 
19230Sstevel@tonic-gate 		if (tstat_running) {
19240Sstevel@tonic-gate 			mutex_exit(&tstat_lock);
19250Sstevel@tonic-gate 			return (EBUSY);
19260Sstevel@tonic-gate 		}
19270Sstevel@tonic-gate 
19280Sstevel@tonic-gate 		/*
19290Sstevel@tonic-gate 		 * If this is the first CPU to be specified (or if we are
19300Sstevel@tonic-gate 		 * being asked to explicitly de-select CPUs), disable all CPUs.
19310Sstevel@tonic-gate 		 */
19320Sstevel@tonic-gate 		if (!(tstat_options & TSTAT_OPT_CPU) || cmd == TSTATIOC_NOCPU) {
19330Sstevel@tonic-gate 			tstat_options |= TSTAT_OPT_CPU;
19340Sstevel@tonic-gate 
19350Sstevel@tonic-gate 			for (i = 0; i <= max_cpuid; i++) {
19360Sstevel@tonic-gate 				tstat_percpu_t *tcpu = &tstat_percpu[i];
19370Sstevel@tonic-gate 
19380Sstevel@tonic-gate 				ASSERT(cmd == TSTATIOC_NOCPU ||
19390Sstevel@tonic-gate 				    (tcpu->tcpu_flags & TSTAT_CPU_SELECTED));
19400Sstevel@tonic-gate 				tcpu->tcpu_flags &= ~TSTAT_CPU_SELECTED;
19410Sstevel@tonic-gate 			}
19420Sstevel@tonic-gate 		}
19430Sstevel@tonic-gate 
19440Sstevel@tonic-gate 		if (cmd == TSTATIOC_CPU)
19450Sstevel@tonic-gate 			tstat_percpu[arg].tcpu_flags |= TSTAT_CPU_SELECTED;
19460Sstevel@tonic-gate 
19470Sstevel@tonic-gate 		mutex_exit(&tstat_lock);
19480Sstevel@tonic-gate 
19490Sstevel@tonic-gate 		return (0);
19500Sstevel@tonic-gate 
19510Sstevel@tonic-gate 	case TSTATIOC_ENTRY:
19520Sstevel@tonic-gate 		mutex_enter(&tstat_lock);
19530Sstevel@tonic-gate 
19540Sstevel@tonic-gate 		if (tstat_running) {
19550Sstevel@tonic-gate 			mutex_exit(&tstat_lock);
19560Sstevel@tonic-gate 			return (EBUSY);
19570Sstevel@tonic-gate 		}
19580Sstevel@tonic-gate 
19590Sstevel@tonic-gate 		if (arg >= TSTAT_NENT || arg < 0) {
19600Sstevel@tonic-gate 			mutex_exit(&tstat_lock);
19610Sstevel@tonic-gate 			return (EINVAL);
19620Sstevel@tonic-gate 		}
19630Sstevel@tonic-gate 
19640Sstevel@tonic-gate 		if (!(tstat_options & TSTAT_OPT_ENTRY)) {
19650Sstevel@tonic-gate 			/*
19660Sstevel@tonic-gate 			 * If this is the first entry that we are explicitly
19670Sstevel@tonic-gate 			 * enabling, explicitly disable every TL=0 entry.
19680Sstevel@tonic-gate 			 */
19690Sstevel@tonic-gate 			for (i = 0; i < TSTAT_NENT; i++)
19700Sstevel@tonic-gate 				tstat_enabled[i] = 0;
19710Sstevel@tonic-gate 
19720Sstevel@tonic-gate 			tstat_options |= TSTAT_OPT_ENTRY;
19730Sstevel@tonic-gate 		}
19740Sstevel@tonic-gate 
19750Sstevel@tonic-gate 		tstat_enabled[arg] = 1;
19760Sstevel@tonic-gate 		mutex_exit(&tstat_lock);
19770Sstevel@tonic-gate 		return (0);
19780Sstevel@tonic-gate 
19790Sstevel@tonic-gate 	case TSTATIOC_NOENTRY:
19800Sstevel@tonic-gate 		mutex_enter(&tstat_lock);
19810Sstevel@tonic-gate 
19820Sstevel@tonic-gate 		if (tstat_running) {
19830Sstevel@tonic-gate 			mutex_exit(&tstat_lock);
19840Sstevel@tonic-gate 			return (EBUSY);
19850Sstevel@tonic-gate 		}
19860Sstevel@tonic-gate 
19870Sstevel@tonic-gate 		for (i = 0; i < TSTAT_NENT; i++)
19880Sstevel@tonic-gate 			tstat_enabled[i] = 0;
19890Sstevel@tonic-gate 
19900Sstevel@tonic-gate 		mutex_exit(&tstat_lock);
19910Sstevel@tonic-gate 		return (0);
19920Sstevel@tonic-gate 
19930Sstevel@tonic-gate 	case TSTATIOC_READ:
19940Sstevel@tonic-gate 		mutex_enter(&tstat_lock);
19950Sstevel@tonic-gate 
19960Sstevel@tonic-gate 		if (tstat_options & TSTAT_OPT_TLBDATA) {
19970Sstevel@tonic-gate 			dsize = tstat_data_t_exported_size;
19980Sstevel@tonic-gate 		} else {
19990Sstevel@tonic-gate 			dsize = sizeof (tstat_data_t);
20000Sstevel@tonic-gate 		}
20010Sstevel@tonic-gate 
20020Sstevel@tonic-gate 		for (i = 0, out = 0; i <= max_cpuid; i++) {
20030Sstevel@tonic-gate 			tstat_percpu_t *tcpu = &tstat_percpu[i];
20040Sstevel@tonic-gate 
20050Sstevel@tonic-gate 			if (!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED))
20060Sstevel@tonic-gate 				continue;
20070Sstevel@tonic-gate 
20080Sstevel@tonic-gate 			ASSERT(tcpu->tcpu_flags & TSTAT_CPU_SELECTED);
20090Sstevel@tonic-gate 			ASSERT(tcpu->tcpu_flags & TSTAT_CPU_ALLOCATED);
20100Sstevel@tonic-gate 
20110Sstevel@tonic-gate 			tstat_buffer->tdata_cpuid = -1;
20120Sstevel@tonic-gate 			xc_one(i, (xcfunc_t *)trapstat_snapshot, 0, 0);
20130Sstevel@tonic-gate 
20140Sstevel@tonic-gate 			if (tstat_buffer->tdata_cpuid == -1) {
20150Sstevel@tonic-gate 				/*
20160Sstevel@tonic-gate 				 * This CPU is not currently responding to
20170Sstevel@tonic-gate 				 * cross calls; we have caught it while it is
20180Sstevel@tonic-gate 				 * being unconfigured.  We'll drop tstat_lock
20190Sstevel@tonic-gate 				 * and pick up and drop cpu_lock.  By the
20200Sstevel@tonic-gate 				 * time we acquire cpu_lock, the DR operation
20210Sstevel@tonic-gate 				 * will appear consistent and we can assert
20220Sstevel@tonic-gate 				 * that trapstat_cpu_setup() has cleared
20230Sstevel@tonic-gate 				 * TSTAT_CPU_ENABLED.
20240Sstevel@tonic-gate 				 */
20250Sstevel@tonic-gate 				mutex_exit(&tstat_lock);
20260Sstevel@tonic-gate 				mutex_enter(&cpu_lock);
20270Sstevel@tonic-gate 				mutex_exit(&cpu_lock);
20280Sstevel@tonic-gate 				mutex_enter(&tstat_lock);
20290Sstevel@tonic-gate 				ASSERT(!(tcpu->tcpu_flags & TSTAT_CPU_ENABLED));
20300Sstevel@tonic-gate 				continue;
20310Sstevel@tonic-gate 			}
20320Sstevel@tonic-gate 
20330Sstevel@tonic-gate 			/*
20340Sstevel@tonic-gate 			 * Need to compensate for the difference between page
20350Sstevel@tonic-gate 			 * sizes exported to users and page sizes available
20360Sstevel@tonic-gate 			 * within the kernel.
20370Sstevel@tonic-gate 			 */
20380Sstevel@tonic-gate 			if ((tstat_options & TSTAT_OPT_TLBDATA) &&
20390Sstevel@tonic-gate 			    (tstat_pgszs != tstat_user_pgszs)) {
20400Sstevel@tonic-gate 				tstat_pgszdata_t *tp;
20410Sstevel@tonic-gate 				uint_t szc;
20420Sstevel@tonic-gate 
20430Sstevel@tonic-gate 				tp = &tstat_buffer->tdata_pgsz[0];
20440Sstevel@tonic-gate 				for (j = 0; j < tstat_user_pgszs; j++) {
20450Sstevel@tonic-gate 					if ((szc = USERSZC_2_SZC(j)) != j) {
20460Sstevel@tonic-gate 						bcopy(&tp[szc], &tp[j],
20470Sstevel@tonic-gate 						    sizeof (tstat_pgszdata_t));
20480Sstevel@tonic-gate 					}
20490Sstevel@tonic-gate 				}
20500Sstevel@tonic-gate 			}
20510Sstevel@tonic-gate 
20520Sstevel@tonic-gate 			if (copyout(tstat_buffer, (void *)arg, dsize) != 0) {
20530Sstevel@tonic-gate 				mutex_exit(&tstat_lock);
20540Sstevel@tonic-gate 				return (EFAULT);
20550Sstevel@tonic-gate 			}
20560Sstevel@tonic-gate 
20570Sstevel@tonic-gate 			out++;
20580Sstevel@tonic-gate 			arg += dsize;
20590Sstevel@tonic-gate 		}
20600Sstevel@tonic-gate 
20610Sstevel@tonic-gate 		if (out != max_cpuid + 1) {
20620Sstevel@tonic-gate 			processorid_t cpuid = -1;
20630Sstevel@tonic-gate 			arg += offsetof(tstat_data_t, tdata_cpuid);
20640Sstevel@tonic-gate 
20650Sstevel@tonic-gate 			if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0) {
20660Sstevel@tonic-gate 				mutex_exit(&tstat_lock);
20670Sstevel@tonic-gate 				return (EFAULT);
20680Sstevel@tonic-gate 			}
20690Sstevel@tonic-gate 		}
20700Sstevel@tonic-gate 
20710Sstevel@tonic-gate 		mutex_exit(&tstat_lock);
20720Sstevel@tonic-gate 
20730Sstevel@tonic-gate 		return (0);
20740Sstevel@tonic-gate 
20750Sstevel@tonic-gate 	case TSTATIOC_TLBDATA:
20760Sstevel@tonic-gate 		return (trapstat_option(TSTAT_OPT_TLBDATA));
20770Sstevel@tonic-gate 
20780Sstevel@tonic-gate 	default:
20790Sstevel@tonic-gate 		break;
20800Sstevel@tonic-gate 	}
20810Sstevel@tonic-gate 
20820Sstevel@tonic-gate 	return (ENOTTY);
20830Sstevel@tonic-gate }
20840Sstevel@tonic-gate 
20850Sstevel@tonic-gate /*ARGSUSED*/
20860Sstevel@tonic-gate static int
20870Sstevel@tonic-gate trapstat_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
20880Sstevel@tonic-gate {
20890Sstevel@tonic-gate 	int error;
20900Sstevel@tonic-gate 
20910Sstevel@tonic-gate 	switch (infocmd) {
20920Sstevel@tonic-gate 	case DDI_INFO_DEVT2DEVINFO:
20930Sstevel@tonic-gate 		*result = (void *)tstat_devi;
20940Sstevel@tonic-gate 		error = DDI_SUCCESS;
20950Sstevel@tonic-gate 		break;
20960Sstevel@tonic-gate 	case DDI_INFO_DEVT2INSTANCE:
20970Sstevel@tonic-gate 		*result = (void *)0;
20980Sstevel@tonic-gate 		error = DDI_SUCCESS;
20990Sstevel@tonic-gate 		break;
21000Sstevel@tonic-gate 	default:
21010Sstevel@tonic-gate 		error = DDI_FAILURE;
21020Sstevel@tonic-gate 	}
21030Sstevel@tonic-gate 	return (error);
21040Sstevel@tonic-gate }
21050Sstevel@tonic-gate 
21060Sstevel@tonic-gate static int
21070Sstevel@tonic-gate trapstat_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
21080Sstevel@tonic-gate {
21090Sstevel@tonic-gate 	switch (cmd) {
21100Sstevel@tonic-gate 	case DDI_ATTACH:
21110Sstevel@tonic-gate 		break;
21120Sstevel@tonic-gate 
21130Sstevel@tonic-gate 	case DDI_RESUME:
21140Sstevel@tonic-gate 		return (DDI_SUCCESS);
21150Sstevel@tonic-gate 
21160Sstevel@tonic-gate 	default:
21170Sstevel@tonic-gate 		return (DDI_FAILURE);
21180Sstevel@tonic-gate 	}
21190Sstevel@tonic-gate 
21200Sstevel@tonic-gate 	if (ddi_create_minor_node(devi, "trapstat", S_IFCHR,
21210Sstevel@tonic-gate 	    0, DDI_PSEUDO, 0) == DDI_FAILURE) {
21220Sstevel@tonic-gate 		ddi_remove_minor_node(devi, NULL);
21230Sstevel@tonic-gate 		return (DDI_FAILURE);
21240Sstevel@tonic-gate 	}
21250Sstevel@tonic-gate 
21260Sstevel@tonic-gate 	ddi_report_dev(devi);
21270Sstevel@tonic-gate 	tstat_devi = devi;
21280Sstevel@tonic-gate 
21290Sstevel@tonic-gate 	tstat_pgszs = page_num_pagesizes();
21300Sstevel@tonic-gate 	tstat_user_pgszs = page_num_user_pagesizes();
21310Sstevel@tonic-gate 	tstat_data_t_size = sizeof (tstat_data_t) +
21320Sstevel@tonic-gate 	    (tstat_pgszs - 1) * sizeof (tstat_pgszdata_t);
21330Sstevel@tonic-gate 	tstat_data_t_exported_size = sizeof (tstat_data_t) +
21340Sstevel@tonic-gate 	    (tstat_user_pgszs - 1) * sizeof (tstat_pgszdata_t);
21350Sstevel@tonic-gate #ifndef sun4v
21360Sstevel@tonic-gate 	tstat_data_pages = (tstat_data_t_size >> MMU_PAGESHIFT) + 1;
21370Sstevel@tonic-gate 	tstat_total_pages = TSTAT_INSTR_PAGES + tstat_data_pages;
21380Sstevel@tonic-gate 	tstat_data_size = tstat_data_pages * MMU_PAGESIZE;
21390Sstevel@tonic-gate 	tstat_total_size = TSTAT_INSTR_SIZE + tstat_data_size;
21400Sstevel@tonic-gate #else
21410Sstevel@tonic-gate 	tstat_data_pages = 0;
21420Sstevel@tonic-gate 	tstat_data_size = tstat_data_t_size;
21430Sstevel@tonic-gate 	tstat_total_pages = ((TSTAT_INSTR_SIZE + tstat_data_size) >>
21440Sstevel@tonic-gate 		MMU_PAGESHIFT) + 1;
21450Sstevel@tonic-gate 	tstat_total_size = tstat_total_pages * MMU_PAGESIZE;
21460Sstevel@tonic-gate #endif
21470Sstevel@tonic-gate 
21480Sstevel@tonic-gate 	tstat_percpu = kmem_zalloc((max_cpuid + 1) *
21490Sstevel@tonic-gate 	    sizeof (tstat_percpu_t), KM_SLEEP);
21500Sstevel@tonic-gate 
21510Sstevel@tonic-gate 	/*
21520Sstevel@tonic-gate 	 * Create our own arena backed by segkmem to assure a source of
21530Sstevel@tonic-gate 	 * MMU_PAGESIZE-aligned allocations.  We allocate out of the
21540Sstevel@tonic-gate 	 * heap32_arena to assure that we can address the allocated memory with
21550Sstevel@tonic-gate 	 * a single sethi/simm13 pair in the interposing trap table entries.
21560Sstevel@tonic-gate 	 */
21570Sstevel@tonic-gate 	tstat_arena = vmem_create("trapstat", NULL, 0, MMU_PAGESIZE,
21580Sstevel@tonic-gate 	    segkmem_alloc_permanent, segkmem_free, heap32_arena, 0, VM_SLEEP);
21590Sstevel@tonic-gate 
21600Sstevel@tonic-gate 	tstat_enabled = kmem_alloc(TSTAT_TOTAL_NENT * sizeof (int), KM_SLEEP);
21610Sstevel@tonic-gate 	tstat_buffer = kmem_alloc(tstat_data_t_size, KM_SLEEP);
21620Sstevel@tonic-gate 
21630Sstevel@tonic-gate 	/*
21640Sstevel@tonic-gate 	 * CB_CL_CPR_POST_USER is the class that executes from cpr_resume()
21650Sstevel@tonic-gate 	 * after user threads can be restarted.  By executing in this class,
21660Sstevel@tonic-gate 	 * we are assured of the availability of system services needed to
21670Sstevel@tonic-gate 	 * resume trapstat (specifically, we are assured that all CPUs are
21680Sstevel@tonic-gate 	 * restarted and responding to cross calls).
21690Sstevel@tonic-gate 	 */
21700Sstevel@tonic-gate 	tstat_cprcb =
21710Sstevel@tonic-gate 	    callb_add(trapstat_cpr, NULL, CB_CL_CPR_POST_USER, "trapstat");
21720Sstevel@tonic-gate 
21730Sstevel@tonic-gate 	return (DDI_SUCCESS);
21740Sstevel@tonic-gate }
21750Sstevel@tonic-gate 
21760Sstevel@tonic-gate static int
21770Sstevel@tonic-gate trapstat_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
21780Sstevel@tonic-gate {
21790Sstevel@tonic-gate 	int rval;
21800Sstevel@tonic-gate 
21810Sstevel@tonic-gate 	ASSERT(devi == tstat_devi);
21820Sstevel@tonic-gate 
21830Sstevel@tonic-gate 	switch (cmd) {
21840Sstevel@tonic-gate 	case DDI_DETACH:
21850Sstevel@tonic-gate 		break;
21860Sstevel@tonic-gate 
21870Sstevel@tonic-gate 	case DDI_SUSPEND:
21880Sstevel@tonic-gate 		return (DDI_SUCCESS);
21890Sstevel@tonic-gate 
21900Sstevel@tonic-gate 	default:
21910Sstevel@tonic-gate 		return (DDI_FAILURE);
21920Sstevel@tonic-gate 	}
21930Sstevel@tonic-gate 
21940Sstevel@tonic-gate 	ASSERT(!tstat_running);
21950Sstevel@tonic-gate 
21960Sstevel@tonic-gate 	rval = callb_delete(tstat_cprcb);
21970Sstevel@tonic-gate 	ASSERT(rval == 0);
21980Sstevel@tonic-gate 
21990Sstevel@tonic-gate 	kmem_free(tstat_buffer, tstat_data_t_size);
22000Sstevel@tonic-gate 	kmem_free(tstat_enabled, TSTAT_TOTAL_NENT * sizeof (int));
22010Sstevel@tonic-gate 	vmem_destroy(tstat_arena);
22020Sstevel@tonic-gate 	kmem_free(tstat_percpu, (max_cpuid + 1) * sizeof (tstat_percpu_t));
22030Sstevel@tonic-gate 	ddi_remove_minor_node(devi, NULL);
22040Sstevel@tonic-gate 
22050Sstevel@tonic-gate 	return (DDI_SUCCESS);
22060Sstevel@tonic-gate }
22070Sstevel@tonic-gate 
22080Sstevel@tonic-gate /*
22090Sstevel@tonic-gate  * Configuration data structures
22100Sstevel@tonic-gate  */
22110Sstevel@tonic-gate static struct cb_ops trapstat_cb_ops = {
22120Sstevel@tonic-gate 	trapstat_open,		/* open */
22130Sstevel@tonic-gate 	trapstat_close,		/* close */
22140Sstevel@tonic-gate 	nulldev,		/* strategy */
22150Sstevel@tonic-gate 	nulldev,		/* print */
22160Sstevel@tonic-gate 	nodev,			/* dump */
22170Sstevel@tonic-gate 	nodev,			/* read */
22180Sstevel@tonic-gate 	nodev,			/* write */
22190Sstevel@tonic-gate 	trapstat_ioctl,		/* ioctl */
22200Sstevel@tonic-gate 	nodev,			/* devmap */
22210Sstevel@tonic-gate 	nodev,			/* mmap */
22220Sstevel@tonic-gate 	nodev,			/* segmap */
22230Sstevel@tonic-gate 	nochpoll,		/* poll */
22240Sstevel@tonic-gate 	ddi_prop_op,		/* cb_prop_op */
22250Sstevel@tonic-gate 	0,			/* streamtab */
22260Sstevel@tonic-gate 	D_MP | D_NEW		/* Driver compatibility flag */
22270Sstevel@tonic-gate };
22280Sstevel@tonic-gate 
22290Sstevel@tonic-gate static struct dev_ops trapstat_ops = {
22300Sstevel@tonic-gate 	DEVO_REV,		/* devo_rev, */
22310Sstevel@tonic-gate 	0,			/* refcnt */
22320Sstevel@tonic-gate 	trapstat_info,		/* getinfo */
22330Sstevel@tonic-gate 	nulldev,		/* identify */
22340Sstevel@tonic-gate 	nulldev,		/* probe */
22350Sstevel@tonic-gate 	trapstat_attach,	/* attach */
22360Sstevel@tonic-gate 	trapstat_detach,	/* detach */
22370Sstevel@tonic-gate 	nulldev,		/* reset */
22380Sstevel@tonic-gate 	&trapstat_cb_ops,	/* cb_ops */
22390Sstevel@tonic-gate 	(struct bus_ops *)0,	/* bus_ops */
22400Sstevel@tonic-gate };
22410Sstevel@tonic-gate 
22420Sstevel@tonic-gate static struct modldrv modldrv = {
22430Sstevel@tonic-gate 	&mod_driverops,		/* Type of module.  This one is a driver */
22440Sstevel@tonic-gate 	"Trap Statistics",	/* name of module */
22450Sstevel@tonic-gate 	&trapstat_ops,		/* driver ops */
22460Sstevel@tonic-gate };
22470Sstevel@tonic-gate 
22480Sstevel@tonic-gate static struct modlinkage modlinkage = {
22490Sstevel@tonic-gate 	MODREV_1, (void *)&modldrv, NULL
22500Sstevel@tonic-gate };
22510Sstevel@tonic-gate 
22520Sstevel@tonic-gate int
22530Sstevel@tonic-gate _init(void)
22540Sstevel@tonic-gate {
22550Sstevel@tonic-gate 	return (mod_install(&modlinkage));
22560Sstevel@tonic-gate }
22570Sstevel@tonic-gate 
22580Sstevel@tonic-gate int
22590Sstevel@tonic-gate _fini(void)
22600Sstevel@tonic-gate {
22610Sstevel@tonic-gate 	return (mod_remove(&modlinkage));
22620Sstevel@tonic-gate }
22630Sstevel@tonic-gate 
22640Sstevel@tonic-gate int
22650Sstevel@tonic-gate _info(struct modinfo *modinfop)
22660Sstevel@tonic-gate {
22670Sstevel@tonic-gate 	return (mod_info(&modlinkage, modinfop));
22680Sstevel@tonic-gate }
2269