1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 #include <sys/cpuvar.h>
27 #include <sys/cpu_event.h>
28 #include <sys/regset.h>
29 #include <sys/psw.h>
30 #include <sys/types.h>
31 #include <sys/thread.h>
32 #include <sys/systm.h>
33 #include <sys/segments.h>
34 #include <sys/pcb.h>
35 #include <sys/trap.h>
36 #include <sys/ftrace.h>
37 #include <sys/traptrace.h>
38 #include <sys/clock.h>
39 #include <sys/panic.h>
40 #include <sys/disp.h>
41 #include <vm/seg_kp.h>
42 #include <sys/stack.h>
43 #include <sys/sysmacros.h>
44 #include <sys/cmn_err.h>
45 #include <sys/kstat.h>
46 #include <sys/smp_impldefs.h>
47 #include <sys/pool_pset.h>
48 #include <sys/zone.h>
49 #include <sys/bitmap.h>
50 #include <sys/archsystm.h>
51 #include <sys/machsystm.h>
52 #include <sys/ontrap.h>
53 #include <sys/x86_archext.h>
54 #include <sys/promif.h>
55 #include <vm/hat_i86.h>
56 #if defined(__xpv)
57 #include <sys/hypervisor.h>
58 #endif
59
60
61 #if defined(__xpv) && defined(DEBUG)
62
63 /*
64 * This panic message is intended as an aid to interrupt debugging.
65 *
66 * The associated assertion tests the condition of enabling
67 * events when events are already enabled. The implication
68 * being that whatever code the programmer thought was
69 * protected by having events disabled until the second
70 * enable happened really wasn't protected at all ..
71 */
72
73 int stistipanic = 1; /* controls the debug panic check */
74 const char *stistimsg = "stisti";
75 ulong_t laststi[NCPU];
76
77 /*
78  * This variable tracks the last place events were disabled on each cpu;
79 * it assists in debugging when asserts that interrupts are enabled trip.
80 */
81 ulong_t lastcli[NCPU];
82
83 #endif
84
85 void do_interrupt(struct regs *rp, trap_trace_rec_t *ttp);
86
87 void (*do_interrupt_common)(struct regs *, trap_trace_rec_t *) = do_interrupt;
88 uintptr_t (*get_intr_handler)(int, short) = NULL;
89
90 /*
91 * Set cpu's base SPL level to the highest active interrupt level
92 */
93 void
94 set_base_spl(void)
95 {
96 struct cpu *cpu = CPU;
97 uint16_t active = (uint16_t)cpu->cpu_intr_actv;
98
99 cpu->cpu_base_spl = active == 0 ? 0 : bsrw_insn(active);
100 }
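
/*
 * Worked example (editorial note, not part of the original source):
 * bsrw_insn() returns the index of the highest set bit of its 16-bit
 * operand, so if the low half of cpu_intr_actv were 0x0220 (PILs 5 and
 * 9 active), set_base_spl() would leave cpu_base_spl at 9; with no
 * interrupts active it falls back to 0.
 */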
101
102 /*
103 * Do all the work necessary to set up the cpu and thread structures
104 * to dispatch a high-level interrupt.
105 *
106 * Returns 0 if we're -not- already on the high-level interrupt stack,
107 * (and *must* switch to it), non-zero if we are already on that stack.
108 *
109 * Called with interrupts masked.
110 * The 'pil' is already set to the appropriate level for rp->r_trapno.
111 */
112 static int
113 hilevel_intr_prolog(struct cpu *cpu, uint_t pil, uint_t oldpil, struct regs *rp)
114 {
115 struct machcpu *mcpu = &cpu->cpu_m;
116 uint_t mask;
117 hrtime_t intrtime;
118 hrtime_t now = tsc_read();
119
120 ASSERT(pil > LOCK_LEVEL);
121
122 if (pil == CBE_HIGH_PIL) {
123 cpu->cpu_profile_pil = oldpil;
124 if (USERMODE(rp->r_cs)) {
125 cpu->cpu_profile_pc = 0;
126 cpu->cpu_profile_upc = rp->r_pc;
127 cpu->cpu_cpcprofile_pc = 0;
128 cpu->cpu_cpcprofile_upc = rp->r_pc;
129 } else {
130 cpu->cpu_profile_pc = rp->r_pc;
131 cpu->cpu_profile_upc = 0;
132 cpu->cpu_cpcprofile_pc = rp->r_pc;
133 cpu->cpu_cpcprofile_upc = 0;
134 }
135 }
136
137 mask = cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK;
138 if (mask != 0) {
139 int nestpil;
140
141 /*
142 * We have interrupted another high-level interrupt.
143 * Load starting timestamp, compute interval, update
144 * cumulative counter.
145 */
146 nestpil = bsrw_insn((uint16_t)mask);
147 ASSERT(nestpil < pil);
148 intrtime = now -
149 mcpu->pil_high_start[nestpil - (LOCK_LEVEL + 1)];
150 mcpu->intrstat[nestpil][0] += intrtime;
151 cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
152 /*
153 * Another high-level interrupt is active below this one, so
154 * there is no need to check for an interrupt thread. That
155 * will be done by the lowest priority high-level interrupt
156 * active.
157 */
158 } else {
159 kthread_t *t = cpu->cpu_thread;
160
161 /*
162 * See if we are interrupting a low-level interrupt thread.
163 * If so, account for its time slice only if its time stamp
164 * is non-zero.
165 */
166 if ((t->t_flag & T_INTR_THREAD) != 0 && t->t_intr_start != 0) {
167 intrtime = now - t->t_intr_start;
168 mcpu->intrstat[t->t_pil][0] += intrtime;
169 cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
170 t->t_intr_start = 0;
171 }
172 }
173
174 /*
175 * Store starting timestamp in CPU structure for this PIL.
176 */
177 mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)] = now;
178
179 ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);
180
181 if (pil == 15) {
182 /*
183 * To support reentrant level 15 interrupts, we maintain a
184 * recursion count in the top half of cpu_intr_actv. Only
185 * when this count hits zero do we clear the PIL 15 bit from
186 * the lower half of cpu_intr_actv.
187 */
188 uint16_t *refcntp = (uint16_t *)&cpu->cpu_intr_actv + 1;
189 (*refcntp)++;
190 }
191
192 mask = cpu->cpu_intr_actv;
193
194 cpu->cpu_intr_actv |= (1 << pil);
195
196 return (mask & CPU_INTR_ACTV_HIGH_LEVEL_MASK);
197 }
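
/*
 * Worked example (editorial note, not part of the original source):
 * per the pointer arithmetic above, on this little-endian platform the
 * low 16 bits of cpu_intr_actv form the per-PIL "active" bitmask and
 * the upper 16 bits hold the PIL 15 recursion count.  With a PIL 14
 * interrupt active and one PIL 15 interrupt on top of it, cpu_intr_actv
 * would read 0x0001c000: bits 14 and 15 set below, a count of 1 above.
 */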
198
199 /*
200 * Does most of the work of returning from a high level interrupt.
201 *
202 * Returns 0 if there are no more high level interrupts (in which
203 * case we must switch back to the interrupted thread stack) or
204 * non-zero if there are more (in which case we should stay on it).
205 *
206 * Called with interrupts masked
207 */
208 static int
209 hilevel_intr_epilog(struct cpu *cpu, uint_t pil, uint_t oldpil, uint_t vecnum)
210 {
211 struct machcpu *mcpu = &cpu->cpu_m;
212 uint_t mask;
213 hrtime_t intrtime;
214 hrtime_t now = tsc_read();
215
216 ASSERT(mcpu->mcpu_pri == pil);
217
218 cpu->cpu_stats.sys.intr[pil - 1]++;
219
220 ASSERT(cpu->cpu_intr_actv & (1 << pil));
221
222 if (pil == 15) {
223 /*
224 * To support reentrant level 15 interrupts, we maintain a
225 * recursion count in the top half of cpu_intr_actv. Only
226 * when this count hits zero do we clear the PIL 15 bit from
227 * the lower half of cpu_intr_actv.
228 */
229 uint16_t *refcntp = (uint16_t *)&cpu->cpu_intr_actv + 1;
230
231 ASSERT(*refcntp > 0);
232
233 if (--(*refcntp) == 0)
234 cpu->cpu_intr_actv &= ~(1 << pil);
235 } else {
236 cpu->cpu_intr_actv &= ~(1 << pil);
237 }
238
239 ASSERT(mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)] != 0);
240
241 intrtime = now - mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)];
242 mcpu->intrstat[pil][0] += intrtime;
243 cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
244
245 /*
246 * Check for lower-pil nested high-level interrupt beneath
247 * current one. If so, place a starting timestamp in its
248 * pil_high_start entry.
249 */
250 mask = cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK;
251 if (mask != 0) {
252 int nestpil;
253
254 /*
255 * find PIL of nested interrupt
256 */
257 nestpil = bsrw_insn((uint16_t)mask);
258 ASSERT(nestpil < pil);
259 mcpu->pil_high_start[nestpil - (LOCK_LEVEL + 1)] = now;
260 /*
261 * (Another high-level interrupt is active below this one,
262 * so there is no need to check for an interrupt
263 * thread. That will be done by the lowest priority
264 * high-level interrupt active.)
265 */
266 } else {
267 /*
268 * Check to see if there is a low-level interrupt active.
269 * If so, place a starting timestamp in the thread
270 * structure.
271 */
272 kthread_t *t = cpu->cpu_thread;
273
274 if (t->t_flag & T_INTR_THREAD)
275 t->t_intr_start = now;
276 }
277
278 mcpu->mcpu_pri = oldpil;
279 (void) (*setlvlx)(oldpil, vecnum);
280
281 return (cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK);
282 }
283
284 /*
285 * Set up the cpu, thread and interrupt thread structures for
286 * executing an interrupt thread. The new stack pointer of the
287 * interrupt thread (which *must* be switched to) is returned.
288 */
289 static caddr_t
290 intr_thread_prolog(struct cpu *cpu, caddr_t stackptr, uint_t pil)
291 {
292 struct machcpu *mcpu = &cpu->cpu_m;
293 kthread_t *t, *volatile it;
294 hrtime_t now = tsc_read();
295
296 ASSERT(pil > 0);
297 ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);
298 cpu->cpu_intr_actv |= (1 << pil);
299
300 /*
301 * Get set to run an interrupt thread.
302 * There should always be an interrupt thread, since we
303 * allocate one for each level on each CPU.
304 *
305 * t_intr_start could be zero due to cpu_intr_swtch_enter.
306 */
307 t = cpu->cpu_thread;
308 if ((t->t_flag & T_INTR_THREAD) && t->t_intr_start != 0) {
309 hrtime_t intrtime = now - t->t_intr_start;
310 mcpu->intrstat[t->t_pil][0] += intrtime;
311 cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
312 t->t_intr_start = 0;
313 }
314
315 ASSERT(SA((uintptr_t)stackptr) == (uintptr_t)stackptr);
316
317 t->t_sp = (uintptr_t)stackptr; /* mark stack in curthread for resume */
318
319 /*
320	 * Unlink an interrupt thread from the cpu's free list.
321 *
322 * Note that the code in kcpc_overflow_intr -relies- on the
323 * ordering of events here - in particular that t->t_lwp of
324 * the interrupt thread is set to the pinned thread *before*
325 * curthread is changed.
326 */
327 it = cpu->cpu_intr_thread;
328 cpu->cpu_intr_thread = it->t_link;
329 it->t_intr = t;
330 it->t_lwp = t->t_lwp;
331
332 /*
333 * (threads on the interrupt thread free list could have state
334 * preset to TS_ONPROC, but it helps in debugging if
335 * they're TS_FREE.)
336 */
337 it->t_state = TS_ONPROC;
338
339 cpu->cpu_thread = it; /* new curthread on this cpu */
340 it->t_pil = (uchar_t)pil;
341 it->t_pri = intr_pri + (pri_t)pil;
342 it->t_intr_start = now;
343
344 return (it->t_stk);
345 }
346
347
348 #ifdef DEBUG
349 int intr_thread_cnt;
350 #endif
351
352 /*
353 * Called with interrupts disabled
354 */
355 static void
356 intr_thread_epilog(struct cpu *cpu, uint_t vec, uint_t oldpil)
357 {
358 struct machcpu *mcpu = &cpu->cpu_m;
359 kthread_t *t;
360 kthread_t *it = cpu->cpu_thread; /* curthread */
361 uint_t pil, basespl;
362 hrtime_t intrtime;
363 hrtime_t now = tsc_read();
364
365 pil = it->t_pil;
366 cpu->cpu_stats.sys.intr[pil - 1]++;
367
368 ASSERT(it->t_intr_start != 0);
369 intrtime = now - it->t_intr_start;
370 mcpu->intrstat[pil][0] += intrtime;
371 cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
372
373 ASSERT(cpu->cpu_intr_actv & (1 << pil));
374 cpu->cpu_intr_actv &= ~(1 << pil);
375
376 /*
377 * If there is still an interrupted thread underneath this one
378 * then the interrupt was never blocked and the return is
379 * fairly simple. Otherwise it isn't.
380 */
381 if ((t = it->t_intr) == NULL) {
382 /*
383 * The interrupted thread is no longer pinned underneath
384 * the interrupt thread. This means the interrupt must
385 * have blocked, and the interrupted thread has been
386 * unpinned, and has probably been running around the
387 * system for a while.
388 *
389 * Since there is no longer a thread under this one, put
390 * this interrupt thread back on the CPU's free list and
391 * resume the idle thread which will dispatch the next
392 * thread to run.
393 */
394 #ifdef DEBUG
395 intr_thread_cnt++;
396 #endif
397 cpu->cpu_stats.sys.intrblk++;
398 /*
399 * Set CPU's base SPL based on active interrupts bitmask
400 */
401 set_base_spl();
402 basespl = cpu->cpu_base_spl;
403 mcpu->mcpu_pri = basespl;
404 (*setlvlx)(basespl, vec);
405 (void) splhigh();
406 sti();
407 it->t_state = TS_FREE;
408 /*
409 * Return interrupt thread to pool
410 */
411 it->t_link = cpu->cpu_intr_thread;
412 cpu->cpu_intr_thread = it;
413 swtch();
414 panic("intr_thread_epilog: swtch returned");
415 /*NOTREACHED*/
416 }
417
418 /*
419 * Return interrupt thread to the pool
420 */
421 it->t_link = cpu->cpu_intr_thread;
422 cpu->cpu_intr_thread = it;
423 it->t_state = TS_FREE;
424
425 basespl = cpu->cpu_base_spl;
426 pil = MAX(oldpil, basespl);
427 mcpu->mcpu_pri = pil;
428 (*setlvlx)(pil, vec);
429 t->t_intr_start = now;
430 cpu->cpu_thread = t;
431 }
432
433 /*
434 * intr_get_time() is a resource for interrupt handlers to determine how
435 * much time has been spent handling the current interrupt. Such a function
436 * is needed because higher level interrupts can arrive during the
437 * processing of an interrupt. intr_get_time() only returns time spent in the
438 * current interrupt handler.
439 *
440 * The caller must be calling from an interrupt handler running at a pil
441 * below or at lock level. Timings are not provided for high-level
442 * interrupts.
443 *
444 * The first time intr_get_time() is called while handling an interrupt,
445 * it returns the time since the interrupt handler was invoked. Subsequent
446 * calls will return the time since the prior call to intr_get_time(). Time
447 * is returned as ticks. Use scalehrtimef() to convert ticks to nsec.
448 *
449 * Theory Of Intrstat[][]:
450 *
451 * uint64_t intrstat[pil][0..1] is an array indexed by pil level, with two
452 * uint64_ts per pil.
453 *
454 * intrstat[pil][0] is a cumulative count of the number of ticks spent
455 * handling all interrupts at the specified pil on this CPU. It is
456 * exported via kstats to the user.
457 *
458 * intrstat[pil][1] is always a count of ticks less than or equal to the
459 * value in [0]. The difference between [1] and [0] is the value returned
460 * by a call to intr_get_time(). At the start of interrupt processing,
461 * [0] and [1] will be equal (or nearly so). As the interrupt consumes
462 * time, [0] will increase, but [1] will remain the same. A call to
463 * intr_get_time() will return the difference, then update [1] to be the
464 * same as [0]. Future calls will return the time since the last call.
465 * Finally, when the interrupt completes, [1] is updated to the same as [0].
466 *
467 * Implementation:
468 *
469 * intr_get_time() works much like a higher level interrupt arriving. It
470 * "checkpoints" the timing information by incrementing intrstat[pil][0]
471 * to include elapsed running time, and by setting t_intr_start to rdtsc.
472 * It then sets the return value to intrstat[pil][0] - intrstat[pil][1],
473 * and updates intrstat[pil][1] to be the same as the new value of
474 * intrstat[pil][0].
475 *
476 * In the normal handling of interrupts, after an interrupt handler returns
477 * and the code in intr_thread() updates intrstat[pil][0], it then sets
478 * intrstat[pil][1] to the new value of intrstat[pil][0]. When [0] == [1],
479 * the timings are reset, i.e. intr_get_time() will return [0] - [1] which
480 * is 0.
481 *
482 * Whenever interrupts arrive on a CPU which is handling a lower pil
483 * interrupt, they update the lower pil's [0] to show time spent in the
484 * handler that they've interrupted. This results in a growing discrepancy
485 * between [0] and [1], which is returned the next time intr_get_time() is
486 * called. Time spent in the higher-pil interrupt will not be returned in
487 * the next intr_get_time() call from the original interrupt, because
488 * the higher-pil interrupt's time is accumulated in intrstat[higherpil][].
489 */
490 uint64_t
491 intr_get_time(void)
492 {
493 struct cpu *cpu;
494 struct machcpu *mcpu;
495 kthread_t *t;
496 uint64_t time, delta, ret;
497 uint_t pil;
498
499 cli();
500 cpu = CPU;
501 mcpu = &cpu->cpu_m;
502 t = cpu->cpu_thread;
503 pil = t->t_pil;
504 ASSERT((cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK) == 0);
505 ASSERT(t->t_flag & T_INTR_THREAD);
506 ASSERT(pil != 0);
507 ASSERT(t->t_intr_start != 0);
508
509 time = tsc_read();
510 delta = time - t->t_intr_start;
511 t->t_intr_start = time;
512
513 time = mcpu->intrstat[pil][0] + delta;
514 ret = time - mcpu->intrstat[pil][1];
515 mcpu->intrstat[pil][0] = time;
516 mcpu->intrstat[pil][1] = time;
517 cpu->cpu_intracct[cpu->cpu_mstate] += delta;
518
519 sti();
520 return (ret);
521 }
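
/*
 * Illustrative usage sketch (editorial addition, not part of this file):
 * a handler running below lock level could use intr_get_time() to bound
 * the work it does per invocation.  The driver routines and the 10us
 * budget below are hypothetical; scalehrtimef() converts the returned
 * ticks to nanoseconds as described above.
 *
 *	static uint_t
 *	xx_intr(caddr_t arg1, caddr_t arg2)
 *	{
 *		hrtime_t total = 0, delta;
 *
 *		do {
 *			xx_service_one_event(arg1);	(hypothetical work)
 *			delta = (hrtime_t)intr_get_time();
 *			scalehrtimef(&delta);		(ticks -> nsec)
 *			total += delta;
 *		} while (xx_more_work(arg1) && total < 10000);
 *		return (DDI_INTR_CLAIMED);
 *	}
 */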
522
523 static caddr_t
524 dosoftint_prolog(
525 struct cpu *cpu,
526 caddr_t stackptr,
527 uint32_t st_pending,
528 uint_t oldpil)
529 {
530 kthread_t *t, *volatile it;
531 struct machcpu *mcpu = &cpu->cpu_m;
532 uint_t pil;
533 hrtime_t now;
534
535 top:
536 ASSERT(st_pending == mcpu->mcpu_softinfo.st_pending);
537
538 pil = bsrw_insn((uint16_t)st_pending);
539 if (pil <= oldpil || pil <= cpu->cpu_base_spl)
540 return (0);
541
542 /*
543 * XX64 Sigh.
544 *
545 * This is a transliteration of the i386 assembler code for
546 * soft interrupts. One question is "why does this need
547 * to be atomic?" One possible race is -other- processors
548 * posting soft interrupts to us in set_pending() i.e. the
549 * CPU might get preempted just after the address computation,
550 * but just before the atomic transaction, so another CPU would
551 * actually set the original CPU's st_pending bit. However,
552 * it looks like it would be simpler to disable preemption there.
553 * Are there other races for which preemption control doesn't work?
554 *
555 * The i386 assembler version -also- checks to see if the bit
556 * being cleared was actually set; if it wasn't, it rechecks
557 * for more. This seems a bit strange, as the only code that
558 * ever clears the bit is -this- code running with interrupts
559 * disabled on -this- CPU. This code would probably be cheaper:
560 *
561 * atomic_and_32((uint32_t *)&mcpu->mcpu_softinfo.st_pending,
562 * ~(1 << pil));
563 *
564 * and t->t_preempt--/++ around set_pending() even cheaper,
565 * but at this point, correctness is critical, so we slavishly
566 * emulate the i386 port.
567 */
568 if (atomic_btr32((uint32_t *)
569 &mcpu->mcpu_softinfo.st_pending, pil) == 0) {
570 st_pending = mcpu->mcpu_softinfo.st_pending;
571 goto top;
572 }
573
574 mcpu->mcpu_pri = pil;
575 (*setspl)(pil);
576
577 now = tsc_read();
578
579 /*
580 * Get set to run interrupt thread.
581 * There should always be an interrupt thread since we
582 * allocate one for each level on the CPU.
583 */
584 it = cpu->cpu_intr_thread;
585 cpu->cpu_intr_thread = it->t_link;
586
587 /* t_intr_start could be zero due to cpu_intr_swtch_enter. */
588 t = cpu->cpu_thread;
589 if ((t->t_flag & T_INTR_THREAD) && t->t_intr_start != 0) {
590 hrtime_t intrtime = now - t->t_intr_start;
591 mcpu->intrstat[pil][0] += intrtime;
592 cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
593 t->t_intr_start = 0;
594 }
595
596 /*
597 * Note that the code in kcpc_overflow_intr -relies- on the
598 * ordering of events here - in particular that t->t_lwp of
599 * the interrupt thread is set to the pinned thread *before*
600 * curthread is changed.
601 */
602 it->t_lwp = t->t_lwp;
603 it->t_state = TS_ONPROC;
604
605 /*
606 * Push interrupted thread onto list from new thread.
607 * Set the new thread as the current one.
608 * Set interrupted thread's T_SP because if it is the idle thread,
609 * resume() may use that stack between threads.
610 */
611
612 ASSERT(SA((uintptr_t)stackptr) == (uintptr_t)stackptr);
613 t->t_sp = (uintptr_t)stackptr;
614
615 it->t_intr = t;
616 cpu->cpu_thread = it;
617
618 /*
619 * Set bit for this pil in CPU's interrupt active bitmask.
620 */
621 ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);
622 cpu->cpu_intr_actv |= (1 << pil);
623
624 /*
625 * Initialize thread priority level from intr_pri
626 */
627 it->t_pil = (uchar_t)pil;
628 it->t_pri = (pri_t)pil + intr_pri;
629 it->t_intr_start = now;
630
631 return (it->t_stk);
632 }
633
634 static void
635 dosoftint_epilog(struct cpu *cpu, uint_t oldpil)
636 {
637 struct machcpu *mcpu = &cpu->cpu_m;
638 kthread_t *t, *it;
639 uint_t pil, basespl;
640 hrtime_t intrtime;
641 hrtime_t now = tsc_read();
642
643 it = cpu->cpu_thread;
644 pil = it->t_pil;
645
646 cpu->cpu_stats.sys.intr[pil - 1]++;
647
648 ASSERT(cpu->cpu_intr_actv & (1 << pil));
649 cpu->cpu_intr_actv &= ~(1 << pil);
650 intrtime = now - it->t_intr_start;
651 mcpu->intrstat[pil][0] += intrtime;
652 cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
653
654 /*
655 * If there is still an interrupted thread underneath this one
656 * then the interrupt was never blocked and the return is
657 * fairly simple. Otherwise it isn't.
658 */
659 if ((t = it->t_intr) == NULL) {
660 /*
661 * Put thread back on the interrupt thread list.
662 * This was an interrupt thread, so set CPU's base SPL.
663 */
664 set_base_spl();
665 it->t_state = TS_FREE;
666 it->t_link = cpu->cpu_intr_thread;
667 cpu->cpu_intr_thread = it;
668 (void) splhigh();
669 sti();
670 swtch();
671 /*NOTREACHED*/
672 panic("dosoftint_epilog: swtch returned");
673 }
674 it->t_link = cpu->cpu_intr_thread;
675 cpu->cpu_intr_thread = it;
676 it->t_state = TS_FREE;
677 cpu->cpu_thread = t;
678 if (t->t_flag & T_INTR_THREAD)
679 t->t_intr_start = now;
680 basespl = cpu->cpu_base_spl;
681 pil = MAX(oldpil, basespl);
682 mcpu->mcpu_pri = pil;
683 (*setspl)(pil);
684 }
685
686
687 /*
688  * Make the interrupted thread 't' runnable.
689 *
690 * Since t->t_sp has already been saved, t->t_pc is all
691 * that needs to be set in this function.
692 *
693 * Returns the interrupt level of the interrupt thread.
694 */
695 int
696 intr_passivate(
697 kthread_t *it, /* interrupt thread */
698 kthread_t *t) /* interrupted thread */
699 {
700 extern void _sys_rtt();
701
702 ASSERT(it->t_flag & T_INTR_THREAD);
703 ASSERT(SA(t->t_sp) == t->t_sp);
704
705 t->t_pc = (uintptr_t)_sys_rtt;
706 return (it->t_pil);
707 }
708
709 /*
710 * Create interrupt kstats for this CPU.
711 */
712 void
713 cpu_create_intrstat(cpu_t *cp)
714 {
715 int i;
716 kstat_t *intr_ksp;
717 kstat_named_t *knp;
718 char name[KSTAT_STRLEN];
719 zoneid_t zoneid;
720
721 ASSERT(MUTEX_HELD(&cpu_lock));
722
723 if (pool_pset_enabled())
724 zoneid = GLOBAL_ZONEID;
725 else
726 zoneid = ALL_ZONES;
727
728 intr_ksp = kstat_create_zone("cpu", cp->cpu_id, "intrstat", "misc",
729 KSTAT_TYPE_NAMED, PIL_MAX * 2, NULL, zoneid);
730
731 /*
732 * Initialize each PIL's named kstat
733 */
734 if (intr_ksp != NULL) {
735 intr_ksp->ks_update = cpu_kstat_intrstat_update;
736 knp = (kstat_named_t *)intr_ksp->ks_data;
737 intr_ksp->ks_private = cp;
738 for (i = 0; i < PIL_MAX; i++) {
739 (void) snprintf(name, KSTAT_STRLEN, "level-%d-time",
740 i + 1);
741 kstat_named_init(&knp[i * 2], name, KSTAT_DATA_UINT64);
742 (void) snprintf(name, KSTAT_STRLEN, "level-%d-count",
743 i + 1);
744 kstat_named_init(&knp[(i * 2) + 1], name,
745 KSTAT_DATA_UINT64);
746 }
747 kstat_install(intr_ksp);
748 }
749 }
750
751 /*
752 * Delete interrupt kstats for this CPU.
753 */
754 void
755 cpu_delete_intrstat(cpu_t *cp)
756 {
757 kstat_delete_byname_zone("cpu", cp->cpu_id, "intrstat", ALL_ZONES);
758 }
759
760 /*
761 * Convert interrupt statistics from CPU ticks to nanoseconds and
762 * update kstat.
763 */
764 int
765 cpu_kstat_intrstat_update(kstat_t *ksp, int rw)
766 {
767 kstat_named_t *knp = ksp->ks_data;
768 cpu_t *cpup = (cpu_t *)ksp->ks_private;
769 int i;
770 hrtime_t hrt;
771
772 if (rw == KSTAT_WRITE)
773 return (EACCES);
774
775 for (i = 0; i < PIL_MAX; i++) {
776 hrt = (hrtime_t)cpup->cpu_m.intrstat[i + 1][0];
777 scalehrtimef(&hrt);
778 knp[i * 2].value.ui64 = (uint64_t)hrt;
779 knp[(i * 2) + 1].value.ui64 = cpup->cpu_stats.sys.intr[i];
780 }
781
782 return (0);
783 }
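
/*
 * Illustrative userland consumer (editorial addition, not part of this
 * file): the "cpu:<instance>:intrstat" kstat created above can be read
 * with libkstat.  A minimal sketch, with error handling omitted and the
 * CPU instance and kstat names taken from cpu_create_intrstat():
 *
 *	#include <kstat.h>
 *
 *	kstat_ctl_t *kc = kstat_open();
 *	kstat_t *ksp = kstat_lookup(kc, "cpu", 0, "intrstat");
 *	if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1) {
 *		kstat_named_t *kn = kstat_data_lookup(ksp, "level-9-time");
 *		uint64_t nsec_at_pil9 = kn->value.ui64;
 *	}
 *	(void) kstat_close(kc);
 */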
784
785 /*
786 * An interrupt thread is ending a time slice, so compute the interval it
787 * ran for and update the statistic for its PIL.
788 */
789 void
790 cpu_intr_swtch_enter(kthread_id_t t)
791 {
792 uint64_t interval;
793 uint64_t start;
794 cpu_t *cpu;
795
796 ASSERT((t->t_flag & T_INTR_THREAD) != 0);
797 ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);
798
799 /*
800	 * We could be here with a zero timestamp.  This can happen when
801 * an interrupt thread which no longer has a pinned thread underneath
802 * it (i.e. it blocked at some point in its past) has finished running
803 * its handler. intr_thread() updated the interrupt statistic for its
804 * PIL and zeroed its timestamp. Since there was no pinned thread to
805 * return to, swtch() gets called and we end up here.
806 *
807 * Note that we use atomic ops below (cas64 and atomic_add_64), which
808 * we don't use in the functions above, because we're not called
809 * with interrupts blocked, but the epilog/prolog functions are.
810 */
811 if (t->t_intr_start) {
812 do {
813 start = t->t_intr_start;
814 interval = tsc_read() - start;
815 } while (cas64(&t->t_intr_start, start, 0) != start);
816 cpu = CPU;
817 cpu->cpu_m.intrstat[t->t_pil][0] += interval;
818
819 atomic_add_64((uint64_t *)&cpu->cpu_intracct[cpu->cpu_mstate],
820 interval);
821 } else
822 ASSERT(t->t_intr == NULL);
823 }
824
825 /*
826 * An interrupt thread is returning from swtch(). Place a starting timestamp
827 * in its thread structure.
828 */
829 void
830 cpu_intr_swtch_exit(kthread_id_t t)
831 {
832 uint64_t ts;
833
834 ASSERT((t->t_flag & T_INTR_THREAD) != 0);
835 ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);
836
837 do {
838 ts = t->t_intr_start;
839 } while (cas64(&t->t_intr_start, ts, tsc_read()) != ts);
840 }
841
842 /*
843 * Dispatch a hilevel interrupt (one above LOCK_LEVEL)
844 */
845 /*ARGSUSED*/
846 static void
847 dispatch_hilevel(uint_t vector, uint_t arg2)
848 {
849 sti();
850 av_dispatch_autovect(vector);
851 cli();
852 }
853
854 /*
855 * Dispatch a soft interrupt
856 */
857 /*ARGSUSED*/
858 static void
859 dispatch_softint(uint_t oldpil, uint_t arg2)
860 {
861 struct cpu *cpu = CPU;
862
863 sti();
864 av_dispatch_softvect((int)cpu->cpu_thread->t_pil);
865 cli();
866
867 /*
868 * Must run softint_epilog() on the interrupt thread stack, since
869 * there may not be a return from it if the interrupt thread blocked.
870 */
871 dosoftint_epilog(cpu, oldpil);
872 }
873
874 /*
875 * Dispatch a normal interrupt
876 */
877 static void
878 dispatch_hardint(uint_t vector, uint_t oldipl)
879 {
880 struct cpu *cpu = CPU;
881
882 sti();
883 av_dispatch_autovect(vector);
884 cli();
885
886 /*
887 * Must run intr_thread_epilog() on the interrupt thread stack, since
888 * there may not be a return from it if the interrupt thread blocked.
889 */
890 intr_thread_epilog(cpu, vector, oldipl);
891 }
892
893 /*
894 * Deliver any softints the current interrupt priority allows.
895 * Called with interrupts disabled.
896 */
897 void
898 dosoftint(struct regs *regs)
899 {
900 struct cpu *cpu = CPU;
901 int oldipl;
902 caddr_t newsp;
903
904 while (cpu->cpu_softinfo.st_pending) {
905 oldipl = cpu->cpu_pri;
906 newsp = dosoftint_prolog(cpu, (caddr_t)regs,
907 cpu->cpu_softinfo.st_pending, oldipl);
908 /*
909 * If returned stack pointer is NULL, priority is too high
910 * to run any of the pending softints now.
911 * Break out and they will be run later.
912 */
913 if (newsp == NULL)
914 break;
915 switch_sp_and_call(newsp, dispatch_softint, oldipl, 0);
916 }
917 }
918
919 /*
920 * Interrupt service routine, called with interrupts disabled.
921 */
922 /*ARGSUSED*/
923 void
924 do_interrupt(struct regs *rp, trap_trace_rec_t *ttp)
925 {
926 struct cpu *cpu = CPU;
927 int newipl, oldipl = cpu->cpu_pri;
928 uint_t vector;
929 caddr_t newsp;
930
931 #ifdef TRAPTRACE
932 ttp->ttr_marker = TT_INTERRUPT;
933 ttp->ttr_ipl = 0xff;
934 ttp->ttr_pri = oldipl;
935 ttp->ttr_spl = cpu->cpu_base_spl;
936 ttp->ttr_vector = 0xff;
937 #endif /* TRAPTRACE */
938
939 cpu_idle_exit(CPU_IDLE_CB_FLAG_INTR);
940
941 ++*(uint16_t *)&cpu->cpu_m.mcpu_istamp;
942
943 /*
944 * If it's a softint go do it now.
945 */
946 if (rp->r_trapno == T_SOFTINT) {
947 dosoftint(rp);
948 ASSERT(!interrupts_enabled());
949 return;
950 }
951
952 /*
953 * Raise the interrupt priority.
954 */
955 newipl = (*setlvl)(oldipl, (int *)&rp->r_trapno);
956 #ifdef TRAPTRACE
957 ttp->ttr_ipl = newipl;
958 #endif /* TRAPTRACE */
959
960 /*
961 * Bail if it is a spurious interrupt
962 */
963 if (newipl == -1)
964 return;
965 cpu->cpu_pri = newipl;
966 vector = rp->r_trapno;
967 #ifdef TRAPTRACE
968 ttp->ttr_vector = vector;
969 #endif /* TRAPTRACE */
970 if (newipl > LOCK_LEVEL) {
971 /*
972 * High priority interrupts run on this cpu's interrupt stack.
973 */
974 if (hilevel_intr_prolog(cpu, newipl, oldipl, rp) == 0) {
975 newsp = cpu->cpu_intr_stack;
976 switch_sp_and_call(newsp, dispatch_hilevel, vector, 0);
977 } else { /* already on the interrupt stack */
978 dispatch_hilevel(vector, 0);
979 }
980 (void) hilevel_intr_epilog(cpu, newipl, oldipl, vector);
981 } else {
982 /*
983 * Run this interrupt in a separate thread.
984 */
985 newsp = intr_thread_prolog(cpu, (caddr_t)rp, newipl);
986 switch_sp_and_call(newsp, dispatch_hardint, vector, oldipl);
987 }
988
989 #if !defined(__xpv)
990 /*
991 * Deliver any pending soft interrupts.
992 */
993 if (cpu->cpu_softinfo.st_pending)
994 dosoftint(rp);
995 #endif /* !__xpv */
996 }
997
998
999 /*
1000 * Common tasks always done by _sys_rtt, called with interrupts disabled.
1001 * Returns 1 if returning to userland, 0 if returning to system mode.
1002 */
1003 int
1004 sys_rtt_common(struct regs *rp)
1005 {
1006 kthread_t *tp;
1007 extern void mutex_exit_critical_start();
1008 extern long mutex_exit_critical_size;
1009 extern void mutex_owner_running_critical_start();
1010 extern long mutex_owner_running_critical_size;
1011
1012 loop:
1013
1014 /*
1015 * Check if returning to user
1016 */
1017 tp = CPU->cpu_thread;
1018 if (USERMODE(rp->r_cs)) {
1019 /*
1020 * Check if AST pending.
1021 */
1022 if (tp->t_astflag) {
1023 /*
1024 * Let trap() handle the AST
1025 */
1026 sti();
1027 rp->r_trapno = T_AST;
1028 trap(rp, (caddr_t)0, CPU->cpu_id);
1029 cli();
1030 goto loop;
1031 }
1032
1033 #if defined(__amd64)
1034 /*
1035 * We are done if segment registers do not need updating.
1036 */
1037 if (tp->t_lwp->lwp_pcb.pcb_rupdate == 0)
1038 return (1);
1039
1040 if (update_sregs(rp, tp->t_lwp)) {
1041 /*
1042 * 1 or more of the selectors is bad.
1043 * Deliver a SIGSEGV.
1044 */
1045 proc_t *p = ttoproc(tp);
1046
1047 sti();
1048 mutex_enter(&p->p_lock);
1049 tp->t_lwp->lwp_cursig = SIGSEGV;
1050 mutex_exit(&p->p_lock);
1051 psig();
1052 tp->t_sig_check = 1;
1053 cli();
1054 }
1055 tp->t_lwp->lwp_pcb.pcb_rupdate = 0;
1056
1057 #endif /* __amd64 */
1058 return (1);
1059 }
1060
1061 /*
1062 * Here if we are returning to supervisor mode.
1063 * Check for a kernel preemption request.
1064 */
1065 if (CPU->cpu_kprunrun && (rp->r_ps & PS_IE)) {
1066
1067 /*
1068 * Do nothing if already in kpreempt
1069 */
1070 if (!tp->t_preempt_lk) {
1071 tp->t_preempt_lk = 1;
1072 sti();
1073 kpreempt(1); /* asynchronous kpreempt call */
1074 cli();
1075 tp->t_preempt_lk = 0;
1076 }
1077 }
1078
1079 /*
1080 * If we interrupted the mutex_exit() critical region we must
1081	 * reset the PC back to the beginning to prevent missed wakeups.
1082 * See the comments in mutex_exit() for details.
1083 */
1084 if ((uintptr_t)rp->r_pc - (uintptr_t)mutex_exit_critical_start <
1085 mutex_exit_critical_size) {
1086 rp->r_pc = (greg_t)mutex_exit_critical_start;
1087 }
1088
1089 /*
1090 * If we interrupted the mutex_owner_running() critical region we
1091 * must reset the PC back to the beginning to prevent dereferencing
1092 * of a freed thread pointer. See the comments in mutex_owner_running
1093 * for details.
1094 */
1095 if ((uintptr_t)rp->r_pc -
1096 (uintptr_t)mutex_owner_running_critical_start <
1097 mutex_owner_running_critical_size) {
1098 rp->r_pc = (greg_t)mutex_owner_running_critical_start;
1099 }
1100
1101 return (0);
1102 }
1103
1104 void
1105 send_dirint(int cpuid, int int_level)
1106 {
1107 (*send_dirintf)(cpuid, int_level);
1108 }
1109
1110 #define IS_FAKE_SOFTINT(flag, newpri) \
1111 (((flag) & PS_IE) && \
1112 (((*get_pending_spl)() > (newpri)) || \
1113 bsrw_insn((uint16_t)cpu->cpu_softinfo.st_pending) > (newpri)))
1114
1115 /*
1116 * do_splx routine: takes the new ipl to set and
1117 * returns the old ipl.
1118 * We are careful not to set the priority lower than CPU->cpu_base_spl;
1119 * even though it seems we're raising the priority, it could be set
1120 * higher at any time by an interrupt routine, so we must block interrupts
1121 * and look at CPU->cpu_base_spl.
1122 */
1123 int
1124 do_splx(int newpri)
1125 {
1126 ulong_t flag;
1127 cpu_t *cpu;
1128 int curpri, basepri;
1129
1130 flag = intr_clear();
1131 cpu = CPU; /* ints are disabled, now safe to cache cpu ptr */
1132 curpri = cpu->cpu_m.mcpu_pri;
1133 basepri = cpu->cpu_base_spl;
1134 if (newpri < basepri)
1135 newpri = basepri;
1136 cpu->cpu_m.mcpu_pri = newpri;
1137 (*setspl)(newpri);
1138 /*
1139	 * If we are going to reenable interrupts, see if the new priority level
1140 * allows pending softint delivery.
1141 */
1142 if (IS_FAKE_SOFTINT(flag, newpri))
1143 fakesoftint();
1144 ASSERT(!interrupts_enabled());
1145 intr_restore(flag);
1146 return (curpri);
1147 }
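
/*
 * Illustrative usage sketch (editorial addition, not part of this file):
 * do_splx() and splr() back the usual spl protocol, in which a caller
 * raises the priority, performs its critical work, and then restores the
 * saved level via splx(9F); the critical section below is hypothetical.
 *
 *	int s = splr(ipltospl(LOCK_LEVEL));
 *	... code that must not be preempted by interrupts at or
 *	    below LOCK_LEVEL ...
 *	splx(s);
 */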
1148
1149 /*
1150 * Common spl raise routine, takes new ipl to set
1151 * returns the old ipl, will not lower ipl.
1152 */
1153 int
1154 splr(int newpri)
1155 {
1156 ulong_t flag;
1157 cpu_t *cpu;
1158 int curpri, basepri;
1159
1160 flag = intr_clear();
1161 cpu = CPU; /* ints are disabled, now safe to cache cpu ptr */
1162 curpri = cpu->cpu_m.mcpu_pri;
1163 /*
1164 * Only do something if new priority is larger
1165 */
1166 if (newpri > curpri) {
1167 basepri = cpu->cpu_base_spl;
1168 if (newpri < basepri)
1169 newpri = basepri;
1170 cpu->cpu_m.mcpu_pri = newpri;
1171 (*setspl)(newpri);
1172 /*
1173 * See if new priority level allows pending softint delivery
1174 */
1175 if (IS_FAKE_SOFTINT(flag, newpri))
1176 fakesoftint();
1177 }
1178 intr_restore(flag);
1179 return (curpri);
1180 }
1181
1182 int
1183 getpil(void)
1184 {
1185 return (CPU->cpu_m.mcpu_pri);
1186 }
1187
1188 int
1189 spl_xcall(void)
1190 {
1191 return (splr(ipltospl(XCALL_PIL)));
1192 }
1193
1194 int
1195 interrupts_enabled(void)
1196 {
1197 ulong_t flag;
1198
1199 flag = getflags();
1200 return ((flag & PS_IE) == PS_IE);
1201 }
1202
1203 #ifdef DEBUG
1204 void
1205 assert_ints_enabled(void)
1206 {
1207 ASSERT(!interrupts_unleashed || interrupts_enabled());
1208 }
1209 #endif /* DEBUG */
1210