15788Smv143129 /* 25788Smv143129 * CDDL HEADER START 35788Smv143129 * 45788Smv143129 * The contents of this file are subject to the terms of the 55788Smv143129 * Common Development and Distribution License (the "License"). 65788Smv143129 * You may not use this file except in compliance with the License. 75788Smv143129 * 85788Smv143129 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 95788Smv143129 * or http://www.opensolaris.org/os/licensing. 105788Smv143129 * See the License for the specific language governing permissions 115788Smv143129 * and limitations under the License. 125788Smv143129 * 135788Smv143129 * When distributing Covered Code, include this CDDL HEADER in each 145788Smv143129 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 155788Smv143129 * If applicable, add the following below this CDDL HEADER, with the 165788Smv143129 * fields enclosed by brackets "[]" replaced with your own identifying 175788Smv143129 * information: Portions Copyright [yyyy] [name of copyright owner] 185788Smv143129 * 195788Smv143129 * CDDL HEADER END 205788Smv143129 */ 215788Smv143129 225788Smv143129 /* 239039SMadhavan.Venkataraman@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 245788Smv143129 * Use is subject to license terms. 255788Smv143129 */ 265788Smv143129 275788Smv143129 #include <sys/thread.h> 285788Smv143129 #include <sys/proc.h> 295788Smv143129 #include <sys/task.h> 305788Smv143129 #include <sys/cmn_err.h> 315788Smv143129 #include <sys/class.h> 325788Smv143129 #include <sys/sdt.h> 335788Smv143129 #include <sys/atomic.h> 345788Smv143129 #include <sys/cpu.h> 355788Smv143129 #include <sys/clock_tick.h> 36*11066Srafael.vanoni@sun.com #include <sys/clock_impl.h> 375788Smv143129 #include <sys/sysmacros.h> 385788Smv143129 #include <vm/rm.h> 395788Smv143129 405788Smv143129 /* 415788Smv143129 * This file contains the implementation of clock tick accounting for threads. 425788Smv143129 * Every tick, user threads running on various CPUs are located and charged 435788Smv143129 * with a tick to account for their use of CPU time. 445788Smv143129 * 455788Smv143129 * Every tick, the clock() handler calls clock_tick_schedule() to perform tick 465788Smv143129 * accounting for all the threads in the system. Tick accounting is done in 475788Smv143129 * two phases: 485788Smv143129 * 495788Smv143129 * Tick scheduling Done in clock_tick_schedule(). In this phase, cross 505788Smv143129 * calls are scheduled to multiple CPUs to perform 515788Smv143129 * multi-threaded tick accounting. The CPUs are chosen 525788Smv143129 * on a rotational basis so as to distribute the tick 535788Smv143129 * accounting load evenly across all CPUs. 545788Smv143129 * 555788Smv143129 * Tick execution Done in clock_tick_execute(). In this phase, tick 565788Smv143129 * accounting is actually performed by softint handlers 575788Smv143129 * on multiple CPUs. 585788Smv143129 * 595788Smv143129 * This implementation gives us a multi-threaded tick processing facility that 605788Smv143129 * is suitable for configurations with a large number of CPUs. On smaller 615788Smv143129 * configurations it may be desirable to let the processing be single-threaded 625788Smv143129 * and just allow clock() to do it as it has been done traditionally. To 635788Smv143129 * facilitate this, a variable, clock_tick_threshold, is defined. Platforms 645788Smv143129 * that desire multi-threading should set this variable to something 655788Smv143129 * appropriate. A recommended value may be found in clock_tick.h. At boot time, 665788Smv143129 * if the number of CPUs is greater than clock_tick_threshold, multi-threading 675788Smv143129 * kicks in. Note that this is a decision made at boot time. If more CPUs 685788Smv143129 * are dynamically added later on to exceed the threshold, no attempt is made 695788Smv143129 * to switch to multi-threaded. Similarly, if CPUs are removed dynamically 705788Smv143129 * no attempt is made to switch to single-threaded. This is to keep the 715788Smv143129 * implementation simple. Also note that the threshold can be changed for a 725788Smv143129 * specific customer configuration via /etc/system. 735788Smv143129 * 745788Smv143129 * The boot time decision is reflected in clock_tick_single_threaded. 755788Smv143129 */ 765788Smv143129 775788Smv143129 /* 785788Smv143129 * clock_tick_threshold 795788Smv143129 * If the number of CPUs at boot time exceeds this threshold, 805788Smv143129 * multi-threaded tick accounting kicks in. 815788Smv143129 * 825788Smv143129 * clock_tick_ncpus 835788Smv143129 * The number of CPUs in a set. Each set is scheduled for tick execution 845788Smv143129 * on a separate processor. 855788Smv143129 * 865788Smv143129 * clock_tick_single_threaded 875788Smv143129 * Indicates whether or not tick accounting is single threaded. 885788Smv143129 * 895788Smv143129 * clock_tick_total_cpus 905788Smv143129 * Total number of online CPUs. 915788Smv143129 * 925788Smv143129 * clock_tick_cpus 935788Smv143129 * Array of online CPU pointers. 945788Smv143129 * 955788Smv143129 * clock_tick_cpu 965788Smv143129 * Per-CPU, cache-aligned data structures to facilitate multi-threading. 975788Smv143129 * 985788Smv143129 * clock_tick_active 995788Smv143129 * Counter that indicates the number of active tick processing softints 1005788Smv143129 * in the system. 1015788Smv143129 * 1025788Smv143129 * clock_tick_pending 1035788Smv143129 * Number of pending ticks that need to be accounted by the softint 1045788Smv143129 * handlers. 1055788Smv143129 * 1065788Smv143129 * clock_tick_lock 1075788Smv143129 * Mutex to synchronize between clock_tick_schedule() and 1085788Smv143129 * CPU online/offline. 1095788Smv143129 * 1105788Smv143129 * clock_cpu_id 1115788Smv143129 * CPU id of the clock() CPU. Used to detect when the clock CPU 1125788Smv143129 * is offlined. 1135788Smv143129 * 1145788Smv143129 * clock_tick_online_cpuset 1155788Smv143129 * CPU set of all online processors that can be X-called. 1165788Smv143129 * 1175788Smv143129 * clock_tick_proc_max 1185788Smv143129 * Each process is allowed to accumulate a few ticks before checking 1195788Smv143129 * for the task CPU time resource limit. We lower the number of calls 1205788Smv143129 * to rctl_test() to make tick accounting more scalable. The tradeoff 1215788Smv143129 * is that the limit may not get enforced in a timely manner. This is 1225788Smv143129 * typically not a problem. 1235788Smv143129 * 1245788Smv143129 * clock_tick_set 1255788Smv143129 * Per-set structures. Each structure contains the range of CPUs 1265788Smv143129 * to be processed for the set. 1275788Smv143129 * 1285788Smv143129 * clock_tick_nsets; 1295788Smv143129 * Number of sets. 1305788Smv143129 * 1315788Smv143129 * clock_tick_scan 1325788Smv143129 * Where to begin the scan for single-threaded mode. In multi-threaded, 1335788Smv143129 * the clock_tick_set itself contains a field for this. 1345788Smv143129 */ 1355788Smv143129 int clock_tick_threshold; 1365788Smv143129 int clock_tick_ncpus; 1375788Smv143129 int clock_tick_single_threaded; 1385788Smv143129 int clock_tick_total_cpus; 1395788Smv143129 cpu_t *clock_tick_cpus[NCPU]; 1405788Smv143129 clock_tick_cpu_t *clock_tick_cpu[NCPU]; 1415788Smv143129 ulong_t clock_tick_active; 1425788Smv143129 int clock_tick_pending; 1435788Smv143129 kmutex_t clock_tick_lock; 1445788Smv143129 processorid_t clock_cpu_id; 1455788Smv143129 cpuset_t clock_tick_online_cpuset; 1465788Smv143129 clock_t clock_tick_proc_max; 1475788Smv143129 clock_tick_set_t *clock_tick_set; 1485788Smv143129 int clock_tick_nsets; 1495788Smv143129 int clock_tick_scan; 1509039SMadhavan.Venkataraman@Sun.COM ulong_t clock_tick_intr; 1515788Smv143129 1525788Smv143129 static uint_t clock_tick_execute(caddr_t, caddr_t); 1535788Smv143129 static void clock_tick_execute_common(int, int, int, clock_t, int); 1545788Smv143129 1555788Smv143129 #define CLOCK_TICK_ALIGN 64 /* cache alignment */ 1565788Smv143129 1575788Smv143129 /* 1585788Smv143129 * Clock tick initialization is done in two phases: 1595788Smv143129 * 1605788Smv143129 * 1. Before clock_init() is called, clock_tick_init_pre() is called to set 1615788Smv143129 * up single-threading so the clock() can begin to do its job. 1625788Smv143129 * 1635788Smv143129 * 2. After the slave CPUs are initialized at boot time, we know the number 1645788Smv143129 * of CPUs. clock_tick_init_post() is called to set up multi-threading if 1655788Smv143129 * required. 1665788Smv143129 */ 1675788Smv143129 void 1685788Smv143129 clock_tick_init_pre(void) 1695788Smv143129 { 1705788Smv143129 clock_tick_cpu_t *ctp; 1715788Smv143129 int i, n; 1725788Smv143129 clock_tick_set_t *csp; 1735788Smv143129 uintptr_t buf; 1745788Smv143129 size_t size; 1755788Smv143129 1765788Smv143129 clock_tick_single_threaded = 1; 1775788Smv143129 1785788Smv143129 size = P2ROUNDUP(sizeof (clock_tick_cpu_t), CLOCK_TICK_ALIGN); 1795788Smv143129 buf = (uintptr_t)kmem_zalloc(size * NCPU + CLOCK_TICK_ALIGN, KM_SLEEP); 1805788Smv143129 buf = P2ROUNDUP(buf, CLOCK_TICK_ALIGN); 1815788Smv143129 1825788Smv143129 /* 1835788Smv143129 * Perform initialization in case multi-threading is chosen later. 1845788Smv143129 */ 1859039SMadhavan.Venkataraman@Sun.COM if (&create_softint != NULL) { 1869039SMadhavan.Venkataraman@Sun.COM clock_tick_intr = create_softint(LOCK_LEVEL, 1879039SMadhavan.Venkataraman@Sun.COM clock_tick_execute, (caddr_t)NULL); 1889039SMadhavan.Venkataraman@Sun.COM } 1895788Smv143129 for (i = 0; i < NCPU; i++, buf += size) { 1905788Smv143129 ctp = (clock_tick_cpu_t *)buf; 1915788Smv143129 clock_tick_cpu[i] = ctp; 1925788Smv143129 mutex_init(&ctp->ct_lock, NULL, MUTEX_DEFAULT, NULL); 1935788Smv143129 if (&create_softint != NULL) { 1949039SMadhavan.Venkataraman@Sun.COM ctp->ct_intr = clock_tick_intr; 1955788Smv143129 } 1965788Smv143129 ctp->ct_pending = 0; 1975788Smv143129 } 1985788Smv143129 1995788Smv143129 mutex_init(&clock_tick_lock, NULL, MUTEX_DEFAULT, NULL); 2005788Smv143129 2015788Smv143129 /* 2025788Smv143129 * Compute clock_tick_ncpus here. We need it to compute the 2035788Smv143129 * maximum number of tick sets we need to support. 2045788Smv143129 */ 2055788Smv143129 ASSERT(clock_tick_ncpus >= 0); 2065788Smv143129 if (clock_tick_ncpus == 0) 2075788Smv143129 clock_tick_ncpus = CLOCK_TICK_NCPUS; 2085788Smv143129 if (clock_tick_ncpus > max_ncpus) 2095788Smv143129 clock_tick_ncpus = max_ncpus; 2105788Smv143129 2115788Smv143129 /* 2125788Smv143129 * Allocate and initialize the tick sets. 2135788Smv143129 */ 2145788Smv143129 n = (max_ncpus + clock_tick_ncpus - 1)/clock_tick_ncpus; 2155788Smv143129 clock_tick_set = kmem_zalloc(sizeof (clock_tick_set_t) * n, KM_SLEEP); 2165788Smv143129 for (i = 0; i < n; i++) { 2175788Smv143129 csp = &clock_tick_set[i]; 2185788Smv143129 csp->ct_start = i * clock_tick_ncpus; 2195788Smv143129 csp->ct_scan = csp->ct_start; 2205788Smv143129 csp->ct_end = csp->ct_start; 2215788Smv143129 } 2225788Smv143129 } 2235788Smv143129 2245788Smv143129 void 2255788Smv143129 clock_tick_init_post(void) 2265788Smv143129 { 2275788Smv143129 /* 2285788Smv143129 * If a platform does not provide create_softint() and invoke_softint(), 2295788Smv143129 * then we assume single threaded. 2305788Smv143129 */ 2315788Smv143129 if (&invoke_softint == NULL) 2325788Smv143129 clock_tick_threshold = 0; 2335788Smv143129 2345788Smv143129 ASSERT(clock_tick_threshold >= 0); 2355788Smv143129 2365788Smv143129 if (clock_tick_threshold == 0) 2375788Smv143129 clock_tick_threshold = max_ncpus; 2385788Smv143129 2395788Smv143129 /* 2405788Smv143129 * If a platform does not specify a threshold or if the number of CPUs 2415788Smv143129 * at boot time does not exceed the threshold, tick accounting remains 2425788Smv143129 * single-threaded. 2435788Smv143129 */ 2445788Smv143129 if (ncpus <= clock_tick_threshold) { 2455788Smv143129 clock_tick_ncpus = max_ncpus; 2465788Smv143129 clock_tick_proc_max = 1; 2475788Smv143129 return; 2485788Smv143129 } 2495788Smv143129 2505788Smv143129 /* 2515788Smv143129 * OK. Multi-thread tick processing. If a platform has not specified 2525788Smv143129 * the CPU set size for multi-threading, then use the default value. 2535788Smv143129 * This value has been arrived through measurements on large 2545788Smv143129 * configuration systems. 2555788Smv143129 */ 2565788Smv143129 clock_tick_single_threaded = 0; 2575788Smv143129 if (clock_tick_proc_max == 0) { 2585788Smv143129 clock_tick_proc_max = CLOCK_TICK_PROC_MAX; 2595788Smv143129 if (hires_tick) 2605788Smv143129 clock_tick_proc_max *= 10; 2615788Smv143129 } 2625788Smv143129 } 2635788Smv143129 2645788Smv143129 static void 2655788Smv143129 clock_tick_schedule_one(clock_tick_set_t *csp, int pending, processorid_t cid) 2665788Smv143129 { 2675788Smv143129 clock_tick_cpu_t *ctp; 2685788Smv143129 2695788Smv143129 ASSERT(&invoke_softint != NULL); 2709039SMadhavan.Venkataraman@Sun.COM 2719039SMadhavan.Venkataraman@Sun.COM atomic_inc_ulong(&clock_tick_active); 2729039SMadhavan.Venkataraman@Sun.COM 2735788Smv143129 /* 2745788Smv143129 * Schedule tick accounting for a set of CPUs. 2755788Smv143129 */ 2765788Smv143129 ctp = clock_tick_cpu[cid]; 2775788Smv143129 mutex_enter(&ctp->ct_lock); 278*11066Srafael.vanoni@sun.com ctp->ct_lbolt = (clock_t)LBOLT_NO_ACCOUNT; 2795788Smv143129 ctp->ct_pending += pending; 2805788Smv143129 ctp->ct_start = csp->ct_start; 2815788Smv143129 ctp->ct_end = csp->ct_end; 2825788Smv143129 ctp->ct_scan = csp->ct_scan; 2835788Smv143129 mutex_exit(&ctp->ct_lock); 2845788Smv143129 2855788Smv143129 invoke_softint(cid, ctp->ct_intr); 2865788Smv143129 /* 2875788Smv143129 * Return without waiting for the softint to finish. 2885788Smv143129 */ 2895788Smv143129 } 2905788Smv143129 2915788Smv143129 static void 2925788Smv143129 clock_tick_process(cpu_t *cp, clock_t mylbolt, int pending) 2935788Smv143129 { 2945788Smv143129 kthread_t *t; 2955788Smv143129 kmutex_t *plockp; 2965788Smv143129 int notick, intr; 2975788Smv143129 klwp_id_t lwp; 2985788Smv143129 2995788Smv143129 /* 3005788Smv143129 * The locking here is rather tricky. thread_free_prevent() 3015788Smv143129 * prevents the thread returned from being freed while we 3025788Smv143129 * are looking at it. We can then check if the thread 3035788Smv143129 * is exiting and get the appropriate p_lock if it 3045788Smv143129 * is not. We have to be careful, though, because 3055788Smv143129 * the _process_ can still be freed while we've 3065788Smv143129 * prevented thread free. To avoid touching the 3075788Smv143129 * proc structure we put a pointer to the p_lock in the 3085788Smv143129 * thread structure. The p_lock is persistent so we 3095788Smv143129 * can acquire it even if the process is gone. At that 3105788Smv143129 * point we can check (again) if the thread is exiting 3115788Smv143129 * and either drop the lock or do the tick processing. 3125788Smv143129 */ 3135788Smv143129 t = cp->cpu_thread; /* Current running thread */ 3145788Smv143129 if (CPU == cp) { 3155788Smv143129 /* 3165788Smv143129 * 't' will be the tick processing thread on this 3175788Smv143129 * CPU. Use the pinned thread (if any) on this CPU 3185788Smv143129 * as the target of the clock tick. 3195788Smv143129 */ 3205788Smv143129 if (t->t_intr != NULL) 3215788Smv143129 t = t->t_intr; 3225788Smv143129 } 3235788Smv143129 3245788Smv143129 /* 3255788Smv143129 * We use thread_free_prevent to keep the currently running 3265788Smv143129 * thread from being freed or recycled while we're 3275788Smv143129 * looking at it. 3285788Smv143129 */ 3295788Smv143129 thread_free_prevent(t); 3305788Smv143129 /* 3315788Smv143129 * We cannot hold the cpu_lock to prevent the 3325788Smv143129 * cpu_active from changing in the clock interrupt. 3335788Smv143129 * As long as we don't block (or don't get pre-empted) 3345788Smv143129 * the cpu_list will not change (all threads are paused 3355788Smv143129 * before list modification). 3365788Smv143129 */ 3375788Smv143129 if (CLOCK_TICK_CPU_OFFLINE(cp)) { 3385788Smv143129 thread_free_allow(t); 3395788Smv143129 return; 3405788Smv143129 } 3415788Smv143129 3425788Smv143129 /* 3435788Smv143129 * Make sure the thread is still on the CPU. 3445788Smv143129 */ 3455788Smv143129 if ((t != cp->cpu_thread) && 3465788Smv143129 ((cp != CPU) || (t != cp->cpu_thread->t_intr))) { 3475788Smv143129 /* 3485788Smv143129 * We could not locate the thread. Skip this CPU. Race 3495788Smv143129 * conditions while performing these checks are benign. 3505788Smv143129 * These checks are not perfect and they don't need 3515788Smv143129 * to be. 3525788Smv143129 */ 3535788Smv143129 thread_free_allow(t); 3545788Smv143129 return; 3555788Smv143129 } 3565788Smv143129 3575788Smv143129 intr = t->t_flag & T_INTR_THREAD; 3585788Smv143129 lwp = ttolwp(t); 3595788Smv143129 if (lwp == NULL || (t->t_proc_flag & TP_LWPEXIT) || intr) { 3605788Smv143129 /* 3615788Smv143129 * Thread is exiting (or uninteresting) so don't 3625788Smv143129 * do tick processing. 3635788Smv143129 */ 3645788Smv143129 thread_free_allow(t); 3655788Smv143129 return; 3665788Smv143129 } 3675788Smv143129 3685788Smv143129 /* 3695788Smv143129 * OK, try to grab the process lock. See 3705788Smv143129 * comments above for why we're not using 3715788Smv143129 * ttoproc(t)->p_lockp here. 3725788Smv143129 */ 3735788Smv143129 plockp = t->t_plockp; 3745788Smv143129 mutex_enter(plockp); 3755788Smv143129 /* See above comment. */ 3765788Smv143129 if (CLOCK_TICK_CPU_OFFLINE(cp)) { 3775788Smv143129 mutex_exit(plockp); 3785788Smv143129 thread_free_allow(t); 3795788Smv143129 return; 3805788Smv143129 } 3815788Smv143129 3825788Smv143129 /* 3835788Smv143129 * The thread may have exited between when we 3845788Smv143129 * checked above, and when we got the p_lock. 3855788Smv143129 */ 3865788Smv143129 if (t->t_proc_flag & TP_LWPEXIT) { 3875788Smv143129 mutex_exit(plockp); 3885788Smv143129 thread_free_allow(t); 3895788Smv143129 return; 3905788Smv143129 } 3915788Smv143129 3925788Smv143129 /* 3935788Smv143129 * Either we have the p_lock for the thread's process, 3945788Smv143129 * or we don't care about the thread structure any more. 3955788Smv143129 * Either way we can allow thread free. 3965788Smv143129 */ 3975788Smv143129 thread_free_allow(t); 3985788Smv143129 3995788Smv143129 /* 4005788Smv143129 * If we haven't done tick processing for this 4015788Smv143129 * lwp, then do it now. Since we don't hold the 4025788Smv143129 * lwp down on a CPU it can migrate and show up 4035788Smv143129 * more than once, hence the lbolt check. mylbolt 4045788Smv143129 * is copied at the time of tick scheduling to prevent 4055788Smv143129 * lbolt mismatches. 4065788Smv143129 * 4075788Smv143129 * Also, make sure that it's okay to perform the 4085788Smv143129 * tick processing before calling clock_tick. 4095788Smv143129 * Setting notick to a TRUE value (ie. not 0) 4105788Smv143129 * results in tick processing not being performed for 4115788Smv143129 * that thread. 4125788Smv143129 */ 4135788Smv143129 notick = ((cp->cpu_flags & CPU_QUIESCED) || CPU_ON_INTR(cp) || 4145788Smv143129 (cp->cpu_dispthread == cp->cpu_idle_thread)); 4155788Smv143129 4165788Smv143129 if ((!notick) && (t->t_lbolt < mylbolt)) { 4175788Smv143129 t->t_lbolt = mylbolt; 4185788Smv143129 clock_tick(t, pending); 4195788Smv143129 } 4205788Smv143129 4215788Smv143129 mutex_exit(plockp); 4225788Smv143129 } 4235788Smv143129 4245788Smv143129 void 4255788Smv143129 clock_tick_schedule(int one_sec) 4265788Smv143129 { 4275788Smv143129 ulong_t active; 4285788Smv143129 int i, end; 4295788Smv143129 clock_tick_set_t *csp; 4305788Smv143129 cpu_t *cp; 4315788Smv143129 4325788Smv143129 if (clock_cpu_id != CPU->cpu_id) 4335788Smv143129 clock_cpu_id = CPU->cpu_id; 4345788Smv143129 4355788Smv143129 if (clock_tick_single_threaded) { 4365788Smv143129 /* 4375788Smv143129 * Each tick cycle, start the scan from a different 4385788Smv143129 * CPU for the sake of fairness. 4395788Smv143129 */ 4405788Smv143129 end = clock_tick_total_cpus; 4415788Smv143129 clock_tick_scan++; 4425788Smv143129 if (clock_tick_scan >= end) 4435788Smv143129 clock_tick_scan = 0; 4445788Smv143129 445*11066Srafael.vanoni@sun.com clock_tick_execute_common(0, clock_tick_scan, end, 446*11066Srafael.vanoni@sun.com (clock_t)LBOLT_NO_ACCOUNT, 1); 4475788Smv143129 4485788Smv143129 return; 4495788Smv143129 } 4505788Smv143129 4515788Smv143129 /* 4525788Smv143129 * If the previous invocation of handlers is not yet finished, then 4535788Smv143129 * simply increment a pending count and return. Eventually when they 4545788Smv143129 * finish, the pending count is passed down to the next set of 4555788Smv143129 * handlers to process. This way, ticks that have already elapsed 4565788Smv143129 * in the past are handled as quickly as possible to minimize the 4575788Smv143129 * chances of threads getting away before their pending ticks are 4585788Smv143129 * accounted. The other benefit is that if the pending count is 4595788Smv143129 * more than one, it can be handled by a single invocation of 4605788Smv143129 * clock_tick(). This is a good optimization for large configuration 4615788Smv143129 * busy systems where tick accounting can get backed up for various 4625788Smv143129 * reasons. 4635788Smv143129 */ 4645788Smv143129 clock_tick_pending++; 4655788Smv143129 4665788Smv143129 active = clock_tick_active; 4675788Smv143129 active = atomic_cas_ulong(&clock_tick_active, active, active); 4685788Smv143129 if (active) 4695788Smv143129 return; 4705788Smv143129 4715788Smv143129 /* 4725788Smv143129 * We want to handle the clock CPU here. If we 4735788Smv143129 * scheduled the accounting for the clock CPU to another 4745788Smv143129 * processor, that processor will find only the clock() thread 4755788Smv143129 * running and not account for any user thread below it. Also, 4765788Smv143129 * we want to handle this before we block on anything and allow 4775788Smv143129 * the pinned thread below the current thread to escape. 4785788Smv143129 */ 479*11066Srafael.vanoni@sun.com clock_tick_process(CPU, (clock_t)LBOLT_NO_ACCOUNT, clock_tick_pending); 4805788Smv143129 4815788Smv143129 mutex_enter(&clock_tick_lock); 4825788Smv143129 4835788Smv143129 /* 4845788Smv143129 * Schedule each set on a separate processor. 4855788Smv143129 */ 4865788Smv143129 cp = clock_cpu_list; 4875788Smv143129 for (i = 0; i < clock_tick_nsets; i++) { 4885788Smv143129 csp = &clock_tick_set[i]; 4895788Smv143129 4905788Smv143129 /* 4915788Smv143129 * Pick the next online CPU in list for scheduling tick 4925788Smv143129 * accounting. The clock_tick_lock is held by the caller. 4935788Smv143129 * So, CPU online/offline cannot muck with this while 4945788Smv143129 * we are picking our CPU to X-call. 4955788Smv143129 */ 4965788Smv143129 if (cp == CPU) 4975788Smv143129 cp = cp->cpu_next_onln; 4985788Smv143129 4995788Smv143129 /* 5005788Smv143129 * Each tick cycle, start the scan from a different 5015788Smv143129 * CPU for the sake of fairness. 5025788Smv143129 */ 5035788Smv143129 csp->ct_scan++; 5045788Smv143129 if (csp->ct_scan >= csp->ct_end) 5055788Smv143129 csp->ct_scan = csp->ct_start; 5065788Smv143129 5075788Smv143129 clock_tick_schedule_one(csp, clock_tick_pending, cp->cpu_id); 5085788Smv143129 5095788Smv143129 cp = cp->cpu_next_onln; 5105788Smv143129 } 5115788Smv143129 5125788Smv143129 if (one_sec) { 5135788Smv143129 /* 5145788Smv143129 * Move the CPU pointer around every second. This is so 5155788Smv143129 * all the CPUs can be X-called in a round-robin fashion 5165788Smv143129 * to evenly distribute the X-calls. We don't do this 5175788Smv143129 * at a faster rate than this because we don't want 5185788Smv143129 * to affect cache performance negatively. 5195788Smv143129 */ 5205788Smv143129 clock_cpu_list = clock_cpu_list->cpu_next_onln; 5215788Smv143129 } 5225788Smv143129 5235788Smv143129 mutex_exit(&clock_tick_lock); 5245788Smv143129 5255788Smv143129 clock_tick_pending = 0; 5265788Smv143129 } 5275788Smv143129 5285788Smv143129 static void 5295788Smv143129 clock_tick_execute_common(int start, int scan, int end, clock_t mylbolt, 5305788Smv143129 int pending) 5315788Smv143129 { 5325788Smv143129 cpu_t *cp; 5335788Smv143129 int i; 5345788Smv143129 5355788Smv143129 ASSERT((start <= scan) && (scan <= end)); 5365788Smv143129 5375788Smv143129 /* 5385788Smv143129 * Handle the thread on current CPU first. This is to prevent a 5395788Smv143129 * pinned thread from escaping if we ever block on something. 5405788Smv143129 * Note that in the single-threaded mode, this handles the clock 5415788Smv143129 * CPU. 5425788Smv143129 */ 5435788Smv143129 clock_tick_process(CPU, mylbolt, pending); 5445788Smv143129 5455788Smv143129 /* 5465788Smv143129 * Perform tick accounting for the threads running on 5475788Smv143129 * the scheduled CPUs. 5485788Smv143129 */ 5495788Smv143129 for (i = scan; i < end; i++) { 5505788Smv143129 cp = clock_tick_cpus[i]; 5515788Smv143129 if ((cp == NULL) || (cp == CPU) || (cp->cpu_id == clock_cpu_id)) 5525788Smv143129 continue; 5535788Smv143129 clock_tick_process(cp, mylbolt, pending); 5545788Smv143129 } 5555788Smv143129 5565788Smv143129 for (i = start; i < scan; i++) { 5575788Smv143129 cp = clock_tick_cpus[i]; 5585788Smv143129 if ((cp == NULL) || (cp == CPU) || (cp->cpu_id == clock_cpu_id)) 5595788Smv143129 continue; 5605788Smv143129 clock_tick_process(cp, mylbolt, pending); 5615788Smv143129 } 5625788Smv143129 } 5635788Smv143129 5645788Smv143129 /*ARGSUSED*/ 5655788Smv143129 static uint_t 5665788Smv143129 clock_tick_execute(caddr_t arg1, caddr_t arg2) 5675788Smv143129 { 5685788Smv143129 clock_tick_cpu_t *ctp; 5695788Smv143129 int start, scan, end, pending; 5705788Smv143129 clock_t mylbolt; 5715788Smv143129 5725788Smv143129 /* 5735788Smv143129 * We could have raced with cpu offline. We don't want to 5745788Smv143129 * process anything on an offlined CPU. If we got blocked 5755788Smv143129 * on anything, we may not get scheduled when we wakeup 5765788Smv143129 * later on. 5775788Smv143129 */ 5785788Smv143129 if (!CLOCK_TICK_XCALL_SAFE(CPU)) 5799039SMadhavan.Venkataraman@Sun.COM goto out; 5805788Smv143129 5819039SMadhavan.Venkataraman@Sun.COM ctp = clock_tick_cpu[CPU->cpu_id]; 5825788Smv143129 5835788Smv143129 mutex_enter(&ctp->ct_lock); 5845788Smv143129 pending = ctp->ct_pending; 5855788Smv143129 if (pending == 0) { 5865788Smv143129 /* 5875788Smv143129 * If a CPU is busy at LOCK_LEVEL, then an invocation 5885788Smv143129 * of this softint may be queued for some time. In that case, 5895788Smv143129 * clock_tick_active will not be incremented. 5905788Smv143129 * clock_tick_schedule() will then assume that the previous 5915788Smv143129 * invocation is done and post a new softint. The first one 5925788Smv143129 * that gets in will reset the pending count so the 5935788Smv143129 * second one is a noop. 5945788Smv143129 */ 5955788Smv143129 mutex_exit(&ctp->ct_lock); 5965788Smv143129 goto out; 5975788Smv143129 } 5985788Smv143129 ctp->ct_pending = 0; 5995788Smv143129 start = ctp->ct_start; 6005788Smv143129 end = ctp->ct_end; 6015788Smv143129 scan = ctp->ct_scan; 6025788Smv143129 mylbolt = ctp->ct_lbolt; 6035788Smv143129 mutex_exit(&ctp->ct_lock); 6045788Smv143129 6055788Smv143129 clock_tick_execute_common(start, scan, end, mylbolt, pending); 6065788Smv143129 6075788Smv143129 out: 6085788Smv143129 /* 6095788Smv143129 * Signal completion to the clock handler. 6105788Smv143129 */ 6115788Smv143129 atomic_dec_ulong(&clock_tick_active); 6125788Smv143129 6135788Smv143129 return (1); 6145788Smv143129 } 6155788Smv143129 6165788Smv143129 /*ARGSUSED*/ 6175788Smv143129 static int 6185788Smv143129 clock_tick_cpu_setup(cpu_setup_t what, int cid, void *arg) 6195788Smv143129 { 6205788Smv143129 cpu_t *cp, *ncp; 6215788Smv143129 int i, set; 6225788Smv143129 clock_tick_set_t *csp; 6235788Smv143129 6245788Smv143129 /* 6255788Smv143129 * This function performs some computations at CPU offline/online 6265788Smv143129 * time. The computed values are used during tick scheduling and 6275788Smv143129 * execution phases. This avoids having to compute things on 6285788Smv143129 * an every tick basis. The other benefit is that we perform the 6295788Smv143129 * computations only for onlined CPUs (not offlined ones). As a 6305788Smv143129 * result, no tick processing is attempted for offlined CPUs. 6315788Smv143129 * 6325788Smv143129 * Also, cpu_offline() calls this function before checking for 6335788Smv143129 * active interrupt threads. This allows us to avoid posting 6345788Smv143129 * cross calls to CPUs that are being offlined. 6355788Smv143129 */ 6365788Smv143129 6375788Smv143129 cp = cpu[cid]; 6385788Smv143129 6395788Smv143129 mutex_enter(&clock_tick_lock); 6405788Smv143129 6415788Smv143129 switch (what) { 6425788Smv143129 case CPU_ON: 6435788Smv143129 clock_tick_cpus[clock_tick_total_cpus] = cp; 6445788Smv143129 set = clock_tick_total_cpus / clock_tick_ncpus; 6455788Smv143129 csp = &clock_tick_set[set]; 6465788Smv143129 csp->ct_end++; 6475788Smv143129 clock_tick_total_cpus++; 6485788Smv143129 clock_tick_nsets = 6495788Smv143129 (clock_tick_total_cpus + clock_tick_ncpus - 1) / 6505788Smv143129 clock_tick_ncpus; 6515788Smv143129 CPUSET_ADD(clock_tick_online_cpuset, cp->cpu_id); 6525788Smv143129 membar_sync(); 6535788Smv143129 break; 6545788Smv143129 6555788Smv143129 case CPU_OFF: 6565788Smv143129 if (&sync_softint != NULL) 6575788Smv143129 sync_softint(clock_tick_online_cpuset); 6585788Smv143129 CPUSET_DEL(clock_tick_online_cpuset, cp->cpu_id); 6595788Smv143129 clock_tick_total_cpus--; 6605788Smv143129 clock_tick_cpus[clock_tick_total_cpus] = NULL; 6615788Smv143129 clock_tick_nsets = 6625788Smv143129 (clock_tick_total_cpus + clock_tick_ncpus - 1) / 6635788Smv143129 clock_tick_ncpus; 6645788Smv143129 set = clock_tick_total_cpus / clock_tick_ncpus; 6655788Smv143129 csp = &clock_tick_set[set]; 6665788Smv143129 csp->ct_end--; 6675788Smv143129 6685788Smv143129 i = 0; 6695788Smv143129 ncp = cpu_active; 6705788Smv143129 do { 6715788Smv143129 if (cp == ncp) 6725788Smv143129 continue; 6735788Smv143129 clock_tick_cpus[i] = ncp; 6745788Smv143129 i++; 6755788Smv143129 } while ((ncp = ncp->cpu_next_onln) != cpu_active); 6765788Smv143129 ASSERT(i == clock_tick_total_cpus); 6775788Smv143129 membar_sync(); 6785788Smv143129 break; 6795788Smv143129 6805788Smv143129 default: 6815788Smv143129 break; 6825788Smv143129 } 6835788Smv143129 6845788Smv143129 mutex_exit(&clock_tick_lock); 6855788Smv143129 6865788Smv143129 return (0); 6875788Smv143129 } 6885788Smv143129 6895788Smv143129 6905788Smv143129 void 6915788Smv143129 clock_tick_mp_init(void) 6925788Smv143129 { 6935788Smv143129 cpu_t *cp; 6945788Smv143129 6955788Smv143129 mutex_enter(&cpu_lock); 6965788Smv143129 6975788Smv143129 cp = cpu_active; 6985788Smv143129 do { 6995788Smv143129 (void) clock_tick_cpu_setup(CPU_ON, cp->cpu_id, NULL); 7005788Smv143129 } while ((cp = cp->cpu_next_onln) != cpu_active); 7015788Smv143129 7025788Smv143129 register_cpu_setup_func(clock_tick_cpu_setup, NULL); 7035788Smv143129 7045788Smv143129 mutex_exit(&cpu_lock); 7055788Smv143129 } 706