/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * The System Duty Cycle (SDC) scheduling class
 * --------------------------------------------
 *
 * Background
 *
 * Kernel threads in Solaris have traditionally not been large consumers
 * of CPU time. They typically wake up, perform a small amount of
 * work, then go back to sleep waiting for either a timeout or another
 * signal. On the assumption that the small amount of work that they do
 * is important for the behavior of the whole system, these threads are
 * treated kindly by the dispatcher and the SYS scheduling class: they run
 * without preemption from anything other than real-time and interrupt
 * threads; when preempted, they are put at the front of the queue, so they
 * generally do not migrate between CPUs; and they are allowed to stay
 * running until they voluntarily give up the CPU.
 *
 * As Solaris has evolved, new workloads have emerged which require the
 * kernel to perform significant amounts of CPU-intensive work. One
 * example of such a workload is ZFS's transaction group sync processing.
 * Each sync operation generates a large batch of I/Os, and each I/O
 * may need to be compressed and/or checksummed before it is written to
 * storage. The taskq threads which perform the compression and checksums
 * will run nonstop as long as they have work to do; a large sync operation
 * on a compression-heavy dataset can keep them busy for seconds on end.
 * This causes human-time-scale dispatch latency bubbles for any other
 * threads which have the misfortune to share a CPU with the taskq threads.
 *
 * The SDC scheduling class is a solution to this problem.
 *
 *
 * Overview
 *
 * SDC is centered around the concept of a thread's duty cycle (DC):
 *
 *                           ONPROC time
 *      Duty Cycle = ----------------------
 *                   ONPROC + Runnable time
 *
 * This is the ratio of the time that the thread spent running on a CPU
 * divided by the time it spent running or trying to run. It is unaffected
 * by any time the thread spent sleeping, stopped, etc.
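 *
 * For example, a thread that was ONPROC for 30ms and Runnable (waiting
 * for a CPU) for 70ms over a period of activity has a duty cycle of
 * 30 / (30 + 70) = 30%, no matter how long it slept before or afterwards.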
 *
 * A thread joining the SDC class specifies a "target" DC that it wants
 * to run at. To implement this policy, the routine sysdc_update() scans
 * the list of active SDC threads every few ticks and uses each thread's
 * microstate data to compute the actual duty cycle that that thread
 * has experienced recently. If the thread is under its target DC, its
 * priority is increased to the maximum available (sysdc_maxpri, which is
 * 99 by default). If the thread is over its target DC, its priority is
 * reduced to the minimum available (sysdc_minpri, 0 by default). This
 * is a fairly primitive approach, in that it doesn't use any of the
 * intermediate priorities, but it's not completely inappropriate. Even
 * though threads in the SDC class might take a while to do their job, they
 * are by some definition important if they're running inside the kernel,
 * so it is reasonable that they should get to run at priority 99.
 *
 * If a thread is running when sysdc_update() calculates its actual duty
 * cycle, and there are other threads of equal or greater priority on its
 * CPU's dispatch queue, sysdc_update() preempts that thread. The thread
 * acknowledges the preemption by calling sysdc_preempt(), which calls
 * setbackdq(), which gives other threads with the same priority a chance
 * to run. This creates a de facto time quantum for threads in the SDC
 * scheduling class.
 *
 * An SDC thread which is assigned priority 0 can continue to run if
 * nothing else needs to use the CPU that it's running on. Similarly, an
 * SDC thread at priority 99 might not get to run as much as it wants to
 * if there are other priority-99 or higher threads on its CPU. These
 * situations would cause the thread to get ahead of or behind its target
 * DC; the longer the situations lasted, the further ahead or behind the
 * thread would get. Rather than condemning a thread to a lifetime of
 * paying for its youthful indiscretions, SDC keeps "base" values for
 * ONPROC and Runnable times in each thread's sysdc data, and updates these
 * values periodically. The duty cycle is then computed using the elapsed
 * amount of ONPROC and Runnable times since those base times.
 *
 * Since sysdc_update() scans SDC threads fairly frequently, it tries to
 * keep the list of "active" threads small by pruning out threads which
 * have been asleep for a brief time. They are not pruned immediately upon
 * going to sleep, since some threads may bounce back and forth between
 * sleeping and being runnable.
 *
 *
 * Interfaces
 *
 *    void sysdc_thread_enter(t, dc, flags)
 *
 *        Moves a kernel thread from the SYS scheduling class to the
 *        SDC class. t must have an associated LWP (created by calling
 *        lwp_kernel_create()). The thread will have a target DC of dc.
 *        Flags should be either 0 or SYSDC_THREAD_BATCH. If
 *        SYSDC_THREAD_BATCH is specified, the thread is expected to be
 *        doing large amounts of processing.
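 *
 *        As a rough usage sketch (my_worker and do_bulk_work are
 *        hypothetical; only lwp_kernel_create() and sysdc_thread_enter()
 *        are real interfaces), a caller creates the thread in an SSYS
 *        process with lwp_kernel_create(), and the thread then moves
 *        itself into SDC before starting its work:
 *
 *            static void
 *            my_worker(void *arg)
 *            {
 *                    sysdc_thread_enter(curthread, 80, 0);
 *                    for (;;)
 *                            do_bulk_work(arg);
 *            }
 *
 *        Here a dc of 80 asks for roughly an 80% duty cycle whenever
 *        the thread has work to do.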
 *
 *
 * Complications
 *
 *    - Run queue balancing
 *
 *        The Solaris dispatcher is biased towards letting a thread run
 *        on the same CPU which it last ran on, if no more than 3 ticks
 *        (i.e. rechoose_interval) have passed since the thread last ran.
 *        This helps to preserve cache warmth. On the other hand, it also
 *        tries to keep the per-CPU run queues fairly balanced; if the CPU
 *        chosen for a runnable thread has a run queue which is three or
 *        more threads longer than a neighboring CPU's queue, the runnable
 *        thread is dispatched onto the neighboring CPU instead.
 *
 *        These policies work well for some workloads, but not for many SDC
 *        threads. The taskq client of SDC, for example, has many discrete
 *        units of work to do. The work units are largely independent, so
 *        cache warmth is not an important consideration. It is important
 *        that the threads fan out quickly to different CPUs, since the
 *        amount of work these threads have to do (a few seconds worth at a
 *        time) doesn't leave much time to correct thread placement errors
 *        (i.e. two SDC threads being dispatched to the same CPU).
 *
 *        To fix this, SDC uses the TS_RUNQMATCH flag introduced for FSS.
 *        This tells the dispatcher to keep neighboring run queues' lengths
 *        more evenly matched, which allows SDC threads to migrate more
 *        easily.
 *
 *    - LWPs and system processes
 *
 *        SDC can only be used for kernel threads. Since SDC uses microstate
 *        accounting data to compute each thread's actual duty cycle, all
 *        threads entering the SDC class must have associated LWPs (which
 *        store the microstate data). This means that the threads have to
 *        be associated with an SSYS process, i.e. one created by newproc().
 *        If the microstate accounting information is ever moved into the
 *        kthread_t, this restriction could be lifted.
 *
 *    - Dealing with oversubscription
 *
 *        Since SDC duty cycles are per-thread, it is possible that the
 *        aggregate requested duty cycle of all SDC threads in a processor
 *        set could be greater than the total CPU time available in that set.
 *        The FSS scheduling class has an analogous situation, which it deals
 *        with by reducing each thread's allotted CPU time proportionally.
 *        Since SDC doesn't need to be as precise as FSS, it uses a simpler
 *        solution to the oversubscription problem.
 *
 *        sysdc_update() accumulates the amount of time that max-priority SDC
 *        threads have spent on-CPU in each processor set, and uses that sum
 *        to create an implied duty cycle for that processor set:
 *
 *                                 accumulated CPU time
 *            pset DC = -----------------------------------
 *                      (# CPUs) * time since last update
 *
 *        If this implied duty cycle is above a maximum pset duty cycle (90%
 *        by default), sysdc_update() sets the priority of all SDC threads
 *        in that processor set to sysdc_minpri for a "break" period. After
 *        the break period, it waits for a "nobreak" period before trying to
 *        enforce the pset duty cycle limit again.
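 *
 *        For example, in a 4-CPU processor set scanned every 20ms, each
 *        update interval offers 80ms of CPU time. If max-priority SDC
 *        threads accumulated 76ms of ONPROC time during that interval,
 *        the implied pset DC is 95%; that exceeds the default 90% limit,
 *        so those threads are dropped to sysdc_minpri for a break.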
 *
 *    - Processor sets
 *
 *        As the above implies, SDC is processor set aware, but it does not
 *        currently allow threads to change processor sets while in the SDC
 *        class. Instead, those threads must join the desired processor set
 *        before entering SDC. [1]
 *
 *    - Batch threads
 *
 *        A thread joining the SDC class can specify the SYSDC_THREAD_BATCH
 *        flag. This flag currently has no effect, but marks threads which
 *        do bulk processing.
 *
 *    - t_kpri_req
 *
 *        The TS and FSS scheduling classes pay attention to t_kpri_req,
 *        which provides a simple form of priority inheritance for
 *        synchronization primitives (such as rwlocks held as READER) which
 *        cannot be traced to a unique thread. The SDC class does not honor
 *        t_kpri_req, for a few reasons:
 *
 *        1. t_kpri_req is notoriously inaccurate. A measure of its
 *           inaccuracy is that it needs to be cleared every time a thread
 *           returns to user mode, because it is frequently non-zero at that
 *           point. This can happen because "ownership" of synchronization
 *           primitives that use t_kpri_req can be silently handed off,
 *           leaving no opportunity to will the t_kpri_req inheritance.
 *
 *        2. Unlike in TS and FSS, threads in SDC *will* eventually run at
 *           kernel priority. This means that even if an SDC thread
 *           is holding a synchronization primitive and running at low
 *           priority, its priority will eventually be raised above 60,
 *           allowing it to drive on and release the resource.
 *
 *        3. The first consumer of SDC uses the taskq subsystem, which holds
 *           a reader lock for the duration of the task's execution. This
 *           would mean that SDC threads would never drop below kernel
 *           priority in practice, which defeats one of the purposes of SDC.
 *
 *    - Why not FSS?
 *
 *        It might seem that the existing FSS scheduling class could solve
 *        the problems that SDC is attempting to solve. FSS's more precise
 *        solution to the oversubscription problem would hardly cause
 *        trouble, as long as it performed well. SDC is implemented as
 *        a separate scheduling class for two main reasons: the initial
 *        consumer of SDC does not map well onto the "project" abstraction
 *        that is central to FSS, and FSS does not expect to run at kernel
 *        priorities.
 *
 *
 * Tunables
 *
 *    - sysdc_update_interval_msec: Number of milliseconds between
 *      consecutive thread priority updates.
 *
 *    - sysdc_reset_interval_msec: Number of milliseconds between
 *      consecutive resets of a thread's base ONPROC and Runnable
 *      times.
 *
 *    - sysdc_prune_interval_msec: Number of milliseconds of sleeping
 *      before a thread is pruned from the active list.
 *
 *    - sysdc_max_pset_DC: Allowable percentage of a processor set's
 *      CPU time which SDC can give to its high-priority threads.
 *
 *    - sysdc_break_msec: Number of milliseconds of "break" taken when
 *      sysdc_max_pset_DC is exceeded.
 *
 *
 * Future work (in SDC and related subsystems)
 *
 *    - Per-thread rechoose interval (0 for SDC)
 *
 *      Allow each thread to specify its own rechoose interval. SDC
 *      threads would specify an interval of zero, which would rechoose
 *      the CPU with the lowest priority once per update.
 *
 *    - Allow threads to change processor sets after joining the SDC class
 *
 *    - Thread groups and per-group DC
 *
 *      It might be nice to be able to specify a duty cycle which applies
 *      to a group of threads in aggregate.
 *
 *    - Per-group DC callback to allow dynamic DC tuning
 *
 *      Currently, DCs are assigned when the thread joins SDC. Some
 *      workloads could benefit from being able to tune their DC using
 *      subsystem-specific knowledge about the workload.
 *
 *    - Finer-grained priority updates
 *
 *    - More nuanced management of oversubscription
 *
 *    - Moving other CPU-intensive threads into SDC
 *
 *    - Move msacct data into kthread_t
 *
 *      This would allow kernel threads without LWPs to join SDC.
 *
 *
 * Footnotes
 *
 *    [1] The details of doing so are left as an exercise for the reader.
 */

#include <sys/types.h>
#include <sys/sysdc.h>
#include <sys/sysdc_impl.h>

#include <sys/class.h>
#include <sys/cmn_err.h>
#include <sys/cpuvar.h>
#include <sys/cpupart.h>
#include <sys/debug.h>
#include <sys/disp.h>
#include <sys/errno.h>
#include <sys/inline.h>
#include <sys/kmem.h>
#include <sys/modctl.h>
#include <sys/schedctl.h>
#include <sys/sdt.h>
#include <sys/sunddi.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/var.h>

/*
 * Tunables - loaded into the internal state at module load time
 */
uint_t          sysdc_update_interval_msec = 20;
uint_t          sysdc_reset_interval_msec = 400;
uint_t          sysdc_prune_interval_msec = 100;
uint_t          sysdc_max_pset_DC = 90;
uint_t          sysdc_break_msec = 80;
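
/*
 * For illustration only: with the default tunables above and assuming a
 * 100Hz clock (usec_per_tick == 10000), sysdc_initparam() below derives
 * sysdc_update_ticks = 2, sysdc_prune_updates = 5, sysdc_reset_updates = 20,
 * sysdc_break_updates = 4, and sysdc_nobreak_updates = 36, i.e. a pset
 * which exceeds sysdc_max_pset_DC takes a 4-update (80ms) break out of
 * every 40 updates, keeping SDC within the 90% limit.
 */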

/*
 * Internal state - constants set up by sysdc_initparam()
 */
static clock_t  sysdc_update_ticks;     /* ticks between updates */
static uint_t   sysdc_prune_updates;    /* updates asleep before pruning */
static uint_t   sysdc_reset_updates;    /* # of updates before reset */
static uint_t   sysdc_break_updates;    /* updates to break */
static uint_t   sysdc_nobreak_updates;  /* updates to not check */
static uint_t   sysdc_minDC;            /* minimum allowed DC */
static uint_t   sysdc_maxDC;            /* maximum allowed DC */
static pri_t    sysdc_minpri;           /* minimum allowed priority */
static pri_t    sysdc_maxpri;           /* maximum allowed priority */

/*
 * Internal state
 */
static kmutex_t sysdc_pset_lock;        /* lock protecting pset data */
static list_t   sysdc_psets;            /* list of psets with SDC threads */
static uint_t   sysdc_param_init;       /* sysdc_initparam() has been called */
static uint_t   sysdc_update_timeout_started; /* update timeout is active */
static hrtime_t sysdc_last_update;      /* time of last sysdc_update() */
static sysdc_t  sysdc_dummy;            /* used to terminate active lists */

/*
 * Internal state - active hash table
 */
#define SYSDC_NLISTS    8
#define SYSDC_HASH(sdc) (((uintptr_t)(sdc) >> 6) & (SYSDC_NLISTS - 1))
static sysdc_list_t     sysdc_active[SYSDC_NLISTS];
#define SYSDC_LIST(sdc) (&sysdc_active[SYSDC_HASH(sdc)])
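
/*
 * SYSDC_HASH() discards the low-order bits of the sysdc_t pointer (which
 * vary little across kmem-allocated buffers) before masking the result
 * into one of the SYSDC_NLISTS buckets.
 */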

#ifdef DEBUG
static struct {
        uint64_t        sysdc_update_times_asleep;
        uint64_t        sysdc_update_times_base_ran_backwards;
        uint64_t        sysdc_update_times_already_done;
        uint64_t        sysdc_update_times_cur_ran_backwards;
        uint64_t        sysdc_compute_pri_breaking;
        uint64_t        sysdc_activate_enter;
        uint64_t        sysdc_update_enter;
        uint64_t        sysdc_update_exited;
        uint64_t        sysdc_update_not_sdc;
        uint64_t        sysdc_update_idle;
        uint64_t        sysdc_update_take_break;
        uint64_t        sysdc_update_no_psets;
        uint64_t        sysdc_tick_not_sdc;
        uint64_t        sysdc_tick_quantum_expired;
        uint64_t        sysdc_thread_enter_enter;
} sysdc_stats;

#define SYSDC_INC_STAT(x)       (sysdc_stats.x++)
#else
#define SYSDC_INC_STAT(x)       ((void)0)
#endif

/* macros are UPPER CASE */
#define HOWMANY(a, b)           howmany((a), (b))
#define MSECTOTICKS(a)          HOWMANY((a) * 1000, usec_per_tick)

static void
sysdc_initparam(void)
{
        uint_t sysdc_break_ticks;

        /* update / prune intervals */
        sysdc_update_ticks = MSECTOTICKS(sysdc_update_interval_msec);

        sysdc_prune_updates = HOWMANY(sysdc_prune_interval_msec,
            sysdc_update_interval_msec);
        sysdc_reset_updates = HOWMANY(sysdc_reset_interval_msec,
            sysdc_update_interval_msec);

        /* We must get at least a little time on CPU. */
        sysdc_minDC = 1;
        sysdc_maxDC = SYSDC_DC_MAX;
        sysdc_minpri = 0;
        sysdc_maxpri = maxclsyspri;

        /* break parameters */
        if (sysdc_max_pset_DC > SYSDC_DC_MAX) {
                sysdc_max_pset_DC = SYSDC_DC_MAX;
        }
        sysdc_break_ticks = MSECTOTICKS(sysdc_break_msec);
        sysdc_break_updates = HOWMANY(sysdc_break_ticks, sysdc_update_ticks);

        /*
         * We want:
         *
         *      sysdc_max_pset_DC = (nobreak / (break + nobreak))
         *
         *      ==> nobreak = sysdc_max_pset_DC * (break + nobreak)
         *
         *                    sysdc_max_pset_DC * break
         *      ==> nobreak = -------------------------
         *                      1 - sysdc_max_pset_DC
         */
        sysdc_nobreak_updates =
            HOWMANY((uint64_t)sysdc_break_updates * sysdc_max_pset_DC,
            (SYSDC_DC_MAX - sysdc_max_pset_DC));

        sysdc_param_init = 1;
}

#undef HOWMANY
#undef MSECTOTICKS

#define SDC_UPDATE_INITIAL      0x1     /* for the initial update */
#define SDC_UPDATE_TIMEOUT      0x2     /* from sysdc_update() */
#define SDC_UPDATE_TICK         0x4     /* from sysdc_tick(), on expiry */

/*
 * Updates the recorded times in the sdc, and returns the elapsed ONPROC
 * and Runnable times since the last reset.
 *
 * newO is the thread's actual ONPROC time; it's used during sysdc_update()
 * to track processor set usage.
 */
static void
sysdc_update_times(sysdc_t *sdc, uint_t flags,
    hrtime_t *O, hrtime_t *R, hrtime_t *newO)
{
        kthread_t *const t = sdc->sdc_thread;
        const uint_t initial = (flags & SDC_UPDATE_INITIAL);
        const uint_t update = (flags & SDC_UPDATE_TIMEOUT);
        const clock_t now = ddi_get_lbolt();
        uint_t do_reset;

        ASSERT(THREAD_LOCK_HELD(t));

        *O = *R = 0;

        /* If we've been sleeping, we know we haven't had any ONPROC time. */
        if (sdc->sdc_sleep_updates != 0 &&
            sdc->sdc_sleep_updates != sdc->sdc_nupdates) {
                *newO = sdc->sdc_last_base_O;
                SYSDC_INC_STAT(sysdc_update_times_asleep);
                return;
        }

        /*
         * If this is our first update, or we've hit the reset point,
         * we need to reset our base_{O,R}. Once we've updated them, we
         * report O and R for the entire prior interval.
         */
        do_reset = initial;
        if (update) {
                ++sdc->sdc_nupdates;
                if ((sdc->sdc_nupdates % sysdc_reset_updates) == 0)
                        do_reset = 1;
        }
        if (do_reset) {
                hrtime_t baseO, baseR;
                if (initial) {
                        /*
                         * Start off our cycle count somewhere in the middle,
                         * to keep the resets from all happening at once.
                         *
                         * 4999 is a handy prime much larger than
                         * sysdc_reset_updates, so that we don't run into
                         * trouble if the resolution is a multiple of
                         * sysdc_reset_updates.
                         */
                        sdc->sdc_nupdates = (uint_t)((gethrtime() % 4999) %
                            sysdc_reset_updates);
                        baseO = baseR = 0;
                } else {
                        baseO = sdc->sdc_base_O;
                        baseR = sdc->sdc_base_R;
                }

                mstate_systhread_times(t, &sdc->sdc_base_O, &sdc->sdc_base_R);
                *newO = sdc->sdc_base_O;

                sdc->sdc_reset = now;
                sdc->sdc_pri_check = -1; /* force mismatch below */

                /*
                 * See below for rationale.
                 */
                if (baseO > sdc->sdc_base_O || baseR > sdc->sdc_base_R) {
                        SYSDC_INC_STAT(sysdc_update_times_base_ran_backwards);
                        baseO = sdc->sdc_base_O;
                        baseR = sdc->sdc_base_R;
                }

                /* compute based on the entire interval */
                *O = (sdc->sdc_base_O - baseO);
                *R = (sdc->sdc_base_R - baseR);
                return;
        }

        /*
         * If we're called from sysdc_update(), we *must* return a value
         * for newO, so we always call mstate_systhread_times().
         *
         * Otherwise, if we've already done a pri check this tick,
         * we can skip it.
         */
        if (!update && sdc->sdc_pri_check == now) {
                SYSDC_INC_STAT(sysdc_update_times_already_done);
                return;
        }

        /* Get the current times from the thread */
        sdc->sdc_pri_check = now;
        mstate_systhread_times(t, &sdc->sdc_cur_O, &sdc->sdc_cur_R);
        *newO = sdc->sdc_cur_O;

        /*
         * The updating of microstate accounting is not done under a
         * consistent set of locks, particularly the t_waitrq field. This
         * can lead to narrow windows in which we account for time in the
         * wrong bucket, which on the next read will be accounted for
         * correctly.
         *
         * If our sdc_base_* fields were affected by one of these blips, we
         * throw away the old data, and pretend this tick didn't happen.
         */
        if (sdc->sdc_cur_O < sdc->sdc_base_O ||
            sdc->sdc_cur_R < sdc->sdc_base_R) {

                sdc->sdc_base_O = sdc->sdc_cur_O;
                sdc->sdc_base_R = sdc->sdc_cur_R;

                SYSDC_INC_STAT(sysdc_update_times_cur_ran_backwards);
                return;
        }

        *O = sdc->sdc_cur_O - sdc->sdc_base_O;
        *R = sdc->sdc_cur_R - sdc->sdc_base_R;
}

/*
 * sysdc_compute_pri()
 *
 *    Recomputes the priority of the thread, leaving the result in
 *    sdc->sdc_epri. Returns 1 if a priority update should occur
 *    (which will also trigger a cpu_surrender()), otherwise
 *    returns 0.
 */
static uint_t
sysdc_compute_pri(sysdc_t *sdc, uint_t flags)
{
        kthread_t *const t = sdc->sdc_thread;
        const uint_t update = (flags & SDC_UPDATE_TIMEOUT);
        const uint_t tick = (flags & SDC_UPDATE_TICK);

        hrtime_t O, R;
        hrtime_t newO = -1;

        ASSERT(THREAD_LOCK_HELD(t));

        sysdc_update_times(sdc, flags, &O, &R, &newO);
        ASSERT(!update || newO != -1);

        /* If we have new data, recompute our priority. */
        if ((O + R) != 0) {
                sdc->sdc_cur_DC = (O * SYSDC_DC_MAX) / (O + R);

                /* Adjust our priority to move our DC closer to the target. */
                if (sdc->sdc_cur_DC < sdc->sdc_target_DC)
                        sdc->sdc_pri = sdc->sdc_maxpri;
                else
                        sdc->sdc_pri = sdc->sdc_minpri;
        }

        /*
         * If our per-pset duty cycle goes over the max, we will take a break.
         * This forces all sysdc threads in the pset to minimum priority, in
         * order to let everyone else have a chance at the CPU.
         */
        if (sdc->sdc_pset->sdp_need_break) {
                SYSDC_INC_STAT(sysdc_compute_pri_breaking);
                sdc->sdc_epri = sdc->sdc_minpri;
        } else {
                sdc->sdc_epri = sdc->sdc_pri;
        }

        DTRACE_PROBE4(sysdc__compute__pri,
            kthread_t *, t, pri_t, sdc->sdc_epri, uint_t, sdc->sdc_cur_DC,
            uint_t, sdc->sdc_target_DC);

        /*
         * For sysdc_update(), we compute the ONPROC time for high-priority
         * threads, which is used to calculate the per-pset duty cycle. We
         * will always tell our callers to update the thread's priority,
         * since we want to force a cpu_surrender().
         *
         * We reset sdc_update_ticks so that sysdc_tick() will only update
         * the thread's priority if our timeout is delayed by a tick or
         * more.
         */
        if (update) {
                /* SDC threads are not allowed to change cpupart bindings. */
                ASSERT(t->t_cpupart == sdc->sdc_pset->sdp_cpupart);

                /* If we were at MAXPRI, account for our onproc time. */
                if (t->t_pri == sdc->sdc_maxpri &&
                    sdc->sdc_last_base_O != 0 &&
                    sdc->sdc_last_base_O < newO) {
                        sdc->sdc_last_O = newO - sdc->sdc_last_base_O;
                        sdc->sdc_pset->sdp_onproc_time +=
                            (uint64_t)sdc->sdc_last_O;
                        sdc->sdc_pset->sdp_onproc_threads++;
                } else {
                        sdc->sdc_last_O = 0;
                }
                sdc->sdc_last_base_O = newO;

                sdc->sdc_update_ticks = sdc->sdc_ticks + sysdc_update_ticks + 1;
                return (1);
        }

        /*
         * Like sysdc_update(), sysdc_tick() always wants to update the
         * thread's priority, so that the CPU is surrendered if necessary.
         * We reset sdc_update_ticks so that if the timeout continues to be
         * delayed, we'll update at the regular interval.
         */
        if (tick) {
                ASSERT(sdc->sdc_ticks == sdc->sdc_update_ticks);
                sdc->sdc_update_ticks = sdc->sdc_ticks + sysdc_update_ticks;
                return (1);
        }

        /*
         * Otherwise, only tell our callers to update the priority if it has
         * changed.
         */
        return (sdc->sdc_epri != t->t_pri);
}

static void
sysdc_update_pri(sysdc_t *sdc, uint_t flags)
{
        kthread_t *t = sdc->sdc_thread;

        ASSERT(THREAD_LOCK_HELD(t));

        if (sysdc_compute_pri(sdc, flags)) {
                if (!thread_change_pri(t, sdc->sdc_epri, 0)) {
                        cpu_surrender(t);
                }
        }
}

/*
 * Add a thread onto the active list. It will only be removed by
 * sysdc_update().
 */
static void
sysdc_activate(sysdc_t *sdc)
{
        sysdc_t *volatile *headp = &SYSDC_LIST(sdc)->sdl_list;
        sysdc_t *head;
        kthread_t *t = sdc->sdc_thread;

        SYSDC_INC_STAT(sysdc_activate_enter);

        ASSERT(sdc->sdc_next == NULL);
        ASSERT(THREAD_LOCK_HELD(t));

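        /*
         * Lock-free push onto the bucket's singly-linked active list:
         * point sdc_next at the current head and swing the head to sdc
         * with a compare-and-swap, retrying if the head changed (e.g.
         * because another thread activated itself) in the meantime.
         */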
        do {
                head = *headp;
                sdc->sdc_next = head;
        } while (atomic_cas_ptr(headp, head, sdc) != head);
}

/*
 * sysdc_update() has two jobs:
 *
 *    1. It updates the priorities of all active SDC threads on the system.
 *    2. It measures pset CPU usage and enforces sysdc_max_pset_DC.
 */
static void
sysdc_update(void *arg)
{
        int idx;
        sysdc_t *freelist = NULL;
        sysdc_pset_t *cur;
        hrtime_t now, diff;
        uint_t redeploy = 1;

        SYSDC_INC_STAT(sysdc_update_enter);

        ASSERT(sysdc_update_timeout_started);

        /*
         * If this is our first time through, diff will be gigantic, and
         * no breaks will be necessary.
         */
        now = gethrtime();
        diff = now - sysdc_last_update;
        sysdc_last_update = now;

        mutex_enter(&sysdc_pset_lock);
        for (cur = list_head(&sysdc_psets); cur != NULL;
            cur = list_next(&sysdc_psets, cur)) {
                boolean_t breaking = (cur->sdp_should_break != 0);

                if (cur->sdp_need_break != breaking) {
                        DTRACE_PROBE2(sdc__pset__break, sysdc_pset_t *, cur,
                            boolean_t, breaking);
                }
                cur->sdp_onproc_time = 0;
                cur->sdp_onproc_threads = 0;
                cur->sdp_need_break = breaking;
        }
        mutex_exit(&sysdc_pset_lock);

        for (idx = 0; idx < SYSDC_NLISTS; idx++) {
                sysdc_list_t *sdl = &sysdc_active[idx];
                sysdc_t *volatile *headp = &sdl->sdl_list;
                sysdc_t *head, *tail;
                sysdc_t **prevptr;

                if (*headp == &sysdc_dummy)
                        continue;

                /* Prevent any threads from exiting while we're poking them. */
                mutex_enter(&sdl->sdl_lock);

                /*
                 * Each sdl_list contains a singly-linked list of active
                 * threads. Threads which become active while we are
                 * processing the list will be added to sdl_list. Since we
                 * don't want that to interfere with our own processing, we
                 * swap in an empty list. Any newly active threads will
                 * go on to this empty list. When finished, we'll put any
                 * such threads at the end of the processed list.
                 */
                head = atomic_swap_ptr(headp, &sysdc_dummy);
                prevptr = &head;
                while (*prevptr != &sysdc_dummy) {
                        sysdc_t *const sdc = *prevptr;
                        kthread_t *const t = sdc->sdc_thread;

                        /*
                         * If the thread has exited, move its sysdc_t onto
                         * freelist, to be freed later.
                         */
                        if (t == NULL) {
                                *prevptr = sdc->sdc_next;
                                SYSDC_INC_STAT(sysdc_update_exited);
                                sdc->sdc_next = freelist;
                                freelist = sdc;
                                continue;
                        }

                        thread_lock(t);
                        if (t->t_cid != sysdccid) {
                                thread_unlock(t);
                                prevptr = &sdc->sdc_next;
                                SYSDC_INC_STAT(sysdc_update_not_sdc);
                                continue;
                        }
                        ASSERT(t->t_cldata == sdc);

                        /*
                         * If the thread has been sleeping for longer
                         * than sysdc_prune_interval, make it inactive by
                         * removing it from the list.
                         */
                        if (!(t->t_state & (TS_RUN | TS_ONPROC)) &&
                            sdc->sdc_sleep_updates != 0 &&
                            (sdc->sdc_sleep_updates - sdc->sdc_nupdates) >
                            sysdc_prune_updates) {
                                *prevptr = sdc->sdc_next;
                                SYSDC_INC_STAT(sysdc_update_idle);
                                sdc->sdc_next = NULL;
                                thread_unlock(t);
                                continue;
                        }
                        sysdc_update_pri(sdc, SDC_UPDATE_TIMEOUT);
                        thread_unlock(t);

                        prevptr = &sdc->sdc_next;
                }

                /*
                 * Add our list to the bucket, putting any new entries
                 * added while we were working at the tail of the list.
                 */
                do {
                        tail = *headp;
                        *prevptr = tail;
                } while (atomic_cas_ptr(headp, tail, head) != tail);

                mutex_exit(&sdl->sdl_lock);
        }

        mutex_enter(&sysdc_pset_lock);
        for (cur = list_head(&sysdc_psets); cur != NULL;
            cur = list_next(&sysdc_psets, cur)) {

                cur->sdp_vtime_last_interval =
                    diff * cur->sdp_cpupart->cp_ncpus;
                cur->sdp_DC_last_interval =
                    (cur->sdp_onproc_time * SYSDC_DC_MAX) /
                    cur->sdp_vtime_last_interval;

                if (cur->sdp_should_break > 0) {
                        cur->sdp_should_break--;        /* breaking */
                        continue;
                }
                if (cur->sdp_dont_break > 0) {
                        cur->sdp_dont_break--;  /* waiting before checking */
                        continue;
                }
                if (cur->sdp_DC_last_interval > sysdc_max_pset_DC) {
                        cur->sdp_should_break = sysdc_break_updates;
                        cur->sdp_dont_break = sysdc_nobreak_updates;
                        SYSDC_INC_STAT(sysdc_update_take_break);
                }
        }

        /*
         * If there are no sysdc_psets, there can be no threads, so
         * we can stop doing our timeout. Since we're holding the
         * sysdc_pset_lock, no new sysdc_psets can come in, which will
         * prevent anyone from racing with this and dropping our timeout
         * on the floor.
         */
        if (list_is_empty(&sysdc_psets)) {
                SYSDC_INC_STAT(sysdc_update_no_psets);
                ASSERT(sysdc_update_timeout_started);
                sysdc_update_timeout_started = 0;

                redeploy = 0;
        }
        mutex_exit(&sysdc_pset_lock);

        while (freelist != NULL) {
                sysdc_t *cur = freelist;
                freelist = cur->sdc_next;
                kmem_free(cur, sizeof (*cur));
        }

        if (redeploy) {
                (void) timeout(sysdc_update, arg, sysdc_update_ticks);
        }
}

static void
sysdc_preempt(kthread_t *t)
{
        ASSERT(t == curthread);
        ASSERT(THREAD_LOCK_HELD(t));

        setbackdq(t);           /* give others a chance to run */
}

static void
sysdc_tick(kthread_t *t)
{
        sysdc_t *sdc;

        thread_lock(t);
        if (t->t_cid != sysdccid) {
                SYSDC_INC_STAT(sysdc_tick_not_sdc);
                thread_unlock(t);
                return;
        }
        sdc = t->t_cldata;
        if (t->t_state == TS_ONPROC &&
            t->t_pri < t->t_disp_queue->disp_maxrunpri) {
                cpu_surrender(t);
        }

        if (t->t_state == TS_ONPROC || t->t_state == TS_RUN) {
                ASSERT(sdc->sdc_sleep_updates == 0);
        }

        ASSERT(sdc->sdc_ticks != sdc->sdc_update_ticks);
        sdc->sdc_ticks++;
        if (sdc->sdc_ticks == sdc->sdc_update_ticks) {
                SYSDC_INC_STAT(sysdc_tick_quantum_expired);
                sysdc_update_pri(sdc, SDC_UPDATE_TICK);
                ASSERT(sdc->sdc_ticks != sdc->sdc_update_ticks);
        }
        thread_unlock(t);
}

static void
sysdc_setrun(kthread_t *t)
{
        sysdc_t *sdc = t->t_cldata;

        ASSERT(THREAD_LOCK_HELD(t));    /* t should be in transition */

        sdc->sdc_sleep_updates = 0;

        if (sdc->sdc_next == NULL) {
                /*
                 * Since we're in transition, we don't want to use the
                 * full thread_update_pri().
                 */
                if (sysdc_compute_pri(sdc, 0)) {
                        THREAD_CHANGE_PRI(t, sdc->sdc_epri);
                }
                sysdc_activate(sdc);

                ASSERT(sdc->sdc_next != NULL);
        }

        setbackdq(t);
}

static void
sysdc_wakeup(kthread_t *t)
{
        sysdc_setrun(t);
}

static void
sysdc_sleep(kthread_t *t)
{
        sysdc_t *sdc = t->t_cldata;

        ASSERT(THREAD_LOCK_HELD(t));    /* t should be in transition */

        sdc->sdc_sleep_updates = sdc->sdc_nupdates;
}

/*ARGSUSED*/
static int
sysdc_enterclass(kthread_t *t, id_t cid, void *parmsp, cred_t *reqpcredp,
    void *bufp)
{
        cpupart_t *const cpupart = t->t_cpupart;
        sysdc_t *sdc = bufp;
        sysdc_params_t *sdpp = parmsp;
        sysdc_pset_t *newpset = sdc->sdc_pset;
        sysdc_pset_t *pset;
        int start_timeout;

        if (t->t_cid != syscid)
                return (EPERM);

        ASSERT(ttolwp(t) != NULL);
        ASSERT(sdpp != NULL);
        ASSERT(newpset != NULL);
        ASSERT(sysdc_param_init);

        ASSERT(sdpp->sdp_minpri >= sysdc_minpri);
        ASSERT(sdpp->sdp_maxpri <= sysdc_maxpri);
        ASSERT(sdpp->sdp_DC >= sysdc_minDC);
        ASSERT(sdpp->sdp_DC <= sysdc_maxDC);

        sdc->sdc_thread = t;
        sdc->sdc_pri = sdpp->sdp_maxpri;        /* start off maximally */
        sdc->sdc_minpri = sdpp->sdp_minpri;
        sdc->sdc_maxpri = sdpp->sdp_maxpri;
        sdc->sdc_target_DC = sdpp->sdp_DC;
        sdc->sdc_ticks = 0;
        sdc->sdc_update_ticks = sysdc_update_ticks + 1;

        /* Assign ourselves to the appropriate pset. */
        sdc->sdc_pset = NULL;
        mutex_enter(&sysdc_pset_lock);
        for (pset = list_head(&sysdc_psets); pset != NULL;
            pset = list_next(&sysdc_psets, pset)) {
                if (pset->sdp_cpupart == cpupart) {
                        break;
                }
        }
        if (pset == NULL) {
                pset = newpset;
                newpset = NULL;
                pset->sdp_cpupart = cpupart;
                list_insert_tail(&sysdc_psets, pset);
        }
        pset->sdp_nthreads++;
        ASSERT(pset->sdp_nthreads > 0);

        sdc->sdc_pset = pset;

        start_timeout = (sysdc_update_timeout_started == 0);
        sysdc_update_timeout_started = 1;
        mutex_exit(&sysdc_pset_lock);

        if (newpset != NULL)
                kmem_free(newpset, sizeof (*newpset));

        /* Update t's scheduling class and priority. */
        thread_lock(t);
        t->t_clfuncs = &(sclass[cid].cl_funcs->thread);
        t->t_cid = cid;
        t->t_cldata = sdc;
        t->t_schedflag |= TS_RUNQMATCH;

        sysdc_update_pri(sdc, SDC_UPDATE_INITIAL);
        thread_unlock(t);

        /* Kick off the thread timeout if we're the first one in. */
        if (start_timeout) {
                (void) timeout(sysdc_update, NULL, sysdc_update_ticks);
        }

        return (0);
}

static void
sysdc_leave(sysdc_t *sdc)
{
        sysdc_pset_t *sdp = sdc->sdc_pset;
        sysdc_list_t *sdl = SYSDC_LIST(sdc);
        uint_t freedc;

        mutex_enter(&sdl->sdl_lock);    /* block sysdc_update() */
        sdc->sdc_thread = NULL;
        freedc = (sdc->sdc_next == NULL);
        mutex_exit(&sdl->sdl_lock);

        mutex_enter(&sysdc_pset_lock);
        ASSERT(sdp != NULL);
        ASSERT(sdp->sdp_nthreads > 0);
        --sdp->sdp_nthreads;
        if (sdp->sdp_nthreads == 0) {
                list_remove(&sysdc_psets, sdp);
        } else {
                sdp = NULL;
        }
        mutex_exit(&sysdc_pset_lock);

        if (freedc)
                kmem_free(sdc, sizeof (*sdc));
        if (sdp != NULL)
                kmem_free(sdp, sizeof (*sdp));
}

static void
sysdc_exitclass(void *buf)
{
        sysdc_leave((sysdc_t *)buf);
}

/*ARGSUSED*/
static int
sysdc_canexit(kthread_t *t, cred_t *reqpcredp)
{
        /* Threads cannot exit SDC once joined, except in a body bag. */
        return (EPERM);
}

static void
sysdc_exit(kthread_t *t)
{
        sysdc_t *sdc;

        /* We're exiting, so we just rejoin the SYS class. */
        thread_lock(t);
        ASSERT(t->t_cid == sysdccid);
        sdc = t->t_cldata;
        t->t_cid = syscid;
        t->t_cldata = NULL;
        t->t_clfuncs = &(sclass[syscid].cl_funcs->thread);
        (void) thread_change_pri(t, maxclsyspri, 0);
        t->t_schedflag &= ~TS_RUNQMATCH;
        thread_unlock_nopreempt(t);

        /* Unlink the sdc from everything. */
        sysdc_leave(sdc);
}

/*ARGSUSED*/
static int
sysdc_fork(kthread_t *t, kthread_t *ct, void *bufp)
{
        /*
         * Threads cannot be created with SDC as their class; they must
         * be created as SYS and then added with sysdc_thread_enter().
         * Because of this restriction, sysdc_fork() should never be called.
         */
        panic("sysdc cannot be forked");

        return (ENOSYS);
}

/*ARGSUSED*/
static void
sysdc_forkret(kthread_t *t, kthread_t *ct)
{
        /* SDC threads are part of system processes, which never fork. */
        panic("sysdc cannot be forked");
}

static pri_t
sysdc_globpri(kthread_t *t)
{
        return (t->t_epri);
}

/*ARGSUSED*/
static pri_t
sysdc_no_swap(kthread_t *t, int flags)
{
        /* SDC threads cannot be swapped. */
        return (-1);
}

/*
 * Get maximum and minimum priorities enjoyed by SDC threads.
 */
static int
sysdc_getclpri(pcpri_t *pcprip)
{
        pcprip->pc_clpmax = sysdc_maxpri;
        pcprip->pc_clpmin = sysdc_minpri;
        return (0);
}

/*ARGSUSED*/
static int
sysdc_getclinfo(void *arg)
{
        return (0);     /* no class-specific info */
}

/*ARGSUSED*/
static int
sysdc_alloc(void **p, int flag)
{
        sysdc_t *new;

        *p = NULL;
        if ((new = kmem_zalloc(sizeof (*new), flag)) == NULL) {
                return (ENOMEM);
        }
        if ((new->sdc_pset = kmem_zalloc(sizeof (*new->sdc_pset), flag)) ==
            NULL) {
                kmem_free(new, sizeof (*new));
                return (ENOMEM);
        }
        *p = new;
        return (0);
}

static void
sysdc_free(void *p)
{
        sysdc_t *sdc = p;

        if (sdc != NULL) {
                /*
                 * We must have failed CL_ENTERCLASS(), so our pset should be
                 * there and unused.
                 */
                ASSERT(sdc->sdc_pset != NULL);
                ASSERT(sdc->sdc_pset->sdp_cpupart == NULL);
                kmem_free(sdc->sdc_pset, sizeof (*sdc->sdc_pset));
                kmem_free(sdc, sizeof (*sdc));
        }
}

static int sysdc_enosys();      /* Boy, ANSI-C's K&R compatibility is weird. */
static int sysdc_einval();
static void sysdc_nullsys();

static struct classfuncs sysdc_classfuncs = {
        /* messages to class manager */
        {
                sysdc_enosys,   /* admin */
                sysdc_getclinfo,
                sysdc_enosys,   /* parmsin */
                sysdc_enosys,   /* parmsout */
                sysdc_enosys,   /* vaparmsin */
                sysdc_enosys,   /* vaparmsout */
                sysdc_getclpri,
                sysdc_alloc,
                sysdc_free,
        },
        /* operations on threads */
        {
                sysdc_enterclass,
                sysdc_exitclass,
                sysdc_canexit,
                sysdc_fork,
                sysdc_forkret,
                sysdc_nullsys,  /* parmsget */
                sysdc_enosys,   /* parmsset */
                sysdc_nullsys,  /* stop */
                sysdc_exit,
                sysdc_nullsys,  /* active */
                sysdc_nullsys,  /* inactive */
                sysdc_no_swap,  /* swapin */
                sysdc_no_swap,  /* swapout */
                sysdc_nullsys,  /* trapret */
                sysdc_preempt,
                sysdc_setrun,
                sysdc_sleep,
                sysdc_tick,
                sysdc_wakeup,
                sysdc_einval,   /* donice */
                sysdc_globpri,
                sysdc_nullsys,  /* set_process_group */
                sysdc_nullsys,  /* yield */
                sysdc_einval,   /* doprio */
        }
};

static int
sysdc_enosys()
{
        return (ENOSYS);
}

static int
sysdc_einval()
{
        return (EINVAL);
}

static void
sysdc_nullsys()
{
}

/*ARGSUSED*/
static pri_t
sysdc_init(id_t cid, int clparmsz, classfuncs_t **clfuncspp)
{
        int idx;

        list_create(&sysdc_psets, sizeof (sysdc_pset_t),
            offsetof(sysdc_pset_t, sdp_node));

        for (idx = 0; idx < SYSDC_NLISTS; idx++) {
                sysdc_active[idx].sdl_list = &sysdc_dummy;
        }

        sysdc_initparam();

        sysdccid = cid;
        *clfuncspp = &sysdc_classfuncs;

        return ((pri_t)v.v_maxsyspri);
}

static struct sclass csw = {
        "SDC",
        sysdc_init,
        0
};

static struct modlsched modlsched = {
        &mod_schedops, "system duty cycle scheduling class", &csw
};

static struct modlinkage modlinkage = {
        MODREV_1, (void *)&modlsched, NULL
};

int
_init()
{
        return (mod_install(&modlinkage));
}

int
_fini()
{
        return (EBUSY);         /* can't unload for now */
}

int
_info(struct modinfo *modinfop)
{
        return (mod_info(&modlinkage, modinfop));
}

/* --- consolidation-private interfaces --- */
void
sysdc_thread_enter(kthread_t *t, uint_t dc, uint_t flags)
{
        void *buf = NULL;
        sysdc_params_t sdp;

        SYSDC_INC_STAT(sysdc_thread_enter_enter);

        ASSERT(sysdc_param_init);
        ASSERT(sysdccid >= 0);

        ASSERT((flags & ~SYSDC_THREAD_BATCH) == 0);

        sdp.sdp_minpri = sysdc_minpri;
        sdp.sdp_maxpri = sysdc_maxpri;
        sdp.sdp_DC = MAX(MIN(dc, sysdc_maxDC), sysdc_minDC);

        VERIFY3U(CL_ALLOC(&buf, sysdccid, KM_SLEEP), ==, 0);

        ASSERT(t->t_lwp != NULL);
        ASSERT(t->t_cid == syscid);
        ASSERT(t->t_cldata == NULL);
        VERIFY3U(CL_CANEXIT(t, NULL), ==, 0);
        VERIFY3U(CL_ENTERCLASS(t, sysdccid, &sdp, kcred, buf), ==, 0);
        CL_EXITCLASS(syscid, NULL);
}
1317