/*	$OpenBSD: hfsc.c,v 1.50 2024/10/29 23:57:54 dlg Exp $	*/

/*
 * Copyright (c) 2012-2013 Henning Brauer <henning@openbsd.org>
 * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation is hereby granted (including for commercial or
 * for-profit use), provided that both the copyright notice and this
 * permission notice appear in all copies of the software, derivative
 * works, or modified versions, and any portions thereof.
 *
 * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
 * WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON PROVIDES THIS
 * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * Carnegie Mellon encourages (but does not require) users of this
 * software to return any improvements or extensions that they make,
 * and to grant Carnegie Mellon the rights to redistribute these
 * changes without encumbrance.
 */
/*
 * H-FSC is described in Proceedings of SIGCOMM'97,
 * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing,
 * Real-Time and Priority Service"
 * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng.
 *
 * Oleg Cherevko <olwi@aq.ml.com.ua> added the upperlimit for link-sharing.
 * when a class has an upperlimit, the fit-time is computed from the
 * upperlimit service curve.  the link-sharing scheduler does not schedule
 * a class whose fit-time exceeds the current time.
 */

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/queue.h>
#include <sys/kernel.h>
#include <sys/timeout.h>

#include <net/if.h>
#include <net/if_var.h>
#include <netinet/in.h>

#include <net/pfvar.h>
#include <net/hfsc.h>

/*
 * kernel internal service curve representation
 *	coordinates are given by 64 bit unsigned integers.
 *	x-axis: unit is clock count.  for the intel x86 architecture,
 *		the raw Pentium TSC (Timestamp Counter) value is used.
 *		virtual time is also calculated in this time scale.
 *	y-axis: unit is byte.
 *
 *	the service curve parameters are converted to the internal
 *	representation.
 *	the slope values are scaled to avoid overflow.
 *	the inverse slope values as well as the y-projection of the 1st
 *	segment are kept in order to avoid 64-bit divide operations
 *	that are expensive on 32-bit architectures.
 *
 * note: Intel Pentium TSC never wraps around in several thousands of years.
 *	x-axis doesn't wrap around for 1089 years with 1GHz clock.
 *	y-axis doesn't wrap around for 4358 years with 1Gbps bandwidth.
 */

/* kernel internal representation of a service curve */
struct hfsc_internal_sc {
	u_int64_t	sm1;	/* scaled slope of the 1st segment */
	u_int64_t	ism1;	/* scaled inverse-slope of the 1st segment */
	u_int64_t	dx;	/* the x-projection of the 1st segment */
	u_int64_t	dy;	/* the y-projection of the 1st segment */
	u_int64_t	sm2;	/* scaled slope of the 2nd segment */
	u_int64_t	ism2;	/* scaled inverse-slope of the 2nd segment */
};

/* runtime service curve */
struct hfsc_runtime_sc {
	u_int64_t	x;	/* current starting position on x-axis */
	u_int64_t	y;	/* current starting position on y-axis */
	u_int64_t	sm1;	/* scaled slope of the 1st segment */
	u_int64_t	ism1;	/* scaled inverse-slope of the 1st segment */
	u_int64_t	dx;	/* the x-projection of the 1st segment */
	u_int64_t	dy;	/* the y-projection of the 1st segment */
	u_int64_t	sm2;	/* scaled slope of the 2nd segment */
	u_int64_t	ism2;	/* scaled inverse-slope of the 2nd segment */
};

struct hfsc_classq {
	struct mbuf_list q;	/* Queue of packets */
	int		 qlimit;	/* Queue limit */
};

/* for TAILQ based ellist and actlist implementation */
struct hfsc_class;
TAILQ_HEAD(hfsc_eligible, hfsc_class);
TAILQ_HEAD(hfsc_active, hfsc_class);
#define	hfsc_actlist_last(s)	TAILQ_LAST(s, hfsc_active)

struct hfsc_class {
	u_int		cl_id;		/* class id (just for debug) */
	u_int32_t	cl_handle;	/* class handle */
	int		cl_flags;	/* misc flags */

	struct hfsc_class *cl_parent;	/* parent class */
	struct hfsc_class *cl_siblings;	/* sibling classes */
	struct hfsc_class *cl_children;	/* child classes */

	struct hfsc_classq cl_q;	/* class queue structure */

	const struct pfq_ops *cl_qops;	/* queue manager */
	void		*cl_qdata;	/* queue manager data */
	void		*cl_cookie;	/* queue manager cookie */

	u_int64_t	cl_total;	/* total work in bytes */
	u_int64_t	cl_cumul;	/* cumulative work in bytes
					   done by real-time criteria */
	u_int64_t	cl_d;		/* deadline */
	u_int64_t	cl_e;		/* eligible time */
	u_int64_t	cl_vt;		/* virtual time */
	u_int64_t	cl_f;		/* time when this class will fit for
					   link-sharing, max(myf, cfmin) */
	u_int64_t	cl_myf;		/* my fit-time (as calculated from this
					   class's own upperlimit curve) */
	u_int64_t	cl_myfadj;	/* my fit-time adjustment
					   (to cancel history dependence) */
	u_int64_t	cl_cfmin;	/* earliest children's fit-time (used
					   with cl_myf to obtain cl_f) */
	u_int64_t	cl_cvtmin;	/* minimal virtual time among the
					   children fit for link-sharing
					   (monotonic within a period) */
	u_int64_t	cl_vtadj;	/* intra-period cumulative vt
					   adjustment */
	u_int64_t	cl_vtoff;	/* inter-period cumulative vt offset */
	u_int64_t	cl_cvtmax;	/* max child's vt in the last period */

	u_int64_t	cl_initvt;	/* init virtual time (for debugging) */

	struct hfsc_internal_sc *cl_rsc;	/* internal real-time service curve */
	struct hfsc_internal_sc *cl_fsc;	/* internal fair service curve */
	struct hfsc_internal_sc *cl_usc;	/* internal upperlimit service curve */
	struct hfsc_runtime_sc cl_deadline;	/* deadline curve */
	struct hfsc_runtime_sc cl_eligible;	/* eligible curve */
	struct hfsc_runtime_sc cl_virtual;	/* virtual curve */
	struct hfsc_runtime_sc cl_ulimit;	/* upperlimit curve */

	u_int		cl_vtperiod;	/* vt period sequence no */
	u_int		cl_parentperiod; /* parent's vt period seqno */
	int		cl_nactive;	/* number of active children */
	struct hfsc_active cl_actc;	/* active children list */

	TAILQ_ENTRY(hfsc_class) cl_actlist;	/* active children list entry */
	TAILQ_ENTRY(hfsc_class) cl_ellist;	/* eligible list entry */

	struct {
		struct hfsc_pktcntr xmit_cnt;
		struct hfsc_pktcntr drop_cnt;
		u_int period;
	} cl_stats;
};

/*
 * hfsc interface state
 */
struct hfsc_if {
	struct hfsc_if		*hif_next;		/* interface state list */
	struct hfsc_class	*hif_rootclass;		/* root class */
	struct hfsc_class	*hif_defaultclass;	/* default class */
	struct hfsc_class	**hif_class_tbl;

	u_int64_t		hif_microtime;	/* time at deq_begin */

	u_int	hif_allocated;			/* # of slots in hif_class_tbl */
	u_int	hif_classes;			/* # of classes in the tree */
	u_int	hif_classid;			/* class id sequence number */

	struct hfsc_eligible	hif_eligible;	/* eligible list */
	struct timeout		hif_defer;	/* for queues that weren't ready */
};

/*
 * function prototypes
 */
struct hfsc_class	*hfsc_class_create(struct hfsc_if *,
			    struct hfsc_sc *, struct hfsc_sc *,
			    struct hfsc_sc *, struct hfsc_class *, int,
			    int, int);
int			 hfsc_class_destroy(struct hfsc_if *,
			    struct hfsc_class *);
struct hfsc_class	*hfsc_nextclass(struct hfsc_class *);

void		 hfsc_cl_purge(struct hfsc_if *, struct hfsc_class *,
		    struct mbuf_list *);

void		 hfsc_update_sc(struct hfsc_if *, struct hfsc_class *, int);
void		 hfsc_deferred(void *);
void		 hfsc_update_cfmin(struct hfsc_class *);
void		 hfsc_set_active(struct hfsc_if *, struct hfsc_class *, int);
void		 hfsc_set_passive(struct hfsc_if *, struct hfsc_class *);
void		 hfsc_init_ed(struct hfsc_if *, struct hfsc_class *, int);
void		 hfsc_update_ed(struct hfsc_if *, struct hfsc_class *, int);
void		 hfsc_update_d(struct hfsc_class *, int);
void		 hfsc_init_vf(struct hfsc_class *, int);
void		 hfsc_update_vf(struct hfsc_class *, int, u_int64_t);
void		 hfsc_ellist_insert(struct hfsc_if *, struct hfsc_class *);
void		 hfsc_ellist_remove(struct hfsc_if *, struct hfsc_class *);
void		 hfsc_ellist_update(struct hfsc_if *, struct hfsc_class *);
struct hfsc_class	*hfsc_ellist_get_mindl(struct hfsc_if *, u_int64_t);
void		 hfsc_actlist_insert(struct hfsc_class *);
void		 hfsc_actlist_remove(struct hfsc_class *);
void		 hfsc_actlist_update(struct hfsc_class *);

struct hfsc_class	*hfsc_actlist_firstfit(struct hfsc_class *,
			    u_int64_t);

static __inline u_int64_t	seg_x2y(u_int64_t, u_int64_t);
static __inline u_int64_t	seg_y2x(u_int64_t, u_int64_t);
static __inline u_int64_t	m2sm(u_int);
static __inline u_int64_t	m2ism(u_int);
static __inline u_int64_t	d2dx(u_int);
static __inline u_int		sm2m(u_int64_t);
static __inline u_int		dx2d(u_int64_t);

void		hfsc_sc2isc(struct hfsc_sc *, struct hfsc_internal_sc *);
void		hfsc_rtsc_init(struct hfsc_runtime_sc *,
		    struct hfsc_internal_sc *, u_int64_t, u_int64_t);
u_int64_t	hfsc_rtsc_y2x(struct hfsc_runtime_sc *, u_int64_t);
u_int64_t	hfsc_rtsc_x2y(struct hfsc_runtime_sc *, u_int64_t);
void		hfsc_rtsc_min(struct hfsc_runtime_sc *,
		    struct hfsc_internal_sc *, u_int64_t, u_int64_t);

void		hfsc_getclstats(struct hfsc_class_stats *, struct hfsc_class *);
struct hfsc_class	*hfsc_clh2cph(struct hfsc_if *, u_int32_t);

#define	HFSC_FREQ		1000000000LL
#define	HFSC_CLK_PER_TICK	tick_nsec
#define	HFSC_HT_INFINITY	0xffffffffffffffffLL	/* infinite time value */

#define	hfsc_uptime()	nsecuptime()

struct pool	hfsc_class_pl, hfsc_internal_sc_pl;

/*
 * ifqueue glue.
 */

unsigned int	 hfsc_idx(unsigned int, const struct mbuf *);
struct mbuf	*hfsc_enq(struct ifqueue *, struct mbuf *);
struct mbuf	*hfsc_deq_begin(struct ifqueue *, void **);
void		 hfsc_deq_commit(struct ifqueue *, struct mbuf *, void *);
void		 hfsc_purge(struct ifqueue *, struct mbuf_list *);
void		*hfsc_alloc(unsigned int, void *);
void		 hfsc_free(unsigned int, void *);

const struct ifq_ops hfsc_ops = {
	hfsc_idx,
	hfsc_enq,
	hfsc_deq_begin,
	hfsc_deq_commit,
	hfsc_purge,
	hfsc_alloc,
	hfsc_free,
};

const struct ifq_ops * const ifq_hfsc_ops = &hfsc_ops;

/*
 * pf queue glue.
 */

void		*hfsc_pf_alloc(struct ifnet *);
int		 hfsc_pf_addqueue(void *, struct pf_queuespec *);
void		 hfsc_pf_free(void *);
int		 hfsc_pf_qstats(struct pf_queuespec *, void *, int *);
unsigned int	 hfsc_pf_qlength(void *);
struct mbuf *	 hfsc_pf_enqueue(void *, struct mbuf *);
struct mbuf *	 hfsc_pf_deq_begin(void *, void **, struct mbuf_list *);
void		 hfsc_pf_deq_commit(void *, struct mbuf *, void *);
void		 hfsc_pf_purge(void *, struct mbuf_list *);

const struct pfq_ops hfsc_pf_ops = {
	hfsc_pf_alloc,
	hfsc_pf_addqueue,
	hfsc_pf_free,
	hfsc_pf_qstats,
	hfsc_pf_qlength,
	hfsc_pf_enqueue,
	hfsc_pf_deq_begin,
	hfsc_pf_deq_commit,
	hfsc_pf_purge
};

const struct pfq_ops * const pfq_hfsc_ops = &hfsc_pf_ops;

/*
 * shortcuts for repeated use
 */
static inline unsigned int
hfsc_class_qlength(struct hfsc_class *cl)
{
	/* Only leaf classes have a queue */
	if (cl->cl_qops != NULL)
		return cl->cl_qops->pfq_qlength(cl->cl_qdata);
	return 0;
}

static inline struct mbuf *
hfsc_class_enqueue(struct hfsc_class *cl, struct mbuf *m)
{
	return cl->cl_qops->pfq_enqueue(cl->cl_qdata, m);
}

static inline struct mbuf *
hfsc_class_deq_begin(struct hfsc_class *cl, struct mbuf_list *ml)
{
	return cl->cl_qops->pfq_deq_begin(cl->cl_qdata, &cl->cl_cookie, ml);
}

static inline void
hfsc_class_deq_commit(struct hfsc_class *cl, struct mbuf *m)
{
	return cl->cl_qops->pfq_deq_commit(cl->cl_qdata, m, cl->cl_cookie);
}

static inline void
hfsc_class_purge(struct hfsc_class *cl, struct mbuf_list *ml)
{
	/* Only leaf classes have a queue */
	if (cl->cl_qops != NULL)
		return cl->cl_qops->pfq_purge(cl->cl_qdata, ml);
}

static inline u_int
hfsc_more_slots(u_int current)
{
	u_int want = current * 2;

	return (want > HFSC_MAX_CLASSES ? HFSC_MAX_CLASSES : want);
}

static void
hfsc_grow_class_tbl(struct hfsc_if *hif, u_int howmany)
{
	struct hfsc_class **newtbl, **old;
	size_t oldlen = sizeof(void *) * hif->hif_allocated;

	newtbl = mallocarray(howmany, sizeof(void *), M_DEVBUF,
	    M_WAITOK | M_ZERO);
	old = hif->hif_class_tbl;

	memcpy(newtbl, old, oldlen);
	hif->hif_class_tbl = newtbl;
	hif->hif_allocated = howmany;

	free(old, M_DEVBUF, oldlen);
}

void
hfsc_initialize(void)
{
	pool_init(&hfsc_class_pl, sizeof(struct hfsc_class), 0,
	    IPL_NONE, PR_WAITOK, "hfscclass", NULL);
	pool_init(&hfsc_internal_sc_pl, sizeof(struct hfsc_internal_sc), 0,
	    IPL_NONE, PR_WAITOK, "hfscintsc", NULL);
}

void *
hfsc_pf_alloc(struct ifnet *ifp)
{
	struct hfsc_if *hif;

	KASSERT(ifp != NULL);

	hif = malloc(sizeof(*hif), M_DEVBUF, M_WAITOK | M_ZERO);
	TAILQ_INIT(&hif->hif_eligible);
	hif->hif_class_tbl = mallocarray(HFSC_DEFAULT_CLASSES, sizeof(void *),
	    M_DEVBUF, M_WAITOK | M_ZERO);
	hif->hif_allocated = HFSC_DEFAULT_CLASSES;

	timeout_set(&hif->hif_defer, hfsc_deferred, ifp);

	return (hif);
}

int
hfsc_pf_addqueue(void *arg, struct pf_queuespec *q)
{
	struct hfsc_if *hif = arg;
	struct hfsc_class *cl, *parent, *np = NULL;
	struct hfsc_sc rtsc, lssc, ulsc;
	int error = 0;

	KASSERT(hif != NULL);
	KASSERT(q->qid != 0);

	/* Root queue must have non-zero linksharing parameters */
	if (q->linkshare.m1.absolute == 0 && q->linkshare.m2.absolute == 0 &&
	    q->parent_qid == 0)
		return (EINVAL);

	if (q->parent_qid == 0 && hif->hif_rootclass == NULL) {
		np = hfsc_class_create(hif, NULL, NULL, NULL, NULL,
		    0, 0, HFSC_ROOT_CLASS | q->qid);
		if (np == NULL)
			return (EINVAL);
		parent = np;
	} else if ((parent = hfsc_clh2cph(hif, q->parent_qid)) == NULL)
		return (EINVAL);

	if (hfsc_clh2cph(hif, q->qid) != NULL) {
		hfsc_class_destroy(hif, np);
		return (EBUSY);
	}

	rtsc.m1 = q->realtime.m1.absolute;
	rtsc.d = q->realtime.d;
	rtsc.m2 = q->realtime.m2.absolute;
	lssc.m1 = q->linkshare.m1.absolute;
	lssc.d = q->linkshare.d;
	lssc.m2 = q->linkshare.m2.absolute;
	ulsc.m1 = q->upperlimit.m1.absolute;
	ulsc.d = q->upperlimit.d;
	ulsc.m2 = q->upperlimit.m2.absolute;

	if ((cl = hfsc_class_create(hif, &rtsc, &lssc, &ulsc,
	    parent, q->qlimit, q->flags, q->qid)) == NULL) {
		hfsc_class_destroy(hif, np);
		return (ENOMEM);
	}

	/* Attach a queue manager if specified */
	cl->cl_qops = pf_queue_manager(q);
	/* Realtime class cannot be used with an external queue manager */
	if (cl->cl_qops == NULL || cl->cl_rsc != NULL) {
		cl->cl_qops = pfq_hfsc_ops;
		cl->cl_qdata = &cl->cl_q;
	} else {
		cl->cl_qdata = cl->cl_qops->pfq_alloc(q->kif->pfik_ifp);
		if (cl->cl_qdata == NULL) {
			cl->cl_qops = NULL;
			hfsc_class_destroy(hif, cl);
			hfsc_class_destroy(hif, np);
			return (ENOMEM);
		}
		error = cl->cl_qops->pfq_addqueue(cl->cl_qdata, q);
		if (error) {
			cl->cl_qops->pfq_free(cl->cl_qdata);
			cl->cl_qops = NULL;
			hfsc_class_destroy(hif, cl);
			hfsc_class_destroy(hif, np);
			return (error);
		}
	}

	KASSERT(cl->cl_qops != NULL);
	KASSERT(cl->cl_qdata != NULL);

	return (0);
}
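
/*
 * Illustrative note, not part of the original sources: the three curves
 * above come straight from the pf_queuespec that pfctl builds from the
 * queue definition in pf.conf.  As a rough, hypothetical example of the
 * mapping, a leaf queue declared along the lines of
 *
 *	queue ssh parent root bandwidth 40M min 10M max 80M
 *
 * would arrive here with approximately
 *
 *	rtsc = { .m1 = 0, .d = 0, .m2 = 10000000 }	(realtime, "min")
 *	lssc = { .m1 = 0, .d = 0, .m2 = 40000000 }	(linkshare, "bandwidth")
 *	ulsc = { .m1 = 0, .d = 0, .m2 = 80000000 }	(upperlimit, "max")
 *
 * m1/d/m2 being the initial slope (bits/sec), the inflection delay (msec)
 * and the long-term slope of each two-piece curve; without a burst
 * specification the first segment is flat (m1 = 0, d = 0).
 */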

int
hfsc_pf_qstats(struct pf_queuespec *q, void *ubuf, int *nbytes)
{
	struct ifnet *ifp = q->kif->pfik_ifp;
	struct hfsc_if *hif;
	struct hfsc_class *cl;
	struct hfsc_class_stats stats;
	int error = 0;

	if (ifp == NULL)
		return (EBADF);

	if (*nbytes < sizeof(stats))
		return (EINVAL);

	hif = ifq_q_enter(&ifp->if_snd, ifq_hfsc_ops);
	if (hif == NULL)
		return (EBADF);

	if ((cl = hfsc_clh2cph(hif, q->qid)) == NULL) {
		ifq_q_leave(&ifp->if_snd, hif);
		return (EINVAL);
	}

	hfsc_getclstats(&stats, cl);
	ifq_q_leave(&ifp->if_snd, hif);

	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
		return (error);

	*nbytes = sizeof(stats);
	return (0);
}

void
hfsc_pf_free(void *arg)
{
	hfsc_free(0, arg);
}

unsigned int
hfsc_pf_qlength(void *arg)
{
	struct hfsc_classq *cq = arg;

	return ml_len(&cq->q);
}

struct mbuf *
hfsc_pf_enqueue(void *arg, struct mbuf *m)
{
	struct hfsc_classq *cq = arg;

	if (ml_len(&cq->q) >= cq->qlimit)
		return (m);

	ml_enqueue(&cq->q, m);
	return (NULL);
}

struct mbuf *
hfsc_pf_deq_begin(void *arg, void **cookiep, struct mbuf_list *free_ml)
{
	struct hfsc_classq *cq = arg;

	return MBUF_LIST_FIRST(&cq->q);
}

void
hfsc_pf_deq_commit(void *arg, struct mbuf *m, void *cookie)
{
	struct hfsc_classq *cq = arg;

	ml_dequeue(&cq->q);
}

void
hfsc_pf_purge(void *arg, struct mbuf_list *ml)
{
	struct hfsc_classq *cq = arg;

	ml_enlist(ml, &cq->q);
}

unsigned int
hfsc_idx(unsigned int nqueues, const struct mbuf *m)
{
	/*
	 * hfsc can only function on a single ifq and the stack understands
	 * this. when the first ifq on an interface is switched to hfsc,
	 * this gets used to map all mbufs to the first and only ifq that
	 * is set up for hfsc.
	 */
	return (0);
}

void *
hfsc_alloc(unsigned int idx, void *q)
{
	struct hfsc_if *hif = q;

	KASSERT(idx == 0); /* when hfsc is enabled we only use the first ifq */
	KASSERT(hif != NULL);
	return (hif);
}

void
hfsc_free(unsigned int idx, void *q)
{
	struct hfsc_if *hif = q;
	struct hfsc_class *cl;
	int i, restart;

	KERNEL_ASSERT_LOCKED();
	KASSERT(idx == 0); /* when hfsc is enabled we only use the first ifq */

	timeout_del(&hif->hif_defer);

	do {
		restart = 0;
		for (i = 0; i < hif->hif_allocated; i++) {
			cl = hif->hif_class_tbl[i];
			if (hfsc_class_destroy(hif, cl) == EBUSY)
				restart++;
		}
	} while (restart > 0);

	free(hif->hif_class_tbl, M_DEVBUF, hif->hif_allocated * sizeof(void *));
	free(hif, M_DEVBUF, sizeof(*hif));
}

void
hfsc_purge(struct ifqueue *ifq, struct mbuf_list *ml)
{
	struct hfsc_if *hif = ifq->ifq_q;
	struct hfsc_class *cl;

	for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
		hfsc_cl_purge(hif, cl, ml);
}

struct hfsc_class *
hfsc_class_create(struct hfsc_if *hif, struct hfsc_sc *rsc,
    struct hfsc_sc *fsc, struct hfsc_sc *usc, struct hfsc_class *parent,
    int qlimit, int flags, int qid)
{
	struct hfsc_class *cl, *p;
	int i, s;

	if (qlimit == 0)
		qlimit = HFSC_DEFAULT_QLIMIT;

	if (hif->hif_classes >= hif->hif_allocated) {
		u_int newslots = hfsc_more_slots(hif->hif_allocated);

		if (newslots == hif->hif_allocated)
			return (NULL);
		hfsc_grow_class_tbl(hif, newslots);
	}

	cl = pool_get(&hfsc_class_pl, PR_WAITOK | PR_ZERO);
	TAILQ_INIT(&cl->cl_actc);

	ml_init(&cl->cl_q.q);
	cl->cl_q.qlimit = qlimit;
	cl->cl_flags = flags;

	if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0)) {
		cl->cl_rsc = pool_get(&hfsc_internal_sc_pl, PR_WAITOK);
		hfsc_sc2isc(rsc, cl->cl_rsc);
		hfsc_rtsc_init(&cl->cl_deadline, cl->cl_rsc, 0, 0);
		hfsc_rtsc_init(&cl->cl_eligible, cl->cl_rsc, 0, 0);
	}
	if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0)) {
		cl->cl_fsc = pool_get(&hfsc_internal_sc_pl, PR_WAITOK);
		hfsc_sc2isc(fsc, cl->cl_fsc);
		hfsc_rtsc_init(&cl->cl_virtual, cl->cl_fsc, 0, 0);
	}
	if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0)) {
		cl->cl_usc = pool_get(&hfsc_internal_sc_pl, PR_WAITOK);
		hfsc_sc2isc(usc, cl->cl_usc);
		hfsc_rtsc_init(&cl->cl_ulimit, cl->cl_usc, 0, 0);
	}

	cl->cl_id = hif->hif_classid++;
	cl->cl_handle = qid;
	cl->cl_parent = parent;

	s = splnet();
	hif->hif_classes++;

	/*
	 * find a free slot in the class table.  if the slot matching
	 * the lower bits of qid is free, use this slot.  otherwise,
	 * use the first free slot.
	 */
	i = qid % hif->hif_allocated;
	if (hif->hif_class_tbl[i] == NULL)
		hif->hif_class_tbl[i] = cl;
	else {
		for (i = 0; i < hif->hif_allocated; i++)
			if (hif->hif_class_tbl[i] == NULL) {
				hif->hif_class_tbl[i] = cl;
				break;
			}
		if (i == hif->hif_allocated) {
			splx(s);
			goto err_ret;
		}
	}

	if (flags & HFSC_DEFAULTCLASS)
		hif->hif_defaultclass = cl;

	if (parent == NULL)
		hif->hif_rootclass = cl;
	else {
		/* add this class to the children list of the parent */
		if ((p = parent->cl_children) == NULL)
			parent->cl_children = cl;
		else {
			while (p->cl_siblings != NULL)
				p = p->cl_siblings;
			p->cl_siblings = cl;
		}
	}
	splx(s);

	return (cl);

err_ret:
	if (cl->cl_fsc != NULL)
		pool_put(&hfsc_internal_sc_pl, cl->cl_fsc);
	if (cl->cl_rsc != NULL)
		pool_put(&hfsc_internal_sc_pl, cl->cl_rsc);
	if (cl->cl_usc != NULL)
		pool_put(&hfsc_internal_sc_pl, cl->cl_usc);
	pool_put(&hfsc_class_pl, cl);
	return (NULL);
}

int
hfsc_class_destroy(struct hfsc_if *hif, struct hfsc_class *cl)
{
	int i, s;

	if (cl == NULL)
		return (0);

	if (cl->cl_children != NULL)
		return (EBUSY);

	s = splnet();
	KASSERT(hfsc_class_qlength(cl) == 0);

	if (cl->cl_parent != NULL) {
		struct hfsc_class *p = cl->cl_parent->cl_children;

		if (p == cl)
			cl->cl_parent->cl_children = cl->cl_siblings;
		else do {
			if (p->cl_siblings == cl) {
				p->cl_siblings = cl->cl_siblings;
				break;
			}
		} while ((p = p->cl_siblings) != NULL);
	}

	for (i = 0; i < hif->hif_allocated; i++)
		if (hif->hif_class_tbl[i] == cl) {
			hif->hif_class_tbl[i] = NULL;
			break;
		}

	hif->hif_classes--;
	splx(s);

	KASSERT(TAILQ_EMPTY(&cl->cl_actc));

	if (cl == hif->hif_rootclass)
		hif->hif_rootclass = NULL;
	if (cl == hif->hif_defaultclass)
		hif->hif_defaultclass = NULL;

	/* Free external queue manager resources */
	if (cl->cl_qops && cl->cl_qops != pfq_hfsc_ops)
		cl->cl_qops->pfq_free(cl->cl_qdata);

	if (cl->cl_usc != NULL)
		pool_put(&hfsc_internal_sc_pl, cl->cl_usc);
	if (cl->cl_fsc != NULL)
		pool_put(&hfsc_internal_sc_pl, cl->cl_fsc);
	if (cl->cl_rsc != NULL)
		pool_put(&hfsc_internal_sc_pl, cl->cl_rsc);
	pool_put(&hfsc_class_pl, cl);

	return (0);
}

/*
 * hfsc_nextclass returns the next class in the tree.
 *	usage:
 *		for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
 *			do_something;
 */
struct hfsc_class *
hfsc_nextclass(struct hfsc_class *cl)
{
	if (cl->cl_children != NULL)
		cl = cl->cl_children;
	else if (cl->cl_siblings != NULL)
		cl = cl->cl_siblings;
	else {
		while ((cl = cl->cl_parent) != NULL)
			if (cl->cl_siblings) {
				cl = cl->cl_siblings;
				break;
			}
	}

	return (cl);
}
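
/*
 * Illustrative note, not part of the original sources: this walk is a
 * pre-order depth-first traversal over the cl_children/cl_siblings
 * pointers.  For a hypothetical hierarchy
 *
 *	root
 *	  +- A
 *	  |   +- A1
 *	  |   +- A2
 *	  +- B
 *
 * successive calls starting at root visit root, A, A1, A2, B and then
 * return NULL, which is how hfsc_purge() reaches every class exactly once.
 */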

struct mbuf *
hfsc_enq(struct ifqueue *ifq, struct mbuf *m)
{
	struct hfsc_if *hif = ifq->ifq_q;
	struct hfsc_class *cl;
	struct mbuf *dm;

	if ((cl = hfsc_clh2cph(hif, m->m_pkthdr.pf.qid)) == NULL ||
	    cl->cl_children != NULL) {
		cl = hif->hif_defaultclass;
		if (cl == NULL)
			return (m);
	}

	dm = hfsc_class_enqueue(cl, m);

	/* successfully queued. */
	if (dm != m && hfsc_class_qlength(cl) == 1) {
		hfsc_set_active(hif, cl, m->m_pkthdr.len);
		if (!timeout_pending(&hif->hif_defer))
			timeout_add(&hif->hif_defer, 1);
	}

	/* drop occurred. */
	if (dm != NULL)
		PKTCNTR_INC(&cl->cl_stats.drop_cnt, dm->m_pkthdr.len);

	return (dm);
}

struct mbuf *
hfsc_deq_begin(struct ifqueue *ifq, void **cookiep)
{
	struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
	struct hfsc_if *hif = ifq->ifq_q;
	struct hfsc_class *cl, *tcl;
	struct mbuf *m;
	u_int64_t cur_time;

	cur_time = hfsc_uptime();

	/*
	 * if there are eligible classes, use real-time criteria.
	 * find the class with the minimum deadline among
	 * the eligible classes.
	 */
	cl = hfsc_ellist_get_mindl(hif, cur_time);
	if (cl == NULL) {
		/*
		 * use link-sharing criteria
		 * get the class with the minimum vt in the hierarchy
		 */
		cl = NULL;
		tcl = hif->hif_rootclass;

		while (tcl != NULL && tcl->cl_children != NULL) {
			tcl = hfsc_actlist_firstfit(tcl, cur_time);
			if (tcl == NULL)
				continue;

			/*
			 * update parent's cl_cvtmin.
			 * don't update if the new vt is smaller.
			 */
			if (tcl->cl_parent->cl_cvtmin < tcl->cl_vt)
				tcl->cl_parent->cl_cvtmin = tcl->cl_vt;

			cl = tcl;
		}
		/* XXX HRTIMER plan hfsc_deferred precisely here. */
		if (cl == NULL)
			return (NULL);
	}

	m = hfsc_class_deq_begin(cl, &free_ml);
	ifq_mfreeml(ifq, &free_ml);
	if (m == NULL) {
		hfsc_update_sc(hif, cl, 0);
		return (NULL);
	}

	hif->hif_microtime = cur_time;
	*cookiep = cl;
	return (m);
}

void
hfsc_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
{
	struct hfsc_if *hif = ifq->ifq_q;
	struct hfsc_class *cl = cookie;

	hfsc_class_deq_commit(cl, m);
	hfsc_update_sc(hif, cl, m->m_pkthdr.len);

	PKTCNTR_INC(&cl->cl_stats.xmit_cnt, m->m_pkthdr.len);
}

void
hfsc_update_sc(struct hfsc_if *hif, struct hfsc_class *cl, int len)
{
	int next_len, realtime = 0;
	u_int64_t cur_time = hif->hif_microtime;

	/* check if the class was scheduled by real-time criteria */
	if (cl->cl_rsc != NULL)
		realtime = (cl->cl_e <= cur_time);

	hfsc_update_vf(cl, len, cur_time);
	if (realtime)
		cl->cl_cumul += len;

	if (hfsc_class_qlength(cl) > 0) {
		/*
		 * Realtime queue needs to look into the future and make
		 * calculations based on that. This is the reason it can't
		 * be used with an external queue manager.
		 */
		if (cl->cl_rsc != NULL) {
			struct mbuf *m0;

			/* update ed */
			KASSERT(cl->cl_qops == pfq_hfsc_ops);
			m0 = MBUF_LIST_FIRST(&cl->cl_q.q);
			next_len = m0->m_pkthdr.len;

			if (realtime)
				hfsc_update_ed(hif, cl, next_len);
			else
				hfsc_update_d(cl, next_len);
		}
	} else {
		/* the class becomes passive */
		hfsc_set_passive(hif, cl);
	}
}

void
hfsc_deferred(void *arg)
{
	struct ifnet *ifp = arg;
	struct ifqueue *ifq = &ifp->if_snd;
	struct hfsc_if *hif;

	if (!HFSC_ENABLED(ifq))
		return;

	if (!ifq_empty(ifq))
		ifq_start(ifq);

	hif = ifq_q_enter(&ifp->if_snd, ifq_hfsc_ops);
	if (hif == NULL)
		return;
	/* XXX HRTIMER nearest virtual/fit time is likely less than 1/HZ. */
	timeout_add(&hif->hif_defer, 1);
	ifq_q_leave(&ifp->if_snd, hif);
}

void
hfsc_cl_purge(struct hfsc_if *hif, struct hfsc_class *cl, struct mbuf_list *ml)
{
	struct mbuf_list ml2 = MBUF_LIST_INITIALIZER();

	hfsc_class_purge(cl, &ml2);
	if (ml_empty(&ml2))
		return;

	ml_enlist(ml, &ml2);

	hfsc_update_vf(cl, 0, 0);	/* remove cl from the actlist */
	hfsc_set_passive(hif, cl);
}

void
hfsc_set_active(struct hfsc_if *hif, struct hfsc_class *cl, int len)
{
	if (cl->cl_rsc != NULL)
		hfsc_init_ed(hif, cl, len);
	if (cl->cl_fsc != NULL)
		hfsc_init_vf(cl, len);

	cl->cl_stats.period++;
}

void
hfsc_set_passive(struct hfsc_if *hif, struct hfsc_class *cl)
{
	if (cl->cl_rsc != NULL)
		hfsc_ellist_remove(hif, cl);

	/*
	 * actlist is handled in hfsc_update_vf() so that hfsc_update_vf(cl, 0,
	 * 0) needs to be called explicitly to remove a class from actlist
	 */
}

void
hfsc_init_ed(struct hfsc_if *hif, struct hfsc_class *cl, int next_len)
{
	u_int64_t cur_time;

	cur_time = hfsc_uptime();

	/* update the deadline curve */
	hfsc_rtsc_min(&cl->cl_deadline, cl->cl_rsc, cur_time, cl->cl_cumul);

	/*
	 * update the eligible curve.
	 * for concave, it is equal to the deadline curve.
	 * for convex, it is a linear curve with slope m2.
	 */
	cl->cl_eligible = cl->cl_deadline;
	if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) {
		cl->cl_eligible.dx = 0;
		cl->cl_eligible.dy = 0;
	}

	/* compute e and d */
	cl->cl_e = hfsc_rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
	cl->cl_d = hfsc_rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);

	hfsc_ellist_insert(hif, cl);
}

void
hfsc_update_ed(struct hfsc_if *hif, struct hfsc_class *cl, int next_len)
{
	cl->cl_e = hfsc_rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
	cl->cl_d = hfsc_rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);

	hfsc_ellist_update(hif, cl);
}

void
hfsc_update_d(struct hfsc_class *cl, int next_len)
{
	cl->cl_d = hfsc_rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
}

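/*
 * Worked example, illustrative only (the curve and packet size are made
 * up): take a class whose realtime curve is m1 = 100Mbps, d = 20ms,
 * m2 = 50Mbps, that just became active with cl_cumul = 0 at cur_time and
 * whose head packet is next_len = 1500 bytes.  sm1 > sm2 (concave), so
 * the eligible curve equals the deadline curve, and hfsc_init_ed() gets
 *
 *	cl_e = hfsc_rtsc_y2x(&cl_eligible, 0)    = cur_time
 *	cl_d = hfsc_rtsc_y2x(&cl_deadline, 1500) = cur_time + 120000 ns
 *
 * because 1500 bytes at 100Mbps take 1500 * 8 / 100e6 s = 120us: the
 * class is eligible immediately and must receive 1500 bytes of service
 * within 120us to keep its realtime guarantee.
 */
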
void
hfsc_init_vf(struct hfsc_class *cl, int len)
{
	struct hfsc_class *max_cl, *p;
	u_int64_t vt, f, cur_time;
	int go_active;

	cur_time = 0;
	go_active = 1;
	for ( ; cl->cl_parent != NULL; cl = cl->cl_parent) {
		if (go_active && cl->cl_nactive++ == 0)
			go_active = 1;
		else
			go_active = 0;

		if (go_active) {
			max_cl = TAILQ_LAST(&cl->cl_parent->cl_actc,
			    hfsc_active);
			if (max_cl != NULL) {
				/*
				 * set vt to the average of the min and max
				 * classes.  if the parent's period didn't
				 * change, don't decrease vt of the class.
				 */
				vt = max_cl->cl_vt;
				if (cl->cl_parent->cl_cvtmin != 0)
					vt = (cl->cl_parent->cl_cvtmin + vt)/2;

				if (cl->cl_parent->cl_vtperiod !=
				    cl->cl_parentperiod || vt > cl->cl_vt)
					cl->cl_vt = vt;
			} else {
				/*
				 * first child for a new parent backlog period.
				 * add parent's cvtmax to vtoff of children
				 * to make a new vt (vtoff + vt) larger than
				 * the vt in the last period for all children.
				 */
				vt = cl->cl_parent->cl_cvtmax;
				for (p = cl->cl_parent->cl_children; p != NULL;
				    p = p->cl_siblings)
					p->cl_vtoff += vt;
				cl->cl_vt = 0;
				cl->cl_parent->cl_cvtmax = 0;
				cl->cl_parent->cl_cvtmin = 0;
			}
			cl->cl_initvt = cl->cl_vt;

			/* update the virtual curve */
			vt = cl->cl_vt + cl->cl_vtoff;
			hfsc_rtsc_min(&cl->cl_virtual, cl->cl_fsc, vt,
			    cl->cl_total);
			if (cl->cl_virtual.x == vt) {
				cl->cl_virtual.x -= cl->cl_vtoff;
				cl->cl_vtoff = 0;
			}
			cl->cl_vtadj = 0;

			cl->cl_vtperiod++;  /* increment vt period */
			cl->cl_parentperiod = cl->cl_parent->cl_vtperiod;
			if (cl->cl_parent->cl_nactive == 0)
				cl->cl_parentperiod++;
			cl->cl_f = 0;

			hfsc_actlist_insert(cl);

			if (cl->cl_usc != NULL) {
				/* class has upper limit curve */
				if (cur_time == 0)
					cur_time = hfsc_uptime();

				/* update the ulimit curve */
				hfsc_rtsc_min(&cl->cl_ulimit, cl->cl_usc, cur_time,
				    cl->cl_total);
				/* compute myf */
				cl->cl_myf = hfsc_rtsc_y2x(&cl->cl_ulimit,
				    cl->cl_total);
				cl->cl_myfadj = 0;
			}
		}

		if (cl->cl_myf > cl->cl_cfmin)
			f = cl->cl_myf;
		else
			f = cl->cl_cfmin;
		if (f != cl->cl_f) {
			cl->cl_f = f;
			hfsc_update_cfmin(cl->cl_parent);
		}
	}
}

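/*
 * Illustrative example with made-up numbers: if a class goes active under
 * a parent whose active list already ends with a child at cl_vt = 1000
 * and whose cl_cvtmin is 600, hfsc_init_vf() above starts the new backlog
 * period at
 *
 *	vt = (cl_cvtmin + max_cl->cl_vt) / 2 = (600 + 1000) / 2 = 800
 *
 * i.e. halfway between the least and the most advanced active siblings,
 * so the newcomer is neither unfairly ahead of nor behind them.  If the
 * parent had no active children at all, the cvtmax/vtoff bookkeeping
 * restarts the period with vt = 0 instead.
 */
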
void
hfsc_update_vf(struct hfsc_class *cl, int len, u_int64_t cur_time)
{
	u_int64_t f, myf_bound, delta;
	int go_passive = 0;

	if (hfsc_class_qlength(cl) == 0)
		go_passive = 1;

	for (; cl->cl_parent != NULL; cl = cl->cl_parent) {
		cl->cl_total += len;

		if (cl->cl_fsc == NULL || cl->cl_nactive == 0)
			continue;

		if (go_passive && --cl->cl_nactive == 0)
			go_passive = 1;
		else
			go_passive = 0;

		if (go_passive) {
			/* no more active child, going passive */

			/* update cvtmax of the parent class */
			if (cl->cl_vt > cl->cl_parent->cl_cvtmax)
				cl->cl_parent->cl_cvtmax = cl->cl_vt;

			/* remove this class from the vt list */
			hfsc_actlist_remove(cl);

			hfsc_update_cfmin(cl->cl_parent);

			continue;
		}

		/*
		 * update vt and f
		 */
		cl->cl_vt = hfsc_rtsc_y2x(&cl->cl_virtual, cl->cl_total)
		    - cl->cl_vtoff + cl->cl_vtadj;

		/*
		 * if vt of the class is smaller than cvtmin,
		 * the class was skipped in the past due to non-fit.
		 * if so, we need to adjust vtadj.
		 */
		if (cl->cl_vt < cl->cl_parent->cl_cvtmin) {
			cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt;
			cl->cl_vt = cl->cl_parent->cl_cvtmin;
		}

		/* update the vt list */
		hfsc_actlist_update(cl);

		if (cl->cl_usc != NULL) {
			cl->cl_myf = cl->cl_myfadj +
			    hfsc_rtsc_y2x(&cl->cl_ulimit, cl->cl_total);

			/*
			 * if myf lags behind by more than one clock tick
			 * from the current time, adjust myfadj to prevent
			 * a rate-limited class from going greedy.
			 * in a steady state under rate-limiting, myf
			 * fluctuates within one clock tick.
			 */
			myf_bound = cur_time - HFSC_CLK_PER_TICK;
			if (cl->cl_myf < myf_bound) {
				delta = cur_time - cl->cl_myf;
				cl->cl_myfadj += delta;
				cl->cl_myf += delta;
			}
		}

		/* cl_f is max(cl_myf, cl_cfmin) */
		if (cl->cl_myf > cl->cl_cfmin)
			f = cl->cl_myf;
		else
			f = cl->cl_cfmin;
		if (f != cl->cl_f) {
			cl->cl_f = f;
			hfsc_update_cfmin(cl->cl_parent);
		}
	}
}

void
hfsc_update_cfmin(struct hfsc_class *cl)
{
	struct hfsc_class *p;
	u_int64_t cfmin;

	if (TAILQ_EMPTY(&cl->cl_actc)) {
		cl->cl_cfmin = 0;
		return;
	}
	cfmin = HFSC_HT_INFINITY;
	TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) {
		if (p->cl_f == 0) {
			cl->cl_cfmin = 0;
			return;
		}
		if (p->cl_f < cfmin)
			cfmin = p->cl_f;
	}
	cl->cl_cfmin = cfmin;
}

/*
 * eligible list holds backlogged classes being sorted by their eligible times.
 * there is one eligible list per interface.
 */
void
hfsc_ellist_insert(struct hfsc_if *hif, struct hfsc_class *cl)
{
	struct hfsc_class *p;

	/* check the last entry first */
	if ((p = TAILQ_LAST(&hif->hif_eligible, hfsc_eligible)) == NULL ||
	    p->cl_e <= cl->cl_e) {
		TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist);
		return;
	}

	TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) {
		if (cl->cl_e < p->cl_e) {
			TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
			return;
		}
	}
}

void
hfsc_ellist_remove(struct hfsc_if *hif, struct hfsc_class *cl)
{
	TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
}

void
hfsc_ellist_update(struct hfsc_if *hif, struct hfsc_class *cl)
{
	struct hfsc_class *p, *last;

	/*
	 * the eligible time of a class increases monotonically.
	 * if the next entry has a larger eligible time, nothing to do.
	 */
	p = TAILQ_NEXT(cl, cl_ellist);
	if (p == NULL || cl->cl_e <= p->cl_e)
		return;

	/* check the last entry */
	last = TAILQ_LAST(&hif->hif_eligible, hfsc_eligible);
	if (last->cl_e <= cl->cl_e) {
		TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
		TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist);
		return;
	}

	/*
	 * the new position must be between the next entry
	 * and the last entry
	 */
	while ((p = TAILQ_NEXT(p, cl_ellist)) != NULL) {
		if (cl->cl_e < p->cl_e) {
			TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
			TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
			return;
		}
	}
}

/* find the class with the minimum deadline among the eligible classes */
struct hfsc_class *
hfsc_ellist_get_mindl(struct hfsc_if *hif, u_int64_t cur_time)
{
	struct hfsc_class *p, *cl = NULL;

	TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) {
		if (p->cl_e > cur_time)
			break;
		if (cl == NULL || p->cl_d < cl->cl_d)
			cl = p;
	}
	return (cl);
}

/*
 * active children list holds backlogged child classes being sorted
 * by their virtual time.
 * each intermediate class has one active children list.
 */
void
hfsc_actlist_insert(struct hfsc_class *cl)
{
	struct hfsc_class *p;

	/* check the last entry first */
	if ((p = TAILQ_LAST(&cl->cl_parent->cl_actc, hfsc_active)) == NULL
	    || p->cl_vt <= cl->cl_vt) {
		TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist);
		return;
	}

	TAILQ_FOREACH(p, &cl->cl_parent->cl_actc, cl_actlist) {
		if (cl->cl_vt < p->cl_vt) {
			TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
			return;
		}
	}
}

void
hfsc_actlist_remove(struct hfsc_class *cl)
{
	TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
}

void
hfsc_actlist_update(struct hfsc_class *cl)
{
	struct hfsc_class *p, *last;

	/*
	 * the virtual time of a class increases monotonically during its
	 * backlogged period.
	 * if the next entry has a larger virtual time, nothing to do.
	 */
	p = TAILQ_NEXT(cl, cl_actlist);
	if (p == NULL || cl->cl_vt < p->cl_vt)
		return;

	/* check the last entry */
	last = TAILQ_LAST(&cl->cl_parent->cl_actc, hfsc_active);
	if (last->cl_vt <= cl->cl_vt) {
		TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
		TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist);
		return;
	}

	/*
	 * the new position must be between the next entry
	 * and the last entry
	 */
	while ((p = TAILQ_NEXT(p, cl_actlist)) != NULL) {
		if (cl->cl_vt < p->cl_vt) {
			TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
			TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
			return;
		}
	}
}

struct hfsc_class *
hfsc_actlist_firstfit(struct hfsc_class *cl, u_int64_t cur_time)
{
	struct hfsc_class *p;

	TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist)
		if (p->cl_f <= cur_time)
			return (p);

	return (NULL);
}

/*
 * service curve support functions
 *
 *  external service curve parameters
 *	m: bits/sec
 *	d: msec
 *  internal service curve parameters
 *	sm: (bytes/tsc_interval) << SM_SHIFT
 *	ism: (tsc_count/byte) << ISM_SHIFT
 *	dx: tsc_count
 *
 * SM_SHIFT and ISM_SHIFT are scaled in order to keep effective digits.
 * we should be able to handle 100K-1Gbps linkspeed with 200MHz-1GHz CPU
 * speed.  SM_SHIFT and ISM_SHIFT are selected to have at least 3 effective
 * digits in decimal using the following table.
 *
 *  bits/sec    100Kbps     1Mbps      10Mbps     100Mbps    1Gbps
 *  ----------+-------------------------------------------------------
 *  bytes/nsec  12.5e-6     125e-6     1250e-6    12500e-6   125000e-6
 *  sm(500MHz)  25.0e-6     250e-6     2500e-6    25000e-6   250000e-6
 *  sm(200MHz)  62.5e-6     625e-6     6250e-6    62500e-6   625000e-6
 *
 *  nsec/byte   80000       8000       800        80         8
 *  ism(500MHz) 40000       4000       400        40         4
 *  ism(200MHz) 16000       1600       160        16         1.6
 */
#define	SM_SHIFT	24
#define	ISM_SHIFT	10

#define	SM_MASK		((1LL << SM_SHIFT) - 1)
#define	ISM_MASK	((1LL << ISM_SHIFT) - 1)

static __inline u_int64_t
seg_x2y(u_int64_t x, u_int64_t sm)
{
	u_int64_t y;

	/*
	 * compute
	 *	y = x * sm >> SM_SHIFT
	 * but divide it for the upper and lower bits to avoid overflow
	 */
	y = (x >> SM_SHIFT) * sm + (((x & SM_MASK) * sm) >> SM_SHIFT);
	return (y);
}

static __inline u_int64_t
seg_y2x(u_int64_t y, u_int64_t ism)
{
	u_int64_t x;

	if (y == 0)
		x = 0;
	else if (ism == HFSC_HT_INFINITY)
		x = HFSC_HT_INFINITY;
	else {
		x = (y >> ISM_SHIFT) * ism
		    + (((y & ISM_MASK) * ism) >> ISM_SHIFT);
	}
	return (x);
}

static __inline u_int64_t
m2sm(u_int m)
{
	u_int64_t sm;

	sm = ((u_int64_t)m << SM_SHIFT) / 8 / HFSC_FREQ;
	return (sm);
}

static __inline u_int64_t
m2ism(u_int m)
{
	u_int64_t ism;

	if (m == 0)
		ism = HFSC_HT_INFINITY;
	else
		ism = ((u_int64_t)HFSC_FREQ << ISM_SHIFT) * 8 / m;
	return (ism);
}

static __inline u_int64_t
d2dx(u_int d)
{
	u_int64_t dx;

	dx = ((u_int64_t)d * HFSC_FREQ) / 1000;
	return (dx);
}

static __inline u_int
sm2m(u_int64_t sm)
{
	u_int64_t m;

	m = (sm * 8 * HFSC_FREQ) >> SM_SHIFT;
	return ((u_int)m);
}

static __inline u_int
dx2d(u_int64_t dx)
{
	u_int64_t d;

	d = dx * 1000 / HFSC_FREQ;
	return ((u_int)d);
}

void
hfsc_sc2isc(struct hfsc_sc *sc, struct hfsc_internal_sc *isc)
{
	isc->sm1 = m2sm(sc->m1);
	isc->ism1 = m2ism(sc->m1);
	isc->dx = d2dx(sc->d);
	isc->dy = seg_x2y(isc->dx, isc->sm1);
	isc->sm2 = m2sm(sc->m2);
	isc->ism2 = m2ism(sc->m2);
}

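/*
 * Worked example, illustrative only: converting an external curve of
 * m1 = 100Mbps, d = 20ms, m2 = 50Mbps with HFSC_FREQ = 1e9 (nanosecond
 * time base), SM_SHIFT = 24 and ISM_SHIFT = 10 gives roughly
 *
 *	sm1  = (100e6 << 24) / 8 / 1e9 = 209715	((bytes/ns) << SM_SHIFT)
 *	ism1 = (1e9 << 10) * 8 / 100e6 =  81920	((ns/byte) << ISM_SHIFT)
 *	dx   = 20 * 1e9 / 1000         =    2e7	(20ms in ns)
 *	dy   = seg_x2y(dx, sm1)        ~ 250000	(bytes)
 *	sm2  = (50e6 << 24) / 8 / 1e9  = 104857
 *	ism2 = (1e9 << 10) * 8 / 50e6  = 163840
 *
 * dy matches the unscaled expectation: 100Mbps * 20ms / 8 = 250000 bytes
 * of service over the first segment (the scaled integer math lands one
 * byte short at 249999).
 */
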
/*
 * initialize the runtime service curve with the given internal
 * service curve starting at (x, y).
 */
void
hfsc_rtsc_init(struct hfsc_runtime_sc *rtsc, struct hfsc_internal_sc * isc,
    u_int64_t x, u_int64_t y)
{
	rtsc->x = x;
	rtsc->y = y;
	rtsc->sm1 = isc->sm1;
	rtsc->ism1 = isc->ism1;
	rtsc->dx = isc->dx;
	rtsc->dy = isc->dy;
	rtsc->sm2 = isc->sm2;
	rtsc->ism2 = isc->ism2;
}

/*
 * calculate the x-projection of the runtime service curve for the
 * given y value (the inverse of hfsc_rtsc_x2y() below)
 */
u_int64_t
hfsc_rtsc_y2x(struct hfsc_runtime_sc *rtsc, u_int64_t y)
{
	u_int64_t x;

	if (y < rtsc->y)
		x = rtsc->x;
	else if (y <= rtsc->y + rtsc->dy) {
		/* x belongs to the 1st segment */
		if (rtsc->dy == 0)
			x = rtsc->x + rtsc->dx;
		else
			x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1);
	} else {
		/* x belongs to the 2nd segment */
		x = rtsc->x + rtsc->dx
		    + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2);
	}
	return (x);
}

u_int64_t
hfsc_rtsc_x2y(struct hfsc_runtime_sc *rtsc, u_int64_t x)
{
	u_int64_t y;

	if (x <= rtsc->x)
		y = rtsc->y;
	else if (x <= rtsc->x + rtsc->dx)
		/* y belongs to the 1st segment */
		y = rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1);
	else
		/* y belongs to the 2nd segment */
		y = rtsc->y + rtsc->dy
		    + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2);
	return (y);
}

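/*
 * Worked example, illustrative only, continuing the 100Mbps/20ms/50Mbps
 * curve from the hfsc_sc2isc() note above: with the runtime curve
 * anchored at (x, y) = (0, 0), hfsc_rtsc_x2y() at x = 30ms = 3e7 ns falls
 * on the second segment and returns roughly
 *
 *	y = dy + seg_x2y(3e7 - 2e7, sm2) ~ 250000 + 62500 = 312500 bytes
 *
 * i.e. 20ms at 100Mbps plus 10ms at 50Mbps.  hfsc_rtsc_y2x() is the
 * inverse mapping: asked for y ~ 312500 bytes it returns x ~ 3e7 ns.
 */
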
/*
 * update the runtime service curve by taking the minimum of the current
 * runtime service curve and the service curve starting at (x, y).
 */
void
hfsc_rtsc_min(struct hfsc_runtime_sc *rtsc, struct hfsc_internal_sc *isc,
    u_int64_t x, u_int64_t y)
{
	u_int64_t y1, y2, dx, dy;

	if (isc->sm1 <= isc->sm2) {
		/* service curve is convex */
		y1 = hfsc_rtsc_x2y(rtsc, x);
		if (y1 < y)
			/* the current rtsc is smaller */
			return;
		rtsc->x = x;
		rtsc->y = y;
		return;
	}

	/*
	 * service curve is concave
	 * compute the two y values of the current rtsc
	 *	y1: at x
	 *	y2: at (x + dx)
	 */
	y1 = hfsc_rtsc_x2y(rtsc, x);
	if (y1 <= y) {
		/* rtsc is below isc, no change to rtsc */
		return;
	}

	y2 = hfsc_rtsc_x2y(rtsc, x + isc->dx);
	if (y2 >= y + isc->dy) {
		/* rtsc is above isc, replace rtsc by isc */
		rtsc->x = x;
		rtsc->y = y;
		rtsc->dx = isc->dx;
		rtsc->dy = isc->dy;
		return;
	}

	/*
	 * the two curves intersect
	 * compute the offsets (dx, dy) using the reverse
	 * function of seg_x2y()
	 *	seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y)
	 */
	dx = ((y1 - y) << SM_SHIFT) / (isc->sm1 - isc->sm2);
	/*
	 * check if (x, y1) belongs to the 1st segment of rtsc.
	 * if so, add the offset.
	 */
	if (rtsc->x + rtsc->dx > x)
		dx += rtsc->x + rtsc->dx - x;
	dy = seg_x2y(dx, isc->sm1);

	rtsc->x = x;
	rtsc->y = y;
	rtsc->dx = dx;
	rtsc->dy = dy;
	return;
}

void
hfsc_getclstats(struct hfsc_class_stats *sp, struct hfsc_class *cl)
{
	sp->class_id = cl->cl_id;
	sp->class_handle = cl->cl_handle;

	if (cl->cl_rsc != NULL) {
		sp->rsc.m1 = sm2m(cl->cl_rsc->sm1);
		sp->rsc.d = dx2d(cl->cl_rsc->dx);
		sp->rsc.m2 = sm2m(cl->cl_rsc->sm2);
	} else {
		sp->rsc.m1 = 0;
		sp->rsc.d = 0;
		sp->rsc.m2 = 0;
	}
	if (cl->cl_fsc != NULL) {
		sp->fsc.m1 = sm2m(cl->cl_fsc->sm1);
		sp->fsc.d = dx2d(cl->cl_fsc->dx);
		sp->fsc.m2 = sm2m(cl->cl_fsc->sm2);
	} else {
		sp->fsc.m1 = 0;
		sp->fsc.d = 0;
		sp->fsc.m2 = 0;
	}
	if (cl->cl_usc != NULL) {
		sp->usc.m1 = sm2m(cl->cl_usc->sm1);
		sp->usc.d = dx2d(cl->cl_usc->dx);
		sp->usc.m2 = sm2m(cl->cl_usc->sm2);
	} else {
		sp->usc.m1 = 0;
		sp->usc.d = 0;
		sp->usc.m2 = 0;
	}

	sp->total = cl->cl_total;
	sp->cumul = cl->cl_cumul;

	sp->d = cl->cl_d;
	sp->e = cl->cl_e;
	sp->vt = cl->cl_vt;
	sp->f = cl->cl_f;

	sp->initvt = cl->cl_initvt;
	sp->vtperiod = cl->cl_vtperiod;
	sp->parentperiod = cl->cl_parentperiod;
	sp->nactive = cl->cl_nactive;
	sp->vtoff = cl->cl_vtoff;
	sp->cvtmax = cl->cl_cvtmax;
	sp->myf = cl->cl_myf;
	sp->cfmin = cl->cl_cfmin;
	sp->cvtmin = cl->cl_cvtmin;
	sp->myfadj = cl->cl_myfadj;
	sp->vtadj = cl->cl_vtadj;

	sp->cur_time = hfsc_uptime();
	sp->machclk_freq = HFSC_FREQ;

	sp->qlength = hfsc_class_qlength(cl);
	sp->qlimit = cl->cl_q.qlimit;
	sp->xmit_cnt = cl->cl_stats.xmit_cnt;
	sp->drop_cnt = cl->cl_stats.drop_cnt;
	sp->period = cl->cl_stats.period;

	sp->qtype = 0;
}

/* convert a class handle to the corresponding class pointer */
struct hfsc_class *
hfsc_clh2cph(struct hfsc_if *hif, u_int32_t chandle)
{
	int i;
	struct hfsc_class *cl;

	if (chandle == 0)
		return (NULL);
	/*
	 * first, try the slot corresponding to the lower bits of the handle.
	 * if it does not match, do the linear table search.
	 */
	i = chandle % hif->hif_allocated;
	if ((cl = hif->hif_class_tbl[i]) != NULL && cl->cl_handle == chandle)
		return (cl);
	for (i = 0; i < hif->hif_allocated; i++)
		if ((cl = hif->hif_class_tbl[i]) != NULL &&
		    cl->cl_handle == chandle)
			return (cl);
	return (NULL);
}