1 /* $KAME: dccp_tcplike.c,v 1.19 2005/07/27 06:27:25 nishida Exp $ */ 2 /* $NetBSD: dccp_tcplike.c,v 1.4 2019/06/04 10:15:22 msaitoh Exp $ */ 3 4 /* 5 * Copyright (c) 2003 Magnus Erixzon 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. The name of the author may not be used to endorse or promote products 18 * derived from this software without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 */ 31 /* 32 * TCP-like congestion control for DCCP 33 */ 34 35 #include <sys/cdefs.h> 36 __KERNEL_RCSID(0, "$NetBSD: dccp_tcplike.c,v 1.4 2019/06/04 10:15:22 msaitoh Exp $"); 37 38 #ifdef _KERNEL_OPT 39 #include "opt_dccp.h" 40 #endif 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/domain.h> 45 #include <sys/kernel.h> 46 #include <sys/lock.h> 47 #include <sys/malloc.h> 48 #include <sys/mbuf.h> 49 #include <sys/proc.h> 50 #include <sys/protosw.h> 51 #include <sys/signalvar.h> 52 #include <sys/socket.h> 53 #include <sys/socketvar.h> 54 #include <sys/mutex.h> 55 #include <sys/sysctl.h> 56 #include <sys/syslog.h> 57 58 #include <net/if.h> 59 60 #include <netinet/in.h> 61 #include <netinet/in_systm.h> 62 #include <netinet/ip.h> 63 #include <netinet/in_pcb.h> 64 #include <netinet/in_var.h> 65 66 #include <netinet/ip_icmp.h> 67 #include <netinet/icmp_var.h> 68 #include <netinet/ip_var.h> 69 70 #include <netinet/dccp.h> 71 #include <netinet/dccp_var.h> 72 #include <netinet/dccp_tcplike.h> 73 74 #define TCPLIKE_DEBUG(args) dccp_log args 75 #define MALLOC_DEBUG(args) log args 76 #define CWND_DEBUG(args) dccp_log args 77 #define ACKRATIO_DEBUG(args) dccp_log args 78 #define LOSS_DEBUG(args) dccp_log args 79 #define TIMEOUT_DEBUG(args) dccp_log args 80 81 #if !defined(__FreeBSD__) || __FreeBSD_version < 500000 82 #define INP_INFO_LOCK_INIT(x,y) 83 #define INP_INFO_WLOCK(x) 84 #define INP_INFO_WUNLOCK(x) 85 #define INP_INFO_RLOCK(x) 86 #define INP_INFO_RUNLOCK(x) 87 #define INP_LOCK(x) 88 #define INP_UNLOCK(x) 89 #endif 90 91 /* Sender side */ 92 93 void tcplike_rto_timeout(void *); 94 void tcplike_rtt_sample(struct tcplike_send_ccb *, u_int16_t); 95 void _add_to_cwndvector(struct tcplike_send_ccb *, u_int64_t); 96 void _remove_from_cwndvector(struct tcplike_send_ccb *, u_int64_t); 97 int _chop_cwndvector(struct tcplike_send_ccb *, u_int64_t); 98 int _cwndvector_size(struct tcplike_send_ccb *); 99 u_char _cwndvector_state(struct tcplike_send_ccb *, u_int64_t); 100 101 void tcplike_send_term(void *); 102 void tcplike_recv_term(void *); 103 104 void _avlist_add(struct tcplike_recv_ccb *, u_int64_t, u_int64_t); 105 u_int64_t _avlist_get(struct tcplike_recv_ccb *, u_int64_t); 106 107 /* extern Ack Vector functions */ 108 extern void dccp_use_ackvector(struct dccpcb *); 109 extern void dccp_update_ackvector(struct dccpcb *, u_int64_t); 110 extern void dccp_increment_ackvector(struct dccpcb *, u_int64_t); 111 extern u_int16_t dccp_generate_ackvector(struct dccpcb *, u_char *); 112 extern u_char dccp_ackvector_state(struct dccpcb *, u_int32_t); 113 114 extern int dccp_get_option(char *, int, int, char *, int); 115 extern int dccp_remove_feature(struct dccpcb *, u_int8_t, u_int8_t); 116 117 /* 118 * RTO timer activated 119 */ 120 void 121 tcplike_rto_timeout(void *ccb) 122 { 123 struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb; 124 /*struct inpcb *inp;*/ 125 int s; 126 127 mutex_enter(&(cb->mutex)); 128 129 cb->ssthresh = cb->cwnd >>1; 130 cb->cwnd = 1; /* allowing 1 packet to be sent */ 131 cb->outstanding = 0; /* is this correct? */ 132 cb->rto_timer_callout = 0; 133 cb->rto = cb->rto << 1; 134 TIMEOUT_DEBUG((LOG_INFO, "RTO Timeout. New RTO = %u\n", cb->rto)); 135 136 cb->sample_rtt = 0; 137 138 cb->ack_last = 0; 139 cb->ack_miss = 0; 140 141 cb->rcvr_ackratio = 1; /* Constraint 2 & 3. We need ACKs asap */ 142 dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO); 143 dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO, 144 (char *) &cb->rcvr_ackratio, 1); 145 cb->acked_in_win = 0; 146 cb->acked_windows = 0; 147 cb->oldcwnd_ts = cb->pcb->seq_snd; 148 149 LOSS_DEBUG((LOG_INFO, "Timeout. CWND value: %u , OUTSTANDING value: %u\n", 150 cb->cwnd, cb->outstanding)); 151 mutex_exit(&(cb->mutex)); 152 153 /* lock'n run dccp_output */ 154 s = splnet(); 155 INP_INFO_RLOCK(&dccpbinfo); 156 /*inp = cb->pcb->d_inpcb;*/ 157 INP_LOCK(inp); 158 INP_INFO_RUNLOCK(&dccpbinfo); 159 160 dccp_output(cb->pcb, 1); 161 162 INP_UNLOCK(inp); 163 splx(s); 164 } 165 166 void tcplike_rtt_sample(struct tcplike_send_ccb *cb, u_int16_t sample) 167 { 168 u_int16_t err; 169 170 if (cb->rtt == 0xffff) { 171 /* hmmmmm. */ 172 cb->rtt = sample; 173 cb->rto = cb->rtt << 1; 174 return; 175 } 176 177 /* This is how the Linux implementation is doing it.. */ 178 if (sample >= cb->rtt) { 179 err = sample - cb->rtt; 180 cb->rtt = cb->rtt + (err >> 3); 181 } else { 182 err = cb->rtt - sample; 183 cb->rtt = cb->rtt - (err >> 3); 184 } 185 cb->rtt_d = cb->rtt_d + ((err - cb->rtt_d) >> 2); 186 if (cb->rtt < TCPLIKE_MIN_RTT) 187 cb->rtt = TCPLIKE_MIN_RTT; 188 cb->rto = cb->rtt + (cb->rtt_d << 2); 189 190 191 /* 5 million ways to calculate RTT ...*/ 192 #if 0 193 cb->srtt = ( 0.8 * cb->srtt ) + (0.2 * sample); 194 if (cb->srtt < TCPLIKE_MIN_RTT) 195 cb->srtt = TCPLIKE_MIN_RTT; 196 cb->rto = cb->srtt << 1; 197 #endif 198 199 LOSS_DEBUG((LOG_INFO, "RTT Sample: %u , New RTO: %u\n", sample, cb->rto)); 200 } 201 202 /* Functions declared in struct dccp_cc_sw */ 203 204 /* 205 * Initialises the sender side 206 * returns: pointer to a tfrc_send_ccb struct on success, otherwise 0 207 */ 208 void * 209 tcplike_send_init(struct dccpcb* pcb) 210 { 211 struct tcplike_send_ccb *cb; 212 213 TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_init()\n")); 214 215 cb = malloc(sizeof (struct tcplike_send_ccb), M_PCB, M_NOWAIT | M_ZERO); 216 if (cb == 0) { 217 TCPLIKE_DEBUG((LOG_INFO, "Unable to allocate memory for tcplike_send_ccb!\n")); 218 dccpstat.tcplikes_send_memerr++; 219 return 0; 220 } 221 memset(cb, 0, sizeof (struct tcplike_send_ccb)); 222 223 /* init sender */ 224 cb->pcb = pcb; 225 226 cb->cwnd = TCPLIKE_INITIAL_CWND; 227 cb->ssthresh = 0xafff; /* lim-> infinity */ 228 cb->oldcwnd_ts = 0; 229 cb->outstanding = 0; 230 cb->rcvr_ackratio = 2; /* Ack Ratio */ 231 cb->acked_in_win = 0; 232 cb->acked_windows = 0; 233 234 CWND_DEBUG((LOG_INFO, "Init. CWND value: %u , OUTSTANDING value: %u\n", 235 cb->cwnd, cb->outstanding)); 236 cb->rtt = 0xffff; 237 cb->rto = TIMEOUT_UBOUND; 238 callout_init(&cb->rto_timer, 0); 239 callout_init(&cb->free_timer, 0); 240 cb->rto_timer_callout = 0; 241 cb->rtt_d = 0; 242 cb->timestamp = 0; 243 244 cb->sample_rtt = 1; 245 246 cb->cv_size = TCPLIKE_INITIAL_CWNDVECTOR; 247 /* 1 bit per entry */ 248 cb->cwndvector = malloc(cb->cv_size / 8, M_PCB, M_NOWAIT | M_ZERO); 249 if (cb->cwndvector == NULL) { 250 MALLOC_DEBUG((LOG_INFO, "Unable to allocate memory for cwndvector\n")); 251 /* What to do now? */ 252 cb->cv_size = 0; 253 dccpstat.tcplikes_send_memerr++; 254 return 0; 255 } 256 memset(cb->cwndvector, 0, cb->cv_size / 8); 257 cb->cv_hs = cb->cv_ts = 0; 258 cb->cv_hp = cb->cwndvector; 259 260 cb->ack_last = 0; 261 cb->ack_miss = 0; 262 263 mutex_init(&(cb->mutex), MUTEX_DEFAULT, IPL_SOFTNET); 264 265 TCPLIKE_DEBUG((LOG_INFO, "TCPlike sender initialised!\n")); 266 dccpstat.tcplikes_send_conn++; 267 return cb; 268 } 269 270 void tcplike_send_term(void *ccb) 271 { 272 struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb; 273 if (ccb == 0) 274 return; 275 276 mutex_destroy(&(cb->mutex)); 277 278 free(cb, M_PCB); 279 TCPLIKE_DEBUG((LOG_INFO, "TCP-like sender is destroyed\n")); 280 } 281 282 /* 283 * Free the sender side 284 * args: ccb - ccb of sender 285 */ 286 void 287 tcplike_send_free(void *ccb) 288 { 289 struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb; 290 291 LOSS_DEBUG((LOG_INFO, "Entering tcplike_send_free()\n")); 292 293 if (ccb == 0) 294 return; 295 296 mutex_enter(&(cb->mutex)); 297 298 free(cb->cwndvector, M_PCB); 299 cb->cv_hs = cb->cv_ts = 0; 300 301 /* untimeout any active timer */ 302 if (cb->rto_timer_callout) { 303 TCPLIKE_DEBUG((LOG_INFO, "Untimeout RTO Timer\n")); 304 callout_stop(&cb->rto_timer); 305 cb->rto_timer_callout = 0; 306 } 307 308 mutex_exit(&(cb->mutex)); 309 310 callout_reset(&cb->free_timer, 10 * hz, tcplike_send_term, (void *)cb); 311 } 312 313 /* 314 * Ask TCPlike wheter one can send a packet or not 315 * args: ccb - ccb block for current connection 316 * returns: 0 if ok, else <> 0. 317 */ 318 int 319 tcplike_send_packet(void *ccb, long datasize) 320 { 321 /* check if one can send here */ 322 struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb; 323 long ticks; 324 char feature[1]; 325 326 TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_packet()\n")); 327 328 if (datasize == 0) { 329 TCPLIKE_DEBUG((LOG_INFO, "Sending pure ACK. Dont care about CC right now\n")); 330 return 1; 331 } 332 333 mutex_enter(&(cb->mutex)); 334 335 if (cb->cwnd <= cb->outstanding) { 336 /* May not send. trigger RTO */ 337 DCCP_DEBUG((LOG_INFO, "cwnd (%d) < outstanding (%d)\n", cb->cwnd, cb->outstanding)); 338 if (!cb->rto_timer_callout) { 339 LOSS_DEBUG((LOG_INFO, "Trigger TCPlike RTO timeout timer. Ticks = %u\n", cb->rto)); 340 ticks = (long)cb->rto; 341 callout_reset(&cb->rto_timer, ticks, 342 tcplike_rto_timeout, (void *)cb); 343 cb->rto_timer_callout = 1; 344 } 345 mutex_exit(&(cb->mutex)); 346 return 0; 347 } 348 349 /* We're allowed to send */ 350 351 feature[0] = 1; 352 if (cb->pcb->remote_ackvector == 0) { 353 ACK_DEBUG((LOG_INFO, "Adding Change(Use Ack Vector, 1) to outgoing packet\n")); 354 dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKVECTOR); 355 dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKVECTOR, feature, 1); 356 } 357 358 /* untimeout any active timer */ 359 if (cb->rto_timer_callout) { 360 LOSS_DEBUG((LOG_INFO, "Untimeout RTO Timer\n")); 361 callout_stop(&cb->rto_timer); 362 cb->rto_timer_callout = 0; 363 } 364 365 if (!cb->sample_rtt) { 366 struct timeval stamp; 367 microtime(&stamp); 368 cb->timestamp = ((stamp.tv_sec & 0x00000FFF) * 1000000) + stamp.tv_usec; 369 dccp_add_option(cb->pcb, DCCP_OPT_TIMESTAMP, (char*) &(cb->timestamp), 4); 370 /*LOSS_DEBUG((LOG_INFO, "Adding timestamp %u\n", cb->timestamp));*/ 371 cb->sample_rtt = 1; 372 } 373 374 mutex_exit(&(cb->mutex)); 375 return 1; 376 377 } 378 379 /* 380 * Notify sender that a packet has been sent 381 * args: ccb - ccb block for current connection 382 * moreToSend - if there exists more packets to send 383 */ 384 void 385 tcplike_send_packet_sent(void *ccb, int moreToSend, long datasize) 386 { 387 struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb; 388 389 TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_packet_sent(,%i,%i)\n",moreToSend,(int) datasize)); 390 391 if (datasize == 0) { 392 TCPLIKE_DEBUG((LOG_INFO, "Sent pure ACK. Dont care about cwnd-storing\n")); 393 return; 394 } 395 396 mutex_enter(&(cb->mutex)); 397 398 cb->outstanding++; 399 TCPLIKE_DEBUG((LOG_INFO, "SENT. cwnd: %d, outstanding: %d\n",cb->cwnd, cb->outstanding)); 400 401 /* stash the seqnr in cwndvector */ 402 /* Dont do this if we're only sending an ACK ! */ 403 _add_to_cwndvector(cb, cb->pcb->seq_snd); 404 CWND_DEBUG((LOG_INFO, "Sent. CWND value: %u , OUTSTANDING value: %u\n",cb->cwnd, cb->outstanding)); 405 406 dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO); 407 mutex_exit(&(cb->mutex)); 408 } 409 410 /* 411 * Notify that an ack package was received 412 * args: ccb - ccb block for current connection 413 */ 414 void 415 tcplike_send_packet_recv(void *ccb, char *options, int optlen) 416 { 417 dccp_seq acknum, lastok; 418 u_int16_t numlostpackets, avsize, i, prev_size; 419 u_int8_t length, state, numokpackets, ackratiocnt; 420 u_char av[10]; 421 struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb; 422 423 TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_ack_recv()\n")); 424 mutex_enter(&(cb->mutex)); 425 426 if (dccp_get_option(options, optlen, DCCP_OPT_TIMESTAMP_ECHO, av,10) > 0) { 427 u_int32_t echo, elapsed; 428 429 TCPLIKE_DEBUG((LOG_INFO, "Received TIMESTAMP ECHO\n")); 430 bcopy(av, &echo, 4); 431 bcopy(av + 4, &elapsed, 4); 432 433 if (echo == cb->timestamp) { 434 struct timeval time; 435 u_int32_t c_stamp; 436 u_int16_t diff; 437 438 microtime(&time); 439 c_stamp = ((time.tv_sec & 0x00000FFF) * 1000000) + time.tv_usec; 440 441 diff = (u_int16_t) c_stamp - cb->timestamp - elapsed; 442 diff = (u_int16_t)(diff / 1000); 443 TCPLIKE_DEBUG((LOG_INFO, "Got Timestamp Echo; Echo = %u, Elapsed = %u. DIFF = %u\n", 444 echo, elapsed, diff)); 445 tcplike_rtt_sample(cb, diff); 446 } 447 } 448 449 if (cb->pcb->ack_rcv == 0) { 450 /* There was no Ack. There is no spoon */ 451 452 /* We'll clear the missingacks data here, since the other host 453 * is also sending data. 454 * I guess we could deal with this, using the NDP field in the 455 * header. Let's stick a *TODO* mark here for now. 456 * The missingacks mechanism will activate if other host goes to 457 * only sending DCCP-Ack packets. 458 */ 459 cb->ack_last = 0; 460 cb->ack_miss = 0; 461 ACKRATIO_DEBUG((LOG_INFO, "Clear Missing Acks state!\n")); 462 mutex_exit(&(cb->mutex)); 463 return; 464 } 465 466 cb->sample_rtt = 0; 467 468 /* check ackVector for lost packets. cmp with cv_list */ 469 avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR0, av,10); 470 if (avsize == 0) 471 avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR1, av,10); 472 473 if (avsize > 0) 474 dccpstat.tcplikes_send_ackrecv++; 475 476 acknum = cb->pcb->ack_rcv; 477 numlostpackets = 0; 478 numokpackets = 0; 479 lastok = 0; 480 prev_size = _cwndvector_size(cb); 481 482 TCPLIKE_DEBUG((LOG_INFO, "Start removing from cwndvector %d\n", avsize)); 483 if (avsize == 0) 484 _remove_from_cwndvector(cb, acknum); 485 486 for (i=0; i < avsize; i++) { 487 state = (av[i] & 0xc0) >> 6; 488 length = (av[i] & 0x3f) +1; 489 while (length > 0) { 490 if (state == 0) { 491 CWND_DEBUG((LOG_INFO, "Packet %llu was OK\n", acknum)); 492 numokpackets++; 493 lastok = acknum; 494 _remove_from_cwndvector(cb, acknum); 495 } else { 496 if (acknum > cb->oldcwnd_ts) { 497 LOSS_DEBUG((LOG_INFO, "Packet %llu was lost %llu state %d\n", acknum, cb->oldcwnd_ts, state)); 498 numlostpackets++; 499 dccpstat.tcplikes_send_reploss++; 500 } 501 } 502 acknum--; 503 length--; 504 } 505 } 506 if (lastok) 507 if (_chop_cwndvector(cb, lastok-TCPLIKE_NUMDUPACK)) { 508 LOSS_DEBUG((LOG_INFO, "Packets were lost\n")); 509 if (lastok-TCPLIKE_NUMDUPACK > cb->oldcwnd_ts) { 510 numlostpackets++; 511 dccpstat.tcplikes_send_assloss++; 512 } 513 } 514 515 lastok = cb->cv_hs; 516 while (_cwndvector_state(cb, lastok) == 0x00 && lastok < cb->cv_ts) 517 lastok++; 518 if (lastok != cb->cv_hs) 519 _chop_cwndvector(cb, lastok); 520 521 cb->outstanding = _cwndvector_size(cb); 522 CWND_DEBUG((LOG_INFO, "Decrease outstanding. was = %u , now = %u\n", prev_size, cb->outstanding)); 523 if (prev_size == cb->outstanding) { 524 /* Nothing dropped from cwndvector */ 525 mutex_exit(&(cb->mutex)); 526 return; 527 } 528 529 cb->acked_in_win += numokpackets; 530 531 if (cb->cwnd < cb->ssthresh) { 532 /* Slow start */ 533 534 if (numlostpackets > 0) { 535 /* Packet loss */ 536 LOSS_DEBUG((LOG_INFO, "Packet Loss in Slow Start\n")); 537 cb->cwnd = cb->cwnd>>1; 538 if (cb->cwnd < 1) 539 cb->cwnd = 1; 540 cb->ssthresh = cb->cwnd; 541 cb->acked_in_win = 0; 542 cb->acked_windows = 0; 543 cb->oldcwnd_ts = cb->pcb->seq_snd; 544 545 } else { 546 cb->cwnd++; 547 } 548 549 } else if (cb->cwnd >= cb->ssthresh) { 550 551 if (numlostpackets > 0) { 552 /* Packet loss */ 553 LOSS_DEBUG((LOG_INFO, "Packet Loss in action\n")); 554 cb->cwnd = cb->cwnd>>1; 555 if (cb->cwnd < 1) 556 cb->cwnd = 1; 557 cb->ssthresh = cb->cwnd; 558 cb->acked_in_win = 0; 559 cb->acked_windows = 0; 560 cb->oldcwnd_ts = cb->pcb->seq_snd; 561 562 } else if (cb->acked_in_win > cb->cwnd) { 563 cb->cwnd++; 564 } 565 } 566 567 /* Ok let's check if there are missing Ack packets */ 568 ACKRATIO_DEBUG((LOG_INFO, "Check Ack. seq_rcv: %u ,ack_last: %u ,ack_miss: %u\n", 569 cb->pcb->seq_rcv, cb->ack_last, cb->ack_miss)); 570 571 if (cb->ack_last == 0) { 572 /* First received ack (or first after Data packet). Yey */ 573 cb->ack_last = cb->pcb->seq_rcv; 574 cb->ack_miss = 0; 575 } else if (cb->pcb->seq_rcv == (cb->ack_last + 1)) { 576 /* This is correct, non-congestion, in-order behaviour */ 577 cb->ack_last = cb->pcb->seq_rcv; 578 579 } else if (cb->pcb->seq_rcv < (cb->ack_last + 1)) { 580 /* Might be an Ack we've been missing */ 581 /* This code has a flaw; If we miss 2 Ack packets, we only care 582 * about the older one. This means that the next-to-oldest one could 583 * be lost without any action beeing taken. 584 * Time will tell if that is going to be a Giant Problem(r) 585 */ 586 if (cb->pcb->seq_rcv == cb->ack_miss) { 587 /* Yea it was. great */ 588 cb->ack_miss = 0; 589 } 590 591 } else if (cb->pcb->seq_rcv > (cb->ack_last + 1)) { 592 /* There is a jump in Ack seqnums.. */ 593 cb->ack_miss = cb->ack_last + 1; 594 cb->ack_last = cb->pcb->seq_rcv; 595 } 596 597 if (cb->ack_miss && ((cb->ack_miss + TCPLIKE_NUMDUPACK) < cb->ack_last)) { 598 /* Alert! Alert! Ack packets are MIA. 599 * Decrease Ack Ratio 600 */ 601 cb->rcvr_ackratio = cb->rcvr_ackratio<<1; 602 if (cb->rcvr_ackratio > (cb->cwnd>>1)) { 603 /* Constraint 2 */ 604 cb->rcvr_ackratio = cb->cwnd>>1; 605 } 606 if (cb->rcvr_ackratio == 0) 607 cb->rcvr_ackratio = 1; 608 ACKRATIO_DEBUG((LOG_INFO, "Increase Ack Ratio. Now = %u. (cwnd = %u)\n", cb->rcvr_ackratio, cb->cwnd)); 609 dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO); 610 dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO, 611 (char *) &cb->rcvr_ackratio, 1); 612 613 cb->ack_miss = 0; 614 cb->acked_windows = 0; 615 cb->acked_in_win = 0; 616 dccpstat.tcplikes_send_missack++; 617 618 } else if (cb->acked_in_win > cb->cwnd) { 619 cb->acked_in_win = 0; 620 cb->acked_windows++; 621 if (cb->rcvr_ackratio == 1) { 622 /* Ack Ratio is 1. We cant decrease it more.. Lets wait for some 623 * heavy congestion so we can increase it 624 */ 625 cb->acked_windows = 0; 626 } 627 } 628 629 if (cb->acked_windows >= 1) { 630 ackratiocnt = (cb->cwnd / ((cb->rcvr_ackratio*cb->rcvr_ackratio) - cb->rcvr_ackratio)); 631 if (cb->acked_windows >= ackratiocnt) { 632 if (cb->rcvr_ackratio > 2 && cb->cwnd >= 4) { 633 /* Constraint 3 - AckRatio at least 2 for a cwnd >= 4 */ 634 cb->rcvr_ackratio--; 635 ACKRATIO_DEBUG((LOG_INFO, "Decrease ackratio by 1, now: %u\n", cb->rcvr_ackratio)); 636 dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO); 637 dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO, 638 (char *) &cb->rcvr_ackratio, 1); 639 } 640 cb->acked_in_win = 0; 641 cb->acked_windows = 0; 642 } 643 } 644 645 CWND_DEBUG((LOG_INFO, "Recvd. CWND value: %u , OUTSTANDING value: %u\n", 646 cb->cwnd, cb->outstanding)); 647 648 if (cb->cwnd > cb->outstanding && cb->rto_timer_callout) { 649 LOSS_DEBUG((LOG_INFO, "Force DCCP_OUTPUT, CWND = %u Outstanding = %u\n", 650 cb->cwnd, cb->outstanding)); 651 callout_stop(&cb->rto_timer); 652 cb->rto_timer_callout = 0; 653 654 mutex_exit(&(cb->mutex)); 655 dccp_output(cb->pcb, 1); 656 return; 657 } 658 mutex_exit(&(cb->mutex)); 659 } 660 661 int 662 _cwndvector_size(struct tcplike_send_ccb *cb) 663 { 664 u_int64_t gap, offset, seqnr; 665 u_int32_t cnt; 666 u_char *t; 667 668 TCPLIKE_DEBUG((LOG_INFO, "Enter cwndvector_size\n")); 669 cnt = 0; 670 for (seqnr = cb->cv_hs; seqnr < cb->cv_ts; seqnr++) { 671 gap = seqnr - cb->cv_hs; 672 673 offset = gap % 8; 674 t = cb->cv_hp + (gap/8); 675 if (t >= (cb->cwndvector + (cb->cv_size/8))) 676 t -= (cb->cv_size / 8); /* wrapped */ 677 678 if (((*t & (0x01 << offset)) >> offset) == 0x01) 679 cnt++; 680 } 681 return cnt; 682 } 683 684 u_char 685 _cwndvector_state(struct tcplike_send_ccb *cb, u_int64_t seqnr) 686 { 687 u_int64_t gap, offset; 688 u_char *t; 689 690 /* Check for wrapping */ 691 if (seqnr >= cb->cv_hs) { 692 /* Not wrapped */ 693 gap = seqnr - cb->cv_hs; 694 } else { 695 /* Wrapped XXXXX */ 696 gap = seqnr + 0x1000000000000LL - cb->cv_hs; /* seq nr = 48 bits */ 697 } 698 699 if (gap >= cb->cv_size) { 700 /* gap is bigger than cwndvector size? baaad */ 701 return 0x01; 702 } 703 704 offset = gap % 8; 705 t = cb->cv_hp + (gap/8); 706 if (t >= (cb->cwndvector + (cb->cv_size/8))) 707 t -= (cb->cv_size / 8); /* wrapped */ 708 709 return ((*t & (0x01 << offset)) >> offset); 710 } 711 712 void 713 _add_to_cwndvector(struct tcplike_send_ccb *cb, u_int64_t seqnr) 714 { 715 u_int64_t offset, dc, gap; 716 u_char *t, *n; 717 718 TCPLIKE_DEBUG((LOG_INFO, "Entering add_to_cwndvector\n")); 719 720 if (cb->cv_hs == cb->cv_ts) { 721 /* Empty cwndvector */ 722 cb->cv_hs = cb->cv_ts = seqnr; 723 } 724 725 /* Check for wrapping */ 726 if (seqnr >= cb->cv_hs) { 727 /* Not wrapped */ 728 gap = seqnr - cb->cv_hs; 729 } else { 730 /* Wrapped */ 731 gap = seqnr + 0x1000000000000LL - cb->cv_hs; /* seq nr = 48 bits */ 732 } 733 734 if (gap >= cb->cv_size) { 735 /* gap is bigger than cwndvector size? baaad */ 736 /* maybe we should increase the cwndvector here */ 737 CWND_DEBUG((LOG_INFO, "add cwndvector error. gap: %d, cv_size: %d, seqnr: %d\n", 738 gap, cb->cv_size, seqnr)); 739 dccpstat.tcplikes_send_badseq++; 740 return; 741 } 742 743 offset = gap % 8; /* bit to mark */ 744 t = cb->cv_hp + (gap/8); 745 if (t >= (cb->cwndvector + (cb->cv_size/8))) 746 t -= (cb->cv_size / 8); /* cwndvector wrapped */ 747 748 *t = *t | (0x01 << offset); /* turn on bit */ 749 750 cb->cv_ts = seqnr+1; 751 if (cb->cv_ts == 0x1000000000000LL) 752 cb->cv_ts = 0; 753 754 if (gap > (cb->cv_size - 128)) { 755 MALLOC_DEBUG((LOG_INFO, "INCREASE cwndVECTOR\n")); 756 n = malloc(cb->cv_size/4, M_PCB, M_NOWAIT); /* old size * 2 */ 757 if (n == NULL) { 758 MALLOC_DEBUG((LOG_INFO, "Increase cwndvector FAILED\n")); 759 dccpstat.tcplikes_send_memerr++; 760 return; 761 } 762 memset (n+cb->cv_size/8,0x00,cb->cv_size/8); /* new half all missing */ 763 dc = (cb->cwndvector + (cb->cv_size/8)) - cb->cv_hp; 764 memcpy (n,cb->cv_hp, dc); /* tail to end */ 765 memcpy (n+dc,cb->cwndvector,cb->cv_hp - cb->cwndvector); /* start to tail */ 766 cb->cv_size = cb->cv_size * 2; /* counted in items, so it';s a doubling */ 767 free (cb->cwndvector, M_PCB); 768 cb->cv_hp = cb->cwndvector = n; 769 } 770 } 771 772 void 773 _remove_from_cwndvector(struct tcplike_send_ccb *cb, u_int64_t seqnr) 774 { 775 u_int64_t offset; 776 int64_t gap; 777 u_char *t; 778 779 DCCP_DEBUG((LOG_INFO, "Entering remove_from_cwndvector\n")); 780 781 if (cb->cv_hs == cb->cv_ts) { 782 /* Empty cwndvector */ 783 return; 784 } 785 786 /* Check for wrapping */ 787 if (seqnr >= cb->cv_hs) { 788 /* Not wrapped */ 789 gap = seqnr - cb->cv_hs; 790 } else { 791 /* Wrapped */ 792 gap = seqnr + 0x1000000000000LL - cb->cv_hs; /* seq nr = 48 bits */ 793 } 794 795 if (gap >= cb->cv_size) { 796 /* gap is bigger than cwndvector size. has already been chopped */ 797 return; 798 } 799 800 offset = gap % 8; /* hi or low 2 bits to mark */ 801 t = cb->cv_hp + (gap/8); 802 if (t >= (cb->cwndvector + (cb->cv_size/8))) 803 t -= (cb->cv_size / 8); /* cwndvector wrapped */ 804 805 *t = *t & (~(0x01 << offset)); /* turn off bits */ 806 } 807 808 int 809 _chop_cwndvector(struct tcplike_send_ccb *cb, u_int64_t seqnr) 810 { 811 int64_t gap, bytegap; 812 u_char *t; 813 814 CWND_DEBUG((LOG_INFO,"Chop cwndvector at: %u\n", seqnr)); 815 816 if (cb->cv_hs == cb->cv_ts) 817 return 0; 818 819 if (seqnr > cb->cv_hs) { 820 gap = seqnr - cb->cv_hs; 821 } else { 822 /* We received obsolete information */ 823 return 0; 824 } 825 826 bytegap = gap/8; 827 if (bytegap == 0) 828 return 0; 829 830 t = cb->cv_hp + bytegap; 831 if (t >= (cb->cwndvector + (cb->cv_size/8))) 832 t -= (cb->cv_size / 8); /* ackvector wrapped */ 833 cb->cv_hp = t; 834 cb->cv_hs += bytegap*8; 835 return 1; 836 } 837 838 839 /* Receiver side */ 840 841 842 /* Functions declared in struct dccp_cc_sw */ 843 844 /* Initialises the receiver side 845 * returns: pointer to a tcplike_recv_ccb struct on success, otherwise 0 846 */ 847 void * 848 tcplike_recv_init(struct dccpcb *pcb) 849 { 850 struct tcplike_recv_ccb *ccb; 851 852 TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_recv_init()\n")); 853 854 ccb = malloc(sizeof (struct tcplike_recv_ccb), M_PCB, M_NOWAIT | M_ZERO); 855 if (ccb == 0) { 856 TCPLIKE_DEBUG((LOG_INFO, "Unable to allocate memory for tcplike_recv_ccb!\n")); 857 dccpstat.tcplikes_recv_memerr++; 858 return 0; 859 } 860 861 memset(ccb, 0, sizeof (struct tcplike_recv_ccb)); 862 863 ccb->pcb = pcb; 864 ccb->unacked = 0; 865 ccb->pcb->ack_ratio = 2; 866 867 ccb->pcb->remote_ackvector = 1; 868 dccp_use_ackvector(ccb->pcb); 869 870 callout_init(&ccb->free_timer, 0); 871 872 mutex_init(&(ccb->mutex), MUTEX_DEFAULT, IPL_SOFTNET); 873 874 TCPLIKE_DEBUG((LOG_INFO, "TCPlike receiver initialised!\n")); 875 dccpstat.tcplikes_recv_conn++; 876 return ccb; 877 } 878 879 void tcplike_recv_term(void *ccb) 880 { 881 struct tcplike_recv_ccb *cb = (struct tcplike_recv_ccb *) ccb; 882 if (ccb == 0) 883 return; 884 885 mutex_destroy(&(cb->mutex)); 886 free(cb, M_PCB); 887 TCPLIKE_DEBUG((LOG_INFO, "TCP-like receiver is destroyed\n")); 888 } 889 890 /* Free the receiver side 891 * args: ccb - ccb of receiver 892 */ 893 void 894 tcplike_recv_free(void *ccb) 895 { 896 struct ack_list *a; 897 struct tcplike_recv_ccb *cb = (struct tcplike_recv_ccb *) ccb; 898 899 LOSS_DEBUG((LOG_INFO, "Entering tcplike_recv_free()\n")); 900 901 if (ccb == 0) 902 return; 903 904 mutex_enter(&(cb->mutex)); 905 906 a = cb->av_list; 907 while (a) { 908 cb->av_list = a->next; 909 free(a, M_TEMP); 910 a = cb->av_list; 911 } 912 913 cb->pcb->av_size = 0; 914 free(cb->pcb->ackvector, M_PCB); 915 916 mutex_exit(&(cb->mutex)); 917 callout_reset(&cb->free_timer, 10 * hz, tcplike_recv_term, (void *)cb); 918 } 919 920 /* 921 * Tell TCPlike that a packet has been received 922 * args: ccb - ccb block for current connection 923 */ 924 void 925 tcplike_recv_packet_recv(void *ccb, char *options, int optlen) 926 { 927 struct tcplike_recv_ccb *cb = (struct tcplike_recv_ccb *) ccb; 928 u_char ackvector[16]; 929 u_int16_t avsize; 930 u_char av_rcv[10]; 931 932 TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_recv_packet()\n")); 933 934 mutex_enter(&(cb->mutex)); 935 936 if (cb->pcb->type_rcv == DCCP_TYPE_DATA || 937 cb->pcb->type_rcv == DCCP_TYPE_DATAACK) 938 dccpstat.tcplikes_recv_datarecv++; 939 940 /* Grab Ack Vector 0 or 1 */ 941 avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR0, av_rcv,10); 942 if (avsize == 0) 943 avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR1, av_rcv,10); 944 945 /* We are only interested in acks-on-acks here. 946 * The "real" ack handling is done be the sender */ 947 if (avsize == 0 && cb->pcb->ack_rcv) { 948 u_int64_t ackthru; 949 /* We got an Ack without an ackvector. 950 * This would mean it's an ack on an ack. 951 */ 952 ackthru = _avlist_get(cb, cb->pcb->ack_rcv); 953 ACK_DEBUG((LOG_INFO, "GOT Ack without Ackvector; Ackthru: %u\n", ackthru)); 954 if (ackthru) { 955 dccp_update_ackvector(cb->pcb, ackthru); 956 dccpstat.tcplikes_recv_ackack++; 957 } 958 } else if (avsize > 0 && cb->pcb->ack_rcv) { 959 /* We received an AckVector */ 960 u_int32_t acknum, ackthru; 961 int i; 962 ACK_DEBUG((LOG_INFO, "GOT Ack with Ackvector\n")); 963 /* gotta loop through the ackvector */ 964 acknum = cb->pcb->ack_rcv; 965 for (i=0; i<avsize; i++) { 966 u_int8_t state, len; 967 state = (av_rcv[i] & 0xc0) >> 6; 968 len = (av_rcv[i] & 0x2f) + 1; 969 if (state != 0) { 970 /* Drops in ackvector! Will be noted and taken care of by the sender part */ 971 ACK_DEBUG((LOG_INFO, "Packets %u - %u are FUCKED\n",acknum-len, acknum)); 972 continue; 973 } 974 975 while (len>0) { 976 ackthru = _avlist_get(cb, acknum); 977 ACK_DEBUG((LOG_INFO, "Ackthru: %u\n", ackthru)); 978 if (ackthru) { 979 dccp_update_ackvector(cb->pcb, ackthru); 980 dccpstat.tcplikes_recv_ackack++; 981 } 982 acknum--; 983 len--; 984 } 985 } 986 } 987 988 ACK_DEBUG((LOG_INFO, "Adding %llu to local ackvector\n", cb->pcb->seq_rcv)); 989 dccp_increment_ackvector(cb->pcb, cb->pcb->seq_rcv); 990 cb->unacked++; 991 992 if (cb->unacked >= cb->pcb->ack_ratio) { 993 /* Time to send an Ack */ 994 995 avsize = dccp_generate_ackvector(cb->pcb, ackvector); 996 TCPLIKE_DEBUG((LOG_INFO, "recv_packet avsize %d ackvector %d\n", avsize, ackvector)); 997 cb->unacked = 0; 998 if (avsize > 0) { 999 dccp_add_option(cb->pcb, DCCP_OPT_ACK_VECTOR0, ackvector, avsize); 1000 cb->pcb->ack_snd = cb->pcb->seq_rcv; 1001 _avlist_add(cb, cb->pcb->seq_snd+1, cb->pcb->ack_snd); 1002 ACK_DEBUG((LOG_INFO, "Recvr: Sending Ack (%llu) w/ Ack Vector\n", cb->pcb->ack_snd)); 1003 dccpstat.tcplikes_recv_acksent++; 1004 dccp_output(cb->pcb, 1); 1005 } 1006 } 1007 mutex_exit(&(cb->mutex)); 1008 } 1009 1010 void 1011 _avlist_add(struct tcplike_recv_ccb *cb, u_int64_t localseq, u_int64_t ackthru) 1012 { 1013 struct ack_list *a; 1014 ACK_DEBUG((LOG_INFO,"Adding localseq %u - ackthru %u to avlist\n", localseq, ackthru)); 1015 /*MALLOC_DEBUG((LOG_INFO, "New ack_list, %u\n", sizeof (struct ack_list)));*/ 1016 a = malloc(sizeof(struct ack_list), M_TEMP, M_NOWAIT); 1017 if (a == NULL) { 1018 MALLOC_DEBUG((LOG_INFO, "avlist_add: FAILED\n")); 1019 dccpstat.tcplikes_recv_memerr++; 1020 return; 1021 } 1022 memset(a, 0, sizeof(struct ack_list)); 1023 a->localseq = localseq; 1024 a->ackthru = ackthru; 1025 a->next = cb->av_list; 1026 cb->av_list = a; 1027 } 1028 1029 /* 1030 * Searches the av_list. if 'localseq' found, drop it from list and return 1031 * ackthru 1032 */ 1033 u_int64_t 1034 _avlist_get(struct tcplike_recv_ccb *cb, u_int64_t localseq) 1035 { 1036 struct ack_list *a, *n, *p; 1037 u_int64_t ackthru; 1038 1039 ACK_DEBUG((LOG_INFO,"Getting localseq %u from avlist\n", localseq)); 1040 a = cb->av_list; 1041 p = 0; 1042 while (a) { 1043 n = a->next; 1044 if (a->localseq == localseq) { 1045 if (p) 1046 p->next = n; 1047 else 1048 cb->av_list = n; 1049 ackthru = a->ackthru; 1050 /*MALLOC_DEBUG((LOG_INFO, "Freeing element %u in ack_list\n", a->localseq));*/ 1051 free(a, M_TEMP); 1052 return ackthru; 1053 } 1054 p = a; 1055 a = n; 1056 } 1057 /* Not found. return 0 */ 1058 return 0; 1059 } 1060 1061 /* 1062 int tcplike_option_recv(void); 1063 */ 1064