1 /* $KAME: dccp_tcplike.c,v 1.19 2005/07/27 06:27:25 nishida Exp $ */ 2 /* $NetBSD: dccp_tcplike.c,v 1.2 2015/08/24 22:21:26 pooka Exp $ */ 3 4 /* 5 * Copyright (c) 2003 Magnus Erixzon 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. The name of the author may not be used to endorse or promote products 18 * derived from this software without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 */ 31 /* 32 * TCP-like congestion control for DCCP 33 */ 34 35 #include <sys/cdefs.h> 36 __KERNEL_RCSID(0, "$NetBSD: dccp_tcplike.c,v 1.2 2015/08/24 22:21:26 pooka Exp $"); 37 38 #ifdef _KERNEL_OPT 39 #include "opt_dccp.h" 40 #endif 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/domain.h> 45 #include <sys/kernel.h> 46 #include <sys/lock.h> 47 #include <sys/malloc.h> 48 #include <sys/mbuf.h> 49 #include <sys/proc.h> 50 #include <sys/protosw.h> 51 #include <sys/signalvar.h> 52 #include <sys/socket.h> 53 #include <sys/socketvar.h> 54 #include <sys/mutex.h> 55 #include <sys/sysctl.h> 56 #include <sys/syslog.h> 57 58 #include <net/if.h> 59 #include <net/route.h> 60 61 #include <netinet/in.h> 62 #include <netinet/in_systm.h> 63 #include <netinet/ip.h> 64 #include <netinet/in_pcb.h> 65 #include <netinet/in_var.h> 66 67 #include <netinet/ip_icmp.h> 68 #include <netinet/icmp_var.h> 69 #include <netinet/ip_var.h> 70 71 #include <netinet/dccp.h> 72 #include <netinet/dccp_var.h> 73 #include <netinet/dccp_tcplike.h> 74 75 #define TCPLIKE_DEBUG(args) dccp_log args 76 #define MALLOC_DEBUG(args) log args 77 #define CWND_DEBUG(args) dccp_log args 78 #define ACKRATIO_DEBUG(args) dccp_log args 79 #define LOSS_DEBUG(args) dccp_log args 80 #define TIMEOUT_DEBUG(args) dccp_log args 81 82 #if !defined(__FreeBSD__) || __FreeBSD_version < 500000 83 #define INP_INFO_LOCK_INIT(x,y) 84 #define INP_INFO_WLOCK(x) 85 #define INP_INFO_WUNLOCK(x) 86 #define INP_INFO_RLOCK(x) 87 #define INP_INFO_RUNLOCK(x) 88 #define INP_LOCK(x) 89 #define INP_UNLOCK(x) 90 #endif 91 92 /* Sender side */ 93 94 void tcplike_rto_timeout(void *); 95 void tcplike_rtt_sample(struct tcplike_send_ccb *, u_int16_t); 96 void _add_to_cwndvector(struct tcplike_send_ccb *, u_int64_t); 97 void _remove_from_cwndvector(struct tcplike_send_ccb *, u_int64_t); 98 int _chop_cwndvector(struct tcplike_send_ccb *, u_int64_t); 99 int _cwndvector_size(struct tcplike_send_ccb *); 100 u_char _cwndvector_state(struct tcplike_send_ccb *, u_int64_t); 101 102 void tcplike_send_term(void *); 103 void tcplike_recv_term(void *); 104 105 void _avlist_add(struct tcplike_recv_ccb *, u_int64_t, u_int64_t); 106 u_int64_t _avlist_get(struct tcplike_recv_ccb *, u_int64_t); 107 108 /* extern Ack Vector functions */ 109 extern void dccp_use_ackvector(struct dccpcb *); 110 extern void dccp_update_ackvector(struct dccpcb *, u_int64_t); 111 extern void dccp_increment_ackvector(struct dccpcb *, u_int64_t); 112 extern u_int16_t dccp_generate_ackvector(struct dccpcb *, u_char *); 113 extern u_char dccp_ackvector_state(struct dccpcb *, u_int32_t); 114 115 extern int dccp_get_option(char *, int, int, char *, int); 116 extern int dccp_remove_feature(struct dccpcb *, u_int8_t, u_int8_t); 117 118 /* 119 * RTO timer activated 120 */ 121 void 122 tcplike_rto_timeout(void *ccb) 123 { 124 struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb; 125 /*struct inpcb *inp;*/ 126 int s; 127 128 mutex_enter(&(cb->mutex)); 129 130 cb->ssthresh = cb->cwnd >>1; 131 cb->cwnd = 1; /* allowing 1 packet to be sent */ 132 cb->outstanding = 0; /* is this correct? */ 133 cb->rto_timer_callout = 0; 134 cb->rto = cb->rto << 1; 135 TIMEOUT_DEBUG((LOG_INFO, "RTO Timeout. New RTO = %u\n", cb->rto)); 136 137 cb->sample_rtt = 0; 138 139 cb->ack_last = 0; 140 cb->ack_miss = 0; 141 142 cb->rcvr_ackratio = 1; /* Constraint 2 & 3. We need ACKs asap */ 143 dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO); 144 dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO, 145 (char *) &cb->rcvr_ackratio, 1); 146 cb->acked_in_win = 0; 147 cb->acked_windows = 0; 148 cb->oldcwnd_ts = cb->pcb->seq_snd; 149 150 LOSS_DEBUG((LOG_INFO, "Timeout. CWND value: %u , OUTSTANDING value: %u\n", 151 cb->cwnd, cb->outstanding)); 152 mutex_exit(&(cb->mutex)); 153 154 /* lock'n run dccp_output */ 155 s = splnet(); 156 INP_INFO_RLOCK(&dccpbinfo); 157 /*inp = cb->pcb->d_inpcb;*/ 158 INP_LOCK(inp); 159 INP_INFO_RUNLOCK(&dccpbinfo); 160 161 dccp_output(cb->pcb, 1); 162 163 INP_UNLOCK(inp); 164 splx(s); 165 } 166 167 void tcplike_rtt_sample(struct tcplike_send_ccb *cb, u_int16_t sample) 168 { 169 u_int16_t err; 170 171 if (cb->rtt == 0xffff) { 172 /* hmmmmm. */ 173 cb->rtt = sample; 174 cb->rto = cb->rtt << 1; 175 return; 176 } 177 178 /* This is how the Linux implementation is doing it.. */ 179 if (sample >= cb->rtt) { 180 err = sample - cb->rtt; 181 cb->rtt = cb->rtt + (err >> 3); 182 } else { 183 err = cb->rtt - sample; 184 cb->rtt = cb->rtt - (err >> 3); 185 } 186 cb->rtt_d = cb->rtt_d + ((err - cb->rtt_d) >> 2); 187 if (cb->rtt < TCPLIKE_MIN_RTT) 188 cb->rtt = TCPLIKE_MIN_RTT; 189 cb->rto = cb->rtt + (cb->rtt_d << 2); 190 191 192 /* 5 million ways to calculate RTT ...*/ 193 #if 0 194 cb->srtt = ( 0.8 * cb->srtt ) + (0.2 * sample); 195 if (cb->srtt < TCPLIKE_MIN_RTT) 196 cb->srtt = TCPLIKE_MIN_RTT; 197 cb->rto = cb->srtt << 1; 198 #endif 199 200 LOSS_DEBUG((LOG_INFO, "RTT Sample: %u , New RTO: %u\n", sample, cb->rto)); 201 } 202 203 /* Functions declared in struct dccp_cc_sw */ 204 205 /* 206 * Initialises the sender side 207 * returns: pointer to a tfrc_send_ccb struct on success, otherwise 0 208 */ 209 void * 210 tcplike_send_init(struct dccpcb* pcb) 211 { 212 struct tcplike_send_ccb *cb; 213 214 TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_init()\n")); 215 216 cb = malloc(sizeof (struct tcplike_send_ccb), M_PCB, M_NOWAIT | M_ZERO); 217 if (cb == 0) { 218 TCPLIKE_DEBUG((LOG_INFO, "Unable to allocate memory for tcplike_send_ccb!\n")); 219 dccpstat.tcplikes_send_memerr++; 220 return 0; 221 } 222 memset(cb, 0, sizeof (struct tcplike_send_ccb)); 223 224 /* init sender */ 225 cb->pcb = pcb; 226 227 cb->cwnd = TCPLIKE_INITIAL_CWND; 228 cb->ssthresh = 0xafff; /* lim-> infinity */ 229 cb->oldcwnd_ts = 0; 230 cb->outstanding = 0; 231 cb->rcvr_ackratio = 2; /* Ack Ratio */ 232 cb->acked_in_win = 0; 233 cb->acked_windows = 0; 234 235 CWND_DEBUG((LOG_INFO, "Init. CWND value: %u , OUTSTANDING value: %u\n", 236 cb->cwnd, cb->outstanding)); 237 cb->rtt = 0xffff; 238 cb->rto = TIMEOUT_UBOUND; 239 callout_init(&cb->rto_timer, 0); 240 callout_init(&cb->free_timer, 0); 241 cb->rto_timer_callout = 0; 242 cb->rtt_d = 0; 243 cb->timestamp = 0; 244 245 cb->sample_rtt = 1; 246 247 cb->cv_size = TCPLIKE_INITIAL_CWNDVECTOR; 248 /* 1 bit per entry */ 249 cb->cwndvector = malloc(cb->cv_size / 8, M_PCB, M_NOWAIT | M_ZERO); 250 if (cb->cwndvector == NULL) { 251 MALLOC_DEBUG((LOG_INFO, "Unable to allocate memory for cwndvector\n")); 252 /* What to do now? */ 253 cb->cv_size = 0; 254 dccpstat.tcplikes_send_memerr++; 255 return 0; 256 } 257 memset(cb->cwndvector, 0, cb->cv_size / 8); 258 cb->cv_hs = cb->cv_ts = 0; 259 cb->cv_hp = cb->cwndvector; 260 261 cb->ack_last = 0; 262 cb->ack_miss = 0; 263 264 mutex_init(&(cb->mutex), MUTEX_DEFAULT, IPL_SOFTNET); 265 266 TCPLIKE_DEBUG((LOG_INFO, "TCPlike sender initialised!\n")); 267 dccpstat.tcplikes_send_conn++; 268 return cb; 269 } 270 271 void tcplike_send_term(void *ccb) 272 { 273 struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb; 274 if (ccb == 0) 275 return; 276 277 mutex_destroy(&(cb->mutex)); 278 279 free(cb, M_PCB); 280 TCPLIKE_DEBUG((LOG_INFO, "TCP-like sender is destroyed\n")); 281 } 282 283 /* 284 * Free the sender side 285 * args: ccb - ccb of sender 286 */ 287 void 288 tcplike_send_free(void *ccb) 289 { 290 struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb; 291 292 LOSS_DEBUG((LOG_INFO, "Entering tcplike_send_free()\n")); 293 294 if (ccb == 0) 295 return; 296 297 mutex_enter(&(cb->mutex)); 298 299 free(cb->cwndvector, M_PCB); 300 cb->cv_hs = cb->cv_ts = 0; 301 302 /* untimeout any active timer */ 303 if (cb->rto_timer_callout) { 304 TCPLIKE_DEBUG((LOG_INFO, "Untimeout RTO Timer\n")); 305 callout_stop(&cb->rto_timer); 306 cb->rto_timer_callout = 0; 307 } 308 309 mutex_exit(&(cb->mutex)); 310 311 callout_reset(&cb->free_timer, 10 * hz, tcplike_send_term, (void *)cb); 312 } 313 314 /* 315 * Ask TCPlike wheter one can send a packet or not 316 * args: ccb - ccb block for current connection 317 * returns: 0 if ok, else <> 0. 318 */ 319 int 320 tcplike_send_packet(void *ccb, long datasize) 321 { 322 /* check if one can send here */ 323 struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb; 324 long ticks; 325 char feature[1]; 326 327 TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_packet()\n")); 328 329 if (datasize == 0) { 330 TCPLIKE_DEBUG((LOG_INFO, "Sending pure ACK. Dont care about CC right now\n")); 331 return 1; 332 } 333 334 mutex_enter(&(cb->mutex)); 335 336 if (cb->cwnd <= cb->outstanding) { 337 /* May not send. trigger RTO */ 338 DCCP_DEBUG((LOG_INFO, "cwnd (%d) < outstanding (%d)\n", cb->cwnd, cb->outstanding)); 339 if (!cb->rto_timer_callout) { 340 LOSS_DEBUG((LOG_INFO, "Trigger TCPlike RTO timeout timer. Ticks = %u\n", cb->rto)); 341 ticks = (long)cb->rto; 342 callout_reset(&cb->rto_timer, ticks, 343 tcplike_rto_timeout, (void *)cb); 344 cb->rto_timer_callout = 1; 345 } 346 mutex_exit(&(cb->mutex)); 347 return 0; 348 } 349 350 /* We're allowed to send */ 351 352 feature[0] = 1; 353 if (cb->pcb->remote_ackvector == 0) { 354 ACK_DEBUG((LOG_INFO, "Adding Change(Use Ack Vector, 1) to outgoing packet\n")); 355 dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKVECTOR); 356 dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKVECTOR, feature, 1); 357 } 358 359 /* untimeout any active timer */ 360 if (cb->rto_timer_callout) { 361 LOSS_DEBUG((LOG_INFO, "Untimeout RTO Timer\n")); 362 callout_stop(&cb->rto_timer); 363 cb->rto_timer_callout = 0; 364 } 365 366 if (!cb->sample_rtt) { 367 struct timeval stamp; 368 microtime(&stamp); 369 cb->timestamp = ((stamp.tv_sec & 0x00000FFF) * 1000000) + stamp.tv_usec; 370 dccp_add_option(cb->pcb, DCCP_OPT_TIMESTAMP, (char*) &(cb->timestamp), 4); 371 /*LOSS_DEBUG((LOG_INFO, "Adding timestamp %u\n", cb->timestamp));*/ 372 cb->sample_rtt = 1; 373 } 374 375 mutex_exit(&(cb->mutex)); 376 return 1; 377 378 } 379 380 /* 381 * Notify sender that a packet has been sent 382 * args: ccb - ccb block for current connection 383 * moreToSend - if there exists more packets to send 384 */ 385 void 386 tcplike_send_packet_sent(void *ccb, int moreToSend, long datasize) 387 { 388 struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb; 389 390 TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_packet_sent(,%i,%i)\n",moreToSend,(int) datasize)); 391 392 if (datasize == 0) { 393 TCPLIKE_DEBUG((LOG_INFO, "Sent pure ACK. Dont care about cwnd-storing\n")); 394 return; 395 } 396 397 mutex_enter(&(cb->mutex)); 398 399 cb->outstanding++; 400 TCPLIKE_DEBUG((LOG_INFO, "SENT. cwnd: %d, outstanding: %d\n",cb->cwnd, cb->outstanding)); 401 402 /* stash the seqnr in cwndvector */ 403 /* Dont do this if we're only sending an ACK ! */ 404 _add_to_cwndvector(cb, cb->pcb->seq_snd); 405 CWND_DEBUG((LOG_INFO, "Sent. CWND value: %u , OUTSTANDING value: %u\n",cb->cwnd, cb->outstanding)); 406 407 dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO); 408 mutex_exit(&(cb->mutex)); 409 } 410 411 /* 412 * Notify that an ack package was received 413 * args: ccb - ccb block for current connection 414 */ 415 void 416 tcplike_send_packet_recv(void *ccb, char *options, int optlen) 417 { 418 dccp_seq acknum, lastok; 419 u_int16_t numlostpackets, avsize, i, prev_size; 420 u_int8_t length, state, numokpackets, ackratiocnt; 421 u_char av[10]; 422 struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb; 423 424 TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_ack_recv()\n")); 425 mutex_enter(&(cb->mutex)); 426 427 if (dccp_get_option(options, optlen, DCCP_OPT_TIMESTAMP_ECHO, av,10) > 0) { 428 u_int32_t echo, elapsed; 429 430 TCPLIKE_DEBUG((LOG_INFO, "Received TIMESTAMP ECHO\n")); 431 bcopy(av, &echo, 4); 432 bcopy(av + 4, &elapsed, 4); 433 434 if (echo == cb->timestamp) { 435 struct timeval time; 436 u_int32_t c_stamp; 437 u_int16_t diff; 438 439 microtime(&time); 440 c_stamp = ((time.tv_sec & 0x00000FFF) * 1000000) + time.tv_usec; 441 442 diff = (u_int16_t) c_stamp - cb->timestamp - elapsed; 443 diff = (u_int16_t)(diff / 1000); 444 TCPLIKE_DEBUG((LOG_INFO, "Got Timestamp Echo; Echo = %u, Elapsed = %u. DIFF = %u\n", 445 echo, elapsed, diff)); 446 tcplike_rtt_sample(cb, diff); 447 } 448 } 449 450 if (cb->pcb->ack_rcv == 0) { 451 /* There was no Ack. There is no spoon */ 452 453 /* We'll clear the missingacks data here, since the other host 454 * is also sending data. 455 * I guess we could deal with this, using the NDP field in the 456 * header. Let's stick a *TODO* mark here for now. 457 * The missingacks mechanism will activate if other host goes to 458 * only sending DCCP-Ack packets. 459 */ 460 cb->ack_last = 0; 461 cb->ack_miss = 0; 462 ACKRATIO_DEBUG((LOG_INFO, "Clear Missing Acks state!\n")); 463 mutex_exit(&(cb->mutex)); 464 return; 465 } 466 467 cb->sample_rtt = 0; 468 469 /* check ackVector for lost packets. cmp with cv_list */ 470 avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR0, av,10); 471 if (avsize == 0) 472 avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR1, av,10); 473 474 if (avsize > 0) 475 dccpstat.tcplikes_send_ackrecv++; 476 477 acknum = cb->pcb->ack_rcv; 478 numlostpackets = 0; 479 numokpackets = 0; 480 lastok = 0; 481 prev_size = _cwndvector_size(cb); 482 483 TCPLIKE_DEBUG((LOG_INFO, "Start removing from cwndvector %d\n", avsize)); 484 if (avsize == 0) 485 _remove_from_cwndvector(cb, acknum); 486 487 for (i=0; i < avsize; i++) { 488 state = (av[i] & 0xc0) >> 6; 489 length = (av[i] & 0x3f) +1; 490 while (length > 0) { 491 if (state == 0) { 492 CWND_DEBUG((LOG_INFO, "Packet %llu was OK\n", acknum)); 493 numokpackets++; 494 lastok = acknum; 495 _remove_from_cwndvector(cb, acknum); 496 } else { 497 if (acknum > cb->oldcwnd_ts) { 498 LOSS_DEBUG((LOG_INFO, "Packet %llu was lost %llu state %d\n", acknum, cb->oldcwnd_ts, state)); 499 numlostpackets++; 500 dccpstat.tcplikes_send_reploss++; 501 } 502 } 503 acknum--; 504 length--; 505 } 506 } 507 if (lastok) 508 if (_chop_cwndvector(cb, lastok-TCPLIKE_NUMDUPACK)) { 509 LOSS_DEBUG((LOG_INFO, "Packets were lost\n")); 510 if (lastok-TCPLIKE_NUMDUPACK > cb->oldcwnd_ts) { 511 numlostpackets++; 512 dccpstat.tcplikes_send_assloss++; 513 } 514 } 515 516 lastok = cb->cv_hs; 517 while (_cwndvector_state(cb, lastok) == 0x00 && lastok < cb->cv_ts) 518 lastok++; 519 if (lastok != cb->cv_hs) 520 _chop_cwndvector(cb, lastok); 521 522 cb->outstanding = _cwndvector_size(cb); 523 CWND_DEBUG((LOG_INFO, "Decrease outstanding. was = %u , now = %u\n", prev_size, cb->outstanding)); 524 if (prev_size == cb->outstanding) { 525 /* Nothing dropped from cwndvector */ 526 mutex_exit(&(cb->mutex)); 527 return; 528 } 529 530 cb->acked_in_win += numokpackets; 531 532 if (cb->cwnd < cb->ssthresh) { 533 /* Slow start */ 534 535 if (numlostpackets > 0) { 536 /* Packet loss */ 537 LOSS_DEBUG((LOG_INFO, "Packet Loss in Slow Start\n")); 538 cb->cwnd = cb->cwnd>>1; 539 if (cb->cwnd < 1) 540 cb->cwnd = 1; 541 cb->ssthresh = cb->cwnd; 542 cb->acked_in_win = 0; 543 cb->acked_windows = 0; 544 cb->oldcwnd_ts = cb->pcb->seq_snd; 545 546 } else { 547 cb->cwnd++; 548 } 549 550 } else if (cb->cwnd >= cb->ssthresh) { 551 552 if (numlostpackets > 0) { 553 /* Packet loss */ 554 LOSS_DEBUG((LOG_INFO, "Packet Loss in action\n")); 555 cb->cwnd = cb->cwnd>>1; 556 if (cb->cwnd < 1) 557 cb->cwnd = 1; 558 cb->ssthresh = cb->cwnd; 559 cb->acked_in_win = 0; 560 cb->acked_windows = 0; 561 cb->oldcwnd_ts = cb->pcb->seq_snd; 562 563 } else if (cb->acked_in_win > cb->cwnd) { 564 cb->cwnd++; 565 } 566 } 567 568 /* Ok let's check if there are missing Ack packets */ 569 ACKRATIO_DEBUG((LOG_INFO, "Check Ack. seq_rcv: %u ,ack_last: %u ,ack_miss: %u\n", 570 cb->pcb->seq_rcv, cb->ack_last, cb->ack_miss)); 571 572 if (cb->ack_last == 0) { 573 /* First received ack (or first after Data packet). Yey */ 574 cb->ack_last = cb->pcb->seq_rcv; 575 cb->ack_miss = 0; 576 } else if (cb->pcb->seq_rcv == (cb->ack_last + 1)) { 577 /* This is correct, non-congestion, in-order behaviour */ 578 cb->ack_last = cb->pcb->seq_rcv; 579 580 } else if (cb->pcb->seq_rcv < (cb->ack_last + 1)) { 581 /* Might be an Ack we've been missing */ 582 /* This code has a flaw; If we miss 2 Ack packets, we only care 583 * about the older one. This means that the next-to-oldest one could 584 * be lost without any action beeing taken. 585 * Time will tell if that is going to be a Giant Problem(r) 586 */ 587 if (cb->pcb->seq_rcv == cb->ack_miss) { 588 /* Yea it was. great */ 589 cb->ack_miss = 0; 590 } 591 592 } else if (cb->pcb->seq_rcv > (cb->ack_last + 1)) { 593 /* There is a jump in Ack seqnums.. */ 594 cb->ack_miss = cb->ack_last + 1; 595 cb->ack_last = cb->pcb->seq_rcv; 596 } 597 598 if (cb->ack_miss && ((cb->ack_miss + TCPLIKE_NUMDUPACK) < cb->ack_last)) { 599 /* Alert! Alert! Ack packets are MIA. 600 * Decrease Ack Ratio 601 */ 602 cb->rcvr_ackratio = cb->rcvr_ackratio<<1; 603 if (cb->rcvr_ackratio > (cb->cwnd>>1)) { 604 /* Constraint 2 */ 605 cb->rcvr_ackratio = cb->cwnd>>1; 606 } 607 if (cb->rcvr_ackratio == 0) 608 cb->rcvr_ackratio = 1; 609 ACKRATIO_DEBUG((LOG_INFO, "Increase Ack Ratio. Now = %u. (cwnd = %u)\n", cb->rcvr_ackratio, cb->cwnd)); 610 dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO); 611 dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO, 612 (char *) &cb->rcvr_ackratio, 1); 613 614 cb->ack_miss = 0; 615 cb->acked_windows = 0; 616 cb->acked_in_win = 0; 617 dccpstat.tcplikes_send_missack++; 618 619 } else if (cb->acked_in_win > cb->cwnd) { 620 cb->acked_in_win = 0; 621 cb->acked_windows++; 622 if (cb->rcvr_ackratio == 1) { 623 /* Ack Ratio is 1. We cant decrease it more.. Lets wait for some 624 * heavy congestion so we can increase it 625 */ 626 cb->acked_windows = 0; 627 } 628 } 629 630 if (cb->acked_windows >= 1) { 631 ackratiocnt = (cb->cwnd / ((cb->rcvr_ackratio*cb->rcvr_ackratio) - cb->rcvr_ackratio)); 632 if (cb->acked_windows >= ackratiocnt) { 633 if (cb->rcvr_ackratio > 2 && cb->cwnd >= 4) { 634 /* Constraint 3 - AckRatio at least 2 for a cwnd >= 4 */ 635 cb->rcvr_ackratio--; 636 ACKRATIO_DEBUG((LOG_INFO, "Decrease ackratio by 1, now: %u\n", cb->rcvr_ackratio)); 637 dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO); 638 dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO, 639 (char *) &cb->rcvr_ackratio, 1); 640 } 641 cb->acked_in_win = 0; 642 cb->acked_windows = 0; 643 } 644 } 645 646 CWND_DEBUG((LOG_INFO, "Recvd. CWND value: %u , OUTSTANDING value: %u\n", 647 cb->cwnd, cb->outstanding)); 648 649 if (cb->cwnd > cb->outstanding && cb->rto_timer_callout) { 650 LOSS_DEBUG((LOG_INFO, "Force DCCP_OUTPUT, CWND = %u Outstanding = %u\n", 651 cb->cwnd, cb->outstanding)); 652 callout_stop(&cb->rto_timer); 653 cb->rto_timer_callout = 0; 654 655 mutex_exit(&(cb->mutex)); 656 dccp_output(cb->pcb, 1); 657 return; 658 } 659 mutex_exit(&(cb->mutex)); 660 } 661 662 int 663 _cwndvector_size(struct tcplike_send_ccb *cb) 664 { 665 u_int64_t gap, offset, seqnr; 666 u_int32_t cnt; 667 u_char *t; 668 669 TCPLIKE_DEBUG((LOG_INFO, "Enter cwndvector_size\n")); 670 cnt = 0; 671 for (seqnr = cb->cv_hs; seqnr < cb->cv_ts; seqnr++) { 672 gap = seqnr - cb->cv_hs; 673 674 offset = gap % 8; 675 t = cb->cv_hp + (gap/8); 676 if (t >= (cb->cwndvector + (cb->cv_size/8))) 677 t -= (cb->cv_size / 8); /* wrapped */ 678 679 if (((*t & (0x01 << offset)) >> offset) == 0x01) 680 cnt++; 681 } 682 return cnt; 683 } 684 685 u_char 686 _cwndvector_state(struct tcplike_send_ccb *cb, u_int64_t seqnr) 687 { 688 u_int64_t gap, offset; 689 u_char *t; 690 691 /* Check for wrapping */ 692 if (seqnr >= cb->cv_hs) { 693 /* Not wrapped */ 694 gap = seqnr - cb->cv_hs; 695 } else { 696 /* Wrapped XXXXX */ 697 gap = seqnr + 0x1000000000000LL - cb->cv_hs; /* seq nr = 48 bits */ 698 } 699 700 if (gap >= cb->cv_size) { 701 /* gap is bigger than cwndvector size? baaad */ 702 return 0x01; 703 } 704 705 offset = gap % 8; 706 t = cb->cv_hp + (gap/8); 707 if (t >= (cb->cwndvector + (cb->cv_size/8))) 708 t -= (cb->cv_size / 8); /* wrapped */ 709 710 return ((*t & (0x01 << offset)) >> offset); 711 } 712 713 void 714 _add_to_cwndvector(struct tcplike_send_ccb *cb, u_int64_t seqnr) 715 { 716 u_int64_t offset, dc, gap; 717 u_char *t, *n; 718 719 TCPLIKE_DEBUG((LOG_INFO, "Entering add_to_cwndvector\n")); 720 721 if (cb->cv_hs == cb->cv_ts) { 722 /* Empty cwndvector */ 723 cb->cv_hs = cb->cv_ts = seqnr; 724 } 725 726 /* Check for wrapping */ 727 if (seqnr >= cb->cv_hs) { 728 /* Not wrapped */ 729 gap = seqnr - cb->cv_hs; 730 } else { 731 /* Wrapped */ 732 gap = seqnr + 0x1000000000000LL - cb->cv_hs; /* seq nr = 48 bits */ 733 } 734 735 if (gap >= cb->cv_size) { 736 /* gap is bigger than cwndvector size? baaad */ 737 /* maybe we should increase the cwndvector here */ 738 CWND_DEBUG((LOG_INFO, "add cwndvector error. gap: %d, cv_size: %d, seqnr: %d\n", 739 gap, cb->cv_size, seqnr)); 740 dccpstat.tcplikes_send_badseq++; 741 return; 742 } 743 744 offset = gap % 8; /* bit to mark */ 745 t = cb->cv_hp + (gap/8); 746 if (t >= (cb->cwndvector + (cb->cv_size/8))) 747 t -= (cb->cv_size / 8); /* cwndvector wrapped */ 748 749 *t = *t | (0x01 << offset); /* turn on bit */ 750 751 cb->cv_ts = seqnr+1; 752 if (cb->cv_ts == 0x1000000000000LL) 753 cb->cv_ts = 0; 754 755 if (gap > (cb->cv_size - 128)) { 756 MALLOC_DEBUG((LOG_INFO, "INCREASE cwndVECTOR\n")); 757 n = malloc(cb->cv_size/4, M_PCB, M_NOWAIT); /* old size * 2 */ 758 if (n == NULL) { 759 MALLOC_DEBUG((LOG_INFO, "Increase cwndvector FAILED\n")); 760 dccpstat.tcplikes_send_memerr++; 761 return; 762 } 763 memset (n+cb->cv_size/8,0x00,cb->cv_size/8); /* new half all missing */ 764 dc = (cb->cwndvector + (cb->cv_size/8)) - cb->cv_hp; 765 memcpy (n,cb->cv_hp, dc); /* tail to end */ 766 memcpy (n+dc,cb->cwndvector,cb->cv_hp - cb->cwndvector); /* start to tail */ 767 cb->cv_size = cb->cv_size * 2; /* counted in items, so it';s a doubling */ 768 free (cb->cwndvector, M_PCB); 769 cb->cv_hp = cb->cwndvector = n; 770 } 771 } 772 773 void 774 _remove_from_cwndvector(struct tcplike_send_ccb *cb, u_int64_t seqnr) 775 { 776 u_int64_t offset; 777 int64_t gap; 778 u_char *t; 779 780 DCCP_DEBUG((LOG_INFO, "Entering remove_from_cwndvector\n")); 781 782 if (cb->cv_hs == cb->cv_ts) { 783 /* Empty cwndvector */ 784 return; 785 } 786 787 /* Check for wrapping */ 788 if (seqnr >= cb->cv_hs) { 789 /* Not wrapped */ 790 gap = seqnr - cb->cv_hs; 791 } else { 792 /* Wrapped */ 793 gap = seqnr + 0x1000000000000LL - cb->cv_hs; /* seq nr = 48 bits */ 794 } 795 796 if (gap >= cb->cv_size) { 797 /* gap is bigger than cwndvector size. has already been chopped */ 798 return; 799 } 800 801 offset = gap % 8; /* hi or low 2 bits to mark */ 802 t = cb->cv_hp + (gap/8); 803 if (t >= (cb->cwndvector + (cb->cv_size/8))) 804 t -= (cb->cv_size / 8); /* cwndvector wrapped */ 805 806 *t = *t & (~(0x01 << offset)); /* turn off bits */ 807 } 808 809 int 810 _chop_cwndvector(struct tcplike_send_ccb *cb, u_int64_t seqnr) 811 { 812 int64_t gap, bytegap; 813 u_char *t; 814 815 CWND_DEBUG((LOG_INFO,"Chop cwndvector at: %u\n", seqnr)); 816 817 if (cb->cv_hs == cb->cv_ts) 818 return 0; 819 820 if (seqnr > cb->cv_hs) { 821 gap = seqnr - cb->cv_hs; 822 } else { 823 /* We received obsolete information */ 824 return 0; 825 } 826 827 bytegap = gap/8; 828 if (bytegap == 0) 829 return 0; 830 831 t = cb->cv_hp + bytegap; 832 if (t >= (cb->cwndvector + (cb->cv_size/8))) 833 t -= (cb->cv_size / 8); /* ackvector wrapped */ 834 cb->cv_hp = t; 835 cb->cv_hs += bytegap*8; 836 return 1; 837 } 838 839 840 /* Receiver side */ 841 842 843 /* Functions declared in struct dccp_cc_sw */ 844 845 /* Initialises the receiver side 846 * returns: pointer to a tcplike_recv_ccb struct on success, otherwise 0 847 */ 848 void * 849 tcplike_recv_init(struct dccpcb *pcb) 850 { 851 struct tcplike_recv_ccb *ccb; 852 853 TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_recv_init()\n")); 854 855 ccb = malloc(sizeof (struct tcplike_recv_ccb), M_PCB, M_NOWAIT | M_ZERO); 856 if (ccb == 0) { 857 TCPLIKE_DEBUG((LOG_INFO, "Unable to allocate memory for tcplike_recv_ccb!\n")); 858 dccpstat.tcplikes_recv_memerr++; 859 return 0; 860 } 861 862 memset(ccb, 0, sizeof (struct tcplike_recv_ccb)); 863 864 ccb->pcb = pcb; 865 ccb->unacked = 0; 866 ccb->pcb->ack_ratio = 2; 867 868 ccb->pcb->remote_ackvector = 1; 869 dccp_use_ackvector(ccb->pcb); 870 871 callout_init(&ccb->free_timer, 0); 872 873 mutex_init(&(ccb->mutex), MUTEX_DEFAULT, IPL_SOFTNET); 874 875 TCPLIKE_DEBUG((LOG_INFO, "TCPlike receiver initialised!\n")); 876 dccpstat.tcplikes_recv_conn++; 877 return ccb; 878 } 879 880 void tcplike_recv_term(void *ccb) 881 { 882 struct tcplike_recv_ccb *cb = (struct tcplike_recv_ccb *) ccb; 883 if (ccb == 0) 884 return; 885 886 mutex_destroy(&(cb->mutex)); 887 free(cb, M_PCB); 888 TCPLIKE_DEBUG((LOG_INFO, "TCP-like receiver is destroyed\n")); 889 } 890 891 /* Free the receiver side 892 * args: ccb - ccb of recevier 893 */ 894 void 895 tcplike_recv_free(void *ccb) 896 { 897 struct ack_list *a; 898 struct tcplike_recv_ccb *cb = (struct tcplike_recv_ccb *) ccb; 899 900 LOSS_DEBUG((LOG_INFO, "Entering tcplike_recv_free()\n")); 901 902 if (ccb == 0) 903 return; 904 905 mutex_enter(&(cb->mutex)); 906 907 a = cb->av_list; 908 while (a) { 909 cb->av_list = a->next; 910 free(a, M_TEMP); 911 a = cb->av_list; 912 } 913 914 cb->pcb->av_size = 0; 915 free(cb->pcb->ackvector, M_PCB); 916 917 mutex_exit(&(cb->mutex)); 918 callout_reset(&cb->free_timer, 10 * hz, tcplike_recv_term, (void *)cb); 919 } 920 921 /* 922 * Tell TCPlike that a packet has been received 923 * args: ccb - ccb block for current connection 924 */ 925 void 926 tcplike_recv_packet_recv(void *ccb, char *options, int optlen) 927 { 928 struct tcplike_recv_ccb *cb = (struct tcplike_recv_ccb *) ccb; 929 u_char ackvector[16]; 930 u_int16_t avsize; 931 u_char av_rcv[10]; 932 933 TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_recv_packet()\n")); 934 935 mutex_enter(&(cb->mutex)); 936 937 if (cb->pcb->type_rcv == DCCP_TYPE_DATA || 938 cb->pcb->type_rcv == DCCP_TYPE_DATAACK) 939 dccpstat.tcplikes_recv_datarecv++; 940 941 /* Grab Ack Vector 0 or 1 */ 942 avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR0, av_rcv,10); 943 if (avsize == 0) 944 avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR1, av_rcv,10); 945 946 /* We are only interested in acks-on-acks here. 947 * The "real" ack handling is done be the sender */ 948 if (avsize == 0 && cb->pcb->ack_rcv) { 949 u_int64_t ackthru; 950 /* We got an Ack without an ackvector. 951 * This would mean it's an ack on an ack. 952 */ 953 ackthru = _avlist_get(cb, cb->pcb->ack_rcv); 954 ACK_DEBUG((LOG_INFO, "GOT Ack without Ackvector; Ackthru: %u\n", ackthru)); 955 if (ackthru) { 956 dccp_update_ackvector(cb->pcb, ackthru); 957 dccpstat.tcplikes_recv_ackack++; 958 } 959 } else if (avsize > 0 && cb->pcb->ack_rcv) { 960 /* We received an AckVector */ 961 u_int32_t acknum, ackthru; 962 int i; 963 ACK_DEBUG((LOG_INFO, "GOT Ack with Ackvector\n")); 964 /* gotta loop through the ackvector */ 965 acknum = cb->pcb->ack_rcv; 966 for (i=0; i<avsize; i++) { 967 u_int8_t state, len; 968 state = (av_rcv[i] & 0xc0) >> 6; 969 len = (av_rcv[i] & 0x2f) + 1; 970 if (state != 0) { 971 /* Drops in ackvector! Will be noted and taken care of by the sender part */ 972 ACK_DEBUG((LOG_INFO, "Packets %u - %u are FUCKED\n",acknum-len, acknum)); 973 continue; 974 } 975 976 while (len>0) { 977 ackthru = _avlist_get(cb, acknum); 978 ACK_DEBUG((LOG_INFO, "Ackthru: %u\n", ackthru)); 979 if (ackthru) { 980 dccp_update_ackvector(cb->pcb, ackthru); 981 dccpstat.tcplikes_recv_ackack++; 982 } 983 acknum--; 984 len--; 985 } 986 } 987 } 988 989 ACK_DEBUG((LOG_INFO, "Adding %llu to local ackvector\n", cb->pcb->seq_rcv)); 990 dccp_increment_ackvector(cb->pcb, cb->pcb->seq_rcv); 991 cb->unacked++; 992 993 if (cb->unacked >= cb->pcb->ack_ratio) { 994 /* Time to send an Ack */ 995 996 avsize = dccp_generate_ackvector(cb->pcb, ackvector); 997 TCPLIKE_DEBUG((LOG_INFO, "recv_packet avsize %d ackvector %d\n", avsize, ackvector)); 998 cb->unacked = 0; 999 if (avsize > 0) { 1000 dccp_add_option(cb->pcb, DCCP_OPT_ACK_VECTOR0, ackvector, avsize); 1001 cb->pcb->ack_snd = cb->pcb->seq_rcv; 1002 _avlist_add(cb, cb->pcb->seq_snd+1, cb->pcb->ack_snd); 1003 ACK_DEBUG((LOG_INFO, "Recvr: Sending Ack (%llu) w/ Ack Vector\n", cb->pcb->ack_snd)); 1004 dccpstat.tcplikes_recv_acksent++; 1005 dccp_output(cb->pcb, 1); 1006 } 1007 } 1008 mutex_exit(&(cb->mutex)); 1009 } 1010 1011 void 1012 _avlist_add(struct tcplike_recv_ccb *cb, u_int64_t localseq, u_int64_t ackthru) 1013 { 1014 struct ack_list *a; 1015 ACK_DEBUG((LOG_INFO,"Adding localseq %u - ackthru %u to avlist\n", localseq, ackthru)); 1016 /*MALLOC_DEBUG((LOG_INFO, "New ack_list, %u\n", sizeof (struct ack_list)));*/ 1017 a = malloc(sizeof(struct ack_list), M_TEMP, M_NOWAIT); 1018 if (a == NULL) { 1019 MALLOC_DEBUG((LOG_INFO, "avlist_add: FAILED\n")); 1020 dccpstat.tcplikes_recv_memerr++; 1021 return; 1022 } 1023 memset(a, 0, sizeof(struct ack_list)); 1024 a->localseq = localseq; 1025 a->ackthru = ackthru; 1026 a->next = cb->av_list; 1027 cb->av_list = a; 1028 } 1029 1030 /* 1031 * Searches the av_list. if 'localseq' found, drop it from list and return 1032 * ackthru 1033 */ 1034 u_int64_t 1035 _avlist_get(struct tcplike_recv_ccb *cb, u_int64_t localseq) 1036 { 1037 struct ack_list *a, *n, *p; 1038 u_int64_t ackthru; 1039 1040 ACK_DEBUG((LOG_INFO,"Getting localseq %u from avlist\n", localseq)); 1041 a = cb->av_list; 1042 p = 0; 1043 while (a) { 1044 n = a->next; 1045 if (a->localseq == localseq) { 1046 if (p) 1047 p->next = n; 1048 else 1049 cb->av_list = n; 1050 ackthru = a->ackthru; 1051 /*MALLOC_DEBUG((LOG_INFO, "Freeing element %u in ack_list\n", a->localseq));*/ 1052 free(a, M_TEMP); 1053 return ackthru; 1054 } 1055 p = a; 1056 a = n; 1057 } 1058 /* Not found. return 0 */ 1059 return 0; 1060 } 1061 1062 /* 1063 int tcplike_option_recv(void); 1064 */ 1065