xref: /netbsd-src/sys/netinet/dccp_tcplike.c (revision 946379e7b37692fc43f68eb0d1c10daa0a7f3b6c)
1 /*	$KAME: dccp_tcplike.c,v 1.19 2005/07/27 06:27:25 nishida Exp $	*/
2 /*	$NetBSD: dccp_tcplike.c,v 1.2 2015/08/24 22:21:26 pooka Exp $ */
3 
4 /*
5  * Copyright (c) 2003 Magnus Erixzon
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. The name of the author may not be used to endorse or promote products
18  *    derived from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  */
31 /*
32  * TCP-like congestion control for DCCP
33  */
34 
35 #include <sys/cdefs.h>
36 __KERNEL_RCSID(0, "$NetBSD: dccp_tcplike.c,v 1.2 2015/08/24 22:21:26 pooka Exp $");
37 
38 #ifdef _KERNEL_OPT
39 #include "opt_dccp.h"
40 #endif
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/domain.h>
45 #include <sys/kernel.h>
46 #include <sys/lock.h>
47 #include <sys/malloc.h>
48 #include <sys/mbuf.h>
49 #include <sys/proc.h>
50 #include <sys/protosw.h>
51 #include <sys/signalvar.h>
52 #include <sys/socket.h>
53 #include <sys/socketvar.h>
54 #include <sys/mutex.h>
55 #include <sys/sysctl.h>
56 #include <sys/syslog.h>
57 
58 #include <net/if.h>
59 #include <net/route.h>
60 
61 #include <netinet/in.h>
62 #include <netinet/in_systm.h>
63 #include <netinet/ip.h>
64 #include <netinet/in_pcb.h>
65 #include <netinet/in_var.h>
66 
67 #include <netinet/ip_icmp.h>
68 #include <netinet/icmp_var.h>
69 #include <netinet/ip_var.h>
70 
71 #include <netinet/dccp.h>
72 #include <netinet/dccp_var.h>
73 #include <netinet/dccp_tcplike.h>
74 
/*
 * Debug logging hooks.  Each macro takes ONE fully parenthesised argument
 * list, e.g. TCPLIKE_DEBUG((LOG_INFO, "fmt\n", arg)), which is pasted
 * directly after the name of the logging function.
 */
#define ACKRATIO_DEBUG(args)	dccp_log args
#define CWND_DEBUG(args)	dccp_log args
#define LOSS_DEBUG(args)	dccp_log args
#define TCPLIKE_DEBUG(args)	dccp_log args
#define TIMEOUT_DEBUG(args)	dccp_log args
/* Allocation failures go through log() rather than dccp_log(). */
#define MALLOC_DEBUG(args)	log args

/*
 * The INP locking macros expand to nothing unless __FreeBSD__ is defined
 * and __FreeBSD_version is at least 500000.
 */
#if !(defined(__FreeBSD__) && __FreeBSD_version >= 500000)
#define	INP_INFO_LOCK_INIT(x,y)
#define	INP_INFO_RLOCK(x)
#define	INP_INFO_RUNLOCK(x)
#define	INP_INFO_WLOCK(x)
#define	INP_INFO_WUNLOCK(x)
#define	INP_LOCK(x)
#define	INP_UNLOCK(x)
#endif
91 
92 /* Sender side */
93 
94 void tcplike_rto_timeout(void *);
95 void tcplike_rtt_sample(struct tcplike_send_ccb *, u_int16_t);
96 void _add_to_cwndvector(struct tcplike_send_ccb *, u_int64_t);
97 void _remove_from_cwndvector(struct tcplike_send_ccb *, u_int64_t);
98 int _chop_cwndvector(struct tcplike_send_ccb *, u_int64_t);
99 int _cwndvector_size(struct tcplike_send_ccb *);
100 u_char _cwndvector_state(struct tcplike_send_ccb *, u_int64_t);
101 
102 void tcplike_send_term(void *);
103 void tcplike_recv_term(void *);
104 
105 void _avlist_add(struct tcplike_recv_ccb *, u_int64_t, u_int64_t);
106 u_int64_t _avlist_get(struct tcplike_recv_ccb *, u_int64_t);
107 
108 /* extern Ack Vector functions */
109 extern void dccp_use_ackvector(struct dccpcb *);
110 extern void dccp_update_ackvector(struct dccpcb *, u_int64_t);
111 extern void dccp_increment_ackvector(struct dccpcb *, u_int64_t);
112 extern u_int16_t dccp_generate_ackvector(struct dccpcb *, u_char *);
113 extern u_char dccp_ackvector_state(struct dccpcb *, u_int32_t);
114 
115 extern int dccp_get_option(char *, int, int, char *, int);
116 extern int dccp_remove_feature(struct dccpcb *, u_int8_t, u_int8_t);
117 
118 /*
119  * RTO timer activated
120  */
121 void
122 tcplike_rto_timeout(void *ccb)
123 {
124 	struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
125 	/*struct inpcb *inp;*/
126 	int s;
127 
128 	mutex_enter(&(cb->mutex));
129 
130 	cb->ssthresh = cb->cwnd >>1;
131 	cb->cwnd = 1; /* allowing 1 packet to be sent */
132 	cb->outstanding = 0; /* is this correct? */
133 	cb->rto_timer_callout = 0;
134 	cb->rto = cb->rto << 1;
135 	TIMEOUT_DEBUG((LOG_INFO, "RTO Timeout. New RTO = %u\n", cb->rto));
136 
137 	cb->sample_rtt = 0;
138 
139 	cb->ack_last = 0;
140 	cb->ack_miss = 0;
141 
142 	cb->rcvr_ackratio = 1; /* Constraint 2 & 3. We need ACKs asap */
143 	dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO);
144 	dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO,
145 				 (char *) &cb->rcvr_ackratio, 1);
146 	cb->acked_in_win = 0;
147 	cb->acked_windows = 0;
148 	cb->oldcwnd_ts = cb->pcb->seq_snd;
149 
150 	LOSS_DEBUG((LOG_INFO, "Timeout. CWND value: %u , OUTSTANDING value: %u\n",
151 	    cb->cwnd, cb->outstanding));
152 	mutex_exit(&(cb->mutex));
153 
154 	/* lock'n run dccp_output */
155 	s = splnet();
156 	INP_INFO_RLOCK(&dccpbinfo);
157 	/*inp = cb->pcb->d_inpcb;*/
158 	INP_LOCK(inp);
159 	INP_INFO_RUNLOCK(&dccpbinfo);
160 
161 	dccp_output(cb->pcb, 1);
162 
163 	INP_UNLOCK(inp);
164 	splx(s);
165 }
166 
167 void tcplike_rtt_sample(struct tcplike_send_ccb *cb, u_int16_t sample)
168 {
169 	u_int16_t err;
170 
171 	if (cb->rtt == 0xffff) {
172 		/* hmmmmm. */
173 		cb->rtt = sample;
174 		cb->rto = cb->rtt << 1;
175 		return;
176 	}
177 
178 	/* This is how the Linux implementation is doing it.. */
179 	if (sample >= cb->rtt) {
180 		err = sample - cb->rtt;
181 		cb->rtt = cb->rtt + (err >> 3);
182 	} else {
183 		err = cb->rtt - sample;
184 		cb->rtt = cb->rtt - (err >> 3);
185 	}
186 	cb->rtt_d = cb->rtt_d + ((err - cb->rtt_d) >> 2);
187 	if (cb->rtt < TCPLIKE_MIN_RTT)
188 		cb->rtt = TCPLIKE_MIN_RTT;
189 	cb->rto = cb->rtt + (cb->rtt_d << 2);
190 
191 
192 	/* 5 million ways to calculate RTT ...*/
193 #if 0
194 	cb->srtt = ( 0.8 * cb->srtt ) + (0.2 * sample);
195 	if (cb->srtt < TCPLIKE_MIN_RTT)
196 		cb->srtt = TCPLIKE_MIN_RTT;
197 	cb->rto = cb->srtt << 1;
198 #endif
199 
200 	LOSS_DEBUG((LOG_INFO, "RTT Sample: %u , New RTO: %u\n", sample, cb->rto));
201 }
202 
203 /* Functions declared in struct dccp_cc_sw */
204 
205 /*
206  * Initialises the sender side
207  * returns: pointer to a tfrc_send_ccb struct on success, otherwise 0
208  */
209 void *
210 tcplike_send_init(struct dccpcb* pcb)
211 {
212 	struct tcplike_send_ccb *cb;
213 
214 	TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_init()\n"));
215 
216 	cb = malloc(sizeof (struct tcplike_send_ccb), M_PCB, M_NOWAIT | M_ZERO);
217 	if (cb == 0) {
218 		TCPLIKE_DEBUG((LOG_INFO, "Unable to allocate memory for tcplike_send_ccb!\n"));
219 		dccpstat.tcplikes_send_memerr++;
220 		return 0;
221 	}
222 	memset(cb, 0, sizeof (struct tcplike_send_ccb));
223 
224 	/* init sender */
225 	cb->pcb = pcb;
226 
227 	cb->cwnd = TCPLIKE_INITIAL_CWND;
228 	cb->ssthresh = 0xafff; /* lim-> infinity */
229 	cb->oldcwnd_ts = 0;
230 	cb->outstanding = 0;
231 	cb->rcvr_ackratio = 2; /* Ack Ratio */
232 	cb->acked_in_win = 0;
233 	cb->acked_windows = 0;
234 
235 	CWND_DEBUG((LOG_INFO, "Init. CWND value: %u , OUTSTANDING value: %u\n",
236 		    cb->cwnd, cb->outstanding));
237 	cb->rtt = 0xffff;
238 	cb->rto = TIMEOUT_UBOUND;
239 	callout_init(&cb->rto_timer, 0);
240 	callout_init(&cb->free_timer, 0);
241 	cb->rto_timer_callout = 0;
242 	cb->rtt_d = 0;
243 	cb->timestamp = 0;
244 
245 	cb->sample_rtt = 1;
246 
247 	cb->cv_size = TCPLIKE_INITIAL_CWNDVECTOR;
248 	/* 1 bit per entry */
249 	cb->cwndvector = malloc(cb->cv_size / 8, M_PCB, M_NOWAIT | M_ZERO);
250 	if (cb->cwndvector == NULL) {
251 		MALLOC_DEBUG((LOG_INFO, "Unable to allocate memory for cwndvector\n"));
252 		/* What to do now? */
253 		cb->cv_size = 0;
254 		dccpstat.tcplikes_send_memerr++;
255 		return 0;
256 	}
257 	memset(cb->cwndvector, 0, cb->cv_size / 8);
258 	cb->cv_hs = cb->cv_ts = 0;
259 	cb->cv_hp = cb->cwndvector;
260 
261 	cb->ack_last = 0;
262 	cb->ack_miss = 0;
263 
264 	mutex_init(&(cb->mutex), MUTEX_DEFAULT, IPL_SOFTNET);
265 
266 	TCPLIKE_DEBUG((LOG_INFO, "TCPlike sender initialised!\n"));
267 	dccpstat.tcplikes_send_conn++;
268 	return cb;
269 }
270 
271 void tcplike_send_term(void *ccb)
272 {
273 	struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
274 	if (ccb == 0)
275 		return;
276 
277 	mutex_destroy(&(cb->mutex));
278 
279 	free(cb, M_PCB);
280 	TCPLIKE_DEBUG((LOG_INFO, "TCP-like sender is destroyed\n"));
281 }
282 
283 /*
284  * Free the sender side
285  * args: ccb - ccb of sender
286  */
287 void
288 tcplike_send_free(void *ccb)
289 {
290 	struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
291 
292 	LOSS_DEBUG((LOG_INFO, "Entering tcplike_send_free()\n"));
293 
294 	if (ccb == 0)
295 		return;
296 
297 	mutex_enter(&(cb->mutex));
298 
299 	free(cb->cwndvector, M_PCB);
300 	cb->cv_hs = cb->cv_ts = 0;
301 
302 	/* untimeout any active timer */
303 	if (cb->rto_timer_callout) {
304 		TCPLIKE_DEBUG((LOG_INFO, "Untimeout RTO Timer\n"));
305 		callout_stop(&cb->rto_timer);
306 		cb->rto_timer_callout = 0;
307 	}
308 
309 	mutex_exit(&(cb->mutex));
310 
311 	callout_reset(&cb->free_timer, 10 * hz, tcplike_send_term, (void *)cb);
312 }
313 
314 /*
315  * Ask TCPlike wheter one can send a packet or not
316  * args: ccb  -  ccb block for current connection
317  * returns: 0 if ok, else <> 0.
318  */
319 int
320 tcplike_send_packet(void *ccb, long datasize)
321 {
322 	/* check if one can send here */
323 	struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
324 	long ticks;
325 	char feature[1];
326 
327 	TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_packet()\n"));
328 
329 	if (datasize == 0) {
330 		TCPLIKE_DEBUG((LOG_INFO, "Sending pure ACK. Dont care about CC right now\n"));
331 		return 1;
332 	}
333 
334 	mutex_enter(&(cb->mutex));
335 
336 	if (cb->cwnd <= cb->outstanding) {
337 		/* May not send. trigger RTO */
338 		DCCP_DEBUG((LOG_INFO, "cwnd (%d) < outstanding (%d)\n", cb->cwnd, cb->outstanding));
339 		if (!cb->rto_timer_callout) {
340 			LOSS_DEBUG((LOG_INFO, "Trigger TCPlike RTO timeout timer. Ticks = %u\n", cb->rto));
341 			ticks = (long)cb->rto;
342 			callout_reset(&cb->rto_timer, ticks,
343 			    tcplike_rto_timeout, (void *)cb);
344 			cb->rto_timer_callout = 1;
345 		}
346 		mutex_exit(&(cb->mutex));
347 		return 0;
348 	}
349 
350 	/* We're allowed to send */
351 
352 	feature[0] = 1;
353 	if (cb->pcb->remote_ackvector == 0) {
354 		ACK_DEBUG((LOG_INFO, "Adding Change(Use Ack Vector, 1) to outgoing packet\n"));
355 		dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKVECTOR);
356 		dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKVECTOR, feature, 1);
357 	}
358 
359 	/* untimeout any active timer */
360 	if (cb->rto_timer_callout) {
361 		LOSS_DEBUG((LOG_INFO, "Untimeout RTO Timer\n"));
362 		callout_stop(&cb->rto_timer);
363 		cb->rto_timer_callout = 0;
364 	}
365 
366 	if (!cb->sample_rtt) {
367 		struct timeval stamp;
368 		microtime(&stamp);
369 		cb->timestamp = ((stamp.tv_sec & 0x00000FFF) * 1000000) + stamp.tv_usec;
370 		dccp_add_option(cb->pcb, DCCP_OPT_TIMESTAMP, (char*) &(cb->timestamp), 4);
371 		/*LOSS_DEBUG((LOG_INFO, "Adding timestamp %u\n", cb->timestamp));*/
372 		cb->sample_rtt = 1;
373 	}
374 
375 	mutex_exit(&(cb->mutex));
376 	return 1;
377 
378 }
379 
380 /*
381  * Notify sender that a packet has been sent
382  * args: ccb - ccb block for current connection
383  *	 moreToSend - if there exists more packets to send
384  */
385 void
386 tcplike_send_packet_sent(void *ccb, int moreToSend, long datasize)
387 {
388 	struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
389 
390 	TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_packet_sent(,%i,%i)\n",moreToSend,(int) datasize));
391 
392 	if (datasize == 0) {
393 		TCPLIKE_DEBUG((LOG_INFO, "Sent pure ACK. Dont care about cwnd-storing\n"));
394 		return;
395 	}
396 
397 	mutex_enter(&(cb->mutex));
398 
399 	cb->outstanding++;
400 	TCPLIKE_DEBUG((LOG_INFO, "SENT. cwnd: %d, outstanding: %d\n",cb->cwnd, cb->outstanding));
401 
402 	/* stash the seqnr in cwndvector */
403 	/* Dont do this if we're only sending an ACK ! */
404 	_add_to_cwndvector(cb, cb->pcb->seq_snd);
405 	CWND_DEBUG((LOG_INFO, "Sent. CWND value: %u , OUTSTANDING value: %u\n",cb->cwnd, cb->outstanding));
406 
407 	dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO);
408 	mutex_exit(&(cb->mutex));
409 }
410 
411 /*
412  * Notify that an ack package was received
413  * args: ccb  -  ccb block for current connection
414  */
415 void
416 tcplike_send_packet_recv(void *ccb, char *options, int optlen)
417 {
418 	dccp_seq acknum, lastok;
419 	u_int16_t numlostpackets, avsize, i, prev_size;
420 	u_int8_t length, state, numokpackets, ackratiocnt;
421 	u_char av[10];
422 	struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
423 
424 	TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_ack_recv()\n"));
425 	mutex_enter(&(cb->mutex));
426 
427 	if (dccp_get_option(options, optlen, DCCP_OPT_TIMESTAMP_ECHO, av,10) > 0) {
428 		u_int32_t echo, elapsed;
429 
430 		TCPLIKE_DEBUG((LOG_INFO, "Received TIMESTAMP ECHO\n"));
431 		bcopy(av, &echo, 4);
432 		bcopy(av + 4, &elapsed, 4);
433 
434 		if (echo == cb->timestamp) {
435 			struct timeval time;
436 			u_int32_t c_stamp;
437 			u_int16_t diff;
438 
439 			microtime(&time);
440 			c_stamp = ((time.tv_sec & 0x00000FFF) * 1000000) + time.tv_usec;
441 
442 			diff = (u_int16_t) c_stamp - cb->timestamp - elapsed;
443 			diff = (u_int16_t)(diff / 1000);
444 			TCPLIKE_DEBUG((LOG_INFO, "Got Timestamp Echo; Echo = %u, Elapsed = %u. DIFF = %u\n",
445 				       echo, elapsed, diff));
446 			tcplike_rtt_sample(cb, diff);
447 		}
448 	}
449 
450 	if (cb->pcb->ack_rcv == 0) {
451 		/* There was no Ack. There is no spoon */
452 
453 		/* We'll clear the missingacks data here, since the other host
454 		 * is also sending data.
455 		 * I guess we could deal with this, using the NDP field in the
456 		 * header. Let's stick a *TODO* mark here for now.
457 		 * The missingacks mechanism will activate if other host goes to
458 		 * only sending DCCP-Ack packets.
459 		 */
460 		cb->ack_last = 0;
461 		cb->ack_miss = 0;
462 		ACKRATIO_DEBUG((LOG_INFO, "Clear Missing Acks state!\n"));
463 		mutex_exit(&(cb->mutex));
464 		return;
465 	}
466 
467 	cb->sample_rtt = 0;
468 
469 	/* check ackVector for lost packets. cmp with cv_list */
470 	avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR0, av,10);
471 	if (avsize == 0)
472 		avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR1, av,10);
473 
474 	if (avsize > 0)
475 		dccpstat.tcplikes_send_ackrecv++;
476 
477 	acknum = cb->pcb->ack_rcv;
478 	numlostpackets = 0;
479 	numokpackets = 0;
480 	lastok = 0;
481 	prev_size = _cwndvector_size(cb);
482 
483 	TCPLIKE_DEBUG((LOG_INFO, "Start removing from cwndvector %d\n", avsize));
484 	if (avsize == 0)
485 		_remove_from_cwndvector(cb, acknum);
486 
487 	for (i=0; i < avsize; i++) {
488 		state = (av[i] & 0xc0) >> 6;
489 		length = (av[i] & 0x3f) +1;
490 		while (length > 0) {
491 			if (state == 0) {
492 				CWND_DEBUG((LOG_INFO, "Packet %llu was OK\n", acknum));
493 				numokpackets++;
494 				lastok = acknum;
495 				_remove_from_cwndvector(cb, acknum);
496 			} else {
497 				if (acknum > cb->oldcwnd_ts) {
498 					LOSS_DEBUG((LOG_INFO, "Packet %llu was lost %llu state %d\n", acknum, cb->oldcwnd_ts, state));
499 					numlostpackets++;
500 					dccpstat.tcplikes_send_reploss++;
501 				}
502 			}
503 			acknum--;
504 			length--;
505 		}
506 	}
507 	if (lastok)
508 		if (_chop_cwndvector(cb, lastok-TCPLIKE_NUMDUPACK)) {
509 			LOSS_DEBUG((LOG_INFO, "Packets were lost\n"));
510 			if (lastok-TCPLIKE_NUMDUPACK > cb->oldcwnd_ts) {
511 				numlostpackets++;
512 				dccpstat.tcplikes_send_assloss++;
513 			}
514 		}
515 
516 	lastok = cb->cv_hs;
517 	while (_cwndvector_state(cb, lastok) == 0x00 && lastok < cb->cv_ts)
518 		lastok++;
519 	if (lastok != cb->cv_hs)
520 		_chop_cwndvector(cb, lastok);
521 
522 	cb->outstanding = _cwndvector_size(cb);
523 	CWND_DEBUG((LOG_INFO, "Decrease outstanding. was = %u , now = %u\n", prev_size, cb->outstanding));
524 	if (prev_size == cb->outstanding) {
525 		/* Nothing dropped from cwndvector  */
526 		mutex_exit(&(cb->mutex));
527 		return;
528 	}
529 
530 	cb->acked_in_win += numokpackets;
531 
532 	if (cb->cwnd < cb->ssthresh) {
533 		/* Slow start */
534 
535 		if (numlostpackets > 0) {
536 			/* Packet loss */
537 			LOSS_DEBUG((LOG_INFO, "Packet Loss in Slow Start\n"));
538 			cb->cwnd = cb->cwnd>>1;
539 			if (cb->cwnd < 1)
540 				cb->cwnd = 1;
541 			cb->ssthresh = cb->cwnd;
542 			cb->acked_in_win = 0;
543 			cb->acked_windows = 0;
544 			cb->oldcwnd_ts = cb->pcb->seq_snd;
545 
546 		} else {
547 			cb->cwnd++;
548 		}
549 
550 	} else if (cb->cwnd >= cb->ssthresh) {
551 
552 		if (numlostpackets > 0) {
553 			/* Packet loss */
554 			LOSS_DEBUG((LOG_INFO, "Packet Loss in action\n"));
555 			cb->cwnd = cb->cwnd>>1;
556 			if (cb->cwnd < 1)
557 				cb->cwnd = 1;
558 			cb->ssthresh = cb->cwnd;
559 			cb->acked_in_win = 0;
560 			cb->acked_windows = 0;
561 			cb->oldcwnd_ts = cb->pcb->seq_snd;
562 
563 		} else if (cb->acked_in_win > cb->cwnd) {
564 			cb->cwnd++;
565 		}
566 	}
567 
568 	/* Ok let's check if there are missing Ack packets */
569 	ACKRATIO_DEBUG((LOG_INFO, "Check Ack. seq_rcv: %u ,ack_last: %u ,ack_miss: %u\n",
570 			cb->pcb->seq_rcv, cb->ack_last, cb->ack_miss));
571 
572 	if (cb->ack_last == 0) {
573 		/* First received ack (or first after Data packet). Yey */
574 		cb->ack_last = cb->pcb->seq_rcv;
575 		cb->ack_miss = 0;
576 	} else if (cb->pcb->seq_rcv == (cb->ack_last + 1)) {
577 		/* This is correct, non-congestion, in-order behaviour */
578 		cb->ack_last = cb->pcb->seq_rcv;
579 
580 	} else if (cb->pcb->seq_rcv < (cb->ack_last + 1)) {
581 		/* Might be an Ack we've been missing */
582 		/* This code has a flaw; If we miss 2 Ack packets, we only care
583 		 * about the older one. This means that the next-to-oldest one could
584 		 * be lost without any action beeing taken.
585 		 * Time will tell if that is going to be a Giant Problem(r)
586 		 */
587 		if (cb->pcb->seq_rcv == cb->ack_miss) {
588 			/* Yea it was. great */
589 			cb->ack_miss = 0;
590 		}
591 
592 	} else if (cb->pcb->seq_rcv > (cb->ack_last + 1)) {
593 		/* There is a jump in Ack seqnums.. */
594 		cb->ack_miss = cb->ack_last + 1;
595 		cb->ack_last = cb->pcb->seq_rcv;
596 	}
597 
598 	if (cb->ack_miss && ((cb->ack_miss + TCPLIKE_NUMDUPACK) < cb->ack_last)) {
599 		/* Alert! Alert! Ack packets are MIA.
600 		 * Decrease Ack Ratio
601 		 */
602 		cb->rcvr_ackratio = cb->rcvr_ackratio<<1;
603 		if (cb->rcvr_ackratio > (cb->cwnd>>1)) {
604 			/* Constraint 2 */
605 			cb->rcvr_ackratio = cb->cwnd>>1;
606 		}
607 		if (cb->rcvr_ackratio == 0)
608 			cb->rcvr_ackratio = 1;
609 		ACKRATIO_DEBUG((LOG_INFO, "Increase Ack Ratio. Now = %u. (cwnd = %u)\n", cb->rcvr_ackratio, cb->cwnd));
610 		dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO);
611 		dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO,
612 				 (char *) &cb->rcvr_ackratio, 1);
613 
614 		cb->ack_miss = 0;
615 		cb->acked_windows = 0;
616 		cb->acked_in_win = 0;
617 		dccpstat.tcplikes_send_missack++;
618 
619 	} else if (cb->acked_in_win > cb->cwnd) {
620 		cb->acked_in_win = 0;
621 		cb->acked_windows++;
622 		if (cb->rcvr_ackratio == 1) {
623 			/* Ack Ratio is 1. We cant decrease it more.. Lets wait for some
624 			 * heavy congestion so we can increase it
625 			 */
626 			cb->acked_windows = 0;
627 		}
628 	}
629 
630 	if (cb->acked_windows >= 1) {
631 		ackratiocnt = (cb->cwnd / ((cb->rcvr_ackratio*cb->rcvr_ackratio) - cb->rcvr_ackratio));
632 		if (cb->acked_windows >= ackratiocnt) {
633 			if (cb->rcvr_ackratio > 2 && cb->cwnd >= 4) {
634 				/* Constraint 3 - AckRatio at least 2 for a cwnd >= 4 */
635 				cb->rcvr_ackratio--;
636 				ACKRATIO_DEBUG((LOG_INFO, "Decrease ackratio by 1, now: %u\n", cb->rcvr_ackratio));
637 				dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO);
638 				dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO,
639 						 (char *) &cb->rcvr_ackratio, 1);
640 			}
641 			cb->acked_in_win = 0;
642 			cb->acked_windows = 0;
643 		}
644 	}
645 
646 	CWND_DEBUG((LOG_INFO, "Recvd. CWND value: %u , OUTSTANDING value: %u\n",
647 		    cb->cwnd, cb->outstanding));
648 
649 	if (cb->cwnd > cb->outstanding && cb->rto_timer_callout) {
650                 LOSS_DEBUG((LOG_INFO, "Force DCCP_OUTPUT, CWND = %u Outstanding = %u\n",
651                             cb->cwnd, cb->outstanding));
652 		callout_stop(&cb->rto_timer);
653 		cb->rto_timer_callout = 0;
654 
655 		mutex_exit(&(cb->mutex));
656                 dccp_output(cb->pcb, 1);
657 		return;
658         }
659 	mutex_exit(&(cb->mutex));
660 }
661 
662 int
663 _cwndvector_size(struct tcplike_send_ccb *cb)
664 {
665 	u_int64_t gap, offset, seqnr;
666 	u_int32_t cnt;
667 	u_char *t;
668 
669 	TCPLIKE_DEBUG((LOG_INFO, "Enter cwndvector_size\n"));
670 	cnt = 0;
671 	for (seqnr = cb->cv_hs; seqnr < cb->cv_ts; seqnr++) {
672 		gap = seqnr - cb->cv_hs;
673 
674 		offset = gap % 8;
675 		t = cb->cv_hp + (gap/8);
676 		if (t >= (cb->cwndvector + (cb->cv_size/8)))
677 			t -= (cb->cv_size / 8); /* wrapped */
678 
679 		if (((*t & (0x01 << offset)) >> offset) == 0x01)
680 			cnt++;
681 	}
682 	return cnt;
683 }
684 
685 u_char
686 _cwndvector_state(struct tcplike_send_ccb *cb, u_int64_t seqnr)
687 {
688 	u_int64_t gap, offset;
689 	u_char *t;
690 
691 	/* Check for wrapping */
692 	if (seqnr >= cb->cv_hs) {
693 		/* Not wrapped */
694 		gap = seqnr - cb->cv_hs;
695 	} else {
696 		/* Wrapped XXXXX */
697 		gap = seqnr + 0x1000000000000LL - cb->cv_hs; /* seq nr = 48 bits */
698 	}
699 
700 	if (gap >= cb->cv_size) {
701 		/* gap is bigger than cwndvector size? baaad */
702 		return 0x01;
703 	}
704 
705 	offset = gap % 8;
706 	t = cb->cv_hp + (gap/8);
707 	if (t >= (cb->cwndvector + (cb->cv_size/8)))
708 		t -= (cb->cv_size / 8); /* wrapped */
709 
710 	return ((*t & (0x01 << offset)) >> offset);
711 }
712 
713 void
714 _add_to_cwndvector(struct tcplike_send_ccb *cb, u_int64_t seqnr)
715 {
716 	u_int64_t offset, dc, gap;
717 	u_char *t, *n;
718 
719 	TCPLIKE_DEBUG((LOG_INFO, "Entering add_to_cwndvector\n"));
720 
721 	if (cb->cv_hs == cb->cv_ts) {
722 		/* Empty cwndvector */
723 		cb->cv_hs = cb->cv_ts = seqnr;
724 	}
725 
726 	/* Check for wrapping */
727 	if (seqnr >= cb->cv_hs) {
728 		/* Not wrapped */
729 		gap = seqnr - cb->cv_hs;
730 	} else {
731 		/* Wrapped */
732 		gap = seqnr + 0x1000000000000LL - cb->cv_hs; /* seq nr = 48 bits */
733 	}
734 
735 	if (gap >= cb->cv_size) {
736 		/* gap is bigger than cwndvector size? baaad */
737 		/* maybe we should increase the cwndvector here */
738 		CWND_DEBUG((LOG_INFO, "add cwndvector error. gap: %d, cv_size: %d, seqnr: %d\n",
739 			    gap, cb->cv_size, seqnr));
740 		dccpstat.tcplikes_send_badseq++;
741 		return;
742 	}
743 
744 	offset = gap % 8; /* bit to mark */
745 	t = cb->cv_hp + (gap/8);
746 	if (t >= (cb->cwndvector + (cb->cv_size/8)))
747 		t -= (cb->cv_size / 8); /* cwndvector wrapped */
748 
749 	*t = *t | (0x01 << offset); /* turn on bit */
750 
751 	cb->cv_ts = seqnr+1;
752 	if (cb->cv_ts == 0x1000000000000LL)
753 		cb->cv_ts = 0;
754 
755 	if (gap > (cb->cv_size - 128)) {
756 		MALLOC_DEBUG((LOG_INFO, "INCREASE cwndVECTOR\n"));
757 		n = malloc(cb->cv_size/4, M_PCB, M_NOWAIT); /* old size * 2 */
758 		if (n == NULL) {
759 			MALLOC_DEBUG((LOG_INFO, "Increase cwndvector FAILED\n"));
760 			dccpstat.tcplikes_send_memerr++;
761 			return;
762 		}
763 		memset (n+cb->cv_size/8,0x00,cb->cv_size/8); /* new half all missing */
764 		dc = (cb->cwndvector + (cb->cv_size/8)) - cb->cv_hp;
765 		memcpy (n,cb->cv_hp, dc); /* tail to end */
766 		memcpy (n+dc,cb->cwndvector,cb->cv_hp - cb->cwndvector); /* start to tail */
767 		cb->cv_size = cb->cv_size * 2; /* counted in items, so it';s a doubling */
768 		free (cb->cwndvector, M_PCB);
769 		cb->cv_hp = cb->cwndvector = n;
770 	}
771 }
772 
773 void
774 _remove_from_cwndvector(struct tcplike_send_ccb *cb, u_int64_t seqnr)
775 {
776 	u_int64_t offset;
777 	int64_t gap;
778 	u_char *t;
779 
780 	DCCP_DEBUG((LOG_INFO, "Entering remove_from_cwndvector\n"));
781 
782 	if (cb->cv_hs == cb->cv_ts) {
783 		/* Empty cwndvector */
784 		return;
785 	}
786 
787 	/* Check for wrapping */
788 	if (seqnr >= cb->cv_hs) {
789 		/* Not wrapped */
790 		gap = seqnr - cb->cv_hs;
791 	} else {
792 		/* Wrapped */
793 		gap = seqnr + 0x1000000000000LL - cb->cv_hs; /* seq nr = 48 bits */
794 	}
795 
796 	if (gap >= cb->cv_size) {
797 		/* gap is bigger than cwndvector size. has already been chopped */
798 		return;
799 	}
800 
801 	offset = gap % 8; /* hi or low 2 bits to mark */
802 	t = cb->cv_hp + (gap/8);
803 	if (t >= (cb->cwndvector + (cb->cv_size/8)))
804 		t -= (cb->cv_size / 8); /* cwndvector wrapped */
805 
806 	*t = *t & (~(0x01 << offset)); /* turn off bits */
807 }
808 
809 int
810 _chop_cwndvector(struct tcplike_send_ccb *cb, u_int64_t seqnr)
811 {
812 	int64_t gap, bytegap;
813 	u_char *t;
814 
815 	CWND_DEBUG((LOG_INFO,"Chop cwndvector at: %u\n", seqnr));
816 
817 	if (cb->cv_hs == cb->cv_ts)
818 		return 0;
819 
820 	if (seqnr > cb->cv_hs) {
821 		gap = seqnr - cb->cv_hs;
822 	} else {
823 		/* We received obsolete information */
824 		return 0;
825 	}
826 
827 	bytegap = gap/8;
828 	if (bytegap == 0)
829 		return 0;
830 
831 	t = cb->cv_hp + bytegap;
832 	if (t >= (cb->cwndvector + (cb->cv_size/8)))
833 		t -= (cb->cv_size / 8); /* ackvector wrapped */
834 	cb->cv_hp = t;
835 	cb->cv_hs += bytegap*8;
836 	return 1;
837 }
838 
839 
840 /* Receiver side */
841 
842 
843 /* Functions declared in struct dccp_cc_sw */
844 
845 /* Initialises the receiver side
846  * returns: pointer to a tcplike_recv_ccb struct on success, otherwise 0
847  */
848 void *
849 tcplike_recv_init(struct dccpcb *pcb)
850 {
851 	struct tcplike_recv_ccb *ccb;
852 
853 	TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_recv_init()\n"));
854 
855 	ccb = malloc(sizeof (struct tcplike_recv_ccb), M_PCB, M_NOWAIT | M_ZERO);
856 	if (ccb == 0) {
857 		TCPLIKE_DEBUG((LOG_INFO, "Unable to allocate memory for tcplike_recv_ccb!\n"));
858 		dccpstat.tcplikes_recv_memerr++;
859 		return 0;
860 	}
861 
862 	memset(ccb, 0, sizeof (struct tcplike_recv_ccb));
863 
864 	ccb->pcb = pcb;
865 	ccb->unacked = 0;
866 	ccb->pcb->ack_ratio = 2;
867 
868 	ccb->pcb->remote_ackvector = 1;
869 	dccp_use_ackvector(ccb->pcb);
870 
871 	callout_init(&ccb->free_timer, 0);
872 
873 	mutex_init(&(ccb->mutex), MUTEX_DEFAULT, IPL_SOFTNET);
874 
875 	TCPLIKE_DEBUG((LOG_INFO, "TCPlike receiver initialised!\n"));
876 	dccpstat.tcplikes_recv_conn++;
877 	return ccb;
878 }
879 
880 void tcplike_recv_term(void *ccb)
881 {
882 	struct tcplike_recv_ccb *cb = (struct tcplike_recv_ccb *) ccb;
883 	if (ccb == 0)
884 		return;
885 
886 	mutex_destroy(&(cb->mutex));
887 	free(cb, M_PCB);
888 	TCPLIKE_DEBUG((LOG_INFO, "TCP-like receiver is destroyed\n"));
889 }
890 
891 /* Free the receiver side
892  * args: ccb - ccb of recevier
893  */
894 void
895 tcplike_recv_free(void *ccb)
896 {
897 	struct ack_list *a;
898 	struct tcplike_recv_ccb *cb = (struct tcplike_recv_ccb *) ccb;
899 
900 	LOSS_DEBUG((LOG_INFO, "Entering tcplike_recv_free()\n"));
901 
902 	if (ccb == 0)
903 		return;
904 
905 	mutex_enter(&(cb->mutex));
906 
907 	a = cb->av_list;
908 	while (a) {
909 		cb->av_list = a->next;
910 		free(a, M_TEMP);
911 		a = cb->av_list;
912 	}
913 
914 	cb->pcb->av_size = 0;
915 	free(cb->pcb->ackvector, M_PCB);
916 
917 	mutex_exit(&(cb->mutex));
918 	callout_reset(&cb->free_timer, 10 * hz, tcplike_recv_term, (void *)cb);
919 }
920 
921 /*
922  * Tell TCPlike that a packet has been received
923  * args: ccb  -  ccb block for current connection
924  */
925 void
926 tcplike_recv_packet_recv(void *ccb, char *options, int optlen)
927 {
928 	struct tcplike_recv_ccb *cb = (struct tcplike_recv_ccb *) ccb;
929 	u_char ackvector[16];
930 	u_int16_t avsize;
931 	u_char av_rcv[10];
932 
933 	TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_recv_packet()\n"));
934 
935 	mutex_enter(&(cb->mutex));
936 
937 	if (cb->pcb->type_rcv == DCCP_TYPE_DATA ||
938 	    cb->pcb->type_rcv == DCCP_TYPE_DATAACK)
939 		dccpstat.tcplikes_recv_datarecv++;
940 
941 	/* Grab Ack Vector 0 or 1 */
942 	avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR0, av_rcv,10);
943 	if (avsize == 0)
944 		avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR1, av_rcv,10);
945 
946 	/* We are only interested in acks-on-acks here.
947 	 * The "real" ack handling is done be the sender */
948 	if (avsize == 0 && cb->pcb->ack_rcv) {
949 		u_int64_t ackthru;
950 		/* We got an Ack without an ackvector.
951 		 * This would mean it's an ack on an ack.
952 		 */
953 		ackthru = _avlist_get(cb, cb->pcb->ack_rcv);
954 		ACK_DEBUG((LOG_INFO, "GOT Ack without Ackvector; Ackthru: %u\n", ackthru));
955 		if (ackthru) {
956 			dccp_update_ackvector(cb->pcb, ackthru);
957 			dccpstat.tcplikes_recv_ackack++;
958 		}
959 	} else if (avsize > 0 && cb->pcb->ack_rcv) {
960 		/* We received an AckVector */
961 		u_int32_t acknum, ackthru;
962 		int i;
963 		ACK_DEBUG((LOG_INFO, "GOT Ack with Ackvector\n"));
964 		/* gotta loop through the ackvector */
965 		acknum = cb->pcb->ack_rcv;
966 		for (i=0; i<avsize; i++) {
967 			u_int8_t state, len;
968 			state = (av_rcv[i] & 0xc0) >> 6;
969 			len = (av_rcv[i] & 0x2f) + 1;
970 			if (state != 0) {
971 				/* Drops in ackvector! Will be noted and taken care of by the sender part */
972 				ACK_DEBUG((LOG_INFO, "Packets %u - %u are FUCKED\n",acknum-len, acknum));
973 				continue;
974 			}
975 
976 			while (len>0) {
977 				ackthru = _avlist_get(cb, acknum);
978 				ACK_DEBUG((LOG_INFO, "Ackthru: %u\n", ackthru));
979 				if (ackthru) {
980 					dccp_update_ackvector(cb->pcb, ackthru);
981 					dccpstat.tcplikes_recv_ackack++;
982 				}
983 				acknum--;
984 				len--;
985 			}
986 		}
987 	}
988 
989 	ACK_DEBUG((LOG_INFO, "Adding %llu to local ackvector\n", cb->pcb->seq_rcv));
990 	dccp_increment_ackvector(cb->pcb, cb->pcb->seq_rcv);
991 	cb->unacked++;
992 
993 	if (cb->unacked >= cb->pcb->ack_ratio) {
994 		/* Time to send an Ack */
995 
996 		avsize = dccp_generate_ackvector(cb->pcb, ackvector);
997 TCPLIKE_DEBUG((LOG_INFO, "recv_packet avsize %d ackvector %d\n", avsize, ackvector));
998 		cb->unacked = 0;
999 		if (avsize > 0) {
1000 			dccp_add_option(cb->pcb, DCCP_OPT_ACK_VECTOR0, ackvector, avsize);
1001 			cb->pcb->ack_snd = cb->pcb->seq_rcv;
1002 			_avlist_add(cb, cb->pcb->seq_snd+1, cb->pcb->ack_snd);
1003 			ACK_DEBUG((LOG_INFO, "Recvr: Sending Ack (%llu) w/ Ack Vector\n", cb->pcb->ack_snd));
1004 			dccpstat.tcplikes_recv_acksent++;
1005 			dccp_output(cb->pcb, 1);
1006 		}
1007 	}
1008 	mutex_exit(&(cb->mutex));
1009 }
1010 
1011 void
1012 _avlist_add(struct tcplike_recv_ccb *cb, u_int64_t localseq, u_int64_t ackthru)
1013 {
1014 	struct ack_list *a;
1015 	ACK_DEBUG((LOG_INFO,"Adding localseq %u - ackthru %u to avlist\n", localseq, ackthru));
1016 	/*MALLOC_DEBUG((LOG_INFO, "New ack_list, %u\n", sizeof (struct ack_list)));*/
1017 	a = malloc(sizeof(struct ack_list), M_TEMP, M_NOWAIT);
1018 	if (a == NULL) {
1019 		MALLOC_DEBUG((LOG_INFO, "avlist_add: FAILED\n"));
1020 		dccpstat.tcplikes_recv_memerr++;
1021 		return;
1022 	}
1023 	memset(a, 0, sizeof(struct ack_list));
1024 	a->localseq = localseq;
1025 	a->ackthru = ackthru;
1026 	a->next = cb->av_list;
1027 	cb->av_list = a;
1028 }
1029 
1030 /*
1031  * Searches the av_list. if 'localseq' found, drop it from list and return
1032  * ackthru
1033  */
1034 u_int64_t
1035 _avlist_get(struct tcplike_recv_ccb *cb, u_int64_t localseq)
1036 {
1037 	struct ack_list *a, *n, *p;
1038 	u_int64_t ackthru;
1039 
1040 	ACK_DEBUG((LOG_INFO,"Getting localseq %u from avlist\n", localseq));
1041 	a = cb->av_list;
1042 	p = 0;
1043 	while (a) {
1044 		n = a->next;
1045 		if (a->localseq == localseq) {
1046 			if (p)
1047 				p->next = n;
1048 			else
1049 				cb->av_list = n;
1050 			ackthru = a->ackthru;
1051 			/*MALLOC_DEBUG((LOG_INFO, "Freeing element %u in ack_list\n", a->localseq));*/
1052 			free(a, M_TEMP);
1053 			return ackthru;
1054 		}
1055 		p = a;
1056 		a = n;
1057 	}
1058 	/* Not found. return 0 */
1059 	return 0;
1060 }
1061 
1062 /*
1063 int tcplike_option_recv(void);
1064 */
1065