xref: /netbsd-src/sys/kern/uipc_socket2.c (revision 481fca6e59249d8ffcf24fef7cfbe7b131bfb080)
1 /*	$NetBSD: uipc_socket2.c,v 1.36 2000/03/30 09:27:14 augustss Exp $	*/
2 
3 /*
4  * Copyright (c) 1982, 1986, 1988, 1990, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed by the University of
18  *	California, Berkeley and its contributors.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  *	@(#)uipc_socket2.c	8.2 (Berkeley) 2/14/95
36  */
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/proc.h>
41 #include <sys/file.h>
42 #include <sys/buf.h>
43 #include <sys/malloc.h>
44 #include <sys/mbuf.h>
45 #include <sys/protosw.h>
46 #include <sys/socket.h>
47 #include <sys/socketvar.h>
48 #include <sys/signalvar.h>
49 
50 /*
51  * Primitive routines for operating on sockets and socket buffers
52  */
53 
54 /* strings for sleep message: */
const char	netio[] = "netio";	/* passed to tsleep() by sbwait() and sb_lock() */
const char	netcon[] = "netcon";	/* not used in this file; presumably for connect waits -- confirm */
const char	netcls[] = "netcls";	/* not used in this file; presumably for close waits -- confirm */
58 
59 /*
60  * Procedures to manipulate state flags of socket
61  * and do appropriate wakeups.  Normal sequence from the
62  * active (originating) side is that soisconnecting() is
63  * called during processing of connect() call,
64  * resulting in an eventual call to soisconnected() if/when the
65  * connection is established.  When the connection is torn down
66  * soisdisconnecting() is called during processing of disconnect() call,
67  * and soisdisconnected() is called when the connection to the peer
68  * is totally severed.  The semantics of these routines are such that
69  * connectionless protocols can call soisconnected() and soisdisconnected()
70  * only, bypassing the in-progress calls when setting up a ``connection''
71  * takes no time.
72  *
73  * From the passive side, a socket is created with
74  * two queues of sockets: so_q0 for connections in progress
75  * and so_q for connections already made and awaiting user acceptance.
76  * As a protocol is preparing incoming connections, it creates a socket
77  * structure queued on so_q0 by calling sonewconn().  When the connection
78  * is established, soisconnected() is called, and transfers the
79  * socket structure to so_q, making it available to accept().
80  *
81  * If a socket is closed with sockets on either
82  * so_q0 or so_q, these sockets are dropped.
83  *
84  * If higher level protocols are implemented in
85  * the kernel, the wakeups done here will sometimes
86  * cause software-interrupt process scheduling.
87  */
88 
89 void
90 soisconnecting(so)
91 	struct socket *so;
92 {
93 
94 	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
95 	so->so_state |= SS_ISCONNECTING;
96 }
97 
98 void
99 soisconnected(so)
100 	struct socket *so;
101 {
102 	struct socket *head = so->so_head;
103 
104 	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
105 	so->so_state |= SS_ISCONNECTED;
106 	if (head && soqremque(so, 0)) {
107 		soqinsque(head, so, 1);
108 		sorwakeup(head);
109 		wakeup((caddr_t)&head->so_timeo);
110 	} else {
111 		wakeup((caddr_t)&so->so_timeo);
112 		sorwakeup(so);
113 		sowwakeup(so);
114 	}
115 }
116 
117 void
118 soisdisconnecting(so)
119 	struct socket *so;
120 {
121 
122 	so->so_state &= ~SS_ISCONNECTING;
123 	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
124 	wakeup((caddr_t)&so->so_timeo);
125 	sowwakeup(so);
126 	sorwakeup(so);
127 }
128 
129 void
130 soisdisconnected(so)
131 	struct socket *so;
132 {
133 
134 	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
135 	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
136 	wakeup((caddr_t)&so->so_timeo);
137 	sowwakeup(so);
138 	sorwakeup(so);
139 }
140 
141 /*
142  * When an attempt at a new connection is noted on a socket
143  * which accepts connections, sonewconn is called.  If the
144  * connection is possible (subject to space constraints, etc.)
145  * then we allocate a new structure, properly linked into the
146  * data structure of the original socket, and return this.
147  * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED.
148  *
149  * Currently, sonewconn() is defined as sonewconn1() in socketvar.h
150  * to catch calls that are missing the (new) second parameter.
151  */
152 struct socket *
153 sonewconn1(head, connstatus)
154 	struct socket *head;
155 	int connstatus;
156 {
157 	struct socket *so;
158 	int soqueue = connstatus ? 1 : 0;
159 
160 	if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
161 		return ((struct socket *)0);
162 	so = pool_get(&socket_pool, PR_NOWAIT);
163 	if (so == NULL)
164 		return (NULL);
165 	memset((caddr_t)so, 0, sizeof(*so));
166 	so->so_type = head->so_type;
167 	so->so_options = head->so_options &~ SO_ACCEPTCONN;
168 	so->so_linger = head->so_linger;
169 	so->so_state = head->so_state | SS_NOFDREF;
170 	so->so_proto = head->so_proto;
171 	so->so_timeo = head->so_timeo;
172 	so->so_pgid = head->so_pgid;
173 	so->so_send = head->so_send;
174 	so->so_receive = head->so_receive;
175 	so->so_uid = head->so_uid;
176 	(void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
177 	soqinsque(head, so, soqueue);
178 	if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH,
179 	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
180 	    (struct proc *)0)) {
181 		(void) soqremque(so, soqueue);
182 		pool_put(&socket_pool, so);
183 		return (NULL);
184 	}
185 	if (connstatus) {
186 		sorwakeup(head);
187 		wakeup((caddr_t)&head->so_timeo);
188 		so->so_state |= connstatus;
189 	}
190 	return (so);
191 }
192 
193 void
194 soqinsque(head, so, q)
195 	struct socket *head, *so;
196 	int q;
197 {
198 
199 #ifdef DIAGNOSTIC
200 	if (so->so_onq != NULL)
201 		panic("soqinsque");
202 #endif
203 
204 	so->so_head = head;
205 	if (q == 0) {
206 		head->so_q0len++;
207 		so->so_onq = &head->so_q0;
208 	} else {
209 		head->so_qlen++;
210 		so->so_onq = &head->so_q;
211 	}
212 	TAILQ_INSERT_TAIL(so->so_onq, so, so_qe);
213 }
214 
215 int
216 soqremque(so, q)
217 	struct socket *so;
218 	int q;
219 {
220 	struct socket *head = so->so_head;
221 
222 	if (q == 0) {
223 		if (so->so_onq != &head->so_q0)
224 			return (0);
225 		head->so_q0len--;
226 	} else {
227 		if (so->so_onq != &head->so_q)
228 			return (0);
229 		head->so_qlen--;
230 	}
231 	TAILQ_REMOVE(so->so_onq, so, so_qe);
232 	so->so_onq = NULL;
233 	so->so_head = NULL;
234 	return (1);
235 }
236 
237 /*
238  * Socantsendmore indicates that no more data will be sent on the
239  * socket; it would normally be applied to a socket when the user
240  * informs the system that no more data is to be sent, by the protocol
241  * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
242  * will be received, and will normally be applied to the socket by a
243  * protocol when it detects that the peer will send no more data.
244  * Data queued for reading in the socket may yet be read.
245  */
246 
247 void
248 socantsendmore(so)
249 	struct socket *so;
250 {
251 
252 	so->so_state |= SS_CANTSENDMORE;
253 	sowwakeup(so);
254 }
255 
256 void
257 socantrcvmore(so)
258 	struct socket *so;
259 {
260 
261 	so->so_state |= SS_CANTRCVMORE;
262 	sorwakeup(so);
263 }
264 
265 /*
266  * Wait for data to arrive at/drain from a socket buffer.
267  */
268 int
269 sbwait(sb)
270 	struct sockbuf *sb;
271 {
272 
273 	sb->sb_flags |= SB_WAIT;
274 	return (tsleep((caddr_t)&sb->sb_cc,
275 	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, netio,
276 	    sb->sb_timeo));
277 }
278 
279 /*
280  * Lock a sockbuf already known to be locked;
281  * return any error returned from sleep (EINTR).
282  */
283 int
284 sb_lock(sb)
285 	struct sockbuf *sb;
286 {
287 	int error;
288 
289 	while (sb->sb_flags & SB_LOCK) {
290 		sb->sb_flags |= SB_WANT;
291 		error = tsleep((caddr_t)&sb->sb_flags,
292 			       (sb->sb_flags & SB_NOINTR) ?
293 					PSOCK : PSOCK|PCATCH, netio, 0);
294 		if (error)
295 			return (error);
296 	}
297 	sb->sb_flags |= SB_LOCK;
298 	return (0);
299 }
300 
301 /*
302  * Wakeup processes waiting on a socket buffer.
303  * Do asynchronous notification via SIGIO
304  * if the socket has the SS_ASYNC flag set.
305  */
306 void
307 sowakeup(so, sb)
308 	struct socket *so;
309 	struct sockbuf *sb;
310 {
311 	struct proc *p;
312 
313 	selwakeup(&sb->sb_sel);
314 	sb->sb_flags &= ~SB_SEL;
315 	if (sb->sb_flags & SB_WAIT) {
316 		sb->sb_flags &= ~SB_WAIT;
317 		wakeup((caddr_t)&sb->sb_cc);
318 	}
319 	if (so->so_state & SS_ASYNC) {
320 		if (so->so_pgid < 0)
321 			gsignal(-so->so_pgid, SIGIO);
322 		else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
323 			psignal(p, SIGIO);
324 	}
325 	if (sb->sb_flags & SB_UPCALL)
326 		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
327 }
328 
329 /*
330  * Socket buffer (struct sockbuf) utility routines.
331  *
332  * Each socket contains two socket buffers: one for sending data and
333  * one for receiving data.  Each buffer contains a queue of mbufs,
334  * information about the number of mbufs and amount of data in the
335  * queue, and other fields allowing poll() statements and notification
336  * on data availability to be implemented.
337  *
338  * Data stored in a socket buffer is maintained as a list of records.
339  * Each record is a list of mbufs chained together with the m_next
340  * field.  Records are chained together with the m_nextpkt field. The upper
341  * level routine soreceive() expects the following conventions to be
342  * observed when placing information in the receive buffer:
343  *
344  * 1. If the protocol requires each message be preceded by the sender's
345  *    name, then a record containing that name must be present before
346  *    any associated data (mbuf's must be of type MT_SONAME).
347  * 2. If the protocol supports the exchange of ``access rights'' (really
348  *    just additional data associated with the message), and there are
349  *    ``rights'' to be received, then a record containing this data
350  *    should be present (mbuf's must be of type MT_CONTROL).
351  * 3. If a name or rights record exists, then it must be followed by
352  *    a data record, perhaps of zero length.
353  *
354  * Before using a new socket structure it is first necessary to reserve
355  * buffer space to the socket, by calling sbreserve().  This should commit
356  * some of the available buffer space in the system buffer pool for the
357  * socket (currently, it does nothing but enforce limits).  The space
358  * should be released by calling sbrelease() when the socket is destroyed.
359  */
360 
361 int
362 soreserve(so, sndcc, rcvcc)
363 	struct socket *so;
364 	u_long sndcc, rcvcc;
365 {
366 
367 	if (sbreserve(&so->so_snd, sndcc) == 0)
368 		goto bad;
369 	if (sbreserve(&so->so_rcv, rcvcc) == 0)
370 		goto bad2;
371 	if (so->so_rcv.sb_lowat == 0)
372 		so->so_rcv.sb_lowat = 1;
373 	if (so->so_snd.sb_lowat == 0)
374 		so->so_snd.sb_lowat = MCLBYTES;
375 	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
376 		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
377 	return (0);
378 bad2:
379 	sbrelease(&so->so_snd);
380 bad:
381 	return (ENOBUFS);
382 }
383 
384 /*
385  * Allot mbufs to a sockbuf.
386  * Attempt to scale mbmax so that mbcnt doesn't become limiting
387  * if buffering efficiency is near the normal case.
388  */
389 int
390 sbreserve(sb, cc)
391 	struct sockbuf *sb;
392 	u_long cc;
393 {
394 
395 	if (cc == 0 || cc > sb_max * MCLBYTES / (MSIZE + MCLBYTES))
396 		return (0);
397 	sb->sb_hiwat = cc;
398 	sb->sb_mbmax = min(cc * 2, sb_max);
399 	if (sb->sb_lowat > sb->sb_hiwat)
400 		sb->sb_lowat = sb->sb_hiwat;
401 	return (1);
402 }
403 
404 /*
405  * Free mbufs held by a socket, and reserved mbuf space.
406  */
407 void
408 sbrelease(sb)
409 	struct sockbuf *sb;
410 {
411 
412 	sbflush(sb);
413 	sb->sb_hiwat = sb->sb_mbmax = 0;
414 }
415 
416 /*
417  * Routines to add and remove
418  * data from an mbuf queue.
419  *
420  * The routines sbappend() or sbappendrecord() are normally called to
421  * append new mbufs to a socket buffer, after checking that adequate
422  * space is available, comparing the function sbspace() with the amount
423  * of data to be added.  sbappendrecord() differs from sbappend() in
424  * that data supplied is treated as the beginning of a new record.
425  * To place a sender's address, optional access rights, and data in a
426  * socket receive buffer, sbappendaddr() should be used.  To place
427  * access rights and data in a socket receive buffer, sbappendcontrol()
428  * should be used.  In either case, the new data begins a new record.
429  * Note that unlike sbappend() and sbappendrecord(), these routines check
430  * for the caller that there will be enough space to store the data.
431  * Each fails if there is not enough space, or if it cannot find mbufs
432  * to store additional information in.
433  *
434  * Reliable protocols may use the socket send buffer to hold data
435  * awaiting acknowledgement.  Data is normally copied from a socket
436  * send buffer in a protocol with m_copy for output to a peer,
437  * and then removing the data from the socket buffer with sbdrop()
438  * or sbdroprecord() when the data is acknowledged by the peer.
439  */
440 
441 /*
442  * Append mbuf chain m to the last record in the
443  * socket buffer sb.  The additional space associated with
444  * the mbuf chain is recorded in sb.  Empty mbufs are
445  * discarded and mbufs are compacted where possible.
446  */
447 void
448 sbappend(sb, m)
449 	struct sockbuf *sb;
450 	struct mbuf *m;
451 {
452 	struct mbuf *n;
453 
454 	if (m == 0)
455 		return;
456 	if ((n = sb->sb_mb) != NULL) {
457 		while (n->m_nextpkt)
458 			n = n->m_nextpkt;
459 		do {
460 			if (n->m_flags & M_EOR) {
461 				sbappendrecord(sb, m); /* XXXXXX!!!! */
462 				return;
463 			}
464 		} while (n->m_next && (n = n->m_next));
465 	}
466 	sbcompress(sb, m, n);
467 }
468 
#ifdef SOCKBUF_DEBUG
/*
 * Debugging aid: walk the first mbuf chain of `sb', recompute the byte
 * count and the mbuf storage count, and panic if they disagree with
 * sb_cc / sb_mbcnt.  Also panics ("sbcheck nextpkt") if any mbuf on
 * that chain has m_nextpkt set, so it only accepts single-record
 * buffers.
 *
 * The totals are kept as u_long and printed with %lu (with explicit
 * casts on the sockbuf counters) so that the diagnostic message does
 * not pass wide unsigned counters to a %d conversion.
 */
void
sbcheck(sb)
	struct sockbuf *sb;
{
	struct mbuf *m;
	u_long len = 0, mbcnt = 0;

	for (m = sb->sb_mb; m; m = m->m_next) {
		len += m->m_len;
		mbcnt += MSIZE;
		/* External storage counts towards mbcnt as well. */
		if (m->m_flags & M_EXT)
			mbcnt += m->m_ext.ext_size;
		if (m->m_nextpkt)
			panic("sbcheck nextpkt");
	}
	if (len != (u_long)sb->sb_cc || mbcnt != (u_long)sb->sb_mbcnt) {
		printf("cc %lu != %lu || mbcnt %lu != %lu\n",
		    len, (u_long)sb->sb_cc,
		    mbcnt, (u_long)sb->sb_mbcnt);
		panic("sbcheck");
	}
}
#endif
492 
493 /*
494  * As above, except the mbuf chain
495  * begins a new record.
496  */
497 void
498 sbappendrecord(sb, m0)
499 	struct sockbuf *sb;
500 	struct mbuf *m0;
501 {
502 	struct mbuf *m;
503 
504 	if (m0 == 0)
505 		return;
506 	if ((m = sb->sb_mb) != NULL)
507 		while (m->m_nextpkt)
508 			m = m->m_nextpkt;
509 	/*
510 	 * Put the first mbuf on the queue.
511 	 * Note this permits zero length records.
512 	 */
513 	sballoc(sb, m0);
514 	if (m)
515 		m->m_nextpkt = m0;
516 	else
517 		sb->sb_mb = m0;
518 	m = m0->m_next;
519 	m0->m_next = 0;
520 	if (m && (m0->m_flags & M_EOR)) {
521 		m0->m_flags &= ~M_EOR;
522 		m->m_flags |= M_EOR;
523 	}
524 	sbcompress(sb, m, m0);
525 }
526 
527 /*
528  * As above except that OOB data
529  * is inserted at the beginning of the sockbuf,
530  * but after any other OOB data.
531  */
532 void
533 sbinsertoob(sb, m0)
534 	struct sockbuf *sb;
535 	struct mbuf *m0;
536 {
537 	struct mbuf *m;
538 	struct mbuf **mp;
539 
540 	if (m0 == 0)
541 		return;
542 	for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) {
543 	    again:
544 		switch (m->m_type) {
545 
546 		case MT_OOBDATA:
547 			continue;		/* WANT next train */
548 
549 		case MT_CONTROL:
550 			if ((m = m->m_next) != NULL)
551 				goto again;	/* inspect THIS train further */
552 		}
553 		break;
554 	}
555 	/*
556 	 * Put the first mbuf on the queue.
557 	 * Note this permits zero length records.
558 	 */
559 	sballoc(sb, m0);
560 	m0->m_nextpkt = *mp;
561 	*mp = m0;
562 	m = m0->m_next;
563 	m0->m_next = 0;
564 	if (m && (m0->m_flags & M_EOR)) {
565 		m0->m_flags &= ~M_EOR;
566 		m->m_flags |= M_EOR;
567 	}
568 	sbcompress(sb, m, m0);
569 }
570 
571 /*
572  * Append address and data, and optionally, control (ancillary) data
573  * to the receive queue of a socket.  If present,
574  * m0 must include a packet header with total length.
575  * Returns 0 if no space in sockbuf or insufficient mbufs.
576  */
577 int
578 sbappendaddr(sb, asa, m0, control)
579 	struct sockbuf *sb;
580 	struct sockaddr *asa;
581 	struct mbuf *m0, *control;
582 {
583 	struct mbuf *m, *n;
584 	int space = asa->sa_len;
585 
586 if (m0 && (m0->m_flags & M_PKTHDR) == 0)
587 panic("sbappendaddr");
588 	if (m0)
589 		space += m0->m_pkthdr.len;
590 	for (n = control; n; n = n->m_next) {
591 		space += n->m_len;
592 		if (n->m_next == 0)	/* keep pointer to last control buf */
593 			break;
594 	}
595 	if (space > sbspace(sb))
596 		return (0);
597 	MGET(m, M_DONTWAIT, MT_SONAME);
598 	if (m == 0)
599 		return (0);
600 	if (asa->sa_len > MLEN) {
601 		MEXTMALLOC(m, asa->sa_len, M_NOWAIT);
602 		if ((m->m_flags & M_EXT) == 0) {
603 			m_free(m);
604 			return (0);
605 		}
606 	}
607 	m->m_len = asa->sa_len;
608 	memcpy(mtod(m, caddr_t), (caddr_t)asa, asa->sa_len);
609 	if (n)
610 		n->m_next = m0;		/* concatenate data to control */
611 	else
612 		control = m0;
613 	m->m_next = control;
614 	for (n = m; n; n = n->m_next)
615 		sballoc(sb, n);
616 	if ((n = sb->sb_mb) != NULL) {
617 		while (n->m_nextpkt)
618 			n = n->m_nextpkt;
619 		n->m_nextpkt = m;
620 	} else
621 		sb->sb_mb = m;
622 	return (1);
623 }
624 
625 int
626 sbappendcontrol(sb, m0, control)
627 	struct sockbuf *sb;
628 	struct mbuf *m0, *control;
629 {
630 	struct mbuf *m, *n;
631 	int space = 0;
632 
633 	if (control == 0)
634 		panic("sbappendcontrol");
635 	for (m = control; ; m = m->m_next) {
636 		space += m->m_len;
637 		if (m->m_next == 0)
638 			break;
639 	}
640 	n = m;			/* save pointer to last control buffer */
641 	for (m = m0; m; m = m->m_next)
642 		space += m->m_len;
643 	if (space > sbspace(sb))
644 		return (0);
645 	n->m_next = m0;			/* concatenate data to control */
646 	for (m = control; m; m = m->m_next)
647 		sballoc(sb, m);
648 	if ((n = sb->sb_mb) != NULL) {
649 		while (n->m_nextpkt)
650 			n = n->m_nextpkt;
651 		n->m_nextpkt = control;
652 	} else
653 		sb->sb_mb = control;
654 	return (1);
655 }
656 
657 /*
658  * Compress mbuf chain m into the socket
659  * buffer sb following mbuf n.  If n
660  * is null, the buffer is presumed empty.
661  */
662 void
663 sbcompress(sb, m, n)
664 	struct sockbuf *sb;
665 	struct mbuf *m, *n;
666 {
667 	int eor = 0;
668 	struct mbuf *o;
669 
670 	while (m) {
671 		eor |= m->m_flags & M_EOR;
672 		if (m->m_len == 0 &&
673 		    (eor == 0 ||
674 		     (((o = m->m_next) || (o = n)) &&
675 		      o->m_type == m->m_type))) {
676 			m = m_free(m);
677 			continue;
678 		}
679 		if (n && (n->m_flags & M_EOR) == 0 && n->m_type == m->m_type &&
680 		    (((n->m_flags & M_EXT) == 0 &&
681 		      n->m_data + n->m_len + m->m_len <= &n->m_dat[MLEN]) ||
682 		     ((~n->m_flags & (M_EXT|M_CLUSTER)) == 0 &&
683 		      !MCLISREFERENCED(n) &&
684 		      n->m_data + n->m_len + m->m_len <= &n->m_ext.ext_buf[MCLBYTES]))) {
685 			memcpy(mtod(n, caddr_t) + n->m_len, mtod(m, caddr_t),
686 			    (unsigned)m->m_len);
687 			n->m_len += m->m_len;
688 			sb->sb_cc += m->m_len;
689 			m = m_free(m);
690 			continue;
691 		}
692 		if (n)
693 			n->m_next = m;
694 		else
695 			sb->sb_mb = m;
696 		sballoc(sb, m);
697 		n = m;
698 		m->m_flags &= ~M_EOR;
699 		m = m->m_next;
700 		n->m_next = 0;
701 	}
702 	if (eor) {
703 		if (n)
704 			n->m_flags |= eor;
705 		else
706 			printf("semi-panic: sbcompress\n");
707 	}
708 }
709 
710 /*
711  * Free all mbufs in a sockbuf.
712  * Check that all resources are reclaimed.
713  */
714 void
715 sbflush(sb)
716 	struct sockbuf *sb;
717 {
718 
719 	if (sb->sb_flags & SB_LOCK)
720 		panic("sbflush");
721 	while (sb->sb_mbcnt)
722 		sbdrop(sb, (int)sb->sb_cc);
723 	if (sb->sb_cc || sb->sb_mb)
724 		panic("sbflush 2");
725 }
726 
727 /*
728  * Drop data from (the front of) a sockbuf.
729  */
730 void
731 sbdrop(sb, len)
732 	struct sockbuf *sb;
733 	int len;
734 {
735 	struct mbuf *m, *mn;
736 	struct mbuf *next;
737 
738 	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
739 	while (len > 0) {
740 		if (m == 0) {
741 			if (next == 0)
742 				panic("sbdrop");
743 			m = next;
744 			next = m->m_nextpkt;
745 			continue;
746 		}
747 		if (m->m_len > len) {
748 			m->m_len -= len;
749 			m->m_data += len;
750 			sb->sb_cc -= len;
751 			break;
752 		}
753 		len -= m->m_len;
754 		sbfree(sb, m);
755 		MFREE(m, mn);
756 		m = mn;
757 	}
758 	while (m && m->m_len == 0) {
759 		sbfree(sb, m);
760 		MFREE(m, mn);
761 		m = mn;
762 	}
763 	if (m) {
764 		sb->sb_mb = m;
765 		m->m_nextpkt = next;
766 	} else
767 		sb->sb_mb = next;
768 }
769 
770 /*
771  * Drop a record off the front of a sockbuf
772  * and move the next record to the front.
773  */
774 void
775 sbdroprecord(sb)
776 	struct sockbuf *sb;
777 {
778 	struct mbuf *m, *mn;
779 
780 	m = sb->sb_mb;
781 	if (m) {
782 		sb->sb_mb = m->m_nextpkt;
783 		do {
784 			sbfree(sb, m);
785 			MFREE(m, mn);
786 		} while ((m = mn) != NULL);
787 	}
788 }
789 
790 /*
791  * Create a "control" mbuf containing the specified data
792  * with the specified type for presentation on a socket buffer.
793  */
794 struct mbuf *
795 sbcreatecontrol(p, size, type, level)
796 	caddr_t p;
797 	int size;
798 	int type, level;
799 {
800 	struct cmsghdr *cp;
801 	struct mbuf *m;
802 
803 	if (CMSG_SPACE(size) > MCLBYTES) {
804 		printf("sbcreatecontrol: message too large %d\n", size);
805 		return NULL;
806 	}
807 
808 	if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
809 		return ((struct mbuf *) NULL);
810 	if (CMSG_SPACE(size) > MLEN) {
811 		MCLGET(m, M_DONTWAIT);
812 		if ((m->m_flags & M_EXT) == 0) {
813 			m_free(m);
814 			return NULL;
815 		}
816 	}
817 	cp = mtod(m, struct cmsghdr *);
818 	memcpy(CMSG_DATA(cp), p, size);
819 	m->m_len = CMSG_SPACE(size);
820 	cp->cmsg_len = CMSG_LEN(size);
821 	cp->cmsg_level = level;
822 	cp->cmsg_type = type;
823 	return (m);
824 }
825