xref: /csrg-svn/sys/kern/uipc_socket2.c (revision 34491)
/*
 * Copyright (c) 1982, 1986 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that this notice is preserved and that due credit is given
 * to the University of California at Berkeley. The name of the University
 * may not be used to endorse or promote products derived from this
 * software without specific prior written permission. This software
 * is provided ``as is'' without express or implied warranty.
 *
 *	@(#)uipc_socket2.c	7.4 (Berkeley) 05/26/88
 */

#include "param.h"
#include "systm.h"
#include "dir.h"
#include "user.h"
#include "proc.h"
#include "file.h"
#include "inode.h"
#include "buf.h"
#include "mbuf.h"
#include "protosw.h"
#include "socket.h"
#include "socketvar.h"

/*
 * Primitive routines for operating on sockets and socket buffers
 */

/*
 * Procedures to manipulate state flags of a socket
 * and do appropriate wakeups.  Normal sequence from the
 * active (originating) side is that soisconnecting() is
 * called during processing of the connect() call,
 * resulting in an eventual call to soisconnected() if/when the
 * connection is established.  When the connection is torn down
 * soisdisconnecting() is called during processing of the disconnect()
 * call, and soisdisconnected() is called when the connection to the
 * peer is totally severed.  The semantics of these routines are such
 * that connectionless protocols can call soisconnected() and
 * soisdisconnected() only, bypassing the in-progress calls when
 * setting up a ``connection'' takes no time.
 *
 * From the passive side, a socket is created with
 * two queues of sockets: so_q0 for connections in progress
 * and so_q for connections already made and awaiting user acceptance.
 * As a protocol is preparing incoming connections, it creates a socket
 * structure queued on so_q0 by calling sonewconn().  When the connection
 * is established, soisconnected() is called, which transfers the
 * socket structure to so_q, making it available to accept().
 *
 * If a socket is closed with sockets on either
 * so_q0 or so_q, these sockets are dropped.
 *
 * If higher level protocols are implemented in
 * the kernel, the wakeups done here will sometimes
 * cause software-interrupt process scheduling.
 */

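/*
 * Illustrative sequence (a sketch, not part of the original source):
 * for a connection-oriented protocol the passive side typically runs
 *
 *	so = sonewconn(head);		queued on head->so_q0
 *	... protocol handshake completes ...
 *	soisconnected(so);		moved to head->so_q
 *
 * after which accept() dequeues the new socket from head->so_q.
 * The active side instead runs soisconnecting() from its connect()
 * processing and soisconnected() when the handshake finishes.
 */
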
soisconnecting(so)
	register struct socket *so;
{

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;
	wakeup((caddr_t)&so->so_timeo);
}

soisconnected(so)
	register struct socket *so;
{
	register struct socket *head = so->so_head;

	if (head) {
		if (soqremque(so, 0) == 0)
			panic("soisconnected");
		soqinsque(head, so, 1);
		sorwakeup(head);
		wakeup((caddr_t)&head->so_timeo);
	}
	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTED;
	wakeup((caddr_t)&so->so_timeo);
	sorwakeup(so);
	sowwakeup(so);
}

soisdisconnecting(so)
	register struct socket *so;
{

	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}

soisdisconnected(so)
	register struct socket *so;
{

	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}

/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 */
struct socket *
sonewconn(head)
	register struct socket *head;
{
	register struct socket *so;
	register struct mbuf *m;

	if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
		goto bad;
	m = m_getclr(M_DONTWAIT, MT_SOCKET);
	if (m == NULL)
		goto bad;
	so = mtod(m, struct socket *);
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgrp = head->so_pgrp;
	(void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
	soqinsque(head, so, 0);
	if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH,
	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0)) {
		(void) soqremque(so, 0);
		(void) m_free(m);
		goto bad;
	}
	return (so);
bad:
	return ((struct socket *)0);
}

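/*
 * Usage sketch (illustrative, not part of the original source):
 * a connection-oriented protocol's input routine, on seeing a
 * connection request addressed to a listening socket head, would do
 *
 *	so = sonewconn(head);
 *	if (so == 0)
 *		drop the request (queue full or no memory);
 *	... negotiate; once the connection is established ...
 *	soisconnected(so);
 *
 * The PRU_ATTACH request issued above lets the protocol allocate its
 * per-connection state; if that fails the new socket is unlinked
 * from so_q0 and freed before sonewconn() returns 0.
 */
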
soqinsque(head, so, q)
	register struct socket *head, *so;
	int q;
{

	so->so_head = head;
	if (q == 0) {
		head->so_q0len++;
		so->so_q0 = head->so_q0;
		head->so_q0 = so;
	} else {
		head->so_qlen++;
		so->so_q = head->so_q;
		head->so_q = so;
	}
}

soqremque(so, q)
	register struct socket *so;
	int q;
{
	register struct socket *head, *prev, *next;

	head = so->so_head;
	prev = head;
	for (;;) {
		next = q ? prev->so_q : prev->so_q0;
		if (next == so)
			break;
		if (next == head)
			return (0);
		prev = next;
	}
	if (q == 0) {
		prev->so_q0 = next->so_q0;
		head->so_q0len--;
	} else {
		prev->so_q = next->so_q;
		head->so_qlen--;
	}
	next->so_q0 = next->so_q = 0;
	next->so_head = 0;
	return (1);
}

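/*
 * Note (added commentary): both queues are singly linked lists that
 * are expected to be circular, the last element pointing back at the
 * head (listening) socket; presumably the head's so_q0 and so_q are
 * made to point at itself when the socket is set up to accept
 * connections.  That is why the search in soqremque() terminates on
 * reaching the head again rather than on a null pointer.
 */
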
/*
 * Socantsendmore indicates that no more data will be sent on the
 * socket; it is normally applied to a socket by the protocol code
 * (in its handling of PRU_SHUTDOWN) when the user informs the system
 * that no more data is to be sent.  Socantrcvmore indicates that no
 * more data will be received, and will normally be applied to the
 * socket by a protocol when it detects that the peer will send no
 * more data.  Data queued for reading in the socket may yet be read.
 */

socantsendmore(so)
	struct socket *so;
{

	so->so_state |= SS_CANTSENDMORE;
	sowwakeup(so);
}

socantrcvmore(so)
	struct socket *so;
{

	so->so_state |= SS_CANTRCVMORE;
	sorwakeup(so);
}

/*
 * Socket select/wakeup routines.
 */

/*
 * Queue a process for a select on a socket buffer.
 */
sbselqueue(sb)
	struct sockbuf *sb;
{
	register struct proc *p;

	if ((p = sb->sb_sel) && p->p_wchan == (caddr_t)&selwait)
		sb->sb_flags |= SB_COLL;
	else
		sb->sb_sel = u.u_procp;
}

/*
 * Wait for data to arrive at/drain from a socket buffer.
 */
sbwait(sb)
	struct sockbuf *sb;
{

	sb->sb_flags |= SB_WAIT;
	sleep((caddr_t)&sb->sb_cc, PZERO+1);
}

/*
 * Wakeup processes waiting on a socket buffer.
 */
sbwakeup(sb)
	register struct sockbuf *sb;
{

	if (sb->sb_sel) {
		selwakeup(sb->sb_sel, sb->sb_flags & SB_COLL);
		sb->sb_sel = 0;
		sb->sb_flags &= ~SB_COLL;
	}
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;
		wakeup((caddr_t)&sb->sb_cc);
	}
}

/*
 * Wakeup socket readers and writers.
 * Do asynchronous notification via SIGIO
 * if the socket has the SS_ASYNC flag set.
 */
sowakeup(so, sb)
	register struct socket *so;
	struct sockbuf *sb;
{
	register struct proc *p;

	sbwakeup(sb);
	if (so->so_state & SS_ASYNC) {
		if (so->so_pgrp < 0)
			gsignal(-so->so_pgrp, SIGIO);
		else if (so->so_pgrp > 0 && (p = pfind(so->so_pgrp)) != 0)
			psignal(p, SIGIO);
	}
}

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing select() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_act field. The upper
 * level routine soreceive() expects the following conventions to be
 * observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_RIGHTS).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space
 * should be released by calling sbrelease() when the socket is destroyed.
 */

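/*
 * Illustrative layout (added commentary): a datagram queued with
 * sbappendaddr() below ends up in the receive buffer as one record
 * whose mbufs are linked through m_next,
 *
 *	sb_mb -> MT_SONAME -> MT_RIGHTS -> MT_DATA -> ...
 *
 * while the first mbuf of the next record is reached through the
 * m_act field of the first mbuf of this one.
 */
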
soreserve(so, sndcc, rcvcc)
	register struct socket *so;
	u_long sndcc, rcvcc;
{

	if (sbreserve(&so->so_snd, sndcc) == 0)
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc) == 0)
		goto bad2;
	return (0);
bad2:
	sbrelease(&so->so_snd);
bad:
	return (ENOBUFS);
}

/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale cc so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 */
sbreserve(sb, cc)
	struct sockbuf *sb;
	u_long cc;
{

	if (cc > (u_long)SB_MAX * CLBYTES / (2 * MSIZE + CLBYTES))
		return (0);
	sb->sb_hiwat = cc;
	sb->sb_mbmax = MIN(cc * 2, SB_MAX);
	return (1);
}

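/*
 * Worked example (added commentary; MSIZE and CLBYTES are machine
 * dependent): with MSIZE = 128 and CLBYTES = 1024, the check above
 * limits cc to SB_MAX * 1024 / 1280, i.e. four fifths of SB_MAX.
 * Allowing sb_mbmax to be twice cc (but no more than SB_MAX) covers
 * the mbuf overhead as long as mbufs are on average at least half
 * full, so sb_mbcnt should not normally become the limiting factor.
 */
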
/*
 * Free mbufs held by a socket, and reserved mbuf space.
 */
sbrelease(sb)
	struct sockbuf *sb;
{

	sbflush(sb);
	sb->sb_hiwat = sb->sb_mbmax = 0;
}

/*
 * Routines to add and remove
 * data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available by comparing the value returned by sbspace()
 * with the amount of data to be added.  sbappendrecord() differs from
 * sbappend() in that the data supplied is treated as the beginning of
 * a new record.  To place a sender's address, optional access rights,
 * and data in a socket receive buffer, sbappendaddr() should be used.
 * To place access rights and data in a socket receive buffer,
 * sbappendrights() should be used.  In either case, the new data
 * begins a new record.  Note that unlike sbappend() and
 * sbappendrecord(), these routines check for the caller that there
 * will be enough space to store the data.  Each fails if there is not
 * enough space, or if it cannot find mbufs to store additional
 * information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer by a protocol with m_copy() for output to a peer, and
 * is then removed from the socket buffer with sbdrop() or
 * sbdroprecord() when the data is acknowledged by the peer.
 */

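/*
 * Send-side sketch (added commentary, not taken from this file):
 * sosend() places outgoing data in so_snd with sbappend() after
 * checking sbspace(); a reliable protocol then transmits copies,
 * for example
 *
 *	m = m_copy(so->so_snd.sb_mb, off, len);
 *	(hand m to the output routine)
 *
 * and, when the peer acknowledges "acked" bytes,
 *
 *	sbdrop(&so->so_snd, acked);
 *	sowwakeup(so);
 *
 * so that a writer blocked in sbwait() on so_snd may continue.
 */
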
/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated
 * with the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
sbappend(sb, m)
	struct sockbuf *sb;
	struct mbuf *m;
{
	register struct mbuf *n;

	if (m == 0)
		return;
	if (n = sb->sb_mb) {
		while (n->m_act)
			n = n->m_act;
		while (n->m_next)
			n = n->m_next;
	}
	sbcompress(sb, m, n);
}

/*
 * As above, except the mbuf chain
 * begins a new record.
 */
sbappendrecord(sb, m0)
	register struct sockbuf *sb;
	register struct mbuf *m0;
{
	register struct mbuf *m;

	if (m0 == 0)
		return;
	if (m = sb->sb_mb)
		while (m->m_act)
			m = m->m_act;
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	if (m)
		m->m_act = m0;
	else
		sb->sb_mb = m0;
	m = m0->m_next;
	m0->m_next = 0;
	sbcompress(sb, m, m0);
}

/*
 * Append address and data, and optionally, rights
 * to the receive queue of a socket.  Return 0 if
 * no space in sockbuf or insufficient mbufs.
 */
sbappendaddr(sb, asa, m0, rights0)
	register struct sockbuf *sb;
	struct sockaddr *asa;
	struct mbuf *rights0, *m0;
{
	register struct mbuf *m, *n;
	int space = sizeof (*asa);

	for (m = m0; m; m = m->m_next)
		space += m->m_len;
	if (rights0)
		space += rights0->m_len;
	if (space > sbspace(sb))
		return (0);
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	*mtod(m, struct sockaddr *) = *asa;
	m->m_len = sizeof (*asa);
	if (rights0 && rights0->m_len) {
		m->m_next = m_copy(rights0, 0, rights0->m_len);
		if (m->m_next == 0) {
			m_freem(m);
			return (0);
		}
		sballoc(sb, m->m_next);
	}
	sballoc(sb, m);
	if (n = sb->sb_mb) {
		while (n->m_act)
			n = n->m_act;
		n->m_act = m;
	} else
		sb->sb_mb = m;
	if (m->m_next)
		m = m->m_next;
	if (m0)
		sbcompress(sb, m0, m);
	return (1);
}

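/*
 * Usage sketch (added commentary): a datagram protocol's input
 * routine, having located the receiving socket so and built the
 * sender's address in a struct sockaddr addr, would do roughly
 *
 *	if (sbappendaddr(&so->so_rcv, &addr, m, (struct mbuf *)0) == 0)
 *		m_freem(m);		(no space; datagram dropped)
 *	else
 *		sorwakeup(so);
 *
 * much as the UDP input routine does.
 */
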
sbappendrights(sb, m0, rights)
	struct sockbuf *sb;
	struct mbuf *rights, *m0;
{
	register struct mbuf *m, *n;
	int space = 0;

	if (rights == 0)
		panic("sbappendrights");
	for (m = m0; m; m = m->m_next)
		space += m->m_len;
	space += rights->m_len;
	if (space > sbspace(sb))
		return (0);
	m = m_copy(rights, 0, rights->m_len);
	if (m == 0)
		return (0);
	sballoc(sb, m);
	if (n = sb->sb_mb) {
		while (n->m_act)
			n = n->m_act;
		n->m_act = m;
	} else
		sb->sb_mb = m;
	if (m0)
		sbcompress(sb, m0, m);
	return (1);
}

/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
sbcompress(sb, m, n)
	register struct sockbuf *sb;
	register struct mbuf *m, *n;
{

	while (m) {
		if (m->m_len == 0) {
			m = m_free(m);
			continue;
		}
		if (n && n->m_off <= MMAXOFF && m->m_off <= MMAXOFF &&
		    (n->m_off + n->m_len + m->m_len) <= MMAXOFF &&
		    n->m_type == m->m_type) {
			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		sballoc(sb, m);
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		n = m;
		m = m->m_next;
		n->m_next = 0;
	}
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
sbflush(sb)
	register struct sockbuf *sb;
{

	if (sb->sb_flags & SB_LOCK)
		panic("sbflush");
	while (sb->sb_mbcnt)
		sbdrop(sb, (int)sb->sb_cc);
	if (sb->sb_cc || sb->sb_mbcnt || sb->sb_mb)
		panic("sbflush 2");
}

/*
 * Drop data from (the front of) a sockbuf.
 */
sbdrop(sb, len)
	register struct sockbuf *sb;
	register int len;
{
	register struct mbuf *m, *mn;
	struct mbuf *next;

	next = (m = sb->sb_mb) ? m->m_act : 0;
	while (len > 0) {
		if (m == 0) {
			if (next == 0)
				panic("sbdrop");
			m = next;
			next = m->m_act;
			continue;
		}
		if (m->m_len > len) {
			m->m_len -= len;
			m->m_off += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	if (m) {
		sb->sb_mb = m;
		m->m_act = next;
	} else
		sb->sb_mb = next;
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
sbdroprecord(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m, *mn;

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_act;
		do {
			sbfree(sb, m);
			MFREE(m, mn);
		} while (m = mn);
	}
}