xref: /csrg-svn/sys/kern/uipc_socket2.c (revision 21765)
1 /*	uipc_socket2.c	6.8	85/06/02	*/
2 
3 #include "param.h"
4 #include "systm.h"
5 #include "dir.h"
6 #include "user.h"
7 #include "proc.h"
8 #include "file.h"
9 #include "inode.h"
10 #include "buf.h"
11 #include "mbuf.h"
12 #include "protosw.h"
13 #include "socket.h"
14 #include "socketvar.h"
15 
16 /*
17  * Primitive routines for operating on sockets and socket buffers
18  */
19 
20 /*
21  * Procedures to manipulate state flags of socket
22  * and do appropriate wakeups.  Normal sequence from the
23  * active (originating) side is that soisconnecting() is
24  * called during processing of connect() call,
25  * resulting in an eventual call to soisconnected() if/when the
26  * connection is established.  When the connection is torn down
27  * soisdisconnecting() is called during processing of disconnect() call,
28  * and soisdisconnected() is called when the connection to the peer
29  * is totally severed.  The semantics of these routines are such that
30  * connectionless protocols can call soisconnected() and soisdisconnected()
31  * only, bypassing the in-progress calls when setting up a ``connection''
32  * takes no time.
33  *
34  * From the passive side, a socket is created with
35  * two queues of sockets: so_q0 for connections in progress
36  * and so_q for connections already made and awaiting user acceptance.
37  * As a protocol is preparing incoming connections, it creates a socket
38  * structure queued on so_q0 by calling sonewconn().  When the connection
39  * is established, soisconnected() is called, and transfers the
40  * socket structure to so_q, making it available to accept().
41  *
42  * If a socket is closed with sockets on either
43  * so_q0 or so_q, these sockets are dropped.
44  *
45  * If higher level protocols are implemented in
46  * the kernel, the wakeups done here will sometimes
47  * cause software-interrupt process scheduling.
48  */
49 
50 soisconnecting(so)
51 	register struct socket *so;
52 {
53 
54 	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
55 	so->so_state |= SS_ISCONNECTING;
56 	wakeup((caddr_t)&so->so_timeo);
57 }
58 
59 soisconnected(so)
60 	register struct socket *so;
61 {
62 	register struct socket *head = so->so_head;
63 
64 	if (head) {
65 		if (soqremque(so, 0) == 0)
66 			panic("soisconnected");
67 		soqinsque(head, so, 1);
68 		sorwakeup(head);
69 		wakeup((caddr_t)&head->so_timeo);
70 	}
71 	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING);
72 	so->so_state |= SS_ISCONNECTED;
73 	wakeup((caddr_t)&so->so_timeo);
74 	sorwakeup(so);
75 	sowwakeup(so);
76 }
77 
78 soisdisconnecting(so)
79 	register struct socket *so;
80 {
81 
82 	so->so_state &= ~SS_ISCONNECTING;
83 	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
84 	wakeup((caddr_t)&so->so_timeo);
85 	sowwakeup(so);
86 	sorwakeup(so);
87 }
88 
89 soisdisconnected(so)
90 	register struct socket *so;
91 {
92 
93 	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
94 	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
95 	wakeup((caddr_t)&so->so_timeo);
96 	sowwakeup(so);
97 	sorwakeup(so);
98 }
99 
100 /*
101  * When an attempt at a new connection is noted on a socket
102  * which accepts connections, sonewconn is called.  If the
103  * connection is possible (subject to space constraints, etc.)
104  * then we allocate a new structure, propoerly linked into the
105  * data structure of the original socket, and return this.
106  */
107 struct socket *
108 sonewconn(head)
109 	register struct socket *head;
110 {
111 	register struct socket *so;
112 	register struct mbuf *m;
113 
114 	if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
115 		goto bad;
116 	m = m_getclr(M_DONTWAIT, MT_SOCKET);
117 	if (m == NULL)
118 		goto bad;
119 	so = mtod(m, struct socket *);
120 	so->so_type = head->so_type;
121 	so->so_options = head->so_options &~ SO_ACCEPTCONN;
122 	so->so_linger = head->so_linger;
123 	so->so_state = head->so_state | SS_NOFDREF;
124 	so->so_proto = head->so_proto;
125 	so->so_timeo = head->so_timeo;
126 	so->so_pgrp = head->so_pgrp;
127 	soqinsque(head, so, 0);
128 	if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH,
129 	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0)) {
130 		(void) soqremque(so, 0);
131 		(void) m_free(m);
132 		goto bad;
133 	}
134 	return (so);
135 bad:
136 	return ((struct socket *)0);
137 }
138 
139 soqinsque(head, so, q)
140 	register struct socket *head, *so;
141 	int q;
142 {
143 
144 	so->so_head = head;
145 	if (q == 0) {
146 		head->so_q0len++;
147 		so->so_q0 = head->so_q0;
148 		head->so_q0 = so;
149 	} else {
150 		head->so_qlen++;
151 		so->so_q = head->so_q;
152 		head->so_q = so;
153 	}
154 }
155 
156 soqremque(so, q)
157 	register struct socket *so;
158 	int q;
159 {
160 	register struct socket *head, *prev, *next;
161 
162 	head = so->so_head;
163 	prev = head;
164 	for (;;) {
165 		next = q ? prev->so_q : prev->so_q0;
166 		if (next == so)
167 			break;
168 		if (next == head)
169 			return (0);
170 		prev = next;
171 	}
172 	if (q == 0) {
173 		prev->so_q0 = next->so_q0;
174 		head->so_q0len--;
175 	} else {
176 		prev->so_q = next->so_q;
177 		head->so_qlen--;
178 	}
179 	next->so_q0 = next->so_q = 0;
180 	next->so_head = 0;
181 	return (1);
182 }
183 
184 /*
185  * Socantsendmore indicates that no more data will be sent on the
186  * socket; it would normally be applied to a socket when the user
187  * informs the system that no more data is to be sent, by the protocol
188  * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
189  * will be received, and will normally be applied to the socket by a
190  * protocol when it detects that the peer will send no more data.
191  * Data queued for reading in the socket may yet be read.
192  */
193 
194 socantsendmore(so)
195 	struct socket *so;
196 {
197 
198 	so->so_state |= SS_CANTSENDMORE;
199 	sowwakeup(so);
200 }
201 
202 socantrcvmore(so)
203 	struct socket *so;
204 {
205 
206 	so->so_state |= SS_CANTRCVMORE;
207 	sorwakeup(so);
208 }
209 
210 /*
211  * Socket select/wakeup routines.
212  */
213 
214 /*
215  * Queue a process for a select on a socket buffer.
216  */
217 sbselqueue(sb)
218 	struct sockbuf *sb;
219 {
220 	register struct proc *p;
221 
222 	if ((p = sb->sb_sel) && p->p_wchan == (caddr_t)&selwait)
223 		sb->sb_flags |= SB_COLL;
224 	else
225 		sb->sb_sel = u.u_procp;
226 }
227 
228 /*
229  * Wait for data to arrive at/drain from a socket buffer.
230  */
231 sbwait(sb)
232 	struct sockbuf *sb;
233 {
234 
235 	sb->sb_flags |= SB_WAIT;
236 	sleep((caddr_t)&sb->sb_cc, PZERO+1);
237 }
238 
239 /*
240  * Wakeup processes waiting on a socket buffer.
241  */
242 sbwakeup(sb)
243 	register struct sockbuf *sb;
244 {
245 
246 	if (sb->sb_sel) {
247 		selwakeup(sb->sb_sel, sb->sb_flags & SB_COLL);
248 		sb->sb_sel = 0;
249 		sb->sb_flags &= ~SB_COLL;
250 	}
251 	if (sb->sb_flags & SB_WAIT) {
252 		sb->sb_flags &= ~SB_WAIT;
253 		wakeup((caddr_t)&sb->sb_cc);
254 	}
255 }
256 
257 /*
258  * Wakeup socket readers and writers.
259  * Do asynchronous notification via SIGIO
260  * if the socket has the SS_ASYNC flag set.
261  */
262 sowakeup(so, sb)
263 	register struct socket *so;
264 	struct sockbuf *sb;
265 {
266 	register struct proc *p;
267 
268 	sbwakeup(sb);
269 	if (so->so_state & SS_ASYNC) {
270 		if (so->so_pgrp == 0)
271 			return;
272 		else if (so->so_pgrp > 0)
273 			gsignal(so->so_pgrp, SIGIO);
274 		else if ((p = pfind(-so->so_pgrp)) != 0)
275 			psignal(p, SIGIO);
276 	}
277 }
278 
279 /*
280  * Socket buffer (struct sockbuf) utility routines.
281  *
282  * Each socket contains two socket buffers: one for sending data and
283  * one for receiving data.  Each buffer contains a queue of mbufs,
284  * information about the number of mbufs and amount of data in the
285  * queue, and other fields allowing select() statements and notification
286  * on data availability to be implemented.
287  *
288  * Data stored in a socket buffer is maintained as a list of records.
289  * Each record is a list of mbufs chained together with the m_next
290  * field.  Records are chained together with the m_act field. The upper
291  * level routine soreceive() expects the following conventions to be
292  * observed when placing information in the receive buffer:
293  *
294  * 1. If the protocol requires each message be preceded by the sender's
295  *    name, then a record containing that name must be present before
296  *    any associated data (mbuf's must be of type MT_SONAME).
297  * 2. If the protocol supports the exchange of ``access rights'' (really
298  *    just additional data associated with the message), and there are
299  *    ``rights'' to be received, then a record containing this data
300  *    should be present (mbuf's must be of type MT_RIGHTS).
301  * 3. If a name or rights record exists, then it must be followed by
302  *    a data record, perhaps of zero length.
303  *
304  * Before using a new socket structure it is first necessary to reserve
305  * buffer space to the socket, by calling sbreserve().  This commits
306  * some of the available buffer space in the system buffer pool for the
307  * socket.  The space should be released by calling sbrelease() when the
308  * socket is destroyed.
309  *
310  * The routines sbappend() or sbappendrecord() are normally called to
311  * append new mbufs to a socket buffer, after checking that adequate
312  * space is available, comparing the function sbspace() with the amount
313  * of data to be added.  sbappendrecord() differs from sbappend() in
314  * that data supplied is treated as the beginning of a new record.
315  * Data is normally removed from a socket buffer in a protocol by
316  * first calling m_copy on the socket buffer mbuf chain and sending this
317  * to a peer, and then removing the data from the socket buffer with
318  * sbdrop() or sbdroprecord() when the data is acknowledged by the peer
319  * (or immediately in the case of unreliable protocols.)
320  *
321  * To place a sender's name, optionally, access rights, and data in a
322  * socket buffer sbappendaddr() should be used.  To place access rights
323  * and data in a socket buffer sbappendrights() should be used.  Note
324  * that unlike sbappend() and sbappendrecord(), these routines check
325  * for the caller that there will be enough space to store the data.
326  * Each fails if there is not enough space, or if it cannot find mbufs
327  * to store additional information in.
328  */
329 
330 soreserve(so, sndcc, rcvcc)
331 	register struct socket *so;
332 	int sndcc, rcvcc;
333 {
334 
335 	if (sbreserve(&so->so_snd, sndcc) == 0)
336 		goto bad;
337 	if (sbreserve(&so->so_rcv, rcvcc) == 0)
338 		goto bad2;
339 	return (0);
340 bad2:
341 	sbrelease(&so->so_snd);
342 bad:
343 	return (ENOBUFS);
344 }
345 
346 /*
347  * Allot mbufs to a sockbuf.
348  */
349 sbreserve(sb, cc)
350 	struct sockbuf *sb;
351 {
352 
353 	if ((unsigned) cc > SB_MAX)
354 		return (0);
355 	/* someday maybe this routine will fail... */
356 	sb->sb_hiwat = cc;
357 	/* * 2 implies names can be no more than 1 mbuf each */
358 	sb->sb_mbmax = MIN(cc * 2, SB_MAX);
359 	return (1);
360 }
361 
362 /*
363  * Free mbufs held by a socket, and reserved mbuf space.
364  */
365 sbrelease(sb)
366 	struct sockbuf *sb;
367 {
368 
369 	sbflush(sb);
370 	sb->sb_hiwat = sb->sb_mbmax = 0;
371 }
372 
373 /*
374  * Routines to add and remove
375  * data from an mbuf queue.
376  */
377 
378 /*
379  * Append mbuf chain m to the last record in the
380  * socket buffer sb.  The additional space associated
381  * the mbuf chain is recorded in sb.  Empty mbufs are
382  * discarded and mbufs are compacted where possible.
383  */
384 sbappend(sb, m)
385 	struct sockbuf *sb;
386 	struct mbuf *m;
387 {
388 	register struct mbuf *n;
389 
390 	if (m == 0)
391 		return;
392 	if (n = sb->sb_mb) {
393 		while (n->m_act)
394 			n = n->m_act;
395 		while (n->m_next)
396 			n = n->m_next;
397 	}
398 	sbcompress(sb, m, n);
399 }
400 
401 /*
402  * As above, except the mbuf chain
403  * begins a new record.
404  */
405 sbappendrecord(sb, m0)
406 	register struct sockbuf *sb;
407 	register struct mbuf *m0;
408 {
409 	register struct mbuf *m;
410 
411 	if (m0 == 0)
412 		return;
413 	if (m = sb->sb_mb)
414 		while (m->m_act)
415 			m = m->m_act;
416 	/*
417 	 * Put the first mbuf on the queue.
418 	 * Note this permits zero length records.
419 	 */
420 	sballoc(sb, m0);
421 	if (m)
422 		m->m_act = m0;
423 	else
424 		sb->sb_mb = m0;
425 	m = m0->m_next;
426 	m0->m_next = 0;
427 	sbcompress(sb, m, m0);
428 }
429 
430 /*
431  * Append address and data, and optionally, rights
432  * to the receive queue of a socket.  Return 0 if
433  * no space in sockbuf or insufficient mbufs.
434  */
435 sbappendaddr(sb, asa, m0, rights0)		/* XXX */
436 	register struct sockbuf *sb;
437 	struct sockaddr *asa;
438 	struct mbuf *rights0, *m0;
439 {
440 	register struct mbuf *m, *n;
441 	int space = sizeof (*asa);
442 
443 	m = m0;
444 	if (m == 0)
445 		panic("sbappendaddr");
446 	do {
447 		space += m->m_len;
448 		m = m->m_next;
449 	} while (m);
450 	if (rights0)
451 		space += rights0->m_len;
452 	if (space > sbspace(sb))
453 		return (0);
454 	m = m_get(M_DONTWAIT, MT_SONAME);
455 	if (m == 0)
456 		return (0);
457 	*mtod(m, struct sockaddr *) = *asa;
458 	m->m_len = sizeof (*asa);
459 	if (rights0) {
460 		m->m_act = m_copy(rights0, 0, rights0->m_len);
461 		if (m->m_act == 0) {
462 			m_freem(m);
463 			return (0);
464 		}
465 		sballoc(sb, m->m_act);
466 	}
467 	sballoc(sb, m);
468 	if (n = sb->sb_mb) {
469 		while (n->m_act)
470 			n = n->m_act;
471 		n->m_act = m;
472 	} else
473 		sb->sb_mb = m;
474 	if (m->m_act)
475 		m = m->m_act;
476 	sballoc(sb, m0);
477 	m->m_act = m0;
478 	m = m0->m_next;
479 	m0->m_next = 0;
480 	if (m)
481 		sbcompress(sb, m, m0);
482 	return (1);
483 }
484 
#ifdef notdef
/*
 * Append access rights and data to the receive queue of a socket.
 * A copy of rights becomes one record and the data chain m0 the
 * record after it.  Both arguments must be non-null.  Returns 1 on
 * success, or 0 if there is no space in the sockbuf or the rights
 * cannot be copied; on failure nothing has been queued.
 */
sbappendrights(sb, rights, m0)
	struct sockbuf *sb;
	struct mbuf *rights, *m0;
{
	register struct mbuf *m, *n;
	int space = 0;

	m = m0;
	if (m == 0 || rights == 0)
		panic("sbappendrights");
	do {
		space += m->m_len;
		m = m->m_next;
	} while (m);
	space += rights->m_len;
	if (space > sbspace(sb))
		return (0);
	m = m_copy(rights, 0, rights->m_len);
	if (m == 0)
		return (0);
	sballoc(sb, m);
	if (n = sb->sb_mb) {
		while (n->m_act)
			n = n->m_act;
		n->m_act = m;
	} else
		sb->sb_mb = m;	/* empty buffer: rights start the queue */
	sballoc(sb, m0);
	m->m_act = m0;
	m = m0->m_next;
	m0->m_next = 0;
	if (m)
		sbcompress(sb, m, m0);
	return (1);
}
#endif
522 
523 /*
524  * Compress mbuf chain m into the socket
525  * buffer sb following mbuf n.  If n
526  * is null, the buffer is presumed empty.
527  */
528 sbcompress(sb, m, n)
529 	register struct sockbuf *sb;
530 	register struct mbuf *m, *n;
531 {
532 
533 	while (m) {
534 		if (m->m_len == 0) {
535 			m = m_free(m);
536 			continue;
537 		}
538 		if (n && n->m_off <= MMAXOFF && m->m_off <= MMAXOFF &&
539 		    (n->m_off + n->m_len + m->m_len) <= MMAXOFF) {
540 			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
541 			    (unsigned)m->m_len);
542 			n->m_len += m->m_len;
543 			sb->sb_cc += m->m_len;
544 			m = m_free(m);
545 			continue;
546 		}
547 		sballoc(sb, m);
548 		if (n)
549 			n->m_next = m;
550 		else
551 			sb->sb_mb = m;
552 		n = m;
553 		m = m->m_next;
554 		n->m_next = 0;
555 	}
556 }
557 
558 /*
559  * Free all mbufs in a sockbuf.
560  * Check that all resources are reclaimed.
561  */
562 sbflush(sb)
563 	register struct sockbuf *sb;
564 {
565 
566 	if (sb->sb_flags & SB_LOCK)
567 		panic("sbflush");
568 	if (sb->sb_cc)
569 		sbdrop(sb, sb->sb_cc);
570 	if (sb->sb_cc || sb->sb_mbcnt || sb->sb_mb)
571 		panic("sbflush 2");
572 }
573 
574 /*
575  * Drop data from (the front of) a sockbuf.
576  */
577 struct mbuf *
578 sbdrop(sb, len)
579 	register struct sockbuf *sb;
580 	register int len;
581 {
582 	register struct mbuf *m, *mn;
583 	struct mbuf *next;
584 
585 	next = (m = sb->sb_mb) ? m->m_act : 0;
586 	while (len > 0) {
587 		if (m == 0) {
588 			if (next == 0)
589 				panic("sbdrop");
590 			m = next;
591 			next = m->m_act;
592 			continue;
593 		}
594 		if (m->m_len > len) {
595 			m->m_len -= len;
596 			m->m_off += len;
597 			sb->sb_cc -= len;
598 			break;
599 		}
600 		len -= m->m_len;
601 		sbfree(sb, m);
602 		MFREE(m, mn);
603 		m = mn;
604 	}
605 	while (m && m->m_len == 0) {
606 		sbfree(sb, m);
607 		MFREE(m, mn);
608 		m = mn;
609 	}
610 	if (m) {
611 		sb->sb_mb = m;
612 		m->m_act = next;
613 	} else
614 		sb->sb_mb = next;
615 	return (sb->sb_mb);
616 }
617 
618 /*
619  * Drop a record off the front of a sockbuf
620  * and move the next record to the front.
621  */
622 struct mbuf *
623 sbdroprecord(sb)
624 	register struct sockbuf *sb;
625 {
626 	register struct mbuf *m, *mn;
627 
628 	m = sb->sb_mb;
629 	if (m) {
630 		sb->sb_mb = m->m_act;
631 		do {
632 			sbfree(sb, m);
633 			MFREE(m, mn);
634 		} while (m = mn);
635 	}
636 	return (sb->sb_mb);
637 }
638