xref: /csrg-svn/sys/kern/uipc_socket2.c (revision 9027)
1*9027Sroot /*	uipc_socket2.c	4.31	82/11/03	*/
24903Swnj 
34903Swnj #include "../h/param.h"
44903Swnj #include "../h/systm.h"
54903Swnj #include "../h/dir.h"
64903Swnj #include "../h/user.h"
74903Swnj #include "../h/proc.h"
84903Swnj #include "../h/file.h"
94903Swnj #include "../h/inode.h"
104903Swnj #include "../h/buf.h"
114903Swnj #include "../h/mbuf.h"
124903Swnj #include "../h/protosw.h"
134903Swnj #include "../h/socket.h"
144903Swnj #include "../h/socketvar.h"
154903Swnj 
164903Swnj /*
174903Swnj  * Primitive routines for operating on sockets and socket buffers
184903Swnj  */
194903Swnj 
204903Swnj /*
214903Swnj  * Procedures to manipulate state flags of socket
227509Sroot  * and do appropriate wakeups.  Normal sequence from the
237509Sroot  * active (originating) side is that soisconnecting() is
247509Sroot  * called during processing of connect() call,
255169Swnj  * resulting in an eventual call to soisconnected() if/when the
265169Swnj  * connection is established.  When the connection is torn down
275169Swnj  * soisdisconnecting() is called during processing of disconnect() call,
285169Swnj  * and soisdisconnected() is called when the connection to the peer
295169Swnj  * is totally severed.  The semantics of these routines are such that
305169Swnj  * connectionless protocols can call soisconnected() and soisdisconnected()
315169Swnj  * only, bypassing the in-progress calls when setting up a ``connection''
325169Swnj  * takes no time.
335169Swnj  *
347509Sroot  * From the passive side, a socket is created with SO_ACCEPTCONN
357509Sroot  * creating two queues of sockets: so_q0 for connections in progress
367509Sroot  * and so_q for connections already made and awaiting user acceptance.
377509Sroot  * As a protocol is preparing incoming connections, it creates a socket
387509Sroot  * structure queued on so_q0 by calling sonewconn().  When the connection
397509Sroot  * is established, soisconnected() is called, and transfers the
407509Sroot  * socket structure to so_q, making it available to accept().
417509Sroot  *
427509Sroot  * If a SO_ACCEPTCONN socket is closed with sockets on either
437509Sroot  * so_q0 or so_q, these sockets are dropped.
447509Sroot  *
457509Sroot  * If and when higher level protocols are implemented in
465169Swnj  * the kernel, the wakeups done here will sometimes
475169Swnj  * be implemented as software-interrupt process scheduling.
484903Swnj  */
495169Swnj 
504903Swnj soisconnecting(so)
514903Swnj 	struct socket *so;
524903Swnj {
534903Swnj 
544903Swnj 	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
554903Swnj 	so->so_state |= SS_ISCONNECTING;
564903Swnj 	wakeup((caddr_t)&so->so_timeo);
574903Swnj }
584903Swnj 
594903Swnj soisconnected(so)
604903Swnj 	struct socket *so;
614903Swnj {
627509Sroot 	register struct socket *head = so->so_head;
634903Swnj 
647509Sroot 	if (head) {
657509Sroot 		if (soqremque(so, 0) == 0)
667509Sroot 			panic("soisconnected");
677509Sroot 		soqinsque(head, so, 1);
687509Sroot 		wakeup((caddr_t)&head->so_timeo);
697509Sroot 	}
704903Swnj 	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING);
714903Swnj 	so->so_state |= SS_ISCONNECTED;
724903Swnj 	wakeup((caddr_t)&so->so_timeo);
735578Swnj 	sorwakeup(so);
745578Swnj 	sowwakeup(so);
754903Swnj }
764903Swnj 
774903Swnj soisdisconnecting(so)
784903Swnj 	struct socket *so;
794903Swnj {
804903Swnj 
815248Sroot 	so->so_state &= ~SS_ISCONNECTING;
824903Swnj 	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
834903Swnj 	wakeup((caddr_t)&so->so_timeo);
845170Swnj 	sowwakeup(so);
855169Swnj 	sorwakeup(so);
864903Swnj }
874903Swnj 
884903Swnj soisdisconnected(so)
894903Swnj 	struct socket *so;
904903Swnj {
914903Swnj 
924903Swnj 	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
934903Swnj 	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
944903Swnj 	wakeup((caddr_t)&so->so_timeo);
954903Swnj 	sowwakeup(so);
964903Swnj 	sorwakeup(so);
974903Swnj }
984903Swnj 
995169Swnj /*
1007509Sroot  * When an attempt at a new connection is noted on a socket
1017509Sroot  * which accepts connections, sonewconn is called.  If the
1027509Sroot  * connection is possible (subject to space constraints, etc.)
1037509Sroot  * then we allocate a new structure, propoerly linked into the
1047509Sroot  * data structure of the original socket, and return this.
1057509Sroot  */
1067509Sroot struct socket *
1077509Sroot sonewconn(head)
1087509Sroot 	register struct socket *head;
1097509Sroot {
1107509Sroot 	register struct socket *so;
1117509Sroot 	struct mbuf *m;
1127509Sroot 
1137509Sroot 	if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
1147509Sroot 		goto bad;
1157509Sroot 	m = m_getclr(M_DONTWAIT);
1167509Sroot 	if (m == 0)
1177509Sroot 		goto bad;
1187509Sroot 	so = mtod(m, struct socket *);
1197509Sroot 	so->so_type = head->so_type;
1207509Sroot 	so->so_options = head->so_options &~ SO_ACCEPTCONN;
1217509Sroot 	so->so_linger = head->so_linger;
1227509Sroot 	so->so_state = head->so_state;
1237509Sroot 	so->so_proto = head->so_proto;
1247509Sroot 	so->so_timeo = head->so_timeo;
1257509Sroot 	so->so_pgrp = head->so_pgrp;
1267509Sroot 	soqinsque(head, so, 0);
1278301Sroot 	if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH, 0, 0, 0)) {
1287509Sroot 		(void) soqremque(so, 0);
1298818Sroot 		(void) m_free(m);
1307509Sroot 		goto bad;
1317509Sroot 	}
1327509Sroot 	return (so);
1337509Sroot bad:
1347509Sroot 	return ((struct socket *)0);
1357509Sroot }
1367509Sroot 
1377509Sroot soqinsque(head, so, q)
1387509Sroot 	register struct socket *head, *so;
1397509Sroot 	int q;
1407509Sroot {
1417509Sroot 
1427509Sroot 	so->so_head = head;
1437509Sroot 	if (q == 0) {
1447509Sroot 		head->so_q0len++;
1457509Sroot 		so->so_q0 = head->so_q0;
1467509Sroot 		head->so_q0 = so;
1477509Sroot 	} else {
1487509Sroot 		head->so_qlen++;
1497509Sroot 		so->so_q = head->so_q;
1507509Sroot 		head->so_q = so;
1517509Sroot 	}
1527509Sroot }
1537509Sroot 
1547509Sroot soqremque(so, q)
1557509Sroot 	register struct socket *so;
1567509Sroot 	int q;
1577509Sroot {
1587509Sroot 	register struct socket *head, *prev, *next;
1597509Sroot 
1607509Sroot 	head = so->so_head;
1617509Sroot 	prev = head;
1627509Sroot 	for (;;) {
1637509Sroot 		next = q ? prev->so_q : prev->so_q0;
1647509Sroot 		if (next == so)
1657509Sroot 			break;
1667509Sroot 		if (next == head)
1677509Sroot 			return (0);
1687509Sroot 		prev = next;
1697509Sroot 	}
1707509Sroot 	if (q == 0) {
1717509Sroot 		prev->so_q0 = next->so_q0;
1727509Sroot 		head->so_q0len--;
1737509Sroot 	} else {
1747509Sroot 		prev->so_q = next->so_q;
1757509Sroot 		head->so_qlen--;
1767509Sroot 	}
1777509Sroot 	next->so_q0 = next->so_q = 0;
1787509Sroot 	next->so_head = 0;
1797509Sroot 	return (1);
1807509Sroot }
1817509Sroot 
1827509Sroot /*
1835169Swnj  * Socantsendmore indicates that no more data will be sent on the
1845169Swnj  * socket; it would normally be applied to a socket when the user
1855169Swnj  * informs the system that no more data is to be sent, by the protocol
1865169Swnj  * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
1875169Swnj  * will be received, and will normally be applied to the socket by a
1885169Swnj  * protocol when it detects that the peer will send no more data.
1895169Swnj  * Data queued for reading in the socket may yet be read.
1905169Swnj  */
1915169Swnj 
1924917Swnj socantsendmore(so)
1934917Swnj 	struct socket *so;
1944917Swnj {
1954917Swnj 
1964917Swnj 	so->so_state |= SS_CANTSENDMORE;
1974917Swnj 	sowwakeup(so);
1984917Swnj }
1994917Swnj 
2004917Swnj socantrcvmore(so)
2014917Swnj 	struct socket *so;
2024917Swnj {
2034917Swnj 
2044917Swnj 	so->so_state |= SS_CANTRCVMORE;
2054917Swnj 	sorwakeup(so);
2064917Swnj }
2074917Swnj 
2084903Swnj /*
2095169Swnj  * Socket select/wakeup routines.
2104903Swnj  */
2115169Swnj 
2125169Swnj /*
2135169Swnj  * Interface routine to select() system
2145169Swnj  * call for sockets.
2155169Swnj  */
2165577Swnj soselect(so, rw)
2174903Swnj 	register struct socket *so;
2185577Swnj 	int rw;
2194903Swnj {
2205578Swnj 	int s = splnet();
2214903Swnj 
2225577Swnj 	switch (rw) {
2235577Swnj 
2245577Swnj 	case FREAD:
2255578Swnj 		if (soreadable(so)) {
2265578Swnj 			splx(s);
2274903Swnj 			return (1);
2285578Swnj 		}
2294903Swnj 		sbselqueue(&so->so_rcv);
2305577Swnj 		break;
2315577Swnj 
2325577Swnj 	case FWRITE:
2335578Swnj 		if (sowriteable(so)) {
2345578Swnj 			splx(s);
2354903Swnj 			return (1);
2365578Swnj 		}
2374903Swnj 		sbselqueue(&so->so_snd);
2385577Swnj 		break;
2394903Swnj 	}
2405578Swnj 	splx(s);
2414903Swnj 	return (0);
2424903Swnj }
2434903Swnj 
2444903Swnj /*
2454903Swnj  * Queue a process for a select on a socket buffer.
2464903Swnj  */
2474903Swnj sbselqueue(sb)
2484903Swnj 	struct sockbuf *sb;
2494903Swnj {
2504903Swnj 	register struct proc *p;
2514903Swnj 
2524917Swnj 	if ((p = sb->sb_sel) && p->p_wchan == (caddr_t)&selwait)
2534903Swnj 		sb->sb_flags |= SB_COLL;
2544903Swnj 	else
2554903Swnj 		sb->sb_sel = u.u_procp;
2564903Swnj }
2574903Swnj 
2584903Swnj /*
2594917Swnj  * Wait for data to arrive at/drain from a socket buffer.
2604917Swnj  */
2614917Swnj sbwait(sb)
2624917Swnj 	struct sockbuf *sb;
2634917Swnj {
2644917Swnj 
2654917Swnj 	sb->sb_flags |= SB_WAIT;
2664917Swnj 	sleep((caddr_t)&sb->sb_cc, PZERO+1);
2674917Swnj }
2684917Swnj 
2694917Swnj /*
2704903Swnj  * Wakeup processes waiting on a socket buffer.
2714903Swnj  */
2724903Swnj sbwakeup(sb)
2734903Swnj 	struct sockbuf *sb;
2744903Swnj {
2754903Swnj 
2764903Swnj 	if (sb->sb_sel) {
2774903Swnj 		selwakeup(sb->sb_sel, sb->sb_flags & SB_COLL);
2784903Swnj 		sb->sb_sel = 0;
2794903Swnj 		sb->sb_flags &= ~SB_COLL;
2804903Swnj 	}
2814903Swnj 	if (sb->sb_flags & SB_WAIT) {
2824903Swnj 		sb->sb_flags &= ~SB_WAIT;
2835013Swnj 		wakeup((caddr_t)&sb->sb_cc);
2844903Swnj 	}
2854903Swnj }
2864903Swnj 
2874903Swnj /*
2885169Swnj  * Socket buffer (struct sockbuf) utility routines.
2895169Swnj  *
2905169Swnj  * Each socket contains two socket buffers: one for sending data and
2915169Swnj  * one for receiving data.  Each buffer contains a queue of mbufs,
2925169Swnj  * information about the number of mbufs and amount of data in the
2935169Swnj  * queue, and other fields allowing select() statements and notification
2945169Swnj  * on data availability to be implemented.
2955169Swnj  *
2965169Swnj  * Before using a new socket structure it is first necessary to reserve
2975169Swnj  * buffer space to the socket, by calling sbreserve.  This commits
2985169Swnj  * some of the available buffer space in the system buffer pool for the
2995169Swnj  * socket.  The space should be released by calling sbrelease when the
3005169Swnj  * socket is destroyed.
3015169Swnj  *
3025169Swnj  * The routine sbappend() is normally called to append new mbufs
3035169Swnj  * to a socket buffer, after checking that adequate space is available
3045169Swnj  * comparing the function sbspace() with the amount of data to be added.
3055169Swnj  * Data is normally removed from a socket buffer in a protocol by
3065169Swnj  * first calling m_copy on the socket buffer mbuf chain and sending this
3075169Swnj  * to a peer, and then removing the data from the socket buffer with
3085169Swnj  * sbdrop when the data is acknowledged by the peer (or immediately
3095170Swnj  * in the case of unreliable protocols.)
3105169Swnj  *
3115169Swnj  * Protocols which do not require connections place both source address
3125169Swnj  * and data information in socket buffer queues.  The source addresses
3135169Swnj  * are stored in single mbufs after each data item, and are easily found
3145169Swnj  * as the data items are all marked with end of record markers.  The
3155169Swnj  * sbappendaddr() routine stores a datum and associated address in
3165169Swnj  * a socket buffer.  Note that, unlike sbappend(), this routine checks
3175169Swnj  * for the caller that there will be enough space to store the data.
3185169Swnj  * It fails if there is not enough space, or if it cannot find
3195169Swnj  * a mbuf to store the address in.
3205169Swnj  *
3215169Swnj  * The higher-level routines sosend and soreceive (in socket.c)
3225170Swnj  * also add data to, and remove data from socket buffers respectively.
3235169Swnj  */
3245169Swnj 
325*9027Sroot soreserve(so, sndcc, rcvcc)
326*9027Sroot 	struct socket *so;
327*9027Sroot 	int sndcc, rcvcc;
328*9027Sroot {
329*9027Sroot 
330*9027Sroot 	if (sbreserve(&so->so_snd, sndcc) == 0)
331*9027Sroot 		goto bad;
332*9027Sroot 	if (sbreserve(&so->so_rcv, rcvcc) == 0)
333*9027Sroot 		goto bad2;
334*9027Sroot 	return (0);
335*9027Sroot bad2:
336*9027Sroot 	sbrelease(&so->so_snd);
337*9027Sroot bad:
338*9027Sroot 	return (ENOBUFS);
339*9027Sroot }
340*9027Sroot 
3415169Swnj /*
3424903Swnj  * Allot mbufs to a sockbuf.
3434903Swnj  */
3444903Swnj sbreserve(sb, cc)
3454903Swnj 	struct sockbuf *sb;
3464903Swnj {
3474903Swnj 
3487181Swnj 	/* someday maybe this routine will fail... */
3494980Swnj 	sb->sb_hiwat = cc;
3505042Swnj 	sb->sb_mbmax = cc*2;
3514917Swnj 	return (1);
3524903Swnj }
3534903Swnj 
3544903Swnj /*
3554903Swnj  * Free mbufs held by a socket, and reserved mbuf space.
3564903Swnj  */
3574903Swnj sbrelease(sb)
3584903Swnj 	struct sockbuf *sb;
3594903Swnj {
3604903Swnj 
3614903Swnj 	sbflush(sb);
3624980Swnj 	sb->sb_hiwat = sb->sb_mbmax = 0;
3634903Swnj }
3644903Swnj 
3654903Swnj /*
3664903Swnj  * Routines to add (at the end) and remove (from the beginning)
3674903Swnj  * data from a mbuf queue.
3684903Swnj  */
3694903Swnj 
3704903Swnj /*
3714903Swnj  * Append mbuf queue m to sockbuf sb.
3724903Swnj  */
3734903Swnj sbappend(sb, m)
3744903Swnj 	register struct mbuf *m;
3754903Swnj 	register struct sockbuf *sb;
3764903Swnj {
3776092Sroot 	register struct mbuf *n;
3784903Swnj 
3798549Sroot SBCHECK(sb, "sbappend begin");
3808549Sroot #ifdef notdef
3818549Sroot { struct mbuf *p;
3828549Sroot printf("sba: ");
3838549Sroot for (p = sb->sb_mb; p; p = p->m_next) printf("%x:(%x,%d) ",p,p->m_off,p->m_len);
3848549Sroot printf("+= ");
3858549Sroot for (p = m; p; p = p->m_next) printf("%x:(%x,%d) ",p,p->m_off,p->m_len);
3868549Sroot printf("\n");
3878549Sroot }
3888549Sroot #endif
3896092Sroot 	n = sb->sb_mb;
3906092Sroot 	if (n)
3916092Sroot 		while (n->m_next)
3926092Sroot 			n = n->m_next;
3934903Swnj 	while (m) {
3945266Swnj 		if (m->m_len == 0 && (int)m->m_act == 0) {
3955304Sroot 			m = m_free(m);
3965266Swnj 			continue;
3975266Swnj 		}
3984903Swnj 		if (n && n->m_off <= MMAXOFF && m->m_off <= MMAXOFF &&
3994903Swnj 		   (int)n->m_act == 0 && (int)m->m_act == 0 &&
4005042Swnj 		   (n->m_off + n->m_len + m->m_len) <= MMAXOFF) {
4015042Swnj 			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
4024917Swnj 			    (unsigned)m->m_len);
4034903Swnj 			n->m_len += m->m_len;
4044903Swnj 			sb->sb_cc += m->m_len;
4054903Swnj 			m = m_free(m);
4064903Swnj 			continue;
4074903Swnj 		}
4084903Swnj 		sballoc(sb, m);
4096092Sroot 		if (n == 0)
4106092Sroot 			sb->sb_mb = m;
4116092Sroot 		else
4126092Sroot 			n->m_next = m;
4134903Swnj 		n = m;
4144903Swnj 		m = m->m_next;
4156092Sroot 		n->m_next = 0;
4164903Swnj 	}
4178549Sroot #ifdef notdef
4188549Sroot { struct mbuf *p;
4198549Sroot printf("res: ");
4208549Sroot for (p = sb->sb_mb; p; p = p->m_next) printf("%x:(%x,%d) ",p,p->m_off,p->m_len);
4218549Sroot printf("+= ");
4228549Sroot for (p = m; p; p = p->m_next) printf("%x:(%x,%d) ",p,p->m_off,p->m_len);
4238549Sroot printf("\n");
4244903Swnj }
4258549Sroot #endif
4268549Sroot SBCHECK(sb, "sbappend end");
4278549Sroot }
4284903Swnj 
4295169Swnj /*
4305169Swnj  * Append data and address.
4315169Swnj  * Return 0 if no space in sockbuf or if
4325169Swnj  * can't get mbuf to stuff address in.
4335169Swnj  */
4344928Swnj sbappendaddr(sb, asa, m0)
4354928Swnj 	struct sockbuf *sb;
4364928Swnj 	struct sockaddr *asa;
4374928Swnj 	struct mbuf *m0;
4384928Swnj {
4394928Swnj 	struct sockaddr *msa;
4404928Swnj 	register struct mbuf *m;
4414928Swnj 	register int len = sizeof (struct sockaddr);
4424928Swnj 
4438549Sroot SBCHECK(sb, "sbappendaddr begin");
4445042Swnj 	m = m0;
4455042Swnj 	if (m == 0)
4465042Swnj 		panic("sbappendaddr");
4475042Swnj 	for (;;) {
4484928Swnj 		len += m->m_len;
4495042Swnj 		if (m->m_next == 0) {
4505042Swnj 			m->m_act = (struct mbuf *)1;
4515042Swnj 			break;
4525042Swnj 		}
4535042Swnj 		m = m->m_next;
4545042Swnj 	}
4555043Swnj 	if (len > sbspace(sb))
4564928Swnj 		return (0);
4575586Sroot 	m = m_get(M_DONTWAIT);
4585043Swnj 	if (m == 0)
4594928Swnj 		return (0);
4604928Swnj 	m->m_len = sizeof (struct sockaddr);
4614928Swnj 	msa = mtod(m, struct sockaddr *);
4624928Swnj 	*msa = *asa;
4634928Swnj 	m->m_act = (struct mbuf *)1;
4644928Swnj 	sbappend(sb, m);
4654928Swnj 	sbappend(sb, m0);
4668549Sroot SBCHECK(sb, "sbappendaddr end");
4674928Swnj 	return (1);
4684928Swnj }
4694928Swnj 
4708549Sroot SBCHECK(sb, str)
4718549Sroot 	struct sockbuf *sb;
4728549Sroot 	char *str;
4738549Sroot {
4748549Sroot 	register int cnt = sb->sb_cc;
4758549Sroot 	register int mbcnt = sb->sb_mbcnt;
4768549Sroot 	register struct mbuf *m;
4778549Sroot 
4788549Sroot 	for (m = sb->sb_mb; m; m = m->m_next) {
4798549Sroot 		cnt -= m->m_len;
4808549Sroot 		mbcnt -= MSIZE;
4818549Sroot 		if (m->m_off > MMAXOFF)
4828549Sroot 			mbcnt -= CLBYTES;
4838549Sroot 	}
4848549Sroot 	if (cnt || mbcnt) {
4858549Sroot 		printf("cnt %d mbcnt %d\n", cnt, mbcnt);
4868549Sroot 		panic(str);
4878549Sroot 	}
4888549Sroot }
4898549Sroot 
4904903Swnj /*
4914903Swnj  * Free all mbufs on a sockbuf mbuf chain.
4924903Swnj  * Check that resource allocations return to 0.
4934903Swnj  */
4944903Swnj sbflush(sb)
4954903Swnj 	struct sockbuf *sb;
4964903Swnj {
4974903Swnj 
4984903Swnj 	if (sb->sb_flags & SB_LOCK)
4994903Swnj 		panic("sbflush");
5005266Swnj 	if (sb->sb_cc)
5015266Swnj 		sbdrop(sb, sb->sb_cc);
5024903Swnj 	if (sb->sb_cc || sb->sb_mbcnt || sb->sb_mb)
5034903Swnj 		panic("sbflush 2");
5044903Swnj }
5054903Swnj 
5064903Swnj /*
5074903Swnj  * Drop data from (the front of) a sockbuf chain.
5084903Swnj  */
5094903Swnj sbdrop(sb, len)
5104903Swnj 	register struct sockbuf *sb;
5114903Swnj 	register int len;
5124903Swnj {
5134903Swnj 	register struct mbuf *m = sb->sb_mb, *mn;
5144903Swnj 
5154903Swnj 	while (len > 0) {
5164903Swnj 		if (m == 0)
5174903Swnj 			panic("sbdrop");
5185064Swnj 		if (m->m_len > len) {
5194903Swnj 			m->m_len -= len;
5204903Swnj 			m->m_off += len;
5214903Swnj 			sb->sb_cc -= len;
5224903Swnj 			break;
5234903Swnj 		}
5245064Swnj 		len -= m->m_len;
5255064Swnj 		sbfree(sb, m);
5265064Swnj 		MFREE(m, mn);
5275064Swnj 		m = mn;
5284903Swnj 	}
5294903Swnj 	sb->sb_mb = m;
5304903Swnj }
531