/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)uipc_socket.c	8.6 (Berkeley) 05/02/95
 */
94786Swnj
1056517Sbostic #include <sys/param.h>
1156517Sbostic #include <sys/systm.h>
1256517Sbostic #include <sys/proc.h>
1356517Sbostic #include <sys/file.h>
1456517Sbostic #include <sys/malloc.h>
1556517Sbostic #include <sys/mbuf.h>
1656517Sbostic #include <sys/domain.h>
1756517Sbostic #include <sys/kernel.h>
1856517Sbostic #include <sys/protosw.h>
1956517Sbostic #include <sys/socket.h>
2056517Sbostic #include <sys/socketvar.h>
2156517Sbostic #include <sys/resourcevar.h>
224786Swnj
/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */
308594Sroot /*ARGSUSED*/
3168325Scgd int
socreate(dom,aso,type,proto)3210267Ssam socreate(dom, aso, type, proto)
3352482Storek int dom;
344786Swnj struct socket **aso;
3512757Ssam register int type;
3612757Ssam int proto;
374786Swnj {
3850942Ssklower struct proc *p = curproc; /* XXX */
394786Swnj register struct protosw *prp;
404786Swnj register struct socket *so;
4112757Ssam register int error;
424786Swnj
434890Swnj if (proto)
4421767Skarels prp = pffindproto(dom, proto, type);
454890Swnj else
469168Ssam prp = pffindtype(dom, type);
4759973Ssklower if (prp == 0 || prp->pr_usrreq == 0)
484890Swnj return (EPROTONOSUPPORT);
498300Sroot if (prp->pr_type != type)
508300Sroot return (EPROTOTYPE);
5137478Ssklower MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT);
5237478Ssklower bzero((caddr_t)so, sizeof(*so));
539168Ssam so->so_type = type;
5450942Ssklower if (p->p_ucred->cr_uid == 0)
556214Swnj so->so_state = SS_PRIV;
564786Swnj so->so_proto = prp;
5768325Scgd error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0,
5868325Scgd (struct mbuf *)(long)proto, (struct mbuf *)0);
594979Swnj if (error) {
607507Sroot so->so_state |= SS_NOFDREF;
617180Swnj sofree(so);
624890Swnj return (error);
634786Swnj }
644786Swnj *aso = so;
654786Swnj return (0);
664786Swnj }
674786Swnj
6868325Scgd int
sobind(so,nam)6910267Ssam sobind(so, nam)
708300Sroot struct socket *so;
718300Sroot struct mbuf *nam;
728300Sroot {
738300Sroot int s = splnet();
748300Sroot int error;
758300Sroot
7641908Skarels error =
7741908Skarels (*so->so_proto->pr_usrreq)(so, PRU_BIND,
7812757Ssam (struct mbuf *)0, nam, (struct mbuf *)0);
798300Sroot splx(s);
808300Sroot return (error);
818300Sroot }
828300Sroot
8368325Scgd int
solisten(so,backlog)848300Sroot solisten(so, backlog)
8512757Ssam register struct socket *so;
868300Sroot int backlog;
878300Sroot {
8812757Ssam int s = splnet(), error;
898300Sroot
9012757Ssam error =
9112757Ssam (*so->so_proto->pr_usrreq)(so, PRU_LISTEN,
9212757Ssam (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
938300Sroot if (error) {
948300Sroot splx(s);
958300Sroot return (error);
968300Sroot }
9738584Skarels if (so->so_q == 0)
988300Sroot so->so_options |= SO_ACCEPTCONN;
998300Sroot if (backlog < 0)
1008300Sroot backlog = 0;
10135384Skarels so->so_qlimit = min(backlog, SOMAXCONN);
10212493Ssam splx(s);
1038300Sroot return (0);
1048300Sroot }
1058300Sroot
10668325Scgd int
sofree(so)1074916Swnj sofree(so)
10812757Ssam register struct socket *so;
1094916Swnj {
1104916Swnj
11131810Skarels if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
11231810Skarels return;
1137507Sroot if (so->so_head) {
1147507Sroot if (!soqremque(so, 0) && !soqremque(so, 1))
1157507Sroot panic("sofree dq");
1167507Sroot so->so_head = 0;
1177507Sroot }
1184950Swnj sbrelease(&so->so_snd);
11912757Ssam sorflush(so);
12037478Ssklower FREE(so, M_SOCKET);
1214916Swnj }
1224916Swnj
1234786Swnj /*
1244890Swnj * Close a socket on last file table reference removal.
1254890Swnj * Initiate disconnect if connected.
1264890Swnj * Free socket when disconnect complete.
1274829Swnj */
12868325Scgd int
soclose(so)12912757Ssam soclose(so)
1304829Swnj register struct socket *so;
1314829Swnj {
1324890Swnj int s = splnet(); /* conservative */
13333372Sbostic int error = 0;
1344829Swnj
1357507Sroot if (so->so_options & SO_ACCEPTCONN) {
13638584Skarels while (so->so_q0)
13710399Ssam (void) soabort(so->so_q0);
13838584Skarels while (so->so_q)
13910399Ssam (void) soabort(so->so_q);
1407507Sroot }
1414890Swnj if (so->so_pcb == 0)
1424890Swnj goto discard;
1434890Swnj if (so->so_state & SS_ISCONNECTED) {
1444890Swnj if ((so->so_state & SS_ISDISCONNECTING) == 0) {
14526245Skarels error = sodisconnect(so);
14612757Ssam if (error)
14712757Ssam goto drop;
1484890Swnj }
14910267Ssam if (so->so_options & SO_LINGER) {
1505281Sroot if ((so->so_state & SS_ISDISCONNECTING) &&
15112757Ssam (so->so_state & SS_NBIO))
15212757Ssam goto drop;
1535281Sroot while (so->so_state & SS_ISCONNECTED)
15440706Skarels if (error = tsleep((caddr_t)&so->so_timeo,
155*69167Smckusick PSOCK | PCATCH, netcls, so->so_linger * hz))
15640706Skarels break;
1574890Swnj }
1584890Swnj }
1595580Sroot drop:
1606880Ssam if (so->so_pcb) {
16112757Ssam int error2 =
16212757Ssam (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
16312757Ssam (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
16412757Ssam if (error == 0)
16512757Ssam error = error2;
1666880Ssam }
1674890Swnj discard:
16810399Ssam if (so->so_state & SS_NOFDREF)
16910399Ssam panic("soclose: NOFDREF");
1707507Sroot so->so_state |= SS_NOFDREF;
1714950Swnj sofree(so);
1724890Swnj splx(s);
17312757Ssam return (error);
1744829Swnj }
1754829Swnj
17610399Ssam /*
17710399Ssam * Must be called at splnet...
17810399Ssam */
17968325Scgd int
soabort(so)18010399Ssam soabort(so)
18110399Ssam struct socket *so;
18210399Ssam {
18310399Ssam
18412757Ssam return (
18512757Ssam (*so->so_proto->pr_usrreq)(so, PRU_ABORT,
18612757Ssam (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
18710399Ssam }
18810399Ssam
18968325Scgd int
soaccept(so,nam)19010267Ssam soaccept(so, nam)
19112757Ssam register struct socket *so;
1928300Sroot struct mbuf *nam;
1934927Swnj {
1944927Swnj int s = splnet();
1954927Swnj int error;
1964927Swnj
19710399Ssam if ((so->so_state & SS_NOFDREF) == 0)
19810399Ssam panic("soaccept: !NOFDREF");
19910267Ssam so->so_state &= ~SS_NOFDREF;
2008300Sroot error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT,
20112757Ssam (struct mbuf *)0, nam, (struct mbuf *)0);
2024927Swnj splx(s);
2034927Swnj return (error);
2044927Swnj }
2054927Swnj
20668325Scgd int
soconnect(so,nam)20710267Ssam soconnect(so, nam)
20812757Ssam register struct socket *so;
2098300Sroot struct mbuf *nam;
2104786Swnj {
21130414Skarels int s;
2124890Swnj int error;
2134786Swnj
21430414Skarels if (so->so_options & SO_ACCEPTCONN)
21530414Skarels return (EOPNOTSUPP);
21630414Skarels s = splnet();
21724768Skarels /*
21824768Skarels * If protocol is connection-based, can only connect once.
21924768Skarels * Otherwise, if connected, try to disconnect first.
22024768Skarels * This allows user to disconnect by connecting to, e.g.,
22124768Skarels * a null address.
22224768Skarels */
22324768Skarels if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
22424768Skarels ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
22524768Skarels (error = sodisconnect(so))))
2264890Swnj error = EISCONN;
22724768Skarels else
22824768Skarels error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
22924768Skarels (struct mbuf *)0, nam, (struct mbuf *)0);
2304890Swnj splx(s);
2314890Swnj return (error);
2324786Swnj }
2334786Swnj
23468325Scgd int
soconnect2(so1,so2)23512757Ssam soconnect2(so1, so2)
23612757Ssam register struct socket *so1;
23712757Ssam struct socket *so2;
23812757Ssam {
23912757Ssam int s = splnet();
24012757Ssam int error;
24112757Ssam
24213113Ssam error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
24313113Ssam (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0);
24412757Ssam splx(s);
24512757Ssam return (error);
24612757Ssam }
24712757Ssam
24868325Scgd int
sodisconnect(so)24926245Skarels sodisconnect(so)
25012757Ssam register struct socket *so;
2514786Swnj {
2524890Swnj int s = splnet();
2534890Swnj int error;
2544786Swnj
2554890Swnj if ((so->so_state & SS_ISCONNECTED) == 0) {
2564890Swnj error = ENOTCONN;
2574890Swnj goto bad;
2584890Swnj }
2594890Swnj if (so->so_state & SS_ISDISCONNECTING) {
2604890Swnj error = EALREADY;
2614890Swnj goto bad;
2624890Swnj }
2638300Sroot error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
26426245Skarels (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
2654890Swnj bad:
2664890Swnj splx(s);
2674890Swnj return (error);
2684786Swnj }
2694786Swnj
/* Wait flag to pass to sblock(): M_NOWAIT iff MSG_DONTWAIT is set in f. */
#define	SBLOCKWAIT(f)	((f) & MSG_DONTWAIT ? M_NOWAIT : M_WAITOK)
2714786Swnj /*
2724890Swnj * Send on a socket.
2734890Swnj * If send must go all at once and message is larger than
2744890Swnj * send buffering, then hard error.
2754890Swnj * Lock against other senders.
2764890Swnj * If must go all at once and not enough room now, then
2774890Swnj * inform user that this would block and do nothing.
27816412Skarels * Otherwise, if nonblocking, send as much as possible.
27941908Skarels * The data to be sent is described by "uio" if nonzero,
28041908Skarels * otherwise by the mbuf chain "top" (which must be null
28141908Skarels * if uio is not). Data provided in mbuf chain must be small
28241908Skarels * enough to send all at once.
28341908Skarels *
28441908Skarels * Returns nonzero on error, timeout or signal; callers
28541908Skarels * must check for short counts if EINTR/ERESTART are returned.
28641908Skarels * Data and control buffers are freed on return.
2874786Swnj */
28868325Scgd int
sosend(so,addr,uio,top,control,flags)28943417Skarels sosend(so, addr, uio, top, control, flags)
2904786Swnj register struct socket *so;
29141908Skarels struct mbuf *addr;
29241908Skarels struct uio *uio;
29341908Skarels struct mbuf *top;
29441908Skarels struct mbuf *control;
2958319Sroot int flags;
2964786Swnj {
29750942Ssklower struct proc *p = curproc; /* XXX */
29841908Skarels struct mbuf **mp;
29935384Skarels register struct mbuf *m;
30041908Skarels register long space, len, resid;
30141908Skarels int clen = 0, error, s, dontroute, mlen;
30241908Skarels int atomic = sosendallatonce(so) || top;
3034786Swnj
30441908Skarels if (uio)
30541908Skarels resid = uio->uio_resid;
30641908Skarels else
30741908Skarels resid = top->m_pkthdr.len;
30866800Smckusick /*
30966800Smckusick * In theory resid should be unsigned.
31066800Smckusick * However, space must be signed, as it might be less than 0
31166800Smckusick * if we over-committed, and we must use a signed comparison
31266800Smckusick * of space and resid. On the other hand, a negative resid
31366800Smckusick * causes us to loop sending 0-length segments to the protocol.
31466800Smckusick */
31566800Smckusick if (resid < 0)
31666800Smckusick return (EINVAL);
31712757Ssam dontroute =
31812757Ssam (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
31912757Ssam (so->so_proto->pr_flags & PR_ATOMIC);
32050942Ssklower p->p_stats->p_ru.ru_msgsnd++;
32140632Skarels if (control)
32241908Skarels clen = control->m_len;
32316412Skarels #define snderr(errno) { error = errno; splx(s); goto release; }
32416412Skarels
3256419Sroot restart:
32651755Smckusick if (error = sblock(&so->so_snd, SBLOCKWAIT(flags)))
32741908Skarels goto out;
32816412Skarels do {
32916412Skarels s = splnet();
33021108Skarels if (so->so_state & SS_CANTSENDMORE)
33116412Skarels snderr(EPIPE);
33237478Ssklower if (so->so_error)
33337478Ssklower snderr(so->so_error);
33416412Skarels if ((so->so_state & SS_ISCONNECTED) == 0) {
33537478Ssklower if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
33649059Ssklower if ((so->so_state & SS_ISCONFIRMING) == 0 &&
33749059Ssklower !(resid == 0 && clen != 0))
33837478Ssklower snderr(ENOTCONN);
33941908Skarels } else if (addr == 0)
34016412Skarels snderr(EDESTADDRREQ);
34116412Skarels }
34241908Skarels space = sbspace(&so->so_snd);
34316412Skarels if (flags & MSG_OOB)
34441908Skarels space += 1024;
34551755Smckusick if (atomic && resid > so->so_snd.sb_hiwat ||
34651755Smckusick clen > so->so_snd.sb_hiwat)
34751755Smckusick snderr(EMSGSIZE);
34851755Smckusick if (space < resid + clen && uio &&
34941908Skarels (atomic || space < so->so_snd.sb_lowat || space < clen)) {
35041908Skarels if (so->so_state & SS_NBIO)
35141908Skarels snderr(EWOULDBLOCK);
35241908Skarels sbunlock(&so->so_snd);
35343417Skarels error = sbwait(&so->so_snd);
35441908Skarels splx(s);
35541908Skarels if (error)
35641908Skarels goto out;
35741908Skarels goto restart;
35816412Skarels }
35916412Skarels splx(s);
36016412Skarels mp = ⊤
36141908Skarels space -= clen;
36243417Skarels do {
36343417Skarels if (uio == NULL) {
36441908Skarels /*
36541908Skarels * Data is prepackaged in "top".
36641908Skarels */
36741908Skarels resid = 0;
36841908Skarels if (flags & MSG_EOR)
36941908Skarels top->m_flags |= M_EOR;
37043417Skarels } else do {
37135384Skarels if (top == 0) {
37235384Skarels MGETHDR(m, M_WAIT, MT_DATA);
37335384Skarels mlen = MHLEN;
37435384Skarels m->m_pkthdr.len = 0;
37535384Skarels m->m_pkthdr.rcvif = (struct ifnet *)0;
37635384Skarels } else {
37735384Skarels MGET(m, M_WAIT, MT_DATA);
37835384Skarels mlen = MLEN;
37935384Skarels }
38041908Skarels if (resid >= MINCLSIZE && space >= MCLBYTES) {
38135384Skarels MCLGET(m, M_WAIT);
38235384Skarels if ((m->m_flags & M_EXT) == 0)
38316412Skarels goto nopages;
38435384Skarels mlen = MCLBYTES;
38535384Skarels #ifdef MAPPED_MBUFS
38641908Skarels len = min(MCLBYTES, resid);
38741908Skarels #else
38860996Skarels if (atomic && top == 0) {
38941908Skarels len = min(MCLBYTES - max_hdr, resid);
39035384Skarels m->m_data += max_hdr;
39146452Ssklower } else
39246452Ssklower len = min(MCLBYTES, resid);
39335384Skarels #endif
39435384Skarels space -= MCLBYTES;
39516412Skarels } else {
39616412Skarels nopages:
39741908Skarels len = min(min(mlen, resid), space);
39821767Skarels space -= len;
39935384Skarels /*
40035384Skarels * For datagram protocols, leave room
40135384Skarels * for protocol headers in first mbuf.
40235384Skarels */
40335391Skarels if (atomic && top == 0 && len < mlen)
40435384Skarels MH_ALIGN(m, len);
40516412Skarels }
40645515Skarels error = uiomove(mtod(m, caddr_t), (int)len, uio);
40741908Skarels resid = uio->uio_resid;
40816412Skarels m->m_len = len;
40916412Skarels *mp = m;
41035384Skarels top->m_pkthdr.len += len;
41116412Skarels if (error)
41216412Skarels goto release;
41316412Skarels mp = &m->m_next;
41441908Skarels if (resid <= 0) {
41541908Skarels if (flags & MSG_EOR)
41635384Skarels top->m_flags |= M_EOR;
41721108Skarels break;
41835384Skarels }
41935384Skarels } while (space > 0 && atomic);
42035384Skarels if (dontroute)
42135384Skarels so->so_options |= SO_DONTROUTE;
42235384Skarels s = splnet(); /* XXX */
42335384Skarels error = (*so->so_proto->pr_usrreq)(so,
42435384Skarels (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
42541908Skarels top, addr, control);
42635384Skarels splx(s);
42735384Skarels if (dontroute)
42835384Skarels so->so_options &= ~SO_DONTROUTE;
42941908Skarels clen = 0;
43041908Skarels control = 0;
43135384Skarels top = 0;
43235384Skarels mp = ⊤
43335384Skarels if (error)
43435384Skarels goto release;
43541908Skarels } while (resid && space > 0);
43641908Skarels } while (resid);
4374890Swnj
4384786Swnj release:
4394890Swnj sbunlock(&so->so_snd);
44041908Skarels out:
4416419Sroot if (top)
4426419Sroot m_freem(top);
44341908Skarels if (control)
44441908Skarels m_freem(control);
4454786Swnj return (error);
4464786Swnj }
4474786Swnj
44825629Skarels /*
44925629Skarels * Implement receive operations on a socket.
45025629Skarels * We depend on the way that records are added to the sockbuf
45125629Skarels * by sbappend*. In particular, each record (mbufs linked through m_next)
45225629Skarels * must begin with an address if the protocol so specifies,
45341908Skarels * followed by an optional mbuf or mbufs containing ancillary data,
45441908Skarels * and then zero or more mbufs of data.
45525629Skarels * In order to avoid blocking network interrupts for the entire time here,
45625629Skarels * we splx() while doing the actual copy to user space.
45725629Skarels * Although the sockbuf is locked, new data may still be appended,
45825629Skarels * and thus we must maintain consistency of the sockbuf during that time.
45945515Skarels *
46041908Skarels * The caller may receive the data as a single mbuf chain by supplying
46143417Skarels * an mbuf **mp0 for use in returning the chain. The uio is then used
46241908Skarels * only for the count in uio_resid.
46325629Skarels */
46468325Scgd int
soreceive(so,paddr,uio,mp0,controlp,flagsp)46543417Skarels soreceive(so, paddr, uio, mp0, controlp, flagsp)
4664786Swnj register struct socket *so;
46741908Skarels struct mbuf **paddr;
46841908Skarels struct uio *uio;
46943417Skarels struct mbuf **mp0;
47041908Skarels struct mbuf **controlp;
47135384Skarels int *flagsp;
4724786Swnj {
47343417Skarels register struct mbuf *m, **mp;
47443417Skarels register int flags, len, error, s, offset;
47512757Ssam struct protosw *pr = so->so_proto;
47641908Skarels struct mbuf *nextrecord;
47741908Skarels int moff, type;
47858301Smckusick int orig_resid = uio->uio_resid;
4794786Swnj
48043417Skarels mp = mp0;
48141908Skarels if (paddr)
48241908Skarels *paddr = 0;
48335384Skarels if (controlp)
48435384Skarels *controlp = 0;
48535384Skarels if (flagsp)
48635384Skarels flags = *flagsp &~ MSG_EOR;
48745515Skarels else
48835384Skarels flags = 0;
48912757Ssam if (flags & MSG_OOB) {
4909635Ssam m = m_get(M_WAIT, MT_DATA);
49168325Scgd error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
49268325Scgd (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0);
4938594Sroot if (error)
49410137Ssam goto bad;
4958319Sroot do {
49641908Skarels error = uiomove(mtod(m, caddr_t),
49741908Skarels (int) min(uio->uio_resid, m->m_len), uio);
4988319Sroot m = m_free(m);
4998594Sroot } while (uio->uio_resid && error == 0 && m);
50010137Ssam bad:
5018319Sroot if (m)
5028771Sroot m_freem(m);
5038594Sroot return (error);
5048319Sroot }
50541908Skarels if (mp)
50641908Skarels *mp = (struct mbuf *)0;
50743417Skarels if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
50835384Skarels (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
50935384Skarels (struct mbuf *)0, (struct mbuf *)0);
5108319Sroot
5114890Swnj restart:
51251755Smckusick if (error = sblock(&so->so_rcv, SBLOCKWAIT(flags)))
51340706Skarels return (error);
5148835Sroot s = splnet();
5154890Swnj
51637478Ssklower m = so->so_rcv.sb_mb;
51744383Skarels /*
51844383Skarels * If we have less data than requested, block awaiting more
51944383Skarels * (subject to any timeout) if:
52044383Skarels * 1. the current count is less than the low water mark, or
52144383Skarels * 2. MSG_WAITALL is set, and it is possible to do the entire
52268325Scgd * receive operation at once if we block (resid <= hiwat), or
52368325Scgd * 3. MSG_DONTWAIT is not set.
52444383Skarels * If MSG_WAITALL is set but resid is larger than the receive buffer,
52544383Skarels * we have to do the receive in sections, and thus risk returning
52644383Skarels * a short count if a timeout or signal occurs after we start.
52744383Skarels */
52851755Smckusick if (m == 0 || ((flags & MSG_DONTWAIT) == 0 &&
52951755Smckusick so->so_rcv.sb_cc < uio->uio_resid) &&
53044383Skarels (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
53150942Ssklower ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
53258301Smckusick m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0) {
53341908Skarels #ifdef DIAGNOSTIC
53441908Skarels if (m == 0 && so->so_rcv.sb_cc)
53537478Ssklower panic("receive 1");
53641908Skarels #endif
5375168Swnj if (so->so_error) {
53846479Skarels if (m)
53951022Ssklower goto dontblock;
5405168Swnj error = so->so_error;
54146479Skarels if ((flags & MSG_PEEK) == 0)
54246479Skarels so->so_error = 0;
5435168Swnj goto release;
5445168Swnj }
54546479Skarels if (so->so_state & SS_CANTRCVMORE) {
54646479Skarels if (m)
54751022Ssklower goto dontblock;
54846479Skarels else
54946479Skarels goto release;
55046479Skarels }
55150942Ssklower for (; m; m = m->m_next)
55250942Ssklower if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
55350942Ssklower m = so->so_rcv.sb_mb;
55450942Ssklower goto dontblock;
55550942Ssklower }
55638584Skarels if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
55732567Sbostic (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
55832567Sbostic error = ENOTCONN;
55932567Sbostic goto release;
5604890Swnj }
56143417Skarels if (uio->uio_resid == 0)
56225629Skarels goto release;
56351755Smckusick if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
56432567Sbostic error = EWOULDBLOCK;
56532567Sbostic goto release;
56632567Sbostic }
5674890Swnj sbunlock(&so->so_rcv);
56843417Skarels error = sbwait(&so->so_rcv);
5695012Swnj splx(s);
57041908Skarels if (error)
57141908Skarels return (error);
5724890Swnj goto restart;
5734786Swnj }
57450942Ssklower dontblock:
57552926Smckusick if (uio->uio_procp)
57652926Smckusick uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
57735384Skarels nextrecord = m->m_nextpkt;
57812757Ssam if (pr->pr_flags & PR_ADDR) {
57941908Skarels #ifdef DIAGNOSTIC
58025629Skarels if (m->m_type != MT_SONAME)
58116993Skarels panic("receive 1a");
58241908Skarels #endif
58358301Smckusick orig_resid = 0;
58416993Skarels if (flags & MSG_PEEK) {
58541908Skarels if (paddr)
58641908Skarels *paddr = m_copy(m, 0, m->m_len);
58725629Skarels m = m->m_next;
58816993Skarels } else {
58925629Skarels sbfree(&so->so_rcv, m);
59041908Skarels if (paddr) {
59141908Skarels *paddr = m;
59235384Skarels so->so_rcv.sb_mb = m->m_next;
59335384Skarels m->m_next = 0;
59435384Skarels m = so->so_rcv.sb_mb;
59525629Skarels } else {
59626958Skarels MFREE(m, so->so_rcv.sb_mb);
59726958Skarels m = so->so_rcv.sb_mb;
59825629Skarels }
59916993Skarels }
60016993Skarels }
60141908Skarels while (m && m->m_type == MT_CONTROL && error == 0) {
60216993Skarels if (flags & MSG_PEEK) {
60335384Skarels if (controlp)
60435384Skarels *controlp = m_copy(m, 0, m->m_len);
60535384Skarels m = m->m_next;
60635384Skarels } else {
60735384Skarels sbfree(&so->so_rcv, m);
60835384Skarels if (controlp) {
60943097Ssklower if (pr->pr_domain->dom_externalize &&
61043097Ssklower mtod(m, struct cmsghdr *)->cmsg_type ==
61143097Ssklower SCM_RIGHTS)
61241908Skarels error = (*pr->pr_domain->dom_externalize)(m);
61335384Skarels *controlp = m;
61435384Skarels so->so_rcv.sb_mb = m->m_next;
61535384Skarels m->m_next = 0;
61635384Skarels m = so->so_rcv.sb_mb;
61735384Skarels } else {
61835384Skarels MFREE(m, so->so_rcv.sb_mb);
61935384Skarels m = so->so_rcv.sb_mb;
62035384Skarels }
62135384Skarels }
62258301Smckusick if (controlp) {
62358301Smckusick orig_resid = 0;
62441908Skarels controlp = &(*controlp)->m_next;
62558301Smckusick }
62635384Skarels }
62741908Skarels if (m) {
62844383Skarels if ((flags & MSG_PEEK) == 0)
62944383Skarels m->m_nextpkt = nextrecord;
63041908Skarels type = m->m_type;
63146452Ssklower if (type == MT_OOBDATA)
63246452Ssklower flags |= MSG_OOB;
63341908Skarels }
6348319Sroot moff = 0;
63532092Skarels offset = 0;
63646452Ssklower while (m && uio->uio_resid > 0 && error == 0) {
63746452Ssklower if (m->m_type == MT_OOBDATA) {
63846452Ssklower if (type != MT_OOBDATA)
63946452Ssklower break;
64046452Ssklower } else if (type == MT_OOBDATA)
64146452Ssklower break;
64241908Skarels #ifdef DIAGNOSTIC
64335384Skarels else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
64425629Skarels panic("receive 3");
64541908Skarels #endif
6467747Sroot so->so_state &= ~SS_RCVATMARK;
64743417Skarels len = uio->uio_resid;
64832092Skarels if (so->so_oobmark && len > so->so_oobmark - offset)
64932092Skarels len = so->so_oobmark - offset;
65021767Skarels if (len > m->m_len - moff)
6518319Sroot len = m->m_len - moff;
65241908Skarels /*
65341908Skarels * If mp is set, just pass back the mbufs.
65441908Skarels * Otherwise copy them out via the uio, then free.
65541908Skarels * Sockbuf must be consistent here (points to current mbuf,
65641908Skarels * it points to next record) when we drop priority;
65741908Skarels * we must note any additions to the sockbuf when we
65841908Skarels * block interrupts again.
65941908Skarels */
66041908Skarels if (mp == 0) {
66141908Skarels splx(s);
66241908Skarels error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
66341908Skarels s = splnet();
66443417Skarels } else
66543417Skarels uio->uio_resid -= len;
66621767Skarels if (len == m->m_len - moff) {
66750942Ssklower if (m->m_flags & M_EOR)
66850942Ssklower flags |= MSG_EOR;
66925629Skarels if (flags & MSG_PEEK) {
67025629Skarels m = m->m_next;
67125629Skarels moff = 0;
67225629Skarels } else {
67335384Skarels nextrecord = m->m_nextpkt;
67425629Skarels sbfree(&so->so_rcv, m);
67541908Skarels if (mp) {
67641908Skarels *mp = m;
67741908Skarels mp = &m->m_next;
67843417Skarels so->so_rcv.sb_mb = m = m->m_next;
67943417Skarels *mp = (struct mbuf *)0;
68041908Skarels } else {
68141908Skarels MFREE(m, so->so_rcv.sb_mb);
68241908Skarels m = so->so_rcv.sb_mb;
68341908Skarels }
68426958Skarels if (m)
68535384Skarels m->m_nextpkt = nextrecord;
68625629Skarels }
6874786Swnj } else {
68812757Ssam if (flags & MSG_PEEK)
6898319Sroot moff += len;
6908319Sroot else {
69143417Skarels if (mp)
69243417Skarels *mp = m_copym(m, 0, len, M_WAIT);
69335384Skarels m->m_data += len;
6948319Sroot m->m_len -= len;
6958319Sroot so->so_rcv.sb_cc -= len;
6968319Sroot }
6974786Swnj }
69832092Skarels if (so->so_oobmark) {
69932092Skarels if ((flags & MSG_PEEK) == 0) {
70032092Skarels so->so_oobmark -= len;
70132092Skarels if (so->so_oobmark == 0) {
70232092Skarels so->so_state |= SS_RCVATMARK;
70332092Skarels break;
70432092Skarels }
70558530Storek } else {
70632092Skarels offset += len;
70758530Storek if (offset == so->so_oobmark)
70858530Storek break;
70958530Storek }
7107747Sroot }
71150942Ssklower if (flags & MSG_EOR)
71240632Skarels break;
71341908Skarels /*
71441908Skarels * If the MSG_WAITALL flag is set (for non-atomic socket),
71543417Skarels * we must not quit until "uio->uio_resid == 0" or an error
71641908Skarels * termination. If a signal/timeout occurs, return
71743417Skarels * with a short count but without error.
71841908Skarels * Keep sockbuf locked against other readers.
71941908Skarels */
72043417Skarels while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
72158301Smckusick !sosendallatonce(so) && !nextrecord) {
72246479Skarels if (so->so_error || so->so_state & SS_CANTRCVMORE)
72346479Skarels break;
72441908Skarels error = sbwait(&so->so_rcv);
72541908Skarels if (error) {
72641908Skarels sbunlock(&so->so_rcv);
72741908Skarels splx(s);
72841908Skarels return (0);
72941908Skarels }
73050942Ssklower if (m = so->so_rcv.sb_mb)
73141908Skarels nextrecord = m->m_nextpkt;
73241908Skarels }
73316993Skarels }
73458301Smckusick
73558301Smckusick if (m && pr->pr_flags & PR_ATOMIC) {
73658301Smckusick flags |= MSG_TRUNC;
73758301Smckusick if ((flags & MSG_PEEK) == 0)
73858301Smckusick (void) sbdroprecord(&so->so_rcv);
73958301Smckusick }
74016993Skarels if ((flags & MSG_PEEK) == 0) {
74126500Skarels if (m == 0)
74216993Skarels so->so_rcv.sb_mb = nextrecord;
74316993Skarels if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
74416993Skarels (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
74568325Scgd (struct mbuf *)(long)flags, (struct mbuf *)0,
74637478Ssklower (struct mbuf *)0);
74716993Skarels }
74858301Smckusick if (orig_resid == uio->uio_resid && orig_resid &&
74958301Smckusick (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
75058301Smckusick sbunlock(&so->so_rcv);
75158301Smckusick splx(s);
75258301Smckusick goto restart;
75358301Smckusick }
75458301Smckusick
75535384Skarels if (flagsp)
75635384Skarels *flagsp |= flags;
7574890Swnj release:
7584916Swnj sbunlock(&so->so_rcv);
7594890Swnj splx(s);
7604916Swnj return (error);
7614786Swnj }
7624786Swnj
76368325Scgd int
soshutdown(so,how)76410267Ssam soshutdown(so, how)
76512757Ssam register struct socket *so;
76612757Ssam register int how;
76710267Ssam {
76812757Ssam register struct protosw *pr = so->so_proto;
76910267Ssam
77010267Ssam how++;
77112757Ssam if (how & FREAD)
77212757Ssam sorflush(so);
77310267Ssam if (how & FWRITE)
77412757Ssam return ((*pr->pr_usrreq)(so, PRU_SHUTDOWN,
77512757Ssam (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
77610267Ssam return (0);
77710267Ssam }
77810267Ssam
77968325Scgd void
sorflush(so)78012757Ssam sorflush(so)
78112757Ssam register struct socket *so;
78212757Ssam {
78312757Ssam register struct sockbuf *sb = &so->so_rcv;
78412757Ssam register struct protosw *pr = so->so_proto;
78512757Ssam register int s;
78612757Ssam struct sockbuf asb;
78712757Ssam
78840706Skarels sb->sb_flags |= SB_NOINTR;
78951755Smckusick (void) sblock(sb, M_WAITOK);
79012757Ssam s = splimp();
79112757Ssam socantrcvmore(so);
79212757Ssam sbunlock(sb);
79312757Ssam asb = *sb;
79412757Ssam bzero((caddr_t)sb, sizeof (*sb));
79512757Ssam splx(s);
79616993Skarels if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
79716993Skarels (*pr->pr_domain->dom_dispose)(asb.sb_mb);
79812757Ssam sbrelease(&asb);
79912757Ssam }
80012757Ssam
80168325Scgd int
sosetopt(so,level,optname,m0)80218553Skarels sosetopt(so, level, optname, m0)
80312757Ssam register struct socket *so;
80410267Ssam int level, optname;
80518553Skarels struct mbuf *m0;
80610267Ssam {
80717158Ssam int error = 0;
80818553Skarels register struct mbuf *m = m0;
80910267Ssam
81017158Ssam if (level != SOL_SOCKET) {
81118369Skarels if (so->so_proto && so->so_proto->pr_ctloutput)
81218369Skarels return ((*so->so_proto->pr_ctloutput)
81318553Skarels (PRCO_SETOPT, so, level, optname, &m0));
81418369Skarels error = ENOPROTOOPT;
81518369Skarels } else {
81618369Skarels switch (optname) {
81710267Ssam
81818369Skarels case SO_LINGER:
81918369Skarels if (m == NULL || m->m_len != sizeof (struct linger)) {
82018369Skarels error = EINVAL;
82118369Skarels goto bad;
82218369Skarels }
82318369Skarels so->so_linger = mtod(m, struct linger *)->l_linger;
82418369Skarels /* fall thru... */
82517158Ssam
82618369Skarels case SO_DEBUG:
82718369Skarels case SO_KEEPALIVE:
82818369Skarels case SO_DONTROUTE:
82918369Skarels case SO_USELOOPBACK:
83018369Skarels case SO_BROADCAST:
83118369Skarels case SO_REUSEADDR:
83255349Ssklower case SO_REUSEPORT:
83327191Skarels case SO_OOBINLINE:
83418369Skarels if (m == NULL || m->m_len < sizeof (int)) {
83518369Skarels error = EINVAL;
83618369Skarels goto bad;
83718369Skarels }
83818369Skarels if (*mtod(m, int *))
83918369Skarels so->so_options |= optname;
84018369Skarels else
84118369Skarels so->so_options &= ~optname;
84218369Skarels break;
84318369Skarels
84418369Skarels case SO_SNDBUF:
84541908Skarels case SO_RCVBUF:
84640706Skarels case SO_SNDLOWAT:
84741908Skarels case SO_RCVLOWAT:
84818369Skarels if (m == NULL || m->m_len < sizeof (int)) {
84918369Skarels error = EINVAL;
85018369Skarels goto bad;
85118369Skarels }
85218369Skarels switch (optname) {
85318369Skarels
85418369Skarels case SO_SNDBUF:
85518369Skarels case SO_RCVBUF:
85641908Skarels if (sbreserve(optname == SO_SNDBUF ?
85741908Skarels &so->so_snd : &so->so_rcv,
85841908Skarels (u_long) *mtod(m, int *)) == 0) {
85918369Skarels error = ENOBUFS;
86018369Skarels goto bad;
86118369Skarels }
86218369Skarels break;
86318369Skarels
86418369Skarels case SO_SNDLOWAT:
86541908Skarels so->so_snd.sb_lowat = *mtod(m, int *);
86641908Skarels break;
86718369Skarels case SO_RCVLOWAT:
86841908Skarels so->so_rcv.sb_lowat = *mtod(m, int *);
86918369Skarels break;
87044721Skarels }
87144721Skarels break;
87244721Skarels
87344721Skarels case SO_SNDTIMEO:
87444721Skarels case SO_RCVTIMEO:
87544721Skarels {
87644721Skarels struct timeval *tv;
87744721Skarels short val;
87844721Skarels
87944721Skarels if (m == NULL || m->m_len < sizeof (*tv)) {
88044721Skarels error = EINVAL;
88144721Skarels goto bad;
88244721Skarels }
88344721Skarels tv = mtod(m, struct timeval *);
88469137Smckusick if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) {
88544721Skarels error = EDOM;
88644721Skarels goto bad;
88744721Skarels }
88844721Skarels val = tv->tv_sec * hz + tv->tv_usec / tick;
88944721Skarels
89044721Skarels switch (optname) {
89144721Skarels
89218369Skarels case SO_SNDTIMEO:
89344721Skarels so->so_snd.sb_timeo = val;
89441908Skarels break;
89518369Skarels case SO_RCVTIMEO:
89644721Skarels so->so_rcv.sb_timeo = val;
89718369Skarels break;
89818369Skarels }
89918369Skarels break;
90044721Skarels }
90118369Skarels
90218369Skarels default:
90318369Skarels error = ENOPROTOOPT;
90418369Skarels break;
90517158Ssam }
90665370Sbostic if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
90750942Ssklower (void) ((*so->so_proto->pr_ctloutput)
90850942Ssklower (PRCO_SETOPT, so, level, optname, &m0));
90965370Sbostic m = NULL; /* freed by protocol */
91065370Sbostic }
91110267Ssam }
91217158Ssam bad:
91317158Ssam if (m)
91417158Ssam (void) m_free(m);
91517158Ssam return (error);
91610267Ssam }
91710267Ssam
91868325Scgd int
sogetopt(so,level,optname,mp)91917158Ssam sogetopt(so, level, optname, mp)
92012757Ssam register struct socket *so;
92110267Ssam int level, optname;
92217158Ssam struct mbuf **mp;
92317158Ssam {
92412757Ssam register struct mbuf *m;
92510267Ssam
92618369Skarels if (level != SOL_SOCKET) {
92718369Skarels if (so->so_proto && so->so_proto->pr_ctloutput) {
92818369Skarels return ((*so->so_proto->pr_ctloutput)
92918369Skarels (PRCO_GETOPT, so, level, optname, mp));
93045515Skarels } else
93118369Skarels return (ENOPROTOOPT);
93218369Skarels } else {
93317158Ssam m = m_get(M_WAIT, MT_SOOPTS);
93425502Skarels m->m_len = sizeof (int);
93525502Skarels
93618369Skarels switch (optname) {
93717158Ssam
93818369Skarels case SO_LINGER:
93918369Skarels m->m_len = sizeof (struct linger);
94018369Skarels mtod(m, struct linger *)->l_onoff =
94118369Skarels so->so_options & SO_LINGER;
94218369Skarels mtod(m, struct linger *)->l_linger = so->so_linger;
94318369Skarels break;
94410267Ssam
94518369Skarels case SO_USELOOPBACK:
94618369Skarels case SO_DONTROUTE:
94718369Skarels case SO_DEBUG:
94818369Skarels case SO_KEEPALIVE:
94918369Skarels case SO_REUSEADDR:
95055349Ssklower case SO_REUSEPORT:
95118369Skarels case SO_BROADCAST:
95227191Skarels case SO_OOBINLINE:
95318369Skarels *mtod(m, int *) = so->so_options & optname;
95418369Skarels break;
95518369Skarels
95625502Skarels case SO_TYPE:
95725502Skarels *mtod(m, int *) = so->so_type;
95825502Skarels break;
95925502Skarels
96024768Skarels case SO_ERROR:
96124768Skarels *mtod(m, int *) = so->so_error;
96224768Skarels so->so_error = 0;
96324768Skarels break;
96424768Skarels
96518369Skarels case SO_SNDBUF:
96618369Skarels *mtod(m, int *) = so->so_snd.sb_hiwat;
96718369Skarels break;
96818369Skarels
96918369Skarels case SO_RCVBUF:
97018369Skarels *mtod(m, int *) = so->so_rcv.sb_hiwat;
97118369Skarels break;
97218369Skarels
97318369Skarels case SO_SNDLOWAT:
97418369Skarels *mtod(m, int *) = so->so_snd.sb_lowat;
97518369Skarels break;
97618369Skarels
97718369Skarels case SO_RCVLOWAT:
97818369Skarels *mtod(m, int *) = so->so_rcv.sb_lowat;
97918369Skarels break;
98018369Skarels
98118369Skarels case SO_SNDTIMEO:
98244721Skarels case SO_RCVTIMEO:
98344721Skarels {
98444721Skarels int val = (optname == SO_SNDTIMEO ?
98544721Skarels so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
98618369Skarels
98744721Skarels m->m_len = sizeof(struct timeval);
98844721Skarels mtod(m, struct timeval *)->tv_sec = val / hz;
98944721Skarels mtod(m, struct timeval *)->tv_usec =
99069137Smckusick (val % hz) * tick;
99118369Skarels break;
99244721Skarels }
99318369Skarels
99418369Skarels default:
99526362Skarels (void)m_free(m);
99618369Skarels return (ENOPROTOOPT);
99718369Skarels }
99818369Skarels *mp = m;
99918369Skarels return (0);
100010267Ssam }
100110267Ssam }
100210267Ssam
100368325Scgd void
sohasoutofband(so)10045423Swnj sohasoutofband(so)
100512757Ssam register struct socket *so;
10065423Swnj {
100723233Skarels struct proc *p;
10085423Swnj
100937478Ssklower if (so->so_pgid < 0)
101037478Ssklower gsignal(-so->so_pgid, SIGURG);
101137478Ssklower else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
101223233Skarels psignal(p, SIGURG);
101352526Smckusick selwakeup(&so->so_rcv.sb_sel);
10145423Swnj }
1015