1 /* $OpenBSD: socketvar.h,v 1.142 2025/01/27 14:57:13 mvs Exp $ */ 2 /* $NetBSD: socketvar.h,v 1.18 1996/02/09 18:25:38 christos Exp $ */ 3 4 /*- 5 * Copyright (c) 1982, 1986, 1990, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)socketvar.h 8.1 (Berkeley) 6/2/93 33 */ 34 35 #ifndef _SYS_SOCKETVAR_H_ 36 #define _SYS_SOCKETVAR_H_ 37 38 #include <sys/event.h> 39 #include <sys/queue.h> 40 #include <sys/sigio.h> /* for struct sigio_ref */ 41 #include <sys/task.h> 42 #include <sys/timeout.h> 43 #include <sys/mutex.h> 44 #include <sys/rwlock.h> 45 #include <sys/refcnt.h> 46 47 #ifndef _SOCKLEN_T_DEFINED_ 48 #define _SOCKLEN_T_DEFINED_ 49 typedef __socklen_t socklen_t; /* length type for network syscalls */ 50 #endif 51 52 TAILQ_HEAD(soqhead, socket); 53 54 /* 55 * Locks used to protect global data and struct members: 56 * I immutable after creation 57 * a atomic 58 * mr sb_mxt of so_rcv buffer 59 * ms sb_mtx of so_snd buffer 60 * m sb_mtx 61 * br sblock() of so_rcv buffer 62 * bs sblock() od so_snd buffer 63 * s solock() 64 */ 65 66 /* 67 * XXXSMP: tcp(4) sockets rely on exclusive solock() for all the cases. 68 */ 69 70 /* 71 * Variables for socket splicing, allocated only when needed. 72 */ 73 struct sosplice { 74 struct socket *ssp_socket; /* [mr ms] send data to drain socket */ 75 struct socket *ssp_soback; /* [ms ms] back ref to source socket */ 76 off_t ssp_len; /* [mr] number of bytes spliced */ 77 off_t ssp_max; /* [I] maximum number of bytes */ 78 struct timeval ssp_idletv; /* [I] idle timeout */ 79 struct timeout ssp_idleto; 80 struct task ssp_task; /* task for somove */ 81 }; 82 83 /* 84 * Variables for socket buffering. 85 */ 86 struct sockbuf { 87 struct rwlock sb_lock; 88 struct mutex sb_mtx; 89 /* The following fields are all zeroed on flush. */ 90 #define sb_startzero sb_cc 91 u_long sb_cc; /* [m] actual chars in buffer */ 92 u_long sb_datacc; /* [m] data only chars in buffer */ 93 u_long sb_hiwat; /* [m] max actual char count */ 94 u_long sb_wat; /* [m] default watermark */ 95 u_long sb_mbcnt; /* [m] chars of mbufs used */ 96 u_long sb_mbmax; /* [m] max chars of mbufs to use */ 97 long sb_lowat; /* [m] low water mark */ 98 struct mbuf *sb_mb; /* [m] the mbuf chain */ 99 struct mbuf *sb_mbtail; /* [m] the last mbuf in the chain */ 100 struct mbuf *sb_lastrecord; /* [m] first mbuf of last record in 101 socket buffer */ 102 short sb_flags; /* [m] flags, see below */ 103 /* End area that is zeroed on flush. */ 104 #define sb_endzero sb_flags 105 short sb_state; /* [m] socket state on sockbuf */ 106 uint64_t sb_timeo_nsecs; /* [m] timeout for read/write */ 107 struct klist sb_klist; /* [m] list of knotes */ 108 }; 109 110 #define SB_MAX (2*1024*1024) /* default for max chars in sockbuf */ 111 #define SB_WAIT 0x0001 /* someone is waiting for data/space */ 112 #define SB_ASYNC 0x0002 /* ASYNC I/O, need signals */ 113 #define SB_SPLICE 0x0004 /* buffer is splice source or drain */ 114 #define SB_NOINTR 0x0008 /* operations not interruptible */ 115 116 /* 117 * Kernel structure per socket. 118 * Contains send and receive buffer queues, 119 * handle on protocol and pointer to protocol 120 * private data and error information. 121 */ 122 struct socket { 123 const struct protosw *so_proto; /* [I] protocol handle */ 124 struct rwlock so_lock; /* this socket lock */ 125 struct refcnt so_refcnt; /* references to this socket */ 126 void *so_pcb; /* [s] protocol control block */ 127 u_int so_state; /* [s] internal state flags SS_*, 128 see below */ 129 short so_type; /* [I] generic type, see socket.h */ 130 short so_options; /* [s] from socket call, see 131 socket.h */ 132 short so_linger; /* [s] time to linger while closing */ 133 /* 134 * Variables for connection queueing. 135 * Socket where accepts occur is so_head in all subsidiary sockets. 136 * If so_head is 0, socket is not related to an accept. 137 * For head socket so_q0 queues partially completed connections, 138 * while so_q is a queue of connections ready to be accepted. 139 * If a connection is aborted and it has so_head set, then 140 * it has to be pulled out of either so_q0 or so_q. 141 * We allow connections to queue up based on current queue lengths 142 * and limit on number of queued connections for this socket. 143 * 144 * Connections queue relies on both socket locks of listening and 145 * unaccepted sockets. Socket lock of listening socket should be 146 * always taken first. 147 */ 148 struct socket *so_head; /* [s] back pointer to accept socket */ 149 struct soqhead *so_onq; /* [s] queue (q or q0) that we're on */ 150 struct soqhead so_q0; /* [s] queue of partial connections */ 151 struct soqhead so_q; /* [s] queue of incoming connections */ 152 struct sigio_ref so_sigio; /* async I/O registration */ 153 TAILQ_ENTRY(socket) so_qe; /* [s] our queue entry (q or q0) */ 154 short so_q0len; /* [s] partials on so_q0 */ 155 short so_qlen; /* [s] number of connections on so_q */ 156 short so_qlimit; /* [s] max number queued connections */ 157 short so_timeo; /* [s] connection timeout */ 158 u_long so_oobmark; /* [mr] chars to oob mark */ 159 u_int so_error; /* [a] error affecting connection */ 160 161 struct sosplice *so_sp; /* [s br] */ 162 163 struct sockbuf so_rcv; 164 struct sockbuf so_snd; 165 166 void (*so_upcall)(struct socket *, caddr_t, int); /* [s] */ 167 caddr_t so_upcallarg; /* [s] Arg for above */ 168 uid_t so_euid; /* [I] who opened the socket */ 169 uid_t so_ruid; /* [I] */ 170 gid_t so_egid; /* [I] */ 171 gid_t so_rgid; /* [I] */ 172 pid_t so_cpid; /* [I] pid of process that opened 173 socket */ 174 }; 175 176 /* 177 * Socket state bits. 178 * 179 * NOTE: The following states should be used with corresponding socket's 180 * buffer `sb_state' only: 181 * 182 * SS_CANTSENDMORE with `so_snd' 183 * SS_ISSENDING with `so_snd' 184 * SS_CANTRCVMORE with `so_rcv' 185 * SS_RCVATMARK with `so_rcv' 186 */ 187 188 #define SS_NOFDREF 0x001 /* no file table ref any more */ 189 #define SS_ISCONNECTED 0x002 /* socket connected to a peer */ 190 #define SS_ISCONNECTING 0x004 /* in process of connecting to peer */ 191 #define SS_ISDISCONNECTING 0x008 /* in process of disconnecting */ 192 #define SS_CANTSENDMORE 0x010 /* can't send more data to peer */ 193 #define SS_CANTRCVMORE 0x020 /* can't receive more data from peer */ 194 #define SS_RCVATMARK 0x040 /* at mark on input */ 195 #define SS_ISDISCONNECTED 0x800 /* socket disconnected from peer */ 196 197 #define SS_PRIV 0x080 /* privileged for broadcast, raw... */ 198 #define SS_CONNECTOUT 0x1000 /* connect, not accept, at this end */ 199 #define SS_ISSENDING 0x2000 /* hint for lower layer */ 200 #define SS_DNS 0x4000 /* created using SOCK_DNS socket(2) */ 201 #define SS_YP 0x8000 /* created using ypconnect(2) */ 202 203 #ifdef _KERNEL 204 205 #include <sys/protosw.h> 206 #include <lib/libkern/libkern.h> 207 208 void soassertlocked(struct socket *); 209 void soassertlocked_readonly(struct socket *); 210 211 static inline struct socket * 212 soref(struct socket *so) 213 { 214 if (so == NULL) 215 return NULL; 216 refcnt_take(&so->so_refcnt); 217 return so; 218 } 219 220 /* 221 * Macros for sockets and socket buffering. 222 */ 223 224 #define isspliced(so) ((so)->so_sp && (so)->so_sp->ssp_socket) 225 #define issplicedback(so) ((so)->so_sp && (so)->so_sp->ssp_soback) 226 227 void sbmtxassertlocked(struct sockbuf *); 228 229 /* 230 * Do we need to notify the other side when I/O is possible? 231 */ 232 static inline int 233 sb_notify(struct socket *so, struct sockbuf *sb) 234 { 235 int rv; 236 237 mtx_enter(&sb->sb_mtx); 238 rv = ((sb->sb_flags & (SB_WAIT|SB_ASYNC|SB_SPLICE)) != 0 || 239 !klist_empty(&sb->sb_klist)); 240 mtx_leave(&sb->sb_mtx); 241 242 return rv; 243 } 244 245 /* 246 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)? 247 * This is problematical if the fields are unsigned, as the space might 248 * still be negative (cc > hiwat or mbcnt > mbmax). Should detect 249 * overflow and return 0. 250 */ 251 252 static inline long 253 sbspace_locked(struct socket *so, struct sockbuf *sb) 254 { 255 sbmtxassertlocked(sb); 256 257 return lmin(sb->sb_hiwat - sb->sb_cc, sb->sb_mbmax - sb->sb_mbcnt); 258 } 259 260 static inline long 261 sbspace(struct socket *so, struct sockbuf *sb) 262 { 263 long ret; 264 265 mtx_enter(&sb->sb_mtx); 266 ret = sbspace_locked(so, sb); 267 mtx_leave(&sb->sb_mtx); 268 269 return ret; 270 } 271 272 /* do we have to send all at once on a socket? */ 273 #define sosendallatonce(so) \ 274 ((so)->so_proto->pr_flags & PR_ATOMIC) 275 276 /* are we sending on this socket? */ 277 #define soissending(so) \ 278 ((so)->so_snd.sb_state & SS_ISSENDING) 279 280 /* can we read something from so? */ 281 static inline int 282 soreadable(struct socket *so) 283 { 284 soassertlocked_readonly(so); 285 if (isspliced(so)) 286 return 0; 287 return (so->so_rcv.sb_state & SS_CANTRCVMORE) || so->so_qlen || 288 so->so_error || so->so_rcv.sb_cc >= so->so_rcv.sb_lowat; 289 } 290 291 /* can we write something to so? */ 292 static inline int 293 sowriteable(struct socket *so) 294 { 295 soassertlocked_readonly(so); 296 return ((sbspace(so, &so->so_snd) >= so->so_snd.sb_lowat && 297 ((so->so_state & SS_ISCONNECTED) || 298 (so->so_proto->pr_flags & PR_CONNREQUIRED)==0)) || 299 (so->so_snd.sb_state & SS_CANTSENDMORE) || so->so_error); 300 } 301 302 /* adjust counters in sb reflecting allocation of m */ 303 static inline void 304 sballoc(struct sockbuf *sb, struct mbuf *m) 305 { 306 sb->sb_cc += m->m_len; 307 if (m->m_type != MT_CONTROL && m->m_type != MT_SONAME) 308 sb->sb_datacc += m->m_len; 309 sb->sb_mbcnt += MSIZE; 310 if (m->m_flags & M_EXT) 311 sb->sb_mbcnt += m->m_ext.ext_size; 312 } 313 314 /* adjust counters in sb reflecting freeing of m */ 315 static inline void 316 sbfree(struct sockbuf *sb, struct mbuf *m) 317 { 318 sb->sb_cc -= m->m_len; 319 if (m->m_type != MT_CONTROL && m->m_type != MT_SONAME) 320 sb->sb_datacc -= m->m_len; 321 sb->sb_mbcnt -= MSIZE; 322 if (m->m_flags & M_EXT) 323 sb->sb_mbcnt -= m->m_ext.ext_size; 324 } 325 326 /* 327 * Flags to sblock() 328 */ 329 #define SBL_WAIT 0x01 /* Wait if lock not immediately available. */ 330 #define SBL_NOINTR 0x02 /* Enforce non-interruptible sleep. */ 331 332 /* 333 * Set lock on sockbuf sb; sleep if lock is already held. 334 * Unless SB_NOINTR is set on sockbuf or SBL_NOINTR passed, 335 * sleep is interruptible. Returns error without lock if 336 * sleep is interrupted. 337 */ 338 int sblock(struct sockbuf *, int); 339 340 /* release lock on sockbuf sb */ 341 void sbunlock(struct sockbuf *); 342 343 static inline void 344 sbassertlocked(struct sockbuf *sb) 345 { 346 rw_assert_wrlock(&sb->sb_lock); 347 } 348 349 #define SB_EMPTY_FIXUP(sb) do { \ 350 if ((sb)->sb_mb == NULL) { \ 351 (sb)->sb_mbtail = NULL; \ 352 (sb)->sb_lastrecord = NULL; \ 353 } \ 354 } while (/*CONSTCOND*/0) 355 356 extern u_long sb_max; 357 358 extern struct pool socket_pool; 359 360 struct mbuf; 361 struct sockaddr; 362 struct proc; 363 struct msghdr; 364 struct stat; 365 struct knote; 366 367 /* 368 * File operations on sockets. 369 */ 370 int soo_read(struct file *, struct uio *, int); 371 int soo_write(struct file *, struct uio *, int); 372 int soo_ioctl(struct file *, u_long, caddr_t, struct proc *); 373 int soo_kqfilter(struct file *, struct knote *); 374 int soo_close(struct file *, struct proc *); 375 int soo_stat(struct file *, struct stat *, struct proc *); 376 void sbappend(struct socket *, struct sockbuf *, struct mbuf *); 377 void sbappendstream(struct socket *, struct sockbuf *, struct mbuf *); 378 int sbappendaddr(struct socket *, struct sockbuf *, 379 const struct sockaddr *, struct mbuf *, struct mbuf *); 380 int sbappendcontrol(struct socket *, struct sockbuf *, struct mbuf *, 381 struct mbuf *); 382 void sbappendrecord(struct socket *, struct sockbuf *, struct mbuf *); 383 void sbcompress(struct socket *, struct sockbuf *, struct mbuf *, 384 struct mbuf *); 385 struct mbuf * 386 sbcreatecontrol(const void *, size_t, int, int); 387 void sbdrop(struct socket *, struct sockbuf *, int); 388 void sbdroprecord(struct socket *, struct sockbuf *); 389 void sbflush(struct socket *, struct sockbuf *); 390 void sbrelease(struct socket *, struct sockbuf *); 391 int sbcheckreserve(u_long, u_long); 392 int sbchecklowmem(void); 393 int sbreserve(struct socket *, struct sockbuf *, u_long); 394 int sbwait(struct sockbuf *); 395 void soinit(void); 396 void soabort(struct socket *); 397 int soaccept(struct socket *, struct mbuf *); 398 int sobind(struct socket *, struct mbuf *, struct proc *); 399 void socantrcvmore(struct socket *); 400 void socantsendmore(struct socket *); 401 int soclose(struct socket *, int); 402 int soconnect(struct socket *, struct mbuf *); 403 int soconnect2(struct socket *, struct socket *); 404 int socreate(int, struct socket **, int, int); 405 int sodisconnect(struct socket *); 406 struct socket *soalloc(const struct protosw *, int); 407 void sofree(struct socket *, int); 408 void sorele(struct socket *); 409 int sogetopt(struct socket *, int, int, struct mbuf *); 410 void sohasoutofband(struct socket *); 411 void soisconnected(struct socket *); 412 void soisconnecting(struct socket *); 413 void soisdisconnected(struct socket *); 414 void soisdisconnecting(struct socket *); 415 int solisten(struct socket *, int); 416 struct socket *sonewconn(struct socket *, int, int); 417 void soqinsque(struct socket *, struct socket *, int); 418 int soqremque(struct socket *, int); 419 int soreceive(struct socket *, struct mbuf **, struct uio *, 420 struct mbuf **, struct mbuf **, int *, socklen_t); 421 int soreserve(struct socket *, u_long, u_long); 422 int sosend(struct socket *, struct mbuf *, struct uio *, 423 struct mbuf *, struct mbuf *, int); 424 int sosetopt(struct socket *, int, int, struct mbuf *); 425 int soshutdown(struct socket *, int); 426 void sowakeup(struct socket *, struct sockbuf *); 427 void sorwakeup(struct socket *); 428 void sowwakeup(struct socket *); 429 int sockargs(struct mbuf **, const void *, size_t, int); 430 431 int sosleep_nsec(struct socket *, void *, int, const char *, uint64_t); 432 void solock(struct socket *); 433 void solock_shared(struct socket *); 434 void solock_nonet(struct socket *); 435 int solock_persocket(struct socket *); 436 void solock_pair(struct socket *, struct socket *); 437 void sounlock(struct socket *); 438 void sounlock_shared(struct socket *); 439 void sounlock_nonet(struct socket *); 440 441 int sendit(struct proc *, int, struct msghdr *, int, register_t *); 442 int recvit(struct proc *, int, struct msghdr *, caddr_t, register_t *); 443 int doaccept(struct proc *, int, struct sockaddr *, socklen_t *, int, 444 register_t *); 445 446 #ifdef SOCKBUF_DEBUG 447 void sblastrecordchk(struct sockbuf *, const char *); 448 #define SBLASTRECORDCHK(sb, where) sblastrecordchk((sb), (where)) 449 450 void sblastmbufchk(struct sockbuf *, const char *); 451 #define SBLASTMBUFCHK(sb, where) sblastmbufchk((sb), (where)) 452 void sbcheck(struct socket *, struct sockbuf *); 453 #define SBCHECK(so, sb) sbcheck((so), (sb)) 454 #else 455 #define SBLASTRECORDCHK(sb, where) /* nothing */ 456 #define SBLASTMBUFCHK(sb, where) /* nothing */ 457 #define SBCHECK(so, sb) /* nothing */ 458 #endif /* SOCKBUF_DEBUG */ 459 460 #endif /* _KERNEL */ 461 462 #endif /* _SYS_SOCKETVAR_H_ */ 463