1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Multithreaded STREAMS Local Transport Provider.
28 *
29 * OVERVIEW
30 * ========
31 *
32 * This driver provides TLI as well as socket semantics. It provides
33 * connectionless, connection oriented, and connection oriented with orderly
34 * release transports for TLI and sockets. Each transport type has separate name
35 * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) -
36 * this removes any name space conflicts when binding to socket style transport
37 * addresses.
38 *
39 * NOTE: There is one exception: Socket ticots and ticotsord transports share
40 * the same namespace. In fact, sockets always use ticotsord type transport.
41 *
42 * The driver mode is specified during open() by the minor number used for
43 * open.
44 *
45 * The sockets in addition have the following semantic differences:
46 * No support for passing up credentials (TL_SET[U]CRED).
47 *
 * Options are passed through transparently on T_CONN_REQ to T_CONN_IND,
 * from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to
 * T_OPTDATA_IND.
51 *
52 * The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before
53 * a T_CONN_RES is received from the acceptor. This means that a socket
54 * connect will complete before the peer has called accept.
55 *
56 *
57 * MULTITHREADING
58 * ==============
59 *
60 * The driver does not use STREAMS protection mechanisms. Instead it uses a
61 * generic "serializer" abstraction. Most of the operations are executed behind
62 * the serializer and are, essentially single-threaded. All functions executed
63 * behind the same serializer are strictly serialized. So if one thread calls
64 * serializer_enter(serializer, foo, mp1, arg1); and another thread calls
65 * serializer_enter(serializer, bar, mp2, arg1); then (depending on which one
 * was called first) the actual sequence will be foo(mp1, arg1); bar(mp2, arg1)
 * or bar(mp2, arg1); foo(mp1, arg1); But foo() and bar() will never run at the
68 * same time.
69 *
 * Connectionless transports use a single serializer per transport type (one
 * for TLI and one for sockets). Connection-oriented transports use
 * finer-grained serializers.
73 *
74 * All COTS-type endpoints start their life with private serializers. During
75 * connection request processing the endpoint serializer is switched to the
76 * listener's serializer and the rest of T_CONN_REQ processing is done on the
77 * listener serializer. During T_CONN_RES processing the eager serializer is
78 * switched from listener to acceptor serializer and after that point all
79 * processing for eager and acceptor happens on this serializer. To avoid races
80 * with endpoint closes while its serializer may be changing closes are blocked
81 * while serializers are manipulated.
82 *
83 * References accounting
84 * ---------------------
85 *
86 * Endpoints are reference counted and freed when the last reference is
87 * dropped. Functions within the serializer may access an endpoint state even
88 * after an endpoint closed. The te_closing being set on the endpoint indicates
89 * that the endpoint entered its close routine.
90 *
91 * One reference is held for each opened endpoint instance. The reference
92 * counter is incremented when the endpoint is linked to another endpoint and
93 * decremented when the link disappears. It is also incremented when the
94 * endpoint is found by the hash table lookup. This increment is atomic with the
95 * lookup itself and happens while the hash table read lock is held.
96 *
97 * Close synchronization
98 * ---------------------
99 *
 * During close the endpoint is marked as closing using te_closing flag. It is
101 * usually enough to check for te_closing flag since all other state changes
102 * happen after this flag is set and the close entered serializer. Immediately
103 * after setting te_closing flag tl_close() enters serializer and waits until
104 * the callback finishes. This allows all functions called within serializer to
105 * simply check te_closing without any locks.
106 *
107 * Serializer management.
108 * ---------------------
109 *
110 * For COTS transports serializers are created when the endpoint is constructed
111 * and destroyed when the endpoint is destructed. CLTS transports use global
112 * serializers - one for sockets and one for TLI.
113 *
114 * COTS serializers have separate reference counts to deal with several
115 * endpoints sharing the same serializer. There is a subtle problem related to
116 * the serializer destruction. The serializer should never be destroyed by any
117 * function executed inside serializer. This means that close has to wait till
118 * all serializer activity for this endpoint is finished before it can drop the
119 * last reference on the endpoint (which may as well free the serializer). This
120 * is only relevant for COTS transports which manage serializers
121 * dynamically. For CLTS transports close may complete without waiting for all
122 * serializer activity to finish since serializer is only destroyed at driver
123 * detach time.
124 *
125 * COTS endpoints keep track of the number of outstanding requests on the
126 * serializer for the endpoint. The code handling accept() avoids changing
127 * client serializer if it has any pending messages on the serializer and
128 * instead moves acceptor to listener's serializer.
129 *
130 *
131 * Use of hash tables
132 * ------------------
133 *
134 * The driver uses modhash hash table implementation. Each transport uses two
135 * hash tables - one for finding endpoints by acceptor ID and another one for
136 * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same
137 * pair of hash tables since sockets only use TICOTSORD.
138 *
139 * All hash tables lookups increment a reference count for returned endpoints,
140 * so we may safely check the endpoint state even when the endpoint is removed
141 * from the hash by another thread immediately after it is found.
142 *
143 *
144 * CLOSE processing
145 * ================
146 *
147 * The driver enters serializer twice on close(). The close sequence is the
148 * following:
149 *
 * 1) Wait until closing is safe (te_closewait becomes zero)
 *    This step is needed to prevent close during serializer switches. In most
 *    cases (close happening after connection establishment) te_closewait is
 *    zero.
 * 2) Set te_closing.
 * 3) Call tl_close_ser() within serializer and wait for it to complete.
 *
 *    tl_close_ser() simply marks endpoint and wakes up waiting tl_close().
 *    It also needs to clear write-side q_next pointers - this should be done
 *    before qprocsoff().
 *
 *    This synchronous serializer entry during close is needed to ensure that
 *    the queue is valid everywhere inside the serializer.
 *
 *    Note that in many cases close will execute tl_close_ser() synchronously,
 *    so it will not wait at all.
 *
 * 4) Call qprocsoff().
 * 5) Call tl_close_finish_ser() within the serializer and wait for it to
 *    complete (for COTS transports). For CLTS transport there is no wait.
 *
 *    tl_close_finish_ser() finishes the close process and wakes up waiting
 *    close if there is any.
 *
 *    Note that in most cases close will enter tl_close_finish_ser()
 *    synchronously and will not wait at all.
176 *
177 *
178 * Flow Control
179 * ============
180 *
181 * The driver implements both read and write side service routines. No one calls
182 * putq() on the read queue. The read side service routine tl_rsrv() is called
183 * when the read side stream is back-enabled. It enters serializer synchronously
184 * (waits till serializer processing is complete). Within serializer it
185 * back-enables all endpoints blocked by the queue for connection-less
186 * transports and enables write side service processing for the peer for
187 * connection-oriented transports.
188 *
189 * Read and write side service routines use special mblk_sized space in the
190 * endpoint structure to enter perimeter.
191 *
192 * Write-side flow control
193 * -----------------------
194 *
195 * Write side flow control is a bit tricky. The driver needs to deal with two
196 * message queues - the explicit STREAMS message queue maintained by
197 * putq()/getq()/putbq() and the implicit queue within the serializer. These two
198 * queues should be synchronized to preserve message ordering and should
199 * maintain a single order determined by the order in which messages enter
200 * tl_wput(). In order to maintain the ordering between these two queues the
201 * STREAMS queue is only manipulated within the serializer, so the ordering is
202 * provided by the serializer.
203 *
204 * Functions called from the tl_wsrv() sometimes may call putbq(). To
205 * immediately stop any further processing of the STREAMS message queues the
206 * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write
207 * side service processing stops when the flag is set.
208 *
209 * The tl_wsrv() function enters serializer synchronously and waits for it to
210 * complete. The serializer call-back tl_wsrv_ser() either drains all messages
211 * on the STREAMS queue or terminates when it notices the te_nowsrv flag
212 * set. Note that the maximum amount of messages processed by tl_wput_ser() is
213 * always bounded by the amount of messages on the STREAMS queue at the time
214 * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS
215 * queue from another serialized entry which can't happen in parallel. This
216 * guarantees that tl_wput_ser() is complete in bounded time (there is no risk
217 * of it draining forever while writer places new messages on the STREAMS
218 * queue).
219 *
220 * Note that a closing endpoint never sets te_nowsrv and never calls putbq().
221 *
222 *
223 * Unix Domain Sockets
224 * ===================
225 *
226 * The driver knows the structure of Unix Domain sockets addresses and treats
227 * them differently from generic TLI addresses. For sockets implicit binds are
228 * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address
229 * instead of using address length of zero. Explicit binds specify
230 * SOU_MAGIC_EXPLICIT as magic.
231 *
232 * For implicit binds we always use minor number as soua_vp part of the address
233 * and avoid any hash table lookups. This saves two hash tables lookups per
234 * anonymous bind.
235 *
 * For explicit address we hash the vnode pointer instead of hashing the
 * full-scale address+zone+length. Hashing by pointer is more efficient than
 * hashing by the full address.
239 *
240 * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the
241 * tep structure, so it should be never freed.
242 *
243 * Also for sockets the driver always uses minor number as acceptor id.
244 *
245 * TPI VIOLATIONS
246 * --------------
247 *
248 * This driver violates TPI in several respects for Unix Domain Sockets:
249 *
250 * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind
251 * is requested and the endpoint is already in use. There is no point in
252 * generating an unused address since this address will be rejected by
253 * sockfs anyway. For implicit binds it always generates a new address
254 * (sets soua_vp to its minor number).
255 *
256 * 2) It always uses minor number as acceptor ID and never uses queue
257 * pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ
258 * message and they do not use the queue pointer.
259 *
 * 3) For Listener sockets the usual sequence is to issue bind() with zero
 *    backlog followed by listen(). The listen() should be issued with
 *    non-zero backlog, so sotpi_listen() issues unbind request followed by
 *    bind request to the same address but with a non-zero qlen value. Both
 *    tl_bind() and tl_unbind() require write lock on the hash table to
 *    insert/remove the address. The driver does not remove the address from
 *    the hash for endpoints that are bound to the explicit address and have
 *    backlog of zero. During T_BIND_REQ processing if the address requested
 *    is equal to the address the endpoint already has it updates the backlog
 *    without reinserting the address in the hash table. This optimization
 *    avoids two hash table updates for each listener created. It also
 *    avoids the problem of a "stolen" address when another listener may use
 *    the same address between the unbind and bind and suddenly listen() fails
 *    because address is in use even though the bind() succeeded.
274 *
275 *
276 * CONNECTIONLESS TRANSPORTS
277 * =========================
278 *
279 * Connectionless transports all share the same serializer (one for TLI and one
280 * for Sockets). Functions executing behind serializer can check or modify state
281 * of any endpoint.
282 *
283 * When endpoint X talks to another endpoint Y it caches the pointer to Y in the
284 * te_lastep field. The next time X talks to some address A it checks whether A
285 * is the same as Y's address and if it is there is no need to lookup Y. If the
286 * address is different or the state of Y is not appropriate (e.g. closed or not
287 * idle) X does a lookup using tl_find_peer() and caches the new address.
288 * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold
289 * on the endpoint found.
290 *
291 * During close of endpoint Y it doesn't try to remove itself from other
292 * endpoints caches. They will detect that Y is gone and will search the peer
293 * endpoint again.
294 *
295 * Flow Control Handling.
296 * ----------------------
297 *
298 * Each connectionless endpoint keeps a list of endpoints which are
299 * flow-controlled by its queue. It also keeps a pointer to the queue which
300 * flow-controls itself. Whenever flow control releases for endpoint X it
301 * enables all queues from the list. During close it also back-enables everyone
 * in the list. If X is flow-controlled when it is closing it removes itself
 * from the peer's list.
304 *
305 * DATA STRUCTURES
306 * ===============
307 *
308 * Each endpoint is represented by the tl_endpt_t structure which keeps all the
 * endpoint state. For connection-oriented transports it keeps a list
 * of pending connections (tl_icon_t). For connectionless transports it keeps a
311 * list of endpoints flow controlled by this one.
312 *
313 * Each transport type is represented by a per-transport data structure
314 * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the
315 * endpoint address hash tables for each transport. It also contains pointer to
316 * transport serializer for connectionless transports.
317 *
318 * Each endpoint keeps a link to its transport structure, so the code can find
319 * all per-transport information quickly.
320 */
321
322 #include <sys/types.h>
323 #include <sys/inttypes.h>
324 #include <sys/stream.h>
325 #include <sys/stropts.h>
326 #define _SUN_TPI_VERSION 2
327 #include <sys/tihdr.h>
328 #include <sys/strlog.h>
329 #include <sys/debug.h>
330 #include <sys/cred.h>
331 #include <sys/errno.h>
332 #include <sys/kmem.h>
333 #include <sys/id_space.h>
334 #include <sys/modhash.h>
335 #include <sys/mkdev.h>
336 #include <sys/tl.h>
337 #include <sys/stat.h>
338 #include <sys/conf.h>
339 #include <sys/modctl.h>
340 #include <sys/strsun.h>
341 #include <sys/socket.h>
342 #include <sys/socketvar.h>
343 #include <sys/sysmacros.h>
344 #include <sys/xti_xtiopt.h>
345 #include <sys/ddi.h>
346 #include <sys/sunddi.h>
347 #include <sys/zone.h>
348 #include <inet/common.h> /* typedef int (*pfi_t)() for inet/optcom.h */
349 #include <inet/optcom.h>
350 #include <sys/strsubr.h>
351 #include <sys/ucred.h>
352 #include <sys/suntpi.h>
353 #include <sys/list.h>
354 #include <sys/serializer.h>
355
356 /*
357 * TBD List
358 * 14 Eliminate state changes through table
359 * 16. AF_UNIX socket options
360 * 17. connect() for ticlts
361 * 18. support for "netstat" to show AF_UNIX plus TLI local
362 * transport connections
363 * 21. sanity check to flushing on sending M_ERROR
364 */
365
366 /*
367 * CONSTANT DECLARATIONS
368 * --------------------
369 */
370
371 /*
372 * Local declarations
373 */
/* Next TPI state for event EV when the endpoint is in state ST. */
#define	NEXTSTATE(EV, ST)	ti_statetbl[EV][ST]

/* Initial sequence number used by T_DISCON_IND. */
#define	BADSEQNUM	(-1)

/* Timeout, in usecs, to wait for an allocb buffer. */
#define	TL_BUFWAIT	(10000)

/* TIDU size to use when "strmsgsz" is unlimited (0). */
#define	TL_TIDUSZ	(64*1024)

/*
 * Size of the hash tables.
 */
#define	TL_HASH_SIZE	311
383
/*
 * Values used to fill in the driver's module_info structure.
 */
#define	TL_ID		(104)		/* module ID number */
#define	TL_NAME		"tl"		/* module name */
#define	TL_MINPSZ	(0)		/* smallest packet size */
#define	TL_MAXPSZ	INFPSZ		/* largest packet size ZZZ */
#define	TL_HIWAT	(16*1024)	/* high water mark */
#define	TL_LOWAT	(256)		/* low water mark */
/*
 * Minor numbers double as transport provider modes. Socket use is treated
 * as a mode of its own (the TL_SOCKET bit) so that sockets get a separate
 * name space.
 */
#define	TL_TICOTS	0	/* connection oriented transport */
#define	TL_TICOTSORD	1	/* connection oriented w/ orderly release */
#define	TL_TICLTS	2	/* connectionless transport */
#define	TL_UNUSED	3
#define	TL_SOCKET	4	/* socket mode bit */
#define	TL_SOCK_COTS	(TL_SOCKET|TL_TICOTS)
#define	TL_SOCK_COTSORD	(TL_SOCKET|TL_TICOTSORD)
#define	TL_SOCK_CLTS	(TL_SOCKET|TL_TICLTS)

#define	TL_MINOR_MASK	0x7
#define	TL_MINOR_START	(TL_TICLTS + 1)
408
/*
 * LOCAL MACROS
 */

/* Round p up to a multiple of sizeof (t_scalar_t). */
#define	T_ALIGN(p)	P2ROUNDUP((p), sizeof (t_scalar_t))
413
414 /*
415 * EXTERNAL VARIABLE DECLARATIONS
416 * -----------------------------
417 */
418 /*
419 * state table defined in the OS space.c
420 */
421 extern char ti_statetbl[TE_NOEVENTS][TS_NOSTATES];
422
423 /*
424 * STREAMS DRIVER ENTRY POINTS PROTOTYPES
425 */
426 static int tl_open(queue_t *, dev_t *, int, int, cred_t *);
427 static int tl_close(queue_t *, int, cred_t *);
428 static void tl_wput(queue_t *, mblk_t *);
429 static void tl_wsrv(queue_t *);
430 static void tl_rsrv(queue_t *);
431
432 static int tl_attach(dev_info_t *, ddi_attach_cmd_t);
433 static int tl_detach(dev_info_t *, ddi_detach_cmd_t);
434 static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
435
436
437 /*
438 * GLOBAL DATA STRUCTURES AND VARIABLES
439 * -----------------------------------
440 */
441
442 /*
443 * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ
444 * For now, we only manage the SO_RECVUCRED option but we also have
445 * harmless dummy options to make things work with some common code we access.
446 */
447 opdes_t tl_opt_arr[] = {
448 /* The SO_TYPE is needed for the hack below */
449 {
450 SO_TYPE,
451 SOL_SOCKET,
452 OA_R,
453 OA_R,
454 OP_NP,
455 0,
456 sizeof (t_scalar_t),
457 0
458 },
459 {
460 SO_RECVUCRED,
461 SOL_SOCKET,
462 OA_RW,
463 OA_RW,
464 OP_NP,
465 0,
466 sizeof (int),
467 0
468 }
469 };
470
471 /*
472 * Table of all supported levels
473 * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
474 * any supported options so we need this info separately.
475 *
476 * This is needed only for topmost tpi providers.
477 */
478 optlevel_t tl_valid_levels_arr[] = {
479 XTI_GENERIC,
480 SOL_SOCKET,
481 TL_PROT_LEVEL
482 };
483
484 #define TL_VALID_LEVELS_CNT A_CNT(tl_valid_levels_arr)
/*
 * Current upper bound on the amount of space needed to return all options.
 * Every entry of tl_opt_arr is budgeted 4 bytes of option data (the "<< 2")
 * plus one struct opthdr; on top of that come 64 spare bytes and the
 * T_optmgmt_ack header. Options added to tl_opt_arr whose data fits in that
 * per-option budget are covered automatically; larger ones must be accounted
 * for here by hand.
 */
#define	TL_MAX_OPT_BUF_LEN	\
	((A_CNT(tl_opt_arr) << 2) + \
	(A_CNT(tl_opt_arr) * sizeof (struct opthdr)) + \
	64 + sizeof (struct T_optmgmt_ack))

/* Number of entries in tl_opt_arr. */
#define	TL_OPT_ARR_CNT	A_CNT(tl_opt_arr)
496
497 /*
498 * transport addr structure
499 */
500 typedef struct tl_addr {
501 zoneid_t ta_zoneid; /* Zone scope of address */
502 t_scalar_t ta_alen; /* length of abuf */
503 void *ta_abuf; /* the addr itself */
504 } tl_addr_t;
505
506 /*
507 * Refcounted version of serializer.
508 */
509 typedef struct tl_serializer {
510 uint_t ts_refcnt;
511 serializer_t *ts_serializer;
512 } tl_serializer_t;
513
514 /*
515 * Each transport type has a separate state.
516 * Per-transport state.
517 */
518 typedef struct tl_transport_state {
519 char *tr_name;
520 minor_t tr_minor;
521 uint32_t tr_defaddr;
522 mod_hash_t *tr_ai_hash;
523 mod_hash_t *tr_addr_hash;
524 tl_serializer_t *tr_serializer;
525 } tl_transport_state_t;
526
527 #define TL_DFADDR 0x1000
528
529 static tl_transport_state_t tl_transports[] = {
530 { "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL },
531 { "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL },
532 { "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL },
533 { "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL },
534 { "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL },
535 { "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL },
536 { "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL }
537 };
538
539 #define TL_MAXTRANSPORT A_CNT(tl_transports)
540
541 struct tl_endpt;
542 typedef struct tl_endpt tl_endpt_t;
543
544 typedef void (tlproc_t)(mblk_t *, tl_endpt_t *);
545
546 /*
547 * Data structure used to represent pending connects.
548 * Records enough information so that the connecting peer can close
549 * before the connection gets accepted.
550 */
551 typedef struct tl_icon {
552 list_node_t ti_node;
553 struct tl_endpt *ti_tep; /* NULL if peer has already closed */
554 mblk_t *ti_mp; /* b_next list of data + ordrel_ind */
555 t_scalar_t ti_seqno; /* Sequence number */
556 } tl_icon_t;
557
/* Short name for struct so_ux_addr, and its size. */
typedef struct so_ux_addr	soux_addr_t;
#define	TL_SOUX_ADDRLEN		sizeof (soux_addr_t)
560
/*
 * Upper limit on the number of unaccepted connection indications a single
 * listener may have.
 */
#define	TL_MAXQLEN	4096
int	tl_maxqlen = TL_MAXQLEN;
566
567 /*
568 * transport endpoint structure
569 */
570 struct tl_endpt {
571 queue_t *te_rq; /* stream read queue */
572 queue_t *te_wq; /* stream write queue */
573 uint32_t te_refcnt;
574 int32_t te_state; /* TPI state of endpoint */
575 minor_t te_minor; /* minor number */
576 #define te_seqno te_minor
577 uint_t te_flag; /* flag field */
578 boolean_t te_nowsrv;
579 tl_serializer_t *te_ser; /* Serializer to use */
580 #define te_serializer te_ser->ts_serializer
581
582 soux_addr_t te_uxaddr; /* Socket address */
583 #define te_magic te_uxaddr.soua_magic
584 #define te_vp te_uxaddr.soua_vp
585 tl_addr_t te_ap; /* addr bound to this endpt */
586 #define te_zoneid te_ap.ta_zoneid
587 #define te_alen te_ap.ta_alen
588 #define te_abuf te_ap.ta_abuf
589
590 tl_transport_state_t *te_transport;
591 #define te_addrhash te_transport->tr_addr_hash
592 #define te_aihash te_transport->tr_ai_hash
593 #define te_defaddr te_transport->tr_defaddr
594 cred_t *te_credp; /* endpoint user credentials */
595 mod_hash_hndl_t te_hash_hndl; /* Handle for address hash */
596
597 /*
598 * State specific for connection-oriented and connectionless transports.
599 */
600 union {
601 /* Connection-oriented state. */
602 struct {
603 t_uscalar_t _te_nicon; /* count of conn requests */
604 t_uscalar_t _te_qlen; /* max conn requests */
605 tl_endpt_t *_te_oconp; /* conn request pending */
606 tl_endpt_t *_te_conp; /* connected endpt */
607 #ifndef _ILP32
608 void *_te_pad;
609 #endif
610 list_t _te_iconp; /* list of conn ind. pending */
611 } _te_cots_state;
612 /* Connection-less state. */
613 struct {
614 tl_endpt_t *_te_lastep; /* last dest. endpoint */
615 tl_endpt_t *_te_flowq; /* flow controlled on whom */
616 list_node_t _te_flows; /* lists of connections */
617 list_t _te_flowlist; /* Who flowcontrols on me */
618 } _te_clts_state;
619 } _te_transport_state;
620 #define te_nicon _te_transport_state._te_cots_state._te_nicon
621 #define te_qlen _te_transport_state._te_cots_state._te_qlen
622 #define te_oconp _te_transport_state._te_cots_state._te_oconp
623 #define te_conp _te_transport_state._te_cots_state._te_conp
624 #define te_iconp _te_transport_state._te_cots_state._te_iconp
625 #define te_lastep _te_transport_state._te_clts_state._te_lastep
626 #define te_flowq _te_transport_state._te_clts_state._te_flowq
627 #define te_flowlist _te_transport_state._te_clts_state._te_flowlist
628 #define te_flows _te_transport_state._te_clts_state._te_flows
629
630 bufcall_id_t te_bufcid; /* outstanding bufcall id */
631 timeout_id_t te_timoutid; /* outstanding timeout id */
632 pid_t te_cpid; /* cached pid of endpoint */
633 t_uscalar_t te_acceptor_id; /* acceptor id for T_CONN_RES */
634 /*
635 * Pieces of the endpoint state needed for closing.
636 */
637 kmutex_t te_closelock;
638 kcondvar_t te_closecv;
639 uint8_t te_closing; /* The endpoint started closing */
640 uint8_t te_closewait; /* Wait in close until zero */
641 mblk_t te_closemp; /* for entering serializer on close */
642 mblk_t te_rsrvmp; /* for entering serializer on rsrv */
643 mblk_t te_wsrvmp; /* for entering serializer on wsrv */
644 kmutex_t te_srv_lock;
645 kcondvar_t te_srv_cv;
646 uint8_t te_rsrv_active; /* Running in tl_rsrv() */
647 uint8_t te_wsrv_active; /* Running in tl_wsrv() */
648 /*
649 * Pieces of the endpoint state needed for serializer transitions.
650 */
651 kmutex_t te_ser_lock; /* Protects the count below */
652 uint_t te_ser_count; /* Number of messages on serializer */
653 };
654
/*
 * Flag values. The lower 4 bits specify the transport used.
 * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER exist for debugging
 * only; they make it easier to identify an endpoint.
 */
#define	TL_LISTENER	0x00010	/* listener endpoint */
#define	TL_ACCEPTOR	0x00020	/* accepting endpoint */
#define	TL_EAGER	0x00040	/* connecting endpoint */
#define	TL_ACCEPTED	0x00080	/* accepted connection */
#define	TL_SETCRED	0x00100	/* credentials are to be sent */
#define	TL_SETUCRED	0x00200	/* ucred is to be sent */
#define	TL_SOCKUCRED	0x00400	/* SCM_UCRED is to be sent */
#define	TL_ADDRHASHED	0x01000	/* address is stored in te_addrhash */
#define	TL_CLOSE_SER	0x10000	/* close has entered the serializer */
/*
 * Endpoint type predicates; each one tests mode bits kept in te_flag.
 */
#define	IS_CLTS(x)	(((x)->te_flag & TL_TICLTS) != 0)
#define	IS_COTS(x)	(((x)->te_flag & TL_TICLTS) == 0)
#define	IS_COTSORD(x)	(((x)->te_flag & TL_TICOTSORD) != 0)
#define	IS_SOCKET(x)	(((x)->te_flag & TL_SOCKET) != 0)
676
/*
 * Certain operations are always used together. These macros reduce the chance
 * of missing a part of a combination.
 *
 * Each macro expands to a brace-enclosed block; that form is kept unchanged
 * so that existing uses expand exactly as before. Macro arguments are
 * parenthesized wherever they are used as expressions. The endpoint argument
 * is evaluated more than once, so it must not have side effects.
 */

/* Release the endpoint x points to and clear the pointer. */
#define	TL_UNCONNECT(x)		{ tl_refrele(x); (x) = NULL; }

/* TL_UNCONNECT() that does nothing when x is already NULL. */
#define	TL_REMOVE_PEER(x)	{ if ((x) != NULL) TL_UNCONNECT(x) }

/*
 * Put mp back on the write queue of x and set te_nowsrv. Must not be used
 * once close has entered the serializer (TL_CLOSE_SER).
 */
#define	TL_PUTBQ(x, mp) {				\
	ASSERT(!((x)->te_flag & TL_CLOSE_SER));		\
	(x)->te_nowsrv = B_TRUE;			\
	(void) putbq((x)->te_wq, (mp));			\
}

/* Clear te_nowsrv and enable the write queue of x. */
#define	TL_QENABLE(x) {					\
	(x)->te_nowsrv = B_FALSE;			\
	qenable((x)->te_wq);				\
}

/* Clear te_nowsrv and queue mp on the write queue of x. */
#define	TL_PUTQ(x, mp) {				\
	(x)->te_nowsrv = B_FALSE;			\
	(void) putq((x)->te_wq, (mp));			\
}
692
693 /*
694 * STREAMS driver glue data structures.
695 */
696 static struct module_info tl_minfo = {
697 TL_ID, /* mi_idnum */
698 TL_NAME, /* mi_idname */
699 TL_MINPSZ, /* mi_minpsz */
700 TL_MAXPSZ, /* mi_maxpsz */
701 TL_HIWAT, /* mi_hiwat */
702 TL_LOWAT /* mi_lowat */
703 };
704
705 static struct qinit tl_rinit = {
706 NULL, /* qi_putp */
707 (int (*)())tl_rsrv, /* qi_srvp */
708 tl_open, /* qi_qopen */
709 tl_close, /* qi_qclose */
710 NULL, /* qi_qadmin */
711 &tl_minfo, /* qi_minfo */
712 NULL /* qi_mstat */
713 };
714
715 static struct qinit tl_winit = {
716 (int (*)())tl_wput, /* qi_putp */
717 (int (*)())tl_wsrv, /* qi_srvp */
718 NULL, /* qi_qopen */
719 NULL, /* qi_qclose */
720 NULL, /* qi_qadmin */
721 &tl_minfo, /* qi_minfo */
722 NULL /* qi_mstat */
723 };
724
725 static struct streamtab tlinfo = {
726 &tl_rinit, /* st_rdinit */
727 &tl_winit, /* st_wrinit */
728 NULL, /* st_muxrinit */
729 NULL /* st_muxwrinit */
730 };
731
732 DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach,
733 nulldev, tl_info, D_MP, &tlinfo, ddi_quiesce_not_supported);
734
735 static struct modldrv modldrv = {
736 &mod_driverops, /* Type of module -- pseudo driver here */
737 "TPI Local Transport (tl)",
738 &tl_devops, /* driver ops */
739 };
740
741 /*
742 * Module linkage information for the kernel.
743 */
744 static struct modlinkage modlinkage = {
745 MODREV_1,
746 &modldrv,
747 NULL
748 };
749
750 /*
751 * Templates for response to info request
752 * Check sanity of unlimited connect data etc.
753 */
754
755 #define TL_CLTS_PROVIDER_FLAG (XPG4_1|SENDZERO)
756 #define TL_COTS_PROVIDER_FLAG (XPG4_1|SENDZERO)
757
758 static struct T_info_ack tl_cots_info_ack =
759 {
760 T_INFO_ACK, /* PRIM_type -always T_INFO_ACK */
761 T_INFINITE, /* TSDU size */
762 T_INFINITE, /* ETSDU size */
763 T_INFINITE, /* CDATA_size */
764 T_INFINITE, /* DDATA_size */
765 T_INFINITE, /* ADDR_size */
766 T_INFINITE, /* OPT_size */
767 0, /* TIDU_size - fill at run time */
768 T_COTS, /* SERV_type */
769 -1, /* CURRENT_state */
770 TL_COTS_PROVIDER_FLAG /* PROVIDER_flag */
771 };
772
773 static struct T_info_ack tl_clts_info_ack =
774 {
775 T_INFO_ACK, /* PRIM_type - always T_INFO_ACK */
776 0, /* TSDU_size - fill at run time */
777 -2, /* ETSDU_size -2 => not supported */
778 -2, /* CDATA_size -2 => not supported */
779 -2, /* DDATA_size -2 => not supported */
780 -1, /* ADDR_size -1 => unlimited */
781 -1, /* OPT_size */
782 0, /* TIDU_size - fill at run time */
783 T_CLTS, /* SERV_type */
784 -1, /* CURRENT_state */
785 TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */
786 };
787
788 /*
789 * private copy of devinfo pointer used in tl_info
790 */
791 static dev_info_t *tl_dip;
792
793 /*
794 * Endpoints cache.
795 */
796 static kmem_cache_t *tl_cache;
797 /*
798 * Minor number space.
799 */
800 static id_space_t *tl_minors;
801
802 /*
803 * Default Data Unit size.
804 */
805 static t_scalar_t tl_tidusz;
806
807 /*
808 * Size of hash tables.
809 */
810 static size_t tl_hash_size = TL_HASH_SIZE;
811
812 /*
813 * Debug and test variable ONLY. Turn off T_CONN_IND queueing
814 * for sockets.
815 */
816 static int tl_disable_early_connect = 0;
817 static int tl_client_closing_when_accepting;
818
819 static int tl_serializer_noswitch;
820
/*
 * LOCAL FUNCTION PROTOTYPES
 * -------------------------
 *
 * Routines with a "_ser" suffix all take (mblk_t *, tl_endpt_t *). That is
 * the shape of the procedures which tl_wput(), tl_wsrv() and tl_close() post
 * on the endpoint serializer through tl_serializer_enter().
 */
static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *);
static void tl_do_proto(mblk_t *, tl_endpt_t *);
static void tl_do_ioctl(mblk_t *, tl_endpt_t *);
static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *);
static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t,
	t_scalar_t);
static void tl_bind(mblk_t *, tl_endpt_t *);
static void tl_bind_ser(mblk_t *, tl_endpt_t *);
static void tl_ok_ack(queue_t *, mblk_t *mp, t_scalar_t);
static void tl_unbind(mblk_t *, tl_endpt_t *);
static void tl_optmgmt(queue_t *, mblk_t *);
static void tl_conn_req(queue_t *, mblk_t *);
static void tl_conn_req_ser(mblk_t *, tl_endpt_t *);
static void tl_conn_res(mblk_t *, tl_endpt_t *);
static void tl_discon_req(mblk_t *, tl_endpt_t *);
static void tl_capability_req(mblk_t *, tl_endpt_t *);
static void tl_info_req_ser(mblk_t *, tl_endpt_t *);
static void tl_info_req(mblk_t *, tl_endpt_t *);
static void tl_addr_req(mblk_t *, tl_endpt_t *);
static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *);
static void tl_data(mblk_t *, tl_endpt_t *);
static void tl_exdata(mblk_t *, tl_endpt_t *);
static void tl_ordrel(mblk_t *, tl_endpt_t *);
static void tl_unitdata(mblk_t *, tl_endpt_t *);
static void tl_unitdata_ser(mblk_t *, tl_endpt_t *);
static void tl_uderr(queue_t *, mblk_t *, t_scalar_t);
static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *);
static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *);
static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *);
static void tl_cl_backenable(tl_endpt_t *);
static void tl_co_unconnect(tl_endpt_t *);
static mblk_t *tl_resizemp(mblk_t *, ssize_t);
static void tl_discon_ind(tl_endpt_t *, uint32_t);
static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t);
static mblk_t *tl_ordrel_ind_alloc(void);
static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t);
static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *);
static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t);
static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **);
static void tl_icon_freemsgs(mblk_t **);
static void tl_merror(queue_t *, mblk_t *, int);
static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *);
static int tl_default_opt(queue_t *, int, int, uchar_t *);
static int tl_get_opt(queue_t *, int, int, uchar_t *);
static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *,
    uchar_t *, void *, cred_t *);
static void tl_memrecover(queue_t *, mblk_t *, size_t);
static void tl_freetip(tl_endpt_t *, tl_icon_t *);
static void tl_free(tl_endpt_t *);
static int tl_constructor(void *, void *, int);
static void tl_destructor(void *, void *);
static void tl_find_callback(mod_hash_key_t, mod_hash_val_t);
static tl_serializer_t *tl_serializer_alloc(int);
static void tl_serializer_refhold(tl_serializer_t *);
static void tl_serializer_refrele(tl_serializer_t *);
static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *);
static void tl_serializer_exit(tl_endpt_t *);
static boolean_t tl_noclose(tl_endpt_t *);
static void tl_closeok(tl_endpt_t *);
static void tl_refhold(tl_endpt_t *);
static void tl_refrele(tl_endpt_t *);
static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t);
static uint_t tl_hash_by_addr(void *, mod_hash_key_t);
static void tl_close_ser(mblk_t *, tl_endpt_t *);
static void tl_close_finish_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_data_ser(mblk_t *, tl_endpt_t *);
static void tl_proto_ser(mblk_t *, tl_endpt_t *);
static void tl_putq_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_common_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_ser(mblk_t *, tl_endpt_t *);
static void tl_wsrv_ser(mblk_t *, tl_endpt_t *);
static void tl_rsrv_ser(mblk_t *, tl_endpt_t *);
static void tl_addr_unbind(tl_endpt_t *);
898
/*
 * Initialize option database object for TL. It bundles the three option
 * handlers (default/get/set) with the option table and the table of valid
 * option levels, together with the entry count of each table.
 */

optdb_obj_t tl_opt_obj = {
	tl_default_opt,		/* TL default value function pointer */
	tl_get_opt,		/* TL get function pointer */
	tl_set_opt,		/* TL set function pointer */
	TL_OPT_ARR_CNT,		/* TL option database count of entries */
	tl_opt_arr,		/* TL option database */
	TL_VALID_LEVELS_CNT,	/* TL valid level count of entries */
	tl_valid_levels_arr	/* TL valid level array */
};
912
913 /*
914 * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS
915 * ---------------------------------------
916 */
917
918 /*
919 * Loadable module routines
920 */
921 int
_init(void)922 _init(void)
923 {
924 return (mod_install(&modlinkage));
925 }
926
927 int
_fini(void)928 _fini(void)
929 {
930 return (mod_remove(&modlinkage));
931 }
932
933 int
_info(struct modinfo * modinfop)934 _info(struct modinfo *modinfop)
935 {
936 return (mod_info(&modlinkage, modinfop));
937 }
938
939 /*
940 * Driver Entry Points and Other routines
941 */
/*
 * Attach entry point.
 *
 * DDI_RESUME succeeds without doing any work and every command other than
 * DDI_ATTACH fails. For DDI_ATTACH the routine
 *   - computes the default TIDU size,
 *   - creates one minor node per transport,
 *   - creates the endpoint cache and the minor number space,
 *   - creates the acceptor ID hash and the address hash of every transport
 *     (TL_SOCK_COTSORD reuses the tables of TL_SOCK_COTS),
 *   - allocates the shared serializer of each connectionless transport,
 *   - records the devinfo pointer in tl_dip.
 *
 * Returns DDI_SUCCESS, or DDI_FAILURE when the command is not supported or a
 * minor node or the endpoint cache cannot be created (the minor nodes created
 * so far are removed in that case).
 */
static int
tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	int i;
	char name[32];	/* scratch buffer for hash table names */

	/*
	 * Resume from a checkpoint state.
	 */
	if (cmd == DDI_RESUME)
		return (DDI_SUCCESS);

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	/*
	 * Deduce TIDU size to use.  Note: "strmsgsz" being 0 has semantics that
	 * streams message sizes can be unlimited. We use a defined constant
	 * instead.
	 */
	tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ;

	/*
	 * Create subdevices for each transport.
	 */
	for (i = 0; i < TL_UNUSED; i++) {
		if (ddi_create_minor_node(devi,
		    tl_transports[i].tr_name,
		    S_IFCHR, tl_transports[i].tr_minor,
		    DDI_PSEUDO, NULL) == DDI_FAILURE) {
			/* Undo the nodes created so far. */
			ddi_remove_minor_node(devi, NULL);
			return (DDI_FAILURE);
		}
	}

	/*
	 * Create the cache of endpoint structures.
	 */
	tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t),
	    0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0);

	if (tl_cache == NULL) {
		ddi_remove_minor_node(devi, NULL);
		return (DDI_FAILURE);
	}

	/*
	 * Create ID space for minor numbers
	 */
	tl_minors = id_space_create("tl_minor_space",
	    TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1);

	/*
	 * Set up the per-transport state: hash tables and, for connectionless
	 * transports, the shared serializer.
	 */
	for (i = 0; i < TL_MAXTRANSPORT; i++) {
		tl_transport_state_t *t = &tl_transports[i];

		if (i == TL_UNUSED)
			continue;

		/*
		 * Socket COTSORD shares namespace with COTS. The ASSERTs
		 * verify that the TL_SOCK_COTS tables have already been
		 * created by an earlier iteration.
		 */
		if (i == TL_SOCK_COTSORD) {
			t->tr_ai_hash =
			    tl_transports[TL_SOCK_COTS].tr_ai_hash;
			ASSERT(t->tr_ai_hash != NULL);
			t->tr_addr_hash =
			    tl_transports[TL_SOCK_COTS].tr_addr_hash;
			ASSERT(t->tr_addr_hash != NULL);
			continue;
		}

		/*
		 * Create hash tables.
		 *
		 * The acceptor ID hash is keyed by te_acceptor_id. tl_open()
		 * uses the minor number as acceptor ID, except for non-socket
		 * endpoints on _ILP32 kernels where the read queue pointer is
		 * used; the latter get a pointer hash, everything else an ID
		 * hash.
		 */
		(void) snprintf(name, sizeof (name), "%s_ai_hash",
		    t->tr_name);
#ifdef _ILP32
		if (i & TL_SOCKET)
			t->tr_ai_hash =
			    mod_hash_create_idhash(name, tl_hash_size - 1,
			    mod_hash_null_valdtor);
		else
			t->tr_ai_hash =
			    mod_hash_create_ptrhash(name, tl_hash_size,
			    mod_hash_null_valdtor, sizeof (queue_t));
#else
		t->tr_ai_hash =
		    mod_hash_create_idhash(name, tl_hash_size - 1,
		    mod_hash_null_valdtor);
#endif /* _ILP32 */

		/*
		 * The address hash is a pointer hash for sockets and an
		 * extended hash using tl_hash_by_addr()/tl_hash_cmp_addr()
		 * for all other transports.
		 */
		if (i & TL_SOCKET) {
			(void) snprintf(name, sizeof (name), "%s_sockaddr_hash",
			    t->tr_name);
			t->tr_addr_hash = mod_hash_create_ptrhash(name,
			    tl_hash_size, mod_hash_null_valdtor,
			    sizeof (uintptr_t));
		} else {
			(void) snprintf(name, sizeof (name), "%s_addr_hash",
			    t->tr_name);
			t->tr_addr_hash = mod_hash_create_extended(name,
			    tl_hash_size, mod_hash_null_keydtor,
			    mod_hash_null_valdtor,
			    tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP);
		}

		/* Create serializer for connectionless transports. */
		if (i & TL_TICLTS)
			t->tr_serializer = tl_serializer_alloc(KM_SLEEP);
	}

	/* Remember the devinfo node for tl_info(). */
	tl_dip = devi;

	return (DDI_SUCCESS);
}
1052
1053 static int
tl_detach(dev_info_t * devi,ddi_detach_cmd_t cmd)1054 tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
1055 {
1056 int i;
1057
1058 if (cmd == DDI_SUSPEND)
1059 return (DDI_SUCCESS);
1060
1061 if (cmd != DDI_DETACH)
1062 return (DDI_FAILURE);
1063
1064 /*
1065 * Destroy arenas and hash tables.
1066 */
1067 for (i = 0; i < TL_MAXTRANSPORT; i++) {
1068 tl_transport_state_t *t = &tl_transports[i];
1069
1070 if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD))
1071 continue;
1072
1073 EQUIV(i & TL_TICLTS, t->tr_serializer != NULL);
1074 if (t->tr_serializer != NULL) {
1075 tl_serializer_refrele(t->tr_serializer);
1076 t->tr_serializer = NULL;
1077 }
1078
1079 #ifdef _ILP32
1080 if (i & TL_SOCKET)
1081 mod_hash_destroy_idhash(t->tr_ai_hash);
1082 else
1083 mod_hash_destroy_ptrhash(t->tr_ai_hash);
1084 #else
1085 mod_hash_destroy_idhash(t->tr_ai_hash);
1086 #endif /* _ILP32 */
1087 t->tr_ai_hash = NULL;
1088 if (i & TL_SOCKET)
1089 mod_hash_destroy_ptrhash(t->tr_addr_hash);
1090 else
1091 mod_hash_destroy_hash(t->tr_addr_hash);
1092 t->tr_addr_hash = NULL;
1093 }
1094
1095 kmem_cache_destroy(tl_cache);
1096 tl_cache = NULL;
1097 id_space_destroy(tl_minors);
1098 tl_minors = NULL;
1099 ddi_remove_minor_node(devi, NULL);
1100 return (DDI_SUCCESS);
1101 }
1102
1103 /* ARGSUSED */
1104 static int
tl_info(dev_info_t * dip,ddi_info_cmd_t infocmd,void * arg,void ** result)1105 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1106 {
1107
1108 int retcode = DDI_FAILURE;
1109
1110 switch (infocmd) {
1111
1112 case DDI_INFO_DEVT2DEVINFO:
1113 if (tl_dip != NULL) {
1114 *result = (void *)tl_dip;
1115 retcode = DDI_SUCCESS;
1116 }
1117 break;
1118
1119 case DDI_INFO_DEVT2INSTANCE:
1120 *result = (void *)0;
1121 retcode = DDI_SUCCESS;
1122 break;
1123
1124 default:
1125 break;
1126 }
1127 return (retcode);
1128 }
1129
1130 /*
1131 * Endpoint reference management.
1132 */
/*
 * Take one more reference on the endpoint by atomically incrementing
 * te_refcnt. Each hold is expected to be paired with a tl_refrele().
 */
static void
tl_refhold(tl_endpt_t *tep)
{
	atomic_add_32(&tep->te_refcnt, 1);
}
1138
1139 static void
tl_refrele(tl_endpt_t * tep)1140 tl_refrele(tl_endpt_t *tep)
1141 {
1142 ASSERT(tep->te_refcnt != 0);
1143
1144 if (atomic_add_32_nv(&tep->te_refcnt, -1) == 0)
1145 tl_free(tep);
1146 }
1147
1148 /*ARGSUSED*/
1149 static int
tl_constructor(void * buf,void * cdrarg,int kmflags)1150 tl_constructor(void *buf, void *cdrarg, int kmflags)
1151 {
1152 tl_endpt_t *tep = buf;
1153
1154 bzero(tep, sizeof (tl_endpt_t));
1155 mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL);
1156 cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL);
1157 mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL);
1158 cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL);
1159 mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL);
1160
1161 return (0);
1162 }
1163
1164 /*ARGSUSED*/
1165 static void
tl_destructor(void * buf,void * cdrarg)1166 tl_destructor(void *buf, void *cdrarg)
1167 {
1168 tl_endpt_t *tep = buf;
1169
1170 mutex_destroy(&tep->te_closelock);
1171 cv_destroy(&tep->te_closecv);
1172 mutex_destroy(&tep->te_srv_lock);
1173 cv_destroy(&tep->te_srv_cv);
1174 mutex_destroy(&tep->te_ser_lock);
1175 }
1176
/*
 * Destroy an endpoint whose last reference has been dropped (called from
 * tl_refrele()).
 *
 * The ASSERTs spell out the state the endpoint must already be in: no
 * references, both queue pointers cleared, no pending serializer requests,
 * address no longer hashed, credentials released, no outstanding bufcall or
 * timeout, and no close or service activity in progress. The routine frees
 * the address buffer of non-socket endpoints, returns the minor number,
 * cancels an unused hash reservation, drops the peer pointers and the
 * private serializer of connection oriented endpoints, resets the remaining
 * per-open fields and hands the structure back to the endpoint cache.
 */
static void
tl_free(tl_endpt_t *tep)
{
	ASSERT(tep->te_refcnt == 0);
	ASSERT(tep->te_transport != NULL);
	ASSERT(tep->te_rq == NULL);
	ASSERT(tep->te_wq == NULL);
	ASSERT(tep->te_ser != NULL);
	ASSERT(tep->te_ser_count == 0);
	ASSERT(! (tep->te_flag & TL_ADDRHASHED));

	if (IS_SOCKET(tep)) {
		/*
		 * Socket endpoints keep their address embedded in the
		 * endpoint (see tl_open()), so there is nothing to free.
		 */
		ASSERT(tep->te_alen == TL_SOUX_ADDRLEN);
		ASSERT(tep->te_abuf == &tep->te_uxaddr);
		ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor);
		ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT);
	} else if (tep->te_abuf != NULL) {
		/* Separately allocated address buffer. */
		kmem_free(tep->te_abuf, tep->te_alen);
		tep->te_alen = -1; /* uninitialized */
		tep->te_abuf = NULL;
	} else {
		ASSERT(tep->te_alen == -1);
	}

	/* Return the minor number allocated in tl_open(). */
	id_free(tl_minors, tep->te_minor);
	ASSERT(tep->te_credp == NULL);

	/* Give back the hash handle reserved in tl_open() if still held. */
	if (tep->te_hash_hndl != NULL)
		mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl);

	if (IS_COTS(tep)) {
		/*
		 * NOTE(review): TL_REMOVE_PEER is defined outside this part
		 * of the file; presumably it releases the peer reference and
		 * clears the pointer - confirm.
		 */
		TL_REMOVE_PEER(tep->te_conp);
		TL_REMOVE_PEER(tep->te_oconp);
		/* Drop the private serializer allocated in tl_open(). */
		tl_serializer_refrele(tep->te_ser);
		tep->te_ser = NULL;
		ASSERT(tep->te_nicon == 0);
		ASSERT(list_head(&tep->te_iconp) == NULL);
	} else {
		/*
		 * Connectionless endpoints use the serializer owned by the
		 * transport, so te_ser is left alone here.
		 */
		ASSERT(tep->te_lastep == NULL);
		ASSERT(list_head(&tep->te_flowlist) == NULL);
		ASSERT(tep->te_flowq == NULL);
	}

	ASSERT(tep->te_bufcid == 0);
	ASSERT(tep->te_timoutid == 0);
	bzero(&tep->te_ap, sizeof (tep->te_ap));
	tep->te_acceptor_id = 0;

	ASSERT(tep->te_closewait == 0);
	ASSERT(!tep->te_rsrv_active);
	ASSERT(!tep->te_wsrv_active);
	tep->te_closing = 0;
	tep->te_nowsrv = B_FALSE;
	tep->te_flag = 0;

	kmem_cache_free(tl_cache, tep);
}
1234
1235 /*
1236 * Allocate/free reference-counted wrappers for serializers.
1237 */
1238 static tl_serializer_t *
tl_serializer_alloc(int flags)1239 tl_serializer_alloc(int flags)
1240 {
1241 tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags);
1242 serializer_t *ser;
1243
1244 if (s == NULL)
1245 return (NULL);
1246
1247 ser = serializer_create(flags);
1248
1249 if (ser == NULL) {
1250 kmem_free(s, sizeof (tl_serializer_t));
1251 return (NULL);
1252 }
1253
1254 s->ts_refcnt = 1;
1255 s->ts_serializer = ser;
1256 return (s);
1257 }
1258
/*
 * Take one more reference on a serializer wrapper by atomically incrementing
 * ts_refcnt. Each hold is expected to be paired with a
 * tl_serializer_refrele().
 */
static void
tl_serializer_refhold(tl_serializer_t *s)
{
	atomic_add_32(&s->ts_refcnt, 1);
}
1264
1265 static void
tl_serializer_refrele(tl_serializer_t * s)1266 tl_serializer_refrele(tl_serializer_t *s)
1267 {
1268 if (atomic_add_32_nv(&s->ts_refcnt, -1) == 0) {
1269 serializer_destroy(s->ts_serializer);
1270 kmem_free(s, sizeof (tl_serializer_t));
1271 }
1272 }
1273
/*
 * Post a request on the endpoint serializer. For COTS transports keep track of
 * the number of pending requests.
 *
 * tep:    endpoint whose serializer is used; also passed to tlproc.
 * tlproc: procedure to run under the serializer.
 * mp:     message passed to tlproc.
 *
 * For COTS endpoints te_ser_count is incremented under te_ser_lock before the
 * request is posted; tl_serializer_exit() performs the matching decrement.
 */
static void
tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp)
{
	if (IS_COTS(tep)) {
		mutex_enter(&tep->te_ser_lock);
		tep->te_ser_count++;
		mutex_exit(&tep->te_ser_lock);
	}
	serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep);
}
1288
1289 /*
1290 * Complete processing the request on the serializer. Decrement the counter for
1291 * pending requests for COTS transports.
1292 */
1293 static void
tl_serializer_exit(tl_endpt_t * tep)1294 tl_serializer_exit(tl_endpt_t *tep)
1295 {
1296 if (IS_COTS(tep)) {
1297 mutex_enter(&tep->te_ser_lock);
1298 ASSERT(tep->te_ser_count != 0);
1299 tep->te_ser_count--;
1300 mutex_exit(&tep->te_ser_lock);
1301 }
1302 }
1303
1304 /*
1305 * Hash management functions.
1306 */
1307
1308 /*
1309 * Return TRUE if two addresses are equal, false otherwise.
1310 */
1311 static boolean_t
tl_eqaddr(tl_addr_t * ap1,tl_addr_t * ap2)1312 tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2)
1313 {
1314 return ((ap1->ta_alen > 0) &&
1315 (ap1->ta_alen == ap2->ta_alen) &&
1316 (ap1->ta_zoneid == ap2->ta_zoneid) &&
1317 (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0));
1318 }
1319
1320 /*
1321 * This function is called whenever an endpoint is found in the hash table.
1322 */
1323 /* ARGSUSED0 */
1324 static void
tl_find_callback(mod_hash_key_t key,mod_hash_val_t val)1325 tl_find_callback(mod_hash_key_t key, mod_hash_val_t val)
1326 {
1327 tl_refhold((tl_endpt_t *)val);
1328 }
1329
1330 /*
1331 * Address hash function.
1332 */
1333 /* ARGSUSED */
1334 static uint_t
tl_hash_by_addr(void * hash_data,mod_hash_key_t key)1335 tl_hash_by_addr(void *hash_data, mod_hash_key_t key)
1336 {
1337 tl_addr_t *ap = (tl_addr_t *)key;
1338 size_t len = ap->ta_alen;
1339 uchar_t *p = ap->ta_abuf;
1340 uint_t i, g;
1341
1342 ASSERT((len > 0) && (p != NULL));
1343
1344 for (i = ap->ta_zoneid; len -- != 0; p++) {
1345 i = (i << 4) + (*p);
1346 if ((g = (i & 0xf0000000U)) != 0) {
1347 i ^= (g >> 24);
1348 i ^= g;
1349 }
1350 }
1351 return (i);
1352 }
1353
1354 /*
1355 * This function is used by hash lookups. It compares two generic addresses.
1356 */
1357 static int
tl_hash_cmp_addr(mod_hash_key_t key1,mod_hash_key_t key2)1358 tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2)
1359 {
1360 #ifdef DEBUG
1361 tl_addr_t *ap1 = (tl_addr_t *)key1;
1362 tl_addr_t *ap2 = (tl_addr_t *)key2;
1363
1364 ASSERT(key1 != NULL);
1365 ASSERT(key2 != NULL);
1366
1367 ASSERT(ap1->ta_abuf != NULL);
1368 ASSERT(ap2->ta_abuf != NULL);
1369 ASSERT(ap1->ta_alen > 0);
1370 ASSERT(ap2->ta_alen > 0);
1371 #endif
1372
1373 return (! tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2));
1374 }
1375
1376 /*
1377 * Prevent endpoint from closing if possible.
1378 * Return B_TRUE on success, B_FALSE on failure.
1379 */
1380 static boolean_t
tl_noclose(tl_endpt_t * tep)1381 tl_noclose(tl_endpt_t *tep)
1382 {
1383 boolean_t rc = B_FALSE;
1384
1385 mutex_enter(&tep->te_closelock);
1386 if (! tep->te_closing) {
1387 ASSERT(tep->te_closewait == 0);
1388 tep->te_closewait++;
1389 rc = B_TRUE;
1390 }
1391 mutex_exit(&tep->te_closelock);
1392 return (rc);
1393 }
1394
/*
 * Allow endpoint to close if needed.
 *
 * Drops te_closewait (which must be exactly 1) under te_closelock and signals
 * te_closecv so that a tl_close() waiting on the condition variable can
 * proceed. Used both to undo tl_noclose() and by the serialized close phases
 * to wake up tl_close().
 */
static void
tl_closeok(tl_endpt_t *tep)
{
	ASSERT(tep->te_closewait > 0);
	mutex_enter(&tep->te_closelock);
	ASSERT(tep->te_closewait == 1);
	tep->te_closewait--;
	cv_signal(&tep->te_closecv);
	mutex_exit(&tep->te_closelock);
}
1408
/*
 * STREAMS open entry point.
 *
 * The minor number of *devp selects the transport; SO_SOCKSTR in oflag
 * selects the socket flavour of that transport. A new endpoint is allocated
 * from the endpoint cache, initialized, given a unique minor number (the
 * device is cloned onto it through *devp) and entered into the acceptor ID
 * hash of its transport.
 *
 * Returns 0 on success (including a re-open of an already initialized queue)
 * and ENXIO for CLONEOPEN/MODOPEN or a minor number that does not name a
 * transport.
 */
/* ARGSUSED */
static int
tl_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t *credp)
{
	tl_endpt_t *tep;
	minor_t minor = getminor(*devp);

	/*
	 * Driver is called directly. Both CLONEOPEN and MODOPEN
	 * are illegal
	 */
	if ((sflag == CLONEOPEN) || (sflag == MODOPEN))
		return (ENXIO);

	/* The queue already has an endpoint: nothing to do. */
	if (rq->q_ptr != NULL)
		return (0);

	/* Minor number should specify the mode used for the driver. */
	if ((minor >= TL_UNUSED))
		return (ENXIO);

	if (oflag & SO_SOCKSTR) {
		minor |= TL_SOCKET;
	}

	/*
	 * Allocate the endpoint. The initial reference belongs to the open
	 * stream and is dropped at the end of tl_close().
	 */
	tep = kmem_cache_alloc(tl_cache, KM_SLEEP);
	tep->te_refcnt = 1;
	tep->te_cpid = curproc->p_pid;
	rq->q_ptr = WR(rq)->q_ptr = tep;
	tep->te_state = TS_UNBND;
	tep->te_credp = credp;
	crhold(credp);
	tep->te_zoneid = getzoneid();

	/*
	 * "minor" (possibly with TL_SOCKET or'ed in) is the index of the
	 * transport in tl_transports[].
	 */
	tep->te_flag = minor & TL_MINOR_MASK;
	tep->te_transport = &tl_transports[minor];

	/* Allocate a unique minor number for this instance. */
	tep->te_minor = (minor_t)id_alloc(tl_minors);

	/* Reserve hash handle for bind(). */
	(void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl);

	/* Transport-specific initialization */
	if (IS_COTS(tep)) {
		/* Use private serializer */
		tep->te_ser = tl_serializer_alloc(KM_SLEEP);

		/* Create list for pending connections */
		list_create(&tep->te_iconp, sizeof (tl_icon_t),
		    offsetof(tl_icon_t, ti_node));
		tep->te_qlen = 0;
		tep->te_nicon = 0;
		tep->te_oconp = NULL;
		tep->te_conp = NULL;
	} else {
		/* Use shared serializer */
		tep->te_ser = tep->te_transport->tr_serializer;
		bzero(&tep->te_flows, sizeof (list_node_t));
		/* Create list for flow control */
		list_create(&tep->te_flowlist, sizeof (tl_endpt_t),
		    offsetof(tl_endpt_t, te_flows));
		tep->te_flowq = NULL;
		tep->te_lastep = NULL;

	}

	/* Initialize endpoint address */
	if (IS_SOCKET(tep)) {
		/*
		 * Socket-specific address handling. The address lives inside
		 * the endpoint and initially carries the minor number tagged
		 * with SOU_MAGIC_IMPLICIT.
		 */
		tep->te_alen = TL_SOUX_ADDRLEN;
		tep->te_abuf = &tep->te_uxaddr;
		tep->te_vp = (void *)(uintptr_t)tep->te_minor;
		tep->te_magic = SOU_MAGIC_IMPLICIT;
	} else {
		/* No address yet; -1 marks the length as uninitialized. */
		tep->te_alen = -1;
		tep->te_abuf = NULL;
	}

	/* clone the driver */
	*devp = makedevice(getmajor(*devp), tep->te_minor);

	tep->te_rq = rq;
	tep->te_wq = WR(rq);

	/*
	 * Pick the acceptor ID: the minor number, except for non-socket
	 * endpoints on _ILP32 kernels where the read queue pointer is used.
	 */
#ifdef _ILP32
	if (IS_SOCKET(tep))
		tep->te_acceptor_id = tep->te_minor;
	else
		tep->te_acceptor_id = (t_uscalar_t)rq;
#else
	tep->te_acceptor_id = tep->te_minor;
#endif /* _ILP32 */


	qprocson(rq);

	/*
	 * Insert acceptor ID in the hash. The AI hash always sleeps on
	 * insertion so insertion can't fail.
	 */
	(void) mod_hash_insert(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
	    (mod_hash_val_t)tep);

	return (0);
}
1519
/*
 * STREAMS close entry point.
 *
 * Close proceeds in several steps:
 *   1. remove the endpoint from the acceptor ID hash;
 *   2. wait until nobody holds the endpoint open through tl_noclose(), then
 *      mark it as closing;
 *   3. run tl_close_ser() behind the serializer and wait for it to finish;
 *   4. qprocsoff() and cancel any outstanding bufcall and timeout;
 *   5. run tl_close_finish_ser() behind the serializer - waited for on COTS
 *      endpoints, asynchronous (protected by an extra reference) on CLTS
 *      endpoints;
 *   6. release the credentials and drop the reference taken in tl_open().
 *
 * Always returns 0.
 */
/* ARGSUSED1 */
static int
tl_close(queue_t *rq, int flag, cred_t *credp)
{
	tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
	tl_endpt_t *elp = NULL;
	queue_t *wq = tep->te_wq;
	int rc;

	ASSERT(wq == WR(rq));

	/*
	 * Remove the endpoint from acceptor hash. A mismatch trips the ASSERT
	 * on DEBUG kernels and is logged otherwise.
	 */
	rc = mod_hash_remove(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
	    (mod_hash_val_t *)&elp);
	ASSERT(rc == 0 && tep == elp);
	if ((rc != 0) || (tep != elp)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_close:inconsistency in AI hash"));
	}

	/*
	 * Wait till close is safe, then mark endpoint as closing.
	 */
	mutex_enter(&tep->te_closelock);
	while (tep->te_closewait)
		cv_wait(&tep->te_closecv, &tep->te_closelock);
	tep->te_closing = B_TRUE;
	/*
	 * Will wait for the serializer part of the close to finish, so set
	 * te_closewait now.
	 */
	tep->te_closewait = 1;
	tep->te_nowsrv = B_FALSE;
	mutex_exit(&tep->te_closelock);

	/*
	 * tl_close_ser doesn't drop reference, so no need to tl_refhold.
	 * It is safe because close will wait for tl_close_ser to finish.
	 */
	tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp);

	/*
	 * Wait for the first phase of close to complete before qprocsoff().
	 * tl_close_ser() clears te_closewait through tl_closeok().
	 */
	mutex_enter(&tep->te_closelock);
	while (tep->te_closewait)
		cv_wait(&tep->te_closecv, &tep->te_closelock);
	mutex_exit(&tep->te_closelock);

	qprocsoff(rq);

	if (tep->te_bufcid) {
		qunbufcall(rq, tep->te_bufcid);
		tep->te_bufcid = 0;
	}
	if (tep->te_timoutid) {
		(void) quntimeout(rq, tep->te_timoutid);
		tep->te_timoutid = 0;
	}

	/*
	 * Finish close behind serializer.
	 *
	 * For a CLTS endpoint increase a refcount and continue close processing
	 * with serializer protection. This processing may happen asynchronously
	 * with the completion of tl_close().
	 *
	 * For a COTS endpoint wait before destroying tep since the serializer
	 * may go away together with tep and we need to destroy serializer
	 * outside of serializer context.
	 */
	ASSERT(tep->te_closewait == 0);
	if (IS_COTS(tep))
		tep->te_closewait = 1;
	else
		tl_refhold(tep);

	tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp);

	/*
	 * For connection-oriented transports wait for all serializer activity
	 * to settle down.
	 */
	if (IS_COTS(tep)) {
		mutex_enter(&tep->te_closelock);
		while (tep->te_closewait)
			cv_wait(&tep->te_closecv, &tep->te_closelock);
		mutex_exit(&tep->te_closelock);
	}

	/* Release the credentials held since tl_open(). */
	crfree(tep->te_credp);
	tep->te_credp = NULL;
	tep->te_wq = NULL;
	tl_refrele(tep);
	/*
	 * tep is likely to be destroyed now, so can't reference it any more.
	 */

	rq->q_ptr = wq->q_ptr = NULL;
	return (0);
}
1625
/*
 * First phase of close processing done behind the serializer.
 *
 * Marks the endpoint with TL_CLOSE_SER, drains the write queue where the
 * transport semantics require it, removes the address from the hash table,
 * detaches the write queue (and the peer's) from q_next, clears te_rq and
 * finally wakes up tl_close().
 *
 * mp is not used. Do not drop the reference in the end - tl_close() wants
 * this reference to stay.
 */
/* ARGSUSED0 */
static void
tl_close_ser(mblk_t *mp, tl_endpt_t *tep)
{
	ASSERT(tep->te_closing);
	ASSERT(tep->te_closewait == 1);
	ASSERT(!(tep->te_flag & TL_CLOSE_SER));

	tep->te_flag |= TL_CLOSE_SER;

	/*
	 * Drain out all messages on queue except for TL_TICOTS where the
	 * abortive release semantics permit discarding of data on close
	 */
	if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) {
		tl_wsrv_ser(NULL, tep);
	}

	/* Remove address from hash table. */
	tl_addr_unbind(tep);
	/*
	 * qprocsoff() gets confused when q->q_next is not NULL on the write
	 * queue of the driver, so clear these before qprocsoff() is called.
	 * Also clear q_next for the peer since this queue is going away.
	 */
	if (IS_COTS(tep) && !IS_SOCKET(tep)) {
		tl_endpt_t *peer_tep = tep->te_conp;

		tep->te_wq->q_next = NULL;
		if ((peer_tep != NULL) && !peer_tep->te_closing)
			peer_tep->te_wq->q_next = NULL;
	}

	tep->te_rq = NULL;

	/* wake up tl_close() */
	tl_closeok(tep);
	tl_serializer_exit(tep);
}
1671
/*
 * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop
 * the reference for CLTS.
 *
 * Called from serializer. Should drop reference count for CLTS only: that is
 * the extra reference tl_close() takes before posting this routine. For COTS
 * endpoints tl_close() waits on te_closewait instead, which is released here
 * through tl_closeok().
 *
 * mp is not used.
 */
/* ARGSUSED0 */
static void
tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep)
{
	ASSERT(tep->te_closing);
	IMPLY(IS_CLTS(tep), tep->te_closewait == 0);
	IMPLY(IS_COTS(tep), tep->te_closewait == 1);

	tep->te_state = -1;	/* Uninitialized */
	if (IS_COTS(tep)) {
		tl_co_unconnect(tep);
	} else {
		/* Connectionless specific cleanup */
		TL_REMOVE_PEER(tep->te_lastep);
		/*
		 * Backenable anybody that is flow controlled waiting for
		 * this endpoint.
		 */
		tl_cl_backenable(tep);
		/* Leave the flow control list of the endpoint we wait on. */
		if (tep->te_flowq != NULL) {
			list_remove(&(tep->te_flowq->te_flowlist), tep);
			tep->te_flowq = NULL;
		}
	}

	tl_serializer_exit(tep);
	if (IS_COTS(tep))
		tl_closeok(tep);
	else
		tl_refrele(tep);
}
1709
/*
 * STREAMS write-side put procedure.
 * Enter serializer for most of the processing.
 *
 * The message is classified by STREAMS type and, for M_PROTO/M_PCPROTO, by
 * TPI primitive. A few cases are finished right here (M_FLUSH, option
 * management, T_CONN_REQ, T_CAPABILITY_REQ, and everything that is rejected).
 * For all other messages a serializer procedure is selected in tl_proc, a
 * reference is taken on the endpoint and the message is posted on the
 * endpoint serializer; the selected procedure is responsible for dropping
 * that reference.
 *
 * The T_CONN_REQ is processed outside of serializer.
 */
static void
tl_wput(queue_t *wq, mblk_t *mp)
{
	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
	ssize_t msz = MBLKL(mp);
	union T_primitives *prim = (union T_primitives *)mp->b_rptr;
	tlproc_t *tl_proc = NULL;

	switch (DB_TYPE(mp)) {
	case M_DATA:
		/* Only valid for connection-oriented transports */
		if (IS_CLTS(tep)) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_wput:M_DATA invalid for ticlts driver"));
			tl_merror(wq, mp, EPROTO);
			return;
		}
		tl_proc = tl_wput_data_ser;
		break;

	case M_IOCTL:
		switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
		case TL_IOC_CREDOPT:
			/* FALLTHROUGH */
		case TL_IOC_UCREDOPT:
			/*
			 * Serialize endpoint state change.
			 */
			tl_proc = tl_do_ioctl_ser;
			break;

		default:
			/* Any other ioctl is refused with EINVAL. */
			miocnak(wq, mp, 0, EINVAL);
			return;
		}
		break;

	case M_FLUSH:
		/*
		 * do canonical M_FLUSH processing
		 */
		if (*mp->b_rptr & FLUSHW) {
			flushq(wq, FLUSHALL);
			*mp->b_rptr &= ~FLUSHW;
		}
		if (*mp->b_rptr & FLUSHR) {
			flushq(RD(wq), FLUSHALL);
			qreply(wq, mp);
		} else {
			freemsg(mp);
		}
		return;

	case M_PROTO:
		/* The primitive type must be present to dispatch on it. */
		if (msz < sizeof (prim->type)) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_wput:M_PROTO data too short"));
			tl_merror(wq, mp, EPROTO);
			return;
		}
		switch (prim->type) {
		case T_OPTMGMT_REQ:
		case T_SVR4_OPTMGMT_REQ:
			/*
			 * Process TPI option management requests immediately
			 * in put procedure regardless of in-order processing
			 * of already queued messages.
			 * (Note: This driver supports AF_UNIX socket
			 * implementation.  Unless we implement this processing,
			 * setsockopt() on socket endpoint will block on flow
			 * controlled endpoints which it should not. That is
			 * required for successful execution of VSU socket tests
			 * and is consistent with BSD socket behavior).
			 */
			tl_optmgmt(wq, mp);
			return;
		case O_T_BIND_REQ:
		case T_BIND_REQ:
			tl_proc = tl_bind_ser;
			break;
		case T_CONN_REQ:
			/* Connection requests make no sense on CLTS. */
			if (IS_CLTS(tep)) {
				tl_merror(wq, mp, EPROTO);
				return;
			}
			tl_conn_req(wq, mp);
			return;
		case T_DATA_REQ:
		case T_OPTDATA_REQ:
		case T_EXDATA_REQ:
		case T_ORDREL_REQ:
			/* Always queued; see tl_putq_ser(). */
			tl_proc = tl_putq_ser;
			break;
		case T_UNITDATA_REQ:
			/* Datagrams are for CLTS only and need a full header */
			if (IS_COTS(tep) ||
			    (msz < sizeof (struct T_unitdata_req))) {
				tl_merror(wq, mp, EPROTO);
				return;
			}
			/*
			 * Process directly only when the endpoint is idle and
			 * nothing is queued ahead; otherwise queue the message
			 * to keep ordering.
			 */
			if ((tep->te_state == TS_IDLE) && !wq->q_first) {
				tl_proc = tl_unitdata_ser;
			} else {
				tl_proc = tl_putq_ser;
			}
			break;
		default:
			/*
			 * process in service procedure if message already
			 * queued (maintain in-order processing)
			 */
			if (wq->q_first != NULL) {
				tl_proc = tl_putq_ser;
			} else {
				tl_proc = tl_wput_ser;
			}
			break;
		}
		break;

	case M_PCPROTO:
		/*
		 * Check that the message has enough data to figure out TPI
		 * primitive.
		 */
		if (msz < sizeof (prim->type)) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_wput:M_PCROTO data too short"));
			tl_merror(wq, mp, EPROTO);
			return;
		}
		switch (prim->type) {
		case T_CAPABILITY_REQ:
			/* Answered directly, outside of the serializer. */
			tl_capability_req(mp, tep);
			return;
		case T_INFO_REQ:
			tl_proc = tl_info_req_ser;
			break;
		default:
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_wput:unknown TPI msg primitive"));
			tl_merror(wq, mp, EPROTO);
			return;
		}
		break;
	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_wput:default:unexpected Streams message"));
		freemsg(mp);
		return;
	}

	/*
	 * Continue processing via serializer. The reference taken here is
	 * released by the serializer procedure once it is done with tep.
	 */
	ASSERT(tl_proc != NULL);
	tl_refhold(tep);
	tl_serializer_enter(tep, tl_proc, mp);
}
1878
1879 /*
1880 * Place message on the queue while preserving order.
1881 */
1882 static void
tl_putq_ser(mblk_t * mp,tl_endpt_t * tep)1883 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep)
1884 {
1885 if (tep->te_closing) {
1886 tl_wput_ser(mp, tep);
1887 } else {
1888 TL_PUTQ(tep, mp);
1889 tl_serializer_exit(tep);
1890 tl_refrele(tep);
1891 }
1892
1893 }
1894
1895 static void
tl_wput_common_ser(mblk_t * mp,tl_endpt_t * tep)1896 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep)
1897 {
1898 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
1899
1900 switch (DB_TYPE(mp)) {
1901 case M_DATA:
1902 tl_data(mp, tep);
1903 break;
1904 case M_PROTO:
1905 tl_do_proto(mp, tep);
1906 break;
1907 default:
1908 freemsg(mp);
1909 break;
1910 }
1911 }
1912
/*
 * Write side put procedure called from serializer.
 *
 * Processes the message through tl_wput_common_ser(), then leaves the
 * serializer and drops the endpoint reference that tl_wput() took before
 * posting the request. tep may be gone once this returns.
 */
static void
tl_wput_ser(mblk_t *mp, tl_endpt_t *tep)
{
	tl_wput_common_ser(mp, tep);
	tl_serializer_exit(tep);
	tl_refrele(tep);
}
1923
/*
 * M_DATA processing. Called from serializer.
 *
 * Three outcomes are possible:
 *   - fast path: the message is passed straight to the peer's read side with
 *     putnext();
 *   - the endpoint is closing and the fast path did not apply: the message
 *     is queued if the endpoint is still in a data transfer state and freed
 *     otherwise;
 *   - in every other case the message is queued on the write queue.
 *
 * Leaves the serializer and drops the reference taken by tl_wput().
 */
static void
tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep)
{
	tl_endpt_t *peer_tep = tep->te_conp;
	queue_t *peer_rq;

	ASSERT(DB_TYPE(mp) == M_DATA);
	ASSERT(IS_COTS(tep));

	/* Connected endpoints must share one serializer. */
	IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer);

	/*
	 * fastpath for data. Ignore flow control if tep is closing.
	 *
	 * Requires a peer that is not closing, both ends in TS_DATA_XFER or
	 * TS_WREQ_ORDREL, an empty write queue on this end (so ordering is
	 * preserved) and a peer read queue that can accept the message.
	 */
	if ((peer_tep != NULL) &&
	    !peer_tep->te_closing &&
	    ((tep->te_state == TS_DATA_XFER) ||
	    (tep->te_state == TS_WREQ_ORDREL)) &&
	    (tep->te_wq != NULL) &&
	    (tep->te_wq->q_first == NULL) &&
	    ((peer_tep->te_state == TS_DATA_XFER) ||
	    (peer_tep->te_state == TS_WREQ_ORDREL)) &&
	    ((peer_rq = peer_tep->te_rq) != NULL) &&
	    (canputnext(peer_rq) || tep->te_closing)) {
		putnext(peer_rq, mp);
	} else if (tep->te_closing) {
		/*
		 * It is possible that by the time we got here tep started to
		 * close. If the write queue is not empty, and the state is
		 * TS_DATA_XFER the data should be delivered in order, so we
		 * call putq() instead of freeing the data.
		 */
		if ((tep->te_wq != NULL) &&
		    ((tep->te_state == TS_DATA_XFER) ||
		    (tep->te_state == TS_WREQ_ORDREL))) {
			TL_PUTQ(tep, mp);
		} else {
			freemsg(mp);
		}
	} else {
		TL_PUTQ(tep, mp);
	}

	tl_serializer_exit(tep);
	tl_refrele(tep);
}
1973
1974 /*
1975 * Write side service routine.
1976 *
1977 * All actual processing happens within serializer which is entered
1978 * synchronously. It is possible that by the time tl_wsrv() wakes up, some new
1979 * messages that need processing may have arrived, so tl_wsrv repeats until
1980 * queue is empty or te_nowsrv is set.
1981 */
1982 static void
tl_wsrv(queue_t * wq)1983 tl_wsrv(queue_t *wq)
1984 {
1985 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
1986
1987 while ((wq->q_first != NULL) && !tep->te_nowsrv) {
1988 mutex_enter(&tep->te_srv_lock);
1989 ASSERT(tep->te_wsrv_active == B_FALSE);
1990 tep->te_wsrv_active = B_TRUE;
1991 mutex_exit(&tep->te_srv_lock);
1992
1993 tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp);
1994
1995 /*
1996 * Wait for serializer job to complete.
1997 */
1998 mutex_enter(&tep->te_srv_lock);
1999 while (tep->te_wsrv_active) {
2000 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2001 }
2002 cv_signal(&tep->te_srv_cv);
2003 mutex_exit(&tep->te_srv_lock);
2004 }
2005 }
2006
2007 /*
2008 * Serialized write side processing of the STREAMS queue.
2009 * May be called either from tl_wsrv() or from tl_close() in which case ser_mp
2010 * is NULL.
2011 */
2012 static void
tl_wsrv_ser(mblk_t * ser_mp,tl_endpt_t * tep)2013 tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep)
2014 {
2015 mblk_t *mp;
2016 queue_t *wq = tep->te_wq;
2017
2018 ASSERT(wq != NULL);
2019 while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) {
2020 tl_wput_common_ser(mp, tep);
2021 }
2022
2023 /*
2024 * Wakeup service routine unless called from close.
2025 * If ser_mp is specified, the caller is tl_wsrv().
2026 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't
2027 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should
2028 * be no matching tl_serializer_exit() in this case.
2029 * Also, there is no need to wakeup anyone since tl_close_ser() is not
2030 * waiting on te_srv_cv.
2031 */
2032 if (ser_mp != NULL) {
2033 /*
2034 * We are called from tl_wsrv.
2035 */
2036 mutex_enter(&tep->te_srv_lock);
2037 ASSERT(tep->te_wsrv_active);
2038 tep->te_wsrv_active = B_FALSE;
2039 cv_signal(&tep->te_srv_cv);
2040 mutex_exit(&tep->te_srv_lock);
2041 tl_serializer_exit(tep);
2042 }
2043 }
2044
2045 /*
2046 * Called when the stream is backenabled. Enter serializer and qenable everyone
2047 * flow controlled by tep.
2048 *
2049 * NOTE: The service routine should enter serializer synchronously. Otherwise it
2050 * is possible that two instances of tl_rsrv will be running reusing the same
2051 * rsrv mblk.
2052 */
2053 static void
tl_rsrv(queue_t * rq)2054 tl_rsrv(queue_t *rq)
2055 {
2056 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
2057
2058 ASSERT(rq->q_first == NULL);
2059 ASSERT(tep->te_rsrv_active == 0);
2060
2061 tep->te_rsrv_active = B_TRUE;
2062 tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp);
2063 /*
2064 * Wait for serializer job to complete.
2065 */
2066 mutex_enter(&tep->te_srv_lock);
2067 while (tep->te_rsrv_active) {
2068 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2069 }
2070 cv_signal(&tep->te_srv_cv);
2071 mutex_exit(&tep->te_srv_lock);
2072 }
2073
2074 /* ARGSUSED */
2075 static void
tl_rsrv_ser(mblk_t * mp,tl_endpt_t * tep)2076 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep)
2077 {
2078 tl_endpt_t *peer_tep;
2079
2080 if (IS_CLTS(tep) && tep->te_state == TS_IDLE) {
2081 tl_cl_backenable(tep);
2082 } else if (
2083 IS_COTS(tep) &&
2084 ((peer_tep = tep->te_conp) != NULL) &&
2085 !peer_tep->te_closing &&
2086 ((tep->te_state == TS_DATA_XFER) ||
2087 (tep->te_state == TS_WIND_ORDREL)||
2088 (tep->te_state == TS_WREQ_ORDREL))) {
2089 TL_QENABLE(peer_tep);
2090 }
2091
2092 /*
2093 * Wakeup read side service routine.
2094 */
2095 mutex_enter(&tep->te_srv_lock);
2096 ASSERT(tep->te_rsrv_active);
2097 tep->te_rsrv_active = B_FALSE;
2098 cv_signal(&tep->te_srv_cv);
2099 mutex_exit(&tep->te_srv_lock);
2100 tl_serializer_exit(tep);
2101 }
2102
2103 /*
2104 * process M_PROTO messages. Always called from serializer.
2105 */
2106 static void
tl_do_proto(mblk_t * mp,tl_endpt_t * tep)2107 tl_do_proto(mblk_t *mp, tl_endpt_t *tep)
2108 {
2109 ssize_t msz = MBLKL(mp);
2110 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
2111
2112 /* Message size was validated by tl_wput(). */
2113 ASSERT(msz >= sizeof (prim->type));
2114
2115 switch (prim->type) {
2116 case T_UNBIND_REQ:
2117 tl_unbind(mp, tep);
2118 break;
2119
2120 case T_ADDR_REQ:
2121 tl_addr_req(mp, tep);
2122 break;
2123
2124 case O_T_CONN_RES:
2125 case T_CONN_RES:
2126 if (IS_CLTS(tep)) {
2127 tl_merror(tep->te_wq, mp, EPROTO);
2128 break;
2129 }
2130 tl_conn_res(mp, tep);
2131 break;
2132
2133 case T_DISCON_REQ:
2134 if (IS_CLTS(tep)) {
2135 tl_merror(tep->te_wq, mp, EPROTO);
2136 break;
2137 }
2138 tl_discon_req(mp, tep);
2139 break;
2140
2141 case T_DATA_REQ:
2142 if (IS_CLTS(tep)) {
2143 tl_merror(tep->te_wq, mp, EPROTO);
2144 break;
2145 }
2146 tl_data(mp, tep);
2147 break;
2148
2149 case T_OPTDATA_REQ:
2150 if (IS_CLTS(tep)) {
2151 tl_merror(tep->te_wq, mp, EPROTO);
2152 break;
2153 }
2154 tl_data(mp, tep);
2155 break;
2156
2157 case T_EXDATA_REQ:
2158 if (IS_CLTS(tep)) {
2159 tl_merror(tep->te_wq, mp, EPROTO);
2160 break;
2161 }
2162 tl_exdata(mp, tep);
2163 break;
2164
2165 case T_ORDREL_REQ:
2166 if (! IS_COTSORD(tep)) {
2167 tl_merror(tep->te_wq, mp, EPROTO);
2168 break;
2169 }
2170 tl_ordrel(mp, tep);
2171 break;
2172
2173 case T_UNITDATA_REQ:
2174 if (IS_COTS(tep)) {
2175 tl_merror(tep->te_wq, mp, EPROTO);
2176 break;
2177 }
2178 tl_unitdata(mp, tep);
2179 break;
2180
2181 default:
2182 tl_merror(tep->te_wq, mp, EPROTO);
2183 break;
2184 }
2185 }
2186
2187 /*
2188 * Process ioctl from serializer.
2189 * This is a wrapper around tl_do_ioctl().
2190 */
2191 static void
tl_do_ioctl_ser(mblk_t * mp,tl_endpt_t * tep)2192 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep)
2193 {
2194 if (! tep->te_closing)
2195 tl_do_ioctl(mp, tep);
2196 else
2197 freemsg(mp);
2198
2199 tl_serializer_exit(tep);
2200 tl_refrele(tep);
2201 }
2202
2203 static void
tl_do_ioctl(mblk_t * mp,tl_endpt_t * tep)2204 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep)
2205 {
2206 struct iocblk *iocbp = (struct iocblk *)mp->b_rptr;
2207 int cmd = iocbp->ioc_cmd;
2208 queue_t *wq = tep->te_wq;
2209 int error;
2210 int thisopt, otheropt;
2211
2212 ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT));
2213
2214 switch (cmd) {
2215 case TL_IOC_CREDOPT:
2216 if (cmd == TL_IOC_CREDOPT) {
2217 thisopt = TL_SETCRED;
2218 otheropt = TL_SETUCRED;
2219 } else {
2220 /* FALLTHROUGH */
2221 case TL_IOC_UCREDOPT:
2222 thisopt = TL_SETUCRED;
2223 otheropt = TL_SETCRED;
2224 }
2225 /*
2226 * The credentials passing does not apply to sockets.
2227 * Only one of the cred options can be set at a given time.
2228 */
2229 if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) {
2230 miocnak(wq, mp, 0, EINVAL);
2231 return;
2232 }
2233
2234 /*
2235 * Turn on generation of credential options for
2236 * T_conn_req, T_conn_con, T_unidata_ind.
2237 */
2238 error = miocpullup(mp, sizeof (uint32_t));
2239 if (error != 0) {
2240 miocnak(wq, mp, 0, error);
2241 return;
2242 }
2243 if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) {
2244 miocnak(wq, mp, 0, EINVAL);
2245 return;
2246 }
2247
2248 if (*(uint32_t *)mp->b_cont->b_rptr)
2249 tep->te_flag |= thisopt;
2250 else
2251 tep->te_flag &= ~thisopt;
2252
2253 miocack(wq, mp, 0, 0);
2254 break;
2255
2256 default:
2257 /* Should not be here */
2258 miocnak(wq, mp, 0, EINVAL);
2259 break;
2260 }
2261 }
2262
2263
2264 /*
2265 * send T_ERROR_ACK
2266 * Note: assumes enough memory or caller passed big enough mp
2267 * - no recovery from allocb failures
2268 */
2269
2270 static void
tl_error_ack(queue_t * wq,mblk_t * mp,t_scalar_t tli_err,t_scalar_t unix_err,t_scalar_t type)2271 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err,
2272 t_scalar_t unix_err, t_scalar_t type)
2273 {
2274 struct T_error_ack *err_ack;
2275 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
2276 M_PCPROTO, T_ERROR_ACK);
2277
2278 if (ackmp == NULL) {
2279 (void) (STRLOG(TL_ID, 0, 1, SL_TRACE|SL_ERROR,
2280 "tl_error_ack:out of mblk memory"));
2281 tl_merror(wq, NULL, ENOSR);
2282 return;
2283 }
2284 err_ack = (struct T_error_ack *)ackmp->b_rptr;
2285 err_ack->ERROR_prim = type;
2286 err_ack->TLI_error = tli_err;
2287 err_ack->UNIX_error = unix_err;
2288
2289 /*
2290 * send error ack message
2291 */
2292 qreply(wq, ackmp);
2293 }
2294
2295
2296
2297 /*
2298 * send T_OK_ACK
2299 * Note: assumes enough memory or caller passed big enough mp
2300 * - no recovery from allocb failures
2301 */
2302 static void
tl_ok_ack(queue_t * wq,mblk_t * mp,t_scalar_t type)2303 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type)
2304 {
2305 struct T_ok_ack *ok_ack;
2306 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack),
2307 M_PCPROTO, T_OK_ACK);
2308
2309 if (ackmp == NULL) {
2310 tl_merror(wq, NULL, ENOMEM);
2311 return;
2312 }
2313
2314 ok_ack = (struct T_ok_ack *)ackmp->b_rptr;
2315 ok_ack->CORRECT_prim = type;
2316
2317 (void) qreply(wq, ackmp);
2318 }
2319
2320 /*
2321 * Process T_BIND_REQ and O_T_BIND_REQ from serializer.
2322 * This is a wrapper around tl_bind().
2323 */
2324 static void
tl_bind_ser(mblk_t * mp,tl_endpt_t * tep)2325 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep)
2326 {
2327 if (! tep->te_closing)
2328 tl_bind(mp, tep);
2329 else
2330 freemsg(mp);
2331
2332 tl_serializer_exit(tep);
2333 tl_refrele(tep);
2334 }
2335
2336 /*
2337 * Process T_BIND_REQ and O_T_BIND_REQ TPI requests.
2338 * Assumes that the endpoint is in the unbound.
2339 */
2340 static void
tl_bind(mblk_t * mp,tl_endpt_t * tep)2341 tl_bind(mblk_t *mp, tl_endpt_t *tep)
2342 {
2343 queue_t *wq = tep->te_wq;
2344 struct T_bind_ack *b_ack;
2345 struct T_bind_req *bind = (struct T_bind_req *)mp->b_rptr;
2346 mblk_t *ackmp, *bamp;
2347 soux_addr_t ux_addr;
2348 t_uscalar_t qlen = 0;
2349 t_scalar_t alen, aoff;
2350 tl_addr_t addr_req;
2351 void *addr_startp;
2352 ssize_t msz = MBLKL(mp), basize;
2353 t_scalar_t tli_err = 0, unix_err = 0;
2354 t_scalar_t save_prim_type = bind->PRIM_type;
2355 t_scalar_t save_state = tep->te_state;
2356
2357 if (tep->te_state != TS_UNBND) {
2358 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2359 SL_TRACE|SL_ERROR,
2360 "tl_wput:bind_request:out of state, state=%d",
2361 tep->te_state));
2362 tli_err = TOUTSTATE;
2363 goto error;
2364 }
2365
2366 if (msz < sizeof (struct T_bind_req)) {
2367 tli_err = TSYSERR; unix_err = EINVAL;
2368 goto error;
2369 }
2370
2371 tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state);
2372
2373 ASSERT((bind->PRIM_type == O_T_BIND_REQ) ||
2374 (bind->PRIM_type == T_BIND_REQ));
2375
2376 alen = bind->ADDR_length;
2377 aoff = bind->ADDR_offset;
2378
2379 /* negotiate max conn req pending */
2380 if (IS_COTS(tep)) {
2381 qlen = bind->CONIND_number;
2382 if (qlen > tl_maxqlen)
2383 qlen = tl_maxqlen;
2384 }
2385
2386 /*
2387 * Reserve hash handle. It can only be NULL if the endpoint is unbound
2388 * and bound again.
2389 */
2390 if ((tep->te_hash_hndl == NULL) &&
2391 ((tep->te_flag & TL_ADDRHASHED) == 0) &&
2392 mod_hash_reserve_nosleep(tep->te_addrhash,
2393 &tep->te_hash_hndl) != 0) {
2394 tli_err = TSYSERR; unix_err = ENOSR;
2395 goto error;
2396 }
2397
2398 /*
2399 * Verify address correctness.
2400 */
2401 if (IS_SOCKET(tep)) {
2402 ASSERT(bind->PRIM_type == O_T_BIND_REQ);
2403
2404 if ((alen != TL_SOUX_ADDRLEN) ||
2405 (aoff < 0) ||
2406 (aoff + alen > msz)) {
2407 (void) (STRLOG(TL_ID, tep->te_minor,
2408 1, SL_TRACE|SL_ERROR,
2409 "tl_bind: invalid socket addr"));
2410 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2411 tli_err = TSYSERR; unix_err = EINVAL;
2412 goto error;
2413 }
2414 /* Copy address from message to local buffer. */
2415 bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr));
2416 /*
2417 * Check that we got correct address from sockets
2418 */
2419 if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) &&
2420 (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) {
2421 (void) (STRLOG(TL_ID, tep->te_minor,
2422 1, SL_TRACE|SL_ERROR,
2423 "tl_bind: invalid socket magic"));
2424 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2425 tli_err = TSYSERR; unix_err = EINVAL;
2426 goto error;
2427 }
2428 if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) &&
2429 (ux_addr.soua_vp != NULL)) {
2430 (void) (STRLOG(TL_ID, tep->te_minor,
2431 1, SL_TRACE|SL_ERROR,
2432 "tl_bind: implicit addr non-empty"));
2433 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2434 tli_err = TSYSERR; unix_err = EINVAL;
2435 goto error;
2436 }
2437 if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) &&
2438 (ux_addr.soua_vp == NULL)) {
2439 (void) (STRLOG(TL_ID, tep->te_minor,
2440 1, SL_TRACE|SL_ERROR,
2441 "tl_bind: explicit addr empty"));
2442 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2443 tli_err = TSYSERR; unix_err = EINVAL;
2444 goto error;
2445 }
2446 } else {
2447 if ((alen > 0) && ((aoff < 0) ||
2448 ((ssize_t)(aoff + alen) > msz) ||
2449 ((aoff + alen) < 0))) {
2450 (void) (STRLOG(TL_ID, tep->te_minor,
2451 1, SL_TRACE|SL_ERROR,
2452 "tl_bind: invalid message"));
2453 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2454 tli_err = TSYSERR; unix_err = EINVAL;
2455 goto error;
2456 }
2457 if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) {
2458 (void) (STRLOG(TL_ID, tep->te_minor,
2459 1, SL_TRACE|SL_ERROR,
2460 "tl_bind: bad addr in message"));
2461 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2462 tli_err = TBADADDR;
2463 goto error;
2464 }
2465 #ifdef DEBUG
2466 /*
2467 * Mild form of ASSERT()ion to detect broken TPI apps.
2468 * if (! assertion)
2469 * log warning;
2470 */
2471 if (! ((alen == 0 && aoff == 0) ||
2472 (aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) {
2473 (void) (STRLOG(TL_ID, tep->te_minor,
2474 3, SL_TRACE|SL_ERROR,
2475 "tl_bind: addr overlaps TPI message"));
2476 }
2477 #endif
2478 }
2479
2480 /*
2481 * Bind the address provided or allocate one if requested.
2482 * Allow rebinds with a new qlen value.
2483 */
2484 if (IS_SOCKET(tep)) {
2485 /*
2486 * For anonymous requests the te_ap is already set up properly
2487 * so use minor number as an address.
2488 * For explicit requests need to check whether the address is
2489 * already in use.
2490 */
2491 if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) {
2492 int rc;
2493
2494 if (tep->te_flag & TL_ADDRHASHED) {
2495 ASSERT(IS_COTS(tep) && tep->te_qlen == 0);
2496 if (tep->te_vp == ux_addr.soua_vp)
2497 goto skip_addr_bind;
2498 else /* Rebind to a new address. */
2499 tl_addr_unbind(tep);
2500 }
2501 /*
2502 * Insert address in the hash if it is not already
2503 * there. Since we use preallocated handle, the insert
2504 * can fail only if the key is already present.
2505 */
2506 rc = mod_hash_insert_reserve(tep->te_addrhash,
2507 (mod_hash_key_t)ux_addr.soua_vp,
2508 (mod_hash_val_t)tep, tep->te_hash_hndl);
2509
2510 if (rc != 0) {
2511 ASSERT(rc == MH_ERR_DUPLICATE);
2512 /*
2513 * Violate O_T_BIND_REQ semantics and fail with
2514 * TADDRBUSY - sockets will not use any address
2515 * other than supplied one for explicit binds.
2516 */
2517 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2518 SL_TRACE|SL_ERROR,
2519 "tl_bind:requested addr %p is busy",
2520 ux_addr.soua_vp));
2521 tli_err = TADDRBUSY; unix_err = 0;
2522 goto error;
2523 }
2524 tep->te_uxaddr = ux_addr;
2525 tep->te_flag |= TL_ADDRHASHED;
2526 tep->te_hash_hndl = NULL;
2527 }
2528 } else if (alen == 0) {
2529 /*
2530 * assign any free address
2531 */
2532 if (! tl_get_any_addr(tep, NULL)) {
2533 (void) (STRLOG(TL_ID, tep->te_minor,
2534 1, SL_TRACE|SL_ERROR,
2535 "tl_bind:failed to get buffer for any "
2536 "address"));
2537 tli_err = TSYSERR; unix_err = ENOSR;
2538 goto error;
2539 }
2540 } else {
2541 addr_req.ta_alen = alen;
2542 addr_req.ta_abuf = (mp->b_rptr + aoff);
2543 addr_req.ta_zoneid = tep->te_zoneid;
2544
2545 tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
2546 if (tep->te_abuf == NULL) {
2547 tli_err = TSYSERR; unix_err = ENOSR;
2548 goto error;
2549 }
2550 bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen);
2551 tep->te_alen = alen;
2552
2553 if (mod_hash_insert_reserve(tep->te_addrhash,
2554 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
2555 tep->te_hash_hndl) != 0) {
2556 if (save_prim_type == T_BIND_REQ) {
2557 /*
2558 * The bind semantics for this primitive
2559 * require a failure if the exact address
2560 * requested is busy
2561 */
2562 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2563 SL_TRACE|SL_ERROR,
2564 "tl_bind:requested addr is busy"));
2565 tli_err = TADDRBUSY; unix_err = 0;
2566 goto error;
2567 }
2568
2569 /*
2570 * O_T_BIND_REQ semantics say if address if requested
2571 * address is busy, bind to any available free address
2572 */
2573 if (! tl_get_any_addr(tep, &addr_req)) {
2574 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2575 SL_TRACE|SL_ERROR,
2576 "tl_bind:unable to get any addr buf"));
2577 tli_err = TSYSERR; unix_err = ENOMEM;
2578 goto error;
2579 }
2580 } else {
2581 tep->te_flag |= TL_ADDRHASHED;
2582 tep->te_hash_hndl = NULL;
2583 }
2584 }
2585
2586 ASSERT(tep->te_alen >= 0);
2587
2588 skip_addr_bind:
2589 /*
2590 * prepare T_BIND_ACK TPI message
2591 */
2592 basize = sizeof (struct T_bind_ack) + tep->te_alen;
2593 bamp = reallocb(mp, basize, 0);
2594 if (bamp == NULL) {
2595 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2596 "tl_wput:tl_bind: allocb failed"));
2597 /*
2598 * roll back state changes
2599 */
2600 tl_addr_unbind(tep);
2601 tep->te_state = TS_UNBND;
2602 tl_memrecover(wq, mp, basize);
2603 return;
2604 }
2605
2606 DB_TYPE(bamp) = M_PCPROTO;
2607 bamp->b_wptr = bamp->b_rptr + basize;
2608 b_ack = (struct T_bind_ack *)bamp->b_rptr;
2609 b_ack->PRIM_type = T_BIND_ACK;
2610 b_ack->CONIND_number = qlen;
2611 b_ack->ADDR_length = tep->te_alen;
2612 b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack);
2613 addr_startp = bamp->b_rptr + b_ack->ADDR_offset;
2614 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
2615
2616 if (IS_COTS(tep)) {
2617 tep->te_qlen = qlen;
2618 if (qlen > 0)
2619 tep->te_flag |= TL_LISTENER;
2620 }
2621
2622 tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state);
2623 /*
2624 * send T_BIND_ACK message
2625 */
2626 (void) qreply(wq, bamp);
2627 return;
2628
2629 error:
2630 ackmp = reallocb(mp, sizeof (struct T_error_ack), 0);
2631 if (ackmp == NULL) {
2632 /*
2633 * roll back state changes
2634 */
2635 tep->te_state = save_state;
2636 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2637 return;
2638 }
2639 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2640 tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type);
2641 }
2642
2643 /*
2644 * Process T_UNBIND_REQ.
2645 * Called from serializer.
2646 */
2647 static void
tl_unbind(mblk_t * mp,tl_endpt_t * tep)2648 tl_unbind(mblk_t *mp, tl_endpt_t *tep)
2649 {
2650 queue_t *wq;
2651 mblk_t *ackmp;
2652
2653 if (tep->te_closing) {
2654 freemsg(mp);
2655 return;
2656 }
2657
2658 wq = tep->te_wq;
2659
2660 /*
2661 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK
2662 * ==> allocate for T_ERROR_ACK (known max)
2663 */
2664 if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) {
2665 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2666 return;
2667 }
2668 /*
2669 * memory resources committed
2670 * Note: no message validation. T_UNBIND_REQ message is
2671 * same size as PRIM_type field so already verified earlier.
2672 */
2673
2674 /*
2675 * validate state
2676 */
2677 if (tep->te_state != TS_IDLE) {
2678 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2679 SL_TRACE|SL_ERROR,
2680 "tl_wput:T_UNBIND_REQ:out of state, state=%d",
2681 tep->te_state));
2682 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ);
2683 return;
2684 }
2685 tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state);
2686
2687 /*
2688 * TPI says on T_UNBIND_REQ:
2689 * send up a M_FLUSH to flush both
2690 * read and write queues
2691 */
2692 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
2693
2694 if (! IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 ||
2695 tep->te_magic != SOU_MAGIC_EXPLICIT) {
2696
2697 /*
2698 * Sockets use bind with qlen==0 followed by bind() to
2699 * the same address with qlen > 0 for listeners.
2700 * We allow rebind with a new qlen value.
2701 */
2702 tl_addr_unbind(tep);
2703 }
2704
2705 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2706 /*
2707 * send T_OK_ACK
2708 */
2709 tl_ok_ack(wq, ackmp, T_UNBIND_REQ);
2710 }
2711
2712
2713 /*
2714 * Option management code from drv/ip is used here
2715 * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr
2716 * database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ.
2717 * However, that is what we want as that option is 'unorthodox'
2718 * and only valid in T_CONN_IND, T_CONN_CON and T_UNITDATA_IND
2719 * and not in T_SVR4_OPTMGMT_REQ/ACK
2720 * Note2: use of optcom_req means this routine is an exception to
2721 * recovery from allocb() failures.
2722 */
2723
2724 static void
tl_optmgmt(queue_t * wq,mblk_t * mp)2725 tl_optmgmt(queue_t *wq, mblk_t *mp)
2726 {
2727 tl_endpt_t *tep;
2728 mblk_t *ackmp;
2729 union T_primitives *prim;
2730 cred_t *cr;
2731
2732 tep = (tl_endpt_t *)wq->q_ptr;
2733 prim = (union T_primitives *)mp->b_rptr;
2734
2735 /*
2736 * All Solaris components should pass a db_credp
2737 * for this TPI message, hence we ASSERT.
2738 * But in case there is some other M_PROTO that looks
2739 * like a TPI message sent by some other kernel
2740 * component, we check and return an error.
2741 */
2742 cr = msg_getcred(mp, NULL);
2743 ASSERT(cr != NULL);
2744 if (cr == NULL) {
2745 tl_error_ack(wq, mp, TSYSERR, EINVAL, prim->type);
2746 return;
2747 }
2748
2749 /* all states OK for AF_UNIX options ? */
2750 if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE &&
2751 prim->type == T_SVR4_OPTMGMT_REQ) {
2752 /*
2753 * Broken TLI semantics that options can only be managed
2754 * in TS_IDLE state. Needed for Sparc ABI test suite that
2755 * tests this TLI (mis)feature using this device driver.
2756 */
2757 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2758 SL_TRACE|SL_ERROR,
2759 "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d",
2760 tep->te_state));
2761 /*
2762 * preallocate memory for T_ERROR_ACK
2763 */
2764 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2765 if (! ackmp) {
2766 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2767 return;
2768 }
2769
2770 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ);
2771 freemsg(mp);
2772 return;
2773 }
2774
2775 /*
2776 * call common option management routine from drv/ip
2777 */
2778 if (prim->type == T_SVR4_OPTMGMT_REQ) {
2779 svr4_optcom_req(wq, mp, cr, &tl_opt_obj);
2780 } else {
2781 ASSERT(prim->type == T_OPTMGMT_REQ);
2782 tpi_optcom_req(wq, mp, cr, &tl_opt_obj);
2783 }
2784 }
2785
2786 /*
2787 * Handle T_conn_req - the driver part of accept().
2788 * If TL_SET[U]CRED generate the credentials options.
2789 * If this is a socket pass through options unmodified.
2790 * For sockets generate the T_CONN_CON here instead of
2791 * waiting for the T_CONN_RES.
2792 */
2793 static void
tl_conn_req(queue_t * wq,mblk_t * mp)2794 tl_conn_req(queue_t *wq, mblk_t *mp)
2795 {
2796 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
2797 struct T_conn_req *creq = (struct T_conn_req *)mp->b_rptr;
2798 ssize_t msz = MBLKL(mp);
2799 t_scalar_t alen, aoff, olen, ooff, err = 0;
2800 tl_endpt_t *peer_tep = NULL;
2801 mblk_t *ackmp;
2802 mblk_t *dimp;
2803 struct T_discon_ind *di;
2804 soux_addr_t ux_addr;
2805 tl_addr_t dst;
2806
2807 ASSERT(IS_COTS(tep));
2808
2809 if (tep->te_closing) {
2810 freemsg(mp);
2811 return;
2812 }
2813
2814 /*
2815 * preallocate memory for:
2816 * 1. max of T_ERROR_ACK and T_OK_ACK
2817 * ==> known max T_ERROR_ACK
2818 * 2. max of T_DISCON_IND and T_CONN_IND
2819 */
2820 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2821 if (! ackmp) {
2822 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2823 return;
2824 }
2825 /*
2826 * memory committed for T_OK_ACK/T_ERROR_ACK now
2827 * will be committed for T_DISCON_IND/T_CONN_IND later
2828 */
2829
2830 if (tep->te_state != TS_IDLE) {
2831 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2832 SL_TRACE|SL_ERROR,
2833 "tl_wput:T_CONN_REQ:out of state, state=%d",
2834 tep->te_state));
2835 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2836 freemsg(mp);
2837 return;
2838 }
2839
2840 /*
2841 * validate the message
2842 * Note: dereference fields in struct inside message only
2843 * after validating the message length.
2844 */
2845 if (msz < sizeof (struct T_conn_req)) {
2846 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2847 "tl_conn_req:invalid message length"));
2848 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2849 freemsg(mp);
2850 return;
2851 }
2852 alen = creq->DEST_length;
2853 aoff = creq->DEST_offset;
2854 olen = creq->OPT_length;
2855 ooff = creq->OPT_offset;
2856 if (olen == 0)
2857 ooff = 0;
2858
2859 if (IS_SOCKET(tep)) {
2860 if ((alen != TL_SOUX_ADDRLEN) ||
2861 (aoff < 0) ||
2862 (aoff + alen > msz) ||
2863 (alen > msz - sizeof (struct T_conn_req))) {
2864 (void) (STRLOG(TL_ID, tep->te_minor,
2865 1, SL_TRACE|SL_ERROR,
2866 "tl_conn_req: invalid socket addr"));
2867 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2868 freemsg(mp);
2869 return;
2870 }
2871 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
2872 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
2873 (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
2874 (void) (STRLOG(TL_ID, tep->te_minor,
2875 1, SL_TRACE|SL_ERROR,
2876 "tl_conn_req: invalid socket magic"));
2877 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2878 freemsg(mp);
2879 return;
2880 }
2881 } else {
2882 if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) ||
2883 (olen > 0 && ((ssize_t)(ooff + olen) > msz ||
2884 ooff + olen < 0)) ||
2885 olen < 0 || ooff < 0) {
2886 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2887 SL_TRACE|SL_ERROR,
2888 "tl_conn_req:invalid message"));
2889 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2890 freemsg(mp);
2891 return;
2892 }
2893
2894 if (alen <= 0 || aoff < 0 ||
2895 (ssize_t)alen > msz - sizeof (struct T_conn_req)) {
2896 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2897 SL_TRACE|SL_ERROR,
2898 "tl_conn_req:bad addr in message, "
2899 "alen=%d, msz=%ld",
2900 alen, msz));
2901 tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ);
2902 freemsg(mp);
2903 return;
2904 }
2905 #ifdef DEBUG
2906 /*
2907 * Mild form of ASSERT()ion to detect broken TPI apps.
2908 * if (! assertion)
2909 * log warning;
2910 */
2911 if (! (aoff >= (t_scalar_t)sizeof (struct T_conn_req))) {
2912 (void) (STRLOG(TL_ID, tep->te_minor, 3,
2913 SL_TRACE|SL_ERROR,
2914 "tl_conn_req: addr overlaps TPI message"));
2915 }
2916 #endif
2917 if (olen) {
2918 /*
2919 * no opts in connect req
2920 * supported in this provider except for sockets.
2921 */
2922 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2923 SL_TRACE|SL_ERROR,
2924 "tl_conn_req:options not supported "
2925 "in message"));
2926 tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ);
2927 freemsg(mp);
2928 return;
2929 }
2930 }
2931
2932 /*
2933 * Prevent tep from closing on us.
2934 */
2935 if (! tl_noclose(tep)) {
2936 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2937 "tl_conn_req:endpoint is closing"));
2938 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2939 freemsg(mp);
2940 return;
2941 }
2942
2943 tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state);
2944 /*
2945 * get endpoint to connect to
2946 * check that peer with DEST addr is bound to addr
2947 * and has CONIND_number > 0
2948 */
2949 dst.ta_alen = alen;
2950 dst.ta_abuf = mp->b_rptr + aoff;
2951 dst.ta_zoneid = tep->te_zoneid;
2952
2953 /*
2954 * Verify if remote addr is in use
2955 */
2956 peer_tep = (IS_SOCKET(tep) ?
2957 tl_sock_find_peer(tep, &ux_addr) :
2958 tl_find_peer(tep, &dst));
2959
2960 if (peer_tep == NULL) {
2961 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2962 "tl_conn_req:no one at connect address"));
2963 err = ECONNREFUSED;
2964 } else if (peer_tep->te_nicon >= peer_tep->te_qlen) {
2965 /*
2966 * validate that number of incoming connection is
2967 * not to capacity on destination endpoint
2968 */
2969 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
2970 "tl_conn_req: qlen overflow connection refused"));
2971 err = ECONNREFUSED;
2972 }
2973
2974 /*
2975 * Send T_DISCON_IND in case of error
2976 */
2977 if (err != 0) {
2978 if (peer_tep != NULL)
2979 tl_refrele(peer_tep);
2980 /* We are still expected to send T_OK_ACK */
2981 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2982 tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ);
2983 tl_closeok(tep);
2984 dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind),
2985 M_PROTO, T_DISCON_IND);
2986 if (dimp == NULL) {
2987 tl_merror(wq, NULL, ENOSR);
2988 return;
2989 }
2990 di = (struct T_discon_ind *)dimp->b_rptr;
2991 di->DISCON_reason = err;
2992 di->SEQ_number = BADSEQNUM;
2993
2994 tep->te_state = TS_IDLE;
2995 /*
2996 * send T_DISCON_IND message
2997 */
2998 putnext(tep->te_rq, dimp);
2999 return;
3000 }
3001
3002 ASSERT(IS_COTS(peer_tep));
3003
3004 /*
3005 * Found the listener. At this point processing will continue on
3006 * listener serializer. Close of the endpoint should be blocked while we
3007 * switch serializers.
3008 */
3009 tl_serializer_refhold(peer_tep->te_ser);
3010 tl_serializer_refrele(tep->te_ser);
3011 tep->te_ser = peer_tep->te_ser;
3012 ASSERT(tep->te_oconp == NULL);
3013 tep->te_oconp = peer_tep;
3014
3015 /*
3016 * It is safe to close now. Close may continue on listener serializer.
3017 */
3018 tl_closeok(tep);
3019
3020 /*
3021 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user
3022 * data, so we link mp to ackmp.
3023 */
3024 ackmp->b_cont = mp;
3025 mp = ackmp;
3026
3027 tl_refhold(tep);
3028 tl_serializer_enter(tep, tl_conn_req_ser, mp);
3029 }
3030
3031 /*
3032 * Finish T_CONN_REQ processing on listener serializer.
3033 */
3034 static void
tl_conn_req_ser(mblk_t * mp,tl_endpt_t * tep)3035 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep)
3036 {
3037 queue_t *wq;
3038 tl_endpt_t *peer_tep = tep->te_oconp;
3039 mblk_t *confmp, *cimp, *indmp;
3040 void *opts = NULL;
3041 mblk_t *ackmp = mp;
3042 struct T_conn_req *creq = (struct T_conn_req *)mp->b_cont->b_rptr;
3043 struct T_conn_ind *ci;
3044 tl_icon_t *tip;
3045 void *addr_startp;
3046 t_scalar_t olen = creq->OPT_length;
3047 t_scalar_t ooff = creq->OPT_offset;
3048 size_t ci_msz;
3049 size_t size;
3050 cred_t *cr = NULL;
3051 pid_t cpid;
3052
3053 if (tep->te_closing) {
3054 TL_UNCONNECT(tep->te_oconp);
3055 tl_serializer_exit(tep);
3056 tl_refrele(tep);
3057 freemsg(mp);
3058 return;
3059 }
3060
3061 wq = tep->te_wq;
3062 tep->te_flag |= TL_EAGER;
3063
3064 /*
3065 * Extract preallocated ackmp from mp.
3066 */
3067 mp = mp->b_cont;
3068 ackmp->b_cont = NULL;
3069
3070 if (olen == 0)
3071 ooff = 0;
3072
3073 if (peer_tep->te_closing ||
3074 !((peer_tep->te_state == TS_IDLE) ||
3075 (peer_tep->te_state == TS_WRES_CIND))) {
3076 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3077 "tl_conn_req:peer in bad state (%d)",
3078 peer_tep->te_state));
3079 TL_UNCONNECT(tep->te_oconp);
3080 tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ);
3081 freemsg(ackmp);
3082 tl_serializer_exit(tep);
3083 tl_refrele(tep);
3084 return;
3085 }
3086
3087 /*
3088 * preallocate now for T_DISCON_IND or T_CONN_IND
3089 */
3090 /*
3091 * calculate length of T_CONN_IND message
3092 */
3093 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3094 cr = msg_getcred(mp, &cpid);
3095 ASSERT(cr != NULL);
3096 if (peer_tep->te_flag & TL_SETCRED) {
3097 ooff = 0;
3098 olen = (t_scalar_t) sizeof (struct opthdr) +
3099 OPTLEN(sizeof (tl_credopt_t));
3100 /* 1 option only */
3101 } else {
3102 ooff = 0;
3103 olen = (t_scalar_t)sizeof (struct opthdr) +
3104 OPTLEN(ucredminsize(cr));
3105 /* 1 option only */
3106 }
3107 }
3108 ci_msz = sizeof (struct T_conn_ind) + tep->te_alen;
3109 ci_msz = T_ALIGN(ci_msz) + olen;
3110 size = max(ci_msz, sizeof (struct T_discon_ind));
3111
3112 /*
3113 * Save options from mp - we'll need them for T_CONN_IND.
3114 */
3115 if (ooff != 0) {
3116 opts = kmem_alloc(olen, KM_NOSLEEP);
3117 if (opts == NULL) {
3118 /*
3119 * roll back state changes
3120 */
3121 tep->te_state = TS_IDLE;
3122 tl_memrecover(wq, mp, size);
3123 freemsg(ackmp);
3124 TL_UNCONNECT(tep->te_oconp);
3125 tl_serializer_exit(tep);
3126 tl_refrele(tep);
3127 return;
3128 }
3129 /* Copy options to a temp buffer */
3130 bcopy(mp->b_rptr + ooff, opts, olen);
3131 }
3132
3133 if (IS_SOCKET(tep) && !tl_disable_early_connect) {
3134 /*
3135 * Generate a T_CONN_CON that has the identical address
3136 * (and options) as the T_CONN_REQ.
3137 * NOTE: assumes that the T_conn_req and T_conn_con structures
3138 * are isomorphic.
3139 */
3140 confmp = copyb(mp);
3141 if (! confmp) {
3142 /*
3143 * roll back state changes
3144 */
3145 tep->te_state = TS_IDLE;
3146 tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr);
3147 freemsg(ackmp);
3148 if (opts != NULL)
3149 kmem_free(opts, olen);
3150 TL_UNCONNECT(tep->te_oconp);
3151 tl_serializer_exit(tep);
3152 tl_refrele(tep);
3153 return;
3154 }
3155 ((struct T_conn_con *)(confmp->b_rptr))->PRIM_type =
3156 T_CONN_CON;
3157 } else {
3158 confmp = NULL;
3159 }
3160 if ((indmp = reallocb(mp, size, 0)) == NULL) {
3161 /*
3162 * roll back state changes
3163 */
3164 tep->te_state = TS_IDLE;
3165 tl_memrecover(wq, mp, size);
3166 freemsg(ackmp);
3167 if (opts != NULL)
3168 kmem_free(opts, olen);
3169 freemsg(confmp);
3170 TL_UNCONNECT(tep->te_oconp);
3171 tl_serializer_exit(tep);
3172 tl_refrele(tep);
3173 return;
3174 }
3175
3176 tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP);
3177 if (tip == NULL) {
3178 /*
3179 * roll back state changes
3180 */
3181 tep->te_state = TS_IDLE;
3182 tl_memrecover(wq, indmp, sizeof (*tip));
3183 freemsg(ackmp);
3184 if (opts != NULL)
3185 kmem_free(opts, olen);
3186 freemsg(confmp);
3187 TL_UNCONNECT(tep->te_oconp);
3188 tl_serializer_exit(tep);
3189 tl_refrele(tep);
3190 return;
3191 }
3192 tip->ti_mp = NULL;
3193
3194 /*
3195 * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON
3196 * and tl_icon_t cell.
3197 */
3198
3199 /*
3200 * ack validity of request and send the peer credential in the ACK.
3201 */
3202 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
3203
3204 if (peer_tep != NULL && peer_tep->te_credp != NULL &&
3205 confmp != NULL) {
3206 mblk_setcred(confmp, peer_tep->te_credp, peer_tep->te_cpid);
3207 }
3208
3209 tl_ok_ack(wq, ackmp, T_CONN_REQ);
3210
3211 /*
3212 * prepare message to send T_CONN_IND
3213 */
3214 /*
3215 * allocate the message - original data blocks retained
3216 * in the returned mblk
3217 */
3218 cimp = tl_resizemp(indmp, size);
3219 if (! cimp) {
3220 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3221 "tl_conn_req:con_ind:allocb failure"));
3222 tl_merror(wq, indmp, ENOMEM);
3223 TL_UNCONNECT(tep->te_oconp);
3224 tl_serializer_exit(tep);
3225 tl_refrele(tep);
3226 if (opts != NULL)
3227 kmem_free(opts, olen);
3228 freemsg(confmp);
3229 ASSERT(tip->ti_mp == NULL);
3230 kmem_free(tip, sizeof (*tip));
3231 return;
3232 }
3233
3234 DB_TYPE(cimp) = M_PROTO;
3235 ci = (struct T_conn_ind *)cimp->b_rptr;
3236 ci->PRIM_type = T_CONN_IND;
3237 ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind);
3238 ci->SRC_length = tep->te_alen;
3239 ci->SEQ_number = tep->te_seqno;
3240
3241 addr_startp = cimp->b_rptr + ci->SRC_offset;
3242 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
3243 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3244
3245 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3246 ci->SRC_length);
3247 ci->OPT_length = olen; /* because only 1 option */
3248 tl_fill_option(cimp->b_rptr + ci->OPT_offset,
3249 cr, cpid,
3250 peer_tep->te_flag, peer_tep->te_credp);
3251 } else if (ooff != 0) {
3252 /* Copy option from T_CONN_REQ */
3253 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3254 ci->SRC_length);
3255 ci->OPT_length = olen;
3256 ASSERT(opts != NULL);
3257 bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen);
3258 } else {
3259 ci->OPT_offset = 0;
3260 ci->OPT_length = 0;
3261 }
3262 if (opts != NULL)
3263 kmem_free(opts, olen);
3264
3265 /*
3266 * register connection request with server peer
3267 * append to list of incoming connections
3268 * increment references for both peer_tep and tep: peer_tep is placed on
3269 * te_oconp and tep is placed on listeners queue.
3270 */
3271 tip->ti_tep = tep;
3272 tip->ti_seqno = tep->te_seqno;
3273 list_insert_tail(&peer_tep->te_iconp, tip);
3274 peer_tep->te_nicon++;
3275
3276 peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state);
3277 /*
3278 * send the T_CONN_IND message
3279 */
3280 putnext(peer_tep->te_rq, cimp);
3281
3282 /*
3283 * Send a T_CONN_CON message for sockets.
3284 * Disable the queues until we have reached the correct state!
3285 */
3286 if (confmp != NULL) {
3287 tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state);
3288 noenable(wq);
3289 putnext(tep->te_rq, confmp);
3290 }
3291 /*
3292 * Now we need to increment tep reference because tep is referenced by
3293 * server list of pending connections. We also need to decrement
3294 * reference before exiting serializer. Two operations void each other
3295 * so we don't modify reference at all.
3296 */
3297 ASSERT(tep->te_refcnt >= 2);
3298 ASSERT(peer_tep->te_refcnt >= 2);
3299 tl_serializer_exit(tep);
3300 }
3301
3302
3303
/*
 * Handle T_conn_res on listener stream. Called on listener serializer.
 * tl_conn_req has already generated the T_CONN_CON.
 * tl_conn_res is called on listener serializer.
 * No one accesses acceptor at this point, so it is safe to modify acceptor.
 * Switch eager serializer to acceptor's.
 *
 * If TL_SET[U]CRED generate the credentials options.
 * For sockets tl_conn_req has already generated the T_CONN_CON.
 *
 * mp  - the T_CONN_RES / O_T_CONN_RES message; it is freed, handed to
 *       tl_memrecover(), or reused for the reply on every path.
 * tep - the listening endpoint (must be connection oriented).
 *
 * The acceptor is looked up by cres->ACCEPTOR_id and the pending client by
 * cres->SEQ_number.  On success the client and acceptor are cross-linked
 * through te_conp and placed on a common serializer.
 */
static void
tl_conn_res(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq;
	struct T_conn_res	*cres = (struct T_conn_res *)mp->b_rptr;
	ssize_t			msz = MBLKL(mp);
	t_scalar_t		olen, ooff, err = 0;
	t_scalar_t		prim = cres->PRIM_type;
	uchar_t			*addr_startp;
	tl_endpt_t		*acc_ep = NULL, *cl_ep = NULL;
	tl_icon_t		*tip;
	size_t			size;
	mblk_t			*ackmp, *respmp;
	mblk_t			*dimp, *ccmp = NULL;
	struct T_discon_ind	*di;
	struct T_conn_con	*cc;
	/* B_TRUE once tl_noclose(cl_ep) succeeded and must be undone. */
	boolean_t		client_noclose_set = B_FALSE;
	/* Cleared when the acceptor must move to the client's serializer. */
	boolean_t		switch_client_serializer = B_TRUE;

	ASSERT(IS_COTS(tep));

	if (tep->te_closing) {
		freemsg(mp);
		return;
	}

	wq = tep->te_wq;

	/*
	 * preallocate memory for:
	 * 1. max of T_ERROR_ACK and T_OK_ACK
	 *	==> known max T_ERROR_ACK
	 * 2. max of T_DISCON_IND and T_CONN_CON
	 */
	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
	if (! ackmp) {
		/* mp is handed to tl_memrecover() and not touched again. */
		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
		return;
	}
	/*
	 * memory committed for T_OK_ACK/T_ERROR_ACK now
	 * will be committed for T_DISCON_IND/T_CONN_CON later
	 */


	ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES);

	/*
	 * validate state
	 */
	if (tep->te_state != TS_WRES_CIND) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_CONN_RES:out of state, state=%d",
		    tep->te_state));
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * validate the message
	 * Note: dereference fields in struct inside message only
	 * after validating the message length.
	 */
	if (msz < sizeof (struct T_conn_res)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_conn_res:invalid message length"));
		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
		freemsg(mp);
		return;
	}
	olen = cres->OPT_length;
	ooff = cres->OPT_offset;
	if (((olen > 0) && ((ooff + olen) > msz))) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_conn_res:invalid message"));
		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
		freemsg(mp);
		return;
	}
	if (olen) {
		/*
		 * no opts in connect res
		 * supported in this provider
		 */
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_conn_res:options not supported in message"));
		tl_error_ack(wq, ackmp, TBADOPT, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * The message is well formed: move to TS_WACK_CRES.  Every error
	 * return from here until the OK ack undoes this with TE_ERROR_ACK
	 * (or by restoring TS_WRES_CIND directly).
	 */
	tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state);
	ASSERT(tep->te_state == TS_WACK_CRES);

	/*
	 * NOTE(review): the two comparisons are joined with &&, so only
	 * sequence numbers satisfying both are rejected here.  Any other
	 * number that is not on the pending list is rejected with the same
	 * TBADSEQ by the tl_icon_find() lookup below.
	 */
	if (cres->SEQ_number < TL_MINOR_START &&
	    cres->SEQ_number >= BADSEQNUM) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
		    "tl_conn_res:remote endpoint sequence number bad"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * find accepting endpoint. Will have extra reference if found.
	 */
	if (mod_hash_find_cb(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id,
	    (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
		    "tl_conn_res:bad accepting endpoint"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADF, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * Prevent acceptor from closing.  From here on every return must
	 * pair this with tl_closeok(acc_ep); error returns also drop the
	 * lookup reference with tl_refrele(acc_ep).
	 */
	if (! tl_noclose(acc_ep)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
		    "tl_conn_res:bad accepting endpoint"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADF, 0, prim);
		tl_refrele(acc_ep);
		freemsg(mp);
		return;
	}

	acc_ep->te_flag |= TL_ACCEPTOR;

	/*
	 * validate that accepting endpoint, if different from listening
	 * has address bound => state is TS_IDLE
	 * TROUBLE in XPG4 !!?
	 */
	if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
		    "tl_conn_res:accepting endpoint has no address bound,"
		    "state=%d", acc_ep->te_state));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * validate if accepting endpt same as listening, then
	 * no other incoming connection should be on the queue
	 */

	if ((tep == acc_ep) && (tep->te_nicon > 1)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_conn_res: > 1 conn_ind on listener-acceptor"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADF, 0, prim);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * Look up the entry for this client on the listener's list of
	 * pending connections, keyed by the sequence number from the
	 * message.  The entry is removed further down once the connection
	 * has been set up.
	 */
	tip = tl_icon_find(tep, cres->SEQ_number);
	if (tip == NULL) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
		    "tl_conn_res:no client in listener list"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * If ti_tep is NULL the client has already closed. In this case
	 * the code below will avoid any action on the client side
	 * but complete the server and acceptor state transitions.
	 */
	ASSERT(tip->ti_tep == NULL ||
	    tip->ti_tep->te_seqno == cres->SEQ_number);
	cl_ep = tip->ti_tep;

	/*
	 * If the client is present it is switched from listener's to acceptor's
	 * serializer. We should block client closes while serializers are
	 * being switched.
	 *
	 * It is possible that the client is present but is currently being
	 * closed. There are two possible cases:
	 *
	 * 1) The client has already entered tl_close_finish_ser() and sent
	 *    T_ORDREL_IND. In this case we can just ignore the client (but we
	 *    still need to send all messages from tip->ti_mp to the acceptor).
	 *
	 * 2) The client started the close but has not entered
	 *    tl_close_finish_ser() yet. In this case, the client is already
	 *    proceeding asynchronously on the listener's serializer, so we're
	 *    forced to change the acceptor to use the listener's serializer to
	 *    ensure that any operations on the acceptor are serialized with
	 *    respect to the close that's in-progress.
	 */
	if (cl_ep != NULL) {
		if (tl_noclose(cl_ep)) {
			client_noclose_set = B_TRUE;
		} else {
			/*
			 * Client is closing. If it has sent the
			 * T_ORDREL_IND, we can simply ignore it - otherwise,
			 * we have to let the client continue until it is
			 * sent.
			 *
			 * If we do continue using the client, acceptor will
			 * switch to client's serializer which is used by client
			 * for its close.
			 */
			tl_client_closing_when_accepting++;
			switch_client_serializer = B_FALSE;
			/*
			 * A closing client is only kept when it is a socket
			 * using early connect and its state is not -1.
			 */
			if (!IS_SOCKET(cl_ep) || tl_disable_early_connect ||
			    cl_ep->te_state == -1)
				cl_ep = NULL;
		}
	}

	if (cl_ep != NULL) {
		/*
		 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER
		 * (latter for sockets only)
		 *
		 * NOTE(review): as written, the second clause requires
		 * IS_SOCKET(cl_ep), so a non-socket client is never flagged
		 * here whatever its state - confirm this matches the intent
		 * stated above.
		 */
		if (cl_ep->te_state != TS_WCON_CREQ &&
		    (cl_ep->te_state != TS_DATA_XFER &&
		    IS_SOCKET(cl_ep))) {
			err = ECONNREFUSED;
			/*
			 * T_DISCON_IND sent later after committing memory
			 * and acking validity of request
			 */
			(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
			    "tl_conn_res:peer in bad state"));
		}

		/*
		 * preallocate now for T_DISCON_IND or T_CONN_CON
		 * ack validity of request (T_OK_ACK) after memory committed
		 */

		if (err)
			size = sizeof (struct T_discon_ind);
		else {
			/*
			 * calculate length of T_CONN_CON message
			 */
			olen = 0;
			if (cl_ep->te_flag & TL_SETCRED) {
				olen = (t_scalar_t)sizeof (struct opthdr) +
				    OPTLEN(sizeof (tl_credopt_t));
			} else if (cl_ep->te_flag & TL_SETUCRED) {
				olen = (t_scalar_t)sizeof (struct opthdr) +
				    OPTLEN(ucredminsize(acc_ep->te_credp));
			}
			size = T_ALIGN(sizeof (struct T_conn_con) +
			    acc_ep->te_alen) + olen;
		}
		if ((respmp = reallocb(mp, size, 0)) == NULL) {
			/*
			 * roll back state changes
			 */
			tep->te_state = TS_WRES_CIND;
			tl_memrecover(wq, mp, size);
			freemsg(ackmp);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_closeok(acc_ep);
			tl_refrele(acc_ep);
			return;
		}
		/* The request now lives on as respmp; mp must not be reused. */
		mp = NULL;
	}

	/*
	 * Now ack validity of request.  The event depends on whether this is
	 * the last pending connection and whether the listener is also the
	 * acceptor.
	 */
	if (tep->te_nicon == 1) {
		if (tep == acc_ep)
			tep->te_state = NEXTSTATE(TE_OK_ACK2, tep->te_state);
		else
			tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state);
	} else
		tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state);

	/*
	 * send T_DISCON_IND now if client state validation failed earlier
	 *
	 * NOTE(review): this path returns without calling tl_freetip() and
	 * forces tep->te_state to TS_IDLE - confirm that leaving the entry
	 * on the pending list is intended.
	 */
	if (err) {
		tl_ok_ack(wq, ackmp, prim);
		/*
		 * flush the queues - why always ?
		 */
		(void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR);

		dimp = tl_resizemp(respmp, size);
		if (! dimp) {
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_conn_res:con_ind:allocb failure"));
			tl_merror(wq, respmp, ENOMEM);
			tl_closeok(acc_ep);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_refrele(acc_ep);
			return;
		}
		if (dimp->b_cont) {
			/* no user data in provider generated discon ind */
			freemsg(dimp->b_cont);
			dimp->b_cont = NULL;
		}

		DB_TYPE(dimp) = M_PROTO;
		di = (struct T_discon_ind *)dimp->b_rptr;
		di->PRIM_type = T_DISCON_IND;
		di->DISCON_reason = err;
		di->SEQ_number = BADSEQNUM;

		tep->te_state = TS_IDLE;
		/*
		 * send T_DISCON_IND message
		 */
		putnext(acc_ep->te_rq, dimp);
		if (client_noclose_set)
			tl_closeok(cl_ep);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * now start connecting the accepting endpoint
	 */
	if (tep != acc_ep)
		acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state);

	if (cl_ep == NULL) {
		/*
		 * The client has already closed. Send up any queued messages
		 * and change the state accordingly.
		 */
		tl_ok_ack(wq, ackmp, prim);
		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);

		/*
		 * remove endpoint from incoming connection
		 * delete client from list of incoming connections
		 */
		tl_freetip(tep, tip);
		/* mp was never converted to respmp on this path. */
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	} else if (tip->ti_mp != NULL) {
		/*
		 * The client could have queued a T_DISCON_IND which needs
		 * to be sent up.
		 * Note that t_discon_req can not operate the same as
		 * t_data_req since it is not possible for it to putbq
		 * the message and return -1 due to the use of qwriter.
		 */
		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
	}

	/*
	 * prepare connect confirm T_CONN_CON message
	 */

	/*
	 * allocate the message - original data blocks
	 * retained in the returned mblk
	 *
	 * Sockets using early connect already received their T_CONN_CON
	 * from tl_conn_req, so for them respmp is simply released.
	 */
	if (! IS_SOCKET(cl_ep) || tl_disable_early_connect) {
		ccmp = tl_resizemp(respmp, size);
		if (ccmp == NULL) {
			tl_ok_ack(wq, ackmp, prim);
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_conn_res:conn_con:allocb failure"));
			tl_merror(wq, respmp, ENOMEM);
			tl_closeok(acc_ep);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_refrele(acc_ep);
			return;
		}

		DB_TYPE(ccmp) = M_PROTO;
		cc = (struct T_conn_con *)ccmp->b_rptr;
		cc->PRIM_type = T_CONN_CON;
		cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con);
		cc->RES_length = acc_ep->te_alen;
		addr_startp = ccmp->b_rptr + cc->RES_offset;
		bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen);
		if (cl_ep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
			cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset +
			    cc->RES_length);
			cc->OPT_length = olen;
			tl_fill_option(ccmp->b_rptr + cc->OPT_offset,
			    acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag,
			    cl_ep->te_credp);
		} else {
			cc->OPT_offset = 0;
			cc->OPT_length = 0;
		}
		/*
		 * Forward the credential in the packet so it can be picked up
		 * at the higher layers for more complete credential processing
		 */
		mblk_setcred(ccmp, acc_ep->te_credp, acc_ep->te_cpid);
	} else {
		freemsg(respmp);
		respmp = NULL;
	}

	/*
	 * make connection linking
	 * accepting and client endpoints
	 * No need to increment references:
	 *	on client: it should already have one from tip->ti_tep linkage.
	 *	on acceptor it should already have one from the table lookup.
	 *
	 * At this point both client and acceptor can't close. Set client
	 * serializer to acceptor's.
	 */
	ASSERT(cl_ep->te_refcnt >= 2);
	ASSERT(acc_ep->te_refcnt >= 2);
	ASSERT(cl_ep->te_conp == NULL);
	ASSERT(acc_ep->te_conp == NULL);
	cl_ep->te_conp = acc_ep;
	acc_ep->te_conp = cl_ep;
	ASSERT(cl_ep->te_ser == tep->te_ser);
	if (switch_client_serializer) {
		mutex_enter(&cl_ep->te_ser_lock);
		if (cl_ep->te_ser_count > 0) {
			/*
			 * te_ser_count is non-zero, so the client's
			 * serializer cannot be replaced; fall back to moving
			 * the acceptor instead (below).
			 */
			switch_client_serializer = B_FALSE;
			tl_serializer_noswitch++;
		} else {
			/*
			 * Move client to the acceptor's serializer.
			 */
			tl_serializer_refhold(acc_ep->te_ser);
			tl_serializer_refrele(cl_ep->te_ser);
			cl_ep->te_ser = acc_ep->te_ser;
		}
		mutex_exit(&cl_ep->te_ser_lock);
	}
	if (!switch_client_serializer) {
		/*
		 * It is not possible to switch client to use acceptor's.
		 * Move acceptor to client's serializer (which is the same as
		 * listener's).
		 */
		tl_serializer_refhold(cl_ep->te_ser);
		tl_serializer_refrele(acc_ep->te_ser);
		acc_ep->te_ser = cl_ep->te_ser;
	}

	TL_REMOVE_PEER(cl_ep->te_oconp);
	TL_REMOVE_PEER(acc_ep->te_oconp);

	/*
	 * remove endpoint from incoming connection
	 * delete client from list of incoming connections
	 *
	 * ti_tep is cleared first: the reference it represented is now
	 * carried by acc_ep->te_conp (see the "no need to increment
	 * references" note above).
	 */
	tip->ti_tep = NULL;
	tl_freetip(tep, tip);
	tl_ok_ack(wq, ackmp, prim);

	/*
	 * data blocks already linked in reallocb()
	 */

	/*
	 * link queues so that I_SENDFD will work
	 */
	if (! IS_SOCKET(tep)) {
		acc_ep->te_wq->q_next = cl_ep->te_rq;
		cl_ep->te_wq->q_next = acc_ep->te_rq;
	}

	/*
	 * send T_CONN_CON up on client side unless it was already
	 * done (for a socket). In cases any data or ordrel req has been
	 * queued make sure that the service procedure runs.
	 */
	if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) {
		/* Re-enable the client's write queue and schedule it. */
		enableok(cl_ep->te_wq);
		TL_QENABLE(cl_ep);
		if (ccmp != NULL)
			freemsg(ccmp);
	} else {
		/*
		 * change client state on TE_CONN_CON event
		 */
		cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state);
		putnext(cl_ep->te_rq, ccmp);
	}

	/* Mark both endpoints as accepted */
	cl_ep->te_flag |= TL_ACCEPTED;
	acc_ep->te_flag |= TL_ACCEPTED;

	/*
	 * Allow client and acceptor to close.  The acceptor's lookup
	 * reference is deliberately not released here; it now backs the
	 * cl_ep->te_conp link.
	 */
	tl_closeok(acc_ep);
	if (client_noclose_set)
		tl_closeok(cl_ep);
}
3842
3843
3844
3845
3846 static void
tl_discon_req(mblk_t * mp,tl_endpt_t * tep)3847 tl_discon_req(mblk_t *mp, tl_endpt_t *tep)
3848 {
3849 queue_t *wq;
3850 struct T_discon_req *dr;
3851 ssize_t msz;
3852 tl_endpt_t *peer_tep = tep->te_conp;
3853 tl_endpt_t *srv_tep = tep->te_oconp;
3854 tl_icon_t *tip;
3855 size_t size;
3856 mblk_t *ackmp, *dimp, *respmp;
3857 struct T_discon_ind *di;
3858 t_scalar_t save_state, new_state;
3859
3860 if (tep->te_closing) {
3861 freemsg(mp);
3862 return;
3863 }
3864
3865 if ((peer_tep != NULL) && peer_tep->te_closing) {
3866 TL_UNCONNECT(tep->te_conp);
3867 peer_tep = NULL;
3868 }
3869 if ((srv_tep != NULL) && srv_tep->te_closing) {
3870 TL_UNCONNECT(tep->te_oconp);
3871 srv_tep = NULL;
3872 }
3873
3874 wq = tep->te_wq;
3875
3876 /*
3877 * preallocate memory for:
3878 * 1. max of T_ERROR_ACK and T_OK_ACK
3879 * ==> known max T_ERROR_ACK
3880 * 2. for T_DISCON_IND
3881 */
3882 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3883 if (! ackmp) {
3884 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3885 return;
3886 }
3887 /*
3888 * memory committed for T_OK_ACK/T_ERROR_ACK now
3889 * will be committed for T_DISCON_IND later
3890 */
3891
3892 dr = (struct T_discon_req *)mp->b_rptr;
3893 msz = MBLKL(mp);
3894
3895 /*
3896 * validate the state
3897 */
3898 save_state = new_state = tep->te_state;
3899 if (! (save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) &&
3900 ! (save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) {
3901 (void) (STRLOG(TL_ID, tep->te_minor, 1,
3902 SL_TRACE|SL_ERROR,
3903 "tl_wput:T_DISCON_REQ:out of state, state=%d",
3904 tep->te_state));
3905 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ);
3906 freemsg(mp);
3907 return;
3908 }
3909 /*
3910 * Defer committing the state change until it is determined if
3911 * the message will be queued with the tl_icon or not.
3912 */
3913 new_state = NEXTSTATE(TE_DISCON_REQ, tep->te_state);
3914
3915 /* validate the message */
3916 if (msz < sizeof (struct T_discon_req)) {
3917 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3918 "tl_discon_req:invalid message"));
3919 tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3920 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ);
3921 freemsg(mp);
3922 return;
3923 }
3924
3925 /*
3926 * if server, then validate that client exists
3927 * by connection sequence number etc.
3928 */
3929 if (tep->te_nicon > 0) { /* server */
3930
3931 /*
3932 * search server list for disconnect client
3933 */
3934 tip = tl_icon_find(tep, dr->SEQ_number);
3935 if (tip == NULL) {
3936 (void) (STRLOG(TL_ID, tep->te_minor, 2,
3937 SL_TRACE|SL_ERROR,
3938 "tl_discon_req:no disconnect endpoint"));
3939 tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3940 tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ);
3941 freemsg(mp);
3942 return;
3943 }
3944 /*
3945 * If ti_tep is NULL the client has already closed. In this case
3946 * the code below will avoid any action on the client side.
3947 */
3948
3949 IMPLY(tip->ti_tep != NULL,
3950 tip->ti_tep->te_seqno == dr->SEQ_number);
3951 peer_tep = tip->ti_tep;
3952 }
3953
3954 /*
3955 * preallocate now for T_DISCON_IND
3956 * ack validity of request (T_OK_ACK) after memory committed
3957 */
3958 size = sizeof (struct T_discon_ind);
3959 if ((respmp = reallocb(mp, size, 0)) == NULL) {
3960 tl_memrecover(wq, mp, size);
3961 freemsg(ackmp);
3962 return;
3963 }
3964
3965 /*
3966 * prepare message to ack validity of request
3967 */
3968 if (tep->te_nicon == 0)
3969 new_state = NEXTSTATE(TE_OK_ACK1, new_state);
3970 else
3971 if (tep->te_nicon == 1)
3972 new_state = NEXTSTATE(TE_OK_ACK2, new_state);
3973 else
3974 new_state = NEXTSTATE(TE_OK_ACK4, new_state);
3975
3976 /*
3977 * Flushing queues according to TPI. Using the old state.
3978 */
3979 if ((tep->te_nicon <= 1) &&
3980 ((save_state == TS_DATA_XFER) ||
3981 (save_state == TS_WIND_ORDREL) ||
3982 (save_state == TS_WREQ_ORDREL)))
3983 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
3984
3985 /* send T_OK_ACK up */
3986 tl_ok_ack(wq, ackmp, T_DISCON_REQ);
3987
3988 /*
3989 * now do disconnect business
3990 */
3991 if (tep->te_nicon > 0) { /* listener */
3992 if (peer_tep != NULL && !peer_tep->te_closing) {
3993 /*
3994 * disconnect incoming connect request pending to tep
3995 */
3996 if ((dimp = tl_resizemp(respmp, size)) == NULL) {
3997 (void) (STRLOG(TL_ID, tep->te_minor, 2,
3998 SL_TRACE|SL_ERROR,
3999 "tl_discon_req: reallocb failed"));
4000 tep->te_state = new_state;
4001 tl_merror(wq, respmp, ENOMEM);
4002 return;
4003 }
4004 di = (struct T_discon_ind *)dimp->b_rptr;
4005 di->SEQ_number = BADSEQNUM;
4006 save_state = peer_tep->te_state;
4007 peer_tep->te_state = TS_IDLE;
4008
4009 TL_REMOVE_PEER(peer_tep->te_oconp);
4010 enableok(peer_tep->te_wq);
4011 TL_QENABLE(peer_tep);
4012 } else {
4013 freemsg(respmp);
4014 dimp = NULL;
4015 }
4016
4017 /*
4018 * remove endpoint from incoming connection list
4019 * - remove disconnect client from list on server
4020 */
4021 tl_freetip(tep, tip);
4022 } else if ((peer_tep = tep->te_oconp) != NULL) { /* client */
4023 /*
4024 * disconnect an outgoing request pending from tep
4025 */
4026
4027 if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4028 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4029 SL_TRACE|SL_ERROR,
4030 "tl_discon_req: reallocb failed"));
4031 tep->te_state = new_state;
4032 tl_merror(wq, respmp, ENOMEM);
4033 return;
4034 }
4035 di = (struct T_discon_ind *)dimp->b_rptr;
4036 DB_TYPE(dimp) = M_PROTO;
4037 di->PRIM_type = T_DISCON_IND;
4038 di->DISCON_reason = ECONNRESET;
4039 di->SEQ_number = tep->te_seqno;
4040
4041 /*
4042 * If this is a socket the T_DISCON_IND is queued with
4043 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
4044 * from the list of pending connections.
4045 * Note that when te_oconp is set the peer better have
4046 * a t_connind_t for the client.
4047 */
4048 if (IS_SOCKET(tep) && !tl_disable_early_connect) {
4049 /*
4050 * No need to check that
4051 * ti_tep == NULL since the T_DISCON_IND
4052 * takes precedence over other queued
4053 * messages.
4054 */
4055 tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp);
4056 peer_tep = NULL;
4057 dimp = NULL;
4058 /*
4059 * Can't clear te_oconp since tl_co_unconnect needs
4060 * it as a hint not to free the tep.
4061 * Keep the state unchanged since tl_conn_res inspects
4062 * it.
4063 */
4064 new_state = tep->te_state;
4065 } else {
4066 /* Found - delete it */
4067 tip = tl_icon_find(peer_tep, tep->te_seqno);
4068 if (tip != NULL) {
4069 ASSERT(tep == tip->ti_tep);
4070 save_state = peer_tep->te_state;
4071 if (peer_tep->te_nicon == 1)
4072 peer_tep->te_state =
4073 NEXTSTATE(TE_DISCON_IND2,
4074 peer_tep->te_state);
4075 else
4076 peer_tep->te_state =
4077 NEXTSTATE(TE_DISCON_IND3,
4078 peer_tep->te_state);
4079 tl_freetip(peer_tep, tip);
4080 }
4081 ASSERT(tep->te_oconp != NULL);
4082 TL_UNCONNECT(tep->te_oconp);
4083 }
4084 } else if ((peer_tep = tep->te_conp) != NULL) { /* connected! */
4085 if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4086 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4087 SL_TRACE|SL_ERROR,
4088 "tl_discon_req: reallocb failed"));
4089 tep->te_state = new_state;
4090 tl_merror(wq, respmp, ENOMEM);
4091 return;
4092 }
4093 di = (struct T_discon_ind *)dimp->b_rptr;
4094 di->SEQ_number = BADSEQNUM;
4095
4096 save_state = peer_tep->te_state;
4097 peer_tep->te_state = TS_IDLE;
4098 } else {
4099 /* Not connected */
4100 tep->te_state = new_state;
4101 freemsg(respmp);
4102 return;
4103 }
4104
4105 /* Commit state changes */
4106 tep->te_state = new_state;
4107
4108 if (peer_tep == NULL) {
4109 ASSERT(dimp == NULL);
4110 goto done;
4111 }
4112 /*
4113 * Flush queues on peer before sending up
4114 * T_DISCON_IND according to TPI
4115 */
4116
4117 if ((save_state == TS_DATA_XFER) ||
4118 (save_state == TS_WIND_ORDREL) ||
4119 (save_state == TS_WREQ_ORDREL))
4120 (void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW);
4121
4122 DB_TYPE(dimp) = M_PROTO;
4123 di->PRIM_type = T_DISCON_IND;
4124 di->DISCON_reason = ECONNRESET;
4125
4126 /*
4127 * data blocks already linked into dimp by reallocb()
4128 */
4129 /*
4130 * send indication message to peer user module
4131 */
4132 ASSERT(dimp != NULL);
4133 putnext(peer_tep->te_rq, dimp);
4134 done:
4135 if (tep->te_conp) { /* disconnect pointers if connected */
4136 ASSERT(! peer_tep->te_closing);
4137
4138 /*
4139 * Messages may be queued on peer's write queue
4140 * waiting to be processed by its write service
4141 * procedure. Before the pointer to the peer transport
4142 * structure is set to NULL, qenable the peer's write
4143 * queue so that the queued up messages are processed.
4144 */
4145 if ((save_state == TS_DATA_XFER) ||
4146 (save_state == TS_WIND_ORDREL) ||
4147 (save_state == TS_WREQ_ORDREL))
4148 TL_QENABLE(peer_tep);
4149 ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL);
4150 TL_UNCONNECT(peer_tep->te_conp);
4151 if (! IS_SOCKET(tep)) {
4152 /*
4153 * unlink the streams
4154 */
4155 tep->te_wq->q_next = NULL;
4156 peer_tep->te_wq->q_next = NULL;
4157 }
4158 TL_UNCONNECT(tep->te_conp);
4159 }
4160 }
4161
4162
4163 static void
tl_addr_req(mblk_t * mp,tl_endpt_t * tep)4164 tl_addr_req(mblk_t *mp, tl_endpt_t *tep)
4165 {
4166 queue_t *wq;
4167 size_t ack_sz;
4168 mblk_t *ackmp;
4169 struct T_addr_ack *taa;
4170
4171 if (tep->te_closing) {
4172 freemsg(mp);
4173 return;
4174 }
4175
4176 wq = tep->te_wq;
4177
4178 /*
4179 * Note: T_ADDR_REQ message has only PRIM_type field
4180 * so it is already validated earlier.
4181 */
4182
4183 if (IS_CLTS(tep) ||
4184 (tep->te_state > TS_WREQ_ORDREL) ||
4185 (tep->te_state < TS_DATA_XFER)) {
4186 /*
4187 * Either connectionless or connection oriented but not
4188 * in connected data transfer state or half-closed states.
4189 */
4190 ack_sz = sizeof (struct T_addr_ack);
4191 if (tep->te_state >= TS_IDLE)
4192 /* is bound */
4193 ack_sz += tep->te_alen;
4194 ackmp = reallocb(mp, ack_sz, 0);
4195 if (ackmp == NULL) {
4196 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4197 SL_TRACE|SL_ERROR,
4198 "tl_addr_req: reallocb failed"));
4199 tl_memrecover(wq, mp, ack_sz);
4200 return;
4201 }
4202
4203 taa = (struct T_addr_ack *)ackmp->b_rptr;
4204
4205 bzero(taa, sizeof (struct T_addr_ack));
4206
4207 taa->PRIM_type = T_ADDR_ACK;
4208 ackmp->b_datap->db_type = M_PCPROTO;
4209 ackmp->b_wptr = (uchar_t *)&taa[1];
4210
4211 if (tep->te_state >= TS_IDLE) {
4212 /* endpoint is bound */
4213 taa->LOCADDR_length = tep->te_alen;
4214 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4215
4216 bcopy(tep->te_abuf, ackmp->b_wptr,
4217 tep->te_alen);
4218 ackmp->b_wptr += tep->te_alen;
4219 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4220 }
4221
4222 (void) qreply(wq, ackmp);
4223 } else {
4224 ASSERT(tep->te_state == TS_DATA_XFER ||
4225 tep->te_state == TS_WIND_ORDREL ||
4226 tep->te_state == TS_WREQ_ORDREL);
4227 /* connection oriented in data transfer */
4228 tl_connected_cots_addr_req(mp, tep);
4229 }
4230 }
4231
4232
4233 static void
tl_connected_cots_addr_req(mblk_t * mp,tl_endpt_t * tep)4234 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep)
4235 {
4236 tl_endpt_t *peer_tep;
4237 size_t ack_sz;
4238 mblk_t *ackmp;
4239 struct T_addr_ack *taa;
4240 uchar_t *addr_startp;
4241
4242 if (tep->te_closing) {
4243 freemsg(mp);
4244 return;
4245 }
4246
4247 ASSERT(tep->te_state >= TS_IDLE);
4248
4249 ack_sz = sizeof (struct T_addr_ack);
4250 ack_sz += T_ALIGN(tep->te_alen);
4251 peer_tep = tep->te_conp;
4252 ack_sz += peer_tep->te_alen;
4253
4254 ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK);
4255 if (ackmp == NULL) {
4256 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4257 "tl_connected_cots_addr_req: reallocb failed"));
4258 tl_memrecover(tep->te_wq, mp, ack_sz);
4259 return;
4260 }
4261
4262 taa = (struct T_addr_ack *)ackmp->b_rptr;
4263
4264 /* endpoint is bound */
4265 taa->LOCADDR_length = tep->te_alen;
4266 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4267
4268 addr_startp = (uchar_t *)&taa[1];
4269
4270 bcopy(tep->te_abuf, addr_startp,
4271 tep->te_alen);
4272
4273 taa->REMADDR_length = peer_tep->te_alen;
4274 taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset +
4275 taa->LOCADDR_length);
4276 addr_startp = ackmp->b_rptr + taa->REMADDR_offset;
4277 bcopy(peer_tep->te_abuf, addr_startp,
4278 peer_tep->te_alen);
4279 ackmp->b_wptr = (uchar_t *)ackmp->b_rptr +
4280 taa->REMADDR_offset + peer_tep->te_alen;
4281 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4282
4283 putnext(tep->te_rq, ackmp);
4284 }
4285
4286 static void
tl_copy_info(struct T_info_ack * ia,tl_endpt_t * tep)4287 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep)
4288 {
4289 if (IS_CLTS(tep)) {
4290 *ia = tl_clts_info_ack;
4291 ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */
4292 } else {
4293 *ia = tl_cots_info_ack;
4294 if (IS_COTSORD(tep))
4295 ia->SERV_type = T_COTS_ORD;
4296 }
4297 ia->TIDU_size = tl_tidusz;
4298 ia->CURRENT_state = tep->te_state;
4299 }
4300
4301 /*
4302 * This routine responds to T_CAPABILITY_REQ messages. It is called by
4303 * tl_wput.
4304 */
4305 static void
tl_capability_req(mblk_t * mp,tl_endpt_t * tep)4306 tl_capability_req(mblk_t *mp, tl_endpt_t *tep)
4307 {
4308 mblk_t *ackmp;
4309 t_uscalar_t cap_bits1;
4310 struct T_capability_ack *tcap;
4311
4312 if (tep->te_closing) {
4313 freemsg(mp);
4314 return;
4315 }
4316
4317 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
4318
4319 ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
4320 M_PCPROTO, T_CAPABILITY_ACK);
4321 if (ackmp == NULL) {
4322 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4323 "tl_capability_req: reallocb failed"));
4324 tl_memrecover(tep->te_wq, mp,
4325 sizeof (struct T_capability_ack));
4326 return;
4327 }
4328
4329 tcap = (struct T_capability_ack *)ackmp->b_rptr;
4330 tcap->CAP_bits1 = 0;
4331
4332 if (cap_bits1 & TC1_INFO) {
4333 tl_copy_info(&tcap->INFO_ack, tep);
4334 tcap->CAP_bits1 |= TC1_INFO;
4335 }
4336
4337 if (cap_bits1 & TC1_ACCEPTOR_ID) {
4338 tcap->ACCEPTOR_id = tep->te_acceptor_id;
4339 tcap->CAP_bits1 |= TC1_ACCEPTOR_ID;
4340 }
4341
4342 putnext(tep->te_rq, ackmp);
4343 }
4344
4345 static void
tl_info_req_ser(mblk_t * mp,tl_endpt_t * tep)4346 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep)
4347 {
4348 if (! tep->te_closing)
4349 tl_info_req(mp, tep);
4350 else
4351 freemsg(mp);
4352
4353 tl_serializer_exit(tep);
4354 tl_refrele(tep);
4355 }
4356
4357 static void
tl_info_req(mblk_t * mp,tl_endpt_t * tep)4358 tl_info_req(mblk_t *mp, tl_endpt_t *tep)
4359 {
4360 mblk_t *ackmp;
4361
4362 ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack),
4363 M_PCPROTO, T_INFO_ACK);
4364 if (ackmp == NULL) {
4365 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4366 "tl_info_req: reallocb failed"));
4367 tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack));
4368 return;
4369 }
4370
4371 /*
4372 * fill in T_INFO_ACK contents
4373 */
4374 tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep);
4375
4376 /*
4377 * send ack message
4378 */
4379 putnext(tep->te_rq, ackmp);
4380 }
4381
4382 /*
4383 * Handle M_DATA, T_data_req and T_optdata_req.
4384 * If this is a socket pass through T_optdata_req options unmodified.
4385 */
4386 static void
tl_data(mblk_t * mp,tl_endpt_t * tep)4387 tl_data(mblk_t *mp, tl_endpt_t *tep)
4388 {
4389 queue_t *wq = tep->te_wq;
4390 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
4391 ssize_t msz = MBLKL(mp);
4392 tl_endpt_t *peer_tep;
4393 queue_t *peer_rq;
4394 boolean_t closing = tep->te_closing;
4395
4396 if (IS_CLTS(tep)) {
4397 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4398 SL_TRACE|SL_ERROR,
4399 "tl_wput:clts:unattached M_DATA"));
4400 if (!closing) {
4401 tl_merror(wq, mp, EPROTO);
4402 } else {
4403 freemsg(mp);
4404 }
4405 return;
4406 }
4407
4408 /*
4409 * If the endpoint is closing it should still forward any data to the
4410 * peer (if it has one). If it is not allowed to forward it can just
4411 * free the message.
4412 */
4413 if (closing &&
4414 (tep->te_state != TS_DATA_XFER) &&
4415 (tep->te_state != TS_WREQ_ORDREL)) {
4416 freemsg(mp);
4417 return;
4418 }
4419
4420 if (DB_TYPE(mp) == M_PROTO) {
4421 if (prim->type == T_DATA_REQ &&
4422 msz < sizeof (struct T_data_req)) {
4423 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4424 SL_TRACE|SL_ERROR,
4425 "tl_data:T_DATA_REQ:invalid message"));
4426 if (!closing) {
4427 tl_merror(wq, mp, EPROTO);
4428 } else {
4429 freemsg(mp);
4430 }
4431 return;
4432 } else if (prim->type == T_OPTDATA_REQ &&
4433 (msz < sizeof (struct T_optdata_req) || !IS_SOCKET(tep))) {
4434 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4435 SL_TRACE|SL_ERROR,
4436 "tl_data:T_OPTDATA_REQ:invalid message"));
4437 if (!closing) {
4438 tl_merror(wq, mp, EPROTO);
4439 } else {
4440 freemsg(mp);
4441 }
4442 return;
4443 }
4444 }
4445
4446 /*
4447 * connection oriented provider
4448 */
4449 switch (tep->te_state) {
4450 case TS_IDLE:
4451 /*
4452 * Other end not here - do nothing.
4453 */
4454 freemsg(mp);
4455 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4456 "tl_data:cots with endpoint idle"));
4457 return;
4458
4459 case TS_DATA_XFER:
4460 /* valid states */
4461 if (tep->te_conp != NULL)
4462 break;
4463
4464 if (tep->te_oconp == NULL) {
4465 if (!closing) {
4466 tl_merror(wq, mp, EPROTO);
4467 } else {
4468 freemsg(mp);
4469 }
4470 return;
4471 }
4472 /*
4473 * For a socket the T_CONN_CON is sent early thus
4474 * the peer might not yet have accepted the connection.
4475 * If we are closing queue the packet with the T_CONN_IND.
4476 * Otherwise defer processing the packet until the peer
4477 * accepts the connection.
4478 * Note that the queue is noenabled when we go into this
4479 * state.
4480 */
4481 if (!closing) {
4482 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4483 SL_TRACE|SL_ERROR,
4484 "tl_data: ocon"));
4485 TL_PUTBQ(tep, mp);
4486 return;
4487 }
4488 if (DB_TYPE(mp) == M_PROTO) {
4489 if (msz < sizeof (t_scalar_t)) {
4490 freemsg(mp);
4491 return;
4492 }
4493 /* reuse message block - just change REQ to IND */
4494 if (prim->type == T_DATA_REQ)
4495 prim->type = T_DATA_IND;
4496 else
4497 prim->type = T_OPTDATA_IND;
4498 }
4499 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4500 return;
4501
4502 case TS_WREQ_ORDREL:
4503 if (tep->te_conp == NULL) {
4504 /*
4505 * Other end closed - generate discon_ind
4506 * with reason 0 to cause an EPIPE but no
4507 * read side error on AF_UNIX sockets.
4508 */
4509 freemsg(mp);
4510 (void) (STRLOG(TL_ID, tep->te_minor, 3,
4511 SL_TRACE|SL_ERROR,
4512 "tl_data: WREQ_ORDREL and no peer"));
4513 tl_discon_ind(tep, 0);
4514 return;
4515 }
4516 break;
4517
4518 default:
4519 /* invalid state for event TE_DATA_REQ */
4520 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4521 "tl_data:cots:out of state"));
4522 tl_merror(wq, mp, EPROTO);
4523 return;
4524 }
4525 /*
4526 * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state);
4527 * (State stays same on this event)
4528 */
4529
4530 /*
4531 * get connected endpoint
4532 */
4533 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4534 freemsg(mp);
4535 /* Peer closed */
4536 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4537 "tl_data: peer gone"));
4538 return;
4539 }
4540
4541 ASSERT(tep->te_serializer == peer_tep->te_serializer);
4542 peer_rq = peer_tep->te_rq;
4543
4544 /*
4545 * Put it back if flow controlled
4546 * Note: Messages already on queue when we are closing is bounded
4547 * so we can ignore flow control.
4548 */
4549 if (!canputnext(peer_rq) && !closing) {
4550 TL_PUTBQ(tep, mp);
4551 return;
4552 }
4553
4554 /*
4555 * validate peer state
4556 */
4557 switch (peer_tep->te_state) {
4558 case TS_DATA_XFER:
4559 case TS_WIND_ORDREL:
4560 /* valid states */
4561 break;
4562 default:
4563 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4564 "tl_data:rx side:invalid state"));
4565 tl_merror(peer_tep->te_wq, mp, EPROTO);
4566 return;
4567 }
4568 if (DB_TYPE(mp) == M_PROTO) {
4569 /* reuse message block - just change REQ to IND */
4570 if (prim->type == T_DATA_REQ)
4571 prim->type = T_DATA_IND;
4572 else
4573 prim->type = T_OPTDATA_IND;
4574 }
4575 /*
4576 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4577 * (peer state stays same on this event)
4578 */
4579 /*
4580 * send data to connected peer
4581 */
4582 putnext(peer_rq, mp);
4583 }
4584
4585
4586
4587 static void
tl_exdata(mblk_t * mp,tl_endpt_t * tep)4588 tl_exdata(mblk_t *mp, tl_endpt_t *tep)
4589 {
4590 queue_t *wq = tep->te_wq;
4591 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
4592 ssize_t msz = MBLKL(mp);
4593 tl_endpt_t *peer_tep;
4594 queue_t *peer_rq;
4595 boolean_t closing = tep->te_closing;
4596
4597 if (msz < sizeof (struct T_exdata_req)) {
4598 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4599 "tl_exdata:invalid message"));
4600 if (!closing) {
4601 tl_merror(wq, mp, EPROTO);
4602 } else {
4603 freemsg(mp);
4604 }
4605 return;
4606 }
4607
4608 /*
4609 * If the endpoint is closing it should still forward any data to the
4610 * peer (if it has one). If it is not allowed to forward it can just
4611 * free the message.
4612 */
4613 if (closing &&
4614 (tep->te_state != TS_DATA_XFER) &&
4615 (tep->te_state != TS_WREQ_ORDREL)) {
4616 freemsg(mp);
4617 return;
4618 }
4619
4620 /*
4621 * validate state
4622 */
4623 switch (tep->te_state) {
4624 case TS_IDLE:
4625 /*
4626 * Other end not here - do nothing.
4627 */
4628 freemsg(mp);
4629 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4630 "tl_exdata:cots with endpoint idle"));
4631 return;
4632
4633 case TS_DATA_XFER:
4634 /* valid states */
4635 if (tep->te_conp != NULL)
4636 break;
4637
4638 if (tep->te_oconp == NULL) {
4639 if (!closing) {
4640 tl_merror(wq, mp, EPROTO);
4641 } else {
4642 freemsg(mp);
4643 }
4644 return;
4645 }
4646 /*
4647 * For a socket the T_CONN_CON is sent early thus
4648 * the peer might not yet have accepted the connection.
4649 * If we are closing queue the packet with the T_CONN_IND.
4650 * Otherwise defer processing the packet until the peer
4651 * accepts the connection.
4652 * Note that the queue is noenabled when we go into this
4653 * state.
4654 */
4655 if (!closing) {
4656 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4657 SL_TRACE|SL_ERROR,
4658 "tl_exdata: ocon"));
4659 TL_PUTBQ(tep, mp);
4660 return;
4661 }
4662 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4663 "tl_exdata: closing socket ocon"));
4664 prim->type = T_EXDATA_IND;
4665 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4666 return;
4667
4668 case TS_WREQ_ORDREL:
4669 if (tep->te_conp == NULL) {
4670 /*
4671 * Other end closed - generate discon_ind
4672 * with reason 0 to cause an EPIPE but no
4673 * read side error on AF_UNIX sockets.
4674 */
4675 freemsg(mp);
4676 (void) (STRLOG(TL_ID, tep->te_minor, 3,
4677 SL_TRACE|SL_ERROR,
4678 "tl_exdata: WREQ_ORDREL and no peer"));
4679 tl_discon_ind(tep, 0);
4680 return;
4681 }
4682 break;
4683
4684 default:
4685 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4686 SL_TRACE|SL_ERROR,
4687 "tl_wput:T_EXDATA_REQ:out of state, state=%d",
4688 tep->te_state));
4689 tl_merror(wq, mp, EPROTO);
4690 return;
4691 }
4692 /*
4693 * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state);
4694 * (state stays same on this event)
4695 */
4696
4697 /*
4698 * get connected endpoint
4699 */
4700 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4701 freemsg(mp);
4702 /* Peer closed */
4703 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4704 "tl_exdata: peer gone"));
4705 return;
4706 }
4707
4708 peer_rq = peer_tep->te_rq;
4709
4710 /*
4711 * Put it back if flow controlled
4712 * Note: Messages already on queue when we are closing is bounded
4713 * so we can ignore flow control.
4714 */
4715 if (!canputnext(peer_rq) && !closing) {
4716 TL_PUTBQ(tep, mp);
4717 return;
4718 }
4719
4720 /*
4721 * validate state on peer
4722 */
4723 switch (peer_tep->te_state) {
4724 case TS_DATA_XFER:
4725 case TS_WIND_ORDREL:
4726 /* valid states */
4727 break;
4728 default:
4729 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4730 "tl_exdata:rx side:invalid state"));
4731 tl_merror(peer_tep->te_wq, mp, EPROTO);
4732 return;
4733 }
4734 /*
4735 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4736 * (peer state stays same on this event)
4737 */
4738 /*
4739 * reuse message block
4740 */
4741 prim->type = T_EXDATA_IND;
4742
4743 /*
4744 * send data to connected peer
4745 */
4746 putnext(peer_rq, mp);
4747 }
4748
4749
4750
4751 static void
tl_ordrel(mblk_t * mp,tl_endpt_t * tep)4752 tl_ordrel(mblk_t *mp, tl_endpt_t *tep)
4753 {
4754 queue_t *wq = tep->te_wq;
4755 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
4756 ssize_t msz = MBLKL(mp);
4757 tl_endpt_t *peer_tep;
4758 queue_t *peer_rq;
4759 boolean_t closing = tep->te_closing;
4760
4761 if (msz < sizeof (struct T_ordrel_req)) {
4762 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4763 "tl_ordrel:invalid message"));
4764 if (!closing) {
4765 tl_merror(wq, mp, EPROTO);
4766 } else {
4767 freemsg(mp);
4768 }
4769 return;
4770 }
4771
4772 /*
4773 * validate state
4774 */
4775 switch (tep->te_state) {
4776 case TS_DATA_XFER:
4777 case TS_WREQ_ORDREL:
4778 /* valid states */
4779 if (tep->te_conp != NULL)
4780 break;
4781
4782 if (tep->te_oconp == NULL)
4783 break;
4784
4785 /*
4786 * For a socket the T_CONN_CON is sent early thus
4787 * the peer might not yet have accepted the connection.
4788 * If we are closing queue the packet with the T_CONN_IND.
4789 * Otherwise defer processing the packet until the peer
4790 * accepts the connection.
4791 * Note that the queue is noenabled when we go into this
4792 * state.
4793 */
4794 if (!closing) {
4795 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4796 SL_TRACE|SL_ERROR,
4797 "tl_ordlrel: ocon"));
4798 TL_PUTBQ(tep, mp);
4799 return;
4800 }
4801 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4802 "tl_ordlrel: closing socket ocon"));
4803 prim->type = T_ORDREL_IND;
4804 (void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4805 return;
4806
4807 default:
4808 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4809 SL_TRACE|SL_ERROR,
4810 "tl_wput:T_ORDREL_REQ:out of state, state=%d",
4811 tep->te_state));
4812 if (!closing) {
4813 tl_merror(wq, mp, EPROTO);
4814 } else {
4815 freemsg(mp);
4816 }
4817 return;
4818 }
4819 tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state);
4820
4821 /*
4822 * get connected endpoint
4823 */
4824 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4825 /* Peer closed */
4826 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4827 "tl_ordrel: peer gone"));
4828 freemsg(mp);
4829 return;
4830 }
4831
4832 peer_rq = peer_tep->te_rq;
4833
4834 /*
4835 * Put it back if flow controlled except when we are closing.
4836 * Note: Messages already on queue when we are closing is bounded
4837 * so we can ignore flow control.
4838 */
4839 if (! canputnext(peer_rq) && !closing) {
4840 TL_PUTBQ(tep, mp);
4841 return;
4842 }
4843
4844 /*
4845 * validate state on peer
4846 */
4847 switch (peer_tep->te_state) {
4848 case TS_DATA_XFER:
4849 case TS_WIND_ORDREL:
4850 /* valid states */
4851 break;
4852 default:
4853 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4854 "tl_ordrel:rx side:invalid state"));
4855 tl_merror(peer_tep->te_wq, mp, EPROTO);
4856 return;
4857 }
4858 peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
4859
4860 /*
4861 * reuse message block
4862 */
4863 prim->type = T_ORDREL_IND;
4864 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4865 "tl_ordrel: send ordrel_ind"));
4866
4867 /*
4868 * send data to connected peer
4869 */
4870 putnext(peer_rq, mp);
4871 }
4872
4873
4874 /*
4875 * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space.
4876 */
4877 static void
tl_uderr(queue_t * wq,mblk_t * mp,t_scalar_t err)4878 tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err)
4879 {
4880 size_t err_sz;
4881 tl_endpt_t *tep;
4882 struct T_unitdata_req *udreq;
4883 mblk_t *err_mp;
4884 t_scalar_t alen;
4885 t_scalar_t olen;
4886 struct T_uderror_ind *uderr;
4887 uchar_t *addr_startp;
4888
4889 err_sz = sizeof (struct T_uderror_ind);
4890 tep = (tl_endpt_t *)wq->q_ptr;
4891 udreq = (struct T_unitdata_req *)mp->b_rptr;
4892 alen = udreq->DEST_length;
4893 olen = udreq->OPT_length;
4894
4895 if (alen > 0)
4896 err_sz = T_ALIGN(err_sz + alen);
4897 if (olen > 0)
4898 err_sz += olen;
4899
4900 err_mp = allocb(err_sz, BPRI_MED);
4901 if (! err_mp) {
4902 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4903 "tl_uderr:allocb failure"));
4904 /*
4905 * Note: no rollback of state needed as it does
4906 * not change in connectionless transport
4907 */
4908 tl_memrecover(wq, mp, err_sz);
4909 return;
4910 }
4911
4912 DB_TYPE(err_mp) = M_PROTO;
4913 err_mp->b_wptr = err_mp->b_rptr + err_sz;
4914 uderr = (struct T_uderror_ind *)err_mp->b_rptr;
4915 uderr->PRIM_type = T_UDERROR_IND;
4916 uderr->ERROR_type = err;
4917 uderr->DEST_length = alen;
4918 uderr->OPT_length = olen;
4919 if (alen <= 0) {
4920 uderr->DEST_offset = 0;
4921 } else {
4922 uderr->DEST_offset =
4923 (t_scalar_t)sizeof (struct T_uderror_ind);
4924 addr_startp = mp->b_rptr + udreq->DEST_offset;
4925 bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset,
4926 (size_t)alen);
4927 }
4928 if (olen <= 0) {
4929 uderr->OPT_offset = 0;
4930 } else {
4931 uderr->OPT_offset =
4932 (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) +
4933 uderr->DEST_length);
4934 addr_startp = mp->b_rptr + udreq->OPT_offset;
4935 bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset,
4936 (size_t)olen);
4937 }
4938 freemsg(mp);
4939
4940 /*
4941 * send indication message
4942 */
4943 tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state);
4944
4945 qreply(wq, err_mp);
4946 }
4947
4948 static void
tl_unitdata_ser(mblk_t * mp,tl_endpt_t * tep)4949 tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep)
4950 {
4951 queue_t *wq = tep->te_wq;
4952
4953 if (!tep->te_closing && (wq->q_first != NULL)) {
4954 TL_PUTQ(tep, mp);
4955 } else if (tep->te_rq != NULL)
4956 tl_unitdata(mp, tep);
4957 else
4958 freemsg(mp);
4959
4960 tl_serializer_exit(tep);
4961 tl_refrele(tep);
4962 }
4963
4964 /*
4965 * Handle T_unitdata_req.
4966 * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options.
4967 * If this is a socket pass through options unmodified.
4968 */
4969 static void
tl_unitdata(mblk_t * mp,tl_endpt_t * tep)4970 tl_unitdata(mblk_t *mp, tl_endpt_t *tep)
4971 {
4972 queue_t *wq = tep->te_wq;
4973 soux_addr_t ux_addr;
4974 tl_addr_t destaddr;
4975 uchar_t *addr_startp;
4976 tl_endpt_t *peer_tep;
4977 struct T_unitdata_ind *udind;
4978 struct T_unitdata_req *udreq;
4979 ssize_t msz, ui_sz;
4980 t_scalar_t alen, aoff, olen, ooff;
4981 t_scalar_t oldolen = 0;
4982 cred_t *cr = NULL;
4983 pid_t cpid;
4984
4985 udreq = (struct T_unitdata_req *)mp->b_rptr;
4986 msz = MBLKL(mp);
4987
4988 /*
4989 * validate the state
4990 */
4991 if (tep->te_state != TS_IDLE) {
4992 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4993 SL_TRACE|SL_ERROR,
4994 "tl_wput:T_CONN_REQ:out of state"));
4995 tl_merror(wq, mp, EPROTO);
4996 return;
4997 }
4998 /*
4999 * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state);
5000 * (state does not change on this event)
5001 */
5002
5003 /*
5004 * validate the message
5005 * Note: dereference fields in struct inside message only
5006 * after validating the message length.
5007 */
5008 if (msz < sizeof (struct T_unitdata_req)) {
5009 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5010 "tl_unitdata:invalid message length"));
5011 tl_merror(wq, mp, EINVAL);
5012 return;
5013 }
5014 alen = udreq->DEST_length;
5015 aoff = udreq->DEST_offset;
5016 oldolen = olen = udreq->OPT_length;
5017 ooff = udreq->OPT_offset;
5018 if (olen == 0)
5019 ooff = 0;
5020
5021 if (IS_SOCKET(tep)) {
5022 if ((alen != TL_SOUX_ADDRLEN) ||
5023 (aoff < 0) ||
5024 (aoff + alen > msz) ||
5025 (olen < 0) || (ooff < 0) ||
5026 ((olen > 0) && ((ooff + olen) > msz))) {
5027 (void) (STRLOG(TL_ID, tep->te_minor,
5028 1, SL_TRACE|SL_ERROR,
5029 "tl_unitdata_req: invalid socket addr "
5030 "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)",
5031 (int)msz, alen, aoff, olen, ooff));
5032 tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5033 return;
5034 }
5035 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
5036
5037 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
5038 (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
5039 (void) (STRLOG(TL_ID, tep->te_minor,
5040 1, SL_TRACE|SL_ERROR,
5041 "tl_conn_req: invalid socket magic"));
5042 tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5043 return;
5044 }
5045 } else {
5046 if ((alen < 0) ||
5047 (aoff < 0) ||
5048 ((alen > 0) && ((aoff + alen) > msz)) ||
5049 ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) ||
5050 ((aoff + alen) < 0) ||
5051 ((olen > 0) && ((ooff + olen) > msz)) ||
5052 (olen < 0) ||
5053 (ooff < 0) ||
5054 ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) {
5055 (void) (STRLOG(TL_ID, tep->te_minor, 1,
5056 SL_TRACE|SL_ERROR,
5057 "tl_unitdata:invalid unit data message"));
5058 tl_merror(wq, mp, EINVAL);
5059 return;
5060 }
5061 }
5062
5063 /* Options not supported unless it's a socket */
5064 if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) {
5065 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5066 "tl_unitdata:option use(unsupported) or zero len addr"));
5067 tl_uderr(wq, mp, EPROTO);
5068 return;
5069 }
5070 #ifdef DEBUG
5071 /*
5072 * Mild form of ASSERT()ion to detect broken TPI apps.
5073 * if (! assertion)
5074 * log warning;
5075 */
5076 if (! (aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) {
5077 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5078 "tl_unitdata:addr overlaps TPI message"));
5079 }
5080 #endif
5081 /*
5082 * get destination endpoint
5083 */
5084 destaddr.ta_alen = alen;
5085 destaddr.ta_abuf = mp->b_rptr + aoff;
5086 destaddr.ta_zoneid = tep->te_zoneid;
5087
5088 /*
5089 * Check whether the destination is the same that was used previously
5090 * and the destination endpoint is in the right state. If something is
5091 * wrong, find destination again and cache it.
5092 */
5093 peer_tep = tep->te_lastep;
5094
5095 if ((peer_tep == NULL) || peer_tep->te_closing ||
5096 (peer_tep->te_state != TS_IDLE) ||
5097 !tl_eqaddr(&destaddr, &peer_tep->te_ap)) {
5098 /*
5099 * Not the same as cached destination , need to find the right
5100 * destination.
5101 */
5102 peer_tep = (IS_SOCKET(tep) ?
5103 tl_sock_find_peer(tep, &ux_addr) :
5104 tl_find_peer(tep, &destaddr));
5105
5106 if (peer_tep == NULL) {
5107 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5108 SL_TRACE|SL_ERROR,
5109 "tl_unitdata:no one at destination address"));
5110 tl_uderr(wq, mp, ECONNRESET);
5111 return;
5112 }
5113
5114 /*
5115 * Cache the new peer.
5116 */
5117 if (tep->te_lastep != NULL)
5118 tl_refrele(tep->te_lastep);
5119
5120 tep->te_lastep = peer_tep;
5121 }
5122
5123 if (peer_tep->te_state != TS_IDLE) {
5124 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5125 "tl_unitdata:provider in invalid state"));
5126 tl_uderr(wq, mp, EPROTO);
5127 return;
5128 }
5129
5130 ASSERT(peer_tep->te_rq != NULL);
5131
5132 /*
5133 * Put it back if flow controlled except when we are closing.
5134 * Note: Messages already on queue when we are closing is bounded
5135 * so we can ignore flow control.
5136 */
5137 if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) {
5138 /* record what we are flow controlled on */
5139 if (tep->te_flowq != NULL) {
5140 list_remove(&tep->te_flowq->te_flowlist, tep);
5141 }
5142 list_insert_head(&peer_tep->te_flowlist, tep);
5143 tep->te_flowq = peer_tep;
5144 TL_PUTBQ(tep, mp);
5145 return;
5146 }
5147 /*
5148 * prepare indication message
5149 */
5150
5151 /*
5152 * calculate length of message
5153 */
5154 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5155 cr = msg_getcred(mp, &cpid);
5156 ASSERT(cr != NULL);
5157
5158 if (peer_tep->te_flag & TL_SETCRED) {
5159 ASSERT(olen == 0);
5160 olen = (t_scalar_t)sizeof (struct opthdr) +
5161 OPTLEN(sizeof (tl_credopt_t));
5162 /* 1 option only */
5163 } else if (peer_tep->te_flag & TL_SETUCRED) {
5164 ASSERT(olen == 0);
5165 olen = (t_scalar_t)sizeof (struct opthdr) +
5166 OPTLEN(ucredminsize(cr));
5167 /* 1 option only */
5168 } else {
5169 /* Possibly more than one option */
5170 olen += (t_scalar_t)sizeof (struct T_opthdr) +
5171 OPTLEN(ucredminsize(cr));
5172 }
5173 }
5174
5175 ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) +
5176 olen;
5177 /*
5178 * If the unitdata_ind fits and we are not adding options
5179 * reuse the udreq mblk.
5180 */
5181 if (msz >= ui_sz && alen >= tep->te_alen &&
5182 !(peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED))) {
5183 /*
5184 * Reuse the original mblk. Leave options in place.
5185 */
5186 udind = (struct T_unitdata_ind *)mp->b_rptr;
5187 udind->PRIM_type = T_UNITDATA_IND;
5188 udind->SRC_length = tep->te_alen;
5189 addr_startp = mp->b_rptr + udind->SRC_offset;
5190 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5191 } else {
5192 /* Allocate a new T_unidata_ind message */
5193 mblk_t *ui_mp;
5194
5195 ui_mp = allocb(ui_sz, BPRI_MED);
5196 if (! ui_mp) {
5197 (void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE,
5198 "tl_unitdata:allocb failure:message queued"));
5199 tl_memrecover(wq, mp, ui_sz);
5200 return;
5201 }
5202
5203 /*
5204 * fill in T_UNITDATA_IND contents
5205 */
5206 DB_TYPE(ui_mp) = M_PROTO;
5207 ui_mp->b_wptr = ui_mp->b_rptr + ui_sz;
5208 udind = (struct T_unitdata_ind *)ui_mp->b_rptr;
5209 udind->PRIM_type = T_UNITDATA_IND;
5210 udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind);
5211 udind->SRC_length = tep->te_alen;
5212 addr_startp = ui_mp->b_rptr + udind->SRC_offset;
5213 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5214 udind->OPT_offset =
5215 (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length);
5216 udind->OPT_length = olen;
5217 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5218
5219 if (oldolen != 0) {
5220 bcopy((void *)((uintptr_t)udreq + ooff),
5221 (void *)((uintptr_t)udind +
5222 udind->OPT_offset),
5223 oldolen);
5224 }
5225 ASSERT(cr != NULL);
5226
5227 tl_fill_option(ui_mp->b_rptr + udind->OPT_offset +
5228 oldolen, cr, cpid,
5229 peer_tep->te_flag, peer_tep->te_credp);
5230 } else {
5231 bcopy((void *)((uintptr_t)udreq + ooff),
5232 (void *)((uintptr_t)udind + udind->OPT_offset),
5233 olen);
5234 }
5235
5236 /*
5237 * relink data blocks from mp to ui_mp
5238 */
5239 ui_mp->b_cont = mp->b_cont;
5240 freeb(mp);
5241 mp = ui_mp;
5242 }
5243 /*
5244 * send indication message
5245 */
5246 peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state);
5247 putnext(peer_tep->te_rq, mp);
5248 }
5249
5250
5251
5252 /*
5253 * Check if a given addr is in use.
5254 * Endpoint ptr returned or NULL if not found.
5255 * The name space is separate for each mode. This implies that
5256 * sockets get their own name space.
5257 */
5258 static tl_endpt_t *
tl_find_peer(tl_endpt_t * tep,tl_addr_t * ap)5259 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap)
5260 {
5261 tl_endpt_t *peer_tep = NULL;
5262 int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap,
5263 (mod_hash_val_t *)&peer_tep, tl_find_callback);
5264
5265 ASSERT(! IS_SOCKET(tep));
5266
5267 ASSERT(ap != NULL && ap->ta_alen > 0);
5268 ASSERT(ap->ta_zoneid == tep->te_zoneid);
5269 ASSERT(ap->ta_abuf != NULL);
5270 EQUIV(rc == 0, peer_tep != NULL);
5271 IMPLY(rc == 0,
5272 (tep->te_zoneid == peer_tep->te_zoneid) &&
5273 (tep->te_transport == peer_tep->te_transport));
5274
5275 if ((rc == 0) && (peer_tep->te_closing)) {
5276 tl_refrele(peer_tep);
5277 peer_tep = NULL;
5278 }
5279
5280 return (peer_tep);
5281 }
5282
5283 /*
5284 * Find peer for a socket based on unix domain address.
5285 * For implicit addresses our peer can be found by minor number in ai hash. For
5286 * explicit binds we look vnode address at addr_hash.
5287 */
5288 static tl_endpt_t *
tl_sock_find_peer(tl_endpt_t * tep,soux_addr_t * ux_addr)5289 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr)
5290 {
5291 tl_endpt_t *peer_tep = NULL;
5292 mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ?
5293 tep->te_aihash : tep->te_addrhash;
5294 int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp,
5295 (mod_hash_val_t *)&peer_tep, tl_find_callback);
5296
5297 ASSERT(IS_SOCKET(tep));
5298 EQUIV(rc == 0, peer_tep != NULL);
5299 IMPLY(rc == 0, (tep->te_transport == peer_tep->te_transport));
5300
5301 if (peer_tep != NULL) {
5302 /* Don't attempt to use closing peer. */
5303 if (peer_tep->te_closing)
5304 goto errout;
5305
5306 /*
5307 * Cross-zone unix sockets are permitted, but for Trusted
5308 * Extensions only, the "server" for these must be in the
5309 * global zone.
5310 */
5311 if ((peer_tep->te_zoneid != tep->te_zoneid) &&
5312 is_system_labeled() &&
5313 (peer_tep->te_zoneid != GLOBAL_ZONEID))
5314 goto errout;
5315 }
5316
5317 return (peer_tep);
5318
5319 errout:
5320 tl_refrele(peer_tep);
5321 return (NULL);
5322 }
5323
5324 /*
5325 * Generate a free addr and return it in struct pointed by ap
5326 * but allocating space for address buffer.
5327 * The generated address will be at least 4 bytes long and, if req->ta_alen
5328 * exceeds 4 bytes, be req->ta_alen bytes long.
5329 *
5330 * If address is found it will be inserted in the hash.
5331 *
5332 * If req->ta_alen is larger than the default alen (4 bytes) the last
5333 * alen-4 bytes will always be the same as in req.
5334 *
5335 * Return 0 for failure.
5336 * Return non-zero for success.
5337 */
5338 static boolean_t
tl_get_any_addr(tl_endpt_t * tep,tl_addr_t * req)5339 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req)
5340 {
5341 t_scalar_t alen;
5342 uint32_t loopcnt; /* Limit loop to 2^32 */
5343
5344 ASSERT(tep->te_hash_hndl != NULL);
5345 ASSERT(! IS_SOCKET(tep));
5346
5347 if (tep->te_hash_hndl == NULL)
5348 return (B_FALSE);
5349
5350 /*
5351 * check if default addr is in use
5352 * if it is - bump it and try again
5353 */
5354 if (req == NULL) {
5355 alen = sizeof (uint32_t);
5356 } else {
5357 alen = max(req->ta_alen, sizeof (uint32_t));
5358 ASSERT(tep->te_zoneid == req->ta_zoneid);
5359 }
5360
5361 if (tep->te_alen < alen) {
5362 void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
5363
5364 /*
5365 * Not enough space in tep->ta_ap to hold the address,
5366 * allocate a bigger space.
5367 */
5368 if (abuf == NULL)
5369 return (B_FALSE);
5370
5371 if (tep->te_alen > 0)
5372 kmem_free(tep->te_abuf, tep->te_alen);
5373
5374 tep->te_alen = alen;
5375 tep->te_abuf = abuf;
5376 }
5377
5378 /* Copy in the address in req */
5379 if (req != NULL) {
5380 ASSERT(alen >= req->ta_alen);
5381 bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen);
5382 }
5383
5384 /*
5385 * First try minor number then try default addresses.
5386 */
5387 bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t));
5388
5389 for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) {
5390 if (mod_hash_insert_reserve(tep->te_addrhash,
5391 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
5392 tep->te_hash_hndl) == 0) {
5393 /*
5394 * found free address
5395 */
5396 tep->te_flag |= TL_ADDRHASHED;
5397 tep->te_hash_hndl = NULL;
5398
5399 return (B_TRUE); /* successful return */
5400 }
5401 /*
5402 * Use default address.
5403 */
5404 bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t));
5405 atomic_add_32(&tep->te_defaddr, 1);
5406 }
5407
5408 /*
5409 * Failed to find anything.
5410 */
5411 (void) (STRLOG(TL_ID, -1, 1, SL_ERROR,
5412 "tl_get_any_addr:looped 2^32 times"));
5413 return (B_FALSE);
5414 }
5415
5416 /*
5417 * reallocb + set r/w ptrs to reflect size.
5418 */
5419 static mblk_t *
tl_resizemp(mblk_t * mp,ssize_t new_size)5420 tl_resizemp(mblk_t *mp, ssize_t new_size)
5421 {
5422 if ((mp = reallocb(mp, new_size, 0)) == NULL)
5423 return (NULL);
5424
5425 mp->b_rptr = DB_BASE(mp);
5426 mp->b_wptr = mp->b_rptr + new_size;
5427 return (mp);
5428 }
5429
5430 static void
tl_cl_backenable(tl_endpt_t * tep)5431 tl_cl_backenable(tl_endpt_t *tep)
5432 {
5433 list_t *l = &tep->te_flowlist;
5434 tl_endpt_t *elp;
5435
5436 ASSERT(IS_CLTS(tep));
5437
5438 for (elp = list_head(l); elp != NULL; elp = list_head(l)) {
5439 ASSERT(tep->te_ser == elp->te_ser);
5440 ASSERT(elp->te_flowq == tep);
5441 if (! elp->te_closing)
5442 TL_QENABLE(elp);
5443 elp->te_flowq = NULL;
5444 list_remove(l, elp);
5445 }
5446 }
5447
5448 /*
5449 * Unconnect endpoints.
5450 */
5451 static void
tl_co_unconnect(tl_endpt_t * tep)5452 tl_co_unconnect(tl_endpt_t *tep)
5453 {
5454 tl_endpt_t *peer_tep = tep->te_conp;
5455 tl_endpt_t *srv_tep = tep->te_oconp;
5456 list_t *l;
5457 tl_icon_t *tip;
5458 tl_endpt_t *cl_tep;
5459 mblk_t *d_mp;
5460
5461 ASSERT(IS_COTS(tep));
5462 /*
5463 * If our peer is closing, don't use it.
5464 */
5465 if ((peer_tep != NULL) && peer_tep->te_closing) {
5466 TL_UNCONNECT(tep->te_conp);
5467 peer_tep = NULL;
5468 }
5469 if ((srv_tep != NULL) && srv_tep->te_closing) {
5470 TL_UNCONNECT(tep->te_oconp);
5471 srv_tep = NULL;
5472 }
5473
5474 if (tep->te_nicon > 0) {
5475 l = &tep->te_iconp;
5476 /*
5477 * If incoming requests pending, change state
5478 * of clients on disconnect ind event and send
5479 * discon_ind pdu to modules above them
5480 * for server: all clients get disconnect
5481 */
5482
5483 while (tep->te_nicon > 0) {
5484 tip = list_head(l);
5485 cl_tep = tip->ti_tep;
5486
5487 if (cl_tep == NULL) {
5488 tl_freetip(tep, tip);
5489 continue;
5490 }
5491
5492 if (cl_tep->te_oconp != NULL) {
5493 ASSERT(cl_tep != cl_tep->te_oconp);
5494 TL_UNCONNECT(cl_tep->te_oconp);
5495 }
5496
5497 if (cl_tep->te_closing) {
5498 tl_freetip(tep, tip);
5499 continue;
5500 }
5501
5502 enableok(cl_tep->te_wq);
5503 TL_QENABLE(cl_tep);
5504 d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM);
5505 if (d_mp != NULL) {
5506 cl_tep->te_state = TS_IDLE;
5507 putnext(cl_tep->te_rq, d_mp);
5508 } else {
5509 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5510 SL_TRACE|SL_ERROR,
5511 "tl_co_unconnect:icmng: "
5512 "allocb failure"));
5513 }
5514 tl_freetip(tep, tip);
5515 }
5516 } else if (srv_tep != NULL) {
5517 /*
5518 * If outgoing request pending, change state
5519 * of server on discon ind event
5520 */
5521
5522 if (IS_SOCKET(tep) && !tl_disable_early_connect &&
5523 IS_COTSORD(srv_tep) &&
5524 !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) {
5525 /*
5526 * Queue ordrel_ind for server to be picked up
5527 * when the connection is accepted.
5528 */
5529 d_mp = tl_ordrel_ind_alloc();
5530 } else {
5531 /*
5532 * send discon_ind to server
5533 */
5534 d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno);
5535 }
5536 if (d_mp == NULL) {
5537 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5538 SL_TRACE|SL_ERROR,
5539 "tl_co_unconnect:outgoing:allocb failure"));
5540 TL_UNCONNECT(tep->te_oconp);
5541 goto discon_peer;
5542 }
5543
5544 /*
5545 * If this is a socket the T_DISCON_IND is queued with
5546 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
5547 * from the list of pending connections.
5548 * Note that when te_oconp is set the peer better have
5549 * a t_connind_t for the client.
5550 */
5551 if (IS_SOCKET(tep) && !tl_disable_early_connect) {
5552 /*
5553 * Queue the disconnection message.
5554 */
5555 tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp);
5556 } else {
5557 tip = tl_icon_find(srv_tep, tep->te_seqno);
5558 if (tip == NULL) {
5559 freemsg(d_mp);
5560 } else {
5561 ASSERT(tep == tip->ti_tep);
5562 ASSERT(tep->te_ser == srv_tep->te_ser);
5563 /*
5564 * Delete tip from the server list.
5565 */
5566 if (srv_tep->te_nicon == 1) {
5567 srv_tep->te_state =
5568 NEXTSTATE(TE_DISCON_IND2,
5569 srv_tep->te_state);
5570 } else {
5571 srv_tep->te_state =
5572 NEXTSTATE(TE_DISCON_IND3,
5573 srv_tep->te_state);
5574 }
5575 ASSERT(*(uint32_t *)(d_mp->b_rptr) ==
5576 T_DISCON_IND);
5577 putnext(srv_tep->te_rq, d_mp);
5578 tl_freetip(srv_tep, tip);
5579 }
5580 TL_UNCONNECT(tep->te_oconp);
5581 srv_tep = NULL;
5582 }
5583 } else if (peer_tep != NULL) {
5584 /*
5585 * unconnect existing connection
5586 * If connected, change state of peer on
5587 * discon ind event and send discon ind pdu
5588 * to module above it
5589 */
5590
5591 ASSERT(tep->te_ser == peer_tep->te_ser);
5592 if (IS_COTSORD(peer_tep) &&
5593 (peer_tep->te_state == TS_WIND_ORDREL ||
5594 peer_tep->te_state == TS_DATA_XFER)) {
5595 /*
5596 * send ordrel ind
5597 */
5598 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
5599 "tl_co_unconnect:connected: ordrel_ind state %d->%d",
5600 peer_tep->te_state,
5601 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state)));
5602 d_mp = tl_ordrel_ind_alloc();
5603 if (! d_mp) {
5604 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5605 SL_TRACE|SL_ERROR,
5606 "tl_co_unconnect:connected:"
5607 "allocb failure"));
5608 /*
5609 * Continue with cleaning up peer as
5610 * this side may go away with the close
5611 */
5612 TL_QENABLE(peer_tep);
5613 goto discon_peer;
5614 }
5615 peer_tep->te_state =
5616 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
5617
5618 putnext(peer_tep->te_rq, d_mp);
5619 /*
5620 * Handle flow control case. This will generate
5621 * a t_discon_ind message with reason 0 if there
5622 * is data queued on the write side.
5623 */
5624 TL_QENABLE(peer_tep);
5625 } else if (IS_COTSORD(peer_tep) &&
5626 peer_tep->te_state == TS_WREQ_ORDREL) {
5627 /*
5628 * Sent an ordrel_ind. We send a discon with
5629 * with error 0 to inform that the peer is gone.
5630 */
5631 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5632 SL_TRACE|SL_ERROR,
5633 "tl_co_unconnect: discon in state %d",
5634 tep->te_state));
5635 tl_discon_ind(peer_tep, 0);
5636 } else {
5637 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5638 SL_TRACE|SL_ERROR,
5639 "tl_co_unconnect: state %d", tep->te_state));
5640 tl_discon_ind(peer_tep, ECONNRESET);
5641 }
5642
5643 discon_peer:
5644 /*
5645 * Disconnect cross-pointers only for close
5646 */
5647 if (tep->te_closing) {
5648 peer_tep = tep->te_conp;
5649 TL_REMOVE_PEER(peer_tep->te_conp);
5650 TL_REMOVE_PEER(tep->te_conp);
5651 }
5652 }
5653 }
5654
5655 /*
5656 * Note: The following routine does not recover from allocb()
5657 * failures
5658 * The reason should be from the <sys/errno.h> space.
5659 */
5660 static void
tl_discon_ind(tl_endpt_t * tep,uint32_t reason)5661 tl_discon_ind(tl_endpt_t *tep, uint32_t reason)
5662 {
5663 mblk_t *d_mp;
5664
5665 if (tep->te_closing)
5666 return;
5667
5668 /*
5669 * flush the queues.
5670 */
5671 flushq(tep->te_rq, FLUSHDATA);
5672 (void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW);
5673
5674 /*
5675 * send discon ind
5676 */
5677 d_mp = tl_discon_ind_alloc(reason, tep->te_seqno);
5678 if (! d_mp) {
5679 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5680 "tl_discon_ind:allocb failure"));
5681 return;
5682 }
5683 tep->te_state = TS_IDLE;
5684 putnext(tep->te_rq, d_mp);
5685 }
5686
5687 /*
5688 * Note: The following routine does not recover from allocb()
5689 * failures
5690 * The reason should be from the <sys/errno.h> space.
5691 */
5692 static mblk_t *
tl_discon_ind_alloc(uint32_t reason,t_scalar_t seqnum)5693 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum)
5694 {
5695 mblk_t *mp;
5696 struct T_discon_ind *tdi;
5697
5698 if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) {
5699 DB_TYPE(mp) = M_PROTO;
5700 mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind);
5701 tdi = (struct T_discon_ind *)mp->b_rptr;
5702 tdi->PRIM_type = T_DISCON_IND;
5703 tdi->DISCON_reason = reason;
5704 tdi->SEQ_number = seqnum;
5705 }
5706 return (mp);
5707 }
5708
5709
5710 /*
5711 * Note: The following routine does not recover from allocb()
5712 * failures
5713 */
5714 static mblk_t *
tl_ordrel_ind_alloc(void)5715 tl_ordrel_ind_alloc(void)
5716 {
5717 mblk_t *mp;
5718 struct T_ordrel_ind *toi;
5719
5720 if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) {
5721 DB_TYPE(mp) = M_PROTO;
5722 mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind);
5723 toi = (struct T_ordrel_ind *)mp->b_rptr;
5724 toi->PRIM_type = T_ORDREL_IND;
5725 }
5726 return (mp);
5727 }
5728
5729
5730 /*
5731 * Lookup the seqno in the list of queued connections.
5732 */
5733 static tl_icon_t *
tl_icon_find(tl_endpt_t * tep,t_scalar_t seqno)5734 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno)
5735 {
5736 list_t *l = &tep->te_iconp;
5737 tl_icon_t *tip = list_head(l);
5738
5739 ASSERT(seqno != 0);
5740
5741 for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip))
5742 ;
5743
5744 return (tip);
5745 }
5746
5747 /*
5748 * Queue data for a given T_CONN_IND while verifying that redundant
5749 * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued.
5750 * Used when the originator of the connection closes.
5751 */
5752 static void
tl_icon_queuemsg(tl_endpt_t * tep,t_scalar_t seqno,mblk_t * nmp)5753 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp)
5754 {
5755 tl_icon_t *tip;
5756 mblk_t **mpp, *mp;
5757 int prim, nprim;
5758
5759 if (nmp->b_datap->db_type == M_PROTO)
5760 nprim = ((union T_primitives *)nmp->b_rptr)->type;
5761 else
5762 nprim = -1; /* M_DATA */
5763
5764 tip = tl_icon_find(tep, seqno);
5765 if (tip == NULL) {
5766 freemsg(nmp);
5767 return;
5768 }
5769
5770 ASSERT(tip->ti_seqno != 0);
5771 mpp = &tip->ti_mp;
5772 while (*mpp != NULL) {
5773 mp = *mpp;
5774
5775 if (mp->b_datap->db_type == M_PROTO)
5776 prim = ((union T_primitives *)mp->b_rptr)->type;
5777 else
5778 prim = -1; /* M_DATA */
5779
5780 /*
5781 * Allow nothing after a T_DISCON_IND
5782 */
5783 if (prim == T_DISCON_IND) {
5784 freemsg(nmp);
5785 return;
5786 }
5787 /*
5788 * Only allow a T_DISCON_IND after an T_ORDREL_IND
5789 */
5790 if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) {
5791 freemsg(nmp);
5792 return;
5793 }
5794 mpp = &(mp->b_next);
5795 }
5796 *mpp = nmp;
5797 }
5798
5799 /*
5800 * Verify if a certain TPI primitive exists on the connind queue.
5801 * Use prim -1 for M_DATA.
5802 * Return non-zero if found.
5803 */
5804 static boolean_t
tl_icon_hasprim(tl_endpt_t * tep,t_scalar_t seqno,t_scalar_t prim)5805 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim)
5806 {
5807 tl_icon_t *tip = tl_icon_find(tep, seqno);
5808 boolean_t found = B_FALSE;
5809
5810 if (tip != NULL) {
5811 mblk_t *mp;
5812 for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) {
5813 found = (DB_TYPE(mp) == M_PROTO &&
5814 ((union T_primitives *)mp->b_rptr)->type == prim);
5815 }
5816 }
5817 return (found);
5818 }
5819
5820 /*
5821 * Send the b_next mblk chain that has accumulated before the connection
5822 * was accepted. Perform the necessary state transitions.
5823 */
5824 static void
tl_icon_sendmsgs(tl_endpt_t * tep,mblk_t ** mpp)5825 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp)
5826 {
5827 mblk_t *mp;
5828 union T_primitives *primp;
5829
5830 if (tep->te_closing) {
5831 tl_icon_freemsgs(mpp);
5832 return;
5833 }
5834
5835 ASSERT(tep->te_state == TS_DATA_XFER);
5836 ASSERT(tep->te_rq->q_first == NULL);
5837
5838 while ((mp = *mpp) != NULL) {
5839 *mpp = mp->b_next;
5840 mp->b_next = NULL;
5841
5842 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
5843 switch (DB_TYPE(mp)) {
5844 default:
5845 freemsg(mp);
5846 break;
5847 case M_DATA:
5848 putnext(tep->te_rq, mp);
5849 break;
5850 case M_PROTO:
5851 primp = (union T_primitives *)mp->b_rptr;
5852 switch (primp->type) {
5853 case T_UNITDATA_IND:
5854 case T_DATA_IND:
5855 case T_OPTDATA_IND:
5856 case T_EXDATA_IND:
5857 putnext(tep->te_rq, mp);
5858 break;
5859 case T_ORDREL_IND:
5860 tep->te_state = NEXTSTATE(TE_ORDREL_IND,
5861 tep->te_state);
5862 putnext(tep->te_rq, mp);
5863 break;
5864 case T_DISCON_IND:
5865 tep->te_state = TS_IDLE;
5866 putnext(tep->te_rq, mp);
5867 break;
5868 default:
5869 #ifdef DEBUG
5870 cmn_err(CE_PANIC,
5871 "tl_icon_sendmsgs: unknown primitive");
5872 #endif /* DEBUG */
5873 freemsg(mp);
5874 break;
5875 }
5876 break;
5877 }
5878 }
5879 }
5880
5881 /*
5882 * Free the b_next mblk chain that has accumulated before the connection
5883 * was accepted.
5884 */
5885 static void
tl_icon_freemsgs(mblk_t ** mpp)5886 tl_icon_freemsgs(mblk_t **mpp)
5887 {
5888 mblk_t *mp;
5889
5890 while ((mp = *mpp) != NULL) {
5891 *mpp = mp->b_next;
5892 mp->b_next = NULL;
5893 freemsg(mp);
5894 }
5895 }
5896
5897 /*
5898 * Send M_ERROR
5899 * Note: assumes caller ensured enough space in mp or enough
5900 * memory available. Does not attempt recovery from allocb()
5901 * failures
5902 */
5903
5904 static void
tl_merror(queue_t * wq,mblk_t * mp,int error)5905 tl_merror(queue_t *wq, mblk_t *mp, int error)
5906 {
5907 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
5908
5909 if (tep->te_closing) {
5910 freemsg(mp);
5911 return;
5912 }
5913
5914 (void) (STRLOG(TL_ID, tep->te_minor, 1,
5915 SL_TRACE|SL_ERROR,
5916 "tl_merror: tep=%p, err=%d", (void *)tep, error));
5917
5918 /*
5919 * flush all messages on queue. we are shutting
5920 * the stream down on fatal error
5921 */
5922 flushq(wq, FLUSHALL);
5923 if (IS_COTS(tep)) {
5924 /* connection oriented - unconnect endpoints */
5925 tl_co_unconnect(tep);
5926 }
5927 if (mp->b_cont) {
5928 freemsg(mp->b_cont);
5929 mp->b_cont = NULL;
5930 }
5931
5932 if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) {
5933 freemsg(mp);
5934 mp = allocb(1, BPRI_HI);
5935 if (!mp) {
5936 (void) (STRLOG(TL_ID, tep->te_minor, 1,
5937 SL_TRACE|SL_ERROR,
5938 "tl_merror:M_PROTO: out of memory"));
5939 return;
5940 }
5941 }
5942 if (mp) {
5943 DB_TYPE(mp) = M_ERROR;
5944 mp->b_rptr = DB_BASE(mp);
5945 *mp->b_rptr = (char)error;
5946 mp->b_wptr = mp->b_rptr + sizeof (char);
5947 qreply(wq, mp);
5948 } else {
5949 (void) putnextctl1(tep->te_rq, M_ERROR, error);
5950 }
5951 }
5952
5953 static void
tl_fill_option(uchar_t * buf,cred_t * cr,pid_t cpid,int flag,cred_t * pcr)5954 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr)
5955 {
5956 ASSERT(cr != NULL);
5957
5958 if (flag & TL_SETCRED) {
5959 struct opthdr *opt = (struct opthdr *)buf;
5960 tl_credopt_t *tlcred;
5961
5962 opt->level = TL_PROT_LEVEL;
5963 opt->name = TL_OPT_PEER_CRED;
5964 opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t));
5965
5966 tlcred = (tl_credopt_t *)(opt + 1);
5967 tlcred->tc_uid = crgetuid(cr);
5968 tlcred->tc_gid = crgetgid(cr);
5969 tlcred->tc_ruid = crgetruid(cr);
5970 tlcred->tc_rgid = crgetrgid(cr);
5971 tlcred->tc_suid = crgetsuid(cr);
5972 tlcred->tc_sgid = crgetsgid(cr);
5973 tlcred->tc_ngroups = crgetngroups(cr);
5974 } else if (flag & TL_SETUCRED) {
5975 struct opthdr *opt = (struct opthdr *)buf;
5976
5977 opt->level = TL_PROT_LEVEL;
5978 opt->name = TL_OPT_PEER_UCRED;
5979 opt->len = (t_uscalar_t)OPTLEN(ucredminsize(cr));
5980
5981 (void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr);
5982 } else {
5983 struct T_opthdr *topt = (struct T_opthdr *)buf;
5984 ASSERT(flag & TL_SOCKUCRED);
5985
5986 topt->level = SOL_SOCKET;
5987 topt->name = SCM_UCRED;
5988 topt->len = ucredminsize(cr) + sizeof (*topt);
5989 topt->status = 0;
5990 (void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr);
5991 }
5992 }
5993
5994 /* ARGSUSED */
5995 static int
tl_default_opt(queue_t * wq,int level,int name,uchar_t * ptr)5996 tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr)
5997 {
5998 /* no default value processed in protocol specific code currently */
5999 return (-1);
6000 }
6001
6002 /* ARGSUSED */
6003 static int
tl_get_opt(queue_t * wq,int level,int name,uchar_t * ptr)6004 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6005 {
6006 int len;
6007 tl_endpt_t *tep;
6008 int *valp;
6009
6010 tep = (tl_endpt_t *)wq->q_ptr;
6011
6012 len = 0;
6013
6014 /*
6015 * Assumes: option level and name sanity check done elsewhere
6016 */
6017
6018 switch (level) {
6019 case SOL_SOCKET:
6020 if (! IS_SOCKET(tep))
6021 break;
6022 switch (name) {
6023 case SO_RECVUCRED:
6024 len = sizeof (int);
6025 valp = (int *)ptr;
6026 *valp = (tep->te_flag & TL_SOCKUCRED) != 0;
6027 break;
6028 default:
6029 break;
6030 }
6031 break;
6032 case TL_PROT_LEVEL:
6033 switch (name) {
6034 case TL_OPT_PEER_CRED:
6035 case TL_OPT_PEER_UCRED:
6036 /*
6037 * option not supposed to retrieved directly
6038 * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND
6039 * when some internal flags set by other options
6040 * Direct retrieval always designed to fail(ignored)
6041 * for this option.
6042 */
6043 break;
6044 }
6045 }
6046 return (len);
6047 }
6048
6049 /* ARGSUSED */
6050 static int
tl_set_opt(queue_t * wq,uint_t mgmt_flags,int level,int name,uint_t inlen,uchar_t * invalp,uint_t * outlenp,uchar_t * outvalp,void * thisdg_attrs,cred_t * cr)6051 tl_set_opt(
6052 queue_t *wq,
6053 uint_t mgmt_flags,
6054 int level,
6055 int name,
6056 uint_t inlen,
6057 uchar_t *invalp,
6058 uint_t *outlenp,
6059 uchar_t *outvalp,
6060 void *thisdg_attrs,
6061 cred_t *cr)
6062 {
6063 int error;
6064 tl_endpt_t *tep;
6065
6066 tep = (tl_endpt_t *)wq->q_ptr;
6067
6068 error = 0; /* NOERROR */
6069
6070 /*
6071 * Assumes: option level and name sanity checks done elsewhere
6072 */
6073
6074 switch (level) {
6075 case SOL_SOCKET:
6076 if (! IS_SOCKET(tep)) {
6077 error = EINVAL;
6078 break;
6079 }
6080 /*
6081 * TBD: fill in other AF_UNIX socket options and then stop
6082 * returning error.
6083 */
6084 switch (name) {
6085 case SO_RECVUCRED:
6086 /*
6087 * We only support this for datagram sockets;
6088 * getpeerucred handles the connection oriented
6089 * transports.
6090 */
6091 if (! IS_CLTS(tep)) {
6092 error = EINVAL;
6093 break;
6094 }
6095 if (*(int *)invalp == 0)
6096 tep->te_flag &= ~TL_SOCKUCRED;
6097 else
6098 tep->te_flag |= TL_SOCKUCRED;
6099 break;
6100 default:
6101 error = EINVAL;
6102 break;
6103 }
6104 break;
6105 case TL_PROT_LEVEL:
6106 switch (name) {
6107 case TL_OPT_PEER_CRED:
6108 case TL_OPT_PEER_UCRED:
6109 /*
6110 * option not supposed to be set directly
6111 * Its value in initialized for each endpoint at
6112 * driver open time.
6113 * Direct setting always designed to fail for this
6114 * option.
6115 */
6116 (void) (STRLOG(TL_ID, tep->te_minor, 1,
6117 SL_TRACE|SL_ERROR,
6118 "tl_set_opt: option is not supported"));
6119 error = EPROTO;
6120 break;
6121 }
6122 }
6123 return (error);
6124 }
6125
6126
6127 static void
tl_timer(void * arg)6128 tl_timer(void *arg)
6129 {
6130 queue_t *wq = arg;
6131 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6132
6133 ASSERT(tep);
6134
6135 tep->te_timoutid = 0;
6136
6137 enableok(wq);
6138 /*
6139 * Note: can call wsrv directly here and save context switch
6140 * Consider change when qtimeout (not timeout) is active
6141 */
6142 qenable(wq);
6143 }
6144
6145 static void
tl_buffer(void * arg)6146 tl_buffer(void *arg)
6147 {
6148 queue_t *wq = arg;
6149 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6150
6151 ASSERT(tep);
6152
6153 tep->te_bufcid = 0;
6154 tep->te_nowsrv = B_FALSE;
6155
6156 enableok(wq);
6157 /*
6158 * Note: can call wsrv directly here and save context switch
6159 * Consider change when qbufcall (not bufcall) is active
6160 */
6161 qenable(wq);
6162 }
6163
6164 static void
tl_memrecover(queue_t * wq,mblk_t * mp,size_t size)6165 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size)
6166 {
6167 tl_endpt_t *tep;
6168
6169 tep = (tl_endpt_t *)wq->q_ptr;
6170
6171 if (tep->te_closing) {
6172 freemsg(mp);
6173 return;
6174 }
6175 noenable(wq);
6176
6177 (void) insq(wq, wq->q_first, mp);
6178
6179 if (tep->te_bufcid || tep->te_timoutid) {
6180 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
6181 "tl_memrecover:recover %p pending", (void *)wq));
6182 return;
6183 }
6184
6185 if (!(tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq))) {
6186 tep->te_timoutid = qtimeout(wq, tl_timer, wq,
6187 drv_usectohz(TL_BUFWAIT));
6188 }
6189 }
6190
6191 static void
tl_freetip(tl_endpt_t * tep,tl_icon_t * tip)6192 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip)
6193 {
6194 ASSERT(tip->ti_seqno != 0);
6195
6196 if (tip->ti_mp != NULL) {
6197 tl_icon_freemsgs(&tip->ti_mp);
6198 tip->ti_mp = NULL;
6199 }
6200 if (tip->ti_tep != NULL) {
6201 tl_refrele(tip->ti_tep);
6202 tip->ti_tep = NULL;
6203 }
6204 list_remove(&tep->te_iconp, tip);
6205 kmem_free(tip, sizeof (tl_icon_t));
6206 tep->te_nicon--;
6207 }
6208
6209 /*
6210 * Remove address from address hash.
6211 */
6212 static void
tl_addr_unbind(tl_endpt_t * tep)6213 tl_addr_unbind(tl_endpt_t *tep)
6214 {
6215 tl_endpt_t *elp;
6216
6217 if (tep->te_flag & TL_ADDRHASHED) {
6218 if (IS_SOCKET(tep)) {
6219 (void) mod_hash_remove(tep->te_addrhash,
6220 (mod_hash_key_t)tep->te_vp,
6221 (mod_hash_val_t *)&elp);
6222 tep->te_vp = (void *)(uintptr_t)tep->te_minor;
6223 tep->te_magic = SOU_MAGIC_IMPLICIT;
6224 } else {
6225 (void) mod_hash_remove(tep->te_addrhash,
6226 (mod_hash_key_t)&tep->te_ap,
6227 (mod_hash_val_t *)&elp);
6228 (void) kmem_free(tep->te_abuf, tep->te_alen);
6229 tep->te_alen = -1;
6230 tep->te_abuf = NULL;
6231 }
6232 tep->te_flag &= ~TL_ADDRHASHED;
6233 }
6234 }
6235