1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <sys/conf.h>
27 #include <sys/stat.h>
28 #include <sys/file.h>
29 #include <sys/ddi.h>
30 #include <sys/sunddi.h>
31 #include <sys/modctl.h>
32 #include <sys/priv.h>
33 #include <sys/cpuvar.h>
34 #include <sys/socket.h>
35 #include <sys/strsubr.h>
36 #include <sys/sysmacros.h>
37 #include <sys/sdt.h>
38 #include <netinet/tcp.h>
39 #include <inet/tcp.h>
40 #include <sys/socketvar.h>
41 #include <sys/pathname.h>
42 #include <sys/fs/snode.h>
43 #include <sys/fs/dv_node.h>
44 #include <sys/vnode.h>
45 #include <netinet/in.h>
46 #include <net/if.h>
47 #include <sys/sockio.h>
48 #include <sys/ksocket.h>
49 #include <sys/filio.h> /* FIONBIO */
50 #include <sys/iscsi_protocol.h>
51 #include <sys/idm/idm.h>
52 #include <sys/idm/idm_so.h>
53 #include <sys/idm/idm_text.h>
54
55 #define IN_PROGRESS_DELAY 1
56
57 /*
58 * in6addr_any is currently all zeroes, but use the macro in case this
59 * ever changes.
60 */
61 static const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
62
63 static void idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
64 static void idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
65 static void idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
66
67 static idm_status_t idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so);
68 static void idm_so_conn_destroy_common(idm_conn_t *ic);
69 static void idm_so_conn_connect_common(idm_conn_t *ic);
70
71 static void idm_set_ini_preconnect_options(idm_so_conn_t *sc,
72 boolean_t boot_conn);
73 static void idm_set_ini_postconnect_options(idm_so_conn_t *sc);
74 static void idm_set_tgt_connect_options(ksocket_t so);
75 static idm_status_t idm_i_so_tx(idm_pdu_t *pdu);
76
77 static idm_status_t idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu);
78 static void idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt,
79 idm_buf_t *idb, uint32_t offset, uint32_t length);
80 static void idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb);
81 static idm_status_t idm_so_send_buf_region(idm_task_t *idt,
82 idm_buf_t *idb, uint32_t buf_region_offset, uint32_t buf_region_length);
83
84 static uint32_t idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb,
85 uint32_t ro, uint32_t dlength);
86
87 static idm_status_t idm_so_handle_digest(idm_conn_t *it,
88 nvpair_t *digest_choice, const idm_kv_xlate_t *ikvx);
89
90 static void idm_so_socket_set_nonblock(struct sonode *node);
91 static void idm_so_socket_set_block(struct sonode *node);
92
93 /*
94 * Transport ops prototypes
95 */
96 static void idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu);
97 static idm_status_t idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb);
98 static idm_status_t idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb);
99 static void idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu);
100 static void idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu);
101 static void idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu);
102 static idm_status_t idm_so_free_task_rsrc(idm_task_t *idt);
103 static kv_status_t idm_so_negotiate_key_values(idm_conn_t *it,
104 nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
105 static void idm_so_notice_key_values(idm_conn_t *it,
106 nvlist_t *negotiated_nvl);
107 static kv_status_t idm_so_declare_key_values(idm_conn_t *it,
108 nvlist_t *config_nvl, nvlist_t *outgoing_nvl);
109 static boolean_t idm_so_conn_is_capable(idm_conn_req_t *ic,
110 idm_transport_caps_t *caps);
111 static idm_status_t idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen);
112 static void idm_so_buf_free(idm_buf_t *idb);
113 static idm_status_t idm_so_buf_setup(idm_buf_t *idb);
114 static void idm_so_buf_teardown(idm_buf_t *idb);
115 static idm_status_t idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is);
116 static void idm_so_tgt_svc_destroy(idm_svc_t *is);
117 static idm_status_t idm_so_tgt_svc_online(idm_svc_t *is);
118 static void idm_so_tgt_svc_offline(idm_svc_t *is);
119 static void idm_so_tgt_conn_destroy(idm_conn_t *ic);
120 static idm_status_t idm_so_tgt_conn_connect(idm_conn_t *ic);
121 static void idm_so_conn_disconnect(idm_conn_t *ic);
122 static idm_status_t idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic);
123 static void idm_so_ini_conn_destroy(idm_conn_t *ic);
124 static idm_status_t idm_so_ini_conn_connect(idm_conn_t *ic);
125
126 /*
127 * IDM Native Sockets transport operations
128 */
129 static
130 idm_transport_ops_t idm_so_transport_ops = {
131 idm_so_tx, /* it_tx_pdu */
132 idm_so_buf_tx_to_ini, /* it_buf_tx_to_ini */
133 idm_so_buf_rx_from_ini, /* it_buf_rx_from_ini */
134 idm_so_rx_datain, /* it_rx_datain */
135 idm_so_rx_rtt, /* it_rx_rtt */
136 idm_so_rx_dataout, /* it_rx_dataout */
137 NULL, /* it_alloc_conn_rsrc */
138 NULL, /* it_free_conn_rsrc */
139 NULL, /* it_tgt_enable_datamover */
140 NULL, /* it_ini_enable_datamover */
141 NULL, /* it_conn_terminate */
142 idm_so_free_task_rsrc, /* it_free_task_rsrc */
143 idm_so_negotiate_key_values, /* it_negotiate_key_values */
144 idm_so_notice_key_values, /* it_notice_key_values */
145 idm_so_conn_is_capable, /* it_conn_is_capable */
146 idm_so_buf_alloc, /* it_buf_alloc */
147 idm_so_buf_free, /* it_buf_free */
148 idm_so_buf_setup, /* it_buf_setup */
149 idm_so_buf_teardown, /* it_buf_teardown */
150 idm_so_tgt_svc_create, /* it_tgt_svc_create */
151 idm_so_tgt_svc_destroy, /* it_tgt_svc_destroy */
152 idm_so_tgt_svc_online, /* it_tgt_svc_online */
153 idm_so_tgt_svc_offline, /* it_tgt_svc_offline */
154 idm_so_tgt_conn_destroy, /* it_tgt_conn_destroy */
155 idm_so_tgt_conn_connect, /* it_tgt_conn_connect */
156 idm_so_conn_disconnect, /* it_tgt_conn_disconnect */
157 idm_so_ini_conn_create, /* it_ini_conn_create */
158 idm_so_ini_conn_destroy, /* it_ini_conn_destroy */
159 idm_so_ini_conn_connect, /* it_ini_conn_connect */
160 idm_so_conn_disconnect, /* it_ini_conn_disconnect */
161 idm_so_declare_key_values /* it_declare_key_values */
162 };
163
164 kmutex_t idm_so_timed_socket_mutex;
165 /*
166 * idm_so_init()
167 * Sockets transport initialization
168 */
169 void
170 idm_so_init(idm_transport_t *it)
171 {
172 /* Cache for IDM Data and R2T Transmit PDUs */
173 idm.idm_sotx_pdu_cache = kmem_cache_create("idm_tx_pdu_cache",
174 sizeof (idm_pdu_t) + sizeof (iscsi_hdr_t), 8,
175 &idm_sotx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
176
177 /* Cache for IDM Receive PDUs */
178 idm.idm_sorx_pdu_cache = kmem_cache_create("idm_rx_pdu_cache",
179 sizeof (idm_pdu_t) + IDM_SORX_CACHE_HDRLEN, 8,
180 &idm_sorx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
181
182 /* 128k buffer cache */
183 idm.idm_so_128k_buf_cache = kmem_cache_create("idm_128k_buf_cache",
184 IDM_SO_BUF_CACHE_UB, 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP);
185
186 /* Set the sockets transport ops */
187 it->it_ops = &idm_so_transport_ops;
188
189 mutex_init(&idm_so_timed_socket_mutex, NULL, MUTEX_DEFAULT, NULL);
190
191 }
192
193 /*
194 * idm_so_fini()
195 * Sockets transport teardown
196 */
197 void
198 idm_so_fini(void)
199 {
200 kmem_cache_destroy(idm.idm_so_128k_buf_cache);
201 kmem_cache_destroy(idm.idm_sotx_pdu_cache);
202 kmem_cache_destroy(idm.idm_sorx_pdu_cache);
203 mutex_destroy(&idm_so_timed_socket_mutex);
204 }
205
206 ksocket_t
207 idm_socreate(int domain, int type, int protocol)
208 {
209 ksocket_t ks;
210
211 if (!ksocket_socket(&ks, domain, type, protocol, KSOCKET_NOSLEEP,
212 CRED())) {
213 return (ks);
214 } else {
215 return (NULL);
216 }
217 }
218
219 /*
220 * idm_soshutdown will disconnect the socket and prevent subsequent PDU
221 * reception and transmission. The sonode still exists but its state
222 * gets modified to indicate it is no longer connected. Calls to
223 * idm_sorecv/idm_iov_sorecv will return so idm_soshutdown can be used to
224 * regain control of a thread stuck in idm_sorecv.
225 */
226 void
227 idm_soshutdown(ksocket_t so)
228 {
229 (void) ksocket_shutdown(so, SHUT_RDWR, CRED());
230 }
231
232 /*
233 * idm_sodestroy releases all resources associated with a socket previously
234 * created with idm_socreate. The socket must be shutdown using
235 * idm_soshutdown before the socket is destroyed with idm_sodestroy,
236 * otherwise undefined behavior will result.
237 */
238 void
239 idm_sodestroy(ksocket_t ks)
240 {
241 (void) ksocket_close(ks, CRED());
242 }
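/*
 * Illustrative sketch (not part of the driver): the helpers above are
 * intended to be used as create -> shutdown -> destroy.
 *
 *	ksocket_t ks;
 *
 *	if ((ks = idm_socreate(PF_INET, SOCK_STREAM, 0)) != NULL) {
 *		... use the socket ...
 *		idm_soshutdown(ks);	- unblocks any thread in idm_sorecv
 *		idm_sodestroy(ks);	- then release the ksocket
 *	}
 */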
243
244 /*
245 * Function to compare two addresses in sockaddr_storage format
246 */
247
248 int
249 idm_ss_compare(const struct sockaddr_storage *cmp_ss1,
250 const struct sockaddr_storage *cmp_ss2,
251 boolean_t v4_mapped_as_v4,
252 boolean_t compare_ports)
253 {
254 struct sockaddr_storage mapped_v4_ss1, mapped_v4_ss2;
255 const struct sockaddr_storage *ss1, *ss2;
256 struct in_addr *in1, *in2;
257 struct in6_addr *in61, *in62;
258 int i;
259
260 /*
261 * Normalize V4-mapped IPv6 addresses into V4 format if
262 * v4_mapped_as_v4 is B_TRUE.
263 */
264 ss1 = cmp_ss1;
265 ss2 = cmp_ss2;
266 if (v4_mapped_as_v4 && (ss1->ss_family == AF_INET6)) {
267 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
268 if (IN6_IS_ADDR_V4MAPPED(in61)) {
269 bzero(&mapped_v4_ss1, sizeof (mapped_v4_ss1));
270 mapped_v4_ss1.ss_family = AF_INET;
271 ((struct sockaddr_in *)&mapped_v4_ss1)->sin_port =
272 ((struct sockaddr_in *)ss1)->sin_port;
273 IN6_V4MAPPED_TO_INADDR(in61,
274 &((struct sockaddr_in *)&mapped_v4_ss1)->sin_addr);
275 ss1 = &mapped_v4_ss1;
276 }
277 }
278 ss2 = cmp_ss2;
279 if (v4_mapped_as_v4 && (ss2->ss_family == AF_INET6)) {
280 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
281 if (IN6_IS_ADDR_V4MAPPED(in62)) {
282 bzero(&mapped_v4_ss2, sizeof (mapped_v4_ss2));
283 mapped_v4_ss2.ss_family = AF_INET;
284 ((struct sockaddr_in *)&mapped_v4_ss2)->sin_port =
285 ((struct sockaddr_in *)ss2)->sin_port;
286 IN6_V4MAPPED_TO_INADDR(in62,
287 &((struct sockaddr_in *)&mapped_v4_ss2)->sin_addr);
288 ss2 = &mapped_v4_ss2;
289 }
290 }
291
292 /*
293 * Compare ports, then address family, then ip address
294 */
295 if (compare_ports &&
296 (((struct sockaddr_in *)ss1)->sin_port !=
297 ((struct sockaddr_in *)ss2)->sin_port)) {
298 if (((struct sockaddr_in *)ss1)->sin_port >
299 ((struct sockaddr_in *)ss2)->sin_port)
300 return (1);
301 else
302 return (-1);
303 }
304
305 /*
306 * ports are the same
307 */
308 if (ss1->ss_family != ss2->ss_family) {
309 if (ss1->ss_family == AF_INET)
310 return (1);
311 else
312 return (-1);
313 }
314
315 /*
316 * address families are the same
317 */
318 if (ss1->ss_family == AF_INET) {
319 in1 = &((struct sockaddr_in *)ss1)->sin_addr;
320 in2 = &((struct sockaddr_in *)ss2)->sin_addr;
321
322 if (in1->s_addr > in2->s_addr)
323 return (1);
324 else if (in1->s_addr < in2->s_addr)
325 return (-1);
326 else
327 return (0);
328 } else if (ss1->ss_family == AF_INET6) {
329 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
330 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
331
332 for (i = 0; i < 4; i++) {
333 if (in61->s6_addr32[i] > in62->s6_addr32[i])
334 return (1);
335 else if (in61->s6_addr32[i] < in62->s6_addr32[i])
336 return (-1);
337 }
338 return (0);
339 }
340
341 return (1);
342 }
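/*
 * Illustrative sketch (not part of the driver): idm_ss_compare() returns
 * -1, 0 or 1 and can therefore be used as an ordering comparator.  With
 * v4_mapped_as_v4 == B_TRUE, a V4-mapped IPv6 address such as
 * ::ffff:10.0.0.1 compares equal to the plain IPv4 address 10.0.0.1 when
 * the ports also match:
 *
 *	if (idm_ss_compare(&ss_a, &ss_b, B_TRUE, B_TRUE) == 0)
 *		... same endpoint ...
 */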
343
344 /*
345 * IP address filter functions to flag addresses that should not
346 * go out to initiators through discovery.
347 */
348 static boolean_t
349 idm_v4_addr_okay(struct in_addr *in_addr)
350 {
351 in_addr_t addr = ntohl(in_addr->s_addr);
352
353 if ((INADDR_NONE == addr) ||
354 (IN_MULTICAST(addr)) ||
355 ((addr >> IN_CLASSA_NSHIFT) == 0) ||
356 ((addr >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
357 return (B_FALSE);
358 }
359 return (B_TRUE);
360 }
361
362 static boolean_t
363 idm_v6_addr_okay(struct in6_addr *addr6)
364 {
365
366 if ((IN6_IS_ADDR_UNSPECIFIED(addr6)) ||
367 (IN6_IS_ADDR_LOOPBACK(addr6)) ||
368 (IN6_IS_ADDR_MULTICAST(addr6)) ||
369 (IN6_IS_ADDR_V4MAPPED(addr6)) ||
370 (IN6_IS_ADDR_V4COMPAT(addr6)) ||
371 (IN6_IS_ADDR_LINKLOCAL(addr6))) {
372 return (B_FALSE);
373 }
374 return (B_TRUE);
375 }
376
377 /*
378 * idm_get_ipaddr will retrieve a list of IP addresses with which the host
379 * is configured by sending down a sequence of kernel ioctls to IP STREAMS.
380 */
381 int
382 idm_get_ipaddr(idm_addr_list_t **ipaddr_p)
383 {
384 ksocket_t so4, so6;
385 struct lifnum lifn;
386 struct lifconf lifc;
387 struct lifreq *lp;
388 int rval;
389 int numifs;
390 int bufsize;
391 void *buf;
392 int i, j, n, rc;
393 struct sockaddr_storage ss;
394 struct sockaddr_in *sin;
395 struct sockaddr_in6 *sin6;
396 idm_addr_t *ip;
397 idm_addr_list_t *ipaddr = NULL;
398 int size_ipaddr;
399
400 *ipaddr_p = NULL;
401 size_ipaddr = 0;
402 buf = NULL;
403
404 /* create an ipv4 and ipv6 UDP socket */
405 if ((so6 = idm_socreate(PF_INET6, SOCK_DGRAM, 0)) == NULL)
406 return (0);
407 if ((so4 = idm_socreate(PF_INET, SOCK_DGRAM, 0)) == NULL) {
408 idm_sodestroy(so6);
409 return (0);
410 }
411
412
413 retry_count:
414 /* snapshot the current number of interfaces */
415 lifn.lifn_family = PF_UNSPEC;
416 lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
417 lifn.lifn_count = 0;
418 /* use so6 for ioctls with unspecified families by default */
419 if (ksocket_ioctl(so6, SIOCGLIFNUM, (intptr_t)&lifn, &rval, CRED())
420 != 0) {
421 goto cleanup;
422 }
423
424 numifs = lifn.lifn_count;
425 if (numifs <= 0) {
426 goto cleanup;
427 }
428
429 /* allocate extra room in case more interfaces appear */
430 numifs += 10;
431
432 /* get the interface names and ip addresses */
433 bufsize = numifs * sizeof (struct lifreq);
434 buf = kmem_alloc(bufsize, KM_SLEEP);
435
436 lifc.lifc_family = AF_UNSPEC;
437 lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
438 lifc.lifc_len = bufsize;
439 lifc.lifc_buf = buf;
440 rc = ksocket_ioctl(so6, SIOCGLIFCONF, (intptr_t)&lifc, &rval, CRED());
441 if (rc != 0) {
442 goto cleanup;
443 }
444 /* if our extra room is used up, try again */
445 if (bufsize <= lifc.lifc_len) {
446 kmem_free(buf, bufsize);
447 buf = NULL;
448 goto retry_count;
449 }
450 /* calc actual number of ifconfs */
451 n = lifc.lifc_len / sizeof (struct lifreq);
452
453 /* get ip address */
454 if (n > 0) {
455 size_ipaddr = sizeof (idm_addr_list_t) +
456 (n - 1) * sizeof (idm_addr_t);
457 ipaddr = kmem_zalloc(size_ipaddr, KM_SLEEP);
458 } else {
459 goto cleanup;
460 }
461
462 /*
463 * Examine the array of interfaces and filter uninteresting ones
464 */
465 for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) {
466
467 /*
468 * Copy the address as the SIOCGLIFFLAGS ioctl is destructive
469 */
470 ss = lp->lifr_addr;
471 /*
472 * fetch the flags using the socket of the correct family
473 */
474 switch (ss.ss_family) {
475 case AF_INET:
476 rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)lp,
477 &rval, CRED());
478 break;
479 case AF_INET6:
480 rc = ksocket_ioctl(so6, SIOCGLIFFLAGS, (intptr_t)lp,
481 &rval, CRED());
482 break;
483 default:
484 continue;
485 }
486 if (rc == 0) {
487 /*
488 * If we got the flags, skip uninteresting
489 * interfaces based on flags
490 */
491 if ((lp->lifr_flags & IFF_UP) != IFF_UP)
492 continue;
493 if (lp->lifr_flags &
494 (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
495 continue;
496 }
497
498 /* save ip address */
499 ip = &ipaddr->al_addrs[j];
500 switch (ss.ss_family) {
501 case AF_INET:
502 sin = (struct sockaddr_in *)&ss;
503 if (!idm_v4_addr_okay(&sin->sin_addr))
504 continue;
505 ip->a_addr.i_addr.in4 = sin->sin_addr;
506 ip->a_addr.i_insize = sizeof (struct in_addr);
507 break;
508 case AF_INET6:
509 sin6 = (struct sockaddr_in6 *)&ss;
510 if (!idm_v6_addr_okay(&sin6->sin6_addr))
511 continue;
512 ip->a_addr.i_addr.in6 = sin6->sin6_addr;
513 ip->a_addr.i_insize = sizeof (struct in6_addr);
514 break;
515 default:
516 continue;
517 }
518 j++;
519 }
520
521 if (j == 0) {
522 /* no valid ifaddr */
523 kmem_free(ipaddr, size_ipaddr);
524 size_ipaddr = 0;
525 ipaddr = NULL;
526 } else {
527 ipaddr->al_out_cnt = j;
528 }
529
530
531 cleanup:
532 idm_sodestroy(so6);
533 idm_sodestroy(so4);
534
535 if (buf != NULL)
536 kmem_free(buf, bufsize);
537
538 *ipaddr_p = ipaddr;
539 return (size_ipaddr);
540 }
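/*
 * Illustrative sketch (not part of the driver): the return value of
 * idm_get_ipaddr() is the size of the allocated list, which the caller
 * must hand back to kmem_free(); a return of 0 means nothing was
 * allocated.
 *
 *	idm_addr_list_t	*alist;
 *	int		alist_size, i;
 *
 *	alist_size = idm_get_ipaddr(&alist);
 *	if (alist_size > 0) {
 *		for (i = 0; i < alist->al_out_cnt; i++) {
 *			... inspect alist->al_addrs[i] ...
 *		}
 *		kmem_free(alist, alist_size);
 *	}
 */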
541
542 int
543 idm_sorecv(ksocket_t so, void *msg, size_t len)
544 {
545 iovec_t iov;
546
547 ASSERT(so != NULL);
548 ASSERT(len != 0);
549
550 /*
551 * Fill in iovec and receive data
552 */
553 iov.iov_base = msg;
554 iov.iov_len = len;
555
556 return (idm_iov_sorecv(so, &iov, 1, len));
557 }
558
559 /*
560 * idm_sosendto - Sends buffered data on a non-connected socket.
561 *
562 * This function puts the data provided on the wire by calling
563 * ksocket_sendmsg. It will return only when all the data has been sent
564 * or if an error occurs.
565 *
566 * Returns 0 for success, the socket errno value if ksocket_sendmsg fails,
567 * and -1 if ksocket_sendmsg succeeds but not all of the data was sent.
568 */
569 int
570 idm_sosendto(ksocket_t so, void *buff, size_t len,
571 struct sockaddr *name, socklen_t namelen)
572 {
573 struct msghdr msg;
574 struct iovec iov[1];
575 int error;
576 size_t sent = 0;
577
578 iov[0].iov_base = buff;
579 iov[0].iov_len = len;
580
581 /* Initialization of the message header. */
582 bzero(&msg, sizeof (msg));
583 msg.msg_iov = iov;
584 msg.msg_iovlen = 1;
585 msg.msg_name = name;
586 msg.msg_namelen = namelen;
587
588 if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED())) == 0) {
589 /* Data sent */
590 if (sent == len) {
591 /* All data sent. Success. */
592 return (0);
593 } else {
594 /* Not all data was sent. Failure */
595 return (-1);
596 }
597 }
598
599 /* Send failed */
600 return (error);
601 }
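/*
 * Illustrative sketch (not part of the driver): sending a datagram to an
 * explicit destination with idm_sosendto().  The port and address below
 * are made up for the example.
 *
 *	struct sockaddr_in dst;
 *
 *	bzero(&dst, sizeof (dst));
 *	dst.sin_family = AF_INET;
 *	dst.sin_port = htons(3205);
 *	dst.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
 *	rc = idm_sosendto(so, buf, buflen,
 *	    (struct sockaddr *)&dst, sizeof (dst));
 */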
602
603 /*
604 * idm_iov_sosend - Sends an iovec on a connection.
605 *
606 * This function puts the data provided on the wire by calling
607 * ksocket_sendmsg. It will return only when all the data has been sent
608 * or if an error occurs.
609 *
610 * Returns 0 for success, the socket errno value if ksocket_sendmsg fails,
611 * and -1 if ksocket_sendmsg succeeds but not all of the data was sent.
612 */
613 int
614 idm_iov_sosend(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
615 {
616 struct msghdr msg;
617 int error;
618 size_t sent = 0;
619
620 ASSERT(iop != NULL);
621
622 /* Initialization of the message header. */
623 bzero(&msg, sizeof (msg));
624 msg.msg_iov = iop;
625 msg.msg_iovlen = iovlen;
626
627 if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED()))
628 == 0) {
629 /* Data sent */
630 if (sent == total_len) {
631 /* All data sent. Success. */
632 return (0);
633 } else {
634 /* Not all data was sent. Failure */
635 return (-1);
636 }
637 }
638
639 /* Send failed */
640 return (error);
641 }
642
643 /*
644 * idm_iov_sorecv - Receives an iovec from a connection
645 *
646 * This function gets the data asked for from the socket. It will return
647 * only when all the requested data has been retrieved or if an error
648 * occurs.
649 *
650 * Returns 0 for success, the socket errno value if ksocket_recvmsg fails,
651 * and -1 if ksocket_recvmsg succeeds but not all requested data arrived.
652 */
653 int
654 idm_iov_sorecv(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
655 {
656 struct msghdr msg;
657 int error;
658 size_t recv;
659 int flags;
660
661 ASSERT(iop != NULL);
662
663 /* Initialization of the message header. */
664 bzero(&msg, sizeof (msg));
665 msg.msg_iov = iop;
666 msg.msg_iovlen = iovlen;
667 flags = MSG_WAITALL;
668
669 if ((error = ksocket_recvmsg(so, &msg, flags, &recv, CRED()))
670 == 0) {
671 /* Received data */
672 if (recv == total_len) {
673 /* All requested data received. Success */
674 return (0);
675 } else {
676 /*
677 * Not all data was received. The connection has
678 * probably failed.
679 */
680 return (-1);
681 }
682 }
683
684 /* Receive failed */
685 return (error);
686 }
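/*
 * Illustrative sketch (not part of the driver): because idm_iov_sorecv()
 * passes MSG_WAITALL, a caller can gather a header and a payload into
 * separate buffers with a single call and treat any short count as a
 * broken connection:
 *
 *	iovec_t	iov[2];
 *
 *	iov[0].iov_base = (caddr_t)&hdr;
 *	iov[0].iov_len = sizeof (hdr);
 *	iov[1].iov_base = (caddr_t)payload;
 *	iov[1].iov_len = payload_len;
 *	if (idm_iov_sorecv(so, iov, 2, sizeof (hdr) + payload_len) != 0)
 *		... connection is no longer usable ...
 */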
687
688 static void
689 idm_set_ini_preconnect_options(idm_so_conn_t *sc, boolean_t boot_conn)
690 {
691 int conn_abort = 10000;
692 int conn_notify = 2000;
693 int abort = 30000;
694
695 /* Pre-connect socket options */
696 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
697 TCP_CONN_NOTIFY_THRESHOLD, (char *)&conn_notify, sizeof (int),
698 CRED());
699 if (boot_conn == B_FALSE) {
700 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
701 TCP_CONN_ABORT_THRESHOLD, (char *)&conn_abort, sizeof (int),
702 CRED());
703 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
704 TCP_ABORT_THRESHOLD,
705 (char *)&abort, sizeof (int), CRED());
706 }
707 }
708
709 static void
710 idm_set_ini_postconnect_options(idm_so_conn_t *sc)
711 {
712 int32_t rcvbuf = IDM_RCVBUF_SIZE;
713 int32_t sndbuf = IDM_SNDBUF_SIZE;
714 const int on = 1;
715
716 /* Set postconnect options */
717 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP, TCP_NODELAY,
718 (char *)&on, sizeof (int), CRED());
719 (void) ksocket_setsockopt(sc->ic_so, SOL_SOCKET, SO_RCVBUF,
720 (char *)&rcvbuf, sizeof (int), CRED());
721 (void) ksocket_setsockopt(sc->ic_so, SOL_SOCKET, SO_SNDBUF,
722 (char *)&sndbuf, sizeof (int), CRED());
723 }
724
725 static void
726 idm_set_tgt_connect_options(ksocket_t ks)
727 {
728 int32_t rcvbuf = IDM_RCVBUF_SIZE;
729 int32_t sndbuf = IDM_SNDBUF_SIZE;
730 const int on = 1;
731
732 /* Set connect options */
733 (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVBUF,
734 (char *)&rcvbuf, sizeof (int), CRED());
735 (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_SNDBUF,
736 (char *)&sndbuf, sizeof (int), CRED());
737 (void) ksocket_setsockopt(ks, IPPROTO_TCP, TCP_NODELAY,
738 (char *)&on, sizeof (on), CRED());
739 }
740
741 static uint32_t
742 n2h24(const uchar_t *ptr)
743 {
744 return ((ptr[0] << 16) | (ptr[1] << 8) | ptr[2]);
745 }
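/*
 * Worked example (illustrative): n2h24() decodes the 3-byte, big-endian
 * DataSegmentLength field of an iSCSI BHS.  For dlength bytes
 * { 0x00, 0x01, 0x00 } the result is (0x00 << 16) | (0x01 << 8) | 0x00,
 * i.e. 256 bytes.
 */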
746
747
748 static idm_status_t
749 idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu)
750 {
751 iscsi_hdr_t *bhs;
752 uint32_t hdr_digest_crc;
753 uint32_t crc_calculated;
754 void *new_hdr;
755 int ahslen = 0;
756 int total_len = 0;
757 int iovlen = 0;
758 struct iovec iov[2];
759 idm_so_conn_t *so_conn;
760 int rc;
761
762 so_conn = ic->ic_transport_private;
763
764 /*
765 * Read BHS
766 */
767 bhs = pdu->isp_hdr;
768 rc = idm_sorecv(so_conn->ic_so, pdu->isp_hdr, sizeof (iscsi_hdr_t));
769 if (rc != IDM_STATUS_SUCCESS) {
770 return (IDM_STATUS_FAIL);
771 }
772
773 /*
774 * Check actual AHS length against the amount available in the buffer
775 */
776 pdu->isp_hdrlen = sizeof (iscsi_hdr_t) +
777 (bhs->hlength * sizeof (uint32_t));
778 pdu->isp_datalen = n2h24(bhs->dlength);
779 if (ic->ic_conn_type == CONN_TYPE_TGT &&
780 pdu->isp_datalen > ic->ic_conn_params.max_recv_dataseglen) {
781 IDM_CONN_LOG(CE_WARN,
782 "idm_sorecvhdr: exceeded the max data segment length");
783 return (IDM_STATUS_FAIL);
784 }
785 if (bhs->hlength > IDM_SORX_CACHE_AHSLEN) {
786 /* Allocate a new header segment and change the callback */
787 new_hdr = kmem_alloc(pdu->isp_hdrlen, KM_SLEEP);
788 bcopy(pdu->isp_hdr, new_hdr, sizeof (iscsi_hdr_t));
789 pdu->isp_hdr = new_hdr;
790 pdu->isp_flags |= IDM_PDU_ADDL_HDR;
791
792 /*
793 * This callback will restore the expected values after
794 * the RX PDU has been processed.
795 */
796 pdu->isp_callback = idm_sorx_addl_pdu_cb;
797 }
798
799 /*
800 * Setup receipt of additional header and header digest (if enabled).
801 */
802 if (bhs->hlength > 0) {
803 iov[iovlen].iov_base = (caddr_t)(pdu->isp_hdr + 1);
804 ahslen = pdu->isp_hdrlen - sizeof (iscsi_hdr_t);
805 iov[iovlen].iov_len = ahslen;
806 total_len += iov[iovlen].iov_len;
807 iovlen++;
808 }
809
810 if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
811 iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc;
812 iov[iovlen].iov_len = sizeof (hdr_digest_crc);
813 total_len += iov[iovlen].iov_len;
814 iovlen++;
815 }
816
817 if ((iovlen != 0) &&
818 (idm_iov_sorecv(so_conn->ic_so, &iov[0], iovlen,
819 total_len) != 0)) {
820 return (IDM_STATUS_FAIL);
821 }
822
823 /*
824 * Validate header digest if enabled
825 */
826 if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
827 crc_calculated = idm_crc32c(pdu->isp_hdr,
828 sizeof (iscsi_hdr_t) + ahslen);
829 if (crc_calculated != hdr_digest_crc) {
830 /* Invalid Header Digest */
831 return (IDM_STATUS_HEADER_DIGEST);
832 }
833 }
834
835 return (0);
836 }
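/*
 * Worked example (illustrative): header sizing in idm_sorecvhdr().  The
 * BHS is sizeof (iscsi_hdr_t) == 48 bytes and hlength counts AHS in
 * 4-byte words, so a PDU with hlength == 3 yields
 * isp_hdrlen = 48 + 3 * 4 = 60 bytes.  With header digests enabled an
 * additional 4-byte CRC32C follows the AHS on the wire.
 */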
837
838 /*
839 * idm_so_ini_conn_create()
840 * Allocate the sockets transport connection resources.
841 */
842 static idm_status_t
843 idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic)
844 {
845 ksocket_t so;
846 idm_so_conn_t *so_conn;
847 idm_status_t idmrc;
848
849 so = idm_socreate(cr->cr_domain, cr->cr_type,
850 cr->cr_protocol);
851 if (so == NULL) {
852 return (IDM_STATUS_FAIL);
853 }
854
855 /* Bind the socket if configured to do so */
856 if (cr->cr_bound) {
857 if (ksocket_bind(so, &cr->cr_bound_addr.sin,
858 SIZEOF_SOCKADDR(&cr->cr_bound_addr.sin), CRED()) != 0) {
859 idm_sodestroy(so);
860 return (IDM_STATUS_FAIL);
861 }
862 }
863
864 idmrc = idm_so_conn_create_common(ic, so);
865 if (idmrc != IDM_STATUS_SUCCESS) {
866 idm_soshutdown(so);
867 idm_sodestroy(so);
868 return (IDM_STATUS_FAIL);
869 }
870
871 so_conn = ic->ic_transport_private;
872 /* Set up socket options */
873 idm_set_ini_preconnect_options(so_conn, cr->cr_boot_conn);
874
875 return (IDM_STATUS_SUCCESS);
876 }
877
878 /*
879 * idm_so_ini_conn_destroy()
880 * Tear down the sockets transport connection resources.
881 */
882 static void
883 idm_so_ini_conn_destroy(idm_conn_t *ic)
884 {
885 idm_so_conn_destroy_common(ic);
886 }
887
888 /*
889 * idm_so_ini_conn_connect()
890 * Establish the connection referred to by the handle previously allocated via
891 * idm_so_ini_conn_create().
892 */
893 static idm_status_t
894 idm_so_ini_conn_connect(idm_conn_t *ic)
895 {
896 idm_so_conn_t *so_conn;
897 struct sonode *node = NULL;
898 int rc;
899 clock_t lbolt, conn_login_max, conn_login_interval;
900 boolean_t nonblock;
901
902 so_conn = ic->ic_transport_private;
903 nonblock = ic->ic_conn_params.nonblock_socket;
904 conn_login_max = ic->ic_conn_params.conn_login_max;
905 conn_login_interval = ddi_get_lbolt() +
906 SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
907
908 if (nonblock == B_TRUE) {
909 node = ((struct sonode *)(so_conn->ic_so));
910 /* Set the socket to non-blocking mode */
911 idm_so_socket_set_nonblock(node);
912 do {
913 rc = ksocket_connect(so_conn->ic_so,
914 &ic->ic_ini_dst_addr.sin,
915 (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)),
916 CRED());
917 if (rc == 0 || rc == EISCONN) {
918 /* connect succeeded or socket already connected */
919 rc = IDM_STATUS_SUCCESS;
920 break;
921 }
922 if ((rc == ETIMEDOUT) || (rc == ECONNREFUSED) ||
923 (rc == ECONNRESET)) {
924 /* connection timed out, refused, or reset */
925 break;
926 }
927 lbolt = ddi_get_lbolt();
928 if (lbolt > conn_login_max) {
929 /*
930 * Connection retry timeout,
931 * failed connect to target.
932 */
933 break;
934 }
935 if (lbolt < conn_login_interval) {
936 if ((rc == EINPROGRESS) || (rc == EALREADY)) {
937 /* TCP connect still in progress */
938 delay(SEC_TO_TICK(IN_PROGRESS_DELAY));
939 continue;
940 } else {
941 delay(conn_login_interval - lbolt);
942 }
943 }
944 conn_login_interval = ddi_get_lbolt() +
945 SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
946 } while (rc != 0);
947 /* restore blocking mode */
948 if (rc == IDM_STATUS_SUCCESS) {
949 idm_so_socket_set_block(node);
950 }
951 } else {
952 rc = ksocket_connect(so_conn->ic_so, &ic->ic_ini_dst_addr.sin,
953 (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), CRED());
954 }
955
956 if (rc != 0) {
957 idm_soshutdown(so_conn->ic_so);
958 return (IDM_STATUS_FAIL);
959 }
960
961 idm_so_conn_connect_common(ic);
962
963 idm_set_ini_postconnect_options(so_conn);
964
965 return (IDM_STATUS_SUCCESS);
966 }
967
968 idm_status_t
969 idm_so_tgt_conn_create(idm_conn_t *ic, ksocket_t new_so)
970 {
971 idm_status_t idmrc;
972
973 idmrc = idm_so_conn_create_common(ic, new_so);
974
975 return (idmrc);
976 }
977
978 static void
979 idm_so_tgt_conn_destroy(idm_conn_t *ic)
980 {
981 idm_so_conn_destroy_common(ic);
982 }
983
984 /*
985 * idm_so_tgt_conn_connect()
986 * Establish the connection in ic, passed from idm_tgt_conn_finish(), which
987 * is invoked from the SM as a result of an inbound connection request.
988 */
989 static idm_status_t
990 idm_so_tgt_conn_connect(idm_conn_t *ic)
991 {
992 idm_so_conn_connect_common(ic);
993
994 return (IDM_STATUS_SUCCESS);
995 }
996
997 static idm_status_t
998 idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so)
999 {
1000 idm_so_conn_t *so_conn;
1001
1002 so_conn = kmem_zalloc(sizeof (idm_so_conn_t), KM_SLEEP);
1003 so_conn->ic_so = new_so;
1004
1005 ic->ic_transport_private = so_conn;
1006 ic->ic_transport_hdrlen = 0;
1007
1008 /* Set the scoreboarding flag on this connection */
1009 ic->ic_conn_flags |= IDM_CONN_USE_SCOREBOARD;
1010 ic->ic_conn_params.max_recv_dataseglen =
1011 ISCSI_DEFAULT_MAX_RECV_SEG_LEN;
1012 ic->ic_conn_params.max_xmit_dataseglen =
1013 ISCSI_DEFAULT_MAX_XMIT_SEG_LEN;
1014
1015 /*
1016 * Initialize tx thread mutex and list
1017 */
1018 mutex_init(&so_conn->ic_tx_mutex, NULL, MUTEX_DEFAULT, NULL);
1019 cv_init(&so_conn->ic_tx_cv, NULL, CV_DEFAULT, NULL);
1020 list_create(&so_conn->ic_tx_list, sizeof (idm_pdu_t),
1021 offsetof(idm_pdu_t, idm_tx_link));
1022
1023 return (IDM_STATUS_SUCCESS);
1024 }
1025
1026 static void
1027 idm_so_conn_destroy_common(idm_conn_t *ic)
1028 {
1029 idm_so_conn_t *so_conn = ic->ic_transport_private;
1030
1031 ic->ic_transport_private = NULL;
1032 idm_sodestroy(so_conn->ic_so);
1033 list_destroy(&so_conn->ic_tx_list);
1034 mutex_destroy(&so_conn->ic_tx_mutex);
1035 cv_destroy(&so_conn->ic_tx_cv);
1036
1037 kmem_free(so_conn, sizeof (idm_so_conn_t));
1038 }
1039
1040 static void
1041 idm_so_conn_connect_common(idm_conn_t *ic)
1042 {
1043 idm_so_conn_t *so_conn;
1044 struct sockaddr_in6 t_addr;
1045 socklen_t t_addrlen = 0;
1046
1047 so_conn = ic->ic_transport_private;
1048 bzero(&t_addr, sizeof (struct sockaddr_in6));
1049 t_addrlen = sizeof (struct sockaddr_in6);
1050
1051 /* Set the local and remote addresses in the idm conn handle */
1052 (void) ksocket_getsockname(so_conn->ic_so, (struct sockaddr *)&t_addr,
1053 &t_addrlen, CRED());
1054 bcopy(&t_addr, &ic->ic_laddr, t_addrlen);
1055 (void) ksocket_getpeername(so_conn->ic_so, (struct sockaddr *)&t_addr,
1056 &t_addrlen, CRED());
1057 bcopy(&t_addr, &ic->ic_raddr, t_addrlen);
1058
1059 mutex_enter(&ic->ic_mutex);
1060 so_conn->ic_tx_thread = thread_create(NULL, 0, idm_sotx_thread, ic, 0,
1061 &p0, TS_RUN, minclsyspri);
1062 so_conn->ic_rx_thread = thread_create(NULL, 0, idm_sorx_thread, ic, 0,
1063 &p0, TS_RUN, minclsyspri);
1064
1065 while (so_conn->ic_rx_thread_did == 0 ||
1066 so_conn->ic_tx_thread_did == 0)
1067 cv_wait(&ic->ic_cv, &ic->ic_mutex);
1068 mutex_exit(&ic->ic_mutex);
1069 }
1070
1071 /*
1072 * idm_so_conn_disconnect()
1073 * Shutdown the socket connection and stop the thread
1074 */
1075 static void
1076 idm_so_conn_disconnect(idm_conn_t *ic)
1077 {
1078 idm_so_conn_t *so_conn;
1079
1080 so_conn = ic->ic_transport_private;
1081
1082 mutex_enter(&ic->ic_mutex);
1083 so_conn->ic_rx_thread_running = B_FALSE;
1084 so_conn->ic_tx_thread_running = B_FALSE;
1085 /* We need to wakeup the TX thread */
1086 mutex_enter(&so_conn->ic_tx_mutex);
1087 cv_signal(&so_conn->ic_tx_cv);
1088 mutex_exit(&so_conn->ic_tx_mutex);
1089 mutex_exit(&ic->ic_mutex);
1090
1091 /* This should wakeup the RX thread if it is sleeping */
1092 idm_soshutdown(so_conn->ic_so);
1093
1094 thread_join(so_conn->ic_tx_thread_did);
1095 thread_join(so_conn->ic_rx_thread_did);
1096 }
1097
1098 /*
1099 * idm_so_tgt_svc_create()
1100 * Establish a service on an IP address and port. idm_svc_req_t contains
1101 * the service parameters.
1102 */
1103 /*ARGSUSED*/
1104 static idm_status_t
1105 idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is)
1106 {
1107 idm_so_svc_t *so_svc;
1108
1109 so_svc = kmem_zalloc(sizeof (idm_so_svc_t), KM_SLEEP);
1110
1111 /* Set the new sockets service in svc handle */
1112 is->is_so_svc = (void *)so_svc;
1113
1114 return (IDM_STATUS_SUCCESS);
1115 }
1116
1117 /*
1118 * idm_so_tgt_svc_destroy()
1119 * Teardown sockets resources allocated in idm_so_tgt_svc_create()
1120 */
1121 static void
1122 idm_so_tgt_svc_destroy(idm_svc_t *is)
1123 {
1124 /* the socket will have been torn down; free the service */
1125 kmem_free(is->is_so_svc, sizeof (idm_so_svc_t));
1126 }
1127
1128 /*
1129 * idm_so_tgt_svc_online()
1130 * Launch a watch thread on the svc allocated in idm_so_tgt_svc_create()
1131 */
1132
1133 static idm_status_t
1134 idm_so_tgt_svc_online(idm_svc_t *is)
1135 {
1136 idm_so_svc_t *so_svc;
1137 idm_svc_req_t *sr = &is->is_svc_req;
1138 struct sockaddr_in6 sin6_ip;
1139 const uint32_t on = 1;
1140 const uint32_t off = 0;
1141
1142 mutex_enter(&is->is_mutex);
1143 so_svc = (idm_so_svc_t *)is->is_so_svc;
1144
1145 /*
1146 * Try creating an IPv6 socket first
1147 */
1148 if ((so_svc->is_so = idm_socreate(PF_INET6, SOCK_STREAM, 0)) == NULL) {
1149 mutex_exit(&is->is_mutex);
1150 return (IDM_STATUS_FAIL);
1151 } else {
1152 bzero(&sin6_ip, sizeof (sin6_ip));
1153 sin6_ip.sin6_family = AF_INET6;
1154 sin6_ip.sin6_port = htons(sr->sr_port);
1155 sin6_ip.sin6_addr = in6addr_any;
1156
1157 (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1158 SO_REUSEADDR, (char *)&on, sizeof (on), CRED());
1159 /*
1160 * Turn off SO_MAC_EXEMPT so future sobinds succeed
1161 */
1162 (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1163 SO_MAC_EXEMPT, (char *)&off, sizeof (off), CRED());
1164
1165 if (ksocket_bind(so_svc->is_so, (struct sockaddr *)&sin6_ip,
1166 sizeof (sin6_ip), CRED()) != 0) {
1167 mutex_exit(&is->is_mutex);
1168 idm_sodestroy(so_svc->is_so);
1169 return (IDM_STATUS_FAIL);
1170 }
1171 }
1172
1173 idm_set_tgt_connect_options(so_svc->is_so);
1174
1175 if (ksocket_listen(so_svc->is_so, 5, CRED()) != 0) {
1176 mutex_exit(&is->is_mutex);
1177 idm_soshutdown(so_svc->is_so);
1178 idm_sodestroy(so_svc->is_so);
1179 return (IDM_STATUS_FAIL);
1180 }
1181
1182 /* Launch a watch thread */
1183 so_svc->is_thread = thread_create(NULL, 0, idm_so_svc_port_watcher,
1184 is, 0, &p0, TS_RUN, minclsyspri);
1185
1186 if (so_svc->is_thread == NULL) {
1187 /* Failure to launch; teardown the socket */
1188 mutex_exit(&is->is_mutex);
1189 idm_soshutdown(so_svc->is_so);
1190 idm_sodestroy(so_svc->is_so);
1191 return (IDM_STATUS_FAIL);
1192 }
1193 ksocket_hold(so_svc->is_so);
1194 /* Wait for the port watcher thread to start */
1195 while (!so_svc->is_thread_running)
1196 cv_wait(&is->is_cv, &is->is_mutex);
1197 mutex_exit(&is->is_mutex);
1198
1199 return (IDM_STATUS_SUCCESS);
1200 }
1201
1202 /*
1203 * idm_so_tgt_svc_offline
1204 *
1205 * Stop listening on the IP address and port identified by idm_svc_t.
1206 */
1207 static void
1208 idm_so_tgt_svc_offline(idm_svc_t *is)
1209 {
1210 idm_so_svc_t *so_svc;
1211 mutex_enter(&is->is_mutex);
1212 so_svc = (idm_so_svc_t *)is->is_so_svc;
1213 so_svc->is_thread_running = B_FALSE;
1214 mutex_exit(&is->is_mutex);
1215
1216 /*
1217 * Teardown socket
1218 */
1219 idm_sodestroy(so_svc->is_so);
1220
1221 /*
1222 * Now we expect the port watcher thread to terminate
1223 */
1224 thread_join(so_svc->is_thread_did);
1225 }
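/*
 * Illustrative sketch (not part of the driver): the expected ordering of
 * the target service entry points above is
 *
 *	idm_so_tgt_svc_create(sr, is);
 *	idm_so_tgt_svc_online(is);	- bind, listen, start port watcher
 *	...
 *	idm_so_tgt_svc_offline(is);	- tear down socket, join watcher
 *	idm_so_tgt_svc_destroy(is);	- free the idm_so_svc_t
 *
 * In practice these are reached through idm_so_transport_ops rather than
 * called directly.
 */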
1226
1227 /*
1228 * Watch thread for target service connection establishment.
1229 */
1230 void
1231 idm_so_svc_port_watcher(void *arg)
1232 {
1233 idm_svc_t *svc = arg;
1234 ksocket_t new_so;
1235 idm_conn_t *ic;
1236 idm_status_t idmrc;
1237 idm_so_svc_t *so_svc;
1238 int rc;
1239 const uint32_t off = 0;
1240 struct sockaddr_in6 t_addr;
1241 socklen_t t_addrlen;
1242
1243 bzero(&t_addr, sizeof (struct sockaddr_in6));
1244 t_addrlen = sizeof (struct sockaddr_in6);
1245 mutex_enter(&svc->is_mutex);
1246
1247 so_svc = svc->is_so_svc;
1248 so_svc->is_thread_running = B_TRUE;
1249 so_svc->is_thread_did = so_svc->is_thread->t_did;
1250
1251 cv_signal(&svc->is_cv);
1252
1253 IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) online", (void *)svc,
1254 svc->is_svc_req.sr_port);
1255
1256 while (so_svc->is_thread_running) {
1257 mutex_exit(&svc->is_mutex);
1258
1259 if ((rc = ksocket_accept(so_svc->is_so,
1260 (struct sockaddr *)&t_addr, &t_addrlen,
1261 &new_so, CRED())) != 0) {
1262 mutex_enter(&svc->is_mutex);
1263 if (rc == ECONNABORTED)
1264 continue;
1265 /* Connection problem */
1266 break;
1267 }
1268 /*
1269 * Turn off SO_MAC_EXEMPT so future sobinds succeed
1270 */
1271 (void) ksocket_setsockopt(new_so, SOL_SOCKET, SO_MAC_EXEMPT,
1272 (char *)&off, sizeof (off), CRED());
1273
1274 idmrc = idm_svc_conn_create(svc, IDM_TRANSPORT_TYPE_SOCKETS,
1275 &ic);
1276 if (idmrc != IDM_STATUS_SUCCESS) {
1277 /* Drop connection */
1278 idm_soshutdown(new_so);
1279 idm_sodestroy(new_so);
1280 mutex_enter(&svc->is_mutex);
1281 continue;
1282 }
1283
1284 idmrc = idm_so_tgt_conn_create(ic, new_so);
1285 if (idmrc != IDM_STATUS_SUCCESS) {
1286 idm_svc_conn_destroy(ic);
1287 idm_soshutdown(new_so);
1288 idm_sodestroy(new_so);
1289 mutex_enter(&svc->is_mutex);
1290 continue;
1291 }
1292
1293 /*
1294 * Kick the state machine. At CS_S3_XPT_UP the state machine
1295 * will notify the client (target) about the new connection.
1296 */
1297 idm_conn_event(ic, CE_CONNECT_ACCEPT, NULL);
1298
1299 mutex_enter(&svc->is_mutex);
1300 }
1301 ksocket_rele(so_svc->is_so);
1302 so_svc->is_thread_running = B_FALSE;
1303 mutex_exit(&svc->is_mutex);
1304
1305 IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) offline", (void *)svc,
1306 svc->is_svc_req.sr_port);
1307
1308 thread_exit();
1309 }
1310
1311 /*
1312 * idm_so_free_task_rsrc() stops any ongoing processing of the task and
1313 * frees resources associated with the task.
1314 *
1315 * It's not clear that this should return idm_status_t. What do we do
1316 * if it fails?
1317 */
1318 static idm_status_t
1319 idm_so_free_task_rsrc(idm_task_t *idt)
1320 {
1321 idm_buf_t *idb, *next_idb;
1322
1323 /*
1324 * There is nothing to cleanup on initiator connections
1325 */
1326 if (IDM_CONN_ISINI(idt->idt_ic))
1327 return (IDM_STATUS_SUCCESS);
1328
1329 /*
1330 * If this is a target connection, call idm_buf_rx_from_ini_done for
1331 * any buffer on the "outbufv" list with idb->idb_in_transport==B_TRUE.
1332 *
1333 * In addition, remove any buffers associated with this task from
1334 * the ic_tx_list. We'll do this by walking the idt_inbufv list, but
1335 * items don't actually get removed from that list (and completion
1336 * routines called) until idm_task_cleanup.
1337 */
1338 mutex_enter(&idt->idt_mutex);
1339
1340 for (idb = list_head(&idt->idt_outbufv); idb != NULL; idb = next_idb) {
1341 next_idb = list_next(&idt->idt_outbufv, idb);
1342 if (idb->idb_in_transport) {
1343 /*
1344 * idm_buf_rx_from_ini_done releases idt->idt_mutex
1345 */
1346 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1347 uintptr_t, idb->idb_buf,
1348 uint32_t, idb->idb_bufoffset,
1349 uint64_t, 0, uint32_t, 0, uint32_t, 0,
1350 uint32_t, idb->idb_xfer_len,
1351 int, XFER_BUF_RX_FROM_INI);
1352 idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED);
1353 mutex_enter(&idt->idt_mutex);
1354 }
1355 }
1356
1357 for (idb = list_head(&idt->idt_inbufv); idb != NULL; idb = next_idb) {
1358 next_idb = list_next(&idt->idt_inbufv, idb);
1359 /*
1360 * We want to remove these items from the tx_list as well,
1361 * but knowing it's in the idt_inbufv list is not a guarantee
1362 * that it's in the tx_list. If it's on the tx list then
1363 * let idm_sotx_thread() clean it up.
1364 */
1365 if (idb->idb_in_transport && !idb->idb_tx_thread) {
1366 /*
1367 * idm_buf_tx_to_ini_done releases idt->idt_mutex
1368 */
1369 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1370 uintptr_t, idb->idb_buf,
1371 uint32_t, idb->idb_bufoffset,
1372 uint64_t, 0, uint32_t, 0, uint32_t, 0,
1373 uint32_t, idb->idb_xfer_len,
1374 int, XFER_BUF_TX_TO_INI);
1375 idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
1376 mutex_enter(&idt->idt_mutex);
1377 }
1378 }
1379
1380 mutex_exit(&idt->idt_mutex);
1381
1382 return (IDM_STATUS_SUCCESS);
1383 }
1384
1385 /*
1386 * idm_so_negotiate_key_values() validates the key values for this connection
1387 */
1388 /* ARGSUSED */
1389 static kv_status_t
1390 idm_so_negotiate_key_values(idm_conn_t *it, nvlist_t *request_nvl,
1391 nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
1392 {
1393 /* All parameters are negotiated at the iscsit level */
1394 return (KV_HANDLED);
1395 }
1396
1397 /*
1398 * idm_so_notice_key_values() activates the negotiated key values for
1399 * this connection.
1400 */
1401 static void
1402 idm_so_notice_key_values(idm_conn_t *it, nvlist_t *negotiated_nvl)
1403 {
1404 char *nvp_name;
1405 nvpair_t *nvp;
1406 nvpair_t *next_nvp;
1407 int nvrc;
1408 idm_status_t idm_status;
1409 const idm_kv_xlate_t *ikvx;
1410 uint64_t num_val;
1411
1412 for (nvp = nvlist_next_nvpair(negotiated_nvl, NULL);
1413 nvp != NULL; nvp = next_nvp) {
1414 next_nvp = nvlist_next_nvpair(negotiated_nvl, nvp);
1415 nvp_name = nvpair_name(nvp);
1416
1417 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1418 switch (ikvx->ik_key_id) {
1419 case KI_HEADER_DIGEST:
1420 case KI_DATA_DIGEST:
1421 idm_status = idm_so_handle_digest(it, nvp, ikvx);
1422 ASSERT(idm_status == 0);
1423
1424 /* Remove processed item from negotiated_nvl list */
1425 nvrc = nvlist_remove_all(
1426 negotiated_nvl, ikvx->ik_key_name);
1427 ASSERT(nvrc == 0);
1428 break;
1429 case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1430 /*
1431 * Just pass the value down to idm layer.
1432 * No need to remove it from negotiated_nvl list here.
1433 */
1434 nvrc = nvpair_value_uint64(nvp, &num_val);
1435 ASSERT(nvrc == 0);
1436 it->ic_conn_params.max_xmit_dataseglen =
1437 (uint32_t)num_val;
1438 break;
1439 default:
1440 break;
1441 }
1442 }
1443 }
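/*
 * Illustrative sketch (not part of the driver): a negotiated_nvl that
 * enables header digests and caps the transmit data segment size could be
 * built by the login code roughly as follows (values are examples only):
 *
 *	nvlist_t *neg;
 *
 *	(void) nvlist_alloc(&neg, NV_UNIQUE_NAME, KM_SLEEP);
 *	(void) nvlist_add_string(neg, "HeaderDigest", "CRC32C");
 *	(void) nvlist_add_uint64(neg, "MaxRecvDataSegmentLength", 8192);
 *	idm_so_notice_key_values(ic, neg);
 *	nvlist_free(neg);
 */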
1444
1445 /*
1446 * idm_so_declare_key_values() declares the key values for this connection
1447 */
1448 /* ARGSUSED */
1449 static kv_status_t
1450 idm_so_declare_key_values(idm_conn_t *it, nvlist_t *config_nvl,
1451 nvlist_t *outgoing_nvl)
1452 {
1453 char *nvp_name;
1454 nvpair_t *nvp;
1455 nvpair_t *next_nvp;
1456 kv_status_t kvrc;
1457 int nvrc = 0;
1458 const idm_kv_xlate_t *ikvx;
1459 uint64_t num_val;
1460
1461 for (nvp = nvlist_next_nvpair(config_nvl, NULL);
1462 nvp != NULL && nvrc == 0; nvp = next_nvp) {
1463 next_nvp = nvlist_next_nvpair(config_nvl, nvp);
1464 nvp_name = nvpair_name(nvp);
1465
1466 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1467 switch (ikvx->ik_key_id) {
1468 case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1469 if ((nvrc = nvpair_value_uint64(nvp, &num_val)) != 0) {
1470 break;
1471 }
1472 if (outgoing_nvl &&
1473 (nvrc = nvlist_add_uint64(outgoing_nvl,
1474 nvp_name, num_val)) != 0) {
1475 break;
1476 }
1477 it->ic_conn_params.max_recv_dataseglen =
1478 (uint32_t)num_val;
1479 break;
1480 default:
1481 break;
1482 }
1483 }
1484 kvrc = idm_nvstat_to_kvstat(nvrc);
1485 return (kvrc);
1486 }
1487
1488 static idm_status_t
1489 idm_so_handle_digest(idm_conn_t *it, nvpair_t *digest_choice,
1490 const idm_kv_xlate_t *ikvx)
1491 {
1492 int nvrc;
1493 char *digest_choice_string;
1494
1495 nvrc = nvpair_value_string(digest_choice,
1496 &digest_choice_string);
1497 ASSERT(nvrc == 0);
1498 if (strcasecmp(digest_choice_string, "crc32c") == 0) {
1499 switch (ikvx->ik_key_id) {
1500 case KI_HEADER_DIGEST:
1501 it->ic_conn_flags |= IDM_CONN_HEADER_DIGEST;
1502 break;
1503 case KI_DATA_DIGEST:
1504 it->ic_conn_flags |= IDM_CONN_DATA_DIGEST;
1505 break;
1506 default:
1507 ASSERT(0);
1508 break;
1509 }
1510 } else if (strcasecmp(digest_choice_string, "none") == 0) {
1511 switch (ikvx->ik_key_id) {
1512 case KI_HEADER_DIGEST:
1513 it->ic_conn_flags &= ~IDM_CONN_HEADER_DIGEST;
1514 break;
1515 case KI_DATA_DIGEST:
1516 it->ic_conn_flags &= ~IDM_CONN_DATA_DIGEST;
1517 break;
1518 default:
1519 ASSERT(0);
1520 break;
1521 }
1522 } else {
1523 ASSERT(0);
1524 }
1525
1526 return (IDM_STATUS_SUCCESS);
1527 }
1528
1529
1530 /*
1531 * idm_so_conn_is_capable() verifies that the sockets transport can
1532 * service the passed connection request.
1533 */
1534 /* ARGSUSED */
1535 static boolean_t
1536 idm_so_conn_is_capable(idm_conn_req_t *ic, idm_transport_caps_t *caps)
1537 {
1538 return (B_TRUE);
1539 }
1540
1541 /*
1542 * idm_so_rx_datain() validates the Data Sequence number of the PDU. The
1543 * idm_sorecv_scsidata() function invoked earlier actually reads the data
1544 * off the socket into the appropriate buffers.
1545 */
1546 static void
1547 idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu)
1548 {
1549 iscsi_data_hdr_t *bhs;
1550 idm_task_t *idt;
1551 idm_buf_t *idb;
1552 uint32_t datasn;
1553 size_t offset;
1554 iscsi_hdr_t *ihp = (iscsi_hdr_t *)pdu->isp_hdr;
1555 iscsi_data_rsp_hdr_t *idrhp = (iscsi_data_rsp_hdr_t *)ihp;
1556
1557 ASSERT(ic != NULL);
1558 ASSERT(pdu != NULL);
1559
1560 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
1561 datasn = ntohl(bhs->datasn);
1562 offset = ntohl(bhs->offset);
1563
1564 ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA_RSP);
1565
1566 /*
1567 * Look up the task corresponding to the initiator task tag
1568 * to get the buffers affiliated with the task.
1569 */
1570 idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1571 if (idt == NULL) {
1572 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: failed to find task");
1573 idm_pdu_rx_protocol_error(ic, pdu);
1574 return;
1575 }
1576
1577 idb = pdu->isp_sorx_buf;
1578 if (idb == NULL) {
1579 IDM_CONN_LOG(CE_WARN,
1580 "idm_so_rx_datain: failed to find buffer");
1581 idm_task_rele(idt);
1582 idm_pdu_rx_protocol_error(ic, pdu);
1583 return;
1584 }
1585
1586 /*
1587 * DataSN values should be sequential and should not have any gaps or
1588 * repetitions. Check the DataSN with the one stored in the task.
1589 */
1590 if (datasn == idt->idt_exp_datasn) {
1591 idt->idt_exp_datasn++; /* keep track of DataSN received */
1592 } else {
1593 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: datasn out of order");
1594 idm_task_rele(idt);
1595 idm_pdu_rx_protocol_error(ic, pdu);
1596 return;
1597 }
1598
1599 /*
1600 * PDUs in a sequence should arrive at continuously increasing
1601 * buffer offsets
1602 */
1603 if (offset != idb->idb_exp_offset) {
1604 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: unexpected offset");
1605 idm_task_rele(idt);
1606 idm_pdu_rx_protocol_error(ic, pdu);
1607 return;
1608 }
1609 /* Expected next relative buffer offset */
1610 idb->idb_exp_offset += n2h24(bhs->dlength);
1611 idt->idt_rx_bytes += n2h24(bhs->dlength);
1612
1613 idm_task_rele(idt);
1614
1615 /*
1616 * For now call scsi_rsp which will process the data rsp
1617 * Revisit, need to provide an explicit client entry point for
1618 * phase collapse completions.
1619 */
1620 if (((ihp->opcode & ISCSI_OPCODE_MASK) == ISCSI_OP_SCSI_DATA_RSP) &&
1621 (idrhp->flags & ISCSI_FLAG_DATA_STATUS)) {
1622 (*ic->ic_conn_ops.icb_rx_scsi_rsp)(ic, pdu);
1623 }
1624
1625 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1626 }
1627
1628 /*
1629 * The idm_so_rx_dataout() function is used by the iSCSI target to read
1630 * data from the Data-Out PDU sent by the iSCSI initiator.
1631 *
1632 * This function gets the Initiator Task Tag from the PDU BHS and looks up the
1633 * task to get the buffers associated with the PDU. A PDU might span buffers.
1634 * The data is then read into the respective buffer.
1635 */
1636 static void
1637 idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu)
1638 {
1639
1640 iscsi_data_hdr_t *bhs;
1641 idm_task_t *idt;
1642 idm_buf_t *idb;
1643 size_t offset;
1644
1645 ASSERT(ic != NULL);
1646 ASSERT(pdu != NULL);
1647
1648 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
1649 offset = ntohl(bhs->offset);
1650 ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA);
1651
1652 /*
1653 * Look up the task corresponding to the initiator task tag
1654 * to get the buffers affiliated with the task.
1655 */
1656 idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1657 if (idt == NULL) {
1658 IDM_CONN_LOG(CE_WARN,
1659 "idm_so_rx_dataout: failed to find task");
1660 idm_pdu_rx_protocol_error(ic, pdu);
1661 return;
1662 }
1663
1664 idb = pdu->isp_sorx_buf;
1665 if (idb == NULL) {
1666 IDM_CONN_LOG(CE_WARN,
1667 "idm_so_rx_dataout: failed to find buffer");
1668 idm_task_rele(idt);
1669 idm_pdu_rx_protocol_error(ic, pdu);
1670 return;
1671 }
1672
1673 /* Keep track of data transferred - check data offsets */
1674 if (offset != idb->idb_exp_offset) {
1675 IDM_CONN_LOG(CE_NOTE, "idm_so_rx_dataout: offset out of seq: "
1676 "%ld, %d", offset, idb->idb_exp_offset);
1677 idm_task_rele(idt);
1678 idm_pdu_rx_protocol_error(ic, pdu);
1679 return;
1680 }
1681 /* Expected next relative offset */
1682 idb->idb_exp_offset += ntoh24(bhs->dlength);
1683 idt->idt_rx_bytes += n2h24(bhs->dlength);
1684
1685 /*
1686 * Call the buffer callback when the transfer is complete
1687 *
1688 * The connection state machine should only abort tasks after
1689 * shutting down the connection so we are assured that there
1690 * won't be a simultaneous attempt to abort this task at the
1691 * same time as we are processing this PDU (due to a connection
1692 * state change).
1693 */
1694 if (bhs->flags & ISCSI_FLAG_FINAL) {
1695 /*
1696 * We only want to call idm_buf_rx_from_ini_done once
1697 * per transfer. It's possible that this task has
1698 * already been aborted in which case
1699 * idm_so_free_task_rsrc will call idm_buf_rx_from_ini_done
1700 * for each buffer with idb_in_transport==B_TRUE. To
1701 * close this window and ensure that this doesn't happen,
1702 * we'll clear idb->idb_in_transport now while holding
1703 * the task mutex. This is only really an issue for
1704 * SCSI task abort -- if tasks were being aborted because
1705 * of a connection state change the state machine would
1706 * have already stopped the receive thread.
1707 */
1708 mutex_enter(&idt->idt_mutex);
1709
1710 /*
1711 * Release the task hold here (obtained in idm_task_find)
1712 * because the task may complete synchronously during
1713 * idm_buf_rx_from_ini_done. Since we still have an active
1714 * buffer we know there is at least one additional hold on idt.
1715 */
1716 idm_task_rele(idt);
1717
1718 /*
1719 * idm_buf_rx_from_ini_done releases idt->idt_mutex
1720 */
1721 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1722 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
1723 uint64_t, 0, uint32_t, 0, uint32_t, 0,
1724 uint32_t, idb->idb_xfer_len,
1725 int, XFER_BUF_RX_FROM_INI);
1726 idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_SUCCESS);
1727 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1728 return;
1729 }
1730
1731 idm_task_rele(idt);
1732 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1733 }
1734
1735 /*
1736 * The idm_so_rx_rtt() function is used by the iSCSI initiator to handle
1737 * the R2T PDU sent by the iSCSI target indicating that it is ready to
1738 * accept data. This gets the Initiator Task Tag (itt) from the PDU BHS
1739 * and looks up the task in the task tree using the itt to get the output
1740 * buffers associated the task. The R2T PDU contains the offset of the
1741 * requested data and the data length. This function then constructs a
1742 * sequence of iSCSI PDUs and outputs the requested data. Each Data-Out
1743 * PDU is associated with the R2T by the Target Transfer Tag (ttt).
1744 */
1745
1746 static void
1747 idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu)
1748 {
1749 idm_task_t *idt;
1750 idm_buf_t *idb;
1751 iscsi_rtt_hdr_t *rtt_hdr;
1752 uint32_t data_offset;
1753 uint32_t data_length;
1754
1755 ASSERT(ic != NULL);
1756 ASSERT(pdu != NULL);
1757
1758 rtt_hdr = (iscsi_rtt_hdr_t *)pdu->isp_hdr;
1759 data_offset = ntohl(rtt_hdr->data_offset);
1760 data_length = ntohl(rtt_hdr->data_length);
1761 idt = idm_task_find(ic, rtt_hdr->itt, rtt_hdr->ttt);
1762
1763 if (idt == NULL) {
1764 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find task");
1765 idm_pdu_rx_protocol_error(ic, pdu);
1766 return;
1767 }
1768
1769 /* Find the buffer bound to the task by the iSCSI initiator */
1770 mutex_enter(&idt->idt_mutex);
1771 idb = idm_buf_find(&idt->idt_outbufv, data_offset);
1772 if (idb == NULL) {
1773 mutex_exit(&idt->idt_mutex);
1774 idm_task_rele(idt);
1775 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find buffer");
1776 idm_pdu_rx_protocol_error(ic, pdu);
1777 return;
1778 }
1779
1780 /* make sure the buffer contains the requested data range */
1781 if (data_offset + data_length > idb->idb_buflen) {
1782 /* Overflow */
1783 mutex_exit(&idt->idt_mutex);
1784 idm_task_rele(idt);
1785 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: read from outside "
1786 "buffer");
1787 idm_pdu_rx_protocol_error(ic, pdu);
1788 return;
1789 }
1790
1791 idt->idt_r2t_ttt = rtt_hdr->ttt;
1792 idt->idt_exp_datasn = 0;
1793
1794 idm_so_send_rtt_data(ic, idt, idb, data_offset,
1795 ntohl(rtt_hdr->data_length));
1796 /*
1797 * the idt_mutex is released in idm_so_send_rtt_data
1798 */
1799
1800 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1801 idm_task_rele(idt);
1802
1803 }
1804
1805 idm_status_t
1806 idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu)
1807 {
1808 uint8_t pad[ISCSI_PAD_WORD_LEN];
1809 int pad_len;
1810 uint32_t data_digest_crc;
1811 uint32_t crc_calculated;
1812 int total_len;
1813 idm_so_conn_t *so_conn;
1814
1815 so_conn = ic->ic_transport_private;
1816
1817 pad_len = ((ISCSI_PAD_WORD_LEN -
1818 (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
1819 (ISCSI_PAD_WORD_LEN - 1));
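/*
 * Worked example (illustrative): data segments are padded to a 4-byte
 * (ISCSI_PAD_WORD_LEN) boundary, so isp_datalen == 1001 gives
 * pad_len == 3, while an already aligned length gives pad_len == 0.
 */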
1820
1821 ASSERT(pdu->isp_iovlen < (PDU_MAX_IOVLEN - 2)); /* pad + data digest */
1822
1823 total_len = pdu->isp_datalen;
1824
1825 if (pad_len) {
1826 pdu->isp_iov[pdu->isp_iovlen].iov_base = (char *)&pad;
1827 pdu->isp_iov[pdu->isp_iovlen].iov_len = pad_len;
1828 total_len += pad_len;
1829 pdu->isp_iovlen++;
1830 }
1831
1832 /* setup data digest */
1833 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1834 pdu->isp_iov[pdu->isp_iovlen].iov_base =
1835 (char *)&data_digest_crc;
1836 pdu->isp_iov[pdu->isp_iovlen].iov_len =
1837 sizeof (data_digest_crc);
1838 total_len += sizeof (data_digest_crc);
1839 pdu->isp_iovlen++;
1840 }
1841
1842 pdu->isp_data = (uint8_t *)(uintptr_t)pdu->isp_iov[0].iov_base;
1843
1844 if (idm_iov_sorecv(so_conn->ic_so, &pdu->isp_iov[0],
1845 pdu->isp_iovlen, total_len) != 0) {
1846 return (IDM_STATUS_IO);
1847 }
1848
1849 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1850 crc_calculated = idm_crc32c(pdu->isp_data,
1851 pdu->isp_datalen);
1852 if (pad_len) {
1853 crc_calculated = idm_crc32c_continued((char *)&pad,
1854 pad_len, crc_calculated);
1855 }
1856 if (crc_calculated != data_digest_crc) {
1857 IDM_CONN_LOG(CE_WARN,
1858 "idm_sorecvdata: "
1859 "CRC error: actual 0x%x, calc 0x%x",
1860 data_digest_crc, crc_calculated);
1861
1862 /* Invalid Data Digest */
1863 return (IDM_STATUS_DATA_DIGEST);
1864 }
1865 }
1866
1867 return (IDM_STATUS_SUCCESS);
1868 }
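
/*
 * Illustrative sketch (not part of the driver): the pad computation used in
 * idm_sorecvdata() above, and again in idm_i_so_tx() below, rounds the data
 * segment up to the next ISCSI_PAD_WORD_LEN boundary as required for iSCSI
 * data segments. Assuming ISCSI_PAD_WORD_LEN is 4: datalen 5 -> pad 3,
 * datalen 8 -> pad 0, datalen 1 -> pad 3. The helper name is hypothetical.
 */
static int
idm_example_pad_len(uint32_t datalen)
{
	return ((ISCSI_PAD_WORD_LEN -
	    (datalen & (ISCSI_PAD_WORD_LEN - 1))) &
	    (ISCSI_PAD_WORD_LEN - 1));
}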
1869
1870 /*
1871 * idm_sorecv_scsidata() is used to receive scsi data from the socket. The
1872 * Data-type PDU header must be read into the idm_pdu_t structure prior to
1873 * calling this function.
1874 */
1875 idm_status_t
1876 idm_sorecv_scsidata(idm_conn_t *ic, idm_pdu_t *pdu)
1877 {
1878 iscsi_data_hdr_t *bhs;
1879 idm_task_t *task;
1880 uint32_t offset;
1881 uint8_t opcode;
1882 uint32_t dlength;
1883 list_t *buflst;
1884 uint32_t xfer_bytes;
1885 idm_status_t status;
1886
1887 ASSERT(ic != NULL);
1888 ASSERT(pdu != NULL);
1889
1890 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
1891
1892 offset = ntohl(bhs->offset);
1893 opcode = bhs->opcode;
1894 dlength = n2h24(bhs->dlength);
1895
1896 ASSERT((opcode == ISCSI_OP_SCSI_DATA_RSP) ||
1897 (opcode == ISCSI_OP_SCSI_DATA));
1898
1899 /*
1900 * Successful lookup implicitly gets a "hold" on the task. This
1901 * hold must be released before leaving this function. At one
1902 * point we were caching this task context and retaining the hold
1903 * but it turned out to be very difficult to release the hold properly.
1904 * The task can be aborted and the connection shutdown between this
1905 * call and the subsequent expected call to idm_so_rx_datain/
1906 * idm_so_rx_dataout (in which case those functions are not called).
1907 * Releasing the hold in the PDU callback doesn't work well either
1908 * because the whole task may be completed by then at which point
1909 * it is too late to release the hold -- for better or worse this
1910 * code doesn't wait on the refcnts during normal operation.
1911 * idm_task_find() is very fast and it is not a huge burden if we
1912 * have to do it twice.
1913 */
1914 task = idm_task_find(ic, bhs->itt, bhs->ttt);
1915 if (task == NULL) {
1916 IDM_CONN_LOG(CE_WARN,
1917 "idm_sorecv_scsidata: could not find task");
1918 return (IDM_STATUS_FAIL);
1919 }
1920
1921 mutex_enter(&task->idt_mutex);
1922 buflst = (opcode == ISCSI_OP_SCSI_DATA_RSP) ?
1923 &task->idt_inbufv : &task->idt_outbufv;
1924 pdu->isp_sorx_buf = idm_buf_find(buflst, offset);
1925 mutex_exit(&task->idt_mutex);
1926
1927 if (pdu->isp_sorx_buf == NULL) {
1928 idm_task_rele(task);
1929 IDM_CONN_LOG(CE_WARN, "idm_sorecv_scsidata: could not find "
1930 "buffer for offset %x opcode=%x",
1931 offset, opcode);
1932 return (IDM_STATUS_FAIL);
1933 }
1934
1935 xfer_bytes = idm_fill_iov(pdu, pdu->isp_sorx_buf, offset, dlength);
1936 ASSERT(xfer_bytes != 0);
1937 if (xfer_bytes != dlength) {
1938 idm_task_rele(task);
1939 /*
1940 * Buffer overflow, connection error. The PDU data is still
1941 * sitting in the socket so we can't use the connection
1942 * again until that data is drained.
1943 */
1944 return (IDM_STATUS_FAIL);
1945 }
1946
1947 status = idm_sorecvdata(ic, pdu);
1948
1949 idm_task_rele(task);
1950
1951 return (status);
1952 }
1953
1954 static uint32_t
1955 idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb, uint32_t ro, uint32_t dlength)
1956 {
1957 uint32_t buf_ro = ro - idb->idb_bufoffset;
1958 uint32_t xfer_len = min(dlength, idb->idb_buflen - buf_ro);
1959
1960 ASSERT(ro >= idb->idb_bufoffset);
1961
1962 pdu->isp_iov[pdu->isp_iovlen].iov_base =
1963 (caddr_t)idb->idb_buf + buf_ro;
1964 pdu->isp_iov[pdu->isp_iovlen].iov_len = xfer_len;
1965 pdu->isp_iovlen++;
1966
1967 return (xfer_len);
1968 }
1969
1970 int
1971 idm_sorecv_nonscsidata(idm_conn_t *ic, idm_pdu_t *pdu)
1972 {
1973 pdu->isp_data = kmem_alloc(pdu->isp_datalen, KM_SLEEP);
1974 ASSERT(pdu->isp_data != NULL);
1975
1976 pdu->isp_databuflen = pdu->isp_datalen;
1977 pdu->isp_iov[0].iov_base = (caddr_t)pdu->isp_data;
1978 pdu->isp_iov[0].iov_len = pdu->isp_datalen;
1979 pdu->isp_iovlen = 1;
1980 /*
1981 * Since we are associating a new data buffer with this received
1982 * PDU we need to set a specific callback to free the data
1983 * after the PDU is processed.
1984 */
1985 pdu->isp_flags |= IDM_PDU_ADDL_DATA;
1986 pdu->isp_callback = idm_sorx_addl_pdu_cb;
1987
1988 return (idm_sorecvdata(ic, pdu));
1989 }
1990
1991 void
1992 idm_sorx_thread(void *arg)
1993 {
1994 boolean_t conn_failure = B_FALSE;
1995 idm_conn_t *ic = (idm_conn_t *)arg;
1996 idm_so_conn_t *so_conn;
1997 idm_pdu_t *pdu;
1998 idm_status_t rc;
1999
2000 idm_conn_hold(ic);
2001
2002 mutex_enter(&ic->ic_mutex);
2003
2004 so_conn = ic->ic_transport_private;
2005 so_conn->ic_rx_thread_running = B_TRUE;
2006 so_conn->ic_rx_thread_did = so_conn->ic_rx_thread->t_did;
2007 cv_signal(&ic->ic_cv);
2008
2009 while (so_conn->ic_rx_thread_running) {
2010 mutex_exit(&ic->ic_mutex);
2011
2012 /*
2013 * Get PDU with default header size (large enough for
2014 		 * BHS plus any anticipated AHS). A PDU from
2015 		 * the cache will have all values set correctly
2016 		 * for socket RX, including the callback.
2017 */
2018 pdu = kmem_cache_alloc(idm.idm_sorx_pdu_cache, KM_SLEEP);
2019 pdu->isp_ic = ic;
2020 pdu->isp_flags = 0;
2021 pdu->isp_transport_hdrlen = 0;
2022
2023 if ((rc = idm_sorecvhdr(ic, pdu)) != 0) {
2024 /*
2025 * Call idm_pdu_complete so that we call the callback
2026 * and ensure any memory allocated in idm_sorecvhdr
2027 * gets freed up.
2028 */
2029 idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2030
2031 /*
2032 * If ic_rx_thread_running is still set then
2033 * this is some kind of connection problem
2034 * on the socket. In this case we want to
2035 * generate an event. Otherwise some other
2036 * thread closed the socket due to another
2037 * issue in which case we don't need to
2038 * generate an event.
2039 */
2040 mutex_enter(&ic->ic_mutex);
2041 if (so_conn->ic_rx_thread_running) {
2042 conn_failure = B_TRUE;
2043 so_conn->ic_rx_thread_running = B_FALSE;
2044 }
2045
2046 continue;
2047 }
2048
2049 /*
2050 * Header has been read and validated. Now we need
2051 * to read the PDU data payload (if present). SCSI data
2052 		 * needs to be transferred from the socket directly into
2053 * the associated transfer buffer for the SCSI task.
2054 */
2055 if (pdu->isp_datalen != 0) {
2056 if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA) ||
2057 (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP)) {
2058 rc = idm_sorecv_scsidata(ic, pdu);
2059 /*
2060 * All SCSI errors are fatal to the
2061 * connection right now since we have no
2062 * place to put the data. What we need
2063 * is some kind of sink to dispose of unwanted
2064 * SCSI data. For example an invalid task tag
2065 * should not kill the connection (although
2066 * we may want to drop the connection).
2067 */
2068 } else {
2069 /*
2070 				 * Not a data PDU, so allocate a buffer for the
2071 * data segment and read the remaining data.
2072 */
2073 rc = idm_sorecv_nonscsidata(ic, pdu);
2074 }
2075 if (rc != 0) {
2076 /*
2077 * Call idm_pdu_complete so that we call the
2078 * callback and ensure any memory allocated
2079 * in idm_sorecvhdr gets freed up.
2080 */
2081 idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2082
2083 /*
2084 * If ic_rx_thread_running is still set then
2085 * this is some kind of connection problem
2086 * on the socket. In this case we want to
2087 * generate an event. Otherwise some other
2088 * thread closed the socket due to another
2089 * issue in which case we don't need to
2090 * generate an event.
2091 */
2092 mutex_enter(&ic->ic_mutex);
2093 if (so_conn->ic_rx_thread_running) {
2094 conn_failure = B_TRUE;
2095 so_conn->ic_rx_thread_running = B_FALSE;
2096 }
2097 continue;
2098 }
2099 }
2100
2101 /*
2102 * Process RX PDU
2103 */
2104 idm_pdu_rx(ic, pdu);
2105
2106 mutex_enter(&ic->ic_mutex);
2107 }
2108
2109 mutex_exit(&ic->ic_mutex);
2110
2111 /*
2112 * If we dropped out of the RX processing loop because of
2113 * a socket problem or other connection failure (including
2114 * digest errors) then we need to generate a state machine
2115 * event to shut the connection down.
2116 * If the state machine is already in, for example, INIT_ERROR, this
2117 * event will get dropped, and the TX thread will never be notified
2118 * to shut down. To be safe, we'll just notify it here.
2119 */
2120 if (conn_failure) {
2121 if (so_conn->ic_tx_thread_running) {
2122 so_conn->ic_tx_thread_running = B_FALSE;
2123 mutex_enter(&so_conn->ic_tx_mutex);
2124 cv_signal(&so_conn->ic_tx_cv);
2125 mutex_exit(&so_conn->ic_tx_mutex);
2126 }
2127
2128 idm_conn_event(ic, CE_TRANSPORT_FAIL, rc);
2129 }
2130
2131 idm_conn_rele(ic);
2132
2133 thread_exit();
2134 }
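
/*
 * Illustrative sketch (not part of the driver): the receive loop above
 * follows the usual iSCSI socket pattern of "read the header first, then
 * decide where the data segment goes". In outline, with a hypothetical
 * helper name and with error handling and PDU completion omitted:
 */
static void
idm_example_rx_one_pdu(idm_conn_t *ic, idm_pdu_t *pdu)
{
	/* 1. Read and validate the header (BHS plus any digest/AHS). */
	if (idm_sorecvhdr(ic, pdu) != 0)
		return;

	if (pdu->isp_datalen != 0) {
		if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA) ||
		    (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP)) {
			/* 2. SCSI data lands directly in the task buffer. */
			(void) idm_sorecv_scsidata(ic, pdu);
		} else {
			/* 3. Anything else gets a freshly allocated buffer. */
			(void) idm_sorecv_nonscsidata(ic, pdu);
		}
	}

	/* 4. Hand the complete PDU to the common layer. */
	idm_pdu_rx(ic, pdu);
}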
2135
2136 /*
2137 * idm_so_tx
2138 *
2139 * This is the implementation of idm_transport_ops_t's it_tx_pdu entry
2140 * point. By definition, it is supposed to be fast. So, simply queue
2141 * the entry and return. The real work is done by idm_i_so_tx() via
2142 * idm_sotx_thread().
2143 */
2144
2145 static void
2146 idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu)
2147 {
2148 idm_so_conn_t *so_conn = ic->ic_transport_private;
2149
2150 ASSERT(pdu->isp_ic == ic);
2151 mutex_enter(&so_conn->ic_tx_mutex);
2152
2153 if (!so_conn->ic_tx_thread_running) {
2154 mutex_exit(&so_conn->ic_tx_mutex);
2155 idm_pdu_complete(pdu, IDM_STATUS_ABORTED);
2156 return;
2157 }
2158
2159 list_insert_tail(&so_conn->ic_tx_list, (void *)pdu);
2160 cv_signal(&so_conn->ic_tx_cv);
2161 mutex_exit(&so_conn->ic_tx_mutex);
2162 }
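
/*
 * Illustrative sketch (not part of the driver): idm_so_tx() above is the
 * producer half of a mutex + condition-variable work queue, with
 * idm_sotx_thread() further below acting as the consumer. The generic shape
 * of the producer side, with hypothetical names, looks like this. The
 * important detail is that the "running" flag is checked under the same
 * lock that protects the list, so nothing is queued to a thread that is in
 * the middle of shutting down. The list is assumed to have been created with
 * list_create() and the queued items to embed a list_node_t.
 */
typedef struct idm_example_txq {
	kmutex_t	txq_mutex;
	kcondvar_t	txq_cv;
	list_t		txq_list;	/* queued work items */
	boolean_t	txq_running;
} idm_example_txq_t;

static boolean_t
idm_example_txq_enqueue(idm_example_txq_t *q, void *item)
{
	mutex_enter(&q->txq_mutex);
	if (!q->txq_running) {
		/* Caller must complete or abort the item itself */
		mutex_exit(&q->txq_mutex);
		return (B_FALSE);
	}
	list_insert_tail(&q->txq_list, item);
	cv_signal(&q->txq_cv);
	mutex_exit(&q->txq_mutex);
	return (B_TRUE);
}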
2163
2164 static idm_status_t
2165 idm_i_so_tx(idm_pdu_t *pdu)
2166 {
2167 idm_conn_t *ic = pdu->isp_ic;
2168 idm_status_t status = IDM_STATUS_SUCCESS;
2169 uint8_t pad[ISCSI_PAD_WORD_LEN];
2170 int pad_len;
2171 uint32_t hdr_digest_crc;
2172 uint32_t data_digest_crc = 0;
2173 int total_len = 0;
2174 int iovlen = 0;
2175 struct iovec iov[6];
2176 idm_so_conn_t *so_conn;
2177
2178 so_conn = ic->ic_transport_private;
2179
2180 /* Setup BHS */
2181 iov[iovlen].iov_base = (caddr_t)pdu->isp_hdr;
2182 iov[iovlen].iov_len = pdu->isp_hdrlen;
2183 total_len += iov[iovlen].iov_len;
2184 iovlen++;
2185
2186 /* Setup header digest */
2187 if (((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2188 (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST)) {
2189 hdr_digest_crc = idm_crc32c(pdu->isp_hdr, pdu->isp_hdrlen);
2190
2191 iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc;
2192 iov[iovlen].iov_len = sizeof (hdr_digest_crc);
2193 total_len += iov[iovlen].iov_len;
2194 iovlen++;
2195 }
2196
2197 /* Setup the data */
2198 if (pdu->isp_datalen) {
2199 idm_task_t *idt;
2200 idm_buf_t *idb;
2201 iscsi_data_hdr_t *ihp;
2202 ihp = (iscsi_data_hdr_t *)pdu->isp_hdr;
2203 /* Write of immediate data */
2204 if (ic->ic_ffp &&
2205 (ihp->opcode == ISCSI_OP_SCSI_CMD ||
2206 ihp->opcode == ISCSI_OP_SCSI_DATA)) {
2207 idt = idm_task_find(ic, ihp->itt, ihp->ttt);
2208 if (idt) {
2209 mutex_enter(&idt->idt_mutex);
2210 idb = idm_buf_find(&idt->idt_outbufv, 0);
2211 mutex_exit(&idt->idt_mutex);
2212 /*
2213 * If the initiator call to idm_buf_alloc
2214 * failed then we can get to this point
2215 * without a bound buffer. The associated
2216 * connection failure will clean things up
2217 * later. It would be nice to come up with
2218 * a cleaner way to handle this. In
2219 * particular it seems absurd to look up
2220 * the task and the buffer just to update
2221 * this counter.
2222 */
2223 if (idb)
2224 idb->idb_xfer_len += pdu->isp_datalen;
2225 idm_task_rele(idt);
2226 }
2227 }
2228
2229 iov[iovlen].iov_base = (caddr_t)pdu->isp_data;
2230 iov[iovlen].iov_len = pdu->isp_datalen;
2231 total_len += iov[iovlen].iov_len;
2232 iovlen++;
2233 }
2234
2235 /* Setup the data pad if necessary */
2236 pad_len = ((ISCSI_PAD_WORD_LEN -
2237 (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
2238 (ISCSI_PAD_WORD_LEN - 1));
2239
2240 if (pad_len) {
2241 bzero(pad, sizeof (pad));
2242 iov[iovlen].iov_base = (void *)&pad;
2243 iov[iovlen].iov_len = pad_len;
2244 total_len += iov[iovlen].iov_len;
2245 iovlen++;
2246 }
2247
2248 /*
2249 * Setup the data digest if enabled. Data-digest is not sent
2250 * for login-phase PDUs.
2251 */
2252 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) &&
2253 ((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2254 (pdu->isp_datalen || pad_len)) {
2255 /*
2256 * RFC3720/10.2.3: A zero-length Data Segment also
2257 * implies a zero-length data digest.
2258 */
2259 if (pdu->isp_datalen) {
2260 data_digest_crc = idm_crc32c(pdu->isp_data,
2261 pdu->isp_datalen);
2262 }
2263 if (pad_len) {
2264 data_digest_crc = idm_crc32c_continued(&pad,
2265 pad_len, data_digest_crc);
2266 }
2267
2268 iov[iovlen].iov_base = (caddr_t)&data_digest_crc;
2269 iov[iovlen].iov_len = sizeof (data_digest_crc);
2270 total_len += iov[iovlen].iov_len;
2271 iovlen++;
2272 }
2273
2274 /* Transmit the PDU */
2275 if (idm_iov_sosend(so_conn->ic_so, &iov[0], iovlen,
2276 total_len) != 0) {
2277 /* Set error status */
2278 IDM_CONN_LOG(CE_WARN,
2279 "idm_so_tx: failed to transmit the PDU, so: %p ic: %p "
2280 "data: %p", (void *) so_conn->ic_so, (void *) ic,
2281 (void *) pdu->isp_data);
2282 status = IDM_STATUS_IO;
2283 }
2284
2285 /*
2286 * Success does not mean that the PDU actually reached the
2287 * remote node since it could get dropped along the way.
2288 */
2289 idm_pdu_complete(pdu, status);
2290
2291 return (status);
2292 }
2293
2294 /*
2295 * The idm_so_buf_tx_to_ini() is used by the target iSCSI layer to transmit the
2296 * Data-In PDUs using sockets. Based on the negotiated MaxRecvDataSegmentLength,
2297 * the buffer is segmented into a sequence of Data-In PDUs, ordered by DataSN.
2298 * A target can invoke this function multiple times for a single read command
2299 * (identified by the same ITT) to split the input into several sequences.
2300 *
2301 * DataSN starts with 0 for the first data PDU of an input command and advances
2302 * by 1 for each subsequent data PDU. Each sequence will have its own F bit,
2303 * which is set to 1 for the last data PDU of a sequence.
2304 * If the initiator supports phase collapse, the status bit must be set along
2305 * with the F bit to indicate that the status is shipped together with the last
2306 * Data-In PDU.
2307 *
2308 * The data PDUs within a sequence will be sent in order with the buffer offset
2309  * in increasing order, i.e. the initiator and target must have negotiated the
2310 * "DataPDUInOrder" to "Yes". The order between sequences is not enforced.
2311 *
2312 * Caller holds idt->idt_mutex
2313 */
2314 static idm_status_t
2315 idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb)
2316 {
2317 idm_so_conn_t *so_conn = idb->idb_ic->ic_transport_private;
2318 idm_pdu_t tmppdu;
2319
2320 ASSERT(mutex_owned(&idt->idt_mutex));
2321
2322 /*
2323 * Put the idm_buf_t on the tx queue. It will be transmitted by
2324 * idm_sotx_thread.
2325 */
2326 mutex_enter(&so_conn->ic_tx_mutex);
2327
2328 DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2329 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2330 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2331 uint32_t, idb->idb_xfer_len, int, XFER_BUF_TX_TO_INI);
2332
2333 if (!so_conn->ic_tx_thread_running) {
2334 mutex_exit(&so_conn->ic_tx_mutex);
2335 /*
2336 * Don't release idt->idt_mutex since we're supposed to hold
2337 		 * it when calling idm_buf_tx_to_ini_done
2338 */
2339 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
2340 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2341 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2342 uint32_t, idb->idb_xfer_len,
2343 int, XFER_BUF_TX_TO_INI);
2344 idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
2345 return (IDM_STATUS_FAIL);
2346 }
2347
2348 /*
2349 * Build a template for the data PDU headers we will use so that
2350 	 * the SN values will stay consistent with other PDUs we are
2351 * transmitting like R2T and SCSI status.
2352 */
2353 bzero(&idb->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2354 tmppdu.isp_hdr = &idb->idb_data_hdr_tmpl;
2355 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2356 ISCSI_OP_SCSI_DATA_RSP);
2357 idb->idb_tx_thread = B_TRUE;
2358 list_insert_tail(&so_conn->ic_tx_list, (void *)idb);
2359 cv_signal(&so_conn->ic_tx_cv);
2360 mutex_exit(&so_conn->ic_tx_mutex);
2361 mutex_exit(&idt->idt_mutex);
2362
2363 /*
2364 * Returning success here indicates the transfer was successfully
2365 * dispatched -- it does not mean that the transfer completed
2366 * successfully.
2367 */
2368 return (IDM_STATUS_SUCCESS);
2369 }
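
/*
 * Illustrative sketch (not part of the driver): the segmentation described
 * above idm_so_buf_tx_to_ini() is actually carried out by
 * idm_so_send_buf_region() further below. For example, a 10240-byte Data-In
 * transfer with a negotiated max_xmit_dataseglen of 4096 goes out as three
 * PDUs of 4096, 4096 and 2048 bytes carrying DataSN 0, 1 and 2, with the F
 * bit set only on the last one. The hypothetical helper below just computes
 * the resulting PDU count.
 */
static uint32_t
idm_example_datain_pdu_count(uint32_t xfer_len, uint32_t max_dataseglen)
{
	if ((xfer_len == 0) || (max_dataseglen == 0))
		return (0);
	/* Round up: one PDU per full chunk plus one for any remainder */
	return ((xfer_len + max_dataseglen - 1) / max_dataseglen);
}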
2370
2371 /*
2372 * The idm_so_buf_rx_from_ini() is used by the target iSCSI layer to specify the
2373 * data blocks it is ready to receive from the initiator in response to a WRITE
2374 * SCSI command. The target iSCSI layer passes the information about the desired
2375 * data blocks to the initiator in one R2T PDU. The receiving buffer, the buffer
2376 * offset and datalen are passed via the 'idb' argument.
2377 *
2378 * Scope for Prototype build:
2379 * R2Ts are required for any Data-Out PDU, i.e. initiator and target must have
2380 * negotiated the "InitialR2T" to "Yes".
2381 *
2382 * Caller holds idt->idt_mutex
2383 */
2384 static idm_status_t
2385 idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb)
2386 {
2387 idm_pdu_t *pdu;
2388 iscsi_rtt_hdr_t *rtt;
2389
2390 ASSERT(mutex_owned(&idt->idt_mutex));
2391
2392 DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2393 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2394 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2395 uint32_t, idb->idb_xfer_len, int, XFER_BUF_RX_FROM_INI);
2396
2397 pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2398 pdu->isp_ic = idt->idt_ic;
2399 pdu->isp_flags = IDM_PDU_SET_STATSN;
2400 bzero(pdu->isp_hdr, sizeof (iscsi_rtt_hdr_t));
2401
2402 /* iSCSI layer fills the TTT, ITT, ExpCmdSN, MaxCmdSN */
2403 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, pdu, ISCSI_OP_RTT_RSP);
2404
2405 /* set the rttsn, rtt.flags, rtt.data_offset and rtt.data_length */
2406 rtt = (iscsi_rtt_hdr_t *)(pdu->isp_hdr);
2407
2408 rtt->opcode = ISCSI_OP_RTT_RSP;
2409 rtt->flags = ISCSI_FLAG_FINAL;
2410 rtt->data_offset = htonl(idb->idb_bufoffset);
2411 rtt->data_length = htonl(idb->idb_xfer_len);
2412 rtt->rttsn = htonl(idt->idt_exp_rttsn++);
2413
2414 /* Keep track of buffer offsets */
2415 idb->idb_exp_offset = idb->idb_bufoffset;
2416 mutex_exit(&idt->idt_mutex);
2417
2418 /*
2419 * Transmit the PDU.
2420 */
2421 idm_pdu_tx(pdu);
2422
2423 return (IDM_STATUS_SUCCESS);
2424 }
2425
2426 static idm_status_t
2427 idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen)
2428 {
2429 if ((buflen > IDM_SO_BUF_CACHE_LB) && (buflen <= IDM_SO_BUF_CACHE_UB)) {
2430 idb->idb_buf = kmem_cache_alloc(idm.idm_so_128k_buf_cache,
2431 KM_NOSLEEP);
2432 idb->idb_buf_private = idm.idm_so_128k_buf_cache;
2433 } else {
2434 idb->idb_buf = kmem_alloc(buflen, KM_NOSLEEP);
2435 idb->idb_buf_private = NULL;
2436 }
2437
2438 if (idb->idb_buf == NULL) {
2439 IDM_CONN_LOG(CE_NOTE,
2440 "idm_so_buf_alloc: failed buffer allocation");
2441 return (IDM_STATUS_FAIL);
2442 }
2443
2444 return (IDM_STATUS_SUCCESS);
2445 }
2446
2447 /* ARGSUSED */
2448 static idm_status_t
2449 idm_so_buf_setup(idm_buf_t *idb)
2450 {
2451 /* Ensure bufalloc'd flag is unset */
2452 idb->idb_bufalloc = B_FALSE;
2453
2454 return (IDM_STATUS_SUCCESS);
2455 }
2456
2457 /* ARGSUSED */
2458 static void
2459 idm_so_buf_teardown(idm_buf_t *idb)
2460 {
2461 /* nothing to do here */
2462 }
2463
2464 static void
2465 idm_so_buf_free(idm_buf_t *idb)
2466 {
2467 if (idb->idb_buf_private == NULL) {
2468 kmem_free(idb->idb_buf, idb->idb_buflen);
2469 } else {
2470 kmem_cache_free(idb->idb_buf_private, idb->idb_buf);
2471 }
2472 }
2473
2474 static void
2475 idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt, idm_buf_t *idb,
2476 uint32_t offset, uint32_t length)
2477 {
2478 idm_so_conn_t *so_conn = ic->ic_transport_private;
2479 idm_pdu_t tmppdu;
2480 idm_buf_t *rtt_buf;
2481
2482 ASSERT(mutex_owned(&idt->idt_mutex));
2483
2484 /*
2485 * Allocate a buffer to represent the RTT transfer. We could further
2486 	 * optimize this by allocating the buffers internally from an
2487 	 * RTT-specific buffer cache, since this is socket-specific code, but
2488 	 * for now we will keep it simple.
2489 */
2490 rtt_buf = idm_buf_alloc(ic, (uint8_t *)idb->idb_buf + offset, length);
2491 if (rtt_buf == NULL) {
2492 /*
2493 * If we're in FFP then the failure was likely a resource
2494 * allocation issue and we should close the connection by
2495 * sending a CE_TRANSPORT_FAIL event.
2496 *
2497 * If we're not in FFP then idm_buf_alloc will always
2498 * fail and the state is transitioning to "complete" anyway
2499 * so we won't bother to send an event.
2500 */
2501 mutex_enter(&ic->ic_state_mutex);
2502 if (ic->ic_ffp)
2503 idm_conn_event_locked(ic, CE_TRANSPORT_FAIL,
2504 NULL, CT_NONE);
2505 mutex_exit(&ic->ic_state_mutex);
2506 mutex_exit(&idt->idt_mutex);
2507 return;
2508 }
2509
2510 rtt_buf->idb_buf_cb = NULL;
2511 rtt_buf->idb_cb_arg = NULL;
2512 rtt_buf->idb_bufoffset = offset;
2513 rtt_buf->idb_xfer_len = length;
2514 rtt_buf->idb_ic = idt->idt_ic;
2515 rtt_buf->idb_task_binding = idt;
2516
2517 /*
2518 * The new buffer (if any) represents an additional
2519 * reference on the task
2520 */
2521 idm_task_hold(idt);
2522 mutex_exit(&idt->idt_mutex);
2523
2524 /*
2525 * Put the idm_buf_t on the tx queue. It will be transmitted by
2526 * idm_sotx_thread.
2527 */
2528 mutex_enter(&so_conn->ic_tx_mutex);
2529
2530 if (!so_conn->ic_tx_thread_running) {
2531 idm_buf_free(rtt_buf);
2532 mutex_exit(&so_conn->ic_tx_mutex);
2533 idm_task_rele(idt);
2534 return;
2535 }
2536
2537 /*
2538 * Build a template for the data PDU headers we will use so that
2539 	 * the SN values will stay consistent with other PDUs we are
2540 * transmitting like R2T and SCSI status.
2541 */
2542 bzero(&rtt_buf->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2543 tmppdu.isp_hdr = &rtt_buf->idb_data_hdr_tmpl;
2544 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2545 ISCSI_OP_SCSI_DATA);
2546 rtt_buf->idb_tx_thread = B_TRUE;
2547 rtt_buf->idb_in_transport = B_TRUE;
2548 list_insert_tail(&so_conn->ic_tx_list, (void *)rtt_buf);
2549 cv_signal(&so_conn->ic_tx_cv);
2550 mutex_exit(&so_conn->ic_tx_mutex);
2551 }
2552
2553 static void
2554 idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb)
2555 {
2556 /*
2557 * Don't worry about status -- we assume any error handling
2558 * is performed by the caller (idm_sotx_thread).
2559 */
2560 idb->idb_in_transport = B_FALSE;
2561 idm_task_rele(idt);
2562 idm_buf_free(idb);
2563 }
2564
2565 static idm_status_t
2566 idm_so_send_buf_region(idm_task_t *idt, idm_buf_t *idb,
2567 uint32_t buf_region_offset, uint32_t buf_region_length)
2568 {
2569 idm_conn_t *ic;
2570 uint32_t max_dataseglen;
2571 size_t remainder, chunk;
2572 uint32_t data_offset = buf_region_offset;
2573 iscsi_data_hdr_t *bhs;
2574 idm_pdu_t *pdu;
2575 idm_status_t tx_status;
2576
2577 ASSERT(mutex_owned(&idt->idt_mutex));
2578
2579 ic = idt->idt_ic;
2580
2581 max_dataseglen = ic->ic_conn_params.max_xmit_dataseglen;
2582 remainder = buf_region_length;
2583
2584 while (remainder) {
2585 if (idt->idt_state != TASK_ACTIVE) {
2586 ASSERT((idt->idt_state != TASK_IDLE) &&
2587 (idt->idt_state != TASK_COMPLETE));
2588 return (IDM_STATUS_ABORTED);
2589 }
2590
2591 /* check to see if we need to chunk the data */
2592 if (remainder > max_dataseglen) {
2593 chunk = max_dataseglen;
2594 } else {
2595 chunk = remainder;
2596 }
2597
2598 /* Data PDU headers will always be sizeof (iscsi_hdr_t) */
2599 pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2600 pdu->isp_ic = ic;
2601 pdu->isp_flags = 0; /* initialize isp_flags */
2602
2603 /*
2604 		 * We've already built a header template
2605 * to use during the transfer. Use this template so that
2606 		 * the SN values stay consistent with any unrelated PDUs
2607 * being transmitted.
2608 */
2609 bcopy(&idb->idb_data_hdr_tmpl, pdu->isp_hdr,
2610 sizeof (iscsi_hdr_t));
2611
2612 /*
2613 * Set DataSN, data offset, and flags in BHS
2614 * For the prototype build, A = 0, S = 0, U = 0
2615 */
2616 bhs = (iscsi_data_hdr_t *)(pdu->isp_hdr);
2617
2618 bhs->datasn = htonl(idt->idt_exp_datasn++);
2619
2620 hton24(bhs->dlength, chunk);
2621 bhs->offset = htonl(idb->idb_bufoffset + data_offset);
2622
2623 /* setup data */
2624 pdu->isp_data = (uint8_t *)idb->idb_buf + data_offset;
2625 pdu->isp_datalen = (uint_t)chunk;
2626
2627 if (chunk == remainder) {
2628 bhs->flags = ISCSI_FLAG_FINAL; /* F bit set to 1 */
2629 /* Piggyback the status with the last data PDU */
2630 if (idt->idt_flags & IDM_TASK_PHASECOLLAPSE_REQ) {
2631 pdu->isp_flags |= IDM_PDU_SET_STATSN |
2632 IDM_PDU_ADVANCE_STATSN;
2633 (*idt->idt_ic->ic_conn_ops.icb_update_statsn)
2634 (idt, pdu);
2635 idt->idt_flags |=
2636 IDM_TASK_PHASECOLLAPSE_SUCCESS;
2637
2638 }
2639 }
2640
2641 remainder -= chunk;
2642 data_offset += chunk;
2643
2644 /* Instrument the data-send DTrace probe. */
2645 if (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) {
2646 DTRACE_ISCSI_2(data__send,
2647 idm_conn_t *, idt->idt_ic,
2648 iscsi_data_rsp_hdr_t *,
2649 (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
2650 }
2651
2652 /*
2653 * Now that we're done working with idt_exp_datasn,
2654 * idt->idt_state and idb->idb_bufoffset we can release
2655 * the task lock -- don't want to hold it across the
2656 * call to idm_i_so_tx since we could block.
2657 */
2658 mutex_exit(&idt->idt_mutex);
2659
2660 /*
2661 * Transmit the PDU. Call the internal routine directly
2662 * as there is already implicit ordering.
2663 */
2664 if ((tx_status = idm_i_so_tx(pdu)) != IDM_STATUS_SUCCESS) {
2665 mutex_enter(&idt->idt_mutex);
2666 return (tx_status);
2667 }
2668
2669 mutex_enter(&idt->idt_mutex);
2670 idt->idt_tx_bytes += chunk;
2671 }
2672
2673 return (IDM_STATUS_SUCCESS);
2674 }
2675
2676 /*
2677 * TX PDU cache
2678 */
2679 /* ARGSUSED */
2680 int
2681 idm_sotx_pdu_constructor(void *hdl, void *arg, int flags)
2682 {
2683 idm_pdu_t *pdu = hdl;
2684
2685 bzero(pdu, sizeof (idm_pdu_t));
2686 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2687 pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2688 pdu->isp_callback = idm_sotx_cache_pdu_cb;
2689 pdu->isp_magic = IDM_PDU_MAGIC;
2690 bzero(pdu->isp_hdr, sizeof (iscsi_hdr_t));
2691
2692 return (0);
2693 }
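
/*
 * Illustrative sketch (not part of the driver): the "(pdu + 1)" pointer
 * arithmetic in the constructors relies on each cache object being laid out
 * as an idm_pdu_t immediately followed by an iscsi_hdr_t. The caches
 * themselves are created elsewhere; presumably along these lines, where the
 * object size is the key detail and the cache name, alignment and flags
 * below are only placeholders.
 */
static kmem_cache_t *
idm_example_create_tx_pdu_cache(void)
{
	return (kmem_cache_create("example_sotx_pdu_cache",
	    sizeof (idm_pdu_t) + sizeof (iscsi_hdr_t), 8,
	    idm_sotx_pdu_constructor, NULL, NULL, NULL, NULL, 0));
}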
2694
2695 /* ARGSUSED */
2696 void
2697 idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2698 {
2699 /* reset values between use */
2700 pdu->isp_datalen = 0;
2701
2702 kmem_cache_free(idm.idm_sotx_pdu_cache, pdu);
2703 }
2704
2705 /*
2706 * RX PDU cache
2707 */
2708 /* ARGSUSED */
2709 int
2710 idm_sorx_pdu_constructor(void *hdl, void *arg, int flags)
2711 {
2712 idm_pdu_t *pdu = hdl;
2713
2714 bzero(pdu, sizeof (idm_pdu_t));
2715 pdu->isp_magic = IDM_PDU_MAGIC;
2716 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2717 pdu->isp_callback = idm_sorx_cache_pdu_cb;
2718
2719 return (0);
2720 }
2721
2722 /* ARGSUSED */
2723 static void
2724 idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2725 {
2726 pdu->isp_iovlen = 0;
2727 pdu->isp_sorx_buf = 0;
2728 kmem_cache_free(idm.idm_sorx_pdu_cache, pdu);
2729 }
2730
2731 static void
2732 idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2733 {
2734 /*
2735 * We had to modify our cached RX PDU with a longer header buffer
2736 * and/or a longer data buffer. Release the new buffers and fix
2737 * the fields back to what we would expect for a cached RX PDU.
2738 */
2739 if (pdu->isp_flags & IDM_PDU_ADDL_HDR) {
2740 kmem_free(pdu->isp_hdr, pdu->isp_hdrlen);
2741 }
2742 if (pdu->isp_flags & IDM_PDU_ADDL_DATA) {
2743 kmem_free(pdu->isp_data, pdu->isp_datalen);
2744 }
2745 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1);
2746 pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2747 pdu->isp_data = NULL;
2748 pdu->isp_datalen = 0;
2749 pdu->isp_sorx_buf = 0;
2750 pdu->isp_callback = idm_sorx_cache_pdu_cb;
2751 idm_sorx_cache_pdu_cb(pdu, status);
2752 }
2753
2754 /*
2755 * This thread is only active when I/O is queued for transmit
2756 * because the socket is busy.
2757 */
2758 void
2759 idm_sotx_thread(void *arg)
2760 {
2761 idm_conn_t *ic = arg;
2762 idm_tx_obj_t *object, *next;
2763 idm_so_conn_t *so_conn;
2764 idm_status_t status = IDM_STATUS_SUCCESS;
2765
2766 idm_conn_hold(ic);
2767
2768 mutex_enter(&ic->ic_mutex);
2769 so_conn = ic->ic_transport_private;
2770 so_conn->ic_tx_thread_running = B_TRUE;
2771 so_conn->ic_tx_thread_did = so_conn->ic_tx_thread->t_did;
2772 cv_signal(&ic->ic_cv);
2773 mutex_exit(&ic->ic_mutex);
2774
2775 mutex_enter(&so_conn->ic_tx_mutex);
2776
2777 while (so_conn->ic_tx_thread_running) {
2778 while (list_is_empty(&so_conn->ic_tx_list)) {
2779 DTRACE_PROBE1(soconn__tx__sleep, idm_conn_t *, ic);
2780 cv_wait(&so_conn->ic_tx_cv, &so_conn->ic_tx_mutex);
2781 DTRACE_PROBE1(soconn__tx__wakeup, idm_conn_t *, ic);
2782
2783 if (!so_conn->ic_tx_thread_running) {
2784 goto tx_bail;
2785 }
2786 }
2787
2788 object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2789 list_remove(&so_conn->ic_tx_list, object);
2790 mutex_exit(&so_conn->ic_tx_mutex);
2791
2792 switch (object->idm_tx_obj_magic) {
2793 case IDM_PDU_MAGIC: {
2794 idm_pdu_t *pdu = (idm_pdu_t *)object;
2795 DTRACE_PROBE2(soconn__tx__pdu, idm_conn_t *, ic,
2796 idm_pdu_t *, (idm_pdu_t *)object);
2797
2798 if (pdu->isp_flags & IDM_PDU_SET_STATSN) {
2799 /* No IDM task */
2800 (ic->ic_conn_ops.icb_update_statsn)(NULL, pdu);
2801 }
2802 status = idm_i_so_tx((idm_pdu_t *)object);
2803 break;
2804 }
2805 case IDM_BUF_MAGIC: {
2806 idm_buf_t *idb = (idm_buf_t *)object;
2807 idm_task_t *idt = idb->idb_task_binding;
2808
2809 DTRACE_PROBE2(soconn__tx__buf, idm_conn_t *, ic,
2810 idm_buf_t *, idb);
2811
2812 mutex_enter(&idt->idt_mutex);
2813 status = idm_so_send_buf_region(idt,
2814 idb, 0, idb->idb_xfer_len);
2815
2816 /*
2817 * TX thread owns the buffer so we expect it to
2818 * be "in transport"
2819 */
2820 ASSERT(idb->idb_in_transport);
2821 if (IDM_CONN_ISTGT(ic)) {
2822 /*
2823 * idm_buf_tx_to_ini_done releases
2824 * idt->idt_mutex
2825 */
2826 DTRACE_ISCSI_8(xfer__done,
2827 idm_conn_t *, idt->idt_ic,
2828 uintptr_t, idb->idb_buf,
2829 uint32_t, idb->idb_bufoffset,
2830 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2831 uint32_t, idb->idb_xfer_len,
2832 int, XFER_BUF_TX_TO_INI);
2833 idm_buf_tx_to_ini_done(idt, idb, status);
2834 } else {
2835 idm_so_send_rtt_data_done(idt, idb);
2836 mutex_exit(&idt->idt_mutex);
2837 }
2838 break;
2839 }
2840
2841 default:
2842 IDM_CONN_LOG(CE_WARN, "idm_sotx_thread: Unknown magic "
2843 "(0x%08x)", object->idm_tx_obj_magic);
2844 status = IDM_STATUS_FAIL;
2845 }
2846
2847 mutex_enter(&so_conn->ic_tx_mutex);
2848
2849 if (status != IDM_STATUS_SUCCESS) {
2850 so_conn->ic_tx_thread_running = B_FALSE;
2851 idm_conn_event(ic, CE_TRANSPORT_FAIL, status);
2852 }
2853 }
2854
2855 /*
2856 * Before we leave, we need to abort every item remaining in the
2857 * TX list.
2858 */
2859
2860 tx_bail:
2861 object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2862
2863 while (object != NULL) {
2864 next = list_next(&so_conn->ic_tx_list, object);
2865
2866 list_remove(&so_conn->ic_tx_list, object);
2867 switch (object->idm_tx_obj_magic) {
2868 case IDM_PDU_MAGIC:
2869 idm_pdu_complete((idm_pdu_t *)object,
2870 IDM_STATUS_ABORTED);
2871 break;
2872
2873 case IDM_BUF_MAGIC: {
2874 idm_buf_t *idb = (idm_buf_t *)object;
2875 idm_task_t *idt = idb->idb_task_binding;
2876 mutex_exit(&so_conn->ic_tx_mutex);
2877 mutex_enter(&idt->idt_mutex);
2878 /*
2879 * TX thread owns the buffer so we expect it to
2880 * be "in transport"
2881 */
2882 ASSERT(idb->idb_in_transport);
2883 if (IDM_CONN_ISTGT(ic)) {
2884 /*
2885 * idm_buf_tx_to_ini_done releases
2886 * idt->idt_mutex
2887 */
2888 DTRACE_ISCSI_8(xfer__done,
2889 idm_conn_t *, idt->idt_ic,
2890 uintptr_t, idb->idb_buf,
2891 uint32_t, idb->idb_bufoffset,
2892 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2893 uint32_t, idb->idb_xfer_len,
2894 int, XFER_BUF_TX_TO_INI);
2895 idm_buf_tx_to_ini_done(idt, idb,
2896 IDM_STATUS_ABORTED);
2897 } else {
2898 idm_so_send_rtt_data_done(idt, idb);
2899 mutex_exit(&idt->idt_mutex);
2900 }
2901 mutex_enter(&so_conn->ic_tx_mutex);
2902 break;
2903 }
2904 default:
2905 IDM_CONN_LOG(CE_WARN,
2906 "idm_sotx_thread: Unexpected magic "
2907 "(0x%08x)", object->idm_tx_obj_magic);
2908 }
2909
2910 object = next;
2911 }
2912
2913 mutex_exit(&so_conn->ic_tx_mutex);
2914 idm_conn_rele(ic);
2915 thread_exit();
2916 /*NOTREACHED*/
2917 }
2918
2919 static void
2920 idm_so_socket_set_nonblock(struct sonode *node)
2921 {
2922 (void) VOP_SETFL(node->so_vnode, node->so_flag,
2923 (node->so_state | FNONBLOCK), CRED(), NULL);
2924 }
2925
2926 static void
2927 idm_so_socket_set_block(struct sonode *node)
2928 {
2929 (void) VOP_SETFL(node->so_vnode, node->so_flag,
2930 (node->so_state & (~FNONBLOCK)), CRED(), NULL);
2931 }
2932
2933
2934 /*
2935 * Called by kernel sockets when the connection has been accepted or
2936 * rejected. In early volo, a "disconnect" callback was sent instead of
2937 * "connectfailed", so we check for both.
2938 */
2939 /* ARGSUSED */
2940 void
2941 idm_so_timed_socket_connect_cb(ksocket_t ks,
2942 ksocket_callback_event_t ev, void *arg, uintptr_t info)
2943 {
2944 idm_so_timed_socket_t *itp = arg;
2945 ASSERT(itp != NULL);
2946 ASSERT(ev == KSOCKET_EV_CONNECTED ||
2947 ev == KSOCKET_EV_CONNECTFAILED ||
2948 ev == KSOCKET_EV_DISCONNECTED);
2949
2950 mutex_enter(&idm_so_timed_socket_mutex);
2951 itp->it_callback_called = B_TRUE;
2952 if (ev == KSOCKET_EV_CONNECTED) {
2953 itp->it_socket_error_code = 0;
2954 } else {
2955 /* Make sure the error code is non-zero on error */
2956 if (info == 0)
2957 info = ECONNRESET;
2958 itp->it_socket_error_code = (int)info;
2959 }
2960 cv_signal(&itp->it_cv);
2961 mutex_exit(&idm_so_timed_socket_mutex);
2962 }
2963
2964 int
2965 idm_so_timed_socket_connect(ksocket_t ks,
2966 struct sockaddr_storage *sa, int sa_sz, int login_max_usec)
2967 {
2968 clock_t conn_login_max;
2969 int rc, nonblocking, rval;
2970 idm_so_timed_socket_t it;
2971 ksocket_callbacks_t ks_cb;
2972
2973 conn_login_max = ddi_get_lbolt() + drv_usectohz(login_max_usec);
2974
2975 /*
2976 * Set to non-block socket mode, with callback on connect
2977 * Early volo used "disconnected" instead of "connectfailed",
2978 * so set callback to look for both.
2979 */
2980 bzero(&it, sizeof (it));
2981 ks_cb.ksock_cb_flags = KSOCKET_CB_CONNECTED |
2982 KSOCKET_CB_CONNECTFAILED | KSOCKET_CB_DISCONNECTED;
2983 ks_cb.ksock_cb_connected = idm_so_timed_socket_connect_cb;
2984 ks_cb.ksock_cb_connectfailed = idm_so_timed_socket_connect_cb;
2985 ks_cb.ksock_cb_disconnected = idm_so_timed_socket_connect_cb;
2986 cv_init(&it.it_cv, NULL, CV_DEFAULT, NULL);
2987 rc = ksocket_setcallbacks(ks, &ks_cb, &it, CRED());
2988 if (rc != 0)
2989 return (rc);
2990
2991 /* Set to non-blocking mode */
2992 nonblocking = 1;
2993 rc = ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
2994 CRED());
2995 if (rc != 0)
2996 goto cleanup;
2997
2998 bzero(&it, sizeof (it));
2999 for (;;) {
3000 /*
3001 * Warning -- in a loopback scenario, the call to
3002 * the connect_cb can occur inside the call to
3003 * ksocket_connect. Do not hold the mutex around the
3004 * call to ksocket_connect.
3005 */
3006 rc = ksocket_connect(ks, (struct sockaddr *)sa, sa_sz, CRED());
3007 if (rc == 0 || rc == EISCONN) {
3008 			/* connect succeeded or was already connected */
3009 rc = 0;
3010 break;
3011 }
3012 if ((rc != EINPROGRESS) && (rc != EALREADY)) {
3013 break;
3014 }
3015
3016 /* TCP connect still in progress. See if out of time. */
3017 if (ddi_get_lbolt() > conn_login_max) {
3018 /*
3019 * Connection retry timeout,
3020 * failed connect to target.
3021 */
3022 rc = ETIMEDOUT;
3023 break;
3024 }
3025
3026 /*
3027 * TCP connect still in progress. Sleep until callback.
3028 * Do NOT go to sleep if the callback already occurred!
3029 */
3030 mutex_enter(&idm_so_timed_socket_mutex);
3031 if (!it.it_callback_called) {
3032 (void) cv_timedwait(&it.it_cv,
3033 &idm_so_timed_socket_mutex, conn_login_max);
3034 }
3035 if (it.it_callback_called) {
3036 rc = it.it_socket_error_code;
3037 mutex_exit(&idm_so_timed_socket_mutex);
3038 break;
3039 }
3040 /* If timer expires, go call ksocket_connect one last time. */
3041 mutex_exit(&idm_so_timed_socket_mutex);
3042 }
3043
3044 /* resume blocking mode */
3045 nonblocking = 0;
3046 (void) ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
3047 CRED());
3048 cleanup:
3049 (void) ksocket_setcallbacks(ks, NULL, NULL, CRED());
3050 cv_destroy(&it.it_cv);
3051 if (rc != 0) {
3052 idm_soshutdown(ks);
3053 }
3054 return (rc);
3055 }
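
/*
 * Illustrative sketch (not part of the driver): the core of the timed
 * connect above is "sleep on a condition variable until either the ksocket
 * callback fires or the deadline passes", while never sleeping if the
 * callback has already run. Reduced to its essentials, with a hypothetical
 * helper name but the same idm_so_timed_socket_t bookkeeping used above:
 */
static int
idm_example_wait_for_connect_cb(idm_so_timed_socket_t *itp, clock_t deadline)
{
	int rc = EINPROGRESS;

	mutex_enter(&idm_so_timed_socket_mutex);
	if (!itp->it_callback_called) {
		/* cv_timedwait returns -1 if the absolute deadline passes */
		(void) cv_timedwait(&itp->it_cv,
		    &idm_so_timed_socket_mutex, deadline);
	}
	if (itp->it_callback_called)
		rc = itp->it_socket_error_code;	/* 0 means connected */
	mutex_exit(&idm_so_timed_socket_mutex);

	return (rc);
}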
3056
3057
3058 void
3059 idm_addr_to_sa(idm_addr_t *dportal, struct sockaddr_storage *sa)
3060 {
3061 int dp_addr_size;
3062 struct sockaddr_in *sin;
3063 struct sockaddr_in6 *sin6;
3064
3065 /* Build sockaddr_storage for this portal (idm_addr_t) */
3066 bzero(sa, sizeof (*sa));
3067 dp_addr_size = dportal->a_addr.i_insize;
3068 if (dp_addr_size == sizeof (struct in_addr)) {
3069 /* IPv4 */
3070 sa->ss_family = AF_INET;
3071 sin = (struct sockaddr_in *)sa;
3072 sin->sin_port = htons(dportal->a_port);
3073 bcopy(&dportal->a_addr.i_addr.in4,
3074 &sin->sin_addr, sizeof (struct in_addr));
3075 } else if (dp_addr_size == sizeof (struct in6_addr)) {
3076 /* IPv6 */
3077 sa->ss_family = AF_INET6;
3078 sin6 = (struct sockaddr_in6 *)sa;
3079 sin6->sin6_port = htons(dportal->a_port);
3080 bcopy(&dportal->a_addr.i_addr.in6,
3081 &sin6->sin6_addr, sizeof (struct in6_addr));
3082 } else {
3083 ASSERT(0);
3084 }
3085 }
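
/*
 * Illustrative sketch (not part of the driver): building an IPv4 portal
 * address and converting it with idm_addr_to_sa(). The a_addr.i_insize
 * field acts as the address-family discriminator, so it must be set to
 * sizeof (struct in_addr) or sizeof (struct in6_addr) before the call.
 * The helper name and the loopback/3260 values are illustrative only; the
 * bcopy mirrors the one used above rather than assuming the exact type of
 * the in4 member.
 */
static void
idm_example_portal_to_sa(struct sockaddr_storage *ss)
{
	idm_addr_t	dportal;
	struct in_addr	loop4;

	loop4.s_addr = htonl(INADDR_LOOPBACK);

	bzero(&dportal, sizeof (dportal));
	dportal.a_port = 3260;		/* well-known iSCSI port, host order */
	dportal.a_addr.i_insize = sizeof (struct in_addr);
	bcopy(&loop4, &dportal.a_addr.i_addr.in4, sizeof (struct in_addr));

	idm_addr_to_sa(&dportal, ss);	/* yields an AF_INET sockaddr */
}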
3086
3087
3088 /*
3089 * return a human-readable form of a sockaddr_storage, in the form
3090  * [ip-address].port. This is used in calls to logging functions.
3091 * If several calls to idm_sa_ntop are made within the same invocation
3092 * of a logging function, then each one needs its own buf.
3093 */
3094 const char *
3095 idm_sa_ntop(const struct sockaddr_storage *sa,
3096 char *buf, size_t size)
3097 {
3098 static const char bogus_ip[] = "[0].-1";
3099 char tmp[INET6_ADDRSTRLEN];
3100
3101 switch (sa->ss_family) {
3102 case AF_INET6:
3103 {
3104 const struct sockaddr_in6 *in6 =
3105 (const struct sockaddr_in6 *) sa;
3106
3107 if (inet_ntop(in6->sin6_family,
3108 &in6->sin6_addr, tmp, sizeof (tmp)) == NULL) {
3109 goto err;
3110 }
3111 if (strlen(tmp) + sizeof ("[].65535") > size) {
3112 goto err;
3113 }
3114 /* struct sockaddr_storage gets port info from v4 loc */
3115 (void) snprintf(buf, size, "[%s].%u", tmp,
3116 ntohs(in6->sin6_port));
3117 return (buf);
3118 }
3119 case AF_INET:
3120 {
3121 const struct sockaddr_in *in =
3122 (const struct sockaddr_in *) sa;
3123
3124 if (inet_ntop(in->sin_family, &in->sin_addr,
3125 tmp, sizeof (tmp)) == NULL) {
3126 goto err;
3127 }
3128 if (strlen(tmp) + sizeof ("[].65535") > size) {
3129 goto err;
3130 }
3131 (void) snprintf(buf, size, "[%s].%u", tmp,
3132 ntohs(in->sin_port));
3133 return (buf);
3134 }
3135 default:
3136 break;
3137 }
3138 err:
3139 (void) snprintf(buf, size, "%s", bogus_ip);
3140 return (buf);
3141 }
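
/*
 * Illustrative sketch (not part of the driver): typical use of idm_sa_ntop()
 * when logging a peer address. The length check above requires room for the
 * address plus the "[].65535" decoration, so a buffer of INET6_ADDRSTRLEN +
 * sizeof ("[].65535") bytes is always large enough. Each address formatted
 * within a single log call needs its own buffer. The helper name is
 * hypothetical.
 */
static void
idm_example_log_peer_address(const struct sockaddr_storage *ss)
{
	char	buf[INET6_ADDRSTRLEN + sizeof ("[].65535")];

	IDM_CONN_LOG(CE_NOTE, "connected to %s",
	    idm_sa_ntop(ss, buf, sizeof (buf)));
}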
3142