1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26
27 /*
28 * Overview of the RSM Kernel Agent:
29 * ---------------------------------
30 *
31 * rsm.c constitutes the implementation of the RSM kernel agent. The RSM
32 * kernel agent is a pseudo device driver which makes use of the RSMPI
33 * interface on behalf of the RSMAPI user library.
34 *
35 * The kernel agent functionality can be categorized into the following
36 * components:
37 * 1. Driver Infrastructure
38 * 2. Export/Import Segment Management
39 * 3. Internal resource allocation/deallocation
40 *
41 * The driver infrastructure includes the basic module loading entry points
42 * like _init, _info, _fini to load, unload and report information about
43 * the driver module. The driver infrastructure also includes the
44 * autoconfiguration entry points namely, attach, detach and getinfo for
45 * the device autoconfiguration.
46 *
47 * The kernel agent is a pseudo character device driver and exports
48 * a cb_ops structure which defines the driver entry points for character
49  * device access. This includes the open and close entry points. The
50  * other entry points provided are ioctl, devmap, segmap and chpoll.
51  * The read and write entry points are not used since the device is
52  * memory mapped. Also, ddi_prop_op is used for the prop_op entry point.
53 *
54  * The ioctl entry point supports a number of commands, which are used by
55  * the RSMAPI library in order to export and import segments. These
56  * include commands for binding and rebinding the physical pages
57  * allocated to the virtual address range, publishing the export segment,
58  * unpublishing and republishing an export segment, creating an
59  * import segment and a virtual connection from this import segment to
60  * an export segment, performing scatter-gather data transfers, and
61  * barrier operations.
62 *
63 *
64 * Export and Import segments:
65 * ---------------------------
66 *
67  * In order to create an RSM export segment, a process allocates a range in its
68 * virtual address space for the segment using standard Solaris interfaces.
69 * The process then calls RSMAPI, which in turn makes an ioctl call to the
70 * RSM kernel agent for an allocation of physical memory pages and for
71 * creation of the export segment by binding these pages to the virtual
72 * address range. These pages are locked in memory so that remote accesses
73 * are always applied to the correct page. Then the RSM segment is published,
74 * again via RSMAPI making an ioctl to the RSM kernel agent, and a segment id
75 * is assigned to it.
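 *
 * As an illustration, the user-level side of this export flow might look
 * roughly as follows. This is a hedged sketch only: the librsm entry
 * points are those documented for RSMAPI, but the variables (ctrl, exseg,
 * segid, acl) and the exact arguments shown are assumptions, not taken
 * from this file:
 *
 *	va = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *	    MAP_PRIVATE | MAP_ANON, -1, 0);
 *	err = rsm_memseg_export_create(ctrl, &exseg, va, len, 0);
 *	err = rsm_memseg_export_publish(exseg, &segid, acl, acl_len);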
76 *
77 * In order to import a published RSM segment, RSMAPI creates an import
78 * segment and forms a virtual connection across the interconnect to the
79 * export segment, via an ioctl into the kernel agent with the connect
80 * command. The import segment setup is completed by mapping the
81  * local device memory into the importer's virtual address space. The
82 * mapping of the import segment is handled by the segmap/devmap
83 * infrastructure described as follows.
84 *
85 * Segmap and Devmap interfaces:
86 *
87 * The RSM kernel agent allows device memory to be directly accessed by user
88 * threads via memory mapping. In order to do so, the RSM kernel agent
89 * supports the devmap and segmap entry points.
90 *
91  * The segmap entry point (rsm_segmap) is responsible for setting up a memory
92  * mapping as requested by mmap. The devmap entry point (rsm_devmap) is
93  * responsible for exporting the device memory to the user applications.
94  * rsm_segmap calls RSMPI rsm_map to allocate device memory. Then
95  * control is transferred to the devmap_setup call, which calls rsm_devmap.
96 *
97 * rsm_devmap validates the user mapping to the device or kernel memory
98 * and passes the information to the system for setting up the mapping. The
99  * actual setting up of the mapping is done by devmap_devmem_setup (for
100  * device memory) or devmap_umem_setup (for kernel memory). Callbacks are
101 * registered for device context management via the devmap_devmem_setup
102 * or devmap_umem_setup calls. The callbacks are rsmmap_map, rsmmap_unmap,
103 * rsmmap_access, rsmmap_dup. The callbacks are called when a new mapping
104 * is created, a mapping is freed, a mapping is accessed or an existing
105  * mapping is duplicated, respectively. These callbacks allow the RSM kernel
106 * agent to maintain state information associated with the mappings.
107 * The state information is mainly in the form of a cookie list for the import
108 * segment for which mapping has been done.
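 *
 * A minimal sketch of the callback vector described above (the
 * devmap_callback_ctl layout is the standard DDI one; the variable name
 * rsmmap_ops is illustrative here):
 *
 *	static struct devmap_callback_ctl rsmmap_ops = {
 *		DEVMAP_OPS_REV,
 *		rsmmap_map,
 *		rsmmap_access,
 *		rsmmap_dup,
 *		rsmmap_unmap,
 *	};
 *
 * A vector of this shape is passed as the callbackops argument to
 * devmap_devmem_setup or devmap_umem_setup.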
109 *
110 * Forced disconnect of import segments:
111 *
112 * When an exported segment is unpublished, the exporter sends a forced
113 * disconnect message to all its importers. The importer segments are
114 * unloaded and disconnected. This involves unloading the original
115 * mappings and remapping to a preallocated kernel trash page. This is
116 * done by devmap_umem_remap. The trash/dummy page is a kernel page,
117 * preallocated by the kernel agent during attach using ddi_umem_alloc with
118 * the DDI_UMEM_TRASH flag set. This avoids a core dump in the application
119 * due to unloading of the original mappings.
120 *
121 * Additionally every segment has a mapping generation number associated
122 * with it. This is an entry in the barrier generation page, created
123 * during attach time. This mapping generation number for the import
124  * segments is incremented on a forced disconnect to notify the application
125  * of the forced disconnect. On this notification, the application needs
126 * to reconnect the segment to establish a new legitimate mapping.
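 *
 * Conceptually, the importer's validity test reduces to a comparison of
 * this shape (a hedged sketch; the slot indexing into the barrier
 * generation page and the cached generation copy are assumptions, not
 * the actual layout used by this driver):
 *
 *	if (bar_va[seg_slot] != cached_gen)
 *		return (RSMERR_CONN_ABORTED);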
127 *
128 *
129 * Locks used in the kernel agent:
130 * -------------------------------
131 *
132 * The kernel agent uses a variety of mutexes and condition variables for
133 * mutual exclusion of the shared data structures and for synchronization
134 * between the various threads. Some of the locks are described as follows.
135 *
136  * Each resource structure, which represents either an export or an import
137  * segment, has a lock associated with it: the resource mutex, rsmrc_lock.
138 * This is used directly by RSMRC_LOCK and RSMRC_UNLOCK macros and in the
139 * rsmseglock_acquire and rsmseglock_release macros. An additional
140 * lock called the rsmsi_lock is used for the shared import data structure
141 * that is relevant for resources representing import segments. There is
142  * also a condition variable associated with the resource, called s_cv. This
143  * is used to wait for events such as segment state changes.
144 *
145 * The resource structures are allocated from a pool of resource structures,
146 * called rsm_resource. This pool is protected via a reader-writer lock,
147 * called rsmrc_lock.
148 *
149 * There are two separate hash tables, one for the export segments and
150 * one for the import segments. The export segments are inserted into the
151 * export segment hash table only after they have been published and the
152  * import segments are inserted into the import segment hash table only after
153  * they have successfully connected to an exported segment. These tables are
154 * protected via reader-writer locks.
155 *
156 * Debug Support in the kernel agent:
157 * ----------------------------------
158 *
159 * Debugging support in the kernel agent is provided by the following
160 * macros.
161 *
162  * DBG_PRINTF((category, level, message)) is a macro which logs a debug
163  * message to the kernel agent's debug buffer, rsmka_dbg. This debug buffer
164  * can be viewed in kmdb as *rsmka_dbg/s. The message is logged based
165  * on the definition of the category and level. All messages that belong to
166  * the specified category (rsmdbg_category) and are of an equal or greater
167  * severity than the specified level (rsmdbg_level) are logged. The message
168  * is a string which uses the same formatting rules as the strings used in
169  * printf.
170 *
171 * The category defines which component of the kernel agent has logged this
172 * message. There are a number of categories that have been defined such as
173 * RSM_KERNEL_AGENT, RSM_OPS, RSM_IMPORT, RSM_EXPORT etc. A macro,
174  * DBG_ADDCATEGORY is used to add another category to the currently
175 * specified category value so that the component using this new category
176 * can also effectively log debug messages. Thus, the category of a specific
177 * message is some combination of the available categories and we can define
178 * sub-categories if we want a finer level of granularity.
179 *
180 * The level defines the severity of the message. Different level values are
181 * defined, with RSM_ERR being the most severe and RSM_DEBUG_VERBOSE being
182  * the least severe (debug level is 0).
183 *
184 * DBG_DEFINE and DBG_DEFINE_STR are macros provided to declare a debug
185  * variable or a string, respectively.
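 *
 * For example, a typical call combines a category, a level and a
 * printf-style message (this form is used throughout this file; the
 * message text below is only illustrative):
 *
 *	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG,
 *	    "rsm: segment %d published\n", segid));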
186 *
187 *
188 * NOTES:
189 *
190 * Special Fork and Exec Handling:
191 * -------------------------------
192 *
193 * The backing physical pages of an exported segment are always locked down.
194 * Thus, there are two cases in which a process having exported segments
195  * will cause a CPU to hang: (1) the process invokes exec; (2) a process
196 * forks and invokes exit before the duped file descriptors for the export
197 * segments are closed in the child process. The hang is caused because the
198 * address space release algorithm in Solaris VM subsystem is based on a
199 * non-blocking loop which does not terminate while segments are locked
200 * down. In addition to this, Solaris VM subsystem lacks a callback
201 * mechanism to the rsm kernel agent to allow unlocking these export
202 * segment pages.
203 *
204 * In order to circumvent this problem, the kernel agent does the following.
205  * The Solaris VM subsystem keeps memory segments in increasing order of
206  * virtual addresses. Thus a special page (special_exit_offset) is allocated
207  * by the kernel agent and is mmapped into the heap area of the process
208  * address space (the mmap is done by the RSMAPI library). During the mmap
209  * processing of this special page by the devmap infrastructure, a callback
210  * (the same devmap context management callbacks discussed above) is
211  * registered for an unmap.
212 *
213 * As discussed above, this page is processed by the Solaris address space
214  * release code before any of the exported segment pages (which are allocated
215 * from high memory). It is during this processing that the unmap callback gets
216 * called and this callback is responsible for force destroying the exported
217 * segments and thus eliminating the problem of locked pages.
218 *
219 * Flow-control:
220 * ------------
221 *
222  * A credit-based flow control algorithm is used for messages whose
223  * processing cannot be done in the interrupt context because it might
224  * involve invoking rsmpi calls, might take a long time to complete,
225  * or might need to allocate resources. The algorithm operates on a per
226  * path basis. To send a message, the pathend needs a credit, and it
227  * consumes one for every message that is flow controlled. On the
228  * receiving pathend, the message is put on a msgbuf_queue and a task is
229  * dispatched on the worker taskq, recv_taskq, where it is processed.
230  * After processing the message, the receiving pathend dequeues the message,
231  * and if it has processed > RSMIPC_LOTSFREE_MSGBUFS messages, it sends
232  * credits back to the sending pathend.
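 *
 * In outline, the sending side of this scheme behaves like the following
 * sketch (the structure and field names here are illustrative, not the
 * actual path_t layout):
 *
 *	mutex_enter(&path->mutex);
 *	if (path->sendq_token.msgbuf_avail == 0) {
 *		mutex_exit(&path->mutex);
 *		return (RSMERR_INSUFFICIENT_RESOURCES);
 *	}
 *	path->sendq_token.msgbuf_avail--;
 *	mutex_exit(&path->mutex);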
233 *
234 * RSM_DRTEST:
235 * -----------
236 *
237  * This is used to enable DR testing using a test driver on test
238  * platforms which do not support DR.
239 *
240 */
241
242 #include <sys/types.h>
243 #include <sys/param.h>
244 #include <sys/user.h>
245 #include <sys/buf.h>
246 #include <sys/systm.h>
247 #include <sys/cred.h>
248 #include <sys/vm.h>
249 #include <sys/uio.h>
250 #include <vm/seg.h>
251 #include <vm/page.h>
252 #include <sys/stat.h>
253
254 #include <sys/time.h>
255 #include <sys/errno.h>
256
257 #include <sys/file.h>
259 #include <sys/proc.h>
260 #include <sys/mman.h>
261 #include <sys/open.h>
262 #include <sys/atomic.h>
263 #include <sys/mem_config.h>
264
265
266 #include <sys/ddi.h>
267 #include <sys/devops.h>
268 #include <sys/ddidevmap.h>
269 #include <sys/sunddi.h>
270 #include <sys/esunddi.h>
271 #include <sys/ddi_impldefs.h>
272
273 #include <sys/kmem.h>
274 #include <sys/conf.h>
277
278 #include <sys/modctl.h>
279
280 #include <sys/policy.h>
284
285 #include <sys/taskq.h>
286
287 #include <sys/rsm/rsm_common.h>
288 #include <sys/rsm/rsmapi_common.h>
289 #include <sys/rsm/rsm.h>
290 #include <rsm_in.h>
291 #include <sys/rsm/rsmka_path_int.h>
292 #include <sys/rsm/rsmpi.h>
293
295 #include <sys/debug.h>
296
297 #include <sys/tuneable.h>
298
299 #ifdef RSM_DRTEST
300 extern int rsm_kphysm_setup_func_register(kphysm_setup_vector_t *vec,
301 void *arg);
302 extern void rsm_kphysm_setup_func_unregister(kphysm_setup_vector_t *vec,
303 void *arg);
304 #endif
305
306 extern void dbg_printf(int category, int level, char *fmt, ...);
307 extern void rsmka_pathmanager_init();
308 extern void rsmka_pathmanager_cleanup();
309 extern void rele_sendq_token();
310 extern rsm_addr_t get_remote_hwaddr(adapter_t *, rsm_node_id_t);
311 extern rsm_node_id_t get_remote_nodeid(adapter_t *, rsm_addr_t);
312 extern int rsmka_topology_ioctl(caddr_t, int, int);
313
314 extern pri_t maxclsyspri;
315 extern work_queue_t work_queue;
316 extern kmutex_t ipc_info_lock;
317 extern kmutex_t ipc_info_cvlock;
318 extern kcondvar_t ipc_info_cv;
319 extern kmutex_t path_hold_cvlock;
320 extern kcondvar_t path_hold_cv;
321
322 extern kmutex_t rsmka_buf_lock;
323
324 extern path_t *rsm_find_path(char *, int, rsm_addr_t);
325 extern adapter_t *rsmka_lookup_adapter(char *, int);
326 extern sendq_token_t *rsmka_get_sendq_token(rsm_node_id_t, sendq_token_t *);
327 extern boolean_t rsmka_do_path_active(path_t *, int);
328 extern boolean_t rsmka_check_node_alive(rsm_node_id_t);
329 extern void rsmka_release_adapter(adapter_t *);
330 extern void rsmka_enqueue_msgbuf(path_t *path, void *data);
331 extern void rsmka_dequeue_msgbuf(path_t *path);
332 extern msgbuf_elem_t *rsmka_gethead_msgbuf(path_t *path);
333 /* lint -w2 */
334
335 static int rsm_open(dev_t *, int, int, cred_t *);
336 static int rsm_close(dev_t, int, int, cred_t *);
337 static int rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
338 cred_t *credp, int *rvalp);
339 static int rsm_devmap(dev_t, devmap_cookie_t, offset_t, size_t, size_t *,
340 uint_t);
341 static int rsm_segmap(dev_t, off_t, struct as *, caddr_t *, off_t, uint_t,
342 uint_t, uint_t, cred_t *);
343 static int rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
344 struct pollhead **phpp);
345
346 static int rsm_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
347 static int rsm_attach(dev_info_t *, ddi_attach_cmd_t);
348 static int rsm_detach(dev_info_t *, ddi_detach_cmd_t);
349
350 static int rsmipc_send(rsm_node_id_t, rsmipc_request_t *, rsmipc_reply_t *);
351 static void rsm_force_unload(rsm_node_id_t, rsm_memseg_id_t, boolean_t);
352 static void rsm_send_importer_disconnects(rsm_memseg_id_t, rsm_node_id_t);
353 static void rsm_send_republish(rsm_memseg_id_t, rsmapi_access_entry_t *, int,
354 rsm_permission_t);
355 static void rsm_export_force_destroy(ddi_umem_cookie_t *);
356 static void rsmacl_free(rsmapi_access_entry_t *, int);
357 static void rsmpiacl_free(rsm_access_entry_t *, int);
358
359 static int rsm_inc_pgcnt(pgcnt_t);
360 static void rsm_dec_pgcnt(pgcnt_t);
361 static void rsm_free_mapinfo(rsm_mapinfo_t *mapinfop);
362 static rsm_mapinfo_t *rsm_get_mapinfo(rsmseg_t *, off_t, size_t, off_t *,
363 size_t *);
364 static void exporter_quiesce();
365 static void rsmseg_suspend(rsmseg_t *, int *);
366 static void rsmsegshare_suspend(rsmseg_t *);
367 static int rsmseg_resume(rsmseg_t *, void **);
368 static int rsmsegshare_resume(rsmseg_t *);
369
370 static struct cb_ops rsm_cb_ops = {
371 rsm_open, /* open */
372 rsm_close, /* close */
373 nodev, /* strategy */
374 nodev, /* print */
375 nodev, /* dump */
376 nodev, /* read */
377 nodev, /* write */
378 rsm_ioctl, /* ioctl */
379 rsm_devmap, /* devmap */
380 NULL, /* mmap */
381 rsm_segmap, /* segmap */
382 rsm_chpoll, /* poll */
383 ddi_prop_op, /* cb_prop_op */
384 0, /* streamtab */
385 D_NEW|D_MP|D_DEVMAP, /* Driver compatibility flag */
386 0,
387 0,
388 0
389 };
390
391 static struct dev_ops rsm_ops = {
392 DEVO_REV, /* devo_rev, */
393 0, /* refcnt */
394 rsm_info, /* get_dev_info */
395 nulldev, /* identify */
396 nulldev, /* probe */
397 rsm_attach, /* attach */
398 rsm_detach, /* detach */
399 nodev, /* reset */
400 &rsm_cb_ops, /* driver operations */
401 (struct bus_ops *)0, /* bus operations */
402 0,
403 ddi_quiesce_not_needed, /* quiesce */
404 };
405
406 /*
407 * Module linkage information for the kernel.
408 */
409
410 static struct modldrv modldrv = {
411 &mod_driverops, /* Type of module. This one is a pseudo driver */
412 "Remote Shared Memory Driver",
413 &rsm_ops, /* driver ops */
414 };
415
416 static struct modlinkage modlinkage = {
417 MODREV_1,
418 (void *)&modldrv,
419 0,
420 0,
421 0
422 };
423
424 static void rsm_dr_callback_post_add(void *arg, pgcnt_t delta);
425 static int rsm_dr_callback_pre_del(void *arg, pgcnt_t delta);
426 static void rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled);
427
428 static kphysm_setup_vector_t rsm_dr_callback_vec = {
429 KPHYSM_SETUP_VECTOR_VERSION,
430 rsm_dr_callback_post_add,
431 rsm_dr_callback_pre_del,
432 rsm_dr_callback_post_del
433 };
434
435 /* This flag can be changed to 0 to help with PIT testing */
436 int rsmka_modunloadok = 1;
437 int no_reply_cnt = 0;
438
439 uint64_t rsm_ctrlmsg_errcnt = 0;
440 uint64_t rsm_ipcsend_errcnt = 0;
441
442 #define MAX_NODES 64
443
444 static struct rsm_driver_data rsm_drv_data;
445 static struct rsmresource_table rsm_resource;
446
447 static void rsmresource_insert(minor_t, rsmresource_t *, rsm_resource_type_t);
448 static void rsmresource_destroy(void);
449 static int rsmresource_alloc(minor_t *);
450 static rsmresource_t *rsmresource_free(minor_t rnum);
451 static int rsm_closeconnection(rsmseg_t *seg, void **cookie);
452 static int rsm_unpublish(rsmseg_t *seg, int mode);
453 static int rsm_unbind(rsmseg_t *seg);
454 static uint_t rsmhash(rsm_memseg_id_t key);
455 static void rsmhash_alloc(rsmhash_table_t *rhash, int size);
456 static void rsmhash_free(rsmhash_table_t *rhash, int size);
457 static void *rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval);
458 static void **rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval);
459 static int rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid,
460 void *cookie);
461 int rsm_disconnect(rsmseg_t *seg);
462 void rsmseg_unload(rsmseg_t *);
463 void rsm_suspend_complete(rsm_node_id_t src_node, int flag);
464
465 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
466 rsm_intr_q_op_t opcode, rsm_addr_t src,
467 void *data, size_t size, rsm_intr_hand_arg_t arg);
468
469 static void rsm_intr_callback(void *, rsm_addr_t, rsm_intr_hand_arg_t);
470
471 rsm_node_id_t my_nodeid;
472
473 /* cookie, va, offsets and length for the barrier */
474 static rsm_gnum_t *bar_va;
475 static ddi_umem_cookie_t bar_cookie;
476 static off_t barrier_offset;
477 static size_t barrier_size;
478 static int max_segs;
479
480 /* cookie for the trash memory */
481 static ddi_umem_cookie_t remap_cookie;
482
483 static rsm_memseg_id_t rsm_nextavail_segmentid;
484
485 extern taskq_t *work_taskq;
486 extern char *taskq_name;
487
488 static dev_info_t *rsm_dip; /* private copy of devinfo pointer */
489
490 static rsmhash_table_t rsm_export_segs; /* list of exported segs */
491 rsmhash_table_t rsm_import_segs; /* list of imported segs */
492 static rsmhash_table_t rsm_event_queues; /* list of event queues */
493
494 static rsm_ipc_t rsm_ipc; /* ipc info */
495
496 /* list of nodes to which RSMIPC_MSG_SUSPEND has been sent */
497 static list_head_t rsm_suspend_list;
498
499 /* list of descriptors for remote importers */
500 static importers_table_t importer_list;
501
502 kmutex_t rsm_suspend_cvlock;
503 kcondvar_t rsm_suspend_cv;
504
505 static kmutex_t rsm_lock;
506
507 adapter_t loopback_adapter;
508 rsm_controller_attr_t loopback_attr;
509
510 int rsmipc_send_controlmsg(path_t *path, int msgtype);
511
512 void rsmka_init_loopback();
513
514 int rsmka_null_seg_create(
515 rsm_controller_handle_t,
516 rsm_memseg_export_handle_t *,
517 size_t,
518 uint_t,
519 rsm_memory_local_t *,
520 rsm_resource_callback_t,
521 rsm_resource_callback_arg_t);
522
523 int rsmka_null_seg_destroy(
524 rsm_memseg_export_handle_t);
525
526 int rsmka_null_bind(
527 rsm_memseg_export_handle_t,
528 off_t,
529 rsm_memory_local_t *,
530 rsm_resource_callback_t,
531 rsm_resource_callback_arg_t);
532
533 int rsmka_null_unbind(
534 rsm_memseg_export_handle_t,
535 off_t,
536 size_t);
537
538 int rsmka_null_rebind(
539 rsm_memseg_export_handle_t,
540 off_t,
541 rsm_memory_local_t *,
542 rsm_resource_callback_t,
543 rsm_resource_callback_arg_t);
544
545 int rsmka_null_publish(
546 rsm_memseg_export_handle_t,
547 rsm_access_entry_t [],
548 uint_t,
549 rsm_memseg_id_t,
550 rsm_resource_callback_t,
551 rsm_resource_callback_arg_t);
552
553
554 int rsmka_null_republish(
555 rsm_memseg_export_handle_t,
556 rsm_access_entry_t [],
557 uint_t,
558 rsm_resource_callback_t,
559 rsm_resource_callback_arg_t);
560
561 int rsmka_null_unpublish(
562 rsm_memseg_export_handle_t);
563
564 rsm_ops_t null_rsmpi_ops;
565
566 /*
567 * data and locks to keep track of total amount of exported memory
568 */
569 static pgcnt_t rsm_pgcnt;
570 static pgcnt_t rsm_pgcnt_max; /* max allowed */
571 static kmutex_t rsm_pgcnt_lock;
572
573 static int rsm_enable_dr;
574
575 static char loopback_str[] = "loopback";
576
577 int rsm_hash_size;
578
579 /*
580 * The locking model is as follows:
581 *
582 * Local operations:
583  *	find resource - grab reader lock on resource list
584 * insert rc - grab writer lock
585 * delete rc - grab writer lock and resource mutex
586 * read/write - no lock
587 *
588 * Remote invocations:
589 * find resource - grab read lock and resource mutex
590 *
591 * State:
592 * resource state - grab resource mutex
593 */
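
/*
 * For instance, the "find resource" pattern above corresponds to code of
 * the following shape (a condensed sketch of rsmresource_lookup defined
 * later in this file):
 *
 *	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
 *	p = rsm_resource.rsmrc_root[i]->rsmrcblk_blks[j];
 *	if ((p != NULL) && (p != RSMRC_RESERVED))
 *		mutex_enter(&p->rsmrc_lock);
 *	rw_exit(&rsm_resource.rsmrc_lock);
 */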
594
595 int
596 _init(void)
597 {
598 int e;
599
600 e = mod_install(&modlinkage);
601 if (e != 0) {
602 return (e);
603 }
604
605 mutex_init(&rsm_lock, NULL, MUTEX_DRIVER, NULL);
606
607 mutex_init(&rsmka_buf_lock, NULL, MUTEX_DEFAULT, NULL);
608
609
610 rw_init(&rsm_resource.rsmrc_lock, NULL, RW_DRIVER, NULL);
611
612 rsm_hash_size = RSM_HASHSZ;
613
614 rw_init(&rsm_export_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);
615
616 rw_init(&rsm_import_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);
617
618 mutex_init(&importer_list.lock, NULL, MUTEX_DRIVER, NULL);
619
620 mutex_init(&rsm_ipc.lock, NULL, MUTEX_DRIVER, NULL);
621 cv_init(&rsm_ipc.cv, NULL, CV_DRIVER, 0);
622
623 mutex_init(&rsm_suspend_cvlock, NULL, MUTEX_DRIVER, NULL);
624 cv_init(&rsm_suspend_cv, NULL, CV_DRIVER, 0);
625
626 mutex_init(&rsm_drv_data.drv_lock, NULL, MUTEX_DRIVER, NULL);
627 cv_init(&rsm_drv_data.drv_cv, NULL, CV_DRIVER, 0);
628
629 rsm_ipc.count = RSMIPC_SZ;
630 rsm_ipc.wanted = 0;
631 rsm_ipc.sequence = 0;
632
633 (void) mutex_init(&rsm_pgcnt_lock, NULL, MUTEX_DRIVER, NULL);
634
635 for (e = 0; e < RSMIPC_SZ; e++) {
636 rsmipc_slot_t *slot = &rsm_ipc.slots[e];
637
638 RSMIPC_SET(slot, RSMIPC_FREE);
639 mutex_init(&slot->rsmipc_lock, NULL, MUTEX_DRIVER, NULL);
640 cv_init(&slot->rsmipc_cv, NULL, CV_DRIVER, 0);
641 }
642
643 /*
644 * Initialize the suspend message list
645 */
646 rsm_suspend_list.list_head = NULL;
647 mutex_init(&rsm_suspend_list.list_lock, NULL, MUTEX_DRIVER, NULL);
648
649 /*
650 * It is assumed here that configuration data is available
651 * during system boot since _init may be called at that time.
652 */
653
654 rsmka_pathmanager_init();
655
656 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
657 "rsm: _init done\n"));
658
659 return (DDI_SUCCESS);
660
661 }
662
663 int
664 _info(struct modinfo *modinfop)
665 {
666
667 return (mod_info(&modlinkage, modinfop));
668 }
669
670 int
671 _fini(void)
672 {
673 int e;
674
675 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
676 "rsm: _fini enter\n"));
677
678 /*
679 * The rsmka_modunloadok flag is simply used to help with
680 * the PIT testing. Make this flag 0 to disallow modunload.
681 */
682 if (rsmka_modunloadok == 0)
683 return (EBUSY);
684
685 /* rsm_detach will be called as a result of mod_remove */
686 e = mod_remove(&modlinkage);
687 if (e) {
688 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_ERR,
689 "Unable to fini RSM %x\n", e));
690 return (e);
691 }
692
693 rsmka_pathmanager_cleanup();
694
695 rw_destroy(&rsm_resource.rsmrc_lock);
696
697 rw_destroy(&rsm_export_segs.rsmhash_rw);
698 rw_destroy(&rsm_import_segs.rsmhash_rw);
699 rw_destroy(&rsm_event_queues.rsmhash_rw);
700
701 mutex_destroy(&importer_list.lock);
702
703 mutex_destroy(&rsm_ipc.lock);
704 cv_destroy(&rsm_ipc.cv);
705
706 (void) mutex_destroy(&rsm_suspend_list.list_lock);
707
708 (void) mutex_destroy(&rsm_pgcnt_lock);
709
710 DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, "_fini done\n"));
711
712 return (DDI_SUCCESS);
713
714 }
715
716 /*ARGSUSED1*/
717 static int
718 rsm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
719 {
720 minor_t rnum;
721 int percent;
722 int ret;
723 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
724
725 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach enter\n"));
726
727 switch (cmd) {
728 case DDI_ATTACH:
729 break;
730 case DDI_RESUME:
731 default:
732 DBG_PRINTF((category, RSM_ERR,
733 "rsm:rsm_attach - cmd not supported\n"));
734 return (DDI_FAILURE);
735 }
736
737 if (rsm_dip != NULL) {
738 DBG_PRINTF((category, RSM_ERR,
739 "rsm:rsm_attach - supports only "
740 "one instance\n"));
741 return (DDI_FAILURE);
742 }
743
744 rsm_enable_dr = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
745 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
746 "enable-dynamic-reconfiguration", 1);
747
748 mutex_enter(&rsm_drv_data.drv_lock);
749 rsm_drv_data.drv_state = RSM_DRV_REG_PROCESSING;
750 mutex_exit(&rsm_drv_data.drv_lock);
751
752 if (rsm_enable_dr) {
753 #ifdef RSM_DRTEST
754 ret = rsm_kphysm_setup_func_register(&rsm_dr_callback_vec,
755 (void *)NULL);
756 #else
757 ret = kphysm_setup_func_register(&rsm_dr_callback_vec,
758 (void *)NULL);
759 #endif
760 		if (ret != 0) {
			/* drv_lock was already dropped above; no mutex_exit here */
762 			cmn_err(CE_CONT, "rsm:rsm_attach - Dynamic "
763 			    "reconfiguration setup failed\n");
764 			return (DDI_FAILURE);
765 		}
766 }
767
768 mutex_enter(&rsm_drv_data.drv_lock);
769 ASSERT(rsm_drv_data.drv_state == RSM_DRV_REG_PROCESSING);
770 rsm_drv_data.drv_state = RSM_DRV_OK;
771 cv_broadcast(&rsm_drv_data.drv_cv);
772 mutex_exit(&rsm_drv_data.drv_lock);
773
774 /*
775 * page_list_read_lock();
776 * xx_setup();
777 * page_list_read_unlock();
778 */
779
780 rsm_hash_size = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
781 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
782 "segment-hashtable-size", RSM_HASHSZ);
783 if (rsm_hash_size == 0) {
784 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
785 "rsm: segment-hashtable-size in rsm.conf "
786 "must be greater than 0, defaulting to 128\n"));
787 rsm_hash_size = RSM_HASHSZ;
788 }
789
790 DBG_PRINTF((category, RSM_DEBUG, "rsm_attach rsm_hash_size: %d\n",
791 rsm_hash_size));
792
793 rsm_pgcnt = 0;
794
795 percent = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
796 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
797 "max-exported-memory", 0);
798 if (percent < 0) {
799 DBG_PRINTF((category, RSM_ERR,
800 "rsm:rsm_attach not enough memory available to "
801 "export, or max-exported-memory set incorrectly.\n"));
802 return (DDI_FAILURE);
803 }
804 /* 0 indicates no fixed upper limit. maxmem is the max */
805 /* available pageable physical mem */
806 	rsm_pgcnt_max = (percent * maxmem) / 100;
807
808 if (rsm_pgcnt_max > 0) {
809 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
810 "rsm: Available physical memory = %lu pages, "
811 "Max exportable memory = %lu pages",
812 maxmem, rsm_pgcnt_max));
813 }
814
815 /*
816 * Create minor number
817 */
818 if (rsmresource_alloc(&rnum) != RSM_SUCCESS) {
819 DBG_PRINTF((category, RSM_ERR,
820 "rsm: rsm_attach - Unable to get "
821 "minor number\n"));
822 return (DDI_FAILURE);
823 }
824
825 ASSERT(rnum == RSM_DRIVER_MINOR);
826
827 if (ddi_create_minor_node(devi, DRIVER_NAME, S_IFCHR,
828 rnum, DDI_PSEUDO, NULL) == DDI_FAILURE) {
829 DBG_PRINTF((category, RSM_ERR,
830 "rsm: rsm_attach - unable to allocate "
831 "minor #\n"));
832 return (DDI_FAILURE);
833 }
834
835 rsm_dip = devi;
836 /*
837 * Allocate the hashtables
838 */
839 rsmhash_alloc(&rsm_export_segs, rsm_hash_size);
840 rsmhash_alloc(&rsm_import_segs, rsm_hash_size);
841
842 importer_list.bucket = (importing_token_t **)
843 kmem_zalloc(rsm_hash_size * sizeof (importing_token_t *), KM_SLEEP);
844
845 /*
846 * Allocate a resource struct
847 */
848 {
849 rsmresource_t *p;
850
851 p = (rsmresource_t *)kmem_zalloc(sizeof (*p), KM_SLEEP);
852
853 mutex_init(&p->rsmrc_lock, NULL, MUTEX_DRIVER, (void *) NULL);
854
855 rsmresource_insert(rnum, p, RSM_RESOURCE_BAR);
856 }
857
858 /*
859 * Based on the rsm.conf property max-segments, determine the maximum
860 * number of segments that can be exported/imported. This is then used
861 * to determine the size for barrier failure pages.
862 */
863
864 /* First get the max number of segments from the rsm.conf file */
865 max_segs = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
866 DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
867 "max-segments", 0);
868 if (max_segs == 0) {
869 /* Use default number of segments */
870 max_segs = RSM_MAX_NUM_SEG;
871 }
872
873 /*
874 * Based on the max number of segments allowed, determine the barrier
875  * page size. Add 1 to max_segs since the barrier page itself uses
876 * a slot
877 */
878 barrier_size = roundup((max_segs + 1) * sizeof (rsm_gnum_t),
879 PAGESIZE);
880
881 /*
882 * allocation of the barrier failure page
883 */
884 bar_va = (rsm_gnum_t *)ddi_umem_alloc(barrier_size,
885 DDI_UMEM_SLEEP, &bar_cookie);
886
887 /*
888 * Set the barrier_offset
889 */
890 barrier_offset = 0;
891
892 /*
893  * Allocate trash memory and get a cookie for it. This will be used
894  * when remapping segments during forced disconnects. Allocate the
895 * trash memory with a large size which is page aligned.
896 */
897 (void) ddi_umem_alloc((size_t)TRASHSIZE,
898 DDI_UMEM_TRASH, &remap_cookie);
899
900 /* initialize user segment id allocation variable */
901 rsm_nextavail_segmentid = (rsm_memseg_id_t)RSM_USER_APP_ID_BASE;
902
903 /*
904 * initialize the null_rsmpi_ops vector and the loopback adapter
905 */
906 rsmka_init_loopback();
907
908
909 ddi_report_dev(devi);
910
911 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach done\n"));
912
913 return (DDI_SUCCESS);
914 }
915
916 /*
917  * The call to mod_remove in the _fini routine will cause the system
918  * to call rsm_detach.
919 */
920 /*ARGSUSED*/
921 static int
922 rsm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
923 {
924 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
925
926 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach enter\n"));
927
928 switch (cmd) {
929 case DDI_DETACH:
930 break;
931 default:
932 DBG_PRINTF((category, RSM_ERR,
933 "rsm:rsm_detach - cmd %x not supported\n",
934 cmd));
935 return (DDI_FAILURE);
936 }
937
938 mutex_enter(&rsm_drv_data.drv_lock);
939 while (rsm_drv_data.drv_state != RSM_DRV_OK)
940 cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
941 rsm_drv_data.drv_state = RSM_DRV_UNREG_PROCESSING;
942 mutex_exit(&rsm_drv_data.drv_lock);
943
944 /*
945 * Unregister the DR callback functions
946 */
947 if (rsm_enable_dr) {
948 #ifdef RSM_DRTEST
949 rsm_kphysm_setup_func_unregister(&rsm_dr_callback_vec,
950 (void *)NULL);
951 #else
952 kphysm_setup_func_unregister(&rsm_dr_callback_vec,
953 (void *)NULL);
954 #endif
955 }
956
957 mutex_enter(&rsm_drv_data.drv_lock);
958 ASSERT(rsm_drv_data.drv_state == RSM_DRV_UNREG_PROCESSING);
959 rsm_drv_data.drv_state = RSM_DRV_NEW;
960 mutex_exit(&rsm_drv_data.drv_lock);
961
962 ASSERT(rsm_suspend_list.list_head == NULL);
963
964 /*
965 * Release all resources, seglist, controller, ...
966 */
967
968 /* remove intersend queues */
969 /* remove registered services */
970
971
972 ddi_remove_minor_node(dip, DRIVER_NAME);
973 rsm_dip = NULL;
974
975 /*
976 * Free minor zero resource
977 */
978 {
979 rsmresource_t *p;
980
981 p = rsmresource_free(RSM_DRIVER_MINOR);
982 if (p) {
983 mutex_destroy(&p->rsmrc_lock);
984 kmem_free((void *)p, sizeof (*p));
985 }
986 }
987
988 /*
989 * Free resource table
990 */
991
992 rsmresource_destroy();
993
994 /*
995 * Free the hash tables
996 */
997 rsmhash_free(&rsm_export_segs, rsm_hash_size);
998 rsmhash_free(&rsm_import_segs, rsm_hash_size);
999
1000 kmem_free((void *)importer_list.bucket,
1001 rsm_hash_size * sizeof (importing_token_t *));
1002 importer_list.bucket = NULL;
1003
1004
1005 /* free barrier page */
1006 if (bar_cookie != NULL) {
1007 ddi_umem_free(bar_cookie);
1008 }
1009 bar_va = NULL;
1010 bar_cookie = NULL;
1011
1012 /*
1013 * Free the memory allocated for the trash
1014 */
1015 if (remap_cookie != NULL) {
1016 ddi_umem_free(remap_cookie);
1017 }
1018 remap_cookie = NULL;
1019
1020 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach done\n"));
1021
1022 return (DDI_SUCCESS);
1023 }
1024
1025 /*ARGSUSED*/
1026 static int
1027 rsm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1028 {
1029 register int error;
1030 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
1031
1032 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info enter\n"));
1033
1034 switch (infocmd) {
1035 case DDI_INFO_DEVT2DEVINFO:
1036 if (rsm_dip == NULL)
1037 error = DDI_FAILURE;
1038 else {
1039 *result = (void *)rsm_dip;
1040 error = DDI_SUCCESS;
1041 }
1042 break;
1043 case DDI_INFO_DEVT2INSTANCE:
1044 *result = (void *)0;
1045 error = DDI_SUCCESS;
1046 break;
1047 default:
1048 error = DDI_FAILURE;
1049 }
1050
1051 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info done\n"));
1052 return (error);
1053 }
1054
1055 adapter_t *
1056 rsm_getadapter(rsm_ioctlmsg_t *msg, int mode)
1057 {
1058 adapter_t *adapter;
1059 char adapter_devname[MAXNAMELEN];
1060 int instance;
1061 DBG_DEFINE(category,
1062 RSM_KERNEL_AGENT | RSM_IMPORT | RSM_EXPORT | RSM_IOCTL);
1063
1064 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter enter\n"));
1065
1066 instance = msg->cnum;
1067
1068 if ((msg->cname_len <= 0) || (msg->cname_len > MAXNAMELEN)) {
1069 return (NULL);
1070 }
1071
1072 if (ddi_copyin(msg->cname, adapter_devname, msg->cname_len, mode))
1073 return (NULL);
1074
1075 if (strcmp(adapter_devname, "loopback") == 0)
1076 return (&loopback_adapter);
1077
1078 adapter = rsmka_lookup_adapter(adapter_devname, instance);
1079
1080 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter done\n"));
1081
1082 return (adapter);
1083 }
1084
1085
1086 /*
1087 * *********************** Resource Number Management ********************
1088 * All resources are stored in a simple hash table. The table is an array
1089 * of pointers to resource blks. Each blk contains:
1090 * base - base number of this blk
1091 * used - number of used slots in this blk.
1092 * blks - array of pointers to resource items.
1093 * An entry in a resource blk is empty if it's NULL.
1094 *
1095  * We start with no resource array. Each time we run out of slots, we
1096  * reallocate a larger array, copy the existing pointers into it, and
1097  * allocate a new resource blk which is added to the hash table.
1098 *
1099 * The resource control block contains:
1100 * root - array of pointer of resource blks
1101 * sz - current size of array.
1102 * len - last valid entry in array.
1103 *
1104 * A search operation based on a resource number is as follows:
1105 * index = rnum / RESOURCE_BLKSZ;
1106 * ASSERT(index < resource_block.len);
1107 * ASSERT(index < resource_block.sz);
1108 * offset = rnum % RESOURCE_BLKSZ;
1109 * ASSERT(offset >= resource_block.root[index]->base);
1110 * ASSERT(offset < resource_block.root[index]->base + RESOURCE_BLKSZ);
1111 * return resource_block.root[index]->blks[offset];
1112 *
1113  * A resource blk is freed when its used count reaches zero.
1114 */
1115 static int
1116 rsmresource_alloc(minor_t *rnum)
1117 {
1118
1119 /* search for available resource slot */
1120 int i, j, empty = -1;
1121 rsmresource_blk_t *blk;
1122
1123 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1124 "rsmresource_alloc enter\n"));
1125
1126 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1127
1128 /* Try to find an empty slot */
1129 for (i = 0; i < rsm_resource.rsmrc_len; i++) {
1130 blk = rsm_resource.rsmrc_root[i];
1131 if (blk != NULL && blk->rsmrcblk_avail > 0) {
1132 /* found an empty slot in this blk */
1133 for (j = 0; j < RSMRC_BLKSZ; j++) {
1134 if (blk->rsmrcblk_blks[j] == NULL) {
1135 *rnum = (minor_t)
1136 (j + (i * RSMRC_BLKSZ));
1137 /*
1138 * obey gen page limits
1139 */
1140 if (*rnum >= max_segs + 1) {
1141 if (empty < 0) {
1142 rw_exit(&rsm_resource.
1143 rsmrc_lock);
1144 DBG_PRINTF((
1145 RSM_KERNEL_ALL,
1146 RSM_ERR,
1147 "rsmresource"
1148 "_alloc failed:"
1149 "not enough res"
1150 "%d\n", *rnum));
1151 return (RSMERR_INSUFFICIENT_RESOURCES);
1152 } else {
1153 /* use empty slot */
1154 break;
1155 }
1156
1157 }
1158
1159 blk->rsmrcblk_blks[j] = RSMRC_RESERVED;
1160 blk->rsmrcblk_avail--;
1161 rw_exit(&rsm_resource.rsmrc_lock);
1162 DBG_PRINTF((RSM_KERNEL_ALL,
1163 RSM_DEBUG_VERBOSE,
1164 "rsmresource_alloc done\n"));
1165 return (RSM_SUCCESS);
1166 }
1167 }
1168 } else if (blk == NULL && empty < 0) {
1169 /* remember first empty slot */
1170 empty = i;
1171 }
1172 }
1173
1174 /* Couldn't find anything, allocate a new blk */
1175 /*
1176 	 * Do we need to reallocate the root array?
1177 */
1178 if (empty < 0) {
1179 if (rsm_resource.rsmrc_len == rsm_resource.rsmrc_sz) {
1180 /*
1181 * Allocate new array and copy current stuff into it
1182 */
1183 rsmresource_blk_t **p;
1184 uint_t newsz = (uint_t)rsm_resource.rsmrc_sz +
1185 RSMRC_BLKSZ;
1186 /*
1187 			 * Don't allocate more than the max valid rnum
1188 */
1189 if (rsm_resource.rsmrc_len*RSMRC_BLKSZ >=
1190 max_segs + 1) {
1191 rw_exit(&rsm_resource.rsmrc_lock);
1192 return (RSMERR_INSUFFICIENT_RESOURCES);
1193 }
1194
1195 p = (rsmresource_blk_t **)kmem_zalloc(
1196 newsz * sizeof (*p),
1197 KM_SLEEP);
1198
1199 if (rsm_resource.rsmrc_root) {
1200 uint_t oldsz;
1201
1202 oldsz = (uint_t)(rsm_resource.rsmrc_sz *
1203 (int)sizeof (*p));
1204
1205 /*
1206 * Copy old data into new space and
1207 * free old stuff
1208 */
1209 bcopy(rsm_resource.rsmrc_root, p, oldsz);
1210 kmem_free(rsm_resource.rsmrc_root, oldsz);
1211 }
1212
1213 rsm_resource.rsmrc_root = p;
1214 rsm_resource.rsmrc_sz = (int)newsz;
1215 }
1216
1217 empty = rsm_resource.rsmrc_len;
1218 rsm_resource.rsmrc_len++;
1219 }
1220
1221 /*
1222 * Allocate a new blk
1223 */
1224 blk = (rsmresource_blk_t *)kmem_zalloc(sizeof (*blk), KM_SLEEP);
1225 ASSERT(rsm_resource.rsmrc_root[empty] == NULL);
1226 rsm_resource.rsmrc_root[empty] = blk;
1227 blk->rsmrcblk_avail = RSMRC_BLKSZ - 1;
1228
1229 /*
1230 * Allocate slot
1231 */
1232
1233 *rnum = (minor_t)(empty * RSMRC_BLKSZ);
1234
1235 /*
1236 * watch out not to exceed bounds of barrier page
1237 */
1238 if (*rnum >= max_segs + 1) {
1239 rw_exit(&rsm_resource.rsmrc_lock);
1240 DBG_PRINTF((RSM_KERNEL_ALL, RSM_ERR,
1241 "rsmresource_alloc failed %d\n", *rnum));
1242
1243 return (RSMERR_INSUFFICIENT_RESOURCES);
1244 }
1245 blk->rsmrcblk_blks[0] = RSMRC_RESERVED;
1246
1247
1248 rw_exit(&rsm_resource.rsmrc_lock);
1249
1250 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1251 "rsmresource_alloc done\n"));
1252
1253 return (RSM_SUCCESS);
1254 }
1255
1256 static rsmresource_t *
1257 rsmresource_free(minor_t rnum)
1258 {
1259
1260 /* search for available resource slot */
1261 int i, j;
1262 rsmresource_blk_t *blk;
1263 rsmresource_t *p;
1264
1265 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1266 "rsmresource_free enter\n"));
1267
1268 i = (int)(rnum / RSMRC_BLKSZ);
1269 j = (int)(rnum % RSMRC_BLKSZ);
1270
1271 if (i >= rsm_resource.rsmrc_len) {
1272 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1273 "rsmresource_free done\n"));
1274 return (NULL);
1275 }
1276
1277 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1278
1279 ASSERT(rsm_resource.rsmrc_root);
1280 ASSERT(i < rsm_resource.rsmrc_len);
1281 ASSERT(i < rsm_resource.rsmrc_sz);
1282 blk = rsm_resource.rsmrc_root[i];
1283 if (blk == NULL) {
1284 rw_exit(&rsm_resource.rsmrc_lock);
1285 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1286 "rsmresource_free done\n"));
1287 return (NULL);
1288 }
1289
1290 ASSERT(blk->rsmrcblk_blks[j]); /* reserved or full */
1291
1292 p = blk->rsmrcblk_blks[j];
1293 if (p == RSMRC_RESERVED) {
1294 p = NULL;
1295 }
1296
1297 blk->rsmrcblk_blks[j] = NULL;
1298 blk->rsmrcblk_avail++;
1299 if (blk->rsmrcblk_avail == RSMRC_BLKSZ) {
1300 /* free this blk */
1301 kmem_free(blk, sizeof (*blk));
1302 rsm_resource.rsmrc_root[i] = NULL;
1303 }
1304
1305 rw_exit(&rsm_resource.rsmrc_lock);
1306
1307 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1308 "rsmresource_free done\n"));
1309
1310 return (p);
1311 }
1312
1313 static rsmresource_t *
1314 rsmresource_lookup(minor_t rnum, int lock)
1315 {
1316 int i, j;
1317 rsmresource_blk_t *blk;
1318 rsmresource_t *p;
1319
1320 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1321 "rsmresource_lookup enter\n"));
1322
1323 /* Find resource and lock it in READER mode */
1324 /* search for available resource slot */
1325
1326 i = (int)(rnum / RSMRC_BLKSZ);
1327 j = (int)(rnum % RSMRC_BLKSZ);
1328
1329 if (i >= rsm_resource.rsmrc_len) {
1330 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1331 "rsmresource_lookup done\n"));
1332 return (NULL);
1333 }
1334
1335 rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
1336
1337 blk = rsm_resource.rsmrc_root[i];
1338 if (blk != NULL) {
1339 ASSERT(i < rsm_resource.rsmrc_len);
1340 ASSERT(i < rsm_resource.rsmrc_sz);
1341
1342 p = blk->rsmrcblk_blks[j];
1343 if (lock == RSM_LOCK) {
1344 if (p != RSMRC_RESERVED) {
1345 mutex_enter(&p->rsmrc_lock);
1346 } else {
1347 p = NULL;
1348 }
1349 }
1350 } else {
1351 p = NULL;
1352 }
1353 rw_exit(&rsm_resource.rsmrc_lock);
1354
1355 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1356 "rsmresource_lookup done\n"));
1357
1358 return (p);
1359 }
1360
1361 static void
1362 rsmresource_insert(minor_t rnum, rsmresource_t *p, rsm_resource_type_t type)
1363 {
1364 	/* Insert the resource into the slot previously reserved */
1365 	/* (RSMRC_RESERVED) by rsmresource_alloc; READER mode on */
1366 	/* the table lock suffices since the slot is already ours */
1367 int i, j;
1368 rsmresource_blk_t *blk;
1369
1370 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1371 "rsmresource_insert enter\n"));
1372
1373 i = (int)(rnum / RSMRC_BLKSZ);
1374 j = (int)(rnum % RSMRC_BLKSZ);
1375
1376 p->rsmrc_type = type;
1377 p->rsmrc_num = rnum;
1378
1379 rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
1380
1381 ASSERT(rsm_resource.rsmrc_root);
1382 ASSERT(i < rsm_resource.rsmrc_len);
1383 ASSERT(i < rsm_resource.rsmrc_sz);
1384
1385 blk = rsm_resource.rsmrc_root[i];
1386 ASSERT(blk);
1387
1388 ASSERT(blk->rsmrcblk_blks[j] == RSMRC_RESERVED);
1389
1390 blk->rsmrcblk_blks[j] = p;
1391
1392 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1393 "rsmresource_insert done\n"));
1394
1395 rw_exit(&rsm_resource.rsmrc_lock);
1396 }
1397
1398 static void
1399 rsmresource_destroy()
1400 {
1401 int i, j;
1402
1403 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1404 "rsmresource_destroy enter\n"));
1405
1406 rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);
1407
1408 for (i = 0; i < rsm_resource.rsmrc_len; i++) {
1409 rsmresource_blk_t *blk;
1410
1411 blk = rsm_resource.rsmrc_root[i];
1412 if (blk == NULL) {
1413 continue;
1414 }
1415 for (j = 0; j < RSMRC_BLKSZ; j++) {
1416 if (blk->rsmrcblk_blks[j] != NULL) {
1417 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1418 "Not null slot %d, %lx\n", j,
1419 (size_t)blk->rsmrcblk_blks[j]));
1420 }
1421 }
1422 kmem_free(blk, sizeof (*blk));
1423 rsm_resource.rsmrc_root[i] = NULL;
1424 }
1425 if (rsm_resource.rsmrc_root) {
1426 i = rsm_resource.rsmrc_sz * (int)sizeof (rsmresource_blk_t *);
1427 kmem_free(rsm_resource.rsmrc_root, (uint_t)i);
1428 rsm_resource.rsmrc_root = NULL;
1429 rsm_resource.rsmrc_len = 0;
1430 rsm_resource.rsmrc_sz = 0;
1431 }
1432
1433 DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
1434 "rsmresource_destroy done\n"));
1435
1436 rw_exit(&rsm_resource.rsmrc_lock);
1437 }
1438
1439
1440 /* ******************** Generic Key Hash Table Management ********* */
1441 static rsmresource_t *
1442 rsmhash_lookup(rsmhash_table_t *rhash, rsm_memseg_id_t key,
1443 rsm_resource_state_t state)
1444 {
1445 rsmresource_t *p;
1446 uint_t hashval;
1447 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1448
1449 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup enter\n"));
1450
1451 hashval = rsmhash(key);
1452
1453 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_lookup %u=%d\n",
1454 key, hashval));
1455
1456 rw_enter(&rhash->rsmhash_rw, RW_READER);
1457
1458 p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);
1459
1460 for (; p; p = p->rsmrc_next) {
1461 if (p->rsmrc_key == key) {
1462 /* acquire resource lock */
1463 RSMRC_LOCK(p);
1464 break;
1465 }
1466 }
1467
1468 rw_exit(&rhash->rsmhash_rw);
1469
1470 if (p != NULL && p->rsmrc_state != state) {
1471 /* state changed, release lock and return null */
1472 RSMRC_UNLOCK(p);
1473 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1474 "rsmhash_lookup done: state changed\n"));
1475 return (NULL);
1476 }
1477
1478 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup done\n"));
1479
1480 return (p);
1481 }
1482
1483 static void
1484 rsmhash_rm(rsmhash_table_t *rhash, rsmresource_t *rcelm)
1485 {
1486 rsmresource_t *p, **back;
1487 uint_t hashval;
1488 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1489
1490 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm enter\n"));
1491
1492 hashval = rsmhash(rcelm->rsmrc_key);
1493
1494 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_rm %u=%d\n",
1495 rcelm->rsmrc_key, hashval));
1496
1497 /*
1498 * It's ok not to find the segment.
1499 */
1500 rw_enter(&rhash->rsmhash_rw, RW_WRITER);
1501
1502 back = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);
1503
1504 for (; (p = *back) != NULL; back = &p->rsmrc_next) {
1505 if (p == rcelm) {
1506 *back = rcelm->rsmrc_next;
1507 break;
1508 }
1509 }
1510
1511 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm done\n"));
1512
1513 rw_exit(&rhash->rsmhash_rw);
1514 }
1515
1516 static int
1517 rsmhash_add(rsmhash_table_t *rhash, rsmresource_t *new, rsm_memseg_id_t key,
1518 int dup_check, rsm_resource_state_t state)
1519 {
1520 rsmresource_t *p = NULL, **bktp;
1521 uint_t hashval;
1522 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1523
1524 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add enter\n"));
1525
1526 /* lock table */
1527 rw_enter(&rhash->rsmhash_rw, RW_WRITER);
1528
1529 /*
1530 * If the current resource state is other than the state passed in
1531 	 * then the resource is (probably) already on the list, e.g. for an
1532 * import segment if the state is not RSM_STATE_NEW then it's on the
1533 * list already.
1534 */
1535 RSMRC_LOCK(new);
1536 if (new->rsmrc_state != state) {
1537 RSMRC_UNLOCK(new);
1538 rw_exit(&rhash->rsmhash_rw);
1539 return (RSMERR_BAD_SEG_HNDL);
1540 }
1541
1542 hashval = rsmhash(key);
1543 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_add %d\n", hashval));
1544
1545 if (dup_check) {
1546 /*
1547 * Used for checking export segments; don't want to have
1548 * the same key used for multiple segments.
1549 */
1550
1551 p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);
1552
1553 for (; p; p = p->rsmrc_next) {
1554 if (p->rsmrc_key == key) {
1555 RSMRC_UNLOCK(new);
1556 break;
1557 }
1558 }
1559 }
1560
1561 if (p == NULL) {
1562 /* Key doesn't exist, add it */
1563
1564 bktp = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);
1565
1566 new->rsmrc_key = key;
1567 new->rsmrc_next = *bktp;
1568 *bktp = new;
1569 }
1570
1571 rw_exit(&rhash->rsmhash_rw);
1572
1573 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add done\n"));
1574
1575 return (p == NULL ? RSM_SUCCESS : RSMERR_SEGID_IN_USE);
1576 }
1577
1578 /*
1579 * XOR each byte of the key.
1580 */
1581 static uint_t
1582 rsmhash(rsm_memseg_id_t key)
1583 {
1584 uint_t hash = key;
1585
1586 hash ^= (key >> 8);
1587 hash ^= (key >> 16);
1588 hash ^= (key >> 24);
1589
1590 return (hash % rsm_hash_size);
1591
1592 }
1593
1594 /*
1595 * generic function to get a specific bucket
1596 */
1597 static void *
1598 rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval)
1599 {
1600
1601 if (rhash->bucket == NULL)
1602 return (NULL);
1603 else
1604 return ((void *)rhash->bucket[hashval]);
1605 }
1606
1607 /*
1608 * generic function to get a specific bucket's address
1609 */
1610 static void **
1611 rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval)
1612 {
1613 if (rhash->bucket == NULL)
1614 return (NULL);
1615 else
1616 return ((void **)&(rhash->bucket[hashval]));
1617 }
1618
1619 /*
1620 * generic function to alloc a hash table
1621 */
1622 static void
1623 rsmhash_alloc(rsmhash_table_t *rhash, int size)
1624 {
1625 rhash->bucket = (rsmresource_t **)
1626 kmem_zalloc(size * sizeof (rsmresource_t *), KM_SLEEP);
1627 }
1628
1629 /*
1630 * generic function to free a hash table
1631 */
1632 static void
1633 rsmhash_free(rsmhash_table_t *rhash, int size)
1634 {
1635
1636 kmem_free((void *)rhash->bucket, size * sizeof (caddr_t));
1637 rhash->bucket = NULL;
1638
1639 }
1640 /* *********************** Exported Segment Key Management ************ */
1641
1642 #define rsmexport_add(new, key) \
1643 rsmhash_add(&rsm_export_segs, (rsmresource_t *)new, key, 1, \
1644 RSM_STATE_BIND)
1645
1646 #define rsmexport_rm(arg) \
1647 rsmhash_rm(&rsm_export_segs, (rsmresource_t *)(arg))
1648
1649 #define rsmexport_lookup(key) \
1650 (rsmseg_t *)rsmhash_lookup(&rsm_export_segs, key, RSM_STATE_EXPORT)
1651
1652 /* ************************** Import Segment List Management ********** */
1653
1654 /*
1655 * Add segment to import list. This will be useful for paging and loopback
1656 * segment unloading.
1657 */
1658 #define rsmimport_add(arg, key) \
1659 rsmhash_add(&rsm_import_segs, (rsmresource_t *)(arg), (key), 0, \
1660 RSM_STATE_NEW)
1661
1662 #define rsmimport_rm(arg) \
1663 rsmhash_rm(&rsm_import_segs, (rsmresource_t *)(arg))
1664
1665 /*
1666 * #define rsmimport_lookup(key) \
1667 * (rsmseg_t *)rsmhash_lookup(&rsm_import_segs, (key), RSM_STATE_CONNECT)
1668 */
1669
1670 /*
1671  * Increase the ref count and make the import segment point to the
1672  * shared data structure. Return a pointer to the shared data struct,
1673  * which is locked upon return.
1674 */
1675 static rsm_import_share_t *
1676 rsmshare_get(rsm_memseg_id_t key, rsm_node_id_t node, adapter_t *adapter,
1677 rsmseg_t *segp)
1678 {
1679 uint_t hash;
1680 rsmresource_t *p;
1681 rsm_import_share_t *shdatap;
1682 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1683
1684 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get enter\n"));
1685
1686 hash = rsmhash(key);
1687 /* lock table */
1688 rw_enter(&rsm_import_segs.rsmhash_rw, RW_WRITER);
1689 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmshare_get:key=%u, hash=%d\n",
1690 key, hash));
1691
1692 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hash);
1693
1694 for (; p; p = p->rsmrc_next) {
1695 /*
1696 * Look for an entry that is importing the same exporter
1697 * with the share data structure allocated.
1698 */
1699 if ((p->rsmrc_key == key) &&
1700 (p->rsmrc_node == node) &&
1701 (p->rsmrc_adapter == adapter) &&
1702 (((rsmseg_t *)p)->s_share != NULL)) {
1703 shdatap = ((rsmseg_t *)p)->s_share;
1704 break;
1705 }
1706 }
1707
1708 if (p == NULL) {
1709 /* we are the first importer, create the shared data struct */
1710 shdatap = kmem_zalloc(sizeof (rsm_import_share_t), KM_SLEEP);
1711 shdatap->rsmsi_state = RSMSI_STATE_NEW;
1712 shdatap->rsmsi_segid = key;
1713 shdatap->rsmsi_node = node;
1714 mutex_init(&shdatap->rsmsi_lock, NULL, MUTEX_DRIVER, NULL);
1715 cv_init(&shdatap->rsmsi_cv, NULL, CV_DRIVER, 0);
1716 }
1717
1718 rsmseglock_acquire(segp);
1719
1720 /* we grab the shared lock before returning from this function */
1721 mutex_enter(&shdatap->rsmsi_lock);
1722
1723 shdatap->rsmsi_refcnt++;
1724 segp->s_share = shdatap;
1725
1726 rsmseglock_release(segp);
1727
1728 rw_exit(&rsm_import_segs.rsmhash_rw);
1729
1730 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get done\n"));
1731
1732 return (shdatap);
1733 }
1734
1735 /*
1736 * the shared data structure should be locked before calling
1737 * rsmsharecv_signal().
1738 * Change the state and signal any waiting segments.
1739 */
1740 void
1741 rsmsharecv_signal(rsmseg_t *seg, int oldstate, int newstate)
1742 {
1743 ASSERT(rsmsharelock_held(seg));
1744
1745 if (seg->s_share->rsmsi_state == oldstate) {
1746 seg->s_share->rsmsi_state = newstate;
1747 cv_broadcast(&seg->s_share->rsmsi_cv);
1748 }
1749 }
1750
1751 /*
1752 * Add to the hash table
1753 */
1754 static void
1755 importer_list_add(rsm_node_id_t node, rsm_memseg_id_t key, rsm_addr_t hwaddr,
1756 void *cookie)
1757 {
1758
1759 importing_token_t *head;
1760 importing_token_t *new_token;
1761 int index;
1762
1763 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1764
1765 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add enter\n"));
1766
1767 new_token = kmem_zalloc(sizeof (importing_token_t), KM_SLEEP);
1768 new_token->importing_node = node;
1769 new_token->key = key;
1770 new_token->import_segment_cookie = cookie;
1771 new_token->importing_adapter_hwaddr = hwaddr;
1772
1773 index = rsmhash(key);
1774
1775 mutex_enter(&importer_list.lock);
1776
1777 head = importer_list.bucket[index];
1778 importer_list.bucket[index] = new_token;
1779 new_token->next = head;
1780 mutex_exit(&importer_list.lock);
1781
1782 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add done\n"));
1783 }
1784
1785 static void
1786 importer_list_rm(rsm_node_id_t node, rsm_memseg_id_t key, void *cookie)
1787 {
1788
1789 importing_token_t *prev, *token = NULL;
1790 int index;
1791 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1792
1793 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm enter\n"));
1794
1795 index = rsmhash(key);
1796
1797 mutex_enter(&importer_list.lock);
1798
1799 token = importer_list.bucket[index];
1800
1801 prev = token;
1802 while (token != NULL) {
1803 if (token->importing_node == node &&
1804 token->import_segment_cookie == cookie) {
1805 if (prev == token)
1806 importer_list.bucket[index] = token->next;
1807 else
1808 prev->next = token->next;
1809 kmem_free((void *)token, sizeof (*token));
1810 break;
1811 } else {
1812 prev = token;
1813 token = token->next;
1814 }
1815 }
1816
1817 mutex_exit(&importer_list.lock);
1818
1819 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm done\n"));
1820
1821
1822 }
1823
1824 /* **************************Segment Structure Management ************* */
1825
1826 /*
1827 * Free segment structure
1828 */
1829 static void
1830 rsmseg_free(rsmseg_t *seg)
1831 {
1832
1833 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1834
1835 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free enter\n"));
1836
1837 /* need to take seglock here to avoid race with rsmmap_unmap() */
1838 rsmseglock_acquire(seg);
1839 if (seg->s_ckl != NULL) {
1840 /* Segment is still busy */
1841 seg->s_state = RSM_STATE_END;
1842 rsmseglock_release(seg);
1843 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1844 "rsmseg_free done\n"));
1845 return;
1846 }
1847
1848 rsmseglock_release(seg);
1849
1850 ASSERT(seg->s_state == RSM_STATE_END || seg->s_state == RSM_STATE_NEW);
1851
1852 /*
1853 * If it's an importer, decrement the refcount,
1854 * and if it's down to zero, free the shared data structure.
1855 * This is where failures during rsm_connect() are unrefcounted.
1856 */
1857 if (seg->s_share != NULL) {
1858
1859 ASSERT(seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT);
1860
1861 rsmsharelock_acquire(seg);
1862
1863 ASSERT(seg->s_share->rsmsi_refcnt > 0);
1864
1865 seg->s_share->rsmsi_refcnt--;
1866
1867 if (seg->s_share->rsmsi_refcnt == 0) {
1868 rsmsharelock_release(seg);
1869 mutex_destroy(&seg->s_share->rsmsi_lock);
1870 cv_destroy(&seg->s_share->rsmsi_cv);
1871 kmem_free((void *)(seg->s_share),
1872 sizeof (rsm_import_share_t));
1873 } else {
1874 rsmsharelock_release(seg);
1875 }
1876 /*
1877 * The following needs to be done after any
1878 * rsmsharelock calls which use seg->s_share.
1879 */
1880 seg->s_share = NULL;
1881 }
1882
1883 cv_destroy(&seg->s_cv);
1884 mutex_destroy(&seg->s_lock);
1885 rsmacl_free(seg->s_acl, seg->s_acl_len);
1886 rsmpiacl_free(seg->s_acl_in, seg->s_acl_len);
1887 if (seg->s_adapter)
1888 rsmka_release_adapter(seg->s_adapter);
1889
1890 kmem_free((void *)seg, sizeof (*seg));
1891
1892 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free done\n"));
1893
1894 }
1895
1896
1897 static rsmseg_t *
1898 rsmseg_alloc(minor_t num, struct cred *cred)
1899 {
1900 rsmseg_t *new;
1901 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
1902
1903 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc enter\n"));
1904 /*
1905 * allocate memory for new segment. This should be a segkmem cache.
1906 */
1907 new = (rsmseg_t *)kmem_zalloc(sizeof (*new), KM_SLEEP);
1908
1909 new->s_state = RSM_STATE_NEW;
1910 new->s_minor = num;
1911 new->s_acl_len = 0;
1912 new->s_cookie = NULL;
1913 new->s_adapter = NULL;
1914
1915 new->s_mode = 0777 & ~PTOU((ttoproc(curthread)))->u_cmask;
1916 /* we don't have a key yet, will set at export/connect */
1917 new->s_uid = crgetuid(cred);
1918 new->s_gid = crgetgid(cred);
1919
1920 mutex_init(&new->s_lock, NULL, MUTEX_DRIVER, (void *)NULL);
1921 cv_init(&new->s_cv, NULL, CV_DRIVER, 0);
1922
1923 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc done\n"));
1924
1925 return (new);
1926 }
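
/*
 * Worked example of the s_mode computation above: a creating process
 * with a umask of 022 gets s_mode = 0777 & ~022 = 0755, mirroring
 * ordinary file-creation semantics (the values depend on the process).
 */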
1927
1928 /* ******************************** Driver Open/Close/Poll *************** */
1929
1930 /*ARGSUSED1*/
1931 static int
1932 rsm_open(dev_t *devp, int flag, int otyp, struct cred *cred)
1933 {
1934 minor_t rnum;
1935 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
1936
1937 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open enter\n"));
1938 /*
1939 * Char only
1940 */
1941 if (otyp != OTYP_CHR) {
1942 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad otyp\n"));
1943 return (EINVAL);
1944 }
1945
1946 /*
1947 * Only minor number zero can be opened; clones are used for resources.
1948 */
1949 if (getminor(*devp) != RSM_DRIVER_MINOR) {
1950 DBG_PRINTF((category, RSM_ERR,
1951 "rsm_open: bad minor %d\n", getminor(*devp)));
1952 return (ENODEV);
1953 }
1954
1955 if ((flag & FEXCL) != 0 && secpolicy_excl_open(cred) != 0) {
1956 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad perm\n"));
1957 return (EPERM);
1958 }
1959
1960 if (!(flag & FWRITE)) {
1961 /*
1962 * The library function _rsm_librsm_init calls open for
1963 * /dev/rsm with flag set to O_RDONLY. We want a valid
1964 * file descriptor to be returned for minor device zero.
1965 */
1966
1967 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1968 "rsm_open RDONLY done\n"));
1969 return (DDI_SUCCESS);
1970 }
1971
1972 /*
1973 * - allocate new minor number and segment.
1974 * - add segment to list of all segments.
1975 * - set minordev data to segment
1976 * - update devp argument to new device
1977 * - update s_cred to cred; make sure you do crhold(cred);
1978 */
1979
1980 /* allocate a new resource number */
1981 if (rsmresource_alloc(&rnum) == RSM_SUCCESS) {
1982 /*
1983 * We will bind this minor to a specific resource in first
1984 * ioctl
1985 */
1986 *devp = makedevice(getmajor(*devp), rnum);
1987 } else {
1988 return (EAGAIN);
1989 }
1990
1991 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open done\n"));
1992 return (DDI_SUCCESS);
1993 }
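
/*
 * User-level view of the open path (hedged sketch, not part of the
 * driver): the RSMAPI library opens minor zero and, when opened for
 * writing, receives a clone minor that the first ioctl later binds
 * to a specific resource:
 *
 *	#include <fcntl.h>
 *
 *	int fd = open("/dev/rsm", O_RDWR);
 *	if (fd == -1)
 *		return (-1);	driver out of resource numbers, etc.
 */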
1994
1995 static void
1996 rsmseg_close(rsmseg_t *seg, int force_flag)
1997 {
1998 int e = RSM_SUCCESS;
1999
2000 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
2001
2002 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close enter\n"));
2003
2004 rsmseglock_acquire(seg);
2005 if (!force_flag && (seg->s_hdr.rsmrc_type ==
2006 RSM_RESOURCE_EXPORT_SEGMENT)) {
2007 /*
2008 * If we are processing rsm_close, wait for force_destroy
2009 * processing to complete, since it must finish before we
2010 * can free the segment. force_destroy applies only to
2011 * export segments.
2012 */
2013 while (seg->s_flags & RSM_FORCE_DESTROY_WAIT) {
2014 cv_wait(&seg->s_cv, &seg->s_lock);
2015 }
2016 }
2017 rsmseglock_release(seg);
2018
2019 /* It's ok to read the state without a lock */
2020 switch (seg->s_state) {
2021 case RSM_STATE_EXPORT:
2022 case RSM_STATE_EXPORT_QUIESCING:
2023 case RSM_STATE_EXPORT_QUIESCED:
2024 e = rsm_unpublish(seg, 1);
2025 /* FALLTHRU */
2026 case RSM_STATE_BIND_QUIESCED:
2027 /* FALLTHRU */
2028 case RSM_STATE_BIND:
2029 e = rsm_unbind(seg);
2030 if (e != RSM_SUCCESS && force_flag == 1)
2031 return;
2032 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT);
2033 /* FALLTHRU */
2034 case RSM_STATE_NEW_QUIESCED:
2035 rsmseglock_acquire(seg);
2036 seg->s_state = RSM_STATE_NEW;
2037 cv_broadcast(&seg->s_cv);
2038 rsmseglock_release(seg);
2039 break;
2040 case RSM_STATE_NEW:
2041 break;
2042 case RSM_STATE_ZOMBIE:
2043 /*
2044 * Segments in this state have been removed off the
2045 * exported segments list and have been unpublished
2046 * and unbound. These segments have been removed during
2047 * a callback to rsm_export_force_destroy, which
2048 * is called for the purpose of unlocking these
2049 * exported memory segments when a process exits but
2050 * leaves the segments locked down, since rsm_close
2051 * is not called for the segments. This can happen
2052 * when a process calls fork or exec and then exits.
2053 * Once the segments are in the ZOMBIE state, all that
2054 * remains is to destroy them when rsm_close is called.
2055 * This is done here. Thus, for such segments
2056 * the state is changed to NEW so that later in this
2057 * function rsmseg_free is called.
2058 */
2059 rsmseglock_acquire(seg);
2060 seg->s_state = RSM_STATE_NEW;
2061 rsmseglock_release(seg);
2062 break;
2063 case RSM_STATE_MAP_QUIESCE:
2064 case RSM_STATE_ACTIVE:
2065 /* Disconnect will handle the unmap */
2066 case RSM_STATE_CONN_QUIESCE:
2067 case RSM_STATE_CONNECT:
2068 case RSM_STATE_DISCONNECT:
2069 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
2070 (void) rsm_disconnect(seg);
2071 break;
2072 case RSM_STATE_MAPPING:
2073 /*FALLTHRU*/
2074 case RSM_STATE_END:
2075 DBG_PRINTF((category, RSM_ERR,
2076 "Invalid segment state %d in rsm_close\n", seg->s_state));
2077 break;
2078 default:
2079 DBG_PRINTF((category, RSM_ERR,
2080 "Invalid segment state %d in rsm_close\n", seg->s_state));
2081 break;
2082 }
2083
2084 /*
2085 * check state.
2086 * - make sure you do crfree(s_cred);
2087 * release segment and minor number
2088 */
2089 ASSERT(seg->s_state == RSM_STATE_NEW);
2090
2091 /*
2092 * The export_force_destroy callback unlocks the exported
2093 * segments of a process when the process does a fork or exec
2094 * and then exits; it calls this function with the force flag
2095 * set to 1, which indicates that the segment state must be
2096 * converted to ZOMBIE. This state means that the segments still
2097 * exist and have been unlocked, and, most importantly, the only
2098 * operation allowed is to destroy them on an rsm_close.
2099 */
2100 if (force_flag) {
2101 rsmseglock_acquire(seg);
2102 seg->s_state = RSM_STATE_ZOMBIE;
2103 rsmseglock_release(seg);
2104 } else {
2105 rsmseg_free(seg);
2106 }
2107
2108 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close done\n"));
2109 }
2110
2111 static int
2112 rsm_close(dev_t dev, int flag, int otyp, cred_t *cred)
2113 {
2114 minor_t rnum = getminor(dev);
2115 rsmresource_t *res;
2116 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
2117
2118 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close enter\n"));
2119
2120 flag = flag; cred = cred;
2121
2122 if (otyp != OTYP_CHR)
2123 return (EINVAL);
2124
2125 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rnum = %d\n", rnum));
2126
2127 /*
2128 * At this point we hold the last reference to the resource.
2129 * Free resource number from resource table.
2130 * It's ok to remove number before we free the segment.
2131 * We need to lock the resource to protect against remote calls.
2132 */
2133 if (rnum == RSM_DRIVER_MINOR ||
2134 (res = rsmresource_free(rnum)) == NULL) {
2135 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
2136 return (DDI_SUCCESS);
2137 }
2138
2139 switch (res->rsmrc_type) {
2140 case RSM_RESOURCE_EXPORT_SEGMENT:
2141 case RSM_RESOURCE_IMPORT_SEGMENT:
2142 rsmseg_close((rsmseg_t *)res, 0);
2143 break;
2144 case RSM_RESOURCE_BAR:
2145 DBG_PRINTF((category, RSM_ERR, "bad resource in rsm_close\n"));
2146 break;
2147 default:
2148 break;
2149 }
2150
2151 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
2152
2153 return (DDI_SUCCESS);
2154 }
2155
2156 /*
2157 * rsm_inc_pgcnt
2158 *
2159 * Description: increment rsm page counter.
2160 *
2161 * Parameters: pgcnt_t pnum; number of pages to be used
2162 *
2163 * Returns: RSM_SUCCESS if memory limit not exceeded
2164 * ENOSPC if memory limit exceeded. In this case, the
2165 * page counter remains unchanged.
2166 *
2167 */
2168 static int
2169 rsm_inc_pgcnt(pgcnt_t pnum)
2170 {
2171 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2172 if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
2173 return (RSM_SUCCESS);
2174 }
2175
2176 mutex_enter(&rsm_pgcnt_lock);
2177
2178 if (rsm_pgcnt + pnum > rsm_pgcnt_max) {
2179 /* ensure that limits have not been exceeded */
2180 mutex_exit(&rsm_pgcnt_lock);
2181 return (RSMERR_INSUFFICIENT_MEM);
2182 }
2183
2184 rsm_pgcnt += pnum;
2185 DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt incr to %d.\n",
2186 rsm_pgcnt));
2187 mutex_exit(&rsm_pgcnt_lock);
2188
2189 return (RSM_SUCCESS);
2190 }
2191
2192 /*
2193 * rsm_dec_pgcnt
2194 *
2195 * Description: decrement rsm page counter.
2196 *
2197 * Parameters: pgcnt_t pnum; number of pages freed
2198 *
2199 */
2200 static void
2201 rsm_dec_pgcnt(pgcnt_t pnum)
2202 {
2203 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2204
2205 if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
2206 return;
2207 }
2208
2209 mutex_enter(&rsm_pgcnt_lock);
2210 ASSERT(rsm_pgcnt >= pnum);
2211 rsm_pgcnt -= pnum;
2212 DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt decr to %d.\n",
2213 rsm_pgcnt));
2214 mutex_exit(&rsm_pgcnt_lock);
2215 }
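
/*
 * rsm_inc_pgcnt()/rsm_dec_pgcnt() form a reserve/undo pair; a caller
 * that reserves pages must release them on any later failure. Sketch
 * of the pattern (lock_pages is a hypothetical stand-in for the real
 * page-locking step):
 *
 *	if (rsm_inc_pgcnt(pnum) != RSM_SUCCESS)
 *		return (RSMERR_INSUFFICIENT_MEM);
 *	if (lock_pages(vaddr, len) != 0) {
 *		rsm_dec_pgcnt(pnum);	undo the reservation
 *		return (RSMERR_INSUFFICIENT_MEM);
 *	}
 */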
2216
2217 static struct umem_callback_ops rsm_as_ops = {
2218 UMEM_CALLBACK_VERSION, /* version number */
2219 rsm_export_force_destroy,
2220 };
2221
2222 static int
2223 rsm_bind_pages(ddi_umem_cookie_t *cookie, caddr_t vaddr, size_t len,
2224 proc_t *procp)
2225 {
2226 int error = RSM_SUCCESS;
2227 ulong_t pnum;
2228 struct umem_callback_ops *callbackops = &rsm_as_ops;
2229
2230 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2231
2232 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages enter\n"));
2233
2234 /*
2235 * Make sure vaddr and len are aligned on a page boundary
2236 */
2237 if ((uintptr_t)vaddr & (PAGESIZE - 1)) {
2238 return (RSMERR_BAD_ADDR);
2239 }
2240
2241 if (len & (PAGESIZE - 1)) {
2242 return (RSMERR_BAD_LENGTH);
2243 }
2244
2245 /*
2246 * Find number of pages
2247 */
2248 pnum = btopr(len);
2249 error = rsm_inc_pgcnt(pnum);
2250 if (error != RSM_SUCCESS) {
2251 DBG_PRINTF((category, RSM_ERR,
2252 "rsm_bind_pages:mem limit exceeded\n"));
2253 return (RSMERR_INSUFFICIENT_MEM);
2254 }
2255
2256 error = umem_lockmemory(vaddr, len,
2257 DDI_UMEMLOCK_WRITE|DDI_UMEMLOCK_READ|DDI_UMEMLOCK_LONGTERM,
2258 cookie,
2259 callbackops, procp);
2260
2261 if (error) {
2262 rsm_dec_pgcnt(pnum);
2263 DBG_PRINTF((category, RSM_ERR,
2264 "rsm_bind_pages:ddi_umem_lock failed\n"));
2265 /*
2266 * ddi_umem_lock, in the case of failure, returns one of
2267 * the following three errors. These are translated into
2268 * the RSMERR namespace and returned.
2269 */
2270 if (error == EFAULT)
2271 return (RSMERR_BAD_ADDR);
2272 else if (error == EACCES)
2273 return (RSMERR_PERM_DENIED);
2274 else
2275 return (RSMERR_INSUFFICIENT_MEM);
2276 }
2277
2278 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages done\n"));
2279
2280 return (error);
2281
2282 }
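
/*
 * The alignment checks above mean callers must supply a page-aligned
 * region; a user of the RSMAPI library would typically obtain one
 * with valloc(3C) or mmap(2). Illustrative user-level sketch:
 *
 *	#include <stdlib.h>
 *	#include <unistd.h>
 *
 *	size_t len = 4 * sysconf(_SC_PAGESIZE);
 *	void *buf = valloc(len);	page-aligned by definition
 */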
2283
2284 static int
2285 rsm_unbind_pages(rsmseg_t *seg)
2286 {
2287 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2288
2289 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages enter\n"));
2290
2291 ASSERT(rsmseglock_held(seg));
2292
2293 if (seg->s_cookie != NULL) {
2294 /* unlock address range */
2295 ddi_umem_unlock(seg->s_cookie);
2296 rsm_dec_pgcnt(btopr(seg->s_len));
2297 seg->s_cookie = NULL;
2298 }
2299
2300 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages done\n"));
2301
2302 return (RSM_SUCCESS);
2303 }
2304
2305
2306 static int
2307 rsm_bind(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
2308 {
2309 int e;
2310 adapter_t *adapter;
2311 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2312
2313 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind enter\n"));
2314
2315 adapter = rsm_getadapter(msg, mode);
2316 if (adapter == NULL) {
2317 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2318 "rsm_bind done:no adapter\n"));
2319 return (RSMERR_CTLR_NOT_PRESENT);
2320 }
2321
2322 /* lock address range */
2323 if (msg->vaddr == NULL) {
2324 rsmka_release_adapter(adapter);
2325 DBG_PRINTF((category, RSM_ERR,
2326 "rsm: rsm_bind done: invalid vaddr\n"));
2327 return (RSMERR_BAD_ADDR);
2328 }
2329 if (msg->len <= 0) {
2330 rsmka_release_adapter(adapter);
2331 DBG_PRINTF((category, RSM_ERR,
2332 "rsm_bind: invalid length\n"));
2333 return (RSMERR_BAD_LENGTH);
2334 }
2335
2336 /* Lock segment */
2337 rsmseglock_acquire(seg);
2338
2339 while (seg->s_state == RSM_STATE_NEW_QUIESCED) {
2340 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
2341 DBG_PRINTF((category, RSM_DEBUG,
2342 "rsm_bind done: cv_wait INTERRUPTED"));
2343 rsmka_release_adapter(adapter);
2344 rsmseglock_release(seg);
2345 return (RSMERR_INTERRUPTED);
2346 }
2347 }
2348
2349 ASSERT(seg->s_state == RSM_STATE_NEW);
2350
2351 ASSERT(seg->s_cookie == NULL);
2352
2353 e = rsm_bind_pages(&seg->s_cookie, msg->vaddr, msg->len, curproc);
2354 if (e == RSM_SUCCESS) {
2355 seg->s_flags |= RSM_USER_MEMORY;
2356 if (msg->perm & RSM_ALLOW_REBIND) {
2357 seg->s_flags |= RSMKA_ALLOW_UNBIND_REBIND;
2358 }
2359 if (msg->perm & RSM_CREATE_SEG_DONTWAIT) {
2360 seg->s_flags |= RSMKA_SET_RESOURCE_DONTWAIT;
2361 }
2362 seg->s_region.r_vaddr = msg->vaddr;
2363 /*
2364 * Set the s_pid value in the segment structure. This is used
2365 * to identify exported segments belonging to a particular
2366 * process so that when the process exits, these segments can
2367 * be unlocked forcefully even if rsm_close is not called on
2368 * process exit since there may be other processes referencing
2369 * them (for example on a fork or exec).
2370 * The s_pid value is also used to authenticate the process
2371 * doing a publish or unpublish on the export segment. Only
2372 * the creator of the export segment has a right to do a
2373 * publish or unpublish and unbind on the segment.
2374 */
2375 seg->s_pid = ddi_get_pid();
2376 seg->s_len = msg->len;
2377 seg->s_state = RSM_STATE_BIND;
2378 seg->s_adapter = adapter;
2379 seg->s_proc = curproc;
2380 } else {
2381 rsmka_release_adapter(adapter);
2382 DBG_PRINTF((category, RSM_WARNING,
2383 "unable to lock down pages\n"));
2384 }
2385
2386 msg->rnum = seg->s_minor;
2387 /* Unlock segment */
2388 rsmseglock_release(seg);
2389
2390 if (e == RSM_SUCCESS) {
2391 /* copyout the resource number */
2392 #ifdef _MULTI_DATAMODEL
2393 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
2394 rsm_ioctlmsg32_t msg32;
2395
2396 msg32.rnum = msg->rnum;
2397 if (ddi_copyout((caddr_t)&msg32.rnum,
2398 (caddr_t)&((rsm_ioctlmsg32_t *)dataptr)->rnum,
2399 sizeof (minor_t), mode)) {
2400 rsmka_release_adapter(adapter);
2401 e = RSMERR_BAD_ADDR;
2402 }
2403 } else
2404 #endif
2405 if (ddi_copyout((caddr_t)&msg->rnum,
2406 (caddr_t)&((rsm_ioctlmsg_t *)dataptr)->rnum,
2407 sizeof (minor_t), mode)) {
2408 rsmka_release_adapter(adapter);
2409 e = RSMERR_BAD_ADDR;
2410 }
2411 }
2412
2413 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind done\n"));
2414
2415 return (e);
2416 }
2417
2418 static void
2419 rsm_remap_local_importers(rsm_node_id_t src_nodeid,
2420 rsm_memseg_id_t ex_segid,
2421 ddi_umem_cookie_t cookie)
2422
2423 {
2424 rsmresource_t *p = NULL;
2425 rsmhash_table_t *rhash = &rsm_import_segs;
2426 uint_t index;
2427
2428 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
2429 "rsm_remap_local_importers enter\n"));
2430
2431 index = rsmhash(ex_segid);
2432
2433 rw_enter(&rhash->rsmhash_rw, RW_READER);
2434
2435 p = rsmhash_getbkt(rhash, index);
2436
2437 for (; p; p = p->rsmrc_next) {
2438 rsmseg_t *seg = (rsmseg_t *)p;
2439 rsmseglock_acquire(seg);
2440 /*
2441 * Change the s_cookie value of only the local importers
2442 * which have been mapped (in state RSM_STATE_ACTIVE).
2443 * Note that there is no need to change the s_cookie value
2444 * if the imported segment is in RSM_STATE_MAPPING since
2445 * eventually the s_cookie will be updated via the mapping
2446 * functionality.
2447 */
2448 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid) &&
2449 (seg->s_state == RSM_STATE_ACTIVE)) {
2450 seg->s_cookie = cookie;
2451 }
2452 rsmseglock_release(seg);
2453 }
2454 rw_exit(&rhash->rsmhash_rw);
2455
2456 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
2457 "rsm_remap_local_importers done\n"));
2458 }
2459
2460 static int
2461 rsm_rebind(rsmseg_t *seg, rsm_ioctlmsg_t *msg)
2462 {
2463 int e;
2464 adapter_t *adapter;
2465 ddi_umem_cookie_t cookie;
2466 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2467
2468 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind enter\n"));
2469
2470 /* Check for permissions to rebind */
2471 if (!(seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND)) {
2472 return (RSMERR_REBIND_NOT_ALLOWED);
2473 }
2474
2475 if (seg->s_pid != ddi_get_pid() &&
2476 ddi_get_pid() != 0) {
2477 DBG_PRINTF((category, RSM_ERR, "rsm_rebind: Not owner\n"));
2478 return (RSMERR_NOT_CREATOR);
2479 }
2480
2481 /*
2482 * Partial rebind is not allowed, hence the length passed
2483 * in must be the same as the segment length.
2484 */
2485 if (msg->vaddr == NULL) {
2486 DBG_PRINTF((category, RSM_ERR,
2487 "rsm_rebind done: null msg->vaddr\n"));
2488 return (RSMERR_BAD_ADDR);
2489 }
2490 if (msg->len != seg->s_len) {
2491 DBG_PRINTF((category, RSM_ERR,
2492 "rsm_rebind: invalid length\n"));
2493 return (RSMERR_BAD_LENGTH);
2494 }
2495
2496 /* Lock segment */
2497 rsmseglock_acquire(seg);
2498
2499 while ((seg->s_state == RSM_STATE_BIND_QUIESCED) ||
2500 (seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
2501 (seg->s_state == RSM_STATE_EXPORT_QUIESCED)) {
2502 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
2503 rsmseglock_release(seg);
2504 DBG_PRINTF((category, RSM_DEBUG,
2505 "rsm_rebind done: cv_wait INTERRUPTED"));
2506 return (RSMERR_INTERRUPTED);
2507 }
2508 }
2509
2510 /* verify segment state */
2511 if ((seg->s_state != RSM_STATE_BIND) &&
2512 (seg->s_state != RSM_STATE_EXPORT)) {
2513 /* Unlock segment */
2514 rsmseglock_release(seg);
2515 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2516 "rsm_rebind done: invalid state\n"));
2517 return (RSMERR_BAD_SEG_HNDL);
2518 }
2519
2520 ASSERT(seg->s_cookie != NULL);
2521
2522 if (msg->vaddr == seg->s_region.r_vaddr) {
2523 rsmseglock_release(seg);
2524 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));
2525 return (RSM_SUCCESS);
2526 }
2527
2528 e = rsm_bind_pages(&cookie, msg->vaddr, msg->len, curproc);
2529 if (e == RSM_SUCCESS) {
2530 struct buf *xbuf;
2531 dev_t sdev = 0;
2532 rsm_memory_local_t mem;
2533
2534 xbuf = ddi_umem_iosetup(cookie, 0, msg->len, B_WRITE,
2535 sdev, 0, NULL, DDI_UMEM_SLEEP);
2536 ASSERT(xbuf != NULL);
2537
2538 mem.ms_type = RSM_MEM_BUF;
2539 mem.ms_bp = xbuf;
2540
2541 adapter = seg->s_adapter;
2542 e = adapter->rsmpi_ops->rsm_rebind(
2543 seg->s_handle.out, 0, &mem,
2544 RSM_RESOURCE_DONTWAIT, NULL);
2545
2546 if (e == RSM_SUCCESS) {
2547 /*
2548 * unbind the older pages, and unload local importers;
2549 * but don't disconnect importers
2550 */
2551 (void) rsm_unbind_pages(seg);
2552 seg->s_cookie = cookie;
2553 seg->s_region.r_vaddr = msg->vaddr;
2554 rsm_remap_local_importers(my_nodeid, seg->s_segid,
2555 cookie);
2556 } else {
2557 /*
2558 * Unbind the pages associated with "cookie" by the
2559 * rsm_bind_pages call prior to this. This is
2560 * similar to what is done in the rsm_unbind_pages
2561 * routine for the seg->s_cookie.
2562 */
2563 ddi_umem_unlock(cookie);
2564 rsm_dec_pgcnt(btopr(msg->len));
2565 DBG_PRINTF((category, RSM_ERR,
2566 "rsm_rebind failed with %d\n", e));
2567 }
2568 /*
2569 * At present there is no dependency on the existence of xbuf.
2570 * So we can free it here. If in the future this changes, it can
2571 * be freed sometime during the segment destroy.
2572 */
2573 freerbuf(xbuf);
2574 }
2575
2576 /* Unlock segment */
2577 rsmseglock_release(seg);
2578
2579 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));
2580
2581 return (e);
2582 }
2583
2584 static int
2585 rsm_unbind(rsmseg_t *seg)
2586 {
2587 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2588
2589 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind enter\n"));
2590
2591 rsmseglock_acquire(seg);
2592
2593 /* verify segment state */
2594 if ((seg->s_state != RSM_STATE_BIND) &&
2595 (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
2596 rsmseglock_release(seg);
2597 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2598 "rsm_unbind: invalid state\n"));
2599 return (RSMERR_BAD_SEG_HNDL);
2600 }
2601
2602 /* unlock current range */
2603 (void) rsm_unbind_pages(seg);
2604
2605 if (seg->s_state == RSM_STATE_BIND) {
2606 seg->s_state = RSM_STATE_NEW;
2607 } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
2608 seg->s_state = RSM_STATE_NEW_QUIESCED;
2609 }
2610
2611 rsmseglock_release(seg);
2612
2613 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind done\n"));
2614
2615 return (RSM_SUCCESS);
2616 }
2617
2618 /* **************************** Exporter Access List Management ******* */
2619 static void
2620 rsmacl_free(rsmapi_access_entry_t *acl, int acl_len)
2621 {
2622 int acl_sz;
2623 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2624
2625 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free enter\n"));
2626
2627 /* acl could be NULL */
2628
2629 if (acl != NULL && acl_len > 0) {
2630 acl_sz = acl_len * sizeof (rsmapi_access_entry_t);
2631 kmem_free((void *)acl, acl_sz);
2632 }
2633
2634 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free done\n"));
2635 }
2636
2637 static void
2638 rsmpiacl_free(rsm_access_entry_t *acl, int acl_len)
2639 {
2640 int acl_sz;
2641 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2642
2643 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free enter\n"));
2644
2645 if (acl != NULL && acl_len > 0) {
2646 acl_sz = acl_len * sizeof (rsm_access_entry_t);
2647 kmem_free((void *)acl, acl_sz);
2648 }
2649
2650 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free done\n"));
2651
2652 }
2653
2654 static int
2655 rsmacl_build(rsm_ioctlmsg_t *msg, int mode,
2656 rsmapi_access_entry_t **list, int *len, int loopback)
2657 {
2658 rsmapi_access_entry_t *acl;
2659 int acl_len;
2660 int i;
2661 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2662
2663 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build enter\n"));
2664
2665 *len = 0;
2666 *list = NULL;
2667
2668 acl_len = msg->acl_len;
2669 if ((loopback && acl_len > 1) || (acl_len < 0) ||
2670 (acl_len > MAX_NODES)) {
2671 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2672 "rsmacl_build done: acl invalid\n"));
2673 return (RSMERR_BAD_ACL);
2674 }
2675
2676 if (acl_len > 0 && acl_len <= MAX_NODES) {
2677 size_t acl_size = acl_len * sizeof (rsmapi_access_entry_t);
2678
2679 acl = kmem_alloc(acl_size, KM_SLEEP);
2680
2681 if (ddi_copyin((caddr_t)msg->acl, (caddr_t)acl,
2682 acl_size, mode)) {
2683 kmem_free((void *) acl, acl_size);
2684 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2685 "rsmacl_build done: BAD_ADDR\n"));
2686 return (RSMERR_BAD_ADDR);
2687 }
2688
2689 /*
2690 * Verify access list
2691 */
2692 for (i = 0; i < acl_len; i++) {
2693 if (acl[i].ae_node > MAX_NODES ||
2694 (loopback && (acl[i].ae_node != my_nodeid)) ||
2695 acl[i].ae_permission > RSM_ACCESS_TRUSTED) {
2696 /* invalid entry */
2697 kmem_free((void *) acl, acl_size);
2698 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2699 "rsmacl_build done: EINVAL\n"));
2700 return (RSMERR_BAD_ACL);
2701 }
2702 }
2703
2704 *len = acl_len;
2705 *list = acl;
2706 }
2707
2708 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build done\n"));
2709
2710 return (DDI_SUCCESS);
2711 }
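
/*
 * The access list copied in above is a flat array prepared by the
 * caller. A hedged sketch of what the RSMAPI side might hand down
 * (the node ids and uniform RSM_PERM_RDWR permission are illustrative):
 *
 *	rsmapi_access_entry_t acl[2];
 *
 *	acl[0].ae_node = 4;
 *	acl[0].ae_permission = RSM_PERM_RDWR;
 *	acl[1].ae_node = 7;
 *	acl[1].ae_permission = RSM_PERM_RDWR;
 *	msg.acl = acl;
 *	msg.acl_len = 2;
 */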
2712
2713 static int
2714 rsmpiacl_create(rsmapi_access_entry_t *src, rsm_access_entry_t **dest,
2715 int acl_len, adapter_t *adapter)
2716 {
2717 rsm_access_entry_t *acl;
2718 rsm_addr_t hwaddr;
2719 int i;
2720 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2721
2722 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create enter\n"));
2723
2724 if (src != NULL) {
2725 size_t acl_size = acl_len * sizeof (rsm_access_entry_t);
2726 acl = kmem_alloc(acl_size, KM_SLEEP);
2727
2728 /*
2729 * translate access list
2730 */
2731 for (i = 0; i < acl_len; i++) {
2732 if (src[i].ae_node == my_nodeid) {
2733 acl[i].ae_addr = adapter->hwaddr;
2734 } else {
2735 hwaddr = get_remote_hwaddr(adapter,
2736 src[i].ae_node);
2737 if ((int64_t)hwaddr < 0) {
2738 /* invalid hwaddr */
2739 kmem_free((void *) acl, acl_size);
2740 DBG_PRINTF((category,
2741 RSM_DEBUG_VERBOSE,
2742 "rsmpiacl_create done:"
2743 "EINVAL hwaddr\n"));
2744 return (RSMERR_INTERNAL_ERROR);
2745 }
2746 acl[i].ae_addr = hwaddr;
2747 }
2748 /* rsmpi understands only RSM_PERM_XXXX */
2749 acl[i].ae_permission =
2750 src[i].ae_permission & RSM_PERM_RDWR;
2751 }
2752 *dest = acl;
2753 } else {
2754 *dest = NULL;
2755 }
2756
2757 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create done\n"));
2758
2759 return (RSM_SUCCESS);
2760 }
2761
2762 static int
2763 rsmsegacl_validate(rsmipc_request_t *req, rsm_node_id_t rnode,
2764 rsmipc_reply_t *reply)
2765 {
2766
2767 int i;
2768 rsmseg_t *seg;
2769 rsm_memseg_id_t key = req->rsmipc_key;
2770 rsm_permission_t perm = req->rsmipc_perm;
2771 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2772
2773 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2774 "rsmsegacl_validate enter\n"));
2775
2776 /*
2777 * Find the segment and grab its lock. The reason we grab the
2778 * segment lock inside the search is to avoid a race when the
2779 * segment is being deleted while we already have a pointer to it.
2780 */
2781 seg = rsmexport_lookup(key);
2782 if (!seg) {
2783 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2784 "rsmsegacl_validate done: %u ENXIO\n", key));
2785 return (RSMERR_SEG_NOT_PUBLISHED);
2786 }
2787
2788 ASSERT(rsmseglock_held(seg));
2789 ASSERT(seg->s_state == RSM_STATE_EXPORT);
2790
2791 /*
2792 * We implement a 2-level protection scheme.
2793 * First, we check if local/remote host has access rights.
2794 * Second, we check if the user has access rights.
2795 *
2796 * This routine only validates the rnode access_list
2797 */
2798 if (seg->s_acl_len > 0) {
2799 /*
2800 * Check host access list
2801 */
2802 ASSERT(seg->s_acl != NULL);
2803 for (i = 0; i < seg->s_acl_len; i++) {
2804 if (seg->s_acl[i].ae_node == rnode) {
2805 perm &= seg->s_acl[i].ae_permission;
2806 goto found;
2807 }
2808 }
2809 /* rnode is not found in the list */
2810 rsmseglock_release(seg);
2811 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
2812 "rsmsegacl_validate done: EPERM\n"));
2813 return (RSMERR_SEG_NOT_PUBLISHED_TO_NODE);
2814 } else {
2815 /* use default owner creation umask */
2816 perm &= seg->s_mode;
2817 }
2818
2819 found:
2820 /* update perm for this node */
2821 reply->rsmipc_mode = perm;
2822 reply->rsmipc_uid = seg->s_uid;
2823 reply->rsmipc_gid = seg->s_gid;
2824 reply->rsmipc_segid = seg->s_segid;
2825 reply->rsmipc_seglen = seg->s_len;
2826
2827 /*
2828 * Perm of requesting node is valid; source will validate user
2829 */
2830 rsmseglock_release(seg);
2831
2832 /*
2833 * Add the importer to the list right away; if the connect fails,
2834 * the importer will ask the exporter to remove it.
2835 */
2836 importer_list_add(rnode, key, req->rsmipc_adapter_hwaddr,
2837 req->rsmipc_segment_cookie);
2838
2839 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegacl_validate done\n"));
2840
2841 return (RSM_SUCCESS);
2842 }
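
/*
 * Worked example of the permission intersection above: if the
 * importer requests read/write but the matching ACL entry grants
 * read-only, perm &= ae_permission leaves read-only; with an empty
 * ACL, perm &= s_mode applies the owner's creation umask instead.
 */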
2843
2844
2845 /* ************************** Exporter Calls ************************* */
2846
2847 static int
2848 rsm_publish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
2849 {
2850 int e;
2851 int acl_len;
2852 rsmapi_access_entry_t *acl;
2853 rsm_access_entry_t *rsmpi_acl;
2854 rsm_memory_local_t mem;
2855 struct buf *xbuf;
2856 dev_t sdev = 0;
2857 adapter_t *adapter;
2858 rsm_memseg_id_t segment_id = 0;
2859 int loopback_flag = 0;
2860 int create_flags = 0;
2861 rsm_resource_callback_t callback_flag;
2862 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2863
2864 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish enter\n"));
2865
2866 if (seg->s_adapter == &loopback_adapter)
2867 loopback_flag = 1;
2868
2869 if (seg->s_pid != ddi_get_pid() &&
2870 ddi_get_pid() != 0) {
2871 DBG_PRINTF((category, RSM_ERR,
2872 "rsm_publish: Not creator\n"));
2873 return (RSMERR_NOT_CREATOR);
2874 }
2875
2876 /*
2877 * Get per node access list
2878 */
2879 e = rsmacl_build(msg, mode, &acl, &acl_len, loopback_flag);
2880 if (e != DDI_SUCCESS) {
2881 DBG_PRINTF((category, RSM_ERR,
2882 "rsm_publish done: rsmacl_build failed\n"));
2883 return (e);
2884 }
2885
2886 /*
2887 * The application provided msg->key is used for resolving a
2888 * segment id according to the following:
2889 * key = 0 Kernel Agent selects the segment id
2890 * key <= RSM_DLPI_ID_END Reserved for system usage except
2891 * RSMLIB range
2892 * key < RSM_USER_APP_ID_BASE segment id = key
2893 * key >= RSM_USER_APP_ID_BASE Reserved for KA selections
2894 *
2895 * rsm_nextavail_segmentid is initialized to 0x80000000 and
2896 * overflows to zero after 0x80000000 allocations.
2897 * An algorithm is needed which allows reinitialization and provides
2898 * for reallocation after overflow. For now, RSMERR_INSUFFICIENT_RESOURCES
2899 * is returned once the overflow condition has occurred.
2900 */
2901 if (msg->key == 0) {
2902 mutex_enter(&rsm_lock);
2903 segment_id = rsm_nextavail_segmentid;
2904 if (segment_id != 0) {
2905 rsm_nextavail_segmentid++;
2906 mutex_exit(&rsm_lock);
2907 } else {
2908 mutex_exit(&rsm_lock);
2909 DBG_PRINTF((category, RSM_ERR,
2910 "rsm_publish done: no more keys avlbl\n"));
2911 return (RSMERR_INSUFFICIENT_RESOURCES);
2912 }
2913 } else if BETWEEN(msg->key, RSM_RSMLIB_ID_BASE, RSM_RSMLIB_ID_END)
2914 /* range reserved for internal use by base/ndi libraries */
2915 segment_id = msg->key;
2916 else if (msg->key <= RSM_DLPI_ID_END)
2917 return (RSMERR_RESERVED_SEGID);
2918 else if (msg->key <= (uint_t)RSM_USER_APP_ID_BASE -1)
2919 segment_id = msg->key;
2920 else {
2921 DBG_PRINTF((category, RSM_ERR,
2922 "rsm_publish done: invalid key %u\n", msg->key));
2923 return (RSMERR_RESERVED_SEGID);
2924 }
2925
2926 /* Add key to exportlist; The segment lock is held on success */
2927 e = rsmexport_add(seg, segment_id);
2928 if (e) {
2929 rsmacl_free(acl, acl_len);
2930 DBG_PRINTF((category, RSM_ERR,
2931 "rsm_publish done: export_add failed: %d\n", e));
2932 return (e);
2933 }
2934
2935 seg->s_segid = segment_id;
2936
2937 if ((seg->s_state != RSM_STATE_BIND) &&
2938 (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
2939 /* state changed since then, free acl and return */
2940 rsmseglock_release(seg);
2941 rsmexport_rm(seg);
2942 rsmacl_free(acl, acl_len);
2943 DBG_PRINTF((category, RSM_ERR,
2944 "rsm_publish done: segment in wrong state: %d\n",
2945 seg->s_state));
2946 return (RSMERR_BAD_SEG_HNDL);
2947 }
2948
2949 /*
2950 * If this is for a local memory handle and permissions are zero,
2951 * then the surrogate segment is very large and we want to skip
2952 * allocation of DVMA space.
2953 *
2954 * Careful! If the user didn't use an ACL list, acl will be a NULL
2955 * pointer. Check that before dereferencing it.
2956 */
2957 if (acl != (rsmapi_access_entry_t *)NULL) {
2958 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
2959 goto skipdriver;
2960 }
2961
2962 /* create segment */
2963 xbuf = ddi_umem_iosetup(seg->s_cookie, 0, seg->s_len, B_WRITE,
2964 sdev, 0, NULL, DDI_UMEM_SLEEP);
2965 ASSERT(xbuf != NULL);
2966
2967 mem.ms_type = RSM_MEM_BUF;
2968 mem.ms_bp = xbuf;
2969
2970 /* This call includes a bind operation */
2971
2972 adapter = seg->s_adapter;
2973 /*
2974 * create an acl list with hwaddr for RSMPI publish
2975 */
2976 e = rsmpiacl_create(acl, &rsmpi_acl, acl_len, adapter);
2977
2978 if (e != RSM_SUCCESS) {
2979 rsmseglock_release(seg);
2980 rsmexport_rm(seg);
2981 rsmacl_free(acl, acl_len);
2982 freerbuf(xbuf);
2983 DBG_PRINTF((category, RSM_ERR,
2984 "rsm_publish done: rsmpiacl_create failed: %d\n", e));
2985 return (e);
2986 }
2987
2988 if (seg->s_state == RSM_STATE_BIND) {
2989 /* create segment */
2990
2991 /* This call includes a bind operation */
2992
2993 if (seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
2994 create_flags = RSM_ALLOW_UNBIND_REBIND;
2995 }
2996
2997 if (seg->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
2998 callback_flag = RSM_RESOURCE_DONTWAIT;
2999 } else {
3000 callback_flag = RSM_RESOURCE_SLEEP;
3001 }
3002
3003 e = adapter->rsmpi_ops->rsm_seg_create(
3004 adapter->rsmpi_handle,
3005 &seg->s_handle.out, seg->s_len,
3006 create_flags, &mem,
3007 callback_flag, NULL);
3008 /*
3009 * At present there is no dependency on the existence of xbuf.
3010 * So we can free it here. If in the future this changes, it can
3011 * be freed sometime during the segment destroy.
3012 */
3013 freerbuf(xbuf);
3014
3015 if (e != RSM_SUCCESS) {
3016 rsmseglock_release(seg);
3017 rsmexport_rm(seg);
3018 rsmacl_free(acl, acl_len);
3019 rsmpiacl_free(rsmpi_acl, acl_len);
3020 DBG_PRINTF((category, RSM_ERR,
3021 "rsm_publish done: export_create failed: %d\n", e));
3022 /*
3023 * The following assertion ensures that the two errors
3024 * related to the length and its alignment do not occur
3025 * since they have been checked during export_create
3026 */
3027 ASSERT(e != RSMERR_BAD_MEM_ALIGNMENT &&
3028 e != RSMERR_BAD_LENGTH);
3029 if (e == RSMERR_NOT_MEM)
3030 e = RSMERR_INSUFFICIENT_MEM;
3031
3032 return (e);
3033 }
3034 /* export segment, this should create an IMMU mapping */
3035 e = adapter->rsmpi_ops->rsm_publish(
3036 seg->s_handle.out,
3037 rsmpi_acl, acl_len,
3038 seg->s_segid,
3039 RSM_RESOURCE_DONTWAIT, NULL);
3040
3041 if (e != RSM_SUCCESS) {
3042 adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);
3043 rsmseglock_release(seg);
3044 rsmexport_rm(seg);
3045 rsmacl_free(acl, acl_len);
3046 rsmpiacl_free(rsmpi_acl, acl_len);
3047 DBG_PRINTF((category, RSM_ERR,
3048 "rsm_publish done: export_publish failed: %d\n",
3049 e));
3050 return (e);
3051 }
3052 }
3053
3054 seg->s_acl_in = rsmpi_acl;
3055
3056 skipdriver:
3057 /* defer s_acl/s_acl_len -> avoid crash in rsmseg_free */
3058 seg->s_acl_len = acl_len;
3059 seg->s_acl = acl;
3060
3061 if (seg->s_state == RSM_STATE_BIND) {
3062 seg->s_state = RSM_STATE_EXPORT;
3063 } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
3064 seg->s_state = RSM_STATE_EXPORT_QUIESCED;
3065 cv_broadcast(&seg->s_cv);
3066 }
3067
3068 rsmseglock_release(seg);
3069
3070 /*
3071 * If the segment id was solicited, then return it in
3072 * the original incoming message.
3073 */
3074 if (msg->key == 0) {
3075 msg->key = segment_id;
3076 #ifdef _MULTI_DATAMODEL
3077 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
3078 rsm_ioctlmsg32_t msg32;
3079
3080 msg32.key = msg->key;
3081 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3082 "rsm_publish done\n"));
3083 return (ddi_copyout((caddr_t)&msg32,
3084 (caddr_t)dataptr, sizeof (msg32), mode));
3085 }
3086 #endif
3087 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3088 "rsm_publish done\n"));
3089 return (ddi_copyout((caddr_t)msg,
3090 (caddr_t)dataptr, sizeof (*msg), mode));
3091 }
3092
3093 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish done\n"));
3094 return (DDI_SUCCESS);
3095 }
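
/*
 * Caller's view of segment id selection (illustrative sketch; the
 * ioctl command name is deliberately elided): passing key == 0 asks
 * the kernel agent to choose the id, which is returned through the
 * same message:
 *
 *	msg.key = 0;			KA selects the segment id
 *	if (ioctl(fd, ..., &msg) == 0)
 *		id = msg.key;		now holds the chosen id
 */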
3096
3097 /*
3098 * This function modifies the access control list of an already published
3099 * segment. There is no effect on import segments which are already
3100 * connected.
3101 */
3102 static int
3103 rsm_republish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int mode)
3104 {
3105 rsmapi_access_entry_t *new_acl, *old_acl, *tmp_acl;
3106 rsm_access_entry_t *rsmpi_new_acl, *rsmpi_old_acl;
3107 int new_acl_len, old_acl_len, tmp_acl_len;
3108 int e, i;
3109 adapter_t *adapter;
3110 int loopback_flag = 0;
3111 rsm_memseg_id_t key;
3112 rsm_permission_t permission;
3113 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
3114
3115 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish enter\n"));
3116
3117 if ((seg->s_state != RSM_STATE_EXPORT) &&
3118 (seg->s_state != RSM_STATE_EXPORT_QUIESCED) &&
3119 (seg->s_state != RSM_STATE_EXPORT_QUIESCING))
3120 return (RSMERR_SEG_NOT_PUBLISHED);
3121
3122 if (seg->s_pid != ddi_get_pid() &&
3123 ddi_get_pid() != 0) {
3124 DBG_PRINTF((category, RSM_ERR,
3125 "rsm_republish: Not owner\n"));
3126 return (RSMERR_NOT_CREATOR);
3127 }
3128
3129 if (seg->s_adapter == &loopback_adapter)
3130 loopback_flag = 1;
3131
3132 /*
3133 * Build new list first
3134 */
3135 e = rsmacl_build(msg, mode, &new_acl, &new_acl_len, loopback_flag);
3136 if (e) {
3137 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3138 "rsm_republish done: rsmacl_build failed %d", e));
3139 return (e);
3140 }
3141
3142 /* Lock segment */
3143 rsmseglock_acquire(seg);
3144 /*
3145 * a republish is in progress (a REPUBLISH message is being
3146 * sent to the importers), so wait for it to complete, or
3147 * wait till DR completes
3148 */
3149 while (((seg->s_state == RSM_STATE_EXPORT) &&
3150 (seg->s_flags & RSM_REPUBLISH_WAIT)) ||
3151 (seg->s_state == RSM_STATE_EXPORT_QUIESCED) ||
3152 (seg->s_state == RSM_STATE_EXPORT_QUIESCING)) {
3153 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3154 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3155 "rsm_republish done: cv_wait INTERRUPTED"));
3156 rsmseglock_release(seg);
3157 rsmacl_free(new_acl, new_acl_len);
3158 return (RSMERR_INTERRUPTED);
3159 }
3160 }
3161
3162 /* recheck if state is valid */
3163 if (seg->s_state != RSM_STATE_EXPORT) {
3164 rsmseglock_release(seg);
3165 rsmacl_free(new_acl, new_acl_len);
3166 return (RSMERR_SEG_NOT_PUBLISHED);
3167 }
3168
3169 key = seg->s_key;
3170 old_acl = seg->s_acl;
3171 old_acl_len = seg->s_acl_len;
3172
3173 seg->s_acl = new_acl;
3174 seg->s_acl_len = new_acl_len;
3175
3176 /*
3177 * This call will only be meaningful if and when the interconnect
3178 * layer makes use of the access list
3179 */
3180 adapter = seg->s_adapter;
3181 /*
3182 * create a acl list with hwaddr for RSMPI publish
3183 */
3184 e = rsmpiacl_create(new_acl, &rsmpi_new_acl, new_acl_len, adapter);
3185
3186 if (e != RSM_SUCCESS) {
3187 seg->s_acl = old_acl;
3188 seg->s_acl_len = old_acl_len;
3189 rsmseglock_release(seg);
3190 rsmacl_free(new_acl, new_acl_len);
3191 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3192 "rsm_republish done: rsmpiacl_create failed %d", e));
3193 return (e);
3194 }
3195 rsmpi_old_acl = seg->s_acl_in;
3196 seg->s_acl_in = rsmpi_new_acl;
3197
3198 e = adapter->rsmpi_ops->rsm_republish(seg->s_handle.out,
3199 seg->s_acl_in, seg->s_acl_len,
3200 RSM_RESOURCE_DONTWAIT, NULL);
3201
3202 if (e != RSM_SUCCESS) {
3203 seg->s_acl = old_acl;
3204 seg->s_acl_in = rsmpi_old_acl;
3205 seg->s_acl_len = old_acl_len;
3206 rsmseglock_release(seg);
3207 rsmacl_free(new_acl, new_acl_len);
3208 rsmpiacl_free(rsmpi_new_acl, new_acl_len);
3209
3210 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3211 "rsm_republish done: rsmpi republish failed %d\n", e));
3212 return (e);
3213 }
3214
3215 /* create a tmp copy of the new acl */
3216 tmp_acl_len = new_acl_len;
3217 if (tmp_acl_len > 0) {
3218 tmp_acl = kmem_zalloc(new_acl_len*sizeof (*tmp_acl), KM_SLEEP);
3219 for (i = 0; i < tmp_acl_len; i++) {
3220 tmp_acl[i].ae_node = new_acl[i].ae_node;
3221 tmp_acl[i].ae_permission = new_acl[i].ae_permission;
3222 }
3223 /*
3224 * The default permission of a node which was in the old
3225 * ACL but not in the new ACL is 0 ie no access.
3226 */
3227 permission = 0;
3228 } else {
3229 /*
3230 * NULL acl means all importers can connect and
3231 * default permission will be owner creation umask
3232 */
3233 tmp_acl = NULL;
3234 permission = seg->s_mode;
3235 }
3236
3237 /* make other republishers wait for the republish to complete */
3238 seg->s_flags |= RSM_REPUBLISH_WAIT;
3239
3240 rsmseglock_release(seg);
3241
3242 /* send the new perms to the importing nodes */
3243 rsm_send_republish(key, tmp_acl, tmp_acl_len, permission);
3244
3245 rsmseglock_acquire(seg);
3246 seg->s_flags &= ~RSM_REPUBLISH_WAIT;
3247 /* wake up anyone waiting for the republish to complete */
3248 cv_broadcast(&seg->s_cv);
3249 rsmseglock_release(seg);
3250
3251 rsmacl_free(tmp_acl, tmp_acl_len);
3252 rsmacl_free(old_acl, old_acl_len);
3253 rsmpiacl_free(rsmpi_old_acl, old_acl_len);
3254
3255 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish done\n"));
3256 return (DDI_SUCCESS);
3257 }
3258
3259 static int
3260 rsm_unpublish(rsmseg_t *seg, int mode)
3261 {
3262 rsmapi_access_entry_t *acl;
3263 rsm_access_entry_t *rsmpi_acl;
3264 int acl_len;
3265 int e;
3266 adapter_t *adapter;
3267 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
3268
3269 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish enter\n"));
3270
3271 if (seg->s_pid != ddi_get_pid() &&
3272 ddi_get_pid() != 0) {
3273 DBG_PRINTF((category, RSM_ERR,
3274 "rsm_unpublish: Not creator\n"));
3275 return (RSMERR_NOT_CREATOR);
3276 }
3277
3278 rsmseglock_acquire(seg);
3279 /*
3280 * wait for QUIESCING to complete here before rsmexport_rm
3281 * is called, because the SUSPEND_COMPLETE message, which changes
3282 * the seg state from EXPORT_QUIESCING to EXPORT_QUIESCED and
3283 * signals the cv_wait, needs to find the segment in the hashtable.
3284 */
3285 while ((seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
3286 ((seg->s_state == RSM_STATE_EXPORT) && (seg->s_rdmacnt > 0))) {
3287 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3288 rsmseglock_release(seg);
3289 DBG_PRINTF((category, RSM_ERR,
3290 "rsm_unpublish done: cv_wait INTR qscing"
3291 "getv/putv in progress"));
3292 return (RSMERR_INTERRUPTED);
3293 }
3294 }
3295
3296 /* verify segment state */
3297 if ((seg->s_state != RSM_STATE_EXPORT) &&
3298 (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) {
3299 rsmseglock_release(seg);
3300 DBG_PRINTF((category, RSM_ERR,
3301 "rsm_unpublish done: bad state %x\n", seg->s_state));
3302 return (RSMERR_SEG_NOT_PUBLISHED);
3303 }
3304
3305 rsmseglock_release(seg);
3306
3307 rsmexport_rm(seg);
3308
3309 rsm_send_importer_disconnects(seg->s_segid, my_nodeid);
3310
3311 rsmseglock_acquire(seg);
3312 /*
3313 * wait for republish to complete
3314 */
3315 while ((seg->s_state == RSM_STATE_EXPORT) &&
3316 (seg->s_flags & RSM_REPUBLISH_WAIT)) {
3317 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
3318 DBG_PRINTF((category, RSM_ERR,
3319 "rsm_unpublish done: cv_wait INTR repubing"));
3320 rsmseglock_release(seg);
3321 return (RSMERR_INTERRUPTED);
3322 }
3323 }
3324
3325 if ((seg->s_state != RSM_STATE_EXPORT) &&
3326 (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) {
3327 DBG_PRINTF((category, RSM_ERR,
3328 "rsm_unpublish done: invalid state"));
3329 rsmseglock_release(seg);
3330 return (RSMERR_SEG_NOT_PUBLISHED);
3331 }
3332
3333 /*
3334 * check for a putv/getv surrogate segment which was not published
3335 * to the driver.
3336 *
3337 * Be certain to see if there is an ACL first! If this segment was
3338 * not published with an ACL, acl will be a null pointer. Check
3339 * that before dereferencing it.
3340 */
3341 acl = seg->s_acl;
3342 if (acl != (rsmapi_access_entry_t *)NULL) {
3343 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
3344 goto bypass;
3345 }
3346
3347 /* The RSMPI unpublish/destroy has been done if seg is QUIESCED */
3348 if (seg->s_state == RSM_STATE_EXPORT_QUIESCED)
3349 goto bypass;
3350
3351 adapter = seg->s_adapter;
3352 for (;;) {
3353 if (seg->s_state != RSM_STATE_EXPORT) {
3354 rsmseglock_release(seg);
3355 DBG_PRINTF((category, RSM_ERR,
3356 "rsm_unpublish done: bad state %x\n",
3357 seg->s_state));
3358 return (RSMERR_SEG_NOT_PUBLISHED);
3359 }
3360
3361 /* unpublish from adapter */
3362 e = adapter->rsmpi_ops->rsm_unpublish(seg->s_handle.out);
3363
3364 if (e == RSM_SUCCESS) {
3365 break;
3366 }
3367
3368 if (e == RSMERR_SEG_IN_USE && mode == 1) {
3369 /*
3370 * wait for unpublish to succeed, it's busy.
3371 */
3372 seg->s_flags |= RSM_EXPORT_WAIT;
3373
3374 /* wait for a max of 1 ms - this is an empirical */
3375 /* value that was found by some minimal testing */
3376 /* can be fine tuned when we have better numbers */
3377 /* A long term fix would be to send cv_signal */
3378 /* from the intr callback routine */
3379 /* currently nobody signals this wait */
3380 (void) cv_reltimedwait(&seg->s_cv, &seg->s_lock,
3381 drv_usectohz(1000), TR_CLOCK_TICK);
3382
3383 DBG_PRINTF((category, RSM_ERR,
3384 "rsm_unpublish: SEG_IN_USE\n"));
3385
3386 seg->s_flags &= ~RSM_EXPORT_WAIT;
3387 } else {
3388 if (mode == 1) {
3389 DBG_PRINTF((category, RSM_ERR,
3390 "rsm:rsmpi unpublish err %x\n", e));
3391 seg->s_state = RSM_STATE_BIND;
3392 }
3393 rsmseglock_release(seg);
3394 return (e);
3395 }
3396 }
3397
3398 /* Free segment */
3399 e = adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);
3400
3401 if (e != RSM_SUCCESS) {
3402 DBG_PRINTF((category, RSM_ERR,
3403 "rsm_unpublish: rsmpi destroy key=%x failed %x\n",
3404 seg->s_key, e));
3405 }
3406
3407 bypass:
3408 acl = seg->s_acl;
3409 rsmpi_acl = seg->s_acl_in;
3410 acl_len = seg->s_acl_len;
3411
3412 seg->s_acl = NULL;
3413 seg->s_acl_in = NULL;
3414 seg->s_acl_len = 0;
3415
3416 if (seg->s_state == RSM_STATE_EXPORT) {
3417 seg->s_state = RSM_STATE_BIND;
3418 } else if (seg->s_state == RSM_STATE_EXPORT_QUIESCED) {
3419 seg->s_state = RSM_STATE_BIND_QUIESCED;
3420 cv_broadcast(&seg->s_cv);
3421 }
3422
3423 rsmseglock_release(seg);
3424
3425 rsmacl_free(acl, acl_len);
3426 rsmpiacl_free(rsmpi_acl, acl_len);
3427
3428 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish done\n"));
3429
3430 return (DDI_SUCCESS);
3431 }
3432
3433 /*
3434 * Called from rsm_unpublish to force an unload and disconnection of all
3435 * importers of the unpublished segment.
3436 *
3437 * First build the list of segments requiring a force disconnect, then
3438 * send a request for each.
3439 */
3440 static void
3441 rsm_send_importer_disconnects(rsm_memseg_id_t ex_segid,
3442 rsm_node_id_t ex_nodeid)
3443 {
3444 rsmipc_request_t request;
3445 importing_token_t *prev_token, *token, *tmp_token, *tokp;
3446 importing_token_t *force_disconnect_list = NULL;
3447 int index;
3448
3449 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3450 "rsm_send_importer_disconnects enter\n"));
3451
3452 index = rsmhash(ex_segid);
3453
3454 mutex_enter(&importer_list.lock);
3455
3456 prev_token = NULL;
3457 token = importer_list.bucket[index];
3458
3459 while (token != NULL) {
3460 if (token->key == ex_segid) {
3461 /*
3462 * take it off the importer list and add it
3463 * to the force disconnect list.
3464 */
3465 if (prev_token == NULL)
3466 importer_list.bucket[index] = token->next;
3467 else
3468 prev_token->next = token->next;
3469 tmp_token = token;
3470 token = token->next;
3471 if (force_disconnect_list == NULL) {
3472 force_disconnect_list = tmp_token;
3473 tmp_token->next = NULL;
3474 } else {
3475 tokp = force_disconnect_list;
3476 /*
3477 * make sure that the tmp_token's node
3478 * is not already on the force disconnect
3479 * list.
3480 */
3481 while (tokp != NULL) {
3482 if (tokp->importing_node ==
3483 tmp_token->importing_node) {
3484 break;
3485 }
3486 tokp = tokp->next;
3487 }
3488 if (tokp == NULL) {
3489 tmp_token->next =
3490 force_disconnect_list;
3491 force_disconnect_list = tmp_token;
3492 } else {
3493 kmem_free((void *)tmp_token,
3494 sizeof (*token));
3495 }
3496 }
3497
3498 } else {
3499 prev_token = token;
3500 token = token->next;
3501 }
3502 }
3503 mutex_exit(&importer_list.lock);
3504
3505 token = force_disconnect_list;
3506 while (token != NULL) {
3507 if (token->importing_node == my_nodeid) {
3508 rsm_force_unload(ex_nodeid, ex_segid,
3509 DISCONNECT);
3510 } else {
3511 request.rsmipc_hdr.rsmipc_type =
3512 RSMIPC_MSG_DISCONNECT;
3513 request.rsmipc_key = token->key;
3514 for (;;) {
3515 if (rsmipc_send(token->importing_node,
3516 &request,
3517 RSM_NO_REPLY) == RSM_SUCCESS) {
3518 break;
3519 } else {
3520 delay(drv_usectohz(10000));
3521 }
3522 }
3523 }
3524 tmp_token = token;
3525 token = token->next;
3526 kmem_free((void *)tmp_token, sizeof (*token));
3527 }
3528
3529 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3530 "rsm_send_importer_disconnects done\n"));
3531 }
3532
3533 /*
3534 * This function is used as a callback for unlocking the pages locked
3535 * down by a process which then does a fork or an exec.
3536 * It marks the export segments corresponding to the umem cookie given
3537 * by *arg to be in a ZOMBIE state (by calling rsmseg_close; they are
3538 * destroyed later when an rsm_close occurs).
3539 */
3540 static void
3541 rsm_export_force_destroy(ddi_umem_cookie_t *ck)
3542 {
3543 rsmresource_blk_t *blk;
3544 rsmresource_t *p;
3545 rsmseg_t *eseg = NULL;
3546 int i, j;
3547 int found = 0;
3548
3549 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3550 "rsm_export_force_destroy enter\n"));
3551
3552 /*
3553 * Walk the resource list and locate the export segment (either
3554 * in the BIND or the EXPORT state) which corresponds to the
3555 * ddi_umem_cookie_t being freed up, and call rsmseg_close.
3556 * Change the state to ZOMBIE by calling rsmseg_close with the
3557 * force_flag argument (the second argument) set to 1. Also,
3558 * unpublish and unbind the segment, but don't free it. Free it
3559 * only on a rsm_close call for the segment.
3560 */
3561 rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
3562
3563 for (i = 0; i < rsm_resource.rsmrc_len; i++) {
3564 blk = rsm_resource.rsmrc_root[i];
3565 if (blk == NULL) {
3566 continue;
3567 }
3568
3569 for (j = 0; j < RSMRC_BLKSZ; j++) {
3570 p = blk->rsmrcblk_blks[j];
3571 if ((p != NULL) && (p != RSMRC_RESERVED) &&
3572 (p->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)) {
3573 eseg = (rsmseg_t *)p;
3574 if (eseg->s_cookie != ck)
3575 continue; /* continue searching */
3576 /*
3577 * Found the segment, set flag to indicate
3578 * force destroy processing is in progress
3579 */
3580 rsmseglock_acquire(eseg);
3581 eseg->s_flags |= RSM_FORCE_DESTROY_WAIT;
3582 rsmseglock_release(eseg);
3583 found = 1;
3584 break;
3585 }
3586 }
3587
3588 if (found)
3589 break;
3590 }
3591
3592 rw_exit(&rsm_resource.rsmrc_lock);
3593
3594 if (found) {
3595 ASSERT(eseg != NULL);
3596 /* call rsmseg_close with force flag set to 1 */
3597 rsmseg_close(eseg, 1);
3598 /*
3599 * force destroy processing done, clear flag and signal any
3600 * thread waiting in rsmseg_close.
3601 */
3602 rsmseglock_acquire(eseg);
3603 eseg->s_flags &= ~RSM_FORCE_DESTROY_WAIT;
3604 cv_broadcast(&eseg->s_cv);
3605 rsmseglock_release(eseg);
3606 }
3607
3608 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
3609 "rsm_export_force_destroy done\n"));
3610 }
3611
3612 /* ******************************* Remote Calls *********************** */
3613 static void
3614 rsm_intr_segconnect(rsm_node_id_t src, rsmipc_request_t *req)
3615 {
3616 rsmipc_reply_t reply;
3617 DBG_DEFINE(category,
3618 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3619
3620 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3621 "rsm_intr_segconnect enter\n"));
3622
3623 reply.rsmipc_status = (short)rsmsegacl_validate(req, src, &reply);
3624
3625 reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY;
3626 reply.rsmipc_hdr.rsmipc_cookie = req->rsmipc_hdr.rsmipc_cookie;
3627
3628 (void) rsmipc_send(src, NULL, &reply);
3629
3630 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3631 "rsm_intr_segconnect done\n"));
3632 }
3633
3634
3635 /*
3636 * When an exported segment is unpublished the exporter sends an ipc
3637 * message (RSMIPC_MSG_DISCONNECT) to all importers. The recv ipc dispatcher
3638 * calls this function. The import list is scanned; segments which match the
3639 * exported segment id are unloaded and disconnected.
3640 *
3641 * Will also be called from rsm_rebind with disconnect_flag FALSE.
3642 *
3643 */
3644 static void
3645 rsm_force_unload(rsm_node_id_t src_nodeid,
3646 rsm_memseg_id_t ex_segid,
3647 boolean_t disconnect_flag)
3648
3649 {
3650 rsmresource_t *p = NULL;
3651 rsmhash_table_t *rhash = &rsm_import_segs;
3652 uint_t index;
3653 DBG_DEFINE(category,
3654 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3655
3656 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload enter\n"));
3657
3658 index = rsmhash(ex_segid);
3659
3660 rw_enter(&rhash->rsmhash_rw, RW_READER);
3661
3662 p = rsmhash_getbkt(rhash, index);
3663
3664 for (; p; p = p->rsmrc_next) {
3665 rsmseg_t *seg = (rsmseg_t *)p;
3666 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid)) {
3667 /*
3668 * In order to make rsmseg_unload and rsm_force_unload
3669 * thread safe, acquire the segment lock here.
3670 * rsmseg_unload is responsible for releasing the lock.
3671 * rsmseg_unload releases the lock just before a call
3672 * to rsmipc_send or in case of an early exit which
3673 * occurs if the segment was in the state
3674 * RSM_STATE_CONNECTING or RSM_STATE_NEW.
3675 */
3676 rsmseglock_acquire(seg);
3677 if (disconnect_flag)
3678 seg->s_flags |= RSM_FORCE_DISCONNECT;
3679 rsmseg_unload(seg);
3680 }
3681 }
3682 rw_exit(&rhash->rsmhash_rw);
3683
3684 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload done\n"));
3685 }
3686
3687 static void
3688 rsm_intr_reply(rsmipc_msghdr_t *msg)
3689 {
3690 /*
3691 * Find slot for cookie in reply.
3692 * Match sequence with sequence in cookie
3693 * If no match; return
3694 	 * Try to grab lock of slot, if locked return
3695 * copy data into reply slot area
3696 * signal waiter
3697 */
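	/*
	 * For reference, a sketch of the assumed cookie layout (see
	 * rsmipc_alloc below, which fills in the index and sequence);
	 * this is an illustration, not the authoritative definition:
	 *
	 *	typedef union {
	 *		struct {
	 *			uint_t	index;		(slot in rsm_ipc.slots[])
	 *			uint_t	sequence;	(incarnation of the slot)
	 *		} ic;
	 *		uint64_t value;			(whole cookie, compared
	 *						 in one shot below)
	 *	} rsmipc_cookie_t;
	 *
	 * A reply matches its waiter only when both index and sequence
	 * agree, so a late reply for a recycled slot is dropped.
	 */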
3698 rsmipc_slot_t *slot;
3699 rsmipc_cookie_t *cookie;
3700 void *data = (void *) msg;
3701 size_t size = sizeof (rsmipc_reply_t);
3702 DBG_DEFINE(category,
3703 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3704
3705 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply enter\n"));
3706
3707 cookie = &msg->rsmipc_cookie;
3708 if (cookie->ic.index >= RSMIPC_SZ) {
3709 DBG_PRINTF((category, RSM_ERR,
3710 "rsm: rsm_intr_reply bad cookie %d\n", cookie->ic.index));
3711 return;
3712 }
3713
3714 ASSERT(cookie->ic.index < RSMIPC_SZ);
3715 slot = &rsm_ipc.slots[cookie->ic.index];
3716 mutex_enter(&slot->rsmipc_lock);
3717 if (slot->rsmipc_cookie.value == cookie->value) {
3718 /* found a match */
3719 if (RSMIPC_GET(slot, RSMIPC_PENDING)) {
3720 bcopy(data, slot->rsmipc_data, size);
3721 RSMIPC_CLEAR(slot, RSMIPC_PENDING);
3722 cv_signal(&slot->rsmipc_cv);
3723 }
3724 } else {
3725 DBG_PRINTF((category, RSM_DEBUG,
3726 "rsm: rsm_intr_reply mismatched reply %d\n",
3727 cookie->ic.index));
3728 }
3729 mutex_exit(&slot->rsmipc_lock);
3730 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply done\n"));
3731 }
3732
3733 /*
3734 * This function gets dispatched on the worker thread when we receive
3735 * the SQREADY message. This function sends the SQREADY_ACK message.
3736 */
3737 static void
3738 rsm_sqready_ack_deferred(void *arg)
3739 {
3740 path_t *path = (path_t *)arg;
3741 DBG_DEFINE(category,
3742 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3743
3744 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3745 "rsm_sqready_ack_deferred enter\n"));
3746
3747 mutex_enter(&path->mutex);
3748
3749 /*
3750 	 * If the path is not active there is no point in sending the ACK
3751 	 * because the whole SQREADY protocol will start again
3752 * when the path becomes active.
3753 */
3754 if (path->state != RSMKA_PATH_ACTIVE) {
3755 /*
3756 * decrement the path refcnt incremented in rsm_proc_sqready
3757 */
3758 PATH_RELE_NOLOCK(path);
3759 mutex_exit(&path->mutex);
3760 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3761 "rsm_sqready_ack_deferred done:!ACTIVE\n"));
3762 return;
3763 }
3764
3765 /* send an SQREADY_ACK message */
3766 (void) rsmipc_send_controlmsg(path, RSMIPC_MSG_SQREADY_ACK);
3767
3768 /* initialize credits to the max level */
3769 path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES;
3770
3771 /* wake up any send that is waiting for credits */
3772 cv_broadcast(&path->sendq_token.sendq_cv);
3773
3774 /*
3775 * decrement the path refcnt since we incremented it in
3776 * rsm_proc_sqready
3777 */
3778 PATH_RELE_NOLOCK(path);
3779
3780 mutex_exit(&path->mutex);
3781
3782 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3783 "rsm_sqready_ack_deferred done\n"));
3784 }
3785
3786 /*
3787 * Process the SQREADY message
3788 */
3789 static void
3790 rsm_proc_sqready(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3791 rsm_intr_hand_arg_t arg)
3792 {
3793 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg;
3794 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
3795 path_t *path;
3796 DBG_DEFINE(category,
3797 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3798
3799 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready enter\n"));
3800
3801 /* look up the path - incr the path refcnt */
3802 path = rsm_find_path(hdlr_argp->adapter_name,
3803 hdlr_argp->adapter_instance, src_hwaddr);
3804
3805 /*
3806 * No path exists or path is not active - drop the message
3807 */
3808 if (path == NULL) {
3809 DBG_PRINTF((category, RSM_DEBUG,
3810 "rsm_proc_sqready done: msg dropped no path\n"));
3811 return;
3812 }
3813
3814 mutex_exit(&path->mutex);
3815
3816 /* drain any tasks from the previous incarnation */
3817 taskq_wait(path->recv_taskq);
3818
3819 mutex_enter(&path->mutex);
3820 /*
3821 	 * If we had sent an SQREADY message and were waiting for an
3822 	 * SQREADY_ACK when this SQREADY message arrived, blindly reset
3823 	 * the WAIT_FOR_SQACK flag: we will just send an SQREADY_ACK
3824 	 * and forget about the SQREADY that we sent.
3825 */
3826 path->flags &= ~RSMKA_WAIT_FOR_SQACK;
3827
3828 if (path->state != RSMKA_PATH_ACTIVE) {
3829 /* decr refcnt and drop the mutex */
3830 PATH_RELE_NOLOCK(path);
3831 mutex_exit(&path->mutex);
3832 DBG_PRINTF((category, RSM_DEBUG,
3833 "rsm_proc_sqready done: msg dropped path !ACTIVE\n"));
3834 return;
3835 }
3836
3837 DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready:path=%lx "
3838 " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr));
3839
3840 /*
3841 * The sender's local incarnation number is our remote incarnation
3842 	 * number; save it in the path data structure.
3843 */
3844 path->remote_incn = msg->rsmipc_local_incn;
3845 path->sendq_token.msgbuf_avail = 0;
3846 path->procmsg_cnt = 0;
3847
3848 /*
3849 * path is active - dispatch task to send SQREADY_ACK - remember
3850 * RSMPI calls can't be done in interrupt context
3851 *
3852 * We can use the recv_taskq to send because the remote endpoint
3853 	 * cannot start sending messages till it receives SQREADY_ACK; hence
3854 * at this point there are no tasks on recv_taskq.
3855 *
3856 * The path refcnt will be decremented in rsm_sqready_ack_deferred.
3857 */
3858 (void) taskq_dispatch(path->recv_taskq,
3859 rsm_sqready_ack_deferred, path, KM_NOSLEEP);
3860
3861 mutex_exit(&path->mutex);
3862
3863
3864 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready done\n"));
3865 }
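
/*
 * For reference, the send-queue credit handshake as implemented by
 * rsm_proc_sqready, rsm_sqready_ack_deferred and rsm_proc_sqready_ack
 * (a summary sketch of the code, not an additional protocol step):
 *
 *	node A					node B
 *	------					------
 *	send SQREADY(A's local_incn) ---->	rsm_proc_sqready:
 *						  remote_incn = A's incn
 *						  msgbuf_avail = 0
 *	rsm_proc_sqready_ack:	  <----------	send SQREADY_ACK (via taskq),
 *	  clear WAIT_FOR_SQACK			  msgbuf_avail =
 *	  remote_incn = B's incn		      RSMIPC_MAX_MESSAGES
 *	  msgbuf_avail = RSMIPC_MAX_MESSAGES
 *
 * After the handshake both endpoints hold RSMIPC_MAX_MESSAGES credits;
 * credits are replenished later via RSMIPC_MSG_CREDIT (see
 * rsm_add_credits below).
 */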
3866
3867 /*
3868 * Process the SQREADY_ACK message
3869 */
3870 static void
3871 rsm_proc_sqready_ack(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3872 rsm_intr_hand_arg_t arg)
3873 {
3874 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg;
3875 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
3876 path_t *path;
3877 DBG_DEFINE(category,
3878 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3879
3880 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3881 "rsm_proc_sqready_ack enter\n"));
3882
3883 /* look up the path - incr the path refcnt */
3884 path = rsm_find_path(hdlr_argp->adapter_name,
3885 hdlr_argp->adapter_instance, src_hwaddr);
3886
3887 /*
3888 	 * Drop the message if no path exists, the path is not active,
3889 	 * or it is not waiting for an SQREADY_ACK message.
3890 */
3891 if (path == NULL) {
3892 DBG_PRINTF((category, RSM_DEBUG,
3893 "rsm_proc_sqready_ack done: msg dropped no path\n"));
3894 return;
3895 }
3896
3897 if ((path->state != RSMKA_PATH_ACTIVE) ||
3898 !(path->flags & RSMKA_WAIT_FOR_SQACK)) {
3899 /* decrement the refcnt */
3900 PATH_RELE_NOLOCK(path);
3901 mutex_exit(&path->mutex);
3902 DBG_PRINTF((category, RSM_DEBUG,
3903 "rsm_proc_sqready_ack done: msg dropped\n"));
3904 return;
3905 }
3906
3907 /*
3908 * Check if this message is in response to the last RSMIPC_MSG_SQREADY
3909 * sent, if not drop it.
3910 */
3911 if (path->local_incn != msghdr->rsmipc_incn) {
3912 /* decrement the refcnt */
3913 PATH_RELE_NOLOCK(path);
3914 mutex_exit(&path->mutex);
3915 DBG_PRINTF((category, RSM_DEBUG,
3916 "rsm_proc_sqready_ack done: msg old incn %lld\n",
3917 msghdr->rsmipc_incn));
3918 return;
3919 }
3920
3921 DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready_ack:path=%lx "
3922 " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr));
3923
3924 /*
3925 * clear the WAIT_FOR_SQACK flag since we have recvd the ack
3926 */
3927 path->flags &= ~RSMKA_WAIT_FOR_SQACK;
3928
3929 /* save the remote sendq incn number */
3930 path->remote_incn = msg->rsmipc_local_incn;
3931
3932 /* initialize credits to the max level */
3933 path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES;
3934
3935 /* wake up any send that is waiting for credits */
3936 cv_broadcast(&path->sendq_token.sendq_cv);
3937
3938 /* decrement the refcnt */
3939 PATH_RELE_NOLOCK(path);
3940
3941 mutex_exit(&path->mutex);
3942
3943 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3944 "rsm_proc_sqready_ack done\n"));
3945 }
3946
3947 /*
3948 * process the RSMIPC_MSG_CREDIT message
3949 */
3950 static void
3951 rsm_add_credits(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3952 rsm_intr_hand_arg_t arg)
3953 {
3954 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg;
3955 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
3956 path_t *path;
3957 DBG_DEFINE(category,
3958 RSM_KERNEL_AGENT | RSM_FUNC_ALL |
3959 RSM_INTR_CALLBACK | RSM_FLOWCONTROL);
3960
3961 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits enter\n"));
3962
3963 /* look up the path - incr the path refcnt */
3964 path = rsm_find_path(hdlr_argp->adapter_name,
3965 hdlr_argp->adapter_instance, src_hwaddr);
3966
3967 if (path == NULL) {
3968 DBG_PRINTF((category, RSM_DEBUG,
3969 "rsm_add_credits enter: path not found\n"));
3970 return;
3971 }
3972
3973 /* the path is not active - discard credits */
3974 if (path->state != RSMKA_PATH_ACTIVE) {
3975 PATH_RELE_NOLOCK(path);
3976 mutex_exit(&path->mutex);
3977 DBG_PRINTF((category, RSM_DEBUG,
3978 "rsm_add_credits enter:path=%lx !ACTIVE\n", path));
3979 return;
3980 }
3981
3982 /*
3983 * Check if these credits are for current incarnation of the path.
3984 */
3985 if (path->local_incn != msghdr->rsmipc_incn) {
3986 /* decrement the refcnt */
3987 PATH_RELE_NOLOCK(path);
3988 mutex_exit(&path->mutex);
3989 DBG_PRINTF((category, RSM_DEBUG,
3990 "rsm_add_credits enter: old incn %lld\n",
3991 msghdr->rsmipc_incn));
3992 return;
3993 }
3994
3995 DBG_PRINTF((category, RSM_DEBUG,
3996 "rsm_add_credits:path=%lx new-creds=%d "
3997 "curr credits=%d src=%lx:%llx\n", path, msg->rsmipc_credits,
3998 path->sendq_token.msgbuf_avail, msghdr->rsmipc_src,
3999 src_hwaddr));
4000
4001
4002 /* add credits to the path's sendq */
4003 path->sendq_token.msgbuf_avail += msg->rsmipc_credits;
4004
4005 ASSERT(path->sendq_token.msgbuf_avail <= RSMIPC_MAX_MESSAGES);
4006
4007 /* wake up any send that is waiting for credits */
4008 cv_broadcast(&path->sendq_token.sendq_cv);
4009
4010 /* decrement the refcnt */
4011 PATH_RELE_NOLOCK(path);
4012
4013 mutex_exit(&path->mutex);
4014
4015 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits done\n"));
4016 }
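
/*
 * Credit-based flow control at a glance, pieced together from
 * rsm_add_credits above and rsm_intr_proc_deferred/rsmipc_send below
 * (an illustrative sketch of existing code, not new logic):
 *
 *	sender side (rsmipc_send):		receiver side:
 *	  while (msgbuf_avail == 0 &&		  process one message;
 *	      path active)			  procmsg_cnt++;
 *		cv_wait(sendq_cv);		  if (procmsg_cnt >=
 *	  msgbuf_avail--;			      RSMIPC_LOTSFREE_MSGBUFS)
 *	  rsm_send(...);			    send RSMIPC_MSG_CREDIT;
 *
 * Credits are returned in batches, so a CREDIT control message is sent
 * only after RSMIPC_LOTSFREE_MSGBUFS messages have been drained, and
 * msgbuf_avail never exceeds RSMIPC_MAX_MESSAGES (asserted above).
 */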
4017
4018 static void
4019 rsm_intr_event(rsmipc_request_t *msg)
4020 {
4021 rsmseg_t *seg;
4022 rsmresource_t *p;
4023 rsm_node_id_t src_node;
4024 DBG_DEFINE(category,
4025 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4026
4027 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event enter\n"));
4028
4029 src_node = msg->rsmipc_hdr.rsmipc_src;
4030
4031 if ((seg = msg->rsmipc_segment_cookie) != NULL) {
4032 /* This is for an import segment */
4033 uint_t hashval = rsmhash(msg->rsmipc_key);
4034
4035 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER);
4036
4037 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval);
4038
4039 for (; p; p = p->rsmrc_next) {
4040 if ((p->rsmrc_key == msg->rsmipc_key) &&
4041 (p->rsmrc_node == src_node)) {
4042 seg = (rsmseg_t *)p;
4043 rsmseglock_acquire(seg);
4044
4045 atomic_add_32(&seg->s_pollevent, 1);
4046
4047 if (seg->s_pollflag & RSM_SEGMENT_POLL)
4048 pollwakeup(&seg->s_poll, POLLRDNORM);
4049
4050 rsmseglock_release(seg);
4051 }
4052 }
4053
4054 rw_exit(&rsm_import_segs.rsmhash_rw);
4055 } else {
4056 /* This is for an export segment */
4057 seg = rsmexport_lookup(msg->rsmipc_key);
4058 if (!seg) {
4059 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4060 "rsm_intr_event done: exp seg not found\n"));
4061 return;
4062 }
4063
4064 ASSERT(rsmseglock_held(seg));
4065
4066 atomic_add_32(&seg->s_pollevent, 1);
4067
4068 /*
4069 * We must hold the segment lock here, or else the segment
4070 * can be freed while pollwakeup is using it. This implies
4071 * that we MUST NOT grab the segment lock during rsm_chpoll,
4072 * as outlined in the chpoll(2) man page.
4073 */
4074 if (seg->s_pollflag & RSM_SEGMENT_POLL)
4075 pollwakeup(&seg->s_poll, POLLRDNORM);
4076
4077 rsmseglock_release(seg);
4078 }
4079
4080 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event done\n"));
4081 }
4082
4083 /*
4084 * The exporter did a republish and changed the ACL - this change is only
4085 * visible to new importers.
4086 */
4087 static void
4088 importer_update(rsm_node_id_t src_node, rsm_memseg_id_t key,
4089 rsm_permission_t perm)
4090 {
4091
4092 rsmresource_t *p;
4093 rsmseg_t *seg;
4094 uint_t hashval = rsmhash(key);
4095 DBG_DEFINE(category,
4096 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4097
4098 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update enter\n"));
4099
4100 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER);
4101
4102 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval);
4103
4104 for (; p; p = p->rsmrc_next) {
4105 /*
4106 * find the importer and update the permission in the shared
4107 * data structure. Any new importers will use the new perms
4108 */
4109 if ((p->rsmrc_key == key) && (p->rsmrc_node == src_node)) {
4110 seg = (rsmseg_t *)p;
4111
4112 rsmseglock_acquire(seg);
4113 rsmsharelock_acquire(seg);
4114 seg->s_share->rsmsi_mode = perm;
4115 rsmsharelock_release(seg);
4116 rsmseglock_release(seg);
4117
4118 break;
4119 }
4120 }
4121
4122 rw_exit(&rsm_import_segs.rsmhash_rw);
4123
4124 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update done\n"));
4125 }
4126
4127 void
4128 rsm_suspend_complete(rsm_node_id_t src_node, int flag)
4129 {
4130 int done = 1; /* indicate all SUSPENDS have been acked */
4131 list_element_t *elem;
4132 DBG_DEFINE(category,
4133 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4134
4135 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4136 "rsm_suspend_complete enter\n"));
4137
4138 mutex_enter(&rsm_suspend_list.list_lock);
4139
4140 if (rsm_suspend_list.list_head == NULL) {
4141 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4142 "rsm_suspend_complete done: suspend_list is empty\n"));
4143 mutex_exit(&rsm_suspend_list.list_lock);
4144 return;
4145 }
4146
4147 elem = rsm_suspend_list.list_head;
4148 while (elem != NULL) {
4149 if (elem->nodeid == src_node) {
4150 /* clear the pending flag for the node */
4151 elem->flags &= ~RSM_SUSPEND_ACKPENDING;
4152 elem->flags |= flag;
4153 }
4154
4155 if (done && (elem->flags & RSM_SUSPEND_ACKPENDING))
4156 done = 0; /* still some nodes have not yet ACKED */
4157
4158 elem = elem->next;
4159 }
4160
4161 mutex_exit(&rsm_suspend_list.list_lock);
4162
4163 if (!done) {
4164 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4165 "rsm_suspend_complete done: acks pending\n"));
4166 return;
4167 }
4168 /*
4169 	 * Now that we are done with suspending all the remote importers,
4170 	 * it is time to quiesce the local exporters.
4171 */
4172 exporter_quiesce();
4173
4174 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4175 "rsm_suspend_complete done\n"));
4176 }
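
/*
 * The DR pre-delete flow, as it passes through this file (a summary;
 * the initial SUSPEND broadcast happens elsewhere in the driver):
 *
 *	1. SUSPEND is sent to the importing nodes; each importer lands
 *	   in importer_suspend() and quiesces its import segments.
 *	2. Each importer replies with RSMIPC_MSG_SUSPEND_DONE, which
 *	   arrives in rsm_suspend_complete() above.
 *	3. Once no entry on rsm_suspend_list has RSM_SUSPEND_ACKPENDING
 *	   set, exporter_quiesce() below unpublishes and unbinds the
 *	   local export segments and moves the driver state to
 *	   RSM_DRV_PREDEL_COMPLETED.
 */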
4177
4178 static void
4179 exporter_quiesce()
4180 {
4181 int i, e;
4182 rsmresource_t *current;
4183 rsmseg_t *seg;
4184 adapter_t *adapter;
4185 DBG_DEFINE(category,
4186 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4187
4188 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce enter\n"));
4189 /*
4190 * The importers send a SUSPEND_COMPLETE to the exporter node
4191 * Unpublish, unbind the export segment and
4192 * move the segments to the EXPORT_QUIESCED state
4193 */
4194
4195 rw_enter(&rsm_export_segs.rsmhash_rw, RW_READER);
4196
4197 for (i = 0; i < rsm_hash_size; i++) {
4198 current = rsm_export_segs.bucket[i];
4199 while (current != NULL) {
4200 seg = (rsmseg_t *)current;
4201 rsmseglock_acquire(seg);
4202 if (current->rsmrc_state ==
4203 RSM_STATE_EXPORT_QUIESCING) {
4204 adapter = seg->s_adapter;
4205 /*
4206 				 * some local memory handles are not published;
4207 				 * check whether this one was published
4208 */
4209 if ((seg->s_acl == NULL) ||
4210 (seg->s_acl[0].ae_node != my_nodeid) ||
4211 (seg->s_acl[0].ae_permission != 0)) {
4212
4213 e = adapter->rsmpi_ops->rsm_unpublish(
4214 seg->s_handle.out);
4215 DBG_PRINTF((category, RSM_DEBUG,
4216 "exporter_quiesce:unpub %d\n", e));
4217
4218 e = adapter->rsmpi_ops->rsm_seg_destroy(
4219 seg->s_handle.out);
4220
4221 DBG_PRINTF((category, RSM_DEBUG,
4222 "exporter_quiesce:destroy %d\n",
4223 e));
4224 }
4225
4226 (void) rsm_unbind_pages(seg);
4227 seg->s_state = RSM_STATE_EXPORT_QUIESCED;
4228 cv_broadcast(&seg->s_cv);
4229 }
4230 rsmseglock_release(seg);
4231 current = current->rsmrc_next;
4232 }
4233 }
4234 rw_exit(&rsm_export_segs.rsmhash_rw);
4235
4236 /*
4237 	 * We are done with the pre-del processing for all the local
4238 	 * segments - time to move to PREDEL_COMPLETED.
4239 */
4240
4241 mutex_enter(&rsm_drv_data.drv_lock);
4242
4243 ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED);
4244
4245 rsm_drv_data.drv_state = RSM_DRV_PREDEL_COMPLETED;
4246
4247 cv_broadcast(&rsm_drv_data.drv_cv);
4248
4249 mutex_exit(&rsm_drv_data.drv_lock);
4250
4251 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce done\n"));
4252 }
4253
4254 static void
4255 importer_suspend(rsm_node_id_t src_node)
4256 {
4257 int i;
4258 int susp_flg; /* true means already suspended */
4259 int num_importers;
4260 rsmresource_t *p = NULL, *curp;
4261 rsmhash_table_t *rhash = &rsm_import_segs;
4262 rsmseg_t *seg;
4263 rsmipc_request_t request;
4264 DBG_DEFINE(category,
4265 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4266
4267 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend enter\n"));
4268
4269 rw_enter(&rhash->rsmhash_rw, RW_READER);
4270 for (i = 0; i < rsm_hash_size; i++) {
4271 p = rhash->bucket[i];
4272
4273 /*
4274 * Suspend all importers with same <node, key> pair.
4275 * After the last one of the shared importers has been
4276 * suspended - suspend the shared mappings/connection.
4277 */
4278 for (; p; p = p->rsmrc_next) {
4279 rsmseg_t *first = (rsmseg_t *)p;
4280 if ((first->s_node != src_node) ||
4281 (first->s_state == RSM_STATE_DISCONNECT))
4282 continue; /* go to next entry */
4283 /*
4284 * search the rest of the bucket for
4285 			 * other siblings (importers with the same key)
4286 * of "first" and suspend them.
4287 * All importers with same key fall in
4288 * the same bucket.
4289 */
4290 num_importers = 0;
4291 for (curp = p; curp; curp = curp->rsmrc_next) {
4292 seg = (rsmseg_t *)curp;
4293
4294 rsmseglock_acquire(seg);
4295
4296 if ((seg->s_node != first->s_node) ||
4297 (seg->s_key != first->s_key) ||
4298 (seg->s_state == RSM_STATE_DISCONNECT)) {
4299 /*
4300 					 * either not a peer segment or it's a
4301 * disconnected segment - skip it
4302 */
4303 rsmseglock_release(seg);
4304 continue;
4305 }
4306
4307 rsmseg_suspend(seg, &susp_flg);
4308
4309 if (susp_flg) { /* seg already suspended */
4310 rsmseglock_release(seg);
4311 break; /* the inner for loop */
4312 }
4313
4314 num_importers++;
4315 rsmsharelock_acquire(seg);
4316 /*
4317 * we've processed all importers that are
4318 * siblings of "first"
4319 */
4320 if (num_importers ==
4321 seg->s_share->rsmsi_refcnt) {
4322 rsmsharelock_release(seg);
4323 rsmseglock_release(seg);
4324 break;
4325 }
4326 rsmsharelock_release(seg);
4327 rsmseglock_release(seg);
4328 }
4329
4330 /*
4331 * All the importers with the same key and
4332 * nodeid as "first" have been suspended.
4333 * Now suspend the shared connect/mapping.
4334 * This is done only once.
4335 */
4336 if (!susp_flg) {
4337 rsmsegshare_suspend(seg);
4338 }
4339 }
4340 }
4341
4342 rw_exit(&rhash->rsmhash_rw);
4343
4344 /* send an ACK for SUSPEND message */
4345 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND_DONE;
4346 (void) rsmipc_send(src_node, &request, RSM_NO_REPLY);
4347
4348
4349 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend done\n"));
4350
4351 }
4352
4353 static void
4354 rsmseg_suspend(rsmseg_t *seg, int *susp_flg)
4355 {
4356 int recheck_state;
4357 rsmcookie_t *hdl;
4358 DBG_DEFINE(category,
4359 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4360
4361 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4362 "rsmseg_suspend enter: key=%u\n", seg->s_key));
4363
4364 *susp_flg = 0;
4365
4366 ASSERT(rsmseglock_held(seg));
4367 /* wait if putv/getv is in progress */
4368 while (seg->s_rdmacnt > 0)
4369 cv_wait(&seg->s_cv, &seg->s_lock);
4370
4371 do {
4372 recheck_state = 0;
4373
4374 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4375 "rsmseg_suspend:segment %x state=%d\n",
4376 seg->s_key, seg->s_state));
4377
4378 switch (seg->s_state) {
4379 case RSM_STATE_NEW:
4380 /* not a valid state */
4381 break;
4382 case RSM_STATE_CONNECTING:
4383 seg->s_state = RSM_STATE_ABORT_CONNECT;
4384 break;
4385 case RSM_STATE_ABORT_CONNECT:
4386 break;
4387 case RSM_STATE_CONNECT:
4388 seg->s_handle.in = NULL;
4389 seg->s_state = RSM_STATE_CONN_QUIESCE;
4390 break;
4391 case RSM_STATE_MAPPING:
4392 /* wait until segment leaves the mapping state */
4393 while (seg->s_state == RSM_STATE_MAPPING)
4394 cv_wait(&seg->s_cv, &seg->s_lock);
4395 recheck_state = 1;
4396 break;
4397 case RSM_STATE_ACTIVE:
4398 /* unload the mappings */
4399 if (seg->s_ckl != NULL) {
4400 hdl = seg->s_ckl;
4401 for (; hdl != NULL; hdl = hdl->c_next) {
4402 (void) devmap_unload(hdl->c_dhp,
4403 hdl->c_off, hdl->c_len);
4404 }
4405 }
4406 seg->s_mapinfo = NULL;
4407 seg->s_state = RSM_STATE_MAP_QUIESCE;
4408 break;
4409 case RSM_STATE_CONN_QUIESCE:
4410 /* FALLTHRU */
4411 case RSM_STATE_MAP_QUIESCE:
4412 /* rsmseg_suspend already done for seg */
4413 *susp_flg = 1;
4414 break;
4415 case RSM_STATE_DISCONNECT:
4416 break;
4417 default:
4418 ASSERT(0); /* invalid state */
4419 }
4420 } while (recheck_state);
4421
4422 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_suspend done\n"));
4423 }
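
/*
 * Import-segment state transitions performed by rsmseg_suspend, read
 * directly from the switch above:
 *
 *	RSM_STATE_CONNECTING	-> RSM_STATE_ABORT_CONNECT
 *	RSM_STATE_CONNECT	-> RSM_STATE_CONN_QUIESCE
 *	RSM_STATE_MAPPING	-> wait, then re-evaluate
 *	RSM_STATE_ACTIVE	-> RSM_STATE_MAP_QUIESCE (mappings unloaded)
 *	RSM_STATE_CONN_QUIESCE,
 *	RSM_STATE_MAP_QUIESCE	-> unchanged, *susp_flg set to 1
 *	RSM_STATE_NEW,
 *	RSM_STATE_DISCONNECT,
 *	RSM_STATE_ABORT_CONNECT	-> unchanged
 *
 * rsmseg_resume further below reverses the two QUIESCE states.
 */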
4424
4425 static void
4426 rsmsegshare_suspend(rsmseg_t *seg)
4427 {
4428 int e;
4429 adapter_t *adapter;
4430 rsm_import_share_t *sharedp;
4431 DBG_DEFINE(category,
4432 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4433
4434 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4435 "rsmsegshare_suspend enter\n"));
4436
4437 rsmseglock_acquire(seg);
4438 rsmsharelock_acquire(seg);
4439
4440 sharedp = seg->s_share;
4441 adapter = seg->s_adapter;
4442 switch (sharedp->rsmsi_state) {
4443 case RSMSI_STATE_NEW:
4444 break;
4445 case RSMSI_STATE_CONNECTING:
4446 sharedp->rsmsi_state = RSMSI_STATE_ABORT_CONNECT;
4447 break;
4448 case RSMSI_STATE_ABORT_CONNECT:
4449 break;
4450 case RSMSI_STATE_CONNECTED:
4451 /* do the rsmpi disconnect */
4452 if (sharedp->rsmsi_node != my_nodeid) {
4453 e = adapter->rsmpi_ops->
4454 rsm_disconnect(sharedp->rsmsi_handle);
4455
4456 DBG_PRINTF((category, RSM_DEBUG,
4457 "rsm:rsmpi disconnect seg=%x:err=%d\n",
4458 sharedp->rsmsi_segid, e));
4459 }
4460
4461 sharedp->rsmsi_handle = NULL;
4462
4463 sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE;
4464 break;
4465 case RSMSI_STATE_CONN_QUIESCE:
4466 break;
4467 case RSMSI_STATE_MAPPED:
4468 /* do the rsmpi unmap and disconnect */
4469 if (sharedp->rsmsi_node != my_nodeid) {
4470 e = adapter->rsmpi_ops->rsm_unmap(seg->s_handle.in);
4471
4472 DBG_PRINTF((category, RSM_DEBUG,
4473 "rsmshare_suspend: rsmpi unmap %d\n", e));
4474
4475 e = adapter->rsmpi_ops->
4476 rsm_disconnect(sharedp->rsmsi_handle);
4477 DBG_PRINTF((category, RSM_DEBUG,
4478 "rsm:rsmpi disconnect seg=%x:err=%d\n",
4479 sharedp->rsmsi_segid, e));
4480 }
4481
4482 sharedp->rsmsi_handle = NULL;
4483
4484 sharedp->rsmsi_state = RSMSI_STATE_MAP_QUIESCE;
4485 break;
4486 case RSMSI_STATE_MAP_QUIESCE:
4487 break;
4488 case RSMSI_STATE_DISCONNECTED:
4489 break;
4490 default:
4491 ASSERT(0); /* invalid state */
4492 }
4493
4494 rsmsharelock_release(seg);
4495 rsmseglock_release(seg);
4496
4497 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4498 "rsmsegshare_suspend done\n"));
4499 }
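
/*
 * Shared-import state transitions performed by rsmsegshare_suspend,
 * read from the switch above:
 *
 *	RSMSI_STATE_CONNECTING	-> RSMSI_STATE_ABORT_CONNECT
 *	RSMSI_STATE_CONNECTED	-> RSMSI_STATE_CONN_QUIESCE (rsmpi
 *				   disconnect)
 *	RSMSI_STATE_MAPPED	-> RSMSI_STATE_MAP_QUIESCE (rsmpi unmap
 *				   and disconnect)
 *	all other states	-> unchanged
 *
 * The rsmpi unmap/disconnect calls are skipped in the loopback case
 * (rsmsi_node == my_nodeid) since no remote connection exists there.
 */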
4500
4501 /*
4502 * This should get called on receiving a RESUME message or from
4503  * the path manager if the node undergoing DR dies.
4504 */
4505 static void
4506 importer_resume(rsm_node_id_t src_node)
4507 {
4508 int i;
4509 rsmresource_t *p = NULL;
4510 rsmhash_table_t *rhash = &rsm_import_segs;
4511 void *cookie;
4512 DBG_DEFINE(category,
4513 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4514
4515 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume enter\n"));
4516
4517 rw_enter(&rhash->rsmhash_rw, RW_READER);
4518
4519 for (i = 0; i < rsm_hash_size; i++) {
4520 p = rhash->bucket[i];
4521
4522 for (; p; p = p->rsmrc_next) {
4523 rsmseg_t *seg = (rsmseg_t *)p;
4524
4525 rsmseglock_acquire(seg);
4526
4527 /* process only importers of node undergoing DR */
4528 if (seg->s_node != src_node) {
4529 rsmseglock_release(seg);
4530 continue;
4531 }
4532
4533 if (rsmseg_resume(seg, &cookie) != RSM_SUCCESS) {
4534 rsmipc_request_t request;
4535 /*
4536 				 * rsmpi map/connect failed;
4537 * inform the exporter so that it can
4538 * remove the importer.
4539 */
4540 request.rsmipc_hdr.rsmipc_type =
4541 RSMIPC_MSG_NOTIMPORTING;
4542 request.rsmipc_key = seg->s_segid;
4543 request.rsmipc_segment_cookie = cookie;
4544 rsmseglock_release(seg);
4545 (void) rsmipc_send(seg->s_node, &request,
4546 RSM_NO_REPLY);
4547 } else {
4548 rsmseglock_release(seg);
4549 }
4550 }
4551 }
4552
4553 rw_exit(&rhash->rsmhash_rw);
4554
4555 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume done\n"));
4556 }
4557
4558 static int
4559 rsmseg_resume(rsmseg_t *seg, void **cookie)
4560 {
4561 int e;
4562 int retc;
4563 off_t dev_offset;
4564 size_t maplen;
4565 uint_t maxprot;
4566 rsm_mapinfo_t *p;
4567 rsmcookie_t *hdl;
4568 rsm_import_share_t *sharedp;
4569 DBG_DEFINE(category,
4570 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4571
4572 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4573 "rsmseg_resume enter: key=%u\n", seg->s_key));
4574
4575 *cookie = NULL;
4576
4577 ASSERT(rsmseglock_held(seg));
4578
4579 if ((seg->s_state != RSM_STATE_CONN_QUIESCE) &&
4580 (seg->s_state != RSM_STATE_MAP_QUIESCE)) {
4581 return (RSM_SUCCESS);
4582 }
4583
4584 sharedp = seg->s_share;
4585
4586 rsmsharelock_acquire(seg);
4587
4588 /* resume the shared connection and/or mapping */
4589 retc = rsmsegshare_resume(seg);
4590
4591 if (seg->s_state == RSM_STATE_CONN_QUIESCE) {
4592 /* shared state can either be connected or mapped */
4593 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) ||
4594 (sharedp->rsmsi_state == RSMSI_STATE_MAPPED)) {
4595 ASSERT(retc == RSM_SUCCESS);
4596 seg->s_handle.in = sharedp->rsmsi_handle;
4597 rsmsharelock_release(seg);
4598 seg->s_state = RSM_STATE_CONNECT;
4599
4600 } else { /* error in rsmpi connect during resume */
4601 seg->s_handle.in = NULL;
4602 seg->s_state = RSM_STATE_DISCONNECT;
4603
4604 sharedp->rsmsi_refcnt--;
4605 			*cookie = (void *)sharedp->rsmsi_cookie;
4606
4607 if (sharedp->rsmsi_refcnt == 0) {
4608 ASSERT(sharedp->rsmsi_mapcnt == 0);
4609 rsmsharelock_release(seg);
4610
4611 /* clean up the shared data structure */
4612 mutex_destroy(&sharedp->rsmsi_lock);
4613 cv_destroy(&sharedp->rsmsi_cv);
4614 kmem_free((void *)(sharedp),
4615 sizeof (rsm_import_share_t));
4616
4617 } else {
4618 rsmsharelock_release(seg);
4619 }
4620 /*
4621 * The following needs to be done after any
4622 * rsmsharelock calls which use seg->s_share.
4623 */
4624 seg->s_share = NULL;
4625 }
4626
4627 /* signal any waiting segment */
4628 cv_broadcast(&seg->s_cv);
4629
4630 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4631 "rsmseg_resume done:state=%d\n", seg->s_state));
4632 return (retc);
4633 }
4634
4635 ASSERT(seg->s_state == RSM_STATE_MAP_QUIESCE);
4636
4637 /* Setup protections for remap */
4638 maxprot = PROT_USER;
4639 if (seg->s_mode & RSM_PERM_READ) {
4640 maxprot |= PROT_READ;
4641 }
4642 if (seg->s_mode & RSM_PERM_WRITE) {
4643 maxprot |= PROT_WRITE;
4644 }
4645
4646 if (sharedp->rsmsi_state != RSMSI_STATE_MAPPED) {
4647 /* error in rsmpi connect or map during resume */
4648
4649 /* remap to trash page */
4650 ASSERT(seg->s_ckl != NULL);
4651
4652 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4653 e = devmap_umem_remap(hdl->c_dhp, rsm_dip,
4654 remap_cookie, hdl->c_off, hdl->c_len,
4655 maxprot, 0, NULL);
4656
4657 DBG_PRINTF((category, RSM_ERR,
4658 "rsmseg_resume:remap=%d\n", e));
4659 }
4660
4661 seg->s_handle.in = NULL;
4662 seg->s_state = RSM_STATE_DISCONNECT;
4663
4664 sharedp->rsmsi_refcnt--;
4665
4666 sharedp->rsmsi_mapcnt--;
4667 seg->s_mapinfo = NULL;
4668
4669 if (sharedp->rsmsi_refcnt == 0) {
4670 ASSERT(sharedp->rsmsi_mapcnt == 0);
4671 rsmsharelock_release(seg);
4672
4673 /* clean up the shared data structure */
4674 mutex_destroy(&sharedp->rsmsi_lock);
4675 cv_destroy(&sharedp->rsmsi_cv);
4676 kmem_free((void *)(sharedp),
4677 sizeof (rsm_import_share_t));
4678
4679 } else {
4680 rsmsharelock_release(seg);
4681 }
4682 /*
4683 * The following needs to be done after any
4684 * rsmsharelock calls which use seg->s_share.
4685 */
4686 seg->s_share = NULL;
4687
4688 /* signal any waiting segment */
4689 cv_broadcast(&seg->s_cv);
4690
4691 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4692 "rsmseg_resume done:seg=%x,err=%d\n",
4693 seg->s_key, retc));
4694 return (retc);
4695
4696 }
4697
4698 seg->s_handle.in = sharedp->rsmsi_handle;
4699
4700 if (seg->s_node == my_nodeid) { /* loopback */
4701 ASSERT(seg->s_mapinfo == NULL);
4702
4703 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4704 e = devmap_umem_remap(hdl->c_dhp,
4705 rsm_dip, seg->s_cookie,
4706 hdl->c_off, hdl->c_len,
4707 maxprot, 0, NULL);
4708
4709 DBG_PRINTF((category, RSM_ERR,
4710 "rsmseg_resume:remap=%d\n", e));
4711 }
4712 } else { /* remote exporter */
4713 /* remap to the new rsmpi maps */
4714 seg->s_mapinfo = sharedp->rsmsi_mapinfo;
4715
4716 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) {
4717 p = rsm_get_mapinfo(seg, hdl->c_off, hdl->c_len,
4718 &dev_offset, &maplen);
4719 e = devmap_devmem_remap(hdl->c_dhp,
4720 p->dip, p->dev_register, dev_offset,
4721 maplen, maxprot, 0, NULL);
4722
4723 DBG_PRINTF((category, RSM_ERR,
4724 "rsmseg_resume:remap=%d\n", e));
4725 }
4726 }
4727
4728 rsmsharelock_release(seg);
4729
4730 seg->s_state = RSM_STATE_ACTIVE;
4731 cv_broadcast(&seg->s_cv);
4732
4733 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_resume done\n"));
4734
4735 return (retc);
4736 }
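
/*
 * Resume outcomes for the two quiesced import states, summarizing
 * rsmseg_resume above:
 *
 *	RSM_STATE_CONN_QUIESCE	-> RSM_STATE_CONNECT (shared connect
 *				   succeeded)
 *				-> RSM_STATE_DISCONNECT (connect failed;
 *				   the share is torn down)
 *	RSM_STATE_MAP_QUIESCE	-> RSM_STATE_ACTIVE (remapped to the new
 *				   rsmpi maps, or to the local cookie in
 *				   the loopback case)
 *				-> RSM_STATE_DISCONNECT (map failed; the
 *				   user mappings are remapped to the
 *				   trash page)
 */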
4737
4738 static int
4739 rsmsegshare_resume(rsmseg_t *seg)
4740 {
4741 int e = RSM_SUCCESS;
4742 adapter_t *adapter;
4743 rsm_import_share_t *sharedp;
4744 DBG_DEFINE(category,
4745 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4746
4747 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume enter\n"));
4748
4749 ASSERT(rsmseglock_held(seg));
4750 ASSERT(rsmsharelock_held(seg));
4751
4752 sharedp = seg->s_share;
4753
4754 /*
4755 	 * If we are not in a xxxx_QUIESCE state, the shared
4756 	 * connect/mapping processing has already been done,
4757 	 * so return success.
4758 */
4759 if ((sharedp->rsmsi_state != RSMSI_STATE_CONN_QUIESCE) &&
4760 (sharedp->rsmsi_state != RSMSI_STATE_MAP_QUIESCE)) {
4761 return (RSM_SUCCESS);
4762 }
4763
4764 adapter = seg->s_adapter;
4765
4766 if (sharedp->rsmsi_node != my_nodeid) {
4767 rsm_addr_t hwaddr;
4768 hwaddr = get_remote_hwaddr(adapter, sharedp->rsmsi_node);
4769
4770 e = adapter->rsmpi_ops->rsm_connect(
4771 adapter->rsmpi_handle, hwaddr,
4772 sharedp->rsmsi_segid, &sharedp->rsmsi_handle);
4773
4774 DBG_PRINTF((category, RSM_DEBUG,
4775 "rsmsegshare_resume:rsmpi connect seg=%x:err=%d\n",
4776 sharedp->rsmsi_segid, e));
4777
4778 if (e != RSM_SUCCESS) {
4779 /* when do we send the NOT_IMPORTING message */
4780 sharedp->rsmsi_handle = NULL;
4781 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
4782 /* signal any waiting segment */
4783 cv_broadcast(&sharedp->rsmsi_cv);
4784 return (e);
4785 }
4786 }
4787
4788 if (sharedp->rsmsi_state == RSMSI_STATE_CONN_QUIESCE) {
4789 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
4790 /* signal any waiting segment */
4791 cv_broadcast(&sharedp->rsmsi_cv);
4792 return (e);
4793 }
4794
4795 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE);
4796
4797 /* do the rsmpi map of the whole segment here */
4798 if (sharedp->rsmsi_node != my_nodeid) {
4799 size_t mapped_len;
4800 rsm_mapinfo_t *p;
4801
4802 /*
4803 		 * We need to do rsmpi maps with <off, len> pairs identical
4804 		 * to those in the old mapinfo list, because the segment
4805 		 * mapping handles (dhp and such) need the rsmpi map
4806 		 * fragmentation to match what it was during the mmap.
4807 */
4808 p = sharedp->rsmsi_mapinfo;
4809
4810 while (p != NULL) {
4811 mapped_len = 0;
4812
4813 e = adapter->rsmpi_ops->rsm_map(
4814 sharedp->rsmsi_handle, p->start_offset,
4815 p->individual_len, &mapped_len,
4816 &p->dip, &p->dev_register, &p->dev_offset,
4817 NULL, NULL);
4818
4819 if (e != 0) {
4820 DBG_PRINTF((category, RSM_ERR,
4821 "rsmsegshare_resume: rsmpi map err=%d\n",
4822 e));
4823 break;
4824 }
4825
4826 if (mapped_len != p->individual_len) {
4827 DBG_PRINTF((category, RSM_ERR,
4828 			    "rsmsegshare_resume: rsmpi maplen "
4829 "< reqlen=%lx\n", mapped_len));
4830 e = RSMERR_BAD_LENGTH;
4831 break;
4832 }
4833
4834 p = p->next;
4835
4836 }
4837
4838
4839 if (e != RSM_SUCCESS) { /* rsmpi map failed */
4840 int err;
4841 /* Check if this is the first rsm_map */
4842 if (p != sharedp->rsmsi_mapinfo) {
4843 /*
4844 * A single rsm_unmap undoes multiple rsm_maps.
4845 */
4846 (void) seg->s_adapter->rsmpi_ops->
4847 rsm_unmap(sharedp->rsmsi_handle);
4848 }
4849
4850 rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
4851 sharedp->rsmsi_mapinfo = NULL;
4852
4853 err = adapter->rsmpi_ops->
4854 rsm_disconnect(sharedp->rsmsi_handle);
4855
4856 DBG_PRINTF((category, RSM_DEBUG,
4857 "rsmsegshare_resume:disconn seg=%x:err=%d\n",
4858 sharedp->rsmsi_segid, err));
4859
4860 sharedp->rsmsi_handle = NULL;
4861 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
4862
4863 /* signal the waiting segments */
4864 cv_broadcast(&sharedp->rsmsi_cv);
4865 DBG_PRINTF((category, RSM_DEBUG,
4866 "rsmsegshare_resume done: rsmpi map err\n"));
4867 return (e);
4868 }
4869 }
4870
4871 sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
4872
4873 /* signal any waiting segment */
4874 cv_broadcast(&sharedp->rsmsi_cv);
4875
4876 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume done\n"));
4877
4878 return (e);
4879 }
4880
4881 /*
4882 * this is the routine that gets called by recv_taskq which is the
4883 * thread that processes messages that are flow-controlled.
4884 */
4885 static void
4886 rsm_intr_proc_deferred(void *arg)
4887 {
4888 path_t *path = (path_t *)arg;
4889 rsmipc_request_t *msg;
4890 rsmipc_msghdr_t *msghdr;
4891 rsm_node_id_t src_node;
4892 msgbuf_elem_t *head;
4893 int e;
4894 DBG_DEFINE(category,
4895 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4896
4897 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4898 "rsm_intr_proc_deferred enter\n"));
4899
4900 mutex_enter(&path->mutex);
4901
4902 /* use the head of the msgbuf_queue */
4903 head = rsmka_gethead_msgbuf(path);
4904
4905 mutex_exit(&path->mutex);
4906
4907 msg = (rsmipc_request_t *)&(head->msg);
4908 msghdr = (rsmipc_msghdr_t *)msg;
4909
4910 src_node = msghdr->rsmipc_src;
4911
4912 /*
4913 	 * Messages that need to send a reply should check the message
4914 	 * version before processing the message; all such messages must
4915 	 * be processed here by the worker thread.
4916 */
4917 switch (msghdr->rsmipc_type) {
4918 case RSMIPC_MSG_SEGCONNECT:
4919 if (msghdr->rsmipc_version != RSM_VERSION) {
4920 rsmipc_reply_t reply;
4921 reply.rsmipc_status = RSMERR_BAD_DRIVER_VERSION;
4922 reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY;
4923 reply.rsmipc_hdr.rsmipc_cookie = msghdr->rsmipc_cookie;
4924 (void) rsmipc_send(msghdr->rsmipc_src, NULL, &reply);
4925 } else {
4926 rsm_intr_segconnect(src_node, msg);
4927 }
4928 break;
4929 case RSMIPC_MSG_DISCONNECT:
4930 rsm_force_unload(src_node, msg->rsmipc_key, DISCONNECT);
4931 break;
4932 case RSMIPC_MSG_SUSPEND:
4933 importer_suspend(src_node);
4934 break;
4935 case RSMIPC_MSG_SUSPEND_DONE:
4936 rsm_suspend_complete(src_node, 0);
4937 break;
4938 case RSMIPC_MSG_RESUME:
4939 importer_resume(src_node);
4940 break;
4941 default:
4942 ASSERT(0);
4943 }
4944
4945 mutex_enter(&path->mutex);
4946
4947 rsmka_dequeue_msgbuf(path);
4948
4949 	/* increment procmsg_cnt; it can be at most RSMIPC_MAX_MESSAGES */
4950 if (path->procmsg_cnt < RSMIPC_MAX_MESSAGES)
4951 path->procmsg_cnt++;
4952
4953 ASSERT(path->procmsg_cnt <= RSMIPC_MAX_MESSAGES);
4954
4955 /* No need to send credits if path is going down */
4956 if ((path->state == RSMKA_PATH_ACTIVE) &&
4957 (path->procmsg_cnt >= RSMIPC_LOTSFREE_MSGBUFS)) {
4958 /*
4959 		 * send credits and reset procmsg_cnt on success; otherwise
4960 		 * credits will be sent after processing the next message
4961 */
4962 e = rsmipc_send_controlmsg(path, RSMIPC_MSG_CREDIT);
4963 if (e == 0)
4964 path->procmsg_cnt = 0;
4965 else
4966 DBG_PRINTF((category, RSM_ERR,
4967 "rsm_intr_proc_deferred:send credits err=%d\n", e));
4968 }
4969
4970 /*
4971 * decrement the path refcnt since we incremented it in
4972 * rsm_intr_callback_dispatch
4973 */
4974 PATH_RELE_NOLOCK(path);
4975
4976 mutex_exit(&path->mutex);
4977
4978 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4979 "rsm_intr_proc_deferred done\n"));
4980 }
4981
4982 /*
4983 * Flow-controlled messages are enqueued and dispatched onto a taskq here
4984 */
4985 static void
4986 rsm_intr_callback_dispatch(void *data, rsm_addr_t src_hwaddr,
4987 rsm_intr_hand_arg_t arg)
4988 {
4989 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
4990 path_t *path;
4991 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data;
4992 DBG_DEFINE(category,
4993 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4994
4995 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4996 "rsm_intr_callback_dispatch enter\n"));
4997 ASSERT(data && hdlr_argp);
4998
4999 /* look up the path - incr the path refcnt */
5000 path = rsm_find_path(hdlr_argp->adapter_name,
5001 hdlr_argp->adapter_instance, src_hwaddr);
5002
5003 /* the path has been removed - drop this message */
5004 if (path == NULL) {
5005 DBG_PRINTF((category, RSM_DEBUG,
5006 "rsm_intr_callback_dispatch done: msg dropped\n"));
5007 return;
5008 }
5009 /* the path is not active - don't accept new messages */
5010 if (path->state != RSMKA_PATH_ACTIVE) {
5011 PATH_RELE_NOLOCK(path);
5012 mutex_exit(&path->mutex);
5013 DBG_PRINTF((category, RSM_DEBUG,
5014 "rsm_intr_callback_dispatch done: msg dropped"
5015 " path=%lx !ACTIVE\n", path));
5016 return;
5017 }
5018
5019 /*
5020 * Check if this message was sent to an older incarnation
5021 * of the path/sendq.
5022 */
5023 if (path->local_incn != msghdr->rsmipc_incn) {
5024 /* decrement the refcnt */
5025 PATH_RELE_NOLOCK(path);
5026 mutex_exit(&path->mutex);
5027 DBG_PRINTF((category, RSM_DEBUG,
5028 "rsm_intr_callback_dispatch done: old incn %lld\n",
5029 msghdr->rsmipc_incn));
5030 return;
5031 }
5032
5033 /* copy and enqueue msg on the path's msgbuf queue */
5034 rsmka_enqueue_msgbuf(path, data);
5035
5036 /*
5037 	 * schedule a task to process the messages - ignore the retval
5038 	 * from taskq_dispatch because the sender cannot send more than
5039 	 * what the receiver can handle.
5040 */
5041 (void) taskq_dispatch(path->recv_taskq,
5042 rsm_intr_proc_deferred, path, KM_NOSLEEP);
5043
5044 mutex_exit(&path->mutex);
5045
5046 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5047 "rsm_intr_callback_dispatch done\n"));
5048 }
5049
5050 /*
5051 * This procedure is called from rsm_srv_func when a remote node creates a
5052  * send queue. This event is used as a hint that an earlier failed
5053 * attempt to create a send queue to that remote node may now succeed and
5054 * should be retried. Indication of an earlier failed attempt is provided
5055 * by the RSMKA_SQCREATE_PENDING flag.
5056 */
5057 static void
5058 rsm_sqcreateop_callback(rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg)
5059 {
5060 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
5061 path_t *path;
5062 DBG_DEFINE(category,
5063 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5064
5065 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5066 "rsm_sqcreateop_callback enter\n"));
5067
5068 /* look up the path - incr the path refcnt */
5069 path = rsm_find_path(hdlr_argp->adapter_name,
5070 hdlr_argp->adapter_instance, src_hwaddr);
5071
5072 if (path == NULL) {
5073 DBG_PRINTF((category, RSM_DEBUG,
5074 "rsm_sqcreateop_callback done: no path\n"));
5075 return;
5076 }
5077
5078 if ((path->state == RSMKA_PATH_UP) &&
5079 (path->flags & RSMKA_SQCREATE_PENDING)) {
5080 /*
5081 		 * A previous attempt to create the sendq had failed; retry
5082 		 * it and move to the RSMKA_PATH_ACTIVE state if successful.
5083 		 * The refcnt will be decremented in do_deferred_work.
5084 */
5085 (void) rsmka_do_path_active(path, RSMKA_NO_SLEEP);
5086 } else {
5087 /* decrement the refcnt */
5088 PATH_RELE_NOLOCK(path);
5089 }
5090 mutex_exit(&path->mutex);
5091
5092 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5093 "rsm_sqcreateop_callback done\n"));
5094 }
5095
5096 static void
5097 rsm_intr_callback(void *data, rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg)
5098 {
5099 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data;
5100 rsmipc_request_t *msg = (rsmipc_request_t *)data;
5101 rsmipc_controlmsg_t *ctrlmsg = (rsmipc_controlmsg_t *)data;
5102 rsm_node_id_t src_node;
5103 DBG_DEFINE(category,
5104 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5105
5106 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback enter:"
5107 "src=%d, type=%d\n", msghdr->rsmipc_src,
5108 msghdr->rsmipc_type));
5109
5110 /*
5111 * Check for the version number in the msg header. If it is not
5112 * RSM_VERSION, drop the message. In the future, we need to manage
5113 * incompatible version numbers in some way
5114 */
5115 if (msghdr->rsmipc_version != RSM_VERSION) {
5116 DBG_PRINTF((category, RSM_ERR, "wrong KA version\n"));
5117 /*
5118 		 * Drop requests that don't expect a reply right here.
5119 		 * Requests with a reply will send a BAD_VERSION reply
5120 		 * when they get processed by the worker thread.
5121 */
5122 if (msghdr->rsmipc_type != RSMIPC_MSG_SEGCONNECT) {
5123 return;
5124 }
5125
5126 }
5127
5128 src_node = msghdr->rsmipc_src;
5129
5130 switch (msghdr->rsmipc_type) {
5131 case RSMIPC_MSG_SEGCONNECT:
5132 case RSMIPC_MSG_DISCONNECT:
5133 case RSMIPC_MSG_SUSPEND:
5134 case RSMIPC_MSG_SUSPEND_DONE:
5135 case RSMIPC_MSG_RESUME:
5136 /*
5137 * These message types are handled by a worker thread using
5138 * the flow-control algorithm.
5139 * Any message processing that does one or more of the
5140 * following should be handled in a worker thread.
5141 * - allocates resources and might sleep
5142 	 * - makes RSMPI calls down to the interconnect driver;
5143 	 *   this by definition includes requests with replies
5144 * - takes a long duration of time
5145 */
5146 rsm_intr_callback_dispatch(data, src_hwaddr, arg);
5147 break;
5148 case RSMIPC_MSG_NOTIMPORTING:
5149 importer_list_rm(src_node, msg->rsmipc_key,
5150 msg->rsmipc_segment_cookie);
5151 break;
5152 case RSMIPC_MSG_SQREADY:
5153 rsm_proc_sqready(data, src_hwaddr, arg);
5154 break;
5155 case RSMIPC_MSG_SQREADY_ACK:
5156 rsm_proc_sqready_ack(data, src_hwaddr, arg);
5157 break;
5158 case RSMIPC_MSG_CREDIT:
5159 rsm_add_credits(ctrlmsg, src_hwaddr, arg);
5160 break;
5161 case RSMIPC_MSG_REPLY:
5162 rsm_intr_reply(msghdr);
5163 break;
5164 case RSMIPC_MSG_BELL:
5165 rsm_intr_event(msg);
5166 break;
5167 case RSMIPC_MSG_IMPORTING:
5168 importer_list_add(src_node, msg->rsmipc_key,
5169 msg->rsmipc_adapter_hwaddr,
5170 msg->rsmipc_segment_cookie);
5171 break;
5172 case RSMIPC_MSG_REPUBLISH:
5173 importer_update(src_node, msg->rsmipc_key, msg->rsmipc_perm);
5174 break;
5175 default:
5176 DBG_PRINTF((category, RSM_DEBUG,
5177 "rsm_intr_callback: bad msg %lx type %d data %lx\n",
5178 (size_t)msg, (int)(msghdr->rsmipc_type), (size_t)data));
5179 }
5180
5181 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback done\n"));
5182
5183 }
5184
5185 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
5186 rsm_intr_q_op_t opcode, rsm_addr_t src,
5187 void *data, size_t size, rsm_intr_hand_arg_t arg)
5188 {
5189 DBG_DEFINE(category,
5190 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5191
5192 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func enter\n"));
5193
5194 switch (opcode) {
5195 case RSM_INTR_Q_OP_CREATE:
5196 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_CREATE\n"));
5197 rsm_sqcreateop_callback(src, arg);
5198 break;
5199 case RSM_INTR_Q_OP_DESTROY:
5200 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_DESTROY\n"));
5201 break;
5202 case RSM_INTR_Q_OP_RECEIVE:
5203 rsm_intr_callback(data, src, arg);
5204 break;
5205 default:
5206 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5207 "rsm_srv_func: unknown opcode = %x\n", opcode));
5208 }
5209
5210 chd = chd;
5211 size = size;
5212
5213 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func done\n"));
5214
5215 return (RSM_INTR_HAND_CLAIMED);
5216 }
5217
5218 /* *************************** IPC slots ************************* */
5219 static rsmipc_slot_t *
5220 rsmipc_alloc()
5221 {
5222 int i;
5223 rsmipc_slot_t *slot;
5224 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
5225
5226 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc enter\n"));
5227
5228 /* try to find a free slot, if not wait */
5229 mutex_enter(&rsm_ipc.lock);
5230
5231 while (rsm_ipc.count == 0) {
5232 rsm_ipc.wanted = 1;
5233 cv_wait(&rsm_ipc.cv, &rsm_ipc.lock);
5234 }
5235
5236 /* An empty slot is available, find it */
5237 slot = &rsm_ipc.slots[0];
5238 for (i = 0; i < RSMIPC_SZ; i++, slot++) {
5239 if (RSMIPC_GET(slot, RSMIPC_FREE)) {
5240 RSMIPC_CLEAR(slot, RSMIPC_FREE);
5241 break;
5242 }
5243 }
5244
5245 ASSERT(i < RSMIPC_SZ);
5246 rsm_ipc.count--; /* one less is available */
5247 rsm_ipc.sequence++; /* new sequence */
5248
5249 slot->rsmipc_cookie.ic.sequence = (uint_t)rsm_ipc.sequence;
5250 slot->rsmipc_cookie.ic.index = (uint_t)i;
5251
5252 mutex_exit(&rsm_ipc.lock);
5253
5254 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc done\n"));
5255
5256 return (slot);
5257 }
5258
5259 static void
5260 rsmipc_free(rsmipc_slot_t *slot)
5261 {
5262 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
5263
5264 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free enter\n"));
5265
5266 ASSERT(MUTEX_HELD(&slot->rsmipc_lock));
5267 ASSERT(&rsm_ipc.slots[slot->rsmipc_cookie.ic.index] == slot);
5268
5269 mutex_enter(&rsm_ipc.lock);
5270
5271 RSMIPC_SET(slot, RSMIPC_FREE);
5272
5273 slot->rsmipc_cookie.ic.sequence = 0;
5274
5275 mutex_exit(&slot->rsmipc_lock);
5276 rsm_ipc.count++;
5277 ASSERT(rsm_ipc.count <= RSMIPC_SZ);
5278 if (rsm_ipc.wanted) {
5279 rsm_ipc.wanted = 0;
5280 cv_broadcast(&rsm_ipc.cv);
5281 }
5282
5283 mutex_exit(&rsm_ipc.lock);
5284
5285 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free done\n"));
5286 }
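
/*
 * Typical slot lifecycle, as used by rsmipc_send below (an excerpt for
 * illustration, not additional code):
 *
 *	rslot = rsmipc_alloc();			(may block for a free slot)
 *	mutex_enter(&rslot->rsmipc_lock);
 *	rslot->rsmipc_data = (void *)reply;
 *	RSMIPC_SET(rslot, RSMIPC_PENDING);
 *	req->rsmipc_hdr.rsmipc_cookie = rslot->rsmipc_cookie;
 *	... rsm_send() and cv_reltimedwait_sig() ...
 *	RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
 *	rsmipc_free(rslot);
 *
 * Note the asymmetry: rsmipc_free must be entered with the slot's
 * rsmipc_lock held and returns with it released.
 */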
5287
5288 static int
5289 rsmipc_send(rsm_node_id_t dest, rsmipc_request_t *req, rsmipc_reply_t *reply)
5290 {
5291 int e = 0;
5292 int credit_check = 0;
5293 int retry_cnt = 0;
5294 int min_retry_cnt = 10;
5295 rsm_send_t is;
5296 rsmipc_slot_t *rslot;
5297 adapter_t *adapter;
5298 path_t *path;
5299 sendq_token_t *sendq_token;
5300 sendq_token_t *used_sendq_token = NULL;
5301 rsm_send_q_handle_t ipc_handle;
5302 DBG_DEFINE(category,
5303 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5304
5305 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send enter:dest=%d",
5306 dest));
5307
5308 /*
5309 * Check if this is a local case
5310 */
5311 if (dest == my_nodeid) {
5312 switch (req->rsmipc_hdr.rsmipc_type) {
5313 case RSMIPC_MSG_SEGCONNECT:
5314 reply->rsmipc_status = (short)rsmsegacl_validate(
5315 req, dest, reply);
5316 break;
5317 case RSMIPC_MSG_BELL:
5318 req->rsmipc_hdr.rsmipc_src = dest;
5319 rsm_intr_event(req);
5320 break;
5321 case RSMIPC_MSG_IMPORTING:
5322 importer_list_add(dest, req->rsmipc_key,
5323 req->rsmipc_adapter_hwaddr,
5324 req->rsmipc_segment_cookie);
5325 break;
5326 case RSMIPC_MSG_NOTIMPORTING:
5327 importer_list_rm(dest, req->rsmipc_key,
5328 req->rsmipc_segment_cookie);
5329 break;
5330 case RSMIPC_MSG_REPUBLISH:
5331 importer_update(dest, req->rsmipc_key,
5332 req->rsmipc_perm);
5333 break;
5334 case RSMIPC_MSG_SUSPEND:
5335 importer_suspend(dest);
5336 break;
5337 case RSMIPC_MSG_SUSPEND_DONE:
5338 rsm_suspend_complete(dest, 0);
5339 break;
5340 case RSMIPC_MSG_RESUME:
5341 importer_resume(dest);
5342 break;
5343 default:
5344 ASSERT(0);
5345 }
5346 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5347 "rsmipc_send done\n"));
5348 return (0);
5349 }
5350
5351 if (dest >= MAX_NODES) {
5352 DBG_PRINTF((category, RSM_ERR,
5353 "rsm: rsmipc_send bad node number %x\n", dest));
5354 return (RSMERR_REMOTE_NODE_UNREACHABLE);
5355 }
5356
5357 /*
5358 * Oh boy! we are going remote.
5359 */
5360
5361 /*
5362 * identify if we need to have credits to send this message
5363 * - only selected requests are flow controlled
5364 */
5365 if (req != NULL) {
5366 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5367 "rsmipc_send:request type=%d\n",
5368 req->rsmipc_hdr.rsmipc_type));
5369
5370 switch (req->rsmipc_hdr.rsmipc_type) {
5371 case RSMIPC_MSG_SEGCONNECT:
5372 case RSMIPC_MSG_DISCONNECT:
5373 case RSMIPC_MSG_IMPORTING:
5374 case RSMIPC_MSG_SUSPEND:
5375 case RSMIPC_MSG_SUSPEND_DONE:
5376 case RSMIPC_MSG_RESUME:
5377 credit_check = 1;
5378 break;
5379 default:
5380 credit_check = 0;
5381 }
5382 }
5383
5384 again:
5385 if (retry_cnt++ == min_retry_cnt) {
5386 /* backoff before further retries for 10ms */
5387 delay(drv_usectohz(10000));
5388 retry_cnt = 0; /* reset retry_cnt */
5389 }
5390 sendq_token = rsmka_get_sendq_token(dest, used_sendq_token);
5391 if (sendq_token == NULL) {
5392 DBG_PRINTF((category, RSM_ERR,
5393 "rsm: rsmipc_send no device to reach node %d\n", dest));
5394 return (RSMERR_REMOTE_NODE_UNREACHABLE);
5395 }
5396
5397 if ((sendq_token == used_sendq_token) &&
5398 ((e == RSMERR_CONN_ABORTED) || (e == RSMERR_TIMEOUT) ||
5399 (e == RSMERR_COMM_ERR_MAYBE_DELIVERED))) {
5400 rele_sendq_token(sendq_token);
5401 DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send done=%d\n", e));
5402 return (RSMERR_CONN_ABORTED);
5403 } else
5404 used_sendq_token = sendq_token;
5405
5406 /* lint -save -e413 */
5407 path = SQ_TOKEN_TO_PATH(sendq_token);
5408 adapter = path->local_adapter;
5409 /* lint -restore */
5410 ipc_handle = sendq_token->rsmpi_sendq_handle;
5411
5412 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5413 "rsmipc_send: path=%lx sendq_hdl=%lx\n", path, ipc_handle));
5414
5415 if (reply == NULL) {
5416 /* Send request without ack */
5417 /*
5418 * Set the rsmipc_version number in the msghdr for KA
5419 * communication versioning
5420 */
5421 req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5422 req->rsmipc_hdr.rsmipc_src = my_nodeid;
5423 /*
5424 		 * remote endpoint's incn should match the value in our
5425 * path's remote_incn field. No need to grab any lock
5426 * since we have refcnted the path in rsmka_get_sendq_token
5427 */
5428 req->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5429
5430 is.is_data = (void *)req;
5431 is.is_size = sizeof (*req);
5432 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5433 is.is_wait = 0;
5434
5435 if (credit_check) {
5436 mutex_enter(&path->mutex);
5437 /*
5438 * wait till we recv credits or path goes down. If path
5439 * goes down rsm_send will fail and we handle the error
5440 * then
5441 */
5442 while ((sendq_token->msgbuf_avail == 0) &&
5443 (path->state == RSMKA_PATH_ACTIVE)) {
5444 e = cv_wait_sig(&sendq_token->sendq_cv,
5445 &path->mutex);
5446 if (e == 0) {
5447 mutex_exit(&path->mutex);
5448 no_reply_cnt++;
5449 rele_sendq_token(sendq_token);
5450 DBG_PRINTF((category, RSM_DEBUG,
5451 "rsmipc_send done: "
5452 "cv_wait INTERRUPTED"));
5453 return (RSMERR_INTERRUPTED);
5454 }
5455 }
5456
5457 /*
5458 			 * path is not active; retry on another path.
5459 */
5460 if (path->state != RSMKA_PATH_ACTIVE) {
5461 mutex_exit(&path->mutex);
5462 rele_sendq_token(sendq_token);
5463 e = RSMERR_CONN_ABORTED;
5464 DBG_PRINTF((category, RSM_ERR,
5465 "rsm: rsmipc_send: path !ACTIVE"));
5466 goto again;
5467 }
5468
5469 ASSERT(sendq_token->msgbuf_avail > 0);
5470
5471 /*
5472 * reserve a msgbuf
5473 */
5474 sendq_token->msgbuf_avail--;
5475
5476 mutex_exit(&path->mutex);
5477
5478 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5479 NULL);
5480
5481 if (e != RSM_SUCCESS) {
5482 mutex_enter(&path->mutex);
5483 /*
5484 * release the reserved msgbuf since
5485 * the send failed
5486 */
5487 sendq_token->msgbuf_avail++;
5488 cv_broadcast(&sendq_token->sendq_cv);
5489 mutex_exit(&path->mutex);
5490 }
5491 } else
5492 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5493 NULL);
5494
5495 no_reply_cnt++;
5496 rele_sendq_token(sendq_token);
5497 if (e != RSM_SUCCESS) {
5498 DBG_PRINTF((category, RSM_ERR,
5499 "rsm: rsmipc_send no reply send"
5500 " err = %d no reply count = %d\n",
5501 e, no_reply_cnt));
5502 ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
5503 e != RSMERR_BAD_BARRIER_HNDL);
5504 atomic_add_64(&rsm_ipcsend_errcnt, 1);
5505 goto again;
5506 } else {
5507 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5508 "rsmipc_send done\n"));
5509 return (e);
5510 }
5511
5512 }
5513
5514 if (req == NULL) {
5515 /* Send reply - No flow control is done for reply */
5516 /*
5517 * Set the version in the msg header for KA communication
5518 * versioning
5519 */
5520 reply->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5521 reply->rsmipc_hdr.rsmipc_src = my_nodeid;
5522 /* incn number is not used for reply msgs currently */
5523 reply->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5524
5525 is.is_data = (void *)reply;
5526 is.is_size = sizeof (*reply);
5527 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5528 is.is_wait = 0;
5529 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);
5530 rele_sendq_token(sendq_token);
5531 if (e != RSM_SUCCESS) {
5532 DBG_PRINTF((category, RSM_ERR,
5533 "rsm: rsmipc_send reply send"
5534 " err = %d\n", e));
5535 atomic_add_64(&rsm_ipcsend_errcnt, 1);
5536 goto again;
5537 } else {
5538 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5539 "rsmipc_send done\n"));
5540 return (e);
5541 }
5542 }
5543
5544 /* Reply needed */
5545 rslot = rsmipc_alloc(); /* allocate a new ipc slot */
5546
5547 mutex_enter(&rslot->rsmipc_lock);
5548
5549 rslot->rsmipc_data = (void *)reply;
5550 RSMIPC_SET(rslot, RSMIPC_PENDING);
5551
5552 while (RSMIPC_GET(rslot, RSMIPC_PENDING)) {
5553 /*
5554 * Set the rsmipc_version number in the msghdr for KA
5555 * communication versioning
5556 */
5557 req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5558 req->rsmipc_hdr.rsmipc_src = my_nodeid;
5559 req->rsmipc_hdr.rsmipc_cookie = rslot->rsmipc_cookie;
5560 /*
5561 		 * remote endpoint's incn should match the value in our
5562 * path's remote_incn field. No need to grab any lock
5563 * since we have refcnted the path in rsmka_get_sendq_token
5564 */
		req->rsmipc_hdr.rsmipc_incn = path->remote_incn;

		is.is_data = (void *)req;
		is.is_size = sizeof (*req);
		is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
		is.is_wait = 0;
		if (credit_check) {

			mutex_enter(&path->mutex);
			/*
			 * wait till we recv credits or path goes down. If path
			 * goes down rsm_send will fail and we handle the error
			 * then.
			 */
			while ((sendq_token->msgbuf_avail == 0) &&
			    (path->state == RSMKA_PATH_ACTIVE)) {
				e = cv_wait_sig(&sendq_token->sendq_cv,
				    &path->mutex);
				if (e == 0) {
					mutex_exit(&path->mutex);
					RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
					rsmipc_free(rslot);
					rele_sendq_token(sendq_token);
					DBG_PRINTF((category, RSM_DEBUG,
					    "rsmipc_send done: "
					    "cv_wait INTERRUPTED"));
					return (RSMERR_INTERRUPTED);
				}
			}
			/*
			 * The path is not active; retry on another path.
			 */
			if (path->state != RSMKA_PATH_ACTIVE) {
				mutex_exit(&path->mutex);
				RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
				rsmipc_free(rslot);
				rele_sendq_token(sendq_token);
				e = RSMERR_CONN_ABORTED;
				DBG_PRINTF((category, RSM_ERR,
				    "rsm: rsmipc_send: path !ACTIVE"));
				goto again;
			}

			ASSERT(sendq_token->msgbuf_avail > 0);

			/*
			 * reserve a msgbuf
			 */
			sendq_token->msgbuf_avail--;

			mutex_exit(&path->mutex);

			e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
			    NULL);

			if (e != RSM_SUCCESS) {
				mutex_enter(&path->mutex);
				/*
				 * release the reserved msgbuf since
				 * the send failed
				 */
				sendq_token->msgbuf_avail++;
				cv_broadcast(&sendq_token->sendq_cv);
				mutex_exit(&path->mutex);
			}
		} else
			e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
			    NULL);

		if (e != RSM_SUCCESS) {
			DBG_PRINTF((category, RSM_ERR,
			    "rsm: rsmipc_send rsmpi send err = %d\n", e));
			RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
			rsmipc_free(rslot);
			rele_sendq_token(sendq_token);
			atomic_add_64(&rsm_ipcsend_errcnt, 1);
			goto again;
		}

		/* wait for a reply signal, a SIGINT, or 5 sec. timeout */
		e = cv_reltimedwait_sig(&rslot->rsmipc_cv, &rslot->rsmipc_lock,
		    drv_usectohz(5000000), TR_CLOCK_TICK);
		if (e < 0) {
			/* timed out - retry */
			e = RSMERR_TIMEOUT;
		} else if (e == 0) {
			/* signalled - return error */
			e = RSMERR_INTERRUPTED;
			break;
		} else {
			e = RSM_SUCCESS;
		}
	}

	RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
	rsmipc_free(rslot);
	rele_sendq_token(sendq_token);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send done=%d\n", e));
	return (e);
}

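/*
 * Send a fire-and-forget RSMIPC_MSG_NOTIMPORTING message to the exporting
 * node so that it can remove this node from the importer list of the
 * segment identified by segid; the cookie identifies the import instance.
 */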
static int
rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid, void *cookie)
{
	rsmipc_request_t request;

	/*
	 * inform the exporter to delete this importer
	 */
	request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
	request.rsmipc_key = segid;
	request.rsmipc_segment_cookie = cookie;
	return (rsmipc_send(dest, &request, RSM_NO_REPLY));
}

static void
rsm_send_republish(rsm_memseg_id_t segid, rsmapi_access_entry_t *acl,
    int acl_len, rsm_permission_t default_permission)
{
	int i;
	importing_token_t *token;
	rsmipc_request_t request;
	republish_token_t *republish_list = NULL;
	republish_token_t *rp;
	rsm_permission_t permission;
	int index;

	/*
	 * Send the new access mode to all the nodes that have imported
	 * this segment.
	 * If the new acl does not have a node that was present in
	 * the old acl, an access permission of 0 is sent.
	 */

	index = rsmhash(segid);

	/*
	 * create a list of node/permissions to send the republish message
	 */
	mutex_enter(&importer_list.lock);

	token = importer_list.bucket[index];
	while (token != NULL) {
		if (segid == token->key) {
			permission = default_permission;

			for (i = 0; i < acl_len; i++) {
				if (token->importing_node == acl[i].ae_node) {
					permission = acl[i].ae_permission;
					break;
				}
			}
			rp = kmem_zalloc(sizeof (republish_token_t), KM_SLEEP);

			rp->key = segid;
			rp->importing_node = token->importing_node;
			rp->permission = permission;
			rp->next = republish_list;
			republish_list = rp;
		}
		token = token->next;
	}

	mutex_exit(&importer_list.lock);

	request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPUBLISH;
	request.rsmipc_key = segid;

	while (republish_list != NULL) {
		request.rsmipc_perm = republish_list->permission;
		(void) rsmipc_send(republish_list->importing_node,
		    &request, RSM_NO_REPLY);
		rp = republish_list;
		republish_list = republish_list->next;
		kmem_free(rp, sizeof (republish_token_t));
	}
}

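/*
 * Send an RSMIPC_MSG_SUSPEND message to every node that imports at least
 * one local segment. If there are no importers at all, the local export
 * segments can be quiesced right away.
 */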
static void
rsm_send_suspend()
{
	int i, e;
	rsmipc_request_t request;
	list_element_t *tokp;
	list_element_t *head = NULL;
	importing_token_t *token;
	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
	    "rsm_send_suspend enter\n"));

	/*
	 * create a list of nodes to send the suspend message to
	 *
	 * Currently the whole importer list is scanned and we obtain
	 * all the nodes - this basically gets all nodes that import at
	 * least one segment from the local node.
	 *
	 * no need to grab the rsm_suspend_list lock here since we are
	 * single threaded when suspend is called.
	 */

	mutex_enter(&importer_list.lock);
	for (i = 0; i < rsm_hash_size; i++) {

		token = importer_list.bucket[i];

		while (token != NULL) {

			tokp = head;

			/*
			 * make sure that the token's node
			 * is not already on the suspend list
			 */
			while (tokp != NULL) {
				if (tokp->nodeid == token->importing_node) {
					break;
				}
				tokp = tokp->next;
			}

			if (tokp == NULL) {	/* not in suspend list */
				tokp = kmem_zalloc(sizeof (list_element_t),
				    KM_SLEEP);
				tokp->nodeid = token->importing_node;
				tokp->next = head;
				head = tokp;
			}

			token = token->next;
		}
	}
	mutex_exit(&importer_list.lock);

	if (head == NULL) { /* no importers so go ahead and quiesce segments */
		exporter_quiesce();
		return;
	}

	mutex_enter(&rsm_suspend_list.list_lock);
	ASSERT(rsm_suspend_list.list_head == NULL);
	/*
	 * update the suspend list right away so that if a node dies the
	 * path manager can set the NODE dead flag
	 */
	rsm_suspend_list.list_head = head;
	mutex_exit(&rsm_suspend_list.list_lock);

	tokp = head;

	while (tokp != NULL) {
		request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND;
		e = rsmipc_send(tokp->nodeid, &request, RSM_NO_REPLY);
		/*
		 * Error in rsmipc_send currently happens due to inaccessibility
		 * of the remote node.
		 */
		if (e == RSM_SUCCESS) { /* send succeeded - wait for the ack */
			tokp->flags |= RSM_SUSPEND_ACKPENDING;
		}

		tokp = tokp->next;
	}

	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
	    "rsm_send_suspend done\n"));

}

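/*
 * Walk the list that rsm_send_suspend built and send an RSMIPC_MSG_RESUME
 * message to each node on it, freeing the list elements as we go.
 */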
static void
rsm_send_resume()
{
	rsmipc_request_t request;
	list_element_t *elem, *head;

	/*
	 * save the suspend list so that we know where to send
	 * the resume messages and make the suspend list head
	 * NULL.
	 */
	mutex_enter(&rsm_suspend_list.list_lock);
	head = rsm_suspend_list.list_head;
	rsm_suspend_list.list_head = NULL;
	mutex_exit(&rsm_suspend_list.list_lock);

	while (head != NULL) {
		elem = head;
		head = head->next;

		request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_RESUME;

		(void) rsmipc_send(elem->nodeid, &request, RSM_NO_REPLY);

		kmem_free((void *)elem, sizeof (list_element_t));
	}
}

/*
 * This function takes a path and sends a message using the sendq
 * corresponding to it. The RSMIPC_MSG_SQREADY, RSMIPC_MSG_SQREADY_ACK
 * and RSMIPC_MSG_CREDIT messages are sent using this function.
 */
int
rsmipc_send_controlmsg(path_t *path, int msgtype)
{
	int e;
	int retry_cnt = 0;
	int min_retry_cnt = 10;
	adapter_t *adapter;
	rsm_send_t is;
	rsm_send_q_handle_t ipc_handle;
	rsmipc_controlmsg_t msg;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_FLOWCONTROL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsmipc_send_controlmsg enter\n"));

	ASSERT(MUTEX_HELD(&path->mutex));

	adapter = path->local_adapter;

	DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send_controlmsg:path=%lx "
	    "msgtype=%d %lx:%llx->%lx:%llx procmsg=%d\n", path, msgtype,
	    my_nodeid, adapter->hwaddr, path->remote_node,
	    path->remote_hwaddr, path->procmsg_cnt));

	if (path->state != RSMKA_PATH_ACTIVE) {
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsmipc_send_controlmsg done: ! RSMKA_PATH_ACTIVE"));
		return (1);
	}

	ipc_handle = path->sendq_token.rsmpi_sendq_handle;

	msg.rsmipc_hdr.rsmipc_version = RSM_VERSION;
	msg.rsmipc_hdr.rsmipc_src = my_nodeid;
	msg.rsmipc_hdr.rsmipc_type = msgtype;
	msg.rsmipc_hdr.rsmipc_incn = path->remote_incn;

	if (msgtype == RSMIPC_MSG_CREDIT)
		msg.rsmipc_credits = path->procmsg_cnt;

	msg.rsmipc_local_incn = path->local_incn;

	msg.rsmipc_adapter_hwaddr = adapter->hwaddr;
	/* incr the sendq, path refcnt */
	PATH_HOLD_NOLOCK(path);
	SENDQ_TOKEN_HOLD(path);

	do {
		/* drop the path lock before doing the rsm_send */
		mutex_exit(&path->mutex);

		is.is_data = (void *)&msg;
		is.is_size = sizeof (msg);
		is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
		is.is_wait = 0;

		e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);

		ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
		    e != RSMERR_BAD_BARRIER_HNDL);

		mutex_enter(&path->mutex);

		if (e == RSM_SUCCESS) {
			break;
		}
		/* error counter for statistics */
		atomic_add_64(&rsm_ctrlmsg_errcnt, 1);

		DBG_PRINTF((category, RSM_ERR,
		    "rsmipc_send_controlmsg:rsm_send error=%d", e));

		if (++retry_cnt == min_retry_cnt) { /* backoff before retry */
			(void) cv_reltimedwait(&path->sendq_token.sendq_cv,
			    &path->mutex, drv_usectohz(10000), TR_CLOCK_TICK);
			retry_cnt = 0;
		}
	} while (path->state == RSMKA_PATH_ACTIVE);

	/* decrement the sendq,path refcnt that we incr before rsm_send */
	SENDQ_TOKEN_RELE(path);
	PATH_RELE_NOLOCK(path);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsmipc_send_controlmsg done=%d", e));
	return (e);
}

/*
 * Called from rsm_force_unload and path_importer_disconnect. The memory
 * mapping for the imported segment is removed and the segment is
 * disconnected at the interconnect layer if disconnect_flag is TRUE.
 * rsm_force_unload will get disconnect_flag TRUE from rsm_intr_callback
 * and FALSE from rsm_rebind.
 *
 * When subsequent accesses cause page faulting, the dummy page is mapped
 * to resolve the fault, and the mapping generation number is incremented
 * so that the application can be notified on a close barrier operation.
 *
 * It is important to note that the caller of rsmseg_unload is responsible for
 * acquiring the segment lock before making a call to rsmseg_unload. This is
 * required to make the caller and rsmseg_unload thread safe. The segment lock
 * will be released by the rsmseg_unload function.
 */
void
rsmseg_unload(rsmseg_t *im_seg)
{
	rsmcookie_t *hdl;
	void *shared_cookie;
	rsmipc_request_t request;
	uint_t maxprot;

	DBG_DEFINE(category,
	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload enter\n"));

	ASSERT(im_seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);

	/* wait until segment leaves the mapping state */
	while (im_seg->s_state == RSM_STATE_MAPPING)
		cv_wait(&im_seg->s_cv, &im_seg->s_lock);
	/*
	 * An unload is only necessary if the segment is connected. However,
	 * if the segment was on the import list in state RSM_STATE_CONNECTING
	 * then a connection was in progress. Change to RSM_STATE_ABORT_CONNECT
	 * here to cause an early exit from the connection process.
	 */
	if (im_seg->s_state == RSM_STATE_NEW) {
		rsmseglock_release(im_seg);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsmseg_unload done: RSM_STATE_NEW\n"));
		return;
	} else if (im_seg->s_state == RSM_STATE_CONNECTING) {
		im_seg->s_state = RSM_STATE_ABORT_CONNECT;
		rsmsharelock_acquire(im_seg);
		im_seg->s_share->rsmsi_state = RSMSI_STATE_ABORT_CONNECT;
		rsmsharelock_release(im_seg);
		rsmseglock_release(im_seg);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsmseg_unload done: RSM_STATE_CONNECTING\n"));
		return;
	}

	if (im_seg->s_flags & RSM_FORCE_DISCONNECT) {
		if (im_seg->s_ckl != NULL) {
			int e;
			/* Setup protections for remap */
			maxprot = PROT_USER;
			if (im_seg->s_mode & RSM_PERM_READ) {
				maxprot |= PROT_READ;
			}
			if (im_seg->s_mode & RSM_PERM_WRITE) {
				maxprot |= PROT_WRITE;
			}
			hdl = im_seg->s_ckl;
			for (; hdl != NULL; hdl = hdl->c_next) {
				e = devmap_umem_remap(hdl->c_dhp, rsm_dip,
				    remap_cookie,
				    hdl->c_off, hdl->c_len,
				    maxprot, 0, NULL);

				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
				    "remap returns %d\n", e));
			}
		}

		(void) rsm_closeconnection(im_seg, &shared_cookie);

		if (shared_cookie != NULL) {
			/*
			 * inform the exporting node so this import
			 * can be deleted from the list of importers.
			 */
			request.rsmipc_hdr.rsmipc_type =
			    RSMIPC_MSG_NOTIMPORTING;
			request.rsmipc_key = im_seg->s_segid;
			request.rsmipc_segment_cookie = shared_cookie;
			rsmseglock_release(im_seg);
			(void) rsmipc_send(im_seg->s_node, &request,
			    RSM_NO_REPLY);
		} else {
			rsmseglock_release(im_seg);
		}
	} else
		rsmseglock_release(im_seg);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload done\n"));

}

/* ****************************** Importer Calls ************************ */

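/*
 * rsm_access() below mimics the classic owner/group/other file permission
 * check: mode holds the requested bits in the owner digit of an octal
 * (ls -l style) permission word and perm holds the granted bits for all
 * three classes. Shifting perm left by 3 bits per class moves the caller's
 * class digit (group or other) up into the owner digit, where it is used
 * to clear the satisfied request bits. Anything left over is referred to
 * secpolicy_rsm_access(), which may still grant it by privilege.
 *
 * For example (illustrative values): with perm = 0640, a caller that is a
 * group member but not the owner uses shifts = 3, so perm << 3 clears only
 * the read bit of a 0600 read/write request; the remaining write bit makes
 * the access subject to the privilege check.
 */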
static int
rsm_access(uid_t owner, gid_t group, int perm, int mode, const struct cred *cr)
{
	int shifts = 0;

	if (crgetuid(cr) != owner) {
		shifts += 3;
		if (!groupmember(group, cr))
			shifts += 3;
	}

	mode &= ~(perm << shifts);

	if (mode == 0)
		return (0);

	return (secpolicy_rsm_access(cr, owner, mode));
}


static int
rsm_connect(rsmseg_t *seg, rsm_ioctlmsg_t *msg, cred_t *cred,
    intptr_t dataptr, int mode)
{
	int e;
	int recheck_state = 0;
	void *shared_cookie;
	rsmipc_request_t request;
	rsmipc_reply_t reply;
	rsm_permission_t access;
	adapter_t *adapter;
	rsm_addr_t addr = 0;
	rsm_import_share_t *sharedp;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect enter\n"));

	adapter = rsm_getadapter(msg, mode);
	if (adapter == NULL) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm_connect done:ENODEV adapter=NULL\n"));
		return (RSMERR_CTLR_NOT_PRESENT);
	}

	if ((adapter == &loopback_adapter) && (msg->nodeid != my_nodeid)) {
		rsmka_release_adapter(adapter);
		DBG_PRINTF((category, RSM_ERR,
		    "rsm_connect done:ENODEV loopback\n"));
		return (RSMERR_CTLR_NOT_PRESENT);
	}


	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
	ASSERT(seg->s_state == RSM_STATE_NEW);

	/*
	 * Translate perm to access
	 */
	if (msg->perm & ~RSM_PERM_RDWR) {
		rsmka_release_adapter(adapter);
		DBG_PRINTF((category, RSM_ERR,
		    "rsm_connect done:EINVAL invalid perms\n"));
		return (RSMERR_BAD_PERMS);
	}
	access = 0;
	if (msg->perm & RSM_PERM_READ)
		access |= RSM_ACCESS_READ;
	if (msg->perm & RSM_PERM_WRITE)
		access |= RSM_ACCESS_WRITE;

	seg->s_node = msg->nodeid;

	/*
	 * Adding to the import list locks the segment; release the segment
	 * lock so we can get the reply for the send.
	 */
	e = rsmimport_add(seg, msg->key);
	if (e) {
		rsmka_release_adapter(adapter);
		DBG_PRINTF((category, RSM_ERR,
		    "rsm_connect done:rsmimport_add failed %d\n", e));
		return (e);
	}
	seg->s_state = RSM_STATE_CONNECTING;

	/*
	 * Set the s_adapter field here so as to have a valid comparison of
	 * the adapter and the s_adapter value during rsmshare_get. For
	 * any error, set s_adapter to NULL before doing a release_adapter
	 */
	seg->s_adapter = adapter;

	rsmseglock_release(seg);

	/*
	 * get the pointer to the shared data structure; the
	 * shared data is locked and refcount has been incremented
	 */
	sharedp = rsmshare_get(msg->key, msg->nodeid, adapter, seg);

	ASSERT(rsmsharelock_held(seg));

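	/*
	 * Shared-state machine: the first importer to arrive moves the
	 * shared structure from NEW (or DISCONNECTED) to CONNECTING and is
	 * the one that performs the actual RSMPI connect further down.
	 * Later importers find CONNECTING (or a quiesce state) and block on
	 * rsmsi_cv until the state settles; ABORT_CONNECT makes everyone
	 * bail out and clean up.
	 */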
	do {
		/* flag indicates whether we need to recheck the state */
		recheck_state = 0;
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_connect:RSMSI_STATE=%d\n", sharedp->rsmsi_state));
		switch (sharedp->rsmsi_state) {
		case RSMSI_STATE_NEW:
			sharedp->rsmsi_state = RSMSI_STATE_CONNECTING;
			break;
		case RSMSI_STATE_CONNECTING:
			/* FALLTHRU */
		case RSMSI_STATE_CONN_QUIESCE:
			/* FALLTHRU */
		case RSMSI_STATE_MAP_QUIESCE:
			/* wait for the state to change */
			while ((sharedp->rsmsi_state ==
			    RSMSI_STATE_CONNECTING) ||
			    (sharedp->rsmsi_state ==
			    RSMSI_STATE_CONN_QUIESCE) ||
			    (sharedp->rsmsi_state ==
			    RSMSI_STATE_MAP_QUIESCE)) {
				if (cv_wait_sig(&sharedp->rsmsi_cv,
				    &sharedp->rsmsi_lock) == 0) {
					/* signalled - clean up and return */
					rsmsharelock_release(seg);
					rsmimport_rm(seg);
					seg->s_adapter = NULL;
					rsmka_release_adapter(adapter);
					seg->s_state = RSM_STATE_NEW;
					DBG_PRINTF((category, RSM_ERR,
					    "rsm_connect done: INTERRUPTED\n"));
					return (RSMERR_INTERRUPTED);
				}
			}
			/*
			 * the state changed, loop back and check what it is
			 */
			recheck_state = 1;
			break;
		case RSMSI_STATE_ABORT_CONNECT:
			/* exit the loop and clean up further down */
			break;
		case RSMSI_STATE_CONNECTED:
			/* already connected, good - fall through */
		case RSMSI_STATE_MAPPED:
			/* already mapped, wow - fall through */
			/* access validation etc is done further down */
			break;
		case RSMSI_STATE_DISCONNECTED:
			/* disconnected - so reconnect now */
			sharedp->rsmsi_state = RSMSI_STATE_CONNECTING;
			break;
		default:
			ASSERT(0); /* Invalid State */
		}
	} while (recheck_state);

	if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
		/* we are the first to connect */
		rsmsharelock_release(seg);

		if (msg->nodeid != my_nodeid) {
			addr = get_remote_hwaddr(adapter, msg->nodeid);

			if ((int64_t)addr < 0) {
				rsmsharelock_acquire(seg);
				rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
				    RSMSI_STATE_NEW);
				rsmsharelock_release(seg);
				rsmimport_rm(seg);
				seg->s_adapter = NULL;
				rsmka_release_adapter(adapter);
				seg->s_state = RSM_STATE_NEW;
				DBG_PRINTF((category, RSM_ERR,
				    "rsm_connect done: hwaddr<0\n"));
				return (RSMERR_INTERNAL_ERROR);
			}
		} else {
			addr = adapter->hwaddr;
		}

		/*
		 * send request to node [src, dest, key, msgid] and get back
		 * [status, msgid, cookie]
		 */
		request.rsmipc_key = msg->key;
		/*
		 * we need the s_mode of the exporter so pass
		 * RSM_ACCESS_TRUSTED
		 */
		request.rsmipc_perm = RSM_ACCESS_TRUSTED;
		request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SEGCONNECT;
		request.rsmipc_adapter_hwaddr = addr;
		request.rsmipc_segment_cookie = sharedp;

		e = (int)rsmipc_send(msg->nodeid, &request, &reply);
		if (e) {
			rsmsharelock_acquire(seg);
			rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
			    RSMSI_STATE_NEW);
			rsmsharelock_release(seg);
			rsmimport_rm(seg);
			seg->s_adapter = NULL;
			rsmka_release_adapter(adapter);
			seg->s_state = RSM_STATE_NEW;
			DBG_PRINTF((category, RSM_ERR,
			    "rsm_connect done:rsmipc_send failed %d\n", e));
			return (e);
		}

		if (reply.rsmipc_status != RSM_SUCCESS) {
			rsmsharelock_acquire(seg);
			rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
			    RSMSI_STATE_NEW);
			rsmsharelock_release(seg);
			rsmimport_rm(seg);
			seg->s_adapter = NULL;
			rsmka_release_adapter(adapter);
			seg->s_state = RSM_STATE_NEW;
			DBG_PRINTF((category, RSM_ERR,
			    "rsm_connect done:rsmipc_send reply err %d\n",
			    reply.rsmipc_status));
			return (reply.rsmipc_status);
		}

		rsmsharelock_acquire(seg);
		/* store the information recvd into the shared data struct */
		sharedp->rsmsi_mode = reply.rsmipc_mode;
		sharedp->rsmsi_uid = reply.rsmipc_uid;
		sharedp->rsmsi_gid = reply.rsmipc_gid;
		sharedp->rsmsi_seglen = reply.rsmipc_seglen;
		sharedp->rsmsi_cookie = sharedp;
	}

	rsmsharelock_release(seg);

	/*
	 * Get the segment lock and check for a force disconnect
	 * from the export side which would have changed the state
	 * back to RSM_STATE_NEW. Once the segment lock is acquired a
	 * force disconnect will be held off until the connection
	 * has completed.
	 */
	rsmseglock_acquire(seg);
	rsmsharelock_acquire(seg);
	ASSERT(seg->s_state == RSM_STATE_CONNECTING ||
	    seg->s_state == RSM_STATE_ABORT_CONNECT);

	shared_cookie = sharedp->rsmsi_cookie;

	if ((seg->s_state == RSM_STATE_ABORT_CONNECT) ||
	    (sharedp->rsmsi_state == RSMSI_STATE_ABORT_CONNECT)) {
		seg->s_state = RSM_STATE_NEW;
		seg->s_adapter = NULL;
		rsmsharelock_release(seg);
		rsmseglock_release(seg);
		rsmimport_rm(seg);
		rsmka_release_adapter(adapter);

		rsmsharelock_acquire(seg);
		if (!(sharedp->rsmsi_flags & RSMSI_FLAGS_ABORTDONE)) {
			/*
			 * set a flag indicating abort handling has been
			 * done
			 */
			sharedp->rsmsi_flags |= RSMSI_FLAGS_ABORTDONE;
			rsmsharelock_release(seg);
			/* send a message to exporter - only once */
			(void) rsm_send_notimporting(msg->nodeid,
			    msg->key, shared_cookie);
			rsmsharelock_acquire(seg);
			/*
			 * wake up any waiting importers and inform that
			 * connection has been aborted
			 */
			cv_broadcast(&sharedp->rsmsi_cv);
		}
		rsmsharelock_release(seg);

		DBG_PRINTF((category, RSM_ERR,
		    "rsm_connect done: RSM_STATE_ABORT_CONNECT\n"));
		return (RSMERR_INTERRUPTED);
	}


	/*
	 * We need to verify that this process has access
	 */
	e = rsm_access(sharedp->rsmsi_uid, sharedp->rsmsi_gid,
	    access & sharedp->rsmsi_mode,
	    (int)(msg->perm & RSM_PERM_RDWR), cred);
	if (e) {
		rsmsharelock_release(seg);
		seg->s_state = RSM_STATE_NEW;
		seg->s_adapter = NULL;
		rsmseglock_release(seg);
		rsmimport_rm(seg);
		rsmka_release_adapter(adapter);
		/*
		 * No need to lock the segment; it has been removed
		 * from the hash table.
		 */
		rsmsharelock_acquire(seg);
		if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
			rsmsharelock_release(seg);
			/* this is the first importer */

			(void) rsm_send_notimporting(msg->nodeid, msg->key,
			    shared_cookie);
			rsmsharelock_acquire(seg);
			sharedp->rsmsi_state = RSMSI_STATE_NEW;
			cv_broadcast(&sharedp->rsmsi_cv);
		}
		rsmsharelock_release(seg);

		DBG_PRINTF((category, RSM_ERR,
		    "rsm_connect done: ipcaccess failed\n"));
		return (RSMERR_PERM_DENIED);
	}

	/* update state and cookie */
	seg->s_segid = sharedp->rsmsi_segid;
	seg->s_len = sharedp->rsmsi_seglen;
	seg->s_mode = access & sharedp->rsmsi_mode;
	seg->s_pid = ddi_get_pid();
	seg->s_mapinfo = NULL;

	if (seg->s_node != my_nodeid) {
		if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) {
			e = adapter->rsmpi_ops->rsm_connect(
			    adapter->rsmpi_handle,
			    addr, seg->s_segid, &sharedp->rsmsi_handle);

			if (e != RSM_SUCCESS) {
				seg->s_state = RSM_STATE_NEW;
				seg->s_adapter = NULL;
				rsmsharelock_release(seg);
				rsmseglock_release(seg);
				rsmimport_rm(seg);
				rsmka_release_adapter(adapter);
				/*
				 * inform the exporter to delete this importer
				 */
				(void) rsm_send_notimporting(msg->nodeid,
				    msg->key, shared_cookie);

				/*
				 * Now inform any waiting importers to
				 * retry the connect. This needs to be done
				 * after sending notimporting so that
				 * the notimporting is sent before a waiting
				 * importer sends a segconnect while retrying.
				 *
				 * No need to lock the segment; it has been
				 * removed from the hash table.
				 */

				rsmsharelock_acquire(seg);
				sharedp->rsmsi_state = RSMSI_STATE_NEW;
				cv_broadcast(&sharedp->rsmsi_cv);
				rsmsharelock_release(seg);

				DBG_PRINTF((category, RSM_ERR,
				    "rsm_connect error %d\n", e));
				if (e == RSMERR_SEG_NOT_PUBLISHED_TO_RSM_ADDR)
					return (
					    RSMERR_SEG_NOT_PUBLISHED_TO_NODE);
				else if ((e == RSMERR_RSM_ADDR_UNREACHABLE) ||
				    (e == RSMERR_UNKNOWN_RSM_ADDR))
					return (RSMERR_REMOTE_NODE_UNREACHABLE);
				else
					return (e);
			}

		}
		seg->s_handle.in = sharedp->rsmsi_handle;

	}

	seg->s_state = RSM_STATE_CONNECT;


	seg->s_flags &= ~RSM_IMPORT_DUMMY;	/* clear dummy flag */
	if (bar_va) {
		/* increment generation number on barrier page */
		atomic_add_16(bar_va + seg->s_hdr.rsmrc_num, 1);
		/* return user off into barrier page where status will be */
		msg->off = (int)seg->s_hdr.rsmrc_num;
		msg->gnum = bar_va[msg->off];	/* gnum race */
	} else {
		msg->off = 0;
		msg->gnum = 0;	/* gnum race */
	}

	msg->len = (int)sharedp->rsmsi_seglen;
	msg->rnum = seg->s_minor;
	rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, RSMSI_STATE_CONNECTED);
	rsmsharelock_release(seg);
	rsmseglock_release(seg);

	/* Return back to user the segment size & perm in case it's needed */

#ifdef _MULTI_DATAMODEL
	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
		rsm_ioctlmsg32_t msg32;

		if (msg->len > UINT_MAX)
			msg32.len = RSM_MAXSZ_PAGE_ALIGNED;
		else
			msg32.len = msg->len;
		msg32.off = msg->off;
		msg32.perm = msg->perm;
		msg32.gnum = msg->gnum;
		msg32.rnum = msg->rnum;

		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_connect done\n"));

		if (ddi_copyout((caddr_t)&msg32, (caddr_t)dataptr,
		    sizeof (msg32), mode))
			return (RSMERR_BAD_ADDR);
		else
			return (RSM_SUCCESS);
	}
#endif
	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect done\n"));

	if (ddi_copyout((caddr_t)msg, (caddr_t)dataptr, sizeof (*msg),
	    mode))
		return (RSMERR_BAD_ADDR);
	else
		return (RSM_SUCCESS);
}

static int
rsm_unmap(rsmseg_t *seg)
{
	int err;
	adapter_t *adapter;
	rsm_import_share_t *sharedp;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_unmap enter %u\n", seg->s_segid));

	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);

	/* assert seg is locked */
	ASSERT(rsmseglock_held(seg));
	ASSERT(seg->s_state != RSM_STATE_MAPPING);

	if ((seg->s_state != RSM_STATE_ACTIVE) &&
	    (seg->s_state != RSM_STATE_MAP_QUIESCE)) {
		/* segment unmap has already been done */
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n"));
		return (RSM_SUCCESS);
	}

	sharedp = seg->s_share;

	rsmsharelock_acquire(seg);

	/*
	 *	- shared data struct is in MAPPED or MAP_QUIESCE state
	 */

	ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED ||
	    sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE);

	/*
	 * Unmap pages - previously rsm_memseg_import_unmap was called only if
	 * the segment cookie list was NULL; but it is always NULL when
	 * called from rsmmap_unmap and won't be NULL when called for
	 * a force disconnect - so the check for NULL cookie list was removed
	 */

	ASSERT(sharedp->rsmsi_mapcnt > 0);

	sharedp->rsmsi_mapcnt--;

	if (sharedp->rsmsi_mapcnt == 0) {
		if (sharedp->rsmsi_state == RSMSI_STATE_MAPPED) {
			/* unmap the shared RSMPI mapping */
			adapter = seg->s_adapter;
			if (seg->s_node != my_nodeid) {
				ASSERT(sharedp->rsmsi_handle != NULL);
				err = adapter->rsmpi_ops->
				    rsm_unmap(sharedp->rsmsi_handle);
				DBG_PRINTF((category, RSM_DEBUG,
				    "rsm_unmap: rsmpi unmap %d\n", err));
				rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
				sharedp->rsmsi_mapinfo = NULL;
			}
			sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
		} else { /* MAP_QUIESCE --munmap()--> CONN_QUIESCE */
			sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE;
		}
	}

	rsmsharelock_release(seg);

	/*
	 * The s_cookie field is used to store the cookie returned from the
	 * ddi_umem_lock when binding the pages for an export segment. This
	 * is the primary use of the s_cookie field and does not normally
	 * pertain to any importing segment except in the loopback case.
	 * For the loopback case, the import segment and export segment are
	 * on the same node, the s_cookie field of the segment structure for
	 * the importer is initialized to the s_cookie field in the exported
	 * segment during the map operation and is used during the call to
	 * devmap_umem_setup for the import mapping.
	 * Thus, during unmap, we simply need to set s_cookie to NULL to
	 * indicate that the mapping no longer exists.
	 */
	seg->s_cookie = NULL;

	seg->s_mapinfo = NULL;

	if (seg->s_state == RSM_STATE_ACTIVE)
		seg->s_state = RSM_STATE_CONNECT;
	else
		seg->s_state = RSM_STATE_CONN_QUIESCE;

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n"));

	return (RSM_SUCCESS);
}

/*
 * The cookie returned here, if not NULL, indicates that this is the
 * last importer; it can be used in the RSMIPC_MSG_NOTIMPORTING
 * message.
 */
static int
rsm_closeconnection(rsmseg_t *seg, void **cookie)
{
	int e;
	adapter_t *adapter;
	rsm_import_share_t *sharedp;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_closeconnection enter\n"));

	*cookie = (void *)NULL;

	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);

	/* assert seg is locked */
	ASSERT(rsmseglock_held(seg));

	if (seg->s_state == RSM_STATE_DISCONNECT) {
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_closeconnection done: already disconnected\n"));
		return (RSM_SUCCESS);
	}

	/* wait for all putv/getv ops to get done */
	while (seg->s_rdmacnt > 0) {
		cv_wait(&seg->s_cv, &seg->s_lock);
	}

	(void) rsm_unmap(seg);

	ASSERT(seg->s_state == RSM_STATE_CONNECT ||
	    seg->s_state == RSM_STATE_CONN_QUIESCE);

	adapter = seg->s_adapter;
	sharedp = seg->s_share;

	ASSERT(sharedp != NULL);

	rsmsharelock_acquire(seg);

	/*
	 * Disconnect on adapter
	 *
	 * The current algorithm is stateless: the importer does not have to
	 * contact the exporter when it goes away; the exporter only grants
	 * permissions. Of course, the adapters will talk to terminate the
	 * connection.
	 *
	 * A disconnect is needed only if we are CONNECTED, not in
	 * CONN_QUIESCE.
	 */
	if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) &&
	    (sharedp->rsmsi_node != my_nodeid)) {

		if (sharedp->rsmsi_refcnt == 1) {
			/* this is the last importer */
			ASSERT(sharedp->rsmsi_mapcnt == 0);

			e = adapter->rsmpi_ops->
			    rsm_disconnect(sharedp->rsmsi_handle);
			if (e != RSM_SUCCESS) {
				DBG_PRINTF((category, RSM_DEBUG,
				    "rsm:disconnect failed seg=%x:err=%d\n",
				    seg->s_key, e));
			}
		}
	}

	seg->s_handle.in = NULL;

	sharedp->rsmsi_refcnt--;

	if (sharedp->rsmsi_refcnt == 0) {
		*cookie = (void *)sharedp->rsmsi_cookie;
		sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
		sharedp->rsmsi_handle = NULL;
		rsmsharelock_release(seg);

		/* clean up the shared data structure */
		mutex_destroy(&sharedp->rsmsi_lock);
		cv_destroy(&sharedp->rsmsi_cv);
		kmem_free((void *)(sharedp), sizeof (rsm_import_share_t));

	} else {
		rsmsharelock_release(seg);
	}

	/* increment generation number on barrier page */
	if (bar_va) {
		atomic_add_16(bar_va + seg->s_hdr.rsmrc_num, 1);
	}

	/*
	 * The following needs to be done after any
	 * rsmsharelock calls which use seg->s_share.
	 */
	seg->s_share = NULL;

	seg->s_state = RSM_STATE_DISCONNECT;
	/* signal anyone waiting in the CONN_QUIESCE state */
	cv_broadcast(&seg->s_cv);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_closeconnection done\n"));

	return (RSM_SUCCESS);
}

int
rsm_disconnect(rsmseg_t *seg)
{
	rsmipc_request_t request;
	void *shared_cookie;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect enter\n"));

	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);

	/* assert seg isn't locked */
	ASSERT(!rsmseglock_held(seg));


	/* Remove segment from imported list */
	rsmimport_rm(seg);

	/* acquire the segment */
	rsmseglock_acquire(seg);

	/* wait until segment leaves the mapping state */
	while (seg->s_state == RSM_STATE_MAPPING)
		cv_wait(&seg->s_cv, &seg->s_lock);

	if (seg->s_state == RSM_STATE_DISCONNECT) {
		seg->s_state = RSM_STATE_NEW;
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_disconnect done: already disconnected\n"));
		return (RSM_SUCCESS);
	}

	(void) rsm_closeconnection(seg, &shared_cookie);

	/* update state */
	seg->s_state = RSM_STATE_NEW;

	if (shared_cookie != NULL) {
		/*
		 * This is the last importer so inform the exporting node
		 * so this import can be deleted from the list of importers.
		 */
		request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
		request.rsmipc_key = seg->s_segid;
		request.rsmipc_segment_cookie = shared_cookie;
		rsmseglock_release(seg);
		(void) rsmipc_send(seg->s_node, &request, RSM_NO_REPLY);
	} else {
		rsmseglock_release(seg);
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect done\n"));

	return (DDI_SUCCESS);
}

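/*
 * poll(2) entry point. A nonzero s_pollevent count means the segment has
 * unconsumed events, reported as POLLRDNORM; otherwise the segment's
 * pollhead is handed back so the caller can wait for an event.
 */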
/*ARGSUSED*/
static int
rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	minor_t rnum;
	rsmresource_t *res;
	rsmseg_t *seg;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll enter\n"));

	/* find minor, no lock */
	rnum = getminor(dev);
	res = rsmresource_lookup(rnum, RSM_NOLOCK);

	/* poll is supported only for export/import segments */
	if ((res == NULL) || (res == RSMRC_RESERVED) ||
	    (res->rsmrc_type == RSM_RESOURCE_BAR)) {
		return (ENXIO);
	}

	*reventsp = 0;

	/*
	 * An exported segment must be in state RSM_STATE_EXPORT; an
	 * imported segment must be in state RSM_STATE_ACTIVE.
	 */
	seg = (rsmseg_t *)res;

	if (seg->s_pollevent) {
		*reventsp = POLLRDNORM;
	} else if (!anyyet) {
		/* cannot take segment lock here */
		*phpp = &seg->s_poll;
		seg->s_pollflag |= RSM_SEGMENT_POLL;
	}
	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll done\n"));
	return (0);
}



/* ************************* IOCTL Commands ********************* */

static rsmseg_t *
rsmresource_seg(rsmresource_t *res, minor_t rnum, cred_t *credp,
    rsm_resource_type_t type)
{
	/* get segment from resource handle */
	rsmseg_t *seg;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg enter\n"));


	if (res != RSMRC_RESERVED) {
		seg = (rsmseg_t *)res;
	} else {
		/* Allocate segment now and bind it */
		seg = rsmseg_alloc(rnum, credp);

		/*
		 * if DR pre-processing is going on or DR is in progress
		 * then the new export segments should be in the NEW_QSCD state
		 */
		if (type == RSM_RESOURCE_EXPORT_SEGMENT) {
			mutex_enter(&rsm_drv_data.drv_lock);
			if ((rsm_drv_data.drv_state ==
			    RSM_DRV_PREDEL_STARTED) ||
			    (rsm_drv_data.drv_state ==
			    RSM_DRV_PREDEL_COMPLETED) ||
			    (rsm_drv_data.drv_state ==
			    RSM_DRV_DR_IN_PROGRESS)) {
				seg->s_state = RSM_STATE_NEW_QUIESCED;
			}
			mutex_exit(&rsm_drv_data.drv_lock);
		}

		rsmresource_insert(rnum, (rsmresource_t *)seg, type);
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg done\n"));

	return (seg);
}

static int
rsmexport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
    int mode, cred_t *credp)
{
	int error;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl enter\n"));

	arg = arg;
	credp = credp;

	ASSERT(seg != NULL);

	switch (cmd) {
	case RSM_IOCTL_BIND:
		error = rsm_bind(seg, msg, arg, mode);
		break;
	case RSM_IOCTL_REBIND:
		error = rsm_rebind(seg, msg);
		break;
	case RSM_IOCTL_UNBIND:
		error = ENOTSUP;
		break;
	case RSM_IOCTL_PUBLISH:
		error = rsm_publish(seg, msg, arg, mode);
		break;
	case RSM_IOCTL_REPUBLISH:
		error = rsm_republish(seg, msg, mode);
		break;
	case RSM_IOCTL_UNPUBLISH:
		error = rsm_unpublish(seg, 1);
		break;
	default:
		error = EINVAL;
		break;
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl done: %d\n",
	    error));

	return (error);
}
static int
rsmimport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
    int mode, cred_t *credp)
{
	int error;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl enter\n"));

	ASSERT(seg);

	switch (cmd) {
	case RSM_IOCTL_CONNECT:
		error = rsm_connect(seg, msg, credp, arg, mode);
		break;
	default:
		error = EINVAL;
		break;
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl done: %d\n",
	    error));
	return (error);
}

static int
rsmbar_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg,
    int mode)
{
	int e;
	adapter_t *adapter;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmbar_ioctl enter\n"));


	if ((seg->s_flags & RSM_IMPORT_DUMMY) != 0) {
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsmbar_ioctl done: RSM_IMPORT_DUMMY\n"));
		return (RSMERR_CONN_ABORTED);
	} else if (seg->s_node == my_nodeid) {
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsmbar_ioctl done: loopback\n"));
		return (RSM_SUCCESS);
	}

	adapter = seg->s_adapter;

	switch (cmd) {
	case RSM_IOCTL_BAR_CHECK:
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsmbar_ioctl done: RSM_BAR_CHECK %d\n", bar_va));
		return (bar_va ? RSM_SUCCESS : EINVAL);
	case RSM_IOCTL_BAR_OPEN:
		e = adapter->rsmpi_ops->
		    rsm_open_barrier_ctrl(adapter->rsmpi_handle, &msg->bar);
		break;
	case RSM_IOCTL_BAR_ORDER:
		e = adapter->rsmpi_ops->rsm_order_barrier(&msg->bar);
		break;
	case RSM_IOCTL_BAR_CLOSE:
		e = adapter->rsmpi_ops->rsm_close_barrier(&msg->bar);
		break;
	default:
		e = EINVAL;
		break;
	}

	if (e == RSM_SUCCESS) {
#ifdef _MULTI_DATAMODEL
		if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
			rsm_ioctlmsg32_t msg32;
			int i;

			for (i = 0; i < 4; i++) {
				msg32.bar.comp[i].u64 = msg->bar.comp[i].u64;
			}

			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsmbar_ioctl done\n"));
			if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
			    sizeof (msg32), mode))
				return (RSMERR_BAD_ADDR);
			else
				return (RSM_SUCCESS);
		}
#endif
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsmbar_ioctl done\n"));
		if (ddi_copyout((caddr_t)&msg->bar, (caddr_t)arg,
		    sizeof (*msg), mode))
			return (RSMERR_BAD_ADDR);
		else
			return (RSM_SUCCESS);
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsmbar_ioctl done: error=%d\n", e));

	return (e);
}

/*
 * Ring the doorbell of the export segment to which this segment is
 * connected.
 */
static int
exportbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/)
{
	int e = 0;
	rsmipc_request_t request;

	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exportbell_ioctl enter\n"));

	request.rsmipc_key = seg->s_segid;
	request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
	request.rsmipc_segment_cookie = NULL;
	e = rsmipc_send(seg->s_node, &request, RSM_NO_REPLY);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "exportbell_ioctl done: %d\n", e));

	return (e);
}

/*
 * Ring the doorbells of all segments importing this segment
 */
static int
importbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/)
{
	importing_token_t *token = NULL;
	rsmipc_request_t request;
	int index;

	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importbell_ioctl enter\n"));

	ASSERT(seg->s_state != RSM_STATE_NEW &&
	    seg->s_state != RSM_STATE_NEW_QUIESCED);

	request.rsmipc_key = seg->s_segid;
	request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;

	index = rsmhash(seg->s_segid);

	token = importer_list.bucket[index];

	while (token != NULL) {
		if (seg->s_key == token->key) {
			request.rsmipc_segment_cookie =
			    token->import_segment_cookie;
			(void) rsmipc_send(token->importing_node,
			    &request, RSM_NO_REPLY);
		}
		token = token->next;
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "importbell_ioctl done\n"));
	return (RSM_SUCCESS);
}

static int
rsm_consumeevent_copyin(caddr_t arg, rsm_consume_event_msg_t *msgp,
    rsm_poll_event_t **eventspp, int mode)
{
	rsm_poll_event_t *evlist = NULL;
	size_t evlistsz;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);

#ifdef _MULTI_DATAMODEL
	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
		int i;
		rsm_consume_event_msg32_t cemsg32 = {0};
		rsm_poll_event32_t event32[RSM_MAX_POLLFDS];
		rsm_poll_event32_t *evlist32;
		size_t evlistsz32;

		/* copyin the ioctl message */
		if (ddi_copyin(arg, (caddr_t)&cemsg32,
		    sizeof (rsm_consume_event_msg32_t), mode)) {
			DBG_PRINTF((category, RSM_ERR,
			    "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n"));
			return (RSMERR_BAD_ADDR);
		}
		msgp->seglist = (caddr_t)(uintptr_t)cemsg32.seglist;
		msgp->numents = (int)cemsg32.numents;

		evlistsz32 = sizeof (rsm_poll_event32_t) * msgp->numents;
		/*
		 * If numents is large alloc events list on heap otherwise
		 * use the address of array that was passed in.
		 */
		if (msgp->numents > RSM_MAX_POLLFDS) {
			if (msgp->numents > max_segs) { /* validate numents */
				DBG_PRINTF((category, RSM_ERR,
				    "consumeevent_copyin: "
				    "RSMERR_BAD_ARGS_ERRORS\n"));
				return (RSMERR_BAD_ARGS_ERRORS);
			}
			evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP);
		} else {
			evlist32 = event32;
		}

		/* copyin the seglist into the rsm_poll_event32_t array */
		if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)evlist32,
		    evlistsz32, mode)) {
			if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) {
				kmem_free(evlist32, evlistsz32);
			}
			DBG_PRINTF((category, RSM_ERR,
			    "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n"));
			return (RSMERR_BAD_ADDR);
		}

		/* evlist and evlistsz are based on rsm_poll_event_t type */
		evlistsz = sizeof (rsm_poll_event_t) * msgp->numents;

		if (msgp->numents > RSM_MAX_POLLFDS) {
			evlist = kmem_zalloc(evlistsz, KM_SLEEP);
			*eventspp = evlist;
		} else {
			evlist = *eventspp;
		}
		/*
		 * copy the rsm_poll_event32_t array to the rsm_poll_event_t
		 * array
		 */
		for (i = 0; i < msgp->numents; i++) {
			evlist[i].rnum = evlist32[i].rnum;
			evlist[i].fdsidx = evlist32[i].fdsidx;
			evlist[i].revent = evlist32[i].revent;
		}
		/* free the temp 32-bit event list */
		if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) {
			kmem_free(evlist32, evlistsz32);
		}

		return (RSM_SUCCESS);
	}
#endif
	/* copyin the ioctl message */
	if (ddi_copyin(arg, (caddr_t)msgp, sizeof (rsm_consume_event_msg_t),
	    mode)) {
		DBG_PRINTF((category, RSM_ERR,
		    "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n"));
		return (RSMERR_BAD_ADDR);
	}
	/*
	 * If numents is large alloc events list on heap otherwise
	 * use the address of array that was passed in.
	 */
	if (msgp->numents > RSM_MAX_POLLFDS) {
		if (msgp->numents > max_segs) {	/* validate numents */
			DBG_PRINTF((category, RSM_ERR,
			    "consumeevent_copyin: RSMERR_BAD_ARGS_ERRORS\n"));
			return (RSMERR_BAD_ARGS_ERRORS);
		}
		evlistsz = sizeof (rsm_poll_event_t) * msgp->numents;
		evlist = kmem_zalloc(evlistsz, KM_SLEEP);
		*eventspp = evlist;
	}

	/* copyin the seglist */
	if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)(*eventspp),
	    sizeof (rsm_poll_event_t) * msgp->numents, mode)) {
		if (evlist) {
			kmem_free(evlist, evlistsz);
			*eventspp = NULL;
		}
		DBG_PRINTF((category, RSM_ERR,
		    "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n"));
		return (RSMERR_BAD_ADDR);
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "consumeevent_copyin done\n"));
	return (RSM_SUCCESS);
}

static int
rsm_consumeevent_copyout(rsm_consume_event_msg_t *msgp,
    rsm_poll_event_t *eventsp, int mode)
{
	size_t evlistsz;
	int err = RSM_SUCCESS;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "consumeevent_copyout enter: numents(%d) eventsp(%p)\n",
	    msgp->numents, eventsp));

#ifdef _MULTI_DATAMODEL
	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
		int i;
		rsm_poll_event32_t event32[RSM_MAX_POLLFDS];
		rsm_poll_event32_t *evlist32;
		size_t evlistsz32;

		evlistsz32 = sizeof (rsm_poll_event32_t) * msgp->numents;
		if (msgp->numents > RSM_MAX_POLLFDS) {
			evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP);
		} else {
			evlist32 = event32;
		}

		/*
		 * copy the rsm_poll_event_t array to the rsm_poll_event32_t
		 * array
		 */
		for (i = 0; i < msgp->numents; i++) {
			evlist32[i].rnum = eventsp[i].rnum;
			evlist32[i].fdsidx = eventsp[i].fdsidx;
			evlist32[i].revent = eventsp[i].revent;
		}

		if (ddi_copyout((caddr_t)evlist32, (caddr_t)msgp->seglist,
		    evlistsz32, mode)) {
			err = RSMERR_BAD_ADDR;
		}

		if (msgp->numents > RSM_MAX_POLLFDS) {
			if (evlist32) {	/* free the temp 32-bit event list */
				kmem_free(evlist32, evlistsz32);
			}
			/*
			 * eventsp and evlistsz are based on rsm_poll_event_t
			 * type
			 */
			evlistsz = sizeof (rsm_poll_event_t) * msgp->numents;
			/* event list on the heap and needs to be freed here */
			if (eventsp) {
				kmem_free(eventsp, evlistsz);
			}
		}

		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "consumeevent_copyout done: err=%d\n", err));
		return (err);
	}
#endif
	evlistsz = sizeof (rsm_poll_event_t) * msgp->numents;

	if (ddi_copyout((caddr_t)eventsp, (caddr_t)msgp->seglist, evlistsz,
	    mode)) {
		err = RSMERR_BAD_ADDR;
	}

	if ((msgp->numents > RSM_MAX_POLLFDS) && eventsp) {
		/* event list on the heap and needs to be freed here */
		kmem_free(eventsp, evlistsz);
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "consumeevent_copyout done: err=%d\n", err));
	return (err);
}

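/*
 * Consume pending segment events: copy in the caller's list of resource
 * numbers, decrement s_pollevent for each segment that has an event
 * pending (reporting POLLRDNORM for it) and copy the results back out.
 */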
static int
rsm_consumeevent_ioctl(caddr_t arg, int mode)
{
	int rc;
	int i;
	minor_t rnum;
	rsm_consume_event_msg_t msg = {0};
	rsmseg_t *seg;
	rsm_poll_event_t *event_list;
	rsm_poll_event_t events[RSM_MAX_POLLFDS];
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL);

	event_list = events;

	if ((rc = rsm_consumeevent_copyin(arg, &msg, &event_list, mode)) !=
	    RSM_SUCCESS) {
		return (rc);
	}

	for (i = 0; i < msg.numents; i++) {
		rnum = event_list[i].rnum;
		event_list[i].revent = 0;
		/* get the segment structure */
		seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);
		if (seg) {
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "consumeevent_ioctl: rnum(%d) seg(%p)\n", rnum,
			    seg));
			if (seg->s_pollevent) {
				/* consume the event */
				atomic_add_32(&seg->s_pollevent, -1);
				event_list[i].revent = POLLRDNORM;
			}
			rsmseglock_release(seg);
		}
	}

	if ((rc = rsm_consumeevent_copyout(&msg, event_list, mode)) !=
	    RSM_SUCCESS) {
		return (rc);
	}

	return (RSM_SUCCESS);
}

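/*
 * Copy in the user's I/O vector array. For 32-bit callers each
 * rsmka_iovec32_t element is widened into a rsmka_iovec_t; the local
 * field is interpreted as a segment id when io_type is RSM_HANDLE_TYPE
 * and as a virtual address otherwise.
 */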
static int
iovec_copyin(caddr_t user_vec, rsmka_iovec_t *iovec, int count, int mode)
{
	int size;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin enter\n"));

#ifdef _MULTI_DATAMODEL
	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
		rsmka_iovec32_t *iovec32, *iovec32_base;
		int i;

		size = count * sizeof (rsmka_iovec32_t);
		iovec32_base = iovec32 = kmem_zalloc(size, KM_SLEEP);
		if (ddi_copyin((caddr_t)user_vec,
		    (caddr_t)iovec32, size, mode)) {
			kmem_free(iovec32, size);
			DBG_PRINTF((category, RSM_DEBUG,
			    "iovec_copyin: returning RSMERR_BAD_ADDR\n"));
			return (RSMERR_BAD_ADDR);
		}

		for (i = 0; i < count; i++, iovec++, iovec32++) {
			iovec->io_type = (int)iovec32->io_type;
			if (iovec->io_type == RSM_HANDLE_TYPE)
				iovec->local.segid = (rsm_memseg_id_t)
				    iovec32->local;
			else
				iovec->local.vaddr =
				    (caddr_t)(uintptr_t)iovec32->local;
			iovec->local_offset = (size_t)iovec32->local_offset;
			iovec->remote_offset = (size_t)iovec32->remote_offset;
			iovec->transfer_len = (size_t)iovec32->transfer_len;
		}
		kmem_free(iovec32_base, size);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "iovec_copyin done\n"));
		return (DDI_SUCCESS);
	}
#endif

	size = count * sizeof (rsmka_iovec_t);
	if (ddi_copyin((caddr_t)user_vec, (caddr_t)iovec, size, mode)) {
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "iovec_copyin done: RSMERR_BAD_ADDR\n"));
		return (RSMERR_BAD_ADDR);
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin done\n"));

	return (DDI_SUCCESS);
}


static int
sgio_copyin(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode)
{
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin enter\n"));

#ifdef _MULTI_DATAMODEL
	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
		rsmka_scat_gath32_t sg_io32;

		if (ddi_copyin(arg, (caddr_t)&sg_io32, sizeof (sg_io32),
		    mode)) {
			DBG_PRINTF((category, RSM_DEBUG,
			    "sgio_copyin done: returning EFAULT\n"));
			return (RSMERR_BAD_ADDR);
		}
		sg_io->local_nodeid = (rsm_node_id_t)sg_io32.local_nodeid;
		sg_io->io_request_count = (size_t)sg_io32.io_request_count;
		sg_io->io_residual_count = (size_t)sg_io32.io_residual_count;
		sg_io->flags = (size_t)sg_io32.flags;
		sg_io->remote_handle = (rsm_memseg_import_handle_t)
		    (uintptr_t)sg_io32.remote_handle;
		sg_io->iovec = (rsmka_iovec_t *)(uintptr_t)sg_io32.iovec;
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "sgio_copyin done\n"));
		return (DDI_SUCCESS);
	}
#endif
	if (ddi_copyin(arg, (caddr_t)sg_io, sizeof (rsmka_scat_gath_t),
	    mode)) {
		DBG_PRINTF((category, RSM_DEBUG,
		    "sgio_copyin done: returning EFAULT\n"));
		return (RSMERR_BAD_ADDR);
	}
	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin done\n"));
	return (DDI_SUCCESS);
}

static int
sgio_resid_copyout(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode)
{
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "sgio_resid_copyout enter\n"));

#ifdef _MULTI_DATAMODEL
	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
		rsmka_scat_gath32_t sg_io32;

		sg_io32.io_residual_count = sg_io->io_residual_count;
		sg_io32.flags = sg_io->flags;

		if (ddi_copyout((caddr_t)&sg_io32.io_residual_count,
		    (caddr_t)&((rsmka_scat_gath32_t *)arg)->io_residual_count,
		    sizeof (uint32_t), mode)) {

			DBG_PRINTF((category, RSM_ERR,
			    "sgio_resid_copyout error: rescnt\n"));
			return (RSMERR_BAD_ADDR);
		}

		if (ddi_copyout((caddr_t)&sg_io32.flags,
		    (caddr_t)&((rsmka_scat_gath32_t *)arg)->flags,
		    sizeof (uint32_t), mode)) {

			DBG_PRINTF((category, RSM_ERR,
			    "sgio_resid_copyout error: flags\n"));
			return (RSMERR_BAD_ADDR);
		}
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "sgio_resid_copyout done\n"));
		return (DDI_SUCCESS);
	}
#endif
	if (ddi_copyout((caddr_t)&sg_io->io_residual_count,
	    (caddr_t)&((rsmka_scat_gath_t *)arg)->io_residual_count,
	    sizeof (ulong_t), mode)) {

		DBG_PRINTF((category, RSM_ERR,
		    "sgio_resid_copyout error: rescnt\n"));
		return (RSMERR_BAD_ADDR);
	}

	if (ddi_copyout((caddr_t)&sg_io->flags,
	    (caddr_t)&((rsmka_scat_gath_t *)arg)->flags,
	    sizeof (uint_t), mode)) {

		DBG_PRINTF((category, RSM_ERR,
		    "sgio_resid_copyout error: flags\n"));
		return (RSMERR_BAD_ADDR);
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_resid_copyout done\n"));
	return (DDI_SUCCESS);
}


7456 static int
7457 rsm_iovec_ioctl(dev_t dev, caddr_t arg, int cmd, int mode, cred_t *credp)
7458 {
7459 rsmka_scat_gath_t sg_io;
7460 rsmka_iovec_t ka_iovec_arr[RSM_MAX_IOVLEN];
7461 rsmka_iovec_t *ka_iovec;
7462 rsmka_iovec_t *ka_iovec_start;
7463 rsmpi_scat_gath_t rsmpi_sg_io;
7464 rsmpi_iovec_t iovec_arr[RSM_MAX_IOVLEN];
7465 rsmpi_iovec_t *iovec;
7466 rsmpi_iovec_t *iovec_start = NULL;
7467 rsmapi_access_entry_t *acl;
7468 rsmresource_t *res;
7469 minor_t rnum;
7470 rsmseg_t *im_seg, *ex_seg;
7471 int e;
7472 int error = 0;
7473 uint_t i;
7474 uint_t iov_proc = 0; /* num of iovecs processed */
7475 size_t size = 0;
7476 size_t ka_size;
7477
7478 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL);
7479
7480 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_iovec_ioctl enter\n"));
7481
7482 credp = credp;
7483
7484 /*
7485 * Copyin the scatter/gather structure and build new structure
7486 * for rsmpi.
7487 */
7488 e = sgio_copyin(arg, &sg_io, mode);
7489 if (e != DDI_SUCCESS) {
7490 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7491 "rsm_iovec_ioctl done: sgio_copyin %d\n", e));
7492 return (e);
7493 }
7494
7495 if (sg_io.io_request_count > RSM_MAX_SGIOREQS) {
7496 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7497 "rsm_iovec_ioctl done: request_count(%d) too large\n",
7498 sg_io.io_request_count));
7499 return (RSMERR_BAD_SGIO);
7500 }
7501
7502 rsmpi_sg_io.io_request_count = sg_io.io_request_count;
7503 rsmpi_sg_io.io_residual_count = sg_io.io_request_count;
7504 rsmpi_sg_io.io_segflg = 0;
7505
7506 /* Allocate memory and copyin io vector array */
7507 if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7508 ka_size = sg_io.io_request_count * sizeof (rsmka_iovec_t);
7509 ka_iovec_start = ka_iovec = kmem_zalloc(ka_size, KM_SLEEP);
7510 } else {
7511 ka_iovec_start = ka_iovec = ka_iovec_arr;
7512 }
7513 e = iovec_copyin((caddr_t)sg_io.iovec, ka_iovec,
7514 sg_io.io_request_count, mode);
7515 if (e != DDI_SUCCESS) {
7516 if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7517 kmem_free(ka_iovec, ka_size);
7518 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7519 "rsm_iovec_ioctl done: iovec_copyin %d\n", e));
7520 return (e);
7521 }
7522
7523 /* get the import segment descriptor */
7524 rnum = getminor(dev);
7525 res = rsmresource_lookup(rnum, RSM_LOCK);
7526
7527 /*
7528 * The following sequence of locking may (or MAY NOT) cause a
7529 * deadlock but this is currently not addressed here since the
7530 * implementation will be changed to incorporate the use of
7531 * reference counting for both the import and the export segments.
7532 */
7533
7534 /* rsmseglock_acquire(im_seg) done in rsmresource_lookup */
7535
7536 im_seg = (rsmseg_t *)res;
7537
7538 if (im_seg == NULL) {
7539 if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7540 kmem_free(ka_iovec, ka_size);
7541 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7542 "rsm_iovec_ioctl done: rsmresource_lookup failed\n"));
7543 return (EINVAL);
7544 }
7545 /* putv/getv is supported only on import segments */
7546 if (im_seg->s_type != RSM_RESOURCE_IMPORT_SEGMENT) {
7547 rsmseglock_release(im_seg);
7548 if (sg_io.io_request_count > RSM_MAX_IOVLEN)
7549 kmem_free(ka_iovec, ka_size);
7550 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7551 "rsm_iovec_ioctl done: not an import segment\n"));
7552 return (EINVAL);
7553 }
7554
7555 /*
7556 * wait for a remote DR to complete, i.e., for segments to get UNQUIESCED,
7557 * as well as wait for a local DR to complete.
7558 */
7559 while ((im_seg->s_state == RSM_STATE_CONN_QUIESCE) ||
7560 (im_seg->s_state == RSM_STATE_MAP_QUIESCE) ||
7561 (im_seg->s_flags & RSM_DR_INPROGRESS)) {
7562 if (cv_wait_sig(&im_seg->s_cv, &im_seg->s_lock) == 0) {
7563 DBG_PRINTF((category, RSM_DEBUG,
7564 "rsm_iovec_ioctl done: cv_wait INTR"));
7565 rsmseglock_release(im_seg);
7566 return (RSMERR_INTERRUPTED);
7567 }
7568 }
7569
7570 if ((im_seg->s_state != RSM_STATE_CONNECT) &&
7571 (im_seg->s_state != RSM_STATE_ACTIVE)) {
7572
7573 ASSERT(im_seg->s_state == RSM_STATE_DISCONNECT ||
7574 im_seg->s_state == RSM_STATE_NEW);
7575
7576 DBG_PRINTF((category, RSM_DEBUG,
7577 "rsm_iovec_ioctl done: im_seg not conn/map"));
7578 rsmseglock_release(im_seg);
7579 e = RSMERR_BAD_SGIO;
7580 goto out;
7581 }
7582
7583 im_seg->s_rdmacnt++;
7584 rsmseglock_release(im_seg);
7585
7586 /*
7587 * Allocate and set up the io vector for rsmpi
7588 */
7589 if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7590 size = sg_io.io_request_count * sizeof (rsmpi_iovec_t);
7591 iovec_start = iovec = kmem_zalloc(size, KM_SLEEP);
7592 } else {
7593 iovec_start = iovec = iovec_arr;
7594 }
7595
7596 rsmpi_sg_io.iovec = iovec;
7597 for (iov_proc = 0; iov_proc < sg_io.io_request_count; iov_proc++) {
7598 if (ka_iovec->io_type == RSM_HANDLE_TYPE) {
7599 ex_seg = rsmexport_lookup(ka_iovec->local.segid);
7600
7601 if (ex_seg == NULL) {
7602 e = RSMERR_BAD_SGIO;
7603 break;
7604 }
7605 ASSERT(ex_seg->s_state == RSM_STATE_EXPORT);
7606
7607 acl = ex_seg->s_acl;
7608 if (acl[0].ae_permission == 0) {
7609 struct buf *xbuf;
7610 dev_t sdev = 0;
7611
7612 xbuf = ddi_umem_iosetup(ex_seg->s_cookie,
7613 0, ex_seg->s_len, B_WRITE,
7614 sdev, 0, NULL, DDI_UMEM_SLEEP);
7615
7616 ASSERT(xbuf != NULL);
7617
7618 iovec->local_mem.ms_type = RSM_MEM_BUF;
7619 iovec->local_mem.ms_memory.bp = xbuf;
7620 } else {
7621 iovec->local_mem.ms_type = RSM_MEM_HANDLE;
7622 iovec->local_mem.ms_memory.handle =
7623 ex_seg->s_handle.out;
7624 }
7625 ex_seg->s_rdmacnt++; /* refcnt the handle */
7626 rsmseglock_release(ex_seg);
7627 } else {
7628 iovec->local_mem.ms_type = RSM_MEM_VADDR;
7629 iovec->local_mem.ms_memory.vr.vaddr =
7630 ka_iovec->local.vaddr;
7631 }
7632
7633 iovec->local_offset = ka_iovec->local_offset;
7634 iovec->remote_handle = im_seg->s_handle.in;
7635 iovec->remote_offset = ka_iovec->remote_offset;
7636 iovec->transfer_length = ka_iovec->transfer_len;
7637 iovec++;
7638 ka_iovec++;
7639 }
7640
7641 if (iov_proc < sg_io.io_request_count) {
7642 /* error while processing handle */
7643 rsmseglock_acquire(im_seg);
7644 im_seg->s_rdmacnt--; /* decrement the refcnt for importseg */
7645 if (im_seg->s_rdmacnt == 0) {
7646 cv_broadcast(&im_seg->s_cv);
7647 }
7648 rsmseglock_release(im_seg);
7649 goto out;
7650 }
7651
7652 /* call rsmpi */
7653 if (cmd == RSM_IOCTL_PUTV)
7654 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_putv(
7655 im_seg->s_adapter->rsmpi_handle,
7656 &rsmpi_sg_io);
7657 else if (cmd == RSM_IOCTL_GETV)
7658 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_getv(
7659 im_seg->s_adapter->rsmpi_handle,
7660 &rsmpi_sg_io);
7661 else {
7662 e = EINVAL;
7663 DBG_PRINTF((category, RSM_DEBUG,
7664 "iovec_ioctl: bad command = %x\n", cmd));
7665 }
7666
7667
7668 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7669 "rsm_iovec_ioctl RSMPI oper done %d\n", e));
7670
7671 sg_io.io_residual_count = rsmpi_sg_io.io_residual_count;
7672
7673 /*
7674 * Check for implicit signal post flag and do the signal
7675 * post if needed
7676 */
7677 if (sg_io.flags & RSM_IMPLICIT_SIGPOST &&
7678 e == RSM_SUCCESS) {
7679 rsmipc_request_t request;
7680
7681 request.rsmipc_key = im_seg->s_segid;
7682 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL;
7683 request.rsmipc_segment_cookie = NULL;
7684 e = rsmipc_send(im_seg->s_node, &request, RSM_NO_REPLY);
7685 /*
7686 * Reset the implicit signal post flag to 0 to indicate
7687 * that the signal post has been done and need not be
7688 * done in the RSMAPI library
7689 */
7690 sg_io.flags &= ~RSM_IMPLICIT_SIGPOST;
7691 }
7692
7693 rsmseglock_acquire(im_seg);
7694 im_seg->s_rdmacnt--;
7695 if (im_seg->s_rdmacnt == 0) {
7696 cv_broadcast(&im_seg->s_cv);
7697 }
7698 rsmseglock_release(im_seg);
7699 error = sgio_resid_copyout(arg, &sg_io, mode);
7700 out:
7701 iovec = iovec_start;
7702 ka_iovec = ka_iovec_start;
7703 for (i = 0; i < iov_proc; i++) {
7704 if (ka_iovec->io_type == RSM_HANDLE_TYPE) {
7705 ex_seg = rsmexport_lookup(ka_iovec->local.segid);
7706
7707 ASSERT(ex_seg != NULL);
7708 ASSERT(ex_seg->s_state == RSM_STATE_EXPORT);
7709
7710 ex_seg->s_rdmacnt--; /* unrefcnt the handle */
7711 if (ex_seg->s_rdmacnt == 0) {
7712 cv_broadcast(&ex_seg->s_cv);
7713 }
7714 rsmseglock_release(ex_seg);
7715 }
7716
7717 ASSERT(iovec != NULL); /* true if iov_proc > 0 */
7718
7719 /*
7720 * At present there is no dependency on the existence of xbufs
7721 * created by ddi_umem_iosetup for each of the iovecs. So we
7722 * can free these xbufs here.
7723 */
7724 if (iovec->local_mem.ms_type == RSM_MEM_BUF) {
7725 freerbuf(iovec->local_mem.ms_memory.bp);
7726 }
7727
7728 iovec++;
7729 ka_iovec++;
7730 }
7731
7732 if (sg_io.io_request_count > RSM_MAX_IOVLEN) {
7733 if (iovec_start)
7734 kmem_free(iovec_start, size);
7735 kmem_free(ka_iovec_start, ka_size);
7736 }
7737
7738 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7739 "rsm_iovec_ioctl done %d\n", e));
7740 /* if the RSMPI call failed, return that error; else return copyout's retval */
7741 return ((e != RSM_SUCCESS) ? e : error);
7742
7743 }
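
/*
 * For illustration, a scatter/gather request as consumed by
 * rsm_iovec_ioctl above has roughly the following shape.  This is a
 * sketch assembled from the fields referenced in this file, not a
 * user-level RSMAPI example:
 *
 *	rsmka_iovec_t	iov;
 *	rsmka_scat_gath_t sg;
 *
 *	iov.io_type = RSM_HANDLE_TYPE;	(any other io_type is treated
 *					 as a local virtual address)
 *	iov.local.segid = <exported segment id>;
 *	iov.local_offset = 0;
 *	iov.remote_offset = 0;
 *	iov.transfer_len = <bytes to move>;
 *
 *	sg.io_request_count = 1;	(at most RSM_MAX_SGIOREQS)
 *	sg.io_residual_count = 0;	(set by the driver on return)
 *	sg.flags = RSM_IMPLICIT_SIGPOST; (optional)
 *	sg.iovec = &iov;
 */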
7744
7745
7746 static int
7747 rsmaddr_ioctl(int cmd, rsm_ioctlmsg_t *msg, int mode)
7748 {
7749 adapter_t *adapter;
7750 rsm_addr_t addr;
7751 rsm_node_id_t node;
7752 int rval = DDI_SUCCESS;
7753 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
7754
7755 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmaddr_ioctl enter\n"));
7756
7757 adapter = rsm_getadapter(msg, mode);
7758 if (adapter == NULL) {
7759 DBG_PRINTF((category, RSM_DEBUG,
7760 "rsmaddr_ioctl done: adapter not found\n"));
7761 return (RSMERR_CTLR_NOT_PRESENT);
7762 }
7763
7764 switch (cmd) {
7765 case RSM_IOCTL_MAP_TO_ADDR: /* nodeid to hwaddr mapping */
7766 /* returns the hwaddr in msg->hwaddr */
7767 if (msg->nodeid == my_nodeid) {
7768 msg->hwaddr = adapter->hwaddr;
7769 } else {
7770 addr = get_remote_hwaddr(adapter, msg->nodeid);
7771 if ((int64_t)addr < 0) {
7772 rval = RSMERR_INTERNAL_ERROR;
7773 } else {
7774 msg->hwaddr = addr;
7775 }
7776 }
7777 break;
7778 case RSM_IOCTL_MAP_TO_NODEID: /* hwaddr to nodeid mapping */
7779 /* returns the nodeid in msg->nodeid */
7780 if (msg->hwaddr == adapter->hwaddr) {
7781 msg->nodeid = my_nodeid;
7782 } else {
7783 node = get_remote_nodeid(adapter, msg->hwaddr);
7784 if ((int)node < 0) {
7785 rval = RSMERR_INTERNAL_ERROR;
7786 } else {
7787 msg->nodeid = (rsm_node_id_t)node;
7788 }
7789 }
7790 break;
7791 default:
7792 rval = EINVAL;
7793 break;
7794 }
7795
7796 rsmka_release_adapter(adapter);
7797 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7798 "rsmaddr_ioctl done: %d\n", rval));
7799 return (rval);
7800 }
7801
7802 static int
7803 rsm_ddi_copyin(caddr_t arg, rsm_ioctlmsg_t *msg, int mode)
7804 {
7805 DBG_DEFINE(category,
7806 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);
7807
7808 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin enter\n"));
7809
7810 #ifdef _MULTI_DATAMODEL
7811
7812 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7813 rsm_ioctlmsg32_t msg32;
7814 int i;
7815
7816 if (ddi_copyin(arg, (caddr_t)&msg32, sizeof (msg32), mode)) {
7817 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7818 "rsm_ddi_copyin done: EFAULT\n"));
7819 return (RSMERR_BAD_ADDR);
7820 }
7821 msg->len = msg32.len;
7822 msg->vaddr = (caddr_t)(uintptr_t)msg32.vaddr;
7823 msg->arg = (caddr_t)(uintptr_t)msg32.arg;
7824 msg->key = msg32.key;
7825 msg->acl_len = msg32.acl_len;
7826 msg->acl = (rsmapi_access_entry_t *)(uintptr_t)msg32.acl;
7827 msg->cnum = msg32.cnum;
7828 msg->cname = (caddr_t)(uintptr_t)msg32.cname;
7829 msg->cname_len = msg32.cname_len;
7830 msg->nodeid = msg32.nodeid;
7831 msg->hwaddr = msg32.hwaddr;
7832 msg->perm = msg32.perm;
7833 for (i = 0; i < 4; i++) {
7834 msg->bar.comp[i].u64 = msg32.bar.comp[i].u64;
7835 }
7836 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7837 "rsm_ddi_copyin done\n"));
7838 return (RSM_SUCCESS);
7839 }
7840 #endif
7841 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin done\n"));
7842 if (ddi_copyin(arg, (caddr_t)msg, sizeof (*msg), mode))
7843 return (RSMERR_BAD_ADDR);
7844 else
7845 return (RSM_SUCCESS);
7846 }
7847
7848 static int
7849 rsmattr_ddi_copyout(adapter_t *adapter, caddr_t arg, int mode)
7850 {
7851 rsmka_int_controller_attr_t rsm_cattr;
7852 DBG_DEFINE(category,
7853 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);
7854
7855 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7856 "rsmattr_ddi_copyout enter\n"));
7857 /*
7858 * need to copy appropriate data from rsm_controller_attr_t
7859 * to rsmka_int_controller_attr_t
7860 */
7861 #ifdef _MULTI_DATAMODEL
7862 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
7863 rsmka_int_controller_attr32_t rsm_cattr32;
7864
7865 rsm_cattr32.attr_direct_access_sizes =
7866 adapter->rsm_attr.attr_direct_access_sizes;
7867 rsm_cattr32.attr_atomic_sizes =
7868 adapter->rsm_attr.attr_atomic_sizes;
7869 rsm_cattr32.attr_page_size =
7870 adapter->rsm_attr.attr_page_size;
7871 if (adapter->rsm_attr.attr_max_export_segment_size >
7872 UINT_MAX)
7873 rsm_cattr32.attr_max_export_segment_size =
7874 RSM_MAXSZ_PAGE_ALIGNED;
7875 else
7876 rsm_cattr32.attr_max_export_segment_size =
7877 adapter->rsm_attr.attr_max_export_segment_size;
7878 if (adapter->rsm_attr.attr_tot_export_segment_size >
7879 UINT_MAX)
7880 rsm_cattr32.attr_tot_export_segment_size =
7881 RSM_MAXSZ_PAGE_ALIGNED;
7882 else
7883 rsm_cattr32.attr_tot_export_segment_size =
7884 adapter->rsm_attr.attr_tot_export_segment_size;
7885 if (adapter->rsm_attr.attr_max_export_segments >
7886 UINT_MAX)
7887 rsm_cattr32.attr_max_export_segments =
7888 UINT_MAX;
7889 else
7890 rsm_cattr32.attr_max_export_segments =
7891 adapter->rsm_attr.attr_max_export_segments;
7892 if (adapter->rsm_attr.attr_max_import_map_size >
7893 UINT_MAX)
7894 rsm_cattr32.attr_max_import_map_size =
7895 RSM_MAXSZ_PAGE_ALIGNED;
7896 else
7897 rsm_cattr32.attr_max_import_map_size =
7898 adapter->rsm_attr.attr_max_import_map_size;
7899 if (adapter->rsm_attr.attr_tot_import_map_size >
7900 UINT_MAX)
7901 rsm_cattr32.attr_tot_import_map_size =
7902 RSM_MAXSZ_PAGE_ALIGNED;
7903 else
7904 rsm_cattr32.attr_tot_import_map_size =
7905 adapter->rsm_attr.attr_tot_import_map_size;
7906 if (adapter->rsm_attr.attr_max_import_segments >
7907 UINT_MAX)
7908 rsm_cattr32.attr_max_import_segments =
7909 UINT_MAX;
7910 else
7911 rsm_cattr32.attr_max_import_segments =
7912 adapter->rsm_attr.attr_max_import_segments;
7913 rsm_cattr32.attr_controller_addr =
7914 adapter->rsm_attr.attr_controller_addr;
7915
7916 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7917 "rsmattr_ddi_copyout done\n"));
7918 if (ddi_copyout((caddr_t)&rsm_cattr32, arg,
7919 sizeof (rsmka_int_controller_attr32_t), mode)) {
7920 return (RSMERR_BAD_ADDR);
7921 }
7922 else
7923 return (RSM_SUCCESS);
7924 }
7925 #endif
7926 rsm_cattr.attr_direct_access_sizes =
7927 adapter->rsm_attr.attr_direct_access_sizes;
7928 rsm_cattr.attr_atomic_sizes =
7929 adapter->rsm_attr.attr_atomic_sizes;
7930 rsm_cattr.attr_page_size =
7931 adapter->rsm_attr.attr_page_size;
7932 rsm_cattr.attr_max_export_segment_size =
7933 adapter->rsm_attr.attr_max_export_segment_size;
7934 rsm_cattr.attr_tot_export_segment_size =
7935 adapter->rsm_attr.attr_tot_export_segment_size;
7936 rsm_cattr.attr_max_export_segments =
7937 adapter->rsm_attr.attr_max_export_segments;
7938 rsm_cattr.attr_max_import_map_size =
7939 adapter->rsm_attr.attr_max_import_map_size;
7940 rsm_cattr.attr_tot_import_map_size =
7941 adapter->rsm_attr.attr_tot_import_map_size;
7942 rsm_cattr.attr_max_import_segments =
7943 adapter->rsm_attr.attr_max_import_segments;
7944 rsm_cattr.attr_controller_addr =
7945 adapter->rsm_attr.attr_controller_addr;
7946 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7947 "rsmattr_ddi_copyout done\n"));
7948 if (ddi_copyout((caddr_t)&rsm_cattr, arg,
7949 sizeof (rsmka_int_controller_attr_t), mode)) {
7950 return (RSMERR_BAD_ADDR);
7951 }
7952 else
7953 return (RSM_SUCCESS);
7954 }
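
/*
 * The ILP32 branch above clamps 64-bit attribute values that cannot be
 * represented in 32 bits.  A minimal sketch of the clamp idiom; the
 * CLAMP32 macro is hypothetical and not used by this driver:
 *
 *	#define	CLAMP32(v, ceil) \
 *		(((v) > UINT_MAX) ? (ceil) : (uint32_t)(v))
 *
 *	rsm_cattr32.attr_max_export_segment_size =
 *	    CLAMP32(adapter->rsm_attr.attr_max_export_segment_size,
 *	    RSM_MAXSZ_PAGE_ALIGNED);
 */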
7955
7956 /*ARGSUSED*/
7957 static int
7958 rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
7959 int *rvalp)
7960 {
7961 rsmseg_t *seg;
7962 rsmresource_t *res;
7963 minor_t rnum;
7964 rsm_ioctlmsg_t msg = {0};
7965 int error;
7966 adapter_t *adapter;
7967 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);
7968
7969 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl enter\n"));
7970
7971 if (cmd == RSM_IOCTL_CONSUMEEVENT) {
7972 error = rsm_consumeevent_ioctl((caddr_t)arg, mode);
7973 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7974 "rsm_ioctl RSM_IOCTL_CONSUMEEVENT done: %d\n", error));
7975 return (error);
7976 }
7977
7978 /* topology cmd does not use the arg common to other cmds */
7979 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_TOPOLOGY) {
7980 error = rsmka_topology_ioctl((caddr_t)arg, cmd, mode);
7981 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7982 "rsm_ioctl done: %d\n", error));
7983 return (error);
7984 }
7985
7986 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_IOVEC) {
7987 error = rsm_iovec_ioctl(dev, (caddr_t)arg, cmd, mode, credp);
7988 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7989 "rsm_ioctl done: %d\n", error));
7990 return (error);
7991 }
7992
7993 /*
7994 * try to load arguments
7995 */
7996 if (cmd != RSM_IOCTL_RING_BELL &&
7997 rsm_ddi_copyin((caddr_t)arg, &msg, mode)) {
7998 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
7999 "rsm_ioctl done: EFAULT\n"));
8000 return (RSMERR_BAD_ADDR);
8001 }
8002
8003 if (cmd == RSM_IOCTL_ATTR) {
8004 adapter = rsm_getadapter(&msg, mode);
8005 if (adapter == NULL) {
8006 DBG_PRINTF((category, RSM_DEBUG,
8007 "rsm_ioctl done: ENODEV\n"));
8008 return (RSMERR_CTLR_NOT_PRESENT);
8009 }
8010 error = rsmattr_ddi_copyout(adapter, msg.arg, mode);
8011 rsmka_release_adapter(adapter);
8012 DBG_PRINTF((category, RSM_DEBUG,
8013 "rsm_ioctl:after copyout %d\n", error));
8014 return (error);
8015 }
8016
8017 if (cmd == RSM_IOCTL_BAR_INFO) {
8018 /* Return library off,len of barrier page */
8019 msg.off = barrier_offset;
8020 msg.len = (int)barrier_size;
8021 #ifdef _MULTI_DATAMODEL
8022 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
8023 rsm_ioctlmsg32_t msg32;
8024
8025 if (msg.len > UINT_MAX)
8026 msg32.len = RSM_MAXSZ_PAGE_ALIGNED;
8027 else
8028 msg32.len = (int32_t)msg.len;
8029 msg32.off = (int32_t)msg.off;
8030 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8031 "rsm_ioctl done\n"));
8032 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
8033 sizeof (msg32), mode))
8034 return (RSMERR_BAD_ADDR);
8035 else
8036 return (RSM_SUCCESS);
8037 }
8038 #endif
8039 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8040 "rsm_ioctl done\n"));
8041 if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
8042 sizeof (msg), mode))
8043 return (RSMERR_BAD_ADDR);
8044 else
8045 return (RSM_SUCCESS);
8046 }
8047
8048 if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_MAP_ADDR) {
8049 /* map the nodeid or hwaddr */
8050 error = rsmaddr_ioctl(cmd, &msg, mode);
8051 if (error == RSM_SUCCESS) {
8052 #ifdef _MULTI_DATAMODEL
8053 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
8054 rsm_ioctlmsg32_t msg32;
8055
8056 msg32.hwaddr = (uint64_t)msg.hwaddr;
8057 msg32.nodeid = (uint32_t)msg.nodeid;
8058
8059 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8060 "rsm_ioctl done\n"));
8061 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
8062 sizeof (msg32), mode))
8063 return (RSMERR_BAD_ADDR);
8064 else
8065 return (RSM_SUCCESS);
8066 }
8067 #endif
8068 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8069 "rsm_ioctl done\n"));
8070 if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
8071 sizeof (msg), mode))
8072 return (RSMERR_BAD_ADDR);
8073 else
8074 return (RSM_SUCCESS);
8075 }
8076 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8077 "rsm_ioctl done: %d\n", error));
8078 return (error);
8079 }
8080
8081 /* Find resource and look it up in read mode */
8082 rnum = getminor(dev);
8083 res = rsmresource_lookup(rnum, RSM_NOLOCK);
8084 ASSERT(res != NULL);
8085
8086 /*
8087 * Find command group
8088 */
8089 switch (RSM_IOCTL_CMDGRP(cmd)) {
8090 case RSM_IOCTL_EXPORT_SEG:
8091 /*
8092 * Export list is searched during publish, loopback and
8093 * remote lookup call.
8094 */
8095 seg = rsmresource_seg(res, rnum, credp,
8096 RSM_RESOURCE_EXPORT_SEGMENT);
8097 if (seg->s_type == RSM_RESOURCE_EXPORT_SEGMENT) {
8098 error = rsmexport_ioctl(seg, &msg, cmd, arg, mode,
8099 credp);
8100 } else { /* export ioctl on an import/barrier resource */
8101 error = RSMERR_BAD_SEG_HNDL;
8102 }
8103 break;
8104 case RSM_IOCTL_IMPORT_SEG:
8105 /* Import list is searched during remote unmap call. */
8106 seg = rsmresource_seg(res, rnum, credp,
8107 RSM_RESOURCE_IMPORT_SEGMENT);
8108 if (seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT) {
8109 error = rsmimport_ioctl(seg, &msg, cmd, arg, mode,
8110 credp);
8111 } else { /* import ioctl on an export/barrier resource */
8112 error = RSMERR_BAD_SEG_HNDL;
8113 }
8114 break;
8115 case RSM_IOCTL_BAR:
8116 if (res != RSMRC_RESERVED &&
8117 res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT) {
8118 error = rsmbar_ioctl((rsmseg_t *)res, &msg, cmd, arg,
8119 mode);
8120 } else { /* invalid res value */
8121 error = RSMERR_BAD_SEG_HNDL;
8122 }
8123 break;
8124 case RSM_IOCTL_BELL:
8125 if (res != RSMRC_RESERVED) {
8126 if (res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT)
8127 error = exportbell_ioctl((rsmseg_t *)res, cmd);
8128 else if (res->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)
8129 error = importbell_ioctl((rsmseg_t *)res, cmd);
8130 else /* RSM_RESOURCE_BAR */
8131 error = RSMERR_BAD_SEG_HNDL;
8132 } else { /* invalid res value */
8133 error = RSMERR_BAD_SEG_HNDL;
8134 }
8135 break;
8136 default:
8137 error = EINVAL;
8138 }
8139
8140 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl done: %d\n",
8141 error));
8142 return (error);
8143 }
8144
8145
8146 /* **************************** Segment Mapping Operations ********* */
8147 static rsm_mapinfo_t *
8148 rsm_get_mapinfo(rsmseg_t *seg, off_t off, size_t len, off_t *dev_offset,
8149 size_t *map_len)
8150 {
8151 rsm_mapinfo_t *p;
8152 /*
8153 * Find the correct mapinfo structure to use during the mapping
8154 * from the seg->s_mapinfo list.
8155 * The seg->s_mapinfo list contains in reverse order the mappings
8156 * as returned by the RSMPI rsm_map. In rsm_devmap, we need to
8157 * access the correct entry within this list for the mapping
8158 * requested.
8159 *
8160 * The algorithm for selecting a list entry is as follows:
8161 *
8162 * When start_offset of an entry <= off we have found the entry
8163 * we were looking for. Adjust the dev_offset and map_len (needs
8164 * to be PAGESIZE aligned).
8165 */
8166 p = seg->s_mapinfo;
8167 for (; p; p = p->next) {
8168 if (p->start_offset <= off) {
8169 *dev_offset = p->dev_offset + off - p->start_offset;
8170 *map_len = (len > p->individual_len) ?
8171 p->individual_len : ptob(btopr(len));
8172 return (p);
8173 }
8175 }
8176
8177 return (NULL);
8178 }
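
/*
 * Worked example for rsm_get_mapinfo, with illustrative numbers: if
 * rsm_map returned the segment in two pieces, the list (kept in reverse
 * order of start_offset) might hold
 *
 *	{ start_offset = 0x2000, dev_offset = 0xb000, individual_len = 0x2000 }
 *	{ start_offset = 0x0000, dev_offset = 0xa000, individual_len = 0x2000 }
 *
 * A lookup with off = 0x3000 matches the first entry since
 * 0x2000 <= 0x3000, yielding
 * *dev_offset = 0xb000 + 0x3000 - 0x2000 = 0xc000.
 */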
8179
8180 static void
8181 rsm_free_mapinfo(rsm_mapinfo_t *mapinfo)
8182 {
8183 rsm_mapinfo_t *p;
8184
8185 while (mapinfo != NULL) {
8186 p = mapinfo;
8187 mapinfo = mapinfo->next;
8188 kmem_free(p, sizeof (*p));
8189 }
8190 }
8191
8192 static int
8193 rsmmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off,
8194 size_t len, void **pvtp)
8195 {
8196 rsmcookie_t *p;
8197 rsmresource_t *res;
8198 rsmseg_t *seg;
8199 minor_t rnum;
8200 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8201
8202 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map enter\n"));
8203
8204 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8205 "rsmmap_map: dhp = %x\n", dhp));
8206
8207 flags = flags;
8208
8209 rnum = getminor(dev);
8210 res = (rsmresource_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
8211 ASSERT(res != NULL);
8212
8213 seg = (rsmseg_t *)res;
8214
8215 rsmseglock_acquire(seg);
8216
8217 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8218
8219 /*
8220 * Allocate structure and add cookie to segment list
8221 */
8222 p = kmem_alloc(sizeof (*p), KM_SLEEP);
8223
8224 p->c_dhp = dhp;
8225 p->c_off = off;
8226 p->c_len = len;
8227 p->c_next = seg->s_ckl;
8228 seg->s_ckl = p;
8229
8230 *pvtp = (void *)seg;
8231
8232 rsmseglock_release(seg);
8233
8234 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map done\n"));
8235 return (DDI_SUCCESS);
8236 }
8237
8238 /*
8239 * Page fault handling is done here. The prerequisite mapping setup
8240 * has been done in rsm_devmap with calls to devmap_devmem_setup or
8241 * devmap_umem_setup.
8242 */
8243 static int
8244 rsmmap_access(devmap_cookie_t dhp, void *pvt, offset_t offset, size_t len,
8245 uint_t type, uint_t rw)
8246 {
8247 int e;
8248 rsmseg_t *seg = (rsmseg_t *)pvt;
8249 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8250
8251 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access enter\n"));
8252
8253 rsmseglock_acquire(seg);
8254
8255 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8256
8257 while (seg->s_state == RSM_STATE_MAP_QUIESCE) {
8258 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
8259 DBG_PRINTF((category, RSM_DEBUG,
8260 "rsmmap_access done: cv_wait INTR"));
8261 rsmseglock_release(seg);
8262 return (RSMERR_INTERRUPTED);
8263 }
8264 }
8265
8266 ASSERT(seg->s_state == RSM_STATE_DISCONNECT ||
8267 seg->s_state == RSM_STATE_ACTIVE);
8268
8269 if (seg->s_state == RSM_STATE_DISCONNECT)
8270 seg->s_flags |= RSM_IMPORT_DUMMY;
8271
8272 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8273 "rsmmap_access: dhp = %x\n", dhp));
8274
8275 rsmseglock_release(seg);
8276
8277 if (e = devmap_load(dhp, offset, len, type, rw)) {
8278 DBG_PRINTF((category, RSM_ERR, "devmap_load failed\n"));
8279 }
8280
8281
8282 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access done\n"));
8283
8284 return (e);
8285 }
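
/*
 * Stripped of the quiesce handling, the access callback above reduces
 * to the canonical devmap fault handler: validate driver state, then
 * let devmap_load() fault the pages in.  A minimal sketch:
 *
 *	static int
 *	xx_access(devmap_cookie_t dhp, void *pvt, offset_t off,
 *	    size_t len, uint_t type, uint_t rw)
 *	{
 *		return (devmap_load(dhp, off, len, type, rw));
 *	}
 */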
8286
8287 static int
8288 rsmmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp,
8289 void **newpvt)
8290 {
8291 rsmseg_t *seg = (rsmseg_t *)oldpvt;
8292 rsmcookie_t *p, *old;
8293 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8294
8295 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup enter\n"));
8296
8297 /*
8298 * Same as map, create an entry to hold cookie and add it to
8299 * connect segment list. The oldpvt is a pointer to segment.
8300 * Return segment pointer in newpvt.
8301 */
8302 rsmseglock_acquire(seg);
8303
8304 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8305
8306 /*
8307 * Find old cookie
8308 */
8309 for (old = seg->s_ckl; old != NULL; old = old->c_next) {
8310 if (old->c_dhp == dhp) {
8311 break;
8312 }
8313 }
8314 if (old == NULL) {
8315 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8316 "rsmmap_dup done: EINVAL\n"));
8317 rsmseglock_release(seg);
8318 return (EINVAL);
8319 }
8320
8321 p = kmem_alloc(sizeof (*p), KM_SLEEP);
8322
8323 p->c_dhp = new_dhp;
8324 p->c_off = old->c_off;
8325 p->c_len = old->c_len;
8326 p->c_next = seg->s_ckl;
8327 seg->s_ckl = p;
8328
8329 *newpvt = (void *)seg;
8330
8331 rsmseglock_release(seg);
8332
8333 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup done\n"));
8334
8335 return (DDI_SUCCESS);
8336 }
8337
8338 static void
8339 rsmmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
8340 devmap_cookie_t new_dhp1, void **pvtp1,
8341 devmap_cookie_t new_dhp2, void **pvtp2)
8342 {
8343 /*
8344 * Remove pvtp structure from segment list.
8345 */
8346 rsmseg_t *seg = (rsmseg_t *)pvtp;
8347 int freeflag;
8348
8349 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8350
8351 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap enter\n"));
8352
8353 off = off; len = len;
8354 pvtp1 = pvtp1; pvtp2 = pvtp2;
8355
8356 rsmseglock_acquire(seg);
8357
8358 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8359
8360 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8361 "rsmmap_unmap: dhp = %x\n", dhp));
8362 /*
8363 * We can go ahead and remove the dhps even if we are in
8364 * the MAPPING state because the dhps being removed here
8365 * belong to a different mmap and we are holding the segment
8366 * lock.
8367 */
8368 if (new_dhp1 == NULL && new_dhp2 == NULL) {
8369 /* find and remove dhp handle */
8370 rsmcookie_t *tmp, **back = &seg->s_ckl;
8371
8372 while (*back != NULL) {
8373 tmp = *back;
8374 if (tmp->c_dhp == dhp) {
8375 *back = tmp->c_next;
8376 kmem_free(tmp, sizeof (*tmp));
8377 break;
8378 }
8379 back = &tmp->c_next;
8380 }
8381 } else {
8382 DBG_PRINTF((category, RSM_DEBUG_LVL2,
8383 "rsmmap_unmap:parital unmap"
8384 "new_dhp1 %lx, new_dhp2 %lx\n",
8385 (size_t)new_dhp1, (size_t)new_dhp2));
8386 }
8387
8388 /*
8389 * rsmmap_unmap is called for each mapping cookie on the list.
8390 * When the list becomes empty and we are not in the MAPPING
8391 * state then unmap in the rsmpi driver.
8392 */
8393 if ((seg->s_ckl == NULL) && (seg->s_state != RSM_STATE_MAPPING))
8394 (void) rsm_unmap(seg);
8395
8396 if (seg->s_state == RSM_STATE_END && seg->s_ckl == NULL) {
8397 freeflag = 1;
8398 } else {
8399 freeflag = 0;
8400 }
8401
8402 rsmseglock_release(seg);
8403
8404 if (freeflag) {
8405 /* Free the segment structure */
8406 rsmseg_free(seg);
8407 }
8408 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap done\n"));
8409
8410 }
8411
8412 static struct devmap_callback_ctl rsmmap_ops = {
8413 DEVMAP_OPS_REV, /* devmap_ops version number */
8414 rsmmap_map, /* devmap_ops map routine */
8415 rsmmap_access, /* devmap_ops access routine */
8416 rsmmap_dup, /* devmap_ops dup routine */
8417 rsmmap_unmap, /* devmap_ops unmap routine */
8418 };
8419
8420 static int
8421 rsm_devmap(dev_t dev, devmap_cookie_t dhc, offset_t off, size_t len,
8422 size_t *maplen, uint_t model /*ARGSUSED*/)
8423 {
8424 struct devmap_callback_ctl *callbackops = &rsmmap_ops;
8425 int err;
8426 uint_t maxprot;
8427 minor_t rnum;
8428 rsmseg_t *seg;
8429 off_t dev_offset;
8430 size_t cur_len;
8431 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8432
8433 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_devmap enter\n"));
8434
8435 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8436 "rsm_devmap: off = %lx, len = %lx\n", off, len));
8437 rnum = getminor(dev);
8438 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
8439 ASSERT(seg != NULL);
8440
8441 if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
8442 if ((off == barrier_offset) &&
8443 (len == barrier_size)) {
8444
8445 ASSERT(bar_va != NULL && bar_cookie != NULL);
8446
8447 /*
8448 * The offset argument in devmap_umem_setup represents
8449 * the offset within the kernel memory defined by the
8450 * cookie. We use this offset as barrier_offset.
8451 */
8452 err = devmap_umem_setup(dhc, rsm_dip, NULL, bar_cookie,
8453 barrier_offset, len, PROT_USER|PROT_READ,
8454 DEVMAP_DEFAULTS, 0);
8455
8456 if (err != 0) {
8457 DBG_PRINTF((category, RSM_ERR,
8458 "rsm_devmap done: %d\n", err));
8459 return (RSMERR_MAP_FAILED);
8460 }
8461 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8462 "rsm_devmap done: %d\n", err));
8463
8464 *maplen = barrier_size;
8465
8466 return (err);
8467 } else {
8468 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8469 "rsm_devmap done: %d\n", err));
8470 return (RSMERR_MAP_FAILED);
8471 }
8472 }
8473
8474 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
8475 ASSERT(seg->s_state == RSM_STATE_MAPPING);
8476
8477 /*
8478 * Make sure we still have permission for the map operation.
8479 */
8480 maxprot = PROT_USER;
8481 if (seg->s_mode & RSM_PERM_READ) {
8482 maxprot |= PROT_READ;
8483 }
8484
8485 if (seg->s_mode & RSM_PERM_WRITE) {
8486 maxprot |= PROT_WRITE;
8487 }
8488
8489 /*
8490 * For each devmap call, rsmmap_map is called. This maintains driver
8491 * private information for the mapping. Thus, if there are multiple
8492 * devmap calls there will be multiple rsmmap_map calls and for each
8493 * call, the mapping information will be stored.
8494 * In case of an error during the processing of the devmap call, error
8495 * will be returned. This error return causes the caller of rsm_devmap
8496 * to undo all the mappings by calling rsmmap_unmap for each one.
8497 * rsmmap_unmap will free up the private information for the requested
8498 * mapping.
8499 */
8500 if (seg->s_node != my_nodeid) {
8501 rsm_mapinfo_t *p;
8502
8503 p = rsm_get_mapinfo(seg, off, len, &dev_offset, &cur_len);
8504 if (p == NULL) {
8505 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8506 "rsm_devmap: incorrect mapping info\n"));
8507 return (RSMERR_MAP_FAILED);
8508 }
8509 err = devmap_devmem_setup(dhc, p->dip,
8510 callbackops, p->dev_register,
8511 dev_offset, cur_len, maxprot,
8512 DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS, 0);
8513
8514 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8515 "rsm_devmap: dip=%lx,dreg=%lu,doff=%lx,"
8516 "off=%lx,len=%lx\n",
8517 p->dip, p->dev_register, dev_offset, off, cur_len));
8518
8519 if (err != 0) {
8520 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8521 "rsm_devmap: devmap_devmem_setup failed %d\n",
8522 err));
8523 return (RSMERR_MAP_FAILED);
8524 }
8525 /* cur_len is always an integral multiple of PAGESIZE */
8526 ASSERT((cur_len & (PAGESIZE-1)) == 0);
8527 *maplen = cur_len;
8528 return (err);
8529
8530 } else {
8531 err = devmap_umem_setup(dhc, rsm_dip, callbackops,
8532 seg->s_cookie, off, len, maxprot,
8533 DEVMAP_ALLOW_REMAP|DEVMAP_DEFAULTS, 0);
8534 if (err != 0) {
8535 DBG_PRINTF((category, RSM_DEBUG,
8536 "rsm_devmap: devmap_umem_setup failed %d\n",
8537 err));
8538 return (RSMERR_MAP_FAILED);
8539 }
8540 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8541 "rsm_devmap: loopback done\n"));
8542
8543 *maplen = ptob(btopr(len));
8544
8545 return (err);
8546 }
8547 }
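
/*
 * The two setup calls above reflect where the pages live: for a remote
 * segment the backing store is device (interconnect) memory, so each
 * mapinfo piece is mapped with
 *
 *	devmap_devmem_setup(dhc, dip, callbackops, dev_register,
 *	    dev_offset, len, maxprot, flags, accattrp);
 *
 * while for loopback the export segment is locked-down kernel memory
 * identified by a ddi_umem cookie, so the whole range is mapped with
 *
 *	devmap_umem_setup(dhc, dip, callbackops, cookie, off, len,
 *	    maxprot, flags, accattrp);
 *
 * (Argument names here are schematic; see the calls above for the
 * values this driver actually passes.)
 */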
8548
8549 /*
8550 * We can use the devmap framework for mapping device memory to user space by
8551 * specifying this routine in the rsm_cb_ops structure. The kernel mmap
8552 * processing calls this entry point and devmap_setup is called within this
8553 * function, which eventually calls rsm_devmap
8554 */
8555 static int
8556 rsm_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
8557 uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
8558 {
8559 int error = 0;
8560 int old_state;
8561 minor_t rnum;
8562 rsmseg_t *seg, *eseg;
8563 adapter_t *adapter;
8564 rsm_import_share_t *sharedp;
8565 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);
8566
8567 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_segmap enter\n"));
8568
8569 /*
8570 * find segment
8571 */
8572 rnum = getminor(dev);
8573 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);
8574
8575 if (seg == NULL) {
8576 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8577 "rsm_segmap done: invalid segment\n"));
8578 return (EINVAL);
8579 }
8580
8581 /*
8582 * the user is trying to map a resource that has not been
8583 * defined yet. The library uses this to map in the
8584 * barrier page.
8585 */
8586 if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
8587 rsmseglock_release(seg);
8588
8589 /*
8590 * The mapping for the barrier page is identified
8591 * by the special offset barrier_offset
8592 */
8593
8594 if (off == (off_t)barrier_offset ||
8595 len == (off_t)barrier_size) {
8596 if (bar_cookie == NULL || bar_va == NULL) {
8597 DBG_PRINTF((category, RSM_DEBUG,
8598 "rsm_segmap: bar cookie/va is NULL\n"));
8599 return (EINVAL);
8600 }
8601
8602 error = devmap_setup(dev, (offset_t)off, as, addrp,
8603 (size_t)len, prot, maxprot, flags, cred);
8604
8605 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8606 "rsm_segmap done: %d\n", error));
8607 return (error);
8608 } else {
8609 DBG_PRINTF((category, RSM_DEBUG,
8610 "rsm_segmap: bad offset/length\n"));
8611 return (EINVAL);
8612 }
8613 }
8614
8615 /* Make sure you can only map imported segments */
8616 if (seg->s_hdr.rsmrc_type != RSM_RESOURCE_IMPORT_SEGMENT) {
8617 rsmseglock_release(seg);
8618 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8619 "rsm_segmap done: not an import segment\n"));
8620 return (EINVAL);
8621 }
8622 /* a failing check here means the library is broken */
8623 ASSERT(seg->s_hdr.rsmrc_num == rnum);
8624
8625 /* wait for the segment to become unquiesced */
8626 while (seg->s_state == RSM_STATE_CONN_QUIESCE) {
8627 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
8628 rsmseglock_release(seg);
8629 DBG_PRINTF((category, RSM_DEBUG,
8630 "rsm_segmap done: cv_wait INTR"));
8631 return (ENODEV);
8632 }
8633 }
8634
8635 /* wait until segment leaves the mapping state */
8636 while (seg->s_state == RSM_STATE_MAPPING)
8637 cv_wait(&seg->s_cv, &seg->s_lock);
8638
8639 /*
8640 * we allow multiple maps of the same segment in the KA
8641 * and it works because we do an rsmpi map of the whole
8642 * segment during the first map and all the device mapping
8643 * information needed in rsm_devmap is in the mapinfo list.
8644 */
8645 if ((seg->s_state != RSM_STATE_CONNECT) &&
8646 (seg->s_state != RSM_STATE_ACTIVE)) {
8647 rsmseglock_release(seg);
8648 DBG_PRINTF((category, RSM_DEBUG,
8649 "rsm_segmap done: segment not connected\n"));
8650 return (ENODEV);
8651 }
8652
8653 /*
8654 * Make sure we are not mapping a larger segment than what's
8655 * exported
8656 */
8657 if ((size_t)off + ptob(btopr(len)) > seg->s_len) {
8658 rsmseglock_release(seg);
8659 DBG_PRINTF((category, RSM_DEBUG,
8660 "rsm_segmap done: off+len>seg size\n"));
8661 return (ENXIO);
8662 }
8663
8664 /*
8665 * Make sure we still have permission for the map operation.
8666 */
8667 maxprot = PROT_USER;
8668 if (seg->s_mode & RSM_PERM_READ) {
8669 maxprot |= PROT_READ;
8670 }
8671
8672 if (seg->s_mode & RSM_PERM_WRITE) {
8673 maxprot |= PROT_WRITE;
8674 }
8675
8676 if ((prot & maxprot) != prot) {
8677 /* No permission */
8678 rsmseglock_release(seg);
8679 DBG_PRINTF((category, RSM_DEBUG,
8680 "rsm_segmap done: no permission\n"));
8681 return (EACCES);
8682 }
8683
8684 old_state = seg->s_state;
8685
8686 ASSERT(seg->s_share != NULL);
8687
8688 rsmsharelock_acquire(seg);
8689
8690 sharedp = seg->s_share;
8691
8692 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
8693 "rsm_segmap:RSMSI_STATE=%d\n", sharedp->rsmsi_state));
8694
8695 if ((sharedp->rsmsi_state != RSMSI_STATE_CONNECTED) &&
8696 (sharedp->rsmsi_state != RSMSI_STATE_MAPPED)) {
8697 rsmsharelock_release(seg);
8698 rsmseglock_release(seg);
8699 DBG_PRINTF((category, RSM_DEBUG,
8700 "rsm_segmap done:RSMSI_STATE %d invalid\n",
8701 sharedp->rsmsi_state));
8702 return (ENODEV);
8703 }
8704
8705 /*
8706 * Do the map - since we want importers to share mappings
8707 * we do the rsmpi map for the whole segment
8708 */
8709 if (seg->s_node != my_nodeid) {
8710 uint_t dev_register;
8711 off_t dev_offset;
8712 dev_info_t *dip;
8713 size_t tmp_len;
8714 size_t total_length_mapped = 0;
8715 size_t length_to_map = seg->s_len;
8716 off_t tmp_off = 0;
8717 rsm_mapinfo_t *p;
8718
8719 /*
8720 * length_to_map = seg->s_len is always an integral
8721 * multiple of PAGESIZE. Length mapped in each entry in mapinfo
8722 * list is a multiple of PAGESIZE - RSMPI map ensures this
8723 */
8724
8725 adapter = seg->s_adapter;
8726 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
8727 sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8728
8729 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) {
8730 error = 0;
8731 /* map the whole segment */
8732 while (total_length_mapped < seg->s_len) {
8733 tmp_len = 0;
8734
8735 error = adapter->rsmpi_ops->rsm_map(
8736 seg->s_handle.in, tmp_off,
8737 length_to_map, &tmp_len,
8738 &dip, &dev_register, &dev_offset,
8739 NULL, NULL);
8740
8741 if (error != 0)
8742 break;
8743
8744 /*
8745 * Store the mapping info obtained from rsm_map
8746 */
8747 p = kmem_alloc(sizeof (*p), KM_SLEEP);
8748 p->dev_register = dev_register;
8749 p->dev_offset = dev_offset;
8750 p->dip = dip;
8751 p->individual_len = tmp_len;
8752 p->start_offset = tmp_off;
8753 p->next = sharedp->rsmsi_mapinfo;
8754 sharedp->rsmsi_mapinfo = p;
8755
8756 total_length_mapped += tmp_len;
8757 length_to_map -= tmp_len;
8758 tmp_off += tmp_len;
8759 }
8760 seg->s_mapinfo = sharedp->rsmsi_mapinfo;
8761
8762 if (error != RSM_SUCCESS) {
8763 /* Check if this is the first rsm_map */
8764 if (sharedp->rsmsi_mapinfo != NULL) {
8765 /*
8766 * A single rsm_unmap undoes
8767 * multiple rsm_maps.
8768 */
8769 (void) seg->s_adapter->rsmpi_ops->
8770 rsm_unmap(sharedp->rsmsi_handle);
8771 rsm_free_mapinfo(sharedp->
8772 rsmsi_mapinfo);
8773 }
8774 sharedp->rsmsi_mapinfo = NULL;
8775 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8776 rsmsharelock_release(seg);
8777 rsmseglock_release(seg);
8778 DBG_PRINTF((category, RSM_DEBUG,
8779 "rsm_segmap done: rsmpi map err %d\n",
8780 error));
8781 ASSERT(error != RSMERR_BAD_LENGTH &&
8782 error != RSMERR_BAD_MEM_ALIGNMENT &&
8783 error != RSMERR_BAD_SEG_HNDL);
8784 if (error == RSMERR_UNSUPPORTED_OPERATION)
8785 return (ENOTSUP);
8786 else if (error == RSMERR_INSUFFICIENT_RESOURCES)
8787 return (EAGAIN);
8788 else if (error == RSMERR_CONN_ABORTED)
8789 return (ENODEV);
8790 else
8791 return (error);
8792 } else {
8793 sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
8794 }
8795 } else {
8796 seg->s_mapinfo = sharedp->rsmsi_mapinfo;
8797 }
8798
8799 sharedp->rsmsi_mapcnt++;
8800
8801 rsmsharelock_release(seg);
8802
8803 /* move to an intermediate mapping state */
8804 seg->s_state = RSM_STATE_MAPPING;
8805 rsmseglock_release(seg);
8806
8807 error = devmap_setup(dev, (offset_t)off, as, addrp,
8808 len, prot, maxprot, flags, cred);
8809
8810 rsmseglock_acquire(seg);
8811 ASSERT(seg->s_state == RSM_STATE_MAPPING);
8812
8813 if (error == DDI_SUCCESS) {
8814 seg->s_state = RSM_STATE_ACTIVE;
8815 } else {
8816 rsmsharelock_acquire(seg);
8817
8818 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8819
8820 sharedp->rsmsi_mapcnt--;
8821 if (sharedp->rsmsi_mapcnt == 0) {
8822 /* unmap the shared RSMPI mapping */
8823 ASSERT(sharedp->rsmsi_handle != NULL);
8824 (void) adapter->rsmpi_ops->
8825 rsm_unmap(sharedp->rsmsi_handle);
8826 rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
8827 sharedp->rsmsi_mapinfo = NULL;
8828 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8829 }
8830
8831 rsmsharelock_release(seg);
8832 seg->s_state = old_state;
8833 DBG_PRINTF((category, RSM_ERR,
8834 "rsm: devmap_setup failed %d\n", error));
8835 }
8836 cv_broadcast(&seg->s_cv);
8837 rsmseglock_release(seg);
8838 DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsm_segmap done: %d\n",
8839 error));
8840 return (error);
8841 } else {
8842 /*
8843 * For loopback, the export segment mapping cookie (s_cookie)
8844 * is also used as the s_cookie value for its import segments
8845 * during mapping.
8846 * Note that reference counting for s_cookie of the export
8847 * segment is not required due to the following:
8848 * We never have a case of the export segment being destroyed,
8849 * leaving the import segments with a stale value for the
8850 * s_cookie field, since a force disconnect is done prior to a
8851 * destroy of an export segment. The force disconnect causes
8852 * the s_cookie value to be reset to NULL. Also for the
8853 * rsm_rebind operation, we change the s_cookie value of the
8854 * export segment as well as of all its local (loopback)
8855 * importers.
8856 */
8857 DBG_ADDCATEGORY(category, RSM_LOOPBACK);
8858
8859 rsmsharelock_release(seg);
8860 /*
8861 * In order to maintain the lock ordering between the export
8862 * and import segment locks, we need to acquire the export
8863 * segment lock first and only then acquire the import
8864 * segment lock.
8865 * The above is necessary to avoid any deadlock scenarios
8866 * with rsm_rebind which also acquires both the export
8867 * and import segment locks in the above mentioned order.
8868 * Based on code inspection, there seem to be no other
8869 * situations in which both the export and import segment
8870 * locks are acquired either in the same or opposite order
8871 * as mentioned above.
8872 * Thus in order to conform to the above lock order, we
8873 * need to change the state of the import segment to
8874 * RSM_STATE_MAPPING, release the lock. Once this is done we
8875 * can now safely acquire the export segment lock first
8876 * followed by the import segment lock which is as per
8877 * the lock order mentioned above.
8878 */
8879 /* move to an intermediate mapping state */
8880 seg->s_state = RSM_STATE_MAPPING;
8881 rsmseglock_release(seg);
8882
8883 eseg = rsmexport_lookup(seg->s_key);
8884
8885 if (eseg == NULL) {
8886 rsmseglock_acquire(seg);
8887 /*
8888 * Revert to old_state and signal any waiters
8889 * The shared state is not changed
8890 */
8891
8892 seg->s_state = old_state;
8893 cv_broadcast(&seg->s_cv);
8894 rsmseglock_release(seg);
8895 DBG_PRINTF((category, RSM_DEBUG,
8896 "rsm_segmap done: key %d not found\n", seg->s_key));
8897 return (ENODEV);
8898 }
8899
8900 rsmsharelock_acquire(seg);
8901 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
8902 sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8903
8904 sharedp->rsmsi_mapcnt++;
8905 sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
8906 rsmsharelock_release(seg);
8907
8908 ASSERT(eseg->s_cookie != NULL);
8909
8910 /*
8911 * It is not required or necessary to acquire the import
8912 * segment lock here to change the value of s_cookie since
8913 * no one will touch the import segment as long as it is
8914 * in the RSM_STATE_MAPPING state.
8915 */
8916 seg->s_cookie = eseg->s_cookie;
8917
8918 rsmseglock_release(eseg);
8919
8920 error = devmap_setup(dev, (offset_t)off, as, addrp, (size_t)len,
8921 prot, maxprot, flags, cred);
8922
8923 rsmseglock_acquire(seg);
8924 ASSERT(seg->s_state == RSM_STATE_MAPPING);
8925 if (error == 0) {
8926 seg->s_state = RSM_STATE_ACTIVE;
8927 } else {
8928 rsmsharelock_acquire(seg);
8929
8930 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);
8931
8932 sharedp->rsmsi_mapcnt--;
8933 if (sharedp->rsmsi_mapcnt == 0) {
8934 sharedp->rsmsi_mapinfo = NULL;
8935 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
8936 }
8937 rsmsharelock_release(seg);
8938 seg->s_state = old_state;
8939 seg->s_cookie = NULL;
8940 }
8941 cv_broadcast(&seg->s_cv);
8942 rsmseglock_release(seg);
8943 DBG_PRINTF((category, RSM_DEBUG_LVL2,
8944 "rsm_segmap done: %d\n", error));
8945 return (error);
8946 }
8947 }
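
/*
 * Summary of the import-segment state handling in rsm_segmap above
 * (both the remote and the loopback arm):
 *
 *	CONNECT/ACTIVE --(mmap)--> MAPPING
 *	MAPPING --(devmap_setup succeeds)--> ACTIVE
 *	MAPPING --(devmap_setup fails)-----> previous state
 *
 * The intermediate MAPPING state keeps other threads (and rsm_rebind)
 * off the segment while the locks are dropped around devmap_setup.
 */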
8948
8949 int
8950 rsmka_null_seg_create(
8951 rsm_controller_handle_t argcp,
8952 rsm_memseg_export_handle_t *handle,
8953 size_t size,
8954 uint_t flags,
8955 rsm_memory_local_t *memory,
8956 rsm_resource_callback_t callback,
8957 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
8958 {
8959 return (RSM_SUCCESS);
8960 }
8961
8962
8963 int
8964 rsmka_null_seg_destroy(
8965 rsm_memseg_export_handle_t argmemseg /*ARGSUSED*/)
8966 {
8967 return (RSM_SUCCESS);
8968 }
8969
8970
8971 int
8972 rsmka_null_bind(
8973 rsm_memseg_export_handle_t argmemseg,
8974 off_t offset,
8975 rsm_memory_local_t *argmemory,
8976 rsm_resource_callback_t callback,
8977 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
8978 {
8979 return (RSM_SUCCESS);
8980 }
8981
8982
8983 int
8984 rsmka_null_unbind(
8985 rsm_memseg_export_handle_t argmemseg,
8986 off_t offset,
8987 size_t length /*ARGSUSED*/)
8988 {
8989 return (DDI_SUCCESS);
8990 }
8991
8992 int
8993 rsmka_null_rebind(
8994 rsm_memseg_export_handle_t argmemseg,
8995 off_t offset,
8996 rsm_memory_local_t *memory,
8997 rsm_resource_callback_t callback,
8998 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
8999 {
9000 return (RSM_SUCCESS);
9001 }
9002
9003 int
9004 rsmka_null_publish(
9005 rsm_memseg_export_handle_t argmemseg,
9006 rsm_access_entry_t access_list[],
9007 uint_t access_list_length,
9008 rsm_memseg_id_t segment_id,
9009 rsm_resource_callback_t callback,
9010 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
9011 {
9012 return (RSM_SUCCESS);
9013 }
9014
9015
9016 int
9017 rsmka_null_republish(
9018 rsm_memseg_export_handle_t memseg,
9019 rsm_access_entry_t access_list[],
9020 uint_t access_list_length,
9021 rsm_resource_callback_t callback,
9022 rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
9023 {
9024 return (RSM_SUCCESS);
9025 }
9026
9027 int
9028 rsmka_null_unpublish(
9029 rsm_memseg_export_handle_t argmemseg /*ARGSUSED*/)
9030 {
9031 return (RSM_SUCCESS);
9032 }
9033
9034
9035 void
9036 rsmka_init_loopback()
9037 {
9038 rsm_ops_t *ops = &null_rsmpi_ops;
9039 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_LOOPBACK);
9040
9041 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9042 "rsmka_init_loopback enter\n"));
9043
9044 /* initialize null ops vector */
9045 ops->rsm_seg_create = rsmka_null_seg_create;
9046 ops->rsm_seg_destroy = rsmka_null_seg_destroy;
9047 ops->rsm_bind = rsmka_null_bind;
9048 ops->rsm_unbind = rsmka_null_unbind;
9049 ops->rsm_rebind = rsmka_null_rebind;
9050 ops->rsm_publish = rsmka_null_publish;
9051 ops->rsm_unpublish = rsmka_null_unpublish;
9052 ops->rsm_republish = rsmka_null_republish;
9053
9054 /* initialize attributes for loopback adapter */
9055 loopback_attr.attr_name = loopback_str;
9056 loopback_attr.attr_page_size = 0x8; /* 8K */
9057
9058 /* initialize loopback adapter */
9059 loopback_adapter.rsm_attr = loopback_attr;
9060 loopback_adapter.rsmpi_ops = &null_rsmpi_ops;
9061 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9062 "rsmka_init_loopback done\n"));
9063 }
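
/*
 * The null ops vector lets loopback traffic flow through the common
 * export/import code with the RSMPI hardware operations stubbed out.
 * The general pattern, shown with a hypothetical two-entry ops vector:
 *
 *	typedef struct ops {
 *		int	(*op_publish)(int);
 *		int	(*op_unpublish)(int);
 *	} ops_t;
 *
 *	static int
 *	null_publish(int arg)
 *	{
 *		return (0);	(always succeeds, does nothing)
 *	}
 *
 *	static ops_t null_ops = { null_publish, null_publish };
 */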
9064
9065 /* ************** DR functions ********************************** */
9066 static void
9067 rsm_quiesce_exp_seg(rsmresource_t *resp)
9068 {
9069 int recheck_state;
9070 rsmseg_t *segp = (rsmseg_t *)resp;
9071 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9072 DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg");
9073
9074 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9075 "%s enter: key=%u\n", function, segp->s_key));
9076
9077 rsmseglock_acquire(segp);
9078 do {
9079 recheck_state = 0;
9080 if ((segp->s_state == RSM_STATE_NEW_QUIESCED) ||
9081 (segp->s_state == RSM_STATE_BIND_QUIESCED) ||
9082 (segp->s_state == RSM_STATE_EXPORT_QUIESCING) ||
9083 (segp->s_state == RSM_STATE_EXPORT_QUIESCED)) {
9084 rsmseglock_release(segp);
9085 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9086 "%s done:state =%d\n", function,
9087 segp->s_state));
9088 return;
9089 }
9090
9091 if (segp->s_state == RSM_STATE_NEW) {
9092 segp->s_state = RSM_STATE_NEW_QUIESCED;
9093 rsmseglock_release(segp);
9094 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9095 "%s done:state =%d\n", function,
9096 segp->s_state));
9097 return;
9098 }
9099
9100 if (segp->s_state == RSM_STATE_BIND) {
9101 /* unbind */
9102 (void) rsm_unbind_pages(segp);
9103 segp->s_state = RSM_STATE_BIND_QUIESCED;
9104 rsmseglock_release(segp);
9105 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9106 "%s done:state =%d\n", function,
9107 segp->s_state));
9108 return;
9109 }
9110
9111 if (segp->s_state == RSM_STATE_EXPORT) {
9112 /*
9113 * wait for putv/getv to complete if the segp is
9114 * a local memory handle
9115 */
9116 while ((segp->s_state == RSM_STATE_EXPORT) &&
9117 (segp->s_rdmacnt != 0)) {
9118 cv_wait(&segp->s_cv, &segp->s_lock);
9119 }
9120
9121 if (segp->s_state != RSM_STATE_EXPORT) {
9122 /*
9123 * state changed need to see what it
9124 * should be changed to.
9125 */
9126 recheck_state = 1;
9127 continue;
9128 }
9129
9130 segp->s_state = RSM_STATE_EXPORT_QUIESCING;
9131 rsmseglock_release(segp);
9132 /*
9133 * send SUSPEND messages - currently it will be
9134 * done at the end
9135 */
9136 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9137 "%s done:state =%d\n", function,
9138 segp->s_state));
9139 return;
9140 }
9141 } while (recheck_state);
9142
9143 rsmseglock_release(segp);
9144 }
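
/*
 * Quiesce mapping applied by rsm_quiesce_exp_seg above, summarized:
 *
 *	NEW    -> NEW_QUIESCED
 *	BIND   -> BIND_QUIESCED	   (after unbinding the pages)
 *	EXPORT -> EXPORT_QUIESCING (after draining in-flight putv/getv)
 *
 * Segments already in a *_QUIESCED or *_QUIESCING state are left alone.
 */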
9145
9146 static void
9147 rsm_unquiesce_exp_seg(rsmresource_t *resp)
9148 {
9149 int ret;
9150 rsmseg_t *segp = (rsmseg_t *)resp;
9151 rsmapi_access_entry_t *acl;
9152 rsm_access_entry_t *rsmpi_acl;
9153 int acl_len;
9154 int create_flags = 0;
9155 struct buf *xbuf;
9156 rsm_memory_local_t mem;
9157 adapter_t *adapter;
9158 dev_t sdev = 0;
9159 rsm_resource_callback_t callback_flag;
9160 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
9161 DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg");
9162
9163 rsmseglock_acquire(segp);
9164
9165 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9166 "%s enter: key=%u, state=%d\n", function, segp->s_key,
9167 segp->s_state));
9168
9169 if ((segp->s_state == RSM_STATE_NEW) ||
9170 (segp->s_state == RSM_STATE_BIND) ||
9171 (segp->s_state == RSM_STATE_EXPORT)) {
9172 rsmseglock_release(segp);
9173 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n",
9174 function, segp->s_state));
9175 return;
9176 }
9177
9178 if (segp->s_state == RSM_STATE_NEW_QUIESCED) {
9179 segp->s_state = RSM_STATE_NEW;
9180 cv_broadcast(&segp->s_cv);
9181 rsmseglock_release(segp);
9182 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done:state=%d\n",
9183 function, segp->s_state));
9184 return;
9185 }
9186
9187 if (segp->s_state == RSM_STATE_BIND_QUIESCED) {
9188 /* bind the segment */
9189 ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
9190 segp->s_len, segp->s_proc);
9191 if (ret == RSM_SUCCESS) { /* bind successful */
9192 segp->s_state = RSM_STATE_BIND;
9193 } else { /* bind failed - resource unavailable */
9194 segp->s_state = RSM_STATE_NEW;
9195 }
9196 cv_broadcast(&segp->s_cv);
9197 rsmseglock_release(segp);
9198 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9199 "%s done: bind_qscd bind = %d\n", function, ret));
9200 return;
9201 }
9202
9203 while (segp->s_state == RSM_STATE_EXPORT_QUIESCING) {
9204 /* wait for the segment to move to EXPORT_QUIESCED state */
9205 cv_wait(&segp->s_cv, &segp->s_lock);
9206 }
9207
9208 if (segp->s_state == RSM_STATE_EXPORT_QUIESCED) {
9209 /* bind the segment */
9210 ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
9211 segp->s_len, segp->s_proc);
9212
9213 if (ret != RSM_SUCCESS) {
9214 /* bind failed - resource unavailable */
9215 acl_len = segp->s_acl_len;
9216 acl = segp->s_acl;
9217 rsmpi_acl = segp->s_acl_in;
9218 segp->s_acl_len = 0;
9219 segp->s_acl = NULL;
9220 segp->s_acl_in = NULL;
9221 rsmseglock_release(segp);
9222
9223 rsmexport_rm(segp);
9224 rsmacl_free(acl, acl_len);
9225 rsmpiacl_free(rsmpi_acl, acl_len);
9226
9227 rsmseglock_acquire(segp);
9228 segp->s_state = RSM_STATE_NEW;
9229 cv_broadcast(&segp->s_cv);
9230 rsmseglock_release(segp);
9231 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
9232 "%s done: exp_qscd bind failed = %d\n",
9233 function, ret));
9234 return;
9235 }
9236 /*
9237 * publish the segment
9238 * if successful
9239 * segp->s_state = RSM_STATE_EXPORT;
9240 * else failed
9241 * segp->s_state = RSM_STATE_BIND;
9242 */

		/* check whether it is a local_memory_handle */
		if (segp->s_acl != (rsmapi_access_entry_t *)NULL) {
			if ((segp->s_acl[0].ae_node == my_nodeid) &&
			    (segp->s_acl[0].ae_permission == 0)) {
				segp->s_state = RSM_STATE_EXPORT;
				cv_broadcast(&segp->s_cv);
				rsmseglock_release(segp);
				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
				    "%s done:exp_qscd\n", function));
				return;
			}
		}
		xbuf = ddi_umem_iosetup(segp->s_cookie, 0, segp->s_len, B_WRITE,
		    sdev, 0, NULL, DDI_UMEM_SLEEP);
		ASSERT(xbuf != NULL);

		mem.ms_type = RSM_MEM_BUF;
		mem.ms_bp = xbuf;

		adapter = segp->s_adapter;

		if (segp->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
			create_flags = RSM_ALLOW_UNBIND_REBIND;
		}

		if (segp->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
			callback_flag = RSM_RESOURCE_DONTWAIT;
		} else {
			callback_flag = RSM_RESOURCE_SLEEP;
		}

		ret = adapter->rsmpi_ops->rsm_seg_create(
		    adapter->rsmpi_handle, &segp->s_handle.out,
		    segp->s_len, create_flags, &mem,
		    callback_flag, NULL);

		if (ret != RSM_SUCCESS) {
			acl_len = segp->s_acl_len;
			acl = segp->s_acl;
			rsmpi_acl = segp->s_acl_in;
			segp->s_acl_len = 0;
			segp->s_acl = NULL;
			segp->s_acl_in = NULL;
			rsmseglock_release(segp);

			rsmexport_rm(segp);
			rsmacl_free(acl, acl_len);
			rsmpiacl_free(rsmpi_acl, acl_len);

			rsmseglock_acquire(segp);
			segp->s_state = RSM_STATE_BIND;
			cv_broadcast(&segp->s_cv);
			rsmseglock_release(segp);
			DBG_PRINTF((category, RSM_ERR,
			    "%s done: exp_qscd create failed = %d\n",
			    function, ret));
			return;
		}

		ret = adapter->rsmpi_ops->rsm_publish(
		    segp->s_handle.out, segp->s_acl_in, segp->s_acl_len,
		    segp->s_segid, RSM_RESOURCE_DONTWAIT, NULL);

		if (ret != RSM_SUCCESS) {
			acl_len = segp->s_acl_len;
			acl = segp->s_acl;
			rsmpi_acl = segp->s_acl_in;
			segp->s_acl_len = 0;
			segp->s_acl = NULL;
			segp->s_acl_in = NULL;
			adapter->rsmpi_ops->rsm_seg_destroy(segp->s_handle.out);
			rsmseglock_release(segp);

			rsmexport_rm(segp);
			rsmacl_free(acl, acl_len);
			rsmpiacl_free(rsmpi_acl, acl_len);

			rsmseglock_acquire(segp);
			segp->s_state = RSM_STATE_BIND;
			cv_broadcast(&segp->s_cv);
			rsmseglock_release(segp);
			DBG_PRINTF((category, RSM_ERR,
			    "%s done: exp_qscd publish failed = %d\n",
			    function, ret));
			return;
		}

		segp->s_state = RSM_STATE_EXPORT;
		cv_broadcast(&segp->s_cv);
		rsmseglock_release(segp);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done: exp_qscd\n",
		    function));
		return;
	}

	rsmseglock_release(segp);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
}
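
/*
 * For reference, a summary of the unquiesce transitions implemented
 * above (informal sketch; the code is authoritative):
 *
 *	NEW_QUIESCED	-> NEW
 *	BIND_QUIESCED	-> BIND	(rebind succeeded)
 *			-> NEW	(rebind failed)
 *	EXPORT_QUIESCED	-> EXPORT (rebind, RSMPI seg_create and publish
 *				   all succeeded, or the segment is a
 *				   loopback local-memory handle)
 *			-> NEW	(rebind failed)
 *			-> BIND	(seg_create or publish failed)
 */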

static void
rsm_quiesce_imp_seg(rsmresource_t *resp)
{
	rsmseg_t *segp = (rsmseg_t *)resp;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
	DBG_DEFINE_STR(function, "rsm_quiesce_imp_seg");

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "%s enter: key=%u\n", function, segp->s_key));

	rsmseglock_acquire(segp);
	segp->s_flags |= RSM_DR_INPROGRESS;

	while (segp->s_rdmacnt != 0) {
		/* wait for the RDMA to complete */
		cv_wait(&segp->s_cv, &segp->s_lock);
	}

	rsmseglock_release(segp);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
}

static void
rsm_unquiesce_imp_seg(rsmresource_t *resp)
{
	rsmseg_t *segp = (rsmseg_t *)resp;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
	DBG_DEFINE_STR(function, "rsm_unquiesce_imp_seg");

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "%s enter: key=%u\n", function, segp->s_key));

	rsmseglock_acquire(segp);

	segp->s_flags &= ~RSM_DR_INPROGRESS;
	/* wake up any waiting putv/getv ops */
	cv_broadcast(&segp->s_cv);

	rsmseglock_release(segp);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
}
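
/*
 * The quiesce/unquiesce pair above cooperates with the RDMA (putv/getv)
 * path through s_rdmacnt and the RSM_DR_INPROGRESS flag. A minimal
 * sketch of the expected handshake on the RDMA side (illustrative only;
 * the actual checks live in the putv/getv code elsewhere in this file):
 *
 *	rsmseglock_acquire(segp);
 *	while (segp->s_flags & RSM_DR_INPROGRESS)
 *		cv_wait(&segp->s_cv, &segp->s_lock);  // blocked by quiesce
 *	segp->s_rdmacnt++;                            // RDMA in flight
 *	rsmseglock_release(segp);
 *	... perform the data transfer ...
 *	rsmseglock_acquire(segp);
 *	if (--segp->s_rdmacnt == 0)
 *		cv_broadcast(&segp->s_cv);            // unblock quiesce
 *	rsmseglock_release(segp);
 */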

static void
rsm_process_exp_seg(rsmresource_t *resp, int event)
{
	if (event == RSM_DR_QUIESCE)
		rsm_quiesce_exp_seg(resp);
	else /* UNQUIESCE */
		rsm_unquiesce_exp_seg(resp);
}

static void
rsm_process_imp_seg(rsmresource_t *resp, int event)
{
	if (event == RSM_DR_QUIESCE)
		rsm_quiesce_imp_seg(resp);
	else /* UNQUIESCE */
		rsm_unquiesce_imp_seg(resp);
}

static void
rsm_dr_process_local_segments(int event)
{
	int i, j;
	rsmresource_blk_t *blk;
	rsmresource_t *p;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_dr_process_local_segments enter\n"));

	/* iterate through the resource structure */

	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);

	for (i = 0; i < rsm_resource.rsmrc_len; i++) {
		blk = rsm_resource.rsmrc_root[i];
		if (blk != NULL) {
			for (j = 0; j < RSMRC_BLKSZ; j++) {
				p = blk->rsmrcblk_blks[j];
				if ((p != NULL) && (p != RSMRC_RESERVED)) {
					/* valid resource */
					if (p->rsmrc_type ==
					    RSM_RESOURCE_EXPORT_SEGMENT)
						rsm_process_exp_seg(p, event);
					else if (p->rsmrc_type ==
					    RSM_RESOURCE_IMPORT_SEGMENT)
						rsm_process_imp_seg(p, event);
				}
			}
		}
	}

	rw_exit(&rsm_resource.rsmrc_lock);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_dr_process_local_segments done\n"));
}
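
/*
 * For orientation, the resource table walked above is a two-level
 * structure: rsmrc_root[] is an array of block pointers and each
 * rsmresource_blk_t holds RSMRC_BLKSZ slots. Roughly (a sketch inferred
 * from the traversal above, not a definition):
 *
 *	rsm_resource.rsmrc_root[i]	-> rsmresource_blk_t *
 *	blk->rsmrcblk_blks[j]		-> rsmresource_t * slot; NULL or
 *					   RSMRC_RESERVED if the slot does
 *					   not hold a live resource
 */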

/* *************** DR callback functions ************ */
/* ARGSUSED */
static void
rsm_dr_callback_post_add(void *arg, pgcnt_t delta)
{
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_dr_callback_post_add is a no-op\n"));
	/* Noop */
}
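
/*
 * Adding memory does not invalidate pages that are already bound and
 * locked down for export, which is presumably why no segment processing
 * is needed on post-add; only the delete callbacks below do real work.
 */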

/* ARGSUSED */
static int
rsm_dr_callback_pre_del(void *arg, pgcnt_t delta)
{
	int recheck_state = 0;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_dr_callback_pre_del enter\n"));

	mutex_enter(&rsm_drv_data.drv_lock);

	do {
		recheck_state = 0;
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_dr_callback_pre_del:state=%d\n",
		    rsm_drv_data.drv_state));

		switch (rsm_drv_data.drv_state) {
		case RSM_DRV_NEW:
			/*
			 * The state should never be RSM_DRV_NEW here,
			 * since in that state the callbacks have not yet
			 * been registered. So, ASSERT.
			 */
			ASSERT(0);
			mutex_exit(&rsm_drv_data.drv_lock);
			return (0);
		case RSM_DRV_REG_PROCESSING:
			/*
			 * The driver is in the process of registering
			 * with the DR framework, so wait until the
			 * registration process is complete.
			 */
			recheck_state = 1;
			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
			break;
		case RSM_DRV_UNREG_PROCESSING:
			/*
			 * If the state is RSM_DRV_UNREG_PROCESSING, the
			 * module is in the process of detaching and
			 * unregistering the callbacks from the DR
			 * framework. So, simply return.
			 */
			mutex_exit(&rsm_drv_data.drv_lock);
			DBG_PRINTF((category, RSM_DEBUG,
			    "rsm_dr_callback_pre_del:"
			    "pre-del on NEW/UNREG\n"));
			return (0);
		case RSM_DRV_OK:
			rsm_drv_data.drv_state = RSM_DRV_PREDEL_STARTED;
			break;
		case RSM_DRV_PREDEL_STARTED:
			/* FALLTHRU */
		case RSM_DRV_PREDEL_COMPLETED:
			/* FALLTHRU */
		case RSM_DRV_POSTDEL_IN_PROGRESS:
			recheck_state = 1;
			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
			break;
		case RSM_DRV_DR_IN_PROGRESS:
			rsm_drv_data.drv_memdel_cnt++;
			mutex_exit(&rsm_drv_data.drv_lock);
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsm_dr_callback_pre_del done\n"));
			return (0);
		default:
			ASSERT(0);
			break;
		}

	} while (recheck_state);

	rsm_drv_data.drv_memdel_cnt++;

	mutex_exit(&rsm_drv_data.drv_lock);

	/* Do all the quiescing stuff here */
	DBG_PRINTF((category, RSM_DEBUG,
	    "rsm_dr_callback_pre_del: quiesce things now\n"));

	rsm_dr_process_local_segments(RSM_DR_QUIESCE);

	/*
	 * Now that all local segments have been quiesced, inform the
	 * importers.
	 */
	rsm_send_suspend();

	/*
	 * In response to the suspend message the remote node(s) will
	 * process their segments and send back a suspend_complete
	 * message. Until all the nodes have sent the suspend_complete
	 * message we wait in the RSM_DRV_PREDEL_STARTED state. The
	 * exporter_quiesce function then transitions us to the
	 * RSM_DRV_PREDEL_COMPLETED state.
	 */
	mutex_enter(&rsm_drv_data.drv_lock);

	while (rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED) {
		cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
	}

	ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_COMPLETED);

	rsm_drv_data.drv_state = RSM_DRV_DR_IN_PROGRESS;
	cv_broadcast(&rsm_drv_data.drv_cv);

	mutex_exit(&rsm_drv_data.drv_lock);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_dr_callback_pre_del done\n"));

	return (0);
}
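
/*
 * A rough timeline of a memory-delete DR operation as coordinated by
 * these callbacks (informal sketch; rsm_send_suspend, rsm_send_resume
 * and exporter_quiesce are defined elsewhere in this file):
 *
 *	pre-del:  OK -> PREDEL_STARTED
 *	          quiesce local segments, rsm_send_suspend()
 *	          wait for the importers' suspend_complete messages:
 *	          PREDEL_STARTED -> PREDEL_COMPLETED (in exporter_quiesce)
 *	          -> DR_IN_PROGRESS
 *	post-del: last outstanding post-del: DR_IN_PROGRESS ->
 *	          POSTDEL_IN_PROGRESS
 *	          unquiesce local segments, rsm_send_resume() -> OK
 */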

/* ARGSUSED */
static void
rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled)
{
	int recheck_state = 0;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_dr_callback_post_del enter\n"));

	mutex_enter(&rsm_drv_data.drv_lock);

	do {
		recheck_state = 0;
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_dr_callback_post_del:state=%d\n",
		    rsm_drv_data.drv_state));

		switch (rsm_drv_data.drv_state) {
		case RSM_DRV_NEW:
			/*
			 * The driver state cannot be RSM_DRV_NEW here,
			 * since in that state the callbacks have not
			 * yet been registered.
			 */
			ASSERT(0);
			mutex_exit(&rsm_drv_data.drv_lock);
			return;
		case RSM_DRV_REG_PROCESSING:
			/*
			 * The driver is in the process of registering with
			 * the DR framework. Wait until the registration is
			 * complete.
			 */
			recheck_state = 1;
			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
			break;
		case RSM_DRV_UNREG_PROCESSING:
			/*
			 * RSM_DRV_UNREG_PROCESSING state means the module
			 * is detaching and unregistering the callbacks
			 * from the DR framework. So simply return.
			 */
			/* FALLTHRU */
		case RSM_DRV_OK:
			/*
			 * RSM_DRV_OK means we missed the pre-del
			 * corresponding to this post-del because we had
			 * not yet registered, so simply return.
			 */
			mutex_exit(&rsm_drv_data.drv_lock);
			DBG_PRINTF((category, RSM_DEBUG,
			    "rsm_dr_callback_post_del:"
			    "post-del on OK/UNREG\n"));
			return;
		case RSM_DRV_PREDEL_STARTED:
			/* FALLTHRU */
		case RSM_DRV_PREDEL_COMPLETED:
			/* FALLTHRU */
		case RSM_DRV_POSTDEL_IN_PROGRESS:
			recheck_state = 1;
			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
			break;
		case RSM_DRV_DR_IN_PROGRESS:
			rsm_drv_data.drv_memdel_cnt--;
			if (rsm_drv_data.drv_memdel_cnt > 0) {
				mutex_exit(&rsm_drv_data.drv_lock);
				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
				    "rsm_dr_callback_post_del done\n"));
				return;
			}
			rsm_drv_data.drv_state = RSM_DRV_POSTDEL_IN_PROGRESS;
			break;
		default:
			ASSERT(0);
			mutex_exit(&rsm_drv_data.drv_lock);
			return;
		}
	} while (recheck_state);

	mutex_exit(&rsm_drv_data.drv_lock);

	/* Do all the unquiescing stuff here */
	DBG_PRINTF((category, RSM_DEBUG,
	    "rsm_dr_callback_post_del: unquiesce things now\n"));

	rsm_dr_process_local_segments(RSM_DR_UNQUIESCE);

	/*
	 * Now that all local segments have been unquiesced, inform the
	 * importers.
	 */
	rsm_send_resume();

	mutex_enter(&rsm_drv_data.drv_lock);

	rsm_drv_data.drv_state = RSM_DRV_OK;

	cv_broadcast(&rsm_drv_data.drv_cv);

	mutex_exit(&rsm_drv_data.drv_lock);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_dr_callback_post_del done\n"));
}
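
/*
 * These three callbacks are presumably registered with the kernel
 * memory-DR framework as a kphysm_setup_vector_t (see
 * <sys/mem_config.h>). A minimal sketch of such a registration,
 * assuming the vector and registration call used by this driver:
 *
 *	static kphysm_setup_vector_t rsm_dr_callback_vec = {
 *		KPHYSM_SETUP_VECTOR_VERSION,
 *		rsm_dr_callback_post_add,
 *		rsm_dr_callback_pre_del,
 *		rsm_dr_callback_post_del
 *	};
 *
 *	(void) kphysm_setup_func_register(&rsm_dr_callback_vec, NULL);
 */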