xref: /dflybsd-src/sys/dev/disk/xdisk/xdisk.c (revision fda7d3889b1114d34ad3a52a7257a2b80fe24e4c)
1 /*
2  * Copyright (c) 2012 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 /*
35  * This module allows disk devices to be created and associated with a
36  * communications pipe or socket.  You open the device and issue an
37  * ioctl() to install a new disk along with its communications descriptor.
38  *
39  * All further communication occurs via the descriptor using the DMSG
40  * LNK_CONN, LNK_SPAN, and BLOCK protocols.  The descriptor can be a
41  * direct connection to a remote machine's disk (in-kernel), to a remote
42  * cluster controller, to the local cluster controller, etc.
43  *
44  * /dev/xdisk is the control device, issue ioctl()s to create the /dev/xa%d
45  * devices.  These devices look like raw disks to the system.
46  */
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/buf.h>
50 #include <sys/conf.h>
51 #include <sys/device.h>
52 #include <sys/devicestat.h>
53 #include <sys/disk.h>
54 #include <sys/kernel.h>
55 #include <sys/malloc.h>
56 #include <sys/sysctl.h>
57 #include <sys/proc.h>
58 #include <sys/queue.h>
59 #include <sys/udev.h>
60 #include <sys/uuid.h>
61 #include <sys/kern_syscall.h>
62 
63 #include <sys/dmsg.h>
64 #include <sys/xdiskioctl.h>
65 
66 #include <sys/buf2.h>
67 #include <sys/thread2.h>
68 
69 static int xdisk_attach(struct xdisk_attach_ioctl *xaioc);
70 static void xa_exit(kdmsg_iocom_t *iocom);
71 static int xa_msg_conn_reply(kdmsg_state_t *state, kdmsg_msg_t *msg);
72 static int xa_msg_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg);
73 static int xa_lnk_rcvmsg(kdmsg_msg_t *msg);
74 static int xa_lnk_dbgmsg(kdmsg_msg_t *msg);
75 static int xa_adhoc_input(kdmsg_msg_t *msg);
76 
77 MALLOC_DEFINE(M_XDISK, "Networked disk client", "Network Disks");
78 
79 /*
80  * Control device, issue ioctls to create xa devices.
81  */
82 static d_open_t xdisk_open;
83 static d_close_t xdisk_close;
84 static d_ioctl_t xdisk_ioctl;
85 
86 static struct dev_ops xdisk_ops = {
87 	{ "xdisk", 0, D_MPSAFE },
88         .d_open =	xdisk_open,
89         .d_close =	xdisk_close,
90         .d_ioctl =	xdisk_ioctl
91 };
92 
93 /*
94  * XA disk devices
95  */
96 static d_open_t xa_open;
97 static d_close_t xa_close;
98 static d_ioctl_t xa_ioctl;
99 static d_strategy_t xa_strategy;
100 static d_psize_t xa_size;
101 
102 static struct dev_ops xa_ops = {
103 	{ "xa", 0, D_DISK | D_CANFREE | D_MPSAFE },
104         .d_open =	xa_open,
105         .d_close =	xa_close,
106         .d_ioctl =	xa_ioctl,
107         .d_read =	physread,
108         .d_write =	physwrite,
109         .d_strategy =	xa_strategy,
110 	.d_psize =	xa_size
111 };
112 
113 struct xa_softc {
114 	TAILQ_ENTRY(xa_softc) entry;
115 	cdev_t		dev;
116 	kdmsg_iocom_t	iocom;
117 	struct xdisk_attach_ioctl xaioc;
118 	struct disk_info info;
119 	struct disk	disk;
120 	uuid_t		pfs_fsid;
121 	int		unit;
122 	int		inprog;
123 	int		connected;
124 };
125 
126 static struct lwkt_token xdisk_token = LWKT_TOKEN_INITIALIZER(xdisk_token);
127 static int xdisk_opencount;
128 static cdev_t xdisk_dev;
129 static TAILQ_HEAD(, xa_softc) xa_queue;
130 
131 /*
132  * Module initialization
133  */
134 static int
135 xdisk_modevent(module_t mod, int type, void *data)
136 {
137 	switch (type) {
138 	case MOD_LOAD:
139 		TAILQ_INIT(&xa_queue);
140 		xdisk_dev = make_dev(&xdisk_ops, 0,
141 				     UID_ROOT, GID_WHEEL, 0600, "xdisk");
142 		break;
143 	case MOD_UNLOAD:
144 	case MOD_SHUTDOWN:
145 		if (xdisk_opencount || TAILQ_FIRST(&xa_queue))
146 			return (EBUSY);
147 		if (xdisk_dev) {
148 			destroy_dev(xdisk_dev);
149 			xdisk_dev = NULL;
150 		}
151 		dev_ops_remove_all(&xdisk_ops);
152 		dev_ops_remove_all(&xa_ops);
153 		break;
154 	default:
155 		break;
156 	}
157 	return 0;
158 }
159 
160 DEV_MODULE(xdisk, xdisk_modevent, 0);
161 
162 /*
163  * Control device
164  */
165 static int
166 xdisk_open(struct dev_open_args *ap)
167 {
168 	lwkt_gettoken(&xdisk_token);
169 	++xdisk_opencount;
170 	lwkt_reltoken(&xdisk_token);
171 	return(0);
172 }
173 
174 static int
175 xdisk_close(struct dev_close_args *ap)
176 {
177 	lwkt_gettoken(&xdisk_token);
178 	--xdisk_opencount;
179 	lwkt_reltoken(&xdisk_token);
180 	return(0);
181 }
182 
183 static int
184 xdisk_ioctl(struct dev_ioctl_args *ap)
185 {
186 	int error;
187 
188 	switch(ap->a_cmd) {
189 	case XDISKIOCATTACH:
190 		error = xdisk_attach((void *)ap->a_data);
191 		break;
192 	default:
193 		error = ENOTTY;
194 		break;
195 	}
196 	return error;
197 }
198 
199 /************************************************************************
200  *				DMSG INTERFACE				*
201  ************************************************************************/
202 
203 static int
204 xdisk_attach(struct xdisk_attach_ioctl *xaioc)
205 {
206 	struct xa_softc *scan;
207 	struct xa_softc *xa;
208 	struct file *fp;
209 	kdmsg_msg_t *msg;
210 	int unit;
211 	char devname[64];
212 	cdev_t dev;
213 
214 	fp = holdfp(curproc->p_fd, xaioc->fd, -1);
215 	if (fp == NULL)
216 		return EINVAL;
217 
218 	xa = kmalloc(sizeof(*xa), M_XDISK, M_WAITOK|M_ZERO);
219 
220 	/*
221 	 * Find unit
222 	 */
223 	lwkt_gettoken(&xdisk_token);
224 	unit = 0;
225 	do {
226 		TAILQ_FOREACH(scan, &xa_queue, entry) {
227 			if (scan->unit == unit)
228 				break;
229 		}
230 	} while (scan != NULL);
231 	xa->unit = unit;
232 	xa->xaioc = *xaioc;
233 	TAILQ_INSERT_TAIL(&xa_queue, xa, entry);
234 	lwkt_reltoken(&xdisk_token);
235 
236 	/*
237 	 * Create device
238 	 */
239 	dev = disk_create(unit, &xa->disk, &xa_ops);
240 	dev->si_drv1 = xa;
241 	xa->dev = dev;
242 
243 	xa->info.d_media_blksize = 512;
244 	xa->info.d_media_blocks = xaioc->size / 512;
245 	xa->info.d_dsflags = DSO_MBRQUIET | DSO_RAWPSIZE;
246 	xa->info.d_secpertrack = 32;
247 	xa->info.d_nheads = 64;
248 	xa->info.d_secpercyl = xa->info.d_secpertrack * xa->info.d_nheads;
249 	xa->info.d_ncylinders = 0;
250 	disk_setdiskinfo_sync(&xa->disk, &xa->info);
251 
252 	/*
253 	 * Set up messaging connection
254 	 */
255 	ksnprintf(devname, sizeof(devname), "xa%d", unit);
256 	kdmsg_iocom_init(&xa->iocom, xa, M_XDISK,
257 			 xa_lnk_rcvmsg,
258 			 xa_lnk_dbgmsg,
259 			 xa_adhoc_input);
260 	xa->iocom.exit_func = xa_exit;
261 	xa->inprog = 1;
262 	kern_uuidgen(&xa->pfs_fsid, 1);
263 	kdmsg_iocom_reconnect(&xa->iocom, fp, devname);
264 
265 	/*
266 	 * Issue DMSG_LNK_CONN for device.  This sets up filters so hopefully
267 	 * the only SPANs we receive are from servers providing the label
268 	 * being configured.  Hopefully that's just a single server(!)(!).
269 	 * (HAMMER peers might have multiple servers but block device peers
270 	 * currently only allow one).  There could still be multiple spans
271 	 * due to there being multiple paths available, however.
272 	 */
273 
274 	msg = kdmsg_msg_alloc(&xa->iocom.router, DMSG_LNK_CONN | DMSGF_CREATE,
275 			      xa_msg_conn_reply, xa);
276 	msg->any.lnk_conn.pfs_type = 0;
277 	msg->any.lnk_conn.proto_version = DMSG_SPAN_PROTO_1;
278 	msg->any.lnk_conn.peer_type = DMSG_PEER_BLOCK;
279 	msg->any.lnk_conn.peer_mask = 1LLU << DMSG_PEER_BLOCK;
280 	ksnprintf(msg->any.lnk_conn.cl_label,
281 		  sizeof(msg->any.lnk_conn.cl_label),
282 		  "%s", xaioc->cl_label);
283 	msg->any.lnk_conn.pfs_fsid = xa->pfs_fsid;
284 	xa->iocom.conn_state = msg->state;
285 	kdmsg_msg_write(msg);
286 
287 	xa->inprog = 0;		/* unstall msg thread exit (if racing) */
288 
289 	return(0);
290 }
291 
292 /*
293  * Handle reply to our LNK_CONN transaction (transaction remains open)
294  */
295 static
296 int
297 xa_msg_conn_reply(kdmsg_state_t *state, kdmsg_msg_t *msg)
298 {
299 	struct xa_softc *xa = state->any.any;
300         kdmsg_msg_t *rmsg;
301 
302 	if (msg->any.head.cmd & DMSGF_CREATE) {
303 		kprintf("XA LNK_CONN received reply\n");
304 		rmsg = kdmsg_msg_alloc(&xa->iocom.router,
305 				       DMSG_LNK_SPAN | DMSGF_CREATE,
306 				       xa_msg_span_reply, xa);
307 		rmsg->any.lnk_span.pfs_type = 0;
308 		rmsg->any.lnk_span.proto_version = DMSG_SPAN_PROTO_1;
309 		rmsg->any.lnk_span.peer_type = DMSG_PEER_BLOCK;
310 
311 		ksnprintf(rmsg->any.lnk_span.cl_label,
312 			  sizeof(rmsg->any.lnk_span.cl_label),
313 			  "%s", xa->xaioc.cl_label);
314 		kdmsg_msg_write(rmsg);
315 	}
316 	if ((state->txcmd & DMSGF_DELETE) == 0 &&
317 	    (msg->any.head.cmd & DMSGF_DELETE)) {
318 		kprintf("DISK LNK_CONN terminated by remote\n");
319 		xa->iocom.conn_state = NULL;
320 		kdmsg_msg_reply(msg, 0);
321 	}
322 	return(0);
323 }
324 
325 static int
326 xa_msg_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg)
327 {
328 	if ((state->txcmd & DMSGF_DELETE) == 0 &&
329 	    (msg->any.head.cmd & DMSGF_DELETE)) {
330 		kprintf("SPAN REPLY - Our sent span was terminated by the "
331 			"remote %08x state %p\n", msg->any.head.cmd, state);
332 		kdmsg_msg_reply(msg, 0);
333 	}
334 	return (0);
335 }
336 
337 /*
338  * Called from iocom core transmit thread upon disconnect.
339  */
340 static
341 void
342 xa_exit(kdmsg_iocom_t *iocom)
343 {
344 	struct xa_softc *xa = iocom->handle;
345 
346 	kprintf("XA_EXIT UNIT %d\n", xa->unit);
347 
348 	kdmsg_iocom_uninit(iocom);
349 
350 	while (xa->inprog) {
351 		tsleep(xa, 0, "xarace", hz);
352 	}
353 
354 	/*
355 	 * XXX allow reconnection, wait for users to terminate?
356 	 */
357 
358 	disk_destroy(&xa->disk);
359 
360 	lwkt_gettoken(&xdisk_token);
361 	TAILQ_REMOVE(&xa_queue, xa, entry);
362 	lwkt_reltoken(&xdisk_token);
363 
364 	kfree(xa, M_XDISK);
365 }
366 
367 static int
368 xa_lnk_rcvmsg(kdmsg_msg_t *msg)
369 {
370 	switch(msg->any.head.cmd & DMSGF_TRANSMASK) {
371 	case DMSG_LNK_CONN | DMSGF_CREATE:
372 		/*
373 		 * connection request from peer, send a streaming
374 		 * result of 0 (leave the transaction open).  Transaction
375 		 * is left open for the duration of the connection, we
376 		 * let the kern_dmsg module clean it up on disconnect.
377 		 */
378 		kdmsg_msg_result(msg, 0);
379 		break;
380 	case DMSG_LNK_SPAN | DMSGF_CREATE:
381 		/*
382 		 * Incoming SPAN - transaction create
383 		 *
384 		 * We do not have to respond right now.  Instead we will
385 		 * respond later on when the peer deletes their side.
386 		 */
387 		break;
388 	case DMSG_LNK_SPAN | DMSGF_DELETE:
389 		/*
390 		 * Incoming SPAN - transaction delete.
391 		 *
392 		 * We must terminate our side so both ends can free up
393 		 * their recorded state.
394 		 */
395 		/* fall through */
396 	case DMSG_LNK_SPAN | DMSGF_CREATE | DMSGF_DELETE:
397 		/*
398 		 * Incoming SPAN - transaction delete (degenerate span).
399 		 *
400 		 * We must terminate our side so both ends can free up
401 		 * their recorded state.
402 		 */
403 		kdmsg_msg_reply(msg, 0);
404 		break;
405 	default:
406 		/*
407 		 * Unsupported LNK message received.  We only need to
408 		 * reply if it's a transaction in order to close our end.
409 		 * Ignore any one-way messages are any further messages
410 		 * associated with the transaction.
411 		 *
412 		 * NOTE: This case also includes DMSG_LNK_ERROR messages
413 		 *	 which might be one-way, replying to those would
414 		 *	 cause an infinite ping-pong.
415 		 */
416 		if (msg->any.head.cmd & DMSGF_CREATE)
417 			kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
418 		break;
419 	}
420 	return(0);
421 }
422 
423 static int
424 xa_lnk_dbgmsg(kdmsg_msg_t *msg)
425 {
426 	switch(msg->any.head.cmd & DMSGF_CMDSWMASK) {
427 	case DMSG_DBG_SHELL:
428 		/*
429 		 * Execute shell command (not supported atm).
430 		 *
431 		 * This is a one-way packet but if not (e.g. if part of
432 		 * a streaming transaction), we will have already closed
433 		 * our end.
434 		 */
435 		kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
436 		break;
437 	case DMSG_DBG_SHELL | DMSGF_REPLY:
438 		/*
439 		 * Receive one or more replies to a shell command that we
440 		 * sent.
441 		 *
442 		 * This is a one-way packet but if not (e.g. if part of
443 		 * a streaming transaction), we will have already closed
444 		 * our end.
445 		 */
446 		if (msg->aux_data) {
447 			msg->aux_data[msg->aux_size - 1] = 0;
448 			kprintf("DEBUGMSG: %s\n", msg->aux_data);
449 		}
450 		break;
451 	default:
452 		/*
453 		 * We don't understand what is going on, issue a reply.
454 		 * This will take care of all left-over cases whether it
455 		 * is a transaction or one-way.
456 		 */
457 		kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
458 		break;
459 	}
460 	return(0);
461 }
462 
463 static int
464 xa_adhoc_input(kdmsg_msg_t *msg)
465 {
466         kprintf("XA ADHOC INPUT MSG %08x\n", msg->any.head.cmd);
467         return(0);
468 }
469 
470 /************************************************************************
471  *			   XA DEVICE INTERFACE				*
472  ************************************************************************/
473 
474 static int
475 xa_open(struct dev_open_args *ap)
476 {
477 	cdev_t dev = ap->a_head.a_dev;
478 	struct xa_softc *xa;
479 
480 	xa = dev->si_drv1;
481 
482 	dev->si_bsize_phys = 512;
483 	dev->si_bsize_best = 32768;
484 
485 	/*
486 	 * Issue streaming open and wait for reply.
487 	 */
488 
489 	/* XXX check ap->a_oflags & FWRITE, EACCES if read-only */
490 
491 	return(0);
492 }
493 
/*
 * Close an xa disk device.
 *
 * There is no per-open state to release yet, so this simply reports
 * success.  The explicit return matters: the function is declared to
 * return int and the caller would otherwise read an undefined value.
 */
static int
xa_close(struct dev_close_args *ap)
{
	return (0);
}
499 
500 static int
501 xa_strategy(struct dev_strategy_args *ap)
502 {
503 }
504 
505 static int
506 xa_ioctl(struct dev_ioctl_args *ap)
507 {
508 	return (ENOTTY);
509 }
510 
511 static int
512 xa_size(struct dev_psize_args *ap)
513 {
514 	struct xa_softc *xa;
515 
516 	if ((xa = ap->a_head.a_dev->si_drv1) == NULL)
517 		return (ENXIO);
518 	if (xa->inprog)
519 		return (ENXIO);
520 	ap->a_result = xa->info.d_media_blocks;
521 	return (0);
522 }
523