xref: /spdk/test/external_code/passthru/vbdev_passthru.c (revision 1fa071d332db21bf893d581a8e93b425ba788a24)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 /*
 * This is a simple example of a virtual block device module that passes IO
 * down to a bdev (or bdevs) that it is configured to attach to.
37  */
38 
39 #include "vbdev_passthru.h"
40 #include "spdk/env.h"
41 #include "spdk/conf.h"
42 #include "spdk/endian.h"
43 #include "spdk/thread.h"
44 
45 
46 static int vbdev_ext_passthru_init(void);
47 static void vbdev_ext_passthru_get_spdk_running_config(FILE *fp);
48 static int vbdev_ext_passthru_get_ctx_size(void);
49 static void vbdev_ext_passthru_examine(struct spdk_bdev *bdev);
50 static void vbdev_ext_passthru_finish(void);
51 static int vbdev_ext_passthru_config_json(struct spdk_json_write_ctx *w);
52 
53 static struct spdk_bdev_module passthru_if_external = {
54 	.name = "passthru_external",
55 	.module_init = vbdev_ext_passthru_init,
56 	.config_text = vbdev_ext_passthru_get_spdk_running_config,
57 	.get_ctx_size = vbdev_ext_passthru_get_ctx_size,
58 	.examine_config = vbdev_ext_passthru_examine,
59 	.module_fini = vbdev_ext_passthru_finish,
60 	.config_json = vbdev_ext_passthru_config_json
61 };
62 
63 SPDK_BDEV_MODULE_REGISTER(passthru, &passthru_if_external)
64 
/* One (vbdev name, base bdev name) pairing. Pairings are kept on g_bdev_names:
 * they are added when the configuration is parsed at init or when a disk is
 * created, and they are looked up when a bdev is offered for registration.
 */
struct bdev_names {
	char *vbdev_name;		/* name the passthru vbdev will get */
	char *bdev_name;		/* name of the base bdev to attach to */
	TAILQ_ENTRY(bdev_names) link;	/* linkage on g_bdev_names */
};
static TAILQ_HEAD(, bdev_names) g_bdev_names = TAILQ_HEAD_INITIALIZER(g_bdev_names);
74 
75 /* List of virtual bdevs and associated info for each. */
76 struct vbdev_passthru {
77 	struct spdk_bdev		*base_bdev; /* the thing we're attaching to */
78 	struct spdk_bdev_desc		*base_desc; /* its descriptor we get from open */
79 	struct spdk_bdev		pt_bdev;    /* the PT virtual bdev */
80 	TAILQ_ENTRY(vbdev_passthru)	link;
81 };
82 static TAILQ_HEAD(, vbdev_passthru) g_pt_nodes = TAILQ_HEAD_INITIALIZER(g_pt_nodes);
83 
/* Per-channel context of the passthru vbdev. The io channel code allocates and
 * frees it for us. A vbdev that needed a poller or an IO queue would keep them
 * here. This passthru bdev does not strictly need a channel of its own (it
 * could hand back the channel of the bdev underneath), but for example
 * purposes it presents its own channel to the upper layers.
 */
struct pt_io_channel {
	/* IO channel of the base device */
	struct spdk_io_channel *base_ch;
};
93 
94 /* Just for fun, this pt_bdev module doesn't need it but this is essentially a per IO
95  * context that we get handed by the bdev layer.
96  */
97 struct passthru_bdev_io {
98 	uint8_t test;
99 
100 	/* bdev related */
101 	struct spdk_io_channel *ch;
102 
103 	/* for bdev_io_wait */
104 	struct spdk_bdev_io_wait_entry bdev_io_wait;
105 };
106 
/* Forward declaration: the resubmit helper calls this before it is defined. */
static void vbdev_passthru_submit_request(struct spdk_io_channel *ch,
					  struct spdk_bdev_io *bdev_io);
109 
110 
111 /* Callback for unregistering the IO device. */
112 static void
113 _device_unregister_cb(void *io_device)
114 {
115 	struct vbdev_passthru *pt_node  = io_device;
116 
117 	/* Done with this pt_node. */
118 	free(pt_node->pt_bdev.name);
119 	free(pt_node);
120 }
121 
122 /* Called after we've unregistered following a hot remove callback.
123  * Our finish entry point will be called next.
124  */
125 static int
126 vbdev_passthru_destruct(void *ctx)
127 {
128 	struct vbdev_passthru *pt_node = (struct vbdev_passthru *)ctx;
129 
130 	/* It is important to follow this exact sequence of steps for destroying
131 	 * a vbdev...
132 	 */
133 
134 	TAILQ_REMOVE(&g_pt_nodes, pt_node, link);
135 
136 	/* Unclaim the underlying bdev. */
137 	spdk_bdev_module_release_bdev(pt_node->base_bdev);
138 
139 	/* Close the underlying bdev. */
140 	spdk_bdev_close(pt_node->base_desc);
141 
142 	/* Unregister the io_device. */
143 	spdk_io_device_unregister(pt_node, _device_unregister_cb);
144 
145 	return 0;
146 }
147 
148 /* Completion callback for IO that were issued from this bdev. The original bdev_io
149  * is passed in as an arg so we'll complete that one with the appropriate status
150  * and then free the one that this module issued.
151  */
152 static void
153 _pt_complete_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
154 {
155 	struct spdk_bdev_io *orig_io = cb_arg;
156 	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
157 	struct passthru_bdev_io *io_ctx = (struct passthru_bdev_io *)orig_io->driver_ctx;
158 
159 	/* We setup this value in the submission routine, just showing here that it is
160 	 * passed back to us.
161 	 */
162 	if (io_ctx->test != 0x5a) {
163 		SPDK_ERRLOG("Error, original IO device_ctx is wrong! 0x%x\n",
164 			    io_ctx->test);
165 	}
166 
167 	/* Complete the original IO and then free the one that we created here
168 	 * as a result of issuing an IO via submit_reqeust.
169 	 */
170 	spdk_bdev_io_complete(orig_io, status);
171 	spdk_bdev_free_io(bdev_io);
172 }
173 
174 static void
175 vbdev_passthru_resubmit_io(void *arg)
176 {
177 	struct spdk_bdev_io *bdev_io = (struct spdk_bdev_io *)arg;
178 	struct passthru_bdev_io *io_ctx = (struct passthru_bdev_io *)bdev_io->driver_ctx;
179 
180 	vbdev_passthru_submit_request(io_ctx->ch, bdev_io);
181 }
182 
183 static void
184 vbdev_passthru_queue_io(struct spdk_bdev_io *bdev_io)
185 {
186 	struct passthru_bdev_io *io_ctx = (struct passthru_bdev_io *)bdev_io->driver_ctx;
187 	int rc;
188 
189 	io_ctx->bdev_io_wait.bdev = bdev_io->bdev;
190 	io_ctx->bdev_io_wait.cb_fn = vbdev_passthru_resubmit_io;
191 	io_ctx->bdev_io_wait.cb_arg = bdev_io;
192 
193 	rc = spdk_bdev_queue_io_wait(bdev_io->bdev, io_ctx->ch, &io_ctx->bdev_io_wait);
194 	if (rc != 0) {
195 		SPDK_ERRLOG("Queue io failed in vbdev_passthru_queue_io, rc=%d.\n", rc);
196 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
197 	}
198 }
199 
200 /* Callback for getting a buf from the bdev pool in the event that the caller passed
201  * in NULL, we need to own the buffer so it doesn't get freed by another vbdev module
202  * beneath us before we're done with it. That won't happen in this example but it could
203  * if this example were used as a template for something more complex.
204  */
205 static void
206 pt_read_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, bool success)
207 {
208 	struct vbdev_passthru *pt_node = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_passthru,
209 					 pt_bdev);
210 	struct pt_io_channel *pt_ch = spdk_io_channel_get_ctx(ch);
211 	struct passthru_bdev_io *io_ctx = (struct passthru_bdev_io *)bdev_io->driver_ctx;
212 	int rc;
213 
214 	if (!success) {
215 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
216 		return;
217 	}
218 
219 	if (bdev_io->u.bdev.md_buf == NULL) {
220 		rc = spdk_bdev_readv_blocks(pt_node->base_desc, pt_ch->base_ch, bdev_io->u.bdev.iovs,
221 					    bdev_io->u.bdev.iovcnt, bdev_io->u.bdev.offset_blocks,
222 					    bdev_io->u.bdev.num_blocks, _pt_complete_io,
223 					    bdev_io);
224 	} else {
225 		rc = spdk_bdev_readv_blocks_with_md(pt_node->base_desc, pt_ch->base_ch,
226 						    bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
227 						    bdev_io->u.bdev.md_buf,
228 						    bdev_io->u.bdev.offset_blocks,
229 						    bdev_io->u.bdev.num_blocks,
230 						    _pt_complete_io, bdev_io);
231 	}
232 
233 	if (rc != 0) {
234 		if (rc == -ENOMEM) {
235 			SPDK_ERRLOG("No memory, start to queue io for passthru.\n");
236 			io_ctx->ch = ch;
237 			vbdev_passthru_queue_io(bdev_io);
238 		} else {
239 			SPDK_ERRLOG("ERROR on bdev_io submission!\n");
240 			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
241 		}
242 	}
243 }
244 
245 /* Called when someone above submits IO to this pt vbdev. We're simply passing it on here
246  * via SPDK IO calls which in turn allocate another bdev IO and call our cpl callback provided
247  * below along with the original bdiv_io so that we can complete it once this IO completes.
248  */
249 static void
250 vbdev_passthru_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
251 {
252 	struct vbdev_passthru *pt_node = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_passthru, pt_bdev);
253 	struct pt_io_channel *pt_ch = spdk_io_channel_get_ctx(ch);
254 	struct passthru_bdev_io *io_ctx = (struct passthru_bdev_io *)bdev_io->driver_ctx;
255 	int rc = 0;
256 
257 	/* Setup a per IO context value; we don't do anything with it in the vbdev other
258 	 * than confirm we get the same thing back in the completion callback just to
259 	 * demonstrate.
260 	 */
261 	io_ctx->test = 0x5a;
262 
263 	switch (bdev_io->type) {
264 	case SPDK_BDEV_IO_TYPE_READ:
265 		spdk_bdev_io_get_buf(bdev_io, pt_read_get_buf_cb,
266 				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
267 		break;
268 	case SPDK_BDEV_IO_TYPE_WRITE:
269 		if (bdev_io->u.bdev.md_buf == NULL) {
270 			rc = spdk_bdev_writev_blocks(pt_node->base_desc, pt_ch->base_ch, bdev_io->u.bdev.iovs,
271 						     bdev_io->u.bdev.iovcnt, bdev_io->u.bdev.offset_blocks,
272 						     bdev_io->u.bdev.num_blocks, _pt_complete_io,
273 						     bdev_io);
274 		} else {
275 			rc = spdk_bdev_writev_blocks_with_md(pt_node->base_desc, pt_ch->base_ch,
276 							     bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
277 							     bdev_io->u.bdev.md_buf,
278 							     bdev_io->u.bdev.offset_blocks,
279 							     bdev_io->u.bdev.num_blocks,
280 							     _pt_complete_io, bdev_io);
281 		}
282 		break;
283 	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
284 		rc = spdk_bdev_write_zeroes_blocks(pt_node->base_desc, pt_ch->base_ch,
285 						   bdev_io->u.bdev.offset_blocks,
286 						   bdev_io->u.bdev.num_blocks,
287 						   _pt_complete_io, bdev_io);
288 		break;
289 	case SPDK_BDEV_IO_TYPE_UNMAP:
290 		rc = spdk_bdev_unmap_blocks(pt_node->base_desc, pt_ch->base_ch,
291 					    bdev_io->u.bdev.offset_blocks,
292 					    bdev_io->u.bdev.num_blocks,
293 					    _pt_complete_io, bdev_io);
294 		break;
295 	case SPDK_BDEV_IO_TYPE_FLUSH:
296 		rc = spdk_bdev_flush_blocks(pt_node->base_desc, pt_ch->base_ch,
297 					    bdev_io->u.bdev.offset_blocks,
298 					    bdev_io->u.bdev.num_blocks,
299 					    _pt_complete_io, bdev_io);
300 		break;
301 	case SPDK_BDEV_IO_TYPE_RESET:
302 		rc = spdk_bdev_reset(pt_node->base_desc, pt_ch->base_ch,
303 				     _pt_complete_io, bdev_io);
304 		break;
305 	default:
306 		SPDK_ERRLOG("passthru: unknown I/O type %d\n", bdev_io->type);
307 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
308 		return;
309 	}
310 	if (rc != 0) {
311 		if (rc == -ENOMEM) {
312 			SPDK_ERRLOG("No memory, start to queue io for passthru.\n");
313 			io_ctx->ch = ch;
314 			vbdev_passthru_queue_io(bdev_io);
315 		} else {
316 			SPDK_ERRLOG("ERROR on bdev_io submission!\n");
317 			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
318 		}
319 	}
320 }
321 
322 /* We'll just call the base bdev and let it answer however if we were more
323  * restrictive for some reason (or less) we could get the response back
324  * and modify according to our purposes.
325  */
326 static bool
327 vbdev_passthru_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
328 {
329 	struct vbdev_passthru *pt_node = (struct vbdev_passthru *)ctx;
330 
331 	return spdk_bdev_io_type_supported(pt_node->base_bdev, io_type);
332 }
333 
/* get_io_channel entry point of the function table; this is how upper layers
 * obtain a channel to this bdev. ctx is the context stored in pt_bdev.ctxt at
 * registration time, which for this module is the address of the passthru
 * node - the same pointer that was registered as the io_device.
 *
 * The io channel code allocates the channel (the SPDK channel structure plus
 * the sizeof(struct pt_io_channel) requested when the io_device was
 * registered) and then runs our channel create callback so we can fill in
 * our part of it.
 */
static struct spdk_io_channel *
vbdev_passthru_get_io_channel(void *ctx)
{
	return spdk_get_io_channel(ctx);
}
356 
357 /* This is the output for get_bdevs() for this vbdev */
358 static int
359 vbdev_passthru_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
360 {
361 	struct vbdev_passthru *pt_node = (struct vbdev_passthru *)ctx;
362 
363 	spdk_json_write_name(w, "passthru");
364 	spdk_json_write_object_begin(w);
365 	spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&pt_node->pt_bdev));
366 	spdk_json_write_named_string(w, "base_bdev_name", spdk_bdev_get_name(pt_node->base_bdev));
367 	spdk_json_write_object_end(w);
368 
369 	return 0;
370 }
371 
372 /* This is used to generate JSON that can configure this module to its current state. */
373 static int
374 vbdev_ext_passthru_config_json(struct spdk_json_write_ctx *w)
375 {
376 	struct vbdev_passthru *pt_node;
377 
378 	TAILQ_FOREACH(pt_node, &g_pt_nodes, link) {
379 		spdk_json_write_object_begin(w);
380 		spdk_json_write_named_string(w, "method", "construct_passthru_bdev");
381 		spdk_json_write_named_object_begin(w, "params");
382 		spdk_json_write_named_string(w, "base_bdev_name", spdk_bdev_get_name(pt_node->base_bdev));
383 		spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&pt_node->pt_bdev));
384 		spdk_json_write_object_end(w);
385 		spdk_json_write_object_end(w);
386 	}
387 	return 0;
388 }
389 
390 /* We provide this callback for the SPDK channel code to create a channel using
391  * the channel struct we provided in our module get_io_channel() entry point. Here
392  * we get and save off an underlying base channel of the device below us so that
393  * we can communicate with the base bdev on a per channel basis.  If we needed
394  * our own poller for this vbdev, we'd register it here.
395  */
396 static int
397 pt_bdev_ch_create_cb(void *io_device, void *ctx_buf)
398 {
399 	struct pt_io_channel *pt_ch = ctx_buf;
400 	struct vbdev_passthru *pt_node = io_device;
401 
402 	pt_ch->base_ch = spdk_bdev_get_io_channel(pt_node->base_desc);
403 
404 	return 0;
405 }
406 
407 /* We provide this callback for the SPDK channel code to destroy a channel
408  * created with our create callback. We just need to undo anything we did
409  * when we created. If this bdev used its own poller, we'd unregsiter it here.
410  */
411 static void
412 pt_bdev_ch_destroy_cb(void *io_device, void *ctx_buf)
413 {
414 	struct pt_io_channel *pt_ch = ctx_buf;
415 
416 	spdk_put_io_channel(pt_ch->base_ch);
417 }
418 
419 /* Create the passthru association from the bdev and vbdev name and insert
420  * on the global list. */
421 static int
422 vbdev_passthru_insert_name(const char *bdev_name, const char *vbdev_name)
423 {
424 	struct bdev_names *name;
425 
426 	TAILQ_FOREACH(name, &g_bdev_names, link) {
427 		if (strcmp(vbdev_name, name->vbdev_name) == 0) {
428 			SPDK_ERRLOG("passthru bdev %s already exists\n", vbdev_name);
429 			return -EEXIST;
430 		}
431 	}
432 
433 	name = calloc(1, sizeof(struct bdev_names));
434 	if (!name) {
435 		SPDK_ERRLOG("could not allocate bdev_names\n");
436 		return -ENOMEM;
437 	}
438 
439 	name->bdev_name = strdup(bdev_name);
440 	if (!name->bdev_name) {
441 		SPDK_ERRLOG("could not allocate name->bdev_name\n");
442 		free(name);
443 		return -ENOMEM;
444 	}
445 
446 	name->vbdev_name = strdup(vbdev_name);
447 	if (!name->vbdev_name) {
448 		SPDK_ERRLOG("could not allocate name->vbdev_name\n");
449 		free(name->bdev_name);
450 		free(name);
451 		return -ENOMEM;
452 	}
453 
454 	TAILQ_INSERT_TAIL(&g_bdev_names, name, link);
455 
456 	return 0;
457 }
458 
459 /* On init, just parse config file and build list of pt vbdevs and bdev name pairs. */
460 static int
461 vbdev_ext_passthru_init(void)
462 {
463 	struct spdk_conf_section *sp = NULL;
464 	const char *conf_bdev_name = NULL;
465 	const char *conf_vbdev_name = NULL;
466 	struct bdev_names *name;
467 	int i, rc;
468 
469 	sp = spdk_conf_find_section(NULL, "Ext_Pt");
470 	if (sp == NULL) {
471 		return 0;
472 	}
473 
474 	for (i = 0; ; i++) {
475 		if (!spdk_conf_section_get_nval(sp, "PTE", i)) {
476 			break;
477 		}
478 
479 		conf_bdev_name = spdk_conf_section_get_nmval(sp, "PTE", i, 0);
480 		if (!conf_bdev_name) {
481 			SPDK_ERRLOG("Passthru configuration missing bdev name\n");
482 			break;
483 		}
484 
485 		conf_vbdev_name = spdk_conf_section_get_nmval(sp, "PTE", i, 1);
486 		if (!conf_vbdev_name) {
487 			SPDK_ERRLOG("Passthru configuration missing pt_bdev name\n");
488 			break;
489 		}
490 
491 		rc = vbdev_passthru_insert_name(conf_bdev_name, conf_vbdev_name);
492 		if (rc != 0) {
493 			return rc;
494 		}
495 	}
496 	TAILQ_FOREACH(name, &g_bdev_names, link) {
497 		SPDK_NOTICELOG("conf parse matched: %s\n", name->bdev_name);
498 	}
499 	return 0;
500 }
501 
502 /* Called when the entire module is being torn down. */
503 static void
504 vbdev_ext_passthru_finish(void)
505 {
506 	struct bdev_names *name;
507 
508 	while ((name = TAILQ_FIRST(&g_bdev_names))) {
509 		TAILQ_REMOVE(&g_bdev_names, name, link);
510 		free(name->bdev_name);
511 		free(name->vbdev_name);
512 		free(name);
513 	}
514 }
515 
516 /* During init we'll be asked how much memory we'd like passed to us
517  * in bev_io structures as context. Here's where we specify how
518  * much context we want per IO.
519  */
520 static int
521 vbdev_ext_passthru_get_ctx_size(void)
522 {
523 	return sizeof(struct passthru_bdev_io);
524 }
525 
526 /* Called when SPDK wants to save the current config of this vbdev module to
527  * a file.
528  */
529 static void
530 vbdev_ext_passthru_get_spdk_running_config(FILE *fp)
531 {
532 	struct bdev_names *names = NULL;
533 
534 	fprintf(fp, "\n[Ext_Pt]\n");
535 	TAILQ_FOREACH(names, &g_bdev_names, link) {
536 		fprintf(fp, "  PTE %s %s\n", names->bdev_name, names->vbdev_name);
537 	}
538 	fprintf(fp, "\n");
539 }
540 
/* write_config_json entry point of the function table. Whereas
 * vbdev_ext_passthru_config_json() generates the per module JSON config data,
 * this hook would output any per bdev specific methods. The passthru module
 * has none, so the body is intentionally empty.
 */
static void
vbdev_passthru_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	/* Nothing to write: no per bdev configuration is needed. */
}
550 
551 /* When we register our bdev this is how we specify our entry points. */
552 static const struct spdk_bdev_fn_table vbdev_passthru_fn_table = {
553 	.destruct		= vbdev_passthru_destruct,
554 	.submit_request		= vbdev_passthru_submit_request,
555 	.io_type_supported	= vbdev_passthru_io_type_supported,
556 	.get_io_channel		= vbdev_passthru_get_io_channel,
557 	.dump_info_json		= vbdev_passthru_dump_info_json,
558 	.write_config_json	= vbdev_passthru_write_config_json,
559 };
560 
561 /* Called when the underlying base bdev goes away. */
562 static void
563 vbdev_passthru_base_bdev_hotremove_cb(void *ctx)
564 {
565 	struct vbdev_passthru *pt_node, *tmp;
566 	struct spdk_bdev *bdev_find = ctx;
567 
568 	TAILQ_FOREACH_SAFE(pt_node, &g_pt_nodes, link, tmp) {
569 		if (bdev_find == pt_node->base_bdev) {
570 			spdk_bdev_unregister(&pt_node->pt_bdev, NULL, NULL);
571 		}
572 	}
573 }
574 
575 /* Create and register the passthru vbdev if we find it in our list of bdev names.
576  * This can be called either by the examine path or RPC method.
577  */
578 static int
579 vbdev_passthru_register(struct spdk_bdev *bdev)
580 {
581 	struct bdev_names *name;
582 	struct vbdev_passthru *pt_node;
583 	int rc = 0;
584 
585 	/* Check our list of names from config versus this bdev and if
586 	 * there's a match, create the pt_node & bdev accordingly.
587 	 */
588 	TAILQ_FOREACH(name, &g_bdev_names, link) {
589 		if (strcmp(name->bdev_name, bdev->name) != 0) {
590 			continue;
591 		}
592 
593 		SPDK_NOTICELOG("Match on %s\n", bdev->name);
594 		pt_node = calloc(1, sizeof(struct vbdev_passthru));
595 		if (!pt_node) {
596 			rc = -ENOMEM;
597 			SPDK_ERRLOG("could not allocate pt_node\n");
598 			break;
599 		}
600 
601 		/* The base bdev that we're attaching to. */
602 		pt_node->base_bdev = bdev;
603 		pt_node->pt_bdev.name = strdup(name->vbdev_name);
604 		if (!pt_node->pt_bdev.name) {
605 			rc = -ENOMEM;
606 			SPDK_ERRLOG("could not allocate pt_bdev name\n");
607 			free(pt_node);
608 			break;
609 		}
610 		pt_node->pt_bdev.product_name = "passthru";
611 
612 		/* Copy some properties from the underlying base bdev. */
613 		pt_node->pt_bdev.write_cache = bdev->write_cache;
614 		pt_node->pt_bdev.required_alignment = bdev->required_alignment;
615 		pt_node->pt_bdev.optimal_io_boundary = bdev->optimal_io_boundary;
616 		pt_node->pt_bdev.blocklen = bdev->blocklen;
617 		pt_node->pt_bdev.blockcnt = bdev->blockcnt;
618 
619 		pt_node->pt_bdev.md_interleave = bdev->md_interleave;
620 		pt_node->pt_bdev.md_len = bdev->md_len;
621 		pt_node->pt_bdev.dif_type = bdev->dif_type;
622 		pt_node->pt_bdev.dif_is_head_of_md = bdev->dif_is_head_of_md;
623 		pt_node->pt_bdev.dif_check_flags = bdev->dif_check_flags;
624 
625 		/* This is the context that is passed to us when the bdev
626 		 * layer calls in so we'll save our pt_bdev node here.
627 		 */
628 		pt_node->pt_bdev.ctxt = pt_node;
629 		pt_node->pt_bdev.fn_table = &vbdev_passthru_fn_table;
630 		pt_node->pt_bdev.module = &passthru_if_external;
631 		TAILQ_INSERT_TAIL(&g_pt_nodes, pt_node, link);
632 
633 		spdk_io_device_register(pt_node, pt_bdev_ch_create_cb, pt_bdev_ch_destroy_cb,
634 					sizeof(struct pt_io_channel),
635 					name->vbdev_name);
636 		SPDK_NOTICELOG("io_device created at: 0x%p\n", pt_node);
637 
638 		rc = spdk_bdev_open(bdev, true, vbdev_passthru_base_bdev_hotremove_cb,
639 				    bdev, &pt_node->base_desc);
640 		if (rc) {
641 			SPDK_ERRLOG("could not open bdev %s\n", spdk_bdev_get_name(bdev));
642 			TAILQ_REMOVE(&g_pt_nodes, pt_node, link);
643 			spdk_io_device_unregister(pt_node, NULL);
644 			free(pt_node->pt_bdev.name);
645 			free(pt_node);
646 			break;
647 		}
648 		SPDK_NOTICELOG("bdev opened\n");
649 
650 		rc = spdk_bdev_module_claim_bdev(bdev, pt_node->base_desc, pt_node->pt_bdev.module);
651 		if (rc) {
652 			SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(bdev));
653 			spdk_bdev_close(pt_node->base_desc);
654 			TAILQ_REMOVE(&g_pt_nodes, pt_node, link);
655 			spdk_io_device_unregister(pt_node, NULL);
656 			free(pt_node->pt_bdev.name);
657 			free(pt_node);
658 			break;
659 		}
660 		SPDK_NOTICELOG("bdev claimed\n");
661 
662 		rc = spdk_bdev_register(&pt_node->pt_bdev);
663 		if (rc) {
664 			SPDK_ERRLOG("could not register pt_bdev\n");
665 			spdk_bdev_module_release_bdev(&pt_node->pt_bdev);
666 			spdk_bdev_close(pt_node->base_desc);
667 			TAILQ_REMOVE(&g_pt_nodes, pt_node, link);
668 			spdk_io_device_unregister(pt_node, NULL);
669 			free(pt_node->pt_bdev.name);
670 			free(pt_node);
671 			break;
672 		}
673 		SPDK_NOTICELOG("ext_pt_bdev registered\n");
674 		SPDK_NOTICELOG("created ext_pt_bdev for: %s\n", name->vbdev_name);
675 	}
676 
677 	return rc;
678 }
679 
/* Create the passthru disk from the given base bdev name and vbdev name.
 *
 * The name pair is always recorded on the global name list, even if the base
 * bdev does not exist yet. When the base bdev is already present the vbdev is
 * registered right away; otherwise 0 is returned and creation is deferred.
 *
 * Returns 0 on success or deferral, otherwise the error from recording the
 * names or from registering the vbdev.
 */
int
create_passthru_disk(const char *bdev_name, const char *vbdev_name)
{
	struct spdk_bdev *base;
	int rc;

	rc = vbdev_passthru_insert_name(bdev_name, vbdev_name);
	if (rc != 0) {
		return rc;
	}

	base = spdk_bdev_get_by_name(bdev_name);
	if (base != NULL) {
		return vbdev_passthru_register(base);
	}

	/* This is not an error: the name was tracked above and the base bdev
	 * may still show up later.
	 */
	SPDK_NOTICELOG("vbdev creation deferred pending base bdev arrival\n");
	return 0;
}
706 
707 void
708 delete_passthru_disk(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void *cb_arg)
709 {
710 	struct bdev_names *name;
711 
712 	if (!bdev || bdev->module != &passthru_if_external) {
713 		cb_fn(cb_arg, -ENODEV);
714 		return;
715 	}
716 
717 	/* Remove the association (vbdev, bdev) from g_bdev_names. This is required so that the
718 	 * vbdev does not get re-created if the same bdev is constructed at some other time,
719 	 * unless the underlying bdev was hot-removed.
720 	 */
721 	TAILQ_FOREACH(name, &g_bdev_names, link) {
722 		if (strcmp(name->vbdev_name, bdev->name) == 0) {
723 			TAILQ_REMOVE(&g_bdev_names, name, link);
724 			free(name->bdev_name);
725 			free(name->vbdev_name);
726 			free(name);
727 			break;
728 		}
729 	}
730 
731 	/* Additional cleanup happens in the destruct callback. */
732 	spdk_bdev_unregister(bdev, cb_fn, cb_arg);
733 }
734 
735 /* Because we specified this function in our pt bdev function table when we
736  * registered our pt bdev, we'll get this call anytime a new bdev shows up.
737  * Here we need to decide if we care about it and if so what to do. We
738  * parsed the config file at init so we check the new bdev against the list
739  * we built up at that time and if the user configured us to attach to this
740  * bdev, here's where we do it.
741  */
742 static void
743 vbdev_ext_passthru_examine(struct spdk_bdev *bdev)
744 {
745 	vbdev_passthru_register(bdev);
746 
747 	spdk_bdev_module_examine_done(&passthru_if_external);
748 }
749