xref: /dflybsd-src/sys/kern/subr_diskiocom.c (revision 9e1c08804a46f1c1a9cd11e190ddba7d2bc4abed)
1 /*
2  * Copyright (c) 2012 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/proc.h>
38 #include <sys/sysctl.h>
39 #include <sys/buf.h>
40 #include <sys/conf.h>
41 #include <sys/disklabel.h>
42 #include <sys/disklabel32.h>
43 #include <sys/disklabel64.h>
44 #include <sys/diskslice.h>
45 #include <sys/diskmbr.h>
46 #include <sys/disk.h>
47 #include <sys/malloc.h>
48 #include <sys/device.h>
49 #include <sys/devfs.h>
50 #include <sys/thread.h>
51 #include <sys/queue.h>
52 #include <sys/lock.h>
53 #include <sys/stat.h>
54 #include <sys/uuid.h>
55 #include <sys/dmsg.h>
56 
57 #include <sys/buf2.h>
58 #include <sys/mplock2.h>
59 #include <sys/msgport2.h>
60 #include <sys/thread2.h>
61 
/*
 * State attached to a BLK_OPEN/BLK_CLOSE transaction (state->any.any).
 * Tracks how many opens of the raw device were performed through the
 * transaction so they can be undone when the transaction is deleted.
 */
struct dios_open {
	int	openrd;		/* successful opens that requested read */
	int	openwr;		/* successful opens that requested write */
};
66 
/*
 * State attached to a BLK_READ/WRITE/FLUSH/FREEBLKS transaction
 * (state->any.any).
 */
struct dios_io {
	int	count;		/* bios issued and not yet completed */
	int	eof;		/* set once a DMSGF_DELETE message issued I/O */
};
71 
72 static MALLOC_DEFINE(M_DMSG_DISK, "dmsg_disk", "disk dmsg");
73 
74 static int disk_iocom_reconnect(struct disk *dp, struct file *fp);
75 static int disk_rcvdmsg(kdmsg_msg_t *msg);
76 
77 static void disk_blk_open(struct disk *dp, kdmsg_msg_t *msg);
78 static void disk_blk_read(struct disk *dp, kdmsg_msg_t *msg);
79 static void disk_blk_write(struct disk *dp, kdmsg_msg_t *msg);
80 static void disk_blk_flush(struct disk *dp, kdmsg_msg_t *msg);
81 static void disk_blk_freeblks(struct disk *dp, kdmsg_msg_t *msg);
82 static void diskiodone(struct bio *bio);
83 
84 void
85 disk_iocom_init(struct disk *dp)
86 {
87 	kdmsg_iocom_init(&dp->d_iocom, dp,
88 			 KDMSG_IOCOMF_AUTOCONN |
89 			 KDMSG_IOCOMF_AUTORXSPAN |
90 			 KDMSG_IOCOMF_AUTOTXSPAN,
91 			 M_DMSG_DISK, disk_rcvdmsg);
92 }
93 
/*
 * Hook for changes to the disk.  Currently a no-op: dp is not examined
 * and nothing is updated.
 */
void
disk_iocom_update(struct disk *dp)
{
	/* nothing to do */
}
98 
99 void
100 disk_iocom_uninit(struct disk *dp)
101 {
102 	kdmsg_iocom_uninit(&dp->d_iocom);
103 }
104 
105 int
106 disk_iocom_ioctl(struct disk *dp, int cmd, void *data)
107 {
108 	struct file *fp;
109 	struct disk_ioc_recluster *recl;
110 	int error;
111 
112 	switch(cmd) {
113 	case DIOCRECLUSTER:
114 		recl = data;
115 		fp = holdfp(curproc->p_fd, recl->fd, -1);
116 		if (fp) {
117 			error = disk_iocom_reconnect(dp, fp);
118 		} else {
119 			error = EINVAL;
120 		}
121 		break;
122 	default:
123 		error = EOPNOTSUPP;
124 		break;
125 	}
126 	return error;
127 }
128 
129 static
130 int
131 disk_iocom_reconnect(struct disk *dp, struct file *fp)
132 {
133 	char devname[64];
134 
135 	ksnprintf(devname, sizeof(devname), "%s%d",
136 		  dev_dname(dp->d_rawdev), dkunit(dp->d_rawdev));
137 
138 	kdmsg_iocom_reconnect(&dp->d_iocom, fp, devname);
139 
140 	dp->d_iocom.auto_lnk_conn.pfs_type = DMSG_PFSTYPE_SERVER;
141 	dp->d_iocom.auto_lnk_conn.proto_version = DMSG_SPAN_PROTO_1;
142 	dp->d_iocom.auto_lnk_conn.peer_type = DMSG_PEER_BLOCK;
143 	dp->d_iocom.auto_lnk_conn.peer_mask = 1LLU << DMSG_PEER_BLOCK;
144 	dp->d_iocom.auto_lnk_conn.pfs_mask = (uint64_t)-1;
145 	ksnprintf(dp->d_iocom.auto_lnk_conn.cl_label,
146 		  sizeof(dp->d_iocom.auto_lnk_conn.cl_label),
147 		  "%s/%s", hostname, devname);
148 	if (dp->d_info.d_serialno) {
149 		ksnprintf(dp->d_iocom.auto_lnk_conn.fs_label,
150 			  sizeof(dp->d_iocom.auto_lnk_conn.fs_label),
151 			  "%s", dp->d_info.d_serialno);
152 	}
153 
154 	dp->d_iocom.auto_lnk_span.pfs_type = DMSG_PFSTYPE_SERVER;
155 	dp->d_iocom.auto_lnk_span.proto_version = DMSG_SPAN_PROTO_1;
156 	dp->d_iocom.auto_lnk_span.peer_type = DMSG_PEER_BLOCK;
157 	dp->d_iocom.auto_lnk_span.media.block.bytes =
158 						dp->d_info.d_media_size;
159 	dp->d_iocom.auto_lnk_span.media.block.blksize =
160 						dp->d_info.d_media_blksize;
161 	ksnprintf(dp->d_iocom.auto_lnk_span.cl_label,
162 		  sizeof(dp->d_iocom.auto_lnk_span.cl_label),
163 		  "%s/%s", hostname, devname);
164 	if (dp->d_info.d_serialno) {
165 		ksnprintf(dp->d_iocom.auto_lnk_span.fs_label,
166 			  sizeof(dp->d_iocom.auto_lnk_span.fs_label),
167 			  "%s", dp->d_info.d_serialno);
168 	}
169 
170 	kdmsg_iocom_autoinitiate(&dp->d_iocom, NULL);
171 
172 	return (0);
173 }
174 
175 int
176 disk_rcvdmsg(kdmsg_msg_t *msg)
177 {
178 	struct disk *dp = msg->state->iocom->handle;
179 
180 	/*
181 	 * Handle debug messages (these might not be in transactions)
182 	 */
183 	switch(msg->any.head.cmd & DMSGF_CMDSWMASK) {
184 	case DMSG_DBG_SHELL:
185 		/*
186 		 * Execute shell command (not supported atm)
187 		 */
188 		kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
189 		return(0);
190 	case DMSG_DBG_SHELL | DMSGF_REPLY:
191 		if (msg->aux_data) {
192 			msg->aux_data[msg->aux_size - 1] = 0;
193 			kprintf("diskiocom: DEBUGMSG: %s\n", msg->aux_data);
194 		}
195 		return(0);
196 	}
197 
198 	/*
199 	 * All remaining messages must be in a transaction
200 	 *
201 	 * NOTE!  We are switching on the first message's command.  The
202 	 *	  actual message command within the transaction may be
203 	 *	  different (if streaming within a transaction).
204 	 */
205 	if (msg->state == &msg->state->iocom->state0) {
206 		kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
207 		return(0);
208 	}
209 
210 	switch(msg->state->rxcmd & DMSGF_CMDSWMASK) {
211 	case DMSG_BLK_OPEN:
212 	case DMSG_BLK_CLOSE:
213 		disk_blk_open(dp, msg);
214 		break;
215 	case DMSG_BLK_READ:
216 		disk_blk_read(dp, msg);
217 		break;
218 	case DMSG_BLK_WRITE:
219 		disk_blk_write(dp, msg);
220 		break;
221 	case DMSG_BLK_FLUSH:
222 		disk_blk_flush(dp, msg);
223 		break;
224 	case DMSG_BLK_FREEBLKS:
225 		disk_blk_freeblks(dp, msg);
226 		break;
227 	default:
228 		if ((msg->any.head.cmd & DMSGF_REPLY) == 0) {
229 			if (msg->any.head.cmd & DMSGF_DELETE)
230 				kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
231 			else
232 				kdmsg_msg_result(msg, DMSG_ERR_NOSUPP);
233 		}
234 		break;
235 	}
236 	return (0);
237 }
238 
239 static
240 void
241 disk_blk_open(struct disk *dp, kdmsg_msg_t *msg)
242 {
243 	struct dios_open *openst;
244 	int error = DMSG_ERR_NOSUPP;
245 	int fflags;
246 
247 	openst = msg->state->any.any;
248 	if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_OPEN) {
249 		if (openst == NULL) {
250 			openst = kmalloc(sizeof(*openst), M_DEVBUF,
251 						M_WAITOK | M_ZERO);
252 			msg->state->any.any = openst;
253 		}
254 		fflags = 0;
255 		if (msg->any.blk_open.modes & DMSG_BLKOPEN_RD)
256 			fflags = FREAD;
257 		if (msg->any.blk_open.modes & DMSG_BLKOPEN_WR)
258 			fflags |= FWRITE;
259 		error = dev_dopen(dp->d_rawdev, fflags, S_IFCHR, proc0.p_ucred, NULL);
260 		if (error) {
261 			error = DMSG_ERR_IO;
262 		} else {
263 			if (msg->any.blk_open.modes & DMSG_BLKOPEN_RD)
264 				++openst->openrd;
265 			if (msg->any.blk_open.modes & DMSG_BLKOPEN_WR)
266 				++openst->openwr;
267 		}
268 	}
269 	if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_CLOSE &&
270 	    openst) {
271 		fflags = 0;
272 		if ((msg->any.blk_open.modes & DMSG_BLKOPEN_RD) &&
273 		    openst->openrd) {
274 			fflags = FREAD;
275 		}
276 		if ((msg->any.blk_open.modes & DMSG_BLKOPEN_WR) &&
277 		    openst->openwr) {
278 			fflags |= FWRITE;
279 		}
280 		error = dev_dclose(dp->d_rawdev, fflags, S_IFCHR, NULL);
281 		if (error) {
282 			error = DMSG_ERR_IO;
283 		} else {
284 			if (msg->any.blk_open.modes & DMSG_BLKOPEN_RD)
285 				--openst->openrd;
286 			if (msg->any.blk_open.modes & DMSG_BLKOPEN_WR)
287 				--openst->openwr;
288 		}
289 	}
290 	if (msg->any.head.cmd & DMSGF_DELETE) {
291 		if (openst) {
292 			while (openst->openrd && openst->openwr) {
293 				--openst->openrd;
294 				--openst->openwr;
295 				dev_dclose(dp->d_rawdev, FREAD|FWRITE, S_IFCHR, NULL);
296 			}
297 			while (openst->openrd) {
298 				--openst->openrd;
299 				dev_dclose(dp->d_rawdev, FREAD, S_IFCHR, NULL);
300 			}
301 			while (openst->openwr) {
302 				--openst->openwr;
303 				dev_dclose(dp->d_rawdev, FWRITE, S_IFCHR, NULL);
304 			}
305 			kfree(openst, M_DEVBUF);
306 			msg->state->any.any = NULL;
307 		}
308 		kdmsg_msg_reply(msg, error);
309 	} else {
310 		kdmsg_msg_result(msg, error);
311 	}
312 }
313 
314 static
315 void
316 disk_blk_read(struct disk *dp, kdmsg_msg_t *msg)
317 {
318 	struct dios_io *iost;
319 	struct buf *bp;
320 	struct bio *bio;
321 	int error = DMSG_ERR_NOSUPP;
322 	int reterr = 1;
323 
324 	/*
325 	 * Only DMSG_BLK_READ commands imply read ops.
326 	 */
327 	iost = msg->state->any.any;
328 	if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_READ) {
329 		if (msg->any.blk_read.bytes < DEV_BSIZE ||
330 		    msg->any.blk_read.bytes > MAXPHYS) {
331 			error = DMSG_ERR_PARAM;
332 			goto done;
333 		}
334 		if (iost == NULL) {
335 			iost = kmalloc(sizeof(*iost), M_DEVBUF,
336 				       M_WAITOK | M_ZERO);
337 			msg->state->any.any = iost;
338 		}
339 		reterr = 0;
340 		bp = geteblk(msg->any.blk_read.bytes);
341 		bio = &bp->b_bio1;
342 		bp->b_cmd = BUF_CMD_READ;
343 		bp->b_bcount = msg->any.blk_read.bytes;
344 		bp->b_resid = bp->b_bcount;
345 		bio->bio_offset = msg->any.blk_read.offset;
346 		bio->bio_caller_info1.ptr = msg->state;
347 		bio->bio_done = diskiodone;
348 		/* kdmsg_state_hold(msg->state); */
349 
350 		atomic_add_int(&iost->count, 1);
351 		if (msg->any.head.cmd & DMSGF_DELETE)
352 			iost->eof = 1;
353 		BUF_KERNPROC(bp);
354 		dev_dstrategy(dp->d_rawdev, bio);
355 	}
356 done:
357 	if (reterr) {
358 		if (msg->any.head.cmd & DMSGF_DELETE) {
359 			if (iost && iost->count == 0) {
360 				kfree(iost, M_DEVBUF);
361 				msg->state->any.any = NULL;
362 			}
363 			kdmsg_msg_reply(msg, error);
364 		} else {
365 			kdmsg_msg_result(msg, error);
366 		}
367 	}
368 }
369 
370 static
371 void
372 disk_blk_write(struct disk *dp, kdmsg_msg_t *msg)
373 {
374 	struct dios_io *iost;
375 	struct buf *bp;
376 	struct bio *bio;
377 	int error = DMSG_ERR_NOSUPP;
378 	int reterr = 1;
379 
380 	/*
381 	 * Only DMSG_BLK_WRITE commands imply read ops.
382 	 */
383 	iost = msg->state->any.any;
384 	if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_WRITE) {
385 		if (msg->any.blk_write.bytes < DEV_BSIZE ||
386 		    msg->any.blk_write.bytes > MAXPHYS) {
387 			error = DMSG_ERR_PARAM;
388 			goto done;
389 		}
390 		if (iost == NULL) {
391 			iost = kmalloc(sizeof(*iost), M_DEVBUF,
392 				       M_WAITOK | M_ZERO);
393 			msg->state->any.any = iost;
394 		}
395 
396 		/*
397 		 * Issue WRITE.  Short data implies zeros.  Try to optimize
398 		 * the buffer cache buffer for the case where we can just
399 		 * use the message's data pointer.
400 		 */
401 		reterr = 0;
402 		if (msg->aux_size >= msg->any.blk_write.bytes)
403 			bp = getpbuf(NULL);
404 		else
405 			bp = geteblk(msg->any.blk_write.bytes);
406 		bio = &bp->b_bio1;
407 		bp->b_cmd = BUF_CMD_WRITE;
408 		bp->b_bcount = msg->any.blk_write.bytes;
409 		bp->b_resid = bp->b_bcount;
410 		if (msg->aux_size >= msg->any.blk_write.bytes) {
411 			bp->b_data = msg->aux_data;
412 		} else {
413 			bcopy(msg->aux_data, bp->b_data, msg->aux_size);
414 			bzero(bp->b_data + msg->aux_size,
415 			      msg->any.blk_write.bytes - msg->aux_size);
416 		}
417 		bio->bio_offset = msg->any.blk_write.offset;
418 		bio->bio_caller_info1.ptr = msg->state;
419 		bio->bio_done = diskiodone;
420 		/* kdmsg_state_hold(msg->state); */
421 
422 		atomic_add_int(&iost->count, 1);
423 		if (msg->any.head.cmd & DMSGF_DELETE)
424 			iost->eof = 1;
425 		BUF_KERNPROC(bp);
426 		dev_dstrategy(dp->d_rawdev, bio);
427 	}
428 done:
429 	if (reterr) {
430 		if (msg->any.head.cmd & DMSGF_DELETE) {
431 			if (iost && iost->count == 0) {
432 				kfree(iost, M_DEVBUF);
433 				msg->state->any.any = NULL;
434 			}
435 			kdmsg_msg_reply(msg, error);
436 		} else {
437 			kdmsg_msg_result(msg, error);
438 		}
439 	}
440 }
441 
442 static
443 void
444 disk_blk_flush(struct disk *dp, kdmsg_msg_t *msg)
445 {
446 	struct dios_io *iost;
447 	struct buf *bp;
448 	struct bio *bio;
449 	int error = DMSG_ERR_NOSUPP;
450 	int reterr = 1;
451 
452 	/*
453 	 * Only DMSG_BLK_FLUSH commands imply read ops.
454 	 */
455 	iost = msg->state->any.any;
456 	if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_FLUSH) {
457 		if (iost == NULL) {
458 			iost = kmalloc(sizeof(*iost), M_DEVBUF,
459 				       M_WAITOK | M_ZERO);
460 			msg->state->any.any = iost;
461 		}
462 		reterr = 0;
463 		bp = getpbuf(NULL);
464 		bio = &bp->b_bio1;
465 		bp->b_cmd = BUF_CMD_FLUSH;
466 		bp->b_bcount = msg->any.blk_flush.bytes;
467 		bp->b_resid = 0;
468 		bio->bio_offset = msg->any.blk_flush.offset;
469 		bio->bio_caller_info1.ptr = msg->state;
470 		bio->bio_done = diskiodone;
471 		/* kdmsg_state_hold(msg->state); */
472 
473 		atomic_add_int(&iost->count, 1);
474 		if (msg->any.head.cmd & DMSGF_DELETE)
475 			iost->eof = 1;
476 		BUF_KERNPROC(bp);
477 		dev_dstrategy(dp->d_rawdev, bio);
478 	}
479 	if (reterr) {
480 		if (msg->any.head.cmd & DMSGF_DELETE) {
481 			if (iost && iost->count == 0) {
482 				kfree(iost, M_DEVBUF);
483 				msg->state->any.any = NULL;
484 			}
485 			kdmsg_msg_reply(msg, error);
486 		} else {
487 			kdmsg_msg_result(msg, error);
488 		}
489 	}
490 }
491 
492 static
493 void
494 disk_blk_freeblks(struct disk *dp, kdmsg_msg_t *msg)
495 {
496 	struct dios_io *iost;
497 	struct buf *bp;
498 	struct bio *bio;
499 	int error = DMSG_ERR_NOSUPP;
500 	int reterr = 1;
501 
502 	/*
503 	 * Only DMSG_BLK_FREEBLKS commands imply read ops.
504 	 */
505 	iost = msg->state->any.any;
506 	if ((msg->any.head.cmd & DMSGF_CMDSWMASK) == DMSG_BLK_FREEBLKS) {
507 		if (iost == NULL) {
508 			iost = kmalloc(sizeof(*iost), M_DEVBUF,
509 				       M_WAITOK | M_ZERO);
510 			msg->state->any.any = iost;
511 		}
512 		reterr = 0;
513 		bp = getpbuf(NULL);
514 		bio = &bp->b_bio1;
515 		bp->b_cmd = BUF_CMD_FREEBLKS;
516 		bp->b_bcount = msg->any.blk_freeblks.bytes;
517 		bp->b_resid = 0;
518 		bio->bio_offset = msg->any.blk_freeblks.offset;
519 		bio->bio_caller_info1.ptr = msg->state;
520 		bio->bio_done = diskiodone;
521 		/* kdmsg_state_hold(msg->state); */
522 
523 		atomic_add_int(&iost->count, 1);
524 		if (msg->any.head.cmd & DMSGF_DELETE)
525 			iost->eof = 1;
526 		BUF_KERNPROC(bp);
527 		dev_dstrategy(dp->d_rawdev, bio);
528 	}
529 	if (reterr) {
530 		if (msg->any.head.cmd & DMSGF_DELETE) {
531 			if (iost && iost->count == 0) {
532 				kfree(iost, M_DEVBUF);
533 				msg->state->any.any = NULL;
534 			}
535 			kdmsg_msg_reply(msg, error);
536 		} else {
537 			kdmsg_msg_result(msg, error);
538 		}
539 	}
540 }
541 
542 static
543 void
544 diskiodone(struct bio *bio)
545 {
546 	struct buf *bp = bio->bio_buf;
547 	kdmsg_state_t *state = bio->bio_caller_info1.ptr;
548 	kdmsg_msg_t *rmsg;
549 	struct dios_io *iost = state->any.any;
550 	int error;
551 	int resid = 0;
552 	int bytes;
553 	uint32_t cmd;
554 	void *data;
555 
556 	cmd = DMSG_LNK_ERROR;
557 	data = NULL;
558 	bytes = 0;
559 
560 	switch(bp->b_cmd) {
561 	case BUF_CMD_READ:
562 		cmd = DMSG_LNK_ERROR;
563 		data = bp->b_data;
564 		bytes = bp->b_bcount;
565 		/* fall through */
566 	case BUF_CMD_WRITE:
567 		if (bp->b_flags & B_ERROR) {
568 			error = bp->b_error;
569 		} else {
570 			error = 0;
571 			resid = bp->b_resid;
572 		}
573 		break;
574 	case BUF_CMD_FLUSH:
575 	case BUF_CMD_FREEBLKS:
576 		if (bp->b_flags & B_ERROR)
577 			error = bp->b_error;
578 		else
579 			error = 0;
580 		break;
581 	default:
582 		panic("diskiodone: Unknown bio cmd = %d\n",
583 		      bio->bio_buf->b_cmd);
584 		error = 0;	/* avoid compiler warning */
585 		break;		/* NOT REACHED */
586 	}
587 
588 	/*
589 	 * Convert error to DMSG_ERR_* code.
590 	 */
591 	if (error)
592 		error = DMSG_ERR_IO;
593 
594 	/*
595 	 * Convert LNK_ERROR or BLK_ERROR if non-zero resid.  READS will
596 	 * have already converted cmd to BLK_ERROR and set up data to return.
597 	 */
598 	if (resid && cmd == DMSG_LNK_ERROR)
599 		cmd = DMSG_BLK_ERROR;
600 	/* XXX txcmd is delayed so this won't work for streaming */
601 	if ((state->txcmd & DMSGF_CREATE) == 0)	/* assume serialized */
602 		cmd |= DMSGF_CREATE;
603 	if (iost->eof) {
604 		if (atomic_fetchadd_int(&iost->count, -1) == 1)
605 			cmd |= DMSGF_DELETE;
606 	} else {
607 		atomic_add_int(&iost->count, -1);
608 	}
609 	cmd |= DMSGF_REPLY;
610 
611 	/*
612 	 * Allocate a basic or extended reply.  Be careful not to populate
613 	 * extended header fields unless we allocated an extended reply.
614 	 */
615 	rmsg = kdmsg_msg_alloc(state, cmd, NULL, 0);
616 	if (data) {
617 		rmsg->aux_data = kmalloc(bytes, state->iocom->mmsg, M_INTWAIT);
618 		rmsg->aux_size = bytes;
619 		rmsg->flags |= KDMSG_FLAG_AUXALLOC;
620 		bcopy(data, rmsg->aux_data, bytes);
621 	}
622 	rmsg->any.blk_error.head.error = error;
623 	if ((cmd & DMSGF_BASECMDMASK) == DMSG_BLK_ERROR)
624 		rmsg->any.blk_error.resid = resid;
625 	bio->bio_caller_info1.ptr = NULL;
626 	/* kdmsg_state_drop(state); */
627 	kdmsg_msg_write(rmsg);
628 	if (bp->b_flags & B_PAGING) {
629 		relpbuf(bio->bio_buf, NULL);
630 	} else {
631 		bp->b_flags |= B_INVAL | B_AGE;
632 		brelse(bp);
633 	}
634 }
635