xref: /netbsd-src/sys/fs/udf/udf_strat_sequential.c (revision b5677b36047b601b9addaaa494a58ceae82c2a6c)
1 /* $NetBSD: udf_strat_sequential.c,v 1.9 2009/02/10 17:48:19 reinoud Exp $ */
2 
3 /*
4  * Copyright (c) 2006, 2008 Reinoud Zandijk
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  */
28 
29 #include <sys/cdefs.h>
30 #ifndef lint
31 __KERNEL_RCSID(0, "$NetBSD: udf_strat_sequential.c,v 1.9 2009/02/10 17:48:19 reinoud Exp $");
32 #endif /* not lint */
33 
34 
35 #if defined(_KERNEL_OPT)
36 #include "opt_compat_netbsd.h"
37 #endif
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/sysctl.h>
42 #include <sys/namei.h>
43 #include <sys/proc.h>
44 #include <sys/kernel.h>
45 #include <sys/vnode.h>
46 #include <miscfs/genfs/genfs_node.h>
47 #include <sys/mount.h>
48 #include <sys/buf.h>
49 #include <sys/file.h>
50 #include <sys/device.h>
51 #include <sys/disklabel.h>
52 #include <sys/ioctl.h>
53 #include <sys/malloc.h>
54 #include <sys/dirent.h>
55 #include <sys/stat.h>
56 #include <sys/conf.h>
57 #include <sys/kauth.h>
58 #include <sys/kthread.h>
59 #include <dev/clock_subr.h>
60 
61 #include <fs/udf/ecma167-udf.h>
62 #include <fs/udf/udf_mount.h>
63 
64 #include "udf.h"
65 #include "udf_subr.h"
66 #include "udf_bswap.h"
67 
68 
/*
 * Accessor macros. The argument is parenthesized so that any pointer
 * expression (e.g. `vp + 1' or `cond ? a : b') expands correctly.
 */
/* UDF node stored in a vnode's v_data */
#define VTOI(vnode) ((struct udf_node *) (vnode)->v_data)
/* this strategy's private state stored in a mount's strategy_private */
#define PRIV(ump) ((struct strat_private *) (ump)->strategy_private)
71 
72 /* --------------------------------------------------------------------- */
73 
/*
 * BUFQ's: indices into the queues[] and last_queued[] arrays of
 * struct strat_private. udf_queuebuf_seq() picks the queue for each buffer.
 */
#define UDF_SHED_MAX 3			/* number of queues */

#define UDF_SHED_READING	0	/* buffers with B_READ set */
#define UDF_SHED_WRITING	1	/* writes of type UDF_C_ABSOLUTE */
#define UDF_SHED_SEQWRITING	2	/* all other writes; placed on disc in udf_issue_buf() */
80 
/*
 * Per-mount private state of the sequential strategy; allocated in
 * udf_discstrat_init_seq() and reached through PRIV(ump).
 */
struct strat_private {
	struct pool		 desc_pool;	 	/* node descriptors, lb_size each */

	lwp_t			*queue_lwp;		/* filled in by kthread_create() */
	kcondvar_t		 discstrat_cv;		/* to wait on       */
	kmutex_t		 discstrat_mutex;	/* disc strategy    */

	int			 run_thread;		/* thread control; also the sleep channel used at shutdown */
	int			 cur_queue;		/* UDF_SHED_* queue currently being served */

	struct disk_strategy	 old_strategy_setting;	/* device strategy saved at init, restored at finish */
	struct bufq_state	*queues[UDF_SHED_MAX];	/* one bufq per UDF_SHED_* index */
	struct timespec		 last_queued[UDF_SHED_MAX]; /* time of last bufq_put() per queue */
};
95 
96 
97 /* --------------------------------------------------------------------- */
98 
99 static void
100 udf_wr_nodedscr_callback(struct buf *buf)
101 {
102 	struct udf_node *udf_node;
103 
104 	KASSERT(buf);
105 	KASSERT(buf->b_data);
106 
107 	/* called when write action is done */
108 	DPRINTF(WRITE, ("udf_wr_nodedscr_callback(): node written out\n"));
109 
110 	udf_node = VTOI(buf->b_vp);
111 	if (udf_node == NULL) {
112 		putiobuf(buf);
113 		printf("udf_wr_node_callback: NULL node?\n");
114 		return;
115 	}
116 
117 	/* XXX right flags to mark dirty again on error? */
118 	if (buf->b_error) {
119 		udf_node->i_flags |= IN_MODIFIED | IN_ACCESSED;
120 		/* XXX TODO reshedule on error */
121 	}
122 
123 	/* decrement outstanding_nodedscr */
124 	KASSERT(udf_node->outstanding_nodedscr >= 1);
125 	udf_node->outstanding_nodedscr--;
126 	if (udf_node->outstanding_nodedscr == 0) {
127 		/* first unlock the node */
128 		KASSERT(udf_node->i_flags & IN_CALLBACK_ULK);
129 		UDF_UNLOCK_NODE(udf_node, IN_CALLBACK_ULK);
130 
131 		wakeup(&udf_node->outstanding_nodedscr);
132 	}
133 
134 	/* unreference the vnode so it can be recycled */
135 	holdrele(udf_node->vnode);
136 
137 	putiobuf(buf);
138 }
139 
140 /* --------------------------------------------------------------------- */
141 
142 static int
143 udf_create_logvol_dscr_seq(struct udf_strat_args *args)
144 {
145 	union dscrptr   **dscrptr = &args->dscr;
146 	struct udf_mount *ump = args->ump;
147 	struct strat_private *priv = PRIV(ump);
148 	uint32_t lb_size;
149 
150 	lb_size = udf_rw32(ump->logical_vol->lb_size);
151 	*dscrptr = pool_get(&priv->desc_pool, PR_WAITOK);
152 	memset(*dscrptr, 0, lb_size);
153 
154 	return 0;
155 }
156 
157 
158 static void
159 udf_free_logvol_dscr_seq(struct udf_strat_args *args)
160 {
161 	union dscrptr    *dscr = args->dscr;
162 	struct udf_mount *ump  = args->ump;
163 	struct strat_private *priv = PRIV(ump);
164 
165 	pool_put(&priv->desc_pool, dscr);
166 }
167 
168 
169 static int
170 udf_read_logvol_dscr_seq(struct udf_strat_args *args)
171 {
172 	union dscrptr   **dscrptr = &args->dscr;
173 	union dscrptr    *tmpdscr;
174 	struct udf_mount *ump = args->ump;
175 	struct long_ad   *icb = args->icb;
176 	struct strat_private *priv = PRIV(ump);
177 	uint32_t lb_size;
178 	uint32_t sector, dummy;
179 	int error;
180 
181 	lb_size = udf_rw32(ump->logical_vol->lb_size);
182 
183 	error = udf_translate_vtop(ump, icb, &sector, &dummy);
184 	if (error)
185 		return error;
186 
187 	/* try to read in fe/efe */
188 	error = udf_read_phys_dscr(ump, sector, M_UDFTEMP, &tmpdscr);
189 	if (error)
190 		return error;
191 
192 	*dscrptr = pool_get(&priv->desc_pool, PR_WAITOK);
193 	memcpy(*dscrptr, tmpdscr, lb_size);
194 	free(tmpdscr, M_UDFTEMP);
195 
196 	return 0;
197 }
198 
199 
200 static int
201 udf_write_logvol_dscr_seq(struct udf_strat_args *args)
202 {
203 	union dscrptr    *dscr     = args->dscr;
204 	struct udf_mount *ump      = args->ump;
205 	struct udf_node  *udf_node = args->udf_node;
206 	struct long_ad   *icb      = args->icb;
207 	int               waitfor  = args->waitfor;
208 	uint32_t logsectornr, sectornr, dummy;
209 	int error, vpart;
210 
211 	/*
212 	 * we have to decide if we write it out sequential or at its fixed
213 	 * position by examining the partition its (to be) written on.
214 	 */
215 	vpart       = udf_rw16(udf_node->loc.loc.part_num);
216 	logsectornr = udf_rw32(icb->loc.lb_num);
217 	sectornr    = 0;
218 	if (ump->vtop_tp[vpart] != UDF_VTOP_TYPE_VIRT) {
219 		error = udf_translate_vtop(ump, icb, &sectornr, &dummy);
220 		if (error)
221 			goto out;
222 	}
223 
224 	/* add reference to the vnode to prevent recycling */
225 	vhold(udf_node->vnode);
226 
227 	if (waitfor) {
228 		DPRINTF(WRITE, ("udf_write_logvol_dscr: sync write\n"));
229 
230 		error = udf_write_phys_dscr_sync(ump, udf_node, UDF_C_NODE,
231 			dscr, sectornr, logsectornr);
232 	} else {
233 		DPRINTF(WRITE, ("udf_write_logvol_dscr: no wait, async write\n"));
234 
235 		error = udf_write_phys_dscr_async(ump, udf_node, UDF_C_NODE,
236 			dscr, sectornr, logsectornr, udf_wr_nodedscr_callback);
237 		/* will be UNLOCKED in call back */
238 		return error;
239 	}
240 
241 	holdrele(udf_node->vnode);
242 out:
243 	udf_node->outstanding_nodedscr--;
244 	if (udf_node->outstanding_nodedscr == 0) {
245 		UDF_UNLOCK_NODE(udf_node, 0);
246 		wakeup(&udf_node->outstanding_nodedscr);
247 	}
248 
249 	return error;
250 }
251 
252 /* --------------------------------------------------------------------- */
253 
/*
 * Main file-system specific scheduler. Due to the nature of optical media,
 * scheduling can't be performed in the traditional way. Most OS
 * implementations I've seen thus read or write a file atomically, giving all
 * kinds of side effects.
 *
 * This implementation uses a kernel thread to schedule the queued requests in
 * such a way that is semi-optimal for optical media; this means approximately
 * (R*|(Wr*|Ws*))* since switching between reading and writing is expensive in
 * time.
 */
265 
266 static void
267 udf_queuebuf_seq(struct udf_strat_args *args)
268 {
269 	struct udf_mount *ump = args->ump;
270 	struct buf *nestbuf = args->nestbuf;
271 	struct strat_private *priv = PRIV(ump);
272 	int queue;
273 	int what;
274 
275 	KASSERT(ump);
276 	KASSERT(nestbuf);
277 	KASSERT(nestbuf->b_iodone == nestiobuf_iodone);
278 
279 	what = nestbuf->b_udf_c_type;
280 	queue = UDF_SHED_READING;
281 	if ((nestbuf->b_flags & B_READ) == 0) {
282 		/* writing */
283 		queue = UDF_SHED_SEQWRITING;
284 		if (what == UDF_C_ABSOLUTE)
285 			queue = UDF_SHED_WRITING;
286 	}
287 
288 	/* use our own sheduler lists for more complex sheduling */
289 	mutex_enter(&priv->discstrat_mutex);
290 		bufq_put(priv->queues[queue], nestbuf);
291 		vfs_timestamp(&priv->last_queued[queue]);
292 	mutex_exit(&priv->discstrat_mutex);
293 
294 	/* signal our thread that there might be something to do */
295 	cv_signal(&priv->discstrat_cv);
296 }
297 
298 /* --------------------------------------------------------------------- */
299 
300 /* TODO convert to lb_size */
301 static void
302 udf_VAT_mapping_update(struct udf_mount *ump, struct buf *buf, uint32_t lb_map)
303 {
304 	union dscrptr    *fdscr = (union dscrptr *) buf->b_data;
305 	struct vnode     *vp = buf->b_vp;
306 	struct udf_node  *udf_node = VTOI(vp);
307 	uint32_t lb_size, blks;
308 	uint32_t lb_num;
309 	uint32_t udf_rw32_lbmap;
310 	int c_type = buf->b_udf_c_type;
311 	int error;
312 
313 	/* only interested when we're using a VAT */
314 	KASSERT(ump->vat_node);
315 	KASSERT(ump->vtop_alloc[ump->node_part] == UDF_ALLOC_VAT);
316 
317 	/* only nodes are recorded in the VAT */
318 	/* NOTE: and the fileset descriptor (FIXME ?) */
319 	if (c_type != UDF_C_NODE)
320 		return;
321 
322 	/* we now have an UDF FE/EFE node on media with VAT (or VAT itself) */
323 	lb_size = udf_rw32(ump->logical_vol->lb_size);
324 	blks = lb_size / DEV_BSIZE;
325 
326 	udf_rw32_lbmap = udf_rw32(lb_map);
327 
328 	/* if we're the VAT itself, only update our assigned sector number */
329 	if (udf_node == ump->vat_node) {
330 		fdscr->tag.tag_loc = udf_rw32_lbmap;
331 		udf_validate_tag_sum(fdscr);
332 		DPRINTF(TRANSLATE, ("VAT assigned to sector %u\n",
333 			udf_rw32(udf_rw32_lbmap)));
334 		/* no use mapping the VAT node in the VAT */
335 		return;
336 	}
337 
338 	/* record new position in VAT file */
339 	lb_num = udf_rw32(fdscr->tag.tag_loc);
340 
341 	/* lb_num = udf_rw32(udf_node->write_loc.loc.lb_num); */
342 
343 	DPRINTF(TRANSLATE, ("VAT entry change (log %u -> phys %u)\n",
344 			lb_num, lb_map));
345 
346 	/* VAT should be the longer than this write, can't go wrong */
347 	KASSERT(lb_num <= ump->vat_entries);
348 
349 	mutex_enter(&ump->allocate_mutex);
350 	error = udf_vat_write(ump->vat_node,
351 			(uint8_t *) &udf_rw32_lbmap, 4,
352 			ump->vat_offset + lb_num * 4);
353 	mutex_exit(&ump->allocate_mutex);
354 
355 	if (error)
356 		panic( "udf_VAT_mapping_update: HELP! i couldn't "
357 			"write in the VAT file ?\n");
358 }
359 
360 
361 static void
362 udf_issue_buf(struct udf_mount *ump, int queue, struct buf *buf)
363 {
364 	union dscrptr *dscr;
365 	struct long_ad *node_ad_cpy;
366 	struct part_desc *pdesc;
367 	uint64_t *lmapping, *lmappos, blknr;
368 	uint32_t our_sectornr, sectornr, bpos;
369 	uint32_t ptov;
370 	uint16_t vpart_num;
371 	uint8_t *fidblk;
372 	int sector_size = ump->discinfo.sector_size;
373 	int blks = sector_size / DEV_BSIZE;
374 	int len, buf_len;
375 
376 	/* if reading, just pass to the device's STRATEGY */
377 	if (queue == UDF_SHED_READING) {
378 		DPRINTF(SHEDULE, ("\nudf_issue_buf READ %p : sector %d type %d,"
379 			"b_resid %d, b_bcount %d, b_bufsize %d\n",
380 			buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
381 			buf->b_resid, buf->b_bcount, buf->b_bufsize));
382 		VOP_STRATEGY(ump->devvp, buf);
383 		return;
384 	}
385 
386 	blknr        = buf->b_blkno;
387 	our_sectornr = blknr / blks;
388 
389 	if (queue == UDF_SHED_WRITING) {
390 		DPRINTF(SHEDULE, ("\nudf_issue_buf WRITE %p : sector %d "
391 			"type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
392 			buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
393 			buf->b_resid, buf->b_bcount, buf->b_bufsize));
394 		KASSERT(buf->b_udf_c_type == UDF_C_ABSOLUTE);
395 
396 		// udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
397 		VOP_STRATEGY(ump->devvp, buf);
398 		return;
399 	}
400 
401 	KASSERT(queue == UDF_SHED_SEQWRITING);
402 	DPRINTF(SHEDULE, ("\nudf_issue_buf SEQWRITE %p : sector XXXX "
403 		"type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
404 		buf, buf->b_udf_c_type, buf->b_resid, buf->b_bcount,
405 		buf->b_bufsize));
406 
407 	/*
408 	 * Buffers should not have been allocated to disc addresses yet on
409 	 * this queue. Note that a buffer can get multiple extents allocated.
410 	 *
411 	 * lmapping contains lb_num relative to base partition.
412 	 */
413 	lmapping    = ump->la_lmapping;
414 	node_ad_cpy = ump->la_node_ad_cpy;
415 
416 	/* logically allocate buf and map it in the file */
417 	udf_late_allocate_buf(ump, buf, lmapping, node_ad_cpy, &vpart_num);
418 
419 	/*
420 	 * NOTE We are using the knowledge here that sequential media will
421 	 * always be mapped linearly. Thus no use to explicitly translate the
422 	 * lmapping list.
423 	 */
424 
425 	/* calculate offset from physical base partition */
426 	pdesc = ump->partitions[ump->vtop[vpart_num]];
427 	ptov  = udf_rw32(pdesc->start_loc);
428 
429 	/* set buffers blkno to the physical block number */
430 	buf->b_blkno = (*lmapping + ptov) * blks;
431 
432 	/* fixate floating descriptors */
433 	if (buf->b_udf_c_type == UDF_C_FLOAT_DSCR) {
434 		/* set our tag location to the absolute position */
435 		dscr = (union dscrptr *) buf->b_data;
436 		dscr->tag.tag_loc = udf_rw32(*lmapping + ptov);
437 		udf_validate_tag_and_crc_sums(dscr);
438 	}
439 
440 	/* update mapping in the VAT */
441 	if (buf->b_udf_c_type == UDF_C_NODE) {
442 		udf_VAT_mapping_update(ump, buf, *lmapping);
443 		udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
444 	}
445 
446 	/* if we have FIDs, fixup using the new allocation table */
447 	if (buf->b_udf_c_type == UDF_C_FIDS) {
448 		buf_len = buf->b_bcount;
449 		bpos = 0;
450 		lmappos = lmapping;
451 		while (buf_len) {
452 			sectornr = *lmappos++;
453 			len = MIN(buf_len, sector_size);
454 			fidblk = (uint8_t *) buf->b_data + bpos;
455 			udf_fixup_fid_block(fidblk, sector_size,
456 				0, len, sectornr);
457 			bpos += len;
458 			buf_len -= len;
459 		}
460 	}
461 
462 	VOP_STRATEGY(ump->devvp, buf);
463 }
464 
465 
/*
 * Perform one scheduling step for the strategy thread.
 *
 * Called by udf_discstrat_thread() with priv->discstrat_mutex held; the
 * mutex is held again on return but is dropped while I/O is in progress and
 * while the track info is refreshed.
 *
 * If the current queue has a buffer, it is taken off the queue, issued
 * through udf_issue_buf(), waited for, and then its original b_iodone
 * callback is run. Otherwise it is decided which queue to serve next.
 */
static void
udf_doshedule(struct udf_mount *ump)
{
	struct buf *buf;
	struct timespec now, *last;
	struct strat_private *priv = PRIV(ump);
	void (*b_callback)(struct buf *);
	int new_queue;
	int error;

	buf = bufq_get(priv->queues[priv->cur_queue]);
	if (buf) {
		/* transfer from the current queue to the device queue */
		mutex_exit(&priv->discstrat_mutex);

		/*
		 * transform buffer to synchronous; XXX needed?
		 * The completion callback is saved and cleared here and
		 * invoked by hand further down.
		 */
		b_callback = buf->b_iodone;
		buf->b_iodone = NULL;
		CLR(buf->b_flags, B_ASYNC);

		/* issue and wait on completion */
		udf_issue_buf(ump, priv->cur_queue, buf);
		biowait(buf);

		mutex_enter(&priv->discstrat_mutex);

		/* if there is an error, repair this error, otherwise propagate */
		if (buf->b_error && ((buf->b_flags & B_READ) == 0)) {
			/* check what we need to do */
			panic("UDF write error, can't handle yet!\n");
		}

		/* propagate result to higher layers */
		if (b_callback) {
			buf->b_iodone = b_callback;
			(*buf->b_iodone)(buf);
		}

		return;
	}

	/* Check if we're idling in this state */
	vfs_timestamp(&now);
	last = &priv->last_queued[priv->cur_queue];
	if (ump->discinfo.mmc_class == MMC_CLASS_CD) {
		/*
		 * don't switch too fast for CD media; it's expensive in time.
		 * Stay on this queue until 3 seconds have passed since a
		 * buffer was last queued on it.
		 */
		if (now.tv_sec - last->tv_sec < 3)
			return;
	}

	/*
	 * check if we can/should switch; later checks override earlier ones,
	 * so a non-empty SEQWRITING queue wins over WRITING, which wins over
	 * READING. With all queues empty the current queue is kept.
	 */
	new_queue = priv->cur_queue;

	if (bufq_peek(priv->queues[UDF_SHED_READING]))
		new_queue = UDF_SHED_READING;
	if (bufq_peek(priv->queues[UDF_SHED_WRITING]))		/* only for unmount */
		new_queue = UDF_SHED_WRITING;
	if (bufq_peek(priv->queues[UDF_SHED_SEQWRITING]))
		new_queue = UDF_SHED_SEQWRITING;
	if (priv->cur_queue == UDF_SHED_READING) {
		if (new_queue == UDF_SHED_SEQWRITING) {
			/* TODO use flag to signal if this is needed */
			mutex_exit(&priv->discstrat_mutex);

			/*
			 * update trackinfo for data and metadata; done with
			 * the mutex released, a failure trips the assert
			 */
			error = udf_update_trackinfo(ump,
					&ump->data_track);
			assert(error == 0);
			error = udf_update_trackinfo(ump,
					&ump->metadata_track);
			assert(error == 0);
			mutex_enter(&priv->discstrat_mutex);
		}
	}

	if (new_queue != priv->cur_queue) {
		DPRINTF(SHEDULE, ("switching from %d to %d\n",
			priv->cur_queue, new_queue));
	}

	priv->cur_queue = new_queue;
}
548 
549 
550 static void
551 udf_discstrat_thread(void *arg)
552 {
553 	struct udf_mount *ump = (struct udf_mount *) arg;
554 	struct strat_private *priv = PRIV(ump);
555 	int empty;
556 
557 	empty = 1;
558 	mutex_enter(&priv->discstrat_mutex);
559 	while (priv->run_thread || !empty) {
560 		/* process the current selected queue */
561 		udf_doshedule(ump);
562 		empty  = (bufq_peek(priv->queues[UDF_SHED_READING]) == NULL);
563 		empty &= (bufq_peek(priv->queues[UDF_SHED_WRITING]) == NULL);
564 		empty &= (bufq_peek(priv->queues[UDF_SHED_SEQWRITING]) == NULL);
565 
566 		/* wait for more if needed */
567 		if (empty)
568 			cv_timedwait(&priv->discstrat_cv,
569 				&priv->discstrat_mutex, hz/8);
570 	}
571 	mutex_exit(&priv->discstrat_mutex);
572 
573 	wakeup(&priv->run_thread);
574 	kthread_exit(0);
575 	/* not reached */
576 }
577 
578 /* --------------------------------------------------------------------- */
579 
580 static void
581 udf_discstrat_init_seq(struct udf_strat_args *args)
582 {
583 	struct udf_mount *ump = args->ump;
584 	struct strat_private *priv = PRIV(ump);
585 	struct disk_strategy dkstrat;
586 	uint32_t lb_size;
587 
588 	KASSERT(ump);
589 	KASSERT(ump->logical_vol);
590 	KASSERT(priv == NULL);
591 
592 	lb_size = udf_rw32(ump->logical_vol->lb_size);
593 	KASSERT(lb_size > 0);
594 
595 	/* initialise our memory space */
596 	ump->strategy_private = malloc(sizeof(struct strat_private),
597 		M_UDFTEMP, M_WAITOK);
598 	priv = ump->strategy_private;
599 	memset(priv, 0 , sizeof(struct strat_private));
600 
601 	/* initialise locks */
602 	cv_init(&priv->discstrat_cv, "udfstrat");
603 	mutex_init(&priv->discstrat_mutex, MUTEX_DEFAULT, IPL_NONE);
604 
605 	/*
606 	 * Initialise pool for descriptors associated with nodes. This is done
607 	 * in lb_size units though currently lb_size is dictated to be
608 	 * sector_size.
609 	 */
610 	pool_init(&priv->desc_pool, lb_size, 0, 0, 0, "udf_desc_pool", NULL,
611 	    IPL_NONE);
612 
613 	/*
614 	 * remember old device strategy method and explicit set method
615 	 * `discsort' since we have our own more complex strategy that is not
616 	 * implementable on the CD device and other strategies will get in the
617 	 * way.
618 	 */
619 	memset(&priv->old_strategy_setting, 0,
620 		sizeof(struct disk_strategy));
621 	VOP_IOCTL(ump->devvp, DIOCGSTRATEGY, &priv->old_strategy_setting,
622 		FREAD | FKIOCTL, NOCRED);
623 	memset(&dkstrat, 0, sizeof(struct disk_strategy));
624 	strcpy(dkstrat.dks_name, "discsort");
625 	VOP_IOCTL(ump->devvp, DIOCSSTRATEGY, &dkstrat, FWRITE | FKIOCTL,
626 		NOCRED);
627 
628 	/* initialise our internal sheduler */
629 	priv->cur_queue = UDF_SHED_READING;
630 	bufq_alloc(&priv->queues[UDF_SHED_READING], "disksort",
631 		BUFQ_SORT_RAWBLOCK);
632 	bufq_alloc(&priv->queues[UDF_SHED_WRITING], "disksort",
633 		BUFQ_SORT_RAWBLOCK);
634 	bufq_alloc(&priv->queues[UDF_SHED_SEQWRITING], "fcfs", 0);
635 	vfs_timestamp(&priv->last_queued[UDF_SHED_READING]);
636 	vfs_timestamp(&priv->last_queued[UDF_SHED_WRITING]);
637 	vfs_timestamp(&priv->last_queued[UDF_SHED_SEQWRITING]);
638 
639 	/* create our disk strategy thread */
640 	priv->run_thread = 1;
641 	if (kthread_create(PRI_NONE, 0 /* KTHREAD_MPSAFE*/, NULL /* cpu_info*/,
642 		udf_discstrat_thread, ump, &priv->queue_lwp,
643 		"%s", "udf_rw")) {
644 		panic("fork udf_rw");
645 	}
646 }
647 
648 
649 static void
650 udf_discstrat_finish_seq(struct udf_strat_args *args)
651 {
652 	struct udf_mount *ump = args->ump;
653 	struct strat_private *priv = PRIV(ump);
654 	int error;
655 
656 	if (ump == NULL)
657 		return;
658 
659 	/* stop our sheduling thread */
660 	KASSERT(priv->run_thread == 1);
661 	priv->run_thread = 0;
662 	wakeup(priv->queue_lwp);
663 	do {
664 		error = tsleep(&priv->run_thread, PRIBIO+1,
665 			"udfshedfin", hz);
666 	} while (error);
667 	/* kthread should be finished now */
668 
669 	/* set back old device strategy method */
670 	VOP_IOCTL(ump->devvp, DIOCSSTRATEGY, &priv->old_strategy_setting,
671 			FWRITE, NOCRED);
672 
673 	/* destroy our pool */
674 	pool_destroy(&priv->desc_pool);
675 
676 	/* free our private space */
677 	free(ump->strategy_private, M_UDFTEMP);
678 	ump->strategy_private = NULL;
679 }
680 
681 /* --------------------------------------------------------------------- */
682 
/*
 * Operations vector of the sequential strategy. The initialisers are
 * positional, so their order has to match the member order of
 * struct udf_strategy (declared outside this file).
 */
struct udf_strategy udf_strat_sequential =
{
	udf_create_logvol_dscr_seq,	/* get a zeroed descriptor buffer */
	udf_free_logvol_dscr_seq,	/* return a descriptor buffer */
	udf_read_logvol_dscr_seq,	/* read a descriptor from disc */
	udf_write_logvol_dscr_seq,	/* write a node descriptor */
	udf_queuebuf_seq,		/* queue a buffer for the thread */
	udf_discstrat_init_seq,		/* set up state and thread */
	udf_discstrat_finish_seq	/* stop thread and free state */
};
693 
694 
695