xref: /netbsd-src/sys/fs/udf/udf_strat_sequential.c (revision cac8e449158efc7261bebc8657cbb0125a2cfdde)
1 /* $NetBSD: udf_strat_sequential.c,v 1.3 2008/07/28 19:41:13 reinoud Exp $ */
2 
3 /*
4  * Copyright (c) 2006, 2008 Reinoud Zandijk
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  */
28 
29 #include <sys/cdefs.h>
30 #ifndef lint
31 __KERNEL_RCSID(0, "$NetBSD: udf_strat_sequential.c,v 1.3 2008/07/28 19:41:13 reinoud Exp $");
32 #endif /* not lint */
33 
34 
35 #if defined(_KERNEL_OPT)
36 #include "opt_quota.h"
37 #include "opt_compat_netbsd.h"
38 #endif
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/sysctl.h>
43 #include <sys/namei.h>
44 #include <sys/proc.h>
45 #include <sys/kernel.h>
46 #include <sys/vnode.h>
47 #include <miscfs/genfs/genfs_node.h>
48 #include <sys/mount.h>
49 #include <sys/buf.h>
50 #include <sys/file.h>
51 #include <sys/device.h>
52 #include <sys/disklabel.h>
53 #include <sys/ioctl.h>
54 #include <sys/malloc.h>
55 #include <sys/dirent.h>
56 #include <sys/stat.h>
57 #include <sys/conf.h>
58 #include <sys/kauth.h>
59 #include <sys/kthread.h>
60 #include <dev/clock_subr.h>
61 
62 #include <fs/udf/ecma167-udf.h>
63 #include <fs/udf/udf_mount.h>
64 
65 #if defined(_KERNEL_OPT)
66 #include "opt_udf.h"
67 #endif
68 
69 #include "udf.h"
70 #include "udf_subr.h"
71 #include "udf_bswap.h"
72 
73 
/*
 * Accessor macros. The argument is parenthesised so that any pointer
 * expression, not just a plain identifier, expands correctly.
 */
/* the UDF node stored in a vnode's v_data */
#define VTOI(vnode) ((struct udf_node *) (vnode)->v_data)
/* this strategy's private state stored in a mount's strategy_private */
#define PRIV(ump) ((struct strat_private *) (ump)->strategy_private)
76 
77 /* --------------------------------------------------------------------- */
78 
79 /* BUFQ's */
80 #define UDF_SHED_MAX 3
81 
82 #define UDF_SHED_READING	0
83 #define UDF_SHED_WRITING	1
84 #define UDF_SHED_SEQWRITING	2
85 
86 struct strat_private {
87 	struct pool		 desc_pool;	 	/* node descriptors */
88 
89 	lwp_t			*queue_lwp;
90 	kcondvar_t		 discstrat_cv;		/* to wait on       */
91 	kmutex_t		 discstrat_mutex;	/* disc strategy    */
92 
93 	int			 run_thread;		/* thread control */
94 	int			 cur_queue;
95 
96 	struct disk_strategy	 old_strategy_setting;
97 	struct bufq_state	*queues[UDF_SHED_MAX];
98 	struct timespec		 last_queued[UDF_SHED_MAX];
99 };
100 
101 
102 /* --------------------------------------------------------------------- */
103 
104 static void
105 udf_wr_nodedscr_callback(struct buf *buf)
106 {
107 	struct udf_node *udf_node;
108 
109 	KASSERT(buf);
110 	KASSERT(buf->b_data);
111 
112 	/* called when write action is done */
113 	DPRINTF(WRITE, ("udf_wr_nodedscr_callback(): node written out\n"));
114 
115 	udf_node = VTOI(buf->b_vp);
116 	if (udf_node == NULL) {
117 		putiobuf(buf);
118 		printf("udf_wr_node_callback: NULL node?\n");
119 		return;
120 	}
121 
122 	/* XXX right flags to mark dirty again on error? */
123 	if (buf->b_error) {
124 		udf_node->i_flags |= IN_MODIFIED | IN_ACCESSED;
125 		/* XXX TODO reshedule on error */
126 	}
127 
128 	/* decrement outstanding_nodedscr */
129 	KASSERT(udf_node->outstanding_nodedscr >= 1);
130 	udf_node->outstanding_nodedscr--;
131 	if (udf_node->outstanding_nodedscr == 0) {
132 		/* first unlock the node */
133 		KASSERT(udf_node->i_flags & IN_CALLBACK_ULK);
134 		UDF_UNLOCK_NODE(udf_node, IN_CALLBACK_ULK);
135 
136 		wakeup(&udf_node->outstanding_nodedscr);
137 	}
138 
139 	/* unreference the vnode so it can be recycled */
140 	holdrele(udf_node->vnode);
141 
142 	putiobuf(buf);
143 }
144 
145 /* --------------------------------------------------------------------- */
146 
147 static int
148 udf_create_logvol_dscr_seq(struct udf_strat_args *args)
149 {
150 	union dscrptr   **dscrptr = &args->dscr;
151 	struct udf_mount *ump = args->ump;
152 	struct strat_private *priv = PRIV(ump);
153 	uint32_t lb_size;
154 
155 	lb_size = udf_rw32(ump->logical_vol->lb_size);
156 	*dscrptr = pool_get(&priv->desc_pool, PR_WAITOK);
157 	memset(*dscrptr, 0, lb_size);
158 
159 	return 0;
160 }
161 
162 
163 static void
164 udf_free_logvol_dscr_seq(struct udf_strat_args *args)
165 {
166 	union dscrptr    *dscr = args->dscr;
167 	struct udf_mount *ump  = args->ump;
168 	struct strat_private *priv = PRIV(ump);
169 
170 	pool_put(&priv->desc_pool, dscr);
171 }
172 
173 
174 static int
175 udf_read_logvol_dscr_seq(struct udf_strat_args *args)
176 {
177 	union dscrptr   **dscrptr = &args->dscr;
178 	union dscrptr    *tmpdscr;
179 	struct udf_mount *ump = args->ump;
180 	struct long_ad   *icb = args->icb;
181 	struct strat_private *priv = PRIV(ump);
182 	uint32_t lb_size;
183 	uint32_t sector, dummy;
184 	int error;
185 
186 	lb_size = udf_rw32(ump->logical_vol->lb_size);
187 
188 	error = udf_translate_vtop(ump, icb, &sector, &dummy);
189 	if (error)
190 		return error;
191 
192 	/* try to read in fe/efe */
193 	error = udf_read_phys_dscr(ump, sector, M_UDFTEMP, &tmpdscr);
194 	if (error)
195 		return error;
196 
197 	*dscrptr = pool_get(&priv->desc_pool, PR_WAITOK);
198 	memcpy(*dscrptr, tmpdscr, lb_size);
199 	free(tmpdscr, M_UDFTEMP);
200 
201 	return 0;
202 }
203 
204 
205 static int
206 udf_write_logvol_dscr_seq(struct udf_strat_args *args)
207 {
208 	union dscrptr    *dscr     = args->dscr;
209 	struct udf_mount *ump      = args->ump;
210 	struct udf_node  *udf_node = args->udf_node;
211 	struct long_ad   *icb      = args->icb;
212 	int               waitfor  = args->waitfor;
213 	uint32_t logsectornr, sectornr, dummy;
214 	int error, vpart;
215 
216 	/*
217 	 * we have to decide if we write it out sequential or at its fixed
218 	 * position by examining the partition its (to be) written on.
219 	 */
220 	vpart       = udf_rw16(udf_node->loc.loc.part_num);
221 	logsectornr = udf_rw32(icb->loc.lb_num);
222 	sectornr    = 0;
223 	if (ump->vtop_tp[vpart] != UDF_VTOP_TYPE_VIRT) {
224 		error = udf_translate_vtop(ump, icb, &sectornr, &dummy);
225 		if (error)
226 			goto out;
227 	}
228 
229 	/* add reference to the vnode to prevent recycling */
230 	vhold(udf_node->vnode);
231 
232 	if (waitfor) {
233 		DPRINTF(WRITE, ("udf_write_logvol_dscr: sync write\n"));
234 
235 		error = udf_write_phys_dscr_sync(ump, udf_node, UDF_C_NODE,
236 			dscr, sectornr, logsectornr);
237 	} else {
238 		DPRINTF(WRITE, ("udf_write_logvol_dscr: no wait, async write\n"));
239 
240 		error = udf_write_phys_dscr_async(ump, udf_node, UDF_C_NODE,
241 			dscr, sectornr, logsectornr, udf_wr_nodedscr_callback);
242 		/* will be UNLOCKED in call back */
243 		return error;
244 	}
245 
246 	holdrele(udf_node->vnode);
247 out:
248 	udf_node->outstanding_nodedscr--;
249 	if (udf_node->outstanding_nodedscr == 0) {
250 		UDF_UNLOCK_NODE(udf_node, 0);
251 		wakeup(&udf_node->outstanding_nodedscr);
252 	}
253 
254 	return error;
255 }
256 
257 /* --------------------------------------------------------------------- */
258 
/*
 * Main file-system specific scheduler. Due to the nature of optical media,
 * scheduling can't be performed in the traditional way. Most OS
 * implementations I've seen thus read or write a file atomically, giving all
 * kinds of side effects.
 *
 * This implementation uses a kernel thread to schedule the queued requests in
 * a way that is semi-optimal for optical media; this means approximately
 * (R*|(Wr*|Ws*))* since switching between reading and writing is expensive in
 * time.
 */
270 
271 static void
272 udf_queuebuf_seq(struct udf_strat_args *args)
273 {
274 	struct udf_mount *ump = args->ump;
275 	struct buf *nestbuf = args->nestbuf;
276 	struct strat_private *priv = PRIV(ump);
277 	int queue;
278 	int what;
279 
280 	KASSERT(ump);
281 	KASSERT(nestbuf);
282 	KASSERT(nestbuf->b_iodone == nestiobuf_iodone);
283 
284 	what = nestbuf->b_udf_c_type;
285 	queue = UDF_SHED_READING;
286 	if ((nestbuf->b_flags & B_READ) == 0) {
287 		/* writing */
288 		queue = UDF_SHED_SEQWRITING;
289 		if (what == UDF_C_DSCR)
290 			queue = UDF_SHED_WRITING;
291 		if (what == UDF_C_NODE) {
292 			if (ump->meta_alloc != UDF_ALLOC_VAT)
293 				queue = UDF_SHED_WRITING;
294 		}
295 #if 0
296 		if (queue == UDF_SHED_SEQWRITING) {
297 			/* TODO do add sector to uncommitted space */
298 		}
299 #endif
300 	}
301 
302 	/* use our own sheduler lists for more complex sheduling */
303 	mutex_enter(&priv->discstrat_mutex);
304 		BUFQ_PUT(priv->queues[queue], nestbuf);
305 		vfs_timestamp(&priv->last_queued[queue]);
306 	mutex_exit(&priv->discstrat_mutex);
307 
308 	/* signal our thread that there might be something to do */
309 	cv_signal(&priv->discstrat_cv);
310 }
311 
312 /* --------------------------------------------------------------------- */
313 
314 /* TODO convert to lb_size */
315 static void
316 udf_VAT_mapping_update(struct udf_mount *ump, struct buf *buf, uint32_t lb_map)
317 {
318 	union dscrptr    *fdscr = (union dscrptr *) buf->b_data;
319 	struct vnode     *vp = buf->b_vp;
320 	struct udf_node  *udf_node = VTOI(vp);
321 	uint32_t lb_size, blks;
322 	uint32_t lb_num;
323 	uint32_t udf_rw32_lbmap;
324 	int c_type = buf->b_udf_c_type;
325 	int error;
326 
327 	/* only interested when we're using a VAT */
328 	if (ump->meta_alloc != UDF_ALLOC_VAT)
329 		return;
330 	KASSERT(ump->vat_node);
331 
332 	/* only nodes are recorded in the VAT */
333 	/* NOTE: and the fileset descriptor (FIXME ?) */
334 	if (c_type != UDF_C_NODE)
335 		return;
336 
337 	/* we now have an UDF FE/EFE node on media with VAT (or VAT itself) */
338 	lb_size = udf_rw32(ump->logical_vol->lb_size);
339 	blks = lb_size / DEV_BSIZE;
340 
341 	udf_rw32_lbmap = udf_rw32(lb_map);
342 
343 	/* if we're the VAT itself, only update our assigned sector number */
344 	if (udf_node == ump->vat_node) {
345 		fdscr->tag.tag_loc = udf_rw32_lbmap;
346 		udf_validate_tag_sum(fdscr);
347 		DPRINTF(TRANSLATE, ("VAT assigned to sector %u\n",
348 			udf_rw32(udf_rw32_lbmap)));
349 		/* no use mapping the VAT node in the VAT */
350 		return;
351 	}
352 
353 	/* record new position in VAT file */
354 	lb_num = udf_rw32(fdscr->tag.tag_loc);
355 
356 	/* lb_num = udf_rw32(udf_node->write_loc.loc.lb_num); */
357 
358 	DPRINTF(TRANSLATE, ("VAT entry change (log %u -> phys %u)\n",
359 			lb_num, lb_map));
360 
361 	/* VAT should be the longer than this write, can't go wrong */
362 	KASSERT(lb_num <= ump->vat_entries);
363 
364 	mutex_enter(&ump->allocate_mutex);
365 	error = udf_vat_write(ump->vat_node,
366 			(uint8_t *) &udf_rw32_lbmap, 4,
367 			ump->vat_offset + lb_num * 4);
368 	mutex_exit(&ump->allocate_mutex);
369 
370 	if (error)
371 		panic( "udf_VAT_mapping_update: HELP! i couldn't "
372 			"write in the VAT file ?\n");
373 }
374 
375 
376 static void
377 udf_issue_buf(struct udf_mount *ump, int queue, struct buf *buf)
378 {
379 	struct long_ad *node_ad_cpy;
380 	struct part_desc *pdesc;
381 	uint64_t *lmapping, *lmappos, blknr;
382 	uint32_t our_sectornr, sectornr, bpos;
383 	uint32_t ptov;
384 	uint16_t vpart_num;
385 	uint8_t *fidblk;
386 	int sector_size = ump->discinfo.sector_size;
387 	int blks = sector_size / DEV_BSIZE;
388 	int len, buf_len;
389 
390 	/* if reading, just pass to the device's STRATEGY */
391 	if (queue == UDF_SHED_READING) {
392 		DPRINTF(SHEDULE, ("\nudf_issue_buf READ %p : sector %d type %d,"
393 			"b_resid %d, b_bcount %d, b_bufsize %d\n",
394 			buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
395 			buf->b_resid, buf->b_bcount, buf->b_bufsize));
396 		VOP_STRATEGY(ump->devvp, buf);
397 		return;
398 	}
399 
400 	blknr        = buf->b_blkno;
401 	our_sectornr = blknr / blks;
402 
403 	if (queue == UDF_SHED_WRITING) {
404 		DPRINTF(SHEDULE, ("\nudf_issue_buf WRITE %p : sector %d "
405 			"type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
406 			buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
407 			buf->b_resid, buf->b_bcount, buf->b_bufsize));
408 		/* if we have FIDs fixup using buffer's sector number(s) */
409 		if (buf->b_udf_c_type == UDF_C_FIDS) {
410 			panic("UDF_C_FIDS in SHED_WRITING!\n");
411 			buf_len = buf->b_bcount;
412 			sectornr = our_sectornr;
413 			bpos = 0;
414 			while (buf_len) {
415 				len = MIN(buf_len, sector_size);
416 				fidblk = (uint8_t *) buf->b_data + bpos;
417 				udf_fixup_fid_block(fidblk, sector_size,
418 					0, len, sectornr);
419 				sectornr++;
420 				bpos += len;
421 				buf_len -= len;
422 			}
423 		}
424 		udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
425 		VOP_STRATEGY(ump->devvp, buf);
426 		return;
427 	}
428 
429 	KASSERT(queue == UDF_SHED_SEQWRITING);
430 	DPRINTF(SHEDULE, ("\nudf_issue_buf SEQWRITE %p : sector XXXX "
431 		"type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
432 		buf, buf->b_udf_c_type, buf->b_resid, buf->b_bcount,
433 		buf->b_bufsize));
434 
435 	/*
436 	 * Buffers should not have been allocated to disc addresses yet on
437 	 * this queue. Note that a buffer can get multiple extents allocated.
438 	 *
439 	 * lmapping contains lb_num relative to base partition.
440 	 */
441 	lmapping    = ump->la_lmapping;
442 	node_ad_cpy = ump->la_node_ad_cpy;
443 
444 	/* logically allocate buf and map it in the file */
445 	udf_late_allocate_buf(ump, buf, lmapping, node_ad_cpy, &vpart_num);
446 
447 	/* update mapping in the VAT */
448 	udf_VAT_mapping_update(ump, buf, *lmapping);
449 
450 	/*
451 	 * NOTE We are using the knowledge here that sequential media will
452 	 * always be mapped linearly. Thus no use to explicitly translate the
453 	 * lmapping list.
454 	 */
455 
456 	/* calculate offset from physical base partition */
457 	pdesc = ump->partitions[ump->vtop[vpart_num]];
458 	ptov  = udf_rw32(pdesc->start_loc);
459 
460 	/* set buffers blkno to the physical block number */
461 	buf->b_blkno = (*lmapping + ptov) * blks;
462 
463 	/* if we have FIDs, fixup using the new allocation table */
464 	if (buf->b_udf_c_type == UDF_C_FIDS) {
465 		buf_len = buf->b_bcount;
466 		bpos = 0;
467 		lmappos = lmapping;
468 		while (buf_len) {
469 			sectornr = *lmappos++;
470 			len = MIN(buf_len, sector_size);
471 			fidblk = (uint8_t *) buf->b_data + bpos;
472 			udf_fixup_fid_block(fidblk, sector_size,
473 				0, len, sectornr);
474 			bpos += len;
475 			buf_len -= len;
476 		}
477 	}
478 	udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
479 	VOP_STRATEGY(ump->devvp, buf);
480 }
481 
482 
483 static void
484 udf_doshedule(struct udf_mount *ump)
485 {
486 	struct buf *buf;
487 	struct timespec now, *last;
488 	struct strat_private *priv = PRIV(ump);
489 	void (*b_callback)(struct buf *);
490 	int new_queue;
491 	int error;
492 
493 	buf = BUFQ_GET(priv->queues[priv->cur_queue]);
494 	if (buf) {
495 		/* transfer from the current queue to the device queue */
496 		mutex_exit(&priv->discstrat_mutex);
497 
498 		/* transform buffer to synchronous; XXX needed? */
499 		b_callback = buf->b_iodone;
500 		buf->b_iodone = NULL;
501 		CLR(buf->b_flags, B_ASYNC);
502 
503 		/* issue and wait on completion */
504 		udf_issue_buf(ump, priv->cur_queue, buf);
505 		biowait(buf);
506 
507 		mutex_enter(&priv->discstrat_mutex);
508 
509 		/* if there is an error, repair this error, otherwise propagate */
510 		if (buf->b_error && ((buf->b_flags & B_READ) == 0)) {
511 			/* check what we need to do */
512 			panic("UDF write error, can't handle yet!\n");
513 		}
514 
515 		/* propagate result to higher layers */
516 		if (b_callback) {
517 			buf->b_iodone = b_callback;
518 			(*buf->b_iodone)(buf);
519 		}
520 
521 		return;
522 	}
523 
524 	/* Check if we're idling in this state */
525 	vfs_timestamp(&now);
526 	last = &priv->last_queued[priv->cur_queue];
527 	if (ump->discinfo.mmc_class == MMC_CLASS_CD) {
528 		/* dont switch too fast for CD media; its expensive in time */
529 		if (now.tv_sec - last->tv_sec < 3)
530 			return;
531 	}
532 
533 	/* check if we can/should switch */
534 	new_queue = priv->cur_queue;
535 
536 	if (BUFQ_PEEK(priv->queues[UDF_SHED_READING]))
537 		new_queue = UDF_SHED_READING;
538 	if (BUFQ_PEEK(priv->queues[UDF_SHED_SEQWRITING]))
539 		new_queue = UDF_SHED_SEQWRITING;
540 	if (BUFQ_PEEK(priv->queues[UDF_SHED_WRITING]))		/* only for unmount */
541 		new_queue = UDF_SHED_WRITING;
542 	if (priv->cur_queue == UDF_SHED_READING) {
543 		if (new_queue == UDF_SHED_SEQWRITING) {
544 			/* TODO use flag to signal if this is needed */
545 			mutex_exit(&priv->discstrat_mutex);
546 
547 			/* update trackinfo for data and metadata */
548 			error = udf_update_trackinfo(ump,
549 					&ump->data_track);
550 			assert(error == 0);
551 			error = udf_update_trackinfo(ump,
552 					&ump->metadata_track);
553 			assert(error == 0);
554 			mutex_enter(&priv->discstrat_mutex);
555 		}
556 	}
557 
558 	if (new_queue != priv->cur_queue) {
559 		DPRINTF(SHEDULE, ("switching from %d to %d\n",
560 			priv->cur_queue, new_queue));
561 	}
562 
563 	priv->cur_queue = new_queue;
564 }
565 
566 
567 static void
568 udf_discstrat_thread(void *arg)
569 {
570 	struct udf_mount *ump = (struct udf_mount *) arg;
571 	struct strat_private *priv = PRIV(ump);
572 	int empty;
573 
574 	empty = 1;
575 	mutex_enter(&priv->discstrat_mutex);
576 	while (priv->run_thread || !empty) {
577 		/* process the current selected queue */
578 		udf_doshedule(ump);
579 		empty  = (BUFQ_PEEK(priv->queues[UDF_SHED_READING]) == NULL);
580 		empty &= (BUFQ_PEEK(priv->queues[UDF_SHED_WRITING]) == NULL);
581 		empty &= (BUFQ_PEEK(priv->queues[UDF_SHED_SEQWRITING]) == NULL);
582 
583 		/* wait for more if needed */
584 		if (empty)
585 			cv_timedwait(&priv->discstrat_cv,
586 				&priv->discstrat_mutex, hz/8);
587 	}
588 	mutex_exit(&priv->discstrat_mutex);
589 
590 	wakeup(&priv->run_thread);
591 	kthread_exit(0);
592 	/* not reached */
593 }
594 
595 /* --------------------------------------------------------------------- */
596 
597 static void
598 udf_discstrat_init_seq(struct udf_strat_args *args)
599 {
600 	struct udf_mount *ump = args->ump;
601 	struct strat_private *priv = PRIV(ump);
602 	struct disk_strategy dkstrat;
603 	uint32_t lb_size;
604 
605 	KASSERT(ump);
606 	KASSERT(ump->logical_vol);
607 	KASSERT(priv == NULL);
608 
609 	lb_size = udf_rw32(ump->logical_vol->lb_size);
610 	KASSERT(lb_size > 0);
611 
612 	/* initialise our memory space */
613 	ump->strategy_private = malloc(sizeof(struct strat_private),
614 		M_UDFTEMP, M_WAITOK);
615 	priv = ump->strategy_private;
616 	memset(priv, 0 , sizeof(struct strat_private));
617 
618 	/* initialise locks */
619 	cv_init(&priv->discstrat_cv, "udfstrat");
620 	mutex_init(&priv->discstrat_mutex, MUTEX_DEFAULT, IPL_NONE);
621 
622 	/*
623 	 * Initialise pool for descriptors associated with nodes. This is done
624 	 * in lb_size units though currently lb_size is dictated to be
625 	 * sector_size.
626 	 */
627 	pool_init(&priv->desc_pool, lb_size, 0, 0, 0, "udf_desc_pool", NULL,
628 	    IPL_NONE);
629 
630 	/*
631 	 * remember old device strategy method and explicit set method
632 	 * `discsort' since we have our own more complex strategy that is not
633 	 * implementable on the CD device and other strategies will get in the
634 	 * way.
635 	 */
636 	memset(&priv->old_strategy_setting, 0,
637 		sizeof(struct disk_strategy));
638 	VOP_IOCTL(ump->devvp, DIOCGSTRATEGY, &priv->old_strategy_setting,
639 		FREAD | FKIOCTL, NOCRED);
640 	memset(&dkstrat, 0, sizeof(struct disk_strategy));
641 	strcpy(dkstrat.dks_name, "discsort");
642 	VOP_IOCTL(ump->devvp, DIOCSSTRATEGY, &dkstrat, FWRITE | FKIOCTL,
643 		NOCRED);
644 
645 	/* initialise our internal sheduler */
646 	priv->cur_queue = UDF_SHED_READING;
647 	bufq_alloc(&priv->queues[UDF_SHED_READING], "disksort",
648 		BUFQ_SORT_RAWBLOCK);
649 	bufq_alloc(&priv->queues[UDF_SHED_WRITING], "disksort",
650 		BUFQ_SORT_RAWBLOCK);
651 	bufq_alloc(&priv->queues[UDF_SHED_SEQWRITING], "fcfs", 0);
652 	vfs_timestamp(&priv->last_queued[UDF_SHED_READING]);
653 	vfs_timestamp(&priv->last_queued[UDF_SHED_WRITING]);
654 	vfs_timestamp(&priv->last_queued[UDF_SHED_SEQWRITING]);
655 
656 	/* create our disk strategy thread */
657 	priv->run_thread = 1;
658 	if (kthread_create(PRI_NONE, 0 /* KTHREAD_MPSAFE*/, NULL /* cpu_info*/,
659 		udf_discstrat_thread, ump, &priv->queue_lwp,
660 		"%s", "udf_rw")) {
661 		panic("fork udf_rw");
662 	}
663 }
664 
665 
666 static void
667 udf_discstrat_finish_seq(struct udf_strat_args *args)
668 {
669 	struct udf_mount *ump = args->ump;
670 	struct strat_private *priv = PRIV(ump);
671 	int error;
672 
673 	if (ump == NULL)
674 		return;
675 
676 	/* stop our sheduling thread */
677 	KASSERT(priv->run_thread == 1);
678 	priv->run_thread = 0;
679 	wakeup(priv->queue_lwp);
680 	do {
681 		error = tsleep(&priv->run_thread, PRIBIO+1,
682 			"udfshedfin", hz);
683 	} while (error);
684 	/* kthread should be finished now */
685 
686 	/* set back old device strategy method */
687 	VOP_IOCTL(ump->devvp, DIOCSSTRATEGY, &priv->old_strategy_setting,
688 			FWRITE, NOCRED);
689 
690 	/* destroy our pool */
691 	pool_destroy(&priv->desc_pool);
692 
693 	/* free our private space */
694 	free(ump->strategy_private, M_UDFTEMP);
695 	ump->strategy_private = NULL;
696 }
697 
698 /* --------------------------------------------------------------------- */
699 
700 struct udf_strategy udf_strat_sequential =
701 {
702 	udf_create_logvol_dscr_seq,
703 	udf_free_logvol_dscr_seq,
704 	udf_read_logvol_dscr_seq,
705 	udf_write_logvol_dscr_seq,
706 	udf_queuebuf_seq,
707 	udf_discstrat_init_seq,
708 	udf_discstrat_finish_seq
709 };
710 
711 
712