xref: /netbsd-src/sys/fs/udf/udf_strat_sequential.c (revision b78992537496bc71ee3d761f9fe0be0fc0a9a001)
1 /* $NetBSD: udf_strat_sequential.c,v 1.5 2008/08/29 15:04:18 reinoud Exp $ */
2 
3 /*
4  * Copyright (c) 2006, 2008 Reinoud Zandijk
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  */
28 
29 #include <sys/cdefs.h>
30 #ifndef lint
31 __KERNEL_RCSID(0, "$NetBSD: udf_strat_sequential.c,v 1.5 2008/08/29 15:04:18 reinoud Exp $");
32 #endif /* not lint */
33 
34 
35 #if defined(_KERNEL_OPT)
36 #include "opt_quota.h"
37 #include "opt_compat_netbsd.h"
38 #endif
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/sysctl.h>
43 #include <sys/namei.h>
44 #include <sys/proc.h>
45 #include <sys/kernel.h>
46 #include <sys/vnode.h>
47 #include <miscfs/genfs/genfs_node.h>
48 #include <sys/mount.h>
49 #include <sys/buf.h>
50 #include <sys/file.h>
51 #include <sys/device.h>
52 #include <sys/disklabel.h>
53 #include <sys/ioctl.h>
54 #include <sys/malloc.h>
55 #include <sys/dirent.h>
56 #include <sys/stat.h>
57 #include <sys/conf.h>
58 #include <sys/kauth.h>
59 #include <sys/kthread.h>
60 #include <dev/clock_subr.h>
61 
62 #include <fs/udf/ecma167-udf.h>
63 #include <fs/udf/udf_mount.h>
64 
65 #include "udf.h"
66 #include "udf_subr.h"
67 #include "udf_bswap.h"
68 
69 
/*
 * Accessor macros. The argument is parenthesized so that any pointer
 * expression (not just a plain identifier) can be passed safely.
 */
/* Get the UDF node stored in a vnode's v_data field. */
#define VTOI(vnode) ((struct udf_node *) (vnode)->v_data)
/* Get the private state of this strategy stored in a mount's strategy_private. */
#define PRIV(ump) ((struct strat_private *) (ump)->strategy_private)
72 
73 /* --------------------------------------------------------------------- */
74 
/*
 * Indices of the private buffer queues.
 *
 * Reads go to UDF_SHED_READING; writes of type UDF_C_DSCR go to
 * UDF_SHED_WRITING and all other writes go to UDF_SHED_SEQWRITING.
 * UDF_SHED_MAX is the number of queues and sizes the per-queue arrays.
 */
#define UDF_SHED_READING	0
#define UDF_SHED_WRITING	1
#define UDF_SHED_SEQWRITING	2

#define UDF_SHED_MAX		3
81 
82 struct strat_private {
83 	struct pool		 desc_pool;	 	/* node descriptors */
84 
85 	lwp_t			*queue_lwp;
86 	kcondvar_t		 discstrat_cv;		/* to wait on       */
87 	kmutex_t		 discstrat_mutex;	/* disc strategy    */
88 
89 	int			 run_thread;		/* thread control */
90 	int			 cur_queue;
91 
92 	struct disk_strategy	 old_strategy_setting;
93 	struct bufq_state	*queues[UDF_SHED_MAX];
94 	struct timespec		 last_queued[UDF_SHED_MAX];
95 };
96 
97 
98 /* --------------------------------------------------------------------- */
99 
100 static void
101 udf_wr_nodedscr_callback(struct buf *buf)
102 {
103 	struct udf_node *udf_node;
104 
105 	KASSERT(buf);
106 	KASSERT(buf->b_data);
107 
108 	/* called when write action is done */
109 	DPRINTF(WRITE, ("udf_wr_nodedscr_callback(): node written out\n"));
110 
111 	udf_node = VTOI(buf->b_vp);
112 	if (udf_node == NULL) {
113 		putiobuf(buf);
114 		printf("udf_wr_node_callback: NULL node?\n");
115 		return;
116 	}
117 
118 	/* XXX right flags to mark dirty again on error? */
119 	if (buf->b_error) {
120 		udf_node->i_flags |= IN_MODIFIED | IN_ACCESSED;
121 		/* XXX TODO reshedule on error */
122 	}
123 
124 	/* decrement outstanding_nodedscr */
125 	KASSERT(udf_node->outstanding_nodedscr >= 1);
126 	udf_node->outstanding_nodedscr--;
127 	if (udf_node->outstanding_nodedscr == 0) {
128 		/* first unlock the node */
129 		KASSERT(udf_node->i_flags & IN_CALLBACK_ULK);
130 		UDF_UNLOCK_NODE(udf_node, IN_CALLBACK_ULK);
131 
132 		wakeup(&udf_node->outstanding_nodedscr);
133 	}
134 
135 	/* unreference the vnode so it can be recycled */
136 	holdrele(udf_node->vnode);
137 
138 	putiobuf(buf);
139 }
140 
141 /* --------------------------------------------------------------------- */
142 
143 static int
144 udf_create_logvol_dscr_seq(struct udf_strat_args *args)
145 {
146 	union dscrptr   **dscrptr = &args->dscr;
147 	struct udf_mount *ump = args->ump;
148 	struct strat_private *priv = PRIV(ump);
149 	uint32_t lb_size;
150 
151 	lb_size = udf_rw32(ump->logical_vol->lb_size);
152 	*dscrptr = pool_get(&priv->desc_pool, PR_WAITOK);
153 	memset(*dscrptr, 0, lb_size);
154 
155 	return 0;
156 }
157 
158 
159 static void
160 udf_free_logvol_dscr_seq(struct udf_strat_args *args)
161 {
162 	union dscrptr    *dscr = args->dscr;
163 	struct udf_mount *ump  = args->ump;
164 	struct strat_private *priv = PRIV(ump);
165 
166 	pool_put(&priv->desc_pool, dscr);
167 }
168 
169 
170 static int
171 udf_read_logvol_dscr_seq(struct udf_strat_args *args)
172 {
173 	union dscrptr   **dscrptr = &args->dscr;
174 	union dscrptr    *tmpdscr;
175 	struct udf_mount *ump = args->ump;
176 	struct long_ad   *icb = args->icb;
177 	struct strat_private *priv = PRIV(ump);
178 	uint32_t lb_size;
179 	uint32_t sector, dummy;
180 	int error;
181 
182 	lb_size = udf_rw32(ump->logical_vol->lb_size);
183 
184 	error = udf_translate_vtop(ump, icb, &sector, &dummy);
185 	if (error)
186 		return error;
187 
188 	/* try to read in fe/efe */
189 	error = udf_read_phys_dscr(ump, sector, M_UDFTEMP, &tmpdscr);
190 	if (error)
191 		return error;
192 
193 	*dscrptr = pool_get(&priv->desc_pool, PR_WAITOK);
194 	memcpy(*dscrptr, tmpdscr, lb_size);
195 	free(tmpdscr, M_UDFTEMP);
196 
197 	return 0;
198 }
199 
200 
201 static int
202 udf_write_logvol_dscr_seq(struct udf_strat_args *args)
203 {
204 	union dscrptr    *dscr     = args->dscr;
205 	struct udf_mount *ump      = args->ump;
206 	struct udf_node  *udf_node = args->udf_node;
207 	struct long_ad   *icb      = args->icb;
208 	int               waitfor  = args->waitfor;
209 	uint32_t logsectornr, sectornr, dummy;
210 	int error, vpart;
211 
212 	/*
213 	 * we have to decide if we write it out sequential or at its fixed
214 	 * position by examining the partition its (to be) written on.
215 	 */
216 	vpart       = udf_rw16(udf_node->loc.loc.part_num);
217 	logsectornr = udf_rw32(icb->loc.lb_num);
218 	sectornr    = 0;
219 	if (ump->vtop_tp[vpart] != UDF_VTOP_TYPE_VIRT) {
220 		error = udf_translate_vtop(ump, icb, &sectornr, &dummy);
221 		if (error)
222 			goto out;
223 	}
224 
225 	/* add reference to the vnode to prevent recycling */
226 	vhold(udf_node->vnode);
227 
228 	if (waitfor) {
229 		DPRINTF(WRITE, ("udf_write_logvol_dscr: sync write\n"));
230 
231 		error = udf_write_phys_dscr_sync(ump, udf_node, UDF_C_NODE,
232 			dscr, sectornr, logsectornr);
233 	} else {
234 		DPRINTF(WRITE, ("udf_write_logvol_dscr: no wait, async write\n"));
235 
236 		error = udf_write_phys_dscr_async(ump, udf_node, UDF_C_NODE,
237 			dscr, sectornr, logsectornr, udf_wr_nodedscr_callback);
238 		/* will be UNLOCKED in call back */
239 		return error;
240 	}
241 
242 	holdrele(udf_node->vnode);
243 out:
244 	udf_node->outstanding_nodedscr--;
245 	if (udf_node->outstanding_nodedscr == 0) {
246 		UDF_UNLOCK_NODE(udf_node, 0);
247 		wakeup(&udf_node->outstanding_nodedscr);
248 	}
249 
250 	return error;
251 }
252 
253 /* --------------------------------------------------------------------- */
254 
/*
 * Main file-system specific scheduler. Due to the nature of optical media,
 * scheduling can't be performed in the traditional way. Most OS
 * implementations I've seen thus read or write a file atomically, giving all
 * kinds of side effects.
 *
 * This implementation uses a kernel thread to schedule the queued requests in
 * a way that is semi-optimal for optical media; this means approximately
 * (R*|(Wr*|Ws*))* since switching between reading and writing is expensive in
 * time.
 */
266 
267 static void
268 udf_queuebuf_seq(struct udf_strat_args *args)
269 {
270 	struct udf_mount *ump = args->ump;
271 	struct buf *nestbuf = args->nestbuf;
272 	struct strat_private *priv = PRIV(ump);
273 	int queue;
274 	int what;
275 
276 	KASSERT(ump);
277 	KASSERT(nestbuf);
278 	KASSERT(nestbuf->b_iodone == nestiobuf_iodone);
279 
280 	what = nestbuf->b_udf_c_type;
281 	queue = UDF_SHED_READING;
282 	if ((nestbuf->b_flags & B_READ) == 0) {
283 		/* writing */
284 		queue = UDF_SHED_SEQWRITING;
285 		if (what == UDF_C_DSCR)
286 			queue = UDF_SHED_WRITING;
287 #if 0
288 		if (queue == UDF_SHED_SEQWRITING) {
289 			/* TODO do add sector to uncommitted space */
290 		}
291 #endif
292 	}
293 
294 	/* use our own sheduler lists for more complex sheduling */
295 	mutex_enter(&priv->discstrat_mutex);
296 		BUFQ_PUT(priv->queues[queue], nestbuf);
297 		vfs_timestamp(&priv->last_queued[queue]);
298 	mutex_exit(&priv->discstrat_mutex);
299 
300 	/* signal our thread that there might be something to do */
301 	cv_signal(&priv->discstrat_cv);
302 }
303 
304 /* --------------------------------------------------------------------- */
305 
306 /* TODO convert to lb_size */
307 static void
308 udf_VAT_mapping_update(struct udf_mount *ump, struct buf *buf, uint32_t lb_map)
309 {
310 	union dscrptr    *fdscr = (union dscrptr *) buf->b_data;
311 	struct vnode     *vp = buf->b_vp;
312 	struct udf_node  *udf_node = VTOI(vp);
313 	uint32_t lb_size, blks;
314 	uint32_t lb_num;
315 	uint32_t udf_rw32_lbmap;
316 	int c_type = buf->b_udf_c_type;
317 	int error;
318 
319 	/* only interested when we're using a VAT */
320 	KASSERT(ump->vat_node);
321 	KASSERT(ump->vtop_alloc[ump->node_part] == UDF_ALLOC_VAT);
322 
323 	/* only nodes are recorded in the VAT */
324 	/* NOTE: and the fileset descriptor (FIXME ?) */
325 	if (c_type != UDF_C_NODE)
326 		return;
327 
328 	/* we now have an UDF FE/EFE node on media with VAT (or VAT itself) */
329 	lb_size = udf_rw32(ump->logical_vol->lb_size);
330 	blks = lb_size / DEV_BSIZE;
331 
332 	udf_rw32_lbmap = udf_rw32(lb_map);
333 
334 	/* if we're the VAT itself, only update our assigned sector number */
335 	if (udf_node == ump->vat_node) {
336 		fdscr->tag.tag_loc = udf_rw32_lbmap;
337 		udf_validate_tag_sum(fdscr);
338 		DPRINTF(TRANSLATE, ("VAT assigned to sector %u\n",
339 			udf_rw32(udf_rw32_lbmap)));
340 		/* no use mapping the VAT node in the VAT */
341 		return;
342 	}
343 
344 	/* record new position in VAT file */
345 	lb_num = udf_rw32(fdscr->tag.tag_loc);
346 
347 	/* lb_num = udf_rw32(udf_node->write_loc.loc.lb_num); */
348 
349 	DPRINTF(TRANSLATE, ("VAT entry change (log %u -> phys %u)\n",
350 			lb_num, lb_map));
351 
352 	/* VAT should be the longer than this write, can't go wrong */
353 	KASSERT(lb_num <= ump->vat_entries);
354 
355 	mutex_enter(&ump->allocate_mutex);
356 	error = udf_vat_write(ump->vat_node,
357 			(uint8_t *) &udf_rw32_lbmap, 4,
358 			ump->vat_offset + lb_num * 4);
359 	mutex_exit(&ump->allocate_mutex);
360 
361 	if (error)
362 		panic( "udf_VAT_mapping_update: HELP! i couldn't "
363 			"write in the VAT file ?\n");
364 }
365 
366 
367 static void
368 udf_issue_buf(struct udf_mount *ump, int queue, struct buf *buf)
369 {
370 	struct long_ad *node_ad_cpy;
371 	struct part_desc *pdesc;
372 	uint64_t *lmapping, *lmappos, blknr;
373 	uint32_t our_sectornr, sectornr, bpos;
374 	uint32_t ptov;
375 	uint16_t vpart_num;
376 	uint8_t *fidblk;
377 	int sector_size = ump->discinfo.sector_size;
378 	int blks = sector_size / DEV_BSIZE;
379 	int len, buf_len;
380 
381 	/* if reading, just pass to the device's STRATEGY */
382 	if (queue == UDF_SHED_READING) {
383 		DPRINTF(SHEDULE, ("\nudf_issue_buf READ %p : sector %d type %d,"
384 			"b_resid %d, b_bcount %d, b_bufsize %d\n",
385 			buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
386 			buf->b_resid, buf->b_bcount, buf->b_bufsize));
387 		VOP_STRATEGY(ump->devvp, buf);
388 		return;
389 	}
390 
391 	blknr        = buf->b_blkno;
392 	our_sectornr = blknr / blks;
393 
394 	if (queue == UDF_SHED_WRITING) {
395 		DPRINTF(SHEDULE, ("\nudf_issue_buf WRITE %p : sector %d "
396 			"type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
397 			buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
398 			buf->b_resid, buf->b_bcount, buf->b_bufsize));
399 		/* if we have FIDs fixup using buffer's sector number(s) */
400 		if (buf->b_udf_c_type == UDF_C_FIDS) {
401 			panic("UDF_C_FIDS in SHED_WRITING!\n");
402 			buf_len = buf->b_bcount;
403 			sectornr = our_sectornr;
404 			bpos = 0;
405 			while (buf_len) {
406 				len = MIN(buf_len, sector_size);
407 				fidblk = (uint8_t *) buf->b_data + bpos;
408 				udf_fixup_fid_block(fidblk, sector_size,
409 					0, len, sectornr);
410 				sectornr++;
411 				bpos += len;
412 				buf_len -= len;
413 			}
414 		}
415 		udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
416 		VOP_STRATEGY(ump->devvp, buf);
417 		return;
418 	}
419 
420 	KASSERT(queue == UDF_SHED_SEQWRITING);
421 	DPRINTF(SHEDULE, ("\nudf_issue_buf SEQWRITE %p : sector XXXX "
422 		"type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
423 		buf, buf->b_udf_c_type, buf->b_resid, buf->b_bcount,
424 		buf->b_bufsize));
425 
426 	/*
427 	 * Buffers should not have been allocated to disc addresses yet on
428 	 * this queue. Note that a buffer can get multiple extents allocated.
429 	 *
430 	 * lmapping contains lb_num relative to base partition.
431 	 */
432 	lmapping    = ump->la_lmapping;
433 	node_ad_cpy = ump->la_node_ad_cpy;
434 
435 	/* logically allocate buf and map it in the file */
436 	udf_late_allocate_buf(ump, buf, lmapping, node_ad_cpy, &vpart_num);
437 
438 	/* update mapping in the VAT */
439 	udf_VAT_mapping_update(ump, buf, *lmapping);
440 
441 	/*
442 	 * NOTE We are using the knowledge here that sequential media will
443 	 * always be mapped linearly. Thus no use to explicitly translate the
444 	 * lmapping list.
445 	 */
446 
447 	/* calculate offset from physical base partition */
448 	pdesc = ump->partitions[ump->vtop[vpart_num]];
449 	ptov  = udf_rw32(pdesc->start_loc);
450 
451 	/* set buffers blkno to the physical block number */
452 	buf->b_blkno = (*lmapping + ptov) * blks;
453 
454 	/* if we have FIDs, fixup using the new allocation table */
455 	if (buf->b_udf_c_type == UDF_C_FIDS) {
456 		buf_len = buf->b_bcount;
457 		bpos = 0;
458 		lmappos = lmapping;
459 		while (buf_len) {
460 			sectornr = *lmappos++;
461 			len = MIN(buf_len, sector_size);
462 			fidblk = (uint8_t *) buf->b_data + bpos;
463 			udf_fixup_fid_block(fidblk, sector_size,
464 				0, len, sectornr);
465 			bpos += len;
466 			buf_len -= len;
467 		}
468 	}
469 
470 	/* NOTE we can't have metadata space bitmap descriptors here */
471 
472 	udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
473 	VOP_STRATEGY(ump->devvp, buf);
474 }
475 
476 
477 static void
478 udf_doshedule(struct udf_mount *ump)
479 {
480 	struct buf *buf;
481 	struct timespec now, *last;
482 	struct strat_private *priv = PRIV(ump);
483 	void (*b_callback)(struct buf *);
484 	int new_queue;
485 	int error;
486 
487 	buf = BUFQ_GET(priv->queues[priv->cur_queue]);
488 	if (buf) {
489 		/* transfer from the current queue to the device queue */
490 		mutex_exit(&priv->discstrat_mutex);
491 
492 		/* transform buffer to synchronous; XXX needed? */
493 		b_callback = buf->b_iodone;
494 		buf->b_iodone = NULL;
495 		CLR(buf->b_flags, B_ASYNC);
496 
497 		/* issue and wait on completion */
498 		udf_issue_buf(ump, priv->cur_queue, buf);
499 		biowait(buf);
500 
501 		mutex_enter(&priv->discstrat_mutex);
502 
503 		/* if there is an error, repair this error, otherwise propagate */
504 		if (buf->b_error && ((buf->b_flags & B_READ) == 0)) {
505 			/* check what we need to do */
506 			panic("UDF write error, can't handle yet!\n");
507 		}
508 
509 		/* propagate result to higher layers */
510 		if (b_callback) {
511 			buf->b_iodone = b_callback;
512 			(*buf->b_iodone)(buf);
513 		}
514 
515 		return;
516 	}
517 
518 	/* Check if we're idling in this state */
519 	vfs_timestamp(&now);
520 	last = &priv->last_queued[priv->cur_queue];
521 	if (ump->discinfo.mmc_class == MMC_CLASS_CD) {
522 		/* dont switch too fast for CD media; its expensive in time */
523 		if (now.tv_sec - last->tv_sec < 3)
524 			return;
525 	}
526 
527 	/* check if we can/should switch */
528 	new_queue = priv->cur_queue;
529 
530 	if (BUFQ_PEEK(priv->queues[UDF_SHED_READING]))
531 		new_queue = UDF_SHED_READING;
532 	if (BUFQ_PEEK(priv->queues[UDF_SHED_SEQWRITING]))
533 		new_queue = UDF_SHED_SEQWRITING;
534 	if (BUFQ_PEEK(priv->queues[UDF_SHED_WRITING]))		/* only for unmount */
535 		new_queue = UDF_SHED_WRITING;
536 	if (priv->cur_queue == UDF_SHED_READING) {
537 		if (new_queue == UDF_SHED_SEQWRITING) {
538 			/* TODO use flag to signal if this is needed */
539 			mutex_exit(&priv->discstrat_mutex);
540 
541 			/* update trackinfo for data and metadata */
542 			error = udf_update_trackinfo(ump,
543 					&ump->data_track);
544 			assert(error == 0);
545 			error = udf_update_trackinfo(ump,
546 					&ump->metadata_track);
547 			assert(error == 0);
548 			mutex_enter(&priv->discstrat_mutex);
549 		}
550 	}
551 
552 	if (new_queue != priv->cur_queue) {
553 		DPRINTF(SHEDULE, ("switching from %d to %d\n",
554 			priv->cur_queue, new_queue));
555 	}
556 
557 	priv->cur_queue = new_queue;
558 }
559 
560 
561 static void
562 udf_discstrat_thread(void *arg)
563 {
564 	struct udf_mount *ump = (struct udf_mount *) arg;
565 	struct strat_private *priv = PRIV(ump);
566 	int empty;
567 
568 	empty = 1;
569 	mutex_enter(&priv->discstrat_mutex);
570 	while (priv->run_thread || !empty) {
571 		/* process the current selected queue */
572 		udf_doshedule(ump);
573 		empty  = (BUFQ_PEEK(priv->queues[UDF_SHED_READING]) == NULL);
574 		empty &= (BUFQ_PEEK(priv->queues[UDF_SHED_WRITING]) == NULL);
575 		empty &= (BUFQ_PEEK(priv->queues[UDF_SHED_SEQWRITING]) == NULL);
576 
577 		/* wait for more if needed */
578 		if (empty)
579 			cv_timedwait(&priv->discstrat_cv,
580 				&priv->discstrat_mutex, hz/8);
581 	}
582 	mutex_exit(&priv->discstrat_mutex);
583 
584 	wakeup(&priv->run_thread);
585 	kthread_exit(0);
586 	/* not reached */
587 }
588 
589 /* --------------------------------------------------------------------- */
590 
591 static void
592 udf_discstrat_init_seq(struct udf_strat_args *args)
593 {
594 	struct udf_mount *ump = args->ump;
595 	struct strat_private *priv = PRIV(ump);
596 	struct disk_strategy dkstrat;
597 	uint32_t lb_size;
598 
599 	KASSERT(ump);
600 	KASSERT(ump->logical_vol);
601 	KASSERT(priv == NULL);
602 
603 	lb_size = udf_rw32(ump->logical_vol->lb_size);
604 	KASSERT(lb_size > 0);
605 
606 	/* initialise our memory space */
607 	ump->strategy_private = malloc(sizeof(struct strat_private),
608 		M_UDFTEMP, M_WAITOK);
609 	priv = ump->strategy_private;
610 	memset(priv, 0 , sizeof(struct strat_private));
611 
612 	/* initialise locks */
613 	cv_init(&priv->discstrat_cv, "udfstrat");
614 	mutex_init(&priv->discstrat_mutex, MUTEX_DEFAULT, IPL_NONE);
615 
616 	/*
617 	 * Initialise pool for descriptors associated with nodes. This is done
618 	 * in lb_size units though currently lb_size is dictated to be
619 	 * sector_size.
620 	 */
621 	pool_init(&priv->desc_pool, lb_size, 0, 0, 0, "udf_desc_pool", NULL,
622 	    IPL_NONE);
623 
624 	/*
625 	 * remember old device strategy method and explicit set method
626 	 * `discsort' since we have our own more complex strategy that is not
627 	 * implementable on the CD device and other strategies will get in the
628 	 * way.
629 	 */
630 	memset(&priv->old_strategy_setting, 0,
631 		sizeof(struct disk_strategy));
632 	VOP_IOCTL(ump->devvp, DIOCGSTRATEGY, &priv->old_strategy_setting,
633 		FREAD | FKIOCTL, NOCRED);
634 	memset(&dkstrat, 0, sizeof(struct disk_strategy));
635 	strcpy(dkstrat.dks_name, "discsort");
636 	VOP_IOCTL(ump->devvp, DIOCSSTRATEGY, &dkstrat, FWRITE | FKIOCTL,
637 		NOCRED);
638 
639 	/* initialise our internal sheduler */
640 	priv->cur_queue = UDF_SHED_READING;
641 	bufq_alloc(&priv->queues[UDF_SHED_READING], "disksort",
642 		BUFQ_SORT_RAWBLOCK);
643 	bufq_alloc(&priv->queues[UDF_SHED_WRITING], "disksort",
644 		BUFQ_SORT_RAWBLOCK);
645 	bufq_alloc(&priv->queues[UDF_SHED_SEQWRITING], "fcfs", 0);
646 	vfs_timestamp(&priv->last_queued[UDF_SHED_READING]);
647 	vfs_timestamp(&priv->last_queued[UDF_SHED_WRITING]);
648 	vfs_timestamp(&priv->last_queued[UDF_SHED_SEQWRITING]);
649 
650 	/* create our disk strategy thread */
651 	priv->run_thread = 1;
652 	if (kthread_create(PRI_NONE, 0 /* KTHREAD_MPSAFE*/, NULL /* cpu_info*/,
653 		udf_discstrat_thread, ump, &priv->queue_lwp,
654 		"%s", "udf_rw")) {
655 		panic("fork udf_rw");
656 	}
657 }
658 
659 
660 static void
661 udf_discstrat_finish_seq(struct udf_strat_args *args)
662 {
663 	struct udf_mount *ump = args->ump;
664 	struct strat_private *priv = PRIV(ump);
665 	int error;
666 
667 	if (ump == NULL)
668 		return;
669 
670 	/* stop our sheduling thread */
671 	KASSERT(priv->run_thread == 1);
672 	priv->run_thread = 0;
673 	wakeup(priv->queue_lwp);
674 	do {
675 		error = tsleep(&priv->run_thread, PRIBIO+1,
676 			"udfshedfin", hz);
677 	} while (error);
678 	/* kthread should be finished now */
679 
680 	/* set back old device strategy method */
681 	VOP_IOCTL(ump->devvp, DIOCSSTRATEGY, &priv->old_strategy_setting,
682 			FWRITE, NOCRED);
683 
684 	/* destroy our pool */
685 	pool_destroy(&priv->desc_pool);
686 
687 	/* free our private space */
688 	free(ump->strategy_private, M_UDFTEMP);
689 	ump->strategy_private = NULL;
690 }
691 
692 /* --------------------------------------------------------------------- */
693 
694 struct udf_strategy udf_strat_sequential =
695 {
696 	udf_create_logvol_dscr_seq,
697 	udf_free_logvol_dscr_seq,
698 	udf_read_logvol_dscr_seq,
699 	udf_write_logvol_dscr_seq,
700 	udf_queuebuf_seq,
701 	udf_discstrat_init_seq,
702 	udf_discstrat_finish_seq
703 };
704 
705 
706