xref: /netbsd-src/sys/fs/udf/udf_strat_sequential.c (revision 040768683fc73d9bbf7e5765a7f6adb6fef82b08)
1*04076868Sreinoud /* $NetBSD: udf_strat_sequential.c,v 1.20 2023/06/27 09:58:50 reinoud Exp $ */
2e979c658Sreinoud 
3e979c658Sreinoud /*
4e979c658Sreinoud  * Copyright (c) 2006, 2008 Reinoud Zandijk
5e979c658Sreinoud  * All rights reserved.
6e979c658Sreinoud  *
7e979c658Sreinoud  * Redistribution and use in source and binary forms, with or without
8e979c658Sreinoud  * modification, are permitted provided that the following conditions
9e979c658Sreinoud  * are met:
10e979c658Sreinoud  * 1. Redistributions of source code must retain the above copyright
11e979c658Sreinoud  *    notice, this list of conditions and the following disclaimer.
12e979c658Sreinoud  * 2. Redistributions in binary form must reproduce the above copyright
13e979c658Sreinoud  *    notice, this list of conditions and the following disclaimer in the
14e979c658Sreinoud  *    documentation and/or other materials provided with the distribution.
15e979c658Sreinoud  *
16e979c658Sreinoud  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17e979c658Sreinoud  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18e979c658Sreinoud  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19e979c658Sreinoud  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20e979c658Sreinoud  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21e979c658Sreinoud  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22e979c658Sreinoud  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23e979c658Sreinoud  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24e979c658Sreinoud  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25e979c658Sreinoud  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26e979c658Sreinoud  *
27e979c658Sreinoud  */
28e979c658Sreinoud 
29e979c658Sreinoud #include <sys/cdefs.h>
30e979c658Sreinoud #ifndef lint
31*04076868Sreinoud __KERNEL_RCSID(0, "$NetBSD: udf_strat_sequential.c,v 1.20 2023/06/27 09:58:50 reinoud Exp $");
32e979c658Sreinoud #endif /* not lint */
33e979c658Sreinoud 
34e979c658Sreinoud 
35e979c658Sreinoud #if defined(_KERNEL_OPT)
36e979c658Sreinoud #include "opt_compat_netbsd.h"
37e979c658Sreinoud #endif
38e979c658Sreinoud 
39e979c658Sreinoud #include <sys/param.h>
40e979c658Sreinoud #include <sys/systm.h>
41e979c658Sreinoud #include <sys/sysctl.h>
42e979c658Sreinoud #include <sys/namei.h>
43e979c658Sreinoud #include <sys/proc.h>
44e979c658Sreinoud #include <sys/kernel.h>
45e979c658Sreinoud #include <sys/vnode.h>
46e979c658Sreinoud #include <miscfs/genfs/genfs_node.h>
47e979c658Sreinoud #include <sys/mount.h>
48e979c658Sreinoud #include <sys/buf.h>
49e979c658Sreinoud #include <sys/file.h>
50e979c658Sreinoud #include <sys/device.h>
51e979c658Sreinoud #include <sys/disklabel.h>
52e979c658Sreinoud #include <sys/ioctl.h>
53e979c658Sreinoud #include <sys/malloc.h>
54e979c658Sreinoud #include <sys/dirent.h>
55e979c658Sreinoud #include <sys/stat.h>
56e979c658Sreinoud #include <sys/conf.h>
57e979c658Sreinoud #include <sys/kauth.h>
58e979c658Sreinoud #include <sys/kthread.h>
59e979c658Sreinoud #include <dev/clock_subr.h>
60e979c658Sreinoud 
61e979c658Sreinoud #include <fs/udf/ecma167-udf.h>
62e979c658Sreinoud #include <fs/udf/udf_mount.h>
63e979c658Sreinoud 
64e979c658Sreinoud #include "udf.h"
65e979c658Sreinoud #include "udf_subr.h"
66e979c658Sreinoud #include "udf_bswap.h"
67e979c658Sreinoud 
68e979c658Sreinoud 
/*
 * Accessor macros. The argument is parenthesized so that expression
 * arguments (e.g. `vp + 1' or `cond ? a : b') expand correctly.
 */
/* vnode -> UDF node stored in its v_data */
#define VTOI(vnode) ((struct udf_node *) (vnode)->v_data)
/* mount -> private state of this strategy */
#define PRIV(ump) ((struct strat_private *) (ump)->strategy_private)
71e979c658Sreinoud 
72e979c658Sreinoud /* --------------------------------------------------------------------- */
73e979c658Sreinoud 
/*
 * Internal buffer queues. The values index the queues[] and last_queued[]
 * arrays in struct strat_private.
 */
#define UDF_SHED_READING	0	/* all reads */
#define UDF_SHED_WRITING	1	/* writes of type UDF_C_ABSOLUTE */
#define UDF_SHED_SEQWRITING	2	/* all other writes */

#define UDF_SHED_MAX		3	/* number of queues */
80e979c658Sreinoud 
81e979c658Sreinoud struct strat_private {
82e979c658Sreinoud 	struct pool		 desc_pool;	 	/* node descriptors */
83e979c658Sreinoud 
84e979c658Sreinoud 	lwp_t			*queue_lwp;
85e979c658Sreinoud 	kcondvar_t		 discstrat_cv;		/* to wait on       */
86e979c658Sreinoud 	kmutex_t		 discstrat_mutex;	/* disc strategy    */
87e979c658Sreinoud 
88*04076868Sreinoud 	int			 thread_running;	/* thread control */
89e979c658Sreinoud 	int			 run_thread;		/* thread control */
90*04076868Sreinoud 	int			 thread_finished;	/* thread control */
91*04076868Sreinoud 
9242866dd2Sreinoud 	int			 sync_req;		/* thread control */
93e979c658Sreinoud 	int			 cur_queue;
94e979c658Sreinoud 
95e979c658Sreinoud 	struct disk_strategy	 old_strategy_setting;
96e979c658Sreinoud 	struct bufq_state	*queues[UDF_SHED_MAX];
97e979c658Sreinoud 	struct timespec		 last_queued[UDF_SHED_MAX];
98e979c658Sreinoud };
99e979c658Sreinoud 
100e979c658Sreinoud 
101e979c658Sreinoud /* --------------------------------------------------------------------- */
102e979c658Sreinoud 
103e979c658Sreinoud static void
udf_wr_nodedscr_callback(struct buf * buf)104e979c658Sreinoud udf_wr_nodedscr_callback(struct buf *buf)
105e979c658Sreinoud {
106e979c658Sreinoud 	struct udf_node *udf_node;
107e979c658Sreinoud 
108e979c658Sreinoud 	KASSERT(buf);
109e979c658Sreinoud 	KASSERT(buf->b_data);
110e979c658Sreinoud 
111e979c658Sreinoud 	/* called when write action is done */
112e979c658Sreinoud 	DPRINTF(WRITE, ("udf_wr_nodedscr_callback(): node written out\n"));
113e979c658Sreinoud 
114e979c658Sreinoud 	udf_node = VTOI(buf->b_vp);
115e979c658Sreinoud 	if (udf_node == NULL) {
116e979c658Sreinoud 		putiobuf(buf);
117e979c658Sreinoud 		printf("udf_wr_node_callback: NULL node?\n");
118e979c658Sreinoud 		return;
119e979c658Sreinoud 	}
120e979c658Sreinoud 
121e979c658Sreinoud 	/* XXX right flags to mark dirty again on error? */
122e979c658Sreinoud 	if (buf->b_error) {
123e979c658Sreinoud 		udf_node->i_flags |= IN_MODIFIED | IN_ACCESSED;
124f84252b4Sandvar 		/* XXX TODO reschedule on error */
125e979c658Sreinoud 	}
126e979c658Sreinoud 
1274d5c88faSreinoud 	/* decrement outstanding_nodedscr */
1284d5c88faSreinoud 	KASSERT(udf_node->outstanding_nodedscr >= 1);
1294d5c88faSreinoud 	udf_node->outstanding_nodedscr--;
1304d5c88faSreinoud 	if (udf_node->outstanding_nodedscr == 0) {
131e979c658Sreinoud 		/* first unlock the node */
132b33f7bb0Sreinoud 		UDF_UNLOCK_NODE(udf_node, 0);
133*04076868Sreinoud 		cv_broadcast(&udf_node->node_lock);
1344d5c88faSreinoud 	}
1354d5c88faSreinoud 
136e979c658Sreinoud 	putiobuf(buf);
137e979c658Sreinoud }
138e979c658Sreinoud 
139e979c658Sreinoud /* --------------------------------------------------------------------- */
140e979c658Sreinoud 
141e979c658Sreinoud static int
udf_create_logvol_dscr_seq(struct udf_strat_args * args)142e979c658Sreinoud udf_create_logvol_dscr_seq(struct udf_strat_args *args)
143e979c658Sreinoud {
144e979c658Sreinoud 	union dscrptr   **dscrptr = &args->dscr;
145e979c658Sreinoud 	struct udf_mount *ump = args->ump;
146e979c658Sreinoud 	struct strat_private *priv = PRIV(ump);
147e979c658Sreinoud 	uint32_t lb_size;
148e979c658Sreinoud 
149e979c658Sreinoud 	lb_size = udf_rw32(ump->logical_vol->lb_size);
150e979c658Sreinoud 	*dscrptr = pool_get(&priv->desc_pool, PR_WAITOK);
151e979c658Sreinoud 	memset(*dscrptr, 0, lb_size);
152e979c658Sreinoud 
153e979c658Sreinoud 	return 0;
154e979c658Sreinoud }
155e979c658Sreinoud 
156e979c658Sreinoud 
157e979c658Sreinoud static void
udf_free_logvol_dscr_seq(struct udf_strat_args * args)158e979c658Sreinoud udf_free_logvol_dscr_seq(struct udf_strat_args *args)
159e979c658Sreinoud {
160e979c658Sreinoud 	union dscrptr    *dscr = args->dscr;
161e979c658Sreinoud 	struct udf_mount *ump  = args->ump;
162e979c658Sreinoud 	struct strat_private *priv = PRIV(ump);
163e979c658Sreinoud 
164e979c658Sreinoud 	pool_put(&priv->desc_pool, dscr);
165e979c658Sreinoud }
166e979c658Sreinoud 
167e979c658Sreinoud 
168e979c658Sreinoud static int
udf_read_logvol_dscr_seq(struct udf_strat_args * args)169e979c658Sreinoud udf_read_logvol_dscr_seq(struct udf_strat_args *args)
170e979c658Sreinoud {
171e979c658Sreinoud 	union dscrptr   **dscrptr = &args->dscr;
172e979c658Sreinoud 	union dscrptr    *tmpdscr;
173e979c658Sreinoud 	struct udf_mount *ump = args->ump;
174e979c658Sreinoud 	struct long_ad   *icb = args->icb;
175e979c658Sreinoud 	struct strat_private *priv = PRIV(ump);
176e979c658Sreinoud 	uint32_t lb_size;
177e979c658Sreinoud 	uint32_t sector, dummy;
178e979c658Sreinoud 	int error;
179e979c658Sreinoud 
180e979c658Sreinoud 	lb_size = udf_rw32(ump->logical_vol->lb_size);
181e979c658Sreinoud 
182e979c658Sreinoud 	error = udf_translate_vtop(ump, icb, &sector, &dummy);
183e979c658Sreinoud 	if (error)
184e979c658Sreinoud 		return error;
185e979c658Sreinoud 
186e979c658Sreinoud 	/* try to read in fe/efe */
187e979c658Sreinoud 	error = udf_read_phys_dscr(ump, sector, M_UDFTEMP, &tmpdscr);
188e979c658Sreinoud 	if (error)
189e979c658Sreinoud 		return error;
190e979c658Sreinoud 
191e979c658Sreinoud 	*dscrptr = pool_get(&priv->desc_pool, PR_WAITOK);
192e979c658Sreinoud 	memcpy(*dscrptr, tmpdscr, lb_size);
193e979c658Sreinoud 	free(tmpdscr, M_UDFTEMP);
194e979c658Sreinoud 
195e979c658Sreinoud 	return 0;
196e979c658Sreinoud }
197e979c658Sreinoud 
198e979c658Sreinoud 
199e979c658Sreinoud static int
udf_write_logvol_dscr_seq(struct udf_strat_args * args)200e979c658Sreinoud udf_write_logvol_dscr_seq(struct udf_strat_args *args)
201e979c658Sreinoud {
202e979c658Sreinoud 	union dscrptr    *dscr     = args->dscr;
203e979c658Sreinoud 	struct udf_mount *ump      = args->ump;
204e979c658Sreinoud 	struct udf_node  *udf_node = args->udf_node;
205e979c658Sreinoud 	struct long_ad   *icb      = args->icb;
206e979c658Sreinoud 	int               waitfor  = args->waitfor;
207e979c658Sreinoud 	uint32_t logsectornr, sectornr, dummy;
208e979c658Sreinoud 	int error, vpart;
209e979c658Sreinoud 
210e979c658Sreinoud 	/*
211e979c658Sreinoud 	 * we have to decide if we write it out sequential or at its fixed
212e979c658Sreinoud 	 * position by examining the partition its (to be) written on.
213e979c658Sreinoud 	 */
214e979c658Sreinoud 	vpart       = udf_rw16(udf_node->loc.loc.part_num);
215e979c658Sreinoud 	logsectornr = udf_rw32(icb->loc.lb_num);
216e979c658Sreinoud 	sectornr    = 0;
217e979c658Sreinoud 	if (ump->vtop_tp[vpart] != UDF_VTOP_TYPE_VIRT) {
218e979c658Sreinoud 		error = udf_translate_vtop(ump, icb, &sectornr, &dummy);
219e979c658Sreinoud 		if (error)
2204d5c88faSreinoud 			goto out;
221e979c658Sreinoud 	}
222e979c658Sreinoud 
223e979c658Sreinoud 	if (waitfor) {
224e979c658Sreinoud 		DPRINTF(WRITE, ("udf_write_logvol_dscr: sync write\n"));
225e979c658Sreinoud 
226e979c658Sreinoud 		error = udf_write_phys_dscr_sync(ump, udf_node, UDF_C_NODE,
227e979c658Sreinoud 			dscr, sectornr, logsectornr);
228e979c658Sreinoud 	} else {
229e979c658Sreinoud 		DPRINTF(WRITE, ("udf_write_logvol_dscr: no wait, async write\n"));
230e979c658Sreinoud 
231e979c658Sreinoud 		error = udf_write_phys_dscr_async(ump, udf_node, UDF_C_NODE,
232e979c658Sreinoud 			dscr, sectornr, logsectornr, udf_wr_nodedscr_callback);
233e979c658Sreinoud 		/* will be UNLOCKED in call back */
2344d5c88faSreinoud 		return error;
2354d5c88faSreinoud 	}
2364d5c88faSreinoud out:
2374d5c88faSreinoud 	udf_node->outstanding_nodedscr--;
2384d5c88faSreinoud 	if (udf_node->outstanding_nodedscr == 0) {
2394d5c88faSreinoud 		UDF_UNLOCK_NODE(udf_node, 0);
240*04076868Sreinoud 		cv_broadcast(&udf_node->node_lock);
241e979c658Sreinoud 	}
242e979c658Sreinoud 
243e979c658Sreinoud 	return error;
244e979c658Sreinoud }
245e979c658Sreinoud 
246e979c658Sreinoud /* --------------------------------------------------------------------- */
247e979c658Sreinoud 
/*
 * Main file-system specific scheduler. Due to the nature of optical media,
 * scheduling can't be performed in the traditional way. Most OS
 * implementations I've seen therefore read or write a file atomically, which
 * gives all kinds of side effects.
 *
 * This implementation uses a kernel thread to schedule the queued requests in
 * a way that is semi-optimal for optical media; this means approximately
 * (R*|(Wr*|Ws*))* since switching between reading and writing is expensive in
 * time.
 */
259e979c658Sreinoud 
260e979c658Sreinoud static void
udf_queuebuf_seq(struct udf_strat_args * args)261e979c658Sreinoud udf_queuebuf_seq(struct udf_strat_args *args)
262e979c658Sreinoud {
263e979c658Sreinoud 	struct udf_mount *ump = args->ump;
264e979c658Sreinoud 	struct buf *nestbuf = args->nestbuf;
265e979c658Sreinoud 	struct strat_private *priv = PRIV(ump);
266e979c658Sreinoud 	int queue;
267e979c658Sreinoud 	int what;
268e979c658Sreinoud 
269e979c658Sreinoud 	KASSERT(ump);
270e979c658Sreinoud 	KASSERT(nestbuf);
271e979c658Sreinoud 	KASSERT(nestbuf->b_iodone == nestiobuf_iodone);
272e979c658Sreinoud 
273e979c658Sreinoud 	what = nestbuf->b_udf_c_type;
274e979c658Sreinoud 	queue = UDF_SHED_READING;
275e979c658Sreinoud 	if ((nestbuf->b_flags & B_READ) == 0) {
276e979c658Sreinoud 		/* writing */
277e979c658Sreinoud 		queue = UDF_SHED_SEQWRITING;
2789609b0edSreinoud 		if (what == UDF_C_ABSOLUTE)
279e979c658Sreinoud 			queue = UDF_SHED_WRITING;
280e979c658Sreinoud 	}
281e979c658Sreinoud 
282f84252b4Sandvar 	/* use our own scheduler lists for more complex scheduling */
283e979c658Sreinoud 	mutex_enter(&priv->discstrat_mutex);
28470de9736Syamt 		bufq_put(priv->queues[queue], nestbuf);
285e979c658Sreinoud 		vfs_timestamp(&priv->last_queued[queue]);
286e979c658Sreinoud 	mutex_exit(&priv->discstrat_mutex);
287e979c658Sreinoud 
288e979c658Sreinoud 	/* signal our thread that there might be something to do */
289e979c658Sreinoud 	cv_signal(&priv->discstrat_cv);
290e979c658Sreinoud }
291e979c658Sreinoud 
292e979c658Sreinoud /* --------------------------------------------------------------------- */
293e979c658Sreinoud 
29442866dd2Sreinoud static void
udf_sync_caches_seq(struct udf_strat_args * args)29542866dd2Sreinoud udf_sync_caches_seq(struct udf_strat_args *args)
29642866dd2Sreinoud {
29742866dd2Sreinoud 	struct udf_mount *ump = args->ump;
29842866dd2Sreinoud 	struct strat_private *priv = PRIV(ump);
29942866dd2Sreinoud 
30042866dd2Sreinoud 	/* we might be called during unmount inadvertedly, be on safe side */
30142866dd2Sreinoud 	if (!priv)
30242866dd2Sreinoud 		return;
30342866dd2Sreinoud 
30442866dd2Sreinoud 	/* signal our thread that there might be something to do */
30542866dd2Sreinoud 	priv->sync_req = 1;
30642866dd2Sreinoud 	cv_signal(&priv->discstrat_cv);
30742866dd2Sreinoud 
30842866dd2Sreinoud 	mutex_enter(&priv->discstrat_mutex);
30942866dd2Sreinoud 		while (priv->sync_req) {
31042866dd2Sreinoud 			cv_timedwait(&priv->discstrat_cv,
31142866dd2Sreinoud 				&priv->discstrat_mutex, hz/8);
31242866dd2Sreinoud 		}
31342866dd2Sreinoud 	mutex_exit(&priv->discstrat_mutex);
31442866dd2Sreinoud }
31542866dd2Sreinoud 
31642866dd2Sreinoud /* --------------------------------------------------------------------- */
31742866dd2Sreinoud 
318e979c658Sreinoud /* TODO convert to lb_size */
319e979c658Sreinoud static void
udf_VAT_mapping_update(struct udf_mount * ump,struct buf * buf,uint32_t lb_map)32071c9aa33Sreinoud udf_VAT_mapping_update(struct udf_mount *ump, struct buf *buf, uint32_t lb_map)
321e979c658Sreinoud {
322e979c658Sreinoud 	union dscrptr    *fdscr = (union dscrptr *) buf->b_data;
323e979c658Sreinoud 	struct vnode     *vp = buf->b_vp;
324e979c658Sreinoud 	struct udf_node  *udf_node = VTOI(vp);
32571c9aa33Sreinoud 	uint32_t lb_num;
326e979c658Sreinoud 	uint32_t udf_rw32_lbmap;
327e979c658Sreinoud 	int c_type = buf->b_udf_c_type;
328e979c658Sreinoud 	int error;
329e979c658Sreinoud 
330e979c658Sreinoud 	/* only interested when we're using a VAT */
331e979c658Sreinoud 	KASSERT(ump->vat_node);
332a287d23dSreinoud 	KASSERT(ump->vtop_alloc[ump->node_part] == UDF_ALLOC_VAT);
333e979c658Sreinoud 
334e979c658Sreinoud 	/* only nodes are recorded in the VAT */
335e979c658Sreinoud 	/* NOTE: and the fileset descriptor (FIXME ?) */
336e979c658Sreinoud 	if (c_type != UDF_C_NODE)
337e979c658Sreinoud 		return;
338e979c658Sreinoud 
339e979c658Sreinoud 	udf_rw32_lbmap = udf_rw32(lb_map);
340e979c658Sreinoud 
341e979c658Sreinoud 	/* if we're the VAT itself, only update our assigned sector number */
342e979c658Sreinoud 	if (udf_node == ump->vat_node) {
343e979c658Sreinoud 		fdscr->tag.tag_loc = udf_rw32_lbmap;
344e979c658Sreinoud 		udf_validate_tag_sum(fdscr);
345e979c658Sreinoud 		DPRINTF(TRANSLATE, ("VAT assigned to sector %u\n",
346e979c658Sreinoud 			udf_rw32(udf_rw32_lbmap)));
347e979c658Sreinoud 		/* no use mapping the VAT node in the VAT */
348e979c658Sreinoud 		return;
349e979c658Sreinoud 	}
350e979c658Sreinoud 
351e979c658Sreinoud 	/* record new position in VAT file */
3524d5c88faSreinoud 	lb_num = udf_rw32(fdscr->tag.tag_loc);
3534d5c88faSreinoud 
3544d5c88faSreinoud 	/* lb_num = udf_rw32(udf_node->write_loc.loc.lb_num); */
355e979c658Sreinoud 
356e979c658Sreinoud 	DPRINTF(TRANSLATE, ("VAT entry change (log %u -> phys %u)\n",
357e979c658Sreinoud 			lb_num, lb_map));
358e979c658Sreinoud 
359e979c658Sreinoud 	/* VAT should be the longer than this write, can't go wrong */
360e979c658Sreinoud 	KASSERT(lb_num <= ump->vat_entries);
361e979c658Sreinoud 
362e979c658Sreinoud 	mutex_enter(&ump->allocate_mutex);
363e979c658Sreinoud 	error = udf_vat_write(ump->vat_node,
364e979c658Sreinoud 			(uint8_t *) &udf_rw32_lbmap, 4,
365e979c658Sreinoud 			ump->vat_offset + lb_num * 4);
366e979c658Sreinoud 	mutex_exit(&ump->allocate_mutex);
367e979c658Sreinoud 
368e979c658Sreinoud 	if (error)
369e979c658Sreinoud 		panic( "udf_VAT_mapping_update: HELP! i couldn't "
370e979c658Sreinoud 			"write in the VAT file ?\n");
371e979c658Sreinoud }
372e979c658Sreinoud 
373e979c658Sreinoud 
374e979c658Sreinoud static void
udf_issue_buf(struct udf_mount * ump,int queue,struct buf * buf)375e979c658Sreinoud udf_issue_buf(struct udf_mount *ump, int queue, struct buf *buf)
376e979c658Sreinoud {
3779609b0edSreinoud 	union dscrptr *dscr;
378e979c658Sreinoud 	struct long_ad *node_ad_cpy;
37971c9aa33Sreinoud 	struct part_desc *pdesc;
38091b2f42cSchristos 	uint64_t *lmapping, *lmappos;
38191b2f42cSchristos 	uint32_t sectornr, bpos;
38271c9aa33Sreinoud 	uint32_t ptov;
38371c9aa33Sreinoud 	uint16_t vpart_num;
384e979c658Sreinoud 	uint8_t *fidblk;
385e979c658Sreinoud 	int sector_size = ump->discinfo.sector_size;
386e979c658Sreinoud 	int blks = sector_size / DEV_BSIZE;
387e979c658Sreinoud 	int len, buf_len;
388e979c658Sreinoud 
389e979c658Sreinoud 	/* if reading, just pass to the device's STRATEGY */
390e979c658Sreinoud 	if (queue == UDF_SHED_READING) {
391e979c658Sreinoud 		DPRINTF(SHEDULE, ("\nudf_issue_buf READ %p : sector %d type %d,"
392e979c658Sreinoud 			"b_resid %d, b_bcount %d, b_bufsize %d\n",
393e979c658Sreinoud 			buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
394e979c658Sreinoud 			buf->b_resid, buf->b_bcount, buf->b_bufsize));
395e979c658Sreinoud 		VOP_STRATEGY(ump->devvp, buf);
396e979c658Sreinoud 		return;
397e979c658Sreinoud 	}
398e979c658Sreinoud 
399e979c658Sreinoud 	if (queue == UDF_SHED_WRITING) {
400e979c658Sreinoud 		DPRINTF(SHEDULE, ("\nudf_issue_buf WRITE %p : sector %d "
401e979c658Sreinoud 			"type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
402e979c658Sreinoud 			buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
403e979c658Sreinoud 			buf->b_resid, buf->b_bcount, buf->b_bufsize));
4049609b0edSreinoud 		KASSERT(buf->b_udf_c_type == UDF_C_ABSOLUTE);
4059609b0edSreinoud 
4069609b0edSreinoud 		// udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
407e979c658Sreinoud 		VOP_STRATEGY(ump->devvp, buf);
408e979c658Sreinoud 		return;
409e979c658Sreinoud 	}
410e979c658Sreinoud 
411e979c658Sreinoud 	KASSERT(queue == UDF_SHED_SEQWRITING);
412e979c658Sreinoud 	DPRINTF(SHEDULE, ("\nudf_issue_buf SEQWRITE %p : sector XXXX "
413e979c658Sreinoud 		"type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
414e979c658Sreinoud 		buf, buf->b_udf_c_type, buf->b_resid, buf->b_bcount,
415e979c658Sreinoud 		buf->b_bufsize));
416e979c658Sreinoud 
417e979c658Sreinoud 	/*
418e979c658Sreinoud 	 * Buffers should not have been allocated to disc addresses yet on
419e979c658Sreinoud 	 * this queue. Note that a buffer can get multiple extents allocated.
420e979c658Sreinoud 	 *
421e979c658Sreinoud 	 * lmapping contains lb_num relative to base partition.
422e979c658Sreinoud 	 */
423e979c658Sreinoud 	lmapping    = ump->la_lmapping;
424e979c658Sreinoud 	node_ad_cpy = ump->la_node_ad_cpy;
425e979c658Sreinoud 
42671c9aa33Sreinoud 	/* logically allocate buf and map it in the file */
42771c9aa33Sreinoud 	udf_late_allocate_buf(ump, buf, lmapping, node_ad_cpy, &vpart_num);
42871c9aa33Sreinoud 
42971c9aa33Sreinoud 	/*
43071c9aa33Sreinoud 	 * NOTE We are using the knowledge here that sequential media will
43171c9aa33Sreinoud 	 * always be mapped linearly. Thus no use to explicitly translate the
43271c9aa33Sreinoud 	 * lmapping list.
43371c9aa33Sreinoud 	 */
43471c9aa33Sreinoud 
43571c9aa33Sreinoud 	/* calculate offset from physical base partition */
43671c9aa33Sreinoud 	pdesc = ump->partitions[ump->vtop[vpart_num]];
43771c9aa33Sreinoud 	ptov  = udf_rw32(pdesc->start_loc);
43871c9aa33Sreinoud 
43971c9aa33Sreinoud 	/* set buffers blkno to the physical block number */
44071c9aa33Sreinoud 	buf->b_blkno = (*lmapping + ptov) * blks;
441e979c658Sreinoud 
4429609b0edSreinoud 	/* fixate floating descriptors */
4439609b0edSreinoud 	if (buf->b_udf_c_type == UDF_C_FLOAT_DSCR) {
4449609b0edSreinoud 		/* set our tag location to the absolute position */
4459609b0edSreinoud 		dscr = (union dscrptr *) buf->b_data;
4469609b0edSreinoud 		dscr->tag.tag_loc = udf_rw32(*lmapping + ptov);
4479609b0edSreinoud 		udf_validate_tag_and_crc_sums(dscr);
4489609b0edSreinoud 	}
4499609b0edSreinoud 
4509609b0edSreinoud 	/* update mapping in the VAT */
4519609b0edSreinoud 	if (buf->b_udf_c_type == UDF_C_NODE) {
4529609b0edSreinoud 		udf_VAT_mapping_update(ump, buf, *lmapping);
4539609b0edSreinoud 		udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
4549609b0edSreinoud 	}
4559609b0edSreinoud 
456e979c658Sreinoud 	/* if we have FIDs, fixup using the new allocation table */
457e979c658Sreinoud 	if (buf->b_udf_c_type == UDF_C_FIDS) {
458e979c658Sreinoud 		buf_len = buf->b_bcount;
459e979c658Sreinoud 		bpos = 0;
460e979c658Sreinoud 		lmappos = lmapping;
461e979c658Sreinoud 		while (buf_len) {
462e979c658Sreinoud 			sectornr = *lmappos++;
463e979c658Sreinoud 			len = MIN(buf_len, sector_size);
464e979c658Sreinoud 			fidblk = (uint8_t *) buf->b_data + bpos;
465e979c658Sreinoud 			udf_fixup_fid_block(fidblk, sector_size,
466e979c658Sreinoud 				0, len, sectornr);
467e979c658Sreinoud 			bpos += len;
468e979c658Sreinoud 			buf_len -= len;
469e979c658Sreinoud 		}
470e979c658Sreinoud 	}
471a287d23dSreinoud 
472e979c658Sreinoud 	VOP_STRATEGY(ump->devvp, buf);
473e979c658Sreinoud }
474e979c658Sreinoud 
475e979c658Sreinoud 
476e979c658Sreinoud static void
udf_doshedule(struct udf_mount * ump)477e979c658Sreinoud udf_doshedule(struct udf_mount *ump)
478e979c658Sreinoud {
479e979c658Sreinoud 	struct buf *buf;
480e979c658Sreinoud 	struct timespec now, *last;
481e979c658Sreinoud 	struct strat_private *priv = PRIV(ump);
482e979c658Sreinoud 	void (*b_callback)(struct buf *);
483e979c658Sreinoud 	int new_queue;
484e979c658Sreinoud 	int error;
485e979c658Sreinoud 
48670de9736Syamt 	buf = bufq_get(priv->queues[priv->cur_queue]);
487e979c658Sreinoud 	if (buf) {
488e979c658Sreinoud 		/* transfer from the current queue to the device queue */
489e979c658Sreinoud 		mutex_exit(&priv->discstrat_mutex);
490e979c658Sreinoud 
491e979c658Sreinoud 		/* transform buffer to synchronous; XXX needed? */
492e979c658Sreinoud 		b_callback = buf->b_iodone;
493e979c658Sreinoud 		buf->b_iodone = NULL;
494e979c658Sreinoud 		CLR(buf->b_flags, B_ASYNC);
495e979c658Sreinoud 
496e979c658Sreinoud 		/* issue and wait on completion */
497e979c658Sreinoud 		udf_issue_buf(ump, priv->cur_queue, buf);
498e979c658Sreinoud 		biowait(buf);
499e979c658Sreinoud 
500e979c658Sreinoud 		mutex_enter(&priv->discstrat_mutex);
501e979c658Sreinoud 
502e979c658Sreinoud 		/* if there is an error, repair this error, otherwise propagate */
503e979c658Sreinoud 		if (buf->b_error && ((buf->b_flags & B_READ) == 0)) {
504e979c658Sreinoud 			/* check what we need to do */
505e979c658Sreinoud 			panic("UDF write error, can't handle yet!\n");
506e979c658Sreinoud 		}
507e979c658Sreinoud 
508e979c658Sreinoud 		/* propagate result to higher layers */
509e979c658Sreinoud 		if (b_callback) {
510e979c658Sreinoud 			buf->b_iodone = b_callback;
511e979c658Sreinoud 			(*buf->b_iodone)(buf);
512e979c658Sreinoud 		}
513e979c658Sreinoud 
514e979c658Sreinoud 		return;
515e979c658Sreinoud 	}
516e979c658Sreinoud 
517e979c658Sreinoud 	/* Check if we're idling in this state */
518e979c658Sreinoud 	vfs_timestamp(&now);
519e979c658Sreinoud 	last = &priv->last_queued[priv->cur_queue];
520e979c658Sreinoud 	if (ump->discinfo.mmc_class == MMC_CLASS_CD) {
521e979c658Sreinoud 		/* dont switch too fast for CD media; its expensive in time */
522e979c658Sreinoud 		if (now.tv_sec - last->tv_sec < 3)
523e979c658Sreinoud 			return;
524e979c658Sreinoud 	}
525e979c658Sreinoud 
526e979c658Sreinoud 	/* check if we can/should switch */
527e979c658Sreinoud 	new_queue = priv->cur_queue;
528e979c658Sreinoud 
52970de9736Syamt 	if (bufq_peek(priv->queues[UDF_SHED_READING]))
530e979c658Sreinoud 		new_queue = UDF_SHED_READING;
53170de9736Syamt 	if (bufq_peek(priv->queues[UDF_SHED_WRITING]))		/* only for unmount */
532e979c658Sreinoud 		new_queue = UDF_SHED_WRITING;
533a02b732eSreinoud 	if (bufq_peek(priv->queues[UDF_SHED_SEQWRITING]))
534a02b732eSreinoud 		new_queue = UDF_SHED_SEQWRITING;
535e979c658Sreinoud 	if (priv->cur_queue == UDF_SHED_READING) {
536e979c658Sreinoud 		if (new_queue == UDF_SHED_SEQWRITING) {
537e979c658Sreinoud 			/* TODO use flag to signal if this is needed */
538e979c658Sreinoud 			mutex_exit(&priv->discstrat_mutex);
539e979c658Sreinoud 
540e979c658Sreinoud 			/* update trackinfo for data and metadata */
541e979c658Sreinoud 			error = udf_update_trackinfo(ump,
542e979c658Sreinoud 					&ump->data_track);
543e979c658Sreinoud 			assert(error == 0);
544e979c658Sreinoud 			error = udf_update_trackinfo(ump,
545e979c658Sreinoud 					&ump->metadata_track);
546e979c658Sreinoud 			assert(error == 0);
547e979c658Sreinoud 			mutex_enter(&priv->discstrat_mutex);
5480f18888bSchristos 			__USE(error);
549e979c658Sreinoud 		}
550e979c658Sreinoud 	}
551e979c658Sreinoud 
552e979c658Sreinoud 	if (new_queue != priv->cur_queue) {
553e979c658Sreinoud 		DPRINTF(SHEDULE, ("switching from %d to %d\n",
554e979c658Sreinoud 			priv->cur_queue, new_queue));
5555ba19a4fSreinoud 		if (new_queue == UDF_SHED_READING)
5565ba19a4fSreinoud 			udf_mmc_synchronise_caches(ump);
557e979c658Sreinoud 	}
558e979c658Sreinoud 
559e979c658Sreinoud 	priv->cur_queue = new_queue;
560e979c658Sreinoud }
561e979c658Sreinoud 
562e979c658Sreinoud 
563e979c658Sreinoud static void
udf_discstrat_thread(void * arg)564e979c658Sreinoud udf_discstrat_thread(void *arg)
565e979c658Sreinoud {
566e979c658Sreinoud 	struct udf_mount *ump = (struct udf_mount *) arg;
567e979c658Sreinoud 	struct strat_private *priv = PRIV(ump);
568e979c658Sreinoud 	int empty;
569e979c658Sreinoud 
570e979c658Sreinoud 	empty = 1;
571*04076868Sreinoud 
572*04076868Sreinoud 	priv->thread_running = 1;
573*04076868Sreinoud 	cv_broadcast(&priv->discstrat_cv);
574*04076868Sreinoud 
575e979c658Sreinoud 	mutex_enter(&priv->discstrat_mutex);
57642866dd2Sreinoud 	while (priv->run_thread || !empty || priv->sync_req) {
577e979c658Sreinoud 		/* process the current selected queue */
578e979c658Sreinoud 		udf_doshedule(ump);
57970de9736Syamt 		empty  = (bufq_peek(priv->queues[UDF_SHED_READING]) == NULL);
58070de9736Syamt 		empty &= (bufq_peek(priv->queues[UDF_SHED_WRITING]) == NULL);
58170de9736Syamt 		empty &= (bufq_peek(priv->queues[UDF_SHED_SEQWRITING]) == NULL);
582e979c658Sreinoud 
583e979c658Sreinoud 		/* wait for more if needed */
58442866dd2Sreinoud 		if (empty) {
58542866dd2Sreinoud 			if (priv->sync_req) {
58642866dd2Sreinoud 				/* on sync, we need to simulate a read->write transition */
58742866dd2Sreinoud 				udf_mmc_synchronise_caches(ump);
58842866dd2Sreinoud 				priv->cur_queue = UDF_SHED_READING;
58942866dd2Sreinoud 				priv->sync_req = 0;
59042866dd2Sreinoud 			}
591e979c658Sreinoud 			cv_timedwait(&priv->discstrat_cv,
592e979c658Sreinoud 				&priv->discstrat_mutex, hz/8);
593e979c658Sreinoud 		}
59442866dd2Sreinoud 	}
595e979c658Sreinoud 	mutex_exit(&priv->discstrat_mutex);
596e979c658Sreinoud 
597*04076868Sreinoud 	priv->thread_running  = 0;
598*04076868Sreinoud 	priv->thread_finished = 1;
599*04076868Sreinoud 	cv_broadcast(&priv->discstrat_cv);
600*04076868Sreinoud 
601e979c658Sreinoud 	kthread_exit(0);
602e979c658Sreinoud 	/* not reached */
603e979c658Sreinoud }
604e979c658Sreinoud 
605e979c658Sreinoud /* --------------------------------------------------------------------- */
606e979c658Sreinoud 
607e979c658Sreinoud static void
udf_discstrat_init_seq(struct udf_strat_args * args)608e979c658Sreinoud udf_discstrat_init_seq(struct udf_strat_args *args)
609e979c658Sreinoud {
610e979c658Sreinoud 	struct udf_mount *ump = args->ump;
611e979c658Sreinoud 	struct strat_private *priv = PRIV(ump);
612e979c658Sreinoud 	struct disk_strategy dkstrat;
613e979c658Sreinoud 	uint32_t lb_size;
614e979c658Sreinoud 
615e979c658Sreinoud 	KASSERT(ump);
616e979c658Sreinoud 	KASSERT(ump->logical_vol);
617e979c658Sreinoud 	KASSERT(priv == NULL);
618e979c658Sreinoud 
619e979c658Sreinoud 	lb_size = udf_rw32(ump->logical_vol->lb_size);
620e979c658Sreinoud 	KASSERT(lb_size > 0);
621e979c658Sreinoud 
622e979c658Sreinoud 	/* initialise our memory space */
623e979c658Sreinoud 	ump->strategy_private = malloc(sizeof(struct strat_private),
624e979c658Sreinoud 		M_UDFTEMP, M_WAITOK);
625e979c658Sreinoud 	priv = ump->strategy_private;
626e979c658Sreinoud 	memset(priv, 0 , sizeof(struct strat_private));
627e979c658Sreinoud 
628e979c658Sreinoud 	/* initialise locks */
629e979c658Sreinoud 	cv_init(&priv->discstrat_cv, "udfstrat");
630e979c658Sreinoud 	mutex_init(&priv->discstrat_mutex, MUTEX_DEFAULT, IPL_NONE);
631e979c658Sreinoud 
632e979c658Sreinoud 	/*
633e979c658Sreinoud 	 * Initialise pool for descriptors associated with nodes. This is done
634e979c658Sreinoud 	 * in lb_size units though currently lb_size is dictated to be
635e979c658Sreinoud 	 * sector_size.
636e979c658Sreinoud 	 */
637e979c658Sreinoud 	pool_init(&priv->desc_pool, lb_size, 0, 0, 0, "udf_desc_pool", NULL,
638e979c658Sreinoud 	    IPL_NONE);
639e979c658Sreinoud 
640e979c658Sreinoud 	/*
641e979c658Sreinoud 	 * remember old device strategy method and explicit set method
642e979c658Sreinoud 	 * `discsort' since we have our own more complex strategy that is not
643e979c658Sreinoud 	 * implementable on the CD device and other strategies will get in the
644e979c658Sreinoud 	 * way.
645e979c658Sreinoud 	 */
646e979c658Sreinoud 	memset(&priv->old_strategy_setting, 0,
647e979c658Sreinoud 		sizeof(struct disk_strategy));
648e979c658Sreinoud 	VOP_IOCTL(ump->devvp, DIOCGSTRATEGY, &priv->old_strategy_setting,
649e979c658Sreinoud 		FREAD | FKIOCTL, NOCRED);
650e979c658Sreinoud 	memset(&dkstrat, 0, sizeof(struct disk_strategy));
651e979c658Sreinoud 	strcpy(dkstrat.dks_name, "discsort");
652e979c658Sreinoud 	VOP_IOCTL(ump->devvp, DIOCSSTRATEGY, &dkstrat, FWRITE | FKIOCTL,
653e979c658Sreinoud 		NOCRED);
654e979c658Sreinoud 
655f84252b4Sandvar 	/* initialise our internal scheduler */
656e979c658Sreinoud 	priv->cur_queue = UDF_SHED_READING;
657e979c658Sreinoud 	bufq_alloc(&priv->queues[UDF_SHED_READING], "disksort",
658e979c658Sreinoud 		BUFQ_SORT_RAWBLOCK);
659e979c658Sreinoud 	bufq_alloc(&priv->queues[UDF_SHED_WRITING], "disksort",
660e979c658Sreinoud 		BUFQ_SORT_RAWBLOCK);
661e979c658Sreinoud 	bufq_alloc(&priv->queues[UDF_SHED_SEQWRITING], "fcfs", 0);
662e979c658Sreinoud 	vfs_timestamp(&priv->last_queued[UDF_SHED_READING]);
663e979c658Sreinoud 	vfs_timestamp(&priv->last_queued[UDF_SHED_WRITING]);
664e979c658Sreinoud 	vfs_timestamp(&priv->last_queued[UDF_SHED_SEQWRITING]);
665e979c658Sreinoud 
666e979c658Sreinoud 	/* create our disk strategy thread */
667*04076868Sreinoud 	priv->thread_finished = 0;
668*04076868Sreinoud 	priv->thread_running  = 0;
669e979c658Sreinoud 	priv->run_thread      = 1;
67042866dd2Sreinoud 	priv->sync_req        = 0;
671e979c658Sreinoud 	if (kthread_create(PRI_NONE, 0 /* KTHREAD_MPSAFE*/, NULL /* cpu_info*/,
672e979c658Sreinoud 		udf_discstrat_thread, ump, &priv->queue_lwp,
673e979c658Sreinoud 		"%s", "udf_rw")) {
674e979c658Sreinoud 		panic("fork udf_rw");
675e979c658Sreinoud 	}
676*04076868Sreinoud 
677*04076868Sreinoud 	/* wait for thread to spin up */
678*04076868Sreinoud 	mutex_enter(&priv->discstrat_mutex);
679*04076868Sreinoud 	while (!priv->thread_running) {
680*04076868Sreinoud 		cv_timedwait(&priv->discstrat_cv, &priv->discstrat_mutex, hz);
681*04076868Sreinoud 	}
682*04076868Sreinoud 	mutex_exit(&priv->discstrat_mutex);
683e979c658Sreinoud }
684e979c658Sreinoud 
685e979c658Sreinoud 
686e979c658Sreinoud static void
udf_discstrat_finish_seq(struct udf_strat_args * args)687e979c658Sreinoud udf_discstrat_finish_seq(struct udf_strat_args *args)
688e979c658Sreinoud {
689e979c658Sreinoud 	struct udf_mount *ump = args->ump;
690e979c658Sreinoud 	struct strat_private *priv = PRIV(ump);
691e979c658Sreinoud 
692e979c658Sreinoud 	if (ump == NULL)
693e979c658Sreinoud 		return;
694e979c658Sreinoud 
695f84252b4Sandvar 	/* stop our scheduling thread */
696e979c658Sreinoud 	KASSERT(priv->run_thread == 1);
697e979c658Sreinoud 	priv->run_thread = 0;
698*04076868Sreinoud 
699*04076868Sreinoud 	mutex_enter(&priv->discstrat_mutex);
700*04076868Sreinoud 	while (!priv->thread_finished) {
701*04076868Sreinoud 		cv_broadcast(&priv->discstrat_cv);
702*04076868Sreinoud 		cv_timedwait(&priv->discstrat_cv, &priv->discstrat_mutex, hz);
703*04076868Sreinoud 	}
704*04076868Sreinoud 	mutex_exit(&priv->discstrat_mutex);
705*04076868Sreinoud 
706e979c658Sreinoud 	/* kthread should be finished now */
707e979c658Sreinoud 
708e979c658Sreinoud 	/* set back old device strategy method */
709e979c658Sreinoud 	VOP_IOCTL(ump->devvp, DIOCSSTRATEGY, &priv->old_strategy_setting,
710e979c658Sreinoud 			FWRITE, NOCRED);
711e979c658Sreinoud 
712e979c658Sreinoud 	/* destroy our pool */
713e979c658Sreinoud 	pool_destroy(&priv->desc_pool);
714e979c658Sreinoud 
715bccb285aSdrochner 	mutex_destroy(&priv->discstrat_mutex);
716bccb285aSdrochner 	cv_destroy(&priv->discstrat_cv);
717bccb285aSdrochner 
718e979c658Sreinoud 	/* free our private space */
719e979c658Sreinoud 	free(ump->strategy_private, M_UDFTEMP);
720e979c658Sreinoud 	ump->strategy_private = NULL;
721e979c658Sreinoud }
722e979c658Sreinoud 
723e979c658Sreinoud /* --------------------------------------------------------------------- */
724e979c658Sreinoud 
/*
 * Operations table of the sequential strategy: it collects the *_seq
 * functions of this file in one struct udf_strategy.
 *
 * The initialisers are positional, so their order has to match the member
 * order of struct udf_strategy, which is declared outside this file chunk;
 * keep both in step when adding or reordering entries.
 */
725e979c658Sreinoud struct udf_strategy udf_strat_sequential =
726e979c658Sreinoud {
727e979c658Sreinoud 	udf_create_logvol_dscr_seq,
728e979c658Sreinoud 	udf_free_logvol_dscr_seq,
729e979c658Sreinoud 	udf_read_logvol_dscr_seq,
730e979c658Sreinoud 	udf_write_logvol_dscr_seq,
731e979c658Sreinoud 	udf_queuebuf_seq,
73242866dd2Sreinoud 	udf_sync_caches_seq,
733e979c658Sreinoud 	udf_discstrat_init_seq,
734e979c658Sreinoud 	udf_discstrat_finish_seq
735e979c658Sreinoud };
736e979c658Sreinoud 
737e979c658Sreinoud 
738