xref: /netbsd-src/sys/kern/bufq_disksort.c (revision ec8060020862b5ac256de2a4e79555a03fb31d96)
1*ec806002Skamil /*	$NetBSD: bufq_disksort.c,v 1.14 2017/05/04 11:03:27 kamil Exp $	*/
21c2a2dcaSyamt /*	NetBSD: subr_disk.c,v 1.61 2004/09/25 03:30:44 thorpej Exp 	*/
31c2a2dcaSyamt 
41c2a2dcaSyamt /*-
51c2a2dcaSyamt  * Copyright (c) 1996, 1997, 1999, 2000 The NetBSD Foundation, Inc.
61c2a2dcaSyamt  * All rights reserved.
71c2a2dcaSyamt  *
81c2a2dcaSyamt  * This code is derived from software contributed to The NetBSD Foundation
91c2a2dcaSyamt  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
101c2a2dcaSyamt  * NASA Ames Research Center.
111c2a2dcaSyamt  *
121c2a2dcaSyamt  * Redistribution and use in source and binary forms, with or without
131c2a2dcaSyamt  * modification, are permitted provided that the following conditions
141c2a2dcaSyamt  * are met:
151c2a2dcaSyamt  * 1. Redistributions of source code must retain the above copyright
161c2a2dcaSyamt  *    notice, this list of conditions and the following disclaimer.
171c2a2dcaSyamt  * 2. Redistributions in binary form must reproduce the above copyright
181c2a2dcaSyamt  *    notice, this list of conditions and the following disclaimer in the
191c2a2dcaSyamt  *    documentation and/or other materials provided with the distribution.
201c2a2dcaSyamt  *
211c2a2dcaSyamt  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
221c2a2dcaSyamt  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
231c2a2dcaSyamt  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
241c2a2dcaSyamt  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
251c2a2dcaSyamt  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
261c2a2dcaSyamt  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
271c2a2dcaSyamt  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
281c2a2dcaSyamt  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
291c2a2dcaSyamt  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
301c2a2dcaSyamt  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
311c2a2dcaSyamt  * POSSIBILITY OF SUCH DAMAGE.
321c2a2dcaSyamt  */
331c2a2dcaSyamt 
341c2a2dcaSyamt /*
351c2a2dcaSyamt  * Copyright (c) 1982, 1986, 1988, 1993
361c2a2dcaSyamt  *	The Regents of the University of California.  All rights reserved.
371c2a2dcaSyamt  * (c) UNIX System Laboratories, Inc.
381c2a2dcaSyamt  * All or some portions of this file are derived from material licensed
391c2a2dcaSyamt  * to the University of California by American Telephone and Telegraph
401c2a2dcaSyamt  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
411c2a2dcaSyamt  * the permission of UNIX System Laboratories, Inc.
421c2a2dcaSyamt  *
431c2a2dcaSyamt  * Redistribution and use in source and binary forms, with or without
441c2a2dcaSyamt  * modification, are permitted provided that the following conditions
451c2a2dcaSyamt  * are met:
461c2a2dcaSyamt  * 1. Redistributions of source code must retain the above copyright
471c2a2dcaSyamt  *    notice, this list of conditions and the following disclaimer.
481c2a2dcaSyamt  * 2. Redistributions in binary form must reproduce the above copyright
491c2a2dcaSyamt  *    notice, this list of conditions and the following disclaimer in the
501c2a2dcaSyamt  *    documentation and/or other materials provided with the distribution.
511c2a2dcaSyamt  * 3. Neither the name of the University nor the names of its contributors
521c2a2dcaSyamt  *    may be used to endorse or promote products derived from this software
531c2a2dcaSyamt  *    without specific prior written permission.
541c2a2dcaSyamt  *
551c2a2dcaSyamt  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
561c2a2dcaSyamt  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
571c2a2dcaSyamt  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
581c2a2dcaSyamt  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
591c2a2dcaSyamt  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
601c2a2dcaSyamt  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
611c2a2dcaSyamt  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
621c2a2dcaSyamt  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
631c2a2dcaSyamt  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
641c2a2dcaSyamt  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
651c2a2dcaSyamt  * SUCH DAMAGE.
661c2a2dcaSyamt  *
671c2a2dcaSyamt  *	@(#)ufs_disksubr.c	8.5 (Berkeley) 1/21/94
681c2a2dcaSyamt  */
691c2a2dcaSyamt 
701c2a2dcaSyamt #include <sys/cdefs.h>
71*ec806002Skamil __KERNEL_RCSID(0, "$NetBSD: bufq_disksort.c,v 1.14 2017/05/04 11:03:27 kamil Exp $");
721c2a2dcaSyamt 
731c2a2dcaSyamt #include <sys/param.h>
741c2a2dcaSyamt #include <sys/systm.h>
751c2a2dcaSyamt #include <sys/buf.h>
7605f25dccSyamt #include <sys/bufq.h>
77aec75b1cSyamt #include <sys/bufq_impl.h>
783e196b68Syamt #include <sys/kmem.h>
79556c6909Spgoyette #include <sys/module.h>
801c2a2dcaSyamt 
811c2a2dcaSyamt /*
821c2a2dcaSyamt  * Seek sort for disks.
831c2a2dcaSyamt  *
841c2a2dcaSyamt  * There are actually two queues, sorted in ascendening order.  The first
851c2a2dcaSyamt  * queue holds those requests which are positioned after the current block;
861c2a2dcaSyamt  * the second holds requests which came in after their position was passed.
871c2a2dcaSyamt  * Thus we implement a one-way scan, retracting after reaching the end of
881c2a2dcaSyamt  * the drive to the first request on the second queue, at which time it
891c2a2dcaSyamt  * becomes the first queue.
901c2a2dcaSyamt  *
911c2a2dcaSyamt  * A one-way scan is natural because of the way UNIX read-ahead blocks are
921c2a2dcaSyamt  * allocated.
931c2a2dcaSyamt  */
941c2a2dcaSyamt 
951c2a2dcaSyamt struct bufq_disksort {
961c2a2dcaSyamt 	TAILQ_HEAD(, buf) bq_head;	/* actual list of buffers */
971c2a2dcaSyamt };
981c2a2dcaSyamt 
99eb54e7dbSyamt static void bufq_disksort_init(struct bufq_state *);
100eb54e7dbSyamt static void bufq_disksort_put(struct bufq_state *, struct buf *);
101eb54e7dbSyamt static struct buf *bufq_disksort_get(struct bufq_state *, int);
102eb54e7dbSyamt 
103aec75b1cSyamt BUFQ_DEFINE(disksort, 20, bufq_disksort_init);
104eb54e7dbSyamt 
1051c2a2dcaSyamt static void
bufq_disksort_put(struct bufq_state * bufq,struct buf * bp)1061c2a2dcaSyamt bufq_disksort_put(struct bufq_state *bufq, struct buf *bp)
1071c2a2dcaSyamt {
108aec75b1cSyamt 	struct bufq_disksort *disksort = bufq_private(bufq);
1091c2a2dcaSyamt 	struct buf *bq, *nbq;
1101c2a2dcaSyamt 	int sortby;
1111c2a2dcaSyamt 
1121c2a2dcaSyamt 	sortby = bufq->bq_flags & BUFQ_SORT_MASK;
1131c2a2dcaSyamt 
1141c2a2dcaSyamt 	bq = TAILQ_FIRST(&disksort->bq_head);
1151c2a2dcaSyamt 
1161c2a2dcaSyamt 	/*
1171c2a2dcaSyamt 	 * If the queue is empty it's easy; we just go on the end.
1181c2a2dcaSyamt 	 */
1191c2a2dcaSyamt 	if (bq == NULL) {
1201c2a2dcaSyamt 		TAILQ_INSERT_TAIL(&disksort->bq_head, bp, b_actq);
1211c2a2dcaSyamt 		return;
1221c2a2dcaSyamt 	}
1231c2a2dcaSyamt 
1241c2a2dcaSyamt 	/*
1251c2a2dcaSyamt 	 * If we lie before the currently active request, then we
1261c2a2dcaSyamt 	 * must locate the second request list and add ourselves to it.
1271c2a2dcaSyamt 	 */
1281c2a2dcaSyamt 	if (buf_inorder(bp, bq, sortby)) {
1291c2a2dcaSyamt 		while ((nbq = TAILQ_NEXT(bq, b_actq)) != NULL) {
1301c2a2dcaSyamt 			/*
1311c2a2dcaSyamt 			 * Check for an ``inversion'' in the normally ascending
1321c2a2dcaSyamt 			 * block numbers, indicating the start of the second
1331c2a2dcaSyamt 			 * request list.
1341c2a2dcaSyamt 			 */
1351c2a2dcaSyamt 			if (buf_inorder(nbq, bq, sortby)) {
1361c2a2dcaSyamt 				/*
1371c2a2dcaSyamt 				 * Search the second request list for the first
1381c2a2dcaSyamt 				 * request at a larger block number.  We go
1391c2a2dcaSyamt 				 * after that; if there is no such request, we
1401c2a2dcaSyamt 				 * go at the end.
1411c2a2dcaSyamt 				 */
1421c2a2dcaSyamt 				do {
1431c2a2dcaSyamt 					if (buf_inorder(bp, nbq, sortby))
1441c2a2dcaSyamt 						goto insert;
1451c2a2dcaSyamt 					bq = nbq;
1461c2a2dcaSyamt 				} while ((nbq =
1471c2a2dcaSyamt 				    TAILQ_NEXT(bq, b_actq)) != NULL);
1481c2a2dcaSyamt 				goto insert;		/* after last */
1491c2a2dcaSyamt 			}
1501c2a2dcaSyamt 			bq = nbq;
1511c2a2dcaSyamt 		}
1521c2a2dcaSyamt 		/*
1531c2a2dcaSyamt 		 * No inversions... we will go after the last, and
1541c2a2dcaSyamt 		 * be the first request in the second request list.
1551c2a2dcaSyamt 		 */
1561c2a2dcaSyamt 		goto insert;
1571c2a2dcaSyamt 	}
1581c2a2dcaSyamt 	/*
1591c2a2dcaSyamt 	 * Request is at/after the current request...
1601c2a2dcaSyamt 	 * sort in the first request list.
1611c2a2dcaSyamt 	 */
1621c2a2dcaSyamt 	while ((nbq = TAILQ_NEXT(bq, b_actq)) != NULL) {
1631c2a2dcaSyamt 		/*
1641c2a2dcaSyamt 		 * We want to go after the current request if there is an
1651c2a2dcaSyamt 		 * inversion after it (i.e. it is the end of the first
1661c2a2dcaSyamt 		 * request list), or if the next request is a larger cylinder
1671c2a2dcaSyamt 		 * than our request.
1681c2a2dcaSyamt 		 */
1691c2a2dcaSyamt 		if (buf_inorder(nbq, bq, sortby) ||
1701c2a2dcaSyamt 		    buf_inorder(bp, nbq, sortby))
1711c2a2dcaSyamt 			goto insert;
1721c2a2dcaSyamt 		bq = nbq;
1731c2a2dcaSyamt 	}
1741c2a2dcaSyamt 	/*
1751c2a2dcaSyamt 	 * Neither a second list nor a larger request... we go at the end of
1761c2a2dcaSyamt 	 * the first list, which is the same as the end of the whole schebang.
1771c2a2dcaSyamt 	 */
1781c2a2dcaSyamt insert:	TAILQ_INSERT_AFTER(&disksort->bq_head, bq, bp, b_actq);
1791c2a2dcaSyamt }
1801c2a2dcaSyamt 
1811c2a2dcaSyamt static struct buf *
bufq_disksort_get(struct bufq_state * bufq,int remove)1821c2a2dcaSyamt bufq_disksort_get(struct bufq_state *bufq, int remove)
1831c2a2dcaSyamt {
184*ec806002Skamil 	struct bufq_disksort *disksort = bufq_private(bufq);
1851c2a2dcaSyamt 	struct buf *bp;
1861c2a2dcaSyamt 
1871c2a2dcaSyamt 	bp = TAILQ_FIRST(&disksort->bq_head);
1881c2a2dcaSyamt 
1891c2a2dcaSyamt 	if (bp != NULL && remove)
1901c2a2dcaSyamt 		TAILQ_REMOVE(&disksort->bq_head, bp, b_actq);
1911c2a2dcaSyamt 
1921c2a2dcaSyamt 	return (bp);
1931c2a2dcaSyamt }
1941c2a2dcaSyamt 
1955fc434dcSreinoud static struct buf *
bufq_disksort_cancel(struct bufq_state * bufq,struct buf * buf)1965fc434dcSreinoud bufq_disksort_cancel(struct bufq_state *bufq, struct buf *buf)
1975fc434dcSreinoud {
198*ec806002Skamil 	struct bufq_disksort *disksort = bufq_private(bufq);
1995fc434dcSreinoud 	struct buf *bq;
2005fc434dcSreinoud 
2013e54d283Syamt 	TAILQ_FOREACH(bq, &disksort->bq_head, b_actq) {
2025fc434dcSreinoud 		if (bq == buf) {
2035fc434dcSreinoud 			TAILQ_REMOVE(&disksort->bq_head, bq, b_actq);
2045fc434dcSreinoud 			return buf;
2055fc434dcSreinoud 		}
2065fc434dcSreinoud 	}
2075fc434dcSreinoud 	return NULL;
2085fc434dcSreinoud }
2095fc434dcSreinoud 
210eb54e7dbSyamt static void
bufq_disksort_fini(struct bufq_state * bufq)2113e196b68Syamt bufq_disksort_fini(struct bufq_state *bufq)
2123e196b68Syamt {
2133e196b68Syamt 
2143e196b68Syamt 	KASSERT(bufq->bq_private != NULL);
2153e196b68Syamt 	kmem_free(bufq->bq_private, sizeof(struct bufq_disksort));
2163e196b68Syamt }
2173e196b68Syamt 
2183e196b68Syamt static void
bufq_disksort_init(struct bufq_state * bufq)2191c2a2dcaSyamt bufq_disksort_init(struct bufq_state *bufq)
2201c2a2dcaSyamt {
2211c2a2dcaSyamt 	struct bufq_disksort *disksort;
2221c2a2dcaSyamt 
2233e196b68Syamt 	disksort = kmem_zalloc(sizeof(*disksort), KM_SLEEP);
22489165435Scbiere 	bufq->bq_private = disksort;
2251c2a2dcaSyamt 	bufq->bq_get = bufq_disksort_get;
2261c2a2dcaSyamt 	bufq->bq_put = bufq_disksort_put;
2275fc434dcSreinoud 	bufq->bq_cancel = bufq_disksort_cancel;
2283e196b68Syamt 	bufq->bq_fini = bufq_disksort_fini;
2291c2a2dcaSyamt 	TAILQ_INIT(&disksort->bq_head);
2301c2a2dcaSyamt }
231556c6909Spgoyette 
232219154eeSpgoyette MODULE(MODULE_CLASS_BUFQ, bufq_disksort, NULL);
233556c6909Spgoyette 
234556c6909Spgoyette static int
bufq_disksort_modcmd(modcmd_t cmd,void * opaque)235556c6909Spgoyette bufq_disksort_modcmd(modcmd_t cmd, void *opaque)
236556c6909Spgoyette {
237556c6909Spgoyette 
238556c6909Spgoyette 	switch (cmd) {
239556c6909Spgoyette 	case MODULE_CMD_INIT:
240556c6909Spgoyette 		return bufq_register(&bufq_strat_disksort);
241556c6909Spgoyette 	case MODULE_CMD_FINI:
242556c6909Spgoyette 		return bufq_unregister(&bufq_strat_disksort);
243556c6909Spgoyette 	default:
244556c6909Spgoyette 		return ENOTTY;
245556c6909Spgoyette 	}
246556c6909Spgoyette }
247