xref: /freebsd-src/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_file.c (revision 16d6b3b3da62aa5baaf3c66c8d4e6f8c8f70aeb7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
24  */
25 
26 #include <sys/zfs_context.h>
27 #include <sys/spa.h>
28 #include <sys/file.h>
29 #include <sys/vdev_file.h>
30 #include <sys/vdev_impl.h>
31 #include <sys/zio.h>
32 #include <sys/fs/zfs.h>
33 #include <sys/fm/fs/zfs.h>
34 #include <sys/abd.h>
35 #include <sys/stat.h>
36 
37 /*
38  * Virtual device vector for files.
39  */
40 
41 static taskq_t *vdev_file_taskq;
42 
43 void
44 vdev_file_init(void)
45 {
46 	vdev_file_taskq = taskq_create("z_vdev_file", MAX(max_ncpus, 16),
47 	    minclsyspri, max_ncpus, INT_MAX, 0);
48 }
49 
50 void
51 vdev_file_fini(void)
52 {
53 	taskq_destroy(vdev_file_taskq);
54 }
55 
56 static void
57 vdev_file_hold(vdev_t *vd)
58 {
59 	ASSERT(vd->vdev_path != NULL);
60 }
61 
62 static void
63 vdev_file_rele(vdev_t *vd)
64 {
65 	ASSERT(vd->vdev_path != NULL);
66 }
67 
68 static mode_t
69 vdev_file_open_mode(spa_mode_t spa_mode)
70 {
71 	mode_t mode = 0;
72 
73 	if ((spa_mode & SPA_MODE_READ) && (spa_mode & SPA_MODE_WRITE)) {
74 		mode = O_RDWR;
75 	} else if (spa_mode & SPA_MODE_READ) {
76 		mode = O_RDONLY;
77 	} else if (spa_mode & SPA_MODE_WRITE) {
78 		mode = O_WRONLY;
79 	}
80 
81 	return (mode | O_LARGEFILE);
82 }
83 
84 static int
85 vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
86     uint64_t *logical_ashift, uint64_t *physical_ashift)
87 {
88 	vdev_file_t *vf;
89 	zfs_file_t *fp;
90 	zfs_file_attr_t zfa;
91 	int error;
92 
93 	/*
94 	 * Rotational optimizations only make sense on block devices.
95 	 */
96 	vd->vdev_nonrot = B_TRUE;
97 
98 	/*
99 	 * Allow TRIM on file based vdevs.  This may not always be supported,
100 	 * since it depends on your kernel version and underlying filesystem
101 	 * type but it is always safe to attempt.
102 	 */
103 	vd->vdev_has_trim = B_TRUE;
104 
105 	/*
106 	 * Disable secure TRIM on file based vdevs.  There is no way to
107 	 * request this behavior from the underlying filesystem.
108 	 */
109 	vd->vdev_has_securetrim = B_FALSE;
110 
111 	/*
112 	 * We must have a pathname, and it must be absolute.
113 	 */
114 	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
115 		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
116 		return (SET_ERROR(EINVAL));
117 	}
118 
119 	/*
120 	 * Reopen the device if it's not currently open.  Otherwise,
121 	 * just update the physical size of the device.
122 	 */
123 	if (vd->vdev_tsd != NULL) {
124 		ASSERT(vd->vdev_reopening);
125 		vf = vd->vdev_tsd;
126 		goto skip_open;
127 	}
128 
129 	vf = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_t), KM_SLEEP);
130 
131 	/*
132 	 * We always open the files from the root of the global zone, even if
133 	 * we're in a local zone.  If the user has gotten to this point, the
134 	 * administrator has already decided that the pool should be available
135 	 * to local zone users, so the underlying devices should be as well.
136 	 */
137 	ASSERT(vd->vdev_path != NULL && vd->vdev_path[0] == '/');
138 
139 	error = zfs_file_open(vd->vdev_path,
140 	    vdev_file_open_mode(spa_mode(vd->vdev_spa)), 0, &fp);
141 	if (error) {
142 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
143 		return (error);
144 	}
145 
146 	vf->vf_file = fp;
147 
148 #ifdef _KERNEL
149 	/*
150 	 * Make sure it's a regular file.
151 	 */
152 	if (zfs_file_getattr(fp, &zfa)) {
153 		return (SET_ERROR(ENODEV));
154 	}
155 	if (!S_ISREG(zfa.zfa_mode)) {
156 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
157 		return (SET_ERROR(ENODEV));
158 	}
159 #endif
160 
161 skip_open:
162 
163 	error =  zfs_file_getattr(vf->vf_file, &zfa);
164 	if (error) {
165 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
166 		return (error);
167 	}
168 
169 	*max_psize = *psize = zfa.zfa_size;
170 	*logical_ashift = SPA_MINBLOCKSHIFT;
171 	*physical_ashift = SPA_MINBLOCKSHIFT;
172 
173 	return (0);
174 }
175 
176 static void
177 vdev_file_close(vdev_t *vd)
178 {
179 	vdev_file_t *vf = vd->vdev_tsd;
180 
181 	if (vd->vdev_reopening || vf == NULL)
182 		return;
183 
184 	if (vf->vf_file != NULL) {
185 		zfs_file_close(vf->vf_file);
186 	}
187 
188 	vd->vdev_delayed_close = B_FALSE;
189 	kmem_free(vf, sizeof (vdev_file_t));
190 	vd->vdev_tsd = NULL;
191 }
192 
193 /*
194  * Implements the interrupt side for file vdev types. This routine will be
195  * called when the I/O completes allowing us to transfer the I/O to the
196  * interrupt taskqs. For consistency, the code structure mimics disk vdev
197  * types.
198  */
199 static void
200 vdev_file_io_intr(zio_t *zio)
201 {
202 	zio_delay_interrupt(zio);
203 }
204 
205 static void
206 vdev_file_io_strategy(void *arg)
207 {
208 	zio_t *zio = arg;
209 	vdev_t *vd = zio->io_vd;
210 	vdev_file_t *vf;
211 	void *buf;
212 	ssize_t resid;
213 	loff_t off;
214 	ssize_t size;
215 	int err;
216 
217 	off = zio->io_offset;
218 	size = zio->io_size;
219 	resid = 0;
220 
221 	vf = vd->vdev_tsd;
222 
223 	ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);
224 	if (zio->io_type == ZIO_TYPE_READ) {
225 		buf = abd_borrow_buf(zio->io_abd, zio->io_size);
226 		err = zfs_file_pread(vf->vf_file, buf, size, off, &resid);
227 		abd_return_buf_copy(zio->io_abd, buf, size);
228 	} else {
229 		buf = abd_borrow_buf_copy(zio->io_abd, zio->io_size);
230 		err = zfs_file_pwrite(vf->vf_file, buf, size, off, &resid);
231 		abd_return_buf(zio->io_abd, buf, size);
232 	}
233 	if (resid != 0 && zio->io_error == 0)
234 		zio->io_error = ENOSPC;
235 
236 	vdev_file_io_intr(zio);
237 }
238 
239 static void
240 vdev_file_io_start(zio_t *zio)
241 {
242 	vdev_t *vd = zio->io_vd;
243 	vdev_file_t *vf = vd->vdev_tsd;
244 
245 	if (zio->io_type == ZIO_TYPE_IOCTL) {
246 		/* XXPOLICY */
247 		if (!vdev_readable(vd)) {
248 			zio->io_error = SET_ERROR(ENXIO);
249 			zio_interrupt(zio);
250 			return;
251 		}
252 
253 		switch (zio->io_cmd) {
254 		case DKIOCFLUSHWRITECACHE:
255 			zio->io_error = zfs_file_fsync(vf->vf_file,
256 			    O_SYNC|O_DSYNC);
257 			break;
258 		default:
259 			zio->io_error = SET_ERROR(ENOTSUP);
260 		}
261 
262 		zio_execute(zio);
263 		return;
264 	} else if (zio->io_type == ZIO_TYPE_TRIM) {
265 #ifdef notyet
266 		int mode = 0;
267 
268 		ASSERT3U(zio->io_size, !=, 0);
269 
270 		/* XXX FreeBSD has no fallocate routine in file ops */
271 		zio->io_error = zfs_file_fallocate(vf->vf_file,
272 		    mode, zio->io_offset, zio->io_size);
273 #endif
274 		zio->io_error = SET_ERROR(ENOTSUP);
275 		zio_execute(zio);
276 		return;
277 	}
278 	ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);
279 	zio->io_target_timestamp = zio_handle_io_delay(zio);
280 
281 	VERIFY3U(taskq_dispatch(vdev_file_taskq, vdev_file_io_strategy, zio,
282 	    TQ_SLEEP), !=, 0);
283 }
284 
285 /* ARGSUSED */
286 static void
287 vdev_file_io_done(zio_t *zio)
288 {
289 }
290 
291 vdev_ops_t vdev_file_ops = {
292 	vdev_file_open,
293 	vdev_file_close,
294 	vdev_default_asize,
295 	vdev_file_io_start,
296 	vdev_file_io_done,
297 	NULL,
298 	NULL,
299 	vdev_file_hold,
300 	vdev_file_rele,
301 	NULL,
302 	vdev_default_xlate,
303 	VDEV_TYPE_FILE,		/* name of this vdev type */
304 	B_TRUE			/* leaf vdev */
305 };
306 
307 /*
308  * From userland we access disks just like files.
309  */
310 #ifndef _KERNEL
311 
312 vdev_ops_t vdev_disk_ops = {
313 	vdev_file_open,
314 	vdev_file_close,
315 	vdev_default_asize,
316 	vdev_file_io_start,
317 	vdev_file_io_done,
318 	NULL,
319 	NULL,
320 	vdev_file_hold,
321 	vdev_file_rele,
322 	NULL,
323 	vdev_default_xlate,
324 	VDEV_TYPE_DISK,		/* name of this vdev type */
325 	B_TRUE			/* leaf vdev */
326 };
327 
328 #endif
329