/* VNode Disk driver, by D.C. van Moolenbroek <david@minix3.org> */

#include <minix/drivers.h>
#include <minix/blockdriver.h>
#include <minix/drvlib.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <assert.h>

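/*
 * All data transfers are staged through an intermediate buffer, in chunks
 * of at most this many bytes.
 */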
#define VND_BUF_SIZE 65536

static struct {
	int fd;			/* file descriptor for the underlying file */
	int openct;		/* number of times the device is open */
	int exiting;		/* exit after the last close? */
	int rdonly;		/* is the device set up read-only? */
	dev_t dev;		/* device on which the file resides */
	ino_t ino;		/* inode number of the file */
	struct device part[DEV_PER_DRIVE];	/* partition bases and sizes */
	struct device subpart[SUB_PER_DRIVE];	/* same for subpartitions */
	struct part_geom geom;	/* geometry information */
	char *buf;		/* intermediate I/O transfer buffer */
} state;

static unsigned int instance;

static int vnd_open(devminor_t, int);
static int vnd_close(devminor_t);
static int vnd_transfer(devminor_t, int, u64_t, endpoint_t, iovec_t *,
	unsigned int, int);
static int vnd_ioctl(devminor_t, unsigned long, endpoint_t, cp_grant_id_t,
	endpoint_t);
static struct device *vnd_part(devminor_t);
static void vnd_geometry(devminor_t, struct part_geom *);

static struct blockdriver vnd_dtab = {
	.bdr_type = BLOCKDRIVER_TYPE_DISK,
	.bdr_open = vnd_open,
	.bdr_close = vnd_close,
	.bdr_transfer = vnd_transfer,
	.bdr_ioctl = vnd_ioctl,
	.bdr_part = vnd_part,
	.bdr_geometry = vnd_geometry
};

/*
 * Parse partition tables.
 */
static void
vnd_partition(void)
{
	memset(state.part, 0, sizeof(state.part));
	memset(state.subpart, 0, sizeof(state.subpart));

	state.part[0].dv_size = state.geom.size;

	partition(&vnd_dtab, 0, P_PRIMARY, FALSE /*atapi*/);
}

/*
 * Open a device.
 */
static int
vnd_open(devminor_t minor, int access)
{
	/* No sub/partition devices are available before initialization. */
	if (state.fd == -1 && minor != 0)
		return ENXIO;
	else if (state.fd != -1 && vnd_part(minor) == NULL)
		return ENXIO;

	/*
	 * If the device is either not configured or configured as read-only,
	 * block open calls that request write permission. This is what
	 * userland expects, although it does mean that vnconfig(8) has to
	 * open the device as read-only in order to (un)configure it.
	 */
	if (access & BDEV_W_BIT) {
		if (state.fd == -1)
			return ENXIO;
		if (state.rdonly)
			return EACCES;
	}

	/*
	 * Userland expects that if the device is opened after having been
	 * fully closed, partition tables are (re)parsed. Since we already
	 * parse partition tables upon initialization, we could skip this for
	 * the first open, but that would introduce more state.
	 */
	if (state.fd != -1 && state.openct == 0) {
		vnd_partition();

		/* Make sure our target device didn't just disappear. */
		if (vnd_part(minor) == NULL)
			return ENXIO;
	}

	state.openct++;

	return OK;
}

/*
 * Close a device.
 */
static int
vnd_close(devminor_t UNUSED(minor))
{
	if (state.openct == 0) {
		printf("VND%u: closing already-closed device\n", instance);
		return EINVAL;
	}

	state.openct--;

	if (state.exiting)
		blockdriver_terminate();

	return OK;
}

/*
 * Copy a number of bytes from or to the caller, to or from the intermediate
 * buffer. If the given endpoint is SELF, a local memory copy must be made.
 */
static int
vnd_copy(iovec_s_t *iov, size_t iov_off, size_t bytes, endpoint_t endpt,
	int do_write)
{
	struct vscp_vec vvec[SCPVEC_NR], *vvp;
	size_t off, chunk;
	int count;
	char *ptr;

	assert(bytes > 0 && bytes <= VND_BUF_SIZE);

	vvp = vvec;
	count = 0;

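	/*
	 * Walk the I/O vector. For a local (SELF) endpoint, copy each chunk
	 * directly; for a remote endpoint, build up a vector of copy
	 * requests, to be issued as one batched kernel call afterwards.
	 */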
	for (off = 0; off < bytes; off += chunk) {
		chunk = MIN(bytes - off, iov->iov_size - iov_off);

		if (endpt == SELF) {
			ptr = (char *) iov->iov_grant + iov_off;

			if (do_write)
				memcpy(&state.buf[off], ptr, chunk);
			else
				memcpy(ptr, &state.buf[off], chunk);
		} else {
			assert(count < SCPVEC_NR); /* SCPVEC_NR >= NR_IOREQS */

			vvp->v_from = do_write ? endpt : SELF;
			vvp->v_to = do_write ? SELF : endpt;
			vvp->v_bytes = chunk;
			vvp->v_gid = iov->iov_grant;
			vvp->v_offset = iov_off;
			vvp->v_addr = (vir_bytes) &state.buf[off];

			vvp++;
			count++;
		}

		iov_off += chunk;
		if (iov_off == iov->iov_size) {
			iov++;
			iov_off = 0;
		}
	}

	if (endpt != SELF)
		return sys_vsafecopy(vvec, count);
	else
		return OK;
}

/*
 * Advance the given I/O vector, and the offset into its first element, by the
 * given number of bytes.
 */
static iovec_s_t *
vnd_advance(iovec_s_t *iov, size_t *iov_offp, size_t bytes)
{
	size_t iov_off;

	assert(bytes > 0 && bytes <= VND_BUF_SIZE);

	iov_off = *iov_offp;

	while (bytes > 0) {
		if (bytes >= iov->iov_size - iov_off) {
			bytes -= iov->iov_size - iov_off;
			iov++;
			iov_off = 0;
		} else {
			iov_off += bytes;
			bytes = 0;
		}
	}

	*iov_offp = iov_off;
	return iov;
}

/*
 * Perform data transfer on the selected device.
 */
static int
vnd_transfer(devminor_t minor, int do_write, u64_t position,
	endpoint_t endpt, iovec_t *iovt, unsigned int nr_req, int flags)
{
	struct device *dv;
	iovec_s_t *iov;
	size_t off, chunk, bytes, iov_off;
	ssize_t r;
	unsigned int i;

	iov = (iovec_s_t *) iovt;

	if (state.fd == -1 || (dv = vnd_part(minor)) == NULL)
		return ENXIO;

	/* Prevent write operations on devices set up as read-only. */
	if (do_write && state.rdonly)
		return EACCES;

	/* Determine the total number of bytes to transfer. */
	if (position >= dv->dv_size)
		return 0;

	bytes = 0;

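	/*
	 * Reject zero-length and oversized vector elements. The running
	 * total is checked against LONG_MAX on every iteration, so the
	 * byte count can never wrap around.
	 */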
	for (i = 0; i < nr_req; i++) {
		if (iov[i].iov_size == 0 || iov[i].iov_size > LONG_MAX)
			return EINVAL;
		bytes += iov[i].iov_size;
		if (bytes > LONG_MAX)
			return EINVAL;
	}

	if (bytes > dv->dv_size - position)
		bytes = dv->dv_size - position;

	position += dv->dv_base;

	/* Perform the actual transfer, in chunks if necessary. */
	iov_off = 0;

	for (off = 0; off < bytes; off += chunk) {
		chunk = MIN(bytes - off, VND_BUF_SIZE);

		assert((unsigned int) (iov - (iovec_s_t *) iovt) < nr_req);

		/* For reads, read in the data for the chunk; possibly less. */
		if (!do_write) {
			chunk = r = pread(state.fd, state.buf, chunk,
			    position);

			if (r < 0) {
				printf("VND%u: pread failed (%d)\n", instance,
				    -errno);
				return -errno;
			}
			if (r == 0)
				break;
		}

		/* Copy the data for this chunk from or to the caller. */
		if ((r = vnd_copy(iov, iov_off, chunk, endpt, do_write)) < 0) {
			printf("VND%u: data copy failed (%d)\n", instance, r);
			return r;
		}

		/* For writes, write the data to the file; possibly less. */
		if (do_write) {
			chunk = r = pwrite(state.fd, state.buf, chunk,
			    position);

			if (r <= 0) {
				if (r < 0)
					r = -errno;
				printf("VND%u: pwrite failed (%d)\n", instance,
				    r);
				return (r < 0) ? r : EIO;
			}
		}

		/* Move ahead on the I/O vector and the file position. */
		iov = vnd_advance(iov, &iov_off, chunk);

		position += chunk;
	}

	/* If force-write is requested, flush the underlying file to disk. */
	if (do_write && (flags & BDEV_FORCEWRITE))
		fsync(state.fd);

	/* Return the number of bytes transferred. */
	return off;
}

/*
 * Initialize the size and geometry for the device and any partitions. If the
 * user provided a geometry, this will be used; otherwise, a geometry will be
 * computed.
 */
static int
vnd_layout(u64_t size, struct vnd_ioctl *vnd)
{
	u64_t sectors;

	state.geom.base = 0ULL;

	if (vnd->vnd_flags & VNDIOF_HASGEOM) {
		/*
		 * The geometry determines the accessible part of the file.
		 * The resulting size must not exceed the file size.
		 */
		state.geom.cylinders = vnd->vnd_geom.vng_ncylinders;
		state.geom.heads = vnd->vnd_geom.vng_ntracks;
		state.geom.sectors = vnd->vnd_geom.vng_nsectors;

		state.geom.size = (u64_t) state.geom.cylinders *
		    state.geom.heads * state.geom.sectors *
		    vnd->vnd_geom.vng_secsize;
		if (state.geom.size == 0 || state.geom.size > size)
			return EINVAL;
	} else {
		sectors = size / SECTOR_SIZE;
		state.geom.size = sectors * SECTOR_SIZE;

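		/*
		 * No geometry was given, so fabricate one: use a standard
		 * 64-head, 32-sectors-per-track translation if the file is
		 * large enough, and derive the cylinder count from the size.
		 */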
		if (sectors >= 32 * 64) {
			state.geom.cylinders = sectors / (32 * 64);
			state.geom.heads = 64;
			state.geom.sectors = 32;
		} else {
			state.geom.cylinders = sectors;
			state.geom.heads = 1;
			state.geom.sectors = 1;
		}
	}

	/*
	 * Parse partition tables immediately, so that (sub)partitions can be
	 * opened right away. The first open will perform the same procedure,
	 * but that is only necessary to match userland expectations.
	 */
	vnd_partition();

	return OK;
}

/*
 * Process I/O control requests.
 */
static int
vnd_ioctl(devminor_t UNUSED(minor), unsigned long request, endpoint_t endpt,
	cp_grant_id_t grant, endpoint_t user_endpt)
{
	struct vnd_ioctl vnd;
	struct vnd_user vnu;
	struct stat st;
	int r;

	switch (request) {
	case VNDIOCSET:
		/*
		 * The VND must not be busy. Note that the caller has the
		 * device open to perform the IOCTL request.
		 */
		if (state.fd != -1 || state.openct != 1)
			return EBUSY;

		if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes) &vnd,
		    sizeof(vnd))) != OK)
			return r;

		/*
		 * Issue a special VFS backcall that copies a file descriptor
		 * to the current process, from the user process ultimately
		 * making the IOCTL call. The result is either a newly
		 * allocated file descriptor or an error.
		 */
		if ((r = copyfd(user_endpt, vnd.vnd_fildes, COPYFD_FROM)) < 0)
			return r;

		state.fd = r;
		r = OK;		/* from here on, r holds a status code */

		/* The target file must be regular. */
		if (fstat(state.fd, &st) == -1) {
			printf("VND%u: fstat failed (%d)\n", instance, -errno);
			r = -errno;
		}
		if (r == OK && !S_ISREG(st.st_mode))
			r = EINVAL;

		/*
		 * Allocate memory for an intermediate I/O transfer buffer. In
		 * order to save on memory in the common case, the buffer is
		 * only allocated when the vnd is in use. We use mmap instead
		 * of malloc to allow the memory to be actually freed later.
		 */
		if (r == OK) {
			state.buf = mmap(NULL, VND_BUF_SIZE, PROT_READ |
			    PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
			if (state.buf == MAP_FAILED)
				r = ENOMEM;
		}

		if (r != OK) {
			close(state.fd);
			state.fd = -1;
			return r;
		}

		/* Set various device state fields. */
		state.dev = st.st_dev;
		state.ino = st.st_ino;
		state.rdonly = !!(vnd.vnd_flags & VNDIOF_READONLY);

		r = vnd_layout(st.st_size, &vnd);

		/* Upon success, return the device size to userland. */
		if (r == OK) {
			vnd.vnd_size = state.geom.size;

			r = sys_safecopyto(endpt, grant, 0, (vir_bytes) &vnd,
			    sizeof(vnd));
		}

		if (r != OK) {
			munmap(state.buf, VND_BUF_SIZE);
			close(state.fd);
			state.fd = -1;
		}

		return r;

	case VNDIOCCLR:
		/* The VND can only be cleared if it has been configured. */
		if (state.fd == -1)
			return ENXIO;

		if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes) &vnd,
		    sizeof(vnd))) != OK)
			return r;

		/* The caller has the device open to do the IOCTL request. */
		if (!(vnd.vnd_flags & VNDIOF_FORCE) && state.openct != 1)
			return EBUSY;

		/*
		 * Close the associated file descriptor immediately, but do not
		 * allow reuse until the device has been closed by the other
		 * users.
		 */
		munmap(state.buf, VND_BUF_SIZE);
		close(state.fd);
		state.fd = -1;

		return OK;

	case VNDIOCGET:
		/*
		 * We need not copy in the given structure. It would contain
		 * the requested unit number, but each driver instance provides
		 * only one unit anyway.
		 */

		memset(&vnu, 0, sizeof(vnu));

		vnu.vnu_unit = instance;

		/* Leave these fields zeroed if the device is not in use. */
		if (state.fd != -1) {
			vnu.vnu_dev = state.dev;
			vnu.vnu_ino = state.ino;
		}

		return sys_safecopyto(endpt, grant, 0, (vir_bytes) &vnu,
		    sizeof(vnu));

	case DIOCOPENCT:
		return sys_safecopyto(endpt, grant, 0,
		    (vir_bytes) &state.openct, sizeof(state.openct));

	case DIOCFLUSH:
		if (state.fd == -1)
			return ENXIO;

		fsync(state.fd);

		return OK;
	}

	return ENOTTY;
}

/*
 * Return a pointer to the partition structure for the given minor device.
 */
static struct device *
vnd_part(devminor_t minor)
{
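	/*
	 * Minors 0 through DEV_PER_DRIVE-1 address the drive and its primary
	 * partitions; subpartition minors start at MINOR_d0p0s0.
	 */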
	if (minor >= 0 && minor < DEV_PER_DRIVE)
		return &state.part[minor];
	else if ((unsigned int) (minor -= MINOR_d0p0s0) < SUB_PER_DRIVE)
		return &state.subpart[minor];
	else
		return NULL;
}

/*
 * Return geometry information.
 */
static void
vnd_geometry(devminor_t UNUSED(minor), struct part_geom *part)
{
	part->cylinders = state.geom.cylinders;
	part->heads = state.geom.heads;
	part->sectors = state.geom.sectors;
}

/*
 * Initialize the device.
 */
static int
vnd_init(int UNUSED(type), sef_init_info_t *UNUSED(info))
{
	long v;

	/*
	 * No support for crash recovery. The driver would have no way to
	 * reacquire the file descriptor for the target file.
	 */

	/*
	 * The instance number is used for two purposes: reporting errors, and
	 * returning the proper unit number to userland in VNDIOCGET calls.
	 */
	v = 0;
	(void) env_parse("instance", "d", 0, &v, 0, 255);
	instance = (unsigned int) v;

	state.openct = 0;
	state.exiting = FALSE;
	state.fd = -1;

	return OK;
}

/*
 * Process an incoming signal.
 */
static void
vnd_signal(int signo)
{

	/* In case of a termination signal, initiate driver shutdown. */
	if (signo != SIGTERM)
		return;

	state.exiting = TRUE;

	/* Keep running until the device has been fully closed. */
	if (state.openct == 0)
		blockdriver_terminate();
}

/*
 * Set callbacks and initialize the System Event Framework (SEF).
 */
static void
vnd_startup(void)
{

	/* Register init and signal callbacks. */
	sef_setcb_init_fresh(vnd_init);
	sef_setcb_signal_handler(vnd_signal);

	/* Let SEF perform startup. */
	sef_startup();
}

/*
 * Driver task.
 */
int
main(int argc, char **argv)
{

	/* Initialize the driver. */
	env_setargs(argc, argv);
	vnd_startup();

	/* Process requests until shutdown. */
	blockdriver_task(&vnd_dtab);

	return 0;
}