/* Test for end-of-file during block device I/O - by D.C. van Moolenbroek */
/* This test needs to be run as root; it sets up and uses a VND instance. */
/*
 * The test should work with all root file system block sizes, but only tests
 * certain corner cases if the root FS block size is twice the page size.
 */
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <errno.h>
#include <time.h>
#include <sys/param.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/statvfs.h>
#include <minix/partition.h>
#include <fcntl.h>
#include <unistd.h>
#include <assert.h>

#define VNCONFIG "/usr/sbin/vnconfig"

#define SECTOR_SIZE 512	/* this should be the sector size of VND */

#define ITERATIONS 3

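/*
 * Positions of a single I/O request relative to the device end (EOF); see
 * do_one_io() for the exact range that is generated for each case.
 */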
enum {
        BEFORE_EOF,
        UPTO_EOF,
        ACROSS_EOF,
        ONEPAST_EOF,
        FROM_EOF,
        BEYOND_EOF
};

#include "common.h"

static int need_cleanup = 0;

static int dev_fd;
static size_t dev_size;
static char *dev_buf;
static char *dev_ref;

static size_t block_size;
static size_t page_size;
static int test_peek;

static char *mmap_ptr = NULL;
static size_t mmap_size;

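/* Pipe used by is_readable() to probe whether a page is mapped readable. */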
static int pipe_fd[2];

/*
 * Fill the given buffer with random contents.
 */
static void
fill_buf(char * buf, size_t size)
{

        while (size--)
                *buf++ = lrand48() & 0xff;
}

/*
 * Place the elements of the source array in the destination array in random
 * order, using a Fisher-Yates shuffle of the source indices.
 */
static void
scramble(int * dst, const int * src, int count)
{
        int i, j, k;

        for (i = 0; i < count; i++)
                dst[i] = i;

        for (i = count - 1; i >= 0; i--) {
                j = lrand48() % (i + 1);

                k = dst[j];
                dst[j] = dst[i];
                dst[i] = src[k];
        }
}

/*
 * Perform I/O using read(2) and check the returned results against the
 * expected result and the image reference data.
 */
static void
io_read(size_t pos, size_t len, size_t expected)
{
        ssize_t bytes;

        assert(len > 0 && len <= dev_size);
        assert(expected <= len);

        if (lseek(dev_fd, (off_t)pos, SEEK_SET) != pos) e(0);

        memset(dev_buf, 0, len);

        if ((bytes = read(dev_fd, dev_buf, len)) < 0) e(0);

        if (bytes != expected) e(0);

        if (memcmp(&dev_ref[pos], dev_buf, bytes)) e(0);
}

/*
 * Perform I/O using write(2) and check the returned result against the
 * expected result. Update the image reference data as appropriate.
 */
static void
io_write(size_t pos, size_t len, size_t expected)
{
        ssize_t bytes;

        assert(len > 0 && len <= dev_size);
        assert(expected <= len);

        if (lseek(dev_fd, (off_t)pos, SEEK_SET) != pos) e(0);

        fill_buf(dev_buf, len);

        if ((bytes = write(dev_fd, dev_buf, len)) < 0) e(0);

        if (bytes != expected) e(0);

        if (bytes > 0) {
                assert(pos + bytes <= dev_size);

                memcpy(&dev_ref[pos], dev_buf, bytes);
        }
}

/*
 * Test if reading from the given pointer succeeds or not, and return the
 * result.
 */
static int
is_readable(char * ptr)
{
        ssize_t r;
        char byte;

        /*
         * If we were to access the pointer directly, we would get a fatal
         * signal, and recovering from that would require a child process,
         * making the whole test slow and noisy. Instead, let a service try
         * the operation on our behalf: writing one byte from the pointer
         * into a pipe either succeeds or fails with EFAULT.
         */
        r = write(pipe_fd[1], ptr, 1);

        if (r == 1) {
                /* Don't fill up the pipe. */
                if (read(pipe_fd[0], &byte, 1) != 1) e(0);

                return 1;
        } else if (r != -1 || errno != EFAULT)
                e(0);

        return 0;
}

/*
 * Perform I/O using mmap(2) and check the returned results against the
 * expected result and the image reference data. Ensure that bytes beyond the
 * device end are either zero (for the remainder of the last valid page) or
 * inaccessible (for pages entirely beyond the device end).
 */
static void
io_peek(size_t pos, size_t len, size_t expected)
{
        size_t n, delta, mapped_size;
        char *ptr;

        assert(test_peek);

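        /*
         * Align the position down, and round the length up, to page
         * boundaries: mmap(2) requires a page-aligned file offset.
         */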
        delta = pos % page_size;

        pos -= delta;
        len += delta;

        len = roundup(len, page_size);

        /* Don't bother with the given expected value. Recompute it. */
        if (pos < dev_size)
                expected = MIN(dev_size - pos, len);
        else
                expected = 0;

        mapped_size = roundup(dev_size, page_size);

        assert(!(len % page_size));

        ptr = mmap(NULL, len, PROT_READ, MAP_PRIVATE | MAP_FILE, dev_fd,
            (off_t)pos);

        /*
         * As of writing, VM allows memory mapping at any offset and for any
         * length. At least for block devices, VM should probably be changed
         * to throw ENXIO for any pages beyond the file end, which in turn
         * renders all the SIGBUS tests below obsolete.
         */
        if (ptr == MAP_FAILED) {
                if (pos + len <= mapped_size) e(0);
                if (errno != ENXIO) e(0);

                return;
        }

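        /*
         * Register the mapping, so that cleanup_device() can unmap it if we
         * take a fatal signal before reaching the munmap(2) call below.
         */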
        mmap_ptr = ptr;
        mmap_size = len;

        /*
         * Any page that contains any valid part of the mapped device should
         * be readable and have correct contents for that part. If the last
         * valid page extends beyond the mapped device, its remainder should
         * be zero.
         */
        if (pos < dev_size) {
                /* The valid part should have the expected device contents. */
                if (memcmp(&dev_ref[pos], ptr, expected)) e(0);

                /* The remainder, if any, should be zero. */
                for (n = expected; n % page_size; n++)
                        if (ptr[n] != 0) e(0);
        }

        /*
         * Any page entirely beyond EOF should not be mapped in. In order to
         * ensure that is_readable() works, also test pages that are mapped
         * in.
         */
        for (n = pos; n < pos + len; n += page_size)
                if (is_readable(&ptr[n - pos]) != (n < mapped_size)) e(0);

        munmap(ptr, len);

        mmap_ptr = NULL;
}

/*
 * Perform one of the supported end-of-file access attempts using one I/O
 * operation.
 */
static void
do_one_io(int where, void (* io_proc)(size_t, size_t, size_t))
{
        size_t start, bytes;

        switch (where) {
        case BEFORE_EOF:
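                /* The I/O range ends one byte short of EOF. */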
                bytes = lrand48() % (dev_size - 1) + 1;

                io_proc(dev_size - bytes - 1, bytes, bytes);

                break;

        case UPTO_EOF:
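                /* The I/O range ends exactly at EOF. */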
                bytes = lrand48() % dev_size + 1;

                io_proc(dev_size - bytes, bytes, bytes);

                break;

        case ACROSS_EOF:
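                /*
                 * The I/O range starts before EOF and ends at least one byte
                 * past it; only the part up to EOF should be transferred.
                 */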
                start = lrand48() % (dev_size - 1) + 1;
                bytes = dev_size - start + 1;
                assert(start < dev_size && start + bytes > dev_size);
                bytes += lrand48() % (dev_size - bytes + 1);

                io_proc(start, bytes, dev_size - start);

                break;

        case ONEPAST_EOF:
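                /* The I/O range ends exactly one byte past EOF. */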
                bytes = lrand48() % (dev_size - 1) + 1;

                io_proc(dev_size - bytes + 1, bytes, bytes - 1);

                break;

        case FROM_EOF:
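                /* The I/O range starts exactly at EOF; nothing transfers. */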
                bytes = lrand48() % dev_size + 1;

                io_proc(dev_size, bytes, 0);

                break;

        case BEYOND_EOF:
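                /* The I/O range starts past EOF; nothing transfers. */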
                start = dev_size + lrand48() % dev_size + 1;
                bytes = lrand48() % dev_size + 1;

                io_proc(start, bytes, 0);

                break;

        default:
                assert(0);
        }
}

/*
 * Perform I/O operations, testing all the supported end-of-file access
 * attempts in a random order so as to detect possible problems with caching.
 */
static void
do_io(void (* io_proc)(size_t, size_t, size_t))
{
        static const int list[] = { BEFORE_EOF, UPTO_EOF, ACROSS_EOF,
            ONEPAST_EOF, FROM_EOF, BEYOND_EOF };
        static const int count = sizeof(list) / sizeof(list[0]);
        int i, where[count];

        scramble(where, list, count);

        for (i = 0; i < count; i++)
                do_one_io(where[i], io_proc);
}

/*
 * Set up an image file of the given size, assign it to a VND, and open the
 * resulting block device. The size is size_t because we keep a reference
 * copy of its entire contents in memory.
 */
static void
setup_image(size_t size)
{
        struct part_geom part;
        size_t off;
        ssize_t bytes;
        int fd, status;

        dev_size = size;
        if ((dev_buf = malloc(dev_size)) == NULL) e(0);
        if ((dev_ref = malloc(dev_size)) == NULL) e(0);

        if ((fd = open("image", O_CREAT | O_TRUNC | O_RDWR, 0644)) < 0) e(0);

        fill_buf(dev_ref, dev_size);

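        /* Write the reference contents to the image, allowing short writes. */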
        for (off = 0; off < dev_size; off += bytes) {
                bytes = write(fd, &dev_ref[off], dev_size - off);

                if (bytes <= 0) e(0);
        }

        close(fd);

        status = system(VNCONFIG " vnd0 image 2>/dev/null");
        if (!WIFEXITED(status)) e(0);
        if (WEXITSTATUS(status) != 0) {
                printf("skipped\n"); /* most likely cause: vnd0 is in use */
                cleanup();
                exit(0);
        }

        need_cleanup = 1;

        if ((dev_fd = open("/dev/vnd0", O_RDWR)) < 0) e(0);

        if (ioctl(dev_fd, DIOCGETP, &part) < 0) e(0);

        if (part.size != dev_size) e(0);
}

/*
 * Clean up the VND we set up previously. This function is also called in
 * case of an unexpected exit.
 */
static void
cleanup_device(void)
{
        int status;

        if (!need_cleanup)
                return;

        if (mmap_ptr != NULL) {
                munmap(mmap_ptr, mmap_size);

                mmap_ptr = NULL;
        }

        if (dev_fd >= 0)
                close(dev_fd);

        status = system(VNCONFIG " -u vnd0 2>/dev/null");
        if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) e(0);

        need_cleanup = 0;
}

/*
 * Signal handler for exceptions.
 */
static void
got_signal(int __unused sig)
{

        cleanup_device();

        exit(1);
}

/*
 * Clean up the VND and image file we set up previously.
 */
static void
cleanup_image(void)
{
        size_t off;
        ssize_t bytes;
        int fd;

        cleanup_device();

        if ((fd = open("image", O_RDONLY, 0644)) < 0) e(0);

        for (off = 0; off < dev_size; off += bytes) {
                bytes = read(fd, &dev_buf[off], dev_size - off);

                if (bytes <= 0) e(0);
        }

        close(fd);

        /* Have all changes been written back to the image file? */
        if (memcmp(dev_buf, dev_ref, dev_size)) e(0);

        unlink("image");

        free(dev_buf);
        free(dev_ref);
}

/*
 * Run the full test for a block device with the given size.
 */
static void
do_test(size_t size)
{
        int i;

        /*
         * Using the three I/O primitives (read, write, peek), we run four
         * sequences, mainly to test the effects of blocks being cached or
         * not. We set up a new image for each sequence, because (if
         * everything goes right) closing the device file also clears all
         * cached blocks for it, in both the root file system's cache and the
         * VM cache. Note that we currently do not even attempt to push the
         * blocks out of the root FS' cache in order to test retrieval from
         * the VM cache, since this would involve doing a LOT of extra I/O.
         */
        for (i = 0; i < 4; i++) {
                setup_image(size);

                switch (i) {
                case 0:
                        do_io(io_read);

                        /* FALLTHROUGH */
                case 1:
                        do_io(io_write);

                        do_io(io_read);

                        break;

                case 2:
                        do_io(io_peek);

                        /* FALLTHROUGH */
                case 3:
                        do_io(io_write);

                        do_io(io_peek);

                        break;
                }

                cleanup_image();
        }
}

/*
 * Test program for end-of-file conditions during block device I/O.
 */
int
main(void)
{
        static const unsigned int blocks[] = { 1, 4, 3, 5, 2 };
        struct statvfs buf;
        int i, j;

        start(85);

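        /*
         * Make the real user ID equal to the effective user ID; the test,
         * and the vnconfig(8) invocations it makes, must run as root.
         */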
        setuid(geteuid());

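        /*
         * Install cleanup handlers, so that the VND is deconfigured even on
         * an unexpected exit; otherwise, rerunning the test would be skipped
         * because vnd0 is still in use.
         */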
        signal(SIGINT, got_signal);
        signal(SIGABRT, got_signal);
        signal(SIGSEGV, got_signal);
        signal(SIGBUS, got_signal);
        atexit(cleanup_device);

        srand48(time(NULL));

        if (pipe(pipe_fd) != 0) e(0);

        /*
         * Get the system page size, and align all memory mapping offsets and
         * sizes accordingly.
         */
        page_size = sysconf(_SC_PAGESIZE);

        /*
         * Get the root file system block size. In the current MINIX3 system
         * architecture, the root file system's block size determines the
         * transfer granularity for I/O on unmounted block devices. If this
         * block size is not a multiple of the page size, we are (currently!)
         * not expecting memory-mapped block devices to work.
         */
        if (statvfs("/", &buf) < 0) e(0);

        block_size = buf.f_bsize;

        test_peek = !(block_size % page_size);

        for (i = 0; i < ITERATIONS; i++) {
                /*
                 * The 'blocks' array is scrambled so as to detect any blocks
                 * left in the VM cache (or not) across runs, just in case.
                 */
                for (j = 0; j < sizeof(blocks) / sizeof(blocks[0]); j++) {
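                        /*
                         * Test device sizes that are a sector larger than,
                         * equal to, and a sector smaller than a whole number
                         * of file system blocks, so as to cover both aligned
                         * and unaligned device ends.
                         */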
                        do_test(blocks[j] * block_size + SECTOR_SIZE);

                        do_test(blocks[j] * block_size);

                        do_test(blocks[j] * block_size - SECTOR_SIZE);
                }
        }

        quit();
}