/* Test for end-of-file during block device I/O - by D.C. van Moolenbroek */
/* This test needs to be run as root; it sets up and uses a VND instance. */
/*
 * The test should work with all root file system block sizes, but only tests
 * certain corner cases if the root FS block size is twice the page size.
 */
#include <stdlib.h>
#include <stdio.h>		/* for printf() */
#include <string.h>
#include <signal.h>
#include <errno.h>		/* for errno in is_readable() */
#include <time.h>		/* for time() */
#include <sys/param.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/statvfs.h>	/* for statvfs() */
#include <minix/partition.h>
#include <fcntl.h>
#include <unistd.h>
#include <assert.h>

#define VNCONFIG "/usr/sbin/vnconfig"

#define SECTOR_SIZE 512	/* this should be the sector size of VND */

#define ITERATIONS 3

enum {
	BEFORE_EOF,
	UPTO_EOF,
	ACROSS_EOF,
	ONEPAST_EOF,
	FROM_EOF,
	BEYOND_EOF
};

#include "common.h"

static int need_cleanup = 0;

static int dev_fd;
static size_t dev_size;
static char *dev_buf;
static char *dev_ref;

static size_t block_size;
static size_t page_size;
static int test_peek;

static char *mmap_ptr = NULL;
static size_t mmap_size;

static int pipe_fd[2];

/*
 * Fill the given buffer with random contents.
 */
static void
fill_buf(char * buf, size_t size)
{

	while (size--)
		*buf++ = lrand48() & 0xff;
}

/*
 * Place the elements of the source array in the destination array in random
 * order. There are probably better ways to do this, but it is morning, and I
 * haven't had coffee yet, so go away.
 */
static void
scramble(int * dst, const int * src, int count)
{
	int i, j, k;

	for (i = 0; i < count; i++)
		dst[i] = i;

	for (i = count - 1; i >= 0; i--) {
		j = lrand48() % (i + 1);

		k = dst[j];
		dst[j] = dst[i];
		dst[i] = src[k];
	}
}
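/*
 * (The loop above works as follows: dst[] initially holds the indices
 * 0..count-1; each iteration picks a random not-yet-consumed index k from
 * the front part of dst[] and stores src[k] at position i, which is that
 * element's final slot. For reference, the same uniformly random permutation
 * could be obtained with a conventional Fisher-Yates shuffle over a copy of
 * the source array; a minimal sketch, not used by the test itself:
 *
 *	int i, j, t;
 *
 *	for (i = 0; i < count; i++)
 *		dst[i] = src[i];
 *
 *	for (i = count - 1; i > 0; i--) {
 *		j = lrand48() % (i + 1);
 *
 *		t = dst[i];
 *		dst[i] = dst[j];
 *		dst[j] = t;
 *	}
 */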
/*
 * Perform I/O using read(2) and check the returned results against the
 * expected result and the image reference data.
 */
static void
io_read(size_t pos, size_t len, size_t expected)
{
	ssize_t bytes;

	assert(len > 0 && len <= dev_size);
	assert(expected <= len);

	if (lseek(dev_fd, (off_t)pos, SEEK_SET) != pos) e(0);

	memset(dev_buf, 0, len);

	if ((bytes = read(dev_fd, dev_buf, len)) < 0) e(0);

	if (bytes != expected) e(0);

	if (memcmp(&dev_ref[pos], dev_buf, bytes)) e(0);
}

/*
 * Perform I/O using write(2) and check the returned result against the
 * expected result. Update the image reference data as appropriate.
 */
static void
io_write(size_t pos, size_t len, size_t expected)
{
	ssize_t bytes;

	assert(len > 0 && len <= dev_size);
	assert(expected <= len);

	if (lseek(dev_fd, (off_t)pos, SEEK_SET) != pos) e(0);

	fill_buf(dev_buf, len);

	if ((bytes = write(dev_fd, dev_buf, len)) < 0) e(0);

	if (bytes != expected) e(0);

	if (bytes > 0) {
		assert(pos + bytes <= dev_size);

		memcpy(&dev_ref[pos], dev_buf, bytes);
	}
}

/*
 * Test whether reading from the given pointer succeeds, and return the
 * result.
 */
static int
is_readable(char * ptr)
{
	ssize_t r;
	char byte;

	/*
	 * If we accessed the pointer directly, we would get a fatal signal,
	 * so testing that way would require a child process, making the
	 * whole test slow and noisy. Let a service try the operation for us
	 * instead: writing the byte into a pipe fails with EFAULT, rather
	 * than raising a signal, if the memory is not readable.
	 */
	r = write(pipe_fd[1], ptr, 1);

	if (r == 1) {
		/* Don't fill up the pipe. */
		if (read(pipe_fd[0], &byte, 1) != 1) e(0);

		return 1;
	} else if (r != -1 || errno != EFAULT)
		e(0);

	return 0;
}

/*
 * Perform I/O using mmap(2) and check the returned results against the
 * expected result and the image reference data. Ensure that bytes beyond the
 * device end are either zero (on the remainder of the last page) or
 * inaccessible (on pages entirely beyond the device end).
 */
static void
io_peek(size_t pos, size_t len, size_t expected)
{
	size_t n, delta, mapped_size;
	char *ptr;

	assert(test_peek);

	delta = pos % page_size;

	pos -= delta;
	len += delta;

	len = roundup(len, page_size);

	/* Don't bother with the given expected value. Recompute it. */
	if (pos < dev_size)
		expected = MIN(dev_size - pos, len);
	else
		expected = 0;

	mapped_size = roundup(dev_size, page_size);

	assert(!(len % page_size));

	ptr = mmap(NULL, len, PROT_READ, MAP_PRIVATE | MAP_FILE, dev_fd,
	    (off_t)pos);

	/*
	 * As of writing, VM allows memory mapping at any offset and for any
	 * length. At least for block devices, VM should probably be changed
	 * to throw ENXIO for any pages beyond the file end, which in turn
	 * renders all the SIGBUS tests below obsolete.
	 */
	if (ptr == MAP_FAILED) {
		if (pos + len <= mapped_size) e(0);
		if (errno != ENXIO) e(0);

		return;
	}

	mmap_ptr = ptr;
	mmap_size = len;

	/*
	 * Any page that contains any valid part of the mapped device should
	 * be readable and have correct contents for that part. If the last
	 * valid page extends beyond the mapped device, its remainder should
	 * be zero.
	 */
	if (pos < dev_size) {
		/* The valid part should have the expected device contents. */
		if (memcmp(&dev_ref[pos], ptr, expected)) e(0);

		/* The remainder, if any, should be zero. */
		for (n = expected; n % page_size; n++)
			if (ptr[n] != 0) e(0);
	}

	/*
	 * Any page entirely beyond EOF should not be mapped in. In order to
	 * ensure that is_readable() works, also test pages that are mapped
	 * in.
	 */
	for (n = pos; n < pos + len; n += page_size)
		if (is_readable(&ptr[n - pos]) != (n < mapped_size)) e(0);

	munmap(ptr, len);

	mmap_ptr = NULL;
}
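/*
 * Overview of the six access patterns exercised by do_one_io() below, with
 * EOF denoting the device end at byte offset dev_size. For each pattern,
 * "expected" is the number of bytes the I/O operation should transfer for a
 * request covering [pos, pos + len):
 *
 *	BEFORE_EOF	the range ends exactly one byte short of EOF;
 *			expected = len
 *	UPTO_EOF	the range ends exactly at EOF; expected = len
 *	ACROSS_EOF	the range starts before and ends past EOF;
 *			expected = dev_size - pos
 *	ONEPAST_EOF	the range ends exactly one byte past EOF;
 *			expected = len - 1
 *	FROM_EOF	the range starts exactly at EOF; expected = 0
 *	BEYOND_EOF	the range starts past EOF; expected = 0
 */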
/*
 * Perform one of the supported end-of-file access attempts using one I/O
 * operation.
 */
static void
do_one_io(int where, void (* io_proc)(size_t, size_t, size_t))
{
	size_t start, bytes;

	switch (where) {
	case BEFORE_EOF:
		bytes = lrand48() % (dev_size - 1) + 1;

		io_proc(dev_size - bytes - 1, bytes, bytes);

		break;

	case UPTO_EOF:
		bytes = lrand48() % dev_size + 1;

		io_proc(dev_size - bytes, bytes, bytes);

		break;

	case ACROSS_EOF:
		start = lrand48() % (dev_size - 1) + 1;
		bytes = dev_size - start + 1;
		assert(start < dev_size && start + bytes > dev_size);
		bytes += lrand48() % (dev_size - bytes + 1);

		io_proc(start, bytes, dev_size - start);

		break;

	case ONEPAST_EOF:
		bytes = lrand48() % (dev_size - 1) + 1;

		io_proc(dev_size - bytes + 1, bytes, bytes - 1);

		break;

	case FROM_EOF:
		bytes = lrand48() % dev_size + 1;

		io_proc(dev_size, bytes, 0);

		break;

	case BEYOND_EOF:
		start = dev_size + lrand48() % dev_size + 1;
		bytes = lrand48() % dev_size + 1;

		io_proc(start, bytes, 0);

		break;

	default:
		assert(0);
	}
}

/*
 * Perform I/O operations, testing all the supported end-of-file access
 * attempts in a random order so as to detect possible problems with caching.
 */
static void
do_io(void (* io_proc)(size_t, size_t, size_t))
{
	static const int list[] = { BEFORE_EOF, UPTO_EOF, ACROSS_EOF,
	    ONEPAST_EOF, FROM_EOF, BEYOND_EOF };
	static const int count = sizeof(list) / sizeof(list[0]);
	int i, where[count];

	scramble(where, list, count);

	for (i = 0; i < count; i++)
		do_one_io(where[i], io_proc);
}

/*
 * Set up an image file of the given size, assign it to a VND, and open the
 * resulting block device. The size is a size_t because we keep a reference
 * copy of its entire contents in memory.
 */
static void
setup_image(size_t size)
{
	struct part_geom part;
	size_t off;
	ssize_t bytes;
	int fd, status;

	dev_size = size;
	if ((dev_buf = malloc(dev_size)) == NULL) e(0);
	if ((dev_ref = malloc(dev_size)) == NULL) e(0);

	if ((fd = open("image", O_CREAT | O_TRUNC | O_RDWR, 0644)) < 0) e(0);

	fill_buf(dev_ref, dev_size);

	for (off = 0; off < dev_size; off += bytes) {
		bytes = write(fd, &dev_ref[off], dev_size - off);

		if (bytes <= 0) e(0);
	}

	close(fd);

	status = system(VNCONFIG " vnd0 image 2>/dev/null");
	if (!WIFEXITED(status)) e(0);
	if (WEXITSTATUS(status) != 0) {
		printf("skipped\n");	/* most likely cause: vnd0 is in use */
		cleanup();
		exit(0);
	}

	need_cleanup = 1;

	if ((dev_fd = open("/dev/vnd0", O_RDWR)) < 0) e(0);

	if (ioctl(dev_fd, DIOCGETP, &part) < 0) e(0);

	if (part.size != dev_size) e(0);
}
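/*
 * For reference, the attach/detach pair performed by setup_image() and
 * cleanup_device() corresponds to the following manual vnconfig(8)
 * invocations (a sketch, assuming vnd0 is not already in use):
 *
 *	# vnconfig vnd0 image		 # expose ./image as /dev/vnd0
 *	# dd if=/dev/vnd0 of=/dev/null	 # ...perform raw I/O on the device...
 *	# vnconfig -u vnd0		 # detach the image file again
 */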
/*
 * Clean up the VND we set up previously. This function is also called in
 * case of an unexpected exit.
 */
static void
cleanup_device(void)
{
	int status;

	if (!need_cleanup)
		return;

	if (mmap_ptr != NULL) {
		munmap(mmap_ptr, mmap_size);

		mmap_ptr = NULL;
	}

	if (dev_fd >= 0)
		close(dev_fd);

	status = system(VNCONFIG " -u vnd0 2>/dev/null");
	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) e(0);

	need_cleanup = 0;
}

/*
 * Signal handler for exceptions.
 */
static void
got_signal(int __unused sig)
{

	cleanup_device();

	exit(1);
}

/*
 * Clean up the VND and image file we set up previously.
 */
static void
cleanup_image(void)
{
	size_t off;
	ssize_t bytes;
	int fd;

	cleanup_device();

	if ((fd = open("image", O_RDONLY)) < 0) e(0);

	for (off = 0; off < dev_size; off += bytes) {
		bytes = read(fd, &dev_buf[off], dev_size - off);

		if (bytes <= 0) e(0);
	}

	close(fd);

	/* Have all changes been written back to the device? */
	if (memcmp(dev_buf, dev_ref, dev_size)) e(0);

	unlink("image");

	free(dev_buf);
	free(dev_ref);
}

/*
 * Run the full test for a block device with the given size.
 */
static void
do_test(size_t size)
{
	int i, max;

	/*
	 * Using the three I/O primitives (read, write, peek), we run up to
	 * four sequences, mainly to test the effects of blocks being cached
	 * or not. We set up a new image for each sequence, because (if
	 * everything goes right) closing the device file also clears all
	 * cached blocks for it, in both the root file system's cache and the
	 * VM cache. Note that we currently do not even attempt to push the
	 * blocks out of the root FS' cache in order to test retrieval from
	 * the VM cache, since this would involve doing a LOT of extra I/O.
	 * The two peek sequences are skipped if memory-mapped block device
	 * I/O is not expected to work at all (see the test_peek assignment
	 * in main()), since io_peek() asserts that test_peek is set.
	 */
	max = test_peek ? 4 : 2;

	for (i = 0; i < max; i++) {
		setup_image(size);

		switch (i) {
		case 0:
			do_io(io_read);

			/* FALLTHROUGH */
		case 1:
			do_io(io_write);

			do_io(io_read);

			break;

		case 2:
			do_io(io_peek);

			/* FALLTHROUGH */
		case 3:
			do_io(io_write);

			do_io(io_peek);

			break;
		}

		cleanup_image();
	}
}

/*
 * Test program for end-of-file conditions during block device I/O.
 */
int
main(void)
{
	static const unsigned int blocks[] = { 1, 4, 3, 5, 2 };
	struct statvfs buf;
	int i, j;

	start(85);

	signal(SIGINT, got_signal);
	signal(SIGABRT, got_signal);
	signal(SIGSEGV, got_signal);
	signal(SIGBUS, got_signal);
	atexit(cleanup_device);

	srand48(time(NULL));

	if (pipe(pipe_fd) != 0) e(0);

	/*
	 * Get the system page size, and align all memory mapping offsets and
	 * sizes accordingly.
	 */
	page_size = sysconf(_SC_PAGESIZE);

	/*
	 * Get the root file system block size. In the current MINIX3 system
	 * architecture, the root file system's block size determines the
	 * transfer granularity for I/O on unmounted block devices. If this
	 * block size is not a multiple of the page size, we are (currently!)
	 * not expecting memory-mapped block devices to work.
	 */
	if (statvfs("/", &buf) < 0) e(0);

	block_size = buf.f_bsize;

	test_peek = !(block_size % page_size);

	for (i = 0; i < ITERATIONS; i++) {
		/*
		 * The 'blocks' array lists block counts in a deliberately
		 * nonsequential order, so as to detect any blocks
		 * incorrectly left behind in the VM cache across runs, just
		 * in case.
		 */
		for (j = 0; j < sizeof(blocks) / sizeof(blocks[0]); j++) {
			do_test(blocks[j] * block_size + SECTOR_SIZE);

			do_test(blocks[j] * block_size);

			do_test(blocks[j] * block_size - SECTOR_SIZE);
		}
	}

	quit();
}
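/*
 * Worked example of the sizes passed to do_test(), assuming a root file
 * system block size of 4096 bytes (the actual value is obtained from
 * statvfs() at run time): for each count b in { 1, 4, 3, 5, 2 }, the test is
 * run with device sizes b * 4096 + 512, b * 4096, and b * 4096 - 512, i.e.
 * 4608, 4096, and 3584 bytes for b = 1. As noted at the top of this file,
 * the extra corner cases are exercised only when the block size is twice
 * the page size (e.g., an 8192-byte block size with 4096-byte pages).
 */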