1 /* 2 * Copyright (c) 2008 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.c,v 1.7 2008/03/20 06:08:40 dillon Exp $ 35 */ 36 37 #include "hammer.h" 38 39 static int hammer_ioc_prune(hammer_transaction_t trans, hammer_inode_t ip, 40 struct hammer_ioc_prune *prune); 41 static int hammer_ioc_gethistory(hammer_transaction_t trans, hammer_inode_t ip, 42 struct hammer_ioc_history *hist); 43 44 int 45 hammer_ioctl(hammer_inode_t ip, u_long com, caddr_t data, int fflag, 46 struct ucred *cred) 47 { 48 struct hammer_transaction trans; 49 int error; 50 51 error = suser_cred(cred, PRISON_ROOT); 52 53 hammer_start_transaction(&trans, ip->hmp); 54 55 switch(com) { 56 case HAMMERIOC_PRUNE: 57 if (error == 0) { 58 error = hammer_ioc_prune(&trans, ip, 59 (struct hammer_ioc_prune *)data); 60 } 61 break; 62 case HAMMERIOC_GETHISTORY: 63 error = hammer_ioc_gethistory(&trans, ip, 64 (struct hammer_ioc_history *)data); 65 break; 66 case HAMMERIOC_REBLOCK: 67 error = hammer_ioc_reblock(&trans, ip, 68 (struct hammer_ioc_reblock *)data); 69 break; 70 default: 71 error = EOPNOTSUPP; 72 break; 73 } 74 hammer_commit_transaction(&trans); 75 return (error); 76 } 77 78 /* 79 * Iterate through the specified range of object ids and remove any 80 * deleted records that fall entirely within a prune modulo. 81 * 82 * A reverse iteration is used to prevent overlapping records from being 83 * created during the iteration due to alignments. This also allows us 84 * to adjust alignments without blowing up the B-Tree. 
 */
/* forward declarations for the prune helpers defined below */
static int check_prune(struct hammer_ioc_prune *prune, hammer_btree_elm_t elm,
			int *realign_cre, int *realign_del);
static int realign_prune(struct hammer_ioc_prune *prune, hammer_cursor_t cursor,
			int realign_cre, int realign_del);

static int
hammer_ioc_prune(hammer_transaction_t trans, hammer_inode_t ip,
		struct hammer_ioc_prune *prune)
{
	struct hammer_cursor cursor;
	hammer_btree_elm_t elm;
	int error;
	int isdir;
	int realign_cre;
	int realign_del;

	/*
	 * Sanity check the user-supplied request.  PRUNE_ALL is mutually
	 * exclusive with an explicit element list.
	 */
	if (prune->nelms < 0 || prune->nelms > HAMMER_MAX_PRUNE_ELMS)
		return(EINVAL);
	if (prune->beg_obj_id >= prune->end_obj_id)
		return(EINVAL);
	if ((prune->flags & HAMMER_IOC_PRUNE_ALL) && prune->nelms)
		return(EINVAL);

retry:
	error = hammer_init_cursor(trans, &cursor, NULL);
	if (error) {
		hammer_done_cursor(&cursor);
		return(error);
	}
	/*
	 * Lower bound of the reverse scan.
	 */
	cursor.key_beg.obj_id = prune->beg_obj_id;
	cursor.key_beg.key = HAMMER_MIN_KEY;
	cursor.key_beg.create_tid = 1;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_MIN_RECTYPE;
	cursor.key_beg.obj_type = 0;

	/*
	 * Upper bound.  cur_obj_id/cur_key let an EDEADLK retry resume
	 * where the previous pass left off rather than restarting.
	 */
	cursor.key_end.obj_id = prune->cur_obj_id;
	cursor.key_end.key = prune->cur_key;
	cursor.key_end.create_tid = HAMMER_MAX_TID - 1;
	cursor.key_end.delete_tid = 0;
	cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
	cursor.key_end.obj_type = 0;

	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;

	/*
	 * Reverse iteration: start at the last element in range.
	 */
	error = hammer_btree_last(&cursor);
	while (error == 0) {
		elm = &cursor.node->ondisk->elms[cursor.index];
		/* record position so an EDEADLK retry can resume here */
		prune->cur_obj_id = elm->base.obj_id;
		prune->cur_key = elm->base.key;

		/* track the oldest create_tid seen for the caller's stats */
		if (prune->stat_oldest_tid > elm->leaf.base.create_tid)
			prune->stat_oldest_tid = elm->leaf.base.create_tid;

		if (check_prune(prune, elm, &realign_cre, &realign_del) == 0) {
			if (hammer_debug_general & 0x0200) {
				kprintf("check %016llx %016llx: DELETE\n",
					elm->base.obj_id, elm->base.key);
			}

			/*
			 * NOTE: This can return EDEADLK
			 */
			isdir = (elm->base.rec_type == HAMMER_RECTYPE_DIRENTRY);

			error = hammer_delete_at_cursor(&cursor,
							&prune->stat_bytes);
			if (error)
				break;

			if (isdir)
				++prune->stat_dirrecords;
			else
				++prune->stat_rawrecords;
		} else if (realign_cre >= 0 || realign_del >= 0) {
			error = realign_prune(prune, &cursor,
					realign_cre, realign_del);
			if (error == 0) {
				cursor.flags |= HAMMER_CURSOR_ATEDISK;
				if (hammer_debug_general & 0x0200) {
					kprintf("check %016llx %016llx: "
						"REALIGN\n",
						elm->base.obj_id,
						elm->base.key);
				}
			}
		} else {
			cursor.flags |= HAMMER_CURSOR_ATEDISK;
			if (hammer_debug_general & 0x0100) {
				kprintf("check %016llx %016llx: SKIP\n",
					elm->base.obj_id, elm->base.key);
			}
		}
		/* allow the user to interrupt a long-running prune */
		error = hammer_signal_check(trans->hmp);
		if (error == 0)
			error = hammer_btree_iterate_reverse(&cursor);
	}
	/* ENOENT just means we ran off the start of the range */
	if (error == ENOENT)
		error = 0;
	hammer_done_cursor(&cursor);
	/* deadlocks are resolved by releasing the cursor and retrying */
	if (error == EDEADLK)
		goto retry;
	return(error);
}

/*
 * Check pruning list.  The list must be sorted in descending order.
 *
 * Returns 0 if the element should be deleted, -1 if it should be retained.
 * *realign_cre / *realign_del are set to the index of the prune element
 * whose modulo should be used to realign the create/delete TID, or -1
 * if no realignment is required.
 */
static int
check_prune(struct hammer_ioc_prune *prune, hammer_btree_elm_t elm,
	    int *realign_cre, int *realign_del)
{
	struct hammer_ioc_prune_elm *scan;
	int i;

	*realign_cre = -1;
	*realign_del = -1;

	/*
	 * If pruning everything remove all records with a non-zero
	 * delete_tid.
	 */
	if (prune->flags & HAMMER_IOC_PRUNE_ALL) {
		if (elm->base.delete_tid != 0)
			return(0);
		return(-1);
	}

	for (i = 0; i < prune->nelms; ++i) {
		scan = &prune->elms[i];

		/*
		 * Locate the scan index covering the create and delete TIDs.
 */
		if (*realign_cre < 0 &&
		    elm->base.create_tid >= scan->beg_tid &&
		    elm->base.create_tid < scan->end_tid) {
			*realign_cre = i;
		}
		if (*realign_del < 0 && elm->base.delete_tid &&
		    elm->base.delete_tid > scan->beg_tid &&
		    elm->base.delete_tid <= scan->end_tid) {
			*realign_del = i;
		}

		/*
		 * Now check for loop termination.  The list is sorted in
		 * descending order so once the element's TIDs exceed the
		 * current scan entry no later entry can match either.
		 */
		if (elm->base.create_tid >= scan->end_tid ||
		    elm->base.delete_tid > scan->end_tid) {
			break;
		}

		/*
		 * Now determine if we can delete the record.  Both TIDs
		 * must fall within the scan range and within the same
		 * prune modulo for the record to be prunable.
		 */
		if (elm->base.delete_tid &&
		    elm->base.create_tid >= scan->beg_tid &&
		    elm->base.delete_tid <= scan->end_tid &&
		    elm->base.create_tid / scan->mod_tid ==
		    elm->base.delete_tid / scan->mod_tid) {
			return(0);
		}
	}
	return(-1);
}

/*
 * Align the record to cover any gaps created through the deletion of
 * records within the pruning space.  If we were to just delete the records
 * there would be gaps which in turn would cause a snapshot that is NOT on
 * a pruning boundary to appear corrupt to the user.  Forcing alignment
 * of the create_tid and delete_tid for retained records 'reconnects'
 * the previously contiguous space, making it contiguous again after the
 * deletions.
 *
 * The use of a reverse iteration allows us to safely align the records and
 * related elements without creating temporary overlaps.  XXX we should
 * add ordering dependancies for record buffers to guarantee consistency
 * during recovery.
 */
static int
realign_prune(struct hammer_ioc_prune *prune,
	      hammer_cursor_t cursor, int realign_cre, int realign_del)
{
	hammer_btree_elm_t elm;
	hammer_tid_t delta;
	hammer_tid_t mod;
	hammer_tid_t tid;
	int error;

	hammer_cursor_downgrade(cursor);

	elm = &cursor->node->ondisk->elms[cursor->index];
	++prune->stat_realignments;

	/*
	 * Align the create_tid.  By doing a reverse iteration we guarantee
	 * that all records after our current record have already been
	 * aligned, allowing us to safely correct the right-hand-boundary
	 * (because no record to our right if otherwise exactly matching
	 * will have a create_tid to the left of our aligned create_tid).
	 *
	 * Ordering is important here XXX but disk write ordering for
	 * inter-cluster corrections is not currently guaranteed.
	 */
	error = 0;
	if (realign_cre >= 0) {
		mod = prune->elms[realign_cre].mod_tid;
		delta = elm->leaf.base.create_tid % mod;
		if (delta) {
			/* round the create_tid up to the next mod boundary */
			tid = elm->leaf.base.create_tid - delta + mod;

			/* can EDEADLK */
			error = hammer_btree_correct_rhb(cursor, tid + 1);
			if (error == 0) {
				error = hammer_btree_extract(cursor,
						HAMMER_CURSOR_GET_RECORD);
			}
			if (error == 0) {
				/* can EDEADLK */
				error = hammer_cursor_upgrade(cursor);
			}
			if (error == 0) {
				/*
				 * Update both the on-media record and the
				 * B-Tree element under modify cover.
				 */
				hammer_modify_buffer(cursor->trans,
						cursor->record_buffer,
						NULL, 0);
				cursor->record->base.base.create_tid = tid;
				hammer_modify_node(cursor->trans, cursor->node,
						elm, sizeof(*elm));
				elm->leaf.base.create_tid = tid;
			}
		}
	}

	/*
	 * Align the delete_tid.  This only occurs if the record is
	 * historical and was deleted at some point.  Realigning the
	 * delete_tid does not move the record within the B-Tree but may
	 * cause it to temporarily overlap a record that has not yet been
	 * pruned.
 */
	if (error == 0 && realign_del >= 0) {
		mod = prune->elms[realign_del].mod_tid;
		delta = elm->leaf.base.delete_tid % mod;
		hammer_modify_node(cursor->trans, cursor->node,
				elm, sizeof(*elm));
		if (delta) {
			error = hammer_btree_extract(cursor,
					HAMMER_CURSOR_GET_RECORD);
			if (error == 0) {
				/* round delete_tid up to the next boundary */
				elm->leaf.base.delete_tid =
						elm->leaf.base.delete_tid -
						delta + mod;
				/* propagate to the on-media record */
				hammer_modify_buffer(cursor->trans,
					cursor->record_buffer,
					&cursor->record->base.base.delete_tid,
					sizeof(hammer_tid_t));
				cursor->record->base.base.delete_tid =
						elm->leaf.base.delete_tid;
			}
		}
	}
	return (error);
}

/*
 * Iterate through an object's inode or an object's records and record
 * modification TIDs.
 */
static void add_history(hammer_inode_t ip, struct hammer_ioc_history *hist,
			hammer_btree_elm_t elm);

static
int
hammer_ioc_gethistory(hammer_transaction_t trans, hammer_inode_t ip,
		      struct hammer_ioc_history *hist)
{
	struct hammer_cursor cursor;
	hammer_btree_elm_t elm;
	int error;

	/*
	 * Validate the structure and initialize for return.
	 */
	if (hist->beg_tid > hist->end_tid)
		return(EINVAL);
	if (hist->flags & HAMMER_IOC_HISTORY_ATKEY) {
		if (hist->key > hist->nxt_key)
			return(EINVAL);
	}

	hist->obj_id = ip->obj_id;
	hist->count = 0;
	hist->nxt_tid = hist->end_tid;
	hist->flags &= ~HAMMER_IOC_HISTORY_NEXT_TID;
	hist->flags &= ~HAMMER_IOC_HISTORY_NEXT_KEY;
	hist->flags &= ~HAMMER_IOC_HISTORY_EOF;
	hist->flags &= ~HAMMER_IOC_HISTORY_UNSYNCED;
	/*
	 * Report unsynced in-memory modifications, ignoring pure
	 * inode-times updates.
	 */
	if ((ip->flags & HAMMER_INODE_MODMASK) & ~HAMMER_INODE_ITIMES)
		hist->flags |= HAMMER_IOC_HISTORY_UNSYNCED;

	/*
	 * Setup the cursor.  We can't handle undeletable records
	 * (create_tid of 0) at the moment.  A create_tid of 0 has
	 * a special meaning and cannot be specified in the cursor.
	 */
	error = hammer_init_cursor(trans, &cursor, &ip->cache[0]);
	if (error) {
		hammer_done_cursor(&cursor);
		return(error);
	}

	cursor.key_beg.obj_id = hist->obj_id;
	cursor.key_beg.create_tid = hist->beg_tid;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.obj_type = 0;
	if (cursor.key_beg.create_tid == HAMMER_MIN_TID)
		cursor.key_beg.create_tid = 1;

	cursor.key_end.obj_id = hist->obj_id;
	cursor.key_end.create_tid = hist->end_tid;
	cursor.key_end.delete_tid = 0;
	cursor.key_end.obj_type = 0;

	cursor.flags |= HAMMER_CURSOR_END_EXCLUSIVE;

	if (hist->flags & HAMMER_IOC_HISTORY_ATKEY) {
		/*
		 * key-range within the file.  For a regular file the
		 * on-disk key represents BASE+LEN, not BASE, so the
		 * first possible record containing the offset 'key'
		 * has an on-disk key of (key + 1).
		 */
		cursor.key_beg.key = hist->key;
		cursor.key_end.key = HAMMER_MAX_KEY;

		switch(ip->ino_rec.base.base.obj_type) {
		case HAMMER_OBJTYPE_REGFILE:
			++cursor.key_beg.key;
			cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA;
			break;
		case HAMMER_OBJTYPE_DIRECTORY:
			cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
			break;
		case HAMMER_OBJTYPE_DBFILE:
			cursor.key_beg.rec_type = HAMMER_RECTYPE_DB;
			break;
		default:
			/* other object types carry no keyed history */
			error = EINVAL;
			break;
		}
		cursor.key_end.rec_type = cursor.key_beg.rec_type;
	} else {
		/*
		 * The inode itself.
 */
		cursor.key_beg.key = 0;
		cursor.key_end.key = 0;
		cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
		cursor.key_end.rec_type = HAMMER_RECTYPE_INODE;
	}

	error = hammer_btree_first(&cursor);
	while (error == 0) {
		elm = &cursor.node->ondisk->elms[cursor.index];

		add_history(ip, hist, elm);
		/*
		 * Stop when the return structure is full or add_history()
		 * determined the scan is complete.
		 */
		if (hist->flags & (HAMMER_IOC_HISTORY_NEXT_TID |
				 HAMMER_IOC_HISTORY_NEXT_KEY |
				 HAMMER_IOC_HISTORY_EOF)) {
			break;
		}
		error = hammer_btree_iterate(&cursor);
	}
	if (error == ENOENT) {
		/* running off the end of the range is normal termination */
		hist->flags |= HAMMER_IOC_HISTORY_EOF;
		error = 0;
	}
	hammer_done_cursor(&cursor);
	return(error);
}

/*
 * Add the scanned element to the ioctl return structure.  Some special
 * casing is required for regular files to accommodate how data ranges are
 * stored on-disk.
 */
static void
add_history(hammer_inode_t ip, struct hammer_ioc_history *hist,
	    hammer_btree_elm_t elm)
{
	/* only actual records carry history; skip internal elements */
	if (elm->base.btype != HAMMER_BTREE_TYPE_RECORD)
		return;
	if ((hist->flags & HAMMER_IOC_HISTORY_ATKEY) &&
	    ip->ino_rec.base.base.obj_type == HAMMER_OBJTYPE_REGFILE) {
		/*
		 * Adjust nxt_key.  For regular files the on-disk key is
		 * BASE+LEN, so (key - data_len) is the record's base offset.
		 */
		if (hist->nxt_key > elm->leaf.base.key - elm->leaf.data_len &&
		    hist->key < elm->leaf.base.key - elm->leaf.data_len) {
			hist->nxt_key = elm->leaf.base.key - elm->leaf.data_len;
		}
		if (hist->nxt_key > elm->leaf.base.key)
			hist->nxt_key = elm->leaf.base.key;

		/*
		 * Record is beyond MAXPHYS, there won't be any more records
		 * in the iteration covering the requested offset (key).
		 */
		if (elm->leaf.base.key >= MAXPHYS &&
		    elm->leaf.base.key - MAXPHYS > hist->key) {
			hist->flags |= HAMMER_IOC_HISTORY_NEXT_KEY;
		}

		/*
		 * Data-range of record does not cover the key.
		 */
		if (elm->leaf.base.key - elm->leaf.data_len > hist->key)
			return;

	} else if (hist->flags & HAMMER_IOC_HISTORY_ATKEY) {
		/*
		 * Adjust nxt_key
		 */
		if (hist->nxt_key > elm->leaf.base.key &&
		    hist->key < elm->leaf.base.key) {
			hist->nxt_key = elm->leaf.base.key;
		}

		/*
		 * Record is beyond the requested key.
		 */
		if (elm->leaf.base.key > hist->key)
			hist->flags |= HAMMER_IOC_HISTORY_NEXT_KEY;
	}

	/*
	 * Add create_tid if it is in-bounds.  Skip duplicates of the most
	 * recently recorded TID.
	 */
	if ((hist->count == 0 ||
	     elm->leaf.base.create_tid != hist->tid_ary[hist->count - 1]) &&
	    elm->leaf.base.create_tid >= hist->beg_tid &&
	    elm->leaf.base.create_tid < hist->end_tid) {
		if (hist->count == HAMMER_MAX_HISTORY_ELMS) {
			/* array full: tell the caller where to resume */
			hist->nxt_tid = elm->leaf.base.create_tid;
			hist->flags |= HAMMER_IOC_HISTORY_NEXT_TID;
			return;
		}
		hist->tid_ary[hist->count++] = elm->leaf.base.create_tid;
	}

	/*
	 * Add delete_tid if it is in-bounds.  Note that different portions
	 * of the history may have overlapping data ranges with different
	 * delete_tid's.  If this case occurs the delete_tid may match the
	 * create_tid of a following record.  XXX
	 *
	 * [ ]
	 *     [     ]
	 */
	if (elm->leaf.base.delete_tid &&
	    elm->leaf.base.delete_tid >= hist->beg_tid &&
	    elm->leaf.base.delete_tid < hist->end_tid) {
		if (hist->count == HAMMER_MAX_HISTORY_ELMS) {
			/* array full: tell the caller where to resume */
			hist->nxt_tid = elm->leaf.base.delete_tid;
			hist->flags |= HAMMER_IOC_HISTORY_NEXT_TID;
			return;
		}
		hist->tid_ary[hist->count++] = elm->leaf.base.delete_tid;
	}
}
