/*
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.4 2007/11/20 22:55:40 dillon Exp $
 */

#include "hammer.h"

static int hammer_mem_add(hammer_transaction_t trans,
			  hammer_record_t record);
static int hammer_mem_lookup(hammer_cursor_t cursor, hammer_inode_t ip);
static int hammer_mem_search(hammer_cursor_t cursor, hammer_inode_t ip);

/*
 * Red-black tree support.
 */
static int
hammer_rec_rb_compare(hammer_record_t rec1, hammer_record_t rec2)
{
	if (rec1->rec.base.base.rec_type < rec2->rec.base.base.rec_type)
		return(-1);
	if (rec1->rec.base.base.rec_type > rec2->rec.base.base.rec_type)
		return(1);

	if (rec1->rec.base.base.key < rec2->rec.base.base.key)
		return(-1);
	if (rec1->rec.base.base.key > rec2->rec.base.base.key)
		return(1);

	if (rec1->rec.base.base.create_tid < rec2->rec.base.base.create_tid)
		return(-1);
	if (rec1->rec.base.base.create_tid > rec2->rec.base.base.create_tid)
		return(1);
	return(0);
}

static int
hammer_rec_compare(hammer_base_elm_t info, hammer_record_t rec)
{
	/*
	 * A key1->rec_type of 0 matches any record type.
	 */
	if (info->rec_type) {
		if (info->rec_type < rec->rec.base.base.rec_type)
			return(-3);
		if (info->rec_type > rec->rec.base.base.rec_type)
			return(3);
	}

	/*
	 * There is no special case for key.  0 means 0.
	 */
	if (info->key < rec->rec.base.base.key)
		return(-2);
	if (info->key > rec->rec.base.base.key)
		return(2);

	/*
	 * This test has a number of special cases.  create_tid in key1 is
	 * the as-of transaction id, and delete_tid in key1 is NOT USED.
	 *
	 * A key1->create_tid of 0 matches any record regardless of when
	 * it was created or destroyed.
	 * 0xFFFFFFFFFFFFFFFFULL should be used to search for the most
	 * current state of the object.
	 *
	 * key2->create_tid is the creation transaction id of a HAMMER
	 * record and will never be 0.  key2->delete_tid is the deletion
	 * transaction id or 0 if the record has not yet been deleted.
	 */
	if (info->create_tid) {
		if (info->create_tid < rec->rec.base.base.create_tid)
			return(-1);
		if (rec->rec.base.base.delete_tid &&
		    info->create_tid >= rec->rec.base.base.delete_tid) {
			return(1);
		}
	}
	return(0);
}

/*
 * RB_SCAN comparison code for hammer_mem_search().  The argument order
 * is reversed so the comparison result has to be negated.  key_beg and
 * key_end are both inclusive boundaries.
 */
static
int
hammer_rec_scan_cmp(hammer_record_t rec, void *data)
{
	hammer_cursor_t cursor = data;
	int r;

	r = hammer_rec_compare(&cursor->key_beg, rec);
	if (r > 0)
		return(-1);
	if (r == 0)
		return(0);
	r = hammer_rec_compare(&cursor->key_end, rec);
	if (r <= 0)
		return(1);
	return(0);
}

RB_GENERATE(hammer_rec_rb_tree, hammer_record, rb_node, hammer_rec_rb_compare);
RB_GENERATE_XLOOKUP(hammer_rec_rb_tree, INFO, hammer_record, rb_node,
		    hammer_rec_compare, hammer_base_elm_t);

/*
 * Allocate a record for the caller to finish filling in.
 */
hammer_record_t
hammer_alloc_mem_record(struct hammer_transaction *trans, hammer_inode_t ip)
{
	hammer_record_t record;

	record = kmalloc(sizeof(*record), M_HAMMER, M_WAITOK|M_ZERO);
	record->ip = ip;
	return (record);
}

/*
 * Release a memory record.  If the record is marked for deferred deletion,
 * destroy the record when the last reference goes away.
 */
void
hammer_rel_mem_record(struct hammer_record **recordp)
{
	hammer_record_t rec;

	if ((rec = *recordp) != NULL) {
		if (hammer_islastref(&rec->lock)) {
			hammer_unref(&rec->lock);
			if (rec->flags & HAMMER_RECF_DELETED)
				hammer_free_mem_record(rec);
		} else {
			hammer_unref(&rec->lock);
		}
		*recordp = NULL;
	}
}

/*
 * Free a record.  Clean the structure up even though we are throwing it
 * away as a sanity check.  The actual free operation is delayed while
 * the record is referenced.  However, the record is removed from the RB
 * tree immediately.
 */
void
hammer_free_mem_record(hammer_record_t record)
{
	if (record->flags & HAMMER_RECF_ONRBTREE) {
		RB_REMOVE(hammer_rec_rb_tree, &record->ip->rec_tree, record);
		record->flags &= ~HAMMER_RECF_ONRBTREE;
	}
	if (record->lock.refs) {
		record->flags |= HAMMER_RECF_DELETED;
		return;
	}
	if (record->flags & HAMMER_RECF_ALLOCDATA) {
		kfree(record->data, M_HAMMER);
		record->flags &= ~HAMMER_RECF_ALLOCDATA;
	}
	record->data = NULL;
	kfree(record, M_HAMMER);
}

/*
 * Lookup an in-memory record given the key specified in the cursor.  Works
 * just like hammer_btree_lookup() but operates on an inode's in-memory
 * record list.
 *
 * The lookup must fail if the record is marked for deferred deletion.
 */
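/*
 * Note: both hammer_mem_lookup() and hammer_mem_search() link the cursor's
 * scan_info into the inode's rec_tree and leave a reference on any record
 * returned in cursor->iprec; hammer_mem_done() releases both.
 */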
static
int
hammer_mem_lookup(hammer_cursor_t cursor, hammer_inode_t ip)
{
	int error;

	if (cursor->iprec)
		hammer_rel_mem_record(&cursor->iprec);
	if (cursor->ip) {
		hammer_rec_rb_tree_scan_info_done(&cursor->scan,
						  &cursor->ip->rec_tree);
	}
	cursor->ip = ip;
	hammer_rec_rb_tree_scan_info_link(&cursor->scan, &ip->rec_tree);
	cursor->scan.node = NULL;
	cursor->iprec = hammer_rec_rb_tree_RB_LOOKUP_INFO(
				&ip->rec_tree, &cursor->key_beg);
	if (cursor->iprec == NULL) {
		error = ENOENT;
	} else {
		hammer_ref(&cursor->iprec->lock);
		error = 0;
	}
	return(error);
}

/*
 * hammer_mem_search() - locate the first in-memory record matching the
 * cursor.
 *
 * The RB_SCAN function we use is designed as a callback.  We terminate it
 * (return -1) as soon as we get a match.
 */
static
int
hammer_rec_scan_callback(hammer_record_t rec, void *data)
{
	hammer_cursor_t cursor = data;

	if (cursor->iprec == NULL) {
		cursor->iprec = rec;
		hammer_ref(&rec->lock);
		return(-1);
	}
	return(0);
}

static
int
hammer_mem_search(hammer_cursor_t cursor, hammer_inode_t ip)
{
	if (cursor->iprec)
		hammer_rel_mem_record(&cursor->iprec);
	if (cursor->ip) {
		hammer_rec_rb_tree_scan_info_done(&cursor->scan,
						  &cursor->ip->rec_tree);
	}
	cursor->ip = ip;
	hammer_rec_rb_tree_scan_info_link(&cursor->scan, &ip->rec_tree);
	cursor->scan.node = NULL;
	hammer_rec_rb_tree_RB_SCAN(&ip->rec_tree, hammer_rec_scan_cmp,
				   hammer_rec_scan_callback, cursor);
	if (cursor->iprec) {
		cursor->scan.node = hammer_rec_rb_tree_RB_NEXT(cursor->iprec);
		return(0);
	}
	return(ENOENT);
}

void
hammer_mem_done(hammer_cursor_t cursor)
{
	if (cursor->ip) {
		hammer_rec_rb_tree_scan_info_done(&cursor->scan,
						  &cursor->ip->rec_tree);
		cursor->ip = NULL;
	}
	if (cursor->iprec)
		hammer_rel_mem_record(&cursor->iprec);
}

/************************************************************************
 *		   HAMMER IN-MEMORY RECORD FUNCTIONS			*
 ************************************************************************
 *
 * These functions manipulate in-memory records.  Such records typically
 * exist prior to being committed to disk or indexed via the on-disk B-Tree.
 */

/*
 * Add a directory entry (dip,ncp) which references inode (ip).
 *
 * Note that the low 32 bits of the namekey are set temporarily to create
 * a unique in-memory record, and may be modified a second time when the
 * record is synchronized to disk.  In particular, the low 32 bits cannot be
 * all 0's when synching to disk, which is not handled here.
 */
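/*
 * As described in hammer_mem_add(), the upper 32 bits of a directory key
 * hold the name hash produced by hammer_directory_namekey() and the low
 * 32 bits hold the mount's namekey_iterator, which disambiguates hash
 * collisions.
 */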
int
hammer_ip_add_directory(struct hammer_transaction *trans,
			struct hammer_inode *dip, struct namecache *ncp,
			struct hammer_inode *ip)
{
	hammer_record_t record;
	int error;
	int bytes;

	record = hammer_alloc_mem_record(trans, dip);

	bytes = ncp->nc_nlen;	/* NOTE: terminating \0 is NOT included */
	if (++trans->hmp->namekey_iterator == 0)
		++trans->hmp->namekey_iterator;

	record->rec.entry.base.base.obj_id = dip->obj_id;
	record->rec.entry.base.base.key =
		hammer_directory_namekey(ncp->nc_name, bytes);
	record->rec.entry.base.base.key += trans->hmp->namekey_iterator;
	record->rec.entry.base.base.create_tid = trans->tid;
	record->rec.entry.base.base.rec_type = HAMMER_RECTYPE_DIRENTRY;
	record->rec.entry.base.base.obj_type = ip->ino_rec.base.base.obj_type;
	record->rec.entry.obj_id = ip->obj_id;
	if (bytes <= sizeof(record->rec.entry.den_name)) {
		record->data = (void *)record->rec.entry.den_name;
	} else {
		record->data = kmalloc(bytes, M_HAMMER, M_WAITOK);
		record->flags |= HAMMER_RECF_ALLOCDATA;
	}
	bcopy(ncp->nc_name, record->data, bytes);
	record->rec.entry.base.data_len = bytes;
	++ip->ino_rec.ino_nlinks;
	hammer_modify_inode(trans, ip, HAMMER_INODE_RDIRTY);
	error = hammer_mem_add(trans, record);
	return(error);
}

/*
 * Delete the directory entry and update the inode link count.  The
 * cursor must be positioned on the directory entry record being deleted.
 *
 * NOTE: HAMMER_CURSOR_DELETE may not have been set.  XXX remove flag.
 */
int
hammer_ip_del_directory(struct hammer_transaction *trans,
			hammer_cursor_t cursor, struct hammer_inode *dip,
			struct hammer_inode *ip)
{
	int error;

	if (cursor->record == &cursor->iprec->rec) {
		/*
		 * The directory entry was in-memory, just scrap the
		 * record.
		 */
		hammer_free_mem_record(cursor->iprec);
		error = 0;
	} else {
		/*
		 * The directory entry was on-disk, mark the record and
		 * B-Tree entry as deleted.  The B-Tree entry does not
		 * have to be reindexed because a 'current' delete transid
		 * will wind up in the same position as the live record.
		 */
		KKASSERT(ip->flags & HAMMER_INODE_ONDISK);
		error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_RECORD);
		if (error == 0) {
			cursor->node->ondisk->elms[cursor->index].base.delete_tid = trans->tid;
			cursor->record->base.base.delete_tid = trans->tid;
			hammer_modify_node(cursor->node);
			hammer_modify_buffer(cursor->record_buffer);
		}
	}

	/*
	 * One less link.  Mark the inode and all of its records as deleted
	 * when the last link goes away.  The inode will be automatically
	 * flushed when its last reference goes away.
	 */
	if (error == 0) {
		--ip->ino_rec.ino_nlinks;
		if (ip->ino_rec.ino_nlinks == 0)
			ip->ino_rec.base.base.delete_tid = trans->tid;
		error = hammer_ip_delete_range(trans, ip,
					       HAMMER_MIN_KEY, HAMMER_MAX_KEY);
		KKASSERT(RB_EMPTY(&ip->rec_tree));
		hammer_modify_inode(trans, ip, HAMMER_INODE_RDIRTY);
	}
	return(error);
}

/*
 * Add a data record to the filesystem.
 *
 * This is called via the strategy code, typically when the kernel wants to
 * flush a buffer cache buffer, so this operation goes directly to the disk.
 */
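/*
 * XXX not yet implemented in this revision; the body below simply panics
 * if it is ever reached.
 */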
int
hammer_ip_add_data(hammer_transaction_t trans, hammer_inode_t ip,
		   int64_t offset, void *data, int bytes)
{
	panic("hammer_ip_add_data");
}

/*
 * Add the record to the inode's rec_tree.  The low 32 bits of a directory
 * entry's key are used to deal with hash collisions in the upper 32 bits.
 * A unique 64 bit key is generated in-memory and may be regenerated a
 * second time when the directory record is flushed to the on-disk B-Tree.
 */
static
int
hammer_mem_add(struct hammer_transaction *trans, hammer_record_t record)
{
	while (RB_INSERT(hammer_rec_rb_tree, &record->ip->rec_tree, record)) {
		if (record->rec.base.base.rec_type != HAMMER_RECTYPE_DIRENTRY) {
			hammer_free_mem_record(record);
			return (EEXIST);
		}
		if (++trans->hmp->namekey_iterator == 0)
			++trans->hmp->namekey_iterator;
		record->rec.base.base.key &= ~(0xFFFFFFFFLL);
		record->rec.base.base.key |= trans->hmp->namekey_iterator;
	}
	record->flags |= HAMMER_RECF_ONRBTREE;
	return(0);
}

/************************************************************************
 *		   HAMMER INODE MERGED-RECORD FUNCTIONS		*
 ************************************************************************
 *
 * These functions augment the B-Tree scanning functions in hammer_btree.c
 * by merging in-memory records with on-disk records.
 */

/*
 * Locate a particular record either in-memory or on-disk.
 *
 * NOTE: This is basically a standalone routine, hammer_ip_next() may
 * NOT be called to iterate results.
 */
int
hammer_ip_lookup(hammer_cursor_t cursor, struct hammer_inode *ip)
{
	int error;

	/*
	 * If the element is in-memory return it without searching the
	 * on-disk B-Tree.
	 */
	error = hammer_mem_lookup(cursor, ip);
	if (error == 0) {
		cursor->record = &cursor->iprec->rec;
		return(error);
	}
	if (error != ENOENT)
		return(error);

	/*
	 * If the inode has on-disk components search the on-disk B-Tree.
	 */
	if ((ip->flags & HAMMER_INODE_ONDISK) == 0)
		return(error);
	error = hammer_btree_lookup(cursor);
	if (error == 0)
		error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_RECORD);
	return(error);
}

/*
 * Locate the first record within the cursor's key_beg/key_end range,
 * restricted to a particular inode.  0 is returned on success, ENOENT
 * if no records matched the requested range, or some other error.
 *
 * When 0 is returned hammer_ip_next() may be used to iterate additional
 * records within the requested range.
 */
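/*
 * A typical merged iteration looks like the sketch below (the exact cursor
 * setup depends on the caller; see hammer_ip_delete_range() further down
 * for a real example):
 *
 *	error = hammer_ip_first(&cursor, ip);
 *	while (error == 0) {
 *		... examine cursor.record ...
 *		error = hammer_ip_next(&cursor);
 *	}
 *	if (error == ENOENT)
 *		error = 0;
 */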
int
hammer_ip_first(hammer_cursor_t cursor, struct hammer_inode *ip)
{
	int error;

	/*
	 * Clean up fields and setup for merged scan
	 */
	cursor->flags |= HAMMER_CURSOR_ATEDISK | HAMMER_CURSOR_ATEMEM;
	cursor->flags |= HAMMER_CURSOR_DISKEOF | HAMMER_CURSOR_MEMEOF;
	if (cursor->iprec)
		hammer_rel_mem_record(&cursor->iprec);

	/*
	 * Search the on-disk B-Tree
	 */
	if (ip->flags & HAMMER_INODE_ONDISK) {
		error = hammer_btree_lookup(cursor);
		if (error && error != ENOENT)
			return(error);
		if (error == 0) {
			cursor->flags &= ~HAMMER_CURSOR_DISKEOF;
			cursor->flags &= ~HAMMER_CURSOR_ATEDISK;
		}
	}

	/*
	 * Search the in-memory record list (Red-Black tree)
	 */
	error = hammer_mem_search(cursor, ip);
	if (error && error != ENOENT)
		return(error);
	if (error == 0) {
		cursor->flags &= ~HAMMER_CURSOR_MEMEOF;
		cursor->flags &= ~HAMMER_CURSOR_ATEMEM;
	}

	/*
	 * This will return the first matching record.
	 */
	return(hammer_ip_next(cursor));
}

/*
 * Retrieve the next record in a merged iteration within the bounds of the
 * cursor.  This call may be made multiple times after the cursor has been
 * initially searched with hammer_ip_first().
 *
 * 0 is returned on success, ENOENT if no further records match the
 * requested range, or some other error code is returned.
 */
int
hammer_ip_next(hammer_cursor_t cursor)
{
	hammer_btree_elm_t elm;
	hammer_record_t rec;
	int error;
	int r;

	/*
	 * Load the current on-disk and in-memory record.  If we ate any
	 * records we have to get the next one.
	 *
	 * Get the next on-disk record
	 */
	if (cursor->flags & HAMMER_CURSOR_ATEDISK) {
		if ((cursor->flags & HAMMER_CURSOR_DISKEOF) == 0) {
			error = hammer_btree_iterate(cursor);
			if (error == 0)
				cursor->flags &= ~HAMMER_CURSOR_ATEDISK;
			else
				cursor->flags |= HAMMER_CURSOR_DISKEOF;
		}
	}

	/*
	 * Get the next in-memory record.  The record can be ripped out
	 * of the RB tree so we maintain a scan_info structure to track
	 * the next node.
	 */
	if (cursor->flags & HAMMER_CURSOR_ATEMEM) {
		if ((cursor->flags & HAMMER_CURSOR_MEMEOF) == 0) {
			rec = cursor->scan.node;	/* next node */
			if (rec) {
				cursor->flags &= ~HAMMER_CURSOR_ATEMEM;
				hammer_ref(&rec->lock);
				cursor->scan.node =
					hammer_rec_rb_tree_RB_NEXT(rec);
			} else {
				cursor->flags |= HAMMER_CURSOR_MEMEOF;
			}
			hammer_rel_mem_record(&cursor->iprec);
			cursor->iprec = rec;
		}
	}

	/*
	 * Extract either the disk or memory record depending on their
	 * relative position.
	 */
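	/*
	 * The four switch cases below correspond to the ATEDISK/ATEMEM
	 * combinations:
	 *
	 *	neither set - both records are valid; return whichever
	 *		      sorts first (the in-memory record wins a
	 *		      tie) and mark it as eaten.
	 *	ATEDISK set - only the in-memory record remains valid.
	 *	ATEMEM set  - only the on-disk record remains valid.
	 *	both set    - nothing left to return, the scan is done.
	 */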
	error = 0;
	switch(cursor->flags & (HAMMER_CURSOR_ATEDISK | HAMMER_CURSOR_ATEMEM)) {
	case 0:
		/*
		 * Both entries valid
		 */
		elm = &cursor->node->ondisk->elms[cursor->index];
		r = hammer_btree_cmp(&elm->base,
				     &cursor->iprec->rec.base.base);
		if (r < 0) {
			error = hammer_btree_extract(cursor,
						     HAMMER_CURSOR_GET_RECORD);
			cursor->flags |= HAMMER_CURSOR_ATEDISK;
			break;
		}
		/* fall through to the memory entry */
	case HAMMER_CURSOR_ATEDISK:
		/*
		 * Only the memory entry is valid
		 */
		cursor->record = &cursor->iprec->rec;
		cursor->flags |= HAMMER_CURSOR_ATEMEM;
		break;
	case HAMMER_CURSOR_ATEMEM:
		/*
		 * Only the disk entry is valid
		 */
		error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_RECORD);
		cursor->flags |= HAMMER_CURSOR_ATEDISK;
		break;
	default:
		/*
		 * Neither entry is valid
		 *
		 * XXX error not set properly
		 */
		cursor->record = NULL;
		error = ENOENT;
		break;
	}
	return(error);
}

/*
 * Resolve the cursor->data pointer for the current cursor position in
 * a merged iteration.
 */
int
hammer_ip_resolve_data(hammer_cursor_t cursor)
{
	int error;

	if (cursor->iprec && cursor->record == &cursor->iprec->rec) {
		cursor->data = cursor->iprec->data;
		error = 0;
	} else {
		error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_DATA);
	}
	return(error);
}

/*
 * Delete all records within the specified range for inode ip.
 *
 * NOTE: An unaligned range will cause new records to be added to cover
 * the edge cases.
 *
 * NOTE: ran_end is inclusive (e.g. 0,1023 instead of 0,1024).
 */
int
hammer_ip_delete_range(hammer_transaction_t trans, hammer_inode_t ip,
		       int64_t ran_beg, int64_t ran_end)
{
	struct hammer_cursor cursor;
	hammer_record_ondisk_t rec;
	hammer_base_elm_t base;
	int error;
	int64_t off;

	hammer_init_cursor_ip(&cursor, ip);

	cursor.key_beg.obj_id = ip->obj_id;
	cursor.key_beg.create_tid = ip->obj_asof;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.obj_type = 0;
	cursor.key_beg.key = ran_beg;
	cursor.key_end = cursor.key_beg;
	if (ip->ino_rec.base.base.obj_type == HAMMER_OBJTYPE_DBFILE) {
		cursor.key_beg.rec_type = HAMMER_RECTYPE_DB;
		cursor.key_end.rec_type = HAMMER_RECTYPE_DB;
		cursor.key_end.key = ran_end;
	} else {
		cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA;
		cursor.key_end.rec_type = HAMMER_RECTYPE_DATA;
		if (ran_end + MAXPHYS < ran_end)
			cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
		else
			cursor.key_end.key = ran_end + MAXPHYS;
	}

	error = hammer_ip_first(&cursor, ip);

	/*
	 * Iterate through matching records and mark them as deleted.
	 */
	while (error == 0) {
		rec = cursor.record;
		base = &rec->base.base;

		KKASSERT(base->delete_tid == 0);

		/*
		 * There may be overlap cases for regular file data.  Also
		 * remember the key for a regular file record is the offset
		 * of the last byte of the record (base + len - 1), NOT the
		 * base offset.
		 */
		if (base->rec_type == HAMMER_RECTYPE_DATA) {
			off = base->key - rec->base.data_len + 1;
			/*
			 * Check the left edge case
			 */
			if (off < ran_beg) {
				panic("hammer left edge case\n");
			}

			/*
			 * Check the right edge case.  Note that the
			 * record can be completely out of bounds, which
			 * terminates the search.
			 *
			 * base->key is (base_offset + bytes - 1), ran_end
			 * works the same way.
			 */
			if (base->key > ran_end) {
				if (base->key - rec->base.data_len + 1 > ran_end) {
					kprintf("right edge OOB\n");
					break;
				}
				panic("hammer right edge case\n");
			}
		}

		/*
		 * Mark the record and B-Tree entry as deleted
		 */
		if (cursor.record == &cursor.iprec->rec) {
			hammer_free_mem_record(cursor.iprec);
		} else {
			cursor.node->ondisk->elms[cursor.index].base.delete_tid = trans->tid;
			cursor.record->base.base.delete_tid = trans->tid;
			hammer_modify_node(cursor.node);
			hammer_modify_buffer(cursor.record_buffer);
		}
		error = hammer_ip_next(&cursor);
	}
	hammer_done_cursor(&cursor);
	if (error == ENOENT)
		error = 0;
	return(error);
}