1 /* 2 * Copyright (c) 2007 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.15 2008/01/03 06:48:49 dillon Exp $ 35 */ 36 37 #include "hammer.h" 38 39 static int hammer_mem_add(hammer_transaction_t trans, 40 hammer_record_t record); 41 static int hammer_mem_lookup(hammer_cursor_t cursor, hammer_inode_t ip); 42 static int hammer_mem_first(hammer_cursor_t cursor, hammer_inode_t ip); 43 44 /* 45 * Red-black tree support. 46 */ 47 static int 48 hammer_rec_rb_compare(hammer_record_t rec1, hammer_record_t rec2) 49 { 50 if (rec1->rec.base.base.rec_type < rec2->rec.base.base.rec_type) 51 return(-1); 52 if (rec1->rec.base.base.rec_type > rec2->rec.base.base.rec_type) 53 return(1); 54 55 if (rec1->rec.base.base.key < rec2->rec.base.base.key) 56 return(-1); 57 if (rec1->rec.base.base.key > rec2->rec.base.base.key) 58 return(1); 59 60 if (rec1->rec.base.base.create_tid < rec2->rec.base.base.create_tid) 61 return(-1); 62 if (rec1->rec.base.base.create_tid > rec2->rec.base.base.create_tid) 63 return(1); 64 return(0); 65 } 66 67 static int 68 hammer_rec_compare(hammer_base_elm_t info, hammer_record_t rec) 69 { 70 if (info->rec_type < rec->rec.base.base.rec_type) 71 return(-3); 72 if (info->rec_type > rec->rec.base.base.rec_type) 73 return(3); 74 75 if (info->key < rec->rec.base.base.key) 76 return(-2); 77 if (info->key > rec->rec.base.base.key) 78 return(2); 79 80 /* 81 * This test has a number of special cases. create_tid in key1 is 82 * the as-of transction id, and delete_tid in key1 is NOT USED. 83 * 84 * A key1->create_tid of 0 matches any record regardles of when 85 * it was created or destroyed. 0xFFFFFFFFFFFFFFFFULL should be 86 * used to search for the most current state of the object. 87 * 88 * key2->create_tid is a HAMMER record and will never be 89 * 0. key2->delete_tid is the deletion transaction id or 0 if 90 * the record has not yet been deleted. 
91 */ 92 if (info->create_tid) { 93 if (info->create_tid < rec->rec.base.base.create_tid) 94 return(-1); 95 if (rec->rec.base.base.delete_tid && 96 info->create_tid >= rec->rec.base.base.delete_tid) { 97 return(1); 98 } 99 } 100 return(0); 101 } 102 103 /* 104 * RB_SCAN comparison code for hammer_mem_first(). The argument order 105 * is reversed so the comparison result has to be negated. key_beg and 106 * key_end are both range-inclusive. 107 * 108 * The creation timestamp can cause hammer_rec_compare() to return -1 or +1. 109 * These do not stop the scan. 110 * 111 * Localized deletions are not cached in-memory. 112 */ 113 static 114 int 115 hammer_rec_scan_cmp(hammer_record_t rec, void *data) 116 { 117 hammer_cursor_t cursor = data; 118 int r; 119 120 r = hammer_rec_compare(&cursor->key_beg, rec); 121 if (r > 1) 122 return(-1); 123 if (r == 0) 124 return(0); 125 r = hammer_rec_compare(&cursor->key_end, rec); 126 if (r < -1) 127 return(1); 128 return(0); 129 } 130 131 RB_GENERATE(hammer_rec_rb_tree, hammer_record, rb_node, hammer_rec_rb_compare); 132 RB_GENERATE_XLOOKUP(hammer_rec_rb_tree, INFO, hammer_record, rb_node, 133 hammer_rec_compare, hammer_base_elm_t); 134 135 /* 136 * Allocate a record for the caller to finish filling in. The record is 137 * returned referenced. 138 */ 139 hammer_record_t 140 hammer_alloc_mem_record(hammer_inode_t ip) 141 { 142 hammer_record_t record; 143 144 ++hammer_count_records; 145 record = kmalloc(sizeof(*record), M_HAMMER, M_WAITOK|M_ZERO); 146 record->ip = ip; 147 hammer_ref(&record->lock); 148 return (record); 149 } 150 151 /* 152 * Release a memory record. Records marked for deletion are immediately 153 * removed from the RB-Tree but otherwise left intact until the last ref 154 * goes away. 
 */
void
hammer_rel_mem_record(struct hammer_record *record)
{
	hammer_unref(&record->lock);
	if (record->flags & HAMMER_RECF_DELETED) {
		/*
		 * Deleted records are unlinked from the RB-Tree right away
		 * so further lookups cannot find them, even while other
		 * holders still keep the record structure alive.
		 */
		if (record->flags & HAMMER_RECF_ONRBTREE) {
			RB_REMOVE(hammer_rec_rb_tree, &record->ip->rec_tree,
				  record);
			record->flags &= ~HAMMER_RECF_ONRBTREE;
		}
		if (record->lock.refs == 0) {
			/*
			 * Last reference went away: free separately
			 * allocated data (embedded data lives inside the
			 * record itself) and then the record.
			 */
			if (record->flags & HAMMER_RECF_ALLOCDATA) {
				--hammer_count_record_datas;
				kfree(record->data, M_HAMMER);
				record->flags &= ~HAMMER_RECF_ALLOCDATA;
			}
			record->data = NULL;
			--hammer_count_records;
			kfree(record, M_HAMMER);
		}
	}
}

/*
 * Lookup an in-memory record given the key specified in the cursor.  Works
 * just like hammer_btree_lookup() but operates on an inode's in-memory
 * record list.
 *
 * The lookup must fail if the record is marked for deferred deletion.
 */
static
int
hammer_mem_lookup(hammer_cursor_t cursor, hammer_inode_t ip)
{
	int error;

	/*
	 * Release any record held from a prior lookup and unlink the
	 * cursor's scan info from any previously scanned inode before
	 * re-targeting the cursor at (ip).
	 */
	if (cursor->iprec) {
		hammer_rel_mem_record(cursor->iprec);
		cursor->iprec = NULL;
	}
	if (cursor->ip) {
		hammer_rec_rb_tree_scan_info_done(&cursor->scan,
						  &cursor->ip->rec_tree);
	}
	cursor->ip = ip;
	hammer_rec_rb_tree_scan_info_link(&cursor->scan, &ip->rec_tree);
	cursor->scan.node = NULL;
	cursor->iprec = hammer_rec_rb_tree_RB_LOOKUP_INFO(
				&ip->rec_tree, &cursor->key_beg);
	if (cursor->iprec == NULL) {
		error = ENOENT;
	} else {
		/* hold a reference on the record for the caller */
		hammer_ref(&cursor->iprec->lock);
		error = 0;
	}
	return(error);
}

/*
 * hammer_mem_first() - locate the first in-memory record matching the
 * cursor.
 *
 * The RB_SCAN function we use is designed as a callback.  We terminate it
 * (return -1) as soon as we get a match.
 */
static
int
hammer_rec_scan_callback(hammer_record_t rec, void *data)
{
	hammer_cursor_t cursor = data;

	/*
	 * Skip if not visible due to our as-of TID: either the record was
	 * created after the as-of point, or it was already deleted at it.
	 */
	if (cursor->key_beg.create_tid) {
		if (cursor->key_beg.create_tid < rec->rec.base.base.create_tid)
			return(0);
		if (rec->rec.base.base.delete_tid &&
		    cursor->key_beg.create_tid >=
		     rec->rec.base.base.delete_tid) {
			return(0);
		}
	}

	/*
	 * Return the first matching record and stop the scan (the record
	 * is referenced on behalf of the cursor).
	 */
	if (cursor->iprec == NULL) {
		cursor->iprec = rec;
		hammer_ref(&rec->lock);
		return(-1);
	}
	return(0);
}

static
int
hammer_mem_first(hammer_cursor_t cursor, hammer_inode_t ip)
{
	/*
	 * Release any record held from a prior scan and re-target the
	 * cursor's scan info at (ip)'s record tree.
	 */
	if (cursor->iprec) {
		hammer_rel_mem_record(cursor->iprec);
		cursor->iprec = NULL;
	}
	if (cursor->ip) {
		hammer_rec_rb_tree_scan_info_done(&cursor->scan,
						  &cursor->ip->rec_tree);
	}
	cursor->ip = ip;
	hammer_rec_rb_tree_scan_info_link(&cursor->scan, &ip->rec_tree);

	cursor->scan.node = NULL;
	hammer_rec_rb_tree_RB_SCAN(&ip->rec_tree, hammer_rec_scan_cmp,
				   hammer_rec_scan_callback, cursor);

	/*
	 * Adjust scan.node and keep it linked into the RB-tree so we can
	 * hold the cursor through third party modifications of the RB-tree.
	 */
	if (cursor->iprec) {
		cursor->scan.node = hammer_rec_rb_tree_RB_NEXT(cursor->iprec);
		return(0);
	}
	return(ENOENT);
}

/*
 * Release the cursor's in-memory scan state: unlink the scan info from
 * the inode's record tree and drop any record reference still held.
 */
void
hammer_mem_done(hammer_cursor_t cursor)
{
	if (cursor->ip) {
		hammer_rec_rb_tree_scan_info_done(&cursor->scan,
						  &cursor->ip->rec_tree);
		cursor->ip = NULL;
	}
	if (cursor->iprec) {
		hammer_rel_mem_record(cursor->iprec);
		cursor->iprec = NULL;
	}
}

/************************************************************************
 *		     HAMMER IN-MEMORY RECORD FUNCTIONS			*
 ************************************************************************
 *
 * These functions manipulate in-memory records.  Such records typically
 * exist prior to being committed to disk or indexed via the on-disk B-Tree.
 */

/*
 * Add a directory entry (dip,ncp) which references inode (ip).
 *
 * Note that the low 32 bits of the namekey are set temporarily to create
 * a unique in-memory record, and may be modified a second time when the
 * record is synchronized to disk.  In particular, the low 32 bits cannot be
 * all 0's when synching to disk, which is not handled here.
 */
int
hammer_ip_add_directory(struct hammer_transaction *trans,
		     struct hammer_inode *dip, struct namecache *ncp,
		     struct hammer_inode *ip)
{
	hammer_record_t record;
	int error;
	int bytes;

	record = hammer_alloc_mem_record(dip);

	bytes = ncp->nc_nlen;	/* NOTE: terminating \0 is NOT included */
	if (++trans->hmp->namekey_iterator == 0)
		++trans->hmp->namekey_iterator;	/* low 32 bits must not be 0 */

	record->rec.entry.base.base.obj_id = dip->obj_id;
	record->rec.entry.base.base.key =
		hammer_directory_namekey(ncp->nc_name, bytes);
	record->rec.entry.base.base.key += trans->hmp->namekey_iterator;
	record->rec.entry.base.base.create_tid = trans->tid;
	record->rec.entry.base.base.rec_type = HAMMER_RECTYPE_DIRENTRY;
	record->rec.entry.base.base.obj_type = ip->ino_rec.base.base.obj_type;
	record->rec.entry.obj_id = ip->obj_id;
	if (bytes <= sizeof(record->rec.entry.den_name)) {
		/* short names are embedded directly in the record */
		record->data = (void *)record->rec.entry.den_name;
		record->flags |= HAMMER_RECF_EMBEDDED_DATA;
	} else {
		++hammer_count_record_datas;
		record->data = kmalloc(bytes, M_HAMMER, M_WAITOK);
		record->flags |= HAMMER_RECF_ALLOCDATA;
	}
	bcopy(ncp->nc_name, record->data, bytes);
	record->rec.entry.base.data_len = bytes;
	++ip->ino_rec.ino_nlinks;	/* new directory entry references ip */
	hammer_modify_inode(trans, ip, HAMMER_INODE_RDIRTY);
	error = hammer_mem_add(trans, record);
	return(error);
}

/*
 * Delete the directory entry and update the inode link count.  The
 * cursor must be seeked to the directory entry record being deleted.
 *
 * NOTE: HAMMER_CURSOR_DELETE may not have been set.  XXX remove flag.
 */
int
hammer_ip_del_directory(struct hammer_transaction *trans,
		     hammer_cursor_t cursor, struct hammer_inode *dip,
		     struct hammer_inode *ip)
{
	int error;

	error = hammer_ip_delete_record(cursor, trans->tid);

	/*
	 * One less link.  The file may still be open in the OS even after
	 * all links have gone away so we only try to sync if the OS has
	 * no references and nlinks falls to 0.
	 */
	if (error == 0) {
		--ip->ino_rec.ino_nlinks;
		hammer_modify_inode(trans, ip, HAMMER_INODE_RDIRTY);
		if (ip->ino_rec.ino_nlinks == 0 &&
		    (ip->vp == NULL || (ip->vp->v_flag & VINACTIVE))) {
			hammer_sync_inode(ip, MNT_NOWAIT, 1);
		}

	}
	return(error);
}

/*
 * Add a record to an inode.
 *
 * The caller must allocate the record with hammer_alloc_mem_record(ip) and
 * initialize the following additional fields:
 *
 *	record->rec.entry.base.base.key
 *	record->rec.entry.base.base.rec_type
 *	record->rec.entry.base.base.data_len
 *	record->data		(a copy will be kmalloc'd if not embedded)
 */
int
hammer_ip_add_record(struct hammer_transaction *trans, hammer_record_t record)
{
	hammer_inode_t ip = record->ip;
	int error;
	int bytes;
	void *data;

	record->rec.base.base.obj_id = ip->obj_id;
	record->rec.base.base.create_tid = trans->tid;
	record->rec.base.base.obj_type = ip->ino_rec.base.base.obj_type;
	bytes = record->rec.base.data_len;

	if (record->data) {
		/*
		 * If the caller's data does not point inside the record
		 * union it must be copied into a private allocation;
		 * otherwise it is marked as embedded.
		 */
		if ((char *)record->data < (char *)&record->rec ||
		    (char *)record->data >= (char *)(&record->rec + 1)) {
			++hammer_count_record_datas;
			data = kmalloc(bytes, M_HAMMER, M_WAITOK);
			record->flags |= HAMMER_RECF_ALLOCDATA;
			bcopy(record->data, data, bytes);
			record->data = data;
		} else {
			record->flags |= HAMMER_RECF_EMBEDDED_DATA;
		}
	}
	hammer_modify_inode(trans, ip, HAMMER_INODE_RDIRTY);
	error = hammer_mem_add(trans, record);
	return(error);
}

/*
 * Sync data from a buffer cache buffer (typically) to the filesystem.  This
 * is called via the strategy code from a cached data source.  This code
 * is responsible for actually writing a data record out to the disk.
427 */ 428 int 429 hammer_ip_sync_data(hammer_transaction_t trans, hammer_inode_t ip, 430 int64_t offset, void *data, int bytes, 431 struct hammer_cursor **spike) 432 { 433 struct hammer_cursor cursor; 434 hammer_record_ondisk_t rec; 435 union hammer_btree_elm elm; 436 void *bdata; 437 int error; 438 439 error = hammer_init_cursor_hmp(&cursor, &ip->cache[0], ip->hmp); 440 if (error) 441 return(error); 442 cursor.key_beg.obj_id = ip->obj_id; 443 cursor.key_beg.key = offset + bytes; 444 cursor.key_beg.create_tid = trans->tid; 445 cursor.key_beg.delete_tid = 0; 446 cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA; 447 cursor.flags = HAMMER_CURSOR_INSERT; 448 449 /* 450 * Issue a lookup to position the cursor and locate the cluster 451 */ 452 error = hammer_btree_lookup(&cursor); 453 if (error == 0) { 454 kprintf("hammer_ip_sync_data: duplicate data at (%lld,%d)\n", 455 offset, bytes); 456 hammer_print_btree_elm(&cursor.node->ondisk->elms[cursor.index], 457 HAMMER_BTREE_TYPE_LEAF, cursor.index); 458 error = EIO; 459 } 460 if (error != ENOENT) 461 goto done; 462 463 /* 464 * Allocate record and data space now that we know which cluster 465 * the B-Tree node ended up in. 466 */ 467 bdata = hammer_alloc_data(cursor.node->cluster, bytes, &error, 468 &cursor.data_buffer); 469 if (bdata == NULL) 470 goto done; 471 rec = hammer_alloc_record(cursor.node->cluster, &error, 472 &cursor.record_buffer); 473 if (rec == NULL) 474 goto fail1; 475 476 /* 477 * Fill everything in and insert our B-Tree node. 
478 */ 479 hammer_modify_buffer(cursor.record_buffer); 480 rec->base.base = cursor.key_beg; 481 rec->base.data_crc = crc32(data, bytes); 482 rec->base.rec_id = 0; /* XXX */ 483 rec->base.data_offset = hammer_bclu_offset(cursor.data_buffer, bdata); 484 rec->base.data_len = bytes; 485 hammer_modify_buffer_done(cursor.record_buffer); 486 487 hammer_modify_buffer(cursor.data_buffer); 488 bcopy(data, bdata, bytes); 489 hammer_modify_buffer_done(cursor.data_buffer); 490 491 elm.leaf.base = cursor.key_beg; 492 elm.leaf.rec_offset = hammer_bclu_offset(cursor.record_buffer, rec); 493 elm.leaf.data_offset = rec->base.data_offset; 494 elm.leaf.data_len = bytes; 495 elm.leaf.data_crc = rec->base.data_crc; 496 497 error = hammer_btree_insert(&cursor, &elm); 498 if (error == 0) 499 goto done; 500 501 hammer_free_record_ptr(cursor.record_buffer, rec); 502 fail1: 503 hammer_free_data_ptr(cursor.data_buffer, bdata, bytes); 504 done: 505 /* 506 * If ENOSPC in cluster fill in the spike structure and return 507 * ENOSPC. 508 */ 509 if (error == ENOSPC) 510 hammer_load_spike(&cursor, spike); 511 hammer_done_cursor(&cursor); 512 return(error); 513 } 514 515 /* 516 * Sync an in-memory record to the disk. this is typically called via fsync 517 * from a cached record source. This code is responsible for actually 518 * writing a record out to the disk. 519 */ 520 int 521 hammer_ip_sync_record(hammer_record_t record, struct hammer_cursor **spike) 522 { 523 struct hammer_cursor cursor; 524 hammer_record_ondisk_t rec; 525 hammer_mount_t hmp; 526 union hammer_btree_elm elm; 527 void *bdata; 528 int error; 529 530 error = hammer_init_cursor_hmp(&cursor, &record->ip->cache[0], 531 record->ip->hmp); 532 if (error) 533 return(error); 534 cursor.key_beg = record->rec.base.base; 535 cursor.flags = HAMMER_CURSOR_INSERT; 536 537 /* 538 * Issue a lookup to position the cursor and locate the cluster. The 539 * target key should not exist. 
If we are creating a directory entry 540 * we may have to iterate the low 32 bits of the key to find an unused 541 * key. 542 * 543 * If we run out of space trying to adjust the B-Tree for the 544 * insert, re-lookup without the insert flag so the cursor 545 * is properly positioned for the spike. 546 */ 547 again: 548 error = hammer_btree_lookup(&cursor); 549 if (error == 0) { 550 if (record->rec.base.base.rec_type == HAMMER_RECTYPE_DIRENTRY) { 551 hmp = cursor.node->cluster->volume->hmp; 552 if (++hmp->namekey_iterator == 0) 553 ++hmp->namekey_iterator; 554 record->rec.base.base.key &= ~(0xFFFFFFFFLL); 555 record->rec.base.base.key |= hmp->namekey_iterator; 556 goto again; 557 } 558 kprintf("hammer_ip_sync_record: duplicate rec at (%016llx)\n", 559 record->rec.base.base.key); 560 Debugger("duplicate record1"); 561 error = EIO; 562 } 563 if (error != ENOENT) 564 goto done; 565 566 /* 567 * Mark the record as undergoing synchronization. Our cursor is 568 * holding a locked B-Tree node for the insertion which interlocks 569 * anyone trying to access this record. 570 * 571 * XXX There is still a race present related to iterations. An 572 * iteration may process the record, a sync may occur, and then 573 * later process the B-Tree element for the same record. 574 * 575 * We do not try to synchronize a deleted record. 576 */ 577 if (record->flags & (HAMMER_RECF_DELETED | HAMMER_RECF_SYNCING)) { 578 error = 0; 579 goto done; 580 } 581 record->flags |= HAMMER_RECF_SYNCING; 582 583 /* 584 * Allocate record and data space now that we know which cluster 585 * the B-Tree node ended up in. 
586 */ 587 if (record->data == NULL || 588 (record->flags & HAMMER_RECF_EMBEDDED_DATA)) { 589 bdata = record->data; 590 } else { 591 bdata = hammer_alloc_data(cursor.node->cluster, 592 record->rec.base.data_len, &error, 593 &cursor.data_buffer); 594 if (bdata == NULL) 595 goto fail2; 596 } 597 rec = hammer_alloc_record(cursor.node->cluster, &error, 598 &cursor.record_buffer); 599 if (rec == NULL) 600 goto fail1; 601 602 /* 603 * Fill everything in and insert our B-Tree node. 604 * 605 * XXX assign rec_id here 606 */ 607 hammer_modify_buffer(cursor.record_buffer); 608 *rec = record->rec; 609 if (bdata) { 610 rec->base.data_crc = crc32(record->data, 611 record->rec.base.data_len); 612 if (record->flags & HAMMER_RECF_EMBEDDED_DATA) { 613 /* 614 * Data embedded in record 615 */ 616 rec->base.data_offset = ((char *)bdata - 617 (char *)&record->rec); 618 KKASSERT(rec->base.data_offset >= 0 && 619 rec->base.data_offset + rec->base.data_len <= 620 sizeof(*rec)); 621 rec->base.data_offset += hammer_bclu_offset(cursor.record_buffer, rec); 622 } else { 623 /* 624 * Data separate from record 625 */ 626 rec->base.data_offset = hammer_bclu_offset(cursor.data_buffer,bdata); 627 hammer_modify_buffer(cursor.data_buffer); 628 bcopy(record->data, bdata, rec->base.data_len); 629 hammer_modify_buffer_done(cursor.data_buffer); 630 } 631 } 632 rec->base.rec_id = 0; /* XXX */ 633 hammer_modify_buffer_done(cursor.record_buffer); 634 635 elm.leaf.base = cursor.key_beg; 636 elm.leaf.rec_offset = hammer_bclu_offset(cursor.record_buffer, rec); 637 elm.leaf.data_offset = rec->base.data_offset; 638 elm.leaf.data_len = rec->base.data_len; 639 elm.leaf.data_crc = rec->base.data_crc; 640 641 error = hammer_btree_insert(&cursor, &elm); 642 643 /* 644 * Clean up on success, or fall through on error. 
645 */ 646 if (error == 0) { 647 record->flags |= HAMMER_RECF_DELETED; 648 record->flags &= ~HAMMER_RECF_SYNCING; 649 goto done; 650 } 651 652 hammer_free_record_ptr(cursor.record_buffer, rec); 653 fail1: 654 if (record->data && (record->flags & HAMMER_RECF_EMBEDDED_DATA) == 0) { 655 hammer_free_data_ptr(cursor.data_buffer, bdata, 656 record->rec.base.data_len); 657 } 658 fail2: 659 record->flags &= ~HAMMER_RECF_SYNCING; 660 done: 661 /* 662 * If ENOSPC in cluster fill in the spike structure and return 663 * ENOSPC. 664 */ 665 if (error == ENOSPC) 666 hammer_load_spike(&cursor, spike); 667 hammer_done_cursor(&cursor); 668 return(error); 669 } 670 671 /* 672 * Write out a record using the specified cursor. The caller does not have 673 * to seek the cursor. The flags are used to determine whether the data 674 * (if any) is embedded in the record or not. 675 * 676 * The target cursor will be modified by this call. Note in particular 677 * that HAMMER_CURSOR_INSERT is set. 678 */ 679 int 680 hammer_write_record(hammer_cursor_t cursor, hammer_record_ondisk_t orec, 681 void *data, int cursor_flags) 682 { 683 union hammer_btree_elm elm; 684 hammer_record_ondisk_t nrec; 685 void *bdata; 686 int error; 687 688 cursor->key_beg = orec->base.base; 689 cursor->flags |= HAMMER_CURSOR_INSERT; 690 691 /* 692 * Issue a lookup to position the cursor and locate the cluster. The 693 * target key should not exist. 694 * 695 * If we run out of space trying to adjust the B-Tree for the 696 * insert, re-lookup without the insert flag so the cursor 697 * is properly positioned for the spike. 698 */ 699 error = hammer_btree_lookup(cursor); 700 if (error == 0) { 701 kprintf("hammer_ip_sync_record: duplicate rec at (%016llx)\n", 702 orec->base.base.key); 703 Debugger("duplicate record2"); 704 error = EIO; 705 } 706 if (error != ENOENT) 707 goto done; 708 709 /* 710 * Allocate record and data space now that we know which cluster 711 * the B-Tree node ended up in. 
712 */ 713 if (data == NULL || 714 (cursor_flags & HAMMER_RECF_EMBEDDED_DATA)) { 715 bdata = data; 716 } else { 717 bdata = hammer_alloc_data(cursor->node->cluster, 718 orec->base.data_len, &error, 719 &cursor->data_buffer); 720 if (bdata == NULL) 721 goto done; 722 } 723 nrec = hammer_alloc_record(cursor->node->cluster, &error, 724 &cursor->record_buffer); 725 if (nrec == NULL) 726 goto fail1; 727 728 /* 729 * Fill everything in and insert our B-Tree node. 730 * 731 * XXX assign rec_id here 732 */ 733 hammer_modify_buffer(cursor->record_buffer); 734 *nrec = *orec; 735 nrec->base.data_offset = 0; 736 if (bdata) { 737 nrec->base.data_crc = crc32(bdata, nrec->base.data_len); 738 if (cursor_flags & HAMMER_RECF_EMBEDDED_DATA) { 739 /* 740 * Data embedded in record 741 */ 742 nrec->base.data_offset = ((char *)bdata - (char *)orec); 743 KKASSERT(nrec->base.data_offset >= 0 && 744 nrec->base.data_offset + nrec->base.data_len < 745 sizeof(*nrec)); 746 nrec->base.data_offset += hammer_bclu_offset(cursor->record_buffer, nrec); 747 } else { 748 /* 749 * Data separate from record 750 */ 751 nrec->base.data_offset = hammer_bclu_offset(cursor->data_buffer, bdata); 752 hammer_modify_buffer(cursor->data_buffer); 753 bcopy(data, bdata, nrec->base.data_len); 754 hammer_modify_buffer_done(cursor->data_buffer); 755 } 756 } 757 nrec->base.rec_id = 0; /* XXX */ 758 hammer_modify_buffer_done(cursor->record_buffer); 759 760 elm.leaf.base = nrec->base.base; 761 elm.leaf.rec_offset = hammer_bclu_offset(cursor->record_buffer, nrec); 762 elm.leaf.data_offset = nrec->base.data_offset; 763 elm.leaf.data_len = nrec->base.data_len; 764 elm.leaf.data_crc = nrec->base.data_crc; 765 766 error = hammer_btree_insert(cursor, &elm); 767 if (error == 0) 768 goto done; 769 770 hammer_free_record_ptr(cursor->record_buffer, nrec); 771 fail1: 772 if (data && (cursor_flags & HAMMER_RECF_EMBEDDED_DATA) == 0) { 773 hammer_free_data_ptr(cursor->data_buffer, bdata, 774 orec->base.data_len); 775 } 776 done: 777 /* 
leave cursor intact */ 778 return(error); 779 } 780 781 /* 782 * Add the record to the inode's rec_tree. The low 32 bits of a directory 783 * entry's key is used to deal with hash collisions in the upper 32 bits. 784 * A unique 64 bit key is generated in-memory and may be regenerated a 785 * second time when the directory record is flushed to the on-disk B-Tree. 786 * 787 * A referenced record is passed to this function. This function 788 * eats the reference. If an error occurs the record will be deleted. 789 */ 790 static 791 int 792 hammer_mem_add(struct hammer_transaction *trans, hammer_record_t record) 793 { 794 while (RB_INSERT(hammer_rec_rb_tree, &record->ip->rec_tree, record)) { 795 if (record->rec.base.base.rec_type != HAMMER_RECTYPE_DIRENTRY){ 796 record->flags |= HAMMER_RECF_DELETED; 797 hammer_rel_mem_record(record); 798 return (EEXIST); 799 } 800 if (++trans->hmp->namekey_iterator == 0) 801 ++trans->hmp->namekey_iterator; 802 record->rec.base.base.key &= ~(0xFFFFFFFFLL); 803 record->rec.base.base.key |= trans->hmp->namekey_iterator; 804 } 805 record->flags |= HAMMER_RECF_ONRBTREE; 806 hammer_modify_inode(trans, record->ip, HAMMER_INODE_XDIRTY); 807 hammer_rel_mem_record(record); 808 return(0); 809 } 810 811 /************************************************************************ 812 * HAMMER INODE MERGED-RECORD FUNCTIONS * 813 ************************************************************************ 814 * 815 * These functions augment the B-Tree scanning functions in hammer_btree.c 816 * by merging in-memory records with on-disk records. 817 */ 818 819 /* 820 * Locate a particular record either in-memory or on-disk. 821 * 822 * NOTE: This is basically a standalone routine, hammer_ip_next() may 823 * NOT be called to iterate results. 
 */
int
hammer_ip_lookup(hammer_cursor_t cursor, struct hammer_inode *ip)
{
	int error;

	/*
	 * If the element is in-memory return it without searching the
	 * on-disk B-Tree
	 */
	error = hammer_mem_lookup(cursor, ip);
	if (error == 0) {
		cursor->record = &cursor->iprec->rec;
		return(error);
	}
	if (error != ENOENT)
		return(error);

	/*
	 * If the inode has on-disk components search the on-disk B-Tree.
	 */
	if ((ip->flags & HAMMER_INODE_ONDISK) == 0)
		return(error);
	error = hammer_btree_lookup(cursor);
	if (error == 0)
		error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_RECORD);
	return(error);
}

/*
 * Locate the first record within the cursor's key_beg/key_end range,
 * restricted to a particular inode.  0 is returned on success, ENOENT
 * if no records matched the requested range, or some other error.
 *
 * When 0 is returned hammer_ip_next() may be used to iterate additional
 * records within the requested range.
 */
int
hammer_ip_first(hammer_cursor_t cursor, struct hammer_inode *ip)
{
	int error;

	/*
	 * Clean up fields and setup for merged scan.  Start pessimistic:
	 * both sources at EOF and 'eaten'; the searches below clear the
	 * flags for whichever source has a record available.
	 */
	cursor->flags &= ~HAMMER_CURSOR_DELBTREE;
	cursor->flags |= HAMMER_CURSOR_ATEDISK | HAMMER_CURSOR_ATEMEM;
	cursor->flags |= HAMMER_CURSOR_DISKEOF | HAMMER_CURSOR_MEMEOF;
	if (cursor->iprec) {
		hammer_rel_mem_record(cursor->iprec);
		cursor->iprec = NULL;
	}

	/*
	 * Search the on-disk B-Tree.  hammer_btree_lookup() only does an
	 * exact lookup so if we get ENOENT we have to call the iterate
	 * function to validate the first record after the begin key.
	 *
	 * The ATEDISK flag is used by hammer_btree_iterate to determine
	 * whether it must index forwards or not.  It is also used here
	 * to select the next record from in-memory or on-disk.
	 */
	if (ip->flags & HAMMER_INODE_ONDISK) {
		error = hammer_btree_lookup(cursor);
		if (error == ENOENT) {
			cursor->flags &= ~HAMMER_CURSOR_ATEDISK;
			error = hammer_btree_iterate(cursor);
		}
		if (error && error != ENOENT)
			return(error);
		if (error == 0) {
			cursor->flags &= ~HAMMER_CURSOR_DISKEOF;
			cursor->flags &= ~HAMMER_CURSOR_ATEDISK;
		} else {
			cursor->flags |= HAMMER_CURSOR_ATEDISK;
		}
	}

	/*
	 * Search the in-memory record list (Red-Black tree).  Unlike the
	 * B-Tree search, mem_first checks for records in the range.
	 */
	error = hammer_mem_first(cursor, ip);
	if (error && error != ENOENT)
		return(error);
	if (error == 0) {
		cursor->flags &= ~HAMMER_CURSOR_MEMEOF;
		cursor->flags &= ~HAMMER_CURSOR_ATEMEM;
	}

	/*
	 * This will return the first matching record.
	 */
	return(hammer_ip_next(cursor));
}

/*
 * Retrieve the next record in a merged iteration within the bounds of the
 * cursor.  This call may be made multiple times after the cursor has been
 * initially searched with hammer_ip_first().
 *
 * 0 is returned on success, ENOENT if no further records match the
 * requested range, or some other error code is returned.
 */
int
hammer_ip_next(hammer_cursor_t cursor)
{
	hammer_btree_elm_t elm;
	hammer_record_t rec;
	int error;
	int r;

	/*
	 * Load the current on-disk and in-memory record.  If we ate any
	 * records we have to get the next one.
	 *
	 * If we deleted the last on-disk record we had scanned ATEDISK will
	 * be clear and DELBTREE will be set, forcing a call to iterate.  The
	 * fact that ATEDISK is clear causes iterate to re-test the 'current'
	 * element.  If ATEDISK is set, iterate will skip the 'current'
	 * element.
	 *
	 * Get the next on-disk record
	 */
	if (cursor->flags & (HAMMER_CURSOR_ATEDISK|HAMMER_CURSOR_DELBTREE)) {
		if ((cursor->flags & HAMMER_CURSOR_DISKEOF) == 0) {
			error = hammer_btree_iterate(cursor);
			cursor->flags &= ~HAMMER_CURSOR_DELBTREE;
			if (error == 0)
				cursor->flags &= ~HAMMER_CURSOR_ATEDISK;
			else
				cursor->flags |= HAMMER_CURSOR_DISKEOF |
						 HAMMER_CURSOR_ATEDISK;
		}
	}

	/*
	 * Get the next in-memory record.  The record can be ripped out
	 * of the RB tree so we maintain a scan_info structure to track
	 * the next node.
	 *
	 * hammer_rec_scan_cmp:  Is the record still in our general range,
	 *			 (non-inclusive of snapshot exclusions)?
	 * hammer_rec_scan_callback: Is the record in our snapshot?
	 */
	if (cursor->flags & HAMMER_CURSOR_ATEMEM) {
		if ((cursor->flags & HAMMER_CURSOR_MEMEOF) == 0) {
			if (cursor->iprec) {
				hammer_rel_mem_record(cursor->iprec);
				cursor->iprec = NULL;
			}
			rec = cursor->scan.node;	/* next node */
			while (rec) {
				/* out of range: stop with iprec unset */
				if (hammer_rec_scan_cmp(rec, cursor) != 0)
					break;
				/* visible match: callback sets iprec */
				if (hammer_rec_scan_callback(rec, cursor) != 0)
					break;
				rec = hammer_rec_rb_tree_RB_NEXT(rec);
			}
			if (cursor->iprec) {
				KKASSERT(cursor->iprec == rec);
				cursor->flags &= ~HAMMER_CURSOR_ATEMEM;
				cursor->scan.node =
					hammer_rec_rb_tree_RB_NEXT(rec);
			} else {
				cursor->flags |= HAMMER_CURSOR_MEMEOF;
			}
		}
	}

	/*
	 * Extract either the disk or memory record depending on their
	 * relative position.
	 */
	error = 0;
	switch(cursor->flags & (HAMMER_CURSOR_ATEDISK | HAMMER_CURSOR_ATEMEM)) {
	case 0:
		/*
		 * Both entries valid; return whichever sorts first.
		 */
		elm = &cursor->node->ondisk->elms[cursor->index];
		r = hammer_btree_cmp(&elm->base, &cursor->iprec->rec.base.base);
		if (r < 0) {
			error = hammer_btree_extract(cursor,
						     HAMMER_CURSOR_GET_RECORD);
			cursor->flags |= HAMMER_CURSOR_ATEDISK;
			break;
		}
		/* fall through to the memory entry */
	case HAMMER_CURSOR_ATEDISK:
		/*
		 * Only the memory entry is valid
		 */
		cursor->record = &cursor->iprec->rec;
		cursor->flags |= HAMMER_CURSOR_ATEMEM;
		break;
	case HAMMER_CURSOR_ATEMEM:
		/*
		 * Only the disk entry is valid
		 */
		error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_RECORD);
		cursor->flags |= HAMMER_CURSOR_ATEDISK;
		break;
	default:
		/*
		 * Neither entry is valid
		 *
		 * XXX error not set properly
		 */
		cursor->record = NULL;
		error = ENOENT;
		break;
	}
	return(error);
}

/*
 * Resolve the cursor->data pointer for the current cursor position in
 * a merged iteration.
 */
int
hammer_ip_resolve_data(hammer_cursor_t cursor)
{
	int error;

	/* in-memory records carry their data pointer directly */
	if (cursor->iprec && cursor->record == &cursor->iprec->rec) {
		cursor->data = cursor->iprec->data;
		error = 0;
	} else {
		error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_DATA);
	}
	return(error);
}

/*
 * Delete all records within the specified range for inode ip.
 *
 * NOTE: An unaligned range will cause new records to be added to cover
 * the edge cases. (XXX not implemented yet).
 *
 * NOTE: ran_end is inclusive (e.g. 0,1023 instead of 0,1024).
 *
 * NOTE: Record keys for regular file data have to be special-cased since
 * they indicate the end of the range (key = base + bytes).
1068 * 1069 * NOTE: The spike structure must be filled in if we return ENOSPC. 1070 */ 1071 int 1072 hammer_ip_delete_range(hammer_transaction_t trans, hammer_inode_t ip, 1073 int64_t ran_beg, int64_t ran_end, 1074 struct hammer_cursor **spike) 1075 { 1076 struct hammer_cursor cursor; 1077 hammer_record_ondisk_t rec; 1078 hammer_base_elm_t base; 1079 int error; 1080 int64_t off; 1081 1082 hammer_init_cursor_hmp(&cursor, &ip->cache[0], ip->hmp); 1083 1084 cursor.key_beg.obj_id = ip->obj_id; 1085 cursor.key_beg.create_tid = ip->obj_asof; 1086 cursor.key_beg.delete_tid = 0; 1087 cursor.key_beg.obj_type = 0; 1088 1089 cursor.key_end = cursor.key_beg; 1090 if (ip->ino_rec.base.base.obj_type == HAMMER_OBJTYPE_DBFILE) { 1091 cursor.key_beg.key = ran_beg; 1092 cursor.key_beg.rec_type = HAMMER_RECTYPE_DB; 1093 cursor.key_end.rec_type = HAMMER_RECTYPE_DB; 1094 cursor.key_end.key = ran_end; 1095 } else { 1096 /* 1097 * The key in the B-Tree is (base+bytes), so the first possible 1098 * matching key is ran_beg + 1. 1099 */ 1100 int64_t tmp64; 1101 1102 cursor.key_beg.key = ran_beg + 1; 1103 cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA; 1104 cursor.key_end.rec_type = HAMMER_RECTYPE_DATA; 1105 1106 tmp64 = ran_end + MAXPHYS + 1; /* work around GCC-4 bug */ 1107 if (tmp64 < ran_end) 1108 cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL; 1109 else 1110 cursor.key_end.key = ran_end + MAXPHYS + 1; 1111 } 1112 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE; 1113 1114 error = hammer_ip_first(&cursor, ip); 1115 1116 /* 1117 * Iterate through matching records and mark them as deleted. 1118 */ 1119 while (error == 0) { 1120 rec = cursor.record; 1121 base = &rec->base.base; 1122 1123 KKASSERT(base->delete_tid == 0); 1124 1125 /* 1126 * There may be overlap cases for regular file data. Also 1127 * remember the key for a regular file record is the offset 1128 * of the last byte of the record (base + len - 1), NOT the 1129 * base offset. 
1130 */ 1131 #if 0 1132 kprintf("delete_range rec_type %02x\n", base->rec_type); 1133 #endif 1134 if (base->rec_type == HAMMER_RECTYPE_DATA) { 1135 #if 0 1136 kprintf("delete_range loop key %016llx\n", 1137 base->key - rec->base.data_len); 1138 #endif 1139 off = base->key - rec->base.data_len; 1140 /* 1141 * Check the left edge case. We currently do not 1142 * split existing records. 1143 */ 1144 if (off < ran_beg) { 1145 panic("hammer left edge case %016llx %d\n", 1146 base->key, rec->base.data_len); 1147 } 1148 1149 /* 1150 * Check the right edge case. Note that the 1151 * record can be completely out of bounds, which 1152 * terminates the search. 1153 * 1154 * base->key is exclusive of the right edge while 1155 * ran_end is inclusive of the right edge. The 1156 * (key - data_len) left boundary is inclusive. 1157 * 1158 * XXX theory-check this test at some point, are 1159 * we missing a + 1 somewhere? Note that ran_end 1160 * could overflow. 1161 */ 1162 if (base->key - 1 > ran_end) { 1163 if (base->key - rec->base.data_len > ran_end) { 1164 kprintf("right edge OOB\n"); 1165 break; 1166 } 1167 panic("hammer right edge case\n"); 1168 } 1169 } 1170 1171 /* 1172 * Mark the record and B-Tree entry as deleted. This will 1173 * also physically delete the B-Tree entry, record, and 1174 * data if the retention policy dictates. The function 1175 * will set HAMMER_CURSOR_DELBTREE which hammer_ip_next() 1176 * uses to perform a fixup. 1177 */ 1178 error = hammer_ip_delete_record(&cursor, trans->tid); 1179 if (error) 1180 break; 1181 error = hammer_ip_next(&cursor); 1182 } 1183 hammer_done_cursor(&cursor); 1184 if (error == ENOENT) 1185 error = 0; 1186 return(error); 1187 } 1188 1189 /* 1190 * Delete all records associated with an inode except the inode record 1191 * itself. 
 */
int
hammer_ip_delete_range_all(hammer_transaction_t trans, hammer_inode_t ip)
{
	struct hammer_cursor cursor;
	hammer_record_ondisk_t rec;
	hammer_base_elm_t base;
	int error;

	hammer_init_cursor_hmp(&cursor, &ip->cache[0], ip->hmp);

	/*
	 * Scan everything belonging to the inode except the inode record
	 * itself: start one past HAMMER_RECTYPE_INODE and run through the
	 * maximum record type and key.
	 */
	cursor.key_beg.obj_id = ip->obj_id;
	cursor.key_beg.create_tid = ip->obj_asof;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.obj_type = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE + 1;
	cursor.key_beg.key = HAMMER_MIN_KEY;

	cursor.key_end = cursor.key_beg;
	cursor.key_end.rec_type = 0xFFFF;
	cursor.key_end.key = HAMMER_MAX_KEY;

	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;

	error = hammer_ip_first(&cursor, ip);

	/*
	 * Iterate through matching records and mark them as deleted.
	 */
	while (error == 0) {
		rec = cursor.record;
		base = &rec->base.base;

		KKASSERT(base->delete_tid == 0);

		/*
		 * Mark the record and B-Tree entry as deleted.  This will
		 * also physically delete the B-Tree entry, record, and
		 * data if the retention policy dictates.  The function
		 * will set HAMMER_CURSOR_DELBTREE which hammer_ip_next()
		 * uses to perform a fixup.
		 */
		error = hammer_ip_delete_record(&cursor, trans->tid);
		if (error)
			break;
		error = hammer_ip_next(&cursor);
	}
	hammer_done_cursor(&cursor);
	/* running off the end of the range is not an error */
	if (error == ENOENT)
		error = 0;
	return(error);
}

/*
 * Delete the record at the current cursor.  Returns 0 on success or an
 * error from the B-Tree extraction/deletion.
 */
int
hammer_ip_delete_record(hammer_cursor_t cursor, hammer_tid_t tid)
{
	hammer_btree_elm_t elm;
	hammer_mount_t hmp;
	int error;

	/*
	 * In-memory (unsynchronized) records can simply be freed.
	 */
	if (cursor->record == &cursor->iprec->rec) {
		cursor->iprec->flags |= HAMMER_RECF_DELETED;
		return(0);
	}

	/*
	 * On-disk records are marked as deleted by updating their delete_tid.
	 * Both the record itself and the B-Tree leaf element get the tid.
	 */
	error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_RECORD);
	elm = NULL;
	hmp = cursor->node->cluster->volume->hmp;

	if (error == 0) {
		hammer_modify_buffer(cursor->record_buffer);
		cursor->record->base.base.delete_tid = tid;

		hammer_modify_buffer_done(cursor->record_buffer);
		hammer_modify_node(cursor->node);
		elm = &cursor->node->ondisk->elms[cursor->index];
		elm->leaf.base.delete_tid = tid;
		hammer_modify_node_done(cursor->node);
	}

	/*
	 * If we were mounted with the nohistory option, we physically
	 * delete the record.
	 */
	if (error == 0 && (hmp->hflags & HMNT_NOHISTORY)) {
		int32_t rec_offset;
		int32_t data_offset;
		int32_t data_len;
		hammer_cluster_t cluster;

		/*
		 * Save the element's fields; the B-Tree deletion below
		 * invalidates elm.
		 */
		rec_offset = elm->leaf.rec_offset;
		data_offset = elm->leaf.data_offset;
		data_len = elm->leaf.data_len;
#if 0
		kprintf("hammer_ip_delete_record: %08x %08x/%d\n",
			rec_offset, data_offset, data_len);
#endif
		cluster = cursor->node->cluster;
		hammer_ref_cluster(cluster);

		error = hammer_btree_delete(cursor);
		if (error == 0) {
			/*
			 * This forces a fixup for the iteration because
			 * the cursor is now either sitting at the 'next'
			 * element or sitting at the end of a leaf.
			 */
			if ((cursor->flags & HAMMER_CURSOR_DISKEOF) == 0) {
				cursor->flags |= HAMMER_CURSOR_DELBTREE;
				cursor->flags &= ~HAMMER_CURSOR_ATEDISK;
			}
			hammer_free_record(cluster, rec_offset);
			/*
			 * Only free the data if it lies outside the record
			 * itself (offset not within HAMMER_RECORD_SIZE of
			 * rec_offset).
			 */
			if (data_offset && (data_offset - rec_offset < 0 ||
			    data_offset - rec_offset >= HAMMER_RECORD_SIZE)) {
				hammer_free_data(cluster, data_offset,data_len);
			}
		}
		hammer_rel_cluster(cluster, 0);
		if (error) {
			panic("hammer_ip_delete_record: unable to physically delete the record!\n");
			error = 0;
		}
	}
	return(error);
}

/*
 * Determine whether a directory is empty or not.  Returns 0 if the directory
 * is empty, ENOTEMPTY if it isn't, plus other possible errors.
 */
int
hammer_ip_check_directory_empty(hammer_transaction_t trans, hammer_inode_t ip)
{
	struct hammer_cursor cursor;
	int error;

	hammer_init_cursor_hmp(&cursor, &ip->cache[0], ip->hmp);

	/*
	 * Scan all records for the inode other than the inode record
	 * itself; any hit means the directory is not empty.
	 */
	cursor.key_beg.obj_id = ip->obj_id;
	cursor.key_beg.create_tid = ip->obj_asof;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.obj_type = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE + 1;
	cursor.key_beg.key = HAMMER_MIN_KEY;

	cursor.key_end = cursor.key_beg;
	cursor.key_end.rec_type = 0xFFFF;
	cursor.key_end.key = HAMMER_MAX_KEY;

	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;

	error = hammer_ip_first(&cursor, ip);
	if (error == ENOENT)		/* no records at all: empty */
		error = 0;
	else if (error == 0)		/* found a record: not empty */
		error = ENOTEMPTY;
	hammer_done_cursor(&cursor);
	return(error);
}