/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_recover.c,v 1.9 2008/03/18 05:19:16 dillon Exp $
 */

#include "hammer.h"

#if 0

static int hammer_recover_buffer_stage2(hammer_cluster_t cluster,
                        int32_t buf_no);
static int hammer_recover_record(hammer_cluster_t cluster,
                        hammer_buffer_t buffer, int32_t rec_offset,
                        hammer_record_ondisk_t rec);
static int hammer_recover_btree(hammer_cluster_t cluster,
                        hammer_buffer_t buffer, int32_t rec_offset,
                        hammer_record_ondisk_t rec);

/*
 * Recover a cluster.  The caller has referenced and locked the cluster.
 *
 * Generally returns 0 on success and EIO if the recovery was unsuccessful.
 *
 * WARNING!  The cluster being recovered must not have any cached buffers
 * (and hence no cached B-Tree nodes).  Any cached nodes will become
 * seriously corrupted since we rip it all up and regenerate the B-Tree.
 */
int
hammer_recover(hammer_cluster_t cluster)
{
        int buf_no;
        int rec_no;
        int maxblk;
        int nbuffers;
        int buffer_count;
        int record_count;

        kprintf("HAMMER_RECOVER %d:%d\n",
                cluster->volume->vol_no, cluster->clu_no);
        /*Debugger("RECOVER");*/
        KKASSERT(cluster->ondisk->synchronized_rec_id);
        if (RB_ROOT(&cluster->rb_bufs_root)) {
                panic("hammer_recover: cluster %d:%d has cached buffers!",
                      cluster->volume->vol_no,
                      cluster->clu_no);
        }

        if (hammer_alist_find(&cluster->volume->alist, cluster->clu_no,
                              cluster->clu_no + 1, 0) != cluster->clu_no) {
                Debugger("hammer_recover: cluster not allocated!");
        }

        nbuffers = cluster->ondisk->clu_limit / HAMMER_BUFSIZE;
        hammer_modify_cluster(cluster);
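
        /*
         * Everything from here on regenerates the cluster header in
         * place: the statistics, the allocation heuristics, and the
         * A-lists are all rebuilt from the recovered record A-list,
         * so whatever the header currently contains is treated as
         * stale.
         */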

        /*
         * Clear statistics.
         */
        cluster->ondisk->stat_inodes = 0;
        cluster->ondisk->stat_records = 0;
        cluster->ondisk->stat_data_bufs = 0;
        cluster->ondisk->stat_rec_bufs = 0;
        cluster->ondisk->stat_idx_bufs = 0;

        /*
         * Reset allocation heuristics.
         */
        cluster->ondisk->idx_data = 1 * HAMMER_FSBUF_MAXBLKS;
        cluster->ondisk->idx_index = 0 * HAMMER_FSBUF_MAXBLKS;
        cluster->ondisk->idx_record = nbuffers * HAMMER_FSBUF_MAXBLKS;

        /*
         * Re-initialize the master, B-Tree, and mdata A-lists, and
         * recover the record A-list.
         */
        hammer_alist_init(&cluster->alist_master, 1, nbuffers - 1,
                          HAMMER_ASTATE_FREE);
        hammer_alist_init(&cluster->alist_btree,
                          HAMMER_FSBUF_MAXBLKS,
                          (nbuffers - 1) * HAMMER_FSBUF_MAXBLKS,
                          HAMMER_ASTATE_ALLOC);
        hammer_alist_init(&cluster->alist_mdata,
                          HAMMER_FSBUF_MAXBLKS,
                          (nbuffers - 1) * HAMMER_FSBUF_MAXBLKS,
                          HAMMER_ASTATE_ALLOC);
        hammer_alist_recover(&cluster->alist_record,
                             0,
                             HAMMER_FSBUF_MAXBLKS,
                             (nbuffers - 1) * HAMMER_FSBUF_MAXBLKS);
        kprintf("\n");

        kprintf("hammer_recover(1): cluster_free %d\n",
                cluster->alist_master.meta->bm_alist_freeblks);

        /*
         * The cluster is now in good enough shape that general allocations
         * are possible.  Construct an empty B-Tree root.
         */
        {
                hammer_node_t croot;
                int error;

                croot = hammer_alloc_btree(cluster, &error);
                if (error == 0) {
                        hammer_modify_node_noundo(croot);
                        bzero(croot->ondisk, sizeof(*croot->ondisk));
                        croot->ondisk->count = 0;
                        croot->ondisk->type = HAMMER_BTREE_TYPE_LEAF;
                        cluster->ondisk->clu_btree_root = croot->node_offset;
                        hammer_rel_node(croot);
                }
                KKASSERT(error == 0);
        }
        kprintf("hammer_recover(2): cluster_free %d\n",
                cluster->alist_master.meta->bm_alist_freeblks);

        /*
         * Scan the cluster's recovered record A-list.  Just get the meta
         * blocks and ignore all-allocated/uninitialized sections (which
         * we use to indicate reserved areas not assigned to record buffers).
         *
         * The all-free sections are initialized and this is indicated by
         * the alist config's bl_inverted flag being set.  These sections
         * will be returned for recovery purposes.
         */
        buffer_count = 0;
        record_count = 0;

        rec_no = HAMMER_FSBUF_MAXBLKS;
        maxblk = nbuffers * HAMMER_FSBUF_MAXBLKS;
        for (;;) {
                rec_no = hammer_alist_find(&cluster->alist_record,
                                           rec_no,
                                           maxblk,
                                           HAMMER_ALIST_FIND_NOSTACK |
                                           HAMMER_ALIST_FIND_INITONLY);
                if (rec_no == HAMMER_ALIST_BLOCK_NONE)
                        break;
                buf_no = rec_no / HAMMER_FSBUF_MAXBLKS;
                KKASSERT(buf_no > 0 && buf_no <= nbuffers);
                ++buffer_count;
                kprintf("(%d)", buf_no);
                record_count += hammer_recover_buffer_stage2(cluster, buf_no);
                rec_no += HAMMER_FSBUF_MAXBLKS;
        }
        kprintf("HAMMER_RECOVER DONE %d:%d buffers=%d records=%d\n",
                cluster->volume->vol_no, cluster->clu_no,
                buffer_count, record_count);

        /*
         * Validate the parent cluster pointer. XXX
         */

        /*
         * On successful recovery mark the cluster validated.
         */
        cluster->io.validated = 1;
        return(0);
}
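
/*
 * Recovery operates in two stages.  Stage 1 runs via the record A-list's
 * recovery callback (buffer_alist_recover, below): it scans each record
 * buffer, validates its records, and reconstructs the cluster's allocation
 * state.  Stage 2 (hammer_recover_buffer_stage2) then re-inserts the
 * surviving records into the freshly constructed B-Tree.
 */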

/*
 * This is the A-list recovery callback.  It must return a negative error
 * code or a positive free block count.
 */
int
buffer_alist_recover(void *info, int32_t blk, int32_t radix, int32_t count)
{
        hammer_cluster_t cluster;
        hammer_record_ondisk_t rec;
        hammer_buffer_t buffer;
        int32_t buf_no;
        int32_t rec_no;
        int32_t rec_offset;
        int32_t r;
        int error;
        int xcount;

        /*
         * Extract the cluster and the buffer number to recover.
         */
        cluster = info;
        buf_no = blk / HAMMER_FSBUF_MAXBLKS;

        kprintf("(%d)", buf_no);
        buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
        if (error) {
                /*
                 * If we are unable to access the buffer leave it in a
                 * reserved state on the master alist.
                 */
                kprintf("hammer_recover_buffer_stage1: error "
                        "recovering %d:%d:%d\n",
                        cluster->volume->vol_no, cluster->clu_no, buf_no);
                r = hammer_alist_alloc_fwd(&cluster->alist_master, 1, buf_no);
                KKASSERT(r == buf_no);
                return(-error);
        }
        KKASSERT(buffer->buf_type == HAMMER_FSBUF_RECORDS);

        /*
         * If the buffer contains no allocated records tell our parent to
         * mark it as all-allocated/uninitialized and do not reserve it
         * in the master list.
         */
        if (hammer_alist_find(&buffer->alist, 0, HAMMER_RECORD_NODES, 0) ==
            HAMMER_ALIST_BLOCK_NONE) {
                kprintf("GENERAL RECOVERY BUFFER %d\n",
                        blk / HAMMER_FSBUF_MAXBLKS);
                hammer_rel_buffer(buffer, 0);
                return(-EDOM);
        }

        /*
         * Mark the buffer as allocated in the cluster's master A-list.
         */
        r = hammer_alist_alloc_fwd(&cluster->alist_master, 1, buf_no);
        KKASSERT(r == buf_no);
        ++cluster->ondisk->stat_rec_bufs;

        kprintf("recover buffer1 %d:%d:%d cluster_free %d\n",
                cluster->volume->vol_no,
                cluster->clu_no, buf_no,
                cluster->alist_master.meta->bm_alist_freeblks);

        /*
         * Recover the buffer, scan and validate the allocated records.
         * Records which cannot be recovered are freed.
         *
         * The parent a-list must be properly adjusted, so don't just call
         * hammer_alist_recover() on the underlying buffer.  Go through the
         * parent.
         */
        hammer_modify_buffer(buffer);
        count = hammer_alist_recover(&buffer->alist, 0, 0,
                                     HAMMER_RECORD_NODES);
        xcount = 0;
        kprintf("hammer_recover_buffer count1 %d/%d\n",
                HAMMER_RECORD_NODES - count, HAMMER_RECORD_NODES);
        rec_no = 0;
        for (;;) {
                rec_no = hammer_alist_find(&buffer->alist, rec_no,
                                           HAMMER_RECORD_NODES, 0);
                if (rec_no == HAMMER_ALIST_BLOCK_NONE)
                        break;
#if 0
                kprintf("recover record %d:%d:%d %d\n",
                        cluster->volume->vol_no,
                        cluster->clu_no, buf_no, rec_no);
#endif
                rec_offset = offsetof(union hammer_fsbuf_ondisk,
                                      record.recs[rec_no]);
                rec_offset += buf_no * HAMMER_BUFSIZE;
                rec = &buffer->ondisk->record.recs[rec_no];
                error = hammer_recover_record(cluster, buffer,
                                              rec_offset, rec);
                if (error) {
                        kprintf("hammer_recover_record: failed %d:%d@%d\n",
                                cluster->clu_no, buffer->buf_no, rec_offset);
                        hammer_alist_free(&buffer->alist, rec_no, 1);
                        if (hammer_debug_recover_faults)
                                Debugger("FAILED");
                        ++count;        /* free count */
                        --xcount;
                }
                ++rec_no;
                ++xcount;
        }
        kprintf("hammer_recover_buffer count2 %d/%d/%d\n",
                HAMMER_RECORD_NODES - count, xcount, HAMMER_RECORD_NODES);
        KKASSERT(HAMMER_RECORD_NODES - count == xcount);
        hammer_rel_buffer(buffer, 0);
        return(count);
}
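
/*
 * A record is only as good as what it references.  hammer_recover_record,
 * below, rejects records whose rec_id post-dates the last cluster sync,
 * whose B-Tree keys fall outside the cluster's bounds, or whose data
 * offset/length are malformed, and re-marks the referenced data as
 * allocated for everything that passes.
 */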

/*
 * Recover a record, at least into a state that doesn't blow up the
 * filesystem.  Returns 0 on success, non-zero if the record is
 * unrecoverable.
 */
static int
hammer_recover_record(hammer_cluster_t cluster, hammer_buffer_t buffer,
                      int32_t rec_offset, hammer_record_ondisk_t rec)
{
        hammer_buffer_t dbuf;
        u_int64_t syncid = cluster->ondisk->synchronized_rec_id;
        int32_t data_offset;
        int32_t data_len;
        int32_t nblks;
        int32_t dbuf_no;
        int32_t dblk_no;
        int32_t base_blk;
        int32_t r;
        int error = 0;

        /*
         * We have to discard any records with rec_id's greater than the
         * last sync of the cluster header (which guaranteed all related
         * buffers had been synced).  Otherwise the record may reference
         * information that was never synced to disk.
         */
        if (rec->base.rec_id >= syncid) {
                kprintf("recover record: rec_id too large %016llx/%016llx\n",
                        rec->base.rec_id, syncid);
                if (hammer_debug_recover_faults)
                        Debugger("DebugSyncid");
                return(EINVAL);
        }

#if 0
        /* XXX undo incomplete deletions */
        if (rec->base.base.delete_tid > syncid)
                rec->base.base.delete_tid = 0;
#endif

        /*
         * Validate the record's B-Tree key.
         */
        KKASSERT(rec->base.base.rec_type != 0);
        if (rec->base.base.rec_type != HAMMER_RECTYPE_CLUSTER) {
                if (hammer_btree_cmp(&rec->base.base,
                                     &cluster->ondisk->clu_btree_beg) < 0) {
                        kprintf("recover record: range low\n");
                        Debugger("RANGE LOW");
                        return(EINVAL);
                }
                if (hammer_btree_cmp(&rec->base.base,
                                     &cluster->ondisk->clu_btree_end) >= 0) {
                        kprintf("recover record: range high\n");
                        Debugger("RANGE HIGH");
                        return(EINVAL);
                }
        }

        /*
         * Validate the record's data.  If the offset is 0 there is no data
         * (or it is zero-fill) and we can return success immediately.
         * Otherwise make sure everything is ok.
         */
        data_offset = rec->base.data_offset;
        data_len = rec->base.data_len;

        if (data_len == 0)
                rec->base.data_offset = data_offset = 0;
        if (data_offset == 0)
                goto done;

        /*
         * Non-zero data offset, recover the data.
         */
        if (data_offset < HAMMER_BUFSIZE ||
            data_offset >= cluster->ondisk->clu_limit ||
            data_len < 0 || data_len > HAMMER_MAXDATA ||
            data_offset + data_len > cluster->ondisk->clu_limit) {
                kprintf("recover record: bad offset/len %d/%d\n",
                        data_offset, data_len);
                Debugger("BAD OFFSET");
                return(EINVAL);
        }

        /*
         * Check data_offset relative to rec_offset.
         */
        if (data_offset < rec_offset && data_offset + data_len > rec_offset) {
                kprintf("recover record: bad offset: overlapping1\n");
                Debugger("BAD OFFSET - OVERLAP1");
                return(EINVAL);
        }
        if (data_offset >= rec_offset &&
            data_offset < rec_offset + sizeof(struct hammer_base_record)) {
                kprintf("recover record: bad offset: overlapping2\n");
                Debugger("BAD OFFSET - OVERLAP2");
                return(EINVAL);
        }

        /*
         * Check for data embedded in the record.
         */
        if (data_offset >= rec_offset &&
            data_offset < rec_offset + HAMMER_RECORD_SIZE) {
                if (data_offset + data_len > rec_offset + HAMMER_RECORD_SIZE) {
                        kprintf("recover record: bad offset: overlapping3\n");
                        Debugger("BAD OFFSET - OVERLAP3");
                        return(EINVAL);
                }
                goto done;
        }

        KKASSERT(cluster->io.modified);

        /*
         * Recover the allocated data either out of the cluster's master
         * alist or as a buffer sub-allocation.
         */
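        /*
         * Two layouts are handled below: data sized in whole buffers
         * lives in its own buffer-aligned filesystem buffers and is
         * accounted for only in the cluster's master A-list, while
         * smaller data is sub-allocated from a shared HAMMER_FSBUF_DATA
         * buffer and must also be re-marked in alist_mdata.
         */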
        if ((data_len & HAMMER_BUFMASK) == 0) {
                if (data_offset & HAMMER_BUFMASK) {
                        kprintf("recover record: bad offset: unaligned\n");
                        Debugger("BAD OFFSET - UNALIGNED");
                        return(EINVAL);
                }
                nblks = data_len / HAMMER_BUFSIZE;
                dbuf_no = data_offset / HAMMER_BUFSIZE;
                /* XXX power-of-2 check data_len */

                r = hammer_alist_alloc_fwd(&cluster->alist_master,
                                           nblks, dbuf_no);
                if (r == HAMMER_ALIST_BLOCK_NONE) {
                        kprintf("recover record: cannot recover offset1\n");
                        Debugger("CANNOT ALLOC DATABUFFER");
                        return(EINVAL);
                }
                if (r != dbuf_no) {
                        kprintf("recover record: cannot recover offset2\n");
                        hammer_alist_free(&cluster->alist_master, r, nblks);
                        KKASSERT(0);
                        return(EINVAL);
                }
                ++cluster->ondisk->stat_data_bufs;
        } else {
                if ((data_offset & ~HAMMER_BUFMASK) !=
                    ((data_offset + data_len - 1) & ~HAMMER_BUFMASK)) {
                        kprintf("recover record: overlaps multiple bufs\n");
                        Debugger("OVERLAP MULT");
                        return(EINVAL);
                }
                if ((data_offset & HAMMER_BUFMASK) <
                    sizeof(struct hammer_fsbuf_head)) {
                        kprintf("recover record: data in header area\n");
                        Debugger("DATA IN HEADER AREA");
                        return(EINVAL);
                }
                if (data_offset & HAMMER_DATA_BLKMASK) {
                        kprintf("recover record: data blk unaligned\n");
                        Debugger("DATA BLK UNALIGNED");
                        return(EINVAL);
                }

                /*
                 * Ok, recover the space in the data buffer.
                 */
                dbuf_no = data_offset / HAMMER_BUFSIZE;
                r = hammer_alist_alloc_fwd(&cluster->alist_master, 1, dbuf_no);
                if (r != dbuf_no && r != HAMMER_ALIST_BLOCK_NONE)
                        hammer_alist_free(&cluster->alist_master, r, 1);
                if (r == dbuf_no) {
                        /*
                         * This is the first time we've tried to recover
                         * data in this data buffer, reinit it (but don't
                         * zero it out, obviously).
                         *
                         * Calling initbuffer marks the data blocks within
                         * the buffer as being all-allocated.  We have to
                         * mark them free.
                         */
                        dbuf = hammer_get_buffer(cluster, dbuf_no,
                                                 0, &error);
                        if (error == 0) {
                                KKASSERT(dbuf->buf_type == HAMMER_FSBUF_DATA);
                                hammer_modify_buffer(dbuf);
                                hammer_initbuffer(&dbuf->alist,
                                                  &dbuf->ondisk->head,
                                                  HAMMER_FSBUF_DATA);
                                /*dbuf->buf_type = HAMMER_FSBUF_DATA;*/
                                base_blk = dbuf_no * HAMMER_FSBUF_MAXBLKS;
                                hammer_alist_free(&cluster->alist_mdata,
                                                  base_blk,
                                                  HAMMER_DATA_NODES);
                                kprintf("FREE DATA %d/%d\n",
                                        base_blk, HAMMER_DATA_NODES);
                                ++cluster->ondisk->stat_data_bufs;
                        }
                } else {
                        /*
                         * We've seen this data buffer before.
                         */
                        dbuf = hammer_get_buffer(cluster, dbuf_no,
                                                 0, &error);
                }
                if (error) {
                        kprintf("recover record: data: getbuf failed\n");
                        KKASSERT(0);
                        return(EINVAL);
                }

                if (dbuf->buf_type != HAMMER_FSBUF_DATA) {
                        hammer_rel_buffer(dbuf, 0);
                        kprintf("recover record: data: wrong buffer type\n");
                        KKASSERT(0);
                        return(EINVAL);
                }

                /*
                 * Figure out the data block number and the number of blocks.
                 */
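                /*
                 * nblks is data_len rounded up to a whole number of
                 * HAMMER_DATA_BLKSIZE blocks, and dblk_no is derived
                 * from the byte offset of the data within the buffer's
                 * data.data[] array.  The offsetof() cross-check catches
                 * offsets that are not data-block aligned.
                 */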
                nblks = (data_len + HAMMER_DATA_BLKMASK) &
                        ~HAMMER_DATA_BLKMASK;
                nblks /= HAMMER_DATA_BLKSIZE;
                dblk_no = ((data_offset & HAMMER_BUFMASK) -
                           offsetof(union hammer_fsbuf_ondisk, data.data)) /
                          HAMMER_DATA_BLKSIZE;
                if ((data_offset & HAMMER_BUFMASK) !=
                    offsetof(union hammer_fsbuf_ondisk, data.data[dblk_no])) {
                        kprintf("dblk_no %d does not match data_offset "
                                "%d/%d\n",
                                dblk_no,
                                offsetof(union hammer_fsbuf_ondisk,
                                         data.data[dblk_no]),
                                (data_offset & HAMMER_BUFMASK));
                        hammer_rel_buffer(dbuf, 0);
                        kprintf("recover record: data: not block aligned\n");
                        Debugger("bad data");
                        return(EINVAL);
                }
                hammer_modify_buffer(dbuf);
                dblk_no += dbuf_no * HAMMER_FSBUF_MAXBLKS;
                r = hammer_alist_alloc_fwd(&cluster->alist_mdata,
                                           nblks, dblk_no);
                if (r != dblk_no) {
                        if (r != HAMMER_ALIST_BLOCK_NONE)
                                hammer_alist_free(&cluster->alist_mdata,
                                                  r, nblks);
                        hammer_rel_buffer(dbuf, 0);
                        kprintf("recover record: data: unable to realloc "
                                "dbuf %d dblk %d\n",
                                dbuf_no, dblk_no % HAMMER_FSBUF_MAXBLKS);
                        KKASSERT(0);
                        return(EINVAL);
                }
                hammer_rel_buffer(dbuf, 0);
        }
done:
        return(0);
}

/*
 * Rebuild the B-Tree for the records residing in the specified buffer.
 *
 * Returns the number of records recovered.
 */
static int
hammer_recover_buffer_stage2(hammer_cluster_t cluster, int32_t buf_no)
{
        hammer_record_ondisk_t rec;
        hammer_buffer_t buffer;
        int32_t rec_no;
        int32_t rec_offset;
        int record_count = 0;
        int error;

        buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
        if (error) {
                /*
                 * If we are unable to access the buffer leave it in a
                 * reserved state on the master alist.
                 */
                kprintf("hammer_recover_buffer_stage2: error "
                        "recovering %d:%d:%d\n",
                        cluster->volume->vol_no, cluster->clu_no, buf_no);
                Debugger("RECOVER BUFFER STAGE2 FAIL");
                return(0);
        }

        /*
         * Scan the allocated records and enter each one into the new
         * B-Tree.  Note that records which fail insertion are currently
         * left allocated (see the XXX below).
         */
        rec_no = 0;
        for (;;) {
                rec_no = hammer_alist_find(&buffer->alist, rec_no,
                                           HAMMER_RECORD_NODES, 0);
                if (rec_no == HAMMER_ALIST_BLOCK_NONE)
                        break;
                rec_offset = offsetof(union hammer_fsbuf_ondisk,
                                      record.recs[rec_no]);
                rec_offset += buf_no * HAMMER_BUFSIZE;
                rec = &buffer->ondisk->record.recs[rec_no];
                error = hammer_recover_btree(cluster, buffer,
                                             rec_offset, rec);
                if (error) {
                        kprintf("hammer_recover_btree: failed %d:%d@%08x "
                                "error %d buffer %p rec %p rec_no %d "
                                "cluster_free %d\n",
                                cluster->clu_no, buffer->buf_no, rec_offset,
                                error, buffer, rec, rec_no,
                                cluster->alist_master.meta->bm_alist_freeblks);
                        Debugger("recover_btree failed");
                        /* XXX free the record and its data? */
                        /*hammer_alist_free(&buffer->alist, rec_no, 1);*/
                } else {
                        ++record_count;
                }
                ++rec_no;
        }
        hammer_rel_buffer(buffer, 0);
        return(record_count);
}

/*
 * Enter a single record into the B-Tree.
 */
static int
hammer_recover_btree(hammer_cluster_t cluster, hammer_buffer_t buffer,
                     int32_t rec_offset, hammer_record_ondisk_t rec)
{
        struct hammer_cursor cursor;
        union hammer_btree_elm elm;
        hammer_cluster_t ncluster;
        int error = 0;
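
        /*
         * A spike record (HAMMER_RECTYPE_CLUSTER) references a subtree
         * residing in another cluster, identified by rec->spike.vol_no
         * and rec->spike.clu_no.  GET_CLUSTER_NORECOVER is passed when
         * fetching the target cluster so the fetch cannot trigger a
         * nested recovery.
         */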
        /*
         * Check for a spike record.  When spiking into a new cluster do
         * NOT allow a recursive recovery to occur.  We use a lot of
         * stack and the only thing we actually modify in the target
         * cluster is its parent pointer.
         */
        if (rec->base.base.rec_type == HAMMER_RECTYPE_CLUSTER) {
                hammer_volume_t ovolume = cluster->volume;
                hammer_volume_t nvolume;

                nvolume = hammer_get_volume(ovolume->hmp, rec->spike.vol_no,
                                            &error);
                if (error) {
                        Debugger("recover_btree1");
                        return(error);
                }
                ncluster = hammer_get_cluster(nvolume, rec->spike.clu_no,
                                              &error, GET_CLUSTER_NORECOVER);
                hammer_rel_volume(nvolume, 0);
                if (error) {
                        Debugger("recover_btree2");
                        return(error);
                }

                /*
                 * Validate the cluster.  Allow the offset to be fixed up.
                 */
                if (ncluster->ondisk->clu_btree_parent_vol_no !=
                    ovolume->vol_no ||
                    ncluster->ondisk->clu_btree_parent_clu_no !=
                    cluster->clu_no) {
                        kprintf("hammer_recover: Bad cluster spike hookup: "
                                "%d:%d != %d:%d\n",
                                ncluster->ondisk->clu_btree_parent_vol_no,
                                ncluster->ondisk->clu_btree_parent_clu_no,
                                ovolume->vol_no,
                                cluster->clu_no);
                        error = EINVAL;
                        hammer_rel_cluster(ncluster, 0);
                        Debugger("recover_btree3");
                        return(error);
                }
        } else {
                ncluster = NULL;
        }

        /*
         * Locate the insertion point.  Note that we are using the
         * cluster-localized cursor init so the parent will start out NULL.
         *
         * The keys used for spikes are bounds and differ from the key
         * embedded in the spike record.  A special B-Tree insertion
         * call is made to deal with spikes.
         */
        error = hammer_init_cursor_cluster(&cursor, cluster);
        if (error) {
                Debugger("recover_btree6");
                goto failed;
        }
        KKASSERT(cursor.node);
        if (ncluster)
                cursor.key_beg = ncluster->ondisk->clu_btree_beg;
        else
                cursor.key_beg = rec->base.base;
        cursor.flags |= HAMMER_CURSOR_INSERT | HAMMER_CURSOR_RECOVER;

        error = hammer_btree_lookup(&cursor);
        KKASSERT(error != EDEADLK);
        KKASSERT(cursor.node);
        if (error == 0) {
                kprintf("hammer_recover_btree: Duplicate record "
                        "cursor %p rec %p ncluster %p\n",
                        &cursor, rec, ncluster);
                hammer_print_btree_elm(
                        &cursor.node->ondisk->elms[cursor.index],
                        HAMMER_BTREE_TYPE_LEAF, cursor.index);
                Debugger("duplicate record");
        }
        if (error != ENOENT) {
                Debugger("recover_btree5");
                goto failed;
        }

        if (ncluster) {
                /*
                 * Spike record
                 */
                kprintf("recover spike clu %d %016llx-%016llx "
                        "clusterfree %d\n",
                        ncluster->clu_no,
                        ncluster->ondisk->clu_btree_beg.obj_id,
                        ncluster->ondisk->clu_btree_end.obj_id,
                        cluster->alist_master.meta->bm_alist_freeblks);
                error = hammer_btree_insert_cluster(&cursor, ncluster,
                                                    rec_offset);
                kprintf("recover spike record error %d clusterfree %d\n",
                        error,
                        cluster->alist_master.meta->bm_alist_freeblks);
                KKASSERT(error != EDEADLK);
                if (error)
                        Debugger("spike recovery");
        } else {
                /*
                 * Normal record
                 */
#if 0
                kprintf("recover record clu %d %016llx\n",
                        cluster->clu_no, rec->base.base.obj_id);
#endif
                elm.leaf.base = rec->base.base;
                elm.leaf.rec_offset = rec_offset;
                elm.leaf.data_offset = rec->base.data_offset;
                elm.leaf.data_len = rec->base.data_len;
                elm.leaf.data_crc = rec->base.data_crc;

                error = hammer_btree_insert(&cursor, &elm);
                KKASSERT(error != EDEADLK);
        }
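
        /*
         * At this point the record (or the spike's bounds) has either
         * been inserted into the new B-Tree or error is non-zero.  The
         * assertions above reflect that neither insertion path is
         * expected to deadlock during recovery.
         */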
        /*
         * Success if error is 0!
         */
        if (error == 0) {
                /*
                 * Update the cluster header's statistics count.
                 * stat_records is very important for proper reservation
                 * of B-Tree space.  Note that a spike record counts as 2.
                 */
                ++cluster->ondisk->stat_records;
                if (rec->base.base.rec_type == HAMMER_RECTYPE_INODE)
                        ++cluster->ondisk->stat_inodes;
                if (rec->base.base.rec_type == HAMMER_RECTYPE_CLUSTER)
                        ++cluster->ondisk->stat_records;
        }
        if (error) {
                kprintf("hammer_recover_btree: insertion failed\n");
        }

failed:
        if (ncluster)
                hammer_rel_cluster(ncluster, 0);
        hammer_done_cursor(&cursor);
        return(error);
}

#endif