/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include <sys/systm.h>
#include <sys/types.h>
#include <sys/vnode.h>
#include <sys/errno.h>
#include <sys/sysmacros.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/proc.h>
#include <sys/cmn_err.h>
#include <sys/fssnap_if.h>
#include <sys/fs/ufs_inode.h>
#include <sys/fs/ufs_filio.h>
#include <sys/fs/ufs_log.h>
#include <sys/fs/ufs_bio.h>
#include <sys/inttypes.h>
#include <sys/callb.h>
#include <sys/tnf_probe.h>

/*
 * Kernel threads for logging
 * Currently only one for rolling the log (one per log).
 */
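
/*
 * In outline (derived from the code below): the roll thread copies
 * committed deltas from the on-disk log back to their home (master)
 * locations so that log space can be reclaimed.  log_roll_read()
 * gathers up to nmblk MAPBLOCKs of master data with deltas overlaid,
 * log_roll_write() writes them back in ascending block order, and
 * trans_roll() drives the loop and then advances the log head via
 * logmap_sethead().
 */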

#define LUFS_DEFAULT_NUM_ROLL_BUFS 16
#define LUFS_DEFAULT_MIN_ROLL_BUFS 4
#define LUFS_DEFAULT_MAX_ROLL_BUFS 64

/*
 * Macros
 */
#define logmap_need_roll(logmap) ((logmap)->mtm_nme > logmap_maxnme)
#define ldl_empty(ul) ((ul)->un_head_lof == (ul)->un_tail_lof)

/*
 * Tunables
 */
uint32_t lufs_num_roll_bufs = LUFS_DEFAULT_NUM_ROLL_BUFS;
uint32_t lufs_min_roll_bufs = LUFS_DEFAULT_MIN_ROLL_BUFS;
uint32_t lufs_max_roll_bufs = LUFS_DEFAULT_MAX_ROLL_BUFS;
long logmap_maxnme = 1536;
int trans_roll_tics = 0;
uint64_t trans_roll_new_delta = 0;
uint64_t lrr_wait = 0;
/*
 * Key for thread specific data for the roll thread to
 * bypass snapshot throttling
 */
uint_t bypass_snapshot_throttle_key;

/*
 * externs
 */
extern kmutex_t ml_scan;
extern kcondvar_t ml_scan_cv;
extern int maxphys;

static void
trans_roll_wait(mt_map_t *logmap, callb_cpr_t *cprinfop)
{
        mutex_enter(&logmap->mtm_mutex);
        logmap->mtm_ref = 0;
        if (logmap->mtm_flags & MTM_FORCE_ROLL) {
                cv_broadcast(&logmap->mtm_from_roll_cv);
        }
        logmap->mtm_flags &= ~(MTM_FORCE_ROLL | MTM_ROLLING);
        CALLB_CPR_SAFE_BEGIN(cprinfop);
        (void) cv_timedwait(&logmap->mtm_to_roll_cv, &logmap->mtm_mutex,
            lbolt + trans_roll_tics);
        CALLB_CPR_SAFE_END(cprinfop, &logmap->mtm_mutex);
        logmap->mtm_flags |= MTM_ROLLING;
        mutex_exit(&logmap->mtm_mutex);
}

/*
 * returns the number of 8K buffers to use for rolling the log
 */
static uint32_t
log_roll_buffers()
{
        /*
         * sanity validate the tunable lufs_num_roll_bufs
         */
        if (lufs_num_roll_bufs < lufs_min_roll_bufs) {
                return (lufs_min_roll_bufs);
        }
        if (lufs_num_roll_bufs > lufs_max_roll_bufs) {
                return (lufs_max_roll_bufs);
        }
        return (lufs_num_roll_bufs);
}
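
/*
 * For example (illustrative): with the defaults above the roll thread
 * uses 16 buffers of MAPBLOCKSIZE (8K) bytes each.  A value of
 * lufs_num_roll_bufs tuned outside the [lufs_min_roll_bufs,
 * lufs_max_roll_bufs] range is clamped by log_roll_buffers(), so a
 * setting of 128 would still yield 64 buffers.
 */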

/*
 * Find something to roll, then if we don't have cached roll buffers
 * covering all the deltas in that MAPBLOCK then read the master
 * and overlay the deltas.
 * returns:
 *      0 if successful
 *      1 on finding nothing to roll
 *      2 on error
 */
int
log_roll_read(ml_unit_t *ul, rollbuf_t *rbs, int nmblk, caddr_t roll_bufs,
    int *retnbuf)
{
        offset_t mof;
        buf_t *bp;
        rollbuf_t *rbp;
        mt_map_t *logmap = ul->un_logmap;
        daddr_t mblkno;
        int i;
        int error;
        int nbuf;

        /*
         * Make sure there is really something to roll
         */
        mof = 0;
        if (!logmap_next_roll(logmap, &mof)) {
                return (1);
        }

        /*
         * build some master blocks + deltas to roll forward
         */
        rw_enter(&logmap->mtm_rwlock, RW_READER);
        nbuf = 0;
        do {
                mof = mof & (offset_t)MAPBLOCKMASK;
                mblkno = lbtodb(mof);

                /*
                 * Check for the case of a new delta to a buffer that is
                 * already set up
                 */
                for (i = 0, rbp = rbs; i < nbuf; ++i, ++rbp) {
                        if (P2ALIGN(rbp->rb_bh.b_blkno,
                            MAPBLOCKSIZE / DEV_BSIZE) == mblkno) {
                                TNF_PROBE_0(trans_roll_new_delta, "lufs",
                                    /* CSTYLED */);
                                trans_roll_new_delta++;
                                /* Flush out the current set of buffers */
                                goto flush_bufs;
                        }
                }

                /*
                 * Work out what to roll next. If it isn't cached then read
                 * it asynchronously from the master.
                 */
                bp = &rbp->rb_bh;
                bp->b_blkno = mblkno;
                bp->b_flags = B_READ;
                bp->b_un.b_addr = roll_bufs + (nbuf << MAPBLOCKSHIFT);
                bp->b_bufsize = MAPBLOCKSIZE;
                if (top_read_roll(rbp, ul)) {
                        /* logmap deltas were in use */
                        if (nbuf == 0) {
                                /*
                                 * On first buffer wait for the logmap user
                                 * to finish by grabbing the logmap lock
                                 * exclusively rather than spinning
                                 */
                                rw_exit(&logmap->mtm_rwlock);
                                lrr_wait++;
                                rw_enter(&logmap->mtm_rwlock, RW_WRITER);
                                rw_exit(&logmap->mtm_rwlock);
                                return (1);
                        }
                        /* we have at least one buffer - flush it */
                        goto flush_bufs;
                }
                if ((bp->b_flags & B_INVAL) == 0) {
                        nbuf++;
                }
                mof += MAPBLOCKSIZE;
        } while ((nbuf < nmblk) && logmap_next_roll(logmap, &mof));

        /*
         * If there was nothing to roll, cycle back
         */
        if (nbuf == 0) {
                rw_exit(&logmap->mtm_rwlock);
                return (1);
        }

flush_bufs:
        /*
         * For each buffer, if it isn't cached then wait for the read to
         * finish and overlay the deltas.
         */
        for (error = 0, i = 0, rbp = rbs; i < nbuf; ++i, ++rbp) {
                if (!rbp->rb_crb) {
                        bp = &rbp->rb_bh;
                        if (trans_not_wait(bp)) {
                                ldl_seterror(ul,
                                    "Error reading master during ufs log roll");
                                error = 1;
                        }
                        /*
                         * sync read the data from the log
                         */
                        if (ldl_read(ul, bp->b_un.b_addr,
                            ldbtob(bp->b_blkno) & (offset_t)MAPBLOCKMASK,
                            MAPBLOCKSIZE, rbp->rb_age)) {
                                error = 1;
                        }
                }

                /*
                 * reset the age bit in the age list
                 */
                logmap_list_put_roll(logmap, rbp->rb_age);

                if (ul->un_flags & LDL_ERROR) {
                        error = 1;
                }
        }
        rw_exit(&logmap->mtm_rwlock);
        if (error)
                return (2);
        *retnbuf = nbuf;
        return (0);
}

/*
 * Write out a cached roll buffer
 */
void
log_roll_write_crb(ufsvfs_t *ufsvfsp, rollbuf_t *rbp)
{
        crb_t *crb = rbp->rb_crb;
        buf_t *bp = &rbp->rb_bh;

        bp->b_blkno = lbtodb(crb->c_mof);
        bp->b_un.b_addr = crb->c_buf;
        bp->b_bcount = crb->c_nb;
        bp->b_bufsize = crb->c_nb;
        ASSERT((crb->c_nb & DEV_BMASK) == 0);
        bp->b_flags = B_WRITE;
        logstats.ls_rwrites.value.ui64++;

        /* if snapshots are enabled, call it */
        if (ufsvfsp->vfs_snapshot) {
                fssnap_strategy(&ufsvfsp->vfs_snapshot, bp);
        } else {
                (void) bdev_strategy(bp);
        }
}
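
/*
 * A note on rb_secmap (as used by log_roll_write_bufs() below): each
 * bit appears to correspond to one DEV_BSIZE sector of the MAPBLOCK,
 * set when that sector holds delta data that must be written back to
 * the master.  Each maximal run of consecutive set bits is written as
 * a single I/O; extra buf_t headers are cloned and chained through
 * b_list when one MAPBLOCK needs more than one write.
 */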

/*
 * Write out a set of non-cached roll buffers
 */
void
log_roll_write_bufs(ufsvfs_t *ufsvfsp, rollbuf_t *rbp)
{
        buf_t *bp = &rbp->rb_bh;
        buf_t *bp2;
        rbsecmap_t secmap = rbp->rb_secmap;
        int j, k;

        ASSERT(secmap);
        ASSERT((bp->b_flags & B_INVAL) == 0);

        do { /* for each contiguous block of sectors */
                /* find start of next sector to write */
                for (j = 0; j < 16; ++j) {
                        if (secmap & UINT16_C(1))
                                break;
                        secmap >>= 1;
                }
                bp->b_un.b_addr += (j << DEV_BSHIFT);
                bp->b_blkno += j;

                /* calculate number of sectors */
                secmap >>= 1;
                j++;
                for (k = 1; j < 16; ++j) {
                        if ((secmap & UINT16_C(1)) == 0)
                                break;
                        secmap >>= 1;
                        k++;
                }
                bp->b_bcount = k << DEV_BSHIFT;
                bp->b_flags = B_WRITE;
                logstats.ls_rwrites.value.ui64++;

                /* if snapshots are enabled, call it */
                if (ufsvfsp->vfs_snapshot)
                        fssnap_strategy(&ufsvfsp->vfs_snapshot, bp);
                else
                        (void) bdev_strategy(bp);
                if (secmap) {
                        /*
                         * Allocate another buf_t to handle
                         * the next write in this MAPBLOCK
                         * Chain them via b_list.
                         */
                        bp2 = kmem_alloc(sizeof (buf_t), KM_SLEEP);
                        bp->b_list = bp2;
                        bioinit(bp2);
                        bp2->b_iodone = trans_not_done;
                        bp2->b_bufsize = MAPBLOCKSIZE;
                        bp2->b_edev = bp->b_edev;
                        bp2->b_un.b_addr =
                            bp->b_un.b_addr + bp->b_bcount;
                        bp2->b_blkno = bp->b_blkno + k;
                        bp = bp2;
                }
        } while (secmap);
}
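
/*
 * Worked example (illustrative): secmap = 0x00f3 (sectors 0-1 and 4-7
 * marked) makes log_roll_write_bufs() above issue two writes for the
 * MAPBLOCK: 2 sectors (1K) starting at sector 0 from the original
 * buf_t, then 4 sectors (2K) starting at sector 4 from a cloned buf_t
 * chained via b_list.
 */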

/*
 * Asynchronously roll the deltas, using the sector map
 * in each rollbuf_t.
 */
int
log_roll_write(ml_unit_t *ul, rollbuf_t *rbs, int nbuf)
{

        ufsvfs_t *ufsvfsp = ul->un_ufsvfs;
        rollbuf_t *rbp;
        buf_t *bp, *bp2;
        rollbuf_t *head, *prev, *rbp2;

        /*
         * Order the buffers by blkno
         */
        ASSERT(nbuf > 0);
#ifdef lint
        prev = rbs;
#endif
        for (head = rbs, rbp = rbs + 1; rbp < rbs + nbuf; rbp++) {
                for (rbp2 = head; rbp2; prev = rbp2, rbp2 = rbp2->rb_next) {
                        if (rbp->rb_bh.b_blkno < rbp2->rb_bh.b_blkno) {
                                if (rbp2 == head) {
                                        rbp->rb_next = head;
                                        head = rbp;
                                } else {
                                        prev->rb_next = rbp;
                                        rbp->rb_next = rbp2;
                                }
                                break;
                        }
                }
                if (rbp2 == NULL) {
                        prev->rb_next = rbp;
                        rbp->rb_next = NULL;
                }
        }

        /*
         * issue the in-order writes
         */
        for (rbp = head; rbp; rbp = rbp2) {
                if (rbp->rb_crb) {
                        log_roll_write_crb(ufsvfsp, rbp);
                } else {
                        log_roll_write_bufs(ufsvfsp, rbp);
                }
                /* null out the rb_next link for next set of rolling */
                rbp2 = rbp->rb_next;
                rbp->rb_next = NULL;
        }

        /*
         * wait for all the writes to finish
         */
        for (rbp = rbs; rbp < rbs + nbuf; rbp++) {
                bp = &rbp->rb_bh;
                if (trans_not_wait(bp)) {
                        ldl_seterror(ul,
                            "Error writing master during ufs log roll");
                }

                /*
                 * Now wait for all the "cloned" buffer writes (if any)
                 * and free those headers
                 */
                bp2 = bp->b_list;
                bp->b_list = NULL;
                while (bp2) {
                        if (trans_not_wait(bp2)) {
                                ldl_seterror(ul,
                                    "Error writing master during ufs log roll");
                        }
                        bp = bp2;
                        bp2 = bp2->b_list;
                        kmem_free(bp, sizeof (buf_t));
                }
        }

        if (ul->un_flags & LDL_ERROR)
                return (1);
        return (0);
}
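
/*
 * trans_roll() below is the roll thread itself (one per log, as noted
 * at the top of this file), presumably started from the lufs setup
 * code.  It loops until MTM_ROLL_EXIT is set or the log is marked
 * LDL_ERROR, and then exits via thread_exit().
 */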

void
trans_roll(ml_unit_t *ul)
{
        callb_cpr_t cprinfo;
        mt_map_t *logmap = ul->un_logmap;
        rollbuf_t *rbs;
        rollbuf_t *rbp;
        buf_t *bp;
        caddr_t roll_bufs;
        uint32_t nmblk;
        int i;
        int doingforceroll;
        int nbuf;

        CALLB_CPR_INIT(&cprinfo, &logmap->mtm_mutex, callb_generic_cpr,
            "trans_roll");

        /*
         * We do not want the roll thread's writes to be
         * throttled by the snapshot.
         * If they are throttled then we can have a deadlock
         * between the roll thread and the snapshot taskq thread:
         * roll thread wants the throttling semaphore and
         * the snapshot taskq thread cannot release the semaphore
         * because it is writing to the log and the log is full.
         */

        (void) tsd_set(bypass_snapshot_throttle_key, (void *)1);

        /*
         * set up some roll parameters
         */
        if (trans_roll_tics == 0)
                trans_roll_tics = 5 * hz;
        nmblk = log_roll_buffers();

        /*
         * allocate the buffers and buffer headers
         */
        roll_bufs = kmem_alloc(nmblk * MAPBLOCKSIZE, KM_SLEEP);
        rbs = kmem_alloc(nmblk * sizeof (rollbuf_t), KM_SLEEP);

        /*
         * initialize the buffer headers
         */
        for (i = 0, rbp = rbs; i < nmblk; ++i, ++rbp) {
                rbp->rb_next = NULL;
                bp = &rbp->rb_bh;
                bioinit(bp);
                bp->b_edev = ul->un_dev;
                bp->b_iodone = trans_not_done;
                bp->b_bufsize = MAPBLOCKSIZE;
        }

        doingforceroll = 0;

again:
        /*
         * LOOP FOREVER
         */

        /*
         * exit on demand
         */
        mutex_enter(&logmap->mtm_mutex);
        if ((ul->un_flags & LDL_ERROR) || (logmap->mtm_flags & MTM_ROLL_EXIT)) {
                kmem_free(rbs, nmblk * sizeof (rollbuf_t));
                kmem_free(roll_bufs, nmblk * MAPBLOCKSIZE);
                logmap->mtm_flags &= ~(MTM_FORCE_ROLL | MTM_ROLL_RUNNING |
                    MTM_ROLL_EXIT | MTM_ROLLING);
                cv_broadcast(&logmap->mtm_from_roll_cv);
                CALLB_CPR_EXIT(&cprinfo);
                thread_exit();
                /* NOTREACHED */
        }

        /*
         * MT_SCAN debug mode
         * don't roll except in FORCEROLL situations
         */
        if (logmap->mtm_debug & MT_SCAN)
                if ((logmap->mtm_flags & MTM_FORCE_ROLL) == 0) {
                        mutex_exit(&logmap->mtm_mutex);
                        trans_roll_wait(logmap, &cprinfo);
                        goto again;
                }
        ASSERT(logmap->mtm_trimlof == 0);

        /*
         * If we've finished a force roll cycle then wake up any
         * waiters.
         */
        if (doingforceroll) {
                doingforceroll = 0;
                logmap->mtm_flags &= ~MTM_FORCE_ROLL;
                mutex_exit(&logmap->mtm_mutex);
                cv_broadcast(&logmap->mtm_from_roll_cv);
        } else {
                mutex_exit(&logmap->mtm_mutex);
        }

        /*
         * If someone wants us to roll something; then do it
         */
        if (logmap->mtm_flags & MTM_FORCE_ROLL) {
                doingforceroll = 1;
                goto rollsomething;
        }

        /*
         * Log is busy, check if logmap is getting full.
         */
        if (logmap_need_roll(logmap)) {
                goto rollsomething;
        }

        /*
         * Check if the log is idle and is not empty
         */
        if (!logmap->mtm_ref && !ldl_empty(ul)) {
                goto rollsomething;
        }

        /*
         * Log is busy, check if it's getting full
         */
        if (ldl_need_roll(ul)) {
                goto rollsomething;
        }

        /*
         * nothing to do; wait a bit and then start over
         */
        trans_roll_wait(logmap, &cprinfo);
        goto again;

        /*
         * ROLL SOMETHING
         */

rollsomething:
        /*
         * Use the cached roll buffers, or read the master
         * and overlay the deltas
         */
        switch (log_roll_read(ul, rbs, nmblk, roll_bufs, &nbuf)) {
        case 1: trans_roll_wait(logmap, &cprinfo);
                /* FALLTHROUGH */
        case 2: goto again;
        /* default case is success */
        }

        /*
         * Asynchronously write out the deltas
         */
        if (log_roll_write(ul, rbs, nbuf))
                goto again;

        /*
         * free up the deltas in the logmap
         */
        for (i = 0, rbp = rbs; i < nbuf; ++i, ++rbp) {
                bp = &rbp->rb_bh;
                logmap_remove_roll(logmap,
                    ldbtob(bp->b_blkno) & (offset_t)MAPBLOCKMASK, MAPBLOCKSIZE);
        }

        /*
         * free up log space; if possible
         */
        logmap_sethead(logmap, ul);

        /*
         * LOOP
         */
        goto again;
}