151882Sbostic /*- 251882Sbostic * Copyright (c) 1991 The Regents of the University of California. 351882Sbostic * All rights reserved. 451882Sbostic * 551882Sbostic * %sccs.include.redist.c% 651882Sbostic * 7*55941Sbostic * @(#)lfs_syscalls.c 7.19 (Berkeley) 08/21/92 851882Sbostic */ 951882Sbostic 1051882Sbostic #include <sys/param.h> 1151882Sbostic #include <sys/proc.h> 1251882Sbostic #include <sys/buf.h> 1351882Sbostic #include <sys/mount.h> 1451882Sbostic #include <sys/vnode.h> 1551882Sbostic #include <sys/malloc.h> 1651882Sbostic #include <sys/kernel.h> 1751882Sbostic 1851882Sbostic #include <ufs/ufs/quota.h> 1951882Sbostic #include <ufs/ufs/inode.h> 2051882Sbostic #include <ufs/ufs/ufsmount.h> 21*55941Sbostic #include <ufs/ufs/ufs_extern.h> 2251882Sbostic 2351882Sbostic #include <ufs/lfs/lfs.h> 2451882Sbostic #include <ufs/lfs/lfs_extern.h> 2551882Sbostic 26*55941Sbostic struct buf *lfs_fakebuf __P((struct vnode *, int, size_t, caddr_t)); 27*55941Sbostic 2851882Sbostic /* 2951882Sbostic * lfs_markv: 3051882Sbostic * 3151882Sbostic * This will mark inodes and blocks dirty, so they are written into the log. 3251882Sbostic * It will block until all the blocks have been written. The segment create 3351882Sbostic * time passed in the block_info and inode_info structures is used to decide 3451882Sbostic * if the data is valid for each block (in case some process dirtied a block 3551882Sbostic * or inode that is being cleaned between the determination that a block is 3651882Sbostic * live and the lfs_markv call). 3751882Sbostic * 3851882Sbostic * 0 on success 3951882Sbostic * -1/errno is return on error. 4051882Sbostic */ 4151882Sbostic int 4251882Sbostic lfs_markv(p, uap, retval) 4351882Sbostic struct proc *p; 4451882Sbostic struct args { 4551882Sbostic fsid_t fsid; /* file system */ 4651882Sbostic BLOCK_INFO *blkiov; /* block array */ 4751882Sbostic int blkcnt; /* count of block array entries */ 4851882Sbostic } *uap; 4951882Sbostic int *retval; 5051882Sbostic { 51*55941Sbostic struct segment *sp; 5251882Sbostic BLOCK_INFO *blkp; 5351882Sbostic IFILE *ifp; 54*55941Sbostic struct buf *bp, **bpp; 5552087Sbostic struct inode *ip; 5651882Sbostic struct lfs *fs; 5751882Sbostic struct mount *mntp; 5851882Sbostic struct vnode *vp; 5952173Sbostic void *start; 6052087Sbostic ino_t lastino; 61*55941Sbostic daddr_t b_daddr, v_daddr; 6251882Sbostic u_long bsize; 6351882Sbostic int cnt, error; 6451882Sbostic 6551882Sbostic if (error = suser(p->p_ucred, &p->p_acflag)) 6651882Sbostic return (error); 6751882Sbostic if ((mntp = getvfs(&uap->fsid)) == NULL) 6851882Sbostic return (EINVAL); 69*55941Sbostic /* Initialize a segment. */ 70*55941Sbostic sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK); 71*55941Sbostic sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) / 72*55941Sbostic sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK); 73*55941Sbostic sp->seg_flags = SEGM_CKP; 7451882Sbostic 7551882Sbostic cnt = uap->blkcnt; 7652996Sbostic start = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK); 77*55941Sbostic if (error = copyin(uap->blkiov, start, cnt * sizeof(BLOCK_INFO))) 78*55941Sbostic goto err1; 7951882Sbostic 80*55941Sbostic /* Mark blocks/inodes dirty. */ 8152087Sbostic fs = VFSTOUFS(mntp)->um_lfs; 8252820Sbostic bsize = fs->lfs_bsize; 83*55941Sbostic error = 0; 84*55941Sbostic 85*55941Sbostic lfs_seglock(fs); 86*55941Sbostic lfs_initseg(fs, sp); 87*55941Sbostic sp->seg_flags |= SEGM_CLEAN; 88*55941Sbostic for (v_daddr = LFS_UNUSED_DADDR, lastino = LFS_UNUSED_INUM, 89*55941Sbostic blkp = start; cnt--; ++blkp) { 9052087Sbostic /* 9152087Sbostic * Get the IFILE entry (only once) and see if the file still 9252087Sbostic * exists. 9352087Sbostic */ 9452087Sbostic if (lastino != blkp->bi_inode) { 95*55941Sbostic if (lastino != LFS_UNUSED_INUM) { 96*55941Sbostic lfs_updatemeta(sp, vp); 97*55941Sbostic lfs_writeinode(fs, sp, ip); 98*55941Sbostic vput(vp); 99*55941Sbostic } 10052087Sbostic lastino = blkp->bi_inode; 10152087Sbostic LFS_IENTRY(ifp, fs, blkp->bi_inode, bp); 102*55941Sbostic v_daddr = ifp->if_daddr; 10352087Sbostic brelse(bp); 104*55941Sbostic if (v_daddr == LFS_UNUSED_DADDR) 10552087Sbostic continue; 106*55941Sbostic /* Get the vnode/inode. */ 107*55941Sbostic if (lfs_fastvget(mntp, blkp->bi_inode, v_daddr, &vp, 108*55941Sbostic blkp->bi_lbn == LFS_UNUSED_LBN ? NULL : 109*55941Sbostic blkp->bi_bp)) { 110*55941Sbostic #ifdef DIAGNOSTIC 111*55941Sbostic printf("lfs_markv: VFS_VGET failed (%d)\n", 112*55941Sbostic blkp->bi_inode); 113*55941Sbostic #endif 114*55941Sbostic v_daddr == LFS_UNUSED_DADDR; 115*55941Sbostic continue; 116*55941Sbostic } 117*55941Sbostic ip = VTOI(vp); 118*55941Sbostic } else if (v_daddr == LFS_UNUSED_DADDR) 119*55941Sbostic continue; 12052087Sbostic 121*55941Sbostic /* If this BLOCK_INFO didn't contain a block, keep going. */ 122*55941Sbostic if (blkp->bi_lbn == LFS_UNUSED_LBN) 12351882Sbostic continue; 12452173Sbostic /* 12552173Sbostic * If modify time later than segment create time, see if the 12652173Sbostic * block has been replaced. 12752173Sbostic */ 12854103Smckusick if (ip->i_mtime.ts_sec > blkp->bi_segcreate && 129*55941Sbostic (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr) || 130*55941Sbostic b_daddr != blkp->bi_daddr)) 13152173Sbostic continue; 132*55941Sbostic /* 133*55941Sbostic * If we got to here, then we are keeping the block. If it 134*55941Sbostic * is an indirect block, we want to actually put it in the 135*55941Sbostic * buffer cache so that it can be updated in the finish_meta 136*55941Sbostic * section. If it's not, we need to allocate a fake buffer 137*55941Sbostic * so that writeseg can perform the copyin and write the buffer. 138*55941Sbostic */ 139*55941Sbostic if (blkp->bi_lbn >= 0) /* Data Block */ 140*55941Sbostic bp = lfs_fakebuf(vp, blkp->bi_lbn, bsize, 141*55941Sbostic blkp->bi_bp); 142*55941Sbostic else { 143*55941Sbostic bp = getblk(vp, blkp->bi_lbn, bsize); 144*55941Sbostic if (!(bp->b_flags & B_CACHE) && 145*55941Sbostic (error = copyin(blkp->bi_bp, bp->b_un.b_addr, 146*55941Sbostic bsize))) 147*55941Sbostic goto err2; 148*55941Sbostic if (error = VOP_BWRITE(bp)) 149*55941Sbostic goto err2; 15052087Sbostic } 151*55941Sbostic lfs_gatherblock(sp, bp, NULL); 15251882Sbostic } 153*55941Sbostic lfs_updatemeta(sp, vp); 154*55941Sbostic lfs_writeinode(fs, sp, ip); 155*55941Sbostic vput(vp); 156*55941Sbostic (void) lfs_writeseg(fs, sp); 157*55941Sbostic lfs_segunlock(fs); 15852173Sbostic free(start, M_SEGMENT); 159*55941Sbostic free(sp->bpp, M_SEGMENT); 160*55941Sbostic free(sp, M_SEGMENT); 161*55941Sbostic return (error); 162*55941Sbostic /* 163*55941Sbostic * XXX If we come in to error 2, we might have indirect blocks that were 164*55941Sbostic * updated and now have bad block pointers. I don't know what to do 165*55941Sbostic * about this. 166*55941Sbostic */ 16751882Sbostic 168*55941Sbostic err2: vput(vp); 169*55941Sbostic /* Free up fakebuffers */ 170*55941Sbostic for (bpp = --sp->cbpp; bpp >= sp->bpp; --bpp) 171*55941Sbostic if ((*bpp)->b_flags & B_CALL) { 172*55941Sbostic brelvp(*bpp); 173*55941Sbostic free(*bpp, M_SEGMENT); 174*55941Sbostic } else 175*55941Sbostic brelse(*bpp); 176*55941Sbostic lfs_segunlock(fs); 177*55941Sbostic err1: 178*55941Sbostic free(sp->bpp, M_SEGMENT); 179*55941Sbostic free(sp, M_SEGMENT); 18052173Sbostic free(start, M_SEGMENT); 181*55941Sbostic return(error); 18251882Sbostic } 18351882Sbostic 18451882Sbostic /* 18551882Sbostic * lfs_bmapv: 18651882Sbostic * 18752087Sbostic * This will fill in the current disk address for arrays of blocks. 18851882Sbostic * 18951882Sbostic * 0 on success 19051882Sbostic * -1/errno is return on error. 19151882Sbostic */ 19251882Sbostic int 19351882Sbostic lfs_bmapv(p, uap, retval) 19451882Sbostic struct proc *p; 19551882Sbostic struct args { 19651882Sbostic fsid_t fsid; /* file system */ 19751882Sbostic BLOCK_INFO *blkiov; /* block array */ 19851882Sbostic int blkcnt; /* count of block array entries */ 19951882Sbostic } *uap; 20051882Sbostic int *retval; 20151882Sbostic { 20251882Sbostic BLOCK_INFO *blkp; 20351882Sbostic struct mount *mntp; 20451882Sbostic struct vnode *vp; 20552173Sbostic void *start; 20651882Sbostic daddr_t daddr; 20752173Sbostic int cnt, error, step; 20851882Sbostic 20951882Sbostic if (error = suser(p->p_ucred, &p->p_acflag)) 21051882Sbostic return (error); 21151882Sbostic if ((mntp = getvfs(&uap->fsid)) == NULL) 21251882Sbostic return (EINVAL); 21351882Sbostic 21451882Sbostic cnt = uap->blkcnt; 21552173Sbostic start = blkp = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK); 21651882Sbostic if (error = copyin(uap->blkiov, blkp, cnt * sizeof(BLOCK_INFO))) { 21751882Sbostic free(blkp, M_SEGMENT); 21851882Sbostic return (error); 21951882Sbostic } 22051882Sbostic 22152173Sbostic for (step = cnt; step--; ++blkp) { 22254662Smckusick if (VFS_VGET(mntp, blkp->bi_inode, &vp)) 22352173Sbostic daddr = LFS_UNUSED_DADDR; 22452173Sbostic else { 22553531Sheideman if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &daddr)) 22652173Sbostic daddr = LFS_UNUSED_DADDR; 22752173Sbostic vput(vp); 22852173Sbostic } 22952173Sbostic blkp->bi_daddr = daddr; 23052173Sbostic } 23152173Sbostic copyout(start, uap->blkiov, cnt * sizeof(BLOCK_INFO)); 23252173Sbostic free(start, M_SEGMENT); 23351882Sbostic return (0); 23451882Sbostic } 23551882Sbostic 23651882Sbostic /* 23751882Sbostic * lfs_segclean: 23851882Sbostic * 23951882Sbostic * Mark the segment clean. 24051882Sbostic * 24151882Sbostic * 0 on success 24251882Sbostic * -1/errno is return on error. 24351882Sbostic */ 24451882Sbostic int 24551882Sbostic lfs_segclean(p, uap, retval) 24651882Sbostic struct proc *p; 24751882Sbostic struct args { 24851882Sbostic fsid_t fsid; /* file system */ 24951882Sbostic u_long segment; /* segment number */ 25051882Sbostic } *uap; 25151882Sbostic int *retval; 25251882Sbostic { 25351928Sbostic CLEANERINFO *cip; 25451882Sbostic SEGUSE *sup; 25551882Sbostic struct buf *bp; 25651882Sbostic struct mount *mntp; 25751882Sbostic struct lfs *fs; 25851882Sbostic int error; 25951882Sbostic 26051882Sbostic if (error = suser(p->p_ucred, &p->p_acflag)) 26151882Sbostic return (error); 26251882Sbostic if ((mntp = getvfs(&uap->fsid)) == NULL) 26351882Sbostic return (EINVAL); 26451882Sbostic 26551882Sbostic fs = VFSTOUFS(mntp)->um_lfs; 26651928Sbostic 26751882Sbostic LFS_SEGENTRY(sup, fs, uap->segment, bp); 268*55941Sbostic fs->lfs_avail += fsbtodb(fs, fs->lfs_ssize) - 1; 26955593Sbostic fs->lfs_bfree += (sup->su_nsums * LFS_SUMMARY_SIZE / DEV_BSIZE) + 27055593Sbostic sup->su_ninos * btodb(fs->lfs_bsize); 27151882Sbostic sup->su_flags &= ~SEGUSE_DIRTY; 27255804Sbostic sup->su_nbytes -= sup->su_nsums * LFS_SUMMARY_SIZE; 27355593Sbostic sup->su_ninos = 0; 27455593Sbostic sup->su_nsums = 0; 275*55941Sbostic (void) VOP_BWRITE(bp); 27651928Sbostic 27751928Sbostic LFS_CLEANERINFO(cip, fs, bp); 27851928Sbostic ++cip->clean; 27951928Sbostic --cip->dirty; 280*55941Sbostic (void) VOP_BWRITE(bp); 281*55941Sbostic wakeup(&fs->lfs_avail); 28251882Sbostic return (0); 28351882Sbostic } 28451882Sbostic 28551882Sbostic /* 28651882Sbostic * lfs_segwait: 28751882Sbostic * 28851882Sbostic * This will block until a segment in file system fsid is written. A timeout 28951882Sbostic * in milliseconds may be specified which will awake the cleaner automatically. 29051882Sbostic * An fsid of -1 means any file system, and a timeout of 0 means forever. 29151882Sbostic * 29251882Sbostic * 0 on success 29351882Sbostic * 1 on timeout 29451882Sbostic * -1/errno is return on error. 29551882Sbostic */ 29651882Sbostic int 29751882Sbostic lfs_segwait(p, uap, retval) 29851882Sbostic struct proc *p; 29951882Sbostic struct args { 30051882Sbostic fsid_t fsid; /* file system */ 30151882Sbostic struct timeval *tv; /* timeout */ 30251882Sbostic } *uap; 30351882Sbostic int *retval; 30451882Sbostic { 30551882Sbostic extern int lfs_allclean_wakeup; 30651882Sbostic struct mount *mntp; 30751882Sbostic struct timeval atv; 30851882Sbostic void *addr; 30951882Sbostic u_long timeout; 31051882Sbostic int error, s; 31151882Sbostic 312*55941Sbostic if (error = suser(p->p_ucred, &p->p_acflag)) { 31351882Sbostic return (error); 314*55941Sbostic } 31551882Sbostic #ifdef WHEN_QUADS_WORK 31651882Sbostic if (uap->fsid == (fsid_t)-1) 31751882Sbostic addr = &lfs_allclean_wakeup; 31851882Sbostic else { 31951882Sbostic if ((mntp = getvfs(&uap->fsid)) == NULL) 32051882Sbostic return (EINVAL); 32151882Sbostic addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg; 32251882Sbostic } 32351882Sbostic #else 32451882Sbostic if ((mntp = getvfs(&uap->fsid)) == NULL) 32551882Sbostic addr = &lfs_allclean_wakeup; 32651882Sbostic else 32751882Sbostic addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg; 32851882Sbostic #endif 32951882Sbostic 33051882Sbostic if (uap->tv) { 33151882Sbostic if (error = copyin(uap->tv, &atv, sizeof(struct timeval))) 33251882Sbostic return (error); 33351882Sbostic if (itimerfix(&atv)) 33451882Sbostic return (EINVAL); 33554764Smckusick s = splclock(); 33654764Smckusick timevaladd(&atv, (struct timeval *)&time); 33751882Sbostic timeout = hzto(&atv); 33854277Sbostic splx(s); 33951882Sbostic } else 34051882Sbostic timeout = 0; 34151882Sbostic 34251882Sbostic error = tsleep(addr, PCATCH | PUSER, "segment", timeout); 34351882Sbostic return (error == ERESTART ? EINTR : 0); 34451882Sbostic } 345*55941Sbostic 346*55941Sbostic /* 347*55941Sbostic * VFS_VGET call specialized for the cleaner. The cleaner already knows the 348*55941Sbostic * daddr from the ifile, so don't look it up again. If the cleaner is 349*55941Sbostic * processing IINFO structures, it may have the ondisk inode already, so 350*55941Sbostic * don't go retrieving it again. 351*55941Sbostic */ 352*55941Sbostic int 353*55941Sbostic lfs_fastvget(mp, ino, daddr, vpp, dinp) 354*55941Sbostic struct mount *mp; 355*55941Sbostic ino_t ino; 356*55941Sbostic daddr_t daddr; 357*55941Sbostic struct vnode **vpp; 358*55941Sbostic struct dinode *dinp; 359*55941Sbostic { 360*55941Sbostic register struct inode *ip; 361*55941Sbostic struct vnode *vp; 362*55941Sbostic struct ufsmount *ump; 363*55941Sbostic struct buf *bp; 364*55941Sbostic dev_t dev; 365*55941Sbostic int error; 366*55941Sbostic 367*55941Sbostic ump = VFSTOUFS(mp); 368*55941Sbostic dev = ump->um_dev; 369*55941Sbostic if ((*vpp = ufs_ihashget(dev, ino)) != NULL) 370*55941Sbostic return (0); 371*55941Sbostic 372*55941Sbostic /* Allocate new vnode/inode. */ 373*55941Sbostic if (error = lfs_vcreate(mp, ino, &vp)) { 374*55941Sbostic *vpp = NULL; 375*55941Sbostic return (error); 376*55941Sbostic } 377*55941Sbostic 378*55941Sbostic /* 379*55941Sbostic * Put it onto its hash chain and lock it so that other requests for 380*55941Sbostic * this inode will block if they arrive while we are sleeping waiting 381*55941Sbostic * for old data structures to be purged or for the contents of the 382*55941Sbostic * disk portion of this inode to be read. 383*55941Sbostic */ 384*55941Sbostic ip = VTOI(vp); 385*55941Sbostic ufs_ihashins(ip); 386*55941Sbostic 387*55941Sbostic /* 388*55941Sbostic * XXX 389*55941Sbostic * This may not need to be here, logically it should go down with 390*55941Sbostic * the i_devvp initialization. 391*55941Sbostic * Ask Kirk. 392*55941Sbostic */ 393*55941Sbostic ip->i_lfs = ump->um_lfs; 394*55941Sbostic 395*55941Sbostic /* Read in the disk contents for the inode, copy into the inode. */ 396*55941Sbostic if (dinp) 397*55941Sbostic if (error = copyin(dinp, &ip->i_din, sizeof(struct dinode))) 398*55941Sbostic return (error); 399*55941Sbostic else { 400*55941Sbostic if (error = bread(ump->um_devvp, daddr, 401*55941Sbostic (int)ump->um_lfs->lfs_bsize, NOCRED, &bp)) { 402*55941Sbostic /* 403*55941Sbostic * The inode does not contain anything useful, so it 404*55941Sbostic * would be misleading to leave it on its hash chain. 405*55941Sbostic * Iput() will return it to the free list. 406*55941Sbostic */ 407*55941Sbostic ufs_ihashrem(ip); 408*55941Sbostic 409*55941Sbostic /* Unlock and discard unneeded inode. */ 410*55941Sbostic ufs_iput(ip); 411*55941Sbostic brelse(bp); 412*55941Sbostic *vpp = NULL; 413*55941Sbostic return (error); 414*55941Sbostic } 415*55941Sbostic ip->i_din = *lfs_ifind(ump->um_lfs, ino, bp->b_un.b_dino); 416*55941Sbostic brelse(bp); 417*55941Sbostic } 418*55941Sbostic 419*55941Sbostic /* 420*55941Sbostic * Initialize the vnode from the inode, check for aliases. In all 421*55941Sbostic * cases re-init ip, the underlying vnode/inode may have changed. 422*55941Sbostic */ 423*55941Sbostic if (error = ufs_vinit(mp, lfs_specop_p, LFS_FIFOOPS, &vp)) { 424*55941Sbostic ufs_iput(ip); 425*55941Sbostic *vpp = NULL; 426*55941Sbostic return (error); 427*55941Sbostic } 428*55941Sbostic /* 429*55941Sbostic * Finish inode initialization now that aliasing has been resolved. 430*55941Sbostic */ 431*55941Sbostic ip->i_devvp = ump->um_devvp; 432*55941Sbostic ip->i_flag |= IMOD; 433*55941Sbostic ++ump->um_lfs->lfs_uinodes; 434*55941Sbostic VREF(ip->i_devvp); 435*55941Sbostic *vpp = vp; 436*55941Sbostic return (0); 437*55941Sbostic } 438*55941Sbostic struct buf * 439*55941Sbostic lfs_fakebuf(vp, lbn, size, uaddr) 440*55941Sbostic struct vnode *vp; 441*55941Sbostic int lbn; 442*55941Sbostic size_t size; 443*55941Sbostic caddr_t uaddr; 444*55941Sbostic { 445*55941Sbostic struct buf *bp; 446*55941Sbostic 447*55941Sbostic bp = lfs_newbuf(vp, lbn, 0); 448*55941Sbostic bp->b_saveaddr = uaddr; 449*55941Sbostic bp->b_bufsize = size; 450*55941Sbostic bp->b_bcount = size; 451*55941Sbostic bp->b_flags |= B_INVAL; 452*55941Sbostic return(bp); 453*55941Sbostic } 454