Lines Matching +full:write +full:- +full:1 +full:- +full:bps

9  * or https://opensource.org/licenses/CDDL-1.0.
65 int zfs_bclone_enabled = 1;
87 static int zfs_dio_enabled = 1;
102 if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
105 atomic_inc_32(&zp->z_sync_writes_cnt);
106 zil_commit(zfsvfs->z_log, zp->z_id);
107 atomic_dec_32(&zp->z_sync_writes_cnt);
128 file_sz = zp->z_size;
139 if (zn_has_cached_data(zp, 0, file_sz - 1))
142 lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_READER);
143 error = dmu_offset_next(ZTOZSB(zp)->z_os, zp->z_id, hole, &noff);
158 * We could find a hole that begins after the logical end-of-file,
160 * EOF falls mid-block, then indicate that the "virtual hole"
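
The hole-probing logic above (dmu_offset_next() under the range lock, plus the "virtual hole" reported at a non-block-aligned EOF) is what user space reaches through lseek(2) with SEEK_HOLE/SEEK_DATA. A minimal sketch using only standard Linux calls; the path is hypothetical and error handling is minimal:

/*
 * Sketch: probe for the first hole at or after offset 0 with lseek(2).
 * A file whose EOF falls mid-block should report the "virtual hole"
 * described above as a hole starting at the logical end of file.
 * The path is hypothetical.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fd = open("/tank/fs/sparse.dat", O_RDONLY);
	if (fd == -1)
		return (1);
	off_t hole = lseek(fd, 0, SEEK_HOLE);
	if (hole != -1)
		printf("first hole at offset %lld\n", (long long)hole);
	close(fd);
	return (0);
}
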
233 * zfs_write() -> update_pages()
240 objset_t *os = zfsvfs->z_os;
244 if (!zfs_dio_enabled || os->os_direct == ZFS_DIRECT_DISABLED ||
246 zfs_uio_offset(uio) + zfs_uio_resid(uio) - 1)) {
253 } else if (os->os_direct == ZFS_DIRECT_ALWAYS &&
256 if ((rw == UIO_WRITE && zfs_uio_resid(uio) >= zp->z_blksz) ||
260 } else if (os->os_direct == ZFS_DIRECT_ALWAYS && (ioflag & O_DIRECT)) {
282 IMPLY(ioflag & O_DIRECT, uio->uio_extflg & UIO_DIRECT);
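
The zfs_setup_direct() checks above only promote a request to Direct I/O when it meets the page-alignment rules (or when direct=always applies). A hedged user-space sketch of a request that satisfies them; the path, length, and fill pattern are made up:

/*
 * Sketch: a page-aligned O_DIRECT write, the kind of request the
 * checks above can pass through as Direct I/O when direct=standard.
 * Path and sizes are illustrative only.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	const size_t len = 1 << 20;	/* a multiple of the page size */
	void *buf;

	if (posix_memalign(&buf, sysconf(_SC_PAGESIZE), len) != 0)
		return (1);
	memset(buf, 0xab, len);

	int fd = open("/tank/fs/dio.dat", O_WRONLY | O_CREAT | O_DIRECT, 0644);
	if (fd == -1)
		return (1);
	ssize_t w = pwrite(fd, buf, len, 0);	/* offset 0 is page-aligned */
	close(fd);
	free(buf);
	return (w == (ssize_t)len ? 0 : 1);
}
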
293 * IN: zp - inode of file to be read from.
294 * uio - structure supplying read location, range info,
296 * ioflag - O_SYNC flags; used to provide FRSYNC semantics.
298 * cr - credentials of caller.
300 * OUT: uio - updated offset and range, buffer filled.
305 * inode - atime updated if byte count > 0
319 if (zp->z_pflags & ZFS_AV_QUARANTINED) {
349 * Only do this for non-snapshots.
357 if (zfsvfs->z_log &&
358 (frsync || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS))
359 zil_commit(zfsvfs->z_log, zp->z_id);
364 zfs_locked_range_t *lr = zfs_rangelock_enter(&zp->z_rangelock,
368 * If we are reading past end-of-file we can skip
371 if (zfs_uio_offset(uio) >= zp->z_size) {
375 ASSERT(zfs_uio_offset(uio) < zp->z_size);
389 ssize_t n = MIN(zfs_uio_resid(uio), zp->z_size - zfs_uio_offset(uio));
393 if (uio->uio_extflg & UIO_DIRECT) {
405 * request is page-aligned. In this case, as much of the file
410 * ZFS as at a minimum the I/O request must be page-aligned.
412 dio_remaining_resid = n - P2ALIGN_TYPED(n, PAGE_SIZE, ssize_t);
414 n -= dio_remaining_resid;
418 ssize_t nbytes = MIN(n, chunk_size -
426 zfs_uio_offset(uio) + nbytes - 1)) {
429 error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
445 if (uio->uio_extflg & UIO_DIRECT) {
447 uio->uio_extflg &= ~UIO_DIRECT;
464 (zfs_uio_offset(uio) - start_offset) != 0)
470 n -= nbytes;
473 if (error == 0 && (uio->uio_extflg & UIO_DIRECT) &&
479 uio->uio_extflg &= ~UIO_DIRECT;
482 zfs_uio_offset(uio) + dio_remaining_resid - 1)) {
485 error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), uio,
488 uio->uio_extflg |= UIO_DIRECT;
492 } else if (error && (uio->uio_extflg & UIO_DIRECT)) {
495 int64_t nread = start_resid - n;
497 dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, nread);
502 uio->uio_extflg |= UIO_DIRECT;
507 if (uio->uio_extflg & UIO_DIRECT)
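
The dio_remaining_resid handling above covers an O_DIRECT read that runs to the end of a file whose size is not a page multiple: the page-aligned front is read directly and the sub-page tail is read back through the ARC. A sketch of that case, assuming a hypothetical path; other filesystems may reject the unaligned length:

/*
 * Sketch: read a whole file with O_DIRECT even though its size is not
 * a page multiple; per the fragment above, ZFS serves the aligned
 * front directly and the remainder through the ARC.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <unistd.h>

int
main(void)
{
	int fd = open("/tank/fs/odd-size.dat", O_RDONLY | O_DIRECT);
	struct stat st;
	if (fd == -1 || fstat(fd, &st) != 0)
		return (1);

	long pgsz = sysconf(_SC_PAGESIZE);
	size_t buflen = ((size_t)st.st_size + pgsz - 1) / pgsz * pgsz;
	void *buf;
	if (posix_memalign(&buf, pgsz, buflen) != 0)
		return (1);

	ssize_t r = pread(fd, buf, st.st_size, 0);	/* length not page-aligned */
	free(buf);
	close(fd);
	return (r == st.st_size ? 0 : 1);
}
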
519 zilog_t *zilog = zfsvfs->z_log;
526 * Clear Set-UID/Set-GID bits on successful write if not
531 * to another app after the partial write is committed.
536 mutex_enter(&zp->z_acl_lock);
537 if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | (S_IXUSR >> 6))) != 0 &&
538 (zp->z_mode & (S_ISUID | S_ISGID)) != 0 &&
540 ((zp->z_mode & S_ISUID) != 0 && uid == 0)) != 0) {
543 zp->z_mode &= ~(S_ISUID | S_ISGID);
544 newmode = zp->z_mode;
545 (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs),
548 mutex_exit(&zp->z_acl_lock);
559 va.va_nodeid = zp->z_id;
566 mutex_exit(&zp->z_acl_lock);
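
The effect of zfs_clear_setid_bits_if_necessary() above is visible from user space: a successful write by a writer without the relevant privilege drops the set-uid/set-gid bits. A minimal sketch, assuming the hypothetical file already exists with mode 4755 and the caller may write it but is otherwise unprivileged:

/*
 * Sketch: observe the set-uid bit after a write. Per the fragment
 * above, an unprivileged write to an executable set-uid file should
 * leave S_ISUID cleared. Path is hypothetical.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int
main(void)
{
	struct stat st;
	int fd = open("/tank/fs/suid.bin", O_WRONLY);
	if (fd == -1)
		return (1);
	(void) write(fd, "x", 1);
	if (fstat(fd, &st) == 0)
		printf("S_ISUID is now %s\n",
		    (st.st_mode & S_ISUID) ? "set" : "clear");
	close(fd);
	return (0);
}
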
571 * Write the bytes to a file.
573 * IN: zp - znode of file to be written to.
574 * uio - structure supplying write location, range info,
576 * ioflag - O_APPEND flag set if in append mode.
578 * cr - credentials of caller.
580 * OUT: uio - updated offset and range.
586 * ip - ctime|mtime updated if byte count > 0
597 * Fasttrack empty write
613 &zp->z_size, 8);
615 &zp->z_pflags, 8);
618 * Callers might not be able to detect properly that we are read-only,
631 if ((zp->z_pflags & ZFS_IMMUTABLE) ||
632 ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & O_APPEND) &&
633 (zfs_uio_offset(uio) < zp->z_size))) {
641 offset_t woff = ioflag & O_APPEND ? zp->z_size : zfs_uio_offset(uio);
657 * Pre-fault the pages to ensure slow (e.g. NFS) pages
660 ssize_t pfbytes = MIN(n, DMU_MAX_ACCESS >> 1);
673 * semantics. We reset the write offset once we have the lock.
675 lr = zfs_rangelock_enter(&zp->z_rangelock, 0, n, RL_APPEND);
676 woff = lr->lr_offset;
677 if (lr->lr_length == UINT64_MAX) {
679 * We overlocked the file because this write will cause
683 woff = zp->z_size;
695 * this write, then this range lock will lock the entire file
696 * so that we can re-write the block safely.
698 lr = zfs_rangelock_enter(&zp->z_rangelock, woff, n, RL_WRITER);
715 if (n > limit - woff)
716 n = limit - woff;
718 uint64_t end_size = MAX(zp->z_size, woff + n);
719 zilog_t *zilog = zfsvfs->z_log;
721 (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS);
725 const uint64_t projid = zp->z_projid;
729 * (lr_length == UINT64_MAX), we will direct the write to the ARC.
745 * a recordsize of 1K. The first 1K issued in the loop below will go
746 * through the ARC; however, the following 3 1K requests will
749 if (uio->uio_extflg & UIO_DIRECT && lr->lr_length == UINT64_MAX) {
750 uio->uio_extflg &= ~UIO_DIRECT;
755 * Write the file in reasonable size chunks. Each chunk is written
757 * and allows us to do more fine-grained space accounting.
772 if (lr->lr_length == UINT64_MAX && zp->z_size <= zp->z_blksz) {
773 if (zp->z_blksz > zfsvfs->z_max_blksz &&
774 !ISP2(zp->z_blksz)) {
780 blksz = 1 << highbit64(zp->z_blksz);
782 blksz = zfsvfs->z_max_blksz;
786 blksz = MAX(blksz, zp->z_blksz);
788 blksz = zp->z_blksz;
793 if (n >= blksz && woff >= zp->z_size &&
795 !(uio->uio_extflg & UIO_DIRECT) &&
798 * This write covers a full block. "Borrow" a buffer
804 abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
808 if ((error = zfs_uiocopy(abuf->b_data, blksz,
815 nbytes = MIN(n, (DMU_MAX_ACCESS >> 1) -
829 dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
830 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
831 dmu_buf_impl_t *db = (dmu_buf_impl_t *)sa_get_db(zp->z_sa_hdl);
850 * If rangelock_enter() over-locked we grow the blocksize
855 if (lr->lr_length == UINT64_MAX) {
864 error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl),
879 n -= tx_bytes - zfs_uio_resid(uio);
880 pfbytes -= tx_bytes - zfs_uio_resid(uio);
894 tx_bytes -= zfs_uio_resid(uio);
897 * Thus, we're writing a full block at a block-aligned
904 sa_get_db(zp->z_sa_hdl), woff, abuf, tx);
940 * the Direct I/O write has completed, but this is the penalty
944 zn_has_cached_data(zp, woff, woff + tx_bytes - 1)) {
945 update_pages(zp, woff, tx_bytes, zfsvfs->z_os);
953 (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
954 (void *)&zp->z_size, sizeof (uint64_t), tx);
969 while ((end_size = zp->z_size) < zfs_uio_offset(uio)) {
970 (void) atomic_cas_64(&zp->z_size, end_size,
979 if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0)
980 zp->z_size = zfsvfs->z_replay_eof;
982 error1 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
993 uio->uio_extflg & UIO_DIRECT ? B_TRUE : B_FALSE, NULL,
1000 * At this point it can be re-enabled for subsequent writes.
1004 uio->uio_extflg |= UIO_DIRECT;
1011 n -= nbytes;
1012 pfbytes -= nbytes;
1017 uio->uio_extflg |= UIO_DIRECT;
1027 if (uio->uio_extflg & UIO_DIRECT)
1033 * at least a partial write, so it's successful.
1035 if (zfsvfs->z_replay || zfs_uio_resid(uio) == start_resid ||
1042 zil_commit(zilog, zp->z_id);
1044 int64_t nwritten = start_resid - zfs_uio_resid(uio);
1045 dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, nwritten);
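
The conditional zil_commit() above is what makes O_SYNC/O_DSYNC writes (or any write with sync=always) durable before the syscall returns. From user space the difference is only the open flag; a minimal sketch with a made-up path and payload:

/*
 * Sketch: a synchronous append. With O_DSYNC the write path above
 * commits the ZIL before returning (unless sync=disabled on the
 * dataset). Path and record contents are illustrative.
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	int fd = open("/tank/fs/journal.log",
	    O_WRONLY | O_CREAT | O_APPEND | O_DSYNC, 0644);
	if (fd == -1)
		return (1);
	const char rec[] = "commit-record\n";
	ssize_t w = write(fd, rec, strlen(rec));	/* durable on return if sync != disabled */
	close(fd);
	return (w == (ssize_t)strlen(rec) ? 0 : 1);
}
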
1076 zilog = zfsvfs->z_log;
1079 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1100 objset_t *os = zfsvfs->z_os;
1102 uint64_t object = lr->lr_foid;
1103 uint64_t offset = lr->lr_offset;
1104 uint64_t size = lr->lr_length;
1117 if (zp->z_unlinked) {
1126 if (sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
1137 zgd->zgd_lwb = lwb;
1138 zgd->zgd_private = zp;
1141 * Write records come in two flavors: immediate and indirect.
1145 * we don't have to write the data twice.
1147 if (buf != NULL) { /* immediate write */
1148 zgd->zgd_lr = zfs_rangelock_enter(&zp->z_rangelock, offset,
1151 if (offset >= zp->z_size) {
1158 } else { /* indirect write */
1163 * that no one can change the data. We need to re-check
1168 size = zp->z_blksz;
1170 offset -= blkoff;
1171 zgd->zgd_lr = zfs_rangelock_enter(&zp->z_rangelock,
1173 if (zp->z_blksz == size)
1176 zfs_rangelock_exit(zgd->zgd_lr);
1179 if (lr->lr_offset >= zp->z_size)
1194 zgd->zgd_db = dbp;
1197 mutex_enter(&db->db_mtx);
1199 dbuf_find_dirty_eq(db, lr->lr_common.lrc_txg);
1200 if (dr != NULL && dr->dt.dl.dr_diowrite)
1202 mutex_exit(&db->db_mtx);
1211 * A Direct I/O write always covers an entire
1214 ASSERT3U(dbp->db_size, ==, zp->z_blksz);
1215 lr->lr_blkptr = dr->dt.dl.dr_overridden_by;
1220 blkptr_t *bp = &lr->lr_blkptr;
1221 zgd->zgd_bp = bp;
1223 ASSERT3U(dbp->db_offset, ==, offset);
1224 ASSERT3U(dbp->db_size, ==, size);
1226 error = dmu_sync(zio, lr->lr_common.lrc_txg,
1228 ASSERT(error || lr->lr_length <= size);
1231 * On success, we need to wait for the write I/O
1240 lr->lr_common.lrc_txtype = TX_WRITE2;
1245 * it is the old, currently-on-disk BP.
1247 zgd->zgd_bp = NULL;
1263 znode_t *zp = zgd->zgd_private;
1265 if (zgd->zgd_db)
1266 dmu_buf_rele(zgd->zgd_db, zgd);
1268 zfs_rangelock_exit(zgd->zgd_lr);
1346 blkptr_t *bps;
1368 inos = inzfsvfs->z_os;
1369 outos = outzfsvfs->z_os;
1389 ASSERT(!outzfsvfs->z_replay);
1395 if (inos->os_encrypted != outos->os_encrypted) {
1404 if (inos != outos && inos->os_encrypted &&
1422 if (inzp->z_pflags & ZFS_AV_QUARANTINED) {
1427 if (inoff >= inzp->z_size) {
1432 if (len > inzp->z_size - inoff) {
1433 len = inzp->z_size - inoff;
1442 * Callers might not be able to detect properly that we are read-only,
1455 if ((outzp->z_pflags & ZFS_IMMUTABLE) != 0) {
1471 if (zn_has_cached_data(inzp, inoff, inoff + len - 1))
1478 inlr = zfs_rangelock_enter(&inzp->z_rangelock, inoff, len,
1480 outlr = zfs_rangelock_enter(&outzp->z_rangelock, outoff, len,
1483 outlr = zfs_rangelock_enter(&outzp->z_rangelock, outoff, len,
1485 inlr = zfs_rangelock_enter(&inzp->z_rangelock, inoff, len,
1489 inblksz = inzp->z_blksz;
1498 if (inblksz < outzp->z_blksz) {
1502 if (inblksz != outzp->z_blksz && (outzp->z_size > outzp->z_blksz ||
1503 outlr->lr_length != UINT64_MAX)) {
1509 * Block size must be power-of-2 if destination offset != 0.
1510 * There can be no multiple blocks of non-power-of-2 size.
1528 (len < inzp->z_size - inoff || len < outzp->z_size - outoff)) {
1540 if (len <= inblksz && inblksz < outzfsvfs->z_max_blksz &&
1541 outzp->z_size <= inblksz && outoff + len > inblksz) {
1561 &outzp->z_size, 8);
1563 zilog = outzfsvfs->z_log;
1565 sizeof (bps[0]);
1569 projid = outzp->z_projid;
1571 bps = vmem_alloc(sizeof (bps[0]) * maxblocks, KM_SLEEP);
1576 * and allows us to do more fine-grained space accounting.
1594 error = dmu_read_l0_bps(inos, inzp->z_id, inoff, size, bps,
1606 last_synced_txg + 1);
1617 dmu_tx_hold_sa(tx, outzp->z_sa_hdl, B_FALSE);
1618 db = (dmu_buf_impl_t *)sa_get_db(outzp->z_sa_hdl);
1635 if (outlr->lr_length == UINT64_MAX) {
1642 if (inblksz != outzp->z_blksz) {
1653 ((len - 1) / inblksz + 1) * inblksz);
1656 error = dmu_brt_clone(outos, outzp->z_id, outoff, size, tx,
1657 bps, nbps);
1663 if (zn_has_cached_data(outzp, outoff, outoff + size - 1)) {
1676 while ((outsize = outzp->z_size) < outoff + size) {
1677 (void) atomic_cas_64(&outzp->z_size, outsize,
1681 error = sa_bulk_update(outzp->z_sa_hdl, bulk, count, tx);
1684 size, inblksz, bps, nbps);
1693 len -= size;
1702 vmem_free(bps, sizeof (bps[0]) * maxblocks);
1717 if (outos->os_sync == ZFS_SYNC_ALWAYS) {
1718 zil_commit(zilog, outzp->z_id);
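
zfs_clone_range() above is the backing for file-range cloning; on Linux it can be reached through copy_file_range(2) when block cloning is enabled, and the call falls back to an ordinary copy otherwise. A hedged sketch with hypothetical paths; the glibc wrapper requires glibc 2.27 or newer:

/*
 * Sketch: clone (or copy) up to 1 MiB between two files on the same
 * pool with copy_file_range(2). With block cloning enabled this can
 * be served by zfs_clone_range() instead of a data copy.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int in = open("/tank/fs/src.dat", O_RDONLY);
	int out = open("/tank/fs/dst.dat", O_WRONLY | O_CREAT, 0644);
	if (in == -1 || out == -1)
		return (1);

	ssize_t n = copy_file_range(in, NULL, out, NULL, 1 << 20, 0);
	if (n == -1)
		perror("copy_file_range");
	else
		printf("cloned/copied %zd bytes\n", n);

	close(in);
	close(out);
	return (n >= 0 ? 0 : 1);
}
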
1744 const blkptr_t *bps, size_t nbps)
1760 ASSERT(spa_feature_is_enabled(dmu_objset_spa(zfsvfs->z_os),
1766 ASSERT(zfsvfs->z_replay);
1777 &zp->z_size, 8);
1782 tx = dmu_tx_create(zfsvfs->z_os);
1784 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1785 db = (dmu_buf_impl_t *)sa_get_db(zp->z_sa_hdl);
1797 if (zp->z_blksz < blksz)
1800 dmu_brt_clone(zfsvfs->z_os, zp->z_id, off, len, tx, bps, nbps);
1804 if (zp->z_size < off + len)
1805 zp->z_size = off + len;
1807 error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
1813 VERIFY(zil_replaying(zfsvfs->z_log, tx));