Lines Matching +full:write +full:- +full:1 +full:- +full:bps

9  * or https://opensource.org/licenses/CDDL-1.0.
65 int zfs_bclone_enabled = 1;
87 static int zfs_dio_enabled = 1;
102 if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
105 atomic_inc_32(&zp->z_sync_writes_cnt);
106 zil_commit(zfsvfs->z_log, zp->z_id);
107 atomic_dec_32(&zp->z_sync_writes_cnt);
128 file_sz = zp->z_size;
139 if (zn_has_cached_data(zp, 0, file_sz - 1))
142 lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_READER);
143 error = dmu_offset_next(ZTOZSB(zp)->z_os, zp->z_id, hole, &noff);
158 * We could find a hole that begins after the logical end-of-file,
160 * EOF falls mid-block, then indicate that the "virtual hole"
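
The hole-probing logic above (dmu_offset_next() under the range lock, plus the "virtual hole" reported at a non-block-aligned EOF) is what user space reaches through lseek(2) with SEEK_HOLE/SEEK_DATA. A minimal sketch using only standard Linux calls; the path is hypothetical and error handling is minimal:

/*
 * Sketch: probe for the first hole at or after offset 0 with lseek(2).
 * A file whose EOF falls mid-block should report the "virtual hole"
 * described above as a hole starting at the logical end of file.
 * The path is hypothetical.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fd = open("/tank/fs/sparse.dat", O_RDONLY);
	if (fd == -1)
		return (1);
	off_t hole = lseek(fd, 0, SEEK_HOLE);
	if (hole != -1)
		printf("first hole at offset %lld\n", (long long)hole);
	close(fd);
	return (0);
}
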
233 * zfs_write() -> update_pages()
240 objset_t *os = zfsvfs->z_os;
244 if (!zfs_dio_enabled || os->os_direct == ZFS_DIRECT_DISABLED ||
246 zfs_uio_offset(uio) + zfs_uio_resid(uio) - 1)) {
253 } else if (os->os_direct == ZFS_DIRECT_ALWAYS &&
256 if ((rw == UIO_WRITE && zfs_uio_resid(uio) >= zp->z_blksz) ||
260 } else if (os->os_direct == ZFS_DIRECT_ALWAYS && (ioflag & O_DIRECT)) {
282 IMPLY(ioflag & O_DIRECT, uio->uio_extflg & UIO_DIRECT);
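
The zfs_setup_direct() checks above only promote a request to Direct I/O when it meets the page-alignment rules (or when direct=always applies). A hedged user-space sketch of a request that satisfies them; the path, length, and fill pattern are made up:

/*
 * Sketch: a page-aligned O_DIRECT write, the kind of request the
 * checks above can pass through as Direct I/O when direct=standard.
 * Path and sizes are illustrative only.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	const size_t len = 1 << 20;	/* a multiple of the page size */
	void *buf;

	if (posix_memalign(&buf, sysconf(_SC_PAGESIZE), len) != 0)
		return (1);
	memset(buf, 0xab, len);

	int fd = open("/tank/fs/dio.dat", O_WRONLY | O_CREAT | O_DIRECT, 0644);
	if (fd == -1)
		return (1);
	ssize_t w = pwrite(fd, buf, len, 0);	/* offset 0 is page-aligned */
	close(fd);
	free(buf);
	return (w == (ssize_t)len ? 0 : 1);
}
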
293 * IN: zp - inode of file to be read from.
294 * uio - structure supplying read location, range info,
296 * ioflag - O_SYNC flags; used to provide FRSYNC semantics.
298 * cr - credentials of caller.
300 * OUT: uio - updated offset and range, buffer filled.
305 * inode - atime updated if byte count > 0
319 if (zp->z_pflags & ZFS_AV_QUARANTINED) {
349 * Only do this for non-snapshots.
357 if (zfsvfs->z_log &&
358 (frsync || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS))
359 zil_commit(zfsvfs->z_log, zp->z_id);
364 zfs_locked_range_t *lr = zfs_rangelock_enter(&zp->z_rangelock,
368 * If we are reading past end-of-file we can skip
371 if (zfs_uio_offset(uio) >= zp->z_size) {
375 ASSERT(zfs_uio_offset(uio) < zp->z_size);
389 ssize_t n = MIN(zfs_uio_resid(uio), zp->z_size - zfs_uio_offset(uio));
393 if (uio->uio_extflg & UIO_DIRECT) {
405 * request is page-aligned. In this case, as much of the file
410 * ZFS as at a minimum the I/O request must be page-aligned.
412 dio_remaining_resid = n - P2ALIGN_TYPED(n, PAGE_SIZE, ssize_t);
414 n -= dio_remaining_resid;
418 ssize_t nbytes = MIN(n, chunk_size -
426 zfs_uio_offset(uio) + nbytes - 1)) {
429 error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
445 if (uio->uio_extflg & UIO_DIRECT) {
447 uio->uio_extflg &= ~UIO_DIRECT;
464 (zfs_uio_offset(uio) - start_offset) != 0)
470 n -= nbytes;
473 if (error == 0 && (uio->uio_extflg & UIO_DIRECT) &&
479 uio->uio_extflg &= ~UIO_DIRECT;
482 zfs_uio_offset(uio) + dio_remaining_resid - 1)) {
485 error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), uio,
488 uio->uio_extflg |= UIO_DIRECT;
492 } else if (error && (uio->uio_extflg & UIO_DIRECT)) {
495 int64_t nread = start_resid - n;
497 dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, nread);
502 uio->uio_extflg |= UIO_DIRECT;
507 if (uio->uio_extflg & UIO_DIRECT)
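
The dio_remaining_resid handling above covers an O_DIRECT read that runs to the end of a file whose size is not a page multiple: the page-aligned front is read directly and the sub-page tail is read back through the ARC. A sketch of that case, assuming a hypothetical path; other filesystems may reject the unaligned length:

/*
 * Sketch: read a whole file with O_DIRECT even though its size is not
 * a page multiple; per the fragment above, ZFS serves the aligned
 * front directly and the remainder through the ARC.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <unistd.h>

int
main(void)
{
	int fd = open("/tank/fs/odd-size.dat", O_RDONLY | O_DIRECT);
	struct stat st;
	if (fd == -1 || fstat(fd, &st) != 0)
		return (1);

	long pgsz = sysconf(_SC_PAGESIZE);
	size_t buflen = ((size_t)st.st_size + pgsz - 1) / pgsz * pgsz;
	void *buf;
	if (posix_memalign(&buf, pgsz, buflen) != 0)
		return (1);

	ssize_t r = pread(fd, buf, st.st_size, 0);	/* length not page-aligned */
	free(buf);
	close(fd);
	return (r == st.st_size ? 0 : 1);
}
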
519 zilog_t *zilog = zfsvfs->z_log;
526 * Clear Set-UID/Set-GID bits on successful write if not
531 * to another app after the partial write is committed.
536 mutex_enter(&zp->z_acl_lock);
537 if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | (S_IXUSR >> 6))) != 0 &&
538 (zp->z_mode & (S_ISUID | S_ISGID)) != 0 &&
540 ((zp->z_mode & S_ISUID) != 0 && uid == 0)) != 0) {
543 zp->z_mode &= ~(S_ISUID | S_ISGID);
544 newmode = zp->z_mode;
545 (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs),
548 mutex_exit(&zp->z_acl_lock);
559 va.va_nodeid = zp->z_id;
566 mutex_exit(&zp->z_acl_lock);
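
The effect of zfs_clear_setid_bits_if_necessary() above is visible from user space: a successful write by a writer without the relevant privilege drops the set-uid/set-gid bits. A minimal sketch, assuming the hypothetical file already exists with mode 4755 and the caller may write it but is otherwise unprivileged:

/*
 * Sketch: observe the set-uid bit after a write. Per the fragment
 * above, an unprivileged write to an executable set-uid file should
 * leave S_ISUID cleared. Path is hypothetical.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int
main(void)
{
	struct stat st;
	int fd = open("/tank/fs/suid.bin", O_WRONLY);
	if (fd == -1)
		return (1);
	(void) write(fd, "x", 1);
	if (fstat(fd, &st) == 0)
		printf("S_ISUID is now %s\n",
		    (st.st_mode & S_ISUID) ? "set" : "clear");
	close(fd);
	return (0);
}
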
571 * Write the bytes to a file.
573 * IN: zp - znode of file to be written to.
574 * uio - structure supplying write location, range info,
576 * ioflag - O_APPEND flag set if in append mode.
578 * cr - credentials of caller.
580 * OUT: uio - updated offset and range.
586 * ip - ctime|mtime updated if byte count > 0
597 * Fasttrack empty write
613 &zp->z_size, 8);
615 &zp->z_pflags, 8);
618 * Callers might not be able to detect properly that we are read-only,
631 if ((zp->z_pflags & ZFS_IMMUTABLE) ||
632 ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & O_APPEND) &&
633 (zfs_uio_offset(uio) < zp->z_size))) {
641 offset_t woff = ioflag & O_APPEND ? zp->z_size : zfs_uio_offset(uio);
657 * Pre-fault the pages to ensure slow (e.g. NFS) pages
660 ssize_t pfbytes = MIN(n, DMU_MAX_ACCESS >> 1);
673 * semantics. We reset the write offset once we have the lock.
675 lr = zfs_rangelock_enter(&zp->z_rangelock, 0, n, RL_APPEND);
676 woff = lr->lr_offset;
677 if (lr->lr_length == UINT64_MAX) {
679 * We overlocked the file because this write will cause
683 woff = zp->z_size;
695 * this write, then this range lock will lock the entire file
696 * so that we can re-write the block safely.
698 lr = zfs_rangelock_enter(&zp->z_rangelock, woff, n, RL_WRITER);
715 if (n > limit - woff)
716 n = limit - woff;
718 uint64_t end_size = MAX(zp->z_size, woff + n);
719 zilog_t *zilog = zfsvfs->z_log;
721 (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS);
725 const uint64_t projid = zp->z_projid;
729 * (lr_length == UINT64_MAX), we will direct the write to the ARC.
745 * a recordsize of 1K. The first 1K issued in the loop below will go
746 * through the ARC; however, the following 3 1K requests will
749 if (uio->uio_extflg & UIO_DIRECT && lr->lr_length == UINT64_MAX) {
750 uio->uio_extflg &= ~UIO_DIRECT;
755 * Write the file in reasonable size chunks. Each chunk is written
757 * and allows us to do more fine-grained space accounting.
772 if (lr->lr_length == UINT64_MAX && zp->z_size <= zp->z_blksz) {
773 if (zp->z_blksz > zfsvfs->z_max_blksz &&
774 !ISP2(zp->z_blksz)) {
780 blksz = 1 << highbit64(zp->z_blksz);
782 blksz = zfsvfs->z_max_blksz;
786 blksz = MAX(blksz, zp->z_blksz);
788 blksz = zp->z_blksz;
793 if (n >= blksz && woff >= zp->z_size &&
795 !(uio->uio_extflg & UIO_DIRECT) &&
798 * This write covers a full block. "Borrow" a buffer
804 abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
808 if ((error = zfs_uiocopy(abuf->b_data, blksz,
815 nbytes = MIN(n, (DMU_MAX_ACCESS >> 1) -
829 dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
830 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
831 dmu_buf_impl_t *db = (dmu_buf_impl_t *)sa_get_db(zp->z_sa_hdl);
850 * If rangelock_enter() over-locked we grow the blocksize
855 if (lr->lr_length == UINT64_MAX) {
864 error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl),
879 n -= tx_bytes - zfs_uio_resid(uio);
880 pfbytes -= tx_bytes - zfs_uio_resid(uio);
894 tx_bytes -= zfs_uio_resid(uio);
897 * Thus, we're writing a full block at a block-aligned
904 sa_get_db(zp->z_sa_hdl), woff, abuf, tx);
940 * the Direct I/O write has completed, but this is the penalty
944 zn_has_cached_data(zp, woff, woff + tx_bytes - 1)) {
945 update_pages(zp, woff, tx_bytes, zfsvfs->z_os);
953 (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
954 (void *)&zp->z_size, sizeof (uint64_t), tx);
969 while ((end_size = zp->z_size) < zfs_uio_offset(uio)) {
970 (void) atomic_cas_64(&zp->z_size, end_size,
979 if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0)
980 zp->z_size = zfsvfs->z_replay_eof;
982 error1 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
993 uio->uio_extflg & UIO_DIRECT ? B_TRUE : B_FALSE, NULL,
1000 * At this point it can be re-enabled for subsequent writes.
1004 uio->uio_extflg |= UIO_DIRECT;
1011 n -= nbytes;
1012 pfbytes -= nbytes;
1017 uio->uio_extflg |= UIO_DIRECT;
1027 if (uio->uio_extflg & UIO_DIRECT)
1033 * at least a partial write, so it's successful.
1035 if (zfsvfs->z_replay || zfs_uio_resid(uio) == start_resid ||
1042 zil_commit(zilog, zp->z_id);
1044 int64_t nwritten = start_resid - zfs_uio_resid(uio);
1045 dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, nwritten);
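
The conditional zil_commit() above is what makes O_SYNC/O_DSYNC writes (or any write with sync=always) durable before the syscall returns. From user space the difference is only the open flag; a minimal sketch with a made-up path and payload:

/*
 * Sketch: a synchronous append. With O_DSYNC the write path above
 * commits the ZIL before returning (unless sync=disabled on the
 * dataset). Path and record contents are illustrative.
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	int fd = open("/tank/fs/journal.log",
	    O_WRONLY | O_CREAT | O_APPEND | O_DSYNC, 0644);
	if (fd == -1)
		return (1);
	const char rec[] = "commit-record\n";
	ssize_t w = write(fd, rec, strlen(rec));	/* durable on return if sync != disabled */
	close(fd);
	return (w == (ssize_t)strlen(rec) ? 0 : 1);
}
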
1076 zilog = zfsvfs->z_log;
1079 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1100 objset_t *os = zfsvfs->z_os;
1102 uint64_t object = lr->lr_foid;
1103 uint64_t offset = lr->lr_offset;
1104 uint64_t size = lr->lr_length;
1117 if (zp->z_unlinked) {
1126 if (sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
1137 zgd->zgd_lwb = lwb;
1138 zgd->zgd_private = zp;
1141 * Write records come in two flavors: immediate and indirect.
1145 * we don't have to write the data twice.
1147 if (buf != NULL) { /* immediate write */
1148 zgd->zgd_lr = zfs_rangelock_enter(&zp->z_rangelock, offset,
1151 if (offset >= zp->z_size) {
1158 } else { /* indirect write */
1163 * that no one can change the data. We need to re-check
1168 size = zp->z_blksz;
1170 offset -= blkoff;
1171 zgd->zgd_lr = zfs_rangelock_enter(&zp->z_rangelock,
1173 if (zp->z_blksz == size)
1176 zfs_rangelock_exit(zgd->zgd_lr);
1179 if (lr->lr_offset >= zp->z_size)
1194 zgd->zgd_db = dbp;
1197 mutex_enter(&db->db_mtx);
1199 dbuf_find_dirty_eq(db, lr->lr_common.lrc_txg);
1200 if (dr != NULL && dr->dt.dl.dr_diowrite)
1202 mutex_exit(&db->db_mtx);
1211 * A Direct I/O write always covers an entire
1214 ASSERT3U(dbp->db_size, ==, zp->z_blksz);
1215 lr->lr_blkptr = dr->dt.dl.dr_overridden_by;
1220 blkptr_t *bp = &lr->lr_blkptr;
1221 zgd->zgd_bp = bp;
1223 ASSERT3U(dbp->db_offset, ==, offset);
1224 ASSERT3U(dbp->db_size, ==, size);
1226 error = dmu_sync(zio, lr->lr_common.lrc_txg,
1228 ASSERT(error || lr->lr_length <= size);
1231 * On success, we need to wait for the write I/O
1240 lr->lr_common.lrc_txtype = TX_WRITE2;
1245 * it is the old, currently-on-disk BP.
1247 zgd->zgd_bp = NULL;
1263 znode_t *zp = zgd->zgd_private;
1265 if (zgd->zgd_db)
1266 dmu_buf_rele(zgd->zgd_db, zgd);
1268 zfs_rangelock_exit(zgd->zgd_lr);
1346 blkptr_t *bps;
1368 inos = inzfsvfs->z_os;
1369 outos = outzfsvfs->z_os;
1389 ASSERT(!outzfsvfs->z_replay);
1395 if (inos->os_encrypted != outos->os_encrypted) {
1404 if (inos != outos && inos->os_encrypted &&
1422 if (inzp->z_pflags & ZFS_AV_QUARANTINED) {
1427 if (inoff >= inzp->z_size) {
1432 if (len > inzp->z_size - inoff) {
1433 len = inzp->z_size - inoff;
1442 * Callers might not be able to detect properly that we are read-only,
1455 if ((outzp->z_pflags & ZFS_IMMUTABLE) != 0) {
1471 if (zn_has_cached_data(inzp, inoff, inoff + len - 1))
1478 inlr = zfs_rangelock_enter(&inzp->z_rangelock, inoff, len,
1480 outlr = zfs_rangelock_enter(&outzp->z_rangelock, outoff, len,
1483 outlr = zfs_rangelock_enter(&outzp->z_rangelock, outoff, len,
1485 inlr = zfs_rangelock_enter(&inzp->z_rangelock, inoff, len,
1489 inblksz = inzp->z_blksz;
1498 if (inblksz < outzp->z_blksz) {
1502 if (inblksz != outzp->z_blksz && (outzp->z_size > outzp->z_blksz ||
1503 outlr->lr_length != UINT64_MAX)) {
1509 * Block size must be power-of-2 if destination offset != 0.
1510 * There can be no multiple blocks of non-power-of-2 size.
1528 (len < inzp->z_size - inoff || len < outzp->z_size - outoff)) {
1540 if (len <= inblksz && inblksz < outzfsvfs->z_max_blksz &&
1541 outzp->z_size <= inblksz && outoff + len > inblksz) {
1561 &outzp->z_size, 8);
1563 zilog = outzfsvfs->z_log;
1565 sizeof (bps[0]);
1569 projid = outzp->z_projid;
1571 bps = vmem_alloc(sizeof (bps[0]) * maxblocks, KM_SLEEP);
1576 * and allows us to do more fine-grained space accounting.
1594 error = dmu_read_l0_bps(inos, inzp->z_id, inoff, size, bps,
1606 last_synced_txg + 1);
1617 dmu_tx_hold_sa(tx, outzp->z_sa_hdl, B_FALSE);
1618 db = (dmu_buf_impl_t *)sa_get_db(outzp->z_sa_hdl);
1635 if (outlr->lr_length == UINT64_MAX) {
1642 if (inblksz != outzp->z_blksz) {
1653 ((len - 1) / inblksz + 1) * inblksz);
1656 error = dmu_brt_clone(outos, outzp->z_id, outoff, size, tx,
1657 bps, nbps);
1663 if (zn_has_cached_data(outzp, outoff, outoff + size - 1)) {
1676 while ((outsize = outzp->z_size) < outoff + size) {
1677 (void) atomic_cas_64(&outzp->z_size, outsize,
1681 error = sa_bulk_update(outzp->z_sa_hdl, bulk, count, tx);
1684 size, inblksz, bps, nbps);
1693 len -= size;
1702 vmem_free(bps, sizeof (bps[0]) * maxblocks);
1717 if (outos->os_sync == ZFS_SYNC_ALWAYS) {
1718 zil_commit(zilog, outzp->z_id);
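
zfs_clone_range() above is the backing for file-range cloning; on Linux it can be reached through copy_file_range(2) when block cloning is enabled, and the call falls back to an ordinary copy otherwise. A hedged sketch with hypothetical paths; the glibc wrapper requires glibc 2.27 or newer:

/*
 * Sketch: clone (or copy) up to 1 MiB between two files on the same
 * pool with copy_file_range(2). With block cloning enabled this can
 * be served by zfs_clone_range() instead of a data copy.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int in = open("/tank/fs/src.dat", O_RDONLY);
	int out = open("/tank/fs/dst.dat", O_WRONLY | O_CREAT, 0644);
	if (in == -1 || out == -1)
		return (1);

	ssize_t n = copy_file_range(in, NULL, out, NULL, 1 << 20, 0);
	if (n == -1)
		perror("copy_file_range");
	else
		printf("cloned/copied %zd bytes\n", n);

	close(in);
	close(out);
	return (n >= 0 ? 0 : 1);
}
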
1744 const blkptr_t *bps, size_t nbps)
1760 ASSERT(spa_feature_is_enabled(dmu_objset_spa(zfsvfs->z_os),
1766 ASSERT(zfsvfs->z_replay);
1777 &zp->z_size, 8);
1782 tx = dmu_tx_create(zfsvfs->z_os);
1784 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1785 db = (dmu_buf_impl_t *)sa_get_db(zp->z_sa_hdl);
1797 if (zp->z_blksz < blksz)
1800 dmu_brt_clone(zfsvfs->z_os, zp->z_id, off, len, tx, bps, nbps);
1804 if (zp->z_size < off + len)
1805 zp->z_size = off + len;
1807 error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
1813 VERIFY(zil_replaying(zfsvfs->z_log, tx));