xref: /freebsd-src/sys/contrib/openzfs/cmd/zstream/zstream_redup.c (revision 7a7741af18d6c8a804cc643cb7ecda9d730c6aa6)
/*
 * CDDL HEADER START
 *
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * CDDL HEADER END
 */
15eda14cbcSMatt Macy 
/*
 * Copyright (c) 2020 by Delphix. All rights reserved.
 */
19eda14cbcSMatt Macy 
20eda14cbcSMatt Macy #include <assert.h>
21eda14cbcSMatt Macy #include <cityhash.h>
22eda14cbcSMatt Macy #include <ctype.h>
23eda14cbcSMatt Macy #include <errno.h>
24eda14cbcSMatt Macy #include <fcntl.h>
25eda14cbcSMatt Macy #include <libzfs.h>
26eda14cbcSMatt Macy #include <libzutil.h>
27eda14cbcSMatt Macy #include <stddef.h>
28eda14cbcSMatt Macy #include <stdio.h>
29eda14cbcSMatt Macy #include <stdlib.h>
30da5137abSMartin Matuska #include <string.h>
31eda14cbcSMatt Macy #include <umem.h>
32eda14cbcSMatt Macy #include <unistd.h>
33eda14cbcSMatt Macy #include <sys/debug.h>
34eda14cbcSMatt Macy #include <sys/stat.h>
35eda14cbcSMatt Macy #include <sys/zfs_ioctl.h>
36eda14cbcSMatt Macy #include <sys/zio_checksum.h>
37eda14cbcSMatt Macy #include "zfs_fletcher.h"
38eda14cbcSMatt Macy #include "zstream.h"
39eda14cbcSMatt Macy 
40eda14cbcSMatt Macy 
/*
 * Sizing limits for the redup hash table: the budget used to pick the
 * number of hash buckets is this percentage of physical memory ...
 */
#define	MAX_RDT_PHYSMEM_PERCENT		20
/* ... but never less than this many megabytes. */
#define	SMALLEST_POSSIBLE_MAX_RDT_MB	128
43eda14cbcSMatt Macy 
/*
 * One remembered DRR_WRITE record.  Entries that hash to the same bucket
 * are linked through rde_next; (rde_guid, rde_object, rde_offset) is the
 * lookup key and rde_stream_offset is the position of the record header
 * in the input stream.
 */
typedef struct redup_entry {
	struct redup_entry *rde_next;	/* next entry in the same bucket */
	uint64_t rde_guid;		/* key: guid of the written dataset */
	uint64_t rde_object;		/* key: object number */
	uint64_t rde_offset;		/* key: offset within the object */
	uint64_t rde_stream_offset;	/* value: offset in the input stream */
} redup_entry_t;
51eda14cbcSMatt Macy 
52eda14cbcSMatt Macy typedef struct redup_table {
53eda14cbcSMatt Macy 	redup_entry_t	**redup_hash_array;
54eda14cbcSMatt Macy 	umem_cache_t	*ddecache;
55eda14cbcSMatt Macy 	uint64_t	ddt_count;
56eda14cbcSMatt Macy 	int		numhashbits;
57eda14cbcSMatt Macy } redup_table_t;
58eda14cbcSMatt Macy 
/*
 * Allocate n bytes of zero-filled memory with calloc().  If the
 * allocation fails, print an error to stderr and exit with status 1,
 * so a returned pointer is never NULL.
 */
void *
safe_calloc(size_t n)
{
	void *rv = calloc(1, n);
	if (rv == NULL) {
		/* %zu prints the full size_t value without truncation. */
		(void) fprintf(stderr,
		    "Error: could not allocate %zu bytes of memory\n", n);
		exit(1);
	}
	return (rv);
}
71eda14cbcSMatt Macy 
/*
 * Safe version of fread(), exits on error.
 *
 * Reads a single item of `size' bytes from fp into buf and returns
 * fread()'s item count: 1 when the item was read, 0 otherwise.  A zero
 * count combined with a stream error is fatal; a zero count without one
 * (for example at end-of-file) is returned to the caller.
 */
int
sfread(void *buf, size_t size, FILE *fp)
{
	int nitems = fread(buf, size, 1, fp);

	if (nitems != 0 || !ferror(fp))
		return (nitems);

	(void) fprintf(stderr, "Error while reading file: %s\n",
	    strerror(errno));
	exit(1);
}
86eda14cbcSMatt Macy 
/*
 * Safe version of pread(), exits on error.
 *
 * Reads exactly `count' bytes from fd, starting at `offset', into buf.
 * A pread() that returns fewer bytes than requested is continued from
 * where it stopped, and one interrupted by a signal (EINTR) is retried.
 * The process exits with status 1 if pread() fails for any other reason
 * or if end-of-file is reached before `count' bytes have been read.
 */
static void
spread(int fd, void *buf, size_t count, off_t offset)
{
	char *dst = buf;

	while (count > 0) {
		ssize_t nread = pread(fd, dst, count, offset);

		if (nread == -1) {
			if (errno == EINTR)
				continue;
			(void) fprintf(stderr,
			    "Error while reading file: %s\n",
			    strerror(errno));
			exit(1);
		}
		if (nread == 0) {
			/* EOF before the whole range could be read. */
			(void) fprintf(stderr,
			    "Error while reading file: short read\n");
			exit(1);
		}

		dst += nread;
		offset += nread;
		count -= (size_t)nread;
	}
}
105eda14cbcSMatt Macy 
106eda14cbcSMatt Macy static int
107eda14cbcSMatt Macy dump_record(dmu_replay_record_t *drr, void *payload, int payload_len,
108eda14cbcSMatt Macy     zio_cksum_t *zc, int outfd)
109eda14cbcSMatt Macy {
110eda14cbcSMatt Macy 	assert(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum)
111eda14cbcSMatt Macy 	    == sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
112eda14cbcSMatt Macy 	fletcher_4_incremental_native(drr,
113eda14cbcSMatt Macy 	    offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
114eda14cbcSMatt Macy 	if (drr->drr_type != DRR_BEGIN) {
115eda14cbcSMatt Macy 		assert(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
116eda14cbcSMatt Macy 		    drr_checksum.drr_checksum));
117eda14cbcSMatt Macy 		drr->drr_u.drr_checksum.drr_checksum = *zc;
118eda14cbcSMatt Macy 	}
119eda14cbcSMatt Macy 	fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum,
120eda14cbcSMatt Macy 	    sizeof (zio_cksum_t), zc);
121eda14cbcSMatt Macy 	if (write(outfd, drr, sizeof (*drr)) == -1)
122eda14cbcSMatt Macy 		return (errno);
123eda14cbcSMatt Macy 	if (payload_len != 0) {
124eda14cbcSMatt Macy 		fletcher_4_incremental_native(payload, payload_len, zc);
125eda14cbcSMatt Macy 		if (write(outfd, payload, payload_len) == -1)
126eda14cbcSMatt Macy 			return (errno);
127eda14cbcSMatt Macy 	}
128eda14cbcSMatt Macy 	return (0);
129eda14cbcSMatt Macy }
130eda14cbcSMatt Macy 
131eda14cbcSMatt Macy static void
132eda14cbcSMatt Macy rdt_insert(redup_table_t *rdt,
133eda14cbcSMatt Macy     uint64_t guid, uint64_t object, uint64_t offset, uint64_t stream_offset)
134eda14cbcSMatt Macy {
135*7a7741afSMartin Matuska 	uint64_t ch = cityhash3(guid, object, offset);
136eda14cbcSMatt Macy 	uint64_t hashcode = BF64_GET(ch, 0, rdt->numhashbits);
137eda14cbcSMatt Macy 	redup_entry_t **rdepp;
138eda14cbcSMatt Macy 
139eda14cbcSMatt Macy 	rdepp = &(rdt->redup_hash_array[hashcode]);
140eda14cbcSMatt Macy 	redup_entry_t *rde = umem_cache_alloc(rdt->ddecache, UMEM_NOFAIL);
141eda14cbcSMatt Macy 	rde->rde_next = *rdepp;
142eda14cbcSMatt Macy 	rde->rde_guid = guid;
143eda14cbcSMatt Macy 	rde->rde_object = object;
144eda14cbcSMatt Macy 	rde->rde_offset = offset;
145eda14cbcSMatt Macy 	rde->rde_stream_offset = stream_offset;
146eda14cbcSMatt Macy 	*rdepp = rde;
147eda14cbcSMatt Macy 	rdt->ddt_count++;
148eda14cbcSMatt Macy }
149eda14cbcSMatt Macy 
150eda14cbcSMatt Macy static void
151eda14cbcSMatt Macy rdt_lookup(redup_table_t *rdt,
152eda14cbcSMatt Macy     uint64_t guid, uint64_t object, uint64_t offset,
153eda14cbcSMatt Macy     uint64_t *stream_offsetp)
154eda14cbcSMatt Macy {
155*7a7741afSMartin Matuska 	uint64_t ch = cityhash3(guid, object, offset);
156eda14cbcSMatt Macy 	uint64_t hashcode = BF64_GET(ch, 0, rdt->numhashbits);
157eda14cbcSMatt Macy 
158eda14cbcSMatt Macy 	for (redup_entry_t *rde = rdt->redup_hash_array[hashcode];
159eda14cbcSMatt Macy 	    rde != NULL; rde = rde->rde_next) {
160eda14cbcSMatt Macy 		if (rde->rde_guid == guid &&
161eda14cbcSMatt Macy 		    rde->rde_object == object &&
162eda14cbcSMatt Macy 		    rde->rde_offset == offset) {
163eda14cbcSMatt Macy 			*stream_offsetp = rde->rde_stream_offset;
164eda14cbcSMatt Macy 			return;
165eda14cbcSMatt Macy 		}
166eda14cbcSMatt Macy 	}
167eda14cbcSMatt Macy 	assert(!"could not find expected redup table entry");
168eda14cbcSMatt Macy }
169eda14cbcSMatt Macy 
170eda14cbcSMatt Macy /*
171eda14cbcSMatt Macy  * Convert a dedup stream (generated by "zfs send -D") to a
172eda14cbcSMatt Macy  * non-deduplicated stream.  The entire infd will be converted, including
173eda14cbcSMatt Macy  * any substreams in a stream package (generated by "zfs send -RD"). The
174eda14cbcSMatt Macy  * infd must be seekable.
175eda14cbcSMatt Macy  */
176eda14cbcSMatt Macy static void
177eda14cbcSMatt Macy zfs_redup_stream(int infd, int outfd, boolean_t verbose)
178eda14cbcSMatt Macy {
179eda14cbcSMatt Macy 	int bufsz = SPA_MAXBLOCKSIZE;
180aca928a5SMartin Matuska 	dmu_replay_record_t thedrr;
181eda14cbcSMatt Macy 	dmu_replay_record_t *drr = &thedrr;
182eda14cbcSMatt Macy 	redup_table_t rdt;
183eda14cbcSMatt Macy 	zio_cksum_t stream_cksum;
184eda14cbcSMatt Macy 	uint64_t numbuckets;
185eda14cbcSMatt Macy 	uint64_t num_records = 0;
186eda14cbcSMatt Macy 	uint64_t num_write_byref_records = 0;
187eda14cbcSMatt Macy 
188aca928a5SMartin Matuska 	memset(&thedrr, 0, sizeof (dmu_replay_record_t));
189aca928a5SMartin Matuska 
190eda14cbcSMatt Macy #ifdef _ILP32
191eda14cbcSMatt Macy 	uint64_t max_rde_size = SMALLEST_POSSIBLE_MAX_RDT_MB << 20;
192eda14cbcSMatt Macy #else
193eda14cbcSMatt Macy 	uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
194eda14cbcSMatt Macy 	uint64_t max_rde_size =
195eda14cbcSMatt Macy 	    MAX((physmem * MAX_RDT_PHYSMEM_PERCENT) / 100,
196eda14cbcSMatt Macy 	    SMALLEST_POSSIBLE_MAX_RDT_MB << 20);
197eda14cbcSMatt Macy #endif
198eda14cbcSMatt Macy 
199eda14cbcSMatt Macy 	numbuckets = max_rde_size / (sizeof (redup_entry_t));
200eda14cbcSMatt Macy 
201eda14cbcSMatt Macy 	/*
202eda14cbcSMatt Macy 	 * numbuckets must be a power of 2.  Increase number to
203eda14cbcSMatt Macy 	 * a power of 2 if necessary.
204eda14cbcSMatt Macy 	 */
205eda14cbcSMatt Macy 	if (!ISP2(numbuckets))
206eda14cbcSMatt Macy 		numbuckets = 1ULL << highbit64(numbuckets);
207eda14cbcSMatt Macy 
208eda14cbcSMatt Macy 	rdt.redup_hash_array =
209eda14cbcSMatt Macy 	    safe_calloc(numbuckets * sizeof (redup_entry_t *));
210eda14cbcSMatt Macy 	rdt.ddecache = umem_cache_create("rde", sizeof (redup_entry_t), 0,
211eda14cbcSMatt Macy 	    NULL, NULL, NULL, NULL, NULL, 0);
212eda14cbcSMatt Macy 	rdt.numhashbits = highbit64(numbuckets) - 1;
213eda14cbcSMatt Macy 	rdt.ddt_count = 0;
214eda14cbcSMatt Macy 
215eda14cbcSMatt Macy 	char *buf = safe_calloc(bufsz);
216eda14cbcSMatt Macy 	FILE *ofp = fdopen(infd, "r");
217eda14cbcSMatt Macy 	long offset = ftell(ofp);
21815f0b8c3SMartin Matuska 	int begin = 0;
21915f0b8c3SMartin Matuska 	boolean_t seen = B_FALSE;
220eda14cbcSMatt Macy 	while (sfread(drr, sizeof (*drr), ofp) != 0) {
221eda14cbcSMatt Macy 		num_records++;
222eda14cbcSMatt Macy 
223eda14cbcSMatt Macy 		/*
224eda14cbcSMatt Macy 		 * We need to regenerate the checksum.
225eda14cbcSMatt Macy 		 */
226eda14cbcSMatt Macy 		if (drr->drr_type != DRR_BEGIN) {
227da5137abSMartin Matuska 			memset(&drr->drr_u.drr_checksum.drr_checksum, 0,
228eda14cbcSMatt Macy 			    sizeof (drr->drr_u.drr_checksum.drr_checksum));
229eda14cbcSMatt Macy 		}
230eda14cbcSMatt Macy 
231eda14cbcSMatt Macy 		uint64_t payload_size = 0;
232eda14cbcSMatt Macy 		switch (drr->drr_type) {
233eda14cbcSMatt Macy 		case DRR_BEGIN:
234eda14cbcSMatt Macy 		{
235eda14cbcSMatt Macy 			struct drr_begin *drrb = &drr->drr_u.drr_begin;
236eda14cbcSMatt Macy 			int fflags;
237eda14cbcSMatt Macy 			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
23815f0b8c3SMartin Matuska 			VERIFY0(begin++);
23915f0b8c3SMartin Matuska 			seen = B_TRUE;
240eda14cbcSMatt Macy 
241eda14cbcSMatt Macy 			assert(drrb->drr_magic == DMU_BACKUP_MAGIC);
242eda14cbcSMatt Macy 
243eda14cbcSMatt Macy 			/* clear the DEDUP feature flag for this stream */
244eda14cbcSMatt Macy 			fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
245eda14cbcSMatt Macy 			fflags &= ~(DMU_BACKUP_FEATURE_DEDUP |
246eda14cbcSMatt Macy 			    DMU_BACKUP_FEATURE_DEDUPPROPS);
2479db44a8eSMartin Matuska 			/* cppcheck-suppress syntaxError */
248eda14cbcSMatt Macy 			DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
249eda14cbcSMatt Macy 
25015f0b8c3SMartin Matuska 			uint32_t sz = drr->drr_payloadlen;
25115f0b8c3SMartin Matuska 
25215f0b8c3SMartin Matuska 			VERIFY3U(sz, <=, 1U << 28);
25315f0b8c3SMartin Matuska 
254eda14cbcSMatt Macy 			if (sz != 0) {
255eda14cbcSMatt Macy 				if (sz > bufsz) {
256eda14cbcSMatt Macy 					free(buf);
257eda14cbcSMatt Macy 					buf = safe_calloc(sz);
258eda14cbcSMatt Macy 					bufsz = sz;
259eda14cbcSMatt Macy 				}
260eda14cbcSMatt Macy 				(void) sfread(buf, sz, ofp);
261eda14cbcSMatt Macy 			}
262eda14cbcSMatt Macy 			payload_size = sz;
263eda14cbcSMatt Macy 			break;
264eda14cbcSMatt Macy 		}
265eda14cbcSMatt Macy 
266eda14cbcSMatt Macy 		case DRR_END:
267eda14cbcSMatt Macy 		{
268eda14cbcSMatt Macy 			struct drr_end *drre = &drr->drr_u.drr_end;
269eda14cbcSMatt Macy 			/*
27015f0b8c3SMartin Matuska 			 * We would prefer to just check --begin == 0, but
27115f0b8c3SMartin Matuska 			 * replication streams have an end of stream END
27215f0b8c3SMartin Matuska 			 * record, so we must avoid tripping it.
27315f0b8c3SMartin Matuska 			 */
27415f0b8c3SMartin Matuska 			VERIFY3B(seen, ==, B_TRUE);
27515f0b8c3SMartin Matuska 			begin--;
27615f0b8c3SMartin Matuska 			/*
277eda14cbcSMatt Macy 			 * Use the recalculated checksum, unless this is
278eda14cbcSMatt Macy 			 * the END record of a stream package, which has
279eda14cbcSMatt Macy 			 * no checksum.
280eda14cbcSMatt Macy 			 */
281eda14cbcSMatt Macy 			if (!ZIO_CHECKSUM_IS_ZERO(&drre->drr_checksum))
282eda14cbcSMatt Macy 				drre->drr_checksum = stream_cksum;
283eda14cbcSMatt Macy 			break;
284eda14cbcSMatt Macy 		}
285eda14cbcSMatt Macy 
286eda14cbcSMatt Macy 		case DRR_OBJECT:
287eda14cbcSMatt Macy 		{
288eda14cbcSMatt Macy 			struct drr_object *drro = &drr->drr_u.drr_object;
28915f0b8c3SMartin Matuska 			VERIFY3S(begin, ==, 1);
290eda14cbcSMatt Macy 
291eda14cbcSMatt Macy 			if (drro->drr_bonuslen > 0) {
292eda14cbcSMatt Macy 				payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro);
293eda14cbcSMatt Macy 				(void) sfread(buf, payload_size, ofp);
294eda14cbcSMatt Macy 			}
295eda14cbcSMatt Macy 			break;
296eda14cbcSMatt Macy 		}
297eda14cbcSMatt Macy 
298eda14cbcSMatt Macy 		case DRR_SPILL:
299eda14cbcSMatt Macy 		{
300eda14cbcSMatt Macy 			struct drr_spill *drrs = &drr->drr_u.drr_spill;
30115f0b8c3SMartin Matuska 			VERIFY3S(begin, ==, 1);
302eda14cbcSMatt Macy 			payload_size = DRR_SPILL_PAYLOAD_SIZE(drrs);
303eda14cbcSMatt Macy 			(void) sfread(buf, payload_size, ofp);
304eda14cbcSMatt Macy 			break;
305eda14cbcSMatt Macy 		}
306eda14cbcSMatt Macy 
307eda14cbcSMatt Macy 		case DRR_WRITE_BYREF:
308eda14cbcSMatt Macy 		{
309eda14cbcSMatt Macy 			struct drr_write_byref drrwb =
310eda14cbcSMatt Macy 			    drr->drr_u.drr_write_byref;
31115f0b8c3SMartin Matuska 			VERIFY3S(begin, ==, 1);
312eda14cbcSMatt Macy 
313eda14cbcSMatt Macy 			num_write_byref_records++;
314eda14cbcSMatt Macy 
315eda14cbcSMatt Macy 			/*
316eda14cbcSMatt Macy 			 * Look up in hash table by drrwb->drr_refguid,
317eda14cbcSMatt Macy 			 * drr_refobject, drr_refoffset.  Replace this
318eda14cbcSMatt Macy 			 * record with the found WRITE record, but with
319eda14cbcSMatt Macy 			 * drr_object,drr_offset,drr_toguid replaced with ours.
320eda14cbcSMatt Macy 			 */
321eda14cbcSMatt Macy 			uint64_t stream_offset = 0;
322eda14cbcSMatt Macy 			rdt_lookup(&rdt, drrwb.drr_refguid,
323eda14cbcSMatt Macy 			    drrwb.drr_refobject, drrwb.drr_refoffset,
324eda14cbcSMatt Macy 			    &stream_offset);
325eda14cbcSMatt Macy 
326eda14cbcSMatt Macy 			spread(infd, drr, sizeof (*drr), stream_offset);
327eda14cbcSMatt Macy 
328eda14cbcSMatt Macy 			assert(drr->drr_type == DRR_WRITE);
329eda14cbcSMatt Macy 			struct drr_write *drrw = &drr->drr_u.drr_write;
330eda14cbcSMatt Macy 			assert(drrw->drr_toguid == drrwb.drr_refguid);
331eda14cbcSMatt Macy 			assert(drrw->drr_object == drrwb.drr_refobject);
332eda14cbcSMatt Macy 			assert(drrw->drr_offset == drrwb.drr_refoffset);
333eda14cbcSMatt Macy 
334eda14cbcSMatt Macy 			payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
335eda14cbcSMatt Macy 			spread(infd, buf, payload_size,
336eda14cbcSMatt Macy 			    stream_offset + sizeof (*drr));
337eda14cbcSMatt Macy 
338eda14cbcSMatt Macy 			drrw->drr_toguid = drrwb.drr_toguid;
339eda14cbcSMatt Macy 			drrw->drr_object = drrwb.drr_object;
340eda14cbcSMatt Macy 			drrw->drr_offset = drrwb.drr_offset;
341eda14cbcSMatt Macy 			break;
342eda14cbcSMatt Macy 		}
343eda14cbcSMatt Macy 
344eda14cbcSMatt Macy 		case DRR_WRITE:
345eda14cbcSMatt Macy 		{
346eda14cbcSMatt Macy 			struct drr_write *drrw = &drr->drr_u.drr_write;
34715f0b8c3SMartin Matuska 			VERIFY3S(begin, ==, 1);
348eda14cbcSMatt Macy 			payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
349eda14cbcSMatt Macy 			(void) sfread(buf, payload_size, ofp);
350eda14cbcSMatt Macy 
351eda14cbcSMatt Macy 			rdt_insert(&rdt, drrw->drr_toguid,
352eda14cbcSMatt Macy 			    drrw->drr_object, drrw->drr_offset, offset);
353eda14cbcSMatt Macy 			break;
354eda14cbcSMatt Macy 		}
355eda14cbcSMatt Macy 
356eda14cbcSMatt Macy 		case DRR_WRITE_EMBEDDED:
357eda14cbcSMatt Macy 		{
358eda14cbcSMatt Macy 			struct drr_write_embedded *drrwe =
359eda14cbcSMatt Macy 			    &drr->drr_u.drr_write_embedded;
36015f0b8c3SMartin Matuska 			VERIFY3S(begin, ==, 1);
361eda14cbcSMatt Macy 			payload_size =
362eda14cbcSMatt Macy 			    P2ROUNDUP((uint64_t)drrwe->drr_psize, 8);
363eda14cbcSMatt Macy 			(void) sfread(buf, payload_size, ofp);
364eda14cbcSMatt Macy 			break;
365eda14cbcSMatt Macy 		}
366eda14cbcSMatt Macy 
367eda14cbcSMatt Macy 		case DRR_FREEOBJECTS:
368eda14cbcSMatt Macy 		case DRR_FREE:
369eda14cbcSMatt Macy 		case DRR_OBJECT_RANGE:
37015f0b8c3SMartin Matuska 			VERIFY3S(begin, ==, 1);
371eda14cbcSMatt Macy 			break;
372eda14cbcSMatt Macy 
373eda14cbcSMatt Macy 		default:
374eda14cbcSMatt Macy 			(void) fprintf(stderr, "INVALID record type 0x%x\n",
375eda14cbcSMatt Macy 			    drr->drr_type);
376eda14cbcSMatt Macy 			/* should never happen, so assert */
377eda14cbcSMatt Macy 			assert(B_FALSE);
378eda14cbcSMatt Macy 		}
379eda14cbcSMatt Macy 
380eda14cbcSMatt Macy 		if (feof(ofp)) {
381eda14cbcSMatt Macy 			fprintf(stderr, "Error: unexpected end-of-file\n");
382eda14cbcSMatt Macy 			exit(1);
383eda14cbcSMatt Macy 		}
384eda14cbcSMatt Macy 		if (ferror(ofp)) {
385eda14cbcSMatt Macy 			fprintf(stderr, "Error while reading file: %s\n",
386eda14cbcSMatt Macy 			    strerror(errno));
387eda14cbcSMatt Macy 			exit(1);
388eda14cbcSMatt Macy 		}
389eda14cbcSMatt Macy 
390eda14cbcSMatt Macy 		/*
391eda14cbcSMatt Macy 		 * We need to recalculate the checksum, and it needs to be
392eda14cbcSMatt Macy 		 * initially zero to do that.  BEGIN records don't have
393eda14cbcSMatt Macy 		 * a checksum.
394eda14cbcSMatt Macy 		 */
395eda14cbcSMatt Macy 		if (drr->drr_type != DRR_BEGIN) {
396da5137abSMartin Matuska 			memset(&drr->drr_u.drr_checksum.drr_checksum, 0,
397eda14cbcSMatt Macy 			    sizeof (drr->drr_u.drr_checksum.drr_checksum));
398eda14cbcSMatt Macy 		}
399eda14cbcSMatt Macy 		if (dump_record(drr, buf, payload_size,
400eda14cbcSMatt Macy 		    &stream_cksum, outfd) != 0)
401eda14cbcSMatt Macy 			break;
402eda14cbcSMatt Macy 		if (drr->drr_type == DRR_END) {
403eda14cbcSMatt Macy 			/*
404eda14cbcSMatt Macy 			 * Typically the END record is either the last
405eda14cbcSMatt Macy 			 * thing in the stream, or it is followed
406eda14cbcSMatt Macy 			 * by a BEGIN record (which also zeros the checksum).
407eda14cbcSMatt Macy 			 * However, a stream package ends with two END
408eda14cbcSMatt Macy 			 * records.  The last END record's checksum starts
409eda14cbcSMatt Macy 			 * from zero.
410eda14cbcSMatt Macy 			 */
411eda14cbcSMatt Macy 			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
412eda14cbcSMatt Macy 		}
413eda14cbcSMatt Macy 		offset = ftell(ofp);
414eda14cbcSMatt Macy 	}
415eda14cbcSMatt Macy 
416eda14cbcSMatt Macy 	if (verbose) {
417eda14cbcSMatt Macy 		char mem_str[16];
418eda14cbcSMatt Macy 		zfs_nicenum(rdt.ddt_count * sizeof (redup_entry_t),
419eda14cbcSMatt Macy 		    mem_str, sizeof (mem_str));
420eda14cbcSMatt Macy 		fprintf(stderr, "converted stream with %llu total records, "
421eda14cbcSMatt Macy 		    "including %llu dedup records, using %sB memory.\n",
422eda14cbcSMatt Macy 		    (long long)num_records,
423eda14cbcSMatt Macy 		    (long long)num_write_byref_records,
424eda14cbcSMatt Macy 		    mem_str);
425eda14cbcSMatt Macy 	}
426eda14cbcSMatt Macy 
427eda14cbcSMatt Macy 	umem_cache_destroy(rdt.ddecache);
428eda14cbcSMatt Macy 	free(rdt.redup_hash_array);
429eda14cbcSMatt Macy 	free(buf);
430eda14cbcSMatt Macy 	(void) fclose(ofp);
431eda14cbcSMatt Macy }
432eda14cbcSMatt Macy 
433eda14cbcSMatt Macy int
434eda14cbcSMatt Macy zstream_do_redup(int argc, char *argv[])
435eda14cbcSMatt Macy {
436eda14cbcSMatt Macy 	boolean_t verbose = B_FALSE;
4377877fdebSMatt Macy 	int c;
438eda14cbcSMatt Macy 
439eda14cbcSMatt Macy 	while ((c = getopt(argc, argv, "v")) != -1) {
440eda14cbcSMatt Macy 		switch (c) {
441eda14cbcSMatt Macy 		case 'v':
442eda14cbcSMatt Macy 			verbose = B_TRUE;
443eda14cbcSMatt Macy 			break;
444eda14cbcSMatt Macy 		case '?':
445eda14cbcSMatt Macy 			(void) fprintf(stderr, "invalid option '%c'\n",
446eda14cbcSMatt Macy 			    optopt);
447eda14cbcSMatt Macy 			zstream_usage();
448eda14cbcSMatt Macy 			break;
449eda14cbcSMatt Macy 		}
450eda14cbcSMatt Macy 	}
451eda14cbcSMatt Macy 
452eda14cbcSMatt Macy 	argc -= optind;
453eda14cbcSMatt Macy 	argv += optind;
454eda14cbcSMatt Macy 
455eda14cbcSMatt Macy 	if (argc != 1)
456eda14cbcSMatt Macy 		zstream_usage();
457eda14cbcSMatt Macy 
458eda14cbcSMatt Macy 	const char *filename = argv[0];
459eda14cbcSMatt Macy 
460eda14cbcSMatt Macy 	if (isatty(STDOUT_FILENO)) {
461eda14cbcSMatt Macy 		(void) fprintf(stderr,
462eda14cbcSMatt Macy 		    "Error: Stream can not be written to a terminal.\n"
463eda14cbcSMatt Macy 		    "You must redirect standard output.\n");
464eda14cbcSMatt Macy 		return (1);
465eda14cbcSMatt Macy 	}
466eda14cbcSMatt Macy 
467eda14cbcSMatt Macy 	int fd = open(filename, O_RDONLY);
468eda14cbcSMatt Macy 	if (fd == -1) {
469eda14cbcSMatt Macy 		(void) fprintf(stderr,
470eda14cbcSMatt Macy 		    "Error while opening file '%s': %s\n",
471eda14cbcSMatt Macy 		    filename, strerror(errno));
472eda14cbcSMatt Macy 		exit(1);
473eda14cbcSMatt Macy 	}
474eda14cbcSMatt Macy 
475eda14cbcSMatt Macy 	fletcher_4_init();
476eda14cbcSMatt Macy 	zfs_redup_stream(fd, STDOUT_FILENO, verbose);
477eda14cbcSMatt Macy 	fletcher_4_fini();
478eda14cbcSMatt Macy 
479eda14cbcSMatt Macy 	close(fd);
480eda14cbcSMatt Macy 
481eda14cbcSMatt Macy 	return (0);
482eda14cbcSMatt Macy }
483