xref: /openbsd-src/usr.bin/rsync/blocks.c (revision 4b70baf6e17fc8b27fc1f7fa7929335753fa94c3)
1 /*	$Id: blocks.c,v 1.14 2019/03/23 16:04:28 deraadt Exp $ */
2 /*
3  * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #include <sys/stat.h>
18 
19 #include <assert.h>
20 #include <endian.h>
21 #include <errno.h>
22 #include <inttypes.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <unistd.h>
27 
28 #include <openssl/md4.h>
29 
30 #include "extern.h"
31 
32 /*
33  * From our current position of "offs" in buffer "buf" of total size
34  * "size", see if we can find a matching block in our list of blocks.
35  * The "hint" refers to the block that *might* work.
36  * Returns the blk or NULL if no matching block was found.
37  */
38 static struct blk *
39 blk_find(struct sess *sess, const void *buf, off_t size, off_t offs,
40 	const struct blkset *blks, const char *path, size_t hint)
41 {
42 	unsigned char	 md[MD4_DIGEST_LENGTH];
43 	uint32_t	 fhash;
44 	off_t		 remain, osz;
45 	size_t		 i;
46 	int		 have_md = 0;
47 
48 	/*
49 	 * First, compute our fast hash.
50 	 * FIXME: yes, this can be a rolling computation, but I'm
51 	 * deliberately making it simple first.
52 	 */
53 
54 	remain = size - offs;
55 	assert(remain);
56 	osz = remain < (off_t)blks->len ? remain : (off_t)blks->len;
57 	fhash = hash_fast(buf + offs, (size_t)osz);
58 	have_md = 0;
59 
60 	/*
61 	 * Start with our match hint.
62 	 * This just runs the fast and slow check with the hint.
63 	 */
64 
65 	if (hint < blks->blksz &&
66 	    fhash == blks->blks[hint].chksum_short &&
67 	    (size_t)osz == blks->blks[hint].len) {
68 		hash_slow(buf + offs, (size_t)osz, md, sess);
69 		have_md = 1;
70 		if (memcmp(md, blks->blks[hint].chksum_long, blks->csum) == 0) {
71 			LOG4(sess, "%s: found matching hinted match: "
72 			    "position %jd, block %zu (position %jd, size %zu)",
73 			    path,
74 			    (intmax_t)offs, blks->blks[hint].idx,
75 			    (intmax_t)blks->blks[hint].offs,
76 			    blks->blks[hint].len);
77 			return &blks->blks[hint];
78 		}
79 	}
80 
81 	/*
82 	 * Now loop and look for the fast hash.
83 	 * If it's found, move on to the slow hash.
84 	 */
85 
86 	for (i = 0; i < blks->blksz; i++) {
87 		if (fhash != blks->blks[i].chksum_short)
88 			continue;
89 		if ((size_t)osz != blks->blks[i].len)
90 			continue;
91 
92 		LOG4(sess, "%s: found matching fast match: "
93 		    "position %jd, block %zu (position %jd, size %zu)",
94 		    path,
95 		    (intmax_t)offs, blks->blks[i].idx,
96 		    (intmax_t)blks->blks[i].offs,
97 		    blks->blks[i].len);
98 
99 		/* Compute slow hash on demand. */
100 
101 		if (have_md == 0) {
102 			hash_slow(buf + offs, (size_t)osz, md, sess);
103 			have_md = 1;
104 		}
105 
106 		if (memcmp(md, blks->blks[i].chksum_long, blks->csum))
107 			continue;
108 
109 		LOG4(sess, "%s: sender verifies slow match", path);
110 		return &blks->blks[i];
111 	}
112 
113 	return NULL;
114 }
115 
116 /*
117  * Given a local file "path" and the blocks created by a remote machine,
118  * find out which blocks of our file they don't have and send them.
119  * This function is reentrant: it must be called while there's still
120  * data to send.
121  */
122 void
123 blk_match(struct sess *sess, const struct blkset *blks,
124 	const char *path, struct blkstat *st)
125 {
126 	off_t		 last, end, sz;
127 	int32_t		 tok;
128 	struct blk	*blk;
129 
130 	/*
131 	 * If the file's empty or we don't have any blocks from the
132 	 * sender, then simply send the whole file.
133 	 * Otherwise, run the hash matching routine and send raw chunks
134 	 * and subsequent matching tokens.
135 	 */
136 
137 	if (st->mapsz && blks->blksz) {
138 		/*
139 		 * Stop searching at the length of the file minus the
140 		 * size of the last block.
141 		 * The reason for this being that we don't need to do an
142 		 * incremental hash within the last block---if it
143 		 * doesn't match, it doesn't match.
144 		 */
145 
146 		end = st->mapsz + 1 - blks->blks[blks->blksz - 1].len;
147 		last = st->offs;
148 
149 		for ( ; st->offs < end; st->offs++) {
150 			blk = blk_find(sess, st->map, st->mapsz,
151 				st->offs, blks, path, st->hint);
152 			if (blk == NULL)
153 				continue;
154 
155 			sz = st->offs - last;
156 			st->dirty += sz;
157 			st->total += sz;
158 			LOG4(sess,
159 			    "%s: flushing %jd B before %zu B block %zu",
160 			    path, (intmax_t)sz,
161 			    blk->len, blk->idx);
162 			tok = -(blk->idx + 1);
163 
164 			/*
165 			 * Write the data we have, then follow it with
166 			 * the tag of the block that matches.
167 			 */
168 
169 			st->curpos = last;
170 			st->curlen = st->curpos + sz;
171 			st->curtok = tok;
172 			assert(st->curtok != 0);
173 			st->curst = sz ? BLKSTAT_DATA : BLKSTAT_TOK;
174 			st->total += blk->len;
175 			st->offs += blk->len;
176 			st->hint = blk->idx + 1;
177 			return;
178 		}
179 
180 		/* Emit remaining data and send terminator token. */
181 
182 		sz = st->mapsz - last;
183 		LOG4(sess, "%s: flushing remaining %jd B",
184 		    path, (intmax_t)sz);
185 
186 		st->total += sz;
187 		st->dirty += sz;
188 		st->curpos = last;
189 		st->curlen = st->curpos + sz;
190 		st->curtok = 0;
191 		st->curst = sz ? BLKSTAT_DATA : BLKSTAT_TOK;
192 	} else {
193 		st->curpos = 0;
194 		st->curlen = st->mapsz;
195 		st->curtok = 0;
196 		st->curst = st->mapsz ? BLKSTAT_DATA : BLKSTAT_TOK;
197 		st->dirty = st->total = st->mapsz;
198 
199 		LOG4(sess, "%s: flushing whole file %zu B",
200 		    path, st->mapsz);
201 	}
202 }
203 
204 /*
205  * Buffer the message from sender to the receiver indicating that the
206  * block set has been received.
207  * Symmetrises blk_send_ack().
208  */
209 void
210 blk_recv_ack(struct sess *sess, char buf[20],
211 	const struct blkset *blocks, int32_t idx)
212 {
213 	size_t	 pos = 0, sz;
214 
215 	sz = sizeof(int32_t) + /* index */
216 	     sizeof(int32_t) + /* block count */
217 	     sizeof(int32_t) + /* block length */
218 	     sizeof(int32_t) + /* checksum length */
219 	     sizeof(int32_t); /* block remainder */
220 	assert(sz == 20);
221 
222 	io_buffer_int(sess, buf, &pos, sz, idx);
223 	io_buffer_int(sess, buf, &pos, sz, blocks->blksz);
224 	io_buffer_int(sess, buf, &pos, sz, blocks->len);
225 	io_buffer_int(sess, buf, &pos, sz, blocks->csum);
226 	io_buffer_int(sess, buf, &pos, sz, blocks->rem);
227 	assert(pos == sz);
228 }
229 
230 /*
231  * Read all of the checksums for a file's blocks.
232  * Returns the set of blocks or NULL on failure.
233  */
234 struct blkset *
235 blk_recv(struct sess *sess, int fd, const char *path)
236 {
237 	struct blkset	*s;
238 	int32_t		 i;
239 	size_t		 j;
240 	struct blk	*b;
241 	off_t		 offs = 0;
242 
243 	if ((s = calloc(1, sizeof(struct blkset))) == NULL) {
244 		ERR(sess, "calloc");
245 		return NULL;
246 	}
247 
248 	/*
249 	 * The block prologue consists of a few values that we'll need
250 	 * in reading the individual blocks for this file.
251 	 * FIXME: read into buffer and unbuffer.
252 	 */
253 
254 	if (!io_read_size(sess, fd, &s->blksz)) {
255 		ERRX1(sess, "io_read_size");
256 		goto out;
257 	} else if (!io_read_size(sess, fd, &s->len)) {
258 		ERRX1(sess, "io_read_size");
259 		goto out;
260 	} else if (!io_read_size(sess, fd, &s->csum)) {
261 		ERRX1(sess, "io_read_int");
262 		goto out;
263 	} else if (!io_read_size(sess, fd, &s->rem)) {
264 		ERRX1(sess, "io_read_int");
265 		goto out;
266 	} else if (s->rem && s->rem >= s->len) {
267 		ERRX(sess, "block remainder is "
268 			"greater than block size");
269 		goto out;
270 	}
271 
272 	LOG3(sess, "%s: read block prologue: %zu blocks of "
273 	    "%zu B, %zu B remainder, %zu B checksum", path,
274 	    s->blksz, s->len, s->rem, s->csum);
275 
276 	if (s->blksz) {
277 		s->blks = calloc(s->blksz, sizeof(struct blk));
278 		if (s->blks == NULL) {
279 			ERR(sess, "calloc");
280 			goto out;
281 		}
282 	}
283 
284 	/*
285 	 * Read each block individually.
286 	 * FIXME: read buffer and unbuffer.
287 	 */
288 
289 	for (j = 0; j < s->blksz; j++) {
290 		b = &s->blks[j];
291 		if (!io_read_int(sess, fd, &i)) {
292 			ERRX1(sess, "io_read_int");
293 			goto out;
294 		}
295 		b->chksum_short = i;
296 
297 		assert(s->csum <= sizeof(b->chksum_long));
298 		if (!io_read_buf(sess,
299 		    fd, b->chksum_long, s->csum)) {
300 			ERRX1(sess, "io_read_buf");
301 			goto out;
302 		}
303 
304 		/*
305 		 * If we're the last block, then we're assigned the
306 		 * remainder of the data.
307 		 */
308 
309 		b->offs = offs;
310 		b->idx = j;
311 		b->len = (j == (s->blksz - 1) && s->rem) ?
312 			s->rem : s->len;
313 		offs += b->len;
314 
315 		LOG4(sess, "%s: read block %zu, length %zu B",
316 		    path, b->idx, b->len);
317 	}
318 
319 	s->size = offs;
320 	LOG3(sess, "%s: read blocks: %zu blocks, %jd B total blocked data",
321 	    path, s->blksz, (intmax_t)s->size);
322 	return s;
323 out:
324 	free(s->blks);
325 	free(s);
326 	return NULL;
327 }
328 
329 /*
330  * Symmetrise blk_recv_ack(), except w/o the leading identifier.
331  * Return zero on failure, non-zero on success.
332  */
333 int
334 blk_send_ack(struct sess *sess, int fd, struct blkset *p)
335 {
336 	char	 buf[16];
337 	size_t	 pos = 0, sz;
338 
339 	/* Put the entire send routine into a buffer. */
340 
341 	sz = sizeof(int32_t) + /* block count */
342 	     sizeof(int32_t) + /* block length */
343 	     sizeof(int32_t) + /* checksum length */
344 	     sizeof(int32_t); /* block remainder */
345 	assert(sz <= sizeof(buf));
346 
347 	if (!io_read_buf(sess, fd, buf, sz)) {
348 		ERRX1(sess, "io_read_buf");
349 		return 0;
350 	}
351 
352 	if (!io_unbuffer_size(sess, buf, &pos, sz, &p->blksz))
353 		ERRX1(sess, "io_unbuffer_size");
354 	else if (!io_unbuffer_size(sess, buf, &pos, sz, &p->len))
355 		ERRX1(sess, "io_unbuffer_size");
356 	else if (!io_unbuffer_size(sess, buf, &pos, sz, &p->csum))
357 		ERRX1(sess, "io_unbuffer_size");
358 	else if (!io_unbuffer_size(sess, buf, &pos, sz, &p->rem))
359 		ERRX1(sess, "io_unbuffer_size");
360 	else if (p->len && p->rem >= p->len)
361 		ERRX1(sess, "non-zero length is less than remainder");
362 	else if (p->csum == 0 || p->csum > 16)
363 		ERRX1(sess, "inappropriate checksum length");
364 	else
365 		return 1;
366 
367 	return 0;
368 }
369 
370 /*
371  * Transmit the metadata for set and blocks.
372  * Return zero on failure, non-zero on success.
373  */
374 int
375 blk_send(struct sess *sess, int fd, size_t idx,
376 	const struct blkset *p, const char *path)
377 {
378 	char	*buf;
379 	size_t	 i, pos = 0, sz;
380 	int	 rc = 0;
381 
382 	/* Put the entire send routine into a buffer. */
383 
384 	sz = sizeof(int32_t) + /* identifier */
385 	    sizeof(int32_t) + /* block count */
386 	    sizeof(int32_t) + /* block length */
387 	    sizeof(int32_t) + /* checksum length */
388 	    sizeof(int32_t) + /* block remainder */
389 	    p->blksz *
390 	    (sizeof(int32_t) + /* short checksum */
391 		p->csum); /* long checksum */
392 
393 	if ((buf = malloc(sz)) == NULL) {
394 		ERR(sess, "malloc");
395 		return 0;
396 	}
397 
398 	io_buffer_int(sess, buf, &pos, sz, idx);
399 	io_buffer_int(sess, buf, &pos, sz, p->blksz);
400 	io_buffer_int(sess, buf, &pos, sz, p->len);
401 	io_buffer_int(sess, buf, &pos, sz, p->csum);
402 	io_buffer_int(sess, buf, &pos, sz, p->rem);
403 
404 	for (i = 0; i < p->blksz; i++) {
405 		io_buffer_int(sess, buf, &pos,
406 			sz, p->blks[i].chksum_short);
407 		io_buffer_buf(sess, buf, &pos, sz,
408 			p->blks[i].chksum_long, p->csum);
409 	}
410 
411 	assert(pos == sz);
412 
413 	if (!io_write_buf(sess, fd, buf, sz)) {
414 		ERRX1(sess, "io_write_buf");
415 		goto out;
416 	}
417 
418 	LOG3(sess, "%s: sent block prologue: %zu blocks of %zu B, "
419 	    "%zu B remainder, %zu B checksum",
420 	    path, p->blksz, p->len, p->rem, p->csum);
421 	rc = 1;
422 out:
423 	free(buf);
424 	return rc;
425 }
426