1 /* $Id: blocks.c,v 1.14 2019/03/23 16:04:28 deraadt Exp $ */ 2 /* 3 * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 #include <sys/stat.h> 18 19 #include <assert.h> 20 #include <endian.h> 21 #include <errno.h> 22 #include <inttypes.h> 23 #include <stdio.h> 24 #include <stdlib.h> 25 #include <string.h> 26 #include <unistd.h> 27 28 #include <openssl/md4.h> 29 30 #include "extern.h" 31 32 /* 33 * From our current position of "offs" in buffer "buf" of total size 34 * "size", see if we can find a matching block in our list of blocks. 35 * The "hint" refers to the block that *might* work. 36 * Returns the blk or NULL if no matching block was found. 37 */ 38 static struct blk * 39 blk_find(struct sess *sess, const void *buf, off_t size, off_t offs, 40 const struct blkset *blks, const char *path, size_t hint) 41 { 42 unsigned char md[MD4_DIGEST_LENGTH]; 43 uint32_t fhash; 44 off_t remain, osz; 45 size_t i; 46 int have_md = 0; 47 48 /* 49 * First, compute our fast hash. 50 * FIXME: yes, this can be a rolling computation, but I'm 51 * deliberately making it simple first. 52 */ 53 54 remain = size - offs; 55 assert(remain); 56 osz = remain < (off_t)blks->len ? remain : (off_t)blks->len; 57 fhash = hash_fast(buf + offs, (size_t)osz); 58 have_md = 0; 59 60 /* 61 * Start with our match hint. 62 * This just runs the fast and slow check with the hint. 63 */ 64 65 if (hint < blks->blksz && 66 fhash == blks->blks[hint].chksum_short && 67 (size_t)osz == blks->blks[hint].len) { 68 hash_slow(buf + offs, (size_t)osz, md, sess); 69 have_md = 1; 70 if (memcmp(md, blks->blks[hint].chksum_long, blks->csum) == 0) { 71 LOG4(sess, "%s: found matching hinted match: " 72 "position %jd, block %zu (position %jd, size %zu)", 73 path, 74 (intmax_t)offs, blks->blks[hint].idx, 75 (intmax_t)blks->blks[hint].offs, 76 blks->blks[hint].len); 77 return &blks->blks[hint]; 78 } 79 } 80 81 /* 82 * Now loop and look for the fast hash. 83 * If it's found, move on to the slow hash. 84 */ 85 86 for (i = 0; i < blks->blksz; i++) { 87 if (fhash != blks->blks[i].chksum_short) 88 continue; 89 if ((size_t)osz != blks->blks[i].len) 90 continue; 91 92 LOG4(sess, "%s: found matching fast match: " 93 "position %jd, block %zu (position %jd, size %zu)", 94 path, 95 (intmax_t)offs, blks->blks[i].idx, 96 (intmax_t)blks->blks[i].offs, 97 blks->blks[i].len); 98 99 /* Compute slow hash on demand. */ 100 101 if (have_md == 0) { 102 hash_slow(buf + offs, (size_t)osz, md, sess); 103 have_md = 1; 104 } 105 106 if (memcmp(md, blks->blks[i].chksum_long, blks->csum)) 107 continue; 108 109 LOG4(sess, "%s: sender verifies slow match", path); 110 return &blks->blks[i]; 111 } 112 113 return NULL; 114 } 115 116 /* 117 * Given a local file "path" and the blocks created by a remote machine, 118 * find out which blocks of our file they don't have and send them. 119 * This function is reentrant: it must be called while there's still 120 * data to send. 121 */ 122 void 123 blk_match(struct sess *sess, const struct blkset *blks, 124 const char *path, struct blkstat *st) 125 { 126 off_t last, end, sz; 127 int32_t tok; 128 struct blk *blk; 129 130 /* 131 * If the file's empty or we don't have any blocks from the 132 * sender, then simply send the whole file. 133 * Otherwise, run the hash matching routine and send raw chunks 134 * and subsequent matching tokens. 135 */ 136 137 if (st->mapsz && blks->blksz) { 138 /* 139 * Stop searching at the length of the file minus the 140 * size of the last block. 141 * The reason for this being that we don't need to do an 142 * incremental hash within the last block---if it 143 * doesn't match, it doesn't match. 144 */ 145 146 end = st->mapsz + 1 - blks->blks[blks->blksz - 1].len; 147 last = st->offs; 148 149 for ( ; st->offs < end; st->offs++) { 150 blk = blk_find(sess, st->map, st->mapsz, 151 st->offs, blks, path, st->hint); 152 if (blk == NULL) 153 continue; 154 155 sz = st->offs - last; 156 st->dirty += sz; 157 st->total += sz; 158 LOG4(sess, 159 "%s: flushing %jd B before %zu B block %zu", 160 path, (intmax_t)sz, 161 blk->len, blk->idx); 162 tok = -(blk->idx + 1); 163 164 /* 165 * Write the data we have, then follow it with 166 * the tag of the block that matches. 167 */ 168 169 st->curpos = last; 170 st->curlen = st->curpos + sz; 171 st->curtok = tok; 172 assert(st->curtok != 0); 173 st->curst = sz ? BLKSTAT_DATA : BLKSTAT_TOK; 174 st->total += blk->len; 175 st->offs += blk->len; 176 st->hint = blk->idx + 1; 177 return; 178 } 179 180 /* Emit remaining data and send terminator token. */ 181 182 sz = st->mapsz - last; 183 LOG4(sess, "%s: flushing remaining %jd B", 184 path, (intmax_t)sz); 185 186 st->total += sz; 187 st->dirty += sz; 188 st->curpos = last; 189 st->curlen = st->curpos + sz; 190 st->curtok = 0; 191 st->curst = sz ? BLKSTAT_DATA : BLKSTAT_TOK; 192 } else { 193 st->curpos = 0; 194 st->curlen = st->mapsz; 195 st->curtok = 0; 196 st->curst = st->mapsz ? BLKSTAT_DATA : BLKSTAT_TOK; 197 st->dirty = st->total = st->mapsz; 198 199 LOG4(sess, "%s: flushing whole file %zu B", 200 path, st->mapsz); 201 } 202 } 203 204 /* 205 * Buffer the message from sender to the receiver indicating that the 206 * block set has been received. 207 * Symmetrises blk_send_ack(). 208 */ 209 void 210 blk_recv_ack(struct sess *sess, char buf[20], 211 const struct blkset *blocks, int32_t idx) 212 { 213 size_t pos = 0, sz; 214 215 sz = sizeof(int32_t) + /* index */ 216 sizeof(int32_t) + /* block count */ 217 sizeof(int32_t) + /* block length */ 218 sizeof(int32_t) + /* checksum length */ 219 sizeof(int32_t); /* block remainder */ 220 assert(sz == 20); 221 222 io_buffer_int(sess, buf, &pos, sz, idx); 223 io_buffer_int(sess, buf, &pos, sz, blocks->blksz); 224 io_buffer_int(sess, buf, &pos, sz, blocks->len); 225 io_buffer_int(sess, buf, &pos, sz, blocks->csum); 226 io_buffer_int(sess, buf, &pos, sz, blocks->rem); 227 assert(pos == sz); 228 } 229 230 /* 231 * Read all of the checksums for a file's blocks. 232 * Returns the set of blocks or NULL on failure. 233 */ 234 struct blkset * 235 blk_recv(struct sess *sess, int fd, const char *path) 236 { 237 struct blkset *s; 238 int32_t i; 239 size_t j; 240 struct blk *b; 241 off_t offs = 0; 242 243 if ((s = calloc(1, sizeof(struct blkset))) == NULL) { 244 ERR(sess, "calloc"); 245 return NULL; 246 } 247 248 /* 249 * The block prologue consists of a few values that we'll need 250 * in reading the individual blocks for this file. 251 * FIXME: read into buffer and unbuffer. 252 */ 253 254 if (!io_read_size(sess, fd, &s->blksz)) { 255 ERRX1(sess, "io_read_size"); 256 goto out; 257 } else if (!io_read_size(sess, fd, &s->len)) { 258 ERRX1(sess, "io_read_size"); 259 goto out; 260 } else if (!io_read_size(sess, fd, &s->csum)) { 261 ERRX1(sess, "io_read_int"); 262 goto out; 263 } else if (!io_read_size(sess, fd, &s->rem)) { 264 ERRX1(sess, "io_read_int"); 265 goto out; 266 } else if (s->rem && s->rem >= s->len) { 267 ERRX(sess, "block remainder is " 268 "greater than block size"); 269 goto out; 270 } 271 272 LOG3(sess, "%s: read block prologue: %zu blocks of " 273 "%zu B, %zu B remainder, %zu B checksum", path, 274 s->blksz, s->len, s->rem, s->csum); 275 276 if (s->blksz) { 277 s->blks = calloc(s->blksz, sizeof(struct blk)); 278 if (s->blks == NULL) { 279 ERR(sess, "calloc"); 280 goto out; 281 } 282 } 283 284 /* 285 * Read each block individually. 286 * FIXME: read buffer and unbuffer. 287 */ 288 289 for (j = 0; j < s->blksz; j++) { 290 b = &s->blks[j]; 291 if (!io_read_int(sess, fd, &i)) { 292 ERRX1(sess, "io_read_int"); 293 goto out; 294 } 295 b->chksum_short = i; 296 297 assert(s->csum <= sizeof(b->chksum_long)); 298 if (!io_read_buf(sess, 299 fd, b->chksum_long, s->csum)) { 300 ERRX1(sess, "io_read_buf"); 301 goto out; 302 } 303 304 /* 305 * If we're the last block, then we're assigned the 306 * remainder of the data. 307 */ 308 309 b->offs = offs; 310 b->idx = j; 311 b->len = (j == (s->blksz - 1) && s->rem) ? 312 s->rem : s->len; 313 offs += b->len; 314 315 LOG4(sess, "%s: read block %zu, length %zu B", 316 path, b->idx, b->len); 317 } 318 319 s->size = offs; 320 LOG3(sess, "%s: read blocks: %zu blocks, %jd B total blocked data", 321 path, s->blksz, (intmax_t)s->size); 322 return s; 323 out: 324 free(s->blks); 325 free(s); 326 return NULL; 327 } 328 329 /* 330 * Symmetrise blk_recv_ack(), except w/o the leading identifier. 331 * Return zero on failure, non-zero on success. 332 */ 333 int 334 blk_send_ack(struct sess *sess, int fd, struct blkset *p) 335 { 336 char buf[16]; 337 size_t pos = 0, sz; 338 339 /* Put the entire send routine into a buffer. */ 340 341 sz = sizeof(int32_t) + /* block count */ 342 sizeof(int32_t) + /* block length */ 343 sizeof(int32_t) + /* checksum length */ 344 sizeof(int32_t); /* block remainder */ 345 assert(sz <= sizeof(buf)); 346 347 if (!io_read_buf(sess, fd, buf, sz)) { 348 ERRX1(sess, "io_read_buf"); 349 return 0; 350 } 351 352 if (!io_unbuffer_size(sess, buf, &pos, sz, &p->blksz)) 353 ERRX1(sess, "io_unbuffer_size"); 354 else if (!io_unbuffer_size(sess, buf, &pos, sz, &p->len)) 355 ERRX1(sess, "io_unbuffer_size"); 356 else if (!io_unbuffer_size(sess, buf, &pos, sz, &p->csum)) 357 ERRX1(sess, "io_unbuffer_size"); 358 else if (!io_unbuffer_size(sess, buf, &pos, sz, &p->rem)) 359 ERRX1(sess, "io_unbuffer_size"); 360 else if (p->len && p->rem >= p->len) 361 ERRX1(sess, "non-zero length is less than remainder"); 362 else if (p->csum == 0 || p->csum > 16) 363 ERRX1(sess, "inappropriate checksum length"); 364 else 365 return 1; 366 367 return 0; 368 } 369 370 /* 371 * Transmit the metadata for set and blocks. 372 * Return zero on failure, non-zero on success. 373 */ 374 int 375 blk_send(struct sess *sess, int fd, size_t idx, 376 const struct blkset *p, const char *path) 377 { 378 char *buf; 379 size_t i, pos = 0, sz; 380 int rc = 0; 381 382 /* Put the entire send routine into a buffer. */ 383 384 sz = sizeof(int32_t) + /* identifier */ 385 sizeof(int32_t) + /* block count */ 386 sizeof(int32_t) + /* block length */ 387 sizeof(int32_t) + /* checksum length */ 388 sizeof(int32_t) + /* block remainder */ 389 p->blksz * 390 (sizeof(int32_t) + /* short checksum */ 391 p->csum); /* long checksum */ 392 393 if ((buf = malloc(sz)) == NULL) { 394 ERR(sess, "malloc"); 395 return 0; 396 } 397 398 io_buffer_int(sess, buf, &pos, sz, idx); 399 io_buffer_int(sess, buf, &pos, sz, p->blksz); 400 io_buffer_int(sess, buf, &pos, sz, p->len); 401 io_buffer_int(sess, buf, &pos, sz, p->csum); 402 io_buffer_int(sess, buf, &pos, sz, p->rem); 403 404 for (i = 0; i < p->blksz; i++) { 405 io_buffer_int(sess, buf, &pos, 406 sz, p->blks[i].chksum_short); 407 io_buffer_buf(sess, buf, &pos, sz, 408 p->blks[i].chksum_long, p->csum); 409 } 410 411 assert(pos == sz); 412 413 if (!io_write_buf(sess, fd, buf, sz)) { 414 ERRX1(sess, "io_write_buf"); 415 goto out; 416 } 417 418 LOG3(sess, "%s: sent block prologue: %zu blocks of %zu B, " 419 "%zu B remainder, %zu B checksum", 420 path, p->blksz, p->len, p->rem, p->csum); 421 rc = 1; 422 out: 423 free(buf); 424 return rc; 425 } 426