xref: /netbsd-src/external/gpl3/gdb.old/dist/zlib/examples/gzlog.c (revision 6881a4007f077b54e5f51159c52b9b25f57deb0d)
14e00368fSchristos /*
24e00368fSchristos  * gzlog.c
3*6881a400Schristos  * Copyright (C) 2004, 2008, 2012, 2016, 2019 Mark Adler, all rights reserved
44e00368fSchristos  * For conditions of distribution and use, see copyright notice in gzlog.h
5*6881a400Schristos  * version 2.3, 25 May 2019
64e00368fSchristos  */
74e00368fSchristos 
84e00368fSchristos /*
94e00368fSchristos    gzlog provides a mechanism for frequently appending short strings to a gzip
104e00368fSchristos    file that is efficient both in execution time and compression ratio.  The
114e00368fSchristos    strategy is to write the short strings in an uncompressed form to the end of
124e00368fSchristos    the gzip file, only compressing when the amount of uncompressed data has
134e00368fSchristos    reached a given threshold.
144e00368fSchristos 
154e00368fSchristos    gzlog also provides protection against interruptions in the process due to
164e00368fSchristos    system crashes.  The status of the operation is recorded in an extra field
174e00368fSchristos    in the gzip file, and is only updated once the gzip file is brought to a
184e00368fSchristos    valid state.  The last data to be appended or compressed is saved in an
194e00368fSchristos    auxiliary file, so that if the operation is interrupted, it can be completed
204e00368fSchristos    the next time an append operation is attempted.
214e00368fSchristos 
224e00368fSchristos    gzlog maintains another auxiliary file with the last 32K of data from the
234e00368fSchristos    compressed portion, which is preloaded for the compression of the subsequent
244e00368fSchristos    data.  This minimizes the impact to the compression ratio of appending.
254e00368fSchristos  */
264e00368fSchristos 
274e00368fSchristos /*
284e00368fSchristos    Operations Concept:
294e00368fSchristos 
304e00368fSchristos    Files (log name "foo"):
314e00368fSchristos    foo.gz -- gzip file with the complete log
324e00368fSchristos    foo.add -- last message to append or last data to compress
334e00368fSchristos    foo.dict -- dictionary of the last 32K of data for next compression
344e00368fSchristos    foo.temp -- temporary dictionary file for compression after this one
354e00368fSchristos    foo.lock -- lock file for reading and writing the other files
364e00368fSchristos    foo.repairs -- log file for log file recovery operations (not compressed)
374e00368fSchristos 
384e00368fSchristos    gzip file structure:
394e00368fSchristos    - fixed-length (no file name) header with extra field (see below)
404e00368fSchristos    - compressed data ending initially with empty stored block
414e00368fSchristos    - uncompressed data filling out originally empty stored block and
424e00368fSchristos      subsequent stored blocks as needed (16K max each)
434e00368fSchristos    - gzip trailer
444e00368fSchristos    - no junk at end (no other gzip streams)
454e00368fSchristos 
464e00368fSchristos    When appending data, the information in the first three items above plus the
474e00368fSchristos    foo.add file are sufficient to recover an interrupted append operation.  The
484e00368fSchristos    extra field has the necessary information to restore the start of the last
494e00368fSchristos    stored block and determine where to append the data in the foo.add file, as
504e00368fSchristos    well as the crc and length of the gzip data before the append operation.
514e00368fSchristos 
524e00368fSchristos    The foo.add file is created before the gzip file is marked for append, and
534e00368fSchristos    deleted after the gzip file is marked as complete.  So if the append
544e00368fSchristos    operation is interrupted, the data to add will still be there.  If due to
554e00368fSchristos    some external force, the foo.add file gets deleted between when the append
564e00368fSchristos    operation was interrupted and when recovery is attempted, the gzip file will
574e00368fSchristos    still be restored, but without the appended data.
584e00368fSchristos 
594e00368fSchristos    When compressing data, the information in the first two items above plus the
604e00368fSchristos    foo.add file are sufficient to recover an interrupted compress operation.
614e00368fSchristos    The extra field has the necessary information to find the end of the
624e00368fSchristos    compressed data, and contains both the crc and length of just the compressed
634e00368fSchristos    data and of the complete set of data including the contents of the foo.add
644e00368fSchristos    file.
654e00368fSchristos 
664e00368fSchristos    Again, the foo.add file is maintained during the compress operation in case
674e00368fSchristos    of an interruption.  If in the unlikely event the foo.add file with the data
684e00368fSchristos    to be compressed is missing due to some external force, a gzip file with
694e00368fSchristos    just the previous compressed data will be reconstructed.  In this case, all
704e00368fSchristos    of the data that was to be compressed is lost (approximately one megabyte).
714e00368fSchristos    This will not occur if all that happened was an interruption of the compress
724e00368fSchristos    operation.
734e00368fSchristos 
744e00368fSchristos    The third state that is marked is the replacement of the old dictionary with
754e00368fSchristos    the new dictionary after a compress operation.  Once compression is
764e00368fSchristos    complete, the gzip file is marked as being in the replace state.  This
774e00368fSchristos    completes the gzip file, so an interrupt after being so marked does not
784e00368fSchristos    result in recompression.  Then the dictionary file is replaced, and the gzip
794e00368fSchristos    file is marked as completed.  This state prevents the possibility of
804e00368fSchristos    restarting compression with the wrong dictionary file.
814e00368fSchristos 
824e00368fSchristos    All three operations are wrapped by a lock/unlock procedure.  In order to
834e00368fSchristos    gain exclusive access to the log files, first a foo.lock file must be
844e00368fSchristos    exclusively created.  When all operations are complete, the lock is
854e00368fSchristos    released by deleting the foo.lock file.  If when attempting to create the
864e00368fSchristos    lock file, it already exists and the modify time of the lock file is more
874e00368fSchristos    than five minutes old (set by the PATIENCE define below), then the old
884e00368fSchristos    lock file is considered stale and deleted, and the exclusive creation of
894e00368fSchristos    the lock file is retried.  To assure that there are no false assessments
904e00368fSchristos    of the staleness of the lock file, the operations periodically touch the
914e00368fSchristos    lock file to update the modified date.
924e00368fSchristos 
934e00368fSchristos    Following is the definition of the extra field with all of the information
944e00368fSchristos    required to enable the above append and compress operations and their
954e00368fSchristos    recovery if interrupted.  Multi-byte values are stored little endian
964e00368fSchristos    (consistent with the gzip format).  File pointers are eight bytes long.
974e00368fSchristos    The crc's and lengths for the gzip trailer are four bytes long.  (Note that
984e00368fSchristos    the length at the end of a gzip file is used for error checking only, and
994e00368fSchristos    for large files is actually the length modulo 2^32.)  The stored block
1004e00368fSchristos    length is two bytes long.  The gzip extra field two-byte identification is
1014e00368fSchristos    "ap" for append.  It is assumed that writing the extra field to the file is
1024e00368fSchristos    an "atomic" operation.  That is, either all of the extra field is written
1034e00368fSchristos    to the file, or none of it is, if the operation is interrupted right at the
1044e00368fSchristos    point of updating the extra field.  This is a reasonable assumption, since
1054e00368fSchristos    the extra field is within the first 52 bytes of the file, which is smaller
1064e00368fSchristos    than any expected block size for a mass storage device (usually 512 bytes or
1074e00368fSchristos    larger).
1084e00368fSchristos 
1094e00368fSchristos    Extra field (35 bytes):
1104e00368fSchristos    - Pointer to first stored block length -- this points to the two-byte length
1114e00368fSchristos      of the first stored block, which is followed by the two-byte, one's
1124e00368fSchristos      complement of that length.  The stored block length is preceded by the
1134e00368fSchristos      three-bit header of the stored block, which is the actual start of the
1144e00368fSchristos      stored block in the deflate format.  See the bit offset field below.
1154e00368fSchristos    - Pointer to the last stored block length.  This is the same as above, but
1164e00368fSchristos      for the last stored block of the uncompressed data in the gzip file.
1174e00368fSchristos      Initially this is the same as the first stored block length pointer.
1184e00368fSchristos      When the stored block gets to 16K (see the MAX_STORE define), then a new
1194e00368fSchristos      stored block is added, at which point the last stored block length pointer
1204e00368fSchristos      is different from the first stored block length pointer.  When they are
1214e00368fSchristos      different, the first bit of the last stored block header is eight bits, or
1224e00368fSchristos      one byte back from the block length.
1234e00368fSchristos    - Compressed data crc and length.  This is the crc and length of the data
1244e00368fSchristos      that is in the compressed portion of the deflate stream.  These are used
1254e00368fSchristos      only in the event that the foo.add file containing the data to compress is
1264e00368fSchristos      lost after a compress operation is interrupted.
1274e00368fSchristos    - Total data crc and length.  This is the crc and length of all of the data
1284e00368fSchristos      stored in the gzip file, compressed and uncompressed.  It is used to
1294e00368fSchristos      reconstruct the gzip trailer when compressing, as well as when recovering
1304e00368fSchristos      interrupted operations.
1314e00368fSchristos    - Final stored block length.  This is used to quickly find where to append,
1324e00368fSchristos      and allows the restoration of the original final stored block state when
1334e00368fSchristos      an append operation is interrupted.
1344e00368fSchristos    - First stored block start as the number of bits back from the final stored
1354e00368fSchristos      block first length byte.  This value is in the range of 3..10, and is
1364e00368fSchristos      stored as the low three bits of the final byte of the extra field after
1374e00368fSchristos      subtracting three (0..7).  This allows the last-block bit of the stored
1384e00368fSchristos      block header to be updated when a new stored block is added, for the case
1394e00368fSchristos      when the first stored block and the last stored block are the same.  (When
1404e00368fSchristos      they are different, the numbers of bits back is known to be eight.)  This
1414e00368fSchristos      also allows for new compressed data to be appended to the old compressed
1424e00368fSchristos      data in the compress operation, overwriting the previous first stored
1434e00368fSchristos      block, or for the compressed data to be terminated and a valid gzip file
1444e00368fSchristos      reconstructed on the off chance that a compression operation was
1454e00368fSchristos      interrupted and the data to compress in the foo.add file was deleted.
1464e00368fSchristos    - The operation in process.  This is the next two bits in the last byte (the
1474e00368fSchristos      bits under the mask 0x18).  They are interpreted as 0: nothing in process,
1484e00368fSchristos      1: append in process, 2: compress in process, 3: replace in process.
1494e00368fSchristos    - The top three bits of the last byte in the extra field are reserved and
1504e00368fSchristos      are currently set to zero.
1514e00368fSchristos 
1524e00368fSchristos    Main procedure:
1534e00368fSchristos    - Exclusively create the foo.lock file using the O_CREAT and O_EXCL modes of
1544e00368fSchristos      the system open() call.  If the modify time of an existing lock file is
1554e00368fSchristos      more than PATIENCE seconds old, then the lock file is deleted and the
1564e00368fSchristos      exclusive create is retried.
1574e00368fSchristos    - Load the extra field from the foo.gz file, and see if an operation was in
1584e00368fSchristos      progress but not completed.  If so, apply the recovery procedure below.
1594e00368fSchristos    - Perform the append procedure with the provided data.
1604e00368fSchristos    - If the uncompressed data in the foo.gz file is 1MB or more, apply the
1614e00368fSchristos      compress procedure.
1624e00368fSchristos    - Delete the foo.lock file.
1634e00368fSchristos 
1644e00368fSchristos    Append procedure:
1654e00368fSchristos    - Put what to append in the foo.add file so that the operation can be
1664e00368fSchristos      restarted if this procedure is interrupted.
1674e00368fSchristos    - Mark the foo.gz extra field with the append operation in progress.
1684e00368fSchristos    + Restore the original last-block bit and stored block length of the last
1694e00368fSchristos      stored block from the information in the extra field, in case a previous
1704e00368fSchristos      append operation was interrupted.
1714e00368fSchristos    - Append the provided data to the last stored block, creating new stored
1724e00368fSchristos      blocks as needed and updating the stored blocks last-block bits and
1734e00368fSchristos      lengths.
1744e00368fSchristos    - Update the crc and length with the new data, and write the gzip trailer.
1754e00368fSchristos    - Write over the extra field (with a single write operation) with the new
1764e00368fSchristos      pointers, lengths, and crc's, and mark the gzip file as not in process.
1774e00368fSchristos      Though there is still a foo.add file, it will be ignored since nothing
1784e00368fSchristos      is in process.  If a foo.add file is leftover from a previously
1794e00368fSchristos      completed operation, it is truncated when writing new data to it.
1804e00368fSchristos    - Delete the foo.add file.
1814e00368fSchristos 
1824e00368fSchristos    Compress and replace procedures:
1834e00368fSchristos    - Read all of the uncompressed data in the stored blocks in foo.gz and write
1844e00368fSchristos      it to foo.add.  Also write foo.temp with the last 32K of that data to
1854e00368fSchristos      provide a dictionary for the next invocation of this procedure.
1864e00368fSchristos    - Rewrite the extra field marking foo.gz with a compression in process.
1874e00368fSchristos    * If there is no data provided to compress (due to a missing foo.add file
1884e00368fSchristos      when recovering), reconstruct and truncate the foo.gz file to contain
1894e00368fSchristos      only the previous compressed data and proceed to the step after the next
1904e00368fSchristos      one.  Otherwise ...
1914e00368fSchristos    - Compress the data with the dictionary in foo.dict, and write to the
1924e00368fSchristos      foo.gz file starting at the bit immediately following the last previously
1934e00368fSchristos      compressed block.  If there is no foo.dict, proceed anyway with the
1944e00368fSchristos      compression at slightly reduced efficiency.  (For the foo.dict file to be
1954e00368fSchristos      missing requires some external failure beyond simply the interruption of
1964e00368fSchristos      a compress operation.)  During this process, the foo.lock file is
1974e00368fSchristos      periodically touched to assure that that file is not considered stale by
1984e00368fSchristos      another process before we're done.  The deflation is terminated with a
1994e00368fSchristos      non-last empty static block (10 bits long), that is then located and
2004e00368fSchristos      written over by a last-bit-set empty stored block.
2014e00368fSchristos    - Append the crc and length of the data in the gzip file (previously
2024e00368fSchristos      calculated during the append operations).
2034e00368fSchristos    - Write over the extra field with the updated stored block offsets, bits
2044e00368fSchristos      back, crc's, and lengths, and mark foo.gz as in process for a replacement
2054e00368fSchristos      of the dictionary.
2064e00368fSchristos    @ Delete the foo.add file.
2074e00368fSchristos    - Replace foo.dict with foo.temp.
2084e00368fSchristos    - Write over the extra field, marking foo.gz as complete.
2094e00368fSchristos 
2104e00368fSchristos    Recovery procedure:
2114e00368fSchristos    - If not a replace recovery, read in the foo.add file, and provide that data
2124e00368fSchristos      to the appropriate recovery below.  If there is no foo.add file, provide
2134e00368fSchristos      a zero data length to the recovery.  In that case, the append recovery
2144e00368fSchristos      restores the foo.gz to the previous compressed + uncompressed data state.
2154e00368fSchristos      For the compress recovery, a missing foo.add file results in foo.gz
2164e00368fSchristos      being restored to the previous compressed-only data state.
2174e00368fSchristos    - Append recovery:
2184e00368fSchristos      - Pick up append at + step above
2194e00368fSchristos    - Compress recovery:
2204e00368fSchristos      - Pick up compress at * step above
2214e00368fSchristos    - Replace recovery:
2224e00368fSchristos      - Pick up compress at @ step above
2234e00368fSchristos    - Log the repair with a date stamp in foo.repairs
2244e00368fSchristos  */
2254e00368fSchristos 
2264e00368fSchristos #include <sys/types.h>
2274e00368fSchristos #include <stdio.h>      /* rename, fopen, fprintf, fclose */
2284e00368fSchristos #include <stdlib.h>     /* malloc, free */
2294e00368fSchristos #include <string.h>     /* strlen, strrchr, strcpy, strncpy, strcmp */
2304e00368fSchristos #include <fcntl.h>      /* open */
2314e00368fSchristos #include <unistd.h>     /* lseek, read, write, close, unlink, sleep, */
2324e00368fSchristos                         /* ftruncate, fsync */
2334e00368fSchristos #include <errno.h>      /* errno */
2344e00368fSchristos #include <time.h>       /* time, ctime */
2354e00368fSchristos #include <sys/stat.h>   /* stat */
2364e00368fSchristos #include <sys/time.h>   /* utimes */
2374e00368fSchristos #include "zlib.h"       /* crc32 */
2384e00368fSchristos 
2394e00368fSchristos #include "gzlog.h"      /* header for external access */
2404e00368fSchristos 
/* "local" marks definitions that are private to this file (it expands to
   static); uint and ulong are shorthand for the unsigned types used by the
   byte-packing macros and the log structure below */
2414e00368fSchristos #define local static
2424e00368fSchristos typedef unsigned int uint;
2434e00368fSchristos typedef unsigned long ulong;
2444e00368fSchristos 
/* Debug hook.  With GZLOG_DEBUG defined, BAIL(n) compares n against
   gzlog_bail; on a match it tests gzlog_count against zero and then
   decrements it, and if it was zero, long-jumps to gzlog_jump passing
   gzlog_bail as the value.  gzlog_count is only decremented on passes where
   n matches.  Without GZLOG_DEBUG, BAIL(n) expands to nothing. */
2454e00368fSchristos /* Macro for debugging to deterministically force recovery operations */
246699b0f92Schristos #ifdef GZLOG_DEBUG
2474e00368fSchristos     #include <setjmp.h>         /* longjmp */
2484e00368fSchristos     jmp_buf gzlog_jump;         /* where to go back to */
2494e00368fSchristos     int gzlog_bail = 0;         /* which point to bail at (1..8) */
2504e00368fSchristos     int gzlog_count = -1;       /* number of times through to wait */
2514e00368fSchristos #   define BAIL(n) do { if (n == gzlog_bail && gzlog_count-- == 0) \
2524e00368fSchristos                             longjmp(gzlog_jump, gzlog_bail); } while (0)
2534e00368fSchristos #else
2544e00368fSchristos #   define BAIL(n)
2554e00368fSchristos #endif
2564e00368fSchristos 
2574e00368fSchristos /* how old the lock file can be in seconds before considering it stale */
/* (compared against the lock file's modify time in log_lock()) */
2584e00368fSchristos #define PATIENCE 300
2594e00368fSchristos
2604e00368fSchristos /* maximum stored block size in Kbytes -- must be in 1..63 */
2614e00368fSchristos #define MAX_STORE 16
2624e00368fSchristos
2634e00368fSchristos /* number of stored Kbytes to trigger compression (must be >= 32 to allow
2644e00368fSchristos    dictionary construction, and <= 204 * MAX_STORE, in order for >> 10 to
2654e00368fSchristos    discard the stored block headers contribution of five bytes each) */
2664e00368fSchristos #define TRIGGER 1024
2674e00368fSchristos
2684e00368fSchristos /* size of a deflate dictionary (this cannot be changed) */
2694e00368fSchristos #define DICT 32768U
2704e00368fSchristos
2714e00368fSchristos /* values for the operation (2 bits) */
/* log_mark() stores the value shifted left by three in the final byte of the
   extra field, and log_head() recovers it with (byte >> 3) & 3 */
2724e00368fSchristos #define NO_OP 0
2734e00368fSchristos #define APPEND_OP 1
2744e00368fSchristos #define COMPRESS_OP 2
2754e00368fSchristos #define REPLACE_OP 3
2764e00368fSchristos 
/* Macros to extract little-endian integers from an unsigned byte buffer.
   PULL2 yields a uint, PULL4 a ulong and PULL8 an off_t.  Every use of the
   argument is parenthesized so that any pointer expression may be passed
   (e.g. a conditional expression).  The argument is evaluated more than once,
   so it must not have side effects. */
#define PULL2(p) ((p)[0]+((uint)((p)[1])<<8))
#define PULL4(p) (PULL2(p)+((ulong)PULL2((p)+2)<<16))
#define PULL8(p) (PULL4(p)+((off_t)PULL4((p)+4)<<32))

/* Macros to store integers into a byte buffer in little-endian order.  p is
   the destination and a the value; only the low 2, 4 or 8 bytes of a are
   stored.  Both arguments are parenthesized at every use, so that a value
   such as "x | y" or "v ^ 0xffff" is shifted as a whole instead of having the
   shift bind to its last operand.  Both arguments are evaluated more than
   once, so they must not have side effects. */
#define PUT2(p,a) do {(p)[0]=(a);(p)[1]=(a)>>8;} while(0)
#define PUT4(p,a) do {PUT2(p,a);PUT2((p)+2,(a)>>16);} while(0)
#define PUT8(p,a) do {PUT4(p,a);PUT4((p)+4,(a)>>32);} while(0)
2864e00368fSchristos 
2874e00368fSchristos /* internal structure for log information */
/* The first, last, ccrc, clen, tcrc, tlen, stored and back members mirror the
   gzip extra field: log_head() loads them from foo.gz and log_mark() writes
   them back.  path/end are used to build the auxiliary file names by copying
   a suffix (e.g. ".lock") to end. */
2884e00368fSchristos #define LOGID "\106\035\172"    /* should be three non-zero characters */
2894e00368fSchristos struct log {
2904e00368fSchristos     char id[4];     /* contains LOGID to detect inadvertent overwrites */
2914e00368fSchristos     int fd;         /* file descriptor for .gz file, opened read/write */
2924e00368fSchristos     char *path;     /* allocated path, e.g. "/var/log/foo" or "foo" */
2934e00368fSchristos     char *end;      /* end of path, for appending suffixes such as ".gz" */
2944e00368fSchristos     off_t first;    /* offset of first stored block first length byte */
2954e00368fSchristos     int back;       /* location of first block id in bits back from first */
2964e00368fSchristos     uint stored;    /* bytes currently in last stored block */
2974e00368fSchristos     off_t last;     /* offset of last stored block first length byte */
2984e00368fSchristos     ulong ccrc;     /* crc of compressed data */
2994e00368fSchristos     ulong clen;     /* length (modulo 2^32) of compressed data */
3004e00368fSchristos     ulong tcrc;     /* crc of total data */
3014e00368fSchristos     ulong tlen;     /* length (modulo 2^32) of total data */
3024e00368fSchristos     time_t lock;    /* last modify time of our lock file */
3034e00368fSchristos };
3044e00368fSchristos 
3054e00368fSchristos /* gzip header for gzlog */
/* This is the fixed part of the file that precedes the extra field contents;
   log_head() requires the first HEAD bytes of foo.gz to match it exactly.
   The two-byte values (39 and 35) are stored little endian. */
3064e00368fSchristos local unsigned char log_gzhead[] = {
3074e00368fSchristos     0x1f, 0x8b,                 /* magic gzip id */
3084e00368fSchristos     8,                          /* compression method is deflate */
3094e00368fSchristos     4,                          /* there is an extra field (no file name) */
3104e00368fSchristos     0, 0, 0, 0,                 /* no modification time provided */
3114e00368fSchristos     0, 0xff,                    /* no extra flags, no OS specified */
3124e00368fSchristos     39, 0, 'a', 'p', 35, 0      /* extra field with "ap" subfield */
3134e00368fSchristos                                 /* 35 is EXTRA, 39 is EXTRA + 4 */
3144e00368fSchristos };
3154e00368fSchristos
3164e00368fSchristos #define HEAD sizeof(log_gzhead)     /* should be 16 */
3174e00368fSchristos 
3184e00368fSchristos /* initial gzip extra field content (52 == HEAD + EXTRA + 1) */
/* Byte offsets within this field, as read by log_head() and written by
   log_mark(): 0 first, 8 last, 16 ccrc, 20 clen, 24 tcrc, 28 tlen,
   32 stored, 34 bits back (low three bits, minus three) and operation
   (next two bits).  The final byte value 5 therefore decodes to a bits-back
   value of 8 with the operation bits zero (NO_OP). */
3194e00368fSchristos local unsigned char log_gzext[] = {
3204e00368fSchristos     52, 0, 0, 0, 0, 0, 0, 0,    /* offset of first stored block length */
3214e00368fSchristos     52, 0, 0, 0, 0, 0, 0, 0,    /* offset of last stored block length */
3224e00368fSchristos     0, 0, 0, 0, 0, 0, 0, 0,     /* compressed data crc and length */
3234e00368fSchristos     0, 0, 0, 0, 0, 0, 0, 0,     /* total data crc and length */
3244e00368fSchristos     0, 0,                       /* final stored block data length */
3254e00368fSchristos     5                           /* op is NO_OP, last bit 8 bits back */
3264e00368fSchristos };
3274e00368fSchristos
3284e00368fSchristos #define EXTRA sizeof(log_gzext)     /* should be 35 */
3294e00368fSchristos 
3304e00368fSchristos /* initial gzip data and trailer */
/* The stored block is one header byte (last-block bit set, remaining bits
   zero), a two-byte length of zero and its two-byte one's complement; the
   trailer is a four-byte crc and four-byte length, both zero for an empty
   log. */
3314e00368fSchristos local unsigned char log_gzbody[] = {
3324e00368fSchristos     1, 0, 0, 0xff, 0xff,        /* empty stored block (last) */
3334e00368fSchristos     0, 0, 0, 0,                 /* crc */
3344e00368fSchristos     0, 0, 0, 0                  /* uncompressed length */
3354e00368fSchristos };
3364e00368fSchristos
3374e00368fSchristos #define BODY sizeof(log_gzbody)     /* should be 13 */
3384e00368fSchristos 
3394e00368fSchristos /* Exclusively create foo.lock in order to negotiate exclusive access to the
3404e00368fSchristos    foo.* files.  If the modify time of an existing lock file is greater than
3414e00368fSchristos    PATIENCE seconds in the past, then consider the lock file to have been
3424e00368fSchristos    abandoned, delete it, and try the exclusive create again.  Save the lock
3434e00368fSchristos    file modify time for verification of ownership.  Return 0 on success, or -1
3444e00368fSchristos    on failure, usually due to an access restriction or invalid path.  Note that
3454e00368fSchristos    if stat() or unlink() fails, it may be due to another process noticing the
3464e00368fSchristos    abandoned lock file a smidge sooner and deleting it, so those are not
3474e00368fSchristos    flagged as an error. */
3484e00368fSchristos local int log_lock(struct log *log)
3494e00368fSchristos {
3504e00368fSchristos     int fd;
3514e00368fSchristos     struct stat st;
3524e00368fSchristos 
3534e00368fSchristos     strcpy(log->end, ".lock");
3544e00368fSchristos     while ((fd = open(log->path, O_CREAT | O_EXCL, 0644)) < 0) {
3554e00368fSchristos         if (errno != EEXIST)
3564e00368fSchristos             return -1;
3574e00368fSchristos         if (stat(log->path, &st) == 0 && time(NULL) - st.st_mtime > PATIENCE) {
3584e00368fSchristos             unlink(log->path);
3594e00368fSchristos             continue;
3604e00368fSchristos         }
3614e00368fSchristos         sleep(2);       /* relinquish the CPU for two seconds while waiting */
3624e00368fSchristos     }
3634e00368fSchristos     close(fd);
3644e00368fSchristos     if (stat(log->path, &st) == 0)
3654e00368fSchristos         log->lock = st.st_mtime;
3664e00368fSchristos     return 0;
3674e00368fSchristos }
3684e00368fSchristos 
3694e00368fSchristos /* Update the modify time of the lock file to now, in order to prevent another
3704e00368fSchristos    task from thinking that the lock is stale.  Save the lock file modify time
3714e00368fSchristos    for verification of ownership. */
3724e00368fSchristos local void log_touch(struct log *log)
3734e00368fSchristos {
3744e00368fSchristos     struct stat st;
3754e00368fSchristos 
3764e00368fSchristos     strcpy(log->end, ".lock");
3774e00368fSchristos     utimes(log->path, NULL);
3784e00368fSchristos     if (stat(log->path, &st) == 0)
3794e00368fSchristos         log->lock = st.st_mtime;
3804e00368fSchristos }
3814e00368fSchristos 
3824e00368fSchristos /* Check the log file modify time against what is expected.  Return true if
3834e00368fSchristos    this is not our lock.  If it is our lock, touch it to keep it. */
3844e00368fSchristos local int log_check(struct log *log)
3854e00368fSchristos {
3864e00368fSchristos     struct stat st;
3874e00368fSchristos 
3884e00368fSchristos     strcpy(log->end, ".lock");
3894e00368fSchristos     if (stat(log->path, &st) || st.st_mtime != log->lock)
3904e00368fSchristos         return 1;
3914e00368fSchristos     log_touch(log);
3924e00368fSchristos     return 0;
3934e00368fSchristos }
3944e00368fSchristos 
3954e00368fSchristos /* Unlock a previously acquired lock, but only if it's ours. */
3964e00368fSchristos local void log_unlock(struct log *log)
3974e00368fSchristos {
3984e00368fSchristos     if (log_check(log))
3994e00368fSchristos         return;
4004e00368fSchristos     strcpy(log->end, ".lock");
4014e00368fSchristos     unlink(log->path);
4024e00368fSchristos     log->lock = 0;
4034e00368fSchristos }
4044e00368fSchristos 
4054e00368fSchristos /* Check the gzip header and read in the extra field, filling in the values in
4064e00368fSchristos    the log structure.  Return op on success or -1 if the gzip header was not as
4074e00368fSchristos    expected.  op is the current operation in progress last written to the extra
4084e00368fSchristos    field.  This assumes that the gzip file has already been opened, with the
4094e00368fSchristos    file descriptor log->fd. */
4104e00368fSchristos local int log_head(struct log *log)
4114e00368fSchristos {
4124e00368fSchristos     int op;
4134e00368fSchristos     unsigned char buf[HEAD + EXTRA];
4144e00368fSchristos 
4154e00368fSchristos     if (lseek(log->fd, 0, SEEK_SET) < 0 ||
4164e00368fSchristos         read(log->fd, buf, HEAD + EXTRA) != HEAD + EXTRA ||
4174e00368fSchristos         memcmp(buf, log_gzhead, HEAD)) {
4184e00368fSchristos         return -1;
4194e00368fSchristos     }
4204e00368fSchristos     log->first = PULL8(buf + HEAD);
4214e00368fSchristos     log->last = PULL8(buf + HEAD + 8);
4224e00368fSchristos     log->ccrc = PULL4(buf + HEAD + 16);
4234e00368fSchristos     log->clen = PULL4(buf + HEAD + 20);
4244e00368fSchristos     log->tcrc = PULL4(buf + HEAD + 24);
4254e00368fSchristos     log->tlen = PULL4(buf + HEAD + 28);
4264e00368fSchristos     log->stored = PULL2(buf + HEAD + 32);
4274e00368fSchristos     log->back = 3 + (buf[HEAD + 34] & 7);
4284e00368fSchristos     op = (buf[HEAD + 34] >> 3) & 3;
4294e00368fSchristos     return op;
4304e00368fSchristos }
4314e00368fSchristos 
4324e00368fSchristos /* Write over the extra field contents, marking the operation as op.  Use fsync
4334e00368fSchristos    to assure that the device is written to, and in the requested order.  This
4344e00368fSchristos    operation, and only this operation, is assumed to be atomic in order to
4354e00368fSchristos    assure that the log is recoverable in the event of an interruption at any
4364e00368fSchristos    point in the process.  Return -1 if the write to foo.gz failed. */
4374e00368fSchristos local int log_mark(struct log *log, int op)
4384e00368fSchristos {
4394e00368fSchristos     int ret;
4404e00368fSchristos     unsigned char ext[EXTRA];
4414e00368fSchristos 
4424e00368fSchristos     PUT8(ext, log->first);
4434e00368fSchristos     PUT8(ext + 8, log->last);
4444e00368fSchristos     PUT4(ext + 16, log->ccrc);
4454e00368fSchristos     PUT4(ext + 20, log->clen);
4464e00368fSchristos     PUT4(ext + 24, log->tcrc);
4474e00368fSchristos     PUT4(ext + 28, log->tlen);
4484e00368fSchristos     PUT2(ext + 32, log->stored);
4494e00368fSchristos     ext[34] = log->back - 3 + (op << 3);
4504e00368fSchristos     fsync(log->fd);
4514e00368fSchristos     ret = lseek(log->fd, HEAD, SEEK_SET) < 0 ||
4524e00368fSchristos           write(log->fd, ext, EXTRA) != EXTRA ? -1 : 0;
4534e00368fSchristos     fsync(log->fd);
4544e00368fSchristos     return ret;
4554e00368fSchristos }
4564e00368fSchristos 
4574e00368fSchristos /* Rewrite the last block header bits and subsequent zero bits to get to a byte
4584e00368fSchristos    boundary, setting the last block bit if last is true, and then write the
4594e00368fSchristos    remainder of the stored block header (length and one's complement).  Leave
4604e00368fSchristos    the file pointer after the end of the last stored block data.  Return -1 if
4614e00368fSchristos    there is a read or write failure on the foo.gz file */
4624e00368fSchristos local int log_last(struct log *log, int last)
4634e00368fSchristos {
4644e00368fSchristos     int back, len, mask;
4654e00368fSchristos     unsigned char buf[6];
4664e00368fSchristos 
4674e00368fSchristos     /* determine the locations of the bytes and bits to modify */
4684e00368fSchristos     back = log->last == log->first ? log->back : 8;
4694e00368fSchristos     len = back > 8 ? 2 : 1;                 /* bytes back from log->last */
4704e00368fSchristos     mask = 0x80 >> ((back - 1) & 7);        /* mask for block last-bit */
4714e00368fSchristos 
4724e00368fSchristos     /* get the byte to modify (one or two back) into buf[0] -- don't need to
4734e00368fSchristos        read the byte if the last-bit is eight bits back, since in that case
4744e00368fSchristos        the entire byte will be modified */
4754e00368fSchristos     buf[0] = 0;
4764e00368fSchristos     if (back != 8 && (lseek(log->fd, log->last - len, SEEK_SET) < 0 ||
4774e00368fSchristos                       read(log->fd, buf, 1) != 1))
4784e00368fSchristos         return -1;
4794e00368fSchristos 
4804e00368fSchristos     /* change the last-bit of the last stored block as requested -- note
4814e00368fSchristos        that all bits above the last-bit are set to zero, per the type bits
4824e00368fSchristos        of a stored block being 00 and per the convention that the bits to
4834e00368fSchristos        bring the stream to a byte boundary are also zeros */
4844e00368fSchristos     buf[1] = 0;
4854e00368fSchristos     buf[2 - len] = (*buf & (mask - 1)) + (last ? mask : 0);
4864e00368fSchristos 
4874e00368fSchristos     /* write the modified stored block header and lengths, move the file
4884e00368fSchristos        pointer to after the last stored block data */
4894e00368fSchristos     PUT2(buf + 2, log->stored);
4904e00368fSchristos     PUT2(buf + 4, log->stored ^ 0xffff);
4914e00368fSchristos     return lseek(log->fd, log->last - len, SEEK_SET) < 0 ||
4924e00368fSchristos            write(log->fd, buf + 2 - len, len + 4) != len + 4 ||
4934e00368fSchristos            lseek(log->fd, log->stored, SEEK_CUR) < 0 ? -1 : 0;
4944e00368fSchristos }
4954e00368fSchristos 
4964e00368fSchristos /* Append len bytes from data to the locked and open log file.  len may be zero
4974e00368fSchristos    if recovering and no .add file was found.  In that case, the previous state
4984e00368fSchristos    of the foo.gz file is restored.  The data is appended uncompressed in
4994e00368fSchristos    deflate stored blocks.  Return -1 if there was an error reading or writing
5004e00368fSchristos    the foo.gz file. */
5014e00368fSchristos local int log_append(struct log *log, unsigned char *data, size_t len)
5024e00368fSchristos {
5034e00368fSchristos     uint put;
5044e00368fSchristos     off_t end;
5054e00368fSchristos     unsigned char buf[8];
5064e00368fSchristos 
5074e00368fSchristos     /* set the last block last-bit and length, in case recovering an
5084e00368fSchristos        interrupted append, then position the file pointer to append to the
5094e00368fSchristos        block */
5104e00368fSchristos     if (log_last(log, 1))
5114e00368fSchristos         return -1;
5124e00368fSchristos 
5134e00368fSchristos     /* append, adding stored blocks and updating the offset of the last stored
5144e00368fSchristos        block as needed, and update the total crc and length */
5154e00368fSchristos     while (len) {
5164e00368fSchristos         /* append as much as we can to the last block */
5174e00368fSchristos         put = (MAX_STORE << 10) - log->stored;
5184e00368fSchristos         if (put > len)
5194e00368fSchristos             put = (uint)len;
5204e00368fSchristos         if (put) {
5214e00368fSchristos             if (write(log->fd, data, put) != put)
5224e00368fSchristos                 return -1;
5234e00368fSchristos             BAIL(1);
5244e00368fSchristos             log->tcrc = crc32(log->tcrc, data, put);
5254e00368fSchristos             log->tlen += put;
5264e00368fSchristos             log->stored += put;
5274e00368fSchristos             data += put;
5284e00368fSchristos             len -= put;
5294e00368fSchristos         }
5304e00368fSchristos 
5314e00368fSchristos         /* if we need to, add a new empty stored block */
5324e00368fSchristos         if (len) {
5334e00368fSchristos             /* mark current block as not last */
5344e00368fSchristos             if (log_last(log, 0))
5354e00368fSchristos                 return -1;
5364e00368fSchristos 
5374e00368fSchristos             /* point to new, empty stored block */
5384e00368fSchristos             log->last += 4 + log->stored + 1;
5394e00368fSchristos             log->stored = 0;
5404e00368fSchristos         }
5414e00368fSchristos 
5424e00368fSchristos         /* mark last block as last, update its length */
5434e00368fSchristos         if (log_last(log, 1))
5444e00368fSchristos             return -1;
5454e00368fSchristos         BAIL(2);
5464e00368fSchristos     }
5474e00368fSchristos 
5484e00368fSchristos     /* write the new crc and length trailer, and truncate just in case (could
5494e00368fSchristos        be recovering from partial append with a missing foo.add file) */
5504e00368fSchristos     PUT4(buf, log->tcrc);
5514e00368fSchristos     PUT4(buf + 4, log->tlen);
5524e00368fSchristos     if (write(log->fd, buf, 8) != 8 ||
5534e00368fSchristos         (end = lseek(log->fd, 0, SEEK_CUR)) < 0 || ftruncate(log->fd, end))
5544e00368fSchristos         return -1;
5554e00368fSchristos 
5564e00368fSchristos     /* write the extra field, marking the log file as done, delete .add file */
5574e00368fSchristos     if (log_mark(log, NO_OP))
5584e00368fSchristos         return -1;
5594e00368fSchristos     strcpy(log->end, ".add");
5604e00368fSchristos     unlink(log->path);          /* ignore error, since may not exist */
5614e00368fSchristos     return 0;
5624e00368fSchristos }
5634e00368fSchristos 
5644e00368fSchristos /* Replace the foo.dict file with the foo.temp file.  Also delete the foo.add
5654e00368fSchristos    file, since the compress operation may have been interrupted before that was
5664e00368fSchristos    done.  Returns 1 if memory could not be allocated, or -1 if reading or
5674e00368fSchristos    writing foo.gz fails, or if the rename fails for some reason other than
5684e00368fSchristos    foo.temp not existing.  foo.temp not existing is a permitted error, since
5694e00368fSchristos    the replace operation may have been interrupted after the rename is done,
5704e00368fSchristos    but before foo.gz is marked as complete. */
5714e00368fSchristos local int log_replace(struct log *log)
5724e00368fSchristos {
5734e00368fSchristos     int ret;
5744e00368fSchristos     char *dest;
5754e00368fSchristos 
5764e00368fSchristos     /* delete foo.add file */
5774e00368fSchristos     strcpy(log->end, ".add");
5784e00368fSchristos     unlink(log->path);         /* ignore error, since may not exist */
5794e00368fSchristos     BAIL(3);
5804e00368fSchristos 
5814e00368fSchristos     /* rename foo.name to foo.dict, replacing foo.dict if it exists */
5824e00368fSchristos     strcpy(log->end, ".dict");
5834e00368fSchristos     dest = malloc(strlen(log->path) + 1);
5844e00368fSchristos     if (dest == NULL)
5854e00368fSchristos         return -2;
5864e00368fSchristos     strcpy(dest, log->path);
5874e00368fSchristos     strcpy(log->end, ".temp");
5884e00368fSchristos     ret = rename(log->path, dest);
5894e00368fSchristos     free(dest);
5904e00368fSchristos     if (ret && errno != ENOENT)
5914e00368fSchristos         return -1;
5924e00368fSchristos     BAIL(4);
5934e00368fSchristos 
5944e00368fSchristos     /* mark the foo.gz file as done */
5954e00368fSchristos     return log_mark(log, NO_OP);
5964e00368fSchristos }
5974e00368fSchristos 
5984e00368fSchristos /* Compress the len bytes at data and append the compressed data to the
5994e00368fSchristos    foo.gz deflate data immediately after the previous compressed data.  This
6004e00368fSchristos    overwrites the previous uncompressed data, which was stored in foo.add
6014e00368fSchristos    and is the data provided in data[0..len-1].  If this operation is
6024e00368fSchristos    interrupted, it picks up at the start of this routine, with the foo.add
6034e00368fSchristos    file read in again.  If there is no data to compress (len == 0), then we
6044e00368fSchristos    simply terminate the foo.gz file after the previously compressed data,
6054e00368fSchristos    appending a final empty stored block and the gzip trailer.  Return -1 if
6064e00368fSchristos    reading or writing the log.gz file failed, or -2 if there was a memory
6074e00368fSchristos    allocation failure. */
6084e00368fSchristos local int log_compress(struct log *log, unsigned char *data, size_t len)
6094e00368fSchristos {
6104e00368fSchristos     int fd;
6114e00368fSchristos     uint got, max;
6124e00368fSchristos     ssize_t dict;
6134e00368fSchristos     off_t end;
6144e00368fSchristos     z_stream strm;
6154e00368fSchristos     unsigned char buf[DICT];
6164e00368fSchristos 
6174e00368fSchristos     /* compress and append compressed data */
6184e00368fSchristos     if (len) {
6194e00368fSchristos         /* set up for deflate, allocating memory */
6204e00368fSchristos         strm.zalloc = Z_NULL;
6214e00368fSchristos         strm.zfree = Z_NULL;
6224e00368fSchristos         strm.opaque = Z_NULL;
6234e00368fSchristos         if (deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -15, 8,
6244e00368fSchristos                          Z_DEFAULT_STRATEGY) != Z_OK)
6254e00368fSchristos             return -2;
6264e00368fSchristos 
6274e00368fSchristos         /* read in dictionary (last 32K of data that was compressed) */
6284e00368fSchristos         strcpy(log->end, ".dict");
6294e00368fSchristos         fd = open(log->path, O_RDONLY, 0);
6304e00368fSchristos         if (fd >= 0) {
6314e00368fSchristos             dict = read(fd, buf, DICT);
6324e00368fSchristos             close(fd);
6334e00368fSchristos             if (dict < 0) {
6344e00368fSchristos                 deflateEnd(&strm);
6354e00368fSchristos                 return -1;
6364e00368fSchristos             }
6374e00368fSchristos             if (dict)
6384e00368fSchristos                 deflateSetDictionary(&strm, buf, (uint)dict);
6394e00368fSchristos         }
6404e00368fSchristos         log_touch(log);
6414e00368fSchristos 
6424e00368fSchristos         /* prime deflate with last bits of previous block, position write
6434e00368fSchristos            pointer to write those bits and overwrite what follows */
6444e00368fSchristos         if (lseek(log->fd, log->first - (log->back > 8 ? 2 : 1),
6454e00368fSchristos                 SEEK_SET) < 0 ||
6464e00368fSchristos             read(log->fd, buf, 1) != 1 || lseek(log->fd, -1, SEEK_CUR) < 0) {
6474e00368fSchristos             deflateEnd(&strm);
6484e00368fSchristos             return -1;
6494e00368fSchristos         }
6504e00368fSchristos         deflatePrime(&strm, (8 - log->back) & 7, *buf);
6514e00368fSchristos 
6524e00368fSchristos         /* compress, finishing with a partial non-last empty static block */
6534e00368fSchristos         strm.next_in = data;
6544e00368fSchristos         max = (((uint)0 - 1) >> 1) + 1; /* in case int smaller than size_t */
6554e00368fSchristos         do {
6564e00368fSchristos             strm.avail_in = len > max ? max : (uint)len;
6574e00368fSchristos             len -= strm.avail_in;
6584e00368fSchristos             do {
6594e00368fSchristos                 strm.avail_out = DICT;
6604e00368fSchristos                 strm.next_out = buf;
6614e00368fSchristos                 deflate(&strm, len ? Z_NO_FLUSH : Z_PARTIAL_FLUSH);
6624e00368fSchristos                 got = DICT - strm.avail_out;
6634e00368fSchristos                 if (got && write(log->fd, buf, got) != got) {
6644e00368fSchristos                     deflateEnd(&strm);
6654e00368fSchristos                     return -1;
6664e00368fSchristos                 }
6674e00368fSchristos                 log_touch(log);
6684e00368fSchristos             } while (strm.avail_out == 0);
6694e00368fSchristos         } while (len);
6704e00368fSchristos         deflateEnd(&strm);
6714e00368fSchristos         BAIL(5);
6724e00368fSchristos 
6734e00368fSchristos         /* find start of empty static block -- scanning backwards the first one
6744e00368fSchristos            bit is the second bit of the block, if the last byte is zero, then
6754e00368fSchristos            we know the byte before that has a one in the top bit, since an
6764e00368fSchristos            empty static block is ten bits long */
6774e00368fSchristos         if ((log->first = lseek(log->fd, -1, SEEK_CUR)) < 0 ||
6784e00368fSchristos             read(log->fd, buf, 1) != 1)
6794e00368fSchristos             return -1;
6804e00368fSchristos         log->first++;
6814e00368fSchristos         if (*buf) {
6824e00368fSchristos             log->back = 1;
6834e00368fSchristos             while ((*buf & ((uint)1 << (8 - log->back++))) == 0)
6844e00368fSchristos                 ;       /* guaranteed to terminate, since *buf != 0 */
6854e00368fSchristos         }
6864e00368fSchristos         else
6874e00368fSchristos             log->back = 10;
6884e00368fSchristos 
6894e00368fSchristos         /* update compressed crc and length */
6904e00368fSchristos         log->ccrc = log->tcrc;
6914e00368fSchristos         log->clen = log->tlen;
6924e00368fSchristos     }
6934e00368fSchristos     else {
6944e00368fSchristos         /* no data to compress -- fix up existing gzip stream */
6954e00368fSchristos         log->tcrc = log->ccrc;
6964e00368fSchristos         log->tlen = log->clen;
6974e00368fSchristos     }
6984e00368fSchristos 
6994e00368fSchristos     /* complete and truncate gzip stream */
7004e00368fSchristos     log->last = log->first;
7014e00368fSchristos     log->stored = 0;
7024e00368fSchristos     PUT4(buf, log->tcrc);
7034e00368fSchristos     PUT4(buf + 4, log->tlen);
7044e00368fSchristos     if (log_last(log, 1) || write(log->fd, buf, 8) != 8 ||
7054e00368fSchristos         (end = lseek(log->fd, 0, SEEK_CUR)) < 0 || ftruncate(log->fd, end))
7064e00368fSchristos         return -1;
7074e00368fSchristos     BAIL(6);
7084e00368fSchristos 
7094e00368fSchristos     /* mark as being in the replace operation */
7104e00368fSchristos     if (log_mark(log, REPLACE_OP))
7114e00368fSchristos         return -1;
7124e00368fSchristos 
7134e00368fSchristos     /* execute the replace operation and mark the file as done */
7144e00368fSchristos     return log_replace(log);
7154e00368fSchristos }
7164e00368fSchristos 
7174e00368fSchristos /* log a repair record to the .repairs file */
7184e00368fSchristos local void log_log(struct log *log, int op, char *record)
7194e00368fSchristos {
7204e00368fSchristos     time_t now;
7214e00368fSchristos     FILE *rec;
7224e00368fSchristos 
7234e00368fSchristos     now = time(NULL);
7244e00368fSchristos     strcpy(log->end, ".repairs");
7254e00368fSchristos     rec = fopen(log->path, "a");
7264e00368fSchristos     if (rec == NULL)
7274e00368fSchristos         return;
7284e00368fSchristos     fprintf(rec, "%.24s %s recovery: %s\n", ctime(&now), op == APPEND_OP ?
7294e00368fSchristos             "append" : (op == COMPRESS_OP ? "compress" : "replace"), record);
7304e00368fSchristos     fclose(rec);
7314e00368fSchristos     return;
7324e00368fSchristos }
7334e00368fSchristos 
7344e00368fSchristos /* Recover the interrupted operation op.  First read foo.add for recovering an
7354e00368fSchristos    append or compress operation.  Return -1 if there was an error reading or
7364e00368fSchristos    writing foo.gz or reading an existing foo.add, or -2 if there was a memory
7374e00368fSchristos    allocation failure. */
7384e00368fSchristos local int log_recover(struct log *log, int op)
7394e00368fSchristos {
7404e00368fSchristos     int fd, ret = 0;
7414e00368fSchristos     unsigned char *data = NULL;
7424e00368fSchristos     size_t len = 0;
7434e00368fSchristos     struct stat st;
7444e00368fSchristos 
7454e00368fSchristos     /* log recovery */
7464e00368fSchristos     log_log(log, op, "start");
7474e00368fSchristos 
7484e00368fSchristos     /* load foo.add file if expected and present */
7494e00368fSchristos     if (op == APPEND_OP || op == COMPRESS_OP) {
7504e00368fSchristos         strcpy(log->end, ".add");
7514e00368fSchristos         if (stat(log->path, &st) == 0 && st.st_size) {
7524e00368fSchristos             len = (size_t)(st.st_size);
753ed8eb4c2Schristos             if ((off_t)len != st.st_size ||
754ed8eb4c2Schristos                     (data = malloc(st.st_size)) == NULL) {
7554e00368fSchristos                 log_log(log, op, "allocation failure");
7564e00368fSchristos                 return -2;
7574e00368fSchristos             }
7584e00368fSchristos             if ((fd = open(log->path, O_RDONLY, 0)) < 0) {
759*6881a400Schristos                 free(data);
7604e00368fSchristos                 log_log(log, op, ".add file read failure");
7614e00368fSchristos                 return -1;
7624e00368fSchristos             }
763ed8eb4c2Schristos             ret = (size_t)read(fd, data, len) != len;
7644e00368fSchristos             close(fd);
7654e00368fSchristos             if (ret) {
766*6881a400Schristos                 free(data);
7674e00368fSchristos                 log_log(log, op, ".add file read failure");
7684e00368fSchristos                 return -1;
7694e00368fSchristos             }
7704e00368fSchristos             log_log(log, op, "loaded .add file");
7714e00368fSchristos         }
7724e00368fSchristos         else
7734e00368fSchristos             log_log(log, op, "missing .add file!");
7744e00368fSchristos     }
7754e00368fSchristos 
7764e00368fSchristos     /* recover the interrupted operation */
7774e00368fSchristos     switch (op) {
7784e00368fSchristos     case APPEND_OP:
7794e00368fSchristos         ret = log_append(log, data, len);
7804e00368fSchristos         break;
7814e00368fSchristos     case COMPRESS_OP:
7824e00368fSchristos         ret = log_compress(log, data, len);
7834e00368fSchristos         break;
7844e00368fSchristos     case REPLACE_OP:
7854e00368fSchristos         ret = log_replace(log);
7864e00368fSchristos     }
7874e00368fSchristos 
7884e00368fSchristos     /* log status */
7894e00368fSchristos     log_log(log, op, ret ? "failure" : "complete");
7904e00368fSchristos 
7914e00368fSchristos     /* clean up */
7924e00368fSchristos     if (data != NULL)
7934e00368fSchristos         free(data);
7944e00368fSchristos     return ret;
7954e00368fSchristos }
7964e00368fSchristos 
7974e00368fSchristos /* Close the foo.gz file (if open) and release the lock. */
7984e00368fSchristos local void log_close(struct log *log)
7994e00368fSchristos {
8004e00368fSchristos     if (log->fd >= 0)
8014e00368fSchristos         close(log->fd);
8024e00368fSchristos     log->fd = -1;
8034e00368fSchristos     log_unlock(log);
8044e00368fSchristos }
8054e00368fSchristos 
8064e00368fSchristos /* Open foo.gz, verify the header, and load the extra field contents, after
8074e00368fSchristos    first creating the foo.lock file to gain exclusive access to the foo.*
8084e00368fSchristos    files.  If foo.gz does not exist or is empty, then write the initial header,
8094e00368fSchristos    extra, and body content of an empty foo.gz log file.  If there is an error
8104e00368fSchristos    creating the lock file due to access restrictions, or an error reading or
8114e00368fSchristos    writing the foo.gz file, or if the foo.gz file is not a proper log file for
8124e00368fSchristos    this object (e.g. not a gzip file or does not contain the expected extra
8134e00368fSchristos    field), then return true.  If there is an error, the lock is released.
8144e00368fSchristos    Otherwise, the lock is left in place. */
8154e00368fSchristos local int log_open(struct log *log)
8164e00368fSchristos {
8174e00368fSchristos     int op;
8184e00368fSchristos 
8194e00368fSchristos     /* release open file resource if left over -- can occur if lock lost
8204e00368fSchristos        between gzlog_open() and gzlog_write() */
8214e00368fSchristos     if (log->fd >= 0)
8224e00368fSchristos         close(log->fd);
8234e00368fSchristos     log->fd = -1;
8244e00368fSchristos 
8254e00368fSchristos     /* negotiate exclusive access */
8264e00368fSchristos     if (log_lock(log) < 0)
8274e00368fSchristos         return -1;
8284e00368fSchristos 
8294e00368fSchristos     /* open the log file, foo.gz */
8304e00368fSchristos     strcpy(log->end, ".gz");
8314e00368fSchristos     log->fd = open(log->path, O_RDWR | O_CREAT, 0644);
8324e00368fSchristos     if (log->fd < 0) {
8334e00368fSchristos         log_close(log);
8344e00368fSchristos         return -1;
8354e00368fSchristos     }
8364e00368fSchristos 
8374e00368fSchristos     /* if new, initialize foo.gz with an empty log, delete old dictionary */
8384e00368fSchristos     if (lseek(log->fd, 0, SEEK_END) == 0) {
8394e00368fSchristos         if (write(log->fd, log_gzhead, HEAD) != HEAD ||
8404e00368fSchristos             write(log->fd, log_gzext, EXTRA) != EXTRA ||
8414e00368fSchristos             write(log->fd, log_gzbody, BODY) != BODY) {
8424e00368fSchristos             log_close(log);
8434e00368fSchristos             return -1;
8444e00368fSchristos         }
8454e00368fSchristos         strcpy(log->end, ".dict");
8464e00368fSchristos         unlink(log->path);
8474e00368fSchristos     }
8484e00368fSchristos 
8494e00368fSchristos     /* verify log file and load extra field information */
8504e00368fSchristos     if ((op = log_head(log)) < 0) {
8514e00368fSchristos         log_close(log);
8524e00368fSchristos         return -1;
8534e00368fSchristos     }
8544e00368fSchristos 
8554e00368fSchristos     /* check for interrupted process and if so, recover */
8564e00368fSchristos     if (op != NO_OP && log_recover(log, op)) {
8574e00368fSchristos         log_close(log);
8584e00368fSchristos         return -1;
8594e00368fSchristos     }
8604e00368fSchristos 
8614e00368fSchristos     /* touch the lock file to prevent another process from grabbing it */
8624e00368fSchristos     log_touch(log);
8634e00368fSchristos     return 0;
8644e00368fSchristos }
8654e00368fSchristos 
8664e00368fSchristos /* See gzlog.h for the description of the external methods below */
8674e00368fSchristos gzlog *gzlog_open(char *path)
8684e00368fSchristos {
8694e00368fSchristos     size_t n;
8704e00368fSchristos     struct log *log;
8714e00368fSchristos 
8724e00368fSchristos     /* check arguments */
8734e00368fSchristos     if (path == NULL || *path == 0)
8744e00368fSchristos         return NULL;
8754e00368fSchristos 
8764e00368fSchristos     /* allocate and initialize log structure */
8774e00368fSchristos     log = malloc(sizeof(struct log));
8784e00368fSchristos     if (log == NULL)
8794e00368fSchristos         return NULL;
8804e00368fSchristos     strcpy(log->id, LOGID);
8814e00368fSchristos     log->fd = -1;
8824e00368fSchristos 
8834e00368fSchristos     /* save path and end of path for name construction */
8844e00368fSchristos     n = strlen(path);
8854e00368fSchristos     log->path = malloc(n + 9);              /* allow for ".repairs" */
8864e00368fSchristos     if (log->path == NULL) {
8874e00368fSchristos         free(log);
8884e00368fSchristos         return NULL;
8894e00368fSchristos     }
8904e00368fSchristos     strcpy(log->path, path);
8914e00368fSchristos     log->end = log->path + n;
8924e00368fSchristos 
8934e00368fSchristos     /* gain exclusive access and verify log file -- may perform a
8944e00368fSchristos        recovery operation if needed */
8954e00368fSchristos     if (log_open(log)) {
8964e00368fSchristos         free(log->path);
8974e00368fSchristos         free(log);
8984e00368fSchristos         return NULL;
8994e00368fSchristos     }
9004e00368fSchristos 
9014e00368fSchristos     /* return pointer to log structure */
9024e00368fSchristos     return log;
9034e00368fSchristos }
9044e00368fSchristos 
9054e00368fSchristos /* gzlog_compress() return values:
9064e00368fSchristos     0: all good
9074e00368fSchristos    -1: file i/o error (usually access issue)
9084e00368fSchristos    -2: memory allocation failure
9094e00368fSchristos    -3: invalid log pointer argument */
9104e00368fSchristos int gzlog_compress(gzlog *logd)
9114e00368fSchristos {
9124e00368fSchristos     int fd, ret;
9134e00368fSchristos     uint block;
9144e00368fSchristos     size_t len, next;
9154e00368fSchristos     unsigned char *data, buf[5];
9164e00368fSchristos     struct log *log = logd;
9174e00368fSchristos 
9184e00368fSchristos     /* check arguments */
919ed8eb4c2Schristos     if (log == NULL || strcmp(log->id, LOGID))
9204e00368fSchristos         return -3;
9214e00368fSchristos 
9224e00368fSchristos     /* see if we lost the lock -- if so get it again and reload the extra
9234e00368fSchristos        field information (it probably changed), recover last operation if
9244e00368fSchristos        necessary */
9254e00368fSchristos     if (log_check(log) && log_open(log))
9264e00368fSchristos         return -1;
9274e00368fSchristos 
9284e00368fSchristos     /* create space for uncompressed data */
9294e00368fSchristos     len = ((size_t)(log->last - log->first) & ~(((size_t)1 << 10) - 1)) +
9304e00368fSchristos           log->stored;
9314e00368fSchristos     if ((data = malloc(len)) == NULL)
9324e00368fSchristos         return -2;
9334e00368fSchristos 
9344e00368fSchristos     /* do statement here is just a cheap trick for error handling */
9354e00368fSchristos     do {
9364e00368fSchristos         /* read in the uncompressed data */
9374e00368fSchristos         if (lseek(log->fd, log->first - 1, SEEK_SET) < 0)
9384e00368fSchristos             break;
9394e00368fSchristos         next = 0;
9404e00368fSchristos         while (next < len) {
9414e00368fSchristos             if (read(log->fd, buf, 5) != 5)
9424e00368fSchristos                 break;
9434e00368fSchristos             block = PULL2(buf + 1);
9444e00368fSchristos             if (next + block > len ||
9454e00368fSchristos                 read(log->fd, (char *)data + next, block) != block)
9464e00368fSchristos                 break;
9474e00368fSchristos             next += block;
9484e00368fSchristos         }
9494e00368fSchristos         if (lseek(log->fd, 0, SEEK_CUR) != log->last + 4 + log->stored)
9504e00368fSchristos             break;
9514e00368fSchristos         log_touch(log);
9524e00368fSchristos 
9534e00368fSchristos         /* write the uncompressed data to the .add file */
9544e00368fSchristos         strcpy(log->end, ".add");
9554e00368fSchristos         fd = open(log->path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
9564e00368fSchristos         if (fd < 0)
9574e00368fSchristos             break;
958ed8eb4c2Schristos         ret = (size_t)write(fd, data, len) != len;
9594e00368fSchristos         if (ret | close(fd))
9604e00368fSchristos             break;
9614e00368fSchristos         log_touch(log);
9624e00368fSchristos 
9634e00368fSchristos         /* write the dictionary for the next compress to the .temp file */
9644e00368fSchristos         strcpy(log->end, ".temp");
9654e00368fSchristos         fd = open(log->path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
9664e00368fSchristos         if (fd < 0)
9674e00368fSchristos             break;
9684e00368fSchristos         next = DICT > len ? len : DICT;
969ed8eb4c2Schristos         ret = (size_t)write(fd, (char *)data + len - next, next) != next;
9704e00368fSchristos         if (ret | close(fd))
9714e00368fSchristos             break;
9724e00368fSchristos         log_touch(log);
9734e00368fSchristos 
9744e00368fSchristos         /* roll back to compressed data, mark the compress in progress */
9754e00368fSchristos         log->last = log->first;
9764e00368fSchristos         log->stored = 0;
9774e00368fSchristos         if (log_mark(log, COMPRESS_OP))
9784e00368fSchristos             break;
9794e00368fSchristos         BAIL(7);
9804e00368fSchristos 
9814e00368fSchristos         /* compress and append the data (clears mark) */
9824e00368fSchristos         ret = log_compress(log, data, len);
9834e00368fSchristos         free(data);
9844e00368fSchristos         return ret;
9854e00368fSchristos     } while (0);
9864e00368fSchristos 
9874e00368fSchristos     /* broke out of do above on i/o error */
9884e00368fSchristos     free(data);
9894e00368fSchristos     return -1;
9904e00368fSchristos }
9914e00368fSchristos 
9924e00368fSchristos /* gzlog_write() return values:
9934e00368fSchristos     0: all good
9944e00368fSchristos    -1: file i/o error (usually access issue)
9954e00368fSchristos    -2: memory allocation failure
9964e00368fSchristos    -3: invalid log pointer argument */
9974e00368fSchristos int gzlog_write(gzlog *logd, void *data, size_t len)
9984e00368fSchristos {
9994e00368fSchristos     int fd, ret;
10004e00368fSchristos     struct log *log = logd;
10014e00368fSchristos 
10024e00368fSchristos     /* check arguments */
1003ed8eb4c2Schristos     if (log == NULL || strcmp(log->id, LOGID))
10044e00368fSchristos         return -3;
1005ed8eb4c2Schristos     if (data == NULL || len <= 0)
10064e00368fSchristos         return 0;
10074e00368fSchristos 
10084e00368fSchristos     /* see if we lost the lock -- if so get it again and reload the extra
10094e00368fSchristos        field information (it probably changed), recover last operation if
10104e00368fSchristos        necessary */
10114e00368fSchristos     if (log_check(log) && log_open(log))
10124e00368fSchristos         return -1;
10134e00368fSchristos 
10144e00368fSchristos     /* create and write .add file */
10154e00368fSchristos     strcpy(log->end, ".add");
10164e00368fSchristos     fd = open(log->path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
10174e00368fSchristos     if (fd < 0)
10184e00368fSchristos         return -1;
1019ed8eb4c2Schristos     ret = (size_t)write(fd, data, len) != len;
10204e00368fSchristos     if (ret | close(fd))
10214e00368fSchristos         return -1;
10224e00368fSchristos     log_touch(log);
10234e00368fSchristos 
10244e00368fSchristos     /* mark log file with append in progress */
10254e00368fSchristos     if (log_mark(log, APPEND_OP))
10264e00368fSchristos         return -1;
10274e00368fSchristos     BAIL(8);
10284e00368fSchristos 
10294e00368fSchristos     /* append data (clears mark) */
10304e00368fSchristos     if (log_append(log, data, len))
10314e00368fSchristos         return -1;
10324e00368fSchristos 
10334e00368fSchristos     /* check to see if it's time to compress -- if not, then done */
10344e00368fSchristos     if (((log->last - log->first) >> 10) + (log->stored >> 10) < TRIGGER)
10354e00368fSchristos         return 0;
10364e00368fSchristos 
10374e00368fSchristos     /* time to compress */
10384e00368fSchristos     return gzlog_compress(log);
10394e00368fSchristos }
10404e00368fSchristos 
10414e00368fSchristos /* gzlog_close() return values:
10424e00368fSchristos     0: ok
10434e00368fSchristos    -3: invalid log pointer argument */
10444e00368fSchristos int gzlog_close(gzlog *logd)
10454e00368fSchristos {
10464e00368fSchristos     struct log *log = logd;
10474e00368fSchristos 
10484e00368fSchristos     /* check arguments */
10494e00368fSchristos     if (log == NULL || strcmp(log->id, LOGID))
10504e00368fSchristos         return -3;
10514e00368fSchristos 
10524e00368fSchristos     /* close the log file and release the lock */
10534e00368fSchristos     log_close(log);
10544e00368fSchristos 
10554e00368fSchristos     /* free structure and return */
10564e00368fSchristos     if (log->path != NULL)
10574e00368fSchristos         free(log->path);
10584e00368fSchristos     strcpy(log->id, "bad");
10594e00368fSchristos     free(log);
10604e00368fSchristos     return 0;
10614e00368fSchristos }
1062