14e00368fSchristos /* 24e00368fSchristos * gzlog.c 3*6881a400Schristos * Copyright (C) 2004, 2008, 2012, 2016, 2019 Mark Adler, all rights reserved 44e00368fSchristos * For conditions of distribution and use, see copyright notice in gzlog.h 5*6881a400Schristos * version 2.3, 25 May 2019 64e00368fSchristos */ 74e00368fSchristos 84e00368fSchristos /* 94e00368fSchristos gzlog provides a mechanism for frequently appending short strings to a gzip 104e00368fSchristos file that is efficient both in execution time and compression ratio. The 114e00368fSchristos strategy is to write the short strings in an uncompressed form to the end of 124e00368fSchristos the gzip file, only compressing when the amount of uncompressed data has 134e00368fSchristos reached a given threshold. 144e00368fSchristos 154e00368fSchristos gzlog also provides protection against interruptions in the process due to 164e00368fSchristos system crashes. The status of the operation is recorded in an extra field 174e00368fSchristos in the gzip file, and is only updated once the gzip file is brought to a 184e00368fSchristos valid state. The last data to be appended or compressed is saved in an 194e00368fSchristos auxiliary file, so that if the operation is interrupted, it can be completed 204e00368fSchristos the next time an append operation is attempted. 214e00368fSchristos 224e00368fSchristos gzlog maintains another auxiliary file with the last 32K of data from the 234e00368fSchristos compressed portion, which is preloaded for the compression of the subsequent 244e00368fSchristos data. This minimizes the impact to the compression ratio of appending. 
254e00368fSchristos */ 264e00368fSchristos 274e00368fSchristos /* 284e00368fSchristos Operations Concept: 294e00368fSchristos 304e00368fSchristos Files (log name "foo"): 314e00368fSchristos foo.gz -- gzip file with the complete log 324e00368fSchristos foo.add -- last message to append or last data to compress 334e00368fSchristos foo.dict -- dictionary of the last 32K of data for next compression 344e00368fSchristos foo.temp -- temporary dictionary file for compression after this one 354e00368fSchristos foo.lock -- lock file for reading and writing the other files 364e00368fSchristos foo.repairs -- log file for log file recovery operations (not compressed) 374e00368fSchristos 384e00368fSchristos gzip file structure: 394e00368fSchristos - fixed-length (no file name) header with extra field (see below) 404e00368fSchristos - compressed data ending initially with empty stored block 414e00368fSchristos - uncompressed data filling out originally empty stored block and 424e00368fSchristos subsequent stored blocks as needed (16K max each) 434e00368fSchristos - gzip trailer 444e00368fSchristos - no junk at end (no other gzip streams) 454e00368fSchristos 464e00368fSchristos When appending data, the information in the first three items above plus the 474e00368fSchristos foo.add file are sufficient to recover an interrupted append operation. The 484e00368fSchristos extra field has the necessary information to restore the start of the last 494e00368fSchristos stored block and determine where to append the data in the foo.add file, as 504e00368fSchristos well as the crc and length of the gzip data before the append operation. 514e00368fSchristos 524e00368fSchristos The foo.add file is created before the gzip file is marked for append, and 534e00368fSchristos deleted after the gzip file is marked as complete. So if the append 544e00368fSchristos operation is interrupted, the data to add will still be there. 
If due to 554e00368fSchristos some external force, the foo.add file gets deleted between when the append 564e00368fSchristos operation was interrupted and when recovery is attempted, the gzip file will 574e00368fSchristos still be restored, but without the appended data. 584e00368fSchristos 594e00368fSchristos When compressing data, the information in the first two items above plus the 604e00368fSchristos foo.add file are sufficient to recover an interrupted compress operation. 614e00368fSchristos The extra field has the necessary information to find the end of the 624e00368fSchristos compressed data, and contains both the crc and length of just the compressed 634e00368fSchristos data and of the complete set of data including the contents of the foo.add 644e00368fSchristos file. 654e00368fSchristos 664e00368fSchristos Again, the foo.add file is maintained during the compress operation in case 674e00368fSchristos of an interruption. If in the unlikely event the foo.add file with the data 684e00368fSchristos to be compressed is missing due to some external force, a gzip file with 694e00368fSchristos just the previous compressed data will be reconstructed. In this case, all 704e00368fSchristos of the data that was to be compressed is lost (approximately one megabyte). 714e00368fSchristos This will not occur if all that happened was an interruption of the compress 724e00368fSchristos operation. 734e00368fSchristos 744e00368fSchristos The third state that is marked is the replacement of the old dictionary with 754e00368fSchristos the new dictionary after a compress operation. Once compression is 764e00368fSchristos complete, the gzip file is marked as being in the replace state. This 774e00368fSchristos completes the gzip file, so an interrupt after being so marked does not 784e00368fSchristos result in recompression. Then the dictionary file is replaced, and the gzip 794e00368fSchristos file is marked as completed. 
This state prevents the possibility of 804e00368fSchristos restarting compression with the wrong dictionary file. 814e00368fSchristos 824e00368fSchristos All three operations are wrapped by a lock/unlock procedure. In order to 834e00368fSchristos gain exclusive access to the log files, first a foo.lock file must be 844e00368fSchristos exclusively created. When all operations are complete, the lock is 854e00368fSchristos released by deleting the foo.lock file. If when attempting to create the 864e00368fSchristos lock file, it already exists and the modify time of the lock file is more 874e00368fSchristos than five minutes old (set by the PATIENCE define below), then the old 884e00368fSchristos lock file is considered stale and deleted, and the exclusive creation of 894e00368fSchristos the lock file is retried. To assure that there are no false assessments 904e00368fSchristos of the staleness of the lock file, the operations periodically touch the 914e00368fSchristos lock file to update the modified date. 924e00368fSchristos 934e00368fSchristos Following is the definition of the extra field with all of the information 944e00368fSchristos required to enable the above append and compress operations and their 954e00368fSchristos recovery if interrupted. Multi-byte values are stored little endian 964e00368fSchristos (consistent with the gzip format). File pointers are eight bytes long. 974e00368fSchristos The crc's and lengths for the gzip trailer are four bytes long. (Note that 984e00368fSchristos the length at the end of a gzip file is used for error checking only, and 994e00368fSchristos for large files is actually the length modulo 2^32.) The stored block 1004e00368fSchristos length is two bytes long. The gzip extra field two-byte identification is 1014e00368fSchristos "ap" for append. It is assumed that writing the extra field to the file is 1024e00368fSchristos an "atomic" operation. 
   That is, either all of the extra field is written to the file, or none of it
   is, if the operation is interrupted right at the point of updating the extra
   field.  This is a reasonable assumption, since the extra field is within the
   first 52 bytes of the file, which is smaller than any expected block size
   for a mass storage device (usually 512 bytes or larger).

   Extra field (35 bytes):
   - Pointer to first stored block length -- this points to the two-byte length
     of the first stored block, which is followed by the two-byte, one's
     complement of that length.  The stored block length is preceded by the
     three-bit header of the stored block, which is the actual start of the
     stored block in the deflate format.  See the bit offset field below.
   - Pointer to the last stored block length.  This is the same as above, but
     for the last stored block of the uncompressed data in the gzip file.
     Initially this is the same as the first stored block length pointer.
     When the stored block gets to 16K (see the MAX_STORE define), then a new
     stored block is added, at which point the last stored block length pointer
     is different from the first stored block length pointer.  When they are
     different, the first bit of the last stored block header is eight bits, or
     one byte back from the block length.
   - Compressed data crc and length.  This is the crc and length of the data
     that is in the compressed portion of the deflate stream.
These are used 1254e00368fSchristos only in the event that the foo.add file containing the data to compress is 1264e00368fSchristos lost after a compress operation is interrupted. 1274e00368fSchristos - Total data crc and length. This is the crc and length of all of the data 1284e00368fSchristos stored in the gzip file, compressed and uncompressed. It is used to 1294e00368fSchristos reconstruct the gzip trailer when compressing, as well as when recovering 1304e00368fSchristos interrupted operations. 1314e00368fSchristos - Final stored block length. This is used to quickly find where to append, 1324e00368fSchristos and allows the restoration of the original final stored block state when 1334e00368fSchristos an append operation is interrupted. 1344e00368fSchristos - First stored block start as the number of bits back from the final stored 1354e00368fSchristos block first length byte. This value is in the range of 3..10, and is 1364e00368fSchristos stored as the low three bits of the final byte of the extra field after 1374e00368fSchristos subtracting three (0..7). This allows the last-block bit of the stored 1384e00368fSchristos block header to be updated when a new stored block is added, for the case 1394e00368fSchristos when the first stored block and the last stored block are the same. (When 1404e00368fSchristos they are different, the numbers of bits back is known to be eight.) This 1414e00368fSchristos also allows for new compressed data to be appended to the old compressed 1424e00368fSchristos data in the compress operation, overwriting the previous first stored 1434e00368fSchristos block, or for the compressed data to be terminated and a valid gzip file 1444e00368fSchristos reconstructed on the off chance that a compression operation was 1454e00368fSchristos interrupted and the data to compress in the foo.add file was deleted. 1464e00368fSchristos - The operation in process. 
     This is the next two bits in the last byte (the bits under the mask
     0x18).  They are interpreted as 0: nothing in process, 1: append in
     process, 2: compress in process, 3: replace in process.
   - The top three bits of the last byte in the extra field are reserved and
     are currently set to zero.

   Main procedure:
   - Exclusively create the foo.lock file using the O_CREAT and O_EXCL modes of
     the system open() call.  If the modify time of an existing lock file is
     more than PATIENCE seconds old, then the lock file is deleted and the
     exclusive create is retried.
   - Load the extra field from the foo.gz file, and see if an operation was in
     progress but not completed.  If so, apply the recovery procedure below.
   - Perform the append procedure with the provided data.
   - If the uncompressed data in the foo.gz file is 1MB or more, apply the
     compress procedure.
   - Delete the foo.lock file.

   Append procedure:
   - Put what to append in the foo.add file so that the operation can be
     restarted if this procedure is interrupted.
   - Mark the foo.gz extra field with the append operation in progress.
   + Restore the original last-block bit and stored block length of the last
     stored block from the information in the extra field, in case a previous
     append operation was interrupted.
   - Append the provided data to the last stored block, creating new stored
     blocks as needed and updating the stored blocks' last-block bits and
     lengths.
1744e00368fSchristos - Update the crc and length with the new data, and write the gzip trailer. 1754e00368fSchristos - Write over the extra field (with a single write operation) with the new 1764e00368fSchristos pointers, lengths, and crc's, and mark the gzip file as not in process. 1774e00368fSchristos Though there is still a foo.add file, it will be ignored since nothing 1784e00368fSchristos is in process. If a foo.add file is leftover from a previously 1794e00368fSchristos completed operation, it is truncated when writing new data to it. 1804e00368fSchristos - Delete the foo.add file. 1814e00368fSchristos 1824e00368fSchristos Compress and replace procedures: 1834e00368fSchristos - Read all of the uncompressed data in the stored blocks in foo.gz and write 1844e00368fSchristos it to foo.add. Also write foo.temp with the last 32K of that data to 1854e00368fSchristos provide a dictionary for the next invocation of this procedure. 1864e00368fSchristos - Rewrite the extra field marking foo.gz with a compression in process. 1874e00368fSchristos * If there is no data provided to compress (due to a missing foo.add file 1884e00368fSchristos when recovering), reconstruct and truncate the foo.gz file to contain 1894e00368fSchristos only the previous compressed data and proceed to the step after the next 1904e00368fSchristos one. Otherwise ... 1914e00368fSchristos - Compress the data with the dictionary in foo.dict, and write to the 1924e00368fSchristos foo.gz file starting at the bit immediately following the last previously 1934e00368fSchristos compressed block. If there is no foo.dict, proceed anyway with the 1944e00368fSchristos compression at slightly reduced efficiency. (For the foo.dict file to be 1954e00368fSchristos missing requires some external failure beyond simply the interruption of 1964e00368fSchristos a compress operation.) 
     During this process, the foo.lock file is periodically touched to assure
     that that file is not considered stale by another process before we're
     done.  The deflation is terminated with a non-last empty static block (10
     bits long), that is then located and written over by a last-bit-set empty
     stored block.
   - Append the crc and length of the data in the gzip file (previously
     calculated during the append operations).
   - Write over the extra field with the updated stored block offsets, bits
     back, crc's, and lengths, and mark foo.gz as in process for a replacement
     of the dictionary.
   @ Delete the foo.add file.
   - Replace foo.dict with foo.temp.
   - Write over the extra field, marking foo.gz as complete.

   Recovery procedure:
   - If not a replace recovery, read in the foo.add file, and provide that data
     to the appropriate recovery below.  If there is no foo.add file, provide
     a zero data length to the recovery.  In that case, the append recovery
     restores the foo.gz to the previous compressed + uncompressed data state.
     For the compress recovery, a missing foo.add file results in foo.gz
     being restored to the previous compressed-only data state.
2174e00368fSchristos - Append recovery: 2184e00368fSchristos - Pick up append at + step above 2194e00368fSchristos - Compress recovery: 2204e00368fSchristos - Pick up compress at * step above 2214e00368fSchristos - Replace recovery: 2224e00368fSchristos - Pick up compress at @ step above 2234e00368fSchristos - Log the repair with a date stamp in foo.repairs 2244e00368fSchristos */ 2254e00368fSchristos 2264e00368fSchristos #include <sys/types.h> 2274e00368fSchristos #include <stdio.h> /* rename, fopen, fprintf, fclose */ 2284e00368fSchristos #include <stdlib.h> /* malloc, free */ 2294e00368fSchristos #include <string.h> /* strlen, strrchr, strcpy, strncpy, strcmp */ 2304e00368fSchristos #include <fcntl.h> /* open */ 2314e00368fSchristos #include <unistd.h> /* lseek, read, write, close, unlink, sleep, */ 2324e00368fSchristos /* ftruncate, fsync */ 2334e00368fSchristos #include <errno.h> /* errno */ 2344e00368fSchristos #include <time.h> /* time, ctime */ 2354e00368fSchristos #include <sys/stat.h> /* stat */ 2364e00368fSchristos #include <sys/time.h> /* utimes */ 2374e00368fSchristos #include "zlib.h" /* crc32 */ 2384e00368fSchristos 2394e00368fSchristos #include "gzlog.h" /* header for external access */ 2404e00368fSchristos 2414e00368fSchristos #define local static 2424e00368fSchristos typedef unsigned int uint; 2434e00368fSchristos typedef unsigned long ulong; 2444e00368fSchristos 2454e00368fSchristos /* Macro for debugging to deterministically force recovery operations */ 246699b0f92Schristos #ifdef GZLOG_DEBUG 2474e00368fSchristos #include <setjmp.h> /* longjmp */ 2484e00368fSchristos jmp_buf gzlog_jump; /* where to go back to */ 2494e00368fSchristos int gzlog_bail = 0; /* which point to bail at (1..8) */ 2504e00368fSchristos int gzlog_count = -1; /* number of times through to wait */ 2514e00368fSchristos # define BAIL(n) do { if (n == gzlog_bail && gzlog_count-- == 0) \ 2524e00368fSchristos longjmp(gzlog_jump, gzlog_bail); } while (0) 2534e00368fSchristos #else 
2544e00368fSchristos # define BAIL(n) 2554e00368fSchristos #endif 2564e00368fSchristos 2574e00368fSchristos /* how old the lock file can be in seconds before considering it stale */ 2584e00368fSchristos #define PATIENCE 300 2594e00368fSchristos 2604e00368fSchristos /* maximum stored block size in Kbytes -- must be in 1..63 */ 2614e00368fSchristos #define MAX_STORE 16 2624e00368fSchristos 2634e00368fSchristos /* number of stored Kbytes to trigger compression (must be >= 32 to allow 2644e00368fSchristos dictionary construction, and <= 204 * MAX_STORE, in order for >> 10 to 2654e00368fSchristos discard the stored block headers contribution of five bytes each) */ 2664e00368fSchristos #define TRIGGER 1024 2674e00368fSchristos 2684e00368fSchristos /* size of a deflate dictionary (this cannot be changed) */ 2694e00368fSchristos #define DICT 32768U 2704e00368fSchristos 2714e00368fSchristos /* values for the operation (2 bits) */ 2724e00368fSchristos #define NO_OP 0 2734e00368fSchristos #define APPEND_OP 1 2744e00368fSchristos #define COMPRESS_OP 2 2754e00368fSchristos #define REPLACE_OP 3 2764e00368fSchristos 2774e00368fSchristos /* macros to extract little-endian integers from an unsigned byte buffer */ 2784e00368fSchristos #define PULL2(p) ((p)[0]+((uint)((p)[1])<<8)) 2794e00368fSchristos #define PULL4(p) (PULL2(p)+((ulong)PULL2(p+2)<<16)) 2804e00368fSchristos #define PULL8(p) (PULL4(p)+((off_t)PULL4(p+4)<<32)) 2814e00368fSchristos 2824e00368fSchristos /* macros to store integers into a byte buffer in little-endian order */ 2834e00368fSchristos #define PUT2(p,a) do {(p)[0]=a;(p)[1]=(a)>>8;} while(0) 2844e00368fSchristos #define PUT4(p,a) do {PUT2(p,a);PUT2(p+2,a>>16);} while(0) 2854e00368fSchristos #define PUT8(p,a) do {PUT4(p,a);PUT4(p+4,a>>32);} while(0) 2864e00368fSchristos 2874e00368fSchristos /* internal structure for log information */ 2884e00368fSchristos #define LOGID "\106\035\172" /* should be three non-zero characters */ 2894e00368fSchristos struct log { 
2904e00368fSchristos char id[4]; /* contains LOGID to detect inadvertent overwrites */ 2914e00368fSchristos int fd; /* file descriptor for .gz file, opened read/write */ 2924e00368fSchristos char *path; /* allocated path, e.g. "/var/log/foo" or "foo" */ 2934e00368fSchristos char *end; /* end of path, for appending suffices such as ".gz" */ 2944e00368fSchristos off_t first; /* offset of first stored block first length byte */ 2954e00368fSchristos int back; /* location of first block id in bits back from first */ 2964e00368fSchristos uint stored; /* bytes currently in last stored block */ 2974e00368fSchristos off_t last; /* offset of last stored block first length byte */ 2984e00368fSchristos ulong ccrc; /* crc of compressed data */ 2994e00368fSchristos ulong clen; /* length (modulo 2^32) of compressed data */ 3004e00368fSchristos ulong tcrc; /* crc of total data */ 3014e00368fSchristos ulong tlen; /* length (modulo 2^32) of total data */ 3024e00368fSchristos time_t lock; /* last modify time of our lock file */ 3034e00368fSchristos }; 3044e00368fSchristos 3054e00368fSchristos /* gzip header for gzlog */ 3064e00368fSchristos local unsigned char log_gzhead[] = { 3074e00368fSchristos 0x1f, 0x8b, /* magic gzip id */ 3084e00368fSchristos 8, /* compression method is deflate */ 3094e00368fSchristos 4, /* there is an extra field (no file name) */ 3104e00368fSchristos 0, 0, 0, 0, /* no modification time provided */ 3114e00368fSchristos 0, 0xff, /* no extra flags, no OS specified */ 3124e00368fSchristos 39, 0, 'a', 'p', 35, 0 /* extra field with "ap" subfield */ 3134e00368fSchristos /* 35 is EXTRA, 39 is EXTRA + 4 */ 3144e00368fSchristos }; 3154e00368fSchristos 3164e00368fSchristos #define HEAD sizeof(log_gzhead) /* should be 16 */ 3174e00368fSchristos 3184e00368fSchristos /* initial gzip extra field content (52 == HEAD + EXTRA + 1) */ 3194e00368fSchristos local unsigned char log_gzext[] = { 3204e00368fSchristos 52, 0, 0, 0, 0, 0, 0, 0, /* offset of first stored block length 
*/ 3214e00368fSchristos 52, 0, 0, 0, 0, 0, 0, 0, /* offset of last stored block length */ 3224e00368fSchristos 0, 0, 0, 0, 0, 0, 0, 0, /* compressed data crc and length */ 3234e00368fSchristos 0, 0, 0, 0, 0, 0, 0, 0, /* total data crc and length */ 3244e00368fSchristos 0, 0, /* final stored block data length */ 3254e00368fSchristos 5 /* op is NO_OP, last bit 8 bits back */ 3264e00368fSchristos }; 3274e00368fSchristos 3284e00368fSchristos #define EXTRA sizeof(log_gzext) /* should be 35 */ 3294e00368fSchristos 3304e00368fSchristos /* initial gzip data and trailer */ 3314e00368fSchristos local unsigned char log_gzbody[] = { 3324e00368fSchristos 1, 0, 0, 0xff, 0xff, /* empty stored block (last) */ 3334e00368fSchristos 0, 0, 0, 0, /* crc */ 3344e00368fSchristos 0, 0, 0, 0 /* uncompressed length */ 3354e00368fSchristos }; 3364e00368fSchristos 3374e00368fSchristos #define BODY sizeof(log_gzbody) 3384e00368fSchristos 3394e00368fSchristos /* Exclusively create foo.lock in order to negotiate exclusive access to the 3404e00368fSchristos foo.* files. If the modify time of an existing lock file is greater than 3414e00368fSchristos PATIENCE seconds in the past, then consider the lock file to have been 3424e00368fSchristos abandoned, delete it, and try the exclusive create again. Save the lock 3434e00368fSchristos file modify time for verification of ownership. Return 0 on success, or -1 3444e00368fSchristos on failure, usually due to an access restriction or invalid path. Note that 3454e00368fSchristos if stat() or unlink() fails, it may be due to another process noticing the 3464e00368fSchristos abandoned lock file a smidge sooner and deleting it, so those are not 3474e00368fSchristos flagged as an error. 
*/ 3484e00368fSchristos local int log_lock(struct log *log) 3494e00368fSchristos { 3504e00368fSchristos int fd; 3514e00368fSchristos struct stat st; 3524e00368fSchristos 3534e00368fSchristos strcpy(log->end, ".lock"); 3544e00368fSchristos while ((fd = open(log->path, O_CREAT | O_EXCL, 0644)) < 0) { 3554e00368fSchristos if (errno != EEXIST) 3564e00368fSchristos return -1; 3574e00368fSchristos if (stat(log->path, &st) == 0 && time(NULL) - st.st_mtime > PATIENCE) { 3584e00368fSchristos unlink(log->path); 3594e00368fSchristos continue; 3604e00368fSchristos } 3614e00368fSchristos sleep(2); /* relinquish the CPU for two seconds while waiting */ 3624e00368fSchristos } 3634e00368fSchristos close(fd); 3644e00368fSchristos if (stat(log->path, &st) == 0) 3654e00368fSchristos log->lock = st.st_mtime; 3664e00368fSchristos return 0; 3674e00368fSchristos } 3684e00368fSchristos 3694e00368fSchristos /* Update the modify time of the lock file to now, in order to prevent another 3704e00368fSchristos task from thinking that the lock is stale. Save the lock file modify time 3714e00368fSchristos for verification of ownership. */ 3724e00368fSchristos local void log_touch(struct log *log) 3734e00368fSchristos { 3744e00368fSchristos struct stat st; 3754e00368fSchristos 3764e00368fSchristos strcpy(log->end, ".lock"); 3774e00368fSchristos utimes(log->path, NULL); 3784e00368fSchristos if (stat(log->path, &st) == 0) 3794e00368fSchristos log->lock = st.st_mtime; 3804e00368fSchristos } 3814e00368fSchristos 3824e00368fSchristos /* Check the log file modify time against what is expected. Return true if 3834e00368fSchristos this is not our lock. If it is our lock, touch it to keep it. 
*/ 3844e00368fSchristos local int log_check(struct log *log) 3854e00368fSchristos { 3864e00368fSchristos struct stat st; 3874e00368fSchristos 3884e00368fSchristos strcpy(log->end, ".lock"); 3894e00368fSchristos if (stat(log->path, &st) || st.st_mtime != log->lock) 3904e00368fSchristos return 1; 3914e00368fSchristos log_touch(log); 3924e00368fSchristos return 0; 3934e00368fSchristos } 3944e00368fSchristos 3954e00368fSchristos /* Unlock a previously acquired lock, but only if it's ours. */ 3964e00368fSchristos local void log_unlock(struct log *log) 3974e00368fSchristos { 3984e00368fSchristos if (log_check(log)) 3994e00368fSchristos return; 4004e00368fSchristos strcpy(log->end, ".lock"); 4014e00368fSchristos unlink(log->path); 4024e00368fSchristos log->lock = 0; 4034e00368fSchristos } 4044e00368fSchristos 4054e00368fSchristos /* Check the gzip header and read in the extra field, filling in the values in 4064e00368fSchristos the log structure. Return op on success or -1 if the gzip header was not as 4074e00368fSchristos expected. op is the current operation in progress last written to the extra 4084e00368fSchristos field. This assumes that the gzip file has already been opened, with the 4094e00368fSchristos file descriptor log->fd. 
*/ 4104e00368fSchristos local int log_head(struct log *log) 4114e00368fSchristos { 4124e00368fSchristos int op; 4134e00368fSchristos unsigned char buf[HEAD + EXTRA]; 4144e00368fSchristos 4154e00368fSchristos if (lseek(log->fd, 0, SEEK_SET) < 0 || 4164e00368fSchristos read(log->fd, buf, HEAD + EXTRA) != HEAD + EXTRA || 4174e00368fSchristos memcmp(buf, log_gzhead, HEAD)) { 4184e00368fSchristos return -1; 4194e00368fSchristos } 4204e00368fSchristos log->first = PULL8(buf + HEAD); 4214e00368fSchristos log->last = PULL8(buf + HEAD + 8); 4224e00368fSchristos log->ccrc = PULL4(buf + HEAD + 16); 4234e00368fSchristos log->clen = PULL4(buf + HEAD + 20); 4244e00368fSchristos log->tcrc = PULL4(buf + HEAD + 24); 4254e00368fSchristos log->tlen = PULL4(buf + HEAD + 28); 4264e00368fSchristos log->stored = PULL2(buf + HEAD + 32); 4274e00368fSchristos log->back = 3 + (buf[HEAD + 34] & 7); 4284e00368fSchristos op = (buf[HEAD + 34] >> 3) & 3; 4294e00368fSchristos return op; 4304e00368fSchristos } 4314e00368fSchristos 4324e00368fSchristos /* Write over the extra field contents, marking the operation as op. Use fsync 4334e00368fSchristos to assure that the device is written to, and in the requested order. This 4344e00368fSchristos operation, and only this operation, is assumed to be atomic in order to 4354e00368fSchristos assure that the log is recoverable in the event of an interruption at any 4364e00368fSchristos point in the process. Return -1 if the write to foo.gz failed. 
*/ 4374e00368fSchristos local int log_mark(struct log *log, int op) 4384e00368fSchristos { 4394e00368fSchristos int ret; 4404e00368fSchristos unsigned char ext[EXTRA]; 4414e00368fSchristos 4424e00368fSchristos PUT8(ext, log->first); 4434e00368fSchristos PUT8(ext + 8, log->last); 4444e00368fSchristos PUT4(ext + 16, log->ccrc); 4454e00368fSchristos PUT4(ext + 20, log->clen); 4464e00368fSchristos PUT4(ext + 24, log->tcrc); 4474e00368fSchristos PUT4(ext + 28, log->tlen); 4484e00368fSchristos PUT2(ext + 32, log->stored); 4494e00368fSchristos ext[34] = log->back - 3 + (op << 3); 4504e00368fSchristos fsync(log->fd); 4514e00368fSchristos ret = lseek(log->fd, HEAD, SEEK_SET) < 0 || 4524e00368fSchristos write(log->fd, ext, EXTRA) != EXTRA ? -1 : 0; 4534e00368fSchristos fsync(log->fd); 4544e00368fSchristos return ret; 4554e00368fSchristos } 4564e00368fSchristos 4574e00368fSchristos /* Rewrite the last block header bits and subsequent zero bits to get to a byte 4584e00368fSchristos boundary, setting the last block bit if last is true, and then write the 4594e00368fSchristos remainder of the stored block header (length and one's complement). Leave 4604e00368fSchristos the file pointer after the end of the last stored block data. Return -1 if 4614e00368fSchristos there is a read or write failure on the foo.gz file */ 4624e00368fSchristos local int log_last(struct log *log, int last) 4634e00368fSchristos { 4644e00368fSchristos int back, len, mask; 4654e00368fSchristos unsigned char buf[6]; 4664e00368fSchristos 4674e00368fSchristos /* determine the locations of the bytes and bits to modify */ 4684e00368fSchristos back = log->last == log->first ? log->back : 8; 4694e00368fSchristos len = back > 8 ? 
2 : 1; /* bytes back from log->last */ 4704e00368fSchristos mask = 0x80 >> ((back - 1) & 7); /* mask for block last-bit */ 4714e00368fSchristos 4724e00368fSchristos /* get the byte to modify (one or two back) into buf[0] -- don't need to 4734e00368fSchristos read the byte if the last-bit is eight bits back, since in that case 4744e00368fSchristos the entire byte will be modified */ 4754e00368fSchristos buf[0] = 0; 4764e00368fSchristos if (back != 8 && (lseek(log->fd, log->last - len, SEEK_SET) < 0 || 4774e00368fSchristos read(log->fd, buf, 1) != 1)) 4784e00368fSchristos return -1; 4794e00368fSchristos 4804e00368fSchristos /* change the last-bit of the last stored block as requested -- note 4814e00368fSchristos that all bits above the last-bit are set to zero, per the type bits 4824e00368fSchristos of a stored block being 00 and per the convention that the bits to 4834e00368fSchristos bring the stream to a byte boundary are also zeros */ 4844e00368fSchristos buf[1] = 0; 4854e00368fSchristos buf[2 - len] = (*buf & (mask - 1)) + (last ? mask : 0); 4864e00368fSchristos 4874e00368fSchristos /* write the modified stored block header and lengths, move the file 4884e00368fSchristos pointer to after the last stored block data */ 4894e00368fSchristos PUT2(buf + 2, log->stored); 4904e00368fSchristos PUT2(buf + 4, log->stored ^ 0xffff); 4914e00368fSchristos return lseek(log->fd, log->last - len, SEEK_SET) < 0 || 4924e00368fSchristos write(log->fd, buf + 2 - len, len + 4) != len + 4 || 4934e00368fSchristos lseek(log->fd, log->stored, SEEK_CUR) < 0 ? -1 : 0; 4944e00368fSchristos } 4954e00368fSchristos 4964e00368fSchristos /* Append len bytes from data to the locked and open log file. len may be zero 4974e00368fSchristos if recovering and no .add file was found. In that case, the previous state 4984e00368fSchristos of the foo.gz file is restored. The data is appended uncompressed in 4994e00368fSchristos deflate stored blocks. 
Return -1 if there was an error reading or writing 5004e00368fSchristos the foo.gz file. */ 5014e00368fSchristos local int log_append(struct log *log, unsigned char *data, size_t len) 5024e00368fSchristos { 5034e00368fSchristos uint put; 5044e00368fSchristos off_t end; 5054e00368fSchristos unsigned char buf[8]; 5064e00368fSchristos 5074e00368fSchristos /* set the last block last-bit and length, in case recovering an 5084e00368fSchristos interrupted append, then position the file pointer to append to the 5094e00368fSchristos block */ 5104e00368fSchristos if (log_last(log, 1)) 5114e00368fSchristos return -1; 5124e00368fSchristos 5134e00368fSchristos /* append, adding stored blocks and updating the offset of the last stored 5144e00368fSchristos block as needed, and update the total crc and length */ 5154e00368fSchristos while (len) { 5164e00368fSchristos /* append as much as we can to the last block */ 5174e00368fSchristos put = (MAX_STORE << 10) - log->stored; 5184e00368fSchristos if (put > len) 5194e00368fSchristos put = (uint)len; 5204e00368fSchristos if (put) { 5214e00368fSchristos if (write(log->fd, data, put) != put) 5224e00368fSchristos return -1; 5234e00368fSchristos BAIL(1); 5244e00368fSchristos log->tcrc = crc32(log->tcrc, data, put); 5254e00368fSchristos log->tlen += put; 5264e00368fSchristos log->stored += put; 5274e00368fSchristos data += put; 5284e00368fSchristos len -= put; 5294e00368fSchristos } 5304e00368fSchristos 5314e00368fSchristos /* if we need to, add a new empty stored block */ 5324e00368fSchristos if (len) { 5334e00368fSchristos /* mark current block as not last */ 5344e00368fSchristos if (log_last(log, 0)) 5354e00368fSchristos return -1; 5364e00368fSchristos 5374e00368fSchristos /* point to new, empty stored block */ 5384e00368fSchristos log->last += 4 + log->stored + 1; 5394e00368fSchristos log->stored = 0; 5404e00368fSchristos } 5414e00368fSchristos 5424e00368fSchristos /* mark last block as last, update its length */ 5434e00368fSchristos 
if (log_last(log, 1)) 5444e00368fSchristos return -1; 5454e00368fSchristos BAIL(2); 5464e00368fSchristos } 5474e00368fSchristos 5484e00368fSchristos /* write the new crc and length trailer, and truncate just in case (could 5494e00368fSchristos be recovering from partial append with a missing foo.add file) */ 5504e00368fSchristos PUT4(buf, log->tcrc); 5514e00368fSchristos PUT4(buf + 4, log->tlen); 5524e00368fSchristos if (write(log->fd, buf, 8) != 8 || 5534e00368fSchristos (end = lseek(log->fd, 0, SEEK_CUR)) < 0 || ftruncate(log->fd, end)) 5544e00368fSchristos return -1; 5554e00368fSchristos 5564e00368fSchristos /* write the extra field, marking the log file as done, delete .add file */ 5574e00368fSchristos if (log_mark(log, NO_OP)) 5584e00368fSchristos return -1; 5594e00368fSchristos strcpy(log->end, ".add"); 5604e00368fSchristos unlink(log->path); /* ignore error, since may not exist */ 5614e00368fSchristos return 0; 5624e00368fSchristos } 5634e00368fSchristos 5644e00368fSchristos /* Replace the foo.dict file with the foo.temp file. Also delete the foo.add 5654e00368fSchristos file, since the compress operation may have been interrupted before that was 5664e00368fSchristos done. Returns 1 if memory could not be allocated, or -1 if reading or 5674e00368fSchristos writing foo.gz fails, or if the rename fails for some reason other than 5684e00368fSchristos foo.temp not existing. foo.temp not existing is a permitted error, since 5694e00368fSchristos the replace operation may have been interrupted after the rename is done, 5704e00368fSchristos but before foo.gz is marked as complete. 
*/ 5714e00368fSchristos local int log_replace(struct log *log) 5724e00368fSchristos { 5734e00368fSchristos int ret; 5744e00368fSchristos char *dest; 5754e00368fSchristos 5764e00368fSchristos /* delete foo.add file */ 5774e00368fSchristos strcpy(log->end, ".add"); 5784e00368fSchristos unlink(log->path); /* ignore error, since may not exist */ 5794e00368fSchristos BAIL(3); 5804e00368fSchristos 5814e00368fSchristos /* rename foo.name to foo.dict, replacing foo.dict if it exists */ 5824e00368fSchristos strcpy(log->end, ".dict"); 5834e00368fSchristos dest = malloc(strlen(log->path) + 1); 5844e00368fSchristos if (dest == NULL) 5854e00368fSchristos return -2; 5864e00368fSchristos strcpy(dest, log->path); 5874e00368fSchristos strcpy(log->end, ".temp"); 5884e00368fSchristos ret = rename(log->path, dest); 5894e00368fSchristos free(dest); 5904e00368fSchristos if (ret && errno != ENOENT) 5914e00368fSchristos return -1; 5924e00368fSchristos BAIL(4); 5934e00368fSchristos 5944e00368fSchristos /* mark the foo.gz file as done */ 5954e00368fSchristos return log_mark(log, NO_OP); 5964e00368fSchristos } 5974e00368fSchristos 5984e00368fSchristos /* Compress the len bytes at data and append the compressed data to the 5994e00368fSchristos foo.gz deflate data immediately after the previous compressed data. This 6004e00368fSchristos overwrites the previous uncompressed data, which was stored in foo.add 6014e00368fSchristos and is the data provided in data[0..len-1]. If this operation is 6024e00368fSchristos interrupted, it picks up at the start of this routine, with the foo.add 6034e00368fSchristos file read in again. If there is no data to compress (len == 0), then we 6044e00368fSchristos simply terminate the foo.gz file after the previously compressed data, 6054e00368fSchristos appending a final empty stored block and the gzip trailer. Return -1 if 6064e00368fSchristos reading or writing the log.gz file failed, or -2 if there was a memory 6074e00368fSchristos allocation failure. 
*/ 6084e00368fSchristos local int log_compress(struct log *log, unsigned char *data, size_t len) 6094e00368fSchristos { 6104e00368fSchristos int fd; 6114e00368fSchristos uint got, max; 6124e00368fSchristos ssize_t dict; 6134e00368fSchristos off_t end; 6144e00368fSchristos z_stream strm; 6154e00368fSchristos unsigned char buf[DICT]; 6164e00368fSchristos 6174e00368fSchristos /* compress and append compressed data */ 6184e00368fSchristos if (len) { 6194e00368fSchristos /* set up for deflate, allocating memory */ 6204e00368fSchristos strm.zalloc = Z_NULL; 6214e00368fSchristos strm.zfree = Z_NULL; 6224e00368fSchristos strm.opaque = Z_NULL; 6234e00368fSchristos if (deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -15, 8, 6244e00368fSchristos Z_DEFAULT_STRATEGY) != Z_OK) 6254e00368fSchristos return -2; 6264e00368fSchristos 6274e00368fSchristos /* read in dictionary (last 32K of data that was compressed) */ 6284e00368fSchristos strcpy(log->end, ".dict"); 6294e00368fSchristos fd = open(log->path, O_RDONLY, 0); 6304e00368fSchristos if (fd >= 0) { 6314e00368fSchristos dict = read(fd, buf, DICT); 6324e00368fSchristos close(fd); 6334e00368fSchristos if (dict < 0) { 6344e00368fSchristos deflateEnd(&strm); 6354e00368fSchristos return -1; 6364e00368fSchristos } 6374e00368fSchristos if (dict) 6384e00368fSchristos deflateSetDictionary(&strm, buf, (uint)dict); 6394e00368fSchristos } 6404e00368fSchristos log_touch(log); 6414e00368fSchristos 6424e00368fSchristos /* prime deflate with last bits of previous block, position write 6434e00368fSchristos pointer to write those bits and overwrite what follows */ 6444e00368fSchristos if (lseek(log->fd, log->first - (log->back > 8 ? 
2 : 1), 6454e00368fSchristos SEEK_SET) < 0 || 6464e00368fSchristos read(log->fd, buf, 1) != 1 || lseek(log->fd, -1, SEEK_CUR) < 0) { 6474e00368fSchristos deflateEnd(&strm); 6484e00368fSchristos return -1; 6494e00368fSchristos } 6504e00368fSchristos deflatePrime(&strm, (8 - log->back) & 7, *buf); 6514e00368fSchristos 6524e00368fSchristos /* compress, finishing with a partial non-last empty static block */ 6534e00368fSchristos strm.next_in = data; 6544e00368fSchristos max = (((uint)0 - 1) >> 1) + 1; /* in case int smaller than size_t */ 6554e00368fSchristos do { 6564e00368fSchristos strm.avail_in = len > max ? max : (uint)len; 6574e00368fSchristos len -= strm.avail_in; 6584e00368fSchristos do { 6594e00368fSchristos strm.avail_out = DICT; 6604e00368fSchristos strm.next_out = buf; 6614e00368fSchristos deflate(&strm, len ? Z_NO_FLUSH : Z_PARTIAL_FLUSH); 6624e00368fSchristos got = DICT - strm.avail_out; 6634e00368fSchristos if (got && write(log->fd, buf, got) != got) { 6644e00368fSchristos deflateEnd(&strm); 6654e00368fSchristos return -1; 6664e00368fSchristos } 6674e00368fSchristos log_touch(log); 6684e00368fSchristos } while (strm.avail_out == 0); 6694e00368fSchristos } while (len); 6704e00368fSchristos deflateEnd(&strm); 6714e00368fSchristos BAIL(5); 6724e00368fSchristos 6734e00368fSchristos /* find start of empty static block -- scanning backwards the first one 6744e00368fSchristos bit is the second bit of the block, if the last byte is zero, then 6754e00368fSchristos we know the byte before that has a one in the top bit, since an 6764e00368fSchristos empty static block is ten bits long */ 6774e00368fSchristos if ((log->first = lseek(log->fd, -1, SEEK_CUR)) < 0 || 6784e00368fSchristos read(log->fd, buf, 1) != 1) 6794e00368fSchristos return -1; 6804e00368fSchristos log->first++; 6814e00368fSchristos if (*buf) { 6824e00368fSchristos log->back = 1; 6834e00368fSchristos while ((*buf & ((uint)1 << (8 - log->back++))) == 0) 6844e00368fSchristos ; /* guaranteed to 
terminate, since *buf != 0 */ 6854e00368fSchristos } 6864e00368fSchristos else 6874e00368fSchristos log->back = 10; 6884e00368fSchristos 6894e00368fSchristos /* update compressed crc and length */ 6904e00368fSchristos log->ccrc = log->tcrc; 6914e00368fSchristos log->clen = log->tlen; 6924e00368fSchristos } 6934e00368fSchristos else { 6944e00368fSchristos /* no data to compress -- fix up existing gzip stream */ 6954e00368fSchristos log->tcrc = log->ccrc; 6964e00368fSchristos log->tlen = log->clen; 6974e00368fSchristos } 6984e00368fSchristos 6994e00368fSchristos /* complete and truncate gzip stream */ 7004e00368fSchristos log->last = log->first; 7014e00368fSchristos log->stored = 0; 7024e00368fSchristos PUT4(buf, log->tcrc); 7034e00368fSchristos PUT4(buf + 4, log->tlen); 7044e00368fSchristos if (log_last(log, 1) || write(log->fd, buf, 8) != 8 || 7054e00368fSchristos (end = lseek(log->fd, 0, SEEK_CUR)) < 0 || ftruncate(log->fd, end)) 7064e00368fSchristos return -1; 7074e00368fSchristos BAIL(6); 7084e00368fSchristos 7094e00368fSchristos /* mark as being in the replace operation */ 7104e00368fSchristos if (log_mark(log, REPLACE_OP)) 7114e00368fSchristos return -1; 7124e00368fSchristos 7134e00368fSchristos /* execute the replace operation and mark the file as done */ 7144e00368fSchristos return log_replace(log); 7154e00368fSchristos } 7164e00368fSchristos 7174e00368fSchristos /* log a repair record to the .repairs file */ 7184e00368fSchristos local void log_log(struct log *log, int op, char *record) 7194e00368fSchristos { 7204e00368fSchristos time_t now; 7214e00368fSchristos FILE *rec; 7224e00368fSchristos 7234e00368fSchristos now = time(NULL); 7244e00368fSchristos strcpy(log->end, ".repairs"); 7254e00368fSchristos rec = fopen(log->path, "a"); 7264e00368fSchristos if (rec == NULL) 7274e00368fSchristos return; 7284e00368fSchristos fprintf(rec, "%.24s %s recovery: %s\n", ctime(&now), op == APPEND_OP ? 7294e00368fSchristos "append" : (op == COMPRESS_OP ? 
"compress" : "replace"), record); 7304e00368fSchristos fclose(rec); 7314e00368fSchristos return; 7324e00368fSchristos } 7334e00368fSchristos 7344e00368fSchristos /* Recover the interrupted operation op. First read foo.add for recovering an 7354e00368fSchristos append or compress operation. Return -1 if there was an error reading or 7364e00368fSchristos writing foo.gz or reading an existing foo.add, or -2 if there was a memory 7374e00368fSchristos allocation failure. */ 7384e00368fSchristos local int log_recover(struct log *log, int op) 7394e00368fSchristos { 7404e00368fSchristos int fd, ret = 0; 7414e00368fSchristos unsigned char *data = NULL; 7424e00368fSchristos size_t len = 0; 7434e00368fSchristos struct stat st; 7444e00368fSchristos 7454e00368fSchristos /* log recovery */ 7464e00368fSchristos log_log(log, op, "start"); 7474e00368fSchristos 7484e00368fSchristos /* load foo.add file if expected and present */ 7494e00368fSchristos if (op == APPEND_OP || op == COMPRESS_OP) { 7504e00368fSchristos strcpy(log->end, ".add"); 7514e00368fSchristos if (stat(log->path, &st) == 0 && st.st_size) { 7524e00368fSchristos len = (size_t)(st.st_size); 753ed8eb4c2Schristos if ((off_t)len != st.st_size || 754ed8eb4c2Schristos (data = malloc(st.st_size)) == NULL) { 7554e00368fSchristos log_log(log, op, "allocation failure"); 7564e00368fSchristos return -2; 7574e00368fSchristos } 7584e00368fSchristos if ((fd = open(log->path, O_RDONLY, 0)) < 0) { 759*6881a400Schristos free(data); 7604e00368fSchristos log_log(log, op, ".add file read failure"); 7614e00368fSchristos return -1; 7624e00368fSchristos } 763ed8eb4c2Schristos ret = (size_t)read(fd, data, len) != len; 7644e00368fSchristos close(fd); 7654e00368fSchristos if (ret) { 766*6881a400Schristos free(data); 7674e00368fSchristos log_log(log, op, ".add file read failure"); 7684e00368fSchristos return -1; 7694e00368fSchristos } 7704e00368fSchristos log_log(log, op, "loaded .add file"); 7714e00368fSchristos } 7724e00368fSchristos else 
7734e00368fSchristos log_log(log, op, "missing .add file!"); 7744e00368fSchristos } 7754e00368fSchristos 7764e00368fSchristos /* recover the interrupted operation */ 7774e00368fSchristos switch (op) { 7784e00368fSchristos case APPEND_OP: 7794e00368fSchristos ret = log_append(log, data, len); 7804e00368fSchristos break; 7814e00368fSchristos case COMPRESS_OP: 7824e00368fSchristos ret = log_compress(log, data, len); 7834e00368fSchristos break; 7844e00368fSchristos case REPLACE_OP: 7854e00368fSchristos ret = log_replace(log); 7864e00368fSchristos } 7874e00368fSchristos 7884e00368fSchristos /* log status */ 7894e00368fSchristos log_log(log, op, ret ? "failure" : "complete"); 7904e00368fSchristos 7914e00368fSchristos /* clean up */ 7924e00368fSchristos if (data != NULL) 7934e00368fSchristos free(data); 7944e00368fSchristos return ret; 7954e00368fSchristos } 7964e00368fSchristos 7974e00368fSchristos /* Close the foo.gz file (if open) and release the lock. */ 7984e00368fSchristos local void log_close(struct log *log) 7994e00368fSchristos { 8004e00368fSchristos if (log->fd >= 0) 8014e00368fSchristos close(log->fd); 8024e00368fSchristos log->fd = -1; 8034e00368fSchristos log_unlock(log); 8044e00368fSchristos } 8054e00368fSchristos 8064e00368fSchristos /* Open foo.gz, verify the header, and load the extra field contents, after 8074e00368fSchristos first creating the foo.lock file to gain exclusive access to the foo.* 8084e00368fSchristos files. If foo.gz does not exist or is empty, then write the initial header, 8094e00368fSchristos extra, and body content of an empty foo.gz log file. If there is an error 8104e00368fSchristos creating the lock file due to access restrictions, or an error reading or 8114e00368fSchristos writing the foo.gz file, or if the foo.gz file is not a proper log file for 8124e00368fSchristos this object (e.g. not a gzip file or does not contain the expected extra 8134e00368fSchristos field), then return true. If there is an error, the lock is released. 
8144e00368fSchristos Otherwise, the lock is left in place. */ 8154e00368fSchristos local int log_open(struct log *log) 8164e00368fSchristos { 8174e00368fSchristos int op; 8184e00368fSchristos 8194e00368fSchristos /* release open file resource if left over -- can occur if lock lost 8204e00368fSchristos between gzlog_open() and gzlog_write() */ 8214e00368fSchristos if (log->fd >= 0) 8224e00368fSchristos close(log->fd); 8234e00368fSchristos log->fd = -1; 8244e00368fSchristos 8254e00368fSchristos /* negotiate exclusive access */ 8264e00368fSchristos if (log_lock(log) < 0) 8274e00368fSchristos return -1; 8284e00368fSchristos 8294e00368fSchristos /* open the log file, foo.gz */ 8304e00368fSchristos strcpy(log->end, ".gz"); 8314e00368fSchristos log->fd = open(log->path, O_RDWR | O_CREAT, 0644); 8324e00368fSchristos if (log->fd < 0) { 8334e00368fSchristos log_close(log); 8344e00368fSchristos return -1; 8354e00368fSchristos } 8364e00368fSchristos 8374e00368fSchristos /* if new, initialize foo.gz with an empty log, delete old dictionary */ 8384e00368fSchristos if (lseek(log->fd, 0, SEEK_END) == 0) { 8394e00368fSchristos if (write(log->fd, log_gzhead, HEAD) != HEAD || 8404e00368fSchristos write(log->fd, log_gzext, EXTRA) != EXTRA || 8414e00368fSchristos write(log->fd, log_gzbody, BODY) != BODY) { 8424e00368fSchristos log_close(log); 8434e00368fSchristos return -1; 8444e00368fSchristos } 8454e00368fSchristos strcpy(log->end, ".dict"); 8464e00368fSchristos unlink(log->path); 8474e00368fSchristos } 8484e00368fSchristos 8494e00368fSchristos /* verify log file and load extra field information */ 8504e00368fSchristos if ((op = log_head(log)) < 0) { 8514e00368fSchristos log_close(log); 8524e00368fSchristos return -1; 8534e00368fSchristos } 8544e00368fSchristos 8554e00368fSchristos /* check for interrupted process and if so, recover */ 8564e00368fSchristos if (op != NO_OP && log_recover(log, op)) { 8574e00368fSchristos log_close(log); 8584e00368fSchristos return -1; 
8594e00368fSchristos } 8604e00368fSchristos 8614e00368fSchristos /* touch the lock file to prevent another process from grabbing it */ 8624e00368fSchristos log_touch(log); 8634e00368fSchristos return 0; 8644e00368fSchristos } 8654e00368fSchristos 8664e00368fSchristos /* See gzlog.h for the description of the external methods below */ 8674e00368fSchristos gzlog *gzlog_open(char *path) 8684e00368fSchristos { 8694e00368fSchristos size_t n; 8704e00368fSchristos struct log *log; 8714e00368fSchristos 8724e00368fSchristos /* check arguments */ 8734e00368fSchristos if (path == NULL || *path == 0) 8744e00368fSchristos return NULL; 8754e00368fSchristos 8764e00368fSchristos /* allocate and initialize log structure */ 8774e00368fSchristos log = malloc(sizeof(struct log)); 8784e00368fSchristos if (log == NULL) 8794e00368fSchristos return NULL; 8804e00368fSchristos strcpy(log->id, LOGID); 8814e00368fSchristos log->fd = -1; 8824e00368fSchristos 8834e00368fSchristos /* save path and end of path for name construction */ 8844e00368fSchristos n = strlen(path); 8854e00368fSchristos log->path = malloc(n + 9); /* allow for ".repairs" */ 8864e00368fSchristos if (log->path == NULL) { 8874e00368fSchristos free(log); 8884e00368fSchristos return NULL; 8894e00368fSchristos } 8904e00368fSchristos strcpy(log->path, path); 8914e00368fSchristos log->end = log->path + n; 8924e00368fSchristos 8934e00368fSchristos /* gain exclusive access and verify log file -- may perform a 8944e00368fSchristos recovery operation if needed */ 8954e00368fSchristos if (log_open(log)) { 8964e00368fSchristos free(log->path); 8974e00368fSchristos free(log); 8984e00368fSchristos return NULL; 8994e00368fSchristos } 9004e00368fSchristos 9014e00368fSchristos /* return pointer to log structure */ 9024e00368fSchristos return log; 9034e00368fSchristos } 9044e00368fSchristos 9054e00368fSchristos /* gzlog_compress() return values: 9064e00368fSchristos 0: all good 9074e00368fSchristos -1: file i/o error (usually access issue) 
9084e00368fSchristos -2: memory allocation failure 9094e00368fSchristos -3: invalid log pointer argument */ 9104e00368fSchristos int gzlog_compress(gzlog *logd) 9114e00368fSchristos { 9124e00368fSchristos int fd, ret; 9134e00368fSchristos uint block; 9144e00368fSchristos size_t len, next; 9154e00368fSchristos unsigned char *data, buf[5]; 9164e00368fSchristos struct log *log = logd; 9174e00368fSchristos 9184e00368fSchristos /* check arguments */ 919ed8eb4c2Schristos if (log == NULL || strcmp(log->id, LOGID)) 9204e00368fSchristos return -3; 9214e00368fSchristos 9224e00368fSchristos /* see if we lost the lock -- if so get it again and reload the extra 9234e00368fSchristos field information (it probably changed), recover last operation if 9244e00368fSchristos necessary */ 9254e00368fSchristos if (log_check(log) && log_open(log)) 9264e00368fSchristos return -1; 9274e00368fSchristos 9284e00368fSchristos /* create space for uncompressed data */ 9294e00368fSchristos len = ((size_t)(log->last - log->first) & ~(((size_t)1 << 10) - 1)) + 9304e00368fSchristos log->stored; 9314e00368fSchristos if ((data = malloc(len)) == NULL) 9324e00368fSchristos return -2; 9334e00368fSchristos 9344e00368fSchristos /* do statement here is just a cheap trick for error handling */ 9354e00368fSchristos do { 9364e00368fSchristos /* read in the uncompressed data */ 9374e00368fSchristos if (lseek(log->fd, log->first - 1, SEEK_SET) < 0) 9384e00368fSchristos break; 9394e00368fSchristos next = 0; 9404e00368fSchristos while (next < len) { 9414e00368fSchristos if (read(log->fd, buf, 5) != 5) 9424e00368fSchristos break; 9434e00368fSchristos block = PULL2(buf + 1); 9444e00368fSchristos if (next + block > len || 9454e00368fSchristos read(log->fd, (char *)data + next, block) != block) 9464e00368fSchristos break; 9474e00368fSchristos next += block; 9484e00368fSchristos } 9494e00368fSchristos if (lseek(log->fd, 0, SEEK_CUR) != log->last + 4 + log->stored) 9504e00368fSchristos break; 9514e00368fSchristos 
log_touch(log); 9524e00368fSchristos 9534e00368fSchristos /* write the uncompressed data to the .add file */ 9544e00368fSchristos strcpy(log->end, ".add"); 9554e00368fSchristos fd = open(log->path, O_WRONLY | O_CREAT | O_TRUNC, 0644); 9564e00368fSchristos if (fd < 0) 9574e00368fSchristos break; 958ed8eb4c2Schristos ret = (size_t)write(fd, data, len) != len; 9594e00368fSchristos if (ret | close(fd)) 9604e00368fSchristos break; 9614e00368fSchristos log_touch(log); 9624e00368fSchristos 9634e00368fSchristos /* write the dictionary for the next compress to the .temp file */ 9644e00368fSchristos strcpy(log->end, ".temp"); 9654e00368fSchristos fd = open(log->path, O_WRONLY | O_CREAT | O_TRUNC, 0644); 9664e00368fSchristos if (fd < 0) 9674e00368fSchristos break; 9684e00368fSchristos next = DICT > len ? len : DICT; 969ed8eb4c2Schristos ret = (size_t)write(fd, (char *)data + len - next, next) != next; 9704e00368fSchristos if (ret | close(fd)) 9714e00368fSchristos break; 9724e00368fSchristos log_touch(log); 9734e00368fSchristos 9744e00368fSchristos /* roll back to compressed data, mark the compress in progress */ 9754e00368fSchristos log->last = log->first; 9764e00368fSchristos log->stored = 0; 9774e00368fSchristos if (log_mark(log, COMPRESS_OP)) 9784e00368fSchristos break; 9794e00368fSchristos BAIL(7); 9804e00368fSchristos 9814e00368fSchristos /* compress and append the data (clears mark) */ 9824e00368fSchristos ret = log_compress(log, data, len); 9834e00368fSchristos free(data); 9844e00368fSchristos return ret; 9854e00368fSchristos } while (0); 9864e00368fSchristos 9874e00368fSchristos /* broke out of do above on i/o error */ 9884e00368fSchristos free(data); 9894e00368fSchristos return -1; 9904e00368fSchristos } 9914e00368fSchristos 9924e00368fSchristos /* gzlog_write() return values: 9934e00368fSchristos 0: all good 9944e00368fSchristos -1: file i/o error (usually access issue) 9954e00368fSchristos -2: memory allocation failure 9964e00368fSchristos -3: invalid log pointer 
argument */ 9974e00368fSchristos int gzlog_write(gzlog *logd, void *data, size_t len) 9984e00368fSchristos { 9994e00368fSchristos int fd, ret; 10004e00368fSchristos struct log *log = logd; 10014e00368fSchristos 10024e00368fSchristos /* check arguments */ 1003ed8eb4c2Schristos if (log == NULL || strcmp(log->id, LOGID)) 10044e00368fSchristos return -3; 1005ed8eb4c2Schristos if (data == NULL || len <= 0) 10064e00368fSchristos return 0; 10074e00368fSchristos 10084e00368fSchristos /* see if we lost the lock -- if so get it again and reload the extra 10094e00368fSchristos field information (it probably changed), recover last operation if 10104e00368fSchristos necessary */ 10114e00368fSchristos if (log_check(log) && log_open(log)) 10124e00368fSchristos return -1; 10134e00368fSchristos 10144e00368fSchristos /* create and write .add file */ 10154e00368fSchristos strcpy(log->end, ".add"); 10164e00368fSchristos fd = open(log->path, O_WRONLY | O_CREAT | O_TRUNC, 0644); 10174e00368fSchristos if (fd < 0) 10184e00368fSchristos return -1; 1019ed8eb4c2Schristos ret = (size_t)write(fd, data, len) != len; 10204e00368fSchristos if (ret | close(fd)) 10214e00368fSchristos return -1; 10224e00368fSchristos log_touch(log); 10234e00368fSchristos 10244e00368fSchristos /* mark log file with append in progress */ 10254e00368fSchristos if (log_mark(log, APPEND_OP)) 10264e00368fSchristos return -1; 10274e00368fSchristos BAIL(8); 10284e00368fSchristos 10294e00368fSchristos /* append data (clears mark) */ 10304e00368fSchristos if (log_append(log, data, len)) 10314e00368fSchristos return -1; 10324e00368fSchristos 10334e00368fSchristos /* check to see if it's time to compress -- if not, then done */ 10344e00368fSchristos if (((log->last - log->first) >> 10) + (log->stored >> 10) < TRIGGER) 10354e00368fSchristos return 0; 10364e00368fSchristos 10374e00368fSchristos /* time to compress */ 10384e00368fSchristos return gzlog_compress(log); 10394e00368fSchristos } 10404e00368fSchristos 
10414e00368fSchristos /* gzlog_close() return values: 10424e00368fSchristos 0: ok 10434e00368fSchristos -3: invalid log pointer argument */ 10444e00368fSchristos int gzlog_close(gzlog *logd) 10454e00368fSchristos { 10464e00368fSchristos struct log *log = logd; 10474e00368fSchristos 10484e00368fSchristos /* check arguments */ 10494e00368fSchristos if (log == NULL || strcmp(log->id, LOGID)) 10504e00368fSchristos return -3; 10514e00368fSchristos 10524e00368fSchristos /* close the log file and release the lock */ 10534e00368fSchristos log_close(log); 10544e00368fSchristos 10554e00368fSchristos /* free structure and return */ 10564e00368fSchristos if (log->path != NULL) 10574e00368fSchristos free(log->path); 10584e00368fSchristos strcpy(log->id, "bad"); 10594e00368fSchristos free(log); 10604e00368fSchristos return 0; 10614e00368fSchristos } 1062