1*3117ece4Schristos /* 2*3117ece4Schristos * Copyright (c) Meta Platforms, Inc. and affiliates. 3*3117ece4Schristos * All rights reserved. 4*3117ece4Schristos * 5*3117ece4Schristos * This source code is licensed under both the BSD-style license (found in the 6*3117ece4Schristos * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7*3117ece4Schristos * in the COPYING file in the root directory of this source tree). 8*3117ece4Schristos * You may select, at your option, one of the above-listed licenses. 9*3117ece4Schristos */ 10*3117ece4Schristos 11*3117ece4Schristos #include "data.h" 12*3117ece4Schristos 13*3117ece4Schristos #include <assert.h> 14*3117ece4Schristos #include <errno.h> 15*3117ece4Schristos #include <stdio.h> 16*3117ece4Schristos #include <string.h> 17*3117ece4Schristos #include <stdlib.h> /* free() */ 18*3117ece4Schristos 19*3117ece4Schristos #include <sys/stat.h> 20*3117ece4Schristos 21*3117ece4Schristos #include <curl/curl.h> 22*3117ece4Schristos 23*3117ece4Schristos #include "mem.h" 24*3117ece4Schristos #include "util.h" 25*3117ece4Schristos #define XXH_STATIC_LINKING_ONLY 26*3117ece4Schristos #include "xxhash.h" 27*3117ece4Schristos 28*3117ece4Schristos /** 29*3117ece4Schristos * Data objects 30*3117ece4Schristos */ 31*3117ece4Schristos 32*3117ece4Schristos #define REGRESSION_RELEASE(x) \ 33*3117ece4Schristos "https://github.com/facebook/zstd/releases/download/regression-data/" x 34*3117ece4Schristos 35*3117ece4Schristos data_t silesia = { 36*3117ece4Schristos .name = "silesia", 37*3117ece4Schristos .type = data_type_dir, 38*3117ece4Schristos .data = 39*3117ece4Schristos { 40*3117ece4Schristos .url = REGRESSION_RELEASE("silesia.tar.zst"), 41*3117ece4Schristos .xxhash64 = 0x48a199f92f93e977LL, 42*3117ece4Schristos }, 43*3117ece4Schristos }; 44*3117ece4Schristos 45*3117ece4Schristos data_t silesia_tar = { 46*3117ece4Schristos .name = "silesia.tar", 47*3117ece4Schristos .type = data_type_file, 48*3117ece4Schristos .data = 49*3117ece4Schristos { 50*3117ece4Schristos .url = REGRESSION_RELEASE("silesia.tar.zst"), 51*3117ece4Schristos .xxhash64 = 0x48a199f92f93e977LL, 52*3117ece4Schristos }, 53*3117ece4Schristos }; 54*3117ece4Schristos 55*3117ece4Schristos data_t github = { 56*3117ece4Schristos .name = "github", 57*3117ece4Schristos .type = data_type_dir, 58*3117ece4Schristos .data = 59*3117ece4Schristos { 60*3117ece4Schristos .url = REGRESSION_RELEASE("github.tar.zst"), 61*3117ece4Schristos .xxhash64 = 0xa9b1b44b020df292LL, 62*3117ece4Schristos }, 63*3117ece4Schristos .dict = 64*3117ece4Schristos { 65*3117ece4Schristos .url = REGRESSION_RELEASE("github.dict.zst"), 66*3117ece4Schristos .xxhash64 = 0x1eddc6f737d3cb53LL, 67*3117ece4Schristos 68*3117ece4Schristos }, 69*3117ece4Schristos }; 70*3117ece4Schristos 71*3117ece4Schristos data_t github_tar = { 72*3117ece4Schristos .name = "github.tar", 73*3117ece4Schristos .type = data_type_file, 74*3117ece4Schristos .data = 75*3117ece4Schristos { 76*3117ece4Schristos .url = REGRESSION_RELEASE("github.tar.zst"), 77*3117ece4Schristos .xxhash64 = 0xa9b1b44b020df292LL, 78*3117ece4Schristos }, 79*3117ece4Schristos .dict = 80*3117ece4Schristos { 81*3117ece4Schristos .url = REGRESSION_RELEASE("github.dict.zst"), 82*3117ece4Schristos .xxhash64 = 0x1eddc6f737d3cb53LL, 83*3117ece4Schristos 84*3117ece4Schristos }, 85*3117ece4Schristos }; 86*3117ece4Schristos 87*3117ece4Schristos static data_t* g_data[] = { 88*3117ece4Schristos &silesia, 89*3117ece4Schristos &silesia_tar, 90*3117ece4Schristos &github, 91*3117ece4Schristos &github_tar, 92*3117ece4Schristos NULL, 93*3117ece4Schristos }; 94*3117ece4Schristos 95*3117ece4Schristos data_t const* const* data = (data_t const* const*)g_data; 96*3117ece4Schristos 97*3117ece4Schristos /** 98*3117ece4Schristos * data helpers. 99*3117ece4Schristos */ 100*3117ece4Schristos 101*3117ece4Schristos int data_has_dict(data_t const* data) { 102*3117ece4Schristos return data->dict.url != NULL; 103*3117ece4Schristos } 104*3117ece4Schristos 105*3117ece4Schristos /** 106*3117ece4Schristos * data buffer helper functions (documented in header). 107*3117ece4Schristos */ 108*3117ece4Schristos 109*3117ece4Schristos data_buffer_t data_buffer_create(size_t const capacity) { 110*3117ece4Schristos data_buffer_t buffer = {}; 111*3117ece4Schristos 112*3117ece4Schristos buffer.data = (uint8_t*)malloc(capacity); 113*3117ece4Schristos if (buffer.data == NULL) 114*3117ece4Schristos return buffer; 115*3117ece4Schristos buffer.capacity = capacity; 116*3117ece4Schristos return buffer; 117*3117ece4Schristos } 118*3117ece4Schristos 119*3117ece4Schristos data_buffer_t data_buffer_read(char const* filename) { 120*3117ece4Schristos data_buffer_t buffer = {}; 121*3117ece4Schristos 122*3117ece4Schristos uint64_t const size = UTIL_getFileSize(filename); 123*3117ece4Schristos if (size == UTIL_FILESIZE_UNKNOWN) { 124*3117ece4Schristos fprintf(stderr, "unknown size for %s\n", filename); 125*3117ece4Schristos return buffer; 126*3117ece4Schristos } 127*3117ece4Schristos 128*3117ece4Schristos buffer.data = (uint8_t*)malloc(size); 129*3117ece4Schristos if (buffer.data == NULL) { 130*3117ece4Schristos fprintf(stderr, "malloc failed\n"); 131*3117ece4Schristos return buffer; 132*3117ece4Schristos } 133*3117ece4Schristos buffer.capacity = size; 134*3117ece4Schristos 135*3117ece4Schristos FILE* file = fopen(filename, "rb"); 136*3117ece4Schristos if (file == NULL) { 137*3117ece4Schristos fprintf(stderr, "file null\n"); 138*3117ece4Schristos goto err; 139*3117ece4Schristos } 140*3117ece4Schristos buffer.size = fread(buffer.data, 1, buffer.capacity, file); 141*3117ece4Schristos fclose(file); 142*3117ece4Schristos if (buffer.size != buffer.capacity) { 143*3117ece4Schristos fprintf(stderr, "read %zu != %zu\n", buffer.size, buffer.capacity); 144*3117ece4Schristos goto err; 145*3117ece4Schristos } 146*3117ece4Schristos 147*3117ece4Schristos return buffer; 148*3117ece4Schristos err: 149*3117ece4Schristos free(buffer.data); 150*3117ece4Schristos memset(&buffer, 0, sizeof(buffer)); 151*3117ece4Schristos return buffer; 152*3117ece4Schristos } 153*3117ece4Schristos 154*3117ece4Schristos data_buffer_t data_buffer_get_data(data_t const* data) { 155*3117ece4Schristos data_buffer_t const kEmptyBuffer = {}; 156*3117ece4Schristos 157*3117ece4Schristos if (data->type != data_type_file) 158*3117ece4Schristos return kEmptyBuffer; 159*3117ece4Schristos 160*3117ece4Schristos return data_buffer_read(data->data.path); 161*3117ece4Schristos } 162*3117ece4Schristos 163*3117ece4Schristos data_buffer_t data_buffer_get_dict(data_t const* data) { 164*3117ece4Schristos data_buffer_t const kEmptyBuffer = {}; 165*3117ece4Schristos 166*3117ece4Schristos if (!data_has_dict(data)) 167*3117ece4Schristos return kEmptyBuffer; 168*3117ece4Schristos 169*3117ece4Schristos return data_buffer_read(data->dict.path); 170*3117ece4Schristos } 171*3117ece4Schristos 172*3117ece4Schristos int data_buffer_compare(data_buffer_t buffer1, data_buffer_t buffer2) { 173*3117ece4Schristos size_t const size = 174*3117ece4Schristos buffer1.size < buffer2.size ? buffer1.size : buffer2.size; 175*3117ece4Schristos int const cmp = memcmp(buffer1.data, buffer2.data, size); 176*3117ece4Schristos if (cmp != 0) 177*3117ece4Schristos return cmp; 178*3117ece4Schristos if (buffer1.size < buffer2.size) 179*3117ece4Schristos return -1; 180*3117ece4Schristos if (buffer1.size == buffer2.size) 181*3117ece4Schristos return 0; 182*3117ece4Schristos assert(buffer1.size > buffer2.size); 183*3117ece4Schristos return 1; 184*3117ece4Schristos } 185*3117ece4Schristos 186*3117ece4Schristos void data_buffer_free(data_buffer_t buffer) { 187*3117ece4Schristos free(buffer.data); 188*3117ece4Schristos } 189*3117ece4Schristos 190*3117ece4Schristos /** 191*3117ece4Schristos * data filenames helpers. 192*3117ece4Schristos */ 193*3117ece4Schristos 194*3117ece4Schristos FileNamesTable* data_filenames_get(data_t const* data) 195*3117ece4Schristos { 196*3117ece4Schristos char const* const path = data->data.path; 197*3117ece4Schristos return UTIL_createExpandedFNT(&path, 1, 0 /* followLinks */ ); 198*3117ece4Schristos } 199*3117ece4Schristos 200*3117ece4Schristos /** 201*3117ece4Schristos * data buffers helpers. 202*3117ece4Schristos */ 203*3117ece4Schristos 204*3117ece4Schristos data_buffers_t data_buffers_get(data_t const* data) { 205*3117ece4Schristos data_buffers_t buffers = {.size = 0}; 206*3117ece4Schristos FileNamesTable* const filenames = data_filenames_get(data); 207*3117ece4Schristos if (filenames == NULL) return buffers; 208*3117ece4Schristos if (filenames->tableSize == 0) { 209*3117ece4Schristos UTIL_freeFileNamesTable(filenames); 210*3117ece4Schristos return buffers; 211*3117ece4Schristos } 212*3117ece4Schristos 213*3117ece4Schristos data_buffer_t* buffersPtr = 214*3117ece4Schristos (data_buffer_t*)malloc(filenames->tableSize * sizeof(*buffersPtr)); 215*3117ece4Schristos if (buffersPtr == NULL) { 216*3117ece4Schristos UTIL_freeFileNamesTable(filenames); 217*3117ece4Schristos return buffers; 218*3117ece4Schristos } 219*3117ece4Schristos buffers.buffers = (data_buffer_t const*)buffersPtr; 220*3117ece4Schristos buffers.size = filenames->tableSize; 221*3117ece4Schristos 222*3117ece4Schristos for (size_t i = 0; i < filenames->tableSize; ++i) { 223*3117ece4Schristos buffersPtr[i] = data_buffer_read(filenames->fileNames[i]); 224*3117ece4Schristos if (buffersPtr[i].data == NULL) { 225*3117ece4Schristos data_buffers_t const kEmptyBuffer = {}; 226*3117ece4Schristos data_buffers_free(buffers); 227*3117ece4Schristos UTIL_freeFileNamesTable(filenames); 228*3117ece4Schristos return kEmptyBuffer; 229*3117ece4Schristos } 230*3117ece4Schristos } 231*3117ece4Schristos 232*3117ece4Schristos UTIL_freeFileNamesTable(filenames); 233*3117ece4Schristos return buffers; 234*3117ece4Schristos } 235*3117ece4Schristos 236*3117ece4Schristos /** 237*3117ece4Schristos * Frees the data buffers. 238*3117ece4Schristos */ 239*3117ece4Schristos void data_buffers_free(data_buffers_t buffers) { 240*3117ece4Schristos free((data_buffer_t*)buffers.buffers); 241*3117ece4Schristos } 242*3117ece4Schristos 243*3117ece4Schristos /** 244*3117ece4Schristos * Initialization and download functions. 245*3117ece4Schristos */ 246*3117ece4Schristos 247*3117ece4Schristos static char* g_data_dir = NULL; 248*3117ece4Schristos 249*3117ece4Schristos /* mkdir -p */ 250*3117ece4Schristos static int ensure_directory_exists(char const* indir) { 251*3117ece4Schristos char* const dir = strdup(indir); 252*3117ece4Schristos char* end = dir; 253*3117ece4Schristos int ret = 0; 254*3117ece4Schristos if (dir == NULL) { 255*3117ece4Schristos ret = EINVAL; 256*3117ece4Schristos goto out; 257*3117ece4Schristos } 258*3117ece4Schristos do { 259*3117ece4Schristos /* Find the next directory level. */ 260*3117ece4Schristos for (++end; *end != '\0' && *end != '/'; ++end) 261*3117ece4Schristos ; 262*3117ece4Schristos /* End the string there, make the directory, and restore the string. */ 263*3117ece4Schristos char const save = *end; 264*3117ece4Schristos *end = '\0'; 265*3117ece4Schristos int const isdir = UTIL_isDirectory(dir); 266*3117ece4Schristos ret = mkdir(dir, S_IRWXU); 267*3117ece4Schristos *end = save; 268*3117ece4Schristos /* Its okay if the directory already exists. */ 269*3117ece4Schristos if (ret == 0 || (errno == EEXIST && isdir)) 270*3117ece4Schristos continue; 271*3117ece4Schristos ret = errno; 272*3117ece4Schristos fprintf(stderr, "mkdir() failed\n"); 273*3117ece4Schristos goto out; 274*3117ece4Schristos } while (*end != '\0'); 275*3117ece4Schristos 276*3117ece4Schristos ret = 0; 277*3117ece4Schristos out: 278*3117ece4Schristos free(dir); 279*3117ece4Schristos return ret; 280*3117ece4Schristos } 281*3117ece4Schristos 282*3117ece4Schristos /** Concatenate 3 strings into a new buffer. */ 283*3117ece4Schristos static char* cat3(char const* str1, char const* str2, char const* str3) { 284*3117ece4Schristos size_t const size1 = strlen(str1); 285*3117ece4Schristos size_t const size2 = strlen(str2); 286*3117ece4Schristos size_t const size3 = str3 == NULL ? 0 : strlen(str3); 287*3117ece4Schristos size_t const size = size1 + size2 + size3 + 1; 288*3117ece4Schristos char* const dst = (char*)malloc(size); 289*3117ece4Schristos if (dst == NULL) 290*3117ece4Schristos return NULL; 291*3117ece4Schristos strcpy(dst, str1); 292*3117ece4Schristos strcpy(dst + size1, str2); 293*3117ece4Schristos if (str3 != NULL) 294*3117ece4Schristos strcpy(dst + size1 + size2, str3); 295*3117ece4Schristos assert(strlen(dst) == size1 + size2 + size3); 296*3117ece4Schristos return dst; 297*3117ece4Schristos } 298*3117ece4Schristos 299*3117ece4Schristos static char* cat2(char const* str1, char const* str2) { 300*3117ece4Schristos return cat3(str1, str2, NULL); 301*3117ece4Schristos } 302*3117ece4Schristos 303*3117ece4Schristos /** 304*3117ece4Schristos * State needed by the curl callback. 305*3117ece4Schristos * It takes data from curl, hashes it, and writes it to the file. 306*3117ece4Schristos */ 307*3117ece4Schristos typedef struct { 308*3117ece4Schristos FILE* file; 309*3117ece4Schristos XXH64_state_t xxhash64; 310*3117ece4Schristos int error; 311*3117ece4Schristos } curl_data_t; 312*3117ece4Schristos 313*3117ece4Schristos /** Create the curl state. */ 314*3117ece4Schristos static curl_data_t curl_data_create( 315*3117ece4Schristos data_resource_t const* resource, 316*3117ece4Schristos data_type_t type) { 317*3117ece4Schristos curl_data_t cdata = {}; 318*3117ece4Schristos 319*3117ece4Schristos XXH64_reset(&cdata.xxhash64, 0); 320*3117ece4Schristos 321*3117ece4Schristos assert(UTIL_isDirectory(g_data_dir)); 322*3117ece4Schristos 323*3117ece4Schristos if (type == data_type_file) { 324*3117ece4Schristos /* Decompress the resource and store to the path. */ 325*3117ece4Schristos char* cmd = cat3("zstd -dqfo '", resource->path, "'"); 326*3117ece4Schristos if (cmd == NULL) { 327*3117ece4Schristos cdata.error = ENOMEM; 328*3117ece4Schristos return cdata; 329*3117ece4Schristos } 330*3117ece4Schristos cdata.file = popen(cmd, "w"); 331*3117ece4Schristos free(cmd); 332*3117ece4Schristos } else { 333*3117ece4Schristos /* Decompress and extract the resource to the cache directory. */ 334*3117ece4Schristos char* cmd = cat3("zstd -dc | tar -x -C '", g_data_dir, "'"); 335*3117ece4Schristos if (cmd == NULL) { 336*3117ece4Schristos cdata.error = ENOMEM; 337*3117ece4Schristos return cdata; 338*3117ece4Schristos } 339*3117ece4Schristos cdata.file = popen(cmd, "w"); 340*3117ece4Schristos free(cmd); 341*3117ece4Schristos } 342*3117ece4Schristos if (cdata.file == NULL) { 343*3117ece4Schristos cdata.error = errno; 344*3117ece4Schristos } 345*3117ece4Schristos 346*3117ece4Schristos return cdata; 347*3117ece4Schristos } 348*3117ece4Schristos 349*3117ece4Schristos /** Free the curl state. */ 350*3117ece4Schristos static int curl_data_free(curl_data_t cdata) { 351*3117ece4Schristos return pclose(cdata.file); 352*3117ece4Schristos } 353*3117ece4Schristos 354*3117ece4Schristos /** curl callback. Updates the hash, and writes to the file. */ 355*3117ece4Schristos static size_t curl_write(void* data, size_t size, size_t count, void* ptr) { 356*3117ece4Schristos curl_data_t* cdata = (curl_data_t*)ptr; 357*3117ece4Schristos size_t const written = fwrite(data, size, count, cdata->file); 358*3117ece4Schristos XXH64_update(&cdata->xxhash64, data, written * size); 359*3117ece4Schristos return written; 360*3117ece4Schristos } 361*3117ece4Schristos 362*3117ece4Schristos static int curl_download_resource( 363*3117ece4Schristos CURL* curl, 364*3117ece4Schristos data_resource_t const* resource, 365*3117ece4Schristos data_type_t type) { 366*3117ece4Schristos curl_data_t cdata; 367*3117ece4Schristos /* Download the data. */ 368*3117ece4Schristos if (curl_easy_setopt(curl, CURLOPT_URL, resource->url) != 0) 369*3117ece4Schristos return EINVAL; 370*3117ece4Schristos if (curl_easy_setopt(curl, CURLOPT_WRITEDATA, &cdata) != 0) 371*3117ece4Schristos return EINVAL; 372*3117ece4Schristos cdata = curl_data_create(resource, type); 373*3117ece4Schristos if (cdata.error != 0) 374*3117ece4Schristos return cdata.error; 375*3117ece4Schristos int const curl_err = curl_easy_perform(curl); 376*3117ece4Schristos int const close_err = curl_data_free(cdata); 377*3117ece4Schristos if (curl_err) { 378*3117ece4Schristos fprintf( 379*3117ece4Schristos stderr, 380*3117ece4Schristos "downloading '%s' for '%s' failed\n", 381*3117ece4Schristos resource->url, 382*3117ece4Schristos resource->path); 383*3117ece4Schristos return EIO; 384*3117ece4Schristos } 385*3117ece4Schristos if (close_err) { 386*3117ece4Schristos fprintf(stderr, "writing data to '%s' failed\n", resource->path); 387*3117ece4Schristos return EIO; 388*3117ece4Schristos } 389*3117ece4Schristos /* check that the file exists. */ 390*3117ece4Schristos if (type == data_type_file && !UTIL_isRegularFile(resource->path)) { 391*3117ece4Schristos fprintf(stderr, "output file '%s' does not exist\n", resource->path); 392*3117ece4Schristos return EIO; 393*3117ece4Schristos } 394*3117ece4Schristos if (type == data_type_dir && !UTIL_isDirectory(resource->path)) { 395*3117ece4Schristos fprintf( 396*3117ece4Schristos stderr, "output directory '%s' does not exist\n", resource->path); 397*3117ece4Schristos return EIO; 398*3117ece4Schristos } 399*3117ece4Schristos /* Check that the hash matches. */ 400*3117ece4Schristos if (XXH64_digest(&cdata.xxhash64) != resource->xxhash64) { 401*3117ece4Schristos fprintf( 402*3117ece4Schristos stderr, 403*3117ece4Schristos "checksum does not match: 0x%llxLL != 0x%llxLL\n", 404*3117ece4Schristos (unsigned long long)XXH64_digest(&cdata.xxhash64), 405*3117ece4Schristos (unsigned long long)resource->xxhash64); 406*3117ece4Schristos return EINVAL; 407*3117ece4Schristos } 408*3117ece4Schristos 409*3117ece4Schristos return 0; 410*3117ece4Schristos } 411*3117ece4Schristos 412*3117ece4Schristos /** Download a single data object. */ 413*3117ece4Schristos static int curl_download_datum(CURL* curl, data_t const* data) { 414*3117ece4Schristos int ret; 415*3117ece4Schristos ret = curl_download_resource(curl, &data->data, data->type); 416*3117ece4Schristos if (ret != 0) 417*3117ece4Schristos return ret; 418*3117ece4Schristos if (data_has_dict(data)) { 419*3117ece4Schristos ret = curl_download_resource(curl, &data->dict, data_type_file); 420*3117ece4Schristos if (ret != 0) 421*3117ece4Schristos return ret; 422*3117ece4Schristos } 423*3117ece4Schristos return ret; 424*3117ece4Schristos } 425*3117ece4Schristos 426*3117ece4Schristos /** Download all the data. */ 427*3117ece4Schristos static int curl_download_data(data_t const* const* data) { 428*3117ece4Schristos if (curl_global_init(CURL_GLOBAL_ALL) != 0) 429*3117ece4Schristos return EFAULT; 430*3117ece4Schristos 431*3117ece4Schristos curl_data_t cdata = {}; 432*3117ece4Schristos CURL* curl = curl_easy_init(); 433*3117ece4Schristos int err = EFAULT; 434*3117ece4Schristos 435*3117ece4Schristos if (curl == NULL) 436*3117ece4Schristos return EFAULT; 437*3117ece4Schristos 438*3117ece4Schristos if (curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L) != 0) 439*3117ece4Schristos goto out; 440*3117ece4Schristos if (curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L) != 0) 441*3117ece4Schristos goto out; 442*3117ece4Schristos if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, curl_write) != 0) 443*3117ece4Schristos goto out; 444*3117ece4Schristos 445*3117ece4Schristos assert(data != NULL); 446*3117ece4Schristos for (; *data != NULL; ++data) { 447*3117ece4Schristos if (curl_download_datum(curl, *data) != 0) 448*3117ece4Schristos goto out; 449*3117ece4Schristos } 450*3117ece4Schristos 451*3117ece4Schristos err = 0; 452*3117ece4Schristos out: 453*3117ece4Schristos curl_easy_cleanup(curl); 454*3117ece4Schristos curl_global_cleanup(); 455*3117ece4Schristos return err; 456*3117ece4Schristos } 457*3117ece4Schristos 458*3117ece4Schristos /** Fill the path member variable of the data objects. */ 459*3117ece4Schristos static int data_create_paths(data_t* const* data, char const* dir) { 460*3117ece4Schristos size_t const dirlen = strlen(dir); 461*3117ece4Schristos assert(data != NULL); 462*3117ece4Schristos for (; *data != NULL; ++data) { 463*3117ece4Schristos data_t* const datum = *data; 464*3117ece4Schristos datum->data.path = cat3(dir, "/", datum->name); 465*3117ece4Schristos if (datum->data.path == NULL) 466*3117ece4Schristos return ENOMEM; 467*3117ece4Schristos if (data_has_dict(datum)) { 468*3117ece4Schristos datum->dict.path = cat2(datum->data.path, ".dict"); 469*3117ece4Schristos if (datum->dict.path == NULL) 470*3117ece4Schristos return ENOMEM; 471*3117ece4Schristos } 472*3117ece4Schristos } 473*3117ece4Schristos return 0; 474*3117ece4Schristos } 475*3117ece4Schristos 476*3117ece4Schristos /** Free the path member variable of the data objects. */ 477*3117ece4Schristos static void data_free_paths(data_t* const* data) { 478*3117ece4Schristos assert(data != NULL); 479*3117ece4Schristos for (; *data != NULL; ++data) { 480*3117ece4Schristos data_t* datum = *data; 481*3117ece4Schristos free((void*)datum->data.path); 482*3117ece4Schristos free((void*)datum->dict.path); 483*3117ece4Schristos datum->data.path = NULL; 484*3117ece4Schristos datum->dict.path = NULL; 485*3117ece4Schristos } 486*3117ece4Schristos } 487*3117ece4Schristos 488*3117ece4Schristos static char const kStampName[] = "STAMP"; 489*3117ece4Schristos 490*3117ece4Schristos static void xxh_update_le(XXH64_state_t* state, uint64_t data) { 491*3117ece4Schristos if (!MEM_isLittleEndian()) 492*3117ece4Schristos data = MEM_swap64(data); 493*3117ece4Schristos XXH64_update(state, &data, sizeof(data)); 494*3117ece4Schristos } 495*3117ece4Schristos 496*3117ece4Schristos /** Hash the data to create the stamp. */ 497*3117ece4Schristos static uint64_t stamp_hash(data_t const* const* data) { 498*3117ece4Schristos XXH64_state_t state; 499*3117ece4Schristos 500*3117ece4Schristos XXH64_reset(&state, 0); 501*3117ece4Schristos assert(data != NULL); 502*3117ece4Schristos for (; *data != NULL; ++data) { 503*3117ece4Schristos data_t const* datum = *data; 504*3117ece4Schristos /* We don't care about the URL that we fetch from. */ 505*3117ece4Schristos /* The path is derived from the name. */ 506*3117ece4Schristos XXH64_update(&state, datum->name, strlen(datum->name)); 507*3117ece4Schristos xxh_update_le(&state, datum->data.xxhash64); 508*3117ece4Schristos xxh_update_le(&state, datum->dict.xxhash64); 509*3117ece4Schristos xxh_update_le(&state, datum->type); 510*3117ece4Schristos } 511*3117ece4Schristos return XXH64_digest(&state); 512*3117ece4Schristos } 513*3117ece4Schristos 514*3117ece4Schristos /** Check if the stamp matches the stamp in the cache directory. */ 515*3117ece4Schristos static int stamp_check(char const* dir, data_t const* const* data) { 516*3117ece4Schristos char* stamp = cat3(dir, "/", kStampName); 517*3117ece4Schristos uint64_t const expected = stamp_hash(data); 518*3117ece4Schristos XXH64_canonical_t actual; 519*3117ece4Schristos FILE* stampfile = NULL; 520*3117ece4Schristos int matches = 0; 521*3117ece4Schristos 522*3117ece4Schristos if (stamp == NULL) 523*3117ece4Schristos goto out; 524*3117ece4Schristos if (!UTIL_isRegularFile(stamp)) { 525*3117ece4Schristos fprintf(stderr, "stamp does not exist: recreating the data cache\n"); 526*3117ece4Schristos goto out; 527*3117ece4Schristos } 528*3117ece4Schristos 529*3117ece4Schristos stampfile = fopen(stamp, "rb"); 530*3117ece4Schristos if (stampfile == NULL) { 531*3117ece4Schristos fprintf(stderr, "could not open stamp: recreating the data cache\n"); 532*3117ece4Schristos goto out; 533*3117ece4Schristos } 534*3117ece4Schristos 535*3117ece4Schristos size_t b; 536*3117ece4Schristos if ((b = fread(&actual, sizeof(actual), 1, stampfile)) != 1) { 537*3117ece4Schristos fprintf(stderr, "invalid stamp: recreating the data cache\n"); 538*3117ece4Schristos goto out; 539*3117ece4Schristos } 540*3117ece4Schristos 541*3117ece4Schristos matches = (expected == XXH64_hashFromCanonical(&actual)); 542*3117ece4Schristos if (matches) 543*3117ece4Schristos fprintf(stderr, "stamp matches: reusing the cached data\n"); 544*3117ece4Schristos else 545*3117ece4Schristos fprintf(stderr, "stamp does not match: recreating the data cache\n"); 546*3117ece4Schristos 547*3117ece4Schristos out: 548*3117ece4Schristos free(stamp); 549*3117ece4Schristos if (stampfile != NULL) 550*3117ece4Schristos fclose(stampfile); 551*3117ece4Schristos return matches; 552*3117ece4Schristos } 553*3117ece4Schristos 554*3117ece4Schristos /** On success write a new stamp, on failure delete the old stamp. */ 555*3117ece4Schristos static int 556*3117ece4Schristos stamp_write(char const* dir, data_t const* const* data, int const data_err) { 557*3117ece4Schristos char* stamp = cat3(dir, "/", kStampName); 558*3117ece4Schristos FILE* stampfile = NULL; 559*3117ece4Schristos int err = EIO; 560*3117ece4Schristos 561*3117ece4Schristos if (stamp == NULL) 562*3117ece4Schristos return ENOMEM; 563*3117ece4Schristos 564*3117ece4Schristos if (data_err != 0) { 565*3117ece4Schristos err = data_err; 566*3117ece4Schristos goto out; 567*3117ece4Schristos } 568*3117ece4Schristos XXH64_canonical_t hash; 569*3117ece4Schristos 570*3117ece4Schristos XXH64_canonicalFromHash(&hash, stamp_hash(data)); 571*3117ece4Schristos 572*3117ece4Schristos stampfile = fopen(stamp, "wb"); 573*3117ece4Schristos if (stampfile == NULL) 574*3117ece4Schristos goto out; 575*3117ece4Schristos if (fwrite(&hash, sizeof(hash), 1, stampfile) != 1) 576*3117ece4Schristos goto out; 577*3117ece4Schristos err = 0; 578*3117ece4Schristos fprintf(stderr, "stamped new data cache\n"); 579*3117ece4Schristos out: 580*3117ece4Schristos if (err != 0) 581*3117ece4Schristos /* Ignore errors. */ 582*3117ece4Schristos unlink(stamp); 583*3117ece4Schristos free(stamp); 584*3117ece4Schristos if (stampfile != NULL) 585*3117ece4Schristos fclose(stampfile); 586*3117ece4Schristos return err; 587*3117ece4Schristos } 588*3117ece4Schristos 589*3117ece4Schristos int data_init(char const* dir) { 590*3117ece4Schristos int err; 591*3117ece4Schristos 592*3117ece4Schristos if (dir == NULL) 593*3117ece4Schristos return EINVAL; 594*3117ece4Schristos 595*3117ece4Schristos /* This must be first to simplify logic. */ 596*3117ece4Schristos err = ensure_directory_exists(dir); 597*3117ece4Schristos if (err != 0) 598*3117ece4Schristos return err; 599*3117ece4Schristos 600*3117ece4Schristos /* Save the cache directory. */ 601*3117ece4Schristos g_data_dir = strdup(dir); 602*3117ece4Schristos if (g_data_dir == NULL) 603*3117ece4Schristos return ENOMEM; 604*3117ece4Schristos 605*3117ece4Schristos err = data_create_paths(g_data, dir); 606*3117ece4Schristos if (err != 0) 607*3117ece4Schristos return err; 608*3117ece4Schristos 609*3117ece4Schristos /* If the stamp matches then we are good to go. 610*3117ece4Schristos * This must be called before any modifications to the data cache. 611*3117ece4Schristos * After this point, we MUST call stamp_write() to update the STAMP, 612*3117ece4Schristos * since we've updated the data cache. 613*3117ece4Schristos */ 614*3117ece4Schristos if (stamp_check(dir, data)) 615*3117ece4Schristos return 0; 616*3117ece4Schristos 617*3117ece4Schristos err = curl_download_data(data); 618*3117ece4Schristos if (err != 0) 619*3117ece4Schristos goto out; 620*3117ece4Schristos 621*3117ece4Schristos out: 622*3117ece4Schristos /* This must be last, since it must know if data_init() succeeded. */ 623*3117ece4Schristos stamp_write(dir, data, err); 624*3117ece4Schristos return err; 625*3117ece4Schristos } 626*3117ece4Schristos 627*3117ece4Schristos void data_finish(void) { 628*3117ece4Schristos data_free_paths(g_data); 629*3117ece4Schristos free(g_data_dir); 630*3117ece4Schristos g_data_dir = NULL; 631*3117ece4Schristos } 632