xref: /netbsd-src/external/bsd/zstd/dist/tests/regression/data.c (revision 3117ece4fc4a4ca4489ba793710b60b0d26bab6c)
1*3117ece4Schristos /*
2*3117ece4Schristos  * Copyright (c) Meta Platforms, Inc. and affiliates.
3*3117ece4Schristos  * All rights reserved.
4*3117ece4Schristos  *
5*3117ece4Schristos  * This source code is licensed under both the BSD-style license (found in the
6*3117ece4Schristos  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*3117ece4Schristos  * in the COPYING file in the root directory of this source tree).
8*3117ece4Schristos  * You may select, at your option, one of the above-listed licenses.
9*3117ece4Schristos  */
10*3117ece4Schristos 
11*3117ece4Schristos #include "data.h"
12*3117ece4Schristos 
13*3117ece4Schristos #include <assert.h>
14*3117ece4Schristos #include <errno.h>
15*3117ece4Schristos #include <stdio.h>
16*3117ece4Schristos #include <string.h>
17*3117ece4Schristos #include <stdlib.h>   /* free() */
18*3117ece4Schristos 
19*3117ece4Schristos #include <sys/stat.h>
20*3117ece4Schristos 
21*3117ece4Schristos #include <curl/curl.h>
22*3117ece4Schristos 
23*3117ece4Schristos #include "mem.h"
24*3117ece4Schristos #include "util.h"
25*3117ece4Schristos #define XXH_STATIC_LINKING_ONLY
26*3117ece4Schristos #include "xxhash.h"
27*3117ece4Schristos 
28*3117ece4Schristos /**
29*3117ece4Schristos  * Data objects
30*3117ece4Schristos  */
31*3117ece4Schristos 
32*3117ece4Schristos #define REGRESSION_RELEASE(x) \
33*3117ece4Schristos     "https://github.com/facebook/zstd/releases/download/regression-data/" x
34*3117ece4Schristos 
35*3117ece4Schristos data_t silesia = {
36*3117ece4Schristos     .name = "silesia",
37*3117ece4Schristos     .type = data_type_dir,
38*3117ece4Schristos     .data =
39*3117ece4Schristos         {
40*3117ece4Schristos             .url = REGRESSION_RELEASE("silesia.tar.zst"),
41*3117ece4Schristos             .xxhash64 = 0x48a199f92f93e977LL,
42*3117ece4Schristos         },
43*3117ece4Schristos };
44*3117ece4Schristos 
45*3117ece4Schristos data_t silesia_tar = {
46*3117ece4Schristos     .name = "silesia.tar",
47*3117ece4Schristos     .type = data_type_file,
48*3117ece4Schristos     .data =
49*3117ece4Schristos         {
50*3117ece4Schristos             .url = REGRESSION_RELEASE("silesia.tar.zst"),
51*3117ece4Schristos             .xxhash64 = 0x48a199f92f93e977LL,
52*3117ece4Schristos         },
53*3117ece4Schristos };
54*3117ece4Schristos 
55*3117ece4Schristos data_t github = {
56*3117ece4Schristos     .name = "github",
57*3117ece4Schristos     .type = data_type_dir,
58*3117ece4Schristos     .data =
59*3117ece4Schristos         {
60*3117ece4Schristos             .url = REGRESSION_RELEASE("github.tar.zst"),
61*3117ece4Schristos             .xxhash64 = 0xa9b1b44b020df292LL,
62*3117ece4Schristos         },
63*3117ece4Schristos     .dict =
64*3117ece4Schristos         {
65*3117ece4Schristos             .url = REGRESSION_RELEASE("github.dict.zst"),
66*3117ece4Schristos             .xxhash64 = 0x1eddc6f737d3cb53LL,
67*3117ece4Schristos 
68*3117ece4Schristos         },
69*3117ece4Schristos };
70*3117ece4Schristos 
71*3117ece4Schristos data_t github_tar = {
72*3117ece4Schristos     .name = "github.tar",
73*3117ece4Schristos     .type = data_type_file,
74*3117ece4Schristos     .data =
75*3117ece4Schristos         {
76*3117ece4Schristos             .url = REGRESSION_RELEASE("github.tar.zst"),
77*3117ece4Schristos             .xxhash64 = 0xa9b1b44b020df292LL,
78*3117ece4Schristos         },
79*3117ece4Schristos     .dict =
80*3117ece4Schristos         {
81*3117ece4Schristos             .url = REGRESSION_RELEASE("github.dict.zst"),
82*3117ece4Schristos             .xxhash64 = 0x1eddc6f737d3cb53LL,
83*3117ece4Schristos 
84*3117ece4Schristos         },
85*3117ece4Schristos };
86*3117ece4Schristos 
87*3117ece4Schristos static data_t* g_data[] = {
88*3117ece4Schristos     &silesia,
89*3117ece4Schristos     &silesia_tar,
90*3117ece4Schristos     &github,
91*3117ece4Schristos     &github_tar,
92*3117ece4Schristos     NULL,
93*3117ece4Schristos };
94*3117ece4Schristos 
95*3117ece4Schristos data_t const* const* data = (data_t const* const*)g_data;
96*3117ece4Schristos 
97*3117ece4Schristos /**
98*3117ece4Schristos  * data helpers.
99*3117ece4Schristos  */
100*3117ece4Schristos 
101*3117ece4Schristos int data_has_dict(data_t const* data) {
102*3117ece4Schristos     return data->dict.url != NULL;
103*3117ece4Schristos }
104*3117ece4Schristos 
105*3117ece4Schristos /**
106*3117ece4Schristos  * data buffer helper functions (documented in header).
107*3117ece4Schristos  */
108*3117ece4Schristos 
109*3117ece4Schristos data_buffer_t data_buffer_create(size_t const capacity) {
110*3117ece4Schristos     data_buffer_t buffer = {};
111*3117ece4Schristos 
112*3117ece4Schristos     buffer.data = (uint8_t*)malloc(capacity);
113*3117ece4Schristos     if (buffer.data == NULL)
114*3117ece4Schristos         return buffer;
115*3117ece4Schristos     buffer.capacity = capacity;
116*3117ece4Schristos     return buffer;
117*3117ece4Schristos }
118*3117ece4Schristos 
119*3117ece4Schristos data_buffer_t data_buffer_read(char const* filename) {
120*3117ece4Schristos     data_buffer_t buffer = {};
121*3117ece4Schristos 
122*3117ece4Schristos     uint64_t const size = UTIL_getFileSize(filename);
123*3117ece4Schristos     if (size == UTIL_FILESIZE_UNKNOWN) {
124*3117ece4Schristos         fprintf(stderr, "unknown size for %s\n", filename);
125*3117ece4Schristos         return buffer;
126*3117ece4Schristos     }
127*3117ece4Schristos 
128*3117ece4Schristos     buffer.data = (uint8_t*)malloc(size);
129*3117ece4Schristos     if (buffer.data == NULL) {
130*3117ece4Schristos         fprintf(stderr, "malloc failed\n");
131*3117ece4Schristos         return buffer;
132*3117ece4Schristos     }
133*3117ece4Schristos     buffer.capacity = size;
134*3117ece4Schristos 
135*3117ece4Schristos     FILE* file = fopen(filename, "rb");
136*3117ece4Schristos     if (file == NULL) {
137*3117ece4Schristos         fprintf(stderr, "file null\n");
138*3117ece4Schristos         goto err;
139*3117ece4Schristos     }
140*3117ece4Schristos     buffer.size = fread(buffer.data, 1, buffer.capacity, file);
141*3117ece4Schristos     fclose(file);
142*3117ece4Schristos     if (buffer.size != buffer.capacity) {
143*3117ece4Schristos         fprintf(stderr, "read %zu != %zu\n", buffer.size, buffer.capacity);
144*3117ece4Schristos         goto err;
145*3117ece4Schristos     }
146*3117ece4Schristos 
147*3117ece4Schristos     return buffer;
148*3117ece4Schristos err:
149*3117ece4Schristos     free(buffer.data);
150*3117ece4Schristos     memset(&buffer, 0, sizeof(buffer));
151*3117ece4Schristos     return buffer;
152*3117ece4Schristos }
153*3117ece4Schristos 
154*3117ece4Schristos data_buffer_t data_buffer_get_data(data_t const* data) {
155*3117ece4Schristos     data_buffer_t const kEmptyBuffer = {};
156*3117ece4Schristos 
157*3117ece4Schristos     if (data->type != data_type_file)
158*3117ece4Schristos         return kEmptyBuffer;
159*3117ece4Schristos 
160*3117ece4Schristos     return data_buffer_read(data->data.path);
161*3117ece4Schristos }
162*3117ece4Schristos 
163*3117ece4Schristos data_buffer_t data_buffer_get_dict(data_t const* data) {
164*3117ece4Schristos     data_buffer_t const kEmptyBuffer = {};
165*3117ece4Schristos 
166*3117ece4Schristos     if (!data_has_dict(data))
167*3117ece4Schristos         return kEmptyBuffer;
168*3117ece4Schristos 
169*3117ece4Schristos     return data_buffer_read(data->dict.path);
170*3117ece4Schristos }
171*3117ece4Schristos 
172*3117ece4Schristos int data_buffer_compare(data_buffer_t buffer1, data_buffer_t buffer2) {
173*3117ece4Schristos     size_t const size =
174*3117ece4Schristos         buffer1.size < buffer2.size ? buffer1.size : buffer2.size;
175*3117ece4Schristos     int const cmp = memcmp(buffer1.data, buffer2.data, size);
176*3117ece4Schristos     if (cmp != 0)
177*3117ece4Schristos         return cmp;
178*3117ece4Schristos     if (buffer1.size < buffer2.size)
179*3117ece4Schristos         return -1;
180*3117ece4Schristos     if (buffer1.size == buffer2.size)
181*3117ece4Schristos         return 0;
182*3117ece4Schristos     assert(buffer1.size > buffer2.size);
183*3117ece4Schristos     return 1;
184*3117ece4Schristos }
185*3117ece4Schristos 
186*3117ece4Schristos void data_buffer_free(data_buffer_t buffer) {
187*3117ece4Schristos     free(buffer.data);
188*3117ece4Schristos }
189*3117ece4Schristos 
190*3117ece4Schristos /**
191*3117ece4Schristos  * data filenames helpers.
192*3117ece4Schristos  */
193*3117ece4Schristos 
194*3117ece4Schristos FileNamesTable* data_filenames_get(data_t const* data)
195*3117ece4Schristos {
196*3117ece4Schristos     char const* const path = data->data.path;
197*3117ece4Schristos     return UTIL_createExpandedFNT(&path, 1, 0 /* followLinks */ );
198*3117ece4Schristos }
199*3117ece4Schristos 
200*3117ece4Schristos /**
201*3117ece4Schristos  * data buffers helpers.
202*3117ece4Schristos  */
203*3117ece4Schristos 
204*3117ece4Schristos data_buffers_t data_buffers_get(data_t const* data) {
205*3117ece4Schristos     data_buffers_t buffers = {.size = 0};
206*3117ece4Schristos     FileNamesTable* const filenames = data_filenames_get(data);
207*3117ece4Schristos     if (filenames == NULL) return buffers;
208*3117ece4Schristos     if (filenames->tableSize == 0) {
209*3117ece4Schristos         UTIL_freeFileNamesTable(filenames);
210*3117ece4Schristos         return buffers;
211*3117ece4Schristos     }
212*3117ece4Schristos 
213*3117ece4Schristos     data_buffer_t* buffersPtr =
214*3117ece4Schristos         (data_buffer_t*)malloc(filenames->tableSize * sizeof(*buffersPtr));
215*3117ece4Schristos     if (buffersPtr == NULL) {
216*3117ece4Schristos         UTIL_freeFileNamesTable(filenames);
217*3117ece4Schristos         return buffers;
218*3117ece4Schristos     }
219*3117ece4Schristos     buffers.buffers = (data_buffer_t const*)buffersPtr;
220*3117ece4Schristos     buffers.size = filenames->tableSize;
221*3117ece4Schristos 
222*3117ece4Schristos     for (size_t i = 0; i < filenames->tableSize; ++i) {
223*3117ece4Schristos         buffersPtr[i] = data_buffer_read(filenames->fileNames[i]);
224*3117ece4Schristos         if (buffersPtr[i].data == NULL) {
225*3117ece4Schristos             data_buffers_t const kEmptyBuffer = {};
226*3117ece4Schristos             data_buffers_free(buffers);
227*3117ece4Schristos             UTIL_freeFileNamesTable(filenames);
228*3117ece4Schristos             return kEmptyBuffer;
229*3117ece4Schristos         }
230*3117ece4Schristos     }
231*3117ece4Schristos 
232*3117ece4Schristos     UTIL_freeFileNamesTable(filenames);
233*3117ece4Schristos     return buffers;
234*3117ece4Schristos }
235*3117ece4Schristos 
236*3117ece4Schristos /**
237*3117ece4Schristos  * Frees the data buffers.
238*3117ece4Schristos  */
239*3117ece4Schristos void data_buffers_free(data_buffers_t buffers) {
240*3117ece4Schristos     free((data_buffer_t*)buffers.buffers);
241*3117ece4Schristos }
242*3117ece4Schristos 
243*3117ece4Schristos /**
244*3117ece4Schristos  * Initialization and download functions.
245*3117ece4Schristos  */
246*3117ece4Schristos 
/* Heap copy of the cache directory path: set by data_init(), released and
 * reset to NULL by data_finish(). */
static char* g_data_dir = NULL;
248*3117ece4Schristos 
/**
 * mkdir -p: creates `indir` and every missing parent, one level at a time.
 * Directories are created with mode S_IRWXU.
 *
 * @returns 0 on success, EINVAL for an empty path, ENOMEM if the path
 *          cannot be duplicated, otherwise the errno left by mkdir().
 */
static int ensure_directory_exists(char const* indir) {
    /* The scan below always advances at least one character before looking
     * for a separator, so an empty path would be read out of bounds. */
    if (indir[0] == '\0')
        return EINVAL;

    char* const dir = strdup(indir);
    if (dir == NULL)
        return ENOMEM;

    int ret = 0;
    char* end = dir;
    do {
        /* Find the next directory level. */
        for (++end; *end != '\0' && *end != '/'; ++end)
            ;
        /* End the string there, make the directory, and restore the string. */
        char const save = *end;
        *end = '\0';
        int const isdir = UTIL_isDirectory(dir);
        int const mkdirRet = mkdir(dir, S_IRWXU);
        /* Capture errno right away, before anything else can change it. */
        int const mkdirErrno = errno;
        *end = save;
        /* It's okay if the directory already exists. */
        if (mkdirRet == 0 || (mkdirErrno == EEXIST && isdir))
            continue;
        ret = mkdirErrno;
        fprintf(stderr, "mkdir() failed\n");
        break;
    } while (*end != '\0');

    free(dir);
    return ret;
}
281*3117ece4Schristos 
/**
 * Joins up to three strings into one newly allocated string.
 * `str3` may be NULL, in which case only `str1` and `str2` are joined.
 *
 * @returns the joined string (the caller frees it), or NULL when the
 *          allocation fails.
 */
static char* cat3(char const* str1, char const* str2, char const* str3) {
    size_t const len1 = strlen(str1);
    size_t const len2 = strlen(str2);
    size_t const len3 = (str3 != NULL) ? strlen(str3) : 0;
    size_t const total = len1 + len2 + len3;

    char* const out = (char*)malloc(total + 1);
    if (out == NULL)
        return NULL;

    /* Append each piece at the running write position. */
    char* cursor = out;
    memcpy(cursor, str1, len1);
    cursor += len1;
    memcpy(cursor, str2, len2);
    cursor += len2;
    if (str3 != NULL) {
        memcpy(cursor, str3, len3);
        cursor += len3;
    }
    *cursor = '\0';

    assert(strlen(out) == total);
    return out;
}
298*3117ece4Schristos 
/** Joins two strings into a newly allocated string; NULL on allocation failure. */
static char* cat2(char const* str1, char const* str2) {
    char const* const noThird = NULL;
    return cat3(str1, str2, noThird);
}
302*3117ece4Schristos 
303*3117ece4Schristos /**
304*3117ece4Schristos  * State needed by the curl callback.
305*3117ece4Schristos  * It takes data from curl, hashes it, and writes it to the file.
306*3117ece4Schristos  */
307*3117ece4Schristos typedef struct {
308*3117ece4Schristos     FILE* file;
309*3117ece4Schristos     XXH64_state_t xxhash64;
310*3117ece4Schristos     int error;
311*3117ece4Schristos } curl_data_t;
312*3117ece4Schristos 
313*3117ece4Schristos /** Create the curl state. */
314*3117ece4Schristos static curl_data_t curl_data_create(
315*3117ece4Schristos     data_resource_t const* resource,
316*3117ece4Schristos     data_type_t type) {
317*3117ece4Schristos     curl_data_t cdata = {};
318*3117ece4Schristos 
319*3117ece4Schristos     XXH64_reset(&cdata.xxhash64, 0);
320*3117ece4Schristos 
321*3117ece4Schristos     assert(UTIL_isDirectory(g_data_dir));
322*3117ece4Schristos 
323*3117ece4Schristos     if (type == data_type_file) {
324*3117ece4Schristos         /* Decompress the resource and store to the path. */
325*3117ece4Schristos         char* cmd = cat3("zstd -dqfo '", resource->path, "'");
326*3117ece4Schristos         if (cmd == NULL) {
327*3117ece4Schristos             cdata.error = ENOMEM;
328*3117ece4Schristos             return cdata;
329*3117ece4Schristos         }
330*3117ece4Schristos         cdata.file = popen(cmd, "w");
331*3117ece4Schristos         free(cmd);
332*3117ece4Schristos     } else {
333*3117ece4Schristos         /* Decompress and extract the resource to the cache directory. */
334*3117ece4Schristos         char* cmd = cat3("zstd -dc | tar -x -C '", g_data_dir, "'");
335*3117ece4Schristos         if (cmd == NULL) {
336*3117ece4Schristos             cdata.error = ENOMEM;
337*3117ece4Schristos             return cdata;
338*3117ece4Schristos         }
339*3117ece4Schristos         cdata.file = popen(cmd, "w");
340*3117ece4Schristos         free(cmd);
341*3117ece4Schristos     }
342*3117ece4Schristos     if (cdata.file == NULL) {
343*3117ece4Schristos         cdata.error = errno;
344*3117ece4Schristos     }
345*3117ece4Schristos 
346*3117ece4Schristos     return cdata;
347*3117ece4Schristos }
348*3117ece4Schristos 
349*3117ece4Schristos /** Free the curl state. */
350*3117ece4Schristos static int curl_data_free(curl_data_t cdata) {
351*3117ece4Schristos     return pclose(cdata.file);
352*3117ece4Schristos }
353*3117ece4Schristos 
354*3117ece4Schristos /** curl callback. Updates the hash, and writes to the file. */
355*3117ece4Schristos static size_t curl_write(void* data, size_t size, size_t count, void* ptr) {
356*3117ece4Schristos     curl_data_t* cdata = (curl_data_t*)ptr;
357*3117ece4Schristos     size_t const written = fwrite(data, size, count, cdata->file);
358*3117ece4Schristos     XXH64_update(&cdata->xxhash64, data, written * size);
359*3117ece4Schristos     return written;
360*3117ece4Schristos }
361*3117ece4Schristos 
362*3117ece4Schristos static int curl_download_resource(
363*3117ece4Schristos     CURL* curl,
364*3117ece4Schristos     data_resource_t const* resource,
365*3117ece4Schristos     data_type_t type) {
366*3117ece4Schristos     curl_data_t cdata;
367*3117ece4Schristos     /* Download the data. */
368*3117ece4Schristos     if (curl_easy_setopt(curl, CURLOPT_URL, resource->url) != 0)
369*3117ece4Schristos         return EINVAL;
370*3117ece4Schristos     if (curl_easy_setopt(curl, CURLOPT_WRITEDATA, &cdata) != 0)
371*3117ece4Schristos         return EINVAL;
372*3117ece4Schristos     cdata = curl_data_create(resource, type);
373*3117ece4Schristos     if (cdata.error != 0)
374*3117ece4Schristos         return cdata.error;
375*3117ece4Schristos     int const curl_err = curl_easy_perform(curl);
376*3117ece4Schristos     int const close_err = curl_data_free(cdata);
377*3117ece4Schristos     if (curl_err) {
378*3117ece4Schristos         fprintf(
379*3117ece4Schristos             stderr,
380*3117ece4Schristos             "downloading '%s' for '%s' failed\n",
381*3117ece4Schristos             resource->url,
382*3117ece4Schristos             resource->path);
383*3117ece4Schristos         return EIO;
384*3117ece4Schristos     }
385*3117ece4Schristos     if (close_err) {
386*3117ece4Schristos         fprintf(stderr, "writing data to '%s' failed\n", resource->path);
387*3117ece4Schristos         return EIO;
388*3117ece4Schristos     }
389*3117ece4Schristos     /* check that the file exists. */
390*3117ece4Schristos     if (type == data_type_file && !UTIL_isRegularFile(resource->path)) {
391*3117ece4Schristos         fprintf(stderr, "output file '%s' does not exist\n", resource->path);
392*3117ece4Schristos         return EIO;
393*3117ece4Schristos     }
394*3117ece4Schristos     if (type == data_type_dir && !UTIL_isDirectory(resource->path)) {
395*3117ece4Schristos         fprintf(
396*3117ece4Schristos             stderr, "output directory '%s' does not exist\n", resource->path);
397*3117ece4Schristos         return EIO;
398*3117ece4Schristos     }
399*3117ece4Schristos     /* Check that the hash matches. */
400*3117ece4Schristos     if (XXH64_digest(&cdata.xxhash64) != resource->xxhash64) {
401*3117ece4Schristos         fprintf(
402*3117ece4Schristos             stderr,
403*3117ece4Schristos             "checksum does not match: 0x%llxLL != 0x%llxLL\n",
404*3117ece4Schristos             (unsigned long long)XXH64_digest(&cdata.xxhash64),
405*3117ece4Schristos             (unsigned long long)resource->xxhash64);
406*3117ece4Schristos         return EINVAL;
407*3117ece4Schristos     }
408*3117ece4Schristos 
409*3117ece4Schristos     return 0;
410*3117ece4Schristos }
411*3117ece4Schristos 
412*3117ece4Schristos /** Download a single data object. */
413*3117ece4Schristos static int curl_download_datum(CURL* curl, data_t const* data) {
414*3117ece4Schristos     int ret;
415*3117ece4Schristos     ret = curl_download_resource(curl, &data->data, data->type);
416*3117ece4Schristos     if (ret != 0)
417*3117ece4Schristos         return ret;
418*3117ece4Schristos     if (data_has_dict(data)) {
419*3117ece4Schristos         ret = curl_download_resource(curl, &data->dict, data_type_file);
420*3117ece4Schristos         if (ret != 0)
421*3117ece4Schristos             return ret;
422*3117ece4Schristos     }
423*3117ece4Schristos     return ret;
424*3117ece4Schristos }
425*3117ece4Schristos 
426*3117ece4Schristos /** Download all the data. */
427*3117ece4Schristos static int curl_download_data(data_t const* const* data) {
428*3117ece4Schristos     if (curl_global_init(CURL_GLOBAL_ALL) != 0)
429*3117ece4Schristos         return EFAULT;
430*3117ece4Schristos 
431*3117ece4Schristos     curl_data_t cdata = {};
432*3117ece4Schristos     CURL* curl = curl_easy_init();
433*3117ece4Schristos     int err = EFAULT;
434*3117ece4Schristos 
435*3117ece4Schristos     if (curl == NULL)
436*3117ece4Schristos         return EFAULT;
437*3117ece4Schristos 
438*3117ece4Schristos     if (curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L) != 0)
439*3117ece4Schristos         goto out;
440*3117ece4Schristos     if (curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L) != 0)
441*3117ece4Schristos         goto out;
442*3117ece4Schristos     if (curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, curl_write) != 0)
443*3117ece4Schristos         goto out;
444*3117ece4Schristos 
445*3117ece4Schristos     assert(data != NULL);
446*3117ece4Schristos     for (; *data != NULL; ++data) {
447*3117ece4Schristos         if (curl_download_datum(curl, *data) != 0)
448*3117ece4Schristos             goto out;
449*3117ece4Schristos     }
450*3117ece4Schristos 
451*3117ece4Schristos     err = 0;
452*3117ece4Schristos out:
453*3117ece4Schristos     curl_easy_cleanup(curl);
454*3117ece4Schristos     curl_global_cleanup();
455*3117ece4Schristos     return err;
456*3117ece4Schristos }
457*3117ece4Schristos 
458*3117ece4Schristos /** Fill the path member variable of the data objects. */
459*3117ece4Schristos static int data_create_paths(data_t* const* data, char const* dir) {
460*3117ece4Schristos     size_t const dirlen = strlen(dir);
461*3117ece4Schristos     assert(data != NULL);
462*3117ece4Schristos     for (; *data != NULL; ++data) {
463*3117ece4Schristos         data_t* const datum = *data;
464*3117ece4Schristos         datum->data.path = cat3(dir, "/", datum->name);
465*3117ece4Schristos         if (datum->data.path == NULL)
466*3117ece4Schristos             return ENOMEM;
467*3117ece4Schristos         if (data_has_dict(datum)) {
468*3117ece4Schristos             datum->dict.path = cat2(datum->data.path, ".dict");
469*3117ece4Schristos             if (datum->dict.path == NULL)
470*3117ece4Schristos                 return ENOMEM;
471*3117ece4Schristos         }
472*3117ece4Schristos     }
473*3117ece4Schristos     return 0;
474*3117ece4Schristos }
475*3117ece4Schristos 
476*3117ece4Schristos /** Free the path member variable of the data objects. */
477*3117ece4Schristos static void data_free_paths(data_t* const* data) {
478*3117ece4Schristos     assert(data != NULL);
479*3117ece4Schristos     for (; *data != NULL; ++data) {
480*3117ece4Schristos         data_t* datum = *data;
481*3117ece4Schristos         free((void*)datum->data.path);
482*3117ece4Schristos         free((void*)datum->dict.path);
483*3117ece4Schristos         datum->data.path = NULL;
484*3117ece4Schristos         datum->dict.path = NULL;
485*3117ece4Schristos     }
486*3117ece4Schristos }
487*3117ece4Schristos 
/* File name of the stamp stored directly inside the cache directory. */
static char const kStampName[] = "STAMP";
489*3117ece4Schristos 
490*3117ece4Schristos static void xxh_update_le(XXH64_state_t* state, uint64_t data) {
491*3117ece4Schristos     if (!MEM_isLittleEndian())
492*3117ece4Schristos         data = MEM_swap64(data);
493*3117ece4Schristos     XXH64_update(state, &data, sizeof(data));
494*3117ece4Schristos }
495*3117ece4Schristos 
496*3117ece4Schristos /** Hash the data to create the stamp. */
497*3117ece4Schristos static uint64_t stamp_hash(data_t const* const* data) {
498*3117ece4Schristos     XXH64_state_t state;
499*3117ece4Schristos 
500*3117ece4Schristos     XXH64_reset(&state, 0);
501*3117ece4Schristos     assert(data != NULL);
502*3117ece4Schristos     for (; *data != NULL; ++data) {
503*3117ece4Schristos         data_t const* datum = *data;
504*3117ece4Schristos         /* We don't care about the URL that we fetch from. */
505*3117ece4Schristos         /* The path is derived from the name. */
506*3117ece4Schristos         XXH64_update(&state, datum->name, strlen(datum->name));
507*3117ece4Schristos         xxh_update_le(&state, datum->data.xxhash64);
508*3117ece4Schristos         xxh_update_le(&state, datum->dict.xxhash64);
509*3117ece4Schristos         xxh_update_le(&state, datum->type);
510*3117ece4Schristos     }
511*3117ece4Schristos     return XXH64_digest(&state);
512*3117ece4Schristos }
513*3117ece4Schristos 
514*3117ece4Schristos /** Check if the stamp matches the stamp in the cache directory. */
515*3117ece4Schristos static int stamp_check(char const* dir, data_t const* const* data) {
516*3117ece4Schristos     char* stamp = cat3(dir, "/", kStampName);
517*3117ece4Schristos     uint64_t const expected = stamp_hash(data);
518*3117ece4Schristos     XXH64_canonical_t actual;
519*3117ece4Schristos     FILE* stampfile = NULL;
520*3117ece4Schristos     int matches = 0;
521*3117ece4Schristos 
522*3117ece4Schristos     if (stamp == NULL)
523*3117ece4Schristos         goto out;
524*3117ece4Schristos     if (!UTIL_isRegularFile(stamp)) {
525*3117ece4Schristos         fprintf(stderr, "stamp does not exist: recreating the data cache\n");
526*3117ece4Schristos         goto out;
527*3117ece4Schristos     }
528*3117ece4Schristos 
529*3117ece4Schristos     stampfile = fopen(stamp, "rb");
530*3117ece4Schristos     if (stampfile == NULL) {
531*3117ece4Schristos         fprintf(stderr, "could not open stamp: recreating the data cache\n");
532*3117ece4Schristos         goto out;
533*3117ece4Schristos     }
534*3117ece4Schristos 
535*3117ece4Schristos     size_t b;
536*3117ece4Schristos     if ((b = fread(&actual, sizeof(actual), 1, stampfile)) != 1) {
537*3117ece4Schristos         fprintf(stderr, "invalid stamp: recreating the data cache\n");
538*3117ece4Schristos         goto out;
539*3117ece4Schristos     }
540*3117ece4Schristos 
541*3117ece4Schristos     matches = (expected == XXH64_hashFromCanonical(&actual));
542*3117ece4Schristos     if (matches)
543*3117ece4Schristos         fprintf(stderr, "stamp matches: reusing the cached data\n");
544*3117ece4Schristos     else
545*3117ece4Schristos         fprintf(stderr, "stamp does not match: recreating the data cache\n");
546*3117ece4Schristos 
547*3117ece4Schristos out:
548*3117ece4Schristos     free(stamp);
549*3117ece4Schristos     if (stampfile != NULL)
550*3117ece4Schristos         fclose(stampfile);
551*3117ece4Schristos     return matches;
552*3117ece4Schristos }
553*3117ece4Schristos 
554*3117ece4Schristos /** On success write a new stamp, on failure delete the old stamp. */
555*3117ece4Schristos static int
556*3117ece4Schristos stamp_write(char const* dir, data_t const* const* data, int const data_err) {
557*3117ece4Schristos     char* stamp = cat3(dir, "/", kStampName);
558*3117ece4Schristos     FILE* stampfile = NULL;
559*3117ece4Schristos     int err = EIO;
560*3117ece4Schristos 
561*3117ece4Schristos     if (stamp == NULL)
562*3117ece4Schristos         return ENOMEM;
563*3117ece4Schristos 
564*3117ece4Schristos     if (data_err != 0) {
565*3117ece4Schristos         err = data_err;
566*3117ece4Schristos         goto out;
567*3117ece4Schristos     }
568*3117ece4Schristos     XXH64_canonical_t hash;
569*3117ece4Schristos 
570*3117ece4Schristos     XXH64_canonicalFromHash(&hash, stamp_hash(data));
571*3117ece4Schristos 
572*3117ece4Schristos     stampfile = fopen(stamp, "wb");
573*3117ece4Schristos     if (stampfile == NULL)
574*3117ece4Schristos         goto out;
575*3117ece4Schristos     if (fwrite(&hash, sizeof(hash), 1, stampfile) != 1)
576*3117ece4Schristos         goto out;
577*3117ece4Schristos     err = 0;
578*3117ece4Schristos     fprintf(stderr, "stamped new data cache\n");
579*3117ece4Schristos out:
580*3117ece4Schristos     if (err != 0)
581*3117ece4Schristos         /* Ignore errors. */
582*3117ece4Schristos         unlink(stamp);
583*3117ece4Schristos     free(stamp);
584*3117ece4Schristos     if (stampfile != NULL)
585*3117ece4Schristos         fclose(stampfile);
586*3117ece4Schristos     return err;
587*3117ece4Schristos }
588*3117ece4Schristos 
589*3117ece4Schristos int data_init(char const* dir) {
590*3117ece4Schristos     int err;
591*3117ece4Schristos 
592*3117ece4Schristos     if (dir == NULL)
593*3117ece4Schristos         return EINVAL;
594*3117ece4Schristos 
595*3117ece4Schristos     /* This must be first to simplify logic. */
596*3117ece4Schristos     err = ensure_directory_exists(dir);
597*3117ece4Schristos     if (err != 0)
598*3117ece4Schristos         return err;
599*3117ece4Schristos 
600*3117ece4Schristos     /* Save the cache directory. */
601*3117ece4Schristos     g_data_dir = strdup(dir);
602*3117ece4Schristos     if (g_data_dir == NULL)
603*3117ece4Schristos         return ENOMEM;
604*3117ece4Schristos 
605*3117ece4Schristos     err = data_create_paths(g_data, dir);
606*3117ece4Schristos     if (err != 0)
607*3117ece4Schristos         return err;
608*3117ece4Schristos 
609*3117ece4Schristos     /* If the stamp matches then we are good to go.
610*3117ece4Schristos      * This must be called before any modifications to the data cache.
611*3117ece4Schristos      * After this point, we MUST call stamp_write() to update the STAMP,
612*3117ece4Schristos      * since we've updated the data cache.
613*3117ece4Schristos      */
614*3117ece4Schristos     if (stamp_check(dir, data))
615*3117ece4Schristos         return 0;
616*3117ece4Schristos 
617*3117ece4Schristos     err = curl_download_data(data);
618*3117ece4Schristos     if (err != 0)
619*3117ece4Schristos         goto out;
620*3117ece4Schristos 
621*3117ece4Schristos out:
622*3117ece4Schristos     /* This must be last, since it must know if data_init() succeeded. */
623*3117ece4Schristos     stamp_write(dir, data, err);
624*3117ece4Schristos     return err;
625*3117ece4Schristos }
626*3117ece4Schristos 
627*3117ece4Schristos void data_finish(void) {
628*3117ece4Schristos     data_free_paths(g_data);
629*3117ece4Schristos     free(g_data_dir);
630*3117ece4Schristos     g_data_dir = NULL;
631*3117ece4Schristos }
632