xref: /netbsd-src/external/bsd/zstd/dist/examples/dictionary_compression.c (revision 3117ece4fc4a4ca4489ba793710b60b0d26bab6c)
1*3117ece4Schristos /*
2*3117ece4Schristos  * Copyright (c) Meta Platforms, Inc. and affiliates.
3*3117ece4Schristos  * All rights reserved.
4*3117ece4Schristos  *
5*3117ece4Schristos  * This source code is licensed under both the BSD-style license (found in the
6*3117ece4Schristos  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*3117ece4Schristos  * in the COPYING file in the root directory of this source tree).
8*3117ece4Schristos  * You may select, at your option, one of the above-listed licenses.
9*3117ece4Schristos **/
10*3117ece4Schristos 
/* This example demonstrates dictionary compression;
 * its counterpart is `examples/dictionary_decompression.c`.
 * Both examples presume that a dictionary already exists.
 * The main method to create a dictionary is `zstd --train`;
 * see the CLI documentation for details.
 * Another possible method is to use the dictionary training API
 * published in `lib/zdict.h`.
**/
19*3117ece4Schristos 
20*3117ece4Schristos #include <stdio.h>     // printf
21*3117ece4Schristos #include <stdlib.h>    // free
22*3117ece4Schristos #include <string.h>    // memset, strcat
23*3117ece4Schristos #include <zstd.h>      // presumes zstd library is installed
24*3117ece4Schristos #include "common.h"    // Helper functions, CHECK(), and CHECK_ZSTD()
25*3117ece4Schristos 
26*3117ece4Schristos /* createDict() :
27*3117ece4Schristos ** `dictFileName` is supposed already created using `zstd --train` */
28*3117ece4Schristos static ZSTD_CDict* createCDict_orDie(const char* dictFileName, int cLevel)
29*3117ece4Schristos {
30*3117ece4Schristos     size_t dictSize;
31*3117ece4Schristos     printf("loading dictionary %s \n", dictFileName);
32*3117ece4Schristos     void* const dictBuffer = mallocAndLoadFile_orDie(dictFileName, &dictSize);
33*3117ece4Schristos     ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer, dictSize, cLevel);
34*3117ece4Schristos     CHECK(cdict != NULL, "ZSTD_createCDict() failed!");
35*3117ece4Schristos     free(dictBuffer);
36*3117ece4Schristos     return cdict;
37*3117ece4Schristos }
38*3117ece4Schristos 
39*3117ece4Schristos 
40*3117ece4Schristos static void compress(const char* fname, const char* oname, const ZSTD_CDict* cdict)
41*3117ece4Schristos {
42*3117ece4Schristos     size_t fSize;
43*3117ece4Schristos     void* const fBuff = mallocAndLoadFile_orDie(fname, &fSize);
44*3117ece4Schristos     size_t const cBuffSize = ZSTD_compressBound(fSize);
45*3117ece4Schristos     void* const cBuff = malloc_orDie(cBuffSize);
46*3117ece4Schristos 
47*3117ece4Schristos     /* Compress using the dictionary.
48*3117ece4Schristos      * This function writes the dictionary id, and content size into the header.
49*3117ece4Schristos      * But, it doesn't use a checksum. You can control these options using the
50*3117ece4Schristos      * advanced API: ZSTD_CCtx_setParameter(), ZSTD_CCtx_refCDict(),
51*3117ece4Schristos      * and ZSTD_compress2().
52*3117ece4Schristos      */
53*3117ece4Schristos     ZSTD_CCtx* const cctx = ZSTD_createCCtx();
54*3117ece4Schristos     CHECK(cctx != NULL, "ZSTD_createCCtx() failed!");
55*3117ece4Schristos     size_t const cSize = ZSTD_compress_usingCDict(cctx, cBuff, cBuffSize, fBuff, fSize, cdict);
56*3117ece4Schristos     CHECK_ZSTD(cSize);
57*3117ece4Schristos 
58*3117ece4Schristos     saveFile_orDie(oname, cBuff, cSize);
59*3117ece4Schristos 
60*3117ece4Schristos     /* success */
61*3117ece4Schristos     printf("%25s : %6u -> %7u - %s \n", fname, (unsigned)fSize, (unsigned)cSize, oname);
62*3117ece4Schristos 
63*3117ece4Schristos     ZSTD_freeCCtx(cctx);   /* never fails */
64*3117ece4Schristos     free(fBuff);
65*3117ece4Schristos     free(cBuff);
66*3117ece4Schristos }
67*3117ece4Schristos 
68*3117ece4Schristos 
/* createOutFilename_orDie() :
 * Builds the output file name by appending ".zst" to `filename`.
 * @return : a newly allocated, NUL-terminated string obtained from
 *           malloc_orDie(); the caller releases it with free().
 */
static char* createOutFilename_orDie(const char* filename)
{
    static const char suffix[] = ".zst";
    size_t const nameLen = strlen(filename);
    /* sizeof(suffix) == 5 : the four suffix characters plus the NUL terminator */
    size_t const totalSize = nameLen + sizeof(suffix);
    void* const space = malloc_orDie(totalSize);
    char* const result = (char*)space;

    memcpy(result, filename, nameLen);
    /* copying sizeof(suffix) bytes also writes the terminating NUL */
    memcpy(result + nameLen, suffix, sizeof(suffix));

    return result;
}
79*3117ece4Schristos 
80*3117ece4Schristos int main(int argc, const char** argv)
81*3117ece4Schristos {
82*3117ece4Schristos     const char* const exeName = argv[0];
83*3117ece4Schristos     int const cLevel = 3;
84*3117ece4Schristos 
85*3117ece4Schristos     if (argc<3) {
86*3117ece4Schristos         fprintf(stderr, "wrong arguments\n");
87*3117ece4Schristos         fprintf(stderr, "usage:\n");
88*3117ece4Schristos         fprintf(stderr, "%s [FILES] dictionary\n", exeName);
89*3117ece4Schristos         return 1;
90*3117ece4Schristos     }
91*3117ece4Schristos 
92*3117ece4Schristos     /* load dictionary only once */
93*3117ece4Schristos     const char* const dictName = argv[argc-1];
94*3117ece4Schristos     ZSTD_CDict* const dictPtr = createCDict_orDie(dictName, cLevel);
95*3117ece4Schristos 
96*3117ece4Schristos     int u;
97*3117ece4Schristos     for (u=1; u<argc-1; u++) {
98*3117ece4Schristos         const char* inFilename = argv[u];
99*3117ece4Schristos         char* const outFilename = createOutFilename_orDie(inFilename);
100*3117ece4Schristos         compress(inFilename, outFilename, dictPtr);
101*3117ece4Schristos         free(outFilename);
102*3117ece4Schristos     }
103*3117ece4Schristos 
104*3117ece4Schristos     ZSTD_freeCDict(dictPtr);
105*3117ece4Schristos     printf("All %u files compressed. \n", argc-2);
106*3117ece4Schristos     return 0;
107*3117ece4Schristos }
108