xref: /netbsd-src/external/public-domain/xz/dist/doc/examples/02_decompress.c (revision 75cbb3d6e4fac2c8c149ebd3a1104dd4ac8c9b3a)
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       02_decompress.c
4 /// \brief      Decompress .xz files to stdout
5 ///
6 /// Usage:      ./02_decompress INPUT_FILES... > OUTFILE
7 ///
8 /// Example:    ./02_decompress foo.xz bar.xz > foobar
9 //
10 //  Author:     Lasse Collin
11 //
12 //  This file has been put into the public domain.
13 //  You can do whatever you want with this file.
14 //
15 ///////////////////////////////////////////////////////////////////////////////
16 
17 #include <stdbool.h>
18 #include <stdlib.h>
19 #include <stdio.h>
20 #include <string.h>
21 #include <errno.h>
22 #include <lzma.h>
23 
24 
25 static bool
init_decoder(lzma_stream * strm)26 init_decoder(lzma_stream *strm)
27 {
28 	// Initialize a .xz decoder. The decoder supports a memory usage limit
29 	// and a set of flags.
30 	//
31 	// The memory usage of the decompressor depends on the settings used
32 	// to compress a .xz file. It can vary from less than a megabyte to
33 	// a few gigabytes, but in practice (at least for now) it rarely
34 	// exceeds 65 MiB because that's how much memory is required to
35 	// decompress files created with "xz -9". Settings requiring more
36 	// memory take extra effort to use and don't (at least for now)
37 	// provide significantly better compression in most cases.
38 	//
39 	// Memory usage limit is useful if it is important that the
40 	// decompressor won't consume gigabytes of memory. The need
41 	// for limiting depends on the application. In this example,
42 	// no memory usage limiting is used. This is done by setting
43 	// the limit to UINT64_MAX.
44 	//
45 	// The .xz format allows concatenating compressed files as is:
46 	//
47 	//     echo foo | xz > foobar.xz
48 	//     echo bar | xz >> foobar.xz
49 	//
50 	// When decompressing normal standalone .xz files, LZMA_CONCATENATED
51 	// should always be used to support decompression of concatenated
52 	// .xz files. If LZMA_CONCATENATED isn't used, the decoder will stop
53 	// after the first .xz stream. This can be useful when .xz data has
54 	// been embedded inside another file format.
55 	//
56 	// Flags other than LZMA_CONCATENATED are supported too, and can
57 	// be combined with bitwise-or. See lzma/container.h
58 	// (src/liblzma/api/lzma/container.h in the source package or e.g.
59 	// /usr/include/lzma/container.h depending on the install prefix)
60 	// for details.
61 	lzma_ret ret = lzma_stream_decoder(
62 			strm, UINT64_MAX, LZMA_CONCATENATED);
63 
64 	// Return successfully if the initialization went fine.
65 	if (ret == LZMA_OK)
66 		return true;
67 
68 	// Something went wrong. The possible errors are documented in
69 	// lzma/container.h (src/liblzma/api/lzma/container.h in the source
70 	// package or e.g. /usr/include/lzma/container.h depending on the
71 	// install prefix).
72 	//
73 	// Note that LZMA_MEMLIMIT_ERROR is never possible here. If you
74 	// specify a very tiny limit, the error will be delayed until
75 	// the first headers have been parsed by a call to lzma_code().
76 	const char *msg;
77 	switch (ret) {
78 	case LZMA_MEM_ERROR:
79 		msg = "Memory allocation failed";
80 		break;
81 
82 	case LZMA_OPTIONS_ERROR:
83 		msg = "Unsupported decompressor flags";
84 		break;
85 
86 	default:
87 		// This is most likely LZMA_PROG_ERROR indicating a bug in
88 		// this program or in liblzma. It is inconvenient to have a
89 		// separate error message for errors that should be impossible
90 		// to occur, but knowing the error code is important for
91 		// debugging. That's why it is good to print the error code
92 		// at least when there is no good error message to show.
93 		msg = "Unknown error, possibly a bug";
94 		break;
95 	}
96 
97 	fprintf(stderr, "Error initializing the decoder: %s (error code %u)\n",
98 			msg, ret);
99 	return false;
100 }
101 
102 
103 static bool
decompress(lzma_stream * strm,const char * inname,FILE * infile,FILE * outfile)104 decompress(lzma_stream *strm, const char *inname, FILE *infile, FILE *outfile)
105 {
106 	// When LZMA_CONCATENATED flag was used when initializing the decoder,
107 	// we need to tell lzma_code() when there will be no more input.
108 	// This is done by setting action to LZMA_FINISH instead of LZMA_RUN
109 	// in the same way as it is done when encoding.
110 	//
111 	// When LZMA_CONCATENATED isn't used, there is no need to use
112 	// LZMA_FINISH to tell when all the input has been read, but it
113 	// is still OK to use it if you want. When LZMA_CONCATENATED isn't
114 	// used, the decoder will stop after the first .xz stream. In that
115 	// case some unused data may be left in strm->next_in.
116 	lzma_action action = LZMA_RUN;
117 
118 	uint8_t inbuf[BUFSIZ];
119 	uint8_t outbuf[BUFSIZ];
120 
121 	strm->next_in = NULL;
122 	strm->avail_in = 0;
123 	strm->next_out = outbuf;
124 	strm->avail_out = sizeof(outbuf);
125 
126 	while (true) {
127 		if (strm->avail_in == 0 && !feof(infile)) {
128 			strm->next_in = inbuf;
129 			strm->avail_in = fread(inbuf, 1, sizeof(inbuf),
130 					infile);
131 
132 			if (ferror(infile)) {
133 				fprintf(stderr, "%s: Read error: %s\n",
134 						inname, strerror(errno));
135 				return false;
136 			}
137 
138 			// Once the end of the input file has been reached,
139 			// we need to tell lzma_code() that no more input
140 			// will be coming. As said before, this isn't required
141 			// if the LZMA_CONCATENATED flag isn't used when
142 			// initializing the decoder.
143 			if (feof(infile))
144 				action = LZMA_FINISH;
145 		}
146 
147 		lzma_ret ret = lzma_code(strm, action);
148 
149 		if (strm->avail_out == 0 || ret == LZMA_STREAM_END) {
150 			size_t write_size = sizeof(outbuf) - strm->avail_out;
151 
152 			if (fwrite(outbuf, 1, write_size, outfile)
153 					!= write_size) {
154 				fprintf(stderr, "Write error: %s\n",
155 						strerror(errno));
156 				return false;
157 			}
158 
159 			strm->next_out = outbuf;
160 			strm->avail_out = sizeof(outbuf);
161 		}
162 
163 		if (ret != LZMA_OK) {
164 			// Once everything has been decoded successfully, the
165 			// return value of lzma_code() will be LZMA_STREAM_END.
166 			//
167 			// It is important to check for LZMA_STREAM_END. Do not
168 			// assume that getting ret != LZMA_OK would mean that
169 			// everything has gone well or that when you aren't
170 			// getting more output it must have successfully
171 			// decoded everything.
172 			if (ret == LZMA_STREAM_END)
173 				return true;
174 
175 			// It's not LZMA_OK nor LZMA_STREAM_END,
176 			// so it must be an error code. See lzma/base.h
177 			// (src/liblzma/api/lzma/base.h in the source package
178 			// or e.g. /usr/include/lzma/base.h depending on the
179 			// install prefix) for the list and documentation of
180 			// possible values. Many values listen in lzma_ret
181 			// enumeration aren't possible in this example, but
182 			// can be made possible by enabling memory usage limit
183 			// or adding flags to the decoder initialization.
184 			const char *msg;
185 			switch (ret) {
186 			case LZMA_MEM_ERROR:
187 				msg = "Memory allocation failed";
188 				break;
189 
190 			case LZMA_FORMAT_ERROR:
191 				// .xz magic bytes weren't found.
192 				msg = "The input is not in the .xz format";
193 				break;
194 
195 			case LZMA_OPTIONS_ERROR:
196 				// For example, the headers specify a filter
197 				// that isn't supported by this liblzma
198 				// version (or it hasn't been enabled when
199 				// building liblzma, but no-one sane does
200 				// that unless building liblzma for an
201 				// embedded system). Upgrading to a newer
202 				// liblzma might help.
203 				//
204 				// Note that it is unlikely that the file has
205 				// accidentally became corrupt if you get this
206 				// error. The integrity of the .xz headers is
207 				// always verified with a CRC32, so
208 				// unintentionally corrupt files can be
209 				// distinguished from unsupported files.
210 				msg = "Unsupported compression options";
211 				break;
212 
213 			case LZMA_DATA_ERROR:
214 				msg = "Compressed file is corrupt";
215 				break;
216 
217 			case LZMA_BUF_ERROR:
218 				// Typically this error means that a valid
219 				// file has got truncated, but it might also
220 				// be a damaged part in the file that makes
221 				// the decoder think the file is truncated.
222 				// If you prefer, you can use the same error
223 				// message for this as for LZMA_DATA_ERROR.
224 				msg = "Compressed file is truncated or "
225 						"otherwise corrupt";
226 				break;
227 
228 			default:
229 				// This is most likely LZMA_PROG_ERROR.
230 				msg = "Unknown error, possibly a bug";
231 				break;
232 			}
233 
234 			fprintf(stderr, "%s: Decoder error: "
235 					"%s (error code %u)\n",
236 					inname, msg, ret);
237 			return false;
238 		}
239 	}
240 }
241 
242 
243 extern int
main(int argc,char ** argv)244 main(int argc, char **argv)
245 {
246 	if (argc <= 1) {
247 		fprintf(stderr, "Usage: %s FILES...\n", argv[0]);
248 		return EXIT_FAILURE;
249 	}
250 
251 	lzma_stream strm = LZMA_STREAM_INIT;
252 
253 	bool success = true;
254 
255 	// Try to decompress all files.
256 	for (int i = 1; i < argc; ++i) {
257 		if (!init_decoder(&strm)) {
258 			// Decoder initialization failed. There's no point
259 			// to retry it so we need to exit.
260 			success = false;
261 			break;
262 		}
263 
264 		FILE *infile = fopen(argv[i], "rb");
265 
266 		if (infile == NULL) {
267 			fprintf(stderr, "%s: Error opening the "
268 					"input file: %s\n",
269 					argv[i], strerror(errno));
270 			success = false;
271 		} else {
272 			success &= decompress(&strm, argv[i], infile, stdout);
273 			fclose(infile);
274 		}
275 	}
276 
277 	// Free the memory allocated for the decoder. This only needs to be
278 	// done after the last file.
279 	lzma_end(&strm);
280 
281 	if (fclose(stdout)) {
282 		fprintf(stderr, "Write error: %s\n", strerror(errno));
283 		success = false;
284 	}
285 
286 	return success ? EXIT_SUCCESS : EXIT_FAILURE;
287 }
288