xref: /netbsd-src/sys/dev/stbi/stb_image.c (revision 82d56013d7b633d116a93943de88e08335357a7c)
1 /* stbi-1.29 - public domain JPEG/PNG reader - http://nothings.org/stb_image.c
2    when you control the images you're loading
3                                      no warranty implied; use at your own risk
4 
5    QUICK NOTES:
6       Primarily of interest to game developers and other people who can
7           avoid problematic images and only need the trivial interface
8 
9       JPEG baseline (no JPEG progressive)
10       PNG 8-bit only
11 
12       TGA (not sure what subset, if a subset)
13       BMP non-1bpp, non-RLE
14       PSD (composited view only, no extra channels)
15 
16       GIF (*comp always reports as 4-channel)
17       HDR (radiance rgbE format)
18       PIC (Softimage PIC)
19 
20       - decoded from memory or through stdio FILE (define STBI_NO_STDIO to remove code)
21       - supports installable dequantizing-IDCT, YCbCr-to-RGB conversion (define STBI_SIMD)
22 
23    Latest revisions:
24       1.29 (2010-08-16) various warning fixes from Aurelien Pocheville
25       1.28 (2010-08-01) fix bug in GIF palette transparency (SpartanJ)
26       1.27 (2010-08-01) cast-to-uint8 to fix warnings (Laurent Gomila)
27                         allow trailing 0s at end of image data (Laurent Gomila)
28       1.26 (2010-07-24) fix bug in file buffering for PNG reported by SpartanJ
29       1.25 (2010-07-17) refix trans_data warning (Won Chun)
30       1.24 (2010-07-12) perf improvements reading from files
31                         minor perf improvements for jpeg
32                         deprecated type-specific functions in hope of feedback
33                         attempt to fix trans_data warning (Won Chun)
34       1.23              fixed bug in iPhone support
35       1.22 (2010-07-10) removed image *writing* support to stb_image_write.h
36                         stbi_info support from Jetro Lauha
37                         GIF support from Jean-Marc Lienher
38                         iPhone PNG-extensions from James Brown
39                         warning-fixes from Nicolas Schulz and Janez Zemva
40       1.21              fix use of 'uint8' in header (reported by jon blow)
41       1.20              added support for Softimage PIC, by Tom Seddon
42 
43    See end of file for full revision history.
44 
45    TODO:
46       stbi_info support for BMP,PSD,HDR,PIC
47       rewrite stbi_info and load_file variations to share file handling code
48            (current system allows individual functions to be called directly,
49            since each does all the work, but I doubt anyone uses this in practice)
50 
51 
52  ============================    Contributors    =========================
53 
54  Image formats                                Optimizations & bugfixes
55     Sean Barrett (jpeg, png, bmp)                Fabian "ryg" Giesen
56     Nicolas Schulz (hdr, psd)
57     Jonathan Dummer (tga)                     Bug fixes & warning fixes
58     Jean-Marc Lienher (gif)                      Marc LeBlanc
59     Tom Seddon (pic)                             Christopher Lloyd
60     Thatcher Ulrich (psd)                        Dave Moore
61                                                  Won Chun
62                                                  the Horde3D community
63  Extensions, features                            Janez Zemva
64     Jetro Lauha (stbi_info)                      Jonathan Blow
65     James "moose2000" Brown (iPhone PNG)         Laurent Gomila
66                                                  Aurelien Pocheville
67 
68  If your name should be here but isn't, let Sean know.
69 
70 */
71 
72 #ifdef _KERNEL
73 #include <dev/stbi/stbiconfig.h>
74 #endif
75 
76 #ifndef STBI_INCLUDE_STB_IMAGE_H
77 #define STBI_INCLUDE_STB_IMAGE_H
78 
79 // To get a header file for this, either cut and paste the header,
80 // or create stb_image.h, #define STBI_HEADER_FILE_ONLY, and
81 // then include stb_image.c from it.
82 
83 ////   begin header file  ////////////////////////////////////////////////////
84 //
85 // Limitations:
86 //    - no jpeg progressive support
87 //    - non-HDR formats support 8-bit samples only (jpeg, png)
88 //    - no delayed line count (jpeg) -- IJG doesn't support either
89 //    - no 1-bit BMP
90 //    - GIF always returns *comp=4
91 //
92 // Basic usage (see HDR discussion below):
93 //    int x,y,n;
94 //    unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
95 //    // ... process data if not NULL ...
96 //    // ... x = width, y = height, n = # 8-bit components per pixel ...
97 //    // ... replace '0' with '1'..'4' to force that many components per pixel
98 //    stbi_image_free(data)
99 //
100 // Standard parameters:
101 //    int *x       -- outputs image width in pixels
102 //    int *y       -- outputs image height in pixels
103 //    int *comp    -- outputs # of image components in image file
104 //    int req_comp -- if non-zero, # of image components requested in result
105 //
106 // The return value from an image loader is an 'unsigned char *' which points
107 // to the pixel data. The pixel data consists of *y scanlines of *x pixels,
108 // with each pixel consisting of N interleaved 8-bit components; the first
109 // pixel pointed to is top-left-most in the image. There is no padding between
110 // image scanlines or between pixels, regardless of format. The number of
111 // components N is 'req_comp' if req_comp is non-zero, or *comp otherwise.
112 // If req_comp is non-zero, *comp has the number of components that _would_
113 // have been output otherwise. E.g. if you set req_comp to 4, you will always
114 // get RGBA output, but you can check *comp to easily see if it's opaque.
115 //
116 // An output image with N components has the following components interleaved
117 // in this order in each pixel:
118 //
119 //     N=#comp     components
120 //       1           grey
121 //       2           grey, alpha
122 //       3           red, green, blue
123 //       4           red, green, blue, alpha
124 //
125 // If image loading fails for any reason, the return value will be NULL,
126 // and *x, *y, *comp will be unchanged. The function stbi_failure_reason()
127 // can be queried for an extremely brief, end-user unfriendly explanation
128 // of why the load failed. Define STBI_NO_FAILURE_STRINGS to avoid
129 // compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
130 // more user-friendly ones.
131 //
132 // Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
133 //
134 // ===========================================================================
135 //
136 // iPhone PNG support:
137 //
138 // By default we convert iphone-formatted PNGs back to RGB; nominally they
139 // would silently load as BGR, except the existing code should have just
140 // failed on such iPhone PNGs. But you can disable this conversion by
141 // calling stbi_convert_iphone_png_to_rgb(0), in which case
142 // you will always just get the native iphone "format" through.
143 //
144 // Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
145 // pixel to remove any premultiplied alpha *only* if the image file explicitly
146 // says there's premultiplied data (currently only happens in iPhone images,
147 // and only if iPhone convert-to-rgb processing is on).
148 //
149 // ===========================================================================
150 //
151 // HDR image support   (disable by defining STBI_NO_HDR)
152 //
153 // stb_image now supports loading HDR images in general, and currently
154 // the Radiance .HDR file format, although the support is provided
155 // generically. You can still load any file through the existing interface;
156 // if you attempt to load an HDR file, it will be automatically remapped to
157 // LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
158 // both of these constants can be reconfigured through this interface:
159 //
160 //     stbi_hdr_to_ldr_gamma(2.2f);
161 //     stbi_hdr_to_ldr_scale(1.0f);
162 //
163 // (note, do not use _inverse_ constants; stb_image will invert them
164 // appropriately).
165 //
166 // Additionally, there is a new, parallel interface for loading files as
167 // (linear) floats to preserve the full dynamic range:
168 //
169 //    float *data = stbi_loadf(filename, &x, &y, &n, 0);
170 //
171 // If you load LDR images through this interface, those images will
172 // be promoted to floating point values, run through the inverse of
173 // constants corresponding to the above:
174 //
175 //     stbi_ldr_to_hdr_scale(1.0f);
176 //     stbi_ldr_to_hdr_gamma(2.2f);
177 //
178 // Finally, given a filename (or an open file or memory block--see header
179 // file for details) containing image data, you can query for the "most
180 // appropriate" interface to use (that is, whether the image is HDR or
181 // not), using:
182 //
183 //     stbi_is_hdr(char *filename);
184 
185 #ifndef STBI_NO_STDIO
186 #include <stdio.h>
187 #endif
188 
189 #define STBI_VERSION 1
190 
// Channel-count selectors. Values 1..4 double as the number of interleaved
// 8-bit components per pixel and may be passed as 'req_comp'.
enum
{
   STBI_default    = 0, // req_comp only: keep whatever the file provides

   STBI_grey       = 1, // grey
   STBI_grey_alpha = 2, // grey, alpha
   STBI_rgb        = 3, // red, green, blue
   STBI_rgb_alpha  = 4  // red, green, blue, alpha
};
200 
201 typedef unsigned char stbi_uc;
202 
203 #ifdef __cplusplus
204 extern "C" {
205 #endif
206 
207 // PRIMARY API - works on images of any type
208 
209 // load image by filename, open file, or memory buffer
210 extern stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
211 
212 #ifndef STBI_NO_STDIO
213 extern stbi_uc *stbi_load            (char const *filename,     int *x, int *y, int *comp, int req_comp);
214 extern stbi_uc *stbi_load_from_file  (FILE *f,                  int *x, int *y, int *comp, int req_comp);
215 // for stbi_load_from_file, file pointer is left pointing immediately after image
216 #endif
217 
218 #ifndef STBI_NO_HDR
219    extern float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
220 
221    #ifndef STBI_NO_STDIO
222    extern float *stbi_loadf            (char const *filename,   int *x, int *y, int *comp, int req_comp);
223    extern float *stbi_loadf_from_file  (FILE *f,                int *x, int *y, int *comp, int req_comp);
224    #endif
225 
226    extern void   stbi_hdr_to_ldr_gamma(float gamma);
227    extern void   stbi_hdr_to_ldr_scale(float scale);
228 
229    extern void   stbi_ldr_to_hdr_gamma(float gamma);
230    extern void   stbi_ldr_to_hdr_scale(float scale);
231 #endif // STBI_NO_HDR
232 
233 // get a VERY brief reason for failure
234 // NOT THREADSAFE
235 extern const char *stbi_failure_reason  (void);
236 
237 // free the loaded image -- this is just free()
238 extern void     stbi_image_free      (void *retval_from_stbi_load);
239 
240 // get image dimensions & components without fully decoding
241 extern int      stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
242 extern int      stbi_is_hdr_from_memory(stbi_uc const *buffer, int len);
243 
244 #ifndef STBI_NO_STDIO
245 extern int      stbi_info            (char const *filename,     int *x, int *y, int *comp);
246 extern int      stbi_info_from_file  (FILE *f,                  int *x, int *y, int *comp);
247 
248 extern int      stbi_is_hdr          (char const *filename);
249 extern int      stbi_is_hdr_from_file(FILE *f);
250 #endif
251 
252 // for image formats that explicitly notate that they have premultiplied alpha,
253 // we just return the colors as stored in the file. set this flag to force
254 // unpremultiplication. results are undefined if the unpremultiply overflow.
255 extern void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);
256 
257 // indicate whether we should process iphone images back to canonical format,
258 // or just pass them through "as-is"
259 extern void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
260 
261 
// ZLIB client - used by PNG, available for other purposes
//
// NOTE(review): judging by the names, the *_malloc variants return a newly
// allocated output buffer and report its size through 'outlen', while the
// *_buffer variants decode into caller-provided storage of 'olen' bytes;
// the "noheader" forms presumably skip the zlib header -- confirm against
// the implementation before relying on this.

extern char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen);
extern char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header);
extern char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen);
extern int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);

extern char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen);
extern int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
271 
272 // define new loaders
273 typedef struct
274 {
275    int       (*test_memory)(stbi_uc const *buffer, int len);
276    stbi_uc * (*load_from_memory)(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
277    #ifndef STBI_NO_STDIO
278    int       (*test_file)(FILE *f);
279    stbi_uc * (*load_from_file)(FILE *f, int *x, int *y, int *comp, int req_comp);
280    #endif
281 } stbi_loader;
282 
283 // register a loader by filling out the above structure (you must define ALL functions)
284 // returns 1 if added or already added, 0 if not added (too many loaders)
285 // NOT THREADSAFE
286 extern int stbi_register_loader(stbi_loader *loader);
287 
// define faster low-level operations (typically SIMD support)
#ifdef STBI_SIMD
// Integer IDCT on one 8x8 block:
//     input[x] = data[x] * dequantize[x]
//     write results to 'out': 64 samples, each run of 8 spaced by 'out_stride'
//     results must be CLAMPED to 0..255
typedef void (*stbi_idct_8x8)(stbi_uc *out, int out_stride, short data[64], unsigned short *dequantize);

// Conversion of a run of pixels from YCbCr to RGB:
//     'count'  number of pixels to convert
//     'output' destination; each pixel is 'step' bytes (either 3 or 4; if 4,
//              write '255' as 4th), order R,G,B
//     'y'      Y input channel
//     'cb'     Cb input channel; scale/biased to be 0..255
//     'cr'     Cr input channel; scale/biased to be 0..255
typedef void (*stbi_YCbCr_to_RGB_run)(stbi_uc *output, stbi_uc const  *y, stbi_uc const *cb, stbi_uc const *cr, int count, int step);

// install replacement implementations of the two operations above
extern void stbi_install_idct(stbi_idct_8x8 func);
extern void stbi_install_YCbCr_to_RGB(stbi_YCbCr_to_RGB_run func);
#endif // STBI_SIMD
306 
307 
308 
309 
310 // TYPE-SPECIFIC ACCESS
311 
#ifdef STBI_TYPE_SPECIFIC_FUNCTIONS

// Per-format entry points, using the same standard parameters as the
// generic API. Every function that takes a filename or a FILE* needs stdio
// and is therefore declared only when STBI_NO_STDIO is not defined.

// is it a jpeg?
extern int      stbi_jpeg_test_memory     (stbi_uc const *buffer, int len);
extern stbi_uc *stbi_jpeg_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
extern int      stbi_jpeg_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);

#ifndef STBI_NO_STDIO
extern stbi_uc *stbi_jpeg_load            (char const *filename,     int *x, int *y, int *comp, int req_comp);
extern int      stbi_jpeg_test_file       (FILE *f);
extern stbi_uc *stbi_jpeg_load_from_file  (FILE *f,                  int *x, int *y, int *comp, int req_comp);

extern int      stbi_jpeg_info            (char const *filename,     int *x, int *y, int *comp);
extern int      stbi_jpeg_info_from_file  (FILE *f,                  int *x, int *y, int *comp);
#endif

// is it a png?
extern int      stbi_png_test_memory      (stbi_uc const *buffer, int len);
extern stbi_uc *stbi_png_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
extern int      stbi_png_info_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp);

#ifndef STBI_NO_STDIO
extern stbi_uc *stbi_png_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
extern int      stbi_png_info             (char const *filename,     int *x, int *y, int *comp);
extern int      stbi_png_test_file        (FILE *f);
extern stbi_uc *stbi_png_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
extern int      stbi_png_info_from_file   (FILE *f,                  int *x, int *y, int *comp);
#endif

// is it a bmp?
extern int      stbi_bmp_test_memory      (stbi_uc const *buffer, int len);
extern stbi_uc *stbi_bmp_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);

#ifndef STBI_NO_STDIO
extern stbi_uc *stbi_bmp_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
extern int      stbi_bmp_test_file        (FILE *f);
extern stbi_uc *stbi_bmp_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
#endif

// is it a tga?
extern int      stbi_tga_test_memory      (stbi_uc const *buffer, int len);
extern stbi_uc *stbi_tga_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
extern int      stbi_tga_info_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp);

#ifndef STBI_NO_STDIO
extern stbi_uc *stbi_tga_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
extern int      stbi_tga_info_from_file   (FILE *f,                  int *x, int *y, int *comp);
extern int      stbi_tga_test_file        (FILE *f);
extern stbi_uc *stbi_tga_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
#endif

// is it a psd?
extern int      stbi_psd_test_memory      (stbi_uc const *buffer, int len);
extern stbi_uc *stbi_psd_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);

#ifndef STBI_NO_STDIO
extern stbi_uc *stbi_psd_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
extern int      stbi_psd_test_file        (FILE *f);
extern stbi_uc *stbi_psd_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
#endif

// is it an hdr?
extern int      stbi_hdr_test_memory      (stbi_uc const *buffer, int len);
extern float *  stbi_hdr_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);

#ifndef STBI_NO_STDIO
extern float *  stbi_hdr_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
extern int      stbi_hdr_test_file        (FILE *f);
extern float *  stbi_hdr_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
#endif

// is it a pic?
extern int      stbi_pic_test_memory      (stbi_uc const *buffer, int len);
extern stbi_uc *stbi_pic_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);

#ifndef STBI_NO_STDIO
extern stbi_uc *stbi_pic_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
extern int      stbi_pic_test_file        (FILE *f);
extern stbi_uc *stbi_pic_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
#endif

// is it a gif?
extern int      stbi_gif_test_memory      (stbi_uc const *buffer, int len);
extern stbi_uc *stbi_gif_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
extern int      stbi_gif_info_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp);

#ifndef STBI_NO_STDIO
extern stbi_uc *stbi_gif_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
extern int      stbi_gif_test_file        (FILE *f);
extern stbi_uc *stbi_gif_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
extern int      stbi_gif_info             (char const *filename,     int *x, int *y, int *comp);
extern int      stbi_gif_info_from_file   (FILE *f,                  int *x, int *y, int *comp);
#endif

#endif//STBI_TYPE_SPECIFIC_FUNCTIONS
408 
409 
410 
411 
412 #ifdef __cplusplus
413 }
414 #endif
415 
416 //
417 //
418 ////   end header file   /////////////////////////////////////////////////////
419 #endif // STBI_INCLUDE_STB_IMAGE_H
420 
421 #ifndef STBI_HEADER_FILE_ONLY
422 
423 #ifndef STBI_NO_HDR
424 #include <math.h>  // ldexp
425 #include <string.h> // strcmp
426 #endif
427 
428 #ifndef STBI_NO_STDIO
429 #include <stdio.h>
430 #endif
431 #ifdef _KERNEL
432 #include <sys/cdefs.h>
433 __KERNEL_RCSID(0, "$NetBSD: stb_image.c,v 1.9 2018/02/04 09:18:44 mrg Exp $");
434 #include <sys/param.h>
435 #include <sys/systm.h>
436 #include <sys/kernel.h>
437 #include <sys/types.h>
438 #include <sys/malloc.h>
439 #else
440 #include <stdlib.h>
441 #include <memory.h>
442 #include <assert.h>
443 #include <stdarg.h>
444 #endif
445 
// Allocation wrappers used by the rest of the file. Kernel builds go through
// malloc(9) with the M_TEMP type and M_WAITOK; userland builds map straight
// onto the C library.
#ifdef _KERNEL
#define	MALLOC(size)		malloc((size), M_TEMP, M_WAITOK)
#define	REALLOC(ptr, size)	realloc((ptr), (size), M_TEMP, M_WAITOK)
// The argument is captured once so that an expression with side effects is
// evaluated a single time; a NULL pointer is skipped, never passed to free().
#define	FREE(ptr) \
    do { \
	void *stbi_free_ptr_ = (ptr); \
	if (stbi_free_ptr_ != NULL) \
		free(stbi_free_ptr_, M_TEMP); \
    } while (/*CONSTCOND*/0)
#else
#define	MALLOC(size)		malloc((size))
#define	REALLOC(ptr, size)	realloc((ptr), (size))
#define	FREE(ptr)		free((ptr))
#endif
456 
// When _MSC_VER is not defined, provide a stand-in for __forceinline:
// plain 'inline' in C++ builds, nothing at all in C builds.
#if !defined(_MSC_VER)
  #if defined(__cplusplus)
  #define __forceinline inline
  #else
  #define __forceinline
  #endif
#endif
464 
465 
// implementation:
// short aliases for the integer widths the decoders rely on
typedef unsigned char  uint8;
typedef unsigned short uint16;
typedef short          int16;
typedef unsigned int   uint32;
typedef int            int32;
// NOTE(review): presumably skipped on NetBSD because the system headers
// already provide 'uint' -- confirm
#if !defined(__NetBSD__)
typedef unsigned int   uint;
#endif

// compile-time check: a negative array size is an error unless uint32 is 4 bytes
typedef unsigned char validate_uint32[sizeof(uint32)==4 ? 1 : -1];
478 
// without stdio there is nothing to write to, so writing is disabled as well
#if defined(STBI_NO_STDIO) && !defined(STBI_NO_WRITE)
#define STBI_NO_WRITE
#endif

// Mark a variable as intentionally unused. sizeof does not evaluate its
// operand, so this works for const-qualified and not-yet-initialised
// variables and cannot trigger self-assignment warnings.
#define STBI_NOTUSED(v)  (void)sizeof(v)

#ifdef _MSC_VER
#define STBI_HAS_LRTOL
#endif

// stbi_lrot(x,y): rotate the 32-bit unsigned value x left by y bits.
#ifdef STBI_HAS_LRTOL
   #define stbi_lrot(x,y)  _lrotl(x,y)
#else
   // both shift counts are reduced modulo 32 so that y == 0 (or 32) never
   // produces a shift by the full width of the type, which is undefined
   #define stbi_lrot(x,y)  (((x) << ((y) & 31)) | ((x) >> ((32 - (y)) & 31)))
#endif
494 
495 //////////////////////////////////////////////////////////////////////////////
496 //
497 // Generic API that works on all image types
498 //
499 
500 // deprecated functions
501 
502 // is it a jpeg?
503 extern int      stbi_jpeg_test_memory     (stbi_uc const *buffer, int len);
504 extern stbi_uc *stbi_jpeg_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
505 extern int      stbi_jpeg_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
506 
507 #ifndef STBI_NO_STDIO
508 extern stbi_uc *stbi_jpeg_load            (char const *filename,     int *x, int *y, int *comp, int req_comp);
509 extern int      stbi_jpeg_test_file       (FILE *f);
510 extern stbi_uc *stbi_jpeg_load_from_file  (FILE *f,                  int *x, int *y, int *comp, int req_comp);
511 
512 extern int      stbi_jpeg_info            (char const *filename,     int *x, int *y, int *comp);
513 extern int      stbi_jpeg_info_from_file  (FILE *f,                  int *x, int *y, int *comp);
514 #endif
515 
516 // is it a png?
517 extern int      stbi_png_test_memory      (stbi_uc const *buffer, int len);
518 extern stbi_uc *stbi_png_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
519 extern int      stbi_png_info_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp);
520 
521 #ifndef STBI_NO_STDIO
522 extern stbi_uc *stbi_png_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
523 extern int      stbi_png_info             (char const *filename,     int *x, int *y, int *comp);
524 extern int      stbi_png_test_file        (FILE *f);
525 extern stbi_uc *stbi_png_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
526 extern int      stbi_png_info_from_file   (FILE *f,                  int *x, int *y, int *comp);
527 #endif
528 
529 // is it a bmp?
530 extern int      stbi_bmp_test_memory      (stbi_uc const *buffer, int len);
531 
532 extern stbi_uc *stbi_bmp_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
533 extern stbi_uc *stbi_bmp_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
534 #ifndef STBI_NO_STDIO
535 extern int      stbi_bmp_test_file        (FILE *f);
536 extern stbi_uc *stbi_bmp_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
537 #endif
538 
539 // is it a tga?
540 extern int      stbi_tga_test_memory      (stbi_uc const *buffer, int len);
541 
542 extern stbi_uc *stbi_tga_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
543 extern stbi_uc *stbi_tga_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
544 #ifndef STBI_NO_STDIO
545 extern int      stbi_tga_test_file        (FILE *f);
546 extern stbi_uc *stbi_tga_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
547 #endif
548 
549 // is it a psd?
550 extern int      stbi_psd_test_memory      (stbi_uc const *buffer, int len);
551 
552 extern stbi_uc *stbi_psd_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
553 extern stbi_uc *stbi_psd_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
554 #ifndef STBI_NO_STDIO
555 extern int      stbi_psd_test_file        (FILE *f);
556 extern stbi_uc *stbi_psd_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
557 #endif
558 
559 // is it an hdr?
560 extern int      stbi_hdr_test_memory      (stbi_uc const *buffer, int len);
561 
562 extern float *  stbi_hdr_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
563 extern float *  stbi_hdr_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
564 #ifndef STBI_NO_STDIO
565 extern int      stbi_hdr_test_file        (FILE *f);
566 extern float *  stbi_hdr_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
567 #endif
568 
569 // is it a pic?
570 extern int      stbi_pic_test_memory      (stbi_uc const *buffer, int len);
571 
572 extern stbi_uc *stbi_pic_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
573 extern stbi_uc *stbi_pic_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
574 #ifndef STBI_NO_STDIO
575 extern int      stbi_pic_test_file        (FILE *f);
576 extern stbi_uc *stbi_pic_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
577 #endif
578 
579 // is it a gif?
580 extern int      stbi_gif_test_memory      (stbi_uc const *buffer, int len);
581 
582 extern stbi_uc *stbi_gif_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
583 extern stbi_uc *stbi_gif_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
584 extern int      stbi_gif_info_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp);
585 
586 #ifndef STBI_NO_STDIO
587 extern int      stbi_gif_test_file        (FILE *f);
588 extern stbi_uc *stbi_gif_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
589 extern int      stbi_gif_info             (char const *filename,     int *x, int *y, int *comp);
590 extern int      stbi_gif_info_from_file   (FILE *f,                  int *x, int *y, int *comp);
591 #endif
592 
593 
// Message describing the most recent failure; NULL until something is
// recorded. One shared slot for the whole library, so this is not threadsafe.
static const char *failure_reason;

// Returns the message currently stored in 'failure_reason'.
const char *stbi_failure_reason(void)
{
   const char *reason = failure_reason;
   return reason;
}
601 
602 #ifndef STBI_NO_FAILURE_STRINGS
603 static int e(const char *str)
604 {
605    failure_reason = str;
606    return 0;
607 }
608 #endif
609 
610 #ifdef STBI_NO_FAILURE_STRINGS
611    #define e(x,y)  0
612 #elif defined(STBI_FAILURE_USERMSG)
613    #define e(x,y)  e(y)
614 #else
615    #define e(x,y)  e(x)
616 #endif
617 
618 #define epf(x,y)   ((float *) (e(x,y)?NULL:NULL))
619 #define epuc(x,y)  ((unsigned char *) (e(x,y)?NULL:NULL))
620 
// Release a buffer returned by one of the loaders by handing it to the
// file's FREE() allocation wrapper.
void stbi_image_free(void *retval_from_stbi_load)
{
   void *image = retval_from_stbi_load;
   FREE(image);
}
625 
626 #define MAX_LOADERS  32
627 stbi_loader *loaders[MAX_LOADERS];
628 static int max_loaders = 0;
629 
630 int stbi_register_loader(stbi_loader *loader)
631 {
632    int i;
633    for (i=0; i < MAX_LOADERS; ++i) {
634       // already present?
635       if (loaders[i] == loader)
636          return 1;
637       // end of the list?
638       if (loaders[i] == NULL) {
639          loaders[i] = loader;
640          max_loaders = i+1;
641          return 1;
642       }
643    }
644    // no room for it
645    return 0;
646 }
647 
648 #ifndef STBI_NO_HDR
649 static float   *ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
650 static stbi_uc *hdr_to_ldr(float   *data, int x, int y, int comp);
651 #endif
652 
653 #ifndef STBI_NO_STDIO
// Load an image by filename: open the file in binary mode, decode it with
// stbi_load_from_file, and close it again. Returns NULL (with the failure
// reason set) if the file cannot be opened.
unsigned char *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *pixels;
   FILE *fp = fopen(filename, "rb");

   if (fp == NULL)
      return epuc("can't fopen", "Unable to open file");

   pixels = stbi_load_from_file(fp, x, y, comp, req_comp);
   fclose(fp);
   return pixels;
}
663 
664 unsigned char *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
665 {
666    int i;
667    if (stbi_jpeg_test_file(f)) return stbi_jpeg_load_from_file(f,x,y,comp,req_comp);
668    if (stbi_png_test_file(f))  return stbi_png_load_from_file(f,x,y,comp,req_comp);
669    if (stbi_bmp_test_file(f))  return stbi_bmp_load_from_file(f,x,y,comp,req_comp);
670    if (stbi_gif_test_file(f))  return stbi_gif_load_from_file(f,x,y,comp,req_comp);
671    if (stbi_psd_test_file(f))  return stbi_psd_load_from_file(f,x,y,comp,req_comp);
672    if (stbi_pic_test_file(f))  return stbi_pic_load_from_file(f,x,y,comp,req_comp);
673 
674    #ifndef STBI_NO_HDR
675    if (stbi_hdr_test_file(f)) {
676       float *hdr = stbi_hdr_load_from_file(f, x,y,comp,req_comp);
677       return hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
678    }
679    #endif
680 
681    for (i=0; i < max_loaders; ++i)
682       if (loaders[i]->test_file(f))
683          return loaders[i]->load_from_file(f,x,y,comp,req_comp);
684    // test tga last because it's a crappy test!
685    if (stbi_tga_test_file(f))
686       return stbi_tga_load_from_file(f,x,y,comp,req_comp);
687    return epuc("unknown image type", "Image not of any known type, or corrupt");
688 }
689 #endif
690 
691 unsigned char *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
692 {
693    int i;
694    if (stbi_jpeg_test_memory(buffer,len)) return stbi_jpeg_load_from_memory(buffer,len,x,y,comp,req_comp);
695    if (stbi_png_test_memory(buffer,len))  return stbi_png_load_from_memory(buffer,len,x,y,comp,req_comp);
696    if (stbi_bmp_test_memory(buffer,len))  return stbi_bmp_load_from_memory(buffer,len,x,y,comp,req_comp);
697    if (stbi_gif_test_memory(buffer,len))  return stbi_gif_load_from_memory(buffer,len,x,y,comp,req_comp);
698    if (stbi_psd_test_memory(buffer,len))  return stbi_psd_load_from_memory(buffer,len,x,y,comp,req_comp);
699    if (stbi_pic_test_memory(buffer,len))  return stbi_pic_load_from_memory(buffer,len,x,y,comp,req_comp);
700 
701    #ifndef STBI_NO_HDR
702    if (stbi_hdr_test_memory(buffer, len)) {
703       float *hdr = stbi_hdr_load_from_memory(buffer, len,x,y,comp,req_comp);
704       return hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
705    }
706    #endif
707 
708    for (i=0; i < max_loaders; ++i)
709       if (loaders[i]->test_memory(buffer,len))
710          return loaders[i]->load_from_memory(buffer,len,x,y,comp,req_comp);
711    // test tga last because it's a crappy test!
712    if (stbi_tga_test_memory(buffer,len))
713       return stbi_tga_load_from_memory(buffer,len,x,y,comp,req_comp);
714    return epuc("unknown image type", "Image not of any known type, or corrupt");
715 }
716 
717 #ifndef STBI_NO_HDR
718 
719 #ifndef STBI_NO_STDIO
// Open `filename` for binary reading and decode it to floating-point pixels
// with stbi_loadf_from_file(). The file is closed again before returning.
// If the file cannot be opened the result of epf() is returned instead.
float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   float *pixels;
   FILE *fp = fopen(filename, "rb");

   if (fp == NULL)
      return epf("can't fopen", "Unable to open file");

   pixels = stbi_loadf_from_file(fp, x, y, comp, req_comp);
   fclose(fp);
   return pixels;
}
729 
// Decode an already-open file to floating-point pixels. A file that passes
// the HDR signature test is decoded by the HDR loader directly; anything else
// is decoded to 8-bit pixels first and then expanded with ldr_to_hdr().
float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *ldr;

   #ifndef STBI_NO_HDR
   if (stbi_hdr_test_file(f)) {
      return stbi_hdr_load_from_file(f,x,y,comp,req_comp);
   }
   #endif

   ldr = stbi_load_from_file(f, x, y, comp, req_comp);
   if (ldr == NULL)
      return epf("unknown image type", "Image not of any known type, or corrupt");

   // convert using the channel count that was actually produced
   return ldr_to_hdr(ldr, *x, *y, req_comp ? req_comp : *comp);
}
742 #endif
743 
744 float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
745 {
746    stbi_uc *data;
747    #ifndef STBI_NO_HDR
748    if (stbi_hdr_test_memory(buffer, len))
749       return stbi_hdr_load_from_memory(buffer, len,x,y,comp,req_comp);
750    #endif
751    data = stbi_load_from_memory(buffer, len, x, y, comp, req_comp);
752    if (data)
753       return ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
754    return epf("unknown image type", "Image not of any known type, or corrupt");
755 }
756 #endif
757 
// These is-hdr-or-not functions are defined regardless of whether
// STBI_NO_HDR is defined, for API simplicity; if STBI_NO_HDR is defined,
// they always report false!
761 
762 int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
763 {
764    #ifndef STBI_NO_HDR
765    return stbi_hdr_test_memory(buffer, len);
766    #else
767    STBI_NOTUSED(buffer);
768    STBI_NOTUSED(len);
769    return 0;
770    #endif
771 }
772 
773 #ifndef STBI_NO_STDIO
// Report whether the named file passes the HDR signature test. A file that
// cannot be opened is reported as "not HDR" (0).
extern int      stbi_is_hdr          (char const *filename)
{
   int is_hdr;
   FILE *fp = fopen(filename, "rb");

   if (fp == NULL)
      return 0;

   is_hdr = stbi_is_hdr_from_file(fp);
   fclose(fp);
   return is_hdr;
}
784 
// Report whether an already-open file passes the HDR signature test.
// When HDR support is compiled out (STBI_NO_HDR) this always returns 0; the
// parameter is then marked as unused, matching stbi_is_hdr_from_memory().
extern int      stbi_is_hdr_from_file(FILE *f)
{
   #ifndef STBI_NO_HDR
   return stbi_hdr_test_file(f);
   #else
   STBI_NOTUSED(f);
   return 0;
   #endif
}
793 
794 #endif
795 
796 #ifndef STBI_NO_HDR
// Tunables used when converting between float (HDR) and 8-bit (LDR) pixels.
// The HDR->LDR pair is stored as the reciprocal of what the caller passes in.
static float h2l_gamma_i = 1.0f/2.2f;
static float h2l_scale_i = 1.0f;
static float l2h_gamma = 2.2f;
static float l2h_scale = 1.0f;

// Set the gamma used by hdr_to_ldr(); stored as 1/gamma.
void   stbi_hdr_to_ldr_gamma(float gamma)
{
   h2l_gamma_i = 1/gamma;
}

// Set the scale used by hdr_to_ldr(); stored as 1/scale.
void   stbi_hdr_to_ldr_scale(float scale)
{
   h2l_scale_i = 1/scale;
}

// Set the gamma used by ldr_to_hdr().
void   stbi_ldr_to_hdr_gamma(float gamma)
{
   l2h_gamma = gamma;
}

// Set the scale used by ldr_to_hdr().
void   stbi_ldr_to_hdr_scale(float scale)
{
   l2h_scale = scale;
}
805 #endif
806 
807 
808 //////////////////////////////////////////////////////////////////////////////
809 //
810 // Common code used by all image loaders
811 //
812 
// Scan modes; the numeric values are spelled out explicitly.
enum
{
   SCAN_load   = 0,
   SCAN_type   = 1,
   SCAN_header = 2
};
819 
820 typedef struct
821 {
822    uint32 img_x, img_y;
823    int img_n, img_out_n;
824 
825    #ifndef STBI_NO_STDIO
826    FILE  *img_file;
827    int buflen;
828    uint8 buffer_start[128];
829    int from_file;
830    #endif
831    uint8 const *img_buffer, *img_buffer_end;
832 } stbi;
833 
834 #ifndef STBI_NO_STDIO
835 static void start_file(stbi *s, FILE *f)
836 {
837    s->img_file = f;
838    s->buflen = sizeof(s->buffer_start);
839    s->img_buffer_end = s->buffer_start + s->buflen;
840    s->img_buffer = s->img_buffer_end;
841    s->from_file = 1;
842 }
843 #endif
844 
845 static void start_mem(stbi *s, uint8 const *buffer, int len)
846 {
847 #ifndef STBI_NO_STDIO
848    s->img_file = NULL;
849    s->from_file = 0;
850 #endif
851    s->img_buffer = (uint8 const *) buffer;
852    s->img_buffer_end = (uint8 const *) buffer+len;
853 }
854 
855 #ifndef STBI_NO_STDIO
856 static void refill_buffer(stbi *s)
857 {
858    int n = fread(s->buffer_start, 1, s->buflen, s->img_file);
859    if (n == 0) {
860       s->from_file = 0;
861       s->img_buffer = s->img_buffer_end-1;
862 #if 0
863       *s->img_buffer = 0;
864 #endif
865    } else {
866       s->img_buffer = s->buffer_start;
867       s->img_buffer_end = s->buffer_start + n;
868    }
869 }
870 #endif
871 
872 __forceinline static int get8(stbi *s)
873 {
874    if (s->img_buffer < s->img_buffer_end)
875       return *s->img_buffer++;
876 #ifndef STBI_NO_STDIO
877    if (s->from_file) {
878       refill_buffer(s);
879       return *s->img_buffer++;
880    }
881 #endif
882    return 0;
883 }
884 
885 __forceinline static int at_eof(stbi *s)
886 {
887 #ifndef STBI_NO_STDIO
888    if (s->img_file) {
889       if (!feof(s->img_file)) return 0;
890       // if feof() is true, check if buffer = end
891       // special case: we've only got the special 0 character at the end
892       if (s->from_file == 0) return 1;
893    }
894 #endif
895    return s->img_buffer >= s->img_buffer_end;
896 }
897 
898 __forceinline static uint8 get8u(stbi *s)
899 {
900    return (uint8) get8(s);
901 }
902 
903 static void skip(stbi *s, int n)
904 {
905 #ifndef STBI_NO_STDIO
906    if (s->img_file) {
907       int blen = s->img_buffer_end - s->img_buffer;
908       if (blen < n) {
909          s->img_buffer = s->img_buffer_end;
910          fseek(s->img_file, n - blen, SEEK_CUR);
911          return;
912       }
913    }
914 #endif
915    s->img_buffer += n;
916 }
917 
918 static int getn(stbi *s, stbi_uc *buffer, int n)
919 {
920 #ifndef STBI_NO_STDIO
921    if (s->img_file) {
922       int blen = s->img_buffer_end - s->img_buffer;
923       if (blen < n) {
924          int res;
925          memcpy(buffer, s->img_buffer, blen);
926          res = ((int) fread(buffer + blen, 1, n - blen, s->img_file) == (n-blen));
927          s->img_buffer = s->img_buffer_end;
928          return res;
929       }
930    }
931 #endif
932    if (s->img_buffer+n <= s->img_buffer_end) {
933       memcpy(buffer, s->img_buffer, n);
934       s->img_buffer += n;
935       return 1;
936    } else
937       return 0;
938 }
939 
940 static int get16(stbi *s)
941 {
942    int z = get8(s);
943    return (z << 8) + get8(s);
944 }
945 
946 static uint32 get32(stbi *s)
947 {
948    uint32 z = get16(s);
949    return (z << 16) + get16(s);
950 }
951 
952 static int get16le(stbi *s)
953 {
954    int z = get8(s);
955    return z + (get8(s) << 8);
956 }
957 
958 static uint32 get32le(stbi *s)
959 {
960    uint32 z = get16le(s);
961    return z + (get16le(s) << 16);
962 }
963 
964 //////////////////////////////////////////////////////////////////////////////
965 //
966 //  generic converter from built-in img_n to req_comp
967 //    individual types do this automatically as much as possible (e.g. jpeg
968 //    does all cases internally since it needs to colorspace convert anyway,
969 //    and it never has alpha, so very few cases ). png can automatically
970 //    interleave an alpha=255 channel, but falls back to this for other cases
971 //
972 //  assume data buffer is malloced, so malloc a new one and free that one
973 //  only failure mode is malloc failing
974 
975 static uint8 compute_y(int r, int g, int b)
976 {
977    return (uint8) (((r*77) + (g*150) +  (29*b)) >> 8);
978 }
979 
980 static unsigned char *convert_format(unsigned char *data, int img_n, int req_comp, uint x, uint y)
981 {
982    int i,j;
983    unsigned char *good;
984 
985    if (req_comp == img_n) return data;
986    assert(req_comp >= 1 && req_comp <= 4);
987 
988    good = MALLOC(req_comp * x * y);
989    if (good == NULL) {
990       FREE(data);
991       return epuc("outofmem", "Out of memory");
992    }
993 
994    for (j=0; j < (int) y; ++j) {
995       unsigned char *src  = data + j * x * img_n   ;
996       unsigned char *dest = good + j * x * req_comp;
997 
998       #define COMBO(a,b)  ((a)*8+(b))
999       #define CASE(a,b)   case COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
1000       // convert source image with img_n components to one with req_comp components;
1001       // avoid switch per pixel, so use switch per scanline and massive macros
1002       switch (COMBO(img_n, req_comp)) {
1003          CASE(1,2) dest[0]=src[0], dest[1]=255;
1004 		break;
1005          CASE(1,3) dest[0]=dest[1]=dest[2]=src[0];
1006 		break;
1007          CASE(1,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=255;
1008 		break;
1009          CASE(2,1) dest[0]=src[0];
1010 		break;
1011          CASE(2,3) dest[0]=dest[1]=dest[2]=src[0];
1012 		break;
1013          CASE(2,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1];
1014 		break;
1015          CASE(3,4) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255;
1016 		break;
1017          CASE(3,1) dest[0]=compute_y(src[0],src[1],src[2]);
1018 		break;
1019          CASE(3,2) dest[0]=compute_y(src[0],src[1],src[2]), dest[1] = 255;
1020 		break;
1021          CASE(4,1) dest[0]=compute_y(src[0],src[1],src[2]);
1022 		break;
1023          CASE(4,2) dest[0]=compute_y(src[0],src[1],src[2]), dest[1] = src[3];
1024 		break;
1025          CASE(4,3) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2];
1026 		break;
1027          default: assert(0);
1028       }
1029       #undef CASE
1030    }
1031 
1032    FREE(data);
1033    return good;
1034 }
1035 
1036 #ifndef STBI_NO_HDR
1037 static float   *ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
1038 {
1039    int i,k,n;
1040    float *output = MALLOC(x * y * comp * sizeof(float));
1041    if (output == NULL) { FREE(data); return epf("outofmem", "Out of memory"); }
1042    // compute number of non-alpha components
1043    if (comp & 1) n = comp; else n = comp-1;
1044    for (i=0; i < x*y; ++i) {
1045       for (k=0; k < n; ++k) {
1046          output[i*comp + k] = (float) pow(data[i*comp+k]/255.0f, l2h_gamma) * l2h_scale;
1047       }
1048       if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f;
1049    }
1050    FREE(data);
1051    return output;
1052 }
1053 
1054 #define float2int(x)   ((int) (x))
1055 static stbi_uc *hdr_to_ldr(float   *data, int x, int y, int comp)
1056 {
1057    int i,k,n;
1058    stbi_uc *output = MALLOC(x * y * comp);
1059    if (output == NULL) { FREE(data); return epuc("outofmem", "Out of memory"); }
1060    // compute number of non-alpha components
1061    if (comp & 1) n = comp; else n = comp-1;
1062    for (i=0; i < x*y; ++i) {
1063       for (k=0; k < n; ++k) {
1064          float z = (float) pow(data[i*comp+k]*h2l_scale_i, h2l_gamma_i) * 255 + 0.5f;
1065          if (z < 0) z = 0;
1066          if (z > 255) z = 255;
1067          output[i*comp + k] = (uint8) float2int(z);
1068       }
1069       if (k < comp) {
1070          float z = data[i*comp+k] * 255 + 0.5f;
1071          if (z < 0) z = 0;
1072          if (z > 255) z = 255;
1073          output[i*comp + k] = (uint8) float2int(z);
1074       }
1075    }
1076    FREE(data);
1077    return output;
1078 }
1079 #endif
1080 
1081 //////////////////////////////////////////////////////////////////////////////
1082 //
1083 //  "baseline" JPEG/JFIF decoder (not actually fully baseline implementation)
1084 //
1085 //    simple implementation
1086 //      - channel subsampling of at most 2 in each dimension
1087 //      - doesn't support delayed output of y-dimension
1088 //      - simple interface (only one output format: 8-bit interleaved RGB)
1089 //      - doesn't try to recover corrupt jpegs
1090 //      - doesn't allow partial loading, loading multiple at once
1091 //      - still fast on x86 (copying globals into locals doesn't help x86)
1092 //      - allocates lots of intermediate memory (full size of all components)
1093 //        - non-interleaved case requires this anyway
1094 //        - allows good upsampling (see next)
1095 //    high-quality
1096 //      - upsampled channels are bilinearly interpolated, even across blocks
1097 //      - quality integer IDCT derived from IJG's 'slow'
1098 //    performance
1099 //      - fast huffman; reasonable integer IDCT
1100 //      - uses a lot of intermediate memory, could cache poorly
1101 //      - load http://nothings.org/remote/anemones.jpg 3 times on 2.8Ghz P4
1102 //          stb_jpeg:   1.34 seconds (MSVC6, default release build)
1103 //          stb_jpeg:   1.06 seconds (MSVC6, processor = Pentium Pro)
1104 //          IJL11.dll:  1.08 seconds (compiled by intel)
1105 //          IJG 1998:   0.98 seconds (MSVC6, makefile provided by IJG)
1106 //          IJG 1998:   0.95 seconds (MSVC6, makefile + proc=PPro)
1107 
// huffman decoding acceleration: codes of at most FAST_BITS bits are resolved
// with a single table lookup
#define FAST_BITS   9  // larger handles more cases; smaller stomps less cache

// One JPEG Huffman table, in the form built by build_huffman() and consumed
// by decode(). "Symbol index" below means the position of a symbol in the
// table's own ordering, not the decoded value.
typedef struct
{
   // indexed by the next FAST_BITS bits of input: symbol index of the code
   // that starts there, or 255 if the code is longer than FAST_BITS
   uint8  fast[1 << FAST_BITS];
   // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
   uint16 code[256];         // code assigned to each symbol index
   uint8  values[256];       // decoded value for each symbol index
   uint8  size[257];         // code length in bits per symbol index, 0-terminated
   unsigned int maxcode[18]; // per code length: largest code + 1, preshifted to 16 bits; entry 17 is a sentinel
   int    delta[17];   // old 'firstsymbol' - old 'firstcode'
} huffman;
1121 
1122 typedef struct
1123 {
1124    #ifdef STBI_SIMD
1125    unsigned short dequant2[4][64];
1126    #endif
1127    stbi s;
1128    huffman huff_dc[4];
1129    huffman huff_ac[4];
1130    uint8 dequant[4][64];
1131 
1132 // sizes for components, interleaved MCUs
1133    int img_h_max, img_v_max;
1134    int img_mcu_x, img_mcu_y;
1135    int img_mcu_w, img_mcu_h;
1136 
1137 // definition of jpeg image component
1138    struct
1139    {
1140       int id;
1141       int h,v;
1142       int tq;
1143       int hd,ha;
1144       int dc_pred;
1145 
1146       int x,y,w2,h2;
1147       uint8 *data;
1148       void *raw_data;
1149       uint8 *linebuf;
1150    } img_comp[4];
1151 
1152    uint32         code_buffer; // jpeg entropy-coded buffer
1153    int            code_bits;   // number of valid bits
1154    unsigned char  marker;      // marker seen while filling entropy buffer
1155    int            nomore;      // flag if we saw a marker so must stop
1156 
1157    int scan_n, order[4];
1158    int restart_interval, todo;
1159 } jpeg;
1160 
1161 static int build_huffman(huffman *h, int *count)
1162 {
1163    int i,j,k=0,code;
1164    // build size list for each symbol (from JPEG spec)
1165    for (i=0; i < 16; ++i)
1166       for (j=0; j < count[i]; ++j)
1167          h->size[k++] = (uint8) (i+1);
1168    h->size[k] = 0;
1169 
1170    // compute actual symbols (from jpeg spec)
1171    code = 0;
1172    k = 0;
1173    for(j=1; j <= 16; ++j) {
1174       // compute delta to add to code to compute symbol id
1175       h->delta[j] = k - code;
1176       if (h->size[k] == j) {
1177          while (h->size[k] == j)
1178             h->code[k++] = (uint16) (code++);
1179          if (code-1 >= (1 << j)) return e("bad code lengths","Corrupt JPEG");
1180       }
1181       // compute largest code + 1 for this size, preshifted as needed later
1182       h->maxcode[j] = code << (16-j);
1183       code <<= 1;
1184    }
1185    h->maxcode[j] = 0xffffffff;
1186 
1187    // build non-spec acceleration table; 255 is flag for not-accelerated
1188    memset(h->fast, 255, 1 << FAST_BITS);
1189    for (i=0; i < k; ++i) {
1190       int s = h->size[i];
1191       if (s <= FAST_BITS) {
1192          int c = h->code[i] << (FAST_BITS-s);
1193          int m = 1 << (FAST_BITS-s);
1194          for (j=0; j < m; ++j) {
1195             h->fast[c+j] = (uint8) i;
1196          }
1197       }
1198    }
1199    return 1;
1200 }
1201 
1202 static void grow_buffer_unsafe(jpeg *j)
1203 {
1204    do {
1205       int b = j->nomore ? 0 : get8(&j->s);
1206       if (b == 0xff) {
1207          int c = get8(&j->s);
1208          if (c != 0) {
1209             j->marker = (unsigned char) c;
1210             j->nomore = 1;
1211             return;
1212          }
1213       }
1214       j->code_buffer |= b << (24 - j->code_bits);
1215       j->code_bits += 8;
1216    } while (j->code_bits <= 24);
1217 }
1218 
1219 // (1 << n) - 1
1220 static uint32 bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
1221 
1222 // decode a jpeg huffman value from the bitstream
1223 __forceinline static int decode(jpeg *j, huffman *h)
1224 {
1225    unsigned int temp;
1226    int c,k;
1227 
1228    if (j->code_bits < 16) grow_buffer_unsafe(j);
1229 
1230    // look at the top FAST_BITS and determine what symbol ID it is,
1231    // if the code is <= FAST_BITS
1232    c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1233    k = h->fast[c];
1234    if (k < 255) {
1235       int s = h->size[k];
1236       if (s > j->code_bits)
1237          return -1;
1238       j->code_buffer <<= s;
1239       j->code_bits -= s;
1240       return h->values[k];
1241    }
1242 
1243    // naive test is to shift the code_buffer down so k bits are
1244    // valid, then test against maxcode. To speed this up, we've
1245    // preshifted maxcode left so that it has (16-k) 0s at the
1246    // end; in other words, regardless of the number of bits, it
1247    // wants to be compared against something shifted to have 16;
1248    // that way we don't need to shift inside the loop.
1249    temp = j->code_buffer >> 16;
1250    for (k=FAST_BITS+1 ; ; ++k)
1251       if (temp < h->maxcode[k])
1252          break;
1253    if (k == 17) {
1254       // error! code not found
1255       j->code_bits -= 16;
1256       return -1;
1257    }
1258 
1259    if (k > j->code_bits)
1260       return -1;
1261 
1262    // convert the huffman code to the symbol id
1263    c = ((j->code_buffer >> (32 - k)) & bmask[k]) + h->delta[k];
1264    assert((((j->code_buffer) >> (32 - h->size[c])) & bmask[h->size[c]]) == h->code[c]);
1265 
1266    // convert the id to a symbol
1267    j->code_bits -= k;
1268    j->code_buffer <<= k;
1269    return h->values[c];
1270 }
1271 
1272 // combined JPEG 'receive' and JPEG 'extend', since baseline
1273 // always extends everything it receives.
1274 __forceinline static int extend_receive(jpeg *j, int n)
1275 {
1276    unsigned int m = 1 << (n-1);
1277    unsigned int k;
1278    if (j->code_bits < n) grow_buffer_unsafe(j);
1279 
1280    #if 1
1281    k = stbi_lrot(j->code_buffer, n);
1282    j->code_buffer = k & ~bmask[n];
1283    k &= bmask[n];
1284    j->code_bits -= n;
1285    #else
1286    k = (j->code_buffer >> (32 - n)) & bmask[n];
1287    j->code_bits -= n;
1288    j->code_buffer <<= n;
1289    #endif
1290    // the following test is probably a random branch that won't
1291    // predict well. I tried to table accelerate it but failed.
1292    // maybe it's compiling as a conditional move?
1293    if (k < m)
1294       return (UINT_MAX << n) + k + 1;
1295    else
1296       return k;
1297 }
1298 
// given a value that's at position X in the zigzag stream,
// where does it appear in the 8x8 matrix coded as row-major?
// The table has 15 extra entries beyond the 64 real ones, all mapping to
// position 63, so an index that runs past 63 still reads inside the table.
static uint8 dezigzag[64+15] =
{
    0,  1,  8, 16,  9,  2,  3, 10,
   17, 24, 32, 25, 18, 11,  4,  5,
   12, 19, 26, 33, 40, 48, 41, 34,
   27, 20, 13,  6,  7, 14, 21, 28,
   35, 42, 49, 56, 57, 50, 43, 36,
   29, 22, 15, 23, 30, 37, 44, 51,
   58, 59, 52, 45, 38, 31, 39, 46,
   53, 60, 61, 54, 47, 55, 62, 63,
   // let corrupt input sample past end
   63, 63, 63, 63, 63, 63, 63, 63,
   63, 63, 63, 63, 63, 63, 63
};
1315 
1316 // decode one 64-entry block--
1317 static int decode_block(jpeg *j, short data[64], huffman *hdc, huffman *hac, int b)
1318 {
1319    int diff,dc,k;
1320    int t = decode(j, hdc);
1321    if (t < 0) return e("bad huffman code","Corrupt JPEG");
1322 
1323    // 0 all the ac values now so we can do it 32-bits at a time
1324    memset(data,0,64*sizeof(data[0]));
1325 
1326    diff = t ? extend_receive(j, t) : 0;
1327    dc = j->img_comp[b].dc_pred + diff;
1328    j->img_comp[b].dc_pred = dc;
1329    data[0] = (short) dc;
1330 
1331    // decode AC components, see JPEG spec
1332    k = 1;
1333    do {
1334       int r,s;
1335       int rs = decode(j, hac);
1336       if (rs < 0) return e("bad huffman code","Corrupt JPEG");
1337       s = rs & 15;
1338       r = rs >> 4;
1339       if (s == 0) {
1340          if (rs != 0xf0) break; // end block
1341          k += 16;
1342       } else {
1343          k += r;
1344          // decode into unzigzag'd location
1345          data[dezigzag[k++]] = (short) extend_receive(j,s);
1346       }
1347    } while (k < 64);
1348    return 1;
1349 }
1350 
1351 // take a -128..127 value and clamp it and convert to 0..255
1352 __forceinline static uint8 clamp(int x)
1353 {
1354    // trick to use a single test to catch both cases
1355    if ((unsigned int) x > 255) {
1356       if (x < 0) return 0;
1357       if (x > 255) return 255;
1358    }
1359    return (uint8) x;
1360 }
1361 
// f2f: convert a floating-point constant to 12-bit fixed point.
// The whole expansion is parenthesised so it is safe in any expression.
#define f2f(x)  ((int) (((x) * 4096 + 0.5)))
// fsh: scale an integer to the same 12-bit fixed point. Written as a
// multiply because the operand can be negative and left-shifting a negative
// value is undefined.
#define fsh(x)  ((x) * 4096)

// derived from jidctint -- DCT_ISLOW
// One 8-point 1-D IDCT pass in fixed point. Declares its own temporaries, so
// it can be used once per scope. On exit x0..x3 hold the even-part terms and
// t0..t3 the odd-part terms; the caller combines them as x+t and x-t.
#define IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7)       \
   int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
   p2 = s2;                                    \
   p3 = s6;                                    \
   p1 = (p2+p3) * f2f(0.5411961f);             \
   t2 = p1 + p3*f2f(-1.847759065f);            \
   t3 = p1 + p2*f2f( 0.765366865f);            \
   p2 = s0;                                    \
   p3 = s4;                                    \
   t0 = fsh(p2+p3);                            \
   t1 = fsh(p2-p3);                            \
   x0 = t0+t3;                                 \
   x3 = t0-t3;                                 \
   x1 = t1+t2;                                 \
   x2 = t1-t2;                                 \
   t0 = s7;                                    \
   t1 = s5;                                    \
   t2 = s3;                                    \
   t3 = s1;                                    \
   p3 = t0+t2;                                 \
   p4 = t1+t3;                                 \
   p1 = t0+t3;                                 \
   p2 = t1+t2;                                 \
   p5 = (p3+p4)*f2f( 1.175875602f);            \
   t0 = t0*f2f( 0.298631336f);                 \
   t1 = t1*f2f( 2.053119869f);                 \
   t2 = t2*f2f( 3.072711026f);                 \
   t3 = t3*f2f( 1.501321110f);                 \
   p1 = p5 + p1*f2f(-0.899976223f);            \
   p2 = p5 + p2*f2f(-2.562915447f);            \
   p3 = p3*f2f(-1.961570560f);                 \
   p4 = p4*f2f(-0.390180644f);                 \
   t3 += p1+p4;                                \
   t2 += p2+p3;                                \
   t1 += p2+p4;                                \
   t0 += p1+p3;
1402 
1403 #ifdef STBI_SIMD
1404 typedef unsigned short stbi_dequantize_t;
1405 #else
1406 typedef uint8 stbi_dequantize_t;
1407 #endif
1408 
1409 // .344 seconds on 3*anemones.jpg
1410 static void idct_block(uint8 *out, int out_stride, short data[64], stbi_dequantize_t *dequantize)
1411 {
1412    int i,val[64],*v=val;
1413    stbi_dequantize_t *dq = dequantize;
1414    uint8 *o;
1415    short *d = data;
1416 
1417    // columns
1418    for (i=0; i < 8; ++i,++d,++dq, ++v) {
1419       // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
1420       if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
1421            && d[40]==0 && d[48]==0 && d[56]==0) {
1422          //    no shortcut                 0     seconds
1423          //    (1|2|3|4|5|6|7)==0          0     seconds
1424          //    all separate               -0.047 seconds
1425          //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
1426          int dcterm = d[0] * dq[0] << 2;
1427          v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
1428       } else {
1429          IDCT_1D(d[ 0]*dq[ 0],d[ 8]*dq[ 8],d[16]*dq[16],d[24]*dq[24],
1430                  d[32]*dq[32],d[40]*dq[40],d[48]*dq[48],d[56]*dq[56])
1431          // constants scaled things up by 1<<12; let's bring them back
1432          // down, but keep 2 extra bits of precision
1433          x0 += 512; x1 += 512; x2 += 512; x3 += 512;
1434          v[ 0] = (x0+t3) >> 10;
1435          v[56] = (x0-t3) >> 10;
1436          v[ 8] = (x1+t2) >> 10;
1437          v[48] = (x1-t2) >> 10;
1438          v[16] = (x2+t1) >> 10;
1439          v[40] = (x2-t1) >> 10;
1440          v[24] = (x3+t0) >> 10;
1441          v[32] = (x3-t0) >> 10;
1442       }
1443    }
1444 
1445    for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
1446       // no fast case since the first 1D IDCT spread components out
1447       IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
1448       // constants scaled things up by 1<<12, plus we had 1<<2 from first
1449       // loop, plus horizontal and vertical each scale by sqrt(8) so together
1450       // we've got an extra 1<<3, so 1<<17 total we need to remove.
1451       // so we want to round that, which means adding 0.5 * 1<<17,
1452       // aka 65536. Also, we'll end up with -128 to 127 that we want
1453       // to encode as 0..255 by adding 128, so we'll add that before the shift
1454       x0 += 65536 + (128<<17);
1455       x1 += 65536 + (128<<17);
1456       x2 += 65536 + (128<<17);
1457       x3 += 65536 + (128<<17);
1458       // tried computing the shifts into temps, or'ing the temps to see
1459       // if any were out of range, but that was slower
1460       o[0] = clamp((x0+t3) >> 17);
1461       o[7] = clamp((x0-t3) >> 17);
1462       o[1] = clamp((x1+t2) >> 17);
1463       o[6] = clamp((x1-t2) >> 17);
1464       o[2] = clamp((x2+t1) >> 17);
1465       o[5] = clamp((x2-t1) >> 17);
1466       o[3] = clamp((x3+t0) >> 17);
1467       o[4] = clamp((x3-t0) >> 17);
1468    }
1469 }
1470 
#ifdef STBI_SIMD
// IDCT routine used by the JPEG decoder in STBI_SIMD builds; starts out as
// the built-in idct_block.
static stbi_idct_8x8 stbi_idct_installed = idct_block;

// Replace the IDCT routine used by subsequent JPEG decodes with `func`.
// The pointer is stored as given; no check is made that it is non-NULL.
extern void stbi_install_idct(stbi_idct_8x8 func)
{
   stbi_idct_installed = func;
}
#endif
1479 
1480 #define MARKER_none  0xff
1481 // if there's a pending marker from the entropy stream, return that
1482 // otherwise, fetch from the stream and get a marker. if there's no
1483 // marker, return 0xff, which is never a valid marker value
1484 static uint8 get_marker(jpeg *j)
1485 {
1486    uint8 x;
1487    if (j->marker != MARKER_none) { x = j->marker; j->marker = MARKER_none; return x; }
1488    x = get8u(&j->s);
1489    if (x != 0xff) return MARKER_none;
1490    while (x == 0xff)
1491       x = get8u(&j->s);
1492    return x;
1493 }
1494 
1495 // in each scan, we'll have scan_n components, and the order
1496 // of the components is specified by order[]
// true for the restart marker codes 0xd0..0xd7
#define RESTART(x)     (!((x) < 0xd0 || (x) > 0xd7))
1498 
1499 // after a restart interval, reset the entropy decoder and
1500 // the dc prediction
1501 static void reset(jpeg *j)
1502 {
1503    j->code_bits = 0;
1504    j->code_buffer = 0;
1505    j->nomore = 0;
1506    j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = 0;
1507    j->marker = MARKER_none;
1508    j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
1509    // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
1510    // since we don't even allow 1<<30 pixels
1511 }
1512 
// Decode the entropy-coded data of one scan into the component buffers.
// Each block is Huffman-decoded with decode_block() and then run through the
// IDCT straight into img_comp[n].data, using w2 as the row stride.
// Returns 0 if a block fails to decode, 1 otherwise -- including the case
// where an expected restart marker is missing, in which case decoding simply
// stops early and the data decoded so far is kept.
static int parse_entropy_coded_data(jpeg *z)
{
   reset(z);
   if (z->scan_n == 1) {
      int i,j;
      #ifdef STBI_SIMD
      __declspec(align(16))
      #endif
      short data[64];
      int n = z->order[0];
      // non-interleaved data, we just need to process one block at a time,
      // in trivial scanline order
      // number of blocks to do just depends on how many actual "pixels" this
      // component has, independent of interleaved MCU blocking and such
      int w = (z->img_comp[n].x+7) >> 3;
      int h = (z->img_comp[n].y+7) >> 3;
      for (j=0; j < h; ++j) {
         for (i=0; i < w; ++i) {
            if (!decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+z->img_comp[n].ha, n)) return 0;
            #ifdef STBI_SIMD
            stbi_idct_installed(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data, z->dequant2[z->img_comp[n].tq]);
            #else
            idct_block(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data, z->dequant[z->img_comp[n].tq]);
            #endif
            // every data block is an MCU, so countdown the restart interval
            if (--z->todo <= 0) {
               // make sure any marker that follows has been picked up
               if (z->code_bits < 24) grow_buffer_unsafe(z);
               // if it's NOT a restart, then just bail, so we get corrupt data
               // rather than no data
               if (!RESTART(z->marker)) return 1;
               reset(z);
            }
         }
      }
   } else { // interleaved!
      int i,j,k,x,y;
      short data[64];
      for (j=0; j < z->img_mcu_y; ++j) {
         for (i=0; i < z->img_mcu_x; ++i) {
            // scan an interleaved mcu... process scan_n components in order
            for (k=0; k < z->scan_n; ++k) {
               int n = z->order[k];
               // scan out an mcu's worth of this component; that's just determined
               // by the basic H and V specified for the component
               for (y=0; y < z->img_comp[n].v; ++y) {
                  for (x=0; x < z->img_comp[n].h; ++x) {
                     // top-left sample position of this block within the component
                     int x2 = (i*z->img_comp[n].h + x)*8;
                     int y2 = (j*z->img_comp[n].v + y)*8;
                     if (!decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+z->img_comp[n].ha, n)) return 0;
                     #ifdef STBI_SIMD
                     stbi_idct_installed(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data, z->dequant2[z->img_comp[n].tq]);
                     #else
                     idct_block(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data, z->dequant[z->img_comp[n].tq]);
                     #endif
                  }
               }
            }
            // after all interleaved components, that's an interleaved MCU,
            // so now count down the restart interval
            if (--z->todo <= 0) {
               // make sure any marker that follows has been picked up
               if (z->code_bits < 24) grow_buffer_unsafe(z);
               // if it's NOT a restart, then just bail, so we get corrupt data
               // rather than no data
               if (!RESTART(z->marker)) return 1;
               reset(z);
            }
         }
      }
   }
   return 1;
}
1584 
1585 static int process_marker(jpeg *z, int marker)
1586 {
1587    int L;
1588    switch (marker) {
1589       case MARKER_none: // no marker found
1590          return e("expected marker","Corrupt JPEG");
1591 
1592       case 0xC2: // SOF - progressive
1593          return e("progressive jpeg","JPEG format not supported (progressive)");
1594 
1595       case 0xDD: // DRI - specify restart interval
1596          if (get16(&z->s) != 4) return e("bad DRI len","Corrupt JPEG");
1597          z->restart_interval = get16(&z->s);
1598          return 1;
1599 
1600       case 0xDB: // DQT - define quantization table
1601          L = get16(&z->s)-2;
1602          while (L > 0) {
1603             int q = get8(&z->s);
1604             int p = q >> 4;
1605             int t = q & 15,i;
1606             if (p != 0) return e("bad DQT type","Corrupt JPEG");
1607             if (t > 3) return e("bad DQT table","Corrupt JPEG");
1608             for (i=0; i < 64; ++i)
1609                z->dequant[t][dezigzag[i]] = get8u(&z->s);
1610             #ifdef STBI_SIMD
1611             for (i=0; i < 64; ++i)
1612                z->dequant2[t][i] = z->dequant[t][i];
1613             #endif
1614             L -= 65;
1615          }
1616          return L==0;
1617 
1618       case 0xC4: // DHT - define huffman table
1619          L = get16(&z->s)-2;
1620          while (L > 0) {
1621             uint8 *v;
1622             int sizes[16],i,m=0;
1623             int q = get8(&z->s);
1624             int tc = q >> 4;
1625             int th = q & 15;
1626             if (tc > 1 || th > 3) return e("bad DHT header","Corrupt JPEG");
1627             for (i=0; i < 16; ++i) {
1628                sizes[i] = get8(&z->s);
1629                m += sizes[i];
1630             }
1631             L -= 17;
1632             if (tc == 0) {
1633                if (!build_huffman(z->huff_dc+th, sizes)) return 0;
1634                v = z->huff_dc[th].values;
1635             } else {
1636                if (!build_huffman(z->huff_ac+th, sizes)) return 0;
1637                v = z->huff_ac[th].values;
1638             }
1639             for (i=0; i < m; ++i)
1640                v[i] = get8u(&z->s);
1641             L -= m;
1642          }
1643          return L==0;
1644    }
1645    // check for comment block or APP blocks
1646    if ((marker >= 0xE0 && marker <= 0xEF) || marker == 0xFE) {
1647       skip(&z->s, get16(&z->s)-2);
1648       return 1;
1649    }
1650    return 0;
1651 }
1652 
1653 // after we see SOS
1654 static int process_scan_header(jpeg *z)
1655 {
1656    int i;
1657    int Ls = get16(&z->s);
1658    z->scan_n = get8(&z->s);
1659    if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s.img_n) return e("bad SOS component count","Corrupt JPEG");
1660    if (Ls != 6+2*z->scan_n) return e("bad SOS len","Corrupt JPEG");
1661    for (i=0; i < z->scan_n; ++i) {
1662       int id = get8(&z->s), which;
1663       int q = get8(&z->s);
1664       for (which = 0; which < z->s.img_n; ++which)
1665          if (z->img_comp[which].id == id)
1666             break;
1667       if (which == z->s.img_n) return 0;
1668       z->img_comp[which].hd = q >> 4;   if (z->img_comp[which].hd > 3) return e("bad DC huff","Corrupt JPEG");
1669       z->img_comp[which].ha = q & 15;   if (z->img_comp[which].ha > 3) return e("bad AC huff","Corrupt JPEG");
1670       z->order[i] = which;
1671    }
1672    if (get8(&z->s) != 0) return e("bad SOS","Corrupt JPEG");
1673    get8(&z->s); // should be 63, but might be 0
1674    if (get8(&z->s) != 0) return e("bad SOS","Corrupt JPEG");
1675 
1676    return 1;
1677 }
1678 
1679 static int process_frame_header(jpeg *z, int scan)
1680 {
1681    stbi *s = &z->s;
1682    int Lf,p,i,q, h_max=1,v_max=1,c;
1683    Lf = get16(s);         if (Lf < 11) return e("bad SOF len","Corrupt JPEG"); // JPEG
1684    p  = get8(s);          if (p != 8) return e("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
1685    s->img_y = get16(s);   if (s->img_y == 0) return e("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
1686    s->img_x = get16(s);   if (s->img_x == 0) return e("0 width","Corrupt JPEG"); // JPEG requires
1687    c = get8(s);
1688    if (c != 3 && c != 1) return e("bad component count","Corrupt JPEG");    // JFIF requires
1689    s->img_n = c;
1690    for (i=0; i < c; ++i) {
1691       z->img_comp[i].data = NULL;
1692       z->img_comp[i].linebuf = NULL;
1693    }
1694 
1695    if (Lf != 8+3*s->img_n) return e("bad SOF len","Corrupt JPEG");
1696 
1697    for (i=0; i < s->img_n; ++i) {
1698       z->img_comp[i].id = get8(s);
1699       if (z->img_comp[i].id != i+1)   // JFIF requires
1700          if (z->img_comp[i].id != i)  // some version of jpegtran outputs non-JFIF-compliant files!
1701             return e("bad component ID","Corrupt JPEG");
1702       q = get8(s);
1703       z->img_comp[i].h = (q >> 4);  if (!z->img_comp[i].h || z->img_comp[i].h > 4) return e("bad H","Corrupt JPEG");
1704       z->img_comp[i].v = q & 15;    if (!z->img_comp[i].v || z->img_comp[i].v > 4) return e("bad V","Corrupt JPEG");
1705       z->img_comp[i].tq = get8(s);  if (z->img_comp[i].tq > 3) return e("bad TQ","Corrupt JPEG");
1706    }
1707 
1708    if (scan != SCAN_load) return 1;
1709 
1710    if ((1 << 30) / s->img_x / s->img_n < s->img_y) return e("too large", "Image too large to decode");
1711 
1712    for (i=0; i < s->img_n; ++i) {
1713       if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
1714       if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
1715    }
1716 
1717    // compute interleaved mcu info
1718    z->img_h_max = h_max;
1719    z->img_v_max = v_max;
1720    z->img_mcu_w = h_max * 8;
1721    z->img_mcu_h = v_max * 8;
1722    z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
1723    z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;
1724 
1725    for (i=0; i < s->img_n; ++i) {
1726       // number of effective pixels (e.g. for non-interleaved MCU)
1727       z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
1728       z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;
1729       // to simplify generation, we'll allocate enough memory to decode
1730       // the bogus oversized data from using interleaved MCUs and their
1731       // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
1732       // discard the extra data until colorspace conversion
1733       z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
1734       z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
1735       z->img_comp[i].raw_data = MALLOC(z->img_comp[i].w2 * z->img_comp[i].h2+15);
1736       if (z->img_comp[i].raw_data == NULL) {
1737          for(--i; i >= 0; --i) {
1738             FREE(z->img_comp[i].raw_data);
1739             z->img_comp[i].data = NULL;
1740          }
1741          return e("outofmem", "Out of memory");
1742       }
1743       // align blocks for installable-idct using mmx/sse
1744       z->img_comp[i].data = (uint8*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
1745       z->img_comp[i].linebuf = NULL;
1746    }
1747 
1748    return 1;
1749 }
1750 
1751 // use comparisons since in some cases we handle more than one case (e.g. SOF)
1752 #define DNL(x)         ((x) == 0xdc)
1753 #define SOI(x)         ((x) == 0xd8)
1754 #define EOI(x)         ((x) == 0xd9)
1755 #define SOF(x)         ((x) == 0xc0 || (x) == 0xc1)
1756 #define SOS(x)         ((x) == 0xda)
1757 
1758 static int decode_jpeg_header(jpeg *z, int scan)
1759 {
1760    int m;
1761    z->marker = MARKER_none; // initialize cached marker to empty
1762    m = get_marker(z);
1763    if (!SOI(m)) return e("no SOI","Corrupt JPEG");
1764    if (scan == SCAN_type) return 1;
1765    m = get_marker(z);
1766    while (!SOF(m)) {
1767       if (!process_marker(z,m)) return 0;
1768       m = get_marker(z);
1769       while (m == MARKER_none) {
1770          // some files have extra padding after their blocks, so ok, we'll scan
1771          if (at_eof(&z->s)) return e("no SOF", "Corrupt JPEG");
1772          m = get_marker(z);
1773       }
1774    }
1775    if (!process_frame_header(z, scan)) return 0;
1776    return 1;
1777 }
1778 
1779 static int decode_jpeg_image(jpeg *j)
1780 {
1781    int m;
1782    j->restart_interval = 0;
1783    if (!decode_jpeg_header(j, SCAN_load)) return 0;
1784    m = get_marker(j);
1785    while (!EOI(m)) {
1786       if (SOS(m)) {
1787          if (!process_scan_header(j)) return 0;
1788          if (!parse_entropy_coded_data(j)) return 0;
1789          if (j->marker == MARKER_none ) {
1790             // handle 0s at the end of image data from IP Kamera 9060
1791             while (!at_eof(&j->s)) {
1792                int x = get8(&j->s);
1793                if (x == 255) {
1794                   j->marker = get8u(&j->s);
1795                   break;
1796                } else if (x != 0) {
1797                   return 0;
1798                }
1799             }
1800             // if we reach eof without hitting a marker, get_marker() below will fail and we'll eventually return 0
1801          }
1802       } else {
1803          if (!process_marker(j, m)) return 0;
1804       }
1805       m = get_marker(j);
1806    }
1807    return 1;
1808 }
1809 
1810 // static jfif-centered resampling (across block boundaries)
1811 
1812 typedef uint8 *(*resample_row_func)(uint8 *out, uint8 *in0, uint8 *in1,
1813                                     int w, int hs);
1814 
1815 #define div4(x) ((uint8) ((x) >> 2))
1816 
1817 static uint8 *resample_row_1(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
1818 {
1819    STBI_NOTUSED(out);
1820    STBI_NOTUSED(in_far);
1821    STBI_NOTUSED(w);
1822    STBI_NOTUSED(hs);
1823    return in_near;
1824 }
1825 
1826 static uint8* resample_row_v_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
1827 {
1828    // need to generate two samples vertically for every one in input
1829    int i;
1830    STBI_NOTUSED(hs);
1831    for (i=0; i < w; ++i)
1832       out[i] = div4(3*in_near[i] + in_far[i] + 2);
1833    return out;
1834 }
1835 
1836 static uint8*  resample_row_h_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
1837 {
1838    // need to generate two samples horizontally for every one in input
1839    int i;
1840    uint8 *input = in_near;
1841 
1842    if (w == 1) {
1843       // if only one sample, can't do any interpolation
1844       out[0] = out[1] = input[0];
1845       return out;
1846    }
1847 
1848    out[0] = input[0];
1849    out[1] = div4(input[0]*3 + input[1] + 2);
1850    for (i=1; i < w-1; ++i) {
1851       int n = 3*input[i]+2;
1852       out[i*2+0] = div4(n+input[i-1]);
1853       out[i*2+1] = div4(n+input[i+1]);
1854    }
1855    out[i*2+0] = div4(input[w-2]*3 + input[w-1] + 2);
1856    out[i*2+1] = input[w-1];
1857 
1858    STBI_NOTUSED(in_far);
1859    STBI_NOTUSED(hs);
1860 
1861    return out;
1862 }
1863 
1864 #define div16(x) ((uint8) ((x) >> 4))
1865 
1866 static uint8 *resample_row_hv_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
1867 {
1868    // need to generate 2x2 samples for every one in input
1869    int i,t0,t1;
1870    if (w == 1) {
1871       out[0] = out[1] = div4(3*in_near[0] + in_far[0] + 2);
1872       return out;
1873    }
1874 
1875    t1 = 3*in_near[0] + in_far[0];
1876    out[0] = div4(t1+2);
1877    for (i=1; i < w; ++i) {
1878       t0 = t1;
1879       t1 = 3*in_near[i]+in_far[i];
1880       out[i*2-1] = div16(3*t0 + t1 + 8);
1881       out[i*2  ] = div16(3*t1 + t0 + 8);
1882    }
1883    out[w*2-1] = div4(t1+2);
1884 
1885    STBI_NOTUSED(hs);
1886 
1887    return out;
1888 }
1889 
1890 static uint8 *resample_row_generic(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
1891 {
1892    // resample with nearest-neighbor
1893    int i,j;
1894    in_far = in_far;
1895    for (i=0; i < w; ++i)
1896       for (j=0; j < hs; ++j)
1897          out[i*hs+j] = in_near[i];
1898    return out;
1899 }
1900 
1901 #define float2fixed(x)  ((int) ((x) * 65536 + 0.5))
1902 
1903 // 0.38 seconds on 3*anemones.jpg   (0.25 with processor = Pro)
1904 // VC6 without processor=Pro is generating multiple LEAs per multiply!
1905 static void YCbCr_to_RGB_row(uint8 *out, const uint8 *y, const uint8 *pcb, const uint8 *pcr, int count, int step)
1906 {
1907    int i;
1908    for (i=0; i < count; ++i) {
1909       int y_fixed = (y[i] << 16) + 32768; // rounding
1910       int r,g,b;
1911       int cr = pcr[i] - 128;
1912       int cb = pcb[i] - 128;
1913       r = y_fixed + cr*float2fixed(1.40200f);
1914       g = y_fixed - cr*float2fixed(0.71414f) - cb*float2fixed(0.34414f);
1915       b = y_fixed                            + cb*float2fixed(1.77200f);
1916       r >>= 16;
1917       g >>= 16;
1918       b >>= 16;
1919       if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
1920       if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
1921       if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
1922       out[0] = (uint8)r;
1923       out[1] = (uint8)g;
1924       out[2] = (uint8)b;
1925       out[3] = 255;
1926       out += step;
1927    }
1928 }
1929 
#ifdef STBI_SIMD
// Row converter used by the decoder in STBI_SIMD builds; it starts out as
// the portable C implementation.
static stbi_YCbCr_to_RGB_run stbi_YCbCr_installed = YCbCr_to_RGB_row;

// Install a caller-supplied YCbCr-to-RGB row converter in place of the
// current one.
void stbi_install_YCbCr_to_RGB(stbi_YCbCr_to_RGB_run func)
{
   stbi_YCbCr_installed = func;
}
#endif
1938 
1939 
1940 // clean up the temporary component buffers
1941 static void cleanup_jpeg(jpeg *j)
1942 {
1943    int i;
1944    for (i=0; i < j->s.img_n; ++i) {
1945       if (j->img_comp[i].data) {
1946          FREE(j->img_comp[i].raw_data);
1947          j->img_comp[i].data = NULL;
1948       }
1949       if (j->img_comp[i].linebuf) {
1950          FREE(j->img_comp[i].linebuf);
1951          j->img_comp[i].linebuf = NULL;
1952       }
1953    }
1954 }
1955 
1956 typedef struct
1957 {
1958    resample_row_func resample;
1959    uint8 *line0,*line1;
1960    int hs,vs;   // expansion factor in each axis
1961    int w_lores; // horizontal pixels pre-expansion
1962    int ystep;   // how far through vertical expansion we are
1963    int ypos;    // which pre-expansion row we're on
1964 } stbi_resample;
1965 
1966 static uint8 *load_jpeg_image(jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
1967 {
1968    int n, decode_n;
1969    // validate req_comp
1970    if (req_comp < 0 || req_comp > 4) return epuc("bad req_comp", "Internal error");
1971    z->s.img_n = 0;
1972 
1973    // load a jpeg image from whichever source
1974    if (!decode_jpeg_image(z)) { cleanup_jpeg(z); return NULL; }
1975 
1976    // determine actual number of components to generate
1977    n = req_comp ? req_comp : z->s.img_n;
1978 
1979    if (z->s.img_n == 3 && n < 3)
1980       decode_n = 1;
1981    else
1982       decode_n = z->s.img_n;
1983 
1984    // resample and color-convert
1985    {
1986       int k;
1987       uint i,j;
1988       uint8 *output;
1989       uint8 *coutput[4];
1990 
1991       stbi_resample res_comp[4];
1992 
1993       for (k=0; k < decode_n; ++k) {
1994          stbi_resample *r = &res_comp[k];
1995 
1996          // allocate line buffer big enough for upsampling off the edges
1997          // with upsample factor of 4
1998          z->img_comp[k].linebuf = MALLOC(z->s.img_x + 3);
1999          if (!z->img_comp[k].linebuf) { cleanup_jpeg(z); return epuc("outofmem", "Out of memory"); }
2000 
2001          r->hs      = z->img_h_max / z->img_comp[k].h;
2002          r->vs      = z->img_v_max / z->img_comp[k].v;
2003          r->ystep   = r->vs >> 1;
2004          r->w_lores = (z->s.img_x + r->hs-1) / r->hs;
2005          r->ypos    = 0;
2006          r->line0   = r->line1 = z->img_comp[k].data;
2007 
2008          if      (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
2009          else if (r->hs == 1 && r->vs == 2) r->resample = resample_row_v_2;
2010          else if (r->hs == 2 && r->vs == 1) r->resample = resample_row_h_2;
2011          else if (r->hs == 2 && r->vs == 2) r->resample = resample_row_hv_2;
2012          else                               r->resample = resample_row_generic;
2013       }
2014 
2015       // can't error after this so, this is safe
2016       output = MALLOC(n * z->s.img_x * z->s.img_y + 1);
2017       if (!output) { cleanup_jpeg(z); return epuc("outofmem", "Out of memory"); }
2018 
2019       // now go ahead and resample
2020       for (j=0; j < z->s.img_y; ++j) {
2021          uint8 *out = output + n * z->s.img_x * j;
2022          for (k=0; k < decode_n; ++k) {
2023             stbi_resample *r = &res_comp[k];
2024             int y_bot = r->ystep >= (r->vs >> 1);
2025             coutput[k] = r->resample(z->img_comp[k].linebuf,
2026                                      y_bot ? r->line1 : r->line0,
2027                                      y_bot ? r->line0 : r->line1,
2028                                      r->w_lores, r->hs);
2029             if (++r->ystep >= r->vs) {
2030                r->ystep = 0;
2031                r->line0 = r->line1;
2032                if (++r->ypos < z->img_comp[k].y)
2033                   r->line1 += z->img_comp[k].w2;
2034             }
2035          }
2036          if (n >= 3) {
2037             uint8 *y = coutput[0];
2038             if (z->s.img_n == 3) {
2039                #ifdef STBI_SIMD
2040                stbi_YCbCr_installed(out, y, coutput[1], coutput[2], z->s.img_x, n);
2041                #else
2042                YCbCr_to_RGB_row(out, y, coutput[1], coutput[2], z->s.img_x, n);
2043                #endif
2044             } else
2045                for (i=0; i < z->s.img_x; ++i) {
2046                   out[0] = out[1] = out[2] = y[i];
2047                   out[3] = 255; // not used if n==3
2048                   out += n;
2049                }
2050          } else {
2051             uint8 *y = coutput[0];
2052             if (n == 1)
2053                for (i=0; i < z->s.img_x; ++i) out[i] = y[i];
2054             else
2055                for (i=0; i < z->s.img_x; ++i) *out++ = y[i], *out++ = 255;
2056          }
2057       }
2058       cleanup_jpeg(z);
2059       *out_x = z->s.img_x;
2060       *out_y = z->s.img_y;
2061       if (comp) *comp  = z->s.img_n; // report original components, not output
2062       return output;
2063    }
2064 }
2065 
2066 #ifndef STBI_NO_STDIO
2067 unsigned char *stbi_jpeg_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
2068 {
2069    jpeg j;
2070    start_file(&j.s, f);
2071    return load_jpeg_image(&j, x,y,comp,req_comp);
2072 }
2073 
// Open filename in binary mode and decode it as a JPEG.
//
// Returns the decoded pixels, or NULL if the file cannot be opened or
// decoding fails.  The file is closed before returning.
unsigned char *stbi_jpeg_load(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *data;
   FILE *f = fopen(filename, "rb");
   // record a failure reason, as stbi_jpeg_info() does for the same condition
   if (!f) return epuc("can't fopen", "Unable to open file");
   data = stbi_jpeg_load_from_file(f,x,y,comp,req_comp);
   fclose(f);
   return data;
}
2083 #endif
2084 
2085 unsigned char *stbi_jpeg_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
2086 {
2087    #ifdef STBI_SMALL_STACK
2088    unsigned char *result;
2089    jpeg *j = MALLOC(sizeof(*j));
2090    start_mem(&j->s, buffer, len);
2091    result = load_jpeg_image(j,x,y,comp,req_comp);
2092    FREE(j);
2093    return result;
2094    #else
2095    jpeg j;
2096    start_mem(&j.s, buffer,len);
2097    return load_jpeg_image(&j, x,y,comp,req_comp);
2098    #endif
2099 }
2100 
2101 static int stbi_jpeg_info_raw(jpeg *j, int *x, int *y, int *comp)
2102 {
2103    if (!decode_jpeg_header(j, SCAN_header))
2104       return 0;
2105    if (x) *x = j->s.img_x;
2106    if (y) *y = j->s.img_y;
2107    if (comp) *comp = j->s.img_n;
2108    return 1;
2109 }
2110 
2111 #ifndef STBI_NO_STDIO
2112 int stbi_jpeg_test_file(FILE *f)
2113 {
2114    int n,r;
2115    jpeg j;
2116    n = ftell(f);
2117    start_file(&j.s, f);
2118    r = decode_jpeg_header(&j, SCAN_type);
2119    fseek(f,n,SEEK_SET);
2120    return r;
2121 }
2122 
2123 int stbi_jpeg_info_from_file(FILE *f, int *x, int *y, int *comp)
2124 {
2125     jpeg j;
2126     long n = ftell(f);
2127     int res;
2128     start_file(&j.s, f);
2129     res = stbi_jpeg_info_raw(&j, x, y, comp);
2130     fseek(f, n, SEEK_SET);
2131     return res;
2132 }
2133 
// Report the dimensions and component count of the JPEG stored in
// filename.  Returns 1 on success, 0 on failure (including when the file
// cannot be opened).
int stbi_jpeg_info(char const *filename, int *x, int *y, int *comp)
{
   int result;
   FILE *f = fopen(filename, "rb");

   if (f == NULL)
      return e("can't fopen", "Unable to open file");

   result = stbi_jpeg_info_from_file(f, x, y, comp);
   fclose(f);
   return result;
}
2143 #endif
2144 
2145 int stbi_jpeg_test_memory(stbi_uc const *buffer, int len)
2146 {
2147    jpeg j;
2148    start_mem(&j.s, buffer,len);
2149    return decode_jpeg_header(&j, SCAN_type);
2150 }
2151 
2152 int stbi_jpeg_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
2153 {
2154     jpeg j;
2155     start_mem(&j.s, buffer, len);
2156     return stbi_jpeg_info_raw(&j, x, y, comp);
2157 }
2158 
2159 #ifndef STBI_NO_STDIO
2160 extern int      stbi_jpeg_info            (char const *filename,           int *x, int *y, int *comp);
2161 extern int      stbi_jpeg_info_from_file  (FILE *f,                  int *x, int *y, int *comp);
2162 #endif
2163 extern int      stbi_jpeg_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
2164 
2165 // public domain zlib decode    v0.2  Sean Barrett 2006-11-18
2166 //    simple implementation
2167 //      - all input must be provided in an upfront buffer
2168 //      - all output is written to a single output buffer (can malloc/realloc)
2169 //    performance
2170 //      - fast huffman
2171 
2172 // fast-way is faster to check than jpeg huffman, but slow way is slower
2173 #define ZFAST_BITS  9 // accelerate all cases in default tables
2174 #define ZFAST_MASK  ((1 << ZFAST_BITS) - 1)
2175 
2176 // zlib-style huffman encoding
2177 // (jpegs packs from left, zlib from right, so can't share code)
2178 typedef struct
2179 {
2180    uint16 fast[1 << ZFAST_BITS];
2181    uint16 firstcode[16];
2182    int maxcode[17];
2183    uint16 firstsymbol[16];
2184    uint8  size[288];
2185    uint16 value[288];
2186 } zhuffman;
2187 
2188 __forceinline static int bitreverse16(int n)
2189 {
2190   n = ((n & 0xAAAA) >>  1) | ((n & 0x5555) << 1);
2191   n = ((n & 0xCCCC) >>  2) | ((n & 0x3333) << 2);
2192   n = ((n & 0xF0F0) >>  4) | ((n & 0x0F0F) << 4);
2193   n = ((n & 0xFF00) >>  8) | ((n & 0x00FF) << 8);
2194   return n;
2195 }
2196 
2197 __forceinline static int bit_reverse(int v, int bits)
2198 {
2199    assert(bits <= 16);
2200    // to bit reverse n bits, reverse 16 and shift
2201    // e.g. 11 bits, bit reverse and shift away 5
2202    return bitreverse16(v) >> (16-bits);
2203 }
2204 
2205 static int zbuild_huffman(zhuffman *z, uint8 *sizelist, int num)
2206 {
2207    int i,k=0;
2208    int code, next_code[16], sizes[17];
2209 
2210    // DEFLATE spec for generating codes
2211    memset(sizes, 0, sizeof(sizes));
2212    memset(z->fast, 255, sizeof(z->fast));
2213    for (i=0; i < num; ++i)
2214       ++sizes[sizelist[i]];
2215    sizes[0] = 0;
2216    for (i=1; i < 16; ++i)
2217       assert(sizes[i] <= (1 << i));
2218    code = 0;
2219    for (i=1; i < 16; ++i) {
2220       next_code[i] = code;
2221       z->firstcode[i] = (uint16) code;
2222       z->firstsymbol[i] = (uint16) k;
2223       code = (code + sizes[i]);
2224       if (sizes[i])
2225          if (code-1 >= (1 << i)) return e("bad codelengths","Corrupt JPEG");
2226       z->maxcode[i] = code << (16-i); // preshift for inner loop
2227       code <<= 1;
2228       k += sizes[i];
2229    }
2230    z->maxcode[16] = 0x10000; // sentinel
2231    for (i=0; i < num; ++i) {
2232       int s = sizelist[i];
2233       if (s) {
2234          int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
2235          z->size[c] = (uint8)s;
2236          z->value[c] = (uint16)i;
2237          if (s <= ZFAST_BITS) {
2238             int m = bit_reverse(next_code[s],s);
2239             while (m < (1 << ZFAST_BITS)) {
2240                z->fast[m] = (uint16) c;
2241                m += (1 << s);
2242             }
2243          }
2244          ++next_code[s];
2245       }
2246    }
2247    return 1;
2248 }
2249 
2250 // zlib-from-memory implementation for PNG reading
2251 //    because PNG allows splitting the zlib stream arbitrarily,
2252 //    and it's annoying structurally to have PNG call ZLIB call PNG,
2253 //    we require PNG read all the IDATs and combine them into a single
2254 //    memory buffer
2255 
2256 typedef struct
2257 {
2258    uint8 const *zbuffer, *zbuffer_end;
2259    int num_bits;
2260    uint32 code_buffer;
2261 
2262    char *zout;
2263    char *zout_start;
2264    char *zout_end;
2265    int   z_expandable;
2266 
2267    zhuffman z_length, z_distance;
2268 } zbuf;
2269 
2270 __forceinline static int zget8(zbuf *z)
2271 {
2272    if (z->zbuffer >= z->zbuffer_end) return 0;
2273    return *z->zbuffer++;
2274 }
2275 
2276 static void fill_bits(zbuf *z)
2277 {
2278    do {
2279       assert(z->code_buffer < (1U << z->num_bits));
2280       z->code_buffer |= zget8(z) << z->num_bits;
2281       z->num_bits += 8;
2282    } while (z->num_bits <= 24);
2283 }
2284 
2285 __forceinline static unsigned int zreceive(zbuf *z, int n)
2286 {
2287    unsigned int k;
2288    if (z->num_bits < n) fill_bits(z);
2289    k = z->code_buffer & ((1 << n) - 1);
2290    z->code_buffer >>= n;
2291    z->num_bits -= n;
2292    return k;
2293 }
2294 
2295 __forceinline static int zhuffman_decode(zbuf *a, zhuffman *z)
2296 {
2297    int b,s,k;
2298    if (a->num_bits < 16) fill_bits(a);
2299    b = z->fast[a->code_buffer & ZFAST_MASK];
2300    if (b < 0xffff) {
2301       s = z->size[b];
2302       a->code_buffer >>= s;
2303       a->num_bits -= s;
2304       return z->value[b];
2305    }
2306 
2307    // not resolved by fast table, so compute it the slow way
2308    // use jpeg approach, which requires MSbits at top
2309    k = bit_reverse(a->code_buffer, 16);
2310    for (s=ZFAST_BITS+1; ; ++s)
2311       if (k < z->maxcode[s])
2312          break;
2313    if (s == 16) return -1; // invalid code!
2314    // code size is s, so:
2315    b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
2316    assert(z->size[b] == s);
2317    a->code_buffer >>= s;
2318    a->num_bits -= s;
2319    return z->value[b];
2320 }
2321 
2322 static int expand(zbuf *z, int n)  // need to make room for n bytes
2323 {
2324    char *q;
2325    int cur, limit;
2326    if (!z->z_expandable) return e("output buffer limit","Corrupt PNG");
2327    cur   = (int) (z->zout     - z->zout_start);
2328    limit = (int) (z->zout_end - z->zout_start);
2329    while (cur + n > limit)
2330       limit *= 2;
2331    q = (char *) REALLOC(z->zout_start, limit);
2332    if (q == NULL) return e("outofmem", "Out of memory");
2333    z->zout_start = q;
2334    z->zout       = q + cur;
2335    z->zout_end   = q + limit;
2336    return 1;
2337 }
2338 
// DEFLATE length codes 257..287, indexed by (code - 257): base match length
// and number of extra bits.  The trailing zero entries pad the tables out
// to the full index range.
static int length_base[31] = {
     3,   4,   5,   6,   7,   8,   9,  10,
    11,  13,  15,  17,  19,  23,  27,  31,
    35,  43,  51,  59,  67,  83,  99, 115,
   131, 163, 195, 227, 258,   0,   0
};

static int length_extra[31] = {
   0, 0, 0, 0, 0, 0, 0, 0,
   1, 1, 1, 1, 2, 2, 2, 2,
   3, 3, 3, 3, 4, 4, 4, 4,
   5, 5, 5, 5, 0, 0, 0
};

// DEFLATE distance codes 0..31: base distance and number of extra bits.
static int dist_base[32] = {
       1,     2,     3,     4,     5,     7,     9,    13,
      17,    25,    33,    49,    65,    97,   129,   193,
     257,   385,   513,   769,  1025,  1537,  2049,  3073,
    4097,  6145,  8193, 12289, 16385, 24577,     0,     0
};

static int dist_extra[32] = {
    0,  0,  0,  0,  1,  1,  2,  2,
    3,  3,  4,  4,  5,  5,  6,  6,
    7,  7,  8,  8,  9,  9, 10, 10,
   11, 11, 12, 12, 13, 13,  0,  0
};
2352 
2353 static int parse_huffman_block(zbuf *a)
2354 {
2355    for(;;) {
2356       int z = zhuffman_decode(a, &a->z_length);
2357       if (z < 256) {
2358          if (z < 0) return e("bad huffman code","Corrupt PNG"); // error in huffman codes
2359          if (a->zout >= a->zout_end) if (!expand(a, 1)) return 0;
2360          *a->zout++ = (char) z;
2361       } else {
2362          uint8 *p;
2363          int len,dist;
2364          if (z == 256) return 1;
2365          z -= 257;
2366          len = length_base[z];
2367          if (length_extra[z]) len += zreceive(a, length_extra[z]);
2368          z = zhuffman_decode(a, &a->z_distance);
2369          if (z < 0) return e("bad huffman code","Corrupt PNG");
2370          dist = dist_base[z];
2371          if (dist_extra[z]) dist += zreceive(a, dist_extra[z]);
2372          if (a->zout - a->zout_start < dist) return e("bad dist","Corrupt PNG");
2373          if (a->zout + len > a->zout_end) if (!expand(a, len)) return 0;
2374          p = (uint8 *) (a->zout - dist);
2375          while (len--)
2376             *a->zout++ = *p++;
2377       }
2378    }
2379 }
2380 
2381 static int compute_huffman_codes(zbuf *a)
2382 {
2383    static uint8 length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
2384    zhuffman z_codelength;
2385    uint8 lencodes[286+32+137];//padding for maximum single op
2386    uint8 codelength_sizes[19];
2387    int i,n;
2388 
2389    int hlit  = zreceive(a,5) + 257;
2390    int hdist = zreceive(a,5) + 1;
2391    int hclen = zreceive(a,4) + 4;
2392 
2393    memset(codelength_sizes, 0, sizeof(codelength_sizes));
2394    for (i=0; i < hclen; ++i) {
2395       int s = zreceive(a,3);
2396       codelength_sizes[length_dezigzag[i]] = (uint8) s;
2397    }
2398    if (!zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;
2399 
2400    n = 0;
2401    while (n < hlit + hdist) {
2402       int c = zhuffman_decode(a, &z_codelength);
2403       assert(c >= 0 && c < 19);
2404       if (c < 16)
2405          lencodes[n++] = (uint8) c;
2406       else if (c == 16) {
2407          c = zreceive(a,2)+3;
2408          memset(lencodes+n, lencodes[n-1], c);
2409          n += c;
2410       } else if (c == 17) {
2411          c = zreceive(a,3)+3;
2412          memset(lencodes+n, 0, c);
2413          n += c;
2414       } else {
2415          assert(c == 18);
2416          c = zreceive(a,7)+11;
2417          memset(lencodes+n, 0, c);
2418          n += c;
2419       }
2420    }
2421    if (n != hlit+hdist) return e("bad codelengths","Corrupt PNG");
2422    if (!zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
2423    if (!zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
2424    return 1;
2425 }
2426 
2427 static int parse_uncompressed_block(zbuf *a)
2428 {
2429    uint8 header[4];
2430    int len,nlen,k;
2431    if (a->num_bits & 7)
2432       zreceive(a, a->num_bits & 7); // discard
2433    // drain the bit-packed data into header
2434    k = 0;
2435    while (a->num_bits > 0) {
2436       header[k++] = (uint8) (a->code_buffer & 255); // wtf this warns?
2437       a->code_buffer >>= 8;
2438       a->num_bits -= 8;
2439    }
2440    assert(a->num_bits == 0);
2441    // now fill header the normal way
2442    while (k < 4)
2443       header[k++] = (uint8) zget8(a);
2444    len  = header[1] * 256 + header[0];
2445    nlen = header[3] * 256 + header[2];
2446    if (nlen != (len ^ 0xffff)) return e("zlib corrupt","Corrupt PNG");
2447    if (a->zbuffer + len > a->zbuffer_end) return e("read past buffer","Corrupt PNG");
2448    if (a->zout + len > a->zout_end)
2449       if (!expand(a, len)) return 0;
2450    memcpy(a->zout, a->zbuffer, len);
2451    a->zbuffer += len;
2452    a->zout += len;
2453    return 1;
2454 }
2455 
2456 static int parse_zlib_header(zbuf *a)
2457 {
2458    int cmf   = zget8(a);
2459    int cm    = cmf & 15;
2460    /* int cinfo = cmf >> 4; */
2461    int flg   = zget8(a);
2462    if ((cmf*256+flg) % 31 != 0) return e("bad zlib header","Corrupt PNG"); // zlib spec
2463    if (flg & 32) return e("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png
2464    if (cm != 8) return e("bad compression","Corrupt PNG"); // DEFLATE required for png
2465    // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
2466    return 1;
2467 }
2468 
2469 // @TODO: should statically initialize these for optimal thread safety
2470 static uint8 default_length[288], default_distance[32];
2471 static void init_defaults(void)
2472 {
2473    int i;   // use <= to match clearly with spec
2474    for (i=0; i <= 143; ++i)     default_length[i]   = 8;
2475    for (   ; i <= 255; ++i)     default_length[i]   = 9;
2476    for (   ; i <= 279; ++i)     default_length[i]   = 7;
2477    for (   ; i <= 287; ++i)     default_length[i]   = 8;
2478 
2479    for (i=0; i <=  31; ++i)     default_distance[i] = 5;
2480 }
2481 
2482 int stbi_png_partial; // a quick hack to only allow decoding some of a PNG... I should implement real streaming support instead
2483 static int parse_zlib(zbuf *a, int parse_header)
2484 {
2485    int final, type;
2486    if (parse_header)
2487       if (!parse_zlib_header(a)) return 0;
2488    a->num_bits = 0;
2489    a->code_buffer = 0;
2490    do {
2491       final = zreceive(a,1);
2492       type = zreceive(a,2);
2493       if (type == 0) {
2494          if (!parse_uncompressed_block(a)) return 0;
2495       } else if (type == 3) {
2496          return 0;
2497       } else {
2498          if (type == 1) {
2499             // use fixed code lengths
2500             if (!default_distance[31]) init_defaults();
2501             if (!zbuild_huffman(&a->z_length  , default_length  , 288)) return 0;
2502             if (!zbuild_huffman(&a->z_distance, default_distance,  32)) return 0;
2503          } else {
2504             if (!compute_huffman_codes(a)) return 0;
2505          }
2506          if (!parse_huffman_block(a)) return 0;
2507       }
2508       if (stbi_png_partial && a->zout - a->zout_start > 65536)
2509          break;
2510    } while (!final);
2511    return 1;
2512 }
2513 
2514 static int do_zlib(zbuf *a, char *obuf, int olen, int exp, int parse_header)
2515 {
2516    a->zout_start = obuf;
2517    a->zout       = obuf;
2518    a->zout_end   = obuf + olen;
2519    a->z_expandable = exp;
2520 
2521    return parse_zlib(a, parse_header);
2522 }
2523 
2524 char *stbi_zlib_decode_malloc_guesssize(const char * buffer, int len, int initial_size, int *outlen)
2525 {
2526    zbuf a;
2527    char *p = MALLOC(initial_size);
2528    if (p == NULL) return NULL;
2529    a.zbuffer = (uint8 const *) buffer;
2530    a.zbuffer_end = (uint8 const *) buffer + len;
2531    if (do_zlib(&a, p, initial_size, 1, 1)) {
2532       if (outlen) *outlen = (int) (a.zout - a.zout_start);
2533       return a.zout_start;
2534    } else {
2535       FREE(a.zout_start);
2536       return NULL;
2537    }
2538 }
2539 
// Convenience wrapper around stbi_zlib_decode_malloc_guesssize() that starts
// with a 16 KiB output buffer.
char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
{
   enum { INITIAL_GUESS = 16384 };

   return stbi_zlib_decode_malloc_guesssize(buffer, len, INITIAL_GUESS, outlen);
}
2544 
2545 char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
2546 {
2547    zbuf a;
2548    char *p = MALLOC(initial_size);
2549    if (p == NULL) return NULL;
2550    a.zbuffer = (uint8 const *) buffer;
2551    a.zbuffer_end = (uint8 const *) buffer + len;
2552    if (do_zlib(&a, p, initial_size, 1, parse_header)) {
2553       if (outlen) *outlen = (int) (a.zout - a.zout_start);
2554       return a.zout_start;
2555    } else {
2556       FREE(a.zout_start);
2557       return NULL;
2558    }
2559 }
2560 
2561 int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
2562 {
2563    zbuf a;
2564    a.zbuffer = (uint8 const *) ibuffer;
2565    a.zbuffer_end = (uint8 const *) ibuffer + ilen;
2566    if (do_zlib(&a, obuffer, olen, 0, 1))
2567       return (int) (a.zout - a.zout_start);
2568    else
2569       return -1;
2570 }
2571 
2572 char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
2573 {
2574    zbuf a;
2575    char *p = MALLOC(16384);
2576    if (p == NULL) return NULL;
2577    a.zbuffer = (uint8 const *) buffer;
2578    a.zbuffer_end = (uint8 const *) buffer+len;
2579    if (do_zlib(&a, p, 16384, 1, 0)) {
2580       if (outlen) *outlen = (int) (a.zout - a.zout_start);
2581       return a.zout_start;
2582    } else {
2583       FREE(a.zout_start);
2584       return NULL;
2585    }
2586 }
2587 
2588 int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
2589 {
2590    zbuf a;
2591    a.zbuffer = (uint8 const *) ibuffer;
2592    a.zbuffer_end = (uint8 const *) ibuffer + ilen;
2593    if (do_zlib(&a, obuffer, olen, 0, 0))
2594       return (int) (a.zout - a.zout_start);
2595    else
2596       return -1;
2597 }
2598 
2599 // public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18
2600 //    simple implementation
2601 //      - only 8-bit samples
2602 //      - no CRC checking
2603 //      - allocates lots of intermediate memory
2604 //        - avoids problem of streaming data between subsystems
2605 //        - avoids explicit window management
2606 //    performance
2607 //      - uses stb_zlib, a PD zlib implementation with fast huffman decoding
2608 
2609 
// Header of one PNG chunk, as filled in by get_chunk_header().
typedef struct
{
   uint32 length;   // number of payload bytes that follow the header
   uint32 type;     // four-character chunk type, compared against PNG_TYPE() values
} chunk;
2615 
// Pack four chunk-type characters into one 32-bit value, first character in
// the most significant byte, so it can be compared with a chunk's 'type'.
// The shifts are done on unsigned values so that a byte >= 128 in the top
// position cannot overflow a signed int.
#define PNG_TYPE(a,b,c,d)  (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d))
2617 
2618 static chunk get_chunk_header(stbi *s)
2619 {
2620    chunk c;
2621    c.length = get32(s);
2622    c.type   = get32(s);
2623    return c;
2624 }
2625 
2626 static int check_png_header(stbi *s)
2627 {
2628    static uint8 png_sig[8] = { 137,80,78,71,13,10,26,10 };
2629    int i;
2630    for (i=0; i < 8; ++i)
2631       if (get8(s) != png_sig[i]) return e("bad png sig","Not a PNG");
2632    return 1;
2633 }
2634 
// Working state for decoding one PNG.
typedef struct
{
   stbi s;   // input stream; also carries img_x, img_y, img_n and img_out_n
   // idata:    IDAT payloads accumulated by parse_png_file()
   // expanded: result of zlib-decoding idata
   // out:      pixel buffer produced by create_png_image() and later stages
   uint8 *idata, *expanded, *out;
} png;
2640 
2641 
// Scanline filter selectors.  Values 0..4 are the filter-type bytes found at
// the start of each row (anything above 4 is rejected as corrupt); the two
// *_first values are internal variants that never look at a previous row.
enum {
   F_none=0, F_sub=1, F_up=2, F_avg=3, F_paeth=4,
   F_avg_first, F_paeth_first
};

// Indexed by the stored filter type: the selector to use on row 0, where
// there is no previous row to sample.
static uint8 first_row_filter[5] =
{
   F_none, F_sub, F_none, F_avg_first, F_paeth_first
};
2651 
// Paeth predictor: of the three neighbours a, b and c, return the one closest
// to the estimate a + b - c.  Ties are resolved in the order a, b, c.
static int paeth(int a, int b, int c)
{
   int estimate = a + b - c;
   int dist_a = abs(estimate - a);
   int dist_b = abs(estimate - b);
   int dist_c = abs(estimate - c);

   if (dist_a <= dist_b && dist_a <= dist_c)
      return a;
   return (dist_b <= dist_c) ? b : c;
}
2662 
2663 // create the png data from post-deflated data
2664 static int create_png_image_raw(png *a, uint8 *raw, uint32 raw_len, int out_n, uint32 x, uint32 y)
2665 {
2666    stbi *s = &a->s;
2667    uint32 i,j,stride = x*out_n;
2668    int k;
2669    int img_n = s->img_n; // copy it into a local for later
2670    assert(out_n == s->img_n || out_n == s->img_n+1);
2671    if (stbi_png_partial) y = 1;
2672    a->out = MALLOC(x * y * out_n);
2673    if (!a->out) return e("outofmem", "Out of memory");
2674    if (!stbi_png_partial) {
2675       if (s->img_x == x && s->img_y == y) {
2676          if (raw_len != (img_n * x + 1) * y) return e("not enough pixels","Corrupt PNG");
2677       } else { // interlaced:
2678          if (raw_len < (img_n * x + 1) * y) return e("not enough pixels","Corrupt PNG");
2679       }
2680    }
2681    for (j=0; j < y; ++j) {
2682       uint8 *cur = a->out + stride*j;
2683       uint8 *prior = cur - stride;
2684       int filter = *raw++;
2685       if (filter > 4) return e("invalid filter","Corrupt PNG");
2686       // if first row, use special filter that doesn't sample previous row
2687       if (j == 0) filter = first_row_filter[filter];
2688       // handle first pixel explicitly
2689       for (k=0; k < img_n; ++k) {
2690          switch (filter) {
2691             case F_none       : cur[k] = raw[k]; break;
2692             case F_sub        : cur[k] = raw[k]; break;
2693             case F_up         : cur[k] = raw[k] + prior[k]; break;
2694             case F_avg        : cur[k] = raw[k] + (prior[k]>>1); break;
2695             case F_paeth      : cur[k] = (uint8) (raw[k] + paeth(0,prior[k],0)); break;
2696             case F_avg_first  : cur[k] = raw[k]; break;
2697             case F_paeth_first: cur[k] = raw[k]; break;
2698          }
2699       }
2700       if (img_n != out_n) cur[img_n] = 255;
2701       raw += img_n;
2702       cur += out_n;
2703       prior += out_n;
2704       // this is a little gross, so that we don't switch per-pixel or per-component
2705       if (img_n == out_n) {
2706          #define CASE(f) \
2707              case f:     \
2708                 for (i=x-1; i >= 1; --i, raw+=img_n,cur+=img_n,prior+=img_n) \
2709                    for (k=0; k < img_n; ++k)
2710          switch (filter) {
2711             CASE(F_none)  cur[k] = raw[k];
2712 		break;
2713             CASE(F_sub)   cur[k] = raw[k] + cur[k-img_n];
2714 		break;
2715             CASE(F_up)    cur[k] = raw[k] + prior[k];
2716 		break;
2717             CASE(F_avg)   cur[k] = raw[k] + ((prior[k] + cur[k-img_n])>>1);
2718 		break;
2719             CASE(F_paeth)  cur[k] = (uint8) (raw[k] + paeth(cur[k-img_n],prior[k],prior[k-img_n]));
2720 		break;
2721             CASE(F_avg_first)    cur[k] = raw[k] + (cur[k-img_n] >> 1);
2722 		break;
2723             CASE(F_paeth_first)  cur[k] = (uint8) (raw[k] + paeth(cur[k-img_n],0,0));
2724 		break;
2725          }
2726          #undef CASE
2727       } else {
2728          assert(img_n+1 == out_n);
2729          #define CASE(f) \
2730              case f:     \
2731                 for (i=x-1; i >= 1; --i, cur[img_n]=255,raw+=img_n,cur+=out_n,prior+=out_n) \
2732                    for (k=0; k < img_n; ++k)
2733          switch (filter) {
2734             CASE(F_none)  cur[k] = raw[k];
2735 		break;
2736             CASE(F_sub)   cur[k] = raw[k] + cur[k-out_n];
2737 		break;
2738             CASE(F_up)    cur[k] = raw[k] + prior[k];
2739 		break;
2740             CASE(F_avg)   cur[k] = raw[k] + ((prior[k] + cur[k-out_n])>>1);
2741 		break;
2742             CASE(F_paeth)  cur[k] = (uint8) (raw[k] + paeth(cur[k-out_n],prior[k],prior[k-out_n]));
2743 		break;
2744             CASE(F_avg_first)    cur[k] = raw[k] + (cur[k-out_n] >> 1);
2745 		break;
2746             CASE(F_paeth_first)  cur[k] = (uint8) (raw[k] + paeth(cur[k-out_n],0,0));
2747 		break;
2748          }
2749          #undef CASE
2750       }
2751    }
2752    return 1;
2753 }
2754 
2755 static int create_png_image(png *a, uint8 *raw, uint32 raw_len, int out_n, int interlaced)
2756 {
2757    uint8 *final;
2758    int p;
2759    int save;
2760    if (!interlaced)
2761       return create_png_image_raw(a, raw, raw_len, out_n, a->s.img_x, a->s.img_y);
2762    save = stbi_png_partial;
2763    stbi_png_partial = 0;
2764 
2765    // de-interlacing
2766    final = MALLOC(a->s.img_x * a->s.img_y * out_n);
2767    for (p=0; p < 7; ++p) {
2768       int xorig[] = { 0,4,0,2,0,1,0 };
2769       int yorig[] = { 0,0,4,0,2,0,1 };
2770       int xspc[]  = { 8,8,4,4,2,2,1 };
2771       int yspc[]  = { 8,8,8,4,4,2,2 };
2772       int i,j,x,y;
2773       // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
2774       x = (a->s.img_x - xorig[p] + xspc[p]-1) / xspc[p];
2775       y = (a->s.img_y - yorig[p] + yspc[p]-1) / yspc[p];
2776       if (x && y) {
2777          if (!create_png_image_raw(a, raw, raw_len, out_n, x, y)) {
2778             FREE(final);
2779             return 0;
2780          }
2781          for (j=0; j < y; ++j)
2782             for (i=0; i < x; ++i)
2783                memcpy(final + (j*yspc[p]+yorig[p])*a->s.img_x*out_n + (i*xspc[p]+xorig[p])*out_n,
2784                       a->out + (j*x+i)*out_n, out_n);
2785          FREE(a->out);
2786          raw += (x*out_n+1)*y;
2787          raw_len -= (x*out_n+1)*y;
2788       }
2789    }
2790    a->out = final;
2791 
2792    stbi_png_partial = save;
2793    return 1;
2794 }
2795 
2796 static int compute_transparency(png *z, uint8 tc[3], int out_n)
2797 {
2798    stbi *s = &z->s;
2799    uint32 i, pixel_count = s->img_x * s->img_y;
2800    uint8 *p = z->out;
2801 
2802    // compute color-based transparency, assuming we've
2803    // already got 255 as the alpha value in the output
2804    assert(out_n == 2 || out_n == 4);
2805 
2806    if (out_n == 2) {
2807       for (i=0; i < pixel_count; ++i) {
2808          p[1] = (p[0] == tc[0] ? 0 : 255);
2809          p += 2;
2810       }
2811    } else {
2812       for (i=0; i < pixel_count; ++i) {
2813          if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
2814             p[3] = 0;
2815          p += 4;
2816       }
2817    }
2818    return 1;
2819 }
2820 
2821 static int expand_palette(png *a, uint8 *palette, int len, int pal_img_n)
2822 {
2823    uint32 i, pixel_count = a->s.img_x * a->s.img_y;
2824    uint8 *p, *temp_out, *orig = a->out;
2825 
2826    p = MALLOC(pixel_count * pal_img_n);
2827    if (p == NULL) return e("outofmem", "Out of memory");
2828 
2829    // between here and FREE(out) below, exiting would leak
2830    temp_out = p;
2831 
2832    if (pal_img_n == 3) {
2833       for (i=0; i < pixel_count; ++i) {
2834          int n = orig[i]*4;
2835          p[0] = palette[n  ];
2836          p[1] = palette[n+1];
2837          p[2] = palette[n+2];
2838          p += 3;
2839       }
2840    } else {
2841       for (i=0; i < pixel_count; ++i) {
2842          int n = orig[i]*4;
2843          p[0] = palette[n  ];
2844          p[1] = palette[n+1];
2845          p[2] = palette[n+2];
2846          p[3] = palette[n+3];
2847          p += 4;
2848       }
2849    }
2850    FREE(a->out);
2851    a->out = temp_out;
2852 
2853    STBI_NOTUSED(len);
2854 
2855    return 1;
2856 }
2857 
// Global switches consulted while loading PNGs that carry a CgBI chunk.
// Both default to off.
static int stbi_unpremultiply_on_load = 0; // when set, stbi_de_iphone() also divides colour by alpha
static int stbi_de_iphone_flag = 0;        // when set, CgBI images get the BGR->RGB conversion

// Set the unpremultiply switch; the argument is stored as given.
void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
{
   stbi_unpremultiply_on_load = flag_true_if_should_unpremultiply;
}
// Set the CgBI conversion switch; the argument is stored as given.
void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
{
   stbi_de_iphone_flag = flag_true_if_should_convert;
}
2869 
2870 static void stbi_de_iphone(png *z)
2871 {
2872    stbi *s = &z->s;
2873    uint32 i, pixel_count = s->img_x * s->img_y;
2874    uint8 *p = z->out;
2875 
2876    if (s->img_out_n == 3) {  // convert bgr to rgb
2877       for (i=0; i < pixel_count; ++i) {
2878          uint8 t = p[0];
2879          p[0] = p[2];
2880          p[2] = t;
2881          p += 3;
2882       }
2883    } else {
2884       assert(s->img_out_n == 4);
2885       if (stbi_unpremultiply_on_load) {
2886          // convert bgr to rgb and unpremultiply
2887          for (i=0; i < pixel_count; ++i) {
2888             uint8 a = p[3];
2889             uint8 t = p[0];
2890             if (a) {
2891                p[0] = p[2] * 255 / a;
2892                p[1] = p[1] * 255 / a;
2893                p[2] =  t   * 255 / a;
2894             } else {
2895                p[0] = p[2];
2896                p[2] = t;
2897             }
2898             p += 4;
2899          }
2900       } else {
2901          // convert bgr to rgb
2902          for (i=0; i < pixel_count; ++i) {
2903             uint8 t = p[0];
2904             p[0] = p[2];
2905             p[2] = t;
2906             p += 4;
2907          }
2908       }
2909    }
2910 }
2911 
2912 static int parse_png_file(png *z, int scan, int req_comp)
2913 {
2914    uint8 palette[1024], pal_img_n=0;
2915    uint8 has_trans=0, tc[3];
2916    uint32 ioff=0, idata_limit=0, i, pal_len=0;
2917    int first=1,k,interlace=0, iphone=0;
2918    stbi *s = &z->s;
2919 
2920    if (!check_png_header(s)) return 0;
2921 
2922    if (scan == SCAN_type) return 1;
2923 
2924    for (;;) {
2925       chunk c = get_chunk_header(s);
2926       switch (c.type) {
2927          case PNG_TYPE('C','g','B','I'):
2928             iphone = stbi_de_iphone_flag;
2929             skip(s, c.length);
2930             break;
2931          case PNG_TYPE('I','H','D','R'): {
2932             int depth,color,comp,filter;
2933             if (!first) return e("multiple IHDR","Corrupt PNG");
2934             first = 0;
2935             if (c.length != 13) return e("bad IHDR len","Corrupt PNG");
2936             s->img_x = get32(s); if (s->img_x > (1 << 24)) return e("too large","Very large image (corrupt?)");
2937             s->img_y = get32(s); if (s->img_y > (1 << 24)) return e("too large","Very large image (corrupt?)");
2938             depth = get8(s);  if (depth != 8)        return e("8bit only","PNG not supported: 8-bit only");
2939             color = get8(s);  if (color > 6)         return e("bad ctype","Corrupt PNG");
2940             if (color == 3) pal_img_n = 3; else if (color & 1) return e("bad ctype","Corrupt PNG");
2941             comp  = get8(s);  if (comp) return e("bad comp method","Corrupt PNG");
2942             filter= get8(s);  if (filter) return e("bad filter method","Corrupt PNG");
2943             interlace = get8(s); if (interlace>1) return e("bad interlace method","Corrupt PNG");
2944             if (!s->img_x || !s->img_y) return e("0-pixel image","Corrupt PNG");
2945             if (!pal_img_n) {
2946                s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
2947                if ((1 << 30) / s->img_x / s->img_n < s->img_y) return e("too large", "Image too large to decode");
2948                if (scan == SCAN_header) return 1;
2949             } else {
2950                // if paletted, then pal_n is our final components, and
2951                // img_n is # components to decompress/filter.
2952                s->img_n = 1;
2953                if ((1 << 30) / s->img_x / 4 < s->img_y) return e("too large","Corrupt PNG");
2954                // if SCAN_header, have to scan to see if we have a tRNS
2955             }
2956             break;
2957          }
2958 
2959          case PNG_TYPE('P','L','T','E'):  {
2960             if (first) return e("first not IHDR", "Corrupt PNG");
2961             if (c.length > 256*3) return e("invalid PLTE","Corrupt PNG");
2962             pal_len = c.length / 3;
2963             if (pal_len * 3 != c.length) return e("invalid PLTE","Corrupt PNG");
2964             for (i=0; i < pal_len; ++i) {
2965                palette[i*4+0] = get8u(s);
2966                palette[i*4+1] = get8u(s);
2967                palette[i*4+2] = get8u(s);
2968                palette[i*4+3] = 255;
2969             }
2970             break;
2971          }
2972 
2973          case PNG_TYPE('t','R','N','S'): {
2974             if (first) return e("first not IHDR", "Corrupt PNG");
2975             if (z->idata) return e("tRNS after IDAT","Corrupt PNG");
2976             if (pal_img_n) {
2977                if (scan == SCAN_header) { s->img_n = 4; return 1; }
2978                if (pal_len == 0) return e("tRNS before PLTE","Corrupt PNG");
2979                if (c.length > pal_len) return e("bad tRNS len","Corrupt PNG");
2980                pal_img_n = 4;
2981                for (i=0; i < c.length; ++i)
2982                   palette[i*4+3] = get8u(s);
2983             } else {
2984                if (!(s->img_n & 1)) return e("tRNS with alpha","Corrupt PNG");
2985                if (c.length != (uint32) s->img_n*2) return e("bad tRNS len","Corrupt PNG");
2986                has_trans = 1;
2987                for (k=0; k < s->img_n; ++k)
2988                   tc[k] = (uint8) get16(s); // non 8-bit images will be larger
2989             }
2990             break;
2991          }
2992 
2993          case PNG_TYPE('I','D','A','T'): {
2994             if (first) return e("first not IHDR", "Corrupt PNG");
2995             if (pal_img_n && !pal_len) return e("no PLTE","Corrupt PNG");
2996             if (scan == SCAN_header) { s->img_n = pal_img_n; return 1; }
2997             if (ioff + c.length > idata_limit) {
2998                uint8 *p;
2999                if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
3000                while (ioff + c.length > idata_limit)
3001                   idata_limit *= 2;
3002                p = (uint8 *) REALLOC(z->idata, idata_limit); if (p == NULL) return e("outofmem", "Out of memory");
3003                z->idata = p;
3004             }
3005             if (!getn(s, z->idata+ioff,c.length)) return e("outofdata","Corrupt PNG");
3006             ioff += c.length;
3007             break;
3008          }
3009 
3010          case PNG_TYPE('I','E','N','D'): {
3011             uint32 raw_len;
3012             if (first) return e("first not IHDR", "Corrupt PNG");
3013             if (scan != SCAN_load) return 1;
3014             if (z->idata == NULL) return e("no IDAT","Corrupt PNG");
3015             z->expanded = (uint8 *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, 16384, (int *) &raw_len, !iphone);
3016             if (z->expanded == NULL) return 0; // zlib should set error
3017             FREE(z->idata); z->idata = NULL;
3018             if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
3019                s->img_out_n = s->img_n+1;
3020             else
3021                s->img_out_n = s->img_n;
3022             if (!create_png_image(z, z->expanded, raw_len, s->img_out_n, interlace)) return 0;
3023             if (has_trans)
3024                if (!compute_transparency(z, tc, s->img_out_n)) return 0;
3025             if (iphone && s->img_out_n > 2)
3026                stbi_de_iphone(z);
3027             if (pal_img_n) {
3028                // pal_img_n == 3 or 4
3029                s->img_n = pal_img_n; // record the actual colors we had
3030                s->img_out_n = pal_img_n;
3031                if (req_comp >= 3) s->img_out_n = req_comp;
3032                if (!expand_palette(z, palette, pal_len, s->img_out_n))
3033                   return 0;
3034             }
3035             FREE(z->expanded); z->expanded = NULL;
3036             return 1;
3037          }
3038 
3039          default:
3040             // if critical, fail
3041             if (first) return e("first not IHDR", "Corrupt PNG");
3042             if ((c.type & (1 << 29)) == 0) {
3043                #ifndef STBI_NO_FAILURE_STRINGS
3044                // not threadsafe
3045                static char invalid_chunk[] = "XXXX chunk not known";
3046                invalid_chunk[0] = (uint8) (c.type >> 24);
3047                invalid_chunk[1] = (uint8) (c.type >> 16);
3048                invalid_chunk[2] = (uint8) (c.type >>  8);
3049                invalid_chunk[3] = (uint8) (c.type >>  0);
3050                #endif
3051                return e(invalid_chunk, "PNG not supported: unknown chunk type");
3052             }
3053             skip(s, c.length);
3054             break;
3055       }
3056       // end of chunk, read and skip CRC
3057       get32(s);
3058    }
3059 }
3060 
3061 static unsigned char *do_png(png *p, int *x, int *y, int *n, int req_comp)
3062 {
3063    unsigned char *result=NULL;
3064    p->expanded = NULL;
3065    p->idata = NULL;
3066    p->out = NULL;
3067    if (req_comp < 0 || req_comp > 4) return epuc("bad req_comp", "Internal error");
3068    if (parse_png_file(p, SCAN_load, req_comp)) {
3069       result = p->out;
3070       p->out = NULL;
3071       if (req_comp && req_comp != p->s.img_out_n) {
3072          result = convert_format(result, p->s.img_out_n, req_comp, p->s.img_x, p->s.img_y);
3073          p->s.img_out_n = req_comp;
3074          if (result == NULL) return result;
3075       }
3076       *x = p->s.img_x;
3077       *y = p->s.img_y;
3078       if (n) *n = p->s.img_n;
3079    }
3080    FREE(p->expanded); p->expanded = NULL;
3081    FREE(p->idata);    p->idata    = NULL;
3082 
3083    return result;
3084 }
3085 
3086 #ifndef STBI_NO_STDIO
3087 unsigned char *stbi_png_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
3088 {
3089    png p;
3090    start_file(&p.s, f);
3091    return do_png(&p, x,y,comp,req_comp);
3092 }
3093 
// Open 'filename' in binary mode, decode it as a PNG and close it again.
// Returns the pixel buffer, or NULL if the file cannot be opened or decoded.
unsigned char *stbi_png_load(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *pixels = NULL;
   FILE *fp = fopen(filename, "rb");

   if (fp) {
      pixels = stbi_png_load_from_file(fp,x,y,comp,req_comp);
      fclose(fp);
   }
   return pixels;
}
3103 #endif
3104 
3105 unsigned char *stbi_png_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
3106 {
3107    png p;
3108    start_mem(&p.s, buffer,len);
3109    return do_png(&p, x,y,comp,req_comp);
3110 }
3111 
3112 #ifndef STBI_NO_STDIO
3113 int stbi_png_test_file(FILE *f)
3114 {
3115    png p;
3116    int n,r;
3117    n = ftell(f);
3118    start_file(&p.s, f);
3119    r = parse_png_file(&p, SCAN_type,STBI_default);
3120    fseek(f,n,SEEK_SET);
3121    return r;
3122 }
3123 #endif
3124 
3125 int stbi_png_test_memory(stbi_uc const *buffer, int len)
3126 {
3127    png p;
3128    start_mem(&p.s, buffer, len);
3129    return parse_png_file(&p, SCAN_type,STBI_default);
3130 }
3131 
3132 static int stbi_png_info_raw(png *p, int *x, int *y, int *comp)
3133 {
3134    if (!parse_png_file(p, SCAN_header, 0))
3135       return 0;
3136    if (x) *x = p->s.img_x;
3137    if (y) *y = p->s.img_y;
3138    if (comp) *comp = p->s.img_n;
3139    return 1;
3140 }
3141 
3142 #ifndef STBI_NO_STDIO
// Open 'filename' in binary mode and query the size and component count of
// the PNG it contains.  Returns 1 on success, 0 if the file cannot be opened
// or is not a usable PNG.
int stbi_png_info(char const *filename, int *x, int *y, int *comp)
{
   int ok = 0;
   FILE *fp = fopen(filename, "rb");

   if (fp) {
      ok = stbi_png_info_from_file(fp, x, y, comp);
      fclose(fp);
   }
   return ok;
}
3152 
3153 int stbi_png_info_from_file(FILE *f, int *x, int *y, int *comp)
3154 {
3155    png p;
3156    int res;
3157    long n = ftell(f);
3158    start_file(&p.s, f);
3159    res = stbi_png_info_raw(&p, x, y, comp);
3160    fseek(f, n, SEEK_SET);
3161    return res;
3162 }
3163 #endif // !STBI_NO_STDIO
3164 
3165 int stbi_png_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
3166 {
3167    png p;
3168    start_mem(&p.s, buffer, len);
3169    return stbi_png_info_raw(&p, x, y, comp);
3170 }
3171 
3172 // Microsoft/Windows BMP image
3173 
3174 static int bmp_test(stbi *s)
3175 {
3176    int sz;
3177    if (get8(s) != 'B') return 0;
3178    if (get8(s) != 'M') return 0;
3179    get32le(s); // discard filesize
3180    get16le(s); // discard reserved
3181    get16le(s); // discard reserved
3182    get32le(s); // discard data offset
3183    sz = get32le(s);
3184    if (sz == 12 || sz == 40 || sz == 56 || sz == 108) return 1;
3185    return 0;
3186 }
3187 
3188 #ifndef STBI_NO_STDIO
3189 int      stbi_bmp_test_file        (FILE *f)
3190 {
3191    stbi s;
3192    int r,n = ftell(f);
3193    start_file(&s,f);
3194    r = bmp_test(&s);
3195    fseek(f,n,SEEK_SET);
3196    return r;
3197 }
3198 #endif
3199 
3200 int      stbi_bmp_test_memory      (stbi_uc const *buffer, int len)
3201 {
3202    stbi s;
3203    start_mem(&s, buffer, len);
3204    return bmp_test(&s);
3205 }
3206 
// returns 0..31 for the highest set bit, or -1 when no bit is set
static int high_bit(unsigned int z)
{
   // binary search: at each width, if anything is set above it, drop the
   // low part and remember how far we moved
   static const int width[5] = { 16, 8, 4, 2, 1 };
   int pos = 0;
   int i;

   if (z == 0)
      return -1;
   for (i = 0; i < 5; ++i) {
      if (z >> width[i]) {
         pos += width[i];
         z >>= width[i];
      }
   }
   return pos;
}
3219 
// Number of set bits among the low 32 bits of a (0..32).
static int bitcount(unsigned int a)
{
   int count = 0;

   a &= 0xffffffffu;
   // each iteration clears the lowest set bit
   while (a) {
      a &= a - 1;
      ++count;
   }
   return count;
}
3229 
// Move a masked colour field into the low 8 bits and widen it to 8 bits.
//   v     - the pixel value already ANDed with the channel mask
//   shift - right-shift that brings the field's top bit to bit 7
//           (negative means shift left by -shift)
//   bits  - width of the field; narrower fields are widened by repeating
//           their bit pattern below themselves
// The work is done on an unsigned copy of v: a field that includes bit 31
// would otherwise be sign-extended by the right shifts and corrupt the
// replicated bits, and left-shifting a signed value can overflow.
// bits <= 0 returns the shifted value as is instead of looping forever.
static int shiftsigned(int v, int shift, int bits)
{
   unsigned int u = (unsigned int) v;
   unsigned int result;
   int z;

   if (shift < 0) u <<= -shift;
   else u >>= shift;
   result = u;

   if (bits <= 0)
      return (int) result;

   for (z = bits; z < 8; z += bits)
      result += u >> z;
   return (int) result;
}
3246 
3247 static stbi_uc *bmp_load(stbi *s, int *x, int *y, int *comp, int req_comp)
3248 {
3249    uint8 *out;
3250    unsigned int mr=0,mg=0,mb=0,ma=0;
3251    stbi_uc pal[256][4];
3252    int psize=0,i,j,compress=0,width;
3253    int bpp, flip_vertically, pad, target, offset, hsz;
3254    if (get8(s) != 'B' || get8(s) != 'M') return epuc("not BMP", "Corrupt BMP");
3255    get32le(s); // discard filesize
3256    get16le(s); // discard reserved
3257    get16le(s); // discard reserved
3258    offset = get32le(s);
3259    hsz = get32le(s);
3260    if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108) return epuc("unknown BMP", "BMP type not supported: unknown");
3261    if (hsz == 12) {
3262       s->img_x = get16le(s);
3263       s->img_y = get16le(s);
3264    } else {
3265       s->img_x = get32le(s);
3266       s->img_y = get32le(s);
3267    }
3268    if (get16le(s) != 1) return epuc("bad BMP", "bad BMP");
3269    bpp = get16le(s);
3270    if (bpp == 1) return epuc("monochrome", "BMP type not supported: 1-bit");
3271    flip_vertically = ((int) s->img_y) > 0;
3272    s->img_y = abs((int) s->img_y);
3273    if (hsz == 12) {
3274       if (bpp < 24)
3275          psize = (offset - 14 - 24) / 3;
3276    } else {
3277       compress = get32le(s);
3278       if (compress == 1 || compress == 2) return epuc("BMP RLE", "BMP type not supported: RLE");
3279       get32le(s); // discard sizeof
3280       get32le(s); // discard hres
3281       get32le(s); // discard vres
3282       get32le(s); // discard colorsused
3283       get32le(s); // discard max important
3284       if (hsz == 40 || hsz == 56) {
3285          if (hsz == 56) {
3286             get32le(s);
3287             get32le(s);
3288             get32le(s);
3289             get32le(s);
3290          }
3291          if (bpp == 16 || bpp == 32) {
3292             mr = mg = mb = 0;
3293             if (compress == 0) {
3294                if (bpp == 32) {
3295                   mr = 0xffu << 16;
3296                   mg = 0xffu <<  8;
3297                   mb = 0xffu <<  0;
3298                   ma = 0xffu << 24;
3299                } else {
3300                   mr = 31u << 10;
3301                   mg = 31u <<  5;
3302                   mb = 31u <<  0;
3303                }
3304             } else if (compress == 3) {
3305                mr = get32le(s);
3306                mg = get32le(s);
3307                mb = get32le(s);
3308                // not documented, but generated by photoshop and handled by mspaint
3309                if (mr == mg && mg == mb) {
3310                   // ?!?!?
3311                   return epuc("bad BMP", "bad BMP");
3312                }
3313             } else
3314                return epuc("bad BMP", "bad BMP");
3315          }
3316       } else {
3317          assert(hsz == 108);
3318          mr = get32le(s);
3319          mg = get32le(s);
3320          mb = get32le(s);
3321          ma = get32le(s);
3322          get32le(s); // discard color space
3323          for (i=0; i < 12; ++i)
3324             get32le(s); // discard color space parameters
3325       }
3326       if (bpp < 16)
3327          psize = (offset - 14 - hsz) >> 2;
3328    }
3329    s->img_n = ma ? 4 : 3;
3330    if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
3331       target = req_comp;
3332    else
3333       target = s->img_n; // if they want monochrome, we'll post-convert
3334    out = MALLOC(target * s->img_x * s->img_y);
3335    if (!out) return epuc("outofmem", "Out of memory");
3336    if (bpp < 16) {
3337       int z=0;
3338       if (psize == 0 || psize > 256) { FREE(out); return epuc("invalid", "Corrupt BMP"); }
3339       for (i=0; i < psize; ++i) {
3340          pal[i][2] = get8u(s);
3341          pal[i][1] = get8u(s);
3342          pal[i][0] = get8u(s);
3343          if (hsz != 12) get8(s);
3344          pal[i][3] = 255;
3345       }
3346       skip(s, offset - 14 - hsz - psize * (hsz == 12 ? 3 : 4));
3347       if (bpp == 4) width = (s->img_x + 1) >> 1;
3348       else if (bpp == 8) width = s->img_x;
3349       else { FREE(out); return epuc("bad bpp", "Corrupt BMP"); }
3350       pad = (-width)&3;
3351       for (j=0; j < (int) s->img_y; ++j) {
3352          for (i=0; i < (int) s->img_x; i += 2) {
3353             int v=get8(s),v2=0;
3354             if (bpp == 4) {
3355                v2 = v & 15;
3356                v >>= 4;
3357             }
3358             out[z++] = pal[v][0];
3359             out[z++] = pal[v][1];
3360             out[z++] = pal[v][2];
3361             if (target == 4) out[z++] = 255;
3362             if (i+1 == (int) s->img_x) break;
3363             v = (bpp == 8) ? get8(s) : v2;
3364             out[z++] = pal[v][0];
3365             out[z++] = pal[v][1];
3366             out[z++] = pal[v][2];
3367             if (target == 4) out[z++] = 255;
3368          }
3369          skip(s, pad);
3370       }
3371    } else {
3372       int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
3373       int z = 0;
3374       int easy=0;
3375       skip(s, offset - 14 - hsz);
3376       if (bpp == 24) width = 3 * s->img_x;
3377       else if (bpp == 16) width = 2*s->img_x;
3378       else /* bpp = 32 and pad = 0 */ width=0;
3379       pad = (-width) & 3;
3380       if (bpp == 24) {
3381          easy = 1;
3382       } else if (bpp == 32) {
3383          if (mb == 0xff && mg == 0xff00 && mr == 0xff000000 && ma == 0xff000000)
3384             easy = 2;
3385       }
3386       if (!easy) {
3387          if (!mr || !mg || !mb) {
3388 	    FREE(out);
3389 	    return epuc("bad masks", "Corrupt BMP");
3390 	 }
3391          // right shift amt to put high bit in position #7
3392          rshift = high_bit(mr)-7; rcount = bitcount(mr);
3393          gshift = high_bit(mg)-7; gcount = bitcount(mr);
3394          bshift = high_bit(mb)-7; bcount = bitcount(mr);
3395          ashift = high_bit(ma)-7; acount = bitcount(mr);
3396       }
3397       for (j=0; j < (int) s->img_y; ++j) {
3398          if (easy) {
3399             for (i=0; i < (int) s->img_x; ++i) {
3400                int a;
3401                out[z+2] = get8u(s);
3402                out[z+1] = get8u(s);
3403                out[z+0] = get8u(s);
3404                z += 3;
3405                a = (easy == 2 ? get8(s) : 255);
3406                if (target == 4) out[z++] = (uint8) a;
3407             }
3408          } else {
3409             for (i=0; i < (int) s->img_x; ++i) {
3410                uint32 v = (bpp == 16 ? get16le(s) : get32le(s));
3411                int a;
3412                out[z++] = (uint8) shiftsigned(v & mr, rshift, rcount);
3413                out[z++] = (uint8) shiftsigned(v & mg, gshift, gcount);
3414                out[z++] = (uint8) shiftsigned(v & mb, bshift, bcount);
3415                a = (ma ? shiftsigned(v & ma, ashift, acount) : 255);
3416                if (target == 4) out[z++] = (uint8) a;
3417             }
3418          }
3419          skip(s, pad);
3420       }
3421    }
3422    if (flip_vertically) {
3423       stbi_uc t;
3424       for (j=0; j < (int) s->img_y>>1; ++j) {
3425          stbi_uc *p1 = out +      j     *s->img_x*target;
3426          stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
3427          for (i=0; i < (int) s->img_x*target; ++i) {
3428             t = p1[i], p1[i] = p2[i], p2[i] = t;
3429          }
3430       }
3431    }
3432 
3433    if (req_comp && req_comp != target) {
3434       out = convert_format(out, target, req_comp, s->img_x, s->img_y);
3435       if (out == NULL) return out; // convert_format frees input on failure
3436    }
3437 
3438    *x = s->img_x;
3439    *y = s->img_y;
3440    if (comp) *comp = target;
3441    return out;
3442 }
3443 
3444 #ifndef STBI_NO_STDIO
3445 stbi_uc *stbi_bmp_load             (char const *filename,           int *x, int *y, int *comp, int req_comp)
3446 {
3447    stbi_uc *data;
3448    FILE *f = fopen(filename, "rb");
3449    if (!f) return NULL;
3450    data = stbi_bmp_load_from_file(f, x,y,comp,req_comp);
3451    fclose(f);
3452    return data;
3453 }
3454 
3455 stbi_uc *stbi_bmp_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp)
3456 {
3457    stbi s;
3458    start_file(&s, f);
3459    return bmp_load(&s, x,y,comp,req_comp);
3460 }
3461 #endif
3462 
3463 stbi_uc *stbi_bmp_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
3464 {
3465    stbi s;
3466    start_mem(&s, buffer, len);
3467    return bmp_load(&s, x,y,comp,req_comp);
3468 }
3469 
3470 // Targa Truevision - TGA
3471 // by Jonathan Dummer
3472 
3473 static int tga_info(stbi *s, int *x, int *y, int *comp)
3474 {
3475     int tga_w, tga_h, tga_comp;
3476     int sz;
3477     get8u(s);                   // discard Offset
3478     sz = get8u(s);              // color type
3479     if( sz > 1 ) return 0;      // only RGB or indexed allowed
3480     sz = get8u(s);              // image type
3481     // only RGB or grey allowed, +/- RLE
3482     if ((sz != 1) && (sz != 2) && (sz != 3) && (sz != 9) && (sz != 10) && (sz != 11)) return 0;
3483     get16le(s);                 // discard palette start
3484     get16le(s);                 // discard palette length
3485     get8(s);                    // discard bits per palette color entry
3486     get16le(s);                 // discard x origin
3487     get16le(s);                 // discard y origin
3488     tga_w = get16le(s);
3489     if( tga_w < 1 ) return 0;   // test width
3490     tga_h = get16le(s);
3491     if( tga_h < 1 ) return 0;   // test height
3492     sz = get8(s);               // bits per pixel
3493     // only RGB or RGBA or grey allowed
3494     if ((sz != 8) && (sz != 16) && (sz != 24) && (sz != 32)) return 0;
3495     tga_comp = sz;
3496     if (x) *x = tga_w;
3497     if (y) *y = tga_h;
3498     if (comp) *comp = tga_comp / 8;
3499     return 1;                   // seems to have passed everything
3500 }
3501 
3502 #ifndef STBI_NO_STDIO
3503 int stbi_tga_info_from_file(FILE *f, int *x, int *y, int *comp)
3504 {
3505     stbi s;
3506     int r;
3507     long n = ftell(f);
3508     start_file(&s, f);
3509     r = tga_info(&s, x, y, comp);
3510     fseek(f, n, SEEK_SET);
3511     return r;
3512 }
3513 #endif
3514 
3515 int stbi_tga_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
3516 {
3517     stbi s;
3518     start_mem(&s, buffer, len);
3519     return tga_info(&s, x, y, comp);
3520 }
3521 
3522 static int tga_test(stbi *s)
3523 {
3524    int sz;
3525    get8u(s);      //   discard Offset
3526    sz = get8u(s);   //   color type
3527    if ( sz > 1 ) return 0;   //   only RGB or indexed allowed
3528    sz = get8u(s);   //   image type
3529    if ( (sz != 1) && (sz != 2) && (sz != 3) && (sz != 9) && (sz != 10) && (sz != 11) ) return 0;   //   only RGB or grey allowed, +/- RLE
3530    get16(s);      //   discard palette start
3531    get16(s);      //   discard palette length
3532    get8(s);         //   discard bits per palette color entry
3533    get16(s);      //   discard x origin
3534    get16(s);      //   discard y origin
3535    if ( get16(s) < 1 ) return 0;      //   test width
3536    if ( get16(s) < 1 ) return 0;      //   test height
3537    sz = get8(s);   //   bits per pixel
3538    if ( (sz != 8) && (sz != 16) && (sz != 24) && (sz != 32) ) return 0;   //   only RGB or RGBA or grey allowed
3539    return 1;      //   seems to have passed everything
3540 }
3541 
3542 #ifndef STBI_NO_STDIO
3543 int      stbi_tga_test_file        (FILE *f)
3544 {
3545    stbi s;
3546    int r,n = ftell(f);
3547    start_file(&s, f);
3548    r = tga_test(&s);
3549    fseek(f,n,SEEK_SET);
3550    return r;
3551 }
3552 #endif
3553 
3554 int      stbi_tga_test_memory      (stbi_uc const *buffer, int len)
3555 {
3556    stbi s;
3557    start_mem(&s, buffer, len);
3558    return tga_test(&s);
3559 }
3560 
3561 static stbi_uc *tga_load(stbi *s, int *x, int *y, int *comp, int req_comp)
3562 {
3563    //   read in the TGA header stuff
3564    int tga_offset = get8u(s);
3565    int tga_indexed = get8u(s);
3566    int tga_image_type = get8u(s);
3567    int tga_is_RLE = 0;
3568    int tga_palette_start = get16le(s);
3569    int tga_palette_len = get16le(s);
3570    int tga_palette_bits = get8u(s);
3571    int tga_x_origin = get16le(s);
3572    int tga_y_origin = get16le(s);
3573    int tga_width = get16le(s);
3574    int tga_height = get16le(s);
3575    int tga_bits_per_pixel = get8u(s);
3576    int tga_inverted = get8u(s);
3577    //   image data
3578    unsigned char *tga_data;
3579    unsigned char *tga_palette = NULL;
3580    int i, j;
3581    unsigned char raw_data[4];
3582    unsigned char trans_data[4];
3583    int RLE_count = 0;
3584    int RLE_repeating = 0;
3585    int read_next_pixel = 1;
3586 
3587    //   do a tiny bit of precessing
3588    if ( tga_image_type >= 8 )
3589    {
3590       tga_image_type -= 8;
3591       tga_is_RLE = 1;
3592    }
3593    /* int tga_alpha_bits = tga_inverted & 15; */
3594    tga_inverted = 1 - ((tga_inverted >> 5) & 1);
3595 
3596    //   error check
3597    if ( //(tga_indexed) ||
3598       (tga_width < 1) || (tga_height < 1) ||
3599       (tga_image_type < 1) || (tga_image_type > 3) ||
3600       ((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16) &&
3601       (tga_bits_per_pixel != 24) && (tga_bits_per_pixel != 32))
3602       )
3603    {
3604       return NULL;
3605    }
3606 
3607    //   If I'm paletted, then I'll use the number of bits from the palette
3608    if ( tga_indexed )
3609    {
3610       tga_bits_per_pixel = tga_palette_bits;
3611    }
3612 
3613    //   tga info
3614    *x = tga_width;
3615    *y = tga_height;
3616    if ( (req_comp < 1) || (req_comp > 4) )
3617    {
3618       //   just use whatever the file was
3619       req_comp = tga_bits_per_pixel / 8;
3620       *comp = req_comp;
3621    } else
3622    {
3623       //   force a new number of components
3624       *comp = tga_bits_per_pixel/8;
3625    }
3626    tga_data = MALLOC( tga_width * tga_height * req_comp );
3627 
3628    //   skip to the data's starting position (offset usually = 0)
3629    skip(s, tga_offset );
3630    //   do I need to load a palette?
3631    if ( tga_indexed )
3632    {
3633       //   any data to skip? (offset usually = 0)
3634       skip(s, tga_palette_start );
3635       //   load the palette
3636       tga_palette = MALLOC( tga_palette_len * tga_palette_bits / 8 );
3637       if (!getn(s, tga_palette, tga_palette_len * tga_palette_bits / 8 ))
3638          return NULL;
3639    }
3640    //   load the data
3641    trans_data[0] = trans_data[1] = trans_data[2] = trans_data[3] = 0;
3642    for (i=0; i < tga_width * tga_height; ++i)
3643    {
3644       //   if I'm in RLE mode, do I need to get a RLE chunk?
3645       if ( tga_is_RLE )
3646       {
3647          if ( RLE_count == 0 )
3648          {
3649             //   yep, get the next byte as a RLE command
3650             int RLE_cmd = get8u(s);
3651             RLE_count = 1 + (RLE_cmd & 127);
3652             RLE_repeating = RLE_cmd >> 7;
3653             read_next_pixel = 1;
3654          } else if ( !RLE_repeating )
3655          {
3656             read_next_pixel = 1;
3657          }
3658       } else
3659       {
3660          read_next_pixel = 1;
3661       }
3662       //   OK, if I need to read a pixel, do it now
3663       if ( read_next_pixel )
3664       {
3665          //   load however much data we did have
3666          if ( tga_indexed )
3667          {
3668             //   read in 1 byte, then perform the lookup
3669             int pal_idx = get8u(s);
3670             if ( pal_idx >= tga_palette_len )
3671             {
3672                //   invalid index
3673                pal_idx = 0;
3674             }
3675             pal_idx *= tga_bits_per_pixel / 8;
3676             for (j = 0; j*8 < tga_bits_per_pixel; ++j)
3677             {
3678                raw_data[j] = tga_palette[pal_idx+j];
3679             }
3680          } else
3681          {
3682             //   read in the data raw
3683             for (j = 0; j*8 < tga_bits_per_pixel; ++j)
3684             {
3685                raw_data[j] = get8u(s);
3686             }
3687          }
3688          //   convert raw to the intermediate format
3689          switch (tga_bits_per_pixel)
3690          {
3691          case 8:
3692             //   Luminous => RGBA
3693             trans_data[0] = raw_data[0];
3694             trans_data[1] = raw_data[0];
3695             trans_data[2] = raw_data[0];
3696             trans_data[3] = 255;
3697             break;
3698          case 16:
3699             //   Luminous,Alpha => RGBA
3700             trans_data[0] = raw_data[0];
3701             trans_data[1] = raw_data[0];
3702             trans_data[2] = raw_data[0];
3703             trans_data[3] = raw_data[1];
3704             break;
3705          case 24:
3706             //   BGR => RGBA
3707             trans_data[0] = raw_data[2];
3708             trans_data[1] = raw_data[1];
3709             trans_data[2] = raw_data[0];
3710             trans_data[3] = 255;
3711             break;
3712          case 32:
3713             //   BGRA => RGBA
3714             trans_data[0] = raw_data[2];
3715             trans_data[1] = raw_data[1];
3716             trans_data[2] = raw_data[0];
3717             trans_data[3] = raw_data[3];
3718             break;
3719          }
3720          //   clear the reading flag for the next pixel
3721          read_next_pixel = 0;
3722       } // end of reading a pixel
3723       //   convert to final format
3724       switch (req_comp)
3725       {
3726       case 1:
3727          //   RGBA => Luminance
3728          tga_data[i*req_comp+0] = compute_y(trans_data[0],trans_data[1],trans_data[2]);
3729          break;
3730       case 2:
3731          //   RGBA => Luminance,Alpha
3732          tga_data[i*req_comp+0] = compute_y(trans_data[0],trans_data[1],trans_data[2]);
3733          tga_data[i*req_comp+1] = trans_data[3];
3734          break;
3735       case 3:
3736          //   RGBA => RGB
3737          tga_data[i*req_comp+0] = trans_data[0];
3738          tga_data[i*req_comp+1] = trans_data[1];
3739          tga_data[i*req_comp+2] = trans_data[2];
3740          break;
3741       case 4:
3742          //   RGBA => RGBA
3743          tga_data[i*req_comp+0] = trans_data[0];
3744          tga_data[i*req_comp+1] = trans_data[1];
3745          tga_data[i*req_comp+2] = trans_data[2];
3746          tga_data[i*req_comp+3] = trans_data[3];
3747          break;
3748       }
3749       //   in case we're in RLE mode, keep counting down
3750       --RLE_count;
3751    }
3752    //   do I need to invert the image?
3753    if ( tga_inverted )
3754    {
3755       for (j = 0; j*2 < tga_height; ++j)
3756       {
3757          int index1 = j * tga_width * req_comp;
3758          int index2 = (tga_height - 1 - j) * tga_width * req_comp;
3759          for (i = tga_width * req_comp; i > 0; --i)
3760          {
3761             unsigned char temp = tga_data[index1];
3762             tga_data[index1] = tga_data[index2];
3763             tga_data[index2] = temp;
3764             ++index1;
3765             ++index2;
3766          }
3767       }
3768    }
3769    //   clear my palette, if I had one
3770    if ( tga_palette != NULL )
3771    {
3772       FREE( tga_palette );
3773    }
3774    //   the things I do to get rid of an error message, and yet keep
3775    //   Microsoft's C compilers happy... [8^(
3776    tga_palette_start = tga_palette_len = tga_palette_bits =
3777          tga_x_origin = tga_y_origin = 0;
3778    //   OK, done
3779    return tga_data;
3780 }
3781 
3782 #ifndef STBI_NO_STDIO
3783 stbi_uc *stbi_tga_load             (char const *filename,           int *x, int *y, int *comp, int req_comp)
3784 {
3785    stbi_uc *data;
3786    FILE *f = fopen(filename, "rb");
3787    if (!f) return NULL;
3788    data = stbi_tga_load_from_file(f, x,y,comp,req_comp);
3789    fclose(f);
3790    return data;
3791 }
3792 
3793 stbi_uc *stbi_tga_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp)
3794 {
3795    stbi s;
3796    start_file(&s, f);
3797    return tga_load(&s, x,y,comp,req_comp);
3798 }
3799 #endif
3800 
3801 stbi_uc *stbi_tga_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
3802 {
3803    stbi s;
3804    start_mem(&s, buffer, len);
3805    return tga_load(&s, x,y,comp,req_comp);
3806 }
3807 
3808 
3809 // *************************************************************************************************
3810 // Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
3811 
3812 static int psd_test(stbi *s)
3813 {
3814    if (get32(s) != 0x38425053) return 0;   // "8BPS"
3815    else return 1;
3816 }
3817 
3818 #ifndef STBI_NO_STDIO
3819 int stbi_psd_test_file(FILE *f)
3820 {
3821    stbi s;
3822    int r,n = ftell(f);
3823    start_file(&s, f);
3824    r = psd_test(&s);
3825    fseek(f,n,SEEK_SET);
3826    return r;
3827 }
3828 #endif
3829 
3830 int stbi_psd_test_memory(stbi_uc const *buffer, int len)
3831 {
3832    stbi s;
3833    start_mem(&s, buffer, len);
3834    return psd_test(&s);
3835 }
3836 
3837 static stbi_uc *psd_load(stbi *s, int *x, int *y, int *comp, int req_comp)
3838 {
3839    int   pixelCount;
3840    int channelCount, compression;
3841    int channel, i, count, len;
3842    int w,h;
3843    uint8 *out;
3844 
3845    // Check identifier
3846    if (get32(s) != 0x38425053)   // "8BPS"
3847       return epuc("not PSD", "Corrupt PSD image");
3848 
3849    // Check file type version.
3850    if (get16(s) != 1)
3851       return epuc("wrong version", "Unsupported version of PSD image");
3852 
3853    // Skip 6 reserved bytes.
3854    skip(s, 6 );
3855 
3856    // Read the number of channels (R, G, B, A, etc).
3857    channelCount = get16(s);
3858    if (channelCount < 0 || channelCount > 16)
3859       return epuc("wrong channel count", "Unsupported number of channels in PSD image");
3860 
3861    // Read the rows and columns of the image.
3862    h = get32(s);
3863    w = get32(s);
3864 
3865    // Make sure the depth is 8 bits.
3866    if (get16(s) != 8)
3867       return epuc("unsupported bit depth", "PSD bit depth is not 8 bit");
3868 
3869    // Make sure the color mode is RGB.
3870    // Valid options are:
3871    //   0: Bitmap
3872    //   1: Grayscale
3873    //   2: Indexed color
3874    //   3: RGB color
3875    //   4: CMYK color
3876    //   7: Multichannel
3877    //   8: Duotone
3878    //   9: Lab color
3879    if (get16(s) != 3)
3880       return epuc("wrong color format", "PSD is not in RGB color format");
3881 
3882    // Skip the Mode Data.  (It's the palette for indexed color; other info for other modes.)
3883    skip(s,get32(s) );
3884 
3885    // Skip the image resources.  (resolution, pen tool paths, etc)
3886    skip(s, get32(s) );
3887 
3888    // Skip the reserved data.
3889    skip(s, get32(s) );
3890 
3891    // Find out if the data is compressed.
3892    // Known values:
3893    //   0: no compression
3894    //   1: RLE compressed
3895    compression = get16(s);
3896    if (compression > 1)
3897       return epuc("bad compression", "PSD has an unknown compression format");
3898 
3899    // Create the destination image.
3900    out = MALLOC(4 * w*h);
3901    if (!out) return epuc("outofmem", "Out of memory");
3902    pixelCount = w*h;
3903 
3904    // Initialize the data to zero.
3905    //memset( out, 0, pixelCount * 4 );
3906 
3907    // Finally, the image data.
3908    if (compression) {
3909       // RLE as used by .PSD and .TIFF
3910       // Loop until you get the number of unpacked bytes you are expecting:
3911       //     Read the next source byte into n.
3912       //     If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
3913       //     Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
3914       //     Else if n is 128, noop.
3915       // Endloop
3916 
3917       // The RLE-compressed data is preceeded by a 2-byte data count for each row in the data,
3918       // which we're going to just skip.
3919       skip(s, h * channelCount * 2 );
3920 
3921       // Read the RLE data by channel.
3922       for (channel = 0; channel < 4; channel++) {
3923          uint8 *p;
3924 
3925          p = out+channel;
3926          if (channel >= channelCount) {
3927             // Fill this channel with default data.
3928             for (i = 0; i < pixelCount; i++) *p = (channel == 3 ? 255 : 0), p += 4;
3929          } else {
3930             // Read the RLE data.
3931             count = 0;
3932             while (count < pixelCount) {
3933                len = get8(s);
3934                if (len == 128) {
3935                   // No-op.
3936                } else if (len < 128) {
3937                   // Copy next len+1 bytes literally.
3938                   len++;
3939                   count += len;
3940                   while (len) {
3941                      *p = get8u(s);
3942                      p += 4;
3943                      len--;
3944                   }
3945                } else if (len > 128) {
3946                   uint8   val;
3947                   // Next -len+1 bytes in the dest are replicated from next source byte.
3948                   // (Interpret len as a negative 8-bit int.)
3949                   len ^= 0x0FF;
3950                   len += 2;
3951                   val = get8u(s);
3952                   count += len;
3953                   while (len) {
3954                      *p = val;
3955                      p += 4;
3956                      len--;
3957                   }
3958                }
3959             }
3960          }
3961       }
3962 
3963    } else {
3964       // We're at the raw image data.  It's each channel in order (Red, Green, Blue, Alpha, ...)
3965       // where each channel consists of an 8-bit value for each pixel in the image.
3966 
3967       // Read the data by channel.
3968       for (channel = 0; channel < 4; channel++) {
3969          uint8 *p;
3970 
3971          p = out + channel;
3972          if (channel > channelCount) {
3973             // Fill this channel with default data.
3974             for (i = 0; i < pixelCount; i++) *p = channel == 3 ? 255 : 0, p += 4;
3975          } else {
3976             // Read the data.
3977             for (i = 0; i < pixelCount; i++)
3978                *p = get8u(s), p += 4;
3979          }
3980       }
3981    }
3982 
3983    if (req_comp && req_comp != 4) {
3984       out = convert_format(out, 4, req_comp, w, h);
3985       if (out == NULL) return out; // convert_format frees input on failure
3986    }
3987 
3988    if (comp) *comp = channelCount;
3989    *y = h;
3990    *x = w;
3991 
3992    return out;
3993 }
3994 
3995 #ifndef STBI_NO_STDIO
3996 stbi_uc *stbi_psd_load(char const *filename, int *x, int *y, int *comp, int req_comp)
3997 {
3998    stbi_uc *data;
3999    FILE *f = fopen(filename, "rb");
4000    if (!f) return NULL;
4001    data = stbi_psd_load_from_file(f, x,y,comp,req_comp);
4002    fclose(f);
4003    return data;
4004 }
4005 
4006 stbi_uc *stbi_psd_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
4007 {
4008    stbi s;
4009    start_file(&s, f);
4010    return psd_load(&s, x,y,comp,req_comp);
4011 }
4012 #endif
4013 
4014 stbi_uc *stbi_psd_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
4015 {
4016    stbi s;
4017    start_mem(&s, buffer, len);
4018    return psd_load(&s, x,y,comp,req_comp);
4019 }
4020 
4021 // *************************************************************************************************
4022 // Softimage PIC loader
4023 // by Tom Seddon
4024 //
4025 // See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
4026 // See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
4027 
4028 static int pic_is4(stbi *s,const char *str)
4029 {
4030    int i;
4031    for (i=0; i<4; ++i)
4032       if (get8(s) != (stbi_uc)str[i])
4033          return 0;
4034 
4035    return 1;
4036 }
4037 
4038 static int pic_test(stbi *s)
4039 {
4040    int i;
4041 
4042    if (!pic_is4(s,"\x53\x80\xF6\x34"))
4043       return 0;
4044 
4045    for(i=0;i<84;++i)
4046       get8(s);
4047 
4048    if (!pic_is4(s,"PICT"))
4049       return 0;
4050 
4051    return 1;
4052 }
4053 
4054 typedef struct
4055 {
4056    stbi_uc size,type,channel;
4057 } pic_packet_t;
4058 
4059 static stbi_uc *pic_readval(stbi *s, int channel, stbi_uc *dest)
4060 {
4061    int mask=0x80, i;
4062 
4063    for (i=0; i<4; ++i, mask>>=1) {
4064       if (channel & mask) {
4065          if (at_eof(s)) return epuc("bad file","PIC file too short");
4066          dest[i]=get8u(s);
4067       }
4068    }
4069 
4070    return dest;
4071 }
4072 
4073 static void pic_copyval(int channel,stbi_uc *dest,const stbi_uc *src)
4074 {
4075    int mask=0x80,i;
4076 
4077    for (i=0;i<4; ++i, mask>>=1)
4078       if (channel&mask)
4079          dest[i]=src[i];
4080 }
4081 
4082 static stbi_uc *pic_load2(stbi *s,int width,int height,int *comp, stbi_uc *result)
4083 {
4084    int act_comp=0,num_packets=0,y,chained;
4085    pic_packet_t packets[10];
4086 
4087    // this will (should...) cater for even some bizarre stuff like having data
4088     // for the same channel in multiple packets.
4089    do {
4090       pic_packet_t *packet;
4091 
4092       if (num_packets==sizeof(packets)/sizeof(packets[0]))
4093          return epuc("bad format","too many packets");
4094 
4095       packet = &packets[num_packets++];
4096 
4097       chained = get8(s);
4098       packet->size    = get8u(s);
4099       packet->type    = get8u(s);
4100       packet->channel = get8u(s);
4101 
4102       act_comp |= packet->channel;
4103 
4104       if (at_eof(s))          return epuc("bad file","file too short (reading packets)");
4105       if (packet->size != 8)  return epuc("bad format","packet isn't 8bpp");
4106    } while (chained);
4107 
4108    *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?
4109 
4110    for(y=0; y<height; ++y) {
4111       int packet_idx;
4112 
4113       for(packet_idx=0; packet_idx < num_packets; ++packet_idx) {
4114          pic_packet_t *packet = &packets[packet_idx];
4115          stbi_uc *dest = result+y*width*4;
4116 
4117          switch (packet->type) {
4118             default:
4119                return epuc("bad format","packet has bad compression type");
4120 
4121             case 0: {//uncompressed
4122                int x;
4123 
4124                for(x=0;x<width;++x, dest+=4)
4125                   if (!pic_readval(s,packet->channel,dest))
4126                      return 0;
4127                break;
4128             }
4129 
4130             case 1://Pure RLE
4131                {
4132                   int left=width, i;
4133 
4134                   while (left>0) {
4135                      stbi_uc count,value[4];
4136 
4137                      count=get8u(s);
4138                      if (at_eof(s))   return epuc("bad file","file too short (pure read count)");
4139 
4140                      if (count > left)
4141                         count = (uint8) left;
4142 
4143                      if (!pic_readval(s,packet->channel,value))  return 0;
4144 
4145                      for(i=0; i<count; ++i,dest+=4)
4146                         pic_copyval(packet->channel,dest,value);
4147                      left -= count;
4148                   }
4149                }
4150                break;
4151 
4152             case 2: {//Mixed RLE
4153                int left=width;
4154                while (left>0) {
4155                   int count = get8(s), i;
4156                   if (at_eof(s))  return epuc("bad file","file too short (mixed read count)");
4157 
4158                   if (count >= 128) { // Repeated
4159                      stbi_uc value[4];
4160 
4161                      if (count==128)
4162                         count = get16(s);
4163                      else
4164                         count -= 127;
4165                      if (count > left)
4166                         return epuc("bad file","scanline overrun");
4167 
4168                      if (!pic_readval(s,packet->channel,value))
4169                         return 0;
4170 
4171                      for(i=0;i<count;++i, dest += 4)
4172                         pic_copyval(packet->channel,dest,value);
4173                   } else { // Raw
4174                      ++count;
4175                      if (count>left) return epuc("bad file","scanline overrun");
4176 
4177                      for(i=0;i<count;++i, dest+=4)
4178                         if (!pic_readval(s,packet->channel,dest))
4179                            return 0;
4180                   }
4181                   left-=count;
4182                }
4183                break;
4184             }
4185          }
4186       }
4187    }
4188 
4189    return result;
4190 }
4191 
4192 static stbi_uc *pic_load(stbi *s,int *px,int *py,int *comp,int req_comp)
4193 {
4194    stbi_uc *result;
4195    int i, x,y;
4196 
4197    for (i=0; i<92; ++i)
4198       get8(s);
4199 
4200    x = get16(s);
4201    y = get16(s);
4202    if (at_eof(s))  return epuc("bad file","file too short (pic header)");
4203    if ((1 << 28) / x < y) return epuc("too large", "Image too large to decode");
4204 
4205    get32(s); //skip `ratio'
4206    get16(s); //skip `fields'
4207    get16(s); //skip `pad'
4208 
4209    // intermediate buffer is RGBA
4210    result = MALLOC(x*y*4);
4211    memset(result, 0xff, x*y*4);
4212 
4213    if (!pic_load2(s,x,y,comp, result)) {
4214       FREE(result);
4215       result=0;
4216    }
4217    *px = x;
4218    *py = y;
4219    if (req_comp == 0) req_comp = *comp;
4220    result=convert_format(result,4,req_comp,x,y);
4221 
4222    return result;
4223 }
4224 
4225 int stbi_pic_test_memory(stbi_uc const *buffer, int len)
4226 {
4227    stbi s;
4228    start_mem(&s,buffer,len);
4229    return pic_test(&s);
4230 }
4231 
4232 stbi_uc *stbi_pic_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
4233 {
4234    stbi s;
4235    start_mem(&s,buffer,len);
4236    return pic_load(&s,x,y,comp,req_comp);
4237 }
4238 
4239 #ifndef STBI_NO_STDIO
4240 int stbi_pic_test_file(FILE *f)
4241 {
4242    int result;
4243    long l = ftell(f);
4244    stbi s;
4245    start_file(&s,f);
4246    result = pic_test(&s);
4247    fseek(f,l,SEEK_SET);
4248    return result;
4249 }
4250 
4251 stbi_uc *stbi_pic_load(char const *filename,int *x, int *y, int *comp, int req_comp)
4252 {
4253    stbi_uc *result;
4254    FILE *f=fopen(filename,"rb");
4255    if (!f) return 0;
4256    result = stbi_pic_load_from_file(f,x,y,comp,req_comp);
4257    fclose(f);
4258    return result;
4259 }
4260 
4261 stbi_uc *stbi_pic_load_from_file(FILE *f,int *x, int *y, int *comp, int req_comp)
4262 {
4263    stbi s;
4264    start_file(&s,f);
4265    return pic_load(&s,x,y,comp,req_comp);
4266 }
4267 #endif
4268 
4269 // *************************************************************************************************
4270 // GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
4271 typedef struct stbi_gif_lzw_struct {
4272    int16 prefix;
4273    uint8 first;
4274    uint8 suffix;
4275 } stbi_gif_lzw;
4276 
4277 typedef struct stbi_gif_struct
4278 {
4279    int w,h;
4280    stbi_uc *out;                 // output buffer (always 4 components)
4281    int flags, bgindex, ratio, transparent, eflags;
4282    uint8  pal[256][4];
4283    uint8 lpal[256][4];
4284    stbi_gif_lzw codes[4096];
4285    uint8 *color_table;
4286    int parse, step;
4287    int lflags;
4288    int start_x, start_y;
4289    int max_x, max_y;
4290    int cur_x, cur_y;
4291    int line_size;
4292 } stbi_gif;
4293 
4294 static int gif_test(stbi *s)
4295 {
4296    int sz;
4297    if (get8(s) != 'G' || get8(s) != 'I' || get8(s) != 'F' || get8(s) != '8') return 0;
4298    sz = get8(s);
4299    if (sz != '9' && sz != '7') return 0;
4300    if (get8(s) != 'a') return 0;
4301    return 1;
4302 }
4303 
4304 #ifndef STBI_NO_STDIO
4305 int      stbi_gif_test_file        (FILE *f)
4306 {
4307    stbi s;
4308    int r,n = ftell(f);
4309    start_file(&s,f);
4310    r = gif_test(&s);
4311    fseek(f,n,SEEK_SET);
4312    return r;
4313 }
4314 #endif
4315 
4316 int      stbi_gif_test_memory      (stbi_uc const *buffer, int len)
4317 {
4318    stbi s;
4319    start_mem(&s, buffer, len);
4320    return gif_test(&s);
4321 }
4322 
4323 static void stbi_gif_parse_colortable(stbi *s, uint8 pal[256][4], int num_entries, int transp)
4324 {
4325    int i;
4326    for (i=0; i < num_entries; ++i) {
4327       pal[i][2] = get8u(s);
4328       pal[i][1] = get8u(s);
4329       pal[i][0] = get8u(s);
4330       pal[i][3] = transp ? 0 : 255;
4331    }
4332 }
4333 
4334 static int stbi_gif_header(stbi *s, stbi_gif *g, int *comp, int is_info)
4335 {
4336    uint8 ver;
4337    if (get8(s) != 'G' || get8(s) != 'I' || get8(s) != 'F' || get8(s) != '8')
4338       return e("not GIF", "Corrupt GIF");
4339 
4340    ver = get8u(s);
4341    if (ver != '7' && ver != '9')    return e("not GIF", "Corrupt GIF");
4342    if (get8(s) != 'a')                      return e("not GIF", "Corrupt GIF");
4343 
4344    failure_reason = "";
4345    g->w = get16le(s);
4346    g->h = get16le(s);
4347    g->flags = get8(s);
4348    g->bgindex = get8(s);
4349    g->ratio = get8(s);
4350    g->transparent = -1;
4351 
4352    if (comp != 0) *comp = 4;  // can't actually tell whether it's 3 or 4 until we parse the comments
4353 
4354    if (is_info) return 1;
4355 
4356    if (g->flags & 0x80)
4357       stbi_gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1);
4358 
4359    return 1;
4360 }
4361 
4362 static int stbi_gif_info_raw(stbi *s, int *x, int *y, int *comp)
4363 {
4364    stbi_gif g;
4365    if (!stbi_gif_header(s, &g, comp, 1)) return 0;
4366    if (x) *x = g.w;
4367    if (y) *y = g.h;
4368    return 1;
4369 }
4370 
4371 static void stbi_out_gif_code(stbi_gif *g, uint16 code)
4372 {
4373    uint8 *p, *c;
4374 
4375    // recurse to decode the prefixes, since the linked-list is backwards,
4376    // and working backwards through an interleaved image would be nasty
4377    if (g->codes[code].prefix >= 0)
4378       stbi_out_gif_code(g, g->codes[code].prefix);
4379 
4380    if (g->cur_y >= g->max_y) return;
4381 
4382    p = &g->out[g->cur_x + g->cur_y];
4383    c = &g->color_table[g->codes[code].suffix * 4];
4384 
4385    if (c[3] >= 128) {
4386       p[0] = c[2];
4387       p[1] = c[1];
4388       p[2] = c[0];
4389       p[3] = c[3];
4390    }
4391    g->cur_x += 4;
4392 
4393    if (g->cur_x >= g->max_x) {
4394       g->cur_x = g->start_x;
4395       g->cur_y += g->step;
4396 
4397       while (g->cur_y >= g->max_y && g->parse > 0) {
4398          g->step = (1 << g->parse) * g->line_size;
4399          g->cur_y = g->start_y + (g->step >> 1);
4400          --g->parse;
4401       }
4402    }
4403 }
4404 
4405 static uint8 *stbi_process_gif_raster(stbi *s, stbi_gif *g)
4406 {
4407    uint8 lzw_cs;
4408    int32 len, code;
4409    uint32 first;
4410    int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
4411    stbi_gif_lzw *p;
4412 
4413    lzw_cs = get8u(s);
4414    clear = 1 << lzw_cs;
4415    first = 1;
4416    codesize = lzw_cs + 1;
4417    codemask = (1 << codesize) - 1;
4418    bits = 0;
4419    valid_bits = 0;
4420    for (code = 0; code < clear; code++) {
4421       g->codes[code].prefix = -1;
4422       g->codes[code].first = (uint8) code;
4423       g->codes[code].suffix = (uint8) code;
4424    }
4425 
4426    // support no starting clear code
4427    avail = clear+2;
4428    oldcode = -1;
4429 
4430    len = 0;
4431    for(;;) {
4432       if (valid_bits < codesize) {
4433          if (len == 0) {
4434             len = get8(s); // start new block
4435             if (len == 0)
4436                return g->out;
4437          }
4438          --len;
4439          bits |= (int32) get8(s) << valid_bits;
4440          valid_bits += 8;
4441       } else {
4442          code = bits & codemask;
4443          bits >>= codesize;
4444          valid_bits -= codesize;
4445          // @OPTIMIZE: is there some way we can accelerate the non-clear path?
4446          if (code == clear) {  // clear code
4447             codesize = lzw_cs + 1;
4448             codemask = (1 << codesize) - 1;
4449             avail = clear + 2;
4450             oldcode = -1;
4451             first = 0;
4452          } else if (code == clear + 1) { // end of stream code
4453             skip(s, len);
4454             while ((len = get8(s)) > 0)
4455                skip(s,len);
4456             return g->out;
4457          } else if (code <= avail) {
4458             if (first) return epuc("no clear code", "Corrupt GIF");
4459 
4460             if (oldcode >= 0) {
4461                p = &g->codes[avail++];
4462                if (avail > 4096)        return epuc("too many codes", "Corrupt GIF");
4463                p->prefix = (int16) oldcode;
4464                p->first = g->codes[oldcode].first;
4465                p->suffix = (code == avail) ? p->first : g->codes[code].first;
4466             } else if (code == avail)
4467                return epuc("illegal code in raster", "Corrupt GIF");
4468 
4469             stbi_out_gif_code(g, (uint16) code);
4470 
4471             if ((avail & codemask) == 0 && avail <= 0x0FFF) {
4472                codesize++;
4473                codemask = (1 << codesize) - 1;
4474             }
4475 
4476             oldcode = code;
4477          } else {
4478             return epuc("illegal code in raster", "Corrupt GIF");
4479          }
4480       }
4481    }
4482 }
4483 
4484 static void stbi_fill_gif_background(stbi_gif *g)
4485 {
4486    int i;
4487    uint8 *c = g->pal[g->bgindex];
4488    // @OPTIMIZE: write a dword at a time
4489    for (i = 0; i < g->w * g->h * 4; i += 4) {
4490       uint8 *p  = &g->out[i];
4491       p[0] = c[2];
4492       p[1] = c[1];
4493       p[2] = c[0];
4494       p[3] = c[3];
4495    }
4496 }
4497 
4498 // this function is designed to support animated gifs, although stb_image doesn't support it
4499 static uint8 *stbi_gif_load_next(stbi *s, stbi_gif *g, int *comp, int req_comp)
4500 {
4501    int i;
4502    uint8 *old_out = 0;
4503 
4504    if (g->out == 0) {
4505       if (!stbi_gif_header(s, g, comp,0))     return 0; // failure_reason set by stbi_gif_header
4506       g->out = MALLOC(4 * g->w * g->h);
4507       if (g->out == 0)                      return epuc("outofmem", "Out of memory");
4508       stbi_fill_gif_background(g);
4509    } else {
4510       // animated-gif-only path
4511       if (((g->eflags & 0x1C) >> 2) == 3) {
4512          old_out = g->out;
4513          g->out = MALLOC(4 * g->w * g->h);
4514          if (g->out == 0)                   return epuc("outofmem", "Out of memory");
4515          memcpy(g->out, old_out, g->w*g->h*4);
4516       }
4517    }
4518 
4519    for (;;) {
4520       switch (get8(s)) {
4521          case 0x2C: /* Image Descriptor */
4522          {
4523             int32 x, y, w, h;
4524             uint8 *o;
4525 
4526             x = get16le(s);
4527             y = get16le(s);
4528             w = get16le(s);
4529             h = get16le(s);
4530             if (((x + w) > (g->w)) || ((y + h) > (g->h)))
4531                return epuc("bad Image Descriptor", "Corrupt GIF");
4532 
4533             g->line_size = g->w * 4;
4534             g->start_x = x * 4;
4535             g->start_y = y * g->line_size;
4536             g->max_x   = g->start_x + w * 4;
4537             g->max_y   = g->start_y + h * g->line_size;
4538             g->cur_x   = g->start_x;
4539             g->cur_y   = g->start_y;
4540 
4541             g->lflags = get8(s);
4542 
4543             if (g->lflags & 0x40) {
4544                g->step = 8 * g->line_size; // first interlaced spacing
4545                g->parse = 3;
4546             } else {
4547                g->step = g->line_size;
4548                g->parse = 0;
4549             }
4550 
4551             if (g->lflags & 0x80) {
4552                stbi_gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
4553                g->color_table = (uint8 *) g->lpal;
4554             } else if (g->flags & 0x80) {
4555                for (i=0; i < 256; ++i)  // @OPTIMIZE: reset only the previous transparent
4556                   g->pal[i][3] = 255;
4557                if (g->transparent >= 0 && (g->eflags & 0x01))
4558                   g->pal[g->transparent][3] = 0;
4559                g->color_table = (uint8 *) g->pal;
4560             } else
4561                return epuc("missing color table", "Corrupt GIF");
4562 
4563             o = stbi_process_gif_raster(s, g);
4564             if (o == NULL) return NULL;
4565 
4566             if (req_comp && req_comp != 4)
4567                o = convert_format(o, 4, req_comp, g->w, g->h);
4568             return o;
4569          }
4570 
4571          case 0x21: // Comment Extension.
4572          {
4573             int len;
4574             if (get8(s) == 0xF9) { // Graphic Control Extension.
4575                len = get8(s);
4576                if (len == 4) {
4577                   g->eflags = get8(s);
4578                   get16le(s); // delay
4579                   g->transparent = get8(s);
4580                } else {
4581                   skip(s, len);
4582                   break;
4583                }
4584             }
4585             while ((len = get8(s)) != 0)
4586                skip(s, len);
4587             break;
4588          }
4589 
4590          case 0x3B: // gif stream termination code
4591             return (uint8 *) 1;
4592 
4593          default:
4594             return epuc("unknown code", "Corrupt GIF");
4595       }
4596    }
4597 }
4598 
4599 #ifndef STBI_NO_STDIO
4600 stbi_uc *stbi_gif_load             (char const *filename,           int *x, int *y, int *comp, int req_comp)
4601 {
4602    uint8 *data;
4603    FILE *f = fopen(filename, "rb");
4604    if (!f) return NULL;
4605    data = stbi_gif_load_from_file(f, x,y,comp,req_comp);
4606    fclose(f);
4607    return data;
4608 }
4609 
4610 stbi_uc *stbi_gif_load_from_file   (FILE *f, int *x, int *y, int *comp, int req_comp)
4611 {
4612    uint8 *u = 0;
4613    stbi s;
4614    stbi_gif g={0};
4615    start_file(&s, f);
4616 
4617    u = stbi_gif_load_next(&s, &g, comp, req_comp);
4618    if (u == (void *) 1) u = 0;  // end of animated gif marker
4619    if (u) {
4620       *x = g.w;
4621       *y = g.h;
4622    }
4623 
4624    return u;
4625 }
4626 #endif
4627 
4628 stbi_uc *stbi_gif_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
4629 {
4630    uint8 *u = 0;
4631    stbi s;
4632    stbi_gif *pg;
4633 
4634    #ifdef STBI_SMALL_STACK
4635    pg = MALLOC(sizeof(*pg));
4636    if (pg == NULL)
4637       return NULL;
4638    #else
4639    stbi_gif g;
4640    pg = &g;
4641    #endif
4642 
4643    memset(pg, 0, sizeof(*pg));
4644    start_mem(&s, buffer, len);
4645    u = stbi_gif_load_next(&s, pg, comp, req_comp);
4646    if (u == (void *) 1) u = 0;  // end of animated gif marker
4647    if (u) {
4648       *x = pg->w;
4649       *y = pg->h;
4650    }
4651 
4652    #ifdef STBI_SMALL_STACK
4653    FREE(pg);
4654    #endif
4655 
4656    return u;
4657 }
4658 
4659 #ifndef STBI_NO_STDIO
// Opens 'filename' and reports the GIF canvas size without decoding.
// Returns 0 if the file cannot be opened, otherwise the result of
// stbi_gif_info_from_file().
int      stbi_gif_info             (char const *filename,           int *x, int *y, int *comp)
{
   int ok = 0;
   FILE *fp = fopen(filename, "rb");

   if (fp != NULL) {
      ok = stbi_gif_info_from_file(fp, x, y, comp);
      fclose(fp);
   }
   return ok;
}
4669 
4670 int stbi_gif_info_from_file(FILE *f, int *x, int *y, int *comp)
4671 {
4672    stbi s;
4673    int res;
4674    long n = ftell(f);
4675    start_file(&s, f);
4676    res = stbi_gif_info_raw(&s, x, y, comp);
4677    fseek(f, n, SEEK_SET);
4678    return res;
4679 }
4680 #endif // !STBI_NO_STDIO
4681 
4682 int stbi_gif_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
4683 {
4684    stbi s;
4685    start_mem(&s, buffer, len);
4686    return stbi_gif_info_raw(&s, x, y, comp);
4687 }
4688 
4689 
4690 
4691 
4692 // *************************************************************************************************
4693 // Radiance RGBE HDR loader
4694 // originally by Nicolas Schulz
4695 #ifndef STBI_NO_HDR
4696 static int hdr_test(stbi *s)
4697 {
4698    const char *signature = "#?RADIANCE\n";
4699    int i;
4700    for (i=0; signature[i]; ++i)
4701       if (get8(s) != signature[i])
4702          return 0;
4703    return 1;
4704 }
4705 
4706 int stbi_hdr_test_memory(stbi_uc const *buffer, int len)
4707 {
4708    stbi s;
4709    start_mem(&s, buffer, len);
4710    return hdr_test(&s);
4711 }
4712 
4713 #ifndef STBI_NO_STDIO
4714 int stbi_hdr_test_file(FILE *f)
4715 {
4716    stbi s;
4717    int r,n = ftell(f);
4718    start_file(&s, f);
4719    r = hdr_test(&s);
4720    fseek(f,n,SEEK_SET);
4721    return r;
4722 }
4723 #endif
4724 
4725 #define HDR_BUFLEN  1024
4726 static char *hdr_gettoken(stbi *z, char *buffer)
4727 {
4728    int len=0;
4729    char c = '\0';
4730 
4731    c = (char) get8(z);
4732 
4733    while (!at_eof(z) && c != '\n') {
4734       buffer[len++] = c;
4735       if (len == HDR_BUFLEN-1) {
4736          // flush to end of line
4737          while (!at_eof(z) && get8(z) != '\n')
4738             ;
4739          break;
4740       }
4741       c = (char) get8(z);
4742    }
4743 
4744    buffer[len] = 0;
4745    return buffer;
4746 }
4747 
4748 static void hdr_convert(float *output, stbi_uc *input, int req_comp)
4749 {
4750    if ( input[3] != 0 ) {
4751       float f1;
4752       // Exponent
4753       f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8));
4754       if (req_comp <= 2)
4755          output[0] = (input[0] + input[1] + input[2]) * f1 / 3;
4756       else {
4757          output[0] = input[0] * f1;
4758          output[1] = input[1] * f1;
4759          output[2] = input[2] * f1;
4760       }
4761       if (req_comp == 2) output[1] = 1;
4762       if (req_comp == 4) output[3] = 1;
4763    } else {
4764       switch (req_comp) {
4765          case 4: output[3] = 1; /* fallthrough */
4766          case 3: output[0] = output[1] = output[2] = 0;
4767                  break;
4768          case 2: output[1] = 1; /* fallthrough */
4769          case 1: output[0] = 0;
4770                  break;
4771       }
4772    }
4773 }
4774 
4775 
4776 static float *hdr_load(stbi *s, int *x, int *y, int *comp, int req_comp)
4777 {
4778    char buffer[HDR_BUFLEN];
4779    char *token;
4780    int valid = 0;
4781    int width, height;
4782    stbi_uc *scanline;
4783    float *hdr_data;
4784    int len;
4785    unsigned char count, value;
4786    int i, j, k, c1,c2, z;
4787 
4788 
4789    // Check identifier
4790    if (strcmp(hdr_gettoken(s,buffer), "#?RADIANCE") != 0)
4791       return epf("not HDR", "Corrupt HDR image");
4792 
4793    // Parse header
4794    for(;;) {
4795       token = hdr_gettoken(s,buffer);
4796       if (token[0] == 0) break;
4797       if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
4798    }
4799 
4800    if (!valid)    return epf("unsupported format", "Unsupported HDR format");
4801 
4802    // Parse width and height
4803    // can't use sscanf() if we're not using stdio!
4804    token = hdr_gettoken(s,buffer);
4805    if (strncmp(token, "-Y ", 3))  return epf("unsupported data layout", "Unsupported HDR format");
4806    token += 3;
4807    height = strtol(token, &token, 10);
4808    while (*token == ' ') ++token;
4809    if (strncmp(token, "+X ", 3))  return epf("unsupported data layout", "Unsupported HDR format");
4810    token += 3;
4811    width = strtol(token, NULL, 10);
4812 
4813    *x = width;
4814    *y = height;
4815 
4816    *comp = 3;
4817    if (req_comp == 0) req_comp = 3;
4818 
4819    // Read data
4820    hdr_data = MALLOC(height * width * req_comp * sizeof(float));
4821 
4822    // Load image data
4823    // image data is stored as some number of sca
4824    if ( width < 8 || width >= 32768) {
4825       // Read flat data
4826       for (j=0; j < height; ++j) {
4827          for (i=0; i < width; ++i) {
4828             stbi_uc rgbe[4];
4829            main_decode_loop:
4830             getn(s, rgbe, 4);
4831             hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
4832          }
4833       }
4834    } else {
4835       // Read RLE-encoded data
4836       scanline = NULL;
4837 
4838       for (j = 0; j < height; ++j) {
4839          c1 = get8(s);
4840          c2 = get8(s);
4841          len = get8(s);
4842          if (c1 != 2 || c2 != 2 || (len & 0x80)) {
4843             // not run-length encoded, so we have to actually use THIS data as a decoded
4844             // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
4845             uint8 rgbe[4];
4846             rgbe[0] = (uint8) c1;
4847             rgbe[1] = (uint8) c2;
4848             rgbe[2] = (uint8) len;
4849             rgbe[3] = (uint8) get8u(s);
4850             hdr_convert(hdr_data, rgbe, req_comp);
4851             i = 1;
4852             j = 0;
4853             FREE(scanline);
4854             goto main_decode_loop; // yes, this makes no sense
4855          }
4856          len <<= 8;
4857          len |= get8(s);
4858          if (len != width) { FREE(hdr_data); FREE(scanline); return epf("invalid decoded scanline length", "corrupt HDR"); }
4859          if (scanline == NULL) scanline = MALLOC(width * 4);
4860 
4861          for (k = 0; k < 4; ++k) {
4862             i = 0;
4863             while (i < width) {
4864                count = get8u(s);
4865                if (count > 128) {
4866                   // Run
4867                   value = get8u(s);
4868                   count -= 128;
4869                   for (z = 0; z < count; ++z)
4870                      scanline[i++ * 4 + k] = value;
4871                } else {
4872                   // Dump
4873                   for (z = 0; z < count; ++z)
4874                      scanline[i++ * 4 + k] = get8u(s);
4875                }
4876             }
4877          }
4878          for (i=0; i < width; ++i)
4879             hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
4880       }
4881       FREE(scanline);
4882    }
4883 
4884    return hdr_data;
4885 }
4886 
4887 #ifndef STBI_NO_STDIO
4888 float *stbi_hdr_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
4889 {
4890    stbi s;
4891    start_file(&s,f);
4892    return hdr_load(&s,x,y,comp,req_comp);
4893 }
4894 #endif
4895 
4896 float *stbi_hdr_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
4897 {
4898    stbi s;
4899    start_mem(&s,buffer, len);
4900    return hdr_load(&s,x,y,comp,req_comp);
4901 }
4902 
4903 #endif // STBI_NO_HDR
4904 
4905 
4906 #ifndef STBI_NO_STDIO
// Opens 'filename' and reports image dimensions and channel count
// through stbi_info_from_file(). If the file cannot be opened, returns
// the value of e() for that failure.
int stbi_info(char const *filename, int *x, int *y, int *comp)
{
    int found;
    FILE *fp = fopen(filename, "rb");

    if (fp == NULL)
        return e("can't fopen", "Unable to open file");

    found = stbi_info_from_file(fp, x, y, comp);
    fclose(fp);
    return found;
}
4916 
// Tries each format's info probe on 'f' in turn (JPEG, PNG, GIF, then
// TGA) and returns 1 as soon as one succeeds; otherwise returns the
// value of e() for an unknown image type.
int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
{
   // @TODO: stbi_bmp_info_from_file
   // @TODO: stbi_psd_info_from_file
   // @TODO: stbi_hdr_info_from_file (when STBI_NO_HDR is not defined)
   // test tga last because it's a crappy test!
   if (stbi_jpeg_info_from_file(f, x, y, comp) ||
       stbi_png_info_from_file(f, x, y, comp)  ||
       stbi_gif_info_from_file(f, x, y, comp)  ||
       stbi_tga_info_from_file(f, x, y, comp))
      return 1;

   return e("unknown image type", "Image not of any known type, or corrupt");
}
4935 #endif // !STBI_NO_STDIO
4936 
4937 int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
4938 {
4939    if (stbi_jpeg_info_from_memory(buffer, len, x, y, comp))
4940        return 1;
4941    if (stbi_png_info_from_memory(buffer, len, x, y, comp))
4942        return 1;
4943    if (stbi_gif_info_from_memory(buffer, len, x, y, comp))
4944        return 1;
4945    // @TODO: stbi_bmp_info_from_memory
4946    // @TODO: stbi_psd_info_from_memory
4947    #ifndef STBI_NO_HDR
4948    // @TODO: stbi_hdr_info_from_memory
4949    #endif
4950    // test tga last because it's a crappy test!
4951    if (stbi_tga_info_from_memory(buffer, len, x, y, comp))
4952        return 1;
4953    return e("unknown image type", "Image not of any known type, or corrupt");
4954 }
4955 
4956 #endif // STBI_HEADER_FILE_ONLY
4957 
4958 /*
4959    revision history:
4960       1.29 (2010-08-16) various warning fixes from Aurelien Pocheville
4961       1.28 (2010-08-01) fix bug in GIF palette transparency (SpartanJ)
4962       1.27 (2010-08-01)
4963              cast-to-uint8 to fix warnings
4964       1.26 (2010-07-24)
4965              fix bug in file buffering for PNG reported by SpartanJ
4966       1.25 (2010-07-17)
4967              refix trans_data warning (Won Chun)
4968       1.24 (2010-07-12)
4969              perf improvements reading from files on platforms with lock-heavy fgetc()
4970              minor perf improvements for jpeg
4971              deprecated type-specific functions so we'll get feedback if they're needed
4972              attempt to fix trans_data warning (Won Chun)
4973       1.23   fixed bug in iPhone support
4974       1.22 (2010-07-10)
             removed image *writing* support
4977              stbi_info support from Jetro Lauha
4978              GIF support from Jean-Marc Lienher
4979              iPhone PNG-extensions from James Brown
4980              warning-fixes from Nicolas Schulz and Janez Zemva (i.e. Janez (U+017D)emva)
4981       1.21   fix use of 'uint8' in header (reported by jon blow)
4982       1.20   added support for Softimage PIC, by Tom Seddon
4983       1.19   bug in interlaced PNG corruption check (found by ryg)
4984       1.18 2008-08-02
4985              fix a threading bug (local mutable static)
4986       1.17   support interlaced PNG
4987       1.16   major bugfix - convert_format converted one too many pixels
4988       1.15   initialize some fields for thread safety
4989       1.14   fix threadsafe conversion bug
4990              header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
4991       1.13   threadsafe
4992       1.12   const qualifiers in the API
4993       1.11   Support installable IDCT, colorspace conversion routines
4994       1.10   Fixes for 64-bit (don't use "unsigned long")
4995              optimized upsampling by Fabian "ryg" Giesen
4996       1.09   Fix format-conversion for PSD code (bad global variables!)
4997       1.08   Thatcher Ulrich's PSD code integrated by Nicolas Schulz
4998       1.07   attempt to fix C++ warning/errors again
4999       1.06   attempt to fix C++ warning/errors again
5000       1.05   fix TGA loading to return correct *comp and use good luminance calc
5001       1.04   default float alpha is 1, not 255; use 'void *' for stbi_image_free
5002       1.03   bugfixes to STBI_NO_STDIO, STBI_NO_HDR
5003       1.02   support for (subset of) HDR files, float interface for preferred access to them
5004       1.01   fix bug: possible bug in handling right-side up bmps... not sure
5005              fix bug: the stbi_bmp_load() and stbi_tga_load() functions didn't work at all
5006       1.00   interface to zlib that skips zlib header
5007       0.99   correct handling of alpha in palette
5008       0.98   TGA loader by lonesock; dynamically add loaders (untested)
5009       0.97   jpeg errors on too large a file; also catch another malloc failure
5010       0.96   fix detection of invalid v value - particleman@mollyrocket forum
5011       0.95   during header scan, seek to markers in case of padding
5012       0.94   STBI_NO_STDIO to disable stdio usage; rename all #defines the same
5013       0.93   handle jpegtran output; verbose errors
5014       0.92   read 4,8,16,24,32-bit BMP files of several formats
5015       0.91   output 24-bit Windows 3.0 BMP files
5016       0.90   fix a few more warnings; bump version number to approach 1.0
5017       0.61   bugfixes due to Marc LeBlanc, Christopher Lloyd
5018       0.60   fix compiling as c++
5019       0.59   fix warnings: merge Dave Moore's -Wall fixes
5020       0.58   fix bug: zlib uncompressed mode len/nlen was wrong endian
5021       0.57   fix bug: jpg last huffman symbol before marker was >9 bits but less
5022                       than 16 available
5023       0.56   fix bug: zlib uncompressed mode len vs. nlen
5024       0.55   fix bug: restart_interval not initialized to 0
5025       0.54   allow NULL for 'int *comp'
5026       0.53   fix bug in png 3->4; speedup png decoding
5027       0.52   png handles req_comp=3,4 directly; minor cleanup; jpeg comments
5028       0.51   obey req_comp requests, 1-component jpegs return as 1-component,
5029              on 'test' only check type, not whether we support this variant
5030 */
5031