xref: /netbsd-src/sys/dev/stbi/stb_image.c (revision daf6c4152fcddc27c445489775ed1f66ab4ea9a9)
1 /* stbi-1.29 - public domain JPEG/PNG reader - http://nothings.org/stb_image.c
2    when you control the images you're loading
3                                      no warranty implied; use at your own risk
4 
5    QUICK NOTES:
6       Primarily of interest to game developers and other people who can
7           avoid problematic images and only need the trivial interface
8 
9       JPEG baseline (no JPEG progressive)
10       PNG 8-bit only
11 
12       TGA (not sure what subset, if a subset)
13       BMP non-1bpp, non-RLE
14       PSD (composited view only, no extra channels)
15 
16       GIF (*comp always reports as 4-channel)
17       HDR (radiance rgbE format)
18       PIC (Softimage PIC)
19 
20       - decoded from memory or through stdio FILE (define STBI_NO_STDIO to remove code)
21       - supports installable dequantizing-IDCT, YCbCr-to-RGB conversion (define STBI_SIMD)
22 
23    Latest revisions:
24       1.29 (2010-08-16) various warning fixes from Aurelien Pocheville
25       1.28 (2010-08-01) fix bug in GIF palette transparency (SpartanJ)
26       1.27 (2010-08-01) cast-to-uint8 to fix warnings (Laurent Gomila)
27                         allow trailing 0s at end of image data (Laurent Gomila)
28       1.26 (2010-07-24) fix bug in file buffering for PNG reported by SpartanJ
29       1.25 (2010-07-17) refix trans_data warning (Won Chun)
30       1.24 (2010-07-12) perf improvements reading from files
31                         minor perf improvements for jpeg
32                         deprecated type-specific functions in hope of feedback
33                         attempt to fix trans_data warning (Won Chun)
34       1.23              fixed bug in iPhone support
35       1.22 (2010-07-10) removed image *writing* support to stb_image_write.h
36                         stbi_info support from Jetro Lauha
37                         GIF support from Jean-Marc Lienher
38                         iPhone PNG-extensions from James Brown
39                         warning-fixes from Nicolas Schulz and Janez Zemva
40       1.21              fix use of 'uint8' in header (reported by jon blow)
41       1.20              added support for Softimage PIC, by Tom Seddon
42 
43    See end of file for full revision history.
44 
45    TODO:
46       stbi_info support for BMP,PSD,HDR,PIC
47       rewrite stbi_info and load_file variations to share file handling code
48            (current system allows individual functions to be called directly,
49            since each does all the work, but I doubt anyone uses this in practice)
50 
51 
52  ============================    Contributors    =========================
53 
54  Image formats                                Optimizations & bugfixes
55     Sean Barrett (jpeg, png, bmp)                Fabian "ryg" Giesen
56     Nicolas Schulz (hdr, psd)
57     Jonathan Dummer (tga)                     Bug fixes & warning fixes
58     Jean-Marc Lienher (gif)                      Marc LeBlanc
59     Tom Seddon (pic)                             Christpher Lloyd
60     Thatcher Ulrich (psd)                        Dave Moore
61                                                  Won Chun
62                                                  the Horde3D community
63  Extensions, features                            Janez Zemva
64     Jetro Lauha (stbi_info)                      Jonathan Blow
65     James "moose2000" Brown (iPhone PNG)         Laurent Gomila
66                                                  Aurelien Pocheville
67 
68  If your name should be here but isn't, let Sean know.
69 
70 */
71 
72 #ifdef _KERNEL
73 #include <dev/stbi/stbiconfig.h>
74 #endif
75 
76 #ifndef STBI_INCLUDE_STB_IMAGE_H
77 #define STBI_INCLUDE_STB_IMAGE_H
78 
79 // To get a header file for this, either cut and paste the header,
80 // or create stb_image.h, #define STBI_HEADER_FILE_ONLY, and
81 // then include stb_image.c from it.
82 
83 ////   begin header file  ////////////////////////////////////////////////////
84 //
85 // Limitations:
86 //    - no jpeg progressive support
87 //    - non-HDR formats support 8-bit samples only (jpeg, png)
88 //    - no delayed line count (jpeg) -- IJG doesn't support either
89 //    - no 1-bit BMP
90 //    - GIF always returns *comp=4
91 //
92 // Basic usage (see HDR discussion below):
93 //    int x,y,n;
94 //    unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
95 //    // ... process data if not NULL ...
96 //    // ... x = width, y = height, n = # 8-bit components per pixel ...
97 //    // ... replace '0' with '1'..'4' to force that many components per pixel
98 //    stbi_image_free(data)
99 //
100 // Standard parameters:
101 //    int *x       -- outputs image width in pixels
102 //    int *y       -- outputs image height in pixels
103 //    int *comp    -- outputs # of image components in image file
104 //    int req_comp -- if non-zero, # of image components requested in result
105 //
106 // The return value from an image loader is an 'unsigned char *' which points
107 // to the pixel data. The pixel data consists of *y scanlines of *x pixels,
108 // with each pixel consisting of N interleaved 8-bit components; the first
109 // pixel pointed to is top-left-most in the image. There is no padding between
110 // image scanlines or between pixels, regardless of format. The number of
111 // components N is 'req_comp' if req_comp is non-zero, or *comp otherwise.
112 // If req_comp is non-zero, *comp has the number of components that _would_
113 // have been output otherwise. E.g. if you set req_comp to 4, you will always
114 // get RGBA output, but you can check *comp to easily see if it's opaque.
115 //
116 // An output image with N components has the following components interleaved
117 // in this order in each pixel:
118 //
119 //     N=#comp     components
120 //       1           grey
121 //       2           grey, alpha
122 //       3           red, green, blue
123 //       4           red, green, blue, alpha
124 //
125 // If image loading fails for any reason, the return value will be NULL,
126 // and *x, *y, *comp will be unchanged. The function stbi_failure_reason()
127 // can be queried for an extremely brief, end-user unfriendly explanation
128 // of why the load failed. Define STBI_NO_FAILURE_STRINGS to avoid
129 // compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
130 // more user-friendly ones.
131 //
132 // Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
133 //
134 // ===========================================================================
135 //
136 // iPhone PNG support:
137 //
138 // By default we convert iphone-formatted PNGs back to RGB; nominally they
139 // would silently load as BGR, except the existing code should have just
140 // failed on such iPhone PNGs. But you can disable this conversion by
141 // calling stbi_convert_iphone_png_to_rgb(0), in which case
142 // you will always just get the native iphone "format" through.
143 //
144 // Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
145 // pixel to remove any premultiplied alpha *only* if the image file explicitly
146 // says there's premultiplied data (currently only happens in iPhone images,
147 // and only if iPhone convert-to-rgb processing is on).
148 //
149 // ===========================================================================
150 //
151 // HDR image support   (disable by defining STBI_NO_HDR)
152 //
153 // stb_image now supports loading HDR images in general, and currently
154 // the Radiance .HDR file format, although the support is provided
155 // generically. You can still load any file through the existing interface;
156 // if you attempt to load an HDR file, it will be automatically remapped to
157 // LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
158 // both of these constants can be reconfigured through this interface:
159 //
160 //     stbi_hdr_to_ldr_gamma(2.2f);
161 //     stbi_hdr_to_ldr_scale(1.0f);
162 //
163 // (note, do not use _inverse_ constants; stb_image will invert them
164 // appropriately).
165 //
166 // Additionally, there is a new, parallel interface for loading files as
167 // (linear) floats to preserve the full dynamic range:
168 //
169 //    float *data = stbi_loadf(filename, &x, &y, &n, 0);
170 //
171 // If you load LDR images through this interface, those images will
172 // be promoted to floating point values, run through the inverse of
173 // constants corresponding to the above:
174 //
175 //     stbi_ldr_to_hdr_scale(1.0f);
176 //     stbi_ldr_to_hdr_gamma(2.2f);
177 //
178 // Finally, given a filename (or an open file or memory block--see header
179 // file for details) containing image data, you can query for the "most
180 // appropriate" interface to use (that is, whether the image is HDR or
181 // not), using:
182 //
183 //     stbi_is_hdr(char *filename);
184 
185 #ifndef STBI_NO_STDIO
186 #include <stdio.h>
187 #endif
188 
189 #define STBI_VERSION 1
190 
// Component-count selectors. Each non-default value equals the number of
// interleaved 8-bit channels per pixel that it names.
enum
{
   // 0: "no preference" -- only meaningful when passed as req_comp
   STBI_default    = 0,

   STBI_grey       = 1,   // grey
   STBI_grey_alpha = 2,   // grey, alpha
   STBI_rgb        = 3,   // red, green, blue
   STBI_rgb_alpha  = 4    // red, green, blue, alpha
};
200 
201 typedef unsigned char stbi_uc;
202 
203 #ifdef __cplusplus
204 extern "C" {
205 #endif
206 
207 // PRIMARY API - works on images of any type
208 
209 // load image by filename, open file, or memory buffer
210 extern stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
211 
212 #ifndef STBI_NO_STDIO
213 extern stbi_uc *stbi_load            (char const *filename,     int *x, int *y, int *comp, int req_comp);
214 extern stbi_uc *stbi_load_from_file  (FILE *f,                  int *x, int *y, int *comp, int req_comp);
215 // for stbi_load_from_file, file pointer is left pointing immediately after image
216 #endif
217 
218 #ifndef STBI_NO_HDR
219    extern float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
220 
221    #ifndef STBI_NO_STDIO
222    extern float *stbi_loadf            (char const *filename,   int *x, int *y, int *comp, int req_comp);
223    extern float *stbi_loadf_from_file  (FILE *f,                int *x, int *y, int *comp, int req_comp);
224    #endif
225 
226    extern void   stbi_hdr_to_ldr_gamma(float gamma);
227    extern void   stbi_hdr_to_ldr_scale(float scale);
228 
229    extern void   stbi_ldr_to_hdr_gamma(float gamma);
230    extern void   stbi_ldr_to_hdr_scale(float scale);
231 #endif // STBI_NO_HDR
232 
233 // get a VERY brief reason for failure
234 // NOT THREADSAFE
235 extern const char *stbi_failure_reason  (void);
236 
237 // free the loaded image -- this is just free()
238 extern void     stbi_image_free      (void *retval_from_stbi_load);
239 
240 // get image dimensions & components without fully decoding
241 extern int      stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
242 extern int      stbi_is_hdr_from_memory(stbi_uc const *buffer, int len);
243 
244 #ifndef STBI_NO_STDIO
245 extern int      stbi_info            (char const *filename,     int *x, int *y, int *comp);
246 extern int      stbi_info_from_file  (FILE *f,                  int *x, int *y, int *comp);
247 
248 extern int      stbi_is_hdr          (char const *filename);
249 extern int      stbi_is_hdr_from_file(FILE *f);
250 #endif
251 
252 // for image formats that explicitly notate that they have premultiplied alpha,
253 // we just return the colors as stored in the file. set this flag to force
254 // unpremultiplication. results are undefined if the unpremultiply overflows.
255 extern void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);
256 
257 // indicate whether we should process iphone images back to canonical format,
258 // or just pass them through "as-is"
259 extern void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
260 
261 
262 // ZLIB client - used by PNG, available for other purposes
263 
264 extern char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen);
265 extern char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header);
266 extern char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen);
267 extern int   stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
268 
269 extern char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen);
270 extern int   stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
271 
272 // define new loaders
273 typedef struct
274 {
275    int       (*test_memory)(stbi_uc const *buffer, int len);
276    stbi_uc * (*load_from_memory)(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
277    #ifndef STBI_NO_STDIO
278    int       (*test_file)(FILE *f);
279    stbi_uc * (*load_from_file)(FILE *f, int *x, int *y, int *comp, int req_comp);
280    #endif
281 } stbi_loader;
282 
283 // register a loader by filling out the above structure (you must define ALL functions)
284 // returns 1 if added or already added, 0 if not added (too many loaders)
285 // NOT THREADSAFE
286 extern int stbi_register_loader(stbi_loader *loader);
287 
288 // define faster low-level operations (typically SIMD support)
289 #ifdef STBI_SIMD
290 typedef void (*stbi_idct_8x8)(stbi_uc *out, int out_stride, short data[64], unsigned short *dequantize);
291 // compute an integer IDCT on "input"
292 //     input[x] = data[x] * dequantize[x]
293 //     write results to 'out': 64 samples, each run of 8 spaced by 'out_stride'
294 //                             CLAMP results to 0..255
295 typedef void (*stbi_YCbCr_to_RGB_run)(stbi_uc *output, stbi_uc const  *y, stbi_uc const *cb, stbi_uc const *cr, int count, int step);
296 // compute a conversion from YCbCr to RGB
297 //     'count' pixels
298 //     write pixels to 'output'; each pixel is 'step' bytes (either 3 or 4; if 4, write '255' as 4th), order R,G,B
299 //     y: Y input channel
300 //     cb: Cb input channel; scale/biased to be 0..255
301 //     cr: Cr input channel; scale/biased to be 0..255
302 
303 extern void stbi_install_idct(stbi_idct_8x8 func);
304 extern void stbi_install_YCbCr_to_RGB(stbi_YCbCr_to_RGB_run func);
305 #endif // STBI_SIMD
306 
307 
308 
309 
310 // TYPE-SPECIFIC ACCESS
311 
312 #ifdef STBI_TYPE_SPECIFIC_FUNCTIONS
313 
314 // is it a jpeg?
315 extern int      stbi_jpeg_test_memory     (stbi_uc const *buffer, int len);
316 extern stbi_uc *stbi_jpeg_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
317 extern int      stbi_jpeg_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
318 
319 #ifndef STBI_NO_STDIO
320 extern stbi_uc *stbi_jpeg_load            (char const *filename,     int *x, int *y, int *comp, int req_comp);
321 extern int      stbi_jpeg_test_file       (FILE *f);
322 extern stbi_uc *stbi_jpeg_load_from_file  (FILE *f,                  int *x, int *y, int *comp, int req_comp);
323 
324 extern int      stbi_jpeg_info            (char const *filename,     int *x, int *y, int *comp);
325 extern int      stbi_jpeg_info_from_file  (FILE *f,                  int *x, int *y, int *comp);
326 #endif
327 
328 // is it a png?
329 extern int      stbi_png_test_memory      (stbi_uc const *buffer, int len);
330 extern stbi_uc *stbi_png_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
331 extern int      stbi_png_info_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp);
332 
333 #ifndef STBI_NO_STDIO
334 extern stbi_uc *stbi_png_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
335 extern int      stbi_png_info             (char const *filename,     int *x, int *y, int *comp);
336 extern int      stbi_png_test_file        (FILE *f);
337 extern stbi_uc *stbi_png_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
338 extern int      stbi_png_info_from_file   (FILE *f,                  int *x, int *y, int *comp);
339 #endif
340 
341 // is it a bmp?
342 extern int      stbi_bmp_test_memory      (stbi_uc const *buffer, int len);
343 
344 extern stbi_uc *stbi_bmp_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
345 extern stbi_uc *stbi_bmp_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
346 #ifndef STBI_NO_STDIO
347 extern int      stbi_bmp_test_file        (FILE *f);
348 extern stbi_uc *stbi_bmp_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
349 #endif
350 
351 // is it a tga?
352 extern int      stbi_tga_test_memory      (stbi_uc const *buffer, int len);
353 
354 extern stbi_uc *stbi_tga_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
355 extern stbi_uc *stbi_tga_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
356 extern int stbi_tga_info_from_memory      (stbi_uc const *buffer, int len, int *x, int *y, int *comp);
357 #ifndef STBI_NO_STDIO
358 extern int stbi_tga_info_from_file        (FILE *f, int *x, int *y, int *comp);
359 extern int      stbi_tga_test_file        (FILE *f);
360 extern stbi_uc *stbi_tga_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
361 #endif
362 
363 // is it a psd?
364 extern int      stbi_psd_test_memory      (stbi_uc const *buffer, int len);
365 
366 extern stbi_uc *stbi_psd_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
367 extern stbi_uc *stbi_psd_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
368 #ifndef STBI_NO_STDIO
369 extern int      stbi_psd_test_file        (FILE *f);
370 extern stbi_uc *stbi_psd_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
371 #endif
372 
373 // is it an hdr?
374 extern int      stbi_hdr_test_memory      (stbi_uc const *buffer, int len);
375 
376 extern float *  stbi_hdr_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
377 extern float *  stbi_hdr_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
378 #ifndef STBI_NO_STDIO
379 extern int      stbi_hdr_test_file        (FILE *f);
380 extern float *  stbi_hdr_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
381 #endif
382 
383 // is it a pic?
384 extern int      stbi_pic_test_memory      (stbi_uc const *buffer, int len);
385 
386 extern stbi_uc *stbi_pic_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
387 extern stbi_uc *stbi_pic_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
388 #ifndef STBI_NO_STDIO
389 extern int      stbi_pic_test_file        (FILE *f);
390 extern stbi_uc *stbi_pic_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
391 #endif
392 
393 // is it a gif?
394 extern int      stbi_gif_test_memory      (stbi_uc const *buffer, int len);
395 
396 extern stbi_uc *stbi_gif_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
397 extern stbi_uc *stbi_gif_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
398 extern int      stbi_gif_info_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp);
399 
400 #ifndef STBI_NO_STDIO
401 extern int      stbi_gif_test_file        (FILE *f);
402 extern stbi_uc *stbi_gif_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
403 extern int      stbi_gif_info             (char const *filename,     int *x, int *y, int *comp);
404 extern int      stbi_gif_info_from_file   (FILE *f,                  int *x, int *y, int *comp);
405 #endif
406 
407 #endif//STBI_TYPE_SPECIFIC_FUNCTIONS
408 
409 
410 
411 
412 #ifdef __cplusplus
413 }
414 #endif
415 
416 //
417 //
418 ////   end header file   /////////////////////////////////////////////////////
419 #endif // STBI_INCLUDE_STB_IMAGE_H
420 
421 #ifndef STBI_HEADER_FILE_ONLY
422 
423 #ifndef STBI_NO_HDR
424 #include <math.h>  // ldexp
425 #include <string.h> // strcmp
426 #endif
427 
428 #ifndef STBI_NO_STDIO
429 #include <stdio.h>
430 #endif
431 #ifdef _KERNEL
432 #include <sys/cdefs.h>
433 __KERNEL_RCSID(0, "$NetBSD: stb_image.c,v 1.1 2011/02/06 23:13:04 jmcneill Exp $");
434 #include <sys/param.h>
435 #include <sys/systm.h>
436 #include <sys/kernel.h>
437 #include <sys/types.h>
438 #include <sys/malloc.h>
439 #else
440 #include <stdlib.h>
441 #include <memory.h>
442 #include <assert.h>
443 #include <stdarg.h>
444 #endif
445 
446 #ifdef _KERNEL
447 #define	MALLOC(size)		malloc((size), M_TEMP, M_WAITOK)
448 #define	REALLOC(ptr, size)	realloc((ptr), (size), M_TEMP, M_WAITOK)
449 #define	FREE(ptr)		free((ptr), M_TEMP)
450 #else
451 #define	MALLOC(size)		malloc((size))
452 #define	REALLOC(ptr, size)	realloc((ptr), (size))
453 #define	FREE(ptr)		free((ptr))
454 #endif
455 
456 #ifndef _MSC_VER
457   #ifdef __cplusplus
458   #define __forceinline inline
459   #else
460   #define __forceinline
461   #endif
462 #endif
463 
464 
465 // implementation:
466 typedef unsigned char uint8;
467 typedef unsigned short uint16;
468 typedef   signed short  int16;
469 typedef unsigned int   uint32;
470 typedef   signed int    int32;
471 #ifndef __NetBSD__
472 typedef unsigned int   uint;
473 #endif
474 
475 // should produce compiler error if size is wrong
476 typedef unsigned char validate_uint32[sizeof(uint32)==4 ? 1 : -1];
477 
478 #if defined(STBI_NO_STDIO) && !defined(STBI_NO_WRITE)
479 #define STBI_NO_WRITE
480 #endif
481 
482 #define STBI_NOTUSED(v)  v=v
483 
484 #ifdef _MSC_VER
485 #define STBI_HAS_LRTOL
486 #endif
487 
// stbi_lrot(x,y): rotate the 32-bit unsigned value x left by y bits.
// Uses the compiler intrinsic where available. The portable fallback masks
// both shift counts into 0..31 so that y == 0 does not turn into a shift by
// the full width of the type, which is undefined behaviour in C.
// Note that the fallback evaluates both x and y more than once.
#ifdef STBI_HAS_LRTOL
   #define stbi_lrot(x,y)  _lrotl(x,y)
#else
   #define stbi_lrot(x,y)  (((x) << ((y) & 31)) | ((x) >> ((32 - (y)) & 31)))
#endif
493 
494 //////////////////////////////////////////////////////////////////////////////
495 //
496 // Generic API that works on all image types
497 //
498 
499 // deprecated functions
500 
501 // is it a jpeg?
502 extern int      stbi_jpeg_test_memory     (stbi_uc const *buffer, int len);
503 extern stbi_uc *stbi_jpeg_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
504 extern int      stbi_jpeg_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
505 
506 #ifndef STBI_NO_STDIO
507 extern stbi_uc *stbi_jpeg_load            (char const *filename,     int *x, int *y, int *comp, int req_comp);
508 extern int      stbi_jpeg_test_file       (FILE *f);
509 extern stbi_uc *stbi_jpeg_load_from_file  (FILE *f,                  int *x, int *y, int *comp, int req_comp);
510 
511 extern int      stbi_jpeg_info            (char const *filename,     int *x, int *y, int *comp);
512 extern int      stbi_jpeg_info_from_file  (FILE *f,                  int *x, int *y, int *comp);
513 #endif
514 
515 // is it a png?
516 extern int      stbi_png_test_memory      (stbi_uc const *buffer, int len);
517 extern stbi_uc *stbi_png_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
518 extern int      stbi_png_info_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp);
519 
520 #ifndef STBI_NO_STDIO
521 extern stbi_uc *stbi_png_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
522 extern int      stbi_png_info             (char const *filename,     int *x, int *y, int *comp);
523 extern int      stbi_png_test_file        (FILE *f);
524 extern stbi_uc *stbi_png_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
525 extern int      stbi_png_info_from_file   (FILE *f,                  int *x, int *y, int *comp);
526 #endif
527 
528 // is it a bmp?
529 extern int      stbi_bmp_test_memory      (stbi_uc const *buffer, int len);
530 
531 extern stbi_uc *stbi_bmp_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
532 extern stbi_uc *stbi_bmp_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
533 #ifndef STBI_NO_STDIO
534 extern int      stbi_bmp_test_file        (FILE *f);
535 extern stbi_uc *stbi_bmp_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
536 #endif
537 
538 // is it a tga?
539 extern int      stbi_tga_test_memory      (stbi_uc const *buffer, int len);
540 
541 extern stbi_uc *stbi_tga_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
542 extern stbi_uc *stbi_tga_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
543 #ifndef STBI_NO_STDIO
544 extern int      stbi_tga_test_file        (FILE *f);
545 extern stbi_uc *stbi_tga_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
546 #endif
547 
548 // is it a psd?
549 extern int      stbi_psd_test_memory      (stbi_uc const *buffer, int len);
550 
551 extern stbi_uc *stbi_psd_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
552 extern stbi_uc *stbi_psd_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
553 #ifndef STBI_NO_STDIO
554 extern int      stbi_psd_test_file        (FILE *f);
555 extern stbi_uc *stbi_psd_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
556 #endif
557 
558 // is it an hdr?
559 extern int      stbi_hdr_test_memory      (stbi_uc const *buffer, int len);
560 
561 extern float *  stbi_hdr_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
562 extern float *  stbi_hdr_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
563 #ifndef STBI_NO_STDIO
564 extern int      stbi_hdr_test_file        (FILE *f);
565 extern float *  stbi_hdr_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
566 #endif
567 
568 // is it a pic?
569 extern int      stbi_pic_test_memory      (stbi_uc const *buffer, int len);
570 
571 extern stbi_uc *stbi_pic_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
572 extern stbi_uc *stbi_pic_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
573 #ifndef STBI_NO_STDIO
574 extern int      stbi_pic_test_file        (FILE *f);
575 extern stbi_uc *stbi_pic_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
576 #endif
577 
578 // is it a gif?
579 extern int      stbi_gif_test_memory      (stbi_uc const *buffer, int len);
580 
581 extern stbi_uc *stbi_gif_load             (char const *filename,     int *x, int *y, int *comp, int req_comp);
582 extern stbi_uc *stbi_gif_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
583 extern int      stbi_gif_info_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp);
584 
585 #ifndef STBI_NO_STDIO
586 extern int      stbi_gif_test_file        (FILE *f);
587 extern stbi_uc *stbi_gif_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp);
588 extern int      stbi_gif_info             (char const *filename,     int *x, int *y, int *comp);
589 extern int      stbi_gif_info_from_file   (FILE *f,                  int *x, int *y, int *comp);
590 #endif
591 
592 
// Message recorded by the most recent failure. This is plain file-scope
// state with no locking, so the failure-reason API is not threadsafe.
static const char *failure_reason;

// Returns the last recorded failure message, or NULL if none has been
// recorded yet.
const char *stbi_failure_reason(void)
{
   const char *reason = failure_reason;
   return reason;
}

// e(terse, friendly) records a failure message and always evaluates to 0:
//   STBI_NO_FAILURE_STRINGS -- nothing is recorded; expands to a literal 0
//   STBI_FAILURE_USERMSG    -- the second (user-friendly) string is recorded
//   neither                 -- the first (terse) string is recorded
#ifdef STBI_NO_FAILURE_STRINGS
   #define e(x,y)  0
#else
static int e(const char *msg)
{
   failure_reason = msg;
   return 0;
}
   #ifdef STBI_FAILURE_USERMSG
   #define e(x,y)  e(y)
   #else
   #define e(x,y)  e(x)
   #endif
#endif

// Pointer-returning variants: record the message through e() and yield a
// NULL of the appropriate pointer type, for use in `return` statements.
#define epf(x,y)   ((float *) (e(x,y)?NULL:NULL))
#define epuc(x,y)  ((unsigned char *) (e(x,y)?NULL:NULL))
619 
// Release a buffer obtained from one of the stbi load functions by handing
// it to the FREE() allocator wrapper.
void stbi_image_free(void *retval_from_stbi_load)
{
   void *pixels = retval_from_stbi_load;

   FREE(pixels);
}
624 
625 #define MAX_LOADERS  32
626 stbi_loader *loaders[MAX_LOADERS];
627 static int max_loaders = 0;
628 
629 int stbi_register_loader(stbi_loader *loader)
630 {
631    int i;
632    for (i=0; i < MAX_LOADERS; ++i) {
633       // already present?
634       if (loaders[i] == loader)
635          return 1;
636       // end of the list?
637       if (loaders[i] == NULL) {
638          loaders[i] = loader;
639          max_loaders = i+1;
640          return 1;
641       }
642    }
643    // no room for it
644    return 0;
645 }
646 
647 #ifndef STBI_NO_HDR
648 static float   *ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
649 static stbi_uc *hdr_to_ldr(float   *data, int x, int y, int comp);
650 #endif
651 
652 #ifndef STBI_NO_STDIO
// Load an image from the file named 'filename' as 8-bit-per-channel pixels.
// The file is opened in binary mode, decoded with stbi_load_from_file(), and
// closed again whether or not decoding succeeded.
// Returns the pixel buffer, or NULL (with a failure reason recorded) if the
// file cannot be opened; otherwise whatever stbi_load_from_file() returns.
unsigned char *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *pixels;
   FILE *fp = fopen(filename, "rb");

   if (fp == NULL)
      return epuc("can't fopen", "Unable to open file");

   pixels = stbi_load_from_file(fp, x, y, comp, req_comp);
   fclose(fp);
   return pixels;
}
662 
663 unsigned char *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
664 {
665    int i;
666    if (stbi_jpeg_test_file(f)) return stbi_jpeg_load_from_file(f,x,y,comp,req_comp);
667    if (stbi_png_test_file(f))  return stbi_png_load_from_file(f,x,y,comp,req_comp);
668    if (stbi_bmp_test_file(f))  return stbi_bmp_load_from_file(f,x,y,comp,req_comp);
669    if (stbi_gif_test_file(f))  return stbi_gif_load_from_file(f,x,y,comp,req_comp);
670    if (stbi_psd_test_file(f))  return stbi_psd_load_from_file(f,x,y,comp,req_comp);
671    if (stbi_pic_test_file(f))  return stbi_pic_load_from_file(f,x,y,comp,req_comp);
672 
673    #ifndef STBI_NO_HDR
674    if (stbi_hdr_test_file(f)) {
675       float *hdr = stbi_hdr_load_from_file(f, x,y,comp,req_comp);
676       return hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
677    }
678    #endif
679 
680    for (i=0; i < max_loaders; ++i)
681       if (loaders[i]->test_file(f))
682          return loaders[i]->load_from_file(f,x,y,comp,req_comp);
683    // test tga last because it's a crappy test!
684    if (stbi_tga_test_file(f))
685       return stbi_tga_load_from_file(f,x,y,comp,req_comp);
686    return epuc("unknown image type", "Image not of any known type, or corrupt");
687 }
688 #endif
689 
690 unsigned char *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
691 {
692    int i;
693    if (stbi_jpeg_test_memory(buffer,len)) return stbi_jpeg_load_from_memory(buffer,len,x,y,comp,req_comp);
694    if (stbi_png_test_memory(buffer,len))  return stbi_png_load_from_memory(buffer,len,x,y,comp,req_comp);
695    if (stbi_bmp_test_memory(buffer,len))  return stbi_bmp_load_from_memory(buffer,len,x,y,comp,req_comp);
696    if (stbi_gif_test_memory(buffer,len))  return stbi_gif_load_from_memory(buffer,len,x,y,comp,req_comp);
697    if (stbi_psd_test_memory(buffer,len))  return stbi_psd_load_from_memory(buffer,len,x,y,comp,req_comp);
698    if (stbi_pic_test_memory(buffer,len))  return stbi_pic_load_from_memory(buffer,len,x,y,comp,req_comp);
699 
700    #ifndef STBI_NO_HDR
701    if (stbi_hdr_test_memory(buffer, len)) {
702       float *hdr = stbi_hdr_load_from_memory(buffer, len,x,y,comp,req_comp);
703       return hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
704    }
705    #endif
706 
707    for (i=0; i < max_loaders; ++i)
708       if (loaders[i]->test_memory(buffer,len))
709          return loaders[i]->load_from_memory(buffer,len,x,y,comp,req_comp);
710    // test tga last because it's a crappy test!
711    if (stbi_tga_test_memory(buffer,len))
712       return stbi_tga_load_from_memory(buffer,len,x,y,comp,req_comp);
713    return epuc("unknown image type", "Image not of any known type, or corrupt");
714 }
715 
716 #ifndef STBI_NO_HDR
717 
718 #ifndef STBI_NO_STDIO
// Open `filename` in binary mode, decode it to float pixels through
// stbi_loadf_from_file(), and close the file again.
// Returns the decoder's result, or the result of epf() if the file
// cannot be opened.
float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   float *pixels;
   FILE *fp = fopen(filename, "rb");

   if (fp == NULL)
      return epf("can't fopen", "Unable to open file");

   pixels = stbi_loadf_from_file(fp, x, y, comp, req_comp);
   fclose(fp);
   return pixels;
}
728 
// Decode an image from an open FILE as float pixels.
// HDR files are decoded directly; anything else is decoded as 8-bit data
// by stbi_load_from_file() and then expanded with ldr_to_hdr(). The
// channel count passed to the conversion is req_comp, or *comp when
// req_comp is 0. Returns the result of epf() when the 8-bit load fails.
float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *ldr;

   #ifndef STBI_NO_HDR
   if (stbi_hdr_test_file(f))
      return stbi_hdr_load_from_file(f,x,y,comp,req_comp);
   #endif

   ldr = stbi_load_from_file(f, x, y, comp, req_comp);
   if (ldr == NULL)
      return epf("unknown image type", "Image not of any known type, or corrupt");

   return ldr_to_hdr(ldr, *x, *y, req_comp ? req_comp : *comp);
}
741 #endif
742 
743 float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
744 {
745    stbi_uc *data;
746    #ifndef STBI_NO_HDR
747    if (stbi_hdr_test_memory(buffer, len))
748       return stbi_hdr_load_from_memory(buffer, len,x,y,comp,req_comp);
749    #endif
750    data = stbi_load_from_memory(buffer, len, x, y, comp, req_comp);
751    if (data)
752       return ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
753    return epf("unknown image type", "Image not of any known type, or corrupt");
754 }
755 #endif
756 
// the is-hdr-or-not queries are defined regardless of whether STBI_NO_HDR
// is defined, for API simplicity; if STBI_NO_HDR is defined, they always
// report false!
760 
761 int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
762 {
763    #ifndef STBI_NO_HDR
764    return stbi_hdr_test_memory(buffer, len);
765    #else
766    STBI_NOTUSED(buffer);
767    STBI_NOTUSED(len);
768    return 0;
769    #endif
770 }
771 
772 #ifndef STBI_NO_STDIO
// Report whether the named file passes the HDR format probe.
// A file that cannot be opened is reported as "not HDR" (0).
extern int      stbi_is_hdr          (char const *filename)
{
   int is_hdr;
   FILE *fp = fopen(filename, "rb");

   if (fp == NULL)
      return 0;

   is_hdr = stbi_is_hdr_from_file(fp);
   fclose(fp);
   return is_hdr;
}
783 
// Report whether the open file passes the HDR format probe.
// Without HDR support compiled in, the answer is always 0.
extern int      stbi_is_hdr_from_file(FILE *f)
{
   #ifdef STBI_NO_HDR
   return 0;
   #else
   return stbi_hdr_test_file(f);
   #endif
}
792 
793 #endif
794 
795 #ifndef STBI_NO_HDR
// Conversion parameters shared by hdr_to_ldr() and ldr_to_hdr().
// The HDR->LDR pair is stored inverted (1/gamma, 1/scale) so the
// conversion can use them directly as exponent and multiplier.
static float h2l_gamma_i = 1.0f/2.2f;
static float h2l_scale_i = 1.0f;
static float l2h_gamma = 2.2f;
static float l2h_scale = 1.0f;

// set the gamma used when converting HDR data to LDR (stored as 1/gamma)
void   stbi_hdr_to_ldr_gamma(float gamma)
{
   h2l_gamma_i = 1.0f / gamma;
}

// set the scale used when converting HDR data to LDR (stored as 1/scale)
void   stbi_hdr_to_ldr_scale(float scale)
{
   h2l_scale_i = 1.0f / scale;
}

// set the gamma used when converting LDR data to HDR
void   stbi_ldr_to_hdr_gamma(float gamma)
{
   l2h_gamma = gamma;
}

// set the scale used when converting LDR data to HDR
void   stbi_ldr_to_hdr_scale(float scale)
{
   l2h_scale = scale;
}
804 #endif
805 
806 
807 //////////////////////////////////////////////////////////////////////////////
808 //
809 // Common code used by all image loaders
810 //
811 
// Scan modes shared by the image loaders.
// NOTE(review): presumably these select how much of a stream a decoder
// parses (full load / type check / header only) - confirm against users.
enum
{
   SCAN_load   = 0,
   SCAN_type   = 1,
   SCAN_header = 2
};
818 
819 typedef struct
820 {
821    uint32 img_x, img_y;
822    int img_n, img_out_n;
823 
824    #ifndef STBI_NO_STDIO
825    FILE  *img_file;
826    int buflen;
827    uint8 buffer_start[128];
828    int from_file;
829    #endif
830    uint8 const *img_buffer, *img_buffer_end;
831 } stbi;
832 
833 #ifndef STBI_NO_STDIO
834 static void start_file(stbi *s, FILE *f)
835 {
836    s->img_file = f;
837    s->buflen = sizeof(s->buffer_start);
838    s->img_buffer_end = s->buffer_start + s->buflen;
839    s->img_buffer = s->img_buffer_end;
840    s->from_file = 1;
841 }
842 #endif
843 
844 static void start_mem(stbi *s, uint8 const *buffer, int len)
845 {
846 #ifndef STBI_NO_STDIO
847    s->img_file = NULL;
848    s->from_file = 0;
849 #endif
850    s->img_buffer = (uint8 const *) buffer;
851    s->img_buffer_end = (uint8 const *) buffer+len;
852 }
853 
854 #ifndef STBI_NO_STDIO
855 static void refill_buffer(stbi *s)
856 {
857    int n = fread(s->buffer_start, 1, s->buflen, s->img_file);
858    if (n == 0) {
859       s->from_file = 0;
860       s->img_buffer = s->img_buffer_end-1;
861 #if 0
862       *s->img_buffer = 0;
863 #endif
864    } else {
865       s->img_buffer = s->buffer_start;
866       s->img_buffer_end = s->buffer_start + n;
867    }
868 }
869 #endif
870 
871 __forceinline static int get8(stbi *s)
872 {
873    if (s->img_buffer < s->img_buffer_end)
874       return *s->img_buffer++;
875 #ifndef STBI_NO_STDIO
876    if (s->from_file) {
877       refill_buffer(s);
878       return *s->img_buffer++;
879    }
880 #endif
881    return 0;
882 }
883 
884 __forceinline static int at_eof(stbi *s)
885 {
886 #ifndef STBI_NO_STDIO
887    if (s->img_file) {
888       if (!feof(s->img_file)) return 0;
889       // if feof() is true, check if buffer = end
890       // special case: we've only got the special 0 character at the end
891       if (s->from_file == 0) return 1;
892    }
893 #endif
894    return s->img_buffer >= s->img_buffer_end;
895 }
896 
897 __forceinline static uint8 get8u(stbi *s)
898 {
899    return (uint8) get8(s);
900 }
901 
902 static void skip(stbi *s, int n)
903 {
904 #ifndef STBI_NO_STDIO
905    if (s->img_file) {
906       int blen = s->img_buffer_end - s->img_buffer;
907       if (blen < n) {
908          s->img_buffer = s->img_buffer_end;
909          fseek(s->img_file, n - blen, SEEK_CUR);
910          return;
911       }
912    }
913 #endif
914    s->img_buffer += n;
915 }
916 
917 static int getn(stbi *s, stbi_uc *buffer, int n)
918 {
919 #ifndef STBI_NO_STDIO
920    if (s->img_file) {
921       int blen = s->img_buffer_end - s->img_buffer;
922       if (blen < n) {
923          int res;
924          memcpy(buffer, s->img_buffer, blen);
925          res = ((int) fread(buffer + blen, 1, n - blen, s->img_file) == (n-blen));
926          s->img_buffer = s->img_buffer_end;
927          return res;
928       }
929    }
930 #endif
931    if (s->img_buffer+n <= s->img_buffer_end) {
932       memcpy(buffer, s->img_buffer, n);
933       s->img_buffer += n;
934       return 1;
935    } else
936       return 0;
937 }
938 
939 static int get16(stbi *s)
940 {
941    int z = get8(s);
942    return (z << 8) + get8(s);
943 }
944 
945 static uint32 get32(stbi *s)
946 {
947    uint32 z = get16(s);
948    return (z << 16) + get16(s);
949 }
950 
951 static int get16le(stbi *s)
952 {
953    int z = get8(s);
954    return z + (get8(s) << 8);
955 }
956 
957 static uint32 get32le(stbi *s)
958 {
959    uint32 z = get16le(s);
960    return z + (get16le(s) << 16);
961 }
962 
963 //////////////////////////////////////////////////////////////////////////////
964 //
965 //  generic converter from built-in img_n to req_comp
966 //    individual types do this automatically as much as possible (e.g. jpeg
967 //    does all cases internally since it needs to colorspace convert anyway,
968 //    and it never has alpha, so very few cases ). png can automatically
969 //    interleave an alpha=255 channel, but falls back to this for other cases
970 //
971 //  assume data buffer is malloced, so malloc a new one and free that one
972 //  only failure mode is malloc failing
973 
974 static uint8 compute_y(int r, int g, int b)
975 {
976    return (uint8) (((r*77) + (g*150) +  (29*b)) >> 8);
977 }
978 
979 static unsigned char *convert_format(unsigned char *data, int img_n, int req_comp, uint x, uint y)
980 {
981    int i,j;
982    unsigned char *good;
983 
984    if (req_comp == img_n) return data;
985    assert(req_comp >= 1 && req_comp <= 4);
986 
987    good = (unsigned char *) MALLOC(req_comp * x * y);
988    if (good == NULL) {
989       FREE(data);
990       return epuc("outofmem", "Out of memory");
991    }
992 
993    for (j=0; j < (int) y; ++j) {
994       unsigned char *src  = data + j * x * img_n   ;
995       unsigned char *dest = good + j * x * req_comp;
996 
997       #define COMBO(a,b)  ((a)*8+(b))
998       #define CASE(a,b)   case COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
999       // convert source image with img_n components to one with req_comp components;
1000       // avoid switch per pixel, so use switch per scanline and massive macros
1001       switch (COMBO(img_n, req_comp)) {
1002          CASE(1,2) dest[0]=src[0], dest[1]=255; break;
1003          CASE(1,3) dest[0]=dest[1]=dest[2]=src[0]; break;
1004          CASE(1,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; break;
1005          CASE(2,1) dest[0]=src[0]; break;
1006          CASE(2,3) dest[0]=dest[1]=dest[2]=src[0]; break;
1007          CASE(2,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; break;
1008          CASE(3,4) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; break;
1009          CASE(3,1) dest[0]=compute_y(src[0],src[1],src[2]); break;
1010          CASE(3,2) dest[0]=compute_y(src[0],src[1],src[2]), dest[1] = 255; break;
1011          CASE(4,1) dest[0]=compute_y(src[0],src[1],src[2]); break;
1012          CASE(4,2) dest[0]=compute_y(src[0],src[1],src[2]), dest[1] = src[3]; break;
1013          CASE(4,3) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; break;
1014          default: assert(0);
1015       }
1016       #undef CASE
1017    }
1018 
1019    FREE(data);
1020    return good;
1021 }
1022 
1023 #ifndef STBI_NO_HDR
1024 static float   *ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
1025 {
1026    int i,k,n;
1027    float *output = (float *) MALLOC(x * y * comp * sizeof(float));
1028    if (output == NULL) { FREE(data); return epf("outofmem", "Out of memory"); }
1029    // compute number of non-alpha components
1030    if (comp & 1) n = comp; else n = comp-1;
1031    for (i=0; i < x*y; ++i) {
1032       for (k=0; k < n; ++k) {
1033          output[i*comp + k] = (float) pow(data[i*comp+k]/255.0f, l2h_gamma) * l2h_scale;
1034       }
1035       if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f;
1036    }
1037    FREE(data);
1038    return output;
1039 }
1040 
1041 #define float2int(x)   ((int) (x))
1042 static stbi_uc *hdr_to_ldr(float   *data, int x, int y, int comp)
1043 {
1044    int i,k,n;
1045    stbi_uc *output = (stbi_uc *) MALLOC(x * y * comp);
1046    if (output == NULL) { FREE(data); return epuc("outofmem", "Out of memory"); }
1047    // compute number of non-alpha components
1048    if (comp & 1) n = comp; else n = comp-1;
1049    for (i=0; i < x*y; ++i) {
1050       for (k=0; k < n; ++k) {
1051          float z = (float) pow(data[i*comp+k]*h2l_scale_i, h2l_gamma_i) * 255 + 0.5f;
1052          if (z < 0) z = 0;
1053          if (z > 255) z = 255;
1054          output[i*comp + k] = (uint8) float2int(z);
1055       }
1056       if (k < comp) {
1057          float z = data[i*comp+k] * 255 + 0.5f;
1058          if (z < 0) z = 0;
1059          if (z > 255) z = 255;
1060          output[i*comp + k] = (uint8) float2int(z);
1061       }
1062    }
1063    FREE(data);
1064    return output;
1065 }
1066 #endif
1067 
1068 //////////////////////////////////////////////////////////////////////////////
1069 //
1070 //  "baseline" JPEG/JFIF decoder (not actually fully baseline implementation)
1071 //
1072 //    simple implementation
1073 //      - channel subsampling of at most 2 in each dimension
1074 //      - doesn't support delayed output of y-dimension
1075 //      - simple interface (only one output format: 8-bit interleaved RGB)
1076 //      - doesn't try to recover corrupt jpegs
1077 //      - doesn't allow partial loading, loading multiple at once
1078 //      - still fast on x86 (copying globals into locals doesn't help x86)
1079 //      - allocates lots of intermediate memory (full size of all components)
1080 //        - non-interleaved case requires this anyway
1081 //        - allows good upsampling (see next)
1082 //    high-quality
1083 //      - upsampled channels are bilinearly interpolated, even across blocks
1084 //      - quality integer IDCT derived from IJG's 'slow'
1085 //    performance
1086 //      - fast huffman; reasonable integer IDCT
1087 //      - uses a lot of intermediate memory, could cache poorly
1088 //      - load http://nothings.org/remote/anemones.jpg 3 times on 2.8Ghz P4
1089 //          stb_jpeg:   1.34 seconds (MSVC6, default release build)
1090 //          stb_jpeg:   1.06 seconds (MSVC6, processor = Pentium Pro)
1091 //          IJL11.dll:  1.08 seconds (compiled by intel)
1092 //          IJG 1998:   0.98 seconds (MSVC6, makefile provided by IJG)
1093 //          IJG 1998:   0.95 seconds (MSVC6, makefile + proc=PPro)
1094 
1095 // huffman decoding acceleration
1096 #define FAST_BITS   9  // larger handles more cases; smaller stomps less cache
1097 
1098 typedef struct
1099 {
1100    uint8  fast[1 << FAST_BITS];
1101    // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
1102    uint16 code[256];
1103    uint8  values[256];
1104    uint8  size[257];
1105    unsigned int maxcode[18];
1106    int    delta[17];   // old 'firstsymbol' - old 'firstcode'
1107 } huffman;
1108 
1109 typedef struct
1110 {
1111    #ifdef STBI_SIMD
1112    unsigned short dequant2[4][64];
1113    #endif
1114    stbi s;
1115    huffman huff_dc[4];
1116    huffman huff_ac[4];
1117    uint8 dequant[4][64];
1118 
1119 // sizes for components, interleaved MCUs
1120    int img_h_max, img_v_max;
1121    int img_mcu_x, img_mcu_y;
1122    int img_mcu_w, img_mcu_h;
1123 
1124 // definition of jpeg image component
1125    struct
1126    {
1127       int id;
1128       int h,v;
1129       int tq;
1130       int hd,ha;
1131       int dc_pred;
1132 
1133       int x,y,w2,h2;
1134       uint8 *data;
1135       void *raw_data;
1136       uint8 *linebuf;
1137    } img_comp[4];
1138 
1139    uint32         code_buffer; // jpeg entropy-coded buffer
1140    int            code_bits;   // number of valid bits
1141    unsigned char  marker;      // marker seen while filling entropy buffer
1142    int            nomore;      // flag if we saw a marker so must stop
1143 
1144    int scan_n, order[4];
1145    int restart_interval, todo;
1146 } jpeg;
1147 
1148 static int build_huffman(huffman *h, int *count)
1149 {
1150    int i,j,k=0,code;
1151    // build size list for each symbol (from JPEG spec)
1152    for (i=0; i < 16; ++i)
1153       for (j=0; j < count[i]; ++j)
1154          h->size[k++] = (uint8) (i+1);
1155    h->size[k] = 0;
1156 
1157    // compute actual symbols (from jpeg spec)
1158    code = 0;
1159    k = 0;
1160    for(j=1; j <= 16; ++j) {
1161       // compute delta to add to code to compute symbol id
1162       h->delta[j] = k - code;
1163       if (h->size[k] == j) {
1164          while (h->size[k] == j)
1165             h->code[k++] = (uint16) (code++);
1166          if (code-1 >= (1 << j)) return e("bad code lengths","Corrupt JPEG");
1167       }
1168       // compute largest code + 1 for this size, preshifted as needed later
1169       h->maxcode[j] = code << (16-j);
1170       code <<= 1;
1171    }
1172    h->maxcode[j] = 0xffffffff;
1173 
1174    // build non-spec acceleration table; 255 is flag for not-accelerated
1175    memset(h->fast, 255, 1 << FAST_BITS);
1176    for (i=0; i < k; ++i) {
1177       int s = h->size[i];
1178       if (s <= FAST_BITS) {
1179          int c = h->code[i] << (FAST_BITS-s);
1180          int m = 1 << (FAST_BITS-s);
1181          for (j=0; j < m; ++j) {
1182             h->fast[c+j] = (uint8) i;
1183          }
1184       }
1185    }
1186    return 1;
1187 }
1188 
1189 static void grow_buffer_unsafe(jpeg *j)
1190 {
1191    do {
1192       int b = j->nomore ? 0 : get8(&j->s);
1193       if (b == 0xff) {
1194          int c = get8(&j->s);
1195          if (c != 0) {
1196             j->marker = (unsigned char) c;
1197             j->nomore = 1;
1198             return;
1199          }
1200       }
1201       j->code_buffer |= b << (24 - j->code_bits);
1202       j->code_bits += 8;
1203    } while (j->code_bits <= 24);
1204 }
1205 
1206 // (1 << n) - 1
1207 static uint32 bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
1208 
1209 // decode a jpeg huffman value from the bitstream
1210 __forceinline static int decode(jpeg *j, huffman *h)
1211 {
1212    unsigned int temp;
1213    int c,k;
1214 
1215    if (j->code_bits < 16) grow_buffer_unsafe(j);
1216 
1217    // look at the top FAST_BITS and determine what symbol ID it is,
1218    // if the code is <= FAST_BITS
1219    c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1220    k = h->fast[c];
1221    if (k < 255) {
1222       int s = h->size[k];
1223       if (s > j->code_bits)
1224          return -1;
1225       j->code_buffer <<= s;
1226       j->code_bits -= s;
1227       return h->values[k];
1228    }
1229 
1230    // naive test is to shift the code_buffer down so k bits are
1231    // valid, then test against maxcode. To speed this up, we've
1232    // preshifted maxcode left so that it has (16-k) 0s at the
1233    // end; in other words, regardless of the number of bits, it
1234    // wants to be compared against something shifted to have 16;
1235    // that way we don't need to shift inside the loop.
1236    temp = j->code_buffer >> 16;
1237    for (k=FAST_BITS+1 ; ; ++k)
1238       if (temp < h->maxcode[k])
1239          break;
1240    if (k == 17) {
1241       // error! code not found
1242       j->code_bits -= 16;
1243       return -1;
1244    }
1245 
1246    if (k > j->code_bits)
1247       return -1;
1248 
1249    // convert the huffman code to the symbol id
1250    c = ((j->code_buffer >> (32 - k)) & bmask[k]) + h->delta[k];
1251    assert((((j->code_buffer) >> (32 - h->size[c])) & bmask[h->size[c]]) == h->code[c]);
1252 
1253    // convert the id to a symbol
1254    j->code_bits -= k;
1255    j->code_buffer <<= k;
1256    return h->values[c];
1257 }
1258 
1259 // combined JPEG 'receive' and JPEG 'extend', since baseline
1260 // always extends everything it receives.
1261 __forceinline static int extend_receive(jpeg *j, int n)
1262 {
1263    unsigned int m = 1 << (n-1);
1264    unsigned int k;
1265    if (j->code_bits < n) grow_buffer_unsafe(j);
1266 
1267    #if 1
1268    k = stbi_lrot(j->code_buffer, n);
1269    j->code_buffer = k & ~bmask[n];
1270    k &= bmask[n];
1271    j->code_bits -= n;
1272    #else
1273    k = (j->code_buffer >> (32 - n)) & bmask[n];
1274    j->code_bits -= n;
1275    j->code_buffer <<= n;
1276    #endif
1277    // the following test is probably a random branch that won't
1278    // predict well. I tried to table accelerate it but failed.
1279    // maybe it's compiling as a conditional move?
1280    if (k < m)
1281       return (-1 << n) + k + 1;
1282    else
1283       return k;
1284 }
1285 
1286 // given a value that's at position X in the zigzag stream,
1287 // where does it appear in the 8x8 matrix coded as row-major?
1288 static uint8 dezigzag[64+15] =
1289 {
1290     0,  1,  8, 16,  9,  2,  3, 10,
1291    17, 24, 32, 25, 18, 11,  4,  5,
1292    12, 19, 26, 33, 40, 48, 41, 34,
1293    27, 20, 13,  6,  7, 14, 21, 28,
1294    35, 42, 49, 56, 57, 50, 43, 36,
1295    29, 22, 15, 23, 30, 37, 44, 51,
1296    58, 59, 52, 45, 38, 31, 39, 46,
1297    53, 60, 61, 54, 47, 55, 62, 63,
1298    // let corrupt input sample past end
1299    63, 63, 63, 63, 63, 63, 63, 63,
1300    63, 63, 63, 63, 63, 63, 63
1301 };
1302 
1303 // decode one 64-entry block--
1304 static int decode_block(jpeg *j, short data[64], huffman *hdc, huffman *hac, int b)
1305 {
1306    int diff,dc,k;
1307    int t = decode(j, hdc);
1308    if (t < 0) return e("bad huffman code","Corrupt JPEG");
1309 
1310    // 0 all the ac values now so we can do it 32-bits at a time
1311    memset(data,0,64*sizeof(data[0]));
1312 
1313    diff = t ? extend_receive(j, t) : 0;
1314    dc = j->img_comp[b].dc_pred + diff;
1315    j->img_comp[b].dc_pred = dc;
1316    data[0] = (short) dc;
1317 
1318    // decode AC components, see JPEG spec
1319    k = 1;
1320    do {
1321       int r,s;
1322       int rs = decode(j, hac);
1323       if (rs < 0) return e("bad huffman code","Corrupt JPEG");
1324       s = rs & 15;
1325       r = rs >> 4;
1326       if (s == 0) {
1327          if (rs != 0xf0) break; // end block
1328          k += 16;
1329       } else {
1330          k += r;
1331          // decode into unzigzag'd location
1332          data[dezigzag[k++]] = (short) extend_receive(j,s);
1333       }
1334    } while (k < 64);
1335    return 1;
1336 }
1337 
1338 // take a -128..127 value and clamp it and convert to 0..255
1339 __forceinline static uint8 clamp(int x)
1340 {
1341    // trick to use a single test to catch both cases
1342    if ((unsigned int) x > 255) {
1343       if (x < 0) return 0;
1344       if (x > 255) return 255;
1345    }
1346    return (uint8) x;
1347 }
1348 
// fixed-point helpers for the IDCT, 12 fractional bits:
//   f2f(x) converts a floating constant to fixed point (rounded)
//   fsh(x) scales an integer to fixed point; written as a multiplication
//          because left-shifting a negative value is undefined behaviour
#define f2f(x)  (int) (((x) * 4096 + 0.5))
#define fsh(x)  ((x) * 4096)

// derived from jidctint -- DCT_ISLOW
// One 1-D 8-point IDCT pass over inputs s0..s7. The macro declares its own
// locals and leaves the even part in x0..x3 and the odd part in t0..t3;
// the user combines them as x0+t3, x0-t3, x1+t2, x1-t2, x2+t1, x2-t1,
// x3+t0, x3-t0. Use it at most once per scope.
#define IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7)       \
   int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
   p2 = s2;                                    \
   p3 = s6;                                    \
   p1 = (p2+p3) * f2f(0.5411961f);             \
   t2 = p1 + p3*f2f(-1.847759065f);            \
   t3 = p1 + p2*f2f( 0.765366865f);            \
   p2 = s0;                                    \
   p3 = s4;                                    \
   t0 = fsh(p2+p3);                            \
   t1 = fsh(p2-p3);                            \
   x0 = t0+t3;                                 \
   x3 = t0-t3;                                 \
   x1 = t1+t2;                                 \
   x2 = t1-t2;                                 \
   t0 = s7;                                    \
   t1 = s5;                                    \
   t2 = s3;                                    \
   t3 = s1;                                    \
   p3 = t0+t2;                                 \
   p4 = t1+t3;                                 \
   p1 = t0+t3;                                 \
   p2 = t1+t2;                                 \
   p5 = (p3+p4)*f2f( 1.175875602f);            \
   t0 = t0*f2f( 0.298631336f);                 \
   t1 = t1*f2f( 2.053119869f);                 \
   t2 = t2*f2f( 3.072711026f);                 \
   t3 = t3*f2f( 1.501321110f);                 \
   p1 = p5 + p1*f2f(-0.899976223f);            \
   p2 = p5 + p2*f2f(-2.562915447f);            \
   p3 = p3*f2f(-1.961570560f);                 \
   p4 = p4*f2f(-0.390180644f);                 \
   t3 += p1+p4;                                \
   t2 += p2+p3;                                \
   t1 += p2+p4;                                \
   t0 += p1+p3;
1389 
1390 #ifdef STBI_SIMD
1391 typedef unsigned short stbi_dequantize_t;
1392 #else
1393 typedef uint8 stbi_dequantize_t;
1394 #endif
1395 
1396 // .344 seconds on 3*anemones.jpg
1397 static void idct_block(uint8 *out, int out_stride, short data[64], stbi_dequantize_t *dequantize)
1398 {
1399    int i,val[64],*v=val;
1400    stbi_dequantize_t *dq = dequantize;
1401    uint8 *o;
1402    short *d = data;
1403 
1404    // columns
1405    for (i=0; i < 8; ++i,++d,++dq, ++v) {
1406       // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
1407       if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
1408            && d[40]==0 && d[48]==0 && d[56]==0) {
1409          //    no shortcut                 0     seconds
1410          //    (1|2|3|4|5|6|7)==0          0     seconds
1411          //    all separate               -0.047 seconds
1412          //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
1413          int dcterm = d[0] * dq[0] << 2;
1414          v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
1415       } else {
1416          IDCT_1D(d[ 0]*dq[ 0],d[ 8]*dq[ 8],d[16]*dq[16],d[24]*dq[24],
1417                  d[32]*dq[32],d[40]*dq[40],d[48]*dq[48],d[56]*dq[56])
1418          // constants scaled things up by 1<<12; let's bring them back
1419          // down, but keep 2 extra bits of precision
1420          x0 += 512; x1 += 512; x2 += 512; x3 += 512;
1421          v[ 0] = (x0+t3) >> 10;
1422          v[56] = (x0-t3) >> 10;
1423          v[ 8] = (x1+t2) >> 10;
1424          v[48] = (x1-t2) >> 10;
1425          v[16] = (x2+t1) >> 10;
1426          v[40] = (x2-t1) >> 10;
1427          v[24] = (x3+t0) >> 10;
1428          v[32] = (x3-t0) >> 10;
1429       }
1430    }
1431 
1432    for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
1433       // no fast case since the first 1D IDCT spread components out
1434       IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
1435       // constants scaled things up by 1<<12, plus we had 1<<2 from first
1436       // loop, plus horizontal and vertical each scale by sqrt(8) so together
1437       // we've got an extra 1<<3, so 1<<17 total we need to remove.
1438       // so we want to round that, which means adding 0.5 * 1<<17,
1439       // aka 65536. Also, we'll end up with -128 to 127 that we want
1440       // to encode as 0..255 by adding 128, so we'll add that before the shift
1441       x0 += 65536 + (128<<17);
1442       x1 += 65536 + (128<<17);
1443       x2 += 65536 + (128<<17);
1444       x3 += 65536 + (128<<17);
1445       // tried computing the shifts into temps, or'ing the temps to see
1446       // if any were out of range, but that was slower
1447       o[0] = clamp((x0+t3) >> 17);
1448       o[7] = clamp((x0-t3) >> 17);
1449       o[1] = clamp((x1+t2) >> 17);
1450       o[6] = clamp((x1-t2) >> 17);
1451       o[2] = clamp((x2+t1) >> 17);
1452       o[5] = clamp((x2-t1) >> 17);
1453       o[3] = clamp((x3+t0) >> 17);
1454       o[4] = clamp((x3-t0) >> 17);
1455    }
1456 }
1457 
1458 #ifdef STBI_SIMD
1459 static stbi_idct_8x8 stbi_idct_installed = idct_block;
1460 
1461 extern void stbi_install_idct(stbi_idct_8x8 func)
1462 {
1463    stbi_idct_installed = func;
1464 }
1465 #endif
1466 
1467 #define MARKER_none  0xff
1468 // if there's a pending marker from the entropy stream, return that
1469 // otherwise, fetch from the stream and get a marker. if there's no
1470 // marker, return 0xff, which is never a valid marker value
1471 static uint8 get_marker(jpeg *j)
1472 {
1473    uint8 x;
1474    if (j->marker != MARKER_none) { x = j->marker; j->marker = MARKER_none; return x; }
1475    x = get8u(&j->s);
1476    if (x != 0xff) return MARKER_none;
1477    while (x == 0xff)
1478       x = get8u(&j->s);
1479    return x;
1480 }
1481 
1482 // in each scan, we'll have scan_n components, and the order
1483 // of the components is specified by order[]
1484 #define RESTART(x)     ((x) >= 0xd0 && (x) <= 0xd7)
1485 
1486 // after a restart interval, reset the entropy decoder and
1487 // the dc prediction
1488 static void reset(jpeg *j)
1489 {
1490    j->code_bits = 0;
1491    j->code_buffer = 0;
1492    j->nomore = 0;
1493    j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = 0;
1494    j->marker = MARKER_none;
1495    j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
1496    // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
1497    // since we don't even allow 1<<30 pixels
1498 }
1499 
1500 static int parse_entropy_coded_data(jpeg *z)
1501 {
1502    reset(z);
1503    if (z->scan_n == 1) {
1504       int i,j;
1505       #ifdef STBI_SIMD
1506       __declspec(align(16))
1507       #endif
1508       short data[64];
1509       int n = z->order[0];
1510       // non-interleaved data, we just need to process one block at a time,
1511       // in trivial scanline order
1512       // number of blocks to do just depends on how many actual "pixels" this
1513       // component has, independent of interleaved MCU blocking and such
1514       int w = (z->img_comp[n].x+7) >> 3;
1515       int h = (z->img_comp[n].y+7) >> 3;
1516       for (j=0; j < h; ++j) {
1517          for (i=0; i < w; ++i) {
1518             if (!decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+z->img_comp[n].ha, n)) return 0;
1519             #ifdef STBI_SIMD
1520             stbi_idct_installed(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data, z->dequant2[z->img_comp[n].tq]);
1521             #else
1522             idct_block(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data, z->dequant[z->img_comp[n].tq]);
1523             #endif
1524             // every data block is an MCU, so countdown the restart interval
1525             if (--z->todo <= 0) {
1526                if (z->code_bits < 24) grow_buffer_unsafe(z);
1527                // if it's NOT a restart, then just bail, so we get corrupt data
1528                // rather than no data
1529                if (!RESTART(z->marker)) return 1;
1530                reset(z);
1531             }
1532          }
1533       }
1534    } else { // interleaved!
1535       int i,j,k,x,y;
1536       short data[64];
1537       for (j=0; j < z->img_mcu_y; ++j) {
1538          for (i=0; i < z->img_mcu_x; ++i) {
1539             // scan an interleaved mcu... process scan_n components in order
1540             for (k=0; k < z->scan_n; ++k) {
1541                int n = z->order[k];
1542                // scan out an mcu's worth of this component; that's just determined
1543                // by the basic H and V specified for the component
1544                for (y=0; y < z->img_comp[n].v; ++y) {
1545                   for (x=0; x < z->img_comp[n].h; ++x) {
1546                      int x2 = (i*z->img_comp[n].h + x)*8;
1547                      int y2 = (j*z->img_comp[n].v + y)*8;
1548                      if (!decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+z->img_comp[n].ha, n)) return 0;
1549                      #ifdef STBI_SIMD
1550                      stbi_idct_installed(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data, z->dequant2[z->img_comp[n].tq]);
1551                      #else
1552                      idct_block(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data, z->dequant[z->img_comp[n].tq]);
1553                      #endif
1554                   }
1555                }
1556             }
1557             // after all interleaved components, that's an interleaved MCU,
1558             // so now count down the restart interval
1559             if (--z->todo <= 0) {
1560                if (z->code_bits < 24) grow_buffer_unsafe(z);
1561                // if it's NOT a restart, then just bail, so we get corrupt data
1562                // rather than no data
1563                if (!RESTART(z->marker)) return 1;
1564                reset(z);
1565             }
1566          }
1567       }
1568    }
1569    return 1;
1570 }
1571 
1572 static int process_marker(jpeg *z, int marker)
1573 {
1574    int L;
1575    switch (marker) {
1576       case MARKER_none: // no marker found
1577          return e("expected marker","Corrupt JPEG");
1578 
1579       case 0xC2: // SOF - progressive
1580          return e("progressive jpeg","JPEG format not supported (progressive)");
1581 
1582       case 0xDD: // DRI - specify restart interval
1583          if (get16(&z->s) != 4) return e("bad DRI len","Corrupt JPEG");
1584          z->restart_interval = get16(&z->s);
1585          return 1;
1586 
1587       case 0xDB: // DQT - define quantization table
1588          L = get16(&z->s)-2;
1589          while (L > 0) {
1590             int q = get8(&z->s);
1591             int p = q >> 4;
1592             int t = q & 15,i;
1593             if (p != 0) return e("bad DQT type","Corrupt JPEG");
1594             if (t > 3) return e("bad DQT table","Corrupt JPEG");
1595             for (i=0; i < 64; ++i)
1596                z->dequant[t][dezigzag[i]] = get8u(&z->s);
1597             #ifdef STBI_SIMD
1598             for (i=0; i < 64; ++i)
1599                z->dequant2[t][i] = z->dequant[t][i];
1600             #endif
1601             L -= 65;
1602          }
1603          return L==0;
1604 
1605       case 0xC4: // DHT - define huffman table
1606          L = get16(&z->s)-2;
1607          while (L > 0) {
1608             uint8 *v;
1609             int sizes[16],i,m=0;
1610             int q = get8(&z->s);
1611             int tc = q >> 4;
1612             int th = q & 15;
1613             if (tc > 1 || th > 3) return e("bad DHT header","Corrupt JPEG");
1614             for (i=0; i < 16; ++i) {
1615                sizes[i] = get8(&z->s);
1616                m += sizes[i];
1617             }
1618             L -= 17;
1619             if (tc == 0) {
1620                if (!build_huffman(z->huff_dc+th, sizes)) return 0;
1621                v = z->huff_dc[th].values;
1622             } else {
1623                if (!build_huffman(z->huff_ac+th, sizes)) return 0;
1624                v = z->huff_ac[th].values;
1625             }
1626             for (i=0; i < m; ++i)
1627                v[i] = get8u(&z->s);
1628             L -= m;
1629          }
1630          return L==0;
1631    }
1632    // check for comment block or APP blocks
1633    if ((marker >= 0xE0 && marker <= 0xEF) || marker == 0xFE) {
1634       skip(&z->s, get16(&z->s)-2);
1635       return 1;
1636    }
1637    return 0;
1638 }
1639 
1640 // after we see SOS
1641 static int process_scan_header(jpeg *z)
1642 {
1643    int i;
1644    int Ls = get16(&z->s);
1645    z->scan_n = get8(&z->s);
1646    if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s.img_n) return e("bad SOS component count","Corrupt JPEG");
1647    if (Ls != 6+2*z->scan_n) return e("bad SOS len","Corrupt JPEG");
1648    for (i=0; i < z->scan_n; ++i) {
1649       int id = get8(&z->s), which;
1650       int q = get8(&z->s);
1651       for (which = 0; which < z->s.img_n; ++which)
1652          if (z->img_comp[which].id == id)
1653             break;
1654       if (which == z->s.img_n) return 0;
1655       z->img_comp[which].hd = q >> 4;   if (z->img_comp[which].hd > 3) return e("bad DC huff","Corrupt JPEG");
1656       z->img_comp[which].ha = q & 15;   if (z->img_comp[which].ha > 3) return e("bad AC huff","Corrupt JPEG");
1657       z->order[i] = which;
1658    }
1659    if (get8(&z->s) != 0) return e("bad SOS","Corrupt JPEG");
1660    get8(&z->s); // should be 63, but might be 0
1661    if (get8(&z->s) != 0) return e("bad SOS","Corrupt JPEG");
1662 
1663    return 1;
1664 }
1665 
1666 static int process_frame_header(jpeg *z, int scan)
1667 {
1668    stbi *s = &z->s;
1669    int Lf,p,i,q, h_max=1,v_max=1,c;
1670    Lf = get16(s);         if (Lf < 11) return e("bad SOF len","Corrupt JPEG"); // JPEG
1671    p  = get8(s);          if (p != 8) return e("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
1672    s->img_y = get16(s);   if (s->img_y == 0) return e("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
1673    s->img_x = get16(s);   if (s->img_x == 0) return e("0 width","Corrupt JPEG"); // JPEG requires
1674    c = get8(s);
1675    if (c != 3 && c != 1) return e("bad component count","Corrupt JPEG");    // JFIF requires
1676    s->img_n = c;
1677    for (i=0; i < c; ++i) {
1678       z->img_comp[i].data = NULL;
1679       z->img_comp[i].linebuf = NULL;
1680    }
1681 
1682    if (Lf != 8+3*s->img_n) return e("bad SOF len","Corrupt JPEG");
1683 
1684    for (i=0; i < s->img_n; ++i) {
1685       z->img_comp[i].id = get8(s);
1686       if (z->img_comp[i].id != i+1)   // JFIF requires
1687          if (z->img_comp[i].id != i)  // some version of jpegtran outputs non-JFIF-compliant files!
1688             return e("bad component ID","Corrupt JPEG");
1689       q = get8(s);
1690       z->img_comp[i].h = (q >> 4);  if (!z->img_comp[i].h || z->img_comp[i].h > 4) return e("bad H","Corrupt JPEG");
1691       z->img_comp[i].v = q & 15;    if (!z->img_comp[i].v || z->img_comp[i].v > 4) return e("bad V","Corrupt JPEG");
1692       z->img_comp[i].tq = get8(s);  if (z->img_comp[i].tq > 3) return e("bad TQ","Corrupt JPEG");
1693    }
1694 
1695    if (scan != SCAN_load) return 1;
1696 
1697    if ((1 << 30) / s->img_x / s->img_n < s->img_y) return e("too large", "Image too large to decode");
1698 
1699    for (i=0; i < s->img_n; ++i) {
1700       if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
1701       if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
1702    }
1703 
1704    // compute interleaved mcu info
1705    z->img_h_max = h_max;
1706    z->img_v_max = v_max;
1707    z->img_mcu_w = h_max * 8;
1708    z->img_mcu_h = v_max * 8;
1709    z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
1710    z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;
1711 
1712    for (i=0; i < s->img_n; ++i) {
1713       // number of effective pixels (e.g. for non-interleaved MCU)
1714       z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
1715       z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;
1716       // to simplify generation, we'll allocate enough memory to decode
1717       // the bogus oversized data from using interleaved MCUs and their
1718       // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
1719       // discard the extra data until colorspace conversion
1720       z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
1721       z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
1722       z->img_comp[i].raw_data = MALLOC(z->img_comp[i].w2 * z->img_comp[i].h2+15);
1723       if (z->img_comp[i].raw_data == NULL) {
1724          for(--i; i >= 0; --i) {
1725             FREE(z->img_comp[i].raw_data);
1726             z->img_comp[i].data = NULL;
1727          }
1728          return e("outofmem", "Out of memory");
1729       }
1730       // align blocks for installable-idct using mmx/sse
1731       z->img_comp[i].data = (uint8*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
1732       z->img_comp[i].linebuf = NULL;
1733    }
1734 
1735    return 1;
1736 }
1737 
// Marker predicates, listed by marker value. They are comparisons rather
// than plain constants because one predicate may accept several markers
// (SOF matches both 0xC0 and 0xC1).
#define SOF(x)         ((x) == 0xC0 || (x) == 0xC1)
#define SOI(x)         ((x) == 0xD8)
#define EOI(x)         ((x) == 0xD9)
#define SOS(x)         ((x) == 0xDA)
#define DNL(x)         ((x) == 0xDC)
1744 
1745 static int decode_jpeg_header(jpeg *z, int scan)
1746 {
1747    int m;
1748    z->marker = MARKER_none; // initialize cached marker to empty
1749    m = get_marker(z);
1750    if (!SOI(m)) return e("no SOI","Corrupt JPEG");
1751    if (scan == SCAN_type) return 1;
1752    m = get_marker(z);
1753    while (!SOF(m)) {
1754       if (!process_marker(z,m)) return 0;
1755       m = get_marker(z);
1756       while (m == MARKER_none) {
1757          // some files have extra padding after their blocks, so ok, we'll scan
1758          if (at_eof(&z->s)) return e("no SOF", "Corrupt JPEG");
1759          m = get_marker(z);
1760       }
1761    }
1762    if (!process_frame_header(z, scan)) return 0;
1763    return 1;
1764 }
1765 
1766 static int decode_jpeg_image(jpeg *j)
1767 {
1768    int m;
1769    j->restart_interval = 0;
1770    if (!decode_jpeg_header(j, SCAN_load)) return 0;
1771    m = get_marker(j);
1772    while (!EOI(m)) {
1773       if (SOS(m)) {
1774          if (!process_scan_header(j)) return 0;
1775          if (!parse_entropy_coded_data(j)) return 0;
1776          if (j->marker == MARKER_none ) {
1777             // handle 0s at the end of image data from IP Kamera 9060
1778             while (!at_eof(&j->s)) {
1779                int x = get8(&j->s);
1780                if (x == 255) {
1781                   j->marker = get8u(&j->s);
1782                   break;
1783                } else if (x != 0) {
1784                   return 0;
1785                }
1786             }
1787             // if we reach eof without hitting a marker, get_marker() below will fail and we'll eventually return 0
1788          }
1789       } else {
1790          if (!process_marker(j, m)) return 0;
1791       }
1792       m = get_marker(j);
1793    }
1794    return 1;
1795 }
1796 
1797 // static jfif-centered resampling (across block boundaries)
1798 
1799 typedef uint8 *(*resample_row_func)(uint8 *out, uint8 *in0, uint8 *in1,
1800                                     int w, int hs);
1801 
1802 #define div4(x) ((uint8) ((x) >> 2))
1803 
1804 static uint8 *resample_row_1(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
1805 {
1806    STBI_NOTUSED(out);
1807    STBI_NOTUSED(in_far);
1808    STBI_NOTUSED(w);
1809    STBI_NOTUSED(hs);
1810    return in_near;
1811 }
1812 
1813 static uint8* resample_row_v_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
1814 {
1815    // need to generate two samples vertically for every one in input
1816    int i;
1817    STBI_NOTUSED(hs);
1818    for (i=0; i < w; ++i)
1819       out[i] = div4(3*in_near[i] + in_far[i] + 2);
1820    return out;
1821 }
1822 
1823 static uint8*  resample_row_h_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
1824 {
1825    // need to generate two samples horizontally for every one in input
1826    int i;
1827    uint8 *input = in_near;
1828 
1829    if (w == 1) {
1830       // if only one sample, can't do any interpolation
1831       out[0] = out[1] = input[0];
1832       return out;
1833    }
1834 
1835    out[0] = input[0];
1836    out[1] = div4(input[0]*3 + input[1] + 2);
1837    for (i=1; i < w-1; ++i) {
1838       int n = 3*input[i]+2;
1839       out[i*2+0] = div4(n+input[i-1]);
1840       out[i*2+1] = div4(n+input[i+1]);
1841    }
1842    out[i*2+0] = div4(input[w-2]*3 + input[w-1] + 2);
1843    out[i*2+1] = input[w-1];
1844 
1845    STBI_NOTUSED(in_far);
1846    STBI_NOTUSED(hs);
1847 
1848    return out;
1849 }
1850 
1851 #define div16(x) ((uint8) ((x) >> 4))
1852 
1853 static uint8 *resample_row_hv_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
1854 {
1855    // need to generate 2x2 samples for every one in input
1856    int i,t0,t1;
1857    if (w == 1) {
1858       out[0] = out[1] = div4(3*in_near[0] + in_far[0] + 2);
1859       return out;
1860    }
1861 
1862    t1 = 3*in_near[0] + in_far[0];
1863    out[0] = div4(t1+2);
1864    for (i=1; i < w; ++i) {
1865       t0 = t1;
1866       t1 = 3*in_near[i]+in_far[i];
1867       out[i*2-1] = div16(3*t0 + t1 + 8);
1868       out[i*2  ] = div16(3*t1 + t0 + 8);
1869    }
1870    out[w*2-1] = div4(t1+2);
1871 
1872    STBI_NOTUSED(hs);
1873 
1874    return out;
1875 }
1876 
1877 static uint8 *resample_row_generic(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
1878 {
1879    // resample with nearest-neighbor
1880    int i,j;
1881    in_far = in_far;
1882    for (i=0; i < w; ++i)
1883       for (j=0; j < hs; ++j)
1884          out[i*hs+j] = in_near[i];
1885    return out;
1886 }
1887 
1888 #define float2fixed(x)  ((int) ((x) * 65536 + 0.5))
1889 
1890 // 0.38 seconds on 3*anemones.jpg   (0.25 with processor = Pro)
1891 // VC6 without processor=Pro is generating multiple LEAs per multiply!
1892 static void YCbCr_to_RGB_row(uint8 *out, const uint8 *y, const uint8 *pcb, const uint8 *pcr, int count, int step)
1893 {
1894    int i;
1895    for (i=0; i < count; ++i) {
1896       int y_fixed = (y[i] << 16) + 32768; // rounding
1897       int r,g,b;
1898       int cr = pcr[i] - 128;
1899       int cb = pcb[i] - 128;
1900       r = y_fixed + cr*float2fixed(1.40200f);
1901       g = y_fixed - cr*float2fixed(0.71414f) - cb*float2fixed(0.34414f);
1902       b = y_fixed                            + cb*float2fixed(1.77200f);
1903       r >>= 16;
1904       g >>= 16;
1905       b >>= 16;
1906       if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
1907       if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
1908       if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
1909       out[0] = (uint8)r;
1910       out[1] = (uint8)g;
1911       out[2] = (uint8)b;
1912       out[3] = 255;
1913       out += step;
1914    }
1915 }
1916 
#ifdef STBI_SIMD
// Row converter used by the loader when STBI_SIMD is defined; starts out as
// the built-in C implementation.
static stbi_YCbCr_to_RGB_run stbi_YCbCr_installed = YCbCr_to_RGB_row;

// Replace the YCbCr-to-RGB row converter with 'func'. The pointer is stored
// as given, with no NULL check.
void stbi_install_YCbCr_to_RGB(stbi_YCbCr_to_RGB_run func)
{
   stbi_YCbCr_installed = func;
}
#endif
1925 
1926 
1927 // clean up the temporary component buffers
1928 static void cleanup_jpeg(jpeg *j)
1929 {
1930    int i;
1931    for (i=0; i < j->s.img_n; ++i) {
1932       if (j->img_comp[i].data) {
1933          FREE(j->img_comp[i].raw_data);
1934          j->img_comp[i].data = NULL;
1935       }
1936       if (j->img_comp[i].linebuf) {
1937          FREE(j->img_comp[i].linebuf);
1938          j->img_comp[i].linebuf = NULL;
1939       }
1940    }
1941 }
1942 
1943 typedef struct
1944 {
1945    resample_row_func resample;
1946    uint8 *line0,*line1;
1947    int hs,vs;   // expansion factor in each axis
1948    int w_lores; // horizontal pixels pre-expansion
1949    int ystep;   // how far through vertical expansion we are
1950    int ypos;    // which pre-expansion row we're on
1951 } stbi_resample;
1952 
1953 static uint8 *load_jpeg_image(jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
1954 {
1955    int n, decode_n;
1956    // validate req_comp
1957    if (req_comp < 0 || req_comp > 4) return epuc("bad req_comp", "Internal error");
1958    z->s.img_n = 0;
1959 
1960    // load a jpeg image from whichever source
1961    if (!decode_jpeg_image(z)) { cleanup_jpeg(z); return NULL; }
1962 
1963    // determine actual number of components to generate
1964    n = req_comp ? req_comp : z->s.img_n;
1965 
1966    if (z->s.img_n == 3 && n < 3)
1967       decode_n = 1;
1968    else
1969       decode_n = z->s.img_n;
1970 
1971    // resample and color-convert
1972    {
1973       int k;
1974       uint i,j;
1975       uint8 *output;
1976       uint8 *coutput[4];
1977 
1978       stbi_resample res_comp[4];
1979 
1980       for (k=0; k < decode_n; ++k) {
1981          stbi_resample *r = &res_comp[k];
1982 
1983          // allocate line buffer big enough for upsampling off the edges
1984          // with upsample factor of 4
1985          z->img_comp[k].linebuf = (uint8 *) MALLOC(z->s.img_x + 3);
1986          if (!z->img_comp[k].linebuf) { cleanup_jpeg(z); return epuc("outofmem", "Out of memory"); }
1987 
1988          r->hs      = z->img_h_max / z->img_comp[k].h;
1989          r->vs      = z->img_v_max / z->img_comp[k].v;
1990          r->ystep   = r->vs >> 1;
1991          r->w_lores = (z->s.img_x + r->hs-1) / r->hs;
1992          r->ypos    = 0;
1993          r->line0   = r->line1 = z->img_comp[k].data;
1994 
1995          if      (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
1996          else if (r->hs == 1 && r->vs == 2) r->resample = resample_row_v_2;
1997          else if (r->hs == 2 && r->vs == 1) r->resample = resample_row_h_2;
1998          else if (r->hs == 2 && r->vs == 2) r->resample = resample_row_hv_2;
1999          else                               r->resample = resample_row_generic;
2000       }
2001 
2002       // can't error after this so, this is safe
2003       output = (uint8 *) MALLOC(n * z->s.img_x * z->s.img_y + 1);
2004       if (!output) { cleanup_jpeg(z); return epuc("outofmem", "Out of memory"); }
2005 
2006       // now go ahead and resample
2007       for (j=0; j < z->s.img_y; ++j) {
2008          uint8 *out = output + n * z->s.img_x * j;
2009          for (k=0; k < decode_n; ++k) {
2010             stbi_resample *r = &res_comp[k];
2011             int y_bot = r->ystep >= (r->vs >> 1);
2012             coutput[k] = r->resample(z->img_comp[k].linebuf,
2013                                      y_bot ? r->line1 : r->line0,
2014                                      y_bot ? r->line0 : r->line1,
2015                                      r->w_lores, r->hs);
2016             if (++r->ystep >= r->vs) {
2017                r->ystep = 0;
2018                r->line0 = r->line1;
2019                if (++r->ypos < z->img_comp[k].y)
2020                   r->line1 += z->img_comp[k].w2;
2021             }
2022          }
2023          if (n >= 3) {
2024             uint8 *y = coutput[0];
2025             if (z->s.img_n == 3) {
2026                #ifdef STBI_SIMD
2027                stbi_YCbCr_installed(out, y, coutput[1], coutput[2], z->s.img_x, n);
2028                #else
2029                YCbCr_to_RGB_row(out, y, coutput[1], coutput[2], z->s.img_x, n);
2030                #endif
2031             } else
2032                for (i=0; i < z->s.img_x; ++i) {
2033                   out[0] = out[1] = out[2] = y[i];
2034                   out[3] = 255; // not used if n==3
2035                   out += n;
2036                }
2037          } else {
2038             uint8 *y = coutput[0];
2039             if (n == 1)
2040                for (i=0; i < z->s.img_x; ++i) out[i] = y[i];
2041             else
2042                for (i=0; i < z->s.img_x; ++i) *out++ = y[i], *out++ = 255;
2043          }
2044       }
2045       cleanup_jpeg(z);
2046       *out_x = z->s.img_x;
2047       *out_y = z->s.img_y;
2048       if (comp) *comp  = z->s.img_n; // report original components, not output
2049       return output;
2050    }
2051 }
2052 
2053 #ifndef STBI_NO_STDIO
2054 unsigned char *stbi_jpeg_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
2055 {
2056    jpeg j;
2057    start_file(&j.s, f);
2058    return load_jpeg_image(&j, x,y,comp,req_comp);
2059 }
2060 
2061 unsigned char *stbi_jpeg_load(char const *filename, int *x, int *y, int *comp, int req_comp)
2062 {
2063    unsigned char *data;
2064    FILE *f = fopen(filename, "rb");
2065    if (!f) return NULL;
2066    data = stbi_jpeg_load_from_file(f,x,y,comp,req_comp);
2067    fclose(f);
2068    return data;
2069 }
2070 #endif
2071 
2072 unsigned char *stbi_jpeg_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
2073 {
2074    #ifdef STBI_SMALL_STACK
2075    unsigned char *result;
2076    jpeg *j = (jpeg *) MALLOC(sizeof(*j));
2077    start_mem(&j->s, buffer, len);
2078    result = load_jpeg_image(j,x,y,comp,req_comp);
2079    FREE(j);
2080    return result;
2081    #else
2082    jpeg j;
2083    start_mem(&j.s, buffer,len);
2084    return load_jpeg_image(&j, x,y,comp,req_comp);
2085    #endif
2086 }
2087 
2088 static int stbi_jpeg_info_raw(jpeg *j, int *x, int *y, int *comp)
2089 {
2090    if (!decode_jpeg_header(j, SCAN_header))
2091       return 0;
2092    if (x) *x = j->s.img_x;
2093    if (y) *y = j->s.img_y;
2094    if (comp) *comp = j->s.img_n;
2095    return 1;
2096 }
2097 
2098 #ifndef STBI_NO_STDIO
2099 int stbi_jpeg_test_file(FILE *f)
2100 {
2101    int n,r;
2102    jpeg j;
2103    n = ftell(f);
2104    start_file(&j.s, f);
2105    r = decode_jpeg_header(&j, SCAN_type);
2106    fseek(f,n,SEEK_SET);
2107    return r;
2108 }
2109 
2110 int stbi_jpeg_info_from_file(FILE *f, int *x, int *y, int *comp)
2111 {
2112     jpeg j;
2113     long n = ftell(f);
2114     int res;
2115     start_file(&j.s, f);
2116     res = stbi_jpeg_info_raw(&j, x, y, comp);
2117     fseek(f, n, SEEK_SET);
2118     return res;
2119 }
2120 
2121 int stbi_jpeg_info(char const *filename, int *x, int *y, int *comp)
2122 {
2123     FILE *f = fopen(filename, "rb");
2124     int result;
2125     if (!f) return e("can't fopen", "Unable to open file");
2126     result = stbi_jpeg_info_from_file(f, x, y, comp);
2127     fclose(f);
2128     return result;
2129 }
2130 #endif
2131 
2132 int stbi_jpeg_test_memory(stbi_uc const *buffer, int len)
2133 {
2134    jpeg j;
2135    start_mem(&j.s, buffer,len);
2136    return decode_jpeg_header(&j, SCAN_type);
2137 }
2138 
2139 int stbi_jpeg_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
2140 {
2141     jpeg j;
2142     start_mem(&j.s, buffer, len);
2143     return stbi_jpeg_info_raw(&j, x, y, comp);
2144 }
2145 
// Declarations of the JPEG info entry points defined above; the two stdio
// variants only exist when STBI_NO_STDIO is not defined.
#ifndef STBI_NO_STDIO
extern int      stbi_jpeg_info            (char const *filename,           int *x, int *y, int *comp);
extern int      stbi_jpeg_info_from_file  (FILE *f,                  int *x, int *y, int *comp);
#endif
extern int      stbi_jpeg_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
2151 
2152 // public domain zlib decode    v0.2  Sean Barrett 2006-11-18
2153 //    simple implementation
2154 //      - all input must be provided in an upfront buffer
2155 //      - all output is written to a single output buffer (can malloc/realloc)
2156 //    performance
2157 //      - fast huffman
2158 
// fast-way is faster to check than jpeg huffman, but slow way is slower
#define ZFAST_BITS  9 // accelerate all cases in default tables
#define ZFAST_MASK  ((1 << ZFAST_BITS) - 1)

// zlib-style huffman encoding
// (jpegs packs from left, zlib from right, so can't share code)
typedef struct
{
   // indexed by the low ZFAST_BITS bits of the bit buffer; holds an index
   // into size[]/value[], or 0xffff when the code is longer than ZFAST_BITS
   uint16 fast[1 << ZFAST_BITS];
   // first code of each code length
   uint16 firstcode[16];
   // one past the last code of each length, shifted up to 16 bits;
   // entry 16 is a sentinel that ends the slow-path search
   int maxcode[17];
   // index into size[]/value[] of the first symbol of each code length
   uint16 firstsymbol[16];
   // code length of each entry
   uint8  size[288];
   // symbol of each entry
   uint16 value[288];
} zhuffman;
2174 
2175 __forceinline static int bitreverse16(int n)
2176 {
2177   n = ((n & 0xAAAA) >>  1) | ((n & 0x5555) << 1);
2178   n = ((n & 0xCCCC) >>  2) | ((n & 0x3333) << 2);
2179   n = ((n & 0xF0F0) >>  4) | ((n & 0x0F0F) << 4);
2180   n = ((n & 0xFF00) >>  8) | ((n & 0x00FF) << 8);
2181   return n;
2182 }
2183 
2184 __forceinline static int bit_reverse(int v, int bits)
2185 {
2186    assert(bits <= 16);
2187    // to bit reverse n bits, reverse 16 and shift
2188    // e.g. 11 bits, bit reverse and shift away 5
2189    return bitreverse16(v) >> (16-bits);
2190 }
2191 
2192 static int zbuild_huffman(zhuffman *z, uint8 *sizelist, int num)
2193 {
2194    int i,k=0;
2195    int code, next_code[16], sizes[17];
2196 
2197    // DEFLATE spec for generating codes
2198    memset(sizes, 0, sizeof(sizes));
2199    memset(z->fast, 255, sizeof(z->fast));
2200    for (i=0; i < num; ++i)
2201       ++sizes[sizelist[i]];
2202    sizes[0] = 0;
2203    for (i=1; i < 16; ++i)
2204       assert(sizes[i] <= (1 << i));
2205    code = 0;
2206    for (i=1; i < 16; ++i) {
2207       next_code[i] = code;
2208       z->firstcode[i] = (uint16) code;
2209       z->firstsymbol[i] = (uint16) k;
2210       code = (code + sizes[i]);
2211       if (sizes[i])
2212          if (code-1 >= (1 << i)) return e("bad codelengths","Corrupt JPEG");
2213       z->maxcode[i] = code << (16-i); // preshift for inner loop
2214       code <<= 1;
2215       k += sizes[i];
2216    }
2217    z->maxcode[16] = 0x10000; // sentinel
2218    for (i=0; i < num; ++i) {
2219       int s = sizelist[i];
2220       if (s) {
2221          int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
2222          z->size[c] = (uint8)s;
2223          z->value[c] = (uint16)i;
2224          if (s <= ZFAST_BITS) {
2225             int m = bit_reverse(next_code[s],s);
2226             while (m < (1 << ZFAST_BITS)) {
2227                z->fast[m] = (uint16) c;
2228                m += (1 << s);
2229             }
2230          }
2231          ++next_code[s];
2232       }
2233    }
2234    return 1;
2235 }
2236 
2237 // zlib-from-memory implementation for PNG reading
2238 //    because PNG allows splitting the zlib stream arbitrarily,
2239 //    and it's annoying structurally to have PNG call ZLIB call PNG,
2240 //    we require PNG read all the IDATs and combine them into a single
2241 //    memory buffer
2242 
2243 typedef struct
2244 {
2245    uint8 const *zbuffer, *zbuffer_end;
2246    int num_bits;
2247    uint32 code_buffer;
2248 
2249    char *zout;
2250    char *zout_start;
2251    char *zout_end;
2252    int   z_expandable;
2253 
2254    zhuffman z_length, z_distance;
2255 } zbuf;
2256 
2257 __forceinline static int zget8(zbuf *z)
2258 {
2259    if (z->zbuffer >= z->zbuffer_end) return 0;
2260    return *z->zbuffer++;
2261 }
2262 
2263 static void fill_bits(zbuf *z)
2264 {
2265    do {
2266       assert(z->code_buffer < (1U << z->num_bits));
2267       z->code_buffer |= zget8(z) << z->num_bits;
2268       z->num_bits += 8;
2269    } while (z->num_bits <= 24);
2270 }
2271 
2272 __forceinline static unsigned int zreceive(zbuf *z, int n)
2273 {
2274    unsigned int k;
2275    if (z->num_bits < n) fill_bits(z);
2276    k = z->code_buffer & ((1 << n) - 1);
2277    z->code_buffer >>= n;
2278    z->num_bits -= n;
2279    return k;
2280 }
2281 
2282 __forceinline static int zhuffman_decode(zbuf *a, zhuffman *z)
2283 {
2284    int b,s,k;
2285    if (a->num_bits < 16) fill_bits(a);
2286    b = z->fast[a->code_buffer & ZFAST_MASK];
2287    if (b < 0xffff) {
2288       s = z->size[b];
2289       a->code_buffer >>= s;
2290       a->num_bits -= s;
2291       return z->value[b];
2292    }
2293 
2294    // not resolved by fast table, so compute it the slow way
2295    // use jpeg approach, which requires MSbits at top
2296    k = bit_reverse(a->code_buffer, 16);
2297    for (s=ZFAST_BITS+1; ; ++s)
2298       if (k < z->maxcode[s])
2299          break;
2300    if (s == 16) return -1; // invalid code!
2301    // code size is s, so:
2302    b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
2303    assert(z->size[b] == s);
2304    a->code_buffer >>= s;
2305    a->num_bits -= s;
2306    return z->value[b];
2307 }
2308 
2309 static int expand(zbuf *z, int n)  // need to make room for n bytes
2310 {
2311    char *q;
2312    int cur, limit;
2313    if (!z->z_expandable) return e("output buffer limit","Corrupt PNG");
2314    cur   = (int) (z->zout     - z->zout_start);
2315    limit = (int) (z->zout_end - z->zout_start);
2316    while (cur + n > limit)
2317       limit *= 2;
2318    q = (char *) REALLOC(z->zout_start, limit);
2319    if (q == NULL) return e("outofmem", "Out of memory");
2320    z->zout_start = q;
2321    z->zout       = q + cur;
2322    z->zout_end   = q + limit;
2323    return 1;
2324 }
2325 
// Base match length for each length code, indexed by (symbol - 257).
// The trailing zero entries are never valid length codes.
static int length_base[31] = {
     3,   4,   5,   6,   7,   8,   9,  10,
    11,  13,  15,  17,  19,  23,  27,  31,
    35,  43,  51,  59,  67,  83,  99, 115,
   131, 163, 195, 227, 258,   0,   0 };

// Number of extra bits that follow each length code.
static int length_extra[31] = {
   0, 0, 0, 0, 0, 0, 0, 0,
   1, 1, 1, 1, 2, 2, 2, 2,
   3, 3, 3, 3, 4, 4, 4, 4,
   5, 5, 5, 5, 0, 0, 0 };

// Base distance for each distance code; the last two entries are zero.
static int dist_base[32] = {
      1,    2,    3,    4,    5,    7,    9,    13,
     17,   25,   33,   49,   65,   97,  129,   193,
    257,  385,  513,  769, 1025, 1537, 2049,  3073,
   4097, 6145, 8193, 12289, 16385, 24577,  0,     0 };

// Number of extra bits that follow each distance code; the last two
// entries, previously left to implicit zero-initialization, are spelled out.
static int dist_extra[32] = {
    0,  0,  0,  0,  1,  1,  2,  2,
    3,  3,  4,  4,  5,  5,  6,  6,
    7,  7,  8,  8,  9,  9, 10, 10,
   11, 11, 12, 12, 13, 13,  0,  0 };
2339 
2340 static int parse_huffman_block(zbuf *a)
2341 {
2342    for(;;) {
2343       int z = zhuffman_decode(a, &a->z_length);
2344       if (z < 256) {
2345          if (z < 0) return e("bad huffman code","Corrupt PNG"); // error in huffman codes
2346          if (a->zout >= a->zout_end) if (!expand(a, 1)) return 0;
2347          *a->zout++ = (char) z;
2348       } else {
2349          uint8 *p;
2350          int len,dist;
2351          if (z == 256) return 1;
2352          z -= 257;
2353          len = length_base[z];
2354          if (length_extra[z]) len += zreceive(a, length_extra[z]);
2355          z = zhuffman_decode(a, &a->z_distance);
2356          if (z < 0) return e("bad huffman code","Corrupt PNG");
2357          dist = dist_base[z];
2358          if (dist_extra[z]) dist += zreceive(a, dist_extra[z]);
2359          if (a->zout - a->zout_start < dist) return e("bad dist","Corrupt PNG");
2360          if (a->zout + len > a->zout_end) if (!expand(a, len)) return 0;
2361          p = (uint8 *) (a->zout - dist);
2362          while (len--)
2363             *a->zout++ = *p++;
2364       }
2365    }
2366 }
2367 
2368 static int compute_huffman_codes(zbuf *a)
2369 {
2370    static uint8 length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
2371    zhuffman z_codelength;
2372    uint8 lencodes[286+32+137];//padding for maximum single op
2373    uint8 codelength_sizes[19];
2374    int i,n;
2375 
2376    int hlit  = zreceive(a,5) + 257;
2377    int hdist = zreceive(a,5) + 1;
2378    int hclen = zreceive(a,4) + 4;
2379 
2380    memset(codelength_sizes, 0, sizeof(codelength_sizes));
2381    for (i=0; i < hclen; ++i) {
2382       int s = zreceive(a,3);
2383       codelength_sizes[length_dezigzag[i]] = (uint8) s;
2384    }
2385    if (!zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;
2386 
2387    n = 0;
2388    while (n < hlit + hdist) {
2389       int c = zhuffman_decode(a, &z_codelength);
2390       assert(c >= 0 && c < 19);
2391       if (c < 16)
2392          lencodes[n++] = (uint8) c;
2393       else if (c == 16) {
2394          c = zreceive(a,2)+3;
2395          memset(lencodes+n, lencodes[n-1], c);
2396          n += c;
2397       } else if (c == 17) {
2398          c = zreceive(a,3)+3;
2399          memset(lencodes+n, 0, c);
2400          n += c;
2401       } else {
2402          assert(c == 18);
2403          c = zreceive(a,7)+11;
2404          memset(lencodes+n, 0, c);
2405          n += c;
2406       }
2407    }
2408    if (n != hlit+hdist) return e("bad codelengths","Corrupt PNG");
2409    if (!zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
2410    if (!zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
2411    return 1;
2412 }
2413 
2414 static int parse_uncompressed_block(zbuf *a)
2415 {
2416    uint8 header[4];
2417    int len,nlen,k;
2418    if (a->num_bits & 7)
2419       zreceive(a, a->num_bits & 7); // discard
2420    // drain the bit-packed data into header
2421    k = 0;
2422    while (a->num_bits > 0) {
2423       header[k++] = (uint8) (a->code_buffer & 255); // wtf this warns?
2424       a->code_buffer >>= 8;
2425       a->num_bits -= 8;
2426    }
2427    assert(a->num_bits == 0);
2428    // now fill header the normal way
2429    while (k < 4)
2430       header[k++] = (uint8) zget8(a);
2431    len  = header[1] * 256 + header[0];
2432    nlen = header[3] * 256 + header[2];
2433    if (nlen != (len ^ 0xffff)) return e("zlib corrupt","Corrupt PNG");
2434    if (a->zbuffer + len > a->zbuffer_end) return e("read past buffer","Corrupt PNG");
2435    if (a->zout + len > a->zout_end)
2436       if (!expand(a, len)) return 0;
2437    memcpy(a->zout, a->zbuffer, len);
2438    a->zbuffer += len;
2439    a->zout += len;
2440    return 1;
2441 }
2442 
2443 static int parse_zlib_header(zbuf *a)
2444 {
2445    int cmf   = zget8(a);
2446    int cm    = cmf & 15;
2447    /* int cinfo = cmf >> 4; */
2448    int flg   = zget8(a);
2449    if ((cmf*256+flg) % 31 != 0) return e("bad zlib header","Corrupt PNG"); // zlib spec
2450    if (flg & 32) return e("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png
2451    if (cm != 8) return e("bad compression","Corrupt PNG"); // DEFLATE required for png
2452    // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
2453    return 1;
2454 }
2455 
2456 // @TODO: should statically initialize these for optimal thread safety
2457 static uint8 default_length[288], default_distance[32];
2458 static void init_defaults(void)
2459 {
2460    int i;   // use <= to match clearly with spec
2461    for (i=0; i <= 143; ++i)     default_length[i]   = 8;
2462    for (   ; i <= 255; ++i)     default_length[i]   = 9;
2463    for (   ; i <= 279; ++i)     default_length[i]   = 7;
2464    for (   ; i <= 287; ++i)     default_length[i]   = 8;
2465 
2466    for (i=0; i <=  31; ++i)     default_distance[i] = 5;
2467 }
2468 
2469 int stbi_png_partial; // a quick hack to only allow decoding some of a PNG... I should implement real streaming support instead
2470 static int parse_zlib(zbuf *a, int parse_header)
2471 {
2472    int final, type;
2473    if (parse_header)
2474       if (!parse_zlib_header(a)) return 0;
2475    a->num_bits = 0;
2476    a->code_buffer = 0;
2477    do {
2478       final = zreceive(a,1);
2479       type = zreceive(a,2);
2480       if (type == 0) {
2481          if (!parse_uncompressed_block(a)) return 0;
2482       } else if (type == 3) {
2483          return 0;
2484       } else {
2485          if (type == 1) {
2486             // use fixed code lengths
2487             if (!default_distance[31]) init_defaults();
2488             if (!zbuild_huffman(&a->z_length  , default_length  , 288)) return 0;
2489             if (!zbuild_huffman(&a->z_distance, default_distance,  32)) return 0;
2490          } else {
2491             if (!compute_huffman_codes(a)) return 0;
2492          }
2493          if (!parse_huffman_block(a)) return 0;
2494       }
2495       if (stbi_png_partial && a->zout - a->zout_start > 65536)
2496          break;
2497    } while (!final);
2498    return 1;
2499 }
2500 
2501 static int do_zlib(zbuf *a, char *obuf, int olen, int exp, int parse_header)
2502 {
2503    a->zout_start = obuf;
2504    a->zout       = obuf;
2505    a->zout_end   = obuf + olen;
2506    a->z_expandable = exp;
2507 
2508    return parse_zlib(a, parse_header);
2509 }
2510 
2511 char *stbi_zlib_decode_malloc_guesssize(const char * buffer, int len, int initial_size, int *outlen)
2512 {
2513    zbuf a;
2514    char *p = (char *) MALLOC(initial_size);
2515    if (p == NULL) return NULL;
2516    a.zbuffer = (uint8 const *) buffer;
2517    a.zbuffer_end = (uint8 const *) buffer + len;
2518    if (do_zlib(&a, p, initial_size, 1, 1)) {
2519       if (outlen) *outlen = (int) (a.zout - a.zout_start);
2520       return a.zout_start;
2521    } else {
2522       FREE(a.zout_start);
2523       return NULL;
2524    }
2525 }
2526 
// Convenience wrapper: same as stbi_zlib_decode_malloc_guesssize() with an
// initial output size of 16384 bytes.
char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
{
   char *decoded = stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
   return decoded;
}
2531 
2532 char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
2533 {
2534    zbuf a;
2535    char *p = (char *) MALLOC(initial_size);
2536    if (p == NULL) return NULL;
2537    a.zbuffer = (uint8 const *) buffer;
2538    a.zbuffer_end = (uint8 const *) buffer + len;
2539    if (do_zlib(&a, p, initial_size, 1, parse_header)) {
2540       if (outlen) *outlen = (int) (a.zout - a.zout_start);
2541       return a.zout_start;
2542    } else {
2543       FREE(a.zout_start);
2544       return NULL;
2545    }
2546 }
2547 
2548 int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
2549 {
2550    zbuf a;
2551    a.zbuffer = (uint8 const *) ibuffer;
2552    a.zbuffer_end = (uint8 const *) ibuffer + ilen;
2553    if (do_zlib(&a, obuffer, olen, 0, 1))
2554       return (int) (a.zout - a.zout_start);
2555    else
2556       return -1;
2557 }
2558 
// Decode a bare DEFLATE stream (no zlib header) from buffer[0..len) into a
// MALLOC'd, expandable buffer that starts at 16384 bytes.
// Returns the decoded buffer (length in *outlen when outlen is non-NULL),
// or NULL on allocation or decode failure.
char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
{
   // identical to the header-flag variant with a 16384-byte initial guess
   // and header parsing switched off
   return stbi_zlib_decode_malloc_guesssize_headerflag(buffer, len, 16384, outlen, 0);
}
2574 
2575 int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
2576 {
2577    zbuf a;
2578    a.zbuffer = (uint8 const *) ibuffer;
2579    a.zbuffer_end = (uint8 const *) ibuffer + ilen;
2580    if (do_zlib(&a, obuffer, olen, 0, 0))
2581       return (int) (a.zout - a.zout_start);
2582    else
2583       return -1;
2584 }
2585 
2586 // public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18
2587 //    simple implementation
2588 //      - only 8-bit samples
2589 //      - no CRC checking
2590 //      - allocates lots of intermediate memory
2591 //        - avoids problem of streaming data between subsystems
2592 //        - avoids explicit window management
2593 //    performance
2594 //      - uses stb_zlib, a PD zlib implementation with fast huffman decoding
2595 
2596 
// Header of one PNG chunk as read by get_chunk_header().
typedef struct
{
   uint32 length;   // first 32-bit value of the chunk header
   uint32 type;     // second 32-bit value; compared against PNG_TYPE() constants
} chunk;

// Pack four characters into one 32-bit chunk-type constant, with `a` in the
// most significant byte, so chunk types can be used as switch case labels.
#define PNG_TYPE(a,b,c,d)  (((a) << 24) + ((b) << 16) + ((c) << 8) + (d))
2604 
2605 static chunk get_chunk_header(stbi *s)
2606 {
2607    chunk c;
2608    c.length = get32(s);
2609    c.type   = get32(s);
2610    return c;
2611 }
2612 
2613 static int check_png_header(stbi *s)
2614 {
2615    static uint8 png_sig[8] = { 137,80,78,71,13,10,26,10 };
2616    int i;
2617    for (i=0; i < 8; ++i)
2618       if (get8(s) != png_sig[i]) return e("bad png sig","Not a PNG");
2619    return 1;
2620 }
2621 
// Decoder state for one PNG image.
typedef struct
{
   stbi s;           // input stream plus image dimensions / component counts
   // idata:    accumulated IDAT chunk payload (still compressed)
   // expanded: output of the zlib decoder
   // out:      decoded pixel data
   uint8 *idata, *expanded, *out;
} png;
2627 
2628 
// Scanline filter ids. Values 0..4 are the ones accepted from the data
// stream; F_avg_first and F_paeth_first are internal variants used only
// for the first row, which has no previous row to sample.
enum {
   F_none=0, F_sub=1, F_up=2, F_avg=3, F_paeth=4,
   F_avg_first, F_paeth_first
};

// Maps a stream filter id (0..4) to the filter actually applied on row 0.
static uint8 first_row_filter[5] =
{
   F_none, F_sub, F_none, F_avg_first, F_paeth_first
};
2638 
// Paeth predictor: of a, b and c, return the one closest to a + b - c.
// Ties are resolved in the order a, then b, then c.
static int paeth(int a, int b, int c)
{
   int estimate = a + b - c;
   int dist_a = abs(estimate - a);
   int dist_b = abs(estimate - b);
   int dist_c = abs(estimate - c);

   // a loses only if some other candidate is strictly closer
   if (dist_a > dist_b || dist_a > dist_c)
      return (dist_b <= dist_c) ? b : c;
   return a;
}
2649 
// create the png data from post-deflated data
//
// Un-filters `raw` into a freshly allocated a->out holding x*y pixels of
// out_n components each.
//   raw      filtered scanlines; each row is one filter byte followed by
//            img_n bytes per pixel
//   raw_len  number of bytes available in raw
//   out_n    components per output pixel; either img_n or img_n+1 (the
//            extra component is filled with 255)
//   x, y     dimensions of the image (or sub-image) to produce
// Returns 1 on success; on failure returns the result of e(). a->out may be
// left allocated on failure and is not freed here.
static int create_png_image_raw(png *a, uint8 *raw, uint32 raw_len, int out_n, uint32 x, uint32 y)
{
   stbi *s = &a->s;
   uint32 i,j,stride = x*out_n;
   int k;
   int img_n = s->img_n; // copy it into a local for later
   assert(out_n == s->img_n || out_n == s->img_n+1);
   // partial mode decodes a single row only
   if (stbi_png_partial) y = 1;
   a->out = (uint8 *) MALLOC(x * y * out_n);
   if (!a->out) return e("outofmem", "Out of memory");
   if (!stbi_png_partial) {
      // a full-size image must match the expected byte count exactly; a
      // sub-image only needs at least that many bytes
      if (s->img_x == x && s->img_y == y) {
         if (raw_len != (img_n * x + 1) * y) return e("not enough pixels","Corrupt PNG");
      } else { // interlaced:
         if (raw_len < (img_n * x + 1) * y) return e("not enough pixels","Corrupt PNG");
      }
   }
   for (j=0; j < y; ++j) {
      uint8 *cur = a->out + stride*j;
      // previous output row; not dereferenced for j == 0 because the
      // first-row filters below never read it
      uint8 *prior = cur - stride;
      int filter = *raw++;
      if (filter > 4) return e("invalid filter","Corrupt PNG");
      // if first row, use special filter that doesn't sample previous row
      if (j == 0) filter = first_row_filter[filter];
      // handle first pixel explicitly
      // (there is no pixel to the left, so left-neighbour terms are dropped)
      for (k=0; k < img_n; ++k) {
         switch (filter) {
            case F_none       : cur[k] = raw[k]; break;
            case F_sub        : cur[k] = raw[k]; break;
            case F_up         : cur[k] = raw[k] + prior[k]; break;
            case F_avg        : cur[k] = raw[k] + (prior[k]>>1); break;
            case F_paeth      : cur[k] = (uint8) (raw[k] + paeth(0,prior[k],0)); break;
            case F_avg_first  : cur[k] = raw[k]; break;
            case F_paeth_first: cur[k] = raw[k]; break;
         }
      }
      // fill the added component of the first pixel, if any
      if (img_n != out_n) cur[img_n] = 255;
      raw += img_n;
      cur += out_n;
      prior += out_n;
      // this is a little gross, so that we don't switch per-pixel or per-component
      // Each CASE(f) expands to a case label plus a loop over the remaining
      // x-1 pixels and their img_n components; the statement that follows
      // the macro is the innermost loop body.
      if (img_n == out_n) {
         #define CASE(f) \
             case f:     \
                for (i=x-1; i >= 1; --i, raw+=img_n,cur+=img_n,prior+=img_n) \
                   for (k=0; k < img_n; ++k)
         switch (filter) {
            CASE(F_none)  cur[k] = raw[k]; break;
            CASE(F_sub)   cur[k] = raw[k] + cur[k-img_n]; break;
            CASE(F_up)    cur[k] = raw[k] + prior[k]; break;
            CASE(F_avg)   cur[k] = raw[k] + ((prior[k] + cur[k-img_n])>>1); break;
            CASE(F_paeth)  cur[k] = (uint8) (raw[k] + paeth(cur[k-img_n],prior[k],prior[k-img_n])); break;
            CASE(F_avg_first)    cur[k] = raw[k] + (cur[k-img_n] >> 1); break;
            CASE(F_paeth_first)  cur[k] = (uint8) (raw[k] + paeth(cur[k-img_n],0,0)); break;
         }
         #undef CASE
      } else {
         // same as above, but output pixels are out_n apart and the extra
         // component of every pixel is set to 255
         assert(img_n+1 == out_n);
         #define CASE(f) \
             case f:     \
                for (i=x-1; i >= 1; --i, cur[img_n]=255,raw+=img_n,cur+=out_n,prior+=out_n) \
                   for (k=0; k < img_n; ++k)
         switch (filter) {
            CASE(F_none)  cur[k] = raw[k]; break;
            CASE(F_sub)   cur[k] = raw[k] + cur[k-out_n]; break;
            CASE(F_up)    cur[k] = raw[k] + prior[k]; break;
            CASE(F_avg)   cur[k] = raw[k] + ((prior[k] + cur[k-out_n])>>1); break;
            CASE(F_paeth)  cur[k] = (uint8) (raw[k] + paeth(cur[k-out_n],prior[k],prior[k-out_n])); break;
            CASE(F_avg_first)    cur[k] = raw[k] + (cur[k-out_n] >> 1); break;
            CASE(F_paeth_first)  cur[k] = (uint8) (raw[k] + paeth(cur[k-out_n],0,0)); break;
         }
         #undef CASE
      }
   }
   return 1;
}
2727 
2728 static int create_png_image(png *a, uint8 *raw, uint32 raw_len, int out_n, int interlaced)
2729 {
2730    uint8 *final;
2731    int p;
2732    int save;
2733    if (!interlaced)
2734       return create_png_image_raw(a, raw, raw_len, out_n, a->s.img_x, a->s.img_y);
2735    save = stbi_png_partial;
2736    stbi_png_partial = 0;
2737 
2738    // de-interlacing
2739    final = (uint8 *) MALLOC(a->s.img_x * a->s.img_y * out_n);
2740    for (p=0; p < 7; ++p) {
2741       int xorig[] = { 0,4,0,2,0,1,0 };
2742       int yorig[] = { 0,0,4,0,2,0,1 };
2743       int xspc[]  = { 8,8,4,4,2,2,1 };
2744       int yspc[]  = { 8,8,8,4,4,2,2 };
2745       int i,j,x,y;
2746       // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
2747       x = (a->s.img_x - xorig[p] + xspc[p]-1) / xspc[p];
2748       y = (a->s.img_y - yorig[p] + yspc[p]-1) / yspc[p];
2749       if (x && y) {
2750          if (!create_png_image_raw(a, raw, raw_len, out_n, x, y)) {
2751             FREE(final);
2752             return 0;
2753          }
2754          for (j=0; j < y; ++j)
2755             for (i=0; i < x; ++i)
2756                memcpy(final + (j*yspc[p]+yorig[p])*a->s.img_x*out_n + (i*xspc[p]+xorig[p])*out_n,
2757                       a->out + (j*x+i)*out_n, out_n);
2758          FREE(a->out);
2759          raw += (x*out_n+1)*y;
2760          raw_len -= (x*out_n+1)*y;
2761       }
2762    }
2763    a->out = final;
2764 
2765    stbi_png_partial = save;
2766    return 1;
2767 }
2768 
2769 static int compute_transparency(png *z, uint8 tc[3], int out_n)
2770 {
2771    stbi *s = &z->s;
2772    uint32 i, pixel_count = s->img_x * s->img_y;
2773    uint8 *p = z->out;
2774 
2775    // compute color-based transparency, assuming we've
2776    // already got 255 as the alpha value in the output
2777    assert(out_n == 2 || out_n == 4);
2778 
2779    if (out_n == 2) {
2780       for (i=0; i < pixel_count; ++i) {
2781          p[1] = (p[0] == tc[0] ? 0 : 255);
2782          p += 2;
2783       }
2784    } else {
2785       for (i=0; i < pixel_count; ++i) {
2786          if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
2787             p[3] = 0;
2788          p += 4;
2789       }
2790    }
2791    return 1;
2792 }
2793 
2794 static int expand_palette(png *a, uint8 *palette, int len, int pal_img_n)
2795 {
2796    uint32 i, pixel_count = a->s.img_x * a->s.img_y;
2797    uint8 *p, *temp_out, *orig = a->out;
2798 
2799    p = (uint8 *) MALLOC(pixel_count * pal_img_n);
2800    if (p == NULL) return e("outofmem", "Out of memory");
2801 
2802    // between here and FREE(out) below, exitting would leak
2803    temp_out = p;
2804 
2805    if (pal_img_n == 3) {
2806       for (i=0; i < pixel_count; ++i) {
2807          int n = orig[i]*4;
2808          p[0] = palette[n  ];
2809          p[1] = palette[n+1];
2810          p[2] = palette[n+2];
2811          p += 3;
2812       }
2813    } else {
2814       for (i=0; i < pixel_count; ++i) {
2815          int n = orig[i]*4;
2816          p[0] = palette[n  ];
2817          p[1] = palette[n+1];
2818          p[2] = palette[n+2];
2819          p[3] = palette[n+3];
2820          p += 4;
2821       }
2822    }
2823    FREE(a->out);
2824    a->out = temp_out;
2825 
2826    STBI_NOTUSED(len);
2827 
2828    return 1;
2829 }
2830 
// Global switches for the CgBI ("iPhone") PNG handling.
// stbi_unpremultiply_on_load: when nonzero, stbi_de_iphone() divides the
//    colour components of 4-component pixels by alpha while swapping channels.
// stbi_de_iphone_flag: copied into the parser when a CgBI chunk is seen.
static int stbi_unpremultiply_on_load = 0;
static int stbi_de_iphone_flag = 0;

// Set the unpremultiply switch; the value is stored as given.
void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
{
   stbi_unpremultiply_on_load = flag_true_if_should_unpremultiply;
}
// Set the iPhone-PNG conversion switch; the value is stored as given.
void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
{
   stbi_de_iphone_flag = flag_true_if_should_convert;
}
2842 
2843 static void stbi_de_iphone(png *z)
2844 {
2845    stbi *s = &z->s;
2846    uint32 i, pixel_count = s->img_x * s->img_y;
2847    uint8 *p = z->out;
2848 
2849    if (s->img_out_n == 3) {  // convert bgr to rgb
2850       for (i=0; i < pixel_count; ++i) {
2851          uint8 t = p[0];
2852          p[0] = p[2];
2853          p[2] = t;
2854          p += 3;
2855       }
2856    } else {
2857       assert(s->img_out_n == 4);
2858       if (stbi_unpremultiply_on_load) {
2859          // convert bgr to rgb and unpremultiply
2860          for (i=0; i < pixel_count; ++i) {
2861             uint8 a = p[3];
2862             uint8 t = p[0];
2863             if (a) {
2864                p[0] = p[2] * 255 / a;
2865                p[1] = p[1] * 255 / a;
2866                p[2] =  t   * 255 / a;
2867             } else {
2868                p[0] = p[2];
2869                p[2] = t;
2870             }
2871             p += 4;
2872          }
2873       } else {
2874          // convert bgr to rgb
2875          for (i=0; i < pixel_count; ++i) {
2876             uint8 t = p[0];
2877             p[0] = p[2];
2878             p[2] = t;
2879             p += 4;
2880          }
2881       }
2882    }
2883 }
2884 
2885 static int parse_png_file(png *z, int scan, int req_comp)
2886 {
2887    uint8 palette[1024], pal_img_n=0;
2888    uint8 has_trans=0, tc[3];
2889    uint32 ioff=0, idata_limit=0, i, pal_len=0;
2890    int first=1,k,interlace=0, iphone=0;
2891    stbi *s = &z->s;
2892 
2893    if (!check_png_header(s)) return 0;
2894 
2895    if (scan == SCAN_type) return 1;
2896 
2897    for (;;) {
2898       chunk c = get_chunk_header(s);
2899       switch (c.type) {
2900          case PNG_TYPE('C','g','B','I'):
2901             iphone = stbi_de_iphone_flag;
2902             skip(s, c.length);
2903             break;
2904          case PNG_TYPE('I','H','D','R'): {
2905             int depth,color,comp,filter;
2906             if (!first) return e("multiple IHDR","Corrupt PNG");
2907             first = 0;
2908             if (c.length != 13) return e("bad IHDR len","Corrupt PNG");
2909             s->img_x = get32(s); if (s->img_x > (1 << 24)) return e("too large","Very large image (corrupt?)");
2910             s->img_y = get32(s); if (s->img_y > (1 << 24)) return e("too large","Very large image (corrupt?)");
2911             depth = get8(s);  if (depth != 8)        return e("8bit only","PNG not supported: 8-bit only");
2912             color = get8(s);  if (color > 6)         return e("bad ctype","Corrupt PNG");
2913             if (color == 3) pal_img_n = 3; else if (color & 1) return e("bad ctype","Corrupt PNG");
2914             comp  = get8(s);  if (comp) return e("bad comp method","Corrupt PNG");
2915             filter= get8(s);  if (filter) return e("bad filter method","Corrupt PNG");
2916             interlace = get8(s); if (interlace>1) return e("bad interlace method","Corrupt PNG");
2917             if (!s->img_x || !s->img_y) return e("0-pixel image","Corrupt PNG");
2918             if (!pal_img_n) {
2919                s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
2920                if ((1 << 30) / s->img_x / s->img_n < s->img_y) return e("too large", "Image too large to decode");
2921                if (scan == SCAN_header) return 1;
2922             } else {
2923                // if paletted, then pal_n is our final components, and
2924                // img_n is # components to decompress/filter.
2925                s->img_n = 1;
2926                if ((1 << 30) / s->img_x / 4 < s->img_y) return e("too large","Corrupt PNG");
2927                // if SCAN_header, have to scan to see if we have a tRNS
2928             }
2929             break;
2930          }
2931 
2932          case PNG_TYPE('P','L','T','E'):  {
2933             if (first) return e("first not IHDR", "Corrupt PNG");
2934             if (c.length > 256*3) return e("invalid PLTE","Corrupt PNG");
2935             pal_len = c.length / 3;
2936             if (pal_len * 3 != c.length) return e("invalid PLTE","Corrupt PNG");
2937             for (i=0; i < pal_len; ++i) {
2938                palette[i*4+0] = get8u(s);
2939                palette[i*4+1] = get8u(s);
2940                palette[i*4+2] = get8u(s);
2941                palette[i*4+3] = 255;
2942             }
2943             break;
2944          }
2945 
2946          case PNG_TYPE('t','R','N','S'): {
2947             if (first) return e("first not IHDR", "Corrupt PNG");
2948             if (z->idata) return e("tRNS after IDAT","Corrupt PNG");
2949             if (pal_img_n) {
2950                if (scan == SCAN_header) { s->img_n = 4; return 1; }
2951                if (pal_len == 0) return e("tRNS before PLTE","Corrupt PNG");
2952                if (c.length > pal_len) return e("bad tRNS len","Corrupt PNG");
2953                pal_img_n = 4;
2954                for (i=0; i < c.length; ++i)
2955                   palette[i*4+3] = get8u(s);
2956             } else {
2957                if (!(s->img_n & 1)) return e("tRNS with alpha","Corrupt PNG");
2958                if (c.length != (uint32) s->img_n*2) return e("bad tRNS len","Corrupt PNG");
2959                has_trans = 1;
2960                for (k=0; k < s->img_n; ++k)
2961                   tc[k] = (uint8) get16(s); // non 8-bit images will be larger
2962             }
2963             break;
2964          }
2965 
2966          case PNG_TYPE('I','D','A','T'): {
2967             if (first) return e("first not IHDR", "Corrupt PNG");
2968             if (pal_img_n && !pal_len) return e("no PLTE","Corrupt PNG");
2969             if (scan == SCAN_header) { s->img_n = pal_img_n; return 1; }
2970             if (ioff + c.length > idata_limit) {
2971                uint8 *p;
2972                if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
2973                while (ioff + c.length > idata_limit)
2974                   idata_limit *= 2;
2975                p = (uint8 *) REALLOC(z->idata, idata_limit); if (p == NULL) return e("outofmem", "Out of memory");
2976                z->idata = p;
2977             }
2978             if (!getn(s, z->idata+ioff,c.length)) return e("outofdata","Corrupt PNG");
2979             ioff += c.length;
2980             break;
2981          }
2982 
2983          case PNG_TYPE('I','E','N','D'): {
2984             uint32 raw_len;
2985             if (first) return e("first not IHDR", "Corrupt PNG");
2986             if (scan != SCAN_load) return 1;
2987             if (z->idata == NULL) return e("no IDAT","Corrupt PNG");
2988             z->expanded = (uint8 *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, 16384, (int *) &raw_len, !iphone);
2989             if (z->expanded == NULL) return 0; // zlib should set error
2990             FREE(z->idata); z->idata = NULL;
2991             if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
2992                s->img_out_n = s->img_n+1;
2993             else
2994                s->img_out_n = s->img_n;
2995             if (!create_png_image(z, z->expanded, raw_len, s->img_out_n, interlace)) return 0;
2996             if (has_trans)
2997                if (!compute_transparency(z, tc, s->img_out_n)) return 0;
2998             if (iphone && s->img_out_n > 2)
2999                stbi_de_iphone(z);
3000             if (pal_img_n) {
3001                // pal_img_n == 3 or 4
3002                s->img_n = pal_img_n; // record the actual colors we had
3003                s->img_out_n = pal_img_n;
3004                if (req_comp >= 3) s->img_out_n = req_comp;
3005                if (!expand_palette(z, palette, pal_len, s->img_out_n))
3006                   return 0;
3007             }
3008             FREE(z->expanded); z->expanded = NULL;
3009             return 1;
3010          }
3011 
3012          default:
3013             // if critical, fail
3014             if (first) return e("first not IHDR", "Corrupt PNG");
3015             if ((c.type & (1 << 29)) == 0) {
3016                #ifndef STBI_NO_FAILURE_STRINGS
3017                // not threadsafe
3018                static char invalid_chunk[] = "XXXX chunk not known";
3019                invalid_chunk[0] = (uint8) (c.type >> 24);
3020                invalid_chunk[1] = (uint8) (c.type >> 16);
3021                invalid_chunk[2] = (uint8) (c.type >>  8);
3022                invalid_chunk[3] = (uint8) (c.type >>  0);
3023                #endif
3024                return e(invalid_chunk, "PNG not supported: unknown chunk type");
3025             }
3026             skip(s, c.length);
3027             break;
3028       }
3029       // end of chunk, read and skip CRC
3030       get32(s);
3031    }
3032 }
3033 
3034 static unsigned char *do_png(png *p, int *x, int *y, int *n, int req_comp)
3035 {
3036    unsigned char *result=NULL;
3037    p->expanded = NULL;
3038    p->idata = NULL;
3039    p->out = NULL;
3040    if (req_comp < 0 || req_comp > 4) return epuc("bad req_comp", "Internal error");
3041    if (parse_png_file(p, SCAN_load, req_comp)) {
3042       result = p->out;
3043       p->out = NULL;
3044       if (req_comp && req_comp != p->s.img_out_n) {
3045          result = convert_format(result, p->s.img_out_n, req_comp, p->s.img_x, p->s.img_y);
3046          p->s.img_out_n = req_comp;
3047          if (result == NULL) return result;
3048       }
3049       *x = p->s.img_x;
3050       *y = p->s.img_y;
3051       if (n) *n = p->s.img_n;
3052    }
3053    FREE(p->out);      p->out      = NULL;
3054    FREE(p->expanded); p->expanded = NULL;
3055    FREE(p->idata);    p->idata    = NULL;
3056 
3057    return result;
3058 }
3059 
3060 #ifndef STBI_NO_STDIO
3061 unsigned char *stbi_png_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
3062 {
3063    png p;
3064    start_file(&p.s, f);
3065    return do_png(&p, x,y,comp,req_comp);
3066 }
3067 
// Open `filename` in binary mode, decode it as a PNG and close it again.
// Returns the pixel buffer, or NULL if the file cannot be opened or decoded.
unsigned char *stbi_png_load(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *pixels = NULL;
   FILE *f = fopen(filename, "rb");

   if (f) {
      pixels = stbi_png_load_from_file(f,x,y,comp,req_comp);
      fclose(f);
   }
   return pixels;
}
3077 #endif
3078 
3079 unsigned char *stbi_png_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
3080 {
3081    png p;
3082    start_mem(&p.s, buffer,len);
3083    return do_png(&p, x,y,comp,req_comp);
3084 }
3085 
3086 #ifndef STBI_NO_STDIO
3087 int stbi_png_test_file(FILE *f)
3088 {
3089    png p;
3090    int n,r;
3091    n = ftell(f);
3092    start_file(&p.s, f);
3093    r = parse_png_file(&p, SCAN_type,STBI_default);
3094    fseek(f,n,SEEK_SET);
3095    return r;
3096 }
3097 #endif
3098 
3099 int stbi_png_test_memory(stbi_uc const *buffer, int len)
3100 {
3101    png p;
3102    start_mem(&p.s, buffer, len);
3103    return parse_png_file(&p, SCAN_type,STBI_default);
3104 }
3105 
3106 static int stbi_png_info_raw(png *p, int *x, int *y, int *comp)
3107 {
3108    if (!parse_png_file(p, SCAN_header, 0))
3109       return 0;
3110    if (x) *x = p->s.img_x;
3111    if (y) *y = p->s.img_y;
3112    if (comp) *comp = p->s.img_n;
3113    return 1;
3114 }
3115 
3116 #ifndef STBI_NO_STDIO
// Open `filename` in binary mode and report the PNG's dimensions and
// component count through x, y and comp.
// Returns 0 if the file cannot be opened; otherwise the result of
// stbi_png_info_from_file().
int      stbi_png_info             (char const *filename,           int *x, int *y, int *comp)
{
   int ok = 0;
   FILE *f = fopen(filename, "rb");

   if (f) {
      ok = stbi_png_info_from_file(f, x, y, comp);
      fclose(f);
   }
   return ok;
}
3126 
3127 int stbi_png_info_from_file(FILE *f, int *x, int *y, int *comp)
3128 {
3129    png p;
3130    int res;
3131    long n = ftell(f);
3132    start_file(&p.s, f);
3133    res = stbi_png_info_raw(&p, x, y, comp);
3134    fseek(f, n, SEEK_SET);
3135    return res;
3136 }
3137 #endif // !STBI_NO_STDIO
3138 
3139 int stbi_png_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
3140 {
3141    png p;
3142    start_mem(&p.s, buffer, len);
3143    return stbi_png_info_raw(&p, x, y, comp);
3144 }
3145 
3146 // Microsoft/Windows BMP image
3147 
3148 static int bmp_test(stbi *s)
3149 {
3150    int sz;
3151    if (get8(s) != 'B') return 0;
3152    if (get8(s) != 'M') return 0;
3153    get32le(s); // discard filesize
3154    get16le(s); // discard reserved
3155    get16le(s); // discard reserved
3156    get32le(s); // discard data offset
3157    sz = get32le(s);
3158    if (sz == 12 || sz == 40 || sz == 56 || sz == 108) return 1;
3159    return 0;
3160 }
3161 
3162 #ifndef STBI_NO_STDIO
3163 int      stbi_bmp_test_file        (FILE *f)
3164 {
3165    stbi s;
3166    int r,n = ftell(f);
3167    start_file(&s,f);
3168    r = bmp_test(&s);
3169    fseek(f,n,SEEK_SET);
3170    return r;
3171 }
3172 #endif
3173 
3174 int      stbi_bmp_test_memory      (stbi_uc const *buffer, int len)
3175 {
3176    stbi s;
3177    start_mem(&s, buffer, len);
3178    return bmp_test(&s);
3179 }
3180 
// returns 0..31 for the highest set bit, or -1 when z is zero
static int high_bit(unsigned int z)
{
   // binary search over halves: whenever the upper part is non-empty,
   // move into it and record how far we shifted
   static const int step[5] = { 16, 8, 4, 2, 1 };
   int pos = 0;
   int idx;

   if (z == 0) return -1;

   for (idx = 0; idx < 5; ++idx) {
      if ((z >> step[idx]) != 0) {
         pos += step[idx];
         z >>= step[idx];
      }
   }
   return pos;
}
3193 
// Number of set bits in a (0..32 for a 32-bit value).
static int bitcount(unsigned int a)
{
   int ones = 0;

   // each step clears the lowest set bit, so the loop runs once per 1 bit
   while (a != 0) {
      a &= a - 1;
      ++ones;
   }
   return ones;
}
3203 
// Move a masked colour field into the low 8 bits and widen it to 8 bits by
// repeating its bit pattern.
//   v      the masked field (pixel & channel mask)
//   shift  right-shift amount that puts the field's top bit at bit 7;
//          a negative value means shift left by -shift
//   bits   width of the field in bits
// Returns the widened value. The work is done on an unsigned copy so that a
// field occupying bit 31 is shifted logically rather than sign-extended,
// and so that no negative value is ever left-shifted.
static int shiftsigned(int v, int shift, int bits)
{
   unsigned int u = (unsigned int) v;
   unsigned int result;
   int z;

   if (shift < 0) u <<= -shift;
   else u >>= shift;
   result = u;

   // a zero-width field has nothing to replicate (and would never advance z)
   if (bits <= 0) return (int) result;

   // fill the bits below the field with copies of its top bits
   for (z = bits; z < 8; z += bits)
      result += u >> z;
   return (int) result;
}
3220 
3221 static stbi_uc *bmp_load(stbi *s, int *x, int *y, int *comp, int req_comp)
3222 {
3223    uint8 *out;
3224    unsigned int mr=0,mg=0,mb=0,ma=0, fake_a=0;
3225    stbi_uc pal[256][4];
3226    int psize=0,i,j,compress=0,width;
3227    int bpp, flip_vertically, pad, target, offset, hsz;
3228    if (get8(s) != 'B' || get8(s) != 'M') return epuc("not BMP", "Corrupt BMP");
3229    get32le(s); // discard filesize
3230    get16le(s); // discard reserved
3231    get16le(s); // discard reserved
3232    offset = get32le(s);
3233    hsz = get32le(s);
3234    if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108) return epuc("unknown BMP", "BMP type not supported: unknown");
3235    if (hsz == 12) {
3236       s->img_x = get16le(s);
3237       s->img_y = get16le(s);
3238    } else {
3239       s->img_x = get32le(s);
3240       s->img_y = get32le(s);
3241    }
3242    if (get16le(s) != 1) return epuc("bad BMP", "bad BMP");
3243    bpp = get16le(s);
3244    if (bpp == 1) return epuc("monochrome", "BMP type not supported: 1-bit");
3245    flip_vertically = ((int) s->img_y) > 0;
3246    s->img_y = abs((int) s->img_y);
3247    if (hsz == 12) {
3248       if (bpp < 24)
3249          psize = (offset - 14 - 24) / 3;
3250    } else {
3251       compress = get32le(s);
3252       if (compress == 1 || compress == 2) return epuc("BMP RLE", "BMP type not supported: RLE");
3253       get32le(s); // discard sizeof
3254       get32le(s); // discard hres
3255       get32le(s); // discard vres
3256       get32le(s); // discard colorsused
3257       get32le(s); // discard max important
3258       if (hsz == 40 || hsz == 56) {
3259          if (hsz == 56) {
3260             get32le(s);
3261             get32le(s);
3262             get32le(s);
3263             get32le(s);
3264          }
3265          if (bpp == 16 || bpp == 32) {
3266             mr = mg = mb = 0;
3267             if (compress == 0) {
3268                if (bpp == 32) {
3269                   mr = 0xffu << 16;
3270                   mg = 0xffu <<  8;
3271                   mb = 0xffu <<  0;
3272                   ma = 0xffu << 24;
3273                   fake_a = 1; // @TODO: check for cases like alpha value is all 0 and switch it to 255
3274                } else {
3275                   mr = 31u << 10;
3276                   mg = 31u <<  5;
3277                   mb = 31u <<  0;
3278                }
3279             } else if (compress == 3) {
3280                mr = get32le(s);
3281                mg = get32le(s);
3282                mb = get32le(s);
3283                // not documented, but generated by photoshop and handled by mspaint
3284                if (mr == mg && mg == mb) {
3285                   // ?!?!?
3286                   return epuc("bad BMP", "bad BMP");
3287                }
3288             } else
3289                return epuc("bad BMP", "bad BMP");
3290          }
3291       } else {
3292          assert(hsz == 108);
3293          mr = get32le(s);
3294          mg = get32le(s);
3295          mb = get32le(s);
3296          ma = get32le(s);
3297          get32le(s); // discard color space
3298          for (i=0; i < 12; ++i)
3299             get32le(s); // discard color space parameters
3300       }
3301       if (bpp < 16)
3302          psize = (offset - 14 - hsz) >> 2;
3303    }
3304    s->img_n = ma ? 4 : 3;
3305    if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
3306       target = req_comp;
3307    else
3308       target = s->img_n; // if they want monochrome, we'll post-convert
3309    out = (stbi_uc *) MALLOC(target * s->img_x * s->img_y);
3310    if (!out) return epuc("outofmem", "Out of memory");
3311    if (bpp < 16) {
3312       int z=0;
3313       if (psize == 0 || psize > 256) { FREE(out); return epuc("invalid", "Corrupt BMP"); }
3314       for (i=0; i < psize; ++i) {
3315          pal[i][2] = get8u(s);
3316          pal[i][1] = get8u(s);
3317          pal[i][0] = get8u(s);
3318          if (hsz != 12) get8(s);
3319          pal[i][3] = 255;
3320       }
3321       skip(s, offset - 14 - hsz - psize * (hsz == 12 ? 3 : 4));
3322       if (bpp == 4) width = (s->img_x + 1) >> 1;
3323       else if (bpp == 8) width = s->img_x;
3324       else { FREE(out); return epuc("bad bpp", "Corrupt BMP"); }
3325       pad = (-width)&3;
3326       for (j=0; j < (int) s->img_y; ++j) {
3327          for (i=0; i < (int) s->img_x; i += 2) {
3328             int v=get8(s),v2=0;
3329             if (bpp == 4) {
3330                v2 = v & 15;
3331                v >>= 4;
3332             }
3333             out[z++] = pal[v][0];
3334             out[z++] = pal[v][1];
3335             out[z++] = pal[v][2];
3336             if (target == 4) out[z++] = 255;
3337             if (i+1 == (int) s->img_x) break;
3338             v = (bpp == 8) ? get8(s) : v2;
3339             out[z++] = pal[v][0];
3340             out[z++] = pal[v][1];
3341             out[z++] = pal[v][2];
3342             if (target == 4) out[z++] = 255;
3343          }
3344          skip(s, pad);
3345       }
3346    } else {
3347       int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
3348       int z = 0;
3349       int easy=0;
3350       skip(s, offset - 14 - hsz);
3351       if (bpp == 24) width = 3 * s->img_x;
3352       else if (bpp == 16) width = 2*s->img_x;
3353       else /* bpp = 32 and pad = 0 */ width=0;
3354       pad = (-width) & 3;
3355       if (bpp == 24) {
3356          easy = 1;
3357       } else if (bpp == 32) {
3358          if (mb == 0xff && mg == 0xff00 && mr == 0xff000000 && ma == 0xff000000)
3359             easy = 2;
3360       }
3361       if (!easy) {
3362          if (!mr || !mg || !mb) return epuc("bad masks", "Corrupt BMP");
3363          // right shift amt to put high bit in position #7
3364          rshift = high_bit(mr)-7; rcount = bitcount(mr);
3365          gshift = high_bit(mg)-7; gcount = bitcount(mr);
3366          bshift = high_bit(mb)-7; bcount = bitcount(mr);
3367          ashift = high_bit(ma)-7; acount = bitcount(mr);
3368       }
3369       for (j=0; j < (int) s->img_y; ++j) {
3370          if (easy) {
3371             for (i=0; i < (int) s->img_x; ++i) {
3372                int a;
3373                out[z+2] = get8u(s);
3374                out[z+1] = get8u(s);
3375                out[z+0] = get8u(s);
3376                z += 3;
3377                a = (easy == 2 ? get8(s) : 255);
3378                if (target == 4) out[z++] = (uint8) a;
3379             }
3380          } else {
3381             for (i=0; i < (int) s->img_x; ++i) {
3382                uint32 v = (bpp == 16 ? get16le(s) : get32le(s));
3383                int a;
3384                out[z++] = (uint8) shiftsigned(v & mr, rshift, rcount);
3385                out[z++] = (uint8) shiftsigned(v & mg, gshift, gcount);
3386                out[z++] = (uint8) shiftsigned(v & mb, bshift, bcount);
3387                a = (ma ? shiftsigned(v & ma, ashift, acount) : 255);
3388                if (target == 4) out[z++] = (uint8) a;
3389             }
3390          }
3391          skip(s, pad);
3392       }
3393    }
3394    if (flip_vertically) {
3395       stbi_uc t;
3396       for (j=0; j < (int) s->img_y>>1; ++j) {
3397          stbi_uc *p1 = out +      j     *s->img_x*target;
3398          stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
3399          for (i=0; i < (int) s->img_x*target; ++i) {
3400             t = p1[i], p1[i] = p2[i], p2[i] = t;
3401          }
3402       }
3403    }
3404 
3405    if (req_comp && req_comp != target) {
3406       out = convert_format(out, target, req_comp, s->img_x, s->img_y);
3407       if (out == NULL) return out; // convert_format frees input on failure
3408    }
3409 
3410    *x = s->img_x;
3411    *y = s->img_y;
3412    if (comp) *comp = target;
3413    return out;
3414 }
3415 
3416 #ifndef STBI_NO_STDIO
3417 stbi_uc *stbi_bmp_load             (char const *filename,           int *x, int *y, int *comp, int req_comp)
3418 {
3419    stbi_uc *data;
3420    FILE *f = fopen(filename, "rb");
3421    if (!f) return NULL;
3422    data = stbi_bmp_load_from_file(f, x,y,comp,req_comp);
3423    fclose(f);
3424    return data;
3425 }
3426 
3427 stbi_uc *stbi_bmp_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp)
3428 {
3429    stbi s;
3430    start_file(&s, f);
3431    return bmp_load(&s, x,y,comp,req_comp);
3432 }
3433 #endif
3434 
3435 stbi_uc *stbi_bmp_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
3436 {
3437    stbi s;
3438    start_mem(&s, buffer, len);
3439    return bmp_load(&s, x,y,comp,req_comp);
3440 }
3441 
3442 // Targa Truevision - TGA
3443 // by Jonathan Dummer
3444 
3445 static int tga_info(stbi *s, int *x, int *y, int *comp)
3446 {
3447     int tga_w, tga_h, tga_comp;
3448     int sz;
3449     get8u(s);                   // discard Offset
3450     sz = get8u(s);              // color type
3451     if( sz > 1 ) return 0;      // only RGB or indexed allowed
3452     sz = get8u(s);              // image type
3453     // only RGB or grey allowed, +/- RLE
3454     if ((sz != 1) && (sz != 2) && (sz != 3) && (sz != 9) && (sz != 10) && (sz != 11)) return 0;
3455     get16le(s);                 // discard palette start
3456     get16le(s);                 // discard palette length
3457     get8(s);                    // discard bits per palette color entry
3458     get16le(s);                 // discard x origin
3459     get16le(s);                 // discard y origin
3460     tga_w = get16le(s);
3461     if( tga_w < 1 ) return 0;   // test width
3462     tga_h = get16le(s);
3463     if( tga_h < 1 ) return 0;   // test height
3464     sz = get8(s);               // bits per pixel
3465     // only RGB or RGBA or grey allowed
3466     if ((sz != 8) && (sz != 16) && (sz != 24) && (sz != 32)) return 0;
3467     tga_comp = sz;
3468     if (x) *x = tga_w;
3469     if (y) *y = tga_h;
3470     if (comp) *comp = tga_comp / 8;
3471     return 1;                   // seems to have passed everything
3472 }
3473 
3474 #ifndef STBI_NO_STDIO
3475 int stbi_tga_info_from_file(FILE *f, int *x, int *y, int *comp)
3476 {
3477     stbi s;
3478     int r;
3479     long n = ftell(f);
3480     start_file(&s, f);
3481     r = tga_info(&s, x, y, comp);
3482     fseek(f, n, SEEK_SET);
3483     return r;
3484 }
3485 #endif
3486 
3487 int stbi_tga_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
3488 {
3489     stbi s;
3490     start_mem(&s, buffer, len);
3491     return tga_info(&s, x, y, comp);
3492 }
3493 
3494 static int tga_test(stbi *s)
3495 {
3496    int sz;
3497    get8u(s);      //   discard Offset
3498    sz = get8u(s);   //   color type
3499    if ( sz > 1 ) return 0;   //   only RGB or indexed allowed
3500    sz = get8u(s);   //   image type
3501    if ( (sz != 1) && (sz != 2) && (sz != 3) && (sz != 9) && (sz != 10) && (sz != 11) ) return 0;   //   only RGB or grey allowed, +/- RLE
3502    get16(s);      //   discard palette start
3503    get16(s);      //   discard palette length
3504    get8(s);         //   discard bits per palette color entry
3505    get16(s);      //   discard x origin
3506    get16(s);      //   discard y origin
3507    if ( get16(s) < 1 ) return 0;      //   test width
3508    if ( get16(s) < 1 ) return 0;      //   test height
3509    sz = get8(s);   //   bits per pixel
3510    if ( (sz != 8) && (sz != 16) && (sz != 24) && (sz != 32) ) return 0;   //   only RGB or RGBA or grey allowed
3511    return 1;      //   seems to have passed everything
3512 }
3513 
3514 #ifndef STBI_NO_STDIO
3515 int      stbi_tga_test_file        (FILE *f)
3516 {
3517    stbi s;
3518    int r,n = ftell(f);
3519    start_file(&s, f);
3520    r = tga_test(&s);
3521    fseek(f,n,SEEK_SET);
3522    return r;
3523 }
3524 #endif
3525 
3526 int      stbi_tga_test_memory      (stbi_uc const *buffer, int len)
3527 {
3528    stbi s;
3529    start_mem(&s, buffer, len);
3530    return tga_test(&s);
3531 }
3532 
3533 static stbi_uc *tga_load(stbi *s, int *x, int *y, int *comp, int req_comp)
3534 {
3535    //   read in the TGA header stuff
3536    int tga_offset = get8u(s);
3537    int tga_indexed = get8u(s);
3538    int tga_image_type = get8u(s);
3539    int tga_is_RLE = 0;
3540    int tga_palette_start = get16le(s);
3541    int tga_palette_len = get16le(s);
3542    int tga_palette_bits = get8u(s);
3543    int tga_x_origin = get16le(s);
3544    int tga_y_origin = get16le(s);
3545    int tga_width = get16le(s);
3546    int tga_height = get16le(s);
3547    int tga_bits_per_pixel = get8u(s);
3548    int tga_inverted = get8u(s);
3549    //   image data
3550    unsigned char *tga_data;
3551    unsigned char *tga_palette = NULL;
3552    int i, j;
3553    unsigned char raw_data[4];
3554    unsigned char trans_data[4];
3555    int RLE_count = 0;
3556    int RLE_repeating = 0;
3557    int read_next_pixel = 1;
3558 
3559    //   do a tiny bit of precessing
3560    if ( tga_image_type >= 8 )
3561    {
3562       tga_image_type -= 8;
3563       tga_is_RLE = 1;
3564    }
3565    /* int tga_alpha_bits = tga_inverted & 15; */
3566    tga_inverted = 1 - ((tga_inverted >> 5) & 1);
3567 
3568    //   error check
3569    if ( //(tga_indexed) ||
3570       (tga_width < 1) || (tga_height < 1) ||
3571       (tga_image_type < 1) || (tga_image_type > 3) ||
3572       ((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16) &&
3573       (tga_bits_per_pixel != 24) && (tga_bits_per_pixel != 32))
3574       )
3575    {
3576       return NULL;
3577    }
3578 
3579    //   If I'm paletted, then I'll use the number of bits from the palette
3580    if ( tga_indexed )
3581    {
3582       tga_bits_per_pixel = tga_palette_bits;
3583    }
3584 
3585    //   tga info
3586    *x = tga_width;
3587    *y = tga_height;
3588    if ( (req_comp < 1) || (req_comp > 4) )
3589    {
3590       //   just use whatever the file was
3591       req_comp = tga_bits_per_pixel / 8;
3592       *comp = req_comp;
3593    } else
3594    {
3595       //   force a new number of components
3596       *comp = tga_bits_per_pixel/8;
3597    }
3598    tga_data = (unsigned char*)MALLOC( tga_width * tga_height * req_comp );
3599 
3600    //   skip to the data's starting position (offset usually = 0)
3601    skip(s, tga_offset );
3602    //   do I need to load a palette?
3603    if ( tga_indexed )
3604    {
3605       //   any data to skip? (offset usually = 0)
3606       skip(s, tga_palette_start );
3607       //   load the palette
3608       tga_palette = (unsigned char*)MALLOC( tga_palette_len * tga_palette_bits / 8 );
3609       if (!getn(s, tga_palette, tga_palette_len * tga_palette_bits / 8 ))
3610          return NULL;
3611    }
3612    //   load the data
3613    trans_data[0] = trans_data[1] = trans_data[2] = trans_data[3] = 0;
3614    for (i=0; i < tga_width * tga_height; ++i)
3615    {
3616       //   if I'm in RLE mode, do I need to get a RLE chunk?
3617       if ( tga_is_RLE )
3618       {
3619          if ( RLE_count == 0 )
3620          {
3621             //   yep, get the next byte as a RLE command
3622             int RLE_cmd = get8u(s);
3623             RLE_count = 1 + (RLE_cmd & 127);
3624             RLE_repeating = RLE_cmd >> 7;
3625             read_next_pixel = 1;
3626          } else if ( !RLE_repeating )
3627          {
3628             read_next_pixel = 1;
3629          }
3630       } else
3631       {
3632          read_next_pixel = 1;
3633       }
3634       //   OK, if I need to read a pixel, do it now
3635       if ( read_next_pixel )
3636       {
3637          //   load however much data we did have
3638          if ( tga_indexed )
3639          {
3640             //   read in 1 byte, then perform the lookup
3641             int pal_idx = get8u(s);
3642             if ( pal_idx >= tga_palette_len )
3643             {
3644                //   invalid index
3645                pal_idx = 0;
3646             }
3647             pal_idx *= tga_bits_per_pixel / 8;
3648             for (j = 0; j*8 < tga_bits_per_pixel; ++j)
3649             {
3650                raw_data[j] = tga_palette[pal_idx+j];
3651             }
3652          } else
3653          {
3654             //   read in the data raw
3655             for (j = 0; j*8 < tga_bits_per_pixel; ++j)
3656             {
3657                raw_data[j] = get8u(s);
3658             }
3659          }
3660          //   convert raw to the intermediate format
3661          switch (tga_bits_per_pixel)
3662          {
3663          case 8:
3664             //   Luminous => RGBA
3665             trans_data[0] = raw_data[0];
3666             trans_data[1] = raw_data[0];
3667             trans_data[2] = raw_data[0];
3668             trans_data[3] = 255;
3669             break;
3670          case 16:
3671             //   Luminous,Alpha => RGBA
3672             trans_data[0] = raw_data[0];
3673             trans_data[1] = raw_data[0];
3674             trans_data[2] = raw_data[0];
3675             trans_data[3] = raw_data[1];
3676             break;
3677          case 24:
3678             //   BGR => RGBA
3679             trans_data[0] = raw_data[2];
3680             trans_data[1] = raw_data[1];
3681             trans_data[2] = raw_data[0];
3682             trans_data[3] = 255;
3683             break;
3684          case 32:
3685             //   BGRA => RGBA
3686             trans_data[0] = raw_data[2];
3687             trans_data[1] = raw_data[1];
3688             trans_data[2] = raw_data[0];
3689             trans_data[3] = raw_data[3];
3690             break;
3691          }
3692          //   clear the reading flag for the next pixel
3693          read_next_pixel = 0;
3694       } // end of reading a pixel
3695       //   convert to final format
3696       switch (req_comp)
3697       {
3698       case 1:
3699          //   RGBA => Luminance
3700          tga_data[i*req_comp+0] = compute_y(trans_data[0],trans_data[1],trans_data[2]);
3701          break;
3702       case 2:
3703          //   RGBA => Luminance,Alpha
3704          tga_data[i*req_comp+0] = compute_y(trans_data[0],trans_data[1],trans_data[2]);
3705          tga_data[i*req_comp+1] = trans_data[3];
3706          break;
3707       case 3:
3708          //   RGBA => RGB
3709          tga_data[i*req_comp+0] = trans_data[0];
3710          tga_data[i*req_comp+1] = trans_data[1];
3711          tga_data[i*req_comp+2] = trans_data[2];
3712          break;
3713       case 4:
3714          //   RGBA => RGBA
3715          tga_data[i*req_comp+0] = trans_data[0];
3716          tga_data[i*req_comp+1] = trans_data[1];
3717          tga_data[i*req_comp+2] = trans_data[2];
3718          tga_data[i*req_comp+3] = trans_data[3];
3719          break;
3720       }
3721       //   in case we're in RLE mode, keep counting down
3722       --RLE_count;
3723    }
3724    //   do I need to invert the image?
3725    if ( tga_inverted )
3726    {
3727       for (j = 0; j*2 < tga_height; ++j)
3728       {
3729          int index1 = j * tga_width * req_comp;
3730          int index2 = (tga_height - 1 - j) * tga_width * req_comp;
3731          for (i = tga_width * req_comp; i > 0; --i)
3732          {
3733             unsigned char temp = tga_data[index1];
3734             tga_data[index1] = tga_data[index2];
3735             tga_data[index2] = temp;
3736             ++index1;
3737             ++index2;
3738          }
3739       }
3740    }
3741    //   clear my palette, if I had one
3742    if ( tga_palette != NULL )
3743    {
3744       FREE( tga_palette );
3745    }
3746    //   the things I do to get rid of an error message, and yet keep
3747    //   Microsoft's C compilers happy... [8^(
3748    tga_palette_start = tga_palette_len = tga_palette_bits =
3749          tga_x_origin = tga_y_origin = 0;
3750    //   OK, done
3751    return tga_data;
3752 }
3753 
3754 #ifndef STBI_NO_STDIO
3755 stbi_uc *stbi_tga_load             (char const *filename,           int *x, int *y, int *comp, int req_comp)
3756 {
3757    stbi_uc *data;
3758    FILE *f = fopen(filename, "rb");
3759    if (!f) return NULL;
3760    data = stbi_tga_load_from_file(f, x,y,comp,req_comp);
3761    fclose(f);
3762    return data;
3763 }
3764 
3765 stbi_uc *stbi_tga_load_from_file   (FILE *f,                  int *x, int *y, int *comp, int req_comp)
3766 {
3767    stbi s;
3768    start_file(&s, f);
3769    return tga_load(&s, x,y,comp,req_comp);
3770 }
3771 #endif
3772 
3773 stbi_uc *stbi_tga_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
3774 {
3775    stbi s;
3776    start_mem(&s, buffer, len);
3777    return tga_load(&s, x,y,comp,req_comp);
3778 }
3779 
3780 
3781 // *************************************************************************************************
3782 // Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
3783 
3784 static int psd_test(stbi *s)
3785 {
3786    if (get32(s) != 0x38425053) return 0;   // "8BPS"
3787    else return 1;
3788 }
3789 
3790 #ifndef STBI_NO_STDIO
3791 int stbi_psd_test_file(FILE *f)
3792 {
3793    stbi s;
3794    int r,n = ftell(f);
3795    start_file(&s, f);
3796    r = psd_test(&s);
3797    fseek(f,n,SEEK_SET);
3798    return r;
3799 }
3800 #endif
3801 
3802 int stbi_psd_test_memory(stbi_uc const *buffer, int len)
3803 {
3804    stbi s;
3805    start_mem(&s, buffer, len);
3806    return psd_test(&s);
3807 }
3808 
3809 static stbi_uc *psd_load(stbi *s, int *x, int *y, int *comp, int req_comp)
3810 {
3811    int   pixelCount;
3812    int channelCount, compression;
3813    int channel, i, count, len;
3814    int w,h;
3815    uint8 *out;
3816 
3817    // Check identifier
3818    if (get32(s) != 0x38425053)   // "8BPS"
3819       return epuc("not PSD", "Corrupt PSD image");
3820 
3821    // Check file type version.
3822    if (get16(s) != 1)
3823       return epuc("wrong version", "Unsupported version of PSD image");
3824 
3825    // Skip 6 reserved bytes.
3826    skip(s, 6 );
3827 
3828    // Read the number of channels (R, G, B, A, etc).
3829    channelCount = get16(s);
3830    if (channelCount < 0 || channelCount > 16)
3831       return epuc("wrong channel count", "Unsupported number of channels in PSD image");
3832 
3833    // Read the rows and columns of the image.
3834    h = get32(s);
3835    w = get32(s);
3836 
3837    // Make sure the depth is 8 bits.
3838    if (get16(s) != 8)
3839       return epuc("unsupported bit depth", "PSD bit depth is not 8 bit");
3840 
3841    // Make sure the color mode is RGB.
3842    // Valid options are:
3843    //   0: Bitmap
3844    //   1: Grayscale
3845    //   2: Indexed color
3846    //   3: RGB color
3847    //   4: CMYK color
3848    //   7: Multichannel
3849    //   8: Duotone
3850    //   9: Lab color
3851    if (get16(s) != 3)
3852       return epuc("wrong color format", "PSD is not in RGB color format");
3853 
3854    // Skip the Mode Data.  (It's the palette for indexed color; other info for other modes.)
3855    skip(s,get32(s) );
3856 
3857    // Skip the image resources.  (resolution, pen tool paths, etc)
3858    skip(s, get32(s) );
3859 
3860    // Skip the reserved data.
3861    skip(s, get32(s) );
3862 
3863    // Find out if the data is compressed.
3864    // Known values:
3865    //   0: no compression
3866    //   1: RLE compressed
3867    compression = get16(s);
3868    if (compression > 1)
3869       return epuc("bad compression", "PSD has an unknown compression format");
3870 
3871    // Create the destination image.
3872    out = (stbi_uc *) MALLOC(4 * w*h);
3873    if (!out) return epuc("outofmem", "Out of memory");
3874    pixelCount = w*h;
3875 
3876    // Initialize the data to zero.
3877    //memset( out, 0, pixelCount * 4 );
3878 
3879    // Finally, the image data.
3880    if (compression) {
3881       // RLE as used by .PSD and .TIFF
3882       // Loop until you get the number of unpacked bytes you are expecting:
3883       //     Read the next source byte into n.
3884       //     If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
3885       //     Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
3886       //     Else if n is 128, noop.
3887       // Endloop
3888 
3889       // The RLE-compressed data is preceeded by a 2-byte data count for each row in the data,
3890       // which we're going to just skip.
3891       skip(s, h * channelCount * 2 );
3892 
3893       // Read the RLE data by channel.
3894       for (channel = 0; channel < 4; channel++) {
3895          uint8 *p;
3896 
3897          p = out+channel;
3898          if (channel >= channelCount) {
3899             // Fill this channel with default data.
3900             for (i = 0; i < pixelCount; i++) *p = (channel == 3 ? 255 : 0), p += 4;
3901          } else {
3902             // Read the RLE data.
3903             count = 0;
3904             while (count < pixelCount) {
3905                len = get8(s);
3906                if (len == 128) {
3907                   // No-op.
3908                } else if (len < 128) {
3909                   // Copy next len+1 bytes literally.
3910                   len++;
3911                   count += len;
3912                   while (len) {
3913                      *p = get8u(s);
3914                      p += 4;
3915                      len--;
3916                   }
3917                } else if (len > 128) {
3918                   uint8   val;
3919                   // Next -len+1 bytes in the dest are replicated from next source byte.
3920                   // (Interpret len as a negative 8-bit int.)
3921                   len ^= 0x0FF;
3922                   len += 2;
3923                   val = get8u(s);
3924                   count += len;
3925                   while (len) {
3926                      *p = val;
3927                      p += 4;
3928                      len--;
3929                   }
3930                }
3931             }
3932          }
3933       }
3934 
3935    } else {
3936       // We're at the raw image data.  It's each channel in order (Red, Green, Blue, Alpha, ...)
3937       // where each channel consists of an 8-bit value for each pixel in the image.
3938 
3939       // Read the data by channel.
3940       for (channel = 0; channel < 4; channel++) {
3941          uint8 *p;
3942 
3943          p = out + channel;
3944          if (channel > channelCount) {
3945             // Fill this channel with default data.
3946             for (i = 0; i < pixelCount; i++) *p = channel == 3 ? 255 : 0, p += 4;
3947          } else {
3948             // Read the data.
3949             for (i = 0; i < pixelCount; i++)
3950                *p = get8u(s), p += 4;
3951          }
3952       }
3953    }
3954 
3955    if (req_comp && req_comp != 4) {
3956       out = convert_format(out, 4, req_comp, w, h);
3957       if (out == NULL) return out; // convert_format frees input on failure
3958    }
3959 
3960    if (comp) *comp = channelCount;
3961    *y = h;
3962    *x = w;
3963 
3964    return out;
3965 }
3966 
3967 #ifndef STBI_NO_STDIO
3968 stbi_uc *stbi_psd_load(char const *filename, int *x, int *y, int *comp, int req_comp)
3969 {
3970    stbi_uc *data;
3971    FILE *f = fopen(filename, "rb");
3972    if (!f) return NULL;
3973    data = stbi_psd_load_from_file(f, x,y,comp,req_comp);
3974    fclose(f);
3975    return data;
3976 }
3977 
3978 stbi_uc *stbi_psd_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
3979 {
3980    stbi s;
3981    start_file(&s, f);
3982    return psd_load(&s, x,y,comp,req_comp);
3983 }
3984 #endif
3985 
3986 stbi_uc *stbi_psd_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
3987 {
3988    stbi s;
3989    start_mem(&s, buffer, len);
3990    return psd_load(&s, x,y,comp,req_comp);
3991 }
3992 
3993 // *************************************************************************************************
3994 // Softimage PIC loader
3995 // by Tom Seddon
3996 //
3997 // See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
3998 // See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
3999 
4000 static int pic_is4(stbi *s,const char *str)
4001 {
4002    int i;
4003    for (i=0; i<4; ++i)
4004       if (get8(s) != (stbi_uc)str[i])
4005          return 0;
4006 
4007    return 1;
4008 }
4009 
4010 static int pic_test(stbi *s)
4011 {
4012    int i;
4013 
4014    if (!pic_is4(s,"\x53\x80\xF6\x34"))
4015       return 0;
4016 
4017    for(i=0;i<84;++i)
4018       get8(s);
4019 
4020    if (!pic_is4(s,"PICT"))
4021       return 0;
4022 
4023    return 1;
4024 }
4025 
4026 typedef struct
4027 {
4028    stbi_uc size,type,channel;
4029 } pic_packet_t;
4030 
4031 static stbi_uc *pic_readval(stbi *s, int channel, stbi_uc *dest)
4032 {
4033    int mask=0x80, i;
4034 
4035    for (i=0; i<4; ++i, mask>>=1) {
4036       if (channel & mask) {
4037          if (at_eof(s)) return epuc("bad file","PIC file too short");
4038          dest[i]=get8u(s);
4039       }
4040    }
4041 
4042    return dest;
4043 }
4044 
4045 static void pic_copyval(int channel,stbi_uc *dest,const stbi_uc *src)
4046 {
4047    int mask=0x80,i;
4048 
4049    for (i=0;i<4; ++i, mask>>=1)
4050       if (channel&mask)
4051          dest[i]=src[i];
4052 }
4053 
4054 static stbi_uc *pic_load2(stbi *s,int width,int height,int *comp, stbi_uc *result)
4055 {
4056    int act_comp=0,num_packets=0,y,chained;
4057    pic_packet_t packets[10];
4058 
4059    // this will (should...) cater for even some bizarre stuff like having data
4060     // for the same channel in multiple packets.
4061    do {
4062       pic_packet_t *packet;
4063 
4064       if (num_packets==sizeof(packets)/sizeof(packets[0]))
4065          return epuc("bad format","too many packets");
4066 
4067       packet = &packets[num_packets++];
4068 
4069       chained = get8(s);
4070       packet->size    = get8u(s);
4071       packet->type    = get8u(s);
4072       packet->channel = get8u(s);
4073 
4074       act_comp |= packet->channel;
4075 
4076       if (at_eof(s))          return epuc("bad file","file too short (reading packets)");
4077       if (packet->size != 8)  return epuc("bad format","packet isn't 8bpp");
4078    } while (chained);
4079 
4080    *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?
4081 
4082    for(y=0; y<height; ++y) {
4083       int packet_idx;
4084 
4085       for(packet_idx=0; packet_idx < num_packets; ++packet_idx) {
4086          pic_packet_t *packet = &packets[packet_idx];
4087          stbi_uc *dest = result+y*width*4;
4088 
4089          switch (packet->type) {
4090             default:
4091                return epuc("bad format","packet has bad compression type");
4092 
4093             case 0: {//uncompressed
4094                int x;
4095 
4096                for(x=0;x<width;++x, dest+=4)
4097                   if (!pic_readval(s,packet->channel,dest))
4098                      return 0;
4099                break;
4100             }
4101 
4102             case 1://Pure RLE
4103                {
4104                   int left=width, i;
4105 
4106                   while (left>0) {
4107                      stbi_uc count,value[4];
4108 
4109                      count=get8u(s);
4110                      if (at_eof(s))   return epuc("bad file","file too short (pure read count)");
4111 
4112                      if (count > left)
4113                         count = (uint8) left;
4114 
4115                      if (!pic_readval(s,packet->channel,value))  return 0;
4116 
4117                      for(i=0; i<count; ++i,dest+=4)
4118                         pic_copyval(packet->channel,dest,value);
4119                      left -= count;
4120                   }
4121                }
4122                break;
4123 
4124             case 2: {//Mixed RLE
4125                int left=width;
4126                while (left>0) {
4127                   int count = get8(s), i;
4128                   if (at_eof(s))  return epuc("bad file","file too short (mixed read count)");
4129 
4130                   if (count >= 128) { // Repeated
4131                      stbi_uc value[4];
4132 
4133                      if (count==128)
4134                         count = get16(s);
4135                      else
4136                         count -= 127;
4137                      if (count > left)
4138                         return epuc("bad file","scanline overrun");
4139 
4140                      if (!pic_readval(s,packet->channel,value))
4141                         return 0;
4142 
4143                      for(i=0;i<count;++i, dest += 4)
4144                         pic_copyval(packet->channel,dest,value);
4145                   } else { // Raw
4146                      ++count;
4147                      if (count>left) return epuc("bad file","scanline overrun");
4148 
4149                      for(i=0;i<count;++i, dest+=4)
4150                         if (!pic_readval(s,packet->channel,dest))
4151                            return 0;
4152                   }
4153                   left-=count;
4154                }
4155                break;
4156             }
4157          }
4158       }
4159    }
4160 
4161    return result;
4162 }
4163 
4164 static stbi_uc *pic_load(stbi *s,int *px,int *py,int *comp,int req_comp)
4165 {
4166    stbi_uc *result;
4167    int i, x,y;
4168 
4169    for (i=0; i<92; ++i)
4170       get8(s);
4171 
4172    x = get16(s);
4173    y = get16(s);
4174    if (at_eof(s))  return epuc("bad file","file too short (pic header)");
4175    if ((1 << 28) / x < y) return epuc("too large", "Image too large to decode");
4176 
4177    get32(s); //skip `ratio'
4178    get16(s); //skip `fields'
4179    get16(s); //skip `pad'
4180 
4181    // intermediate buffer is RGBA
4182    result = (stbi_uc *) MALLOC(x*y*4);
4183    memset(result, 0xff, x*y*4);
4184 
4185    if (!pic_load2(s,x,y,comp, result)) {
4186       FREE(result);
4187       result=0;
4188    }
4189    *px = x;
4190    *py = y;
4191    if (req_comp == 0) req_comp = *comp;
4192    result=convert_format(result,4,req_comp,x,y);
4193 
4194    return result;
4195 }
4196 
4197 int stbi_pic_test_memory(stbi_uc const *buffer, int len)
4198 {
4199    stbi s;
4200    start_mem(&s,buffer,len);
4201    return pic_test(&s);
4202 }
4203 
4204 stbi_uc *stbi_pic_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
4205 {
4206    stbi s;
4207    start_mem(&s,buffer,len);
4208    return pic_load(&s,x,y,comp,req_comp);
4209 }
4210 
4211 #ifndef STBI_NO_STDIO
4212 int stbi_pic_test_file(FILE *f)
4213 {
4214    int result;
4215    long l = ftell(f);
4216    stbi s;
4217    start_file(&s,f);
4218    result = pic_test(&s);
4219    fseek(f,l,SEEK_SET);
4220    return result;
4221 }
4222 
4223 stbi_uc *stbi_pic_load(char const *filename,int *x, int *y, int *comp, int req_comp)
4224 {
4225    stbi_uc *result;
4226    FILE *f=fopen(filename,"rb");
4227    if (!f) return 0;
4228    result = stbi_pic_load_from_file(f,x,y,comp,req_comp);
4229    fclose(f);
4230    return result;
4231 }
4232 
4233 stbi_uc *stbi_pic_load_from_file(FILE *f,int *x, int *y, int *comp, int req_comp)
4234 {
4235    stbi s;
4236    start_file(&s,f);
4237    return pic_load(&s,x,y,comp,req_comp);
4238 }
4239 #endif
4240 
4241 // *************************************************************************************************
4242 // GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
4243 typedef struct stbi_gif_lzw_struct {
4244    int16 prefix;
4245    uint8 first;
4246    uint8 suffix;
4247 } stbi_gif_lzw;
4248 
4249 typedef struct stbi_gif_struct
4250 {
4251    int w,h;
4252    stbi_uc *out;                 // output buffer (always 4 components)
4253    int flags, bgindex, ratio, transparent, eflags;
4254    uint8  pal[256][4];
4255    uint8 lpal[256][4];
4256    stbi_gif_lzw codes[4096];
4257    uint8 *color_table;
4258    int parse, step;
4259    int lflags;
4260    int start_x, start_y;
4261    int max_x, max_y;
4262    int cur_x, cur_y;
4263    int line_size;
4264 } stbi_gif;
4265 
4266 static int gif_test(stbi *s)
4267 {
4268    int sz;
4269    if (get8(s) != 'G' || get8(s) != 'I' || get8(s) != 'F' || get8(s) != '8') return 0;
4270    sz = get8(s);
4271    if (sz != '9' && sz != '7') return 0;
4272    if (get8(s) != 'a') return 0;
4273    return 1;
4274 }
4275 
4276 #ifndef STBI_NO_STDIO
4277 int      stbi_gif_test_file        (FILE *f)
4278 {
4279    stbi s;
4280    int r,n = ftell(f);
4281    start_file(&s,f);
4282    r = gif_test(&s);
4283    fseek(f,n,SEEK_SET);
4284    return r;
4285 }
4286 #endif
4287 
4288 int      stbi_gif_test_memory      (stbi_uc const *buffer, int len)
4289 {
4290    stbi s;
4291    start_mem(&s, buffer, len);
4292    return gif_test(&s);
4293 }
4294 
4295 static void stbi_gif_parse_colortable(stbi *s, uint8 pal[256][4], int num_entries, int transp)
4296 {
4297    int i;
4298    for (i=0; i < num_entries; ++i) {
4299       pal[i][2] = get8u(s);
4300       pal[i][1] = get8u(s);
4301       pal[i][0] = get8u(s);
4302       pal[i][3] = transp ? 0 : 255;
4303    }
4304 }
4305 
4306 static int stbi_gif_header(stbi *s, stbi_gif *g, int *comp, int is_info)
4307 {
4308    uint8 ver;
4309    if (get8(s) != 'G' || get8(s) != 'I' || get8(s) != 'F' || get8(s) != '8')
4310       return e("not GIF", "Corrupt GIF");
4311 
4312    ver = get8u(s);
4313    if (ver != '7' && ver != '9')    return e("not GIF", "Corrupt GIF");
4314    if (get8(s) != 'a')                      return e("not GIF", "Corrupt GIF");
4315 
4316    failure_reason = "";
4317    g->w = get16le(s);
4318    g->h = get16le(s);
4319    g->flags = get8(s);
4320    g->bgindex = get8(s);
4321    g->ratio = get8(s);
4322    g->transparent = -1;
4323 
4324    if (comp != 0) *comp = 4;  // can't actually tell whether it's 3 or 4 until we parse the comments
4325 
4326    if (is_info) return 1;
4327 
4328    if (g->flags & 0x80)
4329       stbi_gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1);
4330 
4331    return 1;
4332 }
4333 
4334 static int stbi_gif_info_raw(stbi *s, int *x, int *y, int *comp)
4335 {
4336    stbi_gif g;
4337    if (!stbi_gif_header(s, &g, comp, 1)) return 0;
4338    if (x) *x = g.w;
4339    if (y) *y = g.h;
4340    return 1;
4341 }
4342 
4343 static void stbi_out_gif_code(stbi_gif *g, uint16 code)
4344 {
4345    uint8 *p, *c;
4346 
4347    // recurse to decode the prefixes, since the linked-list is backwards,
4348    // and working backwards through an interleaved image would be nasty
4349    if (g->codes[code].prefix >= 0)
4350       stbi_out_gif_code(g, g->codes[code].prefix);
4351 
4352    if (g->cur_y >= g->max_y) return;
4353 
4354    p = &g->out[g->cur_x + g->cur_y];
4355    c = &g->color_table[g->codes[code].suffix * 4];
4356 
4357    if (c[3] >= 128) {
4358       p[0] = c[2];
4359       p[1] = c[1];
4360       p[2] = c[0];
4361       p[3] = c[3];
4362    }
4363    g->cur_x += 4;
4364 
4365    if (g->cur_x >= g->max_x) {
4366       g->cur_x = g->start_x;
4367       g->cur_y += g->step;
4368 
4369       while (g->cur_y >= g->max_y && g->parse > 0) {
4370          g->step = (1 << g->parse) * g->line_size;
4371          g->cur_y = g->start_y + (g->step >> 1);
4372          --g->parse;
4373       }
4374    }
4375 }
4376 
4377 static uint8 *stbi_process_gif_raster(stbi *s, stbi_gif *g)
4378 {
4379    uint8 lzw_cs;
4380    int32 len, code;
4381    uint32 first;
4382    int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
4383    stbi_gif_lzw *p;
4384 
4385    lzw_cs = get8u(s);
4386    clear = 1 << lzw_cs;
4387    first = 1;
4388    codesize = lzw_cs + 1;
4389    codemask = (1 << codesize) - 1;
4390    bits = 0;
4391    valid_bits = 0;
4392    for (code = 0; code < clear; code++) {
4393       g->codes[code].prefix = -1;
4394       g->codes[code].first = (uint8) code;
4395       g->codes[code].suffix = (uint8) code;
4396    }
4397 
4398    // support no starting clear code
4399    avail = clear+2;
4400    oldcode = -1;
4401 
4402    len = 0;
4403    for(;;) {
4404       if (valid_bits < codesize) {
4405          if (len == 0) {
4406             len = get8(s); // start new block
4407             if (len == 0)
4408                return g->out;
4409          }
4410          --len;
4411          bits |= (int32) get8(s) << valid_bits;
4412          valid_bits += 8;
4413       } else {
4414          code = bits & codemask;
4415          bits >>= codesize;
4416          valid_bits -= codesize;
4417          // @OPTIMIZE: is there some way we can accelerate the non-clear path?
4418          if (code == clear) {  // clear code
4419             codesize = lzw_cs + 1;
4420             codemask = (1 << codesize) - 1;
4421             avail = clear + 2;
4422             oldcode = -1;
4423             first = 0;
4424          } else if (code == clear + 1) { // end of stream code
4425             skip(s, len);
4426             while ((len = get8(s)) > 0)
4427                skip(s,len);
4428             return g->out;
4429          } else if (code <= avail) {
4430             if (first) return epuc("no clear code", "Corrupt GIF");
4431 
4432             if (oldcode >= 0) {
4433                p = &g->codes[avail++];
4434                if (avail > 4096)        return epuc("too many codes", "Corrupt GIF");
4435                p->prefix = (int16) oldcode;
4436                p->first = g->codes[oldcode].first;
4437                p->suffix = (code == avail) ? p->first : g->codes[code].first;
4438             } else if (code == avail)
4439                return epuc("illegal code in raster", "Corrupt GIF");
4440 
4441             stbi_out_gif_code(g, (uint16) code);
4442 
4443             if ((avail & codemask) == 0 && avail <= 0x0FFF) {
4444                codesize++;
4445                codemask = (1 << codesize) - 1;
4446             }
4447 
4448             oldcode = code;
4449          } else {
4450             return epuc("illegal code in raster", "Corrupt GIF");
4451          }
4452       }
4453    }
4454 }
4455 
4456 static void stbi_fill_gif_background(stbi_gif *g)
4457 {
4458    int i;
4459    uint8 *c = g->pal[g->bgindex];
4460    // @OPTIMIZE: write a dword at a time
4461    for (i = 0; i < g->w * g->h * 4; i += 4) {
4462       uint8 *p  = &g->out[i];
4463       p[0] = c[2];
4464       p[1] = c[1];
4465       p[2] = c[0];
4466       p[3] = c[3];
4467    }
4468 }
4469 
4470 // this function is designed to support animated gifs, although stb_image doesn't support it
4471 static uint8 *stbi_gif_load_next(stbi *s, stbi_gif *g, int *comp, int req_comp)
4472 {
4473    int i;
4474    uint8 *old_out = 0;
4475 
4476    if (g->out == 0) {
4477       if (!stbi_gif_header(s, g, comp,0))     return 0; // failure_reason set by stbi_gif_header
4478       g->out = (uint8 *) MALLOC(4 * g->w * g->h);
4479       if (g->out == 0)                      return epuc("outofmem", "Out of memory");
4480       stbi_fill_gif_background(g);
4481    } else {
4482       // animated-gif-only path
4483       if (((g->eflags & 0x1C) >> 2) == 3) {
4484          old_out = g->out;
4485          g->out = (uint8 *) MALLOC(4 * g->w * g->h);
4486          if (g->out == 0)                   return epuc("outofmem", "Out of memory");
4487          memcpy(g->out, old_out, g->w*g->h*4);
4488       }
4489    }
4490 
4491    for (;;) {
4492       switch (get8(s)) {
4493          case 0x2C: /* Image Descriptor */
4494          {
4495             int32 x, y, w, h;
4496             uint8 *o;
4497 
4498             x = get16le(s);
4499             y = get16le(s);
4500             w = get16le(s);
4501             h = get16le(s);
4502             if (((x + w) > (g->w)) || ((y + h) > (g->h)))
4503                return epuc("bad Image Descriptor", "Corrupt GIF");
4504 
4505             g->line_size = g->w * 4;
4506             g->start_x = x * 4;
4507             g->start_y = y * g->line_size;
4508             g->max_x   = g->start_x + w * 4;
4509             g->max_y   = g->start_y + h * g->line_size;
4510             g->cur_x   = g->start_x;
4511             g->cur_y   = g->start_y;
4512 
4513             g->lflags = get8(s);
4514 
4515             if (g->lflags & 0x40) {
4516                g->step = 8 * g->line_size; // first interlaced spacing
4517                g->parse = 3;
4518             } else {
4519                g->step = g->line_size;
4520                g->parse = 0;
4521             }
4522 
4523             if (g->lflags & 0x80) {
4524                stbi_gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
4525                g->color_table = (uint8 *) g->lpal;
4526             } else if (g->flags & 0x80) {
4527                for (i=0; i < 256; ++i)  // @OPTIMIZE: reset only the previous transparent
4528                   g->pal[i][3] = 255;
4529                if (g->transparent >= 0 && (g->eflags & 0x01))
4530                   g->pal[g->transparent][3] = 0;
4531                g->color_table = (uint8 *) g->pal;
4532             } else
4533                return epuc("missing color table", "Corrupt GIF");
4534 
4535             o = stbi_process_gif_raster(s, g);
4536             if (o == NULL) return NULL;
4537 
4538             if (req_comp && req_comp != 4)
4539                o = convert_format(o, 4, req_comp, g->w, g->h);
4540             return o;
4541          }
4542 
4543          case 0x21: // Comment Extension.
4544          {
4545             int len;
4546             if (get8(s) == 0xF9) { // Graphic Control Extension.
4547                len = get8(s);
4548                if (len == 4) {
4549                   g->eflags = get8(s);
4550                   get16le(s); // delay
4551                   g->transparent = get8(s);
4552                } else {
4553                   skip(s, len);
4554                   break;
4555                }
4556             }
4557             while ((len = get8(s)) != 0)
4558                skip(s, len);
4559             break;
4560          }
4561 
4562          case 0x3B: // gif stream termination code
4563             return (uint8 *) 1;
4564 
4565          default:
4566             return epuc("unknown code", "Corrupt GIF");
4567       }
4568    }
4569 }
4570 
4571 #ifndef STBI_NO_STDIO
4572 stbi_uc *stbi_gif_load             (char const *filename,           int *x, int *y, int *comp, int req_comp)
4573 {
4574    uint8 *data;
4575    FILE *f = fopen(filename, "rb");
4576    if (!f) return NULL;
4577    data = stbi_gif_load_from_file(f, x,y,comp,req_comp);
4578    fclose(f);
4579    return data;
4580 }
4581 
4582 stbi_uc *stbi_gif_load_from_file   (FILE *f, int *x, int *y, int *comp, int req_comp)
4583 {
4584    uint8 *u = 0;
4585    stbi s;
4586    stbi_gif g={0};
4587    start_file(&s, f);
4588 
4589    u = stbi_gif_load_next(&s, &g, comp, req_comp);
4590    if (u == (void *) 1) u = 0;  // end of animated gif marker
4591    if (u) {
4592       *x = g.w;
4593       *y = g.h;
4594    }
4595 
4596    return u;
4597 }
4598 #endif
4599 
4600 stbi_uc *stbi_gif_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
4601 {
4602    uint8 *u = 0;
4603    stbi s;
4604    stbi_gif g;
4605 
4606    memset(&g, 0, sizeof(g));
4607    start_mem(&s, buffer, len);
4608    u = stbi_gif_load_next(&s, &g, comp, req_comp);
4609    if (u == (void *) 1) u = 0;  // end of animated gif marker
4610    if (u) {
4611       *x = g.w;
4612       *y = g.h;
4613    }
4614    return u;
4615 }
4616 
4617 #ifndef STBI_NO_STDIO
// Open `filename` and report the size of the GIF it contains without decoding it.
// Returns the result of stbi_gif_info_from_file(), or 0 if the file cannot be
// opened. The file is closed before returning.
int      stbi_gif_info             (char const *filename,           int *x, int *y, int *comp)
{
   FILE *fp = fopen(filename, "rb");
   int ok;

   if (fp == NULL)
      return 0;

   ok = stbi_gif_info_from_file(fp, x, y, comp);
   fclose(fp);
   return ok;
}
4627 
4628 int stbi_gif_info_from_file(FILE *f, int *x, int *y, int *comp)
4629 {
4630    stbi s;
4631    int res;
4632    long n = ftell(f);
4633    start_file(&s, f);
4634    res = stbi_gif_info_raw(&s, x, y, comp);
4635    fseek(f, n, SEEK_SET);
4636    return res;
4637 }
4638 #endif // !STBI_NO_STDIO
4639 
4640 int stbi_gif_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
4641 {
4642    stbi s;
4643    start_mem(&s, buffer, len);
4644    return stbi_gif_info_raw(&s, x, y, comp);
4645 }
4646 
4647 
4648 
4649 
4650 // *************************************************************************************************
4651 // Radiance RGBE HDR loader
4652 // originally by Nicolas Schulz
4653 #ifndef STBI_NO_HDR
4654 static int hdr_test(stbi *s)
4655 {
4656    const char *signature = "#?RADIANCE\n";
4657    int i;
4658    for (i=0; signature[i]; ++i)
4659       if (get8(s) != signature[i])
4660          return 0;
4661    return 1;
4662 }
4663 
4664 int stbi_hdr_test_memory(stbi_uc const *buffer, int len)
4665 {
4666    stbi s;
4667    start_mem(&s, buffer, len);
4668    return hdr_test(&s);
4669 }
4670 
4671 #ifndef STBI_NO_STDIO
4672 int stbi_hdr_test_file(FILE *f)
4673 {
4674    stbi s;
4675    int r,n = ftell(f);
4676    start_file(&s, f);
4677    r = hdr_test(&s);
4678    fseek(f,n,SEEK_SET);
4679    return r;
4680 }
4681 #endif
4682 
4683 #define HDR_BUFLEN  1024
4684 static char *hdr_gettoken(stbi *z, char *buffer)
4685 {
4686    int len=0;
4687    char c = '\0';
4688 
4689    c = (char) get8(z);
4690 
4691    while (!at_eof(z) && c != '\n') {
4692       buffer[len++] = c;
4693       if (len == HDR_BUFLEN-1) {
4694          // flush to end of line
4695          while (!at_eof(z) && get8(z) != '\n')
4696             ;
4697          break;
4698       }
4699       c = (char) get8(z);
4700    }
4701 
4702    buffer[len] = 0;
4703    return buffer;
4704 }
4705 
4706 static void hdr_convert(float *output, stbi_uc *input, int req_comp)
4707 {
4708    if ( input[3] != 0 ) {
4709       float f1;
4710       // Exponent
4711       f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8));
4712       if (req_comp <= 2)
4713          output[0] = (input[0] + input[1] + input[2]) * f1 / 3;
4714       else {
4715          output[0] = input[0] * f1;
4716          output[1] = input[1] * f1;
4717          output[2] = input[2] * f1;
4718       }
4719       if (req_comp == 2) output[1] = 1;
4720       if (req_comp == 4) output[3] = 1;
4721    } else {
4722       switch (req_comp) {
4723          case 4: output[3] = 1; /* fallthrough */
4724          case 3: output[0] = output[1] = output[2] = 0;
4725                  break;
4726          case 2: output[1] = 1; /* fallthrough */
4727          case 1: output[0] = 0;
4728                  break;
4729       }
4730    }
4731 }
4732 
4733 
4734 static float *hdr_load(stbi *s, int *x, int *y, int *comp, int req_comp)
4735 {
4736    char buffer[HDR_BUFLEN];
4737    char *token;
4738    int valid = 0;
4739    int width, height;
4740    stbi_uc *scanline;
4741    float *hdr_data;
4742    int len;
4743    unsigned char count, value;
4744    int i, j, k, c1,c2, z;
4745 
4746 
4747    // Check identifier
4748    if (strcmp(hdr_gettoken(s,buffer), "#?RADIANCE") != 0)
4749       return epf("not HDR", "Corrupt HDR image");
4750 
4751    // Parse header
4752    for(;;) {
4753       token = hdr_gettoken(s,buffer);
4754       if (token[0] == 0) break;
4755       if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
4756    }
4757 
4758    if (!valid)    return epf("unsupported format", "Unsupported HDR format");
4759 
4760    // Parse width and height
4761    // can't use sscanf() if we're not using stdio!
4762    token = hdr_gettoken(s,buffer);
4763    if (strncmp(token, "-Y ", 3))  return epf("unsupported data layout", "Unsupported HDR format");
4764    token += 3;
4765    height = strtol(token, &token, 10);
4766    while (*token == ' ') ++token;
4767    if (strncmp(token, "+X ", 3))  return epf("unsupported data layout", "Unsupported HDR format");
4768    token += 3;
4769    width = strtol(token, NULL, 10);
4770 
4771    *x = width;
4772    *y = height;
4773 
4774    *comp = 3;
4775    if (req_comp == 0) req_comp = 3;
4776 
4777    // Read data
4778    hdr_data = (float *) MALLOC(height * width * req_comp * sizeof(float));
4779 
4780    // Load image data
4781    // image data is stored as some number of sca
4782    if ( width < 8 || width >= 32768) {
4783       // Read flat data
4784       for (j=0; j < height; ++j) {
4785          for (i=0; i < width; ++i) {
4786             stbi_uc rgbe[4];
4787            main_decode_loop:
4788             getn(s, rgbe, 4);
4789             hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
4790          }
4791       }
4792    } else {
4793       // Read RLE-encoded data
4794       scanline = NULL;
4795 
4796       for (j = 0; j < height; ++j) {
4797          c1 = get8(s);
4798          c2 = get8(s);
4799          len = get8(s);
4800          if (c1 != 2 || c2 != 2 || (len & 0x80)) {
4801             // not run-length encoded, so we have to actually use THIS data as a decoded
4802             // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
4803             uint8 rgbe[4];
4804             rgbe[0] = (uint8) c1;
4805             rgbe[1] = (uint8) c2;
4806             rgbe[2] = (uint8) len;
4807             rgbe[3] = (uint8) get8u(s);
4808             hdr_convert(hdr_data, rgbe, req_comp);
4809             i = 1;
4810             j = 0;
4811             FREE(scanline);
4812             goto main_decode_loop; // yes, this makes no sense
4813          }
4814          len <<= 8;
4815          len |= get8(s);
4816          if (len != width) { FREE(hdr_data); FREE(scanline); return epf("invalid decoded scanline length", "corrupt HDR"); }
4817          if (scanline == NULL) scanline = (stbi_uc *) MALLOC(width * 4);
4818 
4819          for (k = 0; k < 4; ++k) {
4820             i = 0;
4821             while (i < width) {
4822                count = get8u(s);
4823                if (count > 128) {
4824                   // Run
4825                   value = get8u(s);
4826                   count -= 128;
4827                   for (z = 0; z < count; ++z)
4828                      scanline[i++ * 4 + k] = value;
4829                } else {
4830                   // Dump
4831                   for (z = 0; z < count; ++z)
4832                      scanline[i++ * 4 + k] = get8u(s);
4833                }
4834             }
4835          }
4836          for (i=0; i < width; ++i)
4837             hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
4838       }
4839       FREE(scanline);
4840    }
4841 
4842    return hdr_data;
4843 }
4844 
4845 #ifndef STBI_NO_STDIO
4846 float *stbi_hdr_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
4847 {
4848    stbi s;
4849    start_file(&s,f);
4850    return hdr_load(&s,x,y,comp,req_comp);
4851 }
4852 #endif
4853 
4854 float *stbi_hdr_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
4855 {
4856    stbi s;
4857    start_mem(&s,buffer, len);
4858    return hdr_load(&s,x,y,comp,req_comp);
4859 }
4860 
4861 #endif // STBI_NO_HDR
4862 
4863 
4864 #ifndef STBI_NO_STDIO
// Open `filename` and report the image size and channel count without
// decoding the pixels. Returns the result of stbi_info_from_file(), or the
// result of e() if the file cannot be opened. The file is closed before
// returning.
int stbi_info(char const *filename, int *x, int *y, int *comp)
{
    int found;
    FILE *fp = fopen(filename, "rb");

    if (fp == NULL)
        return e("can't fopen", "Unable to open file");

    found = stbi_info_from_file(fp, x, y, comp);
    fclose(fp);
    return found;
}
4874 
// Try each format's info routine on stream `f` in turn and return 1 as soon
// as one recognises the image; otherwise report an error through e().
//
// Order: JPEG, PNG, GIF, then TGA.
// @TODO: stbi_bmp_info_from_file
// @TODO: stbi_psd_info_from_file
// @TODO: stbi_hdr_info_from_file (when STBI_NO_HDR is not defined)
int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
{
   int found = stbi_jpeg_info_from_file(f, x, y, comp)
            || stbi_png_info_from_file(f, x, y, comp)
            || stbi_gif_info_from_file(f, x, y, comp)
            // test tga last because it's a crappy test!
            || stbi_tga_info_from_file(f, x, y, comp);

   if (found)
      return 1;
   return e("unknown image type", "Image not of any known type, or corrupt");
}
4893 #endif // !STBI_NO_STDIO
4894 
4895 int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
4896 {
4897    if (stbi_jpeg_info_from_memory(buffer, len, x, y, comp))
4898        return 1;
4899    if (stbi_png_info_from_memory(buffer, len, x, y, comp))
4900        return 1;
4901    if (stbi_gif_info_from_memory(buffer, len, x, y, comp))
4902        return 1;
4903    // @TODO: stbi_bmp_info_from_memory
4904    // @TODO: stbi_psd_info_from_memory
4905    #ifndef STBI_NO_HDR
4906    // @TODO: stbi_hdr_info_from_memory
4907    #endif
4908    // test tga last because it's a crappy test!
4909    if (stbi_tga_info_from_memory(buffer, len, x, y, comp))
4910        return 1;
4911    return e("unknown image type", "Image not of any known type, or corrupt");
4912 }
4913 
4914 #endif // STBI_HEADER_FILE_ONLY
4915 
4916 /*
4917    revision history:
4918       1.29 (2010-08-16) various warning fixes from Aurelien Pocheville
4919       1.28 (2010-08-01) fix bug in GIF palette transparency (SpartanJ)
4920       1.27 (2010-08-01)
4921              cast-to-uint8 to fix warnings
4922       1.26 (2010-07-24)
4923              fix bug in file buffering for PNG reported by SpartanJ
4924       1.25 (2010-07-17)
4925              refix trans_data warning (Won Chun)
4926       1.24 (2010-07-12)
4927              perf improvements reading from files on platforms with lock-heavy fgetc()
4928              minor perf improvements for jpeg
4929              deprecated type-specific functions so we'll get feedback if they're needed
4930              attempt to fix trans_data warning (Won Chun)
4931       1.23   fixed bug in iPhone support
4932       1.22 (2010-07-10)
4933              removed image *writing* support
4935              stbi_info support from Jetro Lauha
4936              GIF support from Jean-Marc Lienher
4937              iPhone PNG-extensions from James Brown
4938              warning-fixes from Nicolas Schulz and Janez Zemva (i.e. Janez (U+017D)emva)
4939       1.21   fix use of 'uint8' in header (reported by jon blow)
4940       1.20   added support for Softimage PIC, by Tom Seddon
4941       1.19   bug in interlaced PNG corruption check (found by ryg)
4942       1.18 2008-08-02
4943              fix a threading bug (local mutable static)
4944       1.17   support interlaced PNG
4945       1.16   major bugfix - convert_format converted one too many pixels
4946       1.15   initialize some fields for thread safety
4947       1.14   fix threadsafe conversion bug
4948              header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
4949       1.13   threadsafe
4950       1.12   const qualifiers in the API
4951       1.11   Support installable IDCT, colorspace conversion routines
4952       1.10   Fixes for 64-bit (don't use "unsigned long")
4953              optimized upsampling by Fabian "ryg" Giesen
4954       1.09   Fix format-conversion for PSD code (bad global variables!)
4955       1.08   Thatcher Ulrich's PSD code integrated by Nicolas Schulz
4956       1.07   attempt to fix C++ warning/errors again
4957       1.06   attempt to fix C++ warning/errors again
4958       1.05   fix TGA loading to return correct *comp and use good luminance calc
4959       1.04   default float alpha is 1, not 255; use 'void *' for stbi_image_free
4960       1.03   bugfixes to STBI_NO_STDIO, STBI_NO_HDR
4961       1.02   support for (subset of) HDR files, float interface for preferred access to them
4962       1.01   fix bug: possible bug in handling right-side up bmps... not sure
4963              fix bug: the stbi_bmp_load() and stbi_tga_load() functions didn't work at all
4964       1.00   interface to zlib that skips zlib header
4965       0.99   correct handling of alpha in palette
4966       0.98   TGA loader by lonesock; dynamically add loaders (untested)
4967       0.97   jpeg errors on too large a file; also catch another malloc failure
4968       0.96   fix detection of invalid v value - particleman@mollyrocket forum
4969       0.95   during header scan, seek to markers in case of padding
4970       0.94   STBI_NO_STDIO to disable stdio usage; rename all #defines the same
4971       0.93   handle jpegtran output; verbose errors
4972       0.92   read 4,8,16,24,32-bit BMP files of several formats
4973       0.91   output 24-bit Windows 3.0 BMP files
4974       0.90   fix a few more warnings; bump version number to approach 1.0
4975       0.61   bugfixes due to Marc LeBlanc, Christopher Lloyd
4976       0.60   fix compiling as c++
4977       0.59   fix warnings: merge Dave Moore's -Wall fixes
4978       0.58   fix bug: zlib uncompressed mode len/nlen was wrong endian
4979       0.57   fix bug: jpg last huffman symbol before marker was >9 bits but less
4980                       than 16 available
4981       0.56   fix bug: zlib uncompressed mode len vs. nlen
4982       0.55   fix bug: restart_interval not initialized to 0
4983       0.54   allow NULL for 'int *comp'
4984       0.53   fix bug in png 3->4; speedup png decoding
4985       0.52   png handles req_comp=3,4 directly; minor cleanup; jpeg comments
4986       0.51   obey req_comp requests, 1-component jpegs return as 1-component,
4987              on 'test' only check type, not whether we support this variant
4988 */
4989