xref: /netbsd-src/external/gpl3/binutils.old/dist/gprofng/src/DbeJarFile.cc (revision c42dbd0ed2e61fe6eda8590caa852ccf34719964)
1 /* Copyright (C) 2021 Free Software Foundation, Inc.
2    Contributed by Oracle.
3 
4    This file is part of GNU Binutils.
5 
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3, or (at your option)
9    any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, write to the Free Software
18    Foundation, 51 Franklin Street - Fifth Floor, Boston,
19    MA 02110-1301, USA.  */
20 
21 #include "config.h"
22 #include <sys/types.h>
23 #include <fcntl.h>
24 #include <unistd.h>
25 #include <errno.h>
26 
27 #include "zlib.h"
28 #include "util.h"
29 #include "DbeJarFile.h"
30 #include "Data_window.h"
31 #include "vec.h"
32 
// Read one unsigned byte at B and widen it to 32 bits.
static uint32_t
get_u1 (unsigned char *b)
{
  uint32_t value = *b;
  return value;
}
38 
39 static uint32_t
get_u2(unsigned char * b)40 get_u2 (unsigned char *b)
41 {
42   return (get_u1 (b + 1) << 8) | get_u1 (b);
43 }
44 
45 static uint32_t
get_u4(unsigned char * b)46 get_u4 (unsigned char *b)
47 {
48   return (get_u2 (b + 2) << 16) | get_u2 (b);
49 }
50 
51 static uint64_t
get_u8(unsigned char * b)52 get_u8 (unsigned char *b)
53 {
54   return (((uint64_t) get_u4 (b + 4)) << 32) | get_u4 (b);
55 }
56 
// Fixed sizes, in bytes, of the ZIP records parsed in this file.
enum
{
  // End of central directory record, without its trailing comment.
  END_CENT_DIR_SIZE     = 22,
  // Local file header, without the file name and extra field.
  LOC_FILE_HEADER_SIZE  = 30,
  // Central file header, without name, extra field and comment.
  CENT_FILE_HEADER_SIZE = 46,
  // Zip64 end of central directory locator.
  ZIP64_LOCATOR_SIZE    = 20,
  // Zip64 end of central directory record, without the extensible data.
  ZIP64_CENT_DIR_SIZE   = 56,
  // Largest tail of the archive scanned for the end-of-central-dir record.
  ZIP_BUF_SIZE          = 64 * 1024
};
66 
// Location and extent of the central directory, as read from the
// (Zip64) end of central directory record.
struct EndCentDir
{
  uint64_t count;   // total number of central directory records
  uint64_t size;    // size of the central directory, in bytes
  uint64_t offset;  // offset of the start of the central directory
};
73 
74 class ZipEntry
75 {
76 public:
77 
ZipEntry()78   ZipEntry ()
79   {
80     name = NULL;
81     data_offset = 0;
82   }
83 
~ZipEntry()84   ~ZipEntry ()
85   {
86     free (name);
87   }
88 
89   int
compare(ZipEntry * ze)90   compare (ZipEntry *ze)
91   {
92     return dbe_strcmp (name, ze->name);
93   }
94 
95   char *name;       // entry name
96   int time;         // modification time
97   int64_t size;     // size of uncompressed data
98   int64_t csize;    // size of compressed data (zero if uncompressed)
99   uint32_t compressionMethod;
100   int64_t offset;   // offset of LOC header
101   int64_t data_offset;
102 };
103 
104 static int
cmp_names(const void * a,const void * b)105 cmp_names (const void *a, const void *b)
106 {
107   ZipEntry *e1 = *((ZipEntry **) a);
108   ZipEntry *e2 = *((ZipEntry **) b);
109   return e1->compare (e2);
110 }
111 
dump(const char * msg)112 template<> void Vector<ZipEntry *>::dump (const char *msg)
113 {
114   Dprintf (1, NTXT ("Vector<ZipEntry *> %s  [%lld]\n"), msg ? msg : NTXT (""), (long long) size ());
115   for (long i = 0, sz = size (); i < sz; i++)
116     {
117       ZipEntry *ze = get (i);
118       Dprintf (1, NTXT ("  %lld offset:%lld (0x%llx) size: %lld --> %lld %s\n"),
119 	       (long long) i, (long long) ze->offset, (long long) ze->offset,
120 	       (long long) ze->csize, (long long) ze->size, STR (ze->name));
121     }
122 }
123 
DbeJarFile(const char * jarName)124 DbeJarFile::DbeJarFile (const char *jarName)
125 {
126   name = strdup (jarName);
127   fnames = NULL;
128   dwin = new Data_window (name);
129   get_entries ();
130 }
131 
~DbeJarFile()132 DbeJarFile::~DbeJarFile ()
133 {
134   free (name);
135   delete fnames;
136 }
137 
138 void
get_entries()139 DbeJarFile::get_entries ()
140 {
141   Dprintf (DUMP_JAR_FILE, NTXT ("\nArchive: %s\n"), STR (name));
142   if (dwin->not_opened ())
143     {
144       append_msg (CMSG_ERROR, GTXT ("Cannot open file `%s'"), name);
145       return;
146     }
147   struct EndCentDir endCentDir;
148   if (get_EndCentDir (&endCentDir) == 0)
149     return;
150 
151   if (endCentDir.count == 0)
152     {
153       append_msg (CMSG_WARN, GTXT ("No files in %s"), name);
154       return;
155     }
156   unsigned char *b = (unsigned char *) dwin->bind (endCentDir.offset, endCentDir.size);
157   if (b == NULL)
158     {
159       append_msg (CMSG_ERROR, GTXT ("%s: cannot read the central directory record"), name);
160       return;
161     }
162 
163   fnames = new Vector<ZipEntry*>(endCentDir.count);
164   for (uint64_t i = 0, offset = endCentDir.offset, last = endCentDir.offset + endCentDir.size; i < endCentDir.count; i++)
165     {
166       if ((last - offset) < CENT_FILE_HEADER_SIZE)
167 	{
168 	  append_msg (CMSG_ERROR, GTXT ("%s: cannot read the central file header (%lld (from %lld), offset=0x%016llx last=0x%016llx"),
169 		      name, (long long) i, (long long) endCentDir.count, (long long) offset, (long long) last);
170 	  break;
171 	}
172       b = (unsigned char *) dwin->bind (offset, CENT_FILE_HEADER_SIZE);
173       //  Central file header
174       //  Offset Bytes    Description
175       //     0     4   central file header signature = 0x02014b50
176       //     4     2   version made by
177       //     6     2   version needed to extract
178       //     8     2   general purpose bit flag
179       //    10     2   compression method
180       //    12     2   last mod file time
181       //    14     2   last mod file date
182       //    16     4   crc-32
183       //    20     4   compressed size
184       //    24     4   uncompressed size
185       //    28     2   file name length
186       //    30     2   extra field length
187       //    32     2   file comment length
188       //    34     2   disk number start
189       //    36     2   internal file attributes
190       //    38     4   external file attributes
191       //    42     4   relative offset of local header
192       //    46         file name (variable size)
193       //               extra field (variable size)
194       //               file comment (variable size)
195       uint32_t signature = get_u4 (b);
196       if (signature != 0x02014b50)
197 	{
198 	  append_msg (CMSG_ERROR, GTXT ("%s: wrong header signature (%lld (total %lld), offset=0x%016llx last=0x%016llx"),
199 		      name, (long long) i, (long long) endCentDir.count, (long long) offset, (long long) last);
200 	  break;
201 	}
202       ZipEntry *ze = new ZipEntry ();
203       fnames->append (ze);
204       uint32_t name_len = get_u2 (b + 28);
205       uint32_t extra_len = get_u2 (b + 30);
206       uint32_t comment_len = get_u2 (b + 32);
207       ze->compressionMethod = get_u2 (b + 10);
208       ze->csize = get_u4 (b + 20);
209       ze->size = get_u4 (b + 24);
210       ze->offset = get_u4 (b + 42);
211       char *nm = (char *) dwin->bind (offset + 46, name_len);
212       if (nm)
213 	{
214 	  ze->name = (char *) malloc (name_len + 1);
215 	  strncpy (ze->name, nm, name_len);
216 	  ze->name[name_len] = 0;
217 	}
218       offset += CENT_FILE_HEADER_SIZE + name_len + extra_len + comment_len;
219     }
220   fnames->sort (cmp_names);
221   if (DUMP_JAR_FILE)
222     fnames->dump (get_basename (name));
223 }
224 
225 int
get_entry(const char * fname)226 DbeJarFile::get_entry (const char *fname)
227 {
228   if (fnames == NULL)
229     return -1;
230   ZipEntry zipEntry, *ze = &zipEntry;
231   ze->name = (char *) fname;
232   int ind = fnames->bisearch (0, -1, &ze, cmp_names);
233   ze->name = NULL;
234   return ind;
235 }
236 
237 long long
copy(char * toFileNname,int fromEntryNum)238 DbeJarFile::copy (char *toFileNname, int fromEntryNum)
239 {
240   if (fromEntryNum < 0 || fromEntryNum >= VecSize (fnames))
241     return -1;
242   ZipEntry *ze = fnames->get (fromEntryNum);
243   if (ze->data_offset == 0)
244     {
245       //  Local file header
246       //  Offset Bytes    Description
247       //     0     4   local file header signature = 0x04034b50
248       //     4     2   version needed to extract
249       //     6     2   general purpose bit flag
250       //     8     2   compression method
251       //    10     2   last mod file time
252       //    12     2   last mod file date
253       //    14     4   crc-32
254       //    18     4   compressed size
255       //    22     4   uncompressed size
256       //    26     2   file name length
257       //    28     2   extra field length
258       //    30     2   file name (variable size)
259       //               extra field (variable size)
260       unsigned char *b = (unsigned char *) dwin->bind (ze->offset, LOC_FILE_HEADER_SIZE);
261       if (b == NULL)
262 	{
263 	  append_msg (CMSG_ERROR,
264 		 GTXT ("%s: Cannot read a local file header (%s offset=0x%lld"),
265 		 name, STR (ze->name), (long long) ze->offset);
266 	  return -1;
267 	}
268       uint32_t signature = get_u4 (b);
269       if (signature != 0x04034b50)
270 	{
271 	  append_msg (CMSG_ERROR,
272 		      GTXT ("%s: wrong local header signature ('%s' offset=%lld (0x%llx)"),
273 		      name, STR (ze->name), (long long) ze->offset,
274 		      (long long) ze->offset);
275 	  return -1;
276 	}
277       ze->data_offset = ze->offset + LOC_FILE_HEADER_SIZE + get_u2 (b + 26) + get_u2 (b + 28);
278     }
279 
280   if (ze->compressionMethod == 0)
281     {
282       int fd = open (toFileNname, O_CREAT | O_WRONLY | O_LARGEFILE, 0644);
283       if (fd == -1)
284 	{
285 	  append_msg (CMSG_ERROR, GTXT ("Cannot create file %s (%s)"), toFileNname, STR (strerror (errno)));
286 	  return -1;
287 	}
288       long long len = dwin->copy_to_file (fd, ze->data_offset, ze->size);
289       close (fd);
290       if (len != ze->size)
291 	{
292 	  append_msg (CMSG_ERROR, GTXT ("%s: Cannot write %lld bytes (only %lld)"),
293 		      toFileNname, (long long) ze->size, (long long) len);
294 	  unlink (toFileNname);
295 	  return -1;
296 	}
297       return len;
298     }
299 
300   unsigned char *b = (unsigned char *) dwin->bind (ze->data_offset, ze->csize);
301   if (b == NULL)
302     {
303       append_msg (CMSG_ERROR,
304 		  GTXT ("%s: Cannot extract file %s (offset=0x%lld csize=%lld)"),
305 		  name, STR (ze->name), (long long) ze->offset,
306 		  (long long) ze->csize);
307       return -1;
308     }
309   z_stream strm;
310   strm.zalloc = Z_NULL;
311   strm.zfree = Z_NULL;
312   strm.opaque = Z_NULL;
313   strm.next_in = Z_NULL;
314   strm.avail_in = 0;
315   if (inflateInit2 (&strm, -MAX_WBITS) != Z_OK)
316     {
317       append_msg (CMSG_ERROR, GTXT ("%s: inflateInit2 failed (%s)"), STR (ze->name), STR (strm.msg));
318       return -1;
319     }
320   strm.avail_in = ze->csize;
321   strm.next_in = b;
322   int retval = ze->size;
323   unsigned char *buf = (unsigned char *) malloc (ze->size);
324   for (;;)
325     {
326       strm.next_out = buf;
327       strm.avail_out = ze->size;
328       int ret = inflate (&strm, Z_SYNC_FLUSH);
329       if ((ret == Z_NEED_DICT) || (ret == Z_DATA_ERROR) || (ret == Z_MEM_ERROR) || (ret == Z_STREAM_ERROR))
330 	{
331 	  append_msg (CMSG_ERROR, GTXT ("%s: inflate('%s') error %d (%s)"), name, STR (ze->name), ret, STR (strm.msg));
332 	  retval = -1;
333 	  break;
334 	}
335       if (strm.avail_out != 0)
336 	break;
337     }
338   inflateEnd (&strm);
339   if (retval != -1)
340     {
341       int fd = open (toFileNname, O_CREAT | O_WRONLY | O_LARGEFILE, 0644);
342       if (fd == -1)
343 	{
344 	  append_msg (CMSG_ERROR, GTXT ("Cannot create file %s (%s)"), toFileNname, STR (strerror (errno)));
345 	  retval = -1;
346 	}
347       else
348 	{
349 	  long long len = write (fd, buf, ze->size);
350 	  if (len != ze->size)
351 	    {
352 	      append_msg (CMSG_ERROR, GTXT ("%s: Cannot write %lld bytes (only %lld)"),
353 			  toFileNname, (long long) strm.avail_out, (long long) len);
354 	      retval = -1;
355 	    }
356 	  close (fd);
357 	}
358     }
359   free (buf);
360   return retval;
361 }
362 
363 int
get_EndCentDir(struct EndCentDir * endCentDir)364 DbeJarFile::get_EndCentDir (struct EndCentDir *endCentDir)
365 {
366   int64_t fsize = dwin->get_fsize ();
367   int64_t sz = (fsize < ZIP_BUF_SIZE) ? fsize : ZIP_BUF_SIZE;
368 
369   // Find the end of central directory record:
370   unsigned char *b = (unsigned char *) dwin->bind (fsize - sz, sz);
371   if (b == NULL)
372     {
373       append_msg (CMSG_ERROR, GTXT ("%s: cannot find the central directory record (fsize=%lld)"),
374 		  name, (long long) fsize);
375       return 0;
376     }
377 
378   //  End of central directory record:
379   //  Offset Bytes    Description
380   //     0     4    end of central directory signature = 0x06054b50
381   //     4     2    number of this disk
382   //     6     2    disk where central directory starts
383   //     8     2    number of central directory records on this disk
384   //    10     2    total number of central directory records
385   //    12     4    size of central directory(bytes)
386   //    16     4    offset of start of central directory, relative to start of archive
387   //    20     2    comment length(n)
388   //    22     n    comment
389 
390   endCentDir->count = 0;
391   endCentDir->size = 0;
392   endCentDir->offset = 0;
393   int64_t ecdrOffset = fsize;
394   for (int64_t i = END_CENT_DIR_SIZE; i < sz; i++)
395     {
396       b = (unsigned char *) dwin->bind (fsize - i, END_CENT_DIR_SIZE);
397       if (b == NULL)
398 	{
399 	  append_msg (CMSG_ERROR, GTXT ("%s: read failed (offset:0x%llx  bytes:%lld"),
400 		      name, (long long) (fsize - i), (long long) END_CENT_DIR_SIZE);
401 	  break;
402 	}
403       uint32_t signature = get_u4 (b);
404       if (signature == 0x06054b50)
405 	{
406 	  int64_t len_comment = get_u2 (b + 20);
407 	  if (i != (len_comment + END_CENT_DIR_SIZE))
408 	    continue;
409 	  ecdrOffset = fsize - i;
410 	  endCentDir->count = get_u2 (b + 10);
411 	  endCentDir->size = get_u4 (b + 12);
412 	  endCentDir->offset = get_u4 (b + 16);
413 	  Dprintf (DUMP_JAR_FILE,
414 		   "  Zip archive file size:              %10lld (0x%016llx)\n"
415 		   "  end-cent-dir record offset:         %10lld (0x%016llx)\n"
416 		   "  cent-dir offset:                    %10lld (0x%016llx)\n"
417 		   "  cent-dir size:                      %10lld (0x%016llx)\n"
418 		   "  cent-dir entries:                   %10lld\n",
419 		   (long long) fsize, (long long) fsize,
420 		   (long long) ecdrOffset, (long long) ecdrOffset,
421 		   (long long) endCentDir->offset, (long long) endCentDir->offset,
422 		   (long long) endCentDir->size, (long long) endCentDir->size,
423 		   (long long) endCentDir->count);
424 	  break;
425 	}
426     }
427   if (ecdrOffset == fsize)
428     {
429       append_msg (CMSG_ERROR,
430 		  GTXT ("%s: cannot find the central directory record"), name);
431       return 0;
432     }
433   if (endCentDir->count == 0xffff || endCentDir->offset == 0xffffffff
434       || endCentDir->size == 0xffffffff)
435     {
436       // Zip64 format:
437       //      Zip64 end of central directory record
438       //      Zip64 end of central directory locator  ( Can be absent )
439       //      End of central directory record
440       b = (unsigned char *) dwin->bind (ecdrOffset - ZIP64_LOCATOR_SIZE,
441 					ZIP64_LOCATOR_SIZE);
442       if (b == NULL)
443 	{
444 	  append_msg (CMSG_ERROR,
445 	     GTXT ("%s: cannot find the Zip64 central directory record"), name);
446 	  return 0;
447 	}
448       uint32_t signature = get_u4 (b);
449       if (signature == 0x07064b50)
450 	{ // Get an offset from the Zip64 cent-dir locator
451 	  //  Zip64 end of central directory locator
452 	  //  Offset Bytes    Description
453 	  //     0     4    Zip64 end of central dir locator signature = 0x07064b50
454 	  //     4     4    number of the disk with the start of the zip64 end of central directory
455 	  //     8     8    relative offset of the Zip64 end of central directory record
456 	  //    12     4    total number of disks
457 	  Dprintf (DUMP_JAR_FILE, "    cent-dir locator offset           %10lld (0x%016llx)\n",
458 		   (long long) (ecdrOffset - ZIP64_LOCATOR_SIZE), (long long) (ecdrOffset - ZIP64_LOCATOR_SIZE));
459 	  ecdrOffset = get_u8 (b + 8);
460 	}
461       else   // the Zip64 end of central directory locator is absent
462 	ecdrOffset -= ZIP64_CENT_DIR_SIZE;
463       Dprintf (DUMP_JAR_FILE, NTXT ("  Zip64 end-cent-dir record offset:   %10lld (0x%016llx)\n"),
464 	       (long long) ecdrOffset, (long long) ecdrOffset);
465 
466       b = (unsigned char *) dwin->bind (ecdrOffset, ZIP64_CENT_DIR_SIZE);
467       if (b == NULL)
468 	{
469 	  append_msg (CMSG_ERROR,
470 	     GTXT ("%s: cannot find the Zip64 central directory record"), name);
471 	  return 0;
472 	}
473       //  Zip64 end of central directory record
474       //  Offset Bytes    Description
475       //     0     4    Zip64 end of central dir signature = 0x06064b50
476       //     4     8    size of zip64 end of central directory record
477       //    12     2    version made by
478       //    14     2    version needed to extract
479       //    16     4    number of this disk
480       //    20     4    number of the disk with the start of the central directory
481       //    24     8    total number of entries in the central directory on this disk
482       //    32     8    total number of entries in the central directory
483       //    40     8    size of the central directory
484       //    48     8    offset of start of centraldirectory with respect to the starting disk number
485       //    56          Zip64 extensible data sector (variable size)
486       signature = get_u4 (b);
487       if (signature != 0x06064b50)
488 	{
489 	  append_msg (CMSG_ERROR, GTXT ("%s: cannot find the Zip64 central directory record"), name);
490 	  return 0;
491 	}
492       endCentDir->count = get_u8 (b + 32);
493       endCentDir->size = get_u8 (b + 40);
494       endCentDir->offset = get_u8 (b + 48);
495       Dprintf (DUMP_JAR_FILE,
496 	       NTXT ("  cent-dir offset:                    %10lld (0x%016llx)\n"
497 		     "  cent-dir size:                      %10lld (0x%016llx)\n"
498 		     "  cent-dir entries:                   %10lld\n"),
499 	       (long long) endCentDir->offset, (long long) endCentDir->offset,
500 	       (long long) endCentDir->size, (long long) endCentDir->size,
501 	       (long long) endCentDir->count);
502     }
503   return 1;
504 }
505 
506