xref: /llvm-project/lldb/source/Utility/ZipFile.cpp (revision 12dee9d3cd762d9754e2adadffa13c1cce85cf07)
1*12dee9d3SKazuki Sakamoto //===-- ZipFile.cpp -------------------------------------------------------===//
2*12dee9d3SKazuki Sakamoto //
3*12dee9d3SKazuki Sakamoto // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*12dee9d3SKazuki Sakamoto // See https://llvm.org/LICENSE.txt for license information.
5*12dee9d3SKazuki Sakamoto // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*12dee9d3SKazuki Sakamoto //
7*12dee9d3SKazuki Sakamoto //===----------------------------------------------------------------------===//
8*12dee9d3SKazuki Sakamoto 
9*12dee9d3SKazuki Sakamoto #include "lldb/Utility/ZipFile.h"
10*12dee9d3SKazuki Sakamoto #include "lldb/Utility/DataBuffer.h"
11*12dee9d3SKazuki Sakamoto #include "lldb/Utility/FileSpec.h"
12*12dee9d3SKazuki Sakamoto #include "llvm/Support/Endian.h"
13*12dee9d3SKazuki Sakamoto 
14*12dee9d3SKazuki Sakamoto using namespace lldb_private;
15*12dee9d3SKazuki Sakamoto using namespace llvm::support;
16*12dee9d3SKazuki Sakamoto 
17*12dee9d3SKazuki Sakamoto namespace {
18*12dee9d3SKazuki Sakamoto 
19*12dee9d3SKazuki Sakamoto // Zip headers.
20*12dee9d3SKazuki Sakamoto // https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
21*12dee9d3SKazuki Sakamoto 
22*12dee9d3SKazuki Sakamoto // The end of central directory record.
23*12dee9d3SKazuki Sakamoto struct EocdRecord {
24*12dee9d3SKazuki Sakamoto   static constexpr char kSignature[] = {0x50, 0x4b, 0x05, 0x06};
25*12dee9d3SKazuki Sakamoto   char signature[sizeof(kSignature)];
26*12dee9d3SKazuki Sakamoto   unaligned_uint16_t disks;
27*12dee9d3SKazuki Sakamoto   unaligned_uint16_t cd_start_disk;
28*12dee9d3SKazuki Sakamoto   unaligned_uint16_t cds_on_this_disk;
29*12dee9d3SKazuki Sakamoto   unaligned_uint16_t cd_records;
30*12dee9d3SKazuki Sakamoto   unaligned_uint32_t cd_size;
31*12dee9d3SKazuki Sakamoto   unaligned_uint32_t cd_offset;
32*12dee9d3SKazuki Sakamoto   unaligned_uint16_t comment_length;
33*12dee9d3SKazuki Sakamoto };
34*12dee9d3SKazuki Sakamoto 
35*12dee9d3SKazuki Sakamoto // Logical find limit for the end of central directory record.
36*12dee9d3SKazuki Sakamoto const size_t kEocdRecordFindLimit =
37*12dee9d3SKazuki Sakamoto     sizeof(EocdRecord) +
38*12dee9d3SKazuki Sakamoto     std::numeric_limits<decltype(EocdRecord::comment_length)>::max();
39*12dee9d3SKazuki Sakamoto 
40*12dee9d3SKazuki Sakamoto // Central directory record.
41*12dee9d3SKazuki Sakamoto struct CdRecord {
42*12dee9d3SKazuki Sakamoto   static constexpr char kSignature[] = {0x50, 0x4b, 0x01, 0x02};
43*12dee9d3SKazuki Sakamoto   char signature[sizeof(kSignature)];
44*12dee9d3SKazuki Sakamoto   unaligned_uint16_t version_made_by;
45*12dee9d3SKazuki Sakamoto   unaligned_uint16_t version_needed_to_extract;
46*12dee9d3SKazuki Sakamoto   unaligned_uint16_t general_purpose_bit_flag;
47*12dee9d3SKazuki Sakamoto   unaligned_uint16_t compression_method;
48*12dee9d3SKazuki Sakamoto   unaligned_uint16_t last_modification_time;
49*12dee9d3SKazuki Sakamoto   unaligned_uint16_t last_modification_date;
50*12dee9d3SKazuki Sakamoto   unaligned_uint32_t crc32;
51*12dee9d3SKazuki Sakamoto   unaligned_uint32_t compressed_size;
52*12dee9d3SKazuki Sakamoto   unaligned_uint32_t uncompressed_size;
53*12dee9d3SKazuki Sakamoto   unaligned_uint16_t file_name_length;
54*12dee9d3SKazuki Sakamoto   unaligned_uint16_t extra_field_length;
55*12dee9d3SKazuki Sakamoto   unaligned_uint16_t comment_length;
56*12dee9d3SKazuki Sakamoto   unaligned_uint16_t file_start_disk;
57*12dee9d3SKazuki Sakamoto   unaligned_uint16_t internal_file_attributes;
58*12dee9d3SKazuki Sakamoto   unaligned_uint32_t external_file_attributes;
59*12dee9d3SKazuki Sakamoto   unaligned_uint32_t local_file_header_offset;
60*12dee9d3SKazuki Sakamoto };
61*12dee9d3SKazuki Sakamoto // Immediately after CdRecord,
62*12dee9d3SKazuki Sakamoto // - file name (file_name_length)
63*12dee9d3SKazuki Sakamoto // - extra field (extra_field_length)
64*12dee9d3SKazuki Sakamoto // - comment (comment_length)
65*12dee9d3SKazuki Sakamoto 
66*12dee9d3SKazuki Sakamoto // Local file header.
67*12dee9d3SKazuki Sakamoto struct LocalFileHeader {
68*12dee9d3SKazuki Sakamoto   static constexpr char kSignature[] = {0x50, 0x4b, 0x03, 0x04};
69*12dee9d3SKazuki Sakamoto   char signature[sizeof(kSignature)];
70*12dee9d3SKazuki Sakamoto   unaligned_uint16_t version_needed_to_extract;
71*12dee9d3SKazuki Sakamoto   unaligned_uint16_t general_purpose_bit_flag;
72*12dee9d3SKazuki Sakamoto   unaligned_uint16_t compression_method;
73*12dee9d3SKazuki Sakamoto   unaligned_uint16_t last_modification_time;
74*12dee9d3SKazuki Sakamoto   unaligned_uint16_t last_modification_date;
75*12dee9d3SKazuki Sakamoto   unaligned_uint32_t crc32;
76*12dee9d3SKazuki Sakamoto   unaligned_uint32_t compressed_size;
77*12dee9d3SKazuki Sakamoto   unaligned_uint32_t uncompressed_size;
78*12dee9d3SKazuki Sakamoto   unaligned_uint16_t file_name_length;
79*12dee9d3SKazuki Sakamoto   unaligned_uint16_t extra_field_length;
80*12dee9d3SKazuki Sakamoto };
81*12dee9d3SKazuki Sakamoto // Immediately after LocalFileHeader,
82*12dee9d3SKazuki Sakamoto // - file name (file_name_length)
83*12dee9d3SKazuki Sakamoto // - extra field (extra_field_length)
84*12dee9d3SKazuki Sakamoto // - file data (should be compressed_size == uncompressed_size, page aligned)
85*12dee9d3SKazuki Sakamoto 
FindEocdRecord(lldb::DataBufferSP zip_data)86*12dee9d3SKazuki Sakamoto const EocdRecord *FindEocdRecord(lldb::DataBufferSP zip_data) {
87*12dee9d3SKazuki Sakamoto   // Find backward the end of central directory record from the end of the zip
88*12dee9d3SKazuki Sakamoto   // file to the find limit.
89*12dee9d3SKazuki Sakamoto   const uint8_t *zip_data_end = zip_data->GetBytes() + zip_data->GetByteSize();
90*12dee9d3SKazuki Sakamoto   const uint8_t *find_limit = zip_data_end - kEocdRecordFindLimit;
91*12dee9d3SKazuki Sakamoto   const uint8_t *p = zip_data_end - sizeof(EocdRecord);
92*12dee9d3SKazuki Sakamoto   for (; p >= zip_data->GetBytes() && p >= find_limit; p--) {
93*12dee9d3SKazuki Sakamoto     auto eocd = reinterpret_cast<const EocdRecord *>(p);
94*12dee9d3SKazuki Sakamoto     if (::memcmp(eocd->signature, EocdRecord::kSignature,
95*12dee9d3SKazuki Sakamoto                  sizeof(EocdRecord::kSignature)) == 0) {
96*12dee9d3SKazuki Sakamoto       // Found the end of central directory. Sanity check the values.
97*12dee9d3SKazuki Sakamoto       if (eocd->cd_records * sizeof(CdRecord) > eocd->cd_size ||
98*12dee9d3SKazuki Sakamoto           zip_data->GetBytes() + eocd->cd_offset + eocd->cd_size > p)
99*12dee9d3SKazuki Sakamoto         return nullptr;
100*12dee9d3SKazuki Sakamoto 
101*12dee9d3SKazuki Sakamoto       // This is a valid end of central directory record.
102*12dee9d3SKazuki Sakamoto       return eocd;
103*12dee9d3SKazuki Sakamoto     }
104*12dee9d3SKazuki Sakamoto   }
105*12dee9d3SKazuki Sakamoto   return nullptr;
106*12dee9d3SKazuki Sakamoto }
107*12dee9d3SKazuki Sakamoto 
GetFile(lldb::DataBufferSP zip_data,uint32_t local_file_header_offset,lldb::offset_t & file_offset,lldb::offset_t & file_size)108*12dee9d3SKazuki Sakamoto bool GetFile(lldb::DataBufferSP zip_data, uint32_t local_file_header_offset,
109*12dee9d3SKazuki Sakamoto              lldb::offset_t &file_offset, lldb::offset_t &file_size) {
110*12dee9d3SKazuki Sakamoto   auto local_file_header = reinterpret_cast<const LocalFileHeader *>(
111*12dee9d3SKazuki Sakamoto       zip_data->GetBytes() + local_file_header_offset);
112*12dee9d3SKazuki Sakamoto   // The signature should match.
113*12dee9d3SKazuki Sakamoto   if (::memcmp(local_file_header->signature, LocalFileHeader::kSignature,
114*12dee9d3SKazuki Sakamoto                sizeof(LocalFileHeader::kSignature)) != 0)
115*12dee9d3SKazuki Sakamoto     return false;
116*12dee9d3SKazuki Sakamoto 
117*12dee9d3SKazuki Sakamoto   auto file_data = reinterpret_cast<const uint8_t *>(local_file_header + 1) +
118*12dee9d3SKazuki Sakamoto                    local_file_header->file_name_length +
119*12dee9d3SKazuki Sakamoto                    local_file_header->extra_field_length;
120*12dee9d3SKazuki Sakamoto   // File should be uncompressed.
121*12dee9d3SKazuki Sakamoto   if (local_file_header->compressed_size !=
122*12dee9d3SKazuki Sakamoto       local_file_header->uncompressed_size)
123*12dee9d3SKazuki Sakamoto     return false;
124*12dee9d3SKazuki Sakamoto 
125*12dee9d3SKazuki Sakamoto   // This file is valid. Return the file offset and size.
126*12dee9d3SKazuki Sakamoto   file_offset = file_data - zip_data->GetBytes();
127*12dee9d3SKazuki Sakamoto   file_size = local_file_header->uncompressed_size;
128*12dee9d3SKazuki Sakamoto   return true;
129*12dee9d3SKazuki Sakamoto }
130*12dee9d3SKazuki Sakamoto 
FindFile(lldb::DataBufferSP zip_data,const EocdRecord * eocd,const llvm::StringRef file_path,lldb::offset_t & file_offset,lldb::offset_t & file_size)131*12dee9d3SKazuki Sakamoto bool FindFile(lldb::DataBufferSP zip_data, const EocdRecord *eocd,
132*12dee9d3SKazuki Sakamoto               const llvm::StringRef file_path, lldb::offset_t &file_offset,
133*12dee9d3SKazuki Sakamoto               lldb::offset_t &file_size) {
134*12dee9d3SKazuki Sakamoto   // Find the file from the central directory records.
135*12dee9d3SKazuki Sakamoto   auto cd = reinterpret_cast<const CdRecord *>(zip_data->GetBytes() +
136*12dee9d3SKazuki Sakamoto                                                eocd->cd_offset);
137*12dee9d3SKazuki Sakamoto   size_t cd_records = eocd->cd_records;
138*12dee9d3SKazuki Sakamoto   for (size_t i = 0; i < cd_records; i++) {
139*12dee9d3SKazuki Sakamoto     // The signature should match.
140*12dee9d3SKazuki Sakamoto     if (::memcmp(cd->signature, CdRecord::kSignature,
141*12dee9d3SKazuki Sakamoto                  sizeof(CdRecord::kSignature)) != 0)
142*12dee9d3SKazuki Sakamoto       return false;
143*12dee9d3SKazuki Sakamoto 
144*12dee9d3SKazuki Sakamoto     // Sanity check the file name values.
145*12dee9d3SKazuki Sakamoto     auto file_name = reinterpret_cast<const char *>(cd + 1);
146*12dee9d3SKazuki Sakamoto     size_t file_name_length = cd->file_name_length;
147*12dee9d3SKazuki Sakamoto     if (file_name + file_name_length >= reinterpret_cast<const char *>(eocd) ||
148*12dee9d3SKazuki Sakamoto         file_name_length == 0)
149*12dee9d3SKazuki Sakamoto       return false;
150*12dee9d3SKazuki Sakamoto 
151*12dee9d3SKazuki Sakamoto     // Compare the file name.
152*12dee9d3SKazuki Sakamoto     if (file_path == llvm::StringRef(file_name, file_name_length)) {
153*12dee9d3SKazuki Sakamoto       // Found the file.
154*12dee9d3SKazuki Sakamoto       return GetFile(zip_data, cd->local_file_header_offset, file_offset,
155*12dee9d3SKazuki Sakamoto                      file_size);
156*12dee9d3SKazuki Sakamoto     } else {
157*12dee9d3SKazuki Sakamoto       // Skip to the next central directory record.
158*12dee9d3SKazuki Sakamoto       cd = reinterpret_cast<const CdRecord *>(
159*12dee9d3SKazuki Sakamoto           reinterpret_cast<const char *>(cd) + sizeof(CdRecord) +
160*12dee9d3SKazuki Sakamoto           cd->file_name_length + cd->extra_field_length + cd->comment_length);
161*12dee9d3SKazuki Sakamoto       // Sanity check the pointer.
162*12dee9d3SKazuki Sakamoto       if (reinterpret_cast<const char *>(cd) >=
163*12dee9d3SKazuki Sakamoto           reinterpret_cast<const char *>(eocd))
164*12dee9d3SKazuki Sakamoto         return false;
165*12dee9d3SKazuki Sakamoto     }
166*12dee9d3SKazuki Sakamoto   }
167*12dee9d3SKazuki Sakamoto 
168*12dee9d3SKazuki Sakamoto   return false;
169*12dee9d3SKazuki Sakamoto }
170*12dee9d3SKazuki Sakamoto 
171*12dee9d3SKazuki Sakamoto } // end anonymous namespace
172*12dee9d3SKazuki Sakamoto 
Find(lldb::DataBufferSP zip_data,const llvm::StringRef file_path,lldb::offset_t & file_offset,lldb::offset_t & file_size)173*12dee9d3SKazuki Sakamoto bool ZipFile::Find(lldb::DataBufferSP zip_data, const llvm::StringRef file_path,
174*12dee9d3SKazuki Sakamoto                    lldb::offset_t &file_offset, lldb::offset_t &file_size) {
175*12dee9d3SKazuki Sakamoto   const EocdRecord *eocd = FindEocdRecord(zip_data);
176*12dee9d3SKazuki Sakamoto   if (!eocd)
177*12dee9d3SKazuki Sakamoto     return false;
178*12dee9d3SKazuki Sakamoto 
179*12dee9d3SKazuki Sakamoto   return FindFile(zip_data, eocd, file_path, file_offset, file_size);
180*12dee9d3SKazuki Sakamoto }
181