1*06c3fb27SDimitry Andric //===-- ZipFile.cpp -------------------------------------------------------===//
2*06c3fb27SDimitry Andric //
3*06c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*06c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*06c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*06c3fb27SDimitry Andric //
7*06c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
8*06c3fb27SDimitry Andric
9*06c3fb27SDimitry Andric #include "lldb/Utility/ZipFile.h"
10*06c3fb27SDimitry Andric #include "lldb/Utility/DataBuffer.h"
11*06c3fb27SDimitry Andric #include "lldb/Utility/FileSpec.h"
12*06c3fb27SDimitry Andric #include "llvm/Support/Endian.h"
13*06c3fb27SDimitry Andric
14*06c3fb27SDimitry Andric using namespace lldb_private;
15*06c3fb27SDimitry Andric using namespace llvm::support;
16*06c3fb27SDimitry Andric
17*06c3fb27SDimitry Andric namespace {
18*06c3fb27SDimitry Andric
19*06c3fb27SDimitry Andric // Zip headers.
20*06c3fb27SDimitry Andric // https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
21*06c3fb27SDimitry Andric
22*06c3fb27SDimitry Andric // The end of central directory record.
23*06c3fb27SDimitry Andric struct EocdRecord {
24*06c3fb27SDimitry Andric static constexpr char kSignature[] = {0x50, 0x4b, 0x05, 0x06};
25*06c3fb27SDimitry Andric char signature[sizeof(kSignature)];
26*06c3fb27SDimitry Andric unaligned_uint16_t disks;
27*06c3fb27SDimitry Andric unaligned_uint16_t cd_start_disk;
28*06c3fb27SDimitry Andric unaligned_uint16_t cds_on_this_disk;
29*06c3fb27SDimitry Andric unaligned_uint16_t cd_records;
30*06c3fb27SDimitry Andric unaligned_uint32_t cd_size;
31*06c3fb27SDimitry Andric unaligned_uint32_t cd_offset;
32*06c3fb27SDimitry Andric unaligned_uint16_t comment_length;
33*06c3fb27SDimitry Andric };
34*06c3fb27SDimitry Andric
35*06c3fb27SDimitry Andric // Logical find limit for the end of central directory record.
36*06c3fb27SDimitry Andric const size_t kEocdRecordFindLimit =
37*06c3fb27SDimitry Andric sizeof(EocdRecord) +
38*06c3fb27SDimitry Andric std::numeric_limits<decltype(EocdRecord::comment_length)>::max();
39*06c3fb27SDimitry Andric
40*06c3fb27SDimitry Andric // Central directory record.
41*06c3fb27SDimitry Andric struct CdRecord {
42*06c3fb27SDimitry Andric static constexpr char kSignature[] = {0x50, 0x4b, 0x01, 0x02};
43*06c3fb27SDimitry Andric char signature[sizeof(kSignature)];
44*06c3fb27SDimitry Andric unaligned_uint16_t version_made_by;
45*06c3fb27SDimitry Andric unaligned_uint16_t version_needed_to_extract;
46*06c3fb27SDimitry Andric unaligned_uint16_t general_purpose_bit_flag;
47*06c3fb27SDimitry Andric unaligned_uint16_t compression_method;
48*06c3fb27SDimitry Andric unaligned_uint16_t last_modification_time;
49*06c3fb27SDimitry Andric unaligned_uint16_t last_modification_date;
50*06c3fb27SDimitry Andric unaligned_uint32_t crc32;
51*06c3fb27SDimitry Andric unaligned_uint32_t compressed_size;
52*06c3fb27SDimitry Andric unaligned_uint32_t uncompressed_size;
53*06c3fb27SDimitry Andric unaligned_uint16_t file_name_length;
54*06c3fb27SDimitry Andric unaligned_uint16_t extra_field_length;
55*06c3fb27SDimitry Andric unaligned_uint16_t comment_length;
56*06c3fb27SDimitry Andric unaligned_uint16_t file_start_disk;
57*06c3fb27SDimitry Andric unaligned_uint16_t internal_file_attributes;
58*06c3fb27SDimitry Andric unaligned_uint32_t external_file_attributes;
59*06c3fb27SDimitry Andric unaligned_uint32_t local_file_header_offset;
60*06c3fb27SDimitry Andric };
61*06c3fb27SDimitry Andric // Immediately after CdRecord,
62*06c3fb27SDimitry Andric // - file name (file_name_length)
63*06c3fb27SDimitry Andric // - extra field (extra_field_length)
64*06c3fb27SDimitry Andric // - comment (comment_length)
65*06c3fb27SDimitry Andric
66*06c3fb27SDimitry Andric // Local file header.
67*06c3fb27SDimitry Andric struct LocalFileHeader {
68*06c3fb27SDimitry Andric static constexpr char kSignature[] = {0x50, 0x4b, 0x03, 0x04};
69*06c3fb27SDimitry Andric char signature[sizeof(kSignature)];
70*06c3fb27SDimitry Andric unaligned_uint16_t version_needed_to_extract;
71*06c3fb27SDimitry Andric unaligned_uint16_t general_purpose_bit_flag;
72*06c3fb27SDimitry Andric unaligned_uint16_t compression_method;
73*06c3fb27SDimitry Andric unaligned_uint16_t last_modification_time;
74*06c3fb27SDimitry Andric unaligned_uint16_t last_modification_date;
75*06c3fb27SDimitry Andric unaligned_uint32_t crc32;
76*06c3fb27SDimitry Andric unaligned_uint32_t compressed_size;
77*06c3fb27SDimitry Andric unaligned_uint32_t uncompressed_size;
78*06c3fb27SDimitry Andric unaligned_uint16_t file_name_length;
79*06c3fb27SDimitry Andric unaligned_uint16_t extra_field_length;
80*06c3fb27SDimitry Andric };
81*06c3fb27SDimitry Andric // Immediately after LocalFileHeader,
82*06c3fb27SDimitry Andric // - file name (file_name_length)
83*06c3fb27SDimitry Andric // - extra field (extra_field_length)
84*06c3fb27SDimitry Andric // - file data (should be compressed_size == uncompressed_size, page aligned)
85*06c3fb27SDimitry Andric
FindEocdRecord(lldb::DataBufferSP zip_data)86*06c3fb27SDimitry Andric const EocdRecord *FindEocdRecord(lldb::DataBufferSP zip_data) {
87*06c3fb27SDimitry Andric // Find backward the end of central directory record from the end of the zip
88*06c3fb27SDimitry Andric // file to the find limit.
89*06c3fb27SDimitry Andric const uint8_t *zip_data_end = zip_data->GetBytes() + zip_data->GetByteSize();
90*06c3fb27SDimitry Andric const uint8_t *find_limit = zip_data_end - kEocdRecordFindLimit;
91*06c3fb27SDimitry Andric const uint8_t *p = zip_data_end - sizeof(EocdRecord);
92*06c3fb27SDimitry Andric for (; p >= zip_data->GetBytes() && p >= find_limit; p--) {
93*06c3fb27SDimitry Andric auto eocd = reinterpret_cast<const EocdRecord *>(p);
94*06c3fb27SDimitry Andric if (::memcmp(eocd->signature, EocdRecord::kSignature,
95*06c3fb27SDimitry Andric sizeof(EocdRecord::kSignature)) == 0) {
96*06c3fb27SDimitry Andric // Found the end of central directory. Sanity check the values.
97*06c3fb27SDimitry Andric if (eocd->cd_records * sizeof(CdRecord) > eocd->cd_size ||
98*06c3fb27SDimitry Andric zip_data->GetBytes() + eocd->cd_offset + eocd->cd_size > p)
99*06c3fb27SDimitry Andric return nullptr;
100*06c3fb27SDimitry Andric
101*06c3fb27SDimitry Andric // This is a valid end of central directory record.
102*06c3fb27SDimitry Andric return eocd;
103*06c3fb27SDimitry Andric }
104*06c3fb27SDimitry Andric }
105*06c3fb27SDimitry Andric return nullptr;
106*06c3fb27SDimitry Andric }
107*06c3fb27SDimitry Andric
GetFile(lldb::DataBufferSP zip_data,uint32_t local_file_header_offset,lldb::offset_t & file_offset,lldb::offset_t & file_size)108*06c3fb27SDimitry Andric bool GetFile(lldb::DataBufferSP zip_data, uint32_t local_file_header_offset,
109*06c3fb27SDimitry Andric lldb::offset_t &file_offset, lldb::offset_t &file_size) {
110*06c3fb27SDimitry Andric auto local_file_header = reinterpret_cast<const LocalFileHeader *>(
111*06c3fb27SDimitry Andric zip_data->GetBytes() + local_file_header_offset);
112*06c3fb27SDimitry Andric // The signature should match.
113*06c3fb27SDimitry Andric if (::memcmp(local_file_header->signature, LocalFileHeader::kSignature,
114*06c3fb27SDimitry Andric sizeof(LocalFileHeader::kSignature)) != 0)
115*06c3fb27SDimitry Andric return false;
116*06c3fb27SDimitry Andric
117*06c3fb27SDimitry Andric auto file_data = reinterpret_cast<const uint8_t *>(local_file_header + 1) +
118*06c3fb27SDimitry Andric local_file_header->file_name_length +
119*06c3fb27SDimitry Andric local_file_header->extra_field_length;
120*06c3fb27SDimitry Andric // File should be uncompressed.
121*06c3fb27SDimitry Andric if (local_file_header->compressed_size !=
122*06c3fb27SDimitry Andric local_file_header->uncompressed_size)
123*06c3fb27SDimitry Andric return false;
124*06c3fb27SDimitry Andric
125*06c3fb27SDimitry Andric // This file is valid. Return the file offset and size.
126*06c3fb27SDimitry Andric file_offset = file_data - zip_data->GetBytes();
127*06c3fb27SDimitry Andric file_size = local_file_header->uncompressed_size;
128*06c3fb27SDimitry Andric return true;
129*06c3fb27SDimitry Andric }
130*06c3fb27SDimitry Andric
FindFile(lldb::DataBufferSP zip_data,const EocdRecord * eocd,const llvm::StringRef file_path,lldb::offset_t & file_offset,lldb::offset_t & file_size)131*06c3fb27SDimitry Andric bool FindFile(lldb::DataBufferSP zip_data, const EocdRecord *eocd,
132*06c3fb27SDimitry Andric const llvm::StringRef file_path, lldb::offset_t &file_offset,
133*06c3fb27SDimitry Andric lldb::offset_t &file_size) {
134*06c3fb27SDimitry Andric // Find the file from the central directory records.
135*06c3fb27SDimitry Andric auto cd = reinterpret_cast<const CdRecord *>(zip_data->GetBytes() +
136*06c3fb27SDimitry Andric eocd->cd_offset);
137*06c3fb27SDimitry Andric size_t cd_records = eocd->cd_records;
138*06c3fb27SDimitry Andric for (size_t i = 0; i < cd_records; i++) {
139*06c3fb27SDimitry Andric // The signature should match.
140*06c3fb27SDimitry Andric if (::memcmp(cd->signature, CdRecord::kSignature,
141*06c3fb27SDimitry Andric sizeof(CdRecord::kSignature)) != 0)
142*06c3fb27SDimitry Andric return false;
143*06c3fb27SDimitry Andric
144*06c3fb27SDimitry Andric // Sanity check the file name values.
145*06c3fb27SDimitry Andric auto file_name = reinterpret_cast<const char *>(cd + 1);
146*06c3fb27SDimitry Andric size_t file_name_length = cd->file_name_length;
147*06c3fb27SDimitry Andric if (file_name + file_name_length >= reinterpret_cast<const char *>(eocd) ||
148*06c3fb27SDimitry Andric file_name_length == 0)
149*06c3fb27SDimitry Andric return false;
150*06c3fb27SDimitry Andric
151*06c3fb27SDimitry Andric // Compare the file name.
152*06c3fb27SDimitry Andric if (file_path == llvm::StringRef(file_name, file_name_length)) {
153*06c3fb27SDimitry Andric // Found the file.
154*06c3fb27SDimitry Andric return GetFile(zip_data, cd->local_file_header_offset, file_offset,
155*06c3fb27SDimitry Andric file_size);
156*06c3fb27SDimitry Andric } else {
157*06c3fb27SDimitry Andric // Skip to the next central directory record.
158*06c3fb27SDimitry Andric cd = reinterpret_cast<const CdRecord *>(
159*06c3fb27SDimitry Andric reinterpret_cast<const char *>(cd) + sizeof(CdRecord) +
160*06c3fb27SDimitry Andric cd->file_name_length + cd->extra_field_length + cd->comment_length);
161*06c3fb27SDimitry Andric // Sanity check the pointer.
162*06c3fb27SDimitry Andric if (reinterpret_cast<const char *>(cd) >=
163*06c3fb27SDimitry Andric reinterpret_cast<const char *>(eocd))
164*06c3fb27SDimitry Andric return false;
165*06c3fb27SDimitry Andric }
166*06c3fb27SDimitry Andric }
167*06c3fb27SDimitry Andric
168*06c3fb27SDimitry Andric return false;
169*06c3fb27SDimitry Andric }
170*06c3fb27SDimitry Andric
171*06c3fb27SDimitry Andric } // end anonymous namespace
172*06c3fb27SDimitry Andric
Find(lldb::DataBufferSP zip_data,const llvm::StringRef file_path,lldb::offset_t & file_offset,lldb::offset_t & file_size)173*06c3fb27SDimitry Andric bool ZipFile::Find(lldb::DataBufferSP zip_data, const llvm::StringRef file_path,
174*06c3fb27SDimitry Andric lldb::offset_t &file_offset, lldb::offset_t &file_size) {
175*06c3fb27SDimitry Andric const EocdRecord *eocd = FindEocdRecord(zip_data);
176*06c3fb27SDimitry Andric if (!eocd)
177*06c3fb27SDimitry Andric return false;
178*06c3fb27SDimitry Andric
179*06c3fb27SDimitry Andric return FindFile(zip_data, eocd, file_path, file_offset, file_size);
180*06c3fb27SDimitry Andric }
181