xref: /freebsd-src/contrib/llvm-project/llvm/lib/ObjCopy/COFF/COFFReader.cpp (revision 81ad626541db97eb356e2c1d4a20eb2a26a766ab)
1*81ad6265SDimitry Andric //===- COFFReader.cpp -----------------------------------------------------===//
2*81ad6265SDimitry Andric //
3*81ad6265SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*81ad6265SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*81ad6265SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*81ad6265SDimitry Andric //
7*81ad6265SDimitry Andric //===----------------------------------------------------------------------===//
8*81ad6265SDimitry Andric 
9*81ad6265SDimitry Andric #include "COFFReader.h"
10*81ad6265SDimitry Andric #include "COFFObject.h"
11*81ad6265SDimitry Andric #include "llvm/ADT/ArrayRef.h"
12*81ad6265SDimitry Andric #include "llvm/ADT/StringRef.h"
13*81ad6265SDimitry Andric #include "llvm/BinaryFormat/COFF.h"
14*81ad6265SDimitry Andric #include "llvm/Object/COFF.h"
15*81ad6265SDimitry Andric #include "llvm/Support/ErrorHandling.h"
16*81ad6265SDimitry Andric #include <cstddef>
17*81ad6265SDimitry Andric #include <cstdint>
18*81ad6265SDimitry Andric 
19*81ad6265SDimitry Andric namespace llvm {
20*81ad6265SDimitry Andric namespace objcopy {
21*81ad6265SDimitry Andric namespace coff {
22*81ad6265SDimitry Andric 
23*81ad6265SDimitry Andric using namespace object;
24*81ad6265SDimitry Andric using namespace COFF;
25*81ad6265SDimitry Andric 
26*81ad6265SDimitry Andric Error COFFReader::readExecutableHeaders(Object &Obj) const {
27*81ad6265SDimitry Andric   const dos_header *DH = COFFObj.getDOSHeader();
28*81ad6265SDimitry Andric   Obj.Is64 = COFFObj.is64();
29*81ad6265SDimitry Andric   if (!DH)
30*81ad6265SDimitry Andric     return Error::success();
31*81ad6265SDimitry Andric 
32*81ad6265SDimitry Andric   Obj.IsPE = true;
33*81ad6265SDimitry Andric   Obj.DosHeader = *DH;
34*81ad6265SDimitry Andric   if (DH->AddressOfNewExeHeader > sizeof(*DH))
35*81ad6265SDimitry Andric     Obj.DosStub = ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(&DH[1]),
36*81ad6265SDimitry Andric                                     DH->AddressOfNewExeHeader - sizeof(*DH));
37*81ad6265SDimitry Andric 
38*81ad6265SDimitry Andric   if (COFFObj.is64()) {
39*81ad6265SDimitry Andric     Obj.PeHeader = *COFFObj.getPE32PlusHeader();
40*81ad6265SDimitry Andric   } else {
41*81ad6265SDimitry Andric     const pe32_header *PE32 = COFFObj.getPE32Header();
42*81ad6265SDimitry Andric     copyPeHeader(Obj.PeHeader, *PE32);
43*81ad6265SDimitry Andric     // The pe32plus_header (stored in Object) lacks the BaseOfData field.
44*81ad6265SDimitry Andric     Obj.BaseOfData = PE32->BaseOfData;
45*81ad6265SDimitry Andric   }
46*81ad6265SDimitry Andric 
47*81ad6265SDimitry Andric   for (size_t I = 0; I < Obj.PeHeader.NumberOfRvaAndSize; I++) {
48*81ad6265SDimitry Andric     const data_directory *Dir = COFFObj.getDataDirectory(I);
49*81ad6265SDimitry Andric     if (!Dir)
50*81ad6265SDimitry Andric       return errorCodeToError(object_error::parse_failed);
51*81ad6265SDimitry Andric     Obj.DataDirectories.emplace_back(*Dir);
52*81ad6265SDimitry Andric   }
53*81ad6265SDimitry Andric   return Error::success();
54*81ad6265SDimitry Andric }
55*81ad6265SDimitry Andric 
56*81ad6265SDimitry Andric Error COFFReader::readSections(Object &Obj) const {
57*81ad6265SDimitry Andric   std::vector<Section> Sections;
58*81ad6265SDimitry Andric   // Section indexing starts from 1.
59*81ad6265SDimitry Andric   for (size_t I = 1, E = COFFObj.getNumberOfSections(); I <= E; I++) {
60*81ad6265SDimitry Andric     Expected<const coff_section *> SecOrErr = COFFObj.getSection(I);
61*81ad6265SDimitry Andric     if (!SecOrErr)
62*81ad6265SDimitry Andric       return SecOrErr.takeError();
63*81ad6265SDimitry Andric     const coff_section *Sec = *SecOrErr;
64*81ad6265SDimitry Andric     Sections.push_back(Section());
65*81ad6265SDimitry Andric     Section &S = Sections.back();
66*81ad6265SDimitry Andric     S.Header = *Sec;
67*81ad6265SDimitry Andric     S.Header.Characteristics &= ~COFF::IMAGE_SCN_LNK_NRELOC_OVFL;
68*81ad6265SDimitry Andric     ArrayRef<uint8_t> Contents;
69*81ad6265SDimitry Andric     if (Error E = COFFObj.getSectionContents(Sec, Contents))
70*81ad6265SDimitry Andric       return E;
71*81ad6265SDimitry Andric     S.setContentsRef(Contents);
72*81ad6265SDimitry Andric     ArrayRef<coff_relocation> Relocs = COFFObj.getRelocations(Sec);
73*81ad6265SDimitry Andric     for (const coff_relocation &R : Relocs)
74*81ad6265SDimitry Andric       S.Relocs.push_back(R);
75*81ad6265SDimitry Andric     if (Expected<StringRef> NameOrErr = COFFObj.getSectionName(Sec))
76*81ad6265SDimitry Andric       S.Name = *NameOrErr;
77*81ad6265SDimitry Andric     else
78*81ad6265SDimitry Andric       return NameOrErr.takeError();
79*81ad6265SDimitry Andric   }
80*81ad6265SDimitry Andric   Obj.addSections(Sections);
81*81ad6265SDimitry Andric   return Error::success();
82*81ad6265SDimitry Andric }
83*81ad6265SDimitry Andric 
84*81ad6265SDimitry Andric Error COFFReader::readSymbols(Object &Obj, bool IsBigObj) const {
85*81ad6265SDimitry Andric   std::vector<Symbol> Symbols;
86*81ad6265SDimitry Andric   Symbols.reserve(COFFObj.getRawNumberOfSymbols());
87*81ad6265SDimitry Andric   ArrayRef<Section> Sections = Obj.getSections();
88*81ad6265SDimitry Andric   for (uint32_t I = 0, E = COFFObj.getRawNumberOfSymbols(); I < E;) {
89*81ad6265SDimitry Andric     Expected<COFFSymbolRef> SymOrErr = COFFObj.getSymbol(I);
90*81ad6265SDimitry Andric     if (!SymOrErr)
91*81ad6265SDimitry Andric       return SymOrErr.takeError();
92*81ad6265SDimitry Andric     COFFSymbolRef SymRef = *SymOrErr;
93*81ad6265SDimitry Andric 
94*81ad6265SDimitry Andric     Symbols.push_back(Symbol());
95*81ad6265SDimitry Andric     Symbol &Sym = Symbols.back();
96*81ad6265SDimitry Andric     // Copy symbols from the original form into an intermediate coff_symbol32.
97*81ad6265SDimitry Andric     if (IsBigObj)
98*81ad6265SDimitry Andric       copySymbol(Sym.Sym,
99*81ad6265SDimitry Andric                  *reinterpret_cast<const coff_symbol32 *>(SymRef.getRawPtr()));
100*81ad6265SDimitry Andric     else
101*81ad6265SDimitry Andric       copySymbol(Sym.Sym,
102*81ad6265SDimitry Andric                  *reinterpret_cast<const coff_symbol16 *>(SymRef.getRawPtr()));
103*81ad6265SDimitry Andric     auto NameOrErr = COFFObj.getSymbolName(SymRef);
104*81ad6265SDimitry Andric     if (!NameOrErr)
105*81ad6265SDimitry Andric       return NameOrErr.takeError();
106*81ad6265SDimitry Andric     Sym.Name = *NameOrErr;
107*81ad6265SDimitry Andric 
108*81ad6265SDimitry Andric     ArrayRef<uint8_t> AuxData = COFFObj.getSymbolAuxData(SymRef);
109*81ad6265SDimitry Andric     size_t SymSize = IsBigObj ? sizeof(coff_symbol32) : sizeof(coff_symbol16);
110*81ad6265SDimitry Andric     assert(AuxData.size() == SymSize * SymRef.getNumberOfAuxSymbols());
111*81ad6265SDimitry Andric     // The auxillary symbols are structs of sizeof(coff_symbol16) each.
112*81ad6265SDimitry Andric     // In the big object format (where symbols are coff_symbol32), each
113*81ad6265SDimitry Andric     // auxillary symbol is padded with 2 bytes at the end. Copy each
114*81ad6265SDimitry Andric     // auxillary symbol to the Sym.AuxData vector. For file symbols,
115*81ad6265SDimitry Andric     // the whole range of aux symbols are interpreted as one null padded
116*81ad6265SDimitry Andric     // string instead.
117*81ad6265SDimitry Andric     if (SymRef.isFileRecord())
118*81ad6265SDimitry Andric       Sym.AuxFile = StringRef(reinterpret_cast<const char *>(AuxData.data()),
119*81ad6265SDimitry Andric                               AuxData.size())
120*81ad6265SDimitry Andric                         .rtrim('\0');
121*81ad6265SDimitry Andric     else
122*81ad6265SDimitry Andric       for (size_t I = 0; I < SymRef.getNumberOfAuxSymbols(); I++)
123*81ad6265SDimitry Andric         Sym.AuxData.push_back(AuxData.slice(I * SymSize, sizeof(AuxSymbol)));
124*81ad6265SDimitry Andric 
125*81ad6265SDimitry Andric     // Find the unique id of the section
126*81ad6265SDimitry Andric     if (SymRef.getSectionNumber() <=
127*81ad6265SDimitry Andric         0) // Special symbol (undefined/absolute/debug)
128*81ad6265SDimitry Andric       Sym.TargetSectionId = SymRef.getSectionNumber();
129*81ad6265SDimitry Andric     else if (static_cast<uint32_t>(SymRef.getSectionNumber() - 1) <
130*81ad6265SDimitry Andric              Sections.size())
131*81ad6265SDimitry Andric       Sym.TargetSectionId = Sections[SymRef.getSectionNumber() - 1].UniqueId;
132*81ad6265SDimitry Andric     else
133*81ad6265SDimitry Andric       return createStringError(object_error::parse_failed,
134*81ad6265SDimitry Andric                                "section number out of range");
135*81ad6265SDimitry Andric     // For section definitions, check if it is comdat associative, and if
136*81ad6265SDimitry Andric     // it is, find the target section unique id.
137*81ad6265SDimitry Andric     const coff_aux_section_definition *SD = SymRef.getSectionDefinition();
138*81ad6265SDimitry Andric     const coff_aux_weak_external *WE = SymRef.getWeakExternal();
139*81ad6265SDimitry Andric     if (SD && SD->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
140*81ad6265SDimitry Andric       int32_t Index = SD->getNumber(IsBigObj);
141*81ad6265SDimitry Andric       if (Index <= 0 || static_cast<uint32_t>(Index - 1) >= Sections.size())
142*81ad6265SDimitry Andric         return createStringError(object_error::parse_failed,
143*81ad6265SDimitry Andric                                  "unexpected associative section index");
144*81ad6265SDimitry Andric       Sym.AssociativeComdatTargetSectionId = Sections[Index - 1].UniqueId;
145*81ad6265SDimitry Andric     } else if (WE) {
146*81ad6265SDimitry Andric       // This is a raw symbol index for now, but store it in the Symbol
147*81ad6265SDimitry Andric       // until we've added them to the Object, which assigns the final
148*81ad6265SDimitry Andric       // unique ids.
149*81ad6265SDimitry Andric       Sym.WeakTargetSymbolId = WE->TagIndex;
150*81ad6265SDimitry Andric     }
151*81ad6265SDimitry Andric     I += 1 + SymRef.getNumberOfAuxSymbols();
152*81ad6265SDimitry Andric   }
153*81ad6265SDimitry Andric   Obj.addSymbols(Symbols);
154*81ad6265SDimitry Andric   return Error::success();
155*81ad6265SDimitry Andric }
156*81ad6265SDimitry Andric 
157*81ad6265SDimitry Andric Error COFFReader::setSymbolTargets(Object &Obj) const {
158*81ad6265SDimitry Andric   std::vector<const Symbol *> RawSymbolTable;
159*81ad6265SDimitry Andric   for (const Symbol &Sym : Obj.getSymbols()) {
160*81ad6265SDimitry Andric     RawSymbolTable.push_back(&Sym);
161*81ad6265SDimitry Andric     for (size_t I = 0; I < Sym.Sym.NumberOfAuxSymbols; I++)
162*81ad6265SDimitry Andric       RawSymbolTable.push_back(nullptr);
163*81ad6265SDimitry Andric   }
164*81ad6265SDimitry Andric   for (Symbol &Sym : Obj.getMutableSymbols()) {
165*81ad6265SDimitry Andric     // Convert WeakTargetSymbolId from the original raw symbol index to
166*81ad6265SDimitry Andric     // a proper unique id.
167*81ad6265SDimitry Andric     if (Sym.WeakTargetSymbolId) {
168*81ad6265SDimitry Andric       if (*Sym.WeakTargetSymbolId >= RawSymbolTable.size())
169*81ad6265SDimitry Andric         return createStringError(object_error::parse_failed,
170*81ad6265SDimitry Andric                                  "weak external reference out of range");
171*81ad6265SDimitry Andric       const Symbol *Target = RawSymbolTable[*Sym.WeakTargetSymbolId];
172*81ad6265SDimitry Andric       if (Target == nullptr)
173*81ad6265SDimitry Andric         return createStringError(object_error::parse_failed,
174*81ad6265SDimitry Andric                                  "invalid SymbolTableIndex");
175*81ad6265SDimitry Andric       Sym.WeakTargetSymbolId = Target->UniqueId;
176*81ad6265SDimitry Andric     }
177*81ad6265SDimitry Andric   }
178*81ad6265SDimitry Andric   for (Section &Sec : Obj.getMutableSections()) {
179*81ad6265SDimitry Andric     for (Relocation &R : Sec.Relocs) {
180*81ad6265SDimitry Andric       if (R.Reloc.SymbolTableIndex >= RawSymbolTable.size())
181*81ad6265SDimitry Andric         return createStringError(object_error::parse_failed,
182*81ad6265SDimitry Andric                                  "SymbolTableIndex out of range");
183*81ad6265SDimitry Andric       const Symbol *Sym = RawSymbolTable[R.Reloc.SymbolTableIndex];
184*81ad6265SDimitry Andric       if (Sym == nullptr)
185*81ad6265SDimitry Andric         return createStringError(object_error::parse_failed,
186*81ad6265SDimitry Andric                                  "invalid SymbolTableIndex");
187*81ad6265SDimitry Andric       R.Target = Sym->UniqueId;
188*81ad6265SDimitry Andric       R.TargetName = Sym->Name;
189*81ad6265SDimitry Andric     }
190*81ad6265SDimitry Andric   }
191*81ad6265SDimitry Andric   return Error::success();
192*81ad6265SDimitry Andric }
193*81ad6265SDimitry Andric 
194*81ad6265SDimitry Andric Expected<std::unique_ptr<Object>> COFFReader::create() const {
195*81ad6265SDimitry Andric   auto Obj = std::make_unique<Object>();
196*81ad6265SDimitry Andric 
197*81ad6265SDimitry Andric   bool IsBigObj = false;
198*81ad6265SDimitry Andric   if (const coff_file_header *CFH = COFFObj.getCOFFHeader()) {
199*81ad6265SDimitry Andric     Obj->CoffFileHeader = *CFH;
200*81ad6265SDimitry Andric   } else {
201*81ad6265SDimitry Andric     const coff_bigobj_file_header *CBFH = COFFObj.getCOFFBigObjHeader();
202*81ad6265SDimitry Andric     if (!CBFH)
203*81ad6265SDimitry Andric       return createStringError(object_error::parse_failed,
204*81ad6265SDimitry Andric                                "no COFF file header returned");
205*81ad6265SDimitry Andric     // Only copying the few fields from the bigobj header that we need
206*81ad6265SDimitry Andric     // and won't recreate in the end.
207*81ad6265SDimitry Andric     Obj->CoffFileHeader.Machine = CBFH->Machine;
208*81ad6265SDimitry Andric     Obj->CoffFileHeader.TimeDateStamp = CBFH->TimeDateStamp;
209*81ad6265SDimitry Andric     IsBigObj = true;
210*81ad6265SDimitry Andric   }
211*81ad6265SDimitry Andric 
212*81ad6265SDimitry Andric   if (Error E = readExecutableHeaders(*Obj))
213*81ad6265SDimitry Andric     return std::move(E);
214*81ad6265SDimitry Andric   if (Error E = readSections(*Obj))
215*81ad6265SDimitry Andric     return std::move(E);
216*81ad6265SDimitry Andric   if (Error E = readSymbols(*Obj, IsBigObj))
217*81ad6265SDimitry Andric     return std::move(E);
218*81ad6265SDimitry Andric   if (Error E = setSymbolTargets(*Obj))
219*81ad6265SDimitry Andric     return std::move(E);
220*81ad6265SDimitry Andric 
221*81ad6265SDimitry Andric   return std::move(Obj);
222*81ad6265SDimitry Andric }
223*81ad6265SDimitry Andric 
224*81ad6265SDimitry Andric } // end namespace coff
225*81ad6265SDimitry Andric } // end namespace objcopy
226*81ad6265SDimitry Andric } // end namespace llvm
227