1*81ad6265SDimitry Andric //===- COFFReader.cpp -----------------------------------------------------===// 2*81ad6265SDimitry Andric // 3*81ad6265SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*81ad6265SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*81ad6265SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*81ad6265SDimitry Andric // 7*81ad6265SDimitry Andric //===----------------------------------------------------------------------===// 8*81ad6265SDimitry Andric 9*81ad6265SDimitry Andric #include "COFFReader.h" 10*81ad6265SDimitry Andric #include "COFFObject.h" 11*81ad6265SDimitry Andric #include "llvm/ADT/ArrayRef.h" 12*81ad6265SDimitry Andric #include "llvm/ADT/StringRef.h" 13*81ad6265SDimitry Andric #include "llvm/BinaryFormat/COFF.h" 14*81ad6265SDimitry Andric #include "llvm/Object/COFF.h" 15*81ad6265SDimitry Andric #include "llvm/Support/ErrorHandling.h" 16*81ad6265SDimitry Andric #include <cstddef> 17*81ad6265SDimitry Andric #include <cstdint> 18*81ad6265SDimitry Andric 19*81ad6265SDimitry Andric namespace llvm { 20*81ad6265SDimitry Andric namespace objcopy { 21*81ad6265SDimitry Andric namespace coff { 22*81ad6265SDimitry Andric 23*81ad6265SDimitry Andric using namespace object; 24*81ad6265SDimitry Andric using namespace COFF; 25*81ad6265SDimitry Andric 26*81ad6265SDimitry Andric Error COFFReader::readExecutableHeaders(Object &Obj) const { 27*81ad6265SDimitry Andric const dos_header *DH = COFFObj.getDOSHeader(); 28*81ad6265SDimitry Andric Obj.Is64 = COFFObj.is64(); 29*81ad6265SDimitry Andric if (!DH) 30*81ad6265SDimitry Andric return Error::success(); 31*81ad6265SDimitry Andric 32*81ad6265SDimitry Andric Obj.IsPE = true; 33*81ad6265SDimitry Andric Obj.DosHeader = *DH; 34*81ad6265SDimitry Andric if (DH->AddressOfNewExeHeader > sizeof(*DH)) 35*81ad6265SDimitry Andric Obj.DosStub = ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(&DH[1]), 36*81ad6265SDimitry Andric DH->AddressOfNewExeHeader - sizeof(*DH)); 37*81ad6265SDimitry Andric 38*81ad6265SDimitry Andric if (COFFObj.is64()) { 39*81ad6265SDimitry Andric Obj.PeHeader = *COFFObj.getPE32PlusHeader(); 40*81ad6265SDimitry Andric } else { 41*81ad6265SDimitry Andric const pe32_header *PE32 = COFFObj.getPE32Header(); 42*81ad6265SDimitry Andric copyPeHeader(Obj.PeHeader, *PE32); 43*81ad6265SDimitry Andric // The pe32plus_header (stored in Object) lacks the BaseOfData field. 44*81ad6265SDimitry Andric Obj.BaseOfData = PE32->BaseOfData; 45*81ad6265SDimitry Andric } 46*81ad6265SDimitry Andric 47*81ad6265SDimitry Andric for (size_t I = 0; I < Obj.PeHeader.NumberOfRvaAndSize; I++) { 48*81ad6265SDimitry Andric const data_directory *Dir = COFFObj.getDataDirectory(I); 49*81ad6265SDimitry Andric if (!Dir) 50*81ad6265SDimitry Andric return errorCodeToError(object_error::parse_failed); 51*81ad6265SDimitry Andric Obj.DataDirectories.emplace_back(*Dir); 52*81ad6265SDimitry Andric } 53*81ad6265SDimitry Andric return Error::success(); 54*81ad6265SDimitry Andric } 55*81ad6265SDimitry Andric 56*81ad6265SDimitry Andric Error COFFReader::readSections(Object &Obj) const { 57*81ad6265SDimitry Andric std::vector<Section> Sections; 58*81ad6265SDimitry Andric // Section indexing starts from 1. 59*81ad6265SDimitry Andric for (size_t I = 1, E = COFFObj.getNumberOfSections(); I <= E; I++) { 60*81ad6265SDimitry Andric Expected<const coff_section *> SecOrErr = COFFObj.getSection(I); 61*81ad6265SDimitry Andric if (!SecOrErr) 62*81ad6265SDimitry Andric return SecOrErr.takeError(); 63*81ad6265SDimitry Andric const coff_section *Sec = *SecOrErr; 64*81ad6265SDimitry Andric Sections.push_back(Section()); 65*81ad6265SDimitry Andric Section &S = Sections.back(); 66*81ad6265SDimitry Andric S.Header = *Sec; 67*81ad6265SDimitry Andric S.Header.Characteristics &= ~COFF::IMAGE_SCN_LNK_NRELOC_OVFL; 68*81ad6265SDimitry Andric ArrayRef<uint8_t> Contents; 69*81ad6265SDimitry Andric if (Error E = COFFObj.getSectionContents(Sec, Contents)) 70*81ad6265SDimitry Andric return E; 71*81ad6265SDimitry Andric S.setContentsRef(Contents); 72*81ad6265SDimitry Andric ArrayRef<coff_relocation> Relocs = COFFObj.getRelocations(Sec); 73*81ad6265SDimitry Andric for (const coff_relocation &R : Relocs) 74*81ad6265SDimitry Andric S.Relocs.push_back(R); 75*81ad6265SDimitry Andric if (Expected<StringRef> NameOrErr = COFFObj.getSectionName(Sec)) 76*81ad6265SDimitry Andric S.Name = *NameOrErr; 77*81ad6265SDimitry Andric else 78*81ad6265SDimitry Andric return NameOrErr.takeError(); 79*81ad6265SDimitry Andric } 80*81ad6265SDimitry Andric Obj.addSections(Sections); 81*81ad6265SDimitry Andric return Error::success(); 82*81ad6265SDimitry Andric } 83*81ad6265SDimitry Andric 84*81ad6265SDimitry Andric Error COFFReader::readSymbols(Object &Obj, bool IsBigObj) const { 85*81ad6265SDimitry Andric std::vector<Symbol> Symbols; 86*81ad6265SDimitry Andric Symbols.reserve(COFFObj.getRawNumberOfSymbols()); 87*81ad6265SDimitry Andric ArrayRef<Section> Sections = Obj.getSections(); 88*81ad6265SDimitry Andric for (uint32_t I = 0, E = COFFObj.getRawNumberOfSymbols(); I < E;) { 89*81ad6265SDimitry Andric Expected<COFFSymbolRef> SymOrErr = COFFObj.getSymbol(I); 90*81ad6265SDimitry Andric if (!SymOrErr) 91*81ad6265SDimitry Andric return SymOrErr.takeError(); 92*81ad6265SDimitry Andric COFFSymbolRef SymRef = *SymOrErr; 93*81ad6265SDimitry Andric 94*81ad6265SDimitry Andric Symbols.push_back(Symbol()); 95*81ad6265SDimitry Andric Symbol &Sym = Symbols.back(); 96*81ad6265SDimitry Andric // Copy symbols from the original form into an intermediate coff_symbol32. 97*81ad6265SDimitry Andric if (IsBigObj) 98*81ad6265SDimitry Andric copySymbol(Sym.Sym, 99*81ad6265SDimitry Andric *reinterpret_cast<const coff_symbol32 *>(SymRef.getRawPtr())); 100*81ad6265SDimitry Andric else 101*81ad6265SDimitry Andric copySymbol(Sym.Sym, 102*81ad6265SDimitry Andric *reinterpret_cast<const coff_symbol16 *>(SymRef.getRawPtr())); 103*81ad6265SDimitry Andric auto NameOrErr = COFFObj.getSymbolName(SymRef); 104*81ad6265SDimitry Andric if (!NameOrErr) 105*81ad6265SDimitry Andric return NameOrErr.takeError(); 106*81ad6265SDimitry Andric Sym.Name = *NameOrErr; 107*81ad6265SDimitry Andric 108*81ad6265SDimitry Andric ArrayRef<uint8_t> AuxData = COFFObj.getSymbolAuxData(SymRef); 109*81ad6265SDimitry Andric size_t SymSize = IsBigObj ? sizeof(coff_symbol32) : sizeof(coff_symbol16); 110*81ad6265SDimitry Andric assert(AuxData.size() == SymSize * SymRef.getNumberOfAuxSymbols()); 111*81ad6265SDimitry Andric // The auxillary symbols are structs of sizeof(coff_symbol16) each. 112*81ad6265SDimitry Andric // In the big object format (where symbols are coff_symbol32), each 113*81ad6265SDimitry Andric // auxillary symbol is padded with 2 bytes at the end. Copy each 114*81ad6265SDimitry Andric // auxillary symbol to the Sym.AuxData vector. For file symbols, 115*81ad6265SDimitry Andric // the whole range of aux symbols are interpreted as one null padded 116*81ad6265SDimitry Andric // string instead. 117*81ad6265SDimitry Andric if (SymRef.isFileRecord()) 118*81ad6265SDimitry Andric Sym.AuxFile = StringRef(reinterpret_cast<const char *>(AuxData.data()), 119*81ad6265SDimitry Andric AuxData.size()) 120*81ad6265SDimitry Andric .rtrim('\0'); 121*81ad6265SDimitry Andric else 122*81ad6265SDimitry Andric for (size_t I = 0; I < SymRef.getNumberOfAuxSymbols(); I++) 123*81ad6265SDimitry Andric Sym.AuxData.push_back(AuxData.slice(I * SymSize, sizeof(AuxSymbol))); 124*81ad6265SDimitry Andric 125*81ad6265SDimitry Andric // Find the unique id of the section 126*81ad6265SDimitry Andric if (SymRef.getSectionNumber() <= 127*81ad6265SDimitry Andric 0) // Special symbol (undefined/absolute/debug) 128*81ad6265SDimitry Andric Sym.TargetSectionId = SymRef.getSectionNumber(); 129*81ad6265SDimitry Andric else if (static_cast<uint32_t>(SymRef.getSectionNumber() - 1) < 130*81ad6265SDimitry Andric Sections.size()) 131*81ad6265SDimitry Andric Sym.TargetSectionId = Sections[SymRef.getSectionNumber() - 1].UniqueId; 132*81ad6265SDimitry Andric else 133*81ad6265SDimitry Andric return createStringError(object_error::parse_failed, 134*81ad6265SDimitry Andric "section number out of range"); 135*81ad6265SDimitry Andric // For section definitions, check if it is comdat associative, and if 136*81ad6265SDimitry Andric // it is, find the target section unique id. 137*81ad6265SDimitry Andric const coff_aux_section_definition *SD = SymRef.getSectionDefinition(); 138*81ad6265SDimitry Andric const coff_aux_weak_external *WE = SymRef.getWeakExternal(); 139*81ad6265SDimitry Andric if (SD && SD->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) { 140*81ad6265SDimitry Andric int32_t Index = SD->getNumber(IsBigObj); 141*81ad6265SDimitry Andric if (Index <= 0 || static_cast<uint32_t>(Index - 1) >= Sections.size()) 142*81ad6265SDimitry Andric return createStringError(object_error::parse_failed, 143*81ad6265SDimitry Andric "unexpected associative section index"); 144*81ad6265SDimitry Andric Sym.AssociativeComdatTargetSectionId = Sections[Index - 1].UniqueId; 145*81ad6265SDimitry Andric } else if (WE) { 146*81ad6265SDimitry Andric // This is a raw symbol index for now, but store it in the Symbol 147*81ad6265SDimitry Andric // until we've added them to the Object, which assigns the final 148*81ad6265SDimitry Andric // unique ids. 149*81ad6265SDimitry Andric Sym.WeakTargetSymbolId = WE->TagIndex; 150*81ad6265SDimitry Andric } 151*81ad6265SDimitry Andric I += 1 + SymRef.getNumberOfAuxSymbols(); 152*81ad6265SDimitry Andric } 153*81ad6265SDimitry Andric Obj.addSymbols(Symbols); 154*81ad6265SDimitry Andric return Error::success(); 155*81ad6265SDimitry Andric } 156*81ad6265SDimitry Andric 157*81ad6265SDimitry Andric Error COFFReader::setSymbolTargets(Object &Obj) const { 158*81ad6265SDimitry Andric std::vector<const Symbol *> RawSymbolTable; 159*81ad6265SDimitry Andric for (const Symbol &Sym : Obj.getSymbols()) { 160*81ad6265SDimitry Andric RawSymbolTable.push_back(&Sym); 161*81ad6265SDimitry Andric for (size_t I = 0; I < Sym.Sym.NumberOfAuxSymbols; I++) 162*81ad6265SDimitry Andric RawSymbolTable.push_back(nullptr); 163*81ad6265SDimitry Andric } 164*81ad6265SDimitry Andric for (Symbol &Sym : Obj.getMutableSymbols()) { 165*81ad6265SDimitry Andric // Convert WeakTargetSymbolId from the original raw symbol index to 166*81ad6265SDimitry Andric // a proper unique id. 167*81ad6265SDimitry Andric if (Sym.WeakTargetSymbolId) { 168*81ad6265SDimitry Andric if (*Sym.WeakTargetSymbolId >= RawSymbolTable.size()) 169*81ad6265SDimitry Andric return createStringError(object_error::parse_failed, 170*81ad6265SDimitry Andric "weak external reference out of range"); 171*81ad6265SDimitry Andric const Symbol *Target = RawSymbolTable[*Sym.WeakTargetSymbolId]; 172*81ad6265SDimitry Andric if (Target == nullptr) 173*81ad6265SDimitry Andric return createStringError(object_error::parse_failed, 174*81ad6265SDimitry Andric "invalid SymbolTableIndex"); 175*81ad6265SDimitry Andric Sym.WeakTargetSymbolId = Target->UniqueId; 176*81ad6265SDimitry Andric } 177*81ad6265SDimitry Andric } 178*81ad6265SDimitry Andric for (Section &Sec : Obj.getMutableSections()) { 179*81ad6265SDimitry Andric for (Relocation &R : Sec.Relocs) { 180*81ad6265SDimitry Andric if (R.Reloc.SymbolTableIndex >= RawSymbolTable.size()) 181*81ad6265SDimitry Andric return createStringError(object_error::parse_failed, 182*81ad6265SDimitry Andric "SymbolTableIndex out of range"); 183*81ad6265SDimitry Andric const Symbol *Sym = RawSymbolTable[R.Reloc.SymbolTableIndex]; 184*81ad6265SDimitry Andric if (Sym == nullptr) 185*81ad6265SDimitry Andric return createStringError(object_error::parse_failed, 186*81ad6265SDimitry Andric "invalid SymbolTableIndex"); 187*81ad6265SDimitry Andric R.Target = Sym->UniqueId; 188*81ad6265SDimitry Andric R.TargetName = Sym->Name; 189*81ad6265SDimitry Andric } 190*81ad6265SDimitry Andric } 191*81ad6265SDimitry Andric return Error::success(); 192*81ad6265SDimitry Andric } 193*81ad6265SDimitry Andric 194*81ad6265SDimitry Andric Expected<std::unique_ptr<Object>> COFFReader::create() const { 195*81ad6265SDimitry Andric auto Obj = std::make_unique<Object>(); 196*81ad6265SDimitry Andric 197*81ad6265SDimitry Andric bool IsBigObj = false; 198*81ad6265SDimitry Andric if (const coff_file_header *CFH = COFFObj.getCOFFHeader()) { 199*81ad6265SDimitry Andric Obj->CoffFileHeader = *CFH; 200*81ad6265SDimitry Andric } else { 201*81ad6265SDimitry Andric const coff_bigobj_file_header *CBFH = COFFObj.getCOFFBigObjHeader(); 202*81ad6265SDimitry Andric if (!CBFH) 203*81ad6265SDimitry Andric return createStringError(object_error::parse_failed, 204*81ad6265SDimitry Andric "no COFF file header returned"); 205*81ad6265SDimitry Andric // Only copying the few fields from the bigobj header that we need 206*81ad6265SDimitry Andric // and won't recreate in the end. 207*81ad6265SDimitry Andric Obj->CoffFileHeader.Machine = CBFH->Machine; 208*81ad6265SDimitry Andric Obj->CoffFileHeader.TimeDateStamp = CBFH->TimeDateStamp; 209*81ad6265SDimitry Andric IsBigObj = true; 210*81ad6265SDimitry Andric } 211*81ad6265SDimitry Andric 212*81ad6265SDimitry Andric if (Error E = readExecutableHeaders(*Obj)) 213*81ad6265SDimitry Andric return std::move(E); 214*81ad6265SDimitry Andric if (Error E = readSections(*Obj)) 215*81ad6265SDimitry Andric return std::move(E); 216*81ad6265SDimitry Andric if (Error E = readSymbols(*Obj, IsBigObj)) 217*81ad6265SDimitry Andric return std::move(E); 218*81ad6265SDimitry Andric if (Error E = setSymbolTargets(*Obj)) 219*81ad6265SDimitry Andric return std::move(E); 220*81ad6265SDimitry Andric 221*81ad6265SDimitry Andric return std::move(Obj); 222*81ad6265SDimitry Andric } 223*81ad6265SDimitry Andric 224*81ad6265SDimitry Andric } // end namespace coff 225*81ad6265SDimitry Andric } // end namespace objcopy 226*81ad6265SDimitry Andric } // end namespace llvm 227