//===- COFFReader.cpp -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
881ad6265SDimitry Andric
981ad6265SDimitry Andric #include "COFFReader.h"
1081ad6265SDimitry Andric #include "COFFObject.h"
1181ad6265SDimitry Andric #include "llvm/ADT/ArrayRef.h"
1281ad6265SDimitry Andric #include "llvm/ADT/StringRef.h"
1381ad6265SDimitry Andric #include "llvm/BinaryFormat/COFF.h"
1481ad6265SDimitry Andric #include "llvm/Object/COFF.h"
1581ad6265SDimitry Andric #include "llvm/Support/ErrorHandling.h"
1681ad6265SDimitry Andric #include <cstddef>
1781ad6265SDimitry Andric #include <cstdint>
1881ad6265SDimitry Andric
1981ad6265SDimitry Andric namespace llvm {
2081ad6265SDimitry Andric namespace objcopy {
2181ad6265SDimitry Andric namespace coff {
2281ad6265SDimitry Andric
2381ad6265SDimitry Andric using namespace object;
2481ad6265SDimitry Andric using namespace COFF;
2581ad6265SDimitry Andric
readExecutableHeaders(Object & Obj) const2681ad6265SDimitry Andric Error COFFReader::readExecutableHeaders(Object &Obj) const {
2781ad6265SDimitry Andric const dos_header *DH = COFFObj.getDOSHeader();
2881ad6265SDimitry Andric Obj.Is64 = COFFObj.is64();
2981ad6265SDimitry Andric if (!DH)
3081ad6265SDimitry Andric return Error::success();
3181ad6265SDimitry Andric
3281ad6265SDimitry Andric Obj.IsPE = true;
3381ad6265SDimitry Andric Obj.DosHeader = *DH;
3481ad6265SDimitry Andric if (DH->AddressOfNewExeHeader > sizeof(*DH))
3581ad6265SDimitry Andric Obj.DosStub = ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(&DH[1]),
3681ad6265SDimitry Andric DH->AddressOfNewExeHeader - sizeof(*DH));
3781ad6265SDimitry Andric
3881ad6265SDimitry Andric if (COFFObj.is64()) {
3981ad6265SDimitry Andric Obj.PeHeader = *COFFObj.getPE32PlusHeader();
4081ad6265SDimitry Andric } else {
4181ad6265SDimitry Andric const pe32_header *PE32 = COFFObj.getPE32Header();
4281ad6265SDimitry Andric copyPeHeader(Obj.PeHeader, *PE32);
4381ad6265SDimitry Andric // The pe32plus_header (stored in Object) lacks the BaseOfData field.
4481ad6265SDimitry Andric Obj.BaseOfData = PE32->BaseOfData;
4581ad6265SDimitry Andric }
4681ad6265SDimitry Andric
4781ad6265SDimitry Andric for (size_t I = 0; I < Obj.PeHeader.NumberOfRvaAndSize; I++) {
4881ad6265SDimitry Andric const data_directory *Dir = COFFObj.getDataDirectory(I);
4981ad6265SDimitry Andric if (!Dir)
5081ad6265SDimitry Andric return errorCodeToError(object_error::parse_failed);
5181ad6265SDimitry Andric Obj.DataDirectories.emplace_back(*Dir);
5281ad6265SDimitry Andric }
5381ad6265SDimitry Andric return Error::success();
5481ad6265SDimitry Andric }
5581ad6265SDimitry Andric
readSections(Object & Obj) const5681ad6265SDimitry Andric Error COFFReader::readSections(Object &Obj) const {
5781ad6265SDimitry Andric std::vector<Section> Sections;
5881ad6265SDimitry Andric // Section indexing starts from 1.
5981ad6265SDimitry Andric for (size_t I = 1, E = COFFObj.getNumberOfSections(); I <= E; I++) {
6081ad6265SDimitry Andric Expected<const coff_section *> SecOrErr = COFFObj.getSection(I);
6181ad6265SDimitry Andric if (!SecOrErr)
6281ad6265SDimitry Andric return SecOrErr.takeError();
6381ad6265SDimitry Andric const coff_section *Sec = *SecOrErr;
6481ad6265SDimitry Andric Sections.push_back(Section());
6581ad6265SDimitry Andric Section &S = Sections.back();
6681ad6265SDimitry Andric S.Header = *Sec;
6781ad6265SDimitry Andric S.Header.Characteristics &= ~COFF::IMAGE_SCN_LNK_NRELOC_OVFL;
6881ad6265SDimitry Andric ArrayRef<uint8_t> Contents;
6981ad6265SDimitry Andric if (Error E = COFFObj.getSectionContents(Sec, Contents))
7081ad6265SDimitry Andric return E;
7181ad6265SDimitry Andric S.setContentsRef(Contents);
7281ad6265SDimitry Andric ArrayRef<coff_relocation> Relocs = COFFObj.getRelocations(Sec);
7381ad6265SDimitry Andric for (const coff_relocation &R : Relocs)
7481ad6265SDimitry Andric S.Relocs.push_back(R);
7581ad6265SDimitry Andric if (Expected<StringRef> NameOrErr = COFFObj.getSectionName(Sec))
7681ad6265SDimitry Andric S.Name = *NameOrErr;
7781ad6265SDimitry Andric else
7881ad6265SDimitry Andric return NameOrErr.takeError();
7981ad6265SDimitry Andric }
8081ad6265SDimitry Andric Obj.addSections(Sections);
8181ad6265SDimitry Andric return Error::success();
8281ad6265SDimitry Andric }
8381ad6265SDimitry Andric
readSymbols(Object & Obj,bool IsBigObj) const8481ad6265SDimitry Andric Error COFFReader::readSymbols(Object &Obj, bool IsBigObj) const {
8581ad6265SDimitry Andric std::vector<Symbol> Symbols;
86*bdd1243dSDimitry Andric Symbols.reserve(COFFObj.getNumberOfSymbols());
8781ad6265SDimitry Andric ArrayRef<Section> Sections = Obj.getSections();
88*bdd1243dSDimitry Andric for (uint32_t I = 0, E = COFFObj.getNumberOfSymbols(); I < E;) {
8981ad6265SDimitry Andric Expected<COFFSymbolRef> SymOrErr = COFFObj.getSymbol(I);
9081ad6265SDimitry Andric if (!SymOrErr)
9181ad6265SDimitry Andric return SymOrErr.takeError();
9281ad6265SDimitry Andric COFFSymbolRef SymRef = *SymOrErr;
9381ad6265SDimitry Andric
9481ad6265SDimitry Andric Symbols.push_back(Symbol());
9581ad6265SDimitry Andric Symbol &Sym = Symbols.back();
9681ad6265SDimitry Andric // Copy symbols from the original form into an intermediate coff_symbol32.
9781ad6265SDimitry Andric if (IsBigObj)
9881ad6265SDimitry Andric copySymbol(Sym.Sym,
9981ad6265SDimitry Andric *reinterpret_cast<const coff_symbol32 *>(SymRef.getRawPtr()));
10081ad6265SDimitry Andric else
10181ad6265SDimitry Andric copySymbol(Sym.Sym,
10281ad6265SDimitry Andric *reinterpret_cast<const coff_symbol16 *>(SymRef.getRawPtr()));
10381ad6265SDimitry Andric auto NameOrErr = COFFObj.getSymbolName(SymRef);
10481ad6265SDimitry Andric if (!NameOrErr)
10581ad6265SDimitry Andric return NameOrErr.takeError();
10681ad6265SDimitry Andric Sym.Name = *NameOrErr;
10781ad6265SDimitry Andric
10881ad6265SDimitry Andric ArrayRef<uint8_t> AuxData = COFFObj.getSymbolAuxData(SymRef);
10981ad6265SDimitry Andric size_t SymSize = IsBigObj ? sizeof(coff_symbol32) : sizeof(coff_symbol16);
11081ad6265SDimitry Andric assert(AuxData.size() == SymSize * SymRef.getNumberOfAuxSymbols());
11181ad6265SDimitry Andric // The auxillary symbols are structs of sizeof(coff_symbol16) each.
11281ad6265SDimitry Andric // In the big object format (where symbols are coff_symbol32), each
11381ad6265SDimitry Andric // auxillary symbol is padded with 2 bytes at the end. Copy each
11481ad6265SDimitry Andric // auxillary symbol to the Sym.AuxData vector. For file symbols,
11581ad6265SDimitry Andric // the whole range of aux symbols are interpreted as one null padded
11681ad6265SDimitry Andric // string instead.
11781ad6265SDimitry Andric if (SymRef.isFileRecord())
11881ad6265SDimitry Andric Sym.AuxFile = StringRef(reinterpret_cast<const char *>(AuxData.data()),
11981ad6265SDimitry Andric AuxData.size())
12081ad6265SDimitry Andric .rtrim('\0');
12181ad6265SDimitry Andric else
12281ad6265SDimitry Andric for (size_t I = 0; I < SymRef.getNumberOfAuxSymbols(); I++)
12381ad6265SDimitry Andric Sym.AuxData.push_back(AuxData.slice(I * SymSize, sizeof(AuxSymbol)));
12481ad6265SDimitry Andric
12581ad6265SDimitry Andric // Find the unique id of the section
12681ad6265SDimitry Andric if (SymRef.getSectionNumber() <=
12781ad6265SDimitry Andric 0) // Special symbol (undefined/absolute/debug)
12881ad6265SDimitry Andric Sym.TargetSectionId = SymRef.getSectionNumber();
12981ad6265SDimitry Andric else if (static_cast<uint32_t>(SymRef.getSectionNumber() - 1) <
13081ad6265SDimitry Andric Sections.size())
13181ad6265SDimitry Andric Sym.TargetSectionId = Sections[SymRef.getSectionNumber() - 1].UniqueId;
13281ad6265SDimitry Andric else
13381ad6265SDimitry Andric return createStringError(object_error::parse_failed,
13481ad6265SDimitry Andric "section number out of range");
13581ad6265SDimitry Andric // For section definitions, check if it is comdat associative, and if
13681ad6265SDimitry Andric // it is, find the target section unique id.
13781ad6265SDimitry Andric const coff_aux_section_definition *SD = SymRef.getSectionDefinition();
13881ad6265SDimitry Andric const coff_aux_weak_external *WE = SymRef.getWeakExternal();
13981ad6265SDimitry Andric if (SD && SD->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
14081ad6265SDimitry Andric int32_t Index = SD->getNumber(IsBigObj);
14181ad6265SDimitry Andric if (Index <= 0 || static_cast<uint32_t>(Index - 1) >= Sections.size())
14281ad6265SDimitry Andric return createStringError(object_error::parse_failed,
14381ad6265SDimitry Andric "unexpected associative section index");
14481ad6265SDimitry Andric Sym.AssociativeComdatTargetSectionId = Sections[Index - 1].UniqueId;
14581ad6265SDimitry Andric } else if (WE) {
14681ad6265SDimitry Andric // This is a raw symbol index for now, but store it in the Symbol
14781ad6265SDimitry Andric // until we've added them to the Object, which assigns the final
14881ad6265SDimitry Andric // unique ids.
14981ad6265SDimitry Andric Sym.WeakTargetSymbolId = WE->TagIndex;
15081ad6265SDimitry Andric }
15181ad6265SDimitry Andric I += 1 + SymRef.getNumberOfAuxSymbols();
15281ad6265SDimitry Andric }
15381ad6265SDimitry Andric Obj.addSymbols(Symbols);
15481ad6265SDimitry Andric return Error::success();
15581ad6265SDimitry Andric }
15681ad6265SDimitry Andric
setSymbolTargets(Object & Obj) const15781ad6265SDimitry Andric Error COFFReader::setSymbolTargets(Object &Obj) const {
15881ad6265SDimitry Andric std::vector<const Symbol *> RawSymbolTable;
15981ad6265SDimitry Andric for (const Symbol &Sym : Obj.getSymbols()) {
16081ad6265SDimitry Andric RawSymbolTable.push_back(&Sym);
16181ad6265SDimitry Andric for (size_t I = 0; I < Sym.Sym.NumberOfAuxSymbols; I++)
16281ad6265SDimitry Andric RawSymbolTable.push_back(nullptr);
16381ad6265SDimitry Andric }
16481ad6265SDimitry Andric for (Symbol &Sym : Obj.getMutableSymbols()) {
16581ad6265SDimitry Andric // Convert WeakTargetSymbolId from the original raw symbol index to
16681ad6265SDimitry Andric // a proper unique id.
16781ad6265SDimitry Andric if (Sym.WeakTargetSymbolId) {
16881ad6265SDimitry Andric if (*Sym.WeakTargetSymbolId >= RawSymbolTable.size())
16981ad6265SDimitry Andric return createStringError(object_error::parse_failed,
17081ad6265SDimitry Andric "weak external reference out of range");
17181ad6265SDimitry Andric const Symbol *Target = RawSymbolTable[*Sym.WeakTargetSymbolId];
17281ad6265SDimitry Andric if (Target == nullptr)
17381ad6265SDimitry Andric return createStringError(object_error::parse_failed,
17481ad6265SDimitry Andric "invalid SymbolTableIndex");
17581ad6265SDimitry Andric Sym.WeakTargetSymbolId = Target->UniqueId;
17681ad6265SDimitry Andric }
17781ad6265SDimitry Andric }
17881ad6265SDimitry Andric for (Section &Sec : Obj.getMutableSections()) {
17981ad6265SDimitry Andric for (Relocation &R : Sec.Relocs) {
18081ad6265SDimitry Andric if (R.Reloc.SymbolTableIndex >= RawSymbolTable.size())
18181ad6265SDimitry Andric return createStringError(object_error::parse_failed,
18281ad6265SDimitry Andric "SymbolTableIndex out of range");
18381ad6265SDimitry Andric const Symbol *Sym = RawSymbolTable[R.Reloc.SymbolTableIndex];
18481ad6265SDimitry Andric if (Sym == nullptr)
18581ad6265SDimitry Andric return createStringError(object_error::parse_failed,
18681ad6265SDimitry Andric "invalid SymbolTableIndex");
18781ad6265SDimitry Andric R.Target = Sym->UniqueId;
18881ad6265SDimitry Andric R.TargetName = Sym->Name;
18981ad6265SDimitry Andric }
19081ad6265SDimitry Andric }
19181ad6265SDimitry Andric return Error::success();
19281ad6265SDimitry Andric }
19381ad6265SDimitry Andric
create() const19481ad6265SDimitry Andric Expected<std::unique_ptr<Object>> COFFReader::create() const {
19581ad6265SDimitry Andric auto Obj = std::make_unique<Object>();
19681ad6265SDimitry Andric
19781ad6265SDimitry Andric bool IsBigObj = false;
19881ad6265SDimitry Andric if (const coff_file_header *CFH = COFFObj.getCOFFHeader()) {
19981ad6265SDimitry Andric Obj->CoffFileHeader = *CFH;
20081ad6265SDimitry Andric } else {
20181ad6265SDimitry Andric const coff_bigobj_file_header *CBFH = COFFObj.getCOFFBigObjHeader();
20281ad6265SDimitry Andric if (!CBFH)
20381ad6265SDimitry Andric return createStringError(object_error::parse_failed,
20481ad6265SDimitry Andric "no COFF file header returned");
20581ad6265SDimitry Andric // Only copying the few fields from the bigobj header that we need
20681ad6265SDimitry Andric // and won't recreate in the end.
20781ad6265SDimitry Andric Obj->CoffFileHeader.Machine = CBFH->Machine;
20881ad6265SDimitry Andric Obj->CoffFileHeader.TimeDateStamp = CBFH->TimeDateStamp;
20981ad6265SDimitry Andric IsBigObj = true;
21081ad6265SDimitry Andric }
21181ad6265SDimitry Andric
21281ad6265SDimitry Andric if (Error E = readExecutableHeaders(*Obj))
21381ad6265SDimitry Andric return std::move(E);
21481ad6265SDimitry Andric if (Error E = readSections(*Obj))
21581ad6265SDimitry Andric return std::move(E);
21681ad6265SDimitry Andric if (Error E = readSymbols(*Obj, IsBigObj))
21781ad6265SDimitry Andric return std::move(E);
21881ad6265SDimitry Andric if (Error E = setSymbolTargets(*Obj))
21981ad6265SDimitry Andric return std::move(E);
22081ad6265SDimitry Andric
22181ad6265SDimitry Andric return std::move(Obj);
22281ad6265SDimitry Andric }
22381ad6265SDimitry Andric
22481ad6265SDimitry Andric } // end namespace coff
22581ad6265SDimitry Andric } // end namespace objcopy
22681ad6265SDimitry Andric } // end namespace llvm
227