xref: /llvm-project/lld/COFF/DLL.cpp (revision 3a51466caf93b179f859175b7fe87018a2607e6c)
1 //===- DLL.cpp ------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines various types of chunks for the DLL import or export
10 // descriptor tables. They are inherently Windows-specific.
11 // You need to read Microsoft PE/COFF spec to understand details
12 // about the data structures.
13 //
14 // If you are not particularly interested in linking against Windows
15 // DLL, you can skip this file, and you should still be able to
16 // understand the rest of the linker.
17 //
18 //===----------------------------------------------------------------------===//
19 
20 #include "DLL.h"
21 #include "COFFLinkerContext.h"
22 #include "Chunks.h"
23 #include "SymbolTable.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/Object/COFF.h"
26 #include "llvm/Support/Endian.h"
27 #include "llvm/Support/Path.h"
28 
29 using namespace llvm;
30 using namespace llvm::object;
31 using namespace llvm::support::endian;
32 using namespace llvm::COFF;
33 
34 namespace lld::coff {
35 namespace {
36 
37 // Import table
38 
39 // A chunk for the import descriptor table.
40 class HintNameChunk : public NonSectionChunk {
41 public:
42   HintNameChunk(StringRef n, uint16_t h) : name(n), hint(h) {}
43 
44   size_t getSize() const override {
45     // Starts with 2 byte Hint field, followed by a null-terminated string,
46     // ends with 0 or 1 byte padding.
47     return alignTo(name.size() + 3, 2);
48   }
49 
50   void writeTo(uint8_t *buf) const override {
51     memset(buf, 0, getSize());
52     write16le(buf, hint);
53     memcpy(buf + 2, name.data(), name.size());
54   }
55 
56 private:
57   StringRef name;
58   uint16_t hint;
59 };
60 
61 // A chunk for the import descriptor table.
62 class LookupChunk : public NonSectionChunk {
63 public:
64   explicit LookupChunk(COFFLinkerContext &ctx, Chunk *c)
65       : hintName(c), ctx(ctx) {
66     setAlignment(ctx.config.wordsize);
67   }
68   size_t getSize() const override { return ctx.config.wordsize; }
69 
70   void writeTo(uint8_t *buf) const override {
71     if (ctx.config.is64())
72       write64le(buf, hintName->getRVA());
73     else
74       write32le(buf, hintName->getRVA());
75   }
76 
77   Chunk *hintName;
78 
79 private:
80   COFFLinkerContext &ctx;
81 };
82 
83 // A chunk for the import descriptor table.
84 // This chunk represent import-by-ordinal symbols.
85 // See Microsoft PE/COFF spec 7.1. Import Header for details.
86 class OrdinalOnlyChunk : public NonSectionChunk {
87 public:
88   explicit OrdinalOnlyChunk(COFFLinkerContext &c, uint16_t v)
89       : ordinal(v), ctx(c) {
90     setAlignment(ctx.config.wordsize);
91   }
92   size_t getSize() const override { return ctx.config.wordsize; }
93 
94   void writeTo(uint8_t *buf) const override {
95     // An import-by-ordinal slot has MSB 1 to indicate that
96     // this is import-by-ordinal (and not import-by-name).
97     if (ctx.config.is64()) {
98       write64le(buf, (1ULL << 63) | ordinal);
99     } else {
100       write32le(buf, (1ULL << 31) | ordinal);
101     }
102   }
103 
104   uint16_t ordinal;
105 
106 private:
107   COFFLinkerContext &ctx;
108 };
109 
110 // A chunk for the import descriptor table.
111 class ImportDirectoryChunk : public NonSectionChunk {
112 public:
113   explicit ImportDirectoryChunk(Chunk *n) : dllName(n) { setAlignment(4); }
114   size_t getSize() const override { return sizeof(ImportDirectoryTableEntry); }
115 
116   void writeTo(uint8_t *buf) const override {
117     memset(buf, 0, getSize());
118 
119     auto *e = (coff_import_directory_table_entry *)(buf);
120     e->ImportLookupTableRVA = lookupTab->getRVA();
121     e->NameRVA = dllName->getRVA();
122     e->ImportAddressTableRVA = addressTab->getRVA();
123   }
124 
125   Chunk *dllName;
126   Chunk *lookupTab;
127   Chunk *addressTab;
128 };
129 
130 // A chunk representing null terminator in the import table.
131 // Contents of this chunk is always null bytes.
132 class NullChunk : public NonSectionChunk {
133 public:
134   explicit NullChunk(size_t n, uint32_t align) : size(n) {
135     setAlignment(align);
136   }
137   explicit NullChunk(COFFLinkerContext &ctx)
138       : NullChunk(ctx.config.wordsize, ctx.config.wordsize) {}
139   explicit NullChunk(COFFLinkerContext &ctx, size_t n)
140       : NullChunk(n, ctx.config.wordsize) {}
141   size_t getSize() const override { return size; }
142 
143   void writeTo(uint8_t *buf) const override {
144     memset(buf, 0, size);
145   }
146 
147 private:
148   size_t size;
149 };
150 
151 // A chunk for ARM64EC auxiliary IAT.
152 class AuxImportChunk : public NonSectionChunk {
153 public:
154   explicit AuxImportChunk(ImportFile *file) : file(file) {
155     setAlignment(sizeof(uint64_t));
156   }
157   size_t getSize() const override { return sizeof(uint64_t); }
158 
159   void writeTo(uint8_t *buf) const override {
160     uint64_t impchkVA = 0;
161     if (file->impchkThunk)
162       impchkVA =
163           file->impchkThunk->getRVA() + file->symtab.ctx.config.imageBase;
164     write64le(buf, impchkVA);
165   }
166 
167   void getBaserels(std::vector<Baserel> *res) override {
168     if (file->impchkThunk)
169       res->emplace_back(rva, file->symtab.machine);
170   }
171 
172 private:
173   ImportFile *file;
174 };
175 
176 static std::vector<std::vector<DefinedImportData *>>
177 binImports(COFFLinkerContext &ctx,
178            const std::vector<DefinedImportData *> &imports) {
179   // Group DLL-imported symbols by DLL name because that's how
180   // symbols are laid out in the import descriptor table.
181   auto less = [&ctx](const std::string &a, const std::string &b) {
182     return ctx.config.dllOrder[a] < ctx.config.dllOrder[b];
183   };
184   std::map<std::string, std::vector<DefinedImportData *>, decltype(less)> m(
185       less);
186   for (DefinedImportData *sym : imports)
187     m[sym->getDLLName().lower()].push_back(sym);
188 
189   std::vector<std::vector<DefinedImportData *>> v;
190   for (auto &kv : m) {
191     // Sort symbols by name for each group.
192     std::vector<DefinedImportData *> &syms = kv.second;
193     llvm::sort(syms, [](DefinedImportData *a, DefinedImportData *b) {
194       auto getBaseName = [](DefinedImportData *sym) {
195         StringRef name = sym->getName();
196         name.consume_front("__imp_");
197         // Skip aux_ part of ARM64EC function symbol name.
198         if (sym->file->impchkThunk)
199           name.consume_front("aux_");
200         return name;
201       };
202       return getBaseName(a) < getBaseName(b);
203     });
204     v.push_back(std::move(syms));
205   }
206   return v;
207 }
208 
209 // See Microsoft PE/COFF spec 4.3 for details.
210 
211 // A chunk for the delay import descriptor table etnry.
212 class DelayDirectoryChunk : public NonSectionChunk {
213 public:
214   explicit DelayDirectoryChunk(Chunk *n) : dllName(n) { setAlignment(4); }
215 
216   size_t getSize() const override {
217     return sizeof(delay_import_directory_table_entry);
218   }
219 
220   void writeTo(uint8_t *buf) const override {
221     memset(buf, 0, getSize());
222 
223     auto *e = (delay_import_directory_table_entry *)(buf);
224     e->Attributes = 1;
225     e->Name = dllName->getRVA();
226     e->ModuleHandle = moduleHandle->getRVA();
227     e->DelayImportAddressTable = addressTab->getRVA();
228     e->DelayImportNameTable = nameTab->getRVA();
229   }
230 
231   Chunk *dllName;
232   Chunk *moduleHandle;
233   Chunk *addressTab;
234   Chunk *nameTab;
235 };
236 
// Initial contents for delay-loaded functions.
// This code calls __delayLoadHelper2 function to resolve a symbol
// which then overwrites its jump table slot with the result
// for subsequent function calls.
//
// x64 per-import thunk template. ThunkChunkX64::writeTo copies these bytes
// and patches the two zeroed 32-bit displacements at byte offsets 3 (lea)
// and 8 (jmp).
static const uint8_t thunkX64[] = {
    0x48, 0x8D, 0x05, 0, 0, 0, 0,       // lea     rax, [__imp_<FUNCNAME>]
    0xE9, 0, 0, 0, 0,                   // jmp     __tailMerge_<lib>
};
245 
// x64 tail-merge template. It saves rcx, rdx, r8, r9 and xmm0-xmm3, calls
// the helper with the descriptor in rcx and the thunk's rax value in rdx,
// restores the saved registers and jumps to the address returned in rax.
// TailMergeChunkX64::writeTo patches the zeroed 32-bit displacements at byte
// offsets 39 (lea) and 44 (call).
static const uint8_t tailMergeX64[] = {
    0x51,                               // push    rcx
    0x52,                               // push    rdx
    0x41, 0x50,                         // push    r8
    0x41, 0x51,                         // push    r9
    0x48, 0x83, 0xEC, 0x48,             // sub     rsp, 48h
    0x66, 0x0F, 0x7F, 0x04, 0x24,       // movdqa  xmmword ptr [rsp], xmm0
    0x66, 0x0F, 0x7F, 0x4C, 0x24, 0x10, // movdqa  xmmword ptr [rsp+10h], xmm1
    0x66, 0x0F, 0x7F, 0x54, 0x24, 0x20, // movdqa  xmmword ptr [rsp+20h], xmm2
    0x66, 0x0F, 0x7F, 0x5C, 0x24, 0x30, // movdqa  xmmword ptr [rsp+30h], xmm3
    0x48, 0x8B, 0xD0,                   // mov     rdx, rax
    0x48, 0x8D, 0x0D, 0, 0, 0, 0,       // lea     rcx, [___DELAY_IMPORT_...]
    0xE8, 0, 0, 0, 0,                   // call    __delayLoadHelper2
    0x66, 0x0F, 0x6F, 0x04, 0x24,       // movdqa  xmm0, xmmword ptr [rsp]
    0x66, 0x0F, 0x6F, 0x4C, 0x24, 0x10, // movdqa  xmm1, xmmword ptr [rsp+10h]
    0x66, 0x0F, 0x6F, 0x54, 0x24, 0x20, // movdqa  xmm2, xmmword ptr [rsp+20h]
    0x66, 0x0F, 0x6F, 0x5C, 0x24, 0x30, // movdqa  xmm3, xmmword ptr [rsp+30h]
    0x48, 0x83, 0xC4, 0x48,             // add     rsp, 48h
    0x41, 0x59,                         // pop     r9
    0x41, 0x58,                         // pop     r8
    0x5A,                               // pop     rdx
    0x59,                               // pop     rcx
    0xFF, 0xE0,                         // jmp     rax
};
270 
// Unwind info bytes that TailMergeUnwindInfoX64::writeTo copies verbatim.
// The unwind code offsets (1, 2, 4, 6 and 0xa) coincide with the ends of the
// four pushes and of the `sub rsp, 48h` at the start of tailMergeX64.
static const uint8_t tailMergeUnwindInfoX64[] = {
    0x01,       // Version=1, Flags=UNW_FLAG_NHANDLER
    0x0a,       // Size of prolog
    0x05,       // Count of unwind codes
    0x00,       // No frame register
    0x0a, 0x82, // Offset 0xa: UWOP_ALLOC_SMALL(0x48)
    0x06, 0x02, // Offset 6: UWOP_ALLOC_SMALL(8)
    0x04, 0x02, // Offset 4: UWOP_ALLOC_SMALL(8)
    0x02, 0x02, // Offset 2: UWOP_ALLOC_SMALL(8)
    0x01, 0x02, // Offset 1: UWOP_ALLOC_SMALL(8)
    0x00, 0x00  // Padding to align on 32-bits
};
283 
// x86 per-import thunk template. ThunkChunkX86::writeTo patches the 32-bit
// immediate at byte offset 1 with an absolute address (RVA + image base,
// hence the base relocation at that offset) and the jmp displacement at byte
// offset 6.
static const uint8_t thunkX86[] = {
    0xB8, 0, 0, 0, 0,  // mov   eax, offset ___imp__<FUNCNAME>
    0xE9, 0, 0, 0, 0,  // jmp   __tailMerge_<lib>
};
288 
// x86 tail-merge template. TailMergeChunkX86::writeTo patches the 32-bit
// immediate of the `push offset` at byte offset 4 with an absolute address
// (RVA + image base, hence the base relocation at that offset) and the call
// displacement at byte offset 9.
static const uint8_t tailMergeX86[] = {
    0x51,              // push  ecx
    0x52,              // push  edx
    0x50,              // push  eax
    0x68, 0, 0, 0, 0,  // push  offset ___DELAY_IMPORT_DESCRIPTOR_<DLLNAME>_dll
    0xE8, 0, 0, 0, 0,  // call  ___delayLoadHelper2@8
    0x5A,              // pop   edx
    0x59,              // pop   ecx
    0xFF, 0xE0,        // jmp   eax
};
299 
// ARM (Thumb-2) per-import thunk template. ThunkChunkARM::writeTo applies a
// MOV32T fixup to the mov.w/movt pair at byte offset 0 and a 24-bit branch
// fixup to the b.w at byte offset 8.
static const uint8_t thunkARM[] = {
    0x40, 0xf2, 0x00, 0x0c, // mov.w   ip, #0 __imp_<FUNCNAME>
    0xc0, 0xf2, 0x00, 0x0c, // movt    ip, #0 __imp_<FUNCNAME>
    0x00, 0xf0, 0x00, 0xb8, // b.w     __tailMerge_<lib>
};
305 
// ARM (Thumb-2) tail-merge template. TailMergeChunkARM::writeTo applies a
// MOV32T fixup to the mov.w/movt pair at byte offset 14 and a 24-bit branch
// fixup to the bl at byte offset 22.
static const uint8_t tailMergeARM[] = {
    0x2d, 0xe9, 0x0f, 0x48, // push.w  {r0, r1, r2, r3, r11, lr}
    0x0d, 0xf2, 0x10, 0x0b, // addw    r11, sp, #16
    0x2d, 0xed, 0x10, 0x0b, // vpush   {d0, d1, d2, d3, d4, d5, d6, d7}
    0x61, 0x46,             // mov     r1, ip
    0x40, 0xf2, 0x00, 0x00, // mov.w   r0, #0 DELAY_IMPORT_DESCRIPTOR
    0xc0, 0xf2, 0x00, 0x00, // movt    r0, #0 DELAY_IMPORT_DESCRIPTOR
    0x00, 0xf0, 0x00, 0xd0, // bl      #0 __delayLoadHelper2
    0x84, 0x46,             // mov     ip, r0
    0xbd, 0xec, 0x10, 0x0b, // vpop    {d0, d1, d2, d3, d4, d5, d6, d7}
    0xbd, 0xe8, 0x0f, 0x48, // pop.w   {r0, r1, r2, r3, r11, lr}
    0x60, 0x47,             // bx      ip
};
319 
// ARM64 per-import thunk template. ThunkChunkARM64::writeTo fixes up the
// adrp at byte offset 0, the add immediate at byte offset 4 and the branch
// at byte offset 8.
static const uint8_t thunkARM64[] = {
    0x11, 0x00, 0x00, 0x90, // adrp    x17, #0      __imp_<FUNCNAME>
    0x31, 0x02, 0x00, 0x91, // add     x17, x17, #0 :lo12:__imp_<FUNCNAME>
    0x00, 0x00, 0x00, 0x14, // b       __tailMerge_<lib>
};
325 
// ARM64 tail-merge template. It saves x0-x7 and q0-q7 in a 208-byte frame,
// calls the helper with the descriptor in x0 and the thunk's x17 value in
// x1, restores the saved registers and branches to the returned address.
// TailMergeChunkARM64::writeTo fixes up the adrp at byte offset 44, the add
// immediate at byte offset 48 and the bl at byte offset 52.
static const uint8_t tailMergeARM64[] = {
    0xfd, 0x7b, 0xb3, 0xa9, // stp     x29, x30, [sp, #-208]!
    0xfd, 0x03, 0x00, 0x91, // mov     x29, sp
    0xe0, 0x07, 0x01, 0xa9, // stp     x0, x1, [sp, #16]
    0xe2, 0x0f, 0x02, 0xa9, // stp     x2, x3, [sp, #32]
    0xe4, 0x17, 0x03, 0xa9, // stp     x4, x5, [sp, #48]
    0xe6, 0x1f, 0x04, 0xa9, // stp     x6, x7, [sp, #64]
    0xe0, 0x87, 0x02, 0xad, // stp     q0, q1, [sp, #80]
    0xe2, 0x8f, 0x03, 0xad, // stp     q2, q3, [sp, #112]
    0xe4, 0x97, 0x04, 0xad, // stp     q4, q5, [sp, #144]
    0xe6, 0x9f, 0x05, 0xad, // stp     q6, q7, [sp, #176]
    0xe1, 0x03, 0x11, 0xaa, // mov     x1, x17
    0x00, 0x00, 0x00, 0x90, // adrp    x0, #0     DELAY_IMPORT_DESCRIPTOR
    0x00, 0x00, 0x00, 0x91, // add     x0, x0, #0 :lo12:DELAY_IMPORT_DESCRIPTOR
    0x00, 0x00, 0x00, 0x94, // bl      #0 __delayLoadHelper2
    0xf0, 0x03, 0x00, 0xaa, // mov     x16, x0
    0xe6, 0x9f, 0x45, 0xad, // ldp     q6, q7, [sp, #176]
    0xe4, 0x97, 0x44, 0xad, // ldp     q4, q5, [sp, #144]
    0xe2, 0x8f, 0x43, 0xad, // ldp     q2, q3, [sp, #112]
    0xe0, 0x87, 0x42, 0xad, // ldp     q0, q1, [sp, #80]
    0xe6, 0x1f, 0x44, 0xa9, // ldp     x6, x7, [sp, #64]
    0xe4, 0x17, 0x43, 0xa9, // ldp     x4, x5, [sp, #48]
    0xe2, 0x0f, 0x42, 0xa9, // ldp     x2, x3, [sp, #32]
    0xe0, 0x07, 0x41, 0xa9, // ldp     x0, x1, [sp, #16]
    0xfd, 0x7b, 0xcd, 0xa8, // ldp     x29, x30, [sp], #208
    0x00, 0x02, 0x1f, 0xd6, // br      x16
};
353 
354 // A chunk for the delay import thunk.
355 class ThunkChunkX64 : public NonSectionCodeChunk {
356 public:
357   ThunkChunkX64(Defined *i, Chunk *tm) : imp(i), tailMerge(tm) {}
358 
359   size_t getSize() const override { return sizeof(thunkX64); }
360   MachineTypes getMachine() const override { return AMD64; }
361 
362   void writeTo(uint8_t *buf) const override {
363     memcpy(buf, thunkX64, sizeof(thunkX64));
364     write32le(buf + 3, imp->getRVA() - rva - 7);
365     write32le(buf + 8, tailMerge->getRVA() - rva - 12);
366   }
367 
368   Defined *imp = nullptr;
369   Chunk *tailMerge = nullptr;
370 };
371 
372 class TailMergeChunkX64 : public NonSectionCodeChunk {
373 public:
374   TailMergeChunkX64(Chunk *d, Defined *h) : desc(d), helper(h) {}
375 
376   size_t getSize() const override { return sizeof(tailMergeX64); }
377   MachineTypes getMachine() const override { return AMD64; }
378 
379   void writeTo(uint8_t *buf) const override {
380     memcpy(buf, tailMergeX64, sizeof(tailMergeX64));
381     write32le(buf + 39, desc->getRVA() - rva - 43);
382     write32le(buf + 44, helper->getRVA() - rva - 48);
383   }
384 
385   Chunk *desc = nullptr;
386   Defined *helper = nullptr;
387 };
388 
389 class TailMergePDataChunkX64 : public NonSectionChunk {
390 public:
391   TailMergePDataChunkX64(Chunk *tm, Chunk *unwind) : tm(tm), unwind(unwind) {
392     // See
393     // https://learn.microsoft.com/en-us/cpp/build/exception-handling-x64#struct-runtime_function
394     setAlignment(4);
395   }
396 
397   size_t getSize() const override { return 3 * sizeof(uint32_t); }
398   MachineTypes getMachine() const override { return AMD64; }
399 
400   void writeTo(uint8_t *buf) const override {
401     write32le(buf + 0, tm->getRVA()); // TailMergeChunk start RVA
402     write32le(buf + 4, tm->getRVA() + tm->getSize()); // TailMergeChunk stop RVA
403     write32le(buf + 8, unwind->getRVA());             // UnwindInfo RVA
404   }
405 
406   Chunk *tm = nullptr;
407   Chunk *unwind = nullptr;
408 };
409 
410 class TailMergeUnwindInfoX64 : public NonSectionChunk {
411 public:
412   TailMergeUnwindInfoX64() {
413     // See
414     // https://learn.microsoft.com/en-us/cpp/build/exception-handling-x64#struct-unwind_info
415     setAlignment(4);
416   }
417 
418   size_t getSize() const override { return sizeof(tailMergeUnwindInfoX64); }
419   MachineTypes getMachine() const override { return AMD64; }
420 
421   void writeTo(uint8_t *buf) const override {
422     memcpy(buf, tailMergeUnwindInfoX64, sizeof(tailMergeUnwindInfoX64));
423   }
424 };
425 
426 class ThunkChunkX86 : public NonSectionCodeChunk {
427 public:
428   ThunkChunkX86(COFFLinkerContext &ctx, Defined *i, Chunk *tm)
429       : imp(i), tailMerge(tm), ctx(ctx) {}
430 
431   size_t getSize() const override { return sizeof(thunkX86); }
432   MachineTypes getMachine() const override { return I386; }
433 
434   void writeTo(uint8_t *buf) const override {
435     memcpy(buf, thunkX86, sizeof(thunkX86));
436     write32le(buf + 1, imp->getRVA() + ctx.config.imageBase);
437     write32le(buf + 6, tailMerge->getRVA() - rva - 10);
438   }
439 
440   void getBaserels(std::vector<Baserel> *res) override {
441     res->emplace_back(rva + 1, ctx.config.machine);
442   }
443 
444   Defined *imp = nullptr;
445   Chunk *tailMerge = nullptr;
446 
447 private:
448   const COFFLinkerContext &ctx;
449 };
450 
451 class TailMergeChunkX86 : public NonSectionCodeChunk {
452 public:
453   TailMergeChunkX86(COFFLinkerContext &ctx, Chunk *d, Defined *h)
454       : desc(d), helper(h), ctx(ctx) {}
455 
456   size_t getSize() const override { return sizeof(tailMergeX86); }
457   MachineTypes getMachine() const override { return I386; }
458 
459   void writeTo(uint8_t *buf) const override {
460     memcpy(buf, tailMergeX86, sizeof(tailMergeX86));
461     write32le(buf + 4, desc->getRVA() + ctx.config.imageBase);
462     write32le(buf + 9, helper->getRVA() - rva - 13);
463   }
464 
465   void getBaserels(std::vector<Baserel> *res) override {
466     res->emplace_back(rva + 4, ctx.config.machine);
467   }
468 
469   Chunk *desc = nullptr;
470   Defined *helper = nullptr;
471 
472 private:
473   const COFFLinkerContext &ctx;
474 };
475 
476 class ThunkChunkARM : public NonSectionCodeChunk {
477 public:
478   ThunkChunkARM(COFFLinkerContext &ctx, Defined *i, Chunk *tm)
479       : imp(i), tailMerge(tm), ctx(ctx) {
480     setAlignment(2);
481   }
482 
483   size_t getSize() const override { return sizeof(thunkARM); }
484   MachineTypes getMachine() const override { return ARMNT; }
485 
486   void writeTo(uint8_t *buf) const override {
487     memcpy(buf, thunkARM, sizeof(thunkARM));
488     applyMOV32T(buf + 0, imp->getRVA() + ctx.config.imageBase);
489     applyBranch24T(buf + 8, tailMerge->getRVA() - rva - 12);
490   }
491 
492   void getBaserels(std::vector<Baserel> *res) override {
493     res->emplace_back(rva + 0, IMAGE_REL_BASED_ARM_MOV32T);
494   }
495 
496   Defined *imp = nullptr;
497   Chunk *tailMerge = nullptr;
498 
499 private:
500   const COFFLinkerContext &ctx;
501 };
502 
503 class TailMergeChunkARM : public NonSectionCodeChunk {
504 public:
505   TailMergeChunkARM(COFFLinkerContext &ctx, Chunk *d, Defined *h)
506       : desc(d), helper(h), ctx(ctx) {
507     setAlignment(2);
508   }
509 
510   size_t getSize() const override { return sizeof(tailMergeARM); }
511   MachineTypes getMachine() const override { return ARMNT; }
512 
513   void writeTo(uint8_t *buf) const override {
514     memcpy(buf, tailMergeARM, sizeof(tailMergeARM));
515     applyMOV32T(buf + 14, desc->getRVA() + ctx.config.imageBase);
516     applyBranch24T(buf + 22, helper->getRVA() - rva - 26);
517   }
518 
519   void getBaserels(std::vector<Baserel> *res) override {
520     res->emplace_back(rva + 14, IMAGE_REL_BASED_ARM_MOV32T);
521   }
522 
523   Chunk *desc = nullptr;
524   Defined *helper = nullptr;
525 
526 private:
527   const COFFLinkerContext &ctx;
528 };
529 
530 class ThunkChunkARM64 : public NonSectionCodeChunk {
531 public:
532   ThunkChunkARM64(Defined *i, Chunk *tm) : imp(i), tailMerge(tm) {
533     setAlignment(4);
534   }
535 
536   size_t getSize() const override { return sizeof(thunkARM64); }
537   MachineTypes getMachine() const override { return ARM64; }
538 
539   void writeTo(uint8_t *buf) const override {
540     memcpy(buf, thunkARM64, sizeof(thunkARM64));
541     applyArm64Addr(buf + 0, imp->getRVA(), rva + 0, 12);
542     applyArm64Imm(buf + 4, imp->getRVA() & 0xfff, 0);
543     applyArm64Branch26(buf + 8, tailMerge->getRVA() - rva - 8);
544   }
545 
546   Defined *imp = nullptr;
547   Chunk *tailMerge = nullptr;
548 };
549 
550 class TailMergeChunkARM64 : public NonSectionCodeChunk {
551 public:
552   TailMergeChunkARM64(Chunk *d, Defined *h) : desc(d), helper(h) {
553     setAlignment(4);
554   }
555 
556   size_t getSize() const override { return sizeof(tailMergeARM64); }
557   MachineTypes getMachine() const override { return ARM64; }
558 
559   void writeTo(uint8_t *buf) const override {
560     memcpy(buf, tailMergeARM64, sizeof(tailMergeARM64));
561     applyArm64Addr(buf + 44, desc->getRVA(), rva + 44, 12);
562     applyArm64Imm(buf + 48, desc->getRVA() & 0xfff, 0);
563     applyArm64Branch26(buf + 52, helper->getRVA() - rva - 52);
564   }
565 
566   Chunk *desc = nullptr;
567   Defined *helper = nullptr;
568 };
569 
570 // A chunk for the import descriptor table.
571 class DelayAddressChunk : public NonSectionChunk {
572 public:
573   explicit DelayAddressChunk(COFFLinkerContext &ctx, Chunk *c)
574       : thunk(c), ctx(ctx) {
575     setAlignment(ctx.config.wordsize);
576   }
577   size_t getSize() const override { return ctx.config.wordsize; }
578 
579   void writeTo(uint8_t *buf) const override {
580     if (ctx.config.is64()) {
581       write64le(buf, thunk->getRVA() + ctx.config.imageBase);
582     } else {
583       uint32_t bit = 0;
584       // Pointer to thumb code must have the LSB set, so adjust it.
585       if (ctx.config.machine == ARMNT)
586         bit = 1;
587       write32le(buf, (thunk->getRVA() + ctx.config.imageBase) | bit);
588     }
589   }
590 
591   void getBaserels(std::vector<Baserel> *res) override {
592     res->emplace_back(rva, ctx.config.machine);
593   }
594 
595   Chunk *thunk;
596 
597 private:
598   const COFFLinkerContext &ctx;
599 };
600 
601 // Export table
602 // Read Microsoft PE/COFF spec 5.3 for details.
603 
604 // A chunk for the export descriptor table.
605 class ExportDirectoryChunk : public NonSectionChunk {
606 public:
607   ExportDirectoryChunk(int baseOrdinal, int maxOrdinal, int nameTabSize,
608                        Chunk *d, Chunk *a, Chunk *n, Chunk *o)
609       : baseOrdinal(baseOrdinal), maxOrdinal(maxOrdinal),
610         nameTabSize(nameTabSize), dllName(d), addressTab(a), nameTab(n),
611         ordinalTab(o) {}
612 
613   size_t getSize() const override {
614     return sizeof(export_directory_table_entry);
615   }
616 
617   void writeTo(uint8_t *buf) const override {
618     memset(buf, 0, getSize());
619 
620     auto *e = (export_directory_table_entry *)(buf);
621     e->NameRVA = dllName->getRVA();
622     e->OrdinalBase = baseOrdinal;
623     e->AddressTableEntries = (maxOrdinal - baseOrdinal) + 1;
624     e->NumberOfNamePointers = nameTabSize;
625     e->ExportAddressTableRVA = addressTab->getRVA();
626     e->NamePointerRVA = nameTab->getRVA();
627     e->OrdinalTableRVA = ordinalTab->getRVA();
628   }
629 
630   uint16_t baseOrdinal;
631   uint16_t maxOrdinal;
632   uint16_t nameTabSize;
633   Chunk *dllName;
634   Chunk *addressTab;
635   Chunk *nameTab;
636   Chunk *ordinalTab;
637 };
638 
639 class AddressTableChunk : public NonSectionChunk {
640 public:
641   explicit AddressTableChunk(SymbolTable &symtab, size_t baseOrdinal,
642                              size_t maxOrdinal)
643       : baseOrdinal(baseOrdinal), size((maxOrdinal - baseOrdinal) + 1),
644         symtab(symtab) {}
645   size_t getSize() const override { return size * 4; }
646 
647   void writeTo(uint8_t *buf) const override {
648     memset(buf, 0, getSize());
649 
650     for (const Export &e : symtab.exports) {
651       assert(e.ordinal >= baseOrdinal && "Export symbol has invalid ordinal");
652       // Subtract the OrdinalBase to get the index.
653       uint8_t *p = buf + (e.ordinal - baseOrdinal) * 4;
654       uint32_t bit = 0;
655       // Pointer to thumb code must have the LSB set, so adjust it.
656       if (symtab.machine == ARMNT && !e.data)
657         bit = 1;
658       if (e.forwardChunk) {
659         write32le(p, e.forwardChunk->getRVA() | bit);
660       } else {
661         assert(cast<Defined>(e.sym)->getRVA() != 0 &&
662                "Exported symbol unmapped");
663         write32le(p, cast<Defined>(e.sym)->getRVA() | bit);
664       }
665     }
666   }
667 
668 private:
669   size_t baseOrdinal;
670   size_t size;
671   const SymbolTable &symtab;
672 };
673 
674 class NamePointersChunk : public NonSectionChunk {
675 public:
676   explicit NamePointersChunk(std::vector<Chunk *> &v) : chunks(v) {}
677   size_t getSize() const override { return chunks.size() * 4; }
678 
679   void writeTo(uint8_t *buf) const override {
680     for (Chunk *c : chunks) {
681       write32le(buf, c->getRVA());
682       buf += 4;
683     }
684   }
685 
686 private:
687   std::vector<Chunk *> chunks;
688 };
689 
690 class ExportOrdinalChunk : public NonSectionChunk {
691 public:
692   explicit ExportOrdinalChunk(const SymbolTable &symtab, size_t baseOrdinal,
693                               size_t tableSize)
694       : baseOrdinal(baseOrdinal), size(tableSize), symtab(symtab) {}
695   size_t getSize() const override { return size * 2; }
696 
697   void writeTo(uint8_t *buf) const override {
698     for (const Export &e : symtab.exports) {
699       if (e.noname)
700         continue;
701       assert(e.ordinal >= baseOrdinal && "Export symbol has invalid ordinal");
702       // This table stores unbiased indices, so subtract OrdinalBase.
703       write16le(buf, e.ordinal - baseOrdinal);
704       buf += 2;
705     }
706   }
707 
708 private:
709   size_t baseOrdinal;
710   size_t size;
711   const SymbolTable &symtab;
712 };
713 
714 } // anonymous namespace
715 
716 void IdataContents::create(COFFLinkerContext &ctx) {
717   std::vector<std::vector<DefinedImportData *>> v = binImports(ctx, imports);
718 
719   // In hybrid images, EC and native code are usually very similar,
720   // resulting in a highly similar set of imported symbols. Consequently,
721   // their import tables can be shared, with ARM64X relocations handling any
722   // differences. Identify matching import files used by EC and native code, and
723   // merge them into a single hybrid import entry.
724   if (ctx.hybridSymtab) {
725     for (std::vector<DefinedImportData *> &syms : v) {
726       std::vector<DefinedImportData *> hybridSyms;
727       ImportFile *prev = nullptr;
728       for (DefinedImportData *sym : syms) {
729         ImportFile *file = sym->file;
730         // At this stage, symbols are sorted by base name, ensuring that
731         // compatible import files, if present, are adjacent. Check if the
732         // current symbol's file imports the same symbol as the previously added
733         // one (if any and if it was not already merged). Additionally, verify
734         // that one of them is native while the other is EC. In rare cases,
735         // separate matching import entries may exist within the same namespace,
736         // which cannot be merged.
737         if (!prev || file->isEC() == prev->isEC() ||
738             !file->isSameImport(prev)) {
739           // We can't merge the import file, just add it to hybridSyms
740           // and set prev to its file so that we can try to match the next
741           // symbol.
742           hybridSyms.push_back(sym);
743           prev = file;
744           continue;
745         }
746 
747         // A matching symbol may appear in syms in any order. The native variant
748         // exposes a subset of EC symbols and chunks, so always use the EC
749         // variant as the hybrid import file. If the native file was already
750         // added, replace it with the EC symbol in hybridSyms. Otherwise, the EC
751         // variant is already pushed, so we can simply merge it.
752         if (file->isEC()) {
753           hybridSyms.pop_back();
754           hybridSyms.push_back(sym);
755         }
756 
757         // Merge import files by storing their hybrid form in the corresponding
758         // file class.
759         prev->hybridFile = file;
760         file->hybridFile = prev;
761         prev = nullptr; // A hybrid import file cannot be merged again.
762       }
763 
764       // Sort symbols by type: native-only files first, followed by merged
765       // hybrid files, and then EC-only files.
766       llvm::stable_sort(hybridSyms,
767                         [](DefinedImportData *a, DefinedImportData *b) {
768                           if (a->file->hybridFile)
769                             return !b->file->hybridFile && b->file->isEC();
770                           return !a->file->isEC() && b->file->isEC();
771                         });
772       syms = std::move(hybridSyms);
773     }
774   }
775 
776   // Create .idata contents for each DLL.
777   for (std::vector<DefinedImportData *> &syms : v) {
778     // Create lookup and address tables. If they have external names,
779     // we need to create hintName chunks to store the names.
780     // If they don't (if they are import-by-ordinals), we store only
781     // ordinal values to the table.
782     size_t base = lookups.size();
783     Chunk *lookupsTerminator = nullptr, *addressesTerminator = nullptr;
784     for (DefinedImportData *s : syms) {
785       uint16_t ord = s->getOrdinal();
786       HintNameChunk *hintChunk = nullptr;
787       Chunk *lookupsChunk, *addressesChunk;
788 
789       if (s->getExternalName().empty()) {
790         lookupsChunk = make<OrdinalOnlyChunk>(ctx, ord);
791         addressesChunk = make<OrdinalOnlyChunk>(ctx, ord);
792       } else {
793         hintChunk = make<HintNameChunk>(s->getExternalName(), ord);
794         lookupsChunk = make<LookupChunk>(ctx, hintChunk);
795         addressesChunk = make<LookupChunk>(ctx, hintChunk);
796         hints.push_back(hintChunk);
797       }
798 
799       // Detect the first EC-only import in the hybrid IAT. Emit null chunk
800       // as a terminator for the native view, and add an ARM64X relocation to
801       // replace it with the correct import for the EC view.
802       //
803       // Additionally, for MSVC compatibility, store the lookup and address
804       // chunks and append them at the end of EC-only imports, where a null
805       // terminator chunk would typically be placed. Since they appear after
806       // the native terminator, they will be ignored in the native view.
807       // In the EC view, they should act as terminators, so emit ZEROFILL
808       // relocations overriding them.
809       if (ctx.hybridSymtab && !lookupsTerminator && s->file->isEC() &&
810           !s->file->hybridFile) {
811         lookupsTerminator = lookupsChunk;
812         addressesTerminator = addressesChunk;
813         lookupsChunk = make<NullChunk>(ctx);
814         addressesChunk = make<NullChunk>(ctx);
815 
816         Arm64XRelocVal relocVal = hintChunk;
817         if (!hintChunk)
818           relocVal = (1ULL << 63) | ord;
819         ctx.dynamicRelocs->add(IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE,
820                                sizeof(uint64_t), lookupsChunk, relocVal);
821         ctx.dynamicRelocs->add(IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE,
822                                sizeof(uint64_t), addressesChunk, relocVal);
823         ctx.dynamicRelocs->add(IMAGE_DVRT_ARM64X_FIXUP_TYPE_ZEROFILL,
824                                sizeof(uint64_t), lookupsTerminator);
825         ctx.dynamicRelocs->add(IMAGE_DVRT_ARM64X_FIXUP_TYPE_ZEROFILL,
826                                sizeof(uint64_t), addressesTerminator);
827       }
828 
829       lookups.push_back(lookupsChunk);
830       addresses.push_back(addressesChunk);
831 
832       if (s->file->isEC()) {
833         auto chunk = make<AuxImportChunk>(s->file);
834         auxIat.push_back(chunk);
835         s->file->impECSym->setLocation(chunk);
836 
837         chunk = make<AuxImportChunk>(s->file);
838         auxIatCopy.push_back(chunk);
839         s->file->auxImpCopySym->setLocation(chunk);
840       } else if (ctx.hybridSymtab) {
841         // Fill the auxiliary IAT with null chunks for native-only imports.
842         auxIat.push_back(make<NullChunk>(ctx));
843         auxIatCopy.push_back(make<NullChunk>(ctx));
844       }
845     }
846     // Terminate with null values.
847     lookups.push_back(lookupsTerminator ? lookupsTerminator
848                                         : make<NullChunk>(ctx));
849     addresses.push_back(addressesTerminator ? addressesTerminator
850                                             : make<NullChunk>(ctx));
851     if (ctx.symtabEC) {
852       auxIat.push_back(make<NullChunk>(ctx));
853       auxIatCopy.push_back(make<NullChunk>(ctx));
854     }
855 
856     for (int i = 0, e = syms.size(); i < e; ++i) {
857       syms[i]->setLocation(addresses[base + i]);
858       if (syms[i]->file->hybridFile)
859         syms[i]->file->hybridFile->impSym->setLocation(addresses[base + i]);
860     }
861 
862     // Create the import table header.
863     dllNames.push_back(make<StringChunk>(syms[0]->getDLLName()));
864     auto *dir = make<ImportDirectoryChunk>(dllNames.back());
865     dir->lookupTab = lookups[base];
866     dir->addressTab = addresses[base];
867     dirs.push_back(dir);
868 
869     if (ctx.hybridSymtab) {
870       // If native-only imports exist, they will appear as a prefix to all
871       // imports. Emit ARM64X relocations to skip them in the EC view.
872       uint32_t nativeOnly =
873           llvm::find_if(syms,
874                         [](DefinedImportData *s) { return s->file->isEC(); }) -
875           syms.begin();
876       if (nativeOnly) {
877         ctx.dynamicRelocs->add(
878             IMAGE_DVRT_ARM64X_FIXUP_TYPE_DELTA, 0,
879             Arm64XRelocVal(
880                 dir, offsetof(ImportDirectoryTableEntry, ImportLookupTableRVA)),
881             nativeOnly * sizeof(uint64_t));
882         ctx.dynamicRelocs->add(
883             IMAGE_DVRT_ARM64X_FIXUP_TYPE_DELTA, 0,
884             Arm64XRelocVal(dir, offsetof(ImportDirectoryTableEntry,
885                                          ImportAddressTableRVA)),
886             nativeOnly * sizeof(uint64_t));
887       }
888     }
889   }
890   // Add null terminator.
891   dirs.push_back(make<NullChunk>(sizeof(ImportDirectoryTableEntry), 4));
892 }
893 
894 std::vector<Chunk *> DelayLoadContents::getChunks() {
895   std::vector<Chunk *> v;
896   v.insert(v.end(), dirs.begin(), dirs.end());
897   v.insert(v.end(), names.begin(), names.end());
898   v.insert(v.end(), hintNames.begin(), hintNames.end());
899   v.insert(v.end(), dllNames.begin(), dllNames.end());
900   return v;
901 }
902 
903 std::vector<Chunk *> DelayLoadContents::getDataChunks() {
904   std::vector<Chunk *> v;
905   v.insert(v.end(), moduleHandles.begin(), moduleHandles.end());
906   v.insert(v.end(), addresses.begin(), addresses.end());
907   return v;
908 }
909 
910 uint64_t DelayLoadContents::getDirSize() {
911   return dirs.size() * sizeof(delay_import_directory_table_entry);
912 }
913 
914 void DelayLoadContents::create() {
915   std::vector<std::vector<DefinedImportData *>> v = binImports(ctx, imports);
916 
917   // Create .didat contents for each DLL.
918   for (std::vector<DefinedImportData *> &syms : v) {
919     // Create the delay import table header.
920     dllNames.push_back(make<StringChunk>(syms[0]->getDLLName()));
921     auto *dir = make<DelayDirectoryChunk>(dllNames.back());
922 
923     size_t base = addresses.size();
924     ctx.forEachSymtab([&](SymbolTable &symtab) {
925       if (ctx.hybridSymtab && symtab.isEC()) {
926         // For hybrid images, emit null-terminated native import entries
927         // followed by null-terminated EC entries. If a view is missing imports
928         // for a given module, only terminators are emitted. Emit ARM64X
929         // relocations to skip native entries in the EC view.
930         ctx.dynamicRelocs->add(
931             IMAGE_DVRT_ARM64X_FIXUP_TYPE_DELTA, 0,
932             Arm64XRelocVal(dir, offsetof(delay_import_directory_table_entry,
933                                          DelayImportAddressTable)),
934             (addresses.size() - base) * sizeof(uint64_t));
935         ctx.dynamicRelocs->add(
936             IMAGE_DVRT_ARM64X_FIXUP_TYPE_DELTA, 0,
937             Arm64XRelocVal(dir, offsetof(delay_import_directory_table_entry,
938                                          DelayImportNameTable)),
939             (addresses.size() - base) * sizeof(uint64_t));
940       }
941 
942       Chunk *tm = nullptr;
943 
944       for (DefinedImportData *s : syms) {
945         // Process only the symbols belonging to the current symtab.
946         if (symtab.isEC() != s->file->isEC())
947           continue;
948 
949         if (!tm) {
950           tm = newTailMergeChunk(symtab, dir);
951           Chunk *pdataChunk = newTailMergePDataChunk(symtab, tm);
952           if (pdataChunk)
953             pdata.push_back(pdataChunk);
954         }
955 
956         Chunk *t = newThunkChunk(s, tm);
957         auto *a = make<DelayAddressChunk>(ctx, t);
958         addresses.push_back(a);
959         s->setLocation(a);
960         thunks.push_back(t);
961         StringRef extName = s->getExternalName();
962         if (extName.empty()) {
963           names.push_back(make<OrdinalOnlyChunk>(ctx, s->getOrdinal()));
964         } else {
965           auto *c = make<HintNameChunk>(extName, 0);
966           names.push_back(make<LookupChunk>(ctx, c));
967           hintNames.push_back(c);
968           // Add a synthetic symbol for this load thunk, using the
969           // "__imp___load" prefix, in case this thunk needs to be added to the
970           // list of valid call targets for Control Flow Guard.
971           StringRef symName = saver().save("__imp___load_" + extName);
972           s->loadThunkSym =
973               cast<DefinedSynthetic>(symtab.addSynthetic(symName, t));
974         }
975 
976         if (symtab.isEC()) {
977           auto chunk = make<AuxImportChunk>(s->file);
978           auxIat.push_back(chunk);
979           s->file->impECSym->setLocation(chunk);
980 
981           chunk = make<AuxImportChunk>(s->file);
982           auxIatCopy.push_back(chunk);
983           s->file->auxImpCopySym->setLocation(chunk);
984         } else if (ctx.hybridSymtab) {
985           // Fill the auxiliary IAT with null chunks for native imports.
986           auxIat.push_back(make<NullChunk>(ctx));
987           auxIatCopy.push_back(make<NullChunk>(ctx));
988         }
989       }
990 
991       if (tm) {
992         thunks.push_back(tm);
993         StringRef tmName =
994             saver().save("__tailMerge_" + syms[0]->getDLLName().lower());
995         symtab.addSynthetic(tmName, tm);
996       }
997 
998       // Terminate with null values.
999       addresses.push_back(make<NullChunk>(ctx, 8));
1000       names.push_back(make<NullChunk>(ctx, 8));
1001       if (ctx.symtabEC) {
1002         auxIat.push_back(make<NullChunk>(ctx, 8));
1003         auxIatCopy.push_back(make<NullChunk>(ctx, 8));
1004       }
1005     });
1006 
1007     auto *mh = make<NullChunk>(8, 8);
1008     moduleHandles.push_back(mh);
1009 
1010     // Fill the delay import table header fields.
1011     dir->moduleHandle = mh;
1012     dir->addressTab = addresses[base];
1013     dir->nameTab = names[base];
1014     dirs.push_back(dir);
1015   }
1016 
1017   ctx.forEachSymtab([&](SymbolTable &symtab) {
1018     if (symtab.tailMergeUnwindInfoChunk)
1019       unwindinfo.push_back(symtab.tailMergeUnwindInfoChunk);
1020   });
1021   // Add null terminator.
1022   dirs.push_back(
1023       make<NullChunk>(sizeof(delay_import_directory_table_entry), 4));
1024 }
1025 
1026 Chunk *DelayLoadContents::newTailMergeChunk(SymbolTable &symtab, Chunk *dir) {
1027   auto helper = cast<Defined>(symtab.delayLoadHelper);
1028   switch (symtab.machine) {
1029   case AMD64:
1030   case ARM64EC:
1031     return make<TailMergeChunkX64>(dir, helper);
1032   case I386:
1033     return make<TailMergeChunkX86>(ctx, dir, helper);
1034   case ARMNT:
1035     return make<TailMergeChunkARM>(ctx, dir, helper);
1036   case ARM64:
1037     return make<TailMergeChunkARM64>(dir, helper);
1038   default:
1039     llvm_unreachable("unsupported machine type");
1040   }
1041 }
1042 
1043 Chunk *DelayLoadContents::newTailMergePDataChunk(SymbolTable &symtab,
1044                                                  Chunk *tm) {
1045   switch (symtab.machine) {
1046   case AMD64:
1047   case ARM64EC:
1048     if (!symtab.tailMergeUnwindInfoChunk)
1049       symtab.tailMergeUnwindInfoChunk = make<TailMergeUnwindInfoX64>();
1050     return make<TailMergePDataChunkX64>(tm, symtab.tailMergeUnwindInfoChunk);
1051     // FIXME: Add support for other architectures.
1052   default:
1053     return nullptr; // Just don't generate unwind info.
1054   }
1055 }
1056 
1057 Chunk *DelayLoadContents::newThunkChunk(DefinedImportData *s,
1058                                         Chunk *tailMerge) {
1059   switch (s->file->getMachineType()) {
1060   case AMD64:
1061   case ARM64EC:
1062     return make<ThunkChunkX64>(s, tailMerge);
1063   case I386:
1064     return make<ThunkChunkX86>(ctx, s, tailMerge);
1065   case ARMNT:
1066     return make<ThunkChunkARM>(ctx, s, tailMerge);
1067   case ARM64:
1068     return make<ThunkChunkARM64>(s, tailMerge);
1069   default:
1070     llvm_unreachable("unsupported machine type");
1071   }
1072 }
1073 
1074 void createEdataChunks(SymbolTable &symtab, std::vector<Chunk *> &chunks) {
1075   unsigned baseOrdinal = 1 << 16, maxOrdinal = 0;
1076   for (Export &e : symtab.exports) {
1077     baseOrdinal = std::min(baseOrdinal, (unsigned)e.ordinal);
1078     maxOrdinal = std::max(maxOrdinal, (unsigned)e.ordinal);
1079   }
1080   // Ordinals must start at 1 as suggested in:
1081   // https://learn.microsoft.com/en-us/cpp/build/reference/export-exports-a-function?view=msvc-170
1082   assert(baseOrdinal >= 1);
1083 
1084   auto *dllName =
1085       make<StringChunk>(sys::path::filename(symtab.ctx.config.outputFile));
1086   auto *addressTab = make<AddressTableChunk>(symtab, baseOrdinal, maxOrdinal);
1087   std::vector<Chunk *> names;
1088   for (Export &e : symtab.exports)
1089     if (!e.noname)
1090       names.push_back(make<StringChunk>(e.exportName));
1091 
1092   std::vector<Chunk *> forwards;
1093   for (Export &e : symtab.exports) {
1094     if (e.forwardTo.empty())
1095       continue;
1096     e.forwardChunk = make<StringChunk>(e.forwardTo);
1097     forwards.push_back(e.forwardChunk);
1098   }
1099 
1100   auto *nameTab = make<NamePointersChunk>(names);
1101   auto *ordinalTab =
1102       make<ExportOrdinalChunk>(symtab, baseOrdinal, names.size());
1103   auto *dir =
1104       make<ExportDirectoryChunk>(baseOrdinal, maxOrdinal, names.size(), dllName,
1105                                  addressTab, nameTab, ordinalTab);
1106   chunks.push_back(dir);
1107   chunks.push_back(dllName);
1108   chunks.push_back(addressTab);
1109   chunks.push_back(nameTab);
1110   chunks.push_back(ordinalTab);
1111   chunks.insert(chunks.end(), names.begin(), names.end());
1112   chunks.insert(chunks.end(), forwards.begin(), forwards.end());
1113 }
1114 
1115 } // namespace lld::coff
1116